12 Commits

Author SHA1 Message Date
pstruebi 3759a026d4 update mock database with wireguard internal addresses 2025-03-27 16:42:39 +00:00
pstruebi 6bfbc6e180 add speaker wav 2025-03-25 12:44:26 +01:00
pstruebi e02593c78d use different xtts voice 2025-03-25 12:34:16 +01:00
pstruebi c095b058d6 add basic support for xtts 2025-03-25 12:02:59 +01:00
pstruebi a9dbe52a7e update environment setup 2025-03-25 09:22:42 +00:00
pstruebi 1d4a2b3b45 update lock file 2025-03-25 09:54:04 +01:00
pstruebi 36dd34b042 some adjustements for server deployment 2025-03-25 08:53:22 +00:00
pstruebi 4971f1e7f6 update docker build 2025-03-23 16:36:33 +01:00
pstruebi 466fb1762e update dockerfile 2025-03-20 12:25:01 +00:00
pstruebi b9ca04af82 change project structure for packaging that makes sense with poetry 2025-03-20 11:56:33 +01:00
pstruebi 17cf41166b adjust dependencies and dockerfiles 2025-03-19 13:15:04 +00:00
pstruebi 5e5c3e2040 refractoring 2025-03-19 13:45:46 +01:00
33 changed files with 5660 additions and 109 deletions
+1
View File
@@ -1,5 +1,6 @@
*.pyc *.pyc
*.wav *.wav
!/src/voice_provider/speaker_wav/*
*.lc3 *.lc3
*.onnx *.onnx
*.onnx.json *.onnx.json
+35
View File
@@ -0,0 +1,35 @@
FROM python:3.11-slim
# Install system dependencies and poetry
RUN apt-get update && apt-get install -y \
git \
gcc \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Disable strict SSH host key checking so the build accepts the git server's host key without prompting
RUN sed /^StrictHostKeyChecking/d /etc/ssh/ssh_config; \
echo StrictHostKeyChecking no >> /etc/ssh/ssh_config
# Install and configure poetry
RUN --mount=type=cache,target=/root/.cache \
pip install poetry
RUN poetry config virtualenvs.create false
WORKDIR /app
# copy the app code
COPY ./src .
COPY poetry.lock .
COPY pyproject.toml .
# Install the project with all dependencies
RUN --mount=type=cache,target=/root/.cache \
--mount=type=ssh,required=true \
poetry install --no-interaction --without dev --no-root
# Expose the API port
EXPOSE 7999
# Run the translator server directly from the module path
CMD ["python", "-m", "auracast_translator.translator_server.translator_server"]
+13
View File
@@ -0,0 +1,13 @@
services:
auracast-translator:
build:
context: .
dockerfile: Dockerfile
ssh:
- default=~/.ssh/id_ed25519 #lappi
ports:
- "7999:7999"
environment:
- PYTHONUNBUFFERED=1
restart: unless-stopped
Generated
+5392
View File
File diff suppressed because it is too large Load Diff
+38 -22
View File
@@ -1,27 +1,43 @@
[project] [tool.poetry]
name = "multilang_translator" name = "auracast_translator"
requires-python = ">= 3.11" version = "0.1.0"
version = '0.1' authors = ["Patrick S <pstruebi>"]
description = "Announcement System"
dependencies = [ readme = "readme.md"
"auracast @git+https://git@gitea.pstruebi.xyz/auracaster/bumble-auracast", packages = [
"requests==2.32.3", { include = "translator_models", from = "src" },
"ollama==0.4.7", { include = "translator_client", from = "src" },
"aioconsole==0.8.1", { include = "auracast_translator", from = "src" },
"fastapi==0.115.11",
"uvicorn==0.34.0",
"aiohttp==3.9.3",
] ]
[project.optional-dependencies] [tool.poetry.dependencies]
test = [ python = "~3.11"
"pytest >= 8.2", setuptools= ">=77"
] coqui-tts = "0.26"
[tool.poetry.group.tts.dependencies] [tool.poetry.group.general.dependencies]
piper-phonemize = "==1.1.0" requests="2.32.3"
piper-tts = "==1.2.0" aiohttp="3.9.3"
fastapi="0.115.11"
uvicorn="0.34.0"
ollama="0.4.7"
piper-tts="1.2.0"
librosa="0.10.1"
aioconsole="0.8.1"
lc3 = { git = "ssh://git@ssh.pstruebi.xyz:222/auracaster/liblc3.git", rev = "7558637303106c7ea971e7bb8cedf379d3e08bcc" }
auracast = { git = "ssh://git@ssh.pstruebi.xyz:222/auracaster/bumble-auracast.git" }
#[tool.poetry.group.gpu.dependencies]
#onnxruntime-gpu = "^1.20.1"
# TODO: for running piper on gpu investigate
# https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html#requirements
# put everything in pytorch container according to piper github:
# https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch
# Use a separate container for the voice provider
[tool.poetry.group.dev.dependencies]
pytest = {version=">8.2", optional=true}
[tool.pytest.ini_options] [tool.pytest.ini_options]
addopts = [ addopts = [
@@ -29,5 +45,5 @@ addopts = [
] ]
[build-system] [build-system]
requires = ["setuptools>=61", "wheel", "setuptools_scm>=8"] requires = ["poetry-core"]
build-backend = "setuptools.build_meta" build-backend = "poetry.core.masonry.api"
+3 -8
View File
@@ -1,11 +1,6 @@
# Prerequisites # Install the project
sudo apt install liblc3-tools poetry env use python3.11
poetry install
use python3.9
pip install piper-tts soundfile librosa pyserial pytest
# Piper update voices # Piper update voices
piper --update-voices -m en_US-lessac-medium piper --update-voices -m en_US-lessac-medium
# TODO:
- investigate using a pipeline instead of writing to intermediate files to gain performance
@@ -10,14 +10,14 @@ from auracast import auracast_config
import voice_client import voice_client
import voice_models import voice_models
from multilang_translator import translator_config from auracast_translator.translator_models import translator_models
from multilang_translator.translator import llm_translator from auracast_translator.translator import llm_translator
import voice_client.tts_client import voice_client.tts_client
import voice_models.request_models import voice_models.request_models
async def announcement_from_german_text( async def announcement_from_german_text(
config: translator_config.TranslatorConfigGroup, config: translator_models.TranslatorConfigGroup,
text_de text_de
): ):
base_lang = "deu" base_lang = "deu"
@@ -9,9 +9,9 @@ from auracast import multicast_control
from auracast import auracast_config from auracast import auracast_config
from voice_provider import text_to_speech from voice_provider import text_to_speech
from multilang_translator import translator_config from auracast_translator import translator_config
from multilang_translator.translator import llm_translator from auracast_translator.translator import llm_translator
from multilang_translator.translator.test_content import TESTSENTENCE from auracast_translator.translator.test_content import TESTSENTENCE
# TODO: look for an end-to-end translation solution # TODO: look for an end-to-end translation solution
@@ -6,7 +6,7 @@ import time
import ollama import ollama
import aiohttp import aiohttp
from multilang_translator.translator import syspromts from auracast_translator.translator import syspromts
# ollama.create( # TODO: create models on startup # ollama.create( # TODO: create models on startup
# model='example', # model='example',
@@ -125,7 +125,7 @@ async def translate_de_to_x_async(
if __name__ == "__main__": if __name__ == "__main__":
import time import time
from multilang_translator.translator import test_content from auracast_translator.translator import test_content
start=time.time() start=time.time()
@@ -3,7 +3,7 @@ Database file for endpoint definitions.
This file contains configurations for auracast endpoints including their IP addresses and capabilities. This file contains configurations for auracast endpoints including their IP addresses and capabilities.
""" """
from typing import List, Optional from typing import List, Optional
from multilang_translator.translator_models.translator_models import EndpointGroup, Endpoint from translator_models.translator_models import EndpointGroup, Endpoint
SUPPORTED_LANGUAGES = ["deu", "eng", "fra", "spa", "ita"] SUPPORTED_LANGUAGES = ["deu", "eng", "fra", "spa", "ita"]
@@ -13,19 +13,19 @@ ENDPOINTS: dict[int: Endpoint] = { # for now make sure, .id and key are the same
0: Endpoint( 0: Endpoint(
id=0, id=0,
name="Local Endpoint", name="Local Endpoint",
url="http://localhost:5000", url="http://10.13.13.3:5000", #"http://localhost:5000", #patricks laptop
max_broadcasts=3, max_broadcasts=3,
), ),
1: Endpoint( 1: Endpoint(
id=1, id=1,
name="Gate 1", name="Gate 1",
url="http://pi3:5000", url="http://10.13.13.4:5000", #pi4
max_broadcasts=3, max_broadcasts=3,
), ),
2: Endpoint( 2: Endpoint(
id=2, id=2,
name="Gate 2", name="Gate 2",
url="http://192.168.1.102:5000", url="http://10.13.13.5:5000",
max_broadcasts=3, max_broadcasts=3,
), ),
} }
@@ -7,7 +7,7 @@ import logging as log
import sys import sys
import os import os
# Add the parent directory to the Python path to find the multilang_translator package # Add the parent directory to the Python path to find the auracast_translator package
current_dir = os.path.dirname(os.path.abspath(__file__)) current_dir = os.path.dirname(os.path.abspath(__file__))
parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(current_dir))) parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(current_dir)))
if parent_dir not in sys.path: if parent_dir not in sys.path:
@@ -20,7 +20,7 @@ if __name__ == "__main__":
) )
log.info("Starting Translator API server") log.info("Starting Translator API server")
uvicorn.run( uvicorn.run(
"multilang_translator.translator_server.translator_server:app", "auracast_translator.translator_server.translator_server:app",
host="0.0.0.0", host="0.0.0.0",
port=7999, port=7999,
reload=True, reload=True,
@@ -11,9 +11,9 @@ from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
# Import models # Import models
from multilang_translator.translator_models.translator_models import AnnouncementStates, Endpoint, EndpointGroup from translator_models.translator_models import AnnouncementStates, Endpoint, EndpointGroup
from multilang_translator.translator import llm_translator from auracast_translator.translator import llm_translator
from multilang_translator.translator_server import endpoints_db from auracast_translator.translator_server import endpoints_db
from voice_provider import text_to_speech from voice_provider import text_to_speech
# Import the endpoints database and multicast client # Import the endpoints database and multicast client
@@ -154,7 +154,10 @@ async def make_announcement(text: str, ep_group: EndpointGroup):
ep_group.sampling_rate_hz, ep_group.sampling_rate_hz,
trans_conf.tts_system, trans_conf.tts_system,
trans_conf.tts_model, trans_conf.tts_model,
return_lc3=True return_lc3=True,
language=trans_conf.xtts_language,
speaker=trans_conf.xtts_speaker,
speaker_wav=trans_conf.xtts_speaker_wav
) )
synthesis_tasks.append(task) synthesis_tasks.append(task)
@@ -329,9 +332,10 @@ async def get_available_languages():
if __name__ == "__main__": if __name__ == "__main__":
import os
import uvicorn import uvicorn
log.basicConfig( log.basicConfig(
level=log.DEBUG, level=os.environ.get('LOG_LEVEL', log.DEBUG),
format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s' format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s'
) )
# with reload=True logging of modules does not function as expected # with reload=True logging of modules does not function as expected
@@ -342,5 +346,5 @@ if __name__ == "__main__":
port=7999, port=7999,
#reload=True, #reload=True,
#log_config=None, #log_config=None,
#log_level="info" log_level="debug"
) )
@@ -1,20 +0,0 @@
import os
from pydantic import BaseModel
VENV_DIR = os.path.join(os.path.dirname(__file__), './../../venv')
class TranslatorLangConfig(BaseModel):
translator_llm: str = 'llama3.2:3b-instruct-q4_0' # TODO: this was migrated to translator_models - remove this
llm_client: str = 'ollama'
llm_host_url: str | None = 'http://localhost:11434'
llm_host_token: str | None = None
tts_system: str = 'piper'
tts_model: str ='de_DE-kerstin-low'
class TranslatorConfig(BaseModel):
deu: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'de_DE-thorsten-high')
eng: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'en_GB-alba-medium')
fra: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'fr_FR-siwis-medium')
spa: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'es_ES-sharvard-medium')
ita: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'it_IT-paola-medium')
@@ -3,10 +3,9 @@ API client functions for interacting with the Translator API.
""" """
import requests import requests
from typing import List, Optional, Dict, Any, Tuple from typing import List, Optional, Dict, Any, Tuple
from enum import Enum
from multilang_translator.translator_models.translator_models import AnnouncementStates, Endpoint, EndpointGroup from translator_models.translator_models import AnnouncementStates, Endpoint, EndpointGroup
# This can be overridden through environment variables # This can be overridden through environment variables
@@ -35,12 +35,19 @@ class TranslatorLangConfig(BaseModel):
# llm_host_url: str | None = 'http://localhost:11434' # llm_host_url: str | None = 'http://localhost:11434'
# llm_host_token: str | None = None # llm_host_token: str | None = None
tts_system: str = 'piper' tts_system: str = 'piper' # Options: 'piper', 'xtts'
tts_model: str ='de_DE-kerstin-low' tts_model: str = 'de_DE-kerstin-low' # For piper: model name, for xtts: unused
xtts_language: str = 'de' # Language code for XTTS
xtts_speaker: Optional[str] = None # Speaker name for XTTS
xtts_speaker_wav: Optional[str] = None # Path to speaker sample for XTTS voice cloning
class TranslatorConfig(BaseModel): class TranslatorConfig(BaseModel):
deu: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'de_DE-thorsten-high') deu: TranslatorLangConfig = TranslatorLangConfig(
tts_system='xtts',
xtts_language='de',
xtts_speaker_wav='female.wav'
)
eng: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'en_GB-alba-medium') eng: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'en_GB-alba-medium')
fra: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'fr_FR-siwis-medium') fra: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'fr_FR-siwis-medium')
spa: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'es_ES-sharvard-medium') spa: TranslatorLangConfig = TranslatorLangConfig(tts_model = 'es_ES-sharvard-medium')
+4 -1
View File
@@ -1,4 +1,5 @@
from pydantic import BaseModel from pydantic import BaseModel
from typing import Optional
class SynthesizeRequest(BaseModel): class SynthesizeRequest(BaseModel):
text: str text: str
@@ -6,4 +7,6 @@ class SynthesizeRequest(BaseModel):
framework: str = "piper" framework: str = "piper"
model: str = "en_US-lessac-medium" model: str = "en_US-lessac-medium"
return_lc3: bool = False return_lc3: bool = False
language: str = "en" # Language code for XTTS
speaker: Optional[str] = None # Speaker name for XTTS
speaker_wav: Optional[str] = None # Path to speaker sample for XTTS voice cloning
Binary file not shown.
+108 -14
View File
@@ -1,4 +1,7 @@
import os import os
# Set environment variable to auto-accept Coqui TTS license
os.environ["COQUI_TOS_AGREED"] = "1"
import shutil import shutil
import subprocess import subprocess
import time import time
@@ -6,16 +9,35 @@ import json
import logging as log import logging as log
import numpy as np import numpy as np
import asyncio import asyncio
import torch
from voice_provider.utils.resample import resample_array from voice_provider.utils.resample import resample_array
from voice_provider.utils.encode_lc3 import encode_lc3 from voice_provider.utils.encode_lc3 import encode_lc3
# Now import TTS - the license will be auto-accepted
from TTS.api import TTS
# Get device for XTTS
if torch.cuda.is_available():
log.info('XTTS will run on GPU')
XTTS_DEVICE = "cuda"
else:
log.info('XTTS will run on CPU')
XTTS_DEVICE = "cpu"
# Load XTTS model globally - only once
log.info("Initializing XTTS model...")
start_init = time.time()
XTTS_MODEL = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(XTTS_DEVICE)
end_init = time.time()
log.info(f"XTTS initialization completed in {end_init - start_init:.2f} seconds")
PIPER_EXE = shutil.which('piper') PIPER_EXE = shutil.which('piper')
TTS_DIR = os.path.join(os.path.dirname(__file__)) TTS_DIR = os.path.join(os.path.dirname(__file__))
PIPER_WORKDIR = f'{TTS_DIR}/piper' PIPER_WORKDIR = f'{TTS_DIR}/piper'
if not PIPER_EXE: if not PIPER_EXE:
PIPER_EXE = f'{TTS_DIR}/../../venv/bin/piper' PIPER_EXE = f'{TTS_DIR}/../../.venv/bin/piper'
def synth_piper(text, model="en_US-lessac-medium"): def synth_piper(text, model="en_US-lessac-medium"):
pwd = os.getcwd() pwd = os.getcwd()
@@ -26,7 +48,7 @@ def synth_piper(text, model="en_US-lessac-medium"):
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model
[ [
PIPER_EXE, PIPER_EXE,
'--cuda', #'--cuda',
'--model', model, '--model', model,
'--output-raw' '--output-raw'
], ],
@@ -52,7 +74,10 @@ def synthesize(
target_sample_rate, target_sample_rate,
framework, framework,
model="en_US-lessac-medium", model="en_US-lessac-medium",
return_lc3=True return_lc3=True,
language="en",
speaker=None,
speaker_wav=None
): ):
if framework == 'piper': if framework == 'piper':
@@ -64,7 +89,40 @@ def synthesize(
elif framework == 'koro': elif framework == 'koro':
pass pass
elif framework == 'xtts': elif framework == 'xtts':
pass start = time.time()
# Generate audio using XTTS
# XTTS always outputs at 24kHz
xtts_sample_rate = 24000
# Validate speaker parameters - XTTS needs either speaker or speaker_wav
if speaker is None and speaker_wav is None:
# Use the first available speaker if none specified
speaker = XTTS_MODEL.speakers[0]
log.info(f"No speaker specified, using default: {speaker}")
# Generate audio samples using tts.tts
if speaker_wav:
# expand path to speaker_wav folder
speaker_wav = os.path.join(os.path.dirname(__file__), 'speaker_wav', speaker_wav)
log.info(f"Generating XTTS audio with speaker_wav: {speaker_wav}")
audio_list = XTTS_MODEL.tts(text=text, speaker_wav=speaker_wav, language=language)
else:
log.info(f"Generating XTTS audio with speaker: {speaker}")
audio_list = XTTS_MODEL.tts(text=text, speaker=speaker, language=language)
# Ensure audio_np is a numpy array and properly scaled
audio_np = np.array(audio_list, dtype=np.float32)# / (2**15-1)
# Log some info about the audio data
log.info(f"XTTS audio shape: {audio_np.shape}, dtype: {audio_np.dtype}, "
f"min: {audio_np.min():.4f}, max: {audio_np.max():.4f}")
# Resample from 24kHz to the target sample rate (no speedup is applied here)
audio = resample_array(audio_np, xtts_sample_rate, target_sample_rate)
log.info(f"XTTS synthesis completed in {time.time() - start:.2f} seconds")
elif framework == 'zonos': elif framework == 'zonos':
pass pass
else: raise NotImplementedError('unknown framework') else: raise NotImplementedError('unknown framework')
@@ -82,7 +140,10 @@ async def synthesize_async(
target_sample_rate, target_sample_rate,
framework, framework,
model="en_US-lessac-medium", model="en_US-lessac-medium",
return_lc3=True return_lc3=True,
language="en",
speaker=None,
speaker_wav=None
): ):
""" """
Asynchronous version of the synthesize function that runs in a thread pool. Asynchronous version of the synthesize function that runs in a thread pool.
@@ -90,9 +151,12 @@ async def synthesize_async(
Args: Args:
text: Text to synthesize text: Text to synthesize
target_sample_rate: Target sample rate for the audio target_sample_rate: Target sample rate for the audio
framework: TTS framework to use (e.g., 'piper') framework: TTS framework to use (e.g., 'piper', 'xtts')
model: Model to use for synthesis model: Model to use for synthesis
return_lc3: Whether to return LC3-encoded audio return_lc3: Whether to return LC3-encoded audio
language: Language code (used by XTTS)
speaker: Speaker ID for XTTS
speaker_wav: Path to speaker sample for XTTS voice cloning
Returns: Returns:
LC3-encoded audio as string or raw audio as numpy array LC3-encoded audio as string or raw audio as numpy array
@@ -101,23 +165,53 @@ async def synthesize_async(
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
result = await loop.run_in_executor( result = await loop.run_in_executor(
None, None,
lambda: synthesize(text, target_sample_rate, framework, model, return_lc3) lambda: synthesize(
text,
target_sample_rate,
framework,
model,
return_lc3,
language,
speaker,
speaker_wav
)
) )
return result return result
if __name__ == '__main__': if __name__ == '__main__':
import logging
import soundfile as sf import soundfile as sf
logging.basicConfig( log.basicConfig(
level=logging.INFO, level=log.INFO,
format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s' format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s'
) )
target_rate=16000 target_rate = 16000
audio = synthesize('Hello World', target_rate, 'piper', model= 'de_DE-kerstin-low', return_lc3=False) # First, print available XTTS speakers
print("Available XTTS speakers:")
sf.write('hello.wav', audio, target_rate) print(XTTS_MODEL.speakers)
# Demo of Piper
print("Testing Piper TTS...")
audio_piper = synthesize('Hello World', target_rate, 'piper', model='de_DE-kerstin-low', return_lc3=False)
sf.write('hello_piper.wav', audio_piper, target_rate)
# Demo of XTTS with Annmarie Nele for German
speaker_wav = 'female.wav'
print(f"Testing XTTS with German language using speaker: {speaker_wav}")
text_to_synthesize = "Dies ist ein Test der XTTS Stimme auf Deutsch mit Annmarie Nele als Sprecherin."
audio_xtts = synthesize(
text=text_to_synthesize,
target_sample_rate=target_rate,
framework='xtts',
language='de',
speaker_wav=speaker_wav,
return_lc3=False
)
# Save the wav file
sf.write('hello_xtts_german.wav', audio_xtts, target_rate)
print('Done.') print('Done.')
+4 -1
View File
@@ -17,7 +17,10 @@ async def synthesize_speech(request: SynthesizeRequest):
target_sample_rate=request.target_sample_rate, target_sample_rate=request.target_sample_rate,
framework=request.framework, framework=request.framework,
model=request.model, model=request.model,
return_lc3=request.return_lc3 return_lc3=request.return_lc3,
language=request.language,
speaker=request.speaker,
speaker_wav=request.speaker_wav
) )
if request.return_lc3: if request.return_lc3:
+15 -6
View File
@@ -24,20 +24,29 @@ def resample_file(filename, out_filename, target_rate):
log.info("Resampling of %s took %s s", os.path.basename(filename), round(time.time() - start, 3)) log.info("Resampling of %s took %s s", os.path.basename(filename), round(time.time() - start, 3))
def resample_array(audio, rate, target_rate): def resample_array(audio, rate, target_rate, speedup=1.0):
start=time.time() start=time.time()
# Load the original audio file # Load the original audio file
if rate == target_rate: # Nothing to do if rate == target_rate and speedup == 1.0: # Nothing to do
log.info('audio already at target rate, skipping resample') log.info('audio already at target rate with no speedup, skipping resample')
return audio return audio
# Apply speedup if needed
if speedup != 1.0:
# Treat the input as if it had been sampled at rate * speedup, so that for
# speedup > 1 the resampler emits fewer samples, which shortens the audio duration
effective_orig_sr = rate * speedup
log.info(f"Applying speedup factor of {speedup}")
else:
effective_orig_sr = rate
# Convert the sample rate to target rate # Convert the sample rate to target rate
resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate) resampled_audio = librosa.resample(audio, orig_sr=effective_orig_sr, target_sr=target_rate)
# Save the resampled audio as a new .wav file # Save the resampled audio as a new .wav file
log.info("Resampling took %s s", round(time.time() - start, 3)) log.info("Resampling took %s s", round(time.time() - start, 3))
return resampled_audio return resampled_audio
@@ -45,4 +54,4 @@ if __name__ == "__main__":
import os import os
os.chdir(os.path.dirname(__file__)) os.chdir(os.path.dirname(__file__))
file_dir = '../text_to_speech/' file_dir = '../text_to_speech/'
resample_file(f'{file_dir}/welcome.wav', f'{file_dir}/welcome_resampled.wav') resample_file(f'{file_dir}/welcome.wav', f'{file_dir}/welcome_resampled.wav', 16000)
+3 -3
View File
@@ -4,9 +4,9 @@ import time
import os import os
import subprocess import subprocess
from multilang_translator.backend_controller.broadcaster_config import broadcaster_config, BROADCAST_CONFIG from auracast_translator.backend_controller.broadcaster_config import broadcaster_config, BROADCAST_CONFIG
from multilang_translator.backend_controller.broadcaster_play_once import broadcaster_play_file from auracast_translator.backend_controller.broadcaster_play_once import broadcaster_play_file
from multilang_translator.backend_controller.broadcaster_copy_files import copy_to_broadcaster from auracast_translator.backend_controller.broadcaster_copy_files import copy_to_broadcaster
log.basicConfig( log.basicConfig(
level=log.INFO, level=log.INFO,
+3 -3
View File
@@ -4,9 +4,9 @@ import time
import os import os
import subprocess import subprocess
from multilang_translator.translator_config import LANG_CONFIG from auracast_translator.translator_config import LANG_CONFIG
from multilang_translator.backend_controller.broadcaster_play_once import broadcaster_play_file from auracast_translator.backend_controller.broadcaster_play_once import broadcaster_play_file
from multilang_translator.backend_controller.broadcaster_copy_files import copy_to_broadcaster from auracast_translator.backend_controller.broadcaster_copy_files import copy_to_broadcaster
def test_config_broadcaster(ft_configure_broadcaster): def test_config_broadcaster(ft_configure_broadcaster):
+2 -2
View File
@@ -1,5 +1,5 @@
from multilang_translator.main_local import announcement_from_german_text from auracast_translator.main_local import announcement_from_german_text
from multilang_translator.translator import test_content from auracast_translator.translator import test_content
def test_announcement_from_german_text( def test_announcement_from_german_text(
+3 -3
View File
@@ -1,6 +1,6 @@
from multilang_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it from auracast_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it
from multilang_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW from auracast_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW
from multilang_translator.main_local import translate_from_german from auracast_translator.main_local import translate_from_german
import time import time
+1 -1
View File
@@ -1,4 +1,4 @@
from multilang_translator.text_to_speech.text_to_speech import synthesize from auracast_translator.text_to_speech.text_to_speech import synthesize
def test_synthesize(): def test_synthesize():
synthesize("Hello, how are you?", "en_US-lessac-medium", "hello.wav") synthesize("Hello, how are you?", "en_US-lessac-medium", "hello.wav")