From f14902c6e7aeeddca28a2ae78045b0f0bfcf93db Mon Sep 17 00:00:00 2001 From: pstruebi Date: Thu, 6 Mar 2025 08:46:03 +0100 Subject: [PATCH] restructure the project --- .../multilang_translator}/__init__.py | 0 .../encode/encode_lc3.py | 0 .../multilang_translator}/main_local.py | 8 ++---- .../translator}/__init__.py | 0 .../translator/llm_translator.py | 0 .../translator/syspromts.py | 0 .../translator/test_content.py | 0 .../translator_config.py | 4 +-- .../voice_provider}/__init__.py | 0 .../voice_provider}/piper/voices.json | 0 .../voice_provider}/piper_welcome.sh | 0 .../voice_provider}/text_to_speech.py | 28 +++++++++++-------- .../voice_provider}/utils/__init__.py | 0 .../voice_provider/utils}/encode_lc3.py | 2 +- .../voice_provider}/utils/resample.py | 0 15 files changed, 22 insertions(+), 20 deletions(-) rename {multilang_translator => src/multilang_translator}/__init__.py (100%) rename {multilang_translator => src/multilang_translator}/encode/encode_lc3.py (100%) rename {multilang_translator => src/multilang_translator}/main_local.py (96%) rename {multilang_translator/text_to_speech => src/multilang_translator/translator}/__init__.py (100%) rename {multilang_translator => src/multilang_translator}/translator/llm_translator.py (100%) rename {multilang_translator => src/multilang_translator}/translator/syspromts.py (100%) rename {multilang_translator => src/multilang_translator}/translator/test_content.py (100%) rename {multilang_translator => src/multilang_translator}/translator_config.py (88%) rename {multilang_translator/translator => src/voice_provider}/__init__.py (100%) rename {multilang_translator/text_to_speech => src/voice_provider}/piper/voices.json (100%) rename {multilang_translator/text_to_speech => src/voice_provider}/piper_welcome.sh (100%) rename {multilang_translator/text_to_speech => src/voice_provider}/text_to_speech.py (77%) rename {multilang_translator => src/voice_provider}/utils/__init__.py (100%) rename {multilang_translator/text_to_speech => src/voice_provider/utils}/encode_lc3.py (98%) rename {multilang_translator => src/voice_provider}/utils/resample.py (100%) diff --git a/multilang_translator/__init__.py b/src/multilang_translator/__init__.py similarity index 100% rename from multilang_translator/__init__.py rename to src/multilang_translator/__init__.py diff --git a/multilang_translator/encode/encode_lc3.py b/src/multilang_translator/encode/encode_lc3.py similarity index 100% rename from multilang_translator/encode/encode_lc3.py rename to src/multilang_translator/encode/encode_lc3.py diff --git a/multilang_translator/main_local.py b/src/multilang_translator/main_local.py similarity index 96% rename from multilang_translator/main_local.py rename to src/multilang_translator/main_local.py index 1b3687c..3d87872 100644 --- a/multilang_translator/main_local.py +++ b/src/multilang_translator/main_local.py @@ -7,18 +7,16 @@ from __future__ import print_function, unicode_literals from typing import List from dataclasses import asdict import asyncio -from copy import copy import time import logging as log import aioconsole -import multilang_translator.translator_config as translator_config -from utils import resample -from translator import llm_translator, test_content -from text_to_speech import text_to_speech from auracast import multicast_control from auracast import auracast_config +import multilang_translator.translator_config as translator_config +from translator import llm_translator from translator.test_content import TESTSENTENCE +from voice_provider import text_to_speech # TODO: look for a end to end translation solution diff --git a/multilang_translator/text_to_speech/__init__.py b/src/multilang_translator/translator/__init__.py similarity index 100% rename from multilang_translator/text_to_speech/__init__.py rename to src/multilang_translator/translator/__init__.py diff --git a/multilang_translator/translator/llm_translator.py b/src/multilang_translator/translator/llm_translator.py similarity index 100% rename from multilang_translator/translator/llm_translator.py rename to src/multilang_translator/translator/llm_translator.py diff --git a/multilang_translator/translator/syspromts.py b/src/multilang_translator/translator/syspromts.py similarity index 100% rename from multilang_translator/translator/syspromts.py rename to src/multilang_translator/translator/syspromts.py diff --git a/multilang_translator/translator/test_content.py b/src/multilang_translator/translator/test_content.py similarity index 100% rename from multilang_translator/translator/test_content.py rename to src/multilang_translator/translator/test_content.py diff --git a/multilang_translator/translator_config.py b/src/multilang_translator/translator_config.py similarity index 88% rename from multilang_translator/translator_config.py rename to src/multilang_translator/translator_config.py index 4a1b112..d164bfc 100644 --- a/multilang_translator/translator_config.py +++ b/src/multilang_translator/translator_config.py @@ -2,9 +2,7 @@ import os from pydantic import BaseModel from auracast import auracast_config -ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements') -VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv') -PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper' +VENV_DIR = os.path.join(os.path.dirname(__file__), './../../venv') class TranslatorBaseconfig(BaseModel): big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe() diff --git a/multilang_translator/translator/__init__.py b/src/voice_provider/__init__.py similarity index 100% rename from multilang_translator/translator/__init__.py rename to src/voice_provider/__init__.py diff --git a/multilang_translator/text_to_speech/piper/voices.json b/src/voice_provider/piper/voices.json similarity index 100% rename from multilang_translator/text_to_speech/piper/voices.json rename to src/voice_provider/piper/voices.json diff --git a/multilang_translator/text_to_speech/piper_welcome.sh b/src/voice_provider/piper_welcome.sh similarity index 100% rename from multilang_translator/text_to_speech/piper_welcome.sh rename to src/voice_provider/piper_welcome.sh diff --git a/multilang_translator/text_to_speech/text_to_speech.py b/src/voice_provider/text_to_speech.py similarity index 77% rename from multilang_translator/text_to_speech/text_to_speech.py rename to src/voice_provider/text_to_speech.py index 50ed861..4da5618 100644 --- a/multilang_translator/text_to_speech/text_to_speech.py +++ b/src/voice_provider/text_to_speech.py @@ -1,27 +1,33 @@ import os +import shutil import subprocess import time import json import logging as log import numpy as np -from multilang_translator import translator_config -from multilang_translator.utils.resample import resample_array -from multilang_translator.text_to_speech import encode_lc3 +from voice_provider.utils.resample import resample_array +from voice_provider.utils.encode_lc3 import encode_lc3 + +PIPER_EXE = shutil.which('piper') TTS_DIR = os.path.join(os.path.dirname(__file__)) -PIPER_DIR = f'{TTS_DIR}/piper' +PIPER_WORKDIR = f'{TTS_DIR}/piper' + +if not PIPER_EXE: + PIPER_EXE = f'{TTS_DIR}/../../venv/bin/piper' def synth_piper(text, model="en_US-lessac-medium"): pwd = os.getcwd() - os.chdir(PIPER_DIR) + os.chdir(PIPER_WORKDIR) start = time.time() # make sure piper has voices.json in working directory, otherwise it attempts to always load models ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model - [translator_config.PIPER_EXE_PATH, - '--cuda', - '--model', model, - '--output-raw' + [ + PIPER_EXE, + '--cuda', + '--model', model, + '--output-raw' ], input=text.encode('utf-8'), capture_output=True @@ -34,7 +40,7 @@ def synth_piper(text, model="en_US-lessac-medium"): log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3)) - with open (f'{PIPER_DIR}/{model}.onnx.json') as f: # TODO: wrap everyth0ing into a class, store the json permanently + with open (f'{PIPER_WORKDIR}/{model}.onnx.json') as f: # TODO: wrap everyth0ing into a class, store the json permanently model_json = json.load(f) return model_json, audio @@ -59,7 +65,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium", if return_lc3: audio_pcm = (audio * 2**15-1).astype(np.int16) - lc3 = encode_lc3.encode(audio_pcm, target_sample_rate, 40) # TODO: octetts per frame should be parameter + lc3 = encode_lc3(audio_pcm, target_sample_rate, 40) # TODO: octetts per frame should be parameter return lc3 else: return audio diff --git a/multilang_translator/utils/__init__.py b/src/voice_provider/utils/__init__.py similarity index 100% rename from multilang_translator/utils/__init__.py rename to src/voice_provider/utils/__init__.py diff --git a/multilang_translator/text_to_speech/encode_lc3.py b/src/voice_provider/utils/encode_lc3.py similarity index 98% rename from multilang_translator/text_to_speech/encode_lc3.py rename to src/voice_provider/utils/encode_lc3.py index c120771..57e3c2e 100644 --- a/multilang_translator/text_to_speech/encode_lc3.py +++ b/src/voice_provider/utils/encode_lc3.py @@ -1,7 +1,7 @@ import numpy as np import lc3 -def encode( +def encode_lc3( audio: np.array, output_sample_rate_hz, octets_per_frame, diff --git a/multilang_translator/utils/resample.py b/src/voice_provider/utils/resample.py similarity index 100% rename from multilang_translator/utils/resample.py rename to src/voice_provider/utils/resample.py