diff --git a/multilang_translator/config.py b/multilang_translator/config.py index e54ca61..e850d5b 100644 --- a/multilang_translator/config.py +++ b/multilang_translator/config.py @@ -35,3 +35,5 @@ LANG_CONFIG = { } os.makedirs(ANNOUNCEMENT_DIR, exist_ok=True) + +# TODO. use dataclasses from Multicaster with inherit \ No newline at end of file diff --git a/multilang_translator/main.py b/multilang_translator/main_local.py similarity index 100% rename from multilang_translator/main.py rename to multilang_translator/main_local.py diff --git a/multilang_translator/text_to_speech/text_to_speech.py b/multilang_translator/text_to_speech/text_to_speech.py index 5ce3dde..a2c231d 100644 --- a/multilang_translator/text_to_speech/text_to_speech.py +++ b/multilang_translator/text_to_speech/text_to_speech.py @@ -9,17 +9,20 @@ from multilang_translator.utils.resample import resample_array from multilang_translator.text_to_speech import encode_lc3 TTS_DIR = os.path.join(os.path.dirname(__file__)) +PIPER_DIR = f'{TTS_DIR}/piper' +os.makedirs(PIPER_DIR, exist_ok=True) def synth_piper(text, model="en_US-lessac-medium",): - pwd = os.getcwd() - os.chdir(TTS_DIR) start = time.time() ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model [config.PIPER_EXE_PATH, '--cuda', + '--data-dir', PIPER_DIR, + '--download-dir', PIPER_DIR, '--model', model, - '--output-raw'], + '--output-raw' + ], input=text.encode('utf-8'), capture_output=True ) @@ -30,15 +33,13 @@ def synth_piper(text, model="en_US-lessac-medium",): log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3)) - with open (f'{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently + with open (f'{PIPER_DIR}/{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently model_json = json.load(f) - os.chdir(pwd) return model_json, audio # TODO: framework should probably be a dataclass that holds all 
the relevant informations, also model -# TODO: make a common repo that hold the configuration dataclasses ? def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium", return_lc3=True): if framework == 'piper': @@ -74,7 +75,7 @@ if __name__ == '__main__': ) target_rate=16000 - audio = synthesize('Hello World', target_rate, 'piper', model= 'de_DE-kerstin-low', encode_lc3=False) + audio = synthesize('Hello World', target_rate, 'piper', model= 'de_DE-kerstin-low', return_lc3=False) sf.write('hello.wav', audio, target_rate) diff --git a/tests/test_system.py b/tests/test_system.py index 6dac681..dab0027 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -1,4 +1,4 @@ -from multilang_translator.main import announcement_from_german_text +from multilang_translator.main_local import announcement_from_german_text from multilang_translator.translator import test_content diff --git a/tests/test_translator.py b/tests/test_translator.py index 77b2d5c..04b7312 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -1,6 +1,6 @@ from multilang_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it from multilang_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW -from multilang_translator.main import translate_from_german +from multilang_translator.main_local import translate_from_german import time