From b86b969240bdeed658e13f04dcf5fd3d1bae2493 Mon Sep 17 00:00:00 2001 From: pstruebi Date: Wed, 5 Mar 2025 17:39:58 +0100 Subject: [PATCH] fix broken resampling and piper always loading models --- multilang_translator/main_local.py | 16 ++++++++-------- .../text_to_speech/{ => piper}/voices.json | 0 .../text_to_speech/text_to_speech.py | 5 ++--- multilang_translator/translator/test_content.py | 3 +-- multilang_translator/translator_config.py | 4 ++-- multilang_translator/utils/resample.py | 4 ++-- pyproject.toml | 3 ++- 7 files changed, 17 insertions(+), 18 deletions(-) rename multilang_translator/text_to_speech/{ => piper}/voices.json (100%) diff --git a/multilang_translator/main_local.py b/multilang_translator/main_local.py index e59216f..0a0a91a 100644 --- a/multilang_translator/main_local.py +++ b/multilang_translator/main_local.py @@ -125,14 +125,14 @@ async def main(): caster = multicast_control.Multicaster(global_conf, [conf.big for conf in translator_conf]) await caster.init_broadcast() - await announcement_from_german_text( - global_conf, - translator_conf, - caster, - test_content.TESTSENTENCE.DE_HELLO - ) - await asyncio.wait([caster.streamer.task]) - #await command_line_ui(global_conf, translator_conf, caster) + # await announcement_from_german_text( + # global_conf, + # translator_conf, + # caster, + # test_content.TESTSENTENCE.DE_HELLO + # ) + # await asyncio.wait([caster.streamer.task]) + await command_line_ui(global_conf, translator_conf, caster) if __name__ == '__main__': asyncio.run(main()) diff --git a/multilang_translator/text_to_speech/voices.json b/multilang_translator/text_to_speech/piper/voices.json similarity index 100% rename from multilang_translator/text_to_speech/voices.json rename to multilang_translator/text_to_speech/piper/voices.json diff --git a/multilang_translator/text_to_speech/text_to_speech.py b/multilang_translator/text_to_speech/text_to_speech.py index 0b777e3..15f3fe1 100644 --- a/multilang_translator/text_to_speech/text_to_speech.py +++ b/multilang_translator/text_to_speech/text_to_speech.py @@ -10,7 +10,6 @@ from multilang_translator.text_to_speech import encode_lc3 TTS_DIR = os.path.join(os.path.dirname(__file__)) PIPER_DIR = f'{TTS_DIR}/piper' -os.makedirs(PIPER_DIR, exist_ok=True) def synth_piper(text, model="en_US-lessac-medium"): pwd = os.getcwd() @@ -51,7 +50,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium", tts_sample_rate = model_json['audio']['sample_rate'] audio_np = np.frombuffer(audio_raw, dtype=np.dtype('