diff --git a/multilang_translator/encode/encode_lc3.py b/multilang_translator/encode/encode_lc3.py index c823437..49c7cb4 100644 --- a/multilang_translator/encode/encode_lc3.py +++ b/multilang_translator/encode/encode_lc3.py @@ -1,10 +1,12 @@ import subprocess +import logging as log - -def encode_lc3(file): +def encode_lc3(file, frame_dur_ms=10, bps=48000): file = file.replace('.wav', '') - ret = subprocess.run(['elc3', '-b', '48000', f'{file}.wav', f'{file}.lc3'], check=True) + cmd = ['elc3', '-m', f'{frame_dur_ms}' , '-b', f'{bps}', f'{file}.wav', f'{file}.lc3'] + log.info("Executing: %s", " ".join(cmd)) + ret = subprocess.run(cmd, check=True) return ret.returncode, ret.stdout, ret.stderr diff --git a/multilang_translator/main.py b/multilang_translator/main.py index 3d42b43..f1c6087 100644 --- a/multilang_translator/main.py +++ b/multilang_translator/main.py @@ -11,63 +11,67 @@ from examples import custom_style_2 import os -from text_to_speech import text_to_speech, resample -from translator import llm_translator -from backend_controller.broadcaster_config import broadcaster_config -from backend_controller.broadcaster_play_once import broadcaster_play_file -from backend_controller.broadcaster_copy_files import copy_to_broadcaster +from .translator import llm_translator +from .text_to_speech import text_to_speech, resample +from .backend_controller.broadcaster_config import broadcaster_config +from .backend_controller.broadcaster_play_once import broadcaster_play_file +from .backend_controller.broadcaster_copy_files import copy_to_broadcaster -from encode import encode_lc3 +from .encode import encode_lc3 ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements') -N_MAX_BIS = 5 +N_MAX_BIS = 4 +SAMPLING_RATE = int(8e3) +FRAME_DUR_MS = 10 +BPS = int(16e3) +FILENAMES = { + "de": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE//1000}_{FRAME_DUR_MS}_{BPS//1000}_de", + "en": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE//1000}_{FRAME_DUR_MS}_{BPS//1000}_en", + "fr": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE//1000}_{FRAME_DUR_MS}_{BPS//1000}_fr", + "es": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE//1000}_{FRAME_DUR_MS}_{BPS//1000}_es", + "it": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE//1000}_{FRAME_DUR_MS}_{BPS//1000}_it", +} os.makedirs(ANNOUNCEMENT_DIR, exist_ok=True) def synthesize_resample_encode(text, tts_model, output_file): text_to_speech.synthesize(text, tts_model, output_file) - resample.resample(output_file, output_file) - encode_lc3.encode_lc3(output_file) + resample.resample(output_file, output_file, target_rate=SAMPLING_RATE) + encode_lc3.encode_lc3(output_file, bps=BPS, frame_dur_ms=FRAME_DUR_MS) + +def translate_from_german_and_encode(text_de): + file = FILENAMES['de'] + synthesize_resample_encode(text_de, 'de_DE-kerstin-low', f'{file}.wav') + + text_en = llm_translator.translator_de_en(text_de) + file = FILENAMES['en'] + synthesize_resample_encode(text_en, 'en_US-lessac-medium', f'{file}.wav') + + text_fr = llm_translator.translator_de_fr(text_de) + file = FILENAMES['fr'] + synthesize_resample_encode(text_fr, 'fr_FR-siwis-medium', f'{file}.wav') + + text_es = llm_translator.translator_de_es(text_de) + file = FILENAMES['es'] + synthesize_resample_encode(text_es, 'es_ES-sharvard-medium', f'{file}.wav') + + text_it = llm_translator.translator_de_it(text_de) + file = FILENAMES['it'] + synthesize_resample_encode(text_it, 'it_IT-paola-medium', f'{file}.wav') def announcement_from_german_text(text_de): - files = { - "de": f"{ANNOUNCEMENT_DIR}/announcement_de", - "en": f"{ANNOUNCEMENT_DIR}/announcement_en", - # "fr": f"{ANNOUNCEMENT_DIR}/announcement_fr", - # "es": f"{ANNOUNCEMENT_DIR}/announcement_es", - "it": f"{ANNOUNCEMENT_DIR}/announcement_it", - } - - file = files['de'] - synthesize_resample_encode(text_de, 'de_DE-kerstin-low', f'{file}.wav') - - text_en = llm_translator.translator_de_en(text_de) - file = files['en'] - synthesize_resample_encode(text_en, 'en_US-lessac-medium', f'{file}.wav') - - # text_fr = llm_translator.translator_de_fr(text_de) - # file = files['fr'] - # synthesize_resample_encode(text_fr, 'fr_FR-siwis-medium', f'{file}.wav') - - # text_es = llm_translator.translator_de_es(text_de) - # file = files['es'] - # synthesize_resample_encode(text_es, 'es_ES-sharvard-medium', f'{file}.wav') - - text_it = llm_translator.translator_de_it(text_de) - file = files['it'] - synthesize_resample_encode(text_it, 'it_IT-paola-medium', f'{file}.wav') - + translate_from_german_and_encode(text_de) # Transfer the files to broadcaster memory start = time.time() - for val in files.values(): + for val in FILENAMES.values(): copy_to_broadcaster(f'{val}.lc3') log.info("Transfering files to broadcaster took %s s", round(time.time() - start, 3)) # Instruct the broadcaster to stream the files - for i, val in enumerate(list(files.values())[:N_MAX_BIS]): + for i, val in enumerate(list(FILENAMES.values())[:N_MAX_BIS]): time.sleep(1) broadcaster_play_file(i, f'{os.path.basename(val)}.lc3') log.info("Starting all broadcasts %s s", round(time.time() - start, 3)) diff --git a/multilang_translator/text_to_speech/__init__.py b/multilang_translator/text_to_speech/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_translator.py b/tests/test_translator.py index cb94f7a..1d9fab0 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -1,5 +1,8 @@ from multilang_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it from multilang_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW +from multilang_translator.main import translate_from_german_and_encode + + import time import logging as log @@ -21,4 +24,8 @@ def test_translator(): start=time.time() response = translator_de_it(TESTSENTENCE_DE_RAINBOW) log.info("Second query took %s", time.time() - start) - log.info(response) \ No newline at end of file + log.info(response) + +def test_translate_from_german_and_encode(): + + translate_from_german_and_encode(TESTSENTENCE_DE_RAINBOW)