diff --git a/multilang_translator/config.py b/multilang_translator/config.py deleted file mode 100644 index e850d5b..0000000 --- a/multilang_translator/config.py +++ /dev/null @@ -1,39 +0,0 @@ -import os - -ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements') -VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv') -PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper' -FRAME_DUR_MS = 10 -SAMPLING_RATE_HZ = int(16e3) -BITRATE_BPS = int(32e3) - - -LANG_CONFIG = { - "de": { - "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_de.wav", - "filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_de_resamp.wav", - "tts": 'de_DE-kerstin-low', - }, - "en": { - "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_en.wav", - "filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_en_resamp.wav", - "tts": 'en_US-lessac-medium' - }, - "fr": { - "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_fr.wav", - "filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_fr_resamp.wav", - "tts": 'fr_FR-siwis-medium' - }, - # "es": { - # "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_es.wav", - # "tts": 'es_ES-sharvard-medium' - # }, - # "it": { - # "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_it.wav", - # "tts": 'it_IT-paola-medium' - # } -} - -os.makedirs(ANNOUNCEMENT_DIR, exist_ok=True) - -# TODO. use dataclasses from Multicaster with inherit \ No newline at end of file diff --git a/multilang_translator/main_local.py b/multilang_translator/main_local.py index a9adc42..1b3687c 100644 --- a/multilang_translator/main_local.py +++ b/multilang_translator/main_local.py @@ -4,6 +4,7 @@ list prompt example """ from __future__ import print_function, unicode_literals +from typing import List from dataclasses import asdict import asyncio from copy import copy @@ -11,13 +12,12 @@ import time import logging as log import aioconsole +import multilang_translator.translator_config as translator_config from utils import resample from translator import llm_translator, test_content from text_to_speech import text_to_speech -from encode import encode_lc3 from auracast import multicast_control from auracast import auracast_config -from config import LANG_CONFIG, SAMPLING_RATE_HZ from translator.test_content import TESTSENTENCE # TODO: look for a end to end translation solution @@ -26,35 +26,34 @@ def transcribe(): pass # TODO: Implement transcribing input audio e.g. with whisper -def syntesize_resample(text, tts_model, file_wav, file_wav_resamp): - audio_dur = text_to_speech.synthesize(text, tts_model, file_wav) - resample.resample_file(file_wav, file_wav_resamp, target_rate=SAMPLING_RATE_HZ) - return audio_dur - - async def announcement_from_german_text( + global_config: auracast_config.AuracastGlobalConfig, + translator_config: List[translator_config.TranslatorConfigDe], caster: multicast_control.Multicaster, text_de ): - TRANSLATOR_LLM = 'llama3.2:3b-instruct-q4_0' + base_lang = "deu" - config = copy(LANG_CONFIG) - base_lang = "de" - - for i, d in enumerate(config.items()): - key, val = d - if key == base_lang: + for i, trans in enumerate(translator_config): + if trans.big.language == base_lang: text = text_de else: - text = llm_translator.translate_de_to_x(text_de, key, model=TRANSLATOR_LLM) + text = llm_translator.translate_de_to_x( + text_de, + trans.big.language, + model=trans.translator_llm, + client = trans.llm_client, + host=trans.llm_host_url, + token=trans.llm_host_token + ) log.info('%s', text) lc3_audio = text_to_speech.synthesize( text, - SAMPLING_RATE_HZ, - 'piper', - val['tts'], + global_config.auracast_sampling_rate_hz, + trans.tts_system, + trans.tts_model, return_lc3=True ) caster.big_conf[i].audio_source = lc3_audio @@ -65,7 +64,7 @@ async def announcement_from_german_text( log.info("Starting all broadcasts took %s s", round(time.time() - start, 3)) -async def command_line_ui(caster: multicast_control.Multicaster): +async def command_line_ui(global_conf, translator_conf, caster: multicast_control.Multicaster): while True: # make a list of all available testsentence sentence_list = list(asdict(TESTSENTENCE).values()) @@ -86,43 +85,55 @@ async def command_line_ui(caster: multicast_control.Multicaster): # Check if command is a single number elif command.strip().isdigit(): ind = int(command.strip()) - await announcement_from_german_text(caster, sentence_list[ind]) + await announcement_from_german_text( + global_conf, + translator_conf, + caster, + sentence_list[ind]) await asyncio.wait([caster.streamer.task]) # Interpret the command as announcement else: await announcement_from_german_text(caster, command) await asyncio.wait([caster.streamer.task]) + async def main(): log.basicConfig( level=log.INFO, format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s' ) - global_conf = auracast_config.global_base_config + global_conf = auracast_config.AuracastGlobalConfig() #global_conf.transport='serial:/dev/serial/by-id/usb-SEGGER_J-Link_001057705357-if02,1000000,rtscts' # transport for nrf54l15dk global_conf.transport='serial:/dev/serial/by-id/usb-ZEPHYR_Zephyr_HCI_UART_sample_81BD14B8D71B5662-if00,115200,rtscts' #nrf52dongle hci_uart usb cdc - big_conf = [ - auracast_config.broadcast_de, - auracast_config.broadcast_en, - auracast_config.broadcast_fr, + + translator_conf = [ + translator_config.TranslatorConfigDe(), + translator_config.TranslatorConfigEn(), + translator_config.TranslatorConfigFr(), #auracast_config.broadcast_es, #auracast_config.broadcast_it, ] - for i, conf in enumerate(big_conf): - conf.loop = False + for conf in translator_conf: + conf.big.loop = False + conf.llm_client = 'openwebui' # comment out for local llm + conf.llm_host_url = 'https://ollama.pstruebi.xyz' + conf.llm_host_token = 'sk-17124cb84df14cc6ab2d9e17d0724d13' - caster = multicast_control.Multicaster(global_conf, big_conf) + caster = multicast_control.Multicaster(global_conf, [conf.big for conf in translator_conf]) await caster.init_broadcast() - #await announcement_from_german_text(caster, test_content.TESTSENTENCE.DE_HELLO) - #await asyncio.wait([caster.streamer.task]) - await command_line_ui(caster) + # await announcement_from_german_text( + # global_conf, + # translator_conf, + # caster, + # test_content.TESTSENTENCE.DE_HELLO + # ) + # await asyncio.wait([caster.streamer.task]) + await command_line_ui(global_conf, translator_conf, caster) if __name__ == '__main__': asyncio.run(main()) - # TODO: integrate this in the LANG_CONFIG dict, better: make a hierachy of dataclasses - # TODO: remove the nececcity for files # TODO: add support for multiple radios \ No newline at end of file diff --git a/multilang_translator/text_to_speech/voices.json b/multilang_translator/text_to_speech/piper/voices.json similarity index 100% rename from multilang_translator/text_to_speech/voices.json rename to multilang_translator/text_to_speech/piper/voices.json diff --git a/multilang_translator/text_to_speech/text_to_speech.py b/multilang_translator/text_to_speech/text_to_speech.py index a2c231d..50ed861 100644 --- a/multilang_translator/text_to_speech/text_to_speech.py +++ b/multilang_translator/text_to_speech/text_to_speech.py @@ -4,28 +4,29 @@ import time import json import logging as log import numpy as np -from multilang_translator import config +from multilang_translator import translator_config from multilang_translator.utils.resample import resample_array from multilang_translator.text_to_speech import encode_lc3 TTS_DIR = os.path.join(os.path.dirname(__file__)) PIPER_DIR = f'{TTS_DIR}/piper' -os.makedirs(PIPER_DIR, exist_ok=True) -def synth_piper(text, model="en_US-lessac-medium",): +def synth_piper(text, model="en_US-lessac-medium"): + pwd = os.getcwd() + os.chdir(PIPER_DIR) start = time.time() + # make sure piper has voices.json in working directory, otherwise it attempts to always load models ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model - [config.PIPER_EXE_PATH, - '--cuda', - '--data-dir', PIPER_DIR, - '--download-dir', PIPER_DIR, - '--model', model, - '--output-raw' - ], + [translator_config.PIPER_EXE_PATH, + '--cuda', + '--model', model, + '--output-raw' + ], input=text.encode('utf-8'), capture_output=True ) + os.chdir(pwd) log.warning('Piper stderr:\n%s', ret.stderr) assert ret.returncode == 0, 'Piper returncode was not 0.' @@ -47,7 +48,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium", tts_sample_rate = model_json['audio']['sample_rate'] audio_np = np.frombuffer(audio_raw, dtype=np.dtype('