From a9acfd2d2cdfbe37d70b4f2231508f355ebcf870 Mon Sep 17 00:00:00 2001 From: pstruebi Date: Tue, 25 Feb 2025 13:32:37 +0100 Subject: [PATCH] use_bumble (#1) Adapt the project to use the bumble auracaster Reviewed-on: https://gitea.pstruebi.xyz/auracaster/multilang-translator-local/pulls/1 --- .gitignore | 3 +- .../backend_controller/broadcaster_config.py | 107 ------------- .../broadcaster_copy_files.py | 7 - .../broadcaster_play_once.py | 28 ---- multilang_translator/config.py | 23 ++- multilang_translator/main.py | 144 +++++++++++------- .../text_to_speech/resample.py | 18 --- .../text_to_speech/text_to_speech.py | 34 +++-- .../translator/llm_translator.py | 59 ++++--- multilang_translator/translator/syspromts.py | 8 +- .../translator/test_content.py | 7 +- .../{backend_controller => utils}/__init__.py | 0 multilang_translator/utils/resample.py | 27 ++++ pyproject.toml | 25 ++- tests/test_translator.py | 4 +- 15 files changed, 219 insertions(+), 275 deletions(-) delete mode 100644 multilang_translator/backend_controller/broadcaster_config.py delete mode 100644 multilang_translator/backend_controller/broadcaster_copy_files.py delete mode 100644 multilang_translator/backend_controller/broadcaster_play_once.py delete mode 100644 multilang_translator/text_to_speech/resample.py rename multilang_translator/{backend_controller => utils}/__init__.py (100%) create mode 100644 multilang_translator/utils/resample.py diff --git a/.gitignore b/.gitignore index f2fb077..fb8d830 100644 --- a/.gitignore +++ b/.gitignore @@ -4,5 +4,6 @@ *.onnx *.onnx.json -venv/ +*.egg-info +venv*/ text_to_speech/models \ No newline at end of file diff --git a/multilang_translator/backend_controller/broadcaster_config.py b/multilang_translator/backend_controller/broadcaster_config.py deleted file mode 100644 index 71a7c66..0000000 --- a/multilang_translator/backend_controller/broadcaster_config.py +++ /dev/null @@ -1,107 +0,0 @@ -import time -import logging as log -import os -import serial -from ..config import SAMPLING_RATE_HZ, LANG_CONFIG - -PRESET = f'{SAMPLING_RATE_HZ//1000}_2_1' -BROADCAST_CONFIG = {} - -for i, d in enumerate(list(LANG_CONFIG.items())): - key, val = d - BROADCAST_CONFIG[i] = os.path.basename(val["file"]) - - -def write_to_serial_read_respone(port, cmd, timeout = 2): - # Initialize serial connection - ser = serial.Serial(timeout = timeout) - ser.port = port - ser.baudrate = 115200 - ser.bytesize = serial.EIGHTBITS - ser.parity = serial.PARITY_NONE - ser.stopbits = serial.STOPBITS_ONE - - try: - # Try to open the serial connection - #if not ser.is_open: - ser.open() - - # Send string to serial port and get response - command = f"{cmd.strip()}\r\n" - ser.write(command.encode()) - time.sleep(1) # wait a bit for response - - readlines = [] - for _ in range(20): - line = ser.readline().decode('utf-8').strip() - if not line: - ser.close() - break - else: - readlines.append(line) - - except serial.SerialException as e: - print(f"Error communicating with serial port: {e}") - finally: - # Close serial connection before returning - if ser.is_open: - ser.close() - return readlines - - - -def gen_broadcast_config_cmd(preset, broadcast_config: dict): - """ - Writes broadcaster configuration to the given serial port. - - Args: - serial_port (str): Device path of the serial port (e.g., '/dev/ttyACM0') - preset (str): Preset string used in nac preset line - broadcast_names (list): List of names for each broadcast group - """ - cmds = [] - for ch, file_name in broadcast_config.items(): - - cmds.append(f"nac preset {preset} {ch}") - cmds.append(f"nac broadcast_name broadcast{ch} {ch}") - cmds.append(f"nac file select_play_once {file_name}.lc3 {ch} 0 0") - cmds.append(f"nac num_bises 1 {ch} 0") - - return cmds - -# TODO: Advertising interval wird ungelmäßig bei mehr als 3 broadcasts 10ms -> 1s< bei 24kHz sampling rate - -def broadcaster_config(): - - import subprocess - - PORT = "/dev/ttyACM0" - - total_ret= "" - - cmds = gen_broadcast_config_cmd(PRESET, BROADCAST_CONFIG) - - subprocess.run(["nrfjprog", "--reset", "-s", "1050109484"], check=True) - - time.sleep(2) - ret = write_to_serial_read_respone(PORT, f"nac en_usb_mass", timeout=0.1) - total_ret += "\n".join(ret) - log.info("\n".join(ret)) - time.sleep(1) - - - for cmd in cmds: - ret = write_to_serial_read_respone(PORT, cmd, timeout=0.1) - log.info("\n".join(ret)) - total_ret += "\n".join(ret) - - time.sleep(1) - - for i in BROADCAST_CONFIG.keys(): - ret = write_to_serial_read_respone(PORT, f"nac start_idx {i}", timeout=0.1) - total_ret += "\n".join(ret) - log.info("\n".join(ret)) - time.sleep(0.2) - - return total_ret - diff --git a/multilang_translator/backend_controller/broadcaster_copy_files.py b/multilang_translator/backend_controller/broadcaster_copy_files.py deleted file mode 100644 index 83e48c9..0000000 --- a/multilang_translator/backend_controller/broadcaster_copy_files.py +++ /dev/null @@ -1,7 +0,0 @@ -import shutil -import os -BROADCASTER_DEFAULT_DIR = '/media/pstruebi/2C93-FED6' - -def copy_to_broadcaster(filepath, broadcaster_dir = BROADCASTER_DEFAULT_DIR): - filename = os.path.basename(filepath) - shutil.copy(filepath, f'{broadcaster_dir}/{filename}') \ No newline at end of file diff --git a/multilang_translator/backend_controller/broadcaster_play_once.py b/multilang_translator/backend_controller/broadcaster_play_once.py deleted file mode 100644 index f5e0d45..0000000 --- a/multilang_translator/backend_controller/broadcaster_play_once.py +++ /dev/null @@ -1,28 +0,0 @@ -#import broadcaster_config -from .broadcaster_config import write_to_serial_read_respone -import time -import logging as log - -def broadcaster_play_file(broadcast_ch, file, wait_after_stop = 1): - serial_port = "/dev/ttyACM0" - - ret_all_str = "" - for i in range(3): - ret = write_to_serial_read_respone(serial_port, f"nac file stream_close {broadcast_ch} 0 0", timeout=0.1) - if wait_after_stop is not None: - time.sleep(wait_after_stop) - ret += "\n" - ret += write_to_serial_read_respone(serial_port, f"nac file select_play_once {file} {broadcast_ch} 0 0", timeout=0.1) - - ret = "\n".join(ret) - ret_all_str += ret - - if (not "Failed" in ret) and (not "err" in ret): - log.info("Breaking after %s retries.", i) - log.info(ret_all_str) - return ret - - log.error("Failed to play file after 3 retries.") - log.error(ret_all_str) - return ret_all_str - \ No newline at end of file diff --git a/multilang_translator/config.py b/multilang_translator/config.py index 651f384..806dbef 100644 --- a/multilang_translator/config.py +++ b/multilang_translator/config.py @@ -1,28 +1,37 @@ import os ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements') -SAMPLING_RATE_HZ = int(16e3) +VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv') +PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper' FRAME_DUR_MS = 10 -BITRATE_BPS = int(32e3) # TODO: test 16khz 16kbps +SAMPLING_RATE_HZ = int(16e3) +BITRATE_BPS = int(32e3) + +def mk_filename_lc3(lang=''): + return f"{ANNOUNCEMENT_DIR}/announcement_{lang}_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}.lc3" + LANG_CONFIG = { "de": { - "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_de", + "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_de.wav", + "filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_de_resamp.wav", "tts": 'de_DE-kerstin-low', }, "en": { - "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_en", + "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_en.wav", + "filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_en_resamp.wav", "tts": 'en_US-lessac-medium' }, "fr": { - "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_fr", + "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_fr.wav", + "filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_fr_resamp.wav", "tts": 'fr_FR-siwis-medium' }, # "es": { - # "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_es", + # "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_es.wav", # "tts": 'es_ES-sharvard-medium' # }, # "it": { - # "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_it", + # "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_it.wav", # "tts": 'it_IT-paola-medium' # } } diff --git a/multilang_translator/main.py b/multilang_translator/main.py index df7d2a9..f52d589 100644 --- a/multilang_translator/main.py +++ b/multilang_translator/main.py @@ -4,80 +4,112 @@ list prompt example """ from __future__ import print_function, unicode_literals -from pprint import pprint - -from PyInquirer import prompt, Separator -from examples import custom_style_2 - - -import os +import asyncio from copy import copy import time import logging as log -from .translator import llm_translator -from .text_to_speech import text_to_speech, resample -from .backend_controller.broadcaster_config import broadcaster_config -from .backend_controller.broadcaster_play_once import broadcaster_play_file -from .backend_controller.broadcaster_copy_files import copy_to_broadcaster -from .encode import encode_lc3 +import aioconsole -from .config import LANG_CONFIG, BITRATE_BPS, SAMPLING_RATE_HZ, FRAME_DUR_MS +from utils import resample +from translator import llm_translator, test_content +from text_to_speech import text_to_speech +from encode import encode_lc3 +from auracast import multicast_control +from auracast import auracast_config +from config import LANG_CONFIG, BITRATE_BPS, SAMPLING_RATE_HZ, FRAME_DUR_MS + +# TODO: look for a end to end translation solution + +def transcribe(): + pass # TODO: Implement transcribing input audio e.g. with whisper -def synthesize_resample_encode(text, tts_model, output_file): - audio_dur = text_to_speech.synthesize(text, tts_model, output_file) - resample.resample(output_file, output_file, target_rate=SAMPLING_RATE_HZ) - encode_lc3.encode_lc3(output_file, bps=BITRATE_BPS, frame_dur_ms=FRAME_DUR_MS) +def syntesize_resample(text, tts_model, file_wav, file_wav_resamp): + audio_dur = text_to_speech.synthesize(text, tts_model, file_wav) + resample.resample_file(file_wav, file_wav_resamp, target_rate=SAMPLING_RATE_HZ) return audio_dur -def translate_from_german_and_encode(text_de): + +def translate_from_german(text_de, model): config = copy(LANG_CONFIG) base_lang = "de" - file = config[base_lang]["file"] - audio_dur_s = {} - audio_dur_s [base_lang] = synthesize_resample_encode(text_de, config['de']["tts"], f'{file}.wav') - - del config[base_lang] + file = config[base_lang]["filepath_wav"] + file_resamp = config[base_lang]['filepath_wav_resamp'] + tts_json = {} for key, val in config.items(): - text = llm_translator.translate_de_to_x(key, text_de) - file = val['file'] - audio_dur_s[key] = synthesize_resample_encode(text, val['tts'], f'{file}.wav') - return audio_dur_s + if key == base_lang: + text = text_de + else: + text = llm_translator.translate_de_to_x(text_de, key, model=model) + + log.info('%s', text) + file = val['filepath_wav'] + file_resamp = val['filepath_wav_resamp'] + tts_json[key] = syntesize_resample(text, val['tts'], file, file_resamp) -def announcement_from_german_text(text_de): + return tts_json + + +async def announcement_from_german_text(caster:multicast_control.Multicaster, text_de): + + tts_json = translate_from_german(text_de, model = 'llama3.2:3b-instruct-q4_0') - audio_durs = translate_from_german_and_encode(text_de) - # Transfer the files to broadcaster memory start = time.time() - for val in LANG_CONFIG.values(): - copy_to_broadcaster(f'{val["file"]}.lc3') - log.info("Transfering files to broadcaster took %s s", round(time.time() - start, 3)) + await caster.init_audio() + caster.start_streaming() - time.sleep(2) + log.info("Starting all broadcasts took %s s", round(time.time() - start, 3)) - # Instruct the broadcaster to stream the files - for i, d in enumerate(list(LANG_CONFIG.items())): - key, val = d - broadcaster_play_file(i, f'{os.path.basename(val["file"])}.lc3') - time.sleep(audio_durs[key]) - log.info("Starting all broadcasts %s s", round(time.time() - start, 3)) - -# questions = [ -# { -# 'type': 'list', -# 'name': 'theme', -# 'message': 'What type of annoucement would you like to make?', -# 'choices': [ -# 'predefined', -# 'custom', -# 'audio' -# ] -# }, -# ] +async def command_line_ui(caster: multicast_control.Multicaster): + while True: + command = await aioconsole.ainput("\nEnter your Announcement|quit] > ") + + if command.strip().lower() == "quit": + print("👋 Exiting...") + if caster.device: + caster.stop_streaming() + await caster.shutdown() + break # Exit loop + # TODO: Implement predefined announcements -# answers = prompt(questions, style=custom_style_2) -# pprint(answers) + elif command.strip() == '': + print('Nothing to Announce') + else: + await announcement_from_german_text(caster, command) + +async def main(): + log.basicConfig( + level=log.INFO, + format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s' + ) + + global_conf = auracast_config.global_base_config + #global_conf.transport='serial:/dev/serial/by-id/usb-SEGGER_J-Link_001057705357-if02,1000000,rtscts' # transport for nrf54l15dk + global_conf.transport='serial:/dev/serial/by-id/usb-ZEPHYR_Zephyr_HCI_UART_sample_81BD14B8D71B5662-if00,115200,rtscts' #nrf52dongle hci_uart usb cdc + + big_conf = [ # TODO: integrate this in the LANG_CONFIG dict, better: make a hirachry of dataclasses + auracast_config.broadcast_de, + auracast_config.broadcast_en, + auracast_config.broadcast_fr, + #auracast_config.broadcast_es, + #auracast_config.broadcast_it, + ] + files = [v['filepath_wav_resamp'] for v in LANG_CONFIG.values()] + for i, conf in enumerate(big_conf): + conf.loop_wav = False + conf.audio_source = f'file:{files[i]}' + + caster = multicast_control.Multicaster(global_conf, big_conf) + await caster.init_broadcast() + + #await announcement_from_german_text(caster, test_content.TESTSENTENCE_DE_HELLO) + + await command_line_ui(caster) + #await asyncio.wait([caster.streamer.task]) + +if __name__ == '__main__': + asyncio.run(main()) \ No newline at end of file diff --git a/multilang_translator/text_to_speech/resample.py b/multilang_translator/text_to_speech/resample.py deleted file mode 100644 index 384d737..0000000 --- a/multilang_translator/text_to_speech/resample.py +++ /dev/null @@ -1,18 +0,0 @@ -# resample .wave from 22.05 to 24kHz sampling rate - -import librosa -import soundfile as sf - - -def resample(filename, out_filename, target_rate=int(24e3)): - # Load the original audio file - audio, rate = librosa.load(filename) - - # Convert the sample rate to 24 kHz - resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate) - - # Save the resampled audio as a new .wav file - sf.write(out_filename, resampled_audio, target_rate) - -if __name__ == "__main__": - resample('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav') diff --git a/multilang_translator/text_to_speech/text_to_speech.py b/multilang_translator/text_to_speech/text_to_speech.py index 6eb9134..d5895ba 100644 --- a/multilang_translator/text_to_speech/text_to_speech.py +++ b/multilang_translator/text_to_speech/text_to_speech.py @@ -1,8 +1,9 @@ import os import subprocess import time +import json import logging as log -import wave +from multilang_translator import config TTS_DIR = os.path.join(os.path.dirname(__file__)) @@ -11,16 +12,31 @@ def synthesize(text, model="en_US-lessac-medium", output_file="out.wav"): pwd = os.getcwd() os.chdir(TTS_DIR) start = time.time() - ret = subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True) + + ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent instance of the model + [config.PIPER_EXE_PATH, '--model', model, '--output_file', output_file], + input=text.encode('utf-8'), + capture_output=True + ) + log.info('%s', ret.stdout) + log.info('%s', ret.stderr) - with wave.open(output_file, "rb") as wf: - frames = wf.getnframes() - rate = wf.getframerate() + assert ret.returncode == 0, 'Piper returncode was not 0.' - length_in_seconds = round(frames / rate, 1) - log.info(f"Audio length: {length_in_seconds} s") + log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3)) + + with open (f'{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanentl + model_json = json.load(f) os.chdir(pwd) - log.info("Running piper took %s s", round(time.time() - start, 3)) + return model_json - return length_in_seconds +if __name__ == '__main__': + import logging + + logging.basicConfig( + level=logging.INFO, + format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s' + ) + + synthesize('Hello World') diff --git a/multilang_translator/translator/llm_translator.py b/multilang_translator/translator/llm_translator.py index ff7e417..07e52b0 100644 --- a/multilang_translator/translator/llm_translator.py +++ b/multilang_translator/translator/llm_translator.py @@ -1,10 +1,18 @@ +import time import requests import json import logging as log import time +import ollama -from . import credentials -from . import syspromts +from multilang_translator.translator import credentials +from multilang_translator.translator import syspromts +from multilang_translator.translator import test_content + +# ollama.create( # TODO: create models on startup +# model='example', +# from_='llama3.2', system="You are Mario from Super Mario Bros." +# ) def query_model(model, query): url = f'{credentials.BASE_URL}/api/chat/completions' @@ -21,42 +29,33 @@ def query_model(model, query): return response.json() -def translate_de_to_x(target_language: str, text:str, model ='llama3.2:3b-instruct-q4_0'): +def translate_de_to_x(text:str, target_language: str, model='llama3.2:3b-instruct-q4_0'): # remember to use instruct models + start=time.time() s = getattr(syspromts, f"TRANSLATOR_DE_{target_language.upper()}") - return query_model(model, s + text)['choices'][0]['message']['content'] - - -def translator_de_en(query): - MODEL = 'llama3.2:3b-instruct-q4_0' - #MODEL = 'llama3.1:8b-instruct-q4_0' - return query_model(MODEL, syspromts.TRANSLATOR_DE_EN + query)['choices'][0]['message']['content'] - -def translator_de_fr(query): - MODEL = 'llama3.2:3b-instruct-q4_0' - return query_model(MODEL, syspromts.TRANSLATOR_DE_FR + query)['choices'][0]['message']['content'] - -def translator_de_es(query): - MODEL = 'llama3.2:3b-instruct-q4_0' - return query_model(MODEL, syspromts.TRANSLATOR_DE_ES + query)['choices'][0]['message']['content'] - -def translator_de_it(query): - MODEL = 'llama3.2:3b-instruct-q4_0' - return query_model(MODEL, syspromts.TRANSLATOR_DE_IT + query)['choices'][0]['message']['content'] - + response = ollama.chat( + model = model, + messages = [ + {'role': 'system', 'content': s}, + {'role': 'user', 'content': text} + ], + ) + log.info('Running the translator to %s took %s s', target_language, round(time.time() - start, 3)) + return response['message']['content'] if __name__ == "__main__": import time - TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.' - start=time.time() - response = translator_de_en(TESTSENTENCE_DE_BROKER) - print("First query took", start - time.time()) - print(json.dumps(response, indent=2)) + response = translate_de_to_x('Der Zug ist da.', target_language='en', model='llama3.2:1b-instruct-q4_0') + print("Query took", time.time() - start) print(response) start=time.time() - response = translator_de_fr(TESTSENTENCE_DE_BROKER) - print("Second query took", start - time.time()) + response = translate_de_to_x(test_content.TESTSENTENCE_DE_RAINBOW, target_language='en') + print("query took", time.time() - start) print(response) + start=time.time() + response = translate_de_to_x(test_content.TESTSENTENCE_DE_RAINBOW, target_language='fr') + print("query took", time.time() - start) + print(response) diff --git a/multilang_translator/translator/syspromts.py b/multilang_translator/translator/syspromts.py index 490d58f..562099d 100644 --- a/multilang_translator/translator/syspromts.py +++ b/multilang_translator/translator/syspromts.py @@ -1,4 +1,4 @@ -TRANSLATOR_DE_EN = 'You are a translator. Translate the following sentence from German to English. Only respond with the translated sentence:\n' -TRANSLATOR_DE_FR = 'Vous êtes un traducteur. Traduisez la phrase suivante de l\'allemand vers le français. Répondez uniquement par la traduction :\n' -TRANSLATOR_DE_ES = 'Estás un traductor. Traduce la siguiente frase del alemán al español. Responda solo con la traducción:\n' -TRANSLATOR_DE_IT = 'Siete un traduttore. Traducete la seguente frase dal tedesco all \'inglese. Rispondete solo con la traduzione della frase:\n' \ No newline at end of file +TRANSLATOR_DE_EN = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Englische. Antworte nur mit der übersetzten Satz.\n' +TRANSLATOR_DE_FR = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Französische. Antworte nur mit der übersetzten Satz.\n' +TRANSLATOR_DE_ES = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Spanische. Antworte nur mit der übersetzten Satz.\n' +TRANSLATOR_DE_IT = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Italienische. Antworte nur mit der übersetzten Satz.\n' diff --git a/multilang_translator/translator/test_content.py b/multilang_translator/translator/test_content.py index e204037..8eb9053 100644 --- a/multilang_translator/translator/test_content.py +++ b/multilang_translator/translator/test_content.py @@ -1,4 +1,9 @@ TESTSENTENCE_DE_HELLO = 'Hallo Welt.' TESTSENTENCE_DE_WAVE_PARTICLE = 'Der Wellen-Teilchen-Dualismus beschreibt die Eigenschaft von Teilchen, sowohl als Wellen auf der Mikroebene zu verhalten und gleichzeitig bestimmte Eigenschaften wie Impuls und Energietrang zu besitzen.' TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.' -TESTSENTENCE_DE_RAINBOW = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder wolke wahrgenommen wird.' \ No newline at end of file +TESTSENTENCE_DE_RAINBOW = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder wolke wahrgenommen wird.' +TESTSENTENCE_DE_GATE_OPENED = "Please be advised that Gate 23 has opened for boarding." +TESTSENTENCE_DE_TRAIN_DELAYS = "Please note that delays have been reported on the InterCity train route. We apologize for any inconvenience this may cause." +TESTSENTENCE_DE_LOST_LUGGAGE = "Attention passengers! Lost luggage has been reported at Track 4. If you have not yet received your bag, please report to our lost luggage desk for assistance." +TESTSENTENCE_DE_PLANE_TAKEOFF_DELAYED = "This departing flight's departure time has changed due to weather conditions. The new boarding time will be advised shortly." +TESTSENTENCE_DE_SECURITY_CHECKPOINT_OPENING = "Security Checkpoint 5 is now open. Please proceed through the checkpoint to minimize your wait time during security screening." diff --git a/multilang_translator/backend_controller/__init__.py b/multilang_translator/utils/__init__.py similarity index 100% rename from multilang_translator/backend_controller/__init__.py rename to multilang_translator/utils/__init__.py diff --git a/multilang_translator/utils/resample.py b/multilang_translator/utils/resample.py new file mode 100644 index 0000000..2d06f6b --- /dev/null +++ b/multilang_translator/utils/resample.py @@ -0,0 +1,27 @@ +# resample .wav source to target sampling rate +import logging as log +import time +import os +import librosa +import soundfile as sf + + +def resample_file(filename, out_filename, target_rate=int(24e3)): + start=time.time() + # Load the original audio file + audio, rate = librosa.load(filename) + + if rate == target_rate: # Nothing to do + sf.write(out_filename, audio, target_rate) + return + + # Convert the sample rate to 24 kHz + resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate) + + # Save the resampled audio as a new .wav file + sf.write(out_filename, resampled_audio, target_rate) + + log.info("Resampling of %s took %s s", os.path.basename(filename), round(time.time() - start, 3)) + +if __name__ == "__main__": + resample_file('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav') diff --git a/pyproject.toml b/pyproject.toml index 8cbc7f1..e5ac358 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,12 +1,27 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - [project] name = "multilang_translator" +requires-python = ">= 3.11" version = '0.1' +dependencies = [ + "bumble @git+https://git@gitea.pstruebi.xyz/auracaster/bumble_mirror.git@e027bcb57a0f29c82e3c02c8bb8691dcb91eac62", + #"auracast @git+https://git@gitea.pstruebi.xyz/auracaster/bumble-auracast", + "requests", + "ollama", + "aioconsole", + "piper-tts==1.2.0" +] + +[project.optional-dependencies] +test = [ + "pytest >= 8.2", +] + [tool.pytest.ini_options] addopts = [ "--import-mode=importlib","--count=1","-s","-v" -] \ No newline at end of file +] + +[build-system] +requires = ["setuptools>=61", "wheel", "setuptools_scm>=8"] +build-backend = "setuptools.build_meta" diff --git a/tests/test_translator.py b/tests/test_translator.py index 1d9fab0..77b2d5c 100644 --- a/tests/test_translator.py +++ b/tests/test_translator.py @@ -1,6 +1,6 @@ from multilang_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it from multilang_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW -from multilang_translator.main import translate_from_german_and_encode +from multilang_translator.main import translate_from_german import time @@ -28,4 +28,4 @@ def test_translator(): def test_translate_from_german_and_encode(): - translate_from_german_and_encode(TESTSENTENCE_DE_RAINBOW) + translate_from_german(TESTSENTENCE_DE_RAINBOW)