use_bumble (#1)

Adapt the project to use the bumble auracaster

Reviewed-on: https://gitea.pstruebi.xyz/auracaster/multilang-translator-local/pulls/1
This commit was merged in pull request #1.
This commit is contained in:
2025-02-25 13:32:37 +01:00
parent 56b942ce39
commit a9acfd2d2c
15 changed files with 219 additions and 275 deletions

3
.gitignore vendored
View File

@@ -4,5 +4,6 @@
*.onnx
*.onnx.json
venv/
*.egg-info
venv*/
text_to_speech/models

View File

@@ -1,107 +0,0 @@
import time
import logging as log
import os
import serial
from ..config import SAMPLING_RATE_HZ, LANG_CONFIG
PRESET = f'{SAMPLING_RATE_HZ//1000}_2_1'
BROADCAST_CONFIG = {}
for i, d in enumerate(list(LANG_CONFIG.items())):
key, val = d
BROADCAST_CONFIG[i] = os.path.basename(val["file"])
def write_to_serial_read_respone(port, cmd, timeout = 2):
    """Send *cmd* to the serial device at *port* and collect response lines.

    Args:
        port (str): Device path of the serial port (e.g. '/dev/ttyACM0').
        cmd (str): Command to send; CRLF is appended automatically.
        timeout (float): Read timeout in seconds for the serial port.

    Returns:
        list[str]: Decoded, stripped response lines (empty on failure).
    """
    # Bug fix: bind readlines before the try block so the trailing
    # `return readlines` cannot raise UnboundLocalError when ser.open()
    # fails before any line was read.
    readlines = []
    # Initialize serial connection
    ser = serial.Serial(timeout = timeout)
    ser.port = port
    ser.baudrate = 115200
    ser.bytesize = serial.EIGHTBITS
    ser.parity = serial.PARITY_NONE
    ser.stopbits = serial.STOPBITS_ONE
    try:
        ser.open()
        # Send string to serial port and get response
        command = f"{cmd.strip()}\r\n"
        ser.write(command.encode())
        time.sleep(1)  # wait a bit for response
        # Read at most 20 lines; an empty read means the timeout expired.
        for _ in range(20):
            line = ser.readline().decode('utf-8').strip()
            if not line:
                break
            readlines.append(line)
    except serial.SerialException as e:
        print(f"Error communicating with serial port: {e}")
    finally:
        # Close serial connection before returning
        if ser.is_open:
            ser.close()
    return readlines
def gen_broadcast_config_cmd(preset, broadcast_config: dict):
    """Build the ordered list of 'nac' commands configuring each broadcast.

    Args:
        preset (str): Preset string used in the 'nac preset' command,
            e.g. '16_2_1' (sampling-rate-kHz prefix, see PRESET above).
        broadcast_config (dict): Maps broadcast channel index -> audio file
            base name (without the '.lc3' extension).

    Returns:
        list[str]: Commands in the order they must be sent to the device.
    """
    cmds = []
    for ch, file_name in broadcast_config.items():
        cmds.append(f"nac preset {preset} {ch}")
        cmds.append(f"nac broadcast_name broadcast{ch} {ch}")
        cmds.append(f"nac file select_play_once {file_name}.lc3 {ch} 0 0")
        cmds.append(f"nac num_bises 1 {ch} 0")
    return cmds
# TODO: Advertising interval becomes irregular with more than 3 broadcasts
# (10 ms -> >1 s) at 24 kHz sampling rate.
def broadcaster_config():
    """Reset the broadcaster board and push the full broadcast configuration.

    Resets the nRF board via nrfjprog, enables USB mass storage, sends all
    generated 'nac' configuration commands, then starts every configured
    broadcast index.

    Returns:
        str: Concatenated serial responses from all commands sent.

    Raises:
        subprocess.CalledProcessError: If the nrfjprog reset fails.
    """
    import subprocess
    PORT = "/dev/ttyACM0"
    total_ret= ""
    cmds = gen_broadcast_config_cmd(PRESET, BROADCAST_CONFIG)
    # NOTE(review): the J-Link serial number is hard-coded for one specific
    # dev board — confirm before running on other hardware.
    subprocess.run(["nrfjprog", "--reset", "-s", "1050109484"], check=True)
    time.sleep(2)  # give the board time to reboot before talking to it
    ret = write_to_serial_read_respone(PORT, f"nac en_usb_mass", timeout=0.1)
    total_ret += "\n".join(ret)
    log.info("\n".join(ret))
    time.sleep(1)
    for cmd in cmds:
        ret = write_to_serial_read_respone(PORT, cmd, timeout=0.1)
        log.info("\n".join(ret))
        total_ret += "\n".join(ret)
        time.sleep(1)  # presumably pacing so the device can process each command — verify
    for i in BROADCAST_CONFIG.keys():
        ret = write_to_serial_read_respone(PORT, f"nac start_idx {i}", timeout=0.1)
        total_ret += "\n".join(ret)
        log.info("\n".join(ret))
        time.sleep(0.2)
    return total_ret

View File

@@ -1,7 +0,0 @@
import shutil
import os
BROADCASTER_DEFAULT_DIR = '/media/pstruebi/2C93-FED6'

def copy_to_broadcaster(filepath, broadcaster_dir = BROADCASTER_DEFAULT_DIR):
    """Copy *filepath* into the broadcaster's mounted storage directory.

    Args:
        filepath (str): Path of the file to copy.
        broadcaster_dir (str): Target directory; defaults to the mounted
            broadcaster mass-storage path.
    """
    filename = os.path.basename(filepath)
    # Bug fix: the computed basename was never used as the destination name,
    # so the file was copied to a bogus fixed path.
    shutil.copy(filepath, f'{broadcaster_dir}/{filename}')

View File

@@ -1,28 +0,0 @@
#import broadcaster_config
from .broadcaster_config import write_to_serial_read_respone
import time
import logging as log
def broadcaster_play_file(broadcast_ch, file, wait_after_stop = 1):
    """Stop the current stream on *broadcast_ch* and play *file* once.

    Retries up to 3 times; any response containing 'Failed' or 'err' counts
    as a failed attempt.

    Args:
        broadcast_ch (int): Broadcast channel index.
        file (str): File name (with extension) on the broadcaster storage.
        wait_after_stop (float | None): Seconds to sleep between the
            stream_close and the play command; None skips the sleep.

    Returns:
        str: Joined serial responses of the successful attempt, or the
        accumulated responses of all attempts on failure.
    """
    serial_port = "/dev/ttyACM0"
    ret_all_str = ""
    for i in range(3):
        # write_to_serial_read_respone returns a list of response lines.
        ret = write_to_serial_read_respone(serial_port, f"nac file stream_close {broadcast_ch} 0 0", timeout=0.1)
        if wait_after_stop is not None:
            time.sleep(wait_after_stop)
        # NOTE(review): `ret` is a list here, so += "\n" extends it with a
        # single one-character item before the play-response lines follow.
        ret += "\n"
        ret += write_to_serial_read_respone(serial_port, f"nac file select_play_once {file} {broadcast_ch} 0 0", timeout=0.1)
        ret = "\n".join(ret)
        ret_all_str += ret
        if (not "Failed" in ret) and (not "err" in ret):
            log.info("Breaking after %s retries.", i)
            log.info(ret_all_str)
            return ret
    log.error("Failed to play file after 3 retries.")
    log.error(ret_all_str)
    return ret_all_str

View File

@@ -1,28 +1,37 @@
import os
ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements')
SAMPLING_RATE_HZ = int(16e3)
VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv')
PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper'
FRAME_DUR_MS = 10
BITRATE_BPS = int(32e3) # TODO: test 16khz 16kbps
SAMPLING_RATE_HZ = int(16e3)
BITRATE_BPS = int(32e3)
def mk_filename_lc3(lang=''):
    """Return the .lc3 announcement path encoding rate, frame length and bitrate."""
    codec_suffix = f"{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}"
    return f"{ANNOUNCEMENT_DIR}/announcement_{lang}_{codec_suffix}.lc3"
LANG_CONFIG = {
"de": {
"file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_de",
"filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_de.wav",
"filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_de_resamp.wav",
"tts": 'de_DE-kerstin-low',
},
"en": {
"file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_en",
"filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_en.wav",
"filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_en_resamp.wav",
"tts": 'en_US-lessac-medium'
},
"fr": {
"file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_fr",
"filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_fr.wav",
"filepath_wav_resamp": f"{ANNOUNCEMENT_DIR}/announcement_fr_resamp.wav",
"tts": 'fr_FR-siwis-medium'
},
# "es": {
# "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_es",
# "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_es.wav",
# "tts": 'es_ES-sharvard-medium'
# },
# "it": {
# "file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_it",
# "filepath_wav": f"{ANNOUNCEMENT_DIR}/announcement_it.wav",
# "tts": 'it_IT-paola-medium'
# }
}

View File

@@ -4,80 +4,112 @@ list prompt example
"""
from __future__ import print_function, unicode_literals
from pprint import pprint
from PyInquirer import prompt, Separator
from examples import custom_style_2
import os
import asyncio
from copy import copy
import time
import logging as log
from .translator import llm_translator
from .text_to_speech import text_to_speech, resample
from .backend_controller.broadcaster_config import broadcaster_config
from .backend_controller.broadcaster_play_once import broadcaster_play_file
from .backend_controller.broadcaster_copy_files import copy_to_broadcaster
from .encode import encode_lc3
import aioconsole
from .config import LANG_CONFIG, BITRATE_BPS, SAMPLING_RATE_HZ, FRAME_DUR_MS
from utils import resample
from translator import llm_translator, test_content
from text_to_speech import text_to_speech
from encode import encode_lc3
from auracast import multicast_control
from auracast import auracast_config
from config import LANG_CONFIG, BITRATE_BPS, SAMPLING_RATE_HZ, FRAME_DUR_MS
# TODO: look for a end to end translation solution
def transcribe():
    """Transcribe input audio to text — not implemented yet."""
    pass # TODO: Implement transcribing input audio e.g. with whisper
def synthesize_resample_encode(text, tts_model, output_file):
    """Synthesize *text* to *output_file*, resample it, then LC3-encode it.

    Args:
        text (str): Text to speak.
        tts_model (str): Piper TTS voice/model name.
        output_file (str): Target .wav path (the encoded .lc3 lands beside it).

    Returns:
        The audio duration reported by text_to_speech.synthesize; callers
        store this per language to time playback.
    """
    audio_dur = text_to_speech.synthesize(text, tts_model, output_file)
    resample.resample(output_file, output_file, target_rate=SAMPLING_RATE_HZ)
    encode_lc3.encode_lc3(output_file, bps=BITRATE_BPS, frame_dur_ms=FRAME_DUR_MS)
    # Bug fix: the duration was computed but never returned, so callers that
    # assign the result (translate_from_german_and_encode) always got None.
    return audio_dur
def syntesize_resample(text, tts_model, file_wav, file_wav_resamp):
    """Run TTS for *text* into *file_wav*, then resample into *file_wav_resamp*.

    Returns the synthesized audio duration in seconds.
    """
    duration_s = text_to_speech.synthesize(text, tts_model, file_wav)
    resample.resample_file(file_wav, file_wav_resamp, target_rate=SAMPLING_RATE_HZ)
    return duration_s
def translate_from_german_and_encode(text_de):
def translate_from_german(text_de, model):
config = copy(LANG_CONFIG)
base_lang = "de"
file = config[base_lang]["file"]
audio_dur_s = {}
audio_dur_s [base_lang] = synthesize_resample_encode(text_de, config['de']["tts"], f'{file}.wav')
del config[base_lang]
file = config[base_lang]["filepath_wav"]
file_resamp = config[base_lang]['filepath_wav_resamp']
tts_json = {}
for key, val in config.items():
text = llm_translator.translate_de_to_x(key, text_de)
file = val['file']
audio_dur_s[key] = synthesize_resample_encode(text, val['tts'], f'{file}.wav')
return audio_dur_s
if key == base_lang:
text = text_de
else:
text = llm_translator.translate_de_to_x(text_de, key, model=model)
def announcement_from_german_text(text_de):
log.info('%s', text)
file = val['filepath_wav']
file_resamp = val['filepath_wav_resamp']
tts_json[key] = syntesize_resample(text, val['tts'], file, file_resamp)
return tts_json
async def announcement_from_german_text(caster:multicast_control.Multicaster, text_de):
tts_json = translate_from_german(text_de, model = 'llama3.2:3b-instruct-q4_0')
audio_durs = translate_from_german_and_encode(text_de)
# Transfer the files to broadcaster memory
start = time.time()
for val in LANG_CONFIG.values():
copy_to_broadcaster(f'{val["file"]}.lc3')
log.info("Transfering files to broadcaster took %s s", round(time.time() - start, 3))
await caster.init_audio()
caster.start_streaming()
time.sleep(2)
# Instruct the broadcaster to stream the files
for i, d in enumerate(list(LANG_CONFIG.items())):
key, val = d
broadcaster_play_file(i, f'{os.path.basename(val["file"])}.lc3')
time.sleep(audio_durs[key])
log.info("Starting all broadcasts %s s", round(time.time() - start, 3))
log.info("Starting all broadcasts took %s s", round(time.time() - start, 3))
# questions = [
# {
# 'type': 'list',
# 'name': 'theme',
# 'message': 'What type of annoucement would you like to make?',
# 'choices': [
# 'predefined',
# 'custom',
# 'audio'
# ]
# },
# ]
# answers = prompt(questions, style=custom_style_2)
# pprint(answers)
async def command_line_ui(caster: multicast_control.Multicaster):
    """Minimal REPL: read announcement text and broadcast it, until 'quit'.

    Args:
        caster: Active Multicaster whose broadcast has been initialized.
    """
    while True:
        command = await aioconsole.ainput("\nEnter your Announcement|quit] > ")
        if command.strip().lower() == "quit":
            print("👋 Exiting...")
            # NOTE(review): nesting reconstructed from a diff view with
            # stripped indentation — confirm whether shutdown() should run
            # only when a device exists, or unconditionally on quit.
            if caster.device:
                caster.stop_streaming()
                await caster.shutdown()
            break # Exit loop
        # TODO: Implement predefined announcements
        elif command.strip() == '':
            print('Nothing to Announce')
        else:
            await announcement_from_german_text(caster, command)
async def main():
    """Configure logging and the Auracast broadcaster, then run the CLI loop."""
    log.basicConfig(
        level=log.INFO,
        format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s'
    )
    global_conf = auracast_config.global_base_config
    #global_conf.transport='serial:/dev/serial/by-id/usb-SEGGER_J-Link_001057705357-if02,1000000,rtscts' # transport for nrf54l15dk
    # HCI transport: nRF52 dongle running the Zephyr hci_uart sample over USB CDC.
    global_conf.transport='serial:/dev/serial/by-id/usb-ZEPHYR_Zephyr_HCI_UART_sample_81BD14B8D71B5662-if00,115200,rtscts' #nrf52dongle hci_uart usb cdc
    big_conf = [ # TODO: integrate this in the LANG_CONFIG dict, better: make a hierarchy of dataclasses
        auracast_config.broadcast_de,
        auracast_config.broadcast_en,
        auracast_config.broadcast_fr,
        #auracast_config.broadcast_es,
        #auracast_config.broadcast_it,
    ]
    # One resampled wav per configured language — assumes big_conf order
    # matches LANG_CONFIG key order (de, en, fr); verify when adding languages.
    files = [v['filepath_wav_resamp'] for v in LANG_CONFIG.values()]
    for i, conf in enumerate(big_conf):
        conf.loop_wav = False
        conf.audio_source = f'file:{files[i]}'
    caster = multicast_control.Multicaster(global_conf, big_conf)
    await caster.init_broadcast()
    #await announcement_from_german_text(caster, test_content.TESTSENTENCE_DE_HELLO)
    await command_line_ui(caster)
    #await asyncio.wait([caster.streamer.task])

if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -1,18 +0,0 @@
# resample .wave from 22.05 to 24kHz sampling rate
import librosa
import soundfile as sf
def resample(filename, out_filename, target_rate=int(24e3)):
    """Resample an audio file to *target_rate* Hz and write it as .wav.

    Args:
        filename (str): Input audio path.
        out_filename (str): Output .wav path.
        target_rate (int): Target sampling rate in Hz (default 24 kHz).
    """
    # Bug fix: load at the file's native rate. Without sr=None,
    # librosa.load silently resamples everything to its 22050 Hz default
    # first, which defeats a function whose job is rate conversion.
    audio, rate = librosa.load(filename, sr=None)
    # Convert the sample rate to the requested target
    resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate)
    # Save the resampled audio as a new .wav file
    sf.write(out_filename, resampled_audio, target_rate)
if __name__ == "__main__":
resample('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav')

View File

@@ -1,8 +1,9 @@
import os
import subprocess
import time
import json
import logging as log
import wave
from multilang_translator import config
TTS_DIR = os.path.join(os.path.dirname(__file__))
@@ -11,16 +12,31 @@ def synthesize(text, model="en_US-lessac-medium", output_file="out.wav"):
pwd = os.getcwd()
os.chdir(TTS_DIR)
start = time.time()
ret = subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True)
with wave.open(output_file, "rb") as wf:
frames = wf.getnframes()
rate = wf.getframerate()
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent instance of the model
[config.PIPER_EXE_PATH, '--model', model, '--output_file', output_file],
input=text.encode('utf-8'),
capture_output=True
)
log.info('%s', ret.stdout)
log.info('%s', ret.stderr)
length_in_seconds = round(frames / rate, 1)
log.info(f"Audio length: {length_in_seconds} s")
assert ret.returncode == 0, 'Piper returncode was not 0.'
log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3))
with open (f'{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanentl
model_json = json.load(f)
os.chdir(pwd)
log.info("Running piper took %s s", round(time.time() - start, 3))
return model_json
return length_in_seconds
if __name__ == '__main__':
import logging
logging.basicConfig(
level=logging.INFO,
format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s'
)
synthesize('Hello World')

View File

@@ -1,10 +1,18 @@
import time
import requests
import json
import logging as log
import time
import ollama
from . import credentials
from . import syspromts
from multilang_translator.translator import credentials
from multilang_translator.translator import syspromts
from multilang_translator.translator import test_content
# ollama.create( # TODO: create models on startup
# model='example',
# from_='llama3.2', system="You are Mario from Super Mario Bros."
# )
def query_model(model, query):
url = f'{credentials.BASE_URL}/api/chat/completions'
@@ -21,42 +29,33 @@ def query_model(model, query):
return response.json()
def translate_de_to_x(target_language: str, text:str, model ='llama3.2:3b-instruct-q4_0'):
def translate_de_to_x(text:str, target_language: str, model='llama3.2:3b-instruct-q4_0'): # remember to use instruct models
start=time.time()
s = getattr(syspromts, f"TRANSLATOR_DE_{target_language.upper()}")
return query_model(model, s + text)['choices'][0]['message']['content']
def translator_de_en(query):
    """Translate *query* from German to English via the LLM backend."""
    model = 'llama3.2:3b-instruct-q4_0'
    response = query_model(model, syspromts.TRANSLATOR_DE_EN + query)
    return response['choices'][0]['message']['content']
def translator_de_fr(query):
    """Translate *query* from German to French via the LLM backend."""
    model = 'llama3.2:3b-instruct-q4_0'
    response = query_model(model, syspromts.TRANSLATOR_DE_FR + query)
    return response['choices'][0]['message']['content']
def translator_de_es(query):
    """Translate *query* from German to Spanish via the LLM backend."""
    model = 'llama3.2:3b-instruct-q4_0'
    response = query_model(model, syspromts.TRANSLATOR_DE_ES + query)
    return response['choices'][0]['message']['content']
def translator_de_it(query):
    """Translate *query* from German to Italian via the LLM backend."""
    model = 'llama3.2:3b-instruct-q4_0'
    response = query_model(model, syspromts.TRANSLATOR_DE_IT + query)
    return response['choices'][0]['message']['content']
response = ollama.chat(
model = model,
messages = [
{'role': 'system', 'content': s},
{'role': 'user', 'content': text}
],
)
log.info('Running the translator to %s took %s s', target_language, round(time.time() - start, 3))
return response['message']['content']
if __name__ == "__main__":
import time
TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
start=time.time()
response = translator_de_en(TESTSENTENCE_DE_BROKER)
print("First query took", start - time.time())
print(json.dumps(response, indent=2))
response = translate_de_to_x('Der Zug ist da.', target_language='en', model='llama3.2:1b-instruct-q4_0')
print("Query took", time.time() - start)
print(response)
start=time.time()
response = translator_de_fr(TESTSENTENCE_DE_BROKER)
print("Second query took", start - time.time())
response = translate_de_to_x(test_content.TESTSENTENCE_DE_RAINBOW, target_language='en')
print("query took", time.time() - start)
print(response)
start=time.time()
response = translate_de_to_x(test_content.TESTSENTENCE_DE_RAINBOW, target_language='fr')
print("query took", time.time() - start)
print(response)

View File

@@ -1,4 +1,4 @@
TRANSLATOR_DE_EN = 'You are a translator. Translate the following sentence from German to English. Only respond with the translated sentence:\n'
TRANSLATOR_DE_FR = 'Vous êtes un traducteur. Traduisez la phrase suivante de l\'allemand vers le français. Répondez uniquement par la traduction :\n'
TRANSLATOR_DE_ES = 'Estás un traductor. Traduce la siguiente frase del alemán al español. Responda solo con la traducción:\n'
TRANSLATOR_DE_IT = 'Siete un traduttore. Traducete la seguente frase dal tedesco all \'inglese. Rispondete solo con la traduzione della frase:\n'
TRANSLATOR_DE_EN = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Englische. Antworte nur mit der übersetzten Satz.\n'
TRANSLATOR_DE_FR = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Französische. Antworte nur mit der übersetzten Satz.\n'
TRANSLATOR_DE_ES = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Spanische. Antworte nur mit der übersetzten Satz.\n'
TRANSLATOR_DE_IT = 'Du bist ein Übersetzer. Übersetze die folgende Satz aus dem Deutschen ins Italienische. Antworte nur mit der übersetzten Satz.\n'

View File

@@ -2,3 +2,8 @@ TESTSENTENCE_DE_HELLO = 'Hallo Welt.'
TESTSENTENCE_DE_WAVE_PARTICLE = 'Der Wellen-Teilchen-Dualismus beschreibt die Eigenschaft von Teilchen, sowohl als Wellen auf der Mikroebene zu verhalten und gleichzeitig bestimmte Eigenschaften wie Impuls und Energietrang zu besitzen.'
TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
TESTSENTENCE_DE_RAINBOW = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder wolke wahrgenommen wird.'
TESTSENTENCE_DE_GATE_OPENED = "Please be advised that Gate 23 has opened for boarding."
TESTSENTENCE_DE_TRAIN_DELAYS = "Please note that delays have been reported on the InterCity train route. We apologize for any inconvenience this may cause."
TESTSENTENCE_DE_LOST_LUGGAGE = "Attention passengers! Lost luggage has been reported at Track 4. If you have not yet received your bag, please report to our lost luggage desk for assistance."
TESTSENTENCE_DE_PLANE_TAKEOFF_DELAYED = "This departing flight's departure time has changed due to weather conditions. The new boarding time will be advised shortly."
TESTSENTENCE_DE_SECURITY_CHECKPOINT_OPENING = "Security Checkpoint 5 is now open. Please proceed through the checkpoint to minimize your wait time during security screening."

View File

@@ -0,0 +1,27 @@
# resample .wav source to target sampling rate
import logging as log
import time
import os
import librosa
import soundfile as sf
def resample_file(filename, out_filename, target_rate=int(24e3)):
    """Resample *filename* to *target_rate* Hz, writing to *out_filename*.

    Args:
        filename (str): Input audio path.
        out_filename (str): Output .wav path.
        target_rate (int): Target sampling rate in Hz (default 24 kHz).
    """
    start=time.time()
    # Bug fix: load at the file's native rate. Without sr=None, librosa.load
    # resamples everything to its 22050 Hz default first, so `rate` could
    # never equal a non-22050 target and the shortcut below was dead code.
    audio, rate = librosa.load(filename, sr=None)
    if rate == target_rate: # Nothing to do — rewrite as-is
        sf.write(out_filename, audio, target_rate)
        return
    # Convert the sample rate to the requested target
    resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate)
    # Save the resampled audio as a new .wav file
    sf.write(out_filename, resampled_audio, target_rate)
    log.info("Resampling of %s took %s s", os.path.basename(filename), round(time.time() - start, 3))
if __name__ == "__main__":
resample_file('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav')

View File

@@ -1,12 +1,27 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "multilang_translator"
requires-python = ">= 3.11"
version = '0.1'
dependencies = [
"bumble @git+https://git@gitea.pstruebi.xyz/auracaster/bumble_mirror.git@e027bcb57a0f29c82e3c02c8bb8691dcb91eac62",
#"auracast @git+https://git@gitea.pstruebi.xyz/auracaster/bumble-auracast",
"requests",
"ollama",
"aioconsole",
"piper-tts==1.2.0"
]
[project.optional-dependencies]
test = [
"pytest >= 8.2",
]
[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib","--count=1","-s","-v"
]
[build-system]
requires = ["setuptools>=61", "wheel", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"

View File

@@ -1,6 +1,6 @@
from multilang_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it
from multilang_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW
from multilang_translator.main import translate_from_german_and_encode
from multilang_translator.main import translate_from_german
import time
@@ -28,4 +28,4 @@ def test_translator():
def test_translate_from_german_and_encode():
translate_from_german_and_encode(TESTSENTENCE_DE_RAINBOW)
translate_from_german(TESTSENTENCE_DE_RAINBOW)