preparations for using the bumble auracaster

This commit is contained in:
2025-02-24 15:10:57 +01:00
parent 56b942ce39
commit 18ecedfe45
12 changed files with 134 additions and 238 deletions

3
.gitignore vendored
View File

@@ -4,5 +4,6 @@
*.onnx
*.onnx.json
venv/
*.egg-info
venv*/
text_to_speech/models

View File

@@ -1,107 +0,0 @@
import time
import logging as log
import os
import serial
from ..config import SAMPLING_RATE_HZ, LANG_CONFIG
# Preset string used in the `nac preset` command: "<sampling rate in kHz>_2_1".
PRESET = f'{SAMPLING_RATE_HZ//1000}_2_1'

# Broadcast channel index -> base name of that language's announcement file,
# numbered in LANG_CONFIG iteration order.
BROADCAST_CONFIG = {
    channel: os.path.basename(lang_entry["file"])
    for channel, lang_entry in enumerate(LANG_CONFIG.values())
}
def write_to_serial_read_respone(port, cmd, timeout = 2):
    """Send one command line to a serial device and collect its reply.

    The port is configured for 115200 baud, 8 data bits, no parity and one
    stop bit. The command is written terminated by CR LF; after a one second
    pause up to 20 response lines are read, stopping at the first line that
    is empty after stripping.

    Args:
        port: Device path of the serial port (e.g. '/dev/ttyACM0').
        cmd: Command text; surrounding whitespace is stripped before sending.
        timeout: Timeout in seconds handed to ``serial.Serial``.

    Returns:
        list[str]: The stripped, UTF-8 decoded response lines. The list is
        empty when nothing was read, and holds whatever was read so far when
        a ``serial.SerialException`` interrupted the exchange (the error is
        printed, not raised).
    """
    # Bind the result before touching the port: if open() or write() raises
    # SerialException, the function must still return a list instead of
    # failing with UnboundLocalError at the return statement.
    readlines = []

    # Initialize serial connection (not opened yet, no port given to the ctor)
    ser = serial.Serial(timeout=timeout)
    ser.port = port
    ser.baudrate = 115200
    ser.bytesize = serial.EIGHTBITS
    ser.parity = serial.PARITY_NONE
    ser.stopbits = serial.STOPBITS_ONE

    try:
        ser.open()

        # Send string to serial port and get response
        command = f"{cmd.strip()}\r\n"
        ser.write(command.encode())
        time.sleep(1)  # wait a bit for response

        for _ in range(20):
            line = ser.readline().decode('utf-8').strip()
            if not line:
                # Nothing more to read; the port is closed in `finally`.
                break
            readlines.append(line)
    except serial.SerialException as e:
        print(f"Error communicating with serial port: {e}")
    finally:
        # Close serial connection before returning
        if ser.is_open:
            ser.close()

    return readlines
def gen_broadcast_config_cmd(preset, broadcast_config: dict):
    """Build the list of ``nac`` commands that configures every broadcast.

    Nothing is sent anywhere; the commands are only generated.

    Args:
        preset (str): Preset string inserted into each ``nac preset`` line.
        broadcast_config (dict): Maps a channel index to the announcement
            file name for that channel, given without the ``.lc3`` suffix.

    Returns:
        list[str]: Four commands per channel, in mapping order: preset,
        broadcast name, file selection and number of BISes.
    """
    commands = []
    for channel, lc3_stem in broadcast_config.items():
        commands.extend(
            (
                f"nac preset {preset} {channel}",
                f"nac broadcast_name broadcast{channel} {channel}",
                f"nac file select_play_once {lc3_stem}.lc3 {channel} 0 0",
                f"nac num_bises 1 {channel} 0",
            )
        )
    return commands
# TODO: The advertising interval becomes irregular with more than 3 broadcasts (10 ms -> over 1 s) at 24 kHz sampling rate
# Configure the broadcaster: reset the board via nrfjprog, send `nac en_usb_mass`,
# then every command from gen_broadcast_config_cmd(PRESET, BROADCAST_CONFIG), and
# finally one `nac start_idx <i>` per key of BROADCAST_CONFIG, all over /dev/ttyACM0.
# Returns one string holding all serial responses; each response's lines are
# joined with "\n", but no separator is inserted between consecutive responses.
# NOTE(review): indentation is not visible in this view, so it cannot be told
# whether the time.sleep() calls following each loop run per iteration or once
# after the loop — confirm against the original file.
def broadcaster_config():
import subprocess
PORT = "/dev/ttyACM0"
total_ret= ""
cmds = gen_broadcast_config_cmd(PRESET, BROADCAST_CONFIG)
# check=True makes a non-zero nrfjprog exit status raise CalledProcessError.
# NOTE(review): "-s 1050109484" presumably selects one specific debug probe by
# serial number — confirm; it is hard-coded here.
subprocess.run(["nrfjprog", "--reset", "-s", "1050109484"], check=True)
# Pause after the reset before talking to the serial port.
time.sleep(2)
ret = write_to_serial_read_respone(PORT, f"nac en_usb_mass", timeout=0.1)
total_ret += "\n".join(ret)
log.info("\n".join(ret))
time.sleep(1)
# Send the generated per-channel configuration commands in order.
for cmd in cmds:
ret = write_to_serial_read_respone(PORT, cmd, timeout=0.1)
log.info("\n".join(ret))
total_ret += "\n".join(ret)
time.sleep(1)
# Issue one start command per configured broadcast index.
for i in BROADCAST_CONFIG.keys():
ret = write_to_serial_read_respone(PORT, f"nac start_idx {i}", timeout=0.1)
total_ret += "\n".join(ret)
log.info("\n".join(ret))
time.sleep(0.2)
return total_ret

View File

@@ -1,7 +0,0 @@
import shutil
import os
# Default destination directory for copy_to_broadcaster().
# NOTE(review): presumably the mount point of the broadcaster's USB mass
# storage on the development machine — confirm.
BROADCASTER_DEFAULT_DIR = '/media/pstruebi/2C93-FED6'


def copy_to_broadcaster(filepath, broadcaster_dir = BROADCASTER_DEFAULT_DIR):
    """Copy a file into the broadcaster directory under its own base name.

    Args:
        filepath: Path of the file to copy.
        broadcaster_dir: Directory that receives the copy; defaults to
            ``BROADCASTER_DEFAULT_DIR``.
    """
    destination = f'{broadcaster_dir}/{os.path.basename(filepath)}'
    shutil.copy(filepath, destination)

View File

@@ -1,28 +0,0 @@
#import broadcaster_config
from .broadcaster_config import write_to_serial_read_respone
import time
import logging as log
def broadcaster_play_file(broadcast_ch, file, wait_after_stop = 1):
    """Stop the running stream on a channel and play a file once, with retries.

    Up to three attempts are made. Each attempt sends ``nac file stream_close``
    followed by ``nac file select_play_once`` over /dev/ttyACM0 and inspects
    the combined response text.

    Args:
        broadcast_ch: Broadcast channel index inserted into both commands.
        file: File name passed to ``select_play_once``.
        wait_after_stop: Seconds to sleep between the two commands; ``None``
            skips the pause.

    Returns:
        str: The response text of the first attempt that contains neither
        "Failed" nor "err"; if all three attempts fail, the concatenated
        response text of every attempt.
    """
    tty = "/dev/ttyACM0"
    stop_cmd = f"nac file stream_close {broadcast_ch} 0 0"
    play_cmd = f"nac file select_play_once {file} {broadcast_ch} 0 0"

    transcript = ""
    for attempt in range(3):
        stop_lines = write_to_serial_read_respone(tty, stop_cmd, timeout=0.1)
        if wait_after_stop is not None:
            time.sleep(wait_after_stop)
        play_lines = write_to_serial_read_respone(tty, play_cmd, timeout=0.1)

        # A lone "\n" element separates the two responses before joining.
        reply = "\n".join([*stop_lines, "\n", *play_lines])
        transcript += reply

        if "Failed" in reply or "err" in reply:
            continue

        log.info("Breaking after %s retries.", attempt)
        log.info(transcript)
        return reply

    log.error("Failed to play file after 3 retries.")
    log.error(transcript)
    return transcript

View File

@@ -1,9 +1,11 @@
import os
ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements')
VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv')
PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper'
SAMPLING_RATE_HZ = int(16e3)
FRAME_DUR_MS = 10
BITRATE_BPS = int(32e3) # TODO: test 16khz 16kbps
BITRATE_BPS = int(32e3)
LANG_CONFIG = {
"de": {
"file": f"{ANNOUNCEMENT_DIR}/announcement_{SAMPLING_RATE_HZ//1000}_{FRAME_DUR_MS}_{BITRATE_BPS//1000}_de",

View File

@@ -4,80 +4,93 @@ list prompt example
"""
from __future__ import print_function, unicode_literals
from pprint import pprint
from PyInquirer import prompt, Separator
from examples import custom_style_2
import os
import asyncio
from copy import copy
import time
import logging as log
from .translator import llm_translator
from .text_to_speech import text_to_speech, resample
from .backend_controller.broadcaster_config import broadcaster_config
from .backend_controller.broadcaster_play_once import broadcaster_play_file
from .backend_controller.broadcaster_copy_files import copy_to_broadcaster
from .encode import encode_lc3
from .config import LANG_CONFIG, BITRATE_BPS, SAMPLING_RATE_HZ, FRAME_DUR_MS
from translator import llm_translator, test_content
from text_to_speech import text_to_speech
from encode import encode_lc3
from auracast import multicast_control
from auracast import auracast_config
from config import LANG_CONFIG, BITRATE_BPS, SAMPLING_RATE_HZ, FRAME_DUR_MS
def synthesize_resample_encode(text, tts_model, output_file):
def transcribe():
pass
def syntesize(text, tts_model, output_file):
audio_dur = text_to_speech.synthesize(text, tts_model, output_file)
resample.resample(output_file, output_file, target_rate=SAMPLING_RATE_HZ)
encode_lc3.encode_lc3(output_file, bps=BITRATE_BPS, frame_dur_ms=FRAME_DUR_MS)
#resample.resample(output_file, output_file, target_rate=SAMPLING_RATE_HZ)
#encode_lc3.encode_lc3(output_file, bps=BITRATE_BPS, frame_dur_ms=FRAME_DUR_MS)
return audio_dur
def translate_from_german_and_encode(text_de):
def translate_from_german(text_de):
config = copy(LANG_CONFIG)
base_lang = "de"
file = config[base_lang]["file"]
audio_dur_s = {}
audio_dur_s [base_lang] = synthesize_resample_encode(text_de, config['de']["tts"], f'{file}.wav')
audio_dur_s [base_lang] = syntesize(text_de, config['de']["tts"], f'{file}.wav')
del config[base_lang]
for key, val in config.items():
text = llm_translator.translate_de_to_x(key, text_de)
text = llm_translator.translate_de_to_x(text_de, key)
file = val['file']
audio_dur_s[key] = synthesize_resample_encode(text, val['tts'], f'{file}.wav')
audio_dur_s[key] = syntesize(text, val['tts'], f'{file}.wav')
return audio_dur_s
def announcement_from_german_text(text_de):
async def announcement_from_german_text(caster:multicast_control.Multicaster, text_de):
audio_durs = translate_from_german_and_encode(text_de)
translate_from_german(text_de)
# Transfer the files to broadcaster memory
start = time.time()
for val in LANG_CONFIG.values():
copy_to_broadcaster(f'{val["file"]}.lc3')
log.info("Transfering files to broadcaster took %s s", round(time.time() - start, 3))
await caster.init_audio()
await caster.start_streaming()
time.sleep(2)
#for val in LANG_CONFIG.values():
# copy_to_broadcaster(f'{val["file"]}.lc3')
#log.info("Transfering files to broadcaster took %s s", round(time.time() - start, 3))
#time.sleep(2)
# Instruct the broadcaster to stream the files
for i, d in enumerate(list(LANG_CONFIG.items())):
key, val = d
broadcaster_play_file(i, f'{os.path.basename(val["file"])}.lc3')
time.sleep(audio_durs[key])
# for i, d in enumerate(list(LANG_CONFIG.items())):
# key, val = d
# broadcaster_play_file(i, f'{os.path.basename(val["file"])}.lc3')
# time.sleep(audio_durs[key])
log.info("Starting all broadcasts %s s", round(time.time() - start, 3))
log.info("Starting all broadcasts took %s s", round(time.time() - start, 3))
# questions = [
# {
# 'type': 'list',
# 'name': 'theme',
# 'message': 'What type of annoucement would you like to make?',
# 'choices': [
# 'predefined',
# 'custom',
# 'audio'
# ]
# },
# ]
# answers = prompt(questions, style=custom_style_2)
# pprint(answers)
async def main():
    """Set up the multicaster and broadcast the German hello test sentence.

    Takes the base configuration from ``auracast_config``, points its
    transport at a serial HCI device, assigns one announcement file to each
    broadcast configuration, initialises the broadcast and then runs
    ``announcement_from_german_text`` with ``TESTSENTENCE_DE_HELLO``.
    """
    base_conf = auracast_config.global_base_config
    #base_conf.transport='serial:/dev/serial/by-id/usb-SEGGER_J-Link_001057705357-if02,1000000,rtscts' # transport for nrf54l15dk
    base_conf.transport = 'serial:/dev/serial/by-id/usb-ZEPHYR_Zephyr_HCI_UART_sample_81BD14B8D71B5662-if00,115200,rtscts'  # nrf52dongle hci_uart usb cdc

    # TODO: integrate this in the LANG_CONFIG dict
    broadcasts = [
        auracast_config.broadcast_de,
        auracast_config.broadcast_en,
        auracast_config.broadcast_fr,
        #auracast_config.broadcast_es,
        #auracast_config.broadcast_it,
    ]

    # Broadcasts are paired with LANG_CONFIG entries purely by position.
    # NOTE(review): the 'file' entries appear to carry no extension while the
    # synthesis step in this file writes '<file>.wav' — confirm the source path.
    announcement_files = [entry['file'] for entry in LANG_CONFIG.values()]
    for position, broadcast in enumerate(broadcasts):
        broadcast.loop_wav = False
        broadcast.audio_source = f'file:{announcement_files[position]}'
        broadcast.input_format = 'int16le,48000,1'  # TODO: Use actual sampling rate from piper

    caster = multicast_control.Multicaster(base_conf, broadcasts)
    await caster.init_broadcast()

    #await command_line_ui(caster)
    await announcement_from_german_text(caster, test_content.TESTSENTENCE_DE_HELLO)
# Script entry point: drive the async main() coroutine with asyncio.run().
if __name__ == '__main__':
asyncio.run(main())

View File

@@ -1,8 +1,10 @@
import os
import subprocess
import time
import json
import logging as log
import wave
from multilang_translator import config
TTS_DIR = os.path.join(os.path.dirname(__file__))
@@ -11,16 +13,25 @@ def synthesize(text, model="en_US-lessac-medium", output_file="out.wav"):
pwd = os.getcwd()
os.chdir(TTS_DIR)
start = time.time()
ret = subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True)
with wave.open(output_file, "rb") as wf:
frames = wf.getnframes()
rate = wf.getframerate()
length_in_seconds = round(frames / rate, 1)
log.info(f"Audio length: {length_in_seconds} s")
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent instance of the model
[config.PIPER_EXE_PATH, '--model', model, '--output_file', output_file],
input=text.encode('utf-8'),
capture_output=True
)
log.info('%s', ret.stdout)
log.info('%s', ret.stderr)
assert ret.returncode == 0, 'Piper returncode was not 0.'
os.chdir(pwd)
log.info("Running piper took %s s", round(time.time() - start, 3))
return length_in_seconds
with open (f'{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanentl
model_json = json.load(f)
return model_json
if __name__ == '__main__':
synthesize('Hello World')

View File

@@ -2,9 +2,16 @@ import requests
import json
import logging as log
import time
import ollama
from . import credentials
from . import syspromts
from . import test_content
# ollama.create( # TODO: create models on startup
# model='example',
# from_='llama3.2', system="You are Mario from Super Mario Bros."
# )
def query_model(model, query):
url = f'{credentials.BASE_URL}/api/chat/completions'
@@ -21,42 +28,26 @@ def query_model(model, query):
return response.json()
def translate_de_to_x(target_language: str, text:str, model ='llama3.2:3b-instruct-q4_0'):
def translate_de_to_x(text:str, target_language: str, model='llama3.2:1b'):
s = getattr(syspromts, f"TRANSLATOR_DE_{target_language.upper()}")
return query_model(model, s + text)['choices'][0]['message']['content']
def translator_de_en(query):
MODEL = 'llama3.2:3b-instruct-q4_0'
#MODEL = 'llama3.1:8b-instruct-q4_0'
return query_model(MODEL, syspromts.TRANSLATOR_DE_EN + query)['choices'][0]['message']['content']
def translator_de_fr(query):
MODEL = 'llama3.2:3b-instruct-q4_0'
return query_model(MODEL, syspromts.TRANSLATOR_DE_FR + query)['choices'][0]['message']['content']
def translator_de_es(query):
MODEL = 'llama3.2:3b-instruct-q4_0'
return query_model(MODEL, syspromts.TRANSLATOR_DE_ES + query)['choices'][0]['message']['content']
def translator_de_it(query):
MODEL = 'llama3.2:3b-instruct-q4_0'
return query_model(MODEL, syspromts.TRANSLATOR_DE_IT + query)['choices'][0]['message']['content']
response = ollama.chat(
model = model,
messages = [
{'role': 'system', 'content': s},
{'role': 'user', 'content': text}
],
)
return response['message']['content']
if __name__ == "__main__":
import time
TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
start=time.time()
response = translator_de_en(TESTSENTENCE_DE_BROKER)
print("First query took", start - time.time())
print(json.dumps(response, indent=2))
response = translate_de_to_x(test_content.TESTSENTENCE_DE_HELLO, target_language='en')
print("First query took", time.time() - start)
print(response)
start=time.time()
response = translator_de_fr(TESTSENTENCE_DE_BROKER)
print("Second query took", start - time.time())
response = translate_de_to_x(test_content.TESTSENTENCE_DE_HELLO, target_language='fr')
print("Second query took", time.time() - start)
print(response)

View File

@@ -1,4 +1,9 @@
TESTSENTENCE_DE_HELLO = 'Hallo Welt.'
TESTSENTENCE_DE_WAVE_PARTICLE = 'Der Wellen-Teilchen-Dualismus beschreibt die Eigenschaft von Teilchen, sowohl als Wellen auf der Mikroebene zu verhalten und gleichzeitig bestimmte Eigenschaften wie Impuls und Energietrang zu besitzen.'
TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
TESTSENTENCE_DE_RAINBOW = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder wolke wahrgenommen wird.'
TESTSENTENCE_DE_RAINBOW = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder wolke wahrgenommen wird.'
# Announcement-style test sentences (gate, train, luggage, flight, security).
# NOTE(review): these names carry the _DE_ prefix but the text is English —
# confirm whether German sentences were intended before feeding them to the
# German-to-X translation path.
TESTSENTENCE_DE_GATE_OPENED = "Please be advised that Gate 23 has opened for boarding."
TESTSENTENCE_DE_TRAIN_DELAYS = "Please note that delays have been reported on the InterCity train route. We apologize for any inconvenience this may cause."
TESTSENTENCE_DE_LOST_LUGGAGE = "Attention passengers! Lost luggage has been reported at Track 4. If you have not yet received your bag, please report to our lost luggage desk for assistance."
TESTSENTENCE_DE_PLANE_TAKEOFF_DELAYED = "This departing flight's departure time has changed due to weather conditions. The new boarding time will be advised shortly."
TESTSENTENCE_DE_SECURITY_CHECKPOINT_OPENING = "Security Checkpoint 5 is now open. Please proceed through the checkpoint to minimize your wait time during security screening."

View File

@@ -1,12 +1,27 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "multilang_translator"
requires-python = ">= 3.11"
version = '0.1'
dependencies = [
"bumble @git+https://git@gitea.pstruebi.xyz/auracaster/bumble_mirror.git@e027bcb57a0f29c82e3c02c8bb8691dcb91eac62",
#"auracast @git+https://git@gitea.pstruebi.xyz/auracaster/bumble-auracast",
"requests",
"ollama",
"aioconsole",
"piper-tts==1.2.0"
]
[project.optional-dependencies]
test = [
"pytest >= 8.2",
]
[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib","--count=1","-s","-v"
]
]
[build-system]
requires = ["setuptools>=61", "wheel", "setuptools_scm>=8"]
build-backend = "setuptools.build_meta"

View File

@@ -1,6 +1,6 @@
from multilang_translator.translator.llm_translator import translator_de_en, translator_de_fr, translator_de_it
from multilang_translator.translator.test_content import TESTSENTENCE_DE_BROKER, TESTSENTENCE_DE_RAINBOW
from multilang_translator.main import translate_from_german_and_encode
from multilang_translator.main import translate_from_german
import time
@@ -28,4 +28,4 @@ def test_translator():
def test_translate_from_german_and_encode():
translate_from_german_and_encode(TESTSENTENCE_DE_RAINBOW)
translate_from_german(TESTSENTENCE_DE_RAINBOW)