8 Commits

Author SHA1 Message Date
pober 53fd074d22 STT experiments: implements both Vosk and Whisper. Whisper is too slow (CPU-heavy); Vosk has low accuracy; a bigger Vosk model needs too much storage. 2026-05-28 10:12:51 +02:00
pober d1471fae79 Textcast works. 2026-05-27 14:27:29 +02:00
pober 50761a4b37 bugfix/1025-local-link-lost-connection (#34)
Fixes the bug that local link loses connection after a few minutes.

Openproject:
#1025
#608

Reviewed-on: #34
2026-05-20 10:12:08 +00:00
pober 5bb31e3f6a bugfix/1087-UI-reset-refresh (#33)
Openproject:
#1087

Reviewed-on: #33
2026-05-20 10:01:13 +00:00
pober edd23fc115 feature/1040-dante-activation (#31)
Makes the activation of dante possible.
Fixes issues with local link for DANTE.
First implementation of audiopipeline in its own thread.
Prevents Frontend from interrupting the audio stream.

Openproject:
#1040
#1069
#652
#1041
#1063

Reviewed-on: #31
2026-05-20 09:54:35 +00:00
pober 19a01e404c Removes manufacturer data. (#30)
Reviewed-on: #30
2026-05-20 09:40:22 +00:00
pstruebi efb55050c0 feature/1khz_testtone (#27)
- 1kHz test tone added
- all audio file converted to lc3 to save space
- streaming loop for lc3 files fixed

@pober

Reviewed-on: #27
Co-authored-by: pstruebi <struebin.patrick@gmail.com>
Co-committed-by: pstruebi <struebin.patrick@gmail.com>
2026-05-19 13:21:48 +00:00
pstruebi c82a17016e implement changes for dynamic power setting (#28)
Implements power control for the radios, both radios independent.

Reviewed-on: #28
Co-authored-by: pstruebi <struebin.patrick@gmail.com>
Co-committed-by: pstruebi <struebin.patrick@gmail.com>
2026-05-19 12:33:50 +00:00
73 changed files with 12793 additions and 11338 deletions
+3 -1
View File
@@ -19,7 +19,9 @@ dependencies = [
"smbus2 (>=0.5.0,<0.6.0)",
"samplerate (>=0.2.2,<0.3.0)",
"rpi-gpio (>=0.7.1,<0.8.0)",
"pyalsaaudio @ git+ssh://git@gitea.summitwave.work:222/auracaster/sw_pyalsaaudio.git@b3d11582e03df6929b2e7acbaa1306afc7b8a6bc"
"pyalsaaudio @ git+ssh://git@gitea.summitwave.work:222/auracaster/sw_pyalsaaudio.git@b3d11582e03df6929b2e7acbaa1306afc7b8a6bc",
"vosk (>=0.3.45)",
"faster-whisper (>=1.0.0)"
]
[project.optional-dependencies]
+26 -9
View File
@@ -1,5 +1,11 @@
from typing import List
from pydantic import BaseModel
from pydantic import BaseModel, field_validator
# Discrete TX power levels (dBm) supported by the Nordic SoftDevice Controller
# for the nRF radio PA. The HCI controller will clamp requested values to the
# nearest supported step. The maximum is bounded by CONFIG_BT_CTLR_TX_PWR_*
# in the hci_uart firmware (currently +8 dBm).
TX_POWER_VALID = [8, 7, 6, 5, 4, 3, 2, 0, -4, -8, -12, -16, -20]
# Define some base to hold the relevant parameters
class AuracastQoSConfig(BaseModel):
@@ -28,13 +34,24 @@ class AuracastGlobalConfig(BaseModel):
octets_per_frame: int = 40 #48kbps@24kHz # bitrate = octets_per_frame * 8 / frame len
frame_duration_us: int = 10000
presentation_delay_us: int = 40000
# TODO:pydantic does not support bytes serialization - use .hex and np.fromhex()
manufacturer_data: tuple[int, bytes] | tuple[None, None] = (None, None)
# LE Audio: Broadcast Audio Immediate Rendering (metadata type 0x09)
# When true, include a zero-length LTV with type 0x09 in the subgroup metadata
# so receivers may render earlier than the presentation delay for lower latency.
immediate_rendering: bool = False
assisted_listening_stream: bool = False
# Bluetooth advertising TX power for this radio in dBm (per advertising set).
# Sent through HCI_LE_Set_Extended_Advertising_Parameters; the SDC clamps to
# nearest supported hardware step and propagates to primary/secondary adv,
# the periodic advertising train and the BIS ISO PDUs.
advertising_tx_power: int = 8
@field_validator('advertising_tx_power')
@classmethod
def _snap_tx_power(cls, v: int) -> int:
    """Snap a requested TX power (dBm) onto a step listed in TX_POWER_VALID.

    A value that is already a listed step is returned unchanged. Any other
    value is replaced by the listed step with the smallest absolute
    distance; on a tie the step that comes first in TX_POWER_VALID wins.
    """
    def _distance(step: int) -> int:
        return abs(step - v)

    return v if v in TX_POWER_VALID else min(TX_POWER_VALID, key=_distance)
# "Audio input. "
# "'device' -> use the host's default sound input device, "
@@ -62,7 +79,7 @@ class AuracastBigConfigDeu(AuracastBigConfig):
name: str = 'Hörsaal A'
language: str ='deu'
program_info: str = 'Vorlesung DE'
audio_source: str = 'file:./testdata/wave_particle_5min_de.wav'
audio_source: str = 'file:./testdata/wave_particle_5min_de.lc3'
class AuracastBigConfigEng(AuracastBigConfig):
id: int = 123
@@ -70,7 +87,7 @@ class AuracastBigConfigEng(AuracastBigConfig):
name: str = 'Lecture Hall A'
language: str ='eng'
program_info: str = 'Lecture EN'
audio_source: str = 'file:./testdata/wave_particle_5min_en.wav'
audio_source: str = 'file:./testdata/wave_particle_5min_en.lc3'
class AuracastBigConfigFra(AuracastBigConfig):
id: int = 1234
@@ -79,7 +96,7 @@ class AuracastBigConfigFra(AuracastBigConfig):
name: str = 'Auditoire A'
language: str ='fra'
program_info: str = 'Auditoire FR'
audio_source: str = 'file:./testdata/wave_particle_5min_fr.wav'
audio_source: str = 'file:./testdata/wave_particle_5min_fr.lc3'
class AuracastBigConfigSpa(AuracastBigConfig):
id: int =12345
@@ -87,7 +104,7 @@ class AuracastBigConfigSpa(AuracastBigConfig):
name: str = 'Auditorio A'
language: str ='spa'
program_info: str = 'Auditorio ES'
audio_source: str = 'file:./testdata/wave_particle_5min_es.wav'
audio_source: str = 'file:./testdata/wave_particle_5min_es.lc3'
class AuracastBigConfigIta(AuracastBigConfig):
id: int =1234567
@@ -95,7 +112,7 @@ class AuracastBigConfigIta(AuracastBigConfig):
name: str = 'Aula A'
language: str ='ita'
program_info: str = 'Aula IT'
audio_source: str = 'file:./testdata/wave_particle_5min_it.wav'
audio_source: str = 'file:./testdata/wave_particle_5min_it.lc3'
class AuracastBigConfigPol(AuracastBigConfig):
@@ -104,7 +121,7 @@ class AuracastBigConfigPol(AuracastBigConfig):
name: str = 'Sala Wykładowa'
language: str ='pol'
program_info: str = 'Sala Wykładowa PL'
audio_source: str = 'file:./testdata/wave_particle_5min_pl.wav'
audio_source: str = 'file:./testdata/wave_particle_5min_pl.lc3'
class AuracastConfigGroup(AuracastGlobalConfig):
+72
View File
@@ -0,0 +1,72 @@
"""DCP XML subtitle file parser (Interop and SMPTE 428-7 formats).
Timecode format: HH:MM:SS:FF (frame-based, default 24 fps)
HH:MM:SS.mmm (millisecond decimal, also accepted)
"""
from __future__ import annotations
import re
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from typing import List
@dataclass
class Subtitle:
    """One subtitle cue: its in/out times and the text to show."""

    # Cue start, in seconds.
    time_in: float
    # Cue end, in seconds.
    time_out: float
    # Caption text of the cue.
    text: str
def _parse_timecode(tc: str, fps: int = 24) -> float:
"""Parse a DCP timecode string to float seconds."""
# HH:MM:SS:FF
m = re.match(r'^(\d+):(\d+):(\d+):(\d+)$', tc.strip())
if m:
h, mi, s, f = int(m.group(1)), int(m.group(2)), int(m.group(3)), int(m.group(4))
return h * 3600 + mi * 60 + s + f / fps
# HH:MM:SS.mmm
m = re.match(r'^(\d+):(\d+):(\d+)\.(\d+)$', tc.strip())
if m:
h, mi, s = int(m.group(1)), int(m.group(2)), int(m.group(3))
frac = float('0.' + m.group(4))
return h * 3600 + mi * 60 + s + frac
raise ValueError(f"Unrecognized DCP timecode: {tc!r}")
def parse_dcp_xml(path: str, fps: int = 24) -> List[Subtitle]:
    """Parse a DCP XML subtitle file and return a time-sorted list of Subtitles.

    Args:
        path: Location of the XML file, handed to ``ElementTree.parse``.
        fps: Frame rate used to convert frame-based ``HH:MM:SS:FF`` timecodes.

    Returns:
        One ``Subtitle`` per ``<Subtitle>`` element that has both ``TimeIn``
        and ``TimeOut`` attributes and non-empty text, sorted by ``time_in``.
        Multiple ``<Text>`` children of one cue are joined with a space.

    Raises:
        ValueError: if a ``TimeIn``/``TimeOut`` value is not a recognised
            timecode (propagated from ``_parse_timecode``).
    """
    root = ET.parse(path).getroot()

    # ElementTree spells namespaced tags as "{uri}Tag". Reuse the root's
    # "{uri}" prefix (empty when the file has no namespace) for the element
    # lookups so they work regardless of schema version.
    ns_match = re.match(r'\{(.+?)\}', root.tag)
    ns = ns_match.group(0) if ns_match else ''

    subtitles: List[Subtitle] = []
    for subtitle_el in root.iter(f'{ns}Subtitle'):
        time_in_str = subtitle_el.get('TimeIn', '')
        time_out_str = subtitle_el.get('TimeOut', '')
        if not time_in_str or not time_out_str:
            continue

        parts: List[str] = []
        for text_el in subtitle_el.iter(f'{ns}Text'):
            # itertext() also collects text nested in inline children (such
            # as <Font>) and the text that follows them; ``.text`` alone
            # stops at the first child element. split()/join collapses the
            # line breaks and indentation that nesting brings along.
            t = ' '.join(''.join(text_el.itertext()).split())
            if t:
                parts.append(t)

        text = ' '.join(parts)
        if not text:
            continue

        subtitles.append(Subtitle(
            time_in=_parse_timecode(time_in_str, fps),
            time_out=_parse_timecode(time_out_str, fps),
            text=text,
        ))

    return sorted(subtitles, key=lambda s: s.time_in)
+259
View File
@@ -0,0 +1,259 @@
"""faster-whisper speech-to-text → TextCast streamer.
Captures mono audio from an analog ALSA/sounddevice input, runs
faster-whisper offline ASR in a background thread (chunked, every
CHUNK_S seconds), and broadcasts recognised text over the TextCast BLE
broadcast using the same SDU framing as text_multicast.py.
Usage (CLI):
poetry run python -m auracast.faster_whisper_textcast \\
--model tiny.en \\
--device ch1 \\
--transport serial:/dev/ttyAMA3,1000000,rtscts
"""
from __future__ import annotations
import asyncio
import logging
import os
import queue
import threading
import time
from typing import Optional
import numpy as np
import samplerate
import sounddevice as sd
from auracast import auracast_config, multicast
from auracast.text_multicast import (
SDU_SIZE,
SDU_INTERVAL_US,
_make_text_frame,
_make_idle_frame,
)
log = logging.getLogger('faster_whisper_textcast')
CAPTURE_SAMPLE_RATE = 48_000
WHISPER_SAMPLE_RATE = 16_000
BLOCK_FRAMES_48K = 4800 # 100 ms capture blocks
CHUNK_S = 3.0 # transcribe every N seconds of audio
CAPTION_HOLD_S = 4.0 # keep caption visible after last transcription
SILENCE_RMS = 0.003 # skip transcription if chunk is below this RMS
BROADCAST_NAME = 'LiveCaption'
VALID_MODELS = ['tiny.en', 'base.en', 'small.en', 'tiny', 'base', 'small']
def _tail_to_fit(text: str, max_bytes: int) -> str:
"""Return the tail of *text* that fits in *max_bytes* UTF-8 bytes."""
encoded = text.encode('utf-8')
if len(encoded) <= max_bytes:
return text
tail = encoded[-max_bytes:].decode('utf-8', errors='ignore')
sp = tail.find(' ')
return tail[sp + 1:] if sp != -1 else tail
def _resolve_device(device: str) -> Optional[int]:
"""Return sounddevice index for a name or numeric string, or None for default."""
if not device:
return None
if device.isdigit():
return int(device)
for i, d in enumerate(sd.query_devices()):
if d['name'] == device and d['max_input_channels'] > 0:
return i
log.warning("Device '%s' not found in sounddevice list using default input", device)
return None
async def _iso_write_loop(bigs: dict, shared: dict, lock: threading.Lock) -> None:
    """Write caption frames to the ISO queue of ``bigs['big0']`` forever.

    Each iteration reads ``shared['text']`` / ``shared['expiry']`` under
    *lock*. While a non-empty caption has not reached its expiry, its tail
    (trimmed to ``SDU_SIZE - 2`` bytes) is written as a text frame; otherwise
    an idle frame is written. Caption changes and clears are logged once.

    The loop contains no sleep of its own; the ~10 ms cadence presumably
    comes from ``iso_queue.write`` - verify against the queue implementation.
    The coroutine only ends by cancellation or an exception from the write.
    """
    iso_queue = bigs['big0']['iso_queue']
    last_sent: str = ''
    while True:
        now = time.monotonic()
        with lock:
            text: str = shared.get('text', '')
            expiry: float = shared.get('expiry', 0.0)
            active = bool(text) and now < expiry
            if text and not active:
                # Drop the expired caption in the same critical section as
                # the read. Clearing it in a later, separate acquisition
                # could wipe a fresh caption published in between.
                shared['text'] = ''
        if active:
            display_text = _tail_to_fit(text, SDU_SIZE - 2)
            if display_text != last_sent:
                log.info("Caption: %s", display_text)
                last_sent = display_text
            frame = _make_text_frame(display_text)
        else:
            if last_sent:
                log.info("Caption cleared")
                last_sent = ''
            frame = _make_idle_frame()
        await iso_queue.write(frame)
def _whisper_thread(
    model_size: str,
    device: str,
    shared: dict,
    lock: threading.Lock,
    stop_event: threading.Event,
) -> None:
    """Blocking audio capture + faster-whisper transcription loop.

    Loads the model, opens a sounddevice input stream and, until *stop_event*
    is set, transcribes the captured audio in fixed-size chunks. Each
    non-empty transcription is published as ``shared['text']`` together with
    ``shared['expiry']`` (now + CAPTION_HOLD_S) while holding *lock*.

    Args:
        model_size: Model name passed to ``WhisperModel``.
        device: Input device spec, resolved with ``_resolve_device``.
        shared: Dict that receives the ``'text'`` and ``'expiry'`` keys.
        lock: Guards every write to *shared*.
        stop_event: Set by the caller to end the loop and the audio callback.

    The function never raises: a missing faster-whisper package is logged
    and the function returns; any exception from the capture/transcribe
    section is logged with its traceback.
    """
    # Imported lazily so a missing package is reported instead of breaking
    # the import of this module.
    try:
        from faster_whisper import WhisperModel  # type: ignore
    except ImportError:
        log.error("faster-whisper is not installed. Run: poetry add faster-whisper")
        return
    log.info("Loading faster-whisper model '%s' (int8, CPU) …", model_size)
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    log.info("Model '%s' loaded.", model_size)
    # Hand-off queue from the audio callback to the loop below.
    # NOTE(review): the queue is unbounded; if transcription takes longer
    # than CHUNK_S the backlog presumably keeps growing - confirm.
    audio_q: queue.Queue = queue.Queue()
    resampler = samplerate.Resampler('sinc_fastest', channels=1)
    ratio = WHISPER_SAMPLE_RATE / CAPTURE_SAMPLE_RATE
    # Samples (at WHISPER_SAMPLE_RATE) per transcription chunk. CHUNK_S is
    # read once here, so later changes do not affect a running thread.
    chunk_frames = int(CHUNK_S * WHISPER_SAMPLE_RATE)
    audio_buffer = np.zeros(0, dtype=np.float32)
    dev_idx = _resolve_device(device)

    def _cb(indata: np.ndarray, frames: int, time_info, status) -> None:
        # Stream callback: take the first channel, resample it by `ratio`
        # and queue the result for the transcription loop.
        if status:
            log.warning("Audio status: %s", status)
        if stop_event.is_set():
            raise sd.CallbackStop()
        mono = indata[:, 0].astype(np.float32)
        downsampled = resampler.process(mono, ratio, end_of_input=False)
        audio_q.put(downsampled.copy())

    try:
        with sd.InputStream(
            samplerate=CAPTURE_SAMPLE_RATE,
            blocksize=BLOCK_FRAMES_48K,
            device=dev_idx,
            dtype='float32',
            channels=1,
            callback=_cb,
        ):
            log.info("WhisperCast listening on device '%s' (idx=%s) …", device, dev_idx)
            while not stop_event.is_set():
                # The short timeout lets the loop re-check stop_event even
                # when no audio arrives.
                try:
                    chunk = audio_q.get(timeout=0.2)
                    audio_buffer = np.concatenate([audio_buffer, chunk])
                except queue.Empty:
                    continue
                if len(audio_buffer) < chunk_frames:
                    continue
                # Cut exactly one chunk off the front; the rest stays
                # buffered for the next round.
                pcm = audio_buffer[:chunk_frames].copy()
                audio_buffer = audio_buffer[chunk_frames:]
                # Skip chunks whose RMS is below SILENCE_RMS without
                # running the model.
                rms = float(np.sqrt(np.mean(pcm ** 2)))
                if rms < SILENCE_RMS:
                    continue
                t0 = time.monotonic()
                # NOTE(review): language is fixed to "en" although
                # VALID_MODELS also lists non-".en" models - confirm intent.
                segments, _ = model.transcribe(
                    pcm,
                    beam_size=1,
                    language="en",
                    vad_filter=True,
                    vad_parameters={"min_silence_duration_ms": 300},
                )
                text = ' '.join(s.text.strip() for s in segments).strip()
                elapsed = time.monotonic() - t0
                if text:
                    log.info("Transcribed (%.2fs): %s", elapsed, text)
                    # Publish text and expiry together under the lock.
                    with lock:
                        shared['text'] = text
                        shared['expiry'] = time.monotonic() + CAPTION_HOLD_S
                else:
                    log.debug("Silent chunk skipped (rms=%.4f, took=%.2fs)", rms, elapsed)
    except Exception as exc:
        log.error("WhisperCast thread error: %s", exc, exc_info=True)
async def broadcast_whisper(
    transport: str,
    model_size: str = 'tiny.en',
    device: str = 'ch1',
) -> None:
    """Start a faster-whisper → TextCast broadcast. Runs until cancelled.

    Builds a single-BIG broadcast configuration, brings the broadcast up,
    starts the capture/transcription thread and then feeds the ISO queue
    with caption frames.

    Args:
        transport: HCI transport string placed in the broadcast config.
        model_size: Whisper model name; must be one of ``VALID_MODELS``.
        device: Input device spec handed to the capture thread.

    Raises:
        ValueError: if *model_size* is not in ``VALID_MODELS``.
        asyncio.CancelledError: re-raised after shutdown when cancelled.
    """
    if model_size not in VALID_MODELS:
        raise ValueError(f"Unknown model '{model_size}'. Valid: {VALID_MODELS}")

    config = auracast_config.AuracastConfigGroup(
        bigs=[
            auracast_config.AuracastBigConfig(
                name=BROADCAST_NAME,
                program_info='Live Captions',
                language='eng',
                audio_source='file:dummy',
                iso_que_len=4,
            ),
        ],
        auracast_sampling_rate_hz=16000,
        octets_per_frame=SDU_SIZE,
        frame_duration_us=SDU_INTERVAL_US,
        presentation_delay_us=40_000,
        qos_config=auracast_config.AuracastQosRobust(),
        transport=transport,
    )

    # State shared between the capture thread (writer) and the ISO loop.
    shared: dict = {'text': '', 'expiry': 0.0}
    lock = threading.Lock()
    stop_event = threading.Event()

    async with multicast.create_device(config) as ble_device:
        bigs = await multicast.init_broadcast(ble_device, config, config.bigs)
        t = threading.Thread(
            target=_whisper_thread,
            args=(model_size, device, shared, lock, stop_event),
            daemon=True,
        )
        t.start()
        log.info("WhisperCast started (device=%s, model=%s)", device, model_size)
        try:
            await _iso_write_loop(bigs, shared, lock)
        except asyncio.CancelledError:
            log.info("WhisperCast cancelled shutting down")
            raise
        finally:
            # Stop the capture thread on every exit path, not only on
            # cancellation, so a failing write loop does not leave the
            # audio input open in a thread nobody will stop.
            stop_event.set()
            t.join(timeout=5.0)
def main() -> None:
    """Command-line entry point for the faster-whisper → TextCast streamer.

    Parses ``--model``, ``--device``, ``--transport`` and ``--chunk``, stores
    the chunk length in the module-level ``CHUNK_S`` and runs
    ``broadcast_whisper``. The default transport comes from the
    ``AURACAST_TRANSPORT`` environment variable when it is set. Exits with a
    usage error when ``--chunk`` is not a positive, finite number.
    """
    global CHUNK_S
    import argparse
    import math

    parser = argparse.ArgumentParser(description='faster-whisper → Auracast TextCast')
    parser.add_argument(
        '--model', default='tiny.en', choices=VALID_MODELS,
        help='Whisper model size (default: tiny.en)',
    )
    parser.add_argument('--device', default='ch1',
                        help='sounddevice input name or index (default: ch1)')
    parser.add_argument(
        '--transport',
        default=os.environ.get('AURACAST_TRANSPORT', 'serial:/dev/ttyAMA3,1000000,rtscts'),
        help='Bumble HCI transport string',
    )
    parser.add_argument('--chunk', type=float, default=CHUNK_S,
                        help=f'Seconds per transcription chunk (default: {CHUNK_S})')
    args = parser.parse_args()

    # The chunk length becomes a sample count in the capture thread; zero,
    # negative, NaN or infinite values would give empty chunks or an
    # OverflowError there, so reject them up front.
    if not (math.isfinite(args.chunk) and args.chunk > 0):
        parser.error(f'--chunk must be a positive number of seconds, got {args.chunk}')

    CHUNK_S = args.chunk
    multicast.run_async(broadcast_whisper(args.transport, args.model, args.device))
if __name__ == '__main__':
main()
+81 -24
View File
@@ -49,6 +49,7 @@ import bumble.transport
import bumble.utils
from bumble.device import Host, AdvertisingChannelMap
from bumble.audio import io as audio_io
from bumble.vendor.zephyr.hci import HCI_Write_Tx_Power_Level_Command
from auracast import auracast_config
from auracast.utils.read_lc3_file import read_lc3_file
@@ -462,21 +463,6 @@ async def init_broadcast(
],
)
logger.info('Setup Advertising')
advertising_manufacturer_data = (
b''
if global_config.manufacturer_data == (None, None)
else bytes(
core.AdvertisingData(
[
(
core.AdvertisingData.MANUFACTURER_SPECIFIC_DATA,
struct.pack('<H', global_config.manufacturer_data[0])
+ global_config.manufacturer_data[1],
)
]
)
)
)
bigs[f'big{i}']['broadcast_audio_announcement'] = bap.BroadcastAudioAnnouncement(conf.id)
# Build advertising data types list
@@ -519,13 +505,22 @@ async def init_broadcast(
advertising_sid=i,
primary_advertising_phy=hci.Phy.LE_1M, # 2m phy config throws error - because for primary advertising channels, 1mbit is only supported
secondary_advertising_phy=hci.Phy.LE_1M, # this is the secondary advertising beeing send on non advertising channels (extendend advertising)
#advertising_tx_power= # tx power in dbm (max 20)
# Pass NO_PREFERENCE (0x7F) here for two reasons:
# 1. The Nordic SoftDevice Controller ignores this field for
# advertising sets and always returns the compile-time
# CONFIG_BT_CTLR_TX_PWR_* value. The real TX power is
# applied via the Zephyr VS Write_Tx_Power_Level command
# issued right after create_advertising_set() returns.
# 2. Bumble's HCI metadata declares this field as 1-byte
# *unsigned* (a bumble bug — the BT spec defines it as
# signed int8), so negative values would raise
# "bytes must be in range(0, 256)" at serialization.
advertising_tx_power=hci.HCI_LE_Set_Extended_Advertising_Parameters_Command.TX_POWER_NO_PREFERENCE,
#secondary_advertising_max_skip=10,
),
advertising_data=(
bigs[f'big{i}']['broadcast_audio_announcement'].get_advertising_data()
+ bytes(core.AdvertisingData(advertising_data_types))
+ advertising_manufacturer_data
),
periodic_advertising_parameters=bumble.device.PeriodicAdvertisingParameters(
periodic_advertising_interval_min=80,
@@ -536,6 +531,48 @@ async def init_broadcast(
auto_start=True,
)
bigs[f'big{i}']['advertising_set'] = advertising_set
# NOTE: selected_tx_power below reflects the SDC's compile-time max
# (LE_Set_Ext_Adv_Params was sent with NO_PREFERENCE). The actual
# transmit power is set by the VS Write_Tx_Power_Level call below.
logging.debug(
'LE_Set_Ext_Adv_Params reports controller fallback TX power: %+d dBm (handle=%d)',
getattr(advertising_set, 'selected_tx_power', 0),
i,
)
# The Nordic SoftDevice Controller does not honor the per-set
# advertising_tx_power passed in HCI_LE_Set_Extended_Advertising_Parameters
# (it returns the compile-time CONFIG_BT_CTLR_TX_PWR_* value regardless).
# Apply the requested level via the Zephyr Vendor-Specific HCI command
# Write_Tx_Power_Level (opcode 0xFC0E), which the SDC honors per
# advertising handle. The SDC clamps the value to the nearest supported
# hardware step (max bounded by CONFIG_BT_CTLR_TX_PWR_PLUS_8).
try:
adv_handle = getattr(advertising_set, 'advertising_handle', i)
response = await device.send_command(
HCI_Write_Tx_Power_Level_Command(
handle_type=HCI_Write_Tx_Power_Level_Command.TX_POWER_HANDLE_TYPE_ADV,
connection_handle=adv_handle,
tx_power_level=global_config.advertising_tx_power,
)
)
rp = getattr(response, 'return_parameters', None)
status = getattr(rp, 'status', 0xFF) if rp is not None else 0xFF
selected = getattr(rp, 'selected_tx_power_level', None) if rp is not None else None
if status == 0 and selected is not None:
logging.info(
'Advertising TX power (VS Write_Tx_Power_Level): requested=%+d dBm, controller selected=%+d dBm (handle=%d)',
global_config.advertising_tx_power,
selected,
adv_handle,
)
else:
logging.warning(
'VS Write_Tx_Power_Level failed: status=0x%02X handle=%d requested=%+d dBm',
status, adv_handle, global_config.advertising_tx_power,
)
except Exception as e:
logging.warning('VS Write_Tx_Power_Level not supported by controller: %s', e)
logging.info('Start Periodic Advertising')
await advertising_set.start_periodic()
@@ -602,6 +639,29 @@ async def init_broadcast(
return bigs
def _lc3_file_byte_gen(filename: str, loop: bool = False):
"""Stream LC3 frames from disk as individual bytes, with optional looping.
Yields one byte (int) at a time so it is compatible with the existing
``bytes(itertools.islice(gen, bytes_per_frame))`` consumer without loading
the whole file into memory.
"""
while True:
with open(filename, 'rb') as f:
f.read(18) # skip 18-byte LC3 header
while True:
size_b = f.read(2)
if len(size_b) < 2:
break
frame_size = struct.unpack('=H', size_b)[0]
frame = f.read(frame_size)
if len(frame) < frame_size:
break
yield from frame
if not loop:
return
class Streamer():
"""
Streamer class that supports multiple input formats. See bumble for streaming from wav or device
@@ -757,13 +817,7 @@ class Streamer():
big['precoded'] = True
big['lc3_bytes_per_frame'] = global_config.octets_per_frame
filename = big_config[i].audio_source.replace('file:', '')
lc3_bytes = read_lc3_file(filename)
lc3_frames = iter(lc3_bytes)
if big_config[i].loop:
lc3_frames = itertools.cycle(lc3_frames)
big['lc3_frames'] = lc3_frames
big['lc3_frames'] = _lc3_file_byte_gen(filename, loop=big_config[i].loop)
# use wav files and code them entirely before streaming
elif big_config[i].precode_wav and big_config[i].audio_source.endswith('.wav'):
@@ -884,6 +938,9 @@ class Streamer():
if lc3_frame == b'': # Not all streams may stop at the same time
stream_finished[i] = True
continue
for q_idx in range(big.get('num_bis', 1)):
await big['iso_queues'][q_idx].write(lc3_frame)
else: # code lc3 on the fly with perf counters
# Ensure frames generator exists (so we can aclose() on stop)
frames_gen = big.get('frames_gen')
+272 -15
View File
@@ -100,6 +100,36 @@ QOS_PRESET_MAP = {
"Robust": auracast_config.AuracastQosRobust(),
}
# Discrete advertising TX power steps in dBm supported by the Nordic SDC radio
# PA. Sent through HCI_LE_Set_Extended_Advertising_Parameters; the controller
# clamps to the nearest hardware step.
TX_POWER_OPTIONS = [8, 7, 6, 5, 4, 3, 2, 0, -4, -8, -12, -16, -20]
TX_POWER_DEFAULT = 8
def _coerce_tx_power(value, default: int = TX_POWER_DEFAULT) -> int:
try:
v = int(value)
except (TypeError, ValueError):
return default
if v in TX_POWER_OPTIONS:
return v
return min(TX_POWER_OPTIONS, key=lambda s: abs(s - v))
def _tx_power_selectbox(label: str, key: str, default: int, disabled: bool, help_text: str | None = None) -> int:
    """Render a Streamlit select box over ``TX_POWER_OPTIONS``.

    *default* is first passed through ``_coerce_tx_power`` so the
    pre-selected entry is always one of the listed steps. Entries are shown
    as signed dBm values. Returns whatever ``st.selectbox`` returns for the
    chosen entry. A falsy *help_text* is replaced by a generic tooltip.
    """
    fallback_help = "Bluetooth advertising TX power for this radio. Higher values increase range; lower values reduce interference and power draw."
    initial_index = TX_POWER_OPTIONS.index(_coerce_tx_power(default))

    def _as_dbm(v):
        return f"{v:+d} dBm"

    return st.selectbox(
        label,
        TX_POWER_OPTIONS,
        index=initial_index,
        key=key,
        format_func=_as_dbm,
        disabled=disabled,
        help=help_text if help_text else fallback_help,
    )
# Try loading persisted settings from backend
saved_settings = {}
try:
@@ -111,6 +141,9 @@ except Exception:
# Define is_streaming early from the fetched status for use throughout the UI
is_streaming = bool(saved_settings.get("is_streaming", False))
textcast_is_streaming = bool(saved_settings.get("textcast_is_streaming", False))
voskcast_is_streaming = bool(saved_settings.get("voskcast_is_streaming", False))
whispercast_is_streaming = bool(saved_settings.get("whispercast_is_streaming", False))
# Extract secondary status, if provided by the backend /status endpoint.
secondary_status = saved_settings.get("secondary") or {}
@@ -155,6 +188,9 @@ options = [
"Demo",
"Analog",
"Network - Dante",
"TextCast",
"VoskCast",
"WhisperCast",
]
saved_audio_mode = saved_settings.get("audio_mode", "Demo")
if saved_audio_mode not in options:
@@ -166,7 +202,7 @@ audio_mode = st.selectbox(
"Audio Mode",
options,
index=options.index(saved_audio_mode) if saved_audio_mode in options else options.index("Demo"),
disabled=is_streaming,
disabled=is_streaming or textcast_is_streaming or voskcast_is_streaming or whispercast_is_streaming,
help=(
"Select the audio input source. Choose 'USB' for a connected USB audio device (via PipeWire), "
"'Network' (AES67) for network RTP/AES67 sources, "
@@ -196,11 +232,94 @@ else:
running_mode = backend_mode_mapped if (is_streaming and backend_mode_mapped) else audio_mode
# Start/Stop buttons and status (moved to top)
if audio_mode == "Demo":
if audio_mode == "TextCast":
start_stream, stop_stream = render_stream_controls(textcast_is_streaming, "Start TextCast", "Stop TextCast", "TextCast", False)
elif audio_mode == "VoskCast":
start_stream, stop_stream = render_stream_controls(voskcast_is_streaming, "Start VoskCast", "Stop VoskCast", "VoskCast", False)
elif audio_mode == "Demo":
start_stream, stop_stream = render_stream_controls(is_streaming, "Start Demo", "Stop Demo", running_mode, secondary_is_streaming)
else:
start_stream, stop_stream = render_stream_controls(is_streaming, "Start Auracast", "Stop Auracast", running_mode, secondary_is_streaming)
# TextCast: DCP XML file uploader
if audio_mode == "TextCast":
st.markdown("#### DCP Subtitle File")
dcp_file = st.file_uploader(
"Upload DCP XML subtitle file (.xml)",
type=["xml"],
disabled=textcast_is_streaming,
help="Upload a DCP-compliant subtitle XML file. Subtitles will be broadcast over Auracast.",
)
if dcp_file is not None:
content = dcp_file.read().decode("utf-8", errors="replace")
st.session_state['_textcast_dcp_content'] = content
st.session_state['_textcast_dcp_name'] = dcp_file.name
st.success(f"Loaded: {dcp_file.name} ({len(content):,} bytes)")
elif st.session_state.get('_textcast_dcp_name'):
st.info(f"Using previously uploaded file: {st.session_state['_textcast_dcp_name']}")
else:
st.warning("No subtitle file loaded. Upload a DCP XML file or use the sample below.")
if st.button("Load sample subtitle file", disabled=textcast_is_streaming):
import os as _os
_sample = _os.path.abspath(_os.path.join(
_os.path.dirname(__file__), '..', 'testdata', 'sample_subtitles.xml'))
try:
with open(_sample, 'r', encoding='utf-8') as _f:
_content = _f.read()
st.session_state['_textcast_dcp_content'] = _content
st.session_state['_textcast_dcp_name'] = 'sample_subtitles.xml'
st.rerun()
except Exception as _e:
st.error(f"Could not load sample: {_e}")
# WhisperCast: model size + input device
if audio_mode == "WhisperCast":
st.markdown("#### Live Speech Recognition (faster-whisper)")
_whisper_default_model = saved_settings.get("whispercast_model", "tiny.en")
_whisper_default_device = saved_settings.get("whispercast_device", "ch1")
col_wm, col_wd = st.columns([2, 1])
with col_wm:
whisper_model_size = st.selectbox(
"Whisper Model",
["tiny.en", "base.en", "small.en"],
index=["tiny.en", "base.en", "small.en"].index(_whisper_default_model)
if _whisper_default_model in ["tiny.en", "base.en", "small.en"] else 0,
disabled=whispercast_is_streaming,
help="tiny.en (~39 MB, ~3-5s latency), base.en (~74 MB, ~5-8s latency)",
)
with col_wd:
whisper_device = st.selectbox(
"Input",
["ch1", "ch2"],
index=0 if _whisper_default_device == "ch1" else 1,
disabled=whispercast_is_streaming,
help="Analog input channel",
)
st.caption("Model downloads automatically on first use. Each sentence appears after ~3s of speech.")
# VoskCast: model path + input device
if audio_mode == "VoskCast":
st.markdown("#### Live Speech Recognition (Vosk)")
_vosk_default_model = saved_settings.get("voskcast_model") or os.environ.get("VOSK_MODEL_PATH", "~/vosk-model-en-us")
_vosk_default_device = saved_settings.get("voskcast_device", "ch1")
col_model, col_dev = st.columns([3, 1])
with col_model:
vosk_model_path = st.text_input(
"Vosk Model Path",
value=_vosk_default_model,
disabled=voskcast_is_streaming,
help="Local path to the Vosk model directory. Download from https://alphacephei.com/vosk/models",
)
with col_dev:
vosk_device = st.selectbox(
"Input",
["ch1", "ch2"],
index=0 if _vosk_default_device == "ch1" else 1,
disabled=voskcast_is_streaming,
help="Analog input channel (ch1 = left, ch2 = right)",
)
st.caption("Partial results appear immediately; final results are held for 4 s then cleared.")
# Analog gain control (only for Analog mode, placed below start button)
analog_gain_db_left = 0 # default (dB)
analog_gain_db_right = 0 # default (dB)
@@ -355,6 +474,17 @@ if audio_mode == "Demo":
disabled=is_streaming,
help="Select the demo stream configuration."
)
demo_content_options = ["Program material", "1 kHz test tone"]
saved_demo_content = saved_settings.get('demo_content', 'Program material')
if saved_demo_content not in demo_content_options:
saved_demo_content = 'Program material'
demo_content = st.selectbox(
"Demo Content",
demo_content_options,
index=demo_content_options.index(saved_demo_content),
disabled=is_streaming,
help="Select whether demo streams use program audio files or a continuous 1 kHz test tone."
)
# Stream password and flags (same as USB/AES67)
saved_pwd = saved_settings.get('stream_password', '') or ''
stream_passwort = st.text_input(
@@ -398,6 +528,22 @@ if audio_mode == "Demo":
disabled=is_streaming,
help="Fast: 2 retransmissions, lower latency. Robust: 4 retransmissions, better reliability."
)
# Per-radio TX power for Demo (independent for R1 and R2)
col_tx_r1, col_tx_r2 = st.columns(2, gap="small")
with col_tx_r1:
tx_power_r1 = _tx_power_selectbox(
"TX Power (R1)",
key="demo_tx_power_r1",
default=saved_settings.get('advertising_tx_power', TX_POWER_DEFAULT),
disabled=is_streaming,
)
with col_tx_r2:
tx_power_r2 = _tx_power_selectbox(
"TX Power (R2)",
key="demo_tx_power_r2",
default=saved_settings.get('secondary', {}).get('advertising_tx_power', TX_POWER_DEFAULT),
disabled=is_streaming,
)
#st.info(f"Demo mode selected: {demo_selected} (Streams: {demo_stream_map[demo_selected]['streams']}, Rate: {demo_stream_map[demo_selected]['rate']} Hz)")
quality = None # Not used in demo mode
else:
@@ -524,6 +670,13 @@ else:
help="Fast: 2 retransmissions, lower latency. Robust: 4 retransmissions, better reliability."
)
tx_power_r1 = _tx_power_selectbox(
"TX Power (R1)",
key="analog_tx_power_r1",
default=saved_settings.get('advertising_tx_power', TX_POWER_DEFAULT),
disabled=is_streaming,
)
col_r1_name, col_r1_lang = st.columns([2, 1])
with col_r1_name:
stream_name1 = st.text_input(
@@ -726,6 +879,13 @@ else:
help="Fast: 2 retransmissions, lower latency. Robust: 4 retransmissions, better reliability."
)
tx_power_r2 = _tx_power_selectbox(
"TX Power (R2)",
key="analog_tx_power_r2",
default=saved_settings.get('secondary', {}).get('advertising_tx_power', TX_POWER_DEFAULT),
disabled=is_streaming,
)
col_r2_name, col_r2_lang = st.columns([2, 1])
with col_r2_name:
stream_name2 = st.text_input(
@@ -785,6 +945,7 @@ else:
'immediate_rendering': immediate_rendering2,
'presentation_delay_ms': presentation_delay_ms2,
'qos_preset': qos_preset2,
'tx_power': tx_power_r2,
'analog_gain_db_left': analog_gain_db_left,
'analog_gain_db_right': analog_gain_db_right,
}
@@ -801,6 +962,7 @@ else:
'immediate_rendering': immediate_rendering1,
'presentation_delay_ms': presentation_delay_ms1,
'qos_preset': qos_preset1,
'tx_power': tx_power_r1,
'stereo_mode': stereo_enabled,
'analog_gain_db_left': analog_gain_db_left,
'analog_gain_db_right': analog_gain_db_right,
@@ -1018,7 +1180,14 @@ else:
disabled=is_streaming,
help="Quality of Service preset for Radio 1"
)
r1_tx_power = _tx_power_selectbox(
"TX Power (R1)",
key="dante_tx_power_r1",
default=saved_r1_config.get('advertising_tx_power', saved_settings.get('advertising_tx_power', TX_POWER_DEFAULT)),
disabled=is_streaming,
)
# Per-stream configuration for Radio 1
if dante_stereo_enabled:
st.write("**Stereo Stream Configuration (Radio 1)**")
@@ -1344,7 +1513,14 @@ else:
disabled=is_streaming,
help="Quality of Service preset for Radio 2"
)
r2_tx_power = _tx_power_selectbox(
"TX Power (R2)",
key="dante_tx_power_r2",
default=saved_r2_config.get('advertising_tx_power', saved_settings.get('secondary', {}).get('advertising_tx_power', TX_POWER_DEFAULT)),
disabled=is_streaming,
)
# Per-stream configuration for Radio 2
st.write("**Stream Configuration (Radio 2)**")
r2_streams = []
@@ -1471,6 +1647,7 @@ else:
r2_immediate_rendering = False
r2_presentation_delay_ms = 40
r2_qos_preset = 'Fast'
r2_tx_power = TX_POWER_DEFAULT
# Validate unique input devices for Network - Dante mode
if audio_mode == "Network - Dante":
@@ -1502,6 +1679,7 @@ else:
'immediate_rendering': r1_immediate_rendering,
'presentation_delay_ms': r1_presentation_delay_ms,
'qos_preset': r1_qos_preset,
'tx_power': r1_tx_power,
'dante_stereo_mode': dante_stereo_enabled,
'dante_stereo_left': dante_left_channel,
'dante_stereo_right': dante_right_channel,
@@ -1517,6 +1695,7 @@ else:
'immediate_rendering': r2_immediate_rendering if radio2_enabled else False,
'presentation_delay_ms': r2_presentation_delay_ms if radio2_enabled else 40000,
'qos_preset': r2_qos_preset if radio2_enabled else 'Fast',
'tx_power': r2_tx_power if radio2_enabled else TX_POWER_DEFAULT,
} if radio2_enabled else None
if audio_mode in ("USB", "Network"):
@@ -1595,6 +1774,13 @@ else:
help="Fast: 2 retransmissions, lower latency. Robust: 4 retransmissions, better reliability."
)
tx_power = _tx_power_selectbox(
"TX Power",
key="usb_tx_power",
default=saved_settings.get('advertising_tx_power', TX_POWER_DEFAULT),
disabled=is_streaming,
)
stream_name = st.text_input(
"Channel Name",
value=default_name,
@@ -1696,22 +1882,78 @@ else:
if stop_stream:
st.session_state['stream_started'] = False
try:
r = requests.post(f"{BACKEND_URL}/stop_audio").json()
if audio_mode == "Demo":
st.session_state['demo_stream_started'] = False
if r['was_running']:
if audio_mode == "TextCast":
r = requests.post(f"{BACKEND_URL}/stop_textcast").json()
elif audio_mode == "VoskCast":
r = requests.post(f"{BACKEND_URL}/stop_voskcast").json()
elif audio_mode == "WhisperCast":
r = requests.post(f"{BACKEND_URL}/stop_whispercast").json()
else:
r = requests.post(f"{BACKEND_URL}/stop_audio").json()
if audio_mode == "Demo":
st.session_state['demo_stream_started'] = False
if r.get('was_running'):
is_stopped = True
except Exception as e:
st.error(f"Error: {e}")
if start_stream:
# Always send stop to ensure backend is in a clean state, regardless of current status
r = requests.post(f"{BACKEND_URL}/stop_audio").json()
# Small pause lets backend fully release audio devices before re-init
time.sleep(1)
if audio_mode == "TextCast":
uploaded = st.session_state.get('_textcast_dcp_content')
if not uploaded:
st.error("Upload a DCP XML file first.")
else:
try:
ru = requests.post(f"{BACKEND_URL}/upload_dcp", json={"xml": uploaded})
if not ru.ok:
st.error(f"Upload failed: {ru.text}")
else:
rs = requests.post(f"{BACKEND_URL}/start_textcast")
if rs.ok:
st.success("TextCast started.")
st.rerun()
else:
st.error(f"Start failed: {rs.text}")
except Exception as e:
st.error(f"Error: {e}")
if audio_mode == "Demo":
elif audio_mode == "VoskCast":
try:
rs = requests.post(
f"{BACKEND_URL}/start_voskcast",
json={"model": vosk_model_path, "device": vosk_device},
)
if rs.ok:
st.success("VoskCast started.")
st.rerun()
else:
st.error(f"Start failed: {rs.text}")
except Exception as e:
st.error(f"Error: {e}")
elif audio_mode == "WhisperCast":
try:
rs = requests.post(
f"{BACKEND_URL}/start_whispercast",
json={"model": whisper_model_size, "device": whisper_device},
)
if rs.ok:
st.success("WhisperCast started.")
st.rerun()
else:
st.error(f"Start failed: {rs.text}")
except Exception as e:
st.error(f"Error: {e}")
else:
# Always send stop to ensure backend is in a clean state, regardless of current status
r = requests.post(f"{BACKEND_URL}/stop_audio").json()
# Small pause lets backend fully release audio devices before re-init
time.sleep(1)
if audio_mode == "Demo":
demo_cfg = demo_stream_map[demo_selected]
q = QUALITY_MAP[demo_cfg['quality']]
@@ -1726,12 +1968,22 @@ if start_stream:
bigs1 = []
for i in range(demo_cfg['streams']):
cfg_cls, lang = lang_cfgs[i % len(lang_cfgs)]
if demo_content == "1 kHz test tone":
source_file = f'../testdata/test_tone_1k_{int(q["rate"]/1000)}kHz_mono.lc3'
big_kwargs = {
'name': 'test tone',
'program_info': '1khz',
}
else:
source_file = f'../testdata/wave_particle_5min_{lang}_{int(q["rate"]/1000)}kHz_mono.lc3'
big_kwargs = {}
bigs1.append(cfg_cls(
code=(stream_passwort.strip() or None),
audio_source=f'file:../testdata/wave_particle_5min_{lang}_{int(q["rate"]/1000)}kHz_mono.wav',
audio_source=f'file:{source_file}',
iso_que_len=32,
sampling_frequency=q['rate'],
octets_per_frame=q['octets'],
**big_kwargs,
))
max_per_mc = {48000: 1, 24000: 2, 16000: 3}
@@ -1748,6 +2000,7 @@ if start_stream:
immediate_rendering=immediate_rendering,
presentation_delay_us=int(presentation_delay_ms * 1000),
qos_config=QOS_PRESET_MAP[qos_preset],
advertising_tx_power=tx_power_r1,
bigs=bigs1
)
config2 = None
@@ -1760,6 +2013,7 @@ if start_stream:
immediate_rendering=immediate_rendering,
presentation_delay_us=int(presentation_delay_ms * 1000),
qos_config=QOS_PRESET_MAP[qos_preset],
advertising_tx_power=tx_power_r2,
bigs=bigs2
)
@@ -1803,6 +2057,7 @@ if start_stream:
immediate_rendering=bool(cfg['immediate_rendering']),
presentation_delay_us=int(cfg['presentation_delay_ms'] * 1000),
qos_config=QOS_PRESET_MAP[cfg['qos_preset']],
advertising_tx_power=int(cfg.get('tx_power', TX_POWER_DEFAULT)),
analog_gain_db_left=cfg.get('analog_gain_db_left', 0.0),
analog_gain_db_right=cfg.get('analog_gain_db_right', 0.0),
bigs=[
@@ -1890,6 +2145,7 @@ if start_stream:
immediate_rendering=bool(radio_cfg['immediate_rendering']),
presentation_delay_us=int(radio_cfg['presentation_delay_ms'] * 1000),
qos_config=QOS_PRESET_MAP[radio_cfg['qos_preset']],
advertising_tx_power=int(radio_cfg.get('tx_power', TX_POWER_DEFAULT)),
bigs=bigs
)
@@ -1914,7 +2170,7 @@ if start_stream:
st.error(f"Failed to initialize Dante Radio 2: {r2.text}")
except Exception as e:
st.error(f"Error while starting Dante radios: {e}")
if audio_mode not in ("Demo", "Analog", "Network - Dante"):
if audio_mode not in ("Demo", "Analog", "Network - Dante", "VoskCast", "WhisperCast", "TextCast"):
# USB/Network: single config as before, using shared controls
q = QUALITY_MAP[quality]
config = auracast_config.AuracastConfigGroup(
@@ -1925,6 +2181,7 @@ if start_stream:
immediate_rendering=immediate_rendering,
presentation_delay_us=int(presentation_delay_ms * 1000),
qos_config=QOS_PRESET_MAP[qos_preset],
advertising_tx_power=tx_power,
bigs=[
auracast_config.AuracastBigConfig(
code=(stream_passwort.strip() or None),
+241 -3
View File
@@ -209,6 +209,16 @@ multicaster1: multicast_control.Multicaster | None = None
multicaster2: multicast_control.Multicaster | None = None
_stream_lock = asyncio.Lock() # serialize initialize/stop_audio on API side
# TextCast state
_textcast_task: asyncio.Task | None = None
DCP_UPLOAD_PATH = os.path.join(os.path.dirname(__file__), 'uploaded_subtitles.xml')
# VoskCast state
_voskcast_task: asyncio.Task | None = None
# WhisperCast state
_whispercast_task: asyncio.Task | None = None
# BLE / audio event loop set in __main__ before uvicorn starts.
# All coroutines that touch Bumble objects or the audio pipeline MUST run
# on this loop. HTTP handlers call _on_ble_loop() to cross into it.
@@ -592,6 +602,13 @@ async def init_radio(transport: str, conf: auracast_config.AuracastConfigGroup,
demo_count = sum(1 for big in conf.bigs if isinstance(big.audio_source, str) and big.audio_source.startswith('file:'))
demo_rate = int(conf.auracast_sampling_rate_hz or 0)
demo_type = None
demo_sources = [
str(b.audio_source)
for b in conf.bigs
if isinstance(b.audio_source, str) and b.audio_source.startswith('file:')
]
is_demo_tone = bool(demo_sources) and all('test_tone_1k_' in src for src in demo_sources)
demo_content = '1 kHz test tone' if is_demo_tone else 'Program material'
if demo_count > 0 and demo_rate > 0:
if demo_rate in (48000, 24000, 16000):
demo_type = f"{demo_count} × {demo_rate//1000}kHz"
@@ -614,13 +631,15 @@ async def init_radio(transport: str, conf: auracast_config.AuracastConfigGroup,
'analog_stereo_mode': getattr(conf.bigs[0], 'analog_stereo_mode', False) if conf.bigs else False,
'analog_gain_db_left': getattr(conf, 'analog_gain_db_left', 0.0),
'analog_gain_db_right': getattr(conf, 'analog_gain_db_right', 0.0),
'advertising_tx_power': getattr(conf, 'advertising_tx_power', 8),
'stream_password': (conf.bigs[0].code if conf.bigs and getattr(conf.bigs[0], 'code', None) else None),
'big_ids': [getattr(big, 'id', DEFAULT_BIG_ID) for big in conf.bigs],
'big_random_addresses': [getattr(big, 'random_address', DEFAULT_RANDOM_ADDRESS) for big in conf.bigs],
'demo_total_streams': demo_count,
'demo_stream_type': demo_type,
'demo_content': demo_content,
'is_streaming': auto_started,
'demo_sources': [str(b.audio_source) for b in conf.bigs if isinstance(b.audio_source, str) and b.audio_source.startswith('file:')],
'demo_sources': demo_sources,
}
return mc, persisted
except HTTPException:
@@ -696,6 +715,208 @@ async def _stop_audio_impl():
log.error("Exception in /stop_audio: %s", traceback.format_exc())
raise HTTPException(status_code=500, detail=str(e))
@app.post("/upload_dcp")
async def upload_dcp(payload: dict):
    """Save DCP XML content for TextCast. Body: {"xml": "<DCSubtitle>..."}

    The XML text is written verbatim (UTF-8) to DCP_UPLOAD_PATH, replacing any
    previous upload.

    Returns:
        {"status": "ok", "path": DCP_UPLOAD_PATH} on success.

    Raises:
        HTTPException(400): "xml" is missing, is not a string, or is blank.
        HTTPException(500): the file could not be written.
    """
    xml_content = payload.get("xml", "")
    # A JSON null / number / list under "xml" has no .strip(); reject it as a
    # client error instead of letting it surface as an unhandled 500.
    if not isinstance(xml_content, str) or not xml_content.strip():
        raise HTTPException(status_code=400, detail="Empty XML content")
    try:
        with open(DCP_UPLOAD_PATH, 'w', encoding='utf-8') as f:
            f.write(xml_content)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    # Report the encoded size: the message says bytes, len(str) counts characters.
    log.info("DCP XML saved to %s (%d bytes)", DCP_UPLOAD_PATH, len(xml_content.encode('utf-8')))
    return {"status": "ok", "path": DCP_UPLOAD_PATH}
@app.post("/start_textcast")
async def start_textcast():
    """HTTP entry point: begin the text-over-Auracast broadcast of the uploaded DCP XML file.

    The work itself is done by ``_start_textcast_impl``; it is handed to
    ``_on_ble_loop`` and its result is returned unchanged.
    """
    start_coro = _start_textcast_impl()
    outcome = await _on_ble_loop(start_coro)
    return outcome
async def _start_textcast_impl():
    """Start the TextCast broadcast task for the previously uploaded DCP file.

    Stops whatever is currently broadcasting, schedules ``broadcast_text`` as a
    background task on the current event loop, persists the new stream
    settings and switches the LED on.

    Returns:
        {"status": "started"}

    Raises:
        HTTPException(400): no DCP file exists at DCP_UPLOAD_PATH.
    """
    global _textcast_task
    if not os.path.exists(DCP_UPLOAD_PATH):
        raise HTTPException(status_code=400, detail="No DCP file uploaded. Use /upload_dcp first.")
    # Stop every other producer first. TextCast, VoskCast and WhisperCast are
    # all started on TRANSPORT1, so a still-running caption task must be torn
    # down before a new broadcast is created (same sequence that
    # _start_whispercast_impl uses).
    await _stop_all()
    await _stop_textcast_impl()
    await _stop_voskcast_impl()
    await _stop_whispercast_impl()
    from auracast.text_multicast import broadcast_text
    _textcast_task = asyncio.get_event_loop().create_task(
        broadcast_text(DCP_UPLOAD_PATH, TRANSPORT1)
    )
    settings = {
        'is_streaming': True,
        'audio_mode': 'TextCast',
        'textcast_is_streaming': True,
        'timestamp': datetime.utcnow().isoformat(),
    }
    save_stream_settings(settings)
    _led_on()
    log.info("TextCast started (DCP: %s)", DCP_UPLOAD_PATH)
    return {"status": "started"}
@app.post("/stop_textcast")
async def stop_textcast():
    """HTTP entry point: end a running TextCast broadcast.

    Delegates to ``_stop_textcast_impl`` through ``_on_ble_loop`` and returns
    its result unchanged.
    """
    stop_coro = _stop_textcast_impl()
    outcome = await _on_ble_loop(stop_coro)
    return outcome
async def _stop_textcast_impl():
    """Cancel the TextCast task (if one is running) and record the stopped state.

    Always clears the task reference and switches the LED off. The persisted
    settings are only touched when their audio_mode is 'TextCast'.

    Returns:
        {"status": "stopped", "was_running": bool}; was_running is True only
        when a live task was actually cancelled by this call.
    """
    global _textcast_task
    was_running = False
    if _textcast_task is not None and not _textcast_task.done():
        was_running = True
        _textcast_task.cancel()
        try:
            # shield() keeps the timeout itself from cancelling the task a second time.
            await asyncio.wait_for(asyncio.shield(_textcast_task), timeout=3.0)
        except asyncio.CancelledError:
            pass  # expected outcome of the cancel() above
        except asyncio.TimeoutError:
            # Best effort: carry on, but leave a trace that teardown is still pending.
            log.warning("TextCast task did not finish within 3 s of cancellation")
        except Exception:
            log.warning("TextCast task raised while stopping", exc_info=True)
    _textcast_task = None
    _led_off()
    settings = load_stream_settings() or {}
    if settings.get('audio_mode') == 'TextCast':
        settings['is_streaming'] = False
        settings['textcast_is_streaming'] = False
        settings['timestamp'] = datetime.utcnow().isoformat()
        save_stream_settings(settings)
    log.info("TextCast stopped")
    return {"status": "stopped", "was_running": was_running}
@app.post("/start_voskcast")
async def start_voskcast(body: dict = {}):
    """HTTP entry point: begin a Vosk STT → TextCast broadcast.

    Optional JSON body: {"model": "...", "device": "ch1"}. The body is passed
    to ``_start_voskcast_impl``, which is run through ``_on_ble_loop``.
    """
    start_coro = _start_voskcast_impl(body)
    outcome = await _on_ble_loop(start_coro)
    return outcome
async def _start_voskcast_impl(body: dict) -> dict:
    """Start the Vosk speech-to-text caption broadcast.

    Args:
        body: request options; 'model' (falls back to DEFAULT_MODEL_PATH when
            missing or empty) and 'device' (defaults to 'ch1').

    Stops whatever is currently broadcasting, schedules ``broadcast_vosk`` as a
    background task on the current event loop, persists the new stream
    settings and switches the LED on.

    Returns:
        {"status": "started"}
    """
    global _voskcast_task
    from auracast.vosk_textcast import broadcast_vosk, DEFAULT_MODEL_PATH
    model = body.get('model') or DEFAULT_MODEL_PATH
    device = body.get('device', 'ch1')
    # All caption modes are started on TRANSPORT1, so a running WhisperCast
    # task has to be stopped as well (same sequence as _start_whispercast_impl).
    await _stop_all()
    await _stop_textcast_impl()
    await _stop_voskcast_impl()
    await _stop_whispercast_impl()
    _voskcast_task = asyncio.get_event_loop().create_task(
        broadcast_vosk(TRANSPORT1, model, device)
    )
    settings = {
        'is_streaming': True,
        'audio_mode': 'VoskCast',
        'voskcast_is_streaming': True,
        'voskcast_device': device,
        'voskcast_model': model,
        'timestamp': datetime.utcnow().isoformat(),
    }
    save_stream_settings(settings)
    _led_on()
    log.info("VoskCast started (device=%s, model=%s)", device, model)
    return {"status": "started"}
@app.post("/stop_voskcast")
async def stop_voskcast():
    """HTTP entry point: end a running VoskCast broadcast.

    Delegates to ``_stop_voskcast_impl`` through ``_on_ble_loop`` and returns
    its result unchanged.
    """
    stop_coro = _stop_voskcast_impl()
    outcome = await _on_ble_loop(stop_coro)
    return outcome
async def _stop_voskcast_impl() -> dict:
    """Cancel the VoskCast task (if one is running) and record the stopped state.

    Always clears the task reference and switches the LED off. The persisted
    settings are only touched when their audio_mode is 'VoskCast'.

    Returns:
        {"status": "stopped", "was_running": bool}; was_running is True only
        when a live task was actually cancelled by this call.
    """
    global _voskcast_task
    was_running = False
    if _voskcast_task is not None and not _voskcast_task.done():
        was_running = True
        _voskcast_task.cancel()
        try:
            # shield() keeps the timeout itself from cancelling the task a second time.
            await asyncio.wait_for(asyncio.shield(_voskcast_task), timeout=4.0)
        except asyncio.CancelledError:
            pass  # expected outcome of the cancel() above
        except asyncio.TimeoutError:
            # Best effort: carry on, but leave a trace that teardown is still pending.
            log.warning("VoskCast task did not finish within 4 s of cancellation")
        except Exception:
            log.warning("VoskCast task raised while stopping", exc_info=True)
    _voskcast_task = None
    _led_off()
    settings = load_stream_settings() or {}
    if settings.get('audio_mode') == 'VoskCast':
        settings['is_streaming'] = False
        settings['voskcast_is_streaming'] = False
        settings['timestamp'] = datetime.utcnow().isoformat()
        save_stream_settings(settings)
    log.info("VoskCast stopped")
    return {"status": "stopped", "was_running": was_running}
@app.post("/start_whispercast")
async def start_whispercast(body: dict = {}):
    """HTTP entry point: begin a faster-whisper → TextCast broadcast.

    Optional JSON body: {"model": "tiny.en", "device": "ch1"}. The body is
    passed to ``_start_whispercast_impl``, which is run through ``_on_ble_loop``.
    """
    start_coro = _start_whispercast_impl(body)
    outcome = await _on_ble_loop(start_coro)
    return outcome
async def _start_whispercast_impl(body: dict) -> dict:
    """Start the faster-whisper caption broadcast.

    Args:
        body: request options; 'model' (defaults to 'tiny.en') and 'device'
            (defaults to 'ch1').

    Stops audio plus all three caption modes, schedules ``broadcast_whisper``
    as a background task, persists the new stream settings and switches the
    LED on.

    Returns:
        {"status": "started"}
    """
    global _whispercast_task
    from auracast.faster_whisper_textcast import broadcast_whisper

    model = body.get('model', 'tiny.en')
    device = body.get('device', 'ch1')

    # Tear down everything that may currently be broadcasting, in this order.
    for stop in (_stop_all, _stop_textcast_impl, _stop_voskcast_impl, _stop_whispercast_impl):
        await stop()

    current_loop = asyncio.get_event_loop()
    _whispercast_task = current_loop.create_task(
        broadcast_whisper(TRANSPORT1, model, device)
    )

    save_stream_settings({
        'is_streaming': True,
        'audio_mode': 'WhisperCast',
        'whispercast_is_streaming': True,
        'whispercast_device': device,
        'whispercast_model': model,
        'timestamp': datetime.utcnow().isoformat(),
    })
    _led_on()
    log.info("WhisperCast started (device=%s, model=%s)", device, model)
    return {"status": "started"}
@app.post("/stop_whispercast")
async def stop_whispercast():
    """HTTP entry point: end a running WhisperCast broadcast.

    Delegates to ``_stop_whispercast_impl`` through ``_on_ble_loop`` and
    returns its result unchanged.
    """
    stop_coro = _stop_whispercast_impl()
    outcome = await _on_ble_loop(stop_coro)
    return outcome
async def _stop_whispercast_impl() -> dict:
    """Cancel the WhisperCast task (if one is running) and record the stopped state.

    Always clears the task reference and switches the LED off. The persisted
    settings are only touched when their audio_mode is 'WhisperCast'.

    Returns:
        {"status": "stopped", "was_running": bool}; was_running is True only
        when a live task was actually cancelled by this call.
    """
    global _whispercast_task
    was_running = False
    if _whispercast_task is not None and not _whispercast_task.done():
        was_running = True
        _whispercast_task.cancel()
        try:
            # shield() keeps the timeout itself from cancelling the task a second time.
            await asyncio.wait_for(asyncio.shield(_whispercast_task), timeout=5.0)
        except asyncio.CancelledError:
            pass  # expected outcome of the cancel() above
        except asyncio.TimeoutError:
            # Best effort: carry on, but leave a trace that teardown is still pending.
            log.warning("WhisperCast task did not finish within 5 s of cancellation")
        except Exception:
            log.warning("WhisperCast task raised while stopping", exc_info=True)
    _whispercast_task = None
    _led_off()
    settings = load_stream_settings() or {}
    if settings.get('audio_mode') == 'WhisperCast':
        settings['is_streaming'] = False
        settings['whispercast_is_streaming'] = False
        settings['timestamp'] = datetime.utcnow().isoformat()
        save_stream_settings(settings)
    log.info("WhisperCast stopped")
    return {"status": "stopped", "was_running": was_running}
@app.post("/adc_gain")
async def set_adc_gain(payload: dict):
"""Set ADC gain in dB for left and right channels without restarting the stream.
@@ -754,6 +975,15 @@ async def get_status():
status["secondary"] = secondary
status["secondary_is_streaming"] = bool(secondary.get("is_streaming", False))
status["led_enabled"] = _LED_ENABLED
status["textcast_is_streaming"] = (
_textcast_task is not None and not _textcast_task.done()
)
status["voskcast_is_streaming"] = (
_voskcast_task is not None and not _voskcast_task.done()
)
status["whispercast_is_streaming"] = (
_whispercast_task is not None and not _whispercast_task.done()
)
return status
@@ -794,11 +1024,12 @@ async def _autostart_from_settings():
big_ids = settings.get('big_ids') or []
big_addrs = settings.get('big_random_addresses') or []
stream_password = settings.get('stream_password')
tx_power = int(settings.get('advertising_tx_power', 8))
original_ts = settings.get('timestamp')
previously_streaming = bool(settings.get('is_streaming'))
log.info(
"[AUTOSTART][PRIMARY] loaded settings: previously_streaming=%s audio_mode=%s rate=%s octets=%s pres_delay=%s qos_preset=%s immediate_rendering=%s assisted_listening_stream=%s demo_sources=%s",
"[AUTOSTART][PRIMARY] loaded settings: previously_streaming=%s audio_mode=%s rate=%s octets=%s pres_delay=%s qos_preset=%s immediate_rendering=%s assisted_listening_stream=%s tx_power=%+d dBm demo_sources=%s",
previously_streaming,
audio_mode,
rate,
@@ -807,6 +1038,7 @@ async def _autostart_from_settings():
saved_qos_preset,
immediate_rendering,
assisted_listening_stream,
tx_power,
(settings.get('demo_sources') or []),
)
@@ -856,6 +1088,7 @@ async def _autostart_from_settings():
immediate_rendering=immediate_rendering,
assisted_listening_stream=assisted_listening_stream,
presentation_delay_us=pres_delay if pres_delay is not None else 40000,
advertising_tx_power=tx_power,
bigs=bigs,
)
# Set num_bis for stereo mode if needed
@@ -925,6 +1158,7 @@ async def _autostart_from_settings():
presentation_delay_us=pres_delay if pres_delay is not None else 40000,
analog_gain_db_left=settings.get('analog_gain_db_left', 0.0),
analog_gain_db_right=settings.get('analog_gain_db_right', 0.0),
advertising_tx_power=tx_power,
bigs=bigs,
)
# Set num_bis for stereo mode if needed
@@ -960,10 +1194,11 @@ async def _autostart_from_settings():
big_ids = settings.get('big_ids') or []
big_addrs = settings.get('big_random_addresses') or []
stream_password = settings.get('stream_password')
tx_power = int(settings.get('advertising_tx_power', 8))
original_ts = settings.get('timestamp')
previously_streaming = bool(settings.get('is_streaming'))
log.info(
"[AUTOSTART][SECONDARY] loaded settings: previously_streaming=%s audio_mode=%s rate=%s octets=%s pres_delay=%s qos_preset=%s immediate_rendering=%s assisted_listening_stream=%s demo_sources=%s",
"[AUTOSTART][SECONDARY] loaded settings: previously_streaming=%s audio_mode=%s rate=%s octets=%s pres_delay=%s qos_preset=%s immediate_rendering=%s assisted_listening_stream=%s tx_power=%+d dBm demo_sources=%s",
previously_streaming,
audio_mode,
rate,
@@ -972,6 +1207,7 @@ async def _autostart_from_settings():
saved_qos_preset,
immediate_rendering,
assisted_listening_stream,
tx_power,
(settings.get('demo_sources') or []),
)
if not previously_streaming:
@@ -1011,6 +1247,7 @@ async def _autostart_from_settings():
immediate_rendering=immediate_rendering,
assisted_listening_stream=assisted_listening_stream,
presentation_delay_us=pres_delay if pres_delay is not None else 40000,
advertising_tx_power=tx_power,
bigs=bigs,
)
conf.qos_config = QOS_PRESET_MAP.get(saved_qos_preset, QOS_PRESET_MAP["Fast"])
@@ -1080,6 +1317,7 @@ async def _autostart_from_settings():
presentation_delay_us=pres_delay if pres_delay is not None else 40000,
analog_gain_db_left=settings.get('analog_gain_db_left', 0.0),
analog_gain_db_right=settings.get('analog_gain_db_right', 0.0),
advertising_tx_power=tx_power,
bigs=bigs,
)
conf.qos_config = QOS_PRESET_MAP.get(saved_qos_preset, QOS_PRESET_MAP["Fast"])
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+71
View File
@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<DCSubtitle Version="1.0">
<SubtitleID>a1b2c3d4-e5f6-7890-abcd-ef1234567890</SubtitleID>
<MovieTitle>Sample TextCast Subtitles</MovieTitle>
<ReelNumber>1</ReelNumber>
<Language>en</Language>
<LoadFont Id="Font1" URI="Arial.ttf"/>
<Font Id="Font1" Color="FFFFFFFF" Effect="none" Size="42" Italic="no">
<Subtitle SpotNumber="1" TimeIn="00:00:02:00" TimeOut="00:00:05:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Welcome to TextCast.</Text>
</Subtitle>
<Subtitle SpotNumber="2" TimeIn="00:00:06:00" TimeOut="00:00:09:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Text transmitted over Auracast BLE.</Text>
</Subtitle>
<Subtitle SpotNumber="3" TimeIn="00:00:10:00" TimeOut="00:00:13:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">No LC3 audio codec involved.</Text>
</Subtitle>
<Subtitle SpotNumber="4" TimeIn="00:00:14:00" TimeOut="00:00:17:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Raw ISO SDUs carry UTF-8 text.</Text>
</Subtitle>
<Subtitle SpotNumber="5" TimeIn="00:00:18:00" TimeOut="00:00:21:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">100 frames per second at 40 bytes.</Text>
</Subtitle>
<Subtitle SpotNumber="6" TimeIn="00:00:22:00" TimeOut="00:00:25:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Scrolling display on SH1106 OLED.</Text>
</Subtitle>
<Subtitle SpotNumber="7" TimeIn="00:00:26:00" TimeOut="00:00:29:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Each new line scrolls up the screen.</Text>
</Subtitle>
<Subtitle SpotNumber="8" TimeIn="00:00:30:00" TimeOut="00:00:33:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">The quick brown fox jumps over</Text>
</Subtitle>
<Subtitle SpotNumber="9" TimeIn="00:00:34:00" TimeOut="00:00:37:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">the lazy dog.</Text>
</Subtitle>
<Subtitle SpotNumber="10" TimeIn="00:00:38:00" TimeOut="00:00:41:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Speech-to-text output goes here.</Text>
</Subtitle>
<Subtitle SpotNumber="11" TimeIn="00:00:42:00" TimeOut="00:00:45:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Latency is dominated by BLE BIG.</Text>
</Subtitle>
<Subtitle SpotNumber="12" TimeIn="00:00:46:00" TimeOut="00:00:49:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Typical end-to-end: under 50 ms.</Text>
</Subtitle>
<Subtitle SpotNumber="13" TimeIn="00:00:50:00" TimeOut="00:00:53:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">One transmitter, many receivers.</Text>
</Subtitle>
<Subtitle SpotNumber="14" TimeIn="00:00:54:00" TimeOut="00:00:57:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">Built on Bumble and Zephyr RTOS.</Text>
</Subtitle>
<Subtitle SpotNumber="15" TimeIn="00:00:58:00" TimeOut="00:01:01:00" FadeUpTime="0" FadeDownTime="0">
<Text HAlign="center" VAlign="bottom">End of demonstration. Thank you.</Text>
</Subtitle>
</Font>
</DCSubtitle>
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
+155
View File
@@ -0,0 +1,155 @@
"""Text-over-Auracast transmitter.
Reads a DCP XML subtitle file and broadcasts each subtitle as raw ISO SDUs.
No LC3 encoding is used. The BIG is advertised with codec_id=LC3 (required
for BAP sync) but the SDU payload is plain UTF-8 text with a magic header.
Frame format (SDU_SIZE bytes total):
Byte 0      : TEXT_MAGIC (0xAA) - identifies this as a text SDU
Byte 1      : text length N in bytes - 0 means idle/clear
Bytes 2..N+1: UTF-8 text
Bytes N+2.. : zero padding to SDU_SIZE
Usage:
poetry run python -m auracast.text_multicast \\
--dcp ./auracast/testdata/sample_subtitles.xml \\
--transport serial:/dev/ttyAMA3,1000000,rtscts
"""
from __future__ import annotations
import argparse
import asyncio
import logging
import os
from auracast import auracast_config, multicast
from auracast.dcp_parser import parse_dcp_xml
TEXT_MAGIC = 0xAA
SDU_SIZE = 64 # octets_per_frame; 62 usable text bytes per frame
SDU_INTERVAL_US = 10_000 # 10 ms → 100 SDUs/sec
BROADCAST_NAME = 'TextCast'
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s %(levelname)s %(name)s: %(message)s',
)
log = logging.getLogger('text_multicast')
def _make_text_frame(text: str) -> bytes:
    """Encode a subtitle string into a fixed-size TEXT SDU.

    Layout: byte 0 is TEXT_MAGIC, byte 1 is the payload length in bytes, then
    the UTF-8 payload, zero-padded up to SDU_SIZE.

    Text longer than SDU_SIZE - 2 bytes is truncated at a character boundary,
    so the payload is always valid UTF-8.
    """
    max_payload = SDU_SIZE - 2
    text_bytes = text.encode('utf-8')
    if len(text_bytes) > max_payload:
        # A plain byte slice can cut a multi-byte character in half;
        # errors='ignore' drops the dangling partial sequence at the end.
        text_bytes = (
            text_bytes[:max_payload].decode('utf-8', errors='ignore').encode('utf-8')
        )
    frame = bytes([TEXT_MAGIC, len(text_bytes)]) + text_bytes
    return frame + bytes(SDU_SIZE - len(frame))
def _make_idle_frame() -> bytes:
    """Build the idle SDU: SDU_SIZE zero bytes (so the magic byte is 0), meaning no subtitle is active."""
    return b'\x00' * SDU_SIZE
async def _text_stream(bigs: dict, subtitles: list, loop: bool = True) -> None:
    """Feed subtitle frames into the ISO queue of 'big0', one SDU per iteration.

    The stream position is derived from the number of frames written so far
    (frame N is treated as N x SDU_INTERVAL_US), not from a wall clock. Each
    iteration sends the text frame of the subtitle active at that position, or
    an idle frame when none is active.

    Args:
        bigs: mapping returned by the broadcast setup; only
            ``bigs['big0']['iso_queue']`` is used.
        subtitles: entries exposing ``time_in``, ``time_out`` and ``text``.
        loop: when True the list restarts after a 2 s gap that follows the
            last subtitle's ``time_out``; when False the function returns
            at that point instead.
    """
    iso_queue = bigs['big0']['iso_queue']
    seconds_per_frame = SDU_INTERVAL_US / 1_000_000
    total = len(subtitles)

    # One pass lasts until the final subtitle ends, plus a pause before restarting.
    restart_gap_s = 2.0
    pass_length_s = subtitles[-1].time_out + restart_gap_s if total > 0 else 0.0

    frames_sent = 0
    cursor = 0
    logged_idx = -1
    passes_done = 0

    log.info("Streaming %d subtitle(s) (loop=%s). Press Ctrl-C to stop.", total, loop)

    while True:
        elapsed_s = frames_sent * seconds_per_frame

        # Skip every subtitle that has already ended.
        while cursor < total and elapsed_s >= subtitles[cursor].time_out:
            cursor += 1

        current = subtitles[cursor] if cursor < total else None
        if current is not None and elapsed_s >= current.time_in:
            frame = _make_text_frame(current.text)
            if cursor != logged_idx:
                log.info("[loop %d %05.1fs] %s", passes_done, elapsed_s, current.text)
                logged_idx = cursor
        else:
            frame = _make_idle_frame()

        await iso_queue.write(frame)
        frames_sent += 1

        # Still inside the current pass (or nothing to play): keep going.
        if total == 0 or elapsed_s < pass_length_s:
            continue

        if not loop:
            log.info("All subtitles transmitted. Exiting.")
            break

        passes_done += 1
        log.info("Loop %d complete restarting.", passes_done)
        frames_sent = 0
        cursor = 0
        logged_idx = -1
async def broadcast_text(dcp_path: str, transport: str, loop: bool = True) -> None:
    """Parse a DCP XML subtitle file and broadcast its subtitles as text SDUs.

    Args:
        dcp_path: path of the DCP XML file handed to ``parse_dcp_xml``.
        transport: transport string stored in the broadcast configuration.
        loop: forwarded to ``_text_stream``.

    Logs an error and returns without broadcasting when the file yields no
    subtitles.
    """
    subtitles = parse_dcp_xml(dcp_path)
    if not subtitles:
        log.error("No subtitles found in %s", dcp_path)
        return
    log.info("Loaded %d subtitle(s) from %s", len(subtitles), dcp_path)

    text_big = auracast_config.AuracastBigConfig(
        name=BROADCAST_NAME,
        program_info='Text Broadcast',
        language='eng',
        audio_source='file:dummy',  # placeholder; frames come from _text_stream instead
        iso_que_len=4,
    )
    config = auracast_config.AuracastConfigGroup(
        bigs=[text_big],
        auracast_sampling_rate_hz=16000,
        octets_per_frame=SDU_SIZE,
        frame_duration_us=SDU_INTERVAL_US,
        presentation_delay_us=40_000,
        qos_config=auracast_config.AuracastQosRobust(),
        transport=transport,
    )

    async with multicast.create_device(config) as device:
        bigs = await multicast.init_broadcast(device, config, config.bigs)
        await _text_stream(bigs, subtitles, loop=loop)
def main() -> None:
    """Command-line entry point: parse the arguments and run ``broadcast_text``.

    Options: --dcp (required), --transport (defaults to $AURACAST_TRANSPORT or
    the ttyAMA3 serial string) and --no-loop (play once, then exit).
    """
    default_transport = os.environ.get(
        'AURACAST_TRANSPORT',
        'serial:/dev/ttyAMA3,1000000,rtscts',
    )

    parser = argparse.ArgumentParser(description='Auracast text (subtitle) transmitter')
    parser.add_argument('--dcp', required=True, help='Path to DCP XML subtitle file')
    parser.add_argument(
        '--transport',
        default=default_transport,
        help='Bumble HCI transport string (default: $AURACAST_TRANSPORT or ttyAMA3)',
    )
    parser.add_argument(
        '--no-loop',
        action='store_true',
        help='Play subtitles once and exit instead of looping indefinitely',
    )
    args = parser.parse_args()

    repeat = not args.no_loop
    multicast.run_async(broadcast_text(args.dcp, args.transport, loop=repeat))
if __name__ == '__main__':
main()
+3 -3
View File
@@ -21,12 +21,12 @@ def read_lc3_file(filepath):
logging.info('frame_duration %s', frame_duration)
logging.info('stream_length %s', stream_length)
lc3_bytes= b''
chunks = []
while True:
b = f_lc3.read(2)
if b == b'':
break
lc3_frame_size = struct.unpack('=H', b)[0]
lc3_bytes += f_lc3.read(lc3_frame_size)
chunks.append(f_lc3.read(lc3_frame_size))
return lc3_bytes
return b''.join(chunks)
+270
View File
@@ -0,0 +1,270 @@
"""Vosk speech-to-text → TextCast streamer.
Captures mono audio from an analog ALSA/sounddevice input, runs Vosk
offline ASR in a background thread, and broadcasts recognised text over
the TextCast BLE broadcast using the same SDU framing as text_multicast.py.
Usage (CLI):
poetry run python -m auracast.vosk_textcast \\
--model /path/to/vosk-model-en-us \\
--device ch1 \\
--transport serial:/dev/ttyAMA3,1000000,rtscts
Environment:
VOSK_MODEL_PATH default Vosk model directory
AURACAST_TRANSPORT default HCI transport string
"""
from __future__ import annotations
import argparse
import asyncio
import json
import logging
import os
import threading
import time
from typing import Optional
import numpy as np
import samplerate
import sounddevice as sd
from auracast import auracast_config, multicast
from auracast.text_multicast import (
SDU_SIZE,
SDU_INTERVAL_US,
_make_text_frame,
_make_idle_frame,
)
log = logging.getLogger('vosk_textcast')
VOSK_SAMPLE_RATE = 16_000 # Vosk models expect 16 kHz
CAPTURE_SAMPLE_RATE = 48_000 # Hardware capture rate (always 48 kHz)
BLOCK_FRAMES_48K = 4800 # 100 ms blocks at 48 kHz → 1600 frames at 16 kHz
CAPTION_HOLD_S = 4.0 # Keep caption visible N seconds after last speech
BROADCAST_NAME = 'LiveCaption'
DEFAULT_MODEL_PATH = os.environ.get(
'VOSK_MODEL_PATH',
os.path.expanduser('~/vosk-model-en-us'),
)
def _tail_to_fit(text: str, max_bytes: int) -> str:
    """Return the tail of *text* that fits in *max_bytes* UTF-8 bytes.

    Text that already fits is returned unchanged. Otherwise the last
    *max_bytes* bytes are kept, a multi-byte character split by the cut is
    discarded, and a leading partial word is dropped so the result starts on
    a word boundary whenever the tail contains a space.

    A non-positive *max_bytes* yields an empty string.
    """
    if max_bytes <= 0:
        # encoded[-0:] would be the whole string rather than an empty tail.
        return ''
    encoded = text.encode('utf-8')
    if len(encoded) <= max_bytes:
        return text
    cut = len(encoded) - max_bytes  # >= 1 here
    tail = encoded[cut:].decode('utf-8', errors='ignore')
    if encoded[cut - 1:cut] == b' ':
        # The cut fell exactly between two words: the first word is complete,
        # so there is nothing to trim.
        return tail
    sp = tail.find(' ')
    return tail[sp + 1:] if sp != -1 else tail
def _new_words(old: str, new: str) -> str:
    """Return what *new* adds after *old*, compared word by word.

    When the words of *old* are a prefix of the words of *new*, the remaining
    words are returned joined by single spaces (an empty string if nothing was
    added). Otherwise *new* is treated as a different utterance and returned
    unchanged.
    """
    previous = old.split()
    current = new.split()
    shared = len(previous)
    if current[:shared] != previous:
        return new
    return ' '.join(current[shared:])
def _resolve_device(device: str) -> Optional[int]:
    """Map a device name or numeric string to a sounddevice index.

    Returns None (meaning: use the default input) for an empty string, and
    also when no input-capable device carries exactly that name; the latter
    case is logged as a warning. A string of digits is returned as an int
    without consulting the device list.
    """
    if not device:
        return None
    if device.isdigit():
        return int(device)

    # First device with a matching name and at least one input channel.
    match = next(
        (
            idx
            for idx, info in enumerate(sd.query_devices())
            if info['name'] == device and info['max_input_channels'] > 0
        ),
        None,
    )
    if match is None:
        log.warning("Device '%s' not found in sounddevice list using default input", device)
    return match
async def _iso_write_loop(bigs: dict, shared: dict, lock: threading.Lock) -> None:
    """ISO SDU write loop: send the current caption (partial or final) as-is.

    Each iteration reads 'text' and 'expiry' from *shared* under *lock*, then
    writes one frame to ``bigs['big0']['iso_queue']``: a text frame while the
    caption is non-empty and not yet expired, an idle frame otherwise.
    Runs until cancelled.

    Args:
        bigs: broadcast mapping; only ``bigs['big0']['iso_queue']`` is used.
        shared: dict written by the recognition thread ('text', 'expiry').
        lock: guards every access to *shared*.
    """
    iso_queue = bigs['big0']['iso_queue']
    last_sent: str = ''
    while True:
        now = time.monotonic()
        with lock:
            text: str = shared.get('text', '')
            expiry: float = shared.get('expiry', 0.0)
            active = bool(text) and now < expiry
            if not active:
                # Clear in the same critical section as the expiry check, so a
                # caption the recognition thread writes right after this read
                # is never wiped by a stale "expired" decision.
                shared['text'] = ''
        if active:
            display_text = _tail_to_fit(text, SDU_SIZE - 2)
            if display_text != last_sent:
                log.info("Caption: %s", display_text)
                last_sent = display_text
            frame = _make_text_frame(display_text)
        else:
            if last_sent:
                log.info("Caption cleared")
                last_sent = ''
            frame = _make_idle_frame()
        await iso_queue.write(frame)
def _vosk_thread(
    model_path: str,
    device: str,
    shared: dict,
    lock: threading.Lock,
    stop_event: threading.Event,
) -> None:
    """Blocking audio capture + Vosk recognition loop, meant to run in its own thread.

    Opens a mono input stream at CAPTURE_SAMPLE_RATE, resamples every block to
    VOSK_SAMPLE_RATE and feeds it to a Vosk recogniser. Recognised text is
    published through *shared* ('text', 'expiry') under *lock*.

    Args:
        model_path: directory of the Vosk model to load.
        device: input device name or numeric string (see ``_resolve_device``).
        shared: dict read by the ISO write loop.
        lock: guards every access to *shared*.
        stop_event: set by the owner to end capture; the function returns
            once it is set.

    Returns early (after logging an error) when the ``vosk`` package is not
    installed.
    """
    try:
        from vosk import KaldiRecognizer, Model  # type: ignore
    except ImportError:
        log.error("vosk is not installed. Run: poetry add vosk")
        return

    log.info("Loading Vosk model from %s", model_path)
    model = Model(model_path)
    rec = KaldiRecognizer(model, VOSK_SAMPLE_RATE)
    rec.SetMaxAlternatives(0)
    rec.SetWords(False)

    resampler = samplerate.Resampler('sinc_fastest', channels=1)
    ratio = VOSK_SAMPLE_RATE / CAPTURE_SAMPLE_RATE
    dev_idx = _resolve_device(device)
    last_word_count = [0]  # word count of last partial sent to display

    def _cb(indata: np.ndarray, frames: int, time_info, status) -> None:
        """Per-block stream callback: resample, recognise, publish."""
        if status:
            log.warning("Audio status: %s", status)
        if stop_event.is_set():
            raise sd.CallbackStop()

        # Resample 48 kHz → 16 kHz
        mono = indata[:, 0].astype(np.float32)
        downsampled = resampler.process(mono, ratio, end_of_input=False)
        # Clip before scaling: samples outside [-1, 1] (e.g. resampler
        # overshoot) would otherwise wrap around in the int16 conversion.
        pcm16 = (np.clip(downsampled, -1.0, 1.0) * 32767).astype(np.int16).tobytes()

        if rec.AcceptWaveform(pcm16):
            result = json.loads(rec.Result())
            final_text = result.get('text', '').strip()
            if final_text:
                log.info("Final: %s", final_text)
                with lock:
                    shared['text'] = _tail_to_fit(final_text, SDU_SIZE - 2)
                    shared['expiry'] = time.monotonic() + CAPTION_HOLD_S
            last_word_count[0] = 0  # reset for next sentence
        else:
            partial_text = json.loads(rec.PartialResult()).get('partial', '').strip()
            if partial_text:
                wc = len(partial_text.split())
                if wc > last_word_count[0]:  # new word arrived
                    last_word_count[0] = wc
                    with lock:
                        shared['text'] = _tail_to_fit(partial_text, SDU_SIZE - 2)
                        shared['expiry'] = time.monotonic() + CAPTION_HOLD_S

    try:
        with sd.InputStream(
            samplerate=CAPTURE_SAMPLE_RATE,
            blocksize=BLOCK_FRAMES_48K,
            device=dev_idx,
            dtype='float32',
            channels=1,
            callback=_cb,
        ):
            log.info("Vosk listening on device '%s' (idx=%s) …", device, dev_idx)
            # The callback does the work; this thread just waits for the stop signal.
            stop_event.wait()
    except Exception as exc:
        log.error("Vosk audio thread error: %s", exc, exc_info=True)
async def broadcast_vosk(
    transport: str,
    model_path: str = DEFAULT_MODEL_PATH,
    device: str = 'ch1',
) -> None:
    """Start a Vosk STT → TextCast broadcast. Runs until cancelled.

    Args:
        transport: HCI transport string placed in the Auracast config.
        model_path: Vosk model directory; ``~`` is expanded.
        device: Input device name or numeric index string handed to the
            recogniser thread.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        asyncio.CancelledError: Re-raised after shutdown when the task is
            cancelled.
    """
    model_path = os.path.expanduser(model_path)
    if not os.path.exists(model_path):
        raise FileNotFoundError(
            f"Vosk model not found at '{model_path}'. "
            "Download from https://alphacephei.com/vosk/models and set VOSK_MODEL_PATH."
        )

    config = auracast_config.AuracastConfigGroup(
        bigs=[
            auracast_config.AuracastBigConfig(
                name=BROADCAST_NAME,
                program_info='Live Captions',
                language='eng',
                audio_source='file:dummy',
                iso_que_len=4,
            ),
        ],
        auracast_sampling_rate_hz=16000,
        octets_per_frame=SDU_SIZE,
        frame_duration_us=SDU_INTERVAL_US,
        presentation_delay_us=40_000,
        qos_config=auracast_config.AuracastQosRobust(),
        transport=transport,
    )

    # State handed between the recogniser thread (writer) and the ISO loop.
    shared: dict = {'text': '', 'expiry': 0.0}
    lock = threading.Lock()
    stop_event = threading.Event()

    async with multicast.create_device(config) as ble_device:
        bigs = await multicast.init_broadcast(ble_device, config, config.bigs)
        t = threading.Thread(
            target=_vosk_thread,
            args=(model_path, device, shared, lock, stop_event),
            daemon=True,
        )
        t.start()
        log.info("VoskCast started (device=%s, model=%s)", device, model_path)
        try:
            await _iso_write_loop(bigs, shared, lock)
        except asyncio.CancelledError:
            log.info("VoskCast cancelled shutting down")
            raise
        finally:
            # Stop the capture thread on every exit path, not only on
            # cancellation, so a failing write loop cannot leave the audio
            # device open behind a still-running thread.
            stop_event.set()
            t.join(timeout=3.0)
def main() -> None:
    """Command-line entry point: parse the options and run the broadcast."""
    # The transport default can be overridden through the environment.
    default_transport = os.environ.get(
        'AURACAST_TRANSPORT', 'serial:/dev/ttyAMA3,1000000,rtscts'
    )

    cli = argparse.ArgumentParser(description='Vosk STT → Auracast TextCast')
    cli.add_argument(
        '--model',
        default=DEFAULT_MODEL_PATH,
        help=f'Path to Vosk model directory (default: {DEFAULT_MODEL_PATH})',
    )
    cli.add_argument(
        '--device',
        default='ch1',
        help='sounddevice input device name or index (default: ch1)',
    )
    cli.add_argument(
        '--transport',
        default=default_transport,
        help='Bumble HCI transport string',
    )
    opts = cli.parse_args()

    broadcast = broadcast_vosk(
        transport=opts.transport,
        model_path=opts.model,
        device=opts.device,
    )
    multicast.run_async(broadcast)


if __name__ == '__main__':
    main()
+11340 -11283
View File
File diff suppressed because it is too large Load Diff