feat: add gain control for USB and webapp microphone inputs with persistence - to be tested

This commit is contained in:
2025-06-18 16:34:28 +02:00
parent 22d27ce2f5
commit 0bf87c85b7
4 changed files with 200 additions and 93 deletions

View File

@@ -51,6 +51,7 @@ class AuracastBigConfig(BaseModel):
program_info: str = 'Some Announcements' program_info: str = 'Some Announcements'
audio_source: str = 'file:./auracast/announcement_48_10_96000_en.wav' audio_source: str = 'file:./auracast/announcement_48_10_96000_en.wav'
input_format: str = 'auto' input_format: str = 'auto'
input_gain: float | None = None # Parsed from audio_source for device inputs
loop: bool = True loop: bool = True
precode_wav: bool = False precode_wav: bool = False
iso_que_len: int = 64 iso_que_len: int = 64

View File

@@ -452,97 +452,135 @@ class Streamer():
lc3_frames = itertools.cycle(lc3_frames) lc3_frames = itertools.cycle(lc3_frames)
big['lc3_frames'] = lc3_frames big['lc3_frames'] = lc3_frames
# anything else, e.g. realtime stream from device (bumble) # anything else, e.g. realtime stream from device (bumble) or non-precoded file
else: else:
audio_input = await audio_io.create_audio_input(audio_source, input_format) current_big_config = self.big_config[i]
# Store early so stop_streaming can close even if open() fails audio_source_str = str(current_big_config.audio_source) # Ensure string type
big['audio_input'] = audio_input input_format_str = current_big_config.input_format
# SoundDeviceAudioInput (used for `mic:<device>` captures) has no `.rewind`. input_gain_val = current_big_config.input_gain
if hasattr(audio_input, "rewind"):
audio_input.rewind = big_config[i].loop audio_filter_for_create = None
effective_audio_source_for_create = audio_source_str
if audio_source_str.startswith('device:'):
parts = audio_source_str.split(':', 1)
if len(parts) > 1:
device_specifier_with_potential_gain = parts[1]
pure_device_name = device_specifier_with_potential_gain.split(',', 1)[0]
effective_audio_source_for_create = f"device:{pure_device_name}"
gain_to_apply = input_gain_val if input_gain_val is not None else 1.0
if abs(gain_to_apply - 1.0) > 0.01:
audio_filter_for_create = f"volume={gain_to_apply:.2f}"
logger.info(f"Applying FFmpeg volume filter for {effective_audio_source_for_create}: {audio_filter_for_create}")
elif audio_source_str.startswith('file:'):
gain_to_apply = input_gain_val if input_gain_val is not None else 1.0
if abs(gain_to_apply - 1.0) > 0.01:
audio_filter_for_create = f"volume={gain_to_apply:.2f}"
logger.info(f"Applying FFmpeg volume filter for {audio_source_str}: {audio_filter_for_create}")
# Prepare the source string, potentially with an FFmpeg filter
final_audio_source_spec = effective_audio_source_for_create
if current_big_config.input_gain is not None and input_format_str == 'ffmpeg': # Apply gain only if ffmpeg is used
audio_filter_value = f"volume={current_big_config.input_gain:.2f}"
logging.info(f"Applying FFmpeg volume filter for {effective_audio_source_for_create}: {audio_filter_value}")
# Append 'af' (audio filter) option to the source spec for FFmpeg
if '?' in final_audio_source_spec: # if there are already ffmpeg options (e.g. sample_rate)
final_audio_source_spec = f"{final_audio_source_spec}&af={audio_filter_value}"
else: # if this is the first ffmpeg option
final_audio_source_spec = f"{final_audio_source_spec},af={audio_filter_value}"
# Initial creation of audio_input
audio_input = await audio_io.create_audio_input(
final_audio_source_spec,
input_format=input_format_str
)
big['audio_input'] = audio_input # Store early for potential cleanup
if hasattr(audio_input, "rewind"):
audio_input.rewind = current_big_config.loop
# Retry logic: ALSA sometimes keeps the device busy for a short time after the
# previous stream has closed. Handle PortAudioError -9985 with back-off retries.
import sounddevice as _sd import sounddevice as _sd
max_attempts = 3 max_attempts = 3
pcm_format = None # Initialize pcm_format
for attempt in range(1, max_attempts + 1): for attempt in range(1, max_attempts + 1):
try: try:
logging.info(f"Attempting to open audio input: {effective_audio_source_for_create} (attempt {attempt})")
pcm_format = await audio_input.open() pcm_format = await audio_input.open()
logging.info(f"Successfully opened audio input: {effective_audio_source_for_create}, PCM Format: {pcm_format}")
break # success break # success
except _sd.PortAudioError as err: except _sd.PortAudioError as err:
# -9985 == paDeviceUnavailable logging.error('Could not open audio device %s with error %s (attempt %d/%d)', effective_audio_source_for_create, err, attempt, max_attempts)
logging.error('Could not open audio device %s with error %s', audio_source, err) code = getattr(err, 'errno', None) or (err.args[1] if len(err.args) > 1 and isinstance(err.args[1], int) else None)
code = None if code == -9985 and attempt < max_attempts: # paDeviceUnavailable
if hasattr(err, 'errno'): backoff_ms = (2 ** (attempt - 1)) * 100 # exponential backoff
code = err.errno logging.warning("PortAudio device busy. Retrying in %.1f ms…", backoff_ms)
elif len(err.args) > 1 and isinstance(err.args[1], int):
code = err.args[1]
if code == -9985 and attempt < max_attempts:
backoff_ms = 200 * attempt
logging.warning("PortAudio device busy (attempt %d/%d). Retrying in %.1f ms…", attempt, max_attempts, backoff_ms)
# ensure device handle and PortAudio context are closed before retrying
try: try:
if hasattr(audio_input, "aclose"): if hasattr(audio_input, "aclose"): await audio_input.aclose()
await audio_input.aclose() elif hasattr(audio_input, "close"): audio_input.close()
elif hasattr(audio_input, "close"): except Exception as close_err: logging.debug(f"Error closing audio_input during retry: {close_err}")
audio_input.close() if hasattr(_sd, "_terminate"): # sounddevice specific cleanup
except Exception: try: _sd._terminate()
pass except Exception as term_err: logging.debug(f"Error terminating PortAudio: {term_err}")
# Fully terminate PortAudio to drop lingering handles (sounddevice quirk)
if hasattr(_sd, "_terminate"):
try:
_sd._terminate()
except Exception:
pass
# Small pause then re-initialize PortAudio
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
if hasattr(_sd, "_initialize"): if hasattr(_sd, "_initialize"): # sounddevice specific reinit
try: try: _sd._initialize()
_sd._initialize() except Exception as init_err: logging.debug(f"Error initializing PortAudio: {init_err}")
except Exception:
pass
# Back-off before next attempt
await asyncio.sleep(backoff_ms / 1000) await asyncio.sleep(backoff_ms / 1000)
# Recreate audio_input fresh for next attempt # Recreate audio_input for next attempt, using the potentially modified source spec
audio_input = await audio_io.create_audio_input(audio_source, input_format) audio_input = await audio_io.create_audio_input(
final_audio_source_spec, # Use the spec that includes the filter if applicable
input_format=input_format_str
)
big['audio_input'] = audio_input # Update stored reference
if hasattr(audio_input, "rewind"):
audio_input.rewind = current_big_config.loop
continue continue
# Other errors or final attempt re-raise so caller can abort gracefully raise # Re-raise if not paDeviceUnavailable or max_attempts reached
raise except Exception as e:
else: logging.error(f"Unexpected error opening audio device {effective_audio_source_for_create}: {e}")
# Loop exhausted without break raise # Re-raise other unexpected errors
logging.error("Unable to open audio device after %d attempts giving up", max_attempts) else: # else for 'for' loop: if loop finished without break
logging.error("Unable to open audio device '%s' after %d attempts giving up.", effective_audio_source_for_create, max_attempts)
return # Or handle error more gracefully, e.g. mark BIG as inactive
# Proceed with encoder setup if pcm_format was obtained
if not pcm_format:
logging.error(f"Failed to obtain PCM format for {effective_audio_source_for_create}. Cannot set up encoder.")
return return
if pcm_format.channels != 1: if pcm_format.channels != 1:
logging.info("Input device provides %d channels will down-mix to mono for LC3", pcm_format.channels) logging.info("Input device '%s' provides %d channels will down-mix to mono for LC3", effective_audio_source_for_create, pcm_format.channels)
if pcm_format.sample_type == audio_io.PcmFormat.SampleType.INT16: # Downmixing is typically handled by FFmpeg if channels > 1 and output is mono
pcm_bit_depth = 16 # For LC3, we always want mono, so this is informational.
elif pcm_format.sample_type == audio_io.PcmFormat.SampleType.FLOAT32:
pcm_bit_depth = None # Determine pcm_bit_depth for encoder based on pcm_format.sample_type
else: if pcm_format.sample_type == audio_io.PcmFormat.SampleType.INT16:
logging.error("Only INT16 and FLOAT32 sample types are supported") pcm_bit_depth = 16
return elif pcm_format.sample_type == audio_io.PcmFormat.SampleType.FLOAT32:
encoder = lc3.Encoder( pcm_bit_depth = None # LC3 encoder can handle float32 directly
frame_duration_us=global_config.frame_duration_us, else:
sample_rate_hz=global_config.auracast_sampling_rate_hz, logging.error("Unsupported PCM sample type: %s for %s. Only INT16 and FLOAT32 are supported.", pcm_format.sample_type, effective_audio_source_for_create)
num_channels=1, return
input_sample_rate_hz=pcm_format.sample_rate,
)
lc3_frame_samples = encoder.get_frame_samples() # number of the pcm samples per lc3 frame
big['pcm_bit_depth'] = pcm_bit_depth encoder = lc3.Encoder(
big['channels'] = pcm_format.channels frame_duration_us=self.global_config.frame_duration_us,
big['lc3_frame_samples'] = lc3_frame_samples sample_rate_hz=self.global_config.auracast_sampling_rate_hz,
big['lc3_bytes_per_frame'] = global_config.octets_per_frame num_channels=1, # LC3 is mono
big['audio_input'] = audio_input input_sample_rate_hz=pcm_format.sample_rate,
big['encoder'] = encoder )
big['precoded'] = False lc3_frame_samples = encoder.get_frame_samples()
big['pcm_bit_depth'] = pcm_bit_depth
big['lc3_frame_samples'] = lc3_frame_samples
big['lc3_bytes_per_frame'] = self.global_config.octets_per_frame
big['encoder'] = encoder
big['precoded'] = False
logging.info("Streaming audio...") logging.info("Streaming audio...")
bigs = self.bigs bigs = self.bigs
self.is_streaming = True self.is_streaming = True
logging.info("Entering main streaming loop...")
# One streamer fits all # One streamer fits all
while self.is_streaming: while self.is_streaming:
stream_finished = [False for _ in range(len(bigs))] stream_finished = [False for _ in range(len(bigs))]
@@ -557,7 +595,9 @@ class Streamer():
stream_finished[i] = True stream_finished[i] = True
continue continue
else: # code lc3 on the fly else: # code lc3 on the fly
logging.debug(f"BIG {i} ({big.get('name', 'N/A')}): Attempting to read pcm_frame.")
pcm_frame = await anext(big['audio_input'].frames(big['lc3_frame_samples']), None) pcm_frame = await anext(big['audio_input'].frames(big['lc3_frame_samples']), None)
logging.debug(f"BIG {i} ({big.get('name', 'N/A')}): Read pcm_frame: {'None' if pcm_frame is None else f'type {type(pcm_frame)}, len {len(pcm_frame)} bytes' if isinstance(pcm_frame, bytes) else f'type {type(pcm_frame)}, shape {pcm_frame.shape}' if hasattr(pcm_frame, 'shape') else f'type {type(pcm_frame)}'}")
if pcm_frame is None: # Not all streams may stop at the same time if pcm_frame is None: # Not all streams may stop at the same time
stream_finished[i] = True stream_finished[i] = True

View File

@@ -14,14 +14,27 @@ PTIME = 40
BACKEND_URL = "http://localhost:5000" BACKEND_URL = "http://localhost:5000"
# Try loading persisted settings from backend # Try loading persisted settings from backend
# This is the correct place to define saved_settings before it's used for defaults
saved_settings = {} saved_settings = {}
try: try:
resp = requests.get(f"{BACKEND_URL}/status", timeout=1) resp = requests.get(f"{BACKEND_URL}/status", timeout=1)
if resp.status_code == 200: if resp.status_code == 200:
saved_settings = resp.json() saved_settings = resp.json()
except Exception: except Exception:
# If backend is not available or error, saved_settings will be empty dict
# Defaults will be used for gain values in this case.
saved_settings = {} saved_settings = {}
# Initialize gain session states
# This must come AFTER saved_settings is populated.
default_webapp_gain = float(saved_settings.get('webapp_mic_gain', 1.0))
if 'webapp_mic_gain' not in st.session_state:
st.session_state.webapp_mic_gain = default_webapp_gain
default_usb_gain = float(saved_settings.get('usb_mic_gain', 1.0))
if 'usb_mic_gain' not in st.session_state:
st.session_state.usb_mic_gain = default_usb_gain
st.title("🎙️ Auracast Audio Mode Control") st.title("🎙️ Auracast Audio Mode Control")
# Audio mode selection with persisted default # Audio mode selection with persisted default
@@ -53,9 +66,12 @@ if audio_mode in ["Webapp", "USB"]:
language = st.text_input("Language (ISO 639-3)", value=default_lang) language = st.text_input("Language (ISO 639-3)", value=default_lang)
# Gain slider for Webapp mode # Gain slider for Webapp mode
if audio_mode == "Webapp": if audio_mode == "Webapp":
mic_gain = st.slider("Microphone Gain", 0.0, 4.0, 1.0, 0.1, help="Adjust microphone volume sent to Auracast") st.session_state.webapp_mic_gain = st.slider(
else: "Microphone Gain", 0.0, 4.0, st.session_state.webapp_mic_gain, 0.1,
mic_gain = 1.0 help="Adjust microphone volume sent to Auracast (applied by browser)"
)
# For USB mode, gain slider is defined below.
# The variable 'mic_gain' for JS is sourced from st.session_state.webapp_mic_gain within Webapp mode logic.
# Input device selection for USB mode # Input device selection for USB mode
if audio_mode == "USB": if audio_mode == "USB":
@@ -87,6 +103,16 @@ if audio_mode in ["Webapp", "USB"]:
st.rerun() st.rerun()
# We send only the numeric/card identifier (before :) or 'default' # We send only the numeric/card identifier (before :) or 'default'
input_device = selected_option.split(":", 1)[0] if ":" in selected_option else selected_option input_device = selected_option.split(":", 1)[0] if ":" in selected_option else selected_option
# USB Microphone Gain Slider
st.session_state.usb_mic_gain = st.slider(
"Microphone Gain (USB)",
min_value=0.0,
max_value=4.0,
value=st.session_state.usb_mic_gain, # Use session state value
step=0.1,
help="Adjust microphone volume for USB input (applied by server)"
)
else: else:
input_device = None input_device = None
start_stream = st.button("Start Auracast") start_stream = st.button("Start Auracast")
@@ -96,7 +122,7 @@ if audio_mode in ["Webapp", "USB"]:
if audio_mode == "Webapp" and st.session_state.get('stream_started'): if audio_mode == "Webapp" and st.session_state.get('stream_started'):
update_js = f""" update_js = f"""
<script> <script>
if (window.gainNode) {{ window.gainNode.gain.value = {mic_gain}; }} if (window.gainNode) {{ window.gainNode.gain.value = {st.session_state.webapp_mic_gain}; }}
</script> </script>
""" """
st.components.v1.html(update_js, height=0) st.components.v1.html(update_js, height=0)
@@ -122,6 +148,17 @@ if audio_mode in ["Webapp", "USB"]:
import time; time.sleep(1) import time; time.sleep(1)
# Prepare config using the model (do NOT send qos_config, only relevant fields) # Prepare config using the model (do NOT send qos_config, only relevant fields)
q = quality_map[quality] q = quality_map[quality]
# Determine audio_source based on mode and gain settings
if audio_mode == "USB":
current_usb_gain = st.session_state.get('usb_mic_gain', 1.0) # Use .get for safety
audio_source_str = f"device:{input_device},gain={current_usb_gain}"
elif audio_mode == "Webapp":
audio_source_str = "webrtc"
# Webapp gain is handled client-side by JS using st.session_state.webapp_mic_gain
else: # Assuming a 'network' mode or other future modes
audio_source_str = "network" # Default or handle other modes
config = auracast_config.AuracastConfigGroup( config = auracast_config.AuracastConfigGroup(
auracast_sampling_rate_hz=q['rate'], auracast_sampling_rate_hz=q['rate'],
octets_per_frame=q['octets'], octets_per_frame=q['octets'],
@@ -131,11 +168,7 @@ if audio_mode in ["Webapp", "USB"]:
name=stream_name, name=stream_name,
program_info=f"{stream_name} {quality}", program_info=f"{stream_name} {quality}",
language=language, language=language,
audio_source=( audio_source=audio_source_str, # Use the constructed string
f"device:{input_device}" if audio_mode == "USB" else (
"webrtc" if audio_mode == "Webapp" else "network"
)
),
input_format=(f"int16le,{q['rate']},1" if audio_mode == "USB" else "auto"), input_format=(f"int16le,{q['rate']},1" if audio_mode == "USB" else "auto"),
iso_que_len=1, # TODO: this should be way less to decrease delay iso_que_len=1, # TODO: this should be way less to decrease delay
sampling_frequency=q['rate'], sampling_frequency=q['rate'],
@@ -160,7 +193,7 @@ if audio_mode in ["Webapp", "USB"]:
(async () => {{ (async () => {{
if (window.webrtc_started) return; // Prevent re-init on rerun if (window.webrtc_started) return; // Prevent re-init on rerun
window.webrtc_started = true; window.webrtc_started = true;
const GAIN_VALUE = {mic_gain}; const GAIN_VALUE = {st.session_state.webapp_mic_gain};
const pc = new RTCPeerConnection(); // No STUN needed for localhost const pc = new RTCPeerConnection(); // No STUN needed for localhost
const micStream = await navigator.mediaDevices.getUserMedia({{audio:true}}); const micStream = await navigator.mediaDevices.getUserMedia({{audio:true}});
// Create Web Audio gain processing // Create Web Audio gain processing

View File

@@ -4,7 +4,7 @@ import logging as log
import uuid import uuid
import json import json
import sys import sys
from datetime import datetime from datetime import datetime, timezone
import asyncio import asyncio
import numpy as np import numpy as np
from pydantic import BaseModel from pydantic import BaseModel
@@ -88,25 +88,58 @@ async def initialize(conf: auracast_config.AuracastConfigGroup):
# initialize the streams dict # initialize the streams dict
# persist stream settings for later retrieval # persist stream settings for later retrieval
# Derive audio_mode from first BIG audio_source # Derive audio_mode from first BIG audio_source and parse gain for all device sources
first_source = conf.bigs[0].audio_source if conf.bigs else '' audio_mode_persist = 'Network' # Default
if first_source.startswith('device:'): input_device_persist = None # Default for saving settings
audio_mode_persist = 'USB'
if conf.bigs:
first_big = conf.bigs[0]
# Determine audio_mode for saving settings based on the first BIG
if first_big.audio_source.startswith('device:'):
audio_mode_persist = 'USB'
# For saving settings, just get the device ID part from the first BIG
device_id_part = first_big.audio_source.split(':', 1)[1].split(',', 1)[0]
input_device_persist = device_id_part
elif first_big.audio_source == 'webrtc':
audio_mode_persist = 'Webapp'
# Parse gain for all BIGs that are device inputs
for big_config in conf.bigs:
if big_config.audio_source.startswith('device:'):
parts = big_config.audio_source.split(':', 1)[1].split(',')
device_id = parts[0]
gain_value = 1.0 # Default gain
if len(parts) > 1:
for part in parts[1:]:
if part.startswith('gain='):
try:
gain_value = float(part.split('=')[1])
except ValueError:
log.warning(f"Invalid gain value in audio_source: {part}. Using default 1.0.")
gain_value = 1.0
break # Found gain, no need to check other parts
big_config.input_gain = gain_value
# Update audio_source to only contain the device ID for Multicaster compatibility if needed
# For now, let's assume Multicaster will handle the full string or we adapt it later.
# big_config.audio_source = f"device:{device_id}" # Optional: simplify for downstream if it doesn't parse gain
input_device = first_source.split(':', 1)[1] if ':' in first_source else 'default'
elif first_source == 'webrtc':
audio_mode_persist = 'Webapp'
input_device = None
else:
audio_mode_persist = 'Network'
input_device = None
save_stream_settings({ save_stream_settings({
'channel_names': [big.name for big in conf.bigs], 'channel_names': [big.name for big in conf.bigs],
'languages': [big.language for big in conf.bigs], 'languages': [big.language for big in conf.bigs],
'audio_mode': audio_mode_persist, 'audio_mode': audio_mode_persist,
'input_device': input_device, 'input_device': input_device_persist, # Use the parsed device ID for saving
'timestamp': datetime.utcnow().isoformat() 'webapp_mic_gain': load_stream_settings().get('webapp_mic_gain', 1.0), # Preserve existing webapp gain
'usb_mic_gain': load_stream_settings().get('usb_mic_gain', 1.0), # Preserve existing usb gain
'timestamp': datetime.now(timezone.utc).isoformat()
}) })
# Persist the specific gain value that was just used for USB mode if applicable
if audio_mode_persist == 'USB' and conf.bigs and conf.bigs[0].input_gain is not None:
current_settings = load_stream_settings()
current_settings['usb_mic_gain'] = conf.bigs[0].input_gain
# Ensure timestamp is also updated if we are re-saving
current_settings['timestamp'] = datetime.now(timezone.utc).isoformat()
save_stream_settings(current_settings)
global_config_group = conf global_config_group = conf
# If there is an existing multicaster, cleanly shut it down first so audio devices are released # If there is an existing multicaster, cleanly shut it down first so audio devices are released
if multicaster is not None: if multicaster is not None: