diff --git a/src/auracast/auracast_config.py b/src/auracast/auracast_config.py index 1be73e7..a732393 100644 --- a/src/auracast/auracast_config.py +++ b/src/auracast/auracast_config.py @@ -51,7 +51,6 @@ class AuracastBigConfig(BaseModel): program_info: str = 'Some Announcements' audio_source: str = 'file:./auracast/announcement_48_10_96000_en.wav' input_format: str = 'auto' - input_gain: float | None = None # Parsed from audio_source for device inputs loop: bool = True precode_wav: bool = False iso_que_len: int = 64 diff --git a/src/auracast/multicast.py b/src/auracast/multicast.py index e37606b..c9aa32b 100644 --- a/src/auracast/multicast.py +++ b/src/auracast/multicast.py @@ -452,135 +452,97 @@ class Streamer(): lc3_frames = itertools.cycle(lc3_frames) big['lc3_frames'] = lc3_frames - # anything else, e.g. realtime stream from device (bumble) or non-precoded file + # anything else, e.g. realtime stream from device (bumble) else: - current_big_config = self.big_config[i] - audio_source_str = str(current_big_config.audio_source) # Ensure string type - input_format_str = current_big_config.input_format - input_gain_val = current_big_config.input_gain - - audio_filter_for_create = None - effective_audio_source_for_create = audio_source_str - - if audio_source_str.startswith('device:'): - parts = audio_source_str.split(':', 1) - if len(parts) > 1: - device_specifier_with_potential_gain = parts[1] - pure_device_name = device_specifier_with_potential_gain.split(',', 1)[0] - effective_audio_source_for_create = f"device:{pure_device_name}" - - gain_to_apply = input_gain_val if input_gain_val is not None else 1.0 - if abs(gain_to_apply - 1.0) > 0.01: - audio_filter_for_create = f"volume={gain_to_apply:.2f}" - logger.info(f"Applying FFmpeg volume filter for {effective_audio_source_for_create}: {audio_filter_for_create}") - elif audio_source_str.startswith('file:'): - gain_to_apply = input_gain_val if input_gain_val is not None else 1.0 - if abs(gain_to_apply - 1.0) > 0.01: - audio_filter_for_create = f"volume={gain_to_apply:.2f}" - logger.info(f"Applying FFmpeg volume filter for {audio_source_str}: {audio_filter_for_create}") - - # Prepare the source string, potentially with an FFmpeg filter - final_audio_source_spec = effective_audio_source_for_create - if current_big_config.input_gain is not None and input_format_str == 'ffmpeg': # Apply gain only if ffmpeg is used - audio_filter_value = f"volume={current_big_config.input_gain:.2f}" - logging.info(f"Applying FFmpeg volume filter for {effective_audio_source_for_create}: {audio_filter_value}") - # Append 'af' (audio filter) option to the source spec for FFmpeg - if '?' in final_audio_source_spec: # if there are already ffmpeg options (e.g. sample_rate) - final_audio_source_spec = f"{final_audio_source_spec}&af={audio_filter_value}" - else: # if this is the first ffmpeg option - final_audio_source_spec = f"{final_audio_source_spec},af={audio_filter_value}" - - # Initial creation of audio_input - audio_input = await audio_io.create_audio_input( - final_audio_source_spec, - input_format=input_format_str - ) - big['audio_input'] = audio_input # Store early for potential cleanup - + audio_input = await audio_io.create_audio_input(audio_source, input_format) + # Store early so stop_streaming can close even if open() fails + big['audio_input'] = audio_input + # SoundDeviceAudioInput (used for `mic:` captures) has no `.rewind`. if hasattr(audio_input, "rewind"): - audio_input.rewind = current_big_config.loop + audio_input.rewind = big_config[i].loop + # Retry logic – ALSA sometimes keeps the device busy for a short time after the + # previous stream has closed. Handle PortAudioError -9985 with back-off retries. import sounddevice as _sd max_attempts = 3 - pcm_format = None # Initialize pcm_format for attempt in range(1, max_attempts + 1): try: - logging.info(f"Attempting to open audio input: {effective_audio_source_for_create} (attempt {attempt})") pcm_format = await audio_input.open() - logging.info(f"Successfully opened audio input: {effective_audio_source_for_create}, PCM Format: {pcm_format}") break # success except _sd.PortAudioError as err: - logging.error('Could not open audio device %s with error %s (attempt %d/%d)', effective_audio_source_for_create, err, attempt, max_attempts) - code = getattr(err, 'errno', None) or (err.args[1] if len(err.args) > 1 and isinstance(err.args[1], int) else None) - if code == -9985 and attempt < max_attempts: # paDeviceUnavailable - backoff_ms = (2 ** (attempt - 1)) * 100 # exponential backoff - logging.warning("PortAudio device busy. Retrying in %.1f ms…", backoff_ms) + # -9985 == paDeviceUnavailable + logging.error('Could not open audio device %s with error %s', audio_source, err) + code = None + if hasattr(err, 'errno'): + code = err.errno + elif len(err.args) > 1 and isinstance(err.args[1], int): + code = err.args[1] + if code == -9985 and attempt < max_attempts: + backoff_ms = 200 * attempt + logging.warning("PortAudio device busy (attempt %d/%d). Retrying in %.1f ms…", attempt, max_attempts, backoff_ms) + # ensure device handle and PortAudio context are closed before retrying try: - if hasattr(audio_input, "aclose"): await audio_input.aclose() - elif hasattr(audio_input, "close"): audio_input.close() - except Exception as close_err: logging.debug(f"Error closing audio_input during retry: {close_err}") - if hasattr(_sd, "_terminate"): # sounddevice specific cleanup - try: _sd._terminate() - except Exception as term_err: logging.debug(f"Error terminating PortAudio: {term_err}") + if hasattr(audio_input, "aclose"): + await audio_input.aclose() + elif hasattr(audio_input, "close"): + audio_input.close() + except Exception: + pass + # Fully terminate PortAudio to drop lingering handles (sounddevice quirk) + if hasattr(_sd, "_terminate"): + try: + _sd._terminate() + except Exception: + pass + # Small pause then re-initialize PortAudio await asyncio.sleep(0.1) - if hasattr(_sd, "_initialize"): # sounddevice specific reinit - try: _sd._initialize() - except Exception as init_err: logging.debug(f"Error initializing PortAudio: {init_err}") - await asyncio.sleep(backoff_ms / 1000) - # Recreate audio_input for next attempt, using the potentially modified source spec - audio_input = await audio_io.create_audio_input( - final_audio_source_spec, # Use the spec that includes the filter if applicable - input_format=input_format_str - ) - big['audio_input'] = audio_input # Update stored reference - if hasattr(audio_input, "rewind"): - audio_input.rewind = current_big_config.loop - continue - raise # Re-raise if not paDeviceUnavailable or max_attempts reached - except Exception as e: - logging.error(f"Unexpected error opening audio device {effective_audio_source_for_create}: {e}") - raise # Re-raise other unexpected errors - else: # else for 'for' loop: if loop finished without break - logging.error("Unable to open audio device '%s' after %d attempts – giving up.", effective_audio_source_for_create, max_attempts) - return # Or handle error more gracefully, e.g. mark BIG as inactive + if hasattr(_sd, "_initialize"): + try: + _sd._initialize() + except Exception: + pass - # Proceed with encoder setup if pcm_format was obtained - if not pcm_format: - logging.error(f"Failed to obtain PCM format for {effective_audio_source_for_create}. Cannot set up encoder.") + # Back-off before next attempt + await asyncio.sleep(backoff_ms / 1000) + # Recreate audio_input fresh for next attempt + audio_input = await audio_io.create_audio_input(audio_source, input_format) + continue + # Other errors or final attempt – re-raise so caller can abort gracefully + raise + else: + # Loop exhausted without break + logging.error("Unable to open audio device after %d attempts – giving up", max_attempts) return if pcm_format.channels != 1: - logging.info("Input device '%s' provides %d channels – will down-mix to mono for LC3", effective_audio_source_for_create, pcm_format.channels) - # Downmixing is typically handled by FFmpeg if channels > 1 and output is mono - # For LC3, we always want mono, so this is informational. - - # Determine pcm_bit_depth for encoder based on pcm_format.sample_type - if pcm_format.sample_type == audio_io.PcmFormat.SampleType.INT16: - pcm_bit_depth = 16 - elif pcm_format.sample_type == audio_io.PcmFormat.SampleType.FLOAT32: - pcm_bit_depth = None # LC3 encoder can handle float32 directly - else: - logging.error("Unsupported PCM sample type: %s for %s. Only INT16 and FLOAT32 are supported.", pcm_format.sample_type, effective_audio_source_for_create) - return + logging.info("Input device provides %d channels – will down-mix to mono for LC3", pcm_format.channels) + if pcm_format.sample_type == audio_io.PcmFormat.SampleType.INT16: + pcm_bit_depth = 16 + elif pcm_format.sample_type == audio_io.PcmFormat.SampleType.FLOAT32: + pcm_bit_depth = None + else: + logging.error("Only INT16 and FLOAT32 sample types are supported") + return + encoder = lc3.Encoder( + frame_duration_us=global_config.frame_duration_us, + sample_rate_hz=global_config.auracast_sampling_rate_hz, + num_channels=1, + input_sample_rate_hz=pcm_format.sample_rate, + ) + lc3_frame_samples = encoder.get_frame_samples() # number of the pcm samples per lc3 frame - encoder = lc3.Encoder( - frame_duration_us=self.global_config.frame_duration_us, - sample_rate_hz=self.global_config.auracast_sampling_rate_hz, - num_channels=1, # LC3 is mono - input_sample_rate_hz=pcm_format.sample_rate, - ) - lc3_frame_samples = encoder.get_frame_samples() - big['pcm_bit_depth'] = pcm_bit_depth - big['lc3_frame_samples'] = lc3_frame_samples - big['lc3_bytes_per_frame'] = self.global_config.octets_per_frame - big['encoder'] = encoder - big['precoded'] = False + big['pcm_bit_depth'] = pcm_bit_depth + big['channels'] = pcm_format.channels + big['lc3_frame_samples'] = lc3_frame_samples + big['lc3_bytes_per_frame'] = global_config.octets_per_frame + big['audio_input'] = audio_input + big['encoder'] = encoder + big['precoded'] = False logging.info("Streaming audio...") bigs = self.bigs self.is_streaming = True - logging.info("Entering main streaming loop...") # One streamer fits all while self.is_streaming: stream_finished = [False for _ in range(len(bigs))] @@ -595,9 +557,7 @@ class Streamer(): stream_finished[i] = True continue else: # code lc3 on the fly - logging.debug(f"BIG {i} ({big.get('name', 'N/A')}): Attempting to read pcm_frame.") pcm_frame = await anext(big['audio_input'].frames(big['lc3_frame_samples']), None) - logging.debug(f"BIG {i} ({big.get('name', 'N/A')}): Read pcm_frame: {'None' if pcm_frame is None else f'type {type(pcm_frame)}, len {len(pcm_frame)} bytes' if isinstance(pcm_frame, bytes) else f'type {type(pcm_frame)}, shape {pcm_frame.shape}' if hasattr(pcm_frame, 'shape') else f'type {type(pcm_frame)}'}") if pcm_frame is None: # Not all streams may stop at the same time stream_finished[i] = True diff --git a/src/auracast/server/multicast_frontend.py b/src/auracast/server/multicast_frontend.py index 58f66ee..7a09ac9 100644 --- a/src/auracast/server/multicast_frontend.py +++ b/src/auracast/server/multicast_frontend.py @@ -14,27 +14,14 @@ PTIME = 40 BACKEND_URL = "http://localhost:5000" # Try loading persisted settings from backend -# This is the correct place to define saved_settings before it's used for defaults saved_settings = {} try: resp = requests.get(f"{BACKEND_URL}/status", timeout=1) if resp.status_code == 200: saved_settings = resp.json() except Exception: - # If backend is not available or error, saved_settings will be empty dict - # Defaults will be used for gain values in this case. saved_settings = {} -# Initialize gain session states -# This must come AFTER saved_settings is populated. -default_webapp_gain = float(saved_settings.get('webapp_mic_gain', 1.0)) -if 'webapp_mic_gain' not in st.session_state: - st.session_state.webapp_mic_gain = default_webapp_gain - -default_usb_gain = float(saved_settings.get('usb_mic_gain', 1.0)) -if 'usb_mic_gain' not in st.session_state: - st.session_state.usb_mic_gain = default_usb_gain - st.title("🎙️ Auracast Audio Mode Control") # Audio mode selection with persisted default @@ -66,12 +53,9 @@ if audio_mode in ["Webapp", "USB"]: language = st.text_input("Language (ISO 639-3)", value=default_lang) # Gain slider for Webapp mode if audio_mode == "Webapp": - st.session_state.webapp_mic_gain = st.slider( - "Microphone Gain", 0.0, 4.0, st.session_state.webapp_mic_gain, 0.1, - help="Adjust microphone volume sent to Auracast (applied by browser)" - ) - # For USB mode, gain slider is defined below. - # The variable 'mic_gain' for JS is sourced from st.session_state.webapp_mic_gain within Webapp mode logic. + mic_gain = st.slider("Microphone Gain", 0.0, 4.0, 1.0, 0.1, help="Adjust microphone volume sent to Auracast") + else: + mic_gain = 1.0 # Input device selection for USB mode if audio_mode == "USB": @@ -103,16 +87,6 @@ if audio_mode in ["Webapp", "USB"]: st.rerun() # We send only the numeric/card identifier (before :) or 'default' input_device = selected_option.split(":", 1)[0] if ":" in selected_option else selected_option - - # USB Microphone Gain Slider - st.session_state.usb_mic_gain = st.slider( - "Microphone Gain (USB)", - min_value=0.0, - max_value=4.0, - value=st.session_state.usb_mic_gain, # Use session state value - step=0.1, - help="Adjust microphone volume for USB input (applied by server)" - ) else: input_device = None start_stream = st.button("Start Auracast") @@ -122,7 +96,7 @@ if audio_mode in ["Webapp", "USB"]: if audio_mode == "Webapp" and st.session_state.get('stream_started'): update_js = f""" """ st.components.v1.html(update_js, height=0) @@ -148,17 +122,6 @@ if audio_mode in ["Webapp", "USB"]: import time; time.sleep(1) # Prepare config using the model (do NOT send qos_config, only relevant fields) q = quality_map[quality] - - # Determine audio_source based on mode and gain settings - if audio_mode == "USB": - current_usb_gain = st.session_state.get('usb_mic_gain', 1.0) # Use .get for safety - audio_source_str = f"device:{input_device},gain={current_usb_gain}" - elif audio_mode == "Webapp": - audio_source_str = "webrtc" - # Webapp gain is handled client-side by JS using st.session_state.webapp_mic_gain - else: # Assuming a 'network' mode or other future modes - audio_source_str = "network" # Default or handle other modes - config = auracast_config.AuracastConfigGroup( auracast_sampling_rate_hz=q['rate'], octets_per_frame=q['octets'], @@ -168,7 +131,11 @@ if audio_mode in ["Webapp", "USB"]: name=stream_name, program_info=f"{stream_name} {quality}", language=language, - audio_source=audio_source_str, # Use the constructed string + audio_source=( + f"device:{input_device}" if audio_mode == "USB" else ( + "webrtc" if audio_mode == "Webapp" else "network" + ) + ), input_format=(f"int16le,{q['rate']},1" if audio_mode == "USB" else "auto"), iso_que_len=1, # TODO: this should be way less to decrease delay sampling_frequency=q['rate'], @@ -193,7 +160,7 @@ if audio_mode in ["Webapp", "USB"]: (async () => {{ if (window.webrtc_started) return; // Prevent re-init on rerun window.webrtc_started = true; - const GAIN_VALUE = {st.session_state.webapp_mic_gain}; + const GAIN_VALUE = {mic_gain}; const pc = new RTCPeerConnection(); // No STUN needed for localhost const micStream = await navigator.mediaDevices.getUserMedia({{audio:true}}); // Create Web Audio gain processing diff --git a/src/auracast/server/multicast_server.py b/src/auracast/server/multicast_server.py index a4b9779..06b69cb 100644 --- a/src/auracast/server/multicast_server.py +++ b/src/auracast/server/multicast_server.py @@ -4,7 +4,7 @@ import logging as log import uuid import json import sys -from datetime import datetime, timezone +from datetime import datetime import asyncio import numpy as np from pydantic import BaseModel @@ -88,58 +88,25 @@ async def initialize(conf: auracast_config.AuracastConfigGroup): # initialize the streams dict # persist stream settings for later retrieval - # Derive audio_mode from first BIG audio_source and parse gain for all device sources - audio_mode_persist = 'Network' # Default - input_device_persist = None # Default for saving settings - - if conf.bigs: - first_big = conf.bigs[0] - # Determine audio_mode for saving settings based on the first BIG - if first_big.audio_source.startswith('device:'): - audio_mode_persist = 'USB' - # For saving settings, just get the device ID part from the first BIG - device_id_part = first_big.audio_source.split(':', 1)[1].split(',', 1)[0] - input_device_persist = device_id_part - elif first_big.audio_source == 'webrtc': - audio_mode_persist = 'Webapp' - - # Parse gain for all BIGs that are device inputs - for big_config in conf.bigs: - if big_config.audio_source.startswith('device:'): - parts = big_config.audio_source.split(':', 1)[1].split(',') - device_id = parts[0] - gain_value = 1.0 # Default gain - if len(parts) > 1: - for part in parts[1:]: - if part.startswith('gain='): - try: - gain_value = float(part.split('=')[1]) - except ValueError: - log.warning(f"Invalid gain value in audio_source: {part}. Using default 1.0.") - gain_value = 1.0 - break # Found gain, no need to check other parts - big_config.input_gain = gain_value - # Update audio_source to only contain the device ID for Multicaster compatibility if needed - # For now, let's assume Multicaster will handle the full string or we adapt it later. - # big_config.audio_source = f"device:{device_id}" # Optional: simplify for downstream if it doesn't parse gain + # Derive audio_mode from first BIG audio_source + first_source = conf.bigs[0].audio_source if conf.bigs else '' + if first_source.startswith('device:'): + audio_mode_persist = 'USB' + input_device = first_source.split(':', 1)[1] if ':' in first_source else 'default' + elif first_source == 'webrtc': + audio_mode_persist = 'Webapp' + input_device = None + else: + audio_mode_persist = 'Network' + input_device = None save_stream_settings({ 'channel_names': [big.name for big in conf.bigs], 'languages': [big.language for big in conf.bigs], 'audio_mode': audio_mode_persist, - 'input_device': input_device_persist, # Use the parsed device ID for saving - 'webapp_mic_gain': load_stream_settings().get('webapp_mic_gain', 1.0), # Preserve existing webapp gain - 'usb_mic_gain': load_stream_settings().get('usb_mic_gain', 1.0), # Preserve existing usb gain - 'timestamp': datetime.now(timezone.utc).isoformat() + 'input_device': input_device, + 'timestamp': datetime.utcnow().isoformat() }) - - # Persist the specific gain value that was just used for USB mode if applicable - if audio_mode_persist == 'USB' and conf.bigs and conf.bigs[0].input_gain is not None: - current_settings = load_stream_settings() - current_settings['usb_mic_gain'] = conf.bigs[0].input_gain - # Ensure timestamp is also updated if we are re-saving - current_settings['timestamp'] = datetime.now(timezone.utc).isoformat() - save_stream_settings(current_settings) global_config_group = conf # If there is an existing multicaster, cleanly shut it down first so audio devices are released if multicaster is not None: