Revert "feat: add gain control for USB and webapp microphone inputs with persistence - to be tested"

This reverts commit 0bf87c85b7.
2025-06-19 13:04:41 +02:00
parent 0bf87c85b7
commit 2e46535fa4
4 changed files with 93 additions and 200 deletions
@@ -51,7 +51,6 @@ class AuracastBigConfig(BaseModel):
    program_info: str = 'Some Announcements'
    audio_source: str = 'file:./auracast/announcement_48_10_96000_en.wav'
    input_format: str = 'auto'
-    input_gain: float | None = None # Parsed from audio_source for device inputs
    loop: bool = True
    precode_wav: bool = False
    iso_que_len: int = 64
@@ -452,135 +452,97 @@ class Streamer():
                    lc3_frames = itertools.cycle(lc3_frames)
                big['lc3_frames'] = lc3_frames

-            # anything else, e.g. realtime stream from device (bumble) or non-precoded file
+            # anything else, e.g. realtime stream from device (bumble)
            else:
-                current_big_config = self.big_config[i]
-                audio_source_str = str(current_big_config.audio_source) # Ensure string type
-                input_format_str = current_big_config.input_format
-                input_gain_val = current_big_config.input_gain
-
-                audio_filter_for_create = None
-                effective_audio_source_for_create = audio_source_str
-
-                if audio_source_str.startswith('device:'):
-                    parts = audio_source_str.split(':', 1)
-                    if len(parts) > 1:
-                        device_specifier_with_potential_gain = parts[1]
-                        pure_device_name = device_specifier_with_potential_gain.split(',', 1)[0]
-                        effective_audio_source_for_create = f"device:{pure_device_name}"
-                    
-                    gain_to_apply = input_gain_val if input_gain_val is not None else 1.0
-                    if abs(gain_to_apply - 1.0) > 0.01:
-                        audio_filter_for_create = f"volume={gain_to_apply:.2f}"
-                        logger.info(f"Applying FFmpeg volume filter for {effective_audio_source_for_create}: {audio_filter_for_create}")
-                elif audio_source_str.startswith('file:'):
-                    gain_to_apply = input_gain_val if input_gain_val is not None else 1.0
-                    if abs(gain_to_apply - 1.0) > 0.01:
-                        audio_filter_for_create = f"volume={gain_to_apply:.2f}"
-                        logger.info(f"Applying FFmpeg volume filter for {audio_source_str}: {audio_filter_for_create}")
-
-                # Prepare the source string, potentially with an FFmpeg filter
-                final_audio_source_spec = effective_audio_source_for_create
-                if current_big_config.input_gain is not None and input_format_str == 'ffmpeg': # Apply gain only if ffmpeg is used
-                    audio_filter_value = f"volume={current_big_config.input_gain:.2f}"
-                    logging.info(f"Applying FFmpeg volume filter for {effective_audio_source_for_create}: {audio_filter_value}")
-                    # Append 'af' (audio filter) option to the source spec for FFmpeg
-                    if '?' in final_audio_source_spec: # if there are already ffmpeg options (e.g. sample_rate)
-                         final_audio_source_spec = f"{final_audio_source_spec}&af={audio_filter_value}"
-                    else: # if this is the first ffmpeg option
-                         final_audio_source_spec = f"{final_audio_source_spec},af={audio_filter_value}"
-
-                # Initial creation of audio_input
-                audio_input = await audio_io.create_audio_input(
-                    final_audio_source_spec,
-                    input_format=input_format_str
-                )
-                big['audio_input'] = audio_input # Store early for potential cleanup
-
+                audio_input = await audio_io.create_audio_input(audio_source, input_format)
+                # Store early so stop_streaming can close even if open() fails
+                big['audio_input'] = audio_input
+                # SoundDeviceAudioInput (used for `mic:<device>` captures) has no `.rewind`.
                if hasattr(audio_input, "rewind"):
-                    audio_input.rewind = current_big_config.loop
+                    audio_input.rewind = big_config[i].loop

+                # Retry logic – ALSA sometimes keeps the device busy for a short time after the
+                # previous stream has closed. Handle PortAudioError -9985 with back-off retries.
                import sounddevice as _sd
                max_attempts = 3
-                pcm_format = None # Initialize pcm_format
                for attempt in range(1, max_attempts + 1):
                    try:
-                        logging.info(f"Attempting to open audio input: {effective_audio_source_for_create} (attempt {attempt})")
                        pcm_format = await audio_input.open()
-                        logging.info(f"Successfully opened audio input: {effective_audio_source_for_create}, PCM Format: {pcm_format}")
                        break  # success
                    except _sd.PortAudioError as err:
-                        logging.error('Could not open audio device %s with error %s (attempt %d/%d)', effective_audio_source_for_create, err, attempt, max_attempts)
-                        code = getattr(err, 'errno', None) or (err.args[1] if len(err.args) > 1 and isinstance(err.args[1], int) else None)
-                        if code == -9985 and attempt < max_attempts: # paDeviceUnavailable
-                            backoff_ms = (2 ** (attempt - 1)) * 100 # exponential backoff
-                            logging.warning("PortAudio device busy. Retrying in %.1f ms…", backoff_ms)
+                        # -9985 == paDeviceUnavailable
+                        logging.error('Could not open audio device %s with error %s', audio_source, err)
+                        code = None
+                        if hasattr(err, 'errno'):
+                            code = err.errno
+                        elif len(err.args) > 1 and isinstance(err.args[1], int):
+                            code = err.args[1]
+                        if code == -9985 and attempt < max_attempts:
+                            backoff_ms = 200 * attempt
+                            logging.warning("PortAudio device busy (attempt %d/%d). Retrying in %.1f ms…", attempt, max_attempts, backoff_ms)
+                            # ensure device handle and PortAudio context are closed before retrying
                            try:
-                                if hasattr(audio_input, "aclose"): await audio_input.aclose()
-                                elif hasattr(audio_input, "close"): audio_input.close()
-                            except Exception as close_err: logging.debug(f"Error closing audio_input during retry: {close_err}")
-                            if hasattr(_sd, "_terminate"): # sounddevice specific cleanup
-                                try: _sd._terminate()
-                                except Exception as term_err: logging.debug(f"Error terminating PortAudio: {term_err}")
+                                if hasattr(audio_input, "aclose"):
+                                    await audio_input.aclose()
+                                elif hasattr(audio_input, "close"):
+                                    audio_input.close()
+                            except Exception:
+                                pass
+                            # Fully terminate PortAudio to drop lingering handles (sounddevice quirk)
+                            if hasattr(_sd, "_terminate"):
+                                try:
+                                    _sd._terminate()
+                                except Exception:
+                                    pass
+                            # Small pause then re-initialize PortAudio
                            await asyncio.sleep(0.1)
-                            if hasattr(_sd, "_initialize"): # sounddevice specific reinit
-                                try: _sd._initialize()
-                                except Exception as init_err: logging.debug(f"Error initializing PortAudio: {init_err}")
-                            await asyncio.sleep(backoff_ms / 1000)
-                            # Recreate audio_input for next attempt, using the potentially modified source spec
-                            audio_input = await audio_io.create_audio_input(
-                                final_audio_source_spec, # Use the spec that includes the filter if applicable
-                                input_format=input_format_str
-                            )
-                            big['audio_input'] = audio_input # Update stored reference
-                            if hasattr(audio_input, "rewind"):
-                                audio_input.rewind = current_big_config.loop
-                            continue
-                        raise # Re-raise if not paDeviceUnavailable or max_attempts reached
-                    except Exception as e:
-                        logging.error(f"Unexpected error opening audio device {effective_audio_source_for_create}: {e}")
-                        raise # Re-raise other unexpected errors
-                else: # else for 'for' loop: if loop finished without break
-                    logging.error("Unable to open audio device '%s' after %d attempts – giving up.", effective_audio_source_for_create, max_attempts)
-                    return # Or handle error more gracefully, e.g. mark BIG as inactive
+                            if hasattr(_sd, "_initialize"):
+                                try:
+                                    _sd._initialize()
+                                except Exception:
+                                    pass

-                # Proceed with encoder setup if pcm_format was obtained
-                if not pcm_format:
-                    logging.error(f"Failed to obtain PCM format for {effective_audio_source_for_create}. Cannot set up encoder.")
+                            # Back-off before next attempt
+                            await asyncio.sleep(backoff_ms / 1000)
+                            # Recreate audio_input fresh for next attempt
+                            audio_input = await audio_io.create_audio_input(audio_source, input_format)
+                            continue
+                        # Other errors or final attempt – re-raise so caller can abort gracefully
+                        raise
+                else:
+                    # Loop exhausted without break
+                    logging.error("Unable to open audio device after %d attempts – giving up", max_attempts)
                    return

                if pcm_format.channels != 1:
-                    logging.info("Input device '%s' provides %d channels – will down-mix to mono for LC3", effective_audio_source_for_create, pcm_format.channels)
-                    # Downmixing is typically handled by FFmpeg if channels > 1 and output is mono
-                    # For LC3, we always want mono, so this is informational.
-                
-                # Determine pcm_bit_depth for encoder based on pcm_format.sample_type
-                if pcm_format.sample_type == audio_io.PcmFormat.SampleType.INT16:
-                    pcm_bit_depth = 16
-                elif pcm_format.sample_type == audio_io.PcmFormat.SampleType.FLOAT32:
-                    pcm_bit_depth = None # LC3 encoder can handle float32 directly
-                else:
-                    logging.error("Unsupported PCM sample type: %s for %s. Only INT16 and FLOAT32 are supported.", pcm_format.sample_type, effective_audio_source_for_create)
-                    return
+                    logging.info("Input device provides %d channels – will down-mix to mono for LC3", pcm_format.channels)
+                    if pcm_format.sample_type == audio_io.PcmFormat.SampleType.INT16:
+                        pcm_bit_depth = 16
+                    elif pcm_format.sample_type == audio_io.PcmFormat.SampleType.FLOAT32:
+                        pcm_bit_depth = None
+                    else:
+                        logging.error("Only INT16 and FLOAT32 sample types are supported")
+                        return
+                    encoder = lc3.Encoder(
+                        frame_duration_us=global_config.frame_duration_us,
+                        sample_rate_hz=global_config.auracast_sampling_rate_hz,
+                        num_channels=1,
+                        input_sample_rate_hz=pcm_format.sample_rate,
+                    )
+                    lc3_frame_samples = encoder.get_frame_samples() # number of the pcm samples per lc3 frame

-                encoder = lc3.Encoder(
-                    frame_duration_us=self.global_config.frame_duration_us,
-                    sample_rate_hz=self.global_config.auracast_sampling_rate_hz,
-                    num_channels=1, # LC3 is mono
-                    input_sample_rate_hz=pcm_format.sample_rate,
-                )
-                lc3_frame_samples = encoder.get_frame_samples()
-                big['pcm_bit_depth'] = pcm_bit_depth
-                big['lc3_frame_samples'] = lc3_frame_samples
-                big['lc3_bytes_per_frame'] = self.global_config.octets_per_frame
-                big['encoder'] = encoder
-                big['precoded'] = False
+                    big['pcm_bit_depth'] = pcm_bit_depth
+                    big['channels'] = pcm_format.channels
+                    big['lc3_frame_samples'] = lc3_frame_samples
+                    big['lc3_bytes_per_frame'] = global_config.octets_per_frame
+                    big['audio_input'] = audio_input
+                    big['encoder'] = encoder
+                    big['precoded'] = False


            logging.info("Streaming audio...")
            bigs = self.bigs
            self.is_streaming = True
-            logging.info("Entering main streaming loop...")
            # One streamer fits all
            while self.is_streaming:
                stream_finished = [False for _ in range(len(bigs))]
@@ -595,9 +557,7 @@ class Streamer():
                            stream_finished[i] = True
                            continue
                    else: # code lc3 on the fly
-                        logging.debug(f"BIG {i} ({big.get('name', 'N/A')}): Attempting to read pcm_frame.")
                        pcm_frame = await anext(big['audio_input'].frames(big['lc3_frame_samples']), None)
-                        logging.debug(f"BIG {i} ({big.get('name', 'N/A')}): Read pcm_frame: {'None' if pcm_frame is None else f'type {type(pcm_frame)}, len {len(pcm_frame)} bytes' if isinstance(pcm_frame, bytes) else f'type {type(pcm_frame)}, shape {pcm_frame.shape}' if hasattr(pcm_frame, 'shape') else f'type {type(pcm_frame)}'}")

                        if pcm_frame is None: # Not all streams may stop at the same time
                            stream_finished[i] = True
@@ -14,27 +14,14 @@ PTIME = 40
 BACKEND_URL = "http://localhost:5000"

 # Try loading persisted settings from backend
-# This is the correct place to define saved_settings before it's used for defaults
 saved_settings = {}
 try:
    resp = requests.get(f"{BACKEND_URL}/status", timeout=1)
    if resp.status_code == 200:
        saved_settings = resp.json()
 except Exception:
-    # If backend is not available or error, saved_settings will be empty dict
-    # Defaults will be used for gain values in this case.
    saved_settings = {}

-# Initialize gain session states
-# This must come AFTER saved_settings is populated.
-default_webapp_gain = float(saved_settings.get('webapp_mic_gain', 1.0))
-if 'webapp_mic_gain' not in st.session_state:
-    st.session_state.webapp_mic_gain = default_webapp_gain
-
-default_usb_gain = float(saved_settings.get('usb_mic_gain', 1.0))
-if 'usb_mic_gain' not in st.session_state:
-    st.session_state.usb_mic_gain = default_usb_gain
-
 st.title("🎙️ Auracast Audio Mode Control")

 # Audio mode selection with persisted default
@@ -66,12 +53,9 @@ if audio_mode in ["Webapp", "USB"]:
    language = st.text_input("Language (ISO 639-3)", value=default_lang)
    # Gain slider for Webapp mode
    if audio_mode == "Webapp":
-        st.session_state.webapp_mic_gain = st.slider(
-            "Microphone Gain", 0.0, 4.0, st.session_state.webapp_mic_gain, 0.1,
-            help="Adjust microphone volume sent to Auracast (applied by browser)"
-        )
-    # For USB mode, gain slider is defined below.
-    # The variable 'mic_gain' for JS is sourced from st.session_state.webapp_mic_gain within Webapp mode logic.
+        mic_gain = st.slider("Microphone Gain", 0.0, 4.0, 1.0, 0.1, help="Adjust microphone volume sent to Auracast")
+    else:
+        mic_gain = 1.0

    # Input device selection for USB mode
    if audio_mode == "USB":
@@ -103,16 +87,6 @@ if audio_mode in ["Webapp", "USB"]:
                st.rerun()
        # We send only the numeric/card identifier (before :) or 'default'
        input_device = selected_option.split(":", 1)[0] if ":" in selected_option else selected_option
-
-        # USB Microphone Gain Slider
-        st.session_state.usb_mic_gain = st.slider(
-            "Microphone Gain (USB)",
-            min_value=0.0,
-            max_value=4.0,
-            value=st.session_state.usb_mic_gain, # Use session state value
-            step=0.1,
-            help="Adjust microphone volume for USB input (applied by server)"
-        )
    else:
        input_device = None
    start_stream = st.button("Start Auracast")
@@ -122,7 +96,7 @@ if audio_mode in ["Webapp", "USB"]:
    if audio_mode == "Webapp" and st.session_state.get('stream_started'):
        update_js = f"""
        <script>
-            if (window.gainNode) {{ window.gainNode.gain.value = {st.session_state.webapp_mic_gain}; }}
+            if (window.gainNode) {{ window.gainNode.gain.value = {mic_gain}; }}
        </script>
        """
        st.components.v1.html(update_js, height=0)
@@ -148,17 +122,6 @@ if audio_mode in ["Webapp", "USB"]:
        import time; time.sleep(1)
        # Prepare config using the model (do NOT send qos_config, only relevant fields)
        q = quality_map[quality]
-
-        # Determine audio_source based on mode and gain settings
-        if audio_mode == "USB":
-            current_usb_gain = st.session_state.get('usb_mic_gain', 1.0) # Use .get for safety
-            audio_source_str = f"device:{input_device},gain={current_usb_gain}"
-        elif audio_mode == "Webapp":
-            audio_source_str = "webrtc"
-            # Webapp gain is handled client-side by JS using st.session_state.webapp_mic_gain
-        else: # Assuming a 'network' mode or other future modes
-            audio_source_str = "network" # Default or handle other modes
-
        config = auracast_config.AuracastConfigGroup(
            auracast_sampling_rate_hz=q['rate'],
            octets_per_frame=q['octets'],
@@ -168,7 +131,11 @@ if audio_mode in ["Webapp", "USB"]:
                    name=stream_name,
                    program_info=f"{stream_name} {quality}",
                    language=language,
-                    audio_source=audio_source_str, # Use the constructed string
+                    audio_source=(
+                        f"device:{input_device}" if audio_mode == "USB" else (
+                            "webrtc" if audio_mode == "Webapp" else "network"
+                        )
+                    ),
                    input_format=(f"int16le,{q['rate']},1" if audio_mode == "USB" else "auto"),
                    iso_que_len=1, # TODO: this should be way less to decrease delay
                    sampling_frequency=q['rate'],
@@ -193,7 +160,7 @@ if audio_mode in ["Webapp", "USB"]:
            (async () => {{
                if (window.webrtc_started) return; // Prevent re-init on rerun
                window.webrtc_started = true;
-                const GAIN_VALUE = {st.session_state.webapp_mic_gain};
+                const GAIN_VALUE = {mic_gain};
                const pc = new RTCPeerConnection(); // No STUN needed for localhost
                const micStream = await navigator.mediaDevices.getUserMedia({{audio:true}});
                // Create Web Audio gain processing
@@ -4,7 +4,7 @@ import logging as log
 import uuid
 import json
 import sys
-from datetime import datetime, timezone
+from datetime import datetime
 import asyncio
 import numpy as np
 from pydantic import BaseModel
@@ -88,58 +88,25 @@ async def initialize(conf: auracast_config.AuracastConfigGroup):

        # initialize the streams dict
        # persist stream settings for later retrieval
-        # Derive audio_mode from first BIG audio_source and parse gain for all device sources
-        audio_mode_persist = 'Network' # Default
-        input_device_persist = None # Default for saving settings
-
-        if conf.bigs:
-            first_big = conf.bigs[0]
-            # Determine audio_mode for saving settings based on the first BIG
-            if first_big.audio_source.startswith('device:'):
-                audio_mode_persist = 'USB'
-                # For saving settings, just get the device ID part from the first BIG
-                device_id_part = first_big.audio_source.split(':', 1)[1].split(',', 1)[0]
-                input_device_persist = device_id_part
-            elif first_big.audio_source == 'webrtc':
-                audio_mode_persist = 'Webapp'
-            
-            # Parse gain for all BIGs that are device inputs
-            for big_config in conf.bigs:
-                if big_config.audio_source.startswith('device:'):
-                    parts = big_config.audio_source.split(':', 1)[1].split(',')
-                    device_id = parts[0]
-                    gain_value = 1.0  # Default gain
-                    if len(parts) > 1:
-                        for part in parts[1:]:
-                            if part.startswith('gain='):
-                                try:
-                                    gain_value = float(part.split('=')[1])
-                                except ValueError:
-                                    log.warning(f"Invalid gain value in audio_source: {part}. Using default 1.0.")
-                                    gain_value = 1.0
-                                break # Found gain, no need to check other parts
-                    big_config.input_gain = gain_value
-                    # Update audio_source to only contain the device ID for Multicaster compatibility if needed
-                    # For now, let's assume Multicaster will handle the full string or we adapt it later.
-                    # big_config.audio_source = f"device:{device_id}" # Optional: simplify for downstream if it doesn't parse gain
+        # Derive audio_mode from first BIG audio_source
+        first_source = conf.bigs[0].audio_source if conf.bigs else ''
+        if first_source.startswith('device:'):
+            audio_mode_persist = 'USB'

+            input_device = first_source.split(':', 1)[1] if ':' in first_source else 'default'
+        elif first_source == 'webrtc':
+            audio_mode_persist = 'Webapp'
+            input_device = None
+        else:
+            audio_mode_persist = 'Network'
+            input_device = None
        save_stream_settings({
            'channel_names': [big.name for big in conf.bigs],
            'languages': [big.language for big in conf.bigs],
            'audio_mode': audio_mode_persist,
-            'input_device': input_device_persist, # Use the parsed device ID for saving
-            'webapp_mic_gain': load_stream_settings().get('webapp_mic_gain', 1.0), # Preserve existing webapp gain
-            'usb_mic_gain': load_stream_settings().get('usb_mic_gain', 1.0), # Preserve existing usb gain
-            'timestamp': datetime.now(timezone.utc).isoformat()
+            'input_device': input_device,
+            'timestamp': datetime.utcnow().isoformat()
        })
-
-        # Persist the specific gain value that was just used for USB mode if applicable
-        if audio_mode_persist == 'USB' and conf.bigs and conf.bigs[0].input_gain is not None:
-            current_settings = load_stream_settings()
-            current_settings['usb_mic_gain'] = conf.bigs[0].input_gain
-            # Ensure timestamp is also updated if we are re-saving
-            current_settings['timestamp'] = datetime.now(timezone.utc).isoformat()
-            save_stream_settings(current_settings)
        global_config_group = conf
        # If there is an existing multicaster, cleanly shut it down first so audio devices are released
        if multicaster is not None: