Tune the adaptive resampler to be nearly inaudible

This commit is contained in:
pstruebi
2025-10-23 18:23:20 +02:00
parent 329510beae
commit 60aa653aeb
2 changed files with 99 additions and 19 deletions

View File

@@ -52,7 +52,36 @@ from auracast.utils.network_audio_receiver import NetworkAudioReceiverUncoded
from auracast.utils.webrtc_audio_input import WebRTCAudioInput
# Instantiate WebRTC audio input for streaming (can be used per-BIG or globally)
# Patch sounddevice.InputStream globally to use low-latency settings
import sounddevice as sd
class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
    """Patched SoundDeviceAudioInput that opens a RawInputStream with low-latency parameters.

    Overrides ``_open`` so every capture stream is created with a fixed
    240-sample blocksize and a 10 ms latency hint instead of sounddevice's
    defaults. Installed globally below by reassigning
    ``audio_io.SoundDeviceAudioInput``.
    """
    def _open(self):
        """Open the capture stream with injected low-latency parameters.

        Returns:
            audio_io.PcmFormat: little-endian INT16 at the configured sample
            rate, reported with 2 channels.
        """
        # Create RawInputStream with injected low-latency parameters
        self._stream = sd.RawInputStream(
            samplerate=self._pcm_format.sample_rate,
            device=self._device,
            channels=self._pcm_format.channels,
            dtype='int16',
            blocksize=240,  # Match frame size
            latency=0.010,  # 10 ms latency hint to the audio backend
        )
        self._stream.start()
        logging.info(f"SoundDeviceAudioInput: Opened with blocksize=240, latency=0.010 (10ms)")
        # NOTE(review): the returned format hard-codes 2 channels while the
        # stream above is opened with self._pcm_format.channels — confirm the
        # two always agree (or that downstream code expects stereo here).
        return audio_io.PcmFormat(
            audio_io.PcmFormat.Endianness.LITTLE,
            audio_io.PcmFormat.SampleType.INT16,
            self._pcm_format.sample_rate,
            2,
        )
# Globally install the patched class so all subsequent instantiations of
# audio_io.SoundDeviceAudioInput get the low-latency behavior.
audio_io.SoundDeviceAudioInput = ModSoundDeviceAudioInput
# modified from bumble
class ModWaveAudioInput(audio_io.ThreadedAudioInput):
@@ -618,21 +647,24 @@ class Streamer():
big['encoder'] = encoder
big['precoded'] = False
logging.info("Streaming audio...")
bigs = self.bigs
self.is_streaming = True
# Sample discard stats (clock drift compensation)
sample_rate = big['audio_input']._pcm_format.sample_rate
samples_discarded_total = 0 # Total samples discarded
discard_events = 0 # Number of times we discarded samples
frames_since_last_discard = 999 # Guard: frames since last discard (start high to allow first drop)
enable_drift_compensation = global_config.enable_drift_compensation
discard_guard_frames = sample_rate // 100 # Don't allow discard within this many frames of previous discard
# Calculate threshold based on config (default 2ms)
drift_threshold_ms = global_config.drift_threshold_ms if enable_drift_compensation else 0
drop_threshold_samples = 0
drift_threshold_ms = global_config.drift_threshold_ms if global_config.enable_drift_compensation else 0
drop_threshold_samples = int(sample_rate * drift_threshold_ms / 1000.0)
static_drop_samples = int(sample_rate * 0.0005) # Always drop a static amount of samples
if enable_drift_compensation:
logging.info(f"Clock drift compensation ENABLED: threshold={drift_threshold_ms}ms")
if global_config.enable_drift_compensation:
logging.info(f"Clock drift compensation ENABLED: threshold={drift_threshold_ms}ms, guard={discard_guard_frames} frames")
else:
logging.info("Clock drift compensation DISABLED")
@@ -669,26 +701,41 @@ class Streamer():
# Calculate threshold samples based on sample rate (only once per BIG)
if enable_drift_compensation and drop_threshold_samples == 0:
sample_rate = big['audio_input']._pcm_format.sample_rate
drop_threshold_samples = int(sample_rate * drift_threshold_ms / 1000.0)
logging.info(f"Drift compensation threshold: {drop_threshold_samples} samples ({drift_threshold_ms}ms @ {sample_rate}Hz)")
logging.info(f"Static drop amount: {static_drop_samples} samples (3.0ms @ {sample_rate}Hz)")
# Discard excess samples in buffer if above threshold (clock drift compensation)
if enable_drift_compensation and hasattr(big['audio_input'], '_stream') and big['audio_input']._stream:
sd_buffer_samples = big['audio_input']._stream.read_available
if sd_buffer_samples > drop_threshold_samples:
# Discard ALL remaining samples to bring buffer back down
# Guard: only allow discard if enough frames have passed since last discard
if sd_buffer_samples > drop_threshold_samples and frames_since_last_discard >= discard_guard_frames:
# Always drop a static amount (3ms) for predictable behavior
# This matches the crossfade duration better for smoother transitions
samples_to_drop = static_drop_samples
try:
discarded_data = big['audio_input']._stream.read(sd_buffer_samples)
samples_discarded_total += sd_buffer_samples
discarded_data = await anext(big['audio_input'].frames(samples_to_drop))
samples_discarded_total += samples_to_drop
discard_events += 1
if discard_events % 100 == 0: # Log every 100th discard
logging.warning(
f"Discard #{discard_events}: {sd_buffer_samples} samples ({sd_buffer_samples / big['audio_input']._pcm_format.sample_rate * 1000:.1f} ms) "
f"| total discarded: {samples_discarded_total} samples"
)
# Log every discard event with timing information
sample_rate = big['audio_input']._pcm_format.sample_rate
time_since_last_ms = frames_since_last_discard * 10 # Each frame is 10ms
logging.info(
f"DISCARD #{discard_events}: dropped {samples_to_drop} samples ({samples_to_drop / sample_rate * 1000:.1f}ms) | "
f"buffer was {sd_buffer_samples} samples ({sd_buffer_samples / sample_rate * 1000:.1f}ms) | "
f"since_last={frames_since_last_discard} frames ({time_since_last_ms}ms) | "
f"frame={frame_count}"
)
# Reset guard counter
frames_since_last_discard = 0
# Store how much we dropped for potential adaptive crossfade
big['last_drop_samples'] = samples_to_drop
# Set flag to apply crossfade on next frame
big['apply_crossfade'] = True
except Exception as e:
logging.error(f"Failed to discard samples: {e}")
@@ -704,6 +751,39 @@ class Streamer():
samples = np.frombuffer(pcm_frame, dtype=dtype)
samples = samples.reshape(-1, big['channels']).mean(axis=1)
pcm_frame = samples.astype(dtype).tobytes()
# Apply crossfade if samples were just dropped (drift compensation)
if big.get('apply_crossfade') and big.get('prev_pcm_frame') is not None:
# Crossfade duration: 10ms for smoother transition (was 5ms)
dtype = np.int16 if big['pcm_bit_depth'] == 16 else np.float32
sample_rate = big['audio_input']._pcm_format.sample_rate
crossfade_samples = min(int(sample_rate * 0.010), big['lc3_frame_samples'] // 2)
# Convert frames to numpy arrays (make writable copies)
prev_samples = np.frombuffer(big['prev_pcm_frame'], dtype=dtype).copy()
curr_samples = np.frombuffer(pcm_frame, dtype=dtype).copy()
# Create equal-power crossfade curves (smoother than linear)
# Equal-power maintains perceived loudness during transition
t = np.linspace(0, 1, crossfade_samples)
fade_out = np.cos(t * np.pi / 2) # Cosine fade out
fade_in = np.sin(t * np.pi / 2) # Sine fade in
# Apply crossfade to the beginning of current frame with end of previous frame
if len(prev_samples) >= crossfade_samples and len(curr_samples) >= crossfade_samples:
crossfaded = (
prev_samples[-crossfade_samples:] * fade_out +
curr_samples[:crossfade_samples] * fade_in
).astype(dtype)
# Replace beginning of current frame with crossfaded section
curr_samples[:crossfade_samples] = crossfaded
pcm_frame = curr_samples.tobytes()
big['apply_crossfade'] = False
# Store current frame for potential next crossfade
if enable_drift_compensation:
big['prev_pcm_frame'] = pcm_frame
lc3_frame = big['encoder'].encode(
pcm_frame, num_bytes=big['lc3_bytes_per_frame'], bit_depth=big['pcm_bit_depth']
@@ -711,6 +791,8 @@ class Streamer():
await big['iso_queue'].write(lc3_frame)
frame_count += 1
# Increment guard counter (tracks frames since last discard)
frames_since_last_discard += 1
# Periodic stats logging
now = time.perf_counter()

View File

@@ -52,8 +52,6 @@ if __name__ == "__main__":
os.chdir(os.path.dirname(__file__))
# Load .env located next to this script (only uppercase keys will be referenced)
load_dotenv(dotenv_path='.env')
# Default tight ALSA latency (ms); can be overridden via environment
os.environ.setdefault('ALSA_LATENCY_MSEC', '2')
# List USB ALSA inputs
usb_inputs = get_alsa_usb_inputs()
@@ -112,7 +110,7 @@ if __name__ == "__main__":
auracast_sampling_rate_hz = LC3_SRATE,
octets_per_frame = OCTETS_PER_FRAME,
transport=TRANSPORT1,
enable_drift_compensation=False,
enable_drift_compensation=True,
drift_threshold_ms=2.0
)
config.debug = False