5 Commits

5 changed files with 128 additions and 75 deletions

View File

@@ -17,7 +17,8 @@ dependencies = [
"sounddevice (>=0.5.2,<0.6.0)",
"python-dotenv (>=1.1.1,<2.0.0)",
"smbus2 (>=0.5.0,<0.6.0)",
"samplerate (>=0.2.2,<0.3.0)"
"samplerate (>=0.2.2,<0.3.0)",
"pyalsaaudio (>=0.9.0,<1.0.0)"
]
[project.optional-dependencies]

View File

@@ -30,6 +30,7 @@ import time
import threading
import numpy as np # for audio down-mix
import samplerate
import os
import lc3 # type: ignore # pylint: disable=E0401
@@ -56,7 +57,7 @@ from auracast.utils.webrtc_audio_input import WebRTCAudioInput
# Patch sounddevice.InputStream globally to use low-latency settings
import sounddevice as sd
import alsaaudio
from collections import deque
@@ -139,96 +140,139 @@ class AlsaArecordAudioInput(audio_io.AudioInput):
self._proc = None
class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
"""Patched SoundDeviceAudioInput with low-latency capture and adaptive resampling."""
class PyAlsaAudioInput(audio_io.ThreadedAudioInput):
"""PyALSA audio input with non-blocking reads - supports mono/stereo."""
def _open(self):
"""Create RawInputStream with low-latency parameters and initialize ring buffer."""
dev_info = sd.query_devices(self._device)
hostapis = sd.query_hostapis()
api_index = dev_info.get('hostapi')
api_name = hostapis[api_index]['name'] if isinstance(api_index, int) and 0 <= api_index < len(hostapis) else 'unknown'
pa_ver = sd.get_portaudio_version()
def __init__(self, device, pcm_format: audio_io.PcmFormat):
super().__init__()
self._device = str(device) if not isinstance(device, str) else device
if self._device.isdigit():
self._device = 'default' if self._device == '0' else f'hw:{self._device}'
self._pcm_format = pcm_format
self._pcm = None
self._actual_channels = None
self._periodsize = None
self._hw_channels = None
self._first_read = True
self._resampler = None
self._resampler_buffer = np.empty(0, dtype=np.float32)
logging.info(
"SoundDevice backend=%s device='%s' (id=%s) ch=%s default_low_input_latency=%.4f default_high_input_latency=%.4f portaudio=%s",
api_name,
dev_info.get('name'),
self._device,
dev_info.get('max_input_channels'),
float(dev_info.get('default_low_input_latency') or 0.0),
float(dev_info.get('default_high_input_latency') or 0.0),
pa_ver[1] if isinstance(pa_ver, tuple) and len(pa_ver) >= 2 else pa_ver,
)
# Create RawInputStream with injected low-latency parameters
# Target ~2 ms blocksize (48 kHz -> 96 frames). For other rates, keep ~2 ms.
_sr = int(self._pcm_format.sample_rate)
self.counter=0
self.max_avail=0
self.logfile_name="available_samples.txt"
self.blocksize = 120
if os.path.exists(self.logfile_name):
os.remove(self.logfile_name)
self._stream = sd.RawInputStream(
samplerate=self._pcm_format.sample_rate,
def _open(self) -> audio_io.PcmFormat:
ALSA_PERIODSIZE = 240
ALSA_PERIODS = 4
ALSA_MODE = alsaaudio.PCM_NONBLOCK
requested_rate = int(self._pcm_format.sample_rate)
requested_channels = int(self._pcm_format.channels)
self._periodsize = ALSA_PERIODSIZE
self._pcm = alsaaudio.PCM(
type=alsaaudio.PCM_CAPTURE,
mode=ALSA_MODE,
device=self._device,
channels=self._pcm_format.channels,
dtype='int16',
blocksize=self.blocksize,
latency=0.004,
periods=ALSA_PERIODS,
)
self._stream.start()
self._pcm.setchannels(requested_channels)
self._pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
actual_rate = self._pcm.setrate(requested_rate)
self._pcm.setperiodsize(ALSA_PERIODSIZE)
logging.info("PyALSA: device=%s rate=%d ch=%d periodsize=%d (%.1fms) periods=%d mode=%s",
self._device, actual_rate, requested_channels, ALSA_PERIODSIZE,
(ALSA_PERIODSIZE / actual_rate) * 1000, ALSA_PERIODS, ALSA_MODE)
if actual_rate != requested_rate:
logging.warning("PyALSA: Sample rate mismatch! requested=%d actual=%d", requested_rate, actual_rate)
self._actual_channels = requested_channels
self._resampler = samplerate.Resampler('sinc_fastest', channels=requested_channels)
self._resampler_buffer = np.empty(0, dtype=np.float32)
self._bang_bang = 0
return audio_io.PcmFormat(
audio_io.PcmFormat.Endianness.LITTLE,
audio_io.PcmFormat.SampleType.INT16,
self._pcm_format.sample_rate,
1,
actual_rate,
requested_channels,
)
def _read(self, frame_size: int) -> bytes:
"""Read PCM samples from the stream."""
try:
avail = self._pcm.avail()
logging.debug("PyALSA: avail before read: %d", avail)
length, data = self._pcm.read_sw(frame_size + self._bang_bang)
avail = self._pcm.avail()
SETPOINT = 120
TOLERANCE = 40
if avail < SETPOINT - TOLERANCE:
self._bang_bang = -1
elif avail > SETPOINT + TOLERANCE:
self._bang_bang = 1
else:
self._bang_bang = 0
logging.debug("PyALSA: read length=%d, data length=%d, avail=%d, bang_bang=%d", length, len(data), avail, self._bang_bang)
#if self.counter % 50 == 0:
frame_size = frame_size + 1 # consume samples a little faster to avoid latency akkumulation
if length > 0:
if self._first_read:
expected_mono = self._periodsize * 2
expected_stereo = self._periodsize * 2 * 2
# self._hw_channels = 2 if len(data) == expected_stereo else 1
self._hw_channels = 2 # TODO fix stereo detection, on first read might detect 0 data
logging.info("PyALSA first read: bytes=%d detected_hw_channels=%d requested_channels=%d",
len(data), self._hw_channels, self._actual_channels)
self._first_read = False
if self._hw_channels == 2 and self._actual_channels == 1:
pcm_stereo = np.frombuffer(data, dtype=np.int16)
pcm_mono = pcm_stereo[::2]
data = pcm_mono.tobytes()
actual_samples = len(data) // (2 * self._actual_channels)
ratio = frame_size / actual_samples
pcm_f32 = np.frombuffer(data, dtype=np.int16).astype(np.float32) / 32768.0
if self._actual_channels > 1:
pcm_f32 = pcm_f32.reshape(-1, self._actual_channels)
resampled = self._resampler.process(pcm_f32, ratio, end_of_input=False)
if self._actual_channels > 1:
resampled = resampled.reshape(-1)
self._resampler_buffer = np.concatenate([self._resampler_buffer, resampled])
else:
logging.warning("PyALSA: No data read from ALSA")
self._resampler_buffer = np.concatenate([
self._resampler_buffer,
np.zeros(frame_size * self._actual_channels, dtype=np.float32),
])
except alsaaudio.ALSAAudioError as e:
logging.error("PyALSA: ALSA read error: %s", e)
self._resampler_buffer = np.concatenate([
self._resampler_buffer,
np.zeros(frame_size * self._actual_channels, dtype=np.float32),
])
pcm_buffer, overflowed = self._stream.read(frame_size)
if overflowed:
logging.warning("SoundDeviceAudioInput: overflowed")
needed = frame_size * self._actual_channels
if len(self._resampler_buffer) < needed:
pad = np.zeros(needed - len(self._resampler_buffer), dtype=np.float32)
self._resampler_buffer = np.concatenate([self._resampler_buffer, pad])
logging.debug("PyALSA: padded buffer with %d samples", needed - len(self._resampler_buffer))
n_available = self._stream.read_available
output = self._resampler_buffer[:needed]
self._resampler_buffer = self._resampler_buffer[needed:]
# adapt = n_available > 20
# if adapt:
# pcm_extra, overflowed = self._stream.read(3)
# logging.info('consuming extra samples, available was %d', n_available)
# if overflowed:
# logging.warning("SoundDeviceAudioInput: overflowed")
# out = bytes(pcm_buffer) + bytes(pcm_extra)
# else:
out = bytes(pcm_buffer)
logging.debug("PyALSA: resampler_buffer remaining=%d", len(self._resampler_buffer))
return np.clip(output * 32767.0, -32768, 32767).astype(np.int16).tobytes()
self.max_avail = max(self.max_avail, n_available)
#Diagnostics
#with open(self.logfile_name, "a", encoding="utf-8") as f:
# f.write(f"{n_available}, {adapt}, {round(self._runavg, 2)}, {overflowed}\n")
def _close(self) -> None:
if self._pcm:
self._pcm.close()
self._pcm = None
if self.counter % 500 == 0:
logging.info(
"read available=%d, max=%d, latency:%d",
n_available, self.max_avail, self._stream.latency
)
self.max_avail = 0
self.counter += 1
return out
audio_io.SoundDeviceAudioInput = ModSoundDeviceAudioInput
audio_io.SoundDeviceAudioInput = PyAlsaAudioInput
# modified from bumble
class ModWaveAudioInput(audio_io.ThreadedAudioInput):
@@ -538,7 +582,7 @@ async def init_broadcast(
def on_flow():
data_packet_queue = iso_queue.data_packet_queue
print(
logging.info(
f'\rPACKETS: pending={data_packet_queue.pending}, '
f'queued={data_packet_queue.queued}, '
f'completed={data_packet_queue.completed}',

View File

@@ -143,6 +143,10 @@ async def main():
level=os.environ.get('LOG_LEVEL', logging.DEBUG),
format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s'
)
# Enable debug logging for bumble
# logging.getLogger('bumble').setLevel(logging.DEBUG)
os.chdir(os.path.dirname(__file__))
global_conf = auracast_config.AuracastGlobalConfig(

View File

@@ -26,6 +26,10 @@ from auracast.utils.sounddevice_utils import (
)
load_dotenv()
# Configure bumble debug logging
# log.getLogger('bumble').setLevel(log.DEBUG)
# make sure pipewire sets latency
# Primary and secondary persisted settings files
STREAM_SETTINGS_FILE1 = os.path.join(os.path.dirname(__file__), 'stream_settings.json')

0
src/service/update_and_run_server_and_frontend.sh Normal file → Executable file
View File