First good audio with alsaaudio.

2026-03-18 16:55:55 +01:00
parent e1d717ed5c
commit a605195646
2 changed files with 96 additions and 79 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,8 @@ dependencies = [
    "sounddevice (>=0.5.2,<0.6.0)",
    "python-dotenv (>=1.1.1,<2.0.0)",
    "smbus2 (>=0.5.0,<0.6.0)",
-    "samplerate (>=0.2.2,<0.3.0)"
+    "samplerate (>=0.2.2,<0.3.0)",
    "pyalsaaudio (>=0.9.0,<1.0.0)"
 ]
 [project.optional-dependencies]
--- a/src/auracast/multicast.py
+++ b/src/auracast/multicast.py
@@ -56,7 +56,7 @@ from auracast.utils.webrtc_audio_input import WebRTCAudioInput
 # Patch sounddevice.InputStream globally to use low-latency settings
-import sounddevice as sd
+import alsaaudio
 from collections import deque
@@ -139,96 +139,112 @@ class AlsaArecordAudioInput(audio_io.AudioInput):
        self._proc = None
-class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
+class PyAlsaAudioInput(audio_io.ThreadedAudioInput):
-    """Patched SoundDeviceAudioInput with low-latency capture and adaptive resampling."""
+    """PyALSA audio input with callback thread and ring buffer."""
-    def _open(self):
+    def __init__(self, device, pcm_format: audio_io.PcmFormat):
-        """Create RawInputStream with low-latency parameters and initialize ring buffer."""
+        super().__init__()
-        dev_info = sd.query_devices(self._device)
+        self._device = str(device) if not isinstance(device, str) else device
-        hostapis = sd.query_hostapis()
+        if self._device.isdigit():
-        api_index = dev_info.get('hostapi')
+            self._device = 'default' if self._device == '0' else f'hw:{self._device}'
-        api_name = hostapis[api_index]['name'] if isinstance(api_index, int) and 0 <= api_index < len(hostapis) else 'unknown'
+        self._pcm_format = pcm_format
-        pa_ver = sd.get_portaudio_version()
+        self._pcm = None
        self._ring_buffer = deque()
        self._ring_lock = threading.Lock()
        self._running = False
        self._callback_thread = None
        self._max_buffer_bytes = int(self._pcm_format.sample_rate * 0.1 * 2)
-        logging.info(
+    def _open(self) -> audio_io.PcmFormat:
-            "SoundDevice backend=%s device='%s' (id=%s) ch=%s default_low_input_latency=%.4f default_high_input_latency=%.4f portaudio=%s",
+        requested_rate = int(self._pcm_format.sample_rate)
-            api_name,
+        
-            dev_info.get('name'),
+        self._pcm = alsaaudio.PCM(
-            self._device,
+            type=alsaaudio.PCM_CAPTURE,
-            dev_info.get('max_input_channels'),
+            mode=alsaaudio.PCM_NORMAL,
            float(dev_info.get('default_low_input_latency') or 0.0),
            float(dev_info.get('default_high_input_latency') or 0.0),
            pa_ver[1] if isinstance(pa_ver, tuple) and len(pa_ver) >= 2 else pa_ver,
        )
        # Create RawInputStream with injected low-latency parameters
        # Target ~2 ms blocksize (48 kHz -> 96 frames). For other rates, keep ~2 ms.
        _sr = int(self._pcm_format.sample_rate)
        self.counter=0
        self.max_avail=0
        self.logfile_name="available_samples.txt"
        self.blocksize = 120
        if os.path.exists(self.logfile_name):
            os.remove(self.logfile_name)
        self._stream = sd.RawInputStream(
            samplerate=self._pcm_format.sample_rate,
            device=self._device,
            channels=self._pcm_format.channels,
            dtype='int16',
            blocksize=self.blocksize,
            latency=0.004,
        )
-        self._stream.start()
+        
-
+        self._pcm.setchannels(1)
        self._pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        actual_rate = self._pcm.setrate(requested_rate)
        self._pcm.setperiodsize(240)
        logging.info("PyALSA: device=%s requested=%d actual=%d periodsize=240 (5ms)", 
                     self._device, requested_rate, actual_rate)
        if actual_rate != requested_rate:
            logging.warning("PyALSA: Sample rate mismatch! requested=%d actual=%d", requested_rate, actual_rate)
        self._running = True
        self._callback_thread = threading.Thread(target=self._capture_loop, daemon=True)
        self._callback_thread.start()
        return audio_io.PcmFormat(
            audio_io.PcmFormat.Endianness.LITTLE,
            audio_io.PcmFormat.SampleType.INT16,
-            self._pcm_format.sample_rate,
+            actual_rate,
            1,
        )
    def _capture_loop(self):
        """Capture-thread body: pull PCM data from ALSA into the ring buffer.

        Loops while ``self._running`` is true. Every successful read is
        appended to ``self._ring_buffer`` under ``self._ring_lock``; when the
        buffered total exceeds ``self._max_buffer_bytes`` the oldest chunks
        are discarded so the buffer holds only the most recent audio.

        The loop ends when ``self._running`` is cleared or when a read raises.
        A read failure while still running is logged (with traceback) instead
        of being swallowed silently, so a dead capture thread is diagnosable.
        """
        bytes_per_sample = 2  # S16_LE samples
        first_read = True
        warned_stereo = False
        while self._running:
            try:
                length, data = self._pcm.read()
            except Exception:
                # Errors raised while shutting down are expected and ignored;
                # anything else is reported before the thread exits.
                if self._running:
                    logging.exception("PyALSA: capture read failed, stopping capture thread")
                break

            # Non-positive length means no usable data for this period
            # (pyalsaaudio reports overruns as a negative length).
            if length <= 0:
                continue

            if first_read:
                expected_bytes = 240 * 2  # 240 frames * 2 bytes/sample for mono
                logging.info("PyALSA first capture: length=%d bytes=%d expected=%d", length, len(data), expected_bytes)
                first_read = False

            # `length` is a frame count. If the payload carries two S16 samples
            # per frame (e.g. 960 bytes instead of 480 for a 240-frame period),
            # the device delivered stereo: keep only the left channel.
            if len(data) == length * 2 * bytes_per_sample:
                if not warned_stereo:
                    # Warn once; logging every period would flood the log.
                    logging.warning("PyALSA: Got stereo data, converting to mono")
                    warned_stereo = True
                pcm_stereo = np.frombuffer(data, dtype=np.int16)
                data = pcm_stereo[::2].tobytes()

            with self._ring_lock:
                self._ring_buffer.append(data)
                # The reader also pops from this deque, so the total is
                # recomputed here (under the lock) and then adjusted
                # incrementally while trimming.
                total_bytes = sum(len(chunk) for chunk in self._ring_buffer)
                while total_bytes > self._max_buffer_bytes:
                    total_bytes -= len(self._ring_buffer.popleft())
    def _read(self, frame_size: int) -> bytes:
-        """Read PCM samples from the stream."""
+        bytes_needed = frame_size * 2
        result = b''
        while len(result) < bytes_needed:
            with self._ring_lock:
                if self._ring_buffer:
                    chunk = self._ring_buffer.popleft()
                    needed = bytes_needed - len(result)
                    if len(chunk) <= needed:
                        result += chunk
                    else:
                        result += chunk[:needed]
                        self._ring_buffer.appendleft(chunk[needed:])
                else:
                    break
        if len(result) < bytes_needed:
            result += b'\x00' * (bytes_needed - len(result))
        return result
-        #if self.counter % 50 == 0:
+    def _close(self) -> None:
-        frame_size = frame_size + 1 # consume samples a little faster to avoid latency akkumulation
+        self._running = False
        if self._callback_thread:
            self._callback_thread.join(timeout=1.0)
        if self._pcm:
            self._pcm.close()
            self._pcm = None
-        pcm_buffer, overflowed = self._stream.read(frame_size)
+audio_io.SoundDeviceAudioInput = PyAlsaAudioInput
        if overflowed:
            logging.warning("SoundDeviceAudioInput: overflowed")
        n_available = self._stream.read_available
        # adapt = n_available > 20
        # if adapt:
        #     pcm_extra, overflowed = self._stream.read(3)
        #     logging.info('consuming extra samples, available was %d', n_available)
        #     if overflowed:
        #         logging.warning("SoundDeviceAudioInput: overflowed")
        #     out = bytes(pcm_buffer) +  bytes(pcm_extra)
        # else:
        out = bytes(pcm_buffer)     
        self.max_avail = max(self.max_avail, n_available)
        #Diagnostics 
        #with open(self.logfile_name, "a", encoding="utf-8") as f:
        #    f.write(f"{n_available}, {adapt}, {round(self._runavg, 2)}, {overflowed}\n")
        if self.counter % 500 == 0:
            logging.info(
                "read available=%d, max=%d, latency:%d", 
                n_available, self.max_avail, self._stream.latency
            )
            self.max_avail = 0
        self.counter += 1
        return out
 audio_io.SoundDeviceAudioInput = ModSoundDeviceAudioInput
 # modified from bumble
 class ModWaveAudioInput(audio_io.ThreadedAudioInput):