feat: refactor audio input to use dedicated reader thread instead of per-frame executor

- Replaced per-frame `run_in_executor` calls with a single background reader thread in `ThreadedAudioInput`
- The reader thread continuously calls `_read()` and enqueues data onto an `asyncio.Queue` via `call_soon_threadsafe`
- Reduces per-frame scheduling overhead and context-switch jitter while preserving the async API
- Added thread lifecycle management: lazy start on the first `frames()` call, graceful stop in `aclose()`
- Update…
2025-11-19 18:52:37 +01:00
parent 1bda74cf79
commit c681e4ce39
5 changed files with 182 additions and 21 deletions
--- a/test_ringbuffer.py
+++ b/test_ringbuffer.py
@@ -67,7 +67,11 @@ class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):

    def _on_audio(self, indata, frames, time_info, status):
        if status:
-            logging.warning("SoundDeviceAudioInput: status=%s", status)
+            # Throttle logging to avoid callback overhead
+            c = getattr(self, "_status_cnt", 0) + 1
+            self._status_cnt = c
+            if c % 200 == 0:
+                logging.warning("SoundDeviceAudioInput: status=%s (x%d)", status, c)
        with self._qlock:
            self._q.append(bytes(indata))

@@ -76,9 +80,19 @@ class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
        with self._qlock:
            while self._q and len(self._rb) < needed:
                self._rb.extend(self._q.popleft())
+        # If not enough data yet, wait briefly to accumulate instead of padding immediately.
        if len(self._rb) < needed:
-            missing = needed - len(self._rb)
-            self._rb.extend(b"\x00" * missing)
+            import time as _t
+            t0 = _t.perf_counter()
+            # Wait up to ~15ms in small increments while pulling from _q
+            while len(self._rb) < needed and (_t.perf_counter() - t0) < 0.015:
+                with self._qlock:
+                    while self._q and len(self._rb) < needed:
+                        self._rb.extend(self._q.popleft())
+                _t.sleep(0.001)
+            if len(self._rb) < needed:
+                missing = needed - len(self._rb)
+                self._rb.extend(b"\x00" * missing)
            
        out = bytes(self._rb[:needed])
        del self._rb[:needed]
@@ -87,18 +101,87 @@ class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):

 audio_io.SoundDeviceAudioInput = ModSoundDeviceAudioInput

+
+def duplex_main() -> None:
+    """Simple full-duplex callback stream: copy input directly to output and log latency."""
+    logging.basicConfig(level=logging.INFO)
+
+    in_device = 0
+    out_device = 1
+    sample_rate = 48000
+    blocksize = 120
+
+    try:
+        stream = sd.RawStream(
+            samplerate=sample_rate,
+            blocksize=blocksize,
+            device=(in_device, out_device),
+            channels=1,
+            dtype='int16',
+            callback=lambda indata, outdata, frames, time_info, status: outdata.__setitem__(slice(None), indata),
+        )
+    except Exception as e:
+        logging.error("Failed to open full-duplex stream: %s", e)
+        return
+
+    with stream:
+        try:
+            i = 0
+            while True:
+                time.sleep(0.5)
+                i += 1
+                if i % 4 == 0:
+                    lat = getattr(stream, 'latency', None)
+                    in_lat_ms = 0.0
+                    out_lat_ms = 0.0
+                    if isinstance(lat, (list, tuple)) and len(lat) >= 2:
+                        in_lat_ms = float(lat[0]) * 1000.0
+                        out_lat_ms = float(lat[1]) * 1000.0
+                    elif isinstance(lat, (int, float)):
+                        # If PortAudio reports a single latency, treat as symmetric
+                        in_lat_ms = out_lat_ms = float(lat) * 1000.0
+
+                    blk_ms = (blocksize / sample_rate) * 1000.0
+                    e2e_ms = in_lat_ms + out_lat_ms + blk_ms
+
+                    logging.info(
+                        "duplex: in_lat=%.2fms out_lat=%.2fms blk=%.2fms e2e~%.2fms",
+                        in_lat_ms,
+                        out_lat_ms,
+                        blk_ms,
+                        e2e_ms,
+                    )
+        except KeyboardInterrupt:
+            pass
+
+
 async def main() -> None:
    logging.basicConfig(level=logging.INFO)
-    device = audio_io.SoundDeviceAudioInput(device_name='1', pcm_format=audio_io.PcmFormat(audio_io.PcmFormat.Endianness.LITTLE, audio_io.PcmFormat.SampleType.INT16, 48000, 1))
+    device = audio_io.SoundDeviceAudioInput(
+        device_name='0',  # Shure MVX2U input (device index 0)
+        pcm_format=audio_io.PcmFormat(
+            audio_io.PcmFormat.Endianness.LITTLE,
+            audio_io.PcmFormat.SampleType.INT16,
+            48000,
+            1,
+        ),
+    )
    fmt = await device.open()
-    ostream = sd.RawOutputStream(samplerate=fmt.sample_rate, device=0, channels=1, dtype='int16', blocksize=480)
+    ostream = sd.RawOutputStream(
+        samplerate=fmt.sample_rate,
+        device=1,  # USB Audio output (device index 1)
+        channels=1,
+        dtype='int16',
+        blocksize=480,
+    )
    ostream.start()
    try:
-        gen = device.frames(480)
+        
        read_w = deque(maxlen=3)
        write_w = deque(maxlen=3)
        loop_w = deque(maxlen=3)
        i = 0
+        gen = device.frames(480)
        while True:
            t0 = time.perf_counter()
            t1 = time.perf_counter()
@@ -118,6 +201,7 @@ async def main() -> None:
                in_bytes_rb = len(device._rb)
                bytes_per_sample = 2 * fmt.channels
                in_q_ms = ((in_bytes_q + in_bytes_rb) / bytes_per_sample) / fmt.sample_rate * 1000.0
+                rb_fill_samples = in_bytes_rb / bytes_per_sample

                out_lat_ms = 0.0
                try:
@@ -163,7 +247,7 @@ async def main() -> None:
                    f"read min={min(read_w)*1000:.3f}ms mean={(sum(read_w)/len(read_w))*1000:.3f}ms max={max(read_w)*1000:.3f}ms "
                    f"write min={min(write_w)*1000:.3f}ms mean={(sum(write_w)/len(write_w))*1000:.3f}ms max={max(write_w)*1000:.3f}ms "
                    f"loop min={min(loop_w)*1000:.3f}ms mean={(sum(loop_w)/len(loop_w))*1000:.3f}ms max={max(loop_w)*1000:.3f}ms "
-                    f"qlen={len(device._q)} in_lat={in_lat_ms:.2f}ms in_q={in_q_ms:.2f}ms out_lat={out_lat_ms:.2f}ms out_blk={out_block_ms:.2f}ms out_free={out_free_ms:.2f}ms e2e~{e2e_ms:.2f}ms"
+                    f"qlen={len(device._q)} rbfill={rb_fill_samples:.1f}smp in_lat={in_lat_ms:.2f}ms in_q={in_q_ms:.2f}ms out_lat={out_lat_ms:.2f}ms out_blk={out_block_ms:.2f}ms out_free={out_free_ms:.2f}ms e2e~{e2e_ms:.2f}ms"
                )
    except KeyboardInterrupt:
        pass
@@ -174,5 +258,6 @@ async def main() -> None:
        except Exception:
            pass

+
 if __name__ == '__main__':
    asyncio.run(main())