feat: refactor audio input to use dedicated reader thread instead of per-frame executor

- Replaced per-frame `run_in_executor` calls with a single background reader thread in `ThreadedAudioInput`
- Reader thread continuously calls `_read()` and enqueues the data onto an `asyncio.Queue` via `call_soon_threadsafe`
- Reduces per-frame scheduling overhead and context-switch jitter while preserving async API
- Added thread lifecycle management: lazy start on first `frames()` call, graceful stop in `aclose()`
- Update
This commit is contained in:
pstruebi
2025-11-19 18:52:37 +01:00
parent 1bda74cf79
commit c681e4ce39
5 changed files with 182 additions and 21 deletions

View File

@@ -67,7 +67,11 @@ class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
def _on_audio(self, indata, frames, time_info, status):
if status:
logging.warning("SoundDeviceAudioInput: status=%s", status)
# Throttle logging to avoid callback overhead
c = getattr(self, "_status_cnt", 0) + 1
self._status_cnt = c
if c % 200 == 0:
logging.warning("SoundDeviceAudioInput: status=%s (x%d)", status, c)
with self._qlock:
self._q.append(bytes(indata))
@@ -76,9 +80,19 @@ class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
with self._qlock:
while self._q and len(self._rb) < needed:
self._rb.extend(self._q.popleft())
# If not enough data yet, wait briefly to accumulate instead of padding immediately.
if len(self._rb) < needed:
missing = needed - len(self._rb)
self._rb.extend(b"\x00" * missing)
import time as _t
t0 = _t.perf_counter()
# Wait up to ~15ms in small increments while pulling from _q
while len(self._rb) < needed and (_t.perf_counter() - t0) < 0.015:
with self._qlock:
while self._q and len(self._rb) < needed:
self._rb.extend(self._q.popleft())
_t.sleep(0.001)
if len(self._rb) < needed:
missing = needed - len(self._rb)
self._rb.extend(b"\x00" * missing)
out = bytes(self._rb[:needed])
del self._rb[:needed]
@@ -87,18 +101,87 @@ class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
audio_io.SoundDeviceAudioInput = ModSoundDeviceAudioInput
def duplex_main() -> None:
    """Run a full-duplex passthrough stream and periodically log its latency.

    Opens one ``sd.RawStream`` spanning input device 0 and output device 1
    (mono, int16, 48 kHz, 120-frame blocks). The stream callback copies each
    captured block straight into the playback buffer. Every fourth 0.5 s tick
    the latency reported by the stream is logged, along with the duration of
    one block and the sum of the three as a rough end-to-end estimate.

    Returns early (after logging an error) if the stream cannot be opened;
    otherwise loops until interrupted with Ctrl-C.
    """
    logging.basicConfig(level=logging.INFO)

    in_device = 0
    out_device = 1
    sample_rate = 48000
    blocksize = 120

    def _passthrough(indata, outdata, frames, time_info, status):
        # Hand the captured block to the output unchanged.
        outdata[:] = indata

    try:
        stream = sd.RawStream(
            samplerate=sample_rate,
            blocksize=blocksize,
            device=(in_device, out_device),
            channels=1,
            dtype='int16',
            callback=_passthrough,
        )
    except Exception as e:
        logging.error("Failed to open full-duplex stream: %s", e)
        return

    # Duration of one block in milliseconds; constant for the stream's lifetime.
    blk_ms = (blocksize / sample_rate) * 1000.0

    with stream:
        try:
            ticks = 0
            while True:
                time.sleep(0.5)
                ticks += 1
                if ticks % 4 != 0:
                    continue

                reported = getattr(stream, 'latency', None)
                if isinstance(reported, (list, tuple)) and len(reported) >= 2:
                    in_lat_ms = float(reported[0]) * 1000.0
                    out_lat_ms = float(reported[1]) * 1000.0
                elif isinstance(reported, (int, float)):
                    # A single reported latency is treated as symmetric.
                    in_lat_ms = out_lat_ms = float(reported) * 1000.0
                else:
                    # No usable latency information available.
                    in_lat_ms = out_lat_ms = 0.0

                e2e_ms = in_lat_ms + out_lat_ms + blk_ms
                logging.info(
                    "duplex: in_lat=%.2fms out_lat=%.2fms blk=%.2fms e2e~%.2fms",
                    in_lat_ms,
                    out_lat_ms,
                    blk_ms,
                    e2e_ms,
                )
        except KeyboardInterrupt:
            pass
async def main() -> None:
logging.basicConfig(level=logging.INFO)
device = audio_io.SoundDeviceAudioInput(device_name='1', pcm_format=audio_io.PcmFormat(audio_io.PcmFormat.Endianness.LITTLE, audio_io.PcmFormat.SampleType.INT16, 48000, 1))
device = audio_io.SoundDeviceAudioInput(
device_name='0', # Shure MVX2U input (device index 0)
pcm_format=audio_io.PcmFormat(
audio_io.PcmFormat.Endianness.LITTLE,
audio_io.PcmFormat.SampleType.INT16,
48000,
1,
),
)
fmt = await device.open()
ostream = sd.RawOutputStream(samplerate=fmt.sample_rate, device=0, channels=1, dtype='int16', blocksize=480)
ostream = sd.RawOutputStream(
samplerate=fmt.sample_rate,
device=1, # USB Audio output (device index 1)
channels=1,
dtype='int16',
blocksize=480,
)
ostream.start()
try:
gen = device.frames(480)
read_w = deque(maxlen=3)
write_w = deque(maxlen=3)
loop_w = deque(maxlen=3)
i = 0
gen = device.frames(480)
while True:
t0 = time.perf_counter()
t1 = time.perf_counter()
@@ -118,6 +201,7 @@ async def main() -> None:
in_bytes_rb = len(device._rb)
bytes_per_sample = 2 * fmt.channels
in_q_ms = ((in_bytes_q + in_bytes_rb) / bytes_per_sample) / fmt.sample_rate * 1000.0
rb_fill_samples = in_bytes_rb / bytes_per_sample
out_lat_ms = 0.0
try:
@@ -163,7 +247,7 @@ async def main() -> None:
f"read min={min(read_w)*1000:.3f}ms mean={(sum(read_w)/len(read_w))*1000:.3f}ms max={max(read_w)*1000:.3f}ms "
f"write min={min(write_w)*1000:.3f}ms mean={(sum(write_w)/len(write_w))*1000:.3f}ms max={max(write_w)*1000:.3f}ms "
f"loop min={min(loop_w)*1000:.3f}ms mean={(sum(loop_w)/len(loop_w))*1000:.3f}ms max={max(loop_w)*1000:.3f}ms "
f"qlen={len(device._q)} in_lat={in_lat_ms:.2f}ms in_q={in_q_ms:.2f}ms out_lat={out_lat_ms:.2f}ms out_blk={out_block_ms:.2f}ms out_free={out_free_ms:.2f}ms e2e~{e2e_ms:.2f}ms"
f"qlen={len(device._q)} rbfill={rb_fill_samples:.1f}smp in_lat={in_lat_ms:.2f}ms in_q={in_q_ms:.2f}ms out_lat={out_lat_ms:.2f}ms out_blk={out_block_ms:.2f}ms out_free={out_free_ms:.2f}ms e2e~{e2e_ms:.2f}ms"
)
except KeyboardInterrupt:
pass
@@ -174,5 +258,6 @@ async def main() -> None:
except Exception:
pass
if __name__ == '__main__':
asyncio.run(main())