|
|
|
|
# NOTE(review): stray unified-diff hunk header ("@@ -56,7 +56,7 @@ from auracast.utils.webrtc_audio_input import WebRTCAudioInput")
# left in the source — this file contains unresolved patch/merge residue and is not valid Python as-is; re-apply the patch cleanly.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Patch sounddevice.InputStream globally to use low-latency settings
|
|
|
|
|
import sounddevice as sd
|
|
|
|
|
import alsaaudio
|
|
|
|
|
from collections import deque
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): stray unified-diff hunk header ("@@ -139,96 +139,138 @@ class AlsaArecordAudioInput(audio_io.AudioInput)")
# — the lines below interleave bodies from two different classes; restore from version control before editing further.
|
|
|
|
|
self._proc = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ModSoundDeviceAudioInput(audio_io.SoundDeviceAudioInput):
|
|
|
|
|
"""Patched SoundDeviceAudioInput with low-latency capture and adaptive resampling."""
|
|
|
|
|
class PyAlsaAudioInput(audio_io.ThreadedAudioInput):
|
|
|
|
|
"""PyALSA audio input with callback thread and ring buffer - supports mono/stereo."""
|
|
|
|
|
|
|
|
|
|
def _open(self):
|
|
|
|
|
"""Create RawInputStream with low-latency parameters and initialize ring buffer."""
|
|
|
|
|
dev_info = sd.query_devices(self._device)
|
|
|
|
|
hostapis = sd.query_hostapis()
|
|
|
|
|
api_index = dev_info.get('hostapi')
|
|
|
|
|
api_name = hostapis[api_index]['name'] if isinstance(api_index, int) and 0 <= api_index < len(hostapis) else 'unknown'
|
|
|
|
|
pa_ver = sd.get_portaudio_version()
|
|
|
|
|
def __init__(self, device, pcm_format: audio_io.PcmFormat):
    """Set up PyALSA capture state (no hardware is opened here; see _open).

    Args:
        device: ALSA device name or numeric index. Numeric values are
            mapped to ALSA device strings ('0' -> 'default', N -> 'hw:N').
        pcm_format: Requested PCM format (sample rate / channels).
    """
    super().__init__()
    # Normalize to an ALSA device string. str(x) already returns x when it
    # is a str, so the original isinstance() branch was redundant.
    self._device = str(device)
    if self._device.isdigit():
        self._device = 'default' if self._device == '0' else f'hw:{self._device}'
    self._pcm_format = pcm_format
    self._pcm = None  # alsaaudio.PCM handle, created in _open()
    # Ring buffer of raw byte chunks handed from the capture thread to the
    # reader; guarded by _ring_lock.
    self._ring_buffer = deque()
    self._ring_lock = threading.Lock()
    self._running = False
    self._callback_thread = None
    self._actual_channels = None  # channel count negotiated in _open()
    self._periodsize = None  # ALSA period size in frames, set in _open()
|
|
|
|
|
|
|
|
|
|
logging.info(
|
|
|
|
|
"SoundDevice backend=%s device='%s' (id=%s) ch=%s default_low_input_latency=%.4f default_high_input_latency=%.4f portaudio=%s",
|
|
|
|
|
api_name,
|
|
|
|
|
dev_info.get('name'),
|
|
|
|
|
self._device,
|
|
|
|
|
dev_info.get('max_input_channels'),
|
|
|
|
|
float(dev_info.get('default_low_input_latency') or 0.0),
|
|
|
|
|
float(dev_info.get('default_high_input_latency') or 0.0),
|
|
|
|
|
pa_ver[1] if isinstance(pa_ver, tuple) and len(pa_ver) >= 2 else pa_ver,
|
|
|
|
|
)
|
|
|
|
|
# Create RawInputStream with injected low-latency parameters
|
|
|
|
|
# Target ~2 ms blocksize (48 kHz -> 96 frames). For other rates, keep ~2 ms.
|
|
|
|
|
_sr = int(self._pcm_format.sample_rate)
|
|
|
|
|
|
|
|
|
|
self.counter=0
|
|
|
|
|
self.max_avail=0
|
|
|
|
|
self.logfile_name="available_samples.txt"
|
|
|
|
|
self.blocksize = 120
|
|
|
|
|
|
|
|
|
|
if os.path.exists(self.logfile_name):
|
|
|
|
|
os.remove(self.logfile_name)
|
|
|
|
|
|
|
|
|
|
self._stream = sd.RawInputStream(
|
|
|
|
|
samplerate=self._pcm_format.sample_rate,
|
|
|
|
|
def _open(self) -> audio_io.PcmFormat:
|
|
|
|
|
# ========== LATENCY CONFIGURATION ==========
|
|
|
|
|
# Adjust these parameters to tune latency vs stability
|
|
|
|
|
ALSA_PERIODSIZE = 120 # Samples per ALSA read (240@48kHz = 5ms, 120 = 2.5ms, 96 = 2ms)
|
|
|
|
|
ALSA_PERIODS = 2 # Number of periods in ALSA buffer (lower = less latency, more risk of underrun)
|
|
|
|
|
# Ring buffer: keep only 3 periods max to minimize latency (safety margin only)
|
|
|
|
|
# ===========================================
|
|
|
|
|
|
|
|
|
|
requested_rate = int(self._pcm_format.sample_rate)
|
|
|
|
|
requested_channels = int(self._pcm_format.channels)
|
|
|
|
|
self._periodsize = ALSA_PERIODSIZE
|
|
|
|
|
# Max ring buffer = 3 periods worth of data (tight coupling, minimal latency)
|
|
|
|
|
self._max_buffer_bytes = ALSA_PERIODSIZE * 3 * 2 * requested_channels
|
|
|
|
|
|
|
|
|
|
self._pcm = alsaaudio.PCM(
|
|
|
|
|
type=alsaaudio.PCM_CAPTURE,
|
|
|
|
|
mode=alsaaudio.PCM_NORMAL,
|
|
|
|
|
device=self._device,
|
|
|
|
|
channels=self._pcm_format.channels,
|
|
|
|
|
dtype='int16',
|
|
|
|
|
blocksize=self.blocksize,
|
|
|
|
|
latency=0.004,
|
|
|
|
|
)
|
|
|
|
|
self._stream.start()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self._pcm.setchannels(requested_channels)
|
|
|
|
|
self._pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
|
|
|
|
|
actual_rate = self._pcm.setrate(requested_rate)
|
|
|
|
|
self._pcm.setperiodsize(ALSA_PERIODSIZE)
|
|
|
|
|
try:
|
|
|
|
|
self._pcm.setperiods(ALSA_PERIODS)
|
|
|
|
|
except AttributeError:
|
|
|
|
|
pass # Some pyalsaaudio versions don't have setperiods()
|
|
|
|
|
|
|
|
|
|
ring_buf_samples = self._max_buffer_bytes // (2 * requested_channels)
|
|
|
|
|
ring_buf_ms = (ring_buf_samples / actual_rate) * 1000
|
|
|
|
|
logging.info("PyALSA: device=%s rate=%d ch=%d periodsize=%d (%.1fms) periods=%d ring_buf=%d samples (%.1fms)",
|
|
|
|
|
self._device, actual_rate, requested_channels, ALSA_PERIODSIZE,
|
|
|
|
|
(ALSA_PERIODSIZE / actual_rate) * 1000, ALSA_PERIODS, ring_buf_samples, ring_buf_ms)
|
|
|
|
|
|
|
|
|
|
if actual_rate != requested_rate:
|
|
|
|
|
logging.warning("PyALSA: Sample rate mismatch! requested=%d actual=%d", requested_rate, actual_rate)
|
|
|
|
|
|
|
|
|
|
self._actual_channels = requested_channels
|
|
|
|
|
self._running = True
|
|
|
|
|
self._callback_thread = threading.Thread(target=self._capture_loop, daemon=True)
|
|
|
|
|
self._callback_thread.start()
|
|
|
|
|
|
|
|
|
|
return audio_io.PcmFormat(
|
|
|
|
|
audio_io.PcmFormat.Endianness.LITTLE,
|
|
|
|
|
audio_io.PcmFormat.SampleType.INT16,
|
|
|
|
|
self._pcm_format.sample_rate,
|
|
|
|
|
1,
|
|
|
|
|
actual_rate,
|
|
|
|
|
requested_channels,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _capture_loop(self):
    """Capture thread: read ALSA periods and push them into the ring buffer.

    The hardware channel count is detected from the first period's byte
    length (the driver may deliver stereo even when mono was requested);
    stereo input is downmixed to mono by keeping the left channel. The ring
    buffer is trimmed to self._max_buffer_bytes so latency stays bounded.
    """
    first_read = True
    hw_channels = None
    while self._running:
        try:
            length, data = self._pcm.read()
            if length <= 0:
                continue
            if first_read:
                # One period of S16_LE stereo: periodsize * 2 bytes * 2 ch.
                expected_stereo = self._periodsize * 2 * 2
                hw_channels = 2 if len(data) == expected_stereo else 1
                logging.info(
                    "PyALSA first capture: bytes=%d detected_hw_channels=%d requested_channels=%d",
                    len(data), hw_channels, self._actual_channels)
                first_read = False

            # Downmix stereo hardware to mono: every other int16 sample is
            # the left channel of an interleaved frame.
            if hw_channels == 2 and self._actual_channels == 1:
                data = np.frombuffer(data, dtype=np.int16)[::2].tobytes()

            with self._ring_lock:
                self._ring_buffer.append(data)
                # Drop oldest chunks once over the cap. Track the running
                # total incrementally — the original re-summed the whole
                # deque after each drop, which is quadratic.
                total_bytes = sum(len(chunk) for chunk in self._ring_buffer)
                while total_bytes > self._max_buffer_bytes:
                    total_bytes -= len(self._ring_buffer.popleft())
        except Exception:
            # Was a bare `except:` that silently swallowed everything;
            # log the failure before stopping capture.
            if self._running:
                logging.exception("PyALSA capture loop error; stopping capture")
                break
|
|
|
|
|
|
|
|
|
|
def _read(self, frame_size: int) -> bytes:
|
|
|
|
|
"""Read PCM samples from the stream."""
|
|
|
|
|
|
|
|
|
|
#if self.counter % 50 == 0:
|
|
|
|
|
frame_size = frame_size + 1 # consume samples a little faster to avoid latency akkumulation
|
|
|
|
|
|
|
|
|
|
pcm_buffer, overflowed = self._stream.read(frame_size)
|
|
|
|
|
if overflowed:
|
|
|
|
|
logging.warning("SoundDeviceAudioInput: overflowed")
|
|
|
|
|
|
|
|
|
|
n_available = self._stream.read_available
|
|
|
|
|
|
|
|
|
|
# adapt = n_available > 20
|
|
|
|
|
# if adapt:
|
|
|
|
|
# pcm_extra, overflowed = self._stream.read(3)
|
|
|
|
|
# logging.info('consuming extra samples, available was %d', n_available)
|
|
|
|
|
# if overflowed:
|
|
|
|
|
# logging.warning("SoundDeviceAudioInput: overflowed")
|
|
|
|
|
bytes_needed = frame_size * 2
|
|
|
|
|
result = b''
|
|
|
|
|
|
|
|
|
|
while len(result) < bytes_needed:
|
|
|
|
|
with self._ring_lock:
|
|
|
|
|
if self._ring_buffer:
|
|
|
|
|
chunk = self._ring_buffer.popleft()
|
|
|
|
|
needed = bytes_needed - len(result)
|
|
|
|
|
if len(chunk) <= needed:
|
|
|
|
|
result += chunk
|
|
|
|
|
else:
|
|
|
|
|
result += chunk[:needed]
|
|
|
|
|
self._ring_buffer.appendleft(chunk[needed:])
|
|
|
|
|
else:
|
|
|
|
|
# Ring buffer empty - release lock and wait a bit
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
# out = bytes(pcm_buffer) + bytes(pcm_extra)
|
|
|
|
|
# else:
|
|
|
|
|
out = bytes(pcm_buffer)
|
|
|
|
|
if len(result) < bytes_needed:
|
|
|
|
|
# Don't busy-wait - sleep briefly to let capture thread fill buffer
|
|
|
|
|
import time
|
|
|
|
|
time.sleep(0.0001) # 0.1ms
|
|
|
|
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
self.max_avail = max(self.max_avail, n_available)
|
|
|
|
|
def _close(self) -> None:
    """Stop the capture thread and release the ALSA PCM handle."""
    # Ask the capture loop to exit, then give it a moment to finish.
    self._running = False
    worker = self._callback_thread
    if worker:
        worker.join(timeout=1.0)
    # Detach the handle before closing so a re-entrant call sees None.
    pcm, self._pcm = self._pcm, None
    if pcm:
        pcm.close()
|
|
|
|
|
|
|
|
|
|
#Diagnostics
|
|
|
|
|
#with open(self.logfile_name, "a", encoding="utf-8") as f:
|
|
|
|
|
# f.write(f"{n_available}, {adapt}, {round(self._runavg, 2)}, {overflowed}\n")
|
|
|
|
|
|
|
|
|
|
if self.counter % 500 == 0:
|
|
|
|
|
logging.info(
|
|
|
|
|
"read available=%d, max=%d, latency:%d",
|
|
|
|
|
n_available, self.max_avail, self._stream.latency
|
|
|
|
|
)
|
|
|
|
|
self.max_avail = 0
|
|
|
|
|
|
|
|
|
|
self.counter += 1
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
# Monkeypatch: replace the library's SoundDeviceAudioInput with a local backend.
audio_io.SoundDeviceAudioInput = ModSoundDeviceAudioInput
# NOTE(review): the assignment above is dead — it is immediately overridden by
# the next line, so PyAlsaAudioInput is the backend actually in effect. This
# looks like a leftover debug toggle; confirm which backend is intended and
# delete the other assignment.
audio_io.SoundDeviceAudioInput = PyAlsaAudioInput
|
|
|
|
|
|
|
|
|
|
# modified from bumble
|
|
|
|
|
class ModWaveAudioInput(audio_io.ThreadedAudioInput):
|
|
|
|
|
|