- Replaced per-frame `run_in_executor` calls with single background reader thread in `ThreadedAudioInput` - Reader thread continuously calls `_read()` and enqueues data via `call_soon_threadsafe` to asyncio.Queue - Reduces per-frame scheduling overhead and context-switch jitter while preserving async API - Added thread lifecycle management: lazy start on first `frames()` call, graceful stop in `aclose()` - Update
587 lines
19 KiB
Python
587 lines
19 KiB
Python
# Copyright 2025 Google LLC
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# https://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Imports
|
|
# -----------------------------------------------------------------------------
|
|
from __future__ import annotations
|
|
|
|
import abc
|
|
import asyncio
|
|
import dataclasses
|
|
import enum
|
|
import logging
|
|
import pathlib
|
|
import sys
|
|
import wave
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
from typing import TYPE_CHECKING, AsyncGenerator, BinaryIO
|
|
import threading
|
|
|
|
|
|
if TYPE_CHECKING:
|
|
import sounddevice # type: ignore[import-untyped]
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Logging
|
|
# -----------------------------------------------------------------------------
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Classes
|
|
# -----------------------------------------------------------------------------
|
|
@dataclasses.dataclass
class PcmFormat:
    """Description of a raw PCM sample stream.

    Attributes:
        endianness: Byte order of the samples.
        sample_type: Numeric representation of each sample.
        sample_rate: Sample rate, as given in the format string.
        channels: Number of channels, as given in the format string.
    """

    class Endianness(enum.Enum):
        LITTLE = 0
        BIG = 1

    class SampleType(enum.Enum):
        FLOAT32 = 0
        INT16 = 1

    endianness: Endianness
    sample_type: SampleType
    sample_rate: int
    channels: int

    @classmethod
    def from_str(cls, format_str: str) -> PcmFormat:
        """Parse a ``<sample_type>,<sample_rate>,<channels>`` format string.

        Args:
            format_str: For example ``'int16le,16000,1'``. The supported
                sample types are ``int16le`` and ``float32le``.

        Returns:
            The corresponding PcmFormat (always little-endian).

        Raises:
            ValueError: If the string does not have exactly three
                comma-separated fields, if the sample type is not supported,
                or if the sample rate or channel count is not an integer.
        """
        endianness = cls.Endianness.LITTLE  # Others not yet supported.

        # Check the field count explicitly so that a malformed string is
        # reported clearly rather than as a tuple-unpacking error.
        fields = [field.strip() for field in format_str.split(',')]
        if len(fields) != 3:
            raise ValueError(
                f'invalid PCM format {format_str!r} '
                '(expected <sample_type>,<sample_rate>,<channels>)'
            )
        sample_type_str, sample_rate_str, channels_str = fields

        if sample_type_str == 'int16le':
            sample_type = cls.SampleType.INT16
        elif sample_type_str == 'float32le':
            sample_type = cls.SampleType.FLOAT32
        else:
            raise ValueError(f'sample type {sample_type_str} not supported')

        sample_rate = int(sample_rate_str)
        channels = int(channels_str)

        return cls(endianness, sample_type, sample_rate, channels)

    @property
    def bytes_per_sample(self) -> int:
        """Size in bytes of one sample: 2 for INT16, 4 otherwise."""
        return 2 if self.sample_type == self.SampleType.INT16 else 4
|
|
|
|
|
|
def check_audio_output(output: str) -> bool:
    """Check that an audio output specifier can be used.

    Only ``'device'`` and ``'device:<index>'`` specifiers are checked; any
    other specifier is accepted as-is.

    Args:
        output: The output specifier. ``'device:?'`` prints the list of
            devices that have at least one output channel.

    Returns:
        True if the output may be used, False if the device list was printed
        instead (``'device:?'``).

    Raises:
        ValueError: If the sounddevice module cannot be imported, if the
            requested device does not exist, or if it has no output channel.
    """
    if output != 'device' and not output.startswith('device:'):
        # Nothing to check for non-device outputs.
        return True

    try:
        import sounddevice  # pylint: disable=import-error
    except ImportError as exc:
        raise ValueError(
            'audio output not available (sounddevice python module not installed)'
        ) from exc
    except OSError as exc:
        raise ValueError(
            'audio output not available '
            '(sounddevice python module failed to load: '
            f'{exc})'
        ) from exc

    if output == 'device':
        # Default device
        return True

    # Specific device
    device = output[7:]
    if device == '?':
        # Print plain text: formatting (text, color) tuples directly would
        # show their repr, e.g. "('Audio Devices:', 'yellow')".
        print('Audio Devices:')
        for device_info in sounddevice.query_devices():
            if device_info['max_output_channels'] <= 0:
                continue
            device_index = device_info['index']
            # default.device is indexed with 1 for the output device here.
            is_default = (
                ' [default]' if sounddevice.default.device[1] == device_index else ''
            )
            print(f'{device_index}: {device_info["name"]}{is_default}')
        return False

    try:
        device_info = sounddevice.query_devices(int(device))
    except sounddevice.PortAudioError as exc:
        raise ValueError('No such audio device') from exc

    if device_info['max_output_channels'] < 1:
        raise ValueError(
            f'Device {device} ({device_info["name"]}) does not have an output'
        )

    return True
|
|
|
|
|
|
async def create_audio_output(output: str) -> AudioOutput:
    """Instantiate the AudioOutput that matches an output specifier.

    Supported specifiers: ``'stdout'``, ``'device'``, ``'device:<name>'``,
    ``'ffplay'`` and ``'file:<path>'``.

    Raises:
        ValueError: If the specifier is not one of the supported forms.
    """
    device_prefix = 'device:'
    file_prefix = 'file:'

    if output == 'stdout':
        return StreamAudioOutput(sys.stdout.buffer)

    # An empty device name selects the default device.
    if output == 'device':
        return SoundDeviceAudioOutput('')
    if output.startswith(device_prefix):
        return SoundDeviceAudioOutput(output[len(device_prefix) :])

    if output == 'ffplay':
        # {sample_rate} and {channel_layout} are filled in when the output is
        # opened.
        ffplay_command = (
            'ffplay -probesize 32 -fflags nobuffer -analyzeduration 0 '
            '-ar {sample_rate} '
            '-ch_layout {channel_layout} '
            '-f f32le pipe:0'
        )
        return SubprocessAudioOutput(command=ffplay_command)

    if output.startswith(file_prefix):
        return FileAudioOutput(output[len(file_prefix) :])

    raise ValueError('unsupported audio output')
|
|
|
|
|
|
class AudioOutput(abc.ABC):
    """Abstract sink that accepts PCM samples."""

    async def open(self, pcm_format: PcmFormat) -> None:
        """Prepare the output for samples in `pcm_format`.

        The base implementation does nothing.
        """

    @abc.abstractmethod
    def write(self, pcm_samples: bytes) -> None:
        """Accept a chunk of PCM samples; implementations must not block."""

    async def aclose(self) -> None:
        """Release the output.

        The base implementation does nothing.
        """
|
|
|
|
|
|
class ThreadedAudioOutput(AudioOutput):
    """Base class for AudioOutput classes that may need to call blocking functions.

    The actual writing is performed in a thread, so as to ensure that calling write()
    does not block the caller.

    Instances must be created while an event loop is running, because the
    constructor starts the write loop with `asyncio.create_task()`.
    """

    def __init__(self) -> None:
        # A single worker thread keeps `_write()` and `_close()` calls
        # serialized, in submission order.
        self._thread_pool = ThreadPoolExecutor(1)
        self._pcm_samples: asyncio.Queue[bytes] = asyncio.Queue()
        self._write_task = asyncio.create_task(self._write_loop())

    async def _write_loop(self) -> None:
        """Forward queued samples to `_write()`, one chunk at a time."""
        loop = asyncio.get_running_loop()
        while True:
            pcm_samples = await self._pcm_samples.get()
            await loop.run_in_executor(self._thread_pool, self._write, pcm_samples)

    @abc.abstractmethod
    def _write(self, pcm_samples: bytes) -> None:
        """This method does the actual writing and can block."""

    def write(self, pcm_samples: bytes) -> None:
        """Queue PCM samples for writing; returns without waiting."""
        self._pcm_samples.put_nowait(pcm_samples)

    def _close(self) -> None:
        """This method does the actual closing and can block."""

    async def aclose(self) -> None:
        """Stop the write loop, then close the output in the worker thread.

        Samples that are still queued when this is called are discarded.
        """
        # Cancel the write loop before closing: otherwise it could submit one
        # more `_write()` to the worker thread while `_close()` is running,
        # and that write would then execute after the output is closed.
        self._write_task.cancel()
        await asyncio.get_running_loop().run_in_executor(self._thread_pool, self._close)
        self._thread_pool.shutdown()
|
|
|
|
|
|
class SoundDeviceAudioOutput(ThreadedAudioOutput):
    """AudioOutput that plays float32 PCM samples on a sounddevice stream."""

    def __init__(self, device_name: str) -> None:
        """
        Args:
            device_name: Device index as a decimal string, or an empty string
                to pass `None` as the device to sounddevice.
        """
        super().__init__()
        self._device = int(device_name) if device_name else None
        self._stream: sounddevice.RawOutputStream | None = None

    async def open(self, pcm_format: PcmFormat) -> None:
        """Create and start the output stream for `pcm_format`."""
        import sounddevice  # pylint: disable=import-error

        self._stream = sounddevice.RawOutputStream(
            samplerate=pcm_format.sample_rate,
            device=self._device,
            channels=pcm_format.channels,
            dtype='float32',
        )
        self._stream.start()

    def _write(self, pcm_samples: bytes) -> None:
        """Write samples to the stream; a no-op when the stream is not open."""
        if self._stream is None:
            return

        try:
            self._stream.write(pcm_samples)
        except Exception:
            logger.exception('Sound device error')
            raise

    def _close(self) -> None:
        """Stop the stream, if one was opened."""
        # The stream is None when open() was never called or when the output
        # was already closed; closing must not fail in those cases.
        stream, self._stream = self._stream, None
        if stream is not None:
            stream.stop()
|
|
|
|
|
|
class StreamAudioOutput(ThreadedAudioOutput):
    """AudioOutput that forwards PCM samples to a binary stream.

    Writes to the stream may block.
    """

    def __init__(self, stream: BinaryIO) -> None:
        super().__init__()
        self._stream = stream

    def _write(self, pcm_samples: bytes) -> None:
        """Write the samples to the stream and flush it."""
        sink = self._stream
        sink.write(pcm_samples)
        sink.flush()
|
|
|
|
|
|
class FileAudioOutput(StreamAudioOutput):
    """AudioOutput where PCM samples are written to a file."""

    def __init__(self, filename: str) -> None:
        """Open `filename` for binary writing and use it as the stream."""
        self._file = open(filename, "wb")
        try:
            super().__init__(self._file)
        except Exception:
            # Do not leak the file handle if the base class cannot be set up.
            self._file.close()
            raise

    def _close(self) -> None:
        """Close the file; called by aclose()."""
        self._file.close()

    async def shutdown(self) -> None:
        """Close the output.

        Kept for backward compatibility; equivalent to `aclose()`. The base
        classes do not define `shutdown()`, so this must not delegate to it.
        """
        await self.aclose()
|
|
|
|
|
|
class SubprocessAudioOutput(AudioOutput):
    """AudioOutput where audio samples are written to a subprocess via stdin."""

    def __init__(self, command: str) -> None:
        """
        Args:
            command: Shell command template. `{sample_rate}` and
                `{channel_layout}` are substituted when the output is opened.
        """
        self._command = command
        # Must be assigned (not just annotated) so that write() and aclose()
        # are safe to call before open().
        self._subprocess: asyncio.subprocess.Process | None = None

    async def open(self, pcm_format: PcmFormat) -> None:
        """Start the subprocess for the given format.

        Raises:
            ValueError: If the format has neither 1 nor 2 channels.
        """
        if pcm_format.channels == 1:
            channel_layout = 'mono'
        elif pcm_format.channels == 2:
            channel_layout = 'stereo'
        else:
            raise ValueError(f'{pcm_format.channels} channels not supported')

        command = self._command.format(
            sample_rate=pcm_format.sample_rate, channel_layout=channel_layout
        )
        # NOTE(review): stdout and stderr are piped but never read by this
        # class; confirm that the child cannot stall on a full pipe.
        self._subprocess = await asyncio.create_subprocess_shell(
            command,
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

    def write(self, pcm_samples: bytes) -> None:
        """Write samples to the subprocess stdin; a no-op when not open."""
        if self._subprocess is None or self._subprocess.stdin is None:
            return

        self._subprocess.stdin.write(pcm_samples)

    async def aclose(self) -> None:
        """Terminate the subprocess, if it was started and is still running."""
        process, self._subprocess = self._subprocess, None
        if process is None or process.returncode is not None:
            return

        try:
            process.terminate()
        except ProcessLookupError:
            # The process exited between the check above and the signal.
            pass
|
|
|
|
|
|
def check_audio_input(input: str) -> bool:
    """Check that an audio input specifier can be used.

    Only ``'device'`` and ``'device:<index>'`` specifiers are checked; any
    other specifier is accepted as-is.

    Args:
        input: The input specifier. ``'device:?'`` prints the list of devices
            that have at least one input channel.

    Returns:
        True if the input may be used, False if the device list was printed
        instead (``'device:?'``).

    Raises:
        ValueError: If the sounddevice module cannot be imported, if the
            requested device does not exist, or if it has no input channel.
    """
    if input != 'device' and not input.startswith('device:'):
        # Nothing to check for non-device inputs.
        return True

    try:
        import sounddevice  # pylint: disable=import-error
    except ImportError as exc:
        raise ValueError(
            'audio input not available (sounddevice python module not installed)'
        ) from exc
    except OSError as exc:
        raise ValueError(
            'audio input not available '
            '(sounddevice python module failed to load: '
            f'{exc})'
        ) from exc

    if input == 'device':
        # Default device
        return True

    # Specific device
    device = input[7:]
    if device == '?':
        # Print plain text: formatting (text, color) tuples directly would
        # show their repr, e.g. "('Audio Devices:', 'yellow')".
        print('Audio Devices:')
        for device_info in sounddevice.query_devices():
            if device_info['max_input_channels'] <= 0:
                continue
            device_index = device_info['index']
            is_mono = device_info['max_input_channels'] == 1
            max_channels = f'[{"mono" if is_mono else "stereo"}]'
            # default.device is indexed with 0 for the input device here.
            is_default = (
                ' [default]' if sounddevice.default.device[0] == device_index else ''
            )
            print(
                f'{device_index}: {device_info["name"]}'
                f' {max_channels}{is_default}'
            )
        return False

    try:
        device_info = sounddevice.query_devices(int(device))
    except sounddevice.PortAudioError as exc:
        raise ValueError('No such audio device') from exc

    if device_info['max_input_channels'] < 1:
        raise ValueError(
            f'Device {device} ({device_info["name"]}) does not have an input'
        )

    return True
|
|
|
|
|
|
async def create_audio_input(input: str, input_format: str) -> AudioInput:
    """Instantiate the AudioInput that matches an input specifier.

    Args:
        input: ``'stdin'``, ``'device'``, ``'device:<name>'``,
            ``'file:<path>'``, or the path of an existing file.
        input_format: ``'auto'``, or a format string understood by
            `PcmFormat.from_str()`.

    Raises:
        ValueError: If the input is not supported, if a format is required
            but ``'auto'`` was given, or if a ``.wav`` file is combined with
            an explicit format.
    """
    device_prefix = 'device:'
    file_prefix = 'file:'

    pcm_format: PcmFormat | None = (
        None if input_format == 'auto' else PcmFormat.from_str(input_format)
    )

    if input == 'stdin':
        if pcm_format is None:
            raise ValueError('input format details required for stdin')
        return StreamAudioInput(sys.stdin.buffer, pcm_format)

    if input == 'device' or input.startswith(device_prefix):
        if pcm_format is None:
            raise ValueError('input format details required for device')
        # An empty device name selects the default device.
        device_name = input[len(device_prefix) :] if input != 'device' else ''
        return SoundDeviceAudioInput(device_name, pcm_format)

    # A bare path to an existing file is treated as if it had a file: prefix.
    if pathlib.Path(input).is_file():
        input = file_prefix + input

    if not input.startswith(file_prefix):
        raise ValueError('input not supported')

    filename = input[len(file_prefix) :]
    if filename.endswith('.wav'):
        # The format of a .wav file comes from its header.
        if input_format != 'auto':
            raise ValueError(".wav file only supported with 'auto' format")
        return WaveAudioInput(filename)

    if pcm_format is None:
        raise ValueError('input format details required for raw PCM files')
    return FileAudioInput(filename, pcm_format)
|
|
|
|
|
|
class AudioInput(abc.ABC):
    """Abstract source of PCM samples."""

    @abc.abstractmethod
    async def open(self) -> PcmFormat:
        """Open the input and return the format of the samples it produces."""

    @abc.abstractmethod
    def frames(self, frame_size: int) -> AsyncGenerator[bytes]:
        """Return an async generator of PCM frames; must not block."""

    async def aclose(self) -> None:
        """Release the input.

        The base implementation does nothing.
        """
|
|
|
|
|
|
class ThreadedAudioInput(AudioInput):
    """Base class for AudioInput implementation where reading samples may block.

    Blocking `_read()` calls are made from a single background thread, which
    is started by the first `frames()` call and stopped by `aclose()`. Each
    frame is handed to the event loop with `call_soon_threadsafe()` and
    delivered to the consumer through an asyncio queue.

    The reader never runs more than `_MAX_PENDING_FRAMES` frames ahead of the
    consumer, so a source that can be read faster than it is consumed (a file,
    or a looping .wav) cannot grow the queue without bound.
    """

    # Maximum number of frames read ahead of what the consumer has taken.
    _MAX_PENDING_FRAMES = 8

    # How often (in seconds) a throttled reader re-checks for a stop request.
    _STOP_POLL_INTERVAL = 0.1

    def __init__(self) -> None:
        self._thread_pool = ThreadPoolExecutor(1)
        self._pcm_samples: asyncio.Queue[bytes] = asyncio.Queue()
        self._reader_thread: threading.Thread | None = None
        self._running: bool = False
        self._loop: asyncio.AbstractEventLoop | None = None
        # One slot per frame the reader may still produce; the reader takes a
        # slot before reading, the consumer gives it back when it takes a frame.
        self._free_slots = threading.Semaphore(self._MAX_PENDING_FRAMES)

    @abc.abstractmethod
    def _read(self, frame_size: int) -> bytes:
        """Read one frame; may block. An empty result ends the stream."""

    @abc.abstractmethod
    def _open(self) -> PcmFormat:
        """Open the input and return its format; may block."""

    def _close(self) -> None:
        """Close the input; may block. Does nothing by default."""

    async def open(self) -> PcmFormat:
        """Run `_open()` in the worker thread and return its result."""
        return await asyncio.get_running_loop().run_in_executor(
            self._thread_pool, self._open
        )

    def _post(self, pcm_samples: bytes) -> None:
        """Queue data for the consumer; called from the reader thread."""
        loop = self._loop
        if loop is None:
            return
        try:
            loop.call_soon_threadsafe(self._pcm_samples.put_nowait, pcm_samples)
        except RuntimeError:
            # The event loop is closed: there is no consumer left to notify.
            pass

    def _reader_loop(self, frame_size: int) -> None:
        """Body of the reader thread: read frames until stopped or exhausted."""
        try:
            while self._running:
                # Wait for buffer space, waking up regularly so that a stop
                # request is noticed even if the consumer has gone away.
                if not self._free_slots.acquire(timeout=self._STOP_POLL_INTERVAL):
                    continue
                if not self._running:
                    break
                pcm_samples = self._read(frame_size)
                if not pcm_samples:
                    break
                self._post(pcm_samples)
        except Exception:
            logger.exception("ThreadedAudioInput reader thread failed")
        finally:
            # Always send the end marker, whatever the reason for stopping,
            # so that a consumer waiting in frames() is released.
            self._post(b"")

    async def frames(self, frame_size: int) -> AsyncGenerator[bytes]:
        """Yield frames of PCM samples until the input is exhausted or closed.

        The reader thread is started by the first call; `frame_size` from that
        call is the one passed to `_read()`.
        """
        if not self._running:
            self._running = True
            self._loop = asyncio.get_running_loop()
            self._reader_thread = threading.Thread(
                target=self._reader_loop, args=(frame_size,), daemon=True
            )
            self._reader_thread.start()

        while True:
            pcm_samples = await self._pcm_samples.get()
            if not pcm_samples:
                break
            # This frame no longer occupies the buffer: let the reader go on.
            self._free_slots.release()
            yield pcm_samples

    async def aclose(self) -> None:
        """Stop the reader thread, then close the input in the worker thread."""
        # Stop reader thread first so no more _read() calls are issued.
        self._running = False
        loop = asyncio.get_running_loop()
        reader_thread, self._reader_thread = self._reader_thread, None
        if reader_thread is not None:
            # Join from the worker thread rather than the event loop thread,
            # so the loop keeps running while the reader winds down. The
            # timeout bounds the wait when `_read()` is stuck in a blocking call.
            await loop.run_in_executor(self._thread_pool, reader_thread.join, 1.0)

        await loop.run_in_executor(self._thread_pool, self._close)
        self._thread_pool.shutdown()
|
|
|
|
|
|
class WaveAudioInput(ThreadedAudioInput):
    """AudioInput backed by a .wav file, replayed from the start at end of file."""

    def __init__(self, filename: str) -> None:
        super().__init__()
        self._filename = filename
        self._wav: wave.Wave_read | None = None
        # Bytes read since the file was opened or last rewound.
        self._bytes_read = 0

    def _open(self) -> PcmFormat:
        """Open the file and derive the PCM format from its header.

        Raises:
            ValueError: If the sample width is not 2 bytes.
        """
        wav = wave.open(self._filename, 'rb')
        self._wav = wav
        if wav.getsampwidth() != 2:
            raise ValueError('sample width not supported')

        sample_rate = wav.getframerate()
        channels = wav.getnchannels()
        return PcmFormat(
            PcmFormat.Endianness.LITTLE,
            PcmFormat.SampleType.INT16,
            sample_rate,
            channels,
        )

    def _read(self, frame_size: int) -> bytes:
        """Read up to `frame_size` frames, rewinding once when the end is hit."""
        wav = self._wav
        if not wav:
            return b''

        data = wav.readframes(frame_size)
        if self._bytes_read and not data:
            # End of file after some data was read: start over. A file with
            # no data at all is not rewound, so it yields an empty result.
            wav.rewind()
            self._bytes_read = 0
            data = wav.readframes(frame_size)

        self._bytes_read += len(data)
        return data

    def _close(self) -> None:
        """Close the file if it was opened."""
        wav = self._wav
        if wav:
            wav.close()
|
|
|
|
|
|
class StreamAudioInput(ThreadedAudioInput):
    """AudioInput that reads raw PCM from a binary stream; reads may block."""

    def __init__(self, stream: BinaryIO, pcm_format: PcmFormat) -> None:
        super().__init__()
        self._stream = stream
        self._pcm_format = pcm_format

    def _open(self) -> PcmFormat:
        """Return the format supplied at construction; nothing is opened."""
        return self._pcm_format

    def _read(self, frame_size: int) -> bytes:
        """Read `frame_size` frames' worth of bytes from the stream."""
        pcm_format = self._pcm_format
        byte_count = frame_size * pcm_format.channels * pcm_format.bytes_per_sample
        return self._stream.read(byte_count)
|
|
|
|
|
|
class FileAudioInput(StreamAudioInput):
    """AudioInput that reads raw PCM samples from a file on disk."""

    def __init__(self, filename: str, pcm_format: PcmFormat) -> None:
        """Open `filename` for binary reading and use it as the stream."""
        pcm_file = open(filename, "rb")
        # The parent constructor stores the file as `self._stream`.
        super().__init__(pcm_file, pcm_format)

    def _close(self) -> None:
        """Close the underlying file."""
        self._stream.close()
|
|
|
|
|
|
class SoundDeviceAudioInput(ThreadedAudioInput):
    """AudioInput that captures int16 PCM samples from a sounddevice stream.

    Mono input is expanded to stereo, and the format reported by `_open()`
    always has 2 channels.
    """

    def __init__(self, device_name: str, pcm_format: PcmFormat) -> None:
        """
        Args:
            device_name: Device index as a decimal string, or an empty string
                to pass `None` as the device to sounddevice.
            pcm_format: Requested sample rate and channel count for capture.
        """
        super().__init__()
        self._device = int(device_name) if device_name else None
        self._pcm_format = pcm_format
        self._stream: sounddevice.RawInputStream | None = None

    def _open(self) -> PcmFormat:
        """Create and start the input stream, and return the output format."""
        import sounddevice  # pylint: disable=import-error

        self._stream = sounddevice.RawInputStream(
            samplerate=self._pcm_format.sample_rate,
            device=self._device,
            channels=self._pcm_format.channels,
            dtype='int16',
        )
        self._stream.start()

        # NOTE(review): 2 channels are reported whatever was requested; only
        # mono input is converted in _read() - confirm for more than 2 channels.
        return PcmFormat(
            PcmFormat.Endianness.LITTLE,
            PcmFormat.SampleType.INT16,
            self._pcm_format.sample_rate,
            2,
        )

    @staticmethod
    def _mono_to_stereo(mono: bytes) -> bytes:
        """Duplicate every 2-byte mono sample into a two-sample stereo frame."""
        first_bytes = mono[0::2]
        second_bytes = mono[1::2]
        stereo = bytearray(len(mono) * 2)
        # Each 4-byte stereo frame is the same 2-byte sample, twice.
        stereo[0::4] = first_bytes
        stereo[1::4] = second_bytes
        stereo[2::4] = first_bytes
        stereo[3::4] = second_bytes
        return bytes(stereo)

    def _read(self, frame_size: int) -> bytes:
        """Read `frame_size` frames; returns b'' when the stream is not open."""
        # Use a local reference so that a concurrent _close() cannot clear
        # the stream between the check and the read.
        stream = self._stream
        if stream is None:
            return b''

        pcm_buffer, overflowed = stream.read(frame_size)
        if overflowed:
            logger.warning("input overflow")

        pcm_samples = bytes(pcm_buffer)

        # Convert the buffer to stereo if needed
        if self._pcm_format.channels == 1:
            return self._mono_to_stereo(pcm_samples)

        return pcm_samples

    def _close(self) -> None:
        """Stop the stream, if one was opened."""
        # The stream is None when _open() was never called or when the input
        # was already closed; closing must not fail in those cases.
        stream, self._stream = self._stream, None
        if stream is not None:
            stream.stop()
|