Merge pull request #701 from google/gbg/speaker-app-opus

speaker app: enable opus, enable more options
This commit is contained in:
Gilles Boccon-Gibod
2025-05-29 16:55:05 -04:00
committed by GitHub
4 changed files with 246 additions and 78 deletions

View File

@@ -15,6 +15,7 @@
<tr><td>Codec</td><td><span id="codecText"></span></td></tr>
<tr><td>Packets</td><td><span id="packetsReceivedText"></span></td></tr>
<tr><td>Bytes</td><td><span id="bytesReceivedText"></span></td></tr>
<tr><td>Bitrate</td><td><span id="bitrate"></span></td></tr>
</table>
</td>
<td>

View File

@@ -7,17 +7,19 @@ let connectionText;
let codecText;
let packetsReceivedText;
let bytesReceivedText;
let bitrateText;
let streamStateText;
let connectionStateText;
let controlsDiv;
let audioOnButton;
let mediaSource;
let sourceBuffer;
let audioElement;
let audioDecoder;
let audioCodec;
let audioContext;
let audioAnalyzer;
let audioFrequencyBinCount;
let audioFrequencyData;
let nextAudioStartPosition = 0;
let audioStartTime = 0;
let packetsReceived = 0;
let bytesReceived = 0;
let audioState = "stopped";
@@ -29,20 +31,17 @@ let bandwidthCanvas;
let bandwidthCanvasContext;
let bandwidthBinCount;
let bandwidthBins = [];
let bitrateSamples = [];
const FFT_WIDTH = 800;
const FFT_HEIGHT = 256;
const BANDWIDTH_WIDTH = 500;
const BANDWIDTH_HEIGHT = 100;
function hexToBytes(hex) {
return Uint8Array.from(hex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
}
const BITRATE_WINDOW = 30;
function init() {
initUI();
initMediaSource();
initAudioElement();
initAudioContext();
initAnalyzer();
connect();
@@ -56,6 +55,7 @@ function initUI() {
codecText = document.getElementById("codecText");
packetsReceivedText = document.getElementById("packetsReceivedText");
bytesReceivedText = document.getElementById("bytesReceivedText");
bitrateText = document.getElementById("bitrate");
streamStateText = document.getElementById("streamStateText");
connectionStateText = document.getElementById("connectionStateText");
audioSupportMessageText = document.getElementById("audioSupportMessageText");
@@ -67,17 +67,9 @@ function initUI() {
requestAnimationFrame(onAnimationFrame);
}
function initMediaSource() {
mediaSource = new MediaSource();
mediaSource.onsourceopen = onMediaSourceOpen;
mediaSource.onsourceclose = onMediaSourceClose;
mediaSource.onsourceended = onMediaSourceEnd;
}
function initAudioElement() {
audioElement = document.getElementById("audio");
audioElement.src = URL.createObjectURL(mediaSource);
// audioElement.controls = true;
function initAudioContext() {
audioContext = new AudioContext();
audioContext.onstatechange = () => console.log("AudioContext state:", audioContext.state);
}
function initAnalyzer() {
@@ -94,24 +86,16 @@ function initAnalyzer() {
bandwidthCanvasContext = bandwidthCanvas.getContext('2d');
bandwidthCanvasContext.fillStyle = "rgb(255, 255, 255)";
bandwidthCanvasContext.fillRect(0, 0, BANDWIDTH_WIDTH, BANDWIDTH_HEIGHT);
}
function startAnalyzer() {
// FFT
if (audioElement.captureStream !== undefined) {
audioContext = new AudioContext();
audioAnalyzer = audioContext.createAnalyser();
audioAnalyzer.fftSize = 128;
audioFrequencyBinCount = audioAnalyzer.frequencyBinCount;
audioFrequencyData = new Uint8Array(audioFrequencyBinCount);
const stream = audioElement.captureStream();
const source = audioContext.createMediaStreamSource(stream);
source.connect(audioAnalyzer);
}
// Bandwidth
bandwidthBinCount = BANDWIDTH_WIDTH / 2;
bandwidthBins = [];
bitrateSamples = [];
audioAnalyzer = audioContext.createAnalyser();
audioAnalyzer.fftSize = 128;
audioFrequencyBinCount = audioAnalyzer.frequencyBinCount;
audioFrequencyData = new Uint8Array(audioFrequencyBinCount);
audioAnalyzer.connect(audioContext.destination)
}
function setConnectionText(message) {
@@ -148,7 +132,8 @@ function onAnimationFrame() {
bandwidthCanvasContext.fillRect(0, 0, BANDWIDTH_WIDTH, BANDWIDTH_HEIGHT);
bandwidthCanvasContext.fillStyle = `rgb(100, 100, 100)`;
for (let t = 0; t < bandwidthBins.length; t++) {
const lineHeight = (bandwidthBins[t] / 1000) * BANDWIDTH_HEIGHT;
const bytesReceived = bandwidthBins[t]
const lineHeight = (bytesReceived / 1000) * BANDWIDTH_HEIGHT;
bandwidthCanvasContext.fillRect(t * 2, BANDWIDTH_HEIGHT - lineHeight, 2, lineHeight);
}
@@ -156,28 +141,14 @@ function onAnimationFrame() {
requestAnimationFrame(onAnimationFrame);
}
function onMediaSourceOpen() {
console.log(this.readyState);
sourceBuffer = mediaSource.addSourceBuffer("audio/aac");
}
function onMediaSourceClose() {
console.log(this.readyState);
}
function onMediaSourceEnd() {
console.log(this.readyState);
}
async function startAudio() {
try {
console.log("starting audio...");
audioOnButton.disabled = true;
audioState = "starting";
await audioElement.play();
audioContext.resume();
console.log("audio started");
audioState = "playing";
startAnalyzer();
} catch(error) {
console.error(`play failed: ${error}`);
audioState = "stopped";
@@ -185,12 +156,47 @@ async function startAudio() {
}
}
function onAudioPacket(packet) {
if (audioState != "stopped") {
// Queue the audio packet.
sourceBuffer.appendBuffer(packet);
function onDecodedAudio(audioData) {
const bufferSource = audioContext.createBufferSource()
const now = audioContext.currentTime;
let nextAudioStartTime = audioStartTime + (nextAudioStartPosition / audioData.sampleRate);
if (nextAudioStartTime < now) {
console.log("starting new audio time base")
audioStartTime = now;
nextAudioStartTime = now;
nextAudioStartPosition = 0;
} else {
console.log(`audio buffer scheduled in ${nextAudioStartTime - now}`)
}
const audioBuffer = audioContext.createBuffer(
audioData.numberOfChannels,
audioData.numberOfFrames,
audioData.sampleRate
);
for (let channel = 0; channel < audioData.numberOfChannels; channel++) {
audioData.copyTo(
audioBuffer.getChannelData(channel),
{
planeIndex: channel,
format: "f32-planar"
}
)
}
bufferSource.buffer = audioBuffer;
bufferSource.connect(audioAnalyzer)
bufferSource.start(nextAudioStartTime);
nextAudioStartPosition += audioData.numberOfFrames;
}
function onCodecError(error) {
console.log("Codec error:", error)
}
async function onAudioPacket(packet) {
packetsReceived += 1;
packetsReceivedText.innerText = packetsReceived;
bytesReceived += packet.byteLength;
@@ -200,6 +206,48 @@ function onAudioPacket(packet) {
if (bandwidthBins.length > bandwidthBinCount) {
bandwidthBins.shift();
}
bitrateSamples[bitrateSamples.length] = {ts: Date.now(), bytes: packet.byteLength}
if (bitrateSamples.length > BITRATE_WINDOW) {
bitrateSamples.shift();
}
if (bitrateSamples.length >= 2) {
const windowBytes = bitrateSamples.reduce((accumulator, x) => accumulator + x.bytes, 0) - bitrateSamples[0].bytes;
const elapsed = bitrateSamples[bitrateSamples.length-1].ts - bitrateSamples[0].ts;
const bitrate = Math.floor(8 * windowBytes / elapsed)
bitrateText.innerText = `${bitrate} kb/s`
}
if (audioState == "stopped") {
return;
}
if (audioDecoder === undefined) {
let audioConfig;
if (audioCodec == 'aac') {
audioConfig = {
codec: 'mp4a.40.2',
sampleRate: 44100, // ignored
numberOfChannels: 2, // ignored
}
} else if (audioCodec == 'opus') {
audioConfig = {
codec: 'opus',
sampleRate: 48000, // ignored
numberOfChannels: 2, // ignored
}
}
audioDecoder = new AudioDecoder({ output: onDecodedAudio, error: onCodecError });
audioDecoder.configure(audioConfig)
}
const encodedAudio = new EncodedAudioChunk({
type: "key",
data: packet,
timestamp: 0,
transfer: [packet],
});
audioDecoder.decode(encodedAudio);
}
function onChannelOpen() {
@@ -249,16 +297,19 @@ function onChannelMessage(message) {
}
}
function onHelloMessage(params) {
async function onHelloMessage(params) {
codecText.innerText = params.codec;
if (params.codec != "aac") {
audioOnButton.disabled = true;
audioSupportMessageText.innerText = "Only AAC can be played, audio will be disabled";
audioSupportMessageText.style.display = "inline-block";
} else {
if (params.codec == "aac" || params.codec == "opus") {
audioCodec = params.codec
audioSupportMessageText.innerText = "";
audioSupportMessageText.style.display = "none";
} else {
audioOnButton.disabled = true;
audioSupportMessageText.innerText = "Only AAC and Opus can be played, audio will be disabled";
audioSupportMessageText.style.display = "inline-block";
}
if (params.streamState) {
setStreamState(params.streamState);
}

View File

@@ -50,8 +50,10 @@ from bumble.a2dp import (
make_audio_sink_service_sdp_records,
A2DP_SBC_CODEC_TYPE,
A2DP_MPEG_2_4_AAC_CODEC_TYPE,
A2DP_NON_A2DP_CODEC_TYPE,
SbcMediaCodecInformation,
AacMediaCodecInformation,
OpusMediaCodecInformation,
)
from bumble.utils import AsyncRunner
from bumble.codecs import AacAudioRtpPacket
@@ -78,6 +80,8 @@ class AudioExtractor:
return AacAudioExtractor()
if codec == 'sbc':
return SbcAudioExtractor()
if codec == 'opus':
return OpusAudioExtractor()
def extract_audio(self, packet: MediaPacket) -> bytes:
raise NotImplementedError()
@@ -102,6 +106,13 @@ class SbcAudioExtractor:
return packet.payload[1:]
# -----------------------------------------------------------------------------
class OpusAudioExtractor:
def extract_audio(self, packet: MediaPacket) -> bytes:
# TODO: parse fields
return packet.payload[1:]
# -----------------------------------------------------------------------------
class Output:
async def start(self) -> None:
@@ -235,7 +246,7 @@ class FfplayOutput(QueuedOutput):
await super().start()
self.subprocess = await asyncio.create_subprocess_shell(
f'ffplay -f {self.codec} pipe:0',
f'ffplay -probesize 32 -f {self.codec} pipe:0',
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
@@ -399,10 +410,24 @@ class Speaker:
STARTED = 2
SUSPENDED = 3
def __init__(self, device_config, transport, codec, discover, outputs, ui_port):
def __init__(
self,
device_config,
transport,
codec,
sampling_frequencies,
bitrate,
vbr,
discover,
outputs,
ui_port,
):
self.device_config = device_config
self.transport = transport
self.codec = codec
self.sampling_frequencies = sampling_frequencies
self.bitrate = bitrate
self.vbr = vbr
self.discover = discover
self.ui_port = ui_port
self.device = None
@@ -438,32 +463,56 @@ class Speaker:
if self.codec == 'sbc':
return self.sbc_codec_capabilities()
if self.codec == 'opus':
return self.opus_codec_capabilities()
raise RuntimeError('unsupported codec')
def aac_codec_capabilities(self) -> MediaCodecCapabilities:
supported_sampling_frequencies = AacMediaCodecInformation.SamplingFrequency(0)
for sampling_frequency in self.sampling_frequencies or [
8000,
11025,
12000,
16000,
22050,
24000,
32000,
44100,
48000,
]:
supported_sampling_frequencies |= (
AacMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
)
return MediaCodecCapabilities(
media_type=AVDTP_AUDIO_MEDIA_TYPE,
media_codec_type=A2DP_MPEG_2_4_AAC_CODEC_TYPE,
media_codec_information=AacMediaCodecInformation(
object_type=AacMediaCodecInformation.ObjectType.MPEG_2_AAC_LC,
sampling_frequency=AacMediaCodecInformation.SamplingFrequency.SF_48000
| AacMediaCodecInformation.SamplingFrequency.SF_44100,
sampling_frequency=supported_sampling_frequencies,
channels=AacMediaCodecInformation.Channels.MONO
| AacMediaCodecInformation.Channels.STEREO,
vbr=1,
bitrate=256000,
vbr=1 if self.vbr else 0,
bitrate=self.bitrate or 256000,
),
)
def sbc_codec_capabilities(self) -> MediaCodecCapabilities:
supported_sampling_frequencies = SbcMediaCodecInformation.SamplingFrequency(0)
for sampling_frequency in self.sampling_frequencies or [
16000,
32000,
44100,
48000,
]:
supported_sampling_frequencies |= (
SbcMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
)
return MediaCodecCapabilities(
media_type=AVDTP_AUDIO_MEDIA_TYPE,
media_codec_type=A2DP_SBC_CODEC_TYPE,
media_codec_information=SbcMediaCodecInformation(
sampling_frequency=SbcMediaCodecInformation.SamplingFrequency.SF_48000
| SbcMediaCodecInformation.SamplingFrequency.SF_44100
| SbcMediaCodecInformation.SamplingFrequency.SF_32000
| SbcMediaCodecInformation.SamplingFrequency.SF_16000,
sampling_frequency=supported_sampling_frequencies,
channel_mode=SbcMediaCodecInformation.ChannelMode.MONO
| SbcMediaCodecInformation.ChannelMode.DUAL_CHANNEL
| SbcMediaCodecInformation.ChannelMode.STEREO
@@ -481,6 +530,25 @@ class Speaker:
),
)
def opus_codec_capabilities(self) -> MediaCodecCapabilities:
supported_sampling_frequencies = OpusMediaCodecInformation.SamplingFrequency(0)
for sampling_frequency in self.sampling_frequencies or [48000]:
supported_sampling_frequencies |= (
OpusMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
)
return MediaCodecCapabilities(
media_type=AVDTP_AUDIO_MEDIA_TYPE,
media_codec_type=A2DP_NON_A2DP_CODEC_TYPE,
media_codec_information=OpusMediaCodecInformation(
frame_size=OpusMediaCodecInformation.FrameSize.FS_10MS
| OpusMediaCodecInformation.FrameSize.FS_20MS,
channel_mode=OpusMediaCodecInformation.ChannelMode.MONO
| OpusMediaCodecInformation.ChannelMode.STEREO
| OpusMediaCodecInformation.ChannelMode.DUAL_MONO,
sampling_frequency=supported_sampling_frequencies,
),
)
async def dispatch_to_outputs(self, function):
for output in self.outputs:
await function(output)
@@ -675,7 +743,26 @@ def speaker_cli(ctx, device_config):
@click.command()
@click.option(
'--codec', type=click.Choice(['sbc', 'aac']), default='aac', show_default=True
'--codec',
type=click.Choice(['sbc', 'aac', 'opus']),
default='aac',
show_default=True,
)
@click.option(
'--sampling-frequency',
metavar='SAMPLING-FREQUENCY',
type=int,
multiple=True,
help='Enable a sampling frequency (may be specified more than once)',
)
@click.option(
'--bitrate',
metavar='BITRATE',
type=int,
help='Supported bitrate (AAC only)',
)
@click.option(
'--vbr/--no-vbr', is_flag=True, default=True, help='Enable VBR (AAC only)'
)
@click.option(
'--discover', is_flag=True, help='Discover remote endpoints once connected'
@@ -706,7 +793,16 @@ def speaker_cli(ctx, device_config):
@click.option('--device-config', metavar='FILENAME', help='Device configuration file')
@click.argument('transport')
def speaker(
transport, codec, connect_address, discover, output, ui_port, device_config
transport,
codec,
sampling_frequency,
bitrate,
vbr,
connect_address,
discover,
output,
ui_port,
device_config,
):
"""Run the speaker."""
@@ -721,15 +817,27 @@ def speaker(
output = list(filter(lambda x: x != '@ffplay', output))
asyncio.run(
Speaker(device_config, transport, codec, discover, output, ui_port).run(
connect_address
)
Speaker(
device_config,
transport,
codec,
sampling_frequency,
bitrate,
vbr,
discover,
output,
ui_port,
).run(connect_address)
)
# -----------------------------------------------------------------------------
def main():
logging.basicConfig(level=os.environ.get('BUMBLE_LOGLEVEL', 'WARNING').upper())
logging.basicConfig(
level=os.environ.get('BUMBLE_LOGLEVEL', 'WARNING').upper(),
format="[%(asctime)s.%(msecs)03d] %(levelname)s:%(name)s:%(message)s",
datefmt="%H:%M:%S",
)
speaker()

View File

@@ -479,6 +479,14 @@ class OpusMediaCodecInformation(VendorSpecificMediaCodecInformation):
class SamplingFrequency(enum.IntFlag):
SF_48000 = 1 << 0
@classmethod
def from_int(
cls, sampling_frequency: int
) -> OpusMediaCodecInformation.SamplingFrequency:
if sampling_frequency != 48000:
raise ValueError("no such sampling frequency")
return cls.SF_48000
VENDOR_ID: ClassVar[int] = 0x000000E0
CODEC_ID: ClassVar[int] = 0x0001