Merge pull request #701 from google/gbg/speaker-app-opus

speaker app: enable opus, enable more options
2025-05-29 16:55:05 -04:00
parent 4d07726acf 2e523b6f49
commit b8a055de45
4 changed files with 246 additions and 78 deletions
@@ -15,6 +15,7 @@
          <tr><td>Codec</td><td><span id="codecText"></span></td></tr>
          <tr><td>Packets</td><td><span id="packetsReceivedText"></span></td></tr>
          <tr><td>Bytes</td><td><span id="bytesReceivedText"></span></td></tr>
+          <tr><td>Bitrate</td><td><span id="bitrate"></span></td></tr>
        </table>
      </td>
      <td>
@@ -7,17 +7,19 @@ let connectionText;
 let codecText;
 let packetsReceivedText;
 let bytesReceivedText;
+let bitrateText;
 let streamStateText;
 let connectionStateText;
 let controlsDiv;
 let audioOnButton;
-let mediaSource;
-let sourceBuffer;
-let audioElement;
+let audioDecoder;
+let audioCodec;
 let audioContext;
 let audioAnalyzer;
 let audioFrequencyBinCount;
 let audioFrequencyData;
+let nextAudioStartPosition = 0;
+let audioStartTime = 0;
 let packetsReceived = 0;
 let bytesReceived = 0;
 let audioState = "stopped";
@@ -29,20 +31,17 @@ let bandwidthCanvas;
 let bandwidthCanvasContext;
 let bandwidthBinCount;
 let bandwidthBins = [];
+let bitrateSamples = [];

 const FFT_WIDTH = 800;
 const FFT_HEIGHT = 256;
 const BANDWIDTH_WIDTH = 500;
 const BANDWIDTH_HEIGHT = 100;
-
-function hexToBytes(hex) {
-    return Uint8Array.from(hex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
-}
+const BITRATE_WINDOW = 30;

 function init() {
    initUI();
-    initMediaSource();
-    initAudioElement();
+    initAudioContext();
    initAnalyzer();

    connect();
@@ -56,6 +55,7 @@ function initUI() {
    codecText = document.getElementById("codecText");
    packetsReceivedText = document.getElementById("packetsReceivedText");
    bytesReceivedText = document.getElementById("bytesReceivedText");
+    bitrateText = document.getElementById("bitrate");
    streamStateText = document.getElementById("streamStateText");
    connectionStateText = document.getElementById("connectionStateText");
    audioSupportMessageText = document.getElementById("audioSupportMessageText");
@@ -67,17 +67,9 @@ function initUI() {
    requestAnimationFrame(onAnimationFrame);
 }

-function initMediaSource() {
-    mediaSource = new MediaSource();
-    mediaSource.onsourceopen = onMediaSourceOpen;
-    mediaSource.onsourceclose = onMediaSourceClose;
-    mediaSource.onsourceended = onMediaSourceEnd;
-}
-
-function initAudioElement() {
-    audioElement = document.getElementById("audio");
-    audioElement.src = URL.createObjectURL(mediaSource);
-    // audioElement.controls = true;
+function initAudioContext() {
+    audioContext = new AudioContext();
+    audioContext.onstatechange = () => console.log("AudioContext state:", audioContext.state);
 }

 function initAnalyzer() {
@@ -94,24 +86,16 @@ function initAnalyzer() {
    bandwidthCanvasContext = bandwidthCanvas.getContext('2d');
    bandwidthCanvasContext.fillStyle = "rgb(255, 255, 255)";
    bandwidthCanvasContext.fillRect(0, 0, BANDWIDTH_WIDTH, BANDWIDTH_HEIGHT);
-}
-
-function startAnalyzer() {
-    // FFT
-    if (audioElement.captureStream !== undefined) {
-        audioContext = new AudioContext();
-        audioAnalyzer = audioContext.createAnalyser();
-        audioAnalyzer.fftSize = 128;
-        audioFrequencyBinCount = audioAnalyzer.frequencyBinCount;
-        audioFrequencyData = new Uint8Array(audioFrequencyBinCount);
-        const stream = audioElement.captureStream();
-        const source = audioContext.createMediaStreamSource(stream);
-        source.connect(audioAnalyzer);
-    }
-
-    // Bandwidth
    bandwidthBinCount = BANDWIDTH_WIDTH / 2;
    bandwidthBins = [];
+    bitrateSamples = [];
+
+    audioAnalyzer = audioContext.createAnalyser();
+    audioAnalyzer.fftSize = 128;
+    audioFrequencyBinCount = audioAnalyzer.frequencyBinCount;
+    audioFrequencyData = new Uint8Array(audioFrequencyBinCount);
+
+    audioAnalyzer.connect(audioContext.destination)
 }

 function setConnectionText(message) {
@@ -148,7 +132,8 @@ function onAnimationFrame() {
    bandwidthCanvasContext.fillRect(0, 0, BANDWIDTH_WIDTH, BANDWIDTH_HEIGHT);
    bandwidthCanvasContext.fillStyle = `rgb(100, 100, 100)`;
    for (let t = 0; t < bandwidthBins.length; t++) {
-        const lineHeight = (bandwidthBins[t] / 1000) * BANDWIDTH_HEIGHT;
+        const bytesReceived = bandwidthBins[t]
+        const lineHeight = (bytesReceived / 1000) * BANDWIDTH_HEIGHT;
        bandwidthCanvasContext.fillRect(t * 2, BANDWIDTH_HEIGHT - lineHeight, 2, lineHeight);
    }

@@ -156,28 +141,14 @@ function onAnimationFrame() {
    requestAnimationFrame(onAnimationFrame);
 }

-function onMediaSourceOpen() {
-    console.log(this.readyState);
-    sourceBuffer = mediaSource.addSourceBuffer("audio/aac");
-}
-
-function onMediaSourceClose() {
-    console.log(this.readyState);
-}
-
-function onMediaSourceEnd() {
-    console.log(this.readyState);
-}
-
 async function startAudio() {
    try {
        console.log("starting audio...");
        audioOnButton.disabled = true;
        audioState = "starting";
-        await audioElement.play();
+        audioContext.resume();
        console.log("audio started");
        audioState = "playing";
-        startAnalyzer();
    } catch(error) {
        console.error(`play failed: ${error}`);
        audioState = "stopped";
@@ -185,12 +156,47 @@ async function startAudio() {
    }
 }

-function onAudioPacket(packet) {
-    if (audioState != "stopped") {
-        // Queue the audio packet.
-        sourceBuffer.appendBuffer(packet);
+function onDecodedAudio(audioData) {
+    const bufferSource = audioContext.createBufferSource()
+
+    const now = audioContext.currentTime;
+    let nextAudioStartTime = audioStartTime + (nextAudioStartPosition / audioData.sampleRate);
+    if (nextAudioStartTime < now) {
+        console.log("starting new audio time base")
+        audioStartTime = now;
+        nextAudioStartTime = now;
+        nextAudioStartPosition = 0;
+    } else {
+        console.log(`audio buffer scheduled in ${nextAudioStartTime - now}`)
    }

+    const audioBuffer = audioContext.createBuffer(
+        audioData.numberOfChannels,
+        audioData.numberOfFrames,
+        audioData.sampleRate
+    );
+
+    for (let channel = 0; channel < audioData.numberOfChannels; channel++) {
+        audioData.copyTo(
+            audioBuffer.getChannelData(channel),
+            {
+                planeIndex: channel,
+                format: "f32-planar"
+            }
+        )
+    }
+
+    bufferSource.buffer = audioBuffer;
+    bufferSource.connect(audioAnalyzer)
+    bufferSource.start(nextAudioStartTime);
+    nextAudioStartPosition += audioData.numberOfFrames;
+}
+
+function onCodecError(error) {
+    console.log("Codec error:", error)
+}
+
+async function onAudioPacket(packet) {
    packetsReceived += 1;
    packetsReceivedText.innerText = packetsReceived;
    bytesReceived += packet.byteLength;
@@ -200,6 +206,48 @@ function onAudioPacket(packet) {
    if (bandwidthBins.length > bandwidthBinCount) {
        bandwidthBins.shift();
    }
+    bitrateSamples[bitrateSamples.length] = {ts: Date.now(), bytes: packet.byteLength}
+    if (bitrateSamples.length > BITRATE_WINDOW) {
+        bitrateSamples.shift();
+    }
+    if (bitrateSamples.length >= 2) {
+        const windowBytes = bitrateSamples.reduce((accumulator, x) => accumulator + x.bytes, 0) - bitrateSamples[0].bytes;
+        const elapsed = bitrateSamples[bitrateSamples.length-1].ts - bitrateSamples[0].ts;
+        const bitrate = Math.floor(8 * windowBytes / elapsed)
+        bitrateText.innerText = `${bitrate} kb/s`
+    }
+
+    if (audioState == "stopped") {
+        return;
+    }
+
+    if (audioDecoder === undefined) {
+        let audioConfig;
+        if (audioCodec == 'aac') {
+            audioConfig = {
+                codec: 'mp4a.40.2',
+                sampleRate: 44100, // ignored
+                numberOfChannels: 2, // ignored
+            }
+        } else if (audioCodec == 'opus') {
+            audioConfig = {
+                codec: 'opus',
+                sampleRate: 48000, // ignored
+                numberOfChannels: 2, // ignored
+            }
+        }
+        audioDecoder = new AudioDecoder({ output: onDecodedAudio, error: onCodecError });
+        audioDecoder.configure(audioConfig)
+    }
+
+    const encodedAudio = new EncodedAudioChunk({
+        type: "key",
+        data: packet,
+        timestamp: 0,
+        transfer: [packet],
+    });
+
+    audioDecoder.decode(encodedAudio);
 }

 function onChannelOpen() {
@@ -249,16 +297,19 @@ function onChannelMessage(message) {
    }
 }

-function onHelloMessage(params) {
+async function onHelloMessage(params) {
    codecText.innerText = params.codec;
-    if (params.codec != "aac") {
-        audioOnButton.disabled = true;
-        audioSupportMessageText.innerText = "Only AAC can be played, audio will be disabled";
-        audioSupportMessageText.style.display = "inline-block";
-    } else {
+
+    if (params.codec == "aac" || params.codec == "opus") {
+        audioCodec = params.codec
        audioSupportMessageText.innerText = "";
        audioSupportMessageText.style.display = "none";
+    } else {
+        audioOnButton.disabled = true;
+        audioSupportMessageText.innerText = "Only AAC and Opus can be played, audio will be disabled";
+        audioSupportMessageText.style.display = "inline-block";
    }
+
    if (params.streamState) {
        setStreamState(params.streamState);
    }
@@ -50,8 +50,10 @@ from bumble.a2dp import (
    make_audio_sink_service_sdp_records,
    A2DP_SBC_CODEC_TYPE,
    A2DP_MPEG_2_4_AAC_CODEC_TYPE,
+    A2DP_NON_A2DP_CODEC_TYPE,
    SbcMediaCodecInformation,
    AacMediaCodecInformation,
+    OpusMediaCodecInformation,
 )
 from bumble.utils import AsyncRunner
 from bumble.codecs import AacAudioRtpPacket
@@ -78,6 +80,8 @@ class AudioExtractor:
            return AacAudioExtractor()
        if codec == 'sbc':
            return SbcAudioExtractor()
+        if codec == 'opus':
+            return OpusAudioExtractor()

    def extract_audio(self, packet: MediaPacket) -> bytes:
        raise NotImplementedError()
@@ -102,6 +106,13 @@ class SbcAudioExtractor:
        return packet.payload[1:]


+# -----------------------------------------------------------------------------
+class OpusAudioExtractor:
+    def extract_audio(self, packet: MediaPacket) -> bytes:
+        # TODO: parse fields
+        return packet.payload[1:]
+
+
 # -----------------------------------------------------------------------------
 class Output:
    async def start(self) -> None:
@@ -235,7 +246,7 @@ class FfplayOutput(QueuedOutput):
        await super().start()

        self.subprocess = await asyncio.create_subprocess_shell(
-            f'ffplay -f {self.codec} pipe:0',
+            f'ffplay -probesize 32 -f {self.codec} pipe:0',
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
@@ -399,10 +410,24 @@ class Speaker:
        STARTED = 2
        SUSPENDED = 3

-    def __init__(self, device_config, transport, codec, discover, outputs, ui_port):
+    def __init__(
+        self,
+        device_config,
+        transport,
+        codec,
+        sampling_frequencies,
+        bitrate,
+        vbr,
+        discover,
+        outputs,
+        ui_port,
+    ):
        self.device_config = device_config
        self.transport = transport
        self.codec = codec
+        self.sampling_frequencies = sampling_frequencies
+        self.bitrate = bitrate
+        self.vbr = vbr
        self.discover = discover
        self.ui_port = ui_port
        self.device = None
@@ -438,32 +463,56 @@ class Speaker:
        if self.codec == 'sbc':
            return self.sbc_codec_capabilities()

+        if self.codec == 'opus':
+            return self.opus_codec_capabilities()
+
        raise RuntimeError('unsupported codec')

    def aac_codec_capabilities(self) -> MediaCodecCapabilities:
+        supported_sampling_frequencies = AacMediaCodecInformation.SamplingFrequency(0)
+        for sampling_frequency in self.sampling_frequencies or [
+            8000,
+            11025,
+            12000,
+            16000,
+            22050,
+            24000,
+            32000,
+            44100,
+            48000,
+        ]:
+            supported_sampling_frequencies |= (
+                AacMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
+            )
        return MediaCodecCapabilities(
            media_type=AVDTP_AUDIO_MEDIA_TYPE,
            media_codec_type=A2DP_MPEG_2_4_AAC_CODEC_TYPE,
            media_codec_information=AacMediaCodecInformation(
                object_type=AacMediaCodecInformation.ObjectType.MPEG_2_AAC_LC,
-                sampling_frequency=AacMediaCodecInformation.SamplingFrequency.SF_48000
-                | AacMediaCodecInformation.SamplingFrequency.SF_44100,
+                sampling_frequency=supported_sampling_frequencies,
                channels=AacMediaCodecInformation.Channels.MONO
                | AacMediaCodecInformation.Channels.STEREO,
-                vbr=1,
-                bitrate=256000,
+                vbr=1 if self.vbr else 0,
+                bitrate=self.bitrate or 256000,
            ),
        )

    def sbc_codec_capabilities(self) -> MediaCodecCapabilities:
+        supported_sampling_frequencies = SbcMediaCodecInformation.SamplingFrequency(0)
+        for sampling_frequency in self.sampling_frequencies or [
+            16000,
+            32000,
+            44100,
+            48000,
+        ]:
+            supported_sampling_frequencies |= (
+                SbcMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
+            )
        return MediaCodecCapabilities(
            media_type=AVDTP_AUDIO_MEDIA_TYPE,
            media_codec_type=A2DP_SBC_CODEC_TYPE,
            media_codec_information=SbcMediaCodecInformation(
-                sampling_frequency=SbcMediaCodecInformation.SamplingFrequency.SF_48000
-                | SbcMediaCodecInformation.SamplingFrequency.SF_44100
-                | SbcMediaCodecInformation.SamplingFrequency.SF_32000
-                | SbcMediaCodecInformation.SamplingFrequency.SF_16000,
+                sampling_frequency=supported_sampling_frequencies,
                channel_mode=SbcMediaCodecInformation.ChannelMode.MONO
                | SbcMediaCodecInformation.ChannelMode.DUAL_CHANNEL
                | SbcMediaCodecInformation.ChannelMode.STEREO
@@ -481,6 +530,25 @@ class Speaker:
            ),
        )

+    def opus_codec_capabilities(self) -> MediaCodecCapabilities:
+        supported_sampling_frequencies = OpusMediaCodecInformation.SamplingFrequency(0)
+        for sampling_frequency in self.sampling_frequencies or [48000]:
+            supported_sampling_frequencies |= (
+                OpusMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
+            )
+        return MediaCodecCapabilities(
+            media_type=AVDTP_AUDIO_MEDIA_TYPE,
+            media_codec_type=A2DP_NON_A2DP_CODEC_TYPE,
+            media_codec_information=OpusMediaCodecInformation(
+                frame_size=OpusMediaCodecInformation.FrameSize.FS_10MS
+                | OpusMediaCodecInformation.FrameSize.FS_20MS,
+                channel_mode=OpusMediaCodecInformation.ChannelMode.MONO
+                | OpusMediaCodecInformation.ChannelMode.STEREO
+                | OpusMediaCodecInformation.ChannelMode.DUAL_MONO,
+                sampling_frequency=supported_sampling_frequencies,
+            ),
+        )
+
    async def dispatch_to_outputs(self, function):
        for output in self.outputs:
            await function(output)
@@ -675,7 +743,26 @@ def speaker_cli(ctx, device_config):

@click.command()
@click.option(
-    '--codec', type=click.Choice(['sbc', 'aac']), default='aac', show_default=True
+    '--codec',
+    type=click.Choice(['sbc', 'aac', 'opus']),
+    default='aac',
+    show_default=True,
+)
+@click.option(
+    '--sampling-frequency',
+    metavar='SAMPLING-FREQUENCY',
+    type=int,
+    multiple=True,
+    help='Enable a sampling frequency (may be specified more than once)',
+)
+@click.option(
+    '--bitrate',
+    metavar='BITRATE',
+    type=int,
+    help='Supported bitrate (AAC only)',
+)
+@click.option(
+    '--vbr/--no-vbr', is_flag=True, default=True, help='Enable VBR (AAC only)'
 )
@click.option(
    '--discover', is_flag=True, help='Discover remote endpoints once connected'
@@ -706,7 +793,16 @@ def speaker_cli(ctx, device_config):
@click.option('--device-config', metavar='FILENAME', help='Device configuration file')
@click.argument('transport')
 def speaker(
-    transport, codec, connect_address, discover, output, ui_port, device_config
+    transport,
+    codec,
+    sampling_frequency,
+    bitrate,
+    vbr,
+    connect_address,
+    discover,
+    output,
+    ui_port,
+    device_config,
 ):
    """Run the speaker."""

@@ -721,15 +817,27 @@ def speaker(
            output = list(filter(lambda x: x != '@ffplay', output))

    asyncio.run(
-        Speaker(device_config, transport, codec, discover, output, ui_port).run(
-            connect_address
-        )
+        Speaker(
+            device_config,
+            transport,
+            codec,
+            sampling_frequency,
+            bitrate,
+            vbr,
+            discover,
+            output,
+            ui_port,
+        ).run(connect_address)
    )


 # -----------------------------------------------------------------------------
 def main():
-    logging.basicConfig(level=os.environ.get('BUMBLE_LOGLEVEL', 'WARNING').upper())
+    logging.basicConfig(
+        level=os.environ.get('BUMBLE_LOGLEVEL', 'WARNING').upper(),
+        format="[%(asctime)s.%(msecs)03d] %(levelname)s:%(name)s:%(message)s",
+        datefmt="%H:%M:%S",
+    )
    speaker()


@@ -479,6 +479,14 @@ class OpusMediaCodecInformation(VendorSpecificMediaCodecInformation):
    class SamplingFrequency(enum.IntFlag):
        SF_48000 = 1 << 0

+        @classmethod
+        def from_int(
+            cls, sampling_frequency: int
+        ) -> OpusMediaCodecInformation.SamplingFrequency:
+            if sampling_frequency != 48000:
+                raise ValueError("no such sampling frequency")
+            return cls.SF_48000
+
    VENDOR_ID: ClassVar[int] = 0x000000E0
    CODEC_ID: ClassVar[int] = 0x0001