Review notes. Text before the first "diff --git" header is not part of any hunk.

This patch arrived with its line structure collapsed and with the markup of
the speaker.html hunk stripped. It has been re-flowed into unified-diff form;
line breaks and blank lines were placed so that every hunk matches the old/new
line counts in its "@@" header.

Assumptions to confirm against the real files before applying:
  * Indentation (4 spaces in speaker.js and the Python files) is inferred.
  * The speaker.html rows are reconstructed. Only the element id "bitrate" is
    established by the patch itself (speaker.js looks it up by that id); the
    surrounding tags are a best guess.
  * The final context line of the bumble/a2dp.py hunk is assumed to be blank.

Changes made to the added JavaScript during review:
  * startAudio: await audioContext.resume(), so that a rejection reaches the
    existing catch block and the state only becomes "playing" after the
    context has resumed.
  * onDecodedAudio: close the AudioData once its samples have been copied
    into the AudioBuffer.
  * onAudioPacket: report 0 kb/s instead of dividing by zero when the first
    and last samples of the window carry the same Date.now() value.
  * onAnimationFrame: the per-bin local no longer shadows the module-level
    bytesReceived counter.

The "@@" headers of the three affected speaker.js hunks were recomputed. The
"index" line of speaker.js still carries the blob ids of the patch as
received, which no longer describe the modified post-image.

diff --git a/apps/speaker/speaker.html b/apps/speaker/speaker.html
index 550049bc..3b807e1a 100644
--- a/apps/speaker/speaker.html
+++ b/apps/speaker/speaker.html
@@ -15,6 +15,7 @@
           <tr><td>Codec</td><td><span id="codecText"></span></td></tr>
           <tr><td>Packets</td><td><span id="packetsReceivedText"></span></td></tr>
           <tr><td>Bytes</td><td><span id="bytesReceivedText"></span></td></tr>
+          <tr><td>Bitrate</td><td><span id="bitrate"></span></td></tr>
         </table>
       </td>
       <td>
diff --git a/apps/speaker/speaker.js b/apps/speaker/speaker.js
index 77cb1ff3..9ab04576 100644
--- a/apps/speaker/speaker.js
+++ b/apps/speaker/speaker.js
@@ -7,17 +7,19 @@ let connectionText;
 let codecText;
 let packetsReceivedText;
 let bytesReceivedText;
+let bitrateText;
 let streamStateText;
 let connectionStateText;
 let controlsDiv;
 let audioOnButton;
-let mediaSource;
-let sourceBuffer;
-let audioElement;
+let audioDecoder;
+let audioCodec;
 let audioContext;
 let audioAnalyzer;
 let audioFrequencyBinCount;
 let audioFrequencyData;
+let nextAudioStartPosition = 0;
+let audioStartTime = 0;
 let packetsReceived = 0;
 let bytesReceived = 0;
 let audioState = "stopped";
@@ -29,20 +31,17 @@ let bandwidthCanvas;
 let bandwidthCanvasContext;
 let bandwidthBinCount;
 let bandwidthBins = [];
+let bitrateSamples = [];
 
 const FFT_WIDTH = 800;
 const FFT_HEIGHT = 256;
 const BANDWIDTH_WIDTH = 500;
 const BANDWIDTH_HEIGHT = 100;
-
-function hexToBytes(hex) {
-    return Uint8Array.from(hex.match(/.{1,2}/g).map((byte) => parseInt(byte, 16)));
-}
+const BITRATE_WINDOW = 30;
 
 function init() {
     initUI();
-    initMediaSource();
-    initAudioElement();
+    initAudioContext();
     initAnalyzer();
 
     connect();
@@ -56,6 +55,7 @@ function initUI() {
     codecText = document.getElementById("codecText");
     packetsReceivedText = document.getElementById("packetsReceivedText");
     bytesReceivedText = document.getElementById("bytesReceivedText");
+    bitrateText = document.getElementById("bitrate");
     streamStateText = document.getElementById("streamStateText");
     connectionStateText = document.getElementById("connectionStateText");
     audioSupportMessageText = document.getElementById("audioSupportMessageText");
@@ -67,17 +67,9 @@ function initUI() {
     requestAnimationFrame(onAnimationFrame);
 }
 
-function initMediaSource() {
-    mediaSource = new MediaSource();
-    mediaSource.onsourceopen = onMediaSourceOpen;
-    mediaSource.onsourceclose = onMediaSourceClose;
-    mediaSource.onsourceended = onMediaSourceEnd;
-}
-
-function initAudioElement() {
-    audioElement = document.getElementById("audio");
-    audioElement.src = URL.createObjectURL(mediaSource);
-    // audioElement.controls = true;
+function initAudioContext() {
+    audioContext = new AudioContext();
+    audioContext.onstatechange = () => console.log("AudioContext state:", audioContext.state);
 }
 
 function initAnalyzer() {
@@ -94,24 +86,16 @@ function initAnalyzer() {
     bandwidthCanvasContext = bandwidthCanvas.getContext('2d');
     bandwidthCanvasContext.fillStyle = "rgb(255, 255, 255)";
     bandwidthCanvasContext.fillRect(0, 0, BANDWIDTH_WIDTH, BANDWIDTH_HEIGHT);
-}
-
-function startAnalyzer() {
-    // FFT
-    if (audioElement.captureStream !== undefined) {
-        audioContext = new AudioContext();
-        audioAnalyzer = audioContext.createAnalyser();
-        audioAnalyzer.fftSize = 128;
-        audioFrequencyBinCount = audioAnalyzer.frequencyBinCount;
-        audioFrequencyData = new Uint8Array(audioFrequencyBinCount);
-        const stream = audioElement.captureStream();
-        const source = audioContext.createMediaStreamSource(stream);
-        source.connect(audioAnalyzer);
-    }
-
-    // Bandwidth
     bandwidthBinCount = BANDWIDTH_WIDTH / 2;
     bandwidthBins = [];
+    bitrateSamples = [];
+
+    audioAnalyzer = audioContext.createAnalyser();
+    audioAnalyzer.fftSize = 128;
+    audioFrequencyBinCount = audioAnalyzer.frequencyBinCount;
+    audioFrequencyData = new Uint8Array(audioFrequencyBinCount);
+
+    audioAnalyzer.connect(audioContext.destination)
 }
 
 function setConnectionText(message) {
@@ -148,7 +132,8 @@ function onAnimationFrame() {
     bandwidthCanvasContext.fillRect(0, 0, BANDWIDTH_WIDTH, BANDWIDTH_HEIGHT);
     bandwidthCanvasContext.fillStyle = `rgb(100, 100, 100)`;
     for (let t = 0; t < bandwidthBins.length; t++) {
-        const lineHeight = (bandwidthBins[t] / 1000) * BANDWIDTH_HEIGHT;
+        const binBytes = bandwidthBins[t];
+        const lineHeight = (binBytes / 1000) * BANDWIDTH_HEIGHT;
         bandwidthCanvasContext.fillRect(t * 2, BANDWIDTH_HEIGHT - lineHeight, 2, lineHeight);
     }
 
@@ -156,28 +141,14 @@ function onAnimationFrame() {
     requestAnimationFrame(onAnimationFrame);
 }
 
-function onMediaSourceOpen() {
-    console.log(this.readyState);
-    sourceBuffer = mediaSource.addSourceBuffer("audio/aac");
-}
-
-function onMediaSourceClose() {
-    console.log(this.readyState);
-}
-
-function onMediaSourceEnd() {
-    console.log(this.readyState);
-}
-
 async function startAudio() {
     try {
         console.log("starting audio...");
         audioOnButton.disabled = true;
         audioState = "starting";
-        await audioElement.play();
+        await audioContext.resume();
         console.log("audio started");
         audioState = "playing";
-        startAnalyzer();
     } catch(error) {
         console.error(`play failed: ${error}`);
         audioState = "stopped";
@@ -185,12 +156,50 @@ async function startAudio() {
     }
 }
 
-function onAudioPacket(packet) {
-    if (audioState != "stopped") {
-        // Queue the audio packet.
-        sourceBuffer.appendBuffer(packet);
+// Schedule one decoded AudioData chunk right after the previously queued one.
+function onDecodedAudio(audioData) {
+    const bufferSource = audioContext.createBufferSource()
+
+    const now = audioContext.currentTime;
+    let nextAudioStartTime = audioStartTime + (nextAudioStartPosition / audioData.sampleRate);
+    if (nextAudioStartTime < now) {
+        console.log("starting new audio time base")
+        audioStartTime = now;
+        nextAudioStartTime = now;
+        nextAudioStartPosition = 0;
+    } else {
+        console.log(`audio buffer scheduled in ${nextAudioStartTime - now}`)
     }
 
+    const audioBuffer = audioContext.createBuffer(
+        audioData.numberOfChannels,
+        audioData.numberOfFrames,
+        audioData.sampleRate
+    );
+
+    for (let channel = 0; channel < audioData.numberOfChannels; channel++) {
+        audioData.copyTo(
+            audioBuffer.getChannelData(channel),
+            {
+                planeIndex: channel,
+                format: "f32-planar"
+            }
+        )
+    }
+
+    bufferSource.buffer = audioBuffer;
+    bufferSource.connect(audioAnalyzer)
+    bufferSource.start(nextAudioStartTime);
+    nextAudioStartPosition += audioData.numberOfFrames;
+    // The samples now live in audioBuffer: release the decoder output.
+    audioData.close();
+}
+
+function onCodecError(error) {
+    console.log("Codec error:", error)
+}
+
+async function onAudioPacket(packet) {
     packetsReceived += 1;
     packetsReceivedText.innerText = packetsReceived;
     bytesReceived += packet.byteLength;
@@ -200,6 +209,49 @@ function onAudioPacket(packet) {
     if (bandwidthBins.length > bandwidthBinCount) {
         bandwidthBins.shift();
     }
+    bitrateSamples[bitrateSamples.length] = {ts: Date.now(), bytes: packet.byteLength}
+    if (bitrateSamples.length > BITRATE_WINDOW) {
+        bitrateSamples.shift();
+    }
+    if (bitrateSamples.length >= 2) {
+        const windowBytes = bitrateSamples.reduce((accumulator, x) => accumulator + x.bytes, 0) - bitrateSamples[0].bytes;
+        const elapsed = bitrateSamples[bitrateSamples.length-1].ts - bitrateSamples[0].ts;
+        // Samples taken within the same millisecond give elapsed == 0.
+        const bitrate = elapsed > 0 ? Math.floor(8 * windowBytes / elapsed) : 0;
+        bitrateText.innerText = `${bitrate} kb/s`
+    }
+
+    if (audioState == "stopped") {
+        return;
+    }
+
+    if (audioDecoder === undefined) {
+        let audioConfig;
+        if (audioCodec == 'aac') {
+            audioConfig = {
+                codec: 'mp4a.40.2',
+                sampleRate: 44100, // ignored
+                numberOfChannels: 2, // ignored
+            }
+        } else if (audioCodec == 'opus') {
+            audioConfig = {
+                codec: 'opus',
+                sampleRate: 48000, // ignored
+                numberOfChannels: 2, // ignored
+            }
+        }
+        audioDecoder = new AudioDecoder({ output: onDecodedAudio, error: onCodecError });
+        audioDecoder.configure(audioConfig)
+    }
+
+    const encodedAudio = new EncodedAudioChunk({
+        type: "key",
+        data: packet,
+        timestamp: 0,
+        transfer: [packet],
+    });
+
+    audioDecoder.decode(encodedAudio);
 }
 
 function onChannelOpen() {
@@ -249,16 +301,19 @@ function onChannelMessage(message) {
     }
 }
 
-function onHelloMessage(params) {
+async function onHelloMessage(params) {
     codecText.innerText = params.codec;
-    if (params.codec != "aac") {
-        audioOnButton.disabled = true;
-        audioSupportMessageText.innerText = "Only AAC can be played, audio will be disabled";
-        audioSupportMessageText.style.display = "inline-block";
-    } else {
+
+    if (params.codec == "aac" || params.codec == "opus") {
+        audioCodec = params.codec
         audioSupportMessageText.innerText = "";
         audioSupportMessageText.style.display = "none";
+    } else {
+        audioOnButton.disabled = true;
+        audioSupportMessageText.innerText = "Only AAC and Opus can be played, audio will be disabled";
+        audioSupportMessageText.style.display = "inline-block";
     }
+
     if (params.streamState) {
         setStreamState(params.streamState);
     }
diff --git a/apps/speaker/speaker.py b/apps/speaker/speaker.py
index 92f55cbc..2f0105ce 100644
--- a/apps/speaker/speaker.py
+++ b/apps/speaker/speaker.py
@@ -50,8 +50,10 @@ from bumble.a2dp import (
     make_audio_sink_service_sdp_records,
     A2DP_SBC_CODEC_TYPE,
     A2DP_MPEG_2_4_AAC_CODEC_TYPE,
+    A2DP_NON_A2DP_CODEC_TYPE,
     SbcMediaCodecInformation,
     AacMediaCodecInformation,
+    OpusMediaCodecInformation,
 )
 from bumble.utils import AsyncRunner
 from bumble.codecs import AacAudioRtpPacket
@@ -78,6 +80,8 @@ class AudioExtractor:
             return AacAudioExtractor()
         if codec == 'sbc':
             return SbcAudioExtractor()
+        if codec == 'opus':
+            return OpusAudioExtractor()
 
     def extract_audio(self, packet: MediaPacket) -> bytes:
         raise NotImplementedError()
@@ -102,6 +106,13 @@ class SbcAudioExtractor:
         return packet.payload[1:]
 
 
+# -----------------------------------------------------------------------------
+class OpusAudioExtractor:
+    def extract_audio(self, packet: MediaPacket) -> bytes:
+        # TODO: parse fields
+        return packet.payload[1:]
+
+
 # -----------------------------------------------------------------------------
 class Output:
     async def start(self) -> None:
@@ -235,7 +246,7 @@ class FfplayOutput(QueuedOutput):
         await super().start()
 
         self.subprocess = await asyncio.create_subprocess_shell(
-            f'ffplay -f {self.codec} pipe:0',
+            f'ffplay -probesize 32 -f {self.codec} pipe:0',
             stdin=asyncio.subprocess.PIPE,
             stdout=asyncio.subprocess.PIPE,
             stderr=asyncio.subprocess.PIPE,
@@ -399,10 +410,24 @@ class Speaker:
         STARTED = 2
         SUSPENDED = 3
 
-    def __init__(self, device_config, transport, codec, discover, outputs, ui_port):
+    def __init__(
+        self,
+        device_config,
+        transport,
+        codec,
+        sampling_frequencies,
+        bitrate,
+        vbr,
+        discover,
+        outputs,
+        ui_port,
+    ):
         self.device_config = device_config
         self.transport = transport
         self.codec = codec
+        self.sampling_frequencies = sampling_frequencies
+        self.bitrate = bitrate
+        self.vbr = vbr
         self.discover = discover
         self.ui_port = ui_port
         self.device = None
@@ -438,32 +463,56 @@ class Speaker:
         if self.codec == 'sbc':
             return self.sbc_codec_capabilities()
 
+        if self.codec == 'opus':
+            return self.opus_codec_capabilities()
+
         raise RuntimeError('unsupported codec')
 
     def aac_codec_capabilities(self) -> MediaCodecCapabilities:
+        supported_sampling_frequencies = AacMediaCodecInformation.SamplingFrequency(0)
+        for sampling_frequency in self.sampling_frequencies or [
+            8000,
+            11025,
+            12000,
+            16000,
+            22050,
+            24000,
+            32000,
+            44100,
+            48000,
+        ]:
+            supported_sampling_frequencies |= (
+                AacMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
+            )
         return MediaCodecCapabilities(
             media_type=AVDTP_AUDIO_MEDIA_TYPE,
             media_codec_type=A2DP_MPEG_2_4_AAC_CODEC_TYPE,
             media_codec_information=AacMediaCodecInformation(
                 object_type=AacMediaCodecInformation.ObjectType.MPEG_2_AAC_LC,
-                sampling_frequency=AacMediaCodecInformation.SamplingFrequency.SF_48000
-                | AacMediaCodecInformation.SamplingFrequency.SF_44100,
+                sampling_frequency=supported_sampling_frequencies,
                 channels=AacMediaCodecInformation.Channels.MONO
                 | AacMediaCodecInformation.Channels.STEREO,
-                vbr=1,
-                bitrate=256000,
+                vbr=1 if self.vbr else 0,
+                bitrate=self.bitrate or 256000,
             ),
         )
 
     def sbc_codec_capabilities(self) -> MediaCodecCapabilities:
+        supported_sampling_frequencies = SbcMediaCodecInformation.SamplingFrequency(0)
+        for sampling_frequency in self.sampling_frequencies or [
+            16000,
+            32000,
+            44100,
+            48000,
+        ]:
+            supported_sampling_frequencies |= (
+                SbcMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
+            )
         return MediaCodecCapabilities(
             media_type=AVDTP_AUDIO_MEDIA_TYPE,
             media_codec_type=A2DP_SBC_CODEC_TYPE,
             media_codec_information=SbcMediaCodecInformation(
-                sampling_frequency=SbcMediaCodecInformation.SamplingFrequency.SF_48000
-                | SbcMediaCodecInformation.SamplingFrequency.SF_44100
-                | SbcMediaCodecInformation.SamplingFrequency.SF_32000
-                | SbcMediaCodecInformation.SamplingFrequency.SF_16000,
+                sampling_frequency=supported_sampling_frequencies,
                 channel_mode=SbcMediaCodecInformation.ChannelMode.MONO
                 | SbcMediaCodecInformation.ChannelMode.DUAL_CHANNEL
                 | SbcMediaCodecInformation.ChannelMode.STEREO
@@ -481,6 +530,25 @@ class Speaker:
             ),
         )
 
+    def opus_codec_capabilities(self) -> MediaCodecCapabilities:
+        supported_sampling_frequencies = OpusMediaCodecInformation.SamplingFrequency(0)
+        for sampling_frequency in self.sampling_frequencies or [48000]:
+            supported_sampling_frequencies |= (
+                OpusMediaCodecInformation.SamplingFrequency.from_int(sampling_frequency)
+            )
+        return MediaCodecCapabilities(
+            media_type=AVDTP_AUDIO_MEDIA_TYPE,
+            media_codec_type=A2DP_NON_A2DP_CODEC_TYPE,
+            media_codec_information=OpusMediaCodecInformation(
+                frame_size=OpusMediaCodecInformation.FrameSize.FS_10MS
+                | OpusMediaCodecInformation.FrameSize.FS_20MS,
+                channel_mode=OpusMediaCodecInformation.ChannelMode.MONO
+                | OpusMediaCodecInformation.ChannelMode.STEREO
+                | OpusMediaCodecInformation.ChannelMode.DUAL_MONO,
+                sampling_frequency=supported_sampling_frequencies,
+            ),
+        )
+
     async def dispatch_to_outputs(self, function):
         for output in self.outputs:
             await function(output)
@@ -675,7 +743,26 @@ def speaker_cli(ctx, device_config):
 
 @click.command()
 @click.option(
-    '--codec', type=click.Choice(['sbc', 'aac']), default='aac', show_default=True
+    '--codec',
+    type=click.Choice(['sbc', 'aac', 'opus']),
+    default='aac',
+    show_default=True,
+)
+@click.option(
+    '--sampling-frequency',
+    metavar='SAMPLING-FREQUENCY',
+    type=int,
+    multiple=True,
+    help='Enable a sampling frequency (may be specified more than once)',
+)
+@click.option(
+    '--bitrate',
+    metavar='BITRATE',
+    type=int,
+    help='Supported bitrate (AAC only)',
+)
+@click.option(
+    '--vbr/--no-vbr', is_flag=True, default=True, help='Enable VBR (AAC only)'
 )
 @click.option(
     '--discover', is_flag=True, help='Discover remote endpoints once connected'
@@ -706,7 +793,16 @@ def speaker_cli(ctx, device_config):
 @click.option('--device-config', metavar='FILENAME', help='Device configuration file')
 @click.argument('transport')
 def speaker(
-    transport, codec, connect_address, discover, output, ui_port, device_config
+    transport,
+    codec,
+    sampling_frequency,
+    bitrate,
+    vbr,
+    connect_address,
+    discover,
+    output,
+    ui_port,
+    device_config,
 ):
     """Run the speaker."""
 
@@ -721,15 +817,27 @@ def speaker(
         output = list(filter(lambda x: x != '@ffplay', output))
 
     asyncio.run(
-        Speaker(device_config, transport, codec, discover, output, ui_port).run(
-            connect_address
-        )
+        Speaker(
+            device_config,
+            transport,
+            codec,
+            sampling_frequency,
+            bitrate,
+            vbr,
+            discover,
+            output,
+            ui_port,
+        ).run(connect_address)
     )
 
 
 # -----------------------------------------------------------------------------
 def main():
-    logging.basicConfig(level=os.environ.get('BUMBLE_LOGLEVEL', 'WARNING').upper())
+    logging.basicConfig(
+        level=os.environ.get('BUMBLE_LOGLEVEL', 'WARNING').upper(),
+        format="[%(asctime)s.%(msecs)03d] %(levelname)s:%(name)s:%(message)s",
+        datefmt="%H:%M:%S",
+    )
     speaker()
 
 
diff --git a/bumble/a2dp.py b/bumble/a2dp.py
index 6246d672..f76ea9bf 100644
--- a/bumble/a2dp.py
+++ b/bumble/a2dp.py
@@ -479,6 +479,14 @@ class OpusMediaCodecInformation(VendorSpecificMediaCodecInformation):
     class SamplingFrequency(enum.IntFlag):
         SF_48000 = 1 << 0
 
+        @classmethod
+        def from_int(
+            cls, sampling_frequency: int
+        ) -> OpusMediaCodecInformation.SamplingFrequency:
+            if sampling_frequency != 48000:
+                raise ValueError("no such sampling frequency")
+            return cls.SF_48000
+
     VENDOR_ID: ClassVar[int] = 0x000000E0
     CODEC_ID: ClassVar[int] = 0x0001
 