# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# -----------------------------------------------------------------------------
# Imports
# -----------------------------------------------------------------------------
from __future__ import annotations
from dataclasses import dataclass


# -----------------------------------------------------------------------------
class BitReader:
    """Simple but not optimized bit stream reader.

    Bits are consumed most-significant-bit first. Up to 4 bytes at a time are
    loaded into an integer cache, and reads are served from that cache.

    Attributes:
        data: the buffer being read.
        byte_position: index of the next byte of `data` to load into the cache.
        bit_position: number of bits consumed so far.
        cache: integer value of the bytes most recently loaded from `data`.
        bits_cached: number of low-order bits of `cache` not yet consumed.
    """

    data: bytes
    byte_position: int
    bit_position: int
    cache: int
    bits_cached: int

    def __init__(self, data: bytes):
        self.data = data
        self.byte_position = 0
        self.bit_position = 0
        self.cache = 0
        self.bits_cached = 0

    def read(self, bits: int) -> int:
        """Read up to 32 bits and return them as an unsigned integer.

        Raises:
            ValueError: if `bits` is negative or greater than 32, or if fewer
                than `bits` bits remain in the data.
        """
        # Reject negative sizes before any state is touched.
        if bits < 0:
            raise ValueError('cannot read a negative number of bits')
        if bits > 32:
            raise ValueError('maximum read size is 32')

        if self.bits_cached >= bits:
            # We have enough bits.
            self.bits_cached -= bits
            self.bit_position += bits
            return (self.cache >> self.bits_cached) & ((1 << bits) - 1)

        # Read more cache, up to 32 bits
        feed_bytes = self.data[self.byte_position : self.byte_position + 4]
        feed_size = len(feed_bytes)
        feed_int = int.from_bytes(feed_bytes, byteorder='big')
        if 8 * feed_size + self.bits_cached < bits:
            raise ValueError('trying to read past the data')
        self.byte_position += feed_size

        # Combine the new cache and the old cache: the leftover bits of the old
        # cache become the high-order bits of the result.
        cache = self.cache & ((1 << self.bits_cached) - 1)
        new_bits = bits - self.bits_cached
        self.bits_cached = 8 * feed_size - new_bits
        result = (feed_int >> self.bits_cached) | (cache << new_bits)
        self.cache = feed_int

        self.bit_position += bits
        return result

    def read_bytes(self, count: int) -> bytes:
        """Read `count` whole bytes starting at the current bit position.

        Raises:
            ValueError: if `count` is negative or if fewer than `count` bytes
                remain in the data.
        """
        if count < 0:
            raise ValueError('cannot read a negative number of bytes')
        if self.bit_position + 8 * count > 8 * len(self.data):
            raise ValueError('not enough data')

        if self.bit_position % 8:
            # Not byte aligned
            return bytes(self.read(8) for _ in range(count))

        # Byte aligned: slice the data directly and drop the cache. The byte
        # position must point just past the returned bytes, so that the next
        # `read()` refills the cache from unread data.
        offset = self.bit_position // 8
        end = offset + count
        self.byte_position = end
        self.bits_cached = 0
        self.cache = 0
        self.bit_position = 8 * end
        return self.data[offset:end]

    def bits_left(self) -> int:
        """Return the number of bits that have not been consumed yet."""
        return (8 * len(self.data)) - self.bit_position

    def skip(self, bits: int) -> None:
        """Consume and discard `bits` bits.

        Raises:
            ValueError: if `bits` is negative or exceeds the remaining data.
        """
        if bits < 0:
            raise ValueError('cannot skip a negative number of bits')

        # Slow, but simple...
        while bits > 0:
            chunk = min(bits, 32)
            self.read(chunk)
            bits -= chunk


# -----------------------------------------------------------------------------
class AacAudioRtpPacket:
    """AAC payload encapsulated in an RTP packet payload"""

    audio_mux_element: AacAudioRtpPacket.AudioMuxElement

    @staticmethod
    def latm_value(reader: BitReader) -> int:
        """Read a variable-length value: a 2-bit byte count, then the bytes."""
        bytes_for_value = reader.read(2)
        value = 0
        for _ in range(bytes_for_value + 1):
            value = value * 256 + reader.read(8)
        return value

    @staticmethod
    def program_config_element(reader: BitReader):
        """Not supported: always raises ValueError."""
        raise ValueError('program_config_element not supported')

    @dataclass
    class GASpecificConfig:
        """GASpecificConfig, parsed from the reader's current position.

        `core_coder_delay` and `layer_nr` are only set as attributes when the
        corresponding fields are present in the bit stream.
        """

        def __init__(
            self, reader: BitReader, channel_configuration: int, audio_object_type: int
        ) -> None:
            # GASpecificConfig - ISO/IEC 14496-3 Table 4.1
            reader.read(1)  # frameLengthFlag
            depends_on_core_coder = reader.read(1)
            if depends_on_core_coder:
                self.core_coder_delay = reader.read(14)
            extension_flag = reader.read(1)
            if not channel_configuration:
                AacAudioRtpPacket.program_config_element(reader)
            if audio_object_type in (6, 20):
                self.layer_nr = reader.read(3)
            if extension_flag:
                if audio_object_type == 22:
                    reader.read(5)  # numOfSubFrame
                    reader.read(11)  # layer_length
                if audio_object_type in (17, 19, 20, 23):
                    reader.read(1)  # aacSectionDataResilienceFlag
                    reader.read(1)  # aacScalefactorDataResilienceFlag
                    reader.read(1)  # aacSpectralDataResilienceFlag
                extension_flag_3 = reader.read(1)
                if extension_flag_3 == 1:
                    raise ValueError('extensionFlag3 == 1 not supported')

    @staticmethod
    def audio_object_type(reader: BitReader):
        """Read an audio object type (5 bits, or 5 + 6 bits when escaped)."""
        # GetAudioObjectType - ISO/IEC 14496-3 Table 1.16
        audio_object_type = reader.read(5)
        if audio_object_type == 31:
            audio_object_type = 32 + reader.read(6)

        return audio_object_type

    @dataclass
    class AudioSpecificConfig:
        """AudioSpecificConfig, parsed from the reader's current position.

        `sbr_present_flag` and `ps_present_flag` are -1 when not signalled.
        The `extension_sampling_frequency_index`,
        `extension_sampling_frequency` and `extension_channel_configuration`
        fields are likewise -1 when they are not present in the bit stream.

        Raises:
            ValueError: if the audio object type is not supported, if a
                reserved sampling frequency index is used, or if the data is
                too short.
        """

        audio_object_type: int
        sampling_frequency_index: int
        sampling_frequency: int
        channel_configuration: int
        sbr_present_flag: int
        ps_present_flag: int
        extension_audio_object_type: int
        extension_sampling_frequency_index: int
        extension_sampling_frequency: int
        extension_channel_configuration: int

        SAMPLING_FREQUENCIES = [
            96000,
            88200,
            64000,
            48000,
            44100,
            32000,
            24000,
            22050,
            16000,
            12000,
            11025,
            8000,
            7350,
        ]

        @classmethod
        def _frequency_for_index(cls, index: int) -> int:
            """Map a non-escape sampling frequency index to a frequency."""
            try:
                return cls.SAMPLING_FREQUENCIES[index]
            except IndexError:
                # Indices past the end of the table are reserved values.
                raise ValueError(
                    f'sampling frequency index {index} is reserved'
                ) from None

        def __init__(self, reader: BitReader) -> None:
            # Give every declared field a value up front, so that the
            # dataclass-generated __repr__ and __eq__ never hit a missing
            # attribute.
            self.extension_sampling_frequency_index = -1
            self.extension_sampling_frequency = -1
            self.extension_channel_configuration = -1

            # AudioSpecificConfig - ISO/IEC 14496-3 Table 1.15
            self.audio_object_type = AacAudioRtpPacket.audio_object_type(reader)
            self.sampling_frequency_index = reader.read(4)
            if self.sampling_frequency_index == 0xF:
                self.sampling_frequency = reader.read(24)
            else:
                self.sampling_frequency = self._frequency_for_index(
                    self.sampling_frequency_index
                )
            self.channel_configuration = reader.read(4)
            self.sbr_present_flag = -1
            self.ps_present_flag = -1
            if self.audio_object_type in (5, 29):
                self.extension_audio_object_type = 5
                self.sbr_present_flag = 1
                if self.audio_object_type == 29:
                    self.ps_present_flag = 1
                self.extension_sampling_frequency_index = reader.read(4)
                if self.extension_sampling_frequency_index == 0xF:
                    self.extension_sampling_frequency = reader.read(24)
                else:
                    self.extension_sampling_frequency = self._frequency_for_index(
                        self.extension_sampling_frequency_index
                    )
                # The object type of the underlying (core) stream follows.
                self.audio_object_type = AacAudioRtpPacket.audio_object_type(reader)
                if self.audio_object_type == 22:
                    self.extension_channel_configuration = reader.read(4)
            else:
                self.extension_audio_object_type = 0

            if self.audio_object_type in (1, 2, 3, 4, 6, 7, 17, 19, 20, 21, 22, 23):
                # Parsed only to advance the reader; the result is not kept.
                AacAudioRtpPacket.GASpecificConfig(
                    reader, self.channel_configuration, self.audio_object_type
                )
            else:
                raise ValueError(
                    f'audioObjectType {self.audio_object_type} not supported'
                )

            # The sync extension is not parsed. Reference pseudo-code:
            #
            # if self.extension_audio_object_type != 5 and bits_to_decode >= 16:
            #     sync_extension_type = reader.read(11)
            #     if sync_extension_type == 0x2B7:
            #         self.extension_audio_object_type = AacAudioRtpPacket.audio_object_type(reader)
            #         if self.extension_audio_object_type == 5:
            #             self.sbr_present_flag = reader.read(1)
            #             if self.sbr_present_flag:
            #                 self.extension_sampling_frequency_index = reader.read(4)
            #                 if self.extension_sampling_frequency_index == 0xF:
            #                     self.extension_sampling_frequency = reader.read(24)
            #                 else:
            #                     self.extension_sampling_frequency = self.SAMPLING_FREQUENCIES[self.extension_sampling_frequency_index]
            #                 if bits_to_decode >= 12:
            #                     sync_extension_type = reader.read(11)
            #                     if sync_extension_type == 0x548:
            #                         self.ps_present_flag = reader.read(1)
            #         elif self.extension_audio_object_type == 22:
            #             self.sbr_present_flag = reader.read(1)
            #             if self.sbr_present_flag:
            #                 self.extension_sampling_frequency_index = reader.read(4)
            #                 if self.extension_sampling_frequency_index == 0xF:
            #                     self.extension_sampling_frequency = reader.read(24)
            #                 else:
            #                     self.extension_sampling_frequency = self.SAMPLING_FREQUENCIES[self.extension_sampling_frequency_index]
            #             self.extension_channel_configuration = reader.read(4)

    @dataclass
    class StreamMuxConfig:
        """StreamMuxConfig, parsed from the reader's current position.

        Only a single program with a single layer is supported.
        `other_data_len_bits` is 0 when `other_data_present` is 0.

        Raises:
            ValueError: if the stream uses an unsupported feature or the data
                is malformed or too short.
        """

        other_data_present: int
        other_data_len_bits: int
        audio_specific_config: AacAudioRtpPacket.AudioSpecificConfig

        def __init__(self, reader: BitReader) -> None:
            # StreamMuxConfig - ISO/IEC 14496-3 Table 1.42
            audio_mux_version = reader.read(1)
            if audio_mux_version == 1:
                audio_mux_version_a = reader.read(1)
            else:
                audio_mux_version_a = 0
            if audio_mux_version_a != 0:
                raise ValueError('audioMuxVersionA != 0 not supported')
            if audio_mux_version == 1:
                AacAudioRtpPacket.latm_value(reader)  # taraBufferFullness
            reader.read(1)  # allStreamsSameTimeFraming
            reader.read(6)  # numSubFrames
            num_program = reader.read(4)
            if num_program != 0:
                raise ValueError('num_program != 0 not supported')
            num_layer = reader.read(3)
            if num_layer != 0:
                raise ValueError('num_layer != 0 not supported')
            if audio_mux_version == 0:
                self.audio_specific_config = AacAudioRtpPacket.AudioSpecificConfig(
                    reader
                )
            else:
                asc_len = AacAudioRtpPacket.latm_value(reader)
                marker = reader.bit_position
                self.audio_specific_config = AacAudioRtpPacket.AudioSpecificConfig(
                    reader
                )
                audio_specific_config_len = reader.bit_position - marker
                if asc_len < audio_specific_config_len:
                    raise ValueError('audio_specific_config_len > asc_len')
                # Skip the bits left over after the parsed config.
                asc_len -= audio_specific_config_len
                reader.skip(asc_len)
            frame_length_type = reader.read(3)
            if frame_length_type == 0:
                reader.read(8)  # latmBufferFullness
            elif frame_length_type == 1:
                reader.read(9)  # frameLength
            else:
                raise ValueError(f'frame_length_type {frame_length_type} not supported')

            self.other_data_present = reader.read(1)
            # Always defined, so the dataclass __repr__/__eq__ work even when
            # no other data is present.
            self.other_data_len_bits = 0
            if self.other_data_present:
                if audio_mux_version == 1:
                    self.other_data_len_bits = AacAudioRtpPacket.latm_value(reader)
                else:
                    while True:
                        self.other_data_len_bits *= 256
                        other_data_len_esc = reader.read(1)
                        self.other_data_len_bits += reader.read(8)
                        if other_data_len_esc == 0:
                            break
            crc_check_present = reader.read(1)
            if crc_check_present:
                reader.read(8)  # crcCheckSum

    @dataclass
    class AudioMuxElement:
        """AudioMuxElement carrying an in-band StreamMuxConfig and one payload.

        Raises:
            ValueError: if `mux_config_present` is 0, if the element reuses a
                previous stream mux config, or if parsing fails.
        """

        payload: bytes
        stream_mux_config: AacAudioRtpPacket.StreamMuxConfig

        def __init__(self, reader: BitReader, mux_config_present: int):
            if mux_config_present == 0:
                raise ValueError('muxConfigPresent == 0 not supported')

            # AudioMuxElement - ISO/IEC 14496-3 Table 1.41
            use_same_stream_mux = reader.read(1)
            if use_same_stream_mux:
                raise ValueError('useSameStreamMux == 1 not supported')
            self.stream_mux_config = AacAudioRtpPacket.StreamMuxConfig(reader)

            # We only support:
            # allStreamsSameTimeFraming == 1
            # audioMuxVersionA == 0,
            # numProgram == 0
            # numSubFrames == 0
            # numLayer == 0

            # The payload length is a sum of bytes, terminated by the first
            # byte that is not 255.
            mux_slot_length_bytes = 0
            while True:
                tmp = reader.read(8)
                mux_slot_length_bytes += tmp
                if tmp != 255:
                    break

            self.payload = reader.read_bytes(mux_slot_length_bytes)

            if self.stream_mux_config.other_data_present:
                reader.skip(self.stream_mux_config.other_data_len_bits)

            # ByteAlign
            while reader.bit_position % 8:
                reader.read(1)

    def __init__(self, data: bytes) -> None:
        """Parse `data` as an AudioMuxElement with an in-band mux config.

        Raises:
            ValueError: if the data is malformed, truncated, or uses an
                unsupported feature.
        """
        # Parse the bit stream
        reader = BitReader(data)
        self.audio_mux_element = self.AudioMuxElement(reader, mux_config_present=1)

    def to_adts(self):
        """Return the payload prefixed with a 7-byte ADTS header."""
        audio_specific_config = (
            self.audio_mux_element.stream_mux_config.audio_specific_config
        )
        sampling_frequency_index = audio_specific_config.sampling_frequency_index
        channel_configuration = audio_specific_config.channel_configuration
        payload = self.audio_mux_element.payload
        # The ADTS frame length includes the 7-byte header.
        frame_length = len(payload) + 7
        header = bytes(
            [
                0xFF,
                0xF1,  # 0xF9 (MPEG2)
                0x40 | (sampling_frequency_index << 2) | (channel_configuration >> 2),
                ((channel_configuration & 0x3) << 6) | (frame_length >> 11),
                (frame_length >> 3) & 0xFF,
                ((frame_length << 5) & 0xFF) | 0x1F,
                0xFC,
            ]
        )
        return header + payload