Files
auracast-translator/multilang_translator/text_to_speech/encode_lc3.py

32 lines
1.0 KiB
Python

import numpy as np
import lc3
def encode(
    audio: np.ndarray,
    output_sample_rate_hz,
    octets_per_frame,
    frame_duration_us=10000,
    pcm_bit_depth=16,
):
    """Encode a mono PCM signal into a concatenated stream of LC3 frames.

    The signal is zero-padded at the end so that its length is a whole
    multiple of the encoder's frame size, then split into frames that are
    encoded one after another.

    Args:
        audio: One-dimensional array of PCM samples (single channel).
            NOTE(review): presumably the sample dtype must match
            ``pcm_bit_depth`` (e.g. int16 for 16) -- confirm against the
            ``lc3`` binding.
        output_sample_rate_hz: Sample rate handed to the LC3 encoder.
        octets_per_frame: Size in bytes of each encoded LC3 frame.
        frame_duration_us: LC3 frame duration in microseconds.
        pcm_bit_depth: Bit depth of the PCM samples passed to the encoder.

    Returns:
        The encoded frames concatenated into a single ``bytes`` object, in
        input order. Empty input yields ``b""``.
    """
    # No input sample rate is passed to the encoder, so ``audio`` is
    # presumably already sampled at ``output_sample_rate_hz`` -- TODO confirm
    # with callers.
    encoder = lc3.Encoder(
        frame_duration_us=frame_duration_us,
        sample_rate_hz=output_sample_rate_hz,
        num_channels=1,
    )
    # Number of PCM samples consumed per LC3 frame.
    frame_samples = encoder.get_frame_samples()

    # Zero-pad the tail so the signal divides evenly into whole frames;
    # the padding is 0 when the length is already a multiple.
    pad_width = (frame_samples - len(audio) % frame_samples) % frame_samples
    padded = np.pad(audio, (0, pad_width), mode='constant', constant_values=0)
    frames = padded.reshape(-1, frame_samples)

    # Join once at the end rather than growing a bytes object with ``+=``,
    # which would re-copy the accumulated output for every frame.
    return b"".join(
        encoder.encode(frame, num_bytes=octets_per_frame, bit_depth=pcm_bit_depth)
        for frame in frames
    )