# auracast-translator/src/voice_client/tts_client.py

import requests
import numpy as np
import soundfile as sf

from voice_models.request_models import SynthesizeRequest


# Endpoint that request_synthesis() POSTs to; hard-coded to the loopback
# address on port 8099.
API_URL = "http://127.0.0.1:8099/synthesize/"

def request_synthesis(
    request_data: SynthesizeRequest,
    *,
    timeout: "float | None" = 60.0,
) -> "bytes | np.ndarray | None":
    """POST a synthesis request to ``API_URL`` and decode the returned audio.

    The request model is serialized with ``model_dump()`` and sent as the JSON
    body. On an HTTP 200 reply the JSON response is expected to carry the
    audio as a hex string, which is decoded before being returned.

    Args:
        request_data: The synthesis request. Its ``return_lc3`` flag selects
            which field of the response is decoded.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``
            argument. ``None`` disables the timeout and waits indefinitely.

    Returns:
        * ``bytes`` decoded from the ``"audio_lc3"`` field when
          ``request_data.return_lc3`` is truthy;
        * otherwise a 1-D ``numpy`` array obtained by interpreting the bytes
          of the ``"audio_pcm"`` field as ``float32`` samples;
        * ``None`` when the server answers with a status other than 200 (the
          status code and response text are printed).

    Raises:
        requests.exceptions.RequestException: On connection failures or when
            ``timeout`` elapses.
        KeyError: If the expected audio field is missing from the response.
    """
    # Without an explicit timeout, requests blocks forever on a server that
    # accepts the connection but never answers.
    response = requests.post(
        API_URL,
        json=request_data.model_dump(),
        timeout=timeout,
    )

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return None

    response_data = response.json()

    if request_data.return_lc3:
        # LC3 payload is hex-encoded in the JSON body; hand back the raw bytes.
        return bytes.fromhex(response_data["audio_lc3"])

    # PCM payload is hex-encoded as well; reinterpret the bytes as float32
    # samples without copying.
    audio_bytes = bytes.fromhex(response_data["audio_pcm"])
    return np.frombuffer(audio_bytes, dtype=np.float32)

if __name__ == "__main__":
    # Demo: synthesize one sentence and store the result next to the script.

    target_rate = 16000

    # Example request
    request_data = SynthesizeRequest(
        text="Hello, this is a test.",
        target_sample_rate=target_rate,
        framework="piper",
        model="de_DE-kerstin-low",
        return_lc3=False,  # Set to True to receive LC3 compressed output
    )

    audio = request_synthesis(request_data)

    if audio is None:
        # request_synthesis() has already printed the HTTP status and body;
        # stop here instead of handing None to the file writers below.
        raise SystemExit("Synthesis request failed; no audio was written.")

    if request_data.return_lc3:
        # With return_lc3 set, the result is raw bytes rather than a sample
        # array, so dump it unchanged instead of passing it to sf.write().
        with open("hello.lc3", "wb") as lc3_file:
            lc3_file.write(audio)
    else:
        sf.write('hello.wav', audio, target_rate)