# xtts-test/hello_world.py

import os
import time

import torch
from TTS.api import TTS

# Select the compute device: "cuda" when torch reports CUDA as available,
# otherwise "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    # Announce the CPU fallback on stdout.
    print('Running on cpu')

# List available 🐸TTS models (prints whatever `list_models()` returns).
print(TTS().list_models())

# Initialize TTS on the selected device and report how long that took.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; unlike time.time() it is not affected by system clock
# adjustments that happen while the model is loading.
start_init = time.perf_counter()
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
end_init = time.perf_counter()
print(f"Initialization time: {end_init - start_init:.2f} seconds")

# List speakers
print(tts.speakers)

# Run TTS
# ❗ XTTS accepts both `speaker` and `speaker_wav`, but many other models accept
# only one of the two arguments.

# Disabled example: synthesize to a list of amplitude values, cloning the voice from `speaker_wav`.
# wav = tts.tts(
#   text="Hello world!",
#   speaker_wav="en_sample.wav",
#   language="en"
# )

# Directory that receives one WAV file per speaker.
OUTPUT_DIR = "output_wav"

# Text synthesized with every speaker, and the language code passed with it.
SAMPLE_TEXT = (
    "Bitte beachten Sie: Sicherheitscheck 5 ist jetzt geöffnet. "
    "Bitte warten Sie im bereitgestellten Wartebereich, bis Sie aufgerufen werden."
)
SAMPLE_LANGUAGE = "de"

# Maps characters in speaker names to filename-friendly replacements:
# spaces become underscores, and "/" (a path separator) becomes "-".
FILENAME_TRANSLATION = str.maketrans({" ": "_", "/": "-"})

# Create output directory (no error if it already exists).
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Generate sample for each speaker.
# perf_counter() is used because it is a monotonic clock meant for measuring
# elapsed time; time.time() can jump when the system clock is adjusted.
start_gen = time.perf_counter()

# Read the speaker list once so the reported total and the loop always agree.
# NOTE(review): `tts.speakers` appears to be None for models without built-in
# speakers — confirm against the installed TTS version. Falling back to an
# empty list keeps `len()` from raising in that case.
speakers = list(tts.speakers or [])
total_speakers = len(speakers)
print(f"Starting generation for {total_speakers} speakers...")

for idx, speaker in enumerate(speakers, 1):
    # Sanitize speaker name for filename
    safe_name = speaker.translate(FILENAME_TRANSLATION)
    filename = f"{OUTPUT_DIR}/{safe_name}.wav"

    # Generate audio and write it straight to `filename`.
    tts.tts_to_file(
        text=SAMPLE_TEXT,
        speaker=speaker,
        language=SAMPLE_LANGUAGE,
        file_path=filename,
    )

    # Print progress as "[current/total] path".
    print(f"[{idx}/{total_speakers}] {filename}")

end_gen = time.perf_counter()
print(f"Generation time: {end_gen - start_gen:.2f} seconds")