Files
xtts-test/hello_world.py
2025-03-24 21:05:27 +01:00

62 lines
1.6 KiB
Python

import os
import time

import torch
from TTS.api import TTS
# Get device: use CUDA when torch reports it as available, otherwise fall back
# to the CPU and tell the user about the fallback.
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cpu":
    print('Running on cpu')
# List available 🐸TTS models
print(TTS().list_models())

# Initialize TTS: load the XTTS v2 model, move it to `device`, and report how
# long that took. perf_counter() is a monotonic clock, so the measured
# interval cannot be skewed by system clock adjustments the way a difference
# of time.time() values can.
start_init = time.perf_counter()
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
end_init = time.perf_counter()
print(f"Initialization time: {end_init - start_init:.2f} seconds")

# List speakers
print(tts.speakers)
# Run TTS
# ❗ XTTS supports both, but many models accept only one of the `speaker` and
# `speaker_wav` arguments.
# Disabled example: synthesize to a list of amplitude values, cloning the
# voice from `speaker_wav`:
# wav = tts.tts(
#     text="Hello world!",
#     speaker_wav="en_sample.wav",
#     language="en"
# )
# Create output directory (no error if it already exists).
output_dir = "output_wav"
os.makedirs(output_dir, exist_ok=True)

# The same announcement is synthesized for every speaker; it is hoisted out of
# the loop because it never changes. The text is German, matching the
# language="de" argument passed below.
sample_text = "Bitte beachten Sie: Sicherheitscheck 5 ist jetzt geöffnet. Bitte warten Sie im bereitgestellten Wartebereich, bis Sie aufgerufen werden."

# Generate one sample per speaker and time the whole run with a monotonic
# clock so the reported duration is not affected by system clock changes.
start_gen = time.perf_counter()
total_speakers = len(tts.speakers)
print(f"Starting generation for {total_speakers} speakers...")
for idx, speaker in enumerate(tts.speakers, 1):
    # Sanitize speaker name for filename: spaces become underscores and "/"
    # becomes "-" so the name cannot introduce an extra path component.
    safe_name = speaker.replace(" ", "_").replace("/", "-")
    filename = f"{output_dir}/{safe_name}.wav"

    # Generate audio
    tts.tts_to_file(
        text=sample_text,
        speaker=speaker,
        language="de",
        file_path=filename,
    )

    # Print progress as "[current/total] path".
    print(f"[{idx}/{total_speakers}] {filename}")

end_gen = time.perf_counter()
print(f"Generation time: {end_gen - start_gen:.2f} seconds")