# File-viewer header (not part of the program): 62 lines, 1.6 KiB, Python
import os
import time

import torch
from TTS.api import TTS

# Demo script: synthesize the same sentence once per built-in speaker of a
# 🐸TTS model and write each result to its own WAV file.

# Model identifier handed to `TTS(...)`.
MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
# Directory that receives one WAV file per speaker.
OUTPUT_DIR = "output_wav"
# Language code passed to `tts_to_file`.
LANGUAGE = "de"
# Sentence spoken by every speaker.
SAMPLE_TEXT = (
    "Bitte beachten Sie: Sicherheitscheck 5 ist jetzt geöffnet. "
    "Bitte warten Sie im bereitgestellten Wartebereich, bis Sie aufgerufen werden."
)

# Characters that must not end up in a file name: spaces become "_", both
# path separators become "-" so a speaker name can never create a sub-path.
_FILENAME_TRANSLATION = str.maketrans({" ": "_", "/": "-", "\\": "-"})


def select_device() -> str:
    """Return ``"cuda"`` if torch reports CUDA as available, else ``"cpu"``.

    Prints a short notice when falling back to the CPU.
    """
    if torch.cuda.is_available():
        return "cuda"
    print('Running on cpu')
    return "cpu"


def speaker_to_filename(speaker: str, output_dir: str = OUTPUT_DIR) -> str:
    """Build the output WAV path for *speaker*.

    Args:
        speaker: Speaker name as reported by the model.
        output_dir: Directory prefix of the returned path.

    Returns:
        ``"<output_dir>/<sanitized speaker>.wav"`` where spaces are replaced
        by ``_`` and ``/`` or ``\\`` by ``-``.
    """
    safe_name = speaker.translate(_FILENAME_TRANSLATION)
    return f"{output_dir}/{safe_name}.wav"


def generate_samples(
    tts,
    speakers,
    text: str = SAMPLE_TEXT,
    language: str = LANGUAGE,
    output_dir: str = OUTPUT_DIR,
) -> None:
    """Write one WAV file per speaker and print progress after each file.

    Args:
        tts: Object exposing ``tts_to_file(text=, speaker=, language=,
            file_path=)``; here the initialized ``TTS`` instance.
        speakers: Sequence of speaker names to synthesize.
        text: Sentence to synthesize for every speaker.
        language: Language code forwarded to ``tts_to_file``.
        output_dir: Target directory; created if it does not exist.
    """
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)

    total_speakers = len(speakers)
    print(f"Starting generation for {total_speakers} speakers...")

    for idx, speaker in enumerate(speakers, 1):
        filename = speaker_to_filename(speaker, output_dir)

        # Generate audio
        tts.tts_to_file(
            text=text,
            speaker=speaker,
            language=language,
            file_path=filename,
        )

        # Print progress
        print(f"[{idx}/{total_speakers}] {filename}")


def main() -> None:
    """List models, load the model, then generate a sample for each speaker."""
    # Get device
    device = select_device()

    # List available 🐸TTS models
    print(TTS().list_models())

    # Initialize TTS and report how long it took.
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    start_init = time.perf_counter()
    tts = TTS(MODEL_NAME).to(device)
    end_init = time.perf_counter()
    print(f"Initialization time: {end_init - start_init:.2f} seconds")

    # List speakers
    print(tts.speakers)

    # Run TTS
    # ❗ XTTS supports both, but many models allow only one of the `speaker` and
    # `speaker_wav` arguments

    # TTS with list of amplitude values as output, clone the voice from `speaker_wav`
    # wav = tts.tts(
    #     text="Hello world!",
    #     speaker_wav="en_sample.wav",
    #     language="en"
    # )

    # Generate sample for each speaker.
    # `or []` keeps the script from crashing on len()/iteration should the
    # model expose no speaker list (None or empty).
    speakers = list(tts.speakers or [])
    start_gen = time.perf_counter()
    generate_samples(tts, speakers)
    end_gen = time.perf_counter()
    print(f"Generation time: {end_gen - start_gen:.2f} seconds")


if __name__ == "__main__":
    main()