Add piper hello world example with resample to 24kHz

This commit is contained in:
2024-12-14 21:56:20 +01:00
parent 74c2b8a34d
commit d43c3a3a38
6 changed files with 6390 additions and 7 deletions
+4
View File
@@ -1,3 +1,7 @@
*.pyc
*.wav
*.onnx
*.onnx.json
venv/
text_to_speech/models
+16
View File
@@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal"
}
]
}
+5 -1
View File
@@ -3,4 +3,8 @@
sudo apt install liblc3-tools
use python3.9
pip install piper-tts soundfile librosa
pip install piper-tts soundfile librosa
# Piper update voices
piper --update-voices -m en_US-lessac-medium
+8 -1
View File
@@ -1,6 +1,13 @@
SCRIPT_DIR=$(dirname "$(readlink -f "$BASH_SOURCE")")
START_DIR=$(pwd)
cd $SCRIPT_DIR
echo 'Welcome to the world of speech synthesis!' | piper \
--model en_US-lessac-medium \
--output_file $SCRIPT_DIR/welcome.wav \
--download-dir $SCRIPT_DIR/models
#--download-dir $SCRIPT_DIR/models \
#--data-dir $SCRIPT_DIR/models
cd $START_DIR
+4 -5
View File
@@ -4,16 +4,15 @@ import librosa
import soundfile as sf
def resample():
def resample(target_rate=int(24e3)):
# Load the original audio file
audio, rate = librosa.load('welcome.wav')
audio, rate = librosa.load('text_to_speech/welcome.wav')
# Convert the sample rate to 24 kHz
resampled_rate = int(rate * 24 / 22050)
resampled_audio = librosa.resample(audio, rate, resampled_rate)
resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate)
# Save the resampled audio as a new .wav file
sf.write('welcome_resampled.wav', resampled_audio, resampled_rate)
sf.write('text_to_speech/welcome_resampled.wav', resampled_audio, target_rate)
if __name__ == "__main__":
resample()
File diff suppressed because it is too large. Load Diff