restructure the project and start using pytest

2024-12-15 14:25:07 +01:00
parent 5d221f09b7
commit 83330e63a7
20 changed files with 160 additions and 6 deletions
--- a/multilang_translator/text_to_speech/piper_welcome.sh
+++ b/multilang_translator/text_to_speech/piper_welcome.sh
@@ -0,0 +1,10 @@
+SCRIPT_DIR=$(dirname "$(readlink -f "$BASH_SOURCE")")
+START_DIR=$(pwd)
+
+cd $SCRIPT_DIR
+
+echo 'Welcome to the world of speech synthesis!' | piper \
+  --model en_US-lessac-medium \
+  --output_file $SCRIPT_DIR/welcome.wav \
+
+cd $START_DIR
--- a/multilang_translator/text_to_speech/resample.py
+++ b/multilang_translator/text_to_speech/resample.py
@@ -0,0 +1,18 @@
+# Resample a .wav file from 22.05 kHz to a 24 kHz sampling rate
+
+import librosa
+import soundfile as sf
+
+
+def resample(filename, out_filename, target_rate=int(24e3)):
+    # Load the original audio file
+    audio, rate = librosa.load(filename)
+
+    # Convert the sample rate to 24 kHz
+    resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate)
+
+    # Save the resampled audio as a new .wav file
+    sf.write(out_filename, resampled_audio, target_rate)
+
+if __name__ == "__main__":
+    resample('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav')
--- a/multilang_translator/text_to_speech/text_to_speech.py
+++ b/multilang_translator/text_to_speech/text_to_speech.py
@@ -0,0 +1,19 @@
+import os
+import subprocess
+import time
+import logging as log
+
+TTS_DIR = os.path.join(os.path.dirname(__file__))
+
+def synthesize(text, model="en_US-lessac-medium", output_file="out.wav"):
+
+    pwd = os.getcwd()
+    os.chdir(TTS_DIR)
+    start = time.time()
+    ret = subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True)
+    log.info("Running piper took %s s", round(time.time() - start, 3))
+    os.chdir(pwd)
+
+
+if __name__ == "__main__":
+    synthesize("Hello, how are you?", "en_US-lessac-medium", "hello.wav")
--- a/multilang_translator/text_to_speech/voices.json
+++ b/multilang_translator/text_to_speech/voices.json