use_bumble (#1)

Adapt the project to use the bumble auracaster. Reviewed-on: https://gitea.pstruebi.xyz/auracaster/multilang-translator-local/pulls/1
2025-02-25 13:32:37 +01:00
parent 56b942ce39
commit a9acfd2d2c
15 changed files with 219 additions and 275 deletions
--- a/multilang_translator/text_to_speech/resample.py
+++ b/multilang_translator/text_to_speech/resample.py
@@ -1,18 +0,0 @@
-# resample .wave from 22.05 to 24kHz sampling rate
-
-import librosa
-import soundfile as sf
-
-
-def resample(filename, out_filename, target_rate=int(24e3)):
-    # Load the original audio file
-    audio, rate = librosa.load(filename)
-
-    # Convert the sample rate to 24 kHz
-    resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate)
-
-    # Save the resampled audio as a new .wav file
-    sf.write(out_filename, resampled_audio, target_rate)
-
-if __name__ == "__main__":
-    resample('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav')
--- a/multilang_translator/text_to_speech/text_to_speech.py
+++ b/multilang_translator/text_to_speech/text_to_speech.py
@@ -1,8 +1,9 @@
 import os
 import subprocess
 import time
+import json
 import logging as log
-import wave
+from multilang_translator import config

 TTS_DIR = os.path.join(os.path.dirname(__file__))

@@ -11,16 +12,31 @@ def synthesize(text, model="en_US-lessac-medium", output_file="out.wav"):
    pwd = os.getcwd()
    os.chdir(TTS_DIR)
    start = time.time()
-    ret = subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True)
+    
+    ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent instance of the model 
+        [config.PIPER_EXE_PATH, '--model', model, '--output_file', output_file], 
+        input=text.encode('utf-8'),
+        capture_output=True
+        )
+    log.info('%s', ret.stdout)
+    log.info('%s', ret.stderr)

-    with wave.open(output_file, "rb") as wf:
-        frames = wf.getnframes()
-        rate = wf.getframerate()
+    assert ret.returncode == 0, 'Piper returncode was not 0.'

-        length_in_seconds = round(frames / rate, 1)
-        log.info(f"Audio length: {length_in_seconds} s")
+    log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3))
+
+    with open (f'{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently
+        model_json = json.load(f)

    os.chdir(pwd)
-    log.info("Running piper took %s s", round(time.time() - start, 3))
+    return model_json

-    return length_in_seconds
+if __name__ == '__main__':
+    import logging
+
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(module)s.py:%(lineno)d %(levelname)s: %(message)s'
+    )
+
+    synthesize('Hello World')