import os
import subprocess


def encode_lc3(file, bitrate=48000, executable='elc3'):
    """Encode a WAV file to LC3 by running the ``elc3`` command-line tool.

    The output is written next to the input, with the ``.wav`` suffix
    swapped for ``.lc3``.

    Args:
        file: Path of the input as a string, with or without a trailing
            ``.wav``. ``.wav`` is always appended to the stem for the
            input path, so ``'clip'`` and ``'clip.wav'`` are equivalent.
        bitrate: Value passed to the tool's ``-b`` option.
        executable: Name or path of the encoder binary to run.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. ``stdout`` and ``stderr``
        are the decoded text the tool printed.

    Raises:
        FileNotFoundError: If ``executable`` cannot be found.
    """
    # Strip only a trailing ".wav"; str.replace would also mangle any
    # ".wav" occurring earlier in the path (e.g. a "takes.wav.d/" folder).
    stem = file.removesuffix('.wav')
    ret = subprocess.run(
        [executable, '-b', str(bitrate), f'{stem}.wav', f'{stem}.lc3'],
        # Without capturing, ret.stdout / ret.stderr are always None.
        capture_output=True,
        text=True,
        errors='replace',
    )

    return ret.returncode, ret.stdout, ret.stderr


if __name__ == '__main__':
    # abspath: dirname(__file__) is '' when the script is started from its
    # own directory, and os.chdir('') raises FileNotFoundError.
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    r, stdout, stderr = encode_lc3('welcome_resampled.wav')

    print(r)
    print(stdout)
    print(stderr)
import os
import subprocess

# Directory containing this module. abspath keeps it valid (non-empty) even
# when the module is started via a bare relative file name.
TTS_DIR = os.path.dirname(os.path.abspath(__file__))


def synthesize(text, model="en_US-lessac-medium", output_file="out.wav",
               executable="piper"):
    """Synthesize speech from text by running the ``piper`` command-line tool.

    The tool is run with ``TTS_DIR`` as its working directory, so a relative
    ``output_file`` is resolved there. The working directory of the calling
    process is never changed.

    Args:
        text: Text to speak; sent to the tool's standard input as UTF-8.
        model: Value passed to the tool's ``--model`` option.
        output_file: Value passed to the tool's ``--output_file`` option.
        executable: Name or path of the synthesizer binary to run.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple. ``stdout`` and ``stderr``
        are the decoded text the tool printed.

    Raises:
        subprocess.CalledProcessError: If the tool exits with a non-zero
            status (``check=True``).
        FileNotFoundError: If ``executable`` cannot be found.
    """
    ret = subprocess.run(
        [executable, '--model', model, '--output_file', output_file],
        input=text,
        encoding='utf-8',
        errors='replace',
        # Without capturing, ret.stdout / ret.stderr are always None.
        capture_output=True,
        # cwd= only affects the child; an os.chdir()/restore pair would leave
        # the caller stranded in TTS_DIR whenever check=True raises.
        cwd=TTS_DIR,
        check=True,
    )

    return ret.returncode, ret.stdout, ret.stderr


if __name__ == "__main__":
    r, stout, sterr = synthesize("Hello, how are you?", "en_US-lessac-medium", "hello.wav")
    print(r)
    print(stout)
    print(sterr)