diff --git a/encode/encode_lc3.py b/encode/encode_lc3.py
index 66fd8b8..c823437 100644
--- a/encode/encode_lc3.py
+++ b/encode/encode_lc3.py
@@ -4,7 +4,7 @@ import subprocess
 
 def encode_lc3(file):
     file = file.replace('.wav', '')
-    ret = subprocess.run(['elc3', '-b', '48000', f'{file}.wav', f'{file}.lc3'])
+    ret = subprocess.run(['elc3', '-b', '48000', f'{file}.wav', f'{file}.lc3'], check=True)
     return ret.returncode, ret.stdout, ret.stderr
 
 
diff --git a/main.py b/main.py
index e69de29..e71a29b 100644
--- a/main.py
+++ b/main.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+"""
+list prompt example
+"""
+from __future__ import print_function, unicode_literals
+
+from pprint import pprint
+
+from PyInquirer import prompt, Separator
+from examples import custom_style_2
+
+
+import os
+from text_to_speech import text_to_speech, resample
+from translator import llm_translator
+from encode import encode_lc3
+
+ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements')
+
+def synthesize_resample_encode(text, tts_model, output_file):
+    """Synthesize text into output_file, resample that file in place, then LC3-encode it."""
+    text_to_speech.synthesize(text, tts_model, output_file)
+    resample.resample(output_file, output_file)
+    encode_lc3.encode_lc3(output_file)
+
+
+def announcement_from_german_text(test_de):
+    """Produce the German announcement and its English, French and Spanish translations."""
+    synthesize_resample_encode(test_de, 'de_DE-kerstin-low', f'{ANNOUNCEMENT_DIR}/announcement_de.wav')
+
+    text_en = llm_translator.translator_de_en(test_de)
+    synthesize_resample_encode(text_en, 'en_US-lessac-medium', f'{ANNOUNCEMENT_DIR}/announcement_en.wav')
+
+    text_fr = llm_translator.translator_de_fr(test_de)
+    synthesize_resample_encode(text_fr, 'fr_FR-siwis-medium', f'{ANNOUNCEMENT_DIR}/announcement_fr.wav')
+
+    # Spanish is written to its own file so it does not overwrite the French announcement.
+    text_es = llm_translator.translator_de_es(test_de)
+    synthesize_resample_encode(text_es, 'es_ES-sharvard-medium', f'{ANNOUNCEMENT_DIR}/announcement_es.wav')
+
+# questions = [
+#     {
+#         'type': 'list',
+#         'name': 'theme',
+#         'message': 'What type of annoucement would you like to make?',
+#         'choices': [
+#             'predefined',
+#             'custom',
+#             'audio'
+#         ]
+#     },
+# ]
+
+# answers = prompt(questions, style=custom_style_2)
+# pprint(answers)
+
+if __name__ == '__main__':
+    import time
+    from test_content import test_content
+    import logging as log
+    log.basicConfig(level=log.INFO)
+
+    start = time.perf_counter()  # monotonic clock: unaffected by system clock adjustments
+    announcement_from_german_text(test_content.TESTSENTENCE_DE_RAINBOW)
+    print("Generating the announcement took", time.perf_counter() - start)
\ No newline at end of file
diff --git a/readme.md b/readme.md
index 5f7db86..c79142c 100644
--- a/readme.md
+++ b/readme.md
@@ -5,4 +5,7 @@ use python3.9
 pip install piper-tts soundfile librosa
 
 # Piper update voices
-piper --update-voices -m en_US-lessac-medium
\ No newline at end of file
+piper --update-voices -m en_US-lessac-medium
+
+# TODO:
+- investigate using a pipeline instead of writing to intermediate files to gain performance
\ No newline at end of file
diff --git a/translator/test_content.py b/test_content/test_content.py
similarity index 56%
rename from translator/test_content.py
rename to test_content/test_content.py
index d9ae92f..87f3b2c 100644
--- a/translator/test_content.py
+++ b/test_content/test_content.py
@@ -1 +1,2 @@
-TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
\ No newline at end of file
+TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
+TESTSENTENCE_DE_RAINBOW = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder wolke wahrgenommen wird. '
\ No newline at end of file
diff --git a/text_to_speech/resample.py b/text_to_speech/resample.py
index a9bbc95..384d737 100644
--- a/text_to_speech/resample.py
+++ b/text_to_speech/resample.py
@@ -4,15 +4,15 @@ import librosa
 import soundfile as sf
 
 
-def resample(target_rate=int(24e3)):
-    # Load the original audio file
-    audio, rate = librosa.load('text_to_speech/welcome.wav')
+def resample(filename, out_filename, target_rate=int(24e3)):
+    # Load at the file's native rate (sr=None); librosa's default would first resample to 22050 Hz
+    audio, rate = librosa.load(filename, sr=None)
 
     # Convert the sample rate to 24 kHz
     resampled_audio = librosa.resample(audio, orig_sr=rate, target_sr=target_rate)
 
     # Save the resampled audio as a new .wav file
-    sf.write('text_to_speech/welcome_resampled.wav', resampled_audio, target_rate)
+    sf.write(out_filename, resampled_audio, target_rate)
 
 if __name__ == "__main__":
-    resample()
\ No newline at end of file
+    resample('text_to_speech/welcome.wav', 'text_to_speech/welcome_resampled.wav')
diff --git a/text_to_speech/text_to_speech.py b/text_to_speech/text_to_speech.py
index 62026fb..c0a0710 100644
--- a/text_to_speech/text_to_speech.py
+++ b/text_to_speech/text_to_speech.py
@@ -1,5 +1,7 @@
 import os
 import subprocess
+import time
+import logging as log
 
 TTS_DIR = os.path.join(os.path.dirname(__file__))
 
@@ -7,14 +9,14 @@ def synthesize(text, model="en_US-lessac-medium", output_file="out.wav"):
     pwd = os.getcwd()
     os.chdir(TTS_DIR)
 
-    ret = subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True)
-    os.chdir(pwd)
+    try:
+        start = time.time()
+        subprocess.run(['piper', '--model', model, '--output_file', output_file], input=text.encode('utf-8'), check=True)
+        log.info("Running piper took %s s", round(time.time() - start, 3))
+    finally:
+        # Always restore the caller's working directory: check=True makes a piper failure raise.
+        os.chdir(pwd)
 
-    return ret.returncode, ret.stdout, ret.stderr
-
 
 if __name__ == "__main__":
-    r, stout, sterr = synthesize("Hello, how are you?", "en_US-lessac-medium", "hello.wav")
-    print(r)
-    print(stout)
-    print(sterr)
\ No newline at end of file
+    synthesize("Hello, how are you?", "en_US-lessac-medium", "hello.wav")
diff --git a/translator/__init__.py b/translator/__init__.py
new file mode 100644
index 0000000..5930689
--- /dev/null
+++ b/translator/__init__.py
@@ -0,0 +1,2 @@
+from .credentials import *
+from .syspromts import *
\ No newline at end of file
diff --git a/translator/llm_translator.py b/translator/llm_translator.py
index 4b6cf96..f400295 100644
--- a/translator/llm_translator.py
+++ b/translator/llm_translator.py
@@ -1,8 +1,8 @@
 import requests
 import json
 
-import credentials
-import syspromts
+from . import credentials
+from . import syspromts
 
 def translate(model, query):
     url = f'{credentials.BASE_URL}/api/chat/completions'
@@ -20,31 +20,30 @@ def translate(model, query):
 def translator_de_en(query):
     MODEL = 'llama3.2:3b-instruct-q4_0'
     #MODEL = 'llama3.1:8b-instruct-q4_0'
-    return translate(MODEL, syspromts.TRANSLATOR_DE_EN + query)
+    return translate(MODEL, syspromts.TRANSLATOR_DE_EN + query)['choices'][0]['message']['content']
 
 def translator_de_fr(query):
     MODEL = 'llama3.2:3b-instruct-q4_0'
-    return translate(MODEL, syspromts.TRANSLATOR_DE_FR + query)
+    return translate(MODEL, syspromts.TRANSLATOR_DE_FR + query)['choices'][0]['message']['content']
 
 def translator_de_es(query):
     MODEL = 'llama3.2:3b-instruct-q4_0'
-    return translate(MODEL, syspromts.TRANSLATOR_DE_ES + query)
+    return translate(MODEL, syspromts.TRANSLATOR_DE_ES + query)['choices'][0]['message']['content']
 
 
 if __name__ == "__main__":
     import time
-    import test_content
+
+    TESTSENTENCE_DE_BROKER = 'Ein Broker (oder Makler) ist eine Person oder ein Unternehmen, das sich zwischen dem Kauf- und Verkaufsberechtigten einer Wirtschaftsgüter (z.B. Aktien, Optionen, Derivate, Währungen, Rohstoffe usw.) stellt und als Vermittler fungiert. Sein Hauptziel ist es, Transaktionen zu erleichtern und Geld für sich selbst zu verdienen.'
 
     start=time.time()
-    response = translator_de_en(test_content.TESTSENTENCE_DE_BROKER)
-    print("First query took", start - time.time())
+    response = translator_de_en(TESTSENTENCE_DE_BROKER)
+    # Elapsed time is (now - start); the reversed operands printed a negative duration.
+    print("First query took", time.time() - start)
     print(json.dumps(response, indent=2))
-    message = response['choices'][0]['message']['content']
-    print(message)
+    print(response)
 
     start=time.time()
-    response = translator_de_fr(test_content.TESTSENTENCE_DE_BROKER)
-    message = response['choices'][0]['message']['content']
-    print("Second query took", start - time.time())
-
-    print(message)
+    response = translator_de_fr(TESTSENTENCE_DE_BROKER)
+    print("Second query took", time.time() - start)
+    print(response)