restructure the project

This commit is contained in:
2025-03-06 08:46:03 +01:00
parent 7fa677d865
commit f14902c6e7
15 changed files with 22 additions and 20 deletions

View File

@@ -7,18 +7,16 @@ from __future__ import print_function, unicode_literals
from typing import List from typing import List
from dataclasses import asdict from dataclasses import asdict
import asyncio import asyncio
from copy import copy
import time import time
import logging as log import logging as log
import aioconsole import aioconsole
import multilang_translator.translator_config as translator_config
from utils import resample
from translator import llm_translator, test_content
from text_to_speech import text_to_speech
from auracast import multicast_control from auracast import multicast_control
from auracast import auracast_config from auracast import auracast_config
import multilang_translator.translator_config as translator_config
from translator import llm_translator
from translator.test_content import TESTSENTENCE from translator.test_content import TESTSENTENCE
from voice_provider import text_to_speech
# TODO: look for an end-to-end translation solution # TODO: look for an end-to-end translation solution

View File

@@ -2,9 +2,7 @@ import os
from pydantic import BaseModel from pydantic import BaseModel
from auracast import auracast_config from auracast import auracast_config
ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements') VENV_DIR = os.path.join(os.path.dirname(__file__), './../../venv')
VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv')
PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper'
class TranslatorBaseconfig(BaseModel): class TranslatorBaseconfig(BaseModel):
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe() big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe()

View File

@@ -1,27 +1,33 @@
import os import os
import shutil
import subprocess import subprocess
import time import time
import json import json
import logging as log import logging as log
import numpy as np import numpy as np
from multilang_translator import translator_config from voice_provider.utils.resample import resample_array
from multilang_translator.utils.resample import resample_array from voice_provider.utils.encode_lc3 import encode_lc3
from multilang_translator.text_to_speech import encode_lc3
PIPER_EXE = shutil.which('piper')
TTS_DIR = os.path.join(os.path.dirname(__file__)) TTS_DIR = os.path.join(os.path.dirname(__file__))
PIPER_DIR = f'{TTS_DIR}/piper' PIPER_WORKDIR = f'{TTS_DIR}/piper'
if not PIPER_EXE:
PIPER_EXE = f'{TTS_DIR}/../../venv/bin/piper'
def synth_piper(text, model="en_US-lessac-medium"): def synth_piper(text, model="en_US-lessac-medium"):
pwd = os.getcwd() pwd = os.getcwd()
os.chdir(PIPER_DIR) os.chdir(PIPER_WORKDIR)
start = time.time() start = time.time()
# make sure piper has voices.json in working directory, otherwise it attempts to always load models # make sure piper has voices.json in working directory, otherwise it attempts to always load models
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model
[translator_config.PIPER_EXE_PATH, [
'--cuda', PIPER_EXE,
'--model', model, '--cuda',
'--output-raw' '--model', model,
'--output-raw'
], ],
input=text.encode('utf-8'), input=text.encode('utf-8'),
capture_output=True capture_output=True
@@ -34,7 +40,7 @@ def synth_piper(text, model="en_US-lessac-medium"):
log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3)) log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3))
with open (f'{PIPER_DIR}/{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently with open (f'{PIPER_WORKDIR}/{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently
model_json = json.load(f) model_json = json.load(f)
return model_json, audio return model_json, audio
@@ -59,7 +65,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium",
if return_lc3: if return_lc3:
audio_pcm = (audio * 2**15-1).astype(np.int16) audio_pcm = (audio * 2**15-1).astype(np.int16)
lc3 = encode_lc3.encode(audio_pcm, target_sample_rate, 40) # TODO: octets per frame should be a parameter lc3 = encode_lc3(audio_pcm, target_sample_rate, 40) # TODO: octets per frame should be a parameter
return lc3 return lc3
else: else:
return audio return audio

View File

@@ -1,7 +1,7 @@
import numpy as np import numpy as np
import lc3 import lc3
def encode( def encode_lc3(
audio: np.array, audio: np.array,
output_sample_rate_hz, output_sample_rate_hz,
octets_per_frame, octets_per_frame,