restructure the project

This commit is contained in:
2025-03-06 08:46:03 +01:00
parent 7fa677d865
commit f14902c6e7
15 changed files with 22 additions and 20 deletions

View File

@@ -7,18 +7,16 @@ from __future__ import print_function, unicode_literals
from typing import List
from dataclasses import asdict
import asyncio
from copy import copy
import time
import logging as log
import aioconsole
import multilang_translator.translator_config as translator_config
from utils import resample
from translator import llm_translator, test_content
from text_to_speech import text_to_speech
from auracast import multicast_control
from auracast import auracast_config
import multilang_translator.translator_config as translator_config
from translator import llm_translator
from translator.test_content import TESTSENTENCE
from voice_provider import text_to_speech
# TODO: look for an end-to-end translation solution

View File

@@ -2,9 +2,7 @@ import os
from pydantic import BaseModel
from auracast import auracast_config
ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements')
VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv')
PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper'
VENV_DIR = os.path.join(os.path.dirname(__file__), './../../venv')
class TranslatorBaseconfig(BaseModel):
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe()

View File

@@ -1,27 +1,33 @@
import os
import shutil
import subprocess
import time
import json
import logging as log
import numpy as np
from multilang_translator import translator_config
from multilang_translator.utils.resample import resample_array
from multilang_translator.text_to_speech import encode_lc3
from voice_provider.utils.resample import resample_array
from voice_provider.utils.encode_lc3 import encode_lc3
PIPER_EXE = shutil.which('piper')
TTS_DIR = os.path.join(os.path.dirname(__file__))
PIPER_DIR = f'{TTS_DIR}/piper'
PIPER_WORKDIR = f'{TTS_DIR}/piper'
if not PIPER_EXE:
PIPER_EXE = f'{TTS_DIR}/../../venv/bin/piper'
def synth_piper(text, model="en_US-lessac-medium"):
pwd = os.getcwd()
os.chdir(PIPER_DIR)
os.chdir(PIPER_WORKDIR)
start = time.time()
# make sure piper has voices.json in its working directory; otherwise it always attempts to load models
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model
[translator_config.PIPER_EXE_PATH,
'--cuda',
'--model', model,
'--output-raw'
[
PIPER_EXE,
'--cuda',
'--model', model,
'--output-raw'
],
input=text.encode('utf-8'),
capture_output=True
@@ -34,7 +40,7 @@ def synth_piper(text, model="en_US-lessac-medium"):
log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3))
with open (f'{PIPER_DIR}/{model}.onnx.json') as f: # TODO: wrap everyth0ing into a class, store the json permanently
with open (f'{PIPER_WORKDIR}/{model}.onnx.json') as f: # TODO: wrap everyth0ing into a class, store the json permanently
model_json = json.load(f)
return model_json, audio
@@ -59,7 +65,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium",
if return_lc3:
audio_pcm = (audio * 2**15-1).astype(np.int16)
lc3 = encode_lc3.encode(audio_pcm, target_sample_rate, 40) # TODO: octetts per frame should be parameter
lc3 = encode_lc3(audio_pcm, target_sample_rate, 40) # TODO: octetts per frame should be parameter
return lc3
else:
return audio

View File

@@ -1,7 +1,7 @@
import numpy as np
import lc3
def encode(
def encode_lc3(
audio: np.array,
output_sample_rate_hz,
octets_per_frame,