restructure the project
This commit is contained in:
@@ -7,18 +7,16 @@ from __future__ import print_function, unicode_literals
|
|||||||
from typing import List
|
from typing import List
|
||||||
from dataclasses import asdict
|
from dataclasses import asdict
|
||||||
import asyncio
|
import asyncio
|
||||||
from copy import copy
|
|
||||||
import time
|
import time
|
||||||
import logging as log
|
import logging as log
|
||||||
import aioconsole
|
import aioconsole
|
||||||
|
|
||||||
import multilang_translator.translator_config as translator_config
|
|
||||||
from utils import resample
|
|
||||||
from translator import llm_translator, test_content
|
|
||||||
from text_to_speech import text_to_speech
|
|
||||||
from auracast import multicast_control
|
from auracast import multicast_control
|
||||||
from auracast import auracast_config
|
from auracast import auracast_config
|
||||||
|
import multilang_translator.translator_config as translator_config
|
||||||
|
from translator import llm_translator
|
||||||
from translator.test_content import TESTSENTENCE
|
from translator.test_content import TESTSENTENCE
|
||||||
|
from voice_provider import text_to_speech
|
||||||
|
|
||||||
# TODO: look for an end-to-end translation solution
|
# TODO: look for an end-to-end translation solution
|
||||||
|
|
||||||
@@ -2,9 +2,7 @@ import os
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from auracast import auracast_config
|
from auracast import auracast_config
|
||||||
|
|
||||||
ANNOUNCEMENT_DIR = os.path.join(os.path.dirname(__file__), 'announcements')
|
VENV_DIR = os.path.join(os.path.dirname(__file__), './../../venv')
|
||||||
VENV_DIR = os.path.join(os.path.dirname(__file__), '../venv')
|
|
||||||
PIPER_EXE_PATH = f'{VENV_DIR}/bin/piper'
|
|
||||||
|
|
||||||
class TranslatorBaseconfig(BaseModel):
|
class TranslatorBaseconfig(BaseModel):
|
||||||
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe()
|
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe()
|
||||||
@@ -1,27 +1,33 @@
|
|||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import logging as log
|
import logging as log
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from multilang_translator import translator_config
|
from voice_provider.utils.resample import resample_array
|
||||||
from multilang_translator.utils.resample import resample_array
|
from voice_provider.utils.encode_lc3 import encode_lc3
|
||||||
from multilang_translator.text_to_speech import encode_lc3
|
|
||||||
|
PIPER_EXE = shutil.which('piper')
|
||||||
|
|
||||||
TTS_DIR = os.path.join(os.path.dirname(__file__))
|
TTS_DIR = os.path.join(os.path.dirname(__file__))
|
||||||
PIPER_DIR = f'{TTS_DIR}/piper'
|
PIPER_WORKDIR = f'{TTS_DIR}/piper'
|
||||||
|
|
||||||
|
if not PIPER_EXE:
|
||||||
|
PIPER_EXE = f'{TTS_DIR}/../../venv/bin/piper'
|
||||||
|
|
||||||
def synth_piper(text, model="en_US-lessac-medium"):
|
def synth_piper(text, model="en_US-lessac-medium"):
|
||||||
pwd = os.getcwd()
|
pwd = os.getcwd()
|
||||||
os.chdir(PIPER_DIR)
|
os.chdir(PIPER_WORKDIR)
|
||||||
start = time.time()
|
start = time.time()
|
||||||
|
|
||||||
# make sure piper has voices.json in working directory, otherwise it attempts to always load models
|
# make sure piper has voices.json in working directory, otherwise it attempts to always load models
|
||||||
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model
|
ret = subprocess.run( # TODO: wrap this whole thing in a class and open a permanent pipe to the model
|
||||||
[translator_config.PIPER_EXE_PATH,
|
[
|
||||||
'--cuda',
|
PIPER_EXE,
|
||||||
'--model', model,
|
'--cuda',
|
||||||
'--output-raw'
|
'--model', model,
|
||||||
|
'--output-raw'
|
||||||
],
|
],
|
||||||
input=text.encode('utf-8'),
|
input=text.encode('utf-8'),
|
||||||
capture_output=True
|
capture_output=True
|
||||||
@@ -34,7 +40,7 @@ def synth_piper(text, model="en_US-lessac-medium"):
|
|||||||
|
|
||||||
log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3))
|
log.info("Running piper for model %s took %s s", model, round(time.time() - start, 3))
|
||||||
|
|
||||||
with open (f'{PIPER_DIR}/{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently
|
with open (f'{PIPER_WORKDIR}/{model}.onnx.json') as f: # TODO: wrap everything into a class, store the json permanently
|
||||||
model_json = json.load(f)
|
model_json = json.load(f)
|
||||||
|
|
||||||
return model_json, audio
|
return model_json, audio
|
||||||
@@ -59,7 +65,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium",
|
|||||||
|
|
||||||
if return_lc3:
|
if return_lc3:
|
||||||
audio_pcm = (audio * 2**15-1).astype(np.int16)
|
audio_pcm = (audio * 2**15-1).astype(np.int16)
|
||||||
lc3 = encode_lc3.encode(audio_pcm, target_sample_rate, 40) # TODO: octets per frame should be a parameter
|
lc3 = encode_lc3(audio_pcm, target_sample_rate, 40) # TODO: octets per frame should be a parameter
|
||||||
return lc3
|
return lc3
|
||||||
else:
|
else:
|
||||||
return audio
|
return audio
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import lc3
|
import lc3
|
||||||
|
|
||||||
def encode(
|
def encode_lc3(
|
||||||
audio: np.array,
|
audio: np.array,
|
||||||
output_sample_rate_hz,
|
output_sample_rate_hz,
|
||||||
octets_per_frame,
|
octets_per_frame,
|
||||||
Reference in New Issue
Block a user