fix broken resampling and piper always loading models
This commit is contained in:
@@ -125,14 +125,14 @@ async def main():
|
||||
caster = multicast_control.Multicaster(global_conf, [conf.big for conf in translator_conf])
|
||||
await caster.init_broadcast()
|
||||
|
||||
await announcement_from_german_text(
|
||||
global_conf,
|
||||
translator_conf,
|
||||
caster,
|
||||
test_content.TESTSENTENCE.DE_HELLO
|
||||
)
|
||||
await asyncio.wait([caster.streamer.task])
|
||||
#await command_line_ui(global_conf, translator_conf, caster)
|
||||
# await announcement_from_german_text(
|
||||
# global_conf,
|
||||
# translator_conf,
|
||||
# caster,
|
||||
# test_content.TESTSENTENCE.DE_HELLO
|
||||
# )
|
||||
# await asyncio.wait([caster.streamer.task])
|
||||
await command_line_ui(global_conf, translator_conf, caster)
|
||||
|
||||
if __name__ == '__main__':
|
||||
asyncio.run(main())
|
||||
|
||||
@@ -10,7 +10,6 @@ from multilang_translator.text_to_speech import encode_lc3
|
||||
|
||||
TTS_DIR = os.path.join(os.path.dirname(__file__))
|
||||
PIPER_DIR = f'{TTS_DIR}/piper'
|
||||
os.makedirs(PIPER_DIR, exist_ok=True)
|
||||
|
||||
def synth_piper(text, model="en_US-lessac-medium"):
|
||||
pwd = os.getcwd()
|
||||
@@ -51,7 +50,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium",
|
||||
tts_sample_rate = model_json['audio']['sample_rate']
|
||||
audio_np = np.frombuffer(audio_raw, dtype=np.dtype('<i2')).astype(np.float32) /(2**15-1)# convert to float fraction
|
||||
audio = resample_array(audio_np, tts_sample_rate, target_sample_rate)
|
||||
|
||||
|
||||
elif framework == 'koro':
|
||||
pass
|
||||
elif framework == 'xtts':
|
||||
@@ -61,7 +60,7 @@ def synthesize(text, target_sample_rate, framework, model="en_US-lessac-medium",
|
||||
else: raise NotImplementedError('unknown framework')
|
||||
|
||||
if return_lc3:
|
||||
audio_pcm = (audio_np * 2**15-1).astype(np.int16)
|
||||
audio_pcm = (audio * 2**15-1).astype(np.int16)
|
||||
lc3 = encode_lc3.encode(audio_pcm, target_sample_rate, 40) # TODO: octets per frame should be a parameter
|
||||
|
||||
return lc3
|
||||
|
||||
@@ -5,7 +5,6 @@ class TestContent:
|
||||
DE_HELLO: str = 'Hallo Welt.'
|
||||
DE_GATE_OPENED: str = "Gate 23 ist jetzt geöffnet."
|
||||
DE_TRAIN_ARRIVING: str = "Der Zug Nach Wien fährt heute von Gleis 3."
|
||||
DE_SECURITY_CHECKPOINT_OPENING: str = "Sicherheitskontrolle 5 ist jetzt geöffnet. Bitte setzen Sie sich in Bewegung, um Ihre Wartezeit während Sicherungsprüfungen zu minimieren."
|
||||
DE_SECURITY_CHECKPOINT_OPENING: str = "Sicherheitskontrolle 5 ist jetzt geöffnet. Bitte setzen Sie sich in Bewegung, um Ihre Wartezeit während Sicherheitsüberprüfungen zu minimieren."
|
||||
DE_RAINBOW: str = 'Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Wolke oder Regenwand wahrgenommen wird und ein großes Farbspektrum anzeigt.'
|
||||
DE_WAVE_PARTICLE: str = 'Der Wellen-Teilchen-Dualismus ist eine Konzeption, die postuliert, dass Teilchen sowohl als Wellen auf der Mikroebene verhalten sich und genau bestimme Eigenschaften wie Impuls und Energietrang besaßen.'
|
||||
TESTSENTENCE = TestContent()
|
||||
|
||||
@@ -18,11 +18,11 @@ class TranslatorBaseconfig(BaseModel):
|
||||
|
||||
class TranslatorConfigDe(TranslatorBaseconfig):
|
||||
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigDe()
|
||||
tts_model: str ='de_DE-kerstin-low'
|
||||
tts_model: str ='de_DE-thorsten-high'
|
||||
|
||||
class TranslatorConfigEn(TranslatorBaseconfig):
|
||||
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigEn()
|
||||
tts_model: str = 'en_US-lessac-medium'
|
||||
tts_model: str = 'en_GB-alba-medium'
|
||||
|
||||
class TranslatorConfigFr(TranslatorBaseconfig):
|
||||
big: auracast_config.AuracastBigConfig = auracast_config.AuracastBigConfigFr()
|
||||
|
||||
@@ -6,7 +6,7 @@ import librosa
|
||||
import soundfile as sf
|
||||
|
||||
|
||||
def resample_file(filename, out_filename, target_rate=int(24e3)):
|
||||
def resample_file(filename, out_filename, target_rate):
|
||||
start=time.time()
|
||||
# Load the original audio file
|
||||
audio, rate = librosa.load(filename)
|
||||
@@ -24,7 +24,7 @@ def resample_file(filename, out_filename, target_rate=int(24e3)):
|
||||
log.info("Resampling of %s took %s s", os.path.basename(filename), round(time.time() - start, 3))
|
||||
|
||||
|
||||
def resample_array(audio, rate, target_rate=int(24e3)):
|
||||
def resample_array(audio, rate, target_rate):
|
||||
start=time.time()
|
||||
# Load the original audio file
|
||||
|
||||
|
||||
+2
-1
@@ -8,7 +8,8 @@ dependencies = [
|
||||
"requests",
|
||||
"ollama",
|
||||
"aioconsole",
|
||||
"piper-tts==1.2.0"
|
||||
"piper-phonemize==1.1.0",
|
||||
"piper-tts==1.2.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
Reference in New Issue
Block a user