mirror of
https://github.com/pstrueb/piper.git
synced 2026-04-17 22:05:30 +00:00
fix single speaker models speaker_id arg
Port of https://github.com/OpenVoiceOS/ovos-tts-plugin-piper/pull/15/files. Some models fail if the arguments contain "sid"; this commit adds a check to skip that kwarg.
This commit is contained in:
@@ -128,13 +128,13 @@ class PiperVoice:
|
|||||||
noise_w=noise_w,
|
noise_w=noise_w,
|
||||||
) + silence_bytes
|
) + silence_bytes
|
||||||
|
|
||||||
def synthesize_ids_to_raw(
|
def synthesize_ids_to_raw(
|
||||||
self,
|
self,
|
||||||
phoneme_ids: List[int],
|
phoneme_ids: List[int],
|
||||||
speaker_id: Optional[int] = None,
|
speaker_id: Optional[int] = None,
|
||||||
length_scale: Optional[float] = None,
|
length_scale: Optional[float] = None,
|
||||||
noise_scale: Optional[float] = None,
|
noise_scale: Optional[float] = None,
|
||||||
noise_w: Optional[float] = None,
|
noise_w: Optional[float] = None,
|
||||||
) -> bytes:
|
) -> bytes:
|
||||||
"""Synthesize raw audio from phoneme ids."""
|
"""Synthesize raw audio from phoneme ids."""
|
||||||
if length_scale is None:
|
if length_scale is None:
|
||||||
@@ -153,25 +153,24 @@ class PiperVoice:
|
|||||||
dtype=np.float32,
|
dtype=np.float32,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
args = {
|
||||||
|
"input": phoneme_ids_array,
|
||||||
|
"input_lengths": phoneme_ids_lengths,
|
||||||
|
"scales": scales
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.config.num_speakers <= 1:
|
||||||
|
speaker_id = None
|
||||||
|
|
||||||
if (self.config.num_speakers > 1) and (speaker_id is None):
|
if (self.config.num_speakers > 1) and (speaker_id is None):
|
||||||
# Default speaker
|
# Default speaker
|
||||||
speaker_id = 0
|
speaker_id = 0
|
||||||
|
|
||||||
sid = None
|
|
||||||
|
|
||||||
if speaker_id is not None:
|
if speaker_id is not None:
|
||||||
sid = np.array([speaker_id], dtype=np.int64)
|
sid = np.array([speaker_id], dtype=np.int64)
|
||||||
|
args["sid"] = sid
|
||||||
|
|
||||||
# Synthesize through Onnx
|
# Synthesize through Onnx
|
||||||
audio = self.session.run(
|
audio = self.session.run(None, args, )[0].squeeze((0, 1))
|
||||||
None,
|
|
||||||
{
|
|
||||||
"input": phoneme_ids_array,
|
|
||||||
"input_lengths": phoneme_ids_lengths,
|
|
||||||
"scales": scales,
|
|
||||||
"sid": sid,
|
|
||||||
},
|
|
||||||
)[0].squeeze((0, 1))
|
|
||||||
audio = audio_float_to_int16(audio.squeeze())
|
audio = audio_float_to_int16(audio.squeeze())
|
||||||
|
|
||||||
return audio.tobytes()
|
return audio.tobytes()
|
||||||
|
|||||||
Reference in New Issue
Block a user