Fix speaker_id argument handling for single-speaker models

Port of https://github.com/OpenVoiceOS/ovos-tts-plugin-piper/pull/15/files. Some models fail if the inference arguments contain "sid"; this commit adds a check that skips that kwarg for single-speaker models.
2026-04-17 22:05:30 +00:00 · 2023-12-09 15:05:22 +00:00
parent 0bb4cb9daf
commit b0228cba6d
1 changed files with 18 additions and 19 deletions
--- a/src/python_run/piper/voice.py
+++ b/src/python_run/piper/voice.py
@@ -128,13 +128,13 @@ class PiperVoice:
                noise_w=noise_w,
            ) + silence_bytes
-    def synthesize_ids_to_raw(
+        def synthesize_ids_to_raw(
-        self,
+            self,
-        phoneme_ids: List[int],
+            phoneme_ids: List[int],
-        speaker_id: Optional[int] = None,
+            speaker_id: Optional[int] = None,
-        length_scale: Optional[float] = None,
+            length_scale: Optional[float] = None,
-        noise_scale: Optional[float] = None,
+            noise_scale: Optional[float] = None,
-        noise_w: Optional[float] = None,
+            noise_w: Optional[float] = None,
    ) -> bytes:
        """Synthesize raw audio from phoneme ids."""
        if length_scale is None:
@@ -153,25 +153,24 @@ class PiperVoice:
            dtype=np.float32,
        )
        args = {
            "input": phoneme_ids_array,
            "input_lengths": phoneme_ids_lengths,
            "scales": scales
        }
        if self.config.num_speakers <= 1:
            speaker_id = None
        if (self.config.num_speakers > 1) and (speaker_id is None):
            # Default speaker
            speaker_id = 0
        sid = None
        if speaker_id is not None:
            sid = np.array([speaker_id], dtype=np.int64)
            args["sid"] = sid
        # Synthesize through Onnx
-        audio = self.session.run(
+        audio = self.session.run(None, args, )[0].squeeze((0, 1))
            None,
            {
                "input": phoneme_ids_array,
                "input_lengths": phoneme_ids_lengths,
                "scales": scales,
                "sid": sid,
            },
        )[0].squeeze((0, 1))
        audio = audio_float_to_int16(audio.squeeze())
        return audio.tobytes()