diff --git a/src/python/piper_train/infer_onnx_streaming.py b/src/python/piper_train/infer_onnx_streaming.py index d33c903..7c1f170 100644 --- a/src/python/piper_train/infer_onnx_streaming.py +++ b/src/python/piper_train/infer_onnx_streaming.py @@ -138,12 +138,27 @@ def main(): parser.add_argument("--noise-scale", type=float, default=0.667) parser.add_argument("--noise-scale-w", type=float, default=0.8) parser.add_argument("--length-scale", type=float, default=1.0) + parser.add_argument( + "--chunk-size", + type=int, + default=45, + help="Number of mel frames to decode at each step" + ) + parser.add_argument( + "--chunk-padding", + type=int, + default=5, + help="Number of mel frames to add to the start and end of the current chunk to reduce decoding artifacts" + ) + args = parser.parse_args() streamer = SpeechStreamer( encoder_path=os.fspath(args.encoder), decoder_path=os.fspath(args.decoder), sample_rate=args.sample_rate, + chunk_size=args.chunk_size, + chunk_padding=args.chunk_padding, ) output_buffer = sys.stdout.buffer