diff --git a/src/benchmark/benchmark_generator.py b/src/benchmark/benchmark_generator.py index a1ee1b9..4dcb388 100644 --- a/src/benchmark/benchmark_generator.py +++ b/src/benchmark/benchmark_generator.py @@ -78,7 +78,7 @@ def synthesize(model, phoneme_ids, speaker_id, sample_rate) -> float: ) end_time = time.monotonic_ns() - audio_sec = (len(audio) / 2) / sample_rate + audio_sec = len(audio) / sample_rate infer_sec = (end_time - start_time) / 1e9 rtf = infer_sec / audio_sec diff --git a/src/benchmark/benchmark_onnx.py b/src/benchmark/benchmark_onnx.py index 9281810..3fa0c2b 100644 --- a/src/benchmark/benchmark_onnx.py +++ b/src/benchmark/benchmark_onnx.py @@ -107,7 +107,7 @@ def synthesize(session, phoneme_ids, speaker_id, sample_rate) -> float: )[0].squeeze() end_time = time.monotonic_ns() - audio_sec = (len(audio) / 2) / sample_rate + audio_sec = len(audio) / sample_rate infer_sec = (end_time - start_time) / 1e9 rtf = infer_sec / audio_sec diff --git a/src/benchmark/benchmark_torchscript.py b/src/benchmark/benchmark_torchscript.py index f1972df..d4bee21 100644 --- a/src/benchmark/benchmark_torchscript.py +++ b/src/benchmark/benchmark_torchscript.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +import logging import json import time import statistics @@ -11,6 +12,8 @@ _NOISE_SCALE = 0.667 _LENGTH_SCALE = 1.0 _NOISE_W = 0.8 +_LOGGER = logging.getLogger(__name__) + def main() -> None: parser = argparse.ArgumentParser() @@ -19,6 +22,7 @@ def main() -> None: ) parser.add_argument("-c", "--config", help="Path to model config file (.json)") args = parser.parse_args() + logging.basicConfig(level=logging.DEBUG) if not args.config: args.config = f"{args.model}.json" @@ -81,10 +85,18 @@ def synthesize(model, phoneme_ids, speaker_id, sample_rate) -> float: ) end_time = time.monotonic_ns() - audio_sec = (len(audio) / 2) / sample_rate + audio_sec = len(audio) / sample_rate infer_sec = (end_time - start_time) / 1e9 + rtf = infer_sec / audio_sec - return infer_sec / audio_sec + _LOGGER.debug( + "Real-time factor: %s (infer=%s sec, audio=%s sec)", + rtf, + infer_sec, + audio_sec, + ) + + return rtf if __name__ == "__main__":