diff --git a/src/python/piper_train/check_phonemes.py b/src/python/piper_train/check_phonemes.py index 82e0685..7933689 100644 --- a/src/python/piper_train/check_phonemes.py +++ b/src/python/piper_train/check_phonemes.py @@ -8,8 +8,8 @@ from .phonemize import DEFAULT_PHONEME_ID_MAP def main() -> None: - used_phonemes: Counter[str] = Counter() - missing_phonemes: Counter[str] = Counter() + used_phonemes: "Counter[str]" = Counter() + missing_phonemes: "Counter[str]" = Counter() for line in sys.stdin: line = line.strip() diff --git a/src/python/piper_train/filter_utterances.py b/src/python/piper_train/filter_utterances.py index d88e3e1..0770ed3 100644 --- a/src/python/piper_train/filter_utterances.py +++ b/src/python/piper_train/filter_utterances.py @@ -3,10 +3,10 @@ import argparse import csv import json import re -import sys -import statistics import shutil +import statistics import subprocess +import sys import threading from collections import defaultdict from concurrent.futures import ThreadPoolExecutor @@ -17,7 +17,7 @@ from typing import Optional import numpy as np -from .norm_audio import make_silence_detector, trim_silence, SileroVoiceActivityDetector +from .norm_audio import make_silence_detector, trim_silence _DIR = Path(__file__).parent @@ -258,10 +258,5 @@ class ProcessUtterance: # ) -def make_silence_detector() -> SileroVoiceActivityDetector: - silence_model = _DIR / "norm_audio" / "models" / "silero_vad.onnx" - return SileroVoiceActivityDetector(silence_model) - - if __name__ == "__main__": main() diff --git a/src/python/piper_train/preprocess.py b/src/python/piper_train/preprocess.py index 8d081bb..3b8d24e 100644 --- a/src/python/piper_train/preprocess.py +++ b/src/python/piper_train/preprocess.py @@ -11,9 +11,8 @@ from collections import Counter from dataclasses import dataclass, field from multiprocessing import JoinableQueue, Process, Queue from pathlib import Path -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Dict, Iterable, List, Optional -import librosa from espeak_phonemizer import Phonemizer from .norm_audio import cache_norm_audio, make_silence_detector @@ -21,10 +20,10 @@ from .phonemize import ( ALPHABETS, DEFAULT_PHONEME_ID_MAP, MAX_PHONEMES, + PHONEME_MAPS, PhonemeType, phonemes_to_ids, phonemize, - PHONEME_MAPS, ) _LOGGER = logging.getLogger("preprocess") @@ -110,7 +109,7 @@ def main() -> None: # Count speakers _LOGGER.debug("Counting number of speakers/utterances in the dataset") - speaker_counts: Counter[str] = Counter() + speaker_counts: "Counter[str]" = Counter() num_utterances = 0 for utt in make_dataset(args): speaker = utt.speaker or "" @@ -192,7 +191,7 @@ def main() -> None: queue_in.put(utt_batch) _LOGGER.debug("Waiting for jobs to finish") - missing_phonemes: Counter[str] = Counter() + missing_phonemes: "Counter[str]" = Counter() for _ in range(num_utterances): utt = queue_out.get() if utt is not None: @@ -342,7 +341,7 @@ class Utterance: phoneme_ids: Optional[List[int]] = None audio_norm_path: Optional[Path] = None audio_spec_path: Optional[Path] = None - missing_phonemes: Counter[str] = field(default_factory=Counter) + missing_phonemes: "Counter[str]" = field(default_factory=Counter) class PathEncoder(json.JSONEncoder):