This commit is contained in:
Michael Hansen
2023-05-12 16:52:58 -05:00
parent e5c9683ce9
commit a92b2e15c7
3 changed files with 10 additions and 16 deletions

View File

@@ -8,8 +8,8 @@ from .phonemize import DEFAULT_PHONEME_ID_MAP
def main() -> None:
used_phonemes: Counter[str] = Counter()
missing_phonemes: Counter[str] = Counter()
used_phonemes: "Counter[str]" = Counter()
missing_phonemes: "Counter[str]" = Counter()
for line in sys.stdin:
line = line.strip()

View File

@@ -3,10 +3,10 @@ import argparse
import csv
import json
import re
import sys
import statistics
import shutil
import statistics
import subprocess
import sys
import threading
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
@@ -17,7 +17,7 @@ from typing import Optional
import numpy as np
from .norm_audio import make_silence_detector, trim_silence, SileroVoiceActivityDetector
from .norm_audio import make_silence_detector, trim_silence
_DIR = Path(__file__).parent
@@ -258,10 +258,5 @@ class ProcessUtterance:
# )
def make_silence_detector() -> SileroVoiceActivityDetector:
silence_model = _DIR / "norm_audio" / "models" / "silero_vad.onnx"
return SileroVoiceActivityDetector(silence_model)
if __name__ == "__main__":
main()

View File

@@ -11,9 +11,8 @@ from collections import Counter
from dataclasses import dataclass, field
from multiprocessing import JoinableQueue, Process, Queue
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple
from typing import Dict, Iterable, List, Optional
import librosa
from espeak_phonemizer import Phonemizer
from .norm_audio import cache_norm_audio, make_silence_detector
@@ -21,10 +20,10 @@ from .phonemize import (
ALPHABETS,
DEFAULT_PHONEME_ID_MAP,
MAX_PHONEMES,
PHONEME_MAPS,
PhonemeType,
phonemes_to_ids,
phonemize,
PHONEME_MAPS,
)
_LOGGER = logging.getLogger("preprocess")
@@ -110,7 +109,7 @@ def main() -> None:
# Count speakers
_LOGGER.debug("Counting number of speakers/utterances in the dataset")
speaker_counts: Counter[str] = Counter()
speaker_counts: "Counter[str]" = Counter()
num_utterances = 0
for utt in make_dataset(args):
speaker = utt.speaker or ""
@@ -192,7 +191,7 @@ def main() -> None:
queue_in.put(utt_batch)
_LOGGER.debug("Waiting for jobs to finish")
missing_phonemes: Counter[str] = Counter()
missing_phonemes: "Counter[str]" = Counter()
for _ in range(num_utterances):
utt = queue_out.get()
if utt is not None:
@@ -342,7 +341,7 @@ class Utterance:
phoneme_ids: Optional[List[int]] = None
audio_norm_path: Optional[Path] = None
audio_spec_path: Optional[Path] = None
missing_phonemes: Counter[str] = field(default_factory=Counter)
missing_phonemes: "Counter[str]" = field(default_factory=Counter)
class PathEncoder(json.JSONEncoder):