Add filter utterances

This commit is contained in:
Michael Hansen
2023-05-11 20:48:58 -05:00
committed by Michael Hansen
parent 10b136cdf8
commit 5a64768924
3 changed files with 267 additions and 110 deletions

View File

@@ -11,7 +11,7 @@ from collections import Counter
from dataclasses import dataclass, field
from multiprocessing import JoinableQueue, Process, Queue
from pathlib import Path
from typing import Dict, Iterable, List, Optional
from typing import Dict, Iterable, List, Optional, Tuple
import librosa
from espeak_phonemizer import Phonemizer
@@ -71,13 +71,6 @@ def main() -> None:
help="Casing applied to utterance text",
)
#
parser.add_argument(
"--speaking-rate-min", type=float, help="Minimum speaking rate (chars/sec)"
)
parser.add_argument(
"--speaking-rate-max", type=float, help="Maximum speaking rate (chars/sec)"
)
#
parser.add_argument(
"--skip-audio", action="store_true", help="Don't preprocess audio"
)
@@ -355,32 +348,6 @@ class PathEncoder(json.JSONEncoder):
return super().default(o)
def is_good_speaking_rate(
    text: str,
    wav_path: Path,
    args: argparse.Namespace,
) -> bool:
    """Return True if the utterance's speaking rate is within the configured bounds.

    The rate is computed as ``len(text)`` divided by the audio duration
    reported by ``librosa.get_duration`` for ``wav_path``. The bounds are read
    from ``args.speaking_rate_min`` and ``args.speaking_rate_max``; either may
    be ``None`` to disable that side of the check.

    Args:
        text: Utterance text whose length is the numerator of the rate.
        wav_path: Path to the audio file for the utterance.
        args: Parsed command-line arguments carrying the rate bounds.

    Returns:
        True when no bounds are configured or the rate lies within them;
        False for empty text, zero-length audio, or an out-of-range rate.
    """
    min_rate: Optional[float] = args.speaking_rate_min
    max_rate: Optional[float] = args.speaking_rate_max

    if (min_rate is None) and (max_rate is None):
        # Filtering is disabled: accept without reading the audio file.
        return True

    if not text:
        return False

    duration = librosa.get_duration(path=wav_path)
    if duration <= 0:
        # Audio with no samples has no meaningful rate; reject it rather than
        # raising ZeroDivisionError in the division below.
        return False

    rate = len(text) / duration

    if (min_rate is not None) and (rate < min_rate):
        return False

    if (max_rate is not None) and (rate > max_rate):
        return False

    return True
def ljspeech_dataset(args: argparse.Namespace) -> Iterable[Utterance]:
dataset_dir = args.input_dir
is_single_speaker = args.single_speaker
@@ -431,10 +398,6 @@ def ljspeech_dataset(args: argparse.Namespace) -> Iterable[Utterance]:
_LOGGER.warning("Empty file: %s", wav_path)
continue
if not is_good_speaking_rate(text, wav_path, args):
_LOGGER.warning("Bad speaking rate: %s", wav_path)
continue
yield Utterance(
text=text, audio_path=wav_path, speaker=speaker, speaker_id=speaker_id
)