mirror of
https://github.com/pstrueb/piper.git
synced 2026-04-21 07:14:49 +00:00
Add filter utterances
This commit is contained in:
committed by
Michael Hansen
parent
10b136cdf8
commit
5a64768924
@@ -11,7 +11,7 @@ from collections import Counter
|
||||
from dataclasses import dataclass, field
|
||||
from multiprocessing import JoinableQueue, Process, Queue
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional
|
||||
from typing import Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
import librosa
|
||||
from espeak_phonemizer import Phonemizer
|
||||
@@ -71,13 +71,6 @@ def main() -> None:
|
||||
help="Casing applied to utterance text",
|
||||
)
|
||||
#
|
||||
parser.add_argument(
|
||||
"--speaking-rate-min", type=float, help="Minimum speaking rate (chars/sec)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--speaking-rate-max", type=float, help="Maximum speaking rate (chars/sec)"
|
||||
)
|
||||
#
|
||||
parser.add_argument(
|
||||
"--skip-audio", action="store_true", help="Don't preprocess audio"
|
||||
)
|
||||
@@ -355,32 +348,6 @@ class PathEncoder(json.JSONEncoder):
|
||||
return super().default(o)
|
||||
|
||||
|
||||
def is_good_speaking_rate(
    text: str,
    wav_path: Path,
    args: argparse.Namespace,
) -> bool:
    """Return True if the utterance's speaking rate is within the configured bounds.

    The rate is computed as the number of characters in ``text`` divided by
    the duration of the audio at ``wav_path`` as reported by
    ``librosa.get_duration``. It is compared against
    ``args.speaking_rate_min`` and ``args.speaking_rate_max``; a bound that
    is ``None`` is not enforced.

    Args:
        text: Utterance text whose length is used as the character count.
        wav_path: Path to the audio file for the utterance.
        args: Parsed arguments providing ``speaking_rate_min`` and
            ``speaking_rate_max`` (each a float or ``None``).

    Returns:
        True when no bounds are configured (the audio is not read in that
        case) or when the rate lies within the configured bounds. False when
        the text is empty, the audio has no positive duration, or the rate
        falls outside a configured bound.
    """
    min_rate: Optional[float] = args.speaking_rate_min
    max_rate: Optional[float] = args.speaking_rate_max

    if (min_rate is None) and (max_rate is None):
        # Filtering is disabled; skip the cost of opening the audio file.
        return True

    if not text:
        return False

    duration = librosa.get_duration(path=wav_path)
    if duration <= 0:
        # A rate cannot be computed for audio without any samples; treat the
        # utterance as bad instead of raising ZeroDivisionError.
        return False

    rate = len(text) / duration

    if (min_rate is not None) and (rate < min_rate):
        return False

    if (max_rate is not None) and (rate > max_rate):
        return False

    return True
|
||||
|
||||
|
||||
def ljspeech_dataset(args: argparse.Namespace) -> Iterable[Utterance]:
|
||||
dataset_dir = args.input_dir
|
||||
is_single_speaker = args.single_speaker
|
||||
@@ -431,10 +398,6 @@ def ljspeech_dataset(args: argparse.Namespace) -> Iterable[Utterance]:
|
||||
_LOGGER.warning("Empty file: %s", wav_path)
|
||||
continue
|
||||
|
||||
if not is_good_speaking_rate(text, wav_path, args):
|
||||
_LOGGER.warning("Bad speaking rate: %s", wav_path)
|
||||
continue
|
||||
|
||||
yield Utterance(
|
||||
text=text, audio_path=wav_path, speaker=speaker, speaker_id=speaker_id
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user