mirror of
https://github.com/pstrueb/piper.git
synced 2026-04-18 06:15:30 +00:00
Add uk tests sentences
This commit is contained in:
@@ -300,18 +300,55 @@ def phonemes_to_ids(
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("language")
|
||||
parser.add_argument(
|
||||
"--phoneme-type",
|
||||
choices=list(PhonemeType),
|
||||
default=PhonemeType.ESPEAK,
|
||||
help="Type of phonemes to use (default: espeak)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--text-casing",
|
||||
choices=("ignore", "lower", "upper", "casefold"),
|
||||
default="ignore",
|
||||
help="Casing applied to utterance text",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
phonemizer = Phonemizer(args.language)
|
||||
phonemizer: Optional[Phonemizer] = None
|
||||
|
||||
if args.text_casing == "lower":
|
||||
casing = str.lower
|
||||
elif args.text_casing == "upper":
|
||||
casing = str.upper
|
||||
else:
|
||||
# ignore
|
||||
casing = lambda s: s
|
||||
|
||||
if args.phoneme_type == PhonemeType.TEXT:
|
||||
# Use text directly
|
||||
phoneme_id_map = ALPHABETS[args.language]
|
||||
else:
|
||||
# Use eSpeak
|
||||
phonemizer = Phonemizer(args.language)
|
||||
phoneme_id_map = DEFAULT_PHONEME_ID_MAP
|
||||
|
||||
phoneme_map = PHONEME_MAPS.get(args.language)
|
||||
missing_phonemes: "Counter[str]" = Counter()
|
||||
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
phonemes = phonemize(line, phonemizer, phoneme_map=phoneme_map)
|
||||
phoneme_ids = phonemes_to_ids(phonemes)
|
||||
if args.phoneme_type == PhonemeType.TEXT:
|
||||
phonemes = list(unicodedata.normalize("NFD", casing(line)))
|
||||
else:
|
||||
assert phonemizer is not None
|
||||
phonemes = phonemize(line, phonemizer, phoneme_map=phoneme_map)
|
||||
|
||||
phoneme_ids = phonemes_to_ids(
|
||||
phonemes, phoneme_id_map=phoneme_id_map, missing_phonemes=missing_phonemes
|
||||
)
|
||||
json.dump(
|
||||
{
|
||||
"text": line,
|
||||
@@ -323,6 +360,11 @@ def main() -> None:
|
||||
)
|
||||
print("")
|
||||
|
||||
if missing_phonemes:
|
||||
print("Missing", len(missing_phonemes), "phonemes", file=sys.stderr)
|
||||
for phoneme, count in missing_phonemes.most_common():
|
||||
print(phoneme, count, file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@@ -303,12 +303,11 @@ def phonemize_batch_text(
|
||||
try:
|
||||
_LOGGER.debug(utt)
|
||||
utt.phonemes = list(unicodedata.normalize("NFD", casing(utt.text)))
|
||||
utt.phoneme_ids = []
|
||||
for phoneme in utt.phonemes:
|
||||
if phoneme in alphabet:
|
||||
utt.phoneme_ids.extend(alphabet[phoneme])
|
||||
else:
|
||||
utt.missing_phonemes[phoneme] += 1
|
||||
utt.phoneme_ids = phonemes_to_ids(
|
||||
utt.phonemes,
|
||||
phoneme_id_map=alphabet,
|
||||
missing_phonemes=utt.missing_phonemes,
|
||||
)
|
||||
if not args.skip_audio:
|
||||
utt.audio_norm_path, utt.audio_spec_path = cache_norm_audio(
|
||||
utt.audio_path,
|
||||
|
||||
Reference in New Issue
Block a user