mirror of
https://github.com/pstrueb/piper.git
synced 2026-04-19 23:04:49 +00:00
Extending phoneme set to 256
This commit is contained in:
43
src/python/piper_train/check_phonemes.py
Normal file
43
src/python/piper_train/check_phonemes.py
Normal file
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python3
|
||||
import csv
|
||||
import json
|
||||
import sys
|
||||
import unicodedata
|
||||
from collections import Counter
|
||||
|
||||
from .phonemize import DEFAULT_PHONEME_ID_MAP
|
||||
|
||||
|
||||
def main() -> None:
|
||||
missing_phonemes: Counter[str] = Counter()
|
||||
|
||||
for line in sys.stdin:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
|
||||
utt = json.loads(line)
|
||||
for phoneme in utt["phonemes"]:
|
||||
if phoneme not in DEFAULT_PHONEME_ID_MAP:
|
||||
missing_phonemes[phoneme] += 1
|
||||
|
||||
if missing_phonemes:
|
||||
print("Missing", len(missing_phonemes), "phoneme(s)", file=sys.stderr)
|
||||
writer = csv.writer(sys.stdout)
|
||||
for phoneme, count in missing_phonemes.most_common():
|
||||
hex_phoneme = hex(ord(phoneme))
|
||||
writer.writerow(
|
||||
(
|
||||
phoneme,
|
||||
unicodedata.category(phoneme),
|
||||
unicodedata.name(phoneme),
|
||||
f"\\u{hex_phoneme}",
|
||||
count,
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user