Files
piper/src/python/piper_train/phonemize.py
2023-06-08 15:36:40 -05:00

238 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import argparse
import json
import sys
import unicodedata
from collections import Counter
from typing import Dict, Iterable, List, Mapping, Optional
from espeak_phonemizer import Phonemizer
# Upper bound on the number of distinct phoneme ids.
# NOTE(review): not referenced by the code in this module; presumably consumed
# by other code that sizes its tables from it -- confirm.
MAX_PHONEMES = 256

# Default mapping from a single phoneme symbol (one Unicode codepoint) to the
# list of integer ids emitted for it.  Ids are assigned densely from 0 to 148
# and every symbol maps to exactly one id.
#
# "_", "^" and "$" are the default pad / beginning-of-sentence /
# end-of-sentence symbols used by phonemes_to_ids().
#
# Several keys are visually confusable with ASCII characters (for example the
# click letters U+01C0..U+01C3 versus "|" and "!", or U+0261 versus "g");
# they are intentional IPA symbols and must not be "normalized" to ASCII.
DEFAULT_PHONEME_ID_MAP: Dict[str, List[int]] = {
    "_": [0],
    "^": [1],
    "$": [2],
    " ": [3],
    "!": [4],
    "'": [5],
    "(": [6],
    ")": [7],
    ",": [8],
    "-": [9],
    ".": [10],
    ":": [11],
    ";": [12],
    "?": [13],
    "a": [14],
    "b": [15],
    "c": [16],
    "d": [17],
    "e": [18],
    "f": [19],
    "h": [20],
    "i": [21],
    "j": [22],
    "k": [23],
    "l": [24],
    "m": [25],
    "n": [26],
    "o": [27],
    "p": [28],
    "q": [29],
    "r": [30],
    "s": [31],
    "t": [32],
    "u": [33],
    "v": [34],
    "w": [35],
    "x": [36],
    "y": [37],
    "z": [38],
    "æ": [39],
    "ç": [40],
    "ð": [41],
    "ø": [42],
    "ħ": [43],
    "ŋ": [44],
    "œ": [45],
    "ǀ": [46],  # U+01C0, not ASCII "|"
    "ǁ": [47],  # U+01C1
    "ǂ": [48],  # U+01C2
    "ǃ": [49],  # U+01C3, not ASCII "!"
    "ɐ": [50],
    "ɑ": [51],
    "ɒ": [52],
    "ɓ": [53],
    "ɔ": [54],
    "ɕ": [55],
    "ɖ": [56],
    "ɗ": [57],
    "ɘ": [58],
    "ə": [59],
    "ɚ": [60],
    "ɛ": [61],
    "ɜ": [62],
    "ɞ": [63],
    "ɟ": [64],
    "ɠ": [65],
    "ɡ": [66],  # U+0261, not ASCII "g"
    "ɢ": [67],
    "ɣ": [68],
    "ɤ": [69],
    "ɥ": [70],
    "ɦ": [71],
    "ɧ": [72],
    "ɨ": [73],
    "ɪ": [74],
    "ɫ": [75],
    "ɬ": [76],
    "ɭ": [77],
    "ɮ": [78],
    "ɯ": [79],
    "ɰ": [80],
    "ɱ": [81],
    "ɲ": [82],
    "ɳ": [83],
    "ɴ": [84],
    "ɵ": [85],
    "ɶ": [86],
    "ɸ": [87],
    "ɹ": [88],
    "ɺ": [89],
    "ɻ": [90],
    "ɽ": [91],
    "ɾ": [92],
    "ʀ": [93],
    "ʁ": [94],
    "ʂ": [95],
    "ʃ": [96],
    "ʄ": [97],
    "ʈ": [98],
    "ʉ": [99],
    "ʊ": [100],
    "ʋ": [101],
    "ʌ": [102],
    "ʍ": [103],
    "ʎ": [104],
    "ʏ": [105],
    "ʐ": [106],
    "ʑ": [107],
    "ʒ": [108],
    "ʔ": [109],
    "ʕ": [110],
    "ʘ": [111],
    "ʙ": [112],
    "ʛ": [113],
    "ʜ": [114],
    "ʝ": [115],
    "ʟ": [116],
    "ʡ": [117],
    "ʢ": [118],
    "ʲ": [119],
    "ˈ": [120],
    "ˌ": [121],
    "ː": [122],
    "ˑ": [123],
    "˞": [124],
    "β": [125],
    "θ": [126],
    "χ": [127],
    # NOTE(review): the keys for ids 128, 129 and 148 had been reduced to
    # empty strings (three duplicate "" keys, so two ids were unreachable).
    # They are written as escapes so they cannot be lost again; confirm the
    # symbols against the upstream phoneme id table.
    "\u1d7b": [128],  # ᵻ latin small capital letter i with stroke
    "\u2c71": [129],  # ⱱ latin small letter v with right hook
    "0": [130],  # tones
    "1": [131],
    "2": [132],
    "3": [133],
    "4": [134],
    "5": [135],
    "6": [136],
    "7": [137],
    "8": [138],
    "9": [139],
    "\u0327": [140],  # combining cedilla
    "\u0303": [141],  # combining tilde
    "\u032a": [142],  # combining bridge below
    "\u032f": [143],  # combining inverted breve below
    "\u0329": [144],  # combining vertical line below
    "ʰ": [145],
    "ˤ": [146],
    "ε": [147],
    "\u2193": [148],  # ↓ downwards arrow
}
def phonemize(text: str, phonemizer: Phonemizer) -> List[str]:
    """Phonemize *text* and return the result as individual codepoints.

    Args:
        text: Input text handed to the phonemizer.
        phonemizer: Object whose ``phonemize(text=..., keep_clause_breakers=...)``
            method returns the phoneme string for *text*.

    Returns:
        One list element per Unicode codepoint of the NFD-normalized phoneme
        string, so combining marks become separate entries.
    """
    raw_phonemes = phonemizer.phonemize(text=text, keep_clause_breakers=True)

    # NFD splits precomposed characters into base letter + combining marks,
    # which lets each piece be looked up as its own symbol.
    decomposed = unicodedata.normalize("NFD", raw_phonemes)
    return list(decomposed)
def phonemes_to_ids(
    phonemes: Iterable[str],
    phoneme_id_map: Optional[Mapping[str, Iterable[int]]] = None,
    missing_phonemes: "Optional[Counter[str]]" = None,
    pad: Optional[str] = "_",
    bos: Optional[str] = "^",
    eos: Optional[str] = "$",
) -> List[int]:
    """Translate a phoneme sequence into a flat list of integer ids.

    The output is laid out as: ``bos`` ids, ``pad`` ids, then for every mapped
    phoneme its ids followed by the ``pad`` ids, and finally the ``eos`` ids.
    Any of ``pad``/``bos``/``eos`` that is ``None`` or empty is left out.

    Args:
        phonemes: Phoneme symbols to convert.
        phoneme_id_map: Symbol -> ids lookup; ``DEFAULT_PHONEME_ID_MAP`` is
            used when this is ``None``.
        missing_phonemes: Optional counter that is incremented for each
            symbol with no (or an empty) entry in the map.  Such symbols are
            dropped from the output either way.
        pad: Symbol inserted after ``bos`` and after each mapped phoneme.
        bos: Symbol emitted first.
        eos: Symbol emitted last.

    Returns:
        The concatenated id list.

    Raises:
        KeyError: If a pad/bos/eos symbol that needs to be emitted is not a
            key of the map.
    """
    id_map = phoneme_id_map
    if id_map is None:
        id_map = DEFAULT_PHONEME_ID_MAP

    ids: List[int] = []

    # Sequence prefix: beginning-of-sentence marker, then one pad.
    for marker in (bos, pad):
        if marker:
            ids.extend(id_map[marker])

    for symbol in phonemes:
        symbol_ids = id_map.get(symbol)
        if not symbol_ids:
            # Unknown symbol: record it if the caller asked, then skip it.
            if missing_phonemes is not None:
                missing_phonemes[symbol] += 1
            continue

        ids.extend(symbol_ids)
        if pad:
            ids.extend(id_map[pad])

    if eos:
        ids.extend(id_map[eos])

    return ids
# -----------------------------------------------------------------------------
def main() -> None:
    """Phonemize stdin line by line and write one JSON object per line.

    Takes a single positional command-line argument, ``language``, which is
    passed to ``Phonemizer``.  Each non-blank input line (after stripping
    surrounding whitespace) produces an object with the keys ``text``,
    ``phonemes`` and ``phoneme_ids`` on stdout, followed by a newline.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("language")
    cli_args = arg_parser.parse_args()

    phonemizer = Phonemizer(cli_args.language)

    for raw_line in sys.stdin:
        text = raw_line.strip()
        if text:
            phonemes = phonemize(text, phonemizer)
            record = {
                "text": text,
                "phonemes": phonemes,
                "phoneme_ids": phonemes_to_ids(phonemes),
            }
            # ensure_ascii=False keeps phoneme symbols as literal characters
            # instead of \uXXXX escapes.
            json.dump(record, sys.stdout, ensure_ascii=False)
            sys.stdout.write("\n")


if __name__ == "__main__":
    main()