mirror of
https://github.com/pstrueb/piper.git
synced 2026-04-19 23:04:49 +00:00
Add text phonemes to preprocess
This commit is contained in:
@@ -3,10 +3,20 @@ import json
|
||||
import sys
|
||||
import unicodedata
|
||||
from collections import Counter
|
||||
from enum import Enum
|
||||
from typing import Dict, Iterable, List, Mapping, Optional
|
||||
|
||||
from espeak_phonemizer import Phonemizer
|
||||
|
||||
|
||||
class PhonemeType(str, Enum):
    """Identifies where phonemes come from.

    The ``str`` mixin makes each member compare equal to its plain string
    value (e.g. ``PhonemeType.TEXT == "text"``).
    """

    ESPEAK = "espeak"
    """Phonemes come from espeak-ng"""

    TEXT = "text"
    """Phonemes come from text itself"""
|
||||
|
||||
|
||||
# NOTE(review): presumably an upper bound on the number of phoneme ids;
# its use is outside this view — confirm against the callers.
MAX_PHONEMES = 256
|
||||
DEFAULT_PHONEME_ID_MAP: Dict[str, List[int]] = {
|
||||
"_": [0],
|
||||
@@ -162,6 +172,57 @@ DEFAULT_PHONEME_ID_MAP: Dict[str, List[int]] = {
|
||||
'"': [150], # Russian
|
||||
}
|
||||
|
||||
# Character-to-id tables, keyed by language code. Each inner mapping takes a
# single character to its list of ids, in the same shape as
# DEFAULT_PHONEME_ID_MAP.
#
# Ids within an alphabet are consecutive: symbols and punctuation first
# (0-11), then the letters (12-44). Only lowercase letters are listed.
# NOTE(review): callers presumably lowercase text before lookup — confirm.
ALPHABETS: Dict[str, Dict[str, List[int]]] = {
    # Ukrainian
    "uk": {
        "_": [0],
        "^": [1],
        "$": [2],
        " ": [3],
        "!": [4],
        "'": [5],
        ",": [6],
        "-": [7],
        ".": [8],
        ":": [9],
        ";": [10],
        "?": [11],
        "а": [12],
        "б": [13],
        "в": [14],
        "г": [15],
        "ґ": [16],
        "д": [17],
        "е": [18],
        "є": [19],
        "ж": [20],
        "з": [21],
        "и": [22],
        "і": [23],
        "ї": [24],
        "й": [25],
        "к": [26],
        "л": [27],
        "м": [28],
        "н": [29],
        "о": [30],
        "п": [31],
        "р": [32],
        "с": [33],
        "т": [34],
        "у": [35],
        "ф": [36],
        "х": [37],
        "ц": [38],
        "ч": [39],
        "ш": [40],
        "щ": [41],
        "ь": [42],
        "ю": [43],
        "я": [44],
    },
}
|
||||
|
||||
|
||||
def phonemize(text: str, phonemizer: Phonemizer) -> List[str]:
|
||||
phonemes_str = phonemizer.phonemize(text=text, keep_clause_breakers=True)
|
||||
|
||||
Reference in New Issue
Block a user