diff --git a/etc/test_sentences/test_uk.jsonl b/etc/test_sentences/test_uk.jsonl new file mode 100644 index 0000000..d96b42b --- /dev/null +++ b/etc/test_sentences/test_uk.jsonl @@ -0,0 +1,7 @@ +{"text": "Весе́лка, також ра́йдуга оптичне явище в атмосфері, що являє собою одну, дві чи декілька різнокольорових дуг ,або кіл, якщо дивитися з повітря, що спостерігаються на тлі хмари, якщо вона розташована проти Сонця.", "phonemes": ["в", "е", "с", "е", "́", "л", "к", "а", ",", " ", "т", "а", "к", "о", "ж", " ", "р", "а", "́", "и", "̆", "д", "у", "г", "а", " ", "о", "п", "т", "и", "ч", "н", "е", " ", "я", "в", "и", "щ", "е", " ", "в", " ", "а", "т", "м", "о", "с", "ф", "е", "р", "і", ",", " ", "щ", "о", " ", "я", "в", "л", "я", "є", " ", "с", "о", "б", "о", "ю", " ", "о", "д", "н", "у", ",", " ", "д", "в", "і", " ", "ч", "и", " ", "д", "е", "к", "і", "л", "ь", "к", "а", " ", "р", "і", "з", "н", "о", "к", "о", "л", "ь", "о", "р", "о", "в", "и", "х", " ", "д", "у", "г", " ", ",", "а", "б", "о", " ", "к", "і", "л", ",", " ", "я", "к", "щ", "о", " ", "д", "и", "в", "и", "т", "и", "с", "я", " ", "з", " ", "п", "о", "в", "і", "т", "р", "я", ",", " ", "щ", "о", " ", "с", "п", "о", "с", "т", "е", "р", "і", "г", "а", "ю", "т", "ь", "с", "я", " ", "н", "а", " ", "т", "л", "і", " ", "х", "м", "а", "р", "и", ",", " ", "я", "к", "щ", "о", " ", "в", "о", "н", "а", " ", "р", "о", "з", "т", "а", "ш", "о", "в", "а", "н", "а", " ", "п", "р", "о", "т", "и", " ", "с", "о", "н", "ц", "я", "."], "phoneme_ids": [1, 0, 14, 0, 18, 0, 33, 0, 18, 0, 45, 0, 27, 0, 26, 0, 12, 0, 6, 0, 3, 0, 34, 0, 12, 0, 26, 0, 30, 0, 20, 0, 3, 0, 32, 0, 12, 0, 45, 0, 22, 0, 46, 0, 17, 0, 35, 0, 15, 0, 12, 0, 3, 0, 30, 0, 31, 0, 34, 0, 22, 0, 39, 0, 29, 0, 18, 0, 3, 0, 44, 0, 14, 0, 22, 0, 41, 0, 18, 0, 3, 0, 14, 0, 3, 0, 12, 0, 34, 0, 28, 0, 30, 0, 33, 0, 36, 0, 18, 0, 32, 0, 23, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 44, 0, 14, 0, 27, 0, 44, 0, 19, 0, 3, 0, 33, 0, 30, 0, 13, 0, 30, 0, 43, 0, 3, 0, 30, 0, 17, 0, 29, 0, 35, 0, 6, 0, 3, 0, 17, 0, 14, 0, 23, 0, 3, 0, 39, 0, 22, 0, 3, 0, 17, 0, 18, 0, 26, 0, 23, 0, 27, 0, 42, 0, 26, 0, 12, 0, 3, 0, 32, 0, 23, 0, 21, 0, 29, 0, 30, 0, 26, 0, 30, 0, 27, 0, 42, 0, 30, 0, 32, 0, 30, 0, 14, 0, 22, 0, 37, 0, 3, 0, 17, 0, 35, 0, 15, 0, 3, 0, 6, 0, 12, 0, 13, 0, 30, 0, 3, 0, 26, 0, 23, 0, 27, 0, 6, 0, 3, 0, 44, 0, 26, 0, 41, 0, 30, 0, 3, 0, 17, 0, 22, 0, 14, 0, 22, 0, 34, 0, 22, 0, 33, 0, 44, 0, 3, 0, 21, 0, 3, 0, 31, 0, 30, 0, 14, 0, 23, 0, 34, 0, 32, 0, 44, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 33, 0, 31, 0, 30, 0, 33, 0, 34, 0, 18, 0, 32, 0, 23, 0, 15, 0, 12, 0, 43, 0, 34, 0, 42, 0, 33, 0, 44, 0, 3, 0, 29, 0, 12, 0, 3, 0, 34, 0, 27, 0, 23, 0, 3, 0, 37, 0, 28, 0, 12, 0, 32, 0, 22, 0, 6, 0, 3, 0, 44, 0, 26, 0, 41, 0, 30, 0, 3, 0, 14, 0, 30, 0, 29, 0, 12, 0, 3, 0, 32, 0, 30, 0, 21, 0, 34, 0, 12, 0, 40, 0, 30, 0, 14, 0, 12, 0, 29, 0, 12, 0, 3, 0, 31, 0, 32, 0, 30, 0, 34, 0, 22, 0, 3, 0, 33, 0, 30, 0, 29, 0, 38, 0, 44, 0, 8, 0, 2]} +{"text": "Червоний колір ми бачимо з зовнішнього боку первинної веселки, а фіолетовий — із внутрішнього.", "phonemes": ["ч", "е", "р", "в", "о", "н", "и", "и", "̆", " ", "к", "о", "л", "і", "р", " ", "м", "и", " ", "б", "а", "ч", "и", "м", "о", " ", "з", " ", "з", "о", "в", "н", "і", "ш", "н", "ь", "о", "г", "о", " ", "б", "о", "к", "у", " ", "п", "е", "р", "в", "и", "н", "н", "о", "і", "̈", " ", "в", "е", "с", "е", "л", "к", "и", ",", " ", "а", " ", "ф", "і", "о", "л", "е", "т", "о", "в", "и", "и", "̆", " ", "—", " ", "і", "з", " ", "в", "н", "у", "т", "р", "і", "ш", "н", "ь", "о", "г", "о", "."], "phoneme_ids": [1, 0, 39, 0, 18, 0, 32, 0, 14, 0, 30, 0, 29, 0, 22, 0, 22, 0, 46, 0, 3, 0, 26, 0, 30, 0, 27, 0, 23, 0, 32, 0, 3, 0, 28, 0, 22, 0, 3, 0, 13, 0, 12, 0, 39, 0, 22, 0, 28, 0, 30, 0, 3, 0, 21, 0, 3, 0, 21, 0, 30, 0, 14, 0, 29, 0, 23, 0, 40, 0, 29, 0, 42, 0, 30, 0, 15, 0, 30, 0, 3, 0, 13, 0, 30, 0, 26, 0, 35, 0, 3, 0, 31, 0, 18, 0, 32, 0, 14, 0, 22, 0, 29, 0, 29, 0, 30, 0, 23, 0, 47, 0, 3, 0, 14, 0, 18, 0, 33, 0, 18, 0, 27, 0, 26, 0, 22, 0, 6, 0, 3, 0, 12, 0, 3, 0, 36, 0, 23, 0, 30, 0, 27, 0, 18, 0, 34, 0, 30, 0, 14, 0, 22, 0, 22, 0, 46, 0, 3, 0, 48, 0, 3, 0, 23, 0, 21, 0, 3, 0, 14, 0, 29, 0, 35, 0, 34, 0, 32, 0, 23, 0, 40, 0, 29, 0, 42, 0, 30, 0, 15, 0, 30, 0, 8, 0, 2]} +{"text": "Веселка пов'язана з заломленням і відбиттям ,деякою мірою і з дифракцією, сонячного світла у водяних краплях, зважених у повітрі.", "phonemes": ["в", "е", "с", "е", "л", "к", "а", " ", "п", "о", "в", "'", "я", "з", "а", "н", "а", " ", "з", " ", "з", "а", "л", "о", "м", "л", "е", "н", "н", "я", "м", " ", "і", " ", "в", "і", "д", "б", "и", "т", "т", "я", "м", " ", ",", "д", "е", "я", "к", "о", "ю", " ", "м", "і", "р", "о", "ю", " ", "і", " ", "з", " ", "д", "и", "ф", "р", "а", "к", "ц", "і", "є", "ю", ",", " ", "с", "о", "н", "я", "ч", "н", "о", "г", "о", " ", "с", "в", "і", "т", "л", "а", " ", "у", " ", "в", "о", "д", "я", "н", "и", "х", " ", "к", "р", "а", "п", "л", "я", "х", ",", " ", "з", "в", "а", "ж", "е", "н", "и", "х", " ", "у", " ", "п", "о", "в", "і", "т", "р", "і", "."], "phoneme_ids": [1, 0, 14, 0, 18, 0, 33, 0, 18, 0, 27, 0, 26, 0, 12, 0, 3, 0, 31, 0, 30, 0, 14, 0, 5, 0, 44, 0, 21, 0, 12, 0, 29, 0, 12, 0, 3, 0, 21, 0, 3, 0, 21, 0, 12, 0, 27, 0, 30, 0, 28, 0, 27, 0, 18, 0, 29, 0, 29, 0, 44, 0, 28, 0, 3, 0, 23, 0, 3, 0, 14, 0, 23, 0, 17, 0, 13, 0, 22, 0, 34, 0, 34, 0, 44, 0, 28, 0, 3, 0, 6, 0, 17, 0, 18, 0, 44, 0, 26, 0, 30, 0, 43, 0, 3, 0, 28, 0, 23, 0, 32, 0, 30, 0, 43, 0, 3, 0, 23, 0, 3, 0, 21, 0, 3, 0, 17, 0, 22, 0, 36, 0, 32, 0, 12, 0, 26, 0, 38, 0, 23, 0, 19, 0, 43, 0, 6, 0, 3, 0, 33, 0, 30, 0, 29, 0, 44, 0, 39, 0, 29, 0, 30, 0, 15, 0, 30, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 12, 0, 3, 0, 35, 0, 3, 0, 14, 0, 30, 0, 17, 0, 44, 0, 29, 0, 22, 0, 37, 0, 3, 0, 26, 0, 32, 0, 12, 0, 31, 0, 27, 0, 44, 0, 37, 0, 6, 0, 3, 0, 21, 0, 14, 0, 12, 0, 20, 0, 18, 0, 29, 0, 22, 0, 37, 0, 3, 0, 35, 0, 3, 0, 31, 0, 30, 0, 14, 0, 23, 0, 34, 0, 32, 0, 23, 0, 8, 0, 2]} +{"text": "Ці крапельки по-різному відхиляють світло різних кольорів, у результаті чого біле світло розкладається на спектр.", "phonemes": ["ц", "і", " ", "к", "р", "а", "п", "е", "л", "ь", "к", "и", " ", "п", "о", "-", "р", "і", "з", "н", "о", "м", "у", " ", "в", "і", "д", "х", "и", "л", "я", "ю", "т", "ь", " ", "с", "в", "і", "т", "л", "о", " ", "р", "і", "з", "н", "и", "х", " ", "к", "о", "л", "ь", "о", "р", "і", "в", ",", " ", "у", " ", "р", "е", "з", "у", "л", "ь", "т", "а", "т", "і", " ", "ч", "о", "г", "о", " ", "б", "і", "л", "е", " ", "с", "в", "і", "т", "л", "о", " ", "р", "о", "з", "к", "л", "а", "д", "а", "є", "т", "ь", "с", "я", " ", "н", "а", " ", "с", "п", "е", "к", "т", "р", "."], "phoneme_ids": [1, 0, 38, 0, 23, 0, 3, 0, 26, 0, 32, 0, 12, 0, 31, 0, 18, 0, 27, 0, 42, 0, 26, 0, 22, 0, 3, 0, 31, 0, 30, 0, 7, 0, 32, 0, 23, 0, 21, 0, 29, 0, 30, 0, 28, 0, 35, 0, 3, 0, 14, 0, 23, 0, 17, 0, 37, 0, 22, 0, 27, 0, 44, 0, 43, 0, 34, 0, 42, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 30, 0, 3, 0, 32, 0, 23, 0, 21, 0, 29, 0, 22, 0, 37, 0, 3, 0, 26, 0, 30, 0, 27, 0, 42, 0, 30, 0, 32, 0, 23, 0, 14, 0, 6, 0, 3, 0, 35, 0, 3, 0, 32, 0, 18, 0, 21, 0, 35, 0, 27, 0, 42, 0, 34, 0, 12, 0, 34, 0, 23, 0, 3, 0, 39, 0, 30, 0, 15, 0, 30, 0, 3, 0, 13, 0, 23, 0, 27, 0, 18, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 30, 0, 3, 0, 32, 0, 30, 0, 21, 0, 26, 0, 27, 0, 12, 0, 17, 0, 12, 0, 19, 0, 34, 0, 42, 0, 33, 0, 44, 0, 3, 0, 29, 0, 12, 0, 3, 0, 33, 0, 31, 0, 18, 0, 26, 0, 34, 0, 32, 0, 8, 0, 2]} +{"text": "Спостерігач, що стоїть спиною до джерела світла, бачить різнобарвне світіння, що виходить із простору по концентричному колу ,дузі.", "phonemes": ["с", "п", "о", "с", "т", "е", "р", "і", "г", "а", "ч", ",", " ", "щ", "о", " ", "с", "т", "о", "і", "̈", "т", "ь", " ", "с", "п", "и", "н", "о", "ю", " ", "д", "о", " ", "д", "ж", "е", "р", "е", "л", "а", " ", "с", "в", "і", "т", "л", "а", ",", " ", "б", "а", "ч", "и", "т", "ь", " ", "р", "і", "з", "н", "о", "б", "а", "р", "в", "н", "е", " ", "с", "в", "і", "т", "і", "н", "н", "я", ",", " ", "щ", "о", " ", "в", "и", "х", "о", "д", "и", "т", "ь", " ", "і", "з", " ", "п", "р", "о", "с", "т", "о", "р", "у", " ", "п", "о", " ", "к", "о", "н", "ц", "е", "н", "т", "р", "и", "ч", "н", "о", "м", "у", " ", "к", "о", "л", "у", " ", ",", "д", "у", "з", "і", "."], "phoneme_ids": [1, 0, 33, 0, 31, 0, 30, 0, 33, 0, 34, 0, 18, 0, 32, 0, 23, 0, 15, 0, 12, 0, 39, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 33, 0, 34, 0, 30, 0, 23, 0, 47, 0, 34, 0, 42, 0, 3, 0, 33, 0, 31, 0, 22, 0, 29, 0, 30, 0, 43, 0, 3, 0, 17, 0, 30, 0, 3, 0, 17, 0, 20, 0, 18, 0, 32, 0, 18, 0, 27, 0, 12, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 12, 0, 6, 0, 3, 0, 13, 0, 12, 0, 39, 0, 22, 0, 34, 0, 42, 0, 3, 0, 32, 0, 23, 0, 21, 0, 29, 0, 30, 0, 13, 0, 12, 0, 32, 0, 14, 0, 29, 0, 18, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 23, 0, 29, 0, 29, 0, 44, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 14, 0, 22, 0, 37, 0, 30, 0, 17, 0, 22, 0, 34, 0, 42, 0, 3, 0, 23, 0, 21, 0, 3, 0, 31, 0, 32, 0, 30, 0, 33, 0, 34, 0, 30, 0, 32, 0, 35, 0, 3, 0, 31, 0, 30, 0, 3, 0, 26, 0, 30, 0, 29, 0, 38, 0, 18, 0, 29, 0, 34, 0, 32, 0, 22, 0, 39, 0, 29, 0, 30, 0, 28, 0, 35, 0, 3, 0, 26, 0, 30, 0, 27, 0, 35, 0, 3, 0, 6, 0, 17, 0, 35, 0, 21, 0, 23, 0, 8, 0, 2]} +{"text": "Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів!", "phonemes": ["ч", "у", "є", "ш", " ", "і", "̈", "х", ",", " ", "д", "о", "ц", "ю", ",", " ", "г", "а", "?", " ", "к", "у", "м", "е", "д", "н", "а", " ", "ж", " ", "т", "и", ",", " ", "п", "р", "о", "щ", "а", "и", "̆", "с", "я", " ", "б", "е", "з", " ", "ґ", "о", "л", "ь", "ф", "і", "в", "!"], "phoneme_ids": [1, 0, 39, 0, 35, 0, 19, 0, 40, 0, 3, 0, 23, 0, 47, 0, 37, 0, 6, 0, 3, 0, 17, 0, 30, 0, 38, 0, 43, 0, 6, 0, 3, 0, 15, 0, 12, 0, 11, 0, 3, 0, 26, 0, 35, 0, 28, 0, 18, 0, 17, 0, 29, 0, 12, 0, 3, 0, 20, 0, 3, 0, 34, 0, 22, 0, 6, 0, 3, 0, 31, 0, 32, 0, 30, 0, 41, 0, 12, 0, 22, 0, 46, 0, 33, 0, 44, 0, 3, 0, 13, 0, 18, 0, 21, 0, 3, 0, 16, 0, 30, 0, 27, 0, 42, 0, 36, 0, 23, 0, 14, 0, 4, 0, 2]} +{"text": "Жебракують філософи при ґанку церкви в Гадячі, ще й шатро їхнє п’яне знаємо.", "phonemes": ["ж", "е", "б", "р", "а", "к", "у", "ю", "т", "ь", " ", "ф", "і", "л", "о", "с", "о", "ф", "и", " ", "п", "р", "и", " ", "ґ", "а", "н", "к", "у", " ", "ц", "е", "р", "к", "в", "и", " ", "в", " ", "г", "а", "д", "я", "ч", "і", ",", " ", "щ", "е", " ", "и", "̆", " ", "ш", "а", "т", "р", "о", " ", "і", "̈", "х", "н", "є", " ", "п", "’", "я", "н", "е", " ", "з", "н", "а", "є", "м", "о", "."], "phoneme_ids": [1, 0, 20, 0, 18, 0, 13, 0, 32, 0, 12, 0, 26, 0, 35, 0, 43, 0, 34, 0, 42, 0, 3, 0, 36, 0, 23, 0, 27, 0, 30, 0, 33, 0, 30, 0, 36, 0, 22, 0, 3, 0, 31, 0, 32, 0, 22, 0, 3, 0, 16, 0, 12, 0, 29, 0, 26, 0, 35, 0, 3, 0, 38, 0, 18, 0, 32, 0, 26, 0, 14, 0, 22, 0, 3, 0, 14, 0, 3, 0, 15, 0, 12, 0, 17, 0, 44, 0, 39, 0, 23, 0, 6, 0, 3, 0, 41, 0, 18, 0, 3, 0, 22, 0, 46, 0, 3, 0, 40, 0, 12, 0, 34, 0, 32, 0, 30, 0, 3, 0, 23, 0, 47, 0, 37, 0, 29, 0, 19, 0, 3, 0, 31, 0, 44, 0, 29, 0, 18, 0, 3, 0, 21, 0, 29, 0, 12, 0, 19, 0, 28, 0, 30, 0, 8, 0, 2]} diff --git a/etc/test_sentences/uk.txt b/etc/test_sentences/uk.txt new file mode 100644 index 0000000..db22bca --- /dev/null +++ b/etc/test_sentences/uk.txt @@ -0,0 +1,7 @@ +Весе́лка, також ра́йдуга оптичне явище в атмосфері, що являє собою одну, дві чи декілька різнокольорових дуг ,або кіл, якщо дивитися з повітря, що спостерігаються на тлі хмари, якщо вона розташована проти Сонця. +Червоний колір ми бачимо з зовнішнього боку первинної веселки, а фіолетовий — із внутрішнього. +Веселка пов'язана з заломленням і відбиттям ,деякою мірою і з дифракцією, сонячного світла у водяних краплях, зважених у повітрі. +Ці крапельки по-різному відхиляють світло різних кольорів, у результаті чого біле світло розкладається на спектр. +Спостерігач, що стоїть спиною до джерела світла, бачить різнобарвне світіння, що виходить із простору по концентричному колу ,дузі. +Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів! +Жебракують філософи при ґанку церкви в Гадячі, ще й шатро їхнє п’яне знаємо. diff --git a/src/python/piper_train/phonemize.py b/src/python/piper_train/phonemize.py index e1f4243..760634f 100644 --- a/src/python/piper_train/phonemize.py +++ b/src/python/piper_train/phonemize.py @@ -300,18 +300,55 @@ def phonemes_to_ids( def main() -> None: parser = argparse.ArgumentParser() parser.add_argument("language") + parser.add_argument( + "--phoneme-type", + choices=list(PhonemeType), + default=PhonemeType.ESPEAK, + help="Type of phonemes to use (default: espeak)", + ) + parser.add_argument( + "--text-casing", + choices=("ignore", "lower", "upper", "casefold"), + default="ignore", + help="Casing applied to utterance text", + ) args = parser.parse_args() - phonemizer = Phonemizer(args.language) + phonemizer: Optional[Phonemizer] = None + + if args.text_casing == "lower": + casing = str.lower + elif args.text_casing == "upper": + casing = str.upper + else: + # ignore + casing = lambda s: s + + if args.phoneme_type == PhonemeType.TEXT: + # Use text directly + phoneme_id_map = ALPHABETS[args.language] + else: + # Use eSpeak + phonemizer = Phonemizer(args.language) + phoneme_id_map = DEFAULT_PHONEME_ID_MAP + phoneme_map = PHONEME_MAPS.get(args.language) + missing_phonemes: "Counter[str]" = Counter() for line in sys.stdin: line = line.strip() if not line: continue - phonemes = phonemize(line, phonemizer, phoneme_map=phoneme_map) - phoneme_ids = phonemes_to_ids(phonemes) + if args.phoneme_type == PhonemeType.TEXT: + phonemes = list(unicodedata.normalize("NFD", casing(line))) + else: + assert phonemizer is not None + phonemes = phonemize(line, phonemizer, phoneme_map=phoneme_map) + + phoneme_ids = phonemes_to_ids( + phonemes, phoneme_id_map=phoneme_id_map, missing_phonemes=missing_phonemes + ) json.dump( { "text": line, @@ -323,6 +360,11 @@ def main() -> None: ) print("") + if missing_phonemes: + print("Missing", len(missing_phonemes), "phonemes", file=sys.stderr) + for phoneme, count in missing_phonemes.most_common(): + print(phoneme, count, file=sys.stderr) + if __name__ == "__main__": main() diff --git a/src/python/piper_train/preprocess.py b/src/python/piper_train/preprocess.py index b237437..d9a0331 100644 --- a/src/python/piper_train/preprocess.py +++ b/src/python/piper_train/preprocess.py @@ -303,12 +303,11 @@ def phonemize_batch_text( try: _LOGGER.debug(utt) utt.phonemes = list(unicodedata.normalize("NFD", casing(utt.text))) - utt.phoneme_ids = [] - for phoneme in utt.phonemes: - if phoneme in alphabet: - utt.phoneme_ids.extend(alphabet[phoneme]) - else: - utt.missing_phonemes[phoneme] += 1 + utt.phoneme_ids = phonemes_to_ids( + utt.phonemes, + phoneme_id_map=alphabet, + missing_phonemes=utt.missing_phonemes, + ) if not args.skip_audio: utt.audio_norm_path, utt.audio_spec_path = cache_norm_audio( utt.audio_path,