From f83a6834bdb8147baf0b9e1a9a7e1d63158c4b24 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sun, 25 Jun 2023 15:19:40 -0500 Subject: [PATCH 1/4] Spanish test sentences --- etc/test_sentences/test_es-419.jsonl | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 etc/test_sentences/test_es-419.jsonl diff --git a/etc/test_sentences/test_es-419.jsonl b/etc/test_sentences/test_es-419.jsonl new file mode 100644 index 0000000..0c201b1 --- /dev/null +++ b/etc/test_sentences/test_es-419.jsonl @@ -0,0 +1,6 @@ +{"phoneme_ids":[1,0,120,0,33,0,26,0,3,0,121,0,14,0,92,0,23,0,27,0,120,0,21,0,92,0,21,0,31,0,3,0,27,0,3,0,120,0,14,0,92,0,23,0,27,0,3,0,120,0,21,0,92,0,21,0,31,0,3,0,120,0,18,0,31,0,3,0,120,0,33,0,26,0,3,0,19,0,18,0,26,0,120,0,27,0,25,0,18,0,26,0,27,0,3,0,120,0,27,0,28,0,122,0,32,0,21,0,23,0,27,0,3,0,21,0,3,0,25,0,121,0,18,0,32,0,18,0,121,0,27,0,92,0,27,0,24,0,120,0,27,0,36,0,21,0,23,0,27,0,3,0,23,0,18,0,3,0,23,0,27,0,26,0,31,0,120,0,21,0,31,0,32,0,18,0,3,0,18,0,26,0,3,0,24,0,14,0,3,0,121,0,14,0,28,0,14,0,92,0,21,0,31,0,22,0,120,0,27,0,26,0,3,0,18,0,26,0,3,0,18,0,24,0,3,0,31,0,22,0,120,0,18,0,24,0,27,0,3,0,41,0,18,0,3,0,120,0,33,0,26,0,3,0,120,0,14,0,92,0,23,0,27,0,3,0,41,0,18,0,3,0,24,0,120,0,33,0,31,0,3,0,25,0,121,0,33,0,24,0,32,0,21,0,23,0,27,0,24,0,120,0,27,0,92,0,8,0,121,0,27,0,92,0,21,0,36,0,21,0,26,0,120,0,14,0,41,0,27,0,3,0,28,0,27,0,92,0,3,0,24,0,14,0,3,0,41,0,121,0,18,0,31,0,23,0,27,0,25,0,28,0,121,0,27,0,31,0,21,0,31,0,22,0,120,0,27,0,26,0,3,0,17,0,18,0,3,0,24,0,14,0,3,0,24,0,120,0,33,0,31,0,3,0,31,0,27,0,24,0,120,0,14,0,92,0,3,0,18,0,26,0,3,0,18,0,24,0,3,0,18,0,31,0,28,0,120,0,18,0,23,0,32,0,92,0,27,0,3,0,125,0,21,0,31,0,120,0,21,0,125,0,24,0,18,0,8,0,24,0,14,0,3,0,23,0,35,0,120,0,14,0,24,0,3,0,31,0,18,0,3,0,28,0,92,0,27,0,41,0,120,0,33,0,31,0,18,0,3,0,28,0,27,0,92,0,3,0,30,0,121,0,18,0,19,0,92,0,14,0,23,0,31,0,22,0,120,0,27,0,26,0,8,0,23,0,35,0,121,0,14,0,26,0,17,0,27,0,3,0,24,0,27,0,31,0,3,0,30,0,120,0,14,0,115,0,27,0,31,0,3,0,41,0,18,0,24,0,3,0,31,0,120,0,27,0,24,0,3,0,121,0,14,0,32,0,92,0,14,0,125,0,22,0,120,0,18,0,31,0,14,0,25,0,3,0,28,0,18,0,23,0,120,0,18,0,82,0,14,0,31,0,3,0,68,0,120,0,27,0,32,0,14,0,31,0,3,0,41,0,18,0,3,0,120,0,14,0,68,0,35,0,14,0,3,0,23,0,121,0,27,0,26,0,32,0,18,0,26,0,120,0,21,0,41,0,14,0,31,0,3,0,18,0,26,0,3,0,24,0,14,0,3,0,14,0,32,0,25,0,120,0,27,0,31,0,19,0,18,0,92,0,14,0,3,0,32,0,18,0,30,0,120,0,18,0,31,0,32,0,92,0,18,0,10,0,2],"phonemes":["ˈ","u","n"," ","ˌ","a","ɾ","k","o","ˈ","i","ɾ","i","s"," ","o"," ","ˈ","a","ɾ","k","o"," ","ˈ","i","ɾ","i","s"," ","ˈ","e","s"," ","ˈ","u","n"," ","f","e","n","ˈ","o","m","e","n","o"," ","ˈ","o","p","ː","t","i","k","o"," ","i"," ","m","ˌ","e","t","e","ˌ","o","ɾ","o","l","ˈ","o","x","i","k","o"," ","k","e"," ","k","o","n","s","ˈ","i","s","t","e"," ","e","n"," ","l","a"," ","ˌ","a","p","a","ɾ","i","s","j","ˈ","o","n"," ","e","n"," ","e","l"," ","s","j","ˈ","e","l","o"," ","ð","e"," ","ˈ","u","n"," ","ˈ","a","ɾ","k","o"," ","ð","e"," ","l","ˈ","u","s"," ","m","ˌ","u","l","t","i","k","o","l","ˈ","o","ɾ",",","ˌ","o","ɾ","i","x","i","n","ˈ","a","ð","o"," ","p","o","ɾ"," ","l","a"," ","ð","ˌ","e","s","k","o","m","p","ˌ","o","s","i","s","j","ˈ","o","n"," ","d","e"," ","l","a"," ","l","ˈ","u","s"," ","s","o","l","ˈ","a","ɾ"," ","e","n"," ","e","l"," ","e","s","p","ˈ","e","k","t","ɾ","o"," ","β","i","s","ˈ","i","β","l","e",",","l","a"," ","k","w","ˈ","a","l"," ","s","e"," ","p","ɾ","o","ð","ˈ","u","s","e"," ","p","o","ɾ"," ","r","ˌ","e","f","ɾ","a","k","s","j","ˈ","o","n",",","k","w","ˌ","a","n","d","o"," ","l","o","s"," ","r","ˈ","a","ʝ","o","s"," ","ð","e","l"," ","s","ˈ","o","l"," ","ˌ","a","t","ɾ","a","β","j","ˈ","e","s","a","m"," ","p","e","k","ˈ","e","ɲ","a","s"," ","ɣ","ˈ","o","t","a","s"," ","ð","e"," ","ˈ","a","ɣ","w","a"," ","k","ˌ","o","n","t","e","n","ˈ","i","ð","a","s"," ","e","n"," ","l","a"," ","a","t","m","ˈ","o","s","f","e","ɾ","a"," ","t","e","r","ˈ","e","s","t","ɾ","e","."],"processed_text":"Un arcoíris​ o arco iris es un fenómeno óptico y meteorológico que consiste en la aparición en el cielo de un arco de luz multicolor, originado por la descomposición de la luz solar en el espectro visible, la cual se produce por refracción, cuando los rayos del sol atraviesan pequeñas gotas de agua contenidas en la atmósfera terrestre.","text":"Un arcoíris​ o arco iris es un fenómeno óptico y meteorológico que consiste en la aparición en el cielo de un arco de luz multicolor, originado por la descomposición de la luz solar en el espectro visible, la cual se produce por refracción, cuando los rayos del sol atraviesan pequeñas gotas de agua contenidas en la atmósfera terrestre."} +{"phoneme_ids":[1,0,120,0,18,0,31,0,3,0,120,0,33,0,26,0,3,0,120,0,14,0,92,0,23,0,27,0,3,0,23,0,27,0,25,0,28,0,35,0,120,0,18,0,31,0,32,0,27,0,3,0,41,0,18,0,3,0,120,0,14,0,92,0,23,0,27,0,31,0,3,0,23,0,27,0,26,0,31,0,120,0,18,0,26,0,32,0,92,0,21,0,23,0,27,0,31,0,3,0,41,0,18,0,3,0,23,0,27,0,24,0,120,0,27,0,92,0,18,0,31,0,8,0,31,0,21,0,26,0,3,0,31,0,121,0,27,0,24,0,33,0,31,0,22,0,120,0,27,0,26,0,3,0,17,0,18,0,3,0,23,0,121,0,27,0,26,0,32,0,21,0,26,0,35,0,21,0,41,0,120,0,14,0,41,0,3,0,121,0,18,0,26,0,32,0,92,0,18,0,3,0,120,0,18,0,22,0,22,0,27,0,31,0,8,0,23,0,27,0,26,0,3,0,18,0,24,0,3,0,30,0,120,0,27,0,36,0,27,0,3,0,121,0,14,0,31,0,22,0,14,0,3,0,24,0,14,0,3,0,28,0,120,0,14,0,92,0,32,0,18,0,3,0,121,0,18,0,23,0,31,0,32,0,18,0,92,0,22,0,120,0,27,0,92,0,3,0,21,0,3,0,18,0,24,0,3,0,125,0,22,0,27,0,24,0,120,0,18,0,32,0,14,0,3,0,121,0,14,0,31,0,22,0,14,0,3,0,18,0,24,0,3,0,121,0,21,0,26,0,32,0,18,0,92,0,22,0,120,0,27,0,92,0,10,0,2],"phonemes":["ˈ","e","s"," ","ˈ","u","n"," ","ˈ","a","ɾ","k","o"," ","k","o","m","p","w","ˈ","e","s","t","o"," ","ð","e"," ","ˈ","a","ɾ","k","o","s"," ","k","o","n","s","ˈ","e","n","t","ɾ","i","k","o","s"," ","ð","e"," ","k","o","l","ˈ","o","ɾ","e","s",",","s","i","n"," ","s","ˌ","o","l","u","s","j","ˈ","o","n"," ","d","e"," ","k","ˌ","o","n","t","i","n","w","i","ð","ˈ","a","ð"," ","ˌ","e","n","t","ɾ","e"," ","ˈ","e","j","j","o","s",",","k","o","n"," ","e","l"," ","r","ˈ","o","x","o"," ","ˌ","a","s","j","a"," ","l","a"," ","p","ˈ","a","ɾ","t","e"," ","ˌ","e","k","s","t","e","ɾ","j","ˈ","o","ɾ"," ","i"," ","e","l"," ","β","j","o","l","ˈ","e","t","a"," ","ˌ","a","s","j","a"," ","e","l"," ","ˌ","i","n","t","e","ɾ","j","ˈ","o","ɾ","."],"processed_text":"Es un arco compuesto de arcos concéntricos de colores, sin solución de continuidad entre ellos, con el rojo hacia la parte exterior y el violeta hacia el interior.","text":"Es un arco compuesto de arcos concéntricos de colores, sin solución de continuidad entre ellos, con el rojo hacia la parte exterior y el violeta hacia el interior."} +{"phoneme_ids":[1,0,14,0,3,0,121,0,14,0,24,0,32,0,21,0,32,0,120,0,33,0,17,0,3,0,31,0,121,0,33,0,19,0,21,0,31,0,22,0,120,0,61,0,26,0,32,0,18,0,8,0,28,0,27,0,92,0,3,0,18,0,36,0,120,0,18,0,25,0,28,0,24,0,27,0,3,0,23,0,35,0,121,0,14,0,26,0,17,0,27,0,3,0,31,0,18,0,3,0,125,0,22,0,120,0,14,0,36,0,14,0,3,0,18,0,26,0,3,0,14,0,125,0,22,0,120,0,27,0,26,0,8,0,18,0,24,0,3,0,121,0,14,0,92,0,23,0,27,0,120,0,21,0,92,0,21,0,31,0,3,0,31,0,18,0,3,0,28,0,35,0,120,0,18,0,41,0,18,0,3,0,121,0,27,0,125,0,31,0,18,0,92,0,125,0,120,0,14,0,92,0,3,0,18,0,26,0,3,0,19,0,120,0,27,0,92,0,25,0,14,0,3,0,41,0,18,0,3,0,31,0,120,0,21,0,92,0,23,0,33,0,24,0,27,0,3,0,23,0,27,0,25,0,28,0,24,0,120,0,18,0,32,0,27,0,10,0,2],"phonemes":["a"," ","ˌ","a","l","t","i","t","ˈ","u","d"," ","s","ˌ","u","f","i","s","j","ˈ","ɛ","n","t","e",",","p","o","ɾ"," ","e","x","ˈ","e","m","p","l","o"," ","k","w","ˌ","a","n","d","o"," ","s","e"," ","β","j","ˈ","a","x","a"," ","e","n"," ","a","β","j","ˈ","o","n",",","e","l"," ","ˌ","a","ɾ","k","o","ˈ","i","ɾ","i","s"," ","s","e"," ","p","w","ˈ","e","ð","e"," ","ˌ","o","β","s","e","ɾ","β","ˈ","a","ɾ"," ","e","n"," ","f","ˈ","o","ɾ","m","a"," ","ð","e"," ","s","ˈ","i","ɾ","k","u","l","o"," ","k","o","m","p","l","ˈ","e","t","o","."],"processed_text":"A altitud suficiente, por ejemplo cuando se viaja en avión, el arcoíris se puede observar en forma de círculo completo.","text":"A altitud suficiente, por ejemplo cuando se viaja en avión, el arcoíris se puede observar en forma de círculo completo."} +{"phoneme_ids":[1,0,15,0,121,0,18,0,44,0,36,0,14,0,25,0,120,0,21,0,25,0,3,0,28,0,21,0,41,0,22,0,120,0,27,0,3,0,120,0,33,0,26,0,14,0,3,0,125,0,18,0,125,0,120,0,21,0,41,0,14,0,3,0,41,0,18,0,3,0,23,0,120,0,21,0,35,0,21,0,3,0,21,0,3,0,19,0,92,0,120,0,18,0,31,0,14,0,12,0,26,0,27,0,120,0,18,0,8,0,31,0,21,0,25,0,3,0,15,0,121,0,18,0,92,0,68,0,33,0,120,0,61,0,26,0,31,0,14,0,8,0,24,0,14,0,3,0,25,0,120,0,14,0,31,0,3,0,121,0,18,0,23,0,31,0,23,0,21,0,31,0,120,0,21,0,32,0,14,0,3,0,32,0,96,0,14,0,25,0,28,0,120,0,14,0,82,0,14,0,3,0,41,0,18,0,24,0,3,0,25,0,18,0,26,0,120,0,33,0,10,0,2],"phonemes":["b","ˌ","e","ŋ","x","a","m","ˈ","i","m"," ","p","i","ð","j","ˈ","o"," ","ˈ","u","n","a"," ","β","e","β","ˈ","i","ð","a"," ","ð","e"," ","k","ˈ","i","w","i"," ","i"," ","f","ɾ","ˈ","e","s","a",";","n","o","ˈ","e",",","s","i","m"," ","b","ˌ","e","ɾ","ɣ","u","ˈ","ɛ","n","s","a",",","l","a"," ","m","ˈ","a","s"," ","ˌ","e","k","s","k","i","s","ˈ","i","t","a"," ","t","ʃ","a","m","p","ˈ","a","ɲ","a"," ","ð","e","l"," ","m","e","n","ˈ","u","."],"processed_text":"Benjamín pidió una bebida de kiwi y fresa; Noé, sin vergüenza, la más exquisita champaña del menú.","text":"Benjamín pidió una bebida de kiwi y fresa; Noé, sin vergüenza, la más exquisita champaña del menú."} +{"phoneme_ids":[1,0,36,0,27,0,31,0,120,0,18,0,3,0,23,0,27,0,25,0,28,0,92,0,120,0,27,0,3,0,120,0,33,0,26,0,14,0,3,0,125,0,22,0,120,0,18,0,36,0,14,0,3,0,31,0,14,0,25,0,28,0,120,0,27,0,82,0,14,0,3,0,18,0,25,0,3,0,28,0,18,0,92,0,120,0,33,0,10,0,2,1,0,121,0,18,0,23,0,31,0,23,0,33,0,31,0,120,0,14,0,26,0,17,0,27,0,31,0,18,0,8,0,31,0,27,0,19,0,120,0,21,0,14,0,3,0,32,0,21,0,92,0,120,0,27,0,3,0,31,0,33,0,3,0,35,0,120,0,21,0,31,0,23,0,21,0,3,0,14,0,24,0,3,0,41,0,121,0,18,0,31,0,14,0,68,0,120,0,33,0,18,0,3,0,41,0,18,0,3,0,24,0,14,0,3,0,125,0,14,0,26,0,23,0,120,0,18,0,32,0,14,0,10,0,2],"phonemes":["x","o","s","ˈ","e"," ","k","o","m","p","ɾ","ˈ","o"," ","ˈ","u","n","a"," ","β","j","ˈ","e","x","a"," ","s","a","m","p","ˈ","o","ɲ","a"," ","e","m"," ","p","e","ɾ","ˈ","u",".","ˌ","e","k","s","k","u","s","ˈ","a","n","d","o","s","e",",","s","o","f","ˈ","i","a"," ","t","i","ɾ","ˈ","o"," ","s","u"," ","w","ˈ","i","s","k","i"," ","a","l"," ","ð","ˌ","e","s","a","ɣ","ˈ","u","e"," ","ð","e"," ","l","a"," ","β","a","n","k","ˈ","e","t","a","."],"processed_text":"José compró una vieja zampoña en Perú. Excusándose, Sofía tiró su whisky al desagüe de la banqueta.","text":"José compró una vieja zampoña en Perú. Excusándose, Sofía tiró su whisky al desagüe de la banqueta."} +{"phoneme_ids":[1,0,18,0,24,0,3,0,125,0,18,0,24,0,120,0,27,0,31,0,3,0,25,0,33,0,92,0,31,0,22,0,120,0,18,0,24,0,14,0,68,0,27,0,3,0,21,0,26,0,17,0,120,0,33,0,3,0,23,0,27,0,25,0,120,0,21,0,14,0,3,0,19,0,18,0,24,0,120,0,21,0,31,0,3,0,23,0,14,0,92,0,41,0,120,0,21,0,22,0,22,0,27,0,3,0,21,0,3,0,23,0,120,0,21,0,35,0,21,0,10,0,2,1,0,24,0,14,0,3,0,31,0,121,0,21,0,68,0,33,0,120,0,18,0,82,0,14,0,3,0,32,0,27,0,23,0,120,0,14,0,125,0,14,0,3,0,18,0,24,0,3,0,31,0,121,0,14,0,23,0,31,0,27,0,19,0,120,0,27,0,26,0,3,0,17,0,18,0,32,0,92,0,120,0,14,0,31,0,3,0,41,0,18,0,24,0,3,0,28,0,14,0,24,0,120,0,61,0,26,0,23,0,18,0,3,0,41,0,18,0,3,0,28,0,120,0,14,0,36,0,14,0,10,0,2],"phonemes":["e","l"," ","β","e","l","ˈ","o","s"," ","m","u","ɾ","s","j","ˈ","e","l","a","ɣ","o"," ","i","n","d","ˈ","u"," ","k","o","m","ˈ","i","a"," ","f","e","l","ˈ","i","s"," ","k","a","ɾ","ð","ˈ","i","j","j","o"," ","i"," ","k","ˈ","i","w","i",".","l","a"," ","s","ˌ","i","ɣ","u","ˈ","e","ɲ","a"," ","t","o","k","ˈ","a","β","a"," ","e","l"," ","s","ˌ","a","k","s","o","f","ˈ","o","n"," ","d","e","t","ɾ","ˈ","a","s"," ","ð","e","l"," ","p","a","l","ˈ","ɛ","n","k","e"," ","ð","e"," ","p","ˈ","a","x","a","."],"processed_text":"El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja.","text":"El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja."} From de7666110679d5a4bbc308c8e68d38a3b4ec7334 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sun, 25 Jun 2023 15:19:47 -0500 Subject: [PATCH 2/4] More error logging for phoneme id map --- src/cpp/piper.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/cpp/piper.cpp b/src/cpp/piper.cpp index ede7bbb..4af46e8 100644 --- a/src/cpp/piper.cpp +++ b/src/cpp/piper.cpp @@ -67,6 +67,14 @@ void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) { for (auto &fromPhonemeItem : phonemeIdMapValue.items()) { std::string fromPhoneme = fromPhonemeItem.key(); if (!isSingleCodepoint(fromPhoneme)) { + std::stringstream idsStr; + for (auto &toIdValue : fromPhonemeItem.value()) { + PhonemeId toId = toIdValue.get(); + idsStr << toId << ","; + } + + spdlog::error("\"{}\" is not a single codepoint (ids={})", fromPhoneme, + idsStr.str()); throw std::runtime_error( "Phonemes must be one codepoint (phoneme id map)"); } @@ -90,6 +98,7 @@ void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) { for (auto &fromPhonemeItem : phonemeMapValue.items()) { std::string fromPhoneme = fromPhonemeItem.key(); if (!isSingleCodepoint(fromPhoneme)) { + spdlog::error("\"{}\" is not a single codepoint", fromPhoneme); throw std::runtime_error( "Phonemes must be one codepoint (phoneme map)"); } From 9f7702e3cd9abb87752cd86aa35863a8bd021273 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sun, 25 Jun 2023 15:19:57 -0500 Subject: [PATCH 3/4] Add piper_version to config.json --- src/python/piper_train/VERSION | 2 +- src/python/piper_train/preprocess.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/python/piper_train/VERSION b/src/python/piper_train/VERSION index 6e8bf73..3eefcb9 100644 --- a/src/python/piper_train/VERSION +++ b/src/python/piper_train/VERSION @@ -1 +1 @@ -0.1.0 +1.0.0 diff --git a/src/python/piper_train/preprocess.py b/src/python/piper_train/preprocess.py index 045deda..205563e 100644 --- a/src/python/piper_train/preprocess.py +++ b/src/python/piper_train/preprocess.py @@ -26,6 +26,8 @@ from .phonemize import ( phonemize, ) +_DIR = Path(__file__).parent +_VERSION = (_DIR / "VERSION").read_text(encoding="utf-8").strip() _LOGGER = logging.getLogger("preprocess") @@ -151,6 +153,7 @@ def main() -> None: "num_symbols": MAX_PHONEMES, "num_speakers": len(speaker_counts), "speaker_id_map": speaker_ids, + "piper_version": _VERSION, }, config_file, ensure_ascii=False, From a0a051a57fca1839f40c9c7475db889999d702ba Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 26 Jun 2023 14:31:46 -0500 Subject: [PATCH 4/4] Use phoneme/id map from config --- src/cpp/piper.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/cpp/piper.cpp b/src/cpp/piper.cpp index 4af46e8..2be0019 100644 --- a/src/cpp/piper.cpp +++ b/src/cpp/piper.cpp @@ -433,19 +433,10 @@ void textToAudio(PiperConfig &config, Voice &voice, std::string text, SynthesisResult sentenceResult; + // Use phoneme/id map from config PhonemeIdConfig idConfig; - if (voice.phonemizeConfig.phonemeType == TextPhonemes) { - auto &language = voice.phonemizeConfig.eSpeak.voice; - spdlog::debug("Text phoneme language: {}", language); - if (DEFAULT_ALPHABET.count(language) < 1) { - throw std::runtime_error( - "Text phoneme language for voice is not supported"); - } - - // Use alphabet for language - idConfig.phonemeIdMap = - std::make_shared(DEFAULT_ALPHABET[language]); - } + idConfig.phonemeIdMap = + std::make_shared(voice.phonemizeConfig.phonemeIdMap); // phonemes -> ids phonemes_to_ids(sentencePhonemes, idConfig, phonemeIds, missingPhonemes);