diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..b1e1743 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,5 @@ +* +!Makefile +!src/cpp/ +!local/en-us/lessac/low/en-us-lessac-low.onnx +!local/en-us/lessac/low/en-us-lessac-low.onnx.json diff --git a/Dockerfile b/Dockerfile index 2fa146d..1ad90b0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,49 +13,68 @@ ENV DEBIAN_FRONTEND=noninteractive WORKDIR /build -# Build minimal version of espeak-ng -ADD lib/espeak-ng-1.52-patched.tar.gz ./ -RUN cd espeak-ng && \ - ./autogen.sh && \ - ./configure \ - --without-pcaudiolib \ - --without-klatt \ - --without-speechplayer \ - --without-mbrola \ - --without-sonic \ - --with-extdict-cmn \ - --prefix=/usr && \ - make -j8 src/espeak-ng src/speak-ng && \ - make && \ - make install +ARG SPDLOG_VERSION="1.11.0" +RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz" | \ + tar -xzvf - && \ + mkdir -p "spdlog-${SPDLOG_VERSION}/build" && \ + cd "spdlog-${SPDLOG_VERSION}/build" && \ + cmake .. && \ + make -j8 && \ + cmake --install . 
--prefix /usr -# Copy onnxruntime library -COPY lib/onnxruntime-linux-*.tgz ./lib/ -RUN export ONNX_DIR="./lib/Linux-$(uname -m)" && \ - mkdir -p "${ONNX_DIR}" && \ - tar -C "${ONNX_DIR}" \ - --strip-components 1 \ - -xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz" +RUN mkdir -p "lib/Linux-$(uname -m)" + +ARG ONNXRUNTIME_VERSION='1.14.1' +RUN if [ "${TARGETARCH}${TARGETVARIANT}" = 'amd64' ]; then \ + ONNXRUNTIME_ARCH='x64'; \ + else \ + ONNXRUNTIME_ARCH="$(uname -m)"; \ + fi && \ + curl -L "https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/onnxruntime-linux-${ONNXRUNTIME_ARCH}-${ONNXRUNTIME_VERSION}.tgz" | \ + tar -C "lib/Linux-$(uname -m)" -xzvf - && \ + mv "lib/Linux-$(uname -m)"/onnxruntime-* \ + "lib/Linux-$(uname -m)/onnxruntime" + +ARG PIPER_PHONEMIZE_VERSION='1.0.0' +RUN mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \ + curl -L "https://github.com/rhasspy/piper-phonemize/releases/download/v${PIPER_PHONEMIZE_VERSION}/libpiper_phonemize-${TARGETARCH}${TARGETVARIANT}.tar.gz" | \ + tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - # Build piper binary COPY Makefile ./ COPY src/cpp/ ./src/cpp/ -RUN make no-pcaudio +RUN make # Do a test run -RUN /build/build/piper --help +RUN ./build/piper --help # Build .tar.gz to keep symlinks WORKDIR /dist RUN mkdir -p piper && \ - cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \ - cp -dR /usr/share/espeak-ng-data ./piper/ && \ - find /build/lib/ -name 'libonnxruntime.so.*' -exec cp -d {} ./piper/ \; && \ - cp /build/build/piper ./piper/ && \ + cp -dR /build/build/*.so* /build/build/espeak-ng-data /build/build/libtashkeel_model.ort /build/build/piper ./piper/ && \ tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/ # ----------------------------------------------------------------------------- +FROM debian:buster as test +ARG TARGETARCH +ARG TARGETVARIANT + +WORKDIR /test + +COPY local/en-us/lessac/low/en-us-lessac-low.onnx \ + 
local/en-us/lessac/low/en-us-lessac-low.onnx.json ./ + +# Run Piper on a test sentence and verify that the WAV file isn't empty +COPY --from=build /dist/piper_*.tar.gz ./ +RUN tar -xzf piper*.tar.gz +RUN echo 'This is a test.' | ./piper/piper -m en-us-lessac-low.onnx -f test.wav +RUN if [ ! -f test.wav ]; then exit 1; fi +RUN size="$(wc -c < test.wav)"; \ + if [ "${size}" -lt "1000" ]; then echo "File size is ${size} bytes"; exit 1; fi + +# ----------------------------------------------------------------------------- + FROM scratch -COPY --from=build /dist/piper_*.tar.gz ./ +COPY --from=test /test/piper_*.tar.gz /test/test.wav ./ diff --git a/Dockerfile.dockerignore b/Dockerfile.dockerignore deleted file mode 100644 index f3d5121..0000000 --- a/Dockerfile.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -* -!Makefile -!src/cpp/ -!lib/onnxruntime*.tgz -!lib/espeak-ng*.tar.gz diff --git a/Dockerfile.test b/Dockerfile.test deleted file mode 100644 index bf31ebe..0000000 --- a/Dockerfile.test +++ /dev/null @@ -1,13 +0,0 @@ -FROM debian:buster -ARG TARGETARCH -ARG TARGETVARIANT - -COPY local/en-us/ljspeech/low/en-us-ljspeech-low.onnx \ - local/en-us/ljspeech/low/en-us-ljspeech-low.onnx.json ./ - -ADD dist/linux_${TARGETARCH}${TARGETVARIANT}/piper_${TARGETARCH}${TARGETVARIANT}.tar.gz ./ - -RUN cd piper/ && echo 'This is a test.' | ./piper -m ../en-us-ljspeech-low.onnx -f test.wav -RUN if [ ! 
-f piper/test.wav ]; then exit 1; fi -RUN size="$(wc -c < piper/test.wav)"; \ - if [ "${size}" -lt "1000" ]; then echo "File size is ${size} bytes"; exit 1; fi diff --git a/Dockerfile.test.dockerignore b/Dockerfile.test.dockerignore deleted file mode 100644 index 5b4b7e0..0000000 --- a/Dockerfile.test.dockerignore +++ /dev/null @@ -1,3 +0,0 @@ -* -!dist/ -!local/en-us/ljspeech/low/ diff --git a/Makefile b/Makefile index 20d5d7a..a03007c 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,11 @@ -.PHONY: release debug clean test +.PHONY: piper clean test -release: +LIB_DIR := lib/Linux-$(shell uname -m) + +piper: mkdir -p build cd build && cmake ../src/cpp -DCMAKE_BUILD_TYPE=Release && make - -no-pcaudio: - mkdir -p build - cd build && cmake ../src/cpp -DCMAKE_BUILD_TYPE=Release -DUSE_PCAUDIO=OFF && make - -debug: - mkdir -p build - cd build && cmake ../src/cpp -DCMAKE_BUILD_TYPE=Debug && make + cp -aR $(LIB_DIR)/piper_phonemize/lib/espeak-ng-data $(LIB_DIR)/piper_phonemize/lib/*.so* $(LIB_DIR)/piper_phonemize/etc/* build/ clean: rm -rf build/ dist/ diff --git a/README.md b/README.md index 5460e97..96b894e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ ![Piper logo](etc/logo.png) A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4. +Piper is used in a [variety of projects](#people-using-piper). ``` sh echo 'Welcome to the world of speech synthesis!' | \ @@ -9,6 +10,8 @@ echo 'Welcome to the world of speech synthesis!' | \ [Listen to voice samples](https://rhasspy.github.io/piper-samples) and check out a [video tutorial by Thorsten Müller](https://youtu.be/rjq5eZoWWSo) +[![Sponsored by Nabu Casa](etc/nabu_casa_sponsored.png)](https://nabucasa.com) + Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and exported to the [onnxruntime](https://onnxruntime.ai/). 
## Voices @@ -28,6 +31,7 @@ Supported languages: * Finnish (fi) * French (fr) * Greek (el-gr) +* Icelandic (is) * Italian (it) * Kazakh (kk) * Nepali (ne) @@ -35,6 +39,8 @@ Supported languages: * Norwegian (no) * Polish (pl) * Brazilian Portuguese (pt-br) +* Russian (ru) +* Swedish (sv-se) * Ukrainian (uk) * Vietnamese (vi) * Chinese (zh-cn) @@ -62,7 +68,7 @@ For example: ``` sh echo 'Welcome to the world of speech synthesis!' | \ - ./piper --model blizzard_lessac-medium.onnx --output_file welcome.wav + ./piper --model en-us-lessac-medium.onnx --output_file welcome.wav ``` For multi-speaker models, use `--speaker <number>` to change speakers (default: 0). @@ -70,11 +76,28 @@ For multi-speaker models, use `--speaker <number>` to change speakers (default: See `piper --help` for more options. +## People using Piper + +Piper has been used in the following projects/papers: + +* [Home Assistant](https://github.com/home-assistant/addons/blob/master/piper/README.md) +* [Rhasspy 3](https://github.com/rhasspy/rhasspy3/) +* [NVDA - NonVisual Desktop Access](https://www.nvaccess.org/post/in-process-8th-may-2023/#voices) +* [Image Captioning for the Visually Impaired and Blind: A Recipe for Low-Resource Languages](https://www.techrxiv.org/articles/preprint/Image_Captioning_for_the_Visually_Impaired_and_Blind_A_Recipe_for_Low-Resource_Languages/22133894) +* [Open Voice Operating System](https://github.com/OpenVoiceOS/ovos-tts-plugin-piper) + + ## Training See [src/python](src/python) -Start by creating a virtual environment: +Start by installing system dependencies: + +``` sh +sudo apt-get install python3-dev +``` + +Then create a virtual environment: ``` sh cd piper/src/python diff --git a/VERSION b/VERSION index 6e8bf73..3eefcb9 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.1.0 +1.0.0 diff --git a/etc/nabu_casa_sponsored.png b/etc/nabu_casa_sponsored.png new file mode 100644 index 0000000..f5831cf Binary files /dev/null and b/etc/nabu_casa_sponsored.png differ diff --git 
a/etc/nabu_casa_sponsored.svg b/etc/nabu_casa_sponsored.svg new file mode 100644 index 0000000..151e242 --- /dev/null +++ b/etc/nabu_casa_sponsored.svg @@ -0,0 +1,215 @@ + + + + + + image/svg+xml + + logo-two-column + + + + + + + + + + + + + + logo-two-column + + + + + + + + + + + + + + + + + + + + + + Sponsored by + + diff --git a/etc/test_sentences/de.txt b/etc/test_sentences/de.txt new file mode 100644 index 0000000..992fc8b --- /dev/null +++ b/etc/test_sentences/de.txt @@ -0,0 +1,10 @@ +Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder -wolke wahrgenommen wird. +Sein radialer Farbverlauf ist das mehr oder weniger verweißlichte sichtbare Licht des Sonnenspektrums. +Das Sonnenlicht wird beim Ein- und beim Austritt an jedem annähernd kugelförmigen Regentropfen abgelenkt und in Licht mehrerer Farben zerlegt. +Dazwischen wird es an der Tropfenrückseite reflektiert. +Das jeden Tropfen verlassende Licht ist in farbigen Schichten konzentriert, die aufeinandergesteckte dünne Kegelmäntel bilden. +Der Beobachter hat die Regenwolke vor sich und die Sonne im Rücken. +Ihn erreicht Licht einer bestimmten Farbe aus Regentropfen, die sich auf einem schmalen Kreisbogen am Himmel befinden. +Der Winkel, unter dem der Regenbogen gesehen wird, ist gleich wie der Winkel der Kegelmäntel, in dem diese Farben beim Austritt am Regentropfen konzentriert sind. +Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich. +Falsches Üben von Xylophonmusik quält jeden größeren Zwerg. diff --git a/etc/test_sentences/en.txt b/etc/test_sentences/en.txt new file mode 100644 index 0000000..3af1c46 --- /dev/null +++ b/etc/test_sentences/en.txt @@ -0,0 +1,7 @@ +A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky. +It takes the form of a multi-colored circular arc. 
+Rainbows caused by sunlight always appear in the section of sky directly opposite the Sun. +With tenure, Suzie’d have all the more leisure for yachting, but her publications are no good. +Shaw, those twelve beige hooks are joined if I patch a young, gooey mouth. +Are those shy Eurasian footwear, cowboy chaps, or jolly earthmoving headgear? +The beige hue on the waters of the loch impressed all, including the French queen, before she heard that symphony again, just as young Arthur wanted. diff --git a/etc/test_sentences/es.txt b/etc/test_sentences/es.txt new file mode 100644 index 0000000..880930e --- /dev/null +++ b/etc/test_sentences/es.txt @@ -0,0 +1,6 @@ +Un arcoíris​ o arco iris es un fenómeno óptico y meteorológico que consiste en la aparición en el cielo de un arco de luz multicolor, originado por la descomposición de la luz solar en el espectro visible, la cual se produce por refracción, cuando los rayos del sol atraviesan pequeñas gotas de agua contenidas en la atmósfera terrestre. +Es un arco compuesto de arcos concéntricos de colores, sin solución de continuidad entre ellos, con el rojo hacia la parte exterior y el violeta hacia el interior. +A altitud suficiente, por ejemplo cuando se viaja en avión, el arcoíris se puede observar en forma de círculo completo. +Benjamín pidió una bebida de kiwi y fresa; Noé, sin vergüenza, la más exquisita champaña del menú. +José compró una vieja zampoña en Perú. Excusándose, Sofía tiró su whisky al desagüe de la banqueta. +El veloz murciélago hindú comía feliz cardillo y kiwi. La cigüeña tocaba el saxofón detrás del palenque de paja. 
diff --git a/etc/test_sentences/eu.txt b/etc/test_sentences/eu.txt new file mode 100644 index 0000000..8f5fc16 --- /dev/null +++ b/etc/test_sentences/eu.txt @@ -0,0 +1,4 @@ +Ostadarra, halaber Erromako zubia edo uztargia, gertaera optiko eta meteorologiko bat da, zeruan, jarraikako argi zerrenda bat eragiten duena, eguzkiaren izpiek Lurreko atmosferan aurkitzen diren hezetasun tanta txikiak zeharkatzen dituztenean. +Forma, arku kolore anitz batena da, gorria kanpoalderantz duena eta morea barnealderantz. +Ez da hain ohikoa ostadar bikoitza, bigarren arku bat duena, ilunagoa, koloreen ordena alderantziz duena, hau da, gorria barnealderantz eta morea kanpoalderantz. +Ostadarrak, jarraikako kolore zerrenda bat erakusten duen arren, ohi, osatzen duten koloreak sei direla onartzen da: gorria, laranja, hori, berdea, urdina eta morea, argiaren maiztasunen deskonposaketen ondorio, eta hiru oinarrizko koloreek, eta hauek, euren arteko nahasketetan emandako beste hirurek emandakoek osatua, tradizionalki, 7 kolore aipatzen diren arren, urdina eta morearen artean anila jarriz. diff --git a/etc/test_sentences/fr.txt b/etc/test_sentences/fr.txt new file mode 100644 index 0000000..29a2acc --- /dev/null +++ b/etc/test_sentences/fr.txt @@ -0,0 +1,7 @@ +Un arc-en-ciel est un photométéore, un phénomène optique se produisant dans le ciel, visible dans la direction opposée au Soleil quand il brille pendant la pluie. +C'est un arc de cercle coloré d'un dégradé de couleurs continu du rouge, à l'extérieur, au jaune au vert et au bleu, jusqu'au violet à l'intérieur. +Un arc-en-ciel se compose de deux arcs principaux : l'arc primaire et l'arc secondaire. +L'arc primaire est dû aux rayons ayant effectué une réflexion interne dans la goutte d'eau. +Les rayons ayant effectué deux réflexions internes dans la goutte d'eau provoquent un arc secondaire moins intense à l'extérieur du premier. +Les deux arcs sont séparés par la bande sombre d'Alexandre. +Buvez de ce whisky que le patron juge fameux. 
diff --git a/etc/test_sentences/ka.txt b/etc/test_sentences/ka.txt new file mode 100644 index 0000000..3494d6f --- /dev/null +++ b/etc/test_sentences/ka.txt @@ -0,0 +1,7 @@ +ცისარტყელა — ატმოსფერული ოპტიკური და მეტეოროლოგიური მოვლენა, რომელიც ხშირად წვიმის შემდეგ ჩნდება. +ეს თავისებური რკალია ან წრეხაზი, რომელიც ფერების სპექტრისგან შედგება. +ცისარტყელა შედგება შვიდი ფერისგან: წითელი, ნარინჯისფერი, ყვითელი, მწვანე, ცისფერი, ლურჯი, იისფერი. +ცენტრი წრისა, რომელსაც ცისარტყელა შემოწერს, ძევს წრფეზე, რომელიც გადის დამკვირვებელსა და მზეს შორის, ამავდროულად ცისარტყელას დანახვისას მზე ყოველთვის მდებარეობს დამკვირვებლის ზურგს უკან, შესაბამისად, სპეციალური ოპტიკური ხელსაწყოების გარეშე შეუძლებელია ერთდროულად ცისარტყელასა და მზის დანახვა. +ხმელეთზე მდებარე დამკვირვებლისთვის ცისარტყელას, როგორც წესი, აქვს რკალის, წრის ნაწილის, ფორმა. +რაც უფრო მაღალია დაკვირვების წერტილი — მით უფრო სრულია ეს რკალი (მთიდან ან თვითმფრინავიდან შესაძლებელია მთლიანი წრის დანახვაც). +როდესაც მზე აღიმართება ჰორიზონტიდან 42 გრადუსზე უფრო მაღლა, ცისარტყელა დედამიწის ზედაპირიდან უხილავია. diff --git a/etc/test_sentences/pl.txt b/etc/test_sentences/pl.txt new file mode 100644 index 0000000..17e0fbf --- /dev/null +++ b/etc/test_sentences/pl.txt @@ -0,0 +1,6 @@ +Tęcza, zjawisko optyczne i meteorologiczne, występujące w postaci charakterystycznego wielobarwnego łuku powstającego w wyniku rozszczepienia światła widzialnego, zwykle promieniowania słonecznego, załamującego się i odbijającego wewnątrz licznych kropli wody mających kształt zbliżony do kulistego. +Rozszczepienie światła jest wynikiem zjawiska dyspersji, powodującego różnice w kącie załamania światła o różnej długości fali przy przejściu z powietrza do wody i z wody do powietrza. +Jeżu klątw, spłódź Finom część gry hańb. +Pójdźże, kiń tę chmurność w głąb flaszy. +Mężny bądź, chroń pułk twój i sześć flag. +Filmuj rzeź żądań, pość, gnęb chłystków. 
diff --git a/etc/test_sentences/pt.txt b/etc/test_sentences/pt.txt new file mode 100644 index 0000000..d9bb377 --- /dev/null +++ b/etc/test_sentences/pt.txt @@ -0,0 +1,8 @@ +Um arco-íris, também popularmente denominado arco-da-velha, é um fenômeno óptico e meteorológico que separa a luz do sol em seu espectro contínuo quando o sol brilha sobre gotículas de água suspensas no ar. +É um arco multicolorido com o vermelho em seu exterior e o violeta em seu interior. +Por ser um espectro de dispersão da luz branca, o arco-íris contém uma quantidade infinita de cores sem qualquer delimitação entre elas. +Devido à necessidade humana de classificação dos fenômenos da natureza, a capacidade finita de distinção de cores pela visão humana e por questões didáticas, o arco-íris é mais conhecido por uma simplificação criada culturalmente que resume o espectro em sete cores na seguinte ordem: vermelho, laranja, amarelo, verde, azul, anil e violeta. +Tal simplificação foi proposta primeiramente por Isaac Newton, que decidiu nomear apenas cinco cores e depois adicionou mais duas apenas para fazer analogia com as sete notas musicais, os sete dias da semana e os sete objetos do sistema solar conhecidos à época. +Para informações sobre o espectro de cores do arco-íris, veja também o artigo sobre cores. +Luís argüia à Júlia que «brações, fé, chá, óxido, pôr, zângão» eram palavras do português. +À noite, vovô Kowalsky vê o ímã cair no pé do pingüim queixoso e vovó põe açúcar no chá de tâmaras do jabuti feliz. diff --git a/etc/test_sentences/ru.txt b/etc/test_sentences/ru.txt new file mode 100644 index 0000000..6856f36 --- /dev/null +++ b/etc/test_sentences/ru.txt @@ -0,0 +1,6 @@ +Ра́дуга, атмосферное, оптическое и метеорологическое явление, наблюдаемое при освещении ярким источником света множества водяных капель. +Радуга выглядит как разноцветная дуга или окружность, составленная из цветов спектра видимого излучения. 
+Это те семь цветов, которые принято выделять в радуге в русской культуре, но следует иметь в виду, что на самом деле спектр непрерывен, и его цвета плавно переходят друг в друга через множество промежуточных оттенков. +Широкая электрификация южных губерний даст мощный толчок подъёму сельского хозяйства. +Разъяренный чтец эгоистично бьёт пятью жердями шустрого фехтовальщика. +В чащах юга жил бы цитрус? Да, но фальшивый экземпляр! diff --git a/etc/test_sentences/test_de.jsonl b/etc/test_sentences/test_de.jsonl new file mode 100644 index 0000000..956e159 --- /dev/null +++ b/etc/test_sentences/test_de.jsonl @@ -0,0 +1,10 @@ +{"text": "Der Regenbogen ist ein atmosphärisch-optisches Phänomen, das als kreisbogenförmiges farbiges Lichtband in einer von der Sonne beschienenen Regenwand oder -wolke wahrgenommen wird.", "phonemes": ["d", "ɛ", "ɾ", " ", "r", "ˌ", "e", "ː", "ɡ", "ə", "n", "b", "ˈ", "o", "ː", "ɡ", "ə", "n", " ", "ɪ", "s", "t", " ", "a", "ɪ", "n", " ", "ˌ", "a", "t", "m", "ɔ", "s", "f", "ˈ", "ɛ", "ː", "r", "ɪ", "ʃ", "ˈ", "ɔ", "p", "t", "ɪ", "ʃ", "ə", "s", " ", "f", "ɛ", "ː", "n", "ˈ", "o", "ː", "m", "ə", "n", ",", " ", "d", "a", "s", " ", "a", "l", "s", " ", "k", "ɾ", "ˈ", "a", "ɪ", "s", "b", "o", "ː", "ɡ", "ˌ", "ɛ", "n", "f", "œ", "ɾ", "m", "ˌ", "ɪ", "ɡ", "ə", "s", " ", "f", "ˈ", "a", "ɾ", "b", "ɪ", "ɡ", "ə", "s", " ", "l", "ˈ", "ɪ", "c", "̧", "t", "b", "a", "n", "t", " ", "ɪ", "n", " ", "ˌ", "a", "ɪ", "n", "ɜ", " ", "f", "ɔ", "n", " ", "d", "ɛ", "ɾ", " ", "z", "ˈ", "ɔ", "n", "ə", " ", "b", "ə", "ʃ", "ˈ", "i", "ː", "n", "ə", "n", "ə", "n", " ", "r", "ˈ", "e", "ː", "ɡ", "ə", "n", "v", "ˌ", "a", "n", "t", " ", "ˌ", "o", "ː", "d", "ɜ", " ", "v", "ˈ", "ɔ", "l", "k", "ə", " ", "v", "ˈ", "ɑ", "ː", "ɾ", "ɡ", "ə", "n", "ˌ", "ɔ", "m", "ə", "n", " ", "v", "ˌ", "ɪ", "ɾ", "t", "."], "phoneme_ids": [1, 0, 17, 0, 61, 0, 92, 0, 3, 0, 30, 0, 121, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 15, 0, 120, 0, 27, 0, 122, 0, 66, 0, 59, 0, 26, 0, 3, 0, 74, 0, 31, 0, 32, 0, 3, 0, 14, 0, 
74, 0, 26, 0, 3, 0, 121, 0, 14, 0, 32, 0, 25, 0, 54, 0, 31, 0, 19, 0, 120, 0, 61, 0, 122, 0, 30, 0, 74, 0, 96, 0, 120, 0, 54, 0, 28, 0, 32, 0, 74, 0, 96, 0, 59, 0, 31, 0, 3, 0, 19, 0, 61, 0, 122, 0, 26, 0, 120, 0, 27, 0, 122, 0, 25, 0, 59, 0, 26, 0, 8, 0, 3, 0, 17, 0, 14, 0, 31, 0, 3, 0, 14, 0, 24, 0, 31, 0, 3, 0, 23, 0, 92, 0, 120, 0, 14, 0, 74, 0, 31, 0, 15, 0, 27, 0, 122, 0, 66, 0, 121, 0, 61, 0, 26, 0, 19, 0, 45, 0, 92, 0, 25, 0, 121, 0, 74, 0, 66, 0, 59, 0, 31, 0, 3, 0, 19, 0, 120, 0, 14, 0, 92, 0, 15, 0, 74, 0, 66, 0, 59, 0, 31, 0, 3, 0, 24, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 15, 0, 14, 0, 26, 0, 32, 0, 3, 0, 74, 0, 26, 0, 3, 0, 121, 0, 14, 0, 74, 0, 26, 0, 62, 0, 3, 0, 19, 0, 54, 0, 26, 0, 3, 0, 17, 0, 61, 0, 92, 0, 3, 0, 38, 0, 120, 0, 54, 0, 26, 0, 59, 0, 3, 0, 15, 0, 59, 0, 96, 0, 120, 0, 21, 0, 122, 0, 26, 0, 59, 0, 26, 0, 59, 0, 26, 0, 3, 0, 30, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 34, 0, 121, 0, 14, 0, 26, 0, 32, 0, 3, 0, 121, 0, 27, 0, 122, 0, 17, 0, 62, 0, 3, 0, 34, 0, 120, 0, 54, 0, 24, 0, 23, 0, 59, 0, 3, 0, 34, 0, 120, 0, 51, 0, 122, 0, 92, 0, 66, 0, 59, 0, 26, 0, 121, 0, 54, 0, 25, 0, 59, 0, 26, 0, 3, 0, 34, 0, 121, 0, 74, 0, 92, 0, 32, 0, 10, 0, 2]} +{"text": "Sein radialer Farbverlauf ist das mehr oder weniger verweißlichte sichtbare Licht des Sonnenspektrums.", "phonemes": ["z", "a", "ɪ", "n", " ", "r", "ˌ", "ɑ", "d", "i", "ː", "ˈ", "ɑ", "ː", "l", "ɜ", " ", "f", "ˈ", "a", "ɾ", "p", "f", "ɛ", "ɾ", "l", "ˌ", "a", "ʊ", "f", " ", "ɪ", "s", "t", " ", "d", "a", "s", " ", "m", "ˈ", "e", "ː", "ɾ", " ", "ˌ", "o", "ː", "d", "ɜ", " ", "v", "ˈ", "e", "ː", "n", "ɪ", "ɡ", "ɜ", " ", "f", "ɛ", "ɾ", "v", "ˈ", "a", "ɪ", "s", "l", "ɪ", "c", "̧", "t", "ə", " ", "z", "ˈ", "ɪ", "c", "̧", "t", "b", "ɑ", "ː", "r", "ə", " ", "l", "ˈ", "ɪ", "c", "̧", "t", " ", "d", "ɛ", "s", " ", "z", "ˈ", "ɔ", "n", "ə", "n", "s", "p", "ˌ", "ɛ", "k", "t", "ɾ", "ʊ", "m", "s", "."], "phoneme_ids": [1, 0, 38, 0, 14, 0, 74, 0, 26, 0, 3, 0, 30, 0, 121, 0, 51, 0, 17, 0, 21, 0, 
122, 0, 120, 0, 51, 0, 122, 0, 24, 0, 62, 0, 3, 0, 19, 0, 120, 0, 14, 0, 92, 0, 28, 0, 19, 0, 61, 0, 92, 0, 24, 0, 121, 0, 14, 0, 100, 0, 19, 0, 3, 0, 74, 0, 31, 0, 32, 0, 3, 0, 17, 0, 14, 0, 31, 0, 3, 0, 25, 0, 120, 0, 18, 0, 122, 0, 92, 0, 3, 0, 121, 0, 27, 0, 122, 0, 17, 0, 62, 0, 3, 0, 34, 0, 120, 0, 18, 0, 122, 0, 26, 0, 74, 0, 66, 0, 62, 0, 3, 0, 19, 0, 61, 0, 92, 0, 34, 0, 120, 0, 14, 0, 74, 0, 31, 0, 24, 0, 74, 0, 16, 0, 140, 0, 32, 0, 59, 0, 3, 0, 38, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 15, 0, 51, 0, 122, 0, 30, 0, 59, 0, 3, 0, 24, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 3, 0, 17, 0, 61, 0, 31, 0, 3, 0, 38, 0, 120, 0, 54, 0, 26, 0, 59, 0, 26, 0, 31, 0, 28, 0, 121, 0, 61, 0, 23, 0, 32, 0, 92, 0, 100, 0, 25, 0, 31, 0, 10, 0, 2]} +{"text": "Das Sonnenlicht wird beim Ein- und beim Austritt an jedem annähernd kugelförmigen Regentropfen abgelenkt und in Licht mehrerer Farben zerlegt.", "phonemes": ["d", "a", "s", " ", "z", "ˈ", "ɔ", "n", "ə", "n", "l", "ˌ", "ɪ", "c", "̧", "t", " ", "v", "ˌ", "ɪ", "ɾ", "t", " ", "b", "a", "ɪ", "m", " ", "a", "ɪ", "n", " ", "ʊ", "n", "t", " ", "b", "a", "ɪ", "m", " ", "ˈ", "a", "ʊ", "s", "t", "ɾ", "ˌ", "ɪ", "t", " ", "a", "n", " ", "j", "ˈ", "e", "ː", "d", "ə", "m", " ", "ˈ", "a", "n", "n", "ˌ", "ɛ", "ː", "ɛ", "ɾ", "n", "t", " ", "k", "ˈ", "u", "ː", "ɡ", "ə", "l", "f", "ˌ", "œ", "ɾ", "m", "ɪ", "ɡ", "ə", "n", " ", "r", "ˈ", "e", "ː", "ɡ", "ə", "n", "t", "ɾ", "ˌ", "ɔ", "p", "f", "ə", "n", " ", "ˈ", "a", "p", "ɡ", "ə", "l", "ˌ", "ɛ", "ŋ", "k", "t", " ", "ʊ", "n", "t", " ", "ɪ", "n", " ", "l", "ˈ", "ɪ", "c", "̧", "t", " ", "m", "ˈ", "e", "ː", "r", "ə", "r", "ɜ", " ", "f", "ˈ", "a", "ɾ", "b", "ə", "n", " ", "t", "s", "ɛ", "ɾ", "l", "ˈ", "e", "ː", "k", "t", "."], "phoneme_ids": [1, 0, 17, 0, 14, 0, 31, 0, 3, 0, 38, 0, 120, 0, 54, 0, 26, 0, 59, 0, 26, 0, 24, 0, 121, 0, 74, 0, 16, 0, 140, 0, 32, 0, 3, 0, 34, 0, 121, 0, 74, 0, 92, 0, 32, 0, 3, 0, 15, 0, 14, 0, 74, 0, 25, 0, 3, 0, 14, 0, 74, 0, 26, 0, 3, 0, 100, 0, 26, 0, 32, 0, 3, 0, 
15, 0, 14, 0, 74, 0, 25, 0, 3, 0, 120, 0, 14, 0, 100, 0, 31, 0, 32, 0, 92, 0, 121, 0, 74, 0, 32, 0, 3, 0, 14, 0, 26, 0, 3, 0, 22, 0, 120, 0, 18, 0, 122, 0, 17, 0, 59, 0, 25, 0, 3, 0, 120, 0, 14, 0, 26, 0, 26, 0, 121, 0, 61, 0, 122, 0, 61, 0, 92, 0, 26, 0, 32, 0, 3, 0, 23, 0, 120, 0, 33, 0, 122, 0, 66, 0, 59, 0, 24, 0, 19, 0, 121, 0, 45, 0, 92, 0, 25, 0, 74, 0, 66, 0, 59, 0, 26, 0, 3, 0, 30, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 32, 0, 92, 0, 121, 0, 54, 0, 28, 0, 19, 0, 59, 0, 26, 0, 3, 0, 120, 0, 14, 0, 28, 0, 66, 0, 59, 0, 24, 0, 121, 0, 61, 0, 44, 0, 23, 0, 32, 0, 3, 0, 100, 0, 26, 0, 32, 0, 3, 0, 74, 0, 26, 0, 3, 0, 24, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 3, 0, 25, 0, 120, 0, 18, 0, 122, 0, 30, 0, 59, 0, 30, 0, 62, 0, 3, 0, 19, 0, 120, 0, 14, 0, 92, 0, 15, 0, 59, 0, 26, 0, 3, 0, 32, 0, 31, 0, 61, 0, 92, 0, 24, 0, 120, 0, 18, 0, 122, 0, 23, 0, 32, 0, 10, 0, 2]} +{"text": "Dazwischen wird es an der Tropfenrückseite reflektiert.", "phonemes": ["d", "ˈ", "a", "t", "s", "v", "ɪ", "ʃ", "ə", "n", " ", "v", "ˌ", "ɪ", "ɾ", "t", " ", "ɛ", "s", " ", "a", "n", " ", "d", "ɛ", "ɾ", " ", "t", "ɾ", "ˈ", "ɔ", "p", "f", "ə", "n", "r", "ˌ", "y", "k", "z", "a", "ɪ", "t", "ə", " ", "r", "ˌ", "ɛ", "f", "l", "ɛ", "k", "t", "ˈ", "i", "ː", "ɾ", "t", "."], "phoneme_ids": [1, 0, 17, 0, 120, 0, 14, 0, 32, 0, 31, 0, 34, 0, 74, 0, 96, 0, 59, 0, 26, 0, 3, 0, 34, 0, 121, 0, 74, 0, 92, 0, 32, 0, 3, 0, 61, 0, 31, 0, 3, 0, 14, 0, 26, 0, 3, 0, 17, 0, 61, 0, 92, 0, 3, 0, 32, 0, 92, 0, 120, 0, 54, 0, 28, 0, 19, 0, 59, 0, 26, 0, 30, 0, 121, 0, 37, 0, 23, 0, 38, 0, 14, 0, 74, 0, 32, 0, 59, 0, 3, 0, 30, 0, 121, 0, 61, 0, 19, 0, 24, 0, 61, 0, 23, 0, 32, 0, 120, 0, 21, 0, 122, 0, 92, 0, 32, 0, 10, 0, 2]} +{"text": "Das jeden Tropfen verlassende Licht ist in farbigen Schichten konzentriert, die aufeinandergesteckte dünne Kegelmäntel bilden.", "phonemes": ["d", "a", "s", " ", "j", "ˈ", "e", "ː", "d", "ə", "n", " ", "t", "ɾ", "ˈ", "ɔ", "p", "f", "ə", "n", " ", "f", "ɛ", "ɾ", "l", "ˈ", "a", 
"s", "ə", "n", "d", "ə", " ", "l", "ˈ", "ɪ", "c", "̧", "t", " ", "ɪ", "s", "t", " ", "ɪ", "n", " ", "f", "ˈ", "a", "ɾ", "b", "ɪ", "ɡ", "ə", "n", " ", "ʃ", "ˈ", "ɪ", "c", "̧", "t", "ə", "n", " ", "k", "ɔ", "n", "t", "s", "ɛ", "n", "t", "ɾ", "ˈ", "i", "ː", "ɾ", "t", ",", " ", "d", "i", "ː", " ", "ˌ", "a", "ʊ", "f", "a", "ɪ", "n", "ˈ", "a", "n", "d", "ɜ", "ɡ", "ˌ", "ɛ", "s", "t", "ɛ", "k", "t", "ə", " ", "d", "ˈ", "y", "n", "ə", " ", "k", "ˈ", "e", "ː", "ɡ", "ə", "l", "m", "ˌ", "ɛ", "n", "t", "ə", "l", " ", "b", "ˈ", "ɪ", "l", "d", "ə", "n", "."], "phoneme_ids": [1, 0, 17, 0, 14, 0, 31, 0, 3, 0, 22, 0, 120, 0, 18, 0, 122, 0, 17, 0, 59, 0, 26, 0, 3, 0, 32, 0, 92, 0, 120, 0, 54, 0, 28, 0, 19, 0, 59, 0, 26, 0, 3, 0, 19, 0, 61, 0, 92, 0, 24, 0, 120, 0, 14, 0, 31, 0, 59, 0, 26, 0, 17, 0, 59, 0, 3, 0, 24, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 3, 0, 74, 0, 31, 0, 32, 0, 3, 0, 74, 0, 26, 0, 3, 0, 19, 0, 120, 0, 14, 0, 92, 0, 15, 0, 74, 0, 66, 0, 59, 0, 26, 0, 3, 0, 96, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 59, 0, 26, 0, 3, 0, 23, 0, 54, 0, 26, 0, 32, 0, 31, 0, 61, 0, 26, 0, 32, 0, 92, 0, 120, 0, 21, 0, 122, 0, 92, 0, 32, 0, 8, 0, 3, 0, 17, 0, 21, 0, 122, 0, 3, 0, 121, 0, 14, 0, 100, 0, 19, 0, 14, 0, 74, 0, 26, 0, 120, 0, 14, 0, 26, 0, 17, 0, 62, 0, 66, 0, 121, 0, 61, 0, 31, 0, 32, 0, 61, 0, 23, 0, 32, 0, 59, 0, 3, 0, 17, 0, 120, 0, 37, 0, 26, 0, 59, 0, 3, 0, 23, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 24, 0, 25, 0, 121, 0, 61, 0, 26, 0, 32, 0, 59, 0, 24, 0, 3, 0, 15, 0, 120, 0, 74, 0, 24, 0, 17, 0, 59, 0, 26, 0, 10, 0, 2]} +{"text": "Der Beobachter hat die Regenwolke vor sich und die Sonne im Rücken.", "phonemes": ["d", "ɛ", "ɾ", " ", "b", "ə", "ˈ", "o", "ː", "b", "a", "x", "t", "ɜ", " ", "h", "a", "t", " ", "d", "i", "ː", " ", "r", "ˈ", "e", "ː", "ɡ", "ə", "n", "v", "ˌ", "ɔ", "l", "k", "ə", " ", "f", "ˌ", "ɔ", "ɾ", " ", "z", "ɪ", "c", "̧", " ", "ʊ", "n", "t", " ", "d", "i", "ː", " ", "z", "ˈ", "ɔ", "n", "ə", " ", "ɪ", "m", " ", "r", "ˈ", "y", "k", "ə", "n", "."], 
"phoneme_ids": [1, 0, 17, 0, 61, 0, 92, 0, 3, 0, 15, 0, 59, 0, 120, 0, 27, 0, 122, 0, 15, 0, 14, 0, 36, 0, 32, 0, 62, 0, 3, 0, 20, 0, 14, 0, 32, 0, 3, 0, 17, 0, 21, 0, 122, 0, 3, 0, 30, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 34, 0, 121, 0, 54, 0, 24, 0, 23, 0, 59, 0, 3, 0, 19, 0, 121, 0, 54, 0, 92, 0, 3, 0, 38, 0, 74, 0, 16, 0, 140, 0, 3, 0, 100, 0, 26, 0, 32, 0, 3, 0, 17, 0, 21, 0, 122, 0, 3, 0, 38, 0, 120, 0, 54, 0, 26, 0, 59, 0, 3, 0, 74, 0, 25, 0, 3, 0, 30, 0, 120, 0, 37, 0, 23, 0, 59, 0, 26, 0, 10, 0, 2]} +{"text": "Ihn erreicht Licht einer bestimmten Farbe aus Regentropfen, die sich auf einem schmalen Kreisbogen am Himmel befinden.", "phonemes": ["i", "ː", "n", " ", "ɛ", "ɾ", "r", "ˈ", "a", "ɪ", "c", "̧", "t", " ", "l", "ˈ", "ɪ", "c", "̧", "t", " ", "ˌ", "a", "ɪ", "n", "ɜ", " ", "b", "ə", "ʃ", "t", "ˈ", "ɪ", "m", "t", "ə", "n", " ", "f", "ˈ", "a", "ɾ", "b", "ə", " ", "ˌ", "a", "ʊ", "s", " ", "r", "ˈ", "e", "ː", "ɡ", "ə", "n", "t", "ɾ", "ˌ", "ɔ", "p", "f", "ə", "n", ",", " ", "d", "i", "ː", " ", "z", "ɪ", "c", "̧", " ", "a", "ʊ", "f", " ", "ˌ", "a", "ɪ", "n", "ə", "m", " ", "ʃ", "m", "ˈ", "ɑ", "ː", "l", "ə", "n", " ", "k", "ɾ", "a", "ɪ", "s", "b", "ˈ", "o", "ː", "ɡ", "ə", "n", " ", "a", "m", " ", "h", "ˈ", "ɪ", "m", "ə", "l", " ", "b", "ə", "f", "ˈ", "ɪ", "n", "d", "ə", "n", "."], "phoneme_ids": [1, 0, 21, 0, 122, 0, 26, 0, 3, 0, 61, 0, 92, 0, 30, 0, 120, 0, 14, 0, 74, 0, 16, 0, 140, 0, 32, 0, 3, 0, 24, 0, 120, 0, 74, 0, 16, 0, 140, 0, 32, 0, 3, 0, 121, 0, 14, 0, 74, 0, 26, 0, 62, 0, 3, 0, 15, 0, 59, 0, 96, 0, 32, 0, 120, 0, 74, 0, 25, 0, 32, 0, 59, 0, 26, 0, 3, 0, 19, 0, 120, 0, 14, 0, 92, 0, 15, 0, 59, 0, 3, 0, 121, 0, 14, 0, 100, 0, 31, 0, 3, 0, 30, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 32, 0, 92, 0, 121, 0, 54, 0, 28, 0, 19, 0, 59, 0, 26, 0, 8, 0, 3, 0, 17, 0, 21, 0, 122, 0, 3, 0, 38, 0, 74, 0, 16, 0, 140, 0, 3, 0, 14, 0, 100, 0, 19, 0, 3, 0, 121, 0, 14, 0, 74, 0, 26, 0, 59, 0, 25, 0, 3, 0, 96, 0, 25, 0, 120, 0, 51, 0, 122, 0, 24, 0, 59, 
0, 26, 0, 3, 0, 23, 0, 92, 0, 14, 0, 74, 0, 31, 0, 15, 0, 120, 0, 27, 0, 122, 0, 66, 0, 59, 0, 26, 0, 3, 0, 14, 0, 25, 0, 3, 0, 20, 0, 120, 0, 74, 0, 25, 0, 59, 0, 24, 0, 3, 0, 15, 0, 59, 0, 19, 0, 120, 0, 74, 0, 26, 0, 17, 0, 59, 0, 26, 0, 10, 0, 2]} +{"text": "Der Winkel, unter dem der Regenbogen gesehen wird, ist gleich wie der Winkel der Kegelmäntel, in dem diese Farben beim Austritt am Regentropfen konzentriert sind.", "phonemes": ["d", "ɛ", "ɾ", " ", "v", "ˈ", "ɪ", "n", "k", "ə", "l", ",", " ", "ˌ", "ʊ", "n", "t", "ɜ", " ", "d", "e", "ː", "m", " ", "d", "ɛ", "ɾ", " ", "r", "ˌ", "e", "ː", "ɡ", "ə", "n", "b", "ˈ", "o", "ː", "ɡ", "ə", "n", " ", "ɡ", "ə", "z", "ˈ", "e", "ː", "ə", "n", " ", "v", "ˌ", "ɪ", "ɾ", "t", ",", " ", "ɪ", "s", "t", " ", "ɡ", "l", "ˈ", "a", "ɪ", "c", "̧", " ", "v", "i", "ː", " ", "d", "ɛ", "ɾ", " ", "v", "ˈ", "ɪ", "n", "k", "ə", "l", " ", "d", "ɛ", "ɾ", " ", "k", "ˈ", "e", "ː", "ɡ", "ə", "l", "m", "ˌ", "ɛ", "n", "t", "ə", "l", ",", " ", "ɪ", "n", " ", "d", "e", "ː", "m", " ", "d", "ˌ", "i", "ː", "z", "ə", " ", "f", "ˈ", "a", "ɾ", "b", "ə", "n", " ", "b", "a", "ɪ", "m", " ", "ˈ", "a", "ʊ", "s", "t", "ɾ", "ˌ", "ɪ", "t", " ", "a", "m", " ", "r", "ˈ", "e", "ː", "ɡ", "ə", "n", "t", "ɾ", "ˌ", "ɔ", "p", "f", "ə", "n", " ", "k", "ɔ", "n", "t", "s", "ɛ", "n", "t", "ɾ", "ˈ", "i", "ː", "ɾ", "t", " ", "z", "ɪ", "n", "t", "."], "phoneme_ids": [1, 0, 17, 0, 61, 0, 92, 0, 3, 0, 34, 0, 120, 0, 74, 0, 26, 0, 23, 0, 59, 0, 24, 0, 8, 0, 3, 0, 121, 0, 100, 0, 26, 0, 32, 0, 62, 0, 3, 0, 17, 0, 18, 0, 122, 0, 25, 0, 3, 0, 17, 0, 61, 0, 92, 0, 3, 0, 30, 0, 121, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 15, 0, 120, 0, 27, 0, 122, 0, 66, 0, 59, 0, 26, 0, 3, 0, 66, 0, 59, 0, 38, 0, 120, 0, 18, 0, 122, 0, 59, 0, 26, 0, 3, 0, 34, 0, 121, 0, 74, 0, 92, 0, 32, 0, 8, 0, 3, 0, 74, 0, 31, 0, 32, 0, 3, 0, 66, 0, 24, 0, 120, 0, 14, 0, 74, 0, 16, 0, 140, 0, 3, 0, 34, 0, 21, 0, 122, 0, 3, 0, 17, 0, 61, 0, 92, 0, 3, 0, 34, 0, 120, 0, 74, 0, 26, 0, 23, 0, 59, 0, 24, 0, 3, 0, 17, 0, 
61, 0, 92, 0, 3, 0, 23, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 24, 0, 25, 0, 121, 0, 61, 0, 26, 0, 32, 0, 59, 0, 24, 0, 8, 0, 3, 0, 74, 0, 26, 0, 3, 0, 17, 0, 18, 0, 122, 0, 25, 0, 3, 0, 17, 0, 121, 0, 21, 0, 122, 0, 38, 0, 59, 0, 3, 0, 19, 0, 120, 0, 14, 0, 92, 0, 15, 0, 59, 0, 26, 0, 3, 0, 15, 0, 14, 0, 74, 0, 25, 0, 3, 0, 120, 0, 14, 0, 100, 0, 31, 0, 32, 0, 92, 0, 121, 0, 74, 0, 32, 0, 3, 0, 14, 0, 25, 0, 3, 0, 30, 0, 120, 0, 18, 0, 122, 0, 66, 0, 59, 0, 26, 0, 32, 0, 92, 0, 121, 0, 54, 0, 28, 0, 19, 0, 59, 0, 26, 0, 3, 0, 23, 0, 54, 0, 26, 0, 32, 0, 31, 0, 61, 0, 26, 0, 32, 0, 92, 0, 120, 0, 21, 0, 122, 0, 92, 0, 32, 0, 3, 0, 38, 0, 74, 0, 26, 0, 32, 0, 10, 0, 2]} +{"text": "Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich.", "phonemes": ["v", "ˈ", "ɪ", "k", "t", "o", "ː", "ɾ", " ", "j", "ˈ", "ɑ", "ː", "k", "t", " ", "t", "s", "v", "ˈ", "œ", "l", "f", " ", "b", "ˈ", "ɔ", "k", "s", "k", "ɛ", "m", "p", "f", "ɜ", " ", "k", "v", "ˈ", "e", "ː", "ɾ", " ", "ˌ", "y", "ː", "b", "ɜ", " ", "d", "e", "ː", "n", " ", "ɡ", "ɾ", "ˈ", "o", "ː", "s", "ə", "n", " ", "z", "ˈ", "y", "l", "t", "ɜ", " ", "d", "ˈ", "a", "ɪ", "c", "̧", "."], "phoneme_ids": [1, 0, 34, 0, 120, 0, 74, 0, 23, 0, 32, 0, 27, 0, 122, 0, 92, 0, 3, 0, 22, 0, 120, 0, 51, 0, 122, 0, 23, 0, 32, 0, 3, 0, 32, 0, 31, 0, 34, 0, 120, 0, 45, 0, 24, 0, 19, 0, 3, 0, 15, 0, 120, 0, 54, 0, 23, 0, 31, 0, 23, 0, 61, 0, 25, 0, 28, 0, 19, 0, 62, 0, 3, 0, 23, 0, 34, 0, 120, 0, 18, 0, 122, 0, 92, 0, 3, 0, 121, 0, 37, 0, 122, 0, 15, 0, 62, 0, 3, 0, 17, 0, 18, 0, 122, 0, 26, 0, 3, 0, 66, 0, 92, 0, 120, 0, 27, 0, 122, 0, 31, 0, 59, 0, 26, 0, 3, 0, 38, 0, 120, 0, 37, 0, 24, 0, 32, 0, 62, 0, 3, 0, 17, 0, 120, 0, 14, 0, 74, 0, 16, 0, 140, 0, 10, 0, 2]} +{"text": "Falsches Üben von Xylophonmusik quält jeden größeren Zwerg.", "phonemes": ["f", "ˈ", "a", "l", "ʃ", "ə", "s", " ", "ˈ", "y", "ː", "b", "ə", "n", " ", "f", "ɔ", "n", " ", "k", "s", "ˈ", "y", "ː", "l", "o", "ː", "f", "ˌ", "ɔ", "n", "m", "u", "ː", "z", "ˌ", "i", 
"ː", "k", " ", "k", "v", "ˈ", "ɛ", "l", "t", " ", "j", "ˈ", "e", "ː", "d", "ə", "n", " ", "ɡ", "ɾ", "ˈ", "ø", "ː", "s", "ə", "r", "ə", "n", " ", "t", "s", "v", "ˈ", "ɛ", "ɾ", "k", "."], "phoneme_ids": [1, 0, 19, 0, 120, 0, 14, 0, 24, 0, 96, 0, 59, 0, 31, 0, 3, 0, 120, 0, 37, 0, 122, 0, 15, 0, 59, 0, 26, 0, 3, 0, 19, 0, 54, 0, 26, 0, 3, 0, 23, 0, 31, 0, 120, 0, 37, 0, 122, 0, 24, 0, 27, 0, 122, 0, 19, 0, 121, 0, 54, 0, 26, 0, 25, 0, 33, 0, 122, 0, 38, 0, 121, 0, 21, 0, 122, 0, 23, 0, 3, 0, 23, 0, 34, 0, 120, 0, 61, 0, 24, 0, 32, 0, 3, 0, 22, 0, 120, 0, 18, 0, 122, 0, 17, 0, 59, 0, 26, 0, 3, 0, 66, 0, 92, 0, 120, 0, 42, 0, 122, 0, 31, 0, 59, 0, 30, 0, 59, 0, 26, 0, 3, 0, 32, 0, 31, 0, 34, 0, 120, 0, 61, 0, 92, 0, 23, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_en-gb-x-rp.jsonl b/etc/test_sentences/test_en-gb-x-rp.jsonl new file mode 100644 index 0000000..1935e9c --- /dev/null +++ b/etc/test_sentences/test_en-gb-x-rp.jsonl @@ -0,0 +1,7 @@ +{"text": "A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky.", "phonemes": ["ɐ", " ", "ɹ", "ˈ", "e", "ɪ", "n", "b", "ə", "ʊ", " ", "ɪ", "z", " ", "ɐ", " ", "m", "ˌ", "i", "ː", "t", "ɪ", "ˌ", "ɔ", "ː", "ɹ", "ə", "l", "ˈ", "ɒ", "d", "ʒ", "ɪ", "k", "ə", "l", " ", "f", "ɪ", "n", "ˈ", "ɒ", "m", "ɪ", "n", "ə", "n", " ", "ð", "æ", "t", " ", "ɪ", "z", " ", "k", "ˈ", "ɔ", "ː", "z", "d", " ", "b", "a", "ɪ", " ", "ɹ", "ɪ", "f", "l", "ˈ", "ɛ", "k", "ʃ", "ə", "n", ",", " ", "ɹ", "ɪ", "f", "ɹ", "ˈ", "æ", "k", "ʃ", "ə", "n", " ", "æ", "n", "d", " ", "d", "ɪ", "s", "p", "ˈ", "ɜ", "ː", "ʃ", "ə", "n", " ", "ɒ", "v", " ", "l", "ˈ", "a", "ɪ", "t", " ", "ɪ", "n", " ", "w", "ˈ", "ɔ", "ː", "t", "ɐ", " ", "d", "ɹ", "ˈ", "ɒ", "p", "l", "ɪ", "t", "s", " ", "ɹ", "ɪ", "z", "ˈ", "ʌ", "l", "t", "ɪ", "ŋ", " ", "ɪ", "n", " ", "ɐ", " ", "s", "p", "ˈ", "ɛ", "k", "t", "ɹ", "ə", "m", " ", "ɒ", "v", " ", "l", "ˈ", "a", "ɪ", "t", " ", 
"ɐ", "p", "ˈ", "i", "ə", "ɹ", "ɪ", "ŋ", " ", "ɪ", "n", "ð", "ə", " ", "s", "k", "ˈ", "a", "ɪ", "."], "phoneme_ids": [1, 0, 50, 0, 3, 0, 88, 0, 120, 0, 18, 0, 74, 0, 26, 0, 15, 0, 59, 0, 100, 0, 3, 0, 74, 0, 38, 0, 3, 0, 50, 0, 3, 0, 25, 0, 121, 0, 21, 0, 122, 0, 32, 0, 74, 0, 121, 0, 54, 0, 122, 0, 88, 0, 59, 0, 24, 0, 120, 0, 52, 0, 17, 0, 108, 0, 74, 0, 23, 0, 59, 0, 24, 0, 3, 0, 19, 0, 74, 0, 26, 0, 120, 0, 52, 0, 25, 0, 74, 0, 26, 0, 59, 0, 26, 0, 3, 0, 41, 0, 39, 0, 32, 0, 3, 0, 74, 0, 38, 0, 3, 0, 23, 0, 120, 0, 54, 0, 122, 0, 38, 0, 17, 0, 3, 0, 15, 0, 14, 0, 74, 0, 3, 0, 88, 0, 74, 0, 19, 0, 24, 0, 120, 0, 61, 0, 23, 0, 96, 0, 59, 0, 26, 0, 8, 0, 3, 0, 88, 0, 74, 0, 19, 0, 88, 0, 120, 0, 39, 0, 23, 0, 96, 0, 59, 0, 26, 0, 3, 0, 39, 0, 26, 0, 17, 0, 3, 0, 17, 0, 74, 0, 31, 0, 28, 0, 120, 0, 62, 0, 122, 0, 96, 0, 59, 0, 26, 0, 3, 0, 52, 0, 34, 0, 3, 0, 24, 0, 120, 0, 14, 0, 74, 0, 32, 0, 3, 0, 74, 0, 26, 0, 3, 0, 35, 0, 120, 0, 54, 0, 122, 0, 32, 0, 50, 0, 3, 0, 17, 0, 88, 0, 120, 0, 52, 0, 28, 0, 24, 0, 74, 0, 32, 0, 31, 0, 3, 0, 88, 0, 74, 0, 38, 0, 120, 0, 102, 0, 24, 0, 32, 0, 74, 0, 44, 0, 3, 0, 74, 0, 26, 0, 3, 0, 50, 0, 3, 0, 31, 0, 28, 0, 120, 0, 61, 0, 23, 0, 32, 0, 88, 0, 59, 0, 25, 0, 3, 0, 52, 0, 34, 0, 3, 0, 24, 0, 120, 0, 14, 0, 74, 0, 32, 0, 3, 0, 50, 0, 28, 0, 120, 0, 21, 0, 59, 0, 88, 0, 74, 0, 44, 0, 3, 0, 74, 0, 26, 0, 41, 0, 59, 0, 3, 0, 31, 0, 23, 0, 120, 0, 14, 0, 74, 0, 10, 0, 2]} +{"text": "It takes the form of a multi-colored circular arc.", "phonemes": ["ɪ", "t", " ", "t", "ˈ", "e", "ɪ", "k", "s", " ", "ð", "ə", " ", "f", "ˈ", "ɔ", "ː", "m", " ", "ə", "v", "ɐ", " ", "m", "ˈ", "ʌ", "l", "t", "ɪ", "k", "ˈ", "ʌ", "l", "ə", "d", " ", "s", "ˈ", "ɜ", "ː", "k", "j", "ʊ", "l", "ɐ", "ɹ", " ", "ˈ", "ɑ", "ː", "k", "."], "phoneme_ids": [1, 0, 74, 0, 32, 0, 3, 0, 32, 0, 120, 0, 18, 0, 74, 0, 23, 0, 31, 0, 3, 0, 41, 0, 59, 0, 3, 0, 19, 0, 120, 0, 54, 0, 122, 0, 25, 0, 3, 0, 59, 0, 34, 0, 50, 0, 3, 0, 25, 0, 120, 0, 102, 0, 24, 0, 32, 0, 74, 0, 23, 
0, 120, 0, 102, 0, 24, 0, 59, 0, 17, 0, 3, 0, 31, 0, 120, 0, 62, 0, 122, 0, 23, 0, 22, 0, 100, 0, 24, 0, 50, 0, 88, 0, 3, 0, 120, 0, 51, 0, 122, 0, 23, 0, 10, 0, 2]} +{"text": "Rainbows caused by sunlight always appear in the section of sky directly opposite the Sun.", "phonemes": ["ɹ", "ˈ", "e", "ɪ", "n", "b", "ə", "ʊ", "z", " ", "k", "ˈ", "ɔ", "ː", "z", "d", " ", "b", "a", "ɪ", " ", "s", "ˈ", "ʌ", "n", "l", "a", "ɪ", "t", " ", "ˈ", "ɔ", "ː", "l", "w", "e", "ɪ", "z", " ", "ɐ", "p", "ˈ", "i", "ə", "ɹ", " ", "ɪ", "n", "ð", "ə", " ", "s", "ˈ", "ɛ", "k", "ʃ", "ə", "n", " ", "ɒ", "v", " ", "s", "k", "ˈ", "a", "ɪ", " ", "d", "a", "ɪ", "ɹ", "ˈ", "ɛ", "k", "t", "l", "ɪ", " ", "ˈ", "ɒ", "p", "ə", "z", "ˌ", "ɪ", "t", " ", "ð", "ə", " ", "s", "ˈ", "ʌ", "n", "."], "phoneme_ids": [1, 0, 88, 0, 120, 0, 18, 0, 74, 0, 26, 0, 15, 0, 59, 0, 100, 0, 38, 0, 3, 0, 23, 0, 120, 0, 54, 0, 122, 0, 38, 0, 17, 0, 3, 0, 15, 0, 14, 0, 74, 0, 3, 0, 31, 0, 120, 0, 102, 0, 26, 0, 24, 0, 14, 0, 74, 0, 32, 0, 3, 0, 120, 0, 54, 0, 122, 0, 24, 0, 35, 0, 18, 0, 74, 0, 38, 0, 3, 0, 50, 0, 28, 0, 120, 0, 21, 0, 59, 0, 88, 0, 3, 0, 74, 0, 26, 0, 41, 0, 59, 0, 3, 0, 31, 0, 120, 0, 61, 0, 23, 0, 96, 0, 59, 0, 26, 0, 3, 0, 52, 0, 34, 0, 3, 0, 31, 0, 23, 0, 120, 0, 14, 0, 74, 0, 3, 0, 17, 0, 14, 0, 74, 0, 88, 0, 120, 0, 61, 0, 23, 0, 32, 0, 24, 0, 74, 0, 3, 0, 120, 0, 52, 0, 28, 0, 59, 0, 38, 0, 121, 0, 74, 0, 32, 0, 3, 0, 41, 0, 59, 0, 3, 0, 31, 0, 120, 0, 102, 0, 26, 0, 10, 0, 2]} +{"text": "With tenure, Suzie’d have all the more leisure for yachting, but her publications are no good.", "phonemes": ["w", "ɪ", "ð", " ", "t", "ˈ", "ɛ", "n", "j", "ɐ", ",", " ", "s", "ˈ", "u", "ː", "z", "ɪ", "d", " ", "h", "æ", "v", " ", "ˈ", "ɔ", "ː", "l", " ", "ð", "ə", " ", "m", "ˈ", "ɔ", "ː", " ", "l", "ˈ", "ɛ", "ʒ", "ɐ", " ", "f", "ɔ", "ː", " ", "j", "ˈ", "ɒ", "t", "ɪ", "ŋ", ",", " ", "b", "ˌ", "ʌ", "t", " ", "h", "ɜ", "ː", " ", "p", "ˌ", "ʌ", "b", "l", "ɪ", "k", "ˈ", "e", "ɪ", "ʃ", "ə", "n", "z", " ", "ɑ", "ː", " ", 
"n", "ˈ", "ə", "ʊ", " ", "ɡ", "ˈ", "ʊ", "d", "."], "phoneme_ids": [1, 0, 35, 0, 74, 0, 41, 0, 3, 0, 32, 0, 120, 0, 61, 0, 26, 0, 22, 0, 50, 0, 8, 0, 3, 0, 31, 0, 120, 0, 33, 0, 122, 0, 38, 0, 74, 0, 17, 0, 3, 0, 20, 0, 39, 0, 34, 0, 3, 0, 120, 0, 54, 0, 122, 0, 24, 0, 3, 0, 41, 0, 59, 0, 3, 0, 25, 0, 120, 0, 54, 0, 122, 0, 3, 0, 24, 0, 120, 0, 61, 0, 108, 0, 50, 0, 3, 0, 19, 0, 54, 0, 122, 0, 3, 0, 22, 0, 120, 0, 52, 0, 32, 0, 74, 0, 44, 0, 8, 0, 3, 0, 15, 0, 121, 0, 102, 0, 32, 0, 3, 0, 20, 0, 62, 0, 122, 0, 3, 0, 28, 0, 121, 0, 102, 0, 15, 0, 24, 0, 74, 0, 23, 0, 120, 0, 18, 0, 74, 0, 96, 0, 59, 0, 26, 0, 38, 0, 3, 0, 51, 0, 122, 0, 3, 0, 26, 0, 120, 0, 59, 0, 100, 0, 3, 0, 66, 0, 120, 0, 100, 0, 17, 0, 10, 0, 2]} +{"text": "Shaw, those twelve beige hooks are joined if I patch a young, gooey mouth.", "phonemes": ["ʃ", "ˈ", "ɔ", "ː", ",", " ", "ð", "ə", "ʊ", "z", " ", "t", "w", "ˈ", "ɛ", "l", "v", " ", "b", "ˈ", "e", "ɪ", "ʒ", " ", "h", "ˈ", "ʊ", "k", "s", " ", "ɑ", "ː", " ", "d", "ʒ", "ˈ", "ɔ", "ɪ", "n", "d", " ", "ɪ", "f", " ", "a", "ɪ", " ", "p", "ˈ", "æ", "t", "ʃ", " ", "ɐ", " ", "j", "ˈ", "ʌ", "ŋ", ",", " ", "ɡ", "ˈ", "u", "ː", "ɪ", " ", "m", "ˈ", "a", "ʊ", "θ", "."], "phoneme_ids": [1, 0, 96, 0, 120, 0, 54, 0, 122, 0, 8, 0, 3, 0, 41, 0, 59, 0, 100, 0, 38, 0, 3, 0, 32, 0, 35, 0, 120, 0, 61, 0, 24, 0, 34, 0, 3, 0, 15, 0, 120, 0, 18, 0, 74, 0, 108, 0, 3, 0, 20, 0, 120, 0, 100, 0, 23, 0, 31, 0, 3, 0, 51, 0, 122, 0, 3, 0, 17, 0, 108, 0, 120, 0, 54, 0, 74, 0, 26, 0, 17, 0, 3, 0, 74, 0, 19, 0, 3, 0, 14, 0, 74, 0, 3, 0, 28, 0, 120, 0, 39, 0, 32, 0, 96, 0, 3, 0, 50, 0, 3, 0, 22, 0, 120, 0, 102, 0, 44, 0, 8, 0, 3, 0, 66, 0, 120, 0, 33, 0, 122, 0, 74, 0, 3, 0, 25, 0, 120, 0, 14, 0, 100, 0, 126, 0, 10, 0, 2]} +{"text": "Are those shy Eurasian footwear, cowboy chaps, or jolly earthmoving headgear?", "phonemes": ["ɑ", "ː", " ", "ð", "ə", "ʊ", "z", " ", "ʃ", "ˈ", "a", "ɪ", " ", "j", "u", "ː", "ɹ", "ˈ", "e", "ɪ", "z", "i", "ə", "n", " ", "f", "ˈ", "ʊ", "t", "w", "e", "ə", 
",", " ", "k", "ˈ", "a", "ʊ", "b", "ɔ", "ɪ", " ", "t", "ʃ", "ˈ", "æ", "p", "s", ",", " ", "ɔ", "ː", " ", "d", "ʒ", "ˈ", "ɒ", "l", "ɪ", " ", "ˈ", "ɜ", "ː", "θ", "m", "u", "ː", "v", "ɪ", "ŋ", " ", "h", "ˈ", "ɛ", "d", "ɡ", "i", "ə", "?"], "phoneme_ids": [1, 0, 51, 0, 122, 0, 3, 0, 41, 0, 59, 0, 100, 0, 38, 0, 3, 0, 96, 0, 120, 0, 14, 0, 74, 0, 3, 0, 22, 0, 33, 0, 122, 0, 88, 0, 120, 0, 18, 0, 74, 0, 38, 0, 21, 0, 59, 0, 26, 0, 3, 0, 19, 0, 120, 0, 100, 0, 32, 0, 35, 0, 18, 0, 59, 0, 8, 0, 3, 0, 23, 0, 120, 0, 14, 0, 100, 0, 15, 0, 54, 0, 74, 0, 3, 0, 32, 0, 96, 0, 120, 0, 39, 0, 28, 0, 31, 0, 8, 0, 3, 0, 54, 0, 122, 0, 3, 0, 17, 0, 108, 0, 120, 0, 52, 0, 24, 0, 74, 0, 3, 0, 120, 0, 62, 0, 122, 0, 126, 0, 25, 0, 33, 0, 122, 0, 34, 0, 74, 0, 44, 0, 3, 0, 20, 0, 120, 0, 61, 0, 17, 0, 66, 0, 21, 0, 59, 0, 13, 0, 2]} +{"text": "The beige hue on the waters of the loch impressed all, including the French queen, before she heard that symphony again, just as young Arthur wanted.", "phonemes": ["ð", "ə", " ", "b", "ˈ", "e", "ɪ", "ʒ", " ", "h", "j", "ˈ", "u", "ː", " ", "ɒ", "n", "ð", "ə", " ", "w", "ˈ", "ɔ", "ː", "t", "ə", "z", " ", "ɒ", "v", "ð", "ə", " ", "l", "ˈ", "ɒ", "x", " ", "ɪ", "m", "p", "ɹ", "ˈ", "ɛ", "s", "t", " ", "ˈ", "ɔ", "ː", "l", ",", " ", "ɪ", "ŋ", "k", "l", "ˈ", "u", "ː", "d", "ɪ", "ŋ", " ", "ð", "ə", " ", "f", "ɹ", "ˈ", "ɛ", "n", "t", "ʃ", " ", "k", "w", "ˈ", "i", "ː", "n", ",", " ", "b", "ɪ", "f", "ˌ", "ɔ", "ː", " ", "ʃ", "i", "ː", " ", "h", "ˈ", "ɜ", "ː", "d", " ", "ð", "æ", "t", " ", "s", "ˈ", "ɪ", "m", "f", "ə", "n", "ɪ", " ", "ɐ", "ɡ", "ˈ", "ɛ", "n", ",", " ", "d", "ʒ", "ˈ", "ʌ", "s", "t", " ", "æ", "z", " ", "j", "ˈ", "ʌ", "ŋ", " ", "ˈ", "ɑ", "ː", "θ", "ɐ", " ", "w", "ˈ", "ɒ", "n", "t", "ɪ", "d", "."], "phoneme_ids": [1, 0, 41, 0, 59, 0, 3, 0, 15, 0, 120, 0, 18, 0, 74, 0, 108, 0, 3, 0, 20, 0, 22, 0, 120, 0, 33, 0, 122, 0, 3, 0, 52, 0, 26, 0, 41, 0, 59, 0, 3, 0, 35, 0, 120, 0, 54, 0, 122, 0, 32, 0, 59, 0, 38, 0, 3, 0, 52, 0, 34, 0, 41, 0, 59, 0, 3, 0, 24, 
0, 120, 0, 52, 0, 36, 0, 3, 0, 74, 0, 25, 0, 28, 0, 88, 0, 120, 0, 61, 0, 31, 0, 32, 0, 3, 0, 120, 0, 54, 0, 122, 0, 24, 0, 8, 0, 3, 0, 74, 0, 44, 0, 23, 0, 24, 0, 120, 0, 33, 0, 122, 0, 17, 0, 74, 0, 44, 0, 3, 0, 41, 0, 59, 0, 3, 0, 19, 0, 88, 0, 120, 0, 61, 0, 26, 0, 32, 0, 96, 0, 3, 0, 23, 0, 35, 0, 120, 0, 21, 0, 122, 0, 26, 0, 8, 0, 3, 0, 15, 0, 74, 0, 19, 0, 121, 0, 54, 0, 122, 0, 3, 0, 96, 0, 21, 0, 122, 0, 3, 0, 20, 0, 120, 0, 62, 0, 122, 0, 17, 0, 3, 0, 41, 0, 39, 0, 32, 0, 3, 0, 31, 0, 120, 0, 74, 0, 25, 0, 19, 0, 59, 0, 26, 0, 74, 0, 3, 0, 50, 0, 66, 0, 120, 0, 61, 0, 26, 0, 8, 0, 3, 0, 17, 0, 108, 0, 120, 0, 102, 0, 31, 0, 32, 0, 3, 0, 39, 0, 38, 0, 3, 0, 22, 0, 120, 0, 102, 0, 44, 0, 3, 0, 120, 0, 51, 0, 122, 0, 126, 0, 50, 0, 3, 0, 35, 0, 120, 0, 52, 0, 26, 0, 32, 0, 74, 0, 17, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_en-us.jsonl b/etc/test_sentences/test_en-us.jsonl new file mode 100644 index 0000000..2b6aba1 --- /dev/null +++ b/etc/test_sentences/test_en-us.jsonl @@ -0,0 +1,7 @@ +{"text": "A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky.", "phonemes": ["ɐ", " ", "ɹ", "ˈ", "e", "ɪ", "n", "b", "o", "ʊ", " ", "ɪ", "z", " ", "ɐ", " ", "m", "ˌ", "i", "ː", "ɾ", "ɪ", "ˌ", "o", "ː", "ɹ", "ə", "l", "ˈ", "ɑ", "ː", "d", "ʒ", "ɪ", "k", "ə", "l", " ", "f", "ɪ", "n", "ˈ", "ɑ", "ː", "m", "ɪ", "n", "ə", "n", " ", "ð", "æ", "t", " ", "ɪ", "z", " ", "k", "ˈ", "ɔ", "ː", "z", "d", " ", "b", "a", "ɪ", " ", "ɹ", "ᵻ", "f", "l", "ˈ", "ɛ", "k", "ʃ", "ə", "n", ",", " ", "ɹ", "ᵻ", "f", "ɹ", "ˈ", "æ", "k", "ʃ", "ə", "n", " ", "æ", "n", "d", " ", "d", "ɪ", "s", "p", "ˈ", "ɜ", "ː", "ʒ", "ə", "n", " ", "ʌ", "v", " ", "l", "ˈ", "a", "ɪ", "t", " ", "ɪ", "n", " ", "w", "ˈ", "ɔ", "ː", "ɾ", "ɚ", " ", "d", "ɹ", "ˈ", "ɑ", "ː", "p", "l", "ɪ", "t", "s", " ", "ɹ", "ɪ", "z", "ˈ", "ʌ", "l", "t", "ɪ", "ŋ", " ", "ɪ", "n", " ", "ɐ", " ", "s", "p", "ˈ", "ɛ", 
"k", "t", "ɹ", "ə", "m", " ", "ʌ", "v", " ", "l", "ˈ", "a", "ɪ", "t", " ", "ɐ", "p", "ˈ", "ɪ", "ɹ", "ɪ", "ŋ", " ", "ɪ", "n", "ð", "ə", " ", "s", "k", "ˈ", "a", "ɪ", "."], "phoneme_ids": [1, 0, 50, 0, 3, 0, 88, 0, 120, 0, 18, 0, 74, 0, 26, 0, 15, 0, 27, 0, 100, 0, 3, 0, 74, 0, 38, 0, 3, 0, 50, 0, 3, 0, 25, 0, 121, 0, 21, 0, 122, 0, 92, 0, 74, 0, 121, 0, 27, 0, 122, 0, 88, 0, 59, 0, 24, 0, 120, 0, 51, 0, 122, 0, 17, 0, 108, 0, 74, 0, 23, 0, 59, 0, 24, 0, 3, 0, 19, 0, 74, 0, 26, 0, 120, 0, 51, 0, 122, 0, 25, 0, 74, 0, 26, 0, 59, 0, 26, 0, 3, 0, 41, 0, 39, 0, 32, 0, 3, 0, 74, 0, 38, 0, 3, 0, 23, 0, 120, 0, 54, 0, 122, 0, 38, 0, 17, 0, 3, 0, 15, 0, 14, 0, 74, 0, 3, 0, 88, 0, 128, 0, 19, 0, 24, 0, 120, 0, 61, 0, 23, 0, 96, 0, 59, 0, 26, 0, 8, 0, 3, 0, 88, 0, 128, 0, 19, 0, 88, 0, 120, 0, 39, 0, 23, 0, 96, 0, 59, 0, 26, 0, 3, 0, 39, 0, 26, 0, 17, 0, 3, 0, 17, 0, 74, 0, 31, 0, 28, 0, 120, 0, 62, 0, 122, 0, 108, 0, 59, 0, 26, 0, 3, 0, 102, 0, 34, 0, 3, 0, 24, 0, 120, 0, 14, 0, 74, 0, 32, 0, 3, 0, 74, 0, 26, 0, 3, 0, 35, 0, 120, 0, 54, 0, 122, 0, 92, 0, 60, 0, 3, 0, 17, 0, 88, 0, 120, 0, 51, 0, 122, 0, 28, 0, 24, 0, 74, 0, 32, 0, 31, 0, 3, 0, 88, 0, 74, 0, 38, 0, 120, 0, 102, 0, 24, 0, 32, 0, 74, 0, 44, 0, 3, 0, 74, 0, 26, 0, 3, 0, 50, 0, 3, 0, 31, 0, 28, 0, 120, 0, 61, 0, 23, 0, 32, 0, 88, 0, 59, 0, 25, 0, 3, 0, 102, 0, 34, 0, 3, 0, 24, 0, 120, 0, 14, 0, 74, 0, 32, 0, 3, 0, 50, 0, 28, 0, 120, 0, 74, 0, 88, 0, 74, 0, 44, 0, 3, 0, 74, 0, 26, 0, 41, 0, 59, 0, 3, 0, 31, 0, 23, 0, 120, 0, 14, 0, 74, 0, 10, 0, 2]} +{"text": "It takes the form of a multi-colored circular arc.", "phonemes": ["ɪ", "t", " ", "t", "ˈ", "e", "ɪ", "k", "s", " ", "ð", "ə", " ", "f", "ˈ", "ɔ", "ː", "ɹ", "m", " ", "ə", "v", "ə", " ", "m", "ˈ", "ʌ", "l", "t", "a", "ɪ", "k", "ˈ", "ʌ", "l", "ɚ", "d", " ", "s", "ˈ", "ɜ", "ː", "k", "j", "ʊ", "l", "ɚ", "ɹ", " ", "ˈ", "ɑ", "ː", "ɹ", "k", "."], "phoneme_ids": [1, 0, 74, 0, 32, 0, 3, 0, 32, 0, 120, 0, 18, 0, 74, 0, 23, 0, 31, 0, 3, 0, 41, 0, 59, 0, 3, 0, 19, 0, 
120, 0, 54, 0, 122, 0, 88, 0, 25, 0, 3, 0, 59, 0, 34, 0, 59, 0, 3, 0, 25, 0, 120, 0, 102, 0, 24, 0, 32, 0, 14, 0, 74, 0, 23, 0, 120, 0, 102, 0, 24, 0, 60, 0, 17, 0, 3, 0, 31, 0, 120, 0, 62, 0, 122, 0, 23, 0, 22, 0, 100, 0, 24, 0, 60, 0, 88, 0, 3, 0, 120, 0, 51, 0, 122, 0, 88, 0, 23, 0, 10, 0, 2]} +{"text": "Rainbows caused by sunlight always appear in the section of sky directly opposite the Sun.", "phonemes": ["ɹ", "ˈ", "e", "ɪ", "n", "b", "o", "ʊ", "z", " ", "k", "ˈ", "ɔ", "ː", "z", "d", " ", "b", "a", "ɪ", " ", "s", "ˈ", "ʌ", "n", "l", "a", "ɪ", "t", " ", "ˈ", "ɔ", "ː", "l", "w", "e", "ɪ", "z", " ", "ɐ", "p", "ˈ", "ɪ", "ɹ", " ", "ɪ", "n", "ð", "ə", " ", "s", "ˈ", "ɛ", "k", "ʃ", "ə", "n", " ", "ʌ", "v", " ", "s", "k", "ˈ", "a", "ɪ", " ", "d", "ᵻ", "ɹ", "ˈ", "ɛ", "k", "t", "l", "i", " ", "ˈ", "ɑ", "ː", "p", "ə", "z", "ˌ", "ɪ", "t", " ", "ð", "ə", " ", "s", "ˈ", "ʌ", "n", "."], "phoneme_ids": [1, 0, 88, 0, 120, 0, 18, 0, 74, 0, 26, 0, 15, 0, 27, 0, 100, 0, 38, 0, 3, 0, 23, 0, 120, 0, 54, 0, 122, 0, 38, 0, 17, 0, 3, 0, 15, 0, 14, 0, 74, 0, 3, 0, 31, 0, 120, 0, 102, 0, 26, 0, 24, 0, 14, 0, 74, 0, 32, 0, 3, 0, 120, 0, 54, 0, 122, 0, 24, 0, 35, 0, 18, 0, 74, 0, 38, 0, 3, 0, 50, 0, 28, 0, 120, 0, 74, 0, 88, 0, 3, 0, 74, 0, 26, 0, 41, 0, 59, 0, 3, 0, 31, 0, 120, 0, 61, 0, 23, 0, 96, 0, 59, 0, 26, 0, 3, 0, 102, 0, 34, 0, 3, 0, 31, 0, 23, 0, 120, 0, 14, 0, 74, 0, 3, 0, 17, 0, 128, 0, 88, 0, 120, 0, 61, 0, 23, 0, 32, 0, 24, 0, 21, 0, 3, 0, 120, 0, 51, 0, 122, 0, 28, 0, 59, 0, 38, 0, 121, 0, 74, 0, 32, 0, 3, 0, 41, 0, 59, 0, 3, 0, 31, 0, 120, 0, 102, 0, 26, 0, 10, 0, 2]} +{"text": "With tenure, Suzie’d have all the more leisure for yachting, but her publications are no good.", "phonemes": ["w", "ɪ", "ð", " ", "t", "ˈ", "ɛ", "n", "j", "ɚ", ",", " ", "s", "ˈ", "u", "ː", "z", "i", "d", " ", "h", "æ", "v", " ", "ˈ", "ɔ", "ː", "l", " ", "ð", "ə", " ", "m", "ˈ", "o", "ː", "ɹ", " ", "l", "ˈ", "i", "ː", "ʒ", "ɚ", " ", "f", "ɔ", "ː", "ɹ", " ", "j", "ˈ", "ɑ", "ː", "ɾ", "ɪ", "ŋ", ",", 
" ", "b", "ˌ", "ʌ", "t", " ", "h", "ɜ", "ː", " ", "p", "ˌ", "ʌ", "b", "l", "ɪ", "k", "ˈ", "e", "ɪ", "ʃ", "ə", "n", "z", " ", "ɑ", "ː", "ɹ", " ", "n", "ˈ", "o", "ʊ", " ", "ɡ", "ˈ", "ʊ", "d", "."], "phoneme_ids": [1, 0, 35, 0, 74, 0, 41, 0, 3, 0, 32, 0, 120, 0, 61, 0, 26, 0, 22, 0, 60, 0, 8, 0, 3, 0, 31, 0, 120, 0, 33, 0, 122, 0, 38, 0, 21, 0, 17, 0, 3, 0, 20, 0, 39, 0, 34, 0, 3, 0, 120, 0, 54, 0, 122, 0, 24, 0, 3, 0, 41, 0, 59, 0, 3, 0, 25, 0, 120, 0, 27, 0, 122, 0, 88, 0, 3, 0, 24, 0, 120, 0, 21, 0, 122, 0, 108, 0, 60, 0, 3, 0, 19, 0, 54, 0, 122, 0, 88, 0, 3, 0, 22, 0, 120, 0, 51, 0, 122, 0, 92, 0, 74, 0, 44, 0, 8, 0, 3, 0, 15, 0, 121, 0, 102, 0, 32, 0, 3, 0, 20, 0, 62, 0, 122, 0, 3, 0, 28, 0, 121, 0, 102, 0, 15, 0, 24, 0, 74, 0, 23, 0, 120, 0, 18, 0, 74, 0, 96, 0, 59, 0, 26, 0, 38, 0, 3, 0, 51, 0, 122, 0, 88, 0, 3, 0, 26, 0, 120, 0, 27, 0, 100, 0, 3, 0, 66, 0, 120, 0, 100, 0, 17, 0, 10, 0, 2]} +{"text": "Shaw, those twelve beige hooks are joined if I patch a young, gooey mouth.", "phonemes": ["ʃ", "ˈ", "ɔ", "ː", ",", " ", "ð", "o", "ʊ", "z", " ", "t", "w", "ˈ", "ɛ", "l", "v", " ", "b", "ˈ", "e", "ɪ", "ʒ", " ", "h", "ˈ", "ʊ", "k", "s", " ", "ɑ", "ː", "ɹ", " ", "d", "ʒ", "ˈ", "ɔ", "ɪ", "n", "d", " ", "ɪ", "f", " ", "a", "ɪ", " ", "p", "ˈ", "æ", "t", "ʃ", " ", "ɐ", " ", "j", "ˈ", "ʌ", "ŋ", ",", " ", "ɡ", "ˈ", "u", "ː", "i", " ", "m", "ˈ", "a", "ʊ", "θ", "."], "phoneme_ids": [1, 0, 96, 0, 120, 0, 54, 0, 122, 0, 8, 0, 3, 0, 41, 0, 27, 0, 100, 0, 38, 0, 3, 0, 32, 0, 35, 0, 120, 0, 61, 0, 24, 0, 34, 0, 3, 0, 15, 0, 120, 0, 18, 0, 74, 0, 108, 0, 3, 0, 20, 0, 120, 0, 100, 0, 23, 0, 31, 0, 3, 0, 51, 0, 122, 0, 88, 0, 3, 0, 17, 0, 108, 0, 120, 0, 54, 0, 74, 0, 26, 0, 17, 0, 3, 0, 74, 0, 19, 0, 3, 0, 14, 0, 74, 0, 3, 0, 28, 0, 120, 0, 39, 0, 32, 0, 96, 0, 3, 0, 50, 0, 3, 0, 22, 0, 120, 0, 102, 0, 44, 0, 8, 0, 3, 0, 66, 0, 120, 0, 33, 0, 122, 0, 21, 0, 3, 0, 25, 0, 120, 0, 14, 0, 100, 0, 126, 0, 10, 0, 2]} +{"text": "Are those shy Eurasian footwear, cowboy chaps, or jolly 
earthmoving headgear?", "phonemes": ["ɑ", "ː", "ɹ", " ", "ð", "o", "ʊ", "z", " ", "ʃ", "ˈ", "a", "ɪ", " ", "j", "u", "ː", "ɹ", "ˈ", "e", "ɪ", "ʒ", "ə", "n", " ", "f", "ˈ", "ʊ", "t", "w", "ɛ", "ɹ", ",", " ", "k", "ˈ", "a", "ʊ", "b", "ɔ", "ɪ", " ", "t", "ʃ", "ˈ", "æ", "p", "s", ",", " ", "ɔ", "ː", "ɹ", " ", "d", "ʒ", "ˈ", "ɑ", "ː", "l", "i", " ", "ˈ", "ɜ", "ː", "θ", "m", "u", "ː", "v", "ɪ", "ŋ", " ", "h", "ˈ", "ɛ", "d", "ɡ", "ɪ", "ɹ", "?"], "phoneme_ids": [1, 0, 51, 0, 122, 0, 88, 0, 3, 0, 41, 0, 27, 0, 100, 0, 38, 0, 3, 0, 96, 0, 120, 0, 14, 0, 74, 0, 3, 0, 22, 0, 33, 0, 122, 0, 88, 0, 120, 0, 18, 0, 74, 0, 108, 0, 59, 0, 26, 0, 3, 0, 19, 0, 120, 0, 100, 0, 32, 0, 35, 0, 61, 0, 88, 0, 8, 0, 3, 0, 23, 0, 120, 0, 14, 0, 100, 0, 15, 0, 54, 0, 74, 0, 3, 0, 32, 0, 96, 0, 120, 0, 39, 0, 28, 0, 31, 0, 8, 0, 3, 0, 54, 0, 122, 0, 88, 0, 3, 0, 17, 0, 108, 0, 120, 0, 51, 0, 122, 0, 24, 0, 21, 0, 3, 0, 120, 0, 62, 0, 122, 0, 126, 0, 25, 0, 33, 0, 122, 0, 34, 0, 74, 0, 44, 0, 3, 0, 20, 0, 120, 0, 61, 0, 17, 0, 66, 0, 74, 0, 88, 0, 13, 0, 2]} +{"text": "The beige hue on the waters of the loch impressed all, including the French queen, before she heard that symphony again, just as young Arthur wanted.", "phonemes": ["ð", "ə", " ", "b", "ˈ", "e", "ɪ", "ʒ", " ", "h", "j", "ˈ", "u", "ː", " ", "ɔ", "n", "ð", "ə", " ", "w", "ˈ", "ɔ", "ː", "ɾ", "ɚ", "z", " ", "ʌ", "v", "ð", "ə", " ", "l", "ˈ", "ɑ", "ː", "x", " ", "ɪ", "m", "p", "ɹ", "ˈ", "ɛ", "s", "t", " ", "ˈ", "ɔ", "ː", "l", ",", " ", "ɪ", "ŋ", "k", "l", "ˈ", "u", "ː", "d", "ɪ", "ŋ", " ", "ð", "ə", " ", "f", "ɹ", "ˈ", "ɛ", "n", "t", "ʃ", " ", "k", "w", "ˈ", "i", "ː", "n", ",", " ", "b", "ᵻ", "f", "ˌ", "o", "ː", "ɹ", " ", "ʃ", "i", "ː", " ", "h", "ˈ", "ɜ", "ː", "d", " ", "ð", "æ", "t", " ", "s", "ˈ", "ɪ", "m", "f", "ə", "n", "i", " ", "ɐ", "ɡ", "ˈ", "ɛ", "n", ",", " ", "d", "ʒ", "ˈ", "ʌ", "s", "t", " ", "æ", "z", " ", "j", "ˈ", "ʌ", "ŋ", " ", "ˈ", "ɑ", "ː", "ɹ", "θ", "ɚ", " ", "w", "ˈ", "ɔ", "n", "t", "ᵻ", "d", "."], "phoneme_ids": [1, 
0, 41, 0, 59, 0, 3, 0, 15, 0, 120, 0, 18, 0, 74, 0, 108, 0, 3, 0, 20, 0, 22, 0, 120, 0, 33, 0, 122, 0, 3, 0, 54, 0, 26, 0, 41, 0, 59, 0, 3, 0, 35, 0, 120, 0, 54, 0, 122, 0, 92, 0, 60, 0, 38, 0, 3, 0, 102, 0, 34, 0, 41, 0, 59, 0, 3, 0, 24, 0, 120, 0, 51, 0, 122, 0, 36, 0, 3, 0, 74, 0, 25, 0, 28, 0, 88, 0, 120, 0, 61, 0, 31, 0, 32, 0, 3, 0, 120, 0, 54, 0, 122, 0, 24, 0, 8, 0, 3, 0, 74, 0, 44, 0, 23, 0, 24, 0, 120, 0, 33, 0, 122, 0, 17, 0, 74, 0, 44, 0, 3, 0, 41, 0, 59, 0, 3, 0, 19, 0, 88, 0, 120, 0, 61, 0, 26, 0, 32, 0, 96, 0, 3, 0, 23, 0, 35, 0, 120, 0, 21, 0, 122, 0, 26, 0, 8, 0, 3, 0, 15, 0, 128, 0, 19, 0, 121, 0, 27, 0, 122, 0, 88, 0, 3, 0, 96, 0, 21, 0, 122, 0, 3, 0, 20, 0, 120, 0, 62, 0, 122, 0, 17, 0, 3, 0, 41, 0, 39, 0, 32, 0, 3, 0, 31, 0, 120, 0, 74, 0, 25, 0, 19, 0, 59, 0, 26, 0, 21, 0, 3, 0, 50, 0, 66, 0, 120, 0, 61, 0, 26, 0, 8, 0, 3, 0, 17, 0, 108, 0, 120, 0, 102, 0, 31, 0, 32, 0, 3, 0, 39, 0, 38, 0, 3, 0, 22, 0, 120, 0, 102, 0, 44, 0, 3, 0, 120, 0, 51, 0, 122, 0, 88, 0, 126, 0, 60, 0, 3, 0, 35, 0, 120, 0, 54, 0, 26, 0, 32, 0, 128, 0, 17, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_es.jsonl b/etc/test_sentences/test_es.jsonl new file mode 100644 index 0000000..6a89525 --- /dev/null +++ b/etc/test_sentences/test_es.jsonl @@ -0,0 +1,6 @@ +{"text": "Un arcoíris​ o arco iris es un fenómeno óptico y meteorológico que consiste en la aparición en el cielo de un arco de luz multicolor, originado por la descomposición de la luz solar en el espectro visible, la cual se produce por refracción, cuando los rayos del sol atraviesan pequeñas gotas de agua contenidas en la atmósfera terrestre.", "phonemes": ["ˈ", "u", "n", " ", "ˌ", "a", "ɾ", "k", "o", "ˈ", "i", "ɾ", "i", "s", " ", "o", " ", "ˈ", "a", "ɾ", "k", "o", " ", "ˈ", "i", "ɾ", "i", "s", " ", "ˈ", "e", "s", " ", "ˈ", "u", "n", " ", "f", "e", "n", "ˈ", "o", "m", "e", "n", "o", " ", "ˈ", "o", "p", "ː", "t", "i", "k", "o", " ", "i", " ", "m", "ˌ", "e", "t", "e", "ˌ", "o", "ɾ", "o", "l", "ˈ", "o", "x", "i", 
"k", "o", " ", "k", "e", " ", "k", "o", "n", "s", "ˈ", "i", "s", "t", "e", " ", "e", "n", " ", "l", "a", " ", "ˌ", "a", "p", "a", "ɾ", "i", "θ", "j", "ˈ", "o", "n", " ", "e", "n", " ", "e", "l", " ", "θ", "j", "ˈ", "e", "l", "o", " ", "ð", "e", " ", "ˈ", "u", "n", " ", "ˈ", "a", "ɾ", "k", "o", " ", "ð", "e", " ", "l", "ˈ", "u", "θ", " ", "m", "ˌ", "u", "l", "t", "i", "k", "o", "l", "ˈ", "o", "ɾ", ",", " ", "ˌ", "o", "ɾ", "i", "x", "i", "n", "ˈ", "a", "ð", "o", " ", "p", "o", "ɾ", " ", "l", "a", " ", "ð", "ˌ", "e", "s", "k", "o", "m", "p", "ˌ", "o", "s", "i", "θ", "j", "ˈ", "o", "n", " ", "d", "e", " ", "l", "a", " ", "l", "ˈ", "u", "θ", " ", "s", "o", "l", "ˈ", "a", "ɾ", " ", "e", "n", " ", "e", "l", " ", "e", "s", "p", "ˈ", "e", "k", "t", "ɾ", "o", " ", "β", "i", "s", "ˈ", "i", "β", "l", "e", ",", " ", "l", "a", " ", "k", "w", "ˈ", "a", "l", " ", "s", "e", " ", "p", "ɾ", "o", "ð", "ˈ", "u", "θ", "e", " ", "p", "o", "ɾ", " ", "r", "ˌ", "e", "f", "ɾ", "a", "k", "θ", "j", "ˈ", "o", "n", ",", " ", "k", "w", "ˌ", "a", "n", "d", "o", " ", "l", "o", "s", " ", "r", "ˈ", "a", "ʝ", "o", "s", " ", "ð", "e", "l", " ", "s", "ˈ", "o", "l", " ", "ˌ", "a", "t", "ɾ", "a", "β", "j", "ˈ", "e", "s", "a", "m", " ", "p", "e", "k", "ˈ", "e", "ɲ", "a", "s", " ", "ɣ", "ˈ", "o", "t", "a", "s", " ", "ð", "e", " ", "ˈ", "a", "ɣ", "w", "a", " ", "k", "ˌ", "o", "n", "t", "e", "n", "ˈ", "i", "ð", "a", "s", " ", "e", "n", " ", "l", "a", " ", "a", "t", "m", "ˈ", "o", "s", "f", "e", "ɾ", "a", " ", "t", "e", "r", "ˈ", "e", "s", "t", "ɾ", "e", "."], "phoneme_ids": [1, 0, 120, 0, 33, 0, 26, 0, 3, 0, 121, 0, 14, 0, 92, 0, 23, 0, 27, 0, 120, 0, 21, 0, 92, 0, 21, 0, 31, 0, 3, 0, 27, 0, 3, 0, 120, 0, 14, 0, 92, 0, 23, 0, 27, 0, 3, 0, 120, 0, 21, 0, 92, 0, 21, 0, 31, 0, 3, 0, 120, 0, 18, 0, 31, 0, 3, 0, 120, 0, 33, 0, 26, 0, 3, 0, 19, 0, 18, 0, 26, 0, 120, 0, 27, 0, 25, 0, 18, 0, 26, 0, 27, 0, 3, 0, 120, 0, 27, 0, 28, 0, 122, 0, 32, 0, 21, 0, 23, 0, 27, 0, 3, 0, 21, 0, 3, 0, 25, 0, 121, 0, 18, 0, 32, 0, 
18, 0, 121, 0, 27, 0, 92, 0, 27, 0, 24, 0, 120, 0, 27, 0, 36, 0, 21, 0, 23, 0, 27, 0, 3, 0, 23, 0, 18, 0, 3, 0, 23, 0, 27, 0, 26, 0, 31, 0, 120, 0, 21, 0, 31, 0, 32, 0, 18, 0, 3, 0, 18, 0, 26, 0, 3, 0, 24, 0, 14, 0, 3, 0, 121, 0, 14, 0, 28, 0, 14, 0, 92, 0, 21, 0, 126, 0, 22, 0, 120, 0, 27, 0, 26, 0, 3, 0, 18, 0, 26, 0, 3, 0, 18, 0, 24, 0, 3, 0, 126, 0, 22, 0, 120, 0, 18, 0, 24, 0, 27, 0, 3, 0, 41, 0, 18, 0, 3, 0, 120, 0, 33, 0, 26, 0, 3, 0, 120, 0, 14, 0, 92, 0, 23, 0, 27, 0, 3, 0, 41, 0, 18, 0, 3, 0, 24, 0, 120, 0, 33, 0, 126, 0, 3, 0, 25, 0, 121, 0, 33, 0, 24, 0, 32, 0, 21, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 8, 0, 3, 0, 121, 0, 27, 0, 92, 0, 21, 0, 36, 0, 21, 0, 26, 0, 120, 0, 14, 0, 41, 0, 27, 0, 3, 0, 28, 0, 27, 0, 92, 0, 3, 0, 24, 0, 14, 0, 3, 0, 41, 0, 121, 0, 18, 0, 31, 0, 23, 0, 27, 0, 25, 0, 28, 0, 121, 0, 27, 0, 31, 0, 21, 0, 126, 0, 22, 0, 120, 0, 27, 0, 26, 0, 3, 0, 17, 0, 18, 0, 3, 0, 24, 0, 14, 0, 3, 0, 24, 0, 120, 0, 33, 0, 126, 0, 3, 0, 31, 0, 27, 0, 24, 0, 120, 0, 14, 0, 92, 0, 3, 0, 18, 0, 26, 0, 3, 0, 18, 0, 24, 0, 3, 0, 18, 0, 31, 0, 28, 0, 120, 0, 18, 0, 23, 0, 32, 0, 92, 0, 27, 0, 3, 0, 125, 0, 21, 0, 31, 0, 120, 0, 21, 0, 125, 0, 24, 0, 18, 0, 8, 0, 3, 0, 24, 0, 14, 0, 3, 0, 23, 0, 35, 0, 120, 0, 14, 0, 24, 0, 3, 0, 31, 0, 18, 0, 3, 0, 28, 0, 92, 0, 27, 0, 41, 0, 120, 0, 33, 0, 126, 0, 18, 0, 3, 0, 28, 0, 27, 0, 92, 0, 3, 0, 30, 0, 121, 0, 18, 0, 19, 0, 92, 0, 14, 0, 23, 0, 126, 0, 22, 0, 120, 0, 27, 0, 26, 0, 8, 0, 3, 0, 23, 0, 35, 0, 121, 0, 14, 0, 26, 0, 17, 0, 27, 0, 3, 0, 24, 0, 27, 0, 31, 0, 3, 0, 30, 0, 120, 0, 14, 0, 115, 0, 27, 0, 31, 0, 3, 0, 41, 0, 18, 0, 24, 0, 3, 0, 31, 0, 120, 0, 27, 0, 24, 0, 3, 0, 121, 0, 14, 0, 32, 0, 92, 0, 14, 0, 125, 0, 22, 0, 120, 0, 18, 0, 31, 0, 14, 0, 25, 0, 3, 0, 28, 0, 18, 0, 23, 0, 120, 0, 18, 0, 82, 0, 14, 0, 31, 0, 3, 0, 68, 0, 120, 0, 27, 0, 32, 0, 14, 0, 31, 0, 3, 0, 41, 0, 18, 0, 3, 0, 120, 0, 14, 0, 68, 0, 35, 0, 14, 0, 3, 0, 23, 0, 121, 0, 27, 0, 26, 0, 32, 0, 18, 0, 26, 0, 120, 
0, 21, 0, 41, 0, 14, 0, 31, 0, 3, 0, 18, 0, 26, 0, 3, 0, 24, 0, 14, 0, 3, 0, 14, 0, 32, 0, 25, 0, 120, 0, 27, 0, 31, 0, 19, 0, 18, 0, 92, 0, 14, 0, 3, 0, 32, 0, 18, 0, 30, 0, 120, 0, 18, 0, 31, 0, 32, 0, 92, 0, 18, 0, 10, 0, 2]} +{"text": "Es un arco compuesto de arcos concéntricos de colores, sin solución de continuidad entre ellos, con el rojo hacia la parte exterior y el violeta hacia el interior.", "phonemes": ["ˈ", "e", "s", " ", "ˈ", "u", "n", " ", "ˈ", "a", "ɾ", "k", "o", " ", "k", "o", "m", "p", "w", "ˈ", "e", "s", "t", "o", " ", "ð", "e", " ", "ˈ", "a", "ɾ", "k", "o", "s", " ", "k", "o", "n", "θ", "ˈ", "e", "n", "t", "ɾ", "i", "k", "o", "s", " ", "ð", "e", " ", "k", "o", "l", "ˈ", "o", "ɾ", "e", "s", ",", " ", "s", "i", "n", " ", "s", "ˌ", "o", "l", "u", "θ", "j", "ˈ", "o", "n", " ", "d", "e", " ", "k", "ˌ", "o", "n", "t", "i", "n", "w", "i", "ð", "ˈ", "a", "ð", " ", "ˌ", "e", "n", "t", "ɾ", "e", " ", "ˈ", "e", "ʎ", "o", "s", ",", " ", "k", "o", "n", " ", "e", "l", " ", "r", "ˈ", "o", "x", "o", " ", "ˌ", "a", "θ", "j", "a", " ", "l", "a", " ", "p", "ˈ", "a", "ɾ", "t", "e", " ", "ˌ", "e", "k", "s", "t", "e", "ɾ", "j", "ˈ", "o", "ɾ", " ", "i", " ", "e", "l", " ", "β", "j", "o", "l", "ˈ", "e", "t", "a", " ", "ˌ", "a", "θ", "j", "a", " ", "e", "l", " ", "ˌ", "i", "n", "t", "e", "ɾ", "j", "ˈ", "o", "ɾ", "."], "phoneme_ids": [1, 0, 120, 0, 18, 0, 31, 0, 3, 0, 120, 0, 33, 0, 26, 0, 3, 0, 120, 0, 14, 0, 92, 0, 23, 0, 27, 0, 3, 0, 23, 0, 27, 0, 25, 0, 28, 0, 35, 0, 120, 0, 18, 0, 31, 0, 32, 0, 27, 0, 3, 0, 41, 0, 18, 0, 3, 0, 120, 0, 14, 0, 92, 0, 23, 0, 27, 0, 31, 0, 3, 0, 23, 0, 27, 0, 26, 0, 126, 0, 120, 0, 18, 0, 26, 0, 32, 0, 92, 0, 21, 0, 23, 0, 27, 0, 31, 0, 3, 0, 41, 0, 18, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 18, 0, 31, 0, 8, 0, 3, 0, 31, 0, 21, 0, 26, 0, 3, 0, 31, 0, 121, 0, 27, 0, 24, 0, 33, 0, 126, 0, 22, 0, 120, 0, 27, 0, 26, 0, 3, 0, 17, 0, 18, 0, 3, 0, 23, 0, 121, 0, 27, 0, 26, 0, 32, 0, 21, 0, 26, 0, 35, 0, 21, 0, 41, 0, 120, 0, 14, 
0, 41, 0, 3, 0, 121, 0, 18, 0, 26, 0, 32, 0, 92, 0, 18, 0, 3, 0, 120, 0, 18, 0, 104, 0, 27, 0, 31, 0, 8, 0, 3, 0, 23, 0, 27, 0, 26, 0, 3, 0, 18, 0, 24, 0, 3, 0, 30, 0, 120, 0, 27, 0, 36, 0, 27, 0, 3, 0, 121, 0, 14, 0, 126, 0, 22, 0, 14, 0, 3, 0, 24, 0, 14, 0, 3, 0, 28, 0, 120, 0, 14, 0, 92, 0, 32, 0, 18, 0, 3, 0, 121, 0, 18, 0, 23, 0, 31, 0, 32, 0, 18, 0, 92, 0, 22, 0, 120, 0, 27, 0, 92, 0, 3, 0, 21, 0, 3, 0, 18, 0, 24, 0, 3, 0, 125, 0, 22, 0, 27, 0, 24, 0, 120, 0, 18, 0, 32, 0, 14, 0, 3, 0, 121, 0, 14, 0, 126, 0, 22, 0, 14, 0, 3, 0, 18, 0, 24, 0, 3, 0, 121, 0, 21, 0, 26, 0, 32, 0, 18, 0, 92, 0, 22, 0, 120, 0, 27, 0, 92, 0, 10, 0, 2]} +{"text": "A altitud suficiente, por ejemplo cuando se viaja en avión, el arcoíris se puede observar en forma de círculo completo.", "phonemes": ["a", " ", "ˌ", "a", "l", "t", "i", "t", "ˈ", "u", "d", " ", "s", "ˌ", "u", "f", "i", "θ", "j", "ˈ", "ɛ", "n", "t", "e", ",", " ", "p", "o", "ɾ", " ", "e", "x", "ˈ", "e", "m", "p", "l", "o", " ", "k", "w", "ˌ", "a", "n", "d", "o", " ", "s", "e", " ", "β", "j", "ˈ", "a", "x", "a", " ", "e", "n", " ", "a", "β", "j", "ˈ", "o", "n", ",", " ", "e", "l", " ", "ˌ", "a", "ɾ", "k", "o", "ˈ", "i", "ɾ", "i", "s", " ", "s", "e", " ", "p", "w", "ˈ", "e", "ð", "e", " ", "ˌ", "o", "β", "s", "e", "ɾ", "β", "ˈ", "a", "ɾ", " ", "e", "n", " ", "f", "ˈ", "o", "ɾ", "m", "a", " ", "ð", "e", " ", "θ", "ˈ", "i", "ɾ", "k", "u", "l", "o", " ", "k", "o", "m", "p", "l", "ˈ", "e", "t", "o", "."], "phoneme_ids": [1, 0, 14, 0, 3, 0, 121, 0, 14, 0, 24, 0, 32, 0, 21, 0, 32, 0, 120, 0, 33, 0, 17, 0, 3, 0, 31, 0, 121, 0, 33, 0, 19, 0, 21, 0, 126, 0, 22, 0, 120, 0, 61, 0, 26, 0, 32, 0, 18, 0, 8, 0, 3, 0, 28, 0, 27, 0, 92, 0, 3, 0, 18, 0, 36, 0, 120, 0, 18, 0, 25, 0, 28, 0, 24, 0, 27, 0, 3, 0, 23, 0, 35, 0, 121, 0, 14, 0, 26, 0, 17, 0, 27, 0, 3, 0, 31, 0, 18, 0, 3, 0, 125, 0, 22, 0, 120, 0, 14, 0, 36, 0, 14, 0, 3, 0, 18, 0, 26, 0, 3, 0, 14, 0, 125, 0, 22, 0, 120, 0, 27, 0, 26, 0, 8, 0, 3, 0, 18, 0, 24, 0, 3, 0, 121, 0, 14, 0, 
92, 0, 23, 0, 27, 0, 120, 0, 21, 0, 92, 0, 21, 0, 31, 0, 3, 0, 31, 0, 18, 0, 3, 0, 28, 0, 35, 0, 120, 0, 18, 0, 41, 0, 18, 0, 3, 0, 121, 0, 27, 0, 125, 0, 31, 0, 18, 0, 92, 0, 125, 0, 120, 0, 14, 0, 92, 0, 3, 0, 18, 0, 26, 0, 3, 0, 19, 0, 120, 0, 27, 0, 92, 0, 25, 0, 14, 0, 3, 0, 41, 0, 18, 0, 3, 0, 126, 0, 120, 0, 21, 0, 92, 0, 23, 0, 33, 0, 24, 0, 27, 0, 3, 0, 23, 0, 27, 0, 25, 0, 28, 0, 24, 0, 120, 0, 18, 0, 32, 0, 27, 0, 10, 0, 2]} +{"text": "Benjamín pidió una bebida de kiwi y fresa; Noé, sin vergüenza, la más exquisita champaña del menú.", "phonemes": ["b", "ˌ", "e", "ŋ", "x", "a", "m", "ˈ", "i", "m", " ", "p", "i", "ð", "j", "ˈ", "o", " ", "ˈ", "u", "n", "a", " ", "β", "e", "β", "ˈ", "i", "ð", "a", " ", "ð", "e", " ", "k", "ˈ", "i", "w", "i", " ", "i", " ", "f", "ɾ", "ˈ", "e", "s", "a", ";", " ", "n", "o", "ˈ", "e", ",", " ", "s", "i", "m", " ", "b", "ˌ", "e", "ɾ", "ɣ", "u", "ˈ", "ɛ", "n", "θ", "a", ",", " ", "l", "a", " ", "m", "ˈ", "a", "s", " ", "ˌ", "e", "k", "s", "k", "i", "s", "ˈ", "i", "t", "a", " ", "t", "ʃ", "a", "m", "p", "ˈ", "a", "ɲ", "a", " ", "ð", "e", "l", " ", "m", "e", "n", "ˈ", "u", "."], "phoneme_ids": [1, 0, 15, 0, 121, 0, 18, 0, 44, 0, 36, 0, 14, 0, 25, 0, 120, 0, 21, 0, 25, 0, 3, 0, 28, 0, 21, 0, 41, 0, 22, 0, 120, 0, 27, 0, 3, 0, 120, 0, 33, 0, 26, 0, 14, 0, 3, 0, 125, 0, 18, 0, 125, 0, 120, 0, 21, 0, 41, 0, 14, 0, 3, 0, 41, 0, 18, 0, 3, 0, 23, 0, 120, 0, 21, 0, 35, 0, 21, 0, 3, 0, 21, 0, 3, 0, 19, 0, 92, 0, 120, 0, 18, 0, 31, 0, 14, 0, 12, 0, 3, 0, 26, 0, 27, 0, 120, 0, 18, 0, 8, 0, 3, 0, 31, 0, 21, 0, 25, 0, 3, 0, 15, 0, 121, 0, 18, 0, 92, 0, 68, 0, 33, 0, 120, 0, 61, 0, 26, 0, 126, 0, 14, 0, 8, 0, 3, 0, 24, 0, 14, 0, 3, 0, 25, 0, 120, 0, 14, 0, 31, 0, 3, 0, 121, 0, 18, 0, 23, 0, 31, 0, 23, 0, 21, 0, 31, 0, 120, 0, 21, 0, 32, 0, 14, 0, 3, 0, 32, 0, 96, 0, 14, 0, 25, 0, 28, 0, 120, 0, 14, 0, 82, 0, 14, 0, 3, 0, 41, 0, 18, 0, 24, 0, 3, 0, 25, 0, 18, 0, 26, 0, 120, 0, 33, 0, 10, 0, 2]} +{"text": "José compró una vieja zampoña en Perú. 
Excusándose, Sofía tiró su whisky al desagüe de la banqueta.", "phonemes": ["x", "o", "s", "ˈ", "e", " ", "k", "o", "m", "p", "ɾ", "ˈ", "o", " ", "ˈ", "u", "n", "a", " ", "β", "j", "ˈ", "e", "x", "a", " ", "θ", "a", "m", "p", "ˈ", "o", "ɲ", "a", " ", "e", "m", " ", "p", "e", "ɾ", "ˈ", "u", ".", " ", "ˌ", "e", "k", "s", "k", "u", "s", "ˈ", "a", "n", "d", "o", "s", "e", ",", " ", "s", "o", "f", "ˈ", "i", "a", " ", "t", "i", "ɾ", "ˈ", "o", " ", "s", "u", " ", "w", "ˈ", "i", "s", "k", "i", " ", "a", "l", " ", "ð", "ˌ", "e", "s", "a", "ɣ", "ˈ", "u", "e", " ", "ð", "e", " ", "l", "a", " ", "β", "a", "n", "k", "ˈ", "e", "t", "a", "."], "phoneme_ids": [1, 0, 36, 0, 27, 0, 31, 0, 120, 0, 18, 0, 3, 0, 23, 0, 27, 0, 25, 0, 28, 0, 92, 0, 120, 0, 27, 0, 3, 0, 120, 0, 33, 0, 26, 0, 14, 0, 3, 0, 125, 0, 22, 0, 120, 0, 18, 0, 36, 0, 14, 0, 3, 0, 126, 0, 14, 0, 25, 0, 28, 0, 120, 0, 27, 0, 82, 0, 14, 0, 3, 0, 18, 0, 25, 0, 3, 0, 28, 0, 18, 0, 92, 0, 120, 0, 33, 0, 10, 0, 3, 0, 121, 0, 18, 0, 23, 0, 31, 0, 23, 0, 33, 0, 31, 0, 120, 0, 14, 0, 26, 0, 17, 0, 27, 0, 31, 0, 18, 0, 8, 0, 3, 0, 31, 0, 27, 0, 19, 0, 120, 0, 21, 0, 14, 0, 3, 0, 32, 0, 21, 0, 92, 0, 120, 0, 27, 0, 3, 0, 31, 0, 33, 0, 3, 0, 35, 0, 120, 0, 21, 0, 31, 0, 23, 0, 21, 0, 3, 0, 14, 0, 24, 0, 3, 0, 41, 0, 121, 0, 18, 0, 31, 0, 14, 0, 68, 0, 120, 0, 33, 0, 18, 0, 3, 0, 41, 0, 18, 0, 3, 0, 24, 0, 14, 0, 3, 0, 125, 0, 14, 0, 26, 0, 23, 0, 120, 0, 18, 0, 32, 0, 14, 0, 10, 0, 2]} +{"text": "El veloz murciélago hindú comía feliz cardillo y kiwi. 
La cigüeña tocaba el saxofón detrás del palenque de paja.", "phonemes": ["e", "l", " ", "β", "e", "l", "ˈ", "o", "θ", " ", "m", "u", "ɾ", "θ", "j", "ˈ", "e", "l", "a", "ɣ", "o", " ", "i", "n", "d", "ˈ", "u", " ", "k", "o", "m", "ˈ", "i", "a", " ", "f", "e", "l", "ˈ", "i", "θ", " ", "k", "a", "ɾ", "ð", "ˈ", "i", "ʎ", "o", " ", "i", " ", "k", "ˈ", "i", "w", "i", ".", " ", "l", "a", " ", "θ", "ˌ", "i", "ɣ", "u", "ˈ", "e", "ɲ", "a", " ", "t", "o", "k", "ˈ", "a", "β", "a", " ", "e", "l", " ", "s", "ˌ", "a", "k", "s", "o", "f", "ˈ", "o", "n", " ", "d", "e", "t", "ɾ", "ˈ", "a", "s", " ", "ð", "e", "l", " ", "p", "a", "l", "ˈ", "ɛ", "n", "k", "e", " ", "ð", "e", " ", "p", "ˈ", "a", "x", "a", "."], "phoneme_ids": [1, 0, 18, 0, 24, 0, 3, 0, 125, 0, 18, 0, 24, 0, 120, 0, 27, 0, 126, 0, 3, 0, 25, 0, 33, 0, 92, 0, 126, 0, 22, 0, 120, 0, 18, 0, 24, 0, 14, 0, 68, 0, 27, 0, 3, 0, 21, 0, 26, 0, 17, 0, 120, 0, 33, 0, 3, 0, 23, 0, 27, 0, 25, 0, 120, 0, 21, 0, 14, 0, 3, 0, 19, 0, 18, 0, 24, 0, 120, 0, 21, 0, 126, 0, 3, 0, 23, 0, 14, 0, 92, 0, 41, 0, 120, 0, 21, 0, 104, 0, 27, 0, 3, 0, 21, 0, 3, 0, 23, 0, 120, 0, 21, 0, 35, 0, 21, 0, 10, 0, 3, 0, 24, 0, 14, 0, 3, 0, 126, 0, 121, 0, 21, 0, 68, 0, 33, 0, 120, 0, 18, 0, 82, 0, 14, 0, 3, 0, 32, 0, 27, 0, 23, 0, 120, 0, 14, 0, 125, 0, 14, 0, 3, 0, 18, 0, 24, 0, 3, 0, 31, 0, 121, 0, 14, 0, 23, 0, 31, 0, 27, 0, 19, 0, 120, 0, 27, 0, 26, 0, 3, 0, 17, 0, 18, 0, 32, 0, 92, 0, 120, 0, 14, 0, 31, 0, 3, 0, 41, 0, 18, 0, 24, 0, 3, 0, 28, 0, 14, 0, 24, 0, 120, 0, 61, 0, 26, 0, 23, 0, 18, 0, 3, 0, 41, 0, 18, 0, 3, 0, 28, 0, 120, 0, 14, 0, 36, 0, 14, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_eu.jsonl b/etc/test_sentences/test_eu.jsonl new file mode 100644 index 0000000..9611cc4 --- /dev/null +++ b/etc/test_sentences/test_eu.jsonl @@ -0,0 +1,4 @@ +{"text": "Ostadarra, halaber Erromako zubia edo uztargia, gertaera optiko eta meteorologiko bat da, zeruan, jarraikako argi zerrenda bat eragiten duena, eguzkiaren izpiek Lurreko atmosferan aurkitzen 
diren hezetasun tanta txikiak zeharkatzen dituztenean.", "phonemes": ["o", "s", "̺", "t", "ˈ", "a", "ð", "a", "r", "ˌ", "a", ",", " ", "a", "l", "ˈ", "a", "β", "ˌ", "e", "ɾ", " ", "e", "r", "ˈ", "o", "m", "a", "k", "ˌ", "o", " ", "s", "̻", "u", "β", "ˈ", "i", "ˌ", "a", " ", "ˈ", "e", "ð", "o", " ", "u", "s", "̻", "t", "ˈ", "a", "ɾ", "ɣ", "i", "ˌ", "a", ",", " ", "ɡ", "e", "ɾ", "t", "ˈ", "a", "e", "ɾ", "ˌ", "a", " ", "o", "p", "t", "ˈ", "i", "k", "ˌ", "o", " ", "ˌ", "e", "t", "a", " ", "m", "e", "t", "ˈ", "e", "o", "ɾ", "o", "l", "o", "ɣ", "i", "k", "ˌ", "o", " ", "β", "ˈ", "a", "t", " ", "ð", "ˈ", "a", ",", " ", "s", "̻", "e", "ɾ", "ˈ", "u", "ˌ", "a", "n", ",", " ", "ɟ", "a", "r", "ˈ", "a", "ɪ", "k", "a", "k", "ˌ", "o", " ", "ˈ", "a", "ɾ", "ɣ", "i", " ", "s", "̻", "e", "r", "ˈ", "e", "n", "d", "ˌ", "a", " ", "β", "ˈ", "a", "t", " ", "e", "ɾ", "ˈ", "a", "ɣ", "i", "t", "ˌ", "e", "n", " ", "d", "u", "ˈ", "e", "n", "ˌ", "a", ",", " ", "e", "ɣ", "ˈ", "u", "s", "̻", "k", "i", "a", "ɾ", "ˌ", "e", "n", " ", "i", "s", "̻", "p", "ˈ", "i", "ˌ", "e", "k", " ", "l", "u", "r", "ˈ", "e", "k", "ˌ", "o", " ", "a", "t", "m", "ˈ", "o", "s", "̺", "f", "e", "ɾ", "ˌ", "a", "n", " ", "a", "ʊ", "ɾ", "k", "ˈ", "i", "t", "s", "̻", "ˌ", "e", "n", " ", "d", "i", "ɾ", "ˈ", "e", "n", " ", "e", "s", "̻", "ˈ", "e", "t", "a", "s", "̺", "ˌ", "u", "n", " ", "t", "ˈ", "a", "n", "t", "a", " ", "t", "ʃ", "i", "k", "ˈ", "i", "ˌ", "a", "k", " ", "s", "̻", "e", "ˈ", "a", "ɾ", "k", "a", "t", "s", "̻", "ˌ", "e", "n", " ", "d", "i", "t", "ˈ", "u", "s", "̻", "t", "e", "n", "e", "ˌ", "a", "n", "."], "phoneme_ids": [1, 0, 27, 0, 31, 0, 152, 0, 32, 0, 120, 0, 14, 0, 41, 0, 14, 0, 30, 0, 121, 0, 14, 0, 8, 0, 3, 0, 14, 0, 24, 0, 120, 0, 14, 0, 125, 0, 121, 0, 18, 0, 92, 0, 3, 0, 18, 0, 30, 0, 120, 0, 27, 0, 25, 0, 14, 0, 23, 0, 121, 0, 27, 0, 3, 0, 31, 0, 153, 0, 33, 0, 125, 0, 120, 0, 21, 0, 121, 0, 14, 0, 3, 0, 120, 0, 18, 0, 41, 0, 27, 0, 3, 0, 33, 0, 31, 0, 153, 0, 32, 0, 120, 0, 14, 0, 92, 0, 68, 0, 21, 0, 
121, 0, 14, 0, 8, 0, 3, 0, 66, 0, 18, 0, 92, 0, 32, 0, 120, 0, 14, 0, 18, 0, 92, 0, 121, 0, 14, 0, 3, 0, 27, 0, 28, 0, 32, 0, 120, 0, 21, 0, 23, 0, 121, 0, 27, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 25, 0, 18, 0, 32, 0, 120, 0, 18, 0, 27, 0, 92, 0, 27, 0, 24, 0, 27, 0, 68, 0, 21, 0, 23, 0, 121, 0, 27, 0, 3, 0, 125, 0, 120, 0, 14, 0, 32, 0, 3, 0, 41, 0, 120, 0, 14, 0, 8, 0, 3, 0, 31, 0, 153, 0, 18, 0, 92, 0, 120, 0, 33, 0, 121, 0, 14, 0, 26, 0, 8, 0, 3, 0, 64, 0, 14, 0, 30, 0, 120, 0, 14, 0, 74, 0, 23, 0, 14, 0, 23, 0, 121, 0, 27, 0, 3, 0, 120, 0, 14, 0, 92, 0, 68, 0, 21, 0, 3, 0, 31, 0, 153, 0, 18, 0, 30, 0, 120, 0, 18, 0, 26, 0, 17, 0, 121, 0, 14, 0, 3, 0, 125, 0, 120, 0, 14, 0, 32, 0, 3, 0, 18, 0, 92, 0, 120, 0, 14, 0, 68, 0, 21, 0, 32, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 33, 0, 120, 0, 18, 0, 26, 0, 121, 0, 14, 0, 8, 0, 3, 0, 18, 0, 68, 0, 120, 0, 33, 0, 31, 0, 153, 0, 23, 0, 21, 0, 14, 0, 92, 0, 121, 0, 18, 0, 26, 0, 3, 0, 21, 0, 31, 0, 153, 0, 28, 0, 120, 0, 21, 0, 121, 0, 18, 0, 23, 0, 3, 0, 24, 0, 33, 0, 30, 0, 120, 0, 18, 0, 23, 0, 121, 0, 27, 0, 3, 0, 14, 0, 32, 0, 25, 0, 120, 0, 27, 0, 31, 0, 152, 0, 19, 0, 18, 0, 92, 0, 121, 0, 14, 0, 26, 0, 3, 0, 14, 0, 100, 0, 92, 0, 23, 0, 120, 0, 21, 0, 32, 0, 31, 0, 153, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 21, 0, 92, 0, 120, 0, 18, 0, 26, 0, 3, 0, 18, 0, 31, 0, 153, 0, 120, 0, 18, 0, 32, 0, 14, 0, 31, 0, 152, 0, 121, 0, 33, 0, 26, 0, 3, 0, 32, 0, 120, 0, 14, 0, 26, 0, 32, 0, 14, 0, 3, 0, 32, 0, 96, 0, 21, 0, 23, 0, 120, 0, 21, 0, 121, 0, 14, 0, 23, 0, 3, 0, 31, 0, 153, 0, 18, 0, 120, 0, 14, 0, 92, 0, 23, 0, 14, 0, 32, 0, 31, 0, 153, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 21, 0, 32, 0, 120, 0, 33, 0, 31, 0, 153, 0, 32, 0, 18, 0, 26, 0, 18, 0, 121, 0, 14, 0, 26, 0, 10, 0, 2]} +{"text": "Forma, arku kolore anitz batena da, gorria kanpoalderantz duena eta morea barnealderantz.", "phonemes": ["f", "ˈ", "o", "ɾ", "m", "a", ",", " ", "ˈ", "a", "ɾ", "k", "u", " ", "k", "o", "l", "ˈ", "o", "ɾ", "ˌ", "e", " ", "a", 
"n", "ˈ", "i", "t", "s", "̻", " ", "β", "a", "t", "ˈ", "e", "n", "ˌ", "a", " ", "ð", "ˈ", "a", ",", " ", "ɡ", "o", "r", "ˈ", "i", "ˌ", "a", " ", "k", "a", "n", "p", "ˈ", "o", "a", "l", "ð", "e", "ɾ", "ˌ", "a", "n", "t", "s", "̻", " ", "ð", "u", "ˈ", "e", "n", "ˌ", "a", " ", "ˌ", "e", "t", "a", " ", "m", "o", "ɾ", "ˈ", "e", "ˌ", "a", " ", "β", "a", "ɾ", "n", "ˈ", "e", "a", "l", "ð", "e", "ɾ", "ˌ", "a", "n", "t", "s", "̻", "."], "phoneme_ids": [1, 0, 19, 0, 120, 0, 27, 0, 92, 0, 25, 0, 14, 0, 8, 0, 3, 0, 120, 0, 14, 0, 92, 0, 23, 0, 33, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 121, 0, 18, 0, 3, 0, 14, 0, 26, 0, 120, 0, 21, 0, 32, 0, 31, 0, 153, 0, 3, 0, 125, 0, 14, 0, 32, 0, 120, 0, 18, 0, 26, 0, 121, 0, 14, 0, 3, 0, 41, 0, 120, 0, 14, 0, 8, 0, 3, 0, 66, 0, 27, 0, 30, 0, 120, 0, 21, 0, 121, 0, 14, 0, 3, 0, 23, 0, 14, 0, 26, 0, 28, 0, 120, 0, 27, 0, 14, 0, 24, 0, 41, 0, 18, 0, 92, 0, 121, 0, 14, 0, 26, 0, 32, 0, 31, 0, 153, 0, 3, 0, 41, 0, 33, 0, 120, 0, 18, 0, 26, 0, 121, 0, 14, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 25, 0, 27, 0, 92, 0, 120, 0, 18, 0, 121, 0, 14, 0, 3, 0, 125, 0, 14, 0, 92, 0, 26, 0, 120, 0, 18, 0, 14, 0, 24, 0, 41, 0, 18, 0, 92, 0, 121, 0, 14, 0, 26, 0, 32, 0, 31, 0, 153, 0, 10, 0, 2]} +{"text": "Ez da hain ohikoa ostadar bikoitza, bigarren arku bat duena, ilunagoa, koloreen ordena alderantziz duena, hau da, gorria barnealderantz eta morea kanpoalderantz.", "phonemes": ["ˈ", "e", "s", "̻", " ", "t", "ˈ", "a", " ", "ˈ", "a", "ɪ", "n", " ", "o", "ˈ", "i", "k", "o", "ˌ", "a", " ", "o", "s", "̺", "t", "ˈ", "a", "ð", "ˌ", "a", "ɾ", " ", "β", "i", "k", "ˈ", "o", "ɪ", "t", "s", "̻", "ˌ", "a", ",", " ", "b", "i", "ɣ", "ˈ", "a", "r", "ˌ", "e", "n", " ", "ˈ", "a", "ɾ", "k", "u", " ", "β", "ˈ", "a", "t", " ", "ð", "u", "ˈ", "e", "n", "ˌ", "a", ",", " ", "i", "ʎ", "ˈ", "u", "n", "a", "ɣ", "o", "ˌ", "a", ",", " ", "k", "o", "l", "ˈ", "o", "ɾ", "e", "ˌ", "e", "n", " ", "o", "ɾ", "ð", "ˈ", "e", "n", "ˌ", "a", " ", "a", "l", "ð", "ˈ", "e", "ɾ", 
"a", "n", "t", "s", "̻", "ˌ", "i", "s", "̻", " ", "ð", "u", "ˈ", "e", "n", "ˌ", "a", ",", " ", "ˈ", "a", "ʊ", " ", "ð", "ˈ", "a", ",", " ", "ɡ", "o", "r", "ˈ", "i", "ˌ", "a", " ", "β", "a", "ɾ", "n", "ˈ", "e", "a", "l", "ð", "e", "ɾ", "ˌ", "a", "n", "t", "s", "̻", " ", "ˌ", "e", "t", "a", " ", "m", "o", "ɾ", "ˈ", "e", "ˌ", "a", " ", "k", "a", "n", "p", "ˈ", "o", "a", "l", "ð", "e", "ɾ", "ˌ", "a", "n", "t", "s", "̻", "."], "phoneme_ids": [1, 0, 120, 0, 18, 0, 31, 0, 153, 0, 3, 0, 32, 0, 120, 0, 14, 0, 3, 0, 120, 0, 14, 0, 74, 0, 26, 0, 3, 0, 27, 0, 120, 0, 21, 0, 23, 0, 27, 0, 121, 0, 14, 0, 3, 0, 27, 0, 31, 0, 152, 0, 32, 0, 120, 0, 14, 0, 41, 0, 121, 0, 14, 0, 92, 0, 3, 0, 125, 0, 21, 0, 23, 0, 120, 0, 27, 0, 74, 0, 32, 0, 31, 0, 153, 0, 121, 0, 14, 0, 8, 0, 3, 0, 15, 0, 21, 0, 68, 0, 120, 0, 14, 0, 30, 0, 121, 0, 18, 0, 26, 0, 3, 0, 120, 0, 14, 0, 92, 0, 23, 0, 33, 0, 3, 0, 125, 0, 120, 0, 14, 0, 32, 0, 3, 0, 41, 0, 33, 0, 120, 0, 18, 0, 26, 0, 121, 0, 14, 0, 8, 0, 3, 0, 21, 0, 104, 0, 120, 0, 33, 0, 26, 0, 14, 0, 68, 0, 27, 0, 121, 0, 14, 0, 8, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 18, 0, 121, 0, 18, 0, 26, 0, 3, 0, 27, 0, 92, 0, 41, 0, 120, 0, 18, 0, 26, 0, 121, 0, 14, 0, 3, 0, 14, 0, 24, 0, 41, 0, 120, 0, 18, 0, 92, 0, 14, 0, 26, 0, 32, 0, 31, 0, 153, 0, 121, 0, 21, 0, 31, 0, 153, 0, 3, 0, 41, 0, 33, 0, 120, 0, 18, 0, 26, 0, 121, 0, 14, 0, 8, 0, 3, 0, 120, 0, 14, 0, 100, 0, 3, 0, 41, 0, 120, 0, 14, 0, 8, 0, 3, 0, 66, 0, 27, 0, 30, 0, 120, 0, 21, 0, 121, 0, 14, 0, 3, 0, 125, 0, 14, 0, 92, 0, 26, 0, 120, 0, 18, 0, 14, 0, 24, 0, 41, 0, 18, 0, 92, 0, 121, 0, 14, 0, 26, 0, 32, 0, 31, 0, 153, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 25, 0, 27, 0, 92, 0, 120, 0, 18, 0, 121, 0, 14, 0, 3, 0, 23, 0, 14, 0, 26, 0, 28, 0, 120, 0, 27, 0, 14, 0, 24, 0, 41, 0, 18, 0, 92, 0, 121, 0, 14, 0, 26, 0, 32, 0, 31, 0, 153, 0, 10, 0, 2]} +{"text": "Ostadarrak, jarraikako kolore zerrenda bat erakusten duen arren, ohi, osatzen duten koloreak sei direla onartzen da: 
gorria, laranja, hori, berdea, urdina eta morea, argiaren maiztasunen deskonposaketen ondorio, eta hiru oinarrizko koloreek, eta hauek, euren arteko nahasketetan emandako beste hirurek emandakoek osatua, tradizionalki, 7 kolore aipatzen diren arren, urdina eta morearen artean anila jarriz.", "phonemes": ["o", "s", "̺", "t", "ˈ", "a", "ð", "a", "r", "ˌ", "a", "k", ",", " ", "ɟ", "a", "r", "ˈ", "a", "ɪ", "k", "a", "k", "ˌ", "o", " ", "k", "o", "l", "ˈ", "o", "ɾ", "ˌ", "e", " ", "s", "̻", "e", "r", "ˈ", "e", "n", "d", "ˌ", "a", " ", "β", "ˈ", "a", "t", " ", "e", "ɾ", "ˈ", "a", "k", "u", "s", "̺", "t", "ˌ", "e", "n", " ", "d", "u", "ˈ", "e", "n", " ", "a", "r", "ˈ", "e", "n", ",", " ", "ˈ", "o", "i", ",", " ", "o", "s", "̺", "ˈ", "a", "t", "s", "̻", "ˌ", "e", "n", " ", "d", "u", "t", "ˈ", "e", "n", " ", "k", "o", "l", "ˈ", "o", "ɾ", "e", "ˌ", "a", "k", " ", "s", "̺", "ˈ", "e", "ɪ", " ", "ð", "i", "ɾ", "ˈ", "e", "l", "ˌ", "a", " ", "o", "n", "ˈ", "a", "ɾ", "t", "s", "̻", "ˌ", "e", "n", " ", "d", "ˈ", "a", ":", " ", "ɡ", "o", "r", "ˈ", "i", "ˌ", "a", ",", " ", "l", "a", "ɾ", "ˈ", "a", "n", "ɟ", "ˌ", "a", ",", " ", "ˈ", "o", "ɾ", "i", ",", " ", "b", "e", "ɾ", "ð", "ˈ", "e", "ˌ", "a", ",", " ", "u", "ɾ", "ð", "ˈ", "i", "ɲ", "ˌ", "a", " ", "ˌ", "e", "t", "a", " ", "m", "o", "ɾ", "ˈ", "e", "ˌ", "a", ",", " ", "a", "ɾ", "ɣ", "ˈ", "i", "a", "ɾ", "ˌ", "e", "n", " ", "m", "a", "ɪ", "s", "̻", "t", "ˈ", "a", "s", "̺", "u", "n", "ˌ", "e", "n", " ", "d", "e", "s", "̺", "k", "ˈ", "o", "n", "p", "o", "s", "̺", "a", "k", "e", "t", "ˌ", "e", "n", " ", "o", "n", "d", "ˈ", "o", "ɾ", "i", "ˌ", "o", ",", " ", "ˌ", "e", "t", "a", " ", "ˈ", "i", "ɾ", "u", " ", "o", "ɪ", "ɲ", "ˈ", "a", "r", "i", "s", "̻", "k", "ˌ", "o", " ", "k", "o", "l", "ˈ", "o", "ɾ", "e", "ˌ", "e", "k", ",", " ", "ˌ", "e", "t", "a", " ", "ˈ", "a", "ʊ", "e", "k", ",", " ", "e", "ʊ", "ɾ", "ˈ", "e", "n", " ", "a", "ɾ", "t", "ˈ", "e", "k", "ˌ", "o", " ", "n", "a", "ˈ", "a", "s", "̺", "k", "e", "t", "e", "t", "ˌ", "a", "n", " 
", "e", "m", "ˈ", "a", "n", "d", "a", "k", "ˌ", "o", " ", "β", "ˈ", "e", "s", "̺", "t", "e", " ", "i", "ɾ", "ˈ", "u", "ɾ", "ˌ", "e", "k", " ", "e", "m", "ˈ", "a", "n", "d", "a", "k", "o", "ˌ", "e", "k", " ", "o", "s", "̺", "ˈ", "a", "t", "u", "ˌ", "a", ",", " ", "t", "ɾ", "a", "ð", "ˈ", "i", "s", "̻", "i", "o", "n", "a", "l", "k", "ˌ", "i", ",", " ", "s", "̻", "ˈ", "a", "s", "̻", "p", "i", " ", "k", "o", "l", "ˈ", "o", "ɾ", "ˌ", "e", " ", "a", "ɪ", "p", "ˈ", "a", "t", "s", "̻", "ˌ", "e", "n", " ", "d", "i", "ɾ", "ˈ", "e", "n", " ", "a", "r", "ˈ", "e", "n", ",", " ", "u", "ɾ", "ð", "ˈ", "i", "ɲ", "ˌ", "a", " ", "ˌ", "e", "t", "a", " ", "m", "o", "ɾ", "ˈ", "e", "a", "ɾ", "ˌ", "e", "n", " ", "a", "ɾ", "t", "ˈ", "e", "ˌ", "a", "n", " ", "a", "n", "ˈ", "i", "ʎ", "ˌ", "a", " ", "j", "a", "r", "ˈ", "i", "s", "̻", "."], "phoneme_ids": [1, 0, 27, 0, 31, 0, 152, 0, 32, 0, 120, 0, 14, 0, 41, 0, 14, 0, 30, 0, 121, 0, 14, 0, 23, 0, 8, 0, 3, 0, 64, 0, 14, 0, 30, 0, 120, 0, 14, 0, 74, 0, 23, 0, 14, 0, 23, 0, 121, 0, 27, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 121, 0, 18, 0, 3, 0, 31, 0, 153, 0, 18, 0, 30, 0, 120, 0, 18, 0, 26, 0, 17, 0, 121, 0, 14, 0, 3, 0, 125, 0, 120, 0, 14, 0, 32, 0, 3, 0, 18, 0, 92, 0, 120, 0, 14, 0, 23, 0, 33, 0, 31, 0, 152, 0, 32, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 33, 0, 120, 0, 18, 0, 26, 0, 3, 0, 14, 0, 30, 0, 120, 0, 18, 0, 26, 0, 8, 0, 3, 0, 120, 0, 27, 0, 21, 0, 8, 0, 3, 0, 27, 0, 31, 0, 152, 0, 120, 0, 14, 0, 32, 0, 31, 0, 153, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 33, 0, 32, 0, 120, 0, 18, 0, 26, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 18, 0, 121, 0, 14, 0, 23, 0, 3, 0, 31, 0, 152, 0, 120, 0, 18, 0, 74, 0, 3, 0, 41, 0, 21, 0, 92, 0, 120, 0, 18, 0, 24, 0, 121, 0, 14, 0, 3, 0, 27, 0, 26, 0, 120, 0, 14, 0, 92, 0, 32, 0, 31, 0, 153, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 120, 0, 14, 0, 11, 0, 3, 0, 66, 0, 27, 0, 30, 0, 120, 0, 21, 0, 121, 0, 14, 0, 8, 0, 3, 0, 24, 0, 14, 0, 92, 0, 120, 0, 14, 0, 26, 0, 64, 0, 121, 0, 14, 0, 8, 
0, 3, 0, 120, 0, 27, 0, 92, 0, 21, 0, 8, 0, 3, 0, 15, 0, 18, 0, 92, 0, 41, 0, 120, 0, 18, 0, 121, 0, 14, 0, 8, 0, 3, 0, 33, 0, 92, 0, 41, 0, 120, 0, 21, 0, 82, 0, 121, 0, 14, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 25, 0, 27, 0, 92, 0, 120, 0, 18, 0, 121, 0, 14, 0, 8, 0, 3, 0, 14, 0, 92, 0, 68, 0, 120, 0, 21, 0, 14, 0, 92, 0, 121, 0, 18, 0, 26, 0, 3, 0, 25, 0, 14, 0, 74, 0, 31, 0, 153, 0, 32, 0, 120, 0, 14, 0, 31, 0, 152, 0, 33, 0, 26, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 18, 0, 31, 0, 152, 0, 23, 0, 120, 0, 27, 0, 26, 0, 28, 0, 27, 0, 31, 0, 152, 0, 14, 0, 23, 0, 18, 0, 32, 0, 121, 0, 18, 0, 26, 0, 3, 0, 27, 0, 26, 0, 17, 0, 120, 0, 27, 0, 92, 0, 21, 0, 121, 0, 27, 0, 8, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 120, 0, 21, 0, 92, 0, 33, 0, 3, 0, 27, 0, 74, 0, 82, 0, 120, 0, 14, 0, 30, 0, 21, 0, 31, 0, 153, 0, 23, 0, 121, 0, 27, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 18, 0, 121, 0, 18, 0, 23, 0, 8, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 120, 0, 14, 0, 100, 0, 18, 0, 23, 0, 8, 0, 3, 0, 18, 0, 100, 0, 92, 0, 120, 0, 18, 0, 26, 0, 3, 0, 14, 0, 92, 0, 32, 0, 120, 0, 18, 0, 23, 0, 121, 0, 27, 0, 3, 0, 26, 0, 14, 0, 120, 0, 14, 0, 31, 0, 152, 0, 23, 0, 18, 0, 32, 0, 18, 0, 32, 0, 121, 0, 14, 0, 26, 0, 3, 0, 18, 0, 25, 0, 120, 0, 14, 0, 26, 0, 17, 0, 14, 0, 23, 0, 121, 0, 27, 0, 3, 0, 125, 0, 120, 0, 18, 0, 31, 0, 152, 0, 32, 0, 18, 0, 3, 0, 21, 0, 92, 0, 120, 0, 33, 0, 92, 0, 121, 0, 18, 0, 23, 0, 3, 0, 18, 0, 25, 0, 120, 0, 14, 0, 26, 0, 17, 0, 14, 0, 23, 0, 27, 0, 121, 0, 18, 0, 23, 0, 3, 0, 27, 0, 31, 0, 152, 0, 120, 0, 14, 0, 32, 0, 33, 0, 121, 0, 14, 0, 8, 0, 3, 0, 32, 0, 92, 0, 14, 0, 41, 0, 120, 0, 21, 0, 31, 0, 153, 0, 21, 0, 27, 0, 26, 0, 14, 0, 24, 0, 23, 0, 121, 0, 21, 0, 8, 0, 3, 0, 31, 0, 153, 0, 120, 0, 14, 0, 31, 0, 153, 0, 28, 0, 21, 0, 3, 0, 23, 0, 27, 0, 24, 0, 120, 0, 27, 0, 92, 0, 121, 0, 18, 0, 3, 0, 14, 0, 74, 0, 28, 0, 120, 0, 14, 0, 32, 0, 31, 0, 153, 0, 121, 0, 18, 0, 26, 0, 3, 0, 17, 0, 21, 0, 92, 0, 120, 0, 18, 0, 26, 
0, 3, 0, 14, 0, 30, 0, 120, 0, 18, 0, 26, 0, 8, 0, 3, 0, 33, 0, 92, 0, 41, 0, 120, 0, 21, 0, 82, 0, 121, 0, 14, 0, 3, 0, 121, 0, 18, 0, 32, 0, 14, 0, 3, 0, 25, 0, 27, 0, 92, 0, 120, 0, 18, 0, 14, 0, 92, 0, 121, 0, 18, 0, 26, 0, 3, 0, 14, 0, 92, 0, 32, 0, 120, 0, 18, 0, 121, 0, 14, 0, 26, 0, 3, 0, 14, 0, 26, 0, 120, 0, 21, 0, 104, 0, 121, 0, 14, 0, 3, 0, 22, 0, 14, 0, 30, 0, 120, 0, 21, 0, 31, 0, 153, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_fr.jsonl b/etc/test_sentences/test_fr.jsonl new file mode 100644 index 0000000..535f0a7 --- /dev/null +++ b/etc/test_sentences/test_fr.jsonl @@ -0,0 +1,7 @@ +{"text": "Un arc-en-ciel est un photométéore, un phénomène optique se produisant dans le ciel, visible dans la direction opposée au Soleil quand il brille pendant la pluie.", "phonemes": ["œ", "̃", "n", " ", "ˈ", "a", "ʁ", "k", "ɑ", "̃", "s", "j", "ˈ", "ɛ", "l", " ", "ɛ", "t", " ", "œ", "̃", " ", "f", "o", "t", "o", "m", "e", "t", "e", "ˈ", "ɔ", "ʁ", ",", " ", "œ", "̃", " ", "f", "e", "n", "o", "m", "ˈ", "ɛ", "n", " ", "ɔ", "p", "t", "ˈ", "i", "k", " ", "s", "ə", "-", " ", "p", "ʁ", "o", "d", "y", "i", "z", "ˈ", "ɑ", "̃", " ", "d", "ɑ", "̃", " ", "l", "ə", "-", " ", "s", "j", "ˈ", "ɛ", "l", ",", " ", "v", "i", "z", "ˈ", "i", "b", "l", " ", "d", "ɑ", "̃", " ", "l", "a", "-", " ", "d", "i", "ʁ", "ɛ", "k", "s", "j", "ˈ", "ɔ", "̃", " ", "ɔ", "p", "o", "z", "ˈ", "e", " ", "o", " ", "s", "o", "l", "ˈ", "ɛ", "j", " ", "k", "ɑ", "̃", "t", " ", "i", "l", " ", "b", "ʁ", "ˈ", "i", "j", " ", "p", "ɑ", "̃", "d", "ˈ", "ɑ", "̃", " ", "l", "a", "-", " ", "p", "l", "y", "ˈ", "i", "."], "phoneme_ids": [1, 0, 45, 0, 141, 0, 26, 0, 3, 0, 120, 0, 14, 0, 94, 0, 23, 0, 51, 0, 141, 0, 31, 0, 22, 0, 120, 0, 61, 0, 24, 0, 3, 0, 61, 0, 32, 0, 3, 0, 45, 0, 141, 0, 3, 0, 19, 0, 27, 0, 32, 0, 27, 0, 25, 0, 18, 0, 32, 0, 18, 0, 120, 0, 54, 0, 94, 0, 8, 0, 3, 0, 45, 0, 141, 0, 3, 0, 19, 0, 18, 0, 26, 0, 27, 0, 25, 0, 120, 0, 61, 0, 26, 0, 3, 0, 54, 0, 28, 0, 32, 0, 120, 0, 21, 0, 23, 0, 3, 0, 
31, 0, 59, 0, 9, 0, 3, 0, 28, 0, 94, 0, 27, 0, 17, 0, 37, 0, 21, 0, 38, 0, 120, 0, 51, 0, 141, 0, 3, 0, 17, 0, 51, 0, 141, 0, 3, 0, 24, 0, 59, 0, 9, 0, 3, 0, 31, 0, 22, 0, 120, 0, 61, 0, 24, 0, 8, 0, 3, 0, 34, 0, 21, 0, 38, 0, 120, 0, 21, 0, 15, 0, 24, 0, 3, 0, 17, 0, 51, 0, 141, 0, 3, 0, 24, 0, 14, 0, 9, 0, 3, 0, 17, 0, 21, 0, 94, 0, 61, 0, 23, 0, 31, 0, 22, 0, 120, 0, 54, 0, 141, 0, 3, 0, 54, 0, 28, 0, 27, 0, 38, 0, 120, 0, 18, 0, 3, 0, 27, 0, 3, 0, 31, 0, 27, 0, 24, 0, 120, 0, 61, 0, 22, 0, 3, 0, 23, 0, 51, 0, 141, 0, 32, 0, 3, 0, 21, 0, 24, 0, 3, 0, 15, 0, 94, 0, 120, 0, 21, 0, 22, 0, 3, 0, 28, 0, 51, 0, 141, 0, 17, 0, 120, 0, 51, 0, 141, 0, 3, 0, 24, 0, 14, 0, 9, 0, 3, 0, 28, 0, 24, 0, 37, 0, 120, 0, 21, 0, 10, 0, 2]} +{"text": "C'est un arc de cercle coloré d'un dégradé de couleurs continu du rouge, à l'extérieur, au jaune au vert et au bleu, jusqu'au violet à l'intérieur.", "phonemes": ["s", "ɛ", "t", " ", "œ", "̃", "n", " ", "ˈ", "a", "ʁ", "k", " ", "d", "ə", "-", " ", "s", "ˈ", "ɛ", "ʁ", "k", "l", " ", "k", "o", "l", "o", "ʁ", "ˈ", "e", " ", "d", "œ", "̃", " ", "d", "e", "ɡ", "ʁ", "a", "d", "ˈ", "e", " ", "d", "ə", "-", " ", "k", "u", "l", "ˈ", "œ", "ʁ", " ", "k", "ɔ", "̃", "t", "i", "n", "ˈ", "y", " ", "d", "y", "-", " ", "ʁ", "ˈ", "u", "ʒ", ",", " ", "a", " ", "l", "ɛ", "k", "s", "t", "e", "ʁ", "j", "ˈ", "œ", "ʁ", ",", " ", "o", " ", "ʒ", "ˈ", "o", "n", " ", "o", " ", "v", "ˈ", "ɛ", "ʁ", " ", "e", " ", "o", " ", "b", "l", "ˈ", "ø", ",", " ", "ʒ", "y", "s", "k", "o", " ", "v", "j", "o", "l", "ˈ", "ɛ", " ", "a", " ", "l", "ɛ", "̃", "t", "e", "ʁ", "j", "ˈ", "œ", "ʁ", "."], "phoneme_ids": [1, 0, 31, 0, 61, 0, 32, 0, 3, 0, 45, 0, 141, 0, 26, 0, 3, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 17, 0, 59, 0, 9, 0, 3, 0, 31, 0, 120, 0, 61, 0, 94, 0, 23, 0, 24, 0, 3, 0, 23, 0, 27, 0, 24, 0, 27, 0, 94, 0, 120, 0, 18, 0, 3, 0, 17, 0, 45, 0, 141, 0, 3, 0, 17, 0, 18, 0, 66, 0, 94, 0, 14, 0, 17, 0, 120, 0, 18, 0, 3, 0, 17, 0, 59, 0, 9, 0, 3, 0, 23, 0, 33, 0, 24, 0, 120, 0, 
45, 0, 94, 0, 3, 0, 23, 0, 54, 0, 141, 0, 32, 0, 21, 0, 26, 0, 120, 0, 37, 0, 3, 0, 17, 0, 37, 0, 9, 0, 3, 0, 94, 0, 120, 0, 33, 0, 108, 0, 8, 0, 3, 0, 14, 0, 3, 0, 24, 0, 61, 0, 23, 0, 31, 0, 32, 0, 18, 0, 94, 0, 22, 0, 120, 0, 45, 0, 94, 0, 8, 0, 3, 0, 27, 0, 3, 0, 108, 0, 120, 0, 27, 0, 26, 0, 3, 0, 27, 0, 3, 0, 34, 0, 120, 0, 61, 0, 94, 0, 3, 0, 18, 0, 3, 0, 27, 0, 3, 0, 15, 0, 24, 0, 120, 0, 42, 0, 8, 0, 3, 0, 108, 0, 37, 0, 31, 0, 23, 0, 27, 0, 3, 0, 34, 0, 22, 0, 27, 0, 24, 0, 120, 0, 61, 0, 3, 0, 14, 0, 3, 0, 24, 0, 61, 0, 141, 0, 32, 0, 18, 0, 94, 0, 22, 0, 120, 0, 45, 0, 94, 0, 10, 0, 2]} +{"text": "Un arc-en-ciel se compose de deux arcs principaux : l'arc primaire et l'arc secondaire.", "phonemes": ["œ", "̃", "n", " ", "ˈ", "a", "ʁ", "k", "ɑ", "̃", "s", "j", "ˈ", "ɛ", "l", " ", "s", "ə", "-", " ", "k", "ɔ", "̃", "p", "ˈ", "ɔ", "z", " ", "d", "ə", "-", " ", "d", "ˈ", "ø", "z", " ", "ˈ", "a", "ʁ", "k", " ", "p", "ʁ", "ɛ", "̃", "s", "i", "p", "ˈ", "o", ":", " ", "l", "ˈ", "a", "ʁ", "k", " ", "p", "ʁ", "i", "m", "ˈ", "ɛ", "ʁ", " ", "e", " ", "l", "ˈ", "a", "ʁ", "k", " ", "s", "ə", "ɡ", "ɔ", "̃", "d", "ˈ", "ɛ", "ʁ", "."], "phoneme_ids": [1, 0, 45, 0, 141, 0, 26, 0, 3, 0, 120, 0, 14, 0, 94, 0, 23, 0, 51, 0, 141, 0, 31, 0, 22, 0, 120, 0, 61, 0, 24, 0, 3, 0, 31, 0, 59, 0, 9, 0, 3, 0, 23, 0, 54, 0, 141, 0, 28, 0, 120, 0, 54, 0, 38, 0, 3, 0, 17, 0, 59, 0, 9, 0, 3, 0, 17, 0, 120, 0, 42, 0, 38, 0, 3, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 28, 0, 94, 0, 61, 0, 141, 0, 31, 0, 21, 0, 28, 0, 120, 0, 27, 0, 11, 0, 3, 0, 24, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 28, 0, 94, 0, 21, 0, 25, 0, 120, 0, 61, 0, 94, 0, 3, 0, 18, 0, 3, 0, 24, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 31, 0, 59, 0, 66, 0, 54, 0, 141, 0, 17, 0, 120, 0, 61, 0, 94, 0, 10, 0, 2]} +{"text": "L'arc primaire est dû aux rayons ayant effectué une réflexion interne dans la goutte d'eau.", "phonemes": ["l", "ˈ", "a", "ʁ", "k", " ", "p", "ʁ", "i", "m", "ˈ", "ɛ", "ʁ", " ", "ɛ", " ", "d", "ˈ", "y", "ː", " ", "o", " 
", "ʁ", "ɛ", "j", "ˈ", "ɔ", "̃", "z", " ", "ɛ", "j", "ˈ", "ɑ", "̃", " ", "e", "f", "ɛ", "k", "t", "y", "ˈ", "e", " ", "y", "n", " ", "ʁ", "e", "f", "l", "ɛ", "k", "s", "j", "ˈ", "ɔ", "̃", " ", "ɛ", "̃", "t", "ˈ", "ɛ", "ʁ", "n", " ", "d", "ɑ", "̃", " ", "l", "a", "-", " ", "ɡ", "ˈ", "u", "t", " ", "d", "ˈ", "o", "."], "phoneme_ids": [1, 0, 24, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 28, 0, 94, 0, 21, 0, 25, 0, 120, 0, 61, 0, 94, 0, 3, 0, 61, 0, 3, 0, 17, 0, 120, 0, 37, 0, 122, 0, 3, 0, 27, 0, 3, 0, 94, 0, 61, 0, 22, 0, 120, 0, 54, 0, 141, 0, 38, 0, 3, 0, 61, 0, 22, 0, 120, 0, 51, 0, 141, 0, 3, 0, 18, 0, 19, 0, 61, 0, 23, 0, 32, 0, 37, 0, 120, 0, 18, 0, 3, 0, 37, 0, 26, 0, 3, 0, 94, 0, 18, 0, 19, 0, 24, 0, 61, 0, 23, 0, 31, 0, 22, 0, 120, 0, 54, 0, 141, 0, 3, 0, 61, 0, 141, 0, 32, 0, 120, 0, 61, 0, 94, 0, 26, 0, 3, 0, 17, 0, 51, 0, 141, 0, 3, 0, 24, 0, 14, 0, 9, 0, 3, 0, 66, 0, 120, 0, 33, 0, 32, 0, 3, 0, 17, 0, 120, 0, 27, 0, 10, 0, 2]} +{"text": "Les rayons ayant effectué deux réflexions internes dans la goutte d'eau provoquent un arc secondaire moins intense à l'extérieur du premier.", "phonemes": ["l", "e", "-", " ", "ʁ", "ɛ", "j", "ˈ", "ɔ", "̃", "z", " ", "ɛ", "j", "ˈ", "ɑ", "̃", " ", "e", "f", "ɛ", "k", "t", "y", "ˈ", "e", " ", "d", "ˈ", "ø", " ", "ʁ", "e", "f", "l", "ɛ", "k", "s", "j", "ˈ", "ɔ", "̃", "z", " ", "ɛ", "̃", "t", "ˈ", "ɛ", "ʁ", "n", " ", "d", "ɑ", "̃", " ", "l", "a", "-", " ", "ɡ", "ˈ", "u", "t", " ", "d", "ˈ", "o", " ", "p", "ʁ", "o", "v", "ˈ", "o", "k", "t", " ", "œ", "̃", "n", " ", "ˈ", "a", "ʁ", "k", " ", "s", "ə", "ɡ", "ɔ", "̃", "d", "ˈ", "ɛ", "ʁ", " ", "m", "w", "ˈ", "ɛ", "̃", "z", " ", "ɛ", "̃", "t", "ˈ", "ɑ", "̃", "s", " ", "a", " ", "l", "ɛ", "k", "s", "t", "e", "ʁ", "j", "ˈ", "œ", "ʁ", " ", "d", "y", "-", " ", "p", "ʁ", "ə", "m", "j", "ˈ", "e", "."], "phoneme_ids": [1, 0, 24, 0, 18, 0, 9, 0, 3, 0, 94, 0, 61, 0, 22, 0, 120, 0, 54, 0, 141, 0, 38, 0, 3, 0, 61, 0, 22, 0, 120, 0, 51, 0, 141, 0, 3, 0, 18, 0, 19, 0, 61, 0, 23, 0, 32, 0, 37, 0, 
120, 0, 18, 0, 3, 0, 17, 0, 120, 0, 42, 0, 3, 0, 94, 0, 18, 0, 19, 0, 24, 0, 61, 0, 23, 0, 31, 0, 22, 0, 120, 0, 54, 0, 141, 0, 38, 0, 3, 0, 61, 0, 141, 0, 32, 0, 120, 0, 61, 0, 94, 0, 26, 0, 3, 0, 17, 0, 51, 0, 141, 0, 3, 0, 24, 0, 14, 0, 9, 0, 3, 0, 66, 0, 120, 0, 33, 0, 32, 0, 3, 0, 17, 0, 120, 0, 27, 0, 3, 0, 28, 0, 94, 0, 27, 0, 34, 0, 120, 0, 27, 0, 23, 0, 32, 0, 3, 0, 45, 0, 141, 0, 26, 0, 3, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 31, 0, 59, 0, 66, 0, 54, 0, 141, 0, 17, 0, 120, 0, 61, 0, 94, 0, 3, 0, 25, 0, 35, 0, 120, 0, 61, 0, 141, 0, 38, 0, 3, 0, 61, 0, 141, 0, 32, 0, 120, 0, 51, 0, 141, 0, 31, 0, 3, 0, 14, 0, 3, 0, 24, 0, 61, 0, 23, 0, 31, 0, 32, 0, 18, 0, 94, 0, 22, 0, 120, 0, 45, 0, 94, 0, 3, 0, 17, 0, 37, 0, 9, 0, 3, 0, 28, 0, 94, 0, 59, 0, 25, 0, 22, 0, 120, 0, 18, 0, 10, 0, 2]} +{"text": "Les deux arcs sont séparés par la bande sombre d'Alexandre.", "phonemes": ["l", "e", "-", " ", "d", "ˈ", "ø", "z", " ", "ˈ", "a", "ʁ", "k", " ", "s", "ˈ", "ɔ", "̃", " ", "s", "e", "p", "a", "ʁ", "ˈ", "e", " ", "p", "a", "ʁ", " ", "l", "a", "-", " ", "b", "ˈ", "ɑ", "̃", "d", " ", "s", "ˈ", "ɔ", "̃", "b", "ʁ", " ", "d", "a", "l", "ɛ", "k", "s", "ˈ", "ɑ", "̃", "d", "ʁ", "."], "phoneme_ids": [1, 0, 24, 0, 18, 0, 9, 0, 3, 0, 17, 0, 120, 0, 42, 0, 38, 0, 3, 0, 120, 0, 14, 0, 94, 0, 23, 0, 3, 0, 31, 0, 120, 0, 54, 0, 141, 0, 3, 0, 31, 0, 18, 0, 28, 0, 14, 0, 94, 0, 120, 0, 18, 0, 3, 0, 28, 0, 14, 0, 94, 0, 3, 0, 24, 0, 14, 0, 9, 0, 3, 0, 15, 0, 120, 0, 51, 0, 141, 0, 17, 0, 3, 0, 31, 0, 120, 0, 54, 0, 141, 0, 15, 0, 94, 0, 3, 0, 17, 0, 14, 0, 24, 0, 61, 0, 23, 0, 31, 0, 120, 0, 51, 0, 141, 0, 17, 0, 94, 0, 10, 0, 2]} +{"text": "Buvez de ce whisky que le patron juge fameux.", "phonemes": ["b", "y", "v", "ˈ", "e", " ", "d", "ə", "-", " ", "s", "ə", "-", " ", "w", "ˈ", "ɪ", "s", "k", "i", " ", "k", "ə", " ", "l", "ə", "-", " ", "p", "a", "t", "ʁ", "ˈ", "ɔ", "̃", " ", "ʒ", "ˈ", "y", "ʒ", " ", "f", "a", "m", "ˈ", "ø", "."], "phoneme_ids": [1, 0, 15, 0, 37, 0, 34, 0, 120, 0, 18, 
0, 3, 0, 17, 0, 59, 0, 9, 0, 3, 0, 31, 0, 59, 0, 9, 0, 3, 0, 35, 0, 120, 0, 74, 0, 31, 0, 23, 0, 21, 0, 3, 0, 23, 0, 59, 0, 3, 0, 24, 0, 59, 0, 9, 0, 3, 0, 28, 0, 14, 0, 32, 0, 94, 0, 120, 0, 54, 0, 141, 0, 3, 0, 108, 0, 120, 0, 37, 0, 108, 0, 3, 0, 19, 0, 14, 0, 25, 0, 120, 0, 42, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_ka.jsonl b/etc/test_sentences/test_ka.jsonl new file mode 100644 index 0000000..5ceb3df --- /dev/null +++ b/etc/test_sentences/test_ka.jsonl @@ -0,0 +1,7 @@ +{"text": "ცისარტყელა — ატმოსფერული ოპტიკური და მეტეოროლოგიური მოვლენა, რომელიც ხშირად წვიმის შემდეგ ჩნდება.", "phonemes": ["t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", ",", " ", "ˈ", "a", "t", "m", "o", "s", "p", "ʰ", "ˌ", "e", "r", "u", "l", "i", " ", "ˈ", "o", "p", "t", "i", "k", "ˌ", "u", "r", "i", " ", "d", "a", " ", "m", "ˈ", "e", "t", "e", "ˌ", "o", "r", "o", "l", "ˌ", "o", "ɡ", "i", "ˌ", "u", "r", "i", " ", "m", "ˈ", "o", "v", "l", "e", "n", "a", ".", " ", "r", "ˈ", "o", "m", "e", "l", "i", "t", "s", "ʰ", " ", "x", "ʃ", "ˈ", "i", "r", "a", "d", " ", "t", "s", "v", "ˈ", "i", "m", "i", "s", " ", "ʃ", "ˈ", "e", "m", "d", "e", "ɡ", " ", "t", "ʃ", "n", "d", "ˈ", "e", "b", "a"], "phoneme_ids": [1, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 8, 0, 3, 0, 120, 0, 14, 0, 32, 0, 25, 0, 27, 0, 31, 0, 28, 0, 145, 0, 121, 0, 18, 0, 30, 0, 33, 0, 24, 0, 21, 0, 3, 0, 120, 0, 27, 0, 28, 0, 32, 0, 21, 0, 23, 0, 121, 0, 33, 0, 30, 0, 21, 0, 3, 0, 17, 0, 14, 0, 3, 0, 25, 0, 120, 0, 18, 0, 32, 0, 18, 0, 121, 0, 27, 0, 30, 0, 27, 0, 24, 0, 121, 0, 27, 0, 66, 0, 21, 0, 121, 0, 33, 0, 30, 0, 21, 0, 3, 0, 25, 0, 120, 0, 27, 0, 34, 0, 24, 0, 18, 0, 26, 0, 14, 0, 10, 0, 3, 0, 30, 0, 120, 0, 27, 0, 25, 0, 18, 0, 24, 0, 21, 0, 32, 0, 31, 0, 145, 0, 3, 0, 36, 0, 96, 0, 120, 0, 21, 0, 30, 0, 14, 0, 17, 0, 3, 0, 32, 0, 31, 0, 34, 0, 120, 0, 21, 0, 25, 0, 21, 0, 31, 0, 3, 0, 96, 0, 120, 0, 18, 0, 25, 0, 17, 0, 18, 0, 66, 0, 
3, 0, 32, 0, 96, 0, 26, 0, 17, 0, 120, 0, 18, 0, 15, 0, 14, 0, 2]} +{"text": "ეს თავისებური რკალია ან წრეხაზი, რომელიც ფერების სპექტრისგან შედგება.", "phonemes": ["ˈ", "e", "s", " ", "t", "ʰ", "ˈ", "a", "v", "i", "s", "ˌ", "e", "b", "u", "r", "i", " ", "r", "k", "ˈ", "a", "l", "i", "a", " ", "ˈ", "a", "n", " ", "t", "s", "r", "ˈ", "e", "x", "a", "z", "i", ",", " ", "r", "ˈ", "o", "m", "e", "l", "i", "t", "s", "ʰ", " ", "p", "ʰ", "ˈ", "e", "r", "e", "b", "i", "s", " ", "s", "p", "ˈ", "e", "k", "ʰ", "t", "r", "i", "s", "ɡ", "a", "n", " ", "ʃ", "ˈ", "e", "d", "ɡ", "e", "b", "a", "."], "phoneme_ids": [1, 0, 120, 0, 18, 0, 31, 0, 3, 0, 32, 0, 145, 0, 120, 0, 14, 0, 34, 0, 21, 0, 31, 0, 121, 0, 18, 0, 15, 0, 33, 0, 30, 0, 21, 0, 3, 0, 30, 0, 23, 0, 120, 0, 14, 0, 24, 0, 21, 0, 14, 0, 3, 0, 120, 0, 14, 0, 26, 0, 3, 0, 32, 0, 31, 0, 30, 0, 120, 0, 18, 0, 36, 0, 14, 0, 38, 0, 21, 0, 8, 0, 3, 0, 30, 0, 120, 0, 27, 0, 25, 0, 18, 0, 24, 0, 21, 0, 32, 0, 31, 0, 145, 0, 3, 0, 28, 0, 145, 0, 120, 0, 18, 0, 30, 0, 18, 0, 15, 0, 21, 0, 31, 0, 3, 0, 31, 0, 28, 0, 120, 0, 18, 0, 23, 0, 145, 0, 32, 0, 30, 0, 21, 0, 31, 0, 66, 0, 14, 0, 26, 0, 3, 0, 96, 0, 120, 0, 18, 0, 17, 0, 66, 0, 18, 0, 15, 0, 14, 0, 10, 0, 2]} +{"text": "ცისარტყელა შედგება შვიდი ფერისგან: წითელი, ნარინჯისფერი, ყვითელი, მწვანე, ცისფერი, ლურჯი, იისფერი.", "phonemes": ["t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", " ", "ʃ", "ˈ", "e", "d", "ɡ", "e", "b", "a", " ", "ʃ", "v", "ˈ", "i", "d", "i", " ", "p", "ʰ", "ˈ", "e", "r", "i", "s", "ɡ", "a", "n", ":", " ", "t", "s", "ˈ", "i", "t", "ʰ", "e", "l", "i", ",", " ", "n", "ˈ", "a", "r", "i", "n", "d", "ʒ", "ˌ", "i", "s", "p", "ʰ", "e", "r", "i", ",", " ", "q", "v", "ˈ", "i", "t", "ʰ", "e", "l", "i", ",", " ", "m", "t", "s", "v", "ˈ", "a", "n", "e", ",", " ", "t", "s", "ʰ", "ˈ", "i", "s", "p", "ʰ", "e", "r", "i", ",", " ", "l", "ˈ", "u", "r", "d", "ʒ", "i", ",", " ", "ˈ", "i", "i", "s", "p", "ʰ", "ˌ", "e", "r", "i", "."], "phoneme_ids": [1, 0, 32, 0, 
31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 3, 0, 96, 0, 120, 0, 18, 0, 17, 0, 66, 0, 18, 0, 15, 0, 14, 0, 3, 0, 96, 0, 34, 0, 120, 0, 21, 0, 17, 0, 21, 0, 3, 0, 28, 0, 145, 0, 120, 0, 18, 0, 30, 0, 21, 0, 31, 0, 66, 0, 14, 0, 26, 0, 11, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 32, 0, 145, 0, 18, 0, 24, 0, 21, 0, 8, 0, 3, 0, 26, 0, 120, 0, 14, 0, 30, 0, 21, 0, 26, 0, 17, 0, 108, 0, 121, 0, 21, 0, 31, 0, 28, 0, 145, 0, 18, 0, 30, 0, 21, 0, 8, 0, 3, 0, 29, 0, 34, 0, 120, 0, 21, 0, 32, 0, 145, 0, 18, 0, 24, 0, 21, 0, 8, 0, 3, 0, 25, 0, 32, 0, 31, 0, 34, 0, 120, 0, 14, 0, 26, 0, 18, 0, 8, 0, 3, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 28, 0, 145, 0, 18, 0, 30, 0, 21, 0, 8, 0, 3, 0, 24, 0, 120, 0, 33, 0, 30, 0, 17, 0, 108, 0, 21, 0, 8, 0, 3, 0, 120, 0, 21, 0, 21, 0, 31, 0, 28, 0, 145, 0, 121, 0, 18, 0, 30, 0, 21, 0, 10, 0, 2]} +{"text": "ცენტრი წრისა, რომელსაც ცისარტყელა შემოწერს, ძევს წრფეზე, რომელიც გადის დამკვირვებელსა და მზეს შორის, ამავდროულად ცისარტყელას დანახვისას მზე ყოველთვის მდებარეობს დამკვირვებლის ზურგს უკან, შესაბამისად, სპეციალური ოპტიკური ხელსაწყოების გარეშე შეუძლებელია ერთდროულად ცისარტყელასა და მზის დანახვა.", "phonemes": ["t", "s", "ʰ", "ˈ", "e", "n", "t", "r", "i", " ", "t", "s", "r", "ˈ", "i", "s", "a", ",", " ", "r", "ˈ", "o", "m", "e", "l", "s", "a", "t", "s", "ʰ", " ", "t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", " ", "ʃ", "ˈ", "e", "m", "o", "t", "s", "e", "r", "s", ",", " ", "d", "z", "ˈ", "e", "v", "s", " ", "t", "s", "r", "p", "ʰ", "ˈ", "e", "z", "e", ",", " ", "r", "ˈ", "o", "m", "e", "l", "i", "t", "s", "ʰ", " ", "ɡ", "ˈ", "a", "d", "i", "s", " ", "d", "ˈ", "a", "m", "k", "v", "i", "r", "v", "ˌ", "e", "b", "e", "l", "s", "a", " ", "d", "a", " ", "m", "z", "ˈ", "e", "s", " ", "ʃ", "ˈ", "o", "r", "i", "s", ",", " ", "ˈ", "a", "m", "a", "v", "d", "r", "ˌ", "o", "u", "l", "a", "d", " ", "t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", "s", " ", 
"d", "ˈ", "a", "n", "a", "x", "v", "ˌ", "i", "s", "a", "s", " ", "m", "z", "ˈ", "e", " ", "q", "ˈ", "o", "v", "e", "l", "t", "ʰ", "v", "i", "s", " ", "m", "d", "ˈ", "e", "b", "a", "r", "ˌ", "e", "o", "b", "s", " ", "d", "ˈ", "a", "m", "k", "v", "i", "r", "v", "ˌ", "e", "b", "l", "i", "s", " ", "z", "ˈ", "u", "r", "ɡ", "s", " ", "ˈ", "u", "k", "a", "n", ",", " ", "ʃ", "ˈ", "e", "s", "a", "b", "ˌ", "a", "m", "i", "s", "a", "d", ",", " ", "s", "p", "ˈ", "e", "t", "s", "ʰ", "i", "ˌ", "a", "l", "u", "r", "i", " ", "ˈ", "o", "p", "t", "i", "k", "ˌ", "u", "r", "i", " ", "x", "ˈ", "e", "l", "s", "a", "t", "s", "q", "ˌ", "o", "e", "b", "i", "s", " ", "ɡ", "ˈ", "a", "r", "e", "ʃ", "e", " ", "ʃ", "ˈ", "e", "u", "d", "z", "l", "ˌ", "e", "b", "e", "l", "ˌ", "i", "a", " ", "ˈ", "e", "r", "t", "ʰ", "d", "r", "o", "ˌ", "u", "l", "a", "d", " ", "t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", "s", "a", " ", "d", "a", " ", "m", "z", "ˈ", "i", "s", " ", "d", "ˈ", "a", "n", "a", "x", "v", "a", "."], "phoneme_ids": [1, 0, 32, 0, 31, 0, 145, 0, 120, 0, 18, 0, 26, 0, 32, 0, 30, 0, 21, 0, 3, 0, 32, 0, 31, 0, 30, 0, 120, 0, 21, 0, 31, 0, 14, 0, 8, 0, 3, 0, 30, 0, 120, 0, 27, 0, 25, 0, 18, 0, 24, 0, 31, 0, 14, 0, 32, 0, 31, 0, 145, 0, 3, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 3, 0, 96, 0, 120, 0, 18, 0, 25, 0, 27, 0, 32, 0, 31, 0, 18, 0, 30, 0, 31, 0, 8, 0, 3, 0, 17, 0, 38, 0, 120, 0, 18, 0, 34, 0, 31, 0, 3, 0, 32, 0, 31, 0, 30, 0, 28, 0, 145, 0, 120, 0, 18, 0, 38, 0, 18, 0, 8, 0, 3, 0, 30, 0, 120, 0, 27, 0, 25, 0, 18, 0, 24, 0, 21, 0, 32, 0, 31, 0, 145, 0, 3, 0, 66, 0, 120, 0, 14, 0, 17, 0, 21, 0, 31, 0, 3, 0, 17, 0, 120, 0, 14, 0, 25, 0, 23, 0, 34, 0, 21, 0, 30, 0, 34, 0, 121, 0, 18, 0, 15, 0, 18, 0, 24, 0, 31, 0, 14, 0, 3, 0, 17, 0, 14, 0, 3, 0, 25, 0, 38, 0, 120, 0, 18, 0, 31, 0, 3, 0, 96, 0, 120, 0, 27, 0, 30, 0, 21, 0, 31, 0, 8, 0, 3, 0, 120, 0, 14, 0, 25, 0, 14, 0, 34, 0, 17, 0, 30, 0, 121, 0, 27, 
0, 33, 0, 24, 0, 14, 0, 17, 0, 3, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 31, 0, 3, 0, 17, 0, 120, 0, 14, 0, 26, 0, 14, 0, 36, 0, 34, 0, 121, 0, 21, 0, 31, 0, 14, 0, 31, 0, 3, 0, 25, 0, 38, 0, 120, 0, 18, 0, 3, 0, 29, 0, 120, 0, 27, 0, 34, 0, 18, 0, 24, 0, 32, 0, 145, 0, 34, 0, 21, 0, 31, 0, 3, 0, 25, 0, 17, 0, 120, 0, 18, 0, 15, 0, 14, 0, 30, 0, 121, 0, 18, 0, 27, 0, 15, 0, 31, 0, 3, 0, 17, 0, 120, 0, 14, 0, 25, 0, 23, 0, 34, 0, 21, 0, 30, 0, 34, 0, 121, 0, 18, 0, 15, 0, 24, 0, 21, 0, 31, 0, 3, 0, 38, 0, 120, 0, 33, 0, 30, 0, 66, 0, 31, 0, 3, 0, 120, 0, 33, 0, 23, 0, 14, 0, 26, 0, 8, 0, 3, 0, 96, 0, 120, 0, 18, 0, 31, 0, 14, 0, 15, 0, 121, 0, 14, 0, 25, 0, 21, 0, 31, 0, 14, 0, 17, 0, 8, 0, 3, 0, 31, 0, 28, 0, 120, 0, 18, 0, 32, 0, 31, 0, 145, 0, 21, 0, 121, 0, 14, 0, 24, 0, 33, 0, 30, 0, 21, 0, 3, 0, 120, 0, 27, 0, 28, 0, 32, 0, 21, 0, 23, 0, 121, 0, 33, 0, 30, 0, 21, 0, 3, 0, 36, 0, 120, 0, 18, 0, 24, 0, 31, 0, 14, 0, 32, 0, 31, 0, 29, 0, 121, 0, 27, 0, 18, 0, 15, 0, 21, 0, 31, 0, 3, 0, 66, 0, 120, 0, 14, 0, 30, 0, 18, 0, 96, 0, 18, 0, 3, 0, 96, 0, 120, 0, 18, 0, 33, 0, 17, 0, 38, 0, 24, 0, 121, 0, 18, 0, 15, 0, 18, 0, 24, 0, 121, 0, 21, 0, 14, 0, 3, 0, 120, 0, 18, 0, 30, 0, 32, 0, 145, 0, 17, 0, 30, 0, 27, 0, 121, 0, 33, 0, 24, 0, 14, 0, 17, 0, 3, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 31, 0, 14, 0, 3, 0, 17, 0, 14, 0, 3, 0, 25, 0, 38, 0, 120, 0, 21, 0, 31, 0, 3, 0, 17, 0, 120, 0, 14, 0, 26, 0, 14, 0, 36, 0, 34, 0, 14, 0, 10, 0, 2]} +{"text": "ხმელეთზე მდებარე დამკვირვებლისთვის ცისარტყელას, როგორც წესი, აქვს რკალის, წრის ნაწილის, ფორმა.", "phonemes": ["x", "m", "ˈ", "e", "l", "e", "t", "ʰ", "z", "e", " ", "m", "d", "ˈ", "e", "b", "a", "r", "e", " ", "d", "ˈ", "a", "m", "k", "v", "i", "r", "v", "ˌ", "e", "b", "l", "i", "s", "t", "ʰ", "v", "i", "s", " ", "t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", "s", ",", " ", 
"r", "ˈ", "o", "ɡ", "o", "r", "t", "s", "ʰ", " ", "t", "s", "ˈ", "e", "s", "i", ",", " ", "ˈ", "a", "k", "ʰ", "v", "s", " ", "r", "k", "ˈ", "a", "l", "i", "s", ",", " ", "t", "s", "r", "ˈ", "i", "s", " ", "n", "ˈ", "a", "t", "s", "i", "l", "i", "s", ",", " ", "p", "ʰ", "ˈ", "o", "r", "m", "a", "."], "phoneme_ids": [1, 0, 36, 0, 25, 0, 120, 0, 18, 0, 24, 0, 18, 0, 32, 0, 145, 0, 38, 0, 18, 0, 3, 0, 25, 0, 17, 0, 120, 0, 18, 0, 15, 0, 14, 0, 30, 0, 18, 0, 3, 0, 17, 0, 120, 0, 14, 0, 25, 0, 23, 0, 34, 0, 21, 0, 30, 0, 34, 0, 121, 0, 18, 0, 15, 0, 24, 0, 21, 0, 31, 0, 32, 0, 145, 0, 34, 0, 21, 0, 31, 0, 3, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 31, 0, 8, 0, 3, 0, 30, 0, 120, 0, 27, 0, 66, 0, 27, 0, 30, 0, 32, 0, 31, 0, 145, 0, 3, 0, 32, 0, 31, 0, 120, 0, 18, 0, 31, 0, 21, 0, 8, 0, 3, 0, 120, 0, 14, 0, 23, 0, 145, 0, 34, 0, 31, 0, 3, 0, 30, 0, 23, 0, 120, 0, 14, 0, 24, 0, 21, 0, 31, 0, 8, 0, 3, 0, 32, 0, 31, 0, 30, 0, 120, 0, 21, 0, 31, 0, 3, 0, 26, 0, 120, 0, 14, 0, 32, 0, 31, 0, 21, 0, 24, 0, 21, 0, 31, 0, 8, 0, 3, 0, 28, 0, 145, 0, 120, 0, 27, 0, 30, 0, 25, 0, 14, 0, 10, 0, 2]} +{"text": "რაც უფრო მაღალია დაკვირვების წერტილი — მით უფრო სრულია ეს რკალი (მთიდან ან თვითმფრინავიდან შესაძლებელია მთლიანი წრის დანახვაც).", "phonemes": ["r", "ˈ", "a", "t", "s", "ʰ", " ", "ˈ", "u", "p", "ʰ", "r", "o", " ", "m", "ˈ", "a", "ɣ", "a", "l", "ˌ", "i", "a", " ", "d", "ˈ", "a", "k", "v", "i", "r", "v", "ˌ", "e", "b", "i", "s", " ", "t", "s", "ˈ", "e", "r", "t", "i", "l", "i", ".", " ", "m", "ˈ", "i", "t", "ʰ", " ", "ˈ", "u", "p", "ʰ", "r", "o", " ", "s", "r", "ˈ", "u", "l", "i", "a", " ", "ˈ", "e", "s", " ", "r", "k", "ˈ", "a", "l", "i", " ", "m", "t", "ʰ", "ˈ", "i", "d", "a", "n", " ", "ˈ", "a", "n", " ", "t", "ʰ", "v", "ˈ", "i", "t", "ʰ", "m", "p", "ʰ", "r", "i", "n", "ˌ", "a", "v", "i", "d", "a", "n", " ", "ʃ", "ˈ", "e", "s", "a", "d", "z", "l", "ˌ", "e", "b", "e", "l", "ˌ", "i", "a", " ", "m", "t", "ʰ", "l", "ˈ", 
"i", "a", "n", "i", " ", "t", "s", "r", "ˈ", "i", "s", " ", "d", "ˈ", "a", "n", "a", "x", "v", "a", "t", "s", "ʰ"], "phoneme_ids": [1, 0, 30, 0, 120, 0, 14, 0, 32, 0, 31, 0, 145, 0, 3, 0, 120, 0, 33, 0, 28, 0, 145, 0, 30, 0, 27, 0, 3, 0, 25, 0, 120, 0, 14, 0, 68, 0, 14, 0, 24, 0, 121, 0, 21, 0, 14, 0, 3, 0, 17, 0, 120, 0, 14, 0, 23, 0, 34, 0, 21, 0, 30, 0, 34, 0, 121, 0, 18, 0, 15, 0, 21, 0, 31, 0, 3, 0, 32, 0, 31, 0, 120, 0, 18, 0, 30, 0, 32, 0, 21, 0, 24, 0, 21, 0, 10, 0, 3, 0, 25, 0, 120, 0, 21, 0, 32, 0, 145, 0, 3, 0, 120, 0, 33, 0, 28, 0, 145, 0, 30, 0, 27, 0, 3, 0, 31, 0, 30, 0, 120, 0, 33, 0, 24, 0, 21, 0, 14, 0, 3, 0, 120, 0, 18, 0, 31, 0, 3, 0, 30, 0, 23, 0, 120, 0, 14, 0, 24, 0, 21, 0, 3, 0, 25, 0, 32, 0, 145, 0, 120, 0, 21, 0, 17, 0, 14, 0, 26, 0, 3, 0, 120, 0, 14, 0, 26, 0, 3, 0, 32, 0, 145, 0, 34, 0, 120, 0, 21, 0, 32, 0, 145, 0, 25, 0, 28, 0, 145, 0, 30, 0, 21, 0, 26, 0, 121, 0, 14, 0, 34, 0, 21, 0, 17, 0, 14, 0, 26, 0, 3, 0, 96, 0, 120, 0, 18, 0, 31, 0, 14, 0, 17, 0, 38, 0, 24, 0, 121, 0, 18, 0, 15, 0, 18, 0, 24, 0, 121, 0, 21, 0, 14, 0, 3, 0, 25, 0, 32, 0, 145, 0, 24, 0, 120, 0, 21, 0, 14, 0, 26, 0, 21, 0, 3, 0, 32, 0, 31, 0, 30, 0, 120, 0, 21, 0, 31, 0, 3, 0, 17, 0, 120, 0, 14, 0, 26, 0, 14, 0, 36, 0, 34, 0, 14, 0, 32, 0, 31, 0, 145, 0, 2]} +{"text": "როდესაც მზე აღიმართება ჰორიზონტიდან 42 გრადუსზე უფრო მაღლა, ცისარტყელა დედამიწის ზედაპირიდან უხილავია.", "phonemes": ["r", "ˈ", "o", "d", "e", "s", "a", "t", "s", "ʰ", " ", "m", "z", "ˈ", "e", " ", "ˈ", "a", "ɣ", "i", "m", "ˌ", "a", "r", "t", "ʰ", "e", "b", "a", " ", "h", "ˈ", "o", "r", "i", "z", "ˌ", "o", "n", "t", "i", "d", "a", "n", " ", "ˈ", "o", "r", "m", "o", "t", "s", "d", "a", "ˈ", "o", "r", "i", " ", "ɡ", "r", "ˈ", "a", "d", "u", "s", "z", "e", " ", "ˈ", "u", "p", "ʰ", "r", "o", " ", "m", "ˈ", "a", "ɣ", "l", "a", ",", " ", "t", "s", "ʰ", "ˈ", "i", "s", "a", "r", "t", "q", "ˌ", "e", "l", "a", " ", "d", "ˈ", "e", "d", "a", "m", "ˌ", "i", "t", "s", "i", "s", " ", "z", "ˈ", "e", "d", "a", "p", 
"ˌ", "i", "r", "i", "d", "a", "n", " ", "ˈ", "u", "x", "i", "l", "ˌ", "a", "v", "i", "a", "."], "phoneme_ids": [1, 0, 30, 0, 120, 0, 27, 0, 17, 0, 18, 0, 31, 0, 14, 0, 32, 0, 31, 0, 145, 0, 3, 0, 25, 0, 38, 0, 120, 0, 18, 0, 3, 0, 120, 0, 14, 0, 68, 0, 21, 0, 25, 0, 121, 0, 14, 0, 30, 0, 32, 0, 145, 0, 18, 0, 15, 0, 14, 0, 3, 0, 20, 0, 120, 0, 27, 0, 30, 0, 21, 0, 38, 0, 121, 0, 27, 0, 26, 0, 32, 0, 21, 0, 17, 0, 14, 0, 26, 0, 3, 0, 120, 0, 27, 0, 30, 0, 25, 0, 27, 0, 32, 0, 31, 0, 17, 0, 14, 0, 120, 0, 27, 0, 30, 0, 21, 0, 3, 0, 66, 0, 30, 0, 120, 0, 14, 0, 17, 0, 33, 0, 31, 0, 38, 0, 18, 0, 3, 0, 120, 0, 33, 0, 28, 0, 145, 0, 30, 0, 27, 0, 3, 0, 25, 0, 120, 0, 14, 0, 68, 0, 24, 0, 14, 0, 8, 0, 3, 0, 32, 0, 31, 0, 145, 0, 120, 0, 21, 0, 31, 0, 14, 0, 30, 0, 32, 0, 29, 0, 121, 0, 18, 0, 24, 0, 14, 0, 3, 0, 17, 0, 120, 0, 18, 0, 17, 0, 14, 0, 25, 0, 121, 0, 21, 0, 32, 0, 31, 0, 21, 0, 31, 0, 3, 0, 38, 0, 120, 0, 18, 0, 17, 0, 14, 0, 28, 0, 121, 0, 21, 0, 30, 0, 21, 0, 17, 0, 14, 0, 26, 0, 3, 0, 120, 0, 33, 0, 36, 0, 21, 0, 24, 0, 121, 0, 14, 0, 34, 0, 21, 0, 14, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_pl.jsonl b/etc/test_sentences/test_pl.jsonl new file mode 100644 index 0000000..49a0aa6 --- /dev/null +++ b/etc/test_sentences/test_pl.jsonl @@ -0,0 +1,6 @@ +{"text": "Tęcza, zjawisko optyczne i meteorologiczne, występujące w postaci charakterystycznego wielobarwnego łuku powstającego w wyniku rozszczepienia światła widzialnego, zwykle promieniowania słonecznego, załamującego się i odbijającego wewnątrz licznych kropli wody mających kształt zbliżony do kulistego.", "phonemes": ["t", "ˈ", "ɛ", "n", "t", "ʃ", "a", ",", " ", "z", "j", "a", "v", "ˈ", "i", "s", "k", "ɔ", "ː", " ", "ɔ", "p", "t", "ˈ", "ɨ", "t", "ʃ", "n", "ɛ", " ", "i", " ", "m", "ˌ", "ɛ", "t", "ɛ", "ˌ", "ɔ", "r", "ɔ", "l", "ɔ", "ɡ", "ʲ", "ˈ", "i", "t", "ʃ", "n", "ɛ", ",", " ", "v", "ˌ", "ɨ", "s", "t", "ɛ", "m", "p", "u", "j", "ˈ", "ɔ", "n", "t", "s", "ɛ", " ", "f", " ", "p", "ɔ", "s", "t", "ˈ", 
"a", "t", "ɕ", "i", " ", "x", "ˌ", "a", "r", "a", "k", "t", "ˌ", "ɛ", "r", "ɨ", "s", "t", "ɨ", "t", "ʃ", "n", "ˈ", "ɛ", "ɡ", "ɔ", " ", "v", "ʲ", "ˌ", "ɛ", "l", "ɔ", "b", "a", "r", "v", "n", "ˈ", "ɛ", "ɡ", "ɔ", " ", "w", "ˈ", "u", "k", "u", " ", "p", "ˌ", "ɔ", "f", "s", "t", "a", "j", "ɔ", "n", "t", "s", "ˈ", "ɛ", "ɡ", "ɔ", " ", "w", " ", "v", "ɨ", "ɲ", "ˈ", "i", "k", "u", " ", "r", "ˌ", "ɔ", "s", "ʃ", "t", "ʃ", "ɛ", "p", "ʲ", "ˈ", "ɛ", "ɲ", "ʲ", "a", " ", "ɕ", "f", "ʲ", "ˈ", "a", "t", "w", "a", " ", "v", "ˌ", "i", "d", "ʑ", "a", "l", "n", "ˈ", "ɛ", "ɡ", "ɔ", ",", " ", "z", "v", "ˈ", "ɨ", "k", "l", "ɛ", " ", "p", "r", "ˌ", "ɔ", "m", "j", "ɛ", "ɲ", "ʲ", "ɔ", "v", "ˈ", "a", "ɲ", "ʲ", "a", " ", "s", "w", "ˌ", "ɔ", "n", "ɛ", "t", "ʃ", "n", "ˈ", "ɛ", "ɡ", "ɔ", ",", " ", "z", "ˌ", "a", "w", "a", "m", "ˌ", "u", "j", "ɔ", "n", "t", "s", "ˈ", "ɛ", "ɡ", "ɔ", " ", "ɕ", "ɛ", " ", "i", " ", "ˌ", "ɔ", "d", "b", "ʲ", "i", "j", "ˌ", "a", "j", "ɔ", "n", "t", "s", "ˈ", "ɛ", "ɡ", "ɔ", " ", "v", "ˈ", "ɛ", "v", "n", "ɔ", "n", "t", "ʃ", " ", "l", "ˈ", "i", "t", "ʃ", "n", "ɨ", "x", " ", "k", "r", "ˈ", "ɔ", "p", "l", "i", " ", "v", "ˈ", "ɔ", "d", "ɨ", " ", "m", "a", "j", "ˈ", "ɔ", "n", "t", "s", "ɨ", "x", " ", "k", "ʃ", "t", "ˈ", "a", "w", "d", " ", "z", "b", "l", "i", "ʒ", "ˈ", "ɔ", "n", "ɨ", " ", "d", "ɔ", " ", "k", "ˌ", "u", "l", "i", "s", "t", "ˈ", "ɛ", "ɡ", "ɔ", "."], "phoneme_ids": [1, 0, 32, 0, 120, 0, 61, 0, 26, 0, 32, 0, 96, 0, 14, 0, 8, 0, 3, 0, 38, 0, 22, 0, 14, 0, 34, 0, 120, 0, 21, 0, 31, 0, 23, 0, 54, 0, 122, 0, 3, 0, 54, 0, 28, 0, 32, 0, 120, 0, 73, 0, 32, 0, 96, 0, 26, 0, 61, 0, 3, 0, 21, 0, 3, 0, 25, 0, 121, 0, 61, 0, 32, 0, 61, 0, 121, 0, 54, 0, 30, 0, 54, 0, 24, 0, 54, 0, 66, 0, 119, 0, 120, 0, 21, 0, 32, 0, 96, 0, 26, 0, 61, 0, 8, 0, 3, 0, 34, 0, 121, 0, 73, 0, 31, 0, 32, 0, 61, 0, 25, 0, 28, 0, 33, 0, 22, 0, 120, 0, 54, 0, 26, 0, 32, 0, 31, 0, 61, 0, 3, 0, 19, 0, 3, 0, 28, 0, 54, 0, 31, 0, 32, 0, 120, 0, 14, 0, 32, 0, 55, 0, 21, 0, 3, 0, 36, 0, 121, 0, 14, 0, 30, 0, 
14, 0, 23, 0, 32, 0, 121, 0, 61, 0, 30, 0, 73, 0, 31, 0, 32, 0, 73, 0, 32, 0, 96, 0, 26, 0, 120, 0, 61, 0, 66, 0, 54, 0, 3, 0, 34, 0, 119, 0, 121, 0, 61, 0, 24, 0, 54, 0, 15, 0, 14, 0, 30, 0, 34, 0, 26, 0, 120, 0, 61, 0, 66, 0, 54, 0, 3, 0, 35, 0, 120, 0, 33, 0, 23, 0, 33, 0, 3, 0, 28, 0, 121, 0, 54, 0, 19, 0, 31, 0, 32, 0, 14, 0, 22, 0, 54, 0, 26, 0, 32, 0, 31, 0, 120, 0, 61, 0, 66, 0, 54, 0, 3, 0, 35, 0, 3, 0, 34, 0, 73, 0, 82, 0, 120, 0, 21, 0, 23, 0, 33, 0, 3, 0, 30, 0, 121, 0, 54, 0, 31, 0, 96, 0, 32, 0, 96, 0, 61, 0, 28, 0, 119, 0, 120, 0, 61, 0, 82, 0, 119, 0, 14, 0, 3, 0, 55, 0, 19, 0, 119, 0, 120, 0, 14, 0, 32, 0, 35, 0, 14, 0, 3, 0, 34, 0, 121, 0, 21, 0, 17, 0, 107, 0, 14, 0, 24, 0, 26, 0, 120, 0, 61, 0, 66, 0, 54, 0, 8, 0, 3, 0, 38, 0, 34, 0, 120, 0, 73, 0, 23, 0, 24, 0, 61, 0, 3, 0, 28, 0, 30, 0, 121, 0, 54, 0, 25, 0, 22, 0, 61, 0, 82, 0, 119, 0, 54, 0, 34, 0, 120, 0, 14, 0, 82, 0, 119, 0, 14, 0, 3, 0, 31, 0, 35, 0, 121, 0, 54, 0, 26, 0, 61, 0, 32, 0, 96, 0, 26, 0, 120, 0, 61, 0, 66, 0, 54, 0, 8, 0, 3, 0, 38, 0, 121, 0, 14, 0, 35, 0, 14, 0, 25, 0, 121, 0, 33, 0, 22, 0, 54, 0, 26, 0, 32, 0, 31, 0, 120, 0, 61, 0, 66, 0, 54, 0, 3, 0, 55, 0, 61, 0, 3, 0, 21, 0, 3, 0, 121, 0, 54, 0, 17, 0, 15, 0, 119, 0, 21, 0, 22, 0, 121, 0, 14, 0, 22, 0, 54, 0, 26, 0, 32, 0, 31, 0, 120, 0, 61, 0, 66, 0, 54, 0, 3, 0, 34, 0, 120, 0, 61, 0, 34, 0, 26, 0, 54, 0, 26, 0, 32, 0, 96, 0, 3, 0, 24, 0, 120, 0, 21, 0, 32, 0, 96, 0, 26, 0, 73, 0, 36, 0, 3, 0, 23, 0, 30, 0, 120, 0, 54, 0, 28, 0, 24, 0, 21, 0, 3, 0, 34, 0, 120, 0, 54, 0, 17, 0, 73, 0, 3, 0, 25, 0, 14, 0, 22, 0, 120, 0, 54, 0, 26, 0, 32, 0, 31, 0, 73, 0, 36, 0, 3, 0, 23, 0, 96, 0, 32, 0, 120, 0, 14, 0, 35, 0, 17, 0, 3, 0, 38, 0, 15, 0, 24, 0, 21, 0, 108, 0, 120, 0, 54, 0, 26, 0, 73, 0, 3, 0, 17, 0, 54, 0, 3, 0, 23, 0, 121, 0, 33, 0, 24, 0, 21, 0, 31, 0, 32, 0, 120, 0, 61, 0, 66, 0, 54, 0, 10, 0, 2]} +{"text": "Rozszczepienie światła jest wynikiem zjawiska dyspersji, powodującego różnice w kącie załamania światła o różnej 
długości fali przy przejściu z powietrza do wody i z wody do powietrza.", "phonemes": ["r", "ˌ", "ɔ", "s", "ʃ", "t", "ʃ", "ɛ", "p", "ʲ", "ˈ", "ɛ", "ɲ", "ʲ", "ɛ", " ", "ɕ", "f", "ʲ", "ˈ", "a", "t", "w", "a", " ", "j", "ɛ", "z", "d", " ", "v", "ɨ", "ɲ", "ˈ", "i", "k", "ʲ", "ɛ", "m", " ", "z", "j", "a", "v", "ˈ", "i", "s", "k", "a", " ", "d", "ɨ", "s", "p", "ˈ", "ɛ", "r", "s", "j", "i", ",", " ", "p", "ˌ", "ɔ", "v", "ɔ", "d", "ˌ", "u", "j", "ɔ", "n", "t", "s", "ˈ", "ɛ", "ɡ", "ɔ", " ", "r", "u", "ʒ", "ɲ", "ˈ", "i", "t", "s", "ɛ", " ", "f", " ", "k", "ˈ", "ɔ", "ɲ", "t", "ɕ", "ɛ", " ", "z", "ˌ", "a", "w", "a", "m", "ˈ", "a", "ɲ", "ʲ", "a", " ", "ɕ", "f", "ʲ", "ˈ", "a", "t", "w", "a", " ", "ɔ", " ", "r", "ˈ", "u", "ʒ", "n", "ɛ", "j", " ", "d", "w", "u", "ɡ", "ˈ", "ɔ", "ɕ", "t", "ɕ", "i", " ", "f", "ˈ", "a", "l", "i", " ", "p", "ʃ", "ɨ", " ", "p", "ʃ", "ˈ", "ɛ", "j", "ɕ", "t", "ɕ", "u", " ", "s", " ", "p", "ɔ", "v", "ʲ", "ˈ", "ɛ", "t", "ʃ", "a", " ", "d", "ɔ", " ", "v", "ˈ", "ɔ", "d", "ɨ", " ", "i", " ", "z", " ", "v", "ˈ", "ɔ", "d", "ɨ", " ", "d", "ɔ", " ", "p", "ɔ", "v", "ʲ", "ˈ", "ɛ", "t", "ʃ", "a", "."], "phoneme_ids": [1, 0, 30, 0, 121, 0, 54, 0, 31, 0, 96, 0, 32, 0, 96, 0, 61, 0, 28, 0, 119, 0, 120, 0, 61, 0, 82, 0, 119, 0, 61, 0, 3, 0, 55, 0, 19, 0, 119, 0, 120, 0, 14, 0, 32, 0, 35, 0, 14, 0, 3, 0, 22, 0, 61, 0, 38, 0, 17, 0, 3, 0, 34, 0, 73, 0, 82, 0, 120, 0, 21, 0, 23, 0, 119, 0, 61, 0, 25, 0, 3, 0, 38, 0, 22, 0, 14, 0, 34, 0, 120, 0, 21, 0, 31, 0, 23, 0, 14, 0, 3, 0, 17, 0, 73, 0, 31, 0, 28, 0, 120, 0, 61, 0, 30, 0, 31, 0, 22, 0, 21, 0, 8, 0, 3, 0, 28, 0, 121, 0, 54, 0, 34, 0, 54, 0, 17, 0, 121, 0, 33, 0, 22, 0, 54, 0, 26, 0, 32, 0, 31, 0, 120, 0, 61, 0, 66, 0, 54, 0, 3, 0, 30, 0, 33, 0, 108, 0, 82, 0, 120, 0, 21, 0, 32, 0, 31, 0, 61, 0, 3, 0, 19, 0, 3, 0, 23, 0, 120, 0, 54, 0, 82, 0, 32, 0, 55, 0, 61, 0, 3, 0, 38, 0, 121, 0, 14, 0, 35, 0, 14, 0, 25, 0, 120, 0, 14, 0, 82, 0, 119, 0, 14, 0, 3, 0, 55, 0, 19, 0, 119, 0, 120, 0, 14, 0, 32, 0, 35, 0, 14, 0, 3, 0, 54, 
0, 3, 0, 30, 0, 120, 0, 33, 0, 108, 0, 26, 0, 61, 0, 22, 0, 3, 0, 17, 0, 35, 0, 33, 0, 66, 0, 120, 0, 54, 0, 55, 0, 32, 0, 55, 0, 21, 0, 3, 0, 19, 0, 120, 0, 14, 0, 24, 0, 21, 0, 3, 0, 28, 0, 96, 0, 73, 0, 3, 0, 28, 0, 96, 0, 120, 0, 61, 0, 22, 0, 55, 0, 32, 0, 55, 0, 33, 0, 3, 0, 31, 0, 3, 0, 28, 0, 54, 0, 34, 0, 119, 0, 120, 0, 61, 0, 32, 0, 96, 0, 14, 0, 3, 0, 17, 0, 54, 0, 3, 0, 34, 0, 120, 0, 54, 0, 17, 0, 73, 0, 3, 0, 21, 0, 3, 0, 38, 0, 3, 0, 34, 0, 120, 0, 54, 0, 17, 0, 73, 0, 3, 0, 17, 0, 54, 0, 3, 0, 28, 0, 54, 0, 34, 0, 119, 0, 120, 0, 61, 0, 32, 0, 96, 0, 14, 0, 10, 0, 2]} +{"text": "Jeżu klątw, spłódź Finom część gry hańb.", "phonemes": ["j", "ˈ", "ɛ", "ʒ", "u", " ", "k", "l", "ˈ", "ɔ", "n", "t", "f", ",", " ", "s", "p", "w", "ˈ", "u", "t", "ɕ", " ", "f", "ˈ", "i", "n", "ɔ", "m", " ", "t", "ʃ", "ˈ", "ɛ", "ɲ", "ʑ", "d", "ʑ", " ", "ɡ", "r", "ˈ", "ɨ", " ", "x", "ˈ", "a", "ɲ", "p", "."], "phoneme_ids": [1, 0, 22, 0, 120, 0, 61, 0, 108, 0, 33, 0, 3, 0, 23, 0, 24, 0, 120, 0, 54, 0, 26, 0, 32, 0, 19, 0, 8, 0, 3, 0, 31, 0, 28, 0, 35, 0, 120, 0, 33, 0, 32, 0, 55, 0, 3, 0, 19, 0, 120, 0, 21, 0, 26, 0, 54, 0, 25, 0, 3, 0, 32, 0, 96, 0, 120, 0, 61, 0, 82, 0, 107, 0, 17, 0, 107, 0, 3, 0, 66, 0, 30, 0, 120, 0, 73, 0, 3, 0, 36, 0, 120, 0, 14, 0, 82, 0, 28, 0, 10, 0, 2]} +{"text": "Pójdźże, kiń tę chmurność w głąb flaszy.", "phonemes": ["p", "ˈ", "u", "j", "d", "ʑ", "ʒ", "ɛ", ",", " ", "k", "ˈ", "i", "ɲ", " ", "t", "ˈ", "ɛ", " ", "x", "m", "ˈ", "u", "r", "n", "ɔ", "ʑ", "d", "ʑ", " ", "w", " ", "ɡ", "w", "ˈ", "ɔ", "m", "p", " ", "f", "l", "ˈ", "a", "ʃ", "ɨ", "."], "phoneme_ids": [1, 0, 28, 0, 120, 0, 33, 0, 22, 0, 17, 0, 107, 0, 108, 0, 61, 0, 8, 0, 3, 0, 23, 0, 120, 0, 21, 0, 82, 0, 3, 0, 32, 0, 120, 0, 61, 0, 3, 0, 36, 0, 25, 0, 120, 0, 33, 0, 30, 0, 26, 0, 54, 0, 107, 0, 17, 0, 107, 0, 3, 0, 35, 0, 3, 0, 66, 0, 35, 0, 120, 0, 54, 0, 25, 0, 28, 0, 3, 0, 19, 0, 24, 0, 120, 0, 14, 0, 96, 0, 73, 0, 10, 0, 2]} +{"text": "Mężny bądź, chroń pułk twój i sześć flag.", 
"phonemes": ["m", "ˈ", "ɛ", "̃", "ʒ", "n", "ɨ", " ", "b", "ˈ", "ɔ", "ɲ", "t", "ɕ", ",", " ", "x", "r", "ˈ", "ɔ", "ɲ", " ", "p", "ˈ", "u", "w", "k", " ", "t", "f", "ˈ", "u", "j", " ", "i", " ", "ʃ", "ˈ", "ɛ", "ɕ", "t", "ɕ", " ", "f", "l", "ˈ", "a", "k", "."], "phoneme_ids": [1, 0, 25, 0, 120, 0, 61, 0, 141, 0, 108, 0, 26, 0, 73, 0, 3, 0, 15, 0, 120, 0, 54, 0, 82, 0, 32, 0, 55, 0, 8, 0, 3, 0, 36, 0, 30, 0, 120, 0, 54, 0, 82, 0, 3, 0, 28, 0, 120, 0, 33, 0, 35, 0, 23, 0, 3, 0, 32, 0, 19, 0, 120, 0, 33, 0, 22, 0, 3, 0, 21, 0, 3, 0, 96, 0, 120, 0, 61, 0, 55, 0, 32, 0, 55, 0, 3, 0, 19, 0, 24, 0, 120, 0, 14, 0, 23, 0, 10, 0, 2]} +{"text": "Filmuj rzeź żądań, pość, gnęb chłystków.", "phonemes": ["f", "ˈ", "i", "l", "m", "u", "j", " ", "ʒ", "ˈ", "ɛ", "ʑ", " ", "ʒ", "ˈ", "ɔ", "n", "d", "a", "ɲ", ",", " ", "p", "ˈ", "ɔ", "ɕ", "t", "ɕ", ",", " ", "ɡ", "n", "ˈ", "ɛ", "m", "p", " ", "x", "w", "ˈ", "ɨ", "s", "t", "k", "u", "f", "."], "phoneme_ids": [1, 0, 19, 0, 120, 0, 21, 0, 24, 0, 25, 0, 33, 0, 22, 0, 3, 0, 108, 0, 120, 0, 61, 0, 107, 0, 3, 0, 108, 0, 120, 0, 54, 0, 26, 0, 17, 0, 14, 0, 82, 0, 8, 0, 3, 0, 28, 0, 120, 0, 54, 0, 55, 0, 32, 0, 55, 0, 8, 0, 3, 0, 66, 0, 26, 0, 120, 0, 61, 0, 25, 0, 28, 0, 3, 0, 36, 0, 35, 0, 120, 0, 73, 0, 31, 0, 32, 0, 23, 0, 33, 0, 19, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_pt-br.jsonl b/etc/test_sentences/test_pt-br.jsonl new file mode 100644 index 0000000..14b4f66 --- /dev/null +++ b/etc/test_sentences/test_pt-br.jsonl @@ -0,0 +1,8 @@ +{"text": "Um arco-íris, também popularmente denominado arco-da-velha, é um fenômeno óptico e meteorológico que separa a luz do sol em seu espectro contínuo quando o sol brilha sobre gotículas de água suspensas no ar.", "phonemes": ["u", "̃", "ŋ", " ", "ˈ", "a", "ɾ", "ə", "k", "ʊ", "ˈ", "i", "ɾ", "i", "s", ",", " ", "t", "ɐ", "̃", "m", "b", "ˈ", "e", "ɪ", "ŋ", " ", "p", "ˌ", "o", "p", "u", "l", "a", "ɾ", "ə", "m", "ˈ", "e", "ɪ", "ŋ", "t", "ʃ", "y", " ", "d", "ˌ", "e", "n", "o", "m", "i", "n", "ˈ", "a", 
"d", "w", " ", "ˈ", "a", "ɾ", "ə", "k", "ʊ", "d", "a", "v", "ˈ", "ɛ", "l", "j", "æ", ",", " ", "ɛ", " ", "u", "̃", "ŋ", " ", "f", "ˌ", "e", "n", "ˈ", "o", "m", "e", "n", "w", " ", "ˈ", "ɔ", "p", "t", "ʃ", "i", "k", "w", " ", "i", " ", "m", "ˌ", "e", "t", "e", "ˌ", "o", "ɾ", "o", "l", "ˈ", "ɔ", "ʒ", "i", "k", "ʊ", " ", "k", "y", " ", "s", "ˌ", "e", "p", "ˈ", "a", "ɾ", "æ", " ", "a", " ", "l", "ˈ", "u", "z", " ", "d", "ʊ", " ", "s", "ˈ", "ɔ", "l", " ", "ˈ", "e", "ɪ", "ŋ", " ", "s", "e", "ʊ", " ", "ˌ", "e", "s", "p", "ˈ", "ɛ", "k", "t", "r", "ʊ", " ", "k", "ˌ", "o", "ŋ", "t", "ʃ", "ˈ", "i", "n", "u", "ʊ", " ", "k", "w", "ˈ", "ɐ", "̃", "ŋ", "d", "w", " ", "ʊ", " ", "s", "ˈ", "ɔ", "l", " ", "b", "r", "ˈ", "i", "l", "j", "æ", " ", "s", "ˈ", "o", "b", "r", "y", " ", "ɡ", "ˌ", "o", "t", "ʃ", "ˈ", "i", "k", "u", "l", "æ", "z", " ", "d", "ʒ", "j", " ", "ˈ", "a", "ɡ", "w", "æ", " ", "s", "ˌ", "u", "s", "p", "ˈ", "e", "ɪ", "ŋ", "s", "æ", "z", " ", "n", "ʊ", " ", "ˈ", "a", "r", "."], "phoneme_ids": [1, 0, 33, 0, 141, 0, 44, 0, 3, 0, 120, 0, 14, 0, 92, 0, 59, 0, 23, 0, 100, 0, 120, 0, 21, 0, 92, 0, 21, 0, 31, 0, 8, 0, 3, 0, 32, 0, 50, 0, 141, 0, 25, 0, 15, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 28, 0, 121, 0, 27, 0, 28, 0, 33, 0, 24, 0, 14, 0, 92, 0, 59, 0, 25, 0, 120, 0, 18, 0, 74, 0, 44, 0, 32, 0, 96, 0, 37, 0, 3, 0, 17, 0, 121, 0, 18, 0, 26, 0, 27, 0, 25, 0, 21, 0, 26, 0, 120, 0, 14, 0, 17, 0, 35, 0, 3, 0, 120, 0, 14, 0, 92, 0, 59, 0, 23, 0, 100, 0, 17, 0, 14, 0, 34, 0, 120, 0, 61, 0, 24, 0, 22, 0, 39, 0, 8, 0, 3, 0, 61, 0, 3, 0, 33, 0, 141, 0, 44, 0, 3, 0, 19, 0, 121, 0, 18, 0, 26, 0, 120, 0, 27, 0, 25, 0, 18, 0, 26, 0, 35, 0, 3, 0, 120, 0, 54, 0, 28, 0, 32, 0, 96, 0, 21, 0, 23, 0, 35, 0, 3, 0, 21, 0, 3, 0, 25, 0, 121, 0, 18, 0, 32, 0, 18, 0, 121, 0, 27, 0, 92, 0, 27, 0, 24, 0, 120, 0, 54, 0, 108, 0, 21, 0, 23, 0, 100, 0, 3, 0, 23, 0, 37, 0, 3, 0, 31, 0, 121, 0, 18, 0, 28, 0, 120, 0, 14, 0, 92, 0, 39, 0, 3, 0, 14, 0, 3, 0, 24, 0, 120, 0, 33, 0, 38, 0, 3, 0, 17, 0, 100, 0, 3, 
0, 31, 0, 120, 0, 54, 0, 24, 0, 3, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 31, 0, 18, 0, 100, 0, 3, 0, 121, 0, 18, 0, 31, 0, 28, 0, 120, 0, 61, 0, 23, 0, 32, 0, 30, 0, 100, 0, 3, 0, 23, 0, 121, 0, 27, 0, 44, 0, 32, 0, 96, 0, 120, 0, 21, 0, 26, 0, 33, 0, 100, 0, 3, 0, 23, 0, 35, 0, 120, 0, 50, 0, 141, 0, 44, 0, 17, 0, 35, 0, 3, 0, 100, 0, 3, 0, 31, 0, 120, 0, 54, 0, 24, 0, 3, 0, 15, 0, 30, 0, 120, 0, 21, 0, 24, 0, 22, 0, 39, 0, 3, 0, 31, 0, 120, 0, 27, 0, 15, 0, 30, 0, 37, 0, 3, 0, 66, 0, 121, 0, 27, 0, 32, 0, 96, 0, 120, 0, 21, 0, 23, 0, 33, 0, 24, 0, 39, 0, 38, 0, 3, 0, 17, 0, 108, 0, 22, 0, 3, 0, 120, 0, 14, 0, 66, 0, 35, 0, 39, 0, 3, 0, 31, 0, 121, 0, 33, 0, 31, 0, 28, 0, 120, 0, 18, 0, 74, 0, 44, 0, 31, 0, 39, 0, 38, 0, 3, 0, 26, 0, 100, 0, 3, 0, 120, 0, 14, 0, 30, 0, 10, 0, 2]} +{"text": "É um arco multicolorido com o vermelho em seu exterior e o violeta em seu interior.", "phonemes": ["ɛ", " ", "u", "̃", "ŋ", " ", "ˈ", "a", "ɾ", "ə", "k", "ʊ", " ", "m", "ˌ", "u", "w", "t", "ʃ", "i", "k", "ˌ", "o", "l", "o", "ɾ", "ˈ", "i", "d", "ʊ", " ", "k", "o", "ŋ", " ", "ʊ", " ", "v", "ˌ", "e", "ɾ", "ə", "m", "ˈ", "e", "l", "j", "w", " ", "ˈ", "e", "ɪ", "ŋ", " ", "s", "e", "ʊ", " ", "ˌ", "e", "s", "t", "e", "ɾ", "i", "ˈ", "o", "ɾ", " ", "i", " ", "ʊ", " ", "v", "ˌ", "i", "o", "l", "ˈ", "e", "t", "æ", " ", "ˈ", "e", "ɪ", "ŋ", " ", "s", "e", "ʊ", " ", "ˌ", "i", "ŋ", "t", "e", "ɾ", "i", "ˈ", "o", "r", "."], "phoneme_ids": [1, 0, 61, 0, 3, 0, 33, 0, 141, 0, 44, 0, 3, 0, 120, 0, 14, 0, 92, 0, 59, 0, 23, 0, 100, 0, 3, 0, 25, 0, 121, 0, 33, 0, 35, 0, 32, 0, 96, 0, 21, 0, 23, 0, 121, 0, 27, 0, 24, 0, 27, 0, 92, 0, 120, 0, 21, 0, 17, 0, 100, 0, 3, 0, 23, 0, 27, 0, 44, 0, 3, 0, 100, 0, 3, 0, 34, 0, 121, 0, 18, 0, 92, 0, 59, 0, 25, 0, 120, 0, 18, 0, 24, 0, 22, 0, 35, 0, 3, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 31, 0, 18, 0, 100, 0, 3, 0, 121, 0, 18, 0, 31, 0, 32, 0, 18, 0, 92, 0, 21, 0, 120, 0, 27, 0, 92, 0, 3, 0, 21, 0, 3, 0, 100, 0, 3, 0, 34, 0, 121, 0, 21, 0, 27, 0, 24, 0, 120, 0, 
18, 0, 32, 0, 39, 0, 3, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 31, 0, 18, 0, 100, 0, 3, 0, 121, 0, 21, 0, 44, 0, 32, 0, 18, 0, 92, 0, 21, 0, 120, 0, 27, 0, 30, 0, 10, 0, 2]} +{"text": "Por ser um espectro de dispersão da luz branca, o arco-íris contém uma quantidade infinita de cores sem qualquer delimitação entre elas.", "phonemes": ["p", "o", "r", " ", "s", "e", "ɾ", " ", "u", "̃", "ŋ", " ", "ˌ", "e", "s", "p", "ˈ", "ɛ", "k", "t", "r", "ʊ", " ", "d", "ʒ", "y", " ", "d", "ʒ", "ˌ", "i", "s", "p", "e", "ɾ", "ə", "s", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "d", "a", " ", "l", "ˈ", "u", "z", " ", "b", "r", "ˈ", "ɐ", "̃", "ŋ", "k", "æ", ",", " ", "u", " ", "ˈ", "a", "ɾ", "ə", "k", "ʊ", "ˈ", "i", "ɾ", "i", "s", " ", "k", "o", "ŋ", "t", "ˈ", "e", "ɪ", "ŋ", " ", "ˌ", "u", "m", "æ", " ", "k", "w", "ˌ", "ɐ", "̃", "ŋ", "t", "ʃ", "i", "d", "ˈ", "a", "d", "ʒ", "j", " ", "ˌ", "i", "ŋ", "f", "i", "n", "ˈ", "i", "t", "æ", " ", "d", "ʒ", "y", " ", "k", "ˈ", "o", "ɾ", "y", "s", " ", "s", "ˈ", "e", "ɪ", "ŋ", " ", "k", "w", "a", "ʊ", "k", "ˌ", "ɛ", "r", " ", "d", "ˌ", "e", "l", "i", "m", "ˌ", "i", "t", "a", "s", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "ˈ", "e", "ɪ", "ŋ", "t", "r", "i", " ", "ˈ", "ɛ", "l", "æ", "s", "."], "phoneme_ids": [1, 0, 28, 0, 27, 0, 30, 0, 3, 0, 31, 0, 18, 0, 92, 0, 3, 0, 33, 0, 141, 0, 44, 0, 3, 0, 121, 0, 18, 0, 31, 0, 28, 0, 120, 0, 61, 0, 23, 0, 32, 0, 30, 0, 100, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 17, 0, 108, 0, 121, 0, 21, 0, 31, 0, 28, 0, 18, 0, 92, 0, 59, 0, 31, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 17, 0, 14, 0, 3, 0, 24, 0, 120, 0, 33, 0, 38, 0, 3, 0, 15, 0, 30, 0, 120, 0, 50, 0, 141, 0, 44, 0, 23, 0, 39, 0, 8, 0, 3, 0, 33, 0, 3, 0, 120, 0, 14, 0, 92, 0, 59, 0, 23, 0, 100, 0, 120, 0, 21, 0, 92, 0, 21, 0, 31, 0, 3, 0, 23, 0, 27, 0, 44, 0, 32, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 121, 0, 33, 0, 25, 0, 39, 0, 3, 0, 23, 0, 35, 0, 121, 0, 50, 0, 141, 0, 44, 0, 32, 0, 96, 0, 21, 0, 17, 0, 120, 0, 14, 0, 17, 0, 108, 0, 22, 0, 3, 0, 121, 0, 21, 0, 44, 0, 19, 0, 21, 
0, 26, 0, 120, 0, 21, 0, 32, 0, 39, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 23, 0, 120, 0, 27, 0, 92, 0, 37, 0, 31, 0, 3, 0, 31, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 23, 0, 35, 0, 14, 0, 100, 0, 23, 0, 121, 0, 61, 0, 30, 0, 3, 0, 17, 0, 121, 0, 18, 0, 24, 0, 21, 0, 25, 0, 121, 0, 21, 0, 32, 0, 14, 0, 31, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 120, 0, 18, 0, 74, 0, 44, 0, 32, 0, 30, 0, 21, 0, 3, 0, 120, 0, 61, 0, 24, 0, 39, 0, 31, 0, 10, 0, 2]} +{"text": "Devido à necessidade humana de classificação dos fenômenos da natureza, a capacidade finita de distinção de cores pela visão humana e por questões didáticas, o arco-íris é mais conhecido por uma simplificação criada culturalmente que resume o espectro em sete cores na seguinte ordem: vermelho, laranja, amarelo, verde, azul, anil e violeta.", "phonemes": ["d", "ˌ", "e", "v", "ˈ", "i", "d", "w", " ", "ˌ", "a", "ː", " ", "n", "ˌ", "e", "s", "e", "s", "i", "d", "ˈ", "a", "d", "ʒ", "y", " ", "ˌ", "u", "m", "ˈ", "ɐ", "̃", "n", "æ", " ", "d", "ʒ", "y", " ", "k", "l", "ˌ", "a", "s", "i", "f", "ˌ", "i", "k", "a", "s", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "d", "ʊ", "s", " ", "f", "ˌ", "e", "n", "ˈ", "o", "m", "e", "n", "ʊ", "z", " ", "d", "a", " ", "n", "ˌ", "a", "t", "u", "ɾ", "ˈ", "e", "z", "æ", ",", " ", "a", " ", "k", "ˌ", "a", "p", "a", "s", "i", "d", "ˈ", "a", "d", "ʒ", "y", " ", "f", "ˌ", "i", "n", "ˈ", "i", "t", "æ", " ", "d", "ʒ", "y", " ", "d", "ʒ", "ˌ", "i", "s", "t", "ʃ", "i", "ŋ", "s", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "d", "ʒ", "y", " ", "k", "ˈ", "o", "ɾ", "y", "s", " ", "p", "ˈ", "e", "l", "æ", " ", "v", "i", "z", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "ˌ", "u", "m", "ˈ", "ɐ", "̃", "n", "æ", " ", "i", " ", "p", "o", "r", " ", "k", "e", "s", "t", "ˈ", "o", "̃", "j", "z", " ", "d", "ʒ", "ˌ", "i", "d", "ˈ", "a", "t", "ʃ", "i", "k", "æ", "s", ",", " ", "u", " ", "ˈ", "a", "ɾ", "ə", "k", "ʊ", "ˈ", "i", "ɾ", "i", "z", " ", "ɛ", " ", "m", "ˈ", "a", "ɪ", "s", " ", "k", "ˌ", "o", "ɲ", "e", "s", "ˈ", "i", "d", "ʊ", " ", "p", "o", 
"ɾ", " ", "ˌ", "u", "m", "æ", " ", "s", "ˌ", "i", "m", "p", "l", "i", "f", "ˌ", "i", "k", "a", "s", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "k", "r", "ˌ", "i", "ˈ", "a", "d", "æ", " ", "k", "ˌ", "u", "w", "t", "u", "ɾ", "a", "ʊ", "m", "ˈ", "e", "ɪ", "ŋ", "t", "ʃ", "y", " ", "k", "y", " ", "x", "ˌ", "e", "z", "ˈ", "u", "m", "j", " ", "u", " ", "ˌ", "e", "s", "p", "ˈ", "ɛ", "k", "t", "r", "w", " ", "ˈ", "e", "ɪ", "ŋ", " ", "s", "ˈ", "ɛ", "t", "ʃ", "y", " ", "k", "ˈ", "o", "ɾ", "y", "z", " ", "n", "a", " ", "s", "ˌ", "e", "ɡ", "ˈ", "i", "ŋ", "t", "ʃ", "j", " ", "ˈ", "ɔ", "ɾ", "ə", "d", "e", "ɪ", "ŋ", ":", " ", "v", "ˌ", "e", "ɾ", "ə", "m", "ˈ", "e", "l", "j", "ʊ", ",", " ", "l", "ˌ", "a", "ɾ", "ˈ", "ɐ", "̃", "ŋ", "ʒ", "æ", ",", " ", "ˌ", "æ", "m", "a", "ɾ", "ˈ", "ɛ", "l", "ʊ", ",", " ", "v", "ˈ", "e", "ɾ", "ə", "d", "ʒ", "y", ",", " ", "a", "z", "ˈ", "u", "w", ",", " ", "ɐ", "̃", "n", "ˈ", "i", "ʊ", " ", "i", " ", "v", "ˌ", "i", "o", "l", "ˈ", "e", "t", "æ", "."], "phoneme_ids": [1, 0, 17, 0, 121, 0, 18, 0, 34, 0, 120, 0, 21, 0, 17, 0, 35, 0, 3, 0, 121, 0, 14, 0, 122, 0, 3, 0, 26, 0, 121, 0, 18, 0, 31, 0, 18, 0, 31, 0, 21, 0, 17, 0, 120, 0, 14, 0, 17, 0, 108, 0, 37, 0, 3, 0, 121, 0, 33, 0, 25, 0, 120, 0, 50, 0, 141, 0, 26, 0, 39, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 23, 0, 24, 0, 121, 0, 14, 0, 31, 0, 21, 0, 19, 0, 121, 0, 21, 0, 23, 0, 14, 0, 31, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 17, 0, 100, 0, 31, 0, 3, 0, 19, 0, 121, 0, 18, 0, 26, 0, 120, 0, 27, 0, 25, 0, 18, 0, 26, 0, 100, 0, 38, 0, 3, 0, 17, 0, 14, 0, 3, 0, 26, 0, 121, 0, 14, 0, 32, 0, 33, 0, 92, 0, 120, 0, 18, 0, 38, 0, 39, 0, 8, 0, 3, 0, 14, 0, 3, 0, 23, 0, 121, 0, 14, 0, 28, 0, 14, 0, 31, 0, 21, 0, 17, 0, 120, 0, 14, 0, 17, 0, 108, 0, 37, 0, 3, 0, 19, 0, 121, 0, 21, 0, 26, 0, 120, 0, 21, 0, 32, 0, 39, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 17, 0, 108, 0, 121, 0, 21, 0, 31, 0, 32, 0, 96, 0, 21, 0, 44, 0, 31, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 23, 0, 120, 0, 27, 0, 92, 
0, 37, 0, 31, 0, 3, 0, 28, 0, 120, 0, 18, 0, 24, 0, 39, 0, 3, 0, 34, 0, 21, 0, 38, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 121, 0, 33, 0, 25, 0, 120, 0, 50, 0, 141, 0, 26, 0, 39, 0, 3, 0, 21, 0, 3, 0, 28, 0, 27, 0, 30, 0, 3, 0, 23, 0, 18, 0, 31, 0, 32, 0, 120, 0, 27, 0, 141, 0, 22, 0, 38, 0, 3, 0, 17, 0, 108, 0, 121, 0, 21, 0, 17, 0, 120, 0, 14, 0, 32, 0, 96, 0, 21, 0, 23, 0, 39, 0, 31, 0, 8, 0, 3, 0, 33, 0, 3, 0, 120, 0, 14, 0, 92, 0, 59, 0, 23, 0, 100, 0, 120, 0, 21, 0, 92, 0, 21, 0, 38, 0, 3, 0, 61, 0, 3, 0, 25, 0, 120, 0, 14, 0, 74, 0, 31, 0, 3, 0, 23, 0, 121, 0, 27, 0, 82, 0, 18, 0, 31, 0, 120, 0, 21, 0, 17, 0, 100, 0, 3, 0, 28, 0, 27, 0, 92, 0, 3, 0, 121, 0, 33, 0, 25, 0, 39, 0, 3, 0, 31, 0, 121, 0, 21, 0, 25, 0, 28, 0, 24, 0, 21, 0, 19, 0, 121, 0, 21, 0, 23, 0, 14, 0, 31, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 23, 0, 30, 0, 121, 0, 21, 0, 120, 0, 14, 0, 17, 0, 39, 0, 3, 0, 23, 0, 121, 0, 33, 0, 35, 0, 32, 0, 33, 0, 92, 0, 14, 0, 100, 0, 25, 0, 120, 0, 18, 0, 74, 0, 44, 0, 32, 0, 96, 0, 37, 0, 3, 0, 23, 0, 37, 0, 3, 0, 36, 0, 121, 0, 18, 0, 38, 0, 120, 0, 33, 0, 25, 0, 22, 0, 3, 0, 33, 0, 3, 0, 121, 0, 18, 0, 31, 0, 28, 0, 120, 0, 61, 0, 23, 0, 32, 0, 30, 0, 35, 0, 3, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 31, 0, 120, 0, 61, 0, 32, 0, 96, 0, 37, 0, 3, 0, 23, 0, 120, 0, 27, 0, 92, 0, 37, 0, 38, 0, 3, 0, 26, 0, 14, 0, 3, 0, 31, 0, 121, 0, 18, 0, 66, 0, 120, 0, 21, 0, 44, 0, 32, 0, 96, 0, 22, 0, 3, 0, 120, 0, 54, 0, 92, 0, 59, 0, 17, 0, 18, 0, 74, 0, 44, 0, 11, 0, 3, 0, 34, 0, 121, 0, 18, 0, 92, 0, 59, 0, 25, 0, 120, 0, 18, 0, 24, 0, 22, 0, 100, 0, 8, 0, 3, 0, 24, 0, 121, 0, 14, 0, 92, 0, 120, 0, 50, 0, 141, 0, 44, 0, 108, 0, 39, 0, 8, 0, 3, 0, 121, 0, 39, 0, 25, 0, 14, 0, 92, 0, 120, 0, 61, 0, 24, 0, 100, 0, 8, 0, 3, 0, 34, 0, 120, 0, 18, 0, 92, 0, 59, 0, 17, 0, 108, 0, 37, 0, 8, 0, 3, 0, 14, 0, 38, 0, 120, 0, 33, 0, 35, 0, 8, 0, 3, 0, 50, 0, 141, 0, 26, 0, 120, 0, 21, 0, 100, 0, 3, 0, 21, 0, 3, 0, 34, 0, 121, 0, 21, 0, 27, 0, 24, 0, 120, 0, 18, 
0, 32, 0, 39, 0, 10, 0, 2]} +{"text": "Tal simplificação foi proposta primeiramente por Isaac Newton, que decidiu nomear apenas cinco cores e depois adicionou mais duas apenas para fazer analogia com as sete notas musicais, os sete dias da semana e os sete objetos do sistema solar conhecidos à época.", "phonemes": ["t", "ˈ", "a", "ʊ", " ", "s", "ˌ", "i", "m", "p", "l", "i", "f", "ˌ", "i", "k", "a", "s", "ˈ", "ɐ", "̃", "ʊ", "̃", " ", "f", "o", "ɪ", " ", "p", "r", "ˌ", "o", "p", "ˈ", "ɔ", "s", "t", "æ", " ", "p", "r", "ˌ", "i", "m", "e", "ɪ", "ɾ", "æ", "m", "ˈ", "e", "ɪ", "ŋ", "t", "ʃ", "y", " ", "p", "o", "ɾ", " ", "i", "z", "ˈ", "a", "k", " ", "n", "ˈ", "ɪ", "u", "t", "o", "ŋ", ",", " ", "k", "y", " ", "d", "ˌ", "e", "s", "i", "d", "ʒ", "ˈ", "i", "ʊ", " ", "n", "ˌ", "o", "m", "e", "ˈ", "a", "ɾ", " ", "ˌ", "a", "p", "ˈ", "e", "n", "æ", "s", " ", "s", "ˈ", "i", "ŋ", "k", "ʊ", " ", "k", "ˈ", "o", "ɾ", "y", "z", " ", "i", " ", "d", "e", "p", "ˈ", "o", "ɪ", "z", " ", "ˌ", "a", "d", "ʒ", "i", "s", "ˌ", "i", "o", "n", "ˈ", "o", "w", " ", "m", "ˈ", "a", "ɪ", "z", " ", "d", "ˈ", "u", "æ", "z", " ", "ˌ", "a", "p", "ˈ", "e", "n", "æ", "s", " ", "p", "ˌ", "a", "ɾ", "æ", " ", "f", "a", "z", "ˌ", "e", "ɾ", " ", "ˌ", "æ", "n", "a", "l", "o", "ʒ", "ˈ", "i", "æ", " ", "k", "o", "ŋ", " ", "a", "s", " ", "s", "ˈ", "ɛ", "t", "ʃ", "y", " ", "n", "ˈ", "ɔ", "t", "æ", "z", " ", "m", "ˌ", "u", "z", "i", "k", "ˈ", "a", "ɪ", "s", ",", " ", "ʊ", "s", " ", "s", "ˈ", "ɛ", "t", "ʃ", "y", " ", "d", "ʒ", "ˈ", "i", "æ", "z", " ", "d", "a", " ", "s", "ˌ", "e", "m", "ˈ", "ɐ", "̃", "n", "æ", " ", "i", " ", "ʊ", "s", " ", "s", "ˈ", "ɛ", "t", "ʃ", "j", " ", "ˌ", "o", "b", "ʒ", "ˈ", "ɛ", "t", "ʊ", "z", " ", "d", "ʊ", " ", "s", "ˌ", "i", "s", "t", "ˈ", "e", "m", "æ", " ", "s", "o", "l", "ˈ", "a", "r", " ", "k", "ˌ", "o", "ɲ", "e", "s", "ˈ", "i", "d", "ʊ", "z", " ", "ˌ", "a", "ː", " ", "ˈ", "ɛ", "p", "o", "k", "æ", "."], "phoneme_ids": [1, 0, 32, 0, 120, 0, 14, 0, 100, 0, 3, 0, 31, 0, 121, 0, 21, 0, 25, 0, 
28, 0, 24, 0, 21, 0, 19, 0, 121, 0, 21, 0, 23, 0, 14, 0, 31, 0, 120, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 19, 0, 27, 0, 74, 0, 3, 0, 28, 0, 30, 0, 121, 0, 27, 0, 28, 0, 120, 0, 54, 0, 31, 0, 32, 0, 39, 0, 3, 0, 28, 0, 30, 0, 121, 0, 21, 0, 25, 0, 18, 0, 74, 0, 92, 0, 39, 0, 25, 0, 120, 0, 18, 0, 74, 0, 44, 0, 32, 0, 96, 0, 37, 0, 3, 0, 28, 0, 27, 0, 92, 0, 3, 0, 21, 0, 38, 0, 120, 0, 14, 0, 23, 0, 3, 0, 26, 0, 120, 0, 74, 0, 33, 0, 32, 0, 27, 0, 44, 0, 8, 0, 3, 0, 23, 0, 37, 0, 3, 0, 17, 0, 121, 0, 18, 0, 31, 0, 21, 0, 17, 0, 108, 0, 120, 0, 21, 0, 100, 0, 3, 0, 26, 0, 121, 0, 27, 0, 25, 0, 18, 0, 120, 0, 14, 0, 92, 0, 3, 0, 121, 0, 14, 0, 28, 0, 120, 0, 18, 0, 26, 0, 39, 0, 31, 0, 3, 0, 31, 0, 120, 0, 21, 0, 44, 0, 23, 0, 100, 0, 3, 0, 23, 0, 120, 0, 27, 0, 92, 0, 37, 0, 38, 0, 3, 0, 21, 0, 3, 0, 17, 0, 18, 0, 28, 0, 120, 0, 27, 0, 74, 0, 38, 0, 3, 0, 121, 0, 14, 0, 17, 0, 108, 0, 21, 0, 31, 0, 121, 0, 21, 0, 27, 0, 26, 0, 120, 0, 27, 0, 35, 0, 3, 0, 25, 0, 120, 0, 14, 0, 74, 0, 38, 0, 3, 0, 17, 0, 120, 0, 33, 0, 39, 0, 38, 0, 3, 0, 121, 0, 14, 0, 28, 0, 120, 0, 18, 0, 26, 0, 39, 0, 31, 0, 3, 0, 28, 0, 121, 0, 14, 0, 92, 0, 39, 0, 3, 0, 19, 0, 14, 0, 38, 0, 121, 0, 18, 0, 92, 0, 3, 0, 121, 0, 39, 0, 26, 0, 14, 0, 24, 0, 27, 0, 108, 0, 120, 0, 21, 0, 39, 0, 3, 0, 23, 0, 27, 0, 44, 0, 3, 0, 14, 0, 31, 0, 3, 0, 31, 0, 120, 0, 61, 0, 32, 0, 96, 0, 37, 0, 3, 0, 26, 0, 120, 0, 54, 0, 32, 0, 39, 0, 38, 0, 3, 0, 25, 0, 121, 0, 33, 0, 38, 0, 21, 0, 23, 0, 120, 0, 14, 0, 74, 0, 31, 0, 8, 0, 3, 0, 100, 0, 31, 0, 3, 0, 31, 0, 120, 0, 61, 0, 32, 0, 96, 0, 37, 0, 3, 0, 17, 0, 108, 0, 120, 0, 21, 0, 39, 0, 38, 0, 3, 0, 17, 0, 14, 0, 3, 0, 31, 0, 121, 0, 18, 0, 25, 0, 120, 0, 50, 0, 141, 0, 26, 0, 39, 0, 3, 0, 21, 0, 3, 0, 100, 0, 31, 0, 3, 0, 31, 0, 120, 0, 61, 0, 32, 0, 96, 0, 22, 0, 3, 0, 121, 0, 27, 0, 15, 0, 108, 0, 120, 0, 61, 0, 32, 0, 100, 0, 38, 0, 3, 0, 17, 0, 100, 0, 3, 0, 31, 0, 121, 0, 21, 0, 31, 0, 32, 0, 120, 0, 18, 0, 25, 0, 39, 0, 3, 0, 31, 0, 27, 0, 24, 0, 
120, 0, 14, 0, 30, 0, 3, 0, 23, 0, 121, 0, 27, 0, 82, 0, 18, 0, 31, 0, 120, 0, 21, 0, 17, 0, 100, 0, 38, 0, 3, 0, 121, 0, 14, 0, 122, 0, 3, 0, 120, 0, 61, 0, 28, 0, 27, 0, 23, 0, 39, 0, 10, 0, 2]} +{"text": "Para informações sobre o espectro de cores do arco-íris, veja também o artigo sobre cores.", "phonemes": ["p", "ˌ", "a", "ɾ", "æ", " ", "ˌ", "i", "ŋ", "f", "o", "ɾ", "ə", "m", "a", "s", "ˈ", "o", "̃", "j", "s", " ", "s", "ˈ", "o", "b", "r", "i", " ", "u", " ", "ˌ", "e", "s", "p", "ˈ", "ɛ", "k", "t", "r", "ʊ", " ", "d", "ʒ", "y", " ", "k", "ˈ", "o", "ɾ", "y", "z", " ", "d", "ʊ", " ", "ˈ", "a", "ɾ", "ə", "k", "ʊ", "ˈ", "i", "ɾ", "i", "s", ",", " ", "v", "ˈ", "e", "ʒ", "æ", " ", "t", "ɐ", "̃", "m", "b", "ˈ", "e", "ɪ", "ŋ", " ", "u", " ", "ˌ", "a", "ɾ", "ə", "t", "ʃ", "ˈ", "i", "ɡ", "ʊ", " ", "s", "ˈ", "o", "b", "r", "y", " ", "k", "ˈ", "o", "ɾ", "y", "s", "."], "phoneme_ids": [1, 0, 28, 0, 121, 0, 14, 0, 92, 0, 39, 0, 3, 0, 121, 0, 21, 0, 44, 0, 19, 0, 27, 0, 92, 0, 59, 0, 25, 0, 14, 0, 31, 0, 120, 0, 27, 0, 141, 0, 22, 0, 31, 0, 3, 0, 31, 0, 120, 0, 27, 0, 15, 0, 30, 0, 21, 0, 3, 0, 33, 0, 3, 0, 121, 0, 18, 0, 31, 0, 28, 0, 120, 0, 61, 0, 23, 0, 32, 0, 30, 0, 100, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 23, 0, 120, 0, 27, 0, 92, 0, 37, 0, 38, 0, 3, 0, 17, 0, 100, 0, 3, 0, 120, 0, 14, 0, 92, 0, 59, 0, 23, 0, 100, 0, 120, 0, 21, 0, 92, 0, 21, 0, 31, 0, 8, 0, 3, 0, 34, 0, 120, 0, 18, 0, 108, 0, 39, 0, 3, 0, 32, 0, 50, 0, 141, 0, 25, 0, 15, 0, 120, 0, 18, 0, 74, 0, 44, 0, 3, 0, 33, 0, 3, 0, 121, 0, 14, 0, 92, 0, 59, 0, 32, 0, 96, 0, 120, 0, 21, 0, 66, 0, 100, 0, 3, 0, 31, 0, 120, 0, 27, 0, 15, 0, 30, 0, 37, 0, 3, 0, 23, 0, 120, 0, 27, 0, 92, 0, 37, 0, 31, 0, 10, 0, 2]} +{"text": "Luís argüia à Júlia que «brações, fé, chá, óxido, pôr, zângão» eram palavras do português.", "phonemes": ["l", "w", "ˈ", "i", "z", " ", "ˌ", "a", "ɾ", "ə", "ɡ", "u", "ˈ", "i", " ", "ˌ", "a", "ː", " ", "ʒ", "ˈ", "u", "l", "j", "æ", " ", "k", "y", " ", "b", "r", "a", "s", "ˈ", "o", "̃", "j", "s", 
",", " ", "f", "ˈ", "ɛ", ",", " ", "ʃ", "ˈ", "a", ",", " ", "ˈ", "ɔ", "k", "s", "i", "d", "ʊ", ",", " ", "p", "ˈ", "o", "r", ",", " ", "z", "ˈ", "ɐ", "̃", "ŋ", "ɡ", "ɐ", "̃", "ʊ", "̃", " ", "ˌ", "ɛ", "ɾ", "ɐ", "̃", "ʊ", "̃", " ", "p", "ˌ", "a", "l", "ˈ", "a", "v", "r", "æ", "z", " ", "d", "ʊ", " ", "p", "ˌ", "o", "ɾ", "ə", "t", "u", "ɡ", "ˈ", "e", "s", "."], "phoneme_ids": [1, 0, 24, 0, 35, 0, 120, 0, 21, 0, 38, 0, 3, 0, 121, 0, 14, 0, 92, 0, 59, 0, 66, 0, 33, 0, 120, 0, 21, 0, 3, 0, 121, 0, 14, 0, 122, 0, 3, 0, 108, 0, 120, 0, 33, 0, 24, 0, 22, 0, 39, 0, 3, 0, 23, 0, 37, 0, 3, 0, 15, 0, 30, 0, 14, 0, 31, 0, 120, 0, 27, 0, 141, 0, 22, 0, 31, 0, 8, 0, 3, 0, 19, 0, 120, 0, 61, 0, 8, 0, 3, 0, 96, 0, 120, 0, 14, 0, 8, 0, 3, 0, 120, 0, 54, 0, 23, 0, 31, 0, 21, 0, 17, 0, 100, 0, 8, 0, 3, 0, 28, 0, 120, 0, 27, 0, 30, 0, 8, 0, 3, 0, 38, 0, 120, 0, 50, 0, 141, 0, 44, 0, 66, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 121, 0, 61, 0, 92, 0, 50, 0, 141, 0, 100, 0, 141, 0, 3, 0, 28, 0, 121, 0, 14, 0, 24, 0, 120, 0, 14, 0, 34, 0, 30, 0, 39, 0, 38, 0, 3, 0, 17, 0, 100, 0, 3, 0, 28, 0, 121, 0, 27, 0, 92, 0, 59, 0, 32, 0, 33, 0, 66, 0, 120, 0, 18, 0, 31, 0, 10, 0, 2]} +{"text": "À noite, vovô Kowalsky vê o ímã cair no pé do pingüim queixoso e vovó põe açúcar no chá de tâmaras do jabuti feliz.", "phonemes": ["ˌ", "a", "ː", " ", "n", "ˈ", "o", "ɪ", "t", "ʃ", "y", ",", " ", "v", "o", "v", "ˈ", "o", " ", "k", "ˌ", "o", "w", "ˈ", "a", "ʊ", "s", "k", "i", " ", "v", "ˈ", "e", " ", "u", " ", "ˈ", "i", "m", "ɐ", "̃", " ", "k", "a", "ˈ", "i", "r", " ", "n", "ʊ", " ", "p", "ˈ", "ɛ", " ", "d", "ʊ", " ", "p", "ˌ", "i", "ŋ", "ɡ", "u", "ˈ", "i", "ŋ", " ", "k", "ˌ", "e", "ɪ", "ʃ", "ˈ", "o", "z", "w", " ", "i", " ", "v", "o", "v", "ˈ", "ɔ", " ", "p", "ˈ", "o", "̃", "j", " ", "ˌ", "a", "s", "ˈ", "u", "k", "a", "r", " ", "n", "ʊ", " ", "ʃ", "ˈ", "a", " ", "d", "ʒ", "y", " ", "t", "ˈ", "ɐ", "̃", "m", "æ", "ɾ", "æ", "z", " ", "d", "ʊ", " ", "ʒ", "ˌ", "a", "b", "u", "t", "ʃ", "ˈ", "i", " ", "f", "e", "l", 
"ˈ", "i", "s", "."], "phoneme_ids": [1, 0, 121, 0, 14, 0, 122, 0, 3, 0, 26, 0, 120, 0, 27, 0, 74, 0, 32, 0, 96, 0, 37, 0, 8, 0, 3, 0, 34, 0, 27, 0, 34, 0, 120, 0, 27, 0, 3, 0, 23, 0, 121, 0, 27, 0, 35, 0, 120, 0, 14, 0, 100, 0, 31, 0, 23, 0, 21, 0, 3, 0, 34, 0, 120, 0, 18, 0, 3, 0, 33, 0, 3, 0, 120, 0, 21, 0, 25, 0, 50, 0, 141, 0, 3, 0, 23, 0, 14, 0, 120, 0, 21, 0, 30, 0, 3, 0, 26, 0, 100, 0, 3, 0, 28, 0, 120, 0, 61, 0, 3, 0, 17, 0, 100, 0, 3, 0, 28, 0, 121, 0, 21, 0, 44, 0, 66, 0, 33, 0, 120, 0, 21, 0, 44, 0, 3, 0, 23, 0, 121, 0, 18, 0, 74, 0, 96, 0, 120, 0, 27, 0, 38, 0, 35, 0, 3, 0, 21, 0, 3, 0, 34, 0, 27, 0, 34, 0, 120, 0, 54, 0, 3, 0, 28, 0, 120, 0, 27, 0, 141, 0, 22, 0, 3, 0, 121, 0, 14, 0, 31, 0, 120, 0, 33, 0, 23, 0, 14, 0, 30, 0, 3, 0, 26, 0, 100, 0, 3, 0, 96, 0, 120, 0, 14, 0, 3, 0, 17, 0, 108, 0, 37, 0, 3, 0, 32, 0, 120, 0, 50, 0, 141, 0, 25, 0, 39, 0, 92, 0, 39, 0, 38, 0, 3, 0, 17, 0, 100, 0, 3, 0, 108, 0, 121, 0, 14, 0, 15, 0, 33, 0, 32, 0, 96, 0, 120, 0, 21, 0, 3, 0, 19, 0, 18, 0, 24, 0, 120, 0, 21, 0, 31, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_ru.jsonl b/etc/test_sentences/test_ru.jsonl new file mode 100644 index 0000000..4ec4674 --- /dev/null +++ b/etc/test_sentences/test_ru.jsonl @@ -0,0 +1,6 @@ +{"text": "Ра́дуга, атмосферное, оптическое и метеорологическое явление, наблюдаемое при освещении ярким источником света множества водяных капель.", "phonemes": ["r", "a", "d", "ˈ", "u", "ɡ", "a", ",", " ", "a", "t", "m", "ʌ", "s", "f", "ʲ", "ˈ", "e", "r", "n", "ʌ", "j", "ɪ", ",", " ", "ʌ", "p", "t", "ʲ", "ˈ", "i", "t", "ʃ", "ʲ", "i", "s", "k", "ʌ", "j", "ɪ", " ", "ˈ", "i", " ", "m", "ʲ", "i", "t", "ʲ", "i", "ʌ", "r", "ʌ", "ɭ", "ʌ", "ɡ", "ʲ", "ˈ", "i", "t", "ʃ", "ʲ", "i", "s", "k", "ʌ", "j", "ɪ", " ", "j", "a", "v", "ɭ", "ʲ", "ˈ", "e", "n", "ʲ", "i", "j", "ɪ", ",", " ", "n", "ʌ", "b", "ɭ", "ʲ", "u", "\"", "d", "ˈ", "ɑ", "j", "i", "m", "ʌ", "j", "ɪ", " ", "p", "r", "ʲ", "ˈ", "i", " ", "ʌ", "s", "v", "ʲ", "i", "ɕ", "ˈ", "e", "n", "ʲ", "i", "ɪ", " 
", "ˈ", "j", "a", "r", "k", "ʲ", "i", "m", " ", "ɪ", "s", "t", "ˈ", "o", "t", "ʃ", "ʲ", "n", "ʲ", "i", "k", "ʌ", "m", " ", "s", "v", "ʲ", "ˈ", "e", "t", "a", " ", "m", "n", "ˈ", "o", "ʒ", "y", "s", "t", "v", "a", " ", "v", "ʌ", "d", "ʲ", "a", "n", "ˈ", "y", "x", " ", "k", "ˈ", "ɑ", "p", "ʲ", "i", "ɭ", "."], "phoneme_ids": [1, 0, 30, 0, 14, 0, 17, 0, 120, 0, 33, 0, 66, 0, 14, 0, 8, 0, 3, 0, 14, 0, 32, 0, 25, 0, 102, 0, 31, 0, 19, 0, 119, 0, 120, 0, 18, 0, 30, 0, 26, 0, 102, 0, 22, 0, 74, 0, 8, 0, 3, 0, 102, 0, 28, 0, 32, 0, 119, 0, 120, 0, 21, 0, 32, 0, 96, 0, 119, 0, 21, 0, 31, 0, 23, 0, 102, 0, 22, 0, 74, 0, 3, 0, 120, 0, 21, 0, 3, 0, 25, 0, 119, 0, 21, 0, 32, 0, 119, 0, 21, 0, 102, 0, 30, 0, 102, 0, 77, 0, 102, 0, 66, 0, 119, 0, 120, 0, 21, 0, 32, 0, 96, 0, 119, 0, 21, 0, 31, 0, 23, 0, 102, 0, 22, 0, 74, 0, 3, 0, 22, 0, 14, 0, 34, 0, 77, 0, 119, 0, 120, 0, 18, 0, 26, 0, 119, 0, 21, 0, 22, 0, 74, 0, 8, 0, 3, 0, 26, 0, 102, 0, 15, 0, 77, 0, 119, 0, 33, 0, 150, 0, 17, 0, 120, 0, 51, 0, 22, 0, 21, 0, 25, 0, 102, 0, 22, 0, 74, 0, 3, 0, 28, 0, 30, 0, 119, 0, 120, 0, 21, 0, 3, 0, 102, 0, 31, 0, 34, 0, 119, 0, 21, 0, 55, 0, 120, 0, 18, 0, 26, 0, 119, 0, 21, 0, 74, 0, 3, 0, 120, 0, 22, 0, 14, 0, 30, 0, 23, 0, 119, 0, 21, 0, 25, 0, 3, 0, 74, 0, 31, 0, 32, 0, 120, 0, 27, 0, 32, 0, 96, 0, 119, 0, 26, 0, 119, 0, 21, 0, 23, 0, 102, 0, 25, 0, 3, 0, 31, 0, 34, 0, 119, 0, 120, 0, 18, 0, 32, 0, 14, 0, 3, 0, 25, 0, 26, 0, 120, 0, 27, 0, 108, 0, 37, 0, 31, 0, 32, 0, 34, 0, 14, 0, 3, 0, 34, 0, 102, 0, 17, 0, 119, 0, 14, 0, 26, 0, 120, 0, 37, 0, 36, 0, 3, 0, 23, 0, 120, 0, 51, 0, 28, 0, 119, 0, 21, 0, 77, 0, 10, 0, 2]} +{"text": "Радуга выглядит как разноцветная дуга или окружность, составленная из цветов спектра видимого излучения.", "phonemes": ["r", "ˈ", "ɑ", "d", "u", "ɡ", "a", " ", "v", "ˈ", "y", "ɡ", "ɭ", "ʲ", "ʌ", "d", "ʲ", "i", "t", " ", "k", "ˈ", "ɑ", "k", " ", "r", "ʌ", "z", "n", "ʌ", "t", "s", "v", "ʲ", "ˈ", "e", "t", "n", "ʌ", "j", "a", " ", "d", "u", "ɡ", "ˈ", "ɑ", " ", 
"ˈ", "i", "ɭ", "ʲ", "ɪ", " ", "ʌ", "k", "r", "ˈ", "u", "ʒ", "n", "ʌ", "s", "t", "ʲ", ",", " ", "s", "ʌ", "s", "t", "ˈ", "ɑ", "v", "ɭ", "ʲ", "i", "n", "n", "ʌ", "j", "a", " ", "ˈ", "i", "s", " ", "t", "s", "v", "ʲ", "i", "t", "ˈ", "o", "f", " ", "s", "p", "ʲ", "ˈ", "e", "k", "t", "r", "a", " ", "v", "ʲ", "ˈ", "i", "d", "ʲ", "i", "m", "ʌ", "v", "ʌ", " ", "ɪ", "z", "ɭ", "u", "t", "ʃ", "ʲ", "ˈ", "e", "n", "ʲ", "i", "j", "a", "."], "phoneme_ids": [1, 0, 30, 0, 120, 0, 51, 0, 17, 0, 33, 0, 66, 0, 14, 0, 3, 0, 34, 0, 120, 0, 37, 0, 66, 0, 77, 0, 119, 0, 102, 0, 17, 0, 119, 0, 21, 0, 32, 0, 3, 0, 23, 0, 120, 0, 51, 0, 23, 0, 3, 0, 30, 0, 102, 0, 38, 0, 26, 0, 102, 0, 32, 0, 31, 0, 34, 0, 119, 0, 120, 0, 18, 0, 32, 0, 26, 0, 102, 0, 22, 0, 14, 0, 3, 0, 17, 0, 33, 0, 66, 0, 120, 0, 51, 0, 3, 0, 120, 0, 21, 0, 77, 0, 119, 0, 74, 0, 3, 0, 102, 0, 23, 0, 30, 0, 120, 0, 33, 0, 108, 0, 26, 0, 102, 0, 31, 0, 32, 0, 119, 0, 8, 0, 3, 0, 31, 0, 102, 0, 31, 0, 32, 0, 120, 0, 51, 0, 34, 0, 77, 0, 119, 0, 21, 0, 26, 0, 26, 0, 102, 0, 22, 0, 14, 0, 3, 0, 120, 0, 21, 0, 31, 0, 3, 0, 32, 0, 31, 0, 34, 0, 119, 0, 21, 0, 32, 0, 120, 0, 27, 0, 19, 0, 3, 0, 31, 0, 28, 0, 119, 0, 120, 0, 18, 0, 23, 0, 32, 0, 30, 0, 14, 0, 3, 0, 34, 0, 119, 0, 120, 0, 21, 0, 17, 0, 119, 0, 21, 0, 25, 0, 102, 0, 34, 0, 102, 0, 3, 0, 74, 0, 38, 0, 77, 0, 33, 0, 32, 0, 96, 0, 119, 0, 120, 0, 18, 0, 26, 0, 119, 0, 21, 0, 22, 0, 14, 0, 10, 0, 2]} +{"text": "Это те семь цветов, которые принято выделять в радуге в русской культуре, но следует иметь в виду, что на самом деле спектр непрерывен, и его цвета плавно переходят друг в друга через множество промежуточных оттенков.", "phonemes": ["ˈ", "ɛ", "t", "ʌ", " ", "t", "ʲ", "ˈ", "e", " ", "s", "ʲ", "ˈ", "e", "m", "ʲ", " ", "t", "s", "v", "ʲ", "i", "t", "ˈ", "o", "f", ",", " ", "k", "ʌ", "t", "ˈ", "o", "r", "y", "j", "ɪ", " ", "p", "r", "ʲ", "ˈ", "i", "n", "ʲ", "ʌ", "t", "ʌ", " ", "v", "y", "d", "ʲ", "i", "ɭ", "ʲ", "ˈ", "ɑ", "t", "ʲ", " ", "v", " ", "r", "ˈ", "ɑ", "d", 
"u", "ɡ", "ʲ", "i", " ", "v", " ", "r", "ˈ", "u", "s", "s", "k", "ʌ", "j", " ", "k", "u", "ɭ", "t", "ˈ", "u", "r", "ʲ", "i", ",", " ", "n", "o", " ", "s", "ɭ", "ʲ", "ˈ", "e", "d", "u", "j", "i", "t", " ", "ɪ", "m", "ʲ", "ˈ", "e", "t", "ʲ", " ", "v", " ", "v", "ʲ", "ˈ", "i", "d", "u", ",", " ", "ʃ", "t", "o", " ", "n", "ə", " ", "s", "ˈ", "ɑ", "m", "ʌ", "m", " ", "d", "ʲ", "ˈ", "e", "ɭ", "ʲ", "i", " ", "s", "p", "ʲ", "ˈ", "e", "k", "t", "r", " ", "n", "ʲ", "i", "p", "r", "ʲ", "i", "r", "ˈ", "y", "v", "ʲ", "i", "n", ",", " ", "ˈ", "i", " ", "j", "ɪ", "v", "ˈ", "o", " ", "t", "s", "v", "ʲ", "ˈ", "e", "t", "a", " ", "p", "ɭ", "ˈ", "ɑ", "v", "n", "ʌ", " ", "p", "ʲ", "i", "r", "ʲ", "i", "x", "ˈ", "o", "d", "ʲ", "ʌ", "t", " ", "d", "r", "ˈ", "u", "k", " ", "v", " ", "d", "r", "ˈ", "u", "ɡ", "a", " ", "t", "ʃ", "ʲ", "ˈ", "e", "r", "ʲ", "i", "s", " ", "m", "n", "ˈ", "o", "ʒ", "y", "s", "t", "v", "ʌ", " ", "p", "r", "ʌ", "m", "ʲ", "i", "ʒ", "ˈ", "u", "t", "ʌ", "t", "ʃ", "ʲ", "n", "y", "x", " ", "ʌ", "t", "ʲ", "t", "ʲ", "ˈ", "e", "n", "k", "ʌ", "f", "."], "phoneme_ids": [1, 0, 120, 0, 61, 0, 32, 0, 102, 0, 3, 0, 32, 0, 119, 0, 120, 0, 18, 0, 3, 0, 31, 0, 119, 0, 120, 0, 18, 0, 25, 0, 119, 0, 3, 0, 32, 0, 31, 0, 34, 0, 119, 0, 21, 0, 32, 0, 120, 0, 27, 0, 19, 0, 8, 0, 3, 0, 23, 0, 102, 0, 32, 0, 120, 0, 27, 0, 30, 0, 37, 0, 22, 0, 74, 0, 3, 0, 28, 0, 30, 0, 119, 0, 120, 0, 21, 0, 26, 0, 119, 0, 102, 0, 32, 0, 102, 0, 3, 0, 34, 0, 37, 0, 17, 0, 119, 0, 21, 0, 77, 0, 119, 0, 120, 0, 51, 0, 32, 0, 119, 0, 3, 0, 34, 0, 3, 0, 30, 0, 120, 0, 51, 0, 17, 0, 33, 0, 66, 0, 119, 0, 21, 0, 3, 0, 34, 0, 3, 0, 30, 0, 120, 0, 33, 0, 31, 0, 31, 0, 23, 0, 102, 0, 22, 0, 3, 0, 23, 0, 33, 0, 77, 0, 32, 0, 120, 0, 33, 0, 30, 0, 119, 0, 21, 0, 8, 0, 3, 0, 26, 0, 27, 0, 3, 0, 31, 0, 77, 0, 119, 0, 120, 0, 18, 0, 17, 0, 33, 0, 22, 0, 21, 0, 32, 0, 3, 0, 74, 0, 25, 0, 119, 0, 120, 0, 18, 0, 32, 0, 119, 0, 3, 0, 34, 0, 3, 0, 34, 0, 119, 0, 120, 0, 21, 0, 17, 0, 33, 0, 8, 0, 3, 0, 96, 0, 32, 0, 27, 0, 
3, 0, 26, 0, 59, 0, 3, 0, 31, 0, 120, 0, 51, 0, 25, 0, 102, 0, 25, 0, 3, 0, 17, 0, 119, 0, 120, 0, 18, 0, 77, 0, 119, 0, 21, 0, 3, 0, 31, 0, 28, 0, 119, 0, 120, 0, 18, 0, 23, 0, 32, 0, 30, 0, 3, 0, 26, 0, 119, 0, 21, 0, 28, 0, 30, 0, 119, 0, 21, 0, 30, 0, 120, 0, 37, 0, 34, 0, 119, 0, 21, 0, 26, 0, 8, 0, 3, 0, 120, 0, 21, 0, 3, 0, 22, 0, 74, 0, 34, 0, 120, 0, 27, 0, 3, 0, 32, 0, 31, 0, 34, 0, 119, 0, 120, 0, 18, 0, 32, 0, 14, 0, 3, 0, 28, 0, 77, 0, 120, 0, 51, 0, 34, 0, 26, 0, 102, 0, 3, 0, 28, 0, 119, 0, 21, 0, 30, 0, 119, 0, 21, 0, 36, 0, 120, 0, 27, 0, 17, 0, 119, 0, 102, 0, 32, 0, 3, 0, 17, 0, 30, 0, 120, 0, 33, 0, 23, 0, 3, 0, 34, 0, 3, 0, 17, 0, 30, 0, 120, 0, 33, 0, 66, 0, 14, 0, 3, 0, 32, 0, 96, 0, 119, 0, 120, 0, 18, 0, 30, 0, 119, 0, 21, 0, 31, 0, 3, 0, 25, 0, 26, 0, 120, 0, 27, 0, 108, 0, 37, 0, 31, 0, 32, 0, 34, 0, 102, 0, 3, 0, 28, 0, 30, 0, 102, 0, 25, 0, 119, 0, 21, 0, 108, 0, 120, 0, 33, 0, 32, 0, 102, 0, 32, 0, 96, 0, 119, 0, 26, 0, 37, 0, 36, 0, 3, 0, 102, 0, 32, 0, 119, 0, 32, 0, 119, 0, 120, 0, 18, 0, 26, 0, 23, 0, 102, 0, 19, 0, 10, 0, 2]} +{"text": "Широкая электрификация южных губерний даст мощный толчок подъёму сельского хозяйства.", "phonemes": ["ʃ", "y", "r", "ˈ", "o", "k", "ʌ", "j", "a", " ", "ɛ", "ɭ", "ʲ", "i", "k", "t", "r", "ʲ", "i", "f", "ʲ", "i", "k", "ˈ", "ɑ", "t", "s", "y", "j", "a", " ", "ˈ", "j", "u", "ʒ", "n", "y", "x", " ", "ɡ", "u", "b", "ʲ", "ˈ", "e", "r", "n", "ʲ", "i", "j", " ", "d", "ˈ", "ɑ", "s", "t", " ", "m", "ˈ", "o", "ɕ", "n", "y", "j", " ", "t", "ʌ", "ɭ", "t", "ʃ", "ʲ", "ˈ", "o", "k", " ", "p", "ʌ", "d", "j", "ˈ", "ɵ", "m", "u", " ", "s", "ʲ", "ˈ", "e", "ɭ", "s", "k", "ʌ", "v", "ʌ", " ", "x", "ʌ", "ʑ", "ˈ", "ɑ", "j", "s", "t", "v", "a", "."], "phoneme_ids": [1, 0, 96, 0, 37, 0, 30, 0, 120, 0, 27, 0, 23, 0, 102, 0, 22, 0, 14, 0, 3, 0, 61, 0, 77, 0, 119, 0, 21, 0, 23, 0, 32, 0, 30, 0, 119, 0, 21, 0, 19, 0, 119, 0, 21, 0, 23, 0, 120, 0, 51, 0, 32, 0, 31, 0, 37, 0, 22, 0, 14, 0, 3, 0, 120, 0, 22, 0, 33, 0, 108, 0, 26, 0, 
37, 0, 36, 0, 3, 0, 66, 0, 33, 0, 15, 0, 119, 0, 120, 0, 18, 0, 30, 0, 26, 0, 119, 0, 21, 0, 22, 0, 3, 0, 17, 0, 120, 0, 51, 0, 31, 0, 32, 0, 3, 0, 25, 0, 120, 0, 27, 0, 55, 0, 26, 0, 37, 0, 22, 0, 3, 0, 32, 0, 102, 0, 77, 0, 32, 0, 96, 0, 119, 0, 120, 0, 27, 0, 23, 0, 3, 0, 28, 0, 102, 0, 17, 0, 22, 0, 120, 0, 85, 0, 25, 0, 33, 0, 3, 0, 31, 0, 119, 0, 120, 0, 18, 0, 77, 0, 31, 0, 23, 0, 102, 0, 34, 0, 102, 0, 3, 0, 36, 0, 102, 0, 107, 0, 120, 0, 51, 0, 22, 0, 31, 0, 32, 0, 34, 0, 14, 0, 10, 0, 2]} +{"text": "Разъяренный чтец эгоистично бьёт пятью жердями шустрого фехтовальщика.", "phonemes": ["r", "ʌ", "z", "j", "j", "a", "r", "ʲ", "ˈ", "e", "n", "n", "y", "j", " ", "t", "ʃ", "ʲ", "t", "ʲ", "ˈ", "e", "t", "s", " ", "ɛ", "ɡ", "ʌ", "i", "s", "ʲ", "t", "ʲ", "ˈ", "i", "t", "ʃ", "ʲ", "n", "ʌ", " ", "b", "j", "ˈ", "ɵ", "t", " ", "p", "ʲ", "ˈ", "ɑ", "t", "ʲ", "j", "j", "u", " ", "ʒ", "y", "r", "d", "ʲ", "ˈ", "ɑ", "m", "ʲ", "ɪ", " ", "ʃ", "ˈ", "u", "s", "t", "r", "ʌ", "v", "ʌ", " ", "f", "ʲ", "i", "x", "t", "ʌ", "v", "ˈ", "ɑ", "ɭ", "ɕ", "i", "k", "a", "."], "phoneme_ids": [1, 0, 30, 0, 102, 0, 38, 0, 22, 0, 22, 0, 14, 0, 30, 0, 119, 0, 120, 0, 18, 0, 26, 0, 26, 0, 37, 0, 22, 0, 3, 0, 32, 0, 96, 0, 119, 0, 32, 0, 119, 0, 120, 0, 18, 0, 32, 0, 31, 0, 3, 0, 61, 0, 66, 0, 102, 0, 21, 0, 31, 0, 119, 0, 32, 0, 119, 0, 120, 0, 21, 0, 32, 0, 96, 0, 119, 0, 26, 0, 102, 0, 3, 0, 15, 0, 22, 0, 120, 0, 85, 0, 32, 0, 3, 0, 28, 0, 119, 0, 120, 0, 51, 0, 32, 0, 119, 0, 22, 0, 22, 0, 33, 0, 3, 0, 108, 0, 37, 0, 30, 0, 17, 0, 119, 0, 120, 0, 51, 0, 25, 0, 119, 0, 74, 0, 3, 0, 96, 0, 120, 0, 33, 0, 31, 0, 32, 0, 30, 0, 102, 0, 34, 0, 102, 0, 3, 0, 19, 0, 119, 0, 21, 0, 36, 0, 32, 0, 102, 0, 34, 0, 120, 0, 51, 0, 77, 0, 55, 0, 21, 0, 23, 0, 14, 0, 10, 0, 2]} +{"text": "В чащах юга жил бы цитрус? 
Да, но фальшивый экземпляр!", "phonemes": ["f", " ", "t", "ʃ", "ʲ", "ˈ", "ɑ", "ɕ", "ʌ", "x", " ", "ˈ", "j", "u", "ɡ", "a", " ", "ʒ", "ˈ", "y", "ɭ", " ", "b", "ˈ", "y", " ", "t", "s", "ˈ", "y", "t", "r", "u", "s", "?", " ", "d", "ˈ", "ɑ", ",", " ", "n", "o", " ", "f", "a", "ɭ", "ʃ", "ˈ", "y", "v", "y", "j", " ", "ɛ", "ɡ", "ʑ", "i", "m", "p", "ɭ", "ʲ", "ˈ", "ɑ", "r", "!"], "phoneme_ids": [1, 0, 19, 0, 3, 0, 32, 0, 96, 0, 119, 0, 120, 0, 51, 0, 55, 0, 102, 0, 36, 0, 3, 0, 120, 0, 22, 0, 33, 0, 66, 0, 14, 0, 3, 0, 108, 0, 120, 0, 37, 0, 77, 0, 3, 0, 15, 0, 120, 0, 37, 0, 3, 0, 32, 0, 31, 0, 120, 0, 37, 0, 32, 0, 30, 0, 33, 0, 31, 0, 13, 0, 3, 0, 17, 0, 120, 0, 51, 0, 8, 0, 3, 0, 26, 0, 27, 0, 3, 0, 19, 0, 14, 0, 77, 0, 96, 0, 120, 0, 37, 0, 34, 0, 37, 0, 22, 0, 3, 0, 61, 0, 66, 0, 107, 0, 21, 0, 25, 0, 28, 0, 77, 0, 119, 0, 120, 0, 51, 0, 30, 0, 4, 0, 2]} diff --git a/etc/test_sentences/test_uk.jsonl b/etc/test_sentences/test_uk.jsonl new file mode 100644 index 0000000..d96b42b --- /dev/null +++ b/etc/test_sentences/test_uk.jsonl @@ -0,0 +1,7 @@ +{"text": "Весе́лка, також ра́йдуга оптичне явище в атмосфері, що являє собою одну, дві чи декілька різнокольорових дуг ,або кіл, якщо дивитися з повітря, що спостерігаються на тлі хмари, якщо вона розташована проти Сонця.", "phonemes": ["в", "е", "с", "е", "́", "л", "к", "а", ",", " ", "т", "а", "к", "о", "ж", " ", "р", "а", "́", "и", "̆", "д", "у", "г", "а", " ", "о", "п", "т", "и", "ч", "н", "е", " ", "я", "в", "и", "щ", "е", " ", "в", " ", "а", "т", "м", "о", "с", "ф", "е", "р", "і", ",", " ", "щ", "о", " ", "я", "в", "л", "я", "є", " ", "с", "о", "б", "о", "ю", " ", "о", "д", "н", "у", ",", " ", "д", "в", "і", " ", "ч", "и", " ", "д", "е", "к", "і", "л", "ь", "к", "а", " ", "р", "і", "з", "н", "о", "к", "о", "л", "ь", "о", "р", "о", "в", "и", "х", " ", "д", "у", "г", " ", ",", "а", "б", "о", " ", "к", "і", "л", ",", " ", "я", "к", "щ", "о", " ", "д", "и", "в", "и", "т", "и", "с", "я", " ", "з", " ", "п", "о", "в", "і", 
"т", "р", "я", ",", " ", "щ", "о", " ", "с", "п", "о", "с", "т", "е", "р", "і", "г", "а", "ю", "т", "ь", "с", "я", " ", "н", "а", " ", "т", "л", "і", " ", "х", "м", "а", "р", "и", ",", " ", "я", "к", "щ", "о", " ", "в", "о", "н", "а", " ", "р", "о", "з", "т", "а", "ш", "о", "в", "а", "н", "а", " ", "п", "р", "о", "т", "и", " ", "с", "о", "н", "ц", "я", "."], "phoneme_ids": [1, 0, 14, 0, 18, 0, 33, 0, 18, 0, 45, 0, 27, 0, 26, 0, 12, 0, 6, 0, 3, 0, 34, 0, 12, 0, 26, 0, 30, 0, 20, 0, 3, 0, 32, 0, 12, 0, 45, 0, 22, 0, 46, 0, 17, 0, 35, 0, 15, 0, 12, 0, 3, 0, 30, 0, 31, 0, 34, 0, 22, 0, 39, 0, 29, 0, 18, 0, 3, 0, 44, 0, 14, 0, 22, 0, 41, 0, 18, 0, 3, 0, 14, 0, 3, 0, 12, 0, 34, 0, 28, 0, 30, 0, 33, 0, 36, 0, 18, 0, 32, 0, 23, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 44, 0, 14, 0, 27, 0, 44, 0, 19, 0, 3, 0, 33, 0, 30, 0, 13, 0, 30, 0, 43, 0, 3, 0, 30, 0, 17, 0, 29, 0, 35, 0, 6, 0, 3, 0, 17, 0, 14, 0, 23, 0, 3, 0, 39, 0, 22, 0, 3, 0, 17, 0, 18, 0, 26, 0, 23, 0, 27, 0, 42, 0, 26, 0, 12, 0, 3, 0, 32, 0, 23, 0, 21, 0, 29, 0, 30, 0, 26, 0, 30, 0, 27, 0, 42, 0, 30, 0, 32, 0, 30, 0, 14, 0, 22, 0, 37, 0, 3, 0, 17, 0, 35, 0, 15, 0, 3, 0, 6, 0, 12, 0, 13, 0, 30, 0, 3, 0, 26, 0, 23, 0, 27, 0, 6, 0, 3, 0, 44, 0, 26, 0, 41, 0, 30, 0, 3, 0, 17, 0, 22, 0, 14, 0, 22, 0, 34, 0, 22, 0, 33, 0, 44, 0, 3, 0, 21, 0, 3, 0, 31, 0, 30, 0, 14, 0, 23, 0, 34, 0, 32, 0, 44, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 33, 0, 31, 0, 30, 0, 33, 0, 34, 0, 18, 0, 32, 0, 23, 0, 15, 0, 12, 0, 43, 0, 34, 0, 42, 0, 33, 0, 44, 0, 3, 0, 29, 0, 12, 0, 3, 0, 34, 0, 27, 0, 23, 0, 3, 0, 37, 0, 28, 0, 12, 0, 32, 0, 22, 0, 6, 0, 3, 0, 44, 0, 26, 0, 41, 0, 30, 0, 3, 0, 14, 0, 30, 0, 29, 0, 12, 0, 3, 0, 32, 0, 30, 0, 21, 0, 34, 0, 12, 0, 40, 0, 30, 0, 14, 0, 12, 0, 29, 0, 12, 0, 3, 0, 31, 0, 32, 0, 30, 0, 34, 0, 22, 0, 3, 0, 33, 0, 30, 0, 29, 0, 38, 0, 44, 0, 8, 0, 2]} +{"text": "Червоний колір ми бачимо з зовнішнього боку первинної веселки, а фіолетовий — із внутрішнього.", "phonemes": ["ч", "е", "р", "в", "о", "н", "и", "и", "̆", " 
", "к", "о", "л", "і", "р", " ", "м", "и", " ", "б", "а", "ч", "и", "м", "о", " ", "з", " ", "з", "о", "в", "н", "і", "ш", "н", "ь", "о", "г", "о", " ", "б", "о", "к", "у", " ", "п", "е", "р", "в", "и", "н", "н", "о", "і", "̈", " ", "в", "е", "с", "е", "л", "к", "и", ",", " ", "а", " ", "ф", "і", "о", "л", "е", "т", "о", "в", "и", "и", "̆", " ", "—", " ", "і", "з", " ", "в", "н", "у", "т", "р", "і", "ш", "н", "ь", "о", "г", "о", "."], "phoneme_ids": [1, 0, 39, 0, 18, 0, 32, 0, 14, 0, 30, 0, 29, 0, 22, 0, 22, 0, 46, 0, 3, 0, 26, 0, 30, 0, 27, 0, 23, 0, 32, 0, 3, 0, 28, 0, 22, 0, 3, 0, 13, 0, 12, 0, 39, 0, 22, 0, 28, 0, 30, 0, 3, 0, 21, 0, 3, 0, 21, 0, 30, 0, 14, 0, 29, 0, 23, 0, 40, 0, 29, 0, 42, 0, 30, 0, 15, 0, 30, 0, 3, 0, 13, 0, 30, 0, 26, 0, 35, 0, 3, 0, 31, 0, 18, 0, 32, 0, 14, 0, 22, 0, 29, 0, 29, 0, 30, 0, 23, 0, 47, 0, 3, 0, 14, 0, 18, 0, 33, 0, 18, 0, 27, 0, 26, 0, 22, 0, 6, 0, 3, 0, 12, 0, 3, 0, 36, 0, 23, 0, 30, 0, 27, 0, 18, 0, 34, 0, 30, 0, 14, 0, 22, 0, 22, 0, 46, 0, 3, 0, 48, 0, 3, 0, 23, 0, 21, 0, 3, 0, 14, 0, 29, 0, 35, 0, 34, 0, 32, 0, 23, 0, 40, 0, 29, 0, 42, 0, 30, 0, 15, 0, 30, 0, 8, 0, 2]} +{"text": "Веселка пов'язана з заломленням і відбиттям ,деякою мірою і з дифракцією, сонячного світла у водяних краплях, зважених у повітрі.", "phonemes": ["в", "е", "с", "е", "л", "к", "а", " ", "п", "о", "в", "'", "я", "з", "а", "н", "а", " ", "з", " ", "з", "а", "л", "о", "м", "л", "е", "н", "н", "я", "м", " ", "і", " ", "в", "і", "д", "б", "и", "т", "т", "я", "м", " ", ",", "д", "е", "я", "к", "о", "ю", " ", "м", "і", "р", "о", "ю", " ", "і", " ", "з", " ", "д", "и", "ф", "р", "а", "к", "ц", "і", "є", "ю", ",", " ", "с", "о", "н", "я", "ч", "н", "о", "г", "о", " ", "с", "в", "і", "т", "л", "а", " ", "у", " ", "в", "о", "д", "я", "н", "и", "х", " ", "к", "р", "а", "п", "л", "я", "х", ",", " ", "з", "в", "а", "ж", "е", "н", "и", "х", " ", "у", " ", "п", "о", "в", "і", "т", "р", "і", "."], "phoneme_ids": [1, 0, 14, 0, 18, 0, 33, 0, 18, 0, 27, 0, 26, 0, 12, 
0, 3, 0, 31, 0, 30, 0, 14, 0, 5, 0, 44, 0, 21, 0, 12, 0, 29, 0, 12, 0, 3, 0, 21, 0, 3, 0, 21, 0, 12, 0, 27, 0, 30, 0, 28, 0, 27, 0, 18, 0, 29, 0, 29, 0, 44, 0, 28, 0, 3, 0, 23, 0, 3, 0, 14, 0, 23, 0, 17, 0, 13, 0, 22, 0, 34, 0, 34, 0, 44, 0, 28, 0, 3, 0, 6, 0, 17, 0, 18, 0, 44, 0, 26, 0, 30, 0, 43, 0, 3, 0, 28, 0, 23, 0, 32, 0, 30, 0, 43, 0, 3, 0, 23, 0, 3, 0, 21, 0, 3, 0, 17, 0, 22, 0, 36, 0, 32, 0, 12, 0, 26, 0, 38, 0, 23, 0, 19, 0, 43, 0, 6, 0, 3, 0, 33, 0, 30, 0, 29, 0, 44, 0, 39, 0, 29, 0, 30, 0, 15, 0, 30, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 12, 0, 3, 0, 35, 0, 3, 0, 14, 0, 30, 0, 17, 0, 44, 0, 29, 0, 22, 0, 37, 0, 3, 0, 26, 0, 32, 0, 12, 0, 31, 0, 27, 0, 44, 0, 37, 0, 6, 0, 3, 0, 21, 0, 14, 0, 12, 0, 20, 0, 18, 0, 29, 0, 22, 0, 37, 0, 3, 0, 35, 0, 3, 0, 31, 0, 30, 0, 14, 0, 23, 0, 34, 0, 32, 0, 23, 0, 8, 0, 2]} +{"text": "Ці крапельки по-різному відхиляють світло різних кольорів, у результаті чого біле світло розкладається на спектр.", "phonemes": ["ц", "і", " ", "к", "р", "а", "п", "е", "л", "ь", "к", "и", " ", "п", "о", "-", "р", "і", "з", "н", "о", "м", "у", " ", "в", "і", "д", "х", "и", "л", "я", "ю", "т", "ь", " ", "с", "в", "і", "т", "л", "о", " ", "р", "і", "з", "н", "и", "х", " ", "к", "о", "л", "ь", "о", "р", "і", "в", ",", " ", "у", " ", "р", "е", "з", "у", "л", "ь", "т", "а", "т", "і", " ", "ч", "о", "г", "о", " ", "б", "і", "л", "е", " ", "с", "в", "і", "т", "л", "о", " ", "р", "о", "з", "к", "л", "а", "д", "а", "є", "т", "ь", "с", "я", " ", "н", "а", " ", "с", "п", "е", "к", "т", "р", "."], "phoneme_ids": [1, 0, 38, 0, 23, 0, 3, 0, 26, 0, 32, 0, 12, 0, 31, 0, 18, 0, 27, 0, 42, 0, 26, 0, 22, 0, 3, 0, 31, 0, 30, 0, 7, 0, 32, 0, 23, 0, 21, 0, 29, 0, 30, 0, 28, 0, 35, 0, 3, 0, 14, 0, 23, 0, 17, 0, 37, 0, 22, 0, 27, 0, 44, 0, 43, 0, 34, 0, 42, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 30, 0, 3, 0, 32, 0, 23, 0, 21, 0, 29, 0, 22, 0, 37, 0, 3, 0, 26, 0, 30, 0, 27, 0, 42, 0, 30, 0, 32, 0, 23, 0, 14, 0, 6, 0, 3, 0, 35, 0, 3, 0, 32, 0, 18, 0, 
21, 0, 35, 0, 27, 0, 42, 0, 34, 0, 12, 0, 34, 0, 23, 0, 3, 0, 39, 0, 30, 0, 15, 0, 30, 0, 3, 0, 13, 0, 23, 0, 27, 0, 18, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 30, 0, 3, 0, 32, 0, 30, 0, 21, 0, 26, 0, 27, 0, 12, 0, 17, 0, 12, 0, 19, 0, 34, 0, 42, 0, 33, 0, 44, 0, 3, 0, 29, 0, 12, 0, 3, 0, 33, 0, 31, 0, 18, 0, 26, 0, 34, 0, 32, 0, 8, 0, 2]} +{"text": "Спостерігач, що стоїть спиною до джерела світла, бачить різнобарвне світіння, що виходить із простору по концентричному колу ,дузі.", "phonemes": ["с", "п", "о", "с", "т", "е", "р", "і", "г", "а", "ч", ",", " ", "щ", "о", " ", "с", "т", "о", "і", "̈", "т", "ь", " ", "с", "п", "и", "н", "о", "ю", " ", "д", "о", " ", "д", "ж", "е", "р", "е", "л", "а", " ", "с", "в", "і", "т", "л", "а", ",", " ", "б", "а", "ч", "и", "т", "ь", " ", "р", "і", "з", "н", "о", "б", "а", "р", "в", "н", "е", " ", "с", "в", "і", "т", "і", "н", "н", "я", ",", " ", "щ", "о", " ", "в", "и", "х", "о", "д", "и", "т", "ь", " ", "і", "з", " ", "п", "р", "о", "с", "т", "о", "р", "у", " ", "п", "о", " ", "к", "о", "н", "ц", "е", "н", "т", "р", "и", "ч", "н", "о", "м", "у", " ", "к", "о", "л", "у", " ", ",", "д", "у", "з", "і", "."], "phoneme_ids": [1, 0, 33, 0, 31, 0, 30, 0, 33, 0, 34, 0, 18, 0, 32, 0, 23, 0, 15, 0, 12, 0, 39, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 33, 0, 34, 0, 30, 0, 23, 0, 47, 0, 34, 0, 42, 0, 3, 0, 33, 0, 31, 0, 22, 0, 29, 0, 30, 0, 43, 0, 3, 0, 17, 0, 30, 0, 3, 0, 17, 0, 20, 0, 18, 0, 32, 0, 18, 0, 27, 0, 12, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 27, 0, 12, 0, 6, 0, 3, 0, 13, 0, 12, 0, 39, 0, 22, 0, 34, 0, 42, 0, 3, 0, 32, 0, 23, 0, 21, 0, 29, 0, 30, 0, 13, 0, 12, 0, 32, 0, 14, 0, 29, 0, 18, 0, 3, 0, 33, 0, 14, 0, 23, 0, 34, 0, 23, 0, 29, 0, 29, 0, 44, 0, 6, 0, 3, 0, 41, 0, 30, 0, 3, 0, 14, 0, 22, 0, 37, 0, 30, 0, 17, 0, 22, 0, 34, 0, 42, 0, 3, 0, 23, 0, 21, 0, 3, 0, 31, 0, 32, 0, 30, 0, 33, 0, 34, 0, 30, 0, 32, 0, 35, 0, 3, 0, 31, 0, 30, 0, 3, 0, 26, 0, 30, 0, 29, 0, 38, 0, 18, 0, 29, 0, 34, 0, 32, 0, 22, 0, 39, 0, 29, 0, 30, 0, 28, 
0, 35, 0, 3, 0, 26, 0, 30, 0, 27, 0, 35, 0, 3, 0, 6, 0, 17, 0, 35, 0, 21, 0, 23, 0, 8, 0, 2]} +{"text": "Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів!", "phonemes": ["ч", "у", "є", "ш", " ", "і", "̈", "х", ",", " ", "д", "о", "ц", "ю", ",", " ", "г", "а", "?", " ", "к", "у", "м", "е", "д", "н", "а", " ", "ж", " ", "т", "и", ",", " ", "п", "р", "о", "щ", "а", "и", "̆", "с", "я", " ", "б", "е", "з", " ", "ґ", "о", "л", "ь", "ф", "і", "в", "!"], "phoneme_ids": [1, 0, 39, 0, 35, 0, 19, 0, 40, 0, 3, 0, 23, 0, 47, 0, 37, 0, 6, 0, 3, 0, 17, 0, 30, 0, 38, 0, 43, 0, 6, 0, 3, 0, 15, 0, 12, 0, 11, 0, 3, 0, 26, 0, 35, 0, 28, 0, 18, 0, 17, 0, 29, 0, 12, 0, 3, 0, 20, 0, 3, 0, 34, 0, 22, 0, 6, 0, 3, 0, 31, 0, 32, 0, 30, 0, 41, 0, 12, 0, 22, 0, 46, 0, 33, 0, 44, 0, 3, 0, 13, 0, 18, 0, 21, 0, 3, 0, 16, 0, 30, 0, 27, 0, 42, 0, 36, 0, 23, 0, 14, 0, 4, 0, 2]} +{"text": "Жебракують філософи при ґанку церкви в Гадячі, ще й шатро їхнє п’яне знаємо.", "phonemes": ["ж", "е", "б", "р", "а", "к", "у", "ю", "т", "ь", " ", "ф", "і", "л", "о", "с", "о", "ф", "и", " ", "п", "р", "и", " ", "ґ", "а", "н", "к", "у", " ", "ц", "е", "р", "к", "в", "и", " ", "в", " ", "г", "а", "д", "я", "ч", "і", ",", " ", "щ", "е", " ", "и", "̆", " ", "ш", "а", "т", "р", "о", " ", "і", "̈", "х", "н", "є", " ", "п", "’", "я", "н", "е", " ", "з", "н", "а", "є", "м", "о", "."], "phoneme_ids": [1, 0, 20, 0, 18, 0, 13, 0, 32, 0, 12, 0, 26, 0, 35, 0, 43, 0, 34, 0, 42, 0, 3, 0, 36, 0, 23, 0, 27, 0, 30, 0, 33, 0, 30, 0, 36, 0, 22, 0, 3, 0, 31, 0, 32, 0, 22, 0, 3, 0, 16, 0, 12, 0, 29, 0, 26, 0, 35, 0, 3, 0, 38, 0, 18, 0, 32, 0, 26, 0, 14, 0, 22, 0, 3, 0, 14, 0, 3, 0, 15, 0, 12, 0, 17, 0, 44, 0, 39, 0, 23, 0, 6, 0, 3, 0, 41, 0, 18, 0, 3, 0, 22, 0, 46, 0, 3, 0, 40, 0, 12, 0, 34, 0, 32, 0, 30, 0, 3, 0, 23, 0, 47, 0, 37, 0, 29, 0, 19, 0, 3, 0, 31, 0, 44, 0, 29, 0, 18, 0, 3, 0, 21, 0, 29, 0, 12, 0, 19, 0, 28, 0, 30, 0, 8, 0, 2]} diff --git a/etc/test_sentences/test_vi.jsonl b/etc/test_sentences/test_vi.jsonl new file 
mode 100644 index 0000000..66071f1 --- /dev/null +++ b/etc/test_sentences/test_vi.jsonl @@ -0,0 +1,9 @@ +{"text": "Cầu vồng hay mống cũng như quang phổ là hiện tượng tán sắc của các ánh sáng từ Mặt Trời khi khúc xạ và phản xạ qua các giọt nước mưa.", "phonemes": ["k", "ˈ", "ə", "2", "w", " ", "v", "ˈ", "o", "2", "ŋ", " ", "h", "ˈ", "a", "1", "j", " ", "m", "ˈ", "o", "ɜ", "ŋ", " ", "k", "ˈ", "u", "5", "ŋ", " ", "ɲ", "ˌ", "y", "1", " ", "k", "w", "ˈ", "a", "ː", "1", "ŋ", " ", "f", "ˈ", "o", "4", " ", "l", "ˌ", "a", "ː", "2", " ", "h", "ˈ", "i", "ɛ", "6", "n", " ", "t", "̪", "ˈ", "y", "ə", "6", "ŋ", " ", "t", "̪", "ˈ", "a", "ː", "ɜ", "n", " ", "s", "ˈ", "a", "ɜ", "c", " ", "k", "ˌ", "u", "ə", "4", " ", "k", "ˌ", "a", "ː", "ɜ", "c", " ", "ˈ", "e", "-", "ɜ", "ɲ", " ", "s", "ˈ", "a", "ː", "ɜ", "ŋ", " ", "t", "̪", "ˌ", "y", "2", " ", "m", "ˈ", "a", "6", "t", "̪", " ", "t", "ʃ", "ˈ", "ə", "ː", "2", "j", " ", "x", "ˌ", "i", "1", " ", "x", "ˈ", "u", "ɜ", "c", " ", "s", "ˈ", "a", "ː", "6", " ", "v", "ˌ", "a", "ː", "2", " ", "f", "ˈ", "a", "ː", "4", "n", " ", "s", "ˈ", "a", "ː", "6", " ", "k", "w", "ˈ", "a", "ː", "1", " ", "k", "ˌ", "a", "ː", "ɜ", "c", " ", "z", "ˈ", "ɔ", "6", "t", "̪", " ", "n", "ˈ", "y", "ə", "ɜ", "c", " ", "m", "ˈ", "y", "ə", "7", "."], "phoneme_ids": [1, 0, 23, 0, 120, 0, 59, 0, 132, 0, 35, 0, 3, 0, 34, 0, 120, 0, 27, 0, 132, 0, 44, 0, 3, 0, 20, 0, 120, 0, 14, 0, 131, 0, 22, 0, 3, 0, 25, 0, 120, 0, 27, 0, 62, 0, 44, 0, 3, 0, 23, 0, 120, 0, 33, 0, 135, 0, 44, 0, 3, 0, 82, 0, 121, 0, 37, 0, 131, 0, 3, 0, 23, 0, 35, 0, 120, 0, 14, 0, 122, 0, 131, 0, 44, 0, 3, 0, 19, 0, 120, 0, 27, 0, 134, 0, 3, 0, 24, 0, 121, 0, 14, 0, 122, 0, 132, 0, 3, 0, 20, 0, 120, 0, 21, 0, 61, 0, 136, 0, 26, 0, 3, 0, 32, 0, 142, 0, 120, 0, 37, 0, 59, 0, 136, 0, 44, 0, 3, 0, 32, 0, 142, 0, 120, 0, 14, 0, 122, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 14, 0, 62, 0, 16, 0, 3, 0, 23, 0, 121, 0, 33, 0, 59, 0, 134, 0, 3, 0, 23, 0, 121, 0, 14, 0, 122, 0, 62, 0, 16, 0, 3, 0, 120, 0, 18, 0, 9, 0, 
62, 0, 82, 0, 3, 0, 31, 0, 120, 0, 14, 0, 122, 0, 62, 0, 44, 0, 3, 0, 32, 0, 142, 0, 121, 0, 37, 0, 132, 0, 3, 0, 25, 0, 120, 0, 14, 0, 136, 0, 32, 0, 142, 0, 3, 0, 32, 0, 96, 0, 120, 0, 59, 0, 122, 0, 132, 0, 22, 0, 3, 0, 36, 0, 121, 0, 21, 0, 131, 0, 3, 0, 36, 0, 120, 0, 33, 0, 62, 0, 16, 0, 3, 0, 31, 0, 120, 0, 14, 0, 122, 0, 136, 0, 3, 0, 34, 0, 121, 0, 14, 0, 122, 0, 132, 0, 3, 0, 19, 0, 120, 0, 14, 0, 122, 0, 134, 0, 26, 0, 3, 0, 31, 0, 120, 0, 14, 0, 122, 0, 136, 0, 3, 0, 23, 0, 35, 0, 120, 0, 14, 0, 122, 0, 131, 0, 3, 0, 23, 0, 121, 0, 14, 0, 122, 0, 62, 0, 16, 0, 3, 0, 38, 0, 120, 0, 54, 0, 136, 0, 32, 0, 142, 0, 3, 0, 26, 0, 120, 0, 37, 0, 59, 0, 62, 0, 16, 0, 3, 0, 25, 0, 120, 0, 37, 0, 59, 0, 137, 0, 10, 0, 2]} +{"text": "Ở nhiều nền văn hóa khác nhau, cầu vồng xuất hiện được coi là mang đến điềm lành cho nhân thế.", "phonemes": ["ˈ", "ə", "ː", "4", " ", "ɲ", "ˈ", "i", "ɛ", "2", "w", " ", "n", "ˈ", "e", "2", "n", " ", "v", "ˈ", "a", "1", "n", " ", "h", "w", "ˈ", "a", "ː", "ɜ", " ", "x", "ˈ", "a", "ː", "ɜ", "c", " ", "ɲ", "ˈ", "a", "7", "w", ",", " ", "k", "ˈ", "ə", "2", "w", " ", "v", "ˈ", "o", "2", "ŋ", " ", "s", "w", "ˈ", "ə", "ɜ", "t", "̪", " ", "h", "ˈ", "i", "ɛ", "6", "n", " ", "ɗ", "ˌ", "y", "ə", "6", "c", " ", "k", "ˈ", "ɔ", "1", "j", " ", "l", "ˌ", "a", "ː", "2", " ", "m", "ˈ", "a", "ː", "1", "ŋ", " ", "ɗ", "ˌ", "e", "ɜ", "n", " ", "ɗ", "ˈ", "i", "ɛ", "2", "m", " ", "l", "ˈ", "e", "-", "2", "ɲ", " ", "t", "ʃ", "ˌ", "ɔ", "1", " ", "ɲ", "ˈ", "ə", "1", "n", " ", "t", "ˈ", "e", "ɜ", "."], "phoneme_ids": [1, 0, 120, 0, 59, 0, 122, 0, 134, 0, 3, 0, 82, 0, 120, 0, 21, 0, 61, 0, 132, 0, 35, 0, 3, 0, 26, 0, 120, 0, 18, 0, 132, 0, 26, 0, 3, 0, 34, 0, 120, 0, 14, 0, 131, 0, 26, 0, 3, 0, 20, 0, 35, 0, 120, 0, 14, 0, 122, 0, 62, 0, 3, 0, 36, 0, 120, 0, 14, 0, 122, 0, 62, 0, 16, 0, 3, 0, 82, 0, 120, 0, 14, 0, 137, 0, 35, 0, 8, 0, 3, 0, 23, 0, 120, 0, 59, 0, 132, 0, 35, 0, 3, 0, 34, 0, 120, 0, 27, 0, 132, 0, 44, 0, 3, 0, 31, 0, 35, 0, 120, 0, 59, 0, 62, 0, 32, 
0, 142, 0, 3, 0, 20, 0, 120, 0, 21, 0, 61, 0, 136, 0, 26, 0, 3, 0, 57, 0, 121, 0, 37, 0, 59, 0, 136, 0, 16, 0, 3, 0, 23, 0, 120, 0, 54, 0, 131, 0, 22, 0, 3, 0, 24, 0, 121, 0, 14, 0, 122, 0, 132, 0, 3, 0, 25, 0, 120, 0, 14, 0, 122, 0, 131, 0, 44, 0, 3, 0, 57, 0, 121, 0, 18, 0, 62, 0, 26, 0, 3, 0, 57, 0, 120, 0, 21, 0, 61, 0, 132, 0, 25, 0, 3, 0, 24, 0, 120, 0, 18, 0, 9, 0, 132, 0, 82, 0, 3, 0, 32, 0, 96, 0, 121, 0, 54, 0, 131, 0, 3, 0, 82, 0, 120, 0, 59, 0, 131, 0, 26, 0, 3, 0, 32, 0, 120, 0, 18, 0, 62, 0, 10, 0, 2]} +{"text": "Do bạch kim rất quý nên sẽ dùng để lắp vô xương.", "phonemes": ["z", "ˈ", "ɔ", "1", " ", "b", "ˈ", "e", "-", "6", "c", " ", "k", "ˈ", "i", "1", "m", " ", "z", "ˈ", "ə", "ɜ", "t", "̪", " ", "k", "w", "ˈ", "i", "ɜ", " ", "n", "ˌ", "e", "1", "n", " ", "s", "ˌ", "ɛ", "5", " ", "z", "ˈ", "u", "2", "ŋ", " ", "ɗ", "ˌ", "e", "4", " ", "l", "ˈ", "a", "ɜ", "p", " ", "v", "ˈ", "o", "1", " ", "s", "ˈ", "y", "ə", "7", "ŋ", "."], "phoneme_ids": [1, 0, 38, 0, 120, 0, 54, 0, 131, 0, 3, 0, 15, 0, 120, 0, 18, 0, 9, 0, 136, 0, 16, 0, 3, 0, 23, 0, 120, 0, 21, 0, 131, 0, 25, 0, 3, 0, 38, 0, 120, 0, 59, 0, 62, 0, 32, 0, 142, 0, 3, 0, 23, 0, 35, 0, 120, 0, 21, 0, 62, 0, 3, 0, 26, 0, 121, 0, 18, 0, 131, 0, 26, 0, 3, 0, 31, 0, 121, 0, 61, 0, 135, 0, 3, 0, 38, 0, 120, 0, 33, 0, 132, 0, 44, 0, 3, 0, 57, 0, 121, 0, 18, 0, 134, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 28, 0, 3, 0, 34, 0, 120, 0, 27, 0, 131, 0, 3, 0, 31, 0, 120, 0, 37, 0, 59, 0, 137, 0, 44, 0, 10, 0, 2]} +{"text": "Tâm tưởng tôi tỏ tình tới Tú từ tháng tư, thú thật, tôi thương Tâm thì tôi thì thầm thử Tâm thế thôị.", "phonemes": ["t", "̪", "ˈ", "ə", "1", "m", " ", "t", "̪", "ˈ", "y", "ə", "4", "ŋ", " ", "t", "̪", "ˈ", "o", "1", "j", " ", "t", "̪", "ˈ", "ɔ", "4", " ", "t", "̪", "ˈ", "i", "2", "ɲ", " ", "t", "̪", "ˌ", "ə", "ː", "ɜ", "j", " ", "t", "̪", "ˈ", "u", "ɜ", " ", "t", "̪", "ˌ", "y", "2", " ", "t", "ˈ", "a", "ː", "ɜ", "ŋ", " ", "t", "̪", "ˈ", "y", "7", ",", " ", "t", "ˈ", "u", "ɜ", " ", "t", "ˈ", "ə", 
"6", "t", "̪", ",", " ", "t", "̪", "ˈ", "o", "1", "j", " ", "t", "ˈ", "y", "ə", "1", "ŋ", " ", "t", "̪", "ˈ", "ə", "1", "m", " ", "t", "ˌ", "i", "2", " ", "t", "̪", "ˈ", "o", "1", "j", " ", "t", "ˌ", "i", "2", " ", "t", "ˈ", "ə", "2", "m", " ", "t", "ˈ", "y", "4", " ", "t", "̪", "ˈ", "ə", "1", "m", " ", "t", "ˈ", "e", "ɜ", " ", "t", "ˈ", "o", "7", "i", "6", "."], "phoneme_ids": [1, 0, 32, 0, 142, 0, 120, 0, 59, 0, 131, 0, 25, 0, 3, 0, 32, 0, 142, 0, 120, 0, 37, 0, 59, 0, 134, 0, 44, 0, 3, 0, 32, 0, 142, 0, 120, 0, 27, 0, 131, 0, 22, 0, 3, 0, 32, 0, 142, 0, 120, 0, 54, 0, 134, 0, 3, 0, 32, 0, 142, 0, 120, 0, 21, 0, 132, 0, 82, 0, 3, 0, 32, 0, 142, 0, 121, 0, 59, 0, 122, 0, 62, 0, 22, 0, 3, 0, 32, 0, 142, 0, 120, 0, 33, 0, 62, 0, 3, 0, 32, 0, 142, 0, 121, 0, 37, 0, 132, 0, 3, 0, 32, 0, 120, 0, 14, 0, 122, 0, 62, 0, 44, 0, 3, 0, 32, 0, 142, 0, 120, 0, 37, 0, 137, 0, 8, 0, 3, 0, 32, 0, 120, 0, 33, 0, 62, 0, 3, 0, 32, 0, 120, 0, 59, 0, 136, 0, 32, 0, 142, 0, 8, 0, 3, 0, 32, 0, 142, 0, 120, 0, 27, 0, 131, 0, 22, 0, 3, 0, 32, 0, 120, 0, 37, 0, 59, 0, 131, 0, 44, 0, 3, 0, 32, 0, 142, 0, 120, 0, 59, 0, 131, 0, 25, 0, 3, 0, 32, 0, 121, 0, 21, 0, 132, 0, 3, 0, 32, 0, 142, 0, 120, 0, 27, 0, 131, 0, 22, 0, 3, 0, 32, 0, 121, 0, 21, 0, 132, 0, 3, 0, 32, 0, 120, 0, 59, 0, 132, 0, 25, 0, 3, 0, 32, 0, 120, 0, 37, 0, 134, 0, 3, 0, 32, 0, 142, 0, 120, 0, 59, 0, 131, 0, 25, 0, 3, 0, 32, 0, 120, 0, 18, 0, 62, 0, 3, 0, 32, 0, 120, 0, 27, 0, 137, 0, 21, 0, 136, 0, 10, 0, 2]} +{"text": "Nồi đồng nấu ốc, nồi đất nấu ếch.", "phonemes": ["n", "ˈ", "o", "2", "j", " ", "ɗ", "ˈ", "o", "2", "ŋ", " ", "n", "ˈ", "ə", "ɜ", "w", " ", "ˈ", "o", "ɜ", "k", ",", " ", "n", "ˈ", "o", "2", "j", " ", "ɗ", "ˈ", "ə", "ɜ", "t", "̪", " ", "n", "ˈ", "ə", "ɜ", "w", " ", "ˈ", "e", "ɜ", "c", "."], "phoneme_ids": [1, 0, 26, 0, 120, 0, 27, 0, 132, 0, 22, 0, 3, 0, 57, 0, 120, 0, 27, 0, 132, 0, 44, 0, 3, 0, 26, 0, 120, 0, 59, 0, 62, 0, 35, 0, 3, 0, 120, 0, 27, 0, 62, 0, 23, 0, 8, 0, 3, 0, 26, 0, 120, 0, 27, 0, 132, 0, 
22, 0, 3, 0, 57, 0, 120, 0, 59, 0, 62, 0, 32, 0, 142, 0, 3, 0, 26, 0, 120, 0, 59, 0, 62, 0, 35, 0, 3, 0, 120, 0, 18, 0, 62, 0, 16, 0, 10, 0, 2]} +{"text": "Lan leo lên lầu Lan lấy lưỡi lam. Lan lấy lộn lưỡi liềm Lan leo lên lầu lấy lại.", "phonemes": ["l", "ˈ", "a", "ː", "1", "n", " ", "l", "ˈ", "ɛ", "1", "w", " ", "l", "ˈ", "e", "1", "n", " ", "l", "ˈ", "ə", "2", "w", " ", "l", "ˈ", "a", "ː", "1", "n", " ", "l", "ˈ", "ə", "ɪ", "ɜ", " ", "l", "ˈ", "y", "ə", "5", "j", " ", "l", "ˈ", "a", "ː", "7", "m", ".", " ", "l", "ˈ", "a", "ː", "1", "n", " ", "l", "ˈ", "ə", "ɪ", "ɜ", " ", "l", "ˈ", "o", "6", "n", " ", "l", "ˈ", "y", "ə", "5", "j", " ", "l", "ˈ", "i", "ɛ", "2", "m", " ", "l", "ˈ", "a", "ː", "1", "n", " ", "l", "ˈ", "ɛ", "1", "w", " ", "l", "ˈ", "e", "1", "n", " ", "l", "ˈ", "ə", "2", "w", " ", "l", "ˈ", "ə", "ɪ", "ɜ", " ", "l", "ˈ", "a", "ː", "6", "j", "."], "phoneme_ids": [1, 0, 24, 0, 120, 0, 14, 0, 122, 0, 131, 0, 26, 0, 3, 0, 24, 0, 120, 0, 61, 0, 131, 0, 35, 0, 3, 0, 24, 0, 120, 0, 18, 0, 131, 0, 26, 0, 3, 0, 24, 0, 120, 0, 59, 0, 132, 0, 35, 0, 3, 0, 24, 0, 120, 0, 14, 0, 122, 0, 131, 0, 26, 0, 3, 0, 24, 0, 120, 0, 59, 0, 74, 0, 62, 0, 3, 0, 24, 0, 120, 0, 37, 0, 59, 0, 135, 0, 22, 0, 3, 0, 24, 0, 120, 0, 14, 0, 122, 0, 137, 0, 25, 0, 10, 0, 3, 0, 24, 0, 120, 0, 14, 0, 122, 0, 131, 0, 26, 0, 3, 0, 24, 0, 120, 0, 59, 0, 74, 0, 62, 0, 3, 0, 24, 0, 120, 0, 27, 0, 136, 0, 26, 0, 3, 0, 24, 0, 120, 0, 37, 0, 59, 0, 135, 0, 22, 0, 3, 0, 24, 0, 120, 0, 21, 0, 61, 0, 132, 0, 25, 0, 3, 0, 24, 0, 120, 0, 14, 0, 122, 0, 131, 0, 26, 0, 3, 0, 24, 0, 120, 0, 61, 0, 131, 0, 35, 0, 3, 0, 24, 0, 120, 0, 18, 0, 131, 0, 26, 0, 3, 0, 24, 0, 120, 0, 59, 0, 132, 0, 35, 0, 3, 0, 24, 0, 120, 0, 59, 0, 74, 0, 62, 0, 3, 0, 24, 0, 120, 0, 14, 0, 122, 0, 136, 0, 22, 0, 10, 0, 2]} +{"text": "Bà Ba béo bán bánh bò, bán bòn bon, bán bong bóng, bên bờ biển, bả bị bộ binh bắt ba bốn bận.", "phonemes": ["b", "ˈ", "a", "ː", "2", " ", "b", "ˈ", "a", "ː", "1", " ", "b", "ˈ", "ɛ", "ɜ", "w", " ", 
"b", "ˈ", "a", "ː", "ɜ", "n", " ", "b", "ˈ", "e", "-", "ɜ", "ɲ", " ", "b", "ˈ", "ɔ", "2", ",", " ", "b", "ˈ", "a", "ː", "ɜ", "n", " ", "b", "ˈ", "ɔ", "2", "n", " ", "b", "ˈ", "ɔ", "7", "n", ",", " ", "b", "ˈ", "a", "ː", "ɜ", "n", " ", "b", "ˈ", "ɔ", "1", "ŋ", " ", "b", "ˈ", "ɔ", "ɜ", "ŋ", ",", " ", "b", "ˈ", "e", "1", "n", " ", "b", "ˈ", "ə", "ː", "2", " ", "b", "ˈ", "i", "ɛ", "4", "n", ",", " ", "b", "ˈ", "a", "ː", "4", " ", "b", "ˌ", "i", "6", " ", "b", "ˈ", "o", "6", " ", "b", "ˈ", "i", "1", "ɲ", " ", "b", "ˈ", "a", "ɜ", "t", "̪", " ", "b", "ˈ", "a", "ː", "1", " ", "b", "ˈ", "o", "ɜ", "n", " ", "b", "ˈ", "ə", "6", "n", "."], "phoneme_ids": [1, 0, 15, 0, 120, 0, 14, 0, 122, 0, 132, 0, 3, 0, 15, 0, 120, 0, 14, 0, 122, 0, 131, 0, 3, 0, 15, 0, 120, 0, 61, 0, 62, 0, 35, 0, 3, 0, 15, 0, 120, 0, 14, 0, 122, 0, 62, 0, 26, 0, 3, 0, 15, 0, 120, 0, 18, 0, 9, 0, 62, 0, 82, 0, 3, 0, 15, 0, 120, 0, 54, 0, 132, 0, 8, 0, 3, 0, 15, 0, 120, 0, 14, 0, 122, 0, 62, 0, 26, 0, 3, 0, 15, 0, 120, 0, 54, 0, 132, 0, 26, 0, 3, 0, 15, 0, 120, 0, 54, 0, 137, 0, 26, 0, 8, 0, 3, 0, 15, 0, 120, 0, 14, 0, 122, 0, 62, 0, 26, 0, 3, 0, 15, 0, 120, 0, 54, 0, 131, 0, 44, 0, 3, 0, 15, 0, 120, 0, 54, 0, 62, 0, 44, 0, 8, 0, 3, 0, 15, 0, 120, 0, 18, 0, 131, 0, 26, 0, 3, 0, 15, 0, 120, 0, 59, 0, 122, 0, 132, 0, 3, 0, 15, 0, 120, 0, 21, 0, 61, 0, 134, 0, 26, 0, 8, 0, 3, 0, 15, 0, 120, 0, 14, 0, 122, 0, 134, 0, 3, 0, 15, 0, 121, 0, 21, 0, 136, 0, 3, 0, 15, 0, 120, 0, 27, 0, 136, 0, 3, 0, 15, 0, 120, 0, 21, 0, 131, 0, 82, 0, 3, 0, 15, 0, 120, 0, 14, 0, 62, 0, 32, 0, 142, 0, 3, 0, 15, 0, 120, 0, 14, 0, 122, 0, 131, 0, 3, 0, 15, 0, 120, 0, 27, 0, 62, 0, 26, 0, 3, 0, 15, 0, 120, 0, 59, 0, 136, 0, 26, 0, 10, 0, 2]} +{"text": "Chồng chị chín chết chị chưa chôn, chị chờ chuối chín chị chôn cho chồng", "phonemes": ["t", "ʃ", "ˈ", "o", "2", "ŋ", " ", "t", "ʃ", "ˈ", "i", "6", " ", "t", "ʃ", "ˈ", "i", "ɜ", "n", " ", "t", "ʃ", "ˈ", "e", "ɜ", "t", "̪", " ", "t", "ʃ", "ˈ", "i", "6", " ", "t", "ʃ", "ˌ", "y", "ə", "1", " 
", "t", "ʃ", "ˈ", "o", "7", "n", ",", " ", "t", "ʃ", "ˈ", "i", "6", " ", "t", "ʃ", "ˈ", "ə", "ː", "2", " ", "t", "ʃ", "ˈ", "u", "ə", "ɜ", "j", " ", "t", "ʃ", "ˈ", "i", "ɜ", "n", " ", "t", "ʃ", "ˈ", "i", "6", " ", "t", "ʃ", "ˈ", "o", "1", "n", " ", "t", "ʃ", "ˌ", "ɔ", "1", " ", "t", "ʃ", "ˈ", "o", "2", "ŋ"], "phoneme_ids": [1, 0, 32, 0, 96, 0, 120, 0, 27, 0, 132, 0, 44, 0, 3, 0, 32, 0, 96, 0, 120, 0, 21, 0, 136, 0, 3, 0, 32, 0, 96, 0, 120, 0, 21, 0, 62, 0, 26, 0, 3, 0, 32, 0, 96, 0, 120, 0, 18, 0, 62, 0, 32, 0, 142, 0, 3, 0, 32, 0, 96, 0, 120, 0, 21, 0, 136, 0, 3, 0, 32, 0, 96, 0, 121, 0, 37, 0, 59, 0, 131, 0, 3, 0, 32, 0, 96, 0, 120, 0, 27, 0, 137, 0, 26, 0, 8, 0, 3, 0, 32, 0, 96, 0, 120, 0, 21, 0, 136, 0, 3, 0, 32, 0, 96, 0, 120, 0, 59, 0, 122, 0, 132, 0, 3, 0, 32, 0, 96, 0, 120, 0, 33, 0, 59, 0, 62, 0, 22, 0, 3, 0, 32, 0, 96, 0, 120, 0, 21, 0, 62, 0, 26, 0, 3, 0, 32, 0, 96, 0, 120, 0, 21, 0, 136, 0, 3, 0, 32, 0, 96, 0, 120, 0, 27, 0, 131, 0, 26, 0, 3, 0, 32, 0, 96, 0, 121, 0, 54, 0, 131, 0, 3, 0, 32, 0, 96, 0, 120, 0, 27, 0, 132, 0, 44, 0, 2]} +{"text": "Ðêm đen Đào đốt đèn đi đâu đó. Ðào đốt đèn đi đợi Ðài. Đài đến. 
Đào đòi đô, Đài đưa Đào đô, Ðào đòi Dylan Ðài đưa Dylan.", "phonemes": ["ɗ", "ˈ", "e", "1", "m", " ", "ɗ", "ˈ", "ɛ", "1", "n", " ", "ɗ", "ˈ", "a", "ː", "2", "w", " ", "ɗ", "ˈ", "o", "ɜ", "t", "̪", " ", "ɗ", "ˈ", "ɛ", "2", "n", " ", "ɗ", "ˈ", "i", "1", " ", "ɗ", "ˈ", "ə", "1", "w", " ", "ɗ", "ˈ", "ɔ", "ɜ", ".", " ", "ɗ", "ˈ", "a", "ː", "2", "w", " ", "ɗ", "ˈ", "o", "ɜ", "t", "̪", " ", "ɗ", "ˈ", "ɛ", "2", "n", " ", "ɗ", "ˈ", "i", "1", " ", "ɗ", "ˈ", "ə", "ː", "6", "j", " ", "ɗ", "ˈ", "a", "ː", "2", "j", ".", " ", "ɗ", "ˈ", "a", "ː", "2", "j", " ", "ɗ", "ˌ", "e", "ɜ", "n", ".", " ", "ɗ", "ˈ", "a", "ː", "2", "w", " ", "ɗ", "ˈ", "ɔ", "2", "j", " ", "ɗ", "ˈ", "o", "7", ",", " ", "ɗ", "ˈ", "a", "ː", "2", "j", " ", "ɗ", "ˈ", "y", "ə", "1", " ", "ɗ", "ˈ", "a", "ː", "2", "w", " ", "ɗ", "ˈ", "o", "7", ",", " ", "ɗ", "ˈ", "a", "ː", "2", "w", " ", "ɗ", "ˈ", "ɔ", "2", "j", " ", "z", "ˈ", "i", "1", "l", "a", "ː", "1", "n", " ", "ɗ", "ˈ", "a", "ː", "2", "j", " ", "ɗ", "ˈ", "y", "ə", "1", " ", "z", "ˈ", "i", "7", "l", "a", "ː", "1", "n", "."], "phoneme_ids": [1, 0, 57, 0, 120, 0, 18, 0, 131, 0, 25, 0, 3, 0, 57, 0, 120, 0, 61, 0, 131, 0, 26, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 35, 0, 3, 0, 57, 0, 120, 0, 27, 0, 62, 0, 32, 0, 142, 0, 3, 0, 57, 0, 120, 0, 61, 0, 132, 0, 26, 0, 3, 0, 57, 0, 120, 0, 21, 0, 131, 0, 3, 0, 57, 0, 120, 0, 59, 0, 131, 0, 35, 0, 3, 0, 57, 0, 120, 0, 54, 0, 62, 0, 10, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 35, 0, 3, 0, 57, 0, 120, 0, 27, 0, 62, 0, 32, 0, 142, 0, 3, 0, 57, 0, 120, 0, 61, 0, 132, 0, 26, 0, 3, 0, 57, 0, 120, 0, 21, 0, 131, 0, 3, 0, 57, 0, 120, 0, 59, 0, 122, 0, 136, 0, 22, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 22, 0, 10, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 22, 0, 3, 0, 57, 0, 121, 0, 18, 0, 62, 0, 26, 0, 10, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 35, 0, 3, 0, 57, 0, 120, 0, 54, 0, 132, 0, 22, 0, 3, 0, 57, 0, 120, 0, 27, 0, 137, 0, 8, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 22, 0, 3, 0, 57, 0, 
120, 0, 37, 0, 59, 0, 131, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 35, 0, 3, 0, 57, 0, 120, 0, 27, 0, 137, 0, 8, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 35, 0, 3, 0, 57, 0, 120, 0, 54, 0, 132, 0, 22, 0, 3, 0, 38, 0, 120, 0, 21, 0, 131, 0, 24, 0, 14, 0, 122, 0, 131, 0, 26, 0, 3, 0, 57, 0, 120, 0, 14, 0, 122, 0, 132, 0, 22, 0, 3, 0, 57, 0, 120, 0, 37, 0, 59, 0, 131, 0, 3, 0, 38, 0, 120, 0, 21, 0, 137, 0, 24, 0, 14, 0, 122, 0, 131, 0, 26, 0, 10, 0, 2]} diff --git a/etc/test_sentences/test_zh-cn.jsonl b/etc/test_sentences/test_zh-cn.jsonl new file mode 100644 index 0000000..d5322be --- /dev/null +++ b/etc/test_sentences/test_zh-cn.jsonl @@ -0,0 +1,7 @@ +{"text": "彩虹,又稱天弓、天虹、絳等,簡稱虹,是氣象中的一種光學現象,當太陽 光照射到半空中的水滴,光線被折射及反射,在天空上形成拱形的七彩光譜,由外 圈至内圈呈紅、橙、黃、綠、蓝、靛蓝、堇紫七种颜色(霓虹則相反)。", "phonemes": ["t", "s", "h", "a", "i", "2", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "j", "ˈ", "i", "o", "u", "5", " ", "t", "s", ".", "h", "ˈ", "ə", "5", "ŋ", " ", "t", "h", "ˈ", "i", "ɛ", "5", "n", " ", "k", "ˈ", "o", "n", "ɡ", "5", " ", "t", "h", "ˈ", "i", "ɛ", "5", "n", " ", "t", "ɕ", "ˈ", "i", "ɑ", "5", "ŋ", " ", "t", "ɕ", "ˈ", "i", "ɑ", "5", "ŋ", " ", "t", "ˈ", "ə", "2", "ŋ", " ", "t", "ɕ", "ˈ", "i", "ɛ", "2", "n", " ", "t", "s", ".", "h", "ˈ", "ə", "5", "ŋ", " ", "t", "ɕ", "ˈ", "i", "ɑ", "5", "ŋ", " ", "s", ".", "ˈ", "i", ".", "5", " ", "t", "ɕ", "h", "ˈ", "i", "5", " ", "ɕ", "ˈ", "i", "ɑ", "5", "ŋ", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "5", " ", "t", "ə", "2", " ", "j", "i", "5", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "k", "w", "ˈ", "ɑ", "5", "ŋ", " ", "ɕ", "ˈ", "y", "ɛ", "ɜ", " ", "ɕ", "ˈ", "i", "ɛ", "5", "n", " ", "ɕ", "ˈ", "i", "ɑ", "5", "ŋ", " ", "t", "ˈ", "ɑ", "5", "ŋ", " ", "t", "h", "ˈ", "a", "i", "5", " ", "j", "ˈ", "i", "ɑ", "ɜ", "ŋ", " ", "k", "w", "ˈ", "ɑ", "5", "ŋ", " ", "t", "s", ".", "ˈ", "ɑ", "u", "5", " ", "s", ".", "ˈ", "o", "-", "5", " ", "t", "ˈ", "ɑ", "u", "5", " ", "p", "ˈ", "a", "5", "n", " ", "k", "h", "ˈ", "o", "n", "ɡ", "5", " ", "t", "s", ".", "ˈ", "o", 
"n", "ɡ", "5", " ", "t", "ə", "2", " ", "s", ".", "w", "ˈ", "e", "i", "2", " ", "t", "ˈ", "i", "5", " ", "k", "w", "ˈ", "ɑ", "5", "ŋ", " ", "ɕ", "ˈ", "i", "ɛ", "5", "n", " ", "p", "ˈ", "e", "i", "5", " ", "t", "s", ".", "ˈ", "o", "-", "ɜ", " ", "s", ".", "ˈ", "o", "-", "5", " ", "t", "ɕ", "ˈ", "i", "ɜ", " ", "f", "ˈ", "a", "2", "n", " ", "s", ".", "ˈ", "o", "-", "5", " ", "t", "s", "ˈ", "a", "i", "5", " ", "t", "h", "ˈ", "i", "ɛ", "5", "n", " ", "k", "h", "ˈ", "o", "n", "ɡ", "5", " ", "s", ".", "ˈ", "ɑ", "5", "ŋ", " ", "ɕ", "ˈ", "i", "ɜ", "ŋ", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "k", "ˈ", "o", "n", "ɡ", "2", " ", "ɕ", "ˈ", "i", "ɜ", "ŋ", " ", "t", "ə", "ɜ", " ", "t", "ɕ", "h", "ˈ", "i", "5", " ", "t", "s", "h", "ˈ", "a", "i", "2", " ", "k", "w", "ɑ", "5", "ŋ", "p", "h", "ˈ", "u", "2", " ", "j", "ˈ", "i", "o", "u", "ɜ", " ", "w", "ˈ", "a", "i", "5", " ", "t", "ɕ", "h", "ˈ", "y", "æ", "5", "n", " ", "t", "s", ".", "ˈ", "i", ".", "5", " ", "n", "ˈ", "e", "i", "5", " ", "t", "ɕ", "h", "ˈ", "y", "æ", "5", "n", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "χ", "w", "ˈ", "ɑ", "ɜ", "ŋ", " ", "l", "ˈ", "y", "5", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "ˈ", "i", "ɛ", "5", "n", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "ɕ", "ˈ", "i", "ɜ", "n", " ", "t", "s", "ˈ", "i", "̪", "2", " ", "t", "ɕ", "h", "ˈ", "i", "5", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "j", "ˈ", "i", "ɛ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "n", "i", "ɜ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "t", "s", "ˈ", "o", "-", "ɜ", " ", "ɕ", "i", "ɑ", "5", "ŋ", "f", "ˈ", "a", "2", "n"], "phoneme_ids": [1, 0, 32, 0, 31, 0, 20, 0, 14, 0, 21, 0, 132, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 22, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 135, 0, 44, 0, 3, 0, 32, 0, 20, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 23, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 
0, 3, 0, 32, 0, 20, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 120, 0, 59, 0, 132, 0, 44, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 61, 0, 132, 0, 26, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 135, 0, 44, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 44, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 3, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 32, 0, 59, 0, 132, 0, 3, 0, 22, 0, 21, 0, 135, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 23, 0, 35, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 55, 0, 120, 0, 37, 0, 61, 0, 62, 0, 3, 0, 55, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 20, 0, 120, 0, 14, 0, 21, 0, 135, 0, 3, 0, 22, 0, 120, 0, 21, 0, 51, 0, 62, 0, 44, 0, 3, 0, 23, 0, 35, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 51, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 120, 0, 51, 0, 33, 0, 135, 0, 3, 0, 28, 0, 120, 0, 14, 0, 135, 0, 26, 0, 3, 0, 23, 0, 20, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 32, 0, 59, 0, 132, 0, 3, 0, 31, 0, 10, 0, 35, 0, 120, 0, 18, 0, 21, 0, 132, 0, 3, 0, 32, 0, 120, 0, 21, 0, 135, 0, 3, 0, 23, 0, 35, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 55, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 28, 0, 120, 0, 18, 0, 21, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 9, 0, 62, 0, 3, 0, 31, 0, 10, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 62, 0, 3, 0, 19, 0, 120, 0, 14, 0, 132, 0, 26, 0, 3, 0, 31, 0, 10, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 31, 0, 120, 0, 14, 0, 21, 0, 135, 0, 3, 0, 32, 0, 20, 0, 120, 0, 
21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 23, 0, 20, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 55, 0, 120, 0, 21, 0, 62, 0, 44, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 23, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 55, 0, 120, 0, 21, 0, 62, 0, 44, 0, 3, 0, 32, 0, 59, 0, 62, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 3, 0, 32, 0, 31, 0, 20, 0, 120, 0, 14, 0, 21, 0, 132, 0, 3, 0, 23, 0, 35, 0, 51, 0, 135, 0, 44, 0, 28, 0, 20, 0, 120, 0, 33, 0, 132, 0, 3, 0, 22, 0, 120, 0, 21, 0, 27, 0, 33, 0, 62, 0, 3, 0, 35, 0, 120, 0, 14, 0, 21, 0, 135, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 37, 0, 39, 0, 135, 0, 26, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 26, 0, 120, 0, 18, 0, 21, 0, 135, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 37, 0, 39, 0, 135, 0, 26, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 62, 0, 44, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 62, 0, 26, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 142, 0, 132, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 26, 0, 21, 0, 62, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 32, 0, 31, 0, 120, 0, 27, 0, 9, 0, 62, 0, 3, 0, 55, 0, 21, 0, 51, 0, 135, 0, 44, 0, 19, 0, 120, 0, 14, 0, 132, 0, 26, 0, 2]} +{"text": "事實 上彩虹有无数種顏色,比如,在紅色和橙色之間還有許多種細微差別的顏色,根據 不同的文化背景被解讀爲3-9種不等,通常只用六七種顏色作為區別。", "phonemes": ["s", ".", "ˈ", "i", ".", "5", " ", "s", ".", "ˈ", "i", ".", "ɜ", " ", "s", ".", "ˈ", "ɑ", "5", "ŋ", " ", "t", "s", "h", "a", "i", 
"2", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "j", "ˈ", "i", "o", "u", "2", " ", "w", "ˈ", "u", "ɜ", " ", "s", ".", "ˈ", "u", "5", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "j", "ˈ", "i", "ɛ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "p", "ˈ", "i", "2", " ", "ʐ", "ˈ", "u", "ɜ", " ", "t", "s", "ˈ", "a", "i", "5", " ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "s", "ˈ", "o", "-", "5", " ", "χ", "ˈ", "o", "-", "ɜ", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "s", "ˈ", "o", "-", "5", " ", "t", "s", ".", "ˈ", "i", ".", "5", " ", "t", "ɕ", "ˈ", "i", "ɛ", "5", "n", " ", "χ", "a", "i", "ɜ", "j", "ˈ", "i", "o", "u", "ɜ", " ", "ɕ", "ˈ", "y", "2", " ", "t", "u", "o", "5", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "ɕ", "ˈ", "i", "5", " ", "w", "ˈ", "e", "i", "5", " ", "t", "s", ".", "h", "ɑ", "5", "p", "ˈ", "i", "ɛ", "ɜ", " ", "t", "ə", "ɜ", " ", "j", "ˈ", "i", "ɛ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "k", "ˈ", "ə", "5", "n", " ", "t", "ɕ", "ˈ", "y", "5", " ", "p", "ˈ", "u", "5", " ", "t", "h", "ˈ", "o", "n", "ɡ", "ɜ", " ", "t", "ə", "ɜ", " ", "w", "ˈ", "u", "ə", "ɜ", "n", " ", "χ", "w", "ˈ", "ɑ", "5", " ", "p", "e", "i", "5", "t", "ɕ", "ˈ", "i", "2", "ŋ", " ", "p", "ˈ", "e", "i", "5", " ", "t", "ɕ", "ˈ", "i", "ɛ", "2", " ", "t", "ˈ", "u", "ɜ", " ", "w", "ˈ", "e", "i", "ɜ", " ", "s", "ˈ", "a", "5", "n", " ", "t", "ɕ", "ˈ", "i", "o", "u", "ɜ", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "p", "ˈ", "u", "5", " ", "t", "ˈ", "ə", "2", "ŋ", " ", "t", "h", "ˈ", "o", "n", "ɡ", "5", " ", "t", "s", ".", "h", "ˈ", "ɑ", "ɜ", "ŋ", " ", "t", "s", ".", "ˈ", "i", ".", "2", " ", "j", "ˈ", "o", "n", "ɡ", "5", " ", "l", "ˈ", "i", "o", "u", "5", " ", "t", "ɕ", "h", "ˈ", "i", "5", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "j", "ˈ", "i", "ɛ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "t", "s", "u", "o", "5", "w", "ˈ", "e", "i", "ɜ", " ", "t", "ɕ", "h", "ˈ", "y", "5", " ", "p", "ˈ", "i", "ɛ", "ɜ"], "phoneme_ids": [1, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 
0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 62, 0, 3, 0, 31, 0, 10, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 31, 0, 20, 0, 14, 0, 21, 0, 132, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 22, 0, 120, 0, 21, 0, 27, 0, 33, 0, 132, 0, 3, 0, 35, 0, 120, 0, 33, 0, 62, 0, 3, 0, 31, 0, 10, 0, 120, 0, 33, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 28, 0, 120, 0, 21, 0, 132, 0, 3, 0, 106, 0, 120, 0, 33, 0, 62, 0, 3, 0, 32, 0, 31, 0, 120, 0, 14, 0, 21, 0, 135, 0, 3, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 127, 0, 120, 0, 27, 0, 9, 0, 62, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 127, 0, 14, 0, 21, 0, 62, 0, 22, 0, 120, 0, 21, 0, 27, 0, 33, 0, 62, 0, 3, 0, 55, 0, 120, 0, 37, 0, 132, 0, 3, 0, 32, 0, 33, 0, 27, 0, 135, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 55, 0, 120, 0, 21, 0, 135, 0, 3, 0, 35, 0, 120, 0, 18, 0, 21, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 51, 0, 135, 0, 28, 0, 120, 0, 21, 0, 61, 0, 62, 0, 3, 0, 32, 0, 59, 0, 62, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 23, 0, 120, 0, 59, 0, 135, 0, 26, 0, 3, 0, 32, 0, 55, 0, 120, 0, 37, 0, 135, 0, 3, 0, 28, 0, 120, 0, 33, 0, 135, 0, 3, 0, 32, 0, 20, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 32, 0, 59, 0, 62, 0, 3, 0, 35, 0, 120, 0, 33, 0, 59, 0, 62, 0, 26, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 135, 0, 3, 0, 28, 0, 18, 0, 21, 0, 135, 0, 32, 0, 55, 0, 120, 0, 21, 0, 132, 0, 44, 0, 3, 0, 28, 0, 120, 0, 18, 0, 21, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 61, 0, 132, 0, 3, 0, 32, 0, 120, 0, 33, 0, 62, 0, 3, 0, 35, 0, 120, 0, 18, 0, 21, 0, 62, 0, 3, 0, 31, 0, 
120, 0, 14, 0, 135, 0, 26, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 62, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 28, 0, 120, 0, 33, 0, 135, 0, 3, 0, 32, 0, 120, 0, 59, 0, 132, 0, 44, 0, 3, 0, 32, 0, 20, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 51, 0, 62, 0, 44, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 132, 0, 3, 0, 22, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 24, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 31, 0, 33, 0, 27, 0, 135, 0, 35, 0, 120, 0, 18, 0, 21, 0, 62, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 37, 0, 135, 0, 3, 0, 28, 0, 120, 0, 21, 0, 61, 0, 62, 0, 2]} +{"text": "國際LGBT 聯盟的彩虹旗为六色:紅橙黃綠藍紫。", "phonemes": ["k", "ˈ", "u", "o", "ɜ", " ", "t", "ɕ", "ˈ", "i", "5", " ", "ɛ", "1", "l", "d", "ʒ", "i", "ː", "1", "b", "i", "ː", "1", "t", "i", "ː", "1", " ", "l", "ˈ", "i", "ɛ", "ɜ", "n", " ", "m", "ˈ", "ə", "ɜ", "ŋ", " ", "t", "ə", "ɜ", " ", "t", "s", "h", "a", "i", "2", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "t", "ɕ", "h", "ˈ", "i", "ɜ", " ", "w", "ˈ", "e", "i", "5", " ", "l", "ˈ", "i", "o", "u", "5", " ", "s", "ˈ", "o", "-", "5", " ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "χ", "w", "ˈ", "ɑ", "ɜ", "ŋ", " ", "l", "ˈ", "y", "5", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "s", "ˈ", "i", "̪", "2"], "phoneme_ids": [1, 0, 23, 0, 120, 0, 33, 0, 27, 0, 62, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 135, 0, 3, 0, 61, 0, 131, 0, 24, 0, 17, 0, 108, 0, 21, 0, 122, 0, 131, 0, 15, 0, 21, 0, 122, 0, 131, 0, 32, 0, 21, 0, 122, 0, 131, 0, 3, 0, 24, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 25, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 32, 0, 59, 0, 62, 0, 3, 0, 32, 0, 31, 0, 20, 0, 14, 0, 21, 0, 132, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 
3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 62, 0, 3, 0, 35, 0, 120, 0, 18, 0, 21, 0, 135, 0, 3, 0, 24, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 62, 0, 44, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 142, 0, 132, 0, 2]} +{"text": "紅橙黃綠藍靛紫的七色說,就是在六色基礎 上將紫色分出偏藍色的靛。", "phonemes": ["χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "χ", "w", "ˈ", "ɑ", "ɜ", "ŋ", " ", "l", "ˈ", "y", "5", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "ˈ", "i", "ɛ", "5", "n", " ", "t", "s", "ˈ", "i", "̪", "2", " ", "t", "ə", "4", " ", "t", "ɕ", "h", "ˈ", "i", "5", " ", "s", "ˈ", "o", "-", "5", " ", "s", ".", "w", "ˈ", "o", "5", " ", "t", "ɕ", "ˈ", "i", "o", "u", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "t", "s", "ˈ", "a", "i", "5", " ", "l", "ˈ", "i", "o", "u", "5", " ", "s", "ˈ", "o", "-", "5", " ", "t", "ɕ", "i", "5", "t", "s", ".", "h", "ˈ", "u", "2", " ", "s", ".", "ɑ", "5", "ŋ", "t", "ɕ", "ˈ", "i", "ɑ", "5", "ŋ", " ", "t", "s", "ˈ", "i", "̪", "2", " ", "s", "ˈ", "o", "-", "5", " ", "f", "ˈ", "ə", "5", "n", " ", "t", "s", ".", "h", "ˈ", "u", "5", " ", "p", "h", "ˈ", "i", "ɛ", "5", "n", " ", "l", "ˈ", "a", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "t", "ə", "1", " ", "t", "ˈ", "i", "ɛ", "5", "n"], "phoneme_ids": [1, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 62, 0, 44, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 142, 0, 132, 0, 3, 0, 32, 0, 59, 0, 134, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 31, 0, 10, 0, 35, 0, 120, 0, 
27, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 31, 0, 120, 0, 14, 0, 21, 0, 135, 0, 3, 0, 24, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 55, 0, 21, 0, 135, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 33, 0, 132, 0, 3, 0, 31, 0, 10, 0, 51, 0, 135, 0, 44, 0, 32, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 142, 0, 132, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 19, 0, 120, 0, 59, 0, 135, 0, 26, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 33, 0, 135, 0, 3, 0, 28, 0, 20, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 59, 0, 131, 0, 3, 0, 32, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 2]} +{"text": "傳統中國文化說的七色是:赤橙黃綠青藍紫,青色 就是偏藍的綠色。", "phonemes": ["t", "s", ".", "h", "u", "a", "ɜ", "n", "t", "h", "ˈ", "o", "n", "ɡ", "2", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "5", " ", "k", "ˈ", "u", "o", "ɜ", " ", "w", "ˈ", "u", "ə", "ɜ", "n", " ", "χ", "w", "ˈ", "ɑ", "5", " ", "s", ".", "w", "ˈ", "o", "5", " ", "t", "ə", "2", " ", "t", "ɕ", "h", "ˈ", "i", "5", " ", "s", "ˈ", "o", "-", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "t", "s", ".", "h", "ˈ", "i", ".", "5", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "χ", "w", "ˈ", "ɑ", "ɜ", "ŋ", " ", "l", "ˈ", "y", "5", " ", "t", "ɕ", "h", "ˈ", "i", "5", "ŋ", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "s", "ˈ", "i", "̪", "2", " ", "t", "ɕ", "h", "ˈ", "i", "5", "ŋ", " ", "s", "ˈ", "o", "-", "5", " ", "t", "ɕ", "ˈ", "i", "o", "u", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "p", "h", "ˈ", "i", "ɛ", "5", "n", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "ə", "ɜ", " ", "l", "ˈ", "y", "5", " ", "s", "ˈ", "o", "-", "5"], "phoneme_ids": [1, 0, 32, 0, 31, 0, 10, 0, 20, 0, 33, 0, 14, 0, 62, 0, 26, 0, 32, 0, 20, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 
27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 23, 0, 120, 0, 33, 0, 27, 0, 62, 0, 3, 0, 35, 0, 120, 0, 33, 0, 59, 0, 62, 0, 26, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 135, 0, 3, 0, 31, 0, 10, 0, 35, 0, 120, 0, 27, 0, 135, 0, 3, 0, 32, 0, 59, 0, 132, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 62, 0, 44, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 44, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 142, 0, 132, 0, 3, 0, 32, 0, 55, 0, 20, 0, 120, 0, 21, 0, 135, 0, 44, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 28, 0, 20, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 59, 0, 62, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 2]} +{"text": "要是把橙色也分爲偏紅、偏黃的兩種就是九色。", "phonemes": ["j", "ɑ", "u", "5", "s", ".", "i", ".", "1", " ", "p", "ˈ", "ɑ", "2", " ", "t", "s", ".", "h", "ˈ", "ə", "ɜ", "ŋ", " ", "s", "ˈ", "o", "-", "5", " ", "j", "ˈ", "i", "ɛ", "2", " ", "f", "ˈ", "ə", "5", "n", " ", "w", "ˈ", "e", "i", "ɜ", " ", "p", "h", "ˈ", "i", "ɛ", "5", "n", " ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "p", "h", "ˈ", "i", "ɛ", "5", "n", " ", "χ", "w", "ˈ", "ɑ", "ɜ", "ŋ", " ", "t", "ə", "ɜ", " ", "l", "ˈ", "i", "ɑ", "ɜ", "ŋ", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "t", "ɕ", "ˈ", "i", "o", "u", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "t", "ɕ", "ˈ", "i", "o", "u", "2", " ", "s", "ˈ", "o", "-", "5"], "phoneme_ids": [1, 0, 22, 0, 51, 0, 33, 0, 135, 0, 31, 0, 10, 0, 21, 0, 10, 0, 131, 0, 3, 0, 28, 0, 120, 0, 51, 0, 132, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 
120, 0, 59, 0, 62, 0, 44, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 132, 0, 3, 0, 19, 0, 120, 0, 59, 0, 135, 0, 26, 0, 3, 0, 35, 0, 120, 0, 18, 0, 21, 0, 62, 0, 3, 0, 28, 0, 20, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 28, 0, 20, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 3, 0, 127, 0, 35, 0, 120, 0, 51, 0, 62, 0, 44, 0, 3, 0, 32, 0, 59, 0, 62, 0, 3, 0, 24, 0, 120, 0, 21, 0, 51, 0, 62, 0, 44, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 132, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 2]} +{"text": "三色說有:紅綠 藍,就是光學三原色,所有顏色的光都是這三種顏色混合出來的,和亚里士多 德紅、綠、紫三色說,就是兩頭加中間。", "phonemes": ["s", "ˈ", "a", "5", "n", " ", "s", "ˈ", "o", "-", "5", " ", "s", ".", "w", "ˈ", "o", "5", " ", "j", "ˈ", "i", "o", "u", "2", " ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "l", "ˈ", "y", "5", " ", "l", "ˈ", "a", "ɜ", "n", " ", "t", "ɕ", "ˈ", "i", "o", "u", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "k", "w", "ˈ", "ɑ", "5", "ŋ", " ", "ɕ", "ˈ", "y", "ɛ", "ɜ", " ", "s", "ˈ", "a", "5", "n", " ", "j", "ˈ", "y", "æ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "s", "u", "o", "ɜ", "j", "ˈ", "i", "o", "u", "2", " ", "j", "ˈ", "i", "ɛ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "t", "ə", "1", " ", "k", "w", "ˈ", "ɑ", "5", "ŋ", " ", "t", "ˈ", "o", "u", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "t", "s", ".", "ˈ", "o", "-", "5", " ", "s", "ˈ", "a", "5", "n", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "2", " ", "j", "ˈ", "i", "ɛ", "ɜ", "n", " ", "s", "ˈ", "o", "-", "5", " ", "χ", "w", "ˈ", "u", "ə", "5", "n", " ", "χ", "ˈ", "o", "-", "ɜ", " ", "t", "s", ".", "h", "u", "5", "l", "a", "i", "2", " ", "t", "ə", "1", " ", "χ", "ˈ", "o", "-", "ɜ", " ", "j", "ˈ", "i", "ɑ", "5", " ", "l", "ˈ", "i", "2", " ", "s", ".", "ˈ", "i", ".", "5", " ", "t", 
"ˈ", "u", "o", "5", " ", "t", "ˈ", "ə", "ɜ", " ", "χ", "ˈ", "o", "n", "ɡ", "ɜ", " ", "l", "ˈ", "y", "5", " ", "t", "s", "ˈ", "i", "̪", "2", " ", "s", "ˈ", "a", "5", "n", " ", "s", "ˈ", "o", "-", "5", " ", "s", ".", "w", "ˈ", "o", "5", " ", "t", "ɕ", "ˈ", "i", "o", "u", "5", " ", "s", ".", "ˈ", "i", ".", "5", " ", "l", "ˈ", "i", "ɑ", "2", "ŋ", " ", "t", "h", "ˈ", "o", "u", "ɜ", " ", "t", "ɕ", "ˈ", "i", "ɑ", "5", " ", "t", "s", ".", "ˈ", "o", "n", "ɡ", "5", " ", "t", "ɕ", "ˈ", "i", "ɛ", "5", "n"], "phoneme_ids": [1, 0, 31, 0, 120, 0, 14, 0, 135, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 31, 0, 10, 0, 35, 0, 120, 0, 27, 0, 135, 0, 3, 0, 22, 0, 120, 0, 21, 0, 27, 0, 33, 0, 132, 0, 3, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 24, 0, 120, 0, 14, 0, 62, 0, 26, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 23, 0, 35, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 55, 0, 120, 0, 37, 0, 61, 0, 62, 0, 3, 0, 31, 0, 120, 0, 14, 0, 135, 0, 26, 0, 3, 0, 22, 0, 120, 0, 37, 0, 39, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 31, 0, 33, 0, 27, 0, 62, 0, 22, 0, 120, 0, 21, 0, 27, 0, 33, 0, 132, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 32, 0, 59, 0, 131, 0, 3, 0, 23, 0, 35, 0, 120, 0, 51, 0, 135, 0, 44, 0, 3, 0, 32, 0, 120, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 31, 0, 120, 0, 14, 0, 135, 0, 26, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 132, 0, 3, 0, 22, 0, 120, 0, 21, 0, 61, 0, 62, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 127, 0, 35, 0, 120, 0, 33, 0, 59, 0, 135, 0, 26, 0, 3, 0, 127, 0, 120, 0, 27, 0, 9, 0, 62, 0, 3, 0, 32, 0, 31, 0, 10, 0, 20, 0, 33, 0, 135, 0, 24, 0, 14, 0, 21, 0, 132, 0, 3, 0, 32, 0, 59, 0, 131, 0, 3, 0, 127, 0, 120, 0, 27, 0, 9, 0, 
62, 0, 3, 0, 22, 0, 120, 0, 21, 0, 51, 0, 135, 0, 3, 0, 24, 0, 120, 0, 21, 0, 132, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 32, 0, 120, 0, 33, 0, 27, 0, 135, 0, 3, 0, 32, 0, 120, 0, 59, 0, 62, 0, 3, 0, 127, 0, 120, 0, 27, 0, 26, 0, 66, 0, 62, 0, 3, 0, 24, 0, 120, 0, 37, 0, 135, 0, 3, 0, 32, 0, 31, 0, 120, 0, 21, 0, 142, 0, 132, 0, 3, 0, 31, 0, 120, 0, 14, 0, 135, 0, 26, 0, 3, 0, 31, 0, 120, 0, 27, 0, 9, 0, 135, 0, 3, 0, 31, 0, 10, 0, 35, 0, 120, 0, 27, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 27, 0, 33, 0, 135, 0, 3, 0, 31, 0, 10, 0, 120, 0, 21, 0, 10, 0, 135, 0, 3, 0, 24, 0, 120, 0, 21, 0, 51, 0, 132, 0, 44, 0, 3, 0, 32, 0, 20, 0, 120, 0, 27, 0, 33, 0, 62, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 51, 0, 135, 0, 3, 0, 32, 0, 31, 0, 10, 0, 120, 0, 27, 0, 26, 0, 66, 0, 135, 0, 3, 0, 32, 0, 55, 0, 120, 0, 21, 0, 61, 0, 135, 0, 26, 0, 2]} diff --git a/etc/test_sentences/uk.txt b/etc/test_sentences/uk.txt new file mode 100644 index 0000000..db22bca --- /dev/null +++ b/etc/test_sentences/uk.txt @@ -0,0 +1,7 @@ +Весе́лка, також ра́йдуга оптичне явище в атмосфері, що являє собою одну, дві чи декілька різнокольорових дуг ,або кіл, якщо дивитися з повітря, що спостерігаються на тлі хмари, якщо вона розташована проти Сонця. +Червоний колір ми бачимо з зовнішнього боку первинної веселки, а фіолетовий — із внутрішнього. +Веселка пов'язана з заломленням і відбиттям ,деякою мірою і з дифракцією, сонячного світла у водяних краплях, зважених у повітрі. +Ці крапельки по-різному відхиляють світло різних кольорів, у результаті чого біле світло розкладається на спектр. +Спостерігач, що стоїть спиною до джерела світла, бачить різнобарвне світіння, що виходить із простору по концентричному колу ,дузі. +Чуєш їх, доцю, га? Кумедна ж ти, прощайся без ґольфів! +Жебракують філософи при ґанку церкви в Гадячі, ще й шатро їхнє п’яне знаємо. 
diff --git a/etc/test_sentences/vi.txt b/etc/test_sentences/vi.txt new file mode 100644 index 0000000..6554451 --- /dev/null +++ b/etc/test_sentences/vi.txt @@ -0,0 +1,9 @@ +Cầu vồng hay mống cũng như quang phổ là hiện tượng tán sắc của các ánh sáng từ Mặt Trời khi khúc xạ và phản xạ qua các giọt nước mưa. +Ở nhiều nền văn hóa khác nhau, cầu vồng xuất hiện được coi là mang đến điềm lành cho nhân thế. +Do bạch kim rất quý nên sẽ dùng để lắp vô xương. +Tâm tưởng tôi tỏ tình tới Tú từ tháng tư, thú thật, tôi thương Tâm thì tôi thì thầm thử Tâm thế thôị. +Nồi đồng nấu ốc, nồi đất nấu ếch. +Lan leo lên lầu Lan lấy lưỡi lam. Lan lấy lộn lưỡi liềm Lan leo lên lầu lấy lại. +Bà Ba béo bán bánh bò, bán bòn bon, bán bong bóng, bên bờ biển, bả bị bộ binh bắt ba bốn bận. +Chồng chị chín chết chị chưa chôn, chị chờ chuối chín chị chôn cho chồng +Ðêm đen Đào đốt đèn đi đâu đó. Ðào đốt đèn đi đợi Ðài. Đài đến. Đào đòi đô, Đài đưa Đào đô, Ðào đòi Dylan Ðài đưa Dylan. diff --git a/etc/test_sentences/zh.txt b/etc/test_sentences/zh.txt new file mode 100644 index 0000000..b6b1854 --- /dev/null +++ b/etc/test_sentences/zh.txt @@ -0,0 +1,7 @@ +彩虹,又稱天弓、天虹、絳等,簡稱虹,是氣象中的一種光學現象,當太陽 光照射到半空中的水滴,光線被折射及反射,在天空上形成拱形的七彩光譜,由外 圈至内圈呈紅、橙、黃、綠、蓝、靛蓝、堇紫七种颜色(霓虹則相反)。 +事實 上彩虹有无数種顏色,比如,在紅色和橙色之間還有許多種細微差別的顏色,根據 不同的文化背景被解讀爲3-9種不等,通常只用六七種顏色作為區別。 +國際LGBT 聯盟的彩虹旗为六色:紅橙黃綠藍紫。 +紅橙黃綠藍靛紫的七色說,就是在六色基礎 上將紫色分出偏藍色的靛。 +傳統中國文化說的七色是:赤橙黃綠青藍紫,青色 就是偏藍的綠色。 +要是把橙色也分爲偏紅、偏黃的兩種就是九色。 +三色說有:紅綠 藍,就是光學三原色,所有顏色的光都是這三種顏色混合出來的,和亚里士多 德紅、綠、紫三色說,就是兩頭加中間。 diff --git a/lib/espeak-ng-1.52-patched.tar.gz b/lib/espeak-ng-1.52-patched.tar.gz deleted file mode 100644 index 164cd97..0000000 Binary files a/lib/espeak-ng-1.52-patched.tar.gz and /dev/null differ diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 2c2d746..792b680 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -1,47 +1,37 @@ cmake_minimum_required(VERSION 3.13) -include(CheckIncludeFileCXX) - project(piper C CXX) -set(CMAKE_CXX_STANDARD 20) 
+find_package(PkgConfig) +pkg_check_modules(SPDLOG REQUIRED spdlog) + +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -ADD_EXECUTABLE(piper main.cpp) +ADD_EXECUTABLE(piper main.cpp piper.cpp) string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") -find_package(PkgConfig) -pkg_check_modules(ESPEAK_NG REQUIRED espeak-ng<2) - -# https://github.com/espeak-ng/pcaudiolib -check_include_file_cxx("pcaudiolib/audio.h" PCAUDIO_INCLUDE_FOUND) - -if(PCAUDIO_INCLUDE_FOUND) - option(USE_PCAUDIO "Build with pcaudiolib" ON) - if(USE_PCAUDIO) - target_compile_definitions(piper PUBLIC HAVE_PCAUDIO) - set(PCAUDIO_LIBRARIES "pcaudio") - endif() -endif() - -set(ONNXRUNTIME_ROOTDIR ${CMAKE_CURRENT_LIST_DIR}/../../lib/${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}) +set(PIPER_PHONEMIZE_ROOTDIR ${CMAKE_CURRENT_LIST_DIR}/../../lib/${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}/piper_phonemize) target_link_libraries(piper + piper_phonemize + espeak-ng onnxruntime pthread - -static-libgcc -static-libstdc++ - ${ESPEAK_NG_LIBRARIES} - ${PCAUDIO_LIBRARIES}) + ${SPDLOG_LIBRARIES}) + +if(NOT APPLE) + target_link_libraries(piper -static-libgcc -static-libstdc++) +endif() target_link_directories(piper PUBLIC - ${ESPEAK_NG_LIBRARY_DIRS} - ${ONNXRUNTIME_ROOTDIR}/lib) + ${PIPER_PHONEMIZE_ROOTDIR}/lib) target_include_directories(piper PUBLIC - ${ONNXRUNTIME_ROOTDIR}/include - ${ESPEAK_NG_INCLUDE_DIRS}) + ${PIPER_PHONEMIZE_ROOTDIR}/include + ${SPDLOG_INCLUDE_DIRS}) target_compile_options(piper PUBLIC - ${ESPEAK_NG_CFLAGS_OTHER}) + ${SPDLOG_CFLAGS_OTHER}) diff --git a/src/cpp/config.hpp b/src/cpp/config.hpp deleted file mode 100644 index b717d51..0000000 --- a/src/cpp/config.hpp +++ /dev/null @@ -1,155 +0,0 @@ -#ifndef CONFIG_H_ -#define CONFIG_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "json.hpp" -#include "utf8.h" - -using namespace std; -using json = 
nlohmann::json; - -namespace piper { - -typedef char32_t Phoneme; -typedef int64_t PhonemeId; -typedef int64_t SpeakerId; - -const string DefaultVoice = "en-us"; - -enum eSpeakMode { Text, TextWithPhonemes, SSML }; - -struct eSpeakConfig { - string voice = DefaultVoice; - eSpeakMode mode = Text; - - // Characters that eSpeak uses to break apart paragraphs/sentences - set clauseBreakers{U'.', U'?', U'!', U',', U';', U':'}; - - Phoneme fullStop = U'.'; - Phoneme comma = U','; - Phoneme question = U'?'; - Phoneme exclamation = U'!'; -}; - -struct PhonemizeConfig { - optional>> phonemeMap; - map> phonemeIdMap; - - PhonemeId idPad = 0; // padding (optionally interspersed) - PhonemeId idBos = 1; // beginning of sentence - PhonemeId idEos = 2; // end of sentence - bool interspersePad = true; - - optional eSpeak; -}; - -struct SynthesisConfig { - float noiseScale = 0.667f; - float lengthScale = 1.0f; - float noiseW = 0.8f; - int sampleRate = 22050; - int sampleWidth = 2; // 16-bit - int channels = 1; // mono - optional speakerId; - float sentenceSilenceSeconds = 0.2f; -}; - -struct ModelConfig { - int numSpeakers; -}; - -bool isSingleCodepoint(string s) { - return utf8::distance(s.begin(), s.end()) == 1; -} - -Phoneme getCodepoint(string s) { - utf8::iterator character_iter(s.begin(), s.begin(), s.end()); - return *character_iter; -} - -void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) { - - if (configRoot.contains("espeak")) { - if (!phonemizeConfig.eSpeak) { - phonemizeConfig.eSpeak.emplace(); - } - - auto espeakValue = configRoot["espeak"]; - if (espeakValue.contains("voice")) { - phonemizeConfig.eSpeak->voice = espeakValue["voice"].get(); - } - } - - // phoneme to [phoneme] map - if (configRoot.contains("phoneme_map")) { - if (!phonemizeConfig.phonemeMap) { - phonemizeConfig.phonemeMap.emplace(); - } - - auto phonemeMapValue = configRoot["phoneme_map"]; - for (auto &fromPhonemeItem : phonemeMapValue.items()) { - string fromPhoneme = 
fromPhonemeItem.key(); - if (!isSingleCodepoint(fromPhoneme)) { - throw runtime_error("Phonemes must be one codepoint (phoneme map)"); - } - - auto fromCodepoint = getCodepoint(fromPhoneme); - for (auto &toPhonemeValue : fromPhonemeItem.value()) { - string toPhoneme = toPhonemeValue.get(); - if (!isSingleCodepoint(toPhoneme)) { - throw runtime_error("Phonemes must be one codepoint (phoneme map)"); - } - - auto toCodepoint = getCodepoint(toPhoneme); - (*phonemizeConfig.phonemeMap)[fromCodepoint].push_back(toCodepoint); - } - } - } - - // phoneme to [id] map - if (configRoot.contains("phoneme_id_map")) { - auto phonemeIdMapValue = configRoot["phoneme_id_map"]; - for (auto &fromPhonemeItem : phonemeIdMapValue.items()) { - string fromPhoneme = fromPhonemeItem.key(); - if (!isSingleCodepoint(fromPhoneme)) { - throw runtime_error("Phonemes must be one codepoint (phoneme id map)"); - } - - auto fromCodepoint = getCodepoint(fromPhoneme); - for (auto &toIdValue : fromPhonemeItem.value()) { - PhonemeId toId = toIdValue.get(); - phonemizeConfig.phonemeIdMap[fromCodepoint].push_back(toId); - } - } - } - -} /* parsePhonemizeConfig */ - -void parseSynthesisConfig(json &configRoot, SynthesisConfig &synthesisConfig) { - - if (configRoot.contains("audio")) { - auto audioValue = configRoot["audio"]; - if (audioValue.contains("sample_rate")) { - // Default sample rate is 22050 Hz - synthesisConfig.sampleRate = audioValue.value("sample_rate", 22050); - } - } - -} /* parseSynthesisConfig */ - -void parseModelConfig(json &configRoot, ModelConfig &modelConfig) { - - modelConfig.numSpeakers = configRoot["num_speakers"].get(); - -} /* parseModelConfig */ - -} // namespace piper - -#endif // CONFIG_H_ diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index f827d47..b3fe24e 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -10,38 +11,60 @@ #include #include -#ifdef HAVE_PCAUDIO -// 
https://github.com/espeak-ng/pcaudiolib -#include -#endif - #ifdef _MSC_VER #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include #endif +#ifdef __APPLE__ +#include +#endif + +#include +#include + #include "piper.hpp" using namespace std; -enum OutputType { - OUTPUT_FILE, - OUTPUT_DIRECTORY, - OUTPUT_STDOUT, - OUTPUT_PLAY, - OUTPUT_RAW -}; +enum OutputType { OUTPUT_FILE, OUTPUT_DIRECTORY, OUTPUT_STDOUT, OUTPUT_RAW }; struct RunConfig { + // Path to .onnx voice file filesystem::path modelPath; + + // Path to JSON voice config file filesystem::path modelConfigPath; - OutputType outputType = OUTPUT_PLAY; - optional outputPath; + + // Type of output to produce. + // Default is to write a WAV file in the current directory. + OutputType outputType = OUTPUT_DIRECTORY; + + // Path for output + optional outputPath = filesystem::path("."); + + // Numerical id of the default speaker (multi-speaker voices) optional speakerId; + + // Amount of noise to add during audio generation optional noiseScale; + + // Speed of speaking (1 = normal, < 1 is faster, > 1 is slower) optional lengthScale; + + // Variation in phoneme lengths optional noiseW; + + // Seconds of silence to add after each sentence + optional sentenceSilenceSeconds; + + // Path to espeak-ng data directory (default is next to piper executable) + optional eSpeakDataPath; + + // Path to libtashkeel ort model + // https://github.com/mush42/libtashkeel/ + optional tashkeelModelPath; }; void parseArgs(int argc, char *argv[], RunConfig &runConfig); @@ -49,35 +72,89 @@ void rawOutputProc(vector &sharedAudioBuffer, mutex &mutAudio, condition_variable &cvAudio, bool &audioReady, bool &audioFinished); -#ifdef HAVE_PCAUDIO -void playProc(audio_object *my_audio, vector &sharedAudioBuffer, - mutex &mutAudio, condition_variable &cvAudio, bool &audioReady, - bool &audioFinished); -#endif +// ---------------------------------------------------------------------------- int main(int argc, char *argv[]) { + 
spdlog::set_default_logger(spdlog::stderr_color_st("piper")); + RunConfig runConfig; parseArgs(argc, argv, runConfig); - // NOTE: This won't work for Windows (need GetModuleFileName) + piper::PiperConfig piperConfig; + piper::Voice voice; + + spdlog::debug("Loading voice from {} (config={})", + runConfig.modelPath.string(), + runConfig.modelConfigPath.string()); + + auto startTime = chrono::steady_clock::now(); + loadVoice(piperConfig, runConfig.modelPath.string(), + runConfig.modelConfigPath.string(), voice, runConfig.speakerId); + auto endTime = chrono::steady_clock::now(); + spdlog::info("Loaded voice in {} second(s)", + chrono::duration(endTime - startTime).count()); + + // Get the path to the piper executable so we can locate espeak-ng-data, etc. + // next to it. #ifdef _MSC_VER auto exePath = []() { - wchar_t moduleFileName[MAX_PATH] = { 0 }; + wchar_t moduleFileName[MAX_PATH] = {0}; GetModuleFileNameW(nullptr, moduleFileName, std::size(moduleFileName)); return filesystem::path(moduleFileName); }(); +#elif defined(__APPLE__) + auto exePath = []() { + char moduleFileName[PATH_MAX] = {0}; + uint32_t moduleFileNameSize = std::size(moduleFileName); + _NSGetExecutablePath(moduleFileName, &moduleFileNameSize); + return filesystem::path(moduleFileName); + }(); #else auto exePath = filesystem::canonical("/proc/self/exe"); #endif - piper::initialize(exePath.parent_path()); - piper::Voice voice; - auto startTime = chrono::steady_clock::now(); - loadVoice(runConfig.modelPath.string(), runConfig.modelConfigPath.string(), - voice, runConfig.speakerId); - auto endTime = chrono::steady_clock::now(); - auto loadSeconds = chrono::duration(endTime - startTime).count(); - cerr << "Load time: " << loadSeconds << " sec" << endl; + if (voice.phonemizeConfig.phonemeType == piper::eSpeakPhonemes) { + spdlog::debug("Voice uses eSpeak phonemes ({})", + voice.phonemizeConfig.eSpeak.voice); + + if (runConfig.eSpeakDataPath) { + // User provided path + piperConfig.eSpeakDataPath =
runConfig.eSpeakDataPath.value().string(); + } else { + // Assume next to piper executable + piperConfig.eSpeakDataPath = + std::filesystem::absolute( + exePath.parent_path().append("espeak-ng-data")) + .string(); + + spdlog::debug("espeak-ng-data directory is expected at {}", + piperConfig.eSpeakDataPath); + } + } else { + // Not using eSpeak + piperConfig.useESpeak = false; + } + + // Enable libtashkeel for Arabic + if (voice.phonemizeConfig.eSpeak.voice == "ar") { + piperConfig.useTashkeel = true; + if (runConfig.tashkeelModelPath) { + // User provided path + piperConfig.tashkeelModelPath = + runConfig.tashkeelModelPath.value().string(); + } else { + // Assume next to piper executable + piperConfig.tashkeelModelPath = + std::filesystem::absolute( + exePath.parent_path().append("libtashkeel_model.ort")) + .string(); + + spdlog::debug("libtashkeel model is expected at {}", + piperConfig.tashkeelModelPath.value()); + } + } + + piper::initialize(piperConfig); // Scales if (runConfig.noiseScale) { @@ -92,36 +169,14 @@ int main(int argc, char *argv[]) { voice.synthesisConfig.noiseW = runConfig.noiseW.value(); } -#ifdef HAVE_PCAUDIO - audio_object *my_audio = nullptr; - - if (runConfig.outputType == OUTPUT_PLAY) { - // Output audio to the default audio device - my_audio = create_audio_device_object(NULL, "piper", "Text-to-Speech"); - - // TODO: Support 32-bit sample widths - auto audioFormat = AUDIO_OBJECT_FORMAT_S16LE; - int error = audio_object_open(my_audio, audioFormat, - voice.synthesisConfig.sampleRate, - voice.synthesisConfig.channels); - if (error != 0) { - throw runtime_error(audio_object_strerror(my_audio, error)); - } + if (runConfig.sentenceSilenceSeconds) { + voice.synthesisConfig.sentenceSilenceSeconds = + runConfig.sentenceSilenceSeconds.value(); } -#else - if (runConfig.outputType == OUTPUT_PLAY) { - // Cannot play audio directly - cerr << "WARNING: Piper was not compiled with pcaudiolib. Output audio " - "will be written to the current directory." 
- << endl; - runConfig.outputType = OUTPUT_DIRECTORY; - runConfig.outputPath = filesystem::path("."); - } -#endif if (runConfig.outputType == OUTPUT_DIRECTORY) { runConfig.outputPath = filesystem::absolute(runConfig.outputPath.value()); - cerr << "Output directory: " << runConfig.outputPath.value() << endl; + spdlog::info("Output directory: {}", runConfig.outputPath.value().string()); } string line; @@ -142,15 +197,23 @@ int main(int argc, char *argv[]) { // Output audio to automatically-named WAV file in a directory ofstream audioFile(outputPath.string(), ios::binary); - piper::textToWavFile(voice, line, audioFile, result); + piper::textToWavFile(piperConfig, voice, line, audioFile, result); cout << outputPath.string() << endl; } else if (runConfig.outputType == OUTPUT_FILE) { + // Read all of standard input before synthesizing. + // Otherwise, we would overwrite the output file for each line. + stringstream text; + text << line; + while (getline(cin, line)) { + text << " " << line; + } + // Output audio to WAV file ofstream audioFile(runConfig.outputPath.value().string(), ios::binary); - piper::textToWavFile(voice, line, audioFile, result); + piper::textToWavFile(piperConfig, voice, text.str(), audioFile, result); } else if (runConfig.outputType == OUTPUT_STDOUT) { // Output WAV to stdout - piper::textToWavFile(voice, line, cout, result); + piper::textToWavFile(piperConfig, voice, line, cout, result); } else if (runConfig.outputType == OUTPUT_RAW) { // Raw output to stdout mutex mutAudio; @@ -174,7 +237,8 @@ int main(int argc, char *argv[]) { cvAudio.notify_one(); } }; - piper::textToAudio(voice, line, audioBuffer, result, audioCallback); + piper::textToAudio(piperConfig, voice, line, audioBuffer, result, + audioCallback); // Signal thread that there is no more audio { @@ -185,65 +249,22 @@ int main(int argc, char *argv[]) { } // Wait for audio output to finish - cerr << "Waiting for audio..." 
<< endl; + spdlog::info("Waiting for audio to finish playing..."); rawOutputThread.join(); - } else if (runConfig.outputType == OUTPUT_PLAY) { -#ifdef HAVE_PCAUDIO - mutex mutAudio; - condition_variable cvAudio; - bool audioReady = false; - bool audioFinished = false; - vector audioBuffer; - vector sharedAudioBuffer; - - thread playThread(playProc, my_audio, ref(sharedAudioBuffer), - ref(mutAudio), ref(cvAudio), ref(audioReady), - ref(audioFinished)); - auto audioCallback = [&audioBuffer, &sharedAudioBuffer, &mutAudio, - &cvAudio, &audioReady]() { - // Signal thread that audio is ready - { - unique_lock lockAudio(mutAudio); - copy(audioBuffer.begin(), audioBuffer.end(), - back_inserter(sharedAudioBuffer)); - audioReady = true; - cvAudio.notify_one(); - } - }; - piper::textToAudio(voice, line, audioBuffer, result, audioCallback); - - // Signal thread that there is no more audio - { - unique_lock lockAudio(mutAudio); - audioReady = true; - audioFinished = true; - cvAudio.notify_one(); - } - - // Wait for audio output to finish - cerr << "Waiting for audio..." << endl; - playThread.join(); -#else - throw runtime_error("Cannot play audio! 
Not compiled with pcaudiolib."); -#endif } - cerr << "Real-time factor: " << result.realTimeFactor - << " (infer=" << result.inferSeconds - << " sec, audio=" << result.audioSeconds << " sec)" << endl; + spdlog::info("Real-time factor: {} (infer={} sec, audio={} sec)", + result.realTimeFactor, result.inferSeconds, + result.audioSeconds); } - piper::terminate(); - -#ifdef HAVE_PCAUDIO - audio_object_close(my_audio); - audio_object_destroy(my_audio); - my_audio = nullptr; -#endif + piper::terminate(piperConfig); return EXIT_SUCCESS; } +// ---------------------------------------------------------------------------- + void rawOutputProc(vector &sharedAudioBuffer, mutex &mutAudio, condition_variable &cvAudio, bool &audioReady, bool &audioFinished) { @@ -275,42 +296,7 @@ void rawOutputProc(vector &sharedAudioBuffer, mutex &mutAudio, } // rawOutputProc -#ifdef HAVE_PCAUDIO -void playProc(audio_object *my_audio, vector &sharedAudioBuffer, - mutex &mutAudio, condition_variable &cvAudio, bool &audioReady, - bool &audioFinished) { - vector internalAudioBuffer; - while (true) { - { - unique_lock lockAudio{mutAudio}; - cvAudio.wait(lockAudio, [&audioReady] { return audioReady; }); - - if (sharedAudioBuffer.empty() && audioFinished) { - break; - } - - copy(sharedAudioBuffer.begin(), sharedAudioBuffer.end(), - back_inserter(internalAudioBuffer)); - - sharedAudioBuffer.clear(); - - if (!audioFinished) { - audioReady = false; - } - } - - int error = - audio_object_write(my_audio, (const char *)internalAudioBuffer.data(), - sizeof(int16_t) * internalAudioBuffer.size()); - if (error != 0) { - throw runtime_error(audio_object_strerror(my_audio, error)); - } - audio_object_flush(my_audio); - internalAudioBuffer.clear(); - } - -} // playProc -#endif +// ---------------------------------------------------------------------------- void printUsage(char *argv[]) { cerr << endl; @@ -332,11 +318,18 @@ void printUsage(char *argv[]) { "becomes available" << endl; cerr << " -s NUM --speaker NUM 
id of speaker (default: 0)" << endl; - cerr << " --noise-scale NUM generator noise (default: 0.667)" + cerr << " --noise_scale NUM generator noise (default: 0.667)" << endl; - cerr << " --length-scale NUM phoneme length (default: 1.0)" + cerr << " --length_scale NUM phoneme length (default: 1.0)" << endl; - cerr << " --noise-w NUM phonene width noise (default: 0.8)" + cerr << " --noise_w NUM phoneme width noise (default: 0.8)" + << endl; + cerr << " --sentence_silence NUM seconds of silence after each " + "sentence (default: 0.2)" + << endl; + cerr << " --espeak_data DIR path to espeak-ng data directory" + << endl; + cerr << " --debug print DEBUG messages to the console" << endl; cerr << endl; } @@ -361,7 +354,8 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) { } else if (arg == "-c" || arg == "--config") { ensureArg(argc, argv, i); modelConfigPath = filesystem::path(argv[++i]); - } else if (arg == "-f" || arg == "--output_file") { + } else if (arg == "-f" || arg == "--output_file" || + arg == "--output-file") { ensureArg(argc, argv, i); std::string filePath = argv[++i]; if (filePath == "-") { @@ -371,24 +365,36 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) { runConfig.outputType = OUTPUT_FILE; runConfig.outputPath = filesystem::path(filePath); } - } else if (arg == "-d" || arg == "--output_dir") { + } else if (arg == "-d" || arg == "--output_dir" || arg == "--output-dir") { ensureArg(argc, argv, i); runConfig.outputType = OUTPUT_DIRECTORY; runConfig.outputPath = filesystem::path(argv[++i]); - } else if (arg == "--output_raw") { + } else if (arg == "--output_raw" || arg == "--output-raw") { runConfig.outputType = OUTPUT_RAW; } else if (arg == "-s" || arg == "--speaker") { ensureArg(argc, argv, i); runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]); - } else if (arg == "--noise-scale") { + } else if (arg == "--noise_scale" || arg == "--noise-scale") { ensureArg(argc, argv, i); runConfig.noiseScale = stof(argv[++i]); - } else if
(arg == "--length-scale") { + } else if (arg == "--length_scale" || arg == "--length-scale") { ensureArg(argc, argv, i); runConfig.lengthScale = stof(argv[++i]); - } else if (arg == "--noise-w") { + } else if (arg == "--noise_w" || arg == "--noise-w") { ensureArg(argc, argv, i); runConfig.noiseW = stof(argv[++i]); + } else if (arg == "--sentence_silence" || arg == "--sentence-silence") { + ensureArg(argc, argv, i); + runConfig.sentenceSilenceSeconds = stof(argv[++i]); + } else if (arg == "--espeak_data" || arg == "--espeak-data") { + ensureArg(argc, argv, i); + runConfig.eSpeakDataPath = filesystem::path(argv[++i]); + } else if (arg == "--tashkeel_model" || arg == "--tashkeel-model") { + ensureArg(argc, argv, i); + runConfig.tashkeelModelPath = filesystem::path(argv[++i]); + } else if (arg == "--debug") { + // Set DEBUG logging + spdlog::set_level(spdlog::level::debug); } else if (arg == "-h" || arg == "--help") { printUsage(argv); exit(0); diff --git a/src/cpp/model.hpp b/src/cpp/model.hpp deleted file mode 100644 index 01070a7..0000000 --- a/src/cpp/model.hpp +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef MODEL_H_ -#define MODEL_H_ - -#include - -#include - -using namespace std; - -namespace piper { -const string instanceName{"piper"}; - -struct ModelSession { - Ort::Session onnx; - Ort::AllocatorWithDefaultOptions allocator; - Ort::SessionOptions options; - Ort::Env env; - - ModelSession() : onnx(nullptr){}; -}; - -void loadModel(string modelPath, ModelSession &session) { - - session.env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, - instanceName.c_str()); - session.env.DisableTelemetryEvents(); - - // Slows down performance by ~2x - // session.options.SetIntraOpNumThreads(1); - - // Roughly doubles load time for no visible inference benefit - // session.options.SetGraphOptimizationLevel( - // GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - - session.options.SetGraphOptimizationLevel( - GraphOptimizationLevel::ORT_DISABLE_ALL); - - // Slows down performance 
very slightly - // session.options.SetExecutionMode(ExecutionMode::ORT_PARALLEL); - - session.options.DisableCpuMemArena(); - session.options.DisableMemPattern(); - session.options.DisableProfiling(); - - auto startTime = chrono::steady_clock::now(); - session.onnx = Ort::Session(session.env, filesystem::path(modelPath).c_str(), session.options); - auto endTime = chrono::steady_clock::now(); - auto loadDuration = chrono::duration(endTime - startTime); -} - -} // namespace piper - -#endif // MODEL_H_ diff --git a/src/cpp/phonemize.hpp b/src/cpp/phonemize.hpp deleted file mode 100644 index 80e166d..0000000 --- a/src/cpp/phonemize.hpp +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef PHONEMIZE_H_ -#define PHONEMIZE_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "config.hpp" -#include "utf8.h" - -#define CLAUSE_INTONATION_FULL_STOP 0x00000000 -#define CLAUSE_INTONATION_COMMA 0x00001000 -#define CLAUSE_INTONATION_QUESTION 0x00002000 -#define CLAUSE_INTONATION_EXCLAMATION 0x00003000 - -#define CLAUSE_TYPE_SENTENCE 0x00080000 - -using namespace std; - -namespace piper { - -// Text to phonemes using eSpeak-ng -void phonemize(string text, PhonemizeConfig &phonemizeConfig, - vector> &phonemes) { - if (!phonemizeConfig.eSpeak) { - throw runtime_error("Missing eSpeak config"); - } - - auto voice = phonemizeConfig.eSpeak->voice; - int result = espeak_SetVoiceByName(voice.c_str()); - if (result != 0) { - throw runtime_error("Failed to set eSpeak-ng voice"); - } - - // Modified by eSpeak - string textCopy(text); - - utf8::iterator textIter(textCopy.begin(), textCopy.begin(), textCopy.end()); - utf8::iterator textIterEnd(textCopy.end(), textCopy.begin(), textCopy.end()); - vector textClauseBreakers; - - // Identify clause breakers in the sentence, since eSpeak removes them during - // phonemization. - // - // This will unfortunately do the wrong thing with abbreviations, etc. 
- while (textIter != textIterEnd) { - auto codepoint = *textIter; - if (phonemizeConfig.eSpeak->clauseBreakers.contains(codepoint)) { - textClauseBreakers.push_back(codepoint); - } - - textIter++; - } - - vector *sentencePhonemes = nullptr; - const char *inputTextPointer = textCopy.c_str(); - int terminator = 0; - - while (inputTextPointer != NULL) { - // Modified espeak-ng API to get access to clause terminator - string clausePhonemes( - espeak_TextToPhonemes2((const void **)&inputTextPointer, - /*textmode*/ espeakCHARS_AUTO, - /*phonememode = IPA*/ 0x02, - &terminator)); - - utf8::iterator phonemeIter(clausePhonemes.begin(), clausePhonemes.begin(), - clausePhonemes.end()); - utf8::iterator phonemeEnd(clausePhonemes.end(), clausePhonemes.begin(), - clausePhonemes.end()); - - if (!sentencePhonemes) { - // Start new sentence - phonemes.emplace_back(); - sentencePhonemes = &phonemes[phonemes.size() - 1]; - } - - sentencePhonemes->insert(sentencePhonemes->end(), phonemeIter, phonemeEnd); - - // Add appropriate puntuation depending on terminator type - int intonation = terminator & 0x0000F000; - if (intonation == CLAUSE_INTONATION_FULL_STOP) { - sentencePhonemes->push_back(phonemizeConfig.eSpeak->fullStop); - } else if (intonation == CLAUSE_INTONATION_COMMA) { - sentencePhonemes->push_back(phonemizeConfig.eSpeak->comma); - } else if (intonation == CLAUSE_INTONATION_QUESTION) { - sentencePhonemes->push_back(phonemizeConfig.eSpeak->question); - } else if (intonation == CLAUSE_INTONATION_EXCLAMATION) { - sentencePhonemes->push_back(phonemizeConfig.eSpeak->exclamation); - } - - if ((terminator & CLAUSE_TYPE_SENTENCE) == CLAUSE_TYPE_SENTENCE) { - // End of sentence - sentencePhonemes = nullptr; - } - - } // while inputTextPointer != NULL - -} /* phonemize */ - -// Phonemes to ids using JSON map -void phonemes2ids(vector &phonemes, PhonemizeConfig &phonemizeConfig, - vector &phonemeIds) { - if (phonemes.empty()) { - throw runtime_error("No phonemes"); - } - - 
phonemeIds.push_back(phonemizeConfig.idBos); - if (phonemizeConfig.interspersePad) { - phonemeIds.push_back(phonemizeConfig.idPad); - } - - for (auto phoneme = phonemes.begin(); phoneme != phonemes.end(); phoneme++) { - if (phonemizeConfig.phonemeIdMap.contains(*phoneme)) { - for (auto id : phonemizeConfig.phonemeIdMap[*phoneme]) { - phonemeIds.push_back(id); - - if (phonemizeConfig.interspersePad) { - phonemeIds.push_back(phonemizeConfig.idPad); - } - } - } - } - - phonemeIds.push_back(phonemizeConfig.idEos); - -} /* phonemes2ids */ - -} // namespace piper - -#endif // PHONEMIZE_H_ diff --git a/src/cpp/piper.cpp b/src/cpp/piper.cpp new file mode 100644 index 0000000..ede7bbb --- /dev/null +++ b/src/cpp/piper.cpp @@ -0,0 +1,514 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "piper.hpp" +#include "utf8.h" +#include "wavfile.hpp" + +namespace piper { + +// Maximum value for 16-bit signed WAV sample +const float MAX_WAV_VALUE = 32767.0f; + +const std::string instanceName{"piper"}; + +// True if the string is a single UTF-8 codepoint +bool isSingleCodepoint(std::string s) { + return utf8::distance(s.begin(), s.end()) == 1; +} + +// Get the first UTF-8 codepoint of a string +Phoneme getCodepoint(std::string s) { + utf8::iterator character_iter(s.begin(), s.begin(), s.end()); + return *character_iter; +} + +// Load JSON config information for phonemization +void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) { + // { + // "espeak": { + // "voice": "" + // }, + // "phoneme_type": "", + // "phoneme_map": { + // "": ["", "", ...] + // }, + // "phoneme_id_map": { + // "": [, , ...] 
+ // } + // } + + if (configRoot.contains("espeak")) { + auto espeakValue = configRoot["espeak"]; + if (espeakValue.contains("voice")) { + phonemizeConfig.eSpeak.voice = espeakValue["voice"].get(); + } + } + + if (configRoot.contains("phoneme_type")) { + auto phonemeTypeStr = configRoot["phoneme_type"].get(); + if (phonemeTypeStr == "text") { + phonemizeConfig.phonemeType = TextPhonemes; + } + } + + // phoneme to [id] map + // Maps phonemes to one or more phoneme ids (required). + if (configRoot.contains("phoneme_id_map")) { + auto phonemeIdMapValue = configRoot["phoneme_id_map"]; + for (auto &fromPhonemeItem : phonemeIdMapValue.items()) { + std::string fromPhoneme = fromPhonemeItem.key(); + if (!isSingleCodepoint(fromPhoneme)) { + throw std::runtime_error( + "Phonemes must be one codepoint (phoneme id map)"); + } + + auto fromCodepoint = getCodepoint(fromPhoneme); + for (auto &toIdValue : fromPhonemeItem.value()) { + PhonemeId toId = toIdValue.get(); + phonemizeConfig.phonemeIdMap[fromCodepoint].push_back(toId); + } + } + } + + // phoneme to [phoneme] map + // Maps phonemes to one or more other phonemes (not normally used). 
+ if (configRoot.contains("phoneme_map")) { + if (!phonemizeConfig.phonemeMap) { + phonemizeConfig.phonemeMap.emplace(); + } + + auto phonemeMapValue = configRoot["phoneme_map"]; + for (auto &fromPhonemeItem : phonemeMapValue.items()) { + std::string fromPhoneme = fromPhonemeItem.key(); + if (!isSingleCodepoint(fromPhoneme)) { + throw std::runtime_error( + "Phonemes must be one codepoint (phoneme map)"); + } + + auto fromCodepoint = getCodepoint(fromPhoneme); + for (auto &toPhonemeValue : fromPhonemeItem.value()) { + std::string toPhoneme = toPhonemeValue.get(); + if (!isSingleCodepoint(toPhoneme)) { + throw std::runtime_error( + "Phonemes must be one codepoint (phoneme map)"); + } + + auto toCodepoint = getCodepoint(toPhoneme); + (*phonemizeConfig.phonemeMap)[fromCodepoint].push_back(toCodepoint); + } + } + } + +} /* parsePhonemizeConfig */ + +// Load JSON config for audio synthesis +void parseSynthesisConfig(json &configRoot, SynthesisConfig &synthesisConfig) { + // { + // "audio": { + // "sample_rate": 22050 + // }, + // "inference": { + // "noise_scale": 0.667, + // "length_scale": 1, + // "noise_w": 0.8 + // } + // } + + if (configRoot.contains("audio")) { + auto audioValue = configRoot["audio"]; + if (audioValue.contains("sample_rate")) { + // Default sample rate is 22050 Hz + synthesisConfig.sampleRate = audioValue.value("sample_rate", 22050); + } + } + + if (configRoot.contains("inference")) { + // Overrides default inference settings + auto inferenceValue = configRoot["inference"]; + if (inferenceValue.contains("noise_scale")) { + synthesisConfig.noiseScale = inferenceValue.value("noise_scale", 0.667f); + } + + if (inferenceValue.contains("length_scale")) { + synthesisConfig.lengthScale = inferenceValue.value("length_scale", 1.0f); + } + + if (inferenceValue.contains("noise_w")) { + synthesisConfig.noiseW = inferenceValue.value("noise_w", 0.8f); + } + } + +} /* parseSynthesisConfig */ + +void parseModelConfig(json &configRoot, ModelConfig &modelConfig) { + 
+ modelConfig.numSpeakers = configRoot["num_speakers"].get(); + +} /* parseModelConfig */ + +void initialize(PiperConfig &config) { + if (config.useESpeak) { + // Set up espeak-ng for calling espeak_TextToPhonemesWithTerminator + // See: https://github.com/rhasspy/espeak-ng + spdlog::debug("Initializing eSpeak"); + int result = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, + /*buflength*/ 0, + /*path*/ config.eSpeakDataPath.c_str(), + /*options*/ 0); + if (result < 0) { + throw std::runtime_error("Failed to initialize eSpeak-ng"); + } + + spdlog::debug("Initialized eSpeak"); + } + + // Load onnx model for libtashkeel + // https://github.com/mush42/libtashkeel/ + if (config.useTashkeel) { + spdlog::debug("Using libtashkeel for diacritization"); + if (!config.tashkeelModelPath) { + throw std::runtime_error("No path to libtashkeel model"); + } + + spdlog::debug("Loading libtashkeel model from {}", + config.tashkeelModelPath.value()); + config.tashkeelState = std::make_unique(); + tashkeel::tashkeel_load(config.tashkeelModelPath.value(), + *config.tashkeelState); + spdlog::debug("Initialized libtashkeel"); + } + + spdlog::info("Initialized piper"); +} + +void terminate(PiperConfig &config) { + if (config.useESpeak) { + // Clean up espeak-ng + spdlog::debug("Terminating eSpeak"); + espeak_Terminate(); + spdlog::debug("Terminated eSpeak"); + } + + spdlog::info("Terminated piper"); +} + +void loadModel(std::string modelPath, ModelSession &session) { + spdlog::debug("Loading onnx model from {}", modelPath); + session.env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, + instanceName.c_str()); + session.env.DisableTelemetryEvents(); + + // Slows down performance by ~2x + // session.options.SetIntraOpNumThreads(1); + + // Roughly doubles load time for no visible inference benefit + // session.options.SetGraphOptimizationLevel( + // GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + + session.options.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_DISABLE_ALL); + + 
// Slows down performance very slightly + // session.options.SetExecutionMode(ExecutionMode::ORT_PARALLEL); + + session.options.DisableCpuMemArena(); + session.options.DisableMemPattern(); + session.options.DisableProfiling(); + + auto startTime = std::chrono::steady_clock::now(); + session.onnx = Ort::Session(session.env, modelPath.c_str(), session.options); + auto endTime = std::chrono::steady_clock::now(); + spdlog::debug("Loaded onnx model in {} second(s)", + std::chrono::duration(endTime - startTime).count()); +} + +// Load Onnx model and JSON config file +void loadVoice(PiperConfig &config, std::string modelPath, + std::string modelConfigPath, Voice &voice, + std::optional &speakerId) { + spdlog::debug("Parsing voice config at {}", modelConfigPath); + std::ifstream modelConfigFile(modelConfigPath); + voice.configRoot = json::parse(modelConfigFile); + + parsePhonemizeConfig(voice.configRoot, voice.phonemizeConfig); + parseSynthesisConfig(voice.configRoot, voice.synthesisConfig); + parseModelConfig(voice.configRoot, voice.modelConfig); + + if (voice.modelConfig.numSpeakers > 1) { + // Multi-speaker model + if (speakerId) { + voice.synthesisConfig.speakerId = speakerId; + } else { + // Default speaker + voice.synthesisConfig.speakerId = 0; + } + } + + spdlog::debug("Voice contains {} speaker(s)", voice.modelConfig.numSpeakers); + + loadModel(modelPath, voice.session); + +} /* loadVoice */ + +// Phoneme ids to WAV audio +void synthesize(std::vector &phonemeIds, + SynthesisConfig &synthesisConfig, ModelSession &session, + std::vector &audioBuffer, SynthesisResult &result) { + spdlog::debug("Synthesizing audio for {} phoneme id(s)", phonemeIds.size()); + + auto memoryInfo = Ort::MemoryInfo::CreateCpu( + OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); + + // Allocate + std::vector phonemeIdLengths{(int64_t)phonemeIds.size()}; + std::vector scales{synthesisConfig.noiseScale, + synthesisConfig.lengthScale, + synthesisConfig.noiseW}; + + 
std::vector inputTensors; + std::vector phonemeIdsShape{1, (int64_t)phonemeIds.size()}; + inputTensors.push_back(Ort::Value::CreateTensor( + memoryInfo, phonemeIds.data(), phonemeIds.size(), phonemeIdsShape.data(), + phonemeIdsShape.size())); + + std::vector phomemeIdLengthsShape{(int64_t)phonemeIdLengths.size()}; + inputTensors.push_back(Ort::Value::CreateTensor( + memoryInfo, phonemeIdLengths.data(), phonemeIdLengths.size(), + phomemeIdLengthsShape.data(), phomemeIdLengthsShape.size())); + + std::vector scalesShape{(int64_t)scales.size()}; + inputTensors.push_back( + Ort::Value::CreateTensor(memoryInfo, scales.data(), scales.size(), + scalesShape.data(), scalesShape.size())); + + // Add speaker id. + // NOTE: These must be kept outside the "if" below to avoid being deallocated. + std::vector speakerId{ + (int64_t)synthesisConfig.speakerId.value_or(0)}; + std::vector speakerIdShape{(int64_t)speakerId.size()}; + + if (synthesisConfig.speakerId) { + inputTensors.push_back(Ort::Value::CreateTensor( + memoryInfo, speakerId.data(), speakerId.size(), speakerIdShape.data(), + speakerIdShape.size())); + } + + // From export_onnx.py + std::array inputNames = {"input", "input_lengths", "scales", + "sid"}; + std::array outputNames = {"output"}; + + // Infer + auto startTime = std::chrono::steady_clock::now(); + auto outputTensors = session.onnx.Run( + Ort::RunOptions{nullptr}, inputNames.data(), inputTensors.data(), + inputTensors.size(), outputNames.data(), outputNames.size()); + auto endTime = std::chrono::steady_clock::now(); + + if ((outputTensors.size() != 1) || (!outputTensors.front().IsTensor())) { + throw std::runtime_error("Invalid output tensors"); + } + auto inferDuration = std::chrono::duration(endTime - startTime); + result.inferSeconds = inferDuration.count(); + + const float *audio = outputTensors.front().GetTensorData(); + auto audioShape = + outputTensors.front().GetTensorTypeAndShapeInfo().GetShape(); + int64_t audioCount = audioShape[audioShape.size() - 
1]; + + result.audioSeconds = (double)audioCount / (double)synthesisConfig.sampleRate; + result.realTimeFactor = 0.0; + if (result.audioSeconds > 0) { + result.realTimeFactor = result.inferSeconds / result.audioSeconds; + } + spdlog::debug("Synthesized {} second(s) of audio in {} second(s)", + result.audioSeconds, result.inferSeconds); + + // Get max audio value for scaling + float maxAudioValue = 0.01f; + for (int64_t i = 0; i < audioCount; i++) { + float audioValue = abs(audio[i]); + if (audioValue > maxAudioValue) { + maxAudioValue = audioValue; + } + } + + // We know the size up front + audioBuffer.reserve(audioCount); + + // Scale audio to fill range and convert to int16 + float audioScale = (MAX_WAV_VALUE / std::max(0.01f, maxAudioValue)); + for (int64_t i = 0; i < audioCount; i++) { + int16_t intAudioValue = static_cast( + std::clamp(audio[i] * audioScale, + static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::max()))); + + audioBuffer.push_back(intAudioValue); + } + + // Clean up + for (std::size_t i = 0; i < outputTensors.size(); i++) { + Ort::detail::OrtRelease(outputTensors[i].release()); + } + + for (std::size_t i = 0; i < inputTensors.size(); i++) { + Ort::detail::OrtRelease(inputTensors[i].release()); + } +} + +// ---------------------------------------------------------------------------- + +// Phonemize text and synthesize audio +void textToAudio(PiperConfig &config, Voice &voice, std::string text, + std::vector &audioBuffer, SynthesisResult &result, + const std::function &audioCallback) { + + std::size_t sentenceSilenceSamples = 0; + if (voice.synthesisConfig.sentenceSilenceSeconds > 0) { + sentenceSilenceSamples = (std::size_t)( + voice.synthesisConfig.sentenceSilenceSeconds * + voice.synthesisConfig.sampleRate * voice.synthesisConfig.channels); + } + + if (config.useTashkeel) { + if (!config.tashkeelState) { + throw std::runtime_error("Tashkeel model is not loaded"); + } + + spdlog::debug("Diacritizing text with 
libtashkeel: {}", text); + text = tashkeel::tashkeel_run(text, *config.tashkeelState); + } + + // Phonemes for each sentence + spdlog::debug("Phonemizing text: {}", text); + std::vector> phonemes; + + if (voice.phonemizeConfig.phonemeType == eSpeakPhonemes) { + // Use espeak-ng for phonemization + eSpeakPhonemeConfig eSpeakConfig; + eSpeakConfig.voice = voice.phonemizeConfig.eSpeak.voice; + phonemize_eSpeak(text, eSpeakConfig, phonemes); + } else { + // Use UTF-8 codepoints as "phonemes" + CodepointsPhonemeConfig codepointsConfig; + phonemize_codepoints(text, codepointsConfig, phonemes); + } + + // Synthesize each sentence independently. + std::vector phonemeIds; + std::map missingPhonemes; + for (auto phonemesIter = phonemes.begin(); phonemesIter != phonemes.end(); + ++phonemesIter) { + std::vector &sentencePhonemes = *phonemesIter; + + if (spdlog::should_log(spdlog::level::debug)) { + // DEBUG log for phonemes + std::string phonemesStr; + for (auto phoneme : sentencePhonemes) { + utf8::append(phoneme, phonemesStr); + } + + spdlog::debug("Converting {} phoneme(s) to ids: {}", + sentencePhonemes.size(), phonemesStr); + } + + SynthesisResult sentenceResult; + + PhonemeIdConfig idConfig; + if (voice.phonemizeConfig.phonemeType == TextPhonemes) { + auto &language = voice.phonemizeConfig.eSpeak.voice; + spdlog::debug("Text phoneme language: {}", language); + if (DEFAULT_ALPHABET.count(language) < 1) { + throw std::runtime_error( + "Text phoneme language for voice is not supported"); + } + + // Use alphabet for language + idConfig.phonemeIdMap = + std::make_shared(DEFAULT_ALPHABET[language]); + } + + // phonemes -> ids + phonemes_to_ids(sentencePhonemes, idConfig, phonemeIds, missingPhonemes); + if (spdlog::should_log(spdlog::level::debug)) { + // DEBUG log for phoneme ids + std::stringstream phonemeIdsStr; + for (auto phonemeId : phonemeIds) { + phonemeIdsStr << phonemeId << ", "; + } + + spdlog::debug("Converted {} phoneme(s) to {} phoneme id(s): {}", + 
sentencePhonemes.size(), phonemeIds.size(), + phonemeIdsStr.str()); + } + + // ids -> audio + synthesize(phonemeIds, voice.synthesisConfig, voice.session, audioBuffer, + sentenceResult); + + // Add end of sentence silence + if (sentenceSilenceSamples > 0) { + for (std::size_t i = 0; i < sentenceSilenceSamples; i++) { + audioBuffer.push_back(0); + } + } + + if (audioCallback) { + // Call back must copy audio since it is cleared afterwards. + audioCallback(); + audioBuffer.clear(); + } + + result.audioSeconds += sentenceResult.audioSeconds; + result.inferSeconds += sentenceResult.inferSeconds; + + phonemeIds.clear(); + } + + if (missingPhonemes.size() > 0) { + spdlog::warn("Missing {} phoneme(s) from phoneme/id map!", + missingPhonemes.size()); + + for (auto phonemeCount : missingPhonemes) { + std::string phonemeStr; + utf8::append(phonemeCount.first, phonemeStr); + spdlog::warn("Missing \"{}\" (\\u{:04X}): {} time(s)", phonemeStr, + (uint32_t)phonemeCount.first, phonemeCount.second); + } + } + + if (result.audioSeconds > 0) { + result.realTimeFactor = result.inferSeconds / result.audioSeconds; + } + +} /* textToAudio */ + +// Phonemize text and synthesize audio to WAV file +void textToWavFile(PiperConfig &config, Voice &voice, std::string text, + std::ostream &audioFile, SynthesisResult &result) { + + std::vector audioBuffer; + textToAudio(config, voice, text, audioBuffer, result, NULL); + + // Write WAV + auto synthesisConfig = voice.synthesisConfig; + writeWavHeader(synthesisConfig.sampleRate, synthesisConfig.sampleWidth, + synthesisConfig.channels, (int32_t)audioBuffer.size(), + audioFile); + + audioFile.write((const char *)audioBuffer.data(), + sizeof(int16_t) * audioBuffer.size()); + +} /* textToWavFile */ + +} // namespace piper diff --git a/src/cpp/piper.hpp b/src/cpp/piper.hpp index 933d8cc..29a8bcf 100644 --- a/src/cpp/piper.hpp +++ b/src/cpp/piper.hpp @@ -1,24 +1,83 @@ #ifndef PIPER_H_ #define PIPER_H_ -#include -#include +#include +#include +#include 
#include #include -#include "json.hpp" -#include +#include +#include +#include +#include -#include "config.hpp" -#include "model.hpp" -#include "phonemize.hpp" -#include "synthesize.hpp" -#include "wavfile.hpp" +#include "json.hpp" using json = nlohmann::json; namespace piper { +typedef int64_t SpeakerId; + +struct eSpeakConfig { + std::string voice = "en-us"; +}; + +struct PiperConfig { + std::string eSpeakDataPath; + bool useESpeak = true; + + bool useTashkeel = false; + std::optional tashkeelModelPath; + std::unique_ptr tashkeelState; +}; + +enum PhonemeType { eSpeakPhonemes, TextPhonemes }; + +struct PhonemizeConfig { + PhonemeType phonemeType = eSpeakPhonemes; + std::optional>> phonemeMap; + std::map> phonemeIdMap; + + PhonemeId idPad = 0; // padding (optionally interspersed) + PhonemeId idBos = 1; // beginning of sentence + PhonemeId idEos = 2; // end of sentence + bool interspersePad = true; + + eSpeakConfig eSpeak; +}; + +struct SynthesisConfig { + float noiseScale = 0.667f; + float lengthScale = 1.0f; + float noiseW = 0.8f; + int sampleRate = 22050; + int sampleWidth = 2; // 16-bit + int channels = 1; // mono + std::optional speakerId; + float sentenceSilenceSeconds = 0.2f; +}; + +struct ModelConfig { + int numSpeakers; +}; + +struct ModelSession { + Ort::Session onnx; + Ort::AllocatorWithDefaultOptions allocator; + Ort::SessionOptions options; + Ort::Env env; + + ModelSession() : onnx(nullptr){}; +}; + +struct SynthesisResult { + double inferSeconds; + double audioSeconds; + double realTimeFactor; +}; + struct Voice { json configRoot; PhonemizeConfig phonemizeConfig; @@ -27,122 +86,25 @@ struct Voice { ModelSession session; }; -void initialize(std::filesystem::path cwd) { - string dataPath; +// Must be called before using textTo* functions +void initialize(PiperConfig &config); - auto cwdDataPath = std::filesystem::absolute(cwd.append("espeak-ng-data")); - if (std::filesystem::is_directory(cwdDataPath)) { - dataPath = cwdDataPath.string(); - } - - cerr << 
"dataPath: " << dataPath << endl; - - // Set up espeak-ng for calling espeak_TextToPhonemes - int result = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, - /*buflength*/ 0, - /*path*/ dataPath.c_str(), - /*options*/ 0); - if (result < 0) { - throw runtime_error("Failed to initialize eSpeak-ng"); - } -} - -void terminate() { - // Clean up espeak-ng - espeak_Terminate(); -} +// Clean up +void terminate(PiperConfig &config); // Load Onnx model and JSON config file -void loadVoice(string modelPath, string modelConfigPath, Voice &voice, - optional &speakerId) { - ifstream modelConfigFile(modelConfigPath.c_str()); - voice.configRoot = json::parse(modelConfigFile); - - parsePhonemizeConfig(voice.configRoot, voice.phonemizeConfig); - parseSynthesisConfig(voice.configRoot, voice.synthesisConfig); - parseModelConfig(voice.configRoot, voice.modelConfig); - - if (voice.modelConfig.numSpeakers > 1) { - // Multispeaker model - if (speakerId) { - voice.synthesisConfig.speakerId = speakerId; - } else { - // Default speaker - voice.synthesisConfig.speakerId = 0; - } - } - - loadModel(modelPath, voice.session); - -} /* loadVoice */ +void loadVoice(PiperConfig &config, std::string modelPath, + std::string modelConfigPath, Voice &voice, + std::optional &speakerId); // Phonemize text and synthesize audio -void textToAudio(Voice &voice, string text, vector &audioBuffer, - SynthesisResult &result, - const function &audioCallback) { - - size_t sentenceSilenceSamples = 0; - if (voice.synthesisConfig.sentenceSilenceSeconds > 0) { - sentenceSilenceSamples = (size_t)( - voice.synthesisConfig.sentenceSilenceSeconds * - voice.synthesisConfig.sampleRate * voice.synthesisConfig.channels); - } - - // Phonemes for each sentence - vector> phonemes; - phonemize(text, voice.phonemizeConfig, phonemes); - - vector phonemeIds; - for (auto phonemesIter = phonemes.begin(); phonemesIter != phonemes.end(); - ++phonemesIter) { - vector &sentencePhonemes = *phonemesIter; - SynthesisResult sentenceResult; - 
phonemes2ids(sentencePhonemes, voice.phonemizeConfig, phonemeIds); - synthesize(phonemeIds, voice.synthesisConfig, voice.session, audioBuffer, - sentenceResult); - - // Add end of sentence silence - if (sentenceSilenceSamples > 0) { - for (size_t i = 0; i < sentenceSilenceSamples; i++) { - audioBuffer.push_back(0); - } - } - - if (audioCallback) { - // Call back must copy audio since it is cleared afterwards. - audioCallback(); - audioBuffer.clear(); - } - - result.audioSeconds += sentenceResult.audioSeconds; - result.inferSeconds += sentenceResult.inferSeconds; - - phonemeIds.clear(); - } - - if (result.audioSeconds > 0) { - result.realTimeFactor = result.inferSeconds / result.audioSeconds; - } - -} /* textToAudio */ +void textToAudio(PiperConfig &config, Voice &voice, std::string text, + std::vector &audioBuffer, SynthesisResult &result, + const std::function &audioCallback); // Phonemize text and synthesize audio to WAV file -void textToWavFile(Voice &voice, string text, ostream &audioFile, - SynthesisResult &result) { - - vector audioBuffer; - textToAudio(voice, text, audioBuffer, result, NULL); - - // Write WAV - auto synthesisConfig = voice.synthesisConfig; - writeWavHeader(synthesisConfig.sampleRate, synthesisConfig.sampleWidth, - synthesisConfig.channels, (int32_t)audioBuffer.size(), - audioFile); - - audioFile.write((const char *)audioBuffer.data(), - sizeof(int16_t) * audioBuffer.size()); - -} /* textToWavFile */ +void textToWavFile(PiperConfig &config, Voice &voice, std::string text, + std::ostream &audioFile, SynthesisResult &result); } // namespace piper diff --git a/src/cpp/synthesize.hpp b/src/cpp/synthesize.hpp deleted file mode 100644 index 70e7195..0000000 --- a/src/cpp/synthesize.hpp +++ /dev/null @@ -1,130 +0,0 @@ -#ifndef SYNTHESIZE_H_ -#define SYNTHESIZE_H_ - -#include -#include -#include -#include -#include - -#include - -#include "config.hpp" -#include "model.hpp" - -using namespace std; - -namespace piper { - -// Maximum value for 16-bit 
signed WAV sample -const float MAX_WAV_VALUE = 32767.0f; - -struct SynthesisResult { - double inferSeconds; - double audioSeconds; - double realTimeFactor; -}; - -// Phoneme ids to WAV audio -void synthesize(vector &phonemeIds, SynthesisConfig &synthesisConfig, - ModelSession &session, vector &audioBuffer, - SynthesisResult &result) { - auto memoryInfo = Ort::MemoryInfo::CreateCpu( - OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); - - // Allocate - vector phonemeIdLengths{(int64_t)phonemeIds.size()}; - vector scales{synthesisConfig.noiseScale, synthesisConfig.lengthScale, - synthesisConfig.noiseW}; - - vector inputTensors; - vector phonemeIdsShape{1, (int64_t)phonemeIds.size()}; - inputTensors.push_back(Ort::Value::CreateTensor( - memoryInfo, phonemeIds.data(), phonemeIds.size(), phonemeIdsShape.data(), - phonemeIdsShape.size())); - - vector phomemeIdLengthsShape{(int64_t)phonemeIdLengths.size()}; - inputTensors.push_back(Ort::Value::CreateTensor( - memoryInfo, phonemeIdLengths.data(), phonemeIdLengths.size(), - phomemeIdLengthsShape.data(), phomemeIdLengthsShape.size())); - - vector scalesShape{(int64_t)scales.size()}; - inputTensors.push_back( - Ort::Value::CreateTensor(memoryInfo, scales.data(), scales.size(), - scalesShape.data(), scalesShape.size())); - - // Add speaker id. - // NOTE: These must be kept outside the "if" below to avoid being deallocated. 
- vector speakerId{(int64_t)synthesisConfig.speakerId.value_or(0)}; - vector speakerIdShape{(int64_t)speakerId.size()}; - - if (synthesisConfig.speakerId) { - inputTensors.push_back(Ort::Value::CreateTensor( - memoryInfo, speakerId.data(), speakerId.size(), speakerIdShape.data(), - speakerIdShape.size())); - } - - // From export_onnx.py - array inputNames = {"input", "input_lengths", "scales", - "sid"}; - array outputNames = {"output"}; - - // Infer - auto startTime = chrono::steady_clock::now(); - auto outputTensors = session.onnx.Run( - Ort::RunOptions{nullptr}, inputNames.data(), inputTensors.data(), - inputTensors.size(), outputNames.data(), outputNames.size()); - auto endTime = chrono::steady_clock::now(); - - if ((outputTensors.size() != 1) || (!outputTensors.front().IsTensor())) { - throw runtime_error("Invalid output tensors"); - } - auto inferDuration = chrono::duration(endTime - startTime); - result.inferSeconds = inferDuration.count(); - - const float *audio = outputTensors.front().GetTensorData(); - auto audioShape = - outputTensors.front().GetTensorTypeAndShapeInfo().GetShape(); - int64_t audioCount = audioShape[audioShape.size() - 1]; - - result.audioSeconds = (double)audioCount / (double)synthesisConfig.sampleRate; - result.realTimeFactor = 0.0; - if (result.audioSeconds > 0) { - result.realTimeFactor = result.inferSeconds / result.audioSeconds; - } - - // Get max audio value for scaling - float maxAudioValue = 0.01f; - for (int64_t i = 0; i < audioCount; i++) { - float audioValue = abs(audio[i]); - if (audioValue > maxAudioValue) { - maxAudioValue = audioValue; - } - } - - // We know the size up front - audioBuffer.reserve(audioCount); - - // Scale audio to fill range and convert to int16 - float audioScale = (MAX_WAV_VALUE / max(0.01f, maxAudioValue)); - for (int64_t i = 0; i < audioCount; i++) { - int16_t intAudioValue = static_cast( - clamp(audio[i] * audioScale, - static_cast(numeric_limits::min()), - static_cast(numeric_limits::max()))); - - 
audioBuffer.push_back(intAudioValue); - } - - // Clean up - for (size_t i = 0; i < outputTensors.size(); i++) { - Ort::detail::OrtRelease(outputTensors[i].release()); - } - - for (size_t i = 0; i < inputTensors.size(); i++) { - Ort::detail::OrtRelease(inputTensors[i].release()); - } -} -} // namespace piper - -#endif // SYNTHESIZE_H_ diff --git a/src/cpp/wavfile.hpp b/src/cpp/wavfile.hpp index e99caf6..39c58e1 100644 --- a/src/cpp/wavfile.hpp +++ b/src/cpp/wavfile.hpp @@ -3,8 +3,6 @@ #include -namespace piper { - struct WavHeader { uint8_t RIFF[4] = {'R', 'I', 'F', 'F'}; uint32_t chunkSize; @@ -14,7 +12,7 @@ struct WavHeader { uint8_t fmt[4] = {'f', 'm', 't', ' '}; uint32_t fmtSize = 16; // bytes uint16_t audioFormat = 1; // PCM - uint16_t numChannels; // mono + uint16_t numChannels; // mono uint32_t sampleRate; // Hertz uint32_t bytesPerSec; // sampleRate * sampleWidth uint16_t blockAlign = 2; // 16-bit mono @@ -39,6 +37,4 @@ void writeWavHeader(int sampleRate, int sampleWidth, int channels, } /* writeWavHeader */ -} // namespace piper - #endif // WAVFILE_H_ diff --git a/src/python/piper_train/check_phonemes.py b/src/python/piper_train/check_phonemes.py new file mode 100644 index 0000000..7933689 --- /dev/null +++ b/src/python/piper_train/check_phonemes.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +import json +import sys +import unicodedata +from collections import Counter + +from .phonemize import DEFAULT_PHONEME_ID_MAP + + +def main() -> None: + used_phonemes: "Counter[str]" = Counter() + missing_phonemes: "Counter[str]" = Counter() + + for line in sys.stdin: + line = line.strip() + if not line: + continue + + utt = json.loads(line) + for phoneme in utt["phonemes"]: + used_phonemes[phoneme] += 1 + + if phoneme not in DEFAULT_PHONEME_ID_MAP: + missing_phonemes[phoneme] += 1 + + if missing_phonemes: + print("Missing", len(missing_phonemes), "phoneme(s)", file=sys.stderr) + + json.dump( + { + "used": { + phoneme: { + "count": count, + "hex": 
f"\\u{hex(ord(phoneme))}", + "name": unicodedata.category(phoneme), + "category": unicodedata.category(phoneme), + } + for phoneme, count in used_phonemes.most_common() + }, + "missing": { + phoneme: { + "count": count, + "hex": f"\\u{hex(ord(phoneme))}", + "name": unicodedata.category(phoneme), + "category": unicodedata.category(phoneme), + } + for phoneme, count in missing_phonemes.most_common() + }, + }, + sys.stdout, + ) + + +# ----------------------------------------------------------------------------- + +if __name__ == "__main__": + main() diff --git a/src/python/piper_train/export_torchscript.py b/src/python/piper_train/export_torchscript.py index 3555a20..312cc95 100644 --- a/src/python/piper_train/export_torchscript.py +++ b/src/python/piper_train/export_torchscript.py @@ -2,7 +2,6 @@ import argparse import logging from pathlib import Path -from typing import Optional import torch @@ -41,7 +40,6 @@ def main(): model_g = model.model_g num_symbols = model_g.n_vocab - num_speakers = model_g.n_speakers # Inference only model_g.eval() diff --git a/src/python/piper_train/filter_utterances.py b/src/python/piper_train/filter_utterances.py new file mode 100644 index 0000000..8a528d3 --- /dev/null +++ b/src/python/piper_train/filter_utterances.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 +import argparse +import csv +import json +import re +import shutil +import statistics +import subprocess +import sys +import threading +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor +from dataclasses import asdict, dataclass +from enum import Enum +from pathlib import Path +from typing import Optional + +import numpy as np + +from .norm_audio import make_silence_detector, trim_silence + +_DIR = Path(__file__).parent + +# Removed from the speaking rate calculation +_PUNCTUATION = re.compile(".。,,?¿?؟!!;;::-—") + + +class ExcludeReason(str, Enum): + MISSING = "file_missing" + EMPTY = "file_empty" + LOW = "rate_low" + HIGH = "rate_high" + + 
+@dataclass +class Utterance: + id: str + text: str + duration_sec: float + speaker: str + exclude_reason: Optional[ExcludeReason] = None + rate: float = 0.0 + + def __post_init__(self): + if self.duration_sec > 0: + # Don't include punctuation is speaking rate calculation since we + # remove silence. + text_nopunct = _PUNCTUATION.sub("", self.text) + self.rate = len(text_nopunct) / self.duration_sec + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--write-json", help="Path to write information about excluded utterances" + ) + parser.add_argument( + "--dataset-dir", default=Path.cwd(), help="Path to dataset directory" + ) + parser.add_argument("--scale-lower", type=float, default=2.0) + parser.add_argument("--scale-upper", type=float, default=2.0) + args = parser.parse_args() + + if not shutil.which("ffprobe"): + raise RuntimeError("ffprobe not found (is ffmpeg installed?)") + + dataset_dir = Path(args.dataset_dir) + wav_dir = dataset_dir / "wav" + if not wav_dir.is_dir(): + wav_dir = dataset_dir / "wavs" + + reader = csv.reader(sys.stdin, delimiter="|") + + text_and_audio = [] + for row in reader: + filename, text = row[0], row[-1] + speaker = row[1] if len(row) > 2 else "default" + + # Try file name relative to metadata + wav_path = dataset_dir / filename + + if not wav_path.exists(): + # Try with .wav + wav_path = dataset_dir / f"{filename}.wav" + + if not wav_path.exists(): + # Try wav/ or wavs/ + wav_path = wav_dir / filename + + if not wav_path.exists(): + # Try with .wav + wav_path = wav_dir / f"{filename}.wav" + + text_and_audio.append((filename, text, wav_path, speaker)) + + writer = csv.writer(sys.stdout, delimiter="|") + + # speaker -> [rate] + utts_by_speaker = defaultdict(list) + process_utterance = ProcessUtterance() + with ThreadPoolExecutor() as executor: + for utt in executor.map(lambda args: process_utterance(*args), text_and_audio): + utts_by_speaker[utt.speaker].append(utt) + + is_multispeaker = 
len(utts_by_speaker) > 1 + writer = csv.writer(sys.stdout, delimiter="|") + + speaker_details = {} + for speaker, utts in utts_by_speaker.items(): + rates = [utt.rate for utt in utts] + if rates: + # Exclude rates well outside the 25%/75% quantiles + rate_qs = statistics.quantiles(rates, n=4) + q1 = rate_qs[0] # 25% + q3 = rate_qs[-1] # 75% + iqr = q3 - q1 + lower = q1 - (args.scale_lower * iqr) + upper = q3 + (args.scale_upper * iqr) + speaker_details[speaker] = { + "min": min(rates), + "max": max(rates), + "quanties": rate_qs, + "lower": lower, + "upper": upper, + } + + for utt in utts: + if utt.rate < lower: + utt.exclude_reason = ExcludeReason.LOW + elif utt.rate > upper: + utt.exclude_reason = ExcludeReason.HIGH + else: + if is_multispeaker: + writer.writerow((utt.id, utt.speaker, utt.text)) + else: + writer.writerow((utt.id, utt.text)) + + if args.write_json: + speaker_excluded = { + speaker: [ + asdict(utt) + for utt in utts_by_speaker[speaker] + if utt.exclude_reason is not None + ] + for speaker in speaker_details + } + + with open(args.write_json, "w") as json_file: + json.dump( + { + speaker: { + "details": speaker_details[speaker], + "num_utterances": len(utts_by_speaker[speaker]), + "num_excluded": len(speaker_excluded[speaker]), + "excluded": speaker_excluded[speaker], + } + for speaker in speaker_details + }, + json_file, + indent=4, + ensure_ascii=False, + ) + + +class ProcessUtterance: + def __init__(self): + self.thread_data = threading.local() + + def __call__( + self, utt_id: str, text: str, wav_path: Path, speaker: str + ) -> Utterance: + if not wav_path.exists(): + return Utterance( + utt_id, + text, + 0.0, + speaker, + exclude_reason=ExcludeReason.MISSING, + ) + + if wav_path.stat().st_size == 0: + return Utterance( + utt_id, + text, + 0.0, + speaker, + exclude_reason=ExcludeReason.EMPTY, + ) + + return Utterance(utt_id, text, self.get_duration(wav_path), speaker) + + def get_duration(self, audio_path: Path) -> float: + """Uses ffmpeg to get 
audio duration.""" + if not hasattr(self.thread_data, "detector"): + self.thread_data.detector = make_silence_detector() + + vad_sample_rate = 16000 + audio_16khz_bytes = subprocess.check_output( + [ + "ffmpeg", + "-i", + str(audio_path), + "-f", + "s16le", + "-acodec", + "pcm_s16le", + "-ac", + "1", + "-ar", + str(vad_sample_rate), + "pipe:", + ], + stderr=subprocess.DEVNULL, + ) + + # Normalize + audio_16khz = np.frombuffer(audio_16khz_bytes, dtype=np.int16).astype( + np.float32 + ) + audio_16khz /= np.abs(np.max(audio_16khz)) + + # Get speaking duration + offset_sec, duration_sec = trim_silence( + audio_16khz, + self.thread_data.detector, + threshold=0.8, + samples_per_chunk=480, + sample_rate=vad_sample_rate, + keep_chunks_before=2, + keep_chunks_after=2, + ) + + if duration_sec is None: + # Speech goes to end of audio + if len(audio_16khz) > 0: + duration_sec = (len(audio_16khz) / 16000.0) - offset_sec + else: + duration_sec = 0.0 + + return duration_sec + + # return float( + # subprocess.check_output( + # [ + # "ffprobe", + # "-i", + # str(audio_path), + # "-show_entries", + # "format=duration", + # "-v", + # "quiet", + # "-of", + # "csv=p=0", + # ], + # stderr=subprocess.DEVNULL, + # universal_newlines=True, + # ).strip() + # ) + + +if __name__ == "__main__": + main() diff --git a/src/python/piper_train/phonemize.py b/src/python/piper_train/phonemize.py index 3acc2fb..46d646d 100644 --- a/src/python/piper_train/phonemize.py +++ b/src/python/piper_train/phonemize.py @@ -1,9 +1,23 @@ +import argparse +import json +import sys import unicodedata from collections import Counter +from enum import Enum from typing import Dict, Iterable, List, Mapping, Optional from espeak_phonemizer import Phonemizer + +class PhonemeType(str, Enum): + ESPEAK = "espeak" + """Phonemes come from espeak-ng""" + + TEXT = "text" + """Phonemes come from text itself""" + + +MAX_PHONEMES = 256 DEFAULT_PHONEME_ID_MAP: Dict[str, List[int]] = { "_": [0], "^": [1], @@ -135,14 +149,115 @@ 
DEFAULT_PHONEME_ID_MAP: Dict[str, List[int]] = { "χ": [127], "ᵻ": [128], "ⱱ": [129], + "0": [130], # tones + "1": [131], + "2": [132], + "3": [133], + "4": [134], + "5": [135], + "6": [136], + "7": [137], + "8": [138], + "9": [139], + "\u0327": [140], # combining cedilla + "\u0303": [141], # combining tilde + "\u032a": [142], # combining bridge below + "\u032f": [143], # combining inverted breve below + "\u0329": [144], # combining vertical line below + "ʰ": [145], + "ˤ": [146], + "ε": [147], + "↓": [148], + "#": [149], # Icelandic + '"': [150], # Russian + "↑": [151], + "\u033a": [152], # Basque + "\u033b": [153], +} + +PHONEME_MAPS = { + # Brazilian Portuguese + "pt-br": {"c": ["k"]} +} + +ALPHABETS = { + # Ukrainian + "uk": { + "_": [0], + "^": [1], + "$": [2], + " ": [3], + "!": [4], + "'": [5], + ",": [6], + "-": [7], + ".": [8], + ":": [9], + ";": [10], + "?": [11], + "а": [12], + "б": [13], + "в": [14], + "г": [15], + "ґ": [16], + "д": [17], + "е": [18], + "є": [19], + "ж": [20], + "з": [21], + "и": [22], + "і": [23], + "ї": [24], + "й": [25], + "к": [26], + "л": [27], + "м": [28], + "н": [29], + "о": [30], + "п": [31], + "р": [32], + "с": [33], + "т": [34], + "у": [35], + "ф": [36], + "х": [37], + "ц": [38], + "ч": [39], + "ш": [40], + "щ": [41], + "ь": [42], + "ю": [43], + "я": [44], + "\u0301": [45], # combining acute accent + "\u0306": [46], # combining breve + "\u0308": [47], # combining diaeresis + "—": [48], # em dash + } } -def phonemize(text: str, phonemizer: Phonemizer) -> List[str]: +def phonemize( + text: str, + phonemizer: Phonemizer, + phoneme_map: Optional[Dict[str, List[str]]] = None, +) -> List[str]: phonemes_str = phonemizer.phonemize(text=text, keep_clause_breakers=True) # Phonemes are decomposed into unicode codepoints - return list(unicodedata.normalize("NFD", phonemes_str)) + unmapped_phonemes = list(unicodedata.normalize("NFD", phonemes_str)) + if not phoneme_map: + return unmapped_phonemes + + # Phonemes can be mapped to lists of 
other phonemes + mapped_phonemes = [] + for phoneme in unmapped_phonemes: + sub_phonemes = phoneme_map.get(phoneme) + if sub_phonemes: + mapped_phonemes.extend(sub_phonemes) + else: + mapped_phonemes.append(phoneme) + + return mapped_phonemes def phonemes_to_ids( @@ -179,3 +294,79 @@ def phonemes_to_ids( phoneme_ids.extend(phoneme_id_map[eos]) return phoneme_ids + + +# ----------------------------------------------------------------------------- + + +def main() -> None: + parser = argparse.ArgumentParser() + parser.add_argument("language") + parser.add_argument( + "--phoneme-type", + choices=list(PhonemeType), + default=PhonemeType.ESPEAK, + help="Type of phonemes to use (default: espeak)", + ) + parser.add_argument( + "--text-casing", + choices=("ignore", "lower", "upper", "casefold"), + default="ignore", + help="Casing applied to utterance text", + ) + args = parser.parse_args() + + phonemizer: Optional[Phonemizer] = None + + if args.text_casing == "lower": + casing = str.lower + elif args.text_casing == "upper": + casing = str.upper + else: + # ignore + casing = lambda s: s + + if args.phoneme_type == PhonemeType.TEXT: + # Use text directly + phoneme_id_map = ALPHABETS[args.language] + else: + # Use eSpeak + phonemizer = Phonemizer(args.language) + phoneme_id_map = DEFAULT_PHONEME_ID_MAP + + phoneme_map = PHONEME_MAPS.get(args.language) + missing_phonemes: "Counter[str]" = Counter() + + for line in sys.stdin: + line = line.strip() + if not line: + continue + + if args.phoneme_type == PhonemeType.TEXT: + phonemes = list(unicodedata.normalize("NFD", casing(line))) + else: + assert phonemizer is not None + phonemes = phonemize(line, phonemizer, phoneme_map=phoneme_map) + + phoneme_ids = phonemes_to_ids( + phonemes, phoneme_id_map=phoneme_id_map, missing_phonemes=missing_phonemes + ) + json.dump( + { + "text": line, + "phonemes": phonemes, + "phoneme_ids": phoneme_ids, + }, + sys.stdout, + ensure_ascii=False, + ) + print("") + + if missing_phonemes: + 
print("Missing", len(missing_phonemes), "phonemes", file=sys.stderr) + for phoneme, count in missing_phonemes.most_common(): + print(phoneme, count, file=sys.stderr) + + +if __name__ == "__main__": + main() diff --git a/src/python/piper_train/preprocess.py b/src/python/piper_train/preprocess.py index 722b399..3d94088 100644 --- a/src/python/piper_train/preprocess.py +++ b/src/python/piper_train/preprocess.py @@ -6,9 +6,9 @@ import itertools import json import logging import os +import unicodedata from collections import Counter -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass +from dataclasses import dataclass, field from multiprocessing import JoinableQueue, Process, Queue from pathlib import Path from typing import Dict, Iterable, List, Optional @@ -16,7 +16,15 @@ from typing import Dict, Iterable, List, Optional from espeak_phonemizer import Phonemizer from .norm_audio import cache_norm_audio, make_silence_detector -from .phonemize import DEFAULT_PHONEME_ID_MAP, phonemes_to_ids, phonemize +from .phonemize import ( + ALPHABETS, + DEFAULT_PHONEME_ID_MAP, + MAX_PHONEMES, + PHONEME_MAPS, + PhonemeType, + phonemes_to_ids, + phonemize, +) _LOGGER = logging.getLogger("preprocess") @@ -49,6 +57,23 @@ def main() -> None: parser.add_argument( "--speaker-id", type=int, help="Add speaker id to single speaker dataset" ) + # + parser.add_argument( + "--phoneme-type", + choices=list(PhonemeType), + default=PhonemeType.ESPEAK, + help="Type of phonemes to use (default: espeak)", + ) + parser.add_argument( + "--text-casing", + choices=("ignore", "lower", "upper", "casefold"), + default="ignore", + help="Casing applied to utterance text", + ) + # + parser.add_argument( + "--skip-audio", action="store_true", help="Don't preprocess audio" + ) parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to the console" ) @@ -84,9 +109,9 @@ def main() -> None: # Count speakers _LOGGER.debug("Counting number of speakers/utterances 
in the dataset") - speaker_counts: Counter[str] = Counter() + speaker_counts: "Counter[str]" = Counter() num_utterances = 0 - for utt in make_dataset(args.input_dir, args.single_speaker, args.speaker_id): + for utt in make_dataset(args): speaker = utt.speaker or "" speaker_counts[speaker] += 1 num_utterances += 1 @@ -118,11 +143,12 @@ def main() -> None: "voice": args.language, }, "inference": {"noise_scale": 0.667, "length_scale": 1, "noise_w": 0.8}, + "phoneme_type": str(args.phoneme_type), "phoneme_map": {}, - "phoneme_id_map": DEFAULT_PHONEME_ID_MAP, - "num_symbols": len( - set(itertools.chain.from_iterable(DEFAULT_PHONEME_ID_MAP.values())) - ), + "phoneme_id_map": ALPHABETS[args.language] + if args.phoneme_type == PhonemeType.TEXT + else DEFAULT_PHONEME_ID_MAP, + "num_symbols": MAX_PHONEMES, "num_speakers": len(speaker_counts), "speaker_id_map": speaker_ids, }, @@ -142,8 +168,13 @@ def main() -> None: queue_out: "Queue[Optional[Utterance]]" = Queue() # Start workers + if args.phoneme_type == PhonemeType.TEXT: + target = phonemize_batch_text + else: + target = phonemize_batch_espeak + processes = [ - Process(target=process_batch, args=(args, queue_in, queue_out)) + Process(target=target, args=(args, queue_in, queue_out)) for _ in range(args.max_workers) ] for proc in processes: @@ -154,27 +185,39 @@ def main() -> None: ) with open(args.output_dir / "dataset.jsonl", "w", encoding="utf-8") as dataset_file: for utt_batch in batched( - make_dataset(args.input_dir, args.single_speaker, args.speaker_id), + make_dataset(args), batch_size, ): queue_in.put(utt_batch) _LOGGER.debug("Waiting for jobs to finish") + missing_phonemes: "Counter[str]" = Counter() for _ in range(num_utterances): utt = queue_out.get() if utt is not None: if utt.speaker is not None: utt.speaker_id = speaker_ids[utt.speaker] + utt_dict = dataclasses.asdict(utt) + utt_dict.pop("missing_phonemes") + # JSONL json.dump( - dataclasses.asdict(utt), + utt_dict, dataset_file, ensure_ascii=False, 
cls=PathEncoder, ) print("", file=dataset_file) + missing_phonemes.update(utt.missing_phonemes) + + if missing_phonemes: + for phoneme, count in missing_phonemes.most_common(): + _LOGGER.warning("Missing %s (%s)", phoneme, count) + + _LOGGER.warning("Missing %s phoneme(s)", len(missing_phonemes)) + # Signal workers to stop for proc in processes: queue_in.put(None) @@ -187,10 +230,27 @@ def main() -> None: # ----------------------------------------------------------------------------- -def process_batch(args: argparse.Namespace, queue_in: JoinableQueue, queue_out: Queue): +def get_text_casing(casing: str): + if casing == "lower": + return str.lower + + if casing == "upper": + return str.upper + + if casing == "casefold": + return str.casefold + + return lambda s: s + + +def phonemize_batch_espeak( + args: argparse.Namespace, queue_in: JoinableQueue, queue_out: Queue +): try: + casing = get_text_casing(args.text_casing) silence_detector = make_silence_detector() phonemizer = Phonemizer(default_voice=args.language) + phoneme_map = PHONEME_MAPS.get(args.language) while True: utt_batch = queue_in.get() @@ -200,14 +260,20 @@ def process_batch(args: argparse.Namespace, queue_in: JoinableQueue, queue_out: for utt in utt_batch: try: _LOGGER.debug(utt) - utt.phonemes = phonemize(utt.text, phonemizer) - utt.phoneme_ids = phonemes_to_ids(utt.phonemes) - utt.audio_norm_path, utt.audio_spec_path = cache_norm_audio( - utt.audio_path, - args.cache_dir, - silence_detector, - args.sample_rate, + utt.phonemes = phonemize( + casing(utt.text), phonemizer, phoneme_map=phoneme_map ) + utt.phoneme_ids = phonemes_to_ids( + utt.phonemes, + missing_phonemes=utt.missing_phonemes, + ) + if not args.skip_audio: + utt.audio_norm_path, utt.audio_spec_path = cache_norm_audio( + utt.audio_path, + args.cache_dir, + silence_detector, + args.sample_rate, + ) queue_out.put(utt) except TimeoutError: _LOGGER.error("Skipping utterance due to timeout: %s", utt) @@ -217,7 +283,48 @@ def process_batch(args: 
argparse.Namespace, queue_in: JoinableQueue, queue_out: queue_in.task_done() except Exception: - _LOGGER.exception("process_batch") + _LOGGER.exception("phonemize_batch_espeak") + + +def phonemize_batch_text( + args: argparse.Namespace, queue_in: JoinableQueue, queue_out: Queue +): + try: + casing = get_text_casing(args.text_casing) + silence_detector = make_silence_detector() + alphabet = ALPHABETS[args.language] + + while True: + utt_batch = queue_in.get() + if utt_batch is None: + break + + for utt in utt_batch: + try: + _LOGGER.debug(utt) + utt.phonemes = list(unicodedata.normalize("NFD", casing(utt.text))) + utt.phoneme_ids = phonemes_to_ids( + utt.phonemes, + phoneme_id_map=alphabet, + missing_phonemes=utt.missing_phonemes, + ) + if not args.skip_audio: + utt.audio_norm_path, utt.audio_spec_path = cache_norm_audio( + utt.audio_path, + args.cache_dir, + silence_detector, + args.sample_rate, + ) + queue_out.put(utt) + except TimeoutError: + _LOGGER.error("Skipping utterance due to timeout: %s", utt) + except Exception: + _LOGGER.exception("Failed to process utterance: %s", utt) + queue_out.put(None) + + queue_in.task_done() + except Exception: + _LOGGER.exception("phonemize_batch_text") # ----------------------------------------------------------------------------- @@ -233,6 +340,7 @@ class Utterance: phoneme_ids: Optional[List[int]] = None audio_norm_path: Optional[Path] = None audio_spec_path: Optional[Path] = None + missing_phonemes: "Counter[str]" = field(default_factory=Counter) class PathEncoder(json.JSONEncoder): @@ -242,9 +350,12 @@ class PathEncoder(json.JSONEncoder): return super().default(o) -def ljspeech_dataset( - dataset_dir: Path, is_single_speaker: bool, speaker_id: Optional[int] = None -) -> Iterable[Utterance]: +def ljspeech_dataset(args: argparse.Namespace) -> Iterable[Utterance]: + dataset_dir = args.input_dir + is_single_speaker = args.single_speaker + speaker_id = args.speaker_id + skip_audio = args.skip_audio + # filename|speaker|text # 
speaker is optional metadata_path = dataset_dir / "metadata.csv" @@ -257,7 +368,7 @@ def ljspeech_dataset( with open(metadata_path, "r", encoding="utf-8") as csv_file: reader = csv.reader(csv_file, delimiter="|") for row in reader: - assert len(row) >= 2, "Not enough colums" + assert len(row) >= 2, "Not enough columns" speaker: Optional[str] = None if is_single_speaker or (len(row) == 2): @@ -280,18 +391,25 @@ def ljspeech_dataset( # Try with .wav wav_path = wav_dir / f"{filename}.wav" - if not wav_path.exists(): - _LOGGER.warning("Missing %s", filename) - continue + if not skip_audio: + if not wav_path.exists(): + _LOGGER.warning("Missing %s", filename) + continue + + if wav_path.stat().st_size == 0: + _LOGGER.warning("Empty file: %s", wav_path) + continue yield Utterance( text=text, audio_path=wav_path, speaker=speaker, speaker_id=speaker_id ) -def mycroft_dataset( - dataset_dir: Path, is_single_speaker: bool, speaker_id: Optional[int] = None -) -> Iterable[Utterance]: +def mycroft_dataset(args: argparse.Namespace) -> Iterable[Utterance]: + dataset_dir = args.input_dir + is_single_speaker = args.single_speaker + skip_audio = args.skip_audio + speaker_id = 0 for metadata_path in dataset_dir.glob("**/*-metadata.txt"): speaker = metadata_path.parent.name if not is_single_speaker else None @@ -301,15 +419,15 @@ def mycroft_dataset( for row in reader: filename, text = row[0], row[1] wav_path = metadata_path.parent / filename - yield Utterance( - text=text, - audio_path=wav_path, - speaker=speaker, - speaker_id=speaker_id if not is_single_speaker else None, - ) + if skip_audio or (wav_path.exists() and (wav_path.stat().st_size > 0)): + yield Utterance( + text=text, + audio_path=wav_path, + speaker=speaker, + speaker_id=speaker_id if not is_single_speaker else None, + ) speaker_id += 1 - # ----------------------------------------------------------------------------- diff --git a/src/python/piper_train/select_speaker.py b/src/python/piper_train/select_speaker.py new file 
mode 100644 index 0000000..f92ce34 --- /dev/null +++ b/src/python/piper_train/select_speaker.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python3 +import argparse +import csv +import sys +from collections import Counter, defaultdict + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--speaker-number", type=int) + parser.add_argument("--speaker-name") + args = parser.parse_args() + + assert (args.speaker_number is not None) or (args.speaker_name is not None) + + reader = csv.reader(sys.stdin, delimiter="|") + writer = csv.writer(sys.stdout, delimiter="|") + + if args.speaker_name is not None: + for row in reader: + audio, speaker_id, text = row[0], row[1], row[-1] + if args.speaker_name == speaker_id: + writer.writerow((audio, text)) + else: + utterances = defaultdict(list) + counts = Counter() + for row in reader: + audio, speaker_id, text = row[0], row[1], row[-1] + utterances[speaker_id].append((audio, text)) + counts[speaker_id] += 1 + + writer = csv.writer(sys.stdout, delimiter="|") + for i, (speaker_id, _count) in enumerate(counts.most_common()): + if i == args.speaker_number: + for row in utterances[speaker_id]: + writer.writerow(row) + + print(speaker_id, file=sys.stderr) + break + + +if __name__ == "__main__": + main() diff --git a/src/python/run-docker b/src/python/run-docker index 08eace7..191d364 100755 --- a/src/python/run-docker +++ b/src/python/run-docker @@ -8,7 +8,8 @@ docker run \ --user "$(id -u):$(id -g)" \ --ipc=host \ -v "${HOME}:${HOME}" \ + -v /media/cache:/media/cache:ro \ -v /etc/hostname:/etc/hostname:ro \ -v /etc/localtime:/etc/localtime:ro \ - piper-train \ + larynx2-train \ "$@" diff --git a/src/python_run/piper/__init__.py b/src/python_run/piper/__init__.py index 2ab2622..e14d2c9 100644 --- a/src/python_run/piper/__init__.py +++ b/src/python_run/piper/__init__.py @@ -1,5 +1,6 @@ import io import json +import logging import wave from dataclasses import dataclass from pathlib import Path @@ -9,6 +10,8 @@ import numpy as 
np import onnxruntime from espeak_phonemizer import Phonemizer +_LOGGER = logging.getLogger(__name__) + _BOS = "^" _EOS = "$" _PAD = "_" @@ -69,8 +72,11 @@ class Piper: phoneme_ids: List[int] = [] for phoneme in phonemes: - phoneme_ids.extend(self.config.phoneme_id_map[phoneme]) - phoneme_ids.extend(self.config.phoneme_id_map[_PAD]) + if phoneme in self.config.phoneme_id_map: + phoneme_ids.extend(self.config.phoneme_id_map[phoneme]) + phoneme_ids.extend(self.config.phoneme_id_map[_PAD]) + else: + _LOGGER.warning("No id for phoneme: %s", phoneme) phoneme_ids.extend(self.config.phoneme_id_map[_EOS])