From 19411f8c544afd530c33f5f0ec2ed3245c7f2f1a Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 22 Jun 2023 10:38:54 +0200 Subject: [PATCH 1/8] Add LocalAI to People using Piper --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ce296b..d8eb6d3 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ Piper has been used in the following projects/papers: * [Image Captioning for the Visually Impaired and Blind: A Recipe for Low-Resource Languages](https://www.techrxiv.org/articles/preprint/Image_Captioning_for_the_Visually_Impaired_and_Blind_A_Recipe_for_Low-Resource_Languages/22133894) * [Open Voice Operating System](https://github.com/OpenVoiceOS/ovos-tts-plugin-piper) * [JetsonGPT](https://github.com/shahizat/jetsonGPT) - +* [LocalAI](https://github.com/go-skynet/LocalAI) ## Training From 42d14ef21f43f3e56214edab63707479cbbea651 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 10 Jul 2023 12:46:46 -0500 Subject: [PATCH 2/8] Add "speaker" to JSON input --- VERSION | 2 +- src/cpp/main.cpp | 10 ++++++++++ src/cpp/piper.cpp | 13 +++++++++++++ src/cpp/piper.hpp | 3 +++ 4 files changed, 27 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 3eefcb9..9084fa2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.0 +1.1.0 diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 5f1bde1..892fd40 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -214,6 +214,16 @@ int main(int argc, char *argv[]) { // Override speaker id voice.synthesisConfig.speakerId = lineRoot["speaker_id"].get(); + } else if (lineRoot.contains("speaker")) { + // Resolve to id using speaker id map + auto speakerName = lineRoot["speaker"].get(); + if ((voice.modelConfig.speakerIdMap) && + (voice.modelConfig.speakerIdMap->count(speakerName) > 0)) { + voice.synthesisConfig.speakerId = + (*voice.modelConfig.speakerIdMap)[speakerName]; + } else { + spdlog::warn("No speaker named: {}", speakerName); + } } } diff --git 
a/src/cpp/piper.cpp b/src/cpp/piper.cpp index 633e95c..ccc2c1a 100644 --- a/src/cpp/piper.cpp +++ b/src/cpp/piper.cpp @@ -163,6 +163,19 @@ void parseModelConfig(json &configRoot, ModelConfig &modelConfig) { modelConfig.numSpeakers = configRoot["num_speakers"].get(); + if (configRoot.contains("speaker_id_map")) { + if (!modelConfig.speakerIdMap) { + modelConfig.speakerIdMap.emplace(); + } + + auto speakerIdMapValue = configRoot["speaker_id_map"]; + for (auto &speakerItem : speakerIdMapValue.items()) { + std::string speakerName = speakerItem.key(); + (*modelConfig.speakerIdMap)[speakerName] = + speakerItem.value().get(); + } + } + } /* parseModelConfig */ void initialize(PiperConfig &config) { diff --git a/src/cpp/piper.hpp b/src/cpp/piper.hpp index 29a8bcf..0c3175b 100644 --- a/src/cpp/piper.hpp +++ b/src/cpp/piper.hpp @@ -61,6 +61,9 @@ struct SynthesisConfig { struct ModelConfig { int numSpeakers; + + // speaker name -> id + std::optional> speakerIdMap; }; struct ModelSession { From daf6e7fcf7b57078ec984b1f88d79415d305ceb9 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 10 Jul 2023 15:09:58 -0500 Subject: [PATCH 3/8] Add --version --- Makefile | 1 + README.md | 73 +++++++++++++++++++++++++++++------------- src/cpp/CMakeLists.txt | 4 +++ src/cpp/main.cpp | 8 +++-- src/cpp/piper.cpp | 13 ++++++++ src/cpp/piper.hpp | 3 ++ 6 files changed, 77 insertions(+), 25 deletions(-) diff --git a/Makefile b/Makefile index 6d8b4c5..505c962 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ .PHONY: piper clean LIB_DIR := lib/Linux-$(shell uname -m) +VERSION := $(cat VERSION) piper: mkdir -p build diff --git a/README.md b/README.md index bbfddd6..f2002a9 100644 --- a/README.md +++ b/README.md @@ -18,30 +18,31 @@ Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and export Our goal is to support Home Assistant and the [Year of Voice](https://www.home-assistant.io/blog/2022/12/20/year-of-voice/). 
-[Download voices](https://github.com/rhasspy/piper/releases/tag/v0.0.2) for the supported languages: +[Download voices](https://huggingface.co/rhasspy/piper-voices/tree/main) for the supported languages: -* Catalan (ca) -* Danish (da) -* German (de) -* British English (en-gb) -* U.S. English (en-us) -* Spanish (es) -* Finnish (fi) -* French (fr) -* Greek (el-gr) -* Icelandic (is) -* Italian (it) -* Kazakh (kk) -* Nepali (ne) -* Dutch (nl) -* Norwegian (no) -* Polish (pl) -* Brazilian Portuguese (pt-br) -* Russian (ru) -* Swedish (sv-se) -* Ukrainian (uk) -* Vietnamese (vi) -* Chinese (zh-cn) +* Catalan (ca_ES) +* Danish (da_DK) +* German (de_DE) +* English (en_GB, en_US) +* Spanish (es_ES, es_MX) +* Finnish (fi_FI) +* French (fr_FR) +* Greek (el_GR) +* Icelandic (is_IS) +* Italian (it_IT) +* Georgian (ka_GE) +* Kazakh (kk_KZ) +* Nepali (ne_NP) +* Dutch (nl_BE, nl_NL) +* Norwegian (no_NO) +* Polish (pl_PL) +* Portuguese (pt_BR) +* Russian (ru_RU) +* Swedish (sv_SE) +* Swahili (sw_CD) +* Ukrainian (uk_UA) +* Vietnamese (vi_VN) +* Chinese (zh_CN) ## Installation @@ -74,6 +75,32 @@ For multi-speaker models, use `--speaker ` to change speakers (default: See `piper --help` for more options. +### JSON Input + +The `piper` executable can accept JSON input when using the `--json-input` flag. Each line of input must be a JSON object with a `text` field. For example: + +``` json +{ "text": "First sentence to speak." } +{ "text": "Second sentence to speak."
} +``` + +Optional fields include: + +* `speaker` - string + * Name of the speaker to use from `speaker_id_map` in config (multi-speaker voices only) +* `speaker_id` - number + * Id of speaker to use from 0 to number of speakers - 1 (multi-speaker voices only, overrides "speaker") +* `output_file` - string + * Path to output WAV file + +The following example writes two sentences with different speakers to different files: + +``` json +{ "text": "First speaker.", "speaker_id": 0, "output_file": "/tmp/speaker_0.wav" } +{ "text": "Second speaker.", "speaker_id": 1, "output_file": "/tmp/speaker_1.wav" } +``` + + ## People using Piper Piper has been used in the following projects/papers: diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 792b680..b422426 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -5,6 +5,8 @@ project(piper C CXX) find_package(PkgConfig) pkg_check_modules(SPDLOG REQUIRED spdlog) +file(READ "${CMAKE_CURRENT_LIST_DIR}/../../VERSION" piper_version) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -35,3 +37,5 @@ target_include_directories(piper PUBLIC target_compile_options(piper PUBLIC ${SPDLOG_CFLAGS_OTHER}) + +target_compile_definitions(piper PUBLIC _PIPER_VERSION=${piper_version}) diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 892fd40..fa255a0 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -70,9 +70,10 @@ struct RunConfig { // stdin input is lines of JSON instead of text with format: // { - // "text": "...", (required) + // "text": str, (required) // "speaker_id": int, (optional) - // "output_file": "...", (optional) + // "speaker": str, (optional) + // "output_file": str, (optional) // } bool jsonInput = false; }; @@ -454,6 +455,9 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) { runConfig.tashkeelModelPath = filesystem::path(argv[++i]); } else if (arg == "--json_input" || arg == "--json-input") { runConfig.jsonInput = true; + } else if (arg == "--version") { + 
std::cout << piper::getVersion() << std::endl; + exit(0); } else if (arg == "--debug") { // Set DEBUG logging spdlog::set_level(spdlog::level::debug); diff --git a/src/cpp/piper.cpp b/src/cpp/piper.cpp index ccc2c1a..d83dd3f 100644 --- a/src/cpp/piper.cpp +++ b/src/cpp/piper.cpp @@ -16,11 +16,24 @@ namespace piper { +#ifdef _PIPER_VERSION +// https://stackoverflow.com/questions/47346133/how-to-use-a-define-inside-a-format-string +#define _STR(x) #x +#define STR(x) _STR(x) +const std::string VERSION = STR(_PIPER_VERSION); +#else +const std::string VERSION = ""; +#endif + // Maximum value for 16-bit signed WAV sample const float MAX_WAV_VALUE = 32767.0f; const std::string instanceName{"piper"}; +std::string getVersion() { + return VERSION; +} + // True if the string is a single UTF-8 codepoint bool isSingleCodepoint(std::string s) { return utf8::distance(s.begin(), s.end()) == 1; diff --git a/src/cpp/piper.hpp b/src/cpp/piper.hpp index 0c3175b..9e7c222 100644 --- a/src/cpp/piper.hpp +++ b/src/cpp/piper.hpp @@ -89,6 +89,9 @@ struct Voice { ModelSession session; }; +// Get version of Piper +std::string getVersion(); + // Must be called before using textTo* functions void initialize(PiperConfig &config); From 2ab5380d1cfb64b228314e37e2ed12807d75f516 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 10 Jul 2023 15:26:52 -0500 Subject: [PATCH 4/8] Fix -f --- .dockerignore | 1 + Dockerfile | 2 +- Makefile | 3 ++- src/cpp/main.cpp | 22 ++++++++++++++-------- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/.dockerignore b/.dockerignore index b1e1743..d63fb2a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,5 @@ * +!VERSION !Makefile !src/cpp/ !local/en-us/lessac/low/en-us-lessac-low.onnx diff --git a/Dockerfile b/Dockerfile index 945a337..caf5980 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,7 +39,7 @@ RUN mkdir -p "lib/Linux-$(uname -m)/piper_phonemize" && \ tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - # Build piper binary -COPY 
Makefile ./ +COPY VERSION Makefile ./ COPY src/cpp/ ./src/cpp/ RUN make diff --git a/Makefile b/Makefile index 505c962..ddde320 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,7 @@ LIB_DIR := lib/Linux-$(shell uname -m) VERSION := $(cat VERSION) +DOCKER_PLATFORM ?= linux/amd64,linux/arm64,linux/arm/v7 piper: mkdir -p build @@ -12,4 +13,4 @@ clean: rm -rf build/ dist/ docker: - docker buildx build . --platform 'linux/amd64,linux/arm64,linux/arm/v7' --output 'type=local,dest=dist' + docker buildx build . --platform '$(DOCKER_PLATFORM)' --output 'type=local,dest=dist' diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index fa255a0..9812805 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -195,7 +195,7 @@ int main(int argc, char *argv[]) { while (getline(cin, line)) { auto outputType = runConfig.outputType; auto speakerId = voice.synthesisConfig.speakerId; - std::optional outputPath; + std::optional maybeOutputPath = runConfig.outputPath; if (runConfig.jsonInput) { // Each line is a JSON object @@ -207,7 +207,7 @@ int main(int argc, char *argv[]) { if (lineRoot.contains("output_file")) { // Override output WAV file path outputType = OUTPUT_FILE; - outputPath = + maybeOutputPath = filesystem::path(lineRoot["output_file"].get()); } @@ -238,14 +238,20 @@ int main(int argc, char *argv[]) { // Generate path using timestamp stringstream outputName; outputName << timestamp << ".wav"; - outputPath = runConfig.outputPath.value(); - outputPath->append(outputName.str()); + filesystem::path outputPath = runConfig.outputPath.value(); + outputPath.append(outputName.str()); // Output audio to automatically-named WAV file in a directory - ofstream audioFile(outputPath->string(), ios::binary); + ofstream audioFile(outputPath.string(), ios::binary); piper::textToWavFile(piperConfig, voice, line, audioFile, result); - cout << outputPath->string() << endl; + cout << outputPath.string() << endl; } else if (outputType == OUTPUT_FILE) { + if (!maybeOutputPath || 
maybeOutputPath->empty()) { + throw runtime_error("No output path provided"); + } + + filesystem::path outputPath = maybeOutputPath.value(); + if (!runConfig.jsonInput) { // Read all of standard input before synthesizing. // Otherwise, we would overwrite the output file for each line. @@ -259,9 +265,9 @@ int main(int argc, char *argv[]) { } // Output audio to WAV file - ofstream audioFile(outputPath->string(), ios::binary); + ofstream audioFile(outputPath.string(), ios::binary); piper::textToWavFile(piperConfig, voice, line, audioFile, result); - cout << outputPath->string() << endl; + cout << outputPath.string() << endl; } else if (outputType == OUTPUT_STDOUT) { // Output WAV to stdout piper::textToWavFile(piperConfig, voice, line, cout, result); From b0e2b017a94d8bf07a4ae3f172ec9e31c0b6c95d Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Mon, 10 Jul 2023 15:32:19 -0500 Subject: [PATCH 5/8] Update README links --- README.md | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f2002a9..8d150c6 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and export Our goal is to support Home Assistant and the [Year of Voice](https://www.home-assistant.io/blog/2022/12/20/year-of-voice/). -[Download voices](https://huggingface.co/rhasspy/piper-voices/tree/main) for the supported languages: +[Download voices](https://huggingface.co/rhasspy/piper-voices/tree/v1.0.0) for the supported languages: * Catalan (ca_ES) * Danish (da_DK) @@ -44,14 +44,21 @@ Our goal is to support Home Assistant and the [Year of Voice](https://www.home-a * Vietnamese (vi_VN) * Chinese (zh_CN) +You will need two files per voice: + +1. A `.onnx` model file, such as [`en_US-lessac-medium.onnx`](https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium/en_US-lessac-medium.onnx) +2. 
A `.onnx.json` config file, such as [`en_US-lessac-medium.onnx.json`](https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json) + +The `MODEL_CARD` file for each voice contains important licensing information. Piper is intended for text-to-speech research, and does not impose any additional restrictions on voice models. Some voices may have restrictive licenses, however, so please review them carefully! + ## Installation Download a release: -* [amd64](https://github.com/rhasspy/piper/releases/download/v1.0.0/piper_amd64.tar.gz) (64-bit desktop Linux) -* [arm64](https://github.com/rhasspy/piper/releases/download/v1.0.0/piper_arm64.tar.gz) (64-bit Raspberry Pi 4) -* [armv7](https://github.com/rhasspy/piper/releases/download/v1.0.0/piper_armv7.tar.gz) (32-bit Raspberry Pi 3/4) +* [amd64](https://github.com/rhasspy/piper/releases/download/v1.1.0/piper_amd64.tar.gz) (64-bit desktop Linux) +* [arm64](https://github.com/rhasspy/piper/releases/download/v1.1.0/piper_arm64.tar.gz) (64-bit Raspberry Pi 4) +* [armv7](https://github.com/rhasspy/piper/releases/download/v1.1.0/piper_armv7.tar.gz) (32-bit Raspberry Pi 3/4) If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). You must download and extract [piper-phonemize](https://github.com/rhasspy/piper-phonemize) to `lib/Linux-$(uname -m)/piper_phonemize` before building. @@ -67,7 +74,7 @@ For example: ``` sh echo 'Welcome to the world of speech synthesis!' | \ - ./piper --model en-us-lessac-medium.onnx --output_file welcome.wav + ./piper --model en_US-lessac-medium.onnx --output_file welcome.wav ``` For multi-speaker models, use `--speaker ` to change speakers (default: 0).
From c00ffd33340afa417d69760dc2c92c73dacef5c8 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Sat, 22 Jul 2023 14:32:00 -0500 Subject: [PATCH 6/8] Add tr test sentences --- etc/test_sentences/test_tr.jsonl | 5 +++++ etc/test_sentences/tr.txt | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 etc/test_sentences/test_tr.jsonl create mode 100644 etc/test_sentences/tr.txt diff --git a/etc/test_sentences/test_tr.jsonl b/etc/test_sentences/test_tr.jsonl new file mode 100644 index 0000000..43dc5a6 --- /dev/null +++ b/etc/test_sentences/test_tr.jsonl @@ -0,0 +1,5 @@ +{"phoneme_ids":[1,0,64,0,45,0,23,0,122,0,33,0,96,0,14,0,122,0,120,0,79,0,8,0,64,0,37,0,26,0,120,0,61,0,96,0,3,0,79,0,96,0,79,0,26,0,75,0,14,0,92,0,79,0,26,0,120,0,79,0,26,0,3,0,22,0,14,0,122,0,25,0,120,0,100,0,30,0,3,0,17,0,14,0,25,0,75,0,14,0,75,0,14,0,92,0,79,0,26,0,17,0,120,0,14,0,3,0,34,0,18,0,22,0,121,0,14,0,3,0,31,0,120,0,74,0,31,0,3,0,15,0,33,0,75,0,100,0,32,0,75,0,14,0,92,0,79,0,26,0,17,0,120,0,14,0,3,0,22,0,14,0,26,0,31,0,79,0,25,0,14,0,31,0,120,0,79,0,3,0,34,0,61,0,3,0,23,0,79,0,92,0,79,0,75,0,25,0,14,0,31,0,120,0,79,0,22,0,75,0,14,0,3,0,25,0,61,0,22,0,17,0,14,0,26,0,120,0,14,0,3,0,64,0,18,0,24,0,120,0,39,0,26,0,3,0,34,0,61,0,3,0,79,0,96,0,120,0,79,0,23,0,3,0,32,0,14,0,22,0,19,0,120,0,79,0,3,0,30,0,39,0,26,0,23,0,24,0,18,0,92,0,21,0,26,0,120,0,74,0,26,0,3,0,15,0,74,0,30,0,3,0,22,0,120,0,14,0,22,0,3,0,96,0,61,0,23,0,24,0,74,0,26,0,17,0,120,0,61,0,3,0,64,0,45,0,92,0,42,0,26,0,17,0,120,0,42,0,122,0,3,0,25,0,18,0,32,0,18,0,27,0,92,0,27,0,75,0,27,0,108,0,120,0,74,0,23,0,3,0,15,0,74,0,30,0,3,0,27,0,75,0,120,0,14,0,22,0,17,0,79,0,30,0,10,0,2],"phonemes":["ɟ","œ","k","ː","u","ʃ","a","ː","ˈ","ɯ",",","ɟ","y","n","ˈ","ɛ","ʃ"," ","ɯ","ʃ","ɯ","n","ɫ","a","ɾ","ɯ","n","ˈ","ɯ","n"," ","j","a","ː","m","ˈ","ʊ","r"," ","d","a","m","ɫ","a","ɫ","a","ɾ","ɯ","n","d","ˈ","a"," ","v","e","j","ˌ","a"," ","s","ˈ","ɪ","s"," ","b","u","ɫ","ʊ","t","ɫ","a","ɾ","ɯ","n","d","ˈ","a"," 
","j","a","n","s","ɯ","m","a","s","ˈ","ɯ"," ","v","ɛ"," ","k","ɯ","ɾ","ɯ","ɫ","m","a","s","ˈ","ɯ","j","ɫ","a"," ","m","ɛ","j","d","a","n","ˈ","a"," ","ɟ","e","l","ˈ","æ","n"," ","v","ɛ"," ","ɯ","ʃ","ˈ","ɯ","k"," ","t","a","j","f","ˈ","ɯ"," ","r","æ","n","k","l","e","ɾ","i","n","ˈ","ɪ","n"," ","b","ɪ","r"," ","j","ˈ","a","j"," ","ʃ","ɛ","k","l","ɪ","n","d","ˈ","ɛ"," ","ɟ","œ","ɾ","ø","n","d","ˈ","ø","ː"," ","m","e","t","e","o","ɾ","o","ɫ","o","ʒ","ˈ","ɪ","k"," ","b","ɪ","r"," ","o","ɫ","ˈ","a","j","d","ɯ","r","."],"processed_text":"Gökkuşağı, güneş ışınlarının yağmur damlalarında veya sis bulutlarında yansıması ve kırılmasıyla meydana gelen ve ışık tayfı renklerinin bir yay şeklinde göründüğü meteorolojik bir olaydır.","text":"Gökkuşağı, güneş ışınlarının yağmur damlalarında veya sis bulutlarında yansıması ve kırılmasıyla meydana gelen ve ışık tayfı renklerinin bir yay şeklinde göründüğü meteorolojik bir olaydır."} +{"phoneme_ids":[1,0,64,0,45,0,23,0,122,0,33,0,96,0,14,0,122,0,79,0,26,0,17,0,14,0,23,0,120,0,74,0,3,0,30,0,39,0,26,0,23,0,24,0,120,0,61,0,30,0,3,0,15,0,74,0,30,0,3,0,31,0,28,0,120,0,61,0,23,0,32,0,30,0,100,0,25,0,3,0,27,0,75,0,100,0,96,0,32,0,33,0,92,0,120,0,100,0,30,0,10,0,2],"phonemes":["ɟ","œ","k","ː","u","ʃ","a","ː","ɯ","n","d","a","k","ˈ","ɪ"," ","r","æ","n","k","l","ˈ","ɛ","r"," ","b","ɪ","r"," ","s","p","ˈ","ɛ","k","t","r","ʊ","m"," ","o","ɫ","ʊ","ʃ","t","u","ɾ","ˈ","ʊ","r","."],"processed_text":"Gökkuşağındaki renkler bir spektrum oluşturur.","text":"Gökkuşağındaki renkler bir spektrum oluşturur."} 
+{"phoneme_ids":[1,0,32,0,21,0,28,0,120,0,74,0,23,0,3,0,15,0,74,0,30,0,3,0,64,0,45,0,23,0,122,0,33,0,96,0,14,0,122,0,120,0,79,0,3,0,23,0,120,0,79,0,30,0,25,0,79,0,38,0,79,0,8,0,32,0,33,0,92,0,100,0,26,0,17,0,108,0,120,0,100,0,8,0,31,0,14,0,92,0,120,0,79,0,8,0,22,0,18,0,96,0,120,0,74,0,24,0,8,0,25,0,14,0,122,0,34,0,120,0,74,0,8,0,75,0,14,0,17,0,108,0,21,0,34,0,120,0,61,0,30,0,32,0,3,0,34,0,61,0,3,0,25,0,120,0,54,0,30,0,3,0,30,0,39,0,26,0,23,0,24,0,18,0,92,0,74,0,26,0,17,0,120,0,39,0,26,0,3,0,25,0,61,0,22,0,17,0,14,0,26,0,120,0,14,0,3,0,64,0,18,0,24,0,120,0,39,0,26,0,3,0,15,0,74,0,30,0,3,0,30,0,120,0,39,0,26,0,23,0,3,0,31,0,79,0,92,0,14,0,31,0,79,0,26,0,120,0,14,0,3,0,31,0,14,0,20,0,120,0,74,0,28,0,3,0,15,0,74,0,30,0,3,0,34,0,18,0,22,0,121,0,14,0,3,0,17,0,14,0,20,0,120,0,14,0,3,0,19,0,120,0,14,0,38,0,75,0,14,0,3,0,14,0,22,0,26,0,120,0,79,0,3,0,25,0,61,0,30,0,23,0,61,0,38,0,24,0,120,0,74,0,3,0,14,0,30,0,23,0,75,0,14,0,30,0,17,0,120,0,14,0,26,0,3,0,21,0,15,0,14,0,92,0,18,0,32,0,122,0,120,0,74,0,30,0,10,0,2],"phonemes":["t","i","p","ˈ","ɪ","k"," ","b","ɪ","r"," ","ɟ","œ","k","ː","u","ʃ","a","ː","ˈ","ɯ"," ","k","ˈ","ɯ","r","m","ɯ","z","ɯ",",","t","u","ɾ","ʊ","n","d","ʒ","ˈ","ʊ",",","s","a","ɾ","ˈ","ɯ",",","j","e","ʃ","ˈ","ɪ","l",",","m","a","ː","v","ˈ","ɪ",",","ɫ","a","d","ʒ","i","v","ˈ","ɛ","r","t"," ","v","ɛ"," ","m","ˈ","ɔ","r"," ","r","æ","n","k","l","e","ɾ","ɪ","n","d","ˈ","æ","n"," ","m","ɛ","j","d","a","n","ˈ","a"," ","ɟ","e","l","ˈ","æ","n"," ","b","ɪ","r"," ","r","ˈ","æ","n","k"," ","s","ɯ","ɾ","a","s","ɯ","n","ˈ","a"," ","s","a","h","ˈ","ɪ","p"," ","b","ɪ","r"," ","v","e","j","ˌ","a"," ","d","a","h","ˈ","a"," ","f","ˈ","a","z","ɫ","a"," ","a","j","n","ˈ","ɯ"," ","m","ɛ","r","k","ɛ","z","l","ˈ","ɪ"," ","a","r","k","ɫ","a","r","d","ˈ","a","n"," ","i","b","a","ɾ","e","t","ː","ˈ","ɪ","r","."],"processed_text":"Tipik bir gökkuşağı kırmızı, turuncu, sarı, yeşil, mavi, lacivert ve mor renklerinden meydana gelen bir renk sırasına sahip bir veya daha fazla aynı merkezli 
arklardan ibarettir.","text":"Tipik bir gökkuşağı kırmızı, turuncu, sarı, yeşil, mavi, lacivert ve mor renklerinden meydana gelen bir renk sırasına sahip bir veya daha fazla aynı merkezli arklardan ibarettir."} +{"phoneme_ids":[1,0,28,0,21,0,108,0,120,0,14,0,25,0,14,0,75,0,79,0,3,0,20,0,14,0,31,0,32,0,120,0,14,0,3,0,22,0,120,0,14,0,122,0,79,0,38,0,3,0,96,0,27,0,19,0,45,0,92,0,120,0,61,0,3,0,32,0,96,0,14,0,15,0,33,0,17,0,108,0,120,0,14,0,23,0,3,0,64,0,37,0,34,0,39,0,26,0,17,0,120,0,74,0,10,0,2],"phonemes":["p","i","ʒ","ˈ","a","m","a","ɫ","ɯ"," ","h","a","s","t","ˈ","a"," ","j","ˈ","a","ː","ɯ","z"," ","ʃ","o","f","œ","ɾ","ˈ","ɛ"," ","t","ʃ","a","b","u","d","ʒ","ˈ","a","k"," ","ɟ","y","v","æ","n","d","ˈ","ɪ","."],"processed_text":"Pijamalı hasta yağız şoföre çabucak güvendi.","text":"Pijamalı hasta yağız şoföre çabucak güvendi."} +{"phoneme_ids":[1,0,120,0,45,0,23,0,42,0,38,0,3,0,14,0,108,0,120,0,14,0,26,0,3,0,20,0,120,0,14,0,28,0,31,0,61,0,3,0,17,0,42,0,96,0,32,0,120,0,42,0,3,0,22,0,120,0,14,0,34,0,30,0,100,0,25,0,8,0,27,0,17,0,108,0,14,0,122,0,120,0,79,0,3,0,19,0,120,0,39,0,24,0,32,0,96,0,3,0,64,0,21,0,15,0,120,0,74,0,10,0,2],"phonemes":["ˈ","œ","k","ø","z"," ","a","ʒ","ˈ","a","n"," ","h","ˈ","a","p","s","ɛ"," ","d","ø","ʃ","t","ˈ","ø"," ","j","ˈ","a","v","r","ʊ","m",",","o","d","ʒ","a","ː","ˈ","ɯ"," ","f","ˈ","æ","l","t","ʃ"," ","ɟ","i","b","ˈ","ɪ","."],"processed_text":"Öküz ajan hapse düştü yavrum, ocağı felç gibi.","text":"Öküz ajan hapse düştü yavrum, ocağı felç gibi."} diff --git a/etc/test_sentences/tr.txt b/etc/test_sentences/tr.txt new file mode 100644 index 0000000..5148bbb --- /dev/null +++ b/etc/test_sentences/tr.txt @@ -0,0 +1,5 @@ +Gökkuşağı, güneş ışınlarının yağmur damlalarında veya sis bulutlarında yansıması ve kırılmasıyla meydana gelen ve ışık tayfı renklerinin bir yay şeklinde göründüğü meteorolojik bir olaydır. +Gökkuşağındaki renkler bir spektrum oluşturur. 
+Tipik bir gökkuşağı kırmızı, turuncu, sarı, yeşil, mavi, lacivert ve mor renklerinden meydana gelen bir renk sırasına sahip bir veya daha fazla aynı merkezli arklardan ibarettir. +Pijamalı hasta yağız şoföre çabucak güvendi. +Öküz ajan hapse düştü yavrum, ocağı felç gibi. From 7eb60332142385a3658b9005ddabc3cb61337514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hubert=20=C5=81=C4=99picki?= Date: Thu, 27 Jul 2023 10:32:55 +0200 Subject: [PATCH 7/8] Fix help screen using outdated --silence_seconds instead of --sentence_silence --- src/cpp/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 9812805..1242b87 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -385,7 +385,7 @@ void printUsage(char *argv[]) { << endl; cerr << " --noise_w NUM phoneme width noise (default: 0.8)" << endl; - cerr << " --silence_seconds NUM seconds of silence after each " + cerr << " --sentence_silence NUM seconds of silence after each " "sentence (default: 0.2)" << endl; cerr << " --espeak_data DIR path to espeak-ng data directory" From a9be4c0314d4e4136917c6777faf68b984cf6c3f Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Thu, 27 Jul 2023 17:00:44 -0500 Subject: [PATCH 8/8] Update Python package --- README.md | 20 +- src/python_run/.gitignore | 3 + src/python_run/MANIFEST.in | 2 + src/python_run/piper/__init__.py | 150 +- src/python_run/piper/__main__.py | 123 +- src/python_run/piper/config.py | 53 + src/python_run/piper/const.py | 5 + src/python_run/piper/download.py | 120 + src/python_run/piper/file_hash.py | 46 + src/python_run/piper/util.py | 12 + src/python_run/piper/voice.py | 177 ++ src/python_run/piper/voices.json | 3782 +++++++++++++++++++++++++++++ src/python_run/requirements.txt | 4 +- src/python_run/setup.py | 47 + 14 files changed, 4368 insertions(+), 176 deletions(-) create mode 100644 src/python_run/.gitignore create mode 100644 src/python_run/MANIFEST.in create mode 100644 src/python_run/piper/config.py 
create mode 100644 src/python_run/piper/const.py create mode 100755 src/python_run/piper/download.py create mode 100644 src/python_run/piper/file_hash.py create mode 100644 src/python_run/piper/util.py create mode 100644 src/python_run/piper/voice.py create mode 100644 src/python_run/piper/voices.json create mode 100644 src/python_run/setup.py diff --git a/README.md b/README.md index f811a12..f144049 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Piper is used in a [variety of projects](#people-using-piper). ``` sh echo 'Welcome to the world of speech synthesis!' | \ - ./piper --model en-us-blizzard_lessac-medium.onnx --output_file welcome.wav + ./piper --model en_US-lessac-medium.onnx --output_file welcome.wav ``` [Listen to voice samples](https://rhasspy.github.io/piper-samples) and check out a [video tutorial by Thorsten Müller](https://youtu.be/rjq5eZoWWSo) @@ -54,7 +54,7 @@ The `MODEL_CARD` file for each voice contains important licensing information. P ## Installation -Download a release: +You can [run Piper with Python](#running-in-python) or download a binary release: * [amd64](https://github.com/rhasspy/piper/releases/download/v1.1.0/piper_amd64.tar.gz) (64-bit desktop Linux) * [arm64](https://github.com/rhasspy/piper/releases/download/v1.1.0/piper_arm64.tar.gz) (64-bit Raspberry Pi 4) @@ -131,14 +131,22 @@ Pretrained checkpoints are available on [Hugging Face](https://huggingface.co/da See [src/python_run](src/python_run) -Run `scripts/setup.sh` to create a virtual environment and install the requirements. Then run: +Install with `pip`: ``` sh -echo 'Welcome to the world of speech synthesis!' | scripts/piper \ - --model /path/to/voice.onnx \ +pip install piper-tts +``` + +and then run: + +``` sh +echo 'Welcome to the world of speech synthesis!' | piper \ + --model en_US-lessac-medium \ --output_file welcome.wav ``` +This will automatically download [voice files](https://huggingface.co/rhasspy/piper-voices/tree/v1.0.0) the first time they're used. 
Use `--data-dir` and `--download-dir` to adjust where voices are found/downloaded. + If you'd like to use a GPU, install the `onnxruntime-gpu` package: @@ -146,5 +154,5 @@ If you'd like to use a GPU, install the `onnxruntime-gpu` package: .venv/bin/pip3 install onnxruntime-gpu ``` -and then run `scripts/piper` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). +and then run `piper` with the `--cuda` argument. You will need to have a functioning CUDA environment, such as what's available in [NVIDIA's PyTorch containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch). diff --git a/src/python_run/.gitignore b/src/python_run/.gitignore new file mode 100644 index 0000000..25aacff --- /dev/null +++ b/src/python_run/.gitignore @@ -0,0 +1,3 @@ +build/ +dist/ +*.egg-info/ diff --git a/src/python_run/MANIFEST.in b/src/python_run/MANIFEST.in new file mode 100644 index 0000000..09afd36 --- /dev/null +++ b/src/python_run/MANIFEST.in @@ -0,0 +1,2 @@ +include requirements.txt +include piper/voices.json diff --git a/src/python_run/piper/__init__.py b/src/python_run/piper/__init__.py index 8c521ed..a331d29 100644 --- a/src/python_run/piper/__init__.py +++ b/src/python_run/piper/__init__.py @@ -1,147 +1,5 @@ -import io -import json -import logging -import wave -from dataclasses import dataclass -from pathlib import Path -from typing import List, Mapping, Optional, Sequence, Union +from .voice import PiperVoice -import numpy as np -import onnxruntime -from espeak_phonemizer import Phonemizer - -_LOGGER = logging.getLogger(__name__) - -_BOS = "^" -_EOS = "$" -_PAD = "_" - - -@dataclass -class PiperConfig: - num_symbols: int - num_speakers: int - sample_rate: int - espeak_voice: str - length_scale: float - noise_scale: float - noise_w: float - phoneme_id_map: Mapping[str, Sequence[int]] - - -class Piper: - def __init__( 
- self, - model_path: Union[str, Path], - config_path: Optional[Union[str, Path]] = None, - use_cuda: bool = False, - ): - if config_path is None: - config_path = f"{model_path}.json" - - self.config = load_config(config_path) - self.phonemizer = Phonemizer(self.config.espeak_voice) - self.model = onnxruntime.InferenceSession( - str(model_path), - sess_options=onnxruntime.SessionOptions(), - providers=["CPUExecutionProvider"] - if not use_cuda - else ["CUDAExecutionProvider"], - ) - - def synthesize( - self, - text: str, - speaker_id: Optional[int] = None, - length_scale: Optional[float] = None, - noise_scale: Optional[float] = None, - noise_w: Optional[float] = None, - ) -> bytes: - """Synthesize WAV audio from text.""" - if length_scale is None: - length_scale = self.config.length_scale - - if noise_scale is None: - noise_scale = self.config.noise_scale - - if noise_w is None: - noise_w = self.config.noise_w - - phonemes_str = self.phonemizer.phonemize(text) - phonemes = [_BOS] + list(phonemes_str) - phoneme_ids: List[int] = [] - - for phoneme in phonemes: - if phoneme in self.config.phoneme_id_map: - phoneme_ids.extend(self.config.phoneme_id_map[phoneme]) - phoneme_ids.extend(self.config.phoneme_id_map[_PAD]) - else: - _LOGGER.warning("No id for phoneme: %s", phoneme) - - phoneme_ids.extend(self.config.phoneme_id_map[_EOS]) - - phoneme_ids_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0) - phoneme_ids_lengths = np.array([phoneme_ids_array.shape[1]], dtype=np.int64) - scales = np.array( - [noise_scale, length_scale, noise_w], - dtype=np.float32, - ) - - if (self.config.num_speakers > 1) and (speaker_id is None): - # Default speaker - speaker_id = 0 - - sid = None - - if speaker_id is not None: - sid = np.array([speaker_id], dtype=np.int64) - - # Synthesize through Onnx - audio = self.model.run( - None, - { - "input": phoneme_ids_array, - "input_lengths": phoneme_ids_lengths, - "scales": scales, - "sid": sid, - }, - )[0].squeeze((0, 1)) - audio = 
audio_float_to_int16(audio.squeeze()) - - # Convert to WAV - with io.BytesIO() as wav_io: - wav_file: wave.Wave_write = wave.open(wav_io, "wb") - with wav_file: - wav_file.setframerate(self.config.sample_rate) - wav_file.setsampwidth(2) - wav_file.setnchannels(1) - wav_file.writeframes(audio.tobytes()) - - return wav_io.getvalue() - - -def load_config(config_path: Union[str, Path]) -> PiperConfig: - with open(config_path, "r", encoding="utf-8") as config_file: - config_dict = json.load(config_file) - inference = config_dict.get("inference", {}) - - return PiperConfig( - num_symbols=config_dict["num_symbols"], - num_speakers=config_dict["num_speakers"], - sample_rate=config_dict["audio"]["sample_rate"], - espeak_voice=config_dict["espeak"]["voice"], - noise_scale=inference.get("noise_scale", 0.667), - length_scale=inference.get("length_scale", 1.0), - noise_w=inference.get("noise_w", 0.8), - phoneme_id_map=config_dict["phoneme_id_map"], - ) - - -def audio_float_to_int16( - audio: np.ndarray, max_wav_value: float = 32767.0 -) -> np.ndarray: - """Normalize audio and convert to int16 range""" - audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio)))) - audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value) - audio_norm = audio_norm.astype("int16") - return audio_norm +__all__ = [ + "PiperVoice", +] diff --git a/src/python_run/piper/__main__.py b/src/python_run/piper/__main__.py index a4cadb2..21e3ee2 100644 --- a/src/python_run/piper/__main__.py +++ b/src/python_run/piper/__main__.py @@ -2,10 +2,12 @@ import argparse import logging import sys import time -from functools import partial +import wave from pathlib import Path +from typing import Any, Dict -from . import Piper +from . 
import PiperVoice +from .download import ensure_voice_exists, find_voice, get_voices _FILE = Path(__file__) _DIR = _FILE.parent @@ -17,33 +19,108 @@ def main() -> None: parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file") parser.add_argument("-c", "--config", help="Path to model config file") parser.add_argument( - "-f", "--output_file", help="Path to output WAV file (default: stdout)" + "-f", + "--output-file", + "--output_file", + help="Path to output WAV file (default: stdout)", ) parser.add_argument( - "-d", "--output_dir", help="Path to output directory (default: cwd)" + "-d", + "--output-dir", + "--output_dir", + help="Path to output directory (default: cwd)", ) + parser.add_argument( + "--output-raw", + "--output_raw", + action="store_true", + help="Stream raw audio to stdout", + ) + # parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)") - parser.add_argument("--noise-scale", type=float, help="Generator noise") - parser.add_argument("--length-scale", type=float, help="Phoneme length") - parser.add_argument("--noise-w", type=float, help="Phoneme width noise") + parser.add_argument( + "--length-scale", "--length_scale", type=float, help="Phoneme length" + ) + parser.add_argument( + "--noise-scale", "--noise_scale", type=float, help="Generator noise" + ) + parser.add_argument( + "--noise-w", "--noise_w", type=float, help="Phoneme width noise" + ) + # parser.add_argument("--cuda", action="store_true", help="Use GPU") # + parser.add_argument( + "--sentence-silence", + "--sentence_silence", + type=float, + default=0.0, + help="Seconds of silence after each sentence", + ) + # + parser.add_argument( + "--data-dir", + "--data_dir", + action="append", + default=[str(Path.cwd())], + help="Data directory to check for downloaded models (default: current directory)", + ) + parser.add_argument( + "--download-dir", + "--download_dir", + help="Directory to download voices into (default: first data dir)", + ) + # 
parser.add_argument( "--debug", action="store_true", help="Print DEBUG messages to console" ) args = parser.parse_args() logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO) + _LOGGER.debug(args) - voice = Piper(args.model, config_path=args.config, use_cuda=args.cuda) - synthesize = partial( - voice.synthesize, - speaker_id=args.speaker, - length_scale=args.length_scale, - noise_scale=args.noise_scale, - noise_w=args.noise_w, - ) + if not args.download_dir: + # Download to first data directory by default + args.download_dir = args.data_dir[0] - if args.output_dir: + # Download voice if file doesn't exist + model_path = Path(args.model) + if not model_path.exists(): + # Load voice info + voices_info = get_voices() + + # Resolve aliases for backwards compatibility with old voice names + aliases_info: Dict[str, Any] = {} + for voice_info in voices_info.values(): + for voice_alias in voice_info.get("aliases", []): + aliases_info[voice_alias] = {"_is_alias": True, **voice_info} + + voices_info.update(aliases_info) + ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info) + args.model, args.config = find_voice(args.model, args.data_dir) + + # Load voice + voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda) + synthesize_args = { + "speaker_id": args.speaker, + "length_scale": args.length_scale, + "noise_scale": args.noise_scale, + "noise_w": args.noise_w, + "sentence_silence": args.sentence_silence, + } + + if args.output_raw: + # Read line-by-line + for line in sys.stdin: + line = line.strip() + if not line: + continue + + # Write raw audio to stdout as its produced + audio_stream = voice.synthesize_stream_raw(line, **synthesize_args) + for audio_bytes in audio_stream: + sys.stdout.buffer.write(audio_bytes) + sys.stdout.buffer.flush() + elif args.output_dir: output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) @@ -53,21 +130,23 @@ def main() -> None: if not line: continue 
- wav_bytes = synthesize(line) wav_path = output_dir / f"{time.monotonic_ns()}.wav" - wav_path.write_bytes(wav_bytes) + with wave.open(str(wav_path), "wb") as wav_file: + voice.synthesize(line, wav_file, **synthesize_args) + _LOGGER.info("Wrote %s", wav_path) else: # Read entire input text = sys.stdin.read() - wav_bytes = synthesize(text) if (not args.output_file) or (args.output_file == "-"): # Write to stdout - sys.stdout.buffer.write(wav_bytes) + with wave.open(sys.stdout.buffer, "wb") as wav_file: + voice.synthesize(text, wav_file, **synthesize_args) else: - with open(args.output_file, "wb") as output_file: - output_file.write(wav_bytes) + # Write to file + with wave.open(args.output_file, "wb") as wav_file: + voice.synthesize(text, wav_file, **synthesize_args) if __name__ == "__main__": diff --git a/src/python_run/piper/config.py b/src/python_run/piper/config.py new file mode 100644 index 0000000..afda572 --- /dev/null +++ b/src/python_run/piper/config.py @@ -0,0 +1,53 @@ +"""Piper configuration""" +from dataclasses import dataclass +from enum import Enum +from typing import Any, Dict, Mapping, Sequence + + +class PhonemeType(str, Enum): + ESPEAK = "espeak" + TEXT = "text" + + +@dataclass +class PiperConfig: + """Piper configuration""" + + num_symbols: int + """Number of phonemes""" + + num_speakers: int + """Number of speakers""" + + sample_rate: int + """Sample rate of output audio""" + + espeak_voice: str + """Name of espeak-ng voice or alphabet""" + + length_scale: float + noise_scale: float + noise_w: float + + phoneme_id_map: Mapping[str, Sequence[int]] + """Phoneme -> [id,]""" + + phoneme_type: PhonemeType + """espeak or text""" + + @staticmethod + def from_dict(config: Dict[str, Any]) -> "PiperConfig": + inference = config.get("inference", {}) + + return PiperConfig( + num_symbols=config["num_symbols"], + num_speakers=config["num_speakers"], + sample_rate=config["audio"]["sample_rate"], + noise_scale=inference.get("noise_scale", 0.667), + 
length_scale=inference.get("length_scale", 1.0), + noise_w=inference.get("noise_w", 0.8), + # + espeak_voice=config["espeak"]["voice"], + phoneme_id_map=config["phoneme_id_map"], + phoneme_type=PhonemeType(config.get("phoneme_type", PhonemeType.ESPEAK)), + ) diff --git a/src/python_run/piper/const.py b/src/python_run/piper/const.py new file mode 100644 index 0000000..a9eadbb --- /dev/null +++ b/src/python_run/piper/const.py @@ -0,0 +1,5 @@ +"""Constants""" + +PAD = "_" # padding (0) +BOS = "^" # beginning of sentence +EOS = "$" # end of sentence diff --git a/src/python_run/piper/download.py b/src/python_run/piper/download.py new file mode 100755 index 0000000..9f59a6a --- /dev/null +++ b/src/python_run/piper/download.py @@ -0,0 +1,120 @@ +"""Utility for downloading Piper voices.""" +import json +import logging +import shutil +from pathlib import Path +from typing import Any, Dict, Iterable, Set, Tuple, Union +from urllib.request import urlopen + +from .file_hash import get_file_hash + +URL_FORMAT = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/{file}" + +_DIR = Path(__file__).parent +_LOGGER = logging.getLogger(__name__) + +_SKIP_FILES = {"MODEL_CARD"} + + +class VoiceNotFoundError(Exception): + pass + + +def get_voices() -> Dict[str, Any]: + """Loads available voices from embedded JSON file.""" + with open(_DIR / "voices.json", "r", encoding="utf-8") as voices_file: + return json.load(voices_file) + + +def ensure_voice_exists( + name: str, + data_dirs: Iterable[Union[str, Path]], + download_dir: Union[str, Path], + voices_info: Dict[str, Any], +): + assert data_dirs, "No data dirs" + if name not in voices_info: + raise VoiceNotFoundError(name) + + voice_info = voices_info[name] + voice_files = voice_info["files"] + files_to_download: Set[str] = set() + + for data_dir in data_dirs: + data_dir = Path(data_dir) + + # Check sizes/hashes + for file_path, file_info in voice_files.items(): + if file_path in files_to_download: + # Already planning to 
download + continue + + file_name = Path(file_path).name + if file_name in _SKIP_FILES: + continue + + data_file_path = data_dir / file_name + _LOGGER.debug("Checking %s", data_file_path) + if not data_file_path.exists(): + _LOGGER.debug("Missing %s", data_file_path) + files_to_download.add(file_path) + continue + + expected_size = file_info["size_bytes"] + actual_size = data_file_path.stat().st_size + if expected_size != actual_size: + _LOGGER.warning( + "Wrong size (expected=%s, actual=%s) for %s", + expected_size, + actual_size, + data_file_path, + ) + files_to_download.add(file_path) + continue + + expected_hash = file_info["md5_digest"] + actual_hash = get_file_hash(data_file_path) + if expected_hash != actual_hash: + _LOGGER.warning( + "Wrong hash (expected=%s, actual=%s) for %s", + expected_hash, + actual_hash, + data_file_path, + ) + files_to_download.add(file_path) + continue + + if (not voice_files) and (not files_to_download): + raise ValueError(f"Unable to find or download voice: {name}") + + # Download missing files + download_dir = Path(download_dir) + + for file_path in files_to_download: + file_name = Path(file_path).name + if file_name in _SKIP_FILES: + continue + + file_url = URL_FORMAT.format(file=file_path) + download_file_path = download_dir / file_name + download_file_path.parent.mkdir(parents=True, exist_ok=True) + + _LOGGER.debug("Downloading %s to %s", file_url, download_file_path) + with urlopen(file_url) as response, open( + download_file_path, "wb" + ) as download_file: + shutil.copyfileobj(response, download_file) + + _LOGGER.info("Downloaded %s (%s)", download_file_path, file_url) + + +def find_voice(name: str, data_dirs: Iterable[Union[str, Path]]) -> Tuple[Path, Path]: + for data_dir in data_dirs: + data_dir = Path(data_dir) + onnx_path = data_dir / f"{name}.onnx" + config_path = data_dir / f"{name}.onnx.json" + + if onnx_path.exists() and config_path.exists(): + return onnx_path, config_path + + raise ValueError(f"Missing files for 
voice {name}") diff --git a/src/python_run/piper/file_hash.py b/src/python_run/piper/file_hash.py new file mode 100644 index 0000000..b54587a --- /dev/null +++ b/src/python_run/piper/file_hash.py @@ -0,0 +1,46 @@ +import argparse +import hashlib +import json +import sys +from pathlib import Path +from typing import Union + + +def get_file_hash(path: Union[str, Path], bytes_per_chunk: int = 8192) -> str: + """Hash a file in chunks using md5.""" + path_hash = hashlib.md5() + with open(path, "rb") as path_file: + chunk = path_file.read(bytes_per_chunk) + while chunk: + path_hash.update(chunk) + chunk = path_file.read(bytes_per_chunk) + + return path_hash.hexdigest() + + +# ----------------------------------------------------------------------------- + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("file", nargs="+") + parser.add_argument("--dir", help="Parent directory") + args = parser.parse_args() + + if args.dir: + args.dir = Path(args.dir) + + hashes = {} + for path_str in args.file: + path = Path(path_str) + path_hash = get_file_hash(path) + if args.dir: + path = path.relative_to(args.dir) + + hashes[str(path)] = path_hash + + json.dump(hashes, sys.stdout) + + +if __name__ == "__main__": + main() diff --git a/src/python_run/piper/util.py b/src/python_run/piper/util.py new file mode 100644 index 0000000..ad94930 --- /dev/null +++ b/src/python_run/piper/util.py @@ -0,0 +1,12 @@ +"""Utilities""" +import numpy as np + + +def audio_float_to_int16( + audio: np.ndarray, max_wav_value: float = 32767.0 +) -> np.ndarray: + """Normalize audio and convert to int16 range""" + audio_norm = audio * (max_wav_value / max(0.01, np.max(np.abs(audio)))) + audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value) + audio_norm = audio_norm.astype("int16") + return audio_norm diff --git a/src/python_run/piper/voice.py b/src/python_run/piper/voice.py new file mode 100644 index 0000000..02a2ddd --- /dev/null +++ b/src/python_run/piper/voice.py @@ -0,0 
+1,177 @@ +import json +import logging +import wave +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable, List, Optional, Union + +import numpy as np +import onnxruntime +from piper_phonemize import phonemize_codepoints, phonemize_espeak, tashkeel_run + +from .config import PhonemeType, PiperConfig +from .const import BOS, EOS, PAD +from .util import audio_float_to_int16 + +_LOGGER = logging.getLogger(__name__) + + +@dataclass +class PiperVoice: + session: onnxruntime.InferenceSession + config: PiperConfig + + @staticmethod + def load( + model_path: Union[str, Path], + config_path: Optional[Union[str, Path]] = None, + use_cuda: bool = False, + ) -> "PiperVoice": + """Load an ONNX model and config.""" + if config_path is None: + config_path = f"{model_path}.json" + + with open(config_path, "r", encoding="utf-8") as config_file: + config_dict = json.load(config_file) + + return PiperVoice( + config=PiperConfig.from_dict(config_dict), + session=onnxruntime.InferenceSession( + str(model_path), + sess_options=onnxruntime.SessionOptions(), + providers=["CPUExecutionProvider"] + if not use_cuda + else ["CUDAExecutionProvider"], + ), + ) + + def phonemize(self, text: str) -> List[List[str]]: + """Text to phonemes grouped by sentence.""" + if self.config.phoneme_type == PhonemeType.ESPEAK: + if self.config.espeak_voice == "ar": + # Arabic diacritization + # https://github.com/mush42/libtashkeel/ + text = tashkeel_run(text) + + return phonemize_espeak(text, self.config.espeak_voice) + + if self.config.phoneme_type == PhonemeType.TEXT: + return phonemize_codepoints(text) + + raise ValueError(f"Unexpected phoneme type: {self.config.phoneme_type}") + + def phonemes_to_ids(self, phonemes: List[str]) -> List[int]: + """Phonemes to ids.""" + id_map = self.config.phoneme_id_map + ids: List[int] = list(id_map[BOS]) + + for phoneme in phonemes: + if phoneme not in id_map: + _LOGGER.warning("Missing phoneme from id map: %s", phoneme) + continue + + 
ids.extend(id_map[phoneme]) + ids.extend(id_map[PAD]) + + ids.extend(id_map[EOS]) + + return ids + + def synthesize( + self, + text: str, + wav_file: wave.Wave_write, + speaker_id: Optional[int] = None, + length_scale: Optional[float] = None, + noise_scale: Optional[float] = None, + noise_w: Optional[float] = None, + sentence_silence: float = 0.0, + ): + """Synthesize WAV audio from text.""" + wav_file.setframerate(self.config.sample_rate) + wav_file.setsampwidth(2) # 16-bit + wav_file.setnchannels(1) # mono + + for audio_bytes in self.synthesize_stream_raw( + text, + speaker_id=speaker_id, + length_scale=length_scale, + noise_scale=noise_scale, + noise_w=noise_w, + sentence_silence=sentence_silence, + ): + wav_file.writeframes(audio_bytes) + + def synthesize_stream_raw( + self, + text: str, + speaker_id: Optional[int] = None, + length_scale: Optional[float] = None, + noise_scale: Optional[float] = None, + noise_w: Optional[float] = None, + sentence_silence: float = 0.0, + ) -> Iterable[bytes]: + """Synthesize raw audio per sentence from text.""" + sentence_phonemes = self.phonemize(text) + + # 16-bit mono + num_silence_samples = int(sentence_silence * self.config.sample_rate) + silence_bytes = bytes(num_silence_samples * 2) + + for phonemes in sentence_phonemes: + phoneme_ids = self.phonemes_to_ids(phonemes) + yield self.synthesize_ids_to_raw( + phoneme_ids, + speaker_id=speaker_id, + length_scale=length_scale, + noise_scale=noise_scale, + noise_w=noise_w, + ) + silence_bytes + + def synthesize_ids_to_raw( + self, + phoneme_ids: List[int], + speaker_id: Optional[int] = None, + length_scale: Optional[float] = None, + noise_scale: Optional[float] = None, + noise_w: Optional[float] = None, + ) -> bytes: + """Synthesize raw audio from phoneme ids.""" + if length_scale is None: + length_scale = self.config.length_scale + + if noise_scale is None: + noise_scale = self.config.noise_scale + + if noise_w is None: + noise_w = self.config.noise_w + + phoneme_ids_array = 
np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0) + phoneme_ids_lengths = np.array([phoneme_ids_array.shape[1]], dtype=np.int64) + scales = np.array( + [noise_scale, length_scale, noise_w], + dtype=np.float32, + ) + + if (self.config.num_speakers > 1) and (speaker_id is None): + # Default speaker + speaker_id = 0 + + sid = None + + if speaker_id is not None: + sid = np.array([speaker_id], dtype=np.int64) + + # Synthesize through Onnx + audio = self.session.run( + None, + { + "input": phoneme_ids_array, + "input_lengths": phoneme_ids_lengths, + "scales": scales, + "sid": sid, + }, + )[0].squeeze((0, 1)) + audio = audio_float_to_int16(audio.squeeze()) + + return audio.tobytes() diff --git a/src/python_run/piper/voices.json b/src/python_run/piper/voices.json new file mode 100644 index 0000000..08d1908 --- /dev/null +++ b/src/python_run/piper/voices.json @@ -0,0 +1,3782 @@ +{ + "zh_CN-huayan-x_low": { + "key": "zh_CN-huayan-x_low", + "name": "huayan", + "language": { + "code": "zh_CN", + "family": "zh", + "region": "CN", + "name_native": "简体中文", + "name_english": "Chinese", + "country_english": "China" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx": { + "size_bytes": 20628813, + "md5_digest": "2b96570db6becd09814a608c8d14a64f" + }, + "zh/zh_CN/huayan/x_low/zh_CN-huayan-x_low.onnx.json": { + "size_bytes": 5954, + "md5_digest": "9270af91d9b44a35b75cb6ce70dac93e" + }, + "zh/zh_CN/huayan/x_low/MODEL_CARD": { + "size_bytes": 237, + "md5_digest": "715587a977945498c5741b74eb81a1fd" + } + }, + "aliases": [ + "zh-cn-huayan-x-low" + ] + }, + "zh_CN-huayan-medium": { + "key": "zh_CN-huayan-medium", + "name": "huayan", + "language": { + "code": "zh_CN", + "family": "zh", + "region": "CN", + "name_native": "简体中文", + "name_english": "Chinese", + "country_english": "China" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + 
"zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "40cdb7930ff91b81574d5f0489e076ea" + }, + "zh/zh_CN/huayan/medium/zh_CN-huayan-medium.onnx.json": { + "size_bytes": 6922, + "md5_digest": "f1c77a621e220d545042a4c8c2eaae64" + }, + "zh/zh_CN/huayan/medium/MODEL_CARD": { + "size_bytes": 276, + "md5_digest": "b23255ace0cda4c2e02134d8a70c2e03" + } + }, + "aliases": [] + }, + "ru_RU-irina-medium": { + "key": "ru_RU-irina-medium", + "name": "irina", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "21fbe77fdc68bdc35d7adb6bf4f52199" + }, + "ru/ru_RU/irina/medium/ru_RU-irina-medium.onnx.json": { + "size_bytes": 6846, + "md5_digest": "b915de8ef7523207d131bba462be11f3" + }, + "ru/ru_RU/irina/medium/MODEL_CARD": { + "size_bytes": 271, + "md5_digest": "397e67453b4ea5a95642673d0debb5ba" + } + }, + "aliases": [ + "ru-irinia-medium" + ] + }, + "ru_RU-dmitri-medium": { + "key": "ru_RU-dmitri-medium", + "name": "dmitri", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "589ccc91745a1e2353508ff62c5941b7" + }, + "ru/ru_RU/dmitri/medium/ru_RU-dmitri-medium.onnx.json": { + "size_bytes": 6921, + "md5_digest": "d9eff56c1b8441273bf83441e51ca06e" + }, + "ru/ru_RU/dmitri/medium/MODEL_CARD": { + "size_bytes": 276, + "md5_digest": "c19f9eff768d0c0e1f476a4c6ca1ff1e" + } + }, + "aliases": [] + }, + "ru_RU-denis-medium": { + "key": "ru_RU-denis-medium", + "name": "denis", + 
"language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "76c2f14e521fef3ed574f97ad492728e" + }, + "ru/ru_RU/denis/medium/ru_RU-denis-medium.onnx.json": { + "size_bytes": 6920, + "md5_digest": "cdecd72b59bbd7b22f406d91e0680beb" + }, + "ru/ru_RU/denis/medium/MODEL_CARD": { + "size_bytes": 275, + "md5_digest": "6fe09e0e097e4538809cc420653974e4" + } + }, + "aliases": [] + }, + "ru_RU-ruslan-medium": { + "key": "ru_RU-ruslan-medium", + "name": "ruslan", + "language": { + "code": "ru_RU", + "family": "ru", + "region": "RU", + "name_native": "Русский", + "name_english": "Russian", + "country_english": "Russia" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "731eb188e63b4c57320e38047ba2d850" + }, + "ru/ru_RU/ruslan/medium/ru_RU-ruslan-medium.onnx.json": { + "size_bytes": 7007, + "md5_digest": "b5b2edc5be5ce94e32ccb5239d18c012" + }, + "ru/ru_RU/ruslan/medium/MODEL_CARD": { + "size_bytes": 313, + "md5_digest": "7b50a255192cc1c44358d7cb20ddbb5c" + } + }, + "aliases": [] + }, + "nl_BE-nathalie-x_low": { + "key": "nl_BE-nathalie-x_low", + "name": "nathalie", + "language": { + "code": "nl_BE", + "family": "nl", + "region": "BE", + "name_native": "Nederlands", + "name_english": "Dutch", + "country_english": "Belgium" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "nl/nl_BE/nathalie/x_low/nl_BE-nathalie-x_low.onnx": { + "size_bytes": 20628813, + "md5_digest": "4a00803b60caecad30ea612bcd9f9344" + }, + "nl/nl_BE/nathalie/x_low/nl_BE-nathalie-x_low.onnx.json": { + "size_bytes": 5955, + "md5_digest": "9370d6cec73eaa3006303b490983119a" + 
}, + "nl/nl_BE/nathalie/x_low/MODEL_CARD": { + "size_bytes": 246, + "md5_digest": "5df62094bde427374223f91f44476392" + } + }, + "aliases": [ + "nl-nathalie-x-low" + ] + }, + "nl_BE-nathalie-medium": { + "key": "nl_BE-nathalie-medium", + "name": "nathalie", + "language": { + "code": "nl_BE", + "family": "nl", + "region": "BE", + "name_native": "Nederlands", + "name_english": "Dutch", + "country_english": "Belgium" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "ab0c38b5f66764b59ad9e3e98b1c2172" + }, + "nl/nl_BE/nathalie/medium/nl_BE-nathalie-medium.onnx.json": { + "size_bytes": 7009, + "md5_digest": "ed7e4ab955d9514f4274ad93cafe0618" + }, + "nl/nl_BE/nathalie/medium/MODEL_CARD": { + "size_bytes": 284, + "md5_digest": "ff335f87ca41a3f89180781498e02635" + } + }, + "aliases": [] + }, + "nl_BE-rdh-x_low": { + "key": "nl_BE-rdh-x_low", + "name": "rdh", + "language": { + "code": "nl_BE", + "family": "nl", + "region": "BE", + "name_native": "Nederlands", + "name_english": "Dutch", + "country_english": "Belgium" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "nl/nl_BE/rdh/x_low/nl_BE-rdh-x_low.onnx": { + "size_bytes": 20628813, + "md5_digest": "7d60d0de9ad9ec11a1d293665743afda" + }, + "nl/nl_BE/rdh/x_low/nl_BE-rdh-x_low.onnx.json": { + "size_bytes": 5950, + "md5_digest": "44fc96689545fb39bce58301d2b1d5d7" + }, + "nl/nl_BE/rdh/x_low/MODEL_CARD": { + "size_bytes": 242, + "md5_digest": "6d0157bcd5ff281717e663d56dab980e" + } + }, + "aliases": [ + "nl-rdh-x-low" + ] + }, + "nl_BE-rdh-medium": { + "key": "nl_BE-rdh-medium", + "name": "rdh", + "language": { + "code": "nl_BE", + "family": "nl", + "region": "BE", + "name_native": "Nederlands", + "name_english": "Dutch", + "country_english": "Belgium" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + 
"nl/nl_BE/rdh/medium/nl_BE-rdh-medium.onnx": { + "size_bytes": 63104526, + "md5_digest": "33d3469d745677ec4d7e96eb4145b09e" + }, + "nl/nl_BE/rdh/medium/nl_BE-rdh-medium.onnx.json": { + "size_bytes": 5951, + "md5_digest": "652cc22303033a5ddc3dd12e595cd83e" + }, + "nl/nl_BE/rdh/medium/MODEL_CARD": { + "size_bytes": 244, + "md5_digest": "dc4487b06fcef6ff270c852ce12947b9" + } + }, + "aliases": [ + "nl-rdh-medium" + ] + }, + "nl_NL-mls_7432-low": { + "key": "nl_NL-mls_7432-low", + "name": "mls_7432", + "language": { + "code": "nl_NL", + "family": "nl", + "region": "NL", + "name_native": "Nederlands", + "name_english": "Dutch", + "country_english": "Netherlands" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "nl/nl_NL/mls_7432/low/nl_NL-mls_7432-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "044b69d583e191203997761434607273" + }, + "nl/nl_NL/mls_7432/low/nl_NL-mls_7432-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "242366c4042cd3df8c4f8727e4171010" + }, + "nl/nl_NL/mls_7432/low/MODEL_CARD": { + "size_bytes": 260, + "md5_digest": "5d8ee8e955f077fc99cac61191d00892" + } + }, + "aliases": [ + "nl-mls_7432-low" + ] + }, + "nl_NL-mls_5809-low": { + "key": "nl_NL-mls_5809-low", + "name": "mls_5809", + "language": { + "code": "nl_NL", + "family": "nl", + "region": "NL", + "name_native": "Nederlands", + "name_english": "Dutch", + "country_english": "Netherlands" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "nl/nl_NL/mls_5809/low/nl_NL-mls_5809-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "e69130a776b04c9962a1fefb4878d7d9" + }, + "nl/nl_NL/mls_5809/low/nl_NL-mls_5809-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "f829e4463305e56d3c4dfeb25d3f9232" + }, + "nl/nl_NL/mls_5809/low/MODEL_CARD": { + "size_bytes": 261, + "md5_digest": "ac4b35e581cea8418909947a29a671bb" + } + }, + "aliases": [ + "nl-mls_5809-low" + ] + }, + "it_IT-riccardo-x_low": { + "key": 
"it_IT-riccardo-x_low", + "name": "riccardo", + "language": { + "code": "it_IT", + "family": "it", + "region": "IT", + "name_native": "Italiano", + "name_english": "Italian", + "country_english": "Italy" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "it/it_IT/riccardo/x_low/it_IT-riccardo-x_low.onnx": { + "size_bytes": 28130791, + "md5_digest": "2c564b67f6bfaf3ad02d28ab528929b8" + }, + "it/it_IT/riccardo/x_low/it_IT-riccardo-x_low.onnx.json": { + "size_bytes": 5955, + "md5_digest": "110a150990a72d9533677b914cb028ce" + }, + "it/it_IT/riccardo/x_low/MODEL_CARD": { + "size_bytes": 260, + "md5_digest": "3e70f29ab998ac0380edc0cec7395e80" + } + }, + "aliases": [ + "it-riccardo_fasol-x-low" + ] + }, + "ne_NP-google-x_low": { + "key": "ne_NP-google-x_low", + "name": "google", + "language": { + "code": "ne_NP", + "family": "ne", + "region": "NP", + "name_native": "नेपाली", + "name_english": "Nepali", + "country_english": "Nepal" + }, + "quality": "x_low", + "num_speakers": 18, + "speaker_id_map": { + "0546": 0, + "3614": 1, + "2099": 2, + "3960": 3, + "6834": 4, + "7957": 5, + "6329": 6, + "9407": 7, + "6587": 8, + "0258": 9, + "2139": 10, + "5687": 11, + "0283": 12, + "3997": 13, + "3154": 14, + "0883": 15, + "2027": 16, + "0649": 17 + }, + "files": { + "ne/ne_NP/google/x_low/ne_NP-google-x_low.onnx": { + "size_bytes": 27693157, + "md5_digest": "b11030daccc781a7db64c9413197ca8a" + }, + "ne/ne_NP/google/x_low/ne_NP-google-x_low.onnx.json": { + "size_bytes": 6308, + "md5_digest": "908b3b9ef643863b9f4cb26562c01caf" + }, + "ne/ne_NP/google/x_low/MODEL_CARD": { + "size_bytes": 244, + "md5_digest": "5ea405c002a69df5961c8d43cadbb844" + } + }, + "aliases": [ + "ne-google-x-low" + ] + }, + "ne_NP-google-medium": { + "key": "ne_NP-google-medium", + "name": "google", + "language": { + "code": "ne_NP", + "family": "ne", + "region": "NP", + "name_native": "नेपाली", + "name_english": "Nepali", + "country_english": "Nepal" + }, + "quality": 
"medium", + "num_speakers": 18, + "speaker_id_map": { + "0546": 0, + "3614": 1, + "2099": 2, + "3960": 3, + "6834": 4, + "7957": 5, + "6329": 6, + "9407": 7, + "6587": 8, + "0258": 9, + "2139": 10, + "5687": 11, + "0283": 12, + "3997": 13, + "3154": 14, + "0883": 15, + "2027": 16, + "0649": 17 + }, + "files": { + "ne/ne_NP/google/medium/ne_NP-google-medium.onnx": { + "size_bytes": 76766385, + "md5_digest": "2c24ccfe18eca2f14bccd0a188516109" + }, + "ne/ne_NP/google/medium/ne_NP-google-medium.onnx.json": { + "size_bytes": 7362, + "md5_digest": "8a28e58fead8c687caa0db23ba5d602e" + }, + "ne/ne_NP/google/medium/MODEL_CARD": { + "size_bytes": 283, + "md5_digest": "afe022ba061870d0c9fe085fe9a9f31f" + } + }, + "aliases": [ + "ne-google-medium" + ] + }, + "el_GR-rapunzelina-low": { + "key": "el_GR-rapunzelina-low", + "name": "rapunzelina", + "language": { + "code": "el_GR", + "family": "el", + "region": "GR", + "name_native": "Ελληνικά", + "name_english": "Greek", + "country_english": "Greece" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "el/el_GR/rapunzelina/low/el_GR-rapunzelina-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "04e0151b653bb64540b1cde027054140" + }, + "el/el_GR/rapunzelina/low/el_GR-rapunzelina-low.onnx.json": { + "size_bytes": 5956, + "md5_digest": "7db5de1c165aec2c8ff9b9dd63f09f71" + }, + "el/el_GR/rapunzelina/low/MODEL_CARD": { + "size_bytes": 303, + "md5_digest": "c75270b41e7bf60dacd351753a483574" + } + }, + "aliases": [] + }, + "kk_KZ-issai-high": { + "key": "kk_KZ-issai-high", + "name": "issai", + "language": { + "code": "kk_KZ", + "family": "kk", + "region": "KZ", + "name_native": "қазақша", + "name_english": "Kazakh", + "country_english": "Kazakhstan" + }, + "quality": "high", + "num_speakers": 6, + "speaker_id_map": { + "ISSAI_KazakhTTS2_M2": 0, + "ISSAI_KazakhTTS_M1_Iseke": 1, + "ISSAI_KazakhTTS2_F3": 2, + "ISSAI_KazakhTTS_F1_Raya": 3, + "ISSAI_KazakhTTS2_F1": 4, + "ISSAI_KazakhTTS2_F2": 5 + }, + 
"files": { + "kk/kk_KZ/issai/high/kk_KZ-issai-high.onnx": { + "size_bytes": 127864258, + "md5_digest": "d5a97c25feb0949c187ae5f8e72753e3" + }, + "kk/kk_KZ/issai/high/kk_KZ-issai-high.onnx.json": { + "size_bytes": 6168, + "md5_digest": "13aaa087c24754c9e7aa14f1e223765a" + }, + "kk/kk_KZ/issai/high/MODEL_CARD": { + "size_bytes": 237, + "md5_digest": "30487d1011336ed15feabd156424cbd9" + } + }, + "aliases": [ + "kk-issai-high" + ] + }, + "kk_KZ-raya-x_low": { + "key": "kk_KZ-raya-x_low", + "name": "raya", + "language": { + "code": "kk_KZ", + "family": "kk", + "region": "KZ", + "name_native": "қазақша", + "name_english": "Kazakh", + "country_english": "Kazakhstan" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "kk/kk_KZ/raya/x_low/kk_KZ-raya-x_low.onnx": { + "size_bytes": 28130791, + "md5_digest": "476ecc32e07cad26572a50f26d0ebe28" + }, + "kk/kk_KZ/raya/x_low/kk_KZ-raya-x_low.onnx.json": { + "size_bytes": 5951, + "md5_digest": "459f40a8f5842a170793fcbf4a1130e6" + }, + "kk/kk_KZ/raya/x_low/MODEL_CARD": { + "size_bytes": 238, + "md5_digest": "fb34d2e65fac42f4d6e003d3d30c897e" + } + }, + "aliases": [ + "kk-raya-x-low" + ] + }, + "kk_KZ-iseke-x_low": { + "key": "kk_KZ-iseke-x_low", + "name": "iseke", + "language": { + "code": "kk_KZ", + "family": "kk", + "region": "KZ", + "name_native": "қазақша", + "name_english": "Kazakh", + "country_english": "Kazakhstan" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "kk/kk_KZ/iseke/x_low/kk_KZ-iseke-x_low.onnx": { + "size_bytes": 28130791, + "md5_digest": "1674f3f4ce48981d77e500741afa4ff9" + }, + "kk/kk_KZ/iseke/x_low/kk_KZ-iseke-x_low.onnx.json": { + "size_bytes": 5952, + "md5_digest": "780d3d67bea7f29fac0dcdcd74ef3e54" + }, + "kk/kk_KZ/iseke/x_low/MODEL_CARD": { + "size_bytes": 239, + "md5_digest": "fce637093c4437a1f929280913a86aa5" + } + }, + "aliases": [ + "kk-iseke-x-low" + ] + }, + "pt_BR-faber-medium": { + "key": "pt_BR-faber-medium", + "name": 
"faber", + "language": { + "code": "pt_BR", + "family": "pt", + "region": "BR", + "name_native": "Português", + "name_english": "Portuguese", + "country_english": "Brazil" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "pt/pt_BR/faber/medium/pt_BR-faber-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "e0724a2f07965f6523d2a1e96b488a4c" + }, + "pt/pt_BR/faber/medium/pt_BR-faber-medium.onnx.json": { + "size_bytes": 6969, + "md5_digest": "cbd8c19032f775db27d563b3bc3df7c6" + }, + "pt/pt_BR/faber/medium/MODEL_CARD": { + "size_bytes": 278, + "md5_digest": "a81a3840b1749cf34b0e31de1577ef47" + } + }, + "aliases": [] + }, + "pt_BR-edresson-low": { + "key": "pt_BR-edresson-low", + "name": "edresson", + "language": { + "code": "pt_BR", + "family": "pt", + "region": "BR", + "name_native": "Português", + "name_english": "Portuguese", + "country_english": "Brazil" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "pt/pt_BR/edresson/low/pt_BR-edresson-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "53e365c040dd07890fe1855b64c7cc58" + }, + "pt/pt_BR/edresson/low/pt_BR-edresson-low.onnx.json": { + "size_bytes": 5956, + "md5_digest": "83421e3628c25e62fb0ea046b939f926" + }, + "pt/pt_BR/edresson/low/MODEL_CARD": { + "size_bytes": 283, + "md5_digest": "62cde47b9a3214109e601f90eeadea11" + } + }, + "aliases": [ + "pt-br-edresson-low" + ] + }, + "de_DE-eva_k-x_low": { + "key": "de_DE-eva_k-x_low", + "name": "eva_k", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "de/de_DE/eva_k/x_low/de_DE-eva_k-x_low.onnx": { + "size_bytes": 20628813, + "md5_digest": "51bfc52a58282c2e4fc01ae66567a708" + }, + "de/de_DE/eva_k/x_low/de_DE-eva_k-x_low.onnx.json": { + "size_bytes": 5952, + "md5_digest": 
"2593d0fab71795b7ba76832579c1185a" + }, + "de/de_DE/eva_k/x_low/MODEL_CARD": { + "size_bytes": 246, + "md5_digest": "02b01f3d47b2798ece347b2c7e94c9e9" + } + }, + "aliases": [ + "de-eva_k-x-low" + ] + }, + "de_DE-pavoque-low": { + "key": "de_DE-pavoque-low", + "name": "pavoque", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "de/de_DE/pavoque/low/de_DE-pavoque-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "bc37dccbad87fd65c8501c412c0c31ca" + }, + "de/de_DE/pavoque/low/de_DE-pavoque-low.onnx.json": { + "size_bytes": 5952, + "md5_digest": "9dbe88264431af0ea20fa4ca4fe03091" + }, + "de/de_DE/pavoque/low/MODEL_CARD": { + "size_bytes": 309, + "md5_digest": "e0aacaf7b834938c4e3ad1fb3f68ef87" + } + }, + "aliases": [ + "de-pavoque-low" + ] + }, + "de_DE-kerstin-low": { + "key": "de_DE-kerstin-low", + "name": "kerstin", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "de/de_DE/kerstin/low/de_DE-kerstin-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "1d5e5788cfddb04cbb34418f2841931e" + }, + "de/de_DE/kerstin/low/de_DE-kerstin-low.onnx.json": { + "size_bytes": 5952, + "md5_digest": "952af66f912007b6ec6b065b94072447" + }, + "de/de_DE/kerstin/low/MODEL_CARD": { + "size_bytes": 272, + "md5_digest": "69ec1bc99fc7e19c9ddcdf712920a6c7" + } + }, + "aliases": [ + "de-kerstin-low" + ] + }, + "de_DE-thorsten-low": { + "key": "de_DE-thorsten-low", + "name": "thorsten", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": 
{}, + "files": { + "de/de_DE/thorsten/low/de_DE-thorsten-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "c06eb96aceb61895fcb09ffc30eef60b" + }, + "de/de_DE/thorsten/low/de_DE-thorsten-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "a6d7c8de4b562e9631ef3f262756e51b" + }, + "de/de_DE/thorsten/low/MODEL_CARD": { + "size_bytes": 274, + "md5_digest": "203f58b93f0372564e745f1e05ea47bb" + } + }, + "aliases": [ + "de-thorsten-low" + ] + }, + "de_DE-thorsten-medium": { + "key": "de_DE-thorsten-medium", + "name": "thorsten", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "de/de_DE/thorsten/medium/de_DE-thorsten-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "a129b00fb3078df43c96bab6c94535c0" + }, + "de/de_DE/thorsten/medium/de_DE-thorsten-medium.onnx.json": { + "size_bytes": 6923, + "md5_digest": "0ba02428e952e6556157df235b740e93" + }, + "de/de_DE/thorsten/medium/MODEL_CARD": { + "size_bytes": 285, + "md5_digest": "e84cf8b09957fccceb068a3c1664d0f3" + } + }, + "aliases": [] + }, + "de_DE-ramona-low": { + "key": "de_DE-ramona-low", + "name": "ramona", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "de/de_DE/ramona/low/de_DE-ramona-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "b4aaf3673170a0d96519cdc992c23fda" + }, + "de/de_DE/ramona/low/de_DE-ramona-low.onnx.json": { + "size_bytes": 5951, + "md5_digest": "b86770e4875dc512fc4fe53ebf3492bc" + }, + "de/de_DE/ramona/low/MODEL_CARD": { + "size_bytes": 255, + "md5_digest": "c970992423b5fc7a26340a9363e15952" + } + }, + "aliases": [ + "de-ramona-low" + ] + }, + "de_DE-karlsson-low": { + "key": 
"de_DE-karlsson-low", + "name": "karlsson", + "language": { + "code": "de_DE", + "family": "de", + "region": "DE", + "name_native": "Deutsch", + "name_english": "German", + "country_english": "Germany" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "de/de_DE/karlsson/low/de_DE-karlsson-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "c94b5b8e8c7147b4b2c4a19ca5a3c41b" + }, + "de/de_DE/karlsson/low/de_DE-karlsson-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "09bb5d214e30fe91427097634070b191" + }, + "de/de_DE/karlsson/low/MODEL_CARD": { + "size_bytes": 289, + "md5_digest": "6e2f3eec10cf7fceb0b68b67eccd06a4" + } + }, + "aliases": [ + "de-karlsson-low" + ] + }, + "da_DK-talesyntese-medium": { + "key": "da_DK-talesyntese-medium", + "name": "talesyntese", + "language": { + "code": "da_DK", + "family": "da", + "region": "DK", + "name_native": "Dansk", + "name_english": "Danish", + "country_english": "Denmark" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "da/da_DK/talesyntese/medium/da_DK-talesyntese-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "9c05494a3e0c1136337581e01222395d" + }, + "da/da_DK/talesyntese/medium/da_DK-talesyntese-medium.onnx.json": { + "size_bytes": 7012, + "md5_digest": "4cfe905fcf8fdb0f84d6705a3dc7844b" + }, + "da/da_DK/talesyntese/medium/MODEL_CARD": { + "size_bytes": 308, + "md5_digest": "628cc03fca8f5d2c454824d6252955ad" + } + }, + "aliases": [ + "da-nst_talesyntese-medium" + ] + }, + "is_IS-ugla-medium": { + "key": "is_IS-ugla-medium", + "name": "ugla", + "language": { + "code": "is_IS", + "family": "is", + "region": "IS", + "name_native": "íslenska", + "name_english": "Icelandic", + "country_english": "Iceland" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "is/is_IS/ugla/medium/is_IS-ugla-medium.onnx": { + "size_bytes": 76495465, + "md5_digest": "722fcea3546f0113ad6664290aa97cab" + }, + 
"is/is_IS/ugla/medium/is_IS-ugla-medium.onnx.json": { + "size_bytes": 5952, + "md5_digest": "13be98e291aada712bf0ca47d805209f" + }, + "is/is_IS/ugla/medium/MODEL_CARD": { + "size_bytes": 240, + "md5_digest": "a3ba0a35bc26d440ee3b0872e435fcd5" + } + }, + "aliases": [ + "is-ugla-medium" + ] + }, + "is_IS-salka-medium": { + "key": "is_IS-salka-medium", + "name": "salka", + "language": { + "code": "is_IS", + "family": "is", + "region": "IS", + "name_native": "íslenska", + "name_english": "Icelandic", + "country_english": "Iceland" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "is/is_IS/salka/medium/is_IS-salka-medium.onnx": { + "size_bytes": 76495465, + "md5_digest": "5967c9456b931d6123687d7b78fd81a7" + }, + "is/is_IS/salka/medium/is_IS-salka-medium.onnx.json": { + "size_bytes": 5953, + "md5_digest": "f6424072a68ed4d57c210682cf1a33a1" + }, + "is/is_IS/salka/medium/MODEL_CARD": { + "size_bytes": 241, + "md5_digest": "0f3d286069e4c7bead9b40ece3bbefe6" + } + }, + "aliases": [ + "is-salka-medium" + ] + }, + "is_IS-steinn-medium": { + "key": "is_IS-steinn-medium", + "name": "steinn", + "language": { + "code": "is_IS", + "family": "is", + "region": "IS", + "name_native": "íslenska", + "name_english": "Icelandic", + "country_english": "Iceland" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "is/is_IS/steinn/medium/is_IS-steinn-medium.onnx": { + "size_bytes": 76495465, + "md5_digest": "fd8189eb0a72e78d525e70a71aaa792c" + }, + "is/is_IS/steinn/medium/is_IS-steinn-medium.onnx.json": { + "size_bytes": 5954, + "md5_digest": "4e777952d5cdbfe6a074fba17555e714" + }, + "is/is_IS/steinn/medium/MODEL_CARD": { + "size_bytes": 242, + "md5_digest": "45ab46f37e5a6bdf739d58496752e6a0" + } + }, + "aliases": [ + "is-steinn-medium" + ] + }, + "is_IS-bui-medium": { + "key": "is_IS-bui-medium", + "name": "bui", + "language": { + "code": "is_IS", + "family": "is", + "region": "IS", + "name_native": "íslenska", + 
"name_english": "Icelandic", + "country_english": "Iceland" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "is/is_IS/bui/medium/is_IS-bui-medium.onnx": { + "size_bytes": 76495465, + "md5_digest": "08332bb41a67b52a3361bd1e8e36fb10" + }, + "is/is_IS/bui/medium/is_IS-bui-medium.onnx.json": { + "size_bytes": 5951, + "md5_digest": "ff4231eeba2c144bf8466d0adb9671db" + }, + "is/is_IS/bui/medium/MODEL_CARD": { + "size_bytes": 246, + "md5_digest": "a055aad199d8cc58e52913ff2af461d8" + } + }, + "aliases": [ + "is-bui-medium" + ] + }, + "vi_VN-vivos-x_low": { + "key": "vi_VN-vivos-x_low", + "name": "vivos", + "language": { + "code": "vi_VN", + "family": "vi", + "region": "VN", + "name_native": "Tiếng Việt", + "name_english": "Vietnamese", + "country_english": "Vietnam" + }, + "quality": "x_low", + "num_speakers": 65, + "speaker_id_map": { + "VIVOSSPK13": 0, + "VIVOSSPK14": 1, + "VIVOSSPK15": 2, + "VIVOSSPK16": 3, + "VIVOSSPK17": 4, + "VIVOSSPK18": 5, + "VIVOSSPK19": 6, + "VIVOSSPK20": 7, + "VIVOSSPK21": 8, + "VIVOSSPK22": 9, + "VIVOSSPK26": 10, + "VIVOSSPK34": 11, + "VIVOSSPK40": 12, + "VIVOSSPK41": 13, + "VIVOSSPK42": 14, + "VIVOSSPK43": 15, + "VIVOSSPK44": 16, + "VIVOSSPK45": 17, + "VIVOSSPK46": 18, + "VIVOSSPK38": 19, + "VIVOSSPK31": 20, + "VIVOSSPK35": 21, + "VIVOSSPK01": 22, + "VIVOSSPK02": 23, + "VIVOSSPK03": 24, + "VIVOSSPK04": 25, + "VIVOSSPK05": 26, + "VIVOSSPK06": 27, + "VIVOSSPK07": 28, + "VIVOSSPK08": 29, + "VIVOSSPK09": 30, + "VIVOSSPK10": 31, + "VIVOSSPK11": 32, + "VIVOSSPK12": 33, + "VIVOSSPK27": 34, + "VIVOSSPK36": 35, + "VIVOSSPK33": 36, + "VIVOSSPK32": 37, + "VIVOSSPK29": 38, + "VIVOSSPK39": 39, + "VIVOSSPK25": 40, + "VIVOSSPK28": 41, + "VIVOSSPK30": 42, + "VIVOSSPK37": 43, + "VIVOSSPK23": 44, + "VIVOSSPK24": 45, + "VIVOSDEV02": 46, + "VIVOSDEV03": 47, + "VIVOSDEV01": 48, + "VIVOSDEV04": 49, + "VIVOSDEV05": 50, + "VIVOSDEV06": 51, + "VIVOSDEV07": 52, + "VIVOSDEV08": 53, + "VIVOSDEV09": 54, + "VIVOSDEV10": 55, + 
"VIVOSDEV11": 56, + "VIVOSDEV12": 57, + "VIVOSDEV13": 58, + "VIVOSDEV14": 59, + "VIVOSDEV15": 60, + "VIVOSDEV16": 61, + "VIVOSDEV17": 62, + "VIVOSDEV18": 63, + "VIVOSDEV19": 64 + }, + "files": { + "vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx": { + "size_bytes": 27789413, + "md5_digest": "d5880d32e340f57489dcb9d4f1f7aa04" + }, + "vi/vi_VN/vivos/x_low/vi_VN-vivos-x_low.onnx.json": { + "size_bytes": 7637, + "md5_digest": "85cf0a21c7de5c37bad0439bbcc6223e" + }, + "vi/vi_VN/vivos/x_low/MODEL_CARD": { + "size_bytes": 272, + "md5_digest": "6bd1265a94a8f6bcce74a5b1145a7f95" + } + }, + "aliases": [ + "vi-vivos-x-low" + ] + }, + "vi_VN-25hours_single-low": { + "key": "vi_VN-25hours_single-low", + "name": "25hours_single", + "language": { + "code": "vi_VN", + "family": "vi", + "region": "VN", + "name_native": "Tiếng Việt", + "name_english": "Vietnamese", + "country_english": "Vietnam" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "54ff8fb35b0084336377ddd10717e1fa" + }, + "vi/vi_VN/25hours_single/low/vi_VN-25hours_single-low.onnx.json": { + "size_bytes": 5959, + "md5_digest": "38c5cd03543f3a76a81ae28915b9eee0" + }, + "vi/vi_VN/25hours_single/low/MODEL_CARD": { + "size_bytes": 343, + "md5_digest": "25eb4744418cd7b8da0a9096dcfa6e61" + } + }, + "aliases": [ + "vi-25hours-single-low" + ] + }, + "vi_VN-vais1000-medium": { + "key": "vi_VN-vais1000-medium", + "name": "vais1000", + "language": { + "code": "vi_VN", + "family": "vi", + "region": "VN", + "name_native": "Tiếng Việt", + "name_english": "Vietnamese", + "country_english": "Vietnam" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "5e42428c4f6131f75557cf156c9c1526" + }, + "vi/vi_VN/vais1000/medium/vi_VN-vais1000-medium.onnx.json": { + "size_bytes": 
6979, + "md5_digest": "5fd6b9a5116abc0ceeccef1140a222b5" + }, + "vi/vi_VN/vais1000/medium/MODEL_CARD": { + "size_bytes": 361, + "md5_digest": "1beeecba9042e5925b0c5fbd138c779d" + } + }, + "aliases": [] + }, + "sv_SE-nst-medium": { + "key": "sv_SE-nst-medium", + "name": "nst", + "language": { + "code": "sv_SE", + "family": "sv", + "region": "SE", + "name_native": "Svenska", + "name_english": "Swedish", + "country_english": "Sweden" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx": { + "size_bytes": 63104526, + "md5_digest": "20266cf58e93ca2140444b77398aea04" + }, + "sv/sv_SE/nst/medium/sv_SE-nst-medium.onnx.json": { + "size_bytes": 5951, + "md5_digest": "a370e4dbc4acb86dfab4a43c28939b3c" + }, + "sv/sv_SE/nst/medium/MODEL_CARD": { + "size_bytes": 306, + "md5_digest": "4a7cdb8f218a909b2b5e81d1903628da" + } + }, + "aliases": [] + }, + "ca_ES-upc_ona-x_low": { + "key": "ca_ES-upc_ona-x_low", + "name": "upc_ona", + "language": { + "code": "ca_ES", + "family": "ca", + "region": "ES", + "name_native": "Català", + "name_english": "Catalan", + "country_english": "Spain" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ca/ca_ES/upc_ona/x_low/ca_ES-upc_ona-x_low.onnx": { + "size_bytes": 20628813, + "md5_digest": "ca22734cd8c5b01dd1fefbb42067ab06" + }, + "ca/ca_ES/upc_ona/x_low/ca_ES-upc_ona-x_low.onnx.json": { + "size_bytes": 5954, + "md5_digest": "27a280606d09c37017df9c268c70360e" + }, + "ca/ca_ES/upc_ona/x_low/MODEL_CARD": { + "size_bytes": 258, + "md5_digest": "1f555643ff6f7d9133679d730f3f6016" + } + }, + "aliases": [ + "ca-upc_ona-x-low" + ] + }, + "ca_ES-upc_ona-medium": { + "key": "ca_ES-upc_ona-medium", + "name": "upc_ona", + "language": { + "code": "ca_ES", + "family": "ca", + "region": "ES", + "name_native": "Català", + "name_english": "Catalan", + "country_english": "Spain" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + 
"files": { + "ca/ca_ES/upc_ona/medium/ca_ES-upc_ona-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "58ff3b049b6b721a4c353a551ec5ef3a" + }, + "ca/ca_ES/upc_ona/medium/ca_ES-upc_ona-medium.onnx.json": { + "size_bytes": 7008, + "md5_digest": "2340f71d7b29683f746db6e03c0dfabd" + }, + "ca/ca_ES/upc_ona/medium/MODEL_CARD": { + "size_bytes": 296, + "md5_digest": "395c782a56632400f46e7c442c7718bb" + } + }, + "aliases": [] + }, + "ca_ES-upc_pau-x_low": { + "key": "ca_ES-upc_pau-x_low", + "name": "upc_pau", + "language": { + "code": "ca_ES", + "family": "ca", + "region": "ES", + "name_native": "Català", + "name_english": "Catalan", + "country_english": "Spain" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ca/ca_ES/upc_pau/x_low/ca_ES-upc_pau-x_low.onnx": { + "size_bytes": 28130791, + "md5_digest": "504e8a643d5284fbfc95e9e392288b86" + }, + "ca/ca_ES/upc_pau/x_low/ca_ES-upc_pau-x_low.onnx.json": { + "size_bytes": 5954, + "md5_digest": "2006cd9e35ac60338234fa86edf165a5" + }, + "ca/ca_ES/upc_pau/x_low/MODEL_CARD": { + "size_bytes": 258, + "md5_digest": "4ff8699c4439c9f49180457f0becc49e" + } + }, + "aliases": [ + "ca-upc_pau-x-low" + ] + }, + "en_GB-southern_english_female-low": { + "key": "en_GB-southern_english_female-low", + "name": "southern_english_female", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_GB/southern_english_female/low/en_GB-southern_english_female-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "596c7ed4d8488cf64e027765dce2dad1" + }, + "en/en_GB/southern_english_female/low/en_GB-southern_english_female-low.onnx.json": { + "size_bytes": 5976, + "md5_digest": "6ae00287de2a78aa1661f75ae9e7bce6" + }, + "en/en_GB/southern_english_female/low/MODEL_CARD": { + "size_bytes": 296, + "md5_digest": 
"77ac998c8b37842ef98594567f141629" + } + }, + "aliases": [ + "en-gb-southern_english_female-low" + ] + }, + "en_GB-northern_english_male-medium": { + "key": "en_GB-northern_english_male-medium", + "name": "northern_english_male", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_GB/northern_english_male/medium/en_GB-northern_english_male-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "4c9a9735bfb76ad67c8b31b23d6840a0" + }, + "en/en_GB/northern_english_male/medium/en_GB-northern_english_male-medium.onnx.json": { + "size_bytes": 6944, + "md5_digest": "cfbc79946b49f6a57ebcc85feb55aad8" + }, + "en/en_GB/northern_english_male/medium/MODEL_CARD": { + "size_bytes": 305, + "md5_digest": "8d1b725154c658ead4f068389c319c82" + } + }, + "aliases": [] + }, + "en_GB-alba-medium": { + "key": "en_GB-alba-medium", + "name": "alba", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_GB/alba/medium/en_GB-alba-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "c07f313752bb3aba8061041666251654" + }, + "en/en_GB/alba/medium/en_GB-alba-medium.onnx.json": { + "size_bytes": 7013, + "md5_digest": "58b58c2541079e5ae693f93ffd15becf" + }, + "en/en_GB/alba/medium/MODEL_CARD": { + "size_bytes": 324, + "md5_digest": "d5a8716acb311b20e0f28710d0fcc982" + } + }, + "aliases": [] + }, + "en_GB-vctk-medium": { + "key": "en_GB-vctk-medium", + "name": "vctk", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "medium", + "num_speakers": 109, + 
"speaker_id_map": { + "p239": 0, + "p236": 1, + "p264": 2, + "p250": 3, + "p259": 4, + "p247": 5, + "p261": 6, + "p263": 7, + "p283": 8, + "p286": 9, + "p274": 10, + "p276": 11, + "p270": 12, + "p281": 13, + "p277": 14, + "p231": 15, + "p271": 16, + "p238": 17, + "p257": 18, + "p273": 19, + "p284": 20, + "p329": 21, + "p361": 22, + "p287": 23, + "p360": 24, + "p374": 25, + "p376": 26, + "p310": 27, + "p304": 28, + "p334": 29, + "p340": 30, + "p323": 31, + "p347": 32, + "p330": 33, + "p308": 34, + "p314": 35, + "p317": 36, + "p339": 37, + "p311": 38, + "p294": 39, + "p305": 40, + "p266": 41, + "p335": 42, + "p318": 43, + "p351": 44, + "p333": 45, + "p313": 46, + "p316": 47, + "p244": 48, + "p307": 49, + "p363": 50, + "p336": 51, + "p297": 52, + "p312": 53, + "p267": 54, + "p275": 55, + "p295": 56, + "p258": 57, + "p288": 58, + "p301": 59, + "p232": 60, + "p292": 61, + "p272": 62, + "p280": 63, + "p278": 64, + "p341": 65, + "p268": 66, + "p298": 67, + "p299": 68, + "p279": 69, + "p285": 70, + "p326": 71, + "p300": 72, + "s5": 73, + "p230": 74, + "p345": 75, + "p254": 76, + "p269": 77, + "p293": 78, + "p252": 79, + "p262": 80, + "p243": 81, + "p227": 82, + "p343": 83, + "p255": 84, + "p229": 85, + "p240": 86, + "p248": 87, + "p253": 88, + "p233": 89, + "p228": 90, + "p282": 91, + "p251": 92, + "p246": 93, + "p234": 94, + "p226": 95, + "p260": 96, + "p245": 97, + "p241": 98, + "p303": 99, + "p265": 100, + "p306": 101, + "p237": 102, + "p249": 103, + "p256": 104, + "p302": 105, + "p364": 106, + "p225": 107, + "p362": 108 + }, + "files": { + "en/en_GB/vctk/medium/en_GB-vctk-medium.onnx": { + "size_bytes": 76952753, + "md5_digest": "573025290fdc68812543b7438ace0c29" + }, + "en/en_GB/vctk/medium/en_GB-vctk-medium.onnx.json": { + "size_bytes": 9200, + "md5_digest": "0eca51508c23837212e7c35d51c4a1fb" + }, + "en/en_GB/vctk/medium/MODEL_CARD": { + "size_bytes": 326, + "md5_digest": "b88a963e3bee27bc4fff84563f1be388" + } + }, + "aliases": [] + }, + "en_GB-alan-low": { + "key": 
"en_GB-alan-low", + "name": "alan", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_GB/alan/low/en_GB-alan-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "2acae8c79395ab109a7572f0afa61fff" + }, + "en/en_GB/alan/low/en_GB-alan-low.onnx.json": { + "size_bytes": 5957, + "md5_digest": "739bed9864cc4e6c691384de77b15814" + }, + "en/en_GB/alan/low/MODEL_CARD": { + "size_bytes": 309, + "md5_digest": "b116c3cbdebac99ade9af03807cb9301" + } + }, + "aliases": [ + "en-gb-alan-low" + ] + }, + "en_GB-alan-medium": { + "key": "en_GB-alan-medium", + "name": "alan", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_GB/alan/medium/en_GB-alan-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "8f6b35eeb8ef6269021c6cb6d2414c9b" + }, + "en/en_GB/alan/medium/en_GB-alan-medium.onnx.json": { + "size_bytes": 7013, + "md5_digest": "8927af81e8b16650fb7c9593464daa6e" + }, + "en/en_GB/alan/medium/MODEL_CARD": { + "size_bytes": 320, + "md5_digest": "24a2232470ca1be071debf53c984666e" + } + }, + "aliases": [] + }, + "en_GB-aru-medium": { + "key": "en_GB-aru-medium", + "name": "aru", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "medium", + "num_speakers": 12, + "speaker_id_map": { + "03": 0, + "06": 1, + "10": 2, + "01": 3, + "09": 4, + "08": 5, + "11": 6, + "05": 7, + "12": 8, + "02": 9, + "07": 10, + "04": 11 + }, + "files": { + "en/en_GB/aru/medium/en_GB-aru-medium.onnx": { + "size_bytes": 76754097, + "md5_digest": 
"7862d75539b8ef867e7c04e772d323ea" + }, + "en/en_GB/aru/medium/en_GB-aru-medium.onnx.json": { + "size_bytes": 7223, + "md5_digest": "00b26fa5bfc67a6a11845b1f39c33ad1" + }, + "en/en_GB/aru/medium/MODEL_CARD": { + "size_bytes": 368, + "md5_digest": "09496f38078e0eefe220a497b7b70631" + } + }, + "aliases": [] + }, + "en_GB-jenny_dioco-medium": { + "key": "en_GB-jenny_dioco-medium", + "name": "jenny_dioco", + "language": { + "code": "en_GB", + "family": "en", + "region": "GB", + "name_native": "English", + "name_english": "English", + "country_english": "Great Britain" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_GB/jenny_dioco/medium/en_GB-jenny_dioco-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "d08f2f7edf0c858275a7eca74ff2a9e4" + }, + "en/en_GB/jenny_dioco/medium/en_GB-jenny_dioco-medium.onnx.json": { + "size_bytes": 7020, + "md5_digest": "5338cad16dcd408d2b0a247cf163abac" + }, + "en/en_GB/jenny_dioco/medium/MODEL_CARD": { + "size_bytes": 298, + "md5_digest": "ff351d05502764d5b4a074e0648e9434" + } + }, + "aliases": [] + }, + "en_US-kusal-medium": { + "key": "en_US-kusal-medium", + "name": "kusal", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/kusal/medium/en_US-kusal-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "95334de7385a03c5c9de25b920c33492" + }, + "en/en_US/kusal/medium/en_US-kusal-medium.onnx.json": { + "size_bytes": 7009, + "md5_digest": "68bf8ca4d72b9f2aedfba64af13f847d" + }, + "en/en_US/kusal/medium/MODEL_CARD": { + "size_bytes": 279, + "md5_digest": "b627e950e8e10a1ec7b30e5f9b312a05" + } + }, + "aliases": [] + }, + "en_US-libritts-high": { + "key": "en_US-libritts-high", + "name": "libritts", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + 
"name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "high", + "num_speakers": 904, + "speaker_id_map": { + "p3922": 0, + "p8699": 1, + "p4535": 2, + "p6701": 3, + "p3638": 4, + "p922": 5, + "p2531": 6, + "p1638": 7, + "p8848": 8, + "p6544": 9, + "p3615": 10, + "p318": 11, + "p6104": 12, + "p1382": 13, + "p5400": 14, + "p5712": 15, + "p2769": 16, + "p2573": 17, + "p1463": 18, + "p6458": 19, + "p3274": 20, + "p4356": 21, + "p8498": 22, + "p5570": 23, + "p176": 24, + "p339": 25, + "p28": 26, + "p5909": 27, + "p3869": 28, + "p4899": 29, + "p64": 30, + "p3368": 31, + "p3307": 32, + "p5618": 33, + "p3370": 34, + "p7704": 35, + "p8506": 36, + "p8410": 37, + "p6904": 38, + "p5655": 39, + "p2204": 40, + "p501": 41, + "p7314": 42, + "p1027": 43, + "p5054": 44, + "p534": 45, + "p2853": 46, + "p5935": 47, + "p2404": 48, + "p7874": 49, + "p816": 50, + "p2053": 51, + "p8066": 52, + "p16": 53, + "p4586": 54, + "p1923": 55, + "p2592": 56, + "p1265": 57, + "p6189": 58, + "p100": 59, + "p6371": 60, + "p4957": 61, + "p4116": 62, + "p3003": 63, + "p7739": 64, + "p1752": 65, + "p5717": 66, + "p5012": 67, + "p5062": 68, + "p7481": 69, + "p4595": 70, + "p2299": 71, + "p7188": 72, + "p93": 73, + "p4145": 74, + "p8684": 75, + "p7594": 76, + "p2598": 77, + "p3540": 78, + "p7717": 79, + "p6426": 80, + "p4148": 81, + "p335": 82, + "p1379": 83, + "p2512": 84, + "p242": 85, + "p8855": 86, + "p8118": 87, + "p369": 88, + "p6575": 89, + "p6694": 90, + "p8080": 91, + "p1283": 92, + "p7434": 93, + "p5290": 94, + "p1731": 95, + "p2401": 96, + "p459": 97, + "p192": 98, + "p7910": 99, + "p114": 100, + "p5660": 101, + "p1313": 102, + "p203": 103, + "p7460": 104, + "p207": 105, + "p6497": 106, + "p6696": 107, + "p7766": 108, + "p6233": 109, + "p3185": 110, + "p2010": 111, + "p2056": 112, + "p3717": 113, + "p5802": 114, + "p5622": 115, + "p2156": 116, + "p4243": 117, + "p1422": 118, + "p5039": 119, + "p4110": 120, + "p1093": 121, + "p1776": 122, + 
"p7995": 123, + "p6877": 124, + "p5635": 125, + "p54": 126, + "p288": 127, + "p4592": 128, + "p7276": 129, + "p688": 130, + "p8388": 131, + "p8152": 132, + "p8194": 133, + "p7000": 134, + "p8527": 135, + "p5126": 136, + "p3923": 137, + "p1054": 138, + "p3927": 139, + "p5029": 140, + "p4098": 141, + "p1789": 142, + "p56": 143, + "p7240": 144, + "p5538": 145, + "p1903": 146, + "p6538": 147, + "p3380": 148, + "p6643": 149, + "p7495": 150, + "p8718": 151, + "p8050": 152, + "p126": 153, + "p7245": 154, + "p2517": 155, + "p4438": 156, + "p4945": 157, + "p7145": 158, + "p724": 159, + "p9022": 160, + "p6637": 161, + "p6927": 162, + "p6937": 163, + "p8113": 164, + "p5724": 165, + "p6006": 166, + "p3584": 167, + "p2971": 168, + "p2230": 169, + "p7982": 170, + "p1649": 171, + "p3994": 172, + "p7720": 173, + "p6981": 174, + "p781": 175, + "p4973": 176, + "p6206": 177, + "p2481": 178, + "p3157": 179, + "p1509": 180, + "p510": 181, + "p7540": 182, + "p8887": 183, + "p7120": 184, + "p2882": 185, + "p7128": 186, + "p8142": 187, + "p7229": 188, + "p2787": 189, + "p8820": 190, + "p2368": 191, + "p4331": 192, + "p4967": 193, + "p4427": 194, + "p6054": 195, + "p3728": 196, + "p274": 197, + "p7134": 198, + "p1603": 199, + "p1383": 200, + "p1165": 201, + "p4363": 202, + "p512": 203, + "p5985": 204, + "p7967": 205, + "p2060": 206, + "p7752": 207, + "p7484": 208, + "p8643": 209, + "p3549": 210, + "p5731": 211, + "p7881": 212, + "p667": 213, + "p6828": 214, + "p5740": 215, + "p3483": 216, + "p718": 217, + "p6341": 218, + "p1913": 219, + "p3228": 220, + "p7247": 221, + "p7705": 222, + "p1018": 223, + "p8193": 224, + "p6098": 225, + "p3989": 226, + "p7828": 227, + "p5876": 228, + "p7754": 229, + "p4719": 230, + "p8011": 231, + "p7939": 232, + "p5975": 233, + "p2004": 234, + "p6139": 235, + "p8183": 236, + "p3482": 237, + "p3361": 238, + "p4289": 239, + "p231": 240, + "p7789": 241, + "p4598": 242, + "p5239": 243, + "p2638": 244, + "p6300": 245, + "p8474": 246, + "p2194": 247, + "p7832": 248, 
+ "p1079": 249, + "p1335": 250, + "p188": 251, + "p1195": 252, + "p5914": 253, + "p1401": 254, + "p7318": 255, + "p5448": 256, + "p1392": 257, + "p3703": 258, + "p2113": 259, + "p7783": 260, + "p8176": 261, + "p6519": 262, + "p7933": 263, + "p7938": 264, + "p7802": 265, + "p6120": 266, + "p224": 267, + "p209": 268, + "p5656": 269, + "p3032": 270, + "p6965": 271, + "p258": 272, + "p4837": 273, + "p5489": 274, + "p272": 275, + "p3851": 276, + "p7140": 277, + "p2562": 278, + "p1472": 279, + "p79": 280, + "p2775": 281, + "p3046": 282, + "p2532": 283, + "p8266": 284, + "p6099": 285, + "p4425": 286, + "p5293": 287, + "p7981": 288, + "p2045": 289, + "p920": 290, + "p511": 291, + "p7416": 292, + "p835": 293, + "p1289": 294, + "p8195": 295, + "p7833": 296, + "p8772": 297, + "p968": 298, + "p1641": 299, + "p7117": 300, + "p1678": 301, + "p5809": 302, + "p8028": 303, + "p500": 304, + "p6505": 305, + "p7868": 306, + "p14": 307, + "p2238": 308, + "p4744": 309, + "p3733": 310, + "p7515": 311, + "p699": 312, + "p5093": 313, + "p6388": 314, + "p7959": 315, + "p98": 316, + "p3914": 317, + "p5246": 318, + "p2570": 319, + "p8396": 320, + "p3513": 321, + "p882": 322, + "p7994": 323, + "p5968": 324, + "p8591": 325, + "p806": 326, + "p5261": 327, + "p1271": 328, + "p899": 329, + "p3945": 330, + "p8404": 331, + "p249": 332, + "p3008": 333, + "p7139": 334, + "p6395": 335, + "p6215": 336, + "p6080": 337, + "p4054": 338, + "p7825": 339, + "p6683": 340, + "p8725": 341, + "p3230": 342, + "p4138": 343, + "p6160": 344, + "p666": 345, + "p6510": 346, + "p3551": 347, + "p8075": 348, + "p225": 349, + "p7169": 350, + "p1851": 351, + "p5984": 352, + "p2960": 353, + "p8329": 354, + "p175": 355, + "p6378": 356, + "p480": 357, + "p7538": 358, + "p479": 359, + "p5519": 360, + "p8534": 361, + "p4856": 362, + "p101": 363, + "p3521": 364, + "p2256": 365, + "p3083": 366, + "p4278": 367, + "p8713": 368, + "p1226": 369, + "p4222": 370, + "p8494": 371, + "p8776": 372, + "p731": 373, + "p6574": 374, + "p5319": 
375, + "p8605": 376, + "p5583": 377, + "p6406": 378, + "p4064": 379, + "p4806": 380, + "p3972": 381, + "p7383": 382, + "p5133": 383, + "p597": 384, + "p1025": 385, + "p7313": 386, + "p5304": 387, + "p8758": 388, + "p1050": 389, + "p6499": 390, + "p6956": 391, + "p770": 392, + "p4108": 393, + "p2774": 394, + "p3864": 395, + "p4490": 396, + "p4848": 397, + "p1826": 398, + "p6294": 399, + "p7949": 400, + "p1446": 401, + "p7867": 402, + "p8163": 403, + "p953": 404, + "p8138": 405, + "p353": 406, + "p7553": 407, + "p8825": 408, + "p5189": 409, + "p2012": 410, + "p948": 411, + "p205": 412, + "p1535": 413, + "p8008": 414, + "p1112": 415, + "p7926": 416, + "p4039": 417, + "p716": 418, + "p3967": 419, + "p7932": 420, + "p7525": 421, + "p7316": 422, + "p3448": 423, + "p2393": 424, + "p6788": 425, + "p6550": 426, + "p7011": 427, + "p8791": 428, + "p8119": 429, + "p1777": 430, + "p6014": 431, + "p1046": 432, + "p6269": 433, + "p6188": 434, + "p5266": 435, + "p3490": 436, + "p8786": 437, + "p8824": 438, + "p589": 439, + "p576": 440, + "p1121": 441, + "p1806": 442, + "p7294": 443, + "p3119": 444, + "p2688": 445, + "p1012": 446, + "p4807": 447, + "p7498": 448, + "p3905": 449, + "p7384": 450, + "p2992": 451, + "p30": 452, + "p497": 453, + "p227": 454, + "p4226": 455, + "p5007": 456, + "p1066": 457, + "p8222": 458, + "p7688": 459, + "p6865": 460, + "p6286": 461, + "p8225": 462, + "p3224": 463, + "p8635": 464, + "p1348": 465, + "p3645": 466, + "p1961": 467, + "p8190": 468, + "p6032": 469, + "p7286": 470, + "p5389": 471, + "p3105": 472, + "p1028": 473, + "p6038": 474, + "p764": 475, + "p7437": 476, + "p6555": 477, + "p8875": 478, + "p2074": 479, + "p7809": 480, + "p2240": 481, + "p2827": 482, + "p5386": 483, + "p6763": 484, + "p3009": 485, + "p6339": 486, + "p1825": 487, + "p7569": 488, + "p359": 489, + "p7956": 490, + "p2137": 491, + "p8677": 492, + "p4434": 493, + "p329": 494, + "p3289": 495, + "p4290": 496, + "p2999": 497, + "p2427": 498, + "p637": 499, + "p2229": 500, + "p1874": 
501, + "p3446": 502, + "p9023": 503, + "p3114": 504, + "p6235": 505, + "p4860": 506, + "p4519": 507, + "p561": 508, + "p70": 509, + "p4800": 510, + "p2294": 511, + "p6115": 512, + "p2582": 513, + "p8464": 514, + "p5139": 515, + "p6918": 516, + "p337": 517, + "p5810": 518, + "p8401": 519, + "p303": 520, + "p5206": 521, + "p2589": 522, + "p7061": 523, + "p2269": 524, + "p2758": 525, + "p3389": 526, + "p4629": 527, + "p707": 528, + "p5606": 529, + "p1513": 530, + "p2473": 531, + "p664": 532, + "p5092": 533, + "p5154": 534, + "p6288": 535, + "p6308": 536, + "p4731": 537, + "p3328": 538, + "p7816": 539, + "p3221": 540, + "p8687": 541, + "p7030": 542, + "p476": 543, + "p4257": 544, + "p5918": 545, + "p6317": 546, + "p204": 547, + "p8006": 548, + "p6895": 549, + "p1264": 550, + "p2494": 551, + "p112": 552, + "p1859": 553, + "p398": 554, + "p1052": 555, + "p3294": 556, + "p1460": 557, + "p8573": 558, + "p5684": 559, + "p8421": 560, + "p5883": 561, + "p7297": 562, + "p246": 563, + "p8057": 564, + "p3835": 565, + "p1748": 566, + "p3816": 567, + "p3357": 568, + "p1053": 569, + "p409": 570, + "p868": 571, + "p3118": 572, + "p7520": 573, + "p6686": 574, + "p1241": 575, + "p5190": 576, + "p166": 577, + "p1482": 578, + "p5604": 579, + "p1212": 580, + "p2741": 581, + "p1259": 582, + "p984": 583, + "p6492": 584, + "p6167": 585, + "p296": 586, + "p6567": 587, + "p6924": 588, + "p2272": 589, + "p7085": 590, + "p345": 591, + "p2388": 592, + "p1705": 593, + "p1343": 594, + "p7241": 595, + "p451": 596, + "p5401": 597, + "p6446": 598, + "p612": 599, + "p594": 600, + "p7555": 601, + "p7069": 602, + "p2577": 603, + "p5333": 604, + "p8742": 605, + "p6727": 606, + "p1571": 607, + "p4734": 608, + "p7258": 609, + "p3977": 610, + "p373": 611, + "p5723": 612, + "p1365": 613, + "p7285": 614, + "p580": 615, + "p836": 616, + "p6782": 617, + "p3654": 618, + "p1974": 619, + "p6258": 620, + "p925": 621, + "p949": 622, + "p2790": 623, + "p698": 624, + "p6373": 625, + "p2785": 626, + "p1222": 627, + 
"p2751": 628, + "p3825": 629, + "p5115": 630, + "p1827": 631, + "p3171": 632, + "p119": 633, + "p850": 634, + "p3258": 635, + "p7909": 636, + "p1322": 637, + "p8097": 638, + "p22": 639, + "p7478": 640, + "p1349": 641, + "p4854": 642, + "p2929": 643, + "p7335": 644, + "p5868": 645, + "p454": 646, + "p7945": 647, + "p2654": 648, + "p3493": 649, + "p1060": 650, + "p8545": 651, + "p6509": 652, + "p5002": 653, + "p7732": 654, + "p3082": 655, + "p1779": 656, + "p2709": 657, + "p7398": 658, + "p8879": 659, + "p639": 660, + "p598": 661, + "p5672": 662, + "p6553": 663, + "p4111": 664, + "p1417": 665, + "p7991": 666, + "p380": 667, + "p8459": 668, + "p8347": 669, + "p1769": 670, + "p2673": 671, + "p3330": 672, + "p7051": 673, + "p1337": 674, + "p4057": 675, + "p4839": 676, + "p6060": 677, + "p7095": 678, + "p278": 679, + "p1445": 680, + "p6518": 681, + "p2364": 682, + "p1958": 683, + "p548": 684, + "p4010": 685, + "p3072": 686, + "p6993": 687, + "p8575": 688, + "p2149": 689, + "p240": 690, + "p2920": 691, + "p5588": 692, + "p1885": 693, + "p6082": 694, + "p9026": 695, + "p340": 696, + "p159": 697, + "p7730": 698, + "p7962": 699, + "p1987": 700, + "p3876": 701, + "p8771": 702, + "p5123": 703, + "p3866": 704, + "p3546": 705, + "p7777": 706, + "p115": 707, + "p5337": 708, + "p475": 709, + "p1724": 710, + "p6359": 711, + "p4260": 712, + "p2110": 713, + "p1845": 714, + "p4335": 715, + "p4133": 716, + "p783": 717, + "p8479": 718, + "p1448": 719, + "p1160": 720, + "p7647": 721, + "p2618": 722, + "p3630": 723, + "p4013": 724, + "p5242": 725, + "p7957": 726, + "p3852": 727, + "p3889": 728, + "p1387": 729, + "p439": 730, + "p1425": 731, + "p2061": 732, + "p7395": 733, + "p7837": 734, + "p5147": 735, + "p2319": 736, + "p3781": 737, + "p1311": 738, + "p4733": 739, + "p8705": 740, + "p3094": 741, + "p2823": 742, + "p1914": 743, + "p954": 744, + "p4381": 745, + "p4044": 746, + "p593": 747, + "p8300": 748, + "p7558": 749, + "p6494": 750, + "p6330": 751, + "p5940": 752, + "p7126": 753, + 
"p1061": 754, + "p6352": 755, + "p5186": 756, + "p1944": 757, + "p2285": 758, + "p6673": 759, + "p5746": 760, + "p208": 761, + "p492": 762, + "p216": 763, + "p979": 764, + "p1668": 765, + "p6620": 766, + "p711": 767, + "p7733": 768, + "p8619": 769, + "p5157": 770, + "p829": 771, + "p3180": 772, + "p3979": 773, + "p1556": 774, + "p3379": 775, + "p5727": 776, + "p596": 777, + "p2127": 778, + "p581": 779, + "p2652": 780, + "p2628": 781, + "p1849": 782, + "p4238": 783, + "p606": 784, + "p1224": 785, + "p1629": 786, + "p1413": 787, + "p957": 788, + "p8592": 789, + "p2254": 790, + "p1323": 791, + "p122": 792, + "p2093": 793, + "p1100": 794, + "p81": 795, + "p323": 796, + "p815": 797, + "p2581": 798, + "p543": 799, + "p6037": 800, + "p2397": 801, + "p5513": 802, + "p4495": 803, + "p5776": 804, + "p17": 805, + "p4590": 806, + "p8228": 807, + "p708": 808, + "p3792": 809, + "p3790": 810, + "p7090": 811, + "p1943": 812, + "p4246": 813, + "p559": 814, + "p3738": 815, + "p2167": 816, + "p1933": 817, + "p2162": 818, + "p549": 819, + "p3025": 820, + "p1182": 821, + "p4358": 822, + "p636": 823, + "p986": 824, + "p8490": 825, + "p3340": 826, + "p90": 827, + "p1487": 828, + "p1639": 829, + "p1547": 830, + "p4152": 831, + "p1498": 832, + "p1740": 833, + "p6157": 834, + "p217": 835, + "p2201": 836, + "p362": 837, + "p2146": 838, + "p1801": 839, + "p5063": 840, + "p7339": 841, + "p663": 842, + "p38": 843, + "p1336": 844, + "p3215": 845, + "p210": 846, + "p6075": 847, + "p55": 848, + "p2411": 849, + "p7445": 850, + "p5767": 851, + "p2812": 852, + "p472": 853, + "p803": 854, + "p4236": 855, + "p7665": 856, + "p1607": 857, + "p1316": 858, + "p7475": 859, + "p3001": 860, + "p1473": 861, + "p3537": 862, + "p3070": 863, + "p1390": 864, + "p1290": 865, + "p2499": 866, + "p154": 867, + "p7518": 868, + "p408": 869, + "p1811": 870, + "p1734": 871, + "p7342": 872, + "p8722": 873, + "p1754": 874, + "p7657": 875, + "p583": 876, + "p830": 877, + "p6690": 878, + "p1552": 879, + "p2498": 880, + 
"p1296": 881, + "p3686": 882, + "p157": 883, + "p487": 884, + "p6119": 885, + "p4926": 886, + "p4846": 887, + "p1536": 888, + "p2674": 889, + "p1645": 890, + "p3187": 891, + "p1058": 892, + "p2039": 893, + "p4071": 894, + "p4433": 895, + "p1175": 896, + "p434": 897, + "p1001": 898, + "p2816": 899, + "p820": 900, + "p2696": 901, + "p4681": 902, + "p2085": 903 + }, + "files": { + "en/en_US/libritts/high/en_US-libritts-high.onnx": { + "size_bytes": 136673811, + "md5_digest": "61d7845257f8abdc27476f606151ef8d" + }, + "en/en_US/libritts/high/en_US-libritts-high.onnx.json": { + "size_bytes": 25569, + "md5_digest": "c1c0ae81778060f8131d6c51abf548a6" + }, + "en/en_US/libritts/high/MODEL_CARD": { + "size_bytes": 255, + "md5_digest": "cdeac934f1154489924a071470b22365" + } + }, + "aliases": [ + "en-us-libritts-high" + ] + }, + "en_US-l2arctic-medium": { + "key": "en_US-l2arctic-medium", + "name": "l2arctic", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 24, + "speaker_id_map": { + "TXHC": 0, + "THV": 1, + "SVBI": 2, + "ZHAA": 3, + "PNV": 4, + "TLV": 5, + "ERMS": 6, + "MBMPS": 7, + "HQTV": 8, + "TNI": 9, + "ASI": 10, + "HJK": 11, + "LXC": 12, + "NCC": 13, + "YKWK": 14, + "YDCK": 15, + "HKK": 16, + "NJS": 17, + "YBAA": 18, + "RRBI": 19, + "BWC": 20, + "ABA": 21, + "EBVS": 22, + "SKA": 23 + }, + "files": { + "en/en_US/l2arctic/medium/en_US-l2arctic-medium.onnx": { + "size_bytes": 76778673, + "md5_digest": "a71d8acf9b01676931cd548f739382cd" + }, + "en/en_US/l2arctic/medium/en_US-l2arctic-medium.onnx.json": { + "size_bytes": 7475, + "md5_digest": "40927583d95e1e0cdf19494a08c281b6" + }, + "en/en_US/l2arctic/medium/MODEL_CARD": { + "size_bytes": 365, + "md5_digest": "8d5e9dc31cba2a9b7ee68a2a70e084f2" + } + }, + "aliases": [] + }, + "en_US-kathleen-low": { + "key": "en_US-kathleen-low", + "name": "kathleen", + "language": 
{ + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/kathleen/low/en_US-kathleen-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "dd1ab131724b1cff76fe388252bec47b" + }, + "en/en_US/kathleen/low/en_US-kathleen-low.onnx.json": { + "size_bytes": 5956, + "md5_digest": "24ee1f4eedd4b554bc38be0959d47c8b" + }, + "en/en_US/kathleen/low/MODEL_CARD": { + "size_bytes": 281, + "md5_digest": "0585e0a798d093c9ee090b99d9c8f68e" + } + }, + "aliases": [ + "en-us-kathleen-low" + ] + }, + "en_US-amy-low": { + "key": "en_US-amy-low", + "name": "amy", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/amy/low/en_US-amy-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "3c3f6a6ec605f3a59763256d3b2db012" + }, + "en/en_US/amy/low/en_US-amy-low.onnx.json": { + "size_bytes": 5951, + "md5_digest": "5781d54c3075cb5d2c1dd9d6c86b8e5c" + }, + "en/en_US/amy/low/MODEL_CARD": { + "size_bytes": 273, + "md5_digest": "e1cdd84aa7493b8fbe1e6471f6f93cea" + } + }, + "aliases": [ + "en-us-amy-low" + ] + }, + "en_US-amy-medium": { + "key": "en_US-amy-medium", + "name": "amy", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/amy/medium/en_US-amy-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "778d28aeb95fcdf8a882344d9df142fc" + }, + "en/en_US/amy/medium/en_US-amy-medium.onnx.json": { + "size_bytes": 7007, + "md5_digest": "eed7b28ea26b86395fd842942bdaea4c" + }, + 
"en/en_US/amy/medium/MODEL_CARD": { + "size_bytes": 281, + "md5_digest": "6fca05ee5bfe8b28211b88b86b47e822" + } + }, + "aliases": [] + }, + "en_US-ryan-low": { + "key": "en_US-ryan-low", + "name": "ryan", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/ryan/low/en_US-ryan-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "32f6a995d6d561cd040b20a76f4edb1e" + }, + "en/en_US/ryan/low/en_US-ryan-low.onnx.json": { + "size_bytes": 5952, + "md5_digest": "b11005076636a5f5a622f314ac0e775d" + }, + "en/en_US/ryan/low/MODEL_CARD": { + "size_bytes": 263, + "md5_digest": "030252d21b0bd1048c37a9eb7f94eb17" + } + }, + "aliases": [ + "en-us-ryan-low" + ] + }, + "en_US-ryan-high": { + "key": "en_US-ryan-high", + "name": "ryan", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "high", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/ryan/high/en_US-ryan-high.onnx": { + "size_bytes": 120786792, + "md5_digest": "5d879a17bddf5007f76655b445ba78b4" + }, + "en/en_US/ryan/high/en_US-ryan-high.onnx.json": { + "size_bytes": 5953, + "md5_digest": "9d0afdf46e5006e84379801d84690018" + }, + "en/en_US/ryan/high/MODEL_CARD": { + "size_bytes": 265, + "md5_digest": "9c966517ed0bfbffbfdb218e99dbeadd" + } + }, + "aliases": [ + "en-us-ryan-high" + ] + }, + "en_US-ryan-medium": { + "key": "en_US-ryan-medium", + "name": "ryan", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/ryan/medium/en_US-ryan-medium.onnx": { + "size_bytes": 
63201294, + "md5_digest": "8f06d3aff8ded5a7f13f907e6bec32ac" + }, + "en/en_US/ryan/medium/en_US-ryan-medium.onnx.json": { + "size_bytes": 7008, + "md5_digest": "6222797bca87b2a799ef0ed2d2f66adf" + }, + "en/en_US/ryan/medium/MODEL_CARD": { + "size_bytes": 306, + "md5_digest": "79d9200481a9dcabfa1803cb9e31c28a" + } + }, + "aliases": [ + "en-us-ryan-medium" + ] + }, + "en_US-danny-low": { + "key": "en_US-danny-low", + "name": "danny", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/danny/low/en_US-danny-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "73cc296e178ab3d2a5698179b629cd12" + }, + "en/en_US/danny/low/en_US-danny-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "004e9fc219e2391997a89980582c7d50" + }, + "en/en_US/danny/low/MODEL_CARD": { + "size_bytes": 275, + "md5_digest": "62d30d0cccea265949980cb48212ebee" + } + }, + "aliases": [] + }, + "en_US-arctic-medium": { + "key": "en_US-arctic-medium", + "name": "arctic", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 18, + "speaker_id_map": { + "awb": 0, + "rms": 1, + "slt": 2, + "ksp": 3, + "clb": 4, + "lnh": 5, + "aew": 6, + "bdl": 7, + "jmk": 8, + "rxr": 9, + "fem": 10, + "ljm": 11, + "slp": 12, + "aup": 13, + "ahw": 14, + "axb": 15, + "eey": 16, + "gka": 17 + }, + "files": { + "en/en_US/arctic/medium/en_US-arctic-medium.onnx": { + "size_bytes": 76766385, + "md5_digest": "497c47037c2e279faf467e0a06f965d2" + }, + "en/en_US/arctic/medium/en_US-arctic-medium.onnx.json": { + "size_bytes": 7347, + "md5_digest": "0a3270843677fabafc87ef423d1e5e52" + }, + "en/en_US/arctic/medium/MODEL_CARD": { + "size_bytes": 289, + "md5_digest": 
"efe5b89e46cf8e0efa254203da8c7baf" + } + }, + "aliases": [] + }, + "en_US-lessac-low": { + "key": "en_US-lessac-low", + "name": "lessac", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/lessac/low/en_US-lessac-low.onnx": { + "size_bytes": 63201294, + "md5_digest": "31883a7506589feadf3c3474fd8ef658" + }, + "en/en_US/lessac/low/en_US-lessac-low.onnx.json": { + "size_bytes": 7007, + "md5_digest": "a4c8e9e2a4c5a852b2a1a43089a0b752" + }, + "en/en_US/lessac/low/MODEL_CARD": { + "size_bytes": 345, + "md5_digest": "999cbf2c337d8fb2f21b0fa2c95e9e85" + } + }, + "aliases": [ + "en-us-lessac-low" + ] + }, + "en_US-lessac-high": { + "key": "en_US-lessac-high", + "name": "lessac", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "high", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/lessac/high/en_US-lessac-high.onnx": { + "size_bytes": 113895201, + "md5_digest": "99d1f6181a7f5ccbe3f117ba8ce63c93" + }, + "en/en_US/lessac/high/en_US-lessac-high.onnx.json": { + "size_bytes": 7008, + "md5_digest": "685dcf1ef319cfacf38cfe2c94a3b71e" + }, + "en/en_US/lessac/high/MODEL_CARD": { + "size_bytes": 347, + "md5_digest": "2ff564555f6d6cde3c19dcc8f3815428" + } + }, + "aliases": [] + }, + "en_US-lessac-medium": { + "key": "en_US-lessac-medium", + "name": "lessac", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/lessac/medium/en_US-lessac-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": 
"2fc642b535197b6305c7c8f92dc8b24f" + }, + "en/en_US/lessac/medium/en_US-lessac-medium.onnx.json": { + "size_bytes": 7010, + "md5_digest": "46e565a656f76b42b588d822025ed439" + }, + "en/en_US/lessac/medium/MODEL_CARD": { + "size_bytes": 351, + "md5_digest": "42f2dd4a98149e12fc70b301d9579dfd" + } + }, + "aliases": [ + "en-us-lessac-medium" + ] + }, + "en_US-joe-medium": { + "key": "en_US-joe-medium", + "name": "joe", + "language": { + "code": "en_US", + "family": "en", + "region": "US", + "name_native": "English", + "name_english": "English", + "country_english": "United States" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "en/en_US/joe/medium/en_US-joe-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "74fd6a4dc39e0aa9dce145d7f5acd4f6" + }, + "en/en_US/joe/medium/en_US-joe-medium.onnx.json": { + "size_bytes": 6877, + "md5_digest": "6ceb0a247c83c41ea741133825749568" + }, + "en/en_US/joe/medium/MODEL_CARD": { + "size_bytes": 280, + "md5_digest": "7d25cb111aa9699518764a1cb3943af1" + } + }, + "aliases": [] + }, + "es_MX-ald-medium": { + "key": "es_MX-ald-medium", + "name": "ald", + "language": { + "code": "es_MX", + "family": "es", + "region": "MX", + "name_native": "Español", + "name_english": "Spanish", + "country_english": "Mexico" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "es/es_MX/ald/medium/es_MX-ald-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "86374058e59b41ac3b7fe4181e1daad6" + }, + "es/es_MX/ald/medium/es_MX-ald-medium.onnx.json": { + "size_bytes": 7020, + "md5_digest": "5d9f729ad9fc9ed0cea369ee7984c608" + }, + "es/es_MX/ald/medium/MODEL_CARD": { + "size_bytes": 320, + "md5_digest": "a858af3698e0c7cda6c9ad5d0d11b651" + } + }, + "aliases": [] + }, + "es_ES-davefx-medium": { + "key": "es_ES-davefx-medium", + "name": "davefx", + "language": { + "code": "es_ES", + "family": "es", + "region": "ES", + "name_native": "Español", + "name_english": "Spanish", + 
"country_english": "Spain" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "es/es_ES/davefx/medium/es_ES-davefx-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "dc515cd4ecc5f6f72fe14a941188fc9c" + }, + "es/es_ES/davefx/medium/es_ES-davefx-medium.onnx.json": { + "size_bytes": 6921, + "md5_digest": "e3ea0eebe0f918c3d2430262a4a454ca" + }, + "es/es_ES/davefx/medium/MODEL_CARD": { + "size_bytes": 275, + "md5_digest": "5569c0fb20448308466216428b52f392" + } + }, + "aliases": [] + }, + "es_ES-mls_9972-low": { + "key": "es_ES-mls_9972-low", + "name": "mls_9972", + "language": { + "code": "es_ES", + "family": "es", + "region": "ES", + "name_native": "Español", + "name_english": "Spanish", + "country_english": "Spain" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "es/es_ES/mls_9972/low/es_ES-mls_9972-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "587f2fc38dc3f582e771c3748465e2a2" + }, + "es/es_ES/mls_9972/low/es_ES-mls_9972-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "59b7f5b2e6f599eebf42e10b4b13667d" + }, + "es/es_ES/mls_9972/low/MODEL_CARD": { + "size_bytes": 256, + "md5_digest": "4ba8c18ce72a202a49312ee1914ca6b0" + } + }, + "aliases": [ + "es-mls_9972-low" + ] + }, + "es_ES-sharvard-medium": { + "key": "es_ES-sharvard-medium", + "name": "sharvard", + "language": { + "code": "es_ES", + "family": "es", + "region": "ES", + "name_native": "Español", + "name_english": "Spanish", + "country_english": "Spain" + }, + "quality": "medium", + "num_speakers": 2, + "speaker_id_map": { + "M": 0, + "F": 1 + }, + "files": { + "es/es_ES/sharvard/medium/es_ES-sharvard-medium.onnx": { + "size_bytes": 76733615, + "md5_digest": "77e6f9c26e92799fb04bb90b46bf1834" + }, + "es/es_ES/sharvard/medium/es_ES-sharvard-medium.onnx.json": { + "size_bytes": 7045, + "md5_digest": "05383f58a12f2937b47a179b19b58968" + }, + "es/es_ES/sharvard/medium/MODEL_CARD": { + "size_bytes": 392, + "md5_digest": 
"b600a21381af84fa21b29f519a3a829a" + } + }, + "aliases": [] + }, + "es_ES-carlfm-x_low": { + "key": "es_ES-carlfm-x_low", + "name": "carlfm", + "language": { + "code": "es_ES", + "family": "es", + "region": "ES", + "name_native": "Español", + "name_english": "Spanish", + "country_english": "Spain" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "es/es_ES/carlfm/x_low/es_ES-carlfm-x_low.onnx": { + "size_bytes": 28130791, + "md5_digest": "4137b5aee01ea6241080fc4dbe59a8ee" + }, + "es/es_ES/carlfm/x_low/es_ES-carlfm-x_low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "41229762134c23d28d018e07c0d3071c" + }, + "es/es_ES/carlfm/x_low/MODEL_CARD": { + "size_bytes": 250, + "md5_digest": "19cb47bbe9e07e8d7937cfd39027d3a9" + } + }, + "aliases": [ + "es-carlfm-x-low" + ] + }, + "es_ES-mls_10246-low": { + "key": "es_ES-mls_10246-low", + "name": "mls_10246", + "language": { + "code": "es_ES", + "family": "es", + "region": "ES", + "name_native": "Español", + "name_english": "Spanish", + "country_english": "Spain" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "es/es_ES/mls_10246/low/es_ES-mls_10246-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "ab8e93c9d2714fd4481fbca4e2a38891" + }, + "es/es_ES/mls_10246/low/es_ES-mls_10246-low.onnx.json": { + "size_bytes": 5954, + "md5_digest": "7d2635e157ac47bef9aea4c9e7516eaa" + }, + "es/es_ES/mls_10246/low/MODEL_CARD": { + "size_bytes": 257, + "md5_digest": "a345cefedda92347f53ea9a84d1b3983" + } + }, + "aliases": [ + "es-mls_10246-low" + ] + }, + "uk_UA-lada-x_low": { + "key": "uk_UA-lada-x_low", + "name": "lada", + "language": { + "code": "uk_UA", + "family": "uk", + "region": "UA", + "name_native": "украї́нська мо́ва", + "name_english": "Ukrainian", + "country_english": "Ukraine" + }, + "quality": "x_low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx": { + "size_bytes": 20628813, + 
"md5_digest": "b84110e3923d64cdd4e0056a22090557" + }, + "uk/uk_UA/lada/x_low/uk_UA-lada-x_low.onnx.json": { + "size_bytes": 5952, + "md5_digest": "0d75a309a184f4db1a3d51872a5737d5" + }, + "uk/uk_UA/lada/x_low/MODEL_CARD": { + "size_bytes": 267, + "md5_digest": "8de03ca7a0aee2a1c088638ec18fdb87" + } + }, + "aliases": [ + "uk-lada-x-low" + ] + }, + "uk_UA-ukrainian_tts-medium": { + "key": "uk_UA-ukrainian_tts-medium", + "name": "ukrainian_tts", + "language": { + "code": "uk_UA", + "family": "uk", + "region": "UA", + "name_native": "украї́нська мо́ва", + "name_english": "Ukrainian", + "country_english": "Ukraine" + }, + "quality": "medium", + "num_speakers": 3, + "speaker_id_map": { + "lada": 0, + "mykyta": 1, + "tetiana": 2 + }, + "files": { + "uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx": { + "size_bytes": 76735663, + "md5_digest": "3366c3d4f31cb77966fb14d042956b4f" + }, + "uk/uk_UA/ukrainian_tts/medium/uk_UA-ukrainian_tts-medium.onnx.json": { + "size_bytes": 2650, + "md5_digest": "f1aabbcfec611bfb1c7b12cf00156a39" + }, + "uk/uk_UA/ukrainian_tts/medium/MODEL_CARD": { + "size_bytes": 266, + "md5_digest": "d615c1c54d0017f4eb42c95dabc5573b" + } + }, + "aliases": [] + }, + "sw_CD-lanfrica-medium": { + "key": "sw_CD-lanfrica-medium", + "name": "lanfrica", + "language": { + "code": "sw_CD", + "family": "sw", + "region": "CD", + "name_native": "Kiswahili", + "name_english": "Swahili", + "country_english": "Democratic Republic of the Congo" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "7b28078f0e76cb201dc8b512ea4bf4d6" + }, + "sw/sw_CD/lanfrica/medium/sw_CD-lanfrica-medium.onnx.json": { + "size_bytes": 7009, + "md5_digest": "b4d8985b21902e559e49e5dec1858276" + }, + "sw/sw_CD/lanfrica/medium/MODEL_CARD": { + "size_bytes": 315, + "md5_digest": "225cc22fc4a35a83f2039988499baa85" + } + }, + "aliases": [] + }, + 
"fr_FR-siwis-low": { + "key": "fr_FR-siwis-low", + "name": "siwis", + "language": { + "code": "fr_FR", + "family": "fr", + "region": "FR", + "name_native": "Français", + "name_english": "French", + "country_english": "France" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "fr/fr_FR/siwis/low/fr_FR-siwis-low.onnx": { + "size_bytes": 28130791, + "md5_digest": "fcb614122005d70f27e4e61e58b4bb56" + }, + "fr/fr_FR/siwis/low/fr_FR-siwis-low.onnx.json": { + "size_bytes": 5950, + "md5_digest": "54392cc51bd08e8aa6270302e9d0180b" + }, + "fr/fr_FR/siwis/low/MODEL_CARD": { + "size_bytes": 274, + "md5_digest": "5d4a6b6e8d4a476e9b415ec0c1f030da" + } + }, + "aliases": [ + "fr-siwis-low" + ] + }, + "fr_FR-siwis-medium": { + "key": "fr_FR-siwis-medium", + "name": "siwis", + "language": { + "code": "fr_FR", + "family": "fr", + "region": "FR", + "name_native": "Français", + "name_english": "French", + "country_english": "France" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "fr/fr_FR/siwis/medium/fr_FR-siwis-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "20e876e8c839e9b11a26085858f2300c" + }, + "fr/fr_FR/siwis/medium/fr_FR-siwis-medium.onnx.json": { + "size_bytes": 7006, + "md5_digest": "ae79606057c9a36af84e3bef86bfffb4" + }, + "fr/fr_FR/siwis/medium/MODEL_CARD": { + "size_bytes": 284, + "md5_digest": "2b9ea48b15e9e1fd25f95b415caaf66f" + } + }, + "aliases": [ + "fr-siwis-medium" + ] + }, + "fr_FR-mls_1840-low": { + "key": "fr_FR-mls_1840-low", + "name": "mls_1840", + "language": { + "code": "fr_FR", + "family": "fr", + "region": "FR", + "name_native": "Français", + "name_english": "French", + "country_english": "France" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "fr/fr_FR/mls_1840/low/fr_FR-mls_1840-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "1873b5d95cb0aad9909d32d1747ae72b" + }, + "fr/fr_FR/mls_1840/low/fr_FR-mls_1840-low.onnx.json": { + 
"size_bytes": 5953, + "md5_digest": "498528bad39f13b5515e905452d585f1" + }, + "fr/fr_FR/mls_1840/low/MODEL_CARD": { + "size_bytes": 257, + "md5_digest": "35d860ab0a8497966c73da525728e711" + } + }, + "aliases": [ + "fr-mls_1840-low" + ] + }, + "fr_FR-gilles-low": { + "key": "fr_FR-gilles-low", + "name": "gilles", + "language": { + "code": "fr_FR", + "family": "fr", + "region": "FR", + "name_native": "Français", + "name_english": "French", + "country_english": "France" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "fr/fr_FR/gilles/low/fr_FR-gilles-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "f984386d1f0927597f09a3ec10b11b5d" + }, + "fr/fr_FR/gilles/low/fr_FR-gilles-low.onnx.json": { + "size_bytes": 5951, + "md5_digest": "4366e56a9131c47465f9bf70db59eb7e" + }, + "fr/fr_FR/gilles/low/MODEL_CARD": { + "size_bytes": 300, + "md5_digest": "9317af9efdb0d9986d42357b180f06e2" + } + }, + "aliases": [ + "fr-gilles-low" + ] + }, + "fi_FI-harri-low": { + "key": "fi_FI-harri-low", + "name": "harri", + "language": { + "code": "fi_FI", + "family": "fi", + "region": "FI", + "name_native": "Suomi", + "name_english": "Finnish", + "country_english": "Finland" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "fi/fi_FI/harri/low/fi_FI-harri-low.onnx": { + "size_bytes": 69795191, + "md5_digest": "f44b67203de7fd488eabc4692d30b598" + }, + "fi/fi_FI/harri/low/fi_FI-harri-low.onnx.json": { + "size_bytes": 5950, + "md5_digest": "626221fa0bec76d4f71bf713174fc781" + }, + "fi/fi_FI/harri/low/MODEL_CARD": { + "size_bytes": 284, + "md5_digest": "93ccf398abae82b7d7a3d420658e26f1" + } + }, + "aliases": [ + "fi-harri-low" + ] + }, + "fi_FI-harri-medium": { + "key": "fi_FI-harri-medium", + "name": "harri", + "language": { + "code": "fi_FI", + "family": "fi", + "region": "FI", + "name_native": "Suomi", + "name_english": "Finnish", + "country_english": "Finland" + }, + "quality": "medium", + "num_speakers": 1, + 
"speaker_id_map": {}, + "files": { + "fi/fi_FI/harri/medium/fi_FI-harri-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "8e96b9e765f8db3e910943520aa0f475" + }, + "fi/fi_FI/harri/medium/fi_FI-harri-medium.onnx.json": { + "size_bytes": 7006, + "md5_digest": "0cd2634e19aa67b04982eaa65c3ceb87" + }, + "fi/fi_FI/harri/medium/MODEL_CARD": { + "size_bytes": 304, + "md5_digest": "95d5aff86d27b69c8ee7deed6c056aff" + } + }, + "aliases": [] + }, + "ka_GE-natia-medium": { + "key": "ka_GE-natia-medium", + "name": "natia", + "language": { + "code": "ka_GE", + "family": "ka", + "region": "GE", + "name_native": "ქართული ენა", + "name_english": "Georgian", + "country_english": "Georgia" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "ka/ka_GE/natia/medium/ka_GE-natia-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "83bd40f8d176a83d3d8d605fada2a5e7" + }, + "ka/ka_GE/natia/medium/ka_GE-natia-medium.onnx.json": { + "size_bytes": 6920, + "md5_digest": "81163e42b6f91fc51cd611c2ed330386" + }, + "ka/ka_GE/natia/medium/MODEL_CARD": { + "size_bytes": 288, + "md5_digest": "81ac71dd5b3dac89bf7762bf7b738c95" + } + }, + "aliases": [] + }, + "pl_PL-darkman-medium": { + "key": "pl_PL-darkman-medium", + "name": "darkman", + "language": { + "code": "pl_PL", + "family": "pl", + "region": "PL", + "name_native": "Polski", + "name_english": "Polish", + "country_english": "Poland" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "pl/pl_PL/darkman/medium/pl_PL-darkman-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "27bf2d71e934b112657544fd0b100a7a" + }, + "pl/pl_PL/darkman/medium/pl_PL-darkman-medium.onnx.json": { + "size_bytes": 6922, + "md5_digest": "fe9c08562aafa2d7ab6996e5e46f75a0" + }, + "pl/pl_PL/darkman/medium/MODEL_CARD": { + "size_bytes": 276, + "md5_digest": "952772905864f6f6375df54a675895b7" + } + }, + "aliases": [] + }, + "pl_PL-mls_6892-low": { + "key": "pl_PL-mls_6892-low", + 
"name": "mls_6892", + "language": { + "code": "pl_PL", + "family": "pl", + "region": "PL", + "name_native": "Polski", + "name_english": "Polish", + "country_english": "Poland" + }, + "quality": "low", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "pl/pl_PL/mls_6892/low/pl_PL-mls_6892-low.onnx": { + "size_bytes": 63104526, + "md5_digest": "8590d8e979292ca35d20e6e123bfa612" + }, + "pl/pl_PL/mls_6892/low/pl_PL-mls_6892-low.onnx.json": { + "size_bytes": 5953, + "md5_digest": "e1452902947280b5d245c6534aa4905d" + }, + "pl/pl_PL/mls_6892/low/MODEL_CARD": { + "size_bytes": 257, + "md5_digest": "74ebc618d120896113449ad2f957b7a4" + } + }, + "aliases": [ + "pl-mls_6892-low" + ] + }, + "pl_PL-gosia-medium": { + "key": "pl_PL-gosia-medium", + "name": "gosia", + "language": { + "code": "pl_PL", + "family": "pl", + "region": "PL", + "name_native": "Polski", + "name_english": "Polish", + "country_english": "Poland" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "pl/pl_PL/gosia/medium/pl_PL-gosia-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "ecf817530e575025166e454adde1f382" + }, + "pl/pl_PL/gosia/medium/pl_PL-gosia-medium.onnx.json": { + "size_bytes": 6920, + "md5_digest": "82fe5f840c3af4c98e8a1430431ecdbd" + }, + "pl/pl_PL/gosia/medium/MODEL_CARD": { + "size_bytes": 274, + "md5_digest": "e1355330fe5fab166e6f2e20af7e91e9" + } + }, + "aliases": [] + }, + "no_NO-talesyntese-medium": { + "key": "no_NO-talesyntese-medium", + "name": "talesyntese", + "language": { + "code": "no_NO", + "family": "no", + "region": "NO", + "name_native": "Norsk", + "name_english": "Norwegian", + "country_english": "Norway" + }, + "quality": "medium", + "num_speakers": 1, + "speaker_id_map": {}, + "files": { + "no/no_NO/talesyntese/medium/no_NO-talesyntese-medium.onnx": { + "size_bytes": 63201294, + "md5_digest": "9fc876e7edc6593086b4f2f34889f44b" + }, + "no/no_NO/talesyntese/medium/no_NO-talesyntese-medium.onnx.json": { + "size_bytes": 
7012, + "md5_digest": "e01d969fb2f31121b1839b5f16e96738" + }, + "no/no_NO/talesyntese/medium/MODEL_CARD": { + "size_bytes": 312, + "md5_digest": "5fe51d2a4a0e05e85c88a80373000ae1" + } + }, + "aliases": [ + "no-talesyntese-medium" + ] + } +} \ No newline at end of file diff --git a/src/python_run/requirements.txt b/src/python_run/requirements.txt index 6d73988..84b6a31 100644 --- a/src/python_run/requirements.txt +++ b/src/python_run/requirements.txt @@ -1,2 +1,2 @@ -espeak-phonemizer>=1.1.0,<2 -onnxruntime~=1.11.0 +piper-phonemize~=1.0.0 +onnxruntime>=1.11.0,<2 diff --git a/src/python_run/setup.py b/src/python_run/setup.py new file mode 100644 index 0000000..2697144 --- /dev/null +++ b/src/python_run/setup.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +from pathlib import Path + +import setuptools +from setuptools import setup + +this_dir = Path(__file__).parent +module_dir = this_dir / "piper" + +requirements = [] +requirements_path = this_dir / "requirements.txt" +if requirements_path.is_file(): + with open(requirements_path, "r", encoding="utf-8") as requirements_file: + requirements = requirements_file.read().splitlines() + +data_files = [module_dir / "voices.json"] + +# ----------------------------------------------------------------------------- + +setup( + name="piper-tts", + version="1.1.0", + description="A fast, local neural text to speech system that sounds great and is optimized for the Raspberry Pi 4.", + url="http://github.com/rhasspy/piper", + author="Michael Hansen", + author_email="mike@rhasspy.org", + license="MIT", + packages=setuptools.find_packages(), + package_data={"piper": [str(p.relative_to(module_dir)) for p in data_files]}, + entry_points={ + "console_scripts": [ + "piper = piper.__main__:main", + ] + }, + install_requires=requirements, + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Topic :: Text Processing :: Linguistic", + "License :: OSI Approved :: MIT License", + "Programming Language :: 
Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + ], + keywords="rhasspy piper tts", +)