diff --git a/Makefile b/Makefile index 6d8b4c5..505c962 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ .PHONY: piper clean LIB_DIR := lib/Linux-$(shell uname -m) +VERSION := $(shell cat VERSION) piper: mkdir -p build diff --git a/README.md b/README.md index bbfddd6..f2002a9 100644 --- a/README.md +++ b/README.md @@ -18,30 +18,31 @@ Voices are trained with [VITS](https://github.com/jaywalnut310/vits/) and export Our goal is to support Home Assistant and the [Year of Voice](https://www.home-assistant.io/blog/2022/12/20/year-of-voice/). -[Download voices](https://github.com/rhasspy/piper/releases/tag/v0.0.2) for the supported languages: +[Download voices](https://huggingface.co/rhasspy/piper-voices/tree/main) for the supported languages: -* Catalan (ca) -* Danish (da) -* German (de) -* British English (en-gb) -* U.S. English (en-us) -* Spanish (es) -* Finnish (fi) -* French (fr) -* Greek (el-gr) -* Icelandic (is) -* Italian (it) -* Kazakh (kk) -* Nepali (ne) -* Dutch (nl) -* Norwegian (no) -* Polish (pl) -* Brazilian Portuguese (pt-br) -* Russian (ru) -* Swedish (sv-se) -* Ukrainian (uk) -* Vietnamese (vi) -* Chinese (zh-cn) +* Catalan (ca_ES) +* Danish (da_DK) +* German (de_DE) +* English (en_GB, en_US) +* Spanish (es_ES, es_MX) +* Finnish (fi_FI) +* French (fr_FR) +* Greek (el_GR) +* Icelandic (is_IS) +* Italian (it_IT) +* Georgian (ka_GE) +* Kazakh (kk_KZ) +* Nepali (ne_NP) +* Dutch (nl_BE, nl_NL) +* Norwegian (no_NO) +* Polish (pl_PL) +* Portuguese (pt_BR) +* Russian (ru_RU) +* Swedish (sv_SE) +* Swahili (sw_CD) +* Ukrainian (uk_UA) +* Vietnamese (vi_VN) +* Chinese (zh_CN) ## Installation @@ -74,6 +75,32 @@ For multi-speaker models, use `--speaker ` to change speakers (default: See `piper --help` for more options. +### JSON Input + +The `piper` executable can accept JSON input when using the `--json-input` flag. Each line of input must be a JSON object with a `text` field. For example: + +``` json +{ "text": "First sentence to speak." 
} +{ "text": "Second sentence to speak." } +``` + +Optional fields include: + +* `speaker` - string + * Name of the speaker to use from `speaker_id_map` in config (multi-speaker voices only) +* `speaker_id` - number + * ID of the speaker to use, from 0 to (number of speakers - 1) (multi-speaker voices only, overrides "speaker") +* `output_file` - string + * Path to output WAV file + +The following example writes two sentences with different speakers to different files: + +``` json +{ "text": "First speaker.", "speaker_id": 0, "output_file": "/tmp/speaker_0.wav" } +{ "text": "Second speaker.", "speaker_id": 1, "output_file": "/tmp/speaker_1.wav" } +``` + + ## People using Piper Piper has been used in the following projects/papers: diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 792b680..b422426 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -5,6 +5,8 @@ project(piper C CXX) find_package(PkgConfig) pkg_check_modules(SPDLOG REQUIRED spdlog) +file(STRINGS "${CMAKE_CURRENT_LIST_DIR}/../../VERSION" piper_version LIMIT_COUNT 1) + set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -35,3 +37,5 @@ target_include_directories(piper PUBLIC target_compile_options(piper PUBLIC ${SPDLOG_CFLAGS_OTHER}) + +target_compile_definitions(piper PUBLIC _PIPER_VERSION=${piper_version}) diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index 892fd40..fa255a0 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -70,9 +70,10 @@ struct RunConfig { // stdin input is lines of JSON instead of text with format: // { - // "text": "...", (required) + // "text": str, (required) // "speaker_id": int, (optional) - // "output_file": "...", (optional) + // "speaker": str, (optional) + // "output_file": str, (optional) // } bool jsonInput = false; }; @@ -454,6 +455,9 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) { runConfig.tashkeelModelPath = filesystem::path(argv[++i]); } else if (arg == "--json_input" || arg == "--json-input") { runConfig.jsonInput = 
true; + } else if (arg == "--version") { + std::cout << piper::getVersion() << std::endl; + exit(0); } else if (arg == "--debug") { // Set DEBUG logging spdlog::set_level(spdlog::level::debug); diff --git a/src/cpp/piper.cpp b/src/cpp/piper.cpp index ccc2c1a..d83dd3f 100644 --- a/src/cpp/piper.cpp +++ b/src/cpp/piper.cpp @@ -16,11 +16,24 @@ namespace piper { +#ifdef _PIPER_VERSION +// https://stackoverflow.com/questions/47346133/how-to-use-a-define-inside-a-format-string +#define PIPER_STR_IMPL(x) #x +#define PIPER_STR(x) PIPER_STR_IMPL(x) +const std::string VERSION = PIPER_STR(_PIPER_VERSION); +#else +const std::string VERSION = ""; +#endif + // Maximum value for 16-bit signed WAV sample const float MAX_WAV_VALUE = 32767.0f; const std::string instanceName{"piper"}; +std::string getVersion() { + return VERSION; +} + // True if the string is a single UTF-8 codepoint bool isSingleCodepoint(std::string s) { return utf8::distance(s.begin(), s.end()) == 1; diff --git a/src/cpp/piper.hpp b/src/cpp/piper.hpp index 0c3175b..9e7c222 100644 --- a/src/cpp/piper.hpp +++ b/src/cpp/piper.hpp @@ -89,6 +89,9 @@ struct Voice { ModelSession session; }; +// Get version of Piper (empty string if _PIPER_VERSION was not defined at build time) +std::string getVersion(); + // Must be called before using textTo* functions void initialize(PiperConfig &config);