diff --git a/.gitignore b/.gitignore index e8bcba1..8594f9f 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ tmp/ *.py[cod] *.egg +*.egg-info/ build htmlcov diff --git a/Dockerfile b/Dockerfile index 2253f55..2fa146d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,8 +14,8 @@ ENV DEBIAN_FRONTEND=noninteractive WORKDIR /build # Build minimal version of espeak-ng -ADD lib/espeak-ng-1.51.tar.gz ./ -RUN cd espeak-ng-1.51 && \ +ADD lib/espeak-ng-1.52-patched.tar.gz ./ +RUN cd espeak-ng && \ ./autogen.sh && \ ./configure \ --without-pcaudiolib \ @@ -30,9 +30,10 @@ RUN cd espeak-ng-1.51 && \ make install # Copy onnxruntime library -COPY lib/ ./lib/ -RUN mkdir -p /usr/local/include/onnxruntime && \ - tar -C /usr/local/include/onnxruntime \ +COPY lib/onnxruntime-linux-*.tgz ./lib/ +RUN export ONNX_DIR="./lib/Linux-$(uname -m)" && \ + mkdir -p "${ONNX_DIR}" && \ + tar -C "${ONNX_DIR}" \ --strip-components 1 \ -xvf "lib/onnxruntime-linux-${TARGETARCH}${TARGETVARIANT}.tgz" @@ -49,7 +50,7 @@ WORKDIR /dist RUN mkdir -p piper && \ cp -d /usr/lib64/libespeak-ng.so* ./piper/ && \ cp -dR /usr/share/espeak-ng-data ./piper/ && \ - cp -d /usr/local/include/onnxruntime/lib/libonnxruntime.so.* ./piper/ && \ + find /build/lib/ -name 'libonnxruntime.so.*' -exec cp -d {} ./piper/ \; && \ cp /build/build/piper ./piper/ && \ tar -czf "piper_${TARGETARCH}${TARGETVARIANT}.tar.gz" piper/ diff --git a/Dockerfile.dockerignore b/Dockerfile.dockerignore index bc2a31a..f3d5121 100644 --- a/Dockerfile.dockerignore +++ b/Dockerfile.dockerignore @@ -1,4 +1,5 @@ * !Makefile !src/cpp/ -!lib/ +!lib/onnxruntime*.tgz +!lib/espeak-ng*.tar.gz diff --git a/README.md b/README.md index ab03d2b..d0ecc22 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,9 @@ Download a release: * [amd64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_amd64.tar.gz) (desktop Linux) * [arm64](https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_arm64.tar.gz) (Raspberry Pi 4) -If you want to build 
from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.13.1. +If you want to build from source, see the [Makefile](Makefile) and [C++ source](src/cpp). Piper depends on a patched `espeak-ng` in [lib](lib). + +Last tested with [onnxruntime](https://github.com/microsoft/onnxruntime) 1.14.1. ## Usage diff --git a/lib/.gitignore b/lib/.gitignore new file mode 100644 index 0000000..f590756 --- /dev/null +++ b/lib/.gitignore @@ -0,0 +1,2 @@ +espeak-ng/ +Linux-*/ diff --git a/lib/espeak-ng-1.51.tar.gz b/lib/espeak-ng-1.52-patched.tar.gz similarity index 59% rename from lib/espeak-ng-1.51.tar.gz rename to lib/espeak-ng-1.52-patched.tar.gz index 088b2a0..164cd97 100644 Binary files a/lib/espeak-ng-1.51.tar.gz and b/lib/espeak-ng-1.52-patched.tar.gz differ diff --git a/src/cpp/config.hpp b/src/cpp/config.hpp index f0ec98a..b717d51 100644 --- a/src/cpp/config.hpp +++ b/src/cpp/config.hpp @@ -32,8 +32,10 @@ struct eSpeakConfig { // Characters that eSpeak uses to break apart paragraphs/sentences set clauseBreakers{U'.', U'?', U'!', U',', U';', U':'}; - // Characters that piper will use to split utterances - set sentenceBreakers{U'.', U'?', U'!'}; + Phoneme fullStop = U'.'; + Phoneme comma = U','; + Phoneme question = U'?'; + Phoneme exclamation = U'!'; }; struct PhonemizeConfig { diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index c41a513..c47ec68 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -53,7 +53,8 @@ int main(int argc, char *argv[]) { RunConfig runConfig; parseArgs(argc, argv, runConfig); - auto exePath = filesystem::path(argv[0]); + // NOTE: This won't work for Windows (need GetModuleFileName) + auto exePath = filesystem::canonical("/proc/self/exe"); piper::initialize(exePath.parent_path()); piper::Voice voice; diff --git a/src/cpp/phonemize.hpp b/src/cpp/phonemize.hpp index 7333c09..80e166d 100644 --- a/src/cpp/phonemize.hpp +++ b/src/cpp/phonemize.hpp @@ -15,6 +15,13 
@@ #include "config.hpp" #include "utf8.h" +#define CLAUSE_INTONATION_FULL_STOP 0x00000000 +#define CLAUSE_INTONATION_COMMA 0x00001000 +#define CLAUSE_INTONATION_QUESTION 0x00002000 +#define CLAUSE_INTONATION_EXCLAMATION 0x00003000 + +#define CLAUSE_TYPE_SENTENCE 0x00080000 + using namespace std; namespace piper { @@ -54,13 +61,15 @@ void phonemize(string text, PhonemizeConfig &phonemizeConfig, vector *sentencePhonemes = nullptr; const char *inputTextPointer = textCopy.c_str(); - size_t clauseBreakerIndex = 0; + int terminator = 0; while (inputTextPointer != NULL) { + // Modified espeak-ng API to get access to clause terminator string clausePhonemes( - espeak_TextToPhonemes((const void **)&inputTextPointer, + espeak_TextToPhonemes2((const void **)&inputTextPointer, /*textmode*/ espeakCHARS_AUTO, - /*phonememode = IPA*/ 0x02)); + /*phonememode = IPA*/ 0x02, + &terminator)); utf8::iterator phonemeIter(clausePhonemes.begin(), clausePhonemes.begin(), clausePhonemes.end()); @@ -74,17 +83,25 @@ void phonemize(string text, PhonemizeConfig &phonemizeConfig, } sentencePhonemes->insert(sentencePhonemes->end(), phonemeIter, phonemeEnd); - if (clauseBreakerIndex < textClauseBreakers.size()) { - auto clauseBreaker = textClauseBreakers[clauseBreakerIndex]; - sentencePhonemes->push_back(clauseBreaker); - if (phonemizeConfig.eSpeak->sentenceBreakers.contains(clauseBreaker)) { + + // Add appropriate punctuation depending on terminator type + int intonation = terminator & 0x0000F000; + if (intonation == CLAUSE_INTONATION_FULL_STOP) { + sentencePhonemes->push_back(phonemizeConfig.eSpeak->fullStop); + } else if (intonation == CLAUSE_INTONATION_COMMA) { + sentencePhonemes->push_back(phonemizeConfig.eSpeak->comma); + } else if (intonation == CLAUSE_INTONATION_QUESTION) { + sentencePhonemes->push_back(phonemizeConfig.eSpeak->question); + } else if (intonation == CLAUSE_INTONATION_EXCLAMATION) { + sentencePhonemes->push_back(phonemizeConfig.eSpeak->exclamation); + } + + if ((terminator & 
CLAUSE_TYPE_SENTENCE) == CLAUSE_TYPE_SENTENCE) { // End of sentence sentencePhonemes = nullptr; - } - - clauseBreakerIndex++; } - } + + } // while inputTextPointer != NULL } /* phonemize */ diff --git a/src/cpp/piper.hpp b/src/cpp/piper.hpp index 98211f8..7f07afc 100644 --- a/src/cpp/piper.hpp +++ b/src/cpp/piper.hpp @@ -30,7 +30,7 @@ struct Voice { void initialize(std::filesystem::path cwd) { const char *dataPath = NULL; - auto cwdDataPath = cwd.append("espeak-ng-data"); + auto cwdDataPath = std::filesystem::absolute(cwd.append("espeak-ng-data")); if (std::filesystem::is_directory(cwdDataPath)) { dataPath = cwdDataPath.c_str(); }