# Patch overview (text before the first "diff --git" header is commentary).
#
# Files touched: CMakeLists.txt, src/cpp/piper.cpp, src/cpp/piper.hpp, and a
# new file src/cpp/test.cpp.
#
# CMakeLists.txt
#   * Removes the pkg-config lookup of spdlog and reads VERSION from the
#     directory of this CMakeLists.txt instead of ../../VERSION.
#   * Sources move to src/cpp/.
#   * fmt 10.0.0, spdlog 1.12.0 and piper-phonemize are built with
#     ExternalProject_Add into <binary dir>/*_install, each only when FMT_DIR,
#     SPDLOG_DIR or PIPER_PHONEMIZE_DIR is not already defined.
#   * WIN32 adds a /utf-8 compile option; the non-Apple, non-Windows branch
#     keeps -Wall -Wextra, the $ORIGIN rpath, static libgcc/libstdc++ and sets
#     PIPER_EXTRA_LIBRARIES to pthread.
#   * Adds a CTest target test_piper (src/cpp/test.cpp + src/cpp/piper.cpp)
#     run with etc/test_voice.onnx, the espeak-ng-data directory and test.wav.
#   * Adds install rules for the piper binary and for files taken from
#     ${PIPER_PHONEMIZE_DIR}/bin, /lib and /share.
#
# src/cpp/piper.cpp
#   * loadModel: under _WIN32 the model path is copied into a std::wstring
#     before being handed to Ort::Session; other platforms pass c_str().
#   * Both utf8::append call sites now pass the address of the output string.
#
# src/cpp/piper.hpp
#   * Three #include lines are replaced by three others (names not visible).
#
# src/cpp/test.cpp
#   * Requires three arguments (voice model, espeak-ng-data path, output WAV),
#     synthesizes "This is a test." and fails when tellp() on the output
#     stream is below 10000.
#
# NOTE(review): include(ExternalProject) is inside if(NOT DEFINED FMT_DIR),
#   yet ExternalProject_Add is also called in the spdlog and piper-phonemize
#   branches. Configuring with FMT_DIR predefined and the others not would
#   reach ExternalProject_Add without the module, unless it is included
#   somewhere outside this hunk -- confirm.
# NOTE(review): ONNXRUNTIME_DIR is used in include/link directories but is
#   never set in this diff; the in-file note says onnxruntime comes from
#   piper-phonemize -- confirm the variable is intended to be user-supplied.
# NOTE(review): add_dependencies is only declared for target piper; no
#   dependency of test_piper on the external projects is visible, and
#   test_piper does not link ${PIPER_EXTRA_LIBRARIES} -- confirm intended.
# NOTE(review): the piper-phonemize URL points at a branch archive
#   (refs/heads/workflow.zip), not a tag, so the fetched content is
#   presumably not pinned -- confirm.
# NOTE(review): the _WIN32 path conversion builds the wide string from the
#   narrow string's iterator range, i.e. one wide element per narrow char;
#   that is not a UTF-8 decode, so non-ASCII paths presumably break -- confirm.
# NOTE(review): this copy of the patch has its newlines collapsed and text
#   inside angle brackets appears to be missing (bare "#include",
#   "$<$:/utf-8>", "std::chrono::duration(", "optional speakerId"); the
#   original tokens cannot be recovered from what is shown here.
diff --git a/CMakeLists.txt b/CMakeLists.txt index b422426..95721e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,40 +2,163 @@ cmake_minimum_required(VERSION 3.13) project(piper C CXX) -find_package(PkgConfig) -pkg_check_modules(SPDLOG REQUIRED spdlog) - -file(READ "${CMAKE_CURRENT_LIST_DIR}/../../VERSION" piper_version) +file(READ "${CMAKE_CURRENT_LIST_DIR}/VERSION" piper_version) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -ADD_EXECUTABLE(piper main.cpp piper.cpp) +add_executable(piper src/cpp/main.cpp src/cpp/piper.cpp) -string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") -string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") +# NOTE: onnxruntime is pulled from piper-phonemize -set(PIPER_PHONEMIZE_ROOTDIR ${CMAKE_CURRENT_LIST_DIR}/../../lib/${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}/piper_phonemize) +# ---- fmt --- -target_link_libraries(piper - piper_phonemize - espeak-ng - onnxruntime - pthread - ${SPDLOG_LIBRARIES}) +if(NOT DEFINED FMT_DIR) + set(FMT_VERSION "10.0.0") + set(FMT_DIR "${CMAKE_CURRENT_BINARY_DIR}/fmt_install") -if(NOT APPLE) - target_link_libraries(piper -static-libgcc -static-libstdc++) + include(ExternalProject) + ExternalProject_Add( + fmt_external + URL "https://github.com/fmtlib/fmt/archive/refs/tags/${FMT_VERSION}.zip" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${FMT_DIR} + CMAKE_ARGS -DFMT_TEST:BOOL=OFF # Don't build all the tests + ) + add_dependencies(piper fmt_external) endif() +# ---- spdlog --- + +if(NOT DEFINED SPDLOG_DIR) + set(SPDLOG_DIR "${CMAKE_CURRENT_BINARY_DIR}/spdlog_install") + set(SPDLOG_VERSION "1.12.0") + ExternalProject_Add( + spdlog_external + URL "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.zip" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SPDLOG_DIR} + ) + add_dependencies(piper spdlog_external) +endif() + +# ---- piper-phonemize --- + +if(NOT DEFINED PIPER_PHONEMIZE_DIR) + set(PIPER_PHONEMIZE_DIR 
"${CMAKE_CURRENT_BINARY_DIR}/piper_phonemize_install") + ExternalProject_Add( + piper_phonemize_external + URL "https://github.com/rhasspy/piper-phonemize/archive/refs/heads/workflow.zip" + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PIPER_PHONEMIZE_DIR} + ) + add_dependencies(piper piper_phonemize_external) +endif() + +# ---- Declare executable ---- + +if(WIN32) + # Force compiler to use UTF-8 for IPA constants + add_compile_options("$<$:/utf-8>") + add_compile_options("$<$:/utf-8>") +elseif(NOT APPLE) + # Linux flags + string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") + string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") + target_link_libraries(piper -static-libgcc -static-libstdc++) + + set(PIPER_EXTRA_LIBRARIES "pthread") +endif() + +target_link_libraries(piper + fmt + spdlog + espeak-ng + piper_phonemize + onnxruntime + ${PIPER_EXTRA_LIBRARIES} +) + target_link_directories(piper PUBLIC - ${PIPER_PHONEMIZE_ROOTDIR}/lib) + ${FMT_DIR}/lib + ${SPDLOG_DIR}/lib + ${PIPER_PHONEMIZE_DIR}/lib + ${ONNXRUNTIME_DIR}/lib +) target_include_directories(piper PUBLIC - ${PIPER_PHONEMIZE_ROOTDIR}/include - ${SPDLOG_INCLUDE_DIRS}) - -target_compile_options(piper PUBLIC - ${SPDLOG_CFLAGS_OTHER}) + ${FMT_DIR}/include + ${SPDLOG_DIR}/include + ${PIPER_PHONEMIZE_DIR}/include + ${ONNXRUNTIME_DIR}/include +) target_compile_definitions(piper PUBLIC _PIPER_VERSION=${piper_version}) + +# ---- Declare test ---- + +include(CTest) +enable_testing() +add_executable(test_piper src/cpp/test.cpp src/cpp/piper.cpp) +add_test( + NAME test_piper + COMMAND test_piper "${CMAKE_SOURCE_DIR}/etc/test_voice.onnx" "${PIPER_PHONEMIZE_DIR}/share/espeak-ng-data" "${CMAKE_CURRENT_BINARY_DIR}/test.wav" + +) +target_compile_features(test_piper PUBLIC cxx_std_17) + +target_include_directories( + test_piper PUBLIC + ${FMT_DIR}/include + ${SPDLOG_DIR}/include + ${PIPER_PHONEMIZE_DIR}/include + ${ONNXRUNTIME_DIR}/include +) + +target_link_directories( + test_piper PUBLIC + ${FMT_DIR}/lib + 
${SPDLOG_DIR}/lib + ${PIPER_PHONEMIZE_DIR}/lib + ${ONNXRUNTIME_DIR}/lib +) + +target_link_libraries(test_piper PUBLIC + fmt + spdlog + espeak-ng + piper_phonemize + onnxruntime +) + +# ---- Declare install targets ---- + +install( + TARGETS piper + DESTINATION ${CMAKE_INSTALL_PREFIX}) + +# Dependencies +install( + DIRECTORY ${PIPER_PHONEMIZE_DIR}/bin/ + DESTINATION ${CMAKE_INSTALL_PREFIX} + USE_SOURCE_PERMISSIONS # keep +x + FILES_MATCHING + PATTERN "piper_phonemize" + PATTERN "espeak-ng" + PATTERN "*.dll" +) + +install( + DIRECTORY ${PIPER_PHONEMIZE_DIR}/lib/ + DESTINATION ${CMAKE_INSTALL_PREFIX} + FILES_MATCHING + PATTERN "*.dll" + PATTERN "*.so*" +) + +install( + DIRECTORY ${PIPER_PHONEMIZE_DIR}/share/espeak-ng-data + DESTINATION ${CMAKE_INSTALL_PREFIX} +) + +install( + FILES ${PIPER_PHONEMIZE_DIR}/share/libtashkeel_model.ort + DESTINATION ${CMAKE_INSTALL_PREFIX} +) diff --git a/src/cpp/piper.cpp b/src/cpp/piper.cpp index ef7eb49..838efa5 100644 --- a/src/cpp/piper.cpp +++ b/src/cpp/piper.cpp @@ -283,7 +283,16 @@ void loadModel(std::string modelPath, ModelSession &session) { session.options.DisableProfiling(); auto startTime = std::chrono::steady_clock::now(); - session.onnx = Ort::Session(session.env, modelPath.c_str(), session.options); + +#ifdef _WIN32 + auto modelPathW = std::wstring(modelPath.begin(), modelPath.end()); + auto modelPathStr = modelPathW.c_str(); +#else + auto modelPathStr = modelPath.c_str(); +#endif + + session.onnx = Ort::Session(session.env, modelPathStr, session.options); + auto endTime = std::chrono::steady_clock::now(); spdlog::debug("Loaded onnx model in {} second(s)", std::chrono::duration(endTime - startTime).count()); @@ -473,7 +482,7 @@ void textToAudio(PiperConfig &config, Voice &voice, std::string text, // DEBUG log for phonemes std::string phonemesStr; for (auto phoneme : sentencePhonemes) { - utf8::append(phoneme, phonemesStr); + utf8::append(phoneme, &phonemesStr); } spdlog::debug("Converting {} phoneme(s) to ids: {}", @@ 
-587,7 +596,7 @@ void textToAudio(PiperConfig &config, Voice &voice, std::string text, for (auto phonemeCount : missingPhonemes) { std::string phonemeStr; - utf8::append(phonemeCount.first, phonemeStr); + utf8::append(phonemeCount.first, &phonemeStr); spdlog::warn("Missing \"{}\" (\\u{:04X}): {} time(s)", phonemeStr, (uint32_t)phonemeCount.first, phonemeCount.second); } diff --git a/src/cpp/piper.hpp b/src/cpp/piper.hpp index 332a619..21f0ece 100644 --- a/src/cpp/piper.hpp +++ b/src/cpp/piper.hpp @@ -9,9 +9,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include "json.hpp" diff --git a/src/cpp/test.cpp b/src/cpp/test.cpp new file mode 100644 index 0000000..1b0782e --- /dev/null +++ b/src/cpp/test.cpp @@ -0,0 +1,58 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "json.hpp" +#include "piper.hpp" + +using namespace std; +using json = nlohmann::json; + +int main(int argc, char *argv[]) { + piper::PiperConfig piperConfig; + piper::Voice voice; + + if (argc < 2) { + std::cerr << "Need voice model path" << std::endl; + return 1; + } + + if (argc < 3) { + std::cerr << "Need espeak-ng-data path" << std::endl; + return 1; + } + + if (argc < 4) { + std::cerr << "Need output WAV path" << std::endl; + return 1; + } + + auto modelPath = std::string(argv[1]); + piperConfig.eSpeakDataPath = std::string(argv[2]); + auto outputPath = std::string(argv[3]); + + optional speakerId; + loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId); + piper::initialize(piperConfig); + + // Output audio to WAV file + ofstream audioFile(outputPath, ios::binary); + + piper::SynthesisResult result; + piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile, result); + piper::terminate(piperConfig); + + // Verify that file has some data + if (audioFile.tellp() < 10000) { + std::cerr << "ERROR: Output file is smaller than expected!" 
<< std::endl; + return EXIT_FAILURE; + } + + std::cout << "OK" << std::endl; + + return EXIT_SUCCESS; +}