Add --use-cuda

2026-05-03 20:48:02 +00:00 · 2023-11-20 09:55:33 -06:00
parent 38917ffd8c
commit 6c5e283439
4 changed files with 28 additions and 13 deletions
--- a/src/cpp/main.cpp
+++ b/src/cpp/main.cpp
@@ -19,8 +19,8 @@
 #endif

 #ifdef _WIN32
-  #include <io.h>
-  #include <fcntl.h>
+#include <fcntl.h>
+#include <io.h>
 #endif

 #ifdef __APPLE__
@@ -85,6 +85,9 @@ struct RunConfig {

  // Seconds of extra silence to insert after a single phoneme
  optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
+
+  // true to use CUDA execution provider
+  bool useCuda = false;
 };

 void parseArgs(int argc, char *argv[], RunConfig &runConfig);
@@ -114,7 +117,8 @@ int main(int argc, char *argv[]) {

  auto startTime = chrono::steady_clock::now();
  loadVoice(piperConfig, runConfig.modelPath.string(),
-            runConfig.modelConfigPath.string(), voice, runConfig.speakerId);
+            runConfig.modelConfigPath.string(), voice, runConfig.speakerId,
+            runConfig.useCuda);
  auto endTime = chrono::steady_clock::now();
  spdlog::info("Loaded voice in {} second(s)",
               chrono::duration<double>(endTime - startTime).count());
@@ -314,8 +318,8 @@ int main(int argc, char *argv[]) {

 #ifdef _WIN32
      // Needed on Windows to avoid terminal conversions
-      setmode(fileno(stdout),O_BINARY);
-      setmode(fileno(stdin),O_BINARY);
+      setmode(fileno(stdout), O_BINARY);
+      setmode(fileno(stdin), O_BINARY);
 #endif

      thread rawOutputThread(rawOutputProc, ref(sharedAudioBuffer),
@@ -434,10 +438,11 @@ void printUsage(char *argv[]) {
  cerr << "   --json-input                  stdin input is lines of JSON "
          "instead of plain text"
       << endl;
+  cerr << "   --use-cuda                    use CUDA execution provider"
+       << endl;
  cerr << "   --debug                       print DEBUG messages to the console"
       << endl;
-  cerr << "   -q       --quiet              disable logging"
-       << endl;
+  cerr << "   -q       --quiet              disable logging" << endl;
  cerr << endl;
 }

@@ -518,6 +523,8 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
      runConfig.tashkeelModelPath = filesystem::path(argv[++i]);
    } else if (arg == "--json_input" || arg == "--json-input") {
      runConfig.jsonInput = true;
+    } else if (arg == "--use_cuda" || arg == "--use-cuda") {
+      runConfig.useCuda = true;
    } else if (arg == "--version") {
      std::cout << piper::getVersion() << std::endl;
      exit(0);
--- a/src/cpp/piper.cpp
+++ b/src/cpp/piper.cpp
@@ -259,12 +259,18 @@ void terminate(PiperConfig &config) {
  spdlog::info("Terminated piper");
 }

-void loadModel(std::string modelPath, ModelSession &session) {
+void loadModel(std::string modelPath, ModelSession &session, bool useCuda) {
  spdlog::debug("Loading onnx model from {}", modelPath);
  session.env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
                         instanceName.c_str());
  session.env.DisableTelemetryEvents();

+  if (useCuda) {
+    // Use CUDA provider
+    OrtCUDAProviderOptions cuda_options{};
+    session.options.AppendExecutionProvider_CUDA(cuda_options);
+  }
+
  // Slows down performance by ~2x
  // session.options.SetIntraOpNumThreads(1);

@@ -301,7 +307,7 @@ void loadModel(std::string modelPath, ModelSession &session) {
 // Load Onnx model and JSON config file
 void loadVoice(PiperConfig &config, std::string modelPath,
               std::string modelConfigPath, Voice &voice,
-               std::optional<SpeakerId> &speakerId) {
+               std::optional<SpeakerId> &speakerId, bool useCuda) {
  spdlog::debug("Parsing voice config at {}", modelConfigPath);
  std::ifstream modelConfigFile(modelConfigPath);
  voice.configRoot = json::parse(modelConfigFile);
@@ -322,7 +328,7 @@ void loadVoice(PiperConfig &config, std::string modelPath,

  spdlog::debug("Voice contains {} speaker(s)", voice.modelConfig.numSpeakers);

-  loadModel(modelPath, voice.session);
+  loadModel(modelPath, voice.session, useCuda);

 } /* loadVoice */

--- a/src/cpp/piper.hpp
+++ b/src/cpp/piper.hpp
@@ -116,7 +116,7 @@ void terminate(PiperConfig &config);
 // Load Onnx model and JSON config file
 void loadVoice(PiperConfig &config, std::string modelPath,
               std::string modelConfigPath, Voice &voice,
-               std::optional<SpeakerId> &speakerId);
+               std::optional<SpeakerId> &speakerId, bool useCuda);

 // Phonemize text and synthesize audio
 void textToAudio(PiperConfig &config, Voice &voice, std::string text,
--- a/src/cpp/test.cpp
+++ b/src/cpp/test.cpp
@@ -36,14 +36,16 @@ int main(int argc, char *argv[]) {
  auto outputPath = std::string(argv[3]);

  optional<piper::SpeakerId> speakerId;
-  loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId);
+  loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId,
+            false);
  piper::initialize(piperConfig);

  // Output audio to WAV file
  ofstream audioFile(outputPath, ios::binary);

  piper::SynthesisResult result;
-  piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile, result);
+  piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile,
+                       result);
  piper::terminate(piperConfig);

  // Verify that file has some data