mirror of
https://github.com/pstrueb/piper.git
synced 2026-05-03 20:48:02 +00:00
Add --use-cuda
This commit is contained in:
@@ -19,8 +19,8 @@
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <io.h>
|
||||
#include <fcntl.h>
|
||||
#include <fcntl.h>
|
||||
#include <io.h>
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
@@ -85,6 +85,9 @@ struct RunConfig {
|
||||
|
||||
// Seconds of extra silence to insert after a single phoneme
|
||||
optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
|
||||
|
||||
// True to run the model with the CUDA execution provider (set by --use-cuda)
|
||||
bool useCuda = false;
|
||||
};
|
||||
|
||||
void parseArgs(int argc, char *argv[], RunConfig &runConfig);
|
||||
@@ -114,7 +117,8 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
auto startTime = chrono::steady_clock::now();
|
||||
loadVoice(piperConfig, runConfig.modelPath.string(),
|
||||
runConfig.modelConfigPath.string(), voice, runConfig.speakerId);
|
||||
runConfig.modelConfigPath.string(), voice, runConfig.speakerId,
|
||||
runConfig.useCuda);
|
||||
auto endTime = chrono::steady_clock::now();
|
||||
spdlog::info("Loaded voice in {} second(s)",
|
||||
chrono::duration<double>(endTime - startTime).count());
|
||||
@@ -314,8 +318,8 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
#ifdef _WIN32
|
||||
// Needed on Windows: switch stdin/stdout to binary mode to avoid text-mode translation
|
||||
setmode(fileno(stdout),O_BINARY);
|
||||
setmode(fileno(stdin),O_BINARY);
|
||||
setmode(fileno(stdout), O_BINARY);
|
||||
setmode(fileno(stdin), O_BINARY);
|
||||
#endif
|
||||
|
||||
thread rawOutputThread(rawOutputProc, ref(sharedAudioBuffer),
|
||||
@@ -434,10 +438,11 @@ void printUsage(char *argv[]) {
|
||||
cerr << " --json-input stdin input is lines of JSON "
|
||||
"instead of plain text"
|
||||
<< endl;
|
||||
cerr << " --use-cuda use CUDA execution provider"
|
||||
<< endl;
|
||||
cerr << " --debug print DEBUG messages to the console"
|
||||
<< endl;
|
||||
cerr << " -q --quiet disable logging"
|
||||
<< endl;
|
||||
cerr << " -q --quiet disable logging" << endl;
|
||||
cerr << endl;
|
||||
}
|
||||
|
||||
@@ -518,6 +523,8 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
|
||||
runConfig.tashkeelModelPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "--json_input" || arg == "--json-input") {
|
||||
runConfig.jsonInput = true;
|
||||
} else if (arg == "--use_cuda" || arg == "--use-cuda") {
|
||||
runConfig.useCuda = true;
|
||||
} else if (arg == "--version") {
|
||||
std::cout << piper::getVersion() << std::endl;
|
||||
exit(0);
|
||||
|
||||
@@ -259,12 +259,18 @@ void terminate(PiperConfig &config) {
|
||||
spdlog::info("Terminated piper");
|
||||
}
|
||||
|
||||
void loadModel(std::string modelPath, ModelSession &session) {
|
||||
void loadModel(std::string modelPath, ModelSession &session, bool useCuda) {
|
||||
spdlog::debug("Loading onnx model from {}", modelPath);
|
||||
session.env = Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,
|
||||
instanceName.c_str());
|
||||
session.env.DisableTelemetryEvents();
|
||||
|
||||
if (useCuda) {
|
||||
// Use CUDA provider
|
||||
OrtCUDAProviderOptions cuda_options{};
|
||||
session.options.AppendExecutionProvider_CUDA(cuda_options);
|
||||
}
|
||||
|
||||
// Left disabled: limiting intra-op threads to 1 slows down performance by ~2x
|
||||
// session.options.SetIntraOpNumThreads(1);
|
||||
|
||||
@@ -301,7 +307,7 @@ void loadModel(std::string modelPath, ModelSession &session) {
|
||||
// Load the ONNX model and its JSON config file
|
||||
void loadVoice(PiperConfig &config, std::string modelPath,
|
||||
std::string modelConfigPath, Voice &voice,
|
||||
std::optional<SpeakerId> &speakerId) {
|
||||
std::optional<SpeakerId> &speakerId, bool useCuda) {
|
||||
spdlog::debug("Parsing voice config at {}", modelConfigPath);
|
||||
std::ifstream modelConfigFile(modelConfigPath);
|
||||
voice.configRoot = json::parse(modelConfigFile);
|
||||
@@ -322,7 +328,7 @@ void loadVoice(PiperConfig &config, std::string modelPath,
|
||||
|
||||
spdlog::debug("Voice contains {} speaker(s)", voice.modelConfig.numSpeakers);
|
||||
|
||||
loadModel(modelPath, voice.session);
|
||||
loadModel(modelPath, voice.session, useCuda);
|
||||
|
||||
} /* loadVoice */
|
||||
|
||||
|
||||
@@ -116,7 +116,7 @@ void terminate(PiperConfig &config);
|
||||
// Load the ONNX model and its JSON config file
|
||||
void loadVoice(PiperConfig &config, std::string modelPath,
|
||||
std::string modelConfigPath, Voice &voice,
|
||||
std::optional<SpeakerId> &speakerId);
|
||||
std::optional<SpeakerId> &speakerId, bool useCuda);
|
||||
|
||||
// Phonemize text and synthesize audio
|
||||
void textToAudio(PiperConfig &config, Voice &voice, std::string text,
|
||||
|
||||
@@ -36,14 +36,16 @@ int main(int argc, char *argv[]) {
|
||||
auto outputPath = std::string(argv[3]);
|
||||
|
||||
optional<piper::SpeakerId> speakerId;
|
||||
loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId);
|
||||
loadVoice(piperConfig, modelPath, modelPath + ".json", voice, speakerId,
|
||||
false);
|
||||
piper::initialize(piperConfig);
|
||||
|
||||
// Output audio to WAV file
|
||||
ofstream audioFile(outputPath, ios::binary);
|
||||
|
||||
piper::SynthesisResult result;
|
||||
piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile, result);
|
||||
piper::textToWavFile(piperConfig, voice, "This is a test.", audioFile,
|
||||
result);
|
||||
piper::terminate(piperConfig);
|
||||
|
||||
// Verify that file has some data
|
||||
|
||||
Reference in New Issue
Block a user