mirror of
https://github.com/pstrueb/piper.git
synced 2026-06-02 09:57:02 +00:00
Add more config options
This commit is contained in:
+22
-9
@@ -36,6 +36,7 @@ struct RunConfig {
|
||||
optional<float> noiseScale;
|
||||
optional<float> lengthScale;
|
||||
optional<float> noiseW;
|
||||
optional<float> sentenceSilenceSeconds;
|
||||
};
|
||||
|
||||
void parseArgs(int argc, char *argv[], RunConfig &runConfig);
|
||||
@@ -94,6 +95,11 @@ int main(int argc, char *argv[]) {
|
||||
voice.synthesisConfig.noiseW = runConfig.noiseW.value();
|
||||
}
|
||||
|
||||
if (runConfig.sentenceSilenceSeconds) {
|
||||
voice.synthesisConfig.sentenceSilenceSeconds =
|
||||
runConfig.sentenceSilenceSeconds.value();
|
||||
}
|
||||
|
||||
if (runConfig.outputType == OUTPUT_DIRECTORY) {
|
||||
runConfig.outputPath = filesystem::absolute(runConfig.outputPath.value());
|
||||
cerr << "Output directory: " << runConfig.outputPath.value() << endl;
|
||||
@@ -234,11 +240,14 @@ void printUsage(char *argv[]) {
|
||||
"becomes available"
|
||||
<< endl;
|
||||
cerr << " -s NUM --speaker NUM id of speaker (default: 0)" << endl;
|
||||
cerr << " --noise-scale NUM generator noise (default: 0.667)"
|
||||
cerr << " --noise_scale NUM generator noise (default: 0.667)"
|
||||
<< endl;
|
||||
cerr << " --length-scale NUM phoneme length (default: 1.0)"
|
||||
cerr << " --length_scale NUM phoneme length (default: 1.0)"
|
||||
<< endl;
|
||||
cerr << " --noise-w NUM phonene width noise (default: 0.8)"
|
||||
cerr << " --noise_w NUM phoneme width noise (default: 0.8)"
|
||||
<< endl;
|
||||
cerr << " --sentence_silence NUM seconds of silence after each "
|
||||
"sentence (default: 0.2)"
|
||||
<< endl;
|
||||
cerr << endl;
|
||||
}
|
||||
@@ -263,7 +272,8 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
|
||||
} else if (arg == "-c" || arg == "--config") {
|
||||
ensureArg(argc, argv, i);
|
||||
modelConfigPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "-f" || arg == "--output_file") {
|
||||
} else if (arg == "-f" || arg == "--output_file" ||
|
||||
arg == "--output-file") {
|
||||
ensureArg(argc, argv, i);
|
||||
std::string filePath = argv[++i];
|
||||
if (filePath == "-") {
|
||||
@@ -273,24 +283,27 @@ void parseArgs(int argc, char *argv[], RunConfig &runConfig) {
|
||||
runConfig.outputType = OUTPUT_FILE;
|
||||
runConfig.outputPath = filesystem::path(filePath);
|
||||
}
|
||||
} else if (arg == "-d" || arg == "--output_dir") {
|
||||
} else if (arg == "-d" || arg == "--output_dir" || arg == "--output-dir") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.outputType = OUTPUT_DIRECTORY;
|
||||
runConfig.outputPath = filesystem::path(argv[++i]);
|
||||
} else if (arg == "--output_raw") {
|
||||
} else if (arg == "--output_raw" || arg == "--output-raw") {
|
||||
runConfig.outputType = OUTPUT_RAW;
|
||||
} else if (arg == "-s" || arg == "--speaker") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.speakerId = (piper::SpeakerId)stol(argv[++i]);
|
||||
} else if (arg == "--noise-scale") {
|
||||
} else if (arg == "--noise_scale" || arg == "--noise-scale") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.noiseScale = stof(argv[++i]);
|
||||
} else if (arg == "--length-scale") {
|
||||
} else if (arg == "--length_scale" || arg == "--length-scale") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.lengthScale = stof(argv[++i]);
|
||||
} else if (arg == "--noise-w") {
|
||||
} else if (arg == "--noise_w" || arg == "--noise-w") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.noiseW = stof(argv[++i]);
|
||||
} else if (arg == "--sentence_silence" || arg == "--sentence-silence") {
|
||||
ensureArg(argc, argv, i);
|
||||
runConfig.sentenceSilenceSeconds = stof(argv[++i]);
|
||||
} else if (arg == "-h" || arg == "--help") {
|
||||
printUsage(argv);
|
||||
exit(0);
|
||||
|
||||
+63
-18
@@ -18,16 +18,31 @@ const float MAX_WAV_VALUE = 32767.0f;
|
||||
|
||||
const std::string instanceName{"piper"};
|
||||
|
||||
// True if the string is a single UTF-8 codepoint
|
||||
bool isSingleCodepoint(std::string s) {
|
||||
return utf8::distance(s.begin(), s.end()) == 1;
|
||||
}
|
||||
|
||||
// Get the first UTF-8 codepoint of a string
|
||||
Phoneme getCodepoint(std::string s) {
|
||||
utf8::iterator character_iter(s.begin(), s.begin(), s.end());
|
||||
return *character_iter;
|
||||
}
|
||||
|
||||
// Load JSON config information for phonemization
|
||||
void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) {
|
||||
// {
|
||||
// "espeak": {
|
||||
// "voice": "<language code>"
|
||||
// },
|
||||
// "phoneme_type": "<espeak or text>",
|
||||
// "phoneme_map": {
|
||||
// "<from phoneme>": ["<to phoneme 1>", "<to phoneme 2>", ...]
|
||||
// },
|
||||
// "phoneme_id_map": {
|
||||
// "<phoneme>": [<id1>, <id2>, ...]
|
||||
// }
|
||||
// }
|
||||
|
||||
if (configRoot.contains("espeak")) {
|
||||
if (!phonemizeConfig.eSpeak) {
|
||||
@@ -47,7 +62,27 @@ void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) {
|
||||
}
|
||||
}
|
||||
|
||||
// phoneme to [id] map
|
||||
// Maps phonemes to one or more phoneme ids (required).
|
||||
if (configRoot.contains("phoneme_id_map")) {
|
||||
auto phonemeIdMapValue = configRoot["phoneme_id_map"];
|
||||
for (auto &fromPhonemeItem : phonemeIdMapValue.items()) {
|
||||
std::string fromPhoneme = fromPhonemeItem.key();
|
||||
if (!isSingleCodepoint(fromPhoneme)) {
|
||||
throw std::runtime_error(
|
||||
"Phonemes must be one codepoint (phoneme id map)");
|
||||
}
|
||||
|
||||
auto fromCodepoint = getCodepoint(fromPhoneme);
|
||||
for (auto &toIdValue : fromPhonemeItem.value()) {
|
||||
PhonemeId toId = toIdValue.get<PhonemeId>();
|
||||
phonemizeConfig.phonemeIdMap[fromCodepoint].push_back(toId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// phoneme to [phoneme] map
|
||||
// Maps phonemes to one or more other phonemes (not normally used).
|
||||
if (configRoot.contains("phoneme_map")) {
|
||||
if (!phonemizeConfig.phonemeMap) {
|
||||
phonemizeConfig.phonemeMap.emplace();
|
||||
@@ -75,28 +110,22 @@ void parsePhonemizeConfig(json &configRoot, PhonemizeConfig &phonemizeConfig) {
|
||||
}
|
||||
}
|
||||
|
||||
// phoneme to [id] map
|
||||
if (configRoot.contains("phoneme_id_map")) {
|
||||
auto phonemeIdMapValue = configRoot["phoneme_id_map"];
|
||||
for (auto &fromPhonemeItem : phonemeIdMapValue.items()) {
|
||||
std::string fromPhoneme = fromPhonemeItem.key();
|
||||
if (!isSingleCodepoint(fromPhoneme)) {
|
||||
throw std::runtime_error(
|
||||
"Phonemes must be one codepoint (phoneme id map)");
|
||||
}
|
||||
|
||||
auto fromCodepoint = getCodepoint(fromPhoneme);
|
||||
for (auto &toIdValue : fromPhonemeItem.value()) {
|
||||
PhonemeId toId = toIdValue.get<PhonemeId>();
|
||||
phonemizeConfig.phonemeIdMap[fromCodepoint].push_back(toId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} /* parsePhonemizeConfig */
|
||||
|
||||
// Load JSON config for audio synthesis
|
||||
void parseSynthesisConfig(json &configRoot, SynthesisConfig &synthesisConfig) {
|
||||
|
||||
// {
|
||||
// "audio": {
|
||||
// "sample_rate": 22050
|
||||
// },
|
||||
// "inference": {
|
||||
// "noise_scale": 0.667,
|
||||
// "length_scale": 1,
|
||||
// "noise_w": 0.8
|
||||
// }
|
||||
// }
|
||||
|
||||
if (configRoot.contains("audio")) {
|
||||
auto audioValue = configRoot["audio"];
|
||||
if (audioValue.contains("sample_rate")) {
|
||||
@@ -105,6 +134,22 @@ void parseSynthesisConfig(json &configRoot, SynthesisConfig &synthesisConfig) {
|
||||
}
|
||||
}
|
||||
|
||||
if (configRoot.contains("inference")) {
|
||||
// Overrides default inference settings
|
||||
auto inferenceValue = configRoot["inference"];
|
||||
if (inferenceValue.contains("noise_scale")) {
|
||||
synthesisConfig.noiseScale = inferenceValue.value("noise_scale", 0.667f);
|
||||
}
|
||||
|
||||
if (inferenceValue.contains("length_scale")) {
|
||||
synthesisConfig.lengthScale = inferenceValue.value("length_scale", 1.0f);
|
||||
}
|
||||
|
||||
if (inferenceValue.contains("noise_w")) {
|
||||
synthesisConfig.noiseW = inferenceValue.value("noise_w", 0.8f);
|
||||
}
|
||||
}
|
||||
|
||||
} /* parseSynthesisConfig */
|
||||
|
||||
void parseModelConfig(json &configRoot, ModelConfig &modelConfig) {
|
||||
|
||||
Reference in New Issue
Block a user