Add phoneme-silence

This commit is contained in:
Michael Hansen
2023-07-31 15:32:02 -05:00
parent dcb4c828cd
commit bd80cba1f3
3 changed files with 114 additions and 21 deletions

View File

@@ -3,6 +3,7 @@
#include <fstream>
#include <functional>
#include <map>
#include <optional>
#include <string>
#include <vector>
@@ -49,14 +50,22 @@ struct PhonemizeConfig {
};
struct SynthesisConfig {
  // ---- VITS inference parameters ----
  float noiseScale{0.667f};
  float lengthScale{1.0f};
  float noiseW{0.8f};

  // ---- Output audio format ----
  int sampleRate{22050};
  int sampleWidth{2}; // width of one sample: 2 => 16-bit
  int channels{1};    // 1 => mono

  // Which speaker to use, in the range [0, numSpeakers - 1].
  // Left empty when no speaker has been chosen.
  std::optional<SpeakerId> speakerId;

  // ---- Additional silence ----
  // Amount of extra silence, in seconds (per the field name).
  // NOTE(review): presumably applied per sentence -- confirm in the
  // synthesis code, which is not visible from this header.
  float sentenceSilenceSeconds{0.2f};

  // Optional table of extra silence, in seconds, keyed by phoneme.
  // Empty optional means no per-phoneme silence has been configured.
  std::optional<std::map<piper::Phoneme, float>> phonemeSilenceSeconds;
};
struct ModelConfig {
@@ -89,6 +98,12 @@ struct Voice {
ModelSession session;
};
// True if the string is a single UTF-8 codepoint.
//
// s: the string to examine. It is taken by value, so the argument is
//    copied (or moved) into the call.
// Returns: bool, true when s holds exactly one codepoint.
// NOTE(review): the definition is not visible from this header; confirm
// there how an empty string or malformed UTF-8 is treated.
bool isSingleCodepoint(std::string s);
// Get the first UTF-8 codepoint of a string.
//
// s: the string to read from. It is taken by value, so the argument is
//    copied (or moved) into the call.
// Returns: the first codepoint of s, as a Phoneme.
// NOTE(review): the definition is not visible from this header; confirm
// there what happens when s is empty or is not valid UTF-8.
Phoneme getCodepoint(std::string s);
// Get version of Piper.
//
// Returns: the version as a std::string.
// NOTE(review): the exact format of the version text is decided by the
// definition, which is not visible from this header.
std::string getVersion();