diff --git a/notebooks/pretrained_models.json b/notebooks/pretrained_models.json index a30173d..44ee547 100644 --- a/notebooks/pretrained_models.json +++ b/notebooks/pretrained_models.json @@ -14,15 +14,18 @@ }, "en-gb": { "alan-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/alan/medium/epoch%3D6339-step%3D1647790.ckpt", + "alba-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/alba/medium/epoch%3D4179-step%3D2101090.ckpt", "aru-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/aru/medium/epoch%3D3479-step%3D939600.ckpt", - "northern_english_male-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/northern_english_male/medium/epoch%3D9029-step%3D2261720.ckpt" + "jenny_dioco-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/jenny_dioco/medium/epoch%3D2748-step%3D1729300.ckpt", + "northern_english_male-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/northern_english_male/medium/epoch%3D9029-step%3D2261720.ckpt", + "vctk-medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_GB/vctk/medium/epoch%3D545-step%3D1511328.ckpt" }, "en-us": { "amy_medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/amy/medium/epoch%3D6679-step%3D1554200.ckpt", "arctic_medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/arctic/medium/epoch%3D663-step%3D646736.ckpt", "joe_medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/joe/medium/epoch%3D7889-step%3D1221224.ckpt", "kusal_medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/kusal/medium/epoch%3D2652-step%3D1953828.ckpt", - "l2arctic_medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/blob/main/en/en_US/l2arctic/medium/epoch%3D536-step%3D902160.ckpt", + "l2arctic_medium (fine-tuned)": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/l2arctic/medium/epoch%3D536-step%3D902160.ckpt", "lessac-high": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/lessac/high/epoch%3D2218-step%3D838782.ckpt", "lessac-low": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/lessac/low/epoch%3D2307-step%3D558536.ckpt", "lessac-medium": "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/lessac/medium/epoch%3D2164-step%3D1355540.ckpt", diff --git a/voices/voice-es-mari-medium/es-mari-medium.onnx b/voices/voice-es-mari-medium/es-mari-medium.onnx new file mode 100644 index 0000000..1e8d8ca Binary files /dev/null and b/voices/voice-es-mari-medium/es-mari-medium.onnx differ diff --git a/voices/voice-es-mari-medium/es-mari-medium.onnx.json b/voices/voice-es-mari-medium/es-mari-medium.onnx.json new file mode 100644 index 0000000..91370c8 --- /dev/null +++ b/voices/voice-es-mari-medium/es-mari-medium.onnx.json @@ -0,0 +1,482 @@ +{ + "audio": { + "sample_rate": 22050 + }, + "espeak": { + "voice": "es" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "PhonemeType.ESPEAK", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ] + }, + "num_symbols": 256, + "num_speakers": 1, + "speaker_id_map": {} +} \ No newline at end of file diff --git a/voices/voice-es-sharvard-medium/MODEL_CARD b/voices/voice-es-sharvard-medium/MODEL_CARD new file mode 100644 index 0000000..2364172 --- /dev/null +++ b/voices/voice-es-sharvard-medium/MODEL_CARD @@ -0,0 +1,16 @@ +# Model card for sharvard (medium) + +* Language: es_ES (Spanish, Spain) +* Speakers: 2 +* Quality: medium +* Samplerate: 22,050Hz + +## Dataset + +* URL: https://datashare.ed.ac.uk/handle/10283/574 +* License: http://creativecommons.org/licenses/by/3.0/ +* Paper: https://www.tandfonline.com/doi/abs/10.3109/14992027.2014.907507 + +## Training + +Finetuned from U.S. English lessac voice (medium quality). \ No newline at end of file diff --git a/voices/voice-es-sharvard-medium/es-sharvard-medium.onnx b/voices/voice-es-sharvard-medium/es-sharvard-medium.onnx new file mode 100644 index 0000000..77ca62d Binary files /dev/null and b/voices/voice-es-sharvard-medium/es-sharvard-medium.onnx differ diff --git a/voices/voice-es-sharvard-medium/es-sharvard-medium.onnx.json b/voices/voice-es-sharvard-medium/es-sharvard-medium.onnx.json new file mode 100644 index 0000000..53a983b --- /dev/null +++ b/voices/voice-es-sharvard-medium/es-sharvard-medium.onnx.json @@ -0,0 +1,485 @@ +{ + "audio": { + "sample_rate": 22050 + }, + "espeak": { + "voice": "es" + }, + "inference": { + "noise_scale": 0.667, + "length_scale": 1, + "noise_w": 0.8 + }, + "phoneme_type": "PhonemeType.ESPEAK", + "phoneme_map": {}, + "phoneme_id_map": { + "_": [ + 0 + ], + "^": [ + 1 + ], + "$": [ + 2 + ], + " ": [ + 3 + ], + "!": [ + 4 + ], + "'": [ + 5 + ], + "(": [ + 6 + ], + ")": [ + 7 + ], + ",": [ + 8 + ], + "-": [ + 9 + ], + ".": [ + 10 + ], + ":": [ + 11 + ], + ";": [ + 12 + ], + "?": [ + 13 + ], + "a": [ + 14 + ], + "b": [ + 15 + ], + "c": [ + 16 + ], + "d": [ + 17 + ], + "e": [ + 18 + ], + "f": [ + 19 + ], + "h": [ + 20 + ], + "i": [ + 21 + ], + "j": [ + 22 + ], + "k": [ + 23 + ], + "l": [ + 24 + ], + "m": [ + 25 + ], + "n": [ + 26 + ], + "o": [ + 27 + ], + "p": [ + 28 + ], + "q": [ + 29 + ], + "r": [ + 30 + ], + "s": [ + 31 + ], + "t": [ + 32 + ], + "u": [ + 33 + ], + "v": [ + 34 + ], + "w": [ + 35 + ], + "x": [ + 36 + ], + "y": [ + 37 + ], + "z": [ + 38 + ], + "æ": [ + 39 + ], + "ç": [ + 40 + ], + "ð": [ + 41 + ], + "ø": [ + 42 + ], + "ħ": [ + 43 + ], + "ŋ": [ + 44 + ], + "œ": [ + 45 + ], + "ǀ": [ + 46 + ], + "ǁ": [ + 47 + ], + "ǂ": [ + 48 + ], + "ǃ": [ + 49 + ], + "ɐ": [ + 50 + ], + "ɑ": [ + 51 + ], + "ɒ": [ + 52 + ], + "ɓ": [ + 53 + ], + "ɔ": [ + 54 + ], + "ɕ": [ + 55 + ], + "ɖ": [ + 56 + ], + "ɗ": [ + 57 + ], + "ɘ": [ + 58 + ], + "ə": [ + 59 + ], + "ɚ": [ + 60 + ], + "ɛ": [ + 61 + ], + "ɜ": [ + 62 + ], + "ɞ": [ + 63 + ], + "ɟ": [ + 64 + ], + "ɠ": [ + 65 + ], + "ɡ": [ + 66 + ], + "ɢ": [ + 67 + ], + "ɣ": [ + 68 + ], + "ɤ": [ + 69 + ], + "ɥ": [ + 70 + ], + "ɦ": [ + 71 + ], + "ɧ": [ + 72 + ], + "ɨ": [ + 73 + ], + "ɪ": [ + 74 + ], + "ɫ": [ + 75 + ], + "ɬ": [ + 76 + ], + "ɭ": [ + 77 + ], + "ɮ": [ + 78 + ], + "ɯ": [ + 79 + ], + "ɰ": [ + 80 + ], + "ɱ": [ + 81 + ], + "ɲ": [ + 82 + ], + "ɳ": [ + 83 + ], + "ɴ": [ + 84 + ], + "ɵ": [ + 85 + ], + "ɶ": [ + 86 + ], + "ɸ": [ + 87 + ], + "ɹ": [ + 88 + ], + "ɺ": [ + 89 + ], + "ɻ": [ + 90 + ], + "ɽ": [ + 91 + ], + "ɾ": [ + 92 + ], + "ʀ": [ + 93 + ], + "ʁ": [ + 94 + ], + "ʂ": [ + 95 + ], + "ʃ": [ + 96 + ], + "ʄ": [ + 97 + ], + "ʈ": [ + 98 + ], + "ʉ": [ + 99 + ], + "ʊ": [ + 100 + ], + "ʋ": [ + 101 + ], + "ʌ": [ + 102 + ], + "ʍ": [ + 103 + ], + "ʎ": [ + 104 + ], + "ʏ": [ + 105 + ], + "ʐ": [ + 106 + ], + "ʑ": [ + 107 + ], + "ʒ": [ + 108 + ], + "ʔ": [ + 109 + ], + "ʕ": [ + 110 + ], + "ʘ": [ + 111 + ], + "ʙ": [ + 112 + ], + "ʛ": [ + 113 + ], + "ʜ": [ + 114 + ], + "ʝ": [ + 115 + ], + "ʟ": [ + 116 + ], + "ʡ": [ + 117 + ], + "ʢ": [ + 118 + ], + "ʲ": [ + 119 + ], + "ˈ": [ + 120 + ], + "ˌ": [ + 121 + ], + "ː": [ + 122 + ], + "ˑ": [ + 123 + ], + "˞": [ + 124 + ], + "β": [ + 125 + ], + "θ": [ + 126 + ], + "χ": [ + 127 + ], + "ᵻ": [ + 128 + ], + "ⱱ": [ + 129 + ], + "0": [ + 130 + ], + "1": [ + 131 + ], + "2": [ + 132 + ], + "3": [ + 133 + ], + "4": [ + 134 + ], + "5": [ + 135 + ], + "6": [ + 136 + ], + "7": [ + 137 + ], + "8": [ + 138 + ], + "9": [ + 139 + ], + "̧": [ + 140 + ], + "̃": [ + 141 + ], + "̪": [ + 142 + ], + "̯": [ + 143 + ], + "̩": [ + 144 + ], + "ʰ": [ + 145 + ], + "ˤ": [ + 146 + ], + "ε": [ + 147 + ], + "↓": [ + 148 + ], + "#": [ + 149 + ], + "\"": [ + 150 + ], + "↑": [ + 151 + ], + "̺": [ + 152 + ], + "̻": [ + 153 + ] + }, + "num_symbols": 256, + "num_speakers": 2, + "speaker_id_map": { + "M": 0, + "F": 1 + } +} \ No newline at end of file