diff --git a/notebooks/piper_multilingual_training_notebook.ipynb b/notebooks/piper_multilingual_training_notebook.ipynb index a6b6864..16e7bbf 100644 --- a/notebooks/piper_multilingual_training_notebook.ipynb +++ b/notebooks/piper_multilingual_training_notebook.ipynb @@ -5,7 +5,7 @@ "colab": { "provenance": [], "gpuType": "T4", - "authorship_tag": "ABX9TyOhGmWaOcJ8eRFW1QnG2XyK", + "authorship_tag": "ABX9TyMwuqoJOxNquo9tfPltH+71", "include_colab_link": true }, "kernelspec": { @@ -138,12 +138,21 @@ "cell_type": "code", "source": [ "#@markdown ## 1. Extract dataset\n", + "#@markdown **Important: the audio files must be in WAV format (16000 or 22050 Hz, 16-bit, mono) and, for convenience, numbered. Example:**\n", + "\n", + "#@markdown * **1.wav**\n", + "#@markdown * **2.wav**\n", + "#@markdown * **3.wav**\n", + "#@markdown * **.....**\n", + "\n", + "#@markdown ---\n", + "\n", "%cd /content\n", "!mkdir /content/dataset\n", "%cd /content/dataset\n", "!mkdir /content/dataset/wavs\n", "#@markdown ### Audio dataset path to unzip\n", - "zip_path = \"/content/drive/MyDrive/Fakeyou/aldEnhanced/wavs.zip\" #@param {type:\"string\"}\n", + "zip_path = \"/content/drive/MyDrive/wavs.zip\" #@param {type:\"string\"}\n", "!unzip \"{zip_path}\" -d /content/dataset/wavs\n", "#@markdown ---" ], @@ -158,6 +167,14 @@ "cell_type": "code", "source": [ "#@markdown ## 2. Upload the transcript file\n", + "#@markdown Important: the transcript is a written record of what the character says in each audio file, and it must have the following structure:\n", + "\n", + "#@markdown * wavs/1.wav|This is what my character says in audio 1.\n", + "#@markdown * wavs/2.wav|This is the text that the character says in audio 2.\n", + "#@markdown * ...............\n", + "\n", + "#@markdown And so on. 
In addition, the transcript must be in .csv format (UTF-8 without BOM)\n", + "\n", "%cd /content/dataset\n", "from google.colab import files\n", "!rm /content/dataset/metadata.csv\n", @@ -275,7 +292,7 @@ " raise Exception(\"The pretrained_models.json file was not found.\")\n", "else:\n", " ft_command = \"\"\n", - "#@makrdown ### Choose batch size based on this dataset\n", + "#@markdown ### Choose batch size based on this dataset\n", "batch_size = 16 #@param {type:\"integer\"}\n", "#@markdown ---\n", "#@markdown ### Validation split\n", @@ -343,7 +360,8 @@ " --precision 32" ], "metadata": { - "id": "X4zbSjXg2J3N" + "id": "X4zbSjXg2J3N", + "cellView": "form" }, "execution_count": null, "outputs": []