Added some details.

2026-04-19 06:44:50 +00:00 · 2023-05-25 22:05:54 -05:00
parent 8c9685880a
commit d4535a09f6
1 changed files with 22 additions and 4 deletions
--- a/notebooks/piper_multilingual_training_notebook.ipynb
+++ b/notebooks/piper_multilingual_training_notebook.ipynb
@@ -5,7 +5,7 @@
    "colab": {
      "provenance": [],
      "gpuType": "T4",
-      "authorship_tag": "ABX9TyOhGmWaOcJ8eRFW1QnG2XyK",
+      "authorship_tag": "ABX9TyMwuqoJOxNquo9tfPltH+71",
      "include_colab_link": true
    },
    "kernelspec": {
@@ -138,12 +138,21 @@
      "cell_type": "code",
      "source": [
        "#@markdown ## 1. Extract dataset\n",
+        "#@markdown <font color=\"orange\">**Important: the audio files must be in WAV format (16000 or 22050 Hz, 16-bit, mono) and, for convenience, numbered. Example:**</font>\n",
+        "\n",
+        "#@markdown * <font color=\"orange\">**1.wav**</font>\n",
+        "#@markdown * <font color=\"orange\">**2.wav**</font>\n",
+        "#@markdown * <font color=\"orange\">**3.wav**</font>\n",
+        "#@markdown * <font color=\"orange\">**.....**</font>\n",
+        "\n",
+        "#@markdown ---\n",
+        "\n",
        "%cd /content\n",
        "!mkdir /content/dataset\n",
        "%cd /content/dataset\n",
        "!mkdir /content/dataset/wavs\n",
        "#@markdown ### Audio dataset path to unzip\n",
-        "zip_path = \"/content/drive/MyDrive/Fakeyou/aldEnhanced/wavs.zip\" #@param {type:\"string\"}\n",
+        "zip_path = \"/content/drive/MyDrive/wavs.zip\" #@param {type:\"string\"}\n",
        "!unzip \"{zip_path}\" -d /content/dataset/wavs\n",
        "#@markdown ---"
      ],
@@ -158,6 +167,14 @@
      "cell_type": "code",
      "source": [
        "#@markdown ## 2. Upload the transcript file\n",
+        "#@markdown Important: the transcript is the text of what the character says in each audio file, and it must have the following structure:\n",
+        "\n",
+        "#@markdown * wavs/1.wav|This is what my character says in audio 1.\n",
+        "#@markdown * wavs/2.wav|This, the text that the character says in audio 2.\n",
+        "#@markdown * ...............\n",
+        "\n",
+        "#@markdown And so on. In addition, the transcript must be a .csv file (UTF-8 without BOM).\n",
+        "\n",
        "%cd /content/dataset\n",
        "from google.colab import files\n",
        "!rm /content/dataset/metadata.csv\n",
@@ -275,7 +292,7 @@
        "        raise Exception(\"The pretrained_models.json file was not found.\")\n",
        "else:\n",
        "    ft_command = \"\"\n",
-        "#@makrdown ### Choose batch size based on this dataset\n",
+        "#@markdown ### Choose batch size based on this dataset\n",
        "batch_size = 16 #@param {type:\"integer\"}\n",
        "#@markdown ---\n",
        "#@markdown ### Validation split\n",
@@ -343,7 +360,8 @@
        "    --precision 32"
      ],
      "metadata": {
-        "id": "X4zbSjXg2J3N"
+        "id": "X4zbSjXg2J3N",
+        "cellView": "form"
      },
      "execution_count": null,
      "outputs": []