From cd0763928a85b2845559921109dd1440ab41e8da Mon Sep 17 00:00:00 2001 From: Mateo Cedillo <54605382+rmcpantoja@users.noreply.github.com> Date: Sat, 10 Jun 2023 19:49:45 -0500 Subject: [PATCH] Updated training notebook to support HF models by @rhasspy. --- ...piper_multilingual_training_notebook.ipynb | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/notebooks/piper_multilingual_training_notebook.ipynb b/notebooks/piper_multilingual_training_notebook.ipynb index 4001140..63fc97a 100644 --- a/notebooks/piper_multilingual_training_notebook.ipynb +++ b/notebooks/piper_multilingual_training_notebook.ipynb @@ -205,7 +205,7 @@ "\n", "import os\n", "#@markdown ### First of all, select the language of your dataset.\n", - "language = \"English (U.S.)\" #@param [\"Català\", \"Dansk\", \"Deutsch\", \"Ελληνικά\", \"English (British)\", \"English (U.S.)\", \"Español\", \"Suomi\", \"Français\", \"Icelandic\", \"Italiano\", \"қазақша\", \"नेपाली\", \"Nederlands\", \"Norsk\", \"Polski\", \"Português (Brasil)\", \"Русский\", \"Svenska\", \"украї́нська\", \"Tiếng Việt\", \"简体中文\"]\n", + "language = \"English (U.S.)\" #@param [\"Català\", \"Dansk\", \"Deutsch\", \"Ελληνικά\", \"English (British)\", \"English (U.S.)\", \"Español\", \"Suomi\", \"Français\", \"ქართული\", \"Icelandic\", \"Italiano\", \"қазақша\", \"नेपाली\", \"Nederlands\", \"Norsk\", \"Polski\", \"Português (Brasil)\", \"Русский\", \"Svenska\", \"украї́нська\", \"Tiếng Việt\", \"简体中文\"]\n", "#@markdown ---\n", "# language definition:\n", "languages = {\n", @@ -220,6 +220,7 @@ " \"Français\": \"fr\",\n", " \"Icelandic\": \"is\",\n", " \"Italiano\": \"it\",\n", + " \"ქართული\": \"ka\",\n", " \"қазақша\": \"kk\",\n", " \"नेपाली\": \"ne\",\n", " \"Nederlands\": \"nl\",\n", @@ -272,7 +273,6 @@ " {force_sp}" ], "metadata": { - "cellView": "form", "id": "dOyx9Y6JYvRF" }, "execution_count": null, @@ -302,8 +302,12 @@ " def download_model(btn):\n", " model_name = model_dropdown.value\n", " 
model_url = pretrained_models[final_language][model_name]\n", - " !gdown \"{model_url}\" -O \"/content/pretrained.ckpt\"\n", - "\n", + " if model_url.startswith(\"1\"):\n", + " !gdown \"{model_url}\" -O \"/content/pretrained.ckpt\"\n", + " elif model_url.startswith(\"https://drive.google.com/file/d/\"):\n", + " !gdown \"{model_url}\" -O \"/content/pretrained.ckpt\" --fuzzy\n", + " else:\n", + " !wget \"{model_url}\" -O \"/content/pretrained.ckpt\"\n", " download_button.on_click(download_model)\n", " display(model_dropdown, download_button)\n", " else:\n", @@ -316,7 +320,7 @@ "batch_size = 16 #@param {type:\"integer\"}\n", "#@markdown ---\n", "#@markdown ### Validation split:\n", - "validation_split = 0.03 #@param {type:\"number\"}\n", + "validation_split = 0.01 #@param {type:\"number\"}\n", "#@markdown ---\n", "#@markdown ### Choose the quality for this model:\n", "\n", @@ -327,6 +331,7 @@ "quality = \"x-low\" #@param [\"high\", \"x-low\", \"medium\"]\n", "#@markdown ---\n", "#@markdown ### For how many epochs to save training checkpoints?\n", + "#@markdown The larger your dataset, the smaller this saving interval should be, because each epoch takes longer to complete.\n", "checkpoint_epochs = 25 #@param {type:\"integer\"}\n", "#@markdown ---\n", "#@markdown ### Step interval to generate model samples:\n", @@ -337,8 +342,8 @@ "#@markdown ---" ], "metadata": { - "cellView": "form", - "id": "ickQlOCRjkBL" + "id": "ickQlOCRjkBL", + "cellView": "form" }, "execution_count": null, "outputs": []