diff --git a/notebooks/piper_multilingual_training_notebook.ipynb b/notebooks/piper_multilingual_training_notebook.ipynb index cb96aac..a6b6864 100644 --- a/notebooks/piper_multilingual_training_notebook.ipynb +++ b/notebooks/piper_multilingual_training_notebook.ipynb @@ -4,7 +4,8 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyPovMyxp8xorYRHeQp1RAP2", + "gpuType": "T4", + "authorship_tag": "ABX9TyOhGmWaOcJ8eRFW1QnG2XyK", "include_colab_link": true }, "kernelspec": { @@ -13,7 +14,9 @@ }, "language_info": { "name": "python" - } + }, + "accelerator": "GPU", + "gpuClass": "standard" }, "cells": [ { @@ -140,7 +143,7 @@ "%cd /content/dataset\n", "!mkdir /content/dataset/wavs\n", "#@markdown ### Audio dataset path to unzip\n", - "zip_path = \"/content/drive/MyDrive/Fakeyou/odal_castilian/wavs16k.zip\" #@param {type:\"string\"}\n", + "zip_path = \"/content/drive/MyDrive/Fakeyou/aldEnhanced/wavs.zip\" #@param {type:\"string\"}\n", "!unzip \"{zip_path}\" -d /content/dataset/wavs\n", "#@markdown ---" ], @@ -221,7 +224,7 @@ "dataset_format = \"ljspeech\" #@param [\"ljspeech\", \"mycroft\"]\n", "#@markdown ---\n", "#@markdown ### Select the sample rate of the dataset\n", - "sample_rate = \"16000\" #@param [\"16000\", \"22050\"]\n", + "sample_rate = \"22050\" #@param [\"16000\", \"22050\"]\n", "#@markdown ---\n", "%cd /content/piper/src/python\n", "!python -m piper_train.preprocess \\\n", @@ -252,17 +255,17 @@ "if finetune:\n", " ft_command = '--resume_from_checkpoint \"/content/pretrained.ckpt\" '\n", " try:\n", - " with open('/CONTENT/PIPER/NOTEBOOKS/pretrained_models.json') as f:\n", + " with open('/content/piper/notebooks/pretrained_models.json') as f:\n", " pretrained_models = json.load(f)\n", " if final_language in pretrained_models:\n", " models = pretrained_models[final_language]\n", - " model_options = [(model_name, model_url) for model_name, model_url in models.items()]\n", + " model_options = [(model_name, model_name) for model_name, model_url in models.items()]\n", " model_dropdown = widgets.Dropdown(description = \"Choose pretrained model\", options=model_options)\n", " download_button = widgets.Button(description=\"Download\")\n", " def download_model(btn):\n", - " model_name, model_url = model_dropdown.value\n", - " file_id = model_url.split('/')[-2]\n", - " !gdown {file_id} -O \"/content/pretrained.ckpt\"\n", + " model_name = model_dropdown.value\n", + " model_url = pretrained_models[final_language][model_name]\n", + " !gdown \"{model_url}\" -O \"/content/pretrained.ckpt\"\n", "\n", " download_button.on_click(download_model)\n", " display(model_dropdown, download_button)\n", @@ -273,7 +276,7 @@ "else:\n", " ft_command = \"\"\n", "#@makrdown ### Choose batch size based on this dataset\n", - "batch_size = 8 #@param {type:\"integer\"}\n", + "batch_size = 16 #@param {type:\"integer\"}\n", "#@markdown ---\n", "#@markdown ### Validation split\n", "validation_split = 0.03 #@param {type:\"number\"}\n", @@ -284,13 +287,13 @@ "#@markdown * low - 16Khz audio, 15-20M params\n", "#@markdown * medium - 22.05Khz audio, 15-20 params\n", "#@markdown * high - 22.05Khz audio, 28-32M params\n", - "quality = \"x-low\" #@param [\"high\", \"low\", \"medium\", \"x-low\"]\n", + "quality = \"x-low\" #@param [\"high\", \"x-low\", \"medium\"]\n", "#@markdown ---\n", "#@markdown ### For how many steps to save training checkpoints?\n", "checkpoint_epochs = 25 #@param {type:\"integer\"}\n", "#@markdown ---\n", "#@markdown ### Step interval to generate model samples\n", - "log_every_n_steps = 1000 #@param {type:\"integer\"}\n", + "log_every_n_steps = 250 #@param {type:\"integer\"}\n", "#@markdown ---\n", "#@markdown ### training epochs\n", "max_epochs = 5000 #@param {type:\"integer\"}\n", @@ -336,11 +339,10 @@ " --num_sanity_val_steps 1000 \\\n", " --log_every_n_steps {log_every_n_steps} \\\n", " --max_epochs {max_epochs} \\\n", - " {ft-command}\\\n", + " {ft_command}\\\n", " --precision 32" ], "metadata": { - "cellView": "form", "id": "X4zbSjXg2J3N" }, "execution_count": null,