{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4",
      "authorship_tag": "ABX9TyPBXwWHq64s9HTowl1s6H6d",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "\"Open"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# [Piper](https://github.com/rhasspy/piper) model exporter\n",
        "\n",
        "Notebook created by [rmcpantoja](http://github.com/rmcpantoja)"
      ],
      "metadata": {
        "id": "EOL-kjplZYEU"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "cellView": "form",
        "id": "FfMKr8v2RVOm"
      },
      "outputs": [],
      "source": [
        "#@title Install software\n",
        "#@markdown **Note: Please restart the runtime environment if prompted, and then continue to the next cell.**\n",
        "!git clone https://github.com/rhasspy/piper\n",
        "%cd /content/piper/src/python\n",
        "!pip install --upgrade pip\n",
        "# Version specifiers are quoted: unquoted, the shell treats \">\" as an output redirection and pip never sees them.\n",
        "!pip install \"cython>=0.29.0\" \"espeak-phonemizer>=1.1.0\" \"librosa>=0.9.2\" \"numpy>=1.19.0\" \"pytorch-lightning~=1.7.0\" \"torch~=1.11.0\"\n",
        "!pip install onnx onnxruntime-gpu\n",
        "!bash build_monotonic_align.sh\n",
        "# -y answers apt's confirmation prompt so the cell cannot stall waiting for input.\n",
        "!apt-get install -y espeak-ng\n",
        "!pip install torchtext==0.12.0"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Voice package generation section\n",
        "%cd /content/piper/src/python\n",
        "import os\n",
        "import ipywidgets as widgets\n",
        "from IPython.display import display\n",
        "import json\n",
        "from google.colab import output\n",
        "# Base URL of the audio cues played while the cell runs.\n",
        "guideurl = \"https://github.com/rmcpantoja/piper/blob/master/notebooks/wav/en\"\n",
        "#@markdown #### Download:\n",
        "#@markdown **Google Drive model ID:**\n",
        "model_id = \"\" #@param {type:\"string\"}\n",
        "#@markdown **Config.json Google Drive ID**\n",
        "config_id = \"\" #@param {type:\"string\"}\n",
        "#@markdown ---\n",
        "\n",
        "#@markdown #### Creation process:\n",
        "language = \"en-us\" #@param [\"en-us\", \"es\", \"fr\"]\n",
        "voice_name = \"myvoice\" #@param {type:\"string\"}\n",
        "voice_name = voice_name.lower()\n",
        "quality = \"medium\" #@param [\"high\", \"low\", \"medium\", \"x-low\"]\n",
        "# Fail early with a clear message instead of an opaque gdown / JSON error further down.\n",
        "if not model_id.strip() or not config_id.strip():\n",
        "    raise ValueError(\"Please fill in both the Google Drive model ID and the config ID before running this cell.\")\n",
        "export_voice_name = f\"{language}-{voice_name}-{quality}\"\n",
        "export_voice_path = \"/content/project/voice-\"+export_voice_name\n",
        "packages_path = \"/content/project/packages\"\n",
        "# exist_ok makes the cell safe to re-run without a separate existence check.\n",
        "os.makedirs(export_voice_path, exist_ok=True)\n",
        "os.makedirs(packages_path, exist_ok=True)\n",
        "print(\"Downloading model and his config...\")\n",
        "!gdown -q \"{model_id}\" -O /content/project/model.ckpt\n",
        "!gdown -q \"{config_id}\" -O \"{export_voice_path}/{export_voice_name}.onnx.json\"\n",
        "\n",
        "\n",
        "def play_audio_cue(name):\n",
        "    \"\"\"Play the audio cue called `name` from guideurl in the browser.\n",
        "\n",
        "    Playback is best effort: any failure is printed and never raised, so a\n",
        "    missing or blocked sound cannot abort the export.\n",
        "    \"\"\"\n",
        "    try:\n",
        "        # The f prefix belongs to the Python string, so the JavaScript receives the final URL.\n",
        "        output.eval_js(f'new Audio(\"{guideurl}/{name}.wav?raw=true\").play()')\n",
        "    except Exception as error:\n",
        "        print(f\"Could not play the '{name}' audio cue: {error}\")\n",
        "\n",
        "\n",
        "def start_process():\n",
        "    \"\"\"Run piper_train.export_onnx on the downloaded checkpoint, then pack the voice directory into a .tar.gz.\"\"\"\n",
        "    !python -m piper_train.export_onnx \"/content/project/model.ckpt\" \"{export_voice_path}/{export_voice_name}.onnx\"\n",
        "    print(\"compressing...\")\n",
        "    !tar -czvf \"{packages_path}/voice-{export_voice_name}.tar.gz\" -C \"{export_voice_path}\" .\n",
        "    play_audio_cue(\"success\")\n",
        "\n",
        "\n",
        "#@markdown **Do you want to write a model card?**\n",
        "write_model_card = True #@param {type:\"boolean\"}\n",
        "if write_model_card:\n",
        "    # The downloaded config supplies the values pre-filled in the model card template.\n",
        "    with open(f\"{export_voice_path}/{export_voice_name}.onnx.json\", \"r\", encoding=\"utf-8\") as file:\n",
        "        config = json.load(file)\n",
        "    sample_rate = config[\"audio\"][\"sample_rate\"]\n",
        "    num_speakers = config[\"num_speakers\"]\n",
        "    play_audio_cue(\"waiting\")\n",
        "    text_area = widgets.Textarea(\n",
        "        description = \"fill in this following template and press start to generate the voice package\",\n",
        "        value=f'# Model card for {voice_name} ({quality})\\n\\n* Language: {language} (normaliced)\\n* Speakers: {num_speakers}\\n* Quality: {quality}\\n* Samplerate: {sample_rate}Hz\\n\\n## Dataset\\n\\n* URL: \\n* License: \\n\\n## Training\\n\\nTrained from scratch.\\nOr finetuned from: ',\n",
        "        layout=widgets.Layout(width='500px', height='200px')\n",
        "    )\n",
        "    button = widgets.Button(description='Start')\n",
        "\n",
        "    def create_model_card(button):\n",
        "        \"\"\"Button callback: write the edited text to MODEL_CARD, remove the widgets and start the export.\"\"\"\n",
        "        model_card_text = text_area.value.strip()\n",
        "        with open(f'{export_voice_path}/MODEL_CARD', 'w', encoding=\"utf-8\") as file:\n",
        "            file.write(model_card_text)\n",
        "        text_area.close()\n",
        "        button.close()\n",
        "        output.clear()\n",
        "        play_audio_cue(\"starting\")\n",
        "        start_process()\n",
        "\n",
        "    button.on_click(create_model_card)\n",
        "\n",
        "    display(text_area, button)\n",
        "else:\n",
        "    # No model card requested: there is no button to wait for, so export right away.\n",
        "    start_process()"
      ],
      "metadata": {
        "cellView": "form",
        "id": "PqcoBb26V5xA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Download generated voice package\n",
        "from google.colab import files\n",
        "# packages_path and export_voice_name are defined by the voice package generation cell; run it first.\n",
        "files.download(f\"{packages_path}/voice-{export_voice_name}.tar.gz\")"
      ],
      "metadata": {
        "cellView": "form",
        "id": "Hu3V9CJeWc4Y"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}