diff --git a/notebooks/piper_model_exporter.ipynb b/notebooks/piper_model_exporter.ipynb index 7614b7b..fbb7a0f 100644 --- a/notebooks/piper_model_exporter.ipynb +++ b/notebooks/piper_model_exporter.ipynb @@ -5,7 +5,7 @@ "colab": { "provenance": [], "gpuType": "T4", - "authorship_tag": "ABX9TyO3vQc6aE4AHf3HNbBfmr19", + "authorship_tag": "ABX9TyNIYQxa0c4fzWAKq19kBp4a", "include_colab_link": true }, "kernelspec": { @@ -41,12 +41,75 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "cellView": "form", - "id": "FfMKr8v2RVOm" + "id": "FfMKr8v2RVOm", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "1075781c-3830-47eb-89d3-9a88d0730659" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Installing...\n", + "/content/piper/src/python\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "torchaudio 2.0.2+cu118 requires torch==2.0.1, but you have torch 1.11.0 which is incompatible.\n", + "torchdata 0.6.1 requires torch==2.0.1, but you have torch 1.11.0 which is incompatible.\n", + "torchtext 0.15.2 requires torch==2.0.1, but you have torch 1.11.0 which is incompatible.\n", + "torchvision 0.15.2+cu118 requires torch==2.0.1, but you have torch 1.11.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.6/14.6 MB\u001b[0m \u001b[31m93.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.2/122.2 MB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.8/86.8 kB\u001b[0m \u001b[31m9.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "Reading package lists... Done\n", + "Building dependency tree \n", + "Reading state information... Done\n", + "The following additional packages will be installed:\n", + " espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n", + "The following NEW packages will be installed:\n", + " espeak-ng espeak-ng-data libespeak-ng1 libpcaudio0 libsonic0\n", + "0 upgraded, 5 newly installed, 0 to remove and 38 not upgraded.\n", + "Need to get 4,215 kB of archives.\n", + "After this operation, 12.0 MB of additional disk space will be used.\n", + "Get:1 http://archive.ubuntu.com/ubuntu focal/main amd64 libpcaudio0 amd64 1.1-4 [7,908 B]\n", + "Get:2 http://archive.ubuntu.com/ubuntu focal/main amd64 libsonic0 amd64 0.2.0-8 [13.1 kB]\n", + "Get:3 http://archive.ubuntu.com/ubuntu focal/main amd64 espeak-ng-data amd64 1.50+dfsg-6 [3,682 kB]\n", + "Get:4 http://archive.ubuntu.com/ubuntu focal/main amd64 libespeak-ng1 amd64 1.50+dfsg-6 [189 kB]\n", + "Get:5 http://archive.ubuntu.com/ubuntu focal/universe amd64 espeak-ng amd64 1.50+dfsg-6 [322 kB]\n", + "Fetched 4,215 kB in 1s (4,589 kB/s)\n", + "Selecting previously unselected package libpcaudio0:amd64.\n", + "(Reading database ... 122541 files and directories currently installed.)\n", + "Preparing to unpack .../libpcaudio0_1.1-4_amd64.deb ...\n", + "Unpacking libpcaudio0:amd64 (1.1-4) ...\n", + "Selecting previously unselected package libsonic0:amd64.\n", + "Preparing to unpack .../libsonic0_0.2.0-8_amd64.deb ...\n", + "Unpacking libsonic0:amd64 (0.2.0-8) ...\n", + "Selecting previously unselected package espeak-ng-data:amd64.\n", + "Preparing to unpack .../espeak-ng-data_1.50+dfsg-6_amd64.deb ...\n", + "Unpacking espeak-ng-data:amd64 (1.50+dfsg-6) ...\n", + "Selecting previously unselected package libespeak-ng1:amd64.\n", + "Preparing to unpack .../libespeak-ng1_1.50+dfsg-6_amd64.deb ...\n", + "Unpacking libespeak-ng1:amd64 (1.50+dfsg-6) ...\n", + "Selecting previously unselected package espeak-ng.\n", + "Preparing to unpack .../espeak-ng_1.50+dfsg-6_amd64.deb ...\n", + "Unpacking espeak-ng (1.50+dfsg-6) ...\n", + "Setting up libpcaudio0:amd64 (1.1-4) ...\n", + "Setting up libsonic0:amd64 (0.2.0-8) ...\n", + "Setting up espeak-ng-data:amd64 (1.50+dfsg-6) ...\n", + "Setting up libespeak-ng1:amd64 (1.50+dfsg-6) ...\n", + "Setting up espeak-ng (1.50+dfsg-6) ...\n", + "Processing triggers for man-db (2.9.1-1) ...\n", + "Processing triggers for libc-bin (2.31-0ubuntu9.9) ...\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m74.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDone!\n" + ] + } + ], "source": [ "#@title Install software\n", "\n", @@ -88,6 +151,15 @@ "voice_name = \"myvoice\" #@param {type:\"string\"}\n", "voice_name = voice_name.lower()\n", "quality = \"medium\" #@param [\"high\", \"low\", \"medium\", \"x-low\"]\n", + "def start_process():\n", + " if not os.path.exists(\"/content/project/model.ckpt\"):\n", + " raise Exception(\"Could not download model! make sure the file is shareable to everyone\")\n", + " !python -m piper_train.export_onnx \"/content/project/model.ckpt\" \"{export_voice_path}/{export_voice_name}.onnx\"\n", + " print(\"compressing...\")\n", + " !tar -czvf \"{packages_path}/voice-{export_voice_name}.tar.gz\" -C \"{export_voice_path}\" .\n", + " output.eval_js(f'new Audio(\"{guideurl}/success.wav?raw=true\").play()')\n", + " print(\"Done!\")\n", + "\n", "export_voice_name = f\"{language}-{voice_name}-{quality}\"\n", "export_voice_path = \"/content/project/voice-\"+export_voice_name\n", "packages_path = \"/content/project/packages\"\n", @@ -99,7 +171,7 @@ "if model_id.startswith(\"1\"):\n", " !gdown -q \"{model_id}\" -O /content/project/model.ckpt\n", "elif model_id.startswith(\"https://drive.google.com/file/d/\"):\n", - " !gdown \"{model_id}\" -O \"/content/project/model.ckpt\" --fuzzy\n", + " !gdown -q \"{model_id}\" -O \"/content/project/model.ckpt\" --fuzzy\n", "else:\n", " !wget \"{model_id}\" -O \"/content/project/model.ckpt\"\n", "if config_id.startswith(\"1\"):\n", @@ -109,7 +181,7 @@ "else:\n", " !wget \"{config_id}\" -O \"{export_voice_path}/{export_voice_name}.onnx.json\"\n", "#@markdown **Do you want to write a model card?**\n", - "write_model_card = True #@param {type:\"boolean\"}\n", + "write_model_card = False #@param {type:\"boolean\"}\n", "if write_model_card:\n", " with open(f\"{export_voice_path}/{export_voice_name}.onnx.json\", \"r\") as file:\n", " config = json.load(file)\n", @@ -137,23 +209,96 @@ "\n", " display(text_area, button)\n", "else:\n", - " start_process()\n", - "\n", - "def start_process():\n", - " if not os.path.exists(\"/content/project/model.ckpt\"):\n", - " raise Exception(\"Could not download model! make sure the file is shareable to everyone\")\n", - " !python -m piper_train.export_onnx \"/content/project/model.ckpt\" \"{export_voice_path}/{export_voice_name}.onnx\"\n", - " print(\"compressing...\")\n", - " !tar -czvf \"{packages_path}/voice-{export_voice_name}.tar.gz\" -C \"{export_voice_path}\" .\n", - " output.eval_js(f'new Audio(\"{guideurl}/success.wav?raw=true\").play()')\n", - " print(\"Done!\")" + " start_process()" ], "metadata": { "cellView": "form", - "id": "PqcoBb26V5xA" + "id": "PqcoBb26V5xA", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "9284cce3-2c84-4abc-e9d3-8e6dcf569cd8" }, - "execution_count": null, - "outputs": [] + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/piper/src/python\n", + "Downloading model and his config...\n", + "/usr/local/lib/python3.10/dist-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: '/usr/local/lib/python3.10/dist-packages/torchvision/image.so: undefined symbol: _ZN3c104impl8GPUTrace13gpuTraceStateE'If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?\n", + " warn(\n", + "INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /tmp/tmps308psa1\n", + "INFO:torch.distributed.nn.jit.instantiator:Writing /tmp/tmps308psa1/_remote_module_non_sriptable.py\n", + "Removing weight norm...\n", + "/content/piper/src/python/piper_train/vits/attentions.py:235: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " t_s == t_t\n", + "/content/piper/src/python/piper_train/vits/attentions.py:295: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " pad_length = max(length - (self.window_size + 1), 0)\n", + "/content/piper/src/python/piper_train/vits/attentions.py:296: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " slice_start_position = max((self.window_size + 1) - length, 0)\n", + "/content/piper/src/python/piper_train/vits/attentions.py:298: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if pad_length > 0:\n", + "/content/piper/src/python/piper_train/vits/transforms.py:174: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " assert (discriminant >= 0).all(), discriminant\n", + "/usr/local/lib/python3.10/dist-packages/torch/onnx/symbolic_helper.py:719: UserWarning: allowzero=0 by default. In order to honor zero value in shape use allowzero=1\n", + " warnings.warn(\"allowzero=0 by default. In order to honor zero value in shape use allowzero=1\")\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Warning: Constant folding - Only steps=1 can be constant folded for opset >= 10 onnx::Slice op. Constant folding not applied.\n", + "Traceback (most recent call last):\n", + " File \"/usr/lib/python3.10/runpy.py\", line 196, in _run_module_as_main\n", + " return _run_code(code, main_globals, None,\n", + " File \"/usr/lib/python3.10/runpy.py\", line 86, in _run_code\n", + " exec(code, run_globals)\n", + " File \"/content/piper/src/python/piper_train/export_onnx.py\", line 109, in \n", + " main()\n", + " File \"/content/piper/src/python/piper_train/export_onnx.py\", line 88, in main\n", + " torch.onnx.export(\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/onnx/__init__.py\", line 305, in export\n", + " return utils.export(model, args, f, export_params, verbose, training,\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/onnx/utils.py\", line 118, in export\n", + " _export(model, args, f, export_params, verbose, training, input_names, output_names,\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/onnx/utils.py\", line 719, in _export\n", + " _model_to_graph(model, args, verbose, input_names,\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/onnx/utils.py\", line 503, in _model_to_graph\n", + " graph = _optimize_graph(graph, operator_export_type,\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/onnx/utils.py\", line 232, in _optimize_graph\n", + " graph = torch._C._jit_pass_onnx(graph, operator_export_type)\n", + " File \"/usr/local/lib/python3.10/dist-packages/torch/onnx/__init__.py\", line 352, in _run_symbolic_function\n", + " def _run_symbolic_function(*args, **kwargs):\n", + "KeyboardInterrupt\n", + "^C\n", + "compressing...\n", + "./\n", + "./es-optimusbeta-medium.onnx.json\n", + "Done!\n" + ] + } + ] }, { "cell_type": "code", @@ -174,15 +319,29 @@ " if not os.path.exists(voicepacks_folder):\n", " os.makedirs(voicepacks_folder)\n", " !cp \"{packages_path}/voice-{export_voice_name}.tar.gz\" \"{voicepacks_folder}\"\n", - " msg = f\"You can find the generated voice packet at: {voicepacks_folder}.\"\n", + " msg = f\"You can find the generated voice package at: {voicepacks_folder}.\"\n", "print(f\"Done! {msg}\")" ], "metadata": { "cellView": "form", - "id": "Hu3V9CJeWc4Y" + "id": "Hu3V9CJeWc4Y", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "f2982574-cf81-45a1-8332-25aeee56d75a" }, "execution_count": null, - "outputs": [] + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Exporting package...\n", + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n", + "Done! You can find the generated voice package at: /content/drive/MyDrive/piper voice packages.\n" + ] + } + ] }, { "cell_type": "markdown",