diff --git a/notebooks/piper_inference_(ONNX).ipynb b/notebooks/piper_inference_(ONNX).ipynb index 59fa6de..2253dd2 100644 --- a/notebooks/piper_inference_(ONNX).ipynb +++ b/notebooks/piper_inference_(ONNX).ipynb @@ -4,7 +4,8 @@ "metadata": { "colab": { "provenance": [], - "authorship_tag": "ABX9TyMXOC2oFb6GhBeA5INCKyHM", + "gpuType": "T4", + "authorship_tag": "ABX9TyMpLrGPwagT9tnpOUxUO8OT", "include_colab_link": true }, "kernelspec": { @@ -53,8 +54,8 @@ { "cell_type": "code", "source": [ - "#@title Install software\n", - "#@markdown The speech synthesizer and other important dependencies will be installed in this cell.\n", + "#@title Install software and settings\n", + "#@markdown The speech synthesizer and other important dependencies will be installed in this cell. But first, some settings:\n", "\n", "#@markdown #### Enhable Enhanced Accessibility?\n", "#@markdown This Enhanced Accessibility functionality is designed for the visually impaired, in which most of the interface can be used by voice guides.\n", @@ -69,6 +70,11 @@ " lang = \"es\"\n", "else:\n", " raise Exception(\"Language not supported.\")\n", + "#@markdown ---\n", + "#@markdown #### Do you want to use the GPU for inference?\n", + "\n", + "#@markdown The GPU can be enabled in the edit/notebook settings menu, and this step must be done before connecting to a runtime. The GPU can lead to a higher response speed in inference, but you can use the CPU, for example, if your Colab GPU runtime allowance has ended.\n", + "use_gpu = False #@param {type:\"boolean\"}\n", "\n", "if enhanced_accessibility:\n", " from google.colab import output\n", @@ -92,6 +98,16 @@ "sys.path.append('/content/piper/notebooks')\n", "from translator import *\n", "lan = Translator()\n", + "print(\"Checking GPU...\")\n", + "gpu_info = !nvidia-smi\n", + "if use_gpu and any('not found' in info for info in gpu_info[0].split(':')):\n", + " if enhanced_accessibility:\n", + " playaudio(\"nogpu\")\n", + " raise Exception(lan.translate(lang, \"The Use GPU checkbox is checked, but you don't have a GPU runtime.\"))\n", + "elif not use_gpu and not any('not found' in info for info in gpu_info[0].split(':')):\n", + " if enhanced_accessibility:\n", + " playaudio(\"gpuavailable\")\n", + " raise Exception(lan.translate(lang, \"The Use GPU checkbox is unchecked, however you are using a GPU runtime environment. We recommend you check the checkbox to use GPU to take advantage of it.\"))\n", "\n", "if enhanced_accessibility:\n", " playaudio(\"installed\")\n", @@ -113,6 +129,10 @@ "#@markdown #### ID or link of the voice package (tar.gz format):\n", "package_url_or_id = \"\" #@param {type:\"string\"}\n", "#@markdown ---\n", + "if package_url_or_id == \"\" or package_url_or_id == \"http\" or package_url_or_id == \"1\":\n", + " if enhanced_accessibility:\n", + " playaudio(\"noid\")\n", + " raise Exception(lan.translate(lang, \"Invalid link or ID!\"))\n", "print(\"Downloading voice package...\")\n", "if enhanced_accessibility:\n", " playaudio(\"downloading\")\n", @@ -191,6 +211,7 @@ " \"\"\"Main entry point\"\"\"\n", " models_path = \"/content/piper/src/python\"\n", " logging.basicConfig(level=logging.DEBUG)\n", + " providers = [\"CPUExecutionProvider\"] if not use_gpu else ['CUDAExecutionProvider', 'CPUExecutionProvider']\n", " sess_options = onnxruntime.SessionOptions()\n", " model = None\n", " onnx_models = detect_onnx_models(models_path)\n", @@ -206,7 +227,7 @@ " raise Exception(lan.translate(lang, \"No downloaded voice packages!\"))\n", " elif isinstance(onnx_models, str):\n", " onnx_model = onnx_models\n", - " model, config = load_onnx(onnx_model, sess_options)\n", + " model, config = load_onnx(onnx_model, sess_options, providers)\n", " if config[\"num_speakers\"] > 1:\n", " speaker_selection.options = config[\"speaker_id_map\"].values()\n", " speaker_selection.layout.visibility = 'visible'\n", @@ -216,7 +237,7 @@ " else:\n", " speaker_selection.layout.visibility = 'hidden'\n", " preview_sid = None\n", - " \n", + "\n", " if enhanced_accessibility:\n", " inferencing(\n", " model,\n", @@ -243,13 +264,15 @@ " )\n", " config = None\n", " def load_model(button):\n", - " nonlocal config \n", + " nonlocal config\n", " global onnx_model\n", " nonlocal model\n", " nonlocal models_path\n", " selected_voice = selection.value\n", " onnx_model = f\"{models_path}/{selected_voice}\"\n", 
- " model, config = load_onnx(onnx_model, sess_options)\n", + " model, config = load_onnx(onnx_model, sess_options, providers)\n", + " if enhanced_accessibility:\n", + " playaudio(\"loaded\")\n", " if config[\"num_speakers\"] > 1:\n", " speaker_selection.options = config[\"speaker_id_map\"].values()\n", " speaker_selection.layout.visibility = 'visible'\n", @@ -322,12 +345,12 @@ " noise_scale_w = noise_scale_w_slider.value\n", " auto_play = play.value\n", " inferencing(model, config, sid, text, rate, noise_scale, noise_scale_w, auto_play)\n", - " \n", + "\n", " def on_close_button_clicked(b):\n", " clear_output()\n", " if enhanced_accessibility:\n", " playaudio(\"exit\")\n", - " \n", + "\n", " synthesize_button.on_click(on_synthesize_button_clicked)\n", " close_button.on_click(on_close_button_clicked)\n", " display(text_input)\n", @@ -338,10 +361,14 @@ " display(synthesize_button)\n", " display(close_button)\n", "\n", - "def load_onnx(model, sess_options):\n", + "def load_onnx(model, sess_options, providers = [\"CPUExecutionProvider\"]):\n", " _LOGGER.debug(\"Loading model from %s\", model)\n", " config = load_config(model)\n", - " model = onnxruntime.InferenceSession(str(model), sess_options=sess_options)\n", + " model = onnxruntime.InferenceSession(\n", + " str(model),\n", + " sess_options=sess_options,\n", + " providers= providers\n", + " )\n", " _LOGGER.info(\"Loaded model from %s\", model)\n", " return model, config\n", "\n",