# Special symbols looked up in the voice's phoneme_id_map.
_BOS = "^"  # prepended to every utterance
_EOS = "$"  # appended to every utterance
_PAD = "_"  # inserted after every phoneme (including _BOS)


@dataclass
class LarynxConfig:
    """Voice settings read from the JSON config that accompanies a model."""

    # Top-level "num_symbols" / "num_speakers" entries of the JSON config.
    num_symbols: int
    num_speakers: int
    # config["audio"]["sample_rate"]; used as the WAV frame rate.
    sample_rate: int
    # config["espeak"]["voice"]; handed to the espeak Phonemizer.
    espeak_voice: str
    # Defaults from the optional "inference" section; synthesize() falls back
    # to these when the corresponding argument is None.
    length_scale: float
    noise_scale: float
    noise_w: float
    # Maps each phoneme character to the model input id(s) it expands to.
    phoneme_id_map: Mapping[str, Sequence[int]]


class Larynx:
    """Text-to-speech voice: espeak phonemes fed through an Onnx model."""

    def __init__(
        self,
        model_path: Union[str, Path],
        config_path: Optional[Union[str, Path]] = None,
        use_cuda: bool = False,
    ):
        """Load the voice config, the phonemizer and the Onnx session.

        Args:
            model_path: Path to the Onnx model file.
            config_path: Path to the JSON config. Defaults to
                ``<model_path>.json``.
            use_cuda: Request the CUDA execution provider instead of the CPU.
        """
        if config_path is None:
            config_path = f"{model_path}.json"

        self.config = load_config(config_path)
        self.phonemizer = Phonemizer(self.config.espeak_voice)

        # Name the provider explicitly in both cases: onnxruntime >= 1.9
        # refuses providers=None on builds that ship more than the CPU
        # provider (e.g. onnxruntime-gpu).
        providers = ["CUDAExecutionProvider"] if use_cuda else ["CPUExecutionProvider"]
        self.model = onnxruntime.InferenceSession(
            str(model_path),
            sess_options=onnxruntime.SessionOptions(),
            providers=providers,
        )

    def synthesize(
        self,
        text: str,
        speaker_id: Optional[int] = None,
        length_scale: Optional[float] = None,
        noise_scale: Optional[float] = None,
        noise_w: Optional[float] = None,
    ) -> bytes:
        """Synthesize WAV audio from text.

        Args:
            text: Text to phonemize and speak.
            speaker_id: Speaker to use. When omitted and the config declares
                more than one speaker, speaker 0 is used.
            length_scale: Overrides ``config.length_scale`` when not None.
            noise_scale: Overrides ``config.noise_scale`` when not None.
            noise_w: Overrides ``config.noise_w`` when not None.

        Returns:
            A complete mono, 16-bit WAV file as bytes, at
            ``config.sample_rate``.

        Raises:
            KeyError: If a phoneme produced by the phonemizer (or one of the
                special symbols) is missing from ``config.phoneme_id_map``.
        """
        if length_scale is None:
            length_scale = self.config.length_scale

        if noise_scale is None:
            noise_scale = self.config.noise_scale

        if noise_w is None:
            noise_w = self.config.noise_w

        # ^ p _ h _ o _ ... $  -- every symbol (including BOS) is followed by
        # the pad id(s); EOS closes the sequence without a trailing pad.
        id_map = self.config.phoneme_id_map
        phoneme_ids: List[int] = []

        for phoneme in [_BOS, *self.phonemizer.phonemize(text)]:
            phoneme_ids.extend(id_map[phoneme])
            phoneme_ids.extend(id_map[_PAD])

        phoneme_ids.extend(id_map[_EOS])

        # Batch of one utterance.
        phoneme_ids_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
        phoneme_ids_lengths = np.array([phoneme_ids_array.shape[1]], dtype=np.int64)
        scales = np.array(
            [noise_scale, length_scale, noise_w],
            dtype=np.float32,
        )

        feed = {
            "input": phoneme_ids_array,
            "input_lengths": phoneme_ids_lengths,
            "scales": scales,
        }

        # Only feed "sid" when there is an actual value: a multi-speaker model
        # needs one even if the caller did not choose, and a None entry must
        # not be handed to the runtime at all.
        if speaker_id is None and self.config.num_speakers > 1:
            speaker_id = 0

        if speaker_id is not None:
            feed["sid"] = np.array([speaker_id], dtype=np.int64)

        # Synthesize through Onnx; drop the two leading singleton axes.
        audio = self.model.run(None, feed)[0].squeeze((0, 1))
        audio = audio_float_to_int16(audio.squeeze())

        # Convert to WAV
        with io.BytesIO() as wav_io:
            with wave.open(wav_io, "wb") as wav_file:
                wav_file.setframerate(self.config.sample_rate)
                wav_file.setsampwidth(2)  # 16-bit samples
                wav_file.setnchannels(1)  # mono
                wav_file.writeframes(audio.tobytes())

            return wav_io.getvalue()


def load_config(config_path: Union[str, Path]) -> LarynxConfig:
    """Read a voice's JSON config file into a LarynxConfig.

    The "inference" section is optional; missing scales default to
    noise_scale=0.667, length_scale=1.0 and noise_w=0.8.

    Raises:
        KeyError: If a required entry ("num_symbols", "num_speakers",
            "audio"/"sample_rate", "espeak"/"voice", "phoneme_id_map") is
            missing.
    """
    with open(config_path, "r", encoding="utf-8") as config_file:
        config_dict = json.load(config_file)

    inference = config_dict.get("inference", {})

    return LarynxConfig(
        num_symbols=config_dict["num_symbols"],
        num_speakers=config_dict["num_speakers"],
        sample_rate=config_dict["audio"]["sample_rate"],
        espeak_voice=config_dict["espeak"]["voice"],
        noise_scale=inference.get("noise_scale", 0.667),
        length_scale=inference.get("length_scale", 1.0),
        noise_w=inference.get("noise_w", 0.8),
        phoneme_id_map=config_dict["phoneme_id_map"],
    )


def audio_float_to_int16(
    audio: np.ndarray, max_wav_value: float = 32767.0
) -> np.ndarray:
    """Normalize audio and convert to int16 range.

    The signal is scaled so its peak magnitude reaches ``max_wav_value``. The
    peak is floored at 0.01 so near-silent audio is not amplified without
    bound. An empty array is returned as an empty int16 array.
    """
    if audio.size == 0:
        # np.max has no identity for zero-size input and would raise.
        return audio.astype("int16")

    peak = max(0.01, np.max(np.abs(audio)))
    audio_norm = audio * (max_wav_value / peak)
    audio_norm = np.clip(audio_norm, -max_wav_value, max_wav_value)
    return audio_norm.astype("int16")
def main() -> None:
    """Synthesize text read from stdin and write the WAV bytes to stdout.

    Command-line options:
        -m/--model  Path to the Onnx model file (required).
        --cuda      Run inference on the GPU.
    """
    parser = argparse.ArgumentParser()
    # Required: without a model path the voice would be built from None and
    # fail later while trying to open "None.json".
    parser.add_argument(
        "-m", "--model", required=True, help="Path to Onnx model file"
    )
    parser.add_argument("--cuda", action="store_true", help="Use GPU")
    args = parser.parse_args()

    voice = Larynx(args.model, use_cuda=args.cuda)
    wav_bytes = voice.synthesize(sys.stdin.read())
    # WAV data is binary, so bypass the text layer of stdout.
    sys.stdout.buffer.write(wav_bytes)


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
# Runs the larynx command-line interface (python3 -m larynx), forwarding all
# arguments. Uses the virtual environment next to the package when present.
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Directory that contains the larynx package (parent of scripts/)
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment (override by exporting "venv")
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

# setup.sh only installs requirements.txt, so the larynx package itself is
# importable only from base_dir. Put it on the module search path so this
# launcher works from any working directory.
export PYTHONPATH="${base_dir}${PYTHONPATH:+:${PYTHONPATH}}"

python3 -m larynx "$@"
dirname "$0" )" && pwd )" + +# Base directory of repo +base_dir="$(realpath "${this_dir}/..")" + +# Path to virtual environment +: "${venv:=${base_dir}/.venv}" + +# Python binary to use +: "${PYTHON=python3}" + +python_version="$(${PYTHON} --version)" + +# Create virtual environment +echo "Creating virtual environment at ${venv} (${python_version})" +rm -rf "${venv}" +"${PYTHON}" -m venv "${venv}" +source "${venv}/bin/activate" + +# Install Python dependencies +echo 'Installing Python dependencies' +pip3 install --upgrade pip +pip3 install --upgrade wheel setuptools + +pip3 install -r "${base_dir}/requirements.txt" + +# ----------------------------------------------------------------------------- + +echo "OK" diff --git a/src/python_run/setup.cfg b/src/python_run/setup.cfg new file mode 100644 index 0000000..0076bbf --- /dev/null +++ b/src/python_run/setup.cfg @@ -0,0 +1,22 @@ +[flake8] +# To work with Black +max-line-length = 88 +# E501: line too long +# W503: Line break occurred before a binary operator +# E203: Whitespace before ':' +# D202 No blank lines allowed after function docstring +# W504 line break after binary operator +ignore = + E501, + W503, + E203, + D202, + W504 + +[isort] +multi_line_output = 3 +include_trailing_comma=True +force_grid_wrap=0 +use_parentheses=True +line_length=88 +indent = " "