diff --git a/README.md b/README.md
index 63e7ef5..ab03d2b 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 ![Piper logo](etc/logo.png)
 
-A fast, local neural text to speech system that is meant to sound good and run reasonably fast on the Raspberry Pi 4.
+A fast, local neural text-to-speech system that sounds great and is optimized for the Raspberry Pi 4.
 
 ``` sh
 echo 'Welcome to the world of speech synthesis!' | \
diff --git a/etc/logo.png b/etc/logo.png
index fbb8705..a3ea7d2 100644
Binary files a/etc/logo.png and b/etc/logo.png differ
diff --git a/etc/logo.svg b/etc/logo.svg
index 1943a5e..99c73b5 100644
--- a/etc/logo.svg
+++ b/etc/logo.svg
@@ -26,15 +26,15 @@
      borderopacity="1.0"
      inkscape:pageopacity="1"
      inkscape:pageshadow="2"
-     inkscape:zoom="1.8469919"
-     inkscape:cx="164.97755"
-     inkscape:cy="48.418276"
+     inkscape:zoom="1.421213"
+     inkscape:cx="-23.774381"
+     inkscape:cy="33.944028"
      inkscape:document-units="mm"
      inkscape:current-layer="layer1"
      inkscape:document-rotation="0"
      showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1012"
+     inkscape:window-width="1280"
+     inkscape:window-height="653"
      inkscape:window-x="0"
      inkscape:window-y="0"
      inkscape:window-maximized="1"
@@ -50,7 +50,7 @@
         <dc:format>image/svg+xml</dc:format>
         <dc:type
            rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
-        <dc:title></dc:title>
+        <dc:title />
       </cc:Work>
     </rdf:RDF>
   </metadata>
@@ -144,8 +144,8 @@
       <path
          id="path2257"
          style="fill:#ffffff;stroke:none;stroke-width:0.0999995;stroke-linecap:round"
-         d="m 19.97109,185.20282 10.735834,-6.19836 c 0.21219,-0.12249 0.502502,-0.0141 0.650911,0.24289 l 0.11208,0.19413 c 0.148409,0.25705 0.107331,0.58244 -0.115118,0.68513 -3.765389,1.73827 -7.326841,3.8345 -10.735835,6.19834 -0.201345,0.13962 -0.502495,0.0141 -0.65091,-0.24287 l -0.112081,-0.19413 c -0.148409,-0.25704 -0.09706,-0.56263 0.115117,-0.68513 z"
-         sodipodi:nodetypes="ssssssssss" />
+         d="m 19.523765,185.51136 11.807216,-7.07896 0.647873,1.12215 c -3.765389,1.73827 -8.398223,4.7151 -11.807217,7.07894 l -0.647874,-1.12213 z"
+         sodipodi:nodetypes="sccccs" />
     </g>
   </g>
 </svg>
diff --git a/src/benchmark/benchmark_generator.py b/src/benchmark/benchmark_generator.py
new file mode 100644
index 0000000..bfb70be
--- /dev/null
+++ b/src/benchmark/benchmark_generator.py
@@ -0,0 +1,100 @@
+#!/usr/bin/env python3
+"""Benchmark load time and real-time factor of a PyTorch model.
+
+Reads one JSON object per line from stdin (each with "phoneme_ids" and an
+optional "speaker_id") and writes one JSON object with "load_sec" and
+"synthesize_rtf" to stdout.
+"""
+import argparse
+import json
+import time
+import sys
+
+import torch
+
+# NOTE(review): not referenced anywhere in this file.
+_SPEAKER_ID = 0
+
+
+def main() -> None:
+    """Load the model, time synthesis of each stdin utterance, print JSON."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-m", "--model", required=True, help="Path to PyTorch model file"
+    )
+    parser.add_argument("-c", "--config", help="Path to model config file")
+    args = parser.parse_args()
+
+    if not args.config:
+        # Default to the config next to the model: "<model>.json".
+        args.config = f"{args.model}.json"
+
+    with open(args.config, "r", encoding="utf-8") as config_file:
+        config = json.load(config_file)
+
+    sample_rate = config["audio"]["sample_rate"]
+
+    # Blank lines (e.g. a trailing newline) are not utterances; skip them
+    # rather than failing in json.loads.
+    utterances = [json.loads(line) for line in sys.stdin if line.strip()]
+
+    # NOTE(review): torch.load unpickles arbitrary Python objects; only use
+    # model files from a trusted source.
+    start_time = time.monotonic_ns()
+    model = torch.load(args.model)
+    end_time = time.monotonic_ns()
+
+    model.eval()
+
+    load_sec = (end_time - start_time) / 1e9
+    synthesize_rtf = [
+        synthesize(
+            model,
+            utterance["phoneme_ids"],
+            utterance.get("speaker_id"),
+            sample_rate,
+        )
+        for utterance in utterances
+    ]
+
+    json.dump(
+        {"load_sec": load_sec, "synthesize_rtf": synthesize_rtf},
+        sys.stdout,
+    )
+
+
+def synthesize(model, phoneme_ids, speaker_id, sample_rate) -> float:
+    """Synthesize one utterance and return its real-time factor.
+
+    Args:
+        model: Callable invoked as ``model(text, text_lengths, sid)``; the
+            first item of its result is taken as the audio tensor.
+        phoneme_ids: Sequence of integer phoneme ids for the utterance.
+        speaker_id: Integer speaker id, or None to pass ``sid=None``.
+        sample_rate: Audio sample rate in samples per second.
+
+    Returns:
+        Inference seconds divided by seconds of audio produced.
+    """
+    text = torch.LongTensor(phoneme_ids).unsqueeze(0)
+    text_lengths = torch.LongTensor([len(phoneme_ids)])
+    sid = torch.LongTensor([speaker_id]) if speaker_id is not None else None
+
+    start_time = time.monotonic_ns()
+    # Gradients are never needed here; disabling autograd keeps its
+    # bookkeeping out of the measured time.
+    with torch.no_grad():
+        audio = model(text, text_lengths, sid)[0].detach().numpy().squeeze()
+    end_time = time.monotonic_ns()
+
+    # squeeze() leaves one array element per audio sample (assuming a single
+    # non-unit dimension), so the length is already the sample count; a
+    # division by 2 would only apply to a buffer of 16-bit sample bytes.
+    audio_sec = len(audio) / sample_rate
+    infer_sec = (end_time - start_time) / 1e9
+
+    return infer_sec / audio_sec
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/benchmark/benchmark_onnx.py b/src/benchmark/benchmark_onnx.py
new file mode 100644
index 0000000..22426cd
--- /dev/null
+++ b/src/benchmark/benchmark_onnx.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""Benchmark load time and real-time factor of an Onnx model.
+
+Reads one JSON object per line from stdin (each with "phoneme_ids" and an
+optional "speaker_id") and writes one JSON object with "load_sec" and
+"synthesize_rtf" to stdout.
+"""
+import argparse
+import json
+import time
+import sys
+
+import onnxruntime
+import numpy as np
+
+# Values packed, in this order, into the model's "scales" input.
+_NOISE_SCALE = 0.667
+_LENGTH_SCALE = 1.0
+_NOISE_W = 0.8
+# NOTE(review): not referenced anywhere in this file.
+_SPEAKER_ID = 0
+
+
+def main() -> None:
+    """Load the model, time synthesis of each stdin utterance, print JSON."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
+    parser.add_argument("-c", "--config", help="Path to model config file")
+    args = parser.parse_args()
+
+    if not args.config:
+        # Default to the config next to the model: "<model>.json".
+        args.config = f"{args.model}.json"
+
+    with open(args.config, "r", encoding="utf-8") as config_file:
+        config = json.load(config_file)
+
+    sample_rate = config["audio"]["sample_rate"]
+
+    # Blank lines (e.g. a trailing newline) are not utterances; skip them
+    # rather than failing in json.loads.
+    utterances = [json.loads(line) for line in sys.stdin if line.strip()]
+
+    start_time = time.monotonic_ns()
+    session = onnxruntime.InferenceSession(args.model)
+    end_time = time.monotonic_ns()
+
+    load_sec = (end_time - start_time) / 1e9
+    synthesize_rtf = [
+        synthesize(
+            session,
+            utterance["phoneme_ids"],
+            utterance.get("speaker_id"),
+            sample_rate,
+        )
+        for utterance in utterances
+    ]
+
+    json.dump(
+        {"load_sec": load_sec, "synthesize_rtf": synthesize_rtf},
+        sys.stdout,
+    )
+
+
+def synthesize(session, phoneme_ids, speaker_id, sample_rate) -> float:
+    """Synthesize one utterance and return its real-time factor.
+
+    Args:
+        session: Object whose ``run(None, feeds)`` returns the model outputs;
+            the first output is taken as the audio array.
+        phoneme_ids: Sequence of integer phoneme ids for the utterance.
+        speaker_id: Integer speaker id, or None to omit the "sid" input.
+        sample_rate: Audio sample rate in samples per second.
+
+    Returns:
+        Inference seconds divided by seconds of audio produced.
+    """
+    phoneme_ids_array = np.expand_dims(np.array(phoneme_ids, dtype=np.int64), 0)
+    phoneme_ids_lengths = np.array([phoneme_ids_array.shape[1]], dtype=np.int64)
+    scales = np.array(
+        [_NOISE_SCALE, _LENGTH_SCALE, _NOISE_W],
+        dtype=np.float32,
+    )
+
+    inputs = {
+        "input": phoneme_ids_array,
+        "input_lengths": phoneme_ids_lengths,
+        "scales": scales,
+    }
+
+    if speaker_id is not None:
+        # Feed "sid" only when a speaker was given, instead of passing a None
+        # value and relying on the runtime to ignore it.
+        inputs["sid"] = np.array([speaker_id], dtype=np.int64)
+
+    # Synthesize through Onnx
+    start_time = time.monotonic_ns()
+    audio = session.run(None, inputs)[0].squeeze()
+    end_time = time.monotonic_ns()
+
+    # squeeze() leaves one array element per audio sample (assuming a single
+    # non-unit dimension), so the length is already the sample count; a
+    # division by 2 would only apply to a buffer of 16-bit sample bytes.
+    audio_sec = len(audio) / sample_rate
+    infer_sec = (end_time - start_time) / 1e9
+
+    return infer_sec / audio_sec
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/benchmark/requirements.txt b/src/benchmark/requirements.txt
new file mode 100644
index 0000000..26f8d83
--- /dev/null
+++ b/src/benchmark/requirements.txt
@@ -0,0 +1,2 @@
+onnxruntime~=1.11.0
+torch~=1.11.0