diff --git a/include/lc3_private.h b/include/lc3_private.h
index 8740d84..970ef8e 100644
--- a/include/lc3_private.h
+++ b/include/lc3_private.h
@@ -26,17 +26,21 @@
 /**
  * Return number of samples, delayed samples and
  * encoded spectrum coefficients within a frame
- * For decoding, keep 18 ms of history, aligned on frames, and a frame
+ * - For encoding, keep 1.25 ms of temporal window
+ * - For decoding, keep 18 ms of history, aligned on frames, and a frame
  */
 
 #define __LC3_NS(dt_us, sr_hz) \
-    ((dt_us * sr_hz) / 1000 / 1000)
+    ( ((dt_us) * (sr_hz)) / 1000 / 1000 )
 
 #define __LC3_ND(dt_us, sr_hz) \
     ( (dt_us) == 7500 ? 23 * __LC3_NS(dt_us, sr_hz) / 30 \
                       :  5 * __LC3_NS(dt_us, sr_hz) /  8 )
 
-#define __LC3_NR(dt_us, sr_hz) \
+#define __LC3_NT(sr_hz) \
+    ( (5 * (sr_hz)) / 4000 )  /* samples in 1.25 ms */
+
+#define __LC3_NH(dt_us, sr_hz) \
     ( ((3 - ((dt_us) >= 10000)) + 1) * __LC3_NS(dt_us, sr_hz) )
 
 
@@ -76,7 +80,7 @@ typedef struct lc3_attdet_analysis {
 } lc3_attdet_analysis_t;
 
 struct lc3_ltpf_hp50_state {
-    float s1, s2;
+    int64_t s1, s2;
 };
 
 typedef struct lc3_ltpf_analysis {
@@ -85,8 +89,8 @@ typedef struct lc3_ltpf_analysis {
     float nc[2];
 
     struct lc3_ltpf_hp50_state hp50;
-    float x_12k8[384];
-    float x_6k4[178];
+    int16_t x_12k8[384];
+    int16_t x_6k4[178];
     int tc;
 } lc3_ltpf_analysis_t;
 
@@ -103,11 +107,13 @@ struct lc3_encoder {
     lc3_ltpf_analysis_t ltpf;
     lc3_spec_analysis_t spec;
 
+    int16_t *xt;
     float *xs, *xf, s[0];
 };
 
 #define LC3_ENCODER_BUFFER_COUNT(dt_us, sr_hz) \
-    ( 2*__LC3_NS(dt_us, sr_hz) + __LC3_ND(dt_us, sr_hz) )
+    ( ( __LC3_NS(dt_us, sr_hz) + __LC3_NT(sr_hz) ) / 2 + \
+      2*__LC3_NS(dt_us, sr_hz) + __LC3_ND(dt_us, sr_hz) )
 
 #define LC3_ENCODER_MEM_T(dt_us, sr_hz) \
     struct { \
@@ -139,11 +145,11 @@ struct lc3_decoder {
     lc3_ltpf_synthesis_t ltpf;
     lc3_plc_state_t plc;
 
-    float *xr, *xs, *xd, *xg, s[0];
+    float *xh, *xs, *xd, *xg, s[0];
 };
 
 #define LC3_DECODER_BUFFER_COUNT(dt_us, sr_hz) \
-    ( __LC3_NR(dt_us, sr_hz) + __LC3_ND(dt_us, sr_hz) + \
+    ( __LC3_NH(dt_us, sr_hz) + __LC3_ND(dt_us, sr_hz) + \
       __LC3_NS(dt_us, sr_hz) )
 
 #define LC3_DECODER_MEM_T(dt_us, sr_hz) \
diff --git a/src/common.h b/src/common.h
index 4275c4e..c9160ca 100644
--- a/src/common.h
+++ b/src/common.h
@@ -26,6 +26,7 @@
 #include <lc3.h>
 #include "fastmath.h"
 
+#include <stdalign.h>
 #include <limits.h>
 #include <string.h>
 
@@ -72,7 +73,8 @@
 /**
  * Return number of samples, delayed samples and
  * encoded spectrum coefficients within a frame
- * For decoding, keep 18 ms of history, aligned on frames, and a frame
+ * - For encoding, keep 1.25 ms for temporal window
+ * - For decoding, keep 18 ms of history, aligned on frames, and a frame
  */
 
 #define LC3_NS(dt, sr) \
@@ -87,7 +89,10 @@
 #define LC3_MAX_NE \
     LC3_NE(LC3_DT_10M, LC3_SRATE_48K)
 
-#define LC3_NR(dt, sr) \
+#define LC3_NT(sr) \
+    ( (5 * LC3_SRATE_KHZ(sr)) / 4 )  /* samples in 1.25 ms at rate `sr` */
+
+#define LC3_NH(dt, sr) \
     ( ((3 - dt) + 1) * LC3_NS(dt, sr) )
 
 
diff --git a/src/lc3.c b/src/lc3.c
index 394a78f..6560b6e 100644
--- a/src/lc3.c
+++ b/src/lc3.c
@@ -156,11 +156,15 @@ static void load_s16(
 
     enum lc3_dt dt = encoder->dt;
     enum lc3_srate sr = encoder->sr_pcm;
+
+    int16_t *xt = encoder->xt;
     float *xs = encoder->xs;
     int ns = LC3_NS(dt, sr);
 
-    for (int i = 0; i < ns; i++)
-        xs[i] = pcm[i*stride];
+    for (int i = 0; i < ns; i++) {
+        int16_t in = pcm[i*stride];
+        xt[i] = in, xs[i] = in;
+    }
 }
 
 /**
@@ -175,11 +179,17 @@ static void load_s24(
 
     enum lc3_dt dt = encoder->dt;
     enum lc3_srate sr = encoder->sr_pcm;
+
+    int16_t *xt = encoder->xt;
     float *xs = encoder->xs;
     int ns = LC3_NS(dt, sr);
 
-    for (int i = 0; i < ns; i++)
-        xs[i] = ldexpf(pcm[i*stride], -8);
+    for (int i = 0; i < ns; i++) {
+        int32_t in = pcm[i*stride];
+
+        xt[i] = in >> 8;
+        xs[i] = ldexpf(in, -8);
+    }
 }
 
 /**
@@ -196,7 +206,9 @@ static void analyze(struct lc3_encoder *encoder,
     enum lc3_srate sr_pcm = encoder->sr_pcm;
     int ns = LC3_NS(dt, sr_pcm);
     int nd = LC3_ND(dt, sr_pcm);
+    int nt = LC3_NT(sr_pcm);
 
+    int16_t *xt = encoder->xt;
     float *xs = encoder->xs;
     float *xf = encoder->xf;
 
@@ -205,14 +217,16 @@ static void analyze(struct lc3_encoder *encoder,
     bool att = lc3_attdet_run(dt, sr_pcm, nbytes, &encoder->attdet, xs);
 
     side->pitch_present =
-        lc3_ltpf_analyse(dt, sr_pcm, &encoder->ltpf, xs, &side->ltpf);
+        lc3_ltpf_analyse(dt, sr_pcm, &encoder->ltpf, xt, &side->ltpf);
+
+    memmove(xt - nt, xt + (ns-nt), nt * sizeof(*xt));
 
     /* --- Spectral --- */
 
     float e[LC3_NUM_BANDS];
 
     lc3_mdct_forward(dt, sr_pcm, sr, xs, xf);
-    memmove(xs - nd, xs + ns-nd, nd * sizeof(float));
+    memmove(xs - nd, xs + (ns-nd), nd * sizeof(*xs));
 
     bool nn_flag = lc3_energy_compute(dt, sr, xf, e);
     if (nn_flag)
@@ -299,12 +313,15 @@ struct lc3_encoder *lc3_setup_encoder(
     struct lc3_encoder *encoder = mem;
     int ns = LC3_NS(dt, sr_pcm);
     int nd = LC3_ND(dt, sr_pcm);
+    int nt = LC3_NT(sr_pcm);
 
     *encoder = (struct lc3_encoder){
         .dt = dt, .sr = sr,
         .sr_pcm = sr_pcm,
-        .xs = encoder->s + nd,
-        .xf = encoder->s + nd+ns,
+
+        .xt = (int16_t *)encoder->s + nt,
+        .xs = encoder->s + (nt+ns)/2 + nd,
+        .xf = encoder->s + (nt+ns)/2 + nd+ns,
     };
 
     memset(encoder->s, 0,
@@ -482,7 +499,7 @@ static void synthesize(struct lc3_decoder *decoder,
     }
 
     lc3_ltpf_synthesize(dt, sr_pcm, nbytes, &decoder->ltpf,
-        side && side->pitch_present ? &side->ltpf : NULL, decoder->xr, xs);
+        side && side->pitch_present ? &side->ltpf : NULL, decoder->xh, xs);
 }
 
 /**
@@ -493,11 +510,11 @@ static void complete(struct lc3_decoder *decoder)
 {
     enum lc3_dt dt = decoder->dt;
     enum lc3_srate sr_pcm = decoder->sr_pcm;
-    int nr = LC3_NR(dt, sr_pcm);
+    int nh = LC3_NH(dt, sr_pcm);
     int ns = LC3_NS(dt, sr_pcm);
 
-    decoder->xs = decoder->xs - decoder->xr < nr - ns ?
-        decoder->xs + ns : decoder->xr;
+    decoder->xs = decoder->xs - decoder->xh < nh - ns ?
+        decoder->xs + ns : decoder->xh;
 }
 
 /**
@@ -530,7 +547,7 @@ struct lc3_decoder *lc3_setup_decoder(
         return NULL;
 
     struct lc3_decoder *decoder = mem;
-    int nr = LC3_NR(dt, sr_pcm);
+    int nh = LC3_NH(dt, sr_pcm);
     int ns = LC3_NS(dt, sr_pcm);
     int nd = LC3_ND(dt, sr_pcm);
 
@@ -538,11 +555,10 @@ struct lc3_decoder *lc3_setup_decoder(
         .dt = dt, .sr = sr,
         .sr_pcm = sr_pcm,
 
-        .xr = decoder->s,
-        .xs = decoder->s + nr-ns,
-        .xd = decoder->s + nr,
-        .xg = decoder->s + nr+nd,
-
+        .xh = decoder->s,
+        .xs = decoder->s + nh-ns,
+        .xd = decoder->s + nh,
+        .xg = decoder->s + nh+nd,
     };
 
     lc3_plc_reset(&decoder->plc);
diff --git a/src/ltpf.c b/src/ltpf.c
index b7aeb96..7c35890 100644
--- a/src/ltpf.c
+++ b/src/ltpf.c
@@ -19,158 +19,347 @@
 #include "ltpf.h"
 #include "tables.h"
 
+#include "ltpf_arm.h"
+#include "ltpf_neon.h"
+
 
 /* ----------------------------------------------------------------------------
  *  Resampling
  * -------------------------------------------------------------------------- */
 
+/**
+ * Resampling coefficients
+ * The coefficients, in fixed Q15, are reordered by phase for each source
+ * samplerate (coefficient matrix transposed)
+ */
+
+#ifndef resample_8k_12k8
+static const int16_t h_8k_12k8_q15[8*10] = {
+      214,   417, -1052, -4529, 26233, -4529, -1052,   417,   214,     0,
+      180,     0, -1522, -2427, 24506, -5289,     0,   763,   156,   -28,
+       92,  -323, -1361,     0, 19741, -3885,  1317,   861,     0,   -61,
+        0,  -457,  -752,  1873, 13068,     0,  2389,   598,  -213,   -79,
+      -61,  -398,     0,  2686,  5997,  5997,  2686,     0,  -398,   -61,
+      -79,  -213,   598,  2389,     0, 13068,  1873,  -752,  -457,     0,
+      -61,     0,   861,  1317, -3885, 19741,     0, -1361,  -323,    92,
+      -28,   156,   763,     0, -5289, 24506, -2427, -1522,     0,   180,
+};
+#endif /* resample_8k_12k8 */
+
+#ifndef resample_16k_12k8
+static const int16_t h_16k_12k8_q15[4*20] = {
+      -61,   214,  -398,   417,     0, -1052,  2686, -4529,  5997, 26233,
+     5997, -4529,  2686, -1052,     0,   417,  -398,   214,   -61,     0,
+
+      -79,   180,  -213,     0,   598, -1522,  2389, -2427,     0, 24506,
+    13068, -5289,  1873,     0,  -752,   763,  -457,   156,     0,   -28,
+
+      -61,    92,     0,  -323,   861, -1361,  1317,     0, -3885, 19741,
+    19741, -3885,     0,  1317, -1361,   861,  -323,     0,    92,   -61,
+
+      -28,     0,   156,  -457,   763,  -752,     0,  1873, -5289, 13068,
+    24506,     0, -2427,  2389, -1522,   598,     0,  -213,   180,   -79,
+};
+#endif /* resample_16k_12k8 */
+
+#ifndef resample_32k_12k8
+static const int16_t h_32k_12k8_q15[2*40] = {
+      -30,   -31,    46,   107,     0,  -199,  -162,   209,   430,     0,
+     -681,  -526,   658,  1343,     0, -2264, -1943,  2999,  9871, 13116,
+     9871,  2999, -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
+      430,   209,  -162,  -199,     0,   107,    46,   -31,   -30,     0,
+
+      -14,   -39,     0,    90,    78,  -106,  -229,     0,   382,   299,
+     -376,  -761,     0,  1194,   937, -1214, -2644,     0,  6534, 12253,
+    12253,  6534,     0, -2644, -1214,   937,  1194,     0,  -761,  -376,
+      299,   382,     0,  -229,  -106,    78,    90,     0,   -39,   -14,
+};
+#endif /* resample_32k_12k8 */
+
+#ifndef resample_24k_12k8
+static const int16_t h_24k_12k8_q15[8*30] = {
+      -50,    19,   143,   -93,  -290,   278,   485,  -658,  -701,  1396,
+      901, -3019, -1042, 10276, 17488, 10276, -1042, -3019,   901,  1396,
+     -701,  -658,   485,   278,  -290,   -93,   143,    19,   -50,     0,
+
+      -46,     0,   141,   -45,  -305,   185,   543,  -501,  -854,  1153,
+     1249, -2619, -1908,  8712, 17358, 11772,     0, -3319,   480,  1593,
+     -504,  -796,   399,   367,  -261,  -142,   138,    40,   -52,    -5,
+
+      -41,   -17,   133,     0,  -304,    91,   574,  -334,  -959,   878,
+     1516, -2143, -2590,  7118, 16971, 13161,  1202, -3495,     0,  1731,
+     -267,  -908,   287,   445,  -215,  -188,   125,    62,   -52,   -12,
+
+      -34,   -30,   120,    41,  -291,     0,   577,  -164, -1015,   585,
+     1697, -1618, -3084,  5534, 16337, 14406,  2544, -3526,  -523,  1800,
+        0,  -985,   152,   509,  -156,  -230,   104,    83,   -48,   -19,
+
+      -26,   -41,   103,    76,  -265,   -83,   554,     0, -1023,   288,
+     1791, -1070, -3393,  3998, 15474, 15474,  3998, -3393, -1070,  1791,
+      288, -1023,     0,   554,   -83,  -265,    76,   103,   -41,   -26,
+
+      -19,   -48,    83,   104,  -230,  -156,   509,   152,  -985,     0,
+     1800,  -523, -3526,  2544, 14406, 16337,  5534, -3084, -1618,  1697,
+      585, -1015,  -164,   577,     0,  -291,    41,   120,   -30,   -34,
+
+      -12,   -52,    62,   125,  -188,  -215,   445,   287,  -908,  -267,
+     1731,     0, -3495,  1202, 13161, 16971,  7118, -2590, -2143,  1516,
+      878,  -959,  -334,   574,    91,  -304,     0,   133,   -17,   -41,
+
+       -5,   -52,    40,   138,  -142,  -261,   367,   399,  -796,  -504,
+     1593,   480, -3319,     0, 11772, 17358,  8712, -1908, -2619,  1249,
+     1153,  -854,  -501,   543,   185,  -305,   -45,   141,     0,   -46,
+};
+#endif /* resample_24k_12k8 */
+
+#ifndef resample_48k_12k8
+static const int16_t h_48k_12k8_q15[4*60] = {
+      -13,   -25,   -20,    10,    51,    71,    38,   -47,  -133,  -145,
+      -42,   139,   277,   242,     0,  -329,  -511,  -351,   144,   698,
+      895,   450,  -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
+     7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,   895,   698,
+      144,  -351,  -511,  -329,     0,   242,   277,   139,   -42,  -145,
+     -133,   -47,    38,    71,    51,    10,   -20,   -25,   -13,     0,
+
+       -9,   -23,   -24,     0,    41,    71,    52,   -23,  -115,  -152,
+      -78,    92,   254,   272,    76,  -251,  -493,  -427,     0,   576,
+      900,   624,  -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
+     8169,  5886,  2767,     0, -1542, -1660,  -809,   240,   848,   796,
+      292,  -252,  -507,  -398,   -82,   199,   288,   183,     0,  -130,
+     -145,   -71,    20,    69,    60,    20,   -15,   -26,   -17,    -3,
+
+       -6,   -20,   -26,    -8,    31,    67,    62,     0,   -94,  -152,
+     -108,    45,   223,   287,   143,  -167,  -454,  -480,  -134,   439,
+      866,   758,     0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
+     8485,  6580,  3559,   601, -1295, -1748, -1071,     0,   758,   866,
+      439,  -134,  -480,  -454,  -167,   143,   287,   223,    45,  -108,
+     -152,   -94,     0,    62,    67,    31,    -8,   -26,   -20,    -6,
+
+       -3,   -17,   -26,   -15,    20,    60,    69,    20,   -71,  -145,
+     -130,     0,   183,   288,   199,   -82,  -398,  -507,  -252,   292,
+      796,   848,   240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
+     8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,   900,
+      576,     0,  -427,  -493,  -251,    76,   272,   254,    92,   -78,
+     -152,  -115,   -23,    52,    71,    41,     0,   -24,   -23,    -9,
+};
+#endif /* resample_48k_12k8 */
+
+
+/**
+ * High-pass 50Hz filtering, at 12.8 KHz samplerate
+ * hp50            Biquad filter state
+ * xn              Input sample, in fixed Q30
+ * return          Filtered sample, in fixed Q30
+ */
+static inline int32_t filter_hp50(
+    struct lc3_ltpf_hp50_state *hp50, int32_t xn)
+{
+    int32_t yn;
+
+    const int32_t a1 = -2110217691, a2 = 1037111617;
+    const int32_t b1 = -2110535566, b2 = 1055267782;
+
+    yn       = (hp50->s1 + (int64_t)xn * b2) >> 30;
+    hp50->s1 = (hp50->s2 + (int64_t)xn * b1 - (int64_t)yn * a1);
+    hp50->s2 = (           (int64_t)xn * b2 - (int64_t)yn * a2);
+
+    return yn;
+}
+
 /**
  * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
- * p               Resampling factor with 64 KHz (8, 4 or 2)
- * x               [-d..-1] Previous, [0..ns-1] Current samples
- * y, n            [0..n-1] Output `n` processed samples
+ * p               Resampling factor compared to 64 KHz (8, 4 or 2)
+ * h               Coefficients table, arranged by phase
+ * hp50            High-Pass biquad filter state
+ * x               [-d..-1] Previous, [0..ns-1] Current samples, Q15
+ * y, n            [0..n-1] Output `n` processed samples, Q14
  *
+ * The `x` vector is aligned on 32 bits
  * The number of previous samples `d` accessed on `x` is :
  *   d: { 10, 20, 40 } - 1 for resampling factors 8, 4 and 2.
  */
-static inline void resample_base_64k_12k8(const int p,
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
+static inline void resample_x64k_12k8(const int p, const int16_t *h,
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
 {
-    /* --- Parameters  ---
-     * bn, an: High-Pass Biquad coefficients,
-     * with `bn` support of rescaling resampling factor.
-     * Note that it's an High-Pass filter, so we have `b0 = b2`,
-     * in the following steps we use `b0` as `b2`. */
+    const int w = 2*(40 / p);
 
-    const int w = 40 / p;
+    x -= w - 1;
 
-    const float *h = lc3_ltpf_h12k8 + 119;
-    const float a1 = -1.965293373f, b1 = -1.965589417f * 3*LC3_MIN(p, 4);
-    const float a2 =  0.965885461f, b2 =  0.982794708f * 3*LC3_MIN(p, 4);
+    for (int i = 0; i < 5*n; i += 5) {
+        const int16_t *hn = h + (i % p) * w;
+        const int16_t *xn = x + (i / p);
+        int32_t un = 0;
 
-    /* --- Resampling & filtering --- */
-
-    for (int i = 0; i < n; i += 8, x += w)
-        for (int j = 0; j < 40; j += 5) {
-
-            const float *hn = h - 3*(p*w + (j % p));
-            const float *xn = x -   (2*w - (j / p));
-            float yn, un = 0;
-
-            for (int k = 0; k < 2*w; k += 10) {
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-                un += *(++xn) * *(hn += (3*p));
-            }
-
-            yn = b2 * un + hp50->s1;
-            hp50->s1 = b1 * un - a1 * yn + hp50->s2;
-            hp50->s2 = b2 * un - a2 * yn;
-            *(y++) = yn;
+        for (int k = 0; k < w; k += 10) {
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
         }
+
+        int32_t yn = filter_hp50(hp50, un);
+        *(y++) = (yn + (1 << 15)) >> 16;
+    }
 }
 
 /**
  * Resample from 24 / 48 KHz to 12.8 KHz Template
- * p               Resampling factor with 192 KHz (8 or 4)
- * x               [-d..-1] Previous, [0..ns-1] Current samples
- * y, n            [0..n-1] Output `n` processed samples
+ * p               Resampling factor compared to 192 KHz (8 or 4)
+ * h               Coefficients table, arranged by phase
+ * hp50            High-Pass biquad filter state
+ * x               [-d..-1] Previous, [0..ns-1] Current samples, Q15
+ * y, n            [0..n-1] Output `n` processed samples, Q14
  *
+ * The `x` vector is aligned on 32 bits
  * The number of previous samples `d` accessed on `x` is :
  *   d: { 30, 60 } - 1 for resampling factors 8 and 4.
  */
-static inline void resample_base_192k_12k8(const int p,
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
+static inline void resample_x192k_12k8(const int p, const int16_t *h,
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
 {
-    /* --- Parameters  ---
-     * bn, an: High-Pass Biquad coefficients,
-     * with `bn` support of rescaling resampling factor.
-     * Note that it's an High-Pass filter, so we have `b0 = b2`,
-     * in the following steps we use `b0` as `b2`. */
+    const int w = 2*(120 / p);
 
-    const int w = 120 / p;
+    x -= w - 1;
 
-    const float *h = lc3_ltpf_h12k8 + 119;
-    const float a1 = -1.965293373f, b1 = -1.965589417f * p;
-    const float a2 =  0.965885461f, b2 =  0.982794708f * p;
+    for (int i = 0; i < 15*n; i += 15) {
+        const int16_t *hn = h + (i % p) * w;
+        const int16_t *xn = x + (i / p);
+        int32_t un = 0;
 
-    /* --- Resampling & filtering --- */
-
-    for (int i = 0; i < n; i += 8, x += w)
-        for (int j = 0; j < 120; j += 15) {
-
-            const float *hn = h - (p*w + (j % p));
-            const float *xn = x - (2*w - (j / p));
-            float yn, un = 0;
-
-            for (int k = 0; k < 2*w; k += 15) {
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-                un += *(++xn) * *(hn += p);
-            }
-
-            yn = b2 * un + hp50->s1;
-            hp50->s1 = b1 * un - a1 * yn + hp50->s2;
-            hp50->s2 = b2 * un - a2 * yn;
-            *(y++) = yn;
+        for (int k = 0; k < w; k += 15) {
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
+            un += *(xn++) * *(hn++);
         }
+
+        int32_t yn = filter_hp50(hp50, un);
+        *(y++) = (yn + (1 << 15)) >> 16;
+    }
 }
 
+/**
+ * Resample from 8 Khz to 12.8 KHz
+ * hp50            High-Pass biquad filter state
+ * x               [-10..-1] Previous, [0..ns-1] Current samples, Q15
+ * y, n            [0..n-1] Output `n` processed samples, Q14
+ *
+ * The `x` vector is aligned on 32 bits
+ */
+#ifndef resample_8k_12k8
+static void resample_8k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    resample_x64k_12k8(8, h_8k_12k8_q15, hp50, x, y, n);
+}
+#endif /* resample_8k_12k8 */
+
+/**
+ * Resample from 16 Khz to 12.8 KHz
+ * hp50            High-Pass biquad filter state
+ * x               [-20..-1] Previous, [0..ns-1] Current samples, in fixed Q15
+ * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
+ *
+ * The `x` vector is aligned on 32 bits
+ */
+#ifndef resample_16k_12k8
+static void resample_16k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    resample_x64k_12k8(4, h_16k_12k8_q15, hp50, x, y, n);
+}
+#endif /* resample_16k_12k8 */
+
+/**
+ * Resample from 32 Khz to 12.8 KHz
+ * hp50            High-Pass biquad filter state
+ * x               [-40..-1] Previous, [0..ns-1] Current samples, in fixed Q15
+ * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
+ *
+ * The `x` vector is aligned on 32 bits
+ */
+#ifndef resample_32k_12k8
+static void resample_32k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    resample_x64k_12k8(2, h_32k_12k8_q15, hp50, x, y, n);
+}
+#endif /* resample_32k_12k8 */
+
+/**
+ * Resample from 24 Khz to 12.8 KHz
+ * hp50            High-Pass biquad filter state
+ * x               [-30..-1] Previous, [0..ns-1] Current samples, in fixed Q15
+ * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
+ *
+ * The `x` vector is aligned on 32 bits
+ */
+#ifndef resample_24k_12k8
+static void resample_24k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    resample_x192k_12k8(8, h_24k_12k8_q15, hp50, x, y, n);
+}
+#endif /* resample_24k_12k8 */
+
+/**
+ * Resample from 48 Khz to 12.8 KHz
+ * hp50            High-Pass biquad filter state
+ * x               [-60..-1] Previous, [0..ns-1] Current samples, in fixed Q15
+ * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
+ *
+ * The `x` vector is aligned on 32 bits
+ */
+#ifndef resample_48k_12k8
+static void resample_48k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    resample_x192k_12k8(4, h_48k_12k8_q15, hp50, x, y, n);
+}
+#endif /* resample_48k_12k8 */
+
+/**
+ * Resample to 6.4 KHz
+ * x               [-3..-1] Previous, [0..2*n-1] Current samples
+ * y, n            [0..n-1] Output `n` processed samples
+ *
+ * The `x` vector is aligned on 32 bits
+ */
+#ifndef resample_6k4
+static void resample_6k4(const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t h[] = { 18477, 15424, 8105 };
+    const int16_t *ye = y + n;
+
+    for (x--; y < ye; x += 2)
+        *(y++) = (x[0] * h[0] + (x[-1] + x[1]) * h[1]
+                              + (x[-2] + x[2]) * h[2]) >> 16;
+}
+#endif /* resample_6k4 */
+
 /**
  * LTPF Resample to 12.8 KHz implementations for each samplerates
  */
 
-static void resample_8k_12k8(
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
-{
-    resample_base_64k_12k8(8, hp50, x, y, n);
-}
-
-static void resample_16k_12k8(
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
-{
-    resample_base_64k_12k8(4, hp50, x, y, n);
-}
-
-static void resample_24k_12k8(
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
-{
-    resample_base_192k_12k8(8, hp50, x, y, n);
-}
-
-static void resample_32k_12k8(
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
-{
-    resample_base_64k_12k8(2, hp50, x, y, n);
-}
-
-static void resample_48k_12k8(
-    struct lc3_ltpf_hp50_state *hp50, const float *x, float *y, int n)
-{
-    resample_base_192k_12k8(4, hp50, x, y, n);
-}
-
 static void (* const resample_12k8[])
-    (struct lc3_ltpf_hp50_state *, const float *, float *, int ) =
+    (struct lc3_ltpf_hp50_state *, const int16_t *, int16_t *, int ) =
 {
     [LC3_SRATE_8K ] = resample_8k_12k8,
     [LC3_SRATE_16K] = resample_16k_12k8,
@@ -179,23 +368,6 @@ static void (* const resample_12k8[])
     [LC3_SRATE_48K] = resample_48k_12k8,
 };
 
-/**
- * Resample to 6.4 KHz (cf. 3.3.9.3-4)
- * x               [-3..-1] Previous, [0..n-1] Current samples
- * y, n            [0..n-1] Output `n` processed samples
- */
-static void resample_6k4(const float *x, float *y, int n)
-{
-    static const float h[] = { 0.2819382921, 0.2353512128, 0.1236796411 };
-    float xn2 = x[-3], xn1 = x[-2], x0 = x[-1], x1, x2;
-
-    for (const float *ye = y + n; y < ye; xn2 = x0, xn1 = x1, x0 = x2) {
-        x1 = *(x++); x2 = *(x++);
-
-        *(y++) = x0 * h[0] + (xn1 + x1) * h[1] + (xn2 + x2) * h[2];
-    }
-}
-
 
 /* ----------------------------------------------------------------------------
  *  Analysis
@@ -203,33 +375,41 @@ static void resample_6k4(const float *x, float *y, int n)
 
 /**
  * Return dot product of 2 vectors
- * a, b, n         The 2 vectors of size `n` (multiple of 16)
+ * a, b, n         The 2 vectors of size `n` (> 0 and <= 128)
  * return          sum( a[i] * b[i] ), i = [0..n-1]
- */
-static inline float dot(const float *a, const float *b, int n)
+ *
+ * The size `n` of vectors must be multiple of 16, and less or equal to 128
+ */
+#ifndef dot
+static inline float dot(const int16_t *a, const int16_t *b, int n)
 {
-    float v = 0;
+    int64_t v = 0;
 
     for (int i = 0; i < (n >> 4); i++)
         for (int j = 0; j < 16; j++)
             v += *(a++) * *(b++);
 
-    return v;
+    int32_t v32 = (v + (1 << 5)) >> 6;
+    return (float)v32;
 }
+#endif /* dot */
 
 /**
  * Return vector of correlations
- * a, b, n         The 2 vector of size `n` to correlate
+ * a, b, n         The 2 vector of size `n` (> 0 and <= 128)
  * y, nc           Output the correlation vector of size `nc`
  *
- * The size `n` of input vectors must be multiple of 16
+ * The first vector `a` is aligned on 32 bits
+ * The size `n` of vectors is multiple of 16, and less or equal to 128
  */
+#ifndef correlate
 static void correlate(
-    const float *a, const float *b, int n, float *y, int nc)
+    const int16_t *a, const int16_t *b, int n, float *y, int nc)
 {
     for (const float *ye = y + nc; y < ye; )
         *(y++) = dot(a, b--, n);
 }
+#endif /* correlate */
 
 /**
  * Search the maximum value and returns its argument
@@ -279,24 +459,30 @@ static int argmax_weighted(
  *
  * The size `n` of vectors must be multiple of 4
  */
-static void interpolate(const float *x, int n, int d, float *y)
+static void interpolate(const int16_t *x, int n, int d, int16_t *y)
 {
-    static const float h4[][8] = {
-        { 2.09880463e-01, 5.83527575e-01, 2.09880463e-01                 },
-        { 1.06999186e-01, 5.50075002e-01, 3.35690625e-01, 6.69885837e-03 },
-        { 3.96711478e-02, 4.59220930e-01, 4.59220930e-01, 3.96711478e-02 },
-        { 6.69885837e-03, 3.35690625e-01, 5.50075002e-01, 1.06999186e-01 },
-    };
+    static const int16_t h4_q15[][4] = {
+        { 6877, 19121,  6877,     0 }, { 3506, 18025, 11000,   220 },
+        { 1300, 15048, 15048,  1300 }, {  220, 11000, 18025,  3506 } };
 
-    const float *h = h4[d];
-    float x3 = x[-2], x2 = x[-1], x1, x0;
+    const int16_t *h = h4_q15[d];
+    int16_t x3 = x[-2], x2 = x[-1], x1, x0;
 
     x1 = (*x++);
-    for (const float *ye = y + n; y < ye; ) {
-        *(y++) = (x0 = *(x++)) * h[0] + x1 * h[1] + x2 * h[2] + x3 * h[3];
-        *(y++) = (x3 = *(x++)) * h[0] + x0 * h[1] + x1 * h[2] + x2 * h[3];
-        *(y++) = (x2 = *(x++)) * h[0] + x3 * h[1] + x0 * h[2] + x1 * h[3];
-        *(y++) = (x1 = *(x++)) * h[0] + x2 * h[1] + x3 * h[2] + x0 * h[3];
+    for (const int16_t *ye = y + n; y < ye; ) {
+        int32_t yn;
+
+        yn = (x0 = *(x++)) * h[0] + x1 * h[1] + x2 * h[2] + x3 * h[3];
+        *(y++) = yn >> 15;
+
+        yn = (x3 = *(x++)) * h[0] + x0 * h[1] + x1 * h[2] + x2 * h[3];
+        *(y++) = yn >> 15;
+
+        yn = (x2 = *(x++)) * h[0] + x3 * h[1] + x0 * h[2] + x1 * h[3];
+        *(y++) = yn >> 15;
+
+        yn = (x1 = *(x++)) * h[0] + x2 * h[1] + x3 * h[2] + x0 * h[3];
+        *(y++) = yn >> 15;
     }
 }
 
@@ -306,7 +492,7 @@ static void interpolate(const float *x, int n, int d, float *y)
  * d               The phase of interpolation (-3 to 3)
  * return          The interpolated value
  */
-static float interpolate_4(const float *x, int d)
+static float interpolate_corr(const float *x, int d)
 {
     static const float h4[][8] = {
         {  1.53572770e-02, -4.72963246e-02,  8.35788573e-02,  8.98638285e-01,
@@ -336,9 +522,11 @@ static float interpolate_4(const float *x, int d)
  * x, n            [-114..-17] Previous, [0..n-1] Current 6.4KHz samples
  * tc              Return the pitch-lag estimation
  * return          True when pitch present
+ *
+ * The `x` vector is aligned on 32 bits
  */
 static bool detect_pitch(
-    struct lc3_ltpf_analysis *ltpf, const float *x, int n, int *tc)
+    struct lc3_ltpf_analysis *ltpf, const int16_t *x, int n, int *tc)
 {
     float rm1, rm2;
     float r[98];
@@ -352,8 +540,8 @@ static bool detect_pitch(
     int t1 = argmax_weighted(r, nr, -.5f/(nr-1), &rm1);
     int t2 = k0 + argmax(r + k0, nk, &rm2);
 
-    const float *x1 = x - (r0 + t1);
-    const float *x2 = x - (r0 + t2);
+    const int16_t *x1 = x - (r0 + t1);
+    const int16_t *x2 = x - (r0 + t2);
 
     float nc1 = rm1 <= 0 ? 0 :
         rm1 / sqrtf(dot(x, x, n) * dot(x1, x1, n));
@@ -370,12 +558,14 @@ static bool detect_pitch(
 
 /**
  * Pitch-lag parameter (3.3.9.7)
- * x, n            [-232..-28] Previous, [0..n-1] Current 12.8KHz samples
+ * x, n            [-232..-28] Previous, [0..n-1] Current 12.8KHz samples, Q14
  * tc              Pitch-lag estimation
  * pitch           The pitch value, in fixed .4
  * return          The bitstream pitch index value
+ *
+ * The `x` vector is aligned on 32 bits
  */
-static int refine_pitch(const float *x, int n, int tc, int *pitch)
+static int refine_pitch(const int16_t *x, int n, int tc, int *pitch)
 {
     float r[17], rm;
     int e, f;
@@ -388,17 +578,17 @@ static int refine_pitch(const float *x, int n, int tc, int *pitch)
     e = r0 + argmax(r + 4, nr, &rm);
     const float *re = r + (e - (r0 - 4));
 
-    float dm = interpolate_4(re, f = 0);
+    float dm = interpolate_corr(re, f = 0);
     for (int i = 1; i <= 3; i++) {
         float d;
 
         if (e >= 127 && ((i & 1) | (e >= 157)))
             continue;
 
-        if ((d = interpolate_4(re, i)) > dm)
+        if ((d = interpolate_corr(re, i)) > dm)
             dm = d, f = i;
 
-        if (e > 32 && (d = interpolate_4(re, -i)) > dm)
+        if (e > 32 && (d = interpolate_corr(re, -i)) > dm)
             dm = d, f = -i;
     }
 
@@ -413,31 +603,34 @@ static int refine_pitch(const float *x, int n, int tc, int *pitch)
 /**
  * LTPF Analysis
  */
-bool lc3_ltpf_analyse(enum lc3_dt dt, enum lc3_srate sr,
-    struct lc3_ltpf_analysis *ltpf, const float *x, struct lc3_ltpf_data *data)
+bool lc3_ltpf_analyse(
+    enum lc3_dt dt, enum lc3_srate sr, struct lc3_ltpf_analysis *ltpf,
+    const int16_t *x, struct lc3_ltpf_data *data)
 {
     /* --- Resampling to 12.8 KHz --- */
 
-    int z_12k8 = sizeof(ltpf->x_12k8) / sizeof(float);
+    int z_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
     int n_12k8 = dt == LC3_DT_7M5 ? 96 : 128;
 
     memmove(ltpf->x_12k8, ltpf->x_12k8 + n_12k8,
-        (z_12k8 - n_12k8) * sizeof(float));
+        (z_12k8 - n_12k8) * sizeof(*ltpf->x_12k8));
+
+    int16_t *x_12k8 = ltpf->x_12k8 + (z_12k8 - n_12k8);
 
-    float *x_12k8 = ltpf->x_12k8 + (z_12k8 - n_12k8);
     resample_12k8[sr](&ltpf->hp50, x, x_12k8, n_12k8);
 
     x_12k8 -= (dt == LC3_DT_7M5 ? 44 :  24);
 
     /* --- Resampling to 6.4 KHz --- */
 
-    int z_6k4 = sizeof(ltpf->x_6k4) / sizeof(float);
+    int z_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);
     int n_6k4 = n_12k8 >> 1;
 
     memmove(ltpf->x_6k4, ltpf->x_6k4 + n_6k4,
-        (z_6k4 - n_6k4) * sizeof(float));
+        (z_6k4 - n_6k4) * sizeof(*ltpf->x_6k4));
+
+    int16_t *x_6k4 = ltpf->x_6k4 + (z_6k4 - n_6k4);
 
-    float *x_6k4 = ltpf->x_6k4 + (z_6k4 - n_6k4);
     resample_6k4(x_12k8, x_6k4, n_6k4);
 
     /* --- Pitch detection --- */
@@ -448,7 +641,7 @@ bool lc3_ltpf_analyse(enum lc3_dt dt, enum lc3_srate sr,
     bool pitch_present = detect_pitch(ltpf, x_6k4, n_6k4, &tc);
 
     if (pitch_present) {
-        float u[n_12k8], v[n_12k8];
+        int16_t u[n_12k8], v[n_12k8];
 
         data->pitch_index = refine_pitch(x_12k8, n_12k8, tc, &pitch);
 
@@ -489,14 +682,14 @@ bool lc3_ltpf_analyse(enum lc3_dt dt, enum lc3_srate sr,
 
 /**
  * Synthesis filter template
- * xr, nr          Ring buffer of filtered samples
+ * xh, nh          History ring buffer of filtered samples
  * lag             Lag parameter in the ring buffer
  * x0              w-1 previous input samples
  * x, n            Current samples as input, filtered as output
  * c, w            Coefficients `den` then `num`, and width of filter
  * fade            Fading mode of filter  -1: Out  1: In  0: None
  */
-static inline void synthesize_template(const float *xr, int nr, int lag,
+static inline void synthesize_template(const float *xh, int nh, int lag,
     const float *x0, float *x, int n, const float *c, const int w, int fade)
 {
     float g = (float)(fade <= 0);
@@ -507,15 +700,15 @@ static inline void synthesize_template(const float *xr, int nr, int lag,
 
     lag += (w >> 1);
 
-    const float *y = x - xr < lag ? x + (nr - lag) : x - lag;
-    const float *y_end = xr + nr - 1;
+    const float *y = x - xh < lag ? x + (nh - lag) : x - lag;
+    const float *y_end = xh + nh - 1;
 
     for (int j = 0; j < w-1; j++) {
 
         u[j] = 0;
 
         float yi = *y, xi = *(x0++);
-        y = y < y_end ? y + 1 : xr;
+        y = y < y_end ? y + 1 : xh;
 
         for (int k = 0; k <= j; k++)
             u[j-k] -= yi * c[k];
@@ -532,7 +725,7 @@ static inline void synthesize_template(const float *xr, int nr, int lag,
         for (int j = 0; j < w; j++, g += g_incr) {
 
             float yi = *y, xi = *x;
-            y = y < y_end ? y + 1 : xr;
+            y = y < y_end ? y + 1 : xh;
 
             for (int k = 0; k < w; k++)
                 u[(j+(w-1)-k)%w] -= yi * c[k];
@@ -589,9 +782,9 @@ static void (* const synthesize[])(const float *, int, int,
  */
 void lc3_ltpf_synthesize(enum lc3_dt dt, enum lc3_srate sr, int nbytes,
     lc3_ltpf_synthesis_t *ltpf, const lc3_ltpf_data_t *data,
-    const float *xr, float *x)
+    const float *xh, float *x)
 {
-    int nr = LC3_NR(dt, sr);
+    int nh = LC3_NH(dt, sr);
     int dt_us = LC3_DT_US(dt);
 
     /* --- Filter parameters --- */
@@ -627,15 +820,15 @@ void lc3_ltpf_synthesize(enum lc3_dt dt, enum lc3_srate sr, int nbytes,
         memcpy(x0, x + nt-(w-1), (w-1) * sizeof(float));
 
     if (!ltpf->active && active)
-        synthesize[sr](xr, nr, pitch/4, ltpf->x, x, nt, c, 1);
+        synthesize[sr](xh, nh, pitch/4, ltpf->x, x, nt, c, 1);
     else if (ltpf->active && !active)
-        synthesize[sr](xr, nr, ltpf->pitch/4, ltpf->x, x, nt, ltpf->c, -1);
+        synthesize[sr](xh, nh, ltpf->pitch/4, ltpf->x, x, nt, ltpf->c, -1);
     else if (ltpf->active && active && ltpf->pitch == pitch)
-        synthesize[sr](xr, nr, pitch/4, ltpf->x, x, nt, c, 0);
+        synthesize[sr](xh, nh, pitch/4, ltpf->x, x, nt, c, 0);
     else if (ltpf->active && active) {
-        synthesize[sr](xr, nr, ltpf->pitch/4, ltpf->x, x, nt, ltpf->c, -1);
-        synthesize[sr](xr, nr, pitch/4,
-            (x <= xr ? x + nr : x) - (w-1), x, nt, c, 1);
+        synthesize[sr](xh, nh, ltpf->pitch/4, ltpf->x, x, nt, ltpf->c, -1);
+        synthesize[sr](xh, nh, pitch/4,
+            (x <= xh ? x + nh : x) - (w-1), x, nt, c, 1);
     }
 
     /* --- Remainder --- */
@@ -643,7 +836,7 @@ void lc3_ltpf_synthesize(enum lc3_dt dt, enum lc3_srate sr, int nbytes,
     memcpy(ltpf->x, x + ns - (w-1), (w-1) * sizeof(float));
 
     if (active)
-        synthesize[sr](xr, nr, pitch/4, x0, x + nt, ns-nt, c, 0);
+        synthesize[sr](xh, nh, pitch/4, x0, x + nt, ns-nt, c, 0);
 
     /* --- Update state --- */
 
diff --git a/src/ltpf.h b/src/ltpf.h
index a0f725c..0d5bb3c 100644
--- a/src/ltpf.h
+++ b/src/ltpf.h
@@ -53,11 +53,12 @@ typedef struct lc3_ltpf_data {
  * data            Return bitstream data
  * return          True when pitch present, False otherwise
  *
+ * The `x` vector is aligned on 32 bits
  * The number of previous samples `d` accessed on `x` is :
  *   d: { 10, 20, 30, 40, 60 } - 1 for samplerates from 8KHz to 48KHz
  */
 bool lc3_ltpf_analyse(enum lc3_dt dt, enum lc3_srate sr,
-    lc3_ltpf_analysis_t *ltpf, const float *x, lc3_ltpf_data_t *data);
+    lc3_ltpf_analysis_t *ltpf, const int16_t *x, lc3_ltpf_data_t *data);
 
 /**
  * LTPF disable
diff --git a/src/ltpf_arm.h b/src/ltpf_arm.h
new file mode 100644
index 0000000..914c964
--- /dev/null
+++ b/src/ltpf_arm.h
@@ -0,0 +1,465 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#ifdef __ARM_FEATURE_SIMD32
+
+/**
+ * Configuration
+ */
+
+#ifndef TEST_ARM
+
+#include <arm_acle.h>
+
+#define resample_8k_12k8  arm_resample_8k_12k8
+#define resample_16k_12k8 arm_resample_16k_12k8
+#define resample_24k_12k8 arm_resample_24k_12k8
+#define resample_32k_12k8 arm_resample_32k_12k8
+#define resample_48k_12k8 arm_resample_48k_12k8
+
+#define correlate arm_correlate
+
+static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
+{
+    int16x2_t r;  /* Packed result: low half of `a`, high half of `b` */
+    __asm("pkhbt %0, %1, %2" : "=r" (r) : "r" (a), "r" (b));
+    return r;
+}
+
+#endif /* TEST_ARM */
+
+
+/**
+ * Import
+ */
+
+static inline int32_t filter_hp50(struct lc3_ltpf_hp50_state *, int32_t);
+static inline float dot(const int16_t *, const int16_t *, int);
+
+
+/**
+ * Template of the 12.8 KHz resamplers, for 8 / 16 / 32 KHz inputs
+ */
+static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
+    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
+{
+    const int npairs = 40 / p;  /* Pairs of past samples read before `x` */
+
+    const int16x2_t *x0 = x - npairs;
+
+    for (int t = 0; t < 5*n; t += 5) {
+        const int16x2_t *taps = h + (t % (2*p)) * (48 / p);
+        const int16x2_t *src = x0 + (t / (2*p));
+
+        int32_t acc = __smlad(*(src++), *(taps++), 0);
+
+        for (int j = 0; j < npairs; j += 5) {
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+        }
+
+        int32_t out = filter_hp50(hp50, acc);
+        *(y++) = (out + (1 << 15)) >> 16;  /* Round to 16 bits */
+    }
+}
+
+/**
+ * Template of the 12.8 KHz resamplers, for 24 / 48 KHz inputs
+ */
+static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
+    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
+{
+    const int npairs = 120 / p;  /* Pairs of past samples read before `x` */
+
+    const int16x2_t *x0 = x - npairs;
+
+    for (int t = 0; t < 15*n; t += 15) {
+        const int16x2_t *taps = h + (t % (2*p)) * (128 / p);
+        const int16x2_t *src = x0 + (t / (2*p));
+
+        int32_t acc = __smlad(*(src++), *(taps++), 0);
+
+        for (int j = 0; j < npairs; j += 15) {
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+            acc = __smlad(*(src++), *(taps++), acc);
+        }
+
+        int32_t out = filter_hp50(hp50, acc);
+        *(y++) = (out + (1 << 15)) >> 16;  /* Round to 16 bits */
+    }
+}
+
+/**
+ * Resample from 8 KHz to 12.8 KHz
+ */
+static void arm_resample_8k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t alignas(int32_t) h[2*8*12] = {
+        0, 214,  417, -1052, -4529, 26233, -4529, -1052,   417,  214,   0, 0,
+        0, 180,    0, -1522, -2427, 24506, -5289,     0,   763,  156, -28, 0,
+        0,  92, -323, -1361,     0, 19741, -3885,  1317,   861,    0, -61, 0,
+        0,   0, -457,  -752,  1873, 13068,     0,  2389,   598, -213, -79, 0,
+        0, -61, -398,     0,  2686,  5997,  5997,  2686,     0, -398, -61, 0,
+        0, -79, -213,   598,  2389,     0, 13068,  1873,  -752, -457,   0, 0,
+        0, -61,    0,   861,  1317, -3885, 19741,     0, -1361, -323,  92, 0,
+        0, -28,  156,   763,     0, -5289, 24506, -2427, -1522,    0, 180, 0,
+        0, 0, 214,  417, -1052, -4529, 26233, -4529, -1052,   417,  214,   0,
+        0, 0, 180,    0, -1522, -2427, 24506, -5289,     0,   763,  156, -28,
+        0, 0,  92, -323, -1361,     0, 19741, -3885,  1317,   861,    0, -61,
+        0, 0,   0, -457,  -752,  1873, 13068,     0,  2389,   598, -213, -79,
+        0, 0, -61, -398,     0,  2686,  5997,  5997,  2686,     0, -398, -61,
+        0, 0, -79, -213,   598,  2389,     0, 13068,  1873,  -752, -457,   0,
+        0, 0, -61,    0,   861,  1317, -3885, 19741,     0, -1361, -323,  92,
+        0, 0, -28,  156,   763,     0, -5289, 24506, -2427, -1522,    0, 180,
+    };
+
+    arm_resample_x64k_12k8(
+        8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
+}
+
+/**
+ * Resample from 16 KHz to 12.8 KHz
+ */
+static void arm_resample_16k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t alignas(int32_t) h[2*4*24] = {
+
+            0,   -61,   214,  -398,   417,     0, -1052,  2686,
+        -4529,  5997, 26233,  5997, -4529,  2686, -1052,     0,
+          417,  -398,   214,   -61,     0,     0,     0,     0,
+
+
+            0,   -79,   180,  -213,     0,   598, -1522,  2389,
+        -2427,     0, 24506, 13068, -5289,  1873,     0,  -752,
+          763,  -457,   156,     0,   -28,     0,     0,     0,
+
+
+            0,   -61,    92,     0,  -323,   861, -1361,  1317,
+            0, -3885, 19741, 19741, -3885,     0,  1317, -1361,
+          861,  -323,     0,    92,   -61,     0,     0,     0,
+
+            0,   -28,     0,   156,  -457,   763,  -752,     0,
+         1873, -5289, 13068, 24506,     0, -2427,  2389, -1522,
+          598,     0,  -213,   180,   -79,     0,     0,     0,
+
+
+            0,     0,   -61,   214,  -398,   417,     0, -1052,
+         2686, -4529,  5997, 26233,  5997, -4529,  2686, -1052,
+            0,   417,  -398,   214,   -61,     0,     0,     0,
+
+
+            0,     0,   -79,   180,  -213,     0,   598, -1522,
+         2389, -2427,     0, 24506, 13068, -5289,  1873,     0,
+         -752,   763,  -457,   156,     0,   -28,     0,     0,
+
+
+            0,     0,   -61,    92,     0,  -323,   861, -1361,
+         1317,     0, -3885, 19741, 19741, -3885,     0,  1317,
+        -1361,   861,  -323,     0,    92,   -61,     0,     0,
+
+            0,     0,   -28,     0,   156,  -457,   763,  -752,
+            0,  1873, -5289, 13068, 24506,     0, -2427,  2389,
+        -1522,   598,     0,  -213,   180,   -79,     0,     0,
+    };
+
+    arm_resample_x64k_12k8(
+        4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
+}
+
+/**
+ * Resample from 32 KHz to 12.8 KHz
+ */
+static void arm_resample_32k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t alignas(int32_t) h[2*2*48] = {
+
+            0,   -30,   -31,    46,   107,     0,  -199,  -162,
+          209,   430,     0,  -681,  -526,   658,  1343,     0,
+        -2264, -1943,  2999,  9871, 13116,  9871,  2999, -1943,
+        -2264,     0,  1343,   658,  -526,  -681,     0,   430,
+          209,  -162,  -199,     0,   107,    46,   -31,   -30,
+            0,     0,     0,     0,     0,     0,     0,     0,
+
+            0,   -14,   -39,     0,    90,    78,  -106,  -229,
+            0,   382,   299,  -376,  -761,     0,  1194,   937,
+        -1214, -2644,     0,  6534, 12253, 12253,  6534,     0,
+        -2644, -1214,   937,  1194,     0,  -761,  -376,   299,
+          382,     0,  -229,  -106,    78,    90,     0,   -39,
+          -14,     0,     0,     0,     0,     0,     0,     0,
+
+            0,     0,   -30,   -31,    46,   107,     0,  -199,
+         -162,   209,   430,     0,  -681,  -526,   658,  1343,
+            0, -2264, -1943,  2999,  9871, 13116,  9871,  2999,
+        -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
+          430,   209,  -162,  -199,     0,   107,    46,   -31,
+          -30,     0,     0,     0,     0,     0,     0,     0,
+
+            0,     0,   -14,   -39,     0,    90,    78,  -106,
+         -229,     0,   382,   299,  -376,  -761,     0,  1194,
+          937, -1214, -2644,     0,  6534, 12253, 12253,  6534,
+            0, -2644, -1214,   937,  1194,     0,  -761,  -376,
+          299,   382,     0,  -229,  -106,    78,    90,     0,
+          -39,   -14,     0,     0,     0,     0,     0,     0,
+    };
+
+    arm_resample_x64k_12k8(
+        2, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
+}
+
+/**
+ * Resample from 24 KHz to 12.8 KHz
+ */
+static void arm_resample_24k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t alignas(int32_t) h[2*8*32] = {
+
+            0,   -50,    19,   143,   -93,  -290,   278,   485,
+         -658,  -701,  1396,   901, -3019, -1042, 10276, 17488,
+        10276, -1042, -3019,   901,  1396,  -701,  -658,   485,
+          278,  -290,   -93,   143,    19,   -50,     0,     0,
+
+            0,   -46,     0,   141,   -45,  -305,   185,   543,
+         -501,  -854,  1153,  1249, -2619, -1908,  8712, 17358,
+        11772,     0, -3319,   480,  1593,  -504,  -796,   399,
+          367,  -261,  -142,   138,    40,   -52,    -5,     0,
+
+            0,   -41,   -17,   133,     0,  -304,    91,   574,
+         -334,  -959,   878,  1516, -2143, -2590,  7118, 16971,
+        13161,  1202, -3495,     0,  1731,  -267,  -908,   287,
+          445,  -215,  -188,   125,    62,   -52,   -12,     0,
+
+            0,   -34,   -30,   120,    41,  -291,     0,   577,
+         -164, -1015,   585,  1697, -1618, -3084,  5534, 16337,
+        14406,  2544, -3526,  -523,  1800,     0,  -985,   152,
+          509,  -156,  -230,   104,    83,   -48,   -19,     0,
+
+            0,   -26,   -41,   103,    76,  -265,   -83,   554,
+            0, -1023,   288,  1791, -1070, -3393,  3998, 15474,
+        15474,  3998, -3393, -1070,  1791,   288, -1023,     0,
+          554,   -83,  -265,    76,   103,   -41,   -26,     0,
+
+            0,   -19,   -48,    83,   104,  -230,  -156,   509,
+          152,  -985,     0,  1800,  -523, -3526,  2544, 14406,
+        16337,  5534, -3084, -1618,  1697,   585, -1015,  -164,
+          577,     0,  -291,    41,   120,   -30,   -34,     0,
+
+            0,   -12,   -52,    62,   125,  -188,  -215,   445,
+          287,  -908,  -267,  1731,     0, -3495,  1202, 13161,
+        16971,  7118, -2590, -2143,  1516,   878,  -959,  -334,
+          574,    91,  -304,     0,   133,   -17,   -41,     0,
+
+            0,    -5,   -52,    40,   138,  -142,  -261,   367,
+          399,  -796,  -504,  1593,   480, -3319,     0, 11772,
+        17358,  8712, -1908, -2619,  1249,  1153,  -854,  -501,
+          543,   185,  -305,   -45,   141,     0,   -46,     0,
+
+            0,     0,   -50,    19,   143,   -93,  -290,   278,
+          485,  -658,  -701,  1396,   901, -3019, -1042, 10276,
+        17488, 10276, -1042, -3019,   901,  1396,  -701,  -658,
+          485,   278,  -290,   -93,   143,    19,   -50,     0,
+
+            0,     0,   -46,     0,   141,   -45,  -305,   185,
+          543,  -501,  -854,  1153,  1249, -2619, -1908,  8712,
+        17358, 11772,     0, -3319,   480,  1593,  -504,  -796,
+          399,   367,  -261,  -142,   138,    40,   -52,    -5,
+
+            0,     0,   -41,   -17,   133,     0,  -304,    91,
+          574,  -334,  -959,   878,  1516, -2143, -2590,  7118,
+        16971, 13161,  1202, -3495,     0,  1731,  -267,  -908,
+          287,   445,  -215,  -188,   125,    62,   -52,   -12,
+
+            0,     0,   -34,   -30,   120,    41,  -291,     0,
+          577,  -164, -1015,   585,  1697, -1618, -3084,  5534,
+        16337, 14406,  2544, -3526,  -523,  1800,     0,  -985,
+          152,   509,  -156,  -230,   104,    83,   -48,   -19,
+
+            0,     0,   -26,   -41,   103,    76,  -265,   -83,
+          554,     0, -1023,   288,  1791, -1070, -3393,  3998,
+        15474, 15474,  3998, -3393, -1070,  1791,   288, -1023,
+            0,   554,   -83,  -265,    76,   103,   -41,   -26,
+
+            0,     0,   -19,   -48,    83,   104,  -230,  -156,
+          509,   152,  -985,     0,  1800,  -523, -3526,  2544,
+        14406, 16337,  5534, -3084, -1618,  1697,   585, -1015,
+         -164,   577,     0,  -291,    41,   120,   -30,   -34,
+
+            0,     0,   -12,   -52,    62,   125,  -188,  -215,
+          445,   287,  -908,  -267,  1731,     0, -3495,  1202,
+        13161, 16971,  7118, -2590, -2143,  1516,   878,  -959,
+         -334,   574,    91,  -304,     0,   133,   -17,   -41,
+
+            0,     0,    -5,   -52,    40,   138,  -142,  -261,
+          367,   399,  -796,  -504,  1593,   480, -3319,     0,
+        11772, 17358,  8712, -1908, -2619,  1249,  1153,  -854,
+         -501,   543,   185,  -305,   -45,   141,     0,   -46,
+    };
+
+    arm_resample_x192k_12k8(
+        8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
+}
+
+/**
+ * Resample from 48 KHz to 12.8 KHz
+ */
+static void arm_resample_48k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t alignas(int32_t) h[2*4*64] = {
+
+            0,   -13,   -25,   -20,    10,    51,    71,    38,
+          -47,  -133,  -145,   -42,   139,   277,   242,     0,
+         -329,  -511,  -351,   144,   698,   895,   450,  -535,
+        -1510, -1697,  -521,  1999,  5138,  7737,  8744,  7737,
+         5138,  1999,  -521, -1697, -1510,  -535,   450,   895,
+          698,   144,  -351,  -511,  -329,     0,   242,   277,
+          139,   -42,  -145,  -133,   -47,    38,    71,    51,
+           10,   -20,   -25,   -13,     0,     0,     0,     0,
+
+            0,    -9,   -23,   -24,     0,    41,    71,    52,
+          -23,  -115,  -152,   -78,    92,   254,   272,    76,
+         -251,  -493,  -427,     0,   576,   900,   624,  -262,
+        -1309, -1763,  -954,  1272,  4356,  7203,  8679,  8169,
+         5886,  2767,     0, -1542, -1660,  -809,   240,   848,
+          796,   292,  -252,  -507,  -398,   -82,   199,   288,
+          183,     0,  -130,  -145,   -71,    20,    69,    60,
+           20,   -15,   -26,   -17,    -3,     0,     0,     0,
+
+            0,    -6,   -20,   -26,    -8,    31,    67,    62,
+            0,   -94,  -152,  -108,    45,   223,   287,   143,
+         -167,  -454,  -480,  -134,   439,   866,   758,     0,
+        -1071, -1748, -1295,   601,  3559,  6580,  8485,  8485,
+         6580,  3559,   601, -1295, -1748, -1071,     0,   758,
+          866,   439,  -134,  -480,  -454,  -167,   143,   287,
+          223,    45,  -108,  -152,   -94,     0,    62,    67,
+           31,    -8,   -26,   -20,    -6,     0,     0,     0,
+
+            0,    -3,   -17,   -26,   -15,    20,    60,    69,
+           20,   -71,  -145,  -130,     0,   183,   288,   199,
+          -82,  -398,  -507,  -252,   292,   796,   848,   240,
+         -809, -1660, -1542,     0,  2767,  5886,  8169,  8679,
+         7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,
+          900,   576,     0,  -427,  -493,  -251,    76,   272,
+          254,    92,   -78,  -152,  -115,   -23,    52,    71,
+           41,     0,   -24,   -23,    -9,     0,     0,     0,
+
+            0,     0,   -13,   -25,   -20,    10,    51,    71,
+           38,   -47,  -133,  -145,   -42,   139,   277,   242,
+            0,  -329,  -511,  -351,   144,   698,   895,   450,
+         -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
+         7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,
+          895,   698,   144,  -351,  -511,  -329,     0,   242,
+          277,   139,   -42,  -145,  -133,   -47,    38,    71,
+           51,    10,   -20,   -25,   -13,     0,     0,     0,
+
+            0,     0,    -9,   -23,   -24,     0,    41,    71,
+           52,   -23,  -115,  -152,   -78,    92,   254,   272,
+           76,  -251,  -493,  -427,     0,   576,   900,   624,
+         -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
+         8169,  5886,  2767,     0, -1542, -1660,  -809,   240,
+          848,   796,   292,  -252,  -507,  -398,   -82,   199,
+          288,   183,     0,  -130,  -145,   -71,    20,    69,
+           60,    20,   -15,   -26,   -17,    -3,     0,     0,
+
+            0,     0,    -6,   -20,   -26,    -8,    31,    67,
+           62,     0,   -94,  -152,  -108,    45,   223,   287,
+          143,  -167,  -454,  -480,  -134,   439,   866,   758,
+            0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
+         8485,  6580,  3559,   601, -1295, -1748, -1071,     0,
+          758,   866,   439,  -134,  -480,  -454,  -167,   143,
+          287,   223,    45,  -108,  -152,   -94,     0,    62,
+           67,    31,    -8,   -26,   -20,    -6,     0,     0,
+
+            0,     0,    -3,   -17,   -26,   -15,    20,    60,
+           69,    20,   -71,  -145,  -130,     0,   183,   288,
+          199,   -82,  -398,  -507,  -252,   292,   796,   848,
+          240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
+         8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,
+          624,   900,   576,     0,  -427,  -493,  -251,    76,
+          272,   254,    92,   -78,  -152,  -115,   -23,    52,
+           71,    41,     0,   -24,   -23,    -9,     0,     0,
+    };
+
+    arm_resample_x192k_12k8(
+        4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
+}
+
+/**
+ * Return the `nc` correlations of `a` with `b`, `b-1`, ..., `b-(nc-1)`
+ */
+static void arm_correlate(
+    const int16_t *a, const int16_t *b, int n, float *y, int nc)
+{
+    /* --- Check alignment of `b` --- */
+
+    if ((uintptr_t)b & 3)
+        *(y++) = dot(a, b--, n), nc--;
+
+    /* --- Processing by pair --- */
+
+    for ( ; nc >= 2; nc -= 2) {
+        const int16x2_t *an = (const int16x2_t *)(a  );
+        const int16x2_t *bn = (const int16x2_t *)(b--);
+
+        int16x2_t ax, b0, b1;
+        int64_t v0 = 0, v1 = 0;
+        /* Shift as unsigned, left shift of a negative value is undefined */
+        b1 = (int16x2_t)((uint32_t)*(b--) << 16);
+
+        for (int i = 0; i < (n >> 4); i++ )
+            for (int j = 0; j < 4; j++) {
+                /* v0 accumulates the lag of `bn`, v1 one sample earlier */
+                ax = *(an++), b0 = *(bn++);
+                v0 = __smlald (ax, b0, v0);
+                v1 = __smlaldx(ax, __pkhbt(b0, b1), v1);
+
+                ax = *(an++), b1 = *(bn++);
+                v0 = __smlald (ax, b1, v0);
+                v1 = __smlaldx(ax, __pkhbt(b1, b0), v1);
+            }
+        /* Round and scale the sums by 2^-6 */
+        *(y++) = (float)((int32_t)((v0 + (1 << 5)) >> 6));
+        *(y++) = (float)((int32_t)((v1 + (1 << 5)) >> 6));
+    }
+
+    /* --- Odd element count --- */
+
+    if (nc > 0)
+        *(y++) = dot(a, b, n);
+}
+
+#endif /* __ARM_FEATURE_SIMD32 */
diff --git a/src/ltpf_neon.h b/src/ltpf_neon.h
new file mode 100644
index 0000000..728ed1d
--- /dev/null
+++ b/src/ltpf_neon.h
@@ -0,0 +1,256 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#if __ARM_NEON
+
+/**
+ * Configuration
+ */
+
+#ifndef TEST_NEON
+
+#include <arm_neon.h>
+
+#define resample_16k_12k8  neon_resample_16k_12k8
+#define resample_32k_12k8  neon_resample_32k_12k8
+#define resample_48k_12k8  neon_resample_48k_12k8
+
+#define correlate  neon_correlate
+#define dot        neon_dot
+
+#endif /* TEST_NEON */
+
+
+/**
+ * Import
+ */
+
+static inline int32_t filter_hp50(struct lc3_ltpf_hp50_state *, int32_t);
+
+
+/**
+ * Resample from 16 KHz to 12.8 KHz, 4 phases of 20 taps
+ */
+static void neon_resample_16k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t h[4][20] = {
+
+    {   -61,   214,  -398,   417,     0, -1052,  2686, -4529,  5997, 26233,
+       5997, -4529,  2686, -1052,     0,   417,  -398,   214,   -61,     0 },
+
+    {   -79,   180,  -213,     0,   598, -1522,  2389, -2427,     0, 24506,
+      13068, -5289,  1873,     0,  -752,   763,  -457,   156,     0,   -28 },
+
+    {   -61,    92,     0,  -323,   861, -1361,  1317,     0, -3885, 19741,
+      19741, -3885,     0,  1317, -1361,   861,  -323,     0,    92,   -61 },
+
+    {   -28,     0,   156,  -457,   763,  -752,     0,  1873, -5289, 13068,
+      24506,     0, -2427,  2389, -1522,   598,     0,  -213,   180,   -79 },
+
+    };
+
+    const int16_t *x0 = x - (20 - 1);
+
+    for (int t = 0; t < 5*n; t += 5) {
+        const int16_t *hp = h[t & 3];
+        const int16_t *xp = x0 + (t >> 2);
+        int32x4_t acc;
+
+        acc = vmull_s16(     vld1_s16(xp), vld1_s16(hp)), xp += 4, hp += 4;
+        acc = vmlal_s16(acc, vld1_s16(xp), vld1_s16(hp)), xp += 4, hp += 4;
+        acc = vmlal_s16(acc, vld1_s16(xp), vld1_s16(hp)), xp += 4, hp += 4;
+        acc = vmlal_s16(acc, vld1_s16(xp), vld1_s16(hp)), xp += 4, hp += 4;
+        acc = vmlal_s16(acc, vld1_s16(xp), vld1_s16(hp)), xp += 4, hp += 4;
+
+        int32_t out = filter_hp50(hp50, vaddvq_s32(acc));
+        *(y++) = (out + (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Resample from 32 KHz to 12.8 KHz, 2 phases of 40 taps
+ */
+static void neon_resample_32k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t h[2][40] = {
+
+    {   -30,   -31,    46,   107,     0,  -199,  -162,   209,   430,     0,
+       -681,  -526,   658,  1343,     0, -2264, -1943,  2999,  9871, 13116,
+       9871,  2999, -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
+        430,   209,  -162,  -199,     0,   107,    46,   -31,   -30,     0 },
+
+    {   -14,   -39,     0,    90,    78,  -106,  -229,     0,   382,   299,
+       -376,  -761,     0,  1194,   937, -1214, -2644,     0,  6534, 12253,
+      12253,  6534,     0, -2644, -1214,   937,  1194,     0,  -761,  -376,
+        299,   382,     0,  -229,  -106,    78,    90,     0,   -39,   -14 },
+
+    };
+
+    x -= 40 - 1;
+
+    for (int i = 0; i < 5*n; i += 5) {
+        const int16_t *hn = h[i & 1];
+        const int16_t *xn = x + (i >> 1);
+
+        int32x4_t un = vmull_s16(vld1_s16(xn), vld1_s16(hn));
+        xn += 4, hn += 4;
+        /* Accumulate the 9 remaining groups of 4 taps */
+        for (int k = 1; k < 10; k++)
+            un = vmlal_s16(un, vld1_s16(xn), vld1_s16(hn)), xn += 4, hn += 4;
+
+        int32_t yn = filter_hp50(hp50, vaddvq_s32(un));
+        *(y++) = (yn + (1 << 15)) >> 16;
+    }
+}
+
+
+
+/**
+ * Resample from 48 KHz to 12.8 KHz, 4 phases of 60 taps (rows padded to 64)
+ */
+static void neon_resample_48k_12k8(
+    struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
+{
+    static const int16_t alignas(16) h[4][64] = {
+
+    {  -13,   -25,   -20,    10,    51,    71,    38,   -47,  -133,  -145,
+       -42,   139,   277,   242,     0,  -329,  -511,  -351,   144,   698,
+       895,   450,  -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
+      7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,   895,   698,
+       144,  -351,  -511,  -329,     0,   242,   277,   139,   -42,  -145,
+      -133,   -47,    38,    71,    51,    10,   -20,   -25,   -13,     0 },
+
+    {   -9,   -23,   -24,     0,    41,    71,    52,   -23,  -115,  -152,
+       -78,    92,   254,   272,    76,  -251,  -493,  -427,     0,   576,
+       900,   624,  -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
+      8169,  5886,  2767,     0, -1542, -1660,  -809,   240,   848,   796,
+       292,  -252,  -507,  -398,   -82,   199,   288,   183,     0,  -130,
+      -145,   -71,    20,    69,    60,    20,   -15,   -26,   -17,    -3 },
+
+    {   -6,   -20,   -26,    -8,    31,    67,    62,     0,   -94,  -152,
+      -108,    45,   223,   287,   143,  -167,  -454,  -480,  -134,   439,
+       866,   758,     0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
+      8485,  6580,  3559,   601, -1295, -1748, -1071,     0,   758,   866,
+       439,  -134,  -480,  -454,  -167,   143,   287,   223,    45,  -108,
+      -152,   -94,     0,    62,    67,    31,    -8,   -26,   -20,    -6 },
+
+    {   -3,   -17,   -26,   -15,    20,    60,    69,    20,   -71,  -145,
+      -130,     0,   183,   288,   199,   -82,  -398,  -507,  -252,   292,
+       796,   848,   240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
+      8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,   900,
+       576,     0,  -427,  -493,  -251,    76,   272,   254,    92,   -78,
+      -152,  -115,   -23,    52,    71,    41,     0,   -24,   -23,    -9 },
+
+    };
+
+    x -= 60 - 1;
+
+    for (int i = 0; i < 15*n; i += 15) {
+        const int16_t *hn = h[i & 3];
+        const int16_t *xn = x + (i >> 2);
+
+        int32x4_t un = vmull_s16(vld1_s16(xn), vld1_s16(hn));
+        xn += 4, hn += 4;
+        /* Accumulate the 14 remaining groups of 4 taps */
+        for (int k = 1; k < 15; k++)
+            un = vmlal_s16(un, vld1_s16(xn), vld1_s16(hn)), xn += 4, hn += 4;
+
+        int32_t yn = filter_hp50(hp50, vaddvq_s32(un));
+        *(y++) = (yn + (1 << 15)) >> 16;
+    }
+}
+
+/**
+ * Dot product of 2 vectors, by blocks of 16 samples, scaled by 2^-6
+ */
+static inline float neon_dot(const int16_t *a, const int16_t *b, int n)
+{
+    int64x2_t sum = vmovq_n_s64(0);
+
+    for (int blk = n >> 4; blk > 0; blk--) {
+        int32x4_t prod;
+
+        prod = vmull_s16(      vld1_s16(a), vld1_s16(b)), a += 4, b += 4;
+        prod = vmlal_s16(prod, vld1_s16(a), vld1_s16(b)), a += 4, b += 4;
+        sum = vpadalq_s32(sum, prod);
+
+        prod = vmull_s16(      vld1_s16(a), vld1_s16(b)), a += 4, b += 4;
+        prod = vmlal_s16(prod, vld1_s16(a), vld1_s16(b)), a += 4, b += 4;
+        sum = vpadalq_s32(sum, prod);
+    }
+
+    const int64_t total = vaddvq_s64(sum) + (1 << 5);
+    return (float)(int32_t)(total >> 6);
+}
+
+/**
+ * Return the `nc` correlations of `a` with `b`, `b-1`, ..., `b-(nc-1)`
+ */
+static void neon_correlate(
+    const int16_t *a, const int16_t *b, int n, float *y, int nc)
+{
+    for ( ; nc >= 4; nc -= 4, b -= 4) {
+        const int16_t *an = (const int16_t *)a;
+        const int16_t *bn = (const int16_t *)b;
+
+        int64x2_t v0 = vmovq_n_s64(0), v1 = v0, v2 = v0, v3 = v0;
+        int16x4_t ax, b0, b1;
+        /* Preload b[-4..-1], needed by the lags 1 to 3 */
+        b0 = vld1_s16(bn-4);
+
+        for (int i=0; i < (n >> 4); i++ )
+            for (int j = 0; j < 2; j++) {
+                int32x4_t u0, u1, u2, u3;
+
+                b1 = b0;
+                b0 = vld1_s16(bn), bn += 4;
+                ax = vld1_s16(an), an += 4;
+
+                u0 = vmull_s16(ax, b0);                    /* Lag 0 */
+                u1 = vmull_s16(ax, vext_s16(b1, b0, 3));   /* Lag 1 */
+                u2 = vmull_s16(ax, vext_s16(b1, b0, 2));   /* Lag 2 */
+                u3 = vmull_s16(ax, vext_s16(b1, b0, 1));   /* Lag 3 */
+
+                b1 = b0;
+                b0 = vld1_s16(bn), bn += 4;
+                ax = vld1_s16(an), an += 4;
+
+                u0 = vmlal_s16(u0, ax, b0);
+                u1 = vmlal_s16(u1, ax, vext_s16(b1, b0, 3));
+                u2 = vmlal_s16(u2, ax, vext_s16(b1, b0, 2));
+                u3 = vmlal_s16(u3, ax, vext_s16(b1, b0, 1));
+
+                v0 = vpadalq_s32(v0, u0);
+                v1 = vpadalq_s32(v1, u1);
+                v2 = vpadalq_s32(v2, u2);
+                v3 = vpadalq_s32(v3, u3);
+            }
+        /* Round and scale the sums by 2^-6 */
+        *(y++) = (float)((int32_t)((vaddvq_s64(v0) + (1 << 5)) >> 6));
+        *(y++) = (float)((int32_t)((vaddvq_s64(v1) + (1 << 5)) >> 6));
+        *(y++) = (float)((int32_t)((vaddvq_s64(v2) + (1 << 5)) >> 6));
+        *(y++) = (float)((int32_t)((vaddvq_s64(v3) + (1 << 5)) >> 6));
+    }
+    /* Remaining correlations, one lag at a time */
+    for ( ; nc > 0; nc--)
+        *(y++) = neon_dot(a, b--, n);
+}
+
+#endif /* __ARM_NEON */
diff --git a/src/tables.c b/src/tables.c
index 68139cc..aef6fc1 100644
--- a/src/tables.c
+++ b/src/tables.c
@@ -2441,75 +2441,6 @@ const uint16_t lc3_tns_coeffs_bits[][17] = {
 };
 
 
-/**
- * Long Term Postfilter Analysis (cf. 3.7.6)
- * with the addition of `h[239] = 0`
- */
-
-const float lc3_ltpf_h12k8[240] = {
-    -2.04305583e-05, -4.46345894e-05, -7.16366399e-05, -1.00101113e-04,
-    -1.28372848e-04, -1.54543830e-04, -1.76544567e-04, -1.92256960e-04,
-    -1.99643819e-04, -1.96888686e-04, -1.82538332e-04, -1.55639427e-04,
-    -1.15860365e-04, -6.35893034e-05,  2.81006480e-19,  7.29218021e-05,
-     1.52397076e-04,  2.34920777e-04,  3.16378650e-04,  3.92211738e-04,
-     4.57623849e-04,  5.07824294e-04,  5.38295523e-04,  5.45072918e-04,
-     5.25022155e-04,  4.76098424e-04,  3.97571380e-04,  2.90200217e-04,
-     1.56344667e-04, -5.81880142e-19, -1.73252713e-04, -3.56385965e-04,
-    -5.41155231e-04, -7.18414023e-04, -8.78505232e-04, -1.01171451e-03,
-    -1.10876706e-03, -1.16134522e-03, -1.16260169e-03, -1.10764097e-03,
-    -9.93941563e-04, -8.21692190e-04, -5.94017766e-04, -3.17074654e-04,
-     9.74695082e-19,  3.45293760e-04,  7.04480871e-04,  1.06133447e-03,
-     1.39837473e-03,  1.69763080e-03,  1.94148675e-03,  2.11357591e-03,
-     2.19968245e-03,  2.18860625e-03,  2.07294546e-03,  1.84975249e-03,
-     1.52102188e-03,  1.09397426e-03,  5.81108062e-04, -1.42248266e-18,
-    -6.27153730e-04, -1.27425140e-03, -1.91223839e-03, -2.51026925e-03,
-    -3.03703830e-03, -3.46222687e-03, -3.75800672e-03, -3.90053247e-03,
-    -3.87135231e-03, -3.65866558e-03, -3.25835851e-03, -2.67475555e-03,
-    -1.92103305e-03, -1.01925433e-03,  1.86962369e-18,  1.09841545e-03,
-     2.23113197e-03,  3.34830927e-03,  4.39702277e-03,  5.32342672e-03,
-     6.07510531e-03,  6.60352025e-03,  6.86645399e-03,  6.83034270e-03,
-     6.47239234e-03,  5.78237521e-03,  4.76401273e-03,  3.43586351e-03,
-     1.83165284e-03, -2.25189837e-18, -1.99647619e-03, -4.08266886e-03,
-    -6.17308037e-03, -8.17444895e-03, -9.98882386e-03, -1.15169871e-02,
-    -1.26621006e-02, -1.33334458e-02, -1.34501120e-02, -1.29444881e-02,
-    -1.17654154e-02, -9.88086732e-03, -7.28003640e-03, -3.97473021e-03,
-     2.50961778e-18,  4.58604422e-03,  9.70324900e-03,  1.52512477e-02,
-     2.11120585e-02,  2.71533724e-02,  3.32324245e-02,  3.92003203e-02,
-     4.49066644e-02,  5.02043309e-02,  5.49542017e-02,  5.90297032e-02,
-     6.23209727e-02,  6.47385023e-02,  6.62161245e-02,  6.67132287e-02,
-     6.62161245e-02,  6.47385023e-02,  6.23209727e-02,  5.90297032e-02,
-     5.49542017e-02,  5.02043309e-02,  4.49066644e-02,  3.92003203e-02,
-     3.32324245e-02,  2.71533724e-02,  2.11120585e-02,  1.52512477e-02,
-     9.70324900e-03,  4.58604422e-03,  2.50961778e-18, -3.97473021e-03,
-    -7.28003640e-03, -9.88086732e-03, -1.17654154e-02, -1.29444881e-02,
-    -1.34501120e-02, -1.33334458e-02, -1.26621006e-02, -1.15169871e-02,
-    -9.98882386e-03, -8.17444895e-03, -6.17308037e-03, -4.08266886e-03,
-    -1.99647619e-03, -2.25189837e-18,  1.83165284e-03,  3.43586351e-03,
-     4.76401273e-03,  5.78237521e-03,  6.47239234e-03,  6.83034270e-03,
-     6.86645399e-03,  6.60352025e-03,  6.07510531e-03,  5.32342672e-03,
-     4.39702277e-03,  3.34830927e-03,  2.23113197e-03,  1.09841545e-03,
-     1.86962369e-18, -1.01925433e-03, -1.92103305e-03, -2.67475555e-03,
-    -3.25835851e-03, -3.65866558e-03, -3.87135231e-03, -3.90053247e-03,
-    -3.75800672e-03, -3.46222687e-03, -3.03703830e-03, -2.51026925e-03,
-    -1.91223839e-03, -1.27425140e-03, -6.27153730e-04, -1.42248266e-18,
-     5.81108062e-04,  1.09397426e-03,  1.52102188e-03,  1.84975249e-03,
-     2.07294546e-03,  2.18860625e-03,  2.19968245e-03,  2.11357591e-03,
-     1.94148675e-03,  1.69763080e-03,  1.39837473e-03,  1.06133447e-03,
-     7.04480871e-04,  3.45293760e-04,  9.74695082e-19, -3.17074654e-04,
-    -5.94017766e-04, -8.21692190e-04, -9.93941563e-04, -1.10764097e-03,
-    -1.16260169e-03, -1.16134522e-03, -1.10876706e-03, -1.01171451e-03,
-    -8.78505232e-04, -7.18414023e-04, -5.41155231e-04, -3.56385965e-04,
-    -1.73252713e-04, -5.81880142e-19,  1.56344667e-04,  2.90200217e-04,
-     3.97571380e-04,  4.76098424e-04,  5.25022155e-04,  5.45072918e-04,
-     5.38295523e-04,  5.07824294e-04,  4.57623849e-04,  3.92211738e-04,
-     3.16378650e-04,  2.34920777e-04,  1.52397076e-04,  7.29218021e-05,
-     2.81006480e-19, -6.35893034e-05, -1.15860365e-04, -1.55639427e-04,
-    -1.82538332e-04, -1.96888686e-04, -1.99643819e-04, -1.92256960e-04,
-    -1.76544567e-04, -1.54543830e-04, -1.28372848e-04, -1.00101113e-04,
-    -7.16366399e-05, -4.46345894e-05, -2.04305583e-05,  0.0           ,
-};
-
-
 /**
  * Long Term Postfilter Synthesis (cf. 3.7.6)
  * with - addition of a 0 for num coefficients
diff --git a/src/tables.h b/src/tables.h
index b327d0e..26bd48e 100644
--- a/src/tables.h
+++ b/src/tables.h
@@ -78,8 +78,6 @@ extern const uint16_t lc3_tns_coeffs_bits[][17];
  * Long Term Postfilter
  */
 
-extern const float lc3_ltpf_h12k8[240];
-
 extern const float *lc3_ltpf_cnum[LC3_NUM_SRATE][4];
 extern const float *lc3_ltpf_cden[LC3_NUM_SRATE][4];
 
diff --git a/tables/mktables.py b/tables/mktables.py
index fec56bd..67d4312 100755
--- a/tables/mktables.py
+++ b/tables/mktables.py
@@ -17,6 +17,75 @@
 
 import numpy as np
 
+LTPF_H12K8 = np.array([  # LTPF analysis filter (cf. 3.7.6), 239 taps
+    -2.04305583e-05, -4.46345894e-05, -7.16366399e-05, -1.00101113e-04,
+    -1.28372848e-04, -1.54543830e-04, -1.76544567e-04, -1.92256960e-04,
+    -1.99643819e-04, -1.96888686e-04, -1.82538332e-04, -1.55639427e-04,
+    -1.15860365e-04, -6.35893034e-05,  2.81006480e-19,  7.29218021e-05,
+     1.52397076e-04,  2.34920777e-04,  3.16378650e-04,  3.92211738e-04,
+     4.57623849e-04,  5.07824294e-04,  5.38295523e-04,  5.45072918e-04,
+     5.25022155e-04,  4.76098424e-04,  3.97571380e-04,  2.90200217e-04,
+     1.56344667e-04, -5.81880142e-19, -1.73252713e-04, -3.56385965e-04,
+    -5.41155231e-04, -7.18414023e-04, -8.78505232e-04, -1.01171451e-03,
+    -1.10876706e-03, -1.16134522e-03, -1.16260169e-03, -1.10764097e-03,
+    -9.93941563e-04, -8.21692190e-04, -5.94017766e-04, -3.17074654e-04,
+     9.74695082e-19,  3.45293760e-04,  7.04480871e-04,  1.06133447e-03,
+     1.39837473e-03,  1.69763080e-03,  1.94148675e-03,  2.11357591e-03,
+     2.19968245e-03,  2.18860625e-03,  2.07294546e-03,  1.84975249e-03,
+     1.52102188e-03,  1.09397426e-03,  5.81108062e-04, -1.42248266e-18,
+    -6.27153730e-04, -1.27425140e-03, -1.91223839e-03, -2.51026925e-03,
+    -3.03703830e-03, -3.46222687e-03, -3.75800672e-03, -3.90053247e-03,
+    -3.87135231e-03, -3.65866558e-03, -3.25835851e-03, -2.67475555e-03,
+    -1.92103305e-03, -1.01925433e-03,  1.86962369e-18,  1.09841545e-03,
+     2.23113197e-03,  3.34830927e-03,  4.39702277e-03,  5.32342672e-03,
+     6.07510531e-03,  6.60352025e-03,  6.86645399e-03,  6.83034270e-03,
+     6.47239234e-03,  5.78237521e-03,  4.76401273e-03,  3.43586351e-03,
+     1.83165284e-03, -2.25189837e-18, -1.99647619e-03, -4.08266886e-03,
+    -6.17308037e-03, -8.17444895e-03, -9.98882386e-03, -1.15169871e-02,
+    -1.26621006e-02, -1.33334458e-02, -1.34501120e-02, -1.29444881e-02,
+    -1.17654154e-02, -9.88086732e-03, -7.28003640e-03, -3.97473021e-03,
+     2.50961778e-18,  4.58604422e-03,  9.70324900e-03,  1.52512477e-02,
+     2.11120585e-02,  2.71533724e-02,  3.32324245e-02,  3.92003203e-02,
+     4.49066644e-02,  5.02043309e-02,  5.49542017e-02,  5.90297032e-02,
+     6.23209727e-02,  6.47385023e-02,  6.62161245e-02,  6.67132287e-02,
+     6.62161245e-02,  6.47385023e-02,  6.23209727e-02,  5.90297032e-02,
+     5.49542017e-02,  5.02043309e-02,  4.49066644e-02,  3.92003203e-02,
+     3.32324245e-02,  2.71533724e-02,  2.11120585e-02,  1.52512477e-02,
+     9.70324900e-03,  4.58604422e-03,  2.50961778e-18, -3.97473021e-03,
+    -7.28003640e-03, -9.88086732e-03, -1.17654154e-02, -1.29444881e-02,
+    -1.34501120e-02, -1.33334458e-02, -1.26621006e-02, -1.15169871e-02,
+    -9.98882386e-03, -8.17444895e-03, -6.17308037e-03, -4.08266886e-03,
+    -1.99647619e-03, -2.25189837e-18,  1.83165284e-03,  3.43586351e-03,
+     4.76401273e-03,  5.78237521e-03,  6.47239234e-03,  6.83034270e-03,
+     6.86645399e-03,  6.60352025e-03,  6.07510531e-03,  5.32342672e-03,
+     4.39702277e-03,  3.34830927e-03,  2.23113197e-03,  1.09841545e-03,
+     1.86962369e-18, -1.01925433e-03, -1.92103305e-03, -2.67475555e-03,
+    -3.25835851e-03, -3.65866558e-03, -3.87135231e-03, -3.90053247e-03,
+    -3.75800672e-03, -3.46222687e-03, -3.03703830e-03, -2.51026925e-03,
+    -1.91223839e-03, -1.27425140e-03, -6.27153730e-04, -1.42248266e-18,
+     5.81108062e-04,  1.09397426e-03,  1.52102188e-03,  1.84975249e-03,
+     2.07294546e-03,  2.18860625e-03,  2.19968245e-03,  2.11357591e-03,
+     1.94148675e-03,  1.69763080e-03,  1.39837473e-03,  1.06133447e-03,
+     7.04480871e-04,  3.45293760e-04,  9.74695082e-19, -3.17074654e-04,
+    -5.94017766e-04, -8.21692190e-04, -9.93941563e-04, -1.10764097e-03,
+    -1.16260169e-03, -1.16134522e-03, -1.10876706e-03, -1.01171451e-03,
+    -8.78505232e-04, -7.18414023e-04, -5.41155231e-04, -3.56385965e-04,
+    -1.73252713e-04, -5.81880142e-19,  1.56344667e-04,  2.90200217e-04,
+     3.97571380e-04,  4.76098424e-04,  5.25022155e-04,  5.45072918e-04,
+     5.38295523e-04,  5.07824294e-04,  4.57623849e-04,  3.92211738e-04,
+     3.16378650e-04,  2.34920777e-04,  1.52397076e-04,  7.29218021e-05,
+     2.81006480e-19, -6.35893034e-05, -1.15860365e-04, -1.55639427e-04,
+    -1.82538332e-04, -1.96888686e-04, -1.99643819e-04, -1.92256960e-04,
+    -1.76544567e-04, -1.54543830e-04, -1.28372848e-04, -1.00101113e-04,
+    -7.16366399e-05, -4.46345894e-05, -2.04305583e-05
+])  # ltpf_resampler_table() appends a 0 before reshaping
+
+LTPF_HI = np.array([  # 15 taps, input of ltpf_interpolate_table()
+     6.69885837e-03,  3.96711478e-02,  1.06999186e-01,  2.09880463e-01,
+     3.35690625e-01,  4.59220930e-01,  5.50075002e-01,  5.83527575e-01,
+     5.50075002e-01,  4.59220930e-01,  3.35690625e-01,  2.09880463e-01,
+     1.06999186e-01,  3.96711478e-02,  6.69885837e-03
+])  # a 0 is appended on use, giving 16 values
 
 def print_table(t, m=4):
 
@@ -102,6 +171,45 @@ def inv_table():
     print('\n--- inv table ---')
     print_table(np.append(np.zeros(1), 1 / np.arange(1, 28)))
 
+def ltpf_resampler_table():  # print integer LTPF resampler taps, per rate
+
+    for sr in [ 8, 16, 32, 24, 48 ]:  # input sample rates, in KHz
+
+        r = 192 // sr
+        k = 64 if r & (r-1) else 192  # 192 when `r` is a power of 2
+
+        p = (192 // k) * (k // sr)  # number of rows of the table
+        q = p * (0.5 if sr == 8 else 1)  # scale, halved for 8 KHz
+
+        print('\n--- LTPF resampler {:d} KHz to 12.8 KHz ---'.format(sr))
+
+        h = np.rint(np.append(LTPF_H12K8, 0.) * q * 2**15).astype(int)
+        h = h.reshape((len(h) // p, p)).T  # row i: taps i, i+p, i+2p, ...
+        h = np.flip(h, axis=0)  # reverse the order of the rows
+        print('... Gain:', np.max(np.sum(np.abs(h), axis=1)) / 32768.)
+
+        for i in range(0, len(h), 192 // k):  # every (192 // k)-th row
+            for j in range(0, len(h[i]), 10):  # 10 values per line
+                print('{:5d}, {:5d}, {:5d}, {:5d}, {:5d}, '
+                      '{:5d}, {:5d}, {:5d}, {:5d}, {:5d},'.format(
+                    h[i][j+0], h[i][j+1], h[i][j+2], h[i][j+3], h[i][j+4],
+                    h[i][j+5], h[i][j+6], h[i][j+7], h[i][j+8], h[i][j+9]))
+
+
+def ltpf_interpolate_table():  # print integer LTPF interpolation taps
+
+    print('\n--- LTPF interpolation ---')
+
+    h = np.rint(np.append(LTPF_HI, 0.) * 2**15).astype(int)  # 16 values
+
+    h = h.reshape(len(h) // 4, 4).T  # row i: taps i, i+4, i+8, i+12
+    h = np.flip(h, axis=0)  # reverse the order of the rows
+    print('... Gain:', np.max(np.sum(np.abs(h), axis=1)) / 32768.)
+
+    for i in range(len(h)):  # one row of 4 values per line
+        print('{:5d}, {:5d}, {:5d}, {:5d}'.format(
+            h[i][0], h[i][1], h[i][2], h[i][3]))
+
 
 if __name__ == '__main__':
 
@@ -115,4 +223,7 @@ if __name__ == '__main__':
     tns_quantization_table()
     quant_iq_table()
 
+    ltpf_resampler_table()
+    ltpf_interpolate_table()
+
     print('')
diff --git a/test/arm/ltpf_arm.c b/test/arm/ltpf_arm.c
new file mode 100644
index 0000000..e7b8bfc
--- /dev/null
+++ b/test/arm/ltpf_arm.c
@@ -0,0 +1,114 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "simd32.h"
+
+/* -------------------------------------------------------------------------- */
+
+#define TEST_ARM
+#include <ltpf.c>
+
+void lc3_put_bits_generic(lc3_bits_t *a, unsigned b, int c)
+{ (void)a, (void)b, (void)c; } /* stub: ignores its arguments, does nothing */
+
+unsigned lc3_get_bits_generic(struct lc3_bits *a, int b)
+{ return (void)a, (void)b, 0; } /* stub: ignores its arguments, returns 0 */
+
+/* -------------------------------------------------------------------------- */
+
+static int check_resampler(void) /* 0 when all outputs match, else -1 */
+{
+    int16_t __x[60+480], *x = __x + 60; /* x[-60..-1] precede the input */
+    for (int i = -60; i < 480; i++)
+        x[i] = rand() & 0xffff;
+
+    struct lc3_ltpf_hp50_state hp50 = { 0 }, hp50_arm = { 0 };
+    int16_t y[128], y_arm[128];
+
+    resample_8k_12k8(&hp50, x, y, 128); /* each state is reused below */
+    arm_resample_8k_12k8(&hp50_arm, x, y_arm, 128);
+    if (memcmp(y, y_arm, 128 * sizeof(*y)) != 0) /* must be bit-exact */
+        return -1;
+
+    resample_16k_12k8(&hp50, x, y, 128);
+    arm_resample_16k_12k8(&hp50_arm, x, y_arm, 128);
+    if (memcmp(y, y_arm, 128 * sizeof(*y)) != 0)
+        return -1;
+
+    resample_24k_12k8(&hp50, x, y, 128);
+    arm_resample_24k_12k8(&hp50_arm, x, y_arm, 128);
+    if (memcmp(y, y_arm, 128 * sizeof(*y)) != 0)
+        return -1;
+
+    resample_32k_12k8(&hp50, x, y, 128);
+    arm_resample_32k_12k8(&hp50_arm, x, y_arm, 128);
+    if (memcmp(y, y_arm, 128 * sizeof(*y)) != 0)
+        return -1;
+
+    resample_48k_12k8(&hp50, x, y, 128);
+    arm_resample_48k_12k8(&hp50_arm, x, y_arm, 128);
+    if (memcmp(y, y_arm, 128 * sizeof(*y)) != 0)
+        return -1;
+
+    return 0;
+}
+
+static int check_correlate(void) /* 0 when all outputs match, else -1 */
+{
+    int16_t alignas(4) a[500], b[500];
+    float y[100], y_arm[100];
+
+    for (int i = 0; i < 500; i++) {
+        a[i] = rand() & 0xffff;
+        b[i] = rand() & 0xffff;
+    }
+
+    correlate(a, b+200, 128, y, 100); /* even `b` offset, 100 outputs */
+    arm_correlate(a, b+200, 128, y_arm, 100);
+    if (memcmp(y, y_arm, 100 * sizeof(*y)) != 0) /* must be bit-exact */
+        return -1;
+
+    correlate(a, b+199, 128, y, 99); /* odd `b` offset, odd output count */
+    arm_correlate(a, b+199, 128, y_arm, 99);
+    if (memcmp(y, y_arm, 99 * sizeof(*y)) != 0)
+        return -1;
+
+    correlate(a, b+199, 128, y, 100); /* odd `b` offset, 100 outputs */
+    arm_correlate(a, b+199, 128, y_arm, 100);
+    if (memcmp(y, y_arm, 100 * sizeof(*y)) != 0)
+        return -1;
+
+    return 0;
+}
+
+int check_ltpf(void) /* run the LTPF checks, stop at the first failure */
+{
+    int ret;
+
+    if ((ret = check_resampler()) < 0) /* negative means mismatch */
+        return ret;
+
+    if ((ret = check_correlate()) < 0)
+        return ret;
+
+    return 0;
+}
diff --git a/test/arm/makefile.mk b/test/arm/makefile.mk
new file mode 100644
index 0000000..91d11d2
--- /dev/null
+++ b/test/arm/makefile.mk
@@ -0,0 +1,31 @@
+#
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+test_arm_src += \
+    $(TEST_DIR)/arm/test_arm.c \
+    $(TEST_DIR)/arm/ltpf_arm.c \
+    $(SRC_DIR)/tables.c
+
+test_arm_include += $(SRC_DIR)
+test_arm_ldlibs += m
+
+$(eval $(call add-bin,test_arm)) # presumably defines $(test_arm_bin); confirm in add-bin
+
+test_arm: $(test_arm_bin) # run the binary given as first prerequisite
+	@echo "  RUN     $(notdir $<)"
+	$(V)$<
+
+test: test_arm # the `test` target also runs `test_arm`
diff --git a/test/arm/simd32.h b/test/arm/simd32.h
new file mode 100644
index 0000000..fd17f71
--- /dev/null
+++ b/test/arm/simd32.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#if __ARM_FEATURE_SIMD32
+
+#include <arm_acle.h>
+
+#else
+#define __ARM_FEATURE_SIMD32 1
+
+#include <stdint.h>
+
+typedef int32_t int16x2_t;
+
+__attribute__((unused)) /* low 16 bits of `a` packed with high 16 bits of `b` */
+static int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
+{
+    uint32_t a_bot = (uint32_t)a & 0x0000ffffu;
+    uint32_t b_top = (uint32_t)b & 0xffff0000u;
+
+    return (int16x2_t)(a_bot | b_top);
+}
+
+__attribute__((unused)) /* u + a.hi * b.hi + a.lo * b.lo, 32-bit result */
+static int32_t __smlad(int16x2_t a, int16x2_t b, int32_t u)
+{
+    int16_t a_hi = a >> 16, a_lo = a & 0xffff; /* signed 16-bit halves */
+    int16_t b_hi = b >> 16, b_lo = b & 0xffff;
+
+    return u + (a_hi * b_hi) + (a_lo * b_lo); /* NOTE(review): may overflow */
+}
+
+__attribute__((unused)) /* u + a.hi * b.hi + a.lo * b.lo, 64-bit result */
+static int64_t __smlald(int16x2_t a, int16x2_t b, int64_t u)
+{
+    int16_t a_hi = a >> 16, a_lo = a & 0xffff; /* signed 16-bit halves */
+    int16_t b_hi = b >> 16, b_lo = b & 0xffff;
+    return u + (a_hi * b_hi) + (a_lo * b_lo);
+}
+
+__attribute__((unused)) /* u + a.hi * b.lo + a.lo * b.hi, 64-bit result */
+static int64_t __smlaldx(int16x2_t a, int16x2_t b, int64_t u)
+{
+    int16_t a_hi = a >> 16, a_lo = a & 0xffff; /* signed 16-bit halves */
+    int16_t b_hi = b >> 16, b_lo = b & 0xffff;
+    return u + (a_hi * b_lo) + (a_lo * b_hi); /* halves are crossed */
+}
+
+#endif /* __ARM_FEATURE_SIMD32 */
diff --git a/test/arm/test_arm.c b/test/arm/test_arm.c
new file mode 100644
index 0000000..1e5c15b
--- /dev/null
+++ b/test/arm/test_arm.c
@@ -0,0 +1,32 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <stdio.h>
+
+int check_ltpf(void);
+
+int main(void) /* exit status is 0 when every check passes, else 1 */
+{
+    int r, ret = 0;
+
+    printf("Checking LTPF ARM... "); fflush(stdout); /* show before running */
+    printf("%s\n", (r = check_ltpf()) == 0 ? "OK" : "Failed");
+    ret = ret || r; /* any non-zero result turns `ret` into 1 */
+
+    return ret;
+}
diff --git a/test/ctypes.h b/test/ctypes.h
index 7eb17b5..5627494 100644
--- a/test/ctypes.h
+++ b/test/ctypes.h
@@ -241,10 +241,10 @@ static PyObject *to_ltpf_hp50_state(
     CTYPES_CHECK("hp50", obj && PyDict_Check(obj));
 
     CTYPES_CHECK("hp50.s1", to_scalar(
-        PyDict_GetItemString(obj, "s1"), NPY_FLOAT, &hp50->s1));
+        PyDict_GetItemString(obj, "s1"), NPY_INT64, &hp50->s1));
 
     CTYPES_CHECK("hp50.s2", to_scalar(
-        PyDict_GetItemString(obj, "s2"), NPY_FLOAT, &hp50->s2));
+        PyDict_GetItemString(obj, "s2"), NPY_INT64, &hp50->s2));
 
     return obj;
 }
@@ -254,10 +254,10 @@ static PyObject *from_ltpf_hp50_state(
     PyObject *obj, const struct lc3_ltpf_hp50_state *hp50)
 {
     PyDict_SetItemString(obj, "s1",
-        new_scalar(NPY_FLOAT, &hp50->s1));
+        new_scalar(NPY_INT64, &hp50->s1));
 
     PyDict_SetItemString(obj, "s2",
-        new_scalar(NPY_FLOAT, &hp50->s2));
+        new_scalar(NPY_INT64, &hp50->s2));
 
     return obj;
 }
@@ -267,8 +267,8 @@ static PyObject *to_ltpf_analysis(
     PyObject *obj, struct lc3_ltpf_analysis *ltpf)
 {
     PyObject *nc_obj, *x_12k8_obj, *x_6k4_obj;
-    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(float);
-    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(float);
+    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
+    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);
 
     CTYPES_CHECK("ltpf", obj && PyDict_Check(obj));
 
@@ -286,11 +286,11 @@ static PyObject *to_ltpf_analysis(
         PyDict_GetItemString(obj, "hp50"), &ltpf->hp50));
 
     CTYPES_CHECK("ltpf.x_12k8", x_12k8_obj = to_1d_copy(
-        PyDict_GetItemString(obj, "x_12k8"), NPY_FLOAT, ltpf->x_12k8, n_12k8));
+        PyDict_GetItemString(obj, "x_12k8"), NPY_INT16, ltpf->x_12k8, n_12k8));
     PyDict_SetItemString(obj, "x_12k8", x_12k8_obj);
 
     CTYPES_CHECK("ltpf.x_6k4", x_6k4_obj = to_1d_copy(
-        PyDict_GetItemString(obj, "x_6k4"), NPY_FLOAT, ltpf->x_6k4, n_6k4));
+        PyDict_GetItemString(obj, "x_6k4"), NPY_INT16, ltpf->x_6k4, n_6k4));
     PyDict_SetItemString(obj, "x_6k4", x_6k4_obj);
 
     CTYPES_CHECK("ltpf.tc", to_scalar(
@@ -303,8 +303,8 @@ __attribute__((unused))
 static PyObject *from_ltpf_analysis(
     PyObject *obj, const struct lc3_ltpf_analysis *ltpf)
 {
-    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(float);
-    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(float);
+    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
+    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);
 
     if (!obj) obj = PyDict_New();
 
@@ -321,10 +321,10 @@ static PyObject *from_ltpf_analysis(
         from_ltpf_hp50_state(PyDict_New(), &ltpf->hp50));
 
     PyDict_SetItemString(obj, "x_12k8",
-        new_1d_copy(NPY_FLOAT, n_12k8, &ltpf->x_12k8));
+        new_1d_copy(NPY_INT16, n_12k8, &ltpf->x_12k8));
 
     PyDict_SetItemString(obj, "x_6k4",
-        new_1d_copy(NPY_FLOAT, n_6k4, &ltpf->x_6k4));
+        new_1d_copy(NPY_INT16, n_6k4, &ltpf->x_6k4));
 
     PyDict_SetItemString(obj, "tc",
         new_scalar(NPY_INT, &ltpf->tc));
@@ -703,6 +703,7 @@ static PyObject *from_encoder(PyObject *obj, const struct lc3_encoder *enc)
     unsigned sr_pcm = enc->sr_pcm;
     int ns = LC3_NS(dt, sr);
     int nd = LC3_ND(dt, sr);
+    int nt = LC3_NT(sr);
 
     if (!obj) obj = PyDict_New();
 
@@ -724,6 +725,9 @@ static PyObject *from_encoder(PyObject *obj, const struct lc3_encoder *enc)
     PyDict_SetItemString(obj, "quant",
         from_spec_analysis(NULL, &enc->spec));
 
+    PyDict_SetItemString(obj, "xt",
+        new_1d_copy(NPY_INT16, nt+ns, enc->xt-nt));
+
     PyDict_SetItemString(obj, "xs",
         new_1d_copy(NPY_FLOAT, ns+nd, enc->xs-nd));
 
@@ -737,7 +741,7 @@ __attribute__((unused))
 static PyObject *to_encoder(PyObject *obj, struct lc3_encoder *enc)
 {
     unsigned dt, sr, sr_pcm;
-    PyObject *xs_obj, *xf_obj;
+    PyObject *xt_obj, *xs_obj, *xf_obj;
 
     CTYPES_CHECK("encoder", obj && PyDict_Check(obj));
 
@@ -756,6 +760,7 @@ static PyObject *to_encoder(PyObject *obj, struct lc3_encoder *enc)
 
     int ns = LC3_NS(dt, sr);
     int nd = LC3_ND(dt, sr);
+    int nt = LC3_NT(sr);
 
     CTYPES_CHECK(NULL, to_attdet_analysis(
         PyDict_GetItemString(obj, "attdet"), &enc->attdet));
@@ -766,6 +771,10 @@ static PyObject *to_encoder(PyObject *obj, struct lc3_encoder *enc)
     CTYPES_CHECK(NULL, to_spec_analysis(
         PyDict_GetItemString(obj, "quant"), &enc->spec));
 
+    CTYPES_CHECK("encoder.xt", xt_obj = to_1d_copy(
+        PyDict_GetItemString(obj, "xt"), NPY_INT16, enc->xt-nt, ns+nt));
+    PyDict_SetItemString(obj, "xt", xt_obj);
+
     CTYPES_CHECK("encoder.xs", xs_obj = to_1d_copy(
         PyDict_GetItemString(obj, "xs"), NPY_FLOAT, enc->xs-nd, ns+nd));
     PyDict_SetItemString(obj, "xs", xs_obj);
@@ -782,8 +791,8 @@ static PyObject *from_decoder(PyObject *obj, const struct lc3_decoder *dec)
 {
     unsigned dt = dec->dt, sr = dec->sr;
     unsigned sr_pcm = dec->sr_pcm;
-    unsigned xs_pos = dec->xs - dec->xr;
-    int nr = LC3_NR(dt, sr);
+    unsigned xs_pos = dec->xs - dec->xh;
+    int nh = LC3_NH(dt, sr);
     int ns = LC3_NS(dt, sr);
     int nd = LC3_ND(dt, sr);
 
@@ -804,8 +813,8 @@ static PyObject *from_decoder(PyObject *obj, const struct lc3_decoder *dec)
     PyDict_SetItemString(obj, "plc",
         new_plc_state(&dec->plc));
 
-    PyDict_SetItemString(obj, "xr",
-        new_1d_copy(NPY_FLOAT, nr, dec->xr));
+    PyDict_SetItemString(obj, "xh",
+        new_1d_copy(NPY_FLOAT, nh, dec->xh));
 
     PyDict_SetItemString(obj, "xs_pos",
         new_scalar(NPY_INT, &xs_pos));
@@ -823,7 +832,7 @@ __attribute__((unused))
 static PyObject *to_decoder(PyObject *obj, struct lc3_decoder *dec)
 {
     unsigned dt, sr, sr_pcm, xs_pos;
-    PyObject *xr_obj, *xd_obj, *xg_obj;
+    PyObject *xh_obj, *xd_obj, *xg_obj;
 
     CTYPES_CHECK("decoder", obj && PyDict_Check(obj));
 
@@ -840,7 +849,7 @@ static PyObject *to_decoder(PyObject *obj, struct lc3_decoder *dec)
     CTYPES_CHECK("decoder.sr_pcm",
         (unsigned)(dec->sr_pcm = sr_pcm) < LC3_NUM_SRATE);
 
-    int nr = LC3_NR(dt, sr);
+    int nh = LC3_NH(dt, sr);
     int ns = LC3_NS(dt, sr);
     int nd = LC3_ND(dt, sr);
 
@@ -850,13 +859,13 @@ static PyObject *to_decoder(PyObject *obj, struct lc3_decoder *dec)
     CTYPES_CHECK(NULL, to_plc_state(
         PyDict_GetItemString(obj, "plc"), &dec->plc));
 
-    CTYPES_CHECK("decoder.xr", xr_obj = to_1d_copy(
-        PyDict_GetItemString(obj, "xr"), NPY_FLOAT, dec->xr, nr));
-    PyDict_SetItemString(obj, "xr", xr_obj);
+    CTYPES_CHECK("decoder.xh", xh_obj = to_1d_copy(
+        PyDict_GetItemString(obj, "xh"), NPY_FLOAT, dec->xh, nh));
+    PyDict_SetItemString(obj, "xh", xh_obj);
 
     CTYPES_CHECK("decoder.xs", to_scalar(
         PyDict_GetItemString(obj, "xs_pos"), NPY_INT, &xs_pos));
-    dec->xs = dec->xr + xs_pos;
+    dec->xs = dec->xh + xs_pos;
 
     CTYPES_CHECK("decoder.xd", xd_obj = to_1d_copy(
         PyDict_GetItemString(obj, "xd"), NPY_FLOAT, dec->xd, nd));
diff --git a/test/ltpf.py b/test/ltpf.py
index a159da7..1a852c8 100644
--- a/test/ltpf.py
+++ b/test/ltpf.py
@@ -120,7 +120,7 @@ class Resampler_6k4:
 
 
 def initial_hp50_state():
-    return { 's1': 0.0, 's2': 0.0 }
+    return { 's1': 0, 's2': 0 }
 
 ### ------------------------------------------------------------------------ ###
 
@@ -442,25 +442,25 @@ def initial_sstate():
 def check_resampler(rng, dt, sr):
 
     ns = T.NS[dt][sr]
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
     ok = True
 
     r = Resampler_12k8(dt, sr)
 
     hp50_c = initial_hp50_state()
-    x_c = np.zeros(nd)
+    x_c = np.zeros(nt)
     y_c = np.zeros(384)
 
     for run in range(10):
 
-        x = (2 * rng.random(ns)) - 1
+        x = ((2 * rng.random(ns)) - 1) * (2 ** 15 - 1)
         y = r.resample(x)
 
-        x_c = np.append(x_c[-nd:], x)
+        x_c = np.append(x_c[-nt:], x.astype(np.int16))
         y_c[:-r.n] = y_c[r.n:]
         y_c = lc3.ltpf_resample(dt, sr, hp50_c, x_c, y_c)
 
-        ok = ok and np.amax(np.abs(y_c[-r.d-r.n:] - y[:r.d+r.n])) < 1e-4
+        ok = ok and np.amax(np.abs(y_c[-r.d-r.n:] - y[:r.d+r.n]/2)) < 4
 
     return ok
 
@@ -469,54 +469,54 @@ def check_resampler_appendix_c(dt):
     sr = T.SRATE_16K
     ok = True
 
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
     n  = [ 96, 128 ][dt]
     k  = [ 44,  24 ][dt] + n
 
     state = initial_hp50_state()
 
-    x = np.append(np.zeros(nd), C.X_PCM[dt][0])
+    x = np.append(np.zeros(nt), C.X_PCM[dt][0])
     y = np.zeros(384)
     y = lc3.ltpf_resample(dt, sr, state, x, y)
     u = y[-k:len(C.X_TILDE_12K8D[dt][0])-k]
 
-    ok = np.amax(np.abs(u - C.X_TILDE_12K8D[dt][0])) < 1e0
+    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][0]/2)) < 2
 
-    x = np.append(x[-nd:], C.X_PCM[dt][1])
+    x = np.append(x[-nt:], C.X_PCM[dt][1])
     y[:-n] = y[n:]
     y = lc3.ltpf_resample(dt, sr, state, x, y)
     u = y[-k:len(C.X_TILDE_12K8D[dt][1])-k]
 
-    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][1])) < 1e0
+    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][1]/2)) < 2
 
     return ok
 
 def check_analysis(rng, dt, sr):
 
     ns = T.NS[dt][sr]
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
     ok = True
 
     state_c = initial_state()
-    x_c = np.zeros(ns+nd)
+    x_c = np.zeros(ns+nt)
 
     ltpf = LtpfAnalysis(dt, sr)
 
     t = np.arange(100 * ns) / (T.SRATE_KHZ[sr] * 1000)
-    s = signal.chirp(t, f0=50, f1=3e3, t1=t[-1], method='logarithmic')
+    s = signal.chirp(t, f0=10, f1=3e3, t1=t[-1], method='logarithmic')
 
     for i in range(20):
 
-        x = s[i*ns:(i+1)*ns]
+        x = s[i*ns:(i+1)*ns] * (2 ** 15 - 1)
 
         pitch_present = ltpf.run(x)
         data = ltpf.get_data()
 
-        x_c = np.append(x_c[-nd:], x)
+        x_c = np.append(x_c[-nt:], x.astype(np.int16))
         (pitch_present_c, data_c) = lc3.ltpf_analyse(dt, sr, state_c, x_c)
 
-        ok = ok and state_c['tc'] == ltpf.tc
-        ok = ok and np.amax(np.abs(state_c['nc'][0] - ltpf.nc[0])) < 1e-4
+        ok = ok and (not pitch_present or state_c['tc'] == ltpf.tc)
+        ok = ok and np.amax(np.abs(state_c['nc'][0] - ltpf.nc[0])) < 1e-2
         ok = ok and pitch_present_c == pitch_present
         ok = ok and data_c['active'] == data['active']
         ok = ok and data_c['pitch_index'] == data['pitch_index']
@@ -564,12 +564,12 @@ def check_synthesis(rng, dt, sr):
 def check_analysis_appendix_c(dt):
 
     sr = T.SRATE_16K
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
     ok = True
 
     state = initial_state()
 
-    x = np.append(np.zeros(nd), C.X_PCM[dt][0])
+    x = np.append(np.zeros(nt), C.X_PCM[dt][0])
     (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)
 
     ok = ok and C.T_CURR[dt][0] - state['tc'] == 17
@@ -578,7 +578,7 @@ def check_analysis_appendix_c(dt):
     ok = ok and data['pitch_index'] == C.PITCH_INDEX[dt][0]
     ok = ok and data['active'] == C.LTPF_ACTIVE[dt][0]
 
-    x = np.append(x[-nd:], C.X_PCM[dt][1])
+    x = np.append(x[-nt:], C.X_PCM[dt][1])
     (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)
 
     ok = ok and C.T_CURR[dt][1] - state['tc'] == 17
diff --git a/test/ltpf_py.c b/test/ltpf_py.c
index 427dbb9..c51eadd 100644
--- a/test/ltpf_py.c
+++ b/test/ltpf_py.c
@@ -27,7 +27,7 @@ static PyObject *resample_py(PyObject *m, PyObject *args)
     unsigned dt, sr;
     PyObject *hp50_obj, *x_obj, *y_obj;
     struct lc3_ltpf_hp50_state hp50;
-    float *x, *y;
+    int16_t *x, *y;
 
     if (!PyArg_ParseTuple(args, "IIOOO", &dt, &sr, &hp50_obj, &x_obj, &y_obj))
         return NULL;
@@ -36,14 +36,14 @@ static PyObject *resample_py(PyObject *m, PyObject *args)
     CTYPES_CHECK("sr", (unsigned)sr < LC3_NUM_SRATE);
     CTYPES_CHECK(NULL, hp50_obj = to_ltpf_hp50_state(hp50_obj, &hp50));
 
-    int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
-    int ny = sizeof((struct lc3_ltpf_analysis){ }.x_12k8) / sizeof(float);
+    int ns = LC3_NS(dt, sr), nt = LC3_NT(sr); /* NT depends on `sr` only */
+    int ny = sizeof((struct lc3_ltpf_analysis){ }.x_12k8) / sizeof(int16_t);
     int n  = dt == LC3_DT_7M5 ? 96 : 128;
 
-    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_FLOAT, ns+nd, &x));
-    CTYPES_CHECK("y", y_obj = to_1d_ptr(y_obj, NPY_FLOAT, ny, &y));
+    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_INT16, ns+nt, &x));
+    CTYPES_CHECK("y", y_obj = to_1d_ptr(y_obj, NPY_INT16, ny, &y));
 
-    resample_12k8[sr](&hp50, x + nd, y + (ny - n), n);
+    resample_12k8[sr](&hp50, x + nt, y + (ny - n), n);
 
     from_ltpf_hp50_state(hp50_obj, &hp50);
     return Py_BuildValue("O", y_obj);
@@ -55,7 +55,7 @@ static PyObject *analyse_py(PyObject *m, PyObject *args)
     unsigned dt, sr;
     struct lc3_ltpf_analysis ltpf;
     struct lc3_ltpf_data data = { 0 };
-    float *x;
+    int16_t *x;
 
     if (!PyArg_ParseTuple(args, "IIOO", &dt, &sr, &ltpf_obj, &x_obj))
         return NULL;
@@ -64,12 +64,12 @@ static PyObject *analyse_py(PyObject *m, PyObject *args)
     CTYPES_CHECK("sr", sr < LC3_NUM_SRATE);
     CTYPES_CHECK(NULL, ltpf_obj = to_ltpf_analysis(ltpf_obj, &ltpf));
 
-    int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
+    int ns = LC3_NS(dt, sr), nt = LC3_NT(sr);
 
-    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_FLOAT, ns+nd, &x));
+    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_INT16, ns+nt, &x));
 
     int pitch_present =
-        lc3_ltpf_analyse(dt, sr, &ltpf, x + nd, &data);
+        lc3_ltpf_analyse(dt, sr, &ltpf, x + nt, &data);
 
     from_ltpf_analysis(ltpf_obj, &ltpf);
     return Py_BuildValue("iN", pitch_present, new_ltpf_data(&data));
diff --git a/test/makefile.mk b/test/makefile.mk
index c2ae83b..cfced65 100644
--- a/test/makefile.mk
+++ b/test/makefile.mk
@@ -16,12 +16,17 @@
 
 TEST_DIR := test
 
-.PHONY: test test-clean
+.PHONY: test test_py test-clean
 
-test:
+test_py:
 	$(V)cd $(TEST_DIR) && python3 setup.py && python3 run.py
 
+test: test_py
+
 test-clean:
 	$(V)cd $(TEST_DIR) && python3 setup.py clean > /tmp/zero
 
+-include $(TEST_DIR)/arm/makefile.mk
+-include $(TEST_DIR)/neon/makefile.mk
+
 clean-all: test-clean
diff --git a/test/neon/ltpf_neon.c b/test/neon/ltpf_neon.c
new file mode 100644
index 0000000..0577bd1
--- /dev/null
+++ b/test/neon/ltpf_neon.c
@@ -0,0 +1,116 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include "neon.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/* -------------------------------------------------------------------------- */
+
+#define TEST_NEON
+#include <ltpf.c>
+
+/* Stubs, NOTE(review): presumably only there to resolve symbols at link */
+void lc3_put_bits_generic(lc3_bits_t *a, unsigned b, int c)
+{ (void)a; (void)b; (void)c; }
+unsigned lc3_get_bits_generic(struct lc3_bits *a, int b)
+{ (void)a; (void)b; return 0; }
+
+/* -------------------------------------------------------------------------- */
+
+/* Check that the `neon_` resamplers give the same output as the reference
+ * ones; the `hp50` states are carried from one sample rate to the next */
+static int check_resampler(void)
+{
+    /* `x` gets 60 valid samples ahead of it, as `x[-60]` to `x[-1]` */
+    int16_t buf[60+480], *x = buf + 60;
+    for (int i = -60; i < 480; i++)
+        x[i] = rand() & 0xffff;
+
+    struct lc3_ltpf_hp50_state hp50 = { 0 }, hp50_neon = { 0 };
+    int16_t y[128], y_neon[128];
+
+    resample_16k_12k8(&hp50, x, y, 128);
+    neon_resample_16k_12k8(&hp50_neon, x, y_neon, 128);
+    if (memcmp(y, y_neon, 128 * sizeof(*y)) != 0)
+        return printf("Error\n"), -1;
+
+    resample_32k_12k8(&hp50, x, y, 128);
+    neon_resample_32k_12k8(&hp50_neon, x, y_neon, 128);
+    if (memcmp(y, y_neon, 128 * sizeof(*y)) != 0)
+        return printf("Error\n"), -1;
+
+    resample_48k_12k8(&hp50, x, y, 128);
+    neon_resample_48k_12k8(&hp50_neon, x, y_neon, 128);
+    return memcmp(y, y_neon, 128 * sizeof(*y)) != 0 ? -1 : 0;
+}
+
+/* Check that `neon_dot()` returns strictly the same value as `dot()`,
+ * both called on `x` and `x+3`, with a length argument of 128 */
+static int check_dot(void)
+{
+    int16_t x[200];
+    for (int i = 0; i < 200; i++)
+        x[i] = rand() & 0xffff;
+
+    float y = dot(x, x+3, 128);
+    float y_neon = neon_dot(x, x+3, 128);
+
+    return y != y_neon ? -1 : 0;
+}
+
+/* Check that `neon_correlate()` fills `y_neon` with exactly the same
+ * values as `correlate()` does for `y`, on random inputs */
+static int check_correlate(void)
+{
+    int16_t alignas(4) a[500], b[500];
+    float y[100], y_neon[100];
+
+    for (int i = 0; i < 500; i++) {
+        a[i] = rand() & 0xffff;
+        b[i] = rand() & 0xffff;
+    }
+
+    correlate(a, b+200, 128, y, 100);
+    neon_correlate(a, b+200, 128, y_neon, 100);
+    if (memcmp(y, y_neon, 100 * sizeof(*y)) != 0)
+        return -1;
+
+    /* Same check, with an odd offset in `b`, and an odd last argument */
+    correlate(a, b+199, 128, y, 99);
+    neon_correlate(a, b+199, 128, y_neon, 99);
+    return memcmp(y, y_neon, 99 * sizeof(*y)) != 0 ? -1 : 0;
+}
+
+/* Run the LTPF checks in order, and stop at the first failure.
+ * Return 0 when all checks pass, or the negative result of the failing one */
+int check_ltpf(void)
+{
+    int (* const checks[])(void) =
+        { check_resampler, check_dot, check_correlate };
+
+    for (unsigned i = 0; i < sizeof(checks) / sizeof(*checks); i++) {
+        int ret = checks[i]();
+        if (ret < 0)
+            return ret;
+    }
+
+    return 0;
+}
diff --git a/test/neon/makefile.mk b/test/neon/makefile.mk
new file mode 100644
index 0000000..c01e70f
--- /dev/null
+++ b/test/neon/makefile.mk
@@ -0,0 +1,31 @@
+#
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+test_neon_src += \
+    $(TEST_DIR)/neon/test_neon.c \
+    $(TEST_DIR)/neon/ltpf_neon.c \
+    $(SRC_DIR)/tables.c
+
+test_neon_include += $(SRC_DIR)
+test_neon_ldlibs += m
+
+$(eval $(call add-bin,test_neon))
+# Run the built binary; the rule is hooked to the `test` target below
+test_neon: $(test_neon_bin)
+	@echo "  RUN     $(notdir $<)"
+	$(V)$<
+
+test: test_neon
diff --git a/test/neon/neon.h b/test/neon/neon.h
new file mode 100644
index 0000000..4015ca5
--- /dev/null
+++ b/test/neon/neon.h
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#if __ARM_NEON
+
+#include <arm_neon.h>
+
+#else
+#define __ARM_NEON 1
+
+#include <stdint.h>
+
+typedef struct { int16_t e[4]; } int16x4_t; /*  64 bits, 4 lanes */
+
+typedef struct { int16_t e[8]; } int16x8_t; /* 128 bits, 8 lanes */
+typedef struct { int32_t e[4]; } int32x4_t; /* 128 bits, 4 lanes */
+typedef struct { int64_t e[2]; } int64x2_t; /* 128 bits, 2 lanes */
+
+
+/* ----------------------------------------------------------------------------
+ *  Load / Store
+ * -------------------------------------------------------------------------- */
+
+/* Load 4 consecutive 16 bits values, starting at `p`, into a vector */
+__attribute__((unused))
+static int16x4_t vld1_s16(const int16_t *p)
+{
+    int16x4_t v;
+    for (int lane = 0; lane < 4; lane++)
+        v.e[lane] = p[lane];
+
+    return v;
+}
+
+/* Duplicate the 64 bits value `v` to the 2 lanes of a vector */
+__attribute__((unused))
+static int64x2_t vmovq_n_s64(int64_t v)
+{
+    int64x2_t r;
+    for (int i = 0; i < 2; i++)
+        r.e[i] = v;
+
+    return r;
+}
+
+
+/* ----------------------------------------------------------------------------
+ *  Move
+ * -------------------------------------------------------------------------- */
+
+/* Duplicate the signed 32 bits value `v` to the 4 lanes of a vector */
+__attribute__((unused))
+static int32x4_t vmovq_n_s32(int32_t v)
+{
+    int32x4_t r;
+    for (int i = 0; i < 4; i++)
+        r.e[i] = v;
+
+    return r;
+}
+
+/* Extract 4 lanes out of the concatenation of `a` (low lanes) and `b`
+ * (high lanes), starting from lane `n`; `n` has to be in the range 0 to 4 */
+__attribute__((unused))
+static int16x4_t vext_s16(int16x4_t a, int16x4_t b, const int n)
+{
+    int16x4_t r;
+    for (int i = 0; i < 4; i++)
+        r.e[i] = i + n < 4 ? a.e[i + n] : b.e[i + n - 4];
+
+    return r;
+}
+
+/* ----------------------------------------------------------------------------
+ *  Arithmetic
+ * -------------------------------------------------------------------------- */
+
+/* Multiply the 16 bits lanes of `a` and `b`, giving 32 bits products */
+__attribute__((unused))
+static int32x4_t vmull_s16(int16x4_t a, int16x4_t b)
+{
+    int32x4_t prod;
+    for (int lane = 0; lane < 4; lane++)
+        prod.e[lane] = (int32_t)a.e[lane] * b.e[lane];
+
+    return prod;
+}
+
+/* Add to `r` the 32 bits products of `a` and `b`; the sum wraps on overflow */
+__attribute__((unused))
+static int32x4_t vmlal_s16(int32x4_t r, int16x4_t a, int16x4_t b)
+{
+    for (int i = 0; i < 4; i++)
+        r.e[i] = (int32_t)((uint32_t)r.e[i] + (int32_t)a.e[i] * b.e[i]);
+    return r;
+}
+
+/* Add the sums of adjacent pairs of 32 bits lanes of `b`, widened to
+ * 64 bits, to the 64 bits lanes of the accumulator `a` */
+__attribute__((unused))
+static int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)
+{
+    for (int i = 0; i < 2; i++)
+        a.e[i] += (int64_t)b.e[2*i] + b.e[2*i + 1];
+
+    return a;
+}
+
+
+/* ----------------------------------------------------------------------------
+ *  Reduce
+ * -------------------------------------------------------------------------- */
+
+__attribute__((unused))
+static int32_t vaddvq_s32(int32x4_t v) /* Sum of the 4 lanes, wrapping */
+{
+    return (int32_t)((uint32_t)v.e[0] + v.e[1] + v.e[2] + v.e[3]);
+}
+
+__attribute__((unused))
+static int64_t vaddvq_s64(int64x2_t v) /* Sum of the 2 lanes */
+{
+    return v.e[0] + v.e[1];
+}
+
+#endif /* __ARM_NEON */
diff --git a/test/neon/test_neon.c b/test/neon/test_neon.c
new file mode 100644
index 0000000..af9bd98
--- /dev/null
+++ b/test/neon/test_neon.c
@@ -0,0 +1,32 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <stdio.h>
+
+int check_ltpf(void);
+
+/* Run the Neon checks, and print the result on the standard output.
+ * Exit with 0 when the checks pass, 1 otherwise */
+int main(void)
+{
+    printf("Checking LTPF Neon... "); fflush(stdout);
+    int failed = check_ltpf() != 0;
+    printf("%s\n", failed ? "Failed" : "OK");
+
+    return failed;
+}