ltpf: Move analysis to fixed point

2026-06-02 09:57:02 +00:00 · 2022-05-04 14:45:20 +02:00
parent e0efd79390
commit e471e43aeb
22 changed files with 1902 additions and 345 deletions
@@ -0,0 +1,114 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "simd32.h"
+
+/* -------------------------------------------------------------------------- */
+
+#define TEST_ARM
+#include <ltpf.c>
+
+/* Empty stand-in for the bitstream writer, never called by these checks */
+void lc3_put_bits_generic(lc3_bits_t *a, unsigned b, int c)
+{
+    (void)a;
+    (void)b;
+    (void)c;
+}
+
+/* Empty stand-in for the bitstream reader; ignores its arguments, yields 0 */
+unsigned lc3_get_bits_generic(struct lc3_bits *a, int b)
+{
+    (void)a;
+    (void)b;
+
+    return 0;
+}
+
+/* -------------------------------------------------------------------------- */
+
+/**
+ * Run every reference resampler against its `arm_` counterpart
+ * return          0: All outputs are identical  -1: Mismatch
+ *
+ * Both sides read the same random input, with 60 extra samples placed
+ * ahead of `x`. Each side owns its `hp50` state, which is not reset
+ * from one sample rate to the next.
+ */
+static int check_resampler()
+{
+    enum { N_HISTORY = 60, N_INPUT = 480, N_OUTPUT = 128 };
+
+    int16_t samples[N_HISTORY + N_INPUT];
+    int16_t *x = samples + N_HISTORY;
+
+    for (int i = 0; i < N_HISTORY + N_INPUT; i++)
+        samples[i] = rand() & 0xffff;
+
+    struct lc3_ltpf_hp50_state hp50 = { 0 }, hp50_arm = { 0 };
+    int16_t y[N_OUTPUT], y_arm[N_OUTPUT];
+
+    /* --- 8 KHz --- */
+
+    resample_8k_12k8(&hp50, x, y, N_OUTPUT);
+    arm_resample_8k_12k8(&hp50_arm, x, y_arm, N_OUTPUT);
+    if (memcmp(y, y_arm, sizeof(y)))
+        return -1;
+
+    /* --- 16 KHz --- */
+
+    resample_16k_12k8(&hp50, x, y, N_OUTPUT);
+    arm_resample_16k_12k8(&hp50_arm, x, y_arm, N_OUTPUT);
+    if (memcmp(y, y_arm, sizeof(y)))
+        return -1;
+
+    /* --- 24 KHz --- */
+
+    resample_24k_12k8(&hp50, x, y, N_OUTPUT);
+    arm_resample_24k_12k8(&hp50_arm, x, y_arm, N_OUTPUT);
+    if (memcmp(y, y_arm, sizeof(y)))
+        return -1;
+
+    /* --- 32 KHz --- */
+
+    resample_32k_12k8(&hp50, x, y, N_OUTPUT);
+    arm_resample_32k_12k8(&hp50_arm, x, y_arm, N_OUTPUT);
+    if (memcmp(y, y_arm, sizeof(y)))
+        return -1;
+
+    /* --- 48 KHz --- */
+
+    resample_48k_12k8(&hp50, x, y, N_OUTPUT);
+    arm_resample_48k_12k8(&hp50_arm, x, y_arm, N_OUTPUT);
+    if (memcmp(y, y_arm, sizeof(y)))
+        return -1;
+
+    return 0;
+}
+
+/**
+ * Run the reference correlation against its `arm_` counterpart
+ * return          0: All outputs are identical  -1: Mismatch
+ *
+ * The second operand starts at an even, then at an odd 16-bit offset
+ * of a 4-byte aligned buffer; the odd offset is run with two
+ * different output counts.
+ */
+static int check_correlate()
+{
+    enum { N_SAMPLES = 500, N_OUT_MAX = 100 };
+
+    static const struct { int b_offset, n_out; } runs[] = {
+        { 200, 100 }, { 199, 99 }, { 199, 100 },
+    };
+
+    alignas(4) int16_t a[N_SAMPLES], b[N_SAMPLES];
+    float y[N_OUT_MAX], y_arm[N_OUT_MAX];
+
+    for (int i = 0; i < N_SAMPLES; i++) {
+        a[i] = rand() & 0xffff;
+        b[i] = rand() & 0xffff;
+    }
+
+    for (int k = 0; k < (int)(sizeof(runs) / sizeof(*runs)); k++) {
+        int16_t *pb = b + runs[k].b_offset;
+        int n_out = runs[k].n_out;
+
+        correlate(a, pb, 128, y, n_out);
+        arm_correlate(a, pb, 128, y_arm, n_out);
+
+        /* Only the values written by this run are compared */
+        if (memcmp(y, y_arm, n_out * sizeof(*y)))
+            return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * Entry point of the LTPF checks, stops at the first failing one
+ * return          0: Success  <0: Value returned by the failing check
+ */
+int check_ltpf(void)
+{
+    int ret = check_resampler();
+
+    if (ret >= 0)
+        ret = check_correlate();
+
+    return ret < 0 ? ret : 0;
+}
@@ -0,0 +1,31 @@
+#
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Sources of the `test_arm` binary
+test_arm_src += $(TEST_DIR)/arm/test_arm.c
+test_arm_src += $(TEST_DIR)/arm/ltpf_arm.c
+test_arm_src += $(SRC_DIR)/tables.c
+
+test_arm_include += $(SRC_DIR)
+test_arm_ldlibs += m
+
+# NOTE(review): `add-bin` is defined outside this fragment; presumably it
+# declares the build rules and sets `test_arm_bin` -- confirm
+$(eval $(call add-bin,test_arm))
+
+# Run the binary, as part of the `test` target
+test_arm: $(test_arm_bin)
+	@echo "  RUN     $(notdir $<)"
+	$(V)$<
+
+test: test_arm
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#if __ARM_FEATURE_SIMD32
+
+#include <arm_acle.h>
+
+#else
+#define __ARM_FEATURE_SIMD32 1
+
+#include <stdint.h>
+
+typedef int32_t int16x2_t;  /* Two 16-bit lanes, low one in bits 0..15 */
+
+/**
+ * Emulation of `__pkhbt`
+ * a, b            Packed 16-bit pairs
+ * return          Low 16 bits of `a` combined with the high 16 bits of `b`
+ */
+__attribute__((unused))
+static int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
+{
+    const uint32_t lo_mask = 0x0000ffffu;
+    uint32_t packed = ((uint32_t)a & lo_mask) | ((uint32_t)b & ~lo_mask);
+
+    return (int16x2_t)packed;
+}
+
+/**
+ * Emulation of the dual 16-bit multiply-accumulate `__smlad`
+ * a, b            Packed pairs of signed 16-bit values
+ * u               32-bit accumulator
+ * return          u + a.hi * b.hi + a.lo * b.lo, wrapped to 32 bits
+ *
+ * The sum is formed on unsigned values, so that an accumulation
+ * overflow wraps around instead of being undefined behaviour.
+ */
+__attribute__((unused))
+static int32_t __smlad(int16x2_t a, int16x2_t b, int32_t u)
+{
+    int16_t a_hi = a >> 16, a_lo = a & 0xffff;
+    int16_t b_hi = b >> 16, b_lo = b & 0xffff;
+
+    /* Each product fits 32 bits, only the additions can overflow */
+    uint32_t sum = (uint32_t)u
+        + (uint32_t)(a_hi * b_hi) + (uint32_t)(a_lo * b_lo);
+
+    return (int32_t)sum;
+}
+
+/**
+ * Emulation of `__smlald`, with a 64-bit accumulator
+ * a, b            Packed pairs of signed 16-bit values
+ * u               64-bit accumulator
+ * return          u + a.hi * b.hi + a.lo * b.lo
+ */
+__attribute__((unused))
+static int64_t __smlald(int16x2_t a, int16x2_t b, int64_t u)
+{
+    const int16_t a_top = a >> 16, a_bot = a & 0xffff;
+    const int16_t b_top = b >> 16, b_bot = b & 0xffff;
+    int64_t acc = u;
+
+    acc += a_top * b_top;
+    acc += a_bot * b_bot;
+
+    return acc;
+}
+
+/**
+ * Emulation of `__smlaldx`, the crossed variant of `__smlald`
+ * a, b            Packed pairs of signed 16-bit values
+ * u               64-bit accumulator
+ * return          u + a.hi * b.lo + a.lo * b.hi
+ */
+__attribute__((unused))
+static int64_t __smlaldx(int16x2_t a, int16x2_t b, int64_t u)
+{
+    const int16_t a_top = a >> 16, a_bot = a & 0xffff;
+    const int16_t b_top = b >> 16, b_bot = b & 0xffff;
+    int64_t acc = u;
+
+    acc += a_top * b_bot;
+    acc += a_bot * b_top;
+
+    return acc;
+}
+
+#endif /* __ARM_FEATURE_SIMD32 */
@@ -0,0 +1,32 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <stdio.h>
+
+int check_ltpf(void);
+
+/* Run the checks, report on stdout; exit status is 0 only on success */
+int main()
+{
+    int failed = 0;
+
+    /* Show the label before the check starts running */
+    printf("Checking LTPF ARM... ");
+    fflush(stdout);
+
+    int r = check_ltpf();
+    printf("%s\n", r == 0 ? "OK" : "Failed");
+    failed = failed || r;
+
+    return failed;
+}
@@ -241,10 +241,10 @@ static PyObject *to_ltpf_hp50_state(
    CTYPES_CHECK("hp50", obj && PyDict_Check(obj));

    CTYPES_CHECK("hp50.s1", to_scalar(
-        PyDict_GetItemString(obj, "s1"), NPY_FLOAT, &hp50->s1));
+        PyDict_GetItemString(obj, "s1"), NPY_INT64, &hp50->s1));

    CTYPES_CHECK("hp50.s2", to_scalar(
-        PyDict_GetItemString(obj, "s2"), NPY_FLOAT, &hp50->s2));
+        PyDict_GetItemString(obj, "s2"), NPY_INT64, &hp50->s2));

    return obj;
 }
@@ -254,10 +254,10 @@ static PyObject *from_ltpf_hp50_state(
    PyObject *obj, const struct lc3_ltpf_hp50_state *hp50)
 {
    PyDict_SetItemString(obj, "s1",
-        new_scalar(NPY_FLOAT, &hp50->s1));
+        new_scalar(NPY_INT64, &hp50->s1));

    PyDict_SetItemString(obj, "s2",
-        new_scalar(NPY_FLOAT, &hp50->s2));
+        new_scalar(NPY_INT64, &hp50->s2));

    return obj;
 }
@@ -267,8 +267,8 @@ static PyObject *to_ltpf_analysis(
    PyObject *obj, struct lc3_ltpf_analysis *ltpf)
 {
    PyObject *nc_obj, *x_12k8_obj, *x_6k4_obj;
-    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(float);
-    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(float);
+    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
+    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);

    CTYPES_CHECK("ltpf", obj && PyDict_Check(obj));

@@ -286,11 +286,11 @@ static PyObject *to_ltpf_analysis(
        PyDict_GetItemString(obj, "hp50"), &ltpf->hp50));

    CTYPES_CHECK("ltpf.x_12k8", x_12k8_obj = to_1d_copy(
-        PyDict_GetItemString(obj, "x_12k8"), NPY_FLOAT, ltpf->x_12k8, n_12k8));
+        PyDict_GetItemString(obj, "x_12k8"), NPY_INT16, ltpf->x_12k8, n_12k8));
    PyDict_SetItemString(obj, "x_12k8", x_12k8_obj);

    CTYPES_CHECK("ltpf.x_6k4", x_6k4_obj = to_1d_copy(
-        PyDict_GetItemString(obj, "x_6k4"), NPY_FLOAT, ltpf->x_6k4, n_6k4));
+        PyDict_GetItemString(obj, "x_6k4"), NPY_INT16, ltpf->x_6k4, n_6k4));
    PyDict_SetItemString(obj, "x_6k4", x_6k4_obj);

    CTYPES_CHECK("ltpf.tc", to_scalar(
@@ -303,8 +303,8 @@ __attribute__((unused))
 static PyObject *from_ltpf_analysis(
    PyObject *obj, const struct lc3_ltpf_analysis *ltpf)
 {
-    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(float);
-    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(float);
+    const int n_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
+    const int n_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);

    if (!obj) obj = PyDict_New();

@@ -321,10 +321,10 @@ static PyObject *from_ltpf_analysis(
        from_ltpf_hp50_state(PyDict_New(), &ltpf->hp50));

    PyDict_SetItemString(obj, "x_12k8",
-        new_1d_copy(NPY_FLOAT, n_12k8, &ltpf->x_12k8));
+        new_1d_copy(NPY_INT16, n_12k8, &ltpf->x_12k8));

    PyDict_SetItemString(obj, "x_6k4",
-        new_1d_copy(NPY_FLOAT, n_6k4, &ltpf->x_6k4));
+        new_1d_copy(NPY_INT16, n_6k4, &ltpf->x_6k4));

    PyDict_SetItemString(obj, "tc",
        new_scalar(NPY_INT, &ltpf->tc));
@@ -703,6 +703,7 @@ static PyObject *from_encoder(PyObject *obj, const struct lc3_encoder *enc)
    unsigned sr_pcm = enc->sr_pcm;
    int ns = LC3_NS(dt, sr);
    int nd = LC3_ND(dt, sr);
+    int nt = LC3_NT(sr);

    if (!obj) obj = PyDict_New();

@@ -724,6 +725,9 @@ static PyObject *from_encoder(PyObject *obj, const struct lc3_encoder *enc)
    PyDict_SetItemString(obj, "quant",
        from_spec_analysis(NULL, &enc->spec));

+    PyDict_SetItemString(obj, "xt",
+        new_1d_copy(NPY_INT16, nt+ns, enc->xt-nt));
+
    PyDict_SetItemString(obj, "xs",
        new_1d_copy(NPY_FLOAT, ns+nd, enc->xs-nd));

@@ -737,7 +741,7 @@ __attribute__((unused))
 static PyObject *to_encoder(PyObject *obj, struct lc3_encoder *enc)
 {
    unsigned dt, sr, sr_pcm;
-    PyObject *xs_obj, *xf_obj;
+    PyObject *xt_obj, *xs_obj, *xf_obj;

    CTYPES_CHECK("encoder", obj && PyDict_Check(obj));

@@ -756,6 +760,7 @@ static PyObject *to_encoder(PyObject *obj, struct lc3_encoder *enc)

    int ns = LC3_NS(dt, sr);
    int nd = LC3_ND(dt, sr);
+    int nt = LC3_NT(sr);

    CTYPES_CHECK(NULL, to_attdet_analysis(
        PyDict_GetItemString(obj, "attdet"), &enc->attdet));
@@ -766,6 +771,10 @@ static PyObject *to_encoder(PyObject *obj, struct lc3_encoder *enc)
    CTYPES_CHECK(NULL, to_spec_analysis(
        PyDict_GetItemString(obj, "quant"), &enc->spec));

+    CTYPES_CHECK("encoder.xt", xt_obj = to_1d_copy(
+        PyDict_GetItemString(obj, "xt"), NPY_INT16, enc->xt-nt, ns+nt));
+    PyDict_SetItemString(obj, "xt", xt_obj);
+
    CTYPES_CHECK("encoder.xs", xs_obj = to_1d_copy(
        PyDict_GetItemString(obj, "xs"), NPY_FLOAT, enc->xs-nd, ns+nd));
    PyDict_SetItemString(obj, "xs", xs_obj);
@@ -782,8 +791,8 @@ static PyObject *from_decoder(PyObject *obj, const struct lc3_decoder *dec)
 {
    unsigned dt = dec->dt, sr = dec->sr;
    unsigned sr_pcm = dec->sr_pcm;
-    unsigned xs_pos = dec->xs - dec->xr;
-    int nr = LC3_NR(dt, sr);
+    unsigned xs_pos = dec->xs - dec->xh;
+    int nh = LC3_NH(dt, sr);
    int ns = LC3_NS(dt, sr);
    int nd = LC3_ND(dt, sr);

@@ -804,8 +813,8 @@ static PyObject *from_decoder(PyObject *obj, const struct lc3_decoder *dec)
    PyDict_SetItemString(obj, "plc",
        new_plc_state(&dec->plc));

-    PyDict_SetItemString(obj, "xr",
-        new_1d_copy(NPY_FLOAT, nr, dec->xr));
+    PyDict_SetItemString(obj, "xh",
+        new_1d_copy(NPY_FLOAT, nh, dec->xh));

    PyDict_SetItemString(obj, "xs_pos",
        new_scalar(NPY_INT, &xs_pos));
@@ -823,7 +832,7 @@ __attribute__((unused))
 static PyObject *to_decoder(PyObject *obj, struct lc3_decoder *dec)
 {
    unsigned dt, sr, sr_pcm, xs_pos;
-    PyObject *xr_obj, *xd_obj, *xg_obj;
+    PyObject *xh_obj, *xd_obj, *xg_obj;

    CTYPES_CHECK("decoder", obj && PyDict_Check(obj));

@@ -840,7 +849,7 @@ static PyObject *to_decoder(PyObject *obj, struct lc3_decoder *dec)
    CTYPES_CHECK("decoder.sr_pcm",
        (unsigned)(dec->sr_pcm = sr_pcm) < LC3_NUM_SRATE);

-    int nr = LC3_NR(dt, sr);
+    int nh = LC3_NH(dt, sr);
    int ns = LC3_NS(dt, sr);
    int nd = LC3_ND(dt, sr);

@@ -850,13 +859,13 @@ static PyObject *to_decoder(PyObject *obj, struct lc3_decoder *dec)
    CTYPES_CHECK(NULL, to_plc_state(
        PyDict_GetItemString(obj, "plc"), &dec->plc));

-    CTYPES_CHECK("decoder.xr", xr_obj = to_1d_copy(
-        PyDict_GetItemString(obj, "xr"), NPY_FLOAT, dec->xr, nr));
-    PyDict_SetItemString(obj, "xr", xr_obj);
+    CTYPES_CHECK("decoder.xh", xh_obj = to_1d_copy(
+        PyDict_GetItemString(obj, "xh"), NPY_FLOAT, dec->xh, nh));
+    PyDict_SetItemString(obj, "xh", xh_obj);

    CTYPES_CHECK("decoder.xs", to_scalar(
        PyDict_GetItemString(obj, "xs_pos"), NPY_INT, &xs_pos));
-    dec->xs = dec->xr + xs_pos;
+    dec->xs = dec->xh + xs_pos;

    CTYPES_CHECK("decoder.xd", xd_obj = to_1d_copy(
        PyDict_GetItemString(obj, "xd"), NPY_FLOAT, dec->xd, nd));
@@ -120,7 +120,7 @@ class Resampler_6k4:


 def initial_hp50_state():
-    return { 's1': 0.0, 's2': 0.0 }
+    return { 's1': 0, 's2': 0 }

 ### ------------------------------------------------------------------------ ###

@@ -442,25 +442,25 @@ def initial_sstate():
 def check_resampler(rng, dt, sr):

    ns = T.NS[dt][sr]
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
    ok = True

    r = Resampler_12k8(dt, sr)

    hp50_c = initial_hp50_state()
-    x_c = np.zeros(nd)
+    x_c = np.zeros(nt)
    y_c = np.zeros(384)

    for run in range(10):

-        x = (2 * rng.random(ns)) - 1
+        x = ((2 * rng.random(ns)) - 1) * (2 ** 15 - 1)
        y = r.resample(x)

-        x_c = np.append(x_c[-nd:], x)
+        x_c = np.append(x_c[-nt:], x.astype(np.int16))
        y_c[:-r.n] = y_c[r.n:]
        y_c = lc3.ltpf_resample(dt, sr, hp50_c, x_c, y_c)

-        ok = ok and np.amax(np.abs(y_c[-r.d-r.n:] - y[:r.d+r.n])) < 1e-4
+        ok = ok and np.amax(np.abs(y_c[-r.d-r.n:] - y[:r.d+r.n]/2)) < 4

    return ok

@@ -469,54 +469,54 @@ def check_resampler_appendix_c(dt):
    sr = T.SRATE_16K
    ok = True

-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
    n  = [ 96, 128 ][dt]
    k  = [ 44,  24 ][dt] + n

    state = initial_hp50_state()

-    x = np.append(np.zeros(nd), C.X_PCM[dt][0])
+    x = np.append(np.zeros(nt), C.X_PCM[dt][0])
    y = np.zeros(384)
    y = lc3.ltpf_resample(dt, sr, state, x, y)
    u = y[-k:len(C.X_TILDE_12K8D[dt][0])-k]

-    ok = np.amax(np.abs(u - C.X_TILDE_12K8D[dt][0])) < 1e0
+    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][0]/2)) < 2

-    x = np.append(x[-nd:], C.X_PCM[dt][1])
+    x = np.append(x[-nt:], C.X_PCM[dt][1])
    y[:-n] = y[n:]
    y = lc3.ltpf_resample(dt, sr, state, x, y)
    u = y[-k:len(C.X_TILDE_12K8D[dt][1])-k]

-    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][1])) < 1e0
+    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][1]/2)) < 2

    return ok

 def check_analysis(rng, dt, sr):

    ns = T.NS[dt][sr]
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
    ok = True

    state_c = initial_state()
-    x_c = np.zeros(ns+nd)
+    x_c = np.zeros(ns+nt)

    ltpf = LtpfAnalysis(dt, sr)

    t = np.arange(100 * ns) / (T.SRATE_KHZ[sr] * 1000)
-    s = signal.chirp(t, f0=50, f1=3e3, t1=t[-1], method='logarithmic')
+    s = signal.chirp(t, f0=10, f1=3e3, t1=t[-1], method='logarithmic')

    for i in range(20):

-        x = s[i*ns:(i+1)*ns]
+        x = s[i*ns:(i+1)*ns] * (2 ** 15 - 1)

        pitch_present = ltpf.run(x)
        data = ltpf.get_data()

-        x_c = np.append(x_c[-nd:], x)
+        x_c = np.append(x_c[-nt:], x.astype(np.int16))
        (pitch_present_c, data_c) = lc3.ltpf_analyse(dt, sr, state_c, x_c)

-        ok = ok and state_c['tc'] == ltpf.tc
-        ok = ok and np.amax(np.abs(state_c['nc'][0] - ltpf.nc[0])) < 1e-4
+        ok = ok and (not pitch_present or state_c['tc'] == ltpf.tc)
+        ok = ok and np.amax(np.abs(state_c['nc'][0] - ltpf.nc[0])) < 1e-2
        ok = ok and pitch_present_c == pitch_present
        ok = ok and data_c['active'] == data['active']
        ok = ok and data_c['pitch_index'] == data['pitch_index']
@@ -564,12 +564,12 @@ def check_synthesis(rng, dt, sr):
 def check_analysis_appendix_c(dt):

    sr = T.SRATE_16K
-    nd = T.ND[dt][sr]
+    nt = (5 * T.SRATE_KHZ[sr]) // 4
    ok = True

    state = initial_state()

-    x = np.append(np.zeros(nd), C.X_PCM[dt][0])
+    x = np.append(np.zeros(nt), C.X_PCM[dt][0])
    (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)

    ok = ok and C.T_CURR[dt][0] - state['tc'] == 17
@@ -578,7 +578,7 @@ def check_analysis_appendix_c(dt):
    ok = ok and data['pitch_index'] == C.PITCH_INDEX[dt][0]
    ok = ok and data['active'] == C.LTPF_ACTIVE[dt][0]

-    x = np.append(x[-nd:], C.X_PCM[dt][1])
+    x = np.append(x[-nt:], C.X_PCM[dt][1])
    (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)

    ok = ok and C.T_CURR[dt][1] - state['tc'] == 17
@@ -27,7 +27,7 @@ static PyObject *resample_py(PyObject *m, PyObject *args)
    unsigned dt, sr;
    PyObject *hp50_obj, *x_obj, *y_obj;
    struct lc3_ltpf_hp50_state hp50;
-    float *x, *y;
+    int16_t *x, *y;

    if (!PyArg_ParseTuple(args, "IIOOO", &dt, &sr, &hp50_obj, &x_obj, &y_obj))
        return NULL;
@@ -36,14 +36,14 @@ static PyObject *resample_py(PyObject *m, PyObject *args)
    CTYPES_CHECK("sr", (unsigned)sr < LC3_NUM_SRATE);
    CTYPES_CHECK(NULL, hp50_obj = to_ltpf_hp50_state(hp50_obj, &hp50));

-    int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
-    int ny = sizeof((struct lc3_ltpf_analysis){ }.x_12k8) / sizeof(float);
+    /* `nt` samples precede the frame in `x`, count set by the samplerate */
+    int ns = LC3_NS(dt, sr), nt = LC3_NT(sr);
+    int ny = sizeof((struct lc3_ltpf_analysis){ }.x_12k8) / sizeof(int16_t);
    int n  = dt == LC3_DT_7M5 ? 96 : 128;

-    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_FLOAT, ns+nd, &x));
-    CTYPES_CHECK("y", y_obj = to_1d_ptr(y_obj, NPY_FLOAT, ny, &y));
+    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_INT16, ns+nt, &x));
+    CTYPES_CHECK("y", y_obj = to_1d_ptr(y_obj, NPY_INT16, ny, &y));

-    resample_12k8[sr](&hp50, x + nd, y + (ny - n), n);
+    resample_12k8[sr](&hp50, x + nt, y + (ny - n), n);

    from_ltpf_hp50_state(hp50_obj, &hp50);
    return Py_BuildValue("O", y_obj);
@@ -55,7 +55,7 @@ static PyObject *analyse_py(PyObject *m, PyObject *args)
    unsigned dt, sr;
    struct lc3_ltpf_analysis ltpf;
    struct lc3_ltpf_data data = { 0 };
-    float *x;
+    int16_t *x;

    if (!PyArg_ParseTuple(args, "IIOO", &dt, &sr, &ltpf_obj, &x_obj))
        return NULL;
@@ -64,12 +64,12 @@ static PyObject *analyse_py(PyObject *m, PyObject *args)
    CTYPES_CHECK("sr", sr < LC3_NUM_SRATE);
    CTYPES_CHECK(NULL, ltpf_obj = to_ltpf_analysis(ltpf_obj, &ltpf));

-    int ns = LC3_NS(dt, sr), nd = LC3_ND(dt, sr);
+    int ns = LC3_NS(dt, sr), nt = LC3_NT(sr);

-    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_FLOAT, ns+nd, &x));
+    CTYPES_CHECK("x", x_obj = to_1d_ptr(x_obj, NPY_INT16, ns+nt, &x));

    int pitch_present =
-        lc3_ltpf_analyse(dt, sr, &ltpf, x + nd, &data);
+        lc3_ltpf_analyse(dt, sr, &ltpf, x + nt, &data);

    from_ltpf_analysis(ltpf_obj, &ltpf);
    return Py_BuildValue("iN", pitch_present, new_ltpf_data(&data));
@@ -16,12 +16,17 @@

 TEST_DIR := test

+test_py:
+	$(V)cd $(TEST_DIR) && python3 setup.py && python3 run.py
+
 .PHONY: test test-clean

-test:
-	$(V)cd $(TEST_DIR) && python3 setup.py && python3 run.py
+test: test_py

 test-clean:
 	$(V)cd $(TEST_DIR) && python3 setup.py clean > /tmp/zero

+-include $(TEST_DIR)/arm/makefile.mk
+-include $(TEST_DIR)/neon/makefile.mk
+
 clean-all: test-clean
@@ -0,0 +1,116 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include "neon.h"
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/* -------------------------------------------------------------------------- */
+
+#define TEST_NEON
+#include <ltpf.c>
+
+/* Empty stand-in for the bitstream writer, never called by these checks */
+void lc3_put_bits_generic(lc3_bits_t *a, unsigned b, int c)
+{
+    (void)a;
+    (void)b;
+    (void)c;
+}
+
+/* Empty stand-in for the bitstream reader; ignores its arguments, yields 0 */
+unsigned lc3_get_bits_generic(struct lc3_bits *a, int b)
+{
+    (void)a;
+    (void)b;
+
+    return 0;
+}
+
+/* -------------------------------------------------------------------------- */
+
+/**
+ * Run the reference resamplers against their `neon_` counterparts
+ * return          0: All outputs are identical  -1: Mismatch
+ *
+ * Both sides read the same random input, with 60 extra samples placed
+ * ahead of `x`. Each side owns its `hp50` state, which is not reset
+ * from one sample rate to the next.
+ *
+ * Nothing is printed here, the caller reports the failure.
+ */
+static int check_resampler(void)
+{
+    enum { N_HISTORY = 60, N_INPUT = 480, N_OUTPUT = 128 };
+
+    int16_t samples[N_HISTORY + N_INPUT], *x = samples + N_HISTORY;
+    for (int i = -N_HISTORY; i < N_INPUT; i++)
+        x[i] = rand() & 0xffff;
+
+    struct lc3_ltpf_hp50_state hp50 = { 0 }, hp50_neon = { 0 };
+    int16_t y[N_OUTPUT], y_neon[N_OUTPUT];
+
+    /* --- 16 KHz --- */
+
+    resample_16k_12k8(&hp50, x, y, N_OUTPUT);
+    neon_resample_16k_12k8(&hp50_neon, x, y_neon, N_OUTPUT);
+    if (memcmp(y, y_neon, sizeof(y)) != 0)
+        return -1;
+
+    /* --- 32 KHz --- */
+
+    resample_32k_12k8(&hp50, x, y, N_OUTPUT);
+    neon_resample_32k_12k8(&hp50_neon, x, y_neon, N_OUTPUT);
+    if (memcmp(y, y_neon, sizeof(y)) != 0)
+        return -1;
+
+    /* --- 48 KHz --- */
+
+    resample_48k_12k8(&hp50, x, y, N_OUTPUT);
+    neon_resample_48k_12k8(&hp50_neon, x, y_neon, N_OUTPUT);
+    if (memcmp(y, y_neon, sizeof(y)) != 0)
+        return -1;
+
+    return 0;
+}
+
+/**
+ * Run the reference dot product against its `neon_` counterpart
+ * return          0: Results are equal  -1: Mismatch
+ *
+ * The two operands overlap, the second one starts 3 samples further.
+ */
+static int check_dot()
+{
+    enum { N_SAMPLES = 200 };
+    int16_t x[N_SAMPLES];
+
+    for (int i = 0; i < N_SAMPLES; i++)
+        x[i] = rand() & 0xffff;
+
+    const float y = dot(x, x+3, 128);
+    const float y_neon = neon_dot(x, x+3, 128);
+
+    return y == y_neon ? 0 : -1;
+}
+
+/**
+ * Run the reference correlation against its `neon_` counterpart
+ * return          0: All outputs are identical  -1: Mismatch
+ *
+ * The second operand starts at an even, then at an odd 16-bit offset
+ * of a 4-byte aligned buffer.
+ */
+static int check_correlate()
+{
+    enum { N_SAMPLES = 500, N_OUT_MAX = 100 };
+
+    static const struct { int b_offset, n_out; } runs[] = {
+        { 200, 100 }, { 199, 99 },
+    };
+
+    alignas(4) int16_t a[N_SAMPLES], b[N_SAMPLES];
+    float y[N_OUT_MAX], y_neon[N_OUT_MAX];
+
+    for (int i = 0; i < N_SAMPLES; i++) {
+        a[i] = rand() & 0xffff;
+        b[i] = rand() & 0xffff;
+    }
+
+    for (int k = 0; k < (int)(sizeof(runs) / sizeof(*runs)); k++) {
+        int16_t *pb = b + runs[k].b_offset;
+        int n_out = runs[k].n_out;
+
+        correlate(a, pb, 128, y, n_out);
+        neon_correlate(a, pb, 128, y_neon, n_out);
+
+        /* Only the values written by this run are compared */
+        if (memcmp(y, y_neon, n_out * sizeof(*y)))
+            return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * Entry point of the LTPF checks, stops at the first failing one
+ * return          0: Success  <0: Value returned by the failing check
+ */
+int check_ltpf(void)
+{
+    int ret;
+
+    /* Evaluated in order, the remaining checks are skipped on failure */
+    if ((ret = check_resampler()) < 0 ||
+        (ret = check_dot()) < 0 ||
+        (ret = check_correlate()) < 0)
+        return ret;
+
+    return 0;
+}
@@ -0,0 +1,31 @@
+#
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Sources of the `test_neon` binary
+test_neon_src += $(TEST_DIR)/neon/test_neon.c
+test_neon_src += $(TEST_DIR)/neon/ltpf_neon.c
+test_neon_src += $(SRC_DIR)/tables.c
+
+test_neon_include += $(SRC_DIR)
+test_neon_ldlibs += m
+
+# NOTE(review): `add-bin` is defined outside this fragment; presumably it
+# declares the build rules and sets `test_neon_bin` -- confirm
+$(eval $(call add-bin,test_neon))
+
+# Run the binary, as part of the `test` target
+test_neon: $(test_neon_bin)
+	@echo "  RUN     $(notdir $<)"
+	$(V)$<
+
+test: test_neon
@@ -0,0 +1,141 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#if __ARM_NEON
+
+#include <arm_neon.h>
+
+#else
+#define __ARM_NEON 1
+
+#include <stdint.h>
+
+typedef struct { int16_t e[4]; } int16x4_t;  /* 4 lanes of 16 bits */
+
+typedef struct { int16_t e[8]; } int16x8_t;  /* 8 lanes of 16 bits */
+typedef struct { int32_t e[4]; } int32x4_t;  /* 4 lanes of 32 bits */
+typedef struct { int64_t e[2]; } int64x2_t;  /* 2 lanes of 64 bits */
+
+
+/* ----------------------------------------------------------------------------
+ *  Load / Store
+ * -------------------------------------------------------------------------- */
+
+/* Emulation of `vld1_s16`: lane i is read from `p[i]`, for the 4 lanes */
+__attribute__((unused))
+static int16x4_t vld1_s16(const int16_t *p)
+{
+    int16x4_t lanes;
+
+    for (int i = 0; i < 4; i++)
+        lanes.e[i] = p[i];
+
+    return lanes;
+}
+
+/* Emulation of `vmovq_n_s64`: both 64-bit lanes are set to `v` */
+__attribute__((unused))
+static int64x2_t vmovq_n_s64(int64_t v)
+{
+    return (int64x2_t){ { v, v } };
+}
+
+
+/* ----------------------------------------------------------------------------
+ *  Move
+ * -------------------------------------------------------------------------- */
+
+/**
+ * Emulation of `vmovq_n_s32`
+ * v               Signed value to duplicate
+ * return          Vector with the 4 lanes set to `v`
+ */
+__attribute__((unused))
+static int32x4_t vmovq_n_s32(int32_t v)
+{
+    int32x4_t r;
+
+    for (int i = 0; i < 4; i++)
+        r.e[i] = v;
+
+    return r;
+}
+
+/**
+ * Emulation of `vext_s16`
+ * a, b            Input vectors
+ * n               Index of the first lane taken from `a`, in 0..4
+ * return          Lanes n..3 of `a`, followed by lanes 0..n-1 of `b`
+ */
+__attribute__((unused))
+static int16x4_t vext_s16(int16x4_t a, int16x4_t b, const int n)
+{
+    int16x4_t r;
+
+    for (int lane = 0; lane < 4; lane++) {
+        int src = lane + n;
+        r.e[lane] = src < 4 ? a.e[src] : b.e[src - 4];
+    }
+
+    return r;
+}
+
+/* ----------------------------------------------------------------------------
+ *  Arithmetic
+ * -------------------------------------------------------------------------- */
+
+/* Emulation of `vmull_s16`: lane-wise 16x16 products, widened to 32 bits */
+__attribute__((unused))
+static int32x4_t vmull_s16(int16x4_t a, int16x4_t b)
+{
+    int32x4_t prod;
+
+    for (int lane = 0; lane < 4; lane++) {
+        const int32_t lhs = a.e[lane];
+        prod.e[lane] = lhs * b.e[lane];
+    }
+
+    return prod;
+}
+
+/**
+ * Emulation of `vmlal_s16`
+ * r               32-bit accumulators
+ * a, b            16-bit operands, multiplied lane by lane
+ * return          r + a * b on each lane, wrapped to 32 bits
+ *
+ * The addition is made on unsigned values, so that an accumulation
+ * overflow wraps around instead of being undefined behaviour.
+ */
+__attribute__((unused))
+static int32x4_t vmlal_s16(int32x4_t r, int16x4_t a, int16x4_t b)
+{
+    for (int i = 0; i < 4; i++) {
+        /* The product always fits 32 bits */
+        int32_t prod = (int32_t)a.e[i] * b.e[i];
+        r.e[i] = (int32_t)((uint32_t)r.e[i] + (uint32_t)prod);
+    }
+
+    return r;
+}
+
+/**
+ * Emulation of `vpadalq_s32`
+ * a               64-bit accumulators
+ * b               32-bit values, added by adjacent pairs
+ * return          a[k] + b[2k] + b[2k+1], on each of the 2 lanes
+ */
+__attribute__((unused))
+static int64x2_t vpadalq_s32(int64x2_t a, int32x4_t b)
+{
+    int64x2_t acc;
+
+    for (int k = 0; k < 2; k++) {
+        const int64_t pair = (int64_t)b.e[2*k] + b.e[2*k + 1];
+        acc.e[k] = a.e[k] + pair;
+    }
+
+    return acc;
+}
+
+
+/* ----------------------------------------------------------------------------
+ *  Reduce
+ * -------------------------------------------------------------------------- */
+
+/**
+ * Emulation of `vaddvq_s32`
+ * v               Vector to reduce
+ * return          Sum of the 4 lanes, wrapped to 32 bits
+ *
+ * The sum is formed on unsigned values, so that an overflow wraps
+ * around instead of being undefined behaviour.
+ */
+__attribute__((unused))
+static int32_t vaddvq_s32(int32x4_t v)
+{
+    uint32_t sum = 0;
+
+    for (int i = 0; i < 4; i++)
+        sum += (uint32_t)v.e[i];
+
+    return (int32_t)sum;
+}
+
+/* Emulation of `vaddvq_s64`: sum of the two 64-bit lanes */
+__attribute__((unused))
+static int64_t vaddvq_s64(int64x2_t v)
+{
+    const int64_t first = v.e[0], second = v.e[1];
+
+    return first + second;
+}
+
+#endif /* __ARM_NEON */
@@ -0,0 +1,32 @@
+/******************************************************************************
+ *
+ *  Copyright 2022 Google LLC
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at:
+ *
+ *  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ ******************************************************************************/
+
+#include <stdio.h>
+
+int check_ltpf(void);
+
+/* Run the checks, report on stdout; exit status is 0 only on success */
+int main()
+{
+    int failed = 0;
+
+    /* Show the label before the check starts running */
+    printf("Checking LTPF Neon... ");
+    fflush(stdout);
+
+    int r = check_ltpf();
+    printf("%s\n", r == 0 ? "OK" : "Failed");
+    failed = failed || r;
+
+    return failed;
+}