fix: Compilation for ARMv7a and A32

This commit is contained in:
Antoine SOULIER
2022-05-13 09:48:03 +02:00
parent b36fec6ffa
commit 72d868b755
4 changed files with 57 additions and 42 deletions

View File

@@ -178,6 +178,8 @@ LC3_HOT static inline int32_t filter_hp50(
* The number of previous samples `d` accessed on `x` is : * The number of previous samples `d` accessed on `x` is :
* d: { 10, 20, 40 } - 1 for resampling factors 8, 4 and 2. * d: { 10, 20, 40 } - 1 for resampling factors 8, 4 and 2.
*/ */
#if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
|| !defined(resample_32k_12k8)
LC3_HOT static inline void resample_x64k_12k8(const int p, const int16_t *h, LC3_HOT static inline void resample_x64k_12k8(const int p, const int16_t *h,
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -207,6 +209,7 @@ LC3_HOT static inline void resample_x64k_12k8(const int p, const int16_t *h,
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif
/** /**
* Resample from 24 / 48 KHz to 12.8 KHz Template * Resample from 24 / 48 KHz to 12.8 KHz Template
@@ -220,6 +223,7 @@ LC3_HOT static inline void resample_x64k_12k8(const int p, const int16_t *h,
* The number of previous samples `d` accessed on `x` is : * The number of previous samples `d` accessed on `x` is :
* d: { 30, 60 } - 1 for resampling factors 8 and 4. * d: { 30, 60 } - 1 for resampling factors 8 and 4.
*/ */
#if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
LC3_HOT static inline void resample_x192k_12k8(const int p, const int16_t *h, LC3_HOT static inline void resample_x192k_12k8(const int p, const int16_t *h,
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -254,6 +258,7 @@ LC3_HOT static inline void resample_x192k_12k8(const int p, const int16_t *h,
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif
/** /**
* Resample from 8 KHz to 12.8 KHz * Resample from 8 KHz to 12.8 KHz

View File

@@ -16,24 +16,12 @@
* *
******************************************************************************/ ******************************************************************************/
#if __ARM_FEATURE_SIMD32 && !__ARM_NEON #if __ARM_FEATURE_SIMD32
/**
* Configuration
*/
#ifndef TEST_ARM #ifndef TEST_ARM
#include <arm_acle.h> #include <arm_acle.h>
#define resample_8k_12k8 arm_resample_8k_12k8
#define resample_16k_12k8 arm_resample_16k_12k8
#define resample_24k_12k8 arm_resample_24k_12k8
#define resample_32k_12k8 arm_resample_32k_12k8
#define resample_48k_12k8 arm_resample_48k_12k8
#define correlate arm_correlate
static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b) static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
{ {
int16x2_t r; int16x2_t r;
@@ -55,6 +43,8 @@ static inline float dot(const int16_t *, const int16_t *, int);
/** /**
* Resample from 8 / 16 / 32 KHz to 12.8 KHz Template * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
*/ */
#if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
|| !defined(resample_32k_12k8)
static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h, static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{ {
@@ -80,10 +70,12 @@ static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif
/** /**
* Resample from 24 / 48 KHz to 12.8 KHz Template * Resample from 24 / 48 KHz to 12.8 KHz Template
*/ */
#if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h, static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{ {
@@ -119,10 +111,13 @@ static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif
/** /**
* Resample from 8 KHz to 12.8 KHz * Resample from 8 KHz to 12.8 KHz
*/ */
#ifndef resample_8k_12k8
#define resample_8k_12k8 arm_resample_8k_12k8
static void arm_resample_8k_12k8( static void arm_resample_8k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -148,10 +143,13 @@ static void arm_resample_8k_12k8(
arm_resample_x64k_12k8( arm_resample_x64k_12k8(
8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n); 8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
} }
#endif /* resample_8k_12k8 */
/** /**
* Resample from 16 KHz to 12.8 KHz * Resample from 16 KHz to 12.8 KHz
*/ */
#ifndef resample_16k_12k8
#define resample_16k_12k8 arm_resample_16k_12k8
static void arm_resample_16k_12k8( static void arm_resample_16k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -198,10 +196,13 @@ static void arm_resample_16k_12k8(
arm_resample_x64k_12k8( arm_resample_x64k_12k8(
4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n); 4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
} }
#endif /* resample_16k_12k8 */
/** /**
* Resample from 32 KHz to 12.8 KHz * Resample from 32 KHz to 12.8 KHz
*/ */
#ifndef resample_32k_12k8
#define resample_32k_12k8 arm_resample_32k_12k8
static void arm_resample_32k_12k8( static void arm_resample_32k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -239,10 +240,13 @@ static void arm_resample_32k_12k8(
arm_resample_x64k_12k8( arm_resample_x64k_12k8(
2, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n); 2, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
} }
#endif /* resample_32k_12k8 */
/** /**
* Resample from 24 KHz to 12.8 KHz * Resample from 24 KHz to 12.8 KHz
*/ */
#ifndef resample_24k_12k8
#define resample_24k_12k8 arm_resample_24k_12k8
static void arm_resample_24k_12k8( static void arm_resample_24k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -332,10 +336,13 @@ static void arm_resample_24k_12k8(
arm_resample_x192k_12k8( arm_resample_x192k_12k8(
8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n); 8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
} }
#endif /* resample_24k_12k8 */
/** /**
* Resample from 48 KHz to 12.8 KHz * Resample from 48 KHz to 12.8 KHz
*/ */
#ifndef resample_48k_12k8
#define resample_48k_12k8 arm_resample_48k_12k8
static void arm_resample_48k_12k8( static void arm_resample_48k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -417,10 +424,13 @@ static void arm_resample_48k_12k8(
arm_resample_x192k_12k8( arm_resample_x192k_12k8(
4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n); 4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
} }
#endif /* resample_48k_12k8 */
/** /**
* Return vector of correlations * Return vector of correlations
*/ */
#ifndef correlate
#define correlate arm_correlate
static void arm_correlate( static void arm_correlate(
const int16_t *a, const int16_t *b, int n, float *y, int nc) const int16_t *a, const int16_t *b, int n, float *y, int nc)
{ {
@@ -461,5 +471,6 @@ static void arm_correlate(
if (nc > 0) if (nc > 0)
*(y++) = dot(a, b, n); *(y++) = dot(a, b, n);
} }
#endif /* correlate */
#endif /* __ARM_FEATURE_SIMD32 */ #endif /* __ARM_FEATURE_SIMD32 */

View File

@@ -16,23 +16,10 @@
* *
******************************************************************************/ ******************************************************************************/
#if __ARM_NEON #if __ARM_NEON && __ARM_ARCH_ISA_A64
/**
* Configuration
*/
#ifndef TEST_NEON #ifndef TEST_NEON
#include <arm_neon.h> #include <arm_neon.h>
#define resample_16k_12k8 neon_resample_16k_12k8
#define resample_32k_12k8 neon_resample_32k_12k8
#define resample_48k_12k8 neon_resample_48k_12k8
#define correlate neon_correlate
#define dot neon_dot
#endif /* TEST_NEON */ #endif /* TEST_NEON */
@@ -46,6 +33,8 @@ static inline int32_t filter_hp50(struct lc3_ltpf_hp50_state *, int32_t);
/** /**
* Resample from 16 KHz to 12.8 KHz * Resample from 16 KHz to 12.8 KHz
*/ */
#ifndef resample_16k_12k8
#define resample_16k_12k8 neon_resample_16k_12k8
LC3_HOT static void neon_resample_16k_12k8( LC3_HOT static void neon_resample_16k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -82,10 +71,13 @@ LC3_HOT static void neon_resample_16k_12k8(
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif /* resample_16k_12k8 */
/** /**
* Resample from 32 KHz to 12.8 KHz * Resample from 32 KHz to 12.8 KHz
*/ */
#ifndef resample_32k_12k8
#define resample_32k_12k8 neon_resample_32k_12k8
LC3_HOT static void neon_resample_32k_12k8( LC3_HOT static void neon_resample_32k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -119,12 +111,13 @@ LC3_HOT static void neon_resample_32k_12k8(
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif /* resample_32k_12k8 */
/** /**
* Resample from 48 KHz to 12.8 KHz * Resample from 48 KHz to 12.8 KHz
*/ */
#ifndef resample_48k_12k8
#define resample_48k_12k8 neon_resample_48k_12k8
LC3_HOT static void neon_resample_48k_12k8( LC3_HOT static void neon_resample_48k_12k8(
struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n) struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
{ {
@@ -176,10 +169,13 @@ LC3_HOT static void neon_resample_48k_12k8(
*(y++) = (yn + (1 << 15)) >> 16; *(y++) = (yn + (1 << 15)) >> 16;
} }
} }
#endif /* resample_48k_12k8 */
/** /**
* Return dot product of 2 vectors * Return dot product of 2 vectors
*/ */
#ifndef dot
#define dot neon_dot
LC3_HOT static inline float neon_dot(const int16_t *a, const int16_t *b, int n) LC3_HOT static inline float neon_dot(const int16_t *a, const int16_t *b, int n)
{ {
int64x2_t v = vmovq_n_s64(0); int64x2_t v = vmovq_n_s64(0);
@@ -199,10 +195,13 @@ LC3_HOT static inline float neon_dot(const int16_t *a, const int16_t *b, int n)
int32_t v32 = (vaddvq_s64(v) + (1 << 5)) >> 6; int32_t v32 = (vaddvq_s64(v) + (1 << 5)) >> 6;
return (float)v32; return (float)v32;
} }
#endif /* dot */
/** /**
* Return vector of correlations * Return vector of correlations
*/ */
#ifndef correlate
#define correlate neon_correlate
LC3_HOT static void neon_correlate( LC3_HOT static void neon_correlate(
const int16_t *a, const int16_t *b, int n, float *y, int nc) const int16_t *a, const int16_t *b, int n, float *y, int nc)
{ {
@@ -252,5 +251,6 @@ LC3_HOT static void neon_correlate(
for ( ; nc > 0; nc--) for ( ; nc > 0; nc--)
*(y++) = neon_dot(a, b--, n); *(y++) = neon_dot(a, b--, n);
} }
#endif /* correlate */
#endif /* __ARM_NEON */ #endif /* __ARM_NEON && __ARM_ARCH_ISA_A64 */

View File

@@ -16,20 +16,10 @@
* *
******************************************************************************/ ******************************************************************************/
#if __ARM_NEON #if __ARM_NEON && __ARM_ARCH_ISA_A64
/**
* Configuration
*/
#ifndef TEST_NEON #ifndef TEST_NEON
#include <arm_neon.h> #include <arm_neon.h>
#define fft_5 neon_fft_5
#define fft_bf3 neon_fft_bf3
#define fft_bf2 neon_fft_bf2
#endif /* TEST_NEON */ #endif /* TEST_NEON */
@@ -37,6 +27,8 @@
* FFT 5 Points * FFT 5 Points
* The number of interleaved transforms `n` is assumed to be even * The number of interleaved transforms `n` is assumed to be even
*/ */
#ifndef fft_5
#define fft_5 neon_fft_5
LC3_HOT static inline void neon_fft_5( LC3_HOT static inline void neon_fft_5(
const struct lc3_complex *x, struct lc3_complex *y, int n) const struct lc3_complex *x, struct lc3_complex *y, int n)
{ {
@@ -105,10 +97,13 @@ LC3_HOT static inline void neon_fft_5(
vst1_f32( (float *)(y + 9), vget_high_f32(y4) ); vst1_f32( (float *)(y + 9), vget_high_f32(y4) );
} }
} }
#endif /* fft_5 */
/** /**
* FFT Butterfly 3 Points * FFT Butterfly 3 Points
*/ */
#ifndef fft_bf3
#define fft_bf3 neon_fft_bf3
LC3_HOT static inline void neon_fft_bf3( LC3_HOT static inline void neon_fft_bf3(
const struct lc3_fft_bf3_twiddles *twiddles, const struct lc3_fft_bf3_twiddles *twiddles,
const struct lc3_complex *x, struct lc3_complex *y, int n) const struct lc3_complex *x, struct lc3_complex *y, int n)
@@ -211,10 +206,13 @@ LC3_HOT static inline void neon_fft_bf3(
} }
} }
#endif /* fft_bf3 */
/** /**
* FFT Butterfly 2 Points * FFT Butterfly 2 Points
*/ */
#ifndef fft_bf2
#define fft_bf2 neon_fft_bf2
LC3_HOT static inline void neon_fft_bf2( LC3_HOT static inline void neon_fft_bf2(
const struct lc3_fft_bf2_twiddles *twiddles, const struct lc3_fft_bf2_twiddles *twiddles,
const struct lc3_complex *x, struct lc3_complex *y, int n) const struct lc3_complex *x, struct lc3_complex *y, int n)
@@ -277,5 +275,6 @@ LC3_HOT static inline void neon_fft_bf2(
} }
} }
} }
#endif /* fft_bf2 */
#endif /* __ARM_NEON */ #endif /* __ARM_NEON && __ARM_ARCH_ISA_A64 */