diff --git a/src/fastmath.h b/src/fastmath.h index da29024..32cf79e 100644 --- a/src/fastmath.h +++ b/src/fastmath.h @@ -23,6 +23,7 @@ #ifndef __LC3_FASTMATH_H #define __LC3_FASTMATH_H +#include #include @@ -95,5 +96,63 @@ static inline float fast_log10f(float x) return log10f(2) * fast_log2f(x); } +/** + * Fast `10 * log10(x)` (or dB) approximation in fixed Q16 + * x Operand, in range 2^-63 to 2^63 (1e-19 to 1e19) + * return 10 * log10(x) in fixed Q16 (-190 to 192 dB) + * + * - The 0 value is accepted and return the minimum value ~ -191dB + * - This function assumed that float 32 bits is coded IEEE 754 + */ +static inline int32_t fast_db_q16(float x) +{ + /* --- Table in Q15 --- */ + + static const uint16_t t[][2] = { + + /* [n][0] = 10 * log10(2) * log2(1 + n/32), with n = [0..15] */ + /* [n][1] = [n+1][0] - [n][0] (while defining [16][0]) */ + + { 0, 4379 }, { 4379, 4248 }, { 8627, 4125 }, { 12753, 4009 }, + { 16762, 3899 }, { 20661, 3795 }, { 24456, 3697 }, { 28153, 3603 }, + { 31755, 3514 }, { 35269, 3429 }, { 38699, 3349 }, { 42047, 3272 }, + { 45319, 3198 }, { 48517, 3128 }, { 51645, 3061 }, { 54705, 2996 }, + + /* [n][0] = 10 * log10(2) * log2(1 + n/32) - 10 * log10(2) / 2, */ + /* with n = [16..31] */ + /* [n][1] = [n+1][0] - [n][0] (while defining [32][0]) */ + + { 8381, 2934 }, { 11315, 2875 }, { 14190, 2818 }, { 17008, 2763 }, + { 19772, 2711 }, { 22482, 2660 }, { 25142, 2611 }, { 27754, 2564 }, + { 30318, 2519 }, { 32837, 2475 }, { 35312, 2433 }, { 37744, 2392 }, + { 40136, 2352 }, { 42489, 2314 }, { 44803, 2277 }, { 47080, 2241 }, + + }; + + /* --- Approximation --- + * + * 10 * log10(x^2) = 10 * log10(2) * log2(x^2) + * + * And log2(x^2) = 2 * log2( (1 + m) * 2^e ) + * = 2 * (e + log2(1 + m)) , with m in range [0..1] + * + * Split the float values in : + * e2 Double value of the exponent (2 * e + k) + * hi High 5 bits of mantissa, for precalculated result `t[hi][0]` + * lo Low 16 bits of mantissa, for linear interpolation `t[hi][1]` + * + * Two cases, from the range of the mantissa : + * 0 to 0.5 `k = 0`, use 1st part of the table + * 0.5 to 1 `k = 1`, use 2nd part of the table */ + + union { float f; uint32_t u; } x2 = { .f = x*x }; + + int e2 = (x2.u >> 22) - 2*127; + int hi = (x2.u >> 18) & 0x1f; + int lo = (x2.u >> 2) & 0xffff; + + return e2 * 49321 + t[hi][0] + ((t[hi][1] * lo) >> 16); +} + #endif /* __LC3_FASTMATH_H */ diff --git a/src/spec.c b/src/spec.c index dbe70bd..7ecf43b 100644 --- a/src/spec.c +++ b/src/spec.c @@ -51,25 +51,24 @@ LC3_HOT static int estimate_gain( int nbits_budget, float nbits_off, int g_off, bool *reset_off) { int ne = LC3_NE(dt, sr) >> 2; - float e[ne]; + int e[ne]; - /* --- Energy (dB) by 4 NDCT blocks --- - * For the next steps, add energy offset 28/20 dB, - * and compute the maximum magnitude */ + /* --- Energy (dB) by 4 MDCT blocks --- */ - float x_max = 0; + float x2_max = 0; for (int i = 0; i < ne; i++, x += 4) { - float x0 = fabsf(x[0]), x1 = fabsf(x[1]); - float x2 = fabsf(x[2]), x3 = fabsf(x[3]); + float x0 = x[0] * x[0]; + float x1 = x[1] * x[1]; + float x2 = x[2] * x[2]; + float x3 = x[3] * x[3]; - x_max = fmaxf(x_max, x0); - x_max = fmaxf(x_max, x1); - x_max = fmaxf(x_max, x2); - x_max = fmaxf(x_max, x3); + x2_max = fmaxf(x2_max, x0); + x2_max = fmaxf(x2_max, x1); + x2_max = fmaxf(x2_max, x2); + x2_max = fmaxf(x2_max, x3); - float s2 = x0*x0 + x1*x1 + x2*x2 + x3*x3; - e[i] = 28.f/20 * 10 * (s2 > 0 ? fast_log10f(s2) : -10); + e[i] = fast_db_q16(fmaxf(x0 + x1 + x2 + x3, 1e-10f)); } /* --- Determine gain index --- */ @@ -77,21 +76,25 @@ LC3_HOT static int estimate_gain( int nbits = nbits_budget + nbits_off + 0.5f; int g_int = 255 - g_off; + const int k_20_28 = 20.f/28 * 0x1p16f + 0.5f; + const int k_2u7 = 2.7f * 0x1p16f + 0.5f; + const int k_1u4 = 1.4f * 0x1p16f + 0.5f; + for (int i = 128, j, j0 = ne-1, j1 ; i > 0; i >>= 1) { - float gn = g_int - i; - float v = 0; + int gn = (g_int - i) * k_20_28; + int v = 0; for (j = j0; j >= 0 && e[j] < gn; j--); for (j1 = j; j >= 0; j--) { - float e_diff = e[j] - gn; + int e_diff = e[j] - gn; - v += e_diff < 0 ? 2.7f * 28.f/20 : - e_diff < 43 * 28.f/20 ? e_diff + 7 * 28.f/20 : - 2*e_diff - 36 * 28.f/20 ; + v += e_diff < 0 ? k_2u7 : + e_diff < 43 << 16 ? e_diff + ( 7 << 16) + : 2*e_diff - (36 << 16); } - if (v > nbits * 1.4f * 28.f/20) + if (v > nbits * k_1u4) j0 = j1; else g_int = g_int - i; @@ -99,10 +102,10 @@ LC3_HOT static int estimate_gain( /* --- Limit gain index --- */ - int g_min = x_max == 0 ? -g_off : - ceilf(28 * log10f(x_max / (32768 - 0.375f))); + int g_min = x2_max == 0 ? -g_off : + ceilf(28 * log10f(sqrtf(x2_max) / (32768 - 0.375f))); - *reset_off = g_int < g_min || x_max == 0; + *reset_off = g_int < g_min || x2_max == 0; if (*reset_off) g_int = g_min; diff --git a/tables/fastmath.py b/tables/fastmath.py index 9850f6e..202561a 100755 --- a/tables/fastmath.py +++ b/tables/fastmath.py @@ -92,6 +92,22 @@ def approx_log2(): plt.show() +def table_db_q16(): + + k = 10 * np.log10(2); + + for i in range(32): + a = k * np.log2(np.ldexp(32 + i , -5)) - (i // 16) * (k/2); + b = k * np.log2(np.ldexp(32 + i+1, -5)) - (i // 16) * (k/2); + + an = np.ldexp(a, 15) + 0.5 + bn = np.ldexp(b - a, 15) + 0.5 + print('{{ {:5d}, {:4d} }},' + .format(int(np.ldexp(a, 15) + 0.5), + int(np.ldexp(b - a, 15) + 0.5)), + end = ' ' if i % 4 < 3 else '\n') + + if __name__ == '__main__': print('\n--- Approximation of 2^n ---') @@ -100,4 +116,7 @@ if __name__ == '__main__': print('\n--- Approximation of log2(n) ---') approx_log2() + print('\n--- Table of fixed Q16 dB ---') + table_db_q16() + print('')