ref: d857ac48de795df17d54440579559854a9a4614f
parent: 980ad38495fa0bfdccdda3e28c73e8b793a7a781
author: Jean-Marc Valin <[email protected]>
date: Wed Mar 12 09:26:37 EDT 2008
Using reciprocal approximation instead of full 32-bit division in alg_quant()
--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -89,11 +89,6 @@
#ifdef MIXED_PRECISION
-#undef MULT16_32_Q15
-#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
-/*#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))*/
-#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
-
# define S_MUL(a,b) MULT16_32_Q15(b, a)
# define C_MUL(m,a,b) \
--- a/libcelt/arch.h
+++ b/libcelt/arch.h
@@ -177,6 +177,8 @@
#define MULT16_32_Q15(a,b) ((a)*(b))
#define MULT16_32_P15(a,b) ((a)*(b))
+#define MULT32_32_Q31(a,b) ((a)*(b))
+
#define MAC16_32_Q11(c,a,b) ((c)+(a)*(b))
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
--- a/libcelt/fixed_debug.h
+++ b/libcelt/fixed_debug.h
@@ -41,6 +41,9 @@
static long long celt_mips = 0;
#define MIPS_INC celt_mips++,
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
--- a/libcelt/fixed_generic.h
+++ b/libcelt/fixed_generic.h
@@ -35,6 +35,13 @@
#ifndef FIXED_GENERIC_H
#define FIXED_GENERIC_H
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
+
#define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
#define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
@@ -81,7 +88,6 @@
#define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
#define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
-#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
#define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -226,9 +226,12 @@
Ryp = yp[m] + MULT16_16(spj, SUB16(QCONST16(1.f,14),MULT16_16_Q15(alpha,Rpp)));
/* Compute the gain such that ||p + g*y|| = 1 */
- g = DIV32(SHL32(celt_sqrt(MULT16_16(ROUND(Ryp,14),ROUND(Ryp,14)) + Ryy - MULT16_16(ROUND(Ryy,14),Rpp)) - ROUND(Ryp,14),14),ROUND(Ryy,14));
-
- /* Knowing that gain, what the error: (x-g*y)^2
+ g = MULT32_32_Q31(
+ SHL32(celt_sqrt(MULT16_16(ROUND(Ryp,14),ROUND(Ryp,14)) + Ryy -
+ MULT16_16(ROUND(Ryy,14),Rpp))
+ - ROUND(Ryp,14), 14),
+ celt_rcp(ROUND(Ryy,14)));
+ /* Knowing that gain, what's the error: (x-g*y)^2
(result is negated and we discard x^2 because it's constant) */
/*score = 2.f*g*Rxy - 1.f*g*g*Ryy*NORM_SCALING_1;*/
score = 2*MULT16_32_Q14(ROUND(Rxy,14),g) -