shithub: opus

Download patch

ref: d857ac48de795df17d54440579559854a9a4614f
parent: 980ad38495fa0bfdccdda3e28c73e8b793a7a781
author: Jean-Marc Valin <[email protected]>
date: Wed Mar 12 09:26:37 EDT 2008

Using reciprocal approximation instead of full 32-bit division in alg_quant()

--- a/libcelt/_kiss_fft_guts.h
+++ b/libcelt/_kiss_fft_guts.h
@@ -89,11 +89,6 @@
 
 #ifdef MIXED_PRECISION
 
-#undef MULT16_32_Q15
-#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
-/*#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))*/
-#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
-
 #   define S_MUL(a,b) MULT16_32_Q15(b, a)
 
 #   define C_MUL(m,a,b) \
--- a/libcelt/arch.h
+++ b/libcelt/arch.h
@@ -177,6 +177,8 @@
 #define MULT16_32_Q15(a,b)     ((a)*(b))
 #define MULT16_32_P15(a,b)     ((a)*(b))
 
+#define MULT32_32_Q31(a,b)     ((a)*(b))
+
 #define MAC16_32_Q11(c,a,b)     ((c)+(a)*(b))
 #define MAC16_32_Q15(c,a,b)     ((c)+(a)*(b))
 
--- a/libcelt/fixed_debug.h
+++ b/libcelt/fixed_debug.h
@@ -41,6 +41,9 @@
 static long long celt_mips = 0;
 #define MIPS_INC celt_mips++,
 
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
 #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 
--- a/libcelt/fixed_generic.h
+++ b/libcelt/fixed_generic.h
@@ -35,6 +35,13 @@
 #ifndef FIXED_GENERIC_H
 #define FIXED_GENERIC_H
 
+#define MULT16_16SU(a,b) ((celt_word32_t)(celt_word16_t)(a)*(celt_word32_t)(celt_uint16_t)(b))
+
+#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
+
+#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
+
+
 #define QCONST16(x,bits) ((celt_word16_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 #define QCONST32(x,bits) ((celt_word32_t)(.5+(x)*(((celt_word32_t)1)<<(bits))))
 
@@ -81,7 +88,6 @@
 #define MAC16_32_Q11(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),11)), SHR(MULT16_16((a),((b)&0x000007ff)),11)))
 
 #define MULT16_32_P15(a,b) ADD32(MULT16_16((a),SHR((b),15)), PSHR(MULT16_16((a),((b)&0x00007fff)),15))
-#define MULT16_32_Q15(a,b) ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15))
 #define MAC16_32_Q15(c,a,b) ADD32(c,ADD32(MULT16_16((a),SHR((b),15)), SHR(MULT16_16((a),((b)&0x00007fff)),15)))
 
 
--- a/libcelt/vq.c
+++ b/libcelt/vq.c
@@ -226,9 +226,12 @@
                Ryp = yp[m] + MULT16_16(spj, SUB16(QCONST16(1.f,14),MULT16_16_Q15(alpha,Rpp)));
                
                /* Compute the gain such that ||p + g*y|| = 1 */
-               g = DIV32(SHL32(celt_sqrt(MULT16_16(ROUND(Ryp,14),ROUND(Ryp,14)) + Ryy - MULT16_16(ROUND(Ryy,14),Rpp)) - ROUND(Ryp,14),14),ROUND(Ryy,14));
-               
-               /* Knowing that gain, what the error: (x-g*y)^2 
+               g = MULT32_32_Q31(
+                     SHL32(celt_sqrt(MULT16_16(ROUND(Ryp,14),ROUND(Ryp,14)) + Ryy -
+                                     MULT16_16(ROUND(Ryy,14),Rpp))
+                           - ROUND(Ryp,14), 14),
+                     celt_rcp(ROUND(Ryy,14)));
+               /* Knowing that gain, what's the error: (x-g*y)^2 
                   (result is negated and we discard x^2 because it's constant) */
                /*score = 2.f*g*Rxy - 1.f*g*g*Ryy*NORM_SCALING_1;*/
                score = 2*MULT16_32_Q14(ROUND(Rxy,14),g) -