shithub: opus

Download patch

ref: 8eadcdc61136793373e69bde1bddd0d28633f0a3
parent: d98d8ae087844c616c1f187265ae9d20dea54207
author: Gregory Maxwell <[email protected]>
date: Tue May 26 07:56:37 EDT 2009

Switch the N=5 case of CWRS to also use a binary search.
This avoids the need for 64-bit addition and is faster on x86_64.
It may be slower on some platforms so the direct solution is still
available in the source.

--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -167,6 +167,7 @@
   return g;
 }
 
+#if 0
 /*Compute floor(sqrt(_val)) with exact arithmetic.
   This has been tested on all possible 36-bit inputs.*/
 static celt_uint32_t isqrt36(celt_uint64_t _val){
@@ -197,6 +198,7 @@
   }
   return g;
 }
+#endif
 
 /*Although derived separately, the pulse vector coding scheme is equivalent to
    a Pyramid Vector Quantizer \cite{Fis86}.
@@ -553,11 +555,29 @@
   s=-(_i>=p);
   _i-=p&s;
   yj=_k;
+#if 0
   /*Finds the maximum _k such that ucwrs5(_k)<=_i (tested for all
      _i<2157192969=U(5,239)).*/
   if(_i>=0x2AAAAAA9UL)_k=isqrt32(2*isqrt36(10+6*(celt_uint64_t)_i)-7)+1>>1;
   else _k=_i>0?isqrt32(2*(celt_uint32_t)isqrt32(10+6*_i)-7)+1>>1:0;
   p=ucwrs5(_k);
+#else 
+  /* A binary search on U(5,K) avoids the need for 64-bit arithmetic */
+  {
+    int kl=0;
+    int kr=_k;
+    for(;;){
+      _k=kl+kr>>1;
+      p=ucwrs5(_k);
+      if(p<_i){
+        if(_k>=kr)break;
+        kl=_k+1;
+      }
+      else if(p>_i)kr=_k-1;
+      else break;
+    }  
+  }
+#endif
   _i-=p;
   yj-=_k;
   _y[0]=yj+s^s;