shithub: opus

Download patch

ref: e6ce0c6a22b51cb76fc02582ab5d4e58f126ab23
parent: abdfc38837bfdebdd5eacba6fa4f71dd7b0d1b52
author: Jean-Marc Valin <[email protected]>
date: Fri Apr 18 12:46:39 EDT 2008

optimisation: Removed a bunch of conditional branches from comb2pulse()

--- a/libcelt/cwrs.c
+++ b/libcelt/cwrs.c
@@ -266,18 +266,18 @@
 /*Converts a combination _x of _m unit pulses with associated sign bits _s into
    a pulse vector _y of length _n.
   _y: Returns the vector of pulses.
-  _x: The combination with elements sorted in ascending order.
+  _x: The combination, sorted in ascending order, plus a sentinel _x[_m] = -1.
   _s: The associated sign bits.*/
-void comb2pulse(int _n,int _m,int *_y,const int *_x,const int *_s){
+void comb2pulse(int _n,int _m,int * restrict _y,const int *_x,const int *_s){
   int j;
   int k;
   int n;
+  CELT_MEMSET(_y, 0, _n);
   for(k=j=0;k<_m;k+=n){
-    for(n=1;k+n<_m&&_x[k+n]==_x[k];n++);
-    while(j<_x[k])_y[j++]=0;
-    _y[j++]=_s[k]?-n:n;
+     /* The sentinel _x[_m] = -1 never matches a position, so the scan stops at _m */
+    for(n=1;_x[k+n]==_x[k];n++);
+    _y[_x[k]]=_s[k]?-n:n;
   }
-  while(j<_n)_y[j++]=0;
 }
 
 /*Converts a pulse vector vector _y of length _n into a combination of _m unit
@@ -370,7 +370,7 @@
    VARDECL(int, signs);
    SAVE_STACK;
 
-   ALLOC(comb, K, int);
+   ALLOC(comb, K+1, int);
    ALLOC(signs, K, int);
    /* Simple heuristic to figure out whether it fits in 32 bits */
    if((N+4)*(K+4)<250 || (celt_ilog2(N)+1)*K<31)
@@ -379,6 +379,7 @@
    } else {
       decode_comb64(N, K, comb, signs, dec);
    }
+   comb[K] = -1;
    comb2pulse(N, K, _y, comb, signs);
    RESTORE_STACK;
 }
--- a/libcelt/cwrs.h
+++ b/libcelt/cwrs.h
@@ -57,7 +57,7 @@
  celt_uint64_t *_u);
 
 
-void comb2pulse(int _n,int _m,int *_y,const int *_x,const int *_s);
+void comb2pulse(int _n,int _m,int * restrict _y,const int *_x,const int *_s);
 
 void pulse2comb(int _n,int _m,int *_x,int *_s,const int *_y);
 
--- a/libcelt/pitch.c
+++ b/libcelt/pitch.c
@@ -168,6 +168,7 @@
       celt_word16_t Xr, Xi, n;
       /* weight = 1/sqrt(curve) */
       n = celt_rsqrt(EPSILON+curve[i]);
+      /*n = SHR32(32767,(celt_ilog2(EPSILON+curve[i])>>1));*/
       /* Pre-multiply X by n, so we can keep everything in 16 bits */
       Xr = EXTRACT16(SHR32(MULT16_16(n, X[2*i  ]),3));
       Xi = EXTRACT16(SHR32(MULT16_16(n, X[2*i+1]),3));
--- a/tests/cwrs32-test.c
+++ b/tests/cwrs32-test.c
@@ -22,7 +22,7 @@
       if(inc<1)inc=1;
       for(i=0;i<nc;i+=inc){
         celt_uint32_t u[NMAX];
-        int           x[MMAX];
+        int           x[MMAX+1];
         int           s[MMAX];
         int           x2[MMAX];
         int           s2[MMAX];
@@ -40,6 +40,7 @@
           fprintf(stderr,"Combination-index mismatch.\n");
           return 1;
         }
+        x[m] = -1;
         comb2pulse(n,m,y,x,s);
         /*for(j=0;j<n;j++)printf(" %c%i",y[j]?y[j]<0?'-':'+':' ',abs(y[j]));
         printf("\n");*/
--- a/tests/cwrs64-test.c
+++ b/tests/cwrs64-test.c
@@ -25,7 +25,7 @@
       /*printf("%d/%d: %llu",n,m, nc);*/
       for(i=0;i<nc;i+=inc){
         celt_uint64_t u[NMAX];
-        int           x[MMAX];
+        int           x[MMAX+1];
         int           s[MMAX];
         int           x2[MMAX];
         int           s2[MMAX];
@@ -43,6 +43,7 @@
           fprintf(stderr,"Combination-index mismatch.\n");
           return 1;
         }
+        x[m] = -1;
         comb2pulse(n,m,y,x,s);
         /*for(j=0;j<n;j++)printf(" %c%i",y[j]?y[j]<0?'-':'+':' ',abs(y[j]));
         printf("\n");*/