ref: 533dbe705ba546774cb48f4989fae61309963eb6
parent: bc469b8e443c6c5aebe27229af30bc789a359d57
author: Timothy B. Terriberry <[email protected]>
date: Fri Jun 14 21:35:23 EDT 2013
Further optimization to cwrsi(). Makes it possible to skip the first loop in some cases.
--- a/celt/cwrs.c
+++ b/celt/cwrs.c
@@ -461,11 +461,11 @@
}
static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
+ int s;
celt_assert(_k>0);
- celt_assert(_n>0);
+ celt_assert(_n>1);
do{
opus_uint32 p;
- int s;
int k0;
/*Are the pulses in this dimension negative?*/
p=CELT_PVQ_U(_n,_k+1);
@@ -476,14 +476,29 @@
p=CELT_PVQ_U(_n,_k);
if(_k>_n){
const opus_uint32 *row;
+ opus_uint32 q;
row=CELT_PVQ_U_ROW[_n];
- for(;p>_i&&_k>_n;p=row[_k])_k--;
+ q=row[_n];
+ if(q>_i){
+ celt_assert(p>q);
+ /*Setting p=q is unnecessary, but it helps the optimizer prove p>_i,
+ allowing it to jump straight past the initial test in the second
+ loop below.
+ Once it's removed that first comparison, a smart compiler should be
+ able to figure out that the result of this assignment isn't used and
+ optimize it away anyway.*/
+ p=q;
+ _k=_n;
+ }
+ else for(;p>_i;p=row[_k])_k--;
}
for(;p>_i;p=CELT_PVQ_U_ROW[_k][_n])_k--;
_i-=p;
*_y++=(k0-_k+s)^s;
}
- while(--_n>0);
+ while(--_n>1);
+ s=-(_i>=1);
+ *_y=(_k+s)^s;
}
void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){