ref: ce15e6531932624387b5c49bb1fbfccd19710bca
parent: 63f744d583938830e4b2860eacc8047ce022c5ec
author: Timothy B. Terriberry <[email protected]>
date: Fri Jun 14 19:57:19 EDT 2013
Split cwrsi() by pulses vs. dimensions. This lets us cut out a bunch of work in the large _n, small _k case where most of the dimensions won't have any pulses. It also gets rid of all remaining usage of CELT_PVQ_U() in cwrsi(), leaving just a single test instead of lots of mins and maxes, and makes a bunch of the jump threading more obvious. This is a 1.6% decoder speedup on a 96 kbps comp48-stereo encode on a Cortex A8.
--- a/celt/cwrs.c
+++ b/celt/cwrs.c
@@ -467,34 +467,49 @@
celt_assert(_k>0);
celt_assert(_n>1);
while(_n>2){
- /*Are the pulses in this dimension negative?*/
- p=CELT_PVQ_U(_n,_k+1);
- s=-(_i>=p);
- _i-=p&s;
- /*Count how many pulses were placed in this dimension.*/
- k0=_k;
- p=CELT_PVQ_U(_n,_k);
- if(_k>_n){
+ opus_uint32 q;
+ /*Lots of pulses case:*/
+ if(_k>=_n){
const opus_uint32 *row;
- opus_uint32 q;
row=CELT_PVQ_U_ROW[_n];
+ /*Are the pulses in this dimension negative?*/
+ p=row[_k+1];
+ s=-(_i>=p);
+ _i-=p&s;
+ /*Count how many pulses were placed in this dimension.*/
+ k0=_k;
q=row[_n];
if(q>_i){
celt_assert(p>q);
- /*Setting p=q is unnecessary, but it helps the optimizer prove p>_i,
- allowing it to jump straight past the initial test in the second
- loop below.
- Once it's removed that first comparison, a smart compiler should be
- able to figure out that the result of this assignment isn't used and
- optimize it away anyway.*/
- p=q;
_k=_n;
+ do p=CELT_PVQ_U_ROW[--_k][_n];
+ while(p>_i);
}
- else for(;p>_i;p=row[_k])_k--;
+ else for(p=row[_k];p>_i;p=row[_k])_k--;
+ _i-=p;
+ *_y++=(k0-_k+s)^s;
}
- for(;p>_i;p=CELT_PVQ_U_ROW[_k][_n])_k--;
- _i-=p;
- *_y++=(k0-_k+s)^s;
+ /*Lots of dimensions case:*/
+ else{
+ /*Are there any pulses in this dimension at all?*/
+ p=CELT_PVQ_U_ROW[_k][_n];
+ q=CELT_PVQ_U_ROW[_k+1][_n];
+ if(p<=_i&&_i<q){
+ _i-=p;
+ *_y++=0;
+ }
+ else{
+ /*Are the pulses in this dimension negative?*/
+ s=-(_i>=q);
+ _i-=q&s;
+ /*Count how many pulses were placed in this dimension.*/
+ k0=_k;
+ do p=CELT_PVQ_U_ROW[--_k][_n];
+ while(p>_i);
+ _i-=p;
+ *_y++=(k0-_k+s)^s;
+ }
+ }
_n--;
}
/*_n==2*/