ref: 64ba502e2c93527a19fe98e7cdc93e55d19f4911
parent: 0fa5fa88e9bc1490f4c471c8f4a927a97c5d5446
author: Jean-Marc Valin <[email protected]>
date: Sat May 25 16:13:49 EDT 2013
Optimizes remove_doubling() by avoiding redundant calculations of yy, using a sliding window to pre-compute all yy values.
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -472,6 +472,9 @@
opus_val32 best_xy, best_yy;
int offset;
int minperiod0;
+ int max
+ VARDECL(opus_val32, yy_lookup);
+ SAVE_STACK;
minperiod0 = minperiod;
maxperiod /= 2;
@@ -484,13 +487,21 @@
*T0_=maxperiod-1;
T = T0 = *T0_;
- xx=xy=yy=0;
+ ALLOC(yy_lookup, maxperiod+1, opus_val32);
+ xy=xx=0;
for (i=0;i<N;i++)
{
- xy = MAC16_16(xy, x[i], x[i-T0]);
xx = MAC16_16(xx, x[i], x[i]);
- yy = MAC16_16(yy, x[i-T0],x[i-T0]);
+ xy = MAC16_16(xy, x[i], x[i-T0]);
}
+ yy_lookup[0] = xx;
+ yy=xx;
+ for (i=1;i<=maxperiod;i++)
+ {
+ yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]);
+ yy_lookup[i] = MAX32(0, yy);
+ }
+ yy = yy_lookup[T0];
best_xy = xy;
best_yy = yy;
#ifdef FIXED_POINT
@@ -526,15 +537,13 @@
{
T1b = (2*second_check[k]*T0+k)/(2*k);
}
- xy=yy=0;
+ xy=0;
for (i=0;i<N;i++)
{
xy = MAC16_16(xy, x[i], x[i-T1]);
- yy = MAC16_16(yy, x[i-T1], x[i-T1]);
-
xy = MAC16_16(xy, x[i], x[i-T1b]);
- yy = MAC16_16(yy, x[i-T1b], x[i-T1b]);
}
+ yy = yy_lookup[T1] + yy_lookup[T1b];
#ifdef FIXED_POINT
{
opus_val32 x2y2;
@@ -594,5 +603,6 @@
if (*T0_<minperiod0)
*T0_=minperiod0;
+ RESTORE_STACK;
return pg;
}