ref: 85ede2c6aa066da29fce5186394f46927358be3b
parent: 2040606f4a1d3b230bdf00e1b7e4427df8dcdd3b
author: Timothy B. Terriberry <[email protected]>
date: Wed May 22 11:26:12 EDT 2013
Use more MAC16_16's and unroll a loop. This splits out the non-arch-specific portions of a patch written by Aurélien Zanelli <[email protected]>: http://lists.xiph.org/pipermail/opus/2013-May/002088.html I also added support for odd n, for custom modes. 0.25% speedup on 96 kbps stereo encode+decode on a Cortex A8.
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -101,7 +101,7 @@
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
for (j=0;j<ord;j++)
{
- sum += MULT16_16(num[j],mem[j]);
+ sum = MAC16_16(sum,num[j],mem[j]);
}
for (j=ord-1;j>=1;j--)
{
@@ -161,11 +161,16 @@
}
#ifdef FIXED_POINT
{
- opus_val32 ac0=0;
+ opus_val32 ac0;
int shift;
- for(i=0;i<n;i++)
+ int n2;
+ ac0 = 1+n;
+ if (n&1) ac0 += SHR32(MULT16_16(xx[0],xx[0]),9);
+ for(i=(n&1);i<n;i+=2)
+ {
ac0 += SHR32(MULT16_16(xx[i],xx[i]),9);
- ac0 += 1+n;
+ ac0 += SHR32(MULT16_16(xx[i+1],xx[i+1]),9);
+ }
shift = celt_ilog2(ac0)-30+10;
shift = (shift+1)/2;
@@ -176,7 +181,7 @@
while (lag>=0)
{
for (i = lag, d = 0; i < n; i++)
- d += xx[i] * xx[i-lag];
+ d = MAC16_16(d, xx[i], xx[i-lag]);
ac[lag] = d;
/*printf ("%f ", ac[lag]);*/
lag--;