ref: ff072009fe5bdd3540ac6ac331e9961e83da722a
parent: 0f869cba0f271f3d9aee1289d490c44ce1e12975
author: Jean-Marc Valin <[email protected]>
date: Sun Dec 8 18:31:30 EST 2013
Replaces inline copies and initialization with OPUS_*() macros. This is a bit faster at -O2 because memcpy()/memmove()/memset() are vectorized. The code is also cleaner.
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -249,8 +249,7 @@
} while (++j<band_end);
}
celt_assert(start <= end);
- for (i=M*eBands[end];i<N;i++)
- *f++ = 0;
+ OPUS_CLEAR(&freq[c*N+M*eBands[end]], N-M*eBands[end]);
} while (++c<C);
}
@@ -409,8 +408,7 @@
Er = MULT16_16(mid2, mid2) + side + 2*xp;
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
{
- for (j=0;j<N;j++)
- Y[j] = X[j];
+ OPUS_COPY(Y, X, N);
return;
}
@@ -567,8 +565,7 @@
for (j=0;j<N0;j++)
tmp[i*N0+j] = X[j*stride+i];
}
- for (j=0;j<N;j++)
- X[j] = tmp[j];
+ OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@@ -591,8 +588,7 @@
for (j=0;j<N0;j++)
tmp[j*stride+i] = X[i*N0+j];
}
- for (j=0;j<N;j++)
- X[j] = tmp[j];
+ OPUS_COPY(X, tmp, N);
RESTORE_STACK;
}
@@ -1021,8 +1017,7 @@
fill &= cm_mask;
if (!fill)
{
- for (j=0;j<N;j++)
- X[j] = 0;
+ OPUS_CLEAR(X, N);
} else {
if (lowband == NULL)
{
@@ -1098,9 +1093,7 @@
if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
{
- int j;
- for (j=0;j<N;j++)
- lowband_scratch[j] = lowband[j];
+ OPUS_COPY(lowband_scratch, lowband, N);
lowband = lowband_scratch;
}
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -276,8 +276,7 @@
}
/*printf("\n");*/
/* First few samples are bad because we don't propagate the memory */
- for (i=0;i<12;i++)
- tmp[i] = 0;
+ OPUS_CLEAR(tmp, 12);
#ifdef FIXED_POINT
/* Normalize tmp to max range */
@@ -453,8 +452,7 @@
int bound = B*N/upsample;
for (i=0;i<bound;i++)
out[c*B*N+i] *= upsample;
- for (;i<B*N;i++)
- out[c*B*N+i] = 0;
+ OPUS_CLEAR(&out[c*B*N+bound], B*N-bound);
} while (++c<C);
}
}
@@ -489,8 +487,7 @@
Nu = N/upsample;
if (upsample!=1)
{
- for (i=0;i<N;i++)
- inp[i] = 0;
+ OPUS_CLEAR(inp, N);
}
for (i=0;i<Nu;i++)
inp[i*upsample] = SCALEIN(pcmp[CC*i]);
@@ -586,8 +583,7 @@
N = (m->eBands[i+1]-m->eBands[i])<<LM;
/* band is too narrow to be split down to LM=-1 */
narrow = (m->eBands[i+1]-m->eBands[i])==1;
- for (j=0;j<N;j++)
- tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
+ OPUS_COPY(tmp, &X[tf_chan*N0 + (m->eBands[i]<<LM)], N);
/* Just add the right channel if we're in stereo */
/*if (C==2)
for (j=0;j<N;j++)
@@ -597,8 +593,7 @@
/* Check the -1 case for transients */
if (isTransient && !narrow)
{
- for (j=0;j<N;j++)
- tmp_1[j] = tmp[j];
+ OPUS_COPY(tmp_1, tmp, N);
haar1(tmp_1, N>>LM, 1<<LM);
L1 = l1_metric(tmp_1, N, LM+1, bias);
if (L1<best_L1)
@@ -903,8 +898,7 @@
SAVE_STACK;
ALLOC(follower, C*nbEBands, opus_val16);
ALLOC(noise_floor, C*nbEBands, opus_val16);
- for (i=0;i<nbEBands;i++)
- offsets[i] = 0;
+ OPUS_CLEAR(offsets, nbEBands);
/* Dynamic allocation code */
maxDepth=-QCONST16(31.9f, DB_SHIFT);
for (i=0;i<end;i++)
@@ -1566,8 +1560,7 @@
amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
ALLOC(surround_dynalloc, C*nbEBands, opus_val16);
- for(i=0;i<st->end;i++)
- surround_dynalloc[i] = 0;
+ OPUS_CLEAR(surround_dynalloc, st->end);
/* This computes how much masking takes place between surround channels */
if (st->start==0&&st->energy_mask&&!st->lfe)
{
@@ -1629,8 +1622,7 @@
disabling masking. */
mask_avg = 0;
diff = 0;
- for(i=0;i<mask_end;i++)
- surround_dynalloc[i] = 0;
+ OPUS_CLEAR(surround_dynalloc, mask_end);
} else {
for(i=0;i<mask_end;i++)
surround_dynalloc[i] = MAX16(0, surround_dynalloc[i]-QCONST16(.25f, DB_SHIFT));
@@ -1666,8 +1658,7 @@
if (!secondMdct)
{
- for (i=0;i<C*nbEBands;i++)
- bandLogE2[i] = bandLogE[i];
+ OPUS_COPY(bandLogE2, bandLogE, C*nbEBands);
}
/* Last chance to catch any transient we might have missed in the
@@ -2059,16 +2050,13 @@
#endif
if (CC==2&&C==1) {
- for (i=0;i<nbEBands;i++)
- oldBandE[nbEBands+i]=oldBandE[i];
+ OPUS_COPY(&oldBandE[nbEBands], oldBandE, nbEBands);
}
if (!isTransient)
{
- for (i=0;i<CC*nbEBands;i++)
- oldLogE2[i] = oldLogE[i];
- for (i=0;i<CC*nbEBands;i++)
- oldLogE[i] = oldBandE[i];
+ OPUS_COPY(oldLogE2, oldLogE, CC*nbEBands);
+ OPUS_COPY(oldLogE, oldBandE, CC*nbEBands);
} else {
for (i=0;i<CC*nbEBands;i++)
oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1425,8 +1425,7 @@
ec_enc_init(&enc, data, max_data_bytes-1);
ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
- for (i=0;i<total_buffer*st->channels;i++)
- pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
+ OPUS_COPY(pcm_buf, &st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels], total_buffer*st->channels);
if (st->mode == MODE_CELT_ONLY)
hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -1611,8 +1610,7 @@
prefill_offset = st->channels*(st->encoder_buffer-st->delay_compensation-st->Fs/400);
gain_fade(st->delay_buffer+prefill_offset, st->delay_buffer+prefill_offset,
0, Q15ONE, celt_mode->overlap, st->Fs/400, st->channels, celt_mode->window, st->Fs);
- for(i=0;i<prefill_offset;i++)
- st->delay_buffer[i]=0;
+ OPUS_CLEAR(st->delay_buffer, prefill_offset);
#ifdef FIXED_POINT
pcm_silk = st->delay_buffer;
#else
@@ -1739,15 +1737,18 @@
ALLOC(tmp_prefill, st->channels*st->Fs/400, opus_val16);
if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
{
- for (i=0;i<st->channels*st->Fs/400;i++)
- tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
+ OPUS_COPY(tmp_prefill, &st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels], st->channels*st->Fs/400);
}
- for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
- st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
- for (;i<st->encoder_buffer*st->channels;i++)
- st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
-
+ if (st->channels*(st->encoder_buffer-(frame_size+total_buffer)) > 0)
+ {
+ OPUS_MOVE(st->delay_buffer, &st->delay_buffer[st->channels*frame_size], st->channels*(st->encoder_buffer-frame_size-total_buffer));
+ OPUS_COPY(&st->delay_buffer[st->channels*(st->encoder_buffer-frame_size-total_buffer)],
+ &pcm_buf[0],
+ (frame_size+total_buffer)*st->channels);
+ } else {
+ OPUS_COPY(st->delay_buffer, &pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels], st->encoder_buffer*st->channels);
+ }
/* gain_fade() and stereo_fade() need to be after the buffer copying
because we don't want any of this to affect the SILK part */
if( st->prev_HB_gain < Q15ONE || HB_gain < Q15ONE ) {