ref: a156c5ece7133383468d4cba33f067595d9da391
parent: 568de0a17b7b8e49fe3fa4d2a3cc4ebd62400cab
author: Jean-Marc Valin <[email protected]>
date: Mon Aug 26 14:54:39 EDT 2013
Makes the SILK pitch search use celt_pitch_xcorr(). This should give us ARM/SSE optimizations for free.
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -250,7 +250,7 @@
#else
void
#endif
-celt_pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{
int i,j;
#ifdef FIXED_POINT
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -140,6 +140,6 @@
#else
void
#endif
-celt_pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
#endif
--- a/silk/fixed/pitch_analysis_core_FIX.c
+++ b/silk/fixed/pitch_analysis_core_FIX.c
@@ -36,6 +36,7 @@
#include "pitch_est_defines.h"
#include "stack_alloc.h"
#include "debug.h"
+#include "pitch.h"
#define SCRATCH_SIZE 22
#define SF_LENGTH_4KHZ ( PE_SUBFR_LENGTH_MS * 4 )
@@ -96,6 +97,7 @@
const opus_int16 *input_frame_ptr;
opus_int i, k, d, j;
VARDECL( opus_int16, C );
+ VARDECL( opus_int32, xcorr32 );
const opus_int16 *target_ptr, *basis_ptr;
opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;
opus_int d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;
@@ -173,6 +175,7 @@
* FIRST STAGE, operating in 4 khz
******************************************************************************/
ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );
+ ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );
silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );
target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];
for( k = 0; k < nb_subfr >> 1; k++ ) {
@@ -186,8 +189,10 @@
silk_assert( basis_ptr >= frame_4kHz );
silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
+ celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 );
+
/* Calculate first vector products before loop */
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];
normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );
normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr, basis_ptr, SF_LENGTH_8KHZ ) );
normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );
@@ -203,7 +208,7 @@
silk_assert( basis_ptr >= frame_4kHz );
silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );
+ cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];
/* Add contribution of new sample and remove contribution from oldest sample */
normalizer = silk_ADD32( normalizer,
@@ -595,11 +600,11 @@
opus_int complexity /* I Complexity setting */
)
{
- const opus_int16 *target_ptr, *basis_ptr;
- opus_int32 cross_corr;
+ const opus_int16 *target_ptr;
opus_int i, j, k, lag_counter, lag_low, lag_high;
opus_int nb_cbk_search, delta, idx, cbk_size;
VARDECL( opus_int32, scratch_mem );
+ VARDECL( opus_int32, xcorr32 );
const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
SAVE_STACK;
@@ -619,6 +624,7 @@
cbk_size = PE_NB_CBKS_STAGE3_10MS;
}
ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );
+ ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );
target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */
for( k = 0; k < nb_subfr; k++ ) {
@@ -627,11 +633,11 @@
/* Calculate the correlations for each subframe */
lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 );
lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+ silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+ celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 );
for( j = lag_low; j <= lag_high; j++ ) {
- basis_ptr = target_ptr - ( start_lag + j );
- cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length );
silk_assert( lag_counter < SCRATCH_SIZE );
- scratch_mem[ lag_counter ] = cross_corr;
+ scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];
lag_counter++;
}
--- a/silk/float/pitch_analysis_core_FLP.c
+++ b/silk/float/pitch_analysis_core_FLP.c
@@ -35,6 +35,7 @@
#include "SigProc_FLP.h"
#include "SigProc_FIX.h"
#include "pitch_est_defines.h"
+#include "pitch.h"
#define SCRATCH_SIZE 22
@@ -84,6 +85,7 @@
opus_int32 filt_state[ 6 ];
silk_float threshold, contour_bias;
silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];
+ opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];
silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];
const silk_float *target_ptr, *basis_ptr;
double cross_corr, normalizer, energy, energy_tmp;
@@ -174,8 +176,10 @@
silk_assert( basis_ptr >= frame_4kHz );
silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
+ celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 );
+
/* Calculate first vector products before loop */
- cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );
+ cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];
normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +
silk_energy_FLP( basis_ptr, sf_length_8kHz ) +
sf_length_8kHz * 4000.0f;
@@ -190,7 +194,7 @@
silk_assert( basis_ptr >= frame_4kHz );
silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );
- cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz);
+ cross_corr = xcorr[ max_lag_4kHz - d ];
/* Add contribution of new sample and remove contribution from oldest sample */
normalizer +=
@@ -496,6 +500,7 @@
opus_int i, j, k, lag_counter, lag_low, lag_high;
opus_int nb_cbk_search, delta, idx, cbk_size;
silk_float scratch_mem[ SCRATCH_SIZE ];
+ opus_val32 xcorr[ SCRATCH_SIZE ];
const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;
silk_assert( complexity >= SILK_PE_MIN_COMPLEX );
@@ -521,10 +526,12 @@
/* Calculate the correlations for each subframe */
lag_low = matrix_ptr( Lag_range_ptr, k, 0, 2 );
lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );
+ silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);
+ celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 );
for( j = lag_low; j <= lag_high; j++ ) {
basis_ptr = target_ptr - ( start_lag + j );
silk_assert( lag_counter < SCRATCH_SIZE );
- scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length );
+ scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];
lag_counter++;
}