shithub: opus

--- a/celt/pitch.c

+++ b/celt/pitch.c

@@ -250,7 +250,7 @@

 #else

 void

 #endif

-celt_pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)

+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)

    int i,j;

 #ifdef FIXED_POINT

--- a/celt/pitch.h

+++ b/celt/pitch.h

@@ -140,6 +140,6 @@

 #else

 void

 #endif

-celt_pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);

+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);

 #endif

--- a/silk/fixed/pitch_analysis_core_FIX.c

+++ b/silk/fixed/pitch_analysis_core_FIX.c

@@ -36,6 +36,7 @@

 #include "pitch_est_defines.h"

 #include "stack_alloc.h"

 #include "debug.h"

+#include "pitch.h"

 #define SCRATCH_SIZE    22

 #define SF_LENGTH_4KHZ  ( PE_SUBFR_LENGTH_MS * 4 )

@@ -96,6 +97,7 @@

     const opus_int16 *input_frame_ptr;

     opus_int   i, k, d, j;

     VARDECL( opus_int16, C );

+    VARDECL( opus_int32, xcorr32 );

     const opus_int16 *target_ptr, *basis_ptr;

     opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;

     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;

@@ -173,6 +175,7 @@

     * FIRST STAGE, operating in 4 khz

     ******************************************************************************/

     ALLOC( C, nb_subfr * CSTRIDE_8KHZ, opus_int16 );

+    ALLOC( xcorr32, MAX_LAG_4KHZ-MIN_LAG_4KHZ+1, opus_int32 );

     silk_memset( C, 0, (nb_subfr >> 1) * CSTRIDE_4KHZ * sizeof( opus_int16 ) );

     target_ptr = &frame_4kHz[ silk_LSHIFT( SF_LENGTH_4KHZ, 2 ) ];

     for( k = 0; k < nb_subfr >> 1; k++ ) {

@@ -186,8 +189,10 @@

         silk_assert( basis_ptr >= frame_4kHz );

         silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );

+        celt_pitch_xcorr( target_ptr, target_ptr - MAX_LAG_4KHZ, xcorr32, SF_LENGTH_8KHZ, MAX_LAG_4KHZ - MIN_LAG_4KHZ + 1 );

         /* Calculate first vector products before loop */

-        cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr,  SF_LENGTH_8KHZ );

+        cross_corr = xcorr32[ MAX_LAG_4KHZ - MIN_LAG_4KHZ ];

         normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, SF_LENGTH_8KHZ );

         normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, SF_LENGTH_8KHZ ) );

         normalizer = silk_ADD32( normalizer, silk_SMULBB( SF_LENGTH_8KHZ, 4000 ) );

@@ -203,7 +208,7 @@

             silk_assert( basis_ptr >= frame_4kHz );

             silk_assert( basis_ptr + SF_LENGTH_8KHZ <= frame_4kHz + frame_length_4kHz );

-            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, SF_LENGTH_8KHZ );

+            cross_corr = xcorr32[ MAX_LAG_4KHZ - d ];

             /* Add contribution of new sample and remove contribution from oldest sample */

             normalizer = silk_ADD32( normalizer,

@@ -595,11 +600,11 @@

     opus_int          complexity                       /* I Complexity setting          */

-    const opus_int16 *target_ptr, *basis_ptr;

-    opus_int32 cross_corr;

+    const opus_int16 *target_ptr;

     opus_int   i, j, k, lag_counter, lag_low, lag_high;

     opus_int   nb_cbk_search, delta, idx, cbk_size;

     VARDECL( opus_int32, scratch_mem );

+    VARDECL( opus_int32, xcorr32 );

     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;

     SAVE_STACK;

@@ -619,6 +624,7 @@

         cbk_size      = PE_NB_CBKS_STAGE3_10MS;

     ALLOC( scratch_mem, SCRATCH_SIZE, opus_int32 );

+    ALLOC( xcorr32, SCRATCH_SIZE, opus_int32 );

     target_ptr = &frame[ silk_LSHIFT( sf_length, 2 ) ]; /* Pointer to middle of frame */

     for( k = 0; k < nb_subfr; k++ ) {

@@ -627,11 +633,11 @@

         /* Calculate the correlations for each subframe */

         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );

         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );

+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);

+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr32, sf_length, lag_high - lag_low + 1 );

         for( j = lag_low; j <= lag_high; j++ ) {

-            basis_ptr = target_ptr - ( start_lag + j );

-            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length );

             silk_assert( lag_counter < SCRATCH_SIZE );

-            scratch_mem[ lag_counter ] = cross_corr;

+            scratch_mem[ lag_counter ] = xcorr32[ lag_high - j ];

             lag_counter++;

--- a/silk/float/pitch_analysis_core_FLP.c

+++ b/silk/float/pitch_analysis_core_FLP.c

@@ -35,6 +35,7 @@

 #include "SigProc_FLP.h"

 #include "SigProc_FIX.h"

 #include "pitch_est_defines.h"

+#include "pitch.h"

 #define SCRATCH_SIZE        22

@@ -84,6 +85,7 @@

     opus_int32 filt_state[ 6 ];

     silk_float threshold, contour_bias;

     silk_float C[ PE_MAX_NB_SUBFR][ (PE_MAX_LAG >> 1) + 5 ];

+    opus_val32 xcorr[ PE_MAX_LAG_MS * 4 - PE_MIN_LAG_MS * 4 + 1 ];

     silk_float CC[ PE_NB_CBKS_STAGE2_EXT ];

     const silk_float *target_ptr, *basis_ptr;

     double    cross_corr, normalizer, energy, energy_tmp;

@@ -174,8 +176,10 @@

         silk_assert( basis_ptr >= frame_4kHz );

         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

+        celt_pitch_xcorr( target_ptr, target_ptr-max_lag_4kHz, xcorr, sf_length_8kHz, max_lag_4kHz - min_lag_4kHz + 1 );

         /* Calculate first vector products before loop */

-        cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );

+        cross_corr = xcorr[ max_lag_4kHz - min_lag_4kHz ];

         normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +

                      silk_energy_FLP( basis_ptr,  sf_length_8kHz ) +

                      sf_length_8kHz * 4000.0f;

@@ -190,7 +194,7 @@

             silk_assert( basis_ptr >= frame_4kHz );

             silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

-            cross_corr = silk_inner_product_FLP(target_ptr, basis_ptr, sf_length_8kHz);

+            cross_corr = xcorr[ max_lag_4kHz - d ];

             /* Add contribution of new sample and remove contribution from oldest sample */

             normalizer +=

@@ -496,6 +500,7 @@

     opus_int   i, j, k, lag_counter, lag_low, lag_high;

     opus_int   nb_cbk_search, delta, idx, cbk_size;

     silk_float scratch_mem[ SCRATCH_SIZE ];

+    opus_val32 xcorr[ SCRATCH_SIZE ];

     const opus_int8 *Lag_range_ptr, *Lag_CB_ptr;

     silk_assert( complexity >= SILK_PE_MIN_COMPLEX );

@@ -521,10 +526,12 @@

         /* Calculate the correlations for each subframe */

         lag_low  = matrix_ptr( Lag_range_ptr, k, 0, 2 );

         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );

+        silk_assert(lag_high-lag_low+1 <= SCRATCH_SIZE);

+        celt_pitch_xcorr( target_ptr, target_ptr - start_lag - lag_high, xcorr, sf_length, lag_high - lag_low + 1 );

         for( j = lag_low; j <= lag_high; j++ ) {

             basis_ptr = target_ptr - ( start_lag + j );

             silk_assert( lag_counter < SCRATCH_SIZE );

-            scratch_mem[ lag_counter ] = (silk_float)silk_inner_product_FLP( target_ptr, basis_ptr, sf_length );

+            scratch_mem[ lag_counter ] = xcorr[ lag_high - j ];

             lag_counter++;