shithub: opus

--- a/silk/SigProc_FIX.h

+++ b/silk/SigProc_FIX.h

@@ -168,12 +168,6 @@

     const opus_int32            inLog_Q7            /* I  input on log scale                                            */

);

-/* Function that returns the maximum absolut value of the input vector */

-opus_int16 silk_int16_array_maxabs(                 /* O   Maximum absolute value, max: 2^15-1                          */

-    const opus_int16            *vec,               /* I   Input vector  [len]                                          */

-    const opus_int32            len                 /* I   Length of input vector                                       */

-);

 /* Compute number of bits to right shift the sum of squares of a vector    */

 /* of int16s to make it fit in an int32                                    */

 void silk_sum_sqr_shift(

@@ -252,7 +246,7 @@

     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */

     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */

     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */

-    const opus_int              search_thres2_Q15,  /* I    Final threshold for lag candidates 0 - 1                    */

+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */

     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */

     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */

     const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */

--- a/silk/fixed/find_pitch_lags_FIX.c

+++ b/silk/fixed/find_pitch_lags_FIX.c

@@ -41,7 +41,7 @@

     opus_int   buf_len, i, scale;

-    opus_int32 thrhld_Q15, res_nrg;

+    opus_int32 thrhld_Q13, res_nrg;

     const opus_int16 *x_buf, *x_buf_ptr;

     opus_int16 Wsig[      FIND_PITCH_LPC_WIN_MAX ], *Wsig_ptr;

     opus_int32 auto_corr[ MAX_FIND_PITCH_LPC_ORDER + 1 ];

@@ -110,12 +110,12 @@

     if( psEnc->sCmn.indices.signalType != TYPE_NO_VOICE_ACTIVITY && psEnc->sCmn.first_frame_after_reset == 0 ) {

         /* Threshold for pitch estimator */

-        thrhld_Q15 = SILK_FIX_CONST( 0.6, 15 );

-        thrhld_Q15 = silk_SMLABB( thrhld_Q15, SILK_FIX_CONST( -0.004, 15 ), psEnc->sCmn.pitchEstimationLPCOrder );

-        thrhld_Q15 = silk_SMLABB( thrhld_Q15, SILK_FIX_CONST( -0.1,   7  ), psEnc->sCmn.speech_activity_Q8 );

-        thrhld_Q15 = silk_SMLABB( thrhld_Q15, SILK_FIX_CONST( -0.15,  15 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );

-        thrhld_Q15 = silk_SMLAWB( thrhld_Q15, SILK_FIX_CONST( -0.1,   16 ), psEnc->sCmn.input_tilt_Q15 );

-        thrhld_Q15 = silk_SAT16(  thrhld_Q15 );

+        thrhld_Q13 = SILK_FIX_CONST( 0.6, 13 );

+        thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.004, 13 ), psEnc->sCmn.pitchEstimationLPCOrder );

+        thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   21  ), psEnc->sCmn.speech_activity_Q8 );

+        thrhld_Q13 = silk_SMLABB( thrhld_Q13, SILK_FIX_CONST( -0.15,  13 ), silk_RSHIFT( psEnc->sCmn.prevSignalType, 1 ) );

+        thrhld_Q13 = silk_SMLAWB( thrhld_Q13, SILK_FIX_CONST( -0.1,   14 ), psEnc->sCmn.input_tilt_Q15 );

+        thrhld_Q13 = silk_SAT16(  thrhld_Q13 );

         /*****************************************/

         /* Call pitch estimator                  */

@@ -122,7 +122,7 @@

         /*****************************************/

         if( silk_pitch_analysis_core( res, psEncCtrl->pitchL, &psEnc->sCmn.indices.lagIndex, &psEnc->sCmn.indices.contourIndex,

                 &psEnc->LTPCorr_Q15, psEnc->sCmn.prevLag, psEnc->sCmn.pitchEstimationThreshold_Q16,

-                (opus_int16)thrhld_Q15, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )

+                (opus_int)thrhld_Q13, psEnc->sCmn.fs_kHz, psEnc->sCmn.pitchEstimationComplexity, psEnc->sCmn.nb_subfr ) == 0 )

             psEnc->sCmn.indices.signalType = TYPE_VOICED;

         } else {

--- a/silk/fixed/pitch_analysis_core_FIX.c

+++ b/silk/fixed/pitch_analysis_core_FIX.c

@@ -41,8 +41,8 @@

 /************************************************************/

 /* Internally used functions                                */

 /************************************************************/

-void silk_P_Ana_calc_corr_st3(

-    opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM correlation array */

+static void silk_P_Ana_calc_corr_st3(

+    opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */

     const opus_int16  frame[],                         /* I vector to correlate         */

     opus_int          start_lag,                       /* I lag offset to search around */

     opus_int          sf_length,                       /* I length of a 5 ms subframe   */

@@ -50,8 +50,8 @@

     opus_int          complexity                       /* I Complexity setting          */

);

-void silk_P_Ana_calc_energy_st3(

-    opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM energy array */

+static void silk_P_Ana_calc_energy_st3(

+    opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM energy array */

     const opus_int16  frame[],                         /* I vector to calc energy in    */

     opus_int          start_lag,                       /* I lag offset to search around */

     opus_int          sf_length,                       /* I length of one 5 ms subframe */

@@ -59,12 +59,6 @@

     opus_int          complexity                       /* I Complexity setting          */

);

-opus_int32 silk_P_Ana_find_scaling(

-    const opus_int16  *frame,

-    const opus_int    frame_length,

-    const opus_int    sum_sqr_len

-);

 /*************************************************************/

 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */

 /*************************************************************/

@@ -76,7 +70,7 @@

     opus_int                    *LTPCorr_Q15,       /* I/O  Normalized correlation; input: value from previous frame    */

     opus_int                    prevLag,            /* I    Last lag of previous frame; set to zero is unvoiced         */

     const opus_int32            search_thres1_Q16,  /* I    First stage threshold for lag candidates 0 - 1              */

-    const opus_int              search_thres2_Q15,  /* I    Final threshold for lag candidates 0 - 1                    */

+    const opus_int              search_thres2_Q13,  /* I    Final threshold for lag candidates 0 - 1                    */

     const opus_int              Fs_kHz,             /* I    Sample frequency (kHz)                                      */

     const opus_int              complexity,         /* I    Complexity setting, 0-2, where 2 is highest                 */

     const opus_int              nb_subfr            /* I    number of 5 ms subframes                                    */

@@ -93,18 +87,18 @@

     opus_int32 cross_corr, normalizer, energy, shift, energy_basis, energy_target;

     opus_int   d_srch[ PE_D_SRCH_LENGTH ], Cmax, length_d_srch, length_d_comp;

     opus_int16 d_comp[ ( PE_MAX_LAG >> 1 ) + 5 ];

-    opus_int32 sum, threshold, temp32, lag_counter;

+    opus_int32 sum, threshold, lag_counter;

     opus_int   CBimax, CBimax_new, CBimax_old, lag, start_lag, end_lag, lag_new;

     opus_int32 CC[ PE_NB_CBKS_STAGE2_EXT ], CCmax, CCmax_b, CCmax_new_b, CCmax_new;

     opus_int32 energies_st3[  PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];

     opus_int32 crosscorr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ];

-    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz, max_sum_sq_length;

+    opus_int   frame_length, frame_length_8kHz, frame_length_4kHz;

     opus_int   sf_length, sf_length_8kHz, sf_length_4kHz;

     opus_int   min_lag, min_lag_8kHz, min_lag_4kHz;

     opus_int   max_lag, max_lag_8kHz, max_lag_4kHz;

-    opus_int32 contour_bias_Q20, diff, lz, lshift;

+    opus_int32 contour_bias_Q15, diff;

     opus_int   nb_cbk_search, cbk_size;

-    opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q15, corr_thres_Q15;

+    opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;

     const opus_int8 *Lag_CB_ptr;

     /* Check for valid sampling frequency */

     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );

@@ -114,7 +108,7 @@

     silk_assert( complexity <= SILK_PE_MAX_COMPLEX );

     silk_assert( search_thres1_Q16 >= 0 && search_thres1_Q16 <= (1<<16) );

-    silk_assert( search_thres2_Q15 >= 0 && search_thres2_Q15 <= (1<<15) );

+    silk_assert( search_thres2_Q13 >= 0 && search_thres2_Q13 <= (1<<13) );

     /* Set up frame lengths max / min lag for the sampling frequency */

     frame_length      = ( PE_LTP_MEM_LENGTH_MS + nb_subfr * PE_SUBFR_LENGTH_MS ) * Fs_kHz;

@@ -130,8 +124,6 @@

     max_lag_4kHz      = PE_MAX_LAG_MS * 4;

     max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;

-    silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );

     /* Resample from input sampled at Fs_kHz to 8 kHz */

     if( Fs_kHz == 16 ) {

         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );

@@ -159,9 +151,9 @@

     *******************************************************************************/

     /* Inner product is calculated with different lengths, so scale for the worst case */

-    max_sum_sq_length = silk_max_32( sf_length_8kHz, silk_LSHIFT( sf_length_4kHz, 2 ) );

-    shift = silk_P_Ana_find_scaling( frame_4kHz, frame_length_4kHz, max_sum_sq_length );

+    silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );

     if( shift > 0 ) {

+        shift = silk_RSHIFT( shift, 1 );

         for( i = 0; i < frame_length_4kHz; i++ ) {

             frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );

@@ -170,6 +162,7 @@

     /******************************************************************************

     * FIRST STAGE, operating in 4 khz

     ******************************************************************************/

+    silk_memset( C, 0, sizeof( opus_int16 ) * nb_subfr * ( ( PE_MAX_LAG >> 1 ) + 5) );

     target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];

     for( k = 0; k < nb_subfr >> 1; k++ ) {

         /* Check that we are within range of the array */

@@ -183,12 +176,12 @@

         silk_assert( basis_ptr + sf_length_8kHz <= frame_4kHz + frame_length_4kHz );

         /* Calculate first vector products before loop */

-        cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );

-        normalizer = silk_inner_prod_aligned( basis_ptr,  basis_ptr, sf_length_8kHz );

-        normalizer = silk_ADD_SAT32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );

+        cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr,  sf_length_8kHz );

+        normalizer = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );

+        normalizer = silk_ADD32( normalizer, silk_inner_prod_aligned( basis_ptr,  basis_ptr, sf_length_8kHz ) );

+        normalizer = silk_ADD32( normalizer, silk_SMULBB( sf_length_8kHz, 4000 ) );

-        temp32 = silk_DIV32( cross_corr, silk_SQRT_APPROX( normalizer ) + 1 );

-        C[ k ][ min_lag_4kHz ] = (opus_int16)silk_SAT16( temp32 );        /* Q0 */

+        C[ k ][ min_lag_4kHz ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 ); /* Q13 */

         /* From now on normalizer is computed recursively */

         for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {

@@ -201,12 +194,11 @@

             cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );

             /* Add contribution of new sample and remove contribution from oldest sample */

-            normalizer +=

+            normalizer = silk_ADD32( normalizer,

                 silk_SMULBB( basis_ptr[ 0 ], basis_ptr[ 0 ] ) -

-                silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] );

+                silk_SMULBB( basis_ptr[ sf_length_8kHz ], basis_ptr[ sf_length_8kHz ] ) );

-            temp32 = silk_DIV32( cross_corr, silk_SQRT_APPROX( normalizer ) + 1 );

-            C[ k ][ d ] = (opus_int16)silk_SAT16( temp32 );                        /* Q0 */

+            C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, normalizer, 13 + 1 );        /* Q13 */

         /* Update target pointer */

         target_ptr += sf_length_8kHz;

@@ -215,20 +207,16 @@

     /* Combine two subframes into single correlation measure and apply short-lag bias */

     if( nb_subfr == PE_MAX_NB_SUBFR ) {

         for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {

-            sum = (opus_int32)C[ 0 ][ i ] + (opus_int32)C[ 1 ][ i ];                /* Q0 */

-            silk_assert( silk_RSHIFT( sum, 1 ) == silk_SAT16( silk_RSHIFT( sum, 1 ) ) );

-            sum = silk_RSHIFT( sum, 1 );                                           /* Q-1 */

-            silk_assert( silk_LSHIFT( (opus_int32)-i, 4 ) == silk_SAT16( silk_LSHIFT( (opus_int32)-i, 4 ) ) );

-            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                    /* Q-1 */

-            silk_assert( sum == silk_SAT16( sum ) );

-            C[ 0 ][ i ] = (opus_int16)sum;                                         /* Q-1 */

+            sum = (opus_int32)C[ 0 ][ i ] + (opus_int32) C[ 1 ][ i ];                           /* Q14 */

+            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */

+            C[ 0 ][ i ] = (opus_int16)sum;                                                      /* Q14 */

     } else {

         /* Only short-lag bias */

         for( i = max_lag_4kHz; i >= min_lag_4kHz; i-- ) {

-            sum = (opus_int32)C[ 0 ][ i ];

-            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                    /* Q-1 */

-            C[ 0 ][ i ] = (opus_int16)sum;                                         /* Q-1 */

+            sum = silk_LSHIFT( (opus_int32)C[ 0 ][ i ], 1 );                                    /* Q14 */

+            sum = silk_SMLAWB( sum, sum, silk_LSHIFT( -i, 4 ) );                                /* Q14 */

+            C[ 0 ][ i ] = (opus_int16)sum;                                                      /* Q14 */

@@ -238,14 +226,8 @@

     silk_insertion_sort_decreasing_int16( &C[ 0 ][ min_lag_4kHz ], d_srch, max_lag_4kHz - min_lag_4kHz + 1, length_d_srch );

     /* Escape if correlation is very low already here */

-    target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];

-    energy = silk_inner_prod_aligned( target_ptr, target_ptr, silk_LSHIFT( sf_length_4kHz, 2 ) );

-    energy = silk_ADD_SAT32( energy, 1000 );                                  /* Q0 */

-    Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ];                                  /* Q-1 */

-    threshold = silk_SMULBB( Cmax, Cmax );                                    /* Q-2 */

-    /* Compare in Q-2 domain */

-    if( silk_RSHIFT( energy, 4 + 2 ) > threshold ) {

+    Cmax = (opus_int)C[ 0 ][ min_lag_4kHz ];                                                    /* Q14 */

+    if( Cmax < SILK_FIX_CONST( 0.2, 14 ) ) {

         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );

         *LTPCorr_Q15  = 0;

         *lagIndex     = 0;

@@ -306,8 +288,9 @@

     ** Scale signal down to avoid correlations measures from overflowing

     *******************************************************************************/

     /* find scaling as max scaling for each subframe */

-    shift = silk_P_Ana_find_scaling( frame_8kHz, frame_length_8kHz, sf_length_8kHz );

+    silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );

     if( shift > 0 ) {

+        shift = silk_RSHIFT( shift, 1 );

         for( i = 0; i < frame_length_8kHz; i++ ) {

             frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );

@@ -325,7 +308,7 @@

         silk_assert( target_ptr >= frame_8kHz );

         silk_assert( target_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );

-        energy_target = silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz );

+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, sf_length_8kHz ), 1 );

         for( j = 0; j < length_d_comp; j++ ) {

             d = d_comp[ j ];

             basis_ptr = target_ptr - d;

@@ -334,20 +317,10 @@

             silk_assert( basis_ptr >= frame_8kHz );

             silk_assert( basis_ptr + sf_length_8kHz <= frame_8kHz + frame_length_8kHz );

-            cross_corr   = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );

-            energy_basis = silk_inner_prod_aligned( basis_ptr,  basis_ptr, sf_length_8kHz );

+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length_8kHz );

             if( cross_corr > 0 ) {

-                energy = silk_max( energy_target, energy_basis ); /* Find max to make sure first division < 1.0 */

-                lz = silk_CLZ32( cross_corr );

-                lshift = silk_LIMIT_32( lz - 1, 0, 15 );

-                temp32 = silk_DIV32( silk_LSHIFT( cross_corr, lshift ), silk_RSHIFT( energy, 15 - lshift ) + 1 ); /* Q15 */

-                silk_assert( temp32 == silk_SAT16( temp32 ) );

-                temp32 = silk_SMULWB( cross_corr, temp32 ); /* Q(-1), cc * ( cc / max(b, t) ) */

-                temp32 = silk_ADD_SAT32( temp32, temp32 );  /* Q(0) */

-                lz = silk_CLZ32( temp32 );

-                lshift = silk_LIMIT_32( lz - 1, 0, 15 );

-                energy = silk_min( energy_target, energy_basis );

-                C[ k ][ d ] = silk_DIV32( silk_LSHIFT( temp32, lshift ), silk_RSHIFT( energy, 15 - lshift ) + 1 ); /* Q15*/

+                energy_basis = silk_inner_prod_aligned( basis_ptr, basis_ptr, sf_length_8kHz );

+                C[ k ][ d ] = (opus_int16)silk_DIV32_varQ( cross_corr, silk_ADD32( energy_target, energy_basis ), 13 + 1 );        /* Q13 */

             } else {

                 C[ k ][ d ] = 0;

@@ -374,7 +347,7 @@

     } else {

         prevLag_log2_Q7 = 0;

-    silk_assert( search_thres2_Q15 == silk_SAT16( search_thres2_Q15 ) );

+    silk_assert( search_thres2_Q13 == silk_SAT16( search_thres2_Q13 ) );

     /* Set up stage 2 codebook based on number of subframes */

     if( nb_subfr == PE_MAX_NB_SUBFR ) {

         cbk_size   = PE_NB_CBKS_STAGE2_EXT;

@@ -385,12 +358,10 @@

         } else {

             nb_cbk_search = PE_NB_CBKS_STAGE2;

-        corr_thres_Q15 = silk_RSHIFT( silk_SMULBB( search_thres2_Q15, search_thres2_Q15 ), 13 );

     } else {

         cbk_size       = PE_NB_CBKS_STAGE2_10MS;

         Lag_CB_ptr     = &silk_CB_lags_stage2_10_ms[ 0 ][ 0 ];

         nb_cbk_search  = PE_NB_CBKS_STAGE2_10MS;

-        corr_thres_Q15 = silk_RSHIFT( silk_SMULBB( search_thres2_Q15, search_thres2_Q15 ), 14 );

     for( k = 0; k < length_d_srch; k++ ) {

@@ -399,7 +370,7 @@

             CC[ j ] = 0;

             for( i = 0; i < nb_subfr; i++ ) {

                 /* Try all codebooks */

-                CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size )];

+                CC[ j ] = CC[ j ] + (opus_int32)C[ i ][ d + matrix_ptr( Lag_CB_ptr, i, j, cbk_size ) ];

         /* Find best codebook */

@@ -413,24 +384,24 @@

         /* Bias towards shorter lags */

-        lag_log2_Q7 = silk_lin2log( (opus_int32)d ); /* Q7 */

+        lag_log2_Q7 = silk_lin2log( d ); /* Q7 */

         silk_assert( lag_log2_Q7 == silk_SAT16( lag_log2_Q7 ) );

-        silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ) ) );

-        CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 15 ), lag_log2_Q7 ), 7 ); /* Q15 */

+        silk_assert( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ) ) );

+        CCmax_new_b = CCmax_new - silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_SHORTLAG_BIAS, 13 ), lag_log2_Q7 ), 7 ); /* Q13 */

         /* Bias towards previous lag */

-        silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ) ) );

+        silk_assert( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) == silk_SAT16( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ) ) );

         if( prevLag > 0 ) {

             delta_lag_log2_sqr_Q7 = lag_log2_Q7 - prevLag_log2_Q7;

             silk_assert( delta_lag_log2_sqr_Q7 == silk_SAT16( delta_lag_log2_sqr_Q7 ) );

             delta_lag_log2_sqr_Q7 = silk_RSHIFT( silk_SMULBB( delta_lag_log2_sqr_Q7, delta_lag_log2_sqr_Q7 ), 7 );

-            prev_lag_bias_Q15 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 15 ), *LTPCorr_Q15 ), 15 ); /* Q15 */

-            prev_lag_bias_Q15 = silk_DIV32( silk_MUL( prev_lag_bias_Q15, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + ( 1 << 6 ) );

-            CCmax_new_b -= prev_lag_bias_Q15; /* Q15 */

+            prev_lag_bias_Q13 = silk_RSHIFT( silk_SMULBB( nb_subfr * SILK_FIX_CONST( PE_PREVLAG_BIAS, 13 ), *LTPCorr_Q15 ), 15 ); /* Q13 */

+            prev_lag_bias_Q13 = silk_DIV32( silk_MUL( prev_lag_bias_Q13, delta_lag_log2_sqr_Q7 ), delta_lag_log2_sqr_Q7 + SILK_FIX_CONST( 0.5, 7 ) );

+            CCmax_new_b -= prev_lag_bias_Q13; /* Q13 */

         if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */

-            CCmax_new > corr_thres_Q15                              &&  /* Correlation needs to be high enough to be voiced */

+            CCmax_new > silk_SMULBB( nb_subfr, search_thres2_Q13 )  &&  /* Correlation needs to be high enough to be voiced */

             silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz      /* Lag must be in range                             */

) {

             CCmax_b = CCmax_new_b;

@@ -449,15 +420,20 @@

         return 1;

+    /* Output normalized correlation */

+    *LTPCorr_Q15 = (opus_int)silk_LSHIFT( silk_DIV32_16( CCmax, nb_subfr ), 2 );

+    silk_assert( *LTPCorr_Q15 >= 0 );

     if( Fs_kHz > 8 ) {

         /***************************************************************************/

         /* Scale input signal down to avoid correlations measures from overflowing */

         /***************************************************************************/

         /* find scaling as max scaling for each subframe */

-        shift = silk_P_Ana_find_scaling( frame, frame_length, sf_length );

+        silk_sum_sqr_shift( &energy, &shift, frame, frame_length );

         if( shift > 0 ) {

             /* Move signal to scratch mem because the input signal should be unchanged */

             /* Reuse the 32 bit scratch mem vector, use a 16 bit pointer from now */

+            shift = silk_RSHIFT( shift, 1 );

             input_frame_ptr = (opus_int16*)scratch_mem;

             for( i = 0; i < frame_length; i++ ) {

                 input_frame_ptr[ i ] = silk_RSHIFT( frame[ i ], shift );

@@ -483,9 +459,7 @@

         start_lag = silk_max_int( lag - 2, min_lag );

         end_lag   = silk_min_int( lag + 2, max_lag );

         lag_new   = lag;                                    /* to avoid undefined lag */

-        CBimax    = 0;                                        /* to avoid undefined lag */

-        silk_assert( silk_LSHIFT( CCmax, 13 ) >= 0 );

-        *LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */

+        CBimax    = 0;                                      /* to avoid undefined lag */

         CCmax = silk_int32_MIN;

         /* pitch lags according to second stage */

@@ -498,7 +472,7 @@

         lag_counter = 0;

         silk_assert( lag == silk_SAT16( lag ) );

-        contour_bias_Q20 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 20 ), lag );

+        contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );

         /* Set up codebook parameters according to complexity setting and frame length */

         if( nb_subfr == PE_MAX_NB_SUBFR ) {

@@ -510,41 +484,29 @@

             cbk_size        = PE_NB_CBKS_STAGE3_10MS;

             Lag_CB_ptr      = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];

+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];

+        energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length ), 1 );

         for( d = start_lag; d <= end_lag; d++ ) {

             for( j = 0; j < nb_cbk_search; j++ ) {

                 cross_corr = 0;

-                energy     = 0;

+                energy     = energy_target;

                 for( k = 0; k < nb_subfr; k++ ) {

-                    silk_assert( PE_MAX_NB_SUBFR == 4 );

-                    energy     += silk_RSHIFT( energies_st3[  k ][ j ][ lag_counter ], 2 ); /* use mean, to avoid overflow */

+                    cross_corr = silk_ADD32( cross_corr, crosscorr_st3[ k ][ j ][ lag_counter ] );

+                    energy     = silk_ADD32( energy, energies_st3[  k ][ j ][ lag_counter ] );

                     silk_assert( energy >= 0 );

-                    cross_corr += silk_RSHIFT( crosscorr_st3[ k ][ j ][ lag_counter ], 2 ); /* use mean, to avoid overflow */

                 if( cross_corr > 0 ) {

-                    /* Divide cross_corr / energy and get result in Q15 */

-                    lz = silk_CLZ32( cross_corr );

-                    /* Divide with result in Q13, cross_corr could be larger than energy */

-                    lshift = silk_LIMIT_32( lz - 1, 0, 13 );

-                    CCmax_new = silk_DIV32( silk_LSHIFT( cross_corr, lshift ), silk_RSHIFT( energy, 13 - lshift ) + 1 );

-                    CCmax_new = silk_SAT16( CCmax_new );

-                    CCmax_new = silk_SMULWB( cross_corr, CCmax_new );

-                    /* Saturate */

-                    if( CCmax_new > silk_RSHIFT( silk_int32_MAX, 3 ) ) {

-                        CCmax_new = silk_int32_MAX;

-                    } else {

-                        CCmax_new = silk_LSHIFT( CCmax_new, 3 );

-                    }

+                    CCmax_new = silk_DIV32_varQ( cross_corr, energy, 13 + 1 );          /* Q13 */

                     /* Reduce depending on flatness of contour */

-                    diff = silk_int16_MAX - silk_RSHIFT( silk_MUL( contour_bias_Q20, j ), 5 ); /* Q20 -> Q15 */

+                    diff = silk_int16_MAX - silk_MUL( contour_bias_Q15, j );            /* Q15 */

                     silk_assert( diff == silk_SAT16( diff ) );

-                    CCmax_new = silk_LSHIFT( silk_SMULWB( CCmax_new, diff ), 1 );

+                    CCmax_new = silk_SMULWB( CCmax_new, diff );                         /* Q12 */

                 } else {

                     CCmax_new = 0;

-                if( CCmax_new > CCmax                                               &&

-                   ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag

-                   ) {

+                if( CCmax_new > CCmax && ( d + silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {

                     CCmax   = CCmax_new;

                     lag_new = d;

                     CBimax  = j;

@@ -560,12 +522,10 @@

         *lagIndex = (opus_int16)( lag_new - min_lag);

         *contourIndex = (opus_int8)CBimax;

     } else {        /* Fs_kHz == 8 */

-        /* Save Lags and correlation */

-        CCmax = silk_max( CCmax, 0 );

-        *LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */

+        /* Save Lags */

         for( k = 0; k < nb_subfr; k++ ) {

             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );

-            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz );

+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );

         *lagIndex = (opus_int16)( lag - min_lag_8kHz );

         *contourIndex = (opus_int8)CBimax;

@@ -575,12 +535,21 @@

     return 0;

-/*************************************************************************/

-/* Calculates the correlations used in stage 3 search. In order to cover */

-/* the whole lag codebook for all the searched offset lags (lag +- 2),   */

-/*************************************************************************/

-void silk_P_Ana_calc_corr_st3(

-    opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM correlation array */

+/***********************************************************************

+ * Calculates the correlations used in stage 3 search. In order to cover

+ * the whole lag codebook for all the searched offset lags (lag +- 2),

+ * the following correlations are needed in each sub frame:

+ *

+ * sf1: lag range [-8,...,7] total 16 correlations

+ * sf2: lag range [-4,...,4] total 9 correlations

+ * sf3: lag range [-3,...,4] total 8 correlations

+ * sf4: lag range [-6,...,8] total 15 correlations

+ *

+ * In total 48 correlations. The direct implementation computed in worst

+ * case 4*12*5 = 240 correlations, but more likely around 120.

+ ***********************************************************************/

+static void silk_P_Ana_calc_corr_st3(

+    opus_int32        cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* O 3 DIM correlation array */

     const opus_int16  frame[],                         /* I vector to correlate         */

     opus_int          start_lag,                       /* I lag offset to search around */

     opus_int          sf_length,                       /* I length of a 5 ms subframe   */

@@ -620,7 +589,7 @@

         lag_high = matrix_ptr( Lag_range_ptr, k, 1, 2 );

         for( j = lag_low; j <= lag_high; j++ ) {

             basis_ptr = target_ptr - ( start_lag + j );

-            cross_corr = silk_inner_prod_aligned( (opus_int16*)target_ptr, (opus_int16*)basis_ptr, sf_length );

+            cross_corr = silk_inner_prod_aligned( target_ptr, basis_ptr, sf_length );

             silk_assert( lag_counter < SCRATCH_SIZE );

             scratch_mem[ lag_counter ] = cross_corr;

             lag_counter++;

@@ -645,13 +614,13 @@

 /* Calculate the energies for first two subframes. The energies are */

 /* calculated recursively.                                          */

 /********************************************************************/

-void silk_P_Ana_calc_energy_st3(

-    opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],/* (O) 3 DIM energy array */

-    const opus_int16  frame[],                         /* I vector to calc energy in    */

-    opus_int          start_lag,                       /* I lag offset to search around */

-    opus_int          sf_length,                       /* I length of one 5 ms subframe */

-    opus_int          nb_subfr,                     /* I number of subframes         */

-    opus_int          complexity                       /* I Complexity setting          */

+static void silk_P_Ana_calc_energy_st3(

+    opus_int32        energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ],    /* O 3 DIM energy array */

+    const opus_int16  frame[],                          /* I vector to calc energy in    */

+    opus_int          start_lag,                        /* I lag offset to search around */

+    opus_int          sf_length,                        /* I length of one 5 ms subframe */

+    opus_int          nb_subfr,                         /* I number of subframes         */

+    opus_int          complexity                        /* I Complexity setting          */

     const opus_int16 *target_ptr, *basis_ptr;

@@ -714,32 +683,5 @@

         target_ptr += sf_length;

-    }

-}

-opus_int32 silk_P_Ana_find_scaling(

-    const opus_int16  *frame,

-    const opus_int    frame_length,

-    const opus_int    sum_sqr_len

-)

-{

-    opus_int32 nbits, x_max;

-    x_max = silk_int16_array_maxabs( frame, frame_length );

-    if( x_max < silk_int16_MAX ) {

-        /* Number of bits needed for the sum of the squares */

-        nbits = 32 - silk_CLZ32( silk_SMULBB( x_max, x_max ) );

-    } else {

-        /* Here we don't know if x_max should have been silk_int16_MAX + 1, so we expect the worst case */

-        nbits = 30;

-    }

-    nbits += 17 - silk_CLZ16( sum_sqr_len );

-    /* Without a guarantee of saturation, we need to keep the 31st bit free */

-    if( nbits < 31 ) {

-        return 0;

-    } else {

-        return( nbits - 30 );

--- a/silk/fixed/vector_ops_FIX.c

+++ b/silk/fixed/vector_ops_FIX.c

@@ -94,34 +94,3 @@

     return sum;

-/* Function that returns the maximum absolut value of the input vector */

-opus_int16 silk_int16_array_maxabs(                 /* O   Maximum absolute value, max: 2^15-1                          */

-    const opus_int16            *vec,               /* I   Input vector  [len]                                          */

-    const opus_int32            len                 /* I   Length of input vector                                       */

-)

-{

-    opus_int32 max = 0, i, lvl = 0, ind;

-    if( len == 0 ) return 0;

-    ind = len - 1;

-    max = silk_SMULBB( vec[ ind ], vec[ ind ] );

-    for( i = len - 2; i >= 0; i-- ) {

-        lvl = silk_SMULBB( vec[ i ], vec[ i ] );

-        if( lvl > max ) {

-            max = lvl;

-            ind = i;

-        }

-    }

-    /* Do not return 32768, as it will not fit in an int16 so may lead to problems later on */

-    if( max >= 1073676289 ) {           /* (2^15-1)^2 = 1073676289 */

-        return( silk_int16_MAX );

-    } else {

-        if( vec[ ind ] < 0 ) {

-            return( -vec[ ind ] );

-        } else {

-            return(  vec[ ind ] );

-        }

-    }

-}

--- a/silk/float/pitch_analysis_core_FLP.c

+++ b/silk/float/pitch_analysis_core_FLP.c

@@ -37,7 +37,6 @@

 #include "pitch_est_defines.h"

 #define SCRATCH_SIZE        22

-#define eps                 1.192092896e-07f

 /************************************************************/

 /* Internally used functions                                */

@@ -129,8 +128,6 @@

     max_lag_4kHz      = PE_MAX_LAG_MS * 4;

     max_lag_8kHz      = PE_MAX_LAG_MS * 8 - 1;

-    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));

     /* Resample from input sampled at Fs_kHz to 8 kHz */

     if( Fs_kHz == 16 ) {

         /* Resample to 16 -> 8 khz */

@@ -164,6 +161,7 @@

     /******************************************************************************

     * FIRST STAGE, operating in 4 khz

     ******************************************************************************/

+    silk_memset(C, 0, sizeof(silk_float) * nb_subfr * ((PE_MAX_LAG >> 1) + 5));

     target_ptr = &frame_4kHz[ silk_LSHIFT( sf_length_4kHz, 2 ) ];

     for( k = 0; k < nb_subfr >> 1; k++ ) {

         /* Check that we are within range of the array */

@@ -178,12 +176,14 @@

         /* Calculate first vector products before loop */

         cross_corr = silk_inner_product_FLP( target_ptr, basis_ptr, sf_length_8kHz );

-        normalizer = silk_energy_FLP( basis_ptr, sf_length_8kHz ) + sf_length_8kHz * 4000.0f;

+        normalizer = silk_energy_FLP( target_ptr, sf_length_8kHz ) +

+                     silk_energy_FLP( basis_ptr,  sf_length_8kHz ) +

+                     sf_length_8kHz * 4000.0f;

-        C[ 0 ][ min_lag_4kHz ] += (silk_float)(cross_corr / sqrt(normalizer));

+        C[ 0 ][ min_lag_4kHz ] += (silk_float)( 2 * cross_corr / normalizer );

         /* From now on normalizer is computed recursively */

-        for(d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++) {

+        for( d = min_lag_4kHz + 1; d <= max_lag_4kHz; d++ ) {

             basis_ptr--;

             /* Check that we are within range of the array */

@@ -196,7 +196,7 @@

             normalizer +=

                 basis_ptr[ 0 ] * (double)basis_ptr[ 0 ] -

                 basis_ptr[ sf_length_8kHz ] * (double)basis_ptr[ sf_length_8kHz ];

-            C[ 0 ][ d ] += (silk_float)(cross_corr / sqrt( normalizer ));

+            C[ 0 ][ d ] += (silk_float)( 2 * cross_corr / normalizer );

         /* Update target pointer */

         target_ptr += sf_length_8kHz;

@@ -214,13 +214,7 @@

     /* Escape if correlation is very low already here */

     Cmax = C[ 0 ][ min_lag_4kHz ];

-    target_ptr = &frame_4kHz[ silk_SMULBB( sf_length_4kHz, nb_subfr ) ];

-    energy = 1000.0f;

-    for( i = 0; i < silk_LSHIFT( sf_length_4kHz, 2 ); i++ ) {

-        energy += target_ptr[i] * (double)target_ptr[i];

-    }

-    threshold = Cmax * Cmax;

-    if( energy / 16.0f > threshold ) {

+    if( Cmax < 0.2f ) {

         silk_memset( pitch_out, 0, nb_subfr * sizeof( opus_int ) );

         *LTPCorr      = 0.0f;

         *lagIndex     = 0;

@@ -287,14 +281,14 @@

         target_ptr = &frame_8kHz[ PE_LTP_MEM_LENGTH_MS * 8 ];

     for( k = 0; k < nb_subfr; k++ ) {

-        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz );

+        energy_tmp = silk_energy_FLP( target_ptr, sf_length_8kHz ) + 1.0;

         for( j = 0; j < length_d_comp; j++ ) {

             d = d_comp[ j ];

             basis_ptr = target_ptr - d;

             cross_corr = silk_inner_product_FLP( basis_ptr, target_ptr, sf_length_8kHz );

-            energy     = silk_energy_FLP( basis_ptr, sf_length_8kHz );

             if( cross_corr > 0.0f ) {

-                C[ k ][ d ] = (silk_float)(cross_corr * cross_corr / (energy * energy_tmp + eps));

+                energy = silk_energy_FLP( basis_ptr, sf_length_8kHz );

+                C[ k ][ d ] = (silk_float)( 2 * cross_corr / ( energy + energy_tmp ) );

             } else {

                 C[ k ][ d ] = 0.0f;

@@ -317,7 +311,7 @@

         } else if( Fs_kHz == 16 ) {

             prevLag = silk_RSHIFT( prevLag, 1 );

-        prevLag_log2 = silk_log2((silk_float)prevLag);

+        prevLag_log2 = silk_log2( (silk_float)prevLag );

     } else {

         prevLag_log2 = 0;

@@ -356,23 +350,20 @@

                 CBimax_new = i;

-        CCmax_new = silk_max_float(CCmax_new, 0.0f); /* To avoid taking square root of negative number later */

-        CCmax_new_b = CCmax_new;

         /* Bias towards shorter lags */

-        lag_log2 = silk_log2((silk_float)d);

-        CCmax_new_b -= PE_SHORTLAG_BIAS * nb_subfr * lag_log2;

+        lag_log2 = silk_log2( (silk_float)d );

+        CCmax_new_b = CCmax_new - PE_SHORTLAG_BIAS * nb_subfr * lag_log2;

         /* Bias towards previous lag */

         if( prevLag > 0 ) {

             delta_lag_log2_sqr = lag_log2 - prevLag_log2;

             delta_lag_log2_sqr *= delta_lag_log2_sqr;

-            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / (delta_lag_log2_sqr + 0.5f);

+            CCmax_new_b -= PE_PREVLAG_BIAS * nb_subfr * (*LTPCorr) * delta_lag_log2_sqr / ( delta_lag_log2_sqr + 0.5f );

-        if( CCmax_new_b > CCmax_b                                   &&  /* Find maximum biased correlation                  */

-            CCmax_new > nb_subfr * search_thres2 * search_thres2    &&  /* Correlation needs to be high enough to be voiced */

-            silk_CB_lags_stage2[ 0 ][ CBimax_new ] <= min_lag_8kHz      /* Lag must be in range                             */

+        if( CCmax_new_b > CCmax_b &&                /* Find maximum biased correlation                  */

+            CCmax_new > nb_subfr * search_thres2    /* Correlation needs to be high enough to be voiced */

) {

             CCmax_b = CCmax_new_b;

             CCmax   = CCmax_new;

@@ -390,6 +381,10 @@

         return 1;

+    /* Output normalized correlation */

+    *LTPCorr = (silk_float)( CCmax / nb_subfr );

+    silk_assert( *LTPCorr >= 0.0f );

     if( Fs_kHz > 8 ) {

         /* Search in original signal */

@@ -406,8 +401,6 @@

         end_lag   = silk_min_int( lag + 2, max_lag );

         lag_new   = lag;                                    /* to avoid undefined lag */

         CBimax    = 0;                                      /* to avoid undefined lag */

-        silk_assert( CCmax >= 0.0f );

-        *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr );    /* Output normalized correlation */

         CCmax = -1000.0f;

@@ -430,16 +423,18 @@

             Lag_CB_ptr    = &silk_CB_lags_stage3_10_ms[ 0 ][ 0 ];

+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];

+        energy_tmp = silk_energy_FLP( target_ptr, nb_subfr * sf_length ) + 1.0;

         for( d = start_lag; d <= end_lag; d++ ) {

             for( j = 0; j < nb_cbk_search; j++ ) {

                 cross_corr = 0.0;

-                energy = eps;

+                energy = energy_tmp;

                 for( k = 0; k < nb_subfr; k++ ) {

-                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];

                     cross_corr += cross_corr_st3[ k ][ j ][ lag_counter ];

+                    energy     +=   energies_st3[ k ][ j ][ lag_counter ];

                 if( cross_corr > 0.0 ) {

-                    CCmax_new = (silk_float)(cross_corr * cross_corr / energy);

+                    CCmax_new = (silk_float)( 2 * cross_corr / energy );

                     /* Reduce depending on flatness of contour */

                     CCmax_new *= 1.0f - contour_bias * j;

                 } else {

@@ -446,9 +441,7 @@

                     CCmax_new = 0.0f;

-                if( CCmax_new > CCmax &&

-                   ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag

-                   ) {

+                if( CCmax_new > CCmax && ( d + (opus_int)silk_CB_lags_stage3[ 0 ][ j ] ) <= max_lag ) {

                     CCmax   = CCmax_new;

                     lag_new = d;

                     CBimax  = j;

@@ -464,12 +457,10 @@

         *lagIndex = (opus_int16)( lag_new - min_lag );

         *contourIndex = (opus_int8)CBimax;

     } else {        /* Fs_kHz == 8 */

-        /* Save Lags and correlation */

-        silk_assert( CCmax >= 0.0f );

-        *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */

+        /* Save Lags */

         for( k = 0; k < nb_subfr; k++ ) {

             pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size );

-            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * Fs_kHz );

+            pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, PE_MAX_LAG_MS * 8 );

         *lagIndex = (opus_int16)( lag - min_lag_8kHz );

         *contourIndex = (opus_int8)CBimax;

@@ -479,6 +470,19 @@

     return 0;

+/***********************************************************************

+/* Calculates the correlations used in stage 3 search. In order to cover

+/* the whole lag codebook for all the searched offset lags (lag +- 2),

+/* the following correlations are needed in each sub frame:

+/*

+/* sf1: lag range [-8,...,7] total 16 correlations

+/* sf2: lag range [-4,...,4] total 9 correlations

+/* sf3: lag range [-3,...,4] total 8 correlations

+/* sf4: lag range [-6,....8] total 15 correlations

+/*

+/* In total 48 correlations. The direct implementation computed in worst

+/* case 4*12*5 = 240 correlations, but more likely around 120.

+/***********************************************************************/

 static void silk_P_Ana_calc_corr_st3(

     silk_float cross_corr_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */

     const silk_float    frame[],            /* I vector to correlate                                            */

@@ -487,19 +491,6 @@

     opus_int            nb_subfr,           /* I number of subframes                                            */

     opus_int            complexity          /* I Complexity setting                                             */

-    /***********************************************************************

-     Calculates the correlations used in stage 3 search. In order to cover

-     the whole lag codebook for all the searched offset lags (lag +- 2),

-     the following correlations are needed in each sub frame:

-     sf1: lag range [-8,...,7] total 16 correlations

-     sf2: lag range [-4,...,4] total 9 correlations

-     sf3: lag range [-3,....4] total 8 correltions

-     sf4: lag range [-6,....8] total 15 correlations

-     In total 48 correlations. The direct implementation computed in worst case

-     4*12*5 = 240 correlations, but more likely around 120.

-     **********************************************************************/

     const silk_float *target_ptr, *basis_ptr;

     opus_int   i, j, k, lag_counter, lag_low, lag_high;

@@ -552,6 +543,10 @@

+/********************************************************************/

+/* Calculate the energies for first two subframes. The energies are */

+/* calculated recursively.                                          */

+/********************************************************************/

 static void silk_P_Ana_calc_energy_st3(

     silk_float energies_st3[ PE_MAX_NB_SUBFR ][ PE_NB_CBKS_STAGE3_MAX ][ PE_NB_STAGE3_LAGS ], /* O 3 DIM correlation array */

     const silk_float    frame[],            /* I vector to correlate                                            */

@@ -560,10 +555,6 @@

     opus_int            nb_subfr,           /* I number of subframes                                            */

     opus_int            complexity          /* I Complexity setting                                             */

-/****************************************************************

-Calculate the energies for first two subframes. The energies are

-calculated recursively.

-****************************************************************/

     const silk_float *target_ptr, *basis_ptr;

     double    energy;

--- a/silk/float/wrappers_FLP.c

+++ b/silk/float/wrappers_FLP.c

@@ -155,7 +155,7 @@

     /* Convert input to fix */

     for( i = 0; i < psEnc->sCmn.frame_length; i++ ) {

-        x_Q3[ i ] = silk_float2int( 8.0 * x[ i ] );

+        x_Q3[ i ] = silk_float2int( 8.0f * x[ i ] );

     /* Call NSQ */