shithub: opus

Download patch

ref: 6adea4b93ce15b5c6eab1be29da6e01806ee8853
parent: fc7ce7d588afcd6c537e5063171156c9cbf16445
author: Koen Vos <[email protected]>
date: Fri Feb 19 06:11:20 EST 2016

Pitch analysis (fixed-point, FIX) now scales the input down once at the start, instead of at every stage

--- a/silk/fixed/pitch_analysis_core_FIX.c
+++ b/silk/fixed/pitch_analysis_core_FIX.c
@@ -80,7 +80,7 @@
 /*      FIXED POINT CORE PITCH ANALYSIS FUNCTION             */
 /*************************************************************/
 opus_int silk_pitch_analysis_core(                  /* O    Voicing estimate: 0 voiced, 1 unvoiced                      */
-    const opus_int16            *frame,             /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
+    const opus_int16            *frame_unscaled,    /* I    Signal of length PE_FRAME_LENGTH_MS*Fs_kHz                  */
     opus_int                    *pitch_out,         /* O    4 pitch lag values                                          */
     opus_int16                  *lagIndex,          /* O    Lag Index                                                   */
     opus_int8                   *contourIndex,      /* O    Pitch contour Index                                         */
@@ -94,10 +94,11 @@
     int                         arch                /* I    Run-time architecture                                       */
 )
 {
-    VARDECL( opus_int16, frame_8kHz );
+    VARDECL( opus_int16, frame_8kHz_buf );
     VARDECL( opus_int16, frame_4kHz );
+	VARDECL( opus_int16, frame_scaled );
     opus_int32 filt_state[ 6 ];
-    const opus_int16 *input_frame_ptr;
+    const opus_int16 *frame, *frame_8kHz;
     opus_int   i, k, d, j;
     VARDECL( opus_int16, C );
     VARDECL( opus_int32, xcorr32 );
@@ -119,6 +120,7 @@
     opus_int32 delta_lag_log2_sqr_Q7, lag_log2_Q7, prevLag_log2_Q7, prev_lag_bias_Q13;
     const opus_int8 *Lag_CB_ptr;
     SAVE_STACK;
+
     /* Check for valid sampling frequency */
     silk_assert( Fs_kHz == 8 || Fs_kHz == 12 || Fs_kHz == 16 );
 
@@ -137,17 +139,33 @@
     min_lag           = PE_MIN_LAG_MS * Fs_kHz;
     max_lag           = PE_MAX_LAG_MS * Fs_kHz - 1;
 
+    /* Downscale input if necessary */
+    silk_sum_sqr_shift( &energy, &shift, frame_unscaled, frame_length );
+    shift += 2 - silk_CLZ32( energy );        /* at least one bit headroom */
+    ALLOC( frame_scaled, frame_length, opus_int16 );
+    if( shift > 0 ) {
+        shift = silk_RSHIFT( shift + 1, 1 );
+        for( i = 0; i < frame_length; i++ ) {
+            frame_scaled[ i ] = silk_RSHIFT( frame_unscaled[ i ], shift );
+        }
+		frame = frame_scaled;
+	} else {
+		frame = frame_unscaled;
+    }
+
+    ALLOC( frame_8kHz_buf, ( Fs_kHz == 8 ) ? 1 : frame_length_8kHz, opus_int16 );
     /* Resample from input sampled at Fs_kHz to 8 kHz */
-    ALLOC( frame_8kHz, frame_length_8kHz, opus_int16 );
     if( Fs_kHz == 16 ) {
         silk_memset( filt_state, 0, 2 * sizeof( opus_int32 ) );
-        silk_resampler_down2( filt_state, frame_8kHz, frame, frame_length );
+        silk_resampler_down2( filt_state, frame_8kHz_buf, frame, frame_length );
+		frame_8kHz = frame_8kHz_buf;
     } else if( Fs_kHz == 12 ) {
         silk_memset( filt_state, 0, 6 * sizeof( opus_int32 ) );
-        silk_resampler_down2_3( filt_state, frame_8kHz, frame, frame_length );
+        silk_resampler_down2_3( filt_state, frame_8kHz_buf, frame, frame_length );
+		frame_8kHz = frame_8kHz_buf;
     } else {
         silk_assert( Fs_kHz == 8 );
-        silk_memcpy( frame_8kHz, frame, frame_length_8kHz * sizeof(opus_int16) );
+		frame_8kHz = frame;
     }
 
     /* Decimate again to 4 kHz */
@@ -156,24 +174,17 @@
     silk_resampler_down2( filt_state, frame_4kHz, frame_8kHz, frame_length_8kHz );
 
     /* Low-pass filter */
-    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
-        frame_4kHz[ i ] = silk_ADD_SAT16( frame_4kHz[ i ], frame_4kHz[ i - 1 ] );
+	silk_assert( (frame_length_4kHz & 3) == 0 );
+    for( i = frame_length_4kHz - 1; i > 4; i -= 4 ) {
+        frame_4kHz[ i - 0 ] = silk_ADD_SAT16( frame_4kHz[ i - 0 ], frame_4kHz[ i - 1 ] );
+        frame_4kHz[ i - 1 ] = silk_ADD_SAT16( frame_4kHz[ i - 1 ], frame_4kHz[ i - 2 ] );
+        frame_4kHz[ i - 2 ] = silk_ADD_SAT16( frame_4kHz[ i - 2 ], frame_4kHz[ i - 3 ] );
+        frame_4kHz[ i - 3 ] = silk_ADD_SAT16( frame_4kHz[ i - 3 ], frame_4kHz[ i - 4 ] );
     }
+    frame_4kHz[ i - 0 ] = silk_ADD_SAT16( frame_4kHz[ i - 0 ], frame_4kHz[ i - 1 ] );
+    frame_4kHz[ i - 1 ] = silk_ADD_SAT16( frame_4kHz[ i - 1 ], frame_4kHz[ i - 2 ] );
+    frame_4kHz[ i - 2 ] = silk_ADD_SAT16( frame_4kHz[ i - 2 ], frame_4kHz[ i - 3 ] );
 
-    /*******************************************************************************
-    ** Scale 4 kHz signal down to prevent correlations measures from overflowing
-    ** find scaling as max scaling for each 8kHz(?) subframe
-    *******************************************************************************/
-
-    /* Inner product is calculated with different lengths, so scale for the worst case */
-    silk_sum_sqr_shift( &energy, &shift, frame_4kHz, frame_length_4kHz );
-    if( shift > 0 ) {
-        shift = silk_RSHIFT( shift, 1 );
-        for( i = 0; i < frame_length_4kHz; i++ ) {
-            frame_4kHz[ i ] = silk_RSHIFT( frame_4kHz[ i ], shift );
-        }
-    }
-
     /******************************************************************************
     * FIRST STAGE, operating in 4 khz
     ******************************************************************************/
@@ -311,18 +322,6 @@
     ** SECOND STAGE, operating at 8 kHz, on lag sections with high correlation
     *************************************************************************************/
 
-    /******************************************************************************
-    ** Scale signal down to avoid correlations measures from overflowing
-    *******************************************************************************/
-    /* find scaling as max scaling for each subframe */
-    silk_sum_sqr_shift( &energy, &shift, frame_8kHz, frame_length_8kHz );
-    if( shift > 0 ) {
-        shift = silk_RSHIFT( shift, 1 );
-        for( i = 0; i < frame_length_8kHz; i++ ) {
-            frame_8kHz[ i ] = silk_RSHIFT( frame_8kHz[ i ], shift );
-        }
-    }
-
     /*********************************************************************************
     * Find energy of each subframe projected onto its history, for a range of delays
     *********************************************************************************/
@@ -462,24 +461,6 @@
     silk_assert( *LTPCorr_Q15 >= 0 );
 
     if( Fs_kHz > 8 ) {
-        VARDECL( opus_int16, scratch_mem );
-        /***************************************************************************/
-        /* Scale input signal down to avoid correlations measures from overflowing */
-        /***************************************************************************/
-        /* find scaling as max scaling for each subframe */
-        silk_sum_sqr_shift( &energy, &shift, frame, frame_length );
-        ALLOC( scratch_mem, shift > 0 ? frame_length : ALLOC_NONE, opus_int16 );
-        if( shift > 0 ) {
-            /* Move signal to scratch mem because the input signal should be unchanged */
-            shift = silk_RSHIFT( shift, 1 );
-            for( i = 0; i < frame_length; i++ ) {
-                scratch_mem[ i ] = silk_RSHIFT( frame[ i ], shift );
-            }
-            input_frame_ptr = scratch_mem;
-        } else {
-            input_frame_ptr = frame;
-        }
-
         /* Search in original signal */
 
         CBimax_old = CBimax;
@@ -519,14 +500,14 @@
         /* Calculate the correlations and energies needed in stage 3 */
         ALLOC( energies_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
         ALLOC( cross_corr_st3, nb_subfr * nb_cbk_search, silk_pe_stage3_vals );
-        silk_P_Ana_calc_corr_st3(  cross_corr_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
-        silk_P_Ana_calc_energy_st3( energies_st3, input_frame_ptr, start_lag, sf_length, nb_subfr, complexity, arch );
+        silk_P_Ana_calc_corr_st3(  cross_corr_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
+        silk_P_Ana_calc_energy_st3( energies_st3, frame, start_lag, sf_length, nb_subfr, complexity, arch );
 
         lag_counter = 0;
         silk_assert( lag == silk_SAT16( lag ) );
         contour_bias_Q15 = silk_DIV32_16( SILK_FIX_CONST( PE_FLATCONTOUR_BIAS, 15 ), lag );
 
-        target_ptr = &input_frame_ptr[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
+        target_ptr = &frame[ PE_LTP_MEM_LENGTH_MS * Fs_kHz ];
         energy_target = silk_ADD32( silk_inner_prod_aligned( target_ptr, target_ptr, nb_subfr * sf_length, arch ), 1 );
         for( d = start_lag; d <= end_lag; d++ ) {
             for( j = 0; j < nb_cbk_search; j++ ) {
--- a/silk/float/pitch_analysis_core_FLP.c
+++ b/silk/float/pitch_analysis_core_FLP.c
@@ -158,9 +158,16 @@
     silk_short2float_array( frame_4kHz, frame_4_FIX, frame_length_4kHz );
 
     /* Low-pass filter */
-    for( i = frame_length_4kHz - 1; i > 0; i-- ) {
-        frame_4kHz[ i ] += frame_4kHz[ i - 1 ];
+	silk_assert( (frame_length_4kHz & 3) == 0 );
+    for( i = frame_length_4kHz - 1; i > 4; i -= 4 ) {
+        frame_4kHz[ i - 0 ] = silk_ADD_SAT16( frame_4kHz[ i - 0 ], frame_4kHz[ i - 1 ] );
+        frame_4kHz[ i - 1 ] = silk_ADD_SAT16( frame_4kHz[ i - 1 ], frame_4kHz[ i - 2 ] );
+        frame_4kHz[ i - 2 ] = silk_ADD_SAT16( frame_4kHz[ i - 2 ], frame_4kHz[ i - 3 ] );
+        frame_4kHz[ i - 3 ] = silk_ADD_SAT16( frame_4kHz[ i - 3 ], frame_4kHz[ i - 4 ] );
     }
+    frame_4kHz[ i - 0 ] = silk_ADD_SAT16( frame_4kHz[ i - 0 ], frame_4kHz[ i - 1 ] );
+    frame_4kHz[ i - 1 ] = silk_ADD_SAT16( frame_4kHz[ i - 1 ], frame_4kHz[ i - 2 ] );
+    frame_4kHz[ i - 2 ] = silk_ADD_SAT16( frame_4kHz[ i - 2 ], frame_4kHz[ i - 3 ] );
 
     /******************************************************************************
     * FIRST STAGE, operating in 4 khz