shithub: opus

--- a/silk/control_codec.c

+++ b/silk/control_codec.c

@@ -37,6 +37,14 @@

 #endif

 #include "tuning_parameters.h"

+static const int enc_delay_matrix[3][5] = { /* Encoder delay in samples, indexed [rateID(internal rate)][rateID(API rate)] */

+/* Rows: SILK internal rate (kHz); columns: API rate (kHz) 8  12  16  24  48 */

+/* 8 */   {5,  0,  3,  4,  8},

+/*12 */   {0,  6,  0,  0,  0},

+/*16 */   {4,  5, 11,  5, 18}

+}; /* every entry must stay <= MAX_ENCODER_DELAY (asserted where the table is read) */

 opus_int silk_setup_resamplers(

     silk_encoder_state_Fxx          *psEnc,             /* I/O                      */

     opus_int                         fs_kHz              /* I                        */

@@ -234,6 +242,9 @@

         psEnc->sCmn.PacketSize_ms  = PacketSize_ms;

         psEnc->sCmn.TargetRate_bps = 0;         /* trigger new SNR computation */

+    psEnc->sCmn.delay = enc_delay_matrix[rateID(fs_kHz*1000)][rateID(psEnc->sCmn.API_fs_Hz)];

+    silk_assert(psEnc->sCmn.delay <= MAX_ENCODER_DELAY);

     /* Set internal sampling frequency */

     silk_assert( fs_kHz == 8 || fs_kHz == 12 || fs_kHz == 16 );

--- a/silk/dec_API.c

+++ b/silk/dec_API.c

@@ -31,6 +31,14 @@

 #include "API.h"

 #include "main.h"

+static const int dec_delay_matrix[3][5] = { /* Decoder delay in samples, indexed [rateID(internal rate)][rateID(API rate)] */

+/* Rows: SILK internal rate (kHz); columns: API rate (kHz) 8  12  16  24  48 */

+/* 8 */   {3, 0, 2, 0, 0},

+/*12 */   {0, 8, 5, 7, 5},

+/*16 */   {0, 0, 8, 5, 5}

+}; /* every entry must stay <= MAX_DECODER_DELAY (asserted where the table is read) */

 /************************/

 /* Decoder Super Struct */

 /************************/

@@ -82,13 +90,16 @@

     opus_int   i, n, prev_fs_kHz, decode_only_middle = 0, ret = SILK_NO_ERROR;

     opus_int32 nSamplesOutDec, LBRR_symbol;

-    opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 ];

+    opus_int16 samplesOut1_tmp[ 2 ][ MAX_FS_KHZ * MAX_FRAME_LENGTH_MS + 2 + MAX_DECODER_DELAY ];

     opus_int16 samplesOut2_tmp[ MAX_API_FS_KHZ * MAX_FRAME_LENGTH_MS ];

     opus_int32 MS_pred_Q13[ 2 ] = { 0 };

     opus_int16 *resample_out_ptr;

     silk_decoder *psDec = ( silk_decoder * )decState;

     silk_decoder_state *channel_state = psDec->channel_state;

+    int delay;

+    delay = channel_state[ 0 ].delay;

     /**********************************/

     /* Test if first frame in payload */

     /**********************************/

@@ -106,6 +117,7 @@

         ret += silk_init_decoder( &channel_state[ 1 ] );

         if( psDec->nChannelsAPI == 2 ) {

             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );

+            silk_memcpy( &channel_state[ 1 ].delayBuf, &channel_state[ 0 ].delayBuf, MAX_DECODER_DELAY*sizeof(opus_int16));

@@ -143,9 +155,12 @@

     /* Initialize resampler when switching internal or external sampling frequency */

     if( prev_fs_kHz != channel_state[ 0 ].fs_kHz || channel_state[ 0 ].prev_API_sampleRate != decControl->API_sampleRate ) {

+        channel_state[ 0 ].delay = dec_delay_matrix[rateID(silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ))][rateID(decControl->API_sampleRate)];

+        silk_assert(channel_state[ 0 ].delay <= MAX_DECODER_DELAY);

         ret = silk_resampler_init( &channel_state[ 0 ].resampler_state, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ), decControl->API_sampleRate );

         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {

             silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );

+            channel_state[ 1 ].delay = channel_state[ 0 ].delay;

     channel_state[ 0 ].prev_API_sampleRate = decControl->API_sampleRate;

@@ -230,19 +245,19 @@

     /* Call decoder for one frame */

     for( n = 0; n < decControl->nChannelsInternal; n++ ) {

         if( n == 0 || decode_only_middle == 0 ) {

-            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag );

+            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 + delay ], &nSamplesOutDec, lostFlag );

         } else {

-            silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );

+            silk_memset( &samplesOut1_tmp[ n ][ 2 + delay ], 0, nSamplesOutDec * sizeof( opus_int16 ) );

     if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {

         /* Convert Mid/Side to Left/Right */

-        silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );

+        silk_stereo_MS_to_LR( &psDec->sStereo, &samplesOut1_tmp[ 0 ][delay], &samplesOut1_tmp[ 1 ][delay], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );

     } else {

         /* Buffering */

-        silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );

-        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );

+        silk_memcpy( &samplesOut1_tmp[ 0 ][delay], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );

+        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec + delay ], 2 * sizeof( opus_int16 ) );

     /* Number of output samples */

@@ -256,8 +271,11 @@

     for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {

+        silk_memcpy(&samplesOut1_tmp[ n ][ 1 ], &channel_state[ n ].delayBuf[ MAX_DECODER_DELAY-delay ], delay*sizeof(opus_int16));

         /* Resample decoded signal to API_sampleRate */

         ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );

+        silk_memcpy(channel_state[ n ].delayBuf, &samplesOut1_tmp[ n ][ 1 + nSamplesOutDec + delay - MAX_DECODER_DELAY ], MAX_DECODER_DELAY*sizeof(opus_int16));

         /* Interleave if stereo output and stereo stream */

         if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {

--- a/silk/define.h

+++ b/silk/define.h

@@ -86,6 +86,9 @@

 #define MAX_FRAME_LENGTH_MS                     ( SUB_FRAME_LENGTH_MS * MAX_NB_SUBFR )

 #define MAX_FRAME_LENGTH                        ( MAX_FRAME_LENGTH_MS * MAX_FS_KHZ )

+#define MAX_ENCODER_DELAY                       18 /* Upper bound on enc_delay_matrix entries; also the length of the encoder delayBuf */

+#define MAX_DECODER_DELAY                        8 /* Upper bound on dec_delay_matrix entries; also the length of the decoder delayBuf */

 /* Milliseconds of lookahead for pitch analysis */

 #define LA_PITCH_MS                             2

 #define LA_PITCH_MAX                            ( LA_PITCH_MS * MAX_FS_KHZ )

--- a/silk/enc_API.c

+++ b/silk/enc_API.c

@@ -138,8 +138,8 @@

     opus_int   speech_act_thr_for_switch_Q8;

     opus_int32 TargetRate_bps, MStargetRates_bps[ 2 ], channelRate_bps, LBRR_symbol;

     silk_encoder *psEnc = ( silk_encoder * )encState;

-    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ ];

-    opus_int transition;

+    opus_int16 buf[ MAX_FRAME_LENGTH_MS * MAX_API_FS_KHZ + MAX_ENCODER_DELAY];

+    opus_int transition, delay;

     psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded = psEnc->state_Fxx[ 1 ].sCmn.nFramesEncoded = 0;

@@ -222,6 +222,7 @@

     silk_assert( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 0 ].sCmn.fs_kHz == psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

+    delay = psEnc->state_Fxx[ 0 ].sCmn.delay;

     /* Input buffering/resampling and encoding */

     while( 1 ) {

         nSamplesToBuffer  = psEnc->state_Fxx[ 0 ].sCmn.frame_length - psEnc->state_Fxx[ 0 ].sCmn.inputBufIx;

@@ -231,12 +232,15 @@

         if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) {

             int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded;

             for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n ] = samplesIn[ 2 * n ];

+                    buf[ n+delay ] = samplesIn[ 2 * n ];

+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

             /* Making sure to start both resamplers from the same state when switching from mono to stereo */

             if(psEnc->nPrevChannelsInternal == 1 && id==0) {

-               silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));

+               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state));

+               silk_memcpy( &psEnc->state_Fxx[ 1 ].sCmn.delayBuf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf, MAX_ENCODER_DELAY*sizeof(opus_int16));

+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

@@ -245,23 +249,31 @@

             nSamplesToBuffer  = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx;

             nSamplesToBuffer  = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz );

             for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n ] = samplesIn[ 2 * n + 1 ];

+                    buf[ n+delay ] = samplesIn[ 2 * n + 1 ];

+            silk_memcpy(buf, &psEnc->state_Fxx[ 1 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

+            silk_memcpy(psEnc->state_Fxx[ 1 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             psEnc->state_Fxx[ 1 ].sCmn.inputBufIx += nSamplesToBuffer;

         } else if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 1 ) {

             /* Combine left and right channels before resampling */

             for( n = 0; n < nSamplesFromInput; n++ ) {

-                buf[ n ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );

+                buf[ n+delay ] = (opus_int16)silk_RSHIFT_ROUND( samplesIn[ 2 * n ] + samplesIn[ 2 * n + 1 ],  1 );

+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

                 &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;

         } else {

             silk_assert( encControl->nChannelsAPI == 1 && encControl->nChannelsInternal == 1 );

+            silk_memcpy(buf+delay, samplesIn, nSamplesFromInput*sizeof(opus_int16));

+            silk_memcpy(buf, &psEnc->state_Fxx[ 0 ].sCmn.delayBuf[MAX_ENCODER_DELAY-delay], delay*sizeof(opus_int16));

             ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state,

-                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], samplesIn, nSamplesFromInput );

+                &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput );

+            silk_memcpy(psEnc->state_Fxx[ 0 ].sCmn.delayBuf, buf+nSamplesFromInput+delay-MAX_ENCODER_DELAY, MAX_ENCODER_DELAY*sizeof(opus_int16));

             psEnc->state_Fxx[ 0 ].sCmn.inputBufIx += nSamplesToBuffer;

--- a/silk/main.h

+++ b/silk/main.h

@@ -43,6 +43,8 @@

 /* Uncomment the next line to force a fixed internal sampling rate (independent of what bitrate is used */

 /*#define FORCE_INTERNAL_FS_KHZ       16*/

+/* Map a sampling rate in Hz from {8000, 12000, 16000, 24000, 48000} to the index {0, 1, 2, 3, 4}. R is evaluated three times; other rates are not checked. */

+#define rateID(R) ( ( ( ((R)>>12) - ((R)>16000) ) >> ((R)>24000) ) - 1 )

 /* Convert Left/Right stereo signal to adaptive Mid/Side representation */

 void silk_stereo_LR_to_MS(

--- a/silk/structs.h

+++ b/silk/structs.h

@@ -149,6 +149,7 @@

     opus_int                         minInternal_fs_Hz;              /* Minimum internal sampling frequency (Hz)                             */

     opus_int                         desiredInternal_fs_Hz;          /* Soft request for internal sampling frequency (Hz)                    */

     opus_int                         fs_kHz;                         /* Internal sampling frequency (kHz)                                    */

+    opus_int                         delay;                          /* Number of samples of delay to apply */

     opus_int                         nb_subfr;                       /* Number of 5 ms subframes in a frame                                  */

     opus_int                         frame_length;                   /* Frame length (samples)                                               */

     opus_int                         subfr_length;                   /* Subframe length (samples)                                            */

@@ -192,6 +193,7 @@

     /* Input/output buffering */

     opus_int16                       inputBuf[ MAX_FRAME_LENGTH + 2 ]; /* Buffer containing input signal                                   */

+    opus_int16                       delayBuf[MAX_ENCODER_DELAY];    /* Last MAX_ENCODER_DELAY input samples, replayed ahead of the next block before resampling */

     opus_int                         inputBufIx;

     opus_int                         nFramesPerPacket;

     opus_int                         nFramesEncoded;                 /* Number of frames analyzed in current packet                          */

@@ -257,6 +259,8 @@

     opus_int32       sLPC_Q14[ MAX_SUB_FRAME_LENGTH + MAX_LPC_ORDER ];

     opus_int32       exc_Q10[ MAX_FRAME_LENGTH ];

     opus_int16       outBuf[ 2 * MAX_FRAME_LENGTH ];             /* Buffer for output signal                                             */

+    opus_int16       delayBuf[ MAX_DECODER_DELAY ];              /* Buffer for delaying the SILK output prior to resampling              */

+    opus_int         delay;                                      /* How much decoder delay to add                                        */

     opus_int         lagPrev;                                    /* Previous Lag                                                         */

     opus_int8        LastGainIndex;                              /* Previous gain index                                                  */

     opus_int         fs_kHz;                                     /* Sampling frequency in kHz                                            */

--- a/src/opus_encoder.c

+++ b/src/opus_encoder.c

@@ -123,6 +123,11 @@

       {  48000,      24000}, /* mono */

       {  48000,      24000}, /* stereo */

};

+static const int celt_delay_table[5] = { /* Added to st->delay_compensation at init; indexed by rateID(st->Fs) */

+/* API rate (kHz): 8  12  16  24  48 */

+      10, 16, 21, 27, 55

+};

 int opus_encoder_get_size(int channels)

     int silkEncSizeBytes, celtEncSizeBytes;

@@ -202,15 +207,9 @@

     st->encoder_buffer = st->Fs/100;

     st->delay_compensation = st->Fs/400;

-    /* This part is meant to compensate for the resampler delay as a function

-       of the API sampling rate */

-    if (st->Fs == 48000)

-        st->delay_compensation += 23;

-    else if (st->Fs == 24000)

-       st->delay_compensation += 15;

-    else

-       st->delay_compensation += 2;

+    st->delay_compensation += celt_delay_table[rateID(st->Fs)];

     st->hybrid_stereo_width_Q14             = 1 << 14;

     st->variable_HP_smth2_Q15 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );

     st->first = 1;

@@ -486,7 +485,7 @@

 #endif

-    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)

+    if (st->stream_channels == 1 && st->prev_channels ==2 && st->silk_mode.toMono==0)

        /* Delay stereo->mono transition by two frames so that SILK can do a smooth downmix */

        st->silk_mode.toMono = 1;