shithub: opus

Download patch

ref: e070300a341a605b3ed08baa642b661d2587a841
parent: 1e87fea32698ac3070ebf092d2ca08feae57373f
parent: bb4b6885a139644cf3ac14e7deda9f633ec2d93c
author: Jean-Marc Valin <[email protected]>
date: Wed Jul 30 09:41:28 EDT 2014

Merge remote-tracking branch 'origin/master' into derf_rtp_edits

Conflicts:
	doc/draft-ietf-payload-rtp-opus.xml

--- a/doc/draft-ietf-payload-rtp-opus.xml
+++ b/doc/draft-ietf-payload-rtp-opus.xml
@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE rfc SYSTEM "rfc2629.dtd" [
 <!ENTITY rfc2119 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml'>
+<!ENTITY rfc3389 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3389.xml'>
 <!ENTITY rfc3550 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml'>
 <!ENTITY rfc3711 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3711.xml'>
 <!ENTITY rfc3551 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3551.xml'>
@@ -17,7 +18,7 @@
 <!ENTITY nbsp "&#160;">
   ]>
 
-  <rfc category="std" ipr="trust200902" docName="draft-ietf-payload-rtp-opus-01">
+  <rfc category="std" ipr="trust200902" docName="draft-ietf-payload-rtp-opus-03">
 <?xml-stylesheet type='text/xsl' href='rfc2629.xslt' ?>
 
 <?rfc strict="yes" ?>
@@ -60,7 +61,7 @@
       <organization>Mozilla</organization>
       <address>
         <postal>
-          <street>650 Castro Street</street>
+          <street>331 E. Evelyn Avenue</street>
           <city>Mountain View</city>
           <region>CA</region>
           <code>94041</code>
@@ -70,7 +71,7 @@
       </address>
     </author>
 
-    <date day='14' month='January' year='2014' />
+    <date day='30' month='July' year='2014' />
 
     <abstract>
       <t>
@@ -247,7 +248,10 @@
             the Opus encoder can use discontinuous
             transmission (DTX), where parts of the encoded signal that
             correspond to periods of silence in the input speech or audio signal
-            are not transmitted to the receiver.
+            are not transmitted to the receiver. A receiver can distinguish
+            between DTX and packet loss by looking for gaps in the sequence
+            number, as described by Section 4.1
+            of&nbsp;<xref target="RFC3551"/>.
           </t>
 
           <t>
@@ -254,7 +258,12 @@
             On the receiving side, the non-transmitted parts will be handled by a
             frame loss concealment unit in the Opus decoder which generates a
             comfort noise signal to replace the non transmitted parts of the
-            speech or audio signal.
+            speech or audio signal. Use of <xref target="RFC3389"/> Comfort
+            Noise (CN) with Opus is discouraged.
+            The transmitter MUST drop whole frames only,
+            based on the size of the last transmitted frame,
+            to ensure successive RTP timestamps differ by a multiple of 120 and
+            to allow the receiver to use whole frames for concealment.
           </t>
 
           <t>
@@ -347,8 +356,9 @@
         therefore no padding is necessary. The payload MAY be padded by an
         integer number of octets according to <xref target="RFC3550"/>.</t>
 
-        <t>The marker bit (M) of the RTP header is used in accordance with
-        Section 4.1 of <xref target="RFC3551"/>.</t>
+        <t>The timestamp, sequence number, and marker bit (M) of the RTP header
+        are used in accordance with Section 4.1
+        of&nbsp;<xref target="RFC3551"/>.</t>
 
         <t>The RTP payload type for Opus has not been assigned statically and is
         expected to be assigned dynamically.</t>
@@ -738,8 +748,8 @@
 
         <t>Example 2: 16000 Hz clock rate, maximum packet size of 40 ms,
         recommended packet size of 40 ms, maximum average bitrate of 20000 bps,
-        prefers to receive stereo but only plans to send mono, FEC is allowed,
-        DTX is not allowed</t>
+        prefers to receive stereo but only plans to send mono, FEC is desired,
+        DTX is not desired</t>
 
         <figure>
           <artwork>
@@ -926,6 +936,7 @@
   <back>
     <references title="Normative References">
       &rfc2119;
+      &rfc3389;
       &rfc3550;
       &rfc3711;
       &rfc3551;
--- a/silk/CNG.c
+++ b/silk/CNG.c
@@ -34,7 +34,7 @@
 
 /* Generates excitation for CNG LPC synthesis */
 static OPUS_INLINE void silk_CNG_exc(
-    opus_int32                       residual_Q10[],     /* O    CNG residual signal Q10                     */
+    opus_int32                       exc_Q10[],          /* O    CNG excitation signal Q10                   */
     opus_int32                       exc_buf_Q14[],      /* I    Random samples buffer Q10                   */
     opus_int32                       Gain_Q16,           /* I    Gain to apply                               */
     opus_int                         length,             /* I    Length                                      */
@@ -55,7 +55,7 @@
         idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
         silk_assert( idx >= 0 );
         silk_assert( idx <= CNG_BUF_MASK_MAX );
-        residual_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
+        exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) );
     }
     *rand_seed = seed;
 }
@@ -85,7 +85,7 @@
 )
 {
     opus_int   i, subfr;
-    opus_int32 sum_Q6, max_Gain_Q16;
+    opus_int32 sum_Q6, max_Gain_Q16, gain_Q16;
     opus_int16 A_Q12[ MAX_LPC_ORDER ];
     silk_CNG_struct *psCNG = &psDec->sCNG;
     SAVE_STACK;
@@ -125,11 +125,20 @@
     /* Add CNG when packet is lost or during DTX */
     if( psDec->lossCnt ) {
         VARDECL( opus_int32, CNG_sig_Q10 );
-
         ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 );
 
         /* Generate CNG excitation */
-        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, psCNG->CNG_smth_Gain_Q16, length, &psCNG->rand_seed );
+		gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
+		if( gain_Q16 >= (1 << 21) || psCNG->CNG_smth_Gain_Q16 > (1 << 23) ) {
+			gain_Q16 = silk_SMULTT( gain_Q16, gain_Q16 );
+			gain_Q16 = silk_SUB_LSHIFT32(silk_SMULTT( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+			gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 16 );
+		} else {
+			gain_Q16 = silk_SMULWW( gain_Q16, gain_Q16 );
+			gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
+			gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
+		}
+        silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed );
 
         /* Convert CNG NLSF to filter representation */
         silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
@@ -162,7 +171,7 @@
             /* Update states */
             CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 );
 
-            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( sum_Q6, 6 ) );
+            frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) );
         }
         silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
     } else {
--- a/silk/decode_frame.c
+++ b/silk/decode_frame.c
@@ -107,15 +107,15 @@
     silk_memmove( psDec->outBuf, &psDec->outBuf[ psDec->frame_length ], mv_len * sizeof(opus_int16) );
     silk_memcpy( &psDec->outBuf[ mv_len ], pOut, psDec->frame_length * sizeof( opus_int16 ) );
 
-    /****************************************************************/
-    /* Ensure smooth connection of extrapolated and good frames     */
-    /****************************************************************/
-    silk_PLC_glue_frames( psDec, pOut, L );
+    /************************************************/
+    /* Comfort noise generation / estimation        */
+    /************************************************/
+    silk_CNG( psDec, psDecCtrl, pOut, L );
 
-    /************************************************/
-    /* Comfort noise generation / estimation        */
-    /************************************************/
-    silk_CNG( psDec, psDecCtrl, pOut, L );
+    /****************************************************************/
+    /* Ensure smooth connection of extrapolated and good frames     */
+    /****************************************************************/
+    silk_PLC_glue_frames( psDec, pOut, L );
 
     /* Update some decoder state variables */
     psDec->lagPrev = psDecCtrl->pitchL[ psDec->nb_subfr - 1 ];
--- a/silk/fixed/burg_modified_FIX.c
+++ b/silk/fixed/burg_modified_FIX.c
@@ -54,7 +54,7 @@
     int                         arch                /* I    Run-time architecture                                       */
 )
 {
-    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
+    opus_int         k, n, s, lz, rshifts, reached_max_gain;
     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
     const opus_int16 *x_ptr;
     opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
@@ -63,27 +63,23 @@
     opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
     opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
+    opus_int64       C0_64;
 
     silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
 
     /* Compute autocorrelations, added over subframes */
-    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
-    if( rshifts > MAX_RSHIFTS ) {
-        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
-        silk_assert( C0 > 0 );
-        rshifts = MAX_RSHIFTS;
+    C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr );
+    lz = silk_CLZ64(C0_64);
+    rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
+    if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
+    if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
+    
+    if (rshifts > 0) {
+        C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );        
     } else {
-        lz = silk_CLZ32( C0 ) - 1;
-        rshifts_extra = N_BITS_HEAD_ROOM - lz;
-        if( rshifts_extra > 0 ) {
-            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
-            C0 = silk_RSHIFT32( C0, rshifts_extra );
-        } else {
-            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
-            C0 = silk_LSHIFT32( C0, -rshifts_extra );
-        }
-        rshifts += rshifts_extra;
+        C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
     }
+    
     CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                                /* Q(-rshifts) */
     silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
     if( rshifts > 0 ) {
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -591,7 +591,7 @@
    return st;
 }
 
-static void surround_rate_allocation(
+static opus_int32 surround_rate_allocation(
       OpusMSEncoder *st,
       opus_int32 *rate,
       int frame_size
@@ -605,6 +605,7 @@
    int lfe_offset;
    int coupled_ratio; /* Q8 */
    int lfe_ratio;     /* Q8 */
+   opus_int32 rate_sum=0;
 
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    opus_encoder_ctl((OpusEncoder*)ptr, OPUS_GET_SAMPLE_RATE(&Fs));
@@ -660,7 +661,10 @@
          rate[i] = stream_offset+channel_rate;
       else
          rate[i] = lfe_offset+(channel_rate*lfe_ratio>>8);
+      rate[i] = IMAX(rate[i], 500);
+      rate_sum += rate[i];
    }
+   return rate_sum;
 }
 
 /* Max size in case the encoder decides to return three frames */
@@ -695,6 +699,8 @@
    opus_val32 *mem = NULL;
    opus_val32 *preemph_mem=NULL;
    int frame_size;
+   opus_int32 rate_sum;
+   opus_int32 smallest_packet;
    ALLOC_STACK;
 
    if (st->surround)
@@ -738,6 +744,18 @@
       RESTORE_STACK;
       return OPUS_BAD_ARG;
    }
+   /* Slight overestimate of the smallest packet the encoder can produce. */
+   if (50*frame_size <= Fs)
+   {
+      smallest_packet = st->layout.nb_streams*4;
+   } else {
+      smallest_packet = st->layout.nb_streams*4*50*frame_size/Fs;
+   }
+   if (max_data_bytes < smallest_packet)
+   {
+      RESTORE_STACK;
+      return OPUS_BAD_ARG;
+   }
    ALLOC(buf, 2*frame_size, opus_val16);
    coupled_size = opus_encoder_get_size(2);
    mono_size = opus_encoder_get_size(1);
@@ -755,11 +773,19 @@
    }
 
    /* Compute bitrate allocation between streams (this could be a lot better) */
-   surround_rate_allocation(st, bitrates, frame_size);
+   rate_sum = surround_rate_allocation(st, bitrates, frame_size);
 
    if (!vbr)
-      max_data_bytes = IMIN(max_data_bytes, 3*st->bitrate_bps/(3*8*Fs/frame_size));
-
+   {
+      if (st->bitrate_bps == OPUS_AUTO)
+      {
+         max_data_bytes = IMIN(max_data_bytes, 3*rate_sum/(3*8*Fs/frame_size));
+      } else if (st->bitrate_bps != OPUS_BITRATE_MAX)
+      {
+         max_data_bytes = IMIN(max_data_bytes, IMAX(smallest_packet,
+                          3*st->bitrate_bps/(3*8*Fs/frame_size)));
+      }
+   }
    ptr = (char*)st + align(sizeof(OpusMSEncoder));
    for (s=0;s<st->layout.nb_streams;s++)
    {
--- a/win32/genversion.bat
+++ b/win32/genversion.bat
@@ -23,10 +23,10 @@
 
 :gotversion
 
-set version_out=#define %2 "%version%"
-set version_mk=%2 = "%version%"
+set version_out=#define %~2 "%version%"
+set version_mk=%~2 = "%version%"
 
-echo %version_out%> "%1_temp"
+echo %version_out%> "%~1_temp"
 
 if %version%==unknown goto :skipgenerate
 
@@ -35,12 +35,12 @@
 
 :skipgenerate
 
-echo n | comp "%1_temp" "%1" > NUL 2> NUL
+echo n | comp "%~1_temp" "%~1" > NUL 2> NUL
 
 if not errorlevel 1 goto exit
 
-copy /y "%1_temp" "%1"
+copy /y "%~1_temp" "%~1"
 
 :exit
 
-del "%1_temp"
+del "%~1_temp"