shithub: opus

Download patch

ref: 00cb6f7ab4a4ac55ee1b74fc8db6bde033f74038
parent: 541df0a97e44a9c4a71bd01248f21e3c5284a733
author: Jean-Marc Valin <[email protected]>
date: Thu May 26 11:06:30 EDT 2011

splitting encoder config in terms of application and signal type

--- a/src/opus.h
+++ b/src/opus.h
@@ -65,9 +65,13 @@
 
 #define OPUS_BITRATE_AUTO       -1
 
-#define OPUS_MODE_VOICE         2000
-#define OPUS_MODE_AUDIO         2001
+#define OPUS_APPLICATION_VOIP        2000
+#define OPUS_APPLICATION_AUDIO       2001
 
+#define OPUS_SIGNAL_AUTO             3000
+#define OPUS_SIGNAL_VOICE            3001
+#define OPUS_SIGNAL_MUSIC            3002
+
 #define MODE_SILK_ONLY          1000
 #define MODE_HYBRID             1001
 #define MODE_CELT_ONLY          1002
@@ -136,6 +140,11 @@
 #define OPUS_GET_FORCE_MONO_REQUEST 23
 #define OPUS_GET_FORCE_MONO(x) OPUS_GET_FORCE_MONO_REQUEST, __check_int_ptr(x)
 
+#define OPUS_SET_SIGNAL_REQUEST 24
+#define OPUS_SET_SIGNAL(x) OPUS_SET_SIGNAL_REQUEST, __check_int(x)
+#define OPUS_GET_SIGNAL_REQUEST 25
+#define OPUS_GET_SIGNAL(x) OPUS_GET_SIGNAL_REQUEST, __check_int_ptr(x)
+
 typedef struct OpusEncoder OpusEncoder;
 typedef struct OpusDecoder OpusDecoder;
 
@@ -155,7 +164,7 @@
 OPUS_EXPORT OpusEncoder *opus_encoder_create(
     int Fs,                     /* Sampling rate of input signal (Hz) */
     int channels,               /* Number of channels (1/2) in input signal */
-    int mode                    /* Coding mode (OPUS_MODE_VOICE/OPUS_MODE_AUDIO) */
+    int application             /* Application type (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO) */
 );
 
 OPUS_EXPORT OpusEncoder *opus_encoder_init(
@@ -162,7 +171,7 @@
     OpusEncoder *st,            /* Encoder state */
     int Fs,                     /* Sampling rate of input signal (Hz) */
     int channels,               /* Number of channels (1/2) in input signal */
-    int mode                    /* Coding mode (OPUS_MODE_VOICE/OPUS_MODE_AUDIO) */
+    int application             /* Application type (OPUS_APPLICATION_VOIP/OPUS_APPLICATION_AUDIO) */
 );
 
 /* returns length of data payload (in bytes) */
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -75,7 +75,7 @@
 
 }
 
-OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels, int mode)
+OpusEncoder *opus_encoder_init(OpusEncoder* st, int Fs, int channels, int application)
 {
 	void *silk_enc;
 	CELTEncoder *celt_enc;
@@ -131,7 +131,8 @@
 	st->use_vbr = 0;
     st->user_bitrate_bps = OPUS_BITRATE_AUTO;
 	st->bitrate_bps = 3000+Fs*channels;
-	st->user_mode = mode;
+	st->user_mode = application;
+	st->signal_type = OPUS_SIGNAL_AUTO;
 	st->user_bandwidth = OPUS_BANDWIDTH_AUTO;
 	st->voice_ratio = 90;
 	st->first = 1;
@@ -209,19 +210,46 @@
     /* Equivalent bit-rate for mono */
     mono_rate = st->bitrate_bps;
     if (st->stream_channels==2)
-        mono_rate = (mono_rate+10000)/2;
+        mono_rate = 2*mono_rate/3;
     /* Compensate for smaller frame sizes assuming an equivalent overhead
        of 60 bits/frame */
     mono_rate -= 60*(st->Fs/frame_size - 50);
 
-    /* Mode selection */
-    if (st->user_mode==OPUS_MODE_VOICE)
+    /* Mode selection depending on application and signal type */
+    if (st->user_mode==OPUS_APPLICATION_VOIP)
     {
-        st->mode = MODE_SILK_ONLY;
-    } else {/* OPUS_AUDIO_MODE */
-        st->mode = MODE_CELT_ONLY;
-    }
+        celt_int32 threshold = 20000;
+        /* Hysteresis */
+        if (st->prev_mode == MODE_CELT_ONLY)
+            threshold -= 4000;
+        else if (st->prev_mode>0)
+            threshold += 4000;
 
+        /* OPUS_APPLICATION_VOIP defaults to MODE_SILK_ONLY */
+        if (st->signal_type == OPUS_SIGNAL_MUSIC && mono_rate > threshold)
+            st->mode = MODE_CELT_ONLY;
+        else
+            st->mode = MODE_SILK_ONLY;
+    } else {/* OPUS_APPLICATION_AUDIO */
+        celt_int32 threshold;
+        /* SILK/CELT threshold is higher for voice than for music */
+        threshold = 36000;
+        if (st->signal_type == OPUS_SIGNAL_MUSIC)
+            threshold -= 20000;
+        else if (st->signal_type == OPUS_SIGNAL_VOICE)
+            threshold += 8000;
+
+        /* Hysteresis */
+        if (st->prev_mode == MODE_CELT_ONLY)
+            threshold -= 4000;
+        else if (st->prev_mode>0)
+            threshold += 4000;
+
+        if (mono_rate>threshold)
+            st->mode = MODE_CELT_ONLY;
+        else
+            st->mode = MODE_SILK_ONLY;
+    }
     /* Automatic (rate-dependent) bandwidth selection */
     if (st->mode == MODE_CELT_ONLY || st->first || st->silk_mode.allowBandwidthSwitch)
     {
@@ -751,6 +779,18 @@
         {
             int *value = va_arg(ap, int*);
             *value = st->vbr_constraint;
+        }
+        break;
+        case OPUS_SET_SIGNAL_REQUEST:
+        {
+            int value = va_arg(ap, int);
+            st->signal_type = value;
+        }
+        break;
+        case OPUS_GET_SIGNAL_REQUEST:
+        {
+            int *value = va_arg(ap, int*);
+            *value = st->signal_type;
         }
         break;
         default:
--- a/src/opus_encoder.h
+++ b/src/opus_encoder.h
@@ -47,6 +47,7 @@
     int          mode;
     int          user_mode;
     int          prev_mode;
+    int          signal_type;
 	int          bandwidth;
 	int          user_bandwidth;
 	int          voice_ratio;
--- a/src/test_opus.c
+++ b/src/test_opus.c
@@ -42,9 +42,9 @@
 
 void print_usage( char* argv[] ) 
 {
-    fprintf(stderr, "Usage: %s <mode (0/1)> <sampling rate (Hz)> <channels (1/2)> "
+    fprintf(stderr, "Usage: %s <application (0/1)> <sampling rate (Hz)> <channels (1/2)> "
         "<bits per second>  [options] <input> <output>\n\n", argv[0]);
-    fprintf(stderr, "mode: 0 for voice, 1 for audio:\n" );
+    fprintf(stderr, "mode: 0 for VoIP, 1 for audio:\n" );
     fprintf(stderr, "options:\n" );
     fprintf(stderr, "-cbr                 : enable constant bitrate; default: variable bitrate\n" );
     fprintf(stderr, "-cvbr                : enable constraint variable bitrate; default: unconstraint\n" );
@@ -92,7 +92,7 @@
    int stop=0;
    int tot_read=0, tot_written=0;
    short *in, *out;
-   int mode;
+   int application;
    double bits=0.0, bits_act=0.0, bits2=0.0, nrg;
    int bandwidth=-1;
    const char *bandwidth_string;
@@ -107,7 +107,7 @@
       return 1;
    }
 
-   mode = atoi(argv[1]) + OPUS_MODE_VOICE;
+   application = atoi(argv[1]) + OPUS_APPLICATION_VOIP;
    sampling_rate = atoi(argv[2]);
    channels = atoi(argv[3]);
    bitrate_bps = atoi(argv[4]);
@@ -200,7 +200,7 @@
         }
    }
 
-   if( mode < OPUS_MODE_VOICE || mode > OPUS_MODE_AUDIO) {
+   if( application < OPUS_APPLICATION_VOIP || application > OPUS_APPLICATION_AUDIO) {
       fprintf (stderr, "mode must be: 0 or 1\n");
       return 1;
    }
@@ -233,7 +233,7 @@
       return 1;
    }
 
-   enc = opus_encoder_create(sampling_rate, channels, mode);
+   enc = opus_encoder_create(sampling_rate, channels, application);
    dec = opus_decoder_create(sampling_rate, channels);
 
    if (enc==NULL)