ref: 3b2aee062d08fc72f12a4379ce2d6e2b444d5021
parent: e53ebd696a34e2a74b1ccb5132a946b7c965f448
author: Jean-Marc Valin <[email protected]>
date: Fri Oct 21 19:21:34 EDT 2011
Implements glitchless switching between SILK bandwidths. Only encoder changes were necessary because this uses the same "redundant frames" mechanism as SILK<->CELT switching. This also fixes a regression introduced in 78291b27 that was causing the encoder to go back and forth between bandwidths when SILK wasn't ready to change.
--- a/silk/control.h
+++ b/silk/control.h
@@ -89,6 +89,9 @@
/* I: Causes a smooth downmix to mono */
opus_int toMono;
+ /* I: Opus encoder is allowing us to switch bandwidth */
+ opus_int opusCanSwitch;
+
/* O: Internal sampling rate used, in Hertz; 8000/12000/16000 */
opus_int32 internalSampleRate;
@@ -100,6 +103,10 @@
/* O: Stereo width */
opus_int stereoWidth_Q14;
+
+ /* O: Tells the Opus encoder we're ready to switch */
+ opus_int switchReady;
+
} silk_EncControlStruct;
/**************************************************************************/
--- a/silk/control_audio_bandwidth.c
+++ b/silk/control_audio_bandwidth.c
@@ -34,7 +34,8 @@
/* Control internal sampling rate */
opus_int silk_control_audio_bandwidth(
- silk_encoder_state *psEncC /* I/O Pointer to Silk encoder state */
+ silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */
+ silk_EncControlStruct *encControl /* I/O Control structure */
)
{
opus_int fs_kHz;
@@ -58,7 +59,7 @@
/* Stop transition phase */
psEncC->sLP.mode = 0;
}
- if( psEncC->allow_bandwidth_switch ) {
+ if( psEncC->allow_bandwidth_switch || encControl->opusCanSwitch ) {
/* Check if we should switch down */
if( silk_SMULBB( psEncC->fs_kHz, 1000 ) > psEncC->desiredInternal_fs_Hz )
{
@@ -70,7 +71,7 @@
/* Reset transition filter state */
silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
}
- if( psEncC->sLP.transition_frame_no <= 0 ) {
+ if (encControl->opusCanSwitch) {
/* Stop transition phase */
psEncC->sLP.mode = 0;
@@ -77,8 +78,12 @@
/* Switch to a lower sample frequency */
fs_kHz = psEncC->fs_kHz == 16 ? 12 : 8;
} else {
- /* Direction: down (at double speed) */
- psEncC->sLP.mode = -2;
+ if( psEncC->sLP.transition_frame_no <= 0 ) {
+ encControl->switchReady = 1;
+ } else {
+ /* Direction: down (at double speed) */
+ psEncC->sLP.mode = -2;
+ }
}
}
else
@@ -86,7 +91,7 @@
if( silk_SMULBB( psEncC->fs_kHz, 1000 ) < psEncC->desiredInternal_fs_Hz )
{
/* Switch up */
- if( psEncC->sLP.mode == 0 ) {
+ if (encControl->opusCanSwitch) {
/* Switch to a higher sample frequency */
fs_kHz = psEncC->fs_kHz == 8 ? 12 : 16;
@@ -95,9 +100,17 @@
/* Reset transition filter state */
silk_memset( psEncC->sLP.In_LP_State, 0, sizeof( psEncC->sLP.In_LP_State ) );
+
+ /* Direction: up */
+ psEncC->sLP.mode = 1;
+ } else {
+ if( psEncC->sLP.mode == 0 ) {
+ encControl->switchReady = 1;
+ } else {
+ /* Direction: up */
+ psEncC->sLP.mode = 1;
+ }
}
- /* Direction: up */
- psEncC->sLP.mode = 1;
}
}
}
--- a/silk/control_codec.c
+++ b/silk/control_codec.c
@@ -104,7 +104,7 @@
/********************************************/
/* Determine internal sampling rate */
/********************************************/
- fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn );
+ fs_kHz = silk_control_audio_bandwidth( &psEnc->sCmn, encControl );
if (force_fs_kHz)
fs_kHz = force_fs_kHz;
/********************************************/
--- a/silk/enc_API.c
+++ b/silk/enc_API.c
@@ -149,6 +149,8 @@
return ret;
}
+ encControl->switchReady = 0;
+
if( encControl->nChannelsInternal > psEnc->nChannelsInternal ) {
/* Mono -> Stereo transition: init state of second channel and stereo state */
ret += silk_init_encoder( &psEnc->state_Fxx[ 1 ] );
--- a/silk/main.h
+++ b/silk/main.h
@@ -138,7 +138,8 @@
/* Control internal sampling rate */
opus_int silk_control_audio_bandwidth(
- silk_encoder_state *psEncC /* I/O Pointer to Silk encoder state */
+ silk_encoder_state *psEncC, /* I/O Pointer to Silk encoder state */
+ silk_EncControlStruct *encControl /* I/O Control structure */
);
/* Control SNR of residual quantizer */
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -79,6 +79,7 @@
int prev_channels;
int prev_framesize;
int bandwidth;
+ int silk_bw_switch;
/* Sampling rate (at the API level) */
int first;
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
@@ -466,6 +467,7 @@
int delay_compensation;
int frame_rate;
opus_int32 max_rate;
+ int curr_bandwidth;
VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK;
@@ -612,6 +614,13 @@
}
}
}
+ if (st->silk_bw_switch)
+ {
+ redundancy = 1;
+ celt_to_silk = 1;
+ st->silk_bw_switch = 0;
+ }
+
if (st->mode != MODE_CELT_ONLY && st->prev_mode == MODE_CELT_ONLY)
{
silk_EncControlStruct dummy;
@@ -685,6 +694,10 @@
if (max_data_bytes < 8000*frame_size / (st->Fs * 8))
st->mode = MODE_CELT_ONLY;
+ /* CELT mode doesn't support mediumband, use wideband instead */
+ if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
+ st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+
/* Can't support higher than wideband for >20 ms frames */
if (frame_size > st->Fs/50 && (st->mode == MODE_CELT_ONLY || st->bandwidth > OPUS_BANDWIDTH_WIDEBAND))
{
@@ -736,18 +749,17 @@
RESTORE_STACK;
return ret;
}
- /* CELT mode doesn't support mediumband, use wideband instead */
- if (st->mode == MODE_CELT_ONLY && st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND)
- st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ curr_bandwidth = st->bandwidth;
+
/* Chooses the appropriate mode for speech
*NEVER* switch to/from CELT-only mode here as this will invalidate some assumptions */
- if (st->mode == MODE_SILK_ONLY && st->bandwidth > OPUS_BANDWIDTH_WIDEBAND)
+ if (st->mode == MODE_SILK_ONLY && curr_bandwidth > OPUS_BANDWIDTH_WIDEBAND)
st->mode = MODE_HYBRID;
- if (st->mode == MODE_HYBRID && st->bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
+ if (st->mode == MODE_HYBRID && curr_bandwidth <= OPUS_BANDWIDTH_WIDEBAND)
st->mode = MODE_SILK_ONLY;
- /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, st->bandwidth); */
+ /* printf("%d %d %d %d\n", st->bitrate_bps, st->stream_channels, st->mode, curr_bandwidth); */
bytes_target = IMIN(max_data_bytes, st->bitrate_bps * frame_size / (st->Fs * 8)) - 1;
data += 1;
@@ -789,7 +801,7 @@
st->silk_mode.bitRate = st->bitrate_bps - 8*st->Fs/frame_size;
if( st->mode == MODE_HYBRID ) {
st->silk_mode.bitRate /= st->stream_channels;
- if( st->bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) {
+ if( curr_bandwidth == OPUS_BANDWIDTH_SUPERWIDEBAND ) {
if( st->Fs == 100 * frame_size ) {
/* 24 kHz, 10 ms */
st->silk_mode.bitRate = ( ( st->silk_mode.bitRate + 2000 + st->use_vbr * 1000 ) * 2 ) / 3;
@@ -816,12 +828,12 @@
st->silk_mode.payloadSize_ms = 1000 * frame_size / st->Fs;
st->silk_mode.nChannelsAPI = st->channels;
st->silk_mode.nChannelsInternal = st->stream_channels;
- if (st->bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
+ if (curr_bandwidth == OPUS_BANDWIDTH_NARROWBAND) {
st->silk_mode.desiredInternalSampleRate = 8000;
- } else if (st->bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
+ } else if (curr_bandwidth == OPUS_BANDWIDTH_MEDIUMBAND) {
st->silk_mode.desiredInternalSampleRate = 12000;
} else {
- silk_assert( st->mode == MODE_HYBRID || st->bandwidth == OPUS_BANDWIDTH_WIDEBAND );
+ silk_assert( st->mode == MODE_HYBRID || curr_bandwidth == OPUS_BANDWIDTH_WIDEBAND );
st->silk_mode.desiredInternalSampleRate = 16000;
}
if( st->mode == MODE_HYBRID ) {
@@ -891,7 +903,7 @@
}
if (nBytes==0)
{
- data[-1] = gen_toc(st->mode, st->Fs/frame_size, st->bandwidth, st->stream_channels);
+ data[-1] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
RESTORE_STACK;
return 1;
}
@@ -898,15 +910,23 @@
/* Extract SILK internal bandwidth for signaling in first byte */
if( st->mode == MODE_SILK_ONLY ) {
if( st->silk_mode.internalSampleRate == 8000 ) {
- st->bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+ curr_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
} else if( st->silk_mode.internalSampleRate == 12000 ) {
- st->bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+ curr_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
} else if( st->silk_mode.internalSampleRate == 16000 ) {
- st->bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+ curr_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
}
} else {
silk_assert( st->silk_mode.internalSampleRate == 16000 );
}
+
+ st->silk_mode.opusCanSwitch = st->silk_mode.switchReady;
+ if (st->silk_mode.opusCanSwitch)
+ {
+ redundancy = 1;
+ celt_to_silk = 0;
+ st->silk_bw_switch = 1;
+ }
}
/* CELT processing */
@@ -913,7 +933,7 @@
{
int endband=21;
- switch(st->bandwidth)
+ switch(curr_bandwidth)
{
case OPUS_BANDWIDTH_NARROWBAND:
endband = 13;
@@ -1029,6 +1049,9 @@
redundancy = 0;
}
+ if (!redundancy)
+ st->silk_bw_switch = 0;
+
if (st->mode != MODE_CELT_ONLY)start_band=17;
if (st->mode == MODE_SILK_ONLY)
@@ -1101,7 +1124,7 @@
/* Signalling the mode in the first byte */
data--;
- data[0] = gen_toc(st->mode, st->Fs/frame_size, st->bandwidth, st->stream_channels);
+ data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels);
st->rangeFinal = enc.rng ^ redundant_rng;