ref: a4dccd3f659eb60b8b3caa8b04688246851454bf
parent: 7a8b1399d24e2da23350d1ce219bdd3fcef26183
author: Jean-Marc Valin <[email protected]>
date: Sat May 4 19:54:20 EDT 2013
Implements basic surround masking. The idea is that the rate of each stream is adjusted based on its contribution to the total energy of a stereo downmix.
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -110,6 +110,12 @@
#define OPUS_SET_LFE_REQUEST 10024
#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
+/* Gives the CELT encoder an opus_val16 buffer in which it stores the band log
+   energies (minus an offset) of the frame it encodes next, C*nbEBands values
+   in all. The encoder resets the pointer to NULL once the values are written,
+   so the request has to be repeated before every frame to be captured. */
+#define OPUS_SET_ENERGY_SAVE_REQUEST 10026
+#define OPUS_SET_ENERGY_SAVE(x) OPUS_SET_ENERGY_SAVE_REQUEST, __opus_check_val16_ptr(x)
+
+/* Gives the CELT encoder a buffer of reference band energies. While the
+   pointer is non-NULL and the stream is not LFE, the encoder averages the
+   difference between its own band log energies and this mask and uses the
+   (clamped, non-positive) result to lower its bit target. */
+#define OPUS_SET_ENERGY_MASK_REQUEST 10028
+#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
+
/* Encoder stuff */
int celt_encoder_get_size(int channels);
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -109,6 +109,8 @@
opus_val16 overlap_max;
opus_val16 stereo_saving;
int intensity;
+ opus_val16 *energy_save;
+ opus_val16 *energy_mask;
#ifdef RESYNTH
/* +MAX_PERIOD/2 to make space for overlap */
@@ -1165,6 +1167,7 @@
int secondMdct;
int signalBandwidth;
int transient_got_disabled=0;
+ opus_val16 surround_masking=0;
ALLOC_STACK;
mode = st->mode;
@@ -1397,6 +1400,27 @@
bandE[i] = IMIN(bandE[i], MULT16_32_Q15(QCONST16(1e-4f,15),bandE[0]));
}
amp2Log2(mode, effEnd, st->end, bandE, bandLogE, C);
+ if (st->energy_save)
+ {
+ opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+#ifdef FIXED_POINT
+ /* Compensate for the 1/8 gain we apply in the fixed-point downshift to avoid overflows. */
+ offset -= QCONST16(3.0f, DB_SHIFT);
+#endif
+ for(i=0;i<C*nbEBands;i++)
+ st->energy_save[i]=bandLogE[i]-offset;
+ st->energy_save=NULL;
+ }
+ if (st->energy_mask&&!st->lfe)
+ {
+ opus_val32 mask_avg=0;
+ opus_val16 offset = shortBlocks?HALF16(SHL16(LM, DB_SHIFT)):0;
+ for (c=0;c<C;c++)
+ for(i=0;i<st->end;i++)
+ mask_avg += bandLogE[nbEBands*c+i]-offset-st->energy_mask[nbEBands*c+i];
+ surround_masking = DIV32_16(mask_avg,C*st->end) + QCONST16(.0f, DB_SHIFT);
+ surround_masking = MIN16(MAX16(surround_masking,-QCONST16(1.5f, DB_SHIFT)), 0);
+ }
/*for (i=0;i<21;i++)
printf("%f ", bandLogE[i]);
printf("\n");*/
@@ -1625,7 +1649,7 @@
max_frac = DIV32_16(MULT16_16(QCONST16(0.8f, 15), coded_stereo_dof), coded_bins);
/*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
target -= (opus_int32)MIN32(MULT16_32_Q15(max_frac,target),
- SHR16(MULT16_16(st->stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
+ SHR32(MULT16_16(st->stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8));
}
/* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */
target += tot_boost-(16<<LM);
@@ -1649,7 +1673,14 @@
}
#endif
+ if (st->energy_mask&&!st->lfe)
{
+ opus_int32 surround_target = target + SHR32(MULT16_16(surround_masking,coded_bins<<BITRES), DB_SHIFT);
+ /*printf("%f %d %d %d %d %d %d ", surround_masking, coded_bins, st->end, st->intensity, surround_target, target, st->bitrate);*/
+ target = IMAX(target/4, surround_target);
+ }
+
+ {
opus_int32 floor_depth;
int bins;
bins = eBands[nbEBands-2]<<LM;
@@ -1660,7 +1691,7 @@
/*printf("%f %d\n", maxDepth, floor_depth);*/
}
- if (st->constrained_vbr || st->bitrate<64000)
+ if ((!st->energy_mask||st->lfe) && (st->constrained_vbr || st->bitrate<64000))
{
opus_val16 rate_factor;
#ifdef FIXED_POINT
@@ -1759,7 +1790,10 @@
codedBands = compute_allocation(mode, st->start, st->end, offsets, cap,
alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands, signalBandwidth);
- st->lastCodedBands = codedBands;
+ if (st->lastCodedBands)
+ st->lastCodedBands = IMIN(st->lastCodedBands+1,IMAX(st->lastCodedBands-1,codedBands));
+ else
+ st->lastCodedBands = codedBands;
quant_fine_energy(mode, st->start, st->end, oldBandE, error, fine_quant, enc, C);
@@ -2149,6 +2183,18 @@
{
opus_int32 value = va_arg(ap, opus_int32);
st->lfe = value;
+ }
+ break;
+ case OPUS_SET_ENERGY_SAVE_REQUEST:
+ {
+ opus_val16 *value = va_arg(ap, opus_val16*);
+ st->energy_save=value;
+ }
+ break;
+ case OPUS_SET_ENERGY_MASK_REQUEST:
+ {
+ opus_val16 *value = va_arg(ap, opus_val16*);
+ st->energy_mask = value;
}
break;
default:
--- a/include/opus_defines.h
+++ b/include/opus_defines.h
@@ -158,6 +158,7 @@
#define __opus_check_int(x) (((void)((x) == (opus_int32)0)), (opus_int32)(x))
#define __opus_check_int_ptr(ptr) ((ptr) + ((ptr) - (opus_int32*)(ptr)))
#define __opus_check_uint_ptr(ptr) ((ptr) + ((ptr) - (opus_uint32*)(ptr)))
+/* Same pointer-check idiom as the two macros above, for opus_val16 pointers:
+   the expression evaluates to ptr itself. */
+#define __opus_check_val16_ptr(ptr) ((ptr) + ((ptr) - (opus_val16*)(ptr)))
/** @endcond */
/** @defgroup opus_ctlvalues Pre-defined values for CTL interface
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -94,6 +94,7 @@
int silk_bw_switch;
/* Sampling rate (at the API level) */
int first;
+ int energy_masking;
StereoWidthState width_mem;
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
#ifndef FIXED_POINT
@@ -1602,7 +1603,7 @@
st->prev_HB_gain = HB_gain;
if (st->mode != MODE_HYBRID || st->stream_channels==1)
st->silk_mode.stereoWidth_Q14 = IMIN((1<<14),IMAX(0,st->bitrate_bps-32000));
- if( st->channels == 2 ) {
+ if( !st->energy_masking && st->channels == 2 ) {
/* Apply stereo width reduction (at low bitrates) */
if( st->hybrid_stereo_width_Q14 < (1 << 14) || st->silk_mode.stereoWidth_Q14 < (1 << 14) ) {
opus_val16 g1, g2;
@@ -2214,6 +2215,19 @@
opus_int32 value = va_arg(ap, opus_int32);
st->lfe = value;
celt_encoder_ctl(celt_enc, OPUS_SET_LFE(value));
+ }
+ break;
+ case OPUS_SET_ENERGY_SAVE_REQUEST:
+ {
+ opus_val16 *value = va_arg(ap, opus_val16*);
+ celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_SAVE(value));
+ }
+ break;
+ case OPUS_SET_ENERGY_MASK_REQUEST:
+ {
+ opus_val16 *value = va_arg(ap, opus_val16*);
+ st->energy_masking = (value!=NULL);
+ celt_encoder_ctl(celt_enc, OPUS_SET_ENERGY_MASK(value));
}
break;
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -61,6 +61,7 @@
ChannelLayout layout;
int lfe_stream;
int variable_duration;
+ int surround;
opus_int32 bitrate_bps;
opus_val32 subframe_mem[3];
/* Encoder states go here */
@@ -104,6 +105,7 @@
{
int nb_streams;
int nb_coupled_streams;
+ opus_int32 size;
if (mapping_family==0)
{
@@ -127,7 +129,10 @@
nb_coupled_streams=0;
} else
return 0;
- return opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
+ size = opus_multistream_encoder_get_size(nb_streams, nb_coupled_streams);
+ if (channels>2)
+ size += align(opus_encoder_get_size(2));
+ return size;
}
@@ -171,9 +176,9 @@
for (i=0;i<st->layout.nb_coupled_streams;i++)
{
ret = opus_encoder_init((OpusEncoder*)ptr, Fs, 2, application);
+ if(ret!=OPUS_OK)return ret;
if (i==st->lfe_stream)
opus_encoder_ctl((OpusEncoder*)ptr, OPUS_SET_LFE(1));
- if(ret!=OPUS_OK)return ret;
ptr += align(coupled_size);
}
for (;i<st->layout.nb_streams;i++)
@@ -184,6 +189,14 @@
if(ret!=OPUS_OK)return ret;
ptr += align(mono_size);
}
+ if (surround && st->layout.nb_channels>2)
+ {
+ OpusEncoder *downmix_enc;
+ downmix_enc = (OpusEncoder*)ptr;
+ ret = opus_encoder_init(downmix_enc, Fs, 2, OPUS_APPLICATION_AUDIO);
+ if(ret!=OPUS_OK)return ret;
+ }
+ st->surround = surround;
return OPUS_OK;
}
@@ -332,6 +345,13 @@
int frame_size
);
+/* Callback type for the routines that build the downmix fed to the surround
+   analysis encoder.
+   dst        - output buffer for the downmixed samples
+   src        - untyped pointer to the caller's PCM; each implementation casts
+                it to the sample type it was written for
+   channels   - number of input channels
+   frame_size - number of samples per channel */
+typedef void (*opus_surround_downmix_funct)(
+ opus_val16 *dst,
+ const void *src,
+ int channels,
+ int frame_size
+);
+
static void surround_rate_allocation(
OpusMSEncoder *st,
opus_int32 *rate,
@@ -398,7 +418,8 @@
int frame_size,
unsigned char *data,
opus_int32 max_data_bytes,
- int lsb_depth
+ int lsb_depth,
+ opus_surround_downmix_funct surround_downmix
#ifndef FIXED_POINT
, downmix_func downmix
, const void *pcm_analysis
@@ -418,6 +439,8 @@
AnalysisInfo analysis_info;
const CELTMode *celt_mode;
opus_int32 bitrates[256];
+ opus_val16 bandLogE[42];
+ opus_val16 bandLogE_mono[21];
ALLOC_STACK;
ptr = (char*)st + align(sizeof(OpusMSEncoder));
@@ -461,6 +484,36 @@
coupled_size = opus_encoder_get_size(2);
mono_size = opus_encoder_get_size(1);
+ if (st->surround && st->layout.nb_channels>2)
+ {
+ int i;
+ unsigned char dummy[512];
+ /* Temporary kludge -- remove */
+ OpusEncoder *downmix_enc;
+
+ ptr = (char*)st + align(sizeof(OpusMSEncoder));
+ for (s=0;s<st->layout.nb_streams;s++)
+ {
+ if (s < st->layout.nb_coupled_streams)
+ ptr += align(coupled_size);
+ else
+ ptr += align(mono_size);
+ }
+ downmix_enc = (OpusEncoder*)ptr;
+ surround_downmix(buf, pcm, st->layout.nb_channels, frame_size);
+ opus_encoder_ctl(downmix_enc, OPUS_SET_ENERGY_SAVE(bandLogE));
+ opus_encoder_ctl(downmix_enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+ opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+ opus_encoder_ctl(downmix_enc, OPUS_SET_FORCE_CHANNELS(2));
+ opus_encode_native(downmix_enc, buf, frame_size, dummy, 512, lsb_depth
+#ifndef FIXED_POINT
+ , &analysis_info
+#endif
+ );
+ for(i=0;i<21;i++)
+ bandLogE_mono[i] = MAX16(bandLogE[i], bandLogE[21+i]);
+ }
+
if (max_data_bytes < 4*st->layout.nb_streams-1)
{
RESTORE_STACK;
@@ -480,6 +533,13 @@
else
ptr += align(mono_size);
opus_encoder_ctl(enc, OPUS_SET_BITRATE(bitrates[s]));
+ if (st->surround)
+ {
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_MODE(MODE_CELT_ONLY));
+ opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND));
+ if (s < st->layout.nb_coupled_streams)
+ opus_encoder_ctl(enc, OPUS_SET_FORCE_CHANNELS(2));
+ }
}
ptr = (char*)st + align(sizeof(OpusMSEncoder));
@@ -503,11 +563,17 @@
(*copy_channel_in)(buf+1, 2,
pcm, st->layout.nb_channels, right, frame_size);
ptr += align(coupled_size);
+ /* FIXME: This isn't correct for the coupled center channels in
+ 6.1 surround configuration */
+ if (st->surround)
+ opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE));
} else {
int chan = get_mono_channel(&st->layout, s, -1);
(*copy_channel_in)(buf, 1,
pcm, st->layout.nb_channels, chan, frame_size);
ptr += align(mono_size);
+ if (st->surround)
+ opus_encoder_ctl(enc, OPUS_SET_ENERGY_MASK(bandLogE_mono));
}
/* number of bytes left (+Toc) */
curr_max = max_data_bytes - tot_size;
@@ -557,7 +623,86 @@
dst[i*dst_stride] = float_src[i*src_stride+src_channel];
#endif
}
+
+/* Writes into pos[] where each input channel goes in the stereo downmix:
+   0 = not mixed, 1 = left, 2 = center, 3 = right.
+   Layouts exist for 3 to 8 channels only; for any other count pos[] is left
+   untouched. The 3- and 5-channel cases write the same six entries as the
+   6-channel case. */
+static void channel_pos(int channels, int pos[8])
+{
+   static const int layout4[4] = {1, 3, 1, 3};
+   static const int layout6[6] = {1, 2, 3, 1, 3, 0};
+   static const int layout7[7] = {1, 2, 3, 1, 3, 2, 0};
+   static const int layout8[8] = {1, 2, 3, 1, 3, 1, 3, 0};
+   const int *layout;
+   int count;
+   int k;
+
+   switch (channels)
+   {
+   case 4:
+      layout = layout4;
+      count = 4;
+      break;
+   case 3:
+   case 5:
+   case 6:
+      layout = layout6;
+      count = 6;
+      break;
+   case 7:
+      layout = layout7;
+      count = 7;
+      break;
+   case 8:
+      layout = layout8;
+      count = 8;
+      break;
+   default:
+      /* Unknown layout: leave the caller's array as it is. */
+      return;
+   }
+   for (k=0;k<count;k++)
+      pos[k] = layout[k];
+}
+
+/* Downmixes interleaved float PCM to interleaved stereo.
+   dst        - receives 2*frame_size samples (left at even indices, right at
+                odd ones); cleared before accumulation
+   src        - input PCM, read as const float* with `channels` samples per frame
+   channels   - number of input channels; also selects the layout through
+                channel_pos()
+   frame_size - number of samples per channel
+   Channels placed left or center are added to the left output, channels
+   placed center or right to the right output; all others are ignored.
+   Fixed-point builds convert each sample with FLOAT2INT16 and shift it right
+   by 3 before adding.
+   NOTE(review): pos[] has 8 entries but is indexed up to channels-1;
+   presumably callers never pass more than 8 channels -- confirm. */
+static void opus_surround_downmix_float(
+   opus_val16 *dst,
+   const void *src,
+   int channels,
+   int frame_size
+)
+{
+   const float *in = (const float *)src;
+   int pos[8] = {0};
+   opus_int32 n;
+   int ch;
+
+   channel_pos(channels, pos);
+   for (n=0;n<2*frame_size;n++)
+      dst[n]=0;
+
+   for (ch=0;ch<channels;ch++)
+   {
+      const int to_left = (pos[ch]==1||pos[ch]==2);
+      const int to_right = (pos[ch]==2||pos[ch]==3);
+      if (!to_left && !to_right)
+         continue;
+      for (n=0;n<frame_size;n++)
+      {
+#if defined(FIXED_POINT)
+         const opus_val16 sample = SHR16(FLOAT2INT16(in[n*channels+ch]),3);
+#else
+         const opus_val16 sample = in[n*channels+ch];
#endif
+         if (to_left)
+            dst[2*n] += sample;
+         if (to_right)
+            dst[2*n+1] += sample;
+      }
+   }
+}
+#endif
static void opus_copy_channel_in_short(
opus_val16 *dst,
@@ -579,6 +724,47 @@
#endif
}
+/* Downmixes interleaved 16-bit PCM to interleaved stereo.
+   dst        - receives 2*frame_size samples (left at even indices, right at
+                odd ones); cleared before accumulation
+   src        - input PCM, read as const opus_int16* with `channels` samples
+                per frame
+   channels   - number of input channels; also selects the layout through
+                channel_pos()
+   frame_size - number of samples per channel
+   Channels placed left or center are added to the left output, channels
+   placed center or right to the right output; all others are ignored.
+   Fixed-point builds shift each sample right by 3 before adding; other
+   builds scale it by 1/32768 instead.
+   NOTE(review): pos[] has 8 entries but is indexed up to channels-1;
+   presumably callers never pass more than 8 channels -- confirm. */
+static void opus_surround_downmix_short(
+   opus_val16 *dst,
+   const void *src,
+   int channels,
+   int frame_size
+)
+{
+   const opus_int16 *in = (const opus_int16 *)src;
+   int pos[8] = {0};
+   opus_int32 n;
+   int ch;
+
+   channel_pos(channels, pos);
+   for (n=0;n<2*frame_size;n++)
+      dst[n]=0;
+
+   for (ch=0;ch<channels;ch++)
+   {
+      const int to_left = (pos[ch]==1||pos[ch]==2);
+      const int to_right = (pos[ch]==2||pos[ch]==3);
+      if (!to_left && !to_right)
+         continue;
+      for (n=0;n<frame_size;n++)
+      {
+#if defined(FIXED_POINT)
+         const opus_val16 sample = SHR16(in[n*channels+ch],3);
+#else
+         const opus_val16 sample = (1/32768.f)*in[n*channels+ch];
+#endif
+         if (to_left)
+            dst[2*n] += sample;
+         if (to_right)
+            dst[2*n+1] += sample;
+      }
+   }
+}
+
+
#ifdef FIXED_POINT
int opus_multistream_encode(
OpusMSEncoder *st,
@@ -589,7 +775,7 @@
)
{
+ /* Pair the short channel copier with the short downmix, as the
+    floating-point build does: both callbacks read the same pcm buffer and
+    must agree on its sample type. */
return opus_multistream_encode_native(st, opus_copy_channel_in_short,
- pcm, frame_size, data, max_data_bytes, 16);
+ pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short);
}
#ifndef DISABLE_FLOAT_API
@@ -602,7 +788,7 @@
)
{
+ /* Pair the float channel copier with the float downmix, as the
+    floating-point build does: both callbacks read the same pcm buffer and
+    must agree on its sample type. */
return opus_multistream_encode_native(st, opus_copy_channel_in_float,
- pcm, frame_size, data, max_data_bytes, 16);
+ pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_float);
}
#endif
@@ -619,7 +805,7 @@
{
int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
return opus_multistream_encode_native(st, opus_copy_channel_in_float,
- pcm, frame_size, data, max_data_bytes, 24, downmix_float, pcm+channels*st->analysis.analysis_offset);
+ pcm, frame_size, data, max_data_bytes, 24, opus_surround_downmix_float, downmix_float, pcm+channels*st->analysis.analysis_offset);
}
int opus_multistream_encode(
@@ -632,7 +818,7 @@
{
int channels = st->layout.nb_streams + st->layout.nb_coupled_streams;
return opus_multistream_encode_native(st, opus_copy_channel_in_short,
- pcm, frame_size, data, max_data_bytes, 16, downmix_int, pcm+channels*st->analysis.analysis_offset);
+ pcm, frame_size, data, max_data_bytes, 16, opus_surround_downmix_short, downmix_int, pcm+channels*st->analysis.analysis_offset);
}
#endif