ref: c5f2a9d988fe4fddae0b499d1a12d12cee6c849f
parent: d77f61ac22fa57d2f24039a959ea722b9d725629
author: Jean-Marc Valin <[email protected]>
date: Sun Oct 26 18:00:26 EDT 2008
Latest psychoacoustics work -- still highly experimental
--- a/libcelt/celt.c
+++ b/libcelt/celt.c
@@ -390,6 +390,7 @@
VARDECL(int, offsets);
#ifdef EXP_PSY
VARDECL(celt_word32_t, mask);
+ VARDECL(celt_word32_t, tonality);
#endif
int shortBlocks=0;
int transient_time;
@@ -468,18 +469,22 @@
transient_shift = 0;
shortBlocks = 0;
}
+
/* Pitch analysis: we do it early to save on the peak stack space */
- if (st->pitch_enabled && !shortBlocks)
- {
#ifdef EXP_PSY
+ ALLOC(tonality, MAX_PERIOD/4, celt_word16_t);
+ {
VARDECL(celt_word16_t, X);
ALLOC(X, MAX_PERIOD/2, celt_word16_t);
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, X, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
- compute_tonality(st->mode, X, st->psy_mem, MAX_PERIOD);
+ compute_tonality(st->mode, X, st->psy_mem, MAX_PERIOD, tonality, MAX_PERIOD/4);
+ }
#else
+ if (st->pitch_enabled && !shortBlocks)
+ {
find_spectral_pitch(st->mode, st->mode->fft, &st->mode->psy, in, st->out_mem, st->mode->window, NULL, 2*N-2*N4, MAX_PERIOD-(2*N-2*N4), &pitch_index);
-#endif
}
+#endif
ALLOC(freq, C*N, celt_sig_t); /**< Interleaved signal MDCTs */
/*for (i=0;i<(B+1)*C*N;i++) printf ("%f(%d) ", in[i], i); printf ("\n");*/
@@ -495,13 +500,16 @@
st->psy_mem[MAX_PERIOD+st->overlap-N+i] += in[C*(st->overlap+i)+c];
*/
ALLOC(mask, N, celt_sig_t);
- compute_mdct_masking(&st->psy, freq, st->psy_mem, mask, C*N);
+ compute_mdct_masking(&st->psy, freq, tonality, st->psy_mem, mask, C*N);
+ /*for (i=0;i<256;i++)
+ printf ("%f %f %f ", freq[i], tonality[i], mask[i]);
+ printf ("\n");*/
/* Invert and stretch the mask to length of X
For some reason, I get better results by using the sqrt instead,
although there's no valid reason to. Must investigate further */
- for (i=0;i<C*N;i++)
- mask[i] = 1/(.1+mask[i]);
+ /*for (i=0;i<C*N;i++)
+ mask[i] = 1/(.1+mask[i]);*/
#endif
/* Deferred allocation after find_spectral_pitch() to reduce the peak memory usage */
@@ -520,6 +528,16 @@
/* Band normalisation */
compute_band_energies(st->mode, freq, bandE);
+#ifdef EXP_PSY
+ VARDECL(celt_word32_t, bandM);
+ ALLOC(bandM,st->mode->nbEBands, celt_ener_t);
+ for (i=0;i<N;i++)
+ mask[i] = sqrt(mask[i]);
+ compute_band_energies(st->mode, mask, bandM);
+ /*for (i=0;i<st->mode->nbEBands;i++)
+ printf ("%f %f ", bandE[i], bandM[i]);
+ printf ("\n");*/
+#endif
normalise_bands(st->mode, freq, X, bandE);
/*for (i=0;i<st->mode->nbEBands;i++)printf("%f ", bandE[i]);printf("\n");*/
/*for (i=0;i<N*B*C;i++)printf("%f ", X[i]);printf("\n");*/
--- a/libcelt/psy.c
+++ b/libcelt/psy.c
@@ -145,7 +145,7 @@
}
#ifdef EXP_PSY /* Not needed for now, but will be useful in the future */
-void compute_mdct_masking(const struct PsyDecay *decay, celt_word32_t *X, celt_word16_t *long_window, celt_mask_t *mask, int len)
+void compute_mdct_masking(const struct PsyDecay *decay, celt_word32_t *X, celt_word16_t *tonality, celt_word16_t *long_window, celt_mask_t *mask, int len)
{
int i;
VARDECL(float, psd);
@@ -152,7 +152,7 @@
SAVE_STACK;
ALLOC(psd, len, float);
for (i=0;i<len;i++)
- psd[i] = X[i]*X[i];
+ psd[i] = X[i]*X[i]*tonality[i];
for (i=1;i<len-1;i++)
mask[i] = .5*psd[i] + .25*(psd[i-1]+psd[i+1]);
/*psd[0] = .5*mask[0]+.25*(mask[1]+mask[2]);*/
@@ -164,7 +164,7 @@
RESTORE_STACK;
}
-void compute_tonality(const CELTMode *m, celt_word16_t * restrict X, celt_word16_t * mem, int len)
+void compute_tonality(const CELTMode *m, celt_word16_t * restrict X, celt_word16_t * mem, int len, celt_word16_t *tonality, int mdct_size)
{
int i;
celt_word16_t norm_1;
@@ -174,6 +174,7 @@
mem2 = mem+2*N;
X[0] = 0;
X[1] = 0;
+ tonality[0] = 1;
for (i=1;i<N;i++)
{
celt_word16_t re, im, re2, im2;
@@ -180,7 +181,7 @@
re = X[2*i];
im = X[2*i+1];
/* Normalise spectrum */
- norm_1 = celt_rsqrt(MAC16_16(MULT16_16(re,re), im,im));
+ norm_1 = celt_rsqrt(.01+MAC16_16(MULT16_16(re,re), im,im));
re = MULT16_16(re, norm_1);
im = MULT16_16(im, norm_1);
/* Phase derivative */
@@ -198,5 +199,13 @@
X[2*i+1] = im;
}
/*printf ("\n");*/
+ for (i=0;i<mdct_size;i++)
+ {
+ tonality[i] = 1.0-X[2*i]*X[2*i]*X[2*i];
+ if (tonality[i]>1)
+ tonality[i] = 1;
+ if (tonality[i]<.02)
+ tonality[i]=.02;
+ }
}
#endif
--- a/libcelt/psy.h
+++ b/libcelt/psy.h
@@ -49,8 +49,8 @@
void compute_masking(const struct PsyDecay *decay, celt_word16_t *X, celt_mask_t *mask, int len);
/** Compute the masking curve for an input (MDCT) spectrum X */
-void compute_mdct_masking(const struct PsyDecay *decay, celt_word32_t *X, celt_word16_t *long_window, celt_mask_t *mask, int len);
+void compute_mdct_masking(const struct PsyDecay *decay, celt_word32_t *X, celt_word16_t *tonality, celt_word16_t *long_window, celt_mask_t *mask, int len);
-void compute_tonality(const CELTMode *m, celt_word16_t * restrict X, celt_word16_t * mem, int len);
+void compute_tonality(const CELTMode *m, celt_word16_t * restrict X, celt_word16_t * mem, int len, celt_word16_t *tonality, int mdct_size);
#endif /* PSY_H */