ref: 006273c59f35ca10f1ab9b4cad36fdd7550b754b
parent: 9d05628407f040dfdf6c12f6400fdc4b63698322
author: Timothy B. Terriberry <[email protected]>
date: Tue May 21 14:16:13 EDT 2013
Use a table for PVQ encoding/decoding. 58.4% speedup (2.4x faster) on test_unit_cwrs32 (no custom modes). Gives a 3.2% speedup on ./opus_demo restricted-lowdelay 48000 2 96000 comp48-stereo.sw /dev/null on a 600 MHz Cortex A8.
--- a/celt/cwrs.c
+++ b/celt/cwrs.c
@@ -71,64 +71,6 @@
}
#endif
-#ifndef SMALL_FOOTPRINT
-
-#define MASK32 (0xFFFFFFFF)
-
-/*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
-static const opus_uint32 INV_TABLE[53]={
- 0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
- 0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
- 0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
- 0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
- 0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
- 0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
- 0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
- 0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
- 0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
- 0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
- 0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
- 0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
- 0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
- 0xD8FD8FD9,
-};
-
-/*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
- _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
- fits in 32 bits, but currently the table for multiplicative inverses is only
- valid for _d<=52.*/
-static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b,
- opus_uint32 _c,int _d){
- celt_assert(_d<=52);
- return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
-}
-
-/*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
- _d does not actually have to be even, but imusdiv32odd will be faster when
- it's odd, so you should use that instead.
- _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
- table for multiplicative inverses is only valid for _d<=54).
- _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
- 32 bits.*/
-static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
- opus_uint32 _c,int _d){
- opus_uint32 inv;
- int mask;
- int shift;
- int one;
- celt_assert(_d>0);
- celt_assert(_d<=54);
- shift=EC_ILOG(_d^(_d-1));
- inv=INV_TABLE[(_d-1)>>shift];
- shift--;
- one=1<<shift;
- mask=one-1;
- return (_a*(_b>>shift)-(_c>>shift)+
- ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32;
-}
-
-#endif /* SMALL_FOOTPRINT */
-
/*Although derived separately, the pulse vector coding scheme is equivalent to
a Pyramid Vector Quantizer \cite{Fis86}.
Some additional notes about an early version appear at
@@ -248,46 +190,301 @@
year=1986
}*/
-#ifndef SMALL_FOOTPRINT
-/*Compute U(2,_k).
- Note that this may be called with _k=32768 (maxK[2]+1).*/
-static inline unsigned ucwrs2(unsigned _k){
- celt_assert(_k>0);
- return _k+(_k-1);
-}
+#if !defined(SMALL_FOOTPRINT)
-/*Compute V(2,_k).*/
-static inline opus_uint32 ncwrs2(int _k){
- celt_assert(_k>0);
- return 4*(opus_uint32)_k;
+/*U(N,K) = U(K,N) := N>0?K>0?U(N-1,K)+U(N,K-1)+U(N-1,K-1):0:K>0?1:0*/
+# define CELT_PVQ_U(_n,_k) (CELT_PVQ_U_ROW[IMIN(_n,_k)][IMAX(_n,_k)])
+/*V(N,K) := U(N,K)+U(N,K+1) = the number of PVQ codewords for a band of size N
+ with K pulses allocated to it.*/
+# define CELT_PVQ_V(_n,_k) (CELT_PVQ_U(_n,_k)+CELT_PVQ_U(_n,(_k)+1))
+
+/*For each V(N,K) supported, we will access element U(min(N,K+1),max(N,K+1)).
+ Thus, the number of entries in row I is the larger of the maximum number of
+ pulses we will ever allocate for a given N=I (K=128, or however many fit in
+ 32 bits, whichever is smaller), plus one, and the maximum N for which
+ K=I-1 pulses fit in 32 bits.
+ The largest band size in an Opus Custom mode is 208.
+ Otherwise, we can limit things to the set of N which can be achieved by
+ splitting a band from a standard Opus mode: 176, 144, 96, 88, 72, 64, 48,
+ 44, 36, 32, 24, 22, 18, 16, 8, 4, 2).*/
+#if defined(CUSTOM_MODES)
+static const opus_uint32 CELT_PVQ_U_DATA[1488]={
+#else
+static const opus_uint32 CELT_PVQ_U_DATA[1272]={
+#endif
+ /*N=0, K=0...176:*/
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+#endif
+ /*N=1, K=1...176:*/
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1,
+#endif
+ /*N=2, K=2...176:*/
+ 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41,
+ 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79,
+ 81, 83, 85, 87, 89, 91, 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113,
+ 115, 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139, 141, 143,
+ 145, 147, 149, 151, 153, 155, 157, 159, 161, 163, 165, 167, 169, 171, 173,
+ 175, 177, 179, 181, 183, 185, 187, 189, 191, 193, 195, 197, 199, 201, 203,
+ 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233,
+ 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, 257, 259, 261, 263,
+ 265, 267, 269, 271, 273, 275, 277, 279, 281, 283, 285, 287, 289, 291, 293,
+ 295, 297, 299, 301, 303, 305, 307, 309, 311, 313, 315, 317, 319, 321, 323,
+ 325, 327, 329, 331, 333, 335, 337, 339, 341, 343, 345, 347, 349, 351,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 353, 355, 357, 359, 361, 363, 365, 367, 369, 371, 373, 375, 377, 379, 381,
+ 383, 385, 387, 389, 391, 393, 395, 397, 399, 401, 403, 405, 407, 409, 411,
+ 413, 415,
+#endif
+ /*N=3, K=3...176:*/
+ 13, 25, 41, 61, 85, 113, 145, 181, 221, 265, 313, 365, 421, 481, 545, 613,
+ 685, 761, 841, 925, 1013, 1105, 1201, 1301, 1405, 1513, 1625, 1741, 1861,
+ 1985, 2113, 2245, 2381, 2521, 2665, 2813, 2965, 3121, 3281, 3445, 3613, 3785,
+ 3961, 4141, 4325, 4513, 4705, 4901, 5101, 5305, 5513, 5725, 5941, 6161, 6385,
+ 6613, 6845, 7081, 7321, 7565, 7813, 8065, 8321, 8581, 8845, 9113, 9385, 9661,
+ 9941, 10225, 10513, 10805, 11101, 11401, 11705, 12013, 12325, 12641, 12961,
+ 13285, 13613, 13945, 14281, 14621, 14965, 15313, 15665, 16021, 16381, 16745,
+ 17113, 17485, 17861, 18241, 18625, 19013, 19405, 19801, 20201, 20605, 21013,
+ 21425, 21841, 22261, 22685, 23113, 23545, 23981, 24421, 24865, 25313, 25765,
+ 26221, 26681, 27145, 27613, 28085, 28561, 29041, 29525, 30013, 30505, 31001,
+ 31501, 32005, 32513, 33025, 33541, 34061, 34585, 35113, 35645, 36181, 36721,
+ 37265, 37813, 38365, 38921, 39481, 40045, 40613, 41185, 41761, 42341, 42925,
+ 43513, 44105, 44701, 45301, 45905, 46513, 47125, 47741, 48361, 48985, 49613,
+ 50245, 50881, 51521, 52165, 52813, 53465, 54121, 54781, 55445, 56113, 56785,
+ 57461, 58141, 58825, 59513, 60205, 60901, 61601,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 62305, 63013, 63725, 64441, 65161, 65885, 66613, 67345, 68081, 68821, 69565,
+ 70313, 71065, 71821, 72581, 73345, 74113, 74885, 75661, 76441, 77225, 78013,
+ 78805, 79601, 80401, 81205, 82013, 82825, 83641, 84461, 85285, 86113,
+#endif
+ /*N=4, K=4...176:*/
+ 63, 129, 231, 377, 575, 833, 1159, 1561, 2047, 2625, 3303, 4089, 4991, 6017,
+ 7175, 8473, 9919, 11521, 13287, 15225, 17343, 19649, 22151, 24857, 27775,
+ 30913, 34279, 37881, 41727, 45825, 50183, 54809, 59711, 64897, 70375, 76153,
+ 82239, 88641, 95367, 102425, 109823, 117569, 125671, 134137, 142975, 152193,
+ 161799, 171801, 182207, 193025, 204263, 215929, 228031, 240577, 253575,
+ 267033, 280959, 295361, 310247, 325625, 341503, 357889, 374791, 392217,
+ 410175, 428673, 447719, 467321, 487487, 508225, 529543, 551449, 573951,
+ 597057, 620775, 645113, 670079, 695681, 721927, 748825, 776383, 804609,
+ 833511, 863097, 893375, 924353, 956039, 988441, 1021567, 1055425, 1090023,
+ 1125369, 1161471, 1198337, 1235975, 1274393, 1313599, 1353601, 1394407,
+ 1436025, 1478463, 1521729, 1565831, 1610777, 1656575, 1703233, 1750759,
+ 1799161, 1848447, 1898625, 1949703, 2001689, 2054591, 2108417, 2163175,
+ 2218873, 2275519, 2333121, 2391687, 2451225, 2511743, 2573249, 2635751,
+ 2699257, 2763775, 2829313, 2895879, 2963481, 3032127, 3101825, 3172583,
+ 3244409, 3317311, 3391297, 3466375, 3542553, 3619839, 3698241, 3777767,
+ 3858425, 3940223, 4023169, 4107271, 4192537, 4278975, 4366593, 4455399,
+ 4545401, 4636607, 4729025, 4822663, 4917529, 5013631, 5110977, 5209575,
+ 5309433, 5410559, 5512961, 5616647, 5721625, 5827903, 5935489, 6044391,
+ 6154617, 6266175, 6379073, 6493319, 6608921, 6725887, 6844225, 6963943,
+ 7085049, 7207551,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 7331457, 7456775, 7583513, 7711679, 7841281, 7972327, 8104825, 8238783,
+ 8374209, 8511111, 8649497, 8789375, 8930753, 9073639, 9218041, 9363967,
+ 9511425, 9660423, 9810969, 9963071, 10116737, 10271975, 10428793, 10587199,
+ 10747201, 10908807, 11072025, 11236863, 11403329, 11571431, 11741177,
+ 11912575,
+#endif
+ /*N=5, K=5...176:*/
+ 321, 681, 1289, 2241, 3649, 5641, 8361, 11969, 16641, 22569, 29961, 39041,
+ 50049, 63241, 78889, 97281, 118721, 143529, 172041, 204609, 241601, 283401,
+ 330409, 383041, 441729, 506921, 579081, 658689, 746241, 842249, 947241,
+ 1061761, 1186369, 1321641, 1468169, 1626561, 1797441, 1981449, 2179241,
+ 2391489, 2618881, 2862121, 3121929, 3399041, 3694209, 4008201, 4341801,
+ 4695809, 5071041, 5468329, 5888521, 6332481, 6801089, 7295241, 7815849,
+ 8363841, 8940161, 9545769, 10181641, 10848769, 11548161, 12280841, 13047849,
+ 13850241, 14689089, 15565481, 16480521, 17435329, 18431041, 19468809,
+ 20549801, 21675201, 22846209, 24064041, 25329929, 26645121, 28010881,
+ 29428489, 30899241, 32424449, 34005441, 35643561, 37340169, 39096641,
+ 40914369, 42794761, 44739241, 46749249, 48826241, 50971689, 53187081,
+ 55473921, 57833729, 60268041, 62778409, 65366401, 68033601, 70781609,
+ 73612041, 76526529, 79526721, 82614281, 85790889, 89058241, 92418049,
+ 95872041, 99421961, 103069569, 106816641, 110664969, 114616361, 118672641,
+ 122835649, 127107241, 131489289, 135983681, 140592321, 145317129, 150160041,
+ 155123009, 160208001, 165417001, 170752009, 176215041, 181808129, 187533321,
+ 193392681, 199388289, 205522241, 211796649, 218213641, 224775361, 231483969,
+ 238341641, 245350569, 252512961, 259831041, 267307049, 274943241, 282741889,
+ 290705281, 298835721, 307135529, 315607041, 324252609, 333074601, 342075401,
+ 351257409, 360623041, 370174729, 379914921, 389846081, 399970689, 410291241,
+ 420810249, 431530241, 442453761, 453583369, 464921641, 476471169, 488234561,
+ 500214441, 512413449, 524834241, 537479489, 550351881, 563454121, 576788929,
+ 590359041, 604167209, 618216201, 632508801,
+#if defined(CUSTOM_MODES)
+ /*...208:*/
+ 647047809, 661836041, 676876329, 692171521, 707724481, 723538089, 739615241,
+ 755958849, 772571841, 789457161, 806617769, 824056641, 841776769, 859781161,
+ 878072841, 896654849, 915530241, 934702089, 954173481, 973947521, 994027329,
+ 1014416041, 1035116809, 1056132801, 1077467201, 1099123209, 1121104041,
+ 1143412929, 1166053121, 1189027881, 1212340489, 1235994241,
+#endif
+ /*N=6, K=6...96:*/
+ 1683, 3653, 7183, 13073, 22363, 36365, 56695, 85305, 124515, 177045, 246047,
+ 335137, 448427, 590557, 766727, 982729, 1244979, 1560549, 1937199, 2383409,
+ 2908411, 3522221, 4235671, 5060441, 6009091, 7095093, 8332863, 9737793,
+ 11326283, 13115773, 15124775, 17372905, 19880915, 22670725, 25765455,
+ 29189457, 32968347, 37129037, 41699767, 46710137, 52191139, 58175189,
+ 64696159, 71789409, 79491819, 87841821, 96879431, 106646281, 117185651,
+ 128542501, 140763503, 153897073, 167993403, 183104493, 199284183, 216588185,
+ 235074115, 254801525, 275831935, 298228865, 322057867, 347386557, 374284647,
+ 402823977, 433078547, 465124549, 499040399, 534906769, 572806619, 612825229,
+ 655050231, 699571641, 746481891, 795875861, 847850911, 902506913, 959946283,
+ 1020274013, 1083597703, 1150027593, 1219676595, 1292660325, 1369097135,
+ 1449108145, 1532817275, 1620351277, 1711839767, 1807415257, 1907213187,
+ 2011371957, 2120032959,
+#if defined(CUSTOM_MODES)
+ /*...109:*/
+ 2233340609U, 2351442379U, 2474488829U, 2602633639U, 2736033641U, 2874848851U,
+ 3019242501U, 3169381071U, 3325434321U, 3487575323U, 3655980493U, 3830829623U,
+ 4012305913U,
+#endif
+ /*N=7, K=7...54*/
+ 8989, 19825, 40081, 75517, 134245, 227305, 369305, 579125, 880685, 1303777,
+ 1884961, 2668525, 3707509, 5064793, 6814249, 9041957, 11847485, 15345233,
+ 19665841, 24957661, 31388293, 39146185, 48442297, 59511829, 72616013,
+ 88043969, 106114625, 127178701, 151620757, 179861305, 212358985, 249612805,
+ 292164445, 340600625, 395555537, 457713341, 527810725, 606639529, 695049433,
+ 793950709, 904317037, 1027188385, 1163673953, 1314955181, 1482288821,
+ 1667010073, 1870535785, 2094367717,
+#if defined(CUSTOM_MODES)
+ /*...60:*/
+ 2340095869U, 2609401873U, 2904062449U, 3225952925U, 3577050821U, 3959439497U,
+#endif
+ /*N=8, K=8...37*/
+ 48639, 108545, 224143, 433905, 795455, 1392065, 2340495, 3800305, 5984767,
+ 9173505, 13726991, 20103025, 28875327, 40754369, 56610575, 77500017,
+ 104692735, 139703809, 184327311, 240673265, 311207743, 398796225, 506750351,
+ 638878193, 799538175, 993696769, 1226990095, 1505789553, 1837271615,
+ 2229491905U,
+#if defined(CUSTOM_MODES)
+ /*...40:*/
+ 2691463695U, 3233240945U, 3866006015U,
+#endif
+ /*N=9, K=9...28:*/
+ 265729, 598417, 1256465, 2485825, 4673345, 8405905, 14546705, 24331777,
+ 39490049, 62390545, 96220561, 145198913, 214828609, 312193553, 446304145,
+ 628496897, 872893441, 1196924561, 1621925137, 2173806145U,
+#if defined(CUSTOM_MODES)
+ /*...29:*/
+ 2883810113U,
+#endif
+ /*N=10, K=10...24:*/
+ 1462563, 3317445, 7059735, 14218905, 27298155, 50250765, 89129247, 152951073,
+ 254831667, 413442773, 654862247, 1014889769, 1541911931, 2300409629U,
+ 3375210671U,
+ /*N=11, K=11...19:*/
+ 8097453, 18474633, 39753273, 81270333, 158819253, 298199265, 540279585,
+ 948062325, 1616336765,
+#if defined(CUSTOM_MODES)
+ /*...20:*/
+ 2684641785U,
+#endif
+ /*N=12, K=12...18:*/
+ 45046719, 103274625, 224298231, 464387817, 921406335, 1759885185,
+ 3248227095U,
+ /*N=13, K=13...16:*/
+ 251595969, 579168825, 1267854873, 2653649025U,
+ /*N=14, K=14:*/
+ 1409933619
+};
+
+#if defined(CUSTOM_MODES)
+const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+ CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 208,CELT_PVQ_U_DATA+ 415,
+ CELT_PVQ_U_DATA+ 621,CELT_PVQ_U_DATA+ 826,CELT_PVQ_U_DATA+1030,
+ CELT_PVQ_U_DATA+1233,CELT_PVQ_U_DATA+1336,CELT_PVQ_U_DATA+1389,
+ CELT_PVQ_U_DATA+1421,CELT_PVQ_U_DATA+1441,CELT_PVQ_U_DATA+1455,
+ CELT_PVQ_U_DATA+1464,CELT_PVQ_U_DATA+1470,CELT_PVQ_U_DATA+1473
+};
+#else
+const opus_uint32 *const CELT_PVQ_U_ROW[15]={
+ CELT_PVQ_U_DATA+ 0,CELT_PVQ_U_DATA+ 176,CELT_PVQ_U_DATA+ 351,
+ CELT_PVQ_U_DATA+ 525,CELT_PVQ_U_DATA+ 698,CELT_PVQ_U_DATA+ 870,
+ CELT_PVQ_U_DATA+1041,CELT_PVQ_U_DATA+1131,CELT_PVQ_U_DATA+1178,
+ CELT_PVQ_U_DATA+1207,CELT_PVQ_U_DATA+1226,CELT_PVQ_U_DATA+1240,
+ CELT_PVQ_U_DATA+1248,CELT_PVQ_U_DATA+1254,CELT_PVQ_U_DATA+1257
+};
+#endif
+
+#if defined(CUSTOM_MODES)
+void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
+ int k;
+ /*_maxk==0 => there's nothing to do.*/
+ celt_assert(_maxk>0);
+ _bits[0]=0;
+ for(k=1;k<=_maxk;k++)_bits[k]=log2_frac(CELT_PVQ_V(_n,k),_frac);
}
+#endif
-/*Compute U(3,_k).
- Note that this may be called with _k=32768 (maxK[3]+1).*/
-static inline opus_uint32 ucwrs3(unsigned _k){
- celt_assert(_k>0);
- return (2*(opus_uint32)_k-2)*_k+1;
+static opus_uint32 icwrs(int _n,const int *_y){
+ opus_uint32 i;
+ int j;
+ int k;
+ celt_assert(_n>=2);
+ j=_n-1;
+ i=_y[j]<0;
+ k=abs(_y[j]);
+ do{
+ j--;
+ i+=CELT_PVQ_U(_n-j,k);
+ k+=abs(_y[j]);
+ if(_y[j]<0)i+=CELT_PVQ_U(_n-j,k+1);
+ }
+ while(j>0);
+ return i;
}
-/*Compute V(3,_k).*/
-static inline opus_uint32 ncwrs3(int _k){
+void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
celt_assert(_k>0);
- return 2*(2*(unsigned)_k*(opus_uint32)_k+1);
+ ec_enc_uint(_enc,icwrs(_n,_y),CELT_PVQ_V(_n,_k));
}
-/*Compute U(4,_k).*/
-static inline opus_uint32 ucwrs4(int _k){
+static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y){
celt_assert(_k>0);
- return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1);
+ celt_assert(_n>0);
+ do{
+ opus_uint32 p;
+ int s;
+ int k0;
+ /*Are the pulses in this dimension negative?*/
+ p=CELT_PVQ_U(_n,_k+1);
+ s=-(_i>=p);
+ _i-=p&s;
+ /*Count how many pulses were placed in this dimension.*/
+ k0=_k;
+ for(p=CELT_PVQ_U(_n,_k);p>_i;p=CELT_PVQ_U(_n,_k))_k--;
+ _i-=p;
+ *_y++=(k0-_k+s)^s;
+ }
+ while(--_n>0);
}
-/*Compute V(4,_k).*/
-static inline opus_uint32 ncwrs4(int _k){
- celt_assert(_k>0);
- return ((_k*(opus_uint32)_k+2)*_k)/3<<3;
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+ cwrsi(_n,_k,ec_dec_uint(_dec,CELT_PVQ_V(_n,_k)),_y);
}
-#endif /* SMALL_FOOTPRINT */
+#else /* SMALL_FOOTPRINT */
/*Computes the next row/column of any recurrence that obeys the relation
u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
@@ -332,125 +529,18 @@
celt_assert(len>=3);
_u[0]=0;
_u[1]=um2=1;
-#ifndef SMALL_FOOTPRINT
- /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE,
- but _k isn't tested here because k<=52 for n=7*/
- if(_n<=6)
-#endif
- {
- /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
- /*If _n==1, _u[i] should be 1 for i>1.*/
- celt_assert(_n>=2);
- /*If _k==0, the following do-while loop will overflow the buffer.*/
- celt_assert(_k>0);
- k=2;
- do _u[k]=(k<<1)-1;
- while(++k<len);
- for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
- }
-#ifndef SMALL_FOOTPRINT
- else{
- opus_uint32 um1;
- opus_uint32 n2m1;
- _u[2]=n2m1=um1=(_n<<1)-1;
- for(k=3;k<len;k++){
- /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
- _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
- if(++k>=len)break;
- _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1;
- }
- }
-#endif /* SMALL_FOOTPRINT */
+ /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
+ /*If _n==1, _u[i] should be 1 for i>1.*/
+ celt_assert(_n>=2);
+ /*If _k==0, the following do-while loop will overflow the buffer.*/
+ celt_assert(_k>0);
+ k=2;
+ do _u[k]=(k<<1)-1;
+ while(++k<len);
+ for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
return _u[_k]+_u[_k+1];
}
-#ifndef SMALL_FOOTPRINT
-
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
- set of size 1 with associated sign bits.
- _y: Returns the vector of pulses.*/
-static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){
- int s;
- s=-(int)_i;
- _y[0]=(_k+s)^s;
-}
-
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
- set of size 2 with associated sign bits.
- _y: Returns the vector of pulses.*/
-static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
- opus_uint32 p;
- int s;
- int yj;
- p=ucwrs2(_k+1U);
- s=-(_i>=p);
- _i-=p&s;
- yj=_k;
- _k=(_i+1)>>1;
- p=_k?ucwrs2(_k):0;
- _i-=p;
- yj-=_k;
- _y[0]=(yj+s)^s;
- cwrsi1(_k,_i,_y+1);
-}
-
-/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
- set of size 3 with associated sign bits.
- _y: Returns the vector of pulses.*/
-static void cwrsi3(int _k,opus_uint32 _i,int *_y){
- opus_uint32 p;
- int s;
- int yj;
- p=ucwrs3(_k+1U);
- s=-(_i>=p);
- _i-=p&s;
- yj=_k;
- /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
- _i<2147418113=U(3,32768)).*/
- _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0;
- p=_k?ucwrs3(_k):0;
- _i-=p;
- yj-=_k;
- _y[0]=(yj+s)^s;
- cwrsi2(_k,_i,_y+1);
-}
-
-/*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
- of size 4 with associated sign bits.
- _y: Returns the vector of pulses.*/
-static void cwrsi4(int _k,opus_uint32 _i,int *_y){
- opus_uint32 p;
- int s;
- int yj;
- int kl;
- int kr;
- p=ucwrs4(_k+1);
- s=-(_i>=p);
- _i-=p&s;
- yj=_k;
- /*We could solve a cubic for k here, but the form of the direct solution does
- not lend itself well to exact integer arithmetic.
- Instead we do a binary search on U(4,K).*/
- kl=0;
- kr=_k;
- for(;;){
- _k=(kl+kr)>>1;
- p=_k?ucwrs4(_k):0;
- if(p<_i){
- if(_k>=kr)break;
- kl=_k+1;
- }
- else if(p>_i)kr=_k-1;
- else break;
- }
- _i-=p;
- yj-=_k;
- _y[0]=(yj+s)^s;
- cwrsi3(_k,_i,_y+1);
-}
-
-#endif /* SMALL_FOOTPRINT */
-
/*Returns the _i'th combination of _k elements chosen from a set of size _n
with associated sign bits.
_y: Returns the vector of pulses.
@@ -487,56 +577,7 @@
return _y[0]<0;
}
-#ifndef SMALL_FOOTPRINT
-
/*Returns the index of the given combination of K elements chosen from a set
- of size 2 with associated sign bits.
- _y: The vector of pulses, whose sum of absolute values is K.
- _k: Returns K.*/
-static inline opus_uint32 icwrs2(const int *_y,int *_k){
- opus_uint32 i;
- int k;
- i=icwrs1(_y+1,&k);
- i+=k?ucwrs2(k):0;
- k+=abs(_y[0]);
- if(_y[0]<0)i+=ucwrs2(k+1U);
- *_k=k;
- return i;
-}
-
-/*Returns the index of the given combination of K elements chosen from a set
- of size 3 with associated sign bits.
- _y: The vector of pulses, whose sum of absolute values is K.
- _k: Returns K.*/
-static inline opus_uint32 icwrs3(const int *_y,int *_k){
- opus_uint32 i;
- int k;
- i=icwrs2(_y+1,&k);
- i+=k?ucwrs3(k):0;
- k+=abs(_y[0]);
- if(_y[0]<0)i+=ucwrs3(k+1U);
- *_k=k;
- return i;
-}
-
-/*Returns the index of the given combination of K elements chosen from a set
- of size 4 with associated sign bits.
- _y: The vector of pulses, whose sum of absolute values is K.
- _k: Returns K.*/
-static inline opus_uint32 icwrs4(const int *_y,int *_k){
- opus_uint32 i;
- int k;
- i=icwrs3(_y+1,&k);
- i+=k?ucwrs4(k):0;
- k+=abs(_y[0]);
- if(_y[0]<0)i+=ucwrs4(k+1);
- *_k=k;
- return i;
-}
-
-#endif /* SMALL_FOOTPRINT */
-
-/*Returns the index of the given combination of K elements chosen from a set
of size _n with associated sign bits.
_y: The vector of pulses, whose sum of absolute values must be _k.
_nc: Returns V(_n,_k).*/
@@ -543,8 +584,8 @@
static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
opus_uint32 *_u){
opus_uint32 i;
- int j;
- int k;
+ int j;
+ int k;
/*We can't unroll the first two iterations of the loop unless _n>=2.*/
celt_assert(_n>=2);
_u[0]=0;
@@ -589,57 +630,23 @@
void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
opus_uint32 i;
+ VARDECL(opus_uint32,u);
+ opus_uint32 nc;
+ SAVE_STACK;
celt_assert(_k>0);
-#ifndef SMALL_FOOTPRINT
- switch(_n){
- case 2:{
- i=icwrs2(_y,&_k);
- ec_enc_uint(_enc,i,ncwrs2(_k));
- }break;
- case 3:{
- i=icwrs3(_y,&_k);
- ec_enc_uint(_enc,i,ncwrs3(_k));
- }break;
- case 4:{
- i=icwrs4(_y,&_k);
- ec_enc_uint(_enc,i,ncwrs4(_k));
- }break;
- default:
- {
-#endif
- VARDECL(opus_uint32,u);
- opus_uint32 nc;
- SAVE_STACK;
- ALLOC(u,_k+2U,opus_uint32);
- i=icwrs(_n,_k,&nc,_y,u);
- ec_enc_uint(_enc,i,nc);
- RESTORE_STACK;
-#ifndef SMALL_FOOTPRINT
- }
- break;
- }
-#endif
+ ALLOC(u,_k+2U,opus_uint32);
+ i=icwrs(_n,_k,&nc,_y,u);
+ ec_enc_uint(_enc,i,nc);
+ RESTORE_STACK;
}
-void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
-{
+void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec){
+ VARDECL(opus_uint32,u);
+ SAVE_STACK;
celt_assert(_k>0);
-#ifndef SMALL_FOOTPRINT
- switch(_n){
- case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
- case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
- case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
- default:
- {
-#endif
- VARDECL(opus_uint32,u);
- SAVE_STACK;
- ALLOC(u,_k+2U,opus_uint32);
- cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
- RESTORE_STACK;
-#ifndef SMALL_FOOTPRINT
- }
- break;
- }
-#endif
+ ALLOC(u,_k+2U,opus_uint32);
+ cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
+ RESTORE_STACK;
}
+
+#endif /* SMALL_FOOTPRINT */
--- a/celt/modes.c
+++ b/celt/modes.c
@@ -345,6 +345,14 @@
mode->eBands = compute_ebands(Fs, mode->shortMdctSize, res, &mode->nbEBands);
if (mode->eBands==NULL)
goto failure;
+#if !defined(SMALL_FOOTPRINT)
+ /* Make sure we don't allocate a band larger than our PVQ table.
+ 208 should be enough, but let's be paranoid. */
+ if ((mode->eBands[mode->nbEBands] - mode->eBands[mode->nbEBands-1])<<LM >
+ 208) {
+ goto failure;
+ }
+#endif
mode->effEBands = mode->nbEBands;
while (mode->eBands[mode->effEBands] > mode->shortMdctSize)
--- a/celt/tests/test_unit_cwrs32.c
+++ b/celt/tests/test_unit_cwrs32.c
@@ -53,14 +53,13 @@
#ifdef TEST_CUSTOM_MODES
-#define NDIMS (46)
+#define NDIMS (44)
static const int pn[NDIMS]={
2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 18, 20, 22,
24, 26, 28, 30, 32, 36, 40, 44, 48,
52, 56, 60, 64, 72, 80, 88, 96, 104,
- 112, 120, 128, 144, 160, 176, 192, 208, 224,
- 240
+ 112, 120, 128, 144, 160, 176, 192, 208
};
static const int pkmax[NDIMS]={
128, 128, 128, 128, 88, 52, 36, 26, 22,
@@ -67,8 +66,7 @@
18, 16, 15, 13, 12, 12, 11, 10, 9,
9, 8, 8, 7, 7, 7, 7, 6, 6,
6, 6, 6, 5, 5, 5, 5, 5, 5,
- 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4
+ 4, 4, 4, 4, 4, 4, 4, 4
};
#else /* TEST_CUSTOM_MODES */
@@ -97,7 +95,9 @@
for(pseudo=1;pseudo<41;pseudo++)
{
int k;
+#if defined(SMALL_FOOTPRINT)
opus_uint32 uu[KMAX+2U];
+#endif
opus_uint32 inc;
opus_uint32 nc;
opus_uint32 i;
@@ -104,20 +104,28 @@
k=get_pulses(pseudo);
if (k>pkmax[t])break;
printf("Testing CWRS with N=%i, K=%i...\n",n,k);
+#if defined(SMALL_FOOTPRINT)
nc=ncwrs_urow(n,k,uu);
+#else
+ nc=CELT_PVQ_V(n,k);
+#endif
inc=nc/20000;
if(inc<1)inc=1;
for(i=0;i<nc;i+=inc){
+#if defined(SMALL_FOOTPRINT)
opus_uint32 u[KMAX+2U];
- int y[NMAX];
- int sy;
- int yy[5];
+#endif
+ int y[NMAX];
+ int sy;
opus_uint32 v;
opus_uint32 ii;
- int kk;
- int j;
+ int j;
+#if defined(SMALL_FOOTPRINT)
memcpy(u,uu,(k+2U)*sizeof(*u));
cwrsi(n,k,i,y,u);
+#else
+ cwrsi(n,k,i,y);
+#endif
sy=0;
for(j=0;j<n;j++)sy+=ABS(y[j]);
if(sy!=k){
@@ -128,7 +136,12 @@
/*printf("%6u of %u:",i,nc);
for(j=0;j<n;j++)printf(" %+3i",y[j]);
printf(" ->");*/
+#if defined(SMALL_FOOTPRINT)
ii=icwrs(n,k,&v,y,u);
+#else
+ ii=icwrs(n,y);
+ v=CELT_PVQ_V(n,k);
+#endif
if(ii!=i){
fprintf(stderr,"Combination-index mismatch (%lu!=%lu).\n",
(long)ii,(long)i);
@@ -139,81 +152,6 @@
(long)v,(long)nc);
return 2;
}
-#ifndef SMALL_FOOTPRINT
- if(n==2){
- cwrsi2(k,i,yy);
- for(j=0;j<2;j++)if(yy[j]!=y[j]){
- fprintf(stderr,"N=2 pulse vector mismatch ({%i,%i}!={%i,%i}).\n",
- yy[0],yy[1],y[0],y[1]);
- return 3;
- }
- ii=icwrs2(yy,&kk);
- if(ii!=i){
- fprintf(stderr,"N=2 combination-index mismatch (%lu!=%lu).\n",
- (long)ii,(long)i);
- return 4;
- }
- if(kk!=k){
- fprintf(stderr,"N=2 pulse count mismatch (%i,%i).\n",kk,k);
- return 5;
- }
- v=ncwrs2(k);
- if(v!=nc){
- fprintf(stderr,"N=2 combination count mismatch (%lu,%lu).\n",
- (long)v,(long)nc);
- return 6;
- }
- }
- else if(n==3){
- cwrsi3(k,i,yy);
- for(j=0;j<3;j++)if(yy[j]!=y[j]){
- fprintf(stderr,"N=3 pulse vector mismatch "
- "({%i,%i,%i}!={%i,%i,%i}).\n",yy[0],yy[1],yy[2],y[0],y[1],y[2]);
- return 7;
- }
- ii=icwrs3(yy,&kk);
- if(ii!=i){
- fprintf(stderr,"N=3 combination-index mismatch (%lu!=%lu).\n",
- (long)ii,(long)i);
- return 8;
- }
- if(kk!=k){
- fprintf(stderr,"N=3 pulse count mismatch (%i!=%i).\n",kk,k);
- return 9;
- }
- v=ncwrs3(k);
- if(v!=nc){
- fprintf(stderr,"N=3 combination count mismatch (%lu!=%lu).\n",
- (long)v,(long)nc);
- return 10;
- }
- }
- else if(n==4){
- cwrsi4(k,i,yy);
- for(j=0;j<4;j++)if(yy[j]!=y[j]){
- fprintf(stderr,"N=4 pulse vector mismatch "
- "({%i,%i,%i,%i}!={%i,%i,%i,%i}.\n",
- yy[0],yy[1],yy[2],yy[3],y[0],y[1],y[2],y[3]);
- return 11;
- }
- ii=icwrs4(yy,&kk);
- if(ii!=i){
- fprintf(stderr,"N=4 combination-index mismatch (%lu!=%lu).\n",
- (long)ii,(long)i);
- return 12;
- }
- if(kk!=k){
- fprintf(stderr,"N=4 pulse count mismatch (%i!=%i).\n",kk,k);
- return 13;
- }
- v=ncwrs4(k);
- if(v!=nc){
- fprintf(stderr,"N=4 combination count mismatch (%lu!=%lu).\n",
- (long)v,(long)nc);
- return 14;
- }
- }
-#endif /* SMALL_FOOTPRINT */
/*printf(" %6u\n",i);*/
}
/*printf("\n");*/