ref: e9916c7592efd5dde18895fd35b28016ea3d0127
parent: 473f2e3d422ae80756cd32ee5a43e3671c07b28e
author: Haibo <[email protected]>
date: Thu Dec 18 04:18:11 EST 2014
Add Transform 8x8 support Add Intra 8x8 support Add no_deblocking support inside T8x8 Add CABAC parse support Add static data sheet for dequant Fix bugs and clean/astyle the code Remove build warnings Modify the UT cases Fix the ParseNalHeader bug
--- a/codec/common/inc/wels_common_defs.h
+++ b/codec/common/inc/wels_common_defs.h
@@ -65,9 +65,12 @@
extern const uint8_t g_kuiCache30ScanIdx[16];
extern const uint8_t g_kuiCache48CountScan4Idx[24];
+extern const uint8_t g_kuiMatrixV[6][8][8];
+
extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
-extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
+extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
+extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
extern const uint8_t g_kuiChromaQpTable[52];
extern const uint8_t g_kuiCabacRangeLps[64][4];
@@ -279,6 +282,8 @@
#define MB_TYPE_INTRA (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)
#define MB_TYPE_INTER (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP)
#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) )
+#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) )
+#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) )
#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) )
#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA )
#define IS_INTER(type) ( (type)&MB_TYPE_INTER )
@@ -304,6 +309,7 @@
#define I16_PRED_DC_128 6
#define I16_PRED_DC_A 7
//////////intra4x4 Luma
+// Here, I8x8 also use these definitions
#define I4_PRED_INVALID 0
#define I4_PRED_V 0
#define I4_PRED_H 1
--- a/codec/common/src/common_tables.cpp
+++ b/codec/common/src/common_tables.cpp
@@ -37,12 +37,12 @@
//////pNonZeroCount[16+8] mapping scan index
const uint8_t g_kuiMbCountScan4Idx[24] = {
- // 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8]
+ // 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8]
0, 1, 4, 5, // 2 3 | 6 7 0 | 1 0 1 2 3
2, 3, 6, 7, //--------------- --------- 4 5 6 7
8, 9, 12, 13, // 8 9 | 12 13 2 | 3 8 9 10 11
- 10, 11, 14, 15, // 10 11 | 14 15-----------------------------> 12 13 14 15
- 16, 17, 20, 21, //---------------- chroma 8*8 block 16 17 18 19
+ 10, 11, 14, 15, // 10 11 | 14 15-----------------------------> 12 13 14 15
+ 16, 17, 20, 21, //---------------- chroma 8*8 block 16 17 18 19
18, 19, 22, 23 // 16 17 | 20 21 0 1 20 21 22 23
};
@@ -61,6 +61,68 @@
46, 47, // 6+5*8, 7+5*8,
};
+const uint8_t g_kuiMatrixV[6][8][8] = { // generated from equation 8-317, 8-318
+ {
+ {20, 19, 25, 19, 20, 19, 25, 19},
+ {19, 18, 24, 18, 19, 18, 24, 18},
+ {25, 24, 32, 24, 25, 24, 32, 24},
+ {19, 18, 24, 18, 19, 18, 24, 18},
+ {20, 19, 25, 19, 20, 19, 25, 19},
+ {19, 18, 24, 18, 19, 18, 24, 18},
+ {25, 24, 32, 24, 25, 24, 32, 24},
+ {19, 18, 24, 18, 19, 18, 24, 18}
+ },
+ {
+ {22, 21, 28, 21, 22, 21, 28, 21},
+ {21, 19, 26, 19, 21, 19, 26, 19},
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {21, 19, 26, 19, 21, 19, 26, 19},
+ {22, 21, 28, 21, 22, 21, 28, 21},
+ {21, 19, 26, 19, 21, 19, 26, 19},
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {21, 19, 26, 19, 21, 19, 26, 19}
+ },
+ {
+ {26, 24, 33, 24, 26, 24, 33, 24},
+ {24, 23, 31, 23, 24, 23, 31, 23},
+ {33, 31, 42, 31, 33, 31, 42, 31},
+ {24, 23, 31, 23, 24, 23, 31, 23},
+ {26, 24, 33, 24, 26, 24, 33, 24},
+ {24, 23, 31, 23, 24, 23, 31, 23},
+ {33, 31, 42, 31, 33, 31, 42, 31},
+ {24, 23, 31, 23, 24, 23, 31, 23}
+ },
+ {
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {26, 25, 33, 25, 26, 25, 33, 25},
+ {35, 33, 45, 33, 35, 33, 45, 33},
+ {26, 25, 33, 25, 26, 25, 33, 25},
+ {28, 26, 35, 26, 28, 26, 35, 26},
+ {26, 25, 33, 25, 26, 25, 33, 25},
+ {35, 33, 45, 33, 35, 33, 45, 33},
+ {26, 25, 33, 25, 26, 25, 33, 25}
+ },
+ {
+ {32, 30, 40, 30, 32, 30, 40, 30},
+ {30, 28, 38, 28, 30, 28, 38, 28},
+ {40, 38, 51, 38, 40, 38, 51, 38},
+ {30, 28, 38, 28, 30, 28, 38, 28},
+ {32, 30, 40, 30, 32, 30, 40, 30},
+ {30, 28, 38, 28, 30, 28, 38, 28},
+ {40, 38, 51, 38, 40, 38, 51, 38},
+ {30, 28, 38, 28, 30, 28, 38, 28}
+ },
+ {
+ {36, 34, 46, 34, 36, 34, 46, 34},
+ {34, 32, 43, 32, 34, 32, 43, 32},
+ {46, 43, 58, 43, 46, 43, 58, 43},
+ {34, 32, 43, 32, 34, 32, 43, 32},
+ {36, 34, 46, 34, 36, 34, 46, 34},
+ {34, 32, 43, 32, 34, 32, 43, 32},
+ {46, 43, 58, 43, 46, 43, 58, 43},
+ {34, 32, 43, 32, 34, 32, 43, 32}
+ }
+};
//cache element equal to 30
const uint8_t g_kuiCache30ScanIdx[16] = { //mv or uiRefIndex cache scan index, 4*4 block as basic unit
@@ -170,6 +232,113 @@
/*46*/{ 2048, 2560, 2048, 2560, 2560, 3200, 2560, 3200 }, /*47*/{ 2304, 2944, 2304, 2944, 2944, 3712, 2944, 3712 },
/*48*/{ 2560, 3328, 2560, 3328, 3328, 4096, 3328, 4096 }, /*49*/{ 2816, 3584, 2816, 3584, 3584, 4608, 3584, 4608 },
/*50*/{ 3328, 4096, 3328, 4096, 4096, 5120, 4096, 5120 }, /*51*/{ 3584, 4608, 3584, 4608, 4608, 5888, 4608, 5888 },
+};
+
+ALIGNED_DECLARE (const uint16_t, g_kuiDequantCoeff8x8[52][64], 16) = {
+/* QP == 0 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 1 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 2 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 3 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 4 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 5 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 6 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 7 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 8 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 9 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 10 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 11 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 12 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 13 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 14 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 15 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 16 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 17 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 18 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 19 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 20 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 21 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 22 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 23 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 24 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 25 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 26 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 27 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 28 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 29 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 30 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 31 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 32 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 33 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 34 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 35 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 36 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 37 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 38 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 39 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 40 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 41 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 42 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 43 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 44 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 45 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
+/* QP == 46 */
+{ 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448, 512, 480, 640, 480, 512, 480, 640, 480, 480, 448, 608, 448, 480, 448, 608, 448, 640, 608, 816, 608, 640, 608, 816, 608, 480, 448, 608, 448, 480, 448, 608, 448 },
+/* QP == 47 */
+{ 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512, 576, 544, 736, 544, 576, 544, 736, 544, 544, 512, 688, 512, 544, 512, 688, 512, 736, 688, 928, 688, 736, 688, 928, 688, 544, 512, 688, 512, 544, 512, 688, 512 },
+/* QP == 48 */
+{ 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288, 320, 304, 400, 304, 320, 304, 400, 304, 304, 288, 384, 288, 304, 288, 384, 288, 400, 384, 512, 384, 400, 384, 512, 384, 304, 288, 384, 288, 304, 288, 384, 288 },
+/* QP == 49 */
+{ 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304, 352, 336, 448, 336, 352, 336, 448, 336, 336, 304, 416, 304, 336, 304, 416, 304, 448, 416, 560, 416, 448, 416, 560, 416, 336, 304, 416, 304, 336, 304, 416, 304 },
+/* QP == 50 */
+{ 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368, 416, 384, 528, 384, 416, 384, 528, 384, 384, 368, 496, 368, 384, 368, 496, 368, 528, 496, 672, 496, 528, 496, 672, 496, 384, 368, 496, 368, 384, 368, 496, 368 },
+/* QP == 51 */
+{ 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400, 448, 416, 560, 416, 448, 416, 560, 416, 416, 400, 528, 400, 416, 400, 528, 400, 560, 528, 720, 528, 560, 528, 720, 528, 416, 400, 528, 400, 416, 400, 528, 400 },
};
// table A-1 - Level limits
--- a/codec/decoder/core/inc/dec_frame.h
+++ b/codec/decoder/core/inc/dec_frame.h
@@ -68,10 +68,12 @@
int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
int16_t (*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM];
+ bool* pNoSubMbPartSizeLessThan8x8Flag;
+ bool* pTransformSize8x8Flag;
int8_t* pLumaQp;
int8_t (*pChromaQp)[2];
int8_t* pCbp;
- uint8_t *pCbfDc;
+ uint16_t *pCbfDc;
int8_t (*pNzc)[24];
int8_t (*pNzcRs)[24];
int8_t* pResidualPredFlag;
@@ -81,6 +83,7 @@
int16_t (*pScaledTCoeff)[MB_COEFF_LIST_SIZE];
int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int8_t (*pIntra4x4FinalMode)[MB_BLOCK4x4_NUM];
+ uint8_t *pIntraNxNAvailFlag;
int8_t* pChromaPredMode;
//uint8_t (*motion_pred_flag[LIST_A])[MB_PARTITION_SIZE]; // 8x8
int8_t (*pSubMbType)[MB_SUB_PARTITION_SIZE];
@@ -132,7 +135,6 @@
int8_t* pCbp;
int8_t (*pNzc)[24];
int8_t (*pIntraPredMode)[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
-
int32_t iMbX;
int32_t iMbY;
int32_t iMbXyIndex;
--- a/codec/decoder/core/inc/dec_golomb.h
+++ b/codec/decoder/core/inc/dec_golomb.h
@@ -232,6 +232,18 @@
return 0;
}
+
+/*
+ * Check whether there is more rbsp data for processing
+ */
+static inline bool CheckMoreRBSPData(PBitStringAux pBsAux) {
+ if ((pBsAux->iBits - ((pBsAux->pCurBuf - pBsAux->pStartBuf - 2) << 3) - pBsAux->iLeftBits) > 1) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
//define macros to check syntax elements
#define WELS_CHECK_SE_BOTH_ERROR(val, lower_bound, upper_bound, syntax_name, ret_code) do {\
if ((val < lower_bound) || (val > upper_bound)) {\
--- a/codec/decoder/core/inc/decode_mb_aux.h
+++ b/codec/decoder/core/inc/decode_mb_aux.h
@@ -39,6 +39,7 @@
namespace WelsDec {
void IdctResAddPred_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
+void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs);
#if defined(__cplusplus)
extern "C" {
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -91,8 +91,15 @@
#define NEW_CTX_OFFSET_LAST 166
#define NEW_CTX_OFFSET_ONE 227
#define NEW_CTX_OFFSET_ABS 232
+#define NEW_CTX_OFFSET_TS_8x8_FLAG 399
#define CTX_NUM_MVD 7
#define CTX_NUM_CBP 4
+// Table 9-34 in Page 270
+#define NEW_CTX_OFFSET_TRANSFORM_SIZE_8X8_FLAG 399
+#define NEW_CTX_OFFSET_MAP_8x8 402
+#define NEW_CTX_OFFSET_LAST_8x8 417
+#define NEW_CTX_OFFSET_ONE_8x8 426
+#define NEW_CTX_OFFSET_ABS_8x8 431 // Puzzle, where is the definition?
typedef struct TagDataBuffer {
uint8_t* pHead;
@@ -131,6 +138,8 @@
typedef void (*PExpandPictureFunc) (uint8_t* pDst, const int32_t kiStride, const int32_t kiPicWidth,
const int32_t kiPicHeight);
+typedef void (*PGetIntraPred8x8Func) (uint8_t* pPred, const int32_t kiLumaStride, bool bTLAvail, bool bTRAvail);
+
/**/
typedef struct TagRefPic {
PPicture pRefList[LIST_A][MAX_REF_PIC_COUNT]; // reference picture marking plus FIFO scheme
@@ -262,15 +271,18 @@
int16_t* pMbType[LAYER_NUM_EXCHANGEABLE]; /* mb type */
int16_t (*pMv[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*]
int8_t (*pRefIndex[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM];
+ bool* pNoSubMbPartSizeLessThan8x8Flag[LAYER_NUM_EXCHANGEABLE];
+ bool* pTransformSize8x8Flag[LAYER_NUM_EXCHANGEABLE];
int8_t* pLumaQp[LAYER_NUM_EXCHANGEABLE]; /*mb luma_qp*/
int8_t (*pChromaQp[LAYER_NUM_EXCHANGEABLE])[2]; /*mb chroma_qp*/
int16_t (*pMvd[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE MB_BLOCK4x4_NUM*]
- uint8_t* pCbfDc[LAYER_NUM_EXCHANGEABLE];
+ uint16_t* pCbfDc[LAYER_NUM_EXCHANGEABLE];
int8_t (*pNzc[LAYER_NUM_EXCHANGEABLE])[24];
int8_t (*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24];
int16_t (*pScaledTCoeff[LAYER_NUM_EXCHANGEABLE])[MB_COEFF_LIST_SIZE]; /*need be aligned*/
int8_t (*pIntraPredMode[LAYER_NUM_EXCHANGEABLE])[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
int8_t (*pIntra4x4FinalMode[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM];
+ uint8_t* pIntraNxNAvailFlag[LAYER_NUM_EXCHANGEABLE];
int8_t* pChromaPredMode[LAYER_NUM_EXCHANGEABLE];
int8_t* pCbp[LAYER_NUM_EXCHANGEABLE];
uint8_t (*pMotionPredFlag[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_PARTITION_SIZE]; // 8x8
@@ -284,7 +296,6 @@
uint32_t iMbHeight;
} sMb;
-
// reconstruction picture
PPicture pDec; //pointer to current picture being reconstructed
@@ -381,6 +392,9 @@
PGetIntraPredFunc pGetIChromaPredFunc[7]; // h264_predict_8x8_t
PIdctResAddPredFunc pIdctResAddPredFunc;
SMcFunc sMcFunc;
+ //Transform8x8
+ PGetIntraPred8x8Func pGetI8x8LumaPredFunc[14];
+ PIdctResAddPredFunc pIdctResAddPredFunc8x8;
//For error concealment
SCopyFunc sCopyFunc;
@@ -395,8 +409,8 @@
int32_t iCurSeqIntervalMaxPicWidth;
int32_t iCurSeqIntervalMaxPicHeight;
- PWelsFillNeighborMbInfoIntra4x4Func pFillInfoCacheIntra4x4Func;
- PWelsMapNeighToSample pMap4x4NeighToSampleFunc;
+ PWelsFillNeighborMbInfoIntra4x4Func pFillInfoCacheIntraNxNFunc;
+ PWelsMapNeighToSample pMapNxNNeighToSampleFunc;
PWelsMap16NeighToSample pMap16x16NeighToSampleFunc;
//feedback whether or not have VCL in current AU, and the temporal ID
--- a/codec/decoder/core/inc/get_intra_predictor.h
+++ b/codec/decoder/core/inc/get_intra_predictor.h
@@ -60,6 +60,21 @@
void WelsI4x4LumaPredHU_c (uint8_t* pPred, const int32_t kiStride);
void WelsI4x4LumaPredHD_c (uint8_t* pPred, const int32_t kiStride);
+void WelsI8x8LumaPredV_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredH_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDc_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredVL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredVR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredHU_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+void WelsI8x8LumaPredHD_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail);
+
void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride);
void WelsIChromaPredH_c (uint8_t* pPred, const int32_t kiStride);
void WelsIChromaPredPlane_c (uint8_t* pPred, const int32_t kiStride);
@@ -95,8 +110,6 @@
void WelsDecoderIChromaPredV_mmx (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderIChromaPredDcLeft_mmx (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderIChromaPredDcNA_mmx (uint8_t* pPred, const int32_t kiStride);
-
-
void WelsDecoderI4x4LumaPredH_sse2 (uint8_t* pPred, const int32_t kiStride);
void WelsDecoderI4x4LumaPredDDR_mmx (uint8_t* pPred, const int32_t kiStride);
--- a/codec/decoder/core/inc/parameter_sets.h
+++ b/codec/decoder/core/inc/parameter_sets.h
@@ -167,12 +167,16 @@
bool bRedundantPicCntPresentFlag;
bool bWeightedPredFlag;
uint8_t uiWeightedBipredIdc;
- bool bTransform_8x8_mode_flag;
+
+ bool bTransform8x8ModeFlag;
//Add for scalinglist support
bool bPicScalingMatrixPresentFlag;
bool bPicScalingListPresentFlag[12];
uint8_t iScalingList4x4[6][16];
uint8_t iScalingList8x8[6][64];
+
+ int32_t iSecondChromaQPIndexOffset; //second_chroma_qp_index_offset
+
} SPps, *PPps;
} // namespace WelsDec
--- a/codec/decoder/core/inc/parse_mb_syn_cabac.h
+++ b/codec/decoder/core/inc/parse_mb_syn_cabac.h
@@ -46,6 +46,7 @@
int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip);
int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
+int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, bool& bTransformSize8x8Flag);
int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType);
int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal);
int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal);
@@ -64,6 +65,9 @@
uint32_t& uiBinVal);
int32_t ParseSignificantCoeffCabac (int32_t* significant, int32_t iResProperty, PWelsDecoderContext pCtx);
int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
+ int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
+ PWelsDecoderContext pCtx);
+int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
PWelsDecoderContext pCtx);
int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx);
--- a/codec/decoder/core/inc/parse_mb_syn_cavlc.h
+++ b/codec/decoder/core/inc/parse_mb_syn_cavlc.h
@@ -53,9 +53,9 @@
void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer);
void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer);
-void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer);
-void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer);
void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
@@ -64,29 +64,29 @@
/*!
* \brief check iPredMode for intra16x16 eligible or not
- * \param input : current iPredMode
- * \param output: 0 indicating decoding correctly; -1 means error occurence
+ * \param input : current iPredMode
+ * \param output: 0 indicating decoding correctly; -1 means error occurence
*/
int32_t CheckIntra16x16PredMode (uint8_t uiSampleAvail, int8_t* pMode);
/*!
- * \brief check iPredMode for intra4x4 eligible or not
- * \param input : current iPredMode
- * \param output: 0 indicating decoding correctly; -1 means error occurence
+ * \brief check iPredMode for intraNxN eligible or not
+ * \param input : current iPredMode
+ * \param output: 0 indicating decoding correctly; -1 means error occurence
*/
-int32_t CheckIntra4x4PredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex);
+int32_t CheckIntraNxNPredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex, bool b8x8);
/*!
* \brief check iPredMode for chroma eligible or not
- * \param input : current iPredMode
- * \param output: 0 indicating decoding correctly; -1 means error occurence
+ * \param input : current iPredMode
+ * \param output: 0 indicating decoding correctly; -1 means error occurence
*/
int32_t CheckIntraChromaPredMode (uint8_t uiSampleAvail, int8_t* pMode);
/*!
* \brief predict the mode of intra4x4
- * \param input : current intra4x4 block index
- * \param output: mode index
+ * \param input : current intra4x4 block index
+ * \param output: mode index
*/
int32_t PredIntra4x4Mode (int8_t* pIntraPredMode, int32_t iIdx4);
@@ -107,10 +107,25 @@
uint8_t uiQp,
PWelsDecoderContext pCtx);
+// Transform8x8
+int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable,
+ uint8_t* pNonZeroCountCache,
+ PBitStringAux pBs,
+ /*int16_t* coeff_level,*/
+ int32_t iIndex,
+ int32_t iMaxNumCoeff,
+ const uint8_t* kpZigzagTable,
+ int32_t iResidualProperty,
+ /*short *tCoeffLevel,*/
+ int16_t* pTCoeff,
+ int32_t iIdx4x4,
+ uint8_t uiQp,
+ PWelsDecoderContext pCtx);
+
/*!
* \brief parsing inter info (including ref_index and pMvd)
- * \param input : decoding context, current mb, bit-stream
- * \param output: 0 indicating decoding correctly; -1 means error
+ * \param input : decoding context, current mb, bit-stream
+ * \param output: 0 indicating decoding correctly; -1 means error
*/
int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
PBitStringAux pBs);
--- a/codec/decoder/core/inc/rec_mb.h
+++ b/codec/decoder/core/inc/rec_mb.h
@@ -78,6 +78,10 @@
int32_t RecI4x4Chroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
+int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
+
+int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
+
int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
--- a/codec/decoder/core/inc/wels_common_basis.h
+++ b/codec/decoder/core/inc/wels_common_basis.h
@@ -72,16 +72,19 @@
#define LUMA_DC_AC 3
#define CHROMA_DC 4
#define CHROMA_AC 5
-#define CHROMA_DC_U 6
-#define CHROMA_DC_V 7
-#define CHROMA_AC_U 8
-#define CHROMA_AC_V 9
-#define LUMA_DC_AC_INTRA 10
-#define LUMA_DC_AC_INTER 11
-#define CHROMA_DC_U_INTER 12
-#define CHROMA_DC_V_INTER 13
-#define CHROMA_AC_U_INTER 14
-#define CHROMA_AC_V_INTER 15
+#define LUMA_DC_AC_8 6
+#define CHROMA_DC_U 7
+#define CHROMA_DC_V 8
+#define CHROMA_AC_U 9
+#define CHROMA_AC_V 10
+#define LUMA_DC_AC_INTRA 11
+#define LUMA_DC_AC_INTER 12
+#define CHROMA_DC_U_INTER 13
+#define CHROMA_DC_V_INTER 14
+#define CHROMA_AC_U_INTER 15
+#define CHROMA_AC_V_INTER 16
+#define LUMA_DC_AC_INTRA_8 17
+#define LUMA_DC_AC_INTER_8 18
#define SHIFT_BUFFER(pBitsCache) { pBitsCache->pBuf+=2; pBitsCache->uiRemainBits += 16; pBitsCache->uiCache32Bit |= (((pBitsCache->pBuf[2] << 8) | pBitsCache->pBuf[3]) << (32 - pBitsCache->uiRemainBits)); }
#define POP_BUFFER(pBitsCache, iCount) { pBitsCache->uiCache32Bit <<= iCount; pBitsCache->uiRemainBits -= iCount; }
@@ -93,7 +96,39 @@
7, 11, 14, 15,
};
+static const uint8_t g_kuiZigzagScan8x8[64] = { //8x8 block residual zig-zag scan order
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+};
+static const uint8_t g_kuiIdx2CtxSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289
+ 0, 1, 2, 3, 4, 5, 5, 4,
+ 4, 3, 3, 4, 4, 4, 5, 5,
+ 4, 4, 4, 4, 3, 3, 6, 7,
+ 7, 7, 8, 9, 10, 9, 8, 7,
+ 7, 6, 11, 12, 13, 11, 6, 7,
+ 8, 9, 14, 10, 9, 8, 6, 11,
+ 12, 13, 11, 6, 9, 14, 10, 9,
+ 11, 12, 13, 11 ,14, 10, 12, 14,
+};
+
+static const uint8_t g_kuiIdx2CtxLastSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289
+ 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 6, 6, 6, 6,
+ 7, 7, 7, 7, 8, 8, 8, 8,
+};
+
static inline void GetMbResProperty(int32_t * pMBproperty,int32_t* pResidualProperty,bool bCavlc)
{
switch(*pResidualProperty)
@@ -142,8 +177,17 @@
break;
case CHROMA_AC_V_INTER:
*pMBproperty = 5;
- *pResidualProperty = bCavlc ?CHROMA_AC:CHROMA_AC_V;
+ *pResidualProperty = bCavlc ? CHROMA_AC : CHROMA_AC_V;
break;
+ // Reference to Table 7-2
+ case LUMA_DC_AC_INTRA_8:
+ *pMBproperty = 6;
+ *pResidualProperty = LUMA_DC_AC_8;
+ break;
+ case LUMA_DC_AC_INTER_8:
+ *pMBproperty = 7;
+ *pResidualProperty = LUMA_DC_AC_8;
+ break;
}
}
--- a/codec/decoder/core/src/au_parser.cpp
+++ b/codec/decoder/core/src/au_parser.cpp
@@ -124,6 +124,7 @@
uiBsZero = pSrcRbsp[iIndex];
if (0 == uiBsZero) {
--iNalSize;
+ ++ (*pConsumedBytes);
--iIndex;
} else {
break;
@@ -991,16 +992,10 @@
WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //seq_scaling_matrix_present_flag
pSps->bSeqScalingMatrixPresentFlag = !!uiCode;
- if (pSps->bSeqScalingMatrixPresentFlag)// For high profile, it is not used in current application. FIXME
-
+ if (pSps->bSeqScalingMatrixPresentFlag) {
WELS_READ_VERIFY (ParseScalingList (pSps, pBs, 0, pSps->bSeqScalingListPresentFlag, pSps->iScalingList4x4,
pSps->iScalingList8x8));
- //if exist, to parse scalinglist matrix value
-
- // WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
- // "ParseSps(): seq_scaling_matrix_present_flag (%d). Feature not supported.",
- // pSps->bSeqScalingMatrixPresentFlag);
- //return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_NON_BASELINE);
+ }
}
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //log2_max_frame_num_minus4
WELS_CHECK_SE_UPPER_ERROR (uiCode, SPS_LOG2_MAX_FRAME_NUM_MINUS4_MAX, "log2_max_frame_num_minus4",
@@ -1379,28 +1374,27 @@
pPps->bConstainedIntraPredFlag = !!uiCode;
WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //redundant_pic_cnt_present_flag
pPps->bRedundantPicCntPresentFlag = !!uiCode;
- /*TODO: to judge whether going on to parse*/
-//going on to parse high profile syntax, need fix me
- if (0) {
- WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode));
- pPps->bTransform_8x8_mode_flag = !!uiCode;
- WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode));
+
+ if (CheckMoreRBSPData (pBsAux)) {
+ WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //transform_8x8_mode_flag
+ pPps->bTransform8x8ModeFlag = !!uiCode;
+ WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_scaling_matrix_present_flag
pPps->bPicScalingMatrixPresentFlag = !!uiCode;
if (pPps->bPicScalingMatrixPresentFlag) {
- if (pCtx->bSpsAvailFlags[pPps->iSpsId])
+ if (pCtx->bSpsAvailFlags[pPps->iSpsId]) {
WELS_READ_VERIFY (ParseScalingList (&pCtx->sSpsBuffer[pPps->iSpsId], pBsAux, 1, pPps->bPicScalingListPresentFlag,
pPps->iScalingList4x4, pPps->iScalingList8x8));
- else {
+ } else {
pCtx->bSpsLatePps = true;
WELS_READ_VERIFY (ParseScalingList (NULL, pBsAux, 1, pPps->bPicScalingListPresentFlag, pPps->iScalingList4x4,
pPps->iScalingList8x8));
}
}
- //add second chroma qp parsing process
- WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //chroma_qp_index_offset,cr
- pPps->iChromaQpIndexOffset[1] = iCode;
- WELS_CHECK_SE_BOTH_ERROR (pPps->iChromaQpIndexOffset[1], PPS_CHROMA_QP_INDEX_OFFSET_MIN, PPS_CHROMA_QP_INDEX_OFFSET_MAX,
- "second_chroma_qp_index_offset", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET));
+ WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode)); //second_chroma_qp_index_offset
+ pPps->iChromaQpIndexOffset[1] = iCode;
+ WELS_CHECK_SE_BOTH_ERROR (pPps->iChromaQpIndexOffset[1], PPS_CHROMA_QP_INDEX_OFFSET_MIN,
+ PPS_CHROMA_QP_INDEX_OFFSET_MAX, "chroma_qp_index_offset", GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS,
+ ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET));
}
if (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) {
@@ -1481,6 +1475,7 @@
int iNextScale = 8;
int iDeltaScale;
int32_t iCode;
+ int32_t iIdx;
for (int j = 0; j < iScalingListNum; j++) {
if (iNextScale != 0) {
WELS_READ_VERIFY (BsGetSe (pBsAux, &iCode));
@@ -1492,8 +1487,9 @@
if (*bUseDefaultScalingMatrixFlag)
break;
}
- pScalingList[g_kuiZigzagScan[j]] = (iNextScale == 0) ? iLastScale : iNextScale;
- iLastScale = pScalingList[g_kuiZigzagScan[j]];
+ iIdx = iScalingListNum == 16 ? g_kuiZigzagScan[j] : g_kuiZigzagScan8x8[j];
+ pScalingList[iIdx] = (iNextScale == 0) ? iLastScale : iNextScale;
+ iLastScale = pScalingList[iIdx];
}
--- a/codec/decoder/core/src/cabac_decoder.cpp
+++ b/codec/decoder/core/src/cabac_decoder.cpp
@@ -74,7 +74,7 @@
uint8_t* pCurr;
pCurr = pBsAux->pCurBuf - iRemainingBytes;
- if(pCurr >= (pBsAux->pEndBuf - 1)) {
+ if (pCurr >= (pBsAux->pEndBuf - 1)) {
return ERR_INFO_INVALID_ACCESS;
}
pDecEngine->uiOffset = ((pCurr[0] << 16) | (pCurr[1] << 8) | pCurr[2]);
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -136,6 +136,19 @@
},
};
+static const uint8_t g_kuiTableB8x8Idx[2][16] = {
+ {
+ 0, 1, 4, 5, 8, 9, 12, 13, // 0 1 | 2 3
+ 2, 3, 6, 7, 10, 11, 14, 15 // 4 5 | 6 7
+ }, // ------------
+ // 8 9 | 10 11
+ {
+ // 12 13 | 14 15
+ 0, 1, 4, 5, 2, 3, 6, 7,
+ 8, 9, 12, 13, 10, 11, 14, 15
+ },
+};
+
#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
{\
tc[0] = g_kiTc0Table(iIndexA)[pBS[0]] + bChroma;\
@@ -170,7 +183,22 @@
nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor;
nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor;
* (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;
+}
+void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
+ int8_t i8x8NnzTab[4];
+ for (int32_t i = 0; i < 4; i++) {
+ int32_t iBlkIdx = i << 2;
+ i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
+ pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
+ }
+
+ //vertical
+ nBS[0][2][0] = nBS[0][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[1]) << iLShiftFactor;
+ nBS[0][2][2] = nBS[0][2][3] = (i8x8NnzTab[2] | i8x8NnzTab[3]) << iLShiftFactor;
+ //horizontal
+ nBS[1][2][0] = nBS[1][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[2]) << iLShiftFactor;
+ nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor;
}
void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
@@ -179,73 +207,148 @@
int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
- uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
- uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
- uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
- uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
+ int8_t i8x8NnzTab[4];
- for (int i = 0; i < 3; i++)
- uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
- nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
- nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
- nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ for (int32_t i = 0; i < 4; i++) {
+ int32_t iBlkIdx = i << 2;
+ i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
+ pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
+ }
+ //vertical
+ nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
+ nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
- for (int i = 0; i < 3; i++)
- uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
- nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
- nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
- nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
+ //horizontal
+ nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
+ nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+ g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
+ } else {
+ uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
+ uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
+ uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
+ uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
- for (int i = 0; i < 3; i++)
- uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
- nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
- nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
- nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
+ for (int i = 0; i < 3; i++)
+ uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
+ nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
+ nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
+ nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
- for (int i = 0; i < 3; i++)
- uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
- nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
- nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
- nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
+ for (int i = 0; i < 3; i++)
+ uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
+ nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
+ nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
+ nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
- // horizontal
- * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
- nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
- nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
- nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
- nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
+ for (int i = 0; i < 3; i++)
+ uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
+ nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
+ nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
+ nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
- * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
- nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
- nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
- nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
- nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
+ for (int i = 0; i < 3; i++)
+ uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
+ nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
+ nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
+ nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
- * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
- nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
- nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
- nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
- nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
+ // horizontal
+ * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
+ nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
+ nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
+ nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
+ nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
+
+ * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
+ nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
+ nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
+ nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
+ nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
+
+ * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
+ nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
+ nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
+ nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
+ nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
+ }
}
uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
- int32_t i;
+ int32_t i, j;
uint32_t uiBSx4;
- //uint8_t* bS = static_cast<uint8_t*>(&uiBSx4);
uint8_t* pBS = (uint8_t*) (&uiBSx4);
- const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0];
- const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
+ const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0];
+ const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4];
+ const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0];
+ const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8];
- for (i = 0; i < 4; i++) {
- if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
- pBS[i] = 2;
- } else {
- pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
- *pBnIdx);
+ if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
+ for (i = 0; i < 2; i++) {
+ uint8_t uiNzc = 0;
+ for (j = 0; uiNzc == 0 && j < 4; j++) {
+ uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+ }
+ if (uiNzc) {
+ pBS[i << 1] = pBS[1 + (i << 1)] = 2;
+ } else {
+ pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb,
+ *pB8x8Idx, *pBn8x8Idx);
+ }
+ pB8x8Idx += 4;
+ pBn8x8Idx += 4;
}
- pBIdx++;
- pBnIdx++;
+ } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ for (i = 0; i < 2; i++) {
+ uint8_t uiNzc = 0;
+ for (j = 0; uiNzc == 0 && j < 4; j++) {
+ uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+ }
+ for (j = 0; j < 2; j++) {
+ if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+ pBS[j + (i << 1)] = 2;
+ } else {
+ pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx,
+ *pBnIdx);
+ }
+ pBnIdx++;
+ }
+ pB8x8Idx += 4;
+ }
+ } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
+ for (i = 0; i < 2; i++) {
+ uint8_t uiNzc = 0;
+ for (j = 0; uiNzc == 0 && j < 4; j++) {
+ uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+ }
+ for (j = 0; j < 2; j++) {
+ if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+ pBS[j + (i << 1)] = 2;
+ } else {
+ pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
+ *pBn8x8Idx);
+ }
+ pBIdx++;
+ }
+ pBn8x8Idx += 4;
+ }
+ } else {
+ // only 4x4 transform
+ for (i = 0; i < 4; i++) {
+ if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+ pBS[i] = 2;
+ } else {
+ pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
+ *pBnIdx);
+ }
+ pBIdx++;
+ pBnIdx++;
+ }
}
+
return uiBSx4;
}
int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) {
@@ -501,7 +604,7 @@
pFilter->iChromaQP[0] = pCurChromaQp[0];
pFilter->iChromaQP[1] = pCurChromaQp[1];
- if (* (uint32_t*)nBS[0][1] != 0) {
+ if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]);
}
@@ -510,7 +613,7 @@
FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]);
}
- if (* (uint32_t*)nBS[0][3] != 0) {
+ if (* (uint32_t*)nBS[0][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]);
}
@@ -536,7 +639,7 @@
pFilter->iChromaQP[0] = pCurChromaQp[0];
pFilter->iChromaQP[1] = pCurChromaQp[1];
- if (* (uint32_t*)nBS[1][1] != 0) {
+ if (* (uint32_t*)nBS[1][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]);
}
@@ -546,7 +649,7 @@
nBS[1][2]);
}
- if (* (uint32_t*)nBS[1][3] != 0) {
+ if (* (uint32_t*)nBS[1][3] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]);
}
}
@@ -581,9 +684,16 @@
iBeta);
if (iAlpha | iBeta) {
TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0);
- pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
+
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+ pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
+ }
+
pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
- pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
+
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+ pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
+ }
}
// luma h
@@ -594,9 +704,15 @@
pFilter->iLumaQP = iCurQp;
if (iAlpha | iBeta) {
- pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+ pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+ }
+
pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
- pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+
+ if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+ pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+ }
}
}
void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter pFilter, int32_t iBoundryFlag) {
@@ -705,6 +821,7 @@
switch (iCurMbType) {
case MB_TYPE_INTRA4x4:
+ case MB_TYPE_INTRA8x8:
case MB_TYPE_INTRA16x16:
case MB_TYPE_INTRA_PCM:
DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag);
@@ -728,7 +845,11 @@
//SKIP MB_16x16 or others
if (iCurMbType != MB_TYPE_SKIP) {
if (iCurMbType == MB_TYPE_16x16) {
- DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+ if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
+ DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+ } else {
+ DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+ }
} else {
DeblockingBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
}
@@ -839,8 +960,8 @@
if (iCpu & WELS_CPU_SSSE3) {
pFunc->pfLumaDeblockingLT4Ver = DeblockLumaLt4V_ssse3;
pFunc->pfLumaDeblockingEQ4Ver = DeblockLumaEq4V_ssse3;
- pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
- pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
+ pFunc->pfLumaDeblockingLT4Hor = DeblockLumaLt4H_ssse3;
+ pFunc->pfLumaDeblockingEQ4Hor = DeblockLumaEq4H_ssse3;
pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
--- a/codec/decoder/core/src/decode_mb_aux.cpp
+++ b/codec/decoder/core/src/decode_mb_aux.cpp
@@ -76,6 +76,96 @@
}
}
+void IdctResAddPred8x8_c (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
+ // To make the ASM code easy to write, should using one funciton to apply hor and ver together, such as we did on HEVC
+ // Ugly code, just for easy debug, the final version need optimization
+ int16_t p[8], b[8];
+ int16_t a[4];
+
+ int16_t iTmp[64];
+ int16_t iRes[64];
+
+ // Horizontal
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ p[j] = pRs[j + (i << 3)];
+ }
+ a[0] = p[0] + p[4];
+ a[1] = p[0] - p[4];
+ a[2] = p[6] - (p[2] >> 1);
+ a[3] = p[2] + (p[6] >> 1);
+
+ b[0] = a[0] + a[3];
+ b[2] = a[1] - a[2];
+ b[4] = a[1] + a[2];
+ b[6] = a[0] - a[3];
+
+ a[0] = -p[3] + p[5] - p[7] - (p[7] >> 1);
+ a[1] = p[1] + p[7] - p[3] - (p[3] >> 1);
+ a[2] = -p[1] + p[7] + p[5] + (p[5] >> 1);
+ a[3] = p[3] + p[5] + p[1] + (p[1] >> 1);
+
+ b[1] = a[0] + (a[3] >> 2);
+ b[3] = a[1] + (a[2] >> 2);
+ b[5] = a[2] - (a[1] >> 2);
+ b[7] = a[3] - (a[0] >> 2);
+
+ iTmp[0 + (i << 3)] = b[0] + b[7];
+ iTmp[1 + (i << 3)] = b[2] - b[5];
+ iTmp[2 + (i << 3)] = b[4] + b[3];
+ iTmp[3 + (i << 3)] = b[6] + b[1];
+ iTmp[4 + (i << 3)] = b[6] - b[1];
+ iTmp[5 + (i << 3)] = b[4] - b[3];
+ iTmp[6 + (i << 3)] = b[2] + b[5];
+ iTmp[7 + (i << 3)] = b[0] - b[7];
+ }
+
+ //Vertical
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ p[j] = iTmp[i + (j << 3)];
+ }
+
+ a[0] = p[0] + p[4];
+ a[1] = p[0] - p[4];
+ a[2] = p[6] - (p[2] >> 1);
+ a[3] = p[2] + (p[6] >> 1);
+
+ b[0] = a[0] + a[3];
+ b[2] = a[1] - a[2];
+ b[4] = a[1] + a[2];
+ b[6] = a[0] - a[3];
+
+ a[0] = -p[3] + p[5] - p[7] - (p[7] >> 1);
+ a[1] = p[1] + p[7] - p[3] - (p[3] >> 1);
+ a[2] = -p[1] + p[7] + p[5] + (p[5] >> 1);
+ a[3] = p[3] + p[5] + p[1] + (p[1] >> 1);
+
+
+ b[1] = a[0] + (a[3] >> 2);
+ b[7] = a[3] - (a[0] >> 2);
+ b[3] = a[1] + (a[2] >> 2);
+ b[5] = a[2] - (a[1] >> 2);
+
+ iRes[ (0 << 3) + i] = b[0] + b[7];
+ iRes[ (1 << 3) + i] = b[2] - b[5];
+ iRes[ (2 << 3) + i] = b[4] + b[3];
+ iRes[ (3 << 3) + i] = b[6] + b[1];
+ iRes[ (4 << 3) + i] = b[6] - b[1];
+ iRes[ (5 << 3) + i] = b[4] - b[3];
+ iRes[ (6 << 3) + i] = b[2] + b[5];
+ iRes[ (7 << 3) + i] = b[0] - b[7];
+ }
+
+ uint8_t* pDst = pPred;
+ for (int i = 0; i < 8; i++) {
+ for (int j = 0; j < 8; j++) {
+ pDst[i * kiStride + j] = WelsClip1 (((32 + iRes[ (i << 3) + j]) >> 6) + pDst[i * kiStride + j]);
+ }
+ }
+
+}
+
void GetI4LumaIChromaAddrTable (int32_t* pBlockOffset, const int32_t kiYStride, const int32_t kiUVStride) {
int32_t* pOffset = pBlockOffset;
int32_t i;
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -144,7 +144,6 @@
return 0;//NO_SUPPORTED_FILTER_IDX
} else {
WelsDeblockingFilterSlice (pCtx, pDeblockMb);
-
}
// any other filter_idc not supported here, 7/22/2010
@@ -159,12 +158,23 @@
WelsChromaDcIdct (pCurLayer->pScaledTCoeff[iMbXy] + 256); // 256 = 16*16
WelsChromaDcIdct (pCurLayer->pScaledTCoeff[iMbXy] + 320); // 320 = 16*16 + 16*4
- for (i = 0; i < 16; i++) { //luma
- iIndex = g_kuiMbCountScan4Idx[i];
- if (pCurLayer->pNzc[iMbXy][iIndex]) {
- iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
- pCtx->pIdctResAddPredFunc (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4));
+ if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ for (i = 0; i < 4; i++) {
+ iIndex = g_kuiMbCountScan4Idx[i << 2];
+ if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
+ || pCurLayer->pNzc[iMbXy][iIndex + 5]) {
+ iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
+ pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6));
+ }
}
+ } else {
+ for (i = 0; i < 16; i++) { //luma
+ iIndex = g_kuiMbCountScan4Idx[i];
+ if (pCurLayer->pNzc[iMbXy][iIndex]) {
+ iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
+ pCtx->pIdctResAddPredFunc (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4));
+ }
+ }
}
for (i = 0; i < 4; i++) { //chroma
@@ -258,6 +268,10 @@
return 0;
}
+ if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) {
+ RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
+ }
+
if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy]))
RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
@@ -326,7 +340,7 @@
pBlk[iStride1] = (iE - iB) >> 1;
}
-void WelsMap4x4NeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
if (pNeighAvail->iLeftAvail) { //left
pSampleAvail[ 6] =
pSampleAvail[12] =
@@ -347,7 +361,7 @@
}
}
-void WelsMap4x4NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) { //left
pSampleAvail[ 6] =
pSampleAvail[12] =
@@ -401,7 +415,7 @@
uint8_t uiNeighAvail = 0;
uint32_t uiCode;
int32_t iCode;
- pCtx->pMap4x4NeighToSampleFunc (pNeighAvail, iSampleAvail);
+ pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
for (i = 0; i < 16; i++) {
int32_t iPrevIntra4x4PredMode = 0;
@@ -429,7 +443,7 @@
}
}
- iFinalMode = CheckIntra4x4PredMode (&iSampleAvail[0], &iBestMode, i);
+ iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
if (iFinalMode == ERR_INVALID_INTRA4X4_MODE) {
return ERR_INFO_INVALID_I4x4_PRED_MODE;
}
@@ -469,6 +483,87 @@
return ERR_NONE;
}
+int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+ PBitStringAux pBs,
+ PDqLayer pCurDqLayer) {
+ // Similar with Intra_4x4, can put them together when needed
+ int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
+ int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+ int32_t iFinalMode, i;
+
+ uint8_t uiNeighAvail = 0;
+ uint32_t uiCode;
+ int32_t iCode;
+ pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
+ // Top-Right : Left : Top-Left : Top
+ uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
+
+ pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;
+
+ for (i = 0; i < 4; i++) {
+ int32_t iPrevIntra4x4PredMode = 0;
+ if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+ WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
+ iPrevIntra4x4PredMode = iCode;
+ } else {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
+ iPrevIntra4x4PredMode = uiCode;
+ }
+ const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
+
+ int8_t iBestMode;
+ if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+ if (iPrevIntra4x4PredMode == -1)
+ iBestMode = kiPredMode;
+ else
+ iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
+ } else {
+ if (iPrevIntra4x4PredMode) {
+ iBestMode = kiPredMode;
+ } else {
+ WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
+ iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
+ }
+ }
+
+ iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
+
+ if (iFinalMode == ERR_INVALID_INTRA4X4_MODE) {
+ return ERR_INFO_INVALID_I4x4_PRED_MODE;
+ }
+
+ for (int j = 0; j < 4; j++) {
+ pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
+ pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
+ iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
+ }
+ }
+ ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
+ pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
+ pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
+ pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
+ if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+ WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+ if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+ return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+ }
+ pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+ } else {
+ WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+ if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+ return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+ }
+ pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+ }
+
+ if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+ || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
+ return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+ }
+
+ return ERR_NONE;
+}
+
int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
PDqLayer pCurDqLayer) {
int32_t iMbXy = pCurDqLayer->iMbXyIndex;
@@ -519,6 +614,9 @@
ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
@@ -539,8 +637,18 @@
} else if (0 == uiMbType) { //I4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
- pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ if (pCtx->pPps->bTransform8x8ModeFlag) {
+ // Transform 8x8 cabac will be added soon
+ WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+ }
+ if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ } else {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ }
//get uiCbp for I4x4
WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
pCurLayer->pCbp[iMbXy] = uiCbp;
@@ -549,6 +657,8 @@
uiCbpLuma = uiCbp & 15;
} else { //I16x16;
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
@@ -565,7 +675,7 @@
ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
pCurLayer->pCbfDc[iMbXy] = 0;
- if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
+ if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
for (i = 0; i < 2; i++) {
pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
@@ -608,26 +718,43 @@
ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
- for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- if (uiCbpLuma & (1 << iId8x8)) {
- int32_t iIdx = (iId8x8 << 2);
- for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
- //Luma (DC and AC decoding together)
- WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
- pCurLayer->pLumaQp[iMbXy],
- pCtx));
- iIdx++;
+ if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ // Transform 8x8 support for CABAC
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ if (uiCbpLuma & (1 << iId8x8)) {
+ WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+ iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
+ pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+ } else {
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
- } else {
- ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
- ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
+ ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ } else {
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ if (uiCbpLuma & (1 << iId8x8)) {
+ int32_t iIdx = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ //Luma (DC and AC decoding together)
+ WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+ g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+ pCurLayer->pLumaQp[iMbXy], pCtx));
+ iIdx++;
+ }
+ } else {
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+ }
+ }
+ ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
int32_t iMbResProperty;
//chroma
@@ -730,10 +857,21 @@
if (0 == uiMbType) { //Intra4x4
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
- pCtx->pFillInfoCacheIntra4x4Func (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ if (pCtx->pPps->bTransform8x8ModeFlag) {
+ WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+ }
+ if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ } else {
+ pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+ }
} else { //Intra16x16
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
@@ -761,6 +899,23 @@
}
if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+
+ if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+ // Need modification when B picutre add in
+ bool bNeedParseTransformSize8x8Flag =
+ (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+ || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+ && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+ && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+ && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+ && (pCtx->pPps->bTransform8x8ModeFlag));
+
+ if (bNeedParseTransformSize8x8Flag) {
+ WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
+ pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
+ }
+ }
+
memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
int32_t iQpDelta, iId8x8, iId4x4;
@@ -798,27 +953,46 @@
ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
}
} else { //non-MB_TYPE_INTRA16x16
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
- for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- if (uiCbpLuma & (1 << iId8x8)) {
- int32_t iIdx = (iId8x8 << 2);
- for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
- //Luma (DC and AC decoding together)
- WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
- g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
- pCurLayer->pLumaQp[iMbXy],
- pCtx));
- iIdx++;
+ if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+ // Transform 8x8 support for CABAC
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ if (uiCbpLuma & (1 << iId8x8)) {
+ WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+ iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
+ IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+ pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+ } else {
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
- } else {
- ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
- ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
}
+ ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ } else {
+ iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ if (uiCbpLuma & (1 << iId8x8)) {
+ int32_t iIdx = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ //Luma (DC and AC decoding together)
+ WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+ g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+ pCurLayer->pLumaQp[iMbXy],
+ pCtx));
+ iIdx++;
+ }
+ } else {
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
+ ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+ }
+ }
+ ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
- ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
//chroma
@@ -886,8 +1060,12 @@
pCurLayer->pCbfDc[iMbXy] = 0;
pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
+
if (uiCode) {
int16_t pMv[2] = {0};
pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
@@ -943,30 +1121,23 @@
//if (!pCtx->pSps->bSeqScalingListPresentFlag[i]) {
if (!pCtx->pPps->bPicScalingListPresentFlag[i]) {
if (i < 6) {
-
-
if (i == 0 || i == 3)
memcpy (pCtx->pPps->iScalingList4x4[i], pCtx->pSps->iScalingList4x4[i], 16 * sizeof (uint8_t));
else
memcpy (pCtx->pPps->iScalingList4x4[i], pCtx->pPps->iScalingList4x4[i - 1], 16 * sizeof (uint8_t));
} else {
-
if (i == 6 || i == 7)
memcpy (pCtx->pPps->iScalingList8x8[ i - 6 ], pCtx->pSps->iScalingList8x8[ i - 6 ], 64 * sizeof (uint8_t));
else
memcpy (pCtx->pPps->iScalingList8x8[ i - 6 ], pCtx->pPps->iScalingList8x8[i - 8], 64 * sizeof (uint8_t));
-
-
}
-
}
}
-
-
}
//Init dequant coeff value for different QP
for (i = 0; i < 6; i++) {
pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
+ pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
for (q = 0; q < 51; q++) {
for (x = 0; x < 16; x++) {
pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] *
@@ -974,13 +1145,10 @@
}
for (y = 0; y < 64; y++) {
pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList8x8[i][y] *
- g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiDequantCoeff[q][x &
- 0x07];//pseudo-code ,holding for 8x8transform into
+ g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8];
}
}
}
-
-
pCtx->bDequantCoeff4x4Init = true;
pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId;
}
@@ -1027,12 +1195,12 @@
}
if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
- pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain1Intra4x4;
- pCtx->pMap4x4NeighToSampleFunc = WelsMap4x4NeighToSampleConstrain1;
+ pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
+ pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleConstrain1;
pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleConstrain1;
} else {
- pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain0Intra4x4;
- pCtx->pMap4x4NeighToSampleFunc = WelsMap4x4NeighToSampleNormal;
+ pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
+ pCtx->pMapNxNNeighToSampleFunc = WelsMapNxNNeighToSampleNormal;
pCtx->pMap16x16NeighToSampleFunc = WelsMap16x16NeighToSampleNormal;
}
@@ -1117,6 +1285,9 @@
pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
uiMbType = uiCode;
if (uiMbType > 25)
@@ -1178,8 +1349,20 @@
} else if (0 == uiMbType) { //reference to JM
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
- pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ if (pCtx->pPps->bTransform8x8ModeFlag) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ }
+ }
+ if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ } else {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ }
//uiCbp
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
@@ -1199,6 +1382,8 @@
uiCbpL = uiCbp & 15;
} else { //I_PCM exclude, we can ignore it
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
@@ -1266,27 +1451,51 @@
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
} else { //non-MB_TYPE_INTRA16x16
- for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- if (uiCbpL & (1 << iId8x8)) {
- int32_t iIndex = (iId8x8 << 2);
- for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
- //Luma (DC and AC decoding together)
- if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
- iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart,
- LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
- return -1;//abnormal
+ if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ if (uiCbpL & (1 << iId8x8)) {
+ int32_t iIndex = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ if (WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex,
+ iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty,
+ pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, pCurLayer->pLumaQp[iMbXy], pCtx)) {
+ return -1;
+ }
+ iIndex++;
}
- iIndex++;
+ } else {
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
- } else {
- ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
- ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ } else {
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ if (uiCbpL & (1 << iId8x8)) {
+ int32_t iIndex = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ //Luma (DC and AC decoding together)
+ if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
+ iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart,
+ LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
+ return -1;//abnormal
+ }
+ iIndex++;
+ }
+ } else {
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+ }
+ }
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
- ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
//chroma
@@ -1399,6 +1608,7 @@
int8_t iRefIndex[LIST_A][30];
pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
+
if (ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) {
return -1;//abnormal
}
@@ -1484,12 +1694,24 @@
if (0 == uiMbType) {
ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
- pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
- if (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
- return -1;
+ if (pCtx->pPps->bTransform8x8ModeFlag) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+ }
}
+ if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ } else {
+ pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+ WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+ }
} else { //I_PCM exclude, we can ignore it
pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
@@ -1510,7 +1732,7 @@
return ERR_INFO_INVALID_CBP;
if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
return ERR_INFO_INVALID_CBP;
- if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy]) {
+ if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
} else //inter
@@ -1520,6 +1742,20 @@
pCurLayer->pCbp[iMbXy] = uiCbp;
uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+
+ // Need modification when B picutre add in
+ bool bNeedParseTransformSize8x8Flag =
+ (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+ || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+ && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+ && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+ && (uiCbpL > 0)
+ && (pCtx->pPps->bTransform8x8ModeFlag));
+
+ if (bNeedParseTransformSize8x8Flag) {
+ WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+ }
}
ST32A4 (&pNzc[0], 0);
@@ -1577,28 +1813,52 @@
ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
} else { //non-MB_TYPE_INTRA16x16
- for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
- iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
- if (uiCbpL & (1 << iId8x8)) {
- int32_t iIndex = (iId8x8 << 2);
- for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
- //Luma (DC and AC decoding together)
- if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
- iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, iMbResProperty,
- pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
- return -1;//abnormal
+ if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ if (uiCbpL & (1 << iId8x8)) {
+ int32_t iIndex = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ if (WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex,
+ iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty,
+ pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4, pCurLayer->pLumaQp[iMbXy], pCtx)) {
+ return -1;
+ }
+ iIndex++;
}
- iIndex++;
+ } else {
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
- } else {
- ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
- ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
}
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+ } else { // Normal T4x4
+ for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+ iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+ if (uiCbpL & (1 << iId8x8)) {
+ int32_t iIndex = (iId8x8 << 2);
+ for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+ //Luma (DC and AC decoding together)
+ if (WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex,
+ iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan + iScanIdxStart, iMbResProperty,
+ pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4), pCurLayer->pLumaQp[iMbXy], pCtx)) {
+ return -1;//abnormal
+ }
+ iIndex++;
+ }
+ } else {
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
+ ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+ }
+ }
+ ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+ ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+ ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+ ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
- ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
- ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
- ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
- ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
}
@@ -1660,6 +1920,9 @@
int32_t iBaseModeFlag, i;
int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
uint32_t uiCode;
+
+ pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+ pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
if (-1 == pSlice->iMbSkipRun) {
WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -862,6 +862,21 @@
pCtx->pGetI4x4LumaPredFunc[I4_PRED_HU ] = WelsI4x4LumaPredHU_c;
pCtx->pGetI4x4LumaPredFunc[I4_PRED_HD ] = WelsI4x4LumaPredHD_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_V ] = WelsI8x8LumaPredV_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_H ] = WelsI8x8LumaPredH_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC ] = WelsI8x8LumaPredDc_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_L ] = WelsI8x8LumaPredDcLeft_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_T ] = WelsI8x8LumaPredDcTop_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DC_128] = WelsI8x8LumaPredDcNA_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDL ] = WelsI8x8LumaPredDDL_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDL_TOP] = WelsI8x8LumaPredDDLTop_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_DDR ] = WelsI8x8LumaPredDDR_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_VL ] = WelsI8x8LumaPredVL_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_VL_TOP] = WelsI8x8LumaPredVLTop_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_VR ] = WelsI8x8LumaPredVR_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_HU ] = WelsI8x8LumaPredHU_c;
+ pCtx->pGetI8x8LumaPredFunc[I4_PRED_HD ] = WelsI8x8LumaPredHD_c;
+
pCtx->pGetIChromaPredFunc[C_PRED_DC ] = WelsIChromaPredDc_c;
pCtx->pGetIChromaPredFunc[C_PRED_H ] = WelsIChromaPredH_c;
pCtx->pGetIChromaPredFunc[C_PRED_V ] = WelsIChromaPredV_c;
@@ -872,6 +887,8 @@
pCtx->pIdctResAddPredFunc = IdctResAddPred_c;
+ pCtx->pIdctResAddPredFunc8x8 = IdctResAddPred8x8_c;
+
#if defined(HAVE_NEON)
if (pCtx->uiCpuFlag & WELS_CPU_NEON) {
pCtx->pIdctResAddPredFunc = IdctResAddPred_neon;
@@ -931,7 +948,7 @@
if (pCtx->uiCpuFlag & WELS_CPU_MMXEXT) {
pCtx->pIdctResAddPredFunc = IdctResAddPred_mmx;
- /////////mmx code opt---
+ ///////mmx code opt---
pCtx->pGetIChromaPredFunc[C_PRED_H] = WelsDecoderIChromaPredH_mmx;
pCtx->pGetIChromaPredFunc[C_PRED_V] = WelsDecoderIChromaPredV_mmx;
pCtx->pGetIChromaPredFunc[C_PRED_DC_L ] = WelsDecoderIChromaPredDcLeft_mmx;
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -1238,11 +1238,16 @@
int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
pCtx->sMb.pLumaQp[i] = (int8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pLumaQp[]");
+ pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = (bool*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+ bool),
+ "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
+ pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
+ "pCtx->sMb.pTransformSize8x8Flag[]");
pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 2,
"pCtx->sMb.pChromaQp[]");
pCtx->sMb.pMvd[i][0] = (int16_t (*)[16][2])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
- pCtx->sMb.pCbfDc[i] = (uint8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint8_t),
+ pCtx->sMb.pCbfDc[i] = (uint16_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
"pCtx->sMb.pCbfDc[]");
pCtx->sMb.pNzc[i] = (int8_t (*)[24])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
"pCtx->sMb.pNzc[]");
@@ -1255,6 +1260,8 @@
"pCtx->sMb.pIntraPredMode[]");
pCtx->sMb.pIntra4x4FinalMode[i] = (int8_t (*)[MB_BLOCK4x4_NUM])WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]");
+ pCtx->sMb.pIntraNxNAvailFlag[i] = (uint8_t (*))WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
+ "pCtx->sMb.pIntraNxNAvailFlag");
pCtx->sMb.pChromaPredMode[i] = (int8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
"pCtx->sMb.pChromaPredMode[]");
pCtx->sMb.pCbp[i] = (int8_t*)WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
@@ -1341,6 +1348,18 @@
pCtx->sMb.pRefIndex[i][0] = NULL;
}
+ if (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) {
+ WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
+
+ pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL;
+ }
+
+ if (pCtx->sMb.pTransformSize8x8Flag[i]) {
+ WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]");
+
+ pCtx->sMb.pTransformSize8x8Flag[i] = NULL;
+ }
+
if (pCtx->sMb.pLumaQp[i]) {
WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]");
@@ -1393,6 +1412,12 @@
pCtx->sMb.pIntra4x4FinalMode[i] = NULL;
}
+ if (pCtx->sMb.pIntraNxNAvailFlag[i]) {
+ WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag");
+
+ pCtx->sMb.pIntraNxNAvailFlag[i] = NULL;
+ }
+
if (pCtx->sMb.pChromaPredMode[i]) {
WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]");
@@ -1989,7 +2014,7 @@
if (kuiQualityId == BASE_QUALITY_ID) {
pDqLayer->pRefPicListReordering = &pSh->pRefPicListReordering;
pDqLayer->pRefPicMarking = &pSh->sRefMarking;
-
+
if (pSh->pPps->bWeightedPredFlag) {
pDqLayer->bUseWeightPredictionFlag = true;
pDqLayer->pPredWeightTable = &pSh->sPredWeightTable;
@@ -2029,6 +2054,8 @@
pCurDq->pSliceIdc = pCtx->sMb.pSliceIdc[0];
pCurDq->pMv[0] = pCtx->sMb.pMv[0][0];
pCurDq->pRefIndex[0] = pCtx->sMb.pRefIndex[0][0];
+ pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0];
+ pCurDq->pTransformSize8x8Flag = pCtx->sMb.pTransformSize8x8Flag[0];
pCurDq->pLumaQp = pCtx->sMb.pLumaQp[0];
pCurDq->pChromaQp = pCtx->sMb.pChromaQp[0];
pCurDq->pMvd[0] = pCtx->sMb.pMvd[0][0];
@@ -2038,6 +2065,7 @@
pCurDq->pScaledTCoeff = pCtx->sMb.pScaledTCoeff[0];
pCurDq->pIntraPredMode = pCtx->sMb.pIntraPredMode[0];
pCurDq->pIntra4x4FinalMode = pCtx->sMb.pIntra4x4FinalMode[0];
+ pCurDq->pIntraNxNAvailFlag = pCtx->sMb.pIntraNxNAvailFlag[0];
pCurDq->pChromaPredMode = pCtx->sMb.pChromaPredMode[0];
pCurDq->pCbp = pCtx->sMb.pCbp[0];
pCurDq->pSubMbType = pCtx->sMb.pSubMbType[0];
--- a/codec/decoder/core/src/get_intra_predictor.cpp
+++ b/codec/decoder/core/src/get_intra_predictor.cpp
@@ -380,6 +380,507 @@
ST32A4 (pPred + kiStride3, LD32 (kuiList));
}
+void WelsI8x8LumaPredV_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ uint64_t uiTop = 0;
+ int32_t iStride[8];
+ uint8_t uiPixelFilterT[8];
+ int32_t i;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : ((
+ pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+
+ // 8-89
+ for (i = 7; i >= 0; i--) {
+ uiTop = ((uiTop << 8) | uiPixelFilterT[i]);
+ }
+
+ for (i = 0; i < 8; i++) {
+ ST64A8 (pPred + kiStride * i, uiTop);
+ }
+}
+
+void WelsI8x8LumaPredH_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ uint64_t uiLeft;
+ int32_t iStride[8];
+ uint8_t uiPixelFilterL[8];
+ int32_t i;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterL[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : ((
+ pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+
+ // 8-90
+ for (i = 0; i < 8; i++) {
+ uiLeft = 0x0101010101010101U * uiPixelFilterL[i];
+ ST64A8 (pPred + iStride[i], uiLeft);
+ }
+}
+
+void WelsI8x8LumaPredDc_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ int32_t iStride[8];
+ uint8_t uiPixelFilterL[8];
+ uint8_t uiPixelFilterT[8];
+ uint16_t uiTotal = 0;
+ int32_t i;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterL[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : ((
+ pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2);
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+ uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : ((
+ pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+
+ // 8-91
+ for (i = 0; i < 8; i++) {
+ uiTotal += uiPixelFilterL[i];
+ uiTotal += uiPixelFilterT[i];
+ }
+
+ const uint8_t kuiMean = ((uiTotal + 8) >> 4);
+ const uint64_t kuiMean64 = 0x0101010101010101U * kuiMean;
+
+ for (i = 0; i < 8; i++) {
+ ST64A8 (pPred + iStride[i], kuiMean64);
+ }
+}
+
+void WelsI8x8LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ int32_t iStride[8];
+ uint8_t uiPixelFilterL[8];
+ uint16_t uiTotal = 0;
+ int32_t i;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterL[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : ((
+ pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+
+ // 8-92
+ for (i = 0; i < 8; i++) {
+ uiTotal += uiPixelFilterL[i];
+ }
+
+ const uint8_t kuiMean = ((uiTotal + 4) >> 3);
+ const uint64_t kuiMean64 = 0x0101010101010101U * kuiMean;
+
+ for (i = 0; i < 8; i++) {
+ ST64A8 (pPred + iStride[i], kuiMean64);
+ }
+}
+
+void WelsI8x8LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ int32_t iStride[8];
+ uint8_t uiPixelFilterT[8];
+ uint16_t uiTotal = 0;
+ int32_t i;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : ((
+ pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+
+ // 8-93
+ for (i = 0; i < 8; i++) {
+ uiTotal += uiPixelFilterT[i];
+ }
+
+ const uint8_t kuiMean = ((uiTotal + 4) >> 3);
+ const uint64_t kuiMean64 = 0x0101010101010101U * kuiMean;
+
+ for (i = 0; i < 8; i++) {
+ ST64A8 (pPred + iStride[i], kuiMean64);
+ }
+}
+
+void WelsI8x8LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // for normal 8 bit depth, 8-94
+ const uint64_t kuiDC64 = 0x8080808080808080U;
+
+ int32_t iStride[8];
+ int32_t i;
+ ST64A8 (pPred, kuiDC64);
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ ST64A8 (pPred + iStride[i], kuiDC64);
+ }
+}
+
+/*down pLeft*/
+void WelsI8x8LumaPredDDL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // Top and Top-right available
+ int32_t iStride[8];
+ uint8_t uiPixelFilterT[16];
+ int32_t i, j;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 15; i++) {
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterT[15] = ((pPred[14 - kiStride] + pPred[15 - kiStride] * 3 + 2) >> 2);
+
+ for (i = 0; i < 8; i++) { // y
+ for (j = 0; j < 8; j++) { // x
+ if (i == 7 && j == 7) { // 8-95
+ pPred[j + iStride[i]] = (uiPixelFilterT[14] + 3 * uiPixelFilterT[15] + 2) >> 2;
+ } else { // 8-96
+ pPred[j + iStride[i]] = (uiPixelFilterT[i + j] + (uiPixelFilterT[i + j + 1] << 1) + uiPixelFilterT[i + j + 2] + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*down pLeft*/
+void WelsI8x8LumaPredDDLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // Top available and Top-right unavailable
+ int32_t iStride[8];
+ uint8_t uiPixelFilterT[16];
+ int32_t i, j;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ // p[x, -1] x=8...15 are replaced with p[7, -1]
+ uiPixelFilterT[7] = ((pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+ for (i = 8; i < 16; i++) {
+ uiPixelFilterT[i] = pPred[7 - kiStride];
+ }
+
+ for (i = 0; i < 8; i++) { // y
+ for (j = 0; j < 8; j++) { // x
+ if (i == 7 && j == 7) { // 8-95
+ pPred[j + iStride[i]] = (uiPixelFilterT[14] + 3 * uiPixelFilterT[15] + 2) >> 2;
+ } else { // 8-96
+ pPred[j + iStride[i]] = (uiPixelFilterT[i + j] + (uiPixelFilterT[i + j + 1] << 1) + uiPixelFilterT[i + j + 2] + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*down right*/
+void WelsI8x8LumaPredDDR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // The TopLeft, Top, Left are all available under this mode
+ int32_t iStride[8];
+ uint8_t uiPixelFilterTL;
+ uint8_t uiPixelFilterL[8];
+ uint8_t uiPixelFilterT[8];
+ int32_t i, j;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterTL = (pPred[-1] + (pPred[-1 - kiStride] << 1) + pPred[-kiStride] + 2) >> 2;
+
+ uiPixelFilterL[0] = ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2);
+ uiPixelFilterT[0] = ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+ uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : ((
+ pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+
+ for (i = 0; i < 8; i++) { // y
+ // 8-98, x < y-1
+ for (j = 0; j < (i - 1); j++) {
+ pPred[j + iStride[i]] = (uiPixelFilterL[i - j - 2] + (uiPixelFilterL[i - j - 1] << 1) + uiPixelFilterL[i - j] + 2) >> 2;
+ }
+ // 8-98, special case, x == y-1
+ if (i >= 1) {
+ j = i - 1;
+ pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterL[0] << 1) + uiPixelFilterL[1] + 2) >> 2;
+ }
+ // 8-99, x==y
+ j = i;
+ pPred[j + iStride[i]] = (uiPixelFilterT[0] + (uiPixelFilterTL << 1) + uiPixelFilterL[0] + 2) >> 2;
+ // 8-97, special case, x == y+1
+ if (i < 7) {
+ j = i + 1;
+ pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterT[0] << 1) + uiPixelFilterT[1] + 2) >> 2;
+ }
+ for (j = i + 2; j < 8; j++) { // 8-97, x > y+1
+ pPred[j + iStride[i]] = (uiPixelFilterT[j - i - 2] + (uiPixelFilterT[j - i - 1] << 1) + uiPixelFilterT[j - i] + 2) >> 2;
+ }
+ }
+}
+
+/*vertical pLeft*/
+void WelsI8x8LumaPredVL_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // Top and Top-right available
+ int32_t iStride[8];
+ uint8_t uiPixelFilterT[16];
+ int32_t i, j;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 15; i++) {
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterT[15] = ((pPred[14 - kiStride] + pPred[15 - kiStride] * 3 + 2) >> 2);
+
+ for (i = 0; i < 8; i++) { // y
+ if ((i & 0x01) == 0) { // 8-108
+ for (j = 0; j < 8; j++) { // x
+ pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + uiPixelFilterT[j + (i >> 1) + 1] + 1) >> 1;
+ }
+ } else { // 8-109
+ for (j = 0; j < 8; j++) { // x
+ pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + (uiPixelFilterT[j + (i >> 1) + 1] << 1) + uiPixelFilterT[j +
+ (i >> 1) + 2] + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*vertical pLeft*/
+void WelsI8x8LumaPredVLTop_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // Top available and Top-right unavailable
+ int32_t iStride[8];
+ uint8_t uiPixelFilterT[16];
+ int32_t i, j;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterT[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2) : ((
+ pPred[-kiStride] * 3 + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ // p[x, -1] x=8...15 are replaced with p[7, -1]
+ uiPixelFilterT[7] = ((pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+ for (i = 8; i < 16; i++) {
+ uiPixelFilterT[i] = pPred[7 - kiStride];
+ }
+
+ for (i = 0; i < 8; i++) { // y
+ if ((i & 0x01) == 0) { // 8-108
+ for (j = 0; j < 8; j++) { // x
+ pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + uiPixelFilterT[j + (i >> 1) + 1] + 1) >> 1;
+ }
+ } else { // 8-109
+ for (j = 0; j < 8; j++) { // x
+ pPred[j + iStride[i]] = (uiPixelFilterT[j + (i >> 1)] + (uiPixelFilterT[j + (i >> 1) + 1] << 1) + uiPixelFilterT[j +
+ (i >> 1) + 2] + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*vertical right*/
+void WelsI8x8LumaPredVR_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // The TopLeft, Top, Left are always available under this mode
+ int32_t iStride[8];
+ uint8_t uiPixelFilterTL;
+ uint8_t uiPixelFilterL[8];
+ uint8_t uiPixelFilterT[8];
+ int32_t i, j;
+ int32_t izVR, izVRDiv;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterTL = (pPred[-1] + (pPred[-1 - kiStride] << 1) + pPred[-kiStride] + 2) >> 2;
+
+ uiPixelFilterL[0] = ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2);
+ uiPixelFilterT[0] = ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+ uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : ((
+ pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+
+ for (i = 0; i < 8; i++) { // y
+ for (j = 0; j < 8; j++) { // x
+ izVR = (j << 1) - i; // 2 * x - y
+ izVRDiv = j - (i >> 1);
+ if (izVR >= 0) {
+ if ((izVR & 0x01) == 0) { // 8-100
+ if (izVRDiv > 0) {
+ pPred[j + iStride[i]] = (uiPixelFilterT[izVRDiv - 1] + uiPixelFilterT[izVRDiv] + 1) >> 1;
+ } else {
+ pPred[j + iStride[i]] = (uiPixelFilterTL + uiPixelFilterT[0] + 1) >> 1;
+ }
+ } else { // 8-101
+ if (izVRDiv > 1) {
+ pPred[j + iStride[i]] = (uiPixelFilterT[izVRDiv - 2] + (uiPixelFilterT[izVRDiv - 1] << 1) + uiPixelFilterT[izVRDiv] + 2)
+ >> 2;
+ } else {
+ pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterT[0] << 1) + uiPixelFilterT[1] + 2) >> 2;
+ }
+ }
+ } else if (izVR == -1) { // 8-102
+ pPred[j + iStride[i]] = (uiPixelFilterL[0] + (uiPixelFilterTL << 1) + uiPixelFilterT[0] + 2) >> 2;
+ } else if (izVR < -2) { // 8-103
+ pPred[j + iStride[i]] = (uiPixelFilterL[-izVR - 1] + (uiPixelFilterL[-izVR - 2] << 1) + uiPixelFilterL[-izVR - 3] + 2)
+ >> 2;
+ } else { // izVR==-2, 8-103, special case
+ pPred[j + iStride[i]] = (uiPixelFilterL[1] + (uiPixelFilterL[0] << 1) + uiPixelFilterTL + 2) >> 2;
+ }
+ }
+ }
+}
+
+/*horizontal up*/
+void WelsI8x8LumaPredHU_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ int32_t iStride[8];
+ uint8_t uiPixelFilterL[8];
+ int32_t i, j;
+ int32_t izHU;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterL[0] = bTLAvail ? ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2) : ((
+ pPred[-1] * 3 + pPred[-1 + iStride[1]] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+
+ for (i = 0; i < 8; i++) { // y
+ for (j = 0; j < 8; j++) { // x
+ izHU = j + (i << 1); // x + 2 * y
+ if (izHU < 13) {
+ if ((izHU & 0x01) == 0) { // 8-110
+ pPred[j + iStride[i]] = (uiPixelFilterL[izHU >> 1] + uiPixelFilterL[1 + (izHU >> 1)] + 1) >> 1;
+ } else { // 8-111
+ pPred[j + iStride[i]] = (uiPixelFilterL[izHU >> 1] + (uiPixelFilterL[1 + (izHU >> 1)] << 1) + uiPixelFilterL[2 +
+ (izHU >> 1)] + 2) >> 2;
+ }
+ } else if (izHU == 13) { // 8-112
+ pPred[j + iStride[i]] = (uiPixelFilterL[6] + 3 * uiPixelFilterL[7] + 2) >> 2;
+ } else { // 8-113
+ pPred[j + iStride[i]] = uiPixelFilterL[7];
+ }
+ }
+ }
+}
+
+/*horizontal down*/
+void WelsI8x8LumaPredHD_c (uint8_t* pPred, const int32_t kiStride, bool bTLAvail, bool bTRAvail) {
+ // The TopLeft, Top, Left are all available under this mode
+ int32_t iStride[8];
+ uint8_t uiPixelFilterTL;
+ uint8_t uiPixelFilterL[8];
+ uint8_t uiPixelFilterT[8];
+ int32_t i, j;
+ int32_t izHD, izHDDiv;
+
+ for (iStride[0] = 0, i = 1; i < 8; i++) {
+ iStride[i] = iStride[i - 1] + kiStride;
+ }
+
+ uiPixelFilterTL = (pPred[-1] + (pPred[-1 - kiStride] << 1) + pPred[-kiStride] + 2) >> 2;
+
+ uiPixelFilterL[0] = ((pPred[-1 - kiStride] + (pPred[-1] << 1) + pPred[-1 + iStride[1]] + 2) >> 2);
+ uiPixelFilterT[0] = ((pPred[-1 - kiStride] + (pPred[-kiStride] << 1) + pPred[1 - kiStride] + 2) >> 2);
+ for (i = 1; i < 7; i++) {
+ uiPixelFilterL[i] = ((pPred[-1 + iStride[i - 1]] + (pPred[-1 + iStride[i]] << 1) + pPred[-1 + iStride[i + 1]] + 2) >>
+ 2);
+ uiPixelFilterT[i] = ((pPred[i - 1 - kiStride] + (pPred[i - kiStride] << 1) + pPred[i + 1 - kiStride] + 2) >> 2);
+ }
+ uiPixelFilterL[7] = ((pPred[-1 + iStride[6]] + pPred[-1 + iStride[7]] * 3 + 2) >> 2);
+ uiPixelFilterT[7] = bTRAvail ? ((pPred[6 - kiStride] + (pPred[7 - kiStride] << 1) + pPred[8 - kiStride] + 2) >> 2) : ((
+ pPred[6 - kiStride] + pPred[7 - kiStride] * 3 + 2) >> 2);
+
+ for (i = 0; i < 8; i++) { // y
+ for (j = 0; j < 8; j++) { // x
+ izHD = (i << 1) - j; // 2*y - x
+ izHDDiv = i - (j >> 1);
+ if (izHD >= 0) {
+ if ((izHD & 0x01) == 0) { // 8-104
+ if (izHDDiv == 0) {
+ pPred[j + iStride[i]] = (uiPixelFilterTL + uiPixelFilterL[0] + 1) >> 1;
+ } else {
+ pPred[j + iStride[i]] = (uiPixelFilterL[izHDDiv - 1] + uiPixelFilterL[izHDDiv] + 1) >> 1;
+ }
+ } else { // 8-105
+ if (izHDDiv == 1) {
+ pPred[j + iStride[i]] = (uiPixelFilterTL + (uiPixelFilterL[0] << 1) + uiPixelFilterL[1] + 2) >> 2;
+ } else {
+ pPred[j + iStride[i]] = (uiPixelFilterL[izHDDiv - 2] + (uiPixelFilterL[izHDDiv - 1] << 1) + uiPixelFilterL[izHDDiv] + 2)
+ >> 2;
+ }
+ }
+ } else if (izHD == -1) { // 8-106
+ pPred[j + iStride[i]] = (uiPixelFilterL[0] + (uiPixelFilterTL << 1) + uiPixelFilterT[0] + 2) >> 2;
+ } else if (izHD < -2) { // 8-107
+ pPred[j + iStride[i]] = (uiPixelFilterT[-izHD - 1] + (uiPixelFilterT[-izHD - 2] << 1) + uiPixelFilterT[-izHD - 3] + 2)
+ >> 2;
+ } else { // 8-107 special case, izHD==-2
+ pPred[j + iStride[i]] = (uiPixelFilterT[1] + (uiPixelFilterT[0] << 1) + uiPixelFilterTL + 2) >> 2;
+ }
+ }
+ }
+}
+
+
void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride) {
const uint64_t kuiVal64 = LD64A8 (&pPred[-kiStride]);
const int32_t kiStride2 = kiStride << 1;
--- a/codec/decoder/core/src/manage_dec_ref.cpp
+++ b/codec/decoder/core/src/manage_dec_ref.cpp
@@ -127,7 +127,7 @@
|| (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->eErrorConMethod)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->eErrorConMethod)
|| (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->eErrorConMethod))
- && (NULL != pCtx->pPreviousDecodedPictureInDpb);
+ && (NULL != pCtx->pPreviousDecodedPictureInDpb);
bCopyPrevious = bCopyPrevious && (pRef->iWidthInPixel == pCtx->pPreviousDecodedPictureInDpb->iWidthInPixel)
&& (pRef->iHeightInPixel == pCtx->pPreviousDecodedPictureInDpb->iHeightInPixel);
--- a/codec/decoder/core/src/parse_mb_syn_cabac.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cabac.cpp
@@ -35,14 +35,15 @@
#include "error_code.h"
namespace WelsDec {
#define IDX_UNUSED -1
-static const int16_t g_kMaxPos [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 3, 3, 14, 14};
-static const int16_t g_kMaxC2 [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 3, 3, 4, 4};
-static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 12, 12, 16, 16};
-static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 44, 44, 47, 47};
-static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 44, 44, 47, 47};
-static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 30, 30, 39, 39};
-static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 30, 30, 39, 39};
+static const int16_t g_kMaxPos [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 63, 3, 3, 14, 14};
+static const int16_t g_kMaxC2 [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4};
+static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 0, 12, 12, 16, 16};
+static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47};
+static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47};
+static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 0, 30, 30, 39, 39};
+static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 0, 30, 30, 39, 39};
+
const uint8_t g_kTopBlkInsideMb[24] = { //for index with z-order 0~23
// 0 1 | 4 5 luma 8*8 block pNonZeroCount[16+8]
0, 0, 1, 1, // 2 3 | 6 7 0 | 1 0 1 2 3
@@ -275,6 +276,24 @@
}
return ERR_NONE;
}
+
+int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail,
+ bool& bTransformSize8x8Flag) {
+ uint32_t uiCode;
+ int32_t iIdxA, iIdxB;
+ int32_t iCtxInc;
+ PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+ PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_TS_8x8_FLAG;
+ iIdxA = (pNeighAvail->iLeftAvail) && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - 1]);
+ iIdxB = (pNeighAvail->iTopAvail)
+ && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - pCtx->pCurDqLayer->iMbWidth]);
+ iCtxInc = iIdxA + iIdxB;
+ WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
+ bTransformSize8x8Flag = !!uiCode;
+
+ return ERR_NONE;
+}
+
int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) {
uint32_t uiCode;
PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
@@ -471,6 +490,9 @@
pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType;
pSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
pPartW[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
+
+ // Need modification when B picture add in, reference to 7.3.5
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0);
}
for (i = 0; i < 4; i++) {
@@ -721,7 +743,7 @@
int32_t iCurrBlkXy = pCtx->pCurDqLayer->iMbXyIndex;
int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring
int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring
- uint8_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
+ uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
int16_t* pMbType = pCtx->pCurDqLayer->pMbType;
int32_t iCtxInc;
uiCbfBit = 0;
@@ -760,22 +782,30 @@
int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx,
uint32_t& uiCoeffNum) {
uint32_t uiCode;
- PWelsCabacCtx pMapCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MAP + g_kBlockCat2CtxOffsetMap [iResProperty];
- PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_LAST + g_kBlockCat2CtxOffsetLast[iResProperty];
+ PWelsCabacCtx pMapCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_MAP_8x8 : NEW_CTX_OFFSET_MAP)
+ + g_kBlockCat2CtxOffsetMap [iResProperty];
+ PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_LAST_8x8 :
+ NEW_CTX_OFFSET_LAST) + g_kBlockCat2CtxOffsetLast[iResProperty];
+
+
int32_t i;
uiCoeffNum = 0;
int32_t i0 = 0;
int32_t i1 = g_kMaxPos[iResProperty];
+ int32_t iCtx;
+
for (i = i0; i < i1; ++i) {
+ iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxSignificantCoeffFlag8x8[i] : i);
//read significant
- WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + i, uiCode));
+ WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + iCtx, uiCode));
if (uiCode) {
* (pSignificantMap++) = 1;
++ uiCoeffNum;
//read last significant
- WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + i, uiCode));
+ iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxLastSignificantCoeffFlag8x8[i] : i);
+ WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + iCtx, uiCode));
if (uiCode) {
memset (pSignificantMap, 0, (i1 - i) * sizeof (int32_t));
return ERR_NONE;
@@ -796,8 +826,11 @@
int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty, PWelsDecoderContext pCtx) {
uint32_t uiCode;
- PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_ONE + g_kBlockCat2CtxOffsetOne[iResProperty];
- PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_ABS + g_kBlockCat2CtxOffsetAbs[iResProperty];
+ PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ONE_8x8 : NEW_CTX_OFFSET_ONE) +
+ g_kBlockCat2CtxOffsetOne[iResProperty];
+ PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ABS_8x8 : NEW_CTX_OFFSET_ABS) +
+ g_kBlockCat2CtxOffsetAbs[iResProperty];
+
const int16_t iMaxType = g_kMaxC2[iResProperty];
int32_t i = g_kMaxPos[iResProperty];
int32_t* pCoff = pSignificant + i;
@@ -823,6 +856,46 @@
}
pCoff--;
}
+ return ERR_NONE;
+}
+
+int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
+ int32_t iIndex, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty,
+ short* sTCoeff, /*int mb_mode*/ uint8_t uiQp, PWelsDecoderContext pCtx) {
+ uint32_t uiTotalCoeffNum = 0;
+ uint32_t uiCbpBit;
+ int32_t pSignificantMap[64] = {0};
+
+ int32_t iMbResProperty = 0;
+ GetMbResProperty (&iMbResProperty, &iResProperty, false);
+ const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] :
+ g_kuiDequantCoeff8x8[uiQp];
+
+ uiCbpBit = 1; // for 8x8, MaxNumCoeff == 64 && uiCbpBit == 1
+ if (uiCbpBit) { //has coeff
+ WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
+ WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));
+ }
+
+ pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex]] =
+ pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 1]] =
+ pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 2]] =
+ pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 3]] = (uint8_t)uiTotalCoeffNum;
+ if (uiTotalCoeffNum == 0) {
+ return ERR_NONE;
+ }
+ int32_t j = 0, i;
+ if (iResProperty == LUMA_DC_AC_8) {
+ do {
+ if (pSignificantMap[j] != 0) {
+ i = pScanTable[ j ];
+ sTCoeff[i] = uiQp >= 36 ? ((pSignificantMap[j] * pDeQuantMul[i]) << (uiQp / 6 - 6)) : ((
+ pSignificantMap[j] * pDeQuantMul[i] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6));
+ }
+ ++j;
+ } while (j < 64);
+ }
+
return ERR_NONE;
}
--- a/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
@@ -151,7 +151,7 @@
pNonZeroCount[5 + 8 * 5] = -1;//unavailable
}
}
-void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer) { //no matter slice type
int32_t iCurXy = pCurLayer->iMbXyIndex;
int32_t iTopXy = 0;
@@ -197,7 +197,7 @@
}
}
-void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain0IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
PDqLayer pCurLayer) { //no matter slice type
int32_t iCurXy = pCurLayer->iMbXyIndex;
int32_t iTopXy = 0;
@@ -214,7 +214,7 @@
}
//intra4x4_pred_mode
- if (pNeighAvail->iTopAvail && IS_INTRA4x4 (pNeighAvail->iTopType)) { //top
+ if (pNeighAvail->iTopAvail && IS_INTRANxN (pNeighAvail->iTopType)) { //top
ST32 (pIntraPredMode + 1, LD32 (&pCurLayer->pIntraPredMode[iTopXy][0]));
} else {
int32_t iPred;
@@ -225,7 +225,7 @@
ST32 (pIntraPredMode + 1, iPred);
}
- if (pNeighAvail->iLeftAvail && IS_INTRA4x4 (pNeighAvail->iLeftType)) { //left
+ if (pNeighAvail->iLeftAvail && IS_INTRANxN (pNeighAvail->iLeftType)) { //left
pIntraPredMode[ 0 + 8 * 1] = pCurLayer->pIntraPredMode[iLeftXy][4];
pIntraPredMode[ 0 + 8 * 2] = pCurLayer->pIntraPredMode[iLeftXy][5];
pIntraPredMode[ 0 + 8 * 3] = pCurLayer->pIntraPredMode[iLeftXy][6];
@@ -565,12 +565,13 @@
return 0;
}
-int32_t CheckIntra4x4PredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex) {
+int32_t CheckIntraNxNPredMode (int32_t* pSampleAvail, int8_t* pMode, int32_t iIndex, bool b8x8) {
int8_t iIdx = g_kuiCache30ScanIdx[iIndex];
+
int32_t iLeftAvail = pSampleAvail[iIdx - 1];
int32_t iTopAvail = pSampleAvail[iIdx - 6];
int32_t bLeftTopAvail = pSampleAvail[iIdx - 7];
- int32_t bRightTopAvail = pSampleAvail[iIdx - 5];
+ int32_t bRightTopAvail = pSampleAvail[iIdx - (b8x8 ? 4 : 5)]; // Diff with 4x4 Pred
int8_t iFinalMode;
@@ -900,6 +901,93 @@
return 0;
}
+int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable, uint8_t* pNonZeroCountCache, PBitStringAux pBs, int32_t iIndex,
+ int32_t iMaxNumCoeff, const uint8_t* kpZigzagTable, int32_t iResidualProperty,
+ int16_t* pTCoeff, int32_t iIdx4x4, uint8_t uiQp,
+ PWelsDecoderContext pCtx) {
+ int32_t iLevel[16], iZerosLeft, iCoeffNum;
+ int32_t iRun[16];
+ int32_t iCurNonZeroCacheIdx, i;
+
+ int32_t iMbResProperty = 0;
+ GetMbResProperty (&iMbResProperty, &iResidualProperty, 1);
+
+ const uint16_t* kpDequantCoeff = pCtx->bUseScalingList ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] :
+ g_kuiDequantCoeff8x8[uiQp];
+
+ int8_t nA, nB, nC;
+ uint8_t uiTotalCoeff, uiTrailingOnes;
+ int32_t iUsedBits = 0;
+ intX_t iCurIdx = pBs->iIndex;
+ uint8_t* pBuf = ((uint8_t*)pBs->pStartBuf) + (iCurIdx >> 3);
+ bool bChromaDc = (CHROMA_DC == iResidualProperty);
+ uint8_t bChroma = (bChromaDc || CHROMA_AC == iResidualProperty);
+ SReadBitsCache sReadBitsCache;
+
+ uint32_t uiCache32Bit = (uint32_t) ((((pBuf[0] << 8) | pBuf[1]) << 16) | (pBuf[2] << 8) | pBuf[3]);
+ sReadBitsCache.uiCache32Bit = uiCache32Bit << (iCurIdx & 0x07);
+ sReadBitsCache.uiRemainBits = 32 - (iCurIdx & 0x07);
+ sReadBitsCache.pBuf = pBuf;
+ //////////////////////////////////////////////////////////////////////////
+
+ if (bChroma) {
+ iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex];
+ nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1];
+ nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8];
+ } else { //luma
+ iCurNonZeroCacheIdx = g_kuiCache48CountScan4Idx[iIndex];
+ nA = pNonZeroCountCache[iCurNonZeroCacheIdx - 1];
+ nB = pNonZeroCountCache[iCurNonZeroCacheIdx - 8];
+ }
+
+ WELS_NON_ZERO_COUNT_AVERAGE (nC, nA, nB);
+
+ iUsedBits += CavlcGetTrailingOnesAndTotalCoeff (uiTotalCoeff, uiTrailingOnes, &sReadBitsCache, pVlcTable, bChromaDc,
+ nC);
+
+ if (iResidualProperty != CHROMA_DC && iResidualProperty != I16_LUMA_DC) {
+ pNonZeroCountCache[iCurNonZeroCacheIdx] = uiTotalCoeff;
+ //////////////////////////////////////////////////////////////////////////
+ }
+ if (0 == uiTotalCoeff) {
+ pBs->iIndex += iUsedBits;
+ return 0;
+ }
+ if ((uiTrailingOnes > 3) || (uiTotalCoeff > 16)) { /////////////////check uiTrailingOnes and uiTotalCoeff
+ return ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES;
+ }
+ if ((i = CavlcGetLevelVal (iLevel, &sReadBitsCache, uiTotalCoeff, uiTrailingOnes)) == -1) {
+ return ERR_INFO_CAVLC_INVALID_LEVEL;
+ }
+ iUsedBits += i;
+ if (uiTotalCoeff < iMaxNumCoeff) {
+ iUsedBits += CavlcGetTotalZeros (iZerosLeft, &sReadBitsCache, uiTotalCoeff, pVlcTable, bChromaDc);
+ } else {
+ iZerosLeft = 0;
+ }
+
+ if ((iZerosLeft < 0) || ((iZerosLeft + uiTotalCoeff) > iMaxNumCoeff)) {
+ return ERR_INFO_CAVLC_INVALID_ZERO_LEFT;
+ }
+ if ((i = CavlcGetRunBefore (iRun, &sReadBitsCache, uiTotalCoeff, pVlcTable, iZerosLeft)) == -1) {
+ return ERR_INFO_CAVLC_INVALID_RUN_BEFORE;
+ }
+ iUsedBits += i;
+ pBs->iIndex += iUsedBits;
+ iCoeffNum = -1;
+
+ for (i = uiTotalCoeff - 1; i >= 0; --i) { //FIXME merge into rundecode?
+ int32_t j;
+ iCoeffNum += iRun[i] + 1; //FIXME add 1 earlier ?
+ j = (iCoeffNum << 2) + iIdx4x4;
+ j = kpZigzagTable[ j ];
+ pTCoeff[j] = uiQp >= 36 ? ((iLevel[i] * kpDequantCoeff[j]) << (uiQp / 6 - 6))
+ : ((iLevel[i] * kpDequantCoeff[j] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6));
+ }
+
+ return 0;
+}
+
int32_t ParseInterInfo (PWelsDecoderContext pCtx, int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
PBitStringAux pBs) {
PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
@@ -941,7 +1029,8 @@
return ERR_INFO_INVALID_REF_INDEX;
}
}
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx]&&ppRefPic[iRefIdx]->bIsComplete);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx]
+ && ppRefPic[iRefIdx]->bIsComplete);
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -981,7 +1070,8 @@
return ERR_INFO_INVALID_REF_INDEX;
}
}
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx[i]]&&ppRefPic[iRefIdx[i]]->bIsComplete);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
+ && ppRefPic[iRefIdx[i]]->bIsComplete);
}
for (i = 0; i < 2; i++) {
PredInter16x8Mv (iMvArray, iRefIdxArray, i << 3, iRefIdx[i], iMv);
@@ -1017,7 +1107,8 @@
return ERR_INFO_INVALID_REF_INDEX;
}
}
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx[i]]&&ppRefPic[iRefIdx[i]]->bIsComplete);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
+ && ppRefPic[iRefIdx[i]]->bIsComplete);
} else {
WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
@@ -1056,6 +1147,9 @@
pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType;
iSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
iPartWidth[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
+
+ // Need modification when B picture add in, reference to 7.3.5
+ pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0);
}
if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag) {
@@ -1085,7 +1179,8 @@
return ERR_INFO_INVALID_REF_INDEX;
}
}
- pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || !(ppRefPic[iRefIdx[i]]&&ppRefPic[iRefIdx[i]]->bIsComplete);
+ pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRefIdx[i]]
+ && ppRefPic[iRefIdx[i]]->bIsComplete);
pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx ] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 1] =
pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 4] = pCurDqLayer->pRefIndex[0][iMbXy][uiScan4Idx + 5] = iRefIdx[i];
--- a/codec/decoder/core/src/rec_mb.cpp
+++ b/codec/decoder/core/src/rec_mb.cpp
@@ -61,11 +61,65 @@
}
}
+int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
+ RecI8x8Luma (iMbXy, pCtx, pScoeffLevel, pDqLayer);
+ RecI4x4Chroma (iMbXy, pCtx, pScoeffLevel, pDqLayer);
+ return ERR_NONE;
+}
+
+int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
+ /*****get local variable from outer variable********/
+ /*prediction info*/
+ uint8_t* pPred = pDqLayer->pPred[0];
+
+ int32_t iLumaStride = pDqLayer->iLumaStride;
+ int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray;
+ PGetIntraPred8x8Func* pGetI8x8LumaPredFunc = pCtx->pGetI8x8LumaPredFunc;
+
+ int8_t* pIntra8x8PredMode = pDqLayer->pIntra4x4FinalMode[iMbXy]; // I_NxN
+ int16_t* pRS = pScoeffLevel;
+ /*itransform info*/
+ PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc8x8;
+
+ /*************local variable********************/
+ uint8_t i = 0;
+ bool bTLAvail[4], bTRAvail[4];
+ // Top-Right : Left : Top-Left : Top
+ bTLAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x02);
+ bTLAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
+ bTLAvail[2] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x04);
+ bTLAvail[3] = true;
+
+ bTRAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
+ bTRAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x08);
+ bTRAvail[2] = true;
+ bTRAvail[3] = false;
+
+ /*************real process*********************/
+ for (i = 0; i < 4; i++) {
+
+ uint8_t* pPredI8x8 = pPred + pBlockOffset[i << 2];
+ uint8_t uiMode = pIntra8x8PredMode[g_kuiScan4[i << 2]];
+
+ pGetI8x8LumaPredFunc[uiMode] (pPredI8x8, iLumaStride, bTLAvail[i], bTRAvail[i]);
+
+ int32_t iIndex = g_kuiMbCountScan4Idx[i << 2];
+ if (pDqLayer->pNzc[iMbXy][iIndex] || pDqLayer->pNzc[iMbXy][iIndex + 1] || pDqLayer->pNzc[iMbXy][iIndex + 4]
+ || pDqLayer->pNzc[iMbXy][iIndex + 5]) {
+ int16_t* pRSI8x8 = &pRS[i << 6];
+ pIdctResAddPredFunc (pPredI8x8, iLumaStride, pRSI8x8);
+ }
+ }
+
+ return ERR_NONE;
+}
+
int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
RecI4x4Luma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
RecI4x4Chroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
return ERR_NONE;
}
+
int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
/*****get local variable from outer variable********/
--- a/test/decoder/DecUT_DeblockCommon.cpp
+++ b/test/decoder/DecUT_DeblockCommon.cpp
@@ -721,6 +721,10 @@
sDqLayer.iMbY = 0; //Only for test easy
sDqLayer.iMbXyIndex = 1; // this function has NO iMbXyIndex validation
+ bool bTSize8x8Flag[50] = {false};
+ sDqLayer.pTransformSize8x8Flag = bTSize8x8Flag;
+ sDqLayer.pTransformSize8x8Flag[sDqLayer.iMbXyIndex] = false;
+
#define UT_DB_LUMA_TEST(iFlag, iQP, iV0, iV1, iV2) \
iBoundryFlag = iFlag; \
memset(iLumaQP, iQP, sizeof(int8_t)*50); \
@@ -777,6 +781,10 @@
sDqLayer.pMv[0] = (int16_t (*) [16][2])&iLayerMv[0];
sDqLayer.pMv[1] = (int16_t (*) [16][2])&iLayerMv[1];
+ bool bTSize8x8Flag[50] = {false};
+ sDqLayer.pTransformSize8x8Flag = bTSize8x8Flag;
+ memset (bTSize8x8Flag, 0, sizeof (bool) * 50);
+
#define UT_DB_CLEAN_STATUS \
memset(iNoZeroCount, 0, sizeof(int8_t)*24*2); \
memset(iLayerRefIndex, 0, sizeof(int8_t)*2*16*2); \
@@ -883,6 +891,10 @@
sDqLayer.iMbXyIndex = 1;
sDqLayer.iMbWidth = 1;
+ bool bTSize8x8Flag[50] = {false};
+ sDqLayer.pTransformSize8x8Flag = bTSize8x8Flag;
+ memset (bTSize8x8Flag, 0, sizeof (bool) * 50);
+
uint8_t iY[50] = {0};
sFilter.pCsData[0] = iY;
sFilter.iCsStride[0] = 4;
@@ -922,12 +934,12 @@
EXPECT_TRUE(iCb[2<<1]==iChromaV1 && iCr[2<<1]==iChromaV1)<<iQP<<" "<<sDqLayer.pMbType[1]; \
EXPECT_TRUE(iCb[(2<<1)*sFilter.iCsStride[1]]==iChromaV2 && iCr[(2<<1)*sFilter.iCsStride[1]]==iChromaV2)<<iQP<<" "<<sDqLayer.pMbType[1];
- // QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA4x4
+ // QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA4x4
iQP = 16 + rand() % 35;
sDqLayer.pMbType[1] = MB_TYPE_INTRA4x4;
UT_DB_MACROBLOCK_TEST (0x03, iQP, 2, 1, 1, 2, 1, 1)
- // QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA16x16
+ // QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA16x16
iQP = 16 + rand() % 35;
sDqLayer.pMbType[1] = MB_TYPE_INTRA16x16;
UT_DB_MACROBLOCK_TEST (0x03, iQP, 2, 1, 1, 2, 1, 1)
@@ -934,7 +946,7 @@
// MbType==0x03, Intra8x8 has not been supported now.
- // QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA_PCM
+ // QP>16, LEFT & TOP, Intra mode MB_TYPE_INTRA_PCM
iQP = 16 + rand() % 35;
sDqLayer.pMbType[1] = MB_TYPE_INTRA_PCM;
UT_DB_MACROBLOCK_TEST (0x03, iQP, 2, 1, 1, 2, 1, 1)