ref: cf5edec5ae21423690c6a4aa16996a2d1c0e92c3
dir: /test/encoder/EncUT_DecodeMbAux.cpp/
#include<gtest/gtest.h>
#include<stdlib.h>
#include<time.h>
#include "decode_mb_aux.h"
#include "wels_common_basis.h"
#include "macros.h"
#include "cpu.h"

using namespace WelsSVCEnc;

// Reference 4x4 Hadamard transform shared by the Hadamard tests below.
// Reads 16 coefficients from pSrc (row-major 4x4), applies the +/- butterfly
// pattern first down the columns and then along the rows, multiplies every
// result by iScale and stores it (narrowed to short) in pDst.
// pSrc and pDst must not alias: the caller still needs the input afterwards.
static void Hadamard4x4ScaledAnchor (const short* pSrc, short* pDst, int iScale) {
  // Row r holds the signs applied to the four inputs that form output r.
  static const int kiSign[4][4] = {
    { 1,  1,  1,  1 },
    { 1,  1, -1, -1 },
    { 1, -1, -1,  1 },
    { 1, -1,  1, -1 }
  };
  short iTmp[16];

  // Column pass: iTmp[4*r + c] combines pSrc[c], pSrc[4+c], pSrc[8+c], pSrc[12+c].
  for (int r = 0; r < 4; r++) {
    for (int c = 0; c < 4; c++) {
      int iSum = 0;
      for (int k = 0; k < 4; k++)
        iSum += kiSign[r][k] * pSrc[4 * k + c];
      iTmp[4 * r + c] = (short)iSum;
    }
  }

  // Row pass: pDst[4*r + c] combines iTmp[4*r .. 4*r+3], then applies the scale.
  for (int r = 0; r < 4; r++) {
    for (int c = 0; c < 4; c++) {
      int iSum = 0;
      for (int k = 0; k < 4; k++)
        iSum += kiSign[c][k] * iTmp[4 * r + k];
      pDst[4 * r + c] = (short) (iSum * iScale);
    }
  }
}

// WelsIHadamard4x4Dc must transform W in place into the reference Hadamard output.
TEST (DecodeMbAuxTest, TestIhdm_4x4_dc) {
  short W[16], Y[16];
  srand ((uint32_t)time (NULL));
  for (int i = 0; i < 16; i++)
    W[i] = rand() % 256 + 1;

  // Build the expectation before the call: the function under test overwrites W.
  Hadamard4x4ScaledAnchor (W, Y, 1);
  WelsIHadamard4x4Dc (W);

  for (int i = 0; i < 16; i++)
    EXPECT_EQ (Y[i], W[i]);
}

// Compares WelsDequantLumaDc4x4 against the inline formula for qp in [18, 51].
TEST (DecodeMbAuxTest, TestDequant_4x4_luma_dc) {
  short T[16], W[16];
  srand ((uint32_t)time (NULL));
  for (int i = 0; i < 16; i++) {
    T[i] = rand() % 256 + 1;
    W[i] = T[i];
  }
  //TODO: QP<18 will cause case fail, need fix and enable the test afterwards
  // NOTE(review): for every qp in this loop qp / 6 >= 3, so both shift counts
  // in the expected-value expression (1 - qp / 6 and 2 - qp / 6) are negative,
  // which is undefined behaviour in C++. W is also not reloaded from T between
  // iterations, so each call dequantizes the previous iteration's output while
  // the expectation is always derived from T. Left unchanged pending the TODO.
  for (int qp = 18; qp < 52; qp++) {
    WelsDequantLumaDc4x4 (W, qp);
    for (int i = 0; i < 16; i++)
      EXPECT_EQ (((T[i]*g_kuiDequantCoeff[qp % 6][0] + (1 << (1 - qp / 6)))) >> (2 - qp / 6), W[i]);
  }
}

// WelsDequantIHadamard4x4_c must equal the reference Hadamard output scaled by mf.
TEST (DecodeMbAuxTest, TestDequant_ihdm_4x4_c) {
  short W[16], Y[16];
  srand ((uint32_t)time (NULL));
  const unsigned short mf = rand() % 16 + 1;
  for (int i = 0; i < 16; i++)
    W[i] = rand() % 256 + 1;

  // Build the expectation before the call: the function under test overwrites W.
  Hadamard4x4ScaledAnchor (W, Y, mf);
  WelsDequantIHadamard4x4_c (W, mf);

  for (int i = 0; i < 16; i++)
    EXPECT_EQ (Y[i], W[i]);
}

// WelsDequant4x4_c: coefficient i is expected to be scaled by mf[i % 8].
TEST (DecodeMbAuxTest, TestDequant_4x4_c) {
  short W[16], T[16];
  unsigned short mf[16];
  srand ((uint32_t)time (NULL));
  for (int i = 0; i < 16; i++) {
    W[i] = rand() % 256 + 1;
    T[i] = W[i];
  }
  // Only the first eight factors are initialised; the expectation below only
  // ever reads mf[0..7].
  for (int i = 0; i < 8; i++)
    mf[i] = rand() % 16 + 1;

  WelsDequant4x4_c (W, mf);

  for (int i = 0; i < 16; i++)
    EXPECT_EQ (T[i]*mf[i % 8], W[i]);
}

// WelsDequantFour4x4_c: same expectation as above over 64 coefficients.
TEST (DecodeMbAuxTest, TestDequant_4_4x4_c) {
  short W[64], T[64];
  unsigned short mf[16];
  srand ((uint32_t)time (NULL));
  for (int i = 0; i < 64; i++) {
    W[i] = rand() % 256 + 1;
    T[i] = W[i];
  }
  // Only the first eight factors are initialised; the expectation below only
  // ever reads mf[0..7].
  for (int i = 0; i < 8; i++)
    mf[i] = rand() % 16 + 1;

  WelsDequantFour4x4_c (W, mf);

  for (int i = 0; i < 64; i++)
    EXPECT_EQ (T[i]*mf[i % 8], W[i]);
}

// Reference for WelsDequantIHadamard2x2Dc: in-place 2x2 butterfly on pDct[0..3]
// followed by a multiplication with iMF. Intermediates are kept in int16_t,
// exactly as in the original anchor.
void WelsDequantHadamard2x2DcAnchor (int16_t* pDct, int16_t iMF) {
  const int16_t iSumU = pDct[0] + pDct[2];
  const int16_t iDelU = pDct[0] - pDct[2];
  const int16_t iSumD = pDct[1] + pDct[3];
  const int16_t iDelD = pDct[1] - pDct[3];
  pDct[0] = (iSumU + iSumD) * iMF;
  pDct[1] = (iSumU - iSumD) * iMF;
  pDct[2] = (iDelU + iDelD) * iMF;
  pDct[3] = (iDelU - iDelD) * iMF;
}

// WelsDequantIHadamard2x2Dc must match the anchor on random input.
TEST (DecodeMbAuxTest, WelsDequantIHadamard2x2Dc) {
  int16_t iDct[4], iRefDct[4];
  int16_t iMF;
  srand ((unsigned int)time (NULL));
  iMF = rand() & 127;
  for (int i = 0; i < 4; i++)
    iDct[i] = iRefDct[i] = (rand() & 65535) - 32768;

  WelsDequantHadamard2x2DcAnchor (iRefDct, iMF);
  WelsDequantIHadamard2x2Dc (iDct, iMF);

  bool ok = true;
  for (int i = 0; i < 4; i++) {
    if (iDct[i] != iRefDct[i]) {
      ok = false;
      break;
    }
  }
  EXPECT_TRUE (ok);
}

#define FDEC_STRIDE 32

// Scans two iRows x iCols sample blocks, both laid out with a row pitch of
// FDEC_STRIDE, and returns the raster index (row * iCols + col) of the FIRST
// differing sample, or -1 when the blocks are identical.
// The tests used to do this with a `break` inside a nested loop, which only
// left the inner loop, so the reported index could be overwritten by a later
// row. Pass/fail is unaffected; only the reported position changes.
static int FirstMismatchIndex (const uint8_t* pActual, const uint8_t* pExpected, int iRows, int iCols) {
  for (int i = 0; i < iRows; i++) {
    for (int j = 0; j < iCols; j++) {
      if (pActual[i * FDEC_STRIDE + j] != pExpected[i * FDEC_STRIDE + j])
        return i * iCols + j;
    }
  }
  return -1;
}

// Reference 4x4 inverse transform + reconstruction.
// p_dst: 4x4 block of samples with row pitch FDEC_STRIDE; read as prediction
//        and overwritten with the clipped reconstruction.
// dct:   16 coefficients, row-major.
void WelsIDctT4Anchor (uint8_t* p_dst, int16_t dct[16]) {
  int16_t tmp[16];
  int32_t iStridex2 = (FDEC_STRIDE << 1);
  int32_t iStridex3 = iStridex2 + FDEC_STRIDE;
  uint8_t uiDst = 0;
  int i;

  // First pass: 1-D transform of each row of four coefficients.
  for (i = 0; i < 4; i++) {
    const int k = i << 2;
    tmp[k]     = dct[k] + dct[k + 1] + dct[k + 2] + (dct[k + 3] >> 1);
    tmp[k + 1] = dct[k] + (dct[k + 1] >> 1) - dct[k + 2] - dct[k + 3];
    tmp[k + 2] = dct[k] - (dct[k + 1] >> 1) - dct[k + 2] + dct[k + 3];
    tmp[k + 3] = dct[k] - dct[k + 1] + dct[k + 2] - (dct[k + 3] >> 1);
  }

  // Second pass: 1-D transform of each column, round ((x + 32) >> 6), add to
  // the sample already in p_dst and clip with WelsClip1.
  for (i = 0; i < 4; i++) {
    uiDst = p_dst[i];
    p_dst[i] = WelsClip1 (uiDst + ((tmp[i] + tmp[4 + i] + tmp[8 + i] + (tmp[12 + i] >> 1) + 32) >> 6));
    uiDst = p_dst[i + FDEC_STRIDE];
    p_dst[i + FDEC_STRIDE] = WelsClip1 (uiDst + ((tmp[i] + (tmp[4 + i] >> 1) - tmp[8 + i] - tmp[12 + i] + 32) >> 6));
    uiDst = p_dst[i + iStridex2];
    p_dst[i + iStridex2] = WelsClip1 (uiDst + ((tmp[i] - (tmp[4 + i] >> 1) - tmp[8 + i] + tmp[12 + i] + 32) >> 6));
    uiDst = p_dst[i + iStridex3];
    p_dst[i + iStridex3] = WelsClip1 (uiDst + ((tmp[i] - tmp[4 + i] + tmp[8 + i] - (tmp[12 + i] >> 1) + 32) >> 6));
  }
}

// WelsIDctT4Rec_c must reproduce the anchor on a random 4x4 block.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_c) {
  int16_t iRefDct[16];
  uint8_t iRefDst[16 * FDEC_STRIDE];
  ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
  srand ((unsigned int)time (NULL));
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      iRefDct[i * 4 + j] = iDct[i * 4 + j] = (rand() & 65535) - 32768;
      iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
    }
  }

  // The anchor reconstructs in place on top of the prediction copy in iRefDst.
  WelsIDctT4Anchor (iRefDst, iRefDct);
  WelsIDctT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

  const int iMismatch = FirstMismatchIndex (iRec, iRefDst, 4, 4);
  EXPECT_EQ (iMismatch, -1);
}

#if defined(X86_ASM)
// WelsIDctT4Rec_mmx must agree with WelsIDctT4Rec_c; the body is skipped when
// the CPU does not report WELS_CPU_MMXEXT.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_mmx) {
  int32_t iCpuCores = 0;
  uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);
  if (uiCpuFeatureFlag & WELS_CPU_MMXEXT) {
    ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iRecC, 16 * FDEC_STRIDE, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iRecM, 16 * FDEC_STRIDE, 16);
    srand ((unsigned int)time (NULL));
    for (int i = 0; i < 4; i++) {
      for (int j = 0; j < 4; j++) {
        // Coefficients are restricted to [-2^11, 2^11) for this comparison.
        iDct[i * 4 + j] = (rand() & ((1 << 12) - 1)) - (1 << 11);
        iPred[i * FDEC_STRIDE + j] = rand() & 255;
      }
    }

    WelsIDctT4Rec_c (iRecC, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
    WelsIDctT4Rec_mmx (iRecM, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

    const int iMismatch = FirstMismatchIndex (iRecC, iRecM, 4, 4);
    EXPECT_EQ (iMismatch, -1);
  }
}
#endif

// Reference for an 8x8 area: four 4x4 anchor transforms laid out as
// top-left, top-right, bottom-left, bottom-right.
void WelsIDctT8Anchor (uint8_t* p_dst, int16_t dct[4][16]) {
  WelsIDctT4Anchor (&p_dst[0],                   dct[0]);
  WelsIDctT4Anchor (&p_dst[4],                   dct[1]);
  WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
  WelsIDctT4Anchor (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
}

// WelsIDctFourT4Rec_c must reproduce the 8x8 anchor on random input.
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
  int16_t iRefDct[4][16];
  uint8_t iRefDst[16 * FDEC_STRIDE];
  ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 64, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
  srand ((unsigned int)time (NULL));
  for (int k = 0; k < 4; k++)
    for (int i = 0; i < 16; i++)
      iRefDct[k][i] = iDct[k * 16 + i] = (rand() & 65535) - 32768;
  for (int i = 0; i < 8; i++)
    for (int j = 0; j < 8; j++)
      iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;

  WelsIDctT8Anchor (iRefDst, iRefDct);
  WelsIDctFourT4Rec_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

  const int iMismatch = FirstMismatchIndex (iRec, iRefDst, 8, 8);
  EXPECT_EQ (iMismatch, -1);
}

// Reference DC-only reconstruction of a 16-wide, 4-high strip.
// Each of the four DC values in dct[] covers four consecutive columns; the
// rounded value ((dc + 32) >> 6) is added to every sample and clipped.
void WelsIDctRecI16x4DcAnchor (uint8_t* p_dst, int16_t dct[4]) {
  for (int i = 0; i < 4; i++, p_dst += FDEC_STRIDE) {
    for (int iCol = 0; iCol < 16; iCol++)
      p_dst[iCol] = WelsClip1 (p_dst[iCol] + ((dct[iCol >> 2] + 32) >> 6));
  }
}

// Reference DC-only reconstruction of a 16x16 block: four 16x4 strips, one
// per row of the 4x4 DC matrix.
void WelsIDctRecI16x16DcAnchor (uint8_t* p_dst, int16_t dct[4][4]) {
  for (int i = 0; i < 4; i++, p_dst += 4 * FDEC_STRIDE)
    WelsIDctRecI16x4DcAnchor (&p_dst[0], dct[i]);
}

// WelsIDctRecI16x16Dc_c must reproduce the 16x16 DC anchor.
// Note: unlike the tests above, this one does not call srand() itself.
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_c) {
  uint8_t iRefDst[16 * FDEC_STRIDE];
  int16_t iRefDct[4][4];
  ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
  for (int i = 0; i < 16; i++)
    for (int j = 0; j < 16; j++)
      iRefDst[i * FDEC_STRIDE + j] = iPred[i * FDEC_STRIDE + j] = rand() & 255;
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      iRefDct[i][j] = iDct[i * 4 + j] = (rand() & 65535) - 32768;

  WelsIDctRecI16x16DcAnchor (iRefDst, iRefDct);
  WelsIDctRecI16x16Dc_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

  const int iMismatch = FirstMismatchIndex (iRec, iRefDst, 16, 16);
  EXPECT_EQ (iMismatch, -1);
}

#if defined(X86_ASM)
// WelsIDctRecI16x16Dc_sse2 must reproduce the 16x16 DC anchor; the body is
// skipped when the CPU does not report WELS_CPU_SSE2.
// Note: this test does not call srand() itself.
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_sse2) {
  int32_t iCpuCores = 0;
  uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);
  if (uiCpuFeatureFlag & WELS_CPU_SSE2) {
    uint8_t iRefDst[16 * FDEC_STRIDE];
    int16_t iRefDct[4][4];
    ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
    for (int i = 0; i < 16; i++)
      for (int j = 0; j < 16; j++)
        iRefDst[i * FDEC_STRIDE + j] = iPred[i * FDEC_STRIDE + j] = rand() & 255;
    for (int i = 0; i < 4; i++)
      for (int j = 0; j < 4; j++)
        iRefDct[i][j] = iDct[i * 4 + j] = (rand() & ((1 << 15) - 1)) - (1 <<
                                          14); //2^14 limit, (2^15+32) will cause overflow for SSE2.

    WelsIDctRecI16x16DcAnchor (iRefDst, iRefDct);
    WelsIDctRecI16x16Dc_sse2 (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

    const int iMismatch = FirstMismatchIndex (iRec, iRefDst, 16, 16);
    EXPECT_EQ (iMismatch, -1);
  }
}
#endif