ref: 9b21ece789c066db96be8810f0892560adcd63b8
dir: /codec/encoder/core/src/decode_mb_aux.cpp/
/*! * \copy * Copyright (c) 2013, Cisco Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * */ #include <string.h> #include "decode_mb_aux.h" #include "wels_common_basis.h" #include "cpu_core.h" namespace WelsSVCEnc { /**************************************************************************** * Dequant and Ihdm functions ****************************************************************************/ void WelsIHadamard4x4Dc(int16_t *pRes) //pBuffer size : 4x4 { int16_t iTemp[4]; int32_t i = 4; while( --i >= 0 ) { const int32_t kiIdx = i<<2; const int32_t kiIdx1 = 1 + kiIdx; const int32_t kiIdx2 = 1 + kiIdx1; const int32_t kiIdx3 = 1 + kiIdx2; iTemp[0] = pRes[kiIdx ] + pRes[kiIdx2]; iTemp[1] = pRes[kiIdx ] - pRes[kiIdx2]; iTemp[2] = pRes[kiIdx1] - pRes[kiIdx3]; iTemp[3] = pRes[kiIdx1] + pRes[kiIdx3]; pRes[kiIdx ] = iTemp[0] + iTemp[3]; pRes[kiIdx1] = iTemp[1] + iTemp[2]; pRes[kiIdx2] = iTemp[1] - iTemp[2]; pRes[kiIdx3] = iTemp[0] - iTemp[3]; } i = 4; while( --i >= 0 ) { const int32_t kiI4 = 4 + i; const int32_t kiI8 = 4 + kiI4; const int32_t kiI12 = 4 + kiI8; iTemp[0] = pRes[i ] + pRes[kiI8 ]; iTemp[1] = pRes[i ] - pRes[kiI8 ]; iTemp[2] = pRes[kiI4 ] - pRes[kiI12]; iTemp[3] = pRes[kiI4 ] + pRes[kiI12]; pRes[i ] = iTemp[0] + iTemp[3]; pRes[kiI4 ] = iTemp[1] + iTemp[2]; pRes[kiI8 ] = iTemp[1] - iTemp[2]; pRes[kiI12] = iTemp[0] - iTemp[3]; } } /* for qp < 12 */ void WelsDequantLumaDc4x4(int16_t *pRes, const int32_t kiQp) { int32_t i = 15; const uint16_t kuiDequantValue = g_kuiDequantCoeff[kiQp%6][0]; const int16_t kiQF0 = kiQp / 6; const int16_t kiQF1 = 2 - kiQF0; const int16_t kiQF0S = 1 << (1 - kiQF0); while ( i >= 0 ) { pRes[i ] = ( pRes[i ] * kuiDequantValue + kiQF0S ) >> kiQF1; pRes[i-1] = ( pRes[i-1] * kuiDequantValue + kiQF0S ) >> kiQF1; pRes[i-2] = ( pRes[i-2] * kuiDequantValue + kiQF0S ) >> kiQF1; pRes[i-3] = ( pRes[i-3] * kuiDequantValue + kiQF0S ) >> kiQF1; i -= 4; } } /* for qp >= 12 */ void WelsDequantIHadamard4x4_c(int16_t *pRes, const uint16_t kuiMF) { int16_t iTemp[4]; int32_t i; for(i = 0; i < 16; i += 4) { iTemp[0] = pRes[i ] + pRes[i+2]; iTemp[1] = pRes[i ] - pRes[i+2]; iTemp[2] = pRes[i+1] - pRes[i+3]; iTemp[3] = pRes[i+1] + pRes[i+3]; pRes[i ] = iTemp[0] + iTemp[3]; pRes[i+1] = iTemp[1] + iTemp[2]; pRes[i+2] = iTemp[1] - iTemp[2]; pRes[i+3] = iTemp[0] - iTemp[3]; } for(i = 0; i < 4; i++) { iTemp[0] = pRes[i ] + pRes[i+8 ]; iTemp[1] = pRes[i ] - pRes[i+8 ]; iTemp[2] = pRes[i+4 ] - pRes[i+12]; iTemp[3] = pRes[i+4 ] + pRes[i+12]; pRes[i ] = (iTemp[0] + iTemp[3]) * kuiMF; pRes[i+4 ] = (iTemp[1] + iTemp[2]) * kuiMF; pRes[i+8 ] = (iTemp[1] - iTemp[2]) * kuiMF; pRes[i+12] = (iTemp[0] - iTemp[3]) * kuiMF; } } void WelsDequantIHadamard2x2Dc( int16_t* pDct, const uint16_t kuiMF) { const int16_t kiSumU = pDct[0] + pDct[2]; const int16_t kiDelU = pDct[0] - pDct[2]; const int16_t kiSumD = pDct[1] + pDct[3]; const int16_t kiDelD = pDct[1] - pDct[3]; pDct[0] = (kiSumU + kiSumD) * kuiMF; pDct[1] = (kiSumU - kiSumD) * kuiMF; pDct[2] = (kiDelU + kiDelD) * kuiMF; pDct[3] = (kiDelU - kiDelD) * kuiMF; } void WelsDequant4x4_c(int16_t *pRes, const uint16_t* kpMF) { int32_t i; for(i = 0; i < 8; i++) { pRes[i] *= kpMF[i]; pRes[i+8] *=kpMF[i]; } } void WelsDequantFour4x4_c(int16_t *pRes, const uint16_t* kpMF) { int32_t i; for(i = 0; i < 8; i++) { pRes[i] *= kpMF[i]; pRes[i+8] *= kpMF[i]; pRes[i+16]*= kpMF[i]; pRes[i+24]*= kpMF[i]; pRes[i+32]*= kpMF[i]; pRes[i+40]*= kpMF[i]; pRes[i+48]*= kpMF[i]; pRes[i+56]*= kpMF[i]; } } /**************************************************************************** * IDCT functions, final output = prediction(CS) + IDCT(scaled_coeff) ****************************************************************************/ void WelsIDctT4Rec_c( uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct ) { int32_t i; int16_t iTemp[16]; int32_t iDstStridex2 = iStride << 1; int32_t iDstStridex3 = iStride + iDstStridex2; int32_t iPredStridex2 = iPredStride << 1; int32_t iPredStridex3 = iPredStride + iPredStridex2; for (i = 0; i < 4; i ++) //horizon { int32_t iIdx = i << 2; const int32_t kiHorSumU = pDct[iIdx] + pDct[iIdx+2]; // add 0-2 const int32_t kiHorDelU = pDct[iIdx] - pDct[iIdx+2]; // sub 0-2 const int32_t kiHorSumD = pDct[iIdx+1] + (pDct[iIdx+3] >> 1); const int32_t kiHorDelD = (pDct[iIdx+1] >> 1) - pDct[iIdx+3]; iTemp[iIdx ] = kiHorSumU + kiHorSumD; iTemp[iIdx+1] = kiHorDelU + kiHorDelD; iTemp[iIdx+2] = kiHorDelU - kiHorDelD; iTemp[iIdx+3] = kiHorSumU - kiHorSumD; } for (i = 0; i < 4; i ++) //vertical { const int32_t kiVerSumL = iTemp[i] + iTemp[8+i]; const int32_t kiVerDelL = iTemp[i] - iTemp[8+i]; const int32_t kiVerDelR = (iTemp[4+i] >> 1) - iTemp[12+i]; const int32_t kiVerSumR = iTemp[4+i] + (iTemp[12+i] >> 1); pRec[i ] = WELS_CLIP1( pPred[i ] + ((kiVerSumL + kiVerSumR + 32) >> 6) ); pRec[iStride+i ] = WELS_CLIP1( pPred[iPredStride+i ] + ((kiVerDelL + kiVerDelR + 32) >> 6) ); pRec[iDstStridex2 + i] = WELS_CLIP1( pPred[iPredStridex2+i] + ((kiVerDelL - kiVerDelR + 32) >> 6) ); pRec[iDstStridex3 + i] = WELS_CLIP1( pPred[iPredStridex3+i] + ((kiVerSumL - kiVerSumR + 32) >> 6) ); } } void WelsIDctFourT4Rec_c( uint8_t* pRec, int32_t iStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct ) { int32_t iDstStridex4 = iStride << 2; int32_t iPredStridex4 = iPredStride << 2; WelsIDctT4Rec_c( pRec, iStride, pPred, iPredStride, pDct ); WelsIDctT4Rec_c( &pRec[4], iStride, &pPred[4], iPredStride, pDct+16 ); WelsIDctT4Rec_c( &pRec[iDstStridex4 ], iStride, &pPred[iPredStridex4 ], iPredStride, pDct+32 ); WelsIDctT4Rec_c( &pRec[iDstStridex4+4], iStride, &pPred[iPredStridex4+4], iPredStride, pDct+48 ); } void WelsIDctT4RecOnMb(uint8_t* pDst, int32_t iDstStride, uint8_t* pPred, int32_t iPredStride, int16_t* pDct, PIDctFunc pfIDctFourT4) { int32_t iDstStridex8 = iDstStride << 3; int32_t iPredStridex8 = iPredStride << 3; pfIDctFourT4(&pDst[0], iDstStride, &pPred[0], iPredStride, pDct); pfIDctFourT4(&pDst[8], iDstStride, &pPred[8], iPredStride, pDct+64); pfIDctFourT4(&pDst[iDstStridex8], iDstStride, &pPred[iPredStridex8], iPredStride, pDct+128); pfIDctFourT4(&pDst[iDstStridex8+8], iDstStride, &pPred[iPredStridex8+8], iPredStride, pDct+192); } /* * pfIDctI16x16Dc: do luma idct of an MB for I16x16 mode, when only dc value are non-zero */ void WelsIDctRecI16x16Dc_c(uint8_t *pRec, int32_t iStride, uint8_t *pPred, int32_t iPredStride, int16_t *pDctDc) { int32_t i, j; for (i = 0; i < 16; i ++) { for(j = 0; j < 16; j++) { pRec[j] = WELS_CLIP1( pPred[j] + ((pDctDc[(i&0x0C) + (j>>2)] + 32) >> 6) ); } pRec += iStride; pPred += iPredStride; } } void WelsGetEncBlockStrideOffset(int32_t *pBlock, const int32_t kiStrideY, const int32_t kiStrideUV) { int32_t i, j, k, r; for(j = 0; j < 4; j++) { i = j << 2; k = (j&0x01) << 1; r = j&0x02; pBlock[i] = (0 + k + (0 + r) * kiStrideY) << 2; pBlock[i+1] = (1 + k + (0 + r) * kiStrideY) << 2; pBlock[i+2] = (0 + k + (1 + r) * kiStrideY) << 2; pBlock[i+3] = (1 + k + (1 + r) * kiStrideY) << 2; pBlock[16+j] = pBlock[20+j] = ((j&0x01) + r * kiStrideUV) << 2; } } void WelsInitReconstructionFuncs( SWelsFuncPtrList *pFuncList, uint32_t uiCpuFlag ) { pFuncList->pfDequantization4x4 = WelsDequant4x4_c; pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_c; pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_c; pFuncList->pfIDctT4 = WelsIDctT4Rec_c; pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_c; pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_c; #if defined(X86_ASM) if ( uiCpuFlag & WELS_CPU_MMXEXT ) { pFuncList->pfIDctT4 = WelsIDctT4Rec_mmx; } if ( uiCpuFlag & WELS_CPU_SSE2 ) { pFuncList->pfDequantization4x4 = WelsDequant4x4_sse2; pFuncList->pfDequantizationFour4x4 = WelsDequantFour4x4_sse2; pFuncList->pfDequantizationIHadamard4x4 = WelsDequantIHadamard4x4_sse2; pFuncList->pfIDctFourT4 = WelsIDctFourT4Rec_sse2; pFuncList->pfIDctI16x16Dc = WelsIDctRecI16x16Dc_sse2; } #endif//X86_ASM } }