ref: 41ddc536d6e82c4b86f521d5ed864f986198fe28
dir: /codec/encoder/core/src/svc_encode_mb.cpp/
/*!
 * \copy
 *     Copyright (c)  2009-2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * \file    svc_encode_mb.cpp
 *
 * \brief   Implementation for pCurMb encoding
 *
 * \date    05/19/2009 Created
 *
 *************************************************************************************
 */

#include "svc_encode_mb.h"
#include "encode_mb_aux.h"
#include "decode_mb_aux.h"
#include "ls_defines.h"

#include <limits.h>   // INT_MAX
#include <string.h>   // memset

namespace WelsSVCEnc {

/*!
 * \brief   Run the "four 4x4" transform callback on each 8x8 quadrant of a 16x16 macroblock.
 *
 * The quadrants are visited in the order top-left, top-right, bottom-left, bottom-right.
 * Each call receives its own 64-coefficient slice of pRes. The prediction buffer is
 * addressed with a fixed stride of 16 (offsets 0, 8, 128 and 136).
 *
 * \param   pRes          output coefficients, 4 x 64 int16_t
 * \param   pEncMb        top-left sample of the source macroblock
 * \param   iEncStride    stride of pEncMb
 * \param   pBestPred     top-left sample of the 16x16 prediction (stride 16)
 * \param   pfDctFourT4   transform callback invoked once per quadrant
 */
void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) {
  pfDctFourT4 (pRes,       pEncMb,                      iEncStride, pBestPred,       16);
  pfDctFourT4 (pRes + 64,  pEncMb + 8,                  iEncStride, pBestPred + 8,   16);
  pfDctFourT4 (pRes + 128, pEncMb + 8 * iEncStride,     iEncStride, pBestPred + 128, 16);
  pfDctFourT4 (pRes + 192, pEncMb + 8 * iEncStride + 8, iEncStride, pBestPred + 136, 16);
}

/*!
 * \brief   Transform, quantize and reconstruct the luma of an I16x16 macroblock.
 *
 * Sequence, as implemented below:
 *  -# WelsDctMb() fills pMbCache->pCoeffLevel from the source MB and pMbCache->pMemPredLuma.
 *  -# The DC terms go through pfTransformHadamard4x4Dc / pfQuantizationDc4x4 and are scanned
 *     into pMbCache->pDct->iLumaI16x16Dc.
 *  -# The remaining coefficients are quantized per 8x8 and scanned (AC scan) into
 *     pMbCache->pDct->iLumaBlock; per-4x4 non-zero counts are stored in pCurMb->pNonZeroCount.
 *  -# The reconstruction is written to pMbCache->SPicData.pCsMb[0]:
 *     full inverse path if any AC count is non-zero (also sets pCurMb->uiCbp = 15),
 *     DC-only inverse if only the DC count is non-zero, otherwise a plain copy of the prediction.
 *
 * \param   pEncCtx   encoder context (function table and current layer strides are read)
 * \param   pCurMb    current macroblock; pNonZeroCount and uiCbp are updated
 * \param   pMbCache  macroblock cache holding coefficient, prediction and output buffers
 */
void WelsEncRecI16x16Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) {
  ENFORCE_STACK_ALIGN_1D (int16_t, aDctT4Dc, 16, 16)
  SWelsFuncPtrList* pFuncList   = pEncCtx->pFuncList;
  SDqLayer* pCurDqLayer         = pEncCtx->pCurDqLayer;
  const int32_t kiEncStride     = pCurDqLayer->iEncStride[0];
  const int32_t kiRecStride     = pCurDqLayer->iCsStride[0];
  int16_t* pRes                 = pMbCache->pCoeffLevel;
  int16_t* pBlock               = pMbCache->pDct->iLumaBlock[0];
  uint8_t* pPred                = pMbCache->SPicData.pCsMb[0];
  uint8_t* pBestPred            = pMbCache->pMemPredLuma;
  const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[0];
  const uint8_t uiQp            = pCurMb->uiLumaQp;
  const int16_t* pMF            = g_kiQuantMF[uiQp];
  const int16_t* pFF            = g_iQuantIntraFF[uiQp];
  uint32_t uiNoneZeroCount      = 0;
  uint32_t uiNoneZeroCountMbAc  = 0;
  uint32_t uiCountI16x16Dc      = 0;

  WelsDctMb (pRes, pMbCache->SPicData.pEncMb[0], kiEncStride, pBestPred, pFuncList->pfDctFourT4);

  // DC path: the DC quantizer is called with pFF[0] doubled and pMF[0] halved.
  pFuncList->pfTransformHadamard4x4Dc (aDctT4Dc, pRes);
  pFuncList->pfQuantizationDc4x4 (aDctT4Dc, pFF[0] << 1, pMF[0] >> 1);
  pFuncList->pfScan4x4 (pMbCache->pDct->iLumaI16x16Dc, aDctT4Dc);
  uiCountI16x16Dc = pFuncList->pfGetNoneZeroCount (pMbCache->pDct->iLumaI16x16Dc);

  // AC path: quantize one 8x8 (four 4x4 blocks) per iteration, then scan each 4x4.
  for (int32_t i = 0; i < 4; i++) {
    pFuncList->pfQuantizationFour4x4 (pRes, pFF, pMF);
    pFuncList->pfScan4x4Ac (pBlock,      pRes);
    pFuncList->pfScan4x4Ac (pBlock + 16, pRes + 16);
    pFuncList->pfScan4x4Ac (pBlock + 32, pRes + 32);
    pFuncList->pfScan4x4Ac (pBlock + 48, pRes + 48);
    pRes   += 64;
    pBlock += 64;
  }
  pRes   -= 256;
  pBlock -= 256;

  // Per-4x4 non-zero counts, stored at the positions given by g_kuiMbCountScan4Idx.
  for (int32_t i = 0; i < 16; i++) {
    uiNoneZeroCount = pFuncList->pfGetNoneZeroCount (pBlock);
    pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = uiNoneZeroCount;
    uiNoneZeroCountMbAc += uiNoneZeroCount;
    pBlock += 16;
  }

  if (uiCountI16x16Dc > 0) {
    // NOTE(review): the reason for the separate uiQp < 12 path is not visible in this file -
    // presumably a precision concern of the combined callback at low QP; confirm.
    if (uiQp < 12) {
      WelsIHadamard4x4Dc (aDctT4Dc);
      WelsDequantLumaDc4x4 (aDctT4Dc, uiQp);
    } else {
      pFuncList->pfDequantizationIHadamard4x4 (aDctT4Dc, g_kuiDequantCoeff[uiQp][0] >> 2);
    }
  }

  if (uiNoneZeroCountMbAc > 0) {
    pCurMb->uiCbp = 15;
    pFuncList->pfDequantizationFour4x4 (pRes,       g_kuiDequantCoeff[uiQp]);
    pFuncList->pfDequantizationFour4x4 (pRes + 64,  g_kuiDequantCoeff[uiQp]);
    pFuncList->pfDequantizationFour4x4 (pRes + 128, g_kuiDequantCoeff[uiQp]);
    pFuncList->pfDequantizationFour4x4 (pRes + 192, g_kuiDequantCoeff[uiQp]);

    // Put each DC value back into coefficient 0 of its 4x4 block (blocks are 16 apart in pRes).
    pRes[0]   = aDctT4Dc[0];
    pRes[16]  = aDctT4Dc[1];
    pRes[32]  = aDctT4Dc[4];
    pRes[48]  = aDctT4Dc[5];
    pRes[64]  = aDctT4Dc[2];
    pRes[80]  = aDctT4Dc[3];
    pRes[96]  = aDctT4Dc[6];
    pRes[112] = aDctT4Dc[7];
    pRes[128] = aDctT4Dc[8];
    pRes[144] = aDctT4Dc[9];
    pRes[160] = aDctT4Dc[12];
    pRes[176] = aDctT4Dc[13];
    pRes[192] = aDctT4Dc[10];
    pRes[208] = aDctT4Dc[11];
    pRes[224] = aDctT4Dc[14];
    pRes[240] = aDctT4Dc[15];

    pFuncList->pfIDctFourT4 (pPred,                       kiRecStride, pBestPred,       16, pRes);
    pFuncList->pfIDctFourT4 (pPred + 8,                   kiRecStride, pBestPred + 8,   16, pRes + 64);
    pFuncList->pfIDctFourT4 (pPred + kiRecStride * 8,     kiRecStride, pBestPred + 128, 16, pRes + 128);
    pFuncList->pfIDctFourT4 (pPred + kiRecStride * 8 + 8, kiRecStride, pBestPred + 136, 16, pRes + 192);
  } else if (uiCountI16x16Dc > 0) {
    pFuncList->pfIDctI16x16Dc (pPred, kiRecStride, pBestPred, 16, aDctT4Dc);
  } else {
    pFuncList->pfCopy16x16Aligned (pPred, kiRecStride, pBestPred, 16);
  }
}

/*!
 * \brief   Transform, quantize and reconstruct one 4x4 luma block of an I4x4 macroblock.
 *
 * The block is transformed against pMbCache->pBestPredI4x4Blk4 (stride 4), quantized and
 * scanned into pMbCache->pDct->iLumaBlock[uiI4x4Idx]. Its non-zero count is stored in
 * pCurMb->pNonZeroCount. When the count is non-zero, the bit (uiI4x4Idx >> 2) of
 * pCurMb->uiCbp is set and the block is reconstructed through dequantization and inverse
 * transform; otherwise the prediction is copied to the output.
 *
 * \param   pEncCtx     encoder context (function table, strides and block offset tables are read)
 * \param   pCurMb      current macroblock; pNonZeroCount and uiCbp are updated
 * \param   pMbCache    macroblock cache holding coefficient, prediction and output buffers
 * \param   uiI4x4Idx   index of the 4x4 block inside the macroblock
 */
void WelsEncRecI4x4Y (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, uint8_t uiI4x4Idx) {
  SWelsFuncPtrList* pFuncList = pEncCtx->pFuncList;
  SDqLayer* pCurDqLayer       = pEncCtx->pCurDqLayer;
  int32_t iEncStride          = pCurDqLayer->iEncStride[0];
  int32_t iRecStride          = pCurDqLayer->iCsStride[0];
  uint8_t uiQp                = pCurMb->uiLumaQp;
  int16_t* pResI4x4           = pMbCache->pCoeffLevel;
  uint8_t* pPred              = pMbCache->SPicData.pCsMb[0];
  uint8_t* pEncMb             = pMbCache->SPicData.pEncMb[0];
  uint8_t* pBestPred          = pMbCache->pBestPredI4x4Blk4;
  int16_t* pBlock             = pMbCache->pDct->iLumaBlock[uiI4x4Idx];
  uint32_t uiOffset           = g_kuiMbCountScan4Idx[uiI4x4Idx];
  const int16_t* pMF          = g_kiQuantMF[uiQp];
  const int16_t* pFF          = g_iQuantIntraFF[uiQp];
  int32_t* pStrideEncBlockOffset = pEncCtx->pStrideTab->pStrideEncBlockOffset[pEncCtx->uiDependencyId];
  // The second index selects the table by whether the temporal id is 0.
  int32_t* pStrideDecBlockOffset =
    pEncCtx->pStrideTab->pStrideDecBlockOffset[pEncCtx->uiDependencyId][0 == pEncCtx->uiTemporalId];
  uint8_t* pPredI4x4          = pPred + pStrideDecBlockOffset[uiI4x4Idx];
  int32_t iNoneZeroCount      = 0;

  pFuncList->pfDctT4 (pResI4x4, & (pEncMb[pStrideEncBlockOffset[uiI4x4Idx]]), iEncStride, pBestPred, 4);
  pFuncList->pfQuantization4x4 (pResI4x4, pFF, pMF);
  pFuncList->pfScan4x4 (pBlock, pResI4x4);

  iNoneZeroCount = pFuncList->pfGetNoneZeroCount (pBlock);
  pCurMb->pNonZeroCount[uiOffset] = iNoneZeroCount;

  if (iNoneZeroCount > 0) {
    pCurMb->uiCbp |= 1 << (uiI4x4Idx >> 2);
    pFuncList->pfDequantization4x4 (pResI4x4, g_kuiDequantCoeff[uiQp]);
    pFuncList->pfIDctT4 (pPredI4x4, iRecStride, pBestPred, 4, pResI4x4);
  } else {
    WelsCopy4x4 (pPredI4x4, iRecStride, pBestPred, 4);
  }
}

/*!
 * \brief   Quantize the luma coefficients of an inter macroblock and decide which 8x8 blocks to keep.
 *
 * Input coefficients are read from pMbCache->pCoeffLevel and scanned into
 * pMbCache->pDct->iLumaBlock. For every 8x8 a score is accumulated: +9 for a 4x4 whose
 * quantized maximum exceeds 1, otherwise the value returned by pfCalculateSingleCtr4x4
 * (only while the 8x8 score is still below 6). Then:
 *  - MB score < 6: the coefficient buffer is zeroed and all luma non-zero counts stay 0;
 *  - otherwise each 8x8 with score >= 4 gets its counts recorded, is dequantized and has its
 *    bit set in pCurMb->uiCbp; each remaining 8x8 has its coefficients zeroed.
 *
 * \param   pFuncList   function table
 * \param   pCurMb      current macroblock; pNonZeroCount[0..15] and uiCbp are updated
 * \param   pMbCache    macroblock cache holding the coefficient buffers
 */
void WelsEncInterY (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) {
  PQuantizationMaxFunc pfQuantizationFour4x4Max   = pFuncList->pfQuantizationFour4x4Max;
  PSetMemoryZero pfSetMemZeroSize8                = pFuncList->pfSetMemZeroSize8;
  PSetMemoryZero pfSetMemZeroSize64               = pFuncList->pfSetMemZeroSize64;
  PScanFunc pfScan4x4                             = pFuncList->pfScan4x4;
  PCalculateSingleCtrFunc pfCalculateSingleCtr4x4 = pFuncList->pfCalculateSingleCtr4x4;
  PGetNoneZeroCountFunc pfGetNoneZeroCount        = pFuncList->pfGetNoneZeroCount;
  PDeQuantizationFunc pfDequantizationFour4x4     = pFuncList->pfDequantizationFour4x4;
  int16_t* pRes       = pMbCache->pCoeffLevel;
  int16_t* pBlock     = pMbCache->pDct->iLumaBlock[0];
  uint8_t uiQp        = pCurMb->uiLumaQp;
  const int16_t* pMF  = g_kiQuantMF[uiQp];
  const int16_t* pFF  = g_kiQuantInterFF[uiQp];
  int16_t aMax[16];
  int32_t iSingleCtr8x8[4];
  int32_t iSingleCtrMb   = 0;
  int32_t iNoneZeroCount = 0;
  int32_t i, j;

  for (i = 0; i < 4; i++) {
    // One call quantizes the four 4x4 blocks of this 8x8 and reports a maximum per 4x4.
    pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax + (i << 2));
    iSingleCtr8x8[i] = 0;
    for (j = 0; j < 4; j++) {
      const int16_t kiMax = aMax[ (i << 2) + j];
      if (kiMax == 0) {
        pfSetMemZeroSize8 (pBlock, 32);   // 32 bytes = one 16-coefficient int16_t block
      } else {
        pfScan4x4 (pBlock, pRes);
        if (kiMax > 1)
          iSingleCtr8x8[i] += 9;
        else if (iSingleCtr8x8[i] < 6)
          iSingleCtr8x8[i] += pfCalculateSingleCtr4x4 (pBlock);
      }
      pRes   += 16;
      pBlock += 16;
    }
    iSingleCtrMb += iSingleCtr8x8[i];
  }
  pBlock -= 256;
  pRes   -= 256;

  memset (pCurMb->pNonZeroCount, 0, 16);

  if (iSingleCtrMb < 6) {  // from JVT-O079
    // 768 bytes = 384 int16_t starting at pCoeffLevel; WelsTryPUVskip() addresses chroma
    // at pCoeffLevel[256] and pCoeffLevel[320], so this range extends over those too.
    pfSetMemZeroSize64 (pRes, 768);   // confirmed_safe_unsafe_usage
  } else {
    const uint8_t* kpNoneZeroCountIdx = g_kuiMbCountScan4Idx;
    for (i = 0; i < 4; i++) {
      if (iSingleCtr8x8[i] >= 4) {
        for (j = 0; j < 4; j++) {
          iNoneZeroCount = pfGetNoneZeroCount (pBlock);
          pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = iNoneZeroCount;
          pBlock += 16;
        }
        pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[uiQp]);
        pCurMb->uiCbp |= 1 << i;
      } else {
        // set zero for an 8x8 pBlock
        pfSetMemZeroSize64 (pRes, 128);   // confirmed_safe_unsafe_usage
        kpNoneZeroCountIdx += 4;
        pBlock += 64;
      }
      pRes += 64;
    }
  }
}

/*!
 * \brief   Quantize and dequantize the coefficients of one chroma plane of the current macroblock.
 *
 * The DC terms go through pfQuantizationHadamard2x2 (into pMbCache->pDct->iChromaDc[iUV - 1]);
 * the four 4x4 blocks are quantized and AC-scanned into pMbCache->pDct->iChromaBlock.
 * For inter macroblocks a score is accumulated as in WelsEncInterY() (threshold 7); for intra
 * macroblocks any non-zero block forces the score to INT_MAX. When the score is below 7 the AC
 * coefficients and the matching pNonZeroCount entries are zeroed; otherwise counts are recorded,
 * the block is dequantized and the upper bits of pCurMb->uiCbp are set to 0x20.
 * If the DC quantization reported non-zero values they are inverse-transformed, written back
 * into coefficient 0 of each 4x4 block and bit 4 of uiCbp is set unless the upper part is already 2.
 *
 * \param   pFuncList   function table
 * \param   pCurMb      current macroblock; pNonZeroCount and uiCbp are updated
 * \param   pMbCache    macroblock cache holding the scanned-coefficient buffers
 * \param   pRes        64 coefficients of the chroma plane (modified in place)
 * \param   iUV         chroma plane selector, 1 or 2
 */
void WelsEncRecUV (SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache, int16_t* pRes, int32_t iUV) {
  PQuantizationHadamardFunc pfQuantizationHadamard2x2 = pFuncList->pfQuantizationHadamard2x2;
  PQuantizationMaxFunc pfQuantizationFour4x4Max       = pFuncList->pfQuantizationFour4x4Max;
  PSetMemoryZero pfSetMemZeroSize8                    = pFuncList->pfSetMemZeroSize8;
  PSetMemoryZero pfSetMemZeroSize64                   = pFuncList->pfSetMemZeroSize64;
  PScanFunc pfScan4x4Ac                               = pFuncList->pfScan4x4Ac;
  PCalculateSingleCtrFunc pfCalculateSingleCtr4x4     = pFuncList->pfCalculateSingleCtr4x4;
  PGetNoneZeroCountFunc pfGetNoneZeroCount            = pFuncList->pfGetNoneZeroCount;
  PDeQuantizationFunc pfDequantizationFour4x4         = pFuncList->pfDequantizationFour4x4;
  const int32_t kiInterFlag     = !IS_INTRA (pCurMb->uiMbType);
  const uint8_t kiQp            = pCurMb->uiChromaQp;
  uint8_t uiNoneZeroCount       = 0;
  uint8_t uiNoneZeroCountMbDc   = 0;
  uint8_t uiNoneZeroCountOffset = (iUV - 1) << 1;         // UV==1 or 2
  uint8_t uiSubMbIdx            = 16 + ((iUV - 1) << 2);  // uiSubMbIdx == 16 or 20
  int16_t* iChromaDc            = pMbCache->pDct->iChromaDc[iUV - 1];
  int16_t* pBlock               = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2];
  int16_t aDct2x2[4];
  int16_t aMax[4];
  int32_t iSingleCtr8x8         = 0;
  const int16_t* pMF            = g_kiQuantMF[kiQp];
  // Intra macroblocks read the rounding table 6 rows further into g_kiQuantInterFF.
  const int16_t* pFF            = g_kiQuantInterFF[ (!kiInterFlag) * 6 + kiQp];
  int32_t i, j;

  uiNoneZeroCountMbDc = pfQuantizationHadamard2x2 (pRes, pFF[0] << 1, pMF[0] >> 1, aDct2x2, iChromaDc);

  pfQuantizationFour4x4Max (pRes, pFF, pMF, aMax);

  for (j = 0; j < 4; j++) {
    if (aMax[j] == 0) {
      pfSetMemZeroSize8 (pBlock, 32);   // 32 bytes = one 16-coefficient int16_t block
    } else {
      pfScan4x4Ac (pBlock, pRes);
      if (kiInterFlag) {
        if (aMax[j] > 1)
          iSingleCtr8x8 += 9;
        else if (iSingleCtr8x8 < 7)
          iSingleCtr8x8 += pfCalculateSingleCtr4x4 (pBlock);
      } else {
        iSingleCtr8x8 = INT_MAX;
      }
    }
    pRes   += 16;
    pBlock += 16;
  }
  pRes -= 64;

  if (iSingleCtr8x8 < 7) {  // from JVT-O079
    pfSetMemZeroSize64 (pRes, 128);   // confirmed_safe_unsafe_usage
    ST16 (&pCurMb->pNonZeroCount[16 + uiNoneZeroCountOffset], 0);
    ST16 (&pCurMb->pNonZeroCount[20 + uiNoneZeroCountOffset], 0);
  } else {
    const uint8_t* kpNoneZeroCountIdx = &g_kuiMbCountScan4Idx[uiSubMbIdx];
    pBlock -= 64;
    for (i = 0; i < 4; i++) {
      uiNoneZeroCount = pfGetNoneZeroCount (pBlock);
      pCurMb->pNonZeroCount[*kpNoneZeroCountIdx++] = uiNoneZeroCount;
      pBlock += 16;
    }
    pfDequantizationFour4x4 (pRes, g_kuiDequantCoeff[kiQp]);
    pCurMb->uiCbp &= 0x0F;
    pCurMb->uiCbp |= 0x20;
  }

  if (uiNoneZeroCountMbDc > 0) {
    WelsDequantIHadamard2x2Dc (aDct2x2, g_kuiDequantCoeff[kiQp][0] >> 1);
    if (2 != (pCurMb->uiCbp >> 4))
      pCurMb->uiCbp |= (0x01 << 4);
    pRes[0]  = aDct2x2[0];
    pRes[16] = aDct2x2[1];
    pRes[32] = aDct2x2[2];
    pRes[48] = aDct2x2[3];
  }
}

/*!
 * \brief   Reconstruct a skipped macroblock by copying pMbCache->pSkipMb to the output planes.
 *
 * The 16x16 block at pSkipMb goes to pCsMb[0]; the 8x8 blocks at pSkipMb + 256 and
 * pSkipMb + 320 go to pCsMb[1] and pCsMb[2]. The first 24 bytes of pCurMb->pNonZeroCount
 * are then zeroed.
 *
 * \param   pCurLayer   current layer; iCsStride[0..2] are the output strides
 * \param   pFuncList   function table
 * \param   pCurMb      current macroblock; pNonZeroCount is cleared
 * \param   pMbCache    macroblock cache holding pSkipMb and the output pointers
 */
void WelsRecPskip (SDqLayer* pCurLayer, SWelsFuncPtrList* pFuncList, SMB* pCurMb, SMbCache* pMbCache) {
  int32_t* iRecStride = pCurLayer->iCsStride;
  uint8_t** pCsMb     = &pMbCache->SPicData.pCsMb[0];

  pFuncList->pfCopy16x16Aligned (pCsMb[0], iRecStride[0], pMbCache->pSkipMb,       16);
  pFuncList->pfCopy8x8Aligned   (pCsMb[1], iRecStride[1], pMbCache->pSkipMb + 256, 8);
  pFuncList->pfCopy8x8Aligned   (pCsMb[2], iRecStride[2], pMbCache->pSkipMb + 320, 8);
  pFuncList->pfSetMemZeroSize8 (pCurMb->pNonZeroCount, 24);
}

/*!
 * \brief   Test whether the luma coefficients allow the macroblock to be coded as P_SKIP.
 *
 * Coefficients in pMbCache->pCoeffLevel are quantized in place, one 8x8 at a time.
 * The test fails as soon as a 4x4 has a quantized maximum above 1, or the accumulated
 * pfCalculateSingleCtr4x4 score of the blocks whose maximum is exactly 1 reaches 6.
 *
 * \param   pEncCtx   encoder context (function table is read)
 * \param   pCurMb    current macroblock (uiLumaQp is read)
 * \param   pMbCache  macroblock cache; pCoeffLevel and pDct->iLumaBlock are written
 * \return  true if every block passed, false otherwise
 */
bool WelsTryPYskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache) {
  int32_t iSingleCtrMb = 0;
  int16_t* pRes        = pMbCache->pCoeffLevel;
  const uint8_t kuiQp  = pCurMb->uiLumaQp;
  int16_t* pBlock      = pMbCache->pDct->iLumaBlock[0];
  uint16_t aMax[4];
  const int16_t* pMF   = g_kiQuantMF[kuiQp];
  const int16_t* pFF   = g_kiQuantInterFF[kuiQp];

  for (int32_t i = 0; i < 4; i++) {
    // The callback writes int16_t maxima; they are compared as unsigned values below.
    pEncCtx->pFuncList->pfQuantizationFour4x4Max (pRes, pFF, pMF, reinterpret_cast<int16_t*> (aMax));

    for (int32_t j = 0; j < 4; j++) {
      if (aMax[j] > 1)
        return false;   // iSingleCtrMb += 9, can't be P_SKIP
      if (aMax[j] == 1) {
        pEncCtx->pFuncList->pfScan4x4 (pBlock, pRes);
        iSingleCtrMb += pEncCtx->pFuncList->pfCalculateSingleCtr4x4 (pBlock);
      }
      if (iSingleCtrMb >= 6)
        return false;   // from JVT-O079
      pRes   += 16;
      pBlock += 16;
    }
  }
  return true;
}

/*!
 * \brief   Test whether the coefficients of one chroma plane allow the macroblock to be coded as P_SKIP.
 *
 * The plane's coefficients are taken from pMbCache->pCoeffLevel[256] (iUV == 1) or
 * pMbCache->pCoeffLevel[320] (otherwise). The chroma QP is looked up in g_kuiChromaQpTable
 * from the clipped sum of the luma QP and the PPS chroma QP index offset.
 * The test fails if pfQuantizationHadamard2x2Skip returns non-zero, if a 4x4 has a quantized
 * maximum above 1, or if the accumulated pfCalculateSingleCtr4x4 score reaches 7.
 *
 * \param   pEncCtx   encoder context (function table and current layer PPS are read)
 * \param   pCurMb    current macroblock (uiLumaQp is read)
 * \param   pMbCache  macroblock cache; pCoeffLevel and pDct->iChromaBlock are written
 * \param   iUV       chroma plane selector, 1 or 2
 * \return  true if the plane passed, false otherwise
 */
bool WelsTryPUVskip (sWelsEncCtx* pEncCtx, SMB* pCurMb, SMbCache* pMbCache, int32_t iUV) {
  int16_t* pRes = ((iUV == 1) ? & (pMbCache->pCoeffLevel[256]) : & (pMbCache->pCoeffLevel[256 + 64]));

  const uint8_t kuiQp = g_kuiChromaQpTable[CLIP3_QP_0_51 (pCurMb->uiLumaQp +
                                           pEncCtx->pCurDqLayer->sLayerInfo.pPpsP->uiChromaQpIndexOffset)];

  const int16_t* pMF = g_kiQuantMF[kuiQp];
  const int16_t* pFF = g_kiQuantInterFF[kuiQp];

  if (pEncCtx->pFuncList->pfQuantizationHadamard2x2Skip (pRes, pFF[0] << 1, pMF[0] >> 1))
    return false;

  uint16_t aMax[4];
  int32_t iSingleCtrMb = 0;
  int16_t* pBlock      = pMbCache->pDct->iChromaBlock[ (iUV - 1) << 2];

  // The callback writes int16_t maxima; they are compared as unsigned values below.
  pEncCtx->pFuncList->pfQuantizationFour4x4Max (pRes, pFF, pMF, reinterpret_cast<int16_t*> (aMax));

  for (int32_t j = 0; j < 4; j++) {
    if (aMax[j] > 1)
      return false;   // iSingleCtrMb += 9, can't be P_SKIP
    if (aMax[j] == 1) {
      pEncCtx->pFuncList->pfScan4x4Ac (pBlock, pRes);
      iSingleCtrMb += pEncCtx->pFuncList->pfCalculateSingleCtr4x4 (pBlock);
    }
    if (iSingleCtrMb >= 7)
      return false;   // from JVT-O079
    pRes   += 16;
    pBlock += 16;
  }
  return true;
}

} // namespace WelsSVCEnc