ref: 9a9fc4c4893b8fac3cea3bec3ff9207055a7060d
parent: 1127aa77613e73f863649b95e3ab41a754558336
author: Martin Storsjö <[email protected]>
date: Wed Jan 28 07:26:17 EST 2015
Change pfSampleAveraging to be a single function with internal width handling. This makes it match the behaviour of pMcLumaFunc and pMcChromaFunc.
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -81,7 +81,7 @@
int32_t iWidth, int32_t iHeight);
typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
int32_t iHeight);
-typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
+typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t, int32_t); // args: dst, dst stride, src A, src A stride, src B, src B stride, width, height
typedef struct TagMcFunc {
PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
@@ -90,7 +90,7 @@
PWelsMcFunc pMcChromaFunc;
PWelsMcFunc pMcLumaFunc;
- PWelsSampleAveragingFunc pfSampleAveraging[2];
+ PWelsSampleAveragingFunc pfSampleAveraging;
} SMcFunc;
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -41,6 +41,9 @@
#include "mc.h"
#include "cpu_core.h"
+typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
+ int32_t, int32_t); // fixed-width averaging kernel: takes no width argument, only the trailing height
+
namespace WelsEnc {
/*------------------weight for chroma fraction pixel interpolation------------------*/
//kuiA = (8 - dx) * (8 - dy);
@@ -406,6 +409,14 @@
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
+void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { // C dispatch table indexed by iWidth >> 4
+ PixelAvgWidthEq8_c, // slot 0: chosen when iWidth >> 4 == 0 (iWidth in 0..15)
+ PixelAvgWidthEq16_c // slot 1: chosen when iWidth >> 4 == 1 (iWidth in 16..31)
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); // forwards everything except iWidth; NOTE(review): index is not range-checked, presumably callers pass only 8 or 16 - confirm
+}
//***************************************************************************//
// MMXEXT and SSE2 implementation //
//***************************************************************************//
@@ -597,6 +608,14 @@
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
+void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { // x86 dispatch table indexed by iWidth >> 4
+ PixelAvgWidthEq8_mmx, // slot 0: chosen when iWidth >> 4 == 0; note this entry is the _mmx routine, not an _sse2 one
+ PixelAvgWidthEq16_sse2 // slot 1: chosen when iWidth >> 4 == 1 (iWidth in 16..31)
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); // forwards everything except iWidth; NOTE(review): index is not range-checked, presumably callers pass only 8 or 16 - confirm
+}
#endif //X86_ASM
//***************************************************************************//
@@ -699,6 +718,14 @@
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
+void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { // NEON dispatch table indexed by iWidth >> 4
+ PixStrideAvgWidthEq8_neon, // slot 0: chosen when iWidth >> 4 == 0 (iWidth in 0..15)
+ PixStrideAvgWidthEq16_neon // slot 1: chosen when iWidth >> 4 == 1 (iWidth in 16..31)
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); // forwards everything except iWidth; NOTE(review): index is not range-checked, presumably callers pass only 8 or 16 - confirm
+}
#endif
#if defined(HAVE_NEON_AARCH64)
@@ -807,15 +834,21 @@
uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
}
+void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = { // AArch64 NEON dispatch table indexed by iWidth >> 4
+ PixStrideAvgWidthEq8_AArch64_neon, // slot 0: chosen when iWidth >> 4 == 0 (iWidth in 0..15)
+ PixStrideAvgWidthEq16_AArch64_neon // slot 1: chosen when iWidth >> 4 == 1 (iWidth in 16..31)
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight); // forwards everything except iWidth; NOTE(review): index is not range-checked, presumably callers pass only 8 or 16 - confirm
+}
#endif
void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
- static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
-
pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
- memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
+ pMcFuncs->pfSampleAveraging = PixelAvg_c;
pMcFuncs->pMcChromaFunc = McChroma_c;
pMcFuncs->pMcLumaFunc = McLuma_c;
#if defined (X86_ASM)
@@ -823,8 +856,7 @@
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
- pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
- pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
+ pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
pMcFuncs->pMcChromaFunc = McChroma_sse2;
pMcFuncs->pMcLumaFunc = McLuma_sse2;
}
@@ -839,8 +871,7 @@
if (uiCpuFlag & WELS_CPU_NEON) {
pMcFuncs->pMcLumaFunc = EncMcLuma_neon;
pMcFuncs->pMcChromaFunc = EncMcChroma_neon;
- pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
- pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
+ pMcFuncs->pfSampleAveraging = PixelAvg_neon;
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
@@ -850,8 +881,7 @@
if (uiCpuFlag & WELS_CPU_NEON) {
pMcFuncs->pMcLumaFunc = EncMcLuma_AArch64_neon;
pMcFuncs->pMcChromaFunc = EncMcChroma_AArch64_neon;
- pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
- pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
+ pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1
--- a/codec/encoder/core/src/md.cpp
+++ b/codec/encoder/core/src/md.cpp
@@ -531,15 +531,14 @@
inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefinePointer* pMeRefine,
const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) {
- PWelsSampleAveragingFunc* pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
- const int32_t kiAvgIndex = kiWidth >> 4;
+ PWelsSampleAveragingFunc pSampleAvg = pFunc->sMcFuncs.pfSampleAveraging;
int32_t iCurCost;
uint8_t* pEncMb = pMe->pEncMb;
uint8_t* pTmp = NULL;
const uint8_t kuiPixel = pMe->uiBlockSize;
- pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
- pParams->pSrcB[0], pParams->iStrideA, kiHeight);
+ pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
+ pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]);
if (iCurCost < pParams->iBestCost) {
@@ -547,8 +546,8 @@
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
}
//=========================(0, 1)=======================//
- pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
- ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiHeight);
+ pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
+ ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]);
if (iCurCost < pParams->iBestCost) {
pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM;
@@ -555,8 +554,8 @@
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
}
//==========================(-1, 0)=========================//
- pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
- ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiHeight);
+ pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
+ ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]);
if (iCurCost < pParams->iBestCost) {
pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT;
@@ -563,8 +562,8 @@
SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
}
//==========================(1, 0)=========================//
- pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
- ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiHeight);
+ pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
+ ME_REFINE_BUF_STRIDE, pParams->pSrcB[3], pParams->iStrideB, kiWidth, kiHeight);
iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]);
if (iCurCost < pParams->iBestCost) {
--- a/test/encoder/EncUT_MotionCompensation.cpp
+++ b/test/encoder/EncUT_MotionCompensation.cpp
@@ -40,8 +40,8 @@
}
PixelAvgAnchor (uDstAnchor[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], MC_BUFF_SRC_STRIDE, width,
height);
- sMcFunc.pfSampleAveraging[w] (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
- MC_BUFF_SRC_STRIDE, height);
+ sMcFunc.pfSampleAveraging (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
+ MC_BUFF_SRC_STRIDE, width, height);
for (int32_t j = 0; j < height; j++) {
for (int32_t i = 0; i < width; i++) {
ASSERT_EQ (uDstAnchor[j][i], uDstTest[j][i]);