shithub: openh264

Download patch

ref: 9a9fc4c4893b8fac3cea3bec3ff9207055a7060d
parent: 1127aa77613e73f863649b95e3ab41a754558336
author: Martin Storsjö <[email protected]>
date: Wed Jan 28 07:26:17 EST 2015

Change pfSampleAveraging to be a single function with internal width handling

This makes it match the behaviour of pMcLumaFunc and pMcChromaFunc.

--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -81,7 +81,7 @@
                                         int32_t iWidth, int32_t iHeight);
 typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                         int32_t iHeight);
-typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t);
+typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t, int32_t);
 
 typedef struct TagMcFunc {
   PWelsLumaHalfpelMcFunc      pfLumaHalfpelHor;
@@ -90,7 +90,7 @@
   PWelsMcFunc                 pMcChromaFunc;
 
   PWelsMcFunc                 pMcLumaFunc;
-  PWelsSampleAveragingFunc    pfSampleAveraging[2];
+  PWelsSampleAveragingFunc    pfSampleAveraging;
 } SMcFunc;
 
 typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -41,6 +41,9 @@
 #include "mc.h"
 #include "cpu_core.h"
 
+typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
+    int32_t, int32_t);
+
 namespace WelsEnc {
 /*------------------weight for chroma fraction pixel interpolation------------------*/
 //kuiA = (8 - dx) * (8 - dy);
@@ -406,6 +409,14 @@
   uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
   pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
 }
+void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+                 const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+  static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+    PixelAvgWidthEq8_c,
+    PixelAvgWidthEq16_c
+  };
+  kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
 //***************************************************************************//
 //                       MMXEXT and SSE2 implementation                      //
 //***************************************************************************//
@@ -597,6 +608,14 @@
   uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
   pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
 }
+void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+                    const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+  static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+    PixelAvgWidthEq8_mmx,
+    PixelAvgWidthEq16_sse2
+  };
+  kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
 #endif //X86_ASM
 
 //***************************************************************************//
@@ -699,6 +718,14 @@
   uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
   pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
 }
+void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+                    const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+  static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+    PixStrideAvgWidthEq8_neon,
+    PixStrideAvgWidthEq16_neon
+  };
+  kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
 #endif
 
 #if defined(HAVE_NEON_AARCH64)
@@ -807,15 +834,21 @@
   uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
   pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
 }
+void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+                            const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+  static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+    PixStrideAvgWidthEq8_AArch64_neon,
+    PixStrideAvgWidthEq16_AArch64_neon
+  };
+  kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
 #endif
 
 void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
-  static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
-
   pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
   pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
   pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
-  memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
+  pMcFuncs->pfSampleAveraging = PixelAvg_c;
   pMcFuncs->pMcChromaFunc    = McChroma_c;
   pMcFuncs->pMcLumaFunc      = McLuma_c;
 #if defined (X86_ASM)
@@ -823,8 +856,7 @@
     pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
     pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
     pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
-    pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
-    pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
+    pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
     pMcFuncs->pMcChromaFunc    = McChroma_sse2;
     pMcFuncs->pMcLumaFunc      = McLuma_sse2;
   }
@@ -839,8 +871,7 @@
   if (uiCpuFlag & WELS_CPU_NEON) {
     pMcFuncs->pMcLumaFunc      = EncMcLuma_neon;
     pMcFuncs->pMcChromaFunc    = EncMcChroma_neon;
-    pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
-    pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
+    pMcFuncs->pfSampleAveraging = PixelAvg_neon;
     pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
     pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
     pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
@@ -850,8 +881,7 @@
   if (uiCpuFlag & WELS_CPU_NEON) {
     pMcFuncs->pMcLumaFunc      = EncMcLuma_AArch64_neon;
     pMcFuncs->pMcChromaFunc    = EncMcChroma_AArch64_neon;
-    pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
-    pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
+    pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
     pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
     pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
     pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1
--- a/codec/encoder/core/src/md.cpp
+++ b/codec/encoder/core/src/md.cpp
@@ -531,15 +531,14 @@
 
 inline void MeRefineQuarPixel (SWelsFuncPtrList* pFunc, SWelsME* pMe, SMeRefinePointer* pMeRefine,
                                const int32_t kiWidth, const int32_t kiHeight, SQuarRefineParams* pParams, int32_t iStrideEnc) {
-  PWelsSampleAveragingFunc* pSampleAvg	= pFunc->sMcFuncs.pfSampleAveraging;
-  const int32_t kiAvgIndex		= kiWidth >> 4;
+  PWelsSampleAveragingFunc pSampleAvg	= pFunc->sMcFuncs.pfSampleAveraging;
   int32_t iCurCost;
   uint8_t* pEncMb				= pMe->pEncMb;
   uint8_t* pTmp				= NULL;
   const uint8_t kuiPixel		= pMe->uiBlockSize;
 
-  pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
-                          pParams->pSrcB[0], pParams->iStrideA, kiHeight);
+  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[0], ME_REFINE_BUF_STRIDE,
+              pParams->pSrcB[0], pParams->iStrideA, kiWidth, kiHeight);
 
   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[0]);
   if (iCurCost < pParams->iBestCost) {
@@ -547,8 +546,8 @@
     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
   }
   //=========================(0, 1)=======================//
-  pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
-                          ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiHeight);
+  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[1],
+              ME_REFINE_BUF_STRIDE, pParams->pSrcB[1], pParams->iStrideA, kiWidth, kiHeight);
   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[1]);
   if (iCurCost < pParams->iBestCost) {
     pParams->iBestQuarPix = ME_QUAR_PIXEL_BOTTOM;
@@ -555,8 +554,8 @@
     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
   }
   //==========================(-1, 0)=========================//
-  pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
-                          ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiHeight);
+  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[2],
+              ME_REFINE_BUF_STRIDE, pParams->pSrcB[2], pParams->iStrideB, kiWidth, kiHeight);
   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[2]);
   if (iCurCost < pParams->iBestCost) {
     pParams->iBestQuarPix = ME_QUAR_PIXEL_LEFT;
@@ -563,8 +562,8 @@
     SWITCH_BEST_TMP_BUF (pMeRefine->pQuarPixBest, pMeRefine->pQuarPixTmp);
   }
   //==========================(1, 0)=========================//
-  pSampleAvg[kiAvgIndex] (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
-                          ME_REFINE_BUF_STRIDE,	pParams->pSrcB[3], pParams->iStrideB,  kiHeight);
+  pSampleAvg (pMeRefine->pQuarPixTmp, ME_REFINE_BUF_STRIDE, pParams->pSrcA[3],
+              ME_REFINE_BUF_STRIDE,	pParams->pSrcB[3], pParams->iStrideB,  kiWidth, kiHeight);
 
   iCurCost = CALC_COST (pMeRefine->pQuarPixTmp, pParams->iLms[3]);
   if (iCurCost < pParams->iBestCost) {
--- a/test/encoder/EncUT_MotionCompensation.cpp
+++ b/test/encoder/EncUT_MotionCompensation.cpp
@@ -40,8 +40,8 @@
       }
       PixelAvgAnchor (uDstAnchor[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0], MC_BUFF_SRC_STRIDE, width,
                       height);
-      sMcFunc.pfSampleAveraging[w] (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
-                                    MC_BUFF_SRC_STRIDE, height);
+      sMcFunc.pfSampleAveraging (uDstTest[0], MC_BUFF_DST_STRIDE, uSrc1[0], MC_BUFF_SRC_STRIDE, uSrc2[0],
+                                 MC_BUFF_SRC_STRIDE, width, height);
       for (int32_t j = 0; j < height; j++) {
         for (int32_t i = 0; i < width; i++) {
           ASSERT_EQ (uDstAnchor[j][i], uDstTest[j][i]);