shithub: openh264

Download patch

ref: 1cf68cad1393ff918894e4da76954850e80dc9e2
parent: a8262dd9e606f99e3b02f8cbc7a03d3c5ea9c977
author: Martin Storsjö <[email protected]>
date: Tue Jan 27 06:47:48 EST 2015

Move the qpel function table into the luma function itself

This unifies the luma MC interface to match the decoder side.

--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -89,7 +89,7 @@
   PWelsLumaHalfpelMcFunc      pfLumaHalfpelCen;
   PWelsMcFunc                         pfChromaMc;
 
-  PWelsLumaQuarpelMcFunc      pfLumaQuarpelMc[16];
+  PWelsMcFunc                 pfLumaMc;
   PWelsSampleAveragingFunc    pfSampleAveraging[2];
 } SMcFunc;
 
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -395,6 +395,17 @@
     }
   }
 }
+void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, // Luma MC entry point: selects one of 16 routines from the low MV bits and calls it.
+               int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { // NOTE: iWidth is accepted but never read in this body.
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { // indexed by [y*4+x], where x/y are the low 2 bits of iMvX/iMvY
+    McCopyWidthEq16_c,     McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
+    McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
+    McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
+    McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
+  };
+  uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); // (iMvY & 3) * 4 + (iMvX & 3), so always within 0..15
+  pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); // the MV and iWidth are not forwarded to the selected routine
+}
 //***************************************************************************//
 //                       MMXEXT and SSE2 implementation                      //
 //***************************************************************************//
@@ -575,6 +586,17 @@
 
 }
 
+void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, // Luma MC entry point: selects one of 16 _sse2 routines from the low MV bits and calls it.
+                  int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { // NOTE: iWidth is accepted but never read in this body.
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = { // indexed by [y*4+x], where x/y are the low 2 bits of iMvX/iMvY
+    McCopyWidthEq16_sse2,     McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
+    McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
+    McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
+    McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
+  };
+  uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); // (iMvY & 3) * 4 + (iMvX & 3), so always within 0..15
+  pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); // the MV and iWidth are not forwarded to the selected routine
+}
 #endif //X86_ASM
 
 //***************************************************************************//
@@ -666,6 +688,17 @@
       McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
   }
 }
+void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, // Luma MC entry point: selects one of 16 _neon routines from the low MV bits and calls it.
+                     int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { // NOTE: iWidth is accepted but never read in this body.
+  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { // indexed by [y*4+x] (see uiMvpIdx below), where x/y are the low 2 bits of iMvX/iMvY
+    McCopyWidthEq16_neon,        McHorVer10WidthEq16_neon,   McHorVer20WidthEq16_neon,    McHorVer30WidthEq16_neon,
+    McHorVer01WidthEq16_neon,    EncMcHorVer11_neon,         EncMcHorVer21_neon,          EncMcHorVer31_neon,
+    McHorVer02WidthEq16_neon,    EncMcHorVer12_neon,         McHorVer22WidthEq16_neon,    EncMcHorVer32_neon,
+    McHorVer03WidthEq16_neon,    EncMcHorVer13_neon,         EncMcHorVer23_neon,          EncMcHorVer33_neon
+  };
+  uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); // (iMvY & 3) * 4 + (iMvX & 3), so always within 0..15
+  pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); // the MV and iWidth are not forwarded to the selected routine
+}
 #endif
 
 #if defined(HAVE_NEON_AARCH64)
@@ -763,34 +796,8 @@
       McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
   }
 }
-#endif
-
-void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
-  static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
-
-  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
-    McCopyWidthEq16_c,     McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
-    McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
-    McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
-    McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
-  };
-#if defined (X86_ASM)
-  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
-    McCopyWidthEq16_sse2,     McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
-    McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
-    McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
-    McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
-  };
-#endif
-#if defined(HAVE_NEON)
-  static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
-    McCopyWidthEq16_neon,        McHorVer10WidthEq16_neon,   McHorVer20WidthEq16_neon,    McHorVer30WidthEq16_neon,
-    McHorVer01WidthEq16_neon,    EncMcHorVer11_neon,         EncMcHorVer21_neon,          EncMcHorVer31_neon,
-    McHorVer02WidthEq16_neon,    EncMcHorVer12_neon,         McHorVer22WidthEq16_neon,    EncMcHorVer32_neon,
-    McHorVer03WidthEq16_neon,    EncMcHorVer13_neon,         EncMcHorVer23_neon,          EncMcHorVer33_neon
-  };
-#endif
-#if defined(HAVE_NEON_AARCH64)
+void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, // Luma MC entry point: selects one of 16 _AArch64_neon routines from the low MV bits and calls it.
+                             int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) { // NOTE: iWidth is accepted but never read in this body.
   static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
     McCopyWidthEq16_AArch64_neon,        McHorVer10WidthEq16_AArch64_neon,   McHorVer20WidthEq16_AArch64_neon,    McHorVer30WidthEq16_AArch64_neon,
     McHorVer01WidthEq16_AArch64_neon,    EncMcHorVer11_AArch64_neon,         EncMcHorVer21_AArch64_neon,          EncMcHorVer31_AArch64_neon,
@@ -797,13 +804,20 @@
     McHorVer02WidthEq16_AArch64_neon,    EncMcHorVer12_AArch64_neon,         McHorVer22WidthEq16_AArch64_neon,    EncMcHorVer32_AArch64_neon,
     McHorVer03WidthEq16_AArch64_neon,    EncMcHorVer13_AArch64_neon,         EncMcHorVer23_AArch64_neon,          EncMcHorVer33_AArch64_neon
   };
+  uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03); // index is y*4+x, i.e. (iMvY & 3) * 4 + (iMvX & 3), despite the [x][y] tag on the table; always within 0..15
+  pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight); // the MV and iWidth are not forwarded to the selected routine
+}
 #endif
+
+void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
+  static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
+
   pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
   pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
   pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
   memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
   pMcFuncs->pfChromaMc	= McChroma_c;
-  memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
+  pMcFuncs->pfLumaMc	= McLuma_c;
 #if defined (X86_ASM)
   if (uiCpuFlag & WELS_CPU_SSE2) {
     pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
@@ -812,7 +826,7 @@
     pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
     pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
     pMcFuncs->pfChromaMc = McChroma_sse2;
-    memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
+    pMcFuncs->pfLumaMc = McLuma_sse2;
   }
 
   if (uiCpuFlag & WELS_CPU_SSSE3) {
@@ -823,7 +837,7 @@
 
 #if defined(HAVE_NEON)
   if (uiCpuFlag & WELS_CPU_NEON) {
-    memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon));
+    pMcFuncs->pfLumaMc          = EncMcLuma_neon;
     pMcFuncs->pfChromaMc	= EncMcChroma_neon;
     pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
     pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
@@ -834,8 +848,7 @@
 #endif
 #if defined(HAVE_NEON_AARCH64)
   if (uiCpuFlag & WELS_CPU_NEON) {
-    memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon,
-            sizeof (pWelsMcFuncWidthEq16_AArch64_neon));
+    pMcFuncs->pfLumaMc          = EncMcLuma_AArch64_neon;
     pMcFuncs->pfChromaMc	= EncMcChroma_AArch64_neon;
     pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
     pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
--- a/codec/encoder/core/src/svc_base_layer_md.cpp
+++ b/codec/encoder/core/src/svc_base_layer_md.cpp
@@ -1247,7 +1247,7 @@
     pDstCr	= pMbCache->pMemPredChroma + 64;
   }
   //MC
-  pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma, iLineSizeY, pDstLuma, 16, 16);
+  pFunc->sMcFuncs.pfLumaMc (pRefLuma, iLineSizeY, pDstLuma, 16, 0, 0, 16, 16);
   pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb
   pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr
 
@@ -1313,7 +1313,6 @@
   uint8_t* pDstCr   = pMbCache->pSkipMb + 256 + 64;
 
   SMVUnitXY sMvp = { 0 };
-  uint8_t uiMvpIdx;
   int32_t n;
 
   int32_t iEncStride		= pCurLayer->iEncStride[0];
@@ -1343,8 +1342,7 @@
 
   //luma
   pRefLuma += sQpelMvp.iMvY * iLineSizeY + sQpelMvp.iMvX;
-  uiMvpIdx = ((sMvp.iMvY & 0x03) << 2) + (sMvp.iMvX & 0x03);
-  pFunc->sMcFuncs.pfLumaQuarpelMc[uiMvpIdx] (pRefLuma, iLineSizeY, pDstLuma, 16, 16);
+  pFunc->sMcFuncs.pfLumaMc (pRefLuma, iLineSizeY, pDstLuma, 16, sMvp.iMvX, sMvp.iMvY, 16, 16);
   iSadCostLuma    = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0],
                     pCurLayer->iEncStride[0], pDstLuma, 16);
 
--- a/codec/encoder/core/src/svc_mode_decision.cpp
+++ b/codec/encoder/core/src/svc_mode_decision.cpp
@@ -414,7 +414,7 @@
     pDstCr	= pMbCache->pMemPredChroma + 64;
   }
   //MC
-  pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 16);
+  pFunc->sMcFuncs.pfLumaMc (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 0, 0, 16, 16);
   pFunc->sMcFuncs.pfChromaMc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8);
   pFunc->sMcFuncs.pfChromaMc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8);
 
--- a/test/encoder/EncUT_MotionCompensation.cpp
+++ b/test/encoder/EncUT_MotionCompensation.cpp
@@ -4,17 +4,10 @@
 #include "cpu.h"
 using namespace WelsEnc;
 
-static void McLumaFunc (SMcFunc* pFuncs, const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
-                        int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
-  uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
-  ASSERT_EQ (iWidth, 16);
-  pFuncs->pfLumaQuarpelMc[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
 #define InitMcFunc WelsInitMcFuncs
 
 #define LUMA_FUNC(funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) \
-  McLumaFunc (funcs, src, srcstride, dst, dststride, mvx, mvy, width, height)
+  sMcFunc.pfLumaMc (src, srcstride, dst, dststride, mvx, mvy, width, height) // NOTE(review): 'funcs' is no longer used; the expansion requires a variable named sMcFunc in scope at the use site
 
 #define CHROMA_FUNC sMcFunc.pfChromaMc