ref: 1cf68cad1393ff918894e4da76954850e80dc9e2
parent: a8262dd9e606f99e3b02f8cbc7a03d3c5ea9c977
author: Martin Storsjö <[email protected]>
date: Tue Jan 27 06:47:48 EST 2015
Move the qpel function table into the luma function itself. This unifies the luma MC interface to match the decoder side.
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -89,7 +89,7 @@
PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
PWelsMcFunc pfChromaMc;
- PWelsLumaQuarpelMcFunc pfLumaQuarpelMc[16];
+ PWelsMcFunc pfLumaMc;
PWelsSampleAveragingFunc pfSampleAveraging[2];
} SMcFunc;
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -395,6 +395,17 @@
}
}
}
+void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
+ McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
+ McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
+ McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
+ McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
+ };
+ uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
+ pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
//***************************************************************************//
// MMXEXT and SSE2 implementation //
//***************************************************************************//
@@ -575,6 +586,17 @@
}
+void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
+ McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
+ McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
+ McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
+ McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
+ };
+ uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
+ pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
#endif //X86_ASM
//***************************************************************************//
@@ -666,6 +688,17 @@
McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
}
}
+void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[y*4+x]
+ McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon,
+ McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon,
+ McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon,
+ McHorVer03WidthEq16_neon, EncMcHorVer13_neon, EncMcHorVer23_neon, EncMcHorVer33_neon
+ };
+ uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
+ pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
#endif
#if defined(HAVE_NEON_AARCH64)
@@ -763,34 +796,8 @@
McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
}
}
-#endif
-
-void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
- static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
-
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
- McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
- McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
- McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
- McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
- };
-#if defined (X86_ASM)
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
- McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
- McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
- McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
- McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
- };
-#endif
-#if defined(HAVE_NEON)
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
- McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon,
- McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon,
- McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon,
- McHorVer03WidthEq16_neon, EncMcHorVer13_neon, EncMcHorVer23_neon, EncMcHorVer33_neon
- };
-#endif
-#if defined(HAVE_NEON_AARCH64)
+void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
McCopyWidthEq16_AArch64_neon, McHorVer10WidthEq16_AArch64_neon, McHorVer20WidthEq16_AArch64_neon, McHorVer30WidthEq16_AArch64_neon,
McHorVer01WidthEq16_AArch64_neon, EncMcHorVer11_AArch64_neon, EncMcHorVer21_AArch64_neon, EncMcHorVer31_AArch64_neon,
@@ -797,13 +804,20 @@
McHorVer02WidthEq16_AArch64_neon, EncMcHorVer12_AArch64_neon, McHorVer22WidthEq16_AArch64_neon, EncMcHorVer32_AArch64_neon,
McHorVer03WidthEq16_AArch64_neon, EncMcHorVer13_AArch64_neon, EncMcHorVer23_AArch64_neon, EncMcHorVer33_AArch64_neon
};
+ uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
+ pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
#endif
+
+void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
+ static const PWelsSampleAveragingFunc pfPixAvgFunc[2] = {PixelAvgWidthEq8_c, PixelAvgWidthEq16_c};
+
pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
memcpy (pMcFuncs->pfSampleAveraging, pfPixAvgFunc, sizeof (pfPixAvgFunc));
pMcFuncs->pfChromaMc = McChroma_c;
- memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16, sizeof (pWelsMcFuncWidthEq16));
+ pMcFuncs->pfLumaMc = McLuma_c;
#if defined (X86_ASM)
if (uiCpuFlag & WELS_CPU_SSE2) {
pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
@@ -812,7 +826,7 @@
pMcFuncs->pfSampleAveraging[0] = PixelAvgWidthEq8_mmx;
pMcFuncs->pfSampleAveraging[1] = PixelAvgWidthEq16_sse2;
pMcFuncs->pfChromaMc = McChroma_sse2;
- memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_sse2, sizeof (pWelsMcFuncWidthEq16_sse2));
+ pMcFuncs->pfLumaMc = McLuma_sse2;
}
if (uiCpuFlag & WELS_CPU_SSSE3) {
@@ -823,7 +837,7 @@
#if defined(HAVE_NEON)
if (uiCpuFlag & WELS_CPU_NEON) {
- memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_neon, sizeof (pWelsMcFuncWidthEq16_neon));
+ pMcFuncs->pfLumaMc = EncMcLuma_neon;
pMcFuncs->pfChromaMc = EncMcChroma_neon;
pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_neon;
pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_neon;
@@ -834,8 +848,7 @@
#endif
#if defined(HAVE_NEON_AARCH64)
if (uiCpuFlag & WELS_CPU_NEON) {
- memcpy (pMcFuncs->pfLumaQuarpelMc, pWelsMcFuncWidthEq16_AArch64_neon,
- sizeof (pWelsMcFuncWidthEq16_AArch64_neon));
+ pMcFuncs->pfLumaMc = EncMcLuma_AArch64_neon;
pMcFuncs->pfChromaMc = EncMcChroma_AArch64_neon;
pMcFuncs->pfSampleAveraging[0] = PixStrideAvgWidthEq8_AArch64_neon;
pMcFuncs->pfSampleAveraging[1] = PixStrideAvgWidthEq16_AArch64_neon;
--- a/codec/encoder/core/src/svc_base_layer_md.cpp
+++ b/codec/encoder/core/src/svc_base_layer_md.cpp
@@ -1247,7 +1247,7 @@
pDstCr = pMbCache->pMemPredChroma + 64;
}
//MC
- pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma, iLineSizeY, pDstLuma, 16, 16);
+ pFunc->sMcFuncs.pfLumaMc (pRefLuma, iLineSizeY, pDstLuma, 16, 0, 0, 16, 16);
pFunc->sMcFuncs.pfChromaMc (pRefCb, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cb
pFunc->sMcFuncs.pfChromaMc (pRefCr, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8); //Cr
@@ -1313,7 +1313,6 @@
uint8_t* pDstCr = pMbCache->pSkipMb + 256 + 64;
SMVUnitXY sMvp = { 0 };
- uint8_t uiMvpIdx;
int32_t n;
int32_t iEncStride = pCurLayer->iEncStride[0];
@@ -1343,8 +1342,7 @@
//luma
pRefLuma += sQpelMvp.iMvY * iLineSizeY + sQpelMvp.iMvX;
- uiMvpIdx = ((sMvp.iMvY & 0x03) << 2) + (sMvp.iMvX & 0x03);
- pFunc->sMcFuncs.pfLumaQuarpelMc[uiMvpIdx] (pRefLuma, iLineSizeY, pDstLuma, 16, 16);
+ pFunc->sMcFuncs.pfLumaMc (pRefLuma, iLineSizeY, pDstLuma, 16, sMvp.iMvX, sMvp.iMvY, 16, 16);
iSadCostLuma = pFunc->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] (pMbCache->SPicData.pEncMb[0],
pCurLayer->iEncStride[0], pDstLuma, 16);
--- a/codec/encoder/core/src/svc_mode_decision.cpp
+++ b/codec/encoder/core/src/svc_mode_decision.cpp
@@ -414,7 +414,7 @@
pDstCr = pMbCache->pMemPredChroma + 64;
}
//MC
- pFunc->sMcFuncs.pfLumaQuarpelMc[0] (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 16);
+ pFunc->sMcFuncs.pfLumaMc (pRefLuma + iOffsetY, iLineSizeY, pDstLuma, 16, 0, 0, 16, 16);
pFunc->sMcFuncs.pfChromaMc (pRefCb + iOffsetUV, iLineSizeUV, pDstCb, 8, sMvp.iMvX, sMvp.iMvY, 8, 8);
pFunc->sMcFuncs.pfChromaMc (pRefCr + iOffsetUV, iLineSizeUV, pDstCr, 8, sMvp.iMvX, sMvp.iMvY, 8, 8);
--- a/test/encoder/EncUT_MotionCompensation.cpp
+++ b/test/encoder/EncUT_MotionCompensation.cpp
@@ -4,17 +4,10 @@
#include "cpu.h"
using namespace WelsEnc;
-static void McLumaFunc (SMcFunc* pFuncs, const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
- ASSERT_EQ (iWidth, 16);
- pFuncs->pfLumaQuarpelMc[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
#define InitMcFunc WelsInitMcFuncs
#define LUMA_FUNC(funcs, src, srcstride, dst, dststride, mvx, mvy, width, height) \
- McLumaFunc (funcs, src, srcstride, dst, dststride, mvx, mvy, width, height)
+ sMcFunc.pfLumaMc (src, srcstride, dst, dststride, mvx, mvy, width, height)
#define CHROMA_FUNC sMcFunc.pfChromaMc