ref: a4eea4c64dd4ea8f31794a47ffc6f69eec970f91
parent: 7486bb11001ac61257d642e7908396adac5b812b
parent: 11c9f2037df4b487ee96e1a5bb3d2c1deac998a0
author: huili2 <[email protected]>
date: Mon Aug 25 12:38:16 EDT 2014
Merge pull request #1299 from ruil2/mvcost_check_1: fix crash on MVD cost calculation
--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -200,9 +200,9 @@
// Cross Search Basics
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
#ifdef X86_ASM
extern "C"
@@ -212,14 +212,14 @@
}
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
#endif
void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -155,9 +155,9 @@
const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
int32_t& iBestSadCost);
typedef void (*PLineFullSearchFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch);
typedef void (*PInitializeHashforFeatureFunc) (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -422,13 +422,23 @@
}
}
void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch) {
uint8_t* kpEncMb = pMe->pEncMb;
const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
- uint8_t* pRef = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiRefStride];
+ uint8_t* pRef = &pMe->pColoRefMb[kiMinMv * kiRefStride];
+
+ const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
+
+ int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
+ int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
+ int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
+ uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvY]);
+ int16_t iStartMv = 0;
+
+
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
@@ -438,7 +448,7 @@
PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
TransposeMatrixBlocksx8_mmx;
- const int32_t kiDiff = kiMaxPos - kiMinPos;
+ const int32_t kiDiff = iMaxPos - iMinPos;
const int32_t kiRowNum = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
int32_t iCountLoop8 = (kiRowNum - kiEdgeBlocks) >> 3;
@@ -451,7 +461,7 @@
TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
- int32_t iTargetPos = kiMinPos;
+ int32_t iTargetPos = iMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
@@ -460,7 +470,7 @@
pRef = &uiMatrixRef[0][0];
while (iCountLoop8 > 0) {
- CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
+ CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
if (uiCostMin < uiBestCost) {
uiBestCost = uiCostMin;
@@ -468,18 +478,20 @@
}
iTargetPos += 8;
pRef += 8;
+ iStartMv += 8;
-- iCountLoop8;
}
if (kiRemainingVectors > 0) {
kpEncMb = pMe->pEncMb;
pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
- while (iTargetPos < kiMaxPos) {
- const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
- uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
+ while (iTargetPos < iMaxPos) {
+ const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
+ uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
+ iStartMv++;
pRef += kiRefStride;
++iTargetPos;
}
@@ -493,21 +505,27 @@
}
void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t kiMinMv, const int16_t kiMaxMv,
const bool bVerticalSearch) {
uint8_t* kpEncMb = pMe->pEncMb;
- const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
- uint8_t* pRef = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
+
+ const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
+ int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
+ int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
+ int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
+ uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvX]);
+ int16_t iStartMv = 0;
+ uint8_t* pRef = &pMe->pColoRefMb[kiMinMv];
const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
- const int32_t kiNumVector = kiMaxPos - kiMinPos;
+ const int32_t kiNumVector = iMaxPos - iMinPos;
int32_t iCountLoop8 = kiNumVector >> 3;
const int32_t kiRemainingLoop8 = kiNumVector & 7;
- int32_t iTargetPos = kiMinPos;
+ int32_t iTargetPos = iMinPos;
int16_t iBestPos = pMe->sMv.iMvX;
uint32_t uiBestCost = pMe->uiSadCost;
uint32_t uiCostMin;
@@ -514,7 +532,7 @@
int32_t iIndexMinPos;
while (iCountLoop8 > 0) {
- CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
+ CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
if (uiCostMin < uiBestCost) {
uiBestCost = uiCostMin;
@@ -522,16 +540,18 @@
}
iTargetPos += 8;
pRef += 8;
+ iStartMv += 8;
-- iCountLoop8;
}
if (kiRemainingLoop8 > 0) {
- while (iTargetPos < kiMaxPos) {
- const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
- uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
+ while (iTargetPos < iMaxPos) {
+ const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
+ uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
+ iStartMv++;
++pRef;
++iTargetPos;
}
@@ -538,7 +558,7 @@
}
if (uiBestCost < pMe->uiSadCost) {
SMVUnitXY sBestMv;
- sBestMv.iMvX = iBestPos - kiCurMeBlockPix;
+ sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
sBestMv.iMvY = 0;
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
}
@@ -545,33 +565,55 @@
}
#endif
void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
- uint16_t* pMvdTable, const int32_t kiFixedMvd,
+ uint16_t* pMvdTable,
const int32_t kiEncStride, const int32_t kiRefStride,
- const int32_t kiMinPos, const int32_t kiMaxPos,
+ const int16_t iMinMv, const int16_t iMaxMv,
const bool bVerticalSearch) {
PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
- const int32_t kiCurMeBlockPix = bVerticalSearch ? pMe->iCurMeBlockPixY : pMe->iCurMeBlockPixX;
- const int32_t kiStride = bVerticalSearch ? kiRefStride : 1;
- uint8_t* pRef = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiStride];
- uint16_t* pMvdCost = & (pMvdTable[kiMinPos << 2]);
+ const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
+ const int32_t kiCurMeBlockQpelPixX = ((kiCurMeBlockPixX) << 2);
+ const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
+ const int32_t kiCurMeBlockQpelPixY = ((kiCurMeBlockPixY) << 2);
+ int32_t iMinPos, iMaxPos;
+ int32_t iFixedMvd;
+ int32_t iCurMeBlockPix;
+ int32_t iStride;
+ uint16_t* pMvdCost;
+
+ if (bVerticalSearch) {
+ iMinPos = kiCurMeBlockPixY + iMinMv;
+ iMaxPos = kiCurMeBlockPixY + iMaxMv;
+ iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
+ iCurMeBlockPix = pMe->iCurMeBlockPixY;
+ iStride = kiRefStride;
+ pMvdCost = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvY]);
+ } else {
+ iMinPos = kiCurMeBlockPixX + iMinMv;
+ iMaxPos = kiCurMeBlockPixX + iMaxMv;
+ iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
+ iCurMeBlockPix = pMe->iCurMeBlockPixX;
+ iStride = 1;
+ pMvdCost = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvX]);
+ }
+ uint8_t* pRef = &pMe->pColoRefMb[ iMinMv * iStride];
uint32_t uiBestCost = 0xFFFFFFFF;
int32_t iBestPos = 0;
- for (int32_t iTargetPos = kiMinPos; iTargetPos < kiMaxPos; ++ iTargetPos) {
+ for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
uint8_t* const kpEncMb = pMe->pEncMb;
- uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + *pMvdCost);
+ uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
if (uiSadCost < uiBestCost) {
uiBestCost = uiSadCost;
iBestPos = iTargetPos;
}
- pRef += kiStride;
+ pRef += iStride;
pMvdCost += 4;
}
if (uiBestCost < pMe->uiSadCost) {
SMVUnitXY sBestMv;
- sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - kiCurMeBlockPix);
- sBestMv.iMvY = bVerticalSearch ? (iBestPos - kiCurMeBlockPix) : 0;
+ sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
+ sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
}
}
@@ -581,30 +623,24 @@
PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
- const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
- const int32_t iCurMeBlockQpelPixX = ((iCurMeBlockPixX) << 2);
- const int32_t iCurMeBlockPixY = pMe->iCurMeBlockPixY;
- const int32_t iCurMeBlockQpelPixY = ((iCurMeBlockPixY) << 2);
- uint16_t* pMvdCostX = pMe->pMvdCost - iCurMeBlockQpelPixX - pMe->sMvp.iMvX;//do the offset here instead of in the search
- uint16_t* pMvdCostY = pMe->pMvdCost - iCurMeBlockQpelPixY - pMe->sMvp.iMvY;//do the offset here instead of in the search
-
//vertical search
pfVerticalFullSearchFunc (pFuncList, pMe,
- pMvdCostY, pMvdCostX[ iCurMeBlockQpelPixX ],
+ pMe->pMvdCost,
kiEncStride, kiRefStride,
- iCurMeBlockPixY + pSlice->sMvStartMin.iMvY,
- iCurMeBlockPixY + pSlice->sMvStartMax.iMvY, true);
+ pSlice->sMvStartMin.iMvY,
+ pSlice->sMvStartMax.iMvY, true);
//horizontal search
if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
pfHorizontalFullSearchFunc (pFuncList, pMe,
- pMvdCostX, pMvdCostY[ iCurMeBlockQpelPixY ],
+ pMe->pMvdCost,
kiEncStride, kiRefStride,
- iCurMeBlockPixX + pSlice->sMvStartMin.iMvX,
- iCurMeBlockPixX + pSlice->sMvStartMax.iMvX,
+ pSlice->sMvStartMin.iMvX,
+ pSlice->sMvStartMax.iMvX,
false);
}
}
+
/////////////////////////
// Feature Search Basics
--- a/test/encoder/EncUT_MotionEstimate.cpp
+++ b/test/encoder/EncUT_MotionEstimate.cpp
@@ -174,7 +174,6 @@
delete m_pMa;
m_pMa = NULL;
}
-
}
public:
uint8_t* m_pRefStart;
@@ -206,7 +205,7 @@
SWelsME sMe;
SSlice sSlice;
const uint8_t kuiQp = rand() % 52;
- InitMe (kuiQp, m_uiMvdInterTableStride, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
+ InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
@@ -244,6 +243,50 @@
}
+TEST_F (MotionEstimateRangeTest, TestWelsMotionCrossSearch) {
+
+ SWelsFuncPtrList sFuncList;
+ SWelsME sMe;
+ SSlice sSlice;
+ int32_t iUsageType = 1;
+ uint8_t* pRef = m_pRefStart + PADDING_LENGTH * m_iWidthExt + PADDING_LENGTH;
+ const int32_t kiMaxBlock16Sad = 72000;//a rough number
+
+ WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
+ WelsInitMeFunc (&sFuncList, 0, iUsageType);
+
+ RandomPixelDataGenerator (m_pSrc, m_iWidth, m_iHeight, m_iWidth);
+ RandomPixelDataGenerator (m_pRefStart, m_iWidthExt, m_iHeightExt, m_iWidthExt);
+
+ sMe.uiBlockSize = BLOCK_16x16; //
+ for (int32_t iMby = 0; iMby < m_iMbHeight; iMby++) {
+ for (int32_t iMbx = 0; iMbx < m_iMbWidth; iMbx++) {
+
+ const uint8_t kuiQp = rand() % 52;
+
+ InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
+ SetMvWithinIntegerMvRange (m_iMbWidth, m_iMbHeight, iMbx , iMby, m_iMvRange,
+ & (sSlice.sMvStartMin), & (sSlice.sMvStartMax));
+
+
+ sMe.sMvp.iMvX = rand() % m_iMvRange;
+ sMe.sMvp.iMvY = rand() % m_iMvRange;
+ sMe.iCurMeBlockPixX = (iMbx << 4);
+ sMe.iCurMeBlockPixY = (iMby << 4);
+ sMe.pRefMb = pRef + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidthExt;
+ sMe.pEncMb = m_pSrc + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidth;;
+ sMe.uiSadCost = sMe.uiSatdCost = kiMaxBlock16Sad;
+ sMe.pColoRefMb = sMe.pRefMb;
+ WelsMotionCrossSearch (&sFuncList, &sMe, &sSlice, m_iWidth, m_iWidthExt);
+ if ((WELS_ABS (sMe.sMv.iMvX) > m_iMvRange))
+ printf ("mvx = %d\n", sMe.sMv.iMvX);
+ ASSERT_TRUE (! (WELS_ABS (sMe.sMv.iMvX) > m_iMvRange));
+ if ((WELS_ABS (sMe.sMv.iMvY) > m_iMvRange))
+ printf ("mvy = %d\n", sMe.sMv.iMvY);
+ ASSERT_TRUE (! (WELS_ABS (sMe.sMv.iMvY) > m_iMvRange));
+ }
+ }
+}
void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
const int32_t kiMaxBlock16Sad = 72000;//a rough number
SWelsFuncPtrList sFuncList;
@@ -295,18 +338,22 @@
uint16_t* pMvdCostY = sMe.pMvdCost - iCurMeBlockQpelPixY - sMe.sMvp.iMvY;
uint16_t* pMvdCost = vertical ? pMvdCostY : pMvdCostX;
int iSize = vertical ? m_iHeight : m_iWidth;
- int iFixedMvd = vertical ? pMvdCostX[ iCurMeBlockQpelPixX ] : pMvdCostY[ iCurMeBlockQpelPixY ];
- func (&sFuncList, &sMe,
- pMvdCost, iFixedMvd,
- m_iMaxSearchBlock, m_iWidth,
- INTPEL_NEEDED_MARGIN,
- iSize - INTPEL_NEEDED_MARGIN - 16, vertical);
//the last selection may be affected by MVDcost, that is when smaller MvY will be better
if (vertical) {
+ func (&sFuncList, &sMe,
+ pMvdCost,
+ m_iMaxSearchBlock, m_iWidth,
+ INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixY,
+ iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixY, vertical);
bFoundMatch = (sMe.sMv.iMvX == 0
&& (sMe.sMv.iMvY == sTargetMv.iMvY || abs (sMe.sMv.iMvY) < abs (sTargetMv.iMvY)));
} else {
+ func (&sFuncList, &sMe,
+ pMvdCost,
+ m_iMaxSearchBlock, m_iWidth,
+ INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixX,
+ iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixX, vertical);
bFoundMatch = (sMe.sMv.iMvY == 0
&& (sMe.sMv.iMvX == sTargetMv.iMvX || abs (sMe.sMv.iMvX) < abs (sTargetMv.iMvX)));
}