shithub: openh264

Download patch

ref: a4eea4c64dd4ea8f31794a47ffc6f69eec970f91
parent: 7486bb11001ac61257d642e7908396adac5b812b
parent: 11c9f2037df4b487ee96e1a5bb3d2c1deac998a0
author: huili2 <[email protected]>
date: Mon Aug 25 12:38:16 EDT 2014

Merge pull request #1299 from ruil2/mvcost_check_1

fix crash on mvd cost calculation

--- a/codec/encoder/core/inc/svc_motion_estimate.h
+++ b/codec/encoder/core/inc/svc_motion_estimate.h
@@ -200,9 +200,9 @@
 
 // Cross Search Basics
 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                       uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                       uint16_t* pMvdTable,
                        const int32_t kiEncStride, const int32_t kiRefStride,
-                       const int32_t kiMinPos, const int32_t kiMaxPos,
+                       const int16_t kiMinMv, const int16_t kiMaxMv,
                        const bool bVerticalSearch);
 #ifdef X86_ASM
 extern "C"
@@ -212,14 +212,14 @@
 }
 
 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                                   uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                                   uint16_t* pMvdTable,
                                    const int32_t kiEncStride, const int32_t kiRefStride,
-                                   const int32_t kiMinPos, const int32_t kiMaxPos,
+                                   const int16_t kiMinMv, const int16_t kiMaxMv,
                                    const bool bVerticalSearch);
 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                                     uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                                     uint16_t* pMvdTable,
                                      const int32_t kiEncStride, const int32_t kiRefStride,
-                                     const int32_t kiMinPos, const int32_t kiMaxPos,
+                                     const int16_t kiMinMv, const int16_t kiMaxMv,
                                      const bool bVerticalSearch);
 #endif
 void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -155,9 +155,9 @@
                                      const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
                                      int32_t& iBestSadCost);
 typedef void (*PLineFullSearchFunc) (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                                     uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                                     uint16_t* pMvdTable,
                                      const int32_t kiEncStride, const int32_t kiRefStride,
-                                     const int32_t kiMinPos, const int32_t kiMaxPos,
+                                     const int16_t kiMinMv, const int16_t kiMaxMv,
                                      const bool bVerticalSearch);
 typedef void (*PInitializeHashforFeatureFunc) (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                     uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -422,13 +422,23 @@
   }
 }
 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                                   uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                                   uint16_t* pMvdTable,
                                    const int32_t kiEncStride, const int32_t kiRefStride,
-                                   const int32_t kiMinPos, const int32_t kiMaxPos,
+                                   const int16_t kiMinMv, const int16_t kiMaxMv,
                                    const bool bVerticalSearch) {
   uint8_t*  kpEncMb = pMe->pEncMb;
   const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
-  uint8_t* pRef         = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiRefStride];
+  uint8_t* pRef         = &pMe->pColoRefMb[kiMinMv * kiRefStride];
+
+  const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
+
+  int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
+  int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
+  int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
+  uint16_t* pMvdCost  = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvY]);
+  int16_t iStartMv = 0;
+
+
   const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
   const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
   PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
@@ -438,7 +448,7 @@
   PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
       TransposeMatrixBlocksx8_mmx;
 
-  const int32_t kiDiff   = kiMaxPos - kiMinPos;
+  const int32_t kiDiff   = iMaxPos - iMinPos;
   const int32_t kiRowNum  = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
   const int32_t kiBlocksNum  = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
   int32_t iCountLoop8  = (kiRowNum - kiEdgeBlocks) >> 3;
@@ -451,7 +461,7 @@
   TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
   TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
   ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
-  int32_t iTargetPos   = kiMinPos;
+  int32_t iTargetPos   = iMinPos;
   int16_t iBestPos    = pMe->sMv.iMvX;
   uint32_t uiBestCost   = pMe->uiSadCost;
   uint32_t uiCostMin;
@@ -460,7 +470,7 @@
   pRef = &uiMatrixRef[0][0];
 
   while (iCountLoop8 > 0) {
-    CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
+    CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
     uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
     if (uiCostMin < uiBestCost) {
       uiBestCost = uiCostMin;
@@ -468,18 +478,20 @@
     }
     iTargetPos += 8;
     pRef += 8;
+    iStartMv += 8;
     -- iCountLoop8;
   }
   if (kiRemainingVectors > 0) {
     kpEncMb = pMe->pEncMb;
     pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
-    while (iTargetPos < kiMaxPos) {
-      const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
-      uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
+    while (iTargetPos < iMaxPos) {
+      const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
+      uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
       if (uiSadCost < uiBestCost) {
         uiBestCost = uiSadCost;
         iBestPos = iTargetPos;
       }
+      iStartMv++;
       pRef += kiRefStride;
       ++iTargetPos;
     }
@@ -493,21 +505,27 @@
 }
 
 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                                     uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                                     uint16_t* pMvdTable,
                                      const int32_t kiEncStride, const int32_t kiRefStride,
-                                     const int32_t kiMinPos, const int32_t kiMaxPos,
+                                     const int16_t kiMinMv, const int16_t kiMaxMv,
                                      const bool bVerticalSearch) {
   uint8_t* kpEncMb = pMe->pEncMb;
-  const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixX;
-  uint8_t* pRef         = &pMe->pColoRefMb[kiMinPos - kiCurMeBlockPix];
+
+  const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
+  int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
+  int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
+  int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
+  uint16_t* pMvdCost  = & (pMvdTable[ (kiMinMv << 2) - pMe->sMvp.iMvX]);
+  int16_t iStartMv = 0;
+  uint8_t* pRef         = &pMe->pColoRefMb[kiMinMv];
   const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
   PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
   ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
-  const int32_t kiNumVector = kiMaxPos - kiMinPos;
+  const int32_t kiNumVector = iMaxPos - iMinPos;
   int32_t iCountLoop8 = kiNumVector >> 3;
   const int32_t kiRemainingLoop8 = kiNumVector & 7;
-  int32_t iTargetPos   = kiMinPos;
+  int32_t iTargetPos   = iMinPos;
   int16_t iBestPos    = pMe->sMv.iMvX;
   uint32_t uiBestCost   = pMe->uiSadCost;
   uint32_t uiCostMin;
@@ -514,7 +532,7 @@
   int32_t iIndexMinPos;
 
   while (iCountLoop8 > 0) {
-    CalcMvdCostx8_c (uiBaseCost, iTargetPos, pMvdTable, kiFixedMvd);
+    CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
     uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
     if (uiCostMin < uiBestCost) {
       uiBestCost = uiCostMin;
@@ -522,16 +540,18 @@
     }
     iTargetPos += 8;
     pRef += 8;
+    iStartMv += 8;
     -- iCountLoop8;
   }
   if (kiRemainingLoop8 > 0) {
-    while (iTargetPos < kiMaxPos) {
-      const uint16_t pMvdCost = pMvdTable[iTargetPos << 2];
-      uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + pMvdCost);
+    while (iTargetPos < iMaxPos) {
+      const uint16_t uiMvdCost = pMvdCost[iStartMv << 2];
+      uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
       if (uiSadCost < uiBestCost) {
         uiBestCost = uiSadCost;
         iBestPos = iTargetPos;
       }
+      iStartMv++;
       ++pRef;
       ++iTargetPos;
     }
@@ -538,7 +558,7 @@
   }
   if (uiBestCost < pMe->uiSadCost) {
     SMVUnitXY sBestMv;
-    sBestMv.iMvX = iBestPos - kiCurMeBlockPix;
+    sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
     sBestMv.iMvY = 0;
     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
   }
@@ -545,33 +565,55 @@
 }
 #endif
 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
-                       uint16_t* pMvdTable, const int32_t kiFixedMvd,
+                       uint16_t* pMvdTable,
                        const int32_t kiEncStride, const int32_t kiRefStride,
-                       const int32_t kiMinPos, const int32_t kiMaxPos,
+                       const int16_t iMinMv, const int16_t iMaxMv,
                        const bool bVerticalSearch) {
   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
-  const int32_t kiCurMeBlockPix  = bVerticalSearch ? pMe->iCurMeBlockPixY : pMe->iCurMeBlockPixX;
-  const int32_t kiStride = bVerticalSearch ? kiRefStride : 1;
-  uint8_t* pRef            = &pMe->pColoRefMb[ (kiMinPos - kiCurMeBlockPix) * kiStride];
-  uint16_t* pMvdCost  = & (pMvdTable[kiMinPos << 2]);
+  const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
+  const int32_t kiCurMeBlockQpelPixX = ((kiCurMeBlockPixX) << 2);
+  const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
+  const int32_t kiCurMeBlockQpelPixY = ((kiCurMeBlockPixY) << 2);
+  int32_t iMinPos, iMaxPos;
+  int32_t iFixedMvd;
+  int32_t iCurMeBlockPix;
+  int32_t iStride;
+  uint16_t* pMvdCost;
+
+  if (bVerticalSearch) {
+    iMinPos = kiCurMeBlockPixY + iMinMv;
+    iMaxPos = kiCurMeBlockPixY + iMaxMv;
+    iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
+    iCurMeBlockPix = pMe->iCurMeBlockPixY;
+    iStride = kiRefStride;
+    pMvdCost  = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvY]);
+  } else {
+    iMinPos = kiCurMeBlockPixX + iMinMv;
+    iMaxPos = kiCurMeBlockPixX + iMaxMv;
+    iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
+    iCurMeBlockPix = pMe->iCurMeBlockPixX;
+    iStride = 1;
+    pMvdCost  = & (pMvdTable[ (iMinMv << 2) - pMe->sMvp.iMvX]);
+  }
+  uint8_t* pRef            = &pMe->pColoRefMb[ iMinMv * iStride];
   uint32_t uiBestCost    = 0xFFFFFFFF;
   int32_t iBestPos       = 0;
 
-  for (int32_t iTargetPos = kiMinPos; iTargetPos < kiMaxPos; ++ iTargetPos) {
+  for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
     uint8_t* const kpEncMb  = pMe->pEncMb;
-    uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (kiFixedMvd + *pMvdCost);
+    uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
     if (uiSadCost < uiBestCost) {
       uiBestCost  = uiSadCost;
       iBestPos  = iTargetPos;
     }
-    pRef += kiStride;
+    pRef += iStride;
     pMvdCost += 4;
   }
 
   if (uiBestCost < pMe->uiSadCost) {
     SMVUnitXY sBestMv;
-    sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - kiCurMeBlockPix);
-    sBestMv.iMvY = bVerticalSearch ? (iBestPos - kiCurMeBlockPix) : 0;
+    sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
+    sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
   }
 }
@@ -581,30 +623,24 @@
   PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
   PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
 
-  const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
-  const int32_t iCurMeBlockQpelPixX = ((iCurMeBlockPixX) << 2);
-  const int32_t iCurMeBlockPixY = pMe->iCurMeBlockPixY;
-  const int32_t iCurMeBlockQpelPixY = ((iCurMeBlockPixY) << 2);
-  uint16_t* pMvdCostX = pMe->pMvdCost - iCurMeBlockQpelPixX - pMe->sMvp.iMvX;//do the offset here instead of in the search
-  uint16_t* pMvdCostY = pMe->pMvdCost - iCurMeBlockQpelPixY - pMe->sMvp.iMvY;//do the offset here instead of in the search
-
   //vertical search
   pfVerticalFullSearchFunc (pFuncList, pMe,
-                            pMvdCostY, pMvdCostX[ iCurMeBlockQpelPixX ],
+                            pMe->pMvdCost,
                             kiEncStride, kiRefStride,
-                            iCurMeBlockPixY + pSlice->sMvStartMin.iMvY,
-                            iCurMeBlockPixY + pSlice->sMvStartMax.iMvY, true);
+                            pSlice->sMvStartMin.iMvY,
+                            pSlice->sMvStartMax.iMvY, true);
 
   //horizontal search
   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
     pfHorizontalFullSearchFunc (pFuncList, pMe,
-                                pMvdCostX, pMvdCostY[ iCurMeBlockQpelPixY ],
+                                pMe->pMvdCost,
                                 kiEncStride, kiRefStride,
-                                iCurMeBlockPixX + pSlice->sMvStartMin.iMvX,
-                                iCurMeBlockPixX + pSlice->sMvStartMax.iMvX,
+                                pSlice->sMvStartMin.iMvX,
+                                pSlice->sMvStartMax.iMvX,
                                 false);
   }
 }
+
 
 /////////////////////////
 // Feature Search Basics
--- a/test/encoder/EncUT_MotionEstimate.cpp
+++ b/test/encoder/EncUT_MotionEstimate.cpp
@@ -174,7 +174,6 @@
       delete m_pMa;
       m_pMa = NULL;
     }
-
   }
  public:
   uint8_t* m_pRefStart;
@@ -206,7 +205,7 @@
   SWelsME sMe;
   SSlice sSlice;
   const uint8_t kuiQp = rand() % 52;
-  InitMe (kuiQp, m_uiMvdInterTableStride, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
+  InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
 
   WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
 
@@ -244,6 +243,50 @@
 
 }
 
+TEST_F (MotionEstimateRangeTest, TestWelsMotionCrossSearch) {
+
+  SWelsFuncPtrList sFuncList;
+  SWelsME sMe;
+  SSlice sSlice;
+  int32_t iUsageType = 1;
+  uint8_t* pRef = m_pRefStart + PADDING_LENGTH * m_iWidthExt + PADDING_LENGTH;
+  const int32_t kiMaxBlock16Sad = 72000;//a rough number
+
+  WelsInitSampleSadFunc (&sFuncList, 0); //test c functions
+  WelsInitMeFunc (&sFuncList, 0, iUsageType);
+
+  RandomPixelDataGenerator (m_pSrc, m_iWidth, m_iHeight, m_iWidth);
+  RandomPixelDataGenerator (m_pRefStart, m_iWidthExt, m_iHeightExt, m_iWidthExt);
+
+  sMe.uiBlockSize = BLOCK_16x16; //
+  for (int32_t iMby = 0; iMby < m_iMbHeight; iMby++) {
+    for (int32_t iMbx = 0; iMbx < m_iMbWidth; iMbx++) {
+
+      const uint8_t kuiQp = rand() % 52;
+
+      InitMe (kuiQp, m_uiMvdInterTableSize, m_uiMvdInterTableStride, m_pMvdCostTable, &sMe);
+      SetMvWithinIntegerMvRange (m_iMbWidth, m_iMbHeight, iMbx , iMby, m_iMvRange,
+                                 & (sSlice.sMvStartMin), & (sSlice.sMvStartMax));
+
+
+      sMe.sMvp.iMvX = rand() % m_iMvRange;
+      sMe.sMvp.iMvY = rand() % m_iMvRange;
+      sMe.iCurMeBlockPixX = (iMbx << 4);
+      sMe.iCurMeBlockPixY = (iMby << 4);
+      sMe.pRefMb = pRef + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidthExt;
+      sMe.pEncMb = m_pSrc + sMe.iCurMeBlockPixX + sMe.iCurMeBlockPixY * m_iWidth;;
+      sMe.uiSadCost = sMe.uiSatdCost = kiMaxBlock16Sad;
+      sMe.pColoRefMb = sMe.pRefMb;
+      WelsMotionCrossSearch (&sFuncList, &sMe, &sSlice, m_iWidth, m_iWidthExt);
+      if ((WELS_ABS (sMe.sMv.iMvX) > m_iMvRange))
+        printf ("mvx = %d\n", sMe.sMv.iMvX);
+      ASSERT_TRUE (! (WELS_ABS (sMe.sMv.iMvX) > m_iMvRange));
+      if ((WELS_ABS (sMe.sMv.iMvY) > m_iMvRange))
+        printf ("mvy = %d\n", sMe.sMv.iMvY);
+      ASSERT_TRUE (! (WELS_ABS (sMe.sMv.iMvY) > m_iMvRange));
+    }
+  }
+}
 void MotionEstimateTest::DoLineTest (PLineFullSearchFunc func, bool vertical) {
   const int32_t kiMaxBlock16Sad = 72000;//a rough number
   SWelsFuncPtrList sFuncList;
@@ -295,18 +338,22 @@
     uint16_t* pMvdCostY = sMe.pMvdCost - iCurMeBlockQpelPixY - sMe.sMvp.iMvY;
     uint16_t* pMvdCost = vertical ? pMvdCostY : pMvdCostX;
     int iSize = vertical ? m_iHeight : m_iWidth;
-    int iFixedMvd = vertical ? pMvdCostX[ iCurMeBlockQpelPixX ] : pMvdCostY[ iCurMeBlockQpelPixY ];
-    func (&sFuncList, &sMe,
-          pMvdCost, iFixedMvd,
-          m_iMaxSearchBlock, m_iWidth,
-          INTPEL_NEEDED_MARGIN,
-          iSize - INTPEL_NEEDED_MARGIN - 16, vertical);
 
     //the last selection may be affected by MVDcost, that is when smaller MvY will be better
     if (vertical) {
+      func (&sFuncList, &sMe,
+            pMvdCost,
+            m_iMaxSearchBlock, m_iWidth,
+            INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixY,
+            iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixY, vertical);
       bFoundMatch = (sMe.sMv.iMvX == 0
                      && (sMe.sMv.iMvY == sTargetMv.iMvY || abs (sMe.sMv.iMvY) < abs (sTargetMv.iMvY)));
     } else {
+      func (&sFuncList, &sMe,
+            pMvdCost,
+            m_iMaxSearchBlock, m_iWidth,
+            INTPEL_NEEDED_MARGIN - sMe.iCurMeBlockPixX,
+            iSize - INTPEL_NEEDED_MARGIN - 16 - sMe.iCurMeBlockPixX, vertical);
       bFoundMatch = (sMe.sMv.iMvY == 0
                      && (sMe.sMv.iMvX == sTargetMv.iMvX || abs (sMe.sMv.iMvX) < abs (sTargetMv.iMvX)));
     }