shithub: openh264

Download patch

ref: fa9735b3314eba3eb2e0ae511909d79aab321a6a
parent: 8ea23ac2205b50f3eebc64f77dc6c3e2eea0f077
parent: 69983d6df4cbf5a6213a81873e7ab0ffb3b114f7
author: Licai Guo <[email protected]>
date: Tue Apr 1 06:23:24 EDT 2014

Merge pull request #602 from sijchen/fme_merge22

[Encoder ME] Add alternative search methods

--- a/codec/encoder/core/inc/picture.h
+++ b/codec/encoder/core/inc/picture.h
@@ -51,11 +51,13 @@
 
 typedef struct TagScreenContentStorage{
   SScreenBlockFeatureStorage	sRefBlockFeature[MAX_MULTI_REF_PIC_COUNT];
-	bool						bRefBlockFeatureCalculated; // flag of whether pre-process is done
+  uint32_t                    uiSadCostThreshold[BLOCK_SIZE_ALL]; // SAD-cost thresholds, one slot per BLOCK_* size index (array length is BLOCK_SIZE_ALL)
+
+	bool					bRefBlockFeatureCalculated; // flag of whether pre-process is done
 	uint8_t				uiFeatureStrategyIndex;// index of hash strategy
 
 	/* for FME frame-level switch */
-	bool bFMESwitchFlag;
+	bool    bFMESwitchFlag;
 	uint8_t uiFMEGoodFrameCount;
 	int32_t iHighFreMbCount;
 }SScreenContentStorage;
@@ -116,3 +118,4 @@
 }	// end of namespace WelsSVCEnc {
 
 #endif//WELS_PICTURE_H__
+
--- a/codec/encoder/core/inc/sample.h
+++ b/codec/encoder/core/inc/sample.h
@@ -37,15 +37,6 @@
 #include "wels_func_ptr_def.h"
 
 namespace WelsSVCEnc {
-enum {
-  BLOCK_16x16 = 0,
-  BLOCK_16x8  = 1,
-  BLOCK_8x16  = 2,
-  BLOCK_8x8   = 3,
-  BLOCK_4x4   = 4,
-//    BLOCK_8x4   = 5,
-//    BLOCK_4x8   = 6,
-};
 
 //======================SATD======================//
 int32_t WelsSampleSatd16x16_c (uint8_t*, int32_t, uint8_t*, int32_t);
--- a/codec/encoder/core/inc/slice.h
+++ b/codec/encoder/core/inc/slice.h
@@ -162,7 +162,6 @@
   /*******************************sSliceHeader****************************/
   SSliceHeaderExt	sSliceHeaderExt;
 
-
   SMVUnitXY	sMvStartMin;
   SMVUnitXY	sMvStartMax;
   SMVUnitXY	sMvc[5];
@@ -175,8 +174,12 @@
 
   bool		bDynamicSlicingSliceSizeCtrlFlag;
   uint8_t		uiAssumeLog2BytePerMb;
+
+  uint32_t     uiSliceFMECostDown;//TODO: for FME switch under MT, to opt after ME final?
+
   uint8_t		uiReservedFillByte;	// reserved to meet 4 bytes alignment
 } SSlice, *PSlice;
 
 }
 #endif//WELS_SLICE_H__
+
--- a/codec/encoder/core/inc/wels_const.h
+++ b/codec/encoder/core/inc/wels_const.h
@@ -173,6 +173,18 @@
 #define MAX_NAL_UNIT_NUM_IN_AU	256	// predefined maximal number of NAL Units in an access unit
 #define MAX_ACCESS_UINT_CAPACITY	(1<<20)	// Maximal AU capacity in bytes: 1024 KB predefined
 #define MAX_ACCESS_UNIT_CACHE_NUM	2	// Maximal Access Unit(AU) cache number to be processed, denote current AU and the next coming AU.
+
+enum { // block-size identifiers; values are contiguous from 0 so they can index arrays of length BLOCK_SIZE_ALL
+  BLOCK_16x16 = 0,
+  BLOCK_16x8  = 1,
+  BLOCK_8x16  = 2,
+  BLOCK_8x8   = 3,
+  BLOCK_4x4   = 4,
+//    BLOCK_8x4   = 5,
+//    BLOCK_4x8   = 6,
+  BLOCK_SIZE_ALL = 5 // number of enabled entries above; must be raised if the commented-out sizes are re-enabled
+};
+
 enum {
   CUR_AU_IDX	= 0,			// index symbol for current access unit
   SUC_AU_IDX	= 1				// index symbol for successive access unit
--- a/codec/encoder/core/src/svc_base_layer_md.cpp
+++ b/codec/encoder/core/src/svc_base_layer_md.cpp
@@ -41,7 +41,6 @@
 #include "mv_pred.h"
 #include "svc_enc_golomb.h"
 #include "svc_base_layer_md.h"
-#include "sample.h"
 #include "encoder.h"
 #include "svc_encode_mb.h"
 #include "svc_encode_slice.h"
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -39,7 +39,6 @@
  */
 
 #include "cpu_core.h"
-#include "sample.h"
 #include "svc_motion_estimate.h"
 
 namespace WelsSVCEnc {
@@ -183,7 +182,7 @@
 
 
 /////////////////////////
-// Diamond Search Related
+// Diamond Search Basics
 /////////////////////////
 bool WelsMeSadCostSelect (int32_t* iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
                             const int32_t kiDy, int32_t* pIx, int32_t* pIy) {
@@ -260,7 +259,7 @@
 }
 
 /////////////////////////
-// DirectionalMv Related
+// DirectionalMv Basics
 /////////////////////////
 bool CheckDirectionalMv(PSampleSadSatdCostFunc pSad, void * vpMe,
                       const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
@@ -291,7 +290,7 @@
 }
 
 /////////////////////////
-// Cross Search Related
+// Cross Search Basics
 /////////////////////////
 void VerticalFullSearchUsingSSE41( void *pFunc, void *vpMe,
 														uint16_t* pMvdTable, const int32_t kiFixedMvd,
@@ -368,7 +367,7 @@
 }
 
 /////////////////////////
-// Feature Search Related
+// Feature Search Basics
 /////////////////////////
 void SetFeatureSearchIn( SWelsFuncPtrList *pFunc,  const SWelsME& sMe,
                         const SSlice *pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
@@ -473,17 +472,17 @@
   return (i < iSearchTimesx2);
 }
 
-
-void MotionEstimateFeatureFullSearchScc( SFeatureSearchIn &sFeatureSearchIn,
-                                        const uint32_t kiMaxSearchPoint,
+void MotionEstimateFeatureFullSearch( SFeatureSearchIn &sFeatureSearchIn, // runs FeatureSearchOne and adopts its result only when it lowers pMe->uiSadCost
+                                        const uint32_t kuiMaxSearchPoint, // forwarded unchanged to FeatureSearchOne
                                         SWelsME* pMe) {
-  SFeatureSearchOut sFeatureSearchOut = {0};
+  SFeatureSearchOut sFeatureSearchOut = {0};// seeded below with pMe's current cost, MV and reference; TODO: this can be refactored and removed
   sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost;
   sFeatureSearchOut.sBestMv = pMe->sMv;
   sFeatureSearchOut.pBestRef = pMe->pRefMb;
 
-  FeatureSearchOne( sFeatureSearchIn, 0, kiMaxSearchPoint, &sFeatureSearchOut );
-  if ( sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost ) {
+  int32_t iFeatureDifference = 0;//TODO: change it according to computational-complexity setting when needed
+  FeatureSearchOne( sFeatureSearchIn, iFeatureDifference, kuiMaxSearchPoint, &sFeatureSearchOut );
+  if ( sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost ) {// only a strictly lower cost replaces pMe's result; TODO: this may be refactored and removed
     UpdateMeResults(sFeatureSearchOut.sBestMv,
       sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef,
       pMe);
@@ -490,6 +489,46 @@
   }
 }
 
+/////////////////////////
+// Search function option
+/////////////////////////
+void WelsDiamondCrossSearch(SWelsFuncPtrList *pFunc, void* vpLayer, void* vpMe, void* vpSlice) { // diamond search, then cross search if the SAD cost is still >= the threshold
+    SDqLayer* pCurLayer = static_cast<SDqLayer *>(vpLayer);
+    SWelsME* pMe			 = static_cast<SWelsME *>(vpMe);
+    SSlice* pSlice				 = static_cast<SSlice *>(vpSlice);
 
+    //  Step 1: diamond search (iterative search starting from pMe->pRefMb)
+    WelsMotionEstimateIterativeSearch(pFunc, pMe, pCurLayer->iEncStride[0], pCurLayer->pRefPic->iLineSize[0], pMe->pRefMb);
+
+    //  Step 2: CROSS search, skipped when step 1 already got below the threshold
+    SScreenContentStorage tmpScreenContentStorage; //TODO: use this structure from Ref. NOTE(review): this local is never filled in before the read below
+    pMe->uiSadCostThreshold = tmpScreenContentStorage.uiSadCostThreshold[pMe->uiBlockSize]; // NOTE(review): threshold value looks indeterminate until the TODO above is resolved - confirm
+    if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
+      WelsMotionCrossSearch(pFunc, pCurLayer, pMe, pSlice);
+    }
+}
+void WelsDiamondCrossFeatureSearch(SWelsFuncPtrList *pFunc, void* vpLayer, void* vpMe, void* vpSlice) { // diamond+cross search, then feature search if the SAD cost is still >= the threshold
+    SDqLayer* pCurLayer = static_cast<SDqLayer *>(vpLayer);
+    SWelsME* pMe			 = static_cast<SWelsME *>(vpMe);
+    SSlice* pSlice				 = static_cast<SSlice *>(vpSlice);
+
+    //  Step 1: diamond search + cross (this call also sets pMe->uiSadCostThreshold)
+    WelsDiamondCrossSearch(pFunc, pCurLayer, pMe, pSlice);
+
+    // Step 2: FeatureSearch, only when step 1 left the cost at or above the threshold
+    if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
+        pSlice->uiSliceFMECostDown += pMe->uiSadCost; // add the cost before feature search; paired with the subtraction at the end of this branch
+
+        SScreenBlockFeatureStorage tmpScreenBlockFeatureStorage; //TODO: use this structure from Ref. NOTE(review): passed below without being filled in - confirm
+        uint32_t uiMaxSearchPoint = INT_MAX;//TODO: change it according to computational-complexity setting
+        SFeatureSearchIn sFeatureSearchIn = {0};
+        SetFeatureSearchIn(pFunc, *pMe, pSlice, &tmpScreenBlockFeatureStorage,
+          pCurLayer->iEncStride[0], pCurLayer->pRefPic->iLineSize[0],
+          &sFeatureSearchIn);
+        MotionEstimateFeatureFullSearch( sFeatureSearchIn, uiMaxSearchPoint, pMe);
+
+        pSlice->uiSliceFMECostDown -= pMe->uiSadCost; // net effect of the pair: accumulates (cost before - cost after) feature search
+    }
+}
 } // namespace WelsSVCEnc