shithub: openh264

Download patch

ref: 100e9522316a1103550b33f2af821f1a78c22bc7
parent: 1b0735c3a99e2515ef648e8f6b0299a446b684fa
parent: f161566458447a169ca83253121011c40ce12092
author: sijchen <[email protected]>
date: Thu Dec 17 07:02:00 EST 2015

Merge pull request #2314 from shihuade/MultiThread_V4.5_SliceBsRefact_V1

remove pSliceBs from ctx

--- a/codec/encoder/core/inc/encoder_context.h
+++ b/codec/encoder/core/inc/encoder_context.h
@@ -114,7 +114,6 @@
   SLogContext sLogCtx;
 // Input
   SWelsSvcCodingParam* pSvcParam;   // SVC parameter, WelsSVCParamConfig in svc_param_settings.h
-  SWelsSliceBs*     pSliceBs;       // bitstream buffering for various slices, [uiSliceIdx]
 
   int32_t*          pSadCostMb;
   /* MVD cost tables for Inter MB */
@@ -199,6 +198,7 @@
   int32_t           iPosBsBuffer;   // current writing position of frame bs pBuffer
 
   SSpatialPicIndex  sSpatialIndexMap[MAX_DEPENDENCY_LAYER];
+  int32_t           iSliceBufferSize[MAX_DEPENDENCY_LAYER];
 
   bool              bRefOfCurTidIsLtr[MAX_DEPENDENCY_LAYER][MAX_TEMPORAL_LEVEL];
   uint16_t          uiIdrPicId;           // IDR picture id: [0, 65535], this one is used for LTR
--- a/codec/encoder/core/inc/slice.h
+++ b/codec/encoder/core/inc/slice.h
@@ -42,6 +42,7 @@
 #include "parameter_sets.h"
 #include "svc_enc_slice_segment.h"
 #include "set_mb_syn_cabac.h"
+#include "nal_encap.h"
 
 namespace WelsEnc {
 
@@ -157,6 +158,7 @@
 // mainly for multiple threads imp.
 SMbCache        sMbCacheInfo;   // MBCache is introduced within slice dependency
 SBitStringAux*  pSliceBsa;
+SWelsSliceBs    sSliceBs;
 
 /*******************************sSliceHeader****************************/
 SSliceHeaderExt sSliceHeaderExt;
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -1095,15 +1095,37 @@
 }
 
 static inline int32_t InitpSliceInLayer (sWelsEncCtx** ppCtx, SDqLayer* pDqLayer, CMemoryAlign* pMa,
-    const int32_t iMaxSliceNum, bool bMultithread) {
-  int32_t iSliceIdx = 0;
+                                         const int32_t iMaxSliceNum, const int32_t kiDlayerIndex) {
+  int32_t iMaxSliceBufferSize  = (*ppCtx)->iSliceBufferSize[kiDlayerIndex];
+  int32_t iSliceIdx            = 0;
+  SliceModeEnum uiSliceMode    = (*ppCtx)->pSvcParam->sSpatialLayers[kiDlayerIndex].sSliceArgument.uiSliceMode;
+
+  // A layer in SM_SINGLE_SLICE mode writes through the single-thread bs writer pOut->sBsWrite,
+  // even when multi-threading is enabled for the other layers.
+  bool bIndependenceBsBuffer   = ((*ppCtx)->pSvcParam->iMultipleThreadIdc > 1 &&
+                                  SM_SINGLE_SLICE != uiSliceMode) ? true : false;
+
+  if ( iMaxSliceBufferSize <= 0) {
+    return ENC_RETURN_UNEXPECTED;
+  }
+
   while (iSliceIdx < iMaxSliceNum) {
     SSlice* pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx];
-    pSlice->uiSliceIdx = iSliceIdx;
-    if (bMultithread)
-      pSlice->pSliceBsa = & (*ppCtx)->pSliceBs[iSliceIdx].sBsWrite;
-    else
-      pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite;
+
+    pSlice->uiSliceIdx       = iSliceIdx;
+    pSlice->sSliceBs.uiSize  = iMaxSliceBufferSize;
+    pSlice->sSliceBs.uiBsPos = 0;
+    if (bIndependenceBsBuffer){
+      pSlice->pSliceBsa      = &pSlice->sSliceBs.sBsWrite;
+      pSlice->sSliceBs.pBs   = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "SliceBs");
+      if ( NULL == pSlice->sSliceBs.pBs) {
+        return ENC_RETURN_MEMALLOCERR;
+      }
+    } else {
+      pSlice->pSliceBsa      = & (*ppCtx)->pOut->sBsWrite;
+      pSlice->sSliceBs.pBs   = NULL;
+    }
+
     if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) {
       FreeMemorySvc (ppCtx);
       return ENC_RETURN_MEMALLOCERR;
@@ -1211,7 +1233,7 @@
       pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer");
       WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->sLayerInfo.pSliceInLayer), FreeMemorySvc (ppCtx))
 
-      int32_t iReturn = InitpSliceInLayer (ppCtx, pDqLayer, pMa, iMaxSliceNum, pParam->iMultipleThreadIdc > 1);
+      int32_t iReturn = InitpSliceInLayer (ppCtx, pDqLayer, pMa, iMaxSliceNum, iDlayerIndex);
       WELS_VERIFY_RETURN_PROC_IF (1, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
     }
 
@@ -1809,8 +1831,8 @@
       (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, (int) pSliceArgument->uiSliceNum);
       iSliceBufferSize = ((iLayerBsSize / pSliceArgument->uiSliceNum)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
     }
-    iMaxSliceBufferSize = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);
-
+    iMaxSliceBufferSize                = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);
+    (*ppCtx)->iSliceBufferSize[iIndex] = iSliceBufferSize;
     ++ iIndex;
   }
   iTargetSpatialBsSize = iLayerBsSize;
@@ -1817,8 +1839,7 @@
   iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount;
 
   iMaxSliceBufferSize = WELS_MIN (iMaxSliceBufferSize, iTargetSpatialBsSize);
-  iTotalLength = (pParam->iMultipleThreadIdc == 1) ? iCountBsLen : (iCountBsLen + (*ppCtx)->iMaxSliceCount  *
-                 iMaxSliceBufferSize);
+  iTotalLength = iCountBsLen;
 
   pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT,
                                       (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA :
@@ -2125,6 +2146,12 @@
             while (iSliceIdx < iSliceNum) {
               SSlice* pSlice = &pDq->sLayerInfo.pSliceInLayer[iSliceIdx];
               FreeMbCache (&pSlice->sMbCacheInfo, pMa);
+
+              // release the per-slice bitstream buffer, if one was allocated
+              if(NULL != pSlice->sSliceBs.pBs) {
+                pMa->WelsFree(pSlice->sSliceBs.pBs,"sSliceBs.pBs");
+                pSlice->sSliceBs.pBs = NULL;
+              }
               ++ iSliceIdx;
             }
             pMa->WelsFree (pDq->sLayerInfo.pSliceInLayer, "pSliceInLayer");
@@ -3896,11 +3923,6 @@
 
       WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc);
 
-      //the following line is to fix a problem with a specific setting as in test DiffSlicingInDlayerMixed:
-      //      (multi-th on with SM_SINGLE_SLICE in one of the D layers)
-      //TODO: this may not be needed any more after the slice buffer refactoring
-      pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[0].pSliceBsa = &(pCtx->pOut->sBsWrite);
-
       pCtx->iEncoderError = WelsCodeOneSlice (pCtx, 0, eNalType);
       WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
 
@@ -4734,7 +4756,7 @@
     SSliceHeaderExt* pSHExt = &pSliceIdx->sSliceHeaderExt;
     pSliceIdx->uiSliceIdx = uiSliceIdx;
     if (pCtx->pSvcParam->iMultipleThreadIdc > 1)
-      pSliceIdx->pSliceBsa = &pCtx->pSliceBs[uiSliceIdx].sBsWrite;
+      pSliceIdx->pSliceBsa = &pSliceIdx->sSliceBs.sBsWrite;
     else
       pSliceIdx->pSliceBsa = &pCtx->pOut->sBsWrite;
     if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA)) {
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -248,39 +248,6 @@
   pCurDqLayer->bNeedAdjustingSlicing = !DynamicAdjustSlicePEncCtxAll (pCurDqLayer, iRunLen);
 }
 
-int32_t SetMultiSliceBuffer (sWelsEncCtx** ppCtx, CMemoryAlign* pMa, SSliceThreading* pSmt,
-                             int32_t iMaxSliceNum, int32_t iSlice1Len, int32_t iSlice0Len, bool bDynamicSlice) {
-  (*ppCtx)->pSliceBs = (SWelsSliceBs*)pMa->WelsMalloc (sizeof (SWelsSliceBs) * iMaxSliceNum, "pSliceBs");
-  if (NULL == (*ppCtx)->pSliceBs) {
-    return ENC_RETURN_MEMALLOCERR;
-  }
-
-  if (iSlice0Len <= 0) {
-    return ENC_RETURN_UNEXPECTED;
-  }
-  //slice 0
-  (*ppCtx)->pSliceBs[0].uiSize = iSlice1Len;
-  (*ppCtx)->pSliceBs[0].pBs    = (*ppCtx)->pFrameBs + iSlice0Len;
-  (*ppCtx)->pSliceBs[0].uiBsPos = 0;
-  (*ppCtx)->pSliceBs[0].pBsBuffer = pSmt->pThreadBsBuffer[0];
-  if ((iMaxSliceNum == 1) && (!bDynamicSlice)) {
-    return ENC_RETURN_SUCCESS;
-  }
-  //slice >0
-  if (iSlice1Len <= 0) {
-    return ENC_RETURN_UNEXPECTED;
-  }
-  if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) {
-    return ENC_RETURN_MEMALLOCERR;
-  }
-  for (int32_t k = 1; k < iMaxSliceNum; k++) {
-    (*ppCtx)->pSliceBs[k].uiSize = iSlice1Len;
-    (*ppCtx)->pSliceBs[k].pBs    = (*ppCtx)->pSliceBs[k - 1].pBs + (*ppCtx)->pSliceBs[k - 1].uiSize;
-  }
-  return ENC_RETURN_SUCCESS;
-
-}
-
 int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
                            const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
   CMemoryAlign* pMa             = NULL;
@@ -289,7 +256,6 @@
   int32_t iNumSpatialLayers     = 0;
   int32_t iThreadNum            = 0;
   int32_t iIdx                  = 0;
-  int16_t iMaxSliceNum          = 1;
   int32_t iReturn = ENC_RETURN_SUCCESS;
   bool bWillUseTaskManage = false;
 
@@ -300,7 +266,6 @@
   pPara = pCodingParam;
   iNumSpatialLayers = pPara->iSpatialLayerNum;
   iThreadNum = pPara->iMultipleThreadIdc;
-  iMaxSliceNum = (*ppCtx)->iMaxSliceCount;
 
   pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading");
   WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt), FreeMemorySvc (ppCtx))
@@ -385,12 +350,6 @@
   MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
   //previous conflict ends
 
-  iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum,
-                                 iMaxSliceBufferSize,
-                                 iCountBsLen,
-                                 bDynamicSlice);
-  WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
-
   iReturn = WelsMutexInit (&pSmt->mutexSliceNumUpdate);
   WELS_VERIFY_RETURN_PROC_IF (1, (WELS_THREAD_ERROR_OK != iReturn), FreeMemorySvc (ppCtx))
 
@@ -408,24 +367,20 @@
 
   MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "RequestMtResource(), iThreadNum=%d, iMultipleThreadIdc= %d",
                 pPara->iMultipleThreadIdc,
-                iMaxSliceNum);
-
+                (*ppCtx)->iMaxSliceCount);
   return 0;
 }
 
 void ReleaseMtResource (sWelsEncCtx** ppCtx) {
-  SWelsSliceBs* pSliceB                 = NULL;
   SSliceThreading* pSmt                 = NULL;
   CMemoryAlign* pMa                     = NULL;
   int32_t iIdx                          = 0;
   int32_t iThreadNum                    = 0;
-  int16_t uiSliceNum                    = 0;
 
   if (NULL == ppCtx || NULL == *ppCtx)
     return;
 
   pMa           = (*ppCtx)->pMemAlign;
-  uiSliceNum    = (*ppCtx)->iMaxSliceCount;
   iThreadNum    = (*ppCtx)->pSvcParam->iMultipleThreadIdc;
   pSmt          = (*ppCtx)->pSliceThreading;
 
@@ -470,20 +425,6 @@
   }
   memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool));
 
-  pSliceB = (*ppCtx)->pSliceBs;
-  iIdx = 0;
-  while (pSliceB != NULL && iIdx < uiSliceNum) {
-    pSliceB->pBsBuffer = NULL;
-    pSliceB->uiSize = 0;
-    pSliceB->uiBsPos = 0;
-    ++ iIdx;
-    ++ pSliceB;
-  }
-  if ((*ppCtx)->pSliceBs != NULL) {
-    pMa->WelsFree ((*ppCtx)->pSliceBs, "pSliceBs");
-    (*ppCtx)->pSliceBs = NULL;
-  }
-
   if ((*ppCtx)->pTaskManage != NULL) {
     delete (*ppCtx)->pTaskManage;
     (*ppCtx)->pTaskManage = NULL;
@@ -503,6 +444,7 @@
 int32_t AppendSliceToFrameBs (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, const int32_t iSliceCount) {
   SWelsSvcCodingParam* pCodingParam     = pCtx->pSvcParam;
   SSpatialLayerConfig* pDlp             = &pCodingParam->sSpatialLayers[pCtx->uiDependencyId];
+  SSlice* pSliceInlayer                 = pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer;
   SWelsSliceBs* pSliceBs                = NULL;
   const bool kbIsDynamicSlicingMode     = (pDlp->sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE);
 
@@ -511,9 +453,9 @@
   int32_t iSliceIdx     = 0;
 
   if (!kbIsDynamicSlicingMode) {
-    pSliceBs      = &pCtx->pSliceBs[0];
     iNalIdxBase   = pLbi->iNalCount = 0;
     while (iSliceIdx < iSliceCount) {
+      pSliceBs    = &pSliceInlayer[iSliceIdx].sSliceBs;
       if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
         int32_t iNalIdx = 0;
         const int32_t iCountNal = pSliceBs->iNalIndex;
@@ -535,7 +477,6 @@
         iNalIdxBase     += iCountNal;
       }
       ++ iSliceIdx;
-      ++ pSliceBs;
     }
   } else { // for SM_SIZELIMITED_SLICE
     const int32_t kiPartitionCnt        = iSliceCount;
@@ -549,7 +490,7 @@
 
       iSliceIdx = iPartitionIdx;
       while (iIdx < kiCountSlicesCoded) {
-        pSliceBs = &pCtx->pSliceBs[iSliceIdx];
+        pSliceBs = &pSliceInlayer[iSliceIdx].sSliceBs;
         if (pSliceBs != NULL && pSliceBs->uiBsPos > 0) {
           memmove (pCtx->pFrameBs + pCtx->iPosBsBuffer, pSliceBs->pBs, pSliceBs->uiBsPos); // confirmed_safe_unsafe_usage
           pCtx->iPosBsBuffer += pSliceBs->uiBsPos;
@@ -665,7 +606,7 @@
         bool bDsaFlag = false;
         iSliceIdx               = pPrivateData->iSliceIndex;
         pSlice                  = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
-        pSliceBs                = &pEncPEncCtx->pSliceBs[iSliceIdx];
+        pSliceBs                = &pSlice->sSliceBs;
 
         bDsaFlag = ((pParamD->sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE) &&
                     pCodingParam->iMultipleThreadIdc > 1 &&
@@ -777,7 +718,7 @@
 
           SetOneSliceBsBufferUnderMultithread (pEncPEncCtx, kiPartitionId, iSliceIdx);
           pSlice                = &pCurDq->sLayerInfo.pSliceInLayer[iSliceIdx];
-          pSliceBs              = &pEncPEncCtx->pSliceBs[iSliceIdx];
+          pSliceBs              = &pSlice->sSliceBs;
 
           pSliceBs->uiBsPos     = 0;
           pSliceBs->iNalIndex   = 0;
@@ -1086,9 +1027,9 @@
 #endif//#if defined(MT_DEBUG)
 
 void SetOneSliceBsBufferUnderMultithread (sWelsEncCtx* pCtx, const int32_t kiThreadIdx, const int32_t iSliceIdx) {
-  pCtx->pSliceBs[iSliceIdx].pBsBuffer = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx];
-  pCtx->pSliceBs[iSliceIdx].uiBsPos = 0;
-  //printf("SetOneSliceBsBufferUnderMultithread, thread %d, slice %d, buffer=%x\n", kiThreadIdx, iSliceIdx, pCtx->pSliceBs[iSliceIdx].pBsBuffer);
+  SWelsSliceBs* pSliceBs  = &pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx].sSliceBs;
+  pSliceBs->pBsBuffer     = pCtx->pSliceThreading->pThreadBsBuffer[kiThreadIdx];
+  pSliceBs->uiBsPos       = 0;
 }
 }
 
--- a/codec/encoder/core/src/wels_task_encoder.cpp
+++ b/codec/encoder/core/src/wels_task_encoder.cpp
@@ -112,7 +112,7 @@
   SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, m_iSliceIdx);
 
   m_pSlice = &m_pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[m_iSliceIdx];
-  m_pSliceBs = &m_pCtx->pSliceBs[m_iSliceIdx];
+  m_pSliceBs = &m_pSlice->sSliceBs;
 
   m_pSliceBs->uiBsPos       = 0;
   m_pSliceBs->iNalIndex     = 0;
@@ -258,7 +258,7 @@
 
     SetOneSliceBsBufferUnderMultithread (m_pCtx, m_iThreadIdx, iLocalSliceIdx);
     m_pSlice = &pCurDq->sLayerInfo.pSliceInLayer[iLocalSliceIdx];
-    m_pSliceBs = &m_pCtx->pSliceBs[iLocalSliceIdx];
+    m_pSliceBs = &m_pSlice->sSliceBs;
 
     m_pSliceBs->uiBsPos     = 0;
     m_pSliceBs->iNalIndex   = 0;