ref: b6ddfabf2b8d3c5605fe0583f18e8e1949789657
parent: 240729288232fb6369397231b82ae6acc8a894c7
parent: e70621c1941215cdda325cfce8e64b65def48559
author: HaiboZhu <[email protected]>
date: Thu Jul 16 07:38:24 EDT 2015
Merge pull request #2028 from sijchen/mt42 [Encoder] save memory usage and improve error return logic under VLCOVERFLOW
--- a/codec/encoder/core/inc/slice_multi_threading.h
+++ b/codec/encoder/core/inc/slice_multi_threading.h
@@ -65,7 +65,7 @@
int32_t iCurDid);
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, const int32_t kiCountBsLen,
- const int32_t kiTargetSpatialBsSize);
+ const int32_t kiTargetSpatialBsSize, bool bDynamicSlice);
void ReleaseMtResource (sWelsEncCtx** ppCtx);
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -702,7 +702,12 @@
if (iDIndex == 0)
iCountNumNals += MAX_SLICES_NUM;
// MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME ensured at svc_enc_slice_segment.h
- assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER);
+ if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) {
+ WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
+ "AcquireLayersNals(), num_of_slice(%d) > existing slice(%d) at (iDid= %d), max=%d",
+ iCountNumNals, iOrgNumNals, iDIndex, MAX_NAL_UNITS_IN_LAYER);
+ return 1;
+ }
} else { /*if ( SM_SINGLE_SLICE != pDLayer->sSliceCfg.uiSliceMode )*/
const int32_t kiNumOfSlice = GetInitialSliceNum ((pDLayer->iVideoWidth + 0x0f) >> 4,
(pDLayer->iVideoHeight + 0x0f) >> 4,
@@ -1754,6 +1759,11 @@
const int32_t kiPpsSize = (*ppCtx)->GetNeededPpsNum() * PPS_BUFFER_SIZE;
iNonVclLayersBsSizeCount = SSEI_BUFFER_SIZE + kiSpsSize + kiPpsSize;
+ bool bDynamicSlice = false;
+ uint32_t uiMaxSliceNumEstimation = 0;
+ int32_t iSliceBufferSize = 0;
+ int32_t iMaxSliceBufferSize = 0;
+ int32_t iTotalLength = 0;
int32_t iLayerBsSize = 0;
iIndex = 0;
while (iIndex < pParam->iSpatialLayerNum) {
@@ -1765,11 +1775,29 @@
MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
iLayerBsSize = WELS_ALIGN (iLayerBsSize, 4); // 4 bytes alinged
iVclLayersBsSizeCount += iLayerBsSize;
+
+ SSliceConfig* pMso = & (fDlp->sSliceCfg);
+ if (pMso->uiSliceMode == SM_DYN_SLICE) {
+ bDynamicSlice = true;
+ uiMaxSliceNumEstimation = WELS_MIN (AVERSLICENUM_CONSTRAINT,
+ (iLayerBsSize / pMso->sSliceArgument.uiSliceSizeConstraint) + 1);
+ (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, uiMaxSliceNumEstimation);
+ iSliceBufferSize = (WELS_MAX(pMso->sSliceArgument.uiSliceSizeConstraint, iLayerBsSize/uiMaxSliceNumEstimation)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
+ } else {
+ (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, pMso->sSliceArgument.uiSliceNum);
+ iSliceBufferSize = ((iLayerBsSize / pMso->sSliceArgument.uiSliceNum)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
+ }
+ iMaxSliceBufferSize = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);
+
++ iIndex;
}
iTargetSpatialBsSize = iLayerBsSize;
iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount;
+ iMaxSliceBufferSize = WELS_MIN (iMaxSliceBufferSize, iTargetSpatialBsSize);
+ iTotalLength = (pParam->iMultipleThreadIdc == 1) ? iCountBsLen : (iCountBsLen + ((*ppCtx)->iMaxSliceCount - 1) *
+ iMaxSliceBufferSize);
+
pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT,
(pParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA :
MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN));
@@ -1787,20 +1815,14 @@
(*ppCtx)->pOut->iCountNals = iCountNals;
(*ppCtx)->pOut->iNalIndex = 0;
- if (pParam->iMultipleThreadIdc > 1) {
- const int32_t iTotalLength = iCountBsLen + (iTargetSpatialBsSize * ((*ppCtx)->iMaxSliceCount - 1));
- (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iTotalLength, "pFrameBs");
- WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
- (*ppCtx)->iFrameBsSize = iTotalLength;
- } else {
- (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pFrameBs");
- WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
- (*ppCtx)->iFrameBsSize = iCountBsLen;
- }
+
+ (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iTotalLength, "pFrameBs");
+ WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
+ (*ppCtx)->iFrameBsSize = iTotalLength;
(*ppCtx)->iPosBsBuffer = 0;
// for pSlice bs buffers
- if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iTargetSpatialBsSize)) {
+ if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iMaxSliceBufferSize, bDynamicSlice)) {
WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!");
FreeMemorySvc (ppCtx);
return 1;
@@ -4075,7 +4097,7 @@
// pick up succeeding slice for threading
// thread_id equal to iEventId per implementation here
pCtx->pSliceThreading->pThreadPEncCtx[iEventId].iSliceIndex = iIndexOfSliceToBeCoded;
- SetOneSliceBsBufferUnderMultithread(pCtx, iEventId, iIndexOfSliceToBeCoded);
+ SetOneSliceBsBufferUnderMultithread (pCtx, iEventId, iIndexOfSliceToBeCoded);
WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iEventId]);
WelsEventSignal (&pCtx->pSliceThreading->pThreadMasterEvent[iEventId]);
++ iIndexOfSliceToBeCoded;
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -295,6 +295,7 @@
if (NULL == (*ppCtx)->pSliceBs) {
return ENC_RETURN_MEMALLOCERR;
}
+
if (iSlice0Len <= 0) {
return ENC_RETURN_UNEXPECTED;
}
@@ -310,19 +311,19 @@
if (iSlice1Len <= 0) {
return ENC_RETURN_UNEXPECTED;
}
+ if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) {
+ return ENC_RETURN_MEMALLOCERR;
+ }
for (int32_t k = 1; k < iMaxSliceNum; k++) {
(*ppCtx)->pSliceBs[k].uiSize = iSlice1Len;
(*ppCtx)->pSliceBs[k].pBs = (*ppCtx)->pSliceBs[k - 1].pBs + (*ppCtx)->pSliceBs[k - 1].uiSize;
}
- if ((*ppCtx)->iFrameBsSize < (iSlice0Len + (iMaxSliceNum - 1)*iSlice1Len)) {
- return ENC_RETURN_MEMALLOCERR;
- }
return ENC_RETURN_SUCCESS;
}
int32_t RequestMtResource (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, const int32_t iCountBsLen,
- const int32_t iTargetSpatialBsSize) {
+ const int32_t iMaxSliceBufferSize, bool bDynamicSlice) {
CMemoryAlign* pMa = NULL;
SWelsSvcCodingParam* pPara = NULL;
SSliceThreading* pSmt = NULL;
@@ -331,8 +332,6 @@
int32_t iIdx = 0;
int16_t iMaxSliceNum = 1;
int32_t iReturn = ENC_RETURN_SUCCESS;
- bool bDynamicSlice = false;
- uint32_t uiMaxSliceSizeConstraint = 0;
if (NULL == ppCtx || NULL == pCodingParam || NULL == *ppCtx || iCountBsLen <= 0)
return 1;
@@ -372,13 +371,6 @@
pSmt->pSliceConsumeTime[iIdx] = NULL;
pSmt->pSliceComplexRatio[iIdx] = NULL;
}
-
- if (pMso->uiSliceMode == SM_DYN_SLICE) {
- bDynamicSlice = true;
- if (uiMaxSliceSizeConstraint < pMso->sSliceArgument.uiSliceSizeConstraint) {
- uiMaxSliceSizeConstraint = pMso->sSliceArgument.uiSliceSizeConstraint;
- }
- }
++ iIdx;
}
// NULL for pSliceConsumeTime[iIdx]: iIdx from iNumSpatialLayers to MAX_DEPENDENCY_LAYERS
@@ -428,8 +420,7 @@
MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx,
(void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);
-
- pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iTargetSpatialBsSize, "pSmt->pThreadBsBuffer");
+ pSmt->pThreadBsBuffer[iIdx] = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pSmt->pThreadBsBuffer");
WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pThreadBsBuffer[iIdx]), FreeMemorySvc (ppCtx))
++ iIdx;
@@ -442,8 +433,9 @@
err = WelsEventOpen (&pSmt->pSliceCodedMasterEvent, name);
MT_TRACE_LOG (*ppCtx, WELS_LOG_INFO, "[MT] Open pSliceCodedMasterEvent named(%s) ret%d err%d", name, err, errno);
+
iReturn = SetMultiSliceBuffer (ppCtx, pMa, pSmt, iMaxSliceNum,
- iTargetSpatialBsSize, //TODO: may use uiMaxSliceSizeConstraint<<1 when bDynamicSlice, but need more twist
+ iMaxSliceBufferSize,
iCountBsLen,
bDynamicSlice);
WELS_VERIFY_RETURN_PROC_IF (iReturn, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -516,7 +516,7 @@
iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
- if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+ if ((iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) && (pCurMb->uiLumaQp < 50)) {
pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
goto TRY_REENCODING;
@@ -585,7 +585,7 @@
UpdateNonZeroCountCache (pCurMb, pMbCache);
iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
- if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+ if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) {
pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
goto TRY_REENCODING;
@@ -991,7 +991,7 @@
//step (6): begin to write bit stream; if the pSlice size is controlled, the writing may be skipped
iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
- if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+ if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) {
pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
goto TRY_REENCODING;
@@ -1097,7 +1097,7 @@
iEncReturn = pEncCtx->pFuncList->pfWelsSpatialWriteMbSyn (pEncCtx, pSlice, pCurMb);
- if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND) {
+ if (iEncReturn == ENC_RETURN_VLCOVERFLOWFOUND && (pCurMb->uiLumaQp < 50)) {
pSlice->iMbSkipRun = pEncCtx->pFuncList->pfStashPopMBStatus (&sDss, pSlice);
UpdateQpForOverflow (pCurMb, kuiChromaQpIndexOffset);
goto TRY_REENCODING;
--- a/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/svc_set_mb_syn_cavlc.cpp
@@ -250,7 +250,7 @@
assert (iLeftLength > 0);
if (iLeftLength < MAX_MACROBLOCK_SIZE_IN_BYTE_x2) {
- return ENC_RETURN_MEMALLOCERR;
+ return ENC_RETURN_VLCOVERFLOWFOUND;//ENC_RETURN_MEMALLOCERR;
//TODO: call the realloc&copy instead
}
return ENC_RETURN_SUCCESS;
--- a/codec/encoder/plus/src/welsEncoderExt.cpp
+++ b/codec/encoder/plus/src/welsEncoderExt.cpp
@@ -402,7 +402,7 @@
const int32_t kiEncoderReturn = WelsEncoderEncodeExt (m_pEncContext, pBsInfo, pSrcPic);
const int64_t kiCurrentFrameMs = (WelsTime() - kiBeforeFrameUs) / 1000;
- if (kiEncoderReturn == ENC_RETURN_MEMALLOCERR) {
+ if ((kiEncoderReturn == ENC_RETURN_MEMALLOCERR) || (kiEncoderReturn == ENC_RETURN_MEMOVERFLOWFOUND) || (kiEncoderReturn == ENC_RETURN_VLCOVERFLOWFOUND)) {
WelsUninitEncoderExt (&m_pEncContext);
return cmMallocMemeError;
} else if ((kiEncoderReturn != ENC_RETURN_SUCCESS) && (kiEncoderReturn == ENC_RETURN_CORRECTED)) {
--- a/test/api/encode_decode_api_test.cpp
+++ b/test/api/encode_decode_api_test.cpp
@@ -3459,13 +3459,12 @@
{true, false, true, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 1, ""},
{true, false, true, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 1, ""},
{true, false, true, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
- // enable the following when all random input is supported
- //{true, true, true, 30, 600, 460, 1, SM_DYN_SLICE, 450, 15.0, 1, ""},
- //{true, true, true, 30, 340, 96, 24, SM_DYN_SLICE, 1000, 30.0, 1, ""},
- //{true, true, true, 30, 140, 196, 51, SM_DYN_SLICE, 500, 7.5, 1, ""},
- //{true, true, true, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 1, ""},
- //{true, true, true, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 1, ""},
- //{true, true, true, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
+ {true, true, true, 30, 600, 460, 1, SM_DYN_SLICE, 450, 15.0, 1, ""},
+ {true, true, true, 30, 340, 96, 24, SM_DYN_SLICE, 1000, 30.0, 1, ""},
+ {true, true, true, 30, 140, 196, 51, SM_DYN_SLICE, 500, 7.5, 1, ""},
+ {true, true, true, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 1, ""},
+ {true, true, true, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 1, ""},
+ {true, true, true, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
{false, false, true, 3, 4096, 2304, 2, SM_SINGLE_SLICE, 0, 7.5, 1, ""}, // large picture size
{false, true, false, 30, 32, 16, 2, SM_DYN_SLICE, 500, 7.5, 1, ""},
{false, true, false, 30, 600, 460, 1, SM_DYN_SLICE, 450, 15.0, 4, ""},
@@ -3474,8 +3473,7 @@
{false, true, false, 30, 110, 296, 50, SM_DYN_SLICE, 500, 7.5, 2, ""},
{false, true, false, 30, 104, 416, 44, SM_DYN_SLICE, 500, 7.5, 2, ""},
{false, true, false, 30, 16, 16, 2, SM_DYN_SLICE, 500, 7.5, 3, ""},
- //{false, true, false, 30, 32, 16, 2, SM_DYN_SLICE, 500, 7.5, 3, ""},
- //disable the above for now, enable when multi-thread error is correctly handled
+ {false, true, false, 30, 32, 16, 2, SM_DYN_SLICE, 500, 7.5, 3, ""},
};
class EncodeTestAPI : public ::testing::TestWithParam<EncodeOptionParam>, public ::EncodeDecodeTestAPIBase {
@@ -3543,6 +3541,12 @@
int iIdx = 0;
int iLen;
unsigned char* pData[3] = { NULL };
+
+ //FIXME: remove this after the multi-thread case is correctly handled in encoder
+ if (p.iThreads>1 && SM_DYN_SLICE == p.eSliceMode) {
+ p.bAllRandom = false;
+ }
+
while (iIdx <= p.iNumframes) {
EncodeOneFrameRandom (0, p.bAllRandom);
encToDecData (info, iLen);