ref: e7f5f6a0528ab2e49ccf5199e772dd7b42a1a8a9
parent: 033c6a0448227d6290b402435c7faec9d3063dd0
author: xiaotiansf <[email protected]>
date: Mon Nov 11 06:18:59 EST 2019
Threaded decoding enhancement: 1. Distinguish non-threaded decoding (m_iThreadCount = 0) from single-thread decoding (m_iThreadCount = 1). 2. Remove the bAvailableFlag member of PPicture and use iRefCount instead, to more precisely represent how many users are holding a picture in both threaded and non-threaded modes.
--- a/codec/console/dec/src/h264dec.cpp
+++ b/codec/console/dec/src/h264dec.cpp
@@ -283,7 +283,7 @@
goto label_exit;
iSliceSize = static_cast<int32_t> (pInfo[2]);
} else {
- if (iThreadCount > 1) {
+ if (iThreadCount >= 1) {
uint8_t* uSpsPtr = NULL;
int32_t iSpsByteCount = 0;
iSliceSize = readPicture (pBuf, iFileSize, iBufPos, uSpsPtr, iSpsByteCount);
@@ -577,7 +577,7 @@
pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
}
- int32_t iThreadCount = 1;
+ int32_t iThreadCount = 0;
pDecoder->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
if (pDecoder->Initialize (&sDecParam)) {
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -550,6 +550,14 @@
}
}
}
+static inline int32_t GetThreadCount (PWelsDecoderContext pCtx) { // thread count for pCtx; pCtx is dereferenced unchecked, so it must be non-NULL
+ int32_t iThreadCount = 0; // value returned when no thread context is attached to pCtx
+ if (pCtx->pThreadCtx != NULL) {
+ PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX)pCtx->pThreadCtx;
+ iThreadCount = pThreadCtx->sThreadInfo.uiThrMaxNum; // thread context present: report its uiThrMaxNum
+ }
+ return iThreadCount; // callers in this patch compare against 1: "> 1" and "<= 1" pick the code path
+}
//#ifdef __cplusplus
//}
//#endif//__cplusplus
--- a/codec/decoder/core/inc/picture.h
+++ b/codec/decoder/core/inc/picture.h
@@ -69,8 +69,7 @@
/*******************************sef_definition for misc use****************************/
bool bUsedAsRef; //for ref pic management
bool bIsLongRef; // long term reference frame flag //for ref pic management
- uint8_t uiRefCount;
- bool bAvailableFlag; // indicate whether it is available in this picture memory block.
+ int8_t iRefCount;
bool bIsComplete; // indicate whether current picture is complete, not from EC
/*******************************for future use****************************/
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -236,7 +236,7 @@
}
WelsMbInterSampleConstruction (pCtx, pCurDqLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
- if (pCtx->pThreadCtx == NULL) {
+ if (GetThreadCount (pCtx) <= 1) {
pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
pCurDqLayer->pNzc[pCurDqLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
}
@@ -1365,7 +1365,7 @@
pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
|| bIsPending));
//predict mv
@@ -1421,7 +1421,7 @@
memset (pCurDqLayer->pDirect[iMbXy], 0, sizeof (int8_t) * 16);
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
if (uiCode) {
int16_t pMv[LIST_A][2] = { {0, 0}, { 0, 0 } };
@@ -1696,7 +1696,7 @@
SDeblockingFilter pFilter;
int32_t iFilterIdc = 1;
- if (pCtx->pThreadCtx && pSliceHeader->uiDisableDeblockingFilterIdc != 1) {
+ if (pSliceHeader->uiDisableDeblockingFilterIdc != 1) {
WelsDeblockingInitFilter (pCtx, pFilter, iFilterIdc);
}
@@ -1764,11 +1764,15 @@
pCurDqLayer->iMbX = iMbX;
pCurDqLayer->iMbY = iMbY;
pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
- if ((iMbY > iLastMby) && (iLastMbx == pCurDqLayer->iMbWidth - 1)) {
- SET_EVENT (&pCtx->pDec->pReadyEvent[iLastMby]);
+ if (GetThreadCount (pCtx) > 1) {
+ if ((iMbY > iLastMby) && (iLastMbx == pCurDqLayer->iMbWidth - 1)) {
+ SET_EVENT (&pCtx->pDec->pReadyEvent[iLastMby]);
+ }
}
} while (1);
- SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]);
+ if (GetThreadCount (pCtx) > 1) {
+ SET_EVENT (&pCtx->pDec->pReadyEvent[pCurDqLayer->iMbY]);
+ }
return ERR_NONE;
}
@@ -2467,7 +2471,7 @@
pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
memset (pCurDqLayer->pDec->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && (ppRefPic[0]->bIsComplete
|| bIsPending));
//predict iMv
@@ -2564,7 +2568,7 @@
pCurDqLayer->pInterPredictionDoneFlag[iMbXy] = 0;
memset (pCurDqLayer->pDec->pRefIndex[LIST_0][iMbXy], 0, sizeof (int8_t) * 16);
memset (pCurDqLayer->pDec->pRefIndex[LIST_1][iMbXy], 0, sizeof (int8_t) * 16);
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPicL0[0] && (ppRefPicL0[0]->bIsComplete
|| bIsPending)) || ! (ppRefPicL1[0] && (ppRefPicL1[0]->bIsComplete || bIsPending));
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -151,8 +151,7 @@
for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) {
pPicNewBuf->ppPic[i]->bUsedAsRef = false;
pPicNewBuf->ppPic[i]->bIsLongRef = false;
- pPicNewBuf->ppPic[i]->uiRefCount = 0;
- pPicNewBuf->ppPic[i]->bAvailableFlag = true;
+ pPicNewBuf->ppPic[i]->iRefCount = 0;
pPicNewBuf->ppPic[i]->bIsComplete = false;
}
// remove old PicBuf
@@ -240,8 +239,7 @@
for (int32_t i = 0; i < pPicNewBuf->iCapacity; i++) {
pPicNewBuf->ppPic[i]->bUsedAsRef = false;
pPicNewBuf->ppPic[i]->bIsLongRef = false;
- pPicNewBuf->ppPic[i]->uiRefCount = 0;
- pPicNewBuf->ppPic[i]->bAvailableFlag = true;
+ pPicNewBuf->ppPic[i]->iRefCount = 0;
pPicNewBuf->ppPic[i]->bIsComplete = false;
}
// remove old PicBuf
@@ -440,7 +438,7 @@
iNumRefFrames = MAX_REF_PIC_COUNT + 2;
} else {
iNumRefFrames = pCtx->pSps->iNumRefFrames + 2;
- if (pCtx->pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
iNumRefFrames = MAX_REF_PIC_COUNT + 1;
}
}
@@ -484,7 +482,7 @@
&& kiPicHeight == pCtx->iImgHeightInPixel) && (!bNeedChangePicQueue)) // have same scaled buffer
// sync update pRefList
- if (pCtx->pThreadCtx == NULL) {
+ if (GetThreadCount (pCtx) <= 1) {
WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
}
@@ -562,7 +560,7 @@
if (NULL != pPicBuff && NULL != *pPicBuff) {
DestroyPicBuff (pCtx, pPicBuff, pMa);
}
- if (pCtx->pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
//prevent from double destruction of PPicBuff
PWelsDecoderThreadCTX pThreadCtx = (PWelsDecoderThreadCTX) (pCtx->pThreadCtx);
int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum;
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -221,10 +221,10 @@
ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
pDstInfo->iBufferStatus = 1;
- if (pCtx->pThreadCtx != NULL && pPic->bIsComplete == false) {
+ if (GetThreadCount (pCtx) > 1 && pPic->bIsComplete == false) {
pPic->bIsComplete = true;
}
- if (pCtx->pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
for (uint32_t i = 0; i < uiMbHeight; ++i) {
SET_EVENT (&pCtx->pDec->pReadyEvent[i]);
@@ -231,7 +231,7 @@
}
}
bool bOutResChange = false;
- if (pCtx->pThreadCtx == NULL || pCtx->pLastThreadCtx == NULL) {
+ if (GetThreadCount (pCtx) <= 1 || pCtx->pLastThreadCtx == NULL) {
bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
|| (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
} else {
@@ -2282,7 +2282,7 @@
*/
int32_t AllocPicBuffOnNewSeqBegin (PWelsDecoderContext pCtx) {
//try to allocate or relocate DPB memory only when new sequence is coming.
- if (pCtx->pThreadCtx == NULL) {
+ if (GetThreadCount (pCtx) <= 1) {
WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
}
int32_t iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
@@ -2418,7 +2418,7 @@
int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
int32_t iRet = ERR_NONE;
- if (pCtx->pThreadCtx != NULL && pCtx->bNewSeqBegin) {
+ if (GetThreadCount (pCtx) > 1 && pCtx->bNewSeqBegin) {
WelsResetRefPic (pCtx);
}
if (pCtx->eSliceType == B_SLICE) {
@@ -2542,7 +2542,7 @@
}
}
bool isNewFrame = true;
- if (pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
isNewFrame = pCtx->pDec == NULL;
}
if (pCtx->pDec == NULL) {
@@ -2553,7 +2553,6 @@
uint32_t i = 0;
while (i < MAX_DPB_COUNT && pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i]) {
pLastThreadCtx->pDec->pRefPic[listIdx][i] = pLastThreadCtx->pCtx->sRefPic.pRefList[listIdx][i];
- pLastThreadCtx->pDec->pRefPic[listIdx][i]->bAvailableFlag = false;
++i;
}
}
@@ -2563,30 +2562,8 @@
} else {
pCtx->sRefPic = pLastThreadCtx->pCtx->sRefPic;
}
- //printf ("last uiDecodingTimeStamp = %d\n", pLastThreadCtx->pCtx->uiDecodingTimeStamp);
- for (int32_t i = 0; i < pCtx->sRefPic.uiRefCount[LIST_0]; ++i) {
- if (pCtx->sRefPic.pRefList[LIST_0][i] != NULL) {
- pCtx->sRefPic.pRefList[LIST_0][i]->bAvailableFlag = false;
- }
- }
- for (int32_t i = 0; i < pCtx->sRefPic.uiRefCount[LIST_1]; ++i) {
- if (pCtx->sRefPic.pRefList[LIST_1][i] != NULL) {
- pCtx->sRefPic.pRefList[LIST_1][i]->bAvailableFlag = false;
- }
- }
}
pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
- if (pThreadCtx != NULL) {
- if (pCtx->pDec != NULL) {
- pCtx->pDec->bAvailableFlag = false;
- pCtx->pDec->bIsUngroupedMultiSlice = false;
- pThreadCtx->pDec = pCtx->pDec;
- uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
- for (uint32_t i = 0; i < uiMbHeight; ++i) {
- RESET_EVENT (&pCtx->pDec->pReadyEvent[i]);
- }
- }
- }
if (pCtx->iTotalNumMbRec != 0)
pCtx->iTotalNumMbRec = 0;
@@ -2598,6 +2575,15 @@
pCtx->iErrorCode |= dsOutOfMemory;
return ERR_INFO_REF_COUNT_OVERFLOW;
}
+ if (pThreadCtx != NULL) {
+ pCtx->pDec->bIsUngroupedMultiSlice = false;
+ pThreadCtx->pDec = pCtx->pDec;
+ if (GetThreadCount (pCtx) > 1) ++pCtx->pDec->iRefCount;
+ uint32_t uiMbHeight = (pCtx->pDec->iHeightInPixel + 15) >> 4;
+ for (uint32_t i = 0; i < uiMbHeight; ++i) {
+ RESET_EVENT (&pCtx->pDec->pReadyEvent[i]);
+ }
+ }
pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
} else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
@@ -2743,7 +2729,7 @@
if (pSh->eSliceType == B_SLICE && !pSh->iDirectSpatialMvPredFlag)
ComputeColocatedTemporalScaling (pCtx);
- if (pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
memset (&pCtx->lastReadyHeightOffset[0][0], -1, LIST_A * MAX_REF_PIC_COUNT * sizeof (int16_t));
SET_EVENT (&pThreadCtx->sSliceDecodeStart);
iRet = WelsDecodeAndConstructSlice (pCtx);
@@ -2765,7 +2751,7 @@
}
}
- if (pThreadCtx == NULL && bReconstructSlice) {
+ if (GetThreadCount (pCtx) <= 1 && bReconstructSlice) {
if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
return iRet;
@@ -2772,7 +2758,7 @@
}
}
if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
- if (pCtx->pThreadCtx == NULL) {
+ if (GetThreadCount (pCtx) <= 1) {
if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
bAllRefComplete &= CheckRefPicturesComplete (pCtx);
} else {
@@ -2829,17 +2815,18 @@
}
}
- if (pThreadCtx != NULL && pCtx->uiDecodingTimeStamp > 1 && pCtx->pLastDecPicInfo->uiDecodingTimeStamp > 0) {
+ if (GetThreadCount (pCtx) > 1 && pCtx->uiDecodingTimeStamp > 1 && pCtx->pLastDecPicInfo->uiDecodingTimeStamp > 0) {
while (pCtx->uiDecodingTimeStamp > pCtx->pLastDecPicInfo->uiDecodingTimeStamp + 1) {
WelsSleep (1);
}
}
- if (pThreadCtx != NULL) {
+
+ if (GetThreadCount (pCtx) >= 1) {
pCtx->pLastDecPicInfo->uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
}
iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
if (iRet) {
- if (pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
SET_EVENT (&pThreadCtx->sSliceDecodeFinsh);
}
return iRet;
@@ -2847,7 +2834,7 @@
pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
pCtx->bUsedAsRef = pCtx->uiNalRefIdc > 0;
- if (pCtx->pThreadCtx == NULL) {
+ if (GetThreadCount (pCtx) <= 1) {
if (pCtx->bUsedAsRef) {
for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
uint32_t i = 0;
@@ -2870,7 +2857,7 @@
pCtx->pDec->iLinesize,
pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
}
- } else {
+ } else if (GetThreadCount (pCtx) > 1) {
SET_EVENT (&pThreadCtx->sImageReady);
}
pCtx->pDec = NULL; //after frame decoding, always set to NULL
@@ -2881,7 +2868,7 @@
pCtx->pLastDecPicInfo->iPrevFrameNum = pSh->iFrameNum;
if (pCtx->pLastDecPicInfo->bLastHasMmco5)
pCtx->pLastDecPicInfo->iPrevFrameNum = 0;
- if (pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
int32_t threadCount = pThreadCtx->sThreadInfo.uiThrMaxNum;
int32_t id = pThreadCtx->sThreadInfo.uiThrNum;
for (int32_t i = 0; i < threadCount; ++i) {
@@ -2899,7 +2886,7 @@
}
}
}
- if (pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
SET_EVENT (&pThreadCtx->sSliceDecodeFinsh);
}
return ERR_NONE;
--- a/codec/decoder/core/src/manage_dec_ref.cpp
+++ b/codec/decoder/core/src/manage_dec_ref.cpp
@@ -80,6 +80,7 @@
pRef->uiSpatialId = -1;
pRef->iSpsId = -1;
pRef->bIsComplete = false;
+ pRef->iRefCount = 0;
if (pRef->eSliceType == I_SLICE) {
return;
@@ -88,7 +89,7 @@
for (int32_t i = 0; i < MAX_DPB_COUNT; ++i) {
for (int32_t list = 0; list < lists; ++list) {
if (pRef->pRefPic[list][i] != NULL) {
- pRef->pRefPic[list][i]->bAvailableFlag = true;
+ pRef->pRefPic[list][i]->iRefCount = 0;
pRef->pRefPic[list][i] = NULL;
}
}
@@ -781,8 +782,8 @@
for (i = 0; i < pRefPic->uiShortRefCount[LIST_0]; i++) {
if (pRefPic->pShortRefList[LIST_0][i]->iFrameNum == iFrameNum) {
iMoveSize = pRefPic->uiShortRefCount[LIST_0] - i - 1;
- pRefPic->pShortRefList[LIST_0][i]->bUsedAsRef = false;
pPic = pRefPic->pShortRefList[LIST_0][i];
+ pPic->bUsedAsRef = false;
pRefPic->pShortRefList[LIST_0][i] = NULL;
if (iMoveSize > 0) {
memmove (&pRefPic->pShortRefList[LIST_0][i], &pRefPic->pShortRefList[LIST_0][i + 1],
--- a/codec/decoder/core/src/mv_pred.cpp
+++ b/codec/decoder/core/src/mv_pred.cpp
@@ -315,7 +315,7 @@
mbType = GetMbType (pCurDqLayer)[iMbXy];
PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0];
- if (pCtx->pThreadCtx != NULL) {
+ if (GetThreadCount (pCtx) > 1) {
if (16 * pCurDqLayer->iMbY > pCtx->lastReadyHeightOffset[1][0]) {
if (colocPic->pReadyEvent[pCurDqLayer->iMbY].isSignaled != 1) {
WAIT_EVENT (&colocPic->pReadyEvent[pCurDqLayer->iMbY], WELS_DEC_THREAD_WAIT_INFINITE);
--- a/codec/decoder/core/src/parse_mb_syn_cabac.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cabac.cpp
@@ -535,7 +535,7 @@
pRefCount[0] = pSliceHeader->uiRefCount[0];
pRefCount[1] = pSliceHeader->uiRefCount[1];
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
switch (pCurDqLayer->pDec->pMbType[iMbXy]) {
case MB_TYPE_16x16: {
@@ -741,7 +741,7 @@
MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy];
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
if (IS_DIRECT (mbType)) {
--- a/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
@@ -1083,7 +1083,7 @@
iRefCount[0] = pSliceHeader->uiRefCount[0];
iRefCount[1] = pSliceHeader->uiRefCount[1];
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
switch (pCurDqLayer->pDec->pMbType[iMbXy]) {
case MB_TYPE_16x16: {
@@ -1348,7 +1348,7 @@
iRefCount[0] = pSliceHeader->uiRefCount[0];
iRefCount[1] = pSliceHeader->uiRefCount[1];
- bool bIsPending = pCtx->pThreadCtx != NULL;
+ bool bIsPending = GetThreadCount (pCtx) > 1;
MbType mbType = pCurDqLayer->pDec->pMbType[iMbXy];
if (IS_DIRECT (mbType)) {
--- a/codec/decoder/core/src/pic_queue.cpp
+++ b/codec/decoder/core/src/pic_queue.cpp
@@ -106,12 +106,14 @@
pPic->iWidthInPixel = kiPicWidth;
pPic->iHeightInPixel = kiPicHeight;
pPic->iFrameNum = -1;
- pPic->bAvailableFlag = true;
+ pPic->iRefCount = 0;
uint32_t uiMbWidth = (kiPicWidth + 15) >> 4;
uint32_t uiMbHeight = (kiPicHeight + 15) >> 4;
uint32_t uiMbCount = uiMbWidth * uiMbHeight;
+
pPic->pMbCorrectlyDecodedFlag = (bool*)pMa->WelsMallocz (uiMbCount * sizeof (bool), "pPic->pMbCorrectlyDecodedFlag");
+
pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t), "pPic->pMbType");
pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
@@ -182,8 +184,8 @@
}
for (iPicIdx = pPicBuf->iCurrentIdx + 1; iPicIdx < pPicBuf->iCapacity ; ++iPicIdx) {
- if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag
- && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) {
+ if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef
+ && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) {
pPic = pPicBuf->ppPic[iPicIdx];
break;
}
@@ -194,8 +196,8 @@
return pPic;
}
for (iPicIdx = 0 ; iPicIdx <= pPicBuf->iCurrentIdx ; ++iPicIdx) {
- if (pPicBuf->ppPic[iPicIdx] != NULL && pPicBuf->ppPic[iPicIdx]->bAvailableFlag
- && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef) {
+ if (pPicBuf->ppPic[iPicIdx] != NULL && !pPicBuf->ppPic[iPicIdx]->bUsedAsRef
+ && pPicBuf->ppPic[iPicIdx]->iRefCount <= 0) {
pPic = pPicBuf->ppPic[iPicIdx];
break;
}
--- a/codec/decoder/core/src/rec_mb.cpp
+++ b/codec/decoder/core/src/rec_mb.cpp
@@ -252,7 +252,7 @@
iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)),
((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2)));
- if (pCtx->pThreadCtx != NULL && iRefIdx >= 0) {
+ if (GetThreadCount (pCtx) > 1 && iRefIdx >= 0) {
// wait for the lines of reference macroblock (3 + 16).
PPicture pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx];
if (pCtx->bNewSeqBegin && (pCtx->iErrorCode & dsRefLost)) {
--- a/codec/decoder/plus/inc/welsDecoderExt.h
+++ b/codec/decoder/plus/inc/welsDecoderExt.h
@@ -120,6 +120,7 @@
bool m_bIsBaseline;
int32_t m_iCpuCount;
int32_t m_iThreadCount;
+ int32_t m_iCtxCount;
PPicBuff m_pPicBuff;
bool m_bParamSetsLostFlag;
bool m_bFreezeOutput;
--- a/codec/decoder/plus/src/welsDecoderExt.cpp
+++ b/codec/decoder/plus/src/welsDecoderExt.cpp
@@ -105,7 +105,9 @@
RESET_EVENT (&pLastThreadCtx->sSliceDecodeStart);
}
pThrCtx->pDec = NULL;
- RESET_EVENT (&pThrCtx->sSliceDecodeFinsh);
+ if (GetThreadCount (pThrCtx->pCtx) > 1) {
+ RESET_EVENT (&pThrCtx->sSliceDecodeFinsh);
+ }
iRet |= pWelsDecoder->DecodeFrame2WithCtx (pThrCtx->pCtx, NULL, 0, pThrCtx->ppDst, &pThrCtx->sDstInfo);
//WelsMutexUnlock (&pWelsDecoder->m_csDecoder);
@@ -133,7 +135,8 @@
m_uiDecodeTimeStamp (0),
m_bIsBaseline (false),
m_iCpuCount (1),
- m_iThreadCount (1),
+ m_iThreadCount (0),
+ m_iCtxCount (1),
m_pPicBuff (NULL),
m_bParamSetsLostFlag (false),
m_bFreezeOutput (false),
@@ -167,8 +170,9 @@
if (m_iCpuCount > WELS_DEC_MAX_NUM_CPU) {
m_iCpuCount = WELS_DEC_MAX_NUM_CPU;
}
- m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iThreadCount];
- memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iThreadCount);
+
+ m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount];
+ memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount);
for (int32_t i = 0; i < WELS_DEC_MAX_NUM_CPU; ++i) {
m_pDecThrCtxActive[i] = NULL;
}
@@ -277,7 +281,7 @@
}
void CWelsDecoder::UninitDecoder (void) {
- for (int32_t i = 0; i < m_iThreadCount; ++i) {
+ for (int32_t i = 0; i < m_iCtxCount; ++i) {
if (m_pDecThrCtx[i].pCtx != NULL) {
if (i > 0) {
WelsResetRefPicWithoutUnRef (m_pDecThrCtx[i].pCtx);
@@ -288,7 +292,7 @@
}
void CWelsDecoder::OpenDecoderThreads() {
- if (m_iThreadCount > 1) {
+ if (m_iThreadCount >= 1) {
m_uiDecodeTimeStamp = 0;
CREATE_SEMAPHORE (&m_sIsBusy, m_iThreadCount, m_iThreadCount, NULL);
WelsMutexInit (&m_csDecoder);
@@ -318,7 +322,7 @@
}
}
void CWelsDecoder::CloseDecoderThreads() {
- if (m_iThreadCount > 1) {
+ if (m_iThreadCount >= 1) {
for (int32_t i = 0; i < m_iThreadCount; i++) { //waiting the completion begun slices
WAIT_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_ABORT;
@@ -367,8 +371,8 @@
WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
"CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
VERSION_NUMBER, (int32_t)pParam->bParseOnly);
- if (m_iThreadCount > 1 && pParam->bParseOnly) {
- m_iThreadCount = 1;
+ if (m_iThreadCount >= 1 && pParam->bParseOnly) {
+ m_iThreadCount = 0;
}
OpenDecoderThreads();
//reset decoder context
@@ -377,9 +381,9 @@
memset (&m_sVlcTable, 0, sizeof (SVlcTable));
UninitDecoder();
WelsDecoderLastDecPicInfoDefaults (m_sLastDecPicInfo);
- for (int32_t i = 0; i < m_iThreadCount; ++i) {
+ for (int32_t i = 0; i < m_iCtxCount; ++i) {
InitDecoderCtx (m_pDecThrCtx[i].pCtx, pParam);
- if (m_iThreadCount > 1) {
+ if (m_iThreadCount >= 1) {
m_pDecThrCtx[i].pCtx->pThreadCtx = &m_pDecThrCtx[i];
}
}
@@ -429,7 +433,7 @@
int32_t CWelsDecoder::ResetDecoder (PWelsDecoderContext& pCtx) {
// TBC: need to be modified when context and trace point are null
- if (m_iThreadCount > 1) {
+ if (m_iThreadCount >= 1) {
ThreadResetDecoder (pCtx);
} else {
if (pCtx != NULL && m_pWelsTrace != NULL) {
@@ -472,9 +476,8 @@
if (eOptID == DECODER_OPTION_NUM_OF_THREADS) {
if (pOption != NULL) {
int32_t threadCount = * ((int32_t*)pOption);
- if (threadCount <= 0) {
- threadCount = 1;
- } else if (threadCount > m_iCpuCount) {
+ if (threadCount < 0) threadCount = 0;
+ if (threadCount > m_iCpuCount) {
threadCount = m_iCpuCount;
}
if (threadCount > 3) {
@@ -484,14 +487,15 @@
m_iThreadCount = threadCount;
if (m_pDecThrCtx != NULL) {
delete [] m_pDecThrCtx;
- m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iThreadCount];
- memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iThreadCount);
+ m_iCtxCount = m_iThreadCount == 0 ? 1 : m_iThreadCount;
+ m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iCtxCount];
+ memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iCtxCount);
}
}
}
return cmResultSuccess;
}
- for (int32_t i = 0; i < m_iThreadCount; ++i) {
+ for (int32_t i = 0; i < m_iCtxCount; ++i) {
PWelsDecoderContext pDecContext = m_pDecThrCtx[i].pCtx;
if (pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
@@ -502,6 +506,8 @@
iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
+ if (pDecContext == NULL) return dsInitialOptExpected;
+
pDecContext->bEndOfStreamFlag = iVal ? true : false;
return cmResultSuccess;
@@ -509,6 +515,8 @@
if (pOption == NULL)
return cmInitParaError;
+ if (pDecContext == NULL) return dsInitialOptExpected;
+
iVal = * ((int*)pOption); // int value for error concealment idc
iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
if ((pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
@@ -550,6 +558,7 @@
return cmInitParaError;
} else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
if (pOption) {
+ if (pDecContext == NULL) return dsInitialOptExpected;
pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption));
return cmResultSuccess;
}
@@ -681,7 +690,7 @@
unsigned char** ppDst,
SBufferInfo* pDstInfo) {
int iRet = dsErrorFree;
- if (m_iThreadCount > 1) {
+ if (m_iThreadCount >= 1) {
iRet = ThreadDecodeFrameInternal (kpSrc, kiSrcLen, ppDst, pDstInfo);
if (m_sReoderingStatus.iNumOfPicts) {
WAIT_EVENT (&m_sBufferingEvent, WELS_DEC_THREAD_WAIT_INFINITE);
@@ -742,6 +751,9 @@
}
#endif//OUTPUT_BIT_STREAM
pDecContext->bEndOfStreamFlag = false;
+ if (GetThreadCount (pDecContext) <= 0) {
+ pDecContext->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp;
+ }
} else {
//For application MODE, the error detection should be added for safe.
//But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
@@ -752,13 +764,13 @@
int64_t iStart, iEnd;
iStart = WelsTime();
- if (pDecContext->pThreadCtx == NULL) {
+ if (GetThreadCount (pDecContext) <= 1) {
ppDst[0] = ppDst[1] = ppDst[2] = NULL;
}
pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp;
- if (pDecContext->pThreadCtx == NULL) {
+ if (GetThreadCount (pDecContext) <= 1) {
memset (pDstInfo, 0, sizeof (SBufferInfo));
}
pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp;
@@ -856,7 +868,7 @@
OutputStatisticsLog (*pDecContext->pDecoderStatistics);
- if (pDecContext->pThreadCtx != NULL) {
+ if (GetThreadCount (pDecContext) >= 1) {
WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
RESET_EVENT (&m_sBufferingEvent);
BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
@@ -882,7 +894,7 @@
iEnd = WelsTime();
pDecContext->dDecTime += (iEnd - iStart) / 1e3;
- if (pDecContext->pThreadCtx != NULL) {
+ if (GetThreadCount (pDecContext) >= 1) {
WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
RESET_EVENT (&m_sBufferingEvent);
BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
@@ -904,7 +916,7 @@
DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst,
SBufferInfo* pDstInfo) {
bool bEndOfStreamFlag = true;
- for (int32_t j = 0; j < m_iThreadCount; ++j) {
+ for (int32_t j = 0; j < m_iCtxCount; ++j) {
if (!m_pDecThrCtx[j].pCtx->bEndOfStreamFlag) {
bEndOfStreamFlag = false;
}
@@ -934,9 +946,10 @@
ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
- PPicBuff pPicBuff = m_iThreadCount == 1 ? m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff;
+ PPicBuff pPicBuff = m_iThreadCount <= 1 ? m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff;
if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < pPicBuff->iCapacity) {
- pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
}
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
m_sReoderingStatus.iMinPOC = IMinInt32;
@@ -1001,6 +1014,7 @@
if (m_sReoderingStatus.iNumOfPicts && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb
&& pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) {
m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts;
+
for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
if (m_sPictInfoList[i].iPOC > IMinInt32) {
m_sPictInfoList[i].bLastGOP = true;
@@ -1036,7 +1050,7 @@
m_sPictInfoList[i].iPOC = pCtx->pSliceHeader->iPicOrderCntLsb;
m_sPictInfoList[i].uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
m_sPictInfoList[i].iPicBuffIdx = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx;
- pCtx->pPicBuff->ppPic[m_sPictInfoList[i].iPicBuffIdx]->bAvailableFlag = false;
+ if (GetThreadCount (pCtx) <= 1) ++pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iRefCount;
m_sPictInfoList[i].bLastGOP = false;
pDstInfo->iBufferStatus = 0;
++m_sReoderingStatus.iNumOfPicts;
@@ -1051,6 +1065,9 @@
void CWelsDecoder::ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
SBufferInfo* pDstInfo) {
PPicBuff pPicBuff = pCtx ? pCtx->pPicBuff : m_pPicBuff;
+ if (pCtx == NULL && m_iThreadCount <= 1) {
+ pCtx = m_pDecThrCtx[0].pCtx;
+ }
if (!m_bIsBaseline && m_sReoderingStatus.iLastGOPRemainPicts > 0) {
m_sReoderingStatus.iMinPOC = IMinInt32;
for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
@@ -1075,7 +1092,8 @@
ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
- pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
m_sReoderingStatus.iMinPOC = IMinInt32;
--m_sReoderingStatus.iNumOfPicts;
@@ -1107,7 +1125,8 @@
ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
- pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
--m_sReoderingStatus.iNumOfPicts;
}
return;
@@ -1147,7 +1166,8 @@
ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
- pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+ PPicture pPic = pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx];
+ --pPic->iRefCount;
m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
m_sReoderingStatus.iMinPOC = IMinInt32;
--m_sReoderingStatus.iNumOfPicts;
@@ -1159,7 +1179,6 @@
SBufferInfo* pDstInfo) {
DECODING_STATE iRet = dsErrorFree;
if (pDstInfo->iBufferStatus == 1) {
- ++pDecContext->uiDecodingTimeStamp;
m_bIsBaseline = pDecContext->pSps->uiProfileIdc == 66 || pDecContext->pSps->uiProfileIdc == 83;
if (!m_bIsBaseline) {
BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
@@ -1364,7 +1383,9 @@
memcpy (&m_pDecThrCtx[signal].sDstInfo, pDstInfo, sizeof (SBufferInfo));
ParseAccessUnit (m_pDecThrCtx[signal]);
- m_pLastDecThrCtx = &m_pDecThrCtx[signal];
+ if (m_iThreadCount > 1) {
+ m_pLastDecThrCtx = &m_pDecThrCtx[signal];
+ }
m_pDecThrCtx[signal].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
RELEASE_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsActivated);