shithub: openh264

Download patch

ref: 776954f0c08ecc28bc99eb125b2c7d0f654f420a
parent: b523d58656a06e835d53cda36cd295d0b3fff33d
parent: 4863c6602aae600f6985f3069401341bacfbc6c2
author: huili2 <[email protected]>
date: Mon Nov 4 18:20:52 EST 2019

Merge pull request #3191 from xiaotianshi2/thread_commit_4_upated

commit-4 of multi-thread decoding support.

--- a/codec/api/svc/codec_app_def.h
+++ b/codec/api/svc/codec_app_def.h
@@ -167,8 +167,8 @@
   DECODER_OPTION_LEVEL,                 ///< get current AU level info,only is used in GetOption
   DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
   DECODER_OPTION_IS_REF_PIC,             ///< feedback current frame is ref pic or not
-  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER  ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
-
+  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER,  ///< number of frames remaining in decoder buffer when pictures are required to re-ordered into display-order.
+  DECODER_OPTION_NUM_OF_THREADS,         ///< number of decoding threads. The maximum thread count is equal or less than lesser of (cpu core counts and 16).
 } DECODER_OPTION;
 
 /**
--- a/codec/console/dec/src/h264dec.cpp
+++ b/codec/console/dec/src/h264dec.cpp
@@ -52,7 +52,6 @@
 #include "measure_time.h"
 #include "d3d9_utils.h"
 
-
 using namespace std;
 
 #if defined (WINDOWS_PHONE)
@@ -69,6 +68,106 @@
 #endif
 //using namespace WelsDec;
 
+// Scans pBuf from bufPos for Annex-B start codes and returns the size in bytes of the next chunk to decode.
+// The chunk ends at the start code of the second slice NAL (type 1 or 5) found, or at the first SPS (type 7) of
+// this scan if a slice NAL was already seen; with no such boundary the remaining iFileSize - bufPos is returned.
+// pSpsBuf receives the byte after the start code of the last SPS scanned (NULL if none); sps_byte_count is the
+// distance from there to the first PPS (type 8) start code, set only when exactly one SPS preceded it, else 0.
+int32_t readPicture (uint8_t* pBuf, const int32_t& iFileSize, const int32_t& bufPos, uint8_t*& pSpsBuf,
+                     int32_t& sps_byte_count) {
+  int32_t bytes_available = iFileSize - bufPos;
+  if (bytes_available < 4) {
+    return bytes_available; // too short to hold a start code; the out-parameters are left untouched
+  }
+  uint8_t* ptr = pBuf + bufPos;
+  int32_t read_bytes = 0;
+  int32_t sps_count = 0;
+  int32_t pps_count = 0;
+  int32_t non_idr_pict_count = 0;
+  int32_t idr_pict_count = 0;
+  pSpsBuf = NULL;
+  sps_byte_count = 0;
+  while (read_bytes < bytes_available - 4) {
+    const bool has4ByteStartCode = ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 1;
+    const bool has3ByteStartCode = !has4ByteStartCode && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 1;
+    if (has4ByteStartCode || has3ByteStartCode) {
+      uint8_t nal_unit_type = has4ByteStartCode ? (ptr[4] & 0x1F) : (ptr[3] & 0x1F);
+      if (nal_unit_type == 1) {
+        if (++non_idr_pict_count == 1 && idr_pict_count == 1) {
+          return read_bytes;
+        }
+        if (non_idr_pict_count == 2) {
+          return read_bytes;
+        }
+      } else if (nal_unit_type == 5) {
+        if (++idr_pict_count == 1 && non_idr_pict_count == 1) {
+          return read_bytes;
+        }
+        if (idr_pict_count == 2) {
+          return read_bytes;
+        }
+      } else if (nal_unit_type == 7) {
+        pSpsBuf = ptr + (has4ByteStartCode ? 4 : 3);
+        if ((++sps_count == 1) && (non_idr_pict_count == 1 || idr_pict_count == 1)) {
+          return read_bytes;
+        }
+      } else if (nal_unit_type == 8) {
+        if (++pps_count == 1 && sps_count == 1) {
+          sps_byte_count = int32_t (ptr - pSpsBuf);
+        }
+      }
+      // The loop condition guarantees more than 4 bytes remain here, so the skip below never overruns.
+      read_bytes += 4;
+      ptr += 4;
+    } else {
+      ++ptr;
+      ++read_bytes;
+    }
+  }
+  return bytes_available;
+}
+
+// Drains the pictures pDecoder reports via DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER: each flushed picture is
+// handed to CUtils::Process with pYuvFile, elapsed time is added to iTotal, and size changes go to pOptionFile.
+void FlushFrames (ISVCDecoder* pDecoder, int64_t& iTotal, FILE* pYuvFile, FILE* pOptionFile, int32_t& iFrameCount,
+                  unsigned long long& uiTimeStamp, int32_t& iWidth, int32_t& iHeight, int32_t& iLastWidth, int32_t& iLastHeight) {
+  uint8_t* pData[3] = { NULL };
+  uint8_t* pDst[3] = { NULL };
+  SBufferInfo sDstBufInfo;
+  int32_t num_of_frames_in_buffer = 0;
+  CUtils cOutputModule;
+  pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
+  for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
+    int64_t iStart = WelsTime();
+    pData[0] = pData[1] = pData[2] = NULL;
+    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
+    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
+    sDstBufInfo.iBufferStatus = 1;
+    pDecoder->FlushFrame (pData, &sDstBufInfo);
+    if (sDstBufInfo.iBufferStatus == 1) {
+      pDst[0] = pData[0];
+      pDst[1] = pData[1];
+      pDst[2] = pData[2];
+    }
+    int64_t iEnd = WelsTime();
+    iTotal += iEnd - iStart;
+    if (sDstBufInfo.iBufferStatus == 1) { // a picture was returned by FlushFrame
+      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
+      iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
+      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
+      if (pOptionFile != NULL) {
+        if (iWidth != iLastWidth && iHeight != iLastHeight) {
+          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
+          fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
+          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
+          iLastWidth = iWidth;
+          iLastHeight = iHeight; // taken by reference so the caller's value stays in step with iLastWidth
+        }
+      }
+      ++iFrameCount;
+    }
+  }
+}
 void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName,
                          int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName,
                          int32_t iErrorConMethod,
@@ -95,15 +194,18 @@
 
   int32_t iBufPos = 0;
   int32_t iFileSize;
-  int32_t i = 0;
   int32_t iLastWidth = 0, iLastHeight = 0;
   int32_t iFrameCount = 0;
   int32_t iEndOfStreamFlag = 0;
-  int32_t num_of_frames_in_buffer = 0;
   pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod);
   CUtils cOutputModule;
   double dElapsed = 0;
+  uint8_t uLastSpsBuf[32];
+  int32_t iLastSpsByteCount = 0;
 
+  int32_t iThreadCount = 1;
+  pDecoder->GetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
+
   if (kpH264FileName) {
     pH264File = fopen (kpH264FileName, "rb");
     if (pH264File == NULL) {
@@ -181,13 +283,32 @@
         goto label_exit;
       iSliceSize = static_cast<int32_t> (pInfo[2]);
     } else {
-      for (i = 0; i < iFileSize; i++) {
-        if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
-             && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
-          break;
+      if (iThreadCount > 1) {
+        uint8_t* uSpsPtr = NULL;
+        int32_t iSpsByteCount = 0;
+        iSliceSize = readPicture (pBuf, iFileSize, iBufPos, uSpsPtr, iSpsByteCount);
+        if (iLastSpsByteCount > 0 && iSpsByteCount > 0) {
+          if (iSpsByteCount != iLastSpsByteCount || memcmp (uSpsPtr, uLastSpsBuf, iLastSpsByteCount) != 0) {
+            // Whenever the new sequence differs from the preceding one, all pending frames must be flushed before decoding of the new sequence can start.
+            FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
+                         iLastHeight);
+          }
         }
+        if (iSpsByteCount > 0 && uSpsPtr != NULL) {
+          if (iSpsByteCount > 32) iSpsByteCount = 32;
+          iLastSpsByteCount = iSpsByteCount;
+          memcpy (uLastSpsBuf, uSpsPtr, iSpsByteCount);
+        }
+      } else {
+        int i = 0;
+        for (i = 0; i < iFileSize; i++) {
+          if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
+               && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
+            break;
+          }
+        }
+        iSliceSize = i;
       }
-      iSliceSize = i;
     }
     if (iSliceSize < 4) { //too small size, no effective data, ignore
       iBufPos += iSliceSize;
@@ -283,41 +404,8 @@
     iBufPos += iSliceSize;
     ++ iSliceIndex;
   }
-
-  pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
-  for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
-    iStart = WelsTime();
-    pData[0] = NULL;
-    pData[1] = NULL;
-    pData[2] = NULL;
-    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
-    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
-    sDstBufInfo.iBufferStatus = 1;
-    pDecoder->FlushFrame (pData, &sDstBufInfo);
-    if (sDstBufInfo.iBufferStatus == 1) {
-      pDst[0] = pData[0];
-      pDst[1] = pData[1];
-      pDst[2] = pData[2];
-    }
-    iEnd = WelsTime();
-    iTotal += iEnd - iStart;
-    if (sDstBufInfo.iBufferStatus == 1) {
-      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
-      iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
-      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
-
-      if (pOptionFile != NULL) {
-        if (iWidth != iLastWidth && iHeight != iLastHeight) {
-          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
-          fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
-          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
-          iLastWidth = iWidth;
-          iLastHeight = iHeight;
-        }
-      }
-      ++iFrameCount;
-    }
-  }
+  FlushFrames (pDecoder, iTotal, pYuvFile, pOptionFile, iFrameCount, uiTimeStamp, iWidth, iHeight, iLastWidth,
+               iLastHeight);
   dElapsed = iTotal / 1e6;
   fprintf (stderr, "-------------------------------------------------------\n");
   fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n",
@@ -488,6 +576,9 @@
   if (iLevelSetting >= 0) {
     pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
   }
+
+  int32_t iThreadCount = 1;
+  pDecoder->SetOption (DECODER_OPTION_NUM_OF_THREADS, &iThreadCount);
 
   if (pDecoder->Initialize (&sDecParam)) {
     printf ("Decoder initialization failed.\n");
--- a/codec/decoder/plus/inc/welsDecoderExt.h
+++ b/codec/decoder/plus/inc/welsDecoderExt.h
@@ -109,22 +109,51 @@
   virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption);
   virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption);
 
+ public:
+  DECODING_STATE DecodeFrame2WithCtx (PWelsDecoderContext pCtx, const unsigned char* kpSrc, const int kiSrcLen,
+                                      unsigned char** ppDst, SBufferInfo* pDstInfo);
+  DECODING_STATE ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx);
+
  private:
-  PWelsDecoderContext     m_pDecContext;
   welsCodecTrace*         m_pWelsTrace;
+  uint32_t                m_uiDecodeTimeStamp;
+  bool                    m_bIsBaseline;
+  int32_t                 m_iCpuCount;
+  int32_t                 m_iThreadCount;
+  PPicBuff                m_pPicBuff;
+  bool                    m_bParamSetsLostFlag;
+  bool                    m_bFreezeOutput;
+  int32_t                 m_DecCtxActiveCount;
+  PWelsDecoderThreadCTX   m_pDecThrCtx;
+  PWelsDecoderThreadCTX   m_pLastDecThrCtx;
+  WELS_MUTEX              m_csDecoder;
+  SWelsDecEvent           m_sBufferingEvent;
+  SWelsDecEvent           m_sReleaseBufferEvent;
+  SWelsDecSemphore        m_sIsBusy;
   SPictInfo               m_sPictInfoList[16];
   SPictReoderingStatus    m_sReoderingStatus;
+  PWelsDecoderThreadCTX   m_pDecThrCtxActive[WELS_DEC_MAX_NUM_CPU];
   SVlcTable               m_sVlcTable;
   SWelsLastDecPicInfo     m_sLastDecPicInfo;
   SDecoderStatistics      m_sDecoderStatistics;// For real time debugging
 
+ private:
   int32_t InitDecoder (const SDecodingParam* pParam);
   void UninitDecoder (void);
-  int32_t ResetDecoder();
+  int32_t InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam);
+  void UninitDecoderCtx (PWelsDecoderContext& pCtx);
+  int32_t ResetDecoder (PWelsDecoderContext& pCtx);
+  int32_t ThreadResetDecoder (PWelsDecoderContext& pCtx);
 
   void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics);
-  DECODING_STATE ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo);
+  DECODING_STATE ReorderPicturesInDisplay (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+  int ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
+                                 SBufferInfo* pDstInfo);
+  void BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
+  void ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst, SBufferInfo* pDstInfo);
 
+  void OpenDecoderThreads();
+  void CloseDecoderThreads();
 #ifdef OUTPUT_BIT_STREAM
   WelsFileHandle* m_pFBS;
   WelsFileHandle* m_pFBSSize;
--- a/codec/decoder/plus/src/welsDecoderExt.cpp
+++ b/codec/decoder/plus/src/welsDecoderExt.cpp
@@ -51,6 +51,7 @@
 //#include "macros.h"
 #include "decoder.h"
 #include "decoder_core.h"
+#include "manage_dec_ref.h"
 #include "error_concealment.h"
 
 #include "measure_time.h"
@@ -67,12 +68,11 @@
 #include <stdio.h>
 #include <stdarg.h>
 #include <sys/types.h>
+#include <malloc.h>
 #else
 #include <sys/time.h>
 #endif
 
-#define _PICTURE_REORDERING_ 1
-
 namespace WelsDec {
 
 //////////////////////////////////////////////////////////////////////
@@ -88,9 +88,58 @@
 *
 *   return: none
 ***************************************************************************/
+DECLARE_PROCTHREAD (pThrProcInit, p) { // thread entry used by CREATE_THREAD; forwards to the routine stored in pThrProcMain
+  SWelsDecThreadInfo* sThreadInfo = (SWelsDecThreadInfo*)p; // NOTE(review): OpenDecoderThreads passes a SWelsDecoderThreadCTX*, so this presumably relies on sThreadInfo being its first member - confirm
+#if defined(WIN32)
+  _alloca (WELS_DEC_MAX_THREAD_STACK_SIZE * (sThreadInfo->uiThrNum + 1)); // result unused; NOTE(review): presumably staggers each thread's stack by its index - confirm intent
+#endif
+  return sThreadInfo->pThrProcMain (p); // the main routine receives the same argument and its result is returned
+}
+
+static DECODING_STATE  ConstructAccessUnit (CWelsDecoder* pWelsDecoder, PWelsDecoderThreadCTX pThrCtx) { // runs DecodeFrame2WithCtx for pThrCtx after waiting on the previous thread context, if one is linked
+  int iRet = dsErrorFree;
+  //WelsMutexLock (&pWelsDecoder->m_csDecoder);
+  if (pThrCtx->pCtx->pLastThreadCtx != NULL) { // a previous thread context is linked to this decoder context
+    PWelsDecoderThreadCTX pLastThreadCtx = (PWelsDecoderThreadCTX) (pThrCtx->pCtx->pLastThreadCtx);
+    WAIT_EVENT (&pLastThreadCtx->sSliceDecodeStart, WELS_DEC_THREAD_WAIT_INFINITE); // block until that context's sSliceDecodeStart event is signalled
+    RESET_EVENT (&pLastThreadCtx->sSliceDecodeStart); // clear it again after the wait returns
+  }
+  pThrCtx->pDec = NULL;
+  RESET_EVENT (&pThrCtx->sSliceDecodeFinsh);
+  iRet |= pWelsDecoder->DecodeFrame2WithCtx (pThrCtx->pCtx, NULL, 0, pThrCtx->ppDst, &pThrCtx->sDstInfo); // NULL source / zero length; NOTE(review): presumably the input was already parsed into pCtx - confirm
+
+  //WelsMutexUnlock (&pWelsDecoder->m_csDecoder);
+  return (DECODING_STATE)iRet; // accumulated decoding state bits, dsErrorFree if none were set
+}
+
+DECLARE_PROCTHREAD (pThrProcFrame, p) { // worker loop: p is the thread's SWelsDecoderThreadCTX
+  SWelsDecoderThreadCTX* pThrCtx = (SWelsDecoderThreadCTX*)p;
+  while (1) {
+    RELEASE_SEMAPHORE (pThrCtx->sThreadInfo.sIsBusy); // sIsBusy is a pointer to the semaphore shared by all threads (set in OpenDecoderThreads)
+    RELEASE_SEMAPHORE (&pThrCtx->sThreadInfo.sIsIdle); // per-thread semaphore; CloseDecoderThreads waits on it before aborting
+    WAIT_SEMAPHORE (&pThrCtx->sThreadInfo.sIsActivated, WELS_DEC_THREAD_WAIT_INFINITE); // sleep until a command is posted
+    if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_RUN) {
+      CWelsDecoder* pWelsDecoder = (CWelsDecoder*)pThrCtx->threadCtxOwner;
+      ConstructAccessUnit (pWelsDecoder, pThrCtx); // the returned decoding state is not inspected here
+    } else if (pThrCtx->sThreadInfo.uiCommand == WELS_DEC_THREAD_COMMAND_ABORT) {
+      break; // leave the loop so the thread can terminate
+    }
+  }
+  return 0;
+}
+
 CWelsDecoder::CWelsDecoder (void)
-  : m_pDecContext (NULL),
-    m_pWelsTrace (NULL) {
+  : m_pWelsTrace (NULL),
+    m_uiDecodeTimeStamp (0),
+    m_bIsBaseline (false),
+    m_iCpuCount (1),
+    m_iThreadCount (1),
+    m_pPicBuff (NULL),
+    m_bParamSetsLostFlag (false),
+    m_bFreezeOutput (false),
+    m_DecCtxActiveCount (0),
+    m_pDecThrCtx (NULL),
+    m_pLastDecThrCtx (NULL) {
 #ifdef OUTPUT_BIT_STREAM
   char chFileName[1024] = { 0 };  //for .264
   int iBufUsed = 0;
@@ -114,6 +163,15 @@
 
   ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true);
 
+  m_iCpuCount = GetCPUCount();
+  if (m_iCpuCount > WELS_DEC_MAX_NUM_CPU) {
+    m_iCpuCount = WELS_DEC_MAX_NUM_CPU;
+  }
+  m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iThreadCount];
+  memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iThreadCount);
+  for (int32_t i = 0; i < WELS_DEC_MAX_NUM_CPU; ++i) {
+    m_pDecThrCtxActive[i] = NULL;
+  }
 #ifdef OUTPUT_BIT_STREAM
   SWelsTime sCurTime;
 
@@ -169,7 +227,7 @@
   if (m_pWelsTrace != NULL) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()");
   }
-
+  CloseDecoderThreads();
   UninitDecoder();
 
 #ifdef OUTPUT_BIT_STREAM
@@ -187,6 +245,10 @@
     delete m_pWelsTrace;
     m_pWelsTrace = NULL;
   }
+  if (m_pDecThrCtx != NULL) {
+    delete[] m_pDecThrCtx;
+    m_pDecThrCtx = NULL;
+  }
 }
 
 long CWelsDecoder::Initialize (const SDecodingParam* pParam) {
@@ -215,26 +277,87 @@
 }
 
 void CWelsDecoder::UninitDecoder (void) {
-  if (NULL == m_pDecContext)
-    return;
+  for (int32_t i = 0; i < m_iThreadCount; ++i) {
+    if (m_pDecThrCtx[i].pCtx != NULL) {
+      if (i > 0) {
+        WelsResetRefPicWithoutUnRef (m_pDecThrCtx[i].pCtx);
+      }
+      UninitDecoderCtx (m_pDecThrCtx[i].pCtx);
+    }
+  }
+}
 
-  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoder(), openh264 codec version = %s.",
-           VERSION_NUMBER);
+void CWelsDecoder::OpenDecoderThreads() { // creates the shared sync objects and one worker thread per context; no-op unless m_iThreadCount > 1
+  if (m_iThreadCount > 1) {
+    m_uiDecodeTimeStamp = 0;
+    CREATE_SEMAPHORE (&m_sIsBusy, m_iThreadCount, m_iThreadCount, NULL); // NOTE(review): presumably initial and maximum count both equal the thread count - confirm macro argument order
+    WelsMutexInit (&m_csDecoder);
+    CREATE_EVENT (&m_sBufferingEvent, 1, 0, NULL);
+    SET_EVENT (&m_sBufferingEvent); // both decoder-level events are set immediately after creation
+    CREATE_EVENT (&m_sReleaseBufferEvent, 1, 0, NULL);
+    SET_EVENT (&m_sReleaseBufferEvent);
+    for (int32_t i = 0; i < m_iThreadCount; ++i) {
+      m_pDecThrCtx[i].sThreadInfo.uiThrMaxNum = m_iThreadCount;
+      m_pDecThrCtx[i].sThreadInfo.uiThrNum = i;
+      m_pDecThrCtx[i].sThreadInfo.uiThrStackSize = WELS_DEC_MAX_THREAD_STACK_SIZE;
+      m_pDecThrCtx[i].sThreadInfo.pThrProcMain = pThrProcFrame; // routine that pThrProcInit forwards to
+      m_pDecThrCtx[i].sThreadInfo.sIsBusy = &m_sIsBusy; // every thread shares the decoder-level semaphore
+      m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
+      m_pDecThrCtx[i].threadCtxOwner = this;
+      m_pDecThrCtx[i].kpSrc = NULL;
+      m_pDecThrCtx[i].kiSrcLen = 0;
+      m_pDecThrCtx[i].ppDst = NULL;
+      m_pDecThrCtx[i].pDec = NULL;
+      CREATE_EVENT (&m_pDecThrCtx[i].sImageReady, 1, 0, NULL);
+      CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart, 1, 0, NULL);
+      CREATE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinsh, 1, 0, NULL);
+      CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, 0, 1, NULL);
+      CREATE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated, 0, 1, NULL);
+      CREATE_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle, pThrProcInit, (void*) (& (m_pDecThrCtx[i]))); // started last, after its events and semaphores exist; the context itself is the thread argument
+    }
+  }
+}
+void CWelsDecoder::CloseDecoderThreads() { // stops every worker thread and closes the sync objects made by OpenDecoderThreads; no-op unless m_iThreadCount > 1
+  if (m_iThreadCount > 1) {
+    for (int32_t i = 0; i < m_iThreadCount; i++) { // wait for slices already in progress to complete
+      WAIT_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); // released by pThrProcFrame at the top of its loop
+      m_pDecThrCtx[i].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_ABORT;
+      RELEASE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated); // wake the worker so it reads the ABORT command
+      WAIT_THREAD (&m_pDecThrCtx[i].sThreadInfo.sThrHandle); // wait for the thread before closing its events and semaphores
+      CLOSE_EVENT (&m_pDecThrCtx[i].sImageReady);
+      CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeStart);
+      CLOSE_EVENT (&m_pDecThrCtx[i].sSliceDecodeFinsh);
+      CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsIdle);
+      CLOSE_SEMAPHORE (&m_pDecThrCtx[i].sThreadInfo.sIsActivated);
+    }
+    WelsMutexDestroy (&m_csDecoder); // decoder-level objects are released only after the loop has waited for every thread
+    CLOSE_EVENT (&m_sBufferingEvent);
+    CLOSE_EVENT (&m_sReleaseBufferEvent);
+    CLOSE_SEMAPHORE (&m_sIsBusy);
+  }
+}
 
-  WelsEndDecoder (m_pDecContext);
+void CWelsDecoder::UninitDecoderCtx (PWelsDecoderContext& pCtx) {
+  if (pCtx != NULL) {
 
-  if (m_pDecContext->pMemAlign != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
-             m_pDecContext->pMemAlign->WelsGetMemoryUsage());
-    delete m_pDecContext->pMemAlign;
-    m_pDecContext->pMemAlign = NULL;
-  }
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoderCtx(), openh264 codec version = %s.",
+             VERSION_NUMBER);
 
-  if (NULL != m_pDecContext) {
-    WelsFree (m_pDecContext, "m_pDecContext");
+    WelsEndDecoder (pCtx);
 
-    m_pDecContext = NULL;
+    if (pCtx->pMemAlign != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+               "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
+               pCtx->pMemAlign->WelsGetMemoryUsage());
+      delete pCtx->pMemAlign;
+      pCtx->pMemAlign = NULL;
+    }
+
+    if (NULL != pCtx) {
+      WelsFree (pCtx, "m_pDecContext");
+
+      pCtx = NULL;
+    }
   }
 }
 
@@ -244,59 +367,100 @@
   WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
            "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
            VERSION_NUMBER, (int32_t)pParam->bParseOnly);
-
+  if (m_iThreadCount > 1 && pParam->bParseOnly) {
+    m_iThreadCount = 1;
+  }
+  OpenDecoderThreads();
   //reset decoder context
   memset (&m_sDecoderStatistics, 0, sizeof (SDecoderStatistics));
   memset (&m_sLastDecPicInfo, 0, sizeof (SWelsLastDecPicInfo));
   memset (&m_sVlcTable, 0, sizeof (SVlcTable));
+  UninitDecoder();
   WelsDecoderLastDecPicInfoDefaults (m_sLastDecPicInfo);
-  if (m_pDecContext) //free
-    UninitDecoder();
-  m_pDecContext = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
-  if (NULL == m_pDecContext)
+  for (int32_t i = 0; i < m_iThreadCount; ++i) {
+    InitDecoderCtx (m_pDecThrCtx[i].pCtx, pParam);
+    if (m_iThreadCount > 1) {
+      m_pDecThrCtx[i].pCtx->pThreadCtx = &m_pDecThrCtx[i];
+    }
+  }
+  m_bParamSetsLostFlag = false;
+  m_bFreezeOutput = false;
+  return cmResultSuccess;
+}
+
+// NOTE: the return value of this function is not suitable; it needs to report failure information to the upper layer.
+int32_t CWelsDecoder::InitDecoderCtx (PWelsDecoderContext& pCtx, const SDecodingParam* pParam) {
+
+  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+           "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
+           VERSION_NUMBER, (int32_t)pParam->bParseOnly);
+
+  //reset decoder context
+  UninitDecoderCtx (pCtx);
+  pCtx = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
+  if (NULL == pCtx)
     return cmMallocMemeError;
   int32_t iCacheLineSize = 16;   // on chip cache line size in byte
-  m_pDecContext->pMemAlign = new CMemoryAlign (iCacheLineSize);
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pMemAlign), UninitDecoder())
+  pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize);
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pMemAlign), UninitDecoderCtx (pCtx))
 
   //fill in default value into context
-  m_pDecContext->pLastDecPicInfo = &m_sLastDecPicInfo;
-  m_pDecContext->pDecoderStatistics = &m_sDecoderStatistics;
-  m_pDecContext->pVlcTable = &m_sVlcTable;
-  m_pDecContext->pPictInfoList = m_sPictInfoList;
-  m_pDecContext->pPictReoderingStatus = &m_sReoderingStatus;
-  WelsDecoderDefaults (m_pDecContext, &m_pWelsTrace->m_sLogCtx);
-  WelsDecoderSpsPpsDefaults (m_pDecContext->sSpsPpsCtx);
-
+  pCtx->pLastDecPicInfo = &m_sLastDecPicInfo;
+  pCtx->pDecoderStatistics = &m_sDecoderStatistics;
+  pCtx->pVlcTable = &m_sVlcTable;
+  pCtx->pPictInfoList = m_sPictInfoList;
+  pCtx->pPictReoderingStatus = &m_sReoderingStatus;
+  pCtx->pCsDecoder = &m_csDecoder;
+  WelsDecoderDefaults (pCtx, &m_pWelsTrace->m_sLogCtx);
+  WelsDecoderSpsPpsDefaults (pCtx->sSpsPpsCtx);
   //check param and update decoder context
-  m_pDecContext->pParam = (SDecodingParam*)m_pDecContext->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
-                          "SDecodingParam");
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pParam), UninitDecoder());
-  int32_t iRet = DecoderConfigParam (m_pDecContext, pParam);
+  pCtx->pParam = (SDecodingParam*)pCtx->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
+                 "SDecodingParam");
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == pCtx->pParam), UninitDecoderCtx (pCtx));
+  int32_t iRet = DecoderConfigParam (pCtx, pParam);
   WELS_VERIFY_RETURN_IFNEQ (iRet, cmResultSuccess);
 
   //init decoder
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (m_pDecContext, &m_pWelsTrace->m_sLogCtx),
-                              UninitDecoder())
-
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (pCtx, &m_pWelsTrace->m_sLogCtx),
+                              UninitDecoderCtx (pCtx))
+  pCtx->pPicBuff = NULL;
   return cmResultSuccess;
 }
 
-int32_t CWelsDecoder::ResetDecoder() {
+int32_t CWelsDecoder::ResetDecoder (PWelsDecoderContext& pCtx) {
   // TBC: need to be modified when context and trace point are null
-  if (m_pDecContext != NULL && m_pWelsTrace != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
-             m_pDecContext->iErrorCode);
-    SDecodingParam sPrevParam;
-    memcpy (&sPrevParam, m_pDecContext->pParam, sizeof (SDecodingParam));
+  if (m_iThreadCount > 1) {
+    ThreadResetDecoder (pCtx);
+  } else {
+    if (pCtx != NULL && m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
+               pCtx->iErrorCode);
+      SDecodingParam sPrevParam;
+      memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam));
 
-    WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoder (&sPrevParam), UninitDecoder());
+      WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoderCtx (pCtx, &sPrevParam),
+                                  UninitDecoderCtx (pCtx));
+    } else if (m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
+    }
+    ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false);
+  }
+  return ERR_INFO_UNINIT;
+}
+
+int32_t CWelsDecoder::ThreadResetDecoder (PWelsDecoderContext& pCtx) {
+  // TBC: need to be modified when context and trace point are null
+  SDecodingParam sPrevParam;
+  if (pCtx != NULL && m_pWelsTrace != NULL) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d", pCtx->iErrorCode);
+    memcpy (&sPrevParam, pCtx->pParam, sizeof (SDecodingParam));
+    ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, true);
+    CloseDecoderThreads();
+    UninitDecoder();
+    InitDecoder (&sPrevParam);
   } else if (m_pWelsTrace != NULL) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
   }
-#ifdef _PICTURE_REORDERING_
-  ResetReorderingPictureBuffers (&m_sReoderingStatus, m_sPictInfoList, false);
-#endif
   return ERR_INFO_UNINIT;
 }
 
@@ -305,71 +469,95 @@
  */
 long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) {
   int iVal = 0;
+  if (eOptID == DECODER_OPTION_NUM_OF_THREADS) {
+    if (pOption != NULL) {
+      int32_t threadCount = * ((int32_t*)pOption);
+      if (threadCount <= 0) {
+        threadCount = 1;
+      } else if (threadCount > m_iCpuCount) {
+        threadCount = m_iCpuCount;
+      }
+      if (threadCount > 3) {
+        threadCount = 3;
+      }
+      if (threadCount != m_iThreadCount) {
+        m_iThreadCount = threadCount;
+        if (m_pDecThrCtx != NULL) {
+          delete [] m_pDecThrCtx;
+          m_pDecThrCtx = new SWelsDecoderThreadCTX[m_iThreadCount];
+          memset (m_pDecThrCtx, 0, sizeof (SWelsDecoderThreadCTX)*m_iThreadCount);
+        }
+      }
+    }
+    return cmResultSuccess;
+  }
+  for (int32_t i = 0; i < m_iThreadCount; ++i) {
+    PWelsDecoderContext pDecContext = m_pDecThrCtx[i].pCtx;
+    if (pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
+        eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
+      return dsInitialOptExpected;
+    if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
+      if (pOption == NULL)
+        return cmInitParaError;
 
-  if (m_pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
-      eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
-    return dsInitialOptExpected;
-  if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
-    if (pOption == NULL)
-      return cmInitParaError;
+      iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
 
-    iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
+      pDecContext->bEndOfStreamFlag = iVal ? true : false;
 
-    m_pDecContext->bEndOfStreamFlag = iVal ? true : false;
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
+      if (pOption == NULL)
+        return cmInitParaError;
 
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
-    if (pOption == NULL)
-      return cmInitParaError;
+      iVal = * ((int*)pOption); // int value for error concealment idc
+      iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
+      if ((pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+        WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+                 "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
+        return cmInitParaError;
+      }
 
-    iVal = * ((int*)pOption); // int value for error concealment idc
-    iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
-    if ((m_pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+      pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
+      InitErrorCon (pDecContext);
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-               "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
-      return cmInitParaError;
-    }
+               "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
 
-    m_pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
-    InitErrorCon (m_pDecContext);
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
-
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
-    if (m_pWelsTrace) {
-      uint32_t level = * ((uint32_t*)pOption);
-      m_pWelsTrace->SetTraceLevel (level);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
-    if (m_pWelsTrace) {
-      WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
-      m_pWelsTrace->SetTraceCallback (callback);
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-               "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
-               callback);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
-    if (m_pWelsTrace) {
-      void* ctx = * ((void**)pOption);
-      m_pWelsTrace->SetTraceCallbackContext (ctx);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
-             "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
-    return cmInitParaError;
-  } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
-    if (pOption) {
-      m_pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption));
       return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
+      if (m_pWelsTrace) {
+        uint32_t level = * ((uint32_t*)pOption);
+        m_pWelsTrace->SetTraceLevel (level);
+      }
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
+      if (m_pWelsTrace) {
+        WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
+        m_pWelsTrace->SetTraceCallback (callback);
+        WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+                 "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
+                 callback);
+      }
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
+      if (m_pWelsTrace) {
+        void* ctx = * ((void**)pOption);
+        m_pWelsTrace->SetTraceCallbackContext (ctx);
+      }
+      return cmResultSuccess;
+    } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+               "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
+      return cmInitParaError;
+    } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
+      if (pOption) {
+        pDecContext->pDecoderStatistics->iStatisticsLogInterval = (* ((unsigned int*)pOption));
+        return cmResultSuccess;
+      }
+    } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+               "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
+      return cmInitParaError;
     }
-  } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
-             "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
-    return cmInitParaError;
   }
   return cmInitParaError;
 }
@@ -379,8 +567,12 @@
  */
 long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) {
   int iVal = 0;
-
-  if (m_pDecContext == NULL)
+  if (DECODER_OPTION_NUM_OF_THREADS == eOptID) {
+    * ((int*)pOption) = m_iThreadCount;
+    return cmResultSuccess;
+  }
+  PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+  if (pDecContext == NULL)
     return cmInitExpected;
 
   if (pOption == NULL)
@@ -387,63 +579,63 @@
     return cmInitParaError;
 
   if (DECODER_OPTION_END_OF_STREAM == eOptID) {
-    iVal = m_pDecContext->bEndOfStreamFlag;
+    iVal = pDecContext->bEndOfStreamFlag;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   }
 #ifdef LONG_TERM_REF
   else if (DECODER_OPTION_IDR_PIC_ID == eOptID) {
-    iVal = m_pDecContext->uiCurIdrPicId;
+    iVal = pDecContext->uiCurIdrPicId;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_FRAME_NUM == eOptID) {
-    iVal = m_pDecContext->iFrameNum;
+    iVal = pDecContext->iFrameNum;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) {
-    iVal = m_pDecContext->bCurAuContainLtrMarkSeFlag;
+    iVal = pDecContext->bCurAuContainLtrMarkSeFlag;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) {
-    iVal = m_pDecContext->iFrameNumOfAuMarkedLtr;
+    iVal = pDecContext->iFrameNumOfAuMarkedLtr;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   }
 #endif
   else if (DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU
-    iVal = m_pDecContext->iFeedbackVclNalInAu;
+    iVal = pDecContext->iFeedbackVclNalInAu;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID
-    iVal = m_pDecContext->iFeedbackTidInAu;
+    iVal = pDecContext->iFeedbackTidInAu;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_IS_REF_PIC == eOptID) {
-    iVal = m_pDecContext->iFeedbackNalRefIdc;
+    iVal = pDecContext->iFeedbackNalRefIdc;
     if (iVal > 0)
       iVal = 1;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) {
-    iVal = (int)m_pDecContext->pParam->eEcActiveIdc;
+    iVal = (int)pDecContext->pParam->eEcActiveIdc;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging
     SDecoderStatistics* pDecoderStatistics = (static_cast<SDecoderStatistics*> (pOption));
 
-    memcpy (pDecoderStatistics, m_pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics));
+    memcpy (pDecoderStatistics, pDecContext->pDecoderStatistics, sizeof (SDecoderStatistics));
 
-    if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status
-      pDecoderStatistics->fAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
-          (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount);
-      pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
-          (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount + m_pDecContext->pDecoderStatistics->uiFreezingIDRNum +
-           m_pDecContext->pDecoderStatistics->uiFreezingNonIDRNum);
+    if (pDecContext->pDecoderStatistics->uiDecodedFrameCount != 0) { //not original status
+      pDecoderStatistics->fAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) /
+          (pDecContext->pDecoderStatistics->uiDecodedFrameCount);
+      pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (pDecContext->dDecTime) /
+          (pDecContext->pDecoderStatistics->uiDecodedFrameCount + pDecContext->pDecoderStatistics->uiFreezingIDRNum +
+           pDecContext->pDecoderStatistics->uiFreezingNonIDRNum);
     }
     return cmResultSuccess;
   } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
     if (pOption) {
-      iVal = m_pDecContext->pDecoderStatistics->iStatisticsLogInterval;
+      iVal = pDecContext->pDecoderStatistics->iStatisticsLogInterval;
       * ((unsigned int*)pOption) = iVal;
       return cmResultSuccess;
     }
@@ -450,34 +642,34 @@
   } else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI
     PVuiSarInfo pVuiSarInfo = (static_cast<PVuiSarInfo> (pOption));
     memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo));
-    if (!m_pDecContext->pSps) {
+    if (!pDecContext->pSps) {
       return cmInitExpected;
     } else {
-      pVuiSarInfo->uiSarWidth = m_pDecContext->pSps->sVui.uiSarWidth;
-      pVuiSarInfo->uiSarHeight = m_pDecContext->pSps->sVui.uiSarHeight;
-      pVuiSarInfo->bOverscanAppropriateFlag = m_pDecContext->pSps->sVui.bOverscanAppropriateFlag;
+      pVuiSarInfo->uiSarWidth = pDecContext->pSps->sVui.uiSarWidth;
+      pVuiSarInfo->uiSarHeight = pDecContext->pSps->sVui.uiSarHeight;
+      pVuiSarInfo->bOverscanAppropriateFlag = pDecContext->pSps->sVui.bOverscanAppropriateFlag;
       return cmResultSuccess;
     }
   } else if (DECODER_OPTION_PROFILE == eOptID) {
-    if (!m_pDecContext->pSps) {
+    if (!pDecContext->pSps) {
       return cmInitExpected;
     }
-    iVal = (int)m_pDecContext->pSps->uiProfileIdc;
+    iVal = (int)pDecContext->pSps->uiProfileIdc;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_LEVEL == eOptID) {
-    if (!m_pDecContext->pSps) {
+    if (!pDecContext->pSps) {
       return cmInitExpected;
     }
-    iVal = (int)m_pDecContext->pSps->uiLevelIdc;
+    iVal = (int)pDecContext->pSps->uiLevelIdc;
     * ((int*)pOption) = iVal;
     return cmResultSuccess;
   } else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) {
-    if (m_pDecContext->pSps && m_pDecContext->pSps->uiProfileIdc != 66) {
-      * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts > 0 ? m_sReoderingStatus.iNumOfPicts : 0;
-    } else {
-      * ((int*)pOption) = 0;
+    for (int32_t activeThread = 0; activeThread < m_DecCtxActiveCount; ++activeThread) {
+      WAIT_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+      RELEASE_SEMAPHORE (&m_pDecThrCtxActive[activeThread]->sThreadInfo.sIsIdle);
     }
+    * ((int*)pOption) = m_sReoderingStatus.iNumOfPicts;
     return cmResultSuccess;
   }
 
@@ -488,7 +680,17 @@
     const int kiSrcLen,
     unsigned char** ppDst,
     SBufferInfo* pDstInfo) {
-  int iRet;
+  int iRet = dsErrorFree;
+  if (m_iThreadCount > 1) {
+    iRet = ThreadDecodeFrameInternal (kpSrc, kiSrcLen, ppDst, pDstInfo);
+    if (m_sReoderingStatus.iNumOfPicts) {
+      WAIT_EVENT (&m_sBufferingEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+      RESET_EVENT (&m_sReleaseBufferEvent);
+      ReleaseBufferedReadyPicture (NULL, ppDst, pDstInfo);
+      SET_EVENT (&m_sReleaseBufferEvent);
+    }
+    return (DECODING_STATE)iRet;
+  }
   //SBufferInfo sTmpBufferInfo;
   //unsigned char* ppTmpDst[3] = {NULL, NULL, NULL};
   iRet = (int)DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo);
@@ -506,11 +708,11 @@
   return (DECODING_STATE)iRet;
 }
 
-DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
+DECODING_STATE CWelsDecoder::DecodeFrame2WithCtx (PWelsDecoderContext pDecContext, const unsigned char* kpSrc,
     const int kiSrcLen,
     unsigned char** ppDst,
     SBufferInfo* pDstInfo) {
-  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+  if (pDecContext == NULL || pDecContext->pParam == NULL) {
     if (m_pWelsTrace != NULL) {
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n");
     }
@@ -517,13 +719,13 @@
     return dsInitialOptExpected;
   }
 
-  if (m_pDecContext->pParam->bParseOnly) {
+  if (pDecContext->pParam->bParseOnly) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! \n");
-    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    pDecContext->iErrorCode |= dsInvalidArgument;
     return dsInvalidArgument;
   }
-  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
-    if (ResetDecoder())
+  if (CheckBsBuffer (pDecContext, kiSrcLen)) {
+    if (ResetDecoder (pDecContext))
       return dsOutOfMemory;
 
     return dsErrorFree;
@@ -539,147 +741,175 @@
       WelsFflush (m_pFBSSize);
     }
 #endif//OUTPUT_BIT_STREAM
-    m_pDecContext->bEndOfStreamFlag = false;
+    pDecContext->bEndOfStreamFlag = false;
   } else {
     //For application MODE, the error detection should be added for safe.
     //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
-    m_pDecContext->bEndOfStreamFlag = true;
-    m_pDecContext->bInstantDecFlag = true;
+    pDecContext->bEndOfStreamFlag = true;
+    pDecContext->bInstantDecFlag = true;
   }
 
   int64_t iStart, iEnd;
   iStart = WelsTime();
 
-  ppDst[0] = ppDst[1] = ppDst[2] = NULL;
-  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
-  m_pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
+  if (pDecContext->pThreadCtx == NULL) {
+    ppDst[0] = ppDst[1] = ppDst[2] = NULL;
+  }
+  pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+  pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
   unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp;
-  memset (pDstInfo, 0, sizeof (SBufferInfo));
+  if (pDecContext->pThreadCtx == NULL) {
+    memset (pDstInfo, 0, sizeof (SBufferInfo));
+  }
   pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp;
 #ifdef LONG_TERM_REF
-  m_pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
-  m_pDecContext->bCurAuContainLtrMarkSeFlag = false;
-  m_pDecContext->iFrameNumOfAuMarkedLtr = 0;
-  m_pDecContext->iFrameNum = -1; //initialize
+  pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
+  pDecContext->bCurAuContainLtrMarkSeFlag = false;
+  pDecContext->iFrameNumOfAuMarkedLtr = 0;
+  pDecContext->iFrameNum = -1; //initialize
 #endif
 
-  m_pDecContext->iFeedbackTidInAu = -1; //initialize
-  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
+  pDecContext->iFeedbackTidInAu = -1; //initialize
+  pDecContext->iFeedbackNalRefIdc = -1; //initialize
   if (pDstInfo) {
     pDstInfo->uiOutYuvTimeStamp = 0;
-    m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+    pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
   } else {
-    m_pDecContext->uiTimeStamp = 0;
+    pDecContext->uiTimeStamp = 0;
   }
-  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, ppDst,
+  WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, ppDst,
                 pDstInfo, NULL); //iErrorCode has been modified in this function
-  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
-  if (m_pDecContext->iErrorCode) {
+  pDecContext->bInstantDecFlag = false; //reset no-delay flag
+  if (pDecContext->iErrorCode) {
     EWelsNalUnitType eNalType =
       NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently
 
-    eNalType = m_pDecContext->sCurNalHead.eNalUnitType;
-    if (m_pDecContext->iErrorCode & dsOutOfMemory) {
-      if (ResetDecoder()) {
+    eNalType = pDecContext->sCurNalHead.eNalUnitType;
+    if (pDecContext->iErrorCode & dsOutOfMemory) {
+      if (ResetDecoder (pDecContext)) {
         return dsOutOfMemory;
       }
       return dsErrorFree;
     }
-    if (m_pDecContext->iErrorCode & dsRefListNullPtrs) {
-      if (ResetDecoder()) {
+    if (pDecContext->iErrorCode & dsRefListNullPtrs) {
+      if (ResetDecoder (pDecContext)) {
         return dsRefListNullPtrs;
       }
       return dsErrorFree;
     }
-    if ((m_pDecContext->iErrorCode & (dsBitstreamError | dsDataErrorConcealed)) && m_pDecContext->eSliceType == B_SLICE) {
-      if (ResetDecoder()) {
+    if ((pDecContext->iErrorCode & (dsBitstreamError | dsDataErrorConcealed)) && pDecContext->eSliceType == B_SLICE) {
+      if (ResetDecoder (pDecContext)) {
         pDstInfo->iBufferStatus = 0;
-        return (DECODING_STATE)m_pDecContext->iErrorCode;
+        return (DECODING_STATE)pDecContext->iErrorCode;
       }
       return dsErrorFree;
     }
     //for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss.
     if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) ||
-        (VIDEO_BITSTREAM_AVC == m_pDecContext->eVideoType)) {
-      if (m_pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+        (VIDEO_BITSTREAM_AVC == pDecContext->eVideoType)) {
+      if (pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
 #ifdef LONG_TERM_REF
-        m_pDecContext->bParamSetsLostFlag = true;
+        pDecContext->bParamSetsLostFlag = true;
 #else
-        m_pDecContext->bReferenceLostAtT0Flag = true;
+        pDecContext->bReferenceLostAtT0Flag = true;
 #endif
       }
     }
 
-    if (m_pDecContext->bPrintFrameErrorTraceFlag) {
+    if (pDecContext->bPrintFrameErrorTraceFlag) {
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n",
-               m_pDecContext->iErrorCode);
-      m_pDecContext->bPrintFrameErrorTraceFlag = false;
+               pDecContext->iErrorCode);
+      pDecContext->bPrintFrameErrorTraceFlag = false;
     } else {
-      m_pDecContext->iIgnoredErrorInfoPacketCount++;
-      if (m_pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
+      pDecContext->iIgnoredErrorInfoPacketCount++;
+      if (pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
         WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! Restart as 0.");
-        m_pDecContext->iIgnoredErrorInfoPacketCount = 0;
+        pDecContext->iIgnoredErrorInfoPacketCount = 0;
       }
     }
-    if ((m_pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
+    if ((pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
       //TODO after dec status updated
-      m_pDecContext->iErrorCode |= dsDataErrorConcealed;
+      pDecContext->iErrorCode |= dsDataErrorConcealed;
 
-      m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
-      if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-        ResetDecStatNums (m_pDecContext->pDecoderStatistics);
-        m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+      pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+      if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+        ResetDecStatNums (pDecContext->pDecoderStatistics);
+        pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
       }
-      int32_t iMbConcealedNum = m_pDecContext->iMbEcedNum + m_pDecContext->iMbEcedPropNum;
-      m_pDecContext->pDecoderStatistics->uiAvgEcRatio = m_pDecContext->iMbNum == 0 ?
-          (m_pDecContext->pDecoderStatistics->uiAvgEcRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
-                m_pDecContext->pDecoderStatistics->uiAvgEcRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
-                      iMbConcealedNum * 100) / m_pDecContext->iMbNum));
-      m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio = m_pDecContext->iMbNum == 0 ?
-          (m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
-                m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio * m_pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
-                      m_pDecContext->iMbEcedPropNum * 100) / m_pDecContext->iMbNum));
-      m_pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
-      m_pDecContext->pDecoderStatistics->uiAvgEcRatio = m_pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
-          m_pDecContext->pDecoderStatistics->uiAvgEcRatio / m_pDecContext->pDecoderStatistics->uiEcFrameNum;
-      m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio = m_pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
-          m_pDecContext->pDecoderStatistics->uiAvgEcPropRatio / m_pDecContext->pDecoderStatistics->uiEcFrameNum;
+      int32_t iMbConcealedNum = pDecContext->iMbEcedNum + pDecContext->iMbEcedPropNum;
+      pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->iMbNum == 0 ?
+          (pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
+                pDecContext->pDecoderStatistics->uiAvgEcRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
+                      iMbConcealedNum * 100) / pDecContext->iMbNum));
+      pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->iMbNum == 0 ?
+          (pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) : ((
+                pDecContext->pDecoderStatistics->uiAvgEcPropRatio * pDecContext->pDecoderStatistics->uiEcFrameNum) + ((
+                      pDecContext->iMbEcedPropNum * 100) / pDecContext->iMbNum));
+      pDecContext->pDecoderStatistics->uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
+      pDecContext->pDecoderStatistics->uiAvgEcRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
+          pDecContext->pDecoderStatistics->uiAvgEcRatio / pDecContext->pDecoderStatistics->uiEcFrameNum;
+      pDecContext->pDecoderStatistics->uiAvgEcPropRatio = pDecContext->pDecoderStatistics->uiEcFrameNum == 0 ? 0 :
+          pDecContext->pDecoderStatistics->uiAvgEcPropRatio / pDecContext->pDecoderStatistics->uiEcFrameNum;
     }
     iEnd = WelsTime();
-    m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+    pDecContext->dDecTime += (iEnd - iStart) / 1e3;
 
-    OutputStatisticsLog (*m_pDecContext->pDecoderStatistics);
+    OutputStatisticsLog (*pDecContext->pDecoderStatistics);
 
-#ifdef  _PICTURE_REORDERING_
-    ReorderPicturesInDisplay (ppDst, pDstInfo);
-#endif
+    if (pDecContext->pThreadCtx != NULL) {
+      WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+      RESET_EVENT (&m_sBufferingEvent);
+      BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+      SET_EVENT (&m_sBufferingEvent);
+    } else {
+      ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo);
+    }
 
-    return (DECODING_STATE)m_pDecContext->iErrorCode;
+    return (DECODING_STATE)pDecContext->iErrorCode;
   }
   // else Error free, the current codec works well
 
   if (pDstInfo->iBufferStatus == 1) {
 
-    m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
-    if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-      ResetDecStatNums (m_pDecContext->pDecoderStatistics);
-      m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+    pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+    if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+      ResetDecStatNums (pDecContext->pDecoderStatistics);
+      pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
     }
 
-    OutputStatisticsLog (*m_pDecContext->pDecoderStatistics);
+    OutputStatisticsLog (*pDecContext->pDecoderStatistics);
   }
   iEnd = WelsTime();
-  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+  pDecContext->dDecTime += (iEnd - iStart) / 1e3;
 
-#ifdef  _PICTURE_REORDERING_
-  ReorderPicturesInDisplay (ppDst, pDstInfo);
-#endif
+  if (pDecContext->pThreadCtx != NULL) {
+    WAIT_EVENT (&m_sReleaseBufferEvent, WELS_DEC_THREAD_WAIT_INFINITE);
+    RESET_EVENT (&m_sBufferingEvent);
+    BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+    SET_EVENT (&m_sBufferingEvent);
+  } else {
+    ReorderPicturesInDisplay (pDecContext, ppDst, pDstInfo);
+  }
   return dsErrorFree;
 }
 
+// Decodes with the context held in m_pDecThrCtx[0]; all arguments are forwarded unchanged.
+DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc, const int kiSrcLen,
+    unsigned char** ppDst, SBufferInfo* pDstInfo) {
+  // The result of DecodeFrame2WithCtx is returned to the caller as-is.
+  PWelsDecoderContext pFirstCtx = m_pDecThrCtx[0].pCtx;
+  return DecodeFrame2WithCtx (pFirstCtx, kpSrc, kiSrcLen, ppDst, pDstInfo);
+}
+
 DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst,
     SBufferInfo* pDstInfo) {
-  if (m_pDecContext->bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) {
+  bool bEndOfStreamFlag = true;
+  for (int32_t j = 0; j < m_iThreadCount; ++j) {
+    if (!m_pDecThrCtx[j].pCtx->bEndOfStreamFlag) {
+      bEndOfStreamFlag = false;
+    }
+  }
+  if (bEndOfStreamFlag && m_sReoderingStatus.iNumOfPicts > 0) {
     m_sReoderingStatus.iMinPOC = IMinInt32;
     for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
       if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
@@ -704,12 +934,15 @@
     ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
     ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
     m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
-    if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < m_pDecContext->pPicBuff->iCapacity)
-      m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+    PPicBuff pPicBuff = m_iThreadCount == 1 ? m_pDecThrCtx[0].pCtx->pPicBuff : m_pPicBuff;
+    if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < pPicBuff->iCapacity) {
+      pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+    }
     m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
     m_sReoderingStatus.iMinPOC = IMinInt32;
     --m_sReoderingStatus.iNumOfPicts;
   }
+
   return dsErrorFree;
 }
 
@@ -758,24 +991,15 @@
   }
 }
 
-DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo) {
-  DECODING_STATE iRet = dsErrorFree;
+void CWelsDecoder::BufferingReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
   if (pDstInfo->iBufferStatus == 0) {
-    return iRet;
+    return;
   }
-  ++m_pDecContext->uiDecodingTimeStamp;
-  if (m_pDecContext->pSps->uiProfileIdc != 66 && m_pDecContext->pSps->uiProfileIdc != 83) {
-    /*if (m_pDecContext->pSliceHeader->iPicOrderCntLsb == 0) {
-      m_sReoderingStatus.iLastWrittenPOC = 0;
-      return dsErrorFree;
-    }
-    if (m_sReoderingStatus.iNumOfPicts == 0 && m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin
-        && m_pDecContext->eSliceType != I_SLICE) {
-      m_sReoderingStatus.iLastWrittenPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb;
-      return dsErrorFree;
-    }*/
-    if (m_sReoderingStatus.iNumOfPicts && m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb
-        && m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) {
+  m_bIsBaseline = pCtx->pSps->uiProfileIdc == 66 || pCtx->pSps->uiProfileIdc == 83;
+  if (!m_bIsBaseline) {
+    if (m_sReoderingStatus.iNumOfPicts && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb
+        && pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->bNewSeqBegin) {
       m_sReoderingStatus.iLastGOPRemainPicts = m_sReoderingStatus.iNumOfPicts;
       for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
         if (m_sPictInfoList[i].iPOC > IMinInt32) {
@@ -787,7 +1011,7 @@
         //This can happen when decoder moves to next GOP without being able to decoder first picture PicOrderCntLsb = 0
         bool hasGOPChanged = false;
         for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
-          if (m_sPictInfoList[i].iPOC == m_pDecContext->pSliceHeader->iPicOrderCntLsb) {
+          if (m_sPictInfoList[i].iPOC == pCtx->pSliceHeader->iPicOrderCntLsb) {
             hasGOPChanged = true;
             break;
           }
@@ -802,105 +1026,153 @@
         }
       }
     }
-    for (int32_t i = 0; i < 16; ++i) {
-      if (m_sPictInfoList[i].iPOC == IMinInt32) {
-        memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
-        m_sPictInfoList[i].pData[0] = ppDst[0];
-        m_sPictInfoList[i].pData[1] = ppDst[1];
-        m_sPictInfoList[i].pData[2] = ppDst[2];
-        m_sPictInfoList[i].iPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb;
-        m_sPictInfoList[i].uiDecodingTimeStamp = m_pDecContext->uiDecodingTimeStamp;
-        m_sPictInfoList[i].iPicBuffIdx = m_pDecContext->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx;
-        m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[i].iPicBuffIdx]->bAvailableFlag = false;
-        m_sPictInfoList[i].bLastGOP = false;
-        pDstInfo->iBufferStatus = 0;
-        ++m_sReoderingStatus.iNumOfPicts;
-        if (i > m_sReoderingStatus.iLargestBufferedPicIndex) {
-          m_sReoderingStatus.iLargestBufferedPicIndex = i;
-        }
-        break;
+  }
+  for (int32_t i = 0; i < 16; ++i) {
+    if (m_sPictInfoList[i].iPOC == IMinInt32) {
+      memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
+      m_sPictInfoList[i].pData[0] = ppDst[0];
+      m_sPictInfoList[i].pData[1] = ppDst[1];
+      m_sPictInfoList[i].pData[2] = ppDst[2];
+      m_sPictInfoList[i].iPOC = pCtx->pSliceHeader->iPicOrderCntLsb;
+      m_sPictInfoList[i].uiDecodingTimeStamp = pCtx->uiDecodingTimeStamp;
+      m_sPictInfoList[i].iPicBuffIdx = pCtx->pLastDecPicInfo->pPreviousDecodedPictureInDpb->iPicBuffIdx;
+      pCtx->pPicBuff->ppPic[m_sPictInfoList[i].iPicBuffIdx]->bAvailableFlag = false;
+      m_sPictInfoList[i].bLastGOP = false;
+      pDstInfo->iBufferStatus = 0;
+      ++m_sReoderingStatus.iNumOfPicts;
+      if (i > m_sReoderingStatus.iLargestBufferedPicIndex) {
+        m_sReoderingStatus.iLargestBufferedPicIndex = i;
       }
+      break;
     }
-    if (m_sReoderingStatus.iLastGOPRemainPicts > 0) {
-      m_sReoderingStatus.iMinPOC = IMinInt32;
-      for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
-        if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
-        if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC
-            && m_sPictInfoList[i].bLastGOP) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
+  }
+}
+
+void CWelsDecoder::ReleaseBufferedReadyPicture (PWelsDecoderContext pCtx, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  PPicBuff pPicBuff = pCtx ? pCtx->pPicBuff : m_pPicBuff;
+  if (!m_bIsBaseline && m_sReoderingStatus.iLastGOPRemainPicts > 0) {
+    m_sReoderingStatus.iMinPOC = IMinInt32;
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].bLastGOP) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
       }
-      m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
+      if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC
+          && m_sPictInfoList[i].bLastGOP) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
+      }
+    }
+    m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
 #if defined (_DEBUG)
 #ifdef _MOTION_VECTOR_DUMP_
-      fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
+    fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
 #endif
 #endif
+    memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+    ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
+    ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
+    ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
+    m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+    pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+    m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+    m_sReoderingStatus.iMinPOC = IMinInt32;
+    --m_sReoderingStatus.iNumOfPicts;
+    --m_sReoderingStatus.iLastGOPRemainPicts;
+    if (m_sReoderingStatus.iLastGOPRemainPicts == 0) {
+      m_sReoderingStatus.iLastWrittenPOC = IMinInt32;
+    }
+    return;
+  }
+  if (m_sReoderingStatus.iNumOfPicts && m_bIsBaseline) {
+    uint32_t uiDecodingTimeStamp = 0;
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sPictInfoList[i].iPOC > IMinInt32) {
+        uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+        m_sReoderingStatus.iPictInfoIndex = i;
+        break;
+      }
+    }
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sReoderingStatus.iPictInfoIndex != i && m_sPictInfoList[i].iPOC > IMinInt32
+          && m_sPictInfoList[i].sBufferInfo.uiInBsTimeStamp < uiDecodingTimeStamp) {
+        uiDecodingTimeStamp = m_sPictInfoList[i].uiDecodingTimeStamp;
+        m_sReoderingStatus.iPictInfoIndex = i;
+      }
+    }
+    if (uiDecodingTimeStamp > 0) {
       memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
       ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
       ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
       ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
       m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
-      if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < m_pDecContext->pPicBuff->iCapacity)
-        m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
-      m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
-      m_sReoderingStatus.iMinPOC = IMinInt32;
+      pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
       --m_sReoderingStatus.iNumOfPicts;
-      --m_sReoderingStatus.iLastGOPRemainPicts;
-      if (m_sReoderingStatus.iLastGOPRemainPicts == 0) {
-        m_sReoderingStatus.iLastWrittenPOC = IMinInt32;
-      }
-      return iRet;
     }
-    if (m_sReoderingStatus.iNumOfPicts > 0) {
-      m_sReoderingStatus.iMinPOC = IMinInt32;
-      for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
-        if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
-        if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) {
-          m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
-          m_sReoderingStatus.iPictInfoIndex = i;
-        }
+    return;
+  }
+  if (m_sReoderingStatus.iNumOfPicts > 0) {
+    m_sReoderingStatus.iMinPOC = IMinInt32;
+    for (int32_t i = 0; i <= m_sReoderingStatus.iLargestBufferedPicIndex; ++i) {
+      if (m_sReoderingStatus.iMinPOC == IMinInt32 && m_sPictInfoList[i].iPOC > IMinInt32) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
       }
+      if (m_sPictInfoList[i].iPOC > IMinInt32 && m_sPictInfoList[i].iPOC < m_sReoderingStatus.iMinPOC) {
+        m_sReoderingStatus.iMinPOC = m_sPictInfoList[i].iPOC;
+        m_sReoderingStatus.iPictInfoIndex = i;
+      }
     }
-    if (m_sReoderingStatus.iMinPOC > IMinInt32) {
-      if ((m_sReoderingStatus.iLastWrittenPOC > IMinInt32
-           && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1)
-          || m_sReoderingStatus.iMinPOC < m_pDecContext->pSliceHeader->iPicOrderCntLsb) {
-        m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
+  }
+  if (m_sReoderingStatus.iMinPOC > IMinInt32) {
+    bool isReady = false;
+    if (pCtx != NULL) {
+      isReady = (m_sReoderingStatus.iLastWrittenPOC > IMinInt32
+                 && m_sReoderingStatus.iMinPOC - m_sReoderingStatus.iLastWrittenPOC <= 1)
+                || m_sReoderingStatus.iMinPOC < pCtx->pSliceHeader->iPicOrderCntLsb;
+    } else {
+      isReady = m_sReoderingStatus.iMinPOC == 0 || (m_sReoderingStatus.iLastWrittenPOC >= 0
+                && m_sReoderingStatus.iMinPOC <= m_sReoderingStatus.iLastWrittenPOC + 2) ;
+    }
+    if (isReady) {
+      m_sReoderingStatus.iLastWrittenPOC = m_sReoderingStatus.iMinPOC;
 #if defined (_DEBUG)
 #ifdef _MOTION_VECTOR_DUMP_
-        fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
+      fprintf (stderr, "Output POC: #%d\n", m_sReoderingStatus.iLastWrittenPOC);
 #endif
 #endif
-        memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
-        ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
-        ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
-        ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
-        m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
-        if (m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx < m_pDecContext->pPicBuff->iCapacity)
-          m_pDecContext->pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
-        m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
-        m_sReoderingStatus.iMinPOC = IMinInt32;
-        --m_sReoderingStatus.iNumOfPicts;
-        return iRet;
-      }
+      memcpy (pDstInfo, &m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+      ppDst[0] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[0];
+      ppDst[1] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[1];
+      ppDst[2] = m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].pData[2];
+      m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPOC = IMinInt32;
+      pPicBuff->ppPic[m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].iPicBuffIdx]->bAvailableFlag = true;
+      m_sPictInfoList[m_sReoderingStatus.iPictInfoIndex].bLastGOP = false;
+      m_sReoderingStatus.iMinPOC = IMinInt32;
+      --m_sReoderingStatus.iNumOfPicts;
     }
   }
+}
 
+DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (PWelsDecoderContext pDecContext, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) { // Runs a reported picture through BufferingReadyPicture/ReleaseBufferedReadyPicture unless the stream is baseline.
+  DECODING_STATE iRet = dsErrorFree; // never reassigned: this function always returns dsErrorFree
+  if (pDstInfo->iBufferStatus == 1) { // presumably 1 means a decoded picture is available in pDstInfo -- confirm
+    ++pDecContext->uiDecodingTimeStamp;
+    m_bIsBaseline = pDecContext->pSps->uiProfileIdc == 66 || pDecContext->pSps->uiProfileIdc == 83; // profile_idc 66 or 83 is treated as baseline
+    if (!m_bIsBaseline) { // baseline streams skip both helper calls below
+      BufferingReadyPicture (pDecContext, ppDst, pDstInfo);
+      ReleaseBufferedReadyPicture (pDecContext, ppDst, pDstInfo);
+    }
+  }
   return iRet;
 }
 
-DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    SParserBsInfo* pDstInfo) {
-  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc, const int kiSrcLen, SParserBsInfo* pDstInfo) {
+  PWelsDecoderContext pDecContext = m_pDecThrCtx[0].pCtx;
+
+  if (pDecContext == NULL || pDecContext->pParam == NULL) {
     if (m_pWelsTrace != NULL) {
       WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n");
     }
@@ -907,14 +1179,14 @@
     return dsInitialOptExpected;
   }
 
-  if (!m_pDecContext->pParam->bParseOnly) {
+  if (!pDecContext->pParam->bParseOnly) {
     WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n");
-    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    pDecContext->iErrorCode |= dsInvalidArgument;
     return dsInvalidArgument;
   }
   int64_t iEnd, iStart = WelsTime();
-  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
-    if (ResetDecoder())
+  if (CheckBsBuffer (pDecContext, kiSrcLen)) {
+    if (ResetDecoder (pDecContext))
       return dsOutOfMemory;
 
     return dsErrorFree;
@@ -926,58 +1198,57 @@
       WelsFflush (m_pFBS);
     }
 #endif//OUTPUT_BIT_STREAM
-    m_pDecContext->bEndOfStreamFlag = false;
+    pDecContext->bEndOfStreamFlag = false;
   } else {
     //For application MODE, the error detection should be added for safe.
     //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
-    m_pDecContext->bEndOfStreamFlag = true;
-    m_pDecContext->bInstantDecFlag = true;
+    pDecContext->bEndOfStreamFlag = true;
+    pDecContext->bInstantDecFlag = true;
   }
 
-  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
-  m_pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
-  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
-  if (!m_pDecContext->bFramePending) { //frame complete
-    m_pDecContext->pParserBsInfo->iNalNum = 0;
-    memset (m_pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
+  pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+  pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
+  pDecContext->iFeedbackNalRefIdc = -1; //initialize
+  if (!pDecContext->bFramePending) { //frame complete
+    pDecContext->pParserBsInfo->iNalNum = 0;
+    memset (pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
   }
   pDstInfo->iNalNum = 0;
   pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0;
   if (pDstInfo) {
-    m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+    pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
     pDstInfo->uiOutBsTimeStamp = 0;
   } else {
-    m_pDecContext->uiTimeStamp = 0;
+    pDecContext->uiTimeStamp = 0;
   }
-  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
-  if (m_pDecContext->iErrorCode & dsOutOfMemory) {
-    if (ResetDecoder())
+  WelsDecodeBs (pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
+  if (pDecContext->iErrorCode & dsOutOfMemory) {
+    if (ResetDecoder (pDecContext))
       return dsOutOfMemory;
     return dsErrorFree;
   }
 
-  if (!m_pDecContext->bFramePending && m_pDecContext->pParserBsInfo->iNalNum) {
-    memcpy (pDstInfo, m_pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
+  if (!pDecContext->bFramePending && pDecContext->pParserBsInfo->iNalNum) {
+    memcpy (pDstInfo, pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
 
-    if (m_pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
-      m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
-      if (m_pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-        ResetDecStatNums (m_pDecContext->pDecoderStatistics);
-        m_pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+    if (pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
+      pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
+      if (pDecContext->pDecoderStatistics->uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+        ResetDecStatNums (pDecContext->pDecoderStatistics);
+        pDecContext->pDecoderStatistics->uiDecodedFrameCount++;
       }
     }
   }
 
-  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
+  pDecContext->bInstantDecFlag = false; //reset no-delay flag
 
-  if (m_pDecContext->iErrorCode && m_pDecContext->bPrintFrameErrorTraceFlag) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", m_pDecContext->iErrorCode);
-    m_pDecContext->bPrintFrameErrorTraceFlag = false;
+  if (pDecContext->iErrorCode && pDecContext->bPrintFrameErrorTraceFlag) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", pDecContext->iErrorCode);
+    pDecContext->bPrintFrameErrorTraceFlag = false;
   }
   iEnd = WelsTime();
-  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-
-  return (DECODING_STATE) m_pDecContext->iErrorCode;
+  pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+  return (DECODING_STATE)pDecContext->iErrorCode;
 }
 
 DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc,
@@ -1016,6 +1287,92 @@
     int& iColorFormat) {
   DECODING_STATE state = dsErrorFree;
 
+  return state;
+}
+
+DECODING_STATE CWelsDecoder::ParseAccessUnit (SWelsDecoderThreadCTX& sThreadCtx) { // Seeds the thread context from decoder-wide state, runs DecodeFrame2WithCtx and InitConstructAccessUnit, then writes shared state back.
+  sThreadCtx.pCtx->bHasNewSps = false;
+  sThreadCtx.pCtx->bParamSetsLostFlag = m_bParamSetsLostFlag; // copy decoder-wide flags into this context
+  sThreadCtx.pCtx->bFreezeOutput = m_bFreezeOutput;
+  sThreadCtx.pCtx->uiDecodingTimeStamp = ++m_uiDecodeTimeStamp; // stamp with the next value of the decoder-wide counter
+  bool bPicBuffChanged = false; // set when this context is switched over to m_pPicBuff below
+  if (m_pLastDecThrCtx != NULL && sThreadCtx.pCtx->sSpsPpsCtx.iSeqId < m_pLastDecThrCtx->pCtx->sSpsPpsCtx.iSeqId) { // this context's iSeqId is lower than the last-used context's
+    CopySpsPps (m_pLastDecThrCtx->pCtx, sThreadCtx.pCtx);
+    sThreadCtx.pCtx->iPicQueueNumber = m_pLastDecThrCtx->pCtx->iPicQueueNumber;
+    if (sThreadCtx.pCtx->pPicBuff != m_pPicBuff) { // context points at a different picture buffer than the decoder-wide one
+      bPicBuffChanged = true;
+      sThreadCtx.pCtx->pPicBuff = m_pPicBuff;
+      sThreadCtx.pCtx->bHaveGotMemory = m_pPicBuff != NULL;
+      sThreadCtx.pCtx->iImgWidthInPixel = m_pLastDecThrCtx->pCtx->iImgWidthInPixel;
+      sThreadCtx.pCtx->iImgHeightInPixel = m_pLastDecThrCtx->pCtx->iImgHeightInPixel;
+    }
+  }
+  int32_t iRet = DecodeFrame2WithCtx (sThreadCtx.pCtx, sThreadCtx.kpSrc, sThreadCtx.kiSrcLen, sThreadCtx.ppDst,
+                                      &sThreadCtx.sDstInfo);
+
+  int32_t iErr = InitConstructAccessUnit (sThreadCtx.pCtx, &sThreadCtx.sDstInfo);
+  if (ERR_NONE != iErr) { // on failure, return both codes OR-ed together and skip the write-back below
+    return (DECODING_STATE) (iRet | iErr);
+  }
+  if (sThreadCtx.pCtx->bNewSeqBegin) { // new sequence: this context's picture buffer becomes the decoder-wide one
+    m_pPicBuff = sThreadCtx.pCtx->pPicBuff;
+  } else if (bPicBuffChanged) { // buffer was swapped in above: re-init DQ layers with the SPS macroblock counts times 16 (<< 4)
+    InitialDqLayersContext (sThreadCtx.pCtx, sThreadCtx.pCtx->pSps->iMbWidth << 4, sThreadCtx.pCtx->pSps->iMbHeight << 4);
+  }
+  m_bParamSetsLostFlag = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bParamSetsLostFlag; // a new sequence clears the flag; otherwise take the context's value
+  m_bFreezeOutput = sThreadCtx.pCtx->bNewSeqBegin ? false : sThreadCtx.pCtx->bFreezeOutput;
+  return (DECODING_STATE)iErr; // iErr equals ERR_NONE here; NOTE(review): iRet from DecodeFrame2WithCtx is not returned on this path -- confirm intended
+}
+/*
+* Run the decoding of a picture in a separate thread.
+*/
+
+int CWelsDecoder::ThreadDecodeFrameInternal (const unsigned char* kpSrc, const int kiSrcLen, unsigned char** ppDst,
+    SBufferInfo* pDstInfo) { // Picks a thread context, waits until it is idle, runs ParseAccessUnit on it, then signals its sIsActivated semaphore.
+  int state = dsErrorFree; // NOTE(review): never reassigned below, so this function always returns dsErrorFree
+  int32_t i, j;
+  int32_t signal = 0; // index into m_pDecThrCtx of the context chosen for this call
+
+  //serial using of threads
+  if (m_DecCtxActiveCount < m_iThreadCount) { // not every context is in the active list yet: use the next index
+    signal = m_DecCtxActiveCount;
+  } else { // otherwise reuse the context at the head of the active list
+    signal = m_pDecThrCtxActive[0]->sThreadInfo.uiThrNum;
+  }
+
+  WAIT_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE); // block until the chosen context's sIsIdle is signalled
+
+  for (i = 0; i < m_DecCtxActiveCount; ++i) { // if the chosen context is already in the active list, remove it
+    if (m_pDecThrCtxActive[i] == &m_pDecThrCtx[signal]) {
+      m_pDecThrCtxActive[i] = NULL;
+      for (j = i; j < m_DecCtxActiveCount - 1; j++) { // shift later entries down one slot to close the gap
+        m_pDecThrCtxActive[j] = m_pDecThrCtxActive[j + 1];
+        m_pDecThrCtxActive[j + 1] = NULL;
+      }
+      --m_DecCtxActiveCount;
+      break;
+    }
+  }
+
+  m_pDecThrCtxActive[m_DecCtxActiveCount++] = &m_pDecThrCtx[signal]; // (re)append the chosen context at the tail of the active list
+  if (m_pLastDecThrCtx != NULL) { // link this context to the one used by the previous call
+    m_pDecThrCtx[signal].pCtx->pLastThreadCtx = m_pLastDecThrCtx;
+  }
+  m_pDecThrCtx[signal].kpSrc = const_cast<uint8_t*> (kpSrc); // the cast drops constness for storage in the context
+  m_pDecThrCtx[signal].kiSrcLen = kiSrcLen;
+  m_pDecThrCtx[signal].ppDst = ppDst;
+  memcpy (&m_pDecThrCtx[signal].sDstInfo, pDstInfo, sizeof (SBufferInfo)); // copy the caller's buffer info into the context
+
+  ParseAccessUnit (m_pDecThrCtx[signal]); // NOTE(review): the returned DECODING_STATE is ignored -- confirm intended
+  m_pLastDecThrCtx = &m_pDecThrCtx[signal]; // remember this context for the next call
+  m_pDecThrCtx[signal].sThreadInfo.uiCommand = WELS_DEC_THREAD_COMMAND_RUN;
+  RELEASE_SEMAPHORE (&m_pDecThrCtx[signal].sThreadInfo.sIsActivated);
+
+  // wait early picture
+  if (m_DecCtxActiveCount >= m_iThreadCount) { // every context is in the active list: wait for the head entry to become idle
+    WAIT_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle, WELS_DEC_THREAD_WAIT_INFINITE);
+    RELEASE_SEMAPHORE (&m_pDecThrCtxActive[0]->sThreadInfo.sIsIdle); // release it again at once, so the sIsIdle wait at the top of the next call can succeed
+  }
   return state;
 }