shithub: openh264

Download patch

ref: 938f4417514a5b3fe8480b8d65bee1bcc08d5e14
parent: c7b50517386d21e3d4906c36e020602f3c253e53
parent: cf8574575201ffbedc00374aa138fff17e859f4a
author: sijchen <[email protected]>
date: Mon Nov 3 10:38:24 EST 2014

Merge pull request #1480 from dongzha/cleanCabacDecoder

add decoder cabac support and add  UT

--- a/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj
@@ -30,6 +30,8 @@
 		4CE4469F18BC5EAB0017DF25 /* welsDecoderExt.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4468518BC5EAB0017DF25 /* welsDecoderExt.cpp */; };
 		4CE447AC18BC6BE90017DF25 /* block_add_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CE447A718BC6BE90017DF25 /* block_add_neon.S */; };
 		4CE447AE18BC6BE90017DF25 /* intra_pred_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 4CE447A918BC6BE90017DF25 /* intra_pred_neon.S */; };
+		6A3E814219D79AE900C19C1F /* cabac_decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6A3E814119D79AE900C19C1F /* cabac_decoder.cpp */; };
+		6A3E814419D7A40600C19C1F /* parse_mb_syn_cabac.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6A3E814319D7A40600C19C1F /* parse_mb_syn_cabac.cpp */; };
 		6C749B6A197CC6E600A111F9 /* block_add_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 6C749B69197CC6E600A111F9 /* block_add_aarch64_neon.S */; };
 		9ABF4382193EB60900A6BD61 /* expand_pic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9ABF4381193EB60900A6BD61 /* expand_pic.cpp */; };
 		9AED66561946A1DE009A3567 /* welsCodecTrace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 9AED66551946A1DE009A3567 /* welsCodecTrace.cpp */; };
@@ -109,6 +111,10 @@
 		4CE4468518BC5EAB0017DF25 /* welsDecoderExt.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = welsDecoderExt.cpp; sourceTree = "<group>"; };
 		4CE447A718BC6BE90017DF25 /* block_add_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = block_add_neon.S; sourceTree = "<group>"; };
 		4CE447A918BC6BE90017DF25 /* intra_pred_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; path = intra_pred_neon.S; sourceTree = "<group>"; };
+		6A3E814019D79AD900C19C1F /* cabac_decoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cabac_decoder.h; sourceTree = "<group>"; };
+		6A3E814119D79AE900C19C1F /* cabac_decoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = cabac_decoder.cpp; sourceTree = "<group>"; };
+		6A3E814319D7A40600C19C1F /* parse_mb_syn_cabac.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_mb_syn_cabac.cpp; sourceTree = "<group>"; };
+		6A3E814519D7A40D00C19C1F /* parse_mb_syn_cabac.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_mb_syn_cabac.h; sourceTree = "<group>"; };
 		6C749B69197CC6E600A111F9 /* block_add_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = block_add_aarch64_neon.S; path = arm64/block_add_aarch64_neon.S; sourceTree = "<group>"; };
 		9ABF4380193EB5F700A6BD61 /* expand_pic.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = expand_pic.h; path = ../../../common/inc/expand_pic.h; sourceTree = "<group>"; };
 		9ABF4381193EB60900A6BD61 /* expand_pic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = expand_pic.cpp; path = ../../../common/src/expand_pic.cpp; sourceTree = "<group>"; };
@@ -191,6 +197,8 @@
 		4CE4464418BC5EAA0017DF25 /* inc */ = {
 			isa = PBXGroup;
 			children = (
+				6A3E814519D7A40D00C19C1F /* parse_mb_syn_cabac.h */,
+				6A3E814019D79AD900C19C1F /* cabac_decoder.h */,
 				9AED665A1946A21D009A3567 /* utils.h */,
 				9ABF4380193EB5F700A6BD61 /* expand_pic.h */,
 				F0B204FA18FD23CF005DA23F /* error_concealment.h */,
@@ -232,6 +240,8 @@
 		4CE4466618BC5EAA0017DF25 /* src */ = {
 			isa = PBXGroup;
 			children = (
+				6A3E814319D7A40600C19C1F /* parse_mb_syn_cabac.cpp */,
+				6A3E814119D79AE900C19C1F /* cabac_decoder.cpp */,
 				9AED66581946A203009A3567 /* utils.cpp */,
 				9ABF4381193EB60900A6BD61 /* expand_pic.cpp */,
 				F0B204FB18FD23D8005DA23F /* error_concealment.cpp */,
@@ -346,6 +356,7 @@
 			isa = PBXSourcesBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				6A3E814419D7A40600C19C1F /* parse_mb_syn_cabac.cpp in Sources */,
 				4CE4469B18BC5EAB0017DF25 /* pic_queue.cpp in Sources */,
 				4CE4469F18BC5EAB0017DF25 /* welsDecoderExt.cpp in Sources */,
 				4CE4469318BC5EAB0017DF25 /* fmo.cpp in Sources */,
@@ -355,6 +366,7 @@
 				4CE4469518BC5EAB0017DF25 /* manage_dec_ref.cpp in Sources */,
 				4CE4468A18BC5EAB0017DF25 /* au_parser.cpp in Sources */,
 				4CE4469918BC5EAB0017DF25 /* mv_pred.cpp in Sources */,
+				6A3E814219D79AE900C19C1F /* cabac_decoder.cpp in Sources */,
 				4CE447AC18BC6BE90017DF25 /* block_add_neon.S in Sources */,
 				6C749B6A197CC6E600A111F9 /* block_add_aarch64_neon.S in Sources */,
 				4CE4469418BC5EAB0017DF25 /* get_intra_predictor.cpp in Sources */,
--- a/codec/build/win32/dec/WelsDecCore.vcproj
+++ b/codec/build/win32/dec/WelsDecCore.vcproj
@@ -656,6 +656,10 @@
 					>
 				</File>
 				<File
+					RelativePath="..\..\..\decoder\core\inc\cabac_decoder.h"
+					>
+				</File>
+				<File
 					RelativePath="..\..\..\common\inc\copy_mb.h"
 					>
 				</File>
@@ -776,6 +780,10 @@
 					>
 				</File>
 				<File
+					RelativePath="..\..\..\decoder\core\inc\parse_mb_syn_cabac.h"
+					>
+				</File>
+				<File
 					RelativePath="..\..\..\decoder\core\inc\parse_mb_syn_cavlc.h"
 					>
 				</File>
@@ -833,6 +841,10 @@
 					>
 				</File>
 				<File
+					RelativePath="..\..\..\decoder\core\src\cabac_decoder.cpp"
+					>
+				</File>
+				<File
 					RelativePath="..\..\..\common\src\common_tables.cpp"
 					>
 				</File>
@@ -910,6 +922,10 @@
 				</File>
 				<File
 					RelativePath="..\..\..\decoder\core\src\mv_pred.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\..\..\decoder\core\src\parse_mb_syn_cabac.cpp"
 					>
 				</File>
 				<File
--- a/codec/decoder/core/inc/bit_stream.h
+++ b/codec/decoder/core/inc/bit_stream.h
@@ -35,7 +35,6 @@
 #define WELS_BIT_STREAM_H__
 
 #include "typedefs.h"
-
 namespace WelsDec {
 
 /*
@@ -64,7 +63,7 @@
  */
 int32_t InitBits (PBitStringAux pBitString, const uint8_t* kpBuf, const int32_t kiSize);
 
-void InitReadBits (PBitStringAux pBitString);
+int32_t InitReadBits (PBitStringAux pBitString, intX_t iEndOffset);
 
 
 
--- /dev/null
+++ b/codec/decoder/core/inc/cabac_decoder.h
@@ -1,0 +1,111 @@
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ * \file	cabac_decoder.h
+ *
+ * \brief	Interfaces introduced for cabac decoder
+ *
+ * \date	10/10/2014 Created
+ *
+ *************************************************************************************
+ */
+#ifndef WELS_CABAC_DECODER_H__
+#define WELS_CABAC_DECODER_H__
+
+#include "decoder_context.h"
+#include "error_code.h"
+#include "wels_common_defs.h"
+namespace WelsDec {
+static const uint8_t g_kRenormTable256[256] = { // entry i == 8 - floor(log2(i)) for i >= 4; entries 0..3 are clamped to 6. Presumably the CABAC renormalization shift count -- confirm against cabac_decoder.cpp
+  6, 6, 6, 6, 6, 6, 6, 6, // indices   0..7
+  5, 5, 5, 5, 5, 5, 5, 5, // indices   8..15
+  4, 4, 4, 4, 4, 4, 4, 4, // indices  16..31 (2 rows)
+  4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, // indices  32..63 (4 rows)
+  3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3,
+  3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, // indices  64..127 (8 rows)
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, // indices 128..255 (16 rows)
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1
+};
+
+
+//1. CABAC context initialization: a one-off global init, then a per-slice init keyed by slice type, cabac_init_idc and QP
+void WelsCabacGlobalInit(PWelsDecoderContext pCabacCtx); // NOTE(review): parameter is the whole decoder context despite being named pCabacCtx
+void WelsCabacContextInit (PWelsDecoderContext  pCtx, uint8_t eSliceType, int32_t iCabacInitIdc, int32_t iQp);
+
+//2. Decoding engine initialization from, and restoration to, an SBitStringAux
+int32_t InitCabacDecEngineFromBS (PWelsCabacDecEngine pDecEngine, SBitStringAux* pBsAux);
+void RestoreCabacDecEngineToBS (PWelsCabacDecEngine pDecEngine, SBitStringAux* pBsAux);
+//3. Actual decoding: the decoded value comes back through the reference parameter; the int32_t return is presumably an error code (cf. WELS_CABAC_FALSE_RETURN)
+int32_t Read32BitsCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiValue, int32_t& iNumBitsRead);
+int32_t DecodeBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBit);
+int32_t DecodeBypassCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal);
+int32_t  DecodeTerminateCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal);
+
+//4. Unary parsing: symbol value is written to uiSymVal
+int32_t DecodeUnaryBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, int32_t iCtxOffset,
+                             uint32_t& uiSymVal);
+
+//5. EXGk parsing (presumably k-th order Exp-Golomb binarizations -- confirm against the implementation)
+int32_t DecodeExpBypassCabac (PWelsCabacDecEngine pDecEngine, int32_t iCount, uint32_t& uiSymVal);
+uint32_t DecodeUEGLevelCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBinVal); // NOTE(review): returns uint32_t while every sibling returns int32_t -- confirm this is intended
+int32_t DecodeUEGMvCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t iMaxC,  uint32_t& uiCode);
+
+#define WELS_CABAC_HALF    0x01FE
+#define WELS_CABAC_QUARTER 0x0100
+#define WELS_CABAC_FALSE_RETURN(iErrorInfo) /* evaluates iErrorInfo exactly once; returns it from the enclosing function when it is non-zero */ \
+{ \
+  const int32_t iCabacErrInfo_ = (iErrorInfo); if (iCabacErrInfo_) { return iCabacErrInfo_; } \
+}
+}
+#endif
--- a/codec/decoder/core/inc/dec_frame.h
+++ b/codec/decoder/core/inc/dec_frame.h
@@ -66,10 +66,12 @@
   int8_t*  pMbType;
   int32_t* pSliceIdc;				// using int32_t for slice_idc
   int16_t	(*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
+  int16_t	(*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
   int8_t	(*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM];
   int8_t*  pLumaQp;
   int8_t*  pChromaQp;
   int8_t*  pCbp;
+  uint8_t *pCbfDc;
   int8_t (*pNzc)[24];
   int8_t (*pNzcRs)[24];
   int8_t*  pResidualPredFlag;
--- a/codec/decoder/core/inc/dec_golomb.h
+++ b/codec/decoder/core/inc/dec_golomb.h
@@ -219,7 +219,7 @@
 static inline int32_t BsGetTrailingBits (uint8_t* pBuf) {
 // TODO
   uint32_t uiValue = *pBuf;
-  int32_t iRetNum = 1;
+  int32_t iRetNum = 0;
 
   do {
     if (uiValue & 1)
--- a/codec/decoder/core/inc/decode_slice.h
+++ b/codec/decoder/core/inc/decode_slice.h
@@ -38,12 +38,17 @@
 namespace WelsDec {
 
 int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
-int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
+int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
 
 int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx);
-int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
-typedef int32_t (*PWelsDecMbCavlcFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur);
+int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
 
+int32_t WelsDecodeMbCabacISlice(PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacPSlice(PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacISliceBaseMode0(PWelsDecoderContext pCtx, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacPSliceBaseMode0(PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag);
+
 int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice
 
 int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur);
@@ -77,6 +82,8 @@
 void SetNonZeroCount_c (int8_t* pNonZeroCount);
 
 void WelsBlockFuncInit (SBlockFunc* pFunc,  int32_t iCpu);
+void WelsBlockZero16x16_c(int16_t * block, int32_t stride);
+void WelsBlockZero8x8_c(int16_t * block, int32_t stride);
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -43,6 +43,7 @@
 #include "utils.h"
 #include "wels_const.h"
 #include "wels_common_basis.h"
+#include "wels_common_defs.h"
 #include "codec_app_def.h"
 #include "parameter_sets.h"
 #include "nalu.h"
@@ -56,7 +57,43 @@
 #include "expand_pic.h"
 
 namespace WelsDec {
+#define MAX_PRED_MODE_ID_I16x16  3 // highest valid index into the 4-entry g_ksI16PredInfo table
+#define MAX_PRED_MODE_ID_CHROMA  3 // highest valid index into the 4-entry g_ksChromaPredInfo table
+#define MAX_PRED_MODE_ID_I4x4    8 // highest valid index into the 9-entry g_ksI4PredInfo table
+#define  WELS_QP_MAX    51 // sizes the QP dimension (WELS_QP_MAX + 1) of sWelsCabacContexts
 
+typedef struct SWels_Cabac_Element { // one CABAC context model; the decoder context keeps WELS_CONTEXT_COUNT of these
+  uint8_t uiState; // presumably the probability state index -- confirm against cabac_decoder.cpp
+  uint8_t uiMPS; // presumably the current most-probable-symbol value -- confirm
+}SWelsCabacCtx, *PWelsCabacCtx;
+
+typedef struct // CABAC decoding engine state, passed to every Decode*Cabac function
+{
+  uint64_t uiRange; // presumably the arithmetic-decoder range -- confirm
+  uint64_t uiOffset; // presumably the arithmetic-decoder offset (bit window) -- confirm
+  int32_t iBitsLeft; // presumably bits still available in uiOffset before a refill -- confirm
+  uint8_t *pBuffStart; // byte-buffer pointers: start, current position and end (ownership not visible here)
+  uint8_t *pBuffCurr;
+  uint8_t *pBuffEnd;
+} SWelsCabacDecEngine, *PWelsCabacDecEngine;
+
+#define NEW_CTX_OFFSET_MB_TYPE_I 3 // base indices into the per-slice context array (pCabacCtx[WELS_CONTEXT_COUNT]); values appear to follow the ctxIdxOffset assignments of ITU-T H.264 Table 9-34 -- confirm
+#define NEW_CTX_OFFSET_SKIP 11
+#define NEW_CTX_OFFSET_SUBMB_TYPE 21
+#define NEW_CTX_OFFSET_MVD 40
+#define NEW_CTX_OFFSET_REF_NO 54
+#define NEW_CTX_OFFSET_DELTA_QP 60
+#define NEW_CTX_OFFSET_IPR 68 // note: listed before CIPR although its offset is larger
+#define NEW_CTX_OFFSET_CIPR 64
+#define NEW_CTX_OFFSET_CBP 73
+#define NEW_CTX_OFFSET_CBF 85
+#define NEW_CTX_OFFSET_MAP 105
+#define NEW_CTX_OFFSET_LAST 166
+#define NEW_CTX_OFFSET_ONE 227
+#define NEW_CTX_OFFSET_ABS 232
+#define CTX_NUM_MVD 7 // presumably the number of contexts per MVD component -- confirm
+#define CTX_NUM_CBP 4 // presumably the number of contexts per CBP group -- confirm
+
 typedef struct TagDataBuffer {
 uint8_t* pHead;
 uint8_t* pEnd;
@@ -141,16 +178,20 @@
 } SDeblockingFunc, *PDeblockingFunc;
 
 typedef void (*PWelsNonZeroCountFunc) (int8_t* pNonZeroCount);
-
+typedef void (*PWelsBlockZeroFunc) (int16_t* block,int32_t stride);
 typedef  struct  TagBlockFunc {
 PWelsNonZeroCountFunc		pWelsSetNonZeroCountFunc;
+PWelsBlockZeroFunc			pWelsBlockZero16x16Func;
+PWelsBlockZeroFunc			pWelsBlockZero8x8Func;
 } SBlockFunc;
 
-typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+typedef void (*PWelsFillNeighborMbInfoIntra4x4Func) (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
     int8_t* pIntraPredMode, PDqLayer pCurLayer);
-typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
+typedef void (*PWelsMapNeighToSample) (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail);
+typedef void (*PWelsMap16NeighToSample) (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail);
+typedef int32_t (*PWelsParseIntra4x4ModeFunc) (PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
     PDqLayer pCurDqLayer);
-typedef int32_t (*PWelsParseIntra16x16ModeFunc) (PNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer);
+typedef int32_t (*PWelsParseIntra16x16ModeFunc) (PWelsNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer);
 
 enum {
 OVERWRITE_NONE = 0,
@@ -202,6 +243,8 @@
   int8_t	(*pRefIndex[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM];
   int8_t*	pLumaQp[LAYER_NUM_EXCHANGEABLE];	/*mb luma_qp*/
   int8_t*	pChromaQp[LAYER_NUM_EXCHANGEABLE];					/*mb chroma_qp*/
+  int16_t	(*pMvd[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE   MB_BLOCK4x4_NUM*]
+  uint8_t *pCbfDc[LAYER_NUM_EXCHANGEABLE];
   int8_t	(*pNzc[LAYER_NUM_EXCHANGEABLE])[24];
   int8_t	(*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24];
   int16_t (*pScaledTCoeff[LAYER_NUM_EXCHANGEABLE])[MB_COEFF_LIST_SIZE]; /*need be aligned*/
@@ -308,8 +351,8 @@
 int32_t iCurSeqIntervalMaxPicHeight;
 
 PWelsFillNeighborMbInfoIntra4x4Func  pFillInfoCacheIntra4x4Func;
-PWelsParseIntra4x4ModeFunc           pParseIntra4x4ModeFunc;
-PWelsParseIntra16x16ModeFunc         pParseIntra16x16ModeFunc;
+PWelsMapNeighToSample pMap4x4NeighToSampleFunc;
+PWelsMap16NeighToSample pMap16x16NeighToSampleFunc;
 
 //feedback whether or not have VCL in current AU, and the temporal ID
 int32_t iFeedbackVclNalInAu;
@@ -325,7 +368,10 @@
 //Save the last nal header info
 SNalUnitHeaderExt sLastNalHdrExt;
 SSliceHeader      sLastSliceHeader;
-
+SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT];
+bool bCabacInited;
+SWelsCabacCtx   pCabacCtx[WELS_CONTEXT_COUNT];
+PWelsCabacDecEngine   pCabacDecEngine;
 } SWelsDecoderContext, *PWelsDecoderContext;
 
 static inline void ResetActiveSPSForEachLayer (PWelsDecoderContext pCtx) {
@@ -336,6 +382,7 @@
 //#ifdef __cplusplus
 //}
 //#endif//__cplusplus
+
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/error_code.h
+++ b/codec/decoder/core/inc/error_code.h
@@ -180,7 +180,9 @@
 ERR_INFO_INVALID_MMCO_SHOART2LONG,
 ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW,
 ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH,
-ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX
+ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX,
+//for CABAC
+ERR_CABAC_NO_BS_TO_READ,
 };
 //-----------------------------------------------------------------------------------------------------------
 
--- a/codec/decoder/core/inc/mb_cache.h
+++ b/codec/decoder/core/inc/mb_cache.h
@@ -69,7 +69,11 @@
 int32_t iTopType;
 int32_t iLeftTopType;
 int32_t iRightTopType;
-} SNeighAvail, *PNeighAvail;
+
+int8_t  iTopCbp;
+int8_t  iLeftCbp;
+int8_t iDummy[2]; //for align
+} SWelsNeighAvail, *PWelsNeighAvail;
 
 } // namespace WelsDec
 
--- /dev/null
+++ b/codec/decoder/core/inc/parse_mb_syn_cabac.h
@@ -1,0 +1,72 @@
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ * \file	parse_mb_syn_cabac.h
+ *
+ * \brief	cabac parse for syntax elements
+ *
+ * \date	10/10/2014 Created
+ *
+ *************************************************************************************
+ */
+#ifndef WELS_PARSE_MB_SYN_CABAC_H__
+#define WELS_PARSE_MB_SYN_CABAC_H__
+
+#include "decoder_context.h"
+#include "cabac_decoder.h"
+namespace WelsDec { // CABAC syntax-element parsers: each returns an int32_t (presumably an error code) and delivers the parsed value through a reference parameter where one is present
+int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal);
+int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip);
+int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
+int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
+int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType);
+int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal);
+int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal);
+int32_t ParseInterMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                                   int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]); // NOTE(review): 30 is an unnamed cache size shared with WelsFillCacheInter
+int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc,
+                          int8_t ref_idx[LIST_A][30],
+                          int32_t iListIdx, int32_t index, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal);
+int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30],
+                           int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal); // NOTE(review): uses a literal 2 where ParseInterMotionInfoCabac uses MV_A for the same cache -- confirm they are equal
+int32_t ParseCbpInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
+int32_t ParseDeltaQpCabac (PWelsDecoderContext pCtx, int32_t& iQpDelta);
+int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int32_t index, int32_t iResProperty,
+                           PWelsDecoderContext pCtx, uint32_t& uiCbpBit); // note: pCtx comes late in the parameter list here and in the residual parsers below, unlike the parsers above
+int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx,
+                                  uint32_t& uiBinVal);
+int32_t ParseSignificantCoeffCabac (int32_t* significant, int32_t iResProperty, PWelsDecoderContext pCtx);
+int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
+                                 int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
+                                 PWelsDecoderContext pCtx);
+int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx);
+}
+//#pragma pack()  -- NOTE(review): commented-out leftover; this header contains no matching pack directive
+#endif
--- a/codec/decoder/core/inc/parse_mb_syn_cavlc.h
+++ b/codec/decoder/core/inc/parse_mb_syn_cavlc.h
@@ -49,101 +49,19 @@
 
 namespace WelsDec {
 
-#define I16_LUMA_DC  1
-#define I16_LUMA_AC  2
-#define LUMA_DC_AC   3
-#define CHROMA_DC    4
-#define CHROMA_AC    5
 
-typedef struct TagReadBitsCache {
-uint32_t uiCache32Bit;
-uint8_t  uiRemainBits;
-uint8_t*  pBuf;
-} SReadBitsCache;
 
-#define SHIFT_BUFFER(pBitsCache)	{	pBitsCache->pBuf+=2; pBitsCache->uiRemainBits += 16; pBitsCache->uiCache32Bit |= (((pBitsCache->pBuf[2] << 8) | pBitsCache->pBuf[3]) << (32 - pBitsCache->uiRemainBits));	}
-#define POP_BUFFER(pBitsCache, iCount)	{ pBitsCache->uiCache32Bit <<= iCount;	pBitsCache->uiRemainBits -= iCount;	}
-
-static const uint8_t g_kuiZigzagScan[16] = { //4*4block residual zig-zag scan order
-0,  1,  4,  8,
-5,  2,  3,  6,
-9, 12, 13, 10,
-7, 11, 14, 15,
-};
-
-
-typedef struct TagI16PredInfo {
-int8_t iPredMode;
-int8_t iLeftAvail;
-int8_t iTopAvail;
-int8_t iLeftTopAvail;
-} SI16PredInfo;
-static const SI16PredInfo g_ksI16PredInfo[4] = {
-{I16_PRED_V, 0, 1, 0},
-{I16_PRED_H, 1, 0, 0},
-{         0, 0, 0, 0},
-{I16_PRED_P, 1, 1, 1},
-};
-
-static const SI16PredInfo g_ksChromaPredInfo[4] = {
-{       0, 0, 0, 0},
-{C_PRED_H, 1, 0, 0},
-{C_PRED_V, 0, 1, 0},
-{C_PRED_P, 1, 1, 1},
-};
-
-
-typedef struct TagI4PredInfo {
-int8_t iPredMode;
-int8_t iLeftAvail;
-int8_t iTopAvail;
-int8_t iLeftTopAvail;
-//	int8_t right_top_avail; //when right_top unavailable but top avail, we can pad the right-top with the rightmost pixel of top
-} SI4PredInfo;
-static const SI4PredInfo g_ksI4PredInfo[9] = {
-{  I4_PRED_V, 0, 1, 0},
-{  I4_PRED_H, 1, 0, 0},
-{          0, 0, 0, 0},
-{I4_PRED_DDL, 0, 1, 0},
-{I4_PRED_DDR, 1, 1, 1},
-{ I4_PRED_VR, 1, 1, 1},
-{ I4_PRED_HD, 1, 1, 1},
-{ I4_PRED_VL, 0, 1, 0},
-{ I4_PRED_HU, 1, 0, 0},
-};
-
-static const uint8_t g_kuiI16CbpTable[6] = {0, 16, 32, 15, 31, 47}; //reference to JM
-
-
-typedef struct TagPartMbInfo {
-MbType iType;
-int8_t iPartCount; //P_16*16, P_16*8, P_8*16, P_8*8 based on 8*8 block; P_8*4, P_4*8, P_4*4 based on 4*4 block
-int8_t iPartWidth; //based on 4*4 block
-} SPartMbInfo;
-static const SPartMbInfo g_ksInterMbTypeInfo[5] = {
-{MB_TYPE_16x16,    1, 4},
-{MB_TYPE_16x8,     2, 4},
-{MB_TYPE_8x16,     2, 2},
-{MB_TYPE_8x8,      4, 4},
-{MB_TYPE_8x8_REF0, 4, 4}, //ref0--ref_idx not present in bit-stream and default as 0
-};
-static const SPartMbInfo g_ksInterSubMbTypeInfo[4] = {
-{SUB_MB_TYPE_8x8, 1, 2},
-{SUB_MB_TYPE_8x4, 2, 2},
-{SUB_MB_TYPE_4x8, 2, 1},
-{SUB_MB_TYPE_4x4, 4, 1},
-};
-
-void GetNeighborAvailMbType (PNeighAvail pNeighAvail, PDqLayer pCurLayer);
-void WelsFillCacheNonZeroCount (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer);
-void WelsFillCacheConstrain0Intra4x4 (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer);
+void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, PDqLayer pCurLayer);
+void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
                                       PDqLayer pCurLayer);
-void WelsFillCacheConstrain1Intra4x4 (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
                                       PDqLayer pCurLayer);
-void WelsFillCacheInter (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                         int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
+void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                          int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
 
-
 /*!
  * \brief   check iPredMode for intra16x16 eligible or not
  * \param 	input : current iPredMode
@@ -190,19 +108,7 @@
                                 PWelsDecoderContext pCtx);
 
 /*!
- * \brief   parsing intra mode
- * \param 	input : current mb, bit-stream
- * \param 	output: 0 indicating decoding correctly; -1 means error
- */
-int32_t ParseIntra4x4ModeConstrain0 (PNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
-                                     PDqLayer pCurDqLayer);
-int32_t ParseIntra4x4ModeConstrain1 (PNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
-                                     PDqLayer pCurDqLayer);
-int32_t ParseIntra16x16ModeConstrain0 (PNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer);
-int32_t ParseIntra16x16ModeConstrain1 (PNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer);
-
-/*!
- * \brief   parsing inter info (including ref_index and mvd)
+ * \brief   parsing inter info (including ref_index and pMvd)
  * \param 	input : decoding context, current mb, bit-stream
  * \param 	output: 0 indicating decoding correctly; -1 means error
  */
--- a/codec/decoder/core/inc/slice.h
+++ b/codec/decoder/core/inc/slice.h
@@ -197,6 +197,7 @@
 /*from lower layer: slice header*/
 uint8_t		eSliceType;
 uint8_t		uiPadding[2];
+int32_t     iLastDeltaQp;
 } SSlice, *PSlice;
 
 } // namespace WelsDec
--- a/codec/decoder/core/inc/wels_common_basis.h
+++ b/codec/decoder/core/inc/wels_common_basis.h
@@ -47,6 +47,11 @@
 extern const uint8_t g_kuiScan8[24];
 extern const uint8_t g_kuiLumaDcZigzagScan[16];
 extern const uint8_t g_kuiChromaDcScan[4];
+extern const uint8_t g_kMbNonZeroCountIdx[24];
+extern const uint8_t g_kCacheNzcScanIdx[4*4+4+4+3];
+extern const uint8_t g_kCache26ScanIdx[16];
+extern const uint8_t g_kCache30ScanIdx[16];
+extern const uint8_t g_kNonZeroScanIdxC[4];
 /* Profile IDC */
 typedef uint8_t		ProfileIdc;
 enum {
@@ -118,7 +123,94 @@
 #define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL )
 #define IS_SUB8x8(type) (MB_TYPE_8x8 == (type) || MB_TYPE_8x8_REF0 == (type))
 
+#define I16_LUMA_DC  1 // residual block property ids 1..5, moved here from parse_mb_syn_cavlc.h
+#define I16_LUMA_AC  2
+#define LUMA_DC_AC   3
+#define CHROMA_DC    4
+#define CHROMA_AC    5
+#define CHROMA_DC_U  6 // ids 6..9 are new in this change: per-plane (U/V) variants of CHROMA_DC / CHROMA_AC
+#define CHROMA_DC_V  7
+#define CHROMA_AC_U  8
+#define CHROMA_AC_V  9
 
+typedef struct TagReadBitsCache {
+    uint32_t uiCache32Bit;
+    uint8_t  uiRemainBits;
+    uint8_t*  pBuf;
+} SReadBitsCache;
+
+#define SHIFT_BUFFER(pBitsCache)	{	(pBitsCache)->pBuf += 2; (pBitsCache)->uiRemainBits += 16; (pBitsCache)->uiCache32Bit |= (((uint32_t) (((pBitsCache)->pBuf[2] << 8) | (pBitsCache)->pBuf[3])) << (32 - (pBitsCache)->uiRemainBits));	} // advance pBuf by 2 bytes and OR 16 more bits (pBuf[2], pBuf[3]) into the cache after the bits still held; the 16-bit value is widened to uint32_t so the left shift cannot overflow a signed int
+#define POP_BUFFER(pBitsCache, iCount)	{ (pBitsCache)->uiCache32Bit <<= (iCount);	(pBitsCache)->uiRemainBits -= (iCount);	} // drop the top iCount bits from the cache; arguments are parenthesized so expression arguments expand safely
+
+static const uint8_t g_kuiZigzagScan[16] = { //4*4block residual zig-zag scan order
+    0,  1,  4,  8,
+    5,  2,  3,  6,
+    9, 12, 13, 10,
+    7, 11, 14, 15,
+};
+
+
+typedef struct TagI16PredInfo {
+    int8_t iPredMode;
+    int8_t iLeftAvail;
+    int8_t iTopAvail;
+    int8_t iLeftTopAvail;
+} SI16PredInfo;
+static const SI16PredInfo g_ksI16PredInfo[4] = {
+    {I16_PRED_V, 0, 1, 0},
+    {I16_PRED_H, 1, 0, 0},
+    {         0, 0, 0, 0},
+    {I16_PRED_P, 1, 1, 1},
+};
+
+static const SI16PredInfo g_ksChromaPredInfo[4] = {
+    {       0, 0, 0, 0},
+    {C_PRED_H, 1, 0, 0},
+    {C_PRED_V, 0, 1, 0},
+    {C_PRED_P, 1, 1, 1},
+};
+
+
+typedef struct TagI4PredInfo {
+    int8_t iPredMode;
+    int8_t iLeftAvail;
+    int8_t iTopAvail;
+    int8_t iLeftTopAvail;
+    //	int8_t right_top_avail; //when right_top unavailable but top avail, we can pad the right-top with the rightmost pixel of top
+} SI4PredInfo;
+static const SI4PredInfo g_ksI4PredInfo[9] = {
+    {  I4_PRED_V, 0, 1, 0},
+    {  I4_PRED_H, 1, 0, 0},
+    {          0, 0, 0, 0},
+    {I4_PRED_DDL, 0, 1, 0},
+    {I4_PRED_DDR, 1, 1, 1},
+    { I4_PRED_VR, 1, 1, 1},
+    { I4_PRED_HD, 1, 1, 1},
+    { I4_PRED_VL, 0, 1, 0},
+    { I4_PRED_HU, 1, 0, 0},
+};
+
+static const uint8_t g_kuiI16CbpTable[6] = {0, 16, 32, 15, 31, 47}; //reference to JM
+
+
+typedef struct TagPartMbInfo {
+    MbType iType;
+    int8_t iPartCount; //P_16*16, P_16*8, P_8*16, P_8*8 based on 8*8 block; P_8*4, P_4*8, P_4*4 based on 4*4 block
+    int8_t iPartWidth; //based on 4*4 block
+} SPartMbInfo;
+static const SPartMbInfo g_ksInterMbTypeInfo[5] = {
+    {MB_TYPE_16x16,    1, 4},
+    {MB_TYPE_16x8,     2, 4},
+    {MB_TYPE_8x16,     2, 2},
+    {MB_TYPE_8x8,      4, 4},
+    {MB_TYPE_8x8_REF0, 4, 4}, //ref0--ref_idx not present in bit-stream and default as 0
+};
+static const SPartMbInfo g_ksInterSubMbTypeInfo[4] = {
+    {SUB_MB_TYPE_8x8, 1, 2},
+    {SUB_MB_TYPE_8x4, 2, 2},
+    {SUB_MB_TYPE_4x8, 2, 1},
+    {SUB_MB_TYPE_4x4, 4, 1},
+};
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/wels_const.h
+++ b/codec/decoder/core/inc/wels_const.h
@@ -96,4 +96,10 @@
 #define MAX_ACCESS_UNIT_CAPACITY 7077888 //Maximum AU size in bytes for level 5.2 for single frame
 #define MAX_MACROBLOCK_CAPACITY 5000 //Maximal legal MB capacity, 15000 bits is enough
 
+enum { // values stored in the MB parsers' local iMbMode
+    BASE_MB = 0, // the only value the CABAC I/P-slice MB parsers assign
+    NON_AVC_REWRITE_ENHANCE_MB =1, // NOTE(review): presumably an enhancement-layer MB without AVC rewrite -- confirm
+    AVC_REWRITE_ENHANCE_MB = 2 // NOTE(review): presumably an enhancement-layer MB with AVC rewrite -- confirm
+};
+
 #endif//WELS_CONSTANCE_H__
--- a/codec/decoder/core/src/au_parser.cpp
+++ b/codec/decoder/core/src/au_parser.cpp
@@ -43,7 +43,6 @@
 #include "error_code.h"
 #include "memmgr_nal_unit.h"
 #include "decoder_core.h"
-#include "decoder_core.h"
 
 namespace WelsDec {
 /*!
@@ -227,14 +226,18 @@
     pCurNal->sNalHeaderExt.sNalUnitHeader.uiForbiddenZeroBit = pNalUnitHeader->uiForbiddenZeroBit;
     pCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc		  = pNalUnitHeader->uiNalRefIdc;
     pCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType	      = pNalUnitHeader->eNalUnitType;
+    if (pNalUnitHeader->uiNalRefIdc != 0) {
+      pBs = &pCtx->sBs;
+      iBitSize = (iNalSize << 3) - BsGetTrailingBits (pNal + iNalSize - 1); // convert into bit
 
-    pBs = &pCtx->sBs;
-
-    iBitSize = (iNalSize << 3) - BsGetTrailingBits (pNal + iNalSize - 1); // convert into bit
-
-    InitBits (pBs, pNal, iBitSize);
-
-    ParsePrefixNalUnit (pCtx, pBs);
+      iErr = InitBits (pBs, pNal, iBitSize);
+      if (iErr) {
+        WelsLog (pLogCtx, WELS_LOG_ERROR, "NAL_UNIT_PREFIX: InitBits() fail due invalid access.");
+        pCtx->iErrorCode	|= dsBitstreamError;
+        return NULL;
+      }
+      ParsePrefixNalUnit (pCtx, pBs);
+    }
     pCurNal->sNalData.sPrefixNal.bPrefixNalCorrectFlag = true;
 
     break;
@@ -309,7 +312,12 @@
 
     pBs = &pCurAu->pNalUnitsList[uiAvailNalNum - 1]->sNalData.sVclNal.sSliceBitsRead;
     iBitSize = (iNalSize << 3) - BsGetTrailingBits (pNal + iNalSize - 1); // convert into bit
-    InitBits (pBs, pNal, iBitSize);
+    iErr = InitBits (pBs, pNal, iBitSize);
+    if (iErr) {
+      WelsLog (pLogCtx, WELS_LOG_ERROR, "NAL_UNIT_CODED_SLICE: InitBits() fail due invalid access.");
+      pCtx->iErrorCode	|= dsBitstreamError;
+      return NULL;
+    }
     iErr = ParseSliceHeaderSyntaxs (pCtx, pBs, bExtensionFlag);
     if (iErr != ERR_NONE) {
       if ((uiAvailNalNum == 1) && (pCurNal->sNalHeaderExt.bIdrFlag)) { //IDR parse error
@@ -505,8 +513,16 @@
   switch (eNalType) {
   case NAL_UNIT_SPS:
   case NAL_UNIT_SUBSET_SPS:
-    if (iBitSize > 0)
-      InitBits (pBs, pRbsp, iBitSize);
+    if (iBitSize > 0) {
+      iErr = InitBits (pBs, pRbsp, iBitSize);
+      if (ERR_NONE != iErr) {
+        if (pCtx->eErrorConMethod == ERROR_CON_DISABLE)
+          pCtx->iErrorCode |= dsNoParamSets;
+        else
+          pCtx->iErrorCode |= dsBitstreamError;
+        return iErr;
+      }
+    }
     iErr = ParseSps (pCtx, pBs, &iPicWidth, &iPicHeight);
     if (ERR_NONE != iErr) {	// modified for pSps/pSubsetSps invalid, 12/1/2009
       if (pCtx->eErrorConMethod == ERROR_CON_DISABLE)
@@ -519,8 +535,16 @@
     break;
 
   case NAL_UNIT_PPS:
-    if (iBitSize > 0)
-      InitBits (pBs, pRbsp, iBitSize);
+    if (iBitSize > 0) {
+      iErr = InitBits (pBs, pRbsp, iBitSize);
+      if (ERR_NONE != iErr) {
+        if (pCtx->eErrorConMethod == ERROR_CON_DISABLE)
+          pCtx->iErrorCode |= dsNoParamSets;
+        else
+          pCtx->iErrorCode |= dsBitstreamError;
+        return iErr;
+      }
+    }
     iErr = ParsePps (pCtx, &pCtx->sPpsBuffer[0], pBs);
     if (ERR_NONE != iErr) {	// modified for pps invalid, 12/1/2009
       if (pCtx->eErrorConMethod == ERROR_CON_DISABLE)
--- a/codec/decoder/core/src/bit_stream.cpp
+++ b/codec/decoder/core/src/bit_stream.cpp
@@ -38,6 +38,7 @@
  *************************************************************************************
  */
 #include "bit_stream.h"
+#include "error_code.h"
 
 namespace WelsDec {
 
@@ -47,10 +48,14 @@
   return uiValue;
 }
 
-void InitReadBits (PBitStringAux pBitString) {
+int32_t InitReadBits (PBitStringAux pBitString, intX_t iEndOffset) { // loads the first 4 bytes at pCurBuf into uiCurBits; returns ERR_NONE or ERR_INFO_INVALID_ACCESS
+  if(pBitString->pCurBuf>=(pBitString->pEndBuf - iEndOffset)) { // read position is at or beyond (pEndBuf - iEndOffset): refuse to read
+    return ERR_INFO_INVALID_ACCESS;
+  }
   pBitString->uiCurBits  = GetValue4Bytes (pBitString->pCurBuf);
   pBitString->pCurBuf  += 4;
   pBitString->iLeftBits = -16;
+  return ERR_NONE; // uiCurBits / pCurBuf / iLeftBits have been primed above
 }
 
 /*!
@@ -60,7 +65,7 @@
  * \param	kpBuf		bit-stream buffer
  * \param	kiSize	    size in bits for decoder; size in bytes for encoder
  *
- * \return	size of buffer data in byte; failed in -1 return
+ * \return	0: success, other: fail
  */
 int32_t InitBits (PBitStringAux pBitString, const uint8_t* kpBuf, const int32_t kiSize) {
   const int32_t kiSizeBuf = (kiSize + 7) >> 3;
@@ -67,16 +72,17 @@
   uint8_t* pTmp = (uint8_t*)kpBuf;
 
   if (NULL == pTmp)
-    return -1;
+    return ERR_INFO_INVALID_ACCESS;
 
   pBitString->pStartBuf   = pTmp;				// buffer to start position
   pBitString->pEndBuf	    = pTmp + kiSizeBuf;	// buffer + length
   pBitString->iBits	    = kiSize;				// count bits of overall bitstreaming inputindex;
-
   pBitString->pCurBuf   = pBitString->pStartBuf;
-  InitReadBits (pBitString);
-
-  return kiSizeBuf;
+  int32_t iErr = InitReadBits (pBitString, 0);
+  if(iErr) {
+    return iErr;
+  }
+  return ERR_NONE;
 }
 
 } // namespace WelsDec
--- /dev/null
+++ b/codec/decoder/core/src/cabac_decoder.cpp
@@ -1,0 +1,330 @@
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *	cabac_decoder.cpp:	deals with cabac state transition and related functions
+ */
+#include "cabac_decoder.h"
+namespace WelsDec {
+static const int16_t g_kMvdBinPos2Ctx [8] = {0, 1, 2, 3, 3, 3, 3, 3}; // context offset added to pBinCtx for prefix bin positions 0..7 in DecodeUEGMvCabac
+
+// Precomputes pCtx->sWelsCabacContexts: the start state / MPS of every context index for each of the
+// 4 initialisation models and each QP in [0, WELS_QP_MAX], then flags the tables as ready.
+void WelsCabacGlobalInit (PWelsDecoderContext pCtx) {
+  for (int32_t iModel = 0; iModel < 4; iModel++) {
+    for (int32_t iQp = 0; iQp <= WELS_QP_MAX; iQp++) {
+      for (int32_t iIdx = 0; iIdx < WELS_CONTEXT_COUNT; iIdx++) {
+        // The table holds a (slope, offset) pair per context and model; the pre-state is linear in QP.
+        const int32_t kiSlope    = g_kiCabacGlobalContextIdx[iIdx][iModel][0];
+        const int32_t kiOffset   = g_kiCabacGlobalContextIdx[iIdx][iModel][1];
+        const int32_t kiPreState = WELS_CLIP3 ((((kiSlope * iQp) >> 4) + kiOffset), 1, 126);
+        // Pre-states 1..63 give MPS 0 with state 63 - pre; 64..126 give MPS 1 with state pre - 64.
+        const bool kbMpsIsOne    = (kiPreState > 63);
+        const uint8_t kuiMps     = kbMpsIsOne ? 1 : 0;
+        const uint8_t kuiState   = (uint8_t) (kbMpsIsOne ? (kiPreState - 64) : (63 - kiPreState));
+        pCtx->sWelsCabacContexts[iModel][iQp][iIdx].uiState = kuiState;
+        pCtx->sWelsCabacContexts[iModel][iQp][iIdx].uiMPS   = kuiMps;
+      }
+    }
+  }
+  // WelsCabacContextInit() checks this flag so the tables are only built once per context.
+  pCtx->bCabacInited = true;
+}
+
+// ------------------- 1. context initialization
+void WelsCabacContextInit (PWelsDecoderContext  pCtx, uint8_t eSliceType, int32_t iCabacInitIdc, int32_t iQp) {
+  // Build the per-model / per-QP start tables on first use.
+  if (!pCtx->bCabacInited)
+    WelsCabacGlobalInit (pCtx);
+  // Model 0 serves I slices, model (cabac_init_idc + 1) everything else. NOTE(review): the eSliceType argument is not consulted, pCtx->eSliceType is.
+  const int32_t kiModel = (pCtx->eSliceType == WelsCommon::I_SLICE) ? 0 : (iCabacInitIdc + 1);
+  memcpy (pCtx->pCabacCtx, pCtx->sWelsCabacContexts[kiModel][iQp], WELS_CONTEXT_COUNT * sizeof (SWelsCabacCtx));
+}
+
+// ------------------- 2. decoding Engine initialization
+// Hands the stream over from the bit reader to the CABAC engine; ERR_INFO_INVALID_ACCESS when fewer than 2 bytes remain.
+int32_t InitCabacDecEngineFromBS (PWelsCabacDecEngine pDecEngine, PBitStringAux pBsAux) {
+  // pBsAux->iLeftBits is expected to be negative here; "+ 2" undoes the 2 bytes the bit reader pre-read.
+  const int32_t kiUnreadBits  = -pBsAux->iLeftBits;
+  const int32_t kiRewindBytes = (kiUnreadBits >> 3) + 2;
+  uint8_t* pSrc = pBsAux->pCurBuf - kiRewindBytes;
+  if (pSrc >= (pBsAux->pEndBuf - 1))
+    return ERR_INFO_INVALID_ACCESS;
+  // Prime the offset register with the next 5 bytes, most significant byte first.
+  // NOTE(review): the check above only guarantees 2 readable bytes -- presumably the buffer is padded; confirm.
+  pDecEngine->uiOffset = ((pSrc[0] << 16) | (pSrc[1] << 8) | pSrc[2]);
+  pDecEngine->uiOffset <<= 16;
+  pDecEngine->uiOffset |= (pSrc[3] << 8) | pSrc[4];
+  pDecEngine->iBitsLeft  = 31;
+  pDecEngine->uiRange    = WELS_CABAC_HALF;
+  pDecEngine->pBuffStart = pBsAux->pStartBuf;
+  pDecEngine->pBuffCurr  = pSrc + 5;
+  pDecEngine->pBuffEnd   = pBsAux->pEndBuf;
+  pBsAux->iLeftBits = 0;
+  return ERR_NONE;
+}
+
+// CABAC decoding finished: give the read position back to the SBitStringAux reader.
+void RestoreCabacDecEngineToBS (PWelsCabacDecEngine pDecEngine, PBitStringAux pBsAux) {
+  pDecEngine->pBuffCurr -= (pDecEngine->iBitsLeft >> 3); // step back over the whole bytes still held by the engine
+  pDecEngine->iBitsLeft  = 0;                            // pcm_alignment_zero_bit in CABAC
+  pBsAux->pStartBuf = pDecEngine->pBuffStart;
+  pBsAux->pCurBuf   = pDecEngine->pBuffCurr;
+  pBsAux->uiCurBits = 0;
+  pBsAux->iLeftBits = 0;
+  pBsAux->iIndex    = 0;
+}
+
+// ------------------- 3. actual decoding
+/*!
+ * \brief	Fetch the next (up to) 4 bytes of CABAC bitstream, most significant byte first.
+ *
+ * \param	pDecEngine		decoding engine; pBuffCurr is advanced by the number of bytes consumed
+ * \param	uiValue			[out] the bytes read, packed big-endian into the low bits (0 on failure)
+ * \param	iNumBitsRead	[out] 8, 16, 24 or 32 depending on how many bytes were left (0 on failure)
+ *
+ * \return	ERR_NONE, or ERR_CABAC_NO_BS_TO_READ when pBuffCurr has already reached pBuffEnd
+ */
+int32_t Read32BitsCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiValue, int32_t& iNumBitsRead) {
+  const intX_t kiLeftBytes = pDecEngine->pBuffEnd - pDecEngine->pBuffCurr;
+  iNumBitsRead = 0;
+  uiValue = 0;
+  if (kiLeftBytes <= 0) {
+    return ERR_CABAC_NO_BS_TO_READ;
+  }
+
+  // Never read more than what is left in the buffer.
+  const int32_t kiBytes = (kiLeftBytes < 4) ? (int32_t) kiLeftBytes : 4;
+
+  // Accumulate in an unsigned 32-bit value: the previous "uint8_t << 24" was evaluated in (signed) int,
+  // which overflows -- undefined behaviour -- whenever the first byte is >= 0x80.
+  uint32_t uiAccum = 0;
+  for (int32_t i = 0; i < kiBytes; i++) {
+    uiAccum = (uiAccum << 8) | pDecEngine->pBuffCurr[i];
+  }
+
+  uiValue = uiAccum;
+  iNumBitsRead = kiBytes << 3;
+  pDecEngine->pBuffCurr += kiBytes;
+  return ERR_NONE;
+}
+
+int32_t DecodeBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiBinVal) { // decodes one context-coded bin, updating both the context model and the engine
+  int32_t iErrorInfo = ERR_NONE;
+  uint32_t uiState = pBinCtx->uiState;
+  uiBinVal = pBinCtx->uiMPS; // start from the most probable symbol; flipped below on the LPS path
+  uint64_t uiOffset = pDecEngine->uiOffset;
+  uint64_t uiRange = pDecEngine->uiRange;
+  // the LPS sub-range is looked up by context state and by bits 6..7 of the current range
+  int32_t iRenorm = 1;
+  uint32_t uiRangeLPS = g_kuiCabacRangeLps[uiState][ (uiRange >> 6) & 0x03];
+  uiRange -= uiRangeLPS; // uiRange now holds the MPS sub-range
+  if (uiOffset >= (uiRange << pDecEngine->iBitsLeft)) { //LPS
+    uiOffset -= (uiRange << pDecEngine->iBitsLeft);
+    uiBinVal ^= 0x0001;
+    if (!uiState) // at state 0 an LPS also swaps the meaning of the MPS
+      pBinCtx->uiMPS ^= 0x01;
+    pBinCtx->uiState = g_kuiStateTransTable[uiState][0];
+    iRenorm = g_kRenormTable256[uiRangeLPS]; // shift count taken from the renormalisation table
+    uiRange = (uiRangeLPS << iRenorm);
+  } else {  //MPS
+    pBinCtx->uiState = g_kuiStateTransTable[uiState][1];
+    if (uiRange >= WELS_CABAC_QUARTER) { // range still large enough: no renormalisation, offset unchanged
+      pDecEngine->uiRange = uiRange;
+      return ERR_NONE;
+    } else {
+      uiRange <<= 1; // one-bit renormalisation (iRenorm keeps its initial value 1)
+    }
+  }
+  //Renorm
+  pDecEngine->uiRange = uiRange;
+  pDecEngine->iBitsLeft -= iRenorm;
+  if (pDecEngine->iBitsLeft > 0) { // enough buffered bits remain: no refill needed
+    pDecEngine->uiOffset = uiOffset;
+    return ERR_NONE;
+  }
+  uint32_t uiVal = 0;
+  int32_t iNumBitsRead = 0;
+  iErrorInfo = Read32BitsCabac (pDecEngine, uiVal, iNumBitsRead); // refill with up to 32 fresh bits
+  pDecEngine->uiOffset = (uiOffset << iNumBitsRead) | uiVal;
+  pDecEngine->iBitsLeft += iNumBitsRead;
+  if (iErrorInfo && pDecEngine->iBitsLeft < 0) { // the read error is only reported when the bit count is still negative
+    return iErrorInfo;
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief	Decode one bypass bin (no context model involved).
+ *
+ * \param	pDecEngine	decoding engine; iBitsLeft / uiOffset (and pBuffCurr on refill) are updated
+ * \param	uiBinVal	[out] decoded bin, 0 or 1; not written when an error is returned
+ *
+ * \return	ERR_NONE, or the Read32BitsCabac() error when a refill was needed but yielded no bits
+ */
+int32_t DecodeBypassCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal) {
+  int32_t iBits = pDecEngine->iBitsLeft;
+  uint64_t uiOff = pDecEngine->uiOffset;
+  if (iBits <= 0) { // buffered bits exhausted: refill first
+    uint32_t uiFresh = 0;
+    int32_t iFreshBits = 0;
+    const int32_t kiErr = Read32BitsCabac (pDecEngine, uiFresh, iFreshBits);
+    uiOff = (uiOff << iFreshBits) | uiFresh;
+    iBits = iFreshBits;
+    if (kiErr && iBits == 0) {
+      return kiErr;
+    }
+  }
+  // Consume one bit: the bin is 1 when the offset reaches the range scaled to the remaining bits.
+  --iBits;
+  const uint64_t kuiScaledRange = (pDecEngine->uiRange << iBits);
+  const bool kbOne = (uiOff >= kuiScaledRange);
+  pDecEngine->iBitsLeft = iBits;
+  pDecEngine->uiOffset  = kbOne ? (uiOff - kuiScaledRange) : uiOff;
+  uiBinVal = kbOne ? 1 : 0;
+  return ERR_NONE;
+}
+
+// Decodes one terminate bin into uiBinVal. A 1 leaves the engine untouched; a 0 stores the reduced
+// range and, when it dropped below WELS_CABAC_QUARTER, renormalises and possibly refills the engine.
+// Returns ERR_NONE, or the Read32BitsCabac() error when that refill left iBitsLeft negative.
+int32_t DecodeTerminateCabac (PWelsCabacDecEngine pDecEngine, uint32_t& uiBinVal) {
+  const uint64_t kuiRange  = pDecEngine->uiRange - 2;
+  // Unsigned like the other decode routines (was int64_t, i.e. a signed/unsigned comparison below).
+  const uint64_t kuiOffset = pDecEngine->uiOffset;
+
+  if (kuiOffset >= (kuiRange << pDecEngine->iBitsLeft)) {
+    uiBinVal = 1;
+    return ERR_NONE;
+  }
+  uiBinVal = 0;
+  if (kuiRange >= WELS_CABAC_QUARTER) { // range still large enough: no renormalisation
+    pDecEngine->uiRange = kuiRange;
+    return ERR_NONE;
+  }
+  // Renorm
+  const int32_t kiRenorm = g_kRenormTable256[kuiRange];
+  pDecEngine->uiRange = (kuiRange << kiRenorm);
+  pDecEngine->iBitsLeft -= kiRenorm;
+  if (pDecEngine->iBitsLeft >= 0) {
+    return ERR_NONE;
+  }
+  // Bit budget went negative: pull in up to 32 fresh bits.
+  uint32_t uiVal = 0;
+  int32_t iNumBitsRead = 0;
+  const int32_t kiErr = Read32BitsCabac (pDecEngine, uiVal, iNumBitsRead);
+  pDecEngine->uiOffset = (pDecEngine->uiOffset << iNumBitsRead) | uiVal;
+  pDecEngine->iBitsLeft += iNumBitsRead;
+  return (kiErr && pDecEngine->iBitsLeft < 0) ? kiErr : ERR_NONE;
+}
+
+// Unary decoding: the first bin uses pBinCtx, every further bin uses pBinCtx + iCtxOffset.
+// uiSymVal is 0 when the first bin is 0, otherwise the number of further bins read (the closing 0 included).
+int32_t DecodeUnaryBinCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, int32_t iCtxOffset,
+                             uint32_t& uiSymVal) {
+  uiSymVal = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiSymVal));
+  if (uiSymVal == 0)
+    return ERR_NONE;
+  PWelsCabacCtx pNextCtx = pBinCtx + iCtxOffset;
+  uint32_t uiBin = 0;
+  uiSymVal = 0;
+  do {
+    WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pNextCtx, uiBin));
+    ++uiSymVal;
+  } while (uiBin != 0);
+  return ERR_NONE;
+}
+
+// Exp-Golomb (order iCount) decoding from bypass bins; a prefix that would push the order past 16 is rejected as corrupt.
+int32_t DecodeExpBypassCabac (PWelsCabacDecEngine pDecEngine, int32_t iCount, uint32_t& uiSymVal) {
+  uint32_t uiCode;
+  int32_t iSymTmp = 0;   // contribution of the unary prefix
+  int32_t iSymTmp2 = 0;  // the iCount-bit suffix
+  uiSymVal = 0;
+  do { // prefix: every 1 bin adds 2^iCount and raises the order by one
+    WELS_READ_VERIFY (DecodeBypassCabac (pDecEngine, uiCode));
+    if (uiCode == 1) {
+      if (iCount >= 16) return ERR_INFO_INVALID_ACCESS; // unbounded prefix: "1 << iCount" would overflow (UB) on a corrupt stream
+      iSymTmp += (1 << iCount);
+      ++iCount;
+    }
+  } while (uiCode != 0);
+  while (iCount--) { // suffix: iCount bits, most significant first
+    WELS_READ_VERIFY (DecodeBypassCabac (pDecEngine, uiCode));
+    if (uiCode == 1)
+      iSymTmp2 |= (1 << iCount);
+  }
+  uiSymVal = (uint32_t) (iSymTmp + iSymTmp2);
+  return ERR_NONE;
+}
+
+// Level decoding: a context-coded unary prefix of at most 13 bins (all on pBinCtx), followed by an
+// order-0 Exp-Golomb bypass suffix when the prefix ran to its full length without a terminating 0 bin.
+uint32_t DecodeUEGLevelCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t& uiCode) {
+  uiCode = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiCode));
+  if (uiCode == 0)
+    return ERR_NONE;
+  uint32_t uiBin = 0;
+  uint32_t uiBinsRead = 1; // the first bin has been consumed above
+  uiCode = 0;
+  do {
+    WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx, uiBin));
+    ++uiCode;
+    ++uiBinsRead;
+  } while (uiBin != 0 && uiBinsRead != 13);
+  if (uiBin != 0) { // prefix saturated: the remainder is bypass coded
+    uint32_t uiSuffix = 0;
+    WELS_READ_VERIFY (DecodeExpBypassCabac (pDecEngine, 0, uiSuffix));
+    uiCode += uiSuffix + 1;
+  }
+  return ERR_NONE;
+}
+
+// MVD magnitude: up to 8 context-coded prefix bins (context picked via g_kMvdBinPos2Ctx), then an order-3 bypass suffix. iMaxBin is not consulted.
+int32_t DecodeUEGMvCabac (PWelsCabacDecEngine pDecEngine, PWelsCabacCtx pBinCtx, uint32_t iMaxBin,  uint32_t& uiCode) {
+  WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx + g_kMvdBinPos2Ctx[0], uiCode));
+  if (uiCode == 0)
+    return ERR_NONE;
+  uint32_t uiBin = 0, uiBinPos = 1; // bin position 0 has been consumed above
+  uiCode = 0;
+  do {
+    WELS_READ_VERIFY (DecodeBinCabac (pDecEngine, pBinCtx + g_kMvdBinPos2Ctx[uiBinPos], uiBin));
+    ++uiBinPos;
+    ++uiCode;
+  } while (uiBin != 0 && uiBinPos != 8);
+  if (uiBin != 0) { // prefix saturated: the remainder is bypass coded
+    uint32_t uiSuffix = 0;
+    WELS_READ_VERIFY (DecodeExpBypassCabac (pDecEngine, 3, uiSuffix));
+    uiCode += (uiSuffix + 1);
+  }
+  return ERR_NONE;
+}
+}
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -44,6 +44,7 @@
 #include "decode_slice.h"
 
 #include "parse_mb_syn_cavlc.h"
+#include "parse_mb_syn_cabac.h"
 #include "rec_mb.h"
 #include "mv_pred.h"
 
@@ -318,6 +319,576 @@
   pBlk[iStride1] = (iE - iB) >> 1;
 }
 
+// Sets to 1 the pSampleAvail entries that border the MB for every available neighbour MB; other entries are left as they are.
+void WelsMap4x4NeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  const int32_t kiStride = 6; // distance between two rows of the availability cache
+  if (pNeighAvail->iLeftAvail) {  //left: entries 6, 12, 18, 24
+    for (int32_t iRow = 1; iRow <= 4; iRow++) {
+      pSampleAvail[iRow * kiStride] = 1;
+    }
+  }
+  if (pNeighAvail->iLeftTopAvail) { //top_left
+    pSampleAvail[0] = 1;
+  }
+  if (pNeighAvail->iTopAvail) { //top: entries 1..4
+    for (int32_t iCol = 1; iCol <= 4; iCol++) {
+      pSampleAvail[iCol] = 1;
+    }
+  }
+  if (pNeighAvail->iRightTopAvail) { //top_right
+    pSampleAvail[5] = 1;
+  }
+}
+
+// Same mapping as the "Normal" variant, but a neighbour MB only counts when it is available AND intra coded.
+void WelsMap4x4NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  const int32_t kiStride = 6; // distance between two rows of the availability cache
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {   //left: entries 6, 12, 18, 24
+    for (int32_t iRow = 1; iRow <= 4; iRow++) {
+      pSampleAvail[iRow * kiStride] = 1;
+    }
+  }
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {  //top_left
+    pSampleAvail[0] = 1;
+  }
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {  //top: entries 1..4
+    for (int32_t iCol = 1; iCol <= 4; iCol++) {
+      pSampleAvail[iCol] = 1;
+    }
+  }
+  if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) {  //top_right
+    pSampleAvail[5] = 1;
+  }
+}
+// Packs neighbour availability into *pSampleAvail: bit 2 = left, bit 1 = top-left, bit 0 = top.
+void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  uint8_t uiMask = *pSampleAvail;
+  if (pNeighAvail->iLeftAvail)
+    uiMask = (1 << 2); // plain assignment, not OR: whatever the caller passed in is discarded here
+  if (pNeighAvail->iLeftTopAvail)
+    uiMask |= (1 << 1);
+  if (pNeighAvail->iTopAvail)
+    uiMask |= 1;
+  *pSampleAvail = uiMask;
+}
+
+// Like the "Normal" variant (bit 2 = left, bit 1 = top-left, bit 0 = top), but only intra coded neighbours count.
+void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  uint8_t uiMask = *pSampleAvail;
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType))
+    uiMask = (1 << 2); // plain assignment, not OR: whatever the caller passed in is discarded here
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType))
+    uiMask |= (1 << 1);
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType))
+    uiMask |= 1;
+  *pSampleAvail = uiMask;
+}
+
+int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+                           PBitStringAux pBs,
+                           PDqLayer pCurDqLayer) { // parses the 16 luma 4x4 prediction modes plus the chroma mode of one Intra4x4 MB (CABAC or bit-reader path, chosen by the PPS flag)
+  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int32_t iFinalMode, i;
+
+  uint8_t uiNeighAvail = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  pCtx->pMap4x4NeighToSampleFunc (pNeighAvail, iSampleAvail);
+  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]); // bit 2: left, bit 1: top-left, bit 0: top
+  for (i = 0; i < 16; i++) {
+    int32_t iPrevIntra4x4PredMode = 0;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode)); // compared against -1 below, which then selects the predicted mode
+      iPrevIntra4x4PredMode = iCode;
+    } else {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); // one flag bit: non-zero selects the predicted mode
+      iPrevIntra4x4PredMode = uiCode;
+    }
+    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
+
+    int8_t iBestMode;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      if (iPrevIntra4x4PredMode == -1)
+        iBestMode = kiPredMode;
+      else
+        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode); // remaining-mode value skips over the predicted mode
+    } else {
+      if (iPrevIntra4x4PredMode) {
+        iBestMode = kiPredMode;
+      } else {
+        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); // 3-bit remaining mode
+        iBestMode = uiCode + (uiCode >= kiPredMode); // NOTE(review): unsigned/signed comparison -- confirm kiPredMode cannot be negative here
+      }
+    }
+
+    iFinalMode = CheckIntra4x4PredMode (&iSampleAvail[0], &iBestMode, i);
+    if (iFinalMode  == ERR_INVALID_INTRA4X4_MODE) {
+      return ERR_INFO_INVALID_I4x4_PRED_MODE;
+    }
+
+    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
+
+    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
+
+    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1; // the block just parsed is flagged in the availability cache for the following blocks
+  }
+  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4])); // store 4 cache entries starting at 1 + 8 * 4, then entry 4 of rows 1..3
+  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
+  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
+  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+  }
+
+  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) { // the checker receives a pointer to the stored mode; a non-zero result is treated as invalid
+    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+  }
+
+  return ERR_NONE;
+}
+
+// Validates the Intra16x16 luma mode already stored in pIntraPredMode[mb][7], then parses and validates the chroma mode.
+int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
+                             PDqLayer pCurDqLayer) {
+  const int32_t kiMbXy = pCurDqLayer->iMbXyIndex;
+  uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
+  pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail);
+
+  //invalid iPredMode, must stop decoding
+  if (CheckIntra16x16PredMode (uiNeighAvail, &pCurDqLayer->pIntraPredMode[kiMbXy][7])) {
+    return ERR_INFO_INVALID_I16x16_PRED_MODE;
+  }
+  if (!pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) { // bit-reader (non-CABAC) path
+    uint32_t uiChromaMode;
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiChromaMode)); //intra_chroma_pred_mode
+    if (uiChromaMode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[kiMbXy] = uiChromaMode;
+  } else { // CABAC path
+    int32_t iChromaMode;
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iChromaMode));
+    if (iChromaMode > MAX_PRED_MODE_ID_CHROMA) {
+      return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+    }
+    pCurDqLayer->pChromaPredMode[kiMbXy] = iChromaMode;
+  }
+  if (-1 == pCurDqLayer->pChromaPredMode[kiMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[kiMbXy])) {
+    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) { // parses one CABAC-coded MB of an I slice: mb_type, prediction modes, CBP, QP delta, residual, end-of-slice flag
+  PDqLayer pCurLayer		 = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux		 = pCurLayer->pBitStringAux;
+  PSlice pSlice			 = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader		 = &pSlice->sSliceHeaderExt.sSliceHeader;
+  SWelsNeighAvail sNeighAvail;
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t iMbMode, i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
+  if (uiMbType > 25) { // valid I-slice mb_type values are 0 (I4x4), 1..24 (I16x16) and 25 (I_PCM)
+    return ERR_INFO_INVALID_MB_TYPE;
+  } else if (25 == uiMbType) {   //I_PCM
+    WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+    if (uiEosFlag) {
+      RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+    }
+    return ERR_NONE;
+  } else if (0 == uiMbType) { //I4x4
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+    pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+    //get uiCbp for I4x4
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpChroma = uiCbp >> 4;
+    uiCbpLuma = uiCbp & 15;
+  } else { //I16x16;
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // the 16x16 prediction mode is carried in the low two bits of (mb_type - 1)
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // the CBP is derived from mb_type, not parsed
+    uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
+  }
+  iMbMode = BASE_MB; // NOTE(review): iMbMode is assigned but never read in this function
+
+  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); // clear the 384 coefficient entries of this MB
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  pCurLayer->pCbfDc[iMbXy] = 0;
+
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) { // no residual, hence no QP delta in the stream: reuse the previous MB's QP
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset), 0, 51)];
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
+      return ERR_INFO_INVALID_QP;
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset), 0, 51)];
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy rows 1..4 of the non-zero-count cache (4 entries each, starting at column 1) into the layer
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { //pNonZeroCount = 0
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+        if (uiCbpLuma & (1 << iId8x8)) { // one CBP bit per 8x8 luma block
+          int32_t iIdx = (iId8x8 << 2);
+          for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+            //Luma (DC and AC decoding together)
+            WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                              g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pLumaQp[iMbXy],
+                              pCtx));
+            iIdx++;
+          }
+        } else { // 8x8 block not coded: zero the cache entries of its four 4x4 blocks, two at a time
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+        }
+      }
+      ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+      ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+      ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+      ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      //Cb Cr -- NOTE(review): the first block (index 16) is tagged CHROMA_DC_V and the second CHROMA_DC_U, while the AC loop below tags i == 0 as CHROMA_AC_U; confirm the DC order is intended
+      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (0 << 2), 4, g_kuiChromaDcScan,
+                        CHROMA_DC_V, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (0 << 6), pCurLayer->pChromaQp[iMbXy], pCtx));
+      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (1 << 2), 4, g_kuiChromaDcScan,
+                        CHROMA_DC_U, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (1 << 6), pCurLayer->pChromaQp[iMbXy], pCtx));
+    }
+
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        int32_t iResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIdx = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iResProperty,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy], pCtx));
+          iIdx++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); // chroma counts sit at column 6 of cache rows 1, 2, 4 and 5
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
+    }
+  } else { // neither residual nor Intra16x16: all non-zero counts are zero
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) { // last MB of the slice: hand the read position back to the bit reader
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // thin wrapper; pNalCur is not used here
+  WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag)); // all parsing is done by the BaseMode0 routine
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) { // parses one non-skipped MB of a CABAC P slice: MB type, prediction info, CBP, QP delta, residuals, end-of-slice flag
+  PDqLayer pCurLayer		 = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux		 = pCurLayer->pBitStringAux;
+  PSlice pSlice			 = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader		 = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+
+  int32_t iMbMode, i; // NOTE(review): iMbMode is only assigned in this function, never read
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+
+  WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
+  // uiMbType == 4 is not allowed: "uiMbType -= 5" below wraps the unsigned value past 25, so it is rejected as invalid.
+  if (uiMbType < 4) { //Inter mode
+    int16_t pMotionVector[LIST_A][30][MV_A];
+    int16_t pMvdCache[LIST_A][30][MV_A];
+    int8_t	pRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+    WELS_READ_VERIFY (ParseInterMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
+    iMbMode = BASE_MB;
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; // same value was already stored before ParseMBTypePSliceCabac
+  } else { //Intra mode
+    uiMbType -= 5; // rebase so that 0 = Intra4x4, 1..24 = Intra16x16, 25 = I_PCM (see the branches below)
+    if (uiMbType > 25) {
+      return ERR_INFO_INVALID_MB_TYPE;
+    }
+
+    if (25 == uiMbType) {   //I_PCM
+      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+      if (uiEosFlag) {
+        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+      }
+      return ERR_NONE;
+    } else { //normal Intra mode
+      if (0 == uiMbType) { //Intra4x4
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        pCtx->pFillInfoCacheIntra4x4Func (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+      } else { //Intra16x16
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // for Intra16x16 the CBP comes from this table, not from the bitstream
+        uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+      }
+      iMbMode = BASE_MB;
+    }
+  }
+
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { // every type except Intra16x16 parses its CBP explicitly
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpChroma = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16); // clear the coefficient buffer: 16x16 at offset 0, then 8x8 blocks at +256 and +320
+  pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
+  pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
+
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0); // default all 24 nzc entries to 0; coded blocks overwrite them below
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { // QP delta and residuals are parsed only with a non-zero CBP or for Intra16x16
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp; NOTE(review): no range check on iQpDelta is visible here -- confirm ParseDeltaQpCabac bounds it
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                            pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy four rows of the local nzc cache (row stride 8, column offset 1) into the per-MB array
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+        if (uiCbpLuma & (1 << iId8x8)) { // one CBP bit per 8x8 luma block
+          int32_t iIdx = (iId8x8 << 2);
+          for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+            //Luma (DC and AC decoding together)
+            WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                              g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pLumaQp[iMbXy],
+                              pCtx));
+            iIdx++;
+          }
+        } else {
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0); // 8x8 block not coded: clear its cached nzc entries (assuming ST16 writes 16 bits, i.e. two entries per store)
+          ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+        }
+      }
+      ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+      ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+      ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+      ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        int32_t iResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy], pCtx));
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        int32_t iResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t index = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                            iResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy], pCtx));
+          index++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+    }
+  } else { // nothing coded: QP carries over from the previous MB
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // CABAC P-slice macroblock entry point: handles a skipped MB inline, otherwise defers to WelsDecodeMbCabacPSliceBaseMode0; pNalCur is not used here
+  PDqLayer pCurLayer		 = pCtx->pCurDqLayer;
+  PSlice pSlice			 = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader		 = &pSlice->sSliceHeaderExt.sSliceHeader;
+  uint32_t uiCode;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i;
+  SWelsNeighAvail uiNeighAvail;
+  pCurLayer->pCbp[iMbXy] = 0;
+  pCurLayer->pCbfDc[iMbXy] = 0;
+  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
+
+  GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
+  if (uiCode) { // non-zero skip flag: no further syntax is parsed for this MB except the end-of-slice flag
+    int16_t pMv[2] = {0};
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+
+    //predict mv
+    PredPSkipMvFromNeighbor (pCurLayer, pMv);
+    for (i = 0; i < 16; i++) {
+      ST32 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)pMv); // both 16-bit MV components are copied through one 32-bit read of pMv
+      ST32 (pCurLayer->pMvd[0][iMbXy][i], 0);
+    }
+
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+      memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+    }
+
+    //reset rS
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // a skipped MB takes the QP of the previous MB (iLastMbQp is left unchanged)
+    pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                  pSliceHeader->pPps->iChromaQpIndexOffset, 0, 51)];
+
+    //for neighboring CABAC usage
+    pSlice->iLastDeltaQp = 0;
+
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); // NOTE(review): unlike the non-skip path, RestoreCabacDecEngineToBS is not called here when uiEosFlag is set -- confirm this is intended
+
+    return ERR_NONE;
+  }
+
+  WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
+  return ERR_NONE;
+}
+
 int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) {
   PDqLayer pCurLayer = pCtx->pCurDqLayer;
   PFmo pFmo = pCtx->pFmo;
@@ -329,34 +900,50 @@
   PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
   int32_t iMbX, iMbY;
   const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
-  PBitStringAux pBs = pCurLayer->pBitStringAux;
-  intX_t iUsedBits  = 0;
+  uint32_t uiEosFlag = 0;
+  PWelsDecMbFunc pDecMbFunc;
 
-  PWelsDecMbCavlcFunc pDecMbCavlcFunc;
-
   pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
 
-  if (P_SLICE == pSliceHeader->eSliceType) {
-    pDecMbCavlcFunc = WelsDecodeMbCavlcPSlice;
-  } else { //I_SLICE
-    pDecMbCavlcFunc = WelsDecodeMbCavlcISlice;
+  if (pCtx->pPps->bEntropyCodingModeFlag) {
+    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
+        pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
+        pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+               "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
+      pCtx->iErrorCode |= dsBitstreamError;
+      return dsBitstreamError;
+    }
+    if (P_SLICE == pSliceHeader->eSliceType)
+      pDecMbFunc = WelsDecodeMbCabacPSlice;
+    else //I_SLICE. B_SLICE not supported now
+      pDecMbFunc = WelsDecodeMbCabacISlice;
+  } else {
+    if (P_SLICE == pSliceHeader->eSliceType) {
+      pDecMbFunc = WelsDecodeMbCavlcPSlice;
+    } else { //I_SLICE
+      pDecMbFunc = WelsDecodeMbCavlcISlice;
+    }
   }
 
   if (pSliceHeader->pPps->bConstainedIntraPredFlag) {
     pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain1Intra4x4;
-    pCtx->pParseIntra4x4ModeFunc      = ParseIntra4x4ModeConstrain1;
-    pCtx->pParseIntra16x16ModeFunc    = ParseIntra16x16ModeConstrain1;
+    pCtx->pMap4x4NeighToSampleFunc    = WelsMap4x4NeighToSampleConstrain1;
+    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleConstrain1;
   } else {
     pCtx->pFillInfoCacheIntra4x4Func = WelsFillCacheConstrain0Intra4x4;
-    pCtx->pParseIntra4x4ModeFunc      = ParseIntra4x4ModeConstrain0;
-    pCtx->pParseIntra16x16ModeFunc    = ParseIntra16x16ModeConstrain0;
+    pCtx->pMap4x4NeighToSampleFunc    = WelsMap4x4NeighToSampleNormal;
+    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleNormal;
   }
 
   pCtx->eSliceType = pSliceHeader->eSliceType;
-
   if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
-    //CABAC encoding is unsupported yet!
-    return -1;
+    int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
+    int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
+    WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
+    //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
+    pSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
   }
 
   iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
@@ -398,7 +985,7 @@
     }
 
     pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
-    iRet = pDecMbCavlcFunc (pCtx,  pNalCur);
+    iRet = pDecMbFunc (pCtx,  pNalCur, uiEosFlag);
 
     if (iRet != ERR_NONE) {
       return iRet;
@@ -405,24 +992,14 @@
     }
 
     ++pSlice->iTotalMbInCurSlice;
-
+    if (uiEosFlag) { //end of slice
+      break;
+    }
     if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
       iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
     } else {
       ++iNextMbXyIndex;
     }
-
-    // check whether there is left bits to read next time in case multiple slices
-    iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
-    if (iUsedBits == pBs->iBits && 0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun) {	// slice boundary
-      break;
-    }
-    if (iUsedBits > pBs->iBits) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-               "WelsDecodeSlice()::::pBs incomplete, iUsedBits:%" PRId64" > pBs->iBits:%d, MUST stop decoding.",
-               (int64_t) iUsedBits, pBs->iBits);
-      return -1;
-    }
     iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
     iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
     pCurLayer->iMbX =  iMbX;
@@ -440,7 +1017,7 @@
   PSlice pSlice			 = &pCurLayer->sLayerInfo.sSliceInLayer;
   PSliceHeader pSliceHeader		     = &pSlice->sSliceHeaderExt.sSliceHeader;
 
-  SNeighAvail sNeighAvail;
+  SWelsNeighAvail sNeighAvail;
 
   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
   int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
@@ -455,11 +1032,11 @@
   int32_t iCode;
 
   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
   pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
   pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
 
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_type
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
   uiMbType = uiCode;
   if (uiMbType > 25) {
     return ERR_INFO_INVALID_MB_TYPE;
@@ -508,20 +1085,18 @@
     }
 
     pBs->pCurBuf += 384;
-    InitReadBits (pBs);
 
     //step 3: update QP and pNonZeroCount
     pCurLayer->pLumaQp[iMbXy] = 0;
     pCurLayer->pChromaQp[iMbXy] = 0;
     memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy]));   //Rec. 9.2.1 for PCM, nzc=16
+    WELS_READ_VERIFY(InitReadBits (pBs, 0));
     return 0;
   } else if (0 == uiMbType) { //reference to JM
     ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
     pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
     pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-    if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
-      return -1;
-    }
+    WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
 
     //uiCbp
     WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
@@ -542,9 +1117,7 @@
     uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
     uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
     WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-    if (pCtx->pParseIntra16x16ModeFunc (&sNeighAvail, pBs, pCurLayer)) {
-      return -1;
-    }
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
   }
 
   memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
@@ -572,19 +1145,7 @@
       return ERR_INFO_INVALID_QP;
     }
 
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp + iQpDelta; //update iLastMbQp
-    //refer to JVT-X201wcm1.doc equation(7-35)
-    if ((unsigned) (pCurLayer->pLumaQp[iMbXy]) > 51) {
-      if (pCurLayer->pLumaQp[iMbXy] < 0) {
-        pCurLayer->pLumaQp[iMbXy] += 52;
-      } else {
-        pCurLayer->pLumaQp[iMbXy] -= 52;
-      }
-    }
-    //QP should be in the range of [0, 51]
-    if (pCurLayer->pLumaQp[iMbXy] < 0 || pCurLayer->pLumaQp[iMbXy] > 51) {
-      return ERR_INFO_INVALID_QP;
-    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
     pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
     pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
                                   pSliceHeader->pPps->iChromaQpIndexOffset, 0,
@@ -673,7 +1234,7 @@
   return 0;
 }
 
-int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) {
+int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
   PDqLayer pCurLayer = pCtx->pCurDqLayer;
   PBitStringAux pBs = pCurLayer->pBitStringAux;
   PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
@@ -680,7 +1241,7 @@
   int32_t iBaseModeFlag;
   int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
   uint32_t uiCode;
-
+  intX_t iUsedBits;
   if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) {
     WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
     iBaseModeFlag = uiCode;
@@ -698,6 +1259,19 @@
     return iRet;
   }
 
+  // Check whether any bits are left to read next time (multiple slices may share the buffer).
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // Subtract 1 to account for the stop bit.
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) {	// slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { // Bitstream incomplete: stop decoding as soon as this is detected, to avoid mosaic or a crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return -1;
+  }
   return 0;
 }
 
@@ -708,11 +1282,10 @@
   PSlice pSlice			 = &pCurLayer->sLayerInfo.sSliceInLayer;
   PSliceHeader pSliceHeader		     = &pSlice->sSliceHeaderExt.sSliceHeader;
 
-  SNeighAvail sNeighAvail;
-
   int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
   int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
 
+  SWelsNeighAvail sNeighAvail;
   int32_t iMbX = pCurLayer->iMbX;
   int32_t iMbY = pCurLayer->iMbY;
   const int32_t iMbXy = pCurLayer->iMbXyIndex;
@@ -721,15 +1294,13 @@
   uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
   uint32_t uiCode;
   int32_t iCode;
-
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
   ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
   pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_type
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
   uiMbType = uiCode;
   if (uiMbType < 5) { //inter MB type
     int16_t iMotionVector[LIST_A][30][MV_A];
-
     int8_t	iRefIndex[LIST_A][30];
     pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
     WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
@@ -800,7 +1371,6 @@
       }
 
       pBs->pCurBuf += 384;
-      InitReadBits (pBs);
 
       //step 3: update QP and pNonZeroCount
       pCurLayer->pLumaQp[iMbXy] = 0;
@@ -812,6 +1382,7 @@
       ST32A4 (&pNzc[12], 0x10101010);
       ST32A4 (&pNzc[16], 0x10101010);
       ST32A4 (&pNzc[20], 0x10101010);
+      WELS_READ_VERIFY (InitReadBits (pBs, 0));
       return 0;
     } else {
       if (0 == uiMbType) {
@@ -818,7 +1389,7 @@
         ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
         pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
         pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-        if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
+        if (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
           return -1;
         }
       } else { //I_PCM exclude, we can ignore it
@@ -828,7 +1399,7 @@
         uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
         uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
         WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-        if (pCtx->pParseIntra16x16ModeFunc (&sNeighAvail, pBs, pCurLayer)) {
+        if (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) {
           return -1;
         }
       }
@@ -877,19 +1448,7 @@
       return ERR_INFO_INVALID_QP;
     }
 
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp + iQpDelta; //update iLastMbQp
-    //refer to JVT-X201wcm1.doc equation(7-35)
-    if ((unsigned) (pCurLayer->pLumaQp[iMbXy]) > 51) {
-      if (pCurLayer->pLumaQp[iMbXy] < 0) {
-        pCurLayer->pLumaQp[iMbXy] += 52;
-      } else {
-        pCurLayer->pLumaQp[iMbXy] -= 52;
-      }
-    }
-    //QP should be in the range of [0, 51]
-    if (pCurLayer->pLumaQp[iMbXy] < 0 || pCurLayer->pLumaQp[iMbXy] > 51) {
-      return ERR_INFO_INVALID_QP;
-    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
     pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
     pCurLayer->pChromaQp[iMbXy] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
                                   pSliceHeader->pPps->iChromaQpIndexOffset, 0,
@@ -978,12 +1537,12 @@
   return 0;
 }
 
-int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur) {
+int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
   PDqLayer pCurLayer		 = pCtx->pCurDqLayer;
   PBitStringAux pBs		 = pCurLayer->pBitStringAux;
   PSlice pSlice			 = &pCurLayer->sLayerInfo.sSliceInLayer;
   PSliceHeader pSliceHeader		    = &pSlice->sSliceHeaderExt.sSliceHeader;
-
+  intX_t iUsedBits;
   const int32_t iMbXy = pCurLayer->iMbXyIndex;
   int8_t* pNzc = pCurLayer->pNzc[iMbXy];
   int32_t iBaseModeFlag, i;
@@ -996,7 +1555,6 @@
     if (-1 == pSlice->iMbSkipRun) {
       return -1;
     }
-
   }
   if (pSlice->iMbSkipRun--) {
     int16_t iMv[2];
@@ -1031,27 +1589,37 @@
     }
 
     pCurLayer->pCbp[iMbXy] = 0;
-
-    return 0;
-  }
-
-  if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
-    iBaseModeFlag = uiCode;
   } else {
-    iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+      iBaseModeFlag = uiCode;
+    } else {
+      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    }
+    if (!iBaseModeFlag) {
+      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+               iBaseModeFlag);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+    }
+    if (iRet) { //occur error when parsing, MUST STOP decoding
+      return iRet;
+    }
   }
-  if (!iBaseModeFlag) {
-    iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
-  } else {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
-             iBaseModeFlag);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+  // Check whether any bits are left to read next time (multiple slices may share the buffer).
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // Subtract 1 to account for the stop bit.
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) {	// slice boundary
+    uiEosFlag = 1;
   }
-  if (iRet) { //occur error when parsing, MUST STOP decoding
-    return iRet;
+  if (iUsedBits > (pBs->iBits -
+                   1)) { // Bitstream incomplete: stop decoding as soon as this is detected, to avoid mosaic or a crash.
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcPSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return -1;
   }
-
   return 0;
 }
 
@@ -1069,6 +1637,21 @@
     pFunc->pWelsSetNonZeroCountFunc		= SetNonZeroCount_AArch64_neon;
   }
 #endif
+
+  pFunc->pWelsBlockZero16x16Func	    = WelsBlockZero16x16_c;
+  pFunc->pWelsBlockZero8x8Func	    = WelsBlockZero8x8_c;
+  //TODO: add NEON and x86 implementations
+#ifdef	HAVE_NEON
+  if (iCpu & WELS_CPU_NEON) {
+
+  }
+#endif
+
+#ifdef	HAVE_NEON_AARCH64
+  if (iCpu & WELS_CPU_NEON) {
+
+  }
+#endif
 }
 
 void SetNonZeroCount_c (int8_t* pNonZeroCount) {
@@ -1077,6 +1660,23 @@
   for (i = 0; i < 24; i++) {
     pNonZeroCount[i] = !!pNonZeroCount[i];
   }
+}
+
+void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) { // fills iH rows of iW int16_t elements each, rows iStride elements apart, byte-wise with uiVal
+  int32_t i;
+  int16_t* pDst = pBlock;
+
+  for (i = 0; i < iH; i++) {
+    memset (pDst, uiVal, iW * sizeof (int16_t)); // memset writes bytes: each int16_t gets uiVal in both of its bytes, so the element equals uiVal only when uiVal is 0
+    pDst += iStride; // stride is counted in int16_t elements, not bytes
+  }
+}
+void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) { // zeroes a 16x16 block of int16_t whose rows are iStride elements apart
+  WelsBlockInit (pBlock, 16, 16, iStride, 0);
+}
+
+void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) { // zeroes an 8x8 block of int16_t whose rows are iStride elements apart
+  WelsBlockInit (pBlock, 8, 8, iStride, 0);
 }
 
 } // namespace WelsDec
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -139,8 +139,8 @@
   pCtx->uiCpuFlag					= 0;
 
   pCtx->bAuReadyFlag				= 0; // au data is not ready
+  pCtx->bCabacInited = false;
 
-
   pCtx->uiCpuFlag = WelsCPUFeatureDetect (&iCpuCores);
 
   pCtx->iImgWidthInPixel		= 0;
@@ -241,6 +241,10 @@
 
   pCtx->bHaveGotMemory	= true;			// global memory for decoder context related is requested
   pCtx->pDec		        = NULL;			// need prefetch a new pic due to spatial size changed
+
+  if (pCtx->pCabacDecEngine == NULL)
+    pCtx->pCabacDecEngine = (SWelsCabacDecEngine*) WelsMalloc (sizeof (SWelsCabacDecEngine), "pCtx->pCabacDecEngine");
+
   return ERR_NONE;
 }
 
@@ -267,7 +271,7 @@
   pCtx->iImgWidthInPixel	= 0;
   pCtx->iImgHeightInPixel = 0;
   pCtx->bHaveGotMemory	= false;
-
+  WelsFree (pCtx->pCabacDecEngine, "pCtx->pCabacDecEngine"); pCtx->pCabacDecEngine = NULL; // clear it: allocation is guarded by "== NULL", so a stale pointer would be reused after this free
 }
 
 /*!
@@ -464,6 +468,7 @@
         } else {
 
           iConsumedBytes = 0;
+          pDstNal[iDstIdx] = pDstNal[iDstIdx + 1] = pDstNal[iDstIdx + 2] = pDstNal[iDstIdx + 3] = 0; // set 4 reserved bytes to zero
           pNalPayload	= ParseNalHeader (pCtx, &pCtx->sCurNalHead, pDstNal, iDstIdx, pSrcNal - 3, iSrcIdx + 3, &iConsumedBytes);
           if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) {
             CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
@@ -502,13 +507,12 @@
             return pCtx->iErrorCode;
           }
 
-          pDstNal += iDstIdx; //update current position
+          pDstNal += (iDstIdx + 4); //init, increase 4 reserved zero bytes, used to store the next NAL
           if ((iSrcLength - iSrcConsumed + 4) > (pRawData->pEnd - pDstNal)) {
-            pRawData->pCurPos = pRawData->pHead;
+            pDstNal = pRawData->pCurPos = pRawData->pHead;
           } else {
             pRawData->pCurPos = pDstNal;
           }
-          pDstNal = pRawData->pCurPos + 4; //init, 4 bytes used to store the next NAL
 
           pSrcNal += iSrcIdx + 3;
           iSrcConsumed += 3;
@@ -524,6 +528,7 @@
     //last NAL decoding
 
     iConsumedBytes = 0;
+    pDstNal[iDstIdx] = pDstNal[iDstIdx + 1] = pDstNal[iDstIdx + 2] = pDstNal[iDstIdx + 3] = 0; // set 4 reserved bytes to zero
     pNalPayload = ParseNalHeader (pCtx, &pCtx->sCurNalHead, pDstNal, iDstIdx, pSrcNal - 3, iSrcIdx + 3, &iConsumedBytes);
     if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) {
       CheckAndFinishLastPic (pCtx, ppDst, pDstBufInfo);
@@ -557,8 +562,7 @@
       }
       return pCtx->iErrorCode;
     }
-    pDstNal += iDstIdx;
-    pRawData->pCurPos = pDstNal; //init the pCurPos for next NAL(s) storage
+    pRawData->pCurPos = pDstNal + iDstIdx + 4; //init, increase 4 reserved zero bytes, used to store the next NAL
   } else { /* no supplementary picture payload input, but stored a picture */
     PAccessUnit pCurAu	=
       pCtx->pAccessUnitList;	// current access unit, it will never point to NULL after decode's successful initialization
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -707,8 +707,11 @@
   }
 
   if (pPps->bEntropyCodingModeFlag) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): CABAC in Enhancement layer not supported.");
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_CABAC_EL);
+    if (pSliceHead->eSliceType != I_SLICE && pSliceHead->eSliceType != SI_SLICE) {
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+      pSliceHead->iCabacInitIdc = uiCode;
+    } else
+      pSliceHead->iCabacInitIdc = 0;
   }
 
   WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_qp_delta
@@ -1021,6 +1024,10 @@
                            "pCtx->sMb.pLumaQp[]");
     pCtx->sMb.pChromaQp[i] = (int8_t*)WelsMalloc (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
                              "pCtx->sMb.pChromaQp[]");
+    pCtx->sMb.pMvd[i][0] = (int16_t (*)[16][2])WelsMalloc (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
+                             int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
+    pCtx->sMb.pCbfDc[i] = (uint8_t*)WelsMalloc (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint8_t),
+                          "pCtx->sMb.pCbfDc[]");
     pCtx->sMb.pNzc[i] = (int8_t (*)[24])WelsMalloc (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
                         "pCtx->sMb.pNzc[]");
     pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])WelsMalloc (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
@@ -1057,6 +1064,8 @@
                             (NULL == pCtx->sMb.pRefIndex[i][0]) ||
                             (NULL == pCtx->sMb.pLumaQp[i]) ||
                             (NULL == pCtx->sMb.pChromaQp[i]) ||
+                            (NULL == pCtx->sMb.pMvd[i][0]) ||
+                            (NULL == pCtx->sMb.pCbfDc[i]) ||
                             (NULL == pCtx->sMb.pNzc[i]) ||
                             (NULL == pCtx->sMb.pNzcRs[i]) ||
                             (NULL == pCtx->sMb.pScaledTCoeff[i]) ||
@@ -1076,7 +1085,6 @@
     ++ i;
   } while (i < LAYER_NUM_EXCHANGEABLE);
 
-
   pCtx->bInitialDqLayersMem	= true;
   pCtx->iPicWidthReq			= kiMaxWidth;
   pCtx->iPicHeightReq			= kiMaxHeight;
@@ -1124,6 +1132,16 @@
       pCtx->sMb.pChromaQp[i] = NULL;
     }
 
+    if (pCtx->sMb.pMvd[i][0]) {
+      WelsFree (pCtx->sMb.pMvd[i][0], "pCtx->sMb.pMvd[][]");
+      pCtx->sMb.pMvd[i][0] = NULL;
+    }
+
+    if (pCtx->sMb.pCbfDc[i]) {
+      WelsFree (pCtx->sMb.pCbfDc[i], "pCtx->sMb.pCbfDc[]");
+      pCtx->sMb.pCbfDc[i] = NULL;
+    }
+
     if (pCtx->sMb.pNzc[i]) {
       WelsFree (pCtx->sMb.pNzc[i], "pCtx->sMb.pNzc[]");
 
@@ -1754,6 +1772,8 @@
     pCurDq->pRefIndex[0]    = pCtx->sMb.pRefIndex[0][0];
     pCurDq->pLumaQp         = pCtx->sMb.pLumaQp[0];
     pCurDq->pChromaQp       = pCtx->sMb.pChromaQp[0];
+    pCurDq->pMvd[0]       = pCtx->sMb.pMvd[0][0];
+    pCurDq->pCbfDc       = pCtx->sMb.pCbfDc[0];
     pCurDq->pNzc			= pCtx->sMb.pNzc[0];
     pCurDq->pNzcRs			= pCtx->sMb.pNzcRs[0];
     pCurDq->pScaledTCoeff   = pCtx->sMb.pScaledTCoeff[0];
--- a/codec/decoder/core/src/decoder_data_tables.cpp
+++ b/codec/decoder/core/src/decoder_data_tables.cpp
@@ -54,6 +54,56 @@
 // extern at wels_common_basis.h
 
 /*common use table*/
+const uint8_t g_kMbNonZeroCountIdx[24] = { // 24-entry index permutation (16 luma + 8 chroma per the sketch below); presumably z-order block index -> raster slot in pNonZeroCount — TODO confirm against users
+  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
+  0,  1,  4,  5,   //  2   3 | 6  7        0  |  1                  0   1   2   3
+  2,  3,  6,  7,   //---------------      ---------                 4   5   6   7
+  8,  9, 12, 13,   //  8   9 | 12 13       2  |  3                  8   9  10  11
+  10, 11, 14, 15,   // 10  11 | 14 15-----------------------------> 12  13  14  15
+  16, 17, 20, 21,   //----------------    chroma 8*8 block          16  17  18  19
+  18, 19, 22, 23   // 16  17 | 20 21        0    1                 20  21  22  23
+};
+//cache element equal to 26
+
+const uint8_t g_kCacheNzcScanIdx[4 * 4 + 4 + 4 + 3] = { // 27 positions into a stride-8 cache (value = x + y*8, see per-row notes): 16 luma, 4 Cb, 4 Cr, 1 luma DC, 2 chroma DC
+  /* Luma */
+  9, 10, 17, 18,	// 1+1*8, 2+1*8, 1+2*8, 2+2*8,
+  11, 12, 19, 20,	// 3+1*8, 4+1*8, 3+2*8, 4+2*8,
+  25, 26, 33, 34,	// 1+3*8, 2+3*8, 1+4*8, 2+4*8,
+  27, 28, 35, 36,	// 3+3*8, 4+3*8, 3+4*8, 4+4*8,
+  /* Cb */
+  14, 15,			// 6+1*8, 7+1*8,
+  22, 23,			// 6+2*8, 7+2*8,
+
+  /* Cr */
+  38, 39,			// 6+4*8, 7+4*8,
+  46, 47,			// 6+5*8, 7+5*8,
+  /* Luma DC */
+  41,   // 1+5*8
+  /* Chroma DC */
+  42, 43 // 2+5*8, 3+5*8,
+};
+
+const uint8_t g_kCache26ScanIdx[16] = { //intra4*4_pred_mode and pNonZeroCount cache scan index, 4*4 block as basic unit; every value equals x + y*5 with x,y in 1..4 (e.g. 6 = 1+1*5, 24 = 4+4*5)
+  6,  7, 11, 12,
+  8,  9, 13, 14,
+  16, 17, 21, 22,
+  18, 19, 23, 24
+};
+
+//cache element equal to 30
+const uint8_t g_kCache30ScanIdx[16] = { //mv or pRefIndex cache scan index, 4*4 block as basic unit; every value equals x + y*6 with x,y in 1..4 (e.g. 7 = 1+1*6, 28 = 4+4*6)
+  7,  8, 13, 14,
+  9, 10, 15, 16,
+  19, 20, 25, 26,
+  21, 22, 27, 28
+};
+
+const uint8_t g_kNonZeroScanIdxC[4] = { //pNonZeroCount cache for chroma, 4*4 block as basic unit; every value equals x + y*3 with x,y in 1..2
+  4, 5,
+  7, 8
+};
+
 const uint8_t g_kuiScan8[24] = {	// [16 + 2*4]
   9, 10, 17, 18,	// 1+1*8, 2+1*8, 1+2*8, 2+2*8,
   11, 12, 19, 20,	// 3+1*8, 4+1*8, 3+2*8, 4+2*8,
--- /dev/null
+++ b/codec/decoder/core/src/parse_mb_syn_cabac.cpp
@@ -1,0 +1,909 @@
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *	parse_mb_syn_cabac.cpp:	cabac parse for syntax elements
+ */
+#include "parse_mb_syn_cabac.h"
+#include "mv_pred.h"
+#include "error_code.h"
+namespace WelsDec {
+#define IDX_UNUSED -1 // placeholder stored in slot 0 of each 10-entry table below
+static const int16_t g_kMaxPos       [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 3, 3, 14, 14}; // presumably highest coefficient position per residual block category — TODO confirm at the use site (not in this chunk)
+static const int16_t g_kMaxC2       [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 3, 3, 4, 4}; // NOTE(review): meaning not derivable from this chunk; indexed like the tables below
+static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 12, 12, 16, 16}; // per the name: block category -> context offset for the coded-block flag (assumed; verify)
+static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 44, 44, 47, 47}; // per the name: block category -> context offset for the significance map (assumed; verify)
+static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 44, 44, 47, 47}; // same values as g_kBlockCat2CtxOffsetMap
+static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 30, 30, 39, 39}; // per the name: block category -> context offset for the "greater than one" bins (assumed; verify)
+static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 30, 30, 39, 39}; // same values as g_kBlockCat2CtxOffsetOne
+
+const uint8_t g_kTopBlkInsideMb[24] = { //for index with z-order 0~23; per the name, 1 when the block directly above lies inside the same macroblock, 0 when it belongs to the MB above
+  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
+  0,  0,  1,  1,   //  2   3 | 6  7        0  |  1                  0   1   2   3
+  0,  0,  1,  1,   //---------------      ---------                 4   5   6   7
+  1,  1,  1,  1,   //  8   9 | 12 13       2  |  3                  8   9  10  11
+  1,  1,  1,  1,  // 10  11 | 14 15-----------------------------> 12  13  14  15
+  0,  0,  1,  1,   //----------------    chroma 8*8 block          16  17  18  19
+  0,  0,  1,  1   // 16  17 | 20 21        0    1                 20  21  22  23
+  // 18  19 | 22 23
+};
+
+const uint8_t g_kLeftBlkInsideMb[24] = { //for index with z-order 0~23; per the name, 1 when the block directly to the left lies inside the same macroblock, 0 when it belongs to the MB on the left
+  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
+  0,  1,  0,  1,   //  2   3 | 6  7        0  |  1                  0   1   2   3
+  1,  1,  1,  1,   //---------------      ---------                 4   5   6   7
+  0,  1,  0,  1,   //  8   9 | 12 13       2  |  3                  8   9  10  11
+  1,  1,  1,  1,  // 10  11 | 14 15-----------------------------> 12  13  14  15
+  0,  1,  0,  1,   //----------------    chroma 8*8 block          16  17  18  19
+  0,  1,  0,  1   // 16  17 | 20 21        0    1                 20  21  22  23
+  // 18  19 | 22 23
+};
+
+void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                             const int8_t iListIdx) { // Writes iRef into two consecutive 4-entry rows, both in the layer's per-MB ref-index array and in the caller's cache, starting at partition iPartIdx.
+  int32_t iRef32Bit = (int32_t) iRef;
+  const int32_t iRef4Bytes = (iRef32Bit << 24) | (iRef32Bit << 16) | (iRef32Bit << 8) | iRef32Bit; // iRef replicated into all 4 bytes; only exact for non-negative iRef (a negative value sign-extends into the upper bytes)
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx]; // first entry in the per-MB array
+  const uint8_t iScan4Idx4 = 4 + iScan4Idx; // next row of the per-MB array (row stride 4)
+  const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; // first entry in the cache
+  const uint8_t iCacheIdx6 = 6 + iCacheIdx; // next row of the cache (row stride 6)
+  //mb
+  ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes); // ST32: store macro defined elsewhere, presumably a 32-bit write
+  ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
+  //cache
+  ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes);
+  ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes);
+}
+
+void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                             const int8_t iListIdx) { // Writes iRef into a 2-entry-wide column: two passes (iPartIdx, iPartIdx + 8), each covering 2 entries on 2 consecutive rows, in both the per-MB array and the cache.
+  int16_t iRef16Bit = (int16_t) iRef;
+  const int16_t iRef2Bytes = (iRef16Bit << 8) | iRef16Bit; // iRef replicated into both bytes; only exact for non-negative iRef
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  for (i = 0; i < 2; i++, iPartIdx += 8) { // second pass handles the partition 8 z-order steps further
+    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+    const uint8_t iScan4Idx4 = 4 + iScan4Idx; // next row in the per-MB array
+    const uint8_t iCacheIdx6 = 6 + iCacheIdx; // next row in the cache
+    //mb
+    ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes); // ST16: store macro defined elsewhere, presumably a 16-bit write
+    ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
+    //cache
+    ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes);
+    ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes);
+  }
+}
+
+void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                            const int8_t iListIdx) { // Writes iRef into the 2x2 group of per-MB ref-index entries starting at partition iPartIdx. NOTE(review): the pRefIndex cache parameter is not touched here.
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+  pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] =
+        pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 5] = iRef; // offsets +0, +1, +4, +5: two entries on each of two rows (row stride 4)
+}
+
+void UpdateP16x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvd[2], const int8_t iListIdx) { // Copies the (x, y) mvd pair in pMvd[0..1] into all 16 per-MB mvd entries of list iListIdx.
+  int32_t pMvd32[2]; // two back-to-back copies of the 4-byte (x, y) pair
+  ST32 (&pMvd32[0], LD32 (pMvd));
+  ST32 (&pMvd32[1], LD32 (pMvd));
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  for (i = 0; i < 16; i += 2) { // each store is meant to fill entries i and i+1 (ST64/LD64 are macros defined elsewhere, presumably 64-bit accesses)
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][i], LD64 (pMvd32));
+  }
+}
+
+void UpdateP16x8MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2],
+                          const int8_t iListIdx) { // Copies the (x, y) mvd pair into a 4-wide, 2-row group of entries in both the per-MB mvd array and pMvdCache, starting at partition iPartIdx.
+  int32_t pMvd32[2]; // two back-to-back copies of the 4-byte (x, y) pair
+  ST32 (&pMvd32[0], LD32 (pMvd));
+  ST32 (&pMvd32[1], LD32 (pMvd));
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  for (i = 0; i < 2; i++, iPartIdx += 4) { // second pass handles the partition 4 z-order steps further (the right half of the same rows)
+    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+    const uint8_t iScan4Idx4 = 4 + iScan4Idx; // next row in the per-MB array
+    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+    const uint8_t iCacheIdx6 = 6 + iCacheIdx; // next row in the cache
+    //mb
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx ], LD64 (pMvd32)); // each ST64 is meant to cover two adjacent entries
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx4], LD64 (pMvd32));
+    //cache
+    ST64 (pMvdCache[iListIdx][  iCacheIdx ], LD64 (pMvd32));
+    ST64 (pMvdCache[iListIdx][  iCacheIdx6], LD64 (pMvd32));
+  }
+}
+
+void UpdateP8x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2],
+                          const int8_t iListIdx) { // Copies the (x, y) mvd pair into a 2-wide, 4-row group of entries in both the per-MB mvd array and pMvdCache, starting at partition iPartIdx.
+  int32_t pMvd32[2]; // two back-to-back copies of the 4-byte (x, y) pair
+  ST32 (&pMvd32[0], LD32 (pMvd));
+  ST32 (&pMvd32[1], LD32 (pMvd));
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+
+  for (i = 0; i < 2; i++, iPartIdx += 8) { // second pass handles the partition 8 z-order steps further (the lower half of the same columns)
+    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+    const uint8_t iScan4Idx4 = 4 + iScan4Idx; // next row in the per-MB array
+    const uint8_t iCacheIdx6 = 6 + iCacheIdx; // next row in the cache
+    //mb
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx ], LD64 (pMvd32)); // each ST64 is meant to cover two adjacent entries
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx4], LD64 (pMvd32));
+    //cache
+    ST64 (pMvdCache[iListIdx][  iCacheIdx ], LD64 (pMvd32));
+    ST64 (pMvdCache[iListIdx][  iCacheIdx6], LD64 (pMvd32));
+  }
+}
+
+int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal) { // Reads one terminate bin from the CABAC engine into uiBinVal; returns ERR_NONE on success.
+  uiBinVal = 0; // defined value even if the decode below bails out
+  WELS_READ_VERIFY (DecodeTerminateCabac (pCtx->pCabacDecEngine, uiBinVal)); // WELS_READ_VERIFY is defined elsewhere; presumably returns early with the callee's error code — confirm
+  return ERR_NONE;
+}
+
+int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip) { // Decodes one context-coded bin into uiSkip; the context is chosen from the left/top neighbours' skip status.
+  uiSkip = 0;
+  int32_t iCtxInc = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_SKIP) + (pNeighAvail->iTopAvail
+                    && pNeighAvail->iTopType  != MB_TYPE_SKIP); // 0..2: number of available neighbours (left, top) that are not MB_TYPE_SKIP
+  PWelsCabacCtx pBinCtx = (pCtx->pCabacCtx + NEW_CTX_OFFSET_SKIP + iCtxInc);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx, uiSkip));
+  return ERR_NONE;
+}
+
+
+int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal) { // Decodes the mb_type value for an I slice into uiBinVal: 0 (first bin zero), 25 (terminate bin set) or 1..24 built from the bins below.
+  uint32_t uiCode;
+  int32_t iIdxA = 0, iIdxB = 0;
+  int32_t iCtxInc;
+  uiBinVal = 0;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MB_TYPE_I; //I mode in I slice
+  iIdxA = (pNeighAvail->iLeftAvail) && (pNeighAvail->iLeftType != MB_TYPE_INTRA4x4
+                                        && pNeighAvail->iLeftType != MB_TYPE_INTRA8x8); // 1 when the left MB is available and is neither INTRA4x4 nor INTRA8x8
+  iIdxB = (pNeighAvail->iTopAvail) && (pNeighAvail->iTopType != MB_TYPE_INTRA4x4
+                                       && pNeighAvail->iTopType != MB_TYPE_INTRA8x8); // same test for the top MB
+  iCtxInc = iIdxA + iIdxB; // context 0..2 for the first bin
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
+  uiBinVal = uiCode;
+  if (uiBinVal != 0) {  //I16x16
+    WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode)); // terminate bin separates I_PCM from the remaining types
+    if (uiCode == 1)
+      uiBinVal = 25; //I_PCM
+    else {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+      uiBinVal = 1 + uiCode * 12; // base 1 or 13
+      //decoding of uiCbp:0,1,2
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode));
+      if (uiCode != 0) {
+        WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+        uiBinVal += 4; // first bin set: +4
+        if (uiCode != 0)
+          uiBinVal += 4; // second bin set as well: +8 in total
+      }
+      //decoding of I pred-mode: 0,1,2,3
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
+      uiBinVal += (uiCode << 1); // high bit of the 2-bit field
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode));
+      uiBinVal += uiCode; // low bit
+    }
+  }
+  //I4x4
+  return ERR_NONE;
+}
+
+int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiMbType) { // Decodes the mb_type value for a P slice into uiMbType: 0..3 for the inter branch, 5 for intra 4x4, 6..29 for intra 16x16, 30 for PCM. pNeighAvail is not read.
+  uint32_t uiCode;
+  uiMbType = 0;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SKIP; // contexts used below are addressed as offsets 3..9 from the skip-flag base
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); // first bin: 1 = intra branch, 0 = inter branch
+  if (uiCode) {
+    // Intra MB
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
+    if (uiCode) { // Intra 16x16
+      WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode));
+      if (uiCode) {
+        uiMbType = 30;
+        return ERR_NONE;//MB_TYPE_INTRA_PCM;
+      }
+
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode));
+      uiMbType = 6 + uiCode * 12; // base 6 or 18
+
+      //uiCbp: 0,1,2
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode));
+      if (uiCode) {
+        uiMbType += 4;
+        WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode)); // second bin reuses context +8
+        if (uiCode)
+          uiMbType += 4;
+      }
+
+      //IPredMode: 0,1,2,3
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode));
+      uiMbType += (uiCode << 1); // high bit
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode)); // low bit reuses context +9
+      uiMbType += uiCode;
+    } else
+      // Intra 4x4
+      uiMbType = 5;
+  } else { // P MB
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode));
+    if (uiCode) { //second bit
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
+      if (uiCode)
+        uiMbType = 1; // bins 0,1,1
+      else
+        uiMbType = 2; // bins 0,1,0
+    } else {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+      if (uiCode)
+        uiMbType = 3; // bins 0,0,1
+      else
+        uiMbType = 0; // bins 0,0,0
+    }
+  }
+  return ERR_NONE;
+}
+int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) { // Decodes a sub_mb_type value (0..3) into uiSubMbType using up to three context-coded bins. pNeighAvail is not read; uiSubMbType is only assigned on success.
+  uint32_t uiCode;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SUBMB_TYPE;
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode));
+  if (uiCode)
+    uiSubMbType = 0; // bins: 1
+  else {
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode));
+    if (uiCode) {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode));
+      uiSubMbType = 3 - uiCode; // bins 0,1,0 -> 3; bins 0,1,1 -> 2
+    } else {
+      uiSubMbType = 1; // bins 0,0
+    }
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal) { // Decodes the luma intra prediction mode signal: iBinVal = -1 when the first bin is 1, otherwise a 3-bit value (0..7) read LSB first.
+  uint32_t uiCode;
+  iBinVal = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR, uiCode)); // flag bin; presumably "use the predicted mode" — confirm against the caller
+  if (uiCode == 1)
+    iBinVal = -1;
+  else {
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode)); // all three value bins share context +1
+    iBinVal |= uiCode; // bit 0
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
+    iBinVal |= (uiCode << 1); // bit 1
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
+    iBinVal |= (uiCode << 2); // bit 2
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal) { // Decodes the chroma intra prediction mode (0..3) into iBinVal as a truncated unary code of at most three bins.
+  uint32_t uiCode;
+  int32_t iIdxA, iIdxB, iCtxInc;
+  int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode;
+  int8_t* pMbType = pCtx->pCurDqLayer->pMbType;
+  int32_t iLeftAvail     = uiNeighAvail & 0x04; // bit 2 of uiNeighAvail: left neighbour available
+  int32_t iTopAvail      = uiNeighAvail & 0x01; // bit 0 of uiNeighAvail: top neighbour available
+
+  int32_t iMbXy = pCtx->pCurDqLayer->iMbXyIndex;
+  int32_t iMbXyTop = iMbXy - pCtx->pCurDqLayer->iMbWidth; // MB one row up
+  int32_t iMbXyLeft = iMbXy - 1;
+
+  iBinVal = 0;
+
+  iIdxB = iTopAvail  && (pChromaPredMode[iMbXyTop] > 0 && pChromaPredMode[iMbXyTop] <= 3)
+          && pMbType[iMbXyTop]  != MB_TYPE_INTRA_PCM; // 1 when the top MB is available, not PCM and has chroma mode 1..3; && short-circuit keeps the array read behind the availability test
+  iIdxA = iLeftAvail && (pChromaPredMode[iMbXyLeft] > 0 && pChromaPredMode[iMbXyLeft] <= 3)
+          && pMbType[iMbXyLeft] != MB_TYPE_INTRA_PCM; // same test for the left MB
+  iCtxInc = iIdxA + iIdxB;
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + iCtxInc, uiCode));
+  iBinVal = uiCode;
+  if (iBinVal != 0) { // first bin 0 -> mode 0, handled by the final return
+    uint32_t iSym;
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, iSym)); // second and third bins both use context +3
+    if (iSym == 0) {
+      iBinVal = (iSym + 1); // bins 1,0 -> mode 1
+      return ERR_NONE;
+    }
+    iSym = 0;
+    do {
+      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, uiCode));
+      ++iSym;
+    } while ((uiCode != 0) && (iSym < 1)); // the condition is false after the first pass (iSym == 1), so exactly one more bin is read
+
+    if ((uiCode != 0) && (iSym == 1))
+      ++ iSym; // third bin set -> iSym 2
+    iBinVal = (iSym + 1); // bins 1,1,0 -> mode 2; bins 1,1,1 -> mode 3
+    return ERR_NONE;
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseInterMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                                   int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]) { // Parses LIST_0 ref indices and mv differences for the current inter MB (16x16, 16x8, 8x16 or 8x8 partitioning) and writes ref idx / mv / mvd into the layer arrays and the caller's caches. Returns ERR_NONE, ERR_INFO_INVALID_REF_INDEX, ERR_INFO_INVALID_SUB_MB_TYPE, or whatever WELS_READ_VERIFY propagates.
+  PSlice pSlice				= &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader	= &pSlice->sSliceHeaderExt.sSliceHeader;
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
+  int32_t pRefCount[2];
+  int32_t i, j;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int16_t pMv[4] = {0}; // [0..1] = current mv (x, y); [2..3] used as a duplicate for 64-bit stores in the 8x8 path
+  int16_t pMvd[4] = {0}; // same layout for the mv difference
+  int8_t iRef[2] = {0};
+  int32_t iPartIdx;
+  int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv;
+  int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv;
+  pRefCount[0] = pSliceHeader->uiRefCount[0];
+  pRefCount[1] = pSliceHeader->uiRefCount[1]; // read but only pRefCount[0] is used below
+
+  switch (pCurDqLayer->pMbType[iMbXy]) {
+  case MB_TYPE_16x16: {
+    iPartIdx = 0;
+    WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iPartIdx, pRefCount[0], 0,
+                                        iRef[0]));
+    if ((iRef[0] < 0) || (iRef[0] >= pRefCount[0]) || (ppRefPic[iRef[0]] == NULL)) { //error ref_idx
+      if (pCtx->eErrorConMethod != ERROR_CON_DISABLE) { // with error concealment enabled: fall back to ref 0 and flag the bitstream error
+        iRef[0] = 0;
+        pCtx->iErrorCode |= dsBitstreamError;
+      } else {
+        return ERR_INFO_INVALID_REF_INDEX;
+      }
+    }
+    PredMv (pMotionVector, pRefIndex, 0, 4, iRef[0], pMv); // PredMv is defined elsewhere; presumably fills pMv with the predicted mv
+    WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); // component 0 (x)
+    WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); // component 1 (y)
+    pMv[0] += pMvd[0];
+    pMv[1] += pMvd[1];
+    WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv"); // macro defined elsewhere; presumably range-checks the vertical mv against the level limits — confirm
+    UpdateP16x16MotionInfo (pCurDqLayer, iRef[0], pMv);
+    UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
+  }
+  break;
+  case MB_TYPE_16x8:
+    for (i = 0; i < 2; i++) { // pass 1: ref indices for both partitions
+      iPartIdx = i << 3; // partitions start at z-order index 0 and 8
+      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iPartIdx, pRefCount[0], 0,
+                                          iRef[i]));
+      if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx
+        if (pCtx->eErrorConMethod != ERROR_CON_DISABLE) {
+          iRef[i] = 0;
+          pCtx->iErrorCode |= dsBitstreamError;
+        } else {
+          return ERR_INFO_INVALID_REF_INDEX;
+        }
+      }
+      UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
+    }
+    for (i = 0; i < 2; i++) { // pass 2: mvd + mv for both partitions
+      iPartIdx = i << 3;
+      PredInter16x8Mv (pMotionVector, pRefIndex, iPartIdx, iRef[i], pMv);
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
+      pMv[0] += pMvd[0];
+      pMv[1] += pMvd[1];
+      WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+      UpdateP16x8MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, iPartIdx, iRef[i], pMv);
+      UpdateP16x8MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0);
+    }
+    break;
+  case MB_TYPE_8x16:
+    for (i = 0; i < 2; i++) { // pass 1: ref indices for both partitions
+      iPartIdx = i << 2; // partitions start at z-order index 0 and 4
+      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iPartIdx, pRefCount[0], 0,
+                                          iRef[i]));
+      if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx
+        if (pCtx->eErrorConMethod != ERROR_CON_DISABLE) {
+          iRef[i] = 0;
+          pCtx->iErrorCode |= dsBitstreamError;
+        } else {
+          return ERR_INFO_INVALID_REF_INDEX;
+        }
+      }
+      UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
+    }
+    for (i = 0; i < 2; i++) { // pass 2: mvd + mv for both partitions
+      iPartIdx = i << 2;
+      PredInter8x16Mv (pMotionVector, pRefIndex, i << 2, iRef[i], pMv/*&mv[0], &mv[1]*/);
+
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
+      pMv[0] += pMvd[0];
+      pMv[1] += pMvd[1];
+      WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+      UpdateP8x16MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, iPartIdx, iRef[i], pMv);
+      UpdateP8x16MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0);
+    }
+    break;
+  case MB_TYPE_8x8:
+  case MB_TYPE_8x8_REF0: {
+    int8_t pRefIdx[4] = {0}, pSubPartCount[4], pPartW[4];
+    uint32_t uiSubMbType;
+    //sub_mb_type, partition
+    for (i = 0; i < 4; i++) {
+      WELS_READ_VERIFY (ParseSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType));
+      if (uiSubMbType >= 4) { //invalid sub_mb_type
+        return ERR_INFO_INVALID_SUB_MB_TYPE;
+      }
+      pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType; // table lookup turns the parsed code into the stored sub-MB type, partition count and width
+      pSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
+      pPartW[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
+    }
+
+    for (i = 0; i < 4; i++) { // ref indices for the four 8x8 blocks
+      int16_t iIdx8 = i << 2; // z-order index of the block's first 4x4 entry
+      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iIdx8, pRefCount[0], 1,
+                                          pRefIdx[i]));
+      if ((pRefIdx[i] < 0) || (pRefIdx[i] >= pRefCount[0]) || (ppRefPic[pRefIdx[i]] == NULL)) { //error ref_idx
+        if (pCtx->eErrorConMethod != ERROR_CON_DISABLE) {
+          pRefIdx[i] = 0;
+          pCtx->iErrorCode |= dsBitstreamError;
+        } else {
+          return ERR_INFO_INVALID_REF_INDEX;
+        }
+      }
+      UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0); // updates the per-MB array only; the cache is filled in the mv loop below
+    }
+    //mv
+    for (i = 0; i < 4; i++) {
+      int8_t iPartCount = pSubPartCount[i];
+      uiSubMbType = pCurDqLayer->pSubMbType[iMbXy][i]; // from here on holds the stored type (compared against SUB_MB_TYPE_* below), not the raw parsed code
+      int16_t iPartIdx, iBlockW = pPartW[i]; // this iPartIdx shadows the function-level one
+      uint8_t iScan4Idx, iCacheIdx;
+      iCacheIdx = g_kuiCache30ScanIdx[i << 2];
+      pRefIndex[0][iCacheIdx ] = pRefIndex[0][iCacheIdx + 1]
+                                 = pRefIndex[0][iCacheIdx + 6] = pRefIndex[0][iCacheIdx + 7] = pRefIdx[i]; // fill the 2x2 cache entries of this 8x8 block (cache row stride 6)
+
+      for (j = 0; j < iPartCount; j++) {
+        iPartIdx = (i << 2) + j * iBlockW;
+        iScan4Idx = g_kuiScan4[iPartIdx];
+        iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+        PredMv (pMotionVector, pRefIndex, iPartIdx, iBlockW, pRefIdx[i], pMv);
+        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
+        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
+        pMv[0] += pMvd[0];
+        pMv[1] += pMvd[1];
+        WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+        if (SUB_MB_TYPE_8x8 == uiSubMbType) { // 2 entries wide, 2 rows
+          ST32 ((pMv + 2), LD32 (pMv)); // duplicate (x, y) so one 64-bit store covers two adjacent entries
+          ST32 ((pMvd + 2), LD32 (pMvd));
+          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
+          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv));
+          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd));
+          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd));
+          ST64 (pMotionVector[0][iCacheIdx  ], LD64 (pMv));
+          ST64 (pMotionVector[0][iCacheIdx + 6], LD64 (pMv));
+          ST64 (pMvdCache[0][iCacheIdx  ], LD64 (pMvd));
+          ST64 (pMvdCache[0][iCacheIdx + 6], LD64 (pMvd));
+        } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { // 2 entries wide, 1 row
+          ST32 ((pMv + 2), LD32 (pMv));
+          ST32 ((pMvd + 2), LD32 (pMvd));
+          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD64 (pMv));
+          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD64 (pMvd));
+          ST64 (pMotionVector[0][iCacheIdx  ], LD64 (pMv));
+          ST64 (pMvdCache[0][iCacheIdx  ], LD64 (pMvd));
+        } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { // 1 entry wide, 2 rows
+          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD32 (pMv));
+          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
+          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD32 (pMvd));
+          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd));
+          ST32 (pMotionVector[0][iCacheIdx  ], LD32 (pMv));
+          ST32 (pMotionVector[0][iCacheIdx + 6], LD32 (pMv));
+          ST32 (pMvdCache[0][iCacheIdx  ], LD32 (pMvd));
+          ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd));
+        } else {  //SUB_MB_TYPE_4x4
+          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD32 (pMv));
+          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD32 (pMvd));
+          ST32 (pMotionVector[0][iCacheIdx  ], LD32 (pMv));
+          ST32 (pMvdCache[0][iCacheIdx  ], LD32 (pMvd));
+        }
+      }
+    }
+  }
+  break;
+  default: // any other MB type: nothing is parsed
+    break;
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc,
+                          int8_t ref_idx[LIST_A][30],
+                          int32_t iListIdx, int32_t iZOrderIdx, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal) { // Decodes one reference index into iRefIdxVal; the first-bin context depends on whether the left (A) and top (B) neighbouring ref indices are > 0. nzc and b8mode are not read.
+  if (iActiveRefNum == 1) { // a single active reference: nothing is read from the bitstream
+    iRefIdxVal = 0;
+    return ERR_NONE;
+  }
+  uint32_t uiCode;
+  int32_t iIdxA = 0, iIdxB = 0;
+  int32_t iCtxInc;
+  int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pRefIndex[LIST_0][pCtx->pCurDqLayer->iMbXyIndex]; // NOTE(review): hard-codes LIST_0 rather than iListIdx; every call visible in this chunk passes LIST_0
+  if (iZOrderIdx == 0) { // top-left block: both neighbours are outside the MB and come from the cache (top at -6, left at -1)
+    iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
+             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 6] > 0);
+    iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
+             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 1] > 0);
+  } else if (iZOrderIdx == 4) { // top-right block: top from the cache, left from the current MB's array
+    iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
+             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 6] > 0);
+    iIdxA = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 1] > 0;
+  } else if (iZOrderIdx == 8) { // bottom-left block: top from the current MB's array (row stride 4), left from the cache
+    iIdxB = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 4] > 0;
+    iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
+             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 1] > 0);
+  } else { // any other index: both neighbours are read from the current MB's array
+    iIdxB = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 4] > 0;
+    iIdxA = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 1] > 0;
+  }
+
+  iCtxInc = iIdxA + (iIdxB << 1); // 0..3
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + iCtxInc, uiCode));
+  if (uiCode) {
+    WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + 4, 1, uiCode)); // remaining bins, starting at context +4 (DecodeUnaryBinCabac is defined elsewhere)
+    ++uiCode; // account for the first bin already consumed
+  }
+  iRefIdxVal = (int8_t) uiCode; // narrowing cast; callers in this chunk range-check the result afterwards
+  return ERR_NONE;
+}
+
+int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30],
+                           int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal) { // Decodes one signed mv-difference component (iMvComp: 0 or 1) for partition `index` into iMvdVal. pNeighAvail is not read.
+  uint32_t uiCode;
+  int32_t iIdxA = 0; // accumulates |mvd| of the top and left neighbours for context selection
+  //int32_t sym;
+  int32_t iCtxInc;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MVD + iMvComp * CTX_NUM_MVD; // each component has its own run of CTX_NUM_MVD contexts
+  iMvdVal = 0;
+  if (pRefIndex[iListIdx][g_kuiCache30ScanIdx[index] - 6] >= 0) // top neighbour (one cache row up) counts only when its ref index is non-negative
+    iIdxA = WELS_ABS (pMvdCache[iListIdx][g_kuiCache30ScanIdx[index] - 6][iMvComp]);
+  if (pRefIndex[iListIdx][g_kuiCache30ScanIdx[index] - 1] >= 0) // same for the left neighbour
+    iIdxA += WELS_ABS (pMvdCache[iListIdx][g_kuiCache30ScanIdx[index] - 1][iMvComp]);
+
+  if (iIdxA < 3) // sum < 3 -> ctx 0; 3..32 -> ctx 1; > 32 -> ctx 2
+    iCtxInc = 0;
+  else if (iIdxA > 32)
+    iCtxInc = 2;
+  else
+    iCtxInc = 1;
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,  pBinCtx + iCtxInc, uiCode)); // first bin: 0 means the component is zero
+  if (uiCode) {
+    WELS_READ_VERIFY (DecodeUEGMvCabac (pCtx->pCabacDecEngine, pBinCtx + 3, 3, uiCode)); // magnitude remainder (DecodeUEGMvCabac is defined elsewhere; presumably a UEG3-style binarization — confirm)
+    iMvdVal = (int16_t) (uiCode + 1); // +1 for the first bin already consumed
+    WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiCode)); // sign as a bypass bin: 1 = negative
+    if (uiCode) {
+      iMvdVal = -iMvdVal;
+    }
+  } else {
+    iMvdVal = 0;
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseCbpInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiCbp) { // Decodes the coded block pattern into uiCbp: bits 0..3 are the four luma 8x8 flags, plus 16 or 32 added for the chroma part.
+  int32_t iIdxA = 0, iIdxB = 0, pALeftMb[2], pBTopMb[2];
+  uiCbp = 0;
+  uint32_t pCbpBit[6]; // [0..3] luma bins, [4..5] chroma bins
+  int32_t iCtxInc;
+
+  //Luma: bit by bit for 4 8x8 blocks in z-order
+  pBTopMb[0]  = pNeighAvail->iTopAvail  && pNeighAvail->iTopType  != MB_TYPE_INTRA_PCM
+                && ((pNeighAvail->iTopCbp  & (1 << 2)) == 0); // 1 when the top MB is available, not PCM, and its bottom-left 8x8 (cbp bit 2) is NOT coded
+  pBTopMb[1]  = pNeighAvail->iTopAvail  && pNeighAvail->iTopType  != MB_TYPE_INTRA_PCM
+                && ((pNeighAvail->iTopCbp  & (1 << 3)) == 0); // same for the top MB's bottom-right 8x8 (cbp bit 3)
+  pALeftMb[0] = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
+                && ((pNeighAvail->iLeftCbp & (1 << 1)) == 0); // left MB's top-right 8x8 (cbp bit 1)
+  pALeftMb[1] = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
+                && ((pNeighAvail->iLeftCbp & (1 << 3)) == 0); // left MB's bottom-right 8x8 (cbp bit 3)
+
+  //left_top 8x8 block
+  iCtxInc = pALeftMb[0] + (pBTopMb[0] << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[0]));
+  if (pCbpBit[0])
+    uiCbp += 0x01;
+
+  //right_top 8x8 block
+  iIdxA = !pCbpBit[0]; // left neighbour is the block just decoded inside this MB
+  iCtxInc = iIdxA + (pBTopMb[1] << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[1]));
+  if (pCbpBit[1])
+    uiCbp += 0x02;
+
+  //left_bottom 8x8 block
+  iIdxB = !pCbpBit[0]; // top neighbour is the left_top block of this MB
+  iCtxInc = pALeftMb[1] + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[2]));
+  if (pCbpBit[2])
+    uiCbp += 0x04;
+
+  //right_bottom 8x8 block
+  iIdxB = !pCbpBit[1]; // both neighbours lie inside this MB
+  iIdxA = !pCbpBit[2];
+  iCtxInc = iIdxA + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[3]));
+  if (pCbpBit[3])
+    uiCbp += 0x08;
+
+  //Chroma: bit by bit
+  iIdxB = pNeighAvail->iTopAvail  && (pNeighAvail->iTopType  == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp  >> 4)); // neighbour is PCM or has a non-zero chroma cbp
+  iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4));
+
+  //BitIdx = 0
+  iCtxInc = iIdxA + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + CTX_NUM_CBP + iCtxInc,
+                                    pCbpBit[4])); // chroma bin 0 uses the second run of CTX_NUM_CBP contexts
+
+  //BitIdx = 1
+  if (pCbpBit[4]) {
+    iIdxB = pNeighAvail->iTopAvail  && (pNeighAvail->iTopType  == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp  >> 4) == 2); // neighbour is PCM or its chroma cbp equals 2
+    iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4) == 2);
+    iCtxInc = iIdxA + (iIdxB << 1);
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
+                                      pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + 2 * CTX_NUM_CBP + iCtxInc,
+                                      pCbpBit[5])); // chroma bin 1 uses the third run of contexts
+    uiCbp += 1 << (4 + pCbpBit[5]); // adds 16 (chroma cbp 1) or 32 (chroma cbp 2)
+  }
+  return ERR_NONE;
+}
+// Decodes the signed macroblock QP delta with CABAC into iQpDelta and stores the same value in the slice's iLastDeltaQp; returns ERR_NONE on the normal path.
+int32_t ParseDeltaQpCabac (PWelsDecoderContext pCtx, int32_t& iQpDelta) {
+  uint32_t uiCode;
+  PSlice pCurrSlice = & (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer);
+  iQpDelta = 0; // result when the first bin is 0
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_DELTA_QP;
+  int32_t iCtxInc = (pCurrSlice->iLastDeltaQp != 0); // first-bin context depends on whether the previously stored delta was non-zero
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
+  if (uiCode != 0) {
+    WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pBinCtx + 2, 1, uiCode)); // remaining bins use contexts starting at pBinCtx + 2
+    uiCode++; // account for the first bin already consumed above, so uiCode = k >= 1
+    iQpDelta = (uiCode + 1) >> 1; // magnitude: 1, 1, 2, 2, 3, 3, ... for k = 1, 2, 3, ...
+    if ((uiCode & 1) == 0) // even k encodes a negative delta, odd k a positive one
+      iQpDelta = - iQpDelta;
+  }
+  pCurrSlice->iLastDeltaQp = iQpDelta; // remembered for the next call's context selection
+  return ERR_NONE;
+}
+// Decodes one coded-block flag into uiCbfBit, selecting the CABAC context from a left term (nA) and a top term (nB); for DC categories a decoded 1 is also recorded in pCbfDc of the current MB. Returns ERR_NONE on the normal path.
+int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int32_t iZIndex, int32_t iResProperty,
+                           PWelsDecoderContext pCtx, uint32_t& uiCbfBit) {
+  int8_t nA, nB/*, zigzag_idx = 0*/; // nA: left term, nB: top term
+  int32_t iCurrBlkXy = pCtx->pCurDqLayer->iMbXyIndex;
+  int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring
+  int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring
+  uint8_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc; // per-MB bit field, one bit per iResProperty value used below
+  int8_t* pMbType = pCtx->pCurDqLayer->pMbType;
+  int32_t iCtxInc;
+  uiCbfBit = 0;
+  nA = nB = IS_INTRA (pMbType[iCurrBlkXy]); // fallback kept whenever the corresponding neighbour is not available
+
+  if (iResProperty == I16_LUMA_DC || iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V) { //DC
+    if (pNeighAvail->iTopAvail) // term is 1 for an I_PCM neighbour or one whose pCbfDc bit for this category is set
+      nB = (pMbType[iTopBlkXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iTopBlkXy] >> iResProperty) & 1);
+    if (pNeighAvail->iLeftAvail)
+      nA = (pMbType[iLeftBlkXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iLeftBlkXy] >> iResProperty) & 1);
+    iCtxInc = nA + (nB << 1);
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
+                                      pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + iCtxInc, uiCbfBit));
+    if (uiCbfBit)
+      pCbfDc[iCurrBlkXy] |= (1 << iResProperty); // remember this MB's DC flag for later neighbour lookups
+  } else { //AC
+    //for 4x4 blk, make sure blk-idx is correct
+    if (pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8] != 0xff) { //top blk available
+      if (g_kTopBlkInsideMb[iZIndex]) // NOTE(review): table presumably flags blocks whose top neighbour lies in the current MB - confirm
+        iTopBlkXy = iCurrBlkXy;
+      nB = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8] || pMbType[iTopBlkXy]  == MB_TYPE_INTRA_PCM; // non-zero cached count or I_PCM owner
+    }
+    if (pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1] != 0xff) { //left blk available
+      if (g_kLeftBlkInsideMb[iZIndex]) // NOTE(review): same assumption as for the top table - confirm
+        iLeftBlkXy = iCurrBlkXy;
+      nA = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1] || pMbType[iLeftBlkXy] == MB_TYPE_INTRA_PCM;
+    }
+
+    iCtxInc = nA + (nB << 1);
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
+                                      pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + iCtxInc, uiCbfBit));
+  }
+  return ERR_NONE;
+}
+// Decodes the significance map of one block: writes 1 or 0 for scan positions 0..g_kMaxPos[iResProperty] into pSignificantMap and the number of 1 entries into uiCoeffNum; returns ERR_NONE on the normal path.
+int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx,
+                                  uint32_t& uiCoeffNum) {
+  uint32_t uiCode;
+  PWelsCabacCtx pMapCtx  = pCtx->pCabacCtx + NEW_CTX_OFFSET_MAP + g_kBlockCat2CtxOffsetMap [iResProperty]; // contexts for the "significant" bins
+  PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_LAST + g_kBlockCat2CtxOffsetLast[iResProperty]; // contexts for the "last significant" bins
+
+  int32_t i;
+  uiCoeffNum = 0;
+  int32_t i0 = 0;
+  int32_t i1 = g_kMaxPos[iResProperty]; // bins are read for positions i0..i1-1; position i1 is handled after the loop
+
+  for (i = i0; i < i1; ++i) {
+    //read significant
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + i, uiCode)); // context index equals the scan position
+    if (uiCode) {
+      * (pSignificantMap++) = 1;
+      ++ uiCoeffNum;
+      //read last significant
+      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + i, uiCode));
+      if (uiCode) {
+        memset (pSignificantMap, 0, (i1 - i) * sizeof (int32_t)); // pointer is already past position i, so this clears positions i+1..i1
+        return ERR_NONE;
+      }
+    } else
+      * (pSignificantMap++) = 0;
+  }
+
+  //no "last" bin was 1 inside the loop: position i1 is marked significant without reading another bin
+  //if(i < i1+1)
+  {
+    *pSignificantMap = 1;
+    ++uiCoeffNum;
+  }
+
+  return ERR_NONE;
+}
+// Replaces every non-zero entry of pSignificant by its signed level, visiting positions from g_kMaxPos[iResProperty] down to 0; returns ERR_NONE on the normal path. Assumes non-zero entries hold 1 on entry (the "== 2" test below relies on it) - TODO confirm against callers.
+int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty, PWelsDecoderContext pCtx) {
+  uint32_t uiCode;
+  PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_ONE + g_kBlockCat2CtxOffsetOne[iResProperty]; // contexts for the first level bin
+  PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_ABS + g_kBlockCat2CtxOffsetAbs[iResProperty]; // contexts for the remaining level bins
+  const int16_t iMaxType = g_kMaxC2[iResProperty]; // upper bound for c2
+  int32_t i = g_kMaxPos[iResProperty];
+  int32_t* pCoff = pSignificant + i; // start at the highest scan position and walk backwards
+  int32_t c1 = 1; // index into pOneCtx
+  int32_t c2 = 0; // index into pAbsCtx
+  for (; i >= 0; --i) {
+    if (*pCoff != 0) {
+      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pOneCtx + c1, uiCode));
+      *pCoff += uiCode; // bin 1 raises the magnitude to 2 and triggers the escape decoding below
+      if (*pCoff == 2) {
+        WELS_READ_VERIFY (DecodeUEGLevelCabac (pCtx->pCabacDecEngine, pAbsCtx + c2, uiCode));
+        *pCoff += uiCode; // add the decoded remainder of the magnitude
+        ++c2;
+        c2 = WELS_MIN (c2, iMaxType);
+        c1 = 0; // once a level above 1 was seen, c1 stays 0 for the rest of the block
+      } else if (c1) {
+        ++c1; // magnitude stayed 1: advance c1, saturating at 4
+        c1 = WELS_MIN (c1, 4);
+      }
+      WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiCode)); // sign bin, decoded in bypass mode
+      if (uiCode)
+        *pCoff = - *pCoff;
+    }
+    pCoff--;
+  }
+  return ERR_NONE;
+}
+// Decodes one residual block with CABAC: coded-block flag, then (if set) significance map and levels; stores the coefficient count in pNonZeroCountCache and writes non-zero levels to sTCoeff at the positions given by pScanTable. Returns ERR_NONE on the normal path.
+int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
+                                 int32_t iIndex, int32_t iMaxNumCoeff, // pBsAux and iMaxNumCoeff are not read anywhere in this body
+                                 const uint8_t* pScanTable, int32_t iResProperty, short* sTCoeff, /*int mb_mode*/ uint8_t uiQp,
+                                 PWelsDecoderContext pCtx) {
+  int32_t iCurNzCacheIdx;
+  const uint16_t* pDeQuantMul = g_kuiDequantCoeff[uiQp]; // scaling factors for this QP
+  uint32_t uiTotalCoeffNum = 0;
+  uint32_t uiCbpBit; // coded-block flag of this block
+  int32_t pSignificantMap[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; // significance map first, then overwritten in place with signed levels
+
+  WELS_READ_VERIFY (ParseCbfInfoCabac (pNeighAvail, pNonZeroCountCache, iIndex, iResProperty, pCtx, uiCbpBit));
+  if (uiCbpBit) { //has coeff
+    WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
+    WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));
+  }
+
+  iCurNzCacheIdx = g_kCacheNzcScanIdx[iIndex];
+  pNonZeroCountCache[iCurNzCacheIdx] = (uint8_t)uiTotalCoeffNum; // written even when the count is 0
+  if (uiTotalCoeffNum == 0) {
+    return ERR_NONE;
+  }
+  int32_t j = 0;
+  if (iResProperty == I16_LUMA_DC) { // levels are stored unscaled
+    do {
+      if (pSignificantMap[j] != 0)
+        sTCoeff[pScanTable[j]] = pSignificantMap[j];
+      ++j;
+    } while (j < 16);
+  } else if (iResProperty == CHROMA_DC_U) { // levels are scaled by the first factor only
+    do {
+      if (pSignificantMap[j] != 0) // pScanTable[j] is only read for non-zero entries
+        sTCoeff[pScanTable[j]] = pSignificantMap[j] * pDeQuantMul[0];
+      ++j;
+    } while (j < 16);
+  } else { //luma ac, chroma ac; NOTE(review): CHROMA_DC_V is not matched above and also lands here - confirm that is intended
+    do {
+      if (pSignificantMap[j] != 0)
+        sTCoeff[pScanTable[j]] = pSignificantMap[j] * pDeQuantMul[pScanTable[j] & 0x07]; // factor chosen by the low three bits of the destination index
+      ++j;
+    } while (j < 16);
+  }
+  return ERR_NONE;
+}
+// Handles an I_PCM macroblock in a CABAC stream: copies 384 raw bytes (16x16 luma, 8x8 cb, 8x8 cr) from the bitstream buffer into the destination picture, resets the MB's QP/nzc bookkeeping and re-initialises the CABAC engine. Returns ERR_CABAC_NO_BS_TO_READ when fewer than 384 bytes remain, ERR_NONE on the normal path.
+int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
+  int32_t i;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  SBitStringAux* pBsAux = pCtx->pCurDqLayer->pBitStringAux;
+  SDqLayer* pCurLayer = pCtx->pCurDqLayer;
+  int32_t iDstStrideLuma = pCurLayer->pDec->iLinesize[0];
+  int32_t iDstStrideChroma = pCurLayer->pDec->iLinesize[1];
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+
+  int32_t iMbOffsetLuma = (iMbX + iMbY * iDstStrideLuma) << 4; // = 16 * iMbX + 16 * iMbY * stride
+  int32_t iMbOffsetChroma = (iMbX + iMbY * iDstStrideChroma) << 3; // = 8 * iMbX + 8 * iMbY * stride
+
+  uint8_t* pMbDstY = pCtx->pDec->pData[0] + iMbOffsetLuma; // NOTE(review): strides come from pCurLayer->pDec, planes from pCtx->pDec - presumably the same picture, confirm
+  uint8_t* pMbDstU = pCtx->pDec->pData[1] + iMbOffsetChroma;
+  uint8_t* pMbDstV = pCtx->pDec->pData[2] + iMbOffsetChroma;
+
+  uint8_t* pPtrSrc;
+
+  pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+  RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux); // presumably repositions pBsAux->pCurBuf at the first raw sample byte - confirm against the helper
+  intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf;
+  if (iBytesLeft < 384) { // 384 = 16*16 + 8*8 + 8*8 bytes consumed below
+    return ERR_CABAC_NO_BS_TO_READ;
+  }
+  pPtrSrc = pBsAux->pCurBuf;
+  for (i = 0; i < 16; i++) {   //luma: 16 rows of 16 bytes
+    memcpy (pMbDstY , pPtrSrc, 16);
+    pMbDstY += iDstStrideLuma;
+    pPtrSrc += 16;
+  }
+  for (i = 0; i < 8; i++) {   //cb: 8 rows of 8 bytes
+    memcpy (pMbDstU, pPtrSrc, 8);
+    pMbDstU += iDstStrideChroma;
+    pPtrSrc += 8;
+  }
+  for (i = 0; i < 8; i++) {   //cr: 8 rows of 8 bytes
+    memcpy (pMbDstV, pPtrSrc, 8);
+    pMbDstV += iDstStrideChroma;
+    pPtrSrc += 8;
+  }
+
+  pBsAux->pCurBuf += 384; // skip the raw samples just copied
+
+  pCurLayer->pLumaQp[iMbXy] = 0;
+  pCurLayer->pChromaQp[iMbXy] = 0;
+  memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy])); // every count set to 16; sizeof covers the whole row only if pNzc rows are arrays - TODO confirm
+
+  //step 4: cabac engine init (restart CABAC decoding on the bytes following the raw samples)
+  WELS_READ_VERIFY (InitReadBits (pBsAux, 1));
+  WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCabacDecEngine, pBsAux));
+  return ERR_NONE;
+}
+}
--- a/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
@@ -45,7 +45,7 @@
 
 namespace WelsDec {
 #define MAX_LEVEL_PREFIX 15
-void GetNeighborAvailMbType (PNeighAvail pNeighAvail, PDqLayer pCurLayer) {
+void GetNeighborAvailMbType (PWelsNeighAvail pNeighAvail, PDqLayer pCurLayer) {
   int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
   int32_t iCurXy, iTopXy = 0, iLeftXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
   int32_t iCurX, iCurY;
@@ -58,9 +58,11 @@
     iLeftXy = iCurXy - 1;
     iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
     pNeighAvail->iLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
+    pNeighAvail->iLeftCbp   = pNeighAvail->iLeftAvail ? pCurLayer->pCbp[iLeftXy] : 0;
   } else {
     pNeighAvail->iLeftAvail = 0;
     pNeighAvail->iLeftTopAvail = 0;
+    pNeighAvail->iLeftCbp = 0;
   }
 
   if (iCurY != 0) {
@@ -67,6 +69,7 @@
     iTopXy = iCurXy - pCurLayer->iMbWidth;
     iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
     pNeighAvail->iTopAvail = (iTopSliceIdc == iCurSliceIdc);
+    pNeighAvail->iTopCbp   = pNeighAvail->iTopAvail ? pCurLayer->pCbp[iTopXy] : 0;
     if (iCurX != 0) {
       iLeftTopXy = iTopXy - 1;
       iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
@@ -85,6 +88,7 @@
     pNeighAvail->iTopAvail = 0;
     pNeighAvail->iLeftTopAvail = 0;
     pNeighAvail->iRightTopAvail = 0;
+    pNeighAvail->iTopCbp   = 0;
   }
 
   pNeighAvail->iLeftType     = (pNeighAvail->iLeftAvail     ? pCurLayer->pMbType[iLeftXy]     : 0);
@@ -92,14 +96,11 @@
   pNeighAvail->iLeftTopType  = (pNeighAvail->iLeftTopAvail  ? pCurLayer->pMbType[iLeftTopXy]  : 0);
   pNeighAvail->iRightTopType = (pNeighAvail->iRightTopAvail ? pCurLayer->pMbType[iRightTopXy] : 0);
 }
-void WelsFillCacheNonZeroCount (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+void WelsFillCacheNonZeroCount (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                                 PDqLayer pCurLayer) { //no matter slice type, intra_pred_constrained_flag
   int32_t iCurXy  = pCurLayer->iMbXyIndex;
   int32_t iTopXy  = 0;
   int32_t iLeftXy = 0;
-
-  GetNeighborAvailMbType (pNeighAvail, pCurLayer);
-
   if (pNeighAvail->iTopAvail) {
     iTopXy = iCurXy - pCurLayer->iMbWidth;
   }
@@ -143,7 +144,7 @@
       pNonZeroCount[5 + 8 * 5] = -1;//unavailable
   }
 }
-void WelsFillCacheConstrain1Intra4x4 (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain1Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
                                       PDqLayer pCurLayer) { //no matter slice type
   int32_t iCurXy  = pCurLayer->iMbXyIndex;
   int32_t iTopXy  = 0;
@@ -189,7 +190,7 @@
   }
 }
 
-void WelsFillCacheConstrain0Intra4x4 (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
+void WelsFillCacheConstrain0Intra4x4 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
                                       PDqLayer pCurLayer) { //no matter slice type
   int32_t iCurXy  = pCurLayer->iMbXyIndex;
   int32_t iTopXy  = 0;
@@ -235,8 +236,8 @@
   }
 }
 
-void WelsFillCacheInter (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
-                         int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer) {
+void WelsFillCacheInterCabac(PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer)
+{
   int32_t iCurXy      = pCurLayer->iMbXyIndex;
   int32_t iTopXy      = 0;
   int32_t iLeftXy     = 0;
@@ -265,6 +266,12 @@
     ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
     ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
     ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
+
+    ST32(iMvdCache[0][ 6], LD32(pCurLayer->pMvd[0][iLeftXy][ 3]));
+    ST32(iMvdCache[0][12], LD32(pCurLayer->pMvd[0][iLeftXy][ 7]));
+    ST32(iMvdCache[0][18], LD32(pCurLayer->pMvd[0][iLeftXy][11]));
+    ST32(iMvdCache[0][24], LD32(pCurLayer->pMvd[0][iLeftXy][15]));
+
     iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
     iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
     iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
@@ -275,8 +282,136 @@
     ST32 (iMvArray[0][18], 0);
     ST32 (iMvArray[0][24], 0);
 
+    ST32(iMvdCache[0][ 6], 0);
+    ST32(iMvdCache[0][12], 0);
+    ST32(iMvdCache[0][18], 0);
+    ST32(iMvdCache[0][24], 0);
+
+
     if (0 == pNeighAvail->iLeftAvail) { //not available
       iRefIdxArray[0][ 6] =
+      iRefIdxArray[0][12] =
+      iRefIdxArray[0][18] =
+      iRefIdxArray[0][24] = REF_NOT_AVAIL;
+    } else { //available but is intra mb type
+      iRefIdxArray[0][ 6] =
+      iRefIdxArray[0][12] =
+      iRefIdxArray[0][18] =
+      iRefIdxArray[0][24] = REF_NOT_IN_LIST;
+    }
+  }
+  if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
+    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
+    ST32(iMvdCache[0][0], LD32(pCurLayer->pMvd[0][iLeftTopXy][15]));
+    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
+  } else {
+    ST32 (iMvArray[0][0], 0);
+    ST32(iMvdCache[0][0], 0);
+    if (0 == pNeighAvail->iLeftTopAvail) { //not available
+      iRefIdxArray[0][0] = REF_NOT_AVAIL;
+    } else { //available but is intra mb type
+      iRefIdxArray[0][0] = REF_NOT_IN_LIST;
+    }
+  }
+
+  if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
+    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
+    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
+    ST64(iMvdCache[0][1], LD64(pCurLayer->pMvd[0][iTopXy][12]));
+    ST64(iMvdCache[0][3], LD64(pCurLayer->pMvd[0][iTopXy][14]));
+    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
+  } else {
+    ST64 (iMvArray[0][1], 0);
+    ST64 (iMvArray[0][3], 0);
+    ST64(iMvdCache[0][1], 0);
+    ST64(iMvdCache[0][3], 0);
+    if (0 == pNeighAvail->iTopAvail) { //not available
+      iRefIdxArray[0][1] =
+      iRefIdxArray[0][2] =
+      iRefIdxArray[0][3] =
+      iRefIdxArray[0][4] = REF_NOT_AVAIL;
+    } else { //available but is intra mb type
+      iRefIdxArray[0][1] =
+      iRefIdxArray[0][2] =
+      iRefIdxArray[0][3] =
+      iRefIdxArray[0][4] = REF_NOT_IN_LIST;
+    }
+  }
+
+  if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
+    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
+    ST32(iMvdCache[0][5], LD32(pCurLayer->pMvd[0][iRightTopXy][12]));
+    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
+  } else {
+    ST32 (iMvArray[0][5], 0);
+    if (0 == pNeighAvail->iRightTopAvail) { //not available
+      iRefIdxArray[0][5] = REF_NOT_AVAIL;
+    } else { //available but is intra mb type
+      iRefIdxArray[0][5] = REF_NOT_IN_LIST;
+    }
+  }
+
+  //right-top 4*4 block unavailable
+  ST32 (iMvArray[0][ 9], 0);
+  ST32 (iMvArray[0][21], 0);
+  ST32 (iMvArray[0][11], 0);
+  ST32 (iMvArray[0][17], 0);
+  ST32 (iMvArray[0][23], 0);
+  ST32(iMvdCache[0][ 9], 0);
+  ST32(iMvdCache[0][21], 0);
+  ST32(iMvdCache[0][11], 0);
+  ST32(iMvdCache[0][17], 0);
+  ST32(iMvdCache[0][23], 0);
+  iRefIdxArray[0][ 9] =
+  iRefIdxArray[0][21] =
+  iRefIdxArray[0][11] =
+  iRefIdxArray[0][17] =
+  iRefIdxArray[0][23] = REF_NOT_AVAIL;
+}
+
+void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                         int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer)
+{
+  int32_t iCurXy      = pCurLayer->iMbXyIndex;
+  int32_t iTopXy      = 0;
+  int32_t iLeftXy     = 0;
+  int32_t iLeftTopXy  = 0;
+  int32_t iRightTopXy = 0;
+
+  //stuff non_zero_coeff_count from pNeighAvail(left and top)
+  WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+
+  if (pNeighAvail->iTopAvail) {
+    iTopXy = iCurXy - pCurLayer->iMbWidth;
+  }
+  if (pNeighAvail->iLeftAvail) {
+    iLeftXy = iCurXy - 1;
+  }
+  if (pNeighAvail->iLeftTopAvail) {
+    iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
+  }
+  if (pNeighAvail->iRightTopAvail) {
+    iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
+  }
+
+  //stuff mv_cache and iRefIdxArray from left and top (inter)
+  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
+    ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3]));
+    ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
+    ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
+    ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
+    iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
+    iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
+    iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
+    iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15];
+  } else {
+    ST32 (iMvArray[0][ 6], 0);
+    ST32 (iMvArray[0][12], 0);
+    ST32 (iMvArray[0][18], 0);
+    ST32 (iMvArray[0][24], 0);
+
+    if (0 == pNeighAvail->iLeftAvail) { //not available
+      iRefIdxArray[0][ 6] =
         iRefIdxArray[0][12] =
           iRefIdxArray[0][18] =
             iRefIdxArray[0][24] = REF_NOT_AVAIL;
@@ -298,7 +433,6 @@
       iRefIdxArray[0][0] = REF_NOT_IN_LIST;
     }
   }
-
   if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
     ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
     ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
@@ -306,7 +440,6 @@
   } else {
     ST64 (iMvArray[0][1], 0);
     ST64 (iMvArray[0][3], 0);
-
     if (0 == pNeighAvail->iTopAvail) { //not available
       iRefIdxArray[0][1] =
         iRefIdxArray[0][2] =
@@ -319,7 +452,6 @@
             iRefIdxArray[0][4] = REF_NOT_IN_LIST;
     }
   }
-
   if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
     ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
     iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
@@ -331,7 +463,6 @@
       iRefIdxArray[0][5] = REF_NOT_IN_LIST;
     }
   }
-
   //right-top 4*4 block unavailable
   ST32 (iMvArray[0][ 9], 0);
   ST32 (iMvArray[0][21], 0);
@@ -358,9 +489,6 @@
   return iBestMode;
 }
 
-#define MAX_PRED_MODE_ID_I16x16  3
-#define MAX_PRED_MODE_ID_CHROMA  3
-#define MAX_PRED_MODE_ID_I4x4    8
 #define CHECK_I16_MODE(a, b, c, d)                           \
                       ((a == g_ksI16PredInfo[a].iPredMode) &&  \
 					   (b >= g_ksI16PredInfo[a].iLeftAvail) && \
@@ -755,216 +883,6 @@
       j          = kpZigzagTable[ iCoeffNum ];
       pTCoeff[j] = iLevel[i] * kpDequantCoeff[j & 0x07];
     }
-  }
-
-  return 0;
-}
-
-int32_t ParseIntra4x4ModeConstrain0 (PNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
-                                     PDqLayer pCurDqLayer) {
-  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int32_t iFinalMode, i;
-
-  uint8_t uiNeighAvail = 0;
-  uint32_t uiCode;
-  if (pNeighAvail->iLeftAvail) {  //left
-    iSampleAvail[ 6] =
-      iSampleAvail[12] =
-        iSampleAvail[18] =
-          iSampleAvail[24] = 1;
-  }
-  if (pNeighAvail->iLeftTopAvail) { //top_left
-    iSampleAvail[0] = 1;
-  }
-  if (pNeighAvail->iTopAvail) { //top
-    iSampleAvail[1] =
-      iSampleAvail[2] =
-        iSampleAvail[3] =
-          iSampleAvail[4] = 1;
-  }
-  if (pNeighAvail->iRightTopAvail) { //top_right
-    iSampleAvail[5] = 1;
-  }
-
-  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
-
-  for (i = 0; i < 16; i++) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //prev_intra4x4_pred_mode_flag[ luma4x4BlkIdx ]
-    const int32_t kiPrevIntra4x4PredMode = uiCode;
-    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
-
-    int8_t iBestMode;
-    if (kiPrevIntra4x4PredMode) {
-      iBestMode = kiPredMode;
-    } else { //kPrevIntra4x4PredMode == 0
-      WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); //rem_intra4x4_pred_mode[ luma4x4BlkIdx ]
-      const int32_t kiRemIntra4x4PredMode = uiCode;
-      if (kiRemIntra4x4PredMode < kiPredMode) {
-        iBestMode = kiRemIntra4x4PredMode;
-      } else {
-        iBestMode = kiRemIntra4x4PredMode + 1;
-      }
-    }
-
-    iFinalMode = CheckIntra4x4PredMode (&iSampleAvail[0], &iBestMode, i);
-    if (iFinalMode  == ERR_INVALID_INTRA4X4_MODE) {
-      return ERR_INFO_INVALID_I4x4_PRED_MODE;
-    }
-
-    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
-
-    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
-
-    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
-  }
-  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
-  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
-  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
-  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-  if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-  pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  if (CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-
-  return 0;
-}
-
-int32_t ParseIntra4x4ModeConstrain1 (PNeighAvail pNeighAvail, int8_t* pIntraPredMode, PBitStringAux pBs,
-                                     PDqLayer pCurDqLayer) {
-  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int32_t iFinalMode, i;
-
-  uint8_t uiNeighAvail = 0;
-  uint32_t uiCode;
-  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {   //left
-    iSampleAvail[ 6] =
-      iSampleAvail[12] =
-        iSampleAvail[18] =
-          iSampleAvail[24] = 1;
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {  //top_left
-    iSampleAvail[0] = 1;
-  }
-  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {  //top
-    iSampleAvail[1] =
-      iSampleAvail[2] =
-        iSampleAvail[3] =
-          iSampleAvail[4] = 1;
-  }
-  if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) {  //top_right
-    iSampleAvail[5] = 1;
-  }
-
-  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
-
-  for (i = 0; i < 16; i++) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //prev_intra4x4_pred_mode_flag[ luma4x4BlkIdx ]
-    const int32_t kiPrevIntra4x4PredMode = uiCode; //1bit
-    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
-
-    int8_t iBestMode;
-    if (kiPrevIntra4x4PredMode) {
-      iBestMode = kiPredMode;
-    } else { //kPrevIntra4x4PredMode == 0
-      WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); //rem_intra4x4_pred_mode[ luma4x4BlkIdx ]
-      const int32_t kiRemIntra4x4PredMode = uiCode;
-      if (kiRemIntra4x4PredMode < kiPredMode) {
-        iBestMode = kiRemIntra4x4PredMode;
-      } else {
-        iBestMode = kiRemIntra4x4PredMode + 1;
-      }
-    }
-
-    iFinalMode = CheckIntra4x4PredMode (&iSampleAvail[0], &iBestMode, i);
-    if (iFinalMode  == ERR_INVALID_INTRA4X4_MODE) {
-      return ERR_INFO_INVALID_I4x4_PRED_MODE;
-    }
-
-    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
-
-    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
-
-    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
-  }
-  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
-  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
-  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
-  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-  if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-  pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  if (CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-
-  return 0;
-}
-
-int32_t ParseIntra16x16ModeConstrain0 (PNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer) {
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
-  uint32_t uiCode;
-  if (pNeighAvail->iLeftAvail) {
-    uiNeighAvail = (1 << 2);
-  }
-  if (pNeighAvail->iLeftTopAvail) {
-    uiNeighAvail |= (1 << 1);
-  }
-  if (pNeighAvail->iTopAvail) {
-    uiNeighAvail |= 1;
-  }
-
-  if (CheckIntra16x16PredMode (uiNeighAvail,
-                               &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding
-    return ERR_INFO_INVALID_I16x16_PRED_MODE;
-  }
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-  if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-  pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-
-  if (CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-
-  return 0;
-}
-
-int32_t ParseIntra16x16ModeConstrain1 (PNeighAvail pNeighAvail, PBitStringAux pBs, PDqLayer pCurDqLayer) {
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
-  uint32_t uiCode;
-  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {
-    uiNeighAvail = (1 << 2);
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {
-    uiNeighAvail |= (1 << 1);
-  }
-  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {
-    uiNeighAvail |= 1;
-  }
-
-  if (CheckIntra16x16PredMode (uiNeighAvail,
-                               &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding
-    return ERR_INFO_INVALID_I16x16_PRED_MODE;
-  }
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-  if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
-  }
-  pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-
-  if (CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return ERR_INFO_INVALID_I_CHROMA_PRED_MODE;
   }
 
   return 0;
--- a/codec/decoder/targets.mk
+++ b/codec/decoder/targets.mk
@@ -2,6 +2,7 @@
 DECODER_CPP_SRCS=\
 	$(DECODER_SRCDIR)/core/src/au_parser.cpp\
 	$(DECODER_SRCDIR)/core/src/bit_stream.cpp\
+	$(DECODER_SRCDIR)/core/src/cabac_decoder.cpp\
 	$(DECODER_SRCDIR)/core/src/deblocking.cpp\
 	$(DECODER_SRCDIR)/core/src/decode_mb_aux.cpp\
 	$(DECODER_SRCDIR)/core/src/decode_slice.cpp\
@@ -16,6 +17,7 @@
 	$(DECODER_SRCDIR)/core/src/mem_align.cpp\
 	$(DECODER_SRCDIR)/core/src/memmgr_nal_unit.cpp\
 	$(DECODER_SRCDIR)/core/src/mv_pred.cpp\
+	$(DECODER_SRCDIR)/core/src/parse_mb_syn_cabac.cpp\
 	$(DECODER_SRCDIR)/core/src/parse_mb_syn_cavlc.cpp\
 	$(DECODER_SRCDIR)/core/src/pic_queue.cpp\
 	$(DECODER_SRCDIR)/core/src/rec_mb.cpp\
binary files /dev/null b/res/QCIF_2P_I_allIPCM.264 differ
binary files /dev/null b/res/test_cif_I_CABAC_PCM.264 differ
binary files /dev/null b/res/test_cif_I_CABAC_slice.264 differ
binary files /dev/null b/res/test_cif_P_CABAC_slice.264 differ
binary files /dev/null b/res/test_qcif_cabac.264 differ
--- a/test/api/decoder_test.cpp
+++ b/test/api/decoder_test.cpp
@@ -119,7 +119,12 @@
   {"res/SVA_CL1_E.264", "4fe09ab6cdc965ea10a20f1d6dd38aca954412bb"},
   {"res/SVA_FM1_E.264", "fad08c4ff7cf2307b6579853d0f4652fc26645d3"},
   {"res/SVA_NL1_B.264", "6d63f72a0c0d833b1db0ba438afff3b4180fb3e6"},
-  {"res/SVA_NL2_E.264", "70453ef8097c94dd190d6d2d1d5cb83c67e66238"}
+  {"res/SVA_NL2_E.264", "70453ef8097c94dd190d6d2d1d5cb83c67e66238"},
+  {"res/test_cif_I_CABAC_PCM.264", "95fdf21470d3bbcf95505abb2164042063a79d98"},
+  {"res/test_cif_I_CABAC_slice.264", "19121bc67f2b13fb8f030504fc0827e1ac6d0fdb"},
+  {"res/test_cif_P_CABAC_slice.264", "521bbd0ba2422369b724c7054545cf107a56f959"},
+  {"res/test_qcif_cabac.264", "587d1d05943f3cd416bf69469975fdee05361e69"},
+  {"res/QCIF_2P_I_allIPCM.264", "8724c0866ebdba7ebb7209a0c0c3ae3ae38a0240"}
 };
 
 INSTANTIATE_TEST_CASE_P (DecodeFile, DecoderOutputTest,
--- a/test/api/encode_decode_api_test.cpp
+++ b/test/api/encode_decode_api_test.cpp
@@ -763,7 +763,7 @@
   decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
   int32_t iSpsPpsIdAddition = 1;
   encoder_->SetOption (ENCODER_OPTION_ENABLE_SPS_PPS_ID_ADDITION, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
+  int32_t iIDRPeriod = pow(2,(param_.iTemporalLayerNum-1)) * ((rand() % 5) + 1);
   encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
   SLTRConfig sLtrConfigVal;
   sLtrConfigVal.bEnableLongTermReference = 1;
@@ -1854,7 +1854,7 @@
   decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
   int32_t iSpsPpsIdAddition = 1;
   encoder_->SetOption (ENCODER_OPTION_ENABLE_SPS_PPS_ID_ADDITION, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 15;
+  int32_t iIDRPeriod = pow(2,(param_.iTemporalLayerNum-1)) * ((rand() % 5) + 1);
   encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
   SLTRConfig sLtrConfigVal;
   sLtrConfigVal.bEnableLongTermReference = 1;