ref: e0ef0cabdd605f5f5809358e1999b0457ba4f3ac
parent: 9d73d273ff81f12fb05c86d21231106ae307b8ce
parent: 06c534d9f2be39138d13beacf64d1c0d56739b8a
author: Licai Guo <[email protected]>
date: Thu Mar 20 11:57:50 EDT 2014
Merge pull request #555 from huili2/remove_unused_func rephrase blockzero function complexity and remove useless functions
--- a/codec/build/win32/dec/WelsDecCore.vcproj
+++ b/codec/build/win32/dec/WelsDecCore.vcproj
@@ -349,46 +349,6 @@
Filter="*.asm;*.inc"
>
<File
- RelativePath="..\..\..\decoder\core\x86\block_add.asm"
- >
- <FileConfiguration
- Name="Release|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
- Outputs="$(IntDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Release|x64"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
- Outputs="$(IntDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|Win32"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)
"
- Outputs="$(IntDir)\$(InputName).obj"
- />
- </FileConfiguration>
- <FileConfiguration
- Name="Debug|x64"
- >
- <Tool
- Name="VCCustomBuildTool"
- CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)
"
- Outputs="$(IntDir)\$(InputName).obj"
- />
- </FileConfiguration>
- </File>
- <File
RelativePath="..\..\..\common\x86\cpuid.asm"
>
<FileConfiguration
--- a/codec/decoder/core/arm/block_add_neon.S
+++ b/codec/decoder/core/arm/block_add_neon.S
@@ -116,46 +116,6 @@
WELS_ASM_FUNC_END
-// r0 int16_t * block,
-// r1 int32_t stride
-WELS_ASM_FUNC_BEGIN WelsResBlockZero16x16_neon// can use for 256*sizeof(int16_t)
- push {r2}
- mov r2, #16
-// each row 16 elements, 16*sizeof(int16_t)
-// memset(ptr_dest, 0, 16*sizeof(int16_t));
-// ptr_dest += stride;
- lsl r1, r1, #1 // r1 = 2*r1
- veor.i16 q0, q0, q0
- veor.i16 q1, q1, q1
-
-block_zero_16x16_luma_loop:
- vst1.i16 {q0, q1}, [r0], r1
- subs r2, r2, #2
- vst1.i16 {q0, q1}, [r0], r1
- bne block_zero_16x16_luma_loop
-
- pop {r2}
-WELS_ASM_FUNC_END
-
-WELS_ASM_FUNC_BEGIN WelsResBlockZero8x8_neon// can use for 64*sizeof(int16_t)
- push {r2}
- mov r2, #8
-// each row 8 elements, 8*sizeof(int16_t)
-// memset(ptr_dest, 0, 8*sizeof(int16_t));
-// ptr_dest += stride;
- lsl r1, r1, #1
- veor.i16 q0, q0, q0
-
-block_zero_8x8_chma_loop:
- vst1.i16 {q0}, [r0], r1
- subs r2, r2, #2
- vst1.i16 {q0}, [r0], r1
- bne block_zero_8x8_chma_loop
-
- pop {r2}
-WELS_ASM_FUNC_END
-
-
// uint8_t *pred, const int32_t stride, int16_t *rs
WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
--- a/codec/decoder/core/inc/decode_slice.h
+++ b/codec/decoder/core/inc/decode_slice.h
@@ -37,8 +37,6 @@
namespace WelsDec {
-void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal);
-
int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
@@ -66,22 +64,13 @@
#endif//__cplusplus
#if defined(HAVE_NEON)
-void WelsResBlockZero16x16_neon(int16_t* pBlock, int32_t iStride);
-void WelsResBlockZero8x8_neon(int16_t* pBlock, int32_t iStride);
void SetNonZeroCount_neon(int16_t* pBlock, int8_t* pNonZeroCount);
#endif
-#ifdef X86_ASM
-void WelsResBlockZero16x16_sse2 (int16_t* pBlock, int32_t iStride);
-void WelsResBlockZero8x8_sse2 (int16_t* pBlock, int32_t iStride);
-#endif
-
#ifdef __cplusplus
}
#endif//__cplusplus
-void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride);
-void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride);
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount);
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu);
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -133,15 +133,9 @@
PChromaDeblockingEQ4Func pfChromaDeblockingEQ4Hor;
} SDeblockingFunc, *PDeblockingFunc;
-typedef void (*PWelsBlockAddStrideFunc) (uint8_t* pDest, uint8_t* pPred, int16_t* pRes, int32_t iPredStride,
- int32_t iResStride);
-typedef void (*PWelsBlockZeroFunc) (int16_t* pBlock, int32_t iStride);
typedef void (*PWelsNonZeroCountFunc) (int16_t* pBlock, int8_t* pNonZeroCount);
-typedef void (*PWelsSimpleIdct4x4AddFunc) (int16_t* pDest, int16_t* pSrc, int32_t iStride);
typedef struct TagBlockFunc {
- PWelsBlockZeroFunc pWelsBlockZero16x16Func;
- PWelsBlockZeroFunc pWelsBlockZero8x8Func;
PWelsNonZeroCountFunc pWelsSetNonZeroCountFunc;
} SBlockFunc;
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -842,9 +842,7 @@
uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
}
- pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
- pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
- pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
+ memset(pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof(int16_t));
ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
@@ -1043,42 +1041,14 @@
return 0;
}
-void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal) {
- int32_t i;
- int16_t* pDst = pBlock;
-
- for (i = 0; i < iHeight; i++) {
- memset (pDst, uiVal, iWidth * sizeof (int16_t));
- pDst += iStride;
- }
-}
-
void WelsBlockFuncInit (SBlockFunc* pFunc, int32_t iCpu) {
- pFunc->pWelsBlockZero16x16Func = WelsBlockZero16x16_c;
- pFunc->pWelsBlockZero8x8Func = WelsBlockZero8x8_c;
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_c;
-#ifdef X86_ASM
- if (iCpu & WELS_CPU_SSE2) {
- pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_sse2;
- pFunc->pWelsBlockZero8x8Func = WelsResBlockZero8x8_sse2;
- }
-#endif
-
#ifdef HAVE_NEON
if ( iCpu & WELS_CPU_NEON ) {
- pFunc->pWelsBlockZero16x16Func = WelsResBlockZero16x16_neon;
- pFunc->pWelsBlockZero8x8Func = WelsResBlockZero8x8_neon;
pFunc->pWelsSetNonZeroCountFunc = SetNonZeroCount_neon;
}
#endif
-}
-void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
- WelsBlockInit (pBlock, 16, 16, iStride, 0);
-}
-
-void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
- WelsBlockInit (pBlock, 8, 8, iStride, 0);
}
void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
--- a/codec/decoder/core/x86/block_add.asm
+++ /dev/null
@@ -1,151 +1,0 @@
-;*!
-;* \copy
-;* Copyright (c) 2009-2013, Cisco Systems
-;* All rights reserved.
-;*
-;* Redistribution and use in source and binary forms, with or without
-;* modification, are permitted provided that the following conditions
-;* are met:
-;*
-;* * Redistributions of source code must retain the above copyright
-;* notice, this list of conditions and the following disclaimer.
-;*
-;* * Redistributions in binary form must reproduce the above copyright
-;* notice, this list of conditions and the following disclaimer in
-;* the documentation and/or other materials provided with the
-;* distribution.
-;*
-;* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-;* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-;* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-;* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-;* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-;* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-;* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-;* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-;* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-;* POSSIBILITY OF SUCH DAMAGE.
-;*
-;*
-;* block_add.asm
-;*
-;* Abstract
-;* add block
-;*
-;* History
-;* 09/21/2009 Created
-;*
-;*
-;*************************************************************************/
-
-%include "asm_inc.asm"
-
-;*******************************************************************************
-; Code
-;*******************************************************************************
-
-SECTION .text
-
-
-;*******************************************************************************
-; void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
-;*******************************************************************************
-WELS_EXTERN WelsResBlockZero16x16_sse2
- %assign push_num 0
- LOAD_2_PARA
- PUSH_XMM 8
- SIGN_EXTENSION r1, r1d
- lea r1, [r1*2]
- lea r2, [r1*3]
-
- pxor xmm7, xmm7
-
- ; four lines
- movdqa [r0], xmm7
- movdqa [r0+10h], xmm7
-
- movdqa [r0+r1], xmm7
- movdqa [r0+r1+10h], xmm7
-
- movdqa [r0+r1*2], xmm7
- movdqa [r0+r1*2+10h], xmm7
-
- movdqa [r0+r2], xmm7
- movdqa [r0+r2+10h], xmm7
-
- ; four lines
- lea r0, [r0+r1*4]
- movdqa [r0], xmm7
- movdqa [r0+10h], xmm7
-
- movdqa [r0+r1], xmm7
- movdqa [r0+r1+10h], xmm7
-
- movdqa [r0+r1*2], xmm7
- movdqa [r0+r1*2+10h], xmm7
-
- movdqa [r0+r2], xmm7
- movdqa [r0+r2+10h], xmm7
-
- ; four lines
- lea r0, [r0+r1*4]
- movdqa [r0], xmm7
- movdqa [r0+10h], xmm7
-
- movdqa [r0+r1], xmm7
- movdqa [r0+r1+10h], xmm7
-
- movdqa [r0+r1*2], xmm7
- movdqa [r0+r1*2+10h], xmm7
-
- movdqa [r0+r2], xmm7
- movdqa [r0+r2+10h], xmm7
-
- ; four lines
- lea r0, [r0+r1*4]
- movdqa [r0], xmm7
- movdqa [r0+10h], xmm7
-
- movdqa [r0+r1], xmm7
- movdqa [r0+r1+10h], xmm7
-
- movdqa [r0+r1*2], xmm7
- movdqa [r0+r1*2+10h], xmm7
-
- movdqa [r0+r2], xmm7
- movdqa [r0+r2+10h], xmm7
-
- POP_XMM
- ret
-
-
-;*******************************************************************************
-; void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
-;*******************************************************************************
-WELS_EXTERN WelsResBlockZero8x8_sse2
- %assign push_num 0
- LOAD_2_PARA
- PUSH_XMM 8
- SIGN_EXTENSION r1, r1d
- lea r1, [r1*2]
- lea r2, [r1*3]
-
- pxor xmm7, xmm7
-
- movdqa [r0], xmm7
- movdqa [r0+r1], xmm7
- movdqa [r0+r1*2], xmm7
- movdqa [r0+r2], xmm7
-
- lea r0, [r0+r1*4]
- movdqa [r0], xmm7
- movdqa [r0+r1], xmm7
- movdqa [r0+r1*2], xmm7
- movdqa [r0+r2], xmm7
-
-
- POP_XMM
- ret
-
--- a/codec/decoder/targets.mk
+++ b/codec/decoder/targets.mk
@@ -27,7 +27,6 @@
ifeq ($(ASM_ARCH), x86)
DECODER_ASM_SRCS=\
- $(DECODER_SRCDIR)/core/x86/block_add.asm\
$(DECODER_SRCDIR)/core/x86/dct.asm\
$(DECODER_SRCDIR)/core/x86/intra_pred.asm\