ref: 9a0663620a6dd1cb655b44015a3a1c4da3ccd2fc
parent: 1a7d0ab831fa3560c3acb7fe750bea39bb77b2f1
author: Martin Storsjö <[email protected]>
date: Wed Jan 28 06:18:31 EST 2015
Move the MC routines to the common library Use the decoder versions of the functions (which are capable of handling widths 4/8/16 for luma, not only 16 as in the encoder). By using the more generic versions, there may be a small performance loss since the functions need to check the width in every call. Actual measurements show that the actual change is very small (and the shared routines turn out to actually be faster than the existing ones in ARM NEON setups).
--- a/codec/build/iOS/common/common.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/common/common.xcodeproj/project.pbxproj
@@ -19,6 +19,7 @@
4CE443D918B722CD0017DF25 /* Foundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4CE443D818B722CD0017DF25 /* Foundation.framework */; };
53C1C9BC193F0FB000404D8F /* expand_pic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 53C1C9BB193F0FB000404D8F /* expand_pic.cpp */; };
5BA8F2C019603F5F00011CE4 /* common_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */; };
+ 5BDD15ED1A79027600B6CA2E /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 5BDD15EC1A79027600B6CA2E /* mc.cpp */; };
F0B204F918FD23BF005DA23F /* copy_mb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F0B204F818FD23BF005DA23F /* copy_mb.cpp */; };
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8221906673900E156A8 /* arm_arch64_common_macro.S */; };
F556A8251906673900E156A8 /* expand_picture_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = F556A8231906673900E156A8 /* expand_picture_aarch64_neon.S */; };
@@ -54,7 +55,6 @@
4C3406BA18D96EA600DFA14A /* deblocking_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deblocking_common.h; sourceTree = "<group>"; };
4C3406BD18D96EA600DFA14A /* ls_defines.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ls_defines.h; sourceTree = "<group>"; };
4C3406BE18D96EA600DFA14A /* macros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = macros.h; sourceTree = "<group>"; };
- 4C3406BF18D96EA600DFA14A /* mc_common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc_common.h; sourceTree = "<group>"; };
4C3406C018D96EA600DFA14A /* measure_time.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = measure_time.h; sourceTree = "<group>"; };
4C3406C118D96EA600DFA14A /* typedefs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = typedefs.h; sourceTree = "<group>"; };
4C3406C218D96EA600DFA14A /* WelsThreadLib.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = WelsThreadLib.h; sourceTree = "<group>"; };
@@ -70,6 +70,8 @@
53C1C9BB193F0FB000404D8F /* expand_pic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = expand_pic.cpp; sourceTree = "<group>"; };
5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = wels_common_defs.h; sourceTree = "<group>"; };
5BA8F2BF19603F5F00011CE4 /* common_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = common_tables.cpp; sourceTree = "<group>"; };
+ 5BDD15EB1A79026A00B6CA2E /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = "<group>"; };
+ 5BDD15EC1A79027600B6CA2E /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = "<group>"; };
F0B204F718FD23B6005DA23F /* copy_mb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = copy_mb.h; sourceTree = "<group>"; };
F0B204F818FD23BF005DA23F /* copy_mb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = copy_mb.cpp; sourceTree = "<group>"; };
F556A8221906673900E156A8 /* arm_arch64_common_macro.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = arm_arch64_common_macro.S; path = arm64/arm_arch64_common_macro.S; sourceTree = "<group>"; };
@@ -123,7 +125,7 @@
4C3406BA18D96EA600DFA14A /* deblocking_common.h */,
4C3406BD18D96EA600DFA14A /* ls_defines.h */,
4C3406BE18D96EA600DFA14A /* macros.h */,
- 4C3406BF18D96EA600DFA14A /* mc_common.h */,
+ 5BDD15EB1A79026A00B6CA2E /* mc.h */,
4C3406C018D96EA600DFA14A /* measure_time.h */,
4C3406C118D96EA600DFA14A /* typedefs.h */,
5BA8F2BE19603F3500011CE4 /* wels_common_defs.h */,
@@ -143,6 +145,7 @@
4C3406C518D96EA600DFA14A /* crt_util_safe_x.cpp */,
53C1C9BB193F0FB000404D8F /* expand_pic.cpp */,
4C3406C618D96EA600DFA14A /* deblocking_common.cpp */,
+ 5BDD15EC1A79027600B6CA2E /* mc.cpp */,
4C3406C818D96EA600DFA14A /* WelsThreadLib.cpp */,
);
path = src;
@@ -253,6 +256,7 @@
F5B8D82D190757290037849A /* mc_aarch64_neon.S in Sources */,
4C3406C918D96EA600DFA14A /* arm_arch_common_macro.S in Sources */,
F556A8241906673900E156A8 /* arm_arch64_common_macro.S in Sources */,
+ 5BDD15ED1A79027600B6CA2E /* mc.cpp in Sources */,
F5AC94FF193EB7D800F58154 /* deblocking_aarch64_neon.S in Sources */,
4C3406CE18D96EA600DFA14A /* crt_util_safe_x.cpp in Sources */,
F791965919D3BE2200F60C6B /* intra_pred_common.cpp in Sources */,
--- a/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/dec/welsdec/welsdec.xcodeproj/project.pbxproj
@@ -20,7 +20,6 @@
4CE4469318BC5EAB0017DF25 /* fmo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467018BC5EAA0017DF25 /* fmo.cpp */; };
4CE4469418BC5EAB0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */; };
4CE4469518BC5EAB0017DF25 /* manage_dec_ref.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */; };
- 4CE4469618BC5EAB0017DF25 /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467318BC5EAA0017DF25 /* mc.cpp */; };
4CE4469718BC5EAB0017DF25 /* mem_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467418BC5EAA0017DF25 /* mem_align.cpp */; };
4CE4469818BC5EAB0017DF25 /* memmgr_nal_unit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */; };
4CE4469918BC5EAB0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */; };
@@ -73,7 +72,6 @@
4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = "<group>"; tabWidth = 4; usesTabs = 0; };
4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = manage_dec_ref.h; sourceTree = "<group>"; };
4CE4465518BC5EAA0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = "<group>"; };
- 4CE4465618BC5EAA0017DF25 /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = "<group>"; };
4CE4465718BC5EAA0017DF25 /* mem_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mem_align.h; sourceTree = "<group>"; };
4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memmgr_nal_unit.h; sourceTree = "<group>"; };
4CE4465918BC5EAA0017DF25 /* mv_pred.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mv_pred.h; sourceTree = "<group>"; };
@@ -99,7 +97,6 @@
4CE4467018BC5EAA0017DF25 /* fmo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fmo.cpp; sourceTree = "<group>"; };
4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = "<group>"; };
4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = manage_dec_ref.cpp; sourceTree = "<group>"; };
- 4CE4467318BC5EAA0017DF25 /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; indentWidth = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = "<group>"; tabWidth = 1; usesTabs = 0; wrapsLines = 1; };
4CE4467418BC5EAA0017DF25 /* mem_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mem_align.cpp; sourceTree = "<group>"; };
4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memmgr_nal_unit.cpp; sourceTree = "<group>"; };
4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = "<group>"; };
@@ -218,7 +215,6 @@
4CE4465318BC5EAA0017DF25 /* get_intra_predictor.h */,
4CE4465418BC5EAA0017DF25 /* manage_dec_ref.h */,
4CE4465518BC5EAA0017DF25 /* mb_cache.h */,
- 4CE4465618BC5EAA0017DF25 /* mc.h */,
4CE4465718BC5EAA0017DF25 /* mem_align.h */,
4CE4465818BC5EAA0017DF25 /* memmgr_nal_unit.h */,
4CE4465918BC5EAA0017DF25 /* mv_pred.h */,
@@ -256,7 +252,6 @@
4CE4467018BC5EAA0017DF25 /* fmo.cpp */,
4CE4467118BC5EAA0017DF25 /* get_intra_predictor.cpp */,
4CE4467218BC5EAA0017DF25 /* manage_dec_ref.cpp */,
- 4CE4467318BC5EAA0017DF25 /* mc.cpp */,
4CE4467418BC5EAA0017DF25 /* mem_align.cpp */,
4CE4467518BC5EAA0017DF25 /* memmgr_nal_unit.cpp */,
4CE4467618BC5EAA0017DF25 /* mv_pred.cpp */,
@@ -375,7 +370,6 @@
4CBC1B81194AC4E100214D9E /* intra_pred_aarch64_neon.S in Sources */,
4CE4469018BC5EAB0017DF25 /* decoder_core.cpp in Sources */,
4CE447AE18BC6BE90017DF25 /* intra_pred_neon.S in Sources */,
- 4CE4469618BC5EAB0017DF25 /* mc.cpp in Sources */,
4CE4469C18BC5EAB0017DF25 /* rec_mb.cpp in Sources */,
4CE4468B18BC5EAB0017DF25 /* bit_stream.cpp in Sources */,
4CE4468D18BC5EAB0017DF25 /* decode_mb_aux.cpp in Sources */,
--- a/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/enc/welsenc/welsenc.xcodeproj/project.pbxproj
@@ -24,7 +24,6 @@
4CE4471318BC605C0017DF25 /* encoder_data_tables.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */; };
4CE4471418BC605C0017DF25 /* encoder_ext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E318BC605C0017DF25 /* encoder_ext.cpp */; };
4CE4471618BC605C0017DF25 /* get_intra_predictor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */; };
- 4CE4471718BC605C0017DF25 /* mc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E618BC605C0017DF25 /* mc.cpp */; };
4CE4471818BC605C0017DF25 /* md.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E718BC605C0017DF25 /* md.cpp */; };
4CE4471918BC605C0017DF25 /* memory_align.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E818BC605C0017DF25 /* memory_align.cpp */; };
4CE4471A18BC605C0017DF25 /* mv_pred.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CE446E918BC605C0017DF25 /* mv_pred.cpp */; };
@@ -93,7 +92,6 @@
4CE446B518BC605C0017DF25 /* extern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = extern.h; sourceTree = "<group>"; };
4CE446B618BC605C0017DF25 /* get_intra_predictor.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = get_intra_predictor.h; sourceTree = "<group>"; };
4CE446B718BC605C0017DF25 /* mb_cache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mb_cache.h; sourceTree = "<group>"; };
- 4CE446B818BC605C0017DF25 /* mc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mc.h; sourceTree = "<group>"; };
4CE446B918BC605C0017DF25 /* md.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = md.h; sourceTree = "<group>"; };
4CE446BA18BC605C0017DF25 /* memory_align.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = memory_align.h; sourceTree = "<group>"; };
4CE446BB18BC605C0017DF25 /* mt_defs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mt_defs.h; sourceTree = "<group>"; };
@@ -135,7 +133,6 @@
4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_data_tables.cpp; sourceTree = "<group>"; };
4CE446E318BC605C0017DF25 /* encoder_ext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = encoder_ext.cpp; sourceTree = "<group>"; };
4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = get_intra_predictor.cpp; sourceTree = "<group>"; };
- 4CE446E618BC605C0017DF25 /* mc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mc.cpp; sourceTree = "<group>"; };
4CE446E718BC605C0017DF25 /* md.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = md.cpp; sourceTree = "<group>"; };
4CE446E818BC605C0017DF25 /* memory_align.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = memory_align.cpp; sourceTree = "<group>"; };
4CE446E918BC605C0017DF25 /* mv_pred.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mv_pred.cpp; sourceTree = "<group>"; };
@@ -276,7 +273,6 @@
4CE446B518BC605C0017DF25 /* extern.h */,
4CE446B618BC605C0017DF25 /* get_intra_predictor.h */,
4CE446B718BC605C0017DF25 /* mb_cache.h */,
- 4CE446B818BC605C0017DF25 /* mc.h */,
4CE446B918BC605C0017DF25 /* md.h */,
4CE446BA18BC605C0017DF25 /* memory_align.h */,
4CE446BB18BC605C0017DF25 /* mt_defs.h */,
@@ -328,7 +324,6 @@
4CE446E218BC605C0017DF25 /* encoder_data_tables.cpp */,
4CE446E318BC605C0017DF25 /* encoder_ext.cpp */,
4CE446E518BC605C0017DF25 /* get_intra_predictor.cpp */,
- 4CE446E618BC605C0017DF25 /* mc.cpp */,
4CE446E718BC605C0017DF25 /* md.cpp */,
4CE446E818BC605C0017DF25 /* memory_align.cpp */,
4CE446E918BC605C0017DF25 /* mv_pred.cpp */,
@@ -455,7 +450,6 @@
4CE4470E18BC605C0017DF25 /* au_set.cpp in Sources */,
F5BE8005196B913200ED02ED /* memory_aarch64_neon.S in Sources */,
4CBC1B83194ACBB400214D9E /* intra_pred_aarch64_neon.S in Sources */,
- 4CE4471718BC605C0017DF25 /* mc.cpp in Sources */,
F7E9994519EBD1E9009B1021 /* svc_set_mb_syn_cabac.cpp in Sources */,
F5617A50196A833A006E2B20 /* reconstruct_aarch64_neon.S in Sources */,
4CE4472918BC605C0017DF25 /* svc_set_mb_syn_cavlc.cpp in Sources */,
--- a/codec/build/win32/dec/WelsDecCore.vcproj
+++ b/codec/build/win32/dec/WelsDecCore.vcproj
@@ -744,14 +744,10 @@
>
</File>
<File
- RelativePath="..\..\..\decoder\core\inc\mc.h"
+ RelativePath="..\..\..\common\inc\mc.h"
>
</File>
<File
- RelativePath="..\..\..\common\inc\mc_common.h"
- >
- </File>
- <File
RelativePath="..\..\..\common\inc\measure_time.h"
>
</File>
@@ -909,7 +905,7 @@
>
</File>
<File
- RelativePath="..\..\..\decoder\core\src\mc.cpp"
+ RelativePath="..\..\..\common\src\mc.cpp"
>
</File>
<File
--- a/codec/build/win32/enc/WelsEncCore.vcproj
+++ b/codec/build/win32/enc/WelsEncCore.vcproj
@@ -386,7 +386,7 @@
>
</File>
<File
- RelativePath="..\..\..\encoder\core\src\mc.cpp"
+ RelativePath="..\..\..\common\src\mc.cpp"
>
</File>
<File
@@ -563,11 +563,7 @@
>
</File>
<File
- RelativePath="..\..\..\encoder\core\inc\mc.h"
- >
- </File>
- <File
- RelativePath="..\..\..\common\inc\mc_common.h"
+ RelativePath="..\..\..\common\inc\mc.h"
>
</File>
<File
--- /dev/null
+++ b/codec/common/inc/mc.h
@@ -1,0 +1,300 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef MC_H
+#define MC_H
+
+#include "typedefs.h"
+
+typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight);
+
+typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight);
+typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t,
+ int32_t, int32_t);
+
+typedef struct TagMcFunc {
+ PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
+ PWelsLumaHalfpelMcFunc pfLumaHalfpelVer;
+ PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
+ PWelsMcFunc pMcChromaFunc;
+
+ PWelsMcFunc pMcLumaFunc;
+ PWelsSampleAveragingFunc pfSampleAveraging;
+} SMcFunc;
+
+namespace WelsCommon {
+
+void InitMcFunc (SMcFunc* pMcFunc, uint32_t iCpu);
+
+} // namespace WelsCommon
+
+
+#if defined(__cplusplus)
+extern "C" {
+#endif//__cplusplus
+
+#if defined(HAVE_NEON)
+void McCopyWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
+
+void McCopyWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
+
+void McCopyWidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
+
+void McChromaWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t* pWeights, int32_t iHeight);
+
+void McChromaWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t* pWeights, int32_t iHeight);
+
+void PixelAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
+void PixelAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
+void PixelAvgWidthEq4_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
+
+void McHorVer01WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer01WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer01WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer03WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer03WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer03WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+
+void McHorVer10WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer10WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer10WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer30WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer30WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer30WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+
+//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
+void McHorVer20WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer20WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer20WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+
+//vertical filter to gain half sample, that is (0, 2) location in quarter sample
+void McHorVer02WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer02WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer02WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+
+//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
+void McHorVer22WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer22WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer22WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+
+void PixStrideAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
+ const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
+void PixStrideAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
+ const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
+
+void McHorVer20Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// width+1
+void McHorVer20Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// width+1
+
+void McHorVer02Height17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// height+1
+void McHorVer02Height9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// height+1
+
+void McHorVer22Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);//width+1&&height+1
+void McHorVer22Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);//width+1&&height+1
+#endif
+
+#if defined(HAVE_NEON_AARCH64)
+void McCopyWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t* pWeights, int32_t iHeight);
+void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t* pWeights, int32_t iHeight);
+void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
+void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
+void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
+void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
+void McHorVer20WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+//vertical filter to gain half sample, that is (0, 2) location in quarter sample
+void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
+void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
+ const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
+void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
+ const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
+void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// width+1
+void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// width+1
+void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// height+1
+void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);// height+1
+void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);//width+1&&height+1
+void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);//width+1&&height+1
+#endif
+
+#if defined(X86_ASM)
+//***************************************************************************//
+// MMXEXT definition //
+//***************************************************************************//
+void McHorVer20WidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McChromaWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ const uint8_t* kpABCD, int32_t iHeight);
+void McCopyWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McCopyWidthEq8_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void PixelAvgWidthEq4_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
+void PixelAvgWidthEq8_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
+
+//***************************************************************************//
+// SSE2 definition //
+//***************************************************************************//
+void McChromaWidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ const uint8_t* kpABCD, int32_t iHeight);
+void McCopyWidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer20WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer20WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer02WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer22Width8HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight);
+void McHorVer22Width8VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight);
+void McHorVer22Width8VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight);
+
+void PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
+
+void McHorVer20Width9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight);
+
+void McHorVer02Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight);
+
+void McHorVer22HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride,
+ int32_t iWidth,
+ int32_t iHeight);
+
+//***************************************************************************//
+// SSSE3 definition //
+//***************************************************************************//
+
+void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ const uint8_t* kpABCD, int32_t iHeight);
+
+#endif //X86_ASM
+
+#if defined(__cplusplus)
+}
+#endif//__cplusplus
+
+#endif//MC_H
--- a/codec/common/inc/mc_common.h
+++ /dev/null
@@ -1,275 +1,0 @@
-/*!
- * \copy
- * Copyright (c) 2013, Cisco Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef MC_COMMON_H
-#define MC_COMMON_H
-
-#include "typedefs.h"
-
-#if defined(__cplusplus)
-extern "C" {
-#endif//__cplusplus
-
-#if defined(HAVE_NEON)
-void McCopyWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
-
-void McCopyWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
-
-void McCopyWidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight);
-
-void McChromaWidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t* pWeights, int32_t iHeight);
-
-void McChromaWidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t* pWeights, int32_t iHeight);
-
-void PixelAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
-void PixelAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
-void PixelAvgWidthEq4_neon (uint8_t* pDst, int32_t iDstStride, uint8_t* pSrcA, uint8_t* pSrcB, int32_t iHeight);
-
-void McHorVer01WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer01WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer01WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer03WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer03WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer03WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-
-void McHorVer10WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer10WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer10WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer30WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer30WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer30WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-
-//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
-void McHorVer20WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer20WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer20WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-
-//vertical filter to gain half sample, that is (0, 2) location in quarter sample
-void McHorVer02WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer02WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer02WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-
-//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
-void McHorVer22WidthEq16_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer22WidthEq8_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer22WidthEq4_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-
-void PixStrideAvgWidthEq16_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
- const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
-void PixStrideAvgWidthEq8_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
- const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
-
-void McHorVer20Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// width+1
-void McHorVer20Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// width+1
-
-void McHorVer02Height17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// height+1
-void McHorVer02Height9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// height+1
-
-void McHorVer22Width17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);//width+1&&height+1
-void McHorVer22Width9_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);//width+1&&height+1
-#endif
-
-#if defined(HAVE_NEON_AARCH64)
-void McCopyWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McCopyWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McCopyWidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McChromaWidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t* pWeights, int32_t iHeight);
-void McChromaWidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t* pWeights, int32_t iHeight);
-void PixelAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
-void PixelAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
-void PixelAvgWidthEq4_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
-void McHorVer01WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer01WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer01WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer03WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer03WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer03WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer10WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer10WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer10WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer30WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer30WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer30WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
-void McHorVer20WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer20WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer20WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-//vertical filter to gain half sample, that is (0, 2) location in quarter sample
-void McHorVer02WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer02WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer02WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
-void McHorVer22WidthEq16_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer22WidthEq8_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer22WidthEq4_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void PixStrideAvgWidthEq16_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
- const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
-void PixStrideAvgWidthEq8_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcStrideA,
- const uint8_t* pSrcB, int32_t iSrcStrideB, int32_t iHeight);
-void McHorVer20Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// width+1
-void McHorVer20Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// width+1
-void McHorVer02Height17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// height+1
-void McHorVer02Height9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);// height+1
-void McHorVer22Width17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);//width+1&&height+1
-void McHorVer22Width9_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);//width+1&&height+1
-#endif
-
-#if defined(X86_ASM)
-//***************************************************************************//
-// MMXEXT definition //
-//***************************************************************************//
-void McHorVer20WidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McChromaWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- const uint8_t* kpABCD, int32_t iHeight);
-void McCopyWidthEq4_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McCopyWidthEq8_mmx (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void PixelAvgWidthEq4_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
-void PixelAvgWidthEq8_mmx (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
-
-//***************************************************************************//
-// SSE2 definition //
-//***************************************************************************//
-void McChromaWidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- const uint8_t* kpABCD, int32_t iHeight);
-void McCopyWidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer20WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer20WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer02WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer22Width8HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-void McHorVer22Width8VerLastAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight);
-void McHorVer22Width8VerLastUnAlign_sse2 (const uint8_t* pTap, int32_t iTapStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight);
-
-void PixelAvgWidthEq16_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight);
-
-void McHorVer20Width9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight);
-
-void McHorVer02Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight);
-
-void McHorVer22HorFirst_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pTap, int32_t iTapStride,
- int32_t iWidth,
- int32_t iHeight);
-
-//***************************************************************************//
-// SSSE3 definition //
-//***************************************************************************//
-
-void McChromaWidthEq8_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- const uint8_t* kpABCD, int32_t iHeight);
-
-#endif //X86_ASM
-
-#if defined(__cplusplus)
-}
-#endif//__cplusplus
-
-#endif//MC_COMMON_H
--- /dev/null
+++ b/codec/common/src/mc.cpp
@@ -1,0 +1,1328 @@
+/*!
+ * \copy
+ * Copyright (c) 2009-2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file mc.c
+ *
+ * \brief Interfaces implementation for motion compensation
+ *
+ * \date 03/17/2009 Created
+ *
+ *************************************************************************************
+ */
+
+#include "mc.h"
+
+#include "cpu_core.h"
+#include "ls_defines.h"
+#include "macros.h"
+
+typedef void (*PMcChromaWidthExtFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ const uint8_t* kpABCD, int32_t iHeight);
+typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
+ int32_t, int32_t);
+typedef void (*PWelsMcWidthHeightFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight);
+
+namespace WelsCommon {
+
+/*------------------weight for chroma fraction pixel interpolation------------------*/
+//iA = (8 - dx) * (8 - dy);
+//iB = dx * (8 - dy);
+//iC = (8 - dx) * dy;
+//iD = dx * dy
+static const uint8_t g_kuiABCD[8][8][4] = { //g_kA[dy][dx], g_kB[dy][dx], g_kC[dy][dx], g_kD[dy][dx]
+ {
+ {64, 0, 0, 0}, {56, 8, 0, 0}, {48, 16, 0, 0}, {40, 24, 0, 0},
+ {32, 32, 0, 0}, {24, 40, 0, 0}, {16, 48, 0, 0}, {8, 56, 0, 0}
+ },
+ {
+ {56, 0, 8, 0}, {49, 7, 7, 1}, {42, 14, 6, 2}, {35, 21, 5, 3},
+ {28, 28, 4, 4}, {21, 35, 3, 5}, {14, 42, 2, 6}, {7, 49, 1, 7}
+ },
+ {
+ {48, 0, 16, 0}, {42, 6, 14, 2}, {36, 12, 12, 4}, {30, 18, 10, 6},
+ {24, 24, 8, 8}, {18, 30, 6, 10}, {12, 36, 4, 12}, {6, 42, 2, 14}
+ },
+ {
+ {40, 0, 24, 0}, {35, 5, 21, 3}, {30, 10, 18, 6}, {25, 15, 15, 9},
+ {20, 20, 12, 12}, {15, 25, 9, 15}, {10, 30, 6, 18}, {5, 35, 3, 21}
+ },
+ {
+ {32, 0, 32, 0}, {28, 4, 28, 4}, {24, 8, 24, 8}, {20, 12, 20, 12},
+ {16, 16, 16, 16}, {12, 20, 12, 20}, {8, 24, 8, 24}, {4, 28, 4, 28}
+ },
+ {
+ {24, 0, 40, 0}, {21, 3, 35, 5}, {18, 6, 30, 10}, {15, 9, 25, 15},
+ {12, 12, 20, 20}, {9, 15, 15, 25}, {6, 18, 10, 30}, {3, 21, 5, 35}
+ },
+ {
+ {16, 0, 48, 0}, {14, 2, 42, 6}, {12, 4, 36, 12}, {10, 6, 30, 18},
+ {8, 8, 24, 24}, {6, 10, 18, 30}, {4, 12, 12, 36}, {2, 14, 6, 42}
+ },
+ {
+ {8, 0, 56, 0}, {7, 1, 49, 7}, {6, 2, 42, 14}, {5, 3, 35, 21},
+ {4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
+ }
+};
+
+//***************************************************************************//
+// C code implementation //
+//***************************************************************************//
+static inline void McCopyWidthEq2_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ int32_t i;
+ for (i = 0; i < iHeight; i++) { // iWidth == 2 only for chroma
+ ST16A2 (pDst, LD16 (pSrc));
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+}
+
+static inline void McCopyWidthEq4_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ int32_t i;
+ for (i = 0; i < iHeight; i++) {
+ ST32A4 (pDst, LD32 (pSrc));
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+}
+
+static inline void McCopyWidthEq8_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ int32_t i;
+ for (i = 0; i < iHeight; i++) {
+ ST64A8 (pDst, LD64 (pSrc));
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+}
+
+static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ int32_t i;
+ for (i = 0; i < iHeight; i++) {
+ ST64A8 (pDst , LD64 (pSrc));
+ ST64A8 (pDst + 8, LD64 (pSrc + 8));
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+}
+
+//--------------------Luma sample MC------------------//
+
+static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) {
+ int32_t iPix05 = pSrc[0] + pSrc[5];
+ int32_t iPix14 = pSrc[1] + pSrc[4];
+ int32_t iPix23 = pSrc[2] + pSrc[3];
+
+ return (iPix05 - (iPix14 * 5) + (iPix23 * 20));
+}
+// h: iOffset=1 / v: iOffset=iSrcStride
+static inline int32_t FilterInput8bitWithStride_c (const uint8_t* pSrc, const int32_t kiOffset) {
+ const int32_t kiOffset1 = kiOffset;
+ const int32_t kiOffset2 = (kiOffset << 1);
+ const int32_t kiOffset3 = kiOffset + kiOffset2;
+ const uint32_t kuiPix05 = * (pSrc - kiOffset2) + * (pSrc + kiOffset3);
+ const uint32_t kuiPix14 = * (pSrc - kiOffset1) + * (pSrc + kiOffset2);
+ const uint32_t kuiPix23 = * (pSrc) + * (pSrc + kiOffset1);
+
+ return (kuiPix05 - ((kuiPix14 << 2) + kuiPix14) + (kuiPix23 << 4) + (kuiPix23 << 2));
+}
+
+static inline void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ int32_t i, j;
+ for (i = 0; i < iHeight; i++) {
+ for (j = 0; j < iWidth; j++) {
+ pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1;
+ }
+ pDst += iDstStride;
+ pSrcA += iSrcAStride;
+ pSrcB += iSrcBStride;
+ }
+}
+static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
+ int32_t iHeight) {
+ if (iWidth == 16)
+ McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //here iWidth == 2
+ McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+
+//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
+static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ int32_t i, j;
+ for (i = 0; i < iHeight; i++) {
+ for (j = 0; j < iWidth; j++) {
+ pDst[j] = WelsClip1 ((FilterInput8bitWithStride_c (pSrc + j, 1) + 16) >> 5);
+ }
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+}
+
+//vertical filter to gain half sample, that is (0, 2) location in quarter sample
+static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ int32_t i, j;
+ for (i = 0; i < iHeight; i++) {
+ for (j = 0; j < iWidth; j++) {
+ pDst[j] = WelsClip1 ((FilterInput8bitWithStride_c (pSrc + j, iSrcStride) + 16) >> 5);
+ }
+ pDst += iDstStride;
+ pSrc += iSrcStride;
+ }
+}
+
+//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
+static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ int16_t iTmp[16 + 5]; //16
+ int32_t i, j, k;
+
+ for (i = 0; i < iHeight; i++) {
+ for (j = 0; j < iWidth + 5; j++) {
+ iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride);
+ }
+ for (k = 0; k < iWidth; k++) {
+ pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10);
+ }
+ pSrc += iSrcStride;
+ pDst += iDstStride;
+ }
+}
+
+/////////////////////luma MC//////////////////////////
+static inline void McHorVer01_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiTmp[256];
+ McHorVer02_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer03_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiTmp[256];
+ McHorVer02_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, uiTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer10_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiTmp[256];
+ McHorVer20_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer11_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ uint8_t uiVerTmp[256];
+ McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer12_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiVerTmp[256];
+ uint8_t uiCtrTmp[256];
+ McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiVerTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer13_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ uint8_t uiVerTmp[256];
+ McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer21_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ uint8_t uiCtrTmp[256];
+ McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer23_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ uint8_t uiCtrTmp[256];
+ McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer30_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, pSrc + 1, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer31_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ uint8_t uiVerTmp[256];
+ McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer32_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiVerTmp[256];
+ uint8_t uiCtrTmp[256];
+ McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiVerTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
+}
+static inline void McHorVer33_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ uint8_t uiHorTmp[256];
+ uint8_t uiVerTmp[256];
+ McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
+ McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
+ PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
+}
+
+void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
+//pSrc has been added the offset of mv
+{
+ static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
+ {McCopy_c, McHorVer01_c, McHorVer02_c, McHorVer03_c},
+ {McHorVer10_c, McHorVer11_c, McHorVer12_c, McHorVer13_c},
+ {McHorVer20_c, McHorVer21_c, McHorVer22_c, McHorVer23_c},
+ {McHorVer30_c, McHorVer31_c, McHorVer32_c, McHorVer33_c},
+ };
+
+ pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+}
+
+static inline void McChromaWithFragMv_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ int32_t i, j;
+ int32_t iA, iB, iC, iD;
+ const uint8_t* pSrcNext = pSrc + iSrcStride;
+ const uint8_t* pABCD = g_kuiABCD[iMvY & 0x07][iMvX & 0x07];
+ iA = pABCD[0];
+ iB = pABCD[1];
+ iC = pABCD[2];
+ iD = pABCD[3];
+ for (i = 0; i < iHeight; i++) {
+ for (j = 0; j < iWidth; j++) {
+ pDst[j] = (iA * pSrc[j] + iB * pSrc[j + 1] + iC * pSrcNext[j] + iD * pSrcNext[j + 1] + 32) >> 6;
+ }
+ pDst += iDstStride;
+ pSrc = pSrcNext;
+ pSrcNext += iSrcStride;
+ }
+}
+
+void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
+//pSrc has been added the offset of mv
+{
+ const int32_t kiD8x = iMvX & 0x07;
+ const int32_t kiD8y = iMvY & 0x07;
+ if (0 == kiD8x && 0 == kiD8y)
+ McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ else
+ McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
+}
+
+#if defined(X86_ASM)
+//***************************************************************************//
+// SSE2 implement //
+//***************************************************************************//
+static inline void McHorVer22WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_2D (int16_t, iTap, 21, 8, 16)
+ McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)iTap, 16, iHeight + 5);
+ McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)iTap, 16, pDst, iDstStride, 8, iHeight);
+}
+
+static inline void McHorVer02WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ McHorVer02WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
+}
+
+static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iHeight) {
+ McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
+}
+void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16)
+ int32_t tmp1 = 2 * (iWidth - 8);
+ McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5);
+ McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight);
+ McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
+}
+
+static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth,
+ int32_t iHeight) {
+ if (iWidth == 16)
+ McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else
+ McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+
+static inline void McHorVer20_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else
+ McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+
+static inline void McHorVer02_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else
+ McHorVer02_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight);
+}
+
+static inline void McHorVer22_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else
+ McHorVer22_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight);
+}
+
+static inline void McHorVer01_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ } else {
+ McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer03_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ } else {
+ McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer10_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pTmp, 16, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer11_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer12_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else {
+ McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer13_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4 , iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer21_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer23_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer30_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer31_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer32_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else {
+ McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight);
+ McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+static inline void McHorVer33_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else {
+ McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight);
+ PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+
+void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
+//pSrc has been added the offset of mv
+{
+ static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
+ {McCopy_sse2, McHorVer01_sse2, McHorVer02_sse2, McHorVer03_sse2},
+ {McHorVer10_sse2, McHorVer11_sse2, McHorVer12_sse2, McHorVer13_sse2},
+ {McHorVer20_sse2, McHorVer21_sse2, McHorVer22_sse2, McHorVer23_sse2},
+ {McHorVer30_sse2, McHorVer31_sse2, McHorVer32_sse2, McHorVer33_sse2},
+ };
+
+ pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+}
+
+void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = {
+ McChromaWidthEq4_mmx,
+ McChromaWidthEq8_sse2
+ };
+ const int32_t kiD8x = iMvX & 0x07;
+ const int32_t kiD8y = iMvY & 0x07;
+ if (kiD8x == 0 && kiD8y == 0) {
+ McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ return;
+ }
+ if (iWidth != 2) {
+ kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
+ } else
+ McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
+}
+
+void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = {
+ McChromaWidthEq4_mmx,
+ McChromaWidthEq8_ssse3
+ };
+ const int32_t kiD8x = iMvX & 0x07;
+ const int32_t kiD8y = iMvY & 0x07;
+ if (kiD8x == 0 && kiD8y == 0) {
+ McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+ return;
+ }
+ if (iWidth != 2) {
+ kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
+ } else
+ McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
+}
+
+void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+ PixelAvgWidthEq8_mmx,
+ PixelAvgWidthEq16_sse2
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
+
+#endif //X86_ASM
+//***************************************************************************//
+// NEON implementation //
+//***************************************************************************//
+#if defined(HAVE_NEON)
+void McHorVer20Width9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 17)
+ McHorVer20Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //if (iWidth == 9)
+ McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer02Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //if (iWidth == 8)
+ McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer22Width9Or17Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 17)
+ McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //if (iWidth == 9)
+ McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McCopy_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (16 == iWidth)
+ McCopyWidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (8 == iWidth)
+ McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (4 == iWidth)
+ McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else
+ McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer20_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer20WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer20WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer20WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer02_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer02WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer02WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer02WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer22_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer22WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer22WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer22WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+
+void McHorVer01_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer01WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer01WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer01WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer03_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer03WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer03WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer03WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer10_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer10WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer10WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer10WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer11_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ }
+}
+void McHorVer12_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
+ }
+}
+void McHorVer13_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ }
+}
+void McHorVer21_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
+ }
+}
+void McHorVer23_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
+ }
+}
+void McHorVer30_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer30WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer30WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer30WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer31_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ }
+}
+void McHorVer32_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
+ }
+}
+void McHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
+ }
+}
+
+void McLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
+ {McCopy_neon, McHorVer01_neon, McHorVer02_neon, McHorVer03_neon},
+ {McHorVer10_neon, McHorVer11_neon, McHorVer12_neon, McHorVer13_neon},
+ {McHorVer20_neon, McHorVer21_neon, McHorVer22_neon, McHorVer23_neon},
+ {McHorVer30_neon, McHorVer31_neon, McHorVer32_neon, McHorVer33_neon},
+ };
+ // pSrc += (iMvY >> 2) * iSrcStride + (iMvX >> 2);
+ pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+}
+void McChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ if (0 == iMvX && 0 == iMvY) {
+ if (8 == iWidth)
+ McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //here iWidth == 2
+ McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ } else {
+ const int32_t kiD8x = iMvX & 0x07;
+ const int32_t kiD8y = iMvY & 0x07;
+ if (8 == iWidth)
+ McChromaWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
+ else if (4 == iWidth)
+ McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
+ else //here iWidth == 2
+ McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
+ }
+}
+void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+ PixStrideAvgWidthEq8_neon,
+ PixStrideAvgWidthEq16_neon
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
+#endif
+#if defined(HAVE_NEON_AARCH64)
+void McHorVer20Width9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 17)
+ McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //if (iWidth == 9)
+ McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer02Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //if (iWidth == 8)
+ McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer22Width9Or17Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst,
+ int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 17)
+ McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //if (iWidth == 9)
+ McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McCopy_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (16 == iWidth)
+ McCopyWidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (8 == iWidth)
+ McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (4 == iWidth)
+ McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else
+ McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer20_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer02_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer22_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+
+void McHorVer01_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer01WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer01WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer01WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer03_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer03WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer03WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer03WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer10_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer10WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer10WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer10WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer11_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+void McHorVer12_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+void McHorVer13_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+void McHorVer21_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+void McHorVer23_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+void McHorVer30_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ if (iWidth == 16)
+ McHorVer30WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 8)
+ McHorVer30WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McHorVer30WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+}
+void McHorVer31_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+void McHorVer32_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
+ }
+}
+void McHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int32_t iWidth, int32_t iHeight) {
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+ ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+ if (iWidth == 16) {
+ McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 8) {
+ McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ } else if (iWidth == 4) {
+ McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
+ McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
+ PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
+ }
+}
+
+void McLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
+ {McCopy_AArch64_neon, McHorVer01_AArch64_neon, McHorVer02_AArch64_neon, McHorVer03_AArch64_neon},
+ {McHorVer10_AArch64_neon, McHorVer11_AArch64_neon, McHorVer12_AArch64_neon, McHorVer13_AArch64_neon},
+ {McHorVer20_AArch64_neon, McHorVer21_AArch64_neon, McHorVer22_AArch64_neon, McHorVer23_AArch64_neon},
+ {McHorVer30_AArch64_neon, McHorVer31_AArch64_neon, McHorVer32_AArch64_neon, McHorVer33_AArch64_neon},
+ };
+ // pSrc += (iMvY >> 2) * iSrcStride + (iMvX >> 2);
+ pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
+}
+void McChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
+ int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
+ if (0 == iMvX && 0 == iMvY) {
+ if (8 == iWidth)
+ McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else if (iWidth == 4)
+ McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ else //here iWidth == 2
+ McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
+ } else {
+ const int32_t kiD8x = iMvX & 0x07;
+ const int32_t kiD8y = iMvY & 0x07;
+ if (8 == iWidth)
+ McChromaWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
+ else if (4 == iWidth)
+ McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
+ else //here iWidth == 2
+ McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
+ }
+}
+void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
+ const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
+ static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
+ PixStrideAvgWidthEq8_AArch64_neon,
+ PixStrideAvgWidthEq16_AArch64_neon
+ };
+ kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
+}
+#endif
+
+void InitMcFunc (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
+ pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
+ pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
+ pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
+ pMcFuncs->pfSampleAveraging = PixelAvg_c;
+ pMcFuncs->pMcChromaFunc = McChroma_c;
+ pMcFuncs->pMcLumaFunc = McLuma_c;
+
+#if defined (X86_ASM)
+ if (uiCpuFlag & WELS_CPU_SSE2) {
+ pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
+ pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
+ pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
+ pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
+ pMcFuncs->pMcChromaFunc = McChroma_sse2;
+ pMcFuncs->pMcLumaFunc = McLuma_sse2;
+ }
+
+ if (uiCpuFlag & WELS_CPU_SSSE3) {
+ pMcFuncs->pMcChromaFunc = McChroma_ssse3;
+ }
+#endif //(X86_ASM)
+
+#if defined(HAVE_NEON)
+ if (uiCpuFlag & WELS_CPU_NEON) {
+ pMcFuncs->pMcLumaFunc = McLuma_neon;
+ pMcFuncs->pMcChromaFunc = McChroma_neon;
+ pMcFuncs->pfSampleAveraging = PixelAvg_neon;
+ pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
+ pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
+ pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
+ }
+#endif
+#if defined(HAVE_NEON_AARCH64)
+ if (uiCpuFlag & WELS_CPU_NEON) {
+ pMcFuncs->pMcLumaFunc = McLuma_AArch64_neon;
+ pMcFuncs->pMcChromaFunc = McChroma_AArch64_neon;
+ pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
+ pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
+ pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
+ pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1
+ }
+#endif
+}
+} // namespace WelsCommon
--- a/codec/common/targets.mk
+++ b/codec/common/targets.mk
@@ -7,6 +7,7 @@
$(COMMON_SRCDIR)/src/deblocking_common.cpp\
$(COMMON_SRCDIR)/src/expand_pic.cpp\
$(COMMON_SRCDIR)/src/intra_pred_common.cpp\
+ $(COMMON_SRCDIR)/src/mc.cpp\
$(COMMON_SRCDIR)/src/sad_common.cpp\
$(COMMON_SRCDIR)/src/utils.cpp\
$(COMMON_SRCDIR)/src/welsCodecTrace.cpp\
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -55,6 +55,7 @@
#include "crt_util_safe_x.h"
#include "mb_cache.h"
#include "expand_pic.h"
+#include "mc.h"
namespace WelsDec {
#define MAX_PRED_MODE_ID_I16x16 3
@@ -141,13 +142,6 @@
uint8_t uiLongRefCount[LIST_A]; // dependend on ref pic module
int32_t iMaxLongTermFrameIdx;
} SRefPic, *PRefPic;
-
-typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight);
-typedef struct TagMcFunc {
-PWelsMcFunc pMcLumaFunc;
-PWelsMcFunc pMcChromaFunc;
-} SMcFunc;
typedef void (*PCopyFunc) (uint8_t* pDst, int32_t iStrideD, uint8_t* pSrc, int32_t iStrideS);
typedef struct TagCopyFunc {
--- a/codec/decoder/core/inc/mc.h
+++ /dev/null
@@ -1,50 +1,0 @@
-/*!
- * \copy
- * Copyright (c) 2013, Cisco Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef WELS_MC_H__
-#define WELS_MC_H__
-
-#include "wels_const.h"
-#include "macros.h"
-#include "decoder_context.h"
-#include "mc_common.h"
-
-namespace WelsDec {
-
-typedef void (*PMcChromaWidthExtFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- const uint8_t* kpABCD, int32_t iHeight);
-
-void InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu);
-
-} // namespace WelsDec
-
-#endif//WELS_MC_H__
--- a/codec/decoder/core/src/mc.cpp
+++ /dev/null
@@ -1,1226 +1,0 @@
-/*!
- * \copy
- * Copyright (c) 2009-2013, Cisco Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file mc.c
- *
- * \brief Interfaces implementation for motion compensation
- *
- * \date 03/17/2009 Created
- *
- *************************************************************************************
- */
-
-#include "mc.h"
-
-#include "cpu_core.h"
-
-namespace WelsDec {
-
-/*------------------weight for chroma fraction pixel interpolation------------------*/
-//iA = (8 - dx) * (8 - dy);
-//iB = dx * (8 - dy);
-//iC = (8 - dx) * dy;
-//iD = dx * dy
-static const uint8_t g_kuiABCD[8][8][4] = { //g_kA[dy][dx], g_kB[dy][dx], g_kC[dy][dx], g_kD[dy][dx]
- {
- {64, 0, 0, 0}, {56, 8, 0, 0}, {48, 16, 0, 0}, {40, 24, 0, 0},
- {32, 32, 0, 0}, {24, 40, 0, 0}, {16, 48, 0, 0}, {8, 56, 0, 0}
- },
- {
- {56, 0, 8, 0}, {49, 7, 7, 1}, {42, 14, 6, 2}, {35, 21, 5, 3},
- {28, 28, 4, 4}, {21, 35, 3, 5}, {14, 42, 2, 6}, {7, 49, 1, 7}
- },
- {
- {48, 0, 16, 0}, {42, 6, 14, 2}, {36, 12, 12, 4}, {30, 18, 10, 6},
- {24, 24, 8, 8}, {18, 30, 6, 10}, {12, 36, 4, 12}, {6, 42, 2, 14}
- },
- {
- {40, 0, 24, 0}, {35, 5, 21, 3}, {30, 10, 18, 6}, {25, 15, 15, 9},
- {20, 20, 12, 12}, {15, 25, 9, 15}, {10, 30, 6, 18}, {5, 35, 3, 21}
- },
- {
- {32, 0, 32, 0}, {28, 4, 28, 4}, {24, 8, 24, 8}, {20, 12, 20, 12},
- {16, 16, 16, 16}, {12, 20, 12, 20}, {8, 24, 8, 24}, {4, 28, 4, 28}
- },
- {
- {24, 0, 40, 0}, {21, 3, 35, 5}, {18, 6, 30, 10}, {15, 9, 25, 15},
- {12, 12, 20, 20}, {9, 15, 15, 25}, {6, 18, 10, 30}, {3, 21, 5, 35}
- },
- {
- {16, 0, 48, 0}, {14, 2, 42, 6}, {12, 4, 36, 12}, {10, 6, 30, 18},
- {8, 8, 24, 24}, {6, 10, 18, 30}, {4, 12, 12, 36}, {2, 14, 6, 42}
- },
- {
- {8, 0, 56, 0}, {7, 1, 49, 7}, {6, 2, 42, 14}, {5, 3, 35, 21},
- {4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
- }
-};
-
-typedef void (*PWelsMcWidthHeightFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight);
-
-//***************************************************************************//
-// C code implementation //
-//***************************************************************************//
-static inline void McCopyWidthEq2_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i;
- for (i = 0; i < iHeight; i++) { // iWidth == 2 only for chroma
- ST16A2 (pDst, LD16 (pSrc));
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-static inline void McCopyWidthEq4_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i;
- for (i = 0; i < iHeight; i++) {
- ST32A4 (pDst, LD32 (pSrc));
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-static inline void McCopyWidthEq8_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i;
- for (i = 0; i < iHeight; i++) {
- ST64A8 (pDst, LD64 (pSrc));
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i;
- for (i = 0; i < iHeight; i++) {
- ST64A8 (pDst , LD64 (pSrc));
- ST64A8 (pDst + 8, LD64 (pSrc + 8));
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-//--------------------Luma sample MC------------------//
-
-static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) {
- int32_t iPix05 = pSrc[0] + pSrc[5];
- int32_t iPix14 = pSrc[1] + pSrc[4];
- int32_t iPix23 = pSrc[2] + pSrc[3];
-
- return (iPix05 - (iPix14 * 5) + (iPix23 * 20));
-}
-// h: iOffset=1 / v: iOffset=iSrcStride
-static inline int32_t FilterInput8bitWithStride_c (const uint8_t* pSrc, const int32_t kiOffset) {
- const int32_t kiOffset1 = kiOffset;
- const int32_t kiOffset2 = (kiOffset << 1);
- const int32_t kiOffset3 = kiOffset + kiOffset2;
- const uint32_t kuiPix05 = * (pSrc - kiOffset2) + * (pSrc + kiOffset3);
- const uint32_t kuiPix14 = * (pSrc - kiOffset1) + * (pSrc + kiOffset2);
- const uint32_t kuiPix23 = * (pSrc) + * (pSrc + kiOffset1);
-
- return (kuiPix05 - ((kuiPix14 << 2) + kuiPix14) + (kuiPix23 << 4) + (kuiPix23 << 2));
-}
-
-static inline void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1;
- }
- pDst += iDstStride;
- pSrcA += iSrcAStride;
- pSrcB += iSrcBStride;
- }
-}
-static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
- int32_t iHeight) {
- if (iWidth == 16)
- McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //here iWidth == 2
- McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
-static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = WelsClip1 ((FilterInput8bitWithStride_c (pSrc + j, 1) + 16) >> 5);
- }
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = WelsClip1 ((FilterInput8bitWithStride_c (pSrc + j, iSrcStride) + 16) >> 5);
- }
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- int16_t iTmp[16 + 5]; //16
- int32_t i, j, k;
-
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth + 5; j++) {
- iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride);
- }
- for (k = 0; k < iWidth; k++) {
- pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10);
- }
- pSrc += iSrcStride;
- pDst += iDstStride;
- }
-}
-
-/////////////////////luma MC//////////////////////////
-static inline void McHorVer01_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiTmp[256];
- McHorVer02_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer03_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiTmp[256];
- McHorVer02_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, uiTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer10_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiTmp[256];
- McHorVer20_c (pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, pSrc, iSrcStride, uiTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer11_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- uint8_t uiVerTmp[256];
- McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer12_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiVerTmp[256];
- uint8_t uiCtrTmp[256];
- McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
- McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiVerTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer13_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- uint8_t uiVerTmp[256];
- McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- McHorVer02_c (pSrc, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer21_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- uint8_t uiCtrTmp[256];
- McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer23_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- uint8_t uiCtrTmp[256];
- McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer30_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, pSrc + 1, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer31_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- uint8_t uiVerTmp[256];
- McHorVer20_c (pSrc, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer32_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiVerTmp[256];
- uint8_t uiCtrTmp[256];
- McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
- McHorVer22_c (pSrc, iSrcStride, uiCtrTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiVerTmp, 16, uiCtrTmp, 16, iWidth, iHeight);
-}
-static inline void McHorVer33_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- uint8_t uiHorTmp[256];
- uint8_t uiVerTmp[256];
- McHorVer20_c (pSrc + iSrcStride, iSrcStride, uiHorTmp, 16, iWidth, iHeight);
- McHorVer02_c (pSrc + 1, iSrcStride, uiVerTmp, 16, iWidth, iHeight);
- PixelAvg_c (pDst, iDstStride, uiHorTmp, 16, uiVerTmp, 16, iWidth, iHeight);
-}
-
-void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
-//pSrc has been added the offset of mv
-{
- static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
- {McCopy_c, McHorVer01_c, McHorVer02_c, McHorVer03_c},
- {McHorVer10_c, McHorVer11_c, McHorVer12_c, McHorVer13_c},
- {McHorVer20_c, McHorVer21_c, McHorVer22_c, McHorVer23_c},
- {McHorVer30_c, McHorVer31_c, McHorVer32_c, McHorVer33_c},
- };
-
- pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
-}
-
-static inline void McChromaWithFragMv_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- int32_t i, j;
- int32_t iA, iB, iC, iD;
- const uint8_t* pSrcNext = pSrc + iSrcStride;
- const uint8_t* pABCD = g_kuiABCD[iMvY & 0x07][iMvX & 0x07];
- iA = pABCD[0];
- iB = pABCD[1];
- iC = pABCD[2];
- iD = pABCD[3];
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = (iA * pSrc[j] + iB * pSrc[j + 1] + iC * pSrcNext[j] + iD * pSrcNext[j + 1] + 32) >> 6;
- }
- pDst += iDstStride;
- pSrc = pSrcNext;
- pSrcNext += iSrcStride;
- }
-}
-
-void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
-//pSrc has been added the offset of mv
-{
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (0 == kiD8x && 0 == kiD8y)
- McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
- else
- McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
-}
-
-#if defined(X86_ASM)
-//***************************************************************************//
-// SSE2 implement //
-//***************************************************************************//
-static inline void McHorVer22WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_2D (int16_t, iTap, 21, 8, 16)
- McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)iTap, 16, iHeight + 5);
- McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)iTap, 16, pDst, iDstStride, 8, iHeight);
-}
-
-static inline void McHorVer02WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- McHorVer02WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
-}
-
-static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
-}
-
-static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- if (iWidth == 16)
- McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else
- McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
-static inline void McHorVer20_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else
- McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
-static inline void McHorVer02_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else
- McHorVer02_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight);
-}
-
-static inline void McHorVer22_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else
- McHorVer22_c (pSrc, iSrcStride, pDst, iDstStride, 4, iHeight);
-}
-
-static inline void McHorVer01_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
- } else {
- McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
- }
-}
-static inline void McHorVer03_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- } else {
- McHorVer02_c (pSrc, iSrcStride, pTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- }
-}
-static inline void McHorVer10_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
- }
-}
-static inline void McHorVer11_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-static inline void McHorVer12_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else {
- McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4, iHeight);
- McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-static inline void McHorVer13_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02_c (pSrc, iSrcStride, pVerTmp, 16, 4 , iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-static inline void McHorVer21_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-static inline void McHorVer23_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-static inline void McHorVer30_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
- }
-}
-static inline void McHorVer31_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-static inline void McHorVer32_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else {
- McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight);
- McHorVer22_c (pSrc, iSrcStride, pCtrTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-static inline void McHorVer33_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else {
- McHorVer20WidthEq4_mmx (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02_c (pSrc + 1, iSrcStride, pVerTmp, 16, 4, iHeight);
- PixelAvgWidthEq4_mmx (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-
-void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
-//pSrc has been added the offset of mv
-{
- static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
- {McCopy_sse2, McHorVer01_sse2, McHorVer02_sse2, McHorVer03_sse2},
- {McHorVer10_sse2, McHorVer11_sse2, McHorVer12_sse2, McHorVer13_sse2},
- {McHorVer20_sse2, McHorVer21_sse2, McHorVer22_sse2, McHorVer23_sse2},
- {McHorVer30_sse2, McHorVer31_sse2, McHorVer32_sse2, McHorVer33_sse2},
- };
-
- pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
-}
-
-void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = {
- McChromaWidthEq4_mmx,
- McChromaWidthEq8_sse2
- };
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (kiD8x == 0 && kiD8y == 0) {
- McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
- return;
- }
- if (iWidth != 2) {
- kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
- } else
- McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
-}
-
-void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PMcChromaWidthExtFunc kpMcChromaWidthFuncs[2] = {
- McChromaWidthEq4_mmx,
- McChromaWidthEq8_ssse3
- };
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (kiD8x == 0 && kiD8y == 0) {
- McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
- return;
- }
- if (iWidth != 2) {
- kpMcChromaWidthFuncs[iWidth >> 3] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
- } else
- McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
-}
-
-#endif //X86_ASM
-//***************************************************************************//
-// NEON implementation //
-//***************************************************************************//
-#if defined(HAVE_NEON)
-void McCopy_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (16 == iWidth)
- McCopyWidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (8 == iWidth)
- McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (4 == iWidth)
- McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else
- McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer20_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer20WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer20WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer02_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer02WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer02WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer22_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer22WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer22WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
-void McHorVer01_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer01WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer01WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer01WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer03_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer03WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer03WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer03WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer10_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer10WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer10WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer10WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer11_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- }
-}
-void McHorVer12_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
- }
-}
-void McHorVer13_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- }
-}
-void McHorVer21_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
- }
-}
-void McHorVer23_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pCtrTmp, iHeight);
- }
-}
-void McHorVer30_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer30WidthEq16_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer30WidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer30WidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer31_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- }
-}
-void McHorVer32_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq8_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq4_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pVerTmp, pCtrTmp, iHeight);
- }
-}
-void McHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_neon (pDst, iDstStride, pHorTmp, pVerTmp, iHeight);
- }
-}
-
-void McLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
- {McCopy_neon, McHorVer01_neon, McHorVer02_neon, McHorVer03_neon},
- {McHorVer10_neon, McHorVer11_neon, McHorVer12_neon, McHorVer13_neon},
- {McHorVer20_neon, McHorVer21_neon, McHorVer22_neon, McHorVer23_neon},
- {McHorVer30_neon, McHorVer31_neon, McHorVer32_neon, McHorVer33_neon},
- };
- // pSrc += (iMvY >> 2) * iSrcStride + (iMvX >> 2);
- pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
-}
-void McChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- if (0 == iMvX && 0 == iMvY) {
- if (8 == iWidth)
- McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //here iWidth == 2
- McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- } else {
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (8 == iWidth)
- McChromaWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- else if (4 == iWidth)
- McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- else //here iWidth == 2
- McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
- }
-}
-#endif
-#if defined(HAVE_NEON_AARCH64)
-void McCopy_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (16 == iWidth)
- McCopyWidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (8 == iWidth)
- McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (4 == iWidth)
- McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else
- McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer20_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer02_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer22_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-
-void McHorVer01_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer01WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer01WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer01WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer03_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer03WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer03WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer03WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer10_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer10WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer10WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer10WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer11_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-void McHorVer12_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-void McHorVer13_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_AArch64_neon (pSrc, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-void McHorVer21_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-void McHorVer23_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-void McHorVer30_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer30WidthEq16_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McHorVer30WidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McHorVer30WidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer31_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_AArch64_neon (pSrc, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-void McHorVer32_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq8_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- McHorVer22WidthEq4_AArch64_neon (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pVerTmp, 16, pCtrTmp, 16, iHeight);
- }
-}
-void McHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
- ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
- if (iWidth == 16) {
- McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 8) {
- McHorVer20WidthEq8_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq8_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq8_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- } else if (iWidth == 4) {
- McHorVer20WidthEq4_AArch64_neon (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
- McHorVer02WidthEq4_AArch64_neon (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
- PixelAvgWidthEq4_AArch64_neon (pDst, iDstStride, pHorTmp, 16, pVerTmp, 16, iHeight);
- }
-}
-
-void McLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PWelsMcWidthHeightFunc pWelsMcFunc[4][4] = { //[x][y]
- {McCopy_AArch64_neon, McHorVer01_AArch64_neon, McHorVer02_AArch64_neon, McHorVer03_AArch64_neon},
- {McHorVer10_AArch64_neon, McHorVer11_AArch64_neon, McHorVer12_AArch64_neon, McHorVer13_AArch64_neon},
- {McHorVer20_AArch64_neon, McHorVer21_AArch64_neon, McHorVer22_AArch64_neon, McHorVer23_AArch64_neon},
- {McHorVer30_AArch64_neon, McHorVer31_AArch64_neon, McHorVer32_AArch64_neon, McHorVer33_AArch64_neon},
- };
- // pSrc += (iMvY >> 2) * iSrcStride + (iMvX >> 2);
- pWelsMcFunc[iMvX & 0x03][iMvY & 0x03] (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
-}
-void McChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- if (0 == iMvX && 0 == iMvY) {
- if (8 == iWidth)
- McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //here iWidth == 2
- McCopyWidthEq2_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- } else {
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (8 == iWidth)
- McChromaWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- else if (4 == iWidth)
- McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- else //here iWidth == 2
- McChromaWithFragMv_c (pSrc, iSrcStride, pDst, iDstStride, iMvX, iMvY, iWidth, iHeight);
- }
-}
-#endif
-
-void InitMcFunc (SMcFunc* pMcFunc, int32_t iCpu) {
- pMcFunc->pMcLumaFunc = McLuma_c;
- pMcFunc->pMcChromaFunc = McChroma_c;
-
-#ifdef HAVE_NEON
- if (iCpu & WELS_CPU_NEON) {
- pMcFunc->pMcLumaFunc = McLuma_neon;
- pMcFunc->pMcChromaFunc = McChroma_neon;
- }
-#endif
-#ifdef HAVE_NEON_AARCH64
- if (iCpu & WELS_CPU_NEON) {
- pMcFunc->pMcLumaFunc = McLuma_AArch64_neon;
- pMcFunc->pMcChromaFunc = McChroma_AArch64_neon;
- }
-#endif
-#if defined (X86_ASM)
- if (iCpu & WELS_CPU_SSE2) {
- pMcFunc->pMcLumaFunc = McLuma_sse2;
- pMcFunc->pMcChromaFunc = McChroma_sse2;
- }
- if (iCpu & WELS_CPU_SSSE3) {
- pMcFunc->pMcChromaFunc = McChroma_ssse3;
- }
-#endif //(X86_ASM)
-}
-
-} // namespace WelsDec
--- a/codec/decoder/targets.mk
+++ b/codec/decoder/targets.mk
@@ -13,7 +13,6 @@
$(DECODER_SRCDIR)/core/src/fmo.cpp\
$(DECODER_SRCDIR)/core/src/get_intra_predictor.cpp\
$(DECODER_SRCDIR)/core/src/manage_dec_ref.cpp\
- $(DECODER_SRCDIR)/core/src/mc.cpp\
$(DECODER_SRCDIR)/core/src/mem_align.cpp\
$(DECODER_SRCDIR)/core/src/memmgr_nal_unit.cpp\
$(DECODER_SRCDIR)/core/src/mv_pred.cpp\
--- a/codec/encoder/core/inc/mc.h
+++ /dev/null
@@ -1,51 +1,0 @@
-/*!
- * \copy
- * Copyright (c) 2013, Cisco Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-//macroblock.h
-#ifndef WELS_MC_H__
-#define WELS_MC_H__
-
-#include <string.h>
-#include "typedefs.h"
-#include "wels_const.h"
-#include "macros.h"
-#include "wels_func_ptr_def.h"
-#include "mc_common.h"
-
-/////////////////////luma MC//////////////////////////
-//x y means dx(mv[0] & 3) and dy(mv[1] & 3)
-
-namespace WelsEnc {
-void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag);
-
-}
-#endif//WELS_MC_H__
--- a/codec/encoder/core/inc/wels_func_ptr_def.h
+++ b/codec/encoder/core/inc/wels_func_ptr_def.h
@@ -44,6 +44,7 @@
#include "expand_pic.h"
#include "rc.h"
#include "IWelsVP.h"
+#include "mc.h"
namespace WelsEnc {
@@ -73,25 +74,6 @@
typedef int32_t (*PQuantizationSkipFunc) (int16_t* pDct, int16_t iFF, int16_t iMF);
typedef int32_t (*PQuantizationHadamardFunc) (int16_t* pRes, const int16_t kiFF, int16_t iMF, int16_t* pDct,
int16_t* pBlock);
-
-typedef void (*PWelsMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight);
-
-typedef void (*PWelsLumaHalfpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight);
-typedef void (*PWelsLumaQuarpelMcFunc) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight);
-typedef void (*PWelsSampleAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*, int32_t, int32_t, int32_t);
-
-typedef struct TagMcFunc {
- PWelsLumaHalfpelMcFunc pfLumaHalfpelHor;
- PWelsLumaHalfpelMcFunc pfLumaHalfpelVer;
- PWelsLumaHalfpelMcFunc pfLumaHalfpelCen;
- PWelsMcFunc pMcChromaFunc;
-
- PWelsMcFunc pMcLumaFunc;
- PWelsSampleAveragingFunc pfSampleAveraging;
-} SMcFunc;
typedef void (*PLumaDeblockingLT4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* iTc);
typedef void (*PLumaDeblockingEQ4Func) (uint8_t* iSampleY, int32_t iStride, int32_t iAlpha, int32_t iBeta);
--- a/codec/encoder/core/src/encoder.cpp
+++ b/codec/encoder/core/src/encoder.cpp
@@ -209,7 +209,7 @@
/* Motion compensation */
/*init pixel average function*/
/*get one column or row pixel when refinement*/
- WelsInitMcFuncs (&pFuncList->sMcFuncs, uiCpuFlag);
+ InitMcFunc (&pFuncList->sMcFuncs, uiCpuFlag);
InitCoeffFunc (pFuncList,uiCpuFlag,pParam->iEntropyCodingModeFlag);
WelsInitEncodingFuncs (pFuncList, uiCpuFlag);
--- a/codec/encoder/core/src/mc.cpp
+++ /dev/null
@@ -1,891 +1,0 @@
-/*!
- * \copy
- * Copyright (c) 2009-2013, Cisco Systems
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file mc.c
- *
- * \brief Interfaces implementation for motion compensation
- *
- * \date 03/17/2009 Created
- *
- *************************************************************************************
- */
-
-#include "mc.h"
-#include "cpu_core.h"
-
-typedef void (*PWelsSampleWidthAveragingFunc) (uint8_t*, int32_t, const uint8_t*, int32_t, const uint8_t*,
- int32_t, int32_t);
-
-namespace WelsEnc {
-/*------------------weight for chroma fraction pixel interpolation------------------*/
-//kuiA = (8 - dx) * (8 - dy);
-//kuiB = dx * (8 - dy);
-//kuiC = (8 - dx) * dy;
-//kuiD = dx * dy
-static const uint8_t g_kuiABCD[8][8][4] = { ////g_kuiA[dy][dx], g_kuiB[dy][dx], g_kuiC[dy][dx], g_kuiD[dy][dx]
- {
- {64, 0, 0, 0}, {56, 8, 0, 0}, {48, 16, 0, 0}, {40, 24, 0, 0},
- {32, 32, 0, 0}, {24, 40, 0, 0}, {16, 48, 0, 0}, {8, 56, 0, 0}
- },
- {
- {56, 0, 8, 0}, {49, 7, 7, 1}, {42, 14, 6, 2}, {35, 21, 5, 3},
- {28, 28, 4, 4}, {21, 35, 3, 5}, {14, 42, 2, 6}, {7, 49, 1, 7}
- },
- {
- {48, 0, 16, 0}, {42, 6, 14, 2}, {36, 12, 12, 4}, {30, 18, 10, 6},
- {24, 24, 8, 8}, {18, 30, 6, 10}, {12, 36, 4, 12}, {6, 42, 2, 14}
- },
- {
- {40, 0, 24, 0}, {35, 5, 21, 3}, {30, 10, 18, 6}, {25, 15, 15, 9},
- {20, 20, 12, 12}, {15, 25, 9, 15}, {10, 30, 6, 18}, {5, 35, 3, 21}
- },
- {
- {32, 0, 32, 0}, {28, 4, 28, 4}, {24, 8, 24, 8}, {20, 12, 20, 12},
- {16, 16, 16, 16}, {12, 20, 12, 20}, {8, 24, 8, 24}, {4, 28, 4, 28}
- },
- {
- {24, 0, 40, 0}, {21, 3, 35, 5}, {18, 6, 30, 10}, {15, 9, 25, 15},
- {12, 12, 20, 20}, {9, 15, 15, 25}, {6, 18, 10, 30}, {3, 21, 5, 35}
- },
- {
- {16, 0, 48, 0}, {14, 2, 42, 6}, {12, 4, 36, 12}, {10, 6, 30, 18},
- {8, 8, 24, 24}, {6, 10, 18, 30}, {4, 12, 12, 36}, {2, 14, 6, 42}
- },
- {
- {8, 0, 56, 0}, {7, 1, 49, 7}, {6, 2, 42, 14}, {5, 3, 35, 21},
- {4, 4, 28, 28}, {3, 5, 21, 35}, {2, 6, 14, 42}, {1, 7, 7, 49}
- }
-};
-
-//***************************************************************************//
-// C code implementation //
-//***************************************************************************//
-static inline void McCopyWidthEq4_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i;
- for (i = 0; i < iHeight; i++) {
- memcpy (pDst, pSrc, 4); // confirmed_safe_unsafe_usage
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-static inline void McCopyWidthEq8_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight)
-
-{
- int32_t i;
- for (i = 0; i < iHeight; i++) {
- memcpy (pDst, pSrc, 8); // confirmed_safe_unsafe_usage
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-static inline void McCopyWidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i;
- for (i = 0; i < iHeight; i++) {
- memcpy (pDst, pSrc, 16); // confirmed_safe_unsafe_usage
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-
-//--------------------Luma sample MC------------------//
-static inline int32_t HorFilter_c (const uint8_t* pSrc) {
- int32_t iPix05 = pSrc[-2] + pSrc[3];
- int32_t iPix14 = pSrc[-1] + pSrc[2];
- int32_t iPix23 = pSrc[ 0] + pSrc[1];
-
- return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2));
-}
-
-static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) {
- int32_t iPix05 = pSrc[0] + pSrc[5];
- int32_t iPix14 = pSrc[1] + pSrc[4];
- int32_t iPix23 = pSrc[2] + pSrc[3];
-
- return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2));
-}
-static inline int32_t VerFilter_c (const uint8_t* pSrc, const int32_t kiSrcStride) {
- const int32_t kiLine1 = kiSrcStride;
- const int32_t kiLine2 = (kiSrcStride << 1);
- const int32_t kiLine3 = kiLine1 + kiLine2;
- const uint32_t kuiPix05 = * (pSrc - kiLine2) + * (pSrc + kiLine3);
- const uint32_t kuiPix14 = * (pSrc - kiLine1) + * (pSrc + kiLine2);
- const uint32_t kuiPix23 = * (pSrc) + * (pSrc + kiLine1);
-
- return (kuiPix05 - ((kuiPix14 << 2) + kuiPix14) + (kuiPix23 << 4) + (kuiPix23 << 2));
-}
-
-static inline void PixelAvgWidthEq8_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < 8; j++) {
- pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1;
- }
- pDst += iDstStride;
- pSrcA += iSrcAStride;
- pSrcB += iSrcBStride;
- }
-}
-static inline void PixelAvgWidthEq16_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < 16; j++) {
- pDst[j] = (pSrcA[j] + pSrcB[j] + 1) >> 1;
- }
- pDst += iDstStride;
- pSrcA += iSrcAStride;
- pSrcB += iSrcBStride;
- }
-}
-
-//horizontal filter to gain half sample, that is (2, 0) location in quarter sample
-static inline void McHorVer20WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < 16; j++) {
- pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
- }
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-//vertical filter to gain half sample, that is (0, 2) location in quarter sample
-static inline void McHorVer02WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < 16; j++) {
- pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
- }
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
-static inline void McHorVer22WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- int16_t pTmp[16 + 5] = {0}; //16
- int32_t i, j, k;
-
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < 16 + 5; j++) {
- pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
- }
- for (k = 0; k < 16; k++) {
- pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
- }
- pSrc += iSrcStride;
- pDst += iDstStride;
- }
-}
-
-/////////////////////luma MC//////////////////////////
-
-static inline void McHorVer01WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer03WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer10WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer11WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer12WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer02WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer13WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer21WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer23WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer30WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer31WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_c (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer32WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_c (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer33WidthEq16_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_c (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_c (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_c (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-
-static inline void McHorVer20_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = WelsClip1 ((HorFilter_c (pSrc + j) + 16) >> 5);
- }
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-//vertical filter to gain half sample, that is (0, 2) location in quarter sample
-static inline void McHorVer02_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- int32_t i, j;
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = WelsClip1 ((VerFilter_c (pSrc + j, iSrcStride) + 16) >> 5);
- }
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
-}
-//horizontal and vertical filter to gain half sample, that is (2, 2) location in quarter sample
-static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- int16_t pTmp[17 + 5] = {0}; //w+1
- int32_t i, j, k;
-
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth + 5; j++) {
- pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
- }
- for (k = 0; k < iWidth; k++) {
- pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
- }
- pSrc += iSrcStride;
- pDst += iDstStride;
- }
-}
-static inline void McCopy_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
- int32_t iHeight) {
- int32_t i;
- if (iWidth == 16)
- McCopyWidthEq16_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McCopyWidthEq8_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McCopyWidthEq4_c (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else {
- for (i = 0; i < iHeight; i++) {
- memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
- }
-}
-
-void McChroma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight)
-//pSrc has been added the offset of mv
-{
- const int32_t kiDx = iMvX & 0x07;
- const int32_t kiDy = iMvY & 0x07;
-
- if (0 == kiDx && 0 == kiDy) {
- McCopy_c (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
- } else {
- const int32_t kiDA = g_kuiABCD[kiDy][kiDx][0];
- const int32_t kiDB = g_kuiABCD[kiDy][kiDx][1];
- const int32_t kiDC = g_kuiABCD[kiDy][kiDx][2];
- const int32_t kiDD = g_kuiABCD[kiDy][kiDx][3];
-
- int32_t i, j;
-
- const uint8_t* pSrcNext = pSrc + iSrcStride;
-
- for (i = 0; i < iHeight; i++) {
- for (j = 0; j < iWidth; j++) {
- pDst[j] = (kiDA * pSrc[j] + kiDB * pSrc[j + 1] + kiDC * pSrcNext[j] + kiDD * pSrcNext[j + 1] + 32) >> 6;
- }
- pDst += iDstStride;
- pSrc = pSrcNext;
- pSrcNext += iSrcStride;
- }
- }
-}
-void McLuma_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16[16] = { //[y*4+x]
- McCopyWidthEq16_c, McHorVer10WidthEq16_c, McHorVer20WidthEq16_c, McHorVer30WidthEq16_c,
- McHorVer01WidthEq16_c, McHorVer11WidthEq16_c, McHorVer21WidthEq16_c, McHorVer31WidthEq16_c,
- McHorVer02WidthEq16_c, McHorVer12WidthEq16_c, McHorVer22WidthEq16_c, McHorVer32WidthEq16_c,
- McHorVer03WidthEq16_c, McHorVer13WidthEq16_c, McHorVer23WidthEq16_c, McHorVer33WidthEq16_c
- };
- uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
- pWelsMcFuncWidthEq16[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void PixelAvg_c (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
- static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
- PixelAvgWidthEq8_c,
- PixelAvgWidthEq16_c
- };
- kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
-}
-//***************************************************************************//
-// MMXEXT and SSE2 implementation //
-//***************************************************************************//
-#if defined(X86_ASM)
-
-static inline void McHorVer22WidthEq8_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 21, 8, 16)
- McHorVer22Width8HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 16, iHeight + 5);
- McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 16, pDst, iDstStride, 8, iHeight);
-}
-
-//2010.2.5
-
-static inline void McHorVer02WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* PDst, int32_t iDstStride,
- int32_t iHeight) {
- McHorVer02WidthEq8_sse2 (pSrc, iSrcStride, PDst, iDstStride, iHeight);
- McHorVer02WidthEq8_sse2 (&pSrc[8], iSrcStride, &PDst[8], iDstStride, iHeight);
-}
-static inline void McHorVer22WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- McHorVer22WidthEq8_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- McHorVer22WidthEq8_sse2 (&pSrc[8], iSrcStride, &pDst[8], iDstStride, iHeight);
-}
-void McHorVer22Width9Or17Height9Or17_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_2D (int16_t, pTap, 22, 24, 16)
- int32_t tmp1 = 2 * (iWidth - 8);
- McHorVer22HorFirst_sse2 (pSrc - 2, iSrcStride, (uint8_t*)pTap, 48, iWidth, iHeight + 5);
- McHorVer22Width8VerLastAlign_sse2 ((uint8_t*)pTap, 48, pDst, iDstStride, iWidth - 1, iHeight);
- McHorVer22Width8VerLastUnAlign_sse2 ((uint8_t*)pTap + tmp1, 48, pDst + iWidth - 8, iDstStride, 8, iHeight);
-}
-
-static inline void McHorVer01WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer03WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer10WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer11WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer12WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer13WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer21WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer23WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer30WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pTmp, 16, iHeight);
-}
-static inline void McHorVer31WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer32WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-static inline void McHorVer33WidthEq16_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
-
- McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_sse2 (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-
-static inline void McCopy_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- int32_t i;
- if (iWidth == 16)
- McCopyWidthEq16_sse2 (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 8)
- McCopyWidthEq8_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else if (iWidth == 4)
- McCopyWidthEq4_mmx (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else {
- for (i = 0; i < iHeight; i++) {
- memcpy (pDst, pSrc, iWidth); // confirmed_safe_unsafe_usage
- pDst += iDstStride;
- pSrc += iSrcStride;
- }
- }
-}
-
-typedef void (*McChromaWidthEqx) (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- const uint8_t* pABCD, int32_t iHeigh);
-void McChroma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- static const McChromaWidthEqx kpfFuncs[2] = {
- McChromaWidthEq4_mmx,
- McChromaWidthEq8_sse2
- };
-
- if (0 == kiD8x && 0 == kiD8y) {
- McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
- } else {
- kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
- }
-}
-
-void McChroma_ssse3 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
-
- static const McChromaWidthEqx kpfFuncs[2] = {
- McChromaWidthEq4_mmx,
- McChromaWidthEq8_ssse3
- };
- if (0 == kiD8x && 0 == kiD8y) {
- McCopy_sse2 (pSrc, iSrcStride, pDst, iDstStride, iWidth, iHeight);
- } else {
- kpfFuncs[ (iWidth >> 3)] (pSrc, iSrcStride, pDst, iDstStride, g_kuiABCD[kiD8y][kiD8x], iHeight);
- }
-
-}
-
-void McLuma_sse2 (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_sse2[16] = {
- McCopyWidthEq16_sse2, McHorVer10WidthEq16_sse2, McHorVer20WidthEq16_sse2, McHorVer30WidthEq16_sse2,
- McHorVer01WidthEq16_sse2, McHorVer11WidthEq16_sse2, McHorVer21WidthEq16_sse2, McHorVer31WidthEq16_sse2,
- McHorVer02WidthEq16_sse2, McHorVer12WidthEq16_sse2, McHorVer22WidthEq16_sse2, McHorVer32WidthEq16_sse2,
- McHorVer03WidthEq16_sse2, McHorVer13WidthEq16_sse2, McHorVer23WidthEq16_sse2, McHorVer33WidthEq16_sse2
- };
- uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
- pWelsMcFuncWidthEq16_sse2[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void PixelAvg_sse2 (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
- static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
- PixelAvgWidthEq8_mmx,
- PixelAvgWidthEq16_sse2
- };
- kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
-}
-#endif //X86_ASM
-
-//***************************************************************************//
-// NEON implementation //
-//***************************************************************************//
-#if defined(HAVE_NEON)
-void McHorVer20Width9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 17)
- McHorVer20Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //if (iWidth == 9)
- McHorVer20Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer02Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer02Height17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //if (iWidth == 8)
- McHorVer02Height9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer22Width9Or17Height9Or17_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 17)
- McHorVer22Width17_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //if (iWidth == 9)
- McHorVer22Width9_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void EncMcHorVer11_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer12_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer13_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer21_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer23_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer31_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer32_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcHorVer33_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_neon (pDst, iDstStride, pTmp, &pTmp[256], iHeight);
-}
-void EncMcChroma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (0 == kiD8x && 0 == kiD8y) {
- if (8 == iWidth)
- McCopyWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else // iWidth == 4
- McCopyWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- } else {
- if (8 == iWidth)
- McChromaWidthEq8_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- else //if(4 == iWidth)
- McChromaWidthEq4_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- }
-}
-void EncMcLuma_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_neon[16] = { //[x][y]
- McCopyWidthEq16_neon, McHorVer10WidthEq16_neon, McHorVer20WidthEq16_neon, McHorVer30WidthEq16_neon,
- McHorVer01WidthEq16_neon, EncMcHorVer11_neon, EncMcHorVer21_neon, EncMcHorVer31_neon,
- McHorVer02WidthEq16_neon, EncMcHorVer12_neon, McHorVer22WidthEq16_neon, EncMcHorVer32_neon,
- McHorVer03WidthEq16_neon, EncMcHorVer13_neon, EncMcHorVer23_neon, EncMcHorVer33_neon
- };
- uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
- pWelsMcFuncWidthEq16_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void PixelAvg_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
- static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
- PixStrideAvgWidthEq8_neon,
- PixStrideAvgWidthEq16_neon
- };
- kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
-}
-#endif
-
-#if defined(HAVE_NEON_AARCH64)
-void McHorVer20Width9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 17)
- McHorVer20Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //if (iWidth == 9)
- McHorVer20Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer02Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 16)
- McHorVer02Height17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //if (iWidth == 8)
- McHorVer02Height9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void McHorVer22Width9Or17Height9Or17_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst,
- int32_t iDstStride,
- int32_t iWidth, int32_t iHeight) {
- if (iWidth == 17)
- McHorVer22Width17_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else //if (iWidth == 9)
- McHorVer22Width9_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void EncMcHorVer11_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer12_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer13_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer21_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer23_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer31_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer32_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, pTmp, 16, iHeight);
- McHorVer22WidthEq16_AArch64_neon (pSrc, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcHorVer33_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int32_t iHeight) {
- ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 512, 16)
- McHorVer20WidthEq16_AArch64_neon (pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
- McHorVer02WidthEq16_AArch64_neon (pSrc + 1, iSrcStride, &pTmp[256], 16, iHeight);
- PixelAvgWidthEq16_AArch64_neon (pDst, iDstStride, pTmp, 16, &pTmp[256], 16, iHeight);
-}
-void EncMcChroma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- const int32_t kiD8x = iMvX & 0x07;
- const int32_t kiD8y = iMvY & 0x07;
- if (0 == kiD8x && 0 == kiD8y) {
- if (8 == iWidth)
- McCopyWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- else // iWidth == 4
- McCopyWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, iHeight);
- } else {
- if (8 == iWidth)
- McChromaWidthEq8_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- else //if(4 == iWidth)
- McChromaWidthEq4_AArch64_neon (pSrc, iSrcStride, pDst, iDstStride, (int32_t*) (g_kuiABCD[kiD8y][kiD8x]), iHeight);
- }
-}
-void EncMcLuma_AArch64_neon (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
- int16_t iMvX, int16_t iMvY, int32_t iWidth, int32_t iHeight) {
- static const PWelsLumaQuarpelMcFunc pWelsMcFuncWidthEq16_AArch64_neon[16] = { //[x][y]
- McCopyWidthEq16_AArch64_neon, McHorVer10WidthEq16_AArch64_neon, McHorVer20WidthEq16_AArch64_neon, McHorVer30WidthEq16_AArch64_neon,
- McHorVer01WidthEq16_AArch64_neon, EncMcHorVer11_AArch64_neon, EncMcHorVer21_AArch64_neon, EncMcHorVer31_AArch64_neon,
- McHorVer02WidthEq16_AArch64_neon, EncMcHorVer12_AArch64_neon, McHorVer22WidthEq16_AArch64_neon, EncMcHorVer32_AArch64_neon,
- McHorVer03WidthEq16_AArch64_neon, EncMcHorVer13_AArch64_neon, EncMcHorVer23_AArch64_neon, EncMcHorVer33_AArch64_neon
- };
- uint8_t uiMvpIdx = ((iMvY & 0x03) << 2) + (iMvX & 0x03);
- pWelsMcFuncWidthEq16_AArch64_neon[uiMvpIdx] (pSrc, iSrcStride, pDst, iDstStride, iHeight);
-}
-void PixelAvg_AArch64_neon (uint8_t* pDst, int32_t iDstStride, const uint8_t* pSrcA, int32_t iSrcAStride,
- const uint8_t* pSrcB, int32_t iSrcBStride, int32_t iWidth, int32_t iHeight) {
- static const PWelsSampleWidthAveragingFunc kpfFuncs[2] = {
- PixStrideAvgWidthEq8_AArch64_neon,
- PixStrideAvgWidthEq16_AArch64_neon
- };
- kpfFuncs[iWidth >> 4] (pDst, iDstStride, pSrcA, iSrcAStride, pSrcB, iSrcBStride, iHeight);
-}
-#endif
-
-void WelsInitMcFuncs (SMcFunc* pMcFuncs, uint32_t uiCpuFlag) {
- pMcFuncs->pfLumaHalfpelHor = McHorVer20_c;
- pMcFuncs->pfLumaHalfpelVer = McHorVer02_c;
- pMcFuncs->pfLumaHalfpelCen = McHorVer22_c;
- pMcFuncs->pfSampleAveraging = PixelAvg_c;
- pMcFuncs->pMcChromaFunc = McChroma_c;
- pMcFuncs->pMcLumaFunc = McLuma_c;
-#if defined (X86_ASM)
- if (uiCpuFlag & WELS_CPU_SSE2) {
- pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_sse2;
- pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_sse2;
- pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_sse2;
- pMcFuncs->pfSampleAveraging = PixelAvg_sse2;
- pMcFuncs->pMcChromaFunc = McChroma_sse2;
- pMcFuncs->pMcLumaFunc = McLuma_sse2;
- }
-
- if (uiCpuFlag & WELS_CPU_SSSE3) {
- pMcFuncs->pMcChromaFunc = McChroma_ssse3;
- }
-
-#endif //(X86_ASM)
-
-#if defined(HAVE_NEON)
- if (uiCpuFlag & WELS_CPU_NEON) {
- pMcFuncs->pMcLumaFunc = EncMcLuma_neon;
- pMcFuncs->pMcChromaFunc = EncMcChroma_neon;
- pMcFuncs->pfSampleAveraging = PixelAvg_neon;
- pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_neon;//iWidth+1:8/16
- pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_neon;//heigh+1:8/16
- pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_neon;//iWidth+1/heigh+1
- }
-#endif
-#if defined(HAVE_NEON_AARCH64)
- if (uiCpuFlag & WELS_CPU_NEON) {
- pMcFuncs->pMcLumaFunc = EncMcLuma_AArch64_neon;
- pMcFuncs->pMcChromaFunc = EncMcChroma_AArch64_neon;
- pMcFuncs->pfSampleAveraging = PixelAvg_AArch64_neon;
- pMcFuncs->pfLumaHalfpelHor = McHorVer20Width9Or17_AArch64_neon;//iWidth+1:8/16
- pMcFuncs->pfLumaHalfpelVer = McHorVer02Height9Or17_AArch64_neon;//heigh+1:8/16
- pMcFuncs->pfLumaHalfpelCen = McHorVer22Width9Or17Height9Or17_AArch64_neon;//iWidth+1/heigh+1
- }
-#endif
-}
-}
--- a/codec/encoder/targets.mk
+++ b/codec/encoder/targets.mk
@@ -8,7 +8,6 @@
$(ENCODER_SRCDIR)/core/src/encoder_data_tables.cpp\
$(ENCODER_SRCDIR)/core/src/encoder_ext.cpp\
$(ENCODER_SRCDIR)/core/src/get_intra_predictor.cpp\
- $(ENCODER_SRCDIR)/core/src/mc.cpp\
$(ENCODER_SRCDIR)/core/src/md.cpp\
$(ENCODER_SRCDIR)/core/src/memory_align.cpp\
$(ENCODER_SRCDIR)/core/src/mv_pred.cpp\
--- a/test/decoder/DecUT_MotionCompensation.cpp
+++ b/test/decoder/DecUT_MotionCompensation.cpp
@@ -1,8 +1,9 @@
#include <gtest/gtest.h>
#include "codec_def.h"
+#include "macros.h"
#include "mc.h"
#include "cpu.h"
-using namespace WelsDec;
+using namespace WelsCommon;
#include "mc_test_common.h"
--- a/test/encoder/EncUT_MotionCompensation.cpp
+++ b/test/encoder/EncUT_MotionCompensation.cpp
@@ -1,11 +1,10 @@
#include <gtest/gtest.h>
#include "codec_def.h"
+#include "macros.h"
#include "mc.h"
#include "cpu.h"
-using namespace WelsEnc;
+using namespace WelsCommon;
-#define InitMcFunc WelsInitMcFuncs
-
#include "mc_test_common.h"
DEF_MCCOPYTEST (Enc, 16, 8)
@@ -27,7 +26,7 @@
int32_t width = 8 << w;
int32_t height = 16;
uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL);
- WelsInitMcFuncs (&sMcFunc, uiCpuFlag);
+ InitMcFunc (&sMcFunc, uiCpuFlag);
uint8_t uSrc1[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE];
uint8_t uSrc2[MC_BUFF_HEIGHT][MC_BUFF_SRC_STRIDE];
ENFORCE_STACK_ALIGN_2D (uint8_t, uDstAnchor, MC_BUFF_HEIGHT, MC_BUFF_DST_STRIDE, 16);
@@ -76,7 +75,7 @@
}
uint32_t uiCpuFlag = k == 0 ? 0 : WelsCPUFeatureDetect (NULL);
- WelsInitMcFuncs (&sMcFunc, uiCpuFlag);
+ InitMcFunc (&sMcFunc, uiCpuFlag);
MCHalfPelFilterAnchor (uAnchors[1], uAnchors[2], uAnchors[3], uAnchors[0], MC_BUFF_SRC_STRIDE, width, height, pBuf + 4);
sMcFunc.pfLumaHalfpelHor (&uSrcTest[4][4], MC_BUFF_SRC_STRIDE, uDstTest[0], MC_BUFF_DST_STRIDE, width + 1, height);