ref: 910c64ef22eab59856acfafbcfac8a14ed13b7cf
parent: 5397265021c119543a85879749e80747f0dc9092
author: HFVideoMac <[email protected]>
date: Tue Jul 22 09:06:34 EDT 2014
add ARM64 Adaptive Quantization code and UT
--- a/Makefile
+++ b/Makefile
@@ -89,6 +89,7 @@
PROCESSING_INCLUDES += \
-I$(SRC_PATH)codec/processing/interface \
-I$(SRC_PATH)codec/processing/src/common \
+ -I$(SRC_PATH)codec/processing/src/adaptivequantization \
-I$(SRC_PATH)codec/processing/src/scrolldetection
GTEST_INCLUDES += \
--- a/codec/build/iOS/processing/processing.xcodeproj/project.pbxproj
+++ b/codec/build/iOS/processing/processing.xcodeproj/project.pbxproj
@@ -31,6 +31,7 @@
549947F2196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D5196A3FB400BA3D87 /* ScrollDetectionFuncs.cpp */; };
549947F3196A3FB400BA3D87 /* vaacalcfuncs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */; };
549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 549947D9196A3FB400BA3D87 /* vaacalculation.cpp */; };
+ 6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */ = {isa = PBXBuildFile; fileRef = 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@@ -91,6 +92,7 @@
549947D8196A3FB400BA3D87 /* vaacalcfuncs.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalcfuncs.cpp; sourceTree = "<group>"; };
549947D9196A3FB400BA3D87 /* vaacalculation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vaacalculation.cpp; sourceTree = "<group>"; };
549947DA196A3FB400BA3D87 /* vaacalculation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vaacalculation.h; sourceTree = "<group>"; };
+ 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.asm; name = adaptive_quantization_aarch64_neon.S; path = arm64/adaptive_quantization_aarch64_neon.S; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -108,6 +110,7 @@
4CC6094D197E008B00BE8B8B /* arm64 */ = {
isa = PBXGroup;
children = (
+ 6C749B77197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S */,
4CC6094E197E009D00BE8B8B /* down_sample_aarch64_neon.S */,
);
name = arm64;
@@ -337,6 +340,7 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
+ 6C749B78197E2A2000A111F9 /* adaptive_quantization_aarch64_neon.S in Sources */,
549947F4196A3FB400BA3D87 /* vaacalculation.cpp in Sources */,
549947E9196A3FB400BA3D87 /* ComplexityAnalysis.cpp in Sources */,
549947E3196A3FB400BA3D87 /* vaa_calc_neon.S in Sources */,
--- a/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp
+++ b/codec/processing/src/adaptivequantization/AdaptiveQuantization.cpp
@@ -235,6 +235,11 @@
pfVar = SampleVariance16x16_neon;
}
#endif
+#ifdef HAVE_NEON_AARCH64
+ if (iCpuFlag & WELS_CPU_NEON) {
+ pfVar = SampleVariance16x16_AArch64_neon;
+ }
+#endif
}
void SampleVariance16x16_c (uint8_t* pRefY, int32_t iRefStride, uint8_t* pSrcY, int32_t iSrcStride,
--- a/codec/processing/src/adaptivequantization/AdaptiveQuantization.h
+++ b/codec/processing/src/adaptivequantization/AdaptiveQuantization.h
@@ -68,6 +68,12 @@
WELSVP_EXTERN_C_END
#endif
+#ifdef HAVE_NEON_AARCH64
+WELSVP_EXTERN_C_BEGIN
+VarFunc SampleVariance16x16_AArch64_neon; // implemented in src/arm64/adaptive_quantization_aarch64_neon.S
+WELSVP_EXTERN_C_END
+#endif
+
class CAdaptiveQuantization : public IStrategy {
public:
CAdaptiveQuantization (int32_t iCpuFlag);
--- /dev/null
+++ b/codec/processing/src/arm64/adaptive_quantization_aarch64_neon.S
@@ -1,0 +1,88 @@
+/*!
+ * \copy
+ * Copyright (c) 2013, Cisco Systems
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifdef HAVE_NEON_AARCH64
+.text
+#include "arm_arch64_common_macro.S"
+WELS_ASM_AARCH64_FUNC_BEGIN SampleVariance16x16_AArch64_neon
+    // Args as passed by the C callers: x0 = pRefY, w1 = iRefStride, x2 = pSrcY, w3 = iSrcStride, x4 = result pointer
+    sxtw    x1, w1                      // strides are int32_t: AAPCS64 leaves bits 63:32 unspecified,
+    sxtw    x3, w3                      // so sign-extend before using them as 64-bit post-increments
+    ld1     {v1.16b}, [x0], x1          // load one ref row (16 bytes)
+    ld1     {v0.16b}, [x2], x3          // load one src row (16 bytes)
+    uabd    v2.16b, v0.16b, v1.16b      // per-byte absolute difference between src and ref
+    umull   v3.8h, v2.8b, v2.8b
+    umull2  v4.8h, v2.16b, v2.16b
+    uaddlp  v4.4s, v4.8h
+    uadalp  v4.4s, v3.8h                // sqr: squared differences
+    uaddlp  v2.8h, v2.16b               // sum: differences
+    uaddlp  v1.8h, v0.16b               // sum_cur: src bytes
+    umull   v3.8h, v0.8b, v0.8b
+    umull2  v5.8h, v0.16b, v0.16b
+    uaddlp  v3.4s, v3.8h
+    uadalp  v3.4s, v5.8h                // sqr_cur: squared src bytes
+.rept 15
+    ld1     {v5.16b}, [x0], x1          // load the next ref row (16 bytes)
+    ld1     {v0.16b}, [x2], x3          // load the next src row (16 bytes)
+    uabd    v6.16b, v0.16b, v5.16b
+
+    // v1 accumulates sum_cur
+    uadalp  v1.8h, v0.16b
+
+    // v4 accumulates sqr
+    umull   v5.8h, v6.8b, v6.8b
+    umull2  v7.8h, v6.16b, v6.16b
+    uadalp  v4.4s, v5.8h                // sqr
+    uadalp  v4.4s, v7.8h                // sqr
+
+    // v2 accumulates sum
+    uadalp  v2.8h, v6.16b
+
+    // v3 accumulates sqr_cur
+    umull   v5.8h, v0.8b, v0.8b
+    umull2  v7.8h, v0.16b, v0.16b
+    uadalp  v3.4s, v5.8h                // sqr_cur
+    uadalp  v3.4s, v7.8h                // sqr_cur
+.endr
+    uaddlv  s2, v2.8h                   // sum reduced to one 32-bit value
+    uaddlv  s1, v1.8h                   // sum_cur reduced to one 32-bit value
+    ins     v2.s[1], v1.s[0]            // v2.s = {sum, sum_cur}
+    shrn    v2.4h, v2.4s, #8            // {sum >> 8, sum_cur >> 8}
+    mul     v2.4h, v2.4h, v2.4h         // {(sum >> 8)^2, (sum_cur >> 8)^2}
+    uaddlv  d4, v4.4s                   // sqr reduced to one value
+    uaddlv  d3, v3.4s                   // sqr_cur reduced to one value
+    ins     v4.s[1], v3.s[0]            // v4.s = {sqr, sqr_cur}
+    shrn    v4.4h, v4.4s, #8            // {sqr >> 8, sqr_cur >> 8}
+    sub     v4.4h, v4.4h, v2.4h         // {(sqr >> 8) - (sum >> 8)^2, same for the _cur pair}
+    st1     {v4.s}[0], [x4]             // store both 16-bit results: difference-based first, src-based second
+WELS_ASM_AARCH64_FUNC_END
+#endif
--- a/codec/processing/targets.mk
+++ b/codec/processing/targets.mk
@@ -42,6 +42,7 @@
ifeq ($(ASM_ARCH), arm64)
PROCESSING_ASM_ARM64_SRCS=\
+ $(PROCESSING_SRCDIR)/src/arm64/adaptive_quantization_aarch64_neon.S\
$(PROCESSING_SRCDIR)/src/arm64/down_sample_aarch64_neon.S\
PROCESSING_OBJS += $(PROCESSING_ASM_ARM64_SRCS:.S=.$(OBJ))
--- /dev/null
+++ b/test/processing/ProcessUT_AdaptiveQuantization.cpp
@@ -1,0 +1,76 @@
+#include <gtest/gtest.h>
+#include <math.h>
+#include <string.h>
+#include "cpu.h"
+#include "cpu_core.h"
+#include "util.h"
+#include "macros.h"
+#include "IWelsVP.h"
+#include "AdaptiveQuantization.h"
+
+
+using namespace nsWelsVP;
+
+static void FillWithRandomData (uint8_t* p, int32_t Len) { // fills p[0..Len) with bytes taken from rand()
+  for (int32_t iRemaining = Len; iRemaining > 0; --iRemaining) {
+    *p++ = rand() % 256;
+  }
+}
+
+// Scalar reference over an MB_WIDTH_LUMA x MB_WIDTH_LUMA block: computes (sum of squares >> 8) - (sum >> 8)^2,
+// once over |ref - src| (written to uiMotionIndex) and once over the src pixels (written to uiTextureIndex).
+void SampleVariance16x16_ref (uint8_t* pRefY, int32_t iRefStride, uint8_t* pSrcY, int32_t iSrcStride,
+                              SMotionTextureUnit* pMotionTexture) {
+  uint16_t uiDiffSum = 0, uiSrcSum = 0;
+  uint32_t uiDiffSqr = 0, uiSrcSqr = 0;
+
+  for (int32_t iRow = 0; iRow < MB_WIDTH_LUMA; ++iRow, pRefY += iRefStride, pSrcY += iSrcStride) {
+    for (int32_t iCol = 0; iCol < MB_WIDTH_LUMA; ++iCol) {
+      const uint32_t uiAbsDiff = WELS_ABS (pRefY[iCol] - pSrcY[iCol]);
+      const uint8_t uiPixel = pSrcY[iCol];
+      uiDiffSum += uiAbsDiff;
+      uiDiffSqr += uiAbsDiff * uiAbsDiff;
+      uiSrcSum += uiPixel;
+      uiSrcSqr += uiPixel * uiPixel;
+    }
+  }
+
+  // Each sum is scaled down by 256 (>> 8) before it is squared.
+  const uint16_t uiDiffMean = uiDiffSum >> 8;
+  const uint16_t uiSrcMean = uiSrcSum >> 8;
+  pMotionTexture->uiMotionIndex = (uiDiffSqr >> 8) - (uiDiffMean * uiDiffMean);
+  pMotionTexture->uiTextureIndex = (uiSrcSqr >> 8) - (uiSrcMean * uiSrcMean);
+}
+
+#define GENERATE_AQTEST(method) /* test case: method must reproduce both outputs of SampleVariance16x16_ref */ \
+TEST (AdaptiveQuantization, method) {\
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pRefY,32*16,16)\
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pSrcY,48*16,16)\
+  SMotionTextureUnit pMotionTexture[2]; /* [0]: reference output, [1]: output of method */ \
+  FillWithRandomData (pRefY,32*16); /* pass 1: random pixels; ref stride 32, src stride 48 */ \
+  FillWithRandomData (pSrcY,48*16);\
+  SampleVariance16x16_ref (pRefY,32,pSrcY,48,&pMotionTexture[0]);\
+  method(pRefY,32,pSrcY,48,&pMotionTexture[1]);\
+  ASSERT_EQ(pMotionTexture[0].uiMotionIndex,pMotionTexture[1].uiMotionIndex);\
+  ASSERT_EQ(pMotionTexture[0].uiTextureIndex,pMotionTexture[1].uiTextureIndex);\
+  memset (pRefY,0,32*16); /* pass 2: extreme input, ref all 0 and src all 255 */ \
+  memset (pSrcY,255,48*16);\
+  SampleVariance16x16_ref (pRefY,32,pSrcY,48,&pMotionTexture[0]);\
+  method(pRefY,32,pSrcY,48,&pMotionTexture[1]);\
+  ASSERT_EQ(pMotionTexture[0].uiMotionIndex,pMotionTexture[1].uiMotionIndex);\
+  ASSERT_EQ(pMotionTexture[0].uiTextureIndex,pMotionTexture[1].uiTextureIndex);\
+}
+
+GENERATE_AQTEST (SampleVariance16x16_c) // always built; the variants below depend on build-time macros
+#if defined(X86_ASM)
+GENERATE_AQTEST (SampleVariance16x16_sse2)
+#endif
+
+#if defined(HAVE_NEON)
+GENERATE_AQTEST (SampleVariance16x16_neon)
+#endif
+
+#if defined(HAVE_NEON_AARCH64)
+GENERATE_AQTEST (SampleVariance16x16_AArch64_neon) // routine from src/arm64/adaptive_quantization_aarch64_neon.S
+#endif
+
--- a/test/processing/targets.mk
+++ b/test/processing/targets.mk
@@ -1,5 +1,6 @@
PROCESSING_UNITTEST_SRCDIR=test/processing
PROCESSING_UNITTEST_CPP_SRCS=\
+ $(PROCESSING_UNITTEST_SRCDIR)/ProcessUT_AdaptiveQuantization.cpp\
$(PROCESSING_UNITTEST_SRCDIR)/ProcessUT_ScrollDetection.cpp\
PROCESSING_UNITTEST_OBJS += $(PROCESSING_UNITTEST_CPP_SRCS:.cpp=.$(OBJ))