shithub: openh264

Download patch

ref: fe57aa46df2ae9eb54a371d32a3472b31f69efd6
parent: e6c9eb9824f5624237e2c2ce3c7bbf108cc1848b
author: zhiliang wang <[email protected]>
date: Thu May 15 05:17:35 EDT 2014

Add gnu assembler support.

--- a/codec/encoder/core/arm64/pixel_neon_aarch64.S
+++ b/codec/encoder/core/arm64/pixel_neon_aarch64.S
@@ -62,6 +62,18 @@
     ld1     {v7.8b}, [x0], x1
 .endm
 
+.macro LOAD_16X8_1
+    ld1     {v0.16b}, [x0], x1 // Load eight 16-byte rows from [x0] into v0-v7 (one row per register).
+    ld1     {v1.16b}, [x0], x1 // Post-indexed form: x0 advances by x1 after every load.
+    ld1     {v2.16b}, [x0], x1
+    ld1     {v3.16b}, [x0], x1
+    ld1     {v4.16b}, [x0], x1
+    ld1     {v5.16b}, [x0], x1
+    ld1     {v6.16b}, [x0], x1
+    ld1     {v7.16b}, [x0], x1 // On exit x0 has advanced by 8 * x1.
+.endm
+
+#ifdef __APPLE__
 .macro LOAD_8X8_2
     ld1     {v16.8b}, [$0], x3
     ld1     {v17.8b}, [$0], x3
@@ -95,17 +107,6 @@
     uabal   v29.8h, v7.8b, v25.8b
 .endm
 
-.macro LOAD_16X8_1
-    ld1     {v0.16b}, [x0], x1
-    ld1     {v1.16b}, [x0], x1
-    ld1     {v2.16b}, [x0], x1
-    ld1     {v3.16b}, [x0], x1
-    ld1     {v4.16b}, [x0], x1
-    ld1     {v5.16b}, [x0], x1
-    ld1     {v6.16b}, [x0], x1
-    ld1     {v7.16b}, [x0], x1
-.endm
-
 .macro LOAD_16X8_2
     ld1     {v16.16b}, [$0], x3
     ld1     {v17.16b}, [$0], x3
@@ -154,6 +155,89 @@
     uabal   v29.8h, v7.8b, v25.8b
     uabal2  v29.8h, v7.16b,v25.16b
 .endm
+#else
+.macro LOAD_8X8_2 arg0 // GNU as variant: load eight 8-byte rows through pointer register arg0 into v16-v23.
+    ld1     {v16.8b}, [\arg0], x3 // Post-indexed form: arg0 advances by x3 after every load.
+    ld1     {v17.8b}, [\arg0], x3
+    ld1     {v18.8b}, [\arg0], x3
+    ld1     {v19.8b}, [\arg0], x3
+    ld1     {v20.8b}, [\arg0], x3
+    ld1     {v21.8b}, [\arg0], x3
+    ld1     {v22.8b}, [\arg0], x3
+    ld1     {v23.8b}, [\arg0], x3 // On exit arg0 has advanced by 8 * x3.
+.endm
+
+.macro CALC_ABS_8X8_1 arg0, arg1 // Widening absolute differences of rows v0-v7 against v16-v23, gathered lane-wise in arg0.
+    uab\arg1\()l    \arg0, v0.8b, v16.8b // arg1 completes the mnemonic: d gives uabdl (overwrite arg0), a gives uabal (accumulate).
+    uabal   \arg0, v1.8b, v17.8b // Every later row accumulates into arg0 regardless of arg1.
+    uabal   \arg0, v2.8b, v18.8b
+    uabal   \arg0, v3.8b, v19.8b
+    uabal   \arg0, v4.8b, v20.8b
+    uabal   \arg0, v5.8b, v21.8b
+    uabal   \arg0, v6.8b, v22.8b
+    uabal   \arg0, v7.8b, v23.8b
+.endm
+
+.macro CALC_ABS_8X8_2 arg0 // Same as CALC_ABS_8X8_1 but against v18-v25 (two registers further on), result fixed to v29.8h.
+    uab\arg0\()l    v29.8h, v0.8b, v18.8b // arg0 completes the mnemonic: d gives uabdl (overwrite v29), a gives uabal (accumulate).
+    uabal   v29.8h, v1.8b, v19.8b
+    uabal   v29.8h, v2.8b, v20.8b
+    uabal   v29.8h, v3.8b, v21.8b
+    uabal   v29.8h, v4.8b, v22.8b
+    uabal   v29.8h, v5.8b, v23.8b
+    uabal   v29.8h, v6.8b, v24.8b // NOTE(review): v24/v25 are not written by LOAD_8X8_2; presumably loaded by the caller - confirm.
+    uabal   v29.8h, v7.8b, v25.8b
+.endm
+
+.macro LOAD_16X8_2 arg0 // GNU as variant: load eight 16-byte rows through pointer register arg0 into v16-v23.
+    ld1     {v16.16b}, [\arg0], x3 // Post-indexed form: arg0 advances by x3 after every load.
+    ld1     {v17.16b}, [\arg0], x3
+    ld1     {v18.16b}, [\arg0], x3
+    ld1     {v19.16b}, [\arg0], x3
+    ld1     {v20.16b}, [\arg0], x3
+    ld1     {v21.16b}, [\arg0], x3
+    ld1     {v22.16b}, [\arg0], x3
+    ld1     {v23.16b}, [\arg0], x3 // On exit arg0 has advanced by 8 * x3.
+.endm
+
+.macro CALC_ABS_16X8_1 arg0, arg1 // Widening absolute differences of 16-byte rows v0-v7 against v16-v23, gathered lane-wise in arg0.
+    uab\arg1\()l  \arg0, v0.8b, v16.8b // arg1 completes the mnemonic: d gives uabdl (overwrite arg0), a gives uabal (accumulate).
+    uabal2  \arg0, v0.16b,v16.16b // uabal2 handles the upper 8 bytes of the row; the plain form above handles the lower 8.
+    uabal   \arg0, v1.8b, v17.8b // Each following row repeats the lower-half / upper-half pair, always accumulating.
+    uabal2  \arg0, v1.16b,v17.16b
+    uabal   \arg0, v2.8b, v18.8b
+    uabal2  \arg0, v2.16b,v18.16b
+    uabal   \arg0, v3.8b, v19.8b
+    uabal2  \arg0, v3.16b,v19.16b
+    uabal   \arg0, v4.8b, v20.8b
+    uabal2  \arg0, v4.16b,v20.16b
+    uabal   \arg0, v5.8b, v21.8b
+    uabal2  \arg0, v5.16b,v21.16b
+    uabal   \arg0, v6.8b, v22.8b
+    uabal2  \arg0, v6.16b,v22.16b
+    uabal   \arg0, v7.8b, v23.8b
+    uabal2  \arg0, v7.16b,v23.16b
+.endm
+
+.macro CALC_ABS_16X8_2 arg0 // Same as CALC_ABS_16X8_1 but against v18-v25 (two registers further on), result fixed to v29.8h.
+    uab\arg0\()l  v29.8h, v0.8b, v18.8b // arg0 completes the mnemonic: d gives uabdl (overwrite v29), a gives uabal (accumulate).
+    uabal2  v29.8h, v0.16b,v18.16b // uabal2 handles the upper 8 bytes of the row; the plain form above handles the lower 8.
+    uabal   v29.8h, v1.8b, v19.8b
+    uabal2  v29.8h, v1.16b,v19.16b
+    uabal   v29.8h, v2.8b, v20.8b
+    uabal2  v29.8h, v2.16b,v20.16b
+    uabal   v29.8h, v3.8b, v21.8b
+    uabal2  v29.8h, v3.16b,v21.16b
+    uabal   v29.8h, v4.8b, v22.8b
+    uabal2  v29.8h, v4.16b,v22.16b
+    uabal   v29.8h, v5.8b, v23.8b
+    uabal2  v29.8h, v5.16b,v23.16b
+    uabal   v29.8h, v6.8b, v24.8b // NOTE(review): v24/v25 are not written by LOAD_16X8_2; presumably loaded by the caller - confirm.
+    uabal2  v29.8h, v6.16b,v24.16b
+    uabal   v29.8h, v7.8b, v25.8b
+    uabal2  v29.8h, v7.16b,v25.16b
+.endm
+#endif
 
 WELS_ASM_ARCH64_FUNC_BEGIN WelsSampleSad4x4_AArch64_neon
     sxtw    x1, w1