shithub: dav1d

Download patch

ref: 8bbcd3f7a7518808032be82260fd1ebb02337d2d
parent: 556780b7556b5be83ab49d75b23f74434f848132
author: Martin Storsjö <[email protected]>
date: Sun Apr 7 20:11:03 EDT 2019

arm: Add a _neon suffix to all internal functions

This eases disambiguating these functions when looking at perf
profiles.

--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -218,7 +218,7 @@
 // This has got the same signature as the put_8tap functions,
 // assumes that the caller has loaded the h argument into r5,
 // and assumes that r8 is set to (clz(w)-24).
-function put
+function put_neon
         adr             r9,  L(put_tbl)
         ldr             r8,  [r9, r8, lsl #2]
         add             r9,  r9,  r8
@@ -309,7 +309,7 @@
 // This has got the same signature as the put_8tap functions,
 // assumes that the caller has loaded the h argument into r4,
 // and assumes that r8 is set to (clz(w)-24), and r7 to w*2.
-function prep
+function prep_neon
         adr             r9,  L(prep_tbl)
         ldr             r8,  [r9, r8, lsl #2]
         add             r9,  r9,  r8
@@ -660,7 +660,7 @@
         push            {r4-r11,lr}
         movw            r8,  \type_h
         movw            r9,  \type_v
-        b               \op\()_8tap
+        b               \op\()_8tap_neon
 endfunc
 .endm
 
@@ -680,7 +680,7 @@
 make_8tap_fn \type, sharp_regular,  SHARP,   REGULAR
 make_8tap_fn \type, sharp_smooth,   SHARP,   SMOOTH
 
-function \type\()_8tap
+function \type\()_8tap_neon
         ldrd            r4,  r5,  [sp, #36]
         ldrd            r6,  r7,  [sp, #44]
         movw            r10,  #0x4081  // (1 << 14) | (1 << 7) | (1 << 0)
@@ -699,7 +699,7 @@
         bne             L(\type\()_8tap_h)
         tst             \my, #(0x7f << 14)
         bne             L(\type\()_8tap_v)
-        b               \type
+        b               \type\()_neon
 
 L(\type\()_8tap_h):
         cmp             \w,  #4
@@ -1831,7 +1831,7 @@
         bne             L(\type\()_bilin_h)
         cmp             \my, #0
         bne             L(\type\()_bilin_v)
-        b               \type
+        b               \type\()_neon
 
 L(\type\()_bilin_h):
         cmp             \my, #0
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -236,7 +236,7 @@
 
 // This has got the same signature as the put_8tap functions,
 // and assumes that x8 is set to (clz(w)-24).
-function put
+function put_neon
         adr             x9,  L(put_tbl)
         ldrh            w8,  [x9, x8, lsl #1]
         sub             x9,  x9,  w8, uxtw
@@ -331,7 +331,7 @@
 
 // This has got the same signature as the prep_8tap functions,
 // and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
-function prep
+function prep_neon
         adr             x9,  L(prep_tbl)
         ldrh            w8,  [x9, x8, lsl #1]
         sub             x9,  x9,  w8, uxtw
@@ -703,7 +703,7 @@
 function \op\()_8tap_\type\()_8bpc_neon, export=1
         mov             x8,  \type_h
         mov             x9,  \type_v
-        b               \op\()_8tap
+        b               \op\()_8tap\()_neon
 endfunc
 .endm
 
@@ -723,7 +723,7 @@
 make_8tap_fn \type, sharp_regular,  SHARP,   REGULAR
 make_8tap_fn \type, sharp_smooth,   SHARP,   SMOOTH
 
-function \type\()_8tap
+function \type\()_8tap_neon
         mov             w10,  #0x4081  // (1 << 14) | (1 << 7) | (1 << 0)
         mul             \mx,  \mx, w10
         mul             \my,  \my, w10
@@ -741,7 +741,7 @@
         b.ne            L(\type\()_8tap_h)
         tst             \my, #(0x7f << 14)
         b.ne            L(\type\()_8tap_v)
-        b               \type
+        b               \type\()_neon
 
 L(\type\()_8tap_h):
         cmp             \w,  #4
@@ -1826,7 +1826,7 @@
         sub             w8,  w8,  #24
         cbnz            \mx, L(\type\()_bilin_h)
         cbnz            \my, L(\type\()_bilin_v)
-        b               \type
+        b               \type\()_neon
 
 L(\type\()_bilin_h):
         cbnz            \my, L(\type\()_bilin_hv)
@@ -2335,7 +2335,7 @@
         add             \src, \src, \inc
 .endm
 
-function warp_filter_horz
+function warp_filter_horz_neon
         add             w12, w5,  #512
 
         ld1             {v16.8b, v17.8b}, [x2], x3
@@ -2431,24 +2431,24 @@
         lsl             x1,  x1,  #1
 .endif
 
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v24.16b, v16.16b
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v25.16b, v16.16b
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v26.16b, v16.16b
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v27.16b, v16.16b
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v28.16b, v16.16b
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v29.16b, v16.16b
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v30.16b, v16.16b
 
 1:
         add             w14, w6,  #512
-        bl              warp_filter_horz
+        bl              warp_filter_horz_neon
         mov             v31.16b, v16.16b
 
         load_filter_row d0, w14, w9