shithub: dav1d

Download patch

ref: ff41197bc89fe06311cb07d0acf7e3cac76c6946
parent: e570088de116bbbbb0e24ae5b70c0927130e5964
author: Martin Storsjö <[email protected]>
date: Fri Oct 4 19:07:49 EDT 2019

arm64: mc: Schedule instructions better in the warp8x8 functions

Before:           Cortex A53     A72     A73
warp_8x8_8bpc_neon:   1997.3  1170.1  1199.9
warp_8x8t_8bpc_neon:  1982.4  1171.5  1192.6
After:
warp_8x8_8bpc_neon:   1954.6  1159.2  1153.3
warp_8x8t_8bpc_neon:  1938.5  1146.2  1136.7

--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -2975,7 +2975,9 @@
         ld1             {v16.8b, v17.8b}, [x2], x3
 
         load_filter_row d0, w12, w7
+        uxtl            v16.8h,  v16.8b
         load_filter_row d1, w12, w7
+        uxtl            v17.8h,  v17.8b
         load_filter_row d2, w12, w7
         sxtl            v0.8h,   v0.8b
         load_filter_row d3, w12, w7
@@ -2988,16 +2990,12 @@
         sxtl            v4.8h,   v4.8b
         load_filter_row d7, w12, w7
         sxtl            v5.8h,   v5.8b
-        sxtl            v6.8h,   v6.8b
-        sxtl            v7.8h,   v7.8b
-
-        uxtl            v16.8h,  v16.8b
-        uxtl            v17.8h,  v17.8b
-
         ext             v18.16b, v16.16b, v17.16b, #2*1
         mul             v23.8h,  v16.8h,  v0.8h
+        sxtl            v6.8h,   v6.8b
         ext             v19.16b, v16.16b, v17.16b, #2*2
         mul             v18.8h,  v18.8h,  v1.8h
+        sxtl            v7.8h,   v7.8b
         ext             v20.16b, v16.16b, v17.16b, #2*3
         mul             v19.8h,  v19.8h,  v2.8h
         ext             v21.16b, v16.16b, v17.16b, #2*4