shithub: dav1d

Download patch

ref: 39d6c599352bff68038500756488e80f9cd31295
parent: b6b1394b06ea2cce03c9c97c77510cb8f2a207e2
author: Martin Storsjö <[email protected]>
date: Sat May 2 20:44:05 EDT 2020

arm64: itx: Simplify inv_txfm_horz_dct_32x8

Merge the paired single-register loads and stores in the store2 macro into two-register ld1/st1 instructions, avoiding the extra adjustments of the x6 pointer.

--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -2085,11 +2085,9 @@
         transpose_8x8h  v31, v30, v29, v28, v27, v26, v25, v24, v4, v5
         transpose_8x8h  v23, v22, v21, v20, v19, v18, v17, v16, v4, v5
 .macro store2 r0, r1, shift
-        ld1             {v4.8h}, [x6], #16
-        ld1             {v5.8h}, [x6]
+        ld1             {v4.8h, v5.8h}, [x6]
         sqsub           v7.8h,   v4.8h,   \r0
         sqsub           v6.8h,   v5.8h,   \r1
-        sub             x6,  x6,  #16
         sqadd           v4.8h,   v4.8h,   \r0
         sqadd           v5.8h,   v5.8h,   \r1
         rev64           v6.8h,   v6.8h
@@ -2098,12 +2096,10 @@
         srshr           v5.8h,   v5.8h,   #\shift
         srshr           v6.8h,   v6.8h,   #\shift
         srshr           v7.8h,   v7.8h,   #\shift
-        st1             {v4.8h}, [x6], #16
         ext             v6.16b,  v6.16b,  v6.16b,  #8
-        st1             {v5.8h}, [x6], #16
+        st1             {v4.8h, v5.8h}, [x6], #32
         ext             v7.16b,  v7.16b,  v7.16b,  #8
-        st1             {v6.8h}, [x6], #16
-        st1             {v7.8h}, [x6], #16
+        st1             {v6.8h, v7.8h}, [x6], #32
 .endm
 
         store2          v31.8h,  v23.8h, \shift