shithub: dav1d

Download patch

ref: 8fb306575cef9c1882cfc693b1c47e68e27fc71a
parent: 8e8fb84dcda63e83671a41235f2d71e726a2e716
author: Martin Storsjö <[email protected]>
date: Sun Feb 9 18:14:22 EST 2020

arm: looprestoration: Improve scheduling in box3/5_h slightly

Move the flag-setting subs ahead of the stores, so that the flags are
set further from the branch instructions that use them.

--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -925,11 +925,11 @@
         vmull.u8        q6,  d9,  d9
 
         add3            4
+        subs            r5,  r5,  #4
         vst1.16         {d6},  [r1,  :64]!
         vst1.16         {d14}, [r11, :64]!
         vst1.32         {q12}, [r0,  :128]!
         vst1.32         {q8},  [r10, :128]!
-        subs            r5,  r5,  #4
         ble             9f
         vext.8          q0,  q0,  q0,  #4
         vext.8          q1,  q1,  q2,  #8
@@ -1215,11 +1215,11 @@
         vmull.u8        q6,  d9,  d9
 
         add5            4
+        subs            r5,  r5,  #4
         vst1.16         {d6},  [r1,  :64]!
         vst1.16         {d14}, [r11, :64]!
         vst1.32         {q12}, [r0,  :128]!
         vst1.32         {q10}, [r10, :128]!
-        subs            r5,  r5,  #4
         ble             9f
         vext.8          q0,  q0,  q0,  #4
         vext.8          q1,  q1,  q2,  #8
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -844,11 +844,11 @@
         umull2          v6.8h,   v4.16b,  v4.16b
 
         add3            4
+        subs            w5,  w5,  #4
         st1             {v3.4h},  [x1],  #8
         st1             {v7.4h},  [x11], #8
         st1             {v26.4s}, [x0],  #16
         st1             {v28.4s}, [x10], #16
-        subs            w5,  w5,  #4
         b.le            9f
         ext             v0.16b,  v0.16b,  v0.16b, #4
         ext             v4.16b,  v4.16b,  v4.16b, #4
@@ -1114,11 +1114,11 @@
         umull2          v6.8h,   v4.16b,  v4.16b
 
         add5            4
+        subs            w5,  w5,  #4
         st1             {v3.4h},  [x1],  #8
         st1             {v7.4h},  [x11], #8
         st1             {v26.4s}, [x0],  #16
         st1             {v28.4s}, [x10], #16
-        subs            w5,  w5,  #4
         b.le            9f
         ext             v0.16b,  v0.16b,  v0.16b, #4
         ext             v1.16b,  v1.16b,  v2.16b, #8