ref: 8fb306575cef9c1882cfc693b1c47e68e27fc71a
parent: 8e8fb84dcda63e83671a41235f2d71e726a2e716
author: Martin Storsjö <[email protected]>
date: Sun Feb 9 18:14:22 EST 2020
arm: looprestoration: Improve scheduling in box3/5_h slightly. Set flags further from the branch instructions that use them.
--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -925,11 +925,11 @@
vmull.u8 q6, d9, d9
add3 4
+ subs r5, r5, #4
vst1.16 {d6}, [r1, :64]!
vst1.16 {d14}, [r11, :64]!
vst1.32 {q12}, [r0, :128]!
vst1.32 {q8}, [r10, :128]!
- subs r5, r5, #4
ble 9f
vext.8 q0, q0, q0, #4
vext.8 q1, q1, q2, #8
@@ -1215,11 +1215,11 @@
vmull.u8 q6, d9, d9
add5 4
+ subs r5, r5, #4
vst1.16 {d6}, [r1, :64]!
vst1.16 {d14}, [r11, :64]!
vst1.32 {q12}, [r0, :128]!
vst1.32 {q10}, [r10, :128]!
- subs r5, r5, #4
ble 9f
vext.8 q0, q0, q0, #4
vext.8 q1, q1, q2, #8
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -844,11 +844,11 @@
umull2 v6.8h, v4.16b, v4.16b
add3 4
+ subs w5, w5, #4
st1 {v3.4h}, [x1], #8
st1 {v7.4h}, [x11], #8
st1 {v26.4s}, [x0], #16
st1 {v28.4s}, [x10], #16
- subs w5, w5, #4
b.le 9f
ext v0.16b, v0.16b, v0.16b, #4
ext v4.16b, v4.16b, v4.16b, #4
@@ -1114,11 +1114,11 @@
umull2 v6.8h, v4.16b, v4.16b
add5 4
+ subs w5, w5, #4
st1 {v3.4h}, [x1], #8
st1 {v7.4h}, [x11], #8
st1 {v26.4s}, [x0], #16
st1 {v28.4s}, [x10], #16
- subs w5, w5, #4
b.le 9f
ext v0.16b, v0.16b, v0.16b, #4
ext v1.16b, v1.16b, v2.16b, #8