shithub: dav1d

Download patch

ref: b252334a3a5c76cfc2e6d4e2a57f08d3dc551766
parent: 78d27b7d1c923f632bc266470436e7f46a940d70
author: Martin Storsjö <[email protected]>
date: Wed Dec 9 09:06:47 EST 2020

arm: loopfilter: Compare L != 0 before doing a splat

--- a/src/arm/32/loopfilter.S
+++ b/src/arm/32/loopfilter.S
@@ -783,11 +783,11 @@
         vld1.8          {d6[]}, [r5]   // sharp[1]
         sub             r5,  r5,  #8
         vbif            d1,  d0,  d3   // if (!l[0][0]) L = l[offset][0]
+        vtst.32         d2,  d1,  d2   // L != 0
         vmul.i32        d1,  d1,  d4   // L
 .ifc \type, y
         vdup.32         d15, r2        // vmask[2]
 .endif
-        vtst.32         d2,  d1,  d2   // L != 0
         vdup.32         d14, r7        // vmask[1]
         vmov            r10, r11, d2
         orrs            r10, r10, r11
--- a/src/arm/64/loopfilter.S
+++ b/src/arm/64/loopfilter.S
@@ -1034,11 +1034,11 @@
         ld1r            {v6.16b}, [x5]            // sharp[1]
         sub             x5,  x5,  #8
         bif             v1.16b,  v0.16b,  v3.16b  // if (!l[0][0]) L = l[offset][0]
+        cmtst           v2.4s,   v1.4s,   v2.4s   // L != 0
         mul             v1.4s,   v1.4s,   v4.4s   // L
 .ifc \type, y
         dup             v15.4s,  w2               // vmask[2]
 .endif
-        cmtst           v2.4s,   v1.4s,   v2.4s   // L != 0
         dup             v14.4s,  w7               // vmask[1]
         mov             x16, v2.d[0]
         mov             x17, v2.d[1]
--- a/src/arm/64/loopfilter16.S
+++ b/src/arm/64/loopfilter16.S
@@ -808,11 +808,11 @@
         ld1r            {v6.8b}, [x5]             // sharp[1]
         sub             x5,  x5,  #8
         bif             v1.8b,   v0.8b,   v3.8b   // if (!l[0][0]) L = l[offset][0]
+        cmtst           v2.2s,   v1.2s,   v2.2s   // L != 0
         mul             v1.2s,   v1.2s,   v4.2s   // L
 .ifc \type, y
         dup             v15.2s,  w2               // vmask[2]
 .endif
-        cmtst           v2.2s,   v1.2s,   v2.2s   // L != 0
         dup             v14.2s,  w7               // vmask[1]
         mov             x16, v2.d[0]
         cmp             x16, #0