shithub: dav1d

Download patch

ref: 18ef9556b71e3b6b839c35ae614ef0bb5b6a2179
parent: c204da0ff33a0d563d6c632b42799e4fbc48f402
author: Martin Storsjö <[email protected]>
date: Wed Feb 27 06:29:14 EST 2019

arm: looprestoration: Simplify a few padding cases in wiener_filter_h_neon

--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -283,14 +283,12 @@
         .word 66f - L(variable_shift_tbl) + CONFIG_THUMB
         .word 77f - L(variable_shift_tbl) + CONFIG_THUMB
 
+44:     // 4 pixels valid in d2/d16, fill d3/d17 with padding.
+        vmov            d3,  d4
+        vmov            d17, d18
+        b               88f
         // Shift q1 right, shifting out invalid pixels,
         // shift q1 left to the original offset, shifting in padding pixels.
-44:     // 4 pixels valid
-        vext.8          q1,  q1,  q1,  #8
-        vext.8          q1,  q1,  q2,  #8
-        vext.8          q8,  q8,  q8,  #8
-        vext.8          q8,  q8,  q9,  #8
-        b               88f
 55:     // 5 pixels valid
         vext.8          q1,  q1,  q1,  #10
         vext.8          q1,  q1,  q2,  #6
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -224,14 +224,12 @@
         mov             v3.16b,  v28.16b
         mov             v5.16b,  v29.16b
         br              x11
+44:     // 4 pixels valid in v2/v4, fill the high half with padding.
+        ins             v2.d[1], v3.d[0]
+        ins             v4.d[1], v5.d[0]
+        b               88f
         // Shift v2 right, shifting out invalid pixels,
         // shift v2 left to the original offset, shifting in padding pixels.
-44:     // 4 pixels valid
-        ext             v2.16b,  v2.16b,  v2.16b,  #8
-        ext             v2.16b,  v2.16b,  v3.16b,  #8
-        ext             v4.16b,  v4.16b,  v4.16b,  #8
-        ext             v4.16b,  v4.16b,  v5.16b,  #8
-        b               88f
 55:     // 5 pixels valid
         ext             v2.16b,  v2.16b,  v2.16b,  #10
         ext             v2.16b,  v2.16b,  v3.16b,  #6
@@ -238,17 +236,13 @@
         ext             v4.16b,  v4.16b,  v4.16b,  #10
         ext             v4.16b,  v4.16b,  v5.16b,  #6
         b               88f
-66:     // 6 pixels valid
-        ext             v2.16b,  v2.16b,  v2.16b,  #12
-        ext             v2.16b,  v2.16b,  v3.16b,  #4
-        ext             v4.16b,  v4.16b,  v4.16b,  #12
-        ext             v4.16b,  v4.16b,  v5.16b,  #4
+66:     // 6 pixels valid, fill the upper 2 pixels with padding.
+        ins             v2.s[3], v3.s[0]
+        ins             v4.s[3], v5.s[0]
         b               88f
-77:     // 7 pixels valid
-        ext             v2.16b,  v2.16b,  v2.16b,  #14
-        ext             v2.16b,  v2.16b,  v3.16b,  #2
-        ext             v4.16b,  v4.16b,  v4.16b,  #14
-        ext             v4.16b,  v4.16b,  v5.16b,  #2
+77:     // 7 pixels valid, fill the last pixel with padding.
+        ins             v2.h[7], v3.h[0]
+        ins             v4.h[7], v5.h[0]
         b               88f
 
 L(variable_shift_tbl):