shithub: dav1d

Download patch

ref: afe901a624990ad98090c1a15f9d599eaff3a4cf
parent: dfadb6df41acd02daddcae0f42c64f3e25fcb4b0
author: Henrik Gramner <[email protected]>
date: Wed Oct 9 19:14:16 EDT 2019

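Simplify ipred_z C code

Keep the sub-pixel fraction in place as `pos & 0x3E` instead of
`(pos >> 1) & 0x1F`, and blend with
`(a * (64 - frac) + b * frac + 32) >> 6`. Since the new frac is exactly
twice the old one, this is numerically identical to the previous
`(a * (32 - frac) + b * frac + 16) >> 5`. The result is a weighted average
of two edge samples with non-negative weights summing to 64, so it cannot
leave the range of its inputs and the iclip_pixel() calls are dropped.

In the predictor that reads both the top and left edges, fold the
`1 + upsample_above` offset into the initial xpos so that topleft[] is
indexed directly and the separate `top` pointer and `min_base_x` go away.

Rewrite get_filter_strength() with early returns: the `type == 0` test
becomes `is_sm` with the two branches swapped, and the identical
`<= 12` / `<= 16` cases are merged. The former `d >= 1` checks become
unconditional returns, which presumably relies on callers never passing
an angle of 0 (to be confirmed against the call sites).

The x86 asm hunks only update comments to match the new C notation.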

--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -324,44 +324,37 @@
     }
 }
 
-static int get_filter_strength(const unsigned blk_wh, const unsigned d,
-                               const int type)
-{
-    int strength = 0;
-
-    if (type == 0) {
-        if (blk_wh <= 8) {
-            if (d >= 56) strength = 1;
-        } else if (blk_wh <= 12) {
-            if (d >= 40) strength = 1;
-        } else if (blk_wh <= 16) {
-            if (d >= 40) strength = 1;
-        } else if (blk_wh <= 24) {
-            if (d >= 8) strength = 1;
-            if (d >= 16) strength = 2;
-            if (d >= 32) strength = 3;
-        } else if (blk_wh <= 32) {
-            if (d >= 1) strength = 1;
-            if (d >= 4) strength = 2;
-            if (d >= 32) strength = 3;
+static int get_filter_strength(const int wh, const int angle, const int is_sm) {
+    if (is_sm) {
+        if (wh <= 8) {
+            if (angle >= 64) return 2;
+            if (angle >= 40) return 1;
+        } else if (wh <= 16) {
+            if (angle >= 48) return 2;
+            if (angle >= 20) return 1;
+        } else if (wh <= 24) {
+            if (angle >=  4) return 3;
         } else {
-            if (d >= 1) strength = 3;
+            return 3;
         }
     } else {
-        if (blk_wh <= 8) {
-            if (d >= 40) strength = 1;
-            if (d >= 64) strength = 2;
-        } else if (blk_wh <= 16) {
-            if (d >= 20) strength = 1;
-            if (d >= 48) strength = 2;
-        } else if (blk_wh <= 24) {
-            if (d >= 4) strength = 3;
+        if (wh <= 8) {
+            if (angle >= 56) return 1;
+        } else if (wh <= 16) {
+            if (angle >= 40) return 1;
+        } else if (wh <= 24) {
+            if (angle >= 32) return 3;
+            if (angle >= 16) return 2;
+            if (angle >=  8) return 1;
+        } else if (wh <= 32) {
+            if (angle >= 32) return 3;
+            if (angle >=  4) return 2;
+            return 1;
         } else {
-            if (d >= 1) strength = 3;
+            return 3;
         }
     }
-
-    return strength;
+    return 0;
 }
 
 static void filter_edge(pixel *const out, const int sz,
@@ -451,12 +444,12 @@
     for (int y = 0, xpos = dx; y < height;
          y++, dst += PXSTRIDE(stride), xpos += dx)
     {
-        const int frac = (xpos >> 1) & 0x1F;
+        const int frac = xpos & 0x3E;
 
         for (int x = 0, base = xpos >> 6; x < width; x++, base += base_inc) {
             if (base < max_base_x) {
-                const int v = top[base] * (32 - frac) + top[base + 1] * frac;
-                dst[x] = iclip_pixel((v + 16) >> 5);
+                const int v = top[base] * (64 - frac) + top[base + 1] * frac;
+                dst[x] = (v + 32) >> 6;
             } else {
                 pixel_set(&dst[x], top[max_base_x], width - x);
                 break;
@@ -518,30 +511,29 @@
     }
     *topleft = *topleft_in;
 
-    const int min_base_x = -(1 + upsample_above);
     const int base_inc_x = 1 + upsample_above;
     const pixel *const left = &topleft[-(1 + upsample_left)];
-    const pixel *const top = &topleft[1 + upsample_above];
-    for (int y = 0, xpos = -dx; y < height;
+    for (int y = 0, xpos = ((1 + upsample_above) << 6) - dx; y < height;
          y++, xpos -= dx, dst += PXSTRIDE(stride))
     {
         int base_x = xpos >> 6;
-        const int frac_x = (xpos >> 1) & 0x1F;
+        const int frac_x = xpos & 0x3E;
 
         for (int x = 0, ypos = (y << (6 + upsample_left)) - dy; x < width;
              x++, base_x += base_inc_x, ypos -= dy)
         {
             int v;
-
-            if (base_x >= min_base_x) {
-                v = top[base_x] * (32 - frac_x) + top[base_x + 1] * frac_x;
+            if (base_x >= 0) {
+                v = topleft[base_x] * (64 - frac_x) +
+                    topleft[base_x + 1] * frac_x;
             } else {
                 const int base_y = ypos >> 6;
                 assert(base_y >= -(1 + upsample_left));
-                const int frac_y = (ypos >> 1) & 0x1F;
-                v = left[-base_y] * (32 - frac_y) + left[-(base_y + 1)] * frac_y;
+                const int frac_y = ypos & 0x3E;
+                v = left[-base_y] * (64 - frac_y) +
+                    left[-(base_y + 1)] * frac_y;
             }
-            dst[x] = iclip_pixel((v + 16) >> 5);
+            dst[x] = (v + 32) >> 6;
         }
     }
 }
@@ -588,13 +580,13 @@
     }
     const int base_inc = 1 + upsample_left;
     for (int x = 0, ypos = dy; x < width; x++, ypos += dy) {
-        const int frac = (ypos >> 1) & 0x1F;
+        const int frac = ypos & 0x3E;
 
         for (int y = 0, base = ypos >> 6; y < height; y++, base += base_inc) {
             if (base < max_base_y) {
-                const int v = left[-base] * (32 - frac) +
+                const int v = left[-base] * (64 - frac) +
                               left[-(base + 1)] * frac;
-                dst[y * PXSTRIDE(stride) + x] = iclip_pixel((v + 16) >> 5);
+                dst[y * PXSTRIDE(stride) + x] = (v + 32) >> 6;
             } else {
                 do {
                     dst[y * PXSTRIDE(stride) + x] = left[-max_base_y];
--- a/src/x86/ipred.asm
+++ b/src/x86/ipred.asm
@@ -1361,11 +1361,11 @@
     shr                 r5d, 6 ; base3
     movhps              xm0, [rsp+r5]
     vpblendd             m1, m2, 0xc0
-    pand                 m2, m4, m6 ; frac << 1
+    pand                 m2, m4, m6 ; frac
     vpblendd             m0, m1, 0xf0
-    psubw                m1, m5, m2 ; (32 - frac) << 1
+    psubw                m1, m5, m2 ; 64-frac
     psllw                m2, 8
-    por                  m1, m2     ; (32-frac, frac) << 1
+    por                  m1, m2     ; 64-frac, frac
     pmaddubsw            m0, m1
     paddw                m6, m7     ; xpos += dx
     pmulhrsw             m0, m3
@@ -1462,12 +1462,12 @@
     shr                 r5d, 6 ; base3
     movhps              xm0, [tlq+r5]
     vpblendd             m1, m2, 0xc0
-    pand                 m2, m4, m6 ; frac << 1
+    pand                 m2, m4, m6 ; frac
     vpblendd             m0, m1, 0xf0
-    psubw                m1, m5, m2 ; (32 - frac) << 1
+    psubw                m1, m5, m2 ; 64-frac
     psllw                m2, 8
     pshufb               m0, m8
-    por                  m1, m2     ; (32-frac, frac) << 1
+    por                  m1, m2     ; 64-frac, frac
     pmaddubsw            m0, m1
     pcmpgtw              m1, m9, m6 ; base < max_base_x
     pmulhrsw             m0, m3
@@ -2299,12 +2299,12 @@
     sar                  r5, 6 ; base3
     movhps              xm0, [tlq+r5]
     vpblendd             m1, m2, 0xc0
-    pand                 m2, m4, m6 ; frac << 1
+    pand                 m2, m4, m6 ; frac
     vpblendd             m0, m1, 0xf0
-    psubw                m1, m5, m2 ; (32 - frac) << 1
+    psubw                m1, m5, m2 ; 64-frac
     psllw                m2, 8
     pshufb               m0, m8
-    por                  m1, m2     ; (32-frac, frac) << 1
+    por                  m1, m2     ; 64-frac, frac
     pmaddubsw            m0, m1
     pcmpgtw              m1, m9, m6 ; base < max_base_y
     pmulhrsw             m0, m3