shithub: dav1d

Download patch

ref: 5bc8a5002beadaa195fa99df46f10a793055a533
parent: 2653292c7f5f4d020c3a621427c284edfdb762b2
author: Martin Storsjö <[email protected]>
date: Wed Feb 5 05:43:10 EST 2020

arm: looprestoration: Prepare for 16bpc wiener filter by adding _8bpc to function names

--- a/src/arm/32/looprestoration.S
+++ b/src/arm/32/looprestoration.S
@@ -28,11 +28,11 @@
 #include "src/arm/asm.S"
 #include "util.S"
 
-// void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
-//                                 const pixel *src, ptrdiff_t stride,
-//                                 const int16_t fh[7], const intptr_t w,
-//                                 int h, enum LrEdgeFlags edges);
-function wiener_filter_h_neon, export=1
+// void dav1d_wiener_filter_h_8bpc_neon(int16_t *dst, const pixel (*left)[4],
+//                                      const pixel *src, ptrdiff_t stride,
+//                                      const int16_t fh[7], const intptr_t w,
+//                                      int h, enum LrEdgeFlags edges);
+function wiener_filter_h_8bpc_neon, export=1
         push            {r4-r11,lr}
         vpush           {q4}
         ldrd            r4,  r5,  [sp, #52]
@@ -367,11 +367,11 @@
 .purgem filter_4
 endfunc
 
-// void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
-//                                 const int16_t *mid, int w, int h,
-//                                 const int16_t fv[7], enum LrEdgeFlags edges,
-//                                 ptrdiff_t mid_stride);
-function wiener_filter_v_neon, export=1
+// void dav1d_wiener_filter_v_8bpc_neon(pixel *dst, ptrdiff_t stride,
+//                                      const int16_t *mid, int w, int h,
+//                                      const int16_t fv[7], enum LrEdgeFlags edges,
+//                                      ptrdiff_t mid_stride);
+function wiener_filter_v_8bpc_neon, export=1
         push            {r4-r7,lr}
         ldrd            r4,  r5,  [sp, #20]
         ldrd            r6,  r7,  [sp, #28]
@@ -548,9 +548,9 @@
 .purgem filter
 endfunc
 
-// void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
-//                             const pixel *src, int w, int h);
-function copy_narrow_neon, export=1
+// void dav1d_copy_narrow_8bpc_neon(pixel *dst, ptrdiff_t stride,
+//                                  const pixel *src, int w, int h);
+function copy_narrow_8bpc_neon, export=1
         push            {r4,lr}
         ldr             r4, [sp, #8]
         adr             r12, L(copy_narrow_tbl)
--- a/src/arm/64/looprestoration.S
+++ b/src/arm/64/looprestoration.S
@@ -28,11 +28,11 @@
 #include "src/arm/asm.S"
 #include "util.S"
 
-// void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
-//                                 const pixel *src, ptrdiff_t stride,
-//                                 const int16_t fh[7], const intptr_t w,
-//                                 int h, enum LrEdgeFlags edges);
-function wiener_filter_h_neon, export=1
+// void dav1d_wiener_filter_h_8bpc_neon(int16_t *dst, const pixel (*left)[4],
+//                                      const pixel *src, ptrdiff_t stride,
+//                                      const int16_t fh[7], const intptr_t w,
+//                                      int h, enum LrEdgeFlags edges);
+function wiener_filter_h_8bpc_neon, export=1
         mov             w8,  w5
         ld1             {v0.8h},  [x4]
         mov             w9,  #(1 << 14) - (1 << 2)
@@ -306,11 +306,11 @@
 .purgem filter
 endfunc
 
-// void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
-//                                 const int16_t *mid, int w, int h,
-//                                 const int16_t fv[7], enum LrEdgeFlags edges,
-//                                 ptrdiff_t mid_stride);
-function wiener_filter_v_neon, export=1
+// void dav1d_wiener_filter_v_8bpc_neon(pixel *dst, ptrdiff_t stride,
+//                                      const int16_t *mid, int w, int h,
+//                                      const int16_t fv[7], enum LrEdgeFlags edges,
+//                                      ptrdiff_t mid_stride);
+function wiener_filter_v_8bpc_neon, export=1
         mov             w8,  w4
         ld1             {v0.8h},  [x5]
         movi            v1.8h, #128
@@ -482,9 +482,9 @@
 .purgem filter
 endfunc
 
-// void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
-//                             const pixel *src, int w, int h);
-function copy_narrow_neon, export=1
+// void dav1d_copy_narrow_8bpc_neon(pixel *dst, ptrdiff_t stride,
+//                                  const pixel *src, int w, int h);
+function copy_narrow_8bpc_neon, export=1
         adr             x5,  L(copy_narrow_tbl)
         ldrh            w6,  [x5, w3, uxtw #1]
         sub             x5,  x5,  w6, uxth
--- a/src/arm/looprestoration_init_tmpl.c
+++ b/src/arm/looprestoration_init_tmpl.c
@@ -41,10 +41,10 @@
 // Compared to the reference C version, this is the output of the first pass
 // _subtracted_ by 1 << (bitdepth + 6 - round_bits_h) = 2048, i.e.
 // with round_offset precompensated.
-void dav1d_wiener_filter_h_neon(int16_t *dst, const pixel (*left)[4],
-                                const pixel *src, ptrdiff_t stride,
-                                const int16_t fh[7], const intptr_t w,
-                                int h, enum LrEdgeFlags edges);
+void BF(dav1d_wiener_filter_h, neon)(int16_t *dst, const pixel (*left)[4],
+                                     const pixel *src, ptrdiff_t stride,
+                                     const int16_t fh[7], const intptr_t w,
+                                     int h, enum LrEdgeFlags edges);
 // This calculates things slightly differently than the reference C version.
 // This version calculates roughly this:
 // fv[3] += 128;
@@ -53,12 +53,12 @@
 //     sum += mid[idx] * fv[i];
 // sum = (sum + rounding_off_v) >> round_bits_v;
 // This function assumes that the width is a multiple of 8.
-void dav1d_wiener_filter_v_neon(pixel *dst, ptrdiff_t stride,
-                                const int16_t *mid, int w, int h,
-                                const int16_t fv[7], enum LrEdgeFlags edges,
-                                ptrdiff_t mid_stride);
-void dav1d_copy_narrow_neon(pixel *dst, ptrdiff_t stride,
-                            const pixel *src, int w, int h);
+void BF(dav1d_wiener_filter_v, neon)(pixel *dst, ptrdiff_t stride,
+                                     const int16_t *mid, int w, int h,
+                                     const int16_t fv[7], enum LrEdgeFlags edges,
+                                     ptrdiff_t mid_stride);
+void BF(dav1d_copy_narrow, neon)(pixel *dst, ptrdiff_t stride,
+                                 const pixel *src, int w, int h);
 
 static void wiener_filter_neon(pixel *const dst, const ptrdiff_t dst_stride,
                                const pixel (*const left)[4],
@@ -70,27 +70,29 @@
     int mid_stride = (w + 7) & ~7;
 
     // Horizontal filter
-    dav1d_wiener_filter_h_neon(&mid[2 * mid_stride], left, dst, dst_stride,
-                               fh, w, h, edges);
+    BF(dav1d_wiener_filter_h, neon)(&mid[2 * mid_stride], left, dst, dst_stride,
+                                    fh, w, h, edges);
     if (edges & LR_HAVE_TOP)
-        dav1d_wiener_filter_h_neon(mid, NULL, lpf, lpf_stride,
-                                   fh, w, 2, edges);
+        BF(dav1d_wiener_filter_h, neon)(mid, NULL, lpf, lpf_stride,
+                                        fh, w, 2, edges);
     if (edges & LR_HAVE_BOTTOM)
-        dav1d_wiener_filter_h_neon(&mid[(2 + h) * mid_stride], NULL,
-                                   lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
-                                   fh, w, 2, edges);
+        BF(dav1d_wiener_filter_h, neon)(&mid[(2 + h) * mid_stride], NULL,
+                                        lpf + 6 * PXSTRIDE(lpf_stride),
+                                        lpf_stride, fh, w, 2, edges);
 
     // Vertical filter
     if (w >= 8)
-        dav1d_wiener_filter_v_neon(dst, dst_stride, &mid[2*mid_stride],
-                                   w & ~7, h, fv, edges, mid_stride * sizeof(*mid));
+        BF(dav1d_wiener_filter_v, neon)(dst, dst_stride, &mid[2*mid_stride],
+                                        w & ~7, h, fv, edges,
+                                        mid_stride * sizeof(*mid));
     if (w & 7) {
         // For uneven widths, do a full 8 pixel wide filtering into a temp
         // buffer and copy out the narrow slice of pixels separately into dest.
         ALIGN_STK_16(pixel, tmp, 64 * 8,);
-        dav1d_wiener_filter_v_neon(tmp, w & 7, &mid[2*mid_stride + (w & ~7)],
-                                   w & 7, h, fv, edges, mid_stride * sizeof(*mid));
-        dav1d_copy_narrow_neon(dst + (w & ~7), dst_stride, tmp, w & 7, h);
+        BF(dav1d_wiener_filter_v, neon)(tmp, w & 7, &mid[2*mid_stride + (w & ~7)],
+                                        w & 7, h, fv, edges,
+                                        mid_stride * sizeof(*mid));
+        BF(dav1d_copy_narrow, neon)(dst + (w & ~7), dst_stride, tmp, w & 7, h);
     }
 }
 
@@ -211,8 +213,8 @@
             dav1d_sgr_weighted1_neon(stripe, w & 7, dst + (w & ~7), dst_stride,
                                      tmp + (w & ~7), w & 7, h,
                                      (1 << 7) - sgr_wt[1]);
-            dav1d_copy_narrow_neon(dst + (w & ~7), dst_stride, stripe,
-                                   w & 7, h);
+            BF(dav1d_copy_narrow, neon)(dst + (w & ~7), dst_stride, stripe,
+                                        w & 7, h);
         }
     } else if (!dav1d_sgr_params[sgr_idx][1]) {
         ALIGN_STK_16(coef, tmp, 64 * 384,);
@@ -228,8 +230,8 @@
             ALIGN_STK_16(pixel, stripe, 64 * 8,);
             dav1d_sgr_weighted1_neon(stripe, w & 7, dst + (w & ~7), dst_stride,
                                      tmp + (w & ~7), w & 7, h, sgr_wt[0]);
-            dav1d_copy_narrow_neon(dst + (w & ~7), dst_stride, stripe,
-                                   w & 7, h);
+            BF(dav1d_copy_narrow, neon)(dst + (w & ~7), dst_stride, stripe,
+                                        w & 7, h);
         }
     } else {
         ALIGN_STK_16(coef, tmp1, 64 * 384,);
@@ -250,8 +252,8 @@
             dav1d_sgr_weighted2_neon(stripe, w & 7, dst + (w & ~7), dst_stride,
                                      tmp1 + (w & ~7), tmp2 + (w & ~7),
                                      w & 7, h, wt);
-            dav1d_copy_narrow_neon(dst + (w & ~7), dst_stride, stripe,
-                                   w & 7, h);
+            BF(dav1d_copy_narrow, neon)(dst + (w & ~7), dst_stride, stripe,
+                                        w & 7, h);
         }
     }
 }