shithub: dav1d

Download patch

ref: 3aff1a37ced8af38a40366490ea2662e9703bd02
parent: 604e62c987a3185764f3ecf69ab3529afbed6e39
author: David Michael Barr <[email protected]>
date: Thu Sep 27 18:48:06 EDT 2018

Introduce single-plane variant of cfl_pred

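In recon_b_intra, if both alpha values are non-zero, predict both planes
from AC and the first row of each plane's DC prediction, as before.
Otherwise, write the DC prediction of the zero-alpha plane directly to its
destination, and predict the non-zero-alpha plane from AC and the first
row of its DC prediction using the new single-plane cfl_pred_1.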

--- a/src/ipred.c
+++ b/src/ipred.c
@@ -880,6 +880,38 @@
 cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
 
 static NOINLINE void
+cfl_pred_1_c(pixel *dst, const ptrdiff_t stride, const int16_t *ac,
+             const pixel *const dc_pred, const int8_t alpha,
+             const int width, const int height)
+{
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            const int diff = alpha * ac[x];
+            dst[x] = iclip_pixel(dc_pred[0] + apply_sign((abs(diff) + 32) >> 6,
+                                                         diff));
+        }
+        ac += width;
+        dst += PXSTRIDE(stride);
+    }
+}
+
+#define cfl_pred_1_fn(width) \
+static void cfl_pred_1_##width##xN_c(pixel *const dst, \
+                                     const ptrdiff_t stride, \
+                                     const int16_t *const ac, \
+                                     const pixel *const dc_pred, \
+                                     const int8_t alpha, \
+                                     const int height) \
+{ \
+    cfl_pred_1_c(dst, stride, ac, dc_pred, alpha, width, height); \
+}
+
+cfl_pred_1_fn( 4)
+cfl_pred_1_fn( 8)
+cfl_pred_1_fn(16)
+cfl_pred_1_fn(32)
+
+static NOINLINE void
 cfl_pred_c(pixel *dstU, pixel *dstV, const ptrdiff_t stride,
            const int16_t *ac, const pixel *const dc_pred,
            const int8_t *const alphas, const int width, const int height)
@@ -1015,6 +1047,11 @@
     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
+
+    c->cfl_pred_1[0] = cfl_pred_1_4xN_c;
+    c->cfl_pred_1[1] = cfl_pred_1_8xN_c;
+    c->cfl_pred_1[2] = cfl_pred_1_16xN_c;
+    c->cfl_pred_1[3] = cfl_pred_1_32xN_c;
 
     c->cfl_pred[0] = cfl_pred_4xN_c;
     c->cfl_pred[1] = cfl_pred_8xN_c;
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -55,6 +55,17 @@
 typedef decl_cfl_ac_fn(*cfl_ac_fn);
 
 /*
+ * dst[x,y] = dc + alpha * ac[x,y]
+ * - alpha contains a q3 scalar in [-16,16] range;
+ * - dc_pred[] is the first line of the plane's DC prediction
+ */
+#define decl_cfl_pred_1_fn(name) \
+void (name)(pixel *dst, ptrdiff_t stride, \
+            const int16_t *ac, const pixel *dc_pred, \
+            const int8_t alpha, const int height)
+typedef decl_cfl_pred_1_fn(*cfl_pred_1_fn);
+
+/*
  * dst[plane][x,y] = dc[plane] + alpha[plane] * ac[x,y]
  * - alphas contains two q3 scalars (one for each plane) in [-16,16] range;
  * - dc_pred[] is the first line of each plane's DC prediction, the second plane
@@ -80,6 +91,7 @@
 
     // chroma-from-luma
     cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
+    cfl_pred_1_fn cfl_pred_1[4];
     cfl_pred_fn cfl_pred[4];
 
     // palette
--- a/src/recon.c
+++ b/src/recon.c
@@ -870,8 +870,13 @@
                                                     top_sb_edge, DC_PRED, &angle,
                                                     cfl_uv_t_dim->w,
                                                     cfl_uv_t_dim->h, edge);
-                    dsp->ipred.intra_pred[cfl_uvtx][m](&uv_pred[32 * pl],
-                                                       0, edge, 0);
+                    if (b->cfl_alpha[pl] == 0) {
+                      dsp->ipred.intra_pred[cfl_uvtx][m](uv_dst[pl], stride,
+                                                         edge, 0);
+                    } else {
+                      dsp->ipred.intra_pred[cfl_uvtx][m](&uv_pred[32 * pl],
+                                                         0, edge, 0);
+                    }
                 }
                 const int furthest_r =
                     ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
@@ -881,11 +886,25 @@
                                  [cfl_uvtx](ac, y_src, f->cur.p.stride[0],
                                             cbw4 - (furthest_r >> ss_hor),
                                             cbh4 - (furthest_b >> ss_ver));
-                dsp->ipred.cfl_pred[cfl_uv_t_dim->lw](uv_dst[0],
-                                                      uv_dst[1], stride,
-                                                      ac, uv_pred,
-                                                      b->cfl_alpha,
-                                                      cbh4 * 4);
+                if (b->cfl_alpha[0] == 0) {
+                  dsp->ipred.cfl_pred_1[cfl_uv_t_dim->lw](uv_dst[1],
+                                                          stride, ac,
+                                                          &uv_pred[32],
+                                                          b->cfl_alpha[1],
+                                                          cbh4 * 4);
+                } else if (b->cfl_alpha[1] == 0) {
+                  dsp->ipred.cfl_pred_1[cfl_uv_t_dim->lw](uv_dst[0],
+                                                          stride, ac,
+                                                          uv_pred,
+                                                          b->cfl_alpha[0],
+                                                          cbh4 * 4);
+                } else {
+                  dsp->ipred.cfl_pred[cfl_uv_t_dim->lw](uv_dst[0],
+                                                        uv_dst[1], stride,
+                                                        ac, uv_pred,
+                                                        b->cfl_alpha,
+                                                        cbh4 * 4);
+                }
                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
                     ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
                     hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");