ref: 3aff1a37ced8af38a40366490ea2662e9703bd02
parent: 604e62c987a3185764f3ecf69ab3529afbed6e39
author: David Michael Barr <[email protected]>
date: Thu Sep 27 18:48:06 EDT 2018
Introduce single-plane variant of cfl_pred. In recon_b_intra, if both alpha values are non-zero, predict both planes from AC and the first row of each plane's DC prediction, as before. Otherwise, predict the zero-alpha plane directly by DC, and predict the non-zero-alpha plane from AC and the first row of its DC prediction.
--- a/src/ipred.c
+++ b/src/ipred.c
@@ -880,6 +880,38 @@
cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
static NOINLINE void
+cfl_pred_1_c(pixel *dst, const ptrdiff_t stride, const int16_t *ac, // single-plane CfL predictor: fills width x height pixels of dst from ac
+ const pixel *const dc_pred, const int8_t alpha, // only dc_pred[0] is read; the same alpha scales every ac sample
+ const int width, const int height)
+{
+ for (int y = 0; y < height; y++) {
+ for (int x = 0; x < width; x++) {
+ const int diff = alpha * ac[x]; // signed product of alpha and the AC sample
+ dst[x] = iclip_pixel(dc_pred[0] + apply_sign((abs(diff) + 32) >> 6, // magnitude is rounded (+32) and shifted right by 6; apply_sign presumably restores diff's sign
+ diff));
+ }
+ ac += width; // ac rows are packed back to back, width entries each
+ dst += PXSTRIDE(stride); // step dst to the next output row
+ }
+}
+
+#define cfl_pred_1_fn(width) /* emits cfl_pred_1_<width>xN_c, a wrapper with the block width fixed */ \
+static void cfl_pred_1_##width##xN_c(pixel *const dst, \
+ const ptrdiff_t stride, \
+ const int16_t *const ac, \
+ const pixel *const dc_pred, \
+ const int8_t alpha, \
+ const int height) \
+{ \
+ cfl_pred_1_c(dst, stride, ac, dc_pred, alpha, width, height); /* forward to the generic loop, supplying width */ \
+}
+
+cfl_pred_1_fn( 4) // defines cfl_pred_1_4xN_c
+cfl_pred_1_fn( 8) // defines cfl_pred_1_8xN_c
+cfl_pred_1_fn(16) // defines cfl_pred_1_16xN_c
+cfl_pred_1_fn(32) // defines cfl_pred_1_32xN_c
+
+static NOINLINE void
cfl_pred_c(pixel *dstU, pixel *dstV, const ptrdiff_t stride,
const int16_t *ac, const pixel *const dc_pred,
const int8_t *const alphas, const int width, const int height)
@@ -1015,6 +1047,11 @@
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
+
+ c->cfl_pred_1[0] = cfl_pred_1_4xN_c; // slot 0: 4-wide single-plane predictor
+ c->cfl_pred_1[1] = cfl_pred_1_8xN_c; // slot 1: 8-wide
+ c->cfl_pred_1[2] = cfl_pred_1_16xN_c; // slot 2: 16-wide
+ c->cfl_pred_1[3] = cfl_pred_1_32xN_c; // slot 3: 32-wide
c->cfl_pred[0] = cfl_pred_4xN_c;
c->cfl_pred[1] = cfl_pred_8xN_c;
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -55,6 +55,17 @@
typedef decl_cfl_ac_fn(*cfl_ac_fn);
/*
+ * dst[x,y] = dc + alpha * ac[x,y]
+ * - alpha contains a q3 scalar in [-16,16] range;
+ * - dc_pred[] is the first line of the plane's DC prediction
+ */
+#define decl_cfl_pred_1_fn(name) /* prototype shared by the single-plane CfL predictors; width is not a parameter */ \
+void (name)(pixel *dst, ptrdiff_t stride, \
+ const int16_t *ac, const pixel *dc_pred, \
+ const int8_t alpha, const int height)
+typedef decl_cfl_pred_1_fn(*cfl_pred_1_fn); // function-pointer type built from the prototype above
+
+/*
* dst[plane][x,y] = dc[plane] + alpha[plane] * ac[x,y]
* - alphas contains two q3 scalars (one for each plane) in [-16,16] range;
* - dc_pred[] is the first line of each plane's DC prediction, the second plane
@@ -80,6 +91,7 @@
// chroma-from-luma
cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
+ cfl_pred_1_fn cfl_pred_1[4]; // single-plane CfL predictors, one slot per block width (4, 8, 16, 32)
cfl_pred_fn cfl_pred[4];
// palette
--- a/src/recon.c
+++ b/src/recon.c
@@ -870,8 +870,13 @@
top_sb_edge, DC_PRED, &angle,
cfl_uv_t_dim->w,
cfl_uv_t_dim->h, edge);
- dsp->ipred.intra_pred[cfl_uvtx][m](&uv_pred[32 * pl],
- 0, edge, 0);
+ if (b->cfl_alpha[pl] == 0) { // zero alpha: the DC prediction is this plane's final prediction
+ dsp->ipred.intra_pred[cfl_uvtx][m](uv_dst[pl], stride, // so write it straight into the destination plane
+ edge, 0);
+ } else {
+ dsp->ipred.intra_pred[cfl_uvtx][m](&uv_pred[32 * pl], // non-zero alpha: DC goes to this plane's slot of uv_pred, passed with stride 0
+ 0, edge, 0);
+ }
}
const int furthest_r =
((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
@@ -881,11 +886,25 @@
[cfl_uvtx](ac, y_src, f->cur.p.stride[0],
cbw4 - (furthest_r >> ss_hor),
cbh4 - (furthest_b >> ss_ver));
- dsp->ipred.cfl_pred[cfl_uv_t_dim->lw](uv_dst[0],
- uv_dst[1], stride,
- ac, uv_pred,
- b->cfl_alpha,
- cbh4 * 4);
+ if (b->cfl_alpha[0] == 0) { // plane 0 has zero alpha: only plane 1 is predicted here. NOTE(review): assumes both alphas are never zero, otherwise uv_pred[32] was not filled by the DC step - confirm
+ dsp->ipred.cfl_pred_1[cfl_uv_t_dim->lw](uv_dst[1],
+ stride, ac,
+ &uv_pred[32], // plane 1's DC slot
+ b->cfl_alpha[1],
+ cbh4 * 4);
+ } else if (b->cfl_alpha[1] == 0) { // plane 1 has zero alpha: only plane 0 is predicted here
+ dsp->ipred.cfl_pred_1[cfl_uv_t_dim->lw](uv_dst[0],
+ stride, ac,
+ uv_pred, // plane 0's DC slot
+ b->cfl_alpha[0],
+ cbh4 * 4);
+ } else { // both alphas non-zero: two-plane predictor, same call as before this change
+ dsp->ipred.cfl_pred[cfl_uv_t_dim->lw](uv_dst[0],
+ uv_dst[1], stride,
+ ac, uv_pred,
+ b->cfl_alpha,
+ cbh4 * 4);
+ }
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");