ref: 0941aece66b70bfc2b4704cb3ec98068eba1fc52
parent: 08da01aa555d8786b0db35cb8f394177f7309426
author: Ronald S. Bultje <[email protected]>
date: Fri Nov 2 15:33:00 EDT 2018
Simplify blend() by making intermediate buffer contiguous
--- a/src/mc.h
+++ b/src/mc.h
@@ -82,7 +82,7 @@
#define decl_blend_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
- const pixel *tmp, ptrdiff_t tmp_stride, int w, int h, \
+ const pixel *tmp, int w, int h, \
const uint8_t *mask, ptrdiff_t mstride)
typedef decl_blend_fn(*blend_fn);
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -373,8 +373,7 @@
}
static void blend_c(pixel *dst, const ptrdiff_t dst_stride,
- const pixel *tmp, const ptrdiff_t tmp_stride,
- const int w, const int h,
+ const pixel *tmp, const int w, const int h,
const uint8_t *mask, const ptrdiff_t m_stride)
{
for (int y = 0; y < h; y++) {
@@ -383,7 +382,7 @@
dst[x] = blend_px(dst[x], tmp[x], mask[m_stride == 1 ? 0 : x]);
}
dst += PXSTRIDE(dst_stride);
- tmp += PXSTRIDE(tmp_stride);
+ tmp += w;
mask += m_stride;
}
}
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -589,15 +589,14 @@
dav1d_block_dimensions[sbtype_to_bs[a_r->sb_type]];
if (a_r->ref[0] > 0) {
- mc(t, lap, NULL, 128 * sizeof(pixel),
- iclip(a_b_dim[0], 2, b_dim[0]), imin(b_dim[1], 16) >> 1,
+ const int ow4 = iclip(a_b_dim[0], 2, b_dim[0]);
+ const int oh4 = imin(b_dim[1], 16) >> 1;
+ mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, oh4,
t->bx + x, t->by, pl, a_r->mv[0],
&f->refp[a_r->ref[0] - 1],
dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
- f->dsp->mc.blend(&dst[x * h_mul], dst_stride,
- lap, 128 * sizeof(pixel),
- h_mul * iclip(a_b_dim[0], 2, b_dim[0]),
- v_mul * imin(b_dim[1], 16) >> 1,
+ f->dsp->mc.blend(&dst[x * h_mul], dst_stride, lap,
+ h_mul * ow4, v_mul * oh4,
obmc_masks[imin(b_dim[3], 4) - ss_ver], 1);
i++;
}
@@ -613,16 +612,14 @@
dav1d_block_dimensions[sbtype_to_bs[l_r->sb_type]];
if (l_r->ref[0] > 0) {
- mc(t, lap, NULL, 32 * sizeof(pixel),
- imin(b_dim[0], 16) >> 1,
- iclip(l_b_dim[1], 2, b_dim[1]),
+ const int ow4 = imin(b_dim[0], 16) >> 1;
+ const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
+ mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
t->bx, t->by + y, pl, l_r->mv[0],
&f->refp[l_r->ref[0] - 1],
dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
f->dsp->mc.blend(&dst[y * v_mul * PXSTRIDE(dst_stride)], dst_stride,
- lap, 32 * sizeof(pixel),
- h_mul * imin(b_dim[0], 16) >> 1,
- v_mul * iclip(l_b_dim[1], 2, b_dim[1]),
+ lap, h_mul * ow4, v_mul * oh4,
obmc_masks[imin(b_dim[2], 4) - ss_hor], 0);
i++;
}
@@ -1127,7 +1124,7 @@
b->interintra_type == INTER_INTRA_BLEND ?
dav1d_ii_masks[bs][0][b->interintra_mode] :
dav1d_wedge_masks[bs][0][0][b->wedge_idx];
- dsp->mc.blend(dst, f->cur.p.stride[0], tmp, bw4 * 4 * sizeof(pixel),
+ dsp->mc.blend(dst, f->cur.p.stride[0], tmp,
bw4 * 4, bh4 * 4, ii_mask, bw4 * 4);
}
@@ -1247,7 +1244,7 @@
&angle, cbw4, cbh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
tl_edge, cbw4 * 4, cbh4 * 4, 0);
- dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp, cbw4 * 4 * sizeof(pixel),
+ dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,
cbw4 * 4, cbh4 * 4, ii_mask, cbw4 * 4);
}
}