ref: f05d67067c63b08f5886b2f961944d990bdb0a8c
parent: 41a58e644010c51d0c1a0b4380bc974295718a6f
author: Luc Trudeau <[email protected]>
date: Wed Mar 25 19:12:54 EDT 2020
Extract sub_h4 out of inner loop. Also contains const correctness changes.
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -777,8 +777,8 @@
const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
for (int init_y = 0; init_y < h4; init_y += 16) {
+ const int sub_h4 = imin(h4, 16 + init_y);
for (int init_x = 0; init_x < w4; init_x += 16) {
- const int sub_h4 = imin(h4, 16 + init_y);
const int sub_w4 = imin(w4, init_x + 16);
int y_off = !!init_y, y, x;
for (y = init_y, t->by += init_y; y < sub_h4;
@@ -932,8 +932,8 @@
} else {
assert(refp != &f->sr_cur);
- int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
- int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
+ const int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
+ const int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
#define scale_mv(res, val, scale) do { \
const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32; \
@@ -1147,6 +1147,8 @@
const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
for (int init_y = 0; init_y < h4; init_y += 16) {
+ const int sub_h4 = imin(h4, 16 + init_y);
+ const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
for (int init_x = 0; init_x < w4; init_x += 16) {
if (b->pal_sz[0]) {
pixel *dst = ((pixel *) f->cur.data[0]) +
@@ -1177,7 +1179,6 @@
const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
int y, x;
- const int sub_h4 = imin(h4, 16 + init_y);
const int sub_w4 = imin(w4, init_x + 16);
for (y = init_y, t->by += init_y; y < sub_h4;
y += t_dim->h, t->by += t_dim->h)
@@ -1345,8 +1346,8 @@
hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
}
} else if (b->pal_sz[1]) {
- ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
- (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
+ const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
+ (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
const uint16_t (*pal)[8];
const uint8_t *pal_idx;
if (f->frame_thread.pass) {
@@ -1384,7 +1385,6 @@
const int uv_sb_has_bl =
init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
- const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
for (int pl = 0; pl < 2; pl++) {
for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
@@ -1520,7 +1520,7 @@
}
int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize bs,
- const Av1Block *const b)
+ const Av1Block *const b)
{
Dav1dTileState *const ts = t->ts;
const Dav1dFrameContext *const f = t->f;