shithub: dav1d

Download patch

ref: f05d67067c63b08f5886b2f961944d990bdb0a8c
parent: 41a58e644010c51d0c1a0b4380bc974295718a6f
author: Luc Trudeau <[email protected]>
date: Wed Mar 25 19:12:54 EDT 2020

Hoist sub_h4 and sub_ch4 out of the inner init_x loop

Also contains const-correctness changes and an indentation fix in the
dav1d_recon_b_inter signature.

--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -777,8 +777,8 @@
     const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
 
     for (int init_y = 0; init_y < h4; init_y += 16) {
+        const int sub_h4 = imin(h4, 16 + init_y);
         for (int init_x = 0; init_x < w4; init_x += 16) {
-            const int sub_h4 = imin(h4, 16 + init_y);
             const int sub_w4 = imin(w4, init_x + 16);
             int y_off = !!init_y, y, x;
             for (y = init_y, t->by += init_y; y < sub_h4;
@@ -932,8 +932,8 @@
     } else {
         assert(refp != &f->sr_cur);
 
-        int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
-        int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
+        const int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
+        const int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
 #define scale_mv(res, val, scale) do { \
             const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
             res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
@@ -1147,6 +1147,8 @@
     const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
 
     for (int init_y = 0; init_y < h4; init_y += 16) {
+        const int sub_h4 = imin(h4, 16 + init_y);
+        const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
         for (int init_x = 0; init_x < w4; init_x += 16) {
             if (b->pal_sz[0]) {
                 pixel *dst = ((pixel *) f->cur.data[0]) +
@@ -1177,7 +1179,6 @@
             const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
                               intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
             int y, x;
-            const int sub_h4 = imin(h4, 16 + init_y);
             const int sub_w4 = imin(w4, init_x + 16);
             for (y = init_y, t->by += init_y; y < sub_h4;
                  y += t_dim->h, t->by += t_dim->h)
@@ -1345,8 +1346,8 @@
                     hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
                 }
             } else if (b->pal_sz[1]) {
-                ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
-                                           (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
+                const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
+                                              (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
                 const uint16_t (*pal)[8];
                 const uint8_t *pal_idx;
                 if (f->frame_thread.pass) {
@@ -1384,7 +1385,6 @@
             const int uv_sb_has_bl =
                 init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
                 intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
-            const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
             const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
             for (int pl = 0; pl < 2; pl++) {
                 for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
@@ -1520,7 +1520,7 @@
 }
 
 int bytefn(dav1d_recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize bs,
-                                 const Av1Block *const b)
+                                const Av1Block *const b)
 {
     Dav1dTileState *const ts = t->ts;
     const Dav1dFrameContext *const f = t->f;