shithub: dav1d

--- a/src/cdef_apply_tmpl.c

+++ b/src/cdef_apply_tmpl.c

@@ -89,7 +89,7 @@

     const int sbsz = 16;

     const int sb64w = f->sb128w << 1;

     const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;

-    const enum Dav1dPixelLayout layout = f->cur.p.p.layout;

+    const enum Dav1dPixelLayout layout = f->cur.p.layout;

     const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;

     const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;

     const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;

@@ -106,7 +106,7 @@

         if (edges & HAVE_BOTTOM) {

             // backup pre-filter data for next iteration

-            backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride,

+            backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.stride,

                          8, f->bw * 4, layout);

@@ -148,11 +148,11 @@

                 if (last_skip && edges & HAVE_LEFT) {

                     // we didn't backup the prefilter data because it wasn't

                     // there, so do it here instead

-                    backup2x8(lr_bak[bit], bptrs, f->cur.p.stride, 0, layout);

+                    backup2x8(lr_bak[bit], bptrs, f->cur.stride, 0, layout);

                 if (edges & HAVE_RIGHT) {

                     // backup pre-filter data for next iteration

-                    backup2x8(lr_bak[!bit], bptrs, f->cur.p.stride, 8, layout);

+                    backup2x8(lr_bak[!bit], bptrs, f->cur.stride, 8, layout);

                 // the actual filter

@@ -165,10 +165,10 @@

                 uv_sec_lvl += uv_sec_lvl == 3;

                 uv_sec_lvl <<= BITDEPTH - 8;

                 unsigned variance;

-                const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0],

+                const int dir = dsp->cdef.dir(bptrs[0], f->cur.stride[0],

                                               &variance);

                 if (y_lvl) {

-                    dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0], lr_bak[bit][0],

+                    dsp->cdef.fb[0](bptrs[0], f->cur.stride[0], lr_bak[bit][0],

                                     (pixel *const [2]) {

                                         &f->lf.cdef_line_ptr[tf][0][0][bx * 4],

                                         &f->lf.cdef_line_ptr[tf][0][1][bx * 4],

@@ -179,10 +179,10 @@

                 if (uv_lvl && has_chroma) {

                     const int uvdir =

-                        f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :

+                        f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :

                         ((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];

                     for (int pl = 1; pl <= 2; pl++) {

-                        dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1],

+                        dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.stride[1],

                                              lr_bak[bit][pl],

                                              (pixel *const [2]) {

                                                  &f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],

@@ -209,9 +209,9 @@

             iptrs[2] += sbsz * 4 >> ss_hor;

-        ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]);

-        ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;

-        ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;

+        ptrs[0] += 8 * PXSTRIDE(f->cur.stride[0]);

+        ptrs[1] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;

+        ptrs[2] += 8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;

         f->lf.top_pre_cdef_toggle ^= 1;

--- a/src/decode.c

+++ b/src/decode.c

@@ -422,11 +422,11 @@

         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +

                             ((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];

     if (i < pal_sz) {

-        int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);

+        int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.bpc);

         if (i < pal_sz) {

-            int bits = f->cur.p.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);

-            const int max = (1 << f->cur.p.p.bpc) - 1;

+            int bits = f->cur.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);

+            const int max = (1 << f->cur.p.bpc) - 1;

             do {

                 const int delta = msac_decode_bools(&ts->msac, bits);

@@ -478,9 +478,9 @@

         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +

                             ((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];

     if (msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) {

-        const int bits = f->cur.p.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);

-        int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);

-        const int max = (1 << f->cur.p.p.bpc) - 1;

+        const int bits = f->cur.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);

+        int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.bpc);

+        const int max = (1 << f->cur.p.bpc) - 1;

         for (int i = 1; i < b->pal_sz[1]; i++) {

             int delta = msac_decode_bools(&ts->msac, bits);

             if (delta && msac_decode_bool(&ts->msac, EC_BOOL_EPROB)) delta = -delta;

@@ -488,7 +488,7 @@

     } else {

         for (int i = 0; i < b->pal_sz[1]; i++)

-            pal[i] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);

+            pal[i] = msac_decode_bools(&ts->msac, f->cur.p.bpc);

     if (DEBUG_BLOCK_INFO) {

         printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);

@@ -634,7 +634,7 @@

         } else {

             assert(f->frame_hdr.txfm_mode == TX_LARGEST);

-        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];

+        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];

     } else {

         assert(imin(bw4, bh4) <= 16 || b->max_ytx == TX_64X64);

         int y, x, y_off, x_off;

@@ -652,7 +652,7 @@

         if (DEBUG_BLOCK_INFO)

             printf("Post-vartxtree[%x/%x]: r=%d\n",

                    b->tx_split[0], b->tx_split[1], t->ts->msac.rng);

-        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];

+        b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];

@@ -694,8 +694,8 @@

         &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;

     const uint8_t *const b_dim = dav1d_block_dimensions[bs];

     const int bx4 = t->bx & 31, by4 = t->by & 31;

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;

     const int bw4 = b_dim[0], bh4 = b_dim[1];

     const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);

@@ -1138,7 +1138,7 @@

             t_dim = &dav1d_txfm_dimensions[TX_4X4];

         } else {

             b->tx = dav1d_max_txfm_size_for_bs[bs][0];

-            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.p.layout];

+            b->uvtx = dav1d_max_txfm_size_for_bs[bs][f->cur.p.layout];

             t_dim = &dav1d_txfm_dimensions[b->tx];

             if (f->frame_hdr.txfm_mode == TX_SWITCHABLE && t_dim->max > TX_4X4) {

                 const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4);

@@ -1166,7 +1166,7 @@

                                    &f->frame_hdr, (const uint8_t (*)[8][2])

                                    &ts->lflvl[b->seg_id][0][0][0],

                                    t->bx, t->by, f->w4, f->h4, bs,

-                                   b->tx, b->uvtx, f->cur.p.p.layout,

+                                   b->tx, b->uvtx, f->cur.p.layout,

                                    &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],

                                    has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,

                                    has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);

@@ -1543,7 +1543,7 @@

                 if (f->seq_hdr.jnt_comp) {

                     const int jnt_ctx =

                         get_jnt_comp_ctx(f->seq_hdr.order_hint_n_bits,

-                                         f->cur.p.poc, f->refp[b->ref[0]].p.poc,

+                                         f->cur.poc, f->refp[b->ref[0]].p.poc,

                                          f->refp[b->ref[1]].p.poc, t->a, &t->l,

                                          by4, bx4);

                     b->comp_type = COMP_INTER_WEIGHTED_AVG +

@@ -1833,7 +1833,7 @@

         dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride,

                                    &f->frame_hdr, lf_lvls, t->bx, t->by,

                                    f->w4, f->h4, b->skip, bs, b->tx_split,

-                                   b->uvtx, f->cur.p.p.layout,

+                                   b->uvtx, f->cur.p.layout,

                                    &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],

                                    has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,

                                    has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);

@@ -1938,7 +1938,7 @@

             const unsigned n_part = bl == BL_8X8 ? N_SUB8X8_PARTITIONS :

                 bl == BL_128X128 ? N_PARTITIONS - 2 : N_PARTITIONS;

             bp = msac_decode_symbol_adapt(&t->ts->msac, pc, n_part);

-            if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&

+            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&

                 (bp == PARTITION_V || bp == PARTITION_V4 ||

                  bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))

@@ -2139,7 +2139,7 @@

         } else {

             uint16_t cdf[2] = { gather_left_partition_prob(pc, bl), 0 };

             is_split = msac_decode_symbol(&t->ts->msac, cdf, 2);

-            if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)

+            if (f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)

                 return 1;

             if (DEBUG_BLOCK_INFO)

                 printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",

@@ -2230,12 +2230,30 @@

     ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);

     // Reference Restoration Unit (used for exp coding)

-    Av1Filter *const lf_mask =

-        f->lf.mask + (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;

-    const int unit_idx = ((ts->tiling.row_start & 16) >> 3) +

-                         ((ts->tiling.col_start & 16) >> 4);

+    int sb_idx, unit_idx;

+    if (f->frame_hdr.super_res.enabled) {

+        // vertical components only

+        sb_idx = (ts->tiling.row_start >> 5) * f->sr_sb128w;

+        unit_idx = (ts->tiling.row_start & 16) >> 3;

+    } else {

+        sb_idx = (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;

+        unit_idx = ((ts->tiling.row_start & 16) >> 3) +

+                   ((ts->tiling.col_start & 16) >> 4);

+    }

     for (int p = 0; p < 3; p++) {

-        ts->lr_ref[p] = &lf_mask->lr[p][unit_idx];

+        // Pick the restoration unit that serves as this tile's initial
+        // reference (ts->lr_ref[p]) for plane p.
+        if (f->frame_hdr.super_res.enabled) {
+            // With super-resolution the horizontal position is derived from
+            // the tile's start column scaled by the super-res denominator and
+            // rounded up to a whole restoration unit, mirroring the x0/x1
+            // computation used when the units themselves are read.
+            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+            const int d = f->frame_hdr.super_res.width_scale_denominator;
+            const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!p];
+            const int rnd = (8 << unit_size_log2) - 1, shift = unit_size_log2 + 3;
+            const int x = ((4 * ts->tiling.col_start * d >> ss_hor) + rnd) >> shift;
+            const int px_x = x << (unit_size_log2 + ss_hor);
+            // Bit 6 of the pixel position selects the left/right 64-pixel half
+            // of the 128-pixel superblock, so it must be reduced to 0 or 1.
+            // (Shifting by 1 produced 32 here, far outside the 0..3 range that
+            // every other unit_idx computation yields.)
+            const int u_idx = unit_idx + ((px_x & 64) >> 6);
+            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx + (px_x >> 7)].lr[p][u_idx];
+        } else {
+            ts->lr_ref[p] = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];
+        }

         ts->lr_ref[p]->filter_v[0] = 3;

         ts->lr_ref[p]->filter_v[1] = -7;

         ts->lr_ref[p]->filter_v[2] = 15;

@@ -2250,6 +2268,87 @@

         atomic_init(&ts->progress, row_sb_start);

+static void read_restoration_info(Dav1dTileContext *const t,
+                                  Av1RestorationUnit *const lr, const int p,
+                                  const enum RestorationType frame_type)
+{   /* Decode the loop-restoration parameters of one restoration unit from
+     * the tile's entropy decoder (t->ts->msac) and store them in *lr.
+     *
+     *   t          - tile context; provides the tile state holding the
+     *                decoder, the CDFs and the per-plane reference unit
+     *                ts->lr_ref[].
+     *   lr         - restoration unit to fill in.
+     *   p          - plane index; selects ts->lr_ref[p]. Only plane 0 has an
+     *                explicitly coded first Wiener tap.
+     *   frame_type - restoration type passed in by the caller for this plane.
+     *
+     * Filter values are coded relative to ts->lr_ref[p]. After a Wiener or
+     * self-guided unit has been read, ts->lr_ref[p] is pointed at lr so the
+     * next unit of that plane is predicted from it; a RESTORATION_NONE unit
+     * leaves the reference untouched. Every msac_decode_* call operates on
+     * the same decoder state, so presumably the call order below is
+     * significant and must be kept as is.
+     */
+    const Dav1dFrameContext *const f = t->f; // NOTE(review): not referenced directly below; presumably needed by the DEBUG_BLOCK_INFO expansion - confirm
+    Dav1dTileState *const ts = t->ts;
+    if (frame_type == RESTORATION_SWITCHABLE) { // per-unit 3-way symbol: 0 = none, 1 = Wiener, 2 = sgrproj
+        const int filter =
+            msac_decode_symbol_adapt(&ts->msac,
+                                     ts->cdf.m.restore_switchable, 3);
+        lr->type = filter ? filter == 2 ? RESTORATION_SGRPROJ :
+                                          RESTORATION_WIENER :
+                            RESTORATION_NONE;
+    } else { // fixed filter kind: a single adaptive bool switches it on or off for this unit
+        const unsigned type =
+            msac_decode_bool_adapt(&ts->msac,
+                                   frame_type == RESTORATION_WIENER ?
+                                       ts->cdf.m.restore_wiener :
+                                       ts->cdf.m.restore_sgrproj);
+        lr->type = type ? frame_type : RESTORATION_NONE;
+    }
+    if (lr->type == RESTORATION_WIENER) { /* each tap: add an offset to the
+        * reference unit's tap, decode relative to that with
+        * msac_decode_subexp(), then subtract the same offset again */
+        lr->filter_v[0] =
+            !p ? msac_decode_subexp(&ts->msac,
+                                    ts->lr_ref[p]->filter_v[0] + 5, 16,
+                                    1) - 5:
+                 0; // planes other than 0: first vertical tap is not coded, fixed to 0
+        lr->filter_v[1] =
+            msac_decode_subexp(&ts->msac,
+                               ts->lr_ref[p]->filter_v[1] + 23, 32,
+                               2) - 23;
+        lr->filter_v[2] =
+            msac_decode_subexp(&ts->msac,
+                               ts->lr_ref[p]->filter_v[2] + 17, 64,
+                               3) - 17;
+        lr->filter_h[0] =
+            !p ? msac_decode_subexp(&ts->msac,
+                                    ts->lr_ref[p]->filter_h[0] + 5, 16,
+                                    1) - 5:
+                0; // planes other than 0: first horizontal tap is not coded, fixed to 0
+        lr->filter_h[1] =
+            msac_decode_subexp(&ts->msac,
+                               ts->lr_ref[p]->filter_h[1] + 23, 32,
+                               2) - 23;
+        lr->filter_h[2] =
+            msac_decode_subexp(&ts->msac,
+                               ts->lr_ref[p]->filter_h[2] + 17, 64,
+                               3) - 17;
+        memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights)); // carry the reference's sgr weights along unchanged
+        ts->lr_ref[p] = lr; // this unit becomes the reference for the next one of plane p
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
+                   p, lr->filter_v[0], lr->filter_v[1],
+                   lr->filter_v[2], lr->filter_h[0],
+                   lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
+    } else if (lr->type == RESTORATION_SGRPROJ) {
+        const unsigned idx = msac_decode_bools(&ts->msac, 4); // 4-bit index into dav1d_sgr_params
+        lr->sgr_idx = idx;
+        lr->sgr_weights[0] = dav1d_sgr_params[idx][0] ?
+            msac_decode_subexp(&ts->msac,
+                               ts->lr_ref[p]->sgr_weights[0] + 96, 128,
+                               4) - 96 :
+            0; // first parameter of this set is zero: weight 0 is not coded
+        lr->sgr_weights[1] = dav1d_sgr_params[idx][1] ?
+            msac_decode_subexp(&ts->msac,
+                               ts->lr_ref[p]->sgr_weights[1] + 32, 128,
+                               4) - 32 :
+            iclip(128 - lr->sgr_weights[0], -32, 95); // not coded: derived from weight 0 and clipped to [-32, 95]
+        memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v)); // carry the reference's Wiener taps along unchanged
+        memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
+        ts->lr_ref[p] = lr; // this unit becomes the reference for the next one of plane p
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
+                   p, lr->sgr_idx, lr->sgr_weights[0],
+                   lr->sgr_weights[1], ts->msac.rng);
+    }
+}

 int dav1d_decode_tile_sbrow(Dav1dTileContext *const t) {

     const Dav1dFrameContext *const f = t->f;

     const enum BlockLevel root_bl = f->seq_hdr.sb128 ? BL_128X128 : BL_64X64;

@@ -2275,9 +2374,6 @@

         return 0;

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     if (c->n_fc > 1 && f->frame_hdr.use_ref_frame_mvs) {

         for (int n = 0; n < 7; n++)

             if (dav1d_thread_picture_wait(&f->refp[n], 4 * (t->by + sb_step),

@@ -2311,99 +2407,52 @@

         for (int p = 0; p < 3; p++) {

             if (f->frame_hdr.restoration.type[p] == RESTORATION_NONE)

                 continue;

-            const int by = t->by >> (ss_ver & !!p);

-            const int bx = t->bx >> (ss_hor & !!p);

-            const int bh = f->bh >> (ss_ver & !!p);

-            const int bw = f->bw >> (ss_hor & !!p);

-            const int unit_size_log2 =

-                f->frame_hdr.restoration.unit_size[!!p];

-            // 4pel unit size

-            const int b_unit_size = 1 << (unit_size_log2 - 2);

-            const unsigned mask = b_unit_size - 1;

-            if (by & mask || bx & mask) continue;

-            const int half_unit = b_unit_size >> 1;

+            const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+            const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+            const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!p];

+            const int y = t->by * 4 >> ss_ver;

+            const int h = (f->cur.p.h + ss_ver) >> ss_ver;

+            const int unit_size = 1 << unit_size_log2;

+            const unsigned mask = unit_size - 1;

+            if (y & mask) continue;

+            const int half_unit = unit_size >> 1;

             // Round half up at frame boundaries, if there's more than one

             // restoration unit

-            const int bottom_round = by && by + half_unit > bh;

-            const int right_round = bx && bx + half_unit > bw;

-            if (bottom_round || right_round) continue;

-            const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);

-            Av1RestorationUnit *const lr = &t->lf_mask->lr[p][unit_idx];

-            const enum RestorationType frame_type =

-                f->frame_hdr.restoration.type[p];

+            if (y && y + half_unit > h) continue;

-            if (frame_type == RESTORATION_SWITCHABLE) {

-                const int filter =

-                    msac_decode_symbol_adapt(&ts->msac,

-                                             ts->cdf.m.restore_switchable, 3);

-                lr->type = filter ? filter == 2 ? RESTORATION_SGRPROJ :

-                                                  RESTORATION_WIENER :

-                                    RESTORATION_NONE;

-            } else {

-                const unsigned type =

-                    msac_decode_bool_adapt(&ts->msac,

-                                           frame_type == RESTORATION_WIENER ?

-                                               ts->cdf.m.restore_wiener :

-                                               ts->cdf.m.restore_sgrproj);

-                lr->type = type ? frame_type : RESTORATION_NONE;

-            }

+            const enum RestorationType frame_type = f->frame_hdr.restoration.type[p];

-            if (lr->type == RESTORATION_WIENER) {

-                lr->filter_v[0] =

-                    !p ? msac_decode_subexp(&ts->msac,

-                                            ts->lr_ref[p]->filter_v[0] + 5, 16,

-                                            1) - 5:

-                         0;

-                lr->filter_v[1] =

-                    msac_decode_subexp(&ts->msac,

-                                       ts->lr_ref[p]->filter_v[1] + 23, 32,

-                                       2) - 23;

-                lr->filter_v[2] =

-                    msac_decode_subexp(&ts->msac,

-                                       ts->lr_ref[p]->filter_v[2] + 17, 64,

-                                       3) - 17;

+            if (f->frame_hdr.super_res.enabled) {

+                const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;

+                const int n_units = imax(1, (w + half_unit) >> unit_size_log2);

-                lr->filter_h[0] =

-                    !p ? msac_decode_subexp(&ts->msac,

-                                            ts->lr_ref[p]->filter_h[0] + 5, 16,

-                                            1) - 5:

-                        0;

-                lr->filter_h[1] =

-                    msac_decode_subexp(&ts->msac,

-                                       ts->lr_ref[p]->filter_h[1] + 23, 32,

-                                       2) - 23;

-                lr->filter_h[2] =

-                    msac_decode_subexp(&ts->msac,

-                                       ts->lr_ref[p]->filter_h[2] + 17, 64,

-                                       3) - 17;

-                memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));

-                ts->lr_ref[p] = lr;

-                if (DEBUG_BLOCK_INFO)

-                    printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",

-                           p, lr->filter_v[0], lr->filter_v[1],

-                           lr->filter_v[2], lr->filter_h[0],

-                           lr->filter_h[1], lr->filter_h[2], ts->msac.rng);

-            } else if (lr->type == RESTORATION_SGRPROJ) {

-                const unsigned idx = msac_decode_bools(&ts->msac, 4);

-                lr->sgr_idx = idx;

-                lr->sgr_weights[0] = dav1d_sgr_params[idx][0] ?

-                    msac_decode_subexp(&ts->msac,

-                                       ts->lr_ref[p]->sgr_weights[0] + 96, 128,

-                                       4) - 96 :

-                    0;

-                lr->sgr_weights[1] = dav1d_sgr_params[idx][1] ?

-                    msac_decode_subexp(&ts->msac,

-                                       ts->lr_ref[p]->sgr_weights[1] + 32, 128,

-                                       4) - 32 :

-                    iclip(128 - lr->sgr_weights[0], -32, 95);

-                memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));

-                memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));

-                ts->lr_ref[p] = lr;

-                if (DEBUG_BLOCK_INFO)

-                    printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",

-                           p, lr->sgr_idx, lr->sgr_weights[0],

-                           lr->sgr_weights[1], ts->msac.rng);

+                const int d = f->frame_hdr.super_res.width_scale_denominator;

+                const int rnd = unit_size * 8 - 1, shift = unit_size_log2 + 3;

+                const int x0 = ((4 *  t->bx            * d >> ss_hor) + rnd) >> shift;

+                const int x1 = ((4 * (t->bx + sb_step) * d >> ss_hor) + rnd) >> shift;

+                for (int x = x0; x < imin(x1, n_units); x++) {

+                    const int px_x = x << (unit_size_log2 + ss_hor);

+                    const int sb_idx = (t->by >> 5) * f->sr_sb128w + (px_x >> 7);

+                    const int unit_idx = ((t->by & 16) >> 3) + ((px_x & 64) >> 6);

+                    Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];

+                    read_restoration_info(t, lr, p, frame_type);

+                }

+            } else {

+                const int x = 4 * t->bx >> ss_hor;

+                if (x & mask) continue;

+                const int w = (f->cur.p.w + ss_hor) >> ss_hor;

+                // Round half up at frame boundaries, if there's more than one

+                // restoration unit

+                if (x && x + half_unit > w) continue;

+                const int sb_idx = (t->by >> 5) * f->sr_sb128w + (t->bx >> 5);

+                const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);

+                Av1RestorationUnit *const lr = &f->lf.lr_mask[sb_idx].lr[p][unit_idx];

+                read_restoration_info(t, lr, p, frame_type);

         if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))

@@ -2423,8 +2472,8 @@

     int align_h = (f->bh + 31) & ~31;

     memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],

            &t->l.tx_lpf_y[t->by & 16], sb_step);

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

     align_h >>= ss_ver;

     memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> ss_ver)],

            &t->l.tx_lpf_uv[(t->by & 16) >> ss_ver], sb_step >> ss_ver);

@@ -2512,22 +2561,13 @@

     // update allocation of block contexts for above

     if (f->sb128w > f->lf.line_sz) {

         dav1d_freep_aligned(&f->lf.cdef_line);

-        dav1d_freep_aligned(&f->lf.lr_lpf_line);

         // note that we allocate all pixel arrays as if we were dealing with

         // 10 bits/component data

         uint16_t *ptr = f->lf.cdef_line =

             dav1d_alloc_aligned(f->b4_stride * 4 * 12 * sizeof(uint16_t), 32);

+        if (!ptr) return -ENOMEM;

-        uint16_t *lr_ptr = f->lf.lr_lpf_line =

-            dav1d_alloc_aligned(f->b4_stride * 4 * 3 * 12 * sizeof(uint16_t), 32);

-        if (!ptr || !lr_ptr) {

-            if (ptr) dav1d_free_aligned(ptr);

-            if (lr_ptr) dav1d_free_aligned(lr_ptr);

-            return -ENOMEM;

-        }

         for (int pl = 0; pl <= 2; pl++) {

             f->lf.cdef_line_ptr[0][pl][0] = ptr + f->b4_stride * 4 * 0;

             f->lf.cdef_line_ptr[0][pl][1] = ptr + f->b4_stride * 4 * 1;

@@ -2534,12 +2574,26 @@

             f->lf.cdef_line_ptr[1][pl][0] = ptr + f->b4_stride * 4 * 2;

             f->lf.cdef_line_ptr[1][pl][1] = ptr + f->b4_stride * 4 * 3;

             ptr += f->b4_stride * 4 * 4;

+        }

+        f->lf.line_sz = f->sb128w;

+    }

+    const ptrdiff_t lr_stride = (f->sr_cur.p.p.w + 31) & ~31;

+    if (lr_stride > f->lf.lr_line_sz) {

+        dav1d_freep_aligned(&f->lf.lr_lpf_line);

+        uint16_t *lr_ptr = f->lf.lr_lpf_line =

+            dav1d_alloc_aligned(lr_stride * 3 * 12 * sizeof(uint16_t), 32);

+        if (!lr_ptr) return -ENOMEM;

+        for (int pl = 0; pl <= 2; pl++) {

             f->lf.lr_lpf_line_ptr[pl] = lr_ptr;

-            lr_ptr += f->b4_stride * 4 * 12;

+            lr_ptr += lr_stride * 12;

-        f->lf.line_sz = f->sb128w;

+        f->lf.lr_line_sz = lr_stride;

     // update allocation for loopfilter masks

@@ -2579,6 +2633,13 @@

         f->lf.mask_sz = f->sb128w * f->sb128h;

+    f->sr_sb128w = (f->sr_cur.p.p.w + 127) >> 7;

+    if (f->sr_sb128w * f->sb128h > f->lf.lr_mask_sz) {

+        freep(&f->lf.lr_mask);

+        f->lf.lr_mask = malloc(f->sr_sb128w * f->sb128h * sizeof(*f->lf.lr_mask));

+        if (!f->lf.lr_mask) return -ENOMEM;

+        f->lf.lr_mask_sz = f->sr_sb128w * f->sb128h;

+    }

     if (f->frame_hdr.loopfilter.sharpness != f->lf.last_sharpness) {

         dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr.loopfilter.sharpness);

         f->lf.last_sharpness = f->frame_hdr.loopfilter.sharpness;

@@ -2612,7 +2673,7 @@

         const int order_hint_n_bits = f->seq_hdr.order_hint * f->seq_hdr.order_hint_n_bits;

         const int ret = av1_init_ref_mv_common(f->libaom_cm, f->bw >> 1, f->bh >> 1,

                                                f->b4_stride, f->seq_hdr.sb128,

-                                               f->mvs, f->ref_mvs, f->cur.p.poc, f->refpoc,

+                                               f->mvs, f->ref_mvs, f->cur.poc, f->refpoc,

                                                f->refrefpoc, f->frame_hdr.gmv,

                                                f->frame_hdr.hp, f->frame_hdr.force_integer_mv,

                                                f->frame_hdr.use_ref_frame_mvs,

@@ -2644,9 +2705,9 @@

                 const unsigned ref1poc = f->refp[j].p.poc;

                 const unsigned d1 = imin(abs(get_poc_diff(f->seq_hdr.order_hint_n_bits,

-                                                          ref0poc, f->cur.p.poc)), 31);

+                                                          ref0poc, f->cur.poc)), 31);

                 const unsigned d0 = imin(abs(get_poc_diff(f->seq_hdr.order_hint_n_bits,

-                                                          ref1poc, f->cur.p.poc)), 31);

+                                                          ref1poc, f->cur.poc)), 31);

                 const int order = d0 <= d1;

                 static const uint8_t quant_dist_weight[3][2] = {

@@ -2672,9 +2733,12 @@

     // init loopfilter pointers

     f->lf.mask_ptr = f->lf.mask;

-    f->lf.p[0] = f->cur.p.data[0];

-    f->lf.p[1] = f->cur.p.data[1];

-    f->lf.p[2] = f->cur.p.data[2];

+    f->lf.p[0] = f->cur.data[0];

+    f->lf.p[1] = f->cur.data[1];

+    f->lf.p[2] = f->cur.data[2];

+    f->lf.sr_p[0] = f->sr_cur.p.data[0];

+    f->lf.sr_p[1] = f->sr_cur.p.data[1];

+    f->lf.sr_p[2] = f->sr_cur.p.data[2];

     f->lf.tile_row = 1;

     dav1d_cdf_thread_wait(&f->in_cdf);

@@ -2758,7 +2822,7 @@

                     // loopfilter + cdef + restoration

                     if (f->frame_thread.pass != 1)

                         f->bd_fn.filter_sbrow(f, sby);

-                    dav1d_thread_picture_signal(&f->cur, (sby + 1) * f->sb_step * 4,

+                    dav1d_thread_picture_signal(&f->sr_cur, (sby + 1) * f->sb_step * 4,

                                                 progress_plane_type);

@@ -2802,7 +2866,7 @@

                             pthread_mutex_unlock(&ts->tile_thread.lock);

                         if (progress == TILE_ERROR) {

-                            dav1d_thread_picture_signal(&f->cur, FRAME_ERROR,

+                            dav1d_thread_picture_signal(&f->sr_cur, FRAME_ERROR,

                                                         progress_plane_type);

                             const uint64_t all_mask = ~0ULL >> (64 - f->n_tc);

                             pthread_mutex_lock(&f->tile_thread.lock);

@@ -2816,7 +2880,7 @@

                     // loopfilter + cdef + restoration

                     if (f->frame_thread.pass != 1)

                         f->bd_fn.filter_sbrow(f, sby);

-                    dav1d_thread_picture_signal(&f->cur, (sby + 1) * f->sb_step * 4,

+                    dav1d_thread_picture_signal(&f->sr_cur, (sby + 1) * f->sb_step * 4,

                                                 progress_plane_type);

@@ -2855,7 +2919,7 @@

     retval = 0;

 error:

-    dav1d_thread_picture_signal(&f->cur, retval == 0 ? UINT_MAX : FRAME_ERROR,

+    dav1d_thread_picture_signal(&f->sr_cur, retval == 0 ? UINT_MAX : FRAME_ERROR,

                                 PLANE_TYPE_ALL);

     for (int i = 0; i < 7; i++) {

         if (f->refp[i].p.data[0])

@@ -2863,7 +2927,8 @@

         dav1d_ref_dec(&f->ref_mvs_ref[i]);

-    dav1d_thread_picture_unref(&f->cur);

+    dav1d_picture_unref(&f->cur);

+    dav1d_thread_picture_unref(&f->sr_cur);

     dav1d_cdf_thread_unref(&f->in_cdf);

     if (f->frame_hdr.refresh_context) {

         dav1d_cdf_thread_signal(&f->out_cdf);

@@ -2879,6 +2944,12 @@

     return retval;

+/*
+ * Initial horizontal sub-pixel position used when upscaling a row from in_w
+ * to out_w pixels. Positions are 14-bit fixed point (in_w << 14 is one full
+ * input row) and the result is masked to those 14 bits.
+ *
+ *   in_w  - width before upscaling
+ *   out_w - width after upscaling; must be non-zero, it is used as divisor
+ *   step  - per-output-pixel step in the same 14-bit fixed point
+ *
+ * err is the drift accumulated by taking out_w steps; half of it is
+ * subtracted so the drift is split evenly between both ends of the row.
+ * err can be negative, so it is halved with '/', which truncates toward
+ * zero like the corresponding AV1 upscaling formula and libaom. The former
+ * 'err >> 1' is implementation-defined for negative values and, with the
+ * usual arithmetic shift, rounds toward minus infinity, giving a result that
+ * is off by one for odd negative err.
+ */
+static int get_upscale_x0(const int in_w, const int out_w, const int step) {
+    const int err = out_w * step - (in_w << 14);
+    const int x0 = (-((out_w - in_w) << 13) + (out_w >> 1)) / out_w + 128 - (err / 2);
+    return x0 & 0x3fff;
+}

 int dav1d_submit_frame(Dav1dContext *const c) {

     Dav1dFrameContext *f;

     int res = -1;

@@ -2966,9 +3037,9 @@

         for (int i = 0; i < 7; i++) {

             const int refidx = f->frame_hdr.refidx[i];

             if (!c->refs[refidx].p.p.data[0] ||

-                f->frame_hdr.width * 2 < c->refs[refidx].p.p.p.w ||

+                f->frame_hdr.width[0] * 2 < c->refs[refidx].p.p.p.w ||

                 f->frame_hdr.height * 2 < c->refs[refidx].p.p.p.h ||

-                f->frame_hdr.width > c->refs[refidx].p.p.p.w * 16 ||

+                f->frame_hdr.width[0] > c->refs[refidx].p.p.p.w * 16 ||

                 f->frame_hdr.height > c->refs[refidx].p.p.p.h * 16 ||

                 f->seq_hdr.layout != c->refs[refidx].p.p.p.layout ||

                 f->seq_hdr.bpc != c->refs[refidx].p.p.p.bpc)

@@ -2979,16 +3050,16 @@

                 goto error;

             dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);

-            if (f->frame_hdr.width  != c->refs[refidx].p.p.p.w ||

+            f->ref_coded_width[i] = c->refs[refidx].coded_width;

+            if (f->frame_hdr.width[0] != c->refs[refidx].p.p.p.w ||

                 f->frame_hdr.height != c->refs[refidx].p.p.p.h)

 #define scale_fac(ref_sz, this_sz) \

-    (((ref_sz << 14) + (this_sz >> 1)) / this_sz)

+    ((((ref_sz) << 14) + ((this_sz) >> 1)) / (this_sz))

                 f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,

-                                               f->frame_hdr.width);

+                                               f->frame_hdr.width[0]);

                 f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,

                                                f->frame_hdr.height);

-#undef scale_fac

                 f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;

                 f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;

             } else {

@@ -3015,35 +3086,53 @@

     c->n_tile_data = 0;

     // allocate frame

-    if ((res = dav1d_thread_picture_alloc(&f->cur, f->frame_hdr.width,

-                                          f->frame_hdr.height,

-                                          f->seq_hdr.layout, f->seq_hdr.bpc,

-                                          c->n_fc > 1 ? &f->frame_thread.td : NULL,

-                                          f->frame_hdr.show_frame,

-                                          &c->allocator)) < 0)

-    {

-        goto error;

+    res = dav1d_thread_picture_alloc(&f->sr_cur, f->frame_hdr.width[1],

+                                     f->frame_hdr.height,

+                                     f->seq_hdr.layout, f->seq_hdr.bpc,

+                                     c->n_fc > 1 ? &f->frame_thread.td : NULL,

+                                     f->frame_hdr.show_frame, &c->allocator);

+    if (res < 0) goto error;

+    f->sr_cur.p.poc = f->frame_hdr.frame_offset;

+    f->sr_cur.p.p.type = f->frame_hdr.frame_type;

+    f->sr_cur.p.p.pri = f->seq_hdr.pri;

+    f->sr_cur.p.p.trc = f->seq_hdr.trc;

+    f->sr_cur.p.p.mtrx = f->seq_hdr.mtrx;

+    f->sr_cur.p.p.chr = f->seq_hdr.chr;

+    f->sr_cur.p.p.fullrange = f->seq_hdr.color_range;

+    if (f->frame_hdr.super_res.enabled) {

+        res = dav1d_picture_alloc(&f->cur, f->frame_hdr.width[0],

+                                  f->frame_hdr.height, f->seq_hdr.layout,

+                                  f->seq_hdr.bpc, &c->allocator);

+        if (res < 0) goto error;

+        f->cur.poc = f->frame_hdr.frame_offset;

+    } else {

+        dav1d_picture_ref(&f->cur, &f->sr_cur.p);

-    f->cur.p.poc = f->frame_hdr.frame_offset;

-    f->cur.p.p.type = f->frame_hdr.frame_type;

-    f->cur.p.p.pri = f->seq_hdr.pri;

-    f->cur.p.p.trc = f->seq_hdr.trc;

-    f->cur.p.p.mtrx = f->seq_hdr.mtrx;

-    f->cur.p.p.chr = f->seq_hdr.chr;

-    f->cur.p.p.fullrange = f->seq_hdr.color_range;

+    if (f->frame_hdr.super_res.enabled) {

+        f->resize_step[0] = scale_fac(f->cur.p.w, f->sr_cur.p.p.w);

+        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+        const int in_cw = (f->cur.p.w + ss_hor) >> ss_hor;

+        const int out_cw = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;

+        f->resize_step[1] = scale_fac(in_cw, out_cw);

+#undef scale_fac

+        f->resize_start[0] = get_upscale_x0(f->cur.p.w, f->sr_cur.p.p.w, f->resize_step[0]);

+        f->resize_start[1] = get_upscale_x0(in_cw, out_cw, f->resize_step[1]);

+    }

     // move f->cur into output queue

     if (c->n_fc == 1) {

         if (f->frame_hdr.show_frame)

-            dav1d_picture_ref(&c->out, &f->cur.p);

+            dav1d_picture_ref(&c->out, &f->sr_cur.p);

     } else {

-        dav1d_thread_picture_ref(out_delayed, &f->cur);

+        dav1d_thread_picture_ref(out_delayed, &f->sr_cur);

-    f->w4 = (f->frame_hdr.width + 3) >> 2;

+    f->w4 = (f->frame_hdr.width[0] + 3) >> 2;

     f->h4 = (f->frame_hdr.height + 3) >> 2;

-    f->bw = ((f->frame_hdr.width + 7) >> 3) << 1;

+    f->bw = ((f->frame_hdr.width[0] + 7) >> 3) << 1;

     f->bh = ((f->frame_hdr.height + 7) >> 3) << 1;

     f->sb128w = (f->bw + 31) >> 5;

     f->sb128h = (f->bh + 31) >> 5;

@@ -3067,8 +3156,8 @@

             for (int i = 0; i < 7; i++) {

                 const int refidx = f->frame_hdr.refidx[i];

                 if (c->refs[refidx].refmvs != NULL &&

-                    f->refp[i].p.p.w == f->cur.p.p.w &&

-                    f->refp[i].p.p.h == f->cur.p.p.h)

+                    f->ref_coded_width[i] == f->cur.p.w &&

+                    f->refp[i].p.p.h == f->cur.p.h)

                     f->ref_mvs_ref[i] = c->refs[refidx].refmvs;

                     dav1d_ref_inc(f->ref_mvs_ref[i]);

@@ -3100,7 +3189,7 @@

         if (f->frame_hdr.segmentation.temporal || !f->frame_hdr.segmentation.update_map) {

             const int pri_ref = f->frame_hdr.primary_ref_frame;

             assert(pri_ref != PRIMARY_REF_NONE);

-            const int ref_w = ((f->refp[pri_ref].p.p.w + 7) >> 3) << 1;

+            const int ref_w = ((f->ref_coded_width[pri_ref] + 7) >> 3) << 1;

             const int ref_h = ((f->refp[pri_ref].p.p.h + 7) >> 3) << 1;

             if (ref_w == f->bw && ref_h == f->bh) {

                 f->prev_segmap_ref = c->refs[f->frame_hdr.refidx[pri_ref]].segmap;

@@ -3147,7 +3236,8 @@

         if (f->frame_hdr.refresh_frame_flags & (1 << i)) {

             if (c->refs[i].p.p.data[0])

                 dav1d_thread_picture_unref(&c->refs[i].p);

-            dav1d_thread_picture_ref(&c->refs[i].p, &f->cur);

+            dav1d_thread_picture_ref(&c->refs[i].p, &f->sr_cur);

+            c->refs[i].coded_width = f->frame_hdr.width[0];

             if (c->cdf[i].cdf) dav1d_cdf_thread_unref(&c->cdf[i]);

             if (f->frame_hdr.refresh_context) {

@@ -3207,7 +3297,8 @@

         dav1d_ref_dec(&f->ref_mvs_ref[i]);

     dav1d_picture_unref(&c->out);

-    dav1d_thread_picture_unref(&f->cur);

+    dav1d_picture_unref(&f->cur);

+    dav1d_thread_picture_unref(&f->sr_cur);

     dav1d_ref_dec(&f->mvs_ref);

     for (int i = 0; i < f->n_tile_data; i++)

--- a/src/internal.h

+++ b/src/internal.h

@@ -99,6 +99,7 @@

         Av1LoopfilterModeRefDeltas lf_mode_ref_deltas;

         Av1FilmGrainData film_grain;

         uint8_t qidx;

+        unsigned coded_width;

     } refs[8];

     CdfThreadContext cdf[8];

@@ -119,7 +120,9 @@

 struct Dav1dFrameContext {

     Av1SequenceHeader seq_hdr;

     Av1FrameHeader frame_hdr;

-    Dav1dThreadPicture refp[7], cur;

+    Dav1dThreadPicture refp[7];

+    Dav1dPicture cur; // during block coding / reconstruction

+    Dav1dThreadPicture sr_cur; // after super-resolution upscaling

     Dav1dRef *mvs_ref;

     refmvs *mvs, *ref_mvs[7];

     Dav1dRef *ref_mvs_ref[7];

@@ -127,6 +130,7 @@

     uint8_t *cur_segmap;

     const uint8_t *prev_segmap;

     unsigned refpoc[7], refrefpoc[7][7];

+    int ref_coded_width[7];

     CdfThreadContext in_cdf, out_cdf;

     struct {

         Dav1dData data;

@@ -139,6 +143,7 @@

         int scale; // if no scaling, this is 0

         int step;

     } svc[7][2 /* x, y */];

+    int resize_step[2 /* y, uv */], resize_start[2 /* y, uv */];

     const Dav1dContext *c;

     Dav1dTileContext *tc;

@@ -157,7 +162,7 @@

     int ipred_edge_sz;

     pixel *ipred_edge[3];

     ptrdiff_t b4_stride;

-    int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step;

+    int w4, h4, bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step, sr_sb128w;

     uint16_t dq[NUM_SEGMENTS][3 /* plane */][2 /* dc/ac */];

     const uint8_t *qm[2 /* is_1d */][N_RECT_TX_SIZES][3 /* plane */];

     BlockContext *a;

@@ -188,8 +193,9 @@

     struct {

         uint8_t (*level)[4];

         Av1Filter *mask;

+        Av1Restoration *lr_mask;

         int top_pre_cdef_toggle;

-        int mask_sz /* w*h */, line_sz /* w */, re_sz /* h */;

+        int mask_sz /* w*h */, lr_mask_sz, line_sz /* w */, lr_line_sz, re_sz /* h */;

         Av1FilterLUT lim_lut;

         int last_sharpness;

         uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];

@@ -201,7 +207,7 @@

         // in-loop filter per-frame state keeping

         int tile_row; // for carry-over at tile row edges

-        pixel *p[3];

+        pixel *p[3], *sr_p[3];

         Av1Filter *mask_ptr, *prev_mask_ptr;

     } lf;

--- a/src/levels.h

+++ b/src/levels.h

@@ -431,9 +431,12 @@

     } operating_points[32];

     int frame_offset;

     int refresh_frame_flags;

-    int width, height;

+    int width[2 /* { coded_width, superresolution_upscaled_width } */], height;

     int render_width, render_height;

-    int super_res;

+    struct {

+        int width_scale_denominator;

+        int enabled;

+    } super_res;

     int have_render_size;

     int allow_intrabc;

     int frame_ref_short_signaling;

--- a/src/lf_apply_tmpl.c

+++ b/src/lf_apply_tmpl.c

@@ -183,8 +183,8 @@

     const int sbsz = 32 >> is_sb64;

     const int sbl2 = 5 - is_sb64;

     const int halign = (f->bh + 31) & ~31;

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;

     const unsigned vmax = 1U << vmask, hmax = 1U << hmask;

     const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);

@@ -211,7 +211,7 @@

             y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;

-        if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {

+        if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {

             uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];

             for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;

                  y++, uv_mask <<= 1)

@@ -247,7 +247,7 @@

                 y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;

-            if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {

+            if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {

                 const unsigned cw = (w + ss_hor) >> ss_hor;

                 uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];

                 for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {

@@ -268,7 +268,7 @@

          x++, have_left = 1, ptr += 128, level_ptr += 32)

         filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,

-                            lflvl[x].filter_y[0], ptr, f->cur.p.stride[0],

+                            lflvl[x].filter_y[0], ptr, f->cur.stride[0],

                             imin(32, f->w4 - x * 32), starty4, endy4);

@@ -275,7 +275,7 @@

     level_ptr = f->lf.level + f->b4_stride * sby * sbsz;

     for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {

         filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,

-                            lflvl[x].filter_y[1], ptr, f->cur.p.stride[0],

+                            lflvl[x].filter_y[1], ptr, f->cur.stride[0],

                             imin(32, f->w4 - x * 32), starty4, endy4);

@@ -289,7 +289,7 @@

         filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,

                              lflvl[x].filter_uv[0],

-                             &p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],

+                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],

                              (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,

                              starty4 >> ss_ver, uv_endy4, ss_ver);

@@ -300,7 +300,7 @@

         filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,

                              lflvl[x].filter_uv[1],

-                             &p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],

+                             &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],

                              (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,

                              starty4 >> ss_ver, uv_endy4, ss_hor);

--- a/src/lf_mask.h

+++ b/src/lf_mask.h

@@ -47,7 +47,7 @@

     int16_t sgr_weights[2];

 } Av1RestorationUnit;

-// each struct describes one 128x128 area (1 or 4 SBs)

+// each struct describes one 128x128 area (1 or 4 SBs), pre-superres-scaling

 typedef struct Av1Filter {

     // each bit is 1 col

     uint16_t filter_y[2 /* 0=col, 1=row */][32][3][2];

@@ -54,8 +54,12 @@

     uint16_t filter_uv[2 /* 0=col, 1=row */][32][2][2];

     int8_t cdef_idx[4]; // -1 means "unset"

     uint16_t noskip_mask[32][2];

-    Av1RestorationUnit lr[3][4];

 } Av1Filter;

+// each struct describes one 128x128 area (1 or 4 SBs), post-superres-scaling

+typedef struct Av1Restoration {

+    Av1RestorationUnit lr[3 /* plane */][4 /* unit index within the area */];

+} Av1Restoration;

 void dav1d_create_lf_mask_intra(Av1Filter *lflvl, uint8_t (*level_cache)[4],

                                 const ptrdiff_t b4_stride,

--- a/src/lib.c

+++ b/src/lib.c

@@ -266,7 +266,8 @@

                     dav1d_thread_picture_unref(&f->refp[i]);

                 dav1d_ref_dec(&f->ref_mvs_ref[i]);

-            dav1d_thread_picture_unref(&f->cur);

+            dav1d_picture_unref(&f->cur);

+            dav1d_thread_picture_unref(&f->sr_cur);

             dav1d_cdf_thread_unref(&f->in_cdf);

             if (f->frame_hdr.refresh_context)

                 dav1d_cdf_thread_unref(&f->out_cdf);

@@ -324,6 +325,7 @@

         dav1d_free_aligned(f->ipred_edge[0]);

         free(f->a);

         free(f->lf.mask);

+        free(f->lf.lr_mask);

         free(f->lf.level);

         free(f->lf.tx_lpf_right_edge[0]);

         av1_free_ref_mv_common(f->libaom_cm);

--- a/src/lr_apply_tmpl.c

+++ b/src/lr_apply_tmpl.c

@@ -33,7 +33,6 @@

 #include "src/lr_apply.h"

 enum LrRestorePlanes {

     LR_RESTORE_Y = 1 << 0,

     LR_RESTORE_U = 1 << 1,

@@ -44,13 +43,14 @@

 // contain at most 2 stripes. Each stripe requires 4 rows pixels (2 above

 // and 2 below) the final 4 rows are used to swap the bottom of the last

 // stripe with the top of the next super block row.

-static void backup_lpf(pixel *dst, ptrdiff_t dst_stride,

-                       const pixel *src, ptrdiff_t src_stride,

+static void backup_lpf(const Dav1dFrameContext *const f,

+                       pixel *dst, const ptrdiff_t dst_stride,

+                       const pixel *src, const ptrdiff_t src_stride,

                        const int ss_ver, const int sb128,

-                       int row, const int row_h, const int w)

+                       int row, const int row_h, const int src_w, const int ss_hor)

-    src_stride = PXSTRIDE(src_stride);

-    dst_stride = PXSTRIDE(dst_stride);

+    const int dst_w = f->frame_hdr.super_res.enabled ?

+                      (f->frame_hdr.width[1] + ss_hor) >> ss_hor : src_w;

     // The first stripe of the frame is shorter by 8 luma pixel rows.

     int stripe_h = (64 - 8 * !row) >> ss_ver;

@@ -59,23 +59,38 @@

         const int top = 4 << sb128;

         // Copy the top part of the stored loop filtered pixels from the

         // previous sb row needed above the first stripe of this sb row.

-        pixel_copy(&dst[dst_stride *  0], &dst[dst_stride *  top], w);

-        pixel_copy(&dst[dst_stride *  1], &dst[dst_stride * (top + 1)], w);

-        pixel_copy(&dst[dst_stride *  2], &dst[dst_stride * (top + 2)], w);

-        pixel_copy(&dst[dst_stride *  3], &dst[dst_stride * (top + 3)], w);

+        pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],

+                   &dst[PXSTRIDE(dst_stride) *  top],      dst_w);

+        pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],

+                   &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);

+        pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],

+                   &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);

+        pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],

+                   &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);

-    dst += 4 * dst_stride;

-    src += (stripe_h - 2) * src_stride;

+    dst += 4 * PXSTRIDE(dst_stride);

+    src += (stripe_h - 2) * PXSTRIDE(src_stride);

-    for (; row + stripe_h <= row_h; row += stripe_h) {

-        for (int i = 0; i < 4; i++) {

-            pixel_copy(dst, src, w);

-            dst += dst_stride;

-            src += src_stride;

+    if (f->frame_hdr.super_res.enabled) {

+        for (; row + stripe_h <= row_h; row += stripe_h) {

+            f->dsp->mc.resize(dst, dst_stride, src, src_stride,

+                              dst_w, src_w, 4, f->resize_step[ss_hor],

+                              f->resize_start[ss_hor]);

+            stripe_h = 64 >> ss_ver;

+            src += stripe_h * PXSTRIDE(src_stride);

+            dst += 4 * PXSTRIDE(dst_stride);

-        stripe_h = 64 >> ss_ver;

-        src += (stripe_h - 4) * src_stride;

+    } else {

+        for (; row + stripe_h <= row_h; row += stripe_h) {

+            for (int i = 0; i < 4; i++) {

+                pixel_copy(dst, src, src_w);

+                dst += PXSTRIDE(dst_stride);

+                src += PXSTRIDE(src_stride);

+            }

+            stripe_h = 64 >> ss_ver;

+            src += (stripe_h - 4) * PXSTRIDE(src_stride);

+        }

@@ -83,7 +98,8 @@

                                /*const*/ pixel *const src[3], const int sby)

     const ptrdiff_t offset = 8 * !!sby;

-    const ptrdiff_t *const src_stride = f->cur.p.stride;

+    const ptrdiff_t *const src_stride = f->cur.stride;

+    const ptrdiff_t lr_stride = ((f->sr_cur.p.p.w + 31) & ~31) * sizeof(pixel);

     // TODO Also check block level restore type to reduce copying.

     const int restore_planes =

@@ -96,13 +112,13 @@

         const int w = f->bw << 2;

         const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);

         const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset;

-        backup_lpf(f->lf.lr_lpf_line_ptr[0], sizeof(pixel) * f->b4_stride * 4,

+        backup_lpf(f, f->lf.lr_lpf_line_ptr[0], lr_stride,

                    src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],

-                   0, f->seq_hdr.sb128, y_stripe, row_h, w);

+                   0, f->seq_hdr.sb128, y_stripe, row_h, w, 0);

     if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {

-        const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-        const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

         const int h = f->bh << (2 - ss_ver);

         const int w = f->bw << (2 - ss_hor);

         const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);

@@ -111,19 +127,18 @@

             (sby << ((6 - ss_ver) + f->seq_hdr.sb128)) - offset_uv;

         if (restore_planes & LR_RESTORE_U) {

-            backup_lpf(f->lf.lr_lpf_line_ptr[1], sizeof(pixel) * f->b4_stride * 4,

+            backup_lpf(f, f->lf.lr_lpf_line_ptr[1], lr_stride,

                        src[1] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1],

-                       ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w);

+                       ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w, ss_hor);

         if (restore_planes & LR_RESTORE_V) {

-            backup_lpf(f->lf.lr_lpf_line_ptr[2], sizeof(pixel) * f->b4_stride * 4,

+            backup_lpf(f, f->lf.lr_lpf_line_ptr[2], lr_stride,

                        src[2] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1],

-                       ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w);

+                       ss_ver, f->seq_hdr.sb128, y_stripe, row_h, w, ss_hor);

 static void lr_stripe(const Dav1dFrameContext *const f, pixel *p,

                       const pixel (*left)[4], int x, int y,

                       const int plane, const int unit_w, const int row_h,

@@ -131,11 +146,11 @@

     const Dav1dDSPContext *const dsp = f->dsp;

     const int chroma = !!plane;

-    const int ss_ver = chroma & (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);

+    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);

     const int sbrow_has_bottom = (edges & LR_HAVE_BOTTOM);

     const pixel *lpf = f->lf.lr_lpf_line_ptr[plane] + x;

-    const ptrdiff_t p_stride = f->cur.p.stride[chroma];

-    const ptrdiff_t lpf_stride = sizeof(pixel) * f->b4_stride * 4;

+    const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];

+    const ptrdiff_t lpf_stride = sizeof(pixel) * ((f->sr_cur.p.p.w + 31) & ~31);

     // The first stripe of the frame is shorter by 8 luma pixel rows.

     int stripe_h = imin((64 - 8 * !y) >> ss_ver, row_h - y);

@@ -192,9 +207,9 @@

                      const int w, const int h, const int row_h, const int plane)

     const int chroma = !!plane;

-    const int ss_ver = chroma & (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);

-    const int ss_hor = chroma & (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);

-    const ptrdiff_t p_stride = f->cur.p.stride[chroma];

+    const int ss_ver = chroma & (f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420);

+    const int ss_hor = chroma & (f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444);

+    const ptrdiff_t p_stride = f->sr_cur.p.stride[chroma];

     const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!plane];

     const int unit_size = 1 << unit_size_log2;

@@ -238,8 +253,8 @@

         // AV1Filter unit.

         const int unit_idx = ((ruy & 16) >> 3) + ((rux & 16) >> 4);

         const Av1RestorationUnit *const lr =

-            &f->lf.mask[(((ruy << (unit_size_log2)) >> shift_ver) * f->sb128w) +

-                        (x >> shift_hor)].lr[plane][unit_idx];

+            &f->lf.lr_mask[(((ruy << (unit_size_log2)) >> shift_ver) * f->sr_sb128w) +

+                           (x >> shift_hor)].lr[plane][unit_idx];

         // FIXME Don't backup if the next restoration unit is RESTORE_NONE

         // This also requires not restoring in the same conditions.

@@ -257,7 +272,7 @@

                             const int sby)

     const ptrdiff_t offset_y = 8 * !!sby;

-    const ptrdiff_t *const dst_stride = f->cur.p.stride;

+    const ptrdiff_t *const dst_stride = f->sr_cur.p.stride;

     const int restore_planes =

         ((f->frame_hdr.restoration.type[0] != RESTORATION_NONE) << 0) +

@@ -265,8 +280,8 @@

         ((f->frame_hdr.restoration.type[2] != RESTORATION_NONE) << 2);

     if (restore_planes & LR_RESTORE_Y) {

-        const int h = f->cur.p.p.h;

-        const int w = f->cur.p.p.w;

+        const int h = f->sr_cur.p.p.h;

+        const int w = f->sr_cur.p.p.w;

         const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);

         const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset_y;

         lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,

@@ -273,10 +288,10 @@

                  h, row_h, 0);

     if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {

-        const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-        const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

-        const int h = (f->cur.p.p.h + ss_ver) >> ss_ver;

-        const int w = (f->cur.p.p.w + ss_hor) >> ss_hor;

+        const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+        const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+        const int h = (f->sr_cur.p.p.h + ss_ver) >> ss_ver;

+        const int w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;

         const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);

         const ptrdiff_t offset_uv = offset_y >> ss_ver;

         const int y_stripe =

--- a/src/mc.h

+++ b/src/mc.h

@@ -105,6 +105,12 @@

             pixel *dst, ptrdiff_t dst_stride, const pixel *src, ptrdiff_t src_stride)

 typedef decl_emu_edge_fn(*emu_edge_fn);

+/*

+ * Horizontal resize of h rows: each output row receives dst_w pixels that

+ * are computed from a source row of src_w pixels. dx is the position

+ * increment per output pixel and mx the starting position; the C

+ * implementation treats both as values with 14 fractional bits.

+ */

+#define decl_resize_fn(name) \

+void (name)(pixel *dst, ptrdiff_t dst_stride, \

+            const pixel *src, ptrdiff_t src_stride, \

+            int dst_w, int src_w, int h, int dx, int mx)

+typedef decl_resize_fn(*resize_fn);

 typedef struct Dav1dMCDSPContext {

     mc_fn mc[N_2D_FILTERS];

     mc_scaled_fn mc_scaled[N_2D_FILTERS];

@@ -120,6 +126,7 @@

     warp8x8_fn warp8x8;

     warp8x8t_fn warp8x8t;

     emu_edge_fn emu_edge;

+    resize_fn resize;

 } Dav1dMCDSPContext;

 void dav1d_mc_dsp_init_8bpc(Dav1dMCDSPContext *c);

--- a/src/mc_tmpl.c

+++ b/src/mc_tmpl.c

@@ -782,6 +782,34 @@

+/*

+ * Plain C horizontal resampler: converts h rows of src_w pixels into rows

+ * of dst_w pixels using an 8-tap filter taken from dav1d_resize_filter.

+ * mx0 is the starting position and dx the per-output-pixel increment, both

+ * with 14 fractional bits. h must be at least 1, because the row loop only

+ * tests it after decrementing.

+ */

+static void resize_c(pixel *dst, const ptrdiff_t dst_stride,

+                     const pixel *src, const ptrdiff_t src_stride,

+                     const int dst_w, const int src_w, int h,

+                     const int dx, const int mx0)

+{

+    const int last_col = src_w - 1;

+

+    do {

+        int frac = mx0;

+        int pos = -1;

+        for (int col = 0; col < dst_w; col++) {

+            // the top 6 bits of the 14-bit fraction select the filter

+            const int16_t *const taps = dav1d_resize_filter[frac >> 8];

+            // start from the rounding offset, then add the 8 taps; source

+            // columns outside the row are clamped to its first/last pixel

+            int sum = 64;

+            for (int k = 0; k < 8; k++)

+                sum += taps[k] * src[iclip(pos - 3 + k, 0, last_col)];

+            dst[col] = iclip_pixel(sum >> 7);

+            // advance: carry the integer part into pos, keep the fraction

+            frac += dx;

+            pos += frac >> 14;

+            frac &= 0x3fff;

+        }

+        dst += PXSTRIDE(dst_stride);

+        src += PXSTRIDE(src_stride);

+    } while (--h);

+}

 void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {

 #define init_mc_fns(type, name) do { \

     c->mc        [type] = put_##name##_c; \

@@ -813,6 +841,7 @@

     c->warp8x8  = warp_affine_8x8_c;

     c->warp8x8t = warp_affine_8x8t_c;

     c->emu_edge = emu_edge_c;

+    c->resize   = resize_c;

 #if HAVE_ASM

 #if ARCH_AARCH64 || ARCH_ARM

--- a/src/obu.c

+++ b/src/obu.c

@@ -280,9 +280,18 @@

                     &c->refs[c->frame_hdr.refidx[i]].p;

                 if (!ref->p.data[0]) return -1;

                 // FIXME render_* may be wrong

-                hdr->render_width = hdr->width = ref->p.p.w;

+                hdr->render_width = hdr->width[1] = ref->p.p.w;

                 hdr->render_height = hdr->height = ref->p.p.h;

-                hdr->super_res = 0; // FIXME probably wrong

+                hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bits(gb, 1);

+                if (hdr->super_res.enabled) {

+                    const int d = hdr->super_res.width_scale_denominator =

+                        9 + dav1d_get_bits(gb, 3);

+                    hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d,

+                                         imin(16, hdr->width[1]));

+                } else {

+                    hdr->super_res.width_scale_denominator = 8;

+                    hdr->width[0] = hdr->width[1];

+                }

                 return 0;

@@ -289,20 +298,26 @@

     if (hdr->frame_size_override) {

-        hdr->width = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;

+        hdr->width[1] = dav1d_get_bits(gb, seqhdr->width_n_bits) + 1;

         hdr->height = dav1d_get_bits(gb, seqhdr->height_n_bits) + 1;

     } else {

-        hdr->width = seqhdr->max_width;

+        hdr->width[1] = seqhdr->max_width;

         hdr->height = seqhdr->max_height;

-    hdr->super_res = seqhdr->super_res && dav1d_get_bits(gb, 1);

-    if (hdr->super_res) return -1; // FIXME

+    hdr->super_res.enabled = seqhdr->super_res && dav1d_get_bits(gb, 1);

+    if (hdr->super_res.enabled) {

+        const int d = hdr->super_res.width_scale_denominator = 9 + dav1d_get_bits(gb, 3);

+        hdr->width[0] = imax((hdr->width[1] * 8 + (d >> 1)) / d, imin(16, hdr->width[1]));

+    } else {

+        hdr->super_res.width_scale_denominator = 8;

+        hdr->width[0] = hdr->width[1];

+    }

     hdr->have_render_size = dav1d_get_bits(gb, 1);

     if (hdr->have_render_size) {

         hdr->render_width = dav1d_get_bits(gb, 16) + 1;

         hdr->render_height = dav1d_get_bits(gb, 16) + 1;

     } else {

-        hdr->render_width = hdr->width;

+        hdr->render_width = hdr->width[1];

         hdr->render_height = hdr->height;

     return 0;

@@ -411,7 +426,7 @@

                 dav1d_get_bits(gb, seqhdr->order_hint_n_bits);

         if ((res = read_frame_size(c, gb, 0)) < 0) goto error;

         hdr->allow_intrabc = hdr->allow_screen_content_tools &&

-                             /* FIXME: no superres scaling && */ dav1d_get_bits(gb, 1);

+                             !hdr->super_res.enabled && dav1d_get_bits(gb, 1);

         hdr->use_ref_frame_mvs = 0;

     } else {

         hdr->allow_intrabc = 0;

@@ -455,7 +470,7 @@

     hdr->tiling.uniform = dav1d_get_bits(gb, 1);

     const int sbsz_min1 = (64 << seqhdr->sb128) - 1;

     int sbsz_log2 = 6 + seqhdr->sb128;

-    int sbw = (hdr->width + sbsz_min1) >> sbsz_log2;

+    int sbw = (hdr->width[0] + sbsz_min1) >> sbsz_log2;

     int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;

     int max_tile_width_sb = 4096 >> sbsz_log2;

     int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);

@@ -733,7 +748,9 @@

 #endif

     // restoration

-    if (!hdr->all_lossless && seqhdr->restoration && !hdr->allow_intrabc) {

+    if ((!hdr->all_lossless || hdr->super_res.enabled) &&

+        seqhdr->restoration && !hdr->allow_intrabc)

+    {

         hdr->restoration.type[0] = dav1d_get_bits(gb, 2);

         if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400) {

             hdr->restoration.type[1] = dav1d_get_bits(gb, 2);

--- a/src/picture.c

+++ b/src/picture.c

@@ -91,8 +91,7 @@

     void *extra_ptr; /* MUST BE AT THE END */

};

-static void free_buffer(const uint8_t *data, void *user_data)

-{

+static void free_buffer(const uint8_t *const data, void *const user_data) {

     struct pic_ctx_context *pic_ctx = user_data;

     pic_ctx->allocator.release_picture_callback(pic_ctx->data,

@@ -148,6 +147,13 @@

         *extra_ptr = &pic_ctx->extra_ptr;

     return 0;

+}

+int dav1d_picture_alloc(Dav1dPicture *const p, const int w, const int h,

+                        const enum Dav1dPixelLayout layout, const int bpc,

+                        Dav1dPicAllocator *const p_allocator)

+{

+    return picture_alloc_with_edges(p, w, h, layout, bpc, p_allocator, 0, NULL);

 int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p,

--- a/src/picture.h

+++ b/src/picture.h

@@ -54,6 +54,10 @@

/*

  * Allocate a picture with custom border size.

*/

+int dav1d_picture_alloc(Dav1dPicture *p, int w, int h,

+                        enum Dav1dPixelLayout layout, int bpc,

+                        Dav1dPicAllocator *);

 int dav1d_thread_picture_alloc(Dav1dThreadPicture *p, int w, int h,

                                enum Dav1dPixelLayout layout, int bpc,

                                struct thread_data *t, int visible,

--- a/src/recon_tmpl.c

+++ b/src/recon_tmpl.c

@@ -72,7 +72,7 @@

     if (dbg) printf("Start: r=%d\n", ts->msac.rng);

     // does this block have any non-zero coefficients

-    const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.p.layout);

+    const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);

     const int all_skip =

         msac_decode_bool_adapt(&ts->msac, ts->cdf.coef.skip[t_dim->ctx][sctx]);

     if (dbg)

@@ -289,7 +289,7 @@

         t->by += txsh;

         if (txh >= txw && t->by < f->bh) {

             if (dst)

-                dst += 4 * txsh * PXSTRIDE(f->cur.p.stride[0]);

+                dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);

             read_coef_tree(t, bs, b, sub, depth + 1, tx_split,

                            x_off * 2 + 0, y_off * 2 + 1, dst);

             t->bx += txsw;

@@ -349,9 +349,9 @@

             if (eob >= 0) {

                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)

                     coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");

-                dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.p.stride[0], cf, eob);

+                dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob);

                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)

-                    hex_dump(dst, f->cur.p.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");

+                    hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");

@@ -361,8 +361,8 @@

                                     const enum BlockSize bs, const Av1Block *const b)

     const Dav1dFrameContext *const f = t->f;

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int bx4 = t->bx & 31, by4 = t->by & 31;

     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;

     const uint8_t *const b_dim = dav1d_block_dimensions[bs];

@@ -501,8 +501,8 @@

     assert((dst8 != NULL) ^ (dst16 != NULL));

     const Dav1dFrameContext *const f = t->f;

-    const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;

     const int mvx = mv.x, mvy = mv.y;

     const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);

@@ -509,19 +509,19 @@

     ptrdiff_t ref_stride = refp->p.stride[!!pl];

     const pixel *ref;

-    if (refp->p.p.w == f->cur.p.p.w && refp->p.p.h == f->cur.p.p.h) {

+    if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {

         const int dx = bx * h_mul + (mvx >> (3 + ss_hor));

         const int dy = by * v_mul + (mvy >> (3 + ss_ver));

         int w, h;

-        if (refp != &f->cur) { // i.e. not for intrabc

+        if (refp->p.data[0] != f->cur.data[0]) { // i.e. not for intrabc

             if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,

                                           PLANE_TYPE_Y + !!pl))

                 return -1;

-            w = (f->cur.p.p.w + ss_hor) >> ss_hor;

-            h = (f->cur.p.p.h + ss_ver) >> ss_ver;

+            w = (f->cur.p.w + ss_hor) >> ss_hor;

+            h = (f->cur.p.h + ss_ver) >> ss_ver;

         } else {

             w = f->bw * 4 >> ss_hor;

             h = f->bh * 4 >> ss_ver;

@@ -548,7 +548,7 @@

                                       bh4 * v_mul, mx << !ss_hor, my << !ss_ver);

     } else {

-        assert(refp != &f->cur);

+        assert(refp != &f->sr_cur);

         int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);

         int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);

@@ -569,6 +569,11 @@

         if (dav1d_thread_picture_wait(refp, bottom, PLANE_TYPE_Y + !!pl))

             return -1;

+        if (DEBUG_BLOCK_INFO)

+            printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",

+                   left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,

+                   right-left, bottom-top,

+                   f->svc[refidx][0].step, f->svc[refidx][1].step);

         const int w = (refp->p.p.w + ss_hor) >> ss_hor;

         const int h = (refp->p.p.h + ss_ver) >> ss_ver;

@@ -579,6 +584,7 @@

                                 refp->p.data[pl], ref_stride);

             ref = &t->emu_edge[320 * 3 + 3];

             ref_stride = 320 * sizeof(pixel);

+            if (DEBUG_BLOCK_INFO) printf("Emu\n");

         } else {

             ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;

@@ -610,8 +616,8 @@

     const Dav1dFrameContext *const f = t->f;

     const refmvs *const r = &f->mvs[t->by * f->b4_stride + t->bx];

     pixel *const lap = t->scratch.lap;

-    const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;

     int res;

@@ -673,8 +679,8 @@

     assert((dst8 != NULL) ^ (dst16 != NULL));

     const Dav1dFrameContext *const f = t->f;

     const Dav1dDSPContext *const dsp = f->dsp;

-    const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;

     assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));

     const int32_t *const mat = wmp->matrix;

@@ -735,8 +741,8 @@

     const Dav1dFrameContext *const f = t->f;

     const Dav1dDSPContext *const dsp = f->dsp;

     const int bx4 = t->bx & 31, by4 = t->by & 31;

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;

     const uint8_t *const b_dim = dav1d_block_dimensions[bs];

     const int bw4 = b_dim[0], bh4 = b_dim[1];

@@ -758,8 +764,8 @@

     for (int init_y = 0; init_y < h4; init_y += 16) {

         for (int init_x = 0; init_x < w4; init_x += 16) {

             if (b->pal_sz[0]) {

-                pixel *dst = ((pixel *) f->cur.p.data[0]) +

-                             4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);

+                pixel *dst = ((pixel *) f->cur.data[0]) +

+                             4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);

                 const uint8_t *pal_idx;

                 if (f->frame_thread.pass) {

                     pal_idx = ts->frame_thread.pal_idx;

@@ -770,10 +776,10 @@

                 const uint16_t *const pal = f->frame_thread.pass ?

                     f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +

                                         ((t->bx >> 1) + (t->by & 1))][0] : t->pal[0];

-                f->dsp->ipred.pal_pred(dst, f->cur.p.stride[0], pal,

+                f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,

                                        pal_idx, bw4 * 4, bh4 * 4);

                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)

-                    hex_dump(dst, PXSTRIDE(f->cur.p.stride[0]),

+                    hex_dump(dst, PXSTRIDE(f->cur.stride[0]),

                              bw4 * 4, bh4 * 4, "y-pal-pred");

@@ -790,8 +796,8 @@

             for (y = init_y, t->by += init_y; y < sub_h4;

                  y += t_dim->h, t->by += t_dim->h)

-                pixel *dst = ((pixel *) f->cur.p.data[0]) +

-                               4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) +

+                pixel *dst = ((pixel *) f->cur.data[0]) +

+                               4 * (t->by * PXSTRIDE(f->cur.stride[0]) +

                                     t->bx + init_x);

                 for (x = init_x, t->bx += init_x; x < sub_w4;

                      x += t_dim->w, t->bx += t_dim->w)

@@ -818,10 +824,10 @@

                                                           ts->tiling.col_end,

                                                           ts->tiling.row_end,

                                                           edge_flags, dst,

-                                                          f->cur.p.stride[0], top_sb_edge,

+                                                          f->cur.stride[0], top_sb_edge,

                                                           b->y_mode, &angle,

                                                           t_dim->w, t_dim->h, edge);

-                    dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge,

+                    dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,

                                              t_dim->w * 4, t_dim->h * 4,

                                              angle | intra_flags,

                                              4 * f->bw - 4 * t->bx,

@@ -833,7 +839,7 @@

                         hex_dump(edge, 0, 1, 1, "tl");

                         hex_dump(edge + 1, t_dim->w * 4,

                                  t_dim->w * 4, 2, "t");

-                        hex_dump(dst, f->cur.p.stride[0],

+                        hex_dump(dst, f->cur.stride[0],

                                  t_dim->w * 4, t_dim->h * 4, "y-intra-pred");

@@ -875,10 +881,10 @@

                                           imin(t_dim->w, 8) * 4, 3, "dq");

                             dsp->itx.itxfm_add[b->tx]

                                               [txtp](dst,

-                                                     f->cur.p.stride[0],

+                                                     f->cur.stride[0],

                                                      cf, eob);

                             if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)

-                                hex_dump(dst, f->cur.p.stride[0],

+                                hex_dump(dst, f->cur.stride[0],

                                          t_dim->w * 4, t_dim->h * 4, "recon");

                     } else if (!f->frame_thread.pass) {

@@ -896,24 +902,24 @@

             if (!has_chroma) continue;

-            const ptrdiff_t stride = f->cur.p.stride[1];

+            const ptrdiff_t stride = f->cur.stride[1];

             if (b->uv_mode == CFL_PRED) {

                 assert(!init_x && !init_y);

                 int16_t *const ac = t->scratch.ac;

-                pixel *y_src = ((pixel *) f->cur.p.data[0]) + 4 * (t->bx & ~ss_hor) +

-                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.p.stride[0]);

+                pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +

+                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);

                 const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +

                                               (t->by >> ss_ver) * PXSTRIDE(stride));

-                pixel *const uv_dst[2] = { ((pixel *) f->cur.p.data[1]) + uv_off,

-                                           ((pixel *) f->cur.p.data[2]) + uv_off };

+                pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,

+                                           ((pixel *) f->cur.data[2]) + uv_off };

                 const int furthest_r =

                     ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);

                 const int furthest_b =

                     ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);

-                dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],

+                dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],

                                                          cbw4 - (furthest_r >> ss_hor),

                                                          cbh4 - (furthest_b >> ss_ver),

                                                          cbw4 * 4, cbh4 * 4);

@@ -950,7 +956,7 @@

             } else if (b->pal_sz[1]) {

                 ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +

-                                           (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));

+                                           (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));

                 const uint8_t *pal_idx;

                 if (f->frame_thread.pass) {

                     pal_idx = ts->frame_thread.pal_idx;

@@ -961,21 +967,21 @@

                 const uint16_t *const pal_u = f->frame_thread.pass ?

                     f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +

                                         ((t->bx >> 1) + (t->by & 1))][1] : t->pal[1];

-                f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[1]) + uv_dstoff,

-                                       f->cur.p.stride[1], pal_u,

+                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,

+                                       f->cur.stride[1], pal_u,

                                        pal_idx, cbw4 * 4, cbh4 * 4);

                 const uint16_t *const pal_v = f->frame_thread.pass ?

                     f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +

                                         ((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];

-                f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[2]) + uv_dstoff,

-                                       f->cur.p.stride[1], pal_v,

+                f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,

+                                       f->cur.stride[1], pal_v,

                                        pal_idx, cbw4 * 4, cbh4 * 4);

                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {

-                    hex_dump(((pixel *) f->cur.p.data[1]) + uv_dstoff,

-                             PXSTRIDE(f->cur.p.stride[1]),

+                    hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,

+                             PXSTRIDE(f->cur.stride[1]),

                              cbw4 * 4, cbh4 * 4, "u-pal-pred");

-                    hex_dump(((pixel *) f->cur.p.data[2]) + uv_dstoff,

-                             PXSTRIDE(f->cur.p.stride[1]),

+                    hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,

+                             PXSTRIDE(f->cur.stride[1]),

                              cbw4 * 4, cbh4 * 4, "v-pal-pred");

@@ -984,10 +990,10 @@

                                  sm_uv_flag(&t->l, cby4);

             const int uv_sb_has_tr =

                 ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :

-                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.p.layout - 1));

+                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));

             const int uv_sb_has_bl =

                 init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :

-                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.p.layout - 1));

+                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));

             const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);

             const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);

             for (int pl = 0; pl < 2; pl++) {

@@ -994,7 +1000,7 @@

                 for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;

                      y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)

-                    pixel *dst = ((pixel *) f->cur.p.data[1 + pl]) +

+                    pixel *dst = ((pixel *) f->cur.data[1 + pl]) +

                                    4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +

                                         ((t->bx + init_x) >> ss_hor));

                     for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;

@@ -1127,8 +1133,8 @@

     const Dav1dFrameContext *const f = t->f;

     const Dav1dDSPContext *const dsp = f->dsp;

     const int bx4 = t->bx & 31, by4 = t->by & 31;

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;

     const uint8_t *const b_dim = dav1d_block_dimensions[bs];

     const int bw4 = b_dim[0], bh4 = b_dim[1];

@@ -1136,26 +1142,27 @@

     const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&

                            (bw4 > ss_hor || t->bx & 1) &&

                            (bh4 > ss_ver || t->by & 1);

-    const int chr_layout_idx = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :

-                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.p.layout;

+    const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :

+                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;

     int res;

     // prediction

     const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;

-    pixel *dst = ((pixel *) f->cur.p.data[0]) +

-        4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);

+    pixel *dst = ((pixel *) f->cur.data[0]) +

+        4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);

     const ptrdiff_t uvdstoff =

-        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));

+        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));

     if (!(f->frame_hdr.frame_type & 1)) {

         // intrabc

-        res = mc(t, dst, NULL, f->cur.p.stride[0],

-                 bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);

+        assert(!f->frame_hdr.super_res.enabled);

+        res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,

+                 b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);

         if (res) return res;

         if (has_chroma) for (int pl = 1; pl < 3; pl++) {

-            res = mc(t, ((pixel *)f->cur.p.data[pl]) + uvdstoff, NULL, f->cur.p.stride[1],

+            res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],

                      bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),

-                     t->bx & ~ss_hor, t->by & ~ss_ver,

-                     pl, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);

+                     t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],

+                     &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);

             if (res) return res;

     } else if (b->comp_type == COMP_INTER_NONE) {

@@ -1168,16 +1175,16 @@

              (b->motion_mode == MM_WARP &&

               t->warpmv.type > WM_TYPE_TRANSLATION)))

-            res = warp_affine(t, dst, NULL, f->cur.p.stride[0], b_dim, 0, refp,

+            res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,

                               b->motion_mode == MM_WARP ? &t->warpmv :

                                   &f->frame_hdr.gmv[b->ref[0]]);

             if (res) return res;

         } else {

-            res = mc(t, dst, NULL, f->cur.p.stride[0],

+            res = mc(t, dst, NULL, f->cur.stride[0],

                      bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);

             if (res) return res;

             if (b->motion_mode == MM_OBMC) {

-                res = obmc(t, dst, f->cur.p.stride[0], b_dim, 0, bx4, by4, w4, h4);

+                res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);

                 if (res) return res;

@@ -1197,7 +1204,7 @@

             m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,

                                                   t->by, t->by > ts->tiling.row_start,

                                                   ts->tiling.col_end, ts->tiling.row_end,

-                                                  0, dst, f->cur.p.stride[0], top_sb_edge,

+                                                  0, dst, f->cur.stride[0], top_sb_edge,

                                                   m, &angle, bw4, bh4, tl_edge);

             dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),

                                      tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);

@@ -1205,7 +1212,7 @@

                 b->interintra_type == INTER_INTRA_BLEND ?

                      dav1d_ii_masks[bs][0][b->interintra_mode] :

                      dav1d_wedge_masks[bs][0][0][b->wedge_idx];

-            dsp->mc.blend(dst, f->cur.p.stride[0], tmp,

+            dsp->mc.blend(dst, f->cur.stride[0], tmp,

                           bw4 * 4, bh4 * 4, ii_mask);

@@ -1229,8 +1236,8 @@

             int h_off = 0, v_off = 0;

             if (bw4 == 1 && bh4 == ss_ver) {

                 for (int pl = 0; pl < 2; pl++) {

-                    res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,

-                             NULL, f->cur.p.stride[1],

+                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,

+                             NULL, f->cur.stride[1],

                              bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,

                              r[-(f->b4_stride + 1)].mv[0],

                              &f->refp[r[-(f->b4_stride + 1)].ref[0] - 1],

@@ -1239,7 +1246,7 @@

                                  f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);

                     if (res) return res;

-                v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);

+                v_off = 2 * PXSTRIDE(f->cur.stride[1]);

                 h_off = 2;

             if (bw4 == 1) {

@@ -1246,8 +1253,8 @@

                 const enum Filter2d left_filter_2d =

                     dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];

                 for (int pl = 0; pl < 2; pl++) {

-                    res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + v_off, NULL,

-                             f->cur.p.stride[1], bw4, bh4, t->bx - 1,

+                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,

+                             f->cur.stride[1], bw4, bh4, t->bx - 1,

                              t->by, 1 + pl, r[-1].mv[0], &f->refp[r[-1].ref[0] - 1],

                              r[-1].ref[0] - 1,

                              f->frame_thread.pass != 2 ? left_filter_2d :

@@ -1260,8 +1267,8 @@

                 const enum Filter2d top_filter_2d =

                     dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];

                 for (int pl = 0; pl < 2; pl++) {

-                    res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off, NULL,

-                             f->cur.p.stride[1], bw4, bh4, t->bx, t->by - 1,

+                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,

+                             f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,

                              1 + pl, r[-f->b4_stride].mv[0],

                              &f->refp[r[-f->b4_stride].ref[0] - 1],

                              r[-f->b4_stride].ref[0] - 1,

@@ -1269,10 +1276,10 @@

                                  f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);

                     if (res) return res;

-                v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);

+                v_off = 2 * PXSTRIDE(f->cur.stride[1]);

             for (int pl = 0; pl < 2; pl++) {

-                res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.p.stride[1],

+                res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],

                          bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],

                          refp, b->ref[0], filter_2d);

                 if (res) return res;

@@ -1285,8 +1292,8 @@

                   t->warpmv.type > WM_TYPE_TRANSLATION)))

                 for (int pl = 0; pl < 2; pl++) {

-                    res = warp_affine(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff, NULL,

-                                      f->cur.p.stride[1], b_dim, 1 + pl, refp,

+                    res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,

+                                      f->cur.stride[1], b_dim, 1 + pl, refp,

                                       b->motion_mode == MM_WARP ? &t->warpmv :

                                           &f->frame_hdr.gmv[b->ref[0]]);

                     if (res) return res;

@@ -1293,15 +1300,15 @@

             } else {

                 for (int pl = 0; pl < 2; pl++) {

-                    res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,

-                             NULL, f->cur.p.stride[1],

+                    res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,

+                             NULL, f->cur.stride[1],

                              bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),

                              t->bx & ~ss_hor, t->by & ~ss_ver,

                              1 + pl, b->mv[0], refp, b->ref[0], filter_2d);

                     if (res) return res;

                     if (b->motion_mode == MM_OBMC) {

-                        res = obmc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,

-                                   f->cur.p.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);

+                        res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,

+                                   f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);

                         if (res) return res;

@@ -1322,7 +1329,7 @@

                         b->interintra_mode == II_SMOOTH_PRED ?

                         SMOOTH_PRED : b->interintra_mode;

                     int angle = 0;

-                    pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;

+                    pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;

                     const pixel *top_sb_edge = NULL;

                     if (!(t->by & (f->sb_step - 1))) {

                         top_sb_edge = f->ipred_edge[pl + 1];

@@ -1337,12 +1344,12 @@

                                                               (ts->tiling.row_start >> ss_ver),

                                                           ts->tiling.col_end >> ss_hor,

                                                           ts->tiling.row_end >> ss_ver,

-                                                          0, uvdst, f->cur.p.stride[1],

+                                                          0, uvdst, f->cur.stride[1],

                                                           top_sb_edge, m,

                                                           &angle, cbw4, cbh4, tl_edge);

                     dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),

                                              tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);

-                    dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,

+                    dsp->mc.blend(uvdst, f->cur.stride[1], tmp,

                                   cbw4 * 4, cbh4 * 4, ii_mask);

@@ -1370,21 +1377,23 @@

             } else {

                 res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,

                          b->mv[i], refp, b->ref[i], filter_2d);

+                if (DEBUG_BLOCK_INFO)

+                    coef_dump(tmp[i], bw4*4, bh4*4, 3, "med");

                 if (res) return res;

         switch (b->comp_type) {

         case COMP_INTER_AVG:

-            dsp->mc.avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],

+            dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],

                         bw4 * 4, bh4 * 4);

             break;

         case COMP_INTER_WEIGHTED_AVG:

             jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];

-            dsp->mc.w_avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],

+            dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],

                           bw4 * 4, bh4 * 4, jnt_weight);

             break;

         case COMP_INTER_SEG:

-            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.p.stride[0],

+            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],

                                            tmp[b->mask_sign], tmp[!b->mask_sign],

                                            bw4 * 4, bh4 * 4, seg_mask, b->mask_sign);

             mask = seg_mask;

@@ -1391,7 +1400,7 @@

             break;

         case COMP_INTER_WEDGE:

             mask = dav1d_wedge_masks[bs][0][0][b->wedge_idx];

-            dsp->mc.mask(dst, f->cur.p.stride[0],

+            dsp->mc.mask(dst, f->cur.stride[0],

                          tmp[b->mask_sign], tmp[!b->mask_sign],

                          bw4 * 4, bh4 * 4, mask);

             if (has_chroma)

@@ -1416,19 +1425,19 @@

                     if (res) return res;

-            pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;

+            pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;

             switch (b->comp_type) {

             case COMP_INTER_AVG:

-                dsp->mc.avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],

+                dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],

                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver);

                 break;

             case COMP_INTER_WEIGHTED_AVG:

-                dsp->mc.w_avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],

+                dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],

                               bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight);

                 break;

             case COMP_INTER_WEDGE:

             case COMP_INTER_SEG:

-                dsp->mc.mask(uvdst, f->cur.p.stride[1],

+                dsp->mc.mask(uvdst, f->cur.stride[1],

                              tmp[b->mask_sign], tmp[!b->mask_sign],

                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask);

                 break;

@@ -1437,11 +1446,11 @@

     if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {

-        hex_dump(dst, f->cur.p.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");

+        hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");

         if (has_chroma) {

-            hex_dump(&((pixel *) f->cur.p.data[1])[uvdstoff], f->cur.p.stride[1],

+            hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],

                      cbw4 * 4, cbh4 * 4, "u-pred");

-            hex_dump(&((pixel *) f->cur.p.data[2])[uvdstoff], f->cur.p.stride[1],

+            hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],

                      cbw4 * 4, cbh4 * 4, "v-pred");

@@ -1473,7 +1482,7 @@

         for (int init_x = 0; init_x < bw4; init_x += 16) {

             // coefficient coding & inverse transforms

             int y_off = !!init_y, y;

-            dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * init_y;

+            dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;

             for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);

                  y += ytx->h, y_off++)

@@ -1485,17 +1494,17 @@

                                    x_off, y_off, &dst[x * 4]);

                     t->bx += ytx->w;

-                dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * ytx->h;

+                dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;

                 t->bx -= x;

                 t->by += ytx->h;

-            dst -= PXSTRIDE(f->cur.p.stride[0]) * 4 * y;

+            dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;

             t->by -= y;

             // chroma coefs and inverse transform

             if (has_chroma) for (int pl = 0; pl < 2; pl++) {

-                pixel *uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff +

-                    (PXSTRIDE(f->cur.p.stride[1]) * init_y * 4 >> ss_ver);

+                pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +

+                    (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);

                 for (y = init_y >> ss_ver, t->by += init_y;

                      y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)

@@ -1544,15 +1553,15 @@

                                 coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");

                             dsp->itx.itxfm_add[b->uvtx]

                                               [txtp](&uvdst[4 * x],

-                                                     f->cur.p.stride[1],

+                                                     f->cur.stride[1],

                                                      cf, eob);

                             if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)

-                                hex_dump(&uvdst[4 * x], f->cur.p.stride[1],

+                                hex_dump(&uvdst[4 * x], f->cur.stride[1],

                                          uvtx->w * 4, uvtx->h * 4, "recon");

                         t->bx += uvtx->w << ss_hor;

-                    uvdst += PXSTRIDE(f->cur.p.stride[1]) * 4 * uvtx->h;

+                    uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;

                     t->bx -= x << ss_hor;

                     t->by += uvtx->h << ss_ver;

@@ -1564,7 +1573,7 @@

 void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {

-    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+    const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

     const int sbsz = f->sb_step, sbh = f->sbh;

     if (f->frame_hdr.loopfilter.level_y[0] ||

@@ -1584,9 +1593,9 @@

     if (f->seq_hdr.cdef) {

         if (sby) {

             pixel *p_up[3] = {

-                f->lf.p[0] - 8 * PXSTRIDE(f->cur.p.stride[0]),

-                f->lf.p[1] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),

-                f->lf.p[2] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),

+                f->lf.p[0] - 8 * PXSTRIDE(f->cur.stride[0]),

+                f->lf.p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),

+                f->lf.p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),

};

             bytefn(dav1d_cdef_brow)(f, p_up, f->lf.prev_mask_ptr,

                                     sby * sbsz - 2, sby * sbsz);

@@ -1595,13 +1604,34 @@

         bytefn(dav1d_cdef_brow)(f, f->lf.p, f->lf.mask_ptr, sby * sbsz,

                                 imin(sby * sbsz + n_blks, f->bh));

+    if (f->frame_hdr.super_res.enabled) {

+        const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;

+        for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {

+            const int h_start = 8 * !!sby >> (ss_ver & !!pl);

+            const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];

+            pixel *dst = f->lf.sr_p[pl] - h_start * PXSTRIDE(dst_stride);

+            const ptrdiff_t src_stride = f->cur.stride[!!pl];

+            const pixel *src = f->lf.p[pl] - h_start * PXSTRIDE(src_stride);

+            const int h_end = 4 * (sbsz - 2 * (sby + 1 < sbh)) >> (ss_ver & !!pl);

+            const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+            const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;

+            const int src_w = (4 * f->bw + ss_hor) >> ss_hor;

+            f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w, src_w,

+                              h_end + h_start, f->resize_step[!!pl],

+                              f->resize_start[!!pl]);

+        }

+    }

     if (f->seq_hdr.restoration) {

-        bytefn(dav1d_lr_sbrow)(f, f->lf.p, sby);

+        bytefn(dav1d_lr_sbrow)(f, f->lf.sr_p, sby);

-    f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[0]);

-    f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;

-    f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;

+    f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.stride[0]);

+    f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;

+    f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.stride[1]) >> ss_ver;

+    f->lf.sr_p[0] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[0]);

+    f->lf.sr_p[1] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver;

+    f->lf.sr_p[2] += sbsz * 4 * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver;

     f->lf.prev_mask_ptr = f->lf.mask_ptr;

     if ((sby & 1) || f->seq_hdr.sb128) {

         f->lf.mask_ptr += f->sb128w;

@@ -1616,20 +1646,20 @@

     const int x_off = ts->tiling.col_start;

     const pixel *const y =

-        ((const pixel *) f->cur.p.data[0]) + x_off * 4 +

-                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.p.stride[0]);

+        ((const pixel *) f->cur.data[0]) + x_off * 4 +

+                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);

     pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,

                4 * (ts->tiling.col_end - x_off));

-    if (f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I400) {

-        const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;

-        const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;

+    if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {

+        const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

+        const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;

         const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +

-            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.p.stride[1]);

+            (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);

         for (int pl = 1; pl <= 2; pl++)

             pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],

-                       &((const pixel *) f->cur.p.data[pl])[uv_off],

+                       &((const pixel *) f->cur.data[pl])[uv_off],

                        4 * (ts->tiling.col_end - x_off) >> ss_hor);

--- a/src/tables.c

+++ b/src/tables.c

@@ -712,6 +712,41 @@

     { 0, 0,   2,  -1, 0,   0, 127, 0 }

};

+const int16_t dav1d_resize_filter[64][8] = {

+    {  0, 0,   0, 128,   0,   0, 0,  0 }, {  0, 0,  -1, 128,   2,  -1, 0,  0 },

+    {  0, 1,  -3, 127,   4,  -2, 1,  0 }, {  0, 1,  -4, 127,   6,  -3, 1,  0 },

+    {  0, 2,  -6, 126,   8,  -3, 1,  0 }, {  0, 2,  -7, 125,  11,  -4, 1,  0 },

+    { -1, 2,  -8, 125,  13,  -5, 2,  0 }, { -1, 3,  -9, 124,  15,  -6, 2,  0 },

+    { -1, 3, -10, 123,  18,  -6, 2, -1 }, { -1, 3, -11, 122,  20,  -7, 3, -1 },

+    { -1, 4, -12, 121,  22,  -8, 3, -1 }, { -1, 4, -13, 120,  25,  -9, 3, -1 },

+    { -1, 4, -14, 118,  28,  -9, 3, -1 }, { -1, 4, -15, 117,  30, -10, 4, -1 },

+    { -1, 5, -16, 116,  32, -11, 4, -1 }, { -1, 5, -16, 114,  35, -12, 4, -1 },

+    { -1, 5, -17, 112,  38, -12, 4, -1 }, { -1, 5, -18, 111,  40, -13, 5, -1 },

+    { -1, 5, -18, 109,  43, -14, 5, -1 }, { -1, 6, -19, 107,  45, -14, 5, -1 },

+    { -1, 6, -19, 105,  48, -15, 5, -1 }, { -1, 6, -19, 103,  51, -16, 5, -1 },

+    { -1, 6, -20, 101,  53, -16, 6, -1 }, { -1, 6, -20,  99,  56, -17, 6, -1 },

+    { -1, 6, -20,  97,  58, -17, 6, -1 }, { -1, 6, -20,  95,  61, -18, 6, -1 },

+    { -2, 7, -20,  93,  64, -18, 6, -2 }, { -2, 7, -20,  91,  66, -19, 6, -1 },

+    { -2, 7, -20,  88,  69, -19, 6, -1 }, { -2, 7, -20,  86,  71, -19, 6, -1 },

+    { -2, 7, -20,  84,  74, -20, 7, -2 }, { -2, 7, -20,  81,  76, -20, 7, -1 },

+    { -2, 7, -20,  79,  79, -20, 7, -2 }, { -1, 7, -20,  76,  81, -20, 7, -2 },

+    { -2, 7, -20,  74,  84, -20, 7, -2 }, { -1, 6, -19,  71,  86, -20, 7, -2 },

+    { -1, 6, -19,  69,  88, -20, 7, -2 }, { -1, 6, -19,  66,  91, -20, 7, -2 },

+    { -2, 6, -18,  64,  93, -20, 7, -2 }, { -1, 6, -18,  61,  95, -20, 6, -1 },

+    { -1, 6, -17,  58,  97, -20, 6, -1 }, { -1, 6, -17,  56,  99, -20, 6, -1 },

+    { -1, 6, -16,  53, 101, -20, 6, -1 }, { -1, 5, -16,  51, 103, -19, 6, -1 },

+    { -1, 5, -15,  48, 105, -19, 6, -1 }, { -1, 5, -14,  45, 107, -19, 6, -1 },

+    { -1, 5, -14,  43, 109, -18, 5, -1 }, { -1, 5, -13,  40, 111, -18, 5, -1 },

+    { -1, 4, -12,  38, 112, -17, 5, -1 }, { -1, 4, -12,  35, 114, -16, 5, -1 },

+    { -1, 4, -11,  32, 116, -16, 5, -1 }, { -1, 4, -10,  30, 117, -15, 4, -1 },

+    { -1, 3,  -9,  28, 118, -14, 4, -1 }, { -1, 3,  -9,  25, 120, -13, 4, -1 },

+    { -1, 3,  -8,  22, 121, -12, 4, -1 }, { -1, 3,  -7,  20, 122, -11, 3, -1 },

+    { -1, 2,  -6,  18, 123, -10, 3, -1 }, {  0, 2,  -6,  15, 124,  -9, 3, -1 },

+    {  0, 2,  -5,  13, 125,  -8, 2, -1 }, {  0, 1,  -4,  11, 125,  -7, 2,  0 },

+    {  0, 1,  -3,   8, 126,  -6, 2,  0 }, {  0, 1,  -3,   6, 127,  -4, 1,  0 },

+    {  0, 1,  -2,   4, 127,  -3, 1,  0 }, {  0, 0,  -1,   2, 128,  -1, 0,  0 },

+};

 const uint8_t dav1d_sm_weights[128] = {

     // Unused, because we always offset by bs, which is at least 2.

       0,   0,

--- a/src/tables.h

+++ b/src/tables.h

@@ -111,6 +111,7 @@

 extern const int8_t dav1d_mc_subpel_filters[5][15][8];

 extern const int8_t dav1d_mc_warp_filter[193][8];

+extern const int16_t dav1d_resize_filter[64][8];

 extern const uint8_t dav1d_sm_weights[128];

 extern const int16_t dav1d_dr_intra_derivative[90];