shithub: dav1d

Download patch

ref: e2892ffa2dd1e893d0229c5fcbe0bbbee8e11c20
parent: d32eb2d935a31288c34ccaa42b09bc60eacdfd68
author: Ronald S. Bultje <[email protected]>
date: Tue Sep 4 13:32:00 EDT 2018

Initial decoder implementation.

With minor contributions from:
 - Jean-Baptiste Kempf <[email protected]>
 - Marvin Scholz <[email protected]>
 - Hugo Beauzée-Luyssen <[email protected]>

--- /dev/null
+++ b/include/common/attributes.h
@@ -1,0 +1,54 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_COMMON_ATTRIBUTES_H__
+#define __DAV1D_COMMON_ATTRIBUTES_H__
+
+#include <stddef.h>
+
+/*
+ * API for variables, struct members (ALIGN()) like:
+ * uint8_t var[1][2][3][4]
+ * becomes:
+ * ALIGN(uint8_t var[1][2][3][4], alignment).
+ */
+#define ALIGN(line, align) \
+    line __attribute__((aligned(align)))
+
+/*
+ * API for stack alignment (ALIGN_STK_$align()) of variables like:
+ * uint8_t var[1][2][3][4]
+ * becomes:
+ * ALIGN_STK_$align(uint8_t, var, 1, [2][3][4])
+ */
+#define ALIGN_STK_32(type, var, sz1d, sznd) \
+    ALIGN(type var[sz1d]sznd, 32)
+// as long as stack is itself 16-byte aligned, this works (win64, gcc)
+#define ALIGN_STK_16(type, var, sz1d, sznd) \
+    ALIGN(type var[sz1d]sznd, 16)
+
+#endif /* __DAV1D_COMMON_ATTRIBUTES_H__ */
--- /dev/null
+++ b/include/common/bitdepth.h
@@ -1,0 +1,68 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_COMMON_BITDEPTH_H__
+#define __DAV1D_COMMON_BITDEPTH_H__ 1
+
+#include <stdint.h>
+#include <string.h>
+
+#if !defined(BITDEPTH)
+typedef void pixel;
+typedef void coef;
+#elif BITDEPTH == 8
+typedef uint8_t pixel;
+typedef int16_t coef;
+#define pixel_copy memcpy
+#define pixel_set memset
+#define iclip_pixel iclip_u8
+#define PIX_HEX_FMT "%02x"
+#define bytefn(x) x##_8bpc
+#define bitfn(x) x##_8bpc
+#define PXSTRIDE(x) x
+#elif BITDEPTH == 10 || BITDEPTH == 12
+typedef uint16_t pixel;
+typedef int32_t coef;
+#define pixel_copy(a, b, c) memcpy(a, b, (c) << 1)
+#define iclip_pixel(x) iclip(x, 0, ((1 << BITDEPTH) - 1))
+static inline void pixel_set(pixel *const dst, const int val, const int num) {
+    for (int n = 0; n < num; n++)
+        dst[n] = val;
+}
+#define PIX_HEX_FMT "%03x"
+#define bytefn(x) x##_16bpc
+#if BITDEPTH == 10
+#define bitfn(x) x##_10bpc
+#else
+#define bitfn(x) x##_12bpc
+#endif
+#define PXSTRIDE(x) (x >> 1)
+#else
+#error invalid value for bitdepth
+#endif
+
+#endif /* __DAV1D_COMMON_BITDEPTH_H__ */
--- /dev/null
+++ b/include/common/dump.h
@@ -1,0 +1,75 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_COMMON_DUMP_H__
+#define __DAV1D_COMMON_DUMP_H__
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "common/bitdepth.h"
+
+static inline void append_plane_to_file(const pixel *buf, ptrdiff_t stride,
+                                        int w, int h, const char *const file)
+{
+    FILE *const f = fopen(file, "ab");
+    while (h--) {
+        fwrite(buf, w * sizeof(pixel), 1, f);
+        buf += PXSTRIDE(stride);
+    }
+    fclose(f);
+}
+
+static inline void hex_dump(const pixel *buf, ptrdiff_t stride,
+                            int w, int h, const char *what)
+{
+    printf("%s\n", what);
+    while (h--) {
+        int x;
+        for (x = 0; x < w; x++)
+            printf(" " PIX_HEX_FMT, buf[x]);
+        buf += PXSTRIDE(stride);
+        printf("\n");
+    }
+}
+
+static inline void coef_dump(const coef *buf, const int w, const int h,
+                             const int len, const char *what)
+{
+    int y;
+    printf("%s\n", what);
+    for (y = 0; y < h; y++) {
+        int x;
+        for (x = 0; x < w; x++)
+            printf(" %*d", len, buf[x]);
+        buf += w;
+        printf("\n");
+    }
+}
+
+#endif /* __DAV1D_COMMON_DUMP_H__ */
--- /dev/null
+++ b/include/common/intops.h
@@ -1,0 +1,78 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_COMMON_INTOPS_H__
+#define __DAV1D_COMMON_INTOPS_H__
+
+#include <stdint.h>
+
+static inline int imax(const int a, const int b) {
+    return a > b ? a : b;
+}
+
+static inline int imin(const int a, const int b) {
+    return a < b ? a : b;
+}
+
+static inline int iclip(const int v, const int min, const int max) {
+    return v < min ? min : v > max ? max : v;
+}
+
+static inline int iclip_u8(const int v) {
+    return iclip(v, 0, 255);
+}
+
+static inline int apply_sign(const int v, const int s) {
+    return s < 0 ? -v : v;
+}
+
+static inline int ulog2(const unsigned v) {
+    return 31 - __builtin_clz(v);
+}
+
+static inline int u64log2(const uint64_t v) {
+    return 63 - __builtin_clzll(v);
+}
+
+static inline unsigned rl16(const uint8_t *const ptr) {
+    return (ptr[1] << 8) | ptr[0];
+}
+
+static inline unsigned rl32(const uint8_t *const ptr) {
+    return (rl16(&ptr[2]) << 16) | rl16(ptr);
+}
+
+static inline unsigned inv_recenter(const unsigned r, const unsigned v) {
+    if (v > (r << 1))
+        return v;
+    else if ((v & 1) == 0)
+        return (v >> 1) + r;
+    else
+        return r - ((v + 1) >> 1);
+}
+
+#endif /* __DAV1D_COMMON_INTOPS_H__ */
--- /dev/null
+++ b/include/common/mem.h
@@ -1,0 +1,76 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_COMMON_MEM_H__
+#define __DAV1D_COMMON_MEM_H__
+
+#include <assert.h>
+#include <stdlib.h>
+#include <malloc.h>
+
+/*
+ * Allocate memory aligned to `align` bytes. The return value must be
+ * released by calling dav1d_free_aligned() (plain free() is only valid
+ * for the posix_memalign() backend, not for _aligned_malloc()).
+ */
+static inline void *dav1d_alloc_aligned(size_t sz, size_t align) {
+#ifdef HAVE_POSIX_MEMALIGN
+    void *ptr;
+    assert(!(align & (align - 1)));
+    if (posix_memalign(&ptr, align, sz)) return NULL;
+    return ptr;
+#elif defined(HAVE_ALIGNED_MALLOC)
+    return _aligned_malloc(sz, align);
+#else
+#error Missing aligned alloc implementation
+#endif
+}
+
+static inline void dav1d_free_aligned(void* ptr) {
+#ifdef HAVE_POSIX_MEMALIGN
+    free(ptr);
+#elif defined(HAVE_ALIGNED_MALLOC)
+    _aligned_free(ptr);
+#endif
+}
+
+static inline void dav1d_freep_aligned(void* ptr) {
+    void **mem = (void **) ptr;
+    if (*mem) {
+        dav1d_free_aligned(*mem);
+        *mem = NULL;
+    }
+}
+
+static inline void freep(void *ptr) {
+    void **mem = (void **) ptr;
+    if (*mem) {
+        free(*mem);
+        *mem = NULL;
+    }
+}
+
+#endif /* __DAV1D_COMMON_MEM_H__ */
--- /dev/null
+++ b/include/common/validate.h
@@ -1,0 +1,59 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_COMMON_VALIDATE_H__
+#define __DAV1D_COMMON_VALIDATE_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#if defined(NDEBUG)
+#define debug_abort()
+#else
+#define debug_abort abort
+#endif
+
+#define validate_input_or_ret_with_msg(x, r, msg...) \
+    if (!(x)) { \
+        fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \
+                #x, __PRETTY_FUNCTION__); \
+        fprintf(stderr, msg); \
+        debug_abort(); \
+        return r; \
+    }
+
+#define validate_input_or_ret(x, r) \
+    if (!(x)) { \
+        fprintf(stderr, "Input validation check \'%s\' failed in %s!\n", \
+                #x, __PRETTY_FUNCTION__); \
+        debug_abort(); \
+        return r; \
+    }
+
+#define validate_input(x) validate_input_or_ret(x, )
+
+#endif /* __DAV1D_COMMON_VALIDATE_H__ */
--- /dev/null
+++ b/include/dav1d/common.h
@@ -1,0 +1,43 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+#ifndef DAV1D_API
+    #if defined _WIN32
+      #define DAV1D_API __declspec(dllexport)
+    #else
+      #if __GNUC__ >= 4
+        #define DAV1D_API __attribute__ ((visibility ("default")))
+      #else
+        #define DAV1D_API
+      #endif
+    #endif
+#endif
+
+#endif // __COMMON_H__
--- /dev/null
+++ b/include/dav1d/data.h
@@ -1,0 +1,52 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_DATA_H__
+#define __DAV1D_DATA_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common.h"
+
+typedef struct Dav1dData {
+    uint8_t *data; ///< data pointer
+    size_t sz; ///< data size
+    struct Dav1dRef *ref; ///< allocation origin
+} Dav1dData;
+
+/**
+ * Allocate data.
+ */
+DAV1D_API int dav1d_data_create(Dav1dData *data, size_t sz);
+
+/**
+ * Free data.
+ */
+DAV1D_API void dav1d_data_unref(Dav1dData *buf);
+
+#endif /* __DAV1D_DATA_H__ */
--- /dev/null
+++ b/include/dav1d/dav1d.h
@@ -1,0 +1,87 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_H__
+#define __DAV1D_H__
+
+#include "common.h"
+#include "picture.h"
+#include "data.h"
+
+typedef struct Dav1dContext Dav1dContext;
+typedef struct Dav1dRef Dav1dRef;
+
+typedef struct Dav1dSettings {
+    int n_frame_threads;
+    int n_tile_threads;
+} Dav1dSettings;
+
+/*
+ * Init the library.
+ */
+DAV1D_API void dav1d_init(void);
+
+/**
+ * Get library version.
+ */
+DAV1D_API const char *dav1d_version(void);
+
+/**
+ * Initialize settings to default values.
+ */
+DAV1D_API void dav1d_default_settings(Dav1dSettings *s);
+
+/**
+ * Open/allocate decoder instance.
+ *
+ * The resulting instance context will be placed in $c_out and can be used in
+ * iterative calls to dav1d_decode().
+ *
+ * You should free the context using dav1d_close() when you're done decoding.
+ *
+ * This returns < 0 (a negative errno code) on error, or 0 on success.
+ */
+DAV1D_API int dav1d_open(Dav1dContext **c_out, const Dav1dSettings *s);
+
+/**
+ * Decode one input frame. Library takes ownership of the passed-in reference.
+ * After that, it will return < 0 (a negative errno code, but not -EAGAIN) on
+ * failure, or 0 on success. If any decoded output frames are available, they
+ * will be placed in $out. The caller assumes ownership of the returned output
+ * picture.
+ *
+ * To flush the decoder (i.e. all input is finished), feed it NULL input data
+ * until it returns -EAGAIN.
+ */
+DAV1D_API int dav1d_decode(Dav1dContext *c, Dav1dData *in, Dav1dPicture *out);
+
+/**
+ * Close decoder instance, free all associated memory.
+ */
+DAV1D_API void dav1d_close(Dav1dContext *c);
+
+#endif /* __DAV1D_H__ */
--- /dev/null
+++ b/include/dav1d/picture.h
@@ -1,0 +1,145 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_PICTURE_H__
+#define __DAV1D_PICTURE_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common.h"
+
+enum Dav1dPixelLayout {
+    DAV1D_PIXEL_LAYOUT_I400, ///< monochrome
+    DAV1D_PIXEL_LAYOUT_I420, ///< 4:2:0 planar
+    DAV1D_PIXEL_LAYOUT_I422, ///< 4:2:2 planar
+    DAV1D_PIXEL_LAYOUT_I444, ///< 4:4:4 planar
+};
+
+enum Dav1dColorPrimaries {
+    DAV1D_COLOR_PRI_BT709 = 1,
+    DAV1D_COLOR_PRI_UNKNOWN = 2,
+    DAV1D_COLOR_PRI_BT470M = 4,
+    DAV1D_COLOR_PRI_BT470BG = 5,
+    DAV1D_COLOR_PRI_BT601 = 6,
+    DAV1D_COLOR_PRI_SMPTE240 = 7,
+    DAV1D_COLOR_PRI_FILM = 8,
+    DAV1D_COLOR_PRI_BT2020 = 9,
+    DAV1D_COLOR_PRI_XYZ = 10,
+    DAV1D_COLOR_PRI_SMPTE431 = 11,
+    DAV1D_COLOR_PRI_SMPTE432 = 12,
+    DAV1D_COLOR_PRI_EBU3213 = 22,
+};
+
+enum Dav1dTransferCharacteristics {
+    DAV1D_TRC_BT709 = 1,
+    DAV1D_TRC_UNKNOWN = 2,
+    DAV1D_TRC_BT470M = 4,
+    DAV1D_TRC_BT470BG = 5,
+    DAV1D_TRC_BT601 = 6,
+    DAV1D_TRC_SMPTE240 = 7,
+    DAV1D_TRC_LINEAR = 8,
+    DAV1D_TRC_LOG100 = 9,         ///< logarithmic (100:1 range)
+    DAV1D_TRC_LOG100_SQRT10 = 10, ///< logarithmic (100*sqrt(10):1 range)
+    DAV1D_TRC_IEC61966 = 11,
+    DAV1D_TRC_BT1361 = 12,
+    DAV1D_TRC_SRGB = 13,
+    DAV1D_TRC_BT2020_10BIT = 14,
+    DAV1D_TRC_BT2020_12BIT = 15,
+    DAV1D_TRC_SMPTE2084 = 16,     ///< PQ
+    DAV1D_TRC_SMPTE428 = 17,
+    DAV1D_TRC_HLG = 18,           ///< hybrid log/gamma (BT.2100 / ARIB STD-B67)
+};
+
+enum Dav1dMatrixCoefficients {
+    DAV1D_MC_IDENTITY = 0,
+    DAV1D_MC_BT709 = 1,
+    DAV1D_MC_UNKNOWN = 2,
+    DAV1D_MC_FCC = 4,
+    DAV1D_MC_BT470BG = 5,
+    DAV1D_MC_BT601 = 6,
+    DAV1D_MC_SMPTE240 = 7,
+    DAV1D_MC_SMPTE_YCGCO = 8,
+    DAV1D_MC_BT2020_NCL = 9,
+    DAV1D_MC_BT2020_CL = 10,
+    DAV1D_MC_SMPTE2085 = 11,
+    DAV1D_MC_CHROMAT_NCL = 12, ///< Chromaticity-derived
+    DAV1D_MC_CHROMAT_CL = 13,
+    DAV1D_MC_ICTCP = 14,
+};
+
+enum Dav1dChromaSamplePosition {
+    DAV1D_CHR_UNKNOWN = 0,
+    DAV1D_CHR_VERTICAL = 1,  ///< Horizontally co-located with luma(0, 0)
+                           ///< sample, between two vertical samples
+    DAV1D_CHR_COLOCATED = 2, ///< Co-located with luma(0, 0) sample
+};
+
+typedef struct Dav1dPictureParameters {
+    int w; ///< width (in pixels)
+    int h; ///< height (in pixels)
+    enum Dav1dPixelLayout layout; ///< format of the picture
+    int bpc; ///< bits per pixel component (8 or 10)
+
+    enum Dav1dColorPrimaries pri; ///< color primaries (av1)
+    enum Dav1dTransferCharacteristics trc; ///< transfer characteristics (av1)
+    enum Dav1dMatrixCoefficients mtrx; ///< matrix coefficients (av1)
+    enum Dav1dChromaSamplePosition chr; ///< chroma sample position (av1)
+    /**
+     * Pixel data uses JPEG pixel range ([0,255] for 8bits) instead of
+     * MPEG pixel range ([16,235] for 8bits luma, [16,240] for 8bits chroma).
+     */
+    int fullrange;
+} Dav1dPictureParameters;
+
+typedef struct Dav1dPicture {
+    /**
+     * Pointers to planar image data (Y is [0], U is [1], V is [2]). The data
+     * should be bytes (for 8 bpc) or words (for 10 bpc). In case of words
+     * containing 10 bpc image data, the pixels should be located in the LSB
+     * bits, so that values range between [0, 1023]; the upper bits should be
+     * zero'ed out.
+     */
+    void *data[3];
+    struct Dav1dRef *ref; ///< allocation origin
+
+    /**
+     * Number of bytes between 2 lines in data[] for luma [0] or chroma [1].
+     */
+    ptrdiff_t stride[2];
+
+    Dav1dPictureParameters p;
+
+    int poc; ///< frame number
+} Dav1dPicture;
+
+/**
+ * Release reference to a picture.
+ */
+DAV1D_API void dav1d_picture_unref(Dav1dPicture *p);
+
+#endif /* __DAV1D_PICTURE_H__ */
--- a/meson.build
+++ b/meson.build
@@ -37,9 +37,17 @@
     thread_dependency = cc.find_library('pthread')
 endif
 
-dav1d_inc_dirs = include_directories('include')
+dav1d_inc_dirs = include_directories(['include', 'include/dav1d'])
 
 #
+# Option handling
+#
+dav1d_bitdepths = get_option('bitdepths')
+foreach bitdepth : dav1d_bitdepths
+    cdata.set('CONFIG_@0@BPC'.format(bitdepth), 1)
+endforeach
+
+#
 # OS/Compiler feature detection
 #
 
@@ -121,13 +129,100 @@
 
 subdir('include')
 
+#
+# dav1d library
+#
+libdav1d_tmpl_sources = files(
+    'src/ipred.c',
+    'src/itx.c',
+    'src/ipred_prepare.c',
+    'src/lf_apply.c',
+    'src/loopfilter.c',
+    'src/mc.c',
+    'src/cdef_apply.c',
+    'src/cdef.c',
+    'src/lr_apply.c',
+    'src/looprestoration.c',
+    'src/recon.c'
+)
+
+# Build a helper library for each bitdepth
+bitdepth_objs = []
+foreach bitdepth : dav1d_bitdepths
+    bitdepth_lib = static_library(
+        'dav1d_bitdepth_@0@'.format(bitdepth),
+        libdav1d_tmpl_sources, config_h_target,
+        include_directories: dav1d_inc_dirs,
+        c_args: ['-DBITDEPTH=@0@'.format(bitdepth), stackalign_flag],
+        install: false,
+        build_by_default: false,
+    )
+    bitdepth_objs += bitdepth_lib.extract_all_objects()
+endforeach
+
+entrypoints_src = files(
+    'src/lib.c',
+    'src/thread_task.c'
+)
+entrypoints_lib = static_library(
+    'libdav1dentrypoint',
+    entrypoints_src,
+    include_directories: dav1d_inc_dirs,
+    c_args: [stackrealign_flag],
+    install: false,
+)
+entrypoints_objs = entrypoints_lib.extract_all_objects()
+
+libdav1d_sources = files(
+    'src/picture.c',
+    'src/data.c',
+    'src/ref.c',
+    'src/getbits.c',
+    'src/obu.c',
+    'src/decode.c',
+    'src/cdf.c',
+    'src/msac.c',
+    'src/tables.c',
+    'src/scan.c',
+    'src/dequant_tables.c',
+    'src/intra_edge.c',
+    'src/lf_mask.c',
+    'src/ref_mvs.c',
+    'src/warpmv.c',
+    'src/wedge.c',
+    'src/qm.c',
+)
+
 libdav1d = library('dav1d',
-    rev_target,
+    libdav1d_sources, rev_target,
     version: '0.0.1',
+    objects: [bitdepth_objs, entrypoints_objs],
     include_directories: dav1d_inc_dirs,
     c_args: [stackalign_flag],
     dependencies: thread_dependency,
     install: true
+)
+
+install_subdir('include/dav1d/', install_dir: 'include')
+
+#
+# dav1d cli tool
+#
+dav1d_sources = files(
+    'tools/dav1d.c',
+    'tools/dav1d_cli_parse.c',
+    'tools/input/input.c',
+    'tools/input/ivf.c',
+    'tools/output/md5.c',
+    'tools/output/output.c',
+    'tools/output/y4m2.c',
+    'tools/output/yuv.c'
+)
+
+dav1d = executable('dav1d',
+    dav1d_sources, rev_target,
+    link_with: libdav1d,
+    include_directories: [dav1d_inc_dirs, include_directories('tools')]
 )
 
 #
--- /dev/null
+++ b/meson_options.txt
@@ -1,0 +1,6 @@
+# General options
+
+option('bitdepths',
+    type: 'array',
+    choices: ['8', '10'],
+    description: 'Enable only specified bitdepths')
--- /dev/null
+++ b/src/cdef.c
@@ -1,0 +1,271 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+
+#include "common/intops.h"
+
+#include "src/cdef.h"
+
+static const int8_t cdef_directions[8 /* dir */][2 /* pass */][2 /* y, x */] = {
+    { { -1, 1 }, { -2,  2 } },
+    { {  0, 1 }, { -1,  2 } },
+    { {  0, 1 }, {  0,  2 } },
+    { {  0, 1 }, {  1,  2 } },
+    { {  1, 1 }, {  2,  2 } },
+    { {  1, 0 }, {  2,  1 } },
+    { {  1, 0 }, {  2,  0 } },
+    { {  1, 0 }, {  2, -1 } }
+};
+static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
+static const uint8_t cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
+
+static inline int constrain(const int diff, const int threshold,
+                            const int damping)
+{
+    if (!threshold) return 0;
+    const int shift = imax(0, damping - ulog2(threshold));
+    return apply_sign(imin(abs(diff), imax(0, threshold - (abs(diff) >> shift))),
+                      diff);
+}
+
+/*
+ * <code partially copied from libaom>
+ */
+
+#define CDEF_VERY_LARGE (30000)
+
+static void fill(uint16_t *tmp, const ptrdiff_t stride,
+                 const int w, const int h)
+{
+    for (int y = 0; y < h; y++) {
+        for (int x = 0; x < w; x++)
+            tmp[x] = CDEF_VERY_LARGE;
+        tmp += stride;
+    }
+}
+
+/* Smooth in the direction detected. */
+static void cdef_filter_block_c(pixel *const dst, const ptrdiff_t dst_stride,
+                                /*const*/ pixel *const top[2],
+                                const int w, const int h, const int pri_strength,
+                                const int sec_strength, const int dir,
+                                const int damping, const enum CdefEdgeFlags edges)
+{
+    const ptrdiff_t tmp_stride = w + 4;
+    uint16_t tmp[tmp_stride * (h + 4)];
+    const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
+    const uint8_t *const sec_taps = cdef_sec_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
+
+    // fill extended input buffer
+    int x_start = -2, x_end = w + 2, y_start = -2, y_end = h + 2;
+    if (!(edges & HAVE_TOP)) {
+        fill(tmp, tmp_stride, w + 4, 2);
+        y_start = 0;
+    }
+    if (!(edges & HAVE_BOTTOM)) {
+        fill(tmp + (h + 2) * tmp_stride, tmp_stride, w + 4, 2);
+        y_end -= 2;
+    }
+    if (!(edges & HAVE_LEFT)) {
+        fill(tmp + (2 + y_start) * tmp_stride, tmp_stride, 2, y_end - y_start);
+        x_start = 0;
+    }
+    if (!(edges & HAVE_RIGHT)) {
+        fill(tmp + (2 + y_start) * tmp_stride + w + 2, tmp_stride,
+             2, y_end - y_start);
+        x_end -= 2;
+    }
+    for (int y = y_start; y < 0; y++)
+        for (int x = x_start; x < x_end; x++)
+            tmp[(y + 2) * tmp_stride + (x + 2)] = top[y & 1][x];
+    for (int y = 0; y < y_end; y++)
+        for (int x = x_start; x < x_end; x++)
+            tmp[(y + 2) * tmp_stride + (x + 2)] = dst[y * PXSTRIDE(dst_stride) + x];
+
+    // run actual filter
+    for (int y = 0; y < h; y++) {
+        for (int x = 0; x < w; x++) {
+            int sum = 0;
+            const int px = dst[y * PXSTRIDE(dst_stride) + x];
+            int max = px, min = px;
+            for (int k = 0; k < 2; k++) {
+#define extpx(y, x) tmp[((y) + 2) * tmp_stride + ((x) + 2)]
+                const int8_t *const off1 = cdef_directions[dir][k];
+                const int p0 = extpx(y + off1[0], x + off1[1]);
+                const int p1 = extpx(y - off1[0], x - off1[1]);
+                sum += pri_taps[k] * constrain(p0 - px, pri_strength, damping);
+                sum += pri_taps[k] * constrain(p1 - px, pri_strength, damping);
+                if (p0 != CDEF_VERY_LARGE) max = imax(p0, max);
+                if (p1 != CDEF_VERY_LARGE) max = imax(p1, max);
+                min = imin(p0, min);
+                min = imin(p1, min);
+                const int8_t *const off2 = cdef_directions[(dir + 2) & 7][k];
+                const int s0 = extpx(y + off2[0], x + off2[1]);
+                const int s1 = extpx(y - off2[0], x - off2[1]);
+                const int8_t *const off3 = cdef_directions[(dir + 6) & 7][k];
+                const int s2 = extpx(y + off3[0], x + off3[1]);
+                const int s3 = extpx(y - off3[0], x - off3[1]);
+#undef extpx
+                if (s0 != CDEF_VERY_LARGE) max = imax(s0, max);
+                if (s1 != CDEF_VERY_LARGE) max = imax(s1, max);
+                if (s2 != CDEF_VERY_LARGE) max = imax(s2, max);
+                if (s3 != CDEF_VERY_LARGE) max = imax(s3, max);
+                min = imin(s0, min);
+                min = imin(s1, min);
+                min = imin(s2, min);
+                min = imin(s3, min);
+                sum += sec_taps[k] * constrain(s0 - px, sec_strength, damping);
+                sum += sec_taps[k] * constrain(s1 - px, sec_strength, damping);
+                sum += sec_taps[k] * constrain(s2 - px, sec_strength, damping);
+                sum += sec_taps[k] * constrain(s3 - px, sec_strength, damping);
+            }
+            dst[y * PXSTRIDE(dst_stride) + x] =
+                iclip(px + ((8 + sum - (sum < 0)) >> 4), min, max);
+        }
+    }
+}
+
+/*
+ * </code partially copied from libaom>
+ */
+
+#define cdef_fn(w, h) \
+static void cdef_filter_block_##w##x##h##_c(pixel *const dst, \
+                                            const ptrdiff_t stride, \
+                                            /*const*/ pixel *const top[2], \
+                                            const int pri_strength, \
+                                            const int sec_strength, \
+                                            const int dir, \
+                                            const int damping, \
+                                            const enum CdefEdgeFlags edges) \
+{ \
+    cdef_filter_block_c(dst, stride, top, w, h, pri_strength, sec_strength, \
+                        dir, damping, edges); \
+}
+
+cdef_fn(4, 4);
+cdef_fn(4, 8);
+cdef_fn(8, 8);
+
+/*
+ * <code copied from libaom>
+ */
+
+/* Detect direction. 0 means 45-degree up-right, 2 is horizontal, and so on.
+   The search minimizes the weighted variance along all the lines in a
+   particular direction, i.e. the squared error between the input and a
+   "predicted" block where each pixel is replaced by the average along a line
+   in a particular direction. Since each direction has the same sum(x^2) term,
+   that term is never computed. See Section 2, step 2, of:
+   http://jmvalin.ca/notes/intra_paint.pdf */
+static const uint16_t div_table[] = {
+    0, 840, 420, 280, 210, 168, 140, 120, 105
+};
+static int cdef_find_dir_c(const pixel *img, const ptrdiff_t stride,
+                           unsigned *const var)
+{
+    int i;
+    int32_t cost[8] = { 0 };
+    int partial[8][15] = { { 0 } };
+    int32_t best_cost = 0;
+    int best_dir = 0;
+    /* Instead of dividing by n between 2 and 8, we multiply by 3*5*7*8/n.
+     The output is then 840 times larger, but we don't care for finding
+     the max. */
+    for (i = 0; i < 8; i++) {
+        int j;
+        for (j = 0; j < 8; j++) {
+            int x;
+            /* We subtract 128 here to reduce the maximum range of the squared
+             partial sums. */
+            x = (img[i * PXSTRIDE(stride) + j] >> (BITDEPTH - 8)) - 128;
+            partial[0][i + j] += x;
+            partial[1][i + j / 2] += x;
+            partial[2][i] += x;
+            partial[3][3 + i - j / 2] += x;
+            partial[4][7 + i - j] += x;
+            partial[5][3 - i / 2 + j] += x;
+            partial[6][j] += x;
+            partial[7][i / 2 + j] += x;
+        }
+    }
+    for (i = 0; i < 8; i++) {
+        cost[2] += partial[2][i] * partial[2][i];
+        cost[6] += partial[6][i] * partial[6][i];
+    }
+    cost[2] *= div_table[8];
+    cost[6] *= div_table[8];
+    for (i = 0; i < 7; i++) {
+        cost[0] += (partial[0][i] * partial[0][i] +
+                    partial[0][14 - i] * partial[0][14 - i]) *
+                   div_table[i + 1];
+        cost[4] += (partial[4][i] * partial[4][i] +
+                    partial[4][14 - i] * partial[4][14 - i]) *
+                   div_table[i + 1];
+    }
+    cost[0] += partial[0][7] * partial[0][7] * div_table[8];
+    cost[4] += partial[4][7] * partial[4][7] * div_table[8];
+    for (i = 1; i < 8; i += 2) {
+        int j;
+        for (j = 0; j < 4 + 1; j++) {
+            cost[i] += partial[i][3 + j] * partial[i][3 + j];
+        }
+        cost[i] *= div_table[8];
+        for (j = 0; j < 4 - 1; j++) {
+            cost[i] += (partial[i][j] * partial[i][j] +
+                        partial[i][10 - j] * partial[i][10 - j]) *
+                       div_table[2 * j + 2];
+        }
+    }
+    for (i = 0; i < 8; i++) {
+        if (cost[i] > best_cost) {
+            best_cost = cost[i];
+            best_dir = i;
+        }
+    }
+    /* Difference between the optimal variance and the variance along the
+     orthogonal direction. Again, the sum(x^2) terms cancel out. */
+    *var = best_cost - cost[(best_dir + 4) & 7];
+    /* We'd normally divide by 840, but dividing by 1024 is close enough
+     for what we're going to do with this. */
+    *var >>= 10;
+    return best_dir;
+}
+
+/*
+ * </code copied from libaom>
+ */
+
+void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
+    c->dir = cdef_find_dir_c;
+    c->fb[0] = cdef_filter_block_8x8_c;
+    c->fb[1] = cdef_filter_block_4x8_c;
+    c->fb[2] = cdef_filter_block_4x4_c;
+}
--- /dev/null
+++ b/src/cdef.h
@@ -1,0 +1,62 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_CDEF_H__
+#define __DAV1D_SRC_CDEF_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common/bitdepth.h"
+
+enum CdefEdgeFlags {
+    HAVE_LEFT = 1 << 0,
+    HAVE_RIGHT = 1 << 1,
+    HAVE_TOP = 1 << 2,
+    HAVE_BOTTOM = 1 << 3,
+};
+
+// CDEF operates entirely on pre-filter data; if bottom/right edges are
+// present (according to $edges), then the pre-filter data is located in
+// $dst. However, the edge pixels above $dst may be post-filter, so in
+// order to get access to pre-filter top pixels, use $top.
+typedef void (*cdef_fn)(pixel *dst, ptrdiff_t stride,
+                        /*const*/ pixel *const top[2],
+                        int pri_strength, int sec_strength,
+                        int dir, int damping, enum CdefEdgeFlags edges);
+typedef int (*cdef_dir_fn)(const pixel *dst, ptrdiff_t stride,
+                           unsigned *var);
+
+typedef struct Dav1dCdefDSPContext {
+    cdef_dir_fn dir;
+    cdef_fn fb[3 /* 444/luma, 422, 420 */];
+} Dav1dCdefDSPContext;
+
+void dav1d_cdef_dsp_init_8bpc(Dav1dCdefDSPContext *c);
+void dav1d_cdef_dsp_init_10bpc(Dav1dCdefDSPContext *c);
+
+#endif /* __DAV1D_SRC_CDEF_H__ */
--- /dev/null
+++ b/src/cdef_apply.c
@@ -1,0 +1,237 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/cdef_apply.h"
+
+static void backup2lines(pixel *const dst[3][2],
+                         /*const*/ pixel *const src[3],
+                         const ptrdiff_t src_stride[2], int y_off, int w,
+                         const enum Dav1dPixelLayout layout)
+{
+    pixel_copy(dst[0][0], src[0] + (y_off - 2) * PXSTRIDE(src_stride[0]), w);
+    pixel_copy(dst[0][1], src[0] + (y_off - 1) * PXSTRIDE(src_stride[0]), w);
+
+    if (layout == DAV1D_PIXEL_LAYOUT_I400) return;
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+
+    w >>= ss_hor;
+    y_off >>= ss_ver;
+    pixel_copy(dst[1][0], src[1] + (y_off - 2) * PXSTRIDE(src_stride[1]), w);
+    pixel_copy(dst[1][1], src[1] + (y_off - 1) * PXSTRIDE(src_stride[1]), w);
+    pixel_copy(dst[2][0], src[2] + (y_off - 2) * PXSTRIDE(src_stride[1]), w);
+    pixel_copy(dst[2][1], src[2] + (y_off - 1) * PXSTRIDE(src_stride[1]), w);
+}
+
+static void backup2x8(pixel dst[3][8][2],
+                      /*const*/ pixel *const src[3],
+                      const ptrdiff_t src_stride[2], int x_off,
+                      const enum Dav1dPixelLayout layout)
+{
+    for (int y = 0, y_off = 0; y < 8; y++, y_off += PXSTRIDE(src_stride[0]))
+        pixel_copy(dst[0][y], &src[0][y_off + x_off - 2], 2);
+
+    if (layout == DAV1D_PIXEL_LAYOUT_I400) return;
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+
+    x_off >>= ss_hor;
+    for (int y = 0, y_off = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(src_stride[1])) {
+        pixel_copy(dst[1][y], &src[1][y_off + x_off - 2], 2);
+        pixel_copy(dst[2][y], &src[2][y_off + x_off - 2], 2);
+    }
+}
+
+static void restore2x8(pixel *const dst[3],
+                       const ptrdiff_t dst_stride[2],
+                       const pixel src[3][8][2], const enum Dav1dPixelLayout layout)
+{
+    for (int y = 0, y_off = 0; y < 8; y++, y_off += PXSTRIDE(dst_stride[0]))
+        pixel_copy(&dst[0][y_off - 2], src[0][y], 2);
+
+    if (layout == DAV1D_PIXEL_LAYOUT_I400) return;
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+
+    for (int y = 0, y_off = 0; y < (8 >> ss_ver); y++, y_off += PXSTRIDE(dst_stride[1])) {
+        pixel_copy(&dst[1][y_off - 2], src[1][y], 2);
+        pixel_copy(&dst[2][y_off - 2], src[2][y], 2);
+    }
+}
+
+static int adjust_strength(const int strength, const unsigned var) {
+    if (!var) return 0;
+    const int i = var >> 6 ? imin(ulog2(var >> 6), 12) : 0;
+    return (strength * (4 + i) + 8) >> 4;
+}
+
+void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *const f,
+                             pixel *const p[3],
+                             const Av1Filter *const lflvl,
+                             const int by_start, const int by_end)
+{
+    const Dav1dDSPContext *const dsp = f->dsp;
+    enum CdefEdgeFlags edges = HAVE_BOTTOM | (by_start > 0 ? HAVE_TOP : 0);
+    pixel *ptrs[3] = { p[0], p[1], p[2] };
+    const int sbsz = 16;
+    const int sb64w = f->sb128w << 1;
+    const int damping = f->frame_hdr.cdef.damping + BITDEPTH - 8;
+    const enum Dav1dPixelLayout layout = f->cur.p.p.layout;
+    const int uv_idx = DAV1D_PIXEL_LAYOUT_I444 - layout;
+    const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+
+    // FIXME a design improvement that could be made here is to keep a set of
+    // flags for each block position on whether the block was filtered; if not,
+    // the backup of pre-filter data is empty, and the restore is therefore
+    // unnecessary as well.
+
+    for (int by = by_start; by < by_end; by += 2, edges |= HAVE_TOP) {
+        const int tf = f->lf.top_pre_cdef_toggle;
+        if (by + 2 >= f->bh) edges &= ~HAVE_BOTTOM;
+
+        if (edges & HAVE_BOTTOM) {
+            // backup pre-filter data for next iteration
+            backup2lines(f->lf.cdef_line_ptr[!tf], ptrs, f->cur.p.stride,
+                         8, f->bw * 4, layout);
+        }
+
+        pixel lr_bak[2 /* idx */][3 /* plane */][8 /* y */][2 /* x */];
+        pixel *iptrs[3] = { ptrs[0], ptrs[1], ptrs[2] };
+        edges &= ~HAVE_LEFT;
+        edges |= HAVE_RIGHT;
+        for (int sbx = 0, last_skip = 1; sbx < sb64w; sbx++, edges |= HAVE_LEFT) {
+            const int sb128x = sbx >>1;
+            const int sb64_idx = ((by & sbsz) >> 3) + (sbx & 1);
+            const int cdef_idx = lflvl[sb128x].cdef_idx[sb64_idx];
+            if (cdef_idx == -1 ||
+                (!f->frame_hdr.cdef.y_strength[cdef_idx] &&
+                 !f->frame_hdr.cdef.uv_strength[cdef_idx]))
+            {
+                last_skip = 1;
+                goto next_sb;
+            }
+
+            const int y_lvl = f->frame_hdr.cdef.y_strength[cdef_idx];
+            const int uv_lvl = f->frame_hdr.cdef.uv_strength[cdef_idx];
+            pixel *bptrs[3] = { iptrs[0], iptrs[1], iptrs[2] };
+            for (int bx = sbx * sbsz; bx < imin((sbx + 1) * sbsz, f->bw);
+                 bx += 2, edges |= HAVE_LEFT)
+            {
+                if (bx + 2 >= f->bw) edges &= ~HAVE_RIGHT;
+
+                // check if this 8x8 block had any coded coefficients; if not,
+                // go to the next block
+                const unsigned bx_mask = 3U << (bx & 30);
+                const int by_idx = by & 30;
+                if (!((lflvl[sb128x].noskip_mask[by_idx + 0] |
+                       lflvl[sb128x].noskip_mask[by_idx + 1]) & bx_mask))
+                {
+                    last_skip = 1;
+                    goto next_b;
+                }
+
+                if (!last_skip) {
+                    // backup post-filter data (will be restored at the end)
+                    backup2x8(lr_bak[1], bptrs, f->cur.p.stride, 0, layout);
+
+                    // restore pre-filter data from last iteration
+                    restore2x8(bptrs, f->cur.p.stride, lr_bak[0], layout);
+                }
+                if (edges & HAVE_RIGHT) {
+                    // backup pre-filter data for next iteration
+                    backup2x8(lr_bak[0], bptrs, f->cur.p.stride, 8, layout);
+                }
+
+                // the actual filter
+                const int y_pri_lvl = (y_lvl >> 2) << (BITDEPTH - 8);
+                int y_sec_lvl = y_lvl & 3;
+                y_sec_lvl += y_sec_lvl == 3;
+                y_sec_lvl <<= BITDEPTH - 8;
+                const int uv_pri_lvl = (uv_lvl >> 2) << (BITDEPTH - 8);
+                int uv_sec_lvl = uv_lvl & 3;
+                uv_sec_lvl += uv_sec_lvl == 3;
+                uv_sec_lvl <<= BITDEPTH - 8;
+                unsigned variance;
+                const int dir = dsp->cdef.dir(bptrs[0], f->cur.p.stride[0],
+                                              &variance);
+                if (y_lvl) {
+                    dsp->cdef.fb[0](bptrs[0], f->cur.p.stride[0],
+                                    (pixel *const [2]) {
+                                        &f->lf.cdef_line_ptr[tf][0][0][bx * 4],
+                                        &f->lf.cdef_line_ptr[tf][0][1][bx * 4],
+                                    },
+                                    adjust_strength(y_pri_lvl, variance),
+                                    y_sec_lvl, y_pri_lvl ? dir : 0,
+                                    damping, edges);
+                }
+                if (uv_lvl && has_chroma) {
+                    const int uvdir =
+                        f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I422 ? dir :
+                        ((uint8_t[]) { 7, 0, 2, 4, 5, 6, 6, 6 })[dir];
+                    for (int pl = 1; pl <= 2; pl++) {
+                        dsp->cdef.fb[uv_idx](bptrs[pl], f->cur.p.stride[1],
+                                             (pixel *const [2]) {
+                                                 &f->lf.cdef_line_ptr[tf][pl][0][bx * 4 >> ss_hor],
+                                                 &f->lf.cdef_line_ptr[tf][pl][1][bx * 4 >> ss_hor],
+                                             },
+                                             uv_pri_lvl, uv_sec_lvl,
+                                             uv_pri_lvl ? uvdir : 0,
+                                             damping - 1, edges);
+                    }
+                }
+
+                if (!last_skip) {
+                    // restore post-filter data from the beginning of this loop
+                    restore2x8(bptrs, f->cur.p.stride, lr_bak[1], layout);
+                }
+                last_skip = 0;
+
+            next_b:
+                bptrs[0] += 8;
+                bptrs[1] += 8 >> ss_hor;
+                bptrs[2] += 8 >> ss_hor;
+            }
+
+        next_sb:
+            iptrs[0] += sbsz * 4;
+            iptrs[1] += sbsz * 4 >> ss_hor;
+            iptrs[2] += sbsz * 4 >> ss_hor;
+        }
+
+        ptrs[0] += 8 * PXSTRIDE(f->cur.p.stride[0]);
+        ptrs[1] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+        ptrs[2] += 8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+        f->lf.top_pre_cdef_toggle ^= 1;
+    }
+}
--- /dev/null
+++ b/src/cdef_apply.h
@@ -1,0 +1,38 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_CDEF_APPLY_H__
+#define __DAV1D_SRC_CDEF_APPLY_H__
+
+#include "common/bitdepth.h"
+
+#include "src/internal.h"
+
+void bytefn(dav1d_cdef_brow)(Dav1dFrameContext *f, pixel *const p[3],
+                             const Av1Filter *lflvl, int by_start, int by_end);
+
+#endif /* __DAV1D_SRC_CDEF_APPLY_H__ */
--- /dev/null
+++ b/src/cdf.c
@@ -1,0 +1,4251 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/cdf.h"
+
+#define AOM_ICDF(x) (32768-(x))
+
+#define AOM_CDF2(a) \
+    AOM_ICDF(a)
+#define AOM_CDF3(a,b) \
+    AOM_ICDF(a), AOM_ICDF(b)
+#define AOM_CDF4(a,b,c) \
+    AOM_ICDF(a), AOM_CDF3(b,c)
+#define AOM_CDF5(a,b,c,d) \
+    AOM_ICDF(a), AOM_CDF4(b,c,d)
+#define AOM_CDF6(a,b,c,d,e) \
+    AOM_ICDF(a), AOM_CDF5(b,c,d,e)
+#define AOM_CDF7(a,b,c,d,e,f) \
+    AOM_ICDF(a), AOM_CDF6(b,c,d,e,f)
+#define AOM_CDF8(a,b,c,d,e,f,g) \
+    AOM_ICDF(a), AOM_CDF7(b,c,d,e,f,g)
+#define AOM_CDF9(a,b,c,d,e,f,g,h) \
+    AOM_ICDF(a), AOM_CDF8(b,c,d,e,f,g,h)
+#define AOM_CDF10(a,b,c,d,e,f,g,h,i) \
+    AOM_ICDF(a), AOM_CDF9(b,c,d,e,f,g,h,i)
+#define AOM_CDF11(a,b,c,d,e,f,g,h,i,j) \
+    AOM_ICDF(a), AOM_CDF10(b,c,d,e,f,g,h,i,j)
+#define AOM_CDF12(a,b,c,d,e,f,g,h,i,j,k) \
+    AOM_ICDF(a), AOM_CDF11(b,c,d,e,f,g,h,i,j,k)
+#define AOM_CDF13(a,b,c,d,e,f,g,h,i,j,k,l) \
+    AOM_ICDF(a), AOM_CDF12(b,c,d,e,f,g,h,i,j,k,l)
+#define AOM_CDF14(a,b,c,d,e,f,g,h,i,j,k,l,m) \
+    AOM_ICDF(a), AOM_CDF13(b,c,d,e,f,g,h,i,j,k,l,m)
+#define AOM_CDF15(a,b,c,d,e,f,g,h,i,j,k,l,m,n) \
+    AOM_ICDF(a), AOM_CDF14(b,c,d,e,f,g,h,i,j,k,l,m,n)
+#define AOM_CDF16(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o) \
+    AOM_ICDF(a), AOM_CDF15(b,c,d,e,f,g,h,i,j,k,l,m,n,o)
+
+static const CdfModeContext av1_default_cdf = {
+    .y_mode = {
+        { AOM_CDF13(22801, 23489, 24293, 24756, 25601, 26123,
+                    26606, 27418, 27945, 29228, 29685, 30349) },
+        { AOM_CDF13(18673, 19845, 22631, 23318, 23950, 24649,
+                    25527, 27364, 28152, 29701, 29984, 30852) },
+        { AOM_CDF13(19770, 20979, 23396, 23939, 24241, 24654,
+                    25136, 27073, 27830, 29360, 29730, 30659) },
+        { AOM_CDF13(20155, 21301, 22838, 23178, 23261, 23533,
+                    23703, 24804, 25352, 26575, 27016, 28049) },
+    }, .use_filter_intra = {
+        [BS_4x4]     = { AOM_CDF2(4621) },
+        [BS_4x8]     = { AOM_CDF2(6743) },
+        [BS_8x4]     = { AOM_CDF2(5893) },
+        [BS_8x8]     = { AOM_CDF2(7866) },
+        [BS_8x16]    = { AOM_CDF2(12551) },
+        [BS_16x8]    = { AOM_CDF2(9394) },
+        [BS_16x16]   = { AOM_CDF2(12408) },
+        [BS_16x32]   = { AOM_CDF2(14301) },
+        [BS_32x16]   = { AOM_CDF2(12756) },
+        [BS_32x32]   = { AOM_CDF2(22343) },
+        [BS_32x64]   = { AOM_CDF2(16384) },
+        [BS_64x32]   = { AOM_CDF2(16384) },
+        [BS_64x64]   = { AOM_CDF2(16384) },
+        [BS_64x128]  = { AOM_CDF2(16384) },
+        [BS_128x64]  = { AOM_CDF2(16384) },
+        [BS_128x128] = { AOM_CDF2(16384) },
+        [BS_4x16]    = { AOM_CDF2(12770) },
+        [BS_16x4]    = { AOM_CDF2(10368) },
+        [BS_8x32]    = { AOM_CDF2(20229) },
+        [BS_32x8]    = { AOM_CDF2(18101) },
+        [BS_16x64]   = { AOM_CDF2(16384) },
+        [BS_64x16]   = { AOM_CDF2(16384) },
+    }, .filter_intra = {
+        AOM_CDF5(8949, 12776, 17211, 29558),
+    }, .uv_mode = {
+        {
+            { AOM_CDF13(22631, 24152, 25378, 25661, 25986, 26520, 27055, 27923,
+                        28244, 30059, 30941, 31961) },
+            { AOM_CDF13(9513, 26881, 26973, 27046, 27118, 27664, 27739, 27824,
+                        28359, 29505, 29800, 31796) },
+            { AOM_CDF13(9845, 9915, 28663, 28704, 28757, 28780, 29198, 29822,
+                        29854, 30764, 31777, 32029) },
+            { AOM_CDF13(13639, 13897, 14171, 25331, 25606, 25727, 25953, 27148,
+                        28577, 30612, 31355, 32493) },
+            { AOM_CDF13(9764, 9835, 9930, 9954, 25386, 27053, 27958, 28148,
+                        28243, 31101, 31744, 32363) },
+            { AOM_CDF13(11825, 13589, 13677, 13720, 15048, 29213, 29301, 29458,
+                        29711, 31161, 31441, 32550) },
+            { AOM_CDF13(14175, 14399, 16608, 16821, 17718, 17775, 28551, 30200,
+                        30245, 31837, 32342, 32667) },
+            { AOM_CDF13(12885, 13038, 14978, 15590, 15673, 15748, 16176, 29128,
+                        29267, 30643, 31961, 32461) },
+            { AOM_CDF13(12026, 13661, 13874, 15305, 15490, 15726, 15995, 16273,
+                        28443, 30388, 30767, 32416) },
+            { AOM_CDF13(19052, 19840, 20579, 20916, 21150, 21467, 21885, 22719,
+                        23174, 28861, 30379, 32175) },
+            { AOM_CDF13(18627, 19649, 20974, 21219, 21492, 21816, 22199, 23119,
+                        23527, 27053, 31397, 32148) },
+            { AOM_CDF13(17026, 19004, 19997, 20339, 20586, 21103, 21349, 21907,
+                        22482, 25896, 26541, 31819) },
+            { AOM_CDF13(12124, 13759, 14959, 14992, 15007, 15051, 15078, 15166,
+                        15255, 15753, 16039, 16606) }
+        }, {
+            { AOM_CDF14(10407, 11208, 12900, 13181, 13823, 14175, 14899, 15656,
+                      15986, 20086, 20995, 22455, 24212) },
+            { AOM_CDF14(4532, 19780, 20057, 20215, 20428, 21071, 21199, 21451,
+                        22099, 24228, 24693, 27032, 29472) },
+            { AOM_CDF14(5273, 5379, 20177, 20270, 20385, 20439, 20949, 21695,
+                        21774, 23138, 24256, 24703, 26679) },
+            { AOM_CDF14(6740, 7167, 7662, 14152, 14536, 14785, 15034, 16741,
+                        18371, 21520, 22206, 23389, 24182) },
+            { AOM_CDF14(4987, 5368, 5928, 6068, 19114, 20315, 21857, 22253,
+                        22411, 24911, 25380, 26027, 26376) },
+            { AOM_CDF14(5370, 6889, 7247, 7393, 9498, 21114, 21402, 21753,
+                        21981, 24780, 25386, 26517, 27176) },
+            { AOM_CDF14(4816, 4961, 7204, 7326, 8765, 8930, 20169, 20682,
+                        20803, 23188, 23763, 24455, 24940) },
+            { AOM_CDF14(6608, 6740, 8529, 9049, 9257, 9356, 9735, 18827,
+                        19059, 22336, 23204, 23964, 24793) },
+            { AOM_CDF14(5998, 7419, 7781, 8933, 9255, 9549, 9753, 10417,
+                        18898, 22494, 23139, 24764, 25989) },
+            { AOM_CDF14(10660, 11298, 12550, 12957, 13322, 13624, 14040, 15004,
+                        15534, 20714, 21789, 23443, 24861) },
+            { AOM_CDF14(10522, 11530, 12552, 12963, 13378, 13779, 14245, 15235,
+                        15902, 20102, 22696, 23774, 25838) },
+            { AOM_CDF14(10099, 10691, 12639, 13049, 13386, 13665, 14125, 15163,
+                        15636, 19676, 20474, 23519, 25208) },
+            { AOM_CDF14(3144, 5087, 7382, 7504, 7593, 7690, 7801, 8064,
+                        8232, 9248, 9875, 10521, 29048) }
+        }
+    }, .angle_delta = {
+        { AOM_CDF7(2180, 5032, 7567, 22776, 26989, 30217) },
+        { AOM_CDF7(2301, 5608, 8801, 23487, 26974, 30330) },
+        { AOM_CDF7(3780, 11018, 13699, 19354, 23083, 31286) },
+        { AOM_CDF7(4581, 11226, 15147, 17138, 21834, 28397) },
+        { AOM_CDF7(1737, 10927, 14509, 19588, 22745, 28823) },
+        { AOM_CDF7(2664, 10176, 12485, 17650, 21600, 30495) },
+        { AOM_CDF7(2240, 11096, 15453, 20341, 22561, 28917) },
+        { AOM_CDF7(3605, 10428, 12459, 17676, 21244, 30655) }
+    }, .filter = {
+        {
+            { AOM_CDF3(31935, 32720) },
+            { AOM_CDF3(5568, 32719) },
+            { AOM_CDF3(422, 2938) },
+            { AOM_CDF3(28244, 32608) },
+            { AOM_CDF3(31206, 31953) },
+            { AOM_CDF3(4862, 32121) },
+            { AOM_CDF3(770, 1152) },
+            { AOM_CDF3(20889, 25637) },
+        }, {
+            { AOM_CDF3(31910, 32724) },
+            { AOM_CDF3(4120, 32712) },
+            { AOM_CDF3(305, 2247) },
+            { AOM_CDF3(27403, 32636) },
+            { AOM_CDF3(31022, 32009) },
+            { AOM_CDF3(2963, 32093) },
+            { AOM_CDF3(601, 943) },
+            { AOM_CDF3(14969, 21398) }
+        }
+    }, .newmv_mode = {
+        { AOM_CDF2(24035) },
+        { AOM_CDF2(16630) },
+        { AOM_CDF2(15339) },
+        { AOM_CDF2(8386) },
+        { AOM_CDF2(12222) },
+        { AOM_CDF2(4676) },
+    }, .globalmv_mode = {
+        { AOM_CDF2(2175) },
+        { AOM_CDF2(1054) }
+    }, .refmv_mode = {
+        { AOM_CDF2(23974) },
+        { AOM_CDF2(24188) },
+        { AOM_CDF2(17848) },
+        { AOM_CDF2(28622) },
+        { AOM_CDF2(24312) },
+        { AOM_CDF2(19923) }
+    }, .drl_bit = {
+        { AOM_CDF2(13104) },
+        { AOM_CDF2(24560) },
+        { AOM_CDF2(18945) }
+    }, .comp_inter_mode = {
+        { AOM_CDF8(7760, 13823, 15808, 17641, 19156, 20666, 26891) },
+        { AOM_CDF8(10730, 19452, 21145, 22749, 24039, 25131, 28724) },
+        { AOM_CDF8(10664, 20221, 21588, 22906, 24295, 25387, 28436) },
+        { AOM_CDF8(13298, 16984, 20471, 24182, 25067, 25736, 26422) },
+        { AOM_CDF8(18904, 23325, 25242, 27432, 27898, 28258, 30758) },
+        { AOM_CDF8(10725, 17454, 20124, 22820, 24195, 25168, 26046) },
+        { AOM_CDF8(17125, 24273, 25814, 27492, 28214, 28704, 30592) },
+        { AOM_CDF8(13046, 23214, 24505, 25942, 27435, 28442, 29330) }
+    }, .intra = {
+        { AOM_CDF2(806) },
+        { AOM_CDF2(16662) },
+        { AOM_CDF2(20186) },
+        { AOM_CDF2(26538) }
+    }, .comp = {
+        { AOM_CDF2(26828) },
+        { AOM_CDF2(24035) },
+        { AOM_CDF2(12031) },
+        { AOM_CDF2(10640) },
+        { AOM_CDF2(2901) }
+    }, .comp_dir = {
+        { AOM_CDF2(1198) },
+        { AOM_CDF2(2070) },
+        { AOM_CDF2(9166) },
+        { AOM_CDF2(7499) },
+        { AOM_CDF2(22475) }
+    }, .jnt_comp = {
+        { AOM_CDF2(18244) },
+        { AOM_CDF2(12865) },
+        { AOM_CDF2(7053) },
+        { AOM_CDF2(13259) },
+        { AOM_CDF2(9334) },
+        { AOM_CDF2(4644) }
+    }, .mask_comp = {
+        { AOM_CDF2(26607) },
+        { AOM_CDF2(22891) },
+        { AOM_CDF2(18840) },
+        { AOM_CDF2(24594) },
+        { AOM_CDF2(19934) },
+        { AOM_CDF2(22674) }
+    }, .wedge_comp = {
+        { AOM_CDF2(23431) },
+        { AOM_CDF2(13171) },
+        { AOM_CDF2(11470) },
+        { AOM_CDF2(9770) },
+        { AOM_CDF2(9100) },
+        { AOM_CDF2(8233) },
+        { AOM_CDF2(6172) },
+        { AOM_CDF2(11820) },
+        { AOM_CDF2(7701) },
+    }, .wedge_idx = {
+        { AOM_CDF16(2438, 4440, 6599, 8663, 11005, 12874, 15751, 18094,
+                    20359, 22362, 24127, 25702, 27752, 29450, 31171) },
+        { AOM_CDF16(806, 3266, 6005, 6738, 7218, 7367, 7771, 14588,
+                    16323, 17367, 18452, 19422, 22839, 26127, 29629) },
+        { AOM_CDF16(2779, 3738, 4683, 7213, 7775, 8017, 8655, 14357,
+                    17939, 21332, 24520, 27470, 29456, 30529, 31656) },
+        { AOM_CDF16(1684, 3625, 5675, 7108, 9302, 11274, 14429, 17144,
+                    19163, 20961, 22884, 24471, 26719, 28714, 30877) },
+        { AOM_CDF16(1142, 3491, 6277, 7314, 8089, 8355, 9023, 13624,
+                    15369, 16730, 18114, 19313, 22521, 26012, 29550) },
+        { AOM_CDF16(2742, 4195, 5727, 8035, 8980, 9336, 10146, 14124,
+                    17270, 20533, 23434, 25972, 27944, 29570, 31416) },
+        { AOM_CDF16(1727, 3948, 6101, 7796, 9841, 12344, 15766, 18944,
+                    20638, 22038, 23963, 25311, 26988, 28766, 31012) },
+        { AOM_CDF16(154, 987, 1925, 2051, 2088, 2111, 2151, 23033,
+                    23703, 24284, 24985, 25684, 27259, 28883, 30911) },
+        { AOM_CDF16(1135, 1322, 1493, 2635, 2696, 2737, 2770, 21016,
+                    22935, 25057, 27251, 29173, 30089, 30960, 31933) },
+    }, .interintra = {
+        { AOM_CDF2(16384) },
+        { AOM_CDF2(26887) },
+        { AOM_CDF2(27597) },
+        { AOM_CDF2(30237) }
+    }, .interintra_mode = {
+        { AOM_CDF4(8192, 16384, 24576) },
+        { AOM_CDF4(1875, 11082, 27332) },
+        { AOM_CDF4(2473, 9996, 26388) },
+        { AOM_CDF4(4238, 11537, 25926) }
+    }, .interintra_wedge = {
+        { AOM_CDF2(20036) },
+        { AOM_CDF2(24957) },
+        { AOM_CDF2(26704) },
+        { AOM_CDF2(27530) },
+        { AOM_CDF2(29564) },
+        { AOM_CDF2(29444) },
+        { AOM_CDF2(26872) },
+    }, .ref = {
+        {
+            { AOM_CDF2(4897) },
+            { AOM_CDF2(16973) },
+            { AOM_CDF2(29744) },
+        }, {
+            { AOM_CDF2(1555) },
+            { AOM_CDF2(16751) },
+            { AOM_CDF2(30279) },
+        }, {
+            { AOM_CDF2(4236) },
+            { AOM_CDF2(19647) },
+            { AOM_CDF2(31194) },
+        }, {
+            { AOM_CDF2(8650) },
+            { AOM_CDF2(24773) },
+            { AOM_CDF2(31895) },
+        }, {
+            { AOM_CDF2(904) },
+            { AOM_CDF2(11014) },
+            { AOM_CDF2(26875) },
+        }, {
+            { AOM_CDF2(1444) },
+            { AOM_CDF2(15087) },
+            { AOM_CDF2(30304) },
+        }
+    }, .comp_fwd_ref = {
+        {
+            { AOM_CDF2(4946) },
+            { AOM_CDF2(19891) },
+            { AOM_CDF2(30731) },
+        }, {
+            { AOM_CDF2(9468) },
+            { AOM_CDF2(22441) },
+            { AOM_CDF2(31059) },
+        }, {
+            { AOM_CDF2(1503) },
+            { AOM_CDF2(15160) },
+            { AOM_CDF2(27544) },
+        }
+    }, .comp_bwd_ref = {
+        {
+            { AOM_CDF2(2235) },
+            { AOM_CDF2(17182) },
+            { AOM_CDF2(30606) },
+        }, {
+            { AOM_CDF2(1423) },
+            { AOM_CDF2(15175) },
+            { AOM_CDF2(30489) },
+        }
+    }, .comp_uni_ref = {
+        {
+            { AOM_CDF2(5284) },
+            { AOM_CDF2(23152) },
+            { AOM_CDF2(31774) },
+        }, {
+            { AOM_CDF2(3865) },
+            { AOM_CDF2(14173) },
+            { AOM_CDF2(25120) },
+        }, {
+            { AOM_CDF2(3128) },
+            { AOM_CDF2(15270) },
+            { AOM_CDF2(26710) },
+        }
+    }, .txsz = {
+        {
+            { AOM_CDF2(19968) },
+            { AOM_CDF2(19968) },
+            { AOM_CDF2(24320) }
+        }, {
+            { AOM_CDF3(12272, 30172) },
+            { AOM_CDF3(12272, 30172) },
+            { AOM_CDF3(18677, 30848) }
+        }, {
+            { AOM_CDF3(12986, 15180) },
+            { AOM_CDF3(12986, 15180) },
+            { AOM_CDF3(24302, 25602) }
+        }, {
+            { AOM_CDF3(5782, 11475) },
+            { AOM_CDF3(5782, 11475) },
+            { AOM_CDF3(16803, 22759) }
+        }
+    }, .txpart = {
+        { { AOM_CDF2(28581) }, { AOM_CDF2(23846) }, { AOM_CDF2(20847) }, },
+        { { AOM_CDF2(24315) }, { AOM_CDF2(18196) }, { AOM_CDF2(12133) }, },
+        { { AOM_CDF2(18791) }, { AOM_CDF2(10887) }, { AOM_CDF2(11005) }, },
+        { { AOM_CDF2(27179) }, { AOM_CDF2(20004) }, { AOM_CDF2(11281) }, },
+        { { AOM_CDF2(26549) }, { AOM_CDF2(19308) }, { AOM_CDF2(14224) }, },
+        { { AOM_CDF2(28015) }, { AOM_CDF2(21546) }, { AOM_CDF2(14400) }, },
+        { { AOM_CDF2(28165) }, { AOM_CDF2(22401) }, { AOM_CDF2(16088) }, },
+    }, .txtp_inter = {
+        {}, {
+            { AOM_CDF16(4458, 5560, 7695, 9709, 13330, 14789, 17537, 20266,
+                        21504, 22848, 23934, 25474, 27727, 28915, 30631) },
+            { AOM_CDF16(1645, 2573, 4778, 5711, 7807, 8622, 10522, 15357,
+                        17674, 20408, 22517, 25010, 27116, 28856, 30749) },
+            { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                        18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+            { AOM_CDF16(2048, 4096, 6144, 8192, 10240, 12288, 14336, 16384,
+                        18432, 20480, 22528, 24576, 26624, 28672, 30720) },
+        }, {
+            { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                        24576, 27307, 30037) },
+            { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                        24576, 27307, 30037) },
+            { AOM_CDF12(770, 2421, 5225, 12907, 15819, 18927, 21561, 24089,
+                        26595, 28526, 30529) },
+            { AOM_CDF12(2731, 5461, 8192, 10923, 13653, 16384, 19115, 21845,
+                        24576, 27307, 30037) },
+        }, {
+            { AOM_CDF2(16384) },
+            { AOM_CDF2(4167) },
+            { AOM_CDF2(1998) },
+            { AOM_CDF2(748) },
+        },
+    }, .txtp_intra = {
+        {}, {
+            {
+                { AOM_CDF7(1535, 8035, 9461, 12751, 23467, 27825) },
+                { AOM_CDF7(564, 3335, 9709, 10870, 18143, 28094) },
+                { AOM_CDF7(672, 3247, 3676, 11982, 19415, 23127) },
+                { AOM_CDF7(5279, 13885, 15487, 18044, 23527, 30252) },
+                { AOM_CDF7(4423, 6074, 7985, 10416, 25693, 29298) },
+                { AOM_CDF7(1486, 4241, 9460, 10662, 16456, 27694) },
+                { AOM_CDF7(439, 2838, 3522, 6737, 18058, 23754) },
+                { AOM_CDF7(1190, 4233, 4855, 11670, 20281, 24377) },
+                { AOM_CDF7(1045, 4312, 8647, 10159, 18644, 29335) },
+                { AOM_CDF7(202, 3734, 4747, 7298, 17127, 24016) },
+                { AOM_CDF7(447, 4312, 6819, 8884, 16010, 23858) },
+                { AOM_CDF7(277, 4369, 5255, 8905, 16465, 22271) },
+                { AOM_CDF7(3409, 5436, 10599, 15599, 19687, 24040) },
+            }, {
+                { AOM_CDF7(1870, 13742, 14530, 16498, 23770, 27698) },
+                { AOM_CDF7(326, 8796, 14632, 15079, 19272, 27486) },
+                { AOM_CDF7(484, 7576, 7712, 14443, 19159, 22591) },
+                { AOM_CDF7(1126, 15340, 15895, 17023, 20896, 30279) },
+                { AOM_CDF7(655, 4854, 5249, 5913, 22099, 27138) },
+                { AOM_CDF7(1299, 6458, 8885, 9290, 14851, 25497) },
+                { AOM_CDF7(311, 5295, 5552, 6885, 16107, 22672) },
+                { AOM_CDF7(883, 8059, 8270, 11258, 17289, 21549) },
+                { AOM_CDF7(741, 7580, 9318, 10345, 16688, 29046) },
+                { AOM_CDF7(110, 7406, 7915, 9195, 16041, 23329) },
+                { AOM_CDF7(363, 7974, 9357, 10673, 15629, 24474) },
+                { AOM_CDF7(153, 7647, 8112, 9936, 15307, 19996) },
+                { AOM_CDF7(3511, 6332, 11165, 15335, 19323, 23594) },
+            }, {
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+            }, {
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+                { AOM_CDF7(4681, 9362, 14043, 18725, 23406, 28087) },
+            },
+        }, {
+            {
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+            }, {
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+            }, {
+                { AOM_CDF5(1127, 12814, 22772, 27483) },
+                { AOM_CDF5(145, 6761, 11980, 26667) },
+                { AOM_CDF5(362, 5887, 11678, 16725) },
+                { AOM_CDF5(385, 15213, 18587, 30693) },
+                { AOM_CDF5(25, 2914, 23134, 27903) },
+                { AOM_CDF5(60, 4470, 11749, 23991) },
+                { AOM_CDF5(37, 3332, 14511, 21448) },
+                { AOM_CDF5(157, 6320, 13036, 17439) },
+                { AOM_CDF5(119, 6719, 12906, 29396) },
+                { AOM_CDF5(47, 5537, 12576, 21499) },
+                { AOM_CDF5(269, 6076, 11258, 23115) },
+                { AOM_CDF5(83, 5615, 12001, 17228) },
+                { AOM_CDF5(1968, 5556, 12023, 18547) },
+            }, {
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+                { AOM_CDF5(6554, 13107, 19661, 26214) },
+            },
+        },
+    }, .skip = {
+        { AOM_CDF2(31671) },
+        { AOM_CDF2(16515) },
+        { AOM_CDF2(4576) }
+    }, .skip_mode = {
+        { AOM_CDF2(32621) },
+        { AOM_CDF2(20708) },
+        { AOM_CDF2(8127) }
+    }, .partition = {
+        {
+            // 128x128 -> 64x64
+            { AOM_CDF8(27899, 28219, 28529, 32484, 32539, 32619, 32639) },
+            { AOM_CDF8(6607, 6990, 8268, 32060, 32219, 32338, 32371) },
+            { AOM_CDF8(5429, 6676, 7122, 32027, 32227, 32531, 32582) },
+            { AOM_CDF8(711, 966, 1172, 32448, 32538, 32617, 32664) },
+        }, {
+            // 64x64 -> 32x32
+            { AOM_CDF10(20137, 21547, 23078, 29566, 29837, 30261, 30524, 30892,
+                        31724) },
+            { AOM_CDF10(6732, 7490, 9497, 27944, 28250, 28515, 28969, 29630,
+                        30104) },
+            { AOM_CDF10(5945, 7663, 8348, 28683, 29117, 29749, 30064, 30298,
+                        32238) },
+            { AOM_CDF10(870, 1212, 1487, 31198, 31394, 31574, 31743, 31881,
+                        32332) },
+        }, {
+            // 32x32 -> 16x16
+            { AOM_CDF10(18462, 20920, 23124, 27647, 28227, 29049, 29519, 30178,
+                        31544) },
+            { AOM_CDF10(7689, 9060, 12056, 24992, 25660, 26182, 26951, 28041,
+                        29052) },
+            { AOM_CDF10(6015, 9009, 10062, 24544, 25409, 26545, 27071, 27526,
+                        32047) },
+            { AOM_CDF10(1394, 2208, 2796, 28614, 29061, 29466, 29840, 30185,
+                        31899) },
+        }, {
+            // 16x16 -> 8x8
+            { AOM_CDF10(15597, 20929, 24571, 26706, 27664, 28821, 29601, 30571,
+                        31902) },
+            { AOM_CDF10(7925, 11043, 16785, 22470, 23971, 25043, 26651, 28701,
+                        29834) },
+            { AOM_CDF10(5414, 13269, 15111, 20488, 22360, 24500, 25537, 26336,
+                        32117) },
+            { AOM_CDF10(2662, 6362, 8614, 20860, 23053, 24778, 26436, 27829,
+                        31171) },
+        }, {
+            // 8x8 -> 4x4 only supports the four legacy partition types
+            { AOM_CDF4(19132, 25510, 30392) },
+            { AOM_CDF4(13928, 19855, 28540) },
+            { AOM_CDF4(12522, 23679, 28629) },
+            { AOM_CDF4(9896, 18783, 25853) },
+        }
+    }, .seg_pred = {
+        { AOM_CDF2(128 * 128) },
+        { AOM_CDF2(128 * 128) },
+        { AOM_CDF2(128 * 128) }
+    }, .seg_id = {
+        { AOM_CDF8(5622, 7893, 16093, 18233, 27809, 28373, 32533), },
+        { AOM_CDF8(14274, 18230, 22557, 24935, 29980, 30851, 32344), },
+        { AOM_CDF8(27527, 28487, 28723, 28890, 32397, 32647, 32679), },
+    }, .cfl_sign = {
+        AOM_CDF8(1418, 2123, 13340, 18405, 26972, 28343, 32294)
+    }, .cfl_alpha = {
+        { AOM_CDF16(7637, 20719, 31401, 32481, 32657, 32688, 32692, 32696,
+                    32700, 32704, 32708, 32712, 32716, 32720, 32724) },
+        { AOM_CDF16(14365, 23603, 28135, 31168, 32167, 32395, 32487, 32573,
+                    32620, 32647, 32668, 32672, 32676, 32680, 32684) },
+        { AOM_CDF16(11532, 22380, 28445, 31360, 32349, 32523, 32584, 32649,
+                    32673, 32677, 32681, 32685, 32689, 32693, 32697) },
+        { AOM_CDF16(26990, 31402, 32282, 32571, 32692, 32696, 32700, 32704,
+                    32708, 32712, 32716, 32720, 32724, 32728, 32732) },
+        { AOM_CDF16(17248, 26058, 28904, 30608, 31305, 31877, 32126, 32321,
+                    32394, 32464, 32516, 32560, 32576, 32593, 32622) },
+        { AOM_CDF16(14738, 21678, 25779, 27901, 29024, 30302, 30980, 31843,
+                    32144, 32413, 32520, 32594, 32622, 32656, 32660) }
+    }, .restore_wiener = {
+        AOM_CDF2(11570)
+    }, .restore_sgrproj = {
+        AOM_CDF2(16855)
+    }, .restore_switchable = {
+        AOM_CDF3(9413, 22581)
+    }, .delta_q = {
+        AOM_CDF4(28160, 32120, 32677)
+    }, .delta_lf = {
+        { AOM_CDF4(28160, 32120, 32677) },
+        { AOM_CDF4(28160, 32120, 32677) },
+        { AOM_CDF4(28160, 32120, 32677) },
+        { AOM_CDF4(28160, 32120, 32677) },
+        { AOM_CDF4(28160, 32120, 32677) },
+    }, .motion_mode = {
+        [BS_8x8]     = { AOM_CDF3(7651, 24760) },
+        [BS_8x16]    = { AOM_CDF3(4738, 24765) },
+        [BS_8x32]    = { AOM_CDF3(28799, 31390) },
+        [BS_16x8]    = { AOM_CDF3(5391, 25528) },
+        [BS_16x16]   = { AOM_CDF3(19419, 26810) },
+        [BS_16x32]   = { AOM_CDF3(5123, 23606) },
+        [BS_16x64]   = { AOM_CDF3(28973, 31594) },
+        [BS_32x8]    = { AOM_CDF3(26431, 30774) },
+        [BS_32x16]   = { AOM_CDF3(11606, 24308) },
+        [BS_32x32]   = { AOM_CDF3(26260, 29116) },
+        [BS_32x64]   = { AOM_CDF3(20360, 28062) },
+        [BS_64x16]   = { AOM_CDF3(29742, 31203) },
+        [BS_64x32]   = { AOM_CDF3(21679, 26830) },
+        [BS_64x64]   = { AOM_CDF3(29516, 30701) },
+        [BS_64x128]  = { AOM_CDF3(28898, 30397) },
+        [BS_128x64]  = { AOM_CDF3(30878, 31335) },
+        [BS_128x128] = { AOM_CDF3(32507, 32558) },
+    }, .obmc = {
+        [BS_8x8]     = { AOM_CDF2(10437) },
+        [BS_8x16]    = { AOM_CDF2(9371) },
+        [BS_8x32]    = { AOM_CDF2(23664) },
+        [BS_16x8]    = { AOM_CDF2(9301) },
+        [BS_16x16]   = { AOM_CDF2(17432) },
+        [BS_16x32]   = { AOM_CDF2(14423) },
+        [BS_16x64]   = { AOM_CDF2(24008) },
+        [BS_32x8]    = { AOM_CDF2(20901) },
+        [BS_32x16]   = { AOM_CDF2(15142) },
+        [BS_32x32]   = { AOM_CDF2(25817) },
+        [BS_32x64]   = { AOM_CDF2(22823) },
+        [BS_64x16]   = { AOM_CDF2(26879) },
+        [BS_64x32]   = { AOM_CDF2(22083) },
+        [BS_64x64]   = { AOM_CDF2(30128) },
+        [BS_64x128]  = { AOM_CDF2(31014) },
+        [BS_128x64]  = { AOM_CDF2(31560) },
+        [BS_128x128] = { AOM_CDF2(32638) },
+    }, .pal_y = {
+        { { AOM_CDF2(31676) }, { AOM_CDF2(3419) }, { AOM_CDF2(1261) } },
+        { { AOM_CDF2(31912) }, { AOM_CDF2(2859) }, { AOM_CDF2(980) } },
+        { { AOM_CDF2(31823) }, { AOM_CDF2(3400) }, { AOM_CDF2(781) } },
+        { { AOM_CDF2(32030) }, { AOM_CDF2(3561) }, { AOM_CDF2(904) } },
+        { { AOM_CDF2(32309) }, { AOM_CDF2(7337) }, { AOM_CDF2(1462) } },
+        { { AOM_CDF2(32265) }, { AOM_CDF2(4015) }, { AOM_CDF2(1521) } },
+        { { AOM_CDF2(32450) }, { AOM_CDF2(7946) }, { AOM_CDF2(129) } },
+    }, .pal_sz = {
+        {
+            { AOM_CDF7(7952, 13000, 18149, 21478, 25527, 29241) },
+            { AOM_CDF7(7139, 11421, 16195, 19544, 23666, 28073) },
+            { AOM_CDF7(7788, 12741, 17325, 20500, 24315, 28530) },
+            { AOM_CDF7(8271, 14064, 18246, 21564, 25071, 28533) },
+            { AOM_CDF7(12725, 19180, 21863, 24839, 27535, 30120) },
+            { AOM_CDF7(9711, 14888, 16923, 21052, 25661, 27875) },
+            { AOM_CDF7(14940, 20797, 21678, 24186, 27033, 28999) },
+        }, {
+            { AOM_CDF7(8713, 19979, 27128, 29609, 31331, 32272) },
+            { AOM_CDF7(5839, 15573, 23581, 26947, 29848, 31700) },
+            { AOM_CDF7(4426, 11260, 17999, 21483, 25863, 29430) },
+            { AOM_CDF7(3228, 9464, 14993, 18089, 22523, 27420) },
+            { AOM_CDF7(3768, 8886, 13091, 17852, 22495, 27207) },
+            { AOM_CDF7(2464, 8451, 12861, 21632, 25525, 28555) },
+            { AOM_CDF7(1269, 5435, 10433, 18963, 21700, 25865) },
+        }
+    }, .pal_uv = {
+        { AOM_CDF2(32461) }, { AOM_CDF2(21488) },
+    }, .color_map = {
+        { /* y */
+            {
+                { AOM_CDF2(28710) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(10553) },
+                { AOM_CDF2(27036) },
+                { AOM_CDF2(31603) },
+            }, {
+                { AOM_CDF3(27877, 30490) },
+                { AOM_CDF3(11532, 25697) },
+                { AOM_CDF3(6544, 30234) },
+                { AOM_CDF3(23018, 28072) },
+                { AOM_CDF3(31915, 32385) },
+            }, {
+                { AOM_CDF4(25572, 28046, 30045) },
+                { AOM_CDF4(9478, 21590, 27256) },
+                { AOM_CDF4(7248, 26837, 29824) },
+                { AOM_CDF4(19167, 24486, 28349) },
+                { AOM_CDF4(31400, 31825, 32250) },
+            }, {
+                { AOM_CDF5(24779, 26955, 28576, 30282) },
+                { AOM_CDF5(8669, 20364, 24073, 28093) },
+                { AOM_CDF5(4255, 27565, 29377, 31067) },
+                { AOM_CDF5(19864, 23674, 26716, 29530) },
+                { AOM_CDF5(31646, 31893, 32147, 32426) },
+            }, {
+                { AOM_CDF6(23132, 25407, 26970, 28435, 30073) },
+                { AOM_CDF6(7443, 17242, 20717, 24762, 27982) },
+                { AOM_CDF6(6300, 24862, 26944, 28784, 30671) },
+                { AOM_CDF6(18916, 22895, 25267, 27435, 29652) },
+                { AOM_CDF6(31270, 31550, 31808, 32059, 32353) },
+            }, {
+                { AOM_CDF7(23105, 25199, 26464, 27684, 28931, 30318) },
+                { AOM_CDF7(6950, 15447, 18952, 22681, 25567, 28563) },
+                { AOM_CDF7(7560, 23474, 25490, 27203, 28921, 30708) },
+                { AOM_CDF7(18544, 22373, 24457, 26195, 28119, 30045) },
+                { AOM_CDF7(31198, 31451, 31670, 31882, 32123, 32391) },
+            }, {
+                { AOM_CDF8(21689, 23883, 25163, 26352, 27506, 28827, 30195) },
+                { AOM_CDF8(6892, 15385, 17840, 21606, 24287, 26753, 29204) },
+                { AOM_CDF8(5651, 23182, 25042, 26518, 27982, 29392, 30900) },
+                { AOM_CDF8(19349, 22578, 24418, 25994, 27524, 29031, 30448) },
+                { AOM_CDF8(31028, 31270, 31504, 31705, 31927, 32153, 32392) },
+            },
+        }, { /* uv */
+            {
+                { AOM_CDF2(29089) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(8713) },
+                { AOM_CDF2(29257) },
+                { AOM_CDF2(31610) },
+            }, {
+                { AOM_CDF3(25257, 29145) },
+                { AOM_CDF3(12287, 27293) },
+                { AOM_CDF3(7033, 27960) },
+                { AOM_CDF3(20145, 25405) },
+                { AOM_CDF3(30608, 31639) },
+            }, {
+                { AOM_CDF4(24210, 27175, 29903) },
+                { AOM_CDF4(9888, 22386, 27214) },
+                { AOM_CDF4(5901, 26053, 29293) },
+                { AOM_CDF4(18318, 22152, 28333) },
+                { AOM_CDF4(30459, 31136, 31926) },
+            }, {
+                { AOM_CDF5(22980, 25479, 27781, 29986) },
+                { AOM_CDF5(8413, 21408, 24859, 28874) },
+                { AOM_CDF5(2257, 29449, 30594, 31598) },
+                { AOM_CDF5(19189, 21202, 25915, 28620) },
+                { AOM_CDF5(31844, 32044, 32281, 32518) },
+            }, {
+                { AOM_CDF6(22217, 24567, 26637, 28683, 30548) },
+                { AOM_CDF6(7307, 16406, 19636, 24632, 28424) },
+                { AOM_CDF6(4441, 25064, 26879, 28942, 30919) },
+                { AOM_CDF6(17210, 20528, 23319, 26750, 29582) },
+                { AOM_CDF6(30674, 30953, 31396, 31735, 32207) },
+            }, {
+                { AOM_CDF7(21239, 23168, 25044, 26962, 28705, 30506) },
+                { AOM_CDF7(6545, 15012, 18004, 21817, 25503, 28701) },
+                { AOM_CDF7(3448, 26295, 27437, 28704, 30126, 31442) },
+                { AOM_CDF7(15889, 18323, 21704, 24698, 26976, 29690) },
+                { AOM_CDF7(30988, 31204, 31479, 31734, 31983, 32325) },
+            }, {
+                { AOM_CDF8(21442, 23288, 24758, 26246, 27649, 28980, 30563) },
+                { AOM_CDF8(5863, 14933, 17552, 20668, 23683, 26411, 29273) },
+                { AOM_CDF8(3415, 25810, 26877, 27990, 29223, 30394, 31618) },
+                { AOM_CDF8(17965, 20084, 22232, 23974, 26274, 28402, 30390) },
+                { AOM_CDF8(31190, 31329, 31516, 31679, 31825, 32026, 32322) },
+            },
+        },
+    }, .intrabc = {
+        AOM_CDF2(30531)
+    },
+};
+
+static const CdfMvContext default_mv_cdf = { /* initial (default) CDFs for motion-vector decoding */
+    .comp = { /* [0] = vertical, [1] = horizontal; both components start from identical tables */
+        { /* mv vertical component */
+            .classes = { /* magnitude class of the component (11-symbol CDF) */
+                AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
+                          32757, 32762, 32767)
+            }, .class0 = { /* integer offset bit when class == 0 */
+                AOM_CDF2(216 * 128) /* NOTE(review): looks like legacy 8-bit probs scaled by 128 — confirm against AOM_CDF2 */
+            }, .classN = { /* one binary CDF per integer offset bit for classes >= 1 */
+                { AOM_CDF2(128 * 136) },
+                { AOM_CDF2(128 * 140) },
+                { AOM_CDF2(128 * 148) },
+                { AOM_CDF2(128 * 160) },
+                { AOM_CDF2(128 * 176) },
+                { AOM_CDF2(128 * 192) },
+                { AOM_CDF2(128 * 224) },
+                { AOM_CDF2(128 * 234) },
+                { AOM_CDF2(128 * 234) },
+                { AOM_CDF2(128 * 240) }
+            }, .class0_fp = { /* fractional-pel part (4 symbols) for class-0 vectors */
+                { AOM_CDF4(16384, 24576, 26624) },
+                { AOM_CDF4(12288, 21248, 24128) }
+            }, .classN_fp = { /* fractional-pel part for classes >= 1 */
+                AOM_CDF4(8192, 17408, 21248)
+            }, .class0_hp = { /* high-precision (1/8-pel) bit, class 0 */
+                AOM_CDF2(160 * 128)
+            }, .classN_hp = { /* high-precision bit, classes >= 1; 128*128 = equiprobable */
+                AOM_CDF2(128 * 128)
+            }, .sign = { /* sign bit, initialized equiprobable */
+                AOM_CDF2(128 * 128)
+            }
+        }, { /* mv horizontal component — same default values as the vertical component above */
+            .classes = {
+                AOM_CDF11(28672, 30976, 31858, 32320, 32551, 32656, 32740,
+                          32757, 32762, 32767)
+            }, .class0 = {
+                AOM_CDF2(216 * 128)
+            }, .classN = {
+                { AOM_CDF2(128 * 136) },
+                { AOM_CDF2(128 * 140) },
+                { AOM_CDF2(128 * 148) },
+                { AOM_CDF2(128 * 160) },
+                { AOM_CDF2(128 * 176) },
+                { AOM_CDF2(128 * 192) },
+                { AOM_CDF2(128 * 224) },
+                { AOM_CDF2(128 * 234) },
+                { AOM_CDF2(128 * 234) },
+                { AOM_CDF2(128 * 240) }
+            }, .class0_fp = {
+                { AOM_CDF4(16384, 24576, 26624) },
+                { AOM_CDF4(12288, 21248, 24128) }
+            }, .classN_fp = {
+                AOM_CDF4(8192, 17408, 21248)
+            }, .class0_hp = {
+                AOM_CDF2(160 * 128)
+            }, .classN_hp = {
+                AOM_CDF2(128 * 128)
+            }, .sign = {
+                AOM_CDF2(128 * 128)
+            },
+        }
+    }, .joint = { /* joint MV symbol — presumably zero/h-only/v-only/both ordering per AV1 mv_joint; confirm */
+        AOM_CDF4(4096, 11264, 19328)
+    }
+};
+
+static const uint16_t default_kf_y_mode_cdf[5][5][N_INTRA_PRED_MODES + 1] = { /* keyframe luma intra-mode CDFs, indexed by two 5-valued neighbor-mode contexts (presumably above/left — confirm); 13-symbol CDF, +1 trailing slot presumably for the CDF adaptation counter — confirm */
+    {
+        { AOM_CDF13(15588, 17027, 19338, 20218, 20682, 21110, 21825, 23244,
+                    24189, 28165, 29093, 30466) },
+        { AOM_CDF13(12016, 18066, 19516, 20303, 20719, 21444, 21888, 23032,
+                    24434, 28658, 30172, 31409) },
+        { AOM_CDF13(10052, 10771, 22296, 22788, 23055, 23239, 24133, 25620,
+                    26160, 29336, 29929, 31567) },
+        { AOM_CDF13(14091, 15406, 16442, 18808, 19136, 19546, 19998, 22096,
+                    24746, 29585, 30958, 32462) },
+        { AOM_CDF13(12122, 13265, 15603, 16501, 18609, 20033, 22391, 25583,
+                    26437, 30261, 31073, 32475) }
+    }, {
+        { AOM_CDF13(10023, 19585, 20848, 21440, 21832, 22760, 23089, 24023,
+                  25381, 29014, 30482, 31436) },
+        { AOM_CDF13(5983, 24099, 24560, 24886, 25066, 25795, 25913, 26423,
+                    27610, 29905, 31276, 31794) },
+        { AOM_CDF13(7444, 12781, 20177, 20728, 21077, 21607, 22170, 23405,
+                    24469, 27915, 29090, 30492) },
+        { AOM_CDF13(8537, 14689, 15432, 17087, 17408, 18172, 18408, 19825,
+                    24649, 29153, 31096, 32210) },
+        { AOM_CDF13(7543, 14231, 15496, 16195, 17905, 20717, 21984, 24516,
+                    26001, 29675, 30981, 31994) }
+    }, {
+        { AOM_CDF13(12613, 13591, 21383, 22004, 22312, 22577, 23401, 25055,
+                  25729, 29538, 30305, 32077) },
+        { AOM_CDF13(9687, 13470, 18506, 19230, 19604, 20147, 20695, 22062,
+                    23219, 27743, 29211, 30907) },
+        { AOM_CDF13(6183, 6505, 26024, 26252, 26366, 26434, 27082, 28354,
+                    28555, 30467, 30794, 32086) },
+        { AOM_CDF13(10718, 11734, 14954, 17224, 17565, 17924, 18561, 21523,
+                    23878, 28975, 30287, 32252) },
+        { AOM_CDF13(9194, 9858, 16501, 17263, 18424, 19171, 21563, 25961,
+                    26561, 30072, 30737, 32463) }
+    }, {
+        { AOM_CDF13(12602, 14399, 15488, 18381, 18778, 19315, 19724, 21419,
+                  25060, 29696, 30917, 32409) },
+        { AOM_CDF13(8203, 13821, 14524, 17105, 17439, 18131, 18404, 19468,
+                    25225, 29485, 31158, 32342) },
+        { AOM_CDF13(8451, 9731, 15004, 17643, 18012, 18425, 19070, 21538,
+                    24605, 29118, 30078, 32018) },
+        { AOM_CDF13(7714, 9048, 9516, 16667, 16817, 16994, 17153, 18767,
+                    26743, 30389, 31536, 32528) },
+        { AOM_CDF13(8843, 10280, 11496, 15317, 16652, 17943, 19108, 22718,
+                    25769, 29953, 30983, 32485) }
+    }, {
+        { AOM_CDF13(12578, 13671, 15979, 16834, 19075, 20913, 22989, 25449,
+                  26219, 30214, 31150, 32477) },
+        { AOM_CDF13(9563, 13626, 15080, 15892, 17756, 20863, 22207, 24236,
+                    25380, 29653, 31143, 32277) },
+        { AOM_CDF13(8356, 8901, 17616, 18256, 19350, 20106, 22598, 25947,
+                    26466, 29900, 30523, 32261) },
+        { AOM_CDF13(10835, 11815, 13124, 16042, 17018, 18039, 18947, 22753,
+                    24615, 29489, 30883, 32482) },
+        { AOM_CDF13(7618, 8288, 9859, 10509, 15386, 18657, 22903, 28776,
+                    29180, 31355, 31802, 32593) }
+    }
+};
+
+static const CdfCoefContext av1_default_coef_cdf[4] = {
+    [0] = {
+        .skip = {
+            { { AOM_CDF2(31849) },
+                { AOM_CDF2(5892) },
+                { AOM_CDF2(12112) },
+                { AOM_CDF2(21935) },
+                { AOM_CDF2(20289) },
+                { AOM_CDF2(27473) },
+                { AOM_CDF2(32487) },
+                { AOM_CDF2(7654) },
+                { AOM_CDF2(19473) },
+                { AOM_CDF2(29984) },
+                { AOM_CDF2(9961) },
+                { AOM_CDF2(30242) },
+                { AOM_CDF2(32117) } },
+            { { AOM_CDF2(31548) },
+                { AOM_CDF2(1549) },
+                { AOM_CDF2(10130) },
+                { AOM_CDF2(16656) },
+                { AOM_CDF2(18591) },
+                { AOM_CDF2(26308) },
+                { AOM_CDF2(32537) },
+                { AOM_CDF2(5403) },
+                { AOM_CDF2(18096) },
+                { AOM_CDF2(30003) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(29957) },
+                { AOM_CDF2(5391) },
+                { AOM_CDF2(18039) },
+                { AOM_CDF2(23566) },
+                { AOM_CDF2(22431) },
+                { AOM_CDF2(25822) },
+                { AOM_CDF2(32197) },
+                { AOM_CDF2(3778) },
+                { AOM_CDF2(15336) },
+                { AOM_CDF2(28981) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(17920) },
+                { AOM_CDF2(1818) },
+                { AOM_CDF2(7282) },
+                { AOM_CDF2(25273) },
+                { AOM_CDF2(10923) },
+                { AOM_CDF2(31554) },
+                { AOM_CDF2(32624) },
+                { AOM_CDF2(1366) },
+                { AOM_CDF2(15628) },
+                { AOM_CDF2(30462) },
+                { AOM_CDF2(146) },
+                { AOM_CDF2(5132) },
+                { AOM_CDF2(31657) } },
+            { { AOM_CDF2(6308) },
+                { AOM_CDF2(117) },
+                { AOM_CDF2(1638) },
+                { AOM_CDF2(2161) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(10923) },
+                { AOM_CDF2(30247) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } }
+        }, .eob_bin_16 = {
+            { { AOM_CDF5(840, 1039, 1980, 4895) },
+              { AOM_CDF5(370, 671, 1883, 4471) } },
+            { { AOM_CDF5(3247, 4950, 9688, 14563) },
+              { AOM_CDF5(1904, 3354, 7763, 14647) } }
+        }, .eob_bin_32 = {
+            { { AOM_CDF6(400, 520, 977, 2102, 6542) },
+              { AOM_CDF6(210, 405, 1315, 3326, 7537) } },
+            { { AOM_CDF6(2636, 4273, 7588, 11794, 20401) },
+              { AOM_CDF6(1786, 3179, 6902, 11357, 19054) } }
+        }, .eob_bin_64 = {
+            { { AOM_CDF7(329, 498, 1101, 1784, 3265, 7758) },
+              { AOM_CDF7(335, 730, 1459, 5494, 8755, 12997) } },
+            { { AOM_CDF7(3505, 5304, 10086, 13814, 17684, 23370) },
+              { AOM_CDF7(1563, 2700, 4876, 10911, 14706, 22480) } }
+        }, .eob_bin_128 = {
+            { { AOM_CDF8(219, 482, 1140, 2091, 3680, 6028, 12586) },
+              { AOM_CDF8(371, 699, 1254, 4830, 9479, 12562, 17497) } },
+            { { AOM_CDF8(5245, 7456, 12880, 15852, 20033, 23932, 27608) },
+              { AOM_CDF8(2054, 3472, 5869, 14232, 18242, 20590, 26752) } }
+        }, .eob_bin_256 = {
+            { { AOM_CDF9(310, 584, 1887, 3589, 6168, 8611, 11352, 15652) },
+              { AOM_CDF9(998, 1850, 2998, 5604, 17341, 19888, 22899, 25583) } },
+            { { AOM_CDF9(2520, 3240, 5952, 8870, 12577, 17558, 19954, 24168) },
+              { AOM_CDF9(2203, 4130, 7435, 10739, 20652, 23681, 25609, 27261) } }
+        }, .eob_bin_512 = {
+            { { AOM_CDF10(641, 983, 3707, 5430, 10234, 14958, 18788,
+                          23412, 26061) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } },
+            { { AOM_CDF10(5095, 6446, 9996, 13354, 16017, 17986, 20919,
+                          26129, 29140) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } }
+        }, .eob_bin_1024 = {
+            { { AOM_CDF11(393, 421, 751, 1623, 3160, 6352, 13345, 18047,
+                          22571, 25830) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } },
+            { { AOM_CDF11(1865, 1988, 2930, 4242, 10533, 16538, 21354,
+                          27255, 28546, 31784) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } }
+        }, .eob_hi_bit = {
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16961) }, { AOM_CDF2(17223) }, { AOM_CDF2(7621) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(19069) }, { AOM_CDF2(22525) }, { AOM_CDF2(13377) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20401) }, { AOM_CDF2(17025) }, { AOM_CDF2(12845) },
+                { AOM_CDF2(12873) }, { AOM_CDF2(14094) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20681) }, { AOM_CDF2(20701) }, { AOM_CDF2(15250) },
+                { AOM_CDF2(15017) }, { AOM_CDF2(14928) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(23905) }, { AOM_CDF2(17194) }, { AOM_CDF2(16170) },
+                { AOM_CDF2(17695) }, { AOM_CDF2(13826) }, { AOM_CDF2(15810) },
+                { AOM_CDF2(12036) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(23959) }, { AOM_CDF2(20799) }, { AOM_CDF2(19021) },
+                { AOM_CDF2(16203) }, { AOM_CDF2(17886) }, { AOM_CDF2(14144) },
+                { AOM_CDF2(12010) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(27399) }, { AOM_CDF2(16327) }, { AOM_CDF2(18071) },
+                { AOM_CDF2(19584) }, { AOM_CDF2(20721) }, { AOM_CDF2(18432) },
+                { AOM_CDF2(19560) }, { AOM_CDF2(10150) }, { AOM_CDF2(8805) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(24932) }, { AOM_CDF2(20833) }, { AOM_CDF2(12027) },
+                { AOM_CDF2(16670) }, { AOM_CDF2(19914) }, { AOM_CDF2(15106) },
+                { AOM_CDF2(17662) }, { AOM_CDF2(13783) }, { AOM_CDF2(28756) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(23406) }, { AOM_CDF2(21845) }, { AOM_CDF2(18432) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(17096) }, { AOM_CDF2(12561) },
+                { AOM_CDF2(17320) }, { AOM_CDF2(22395) }, { AOM_CDF2(21370) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } }
+        }, .eob_base_tok = {
+            { { { AOM_CDF3(17837, 29055) },
+                { AOM_CDF3(29600, 31446) },
+                { AOM_CDF3(30844, 31878) },
+                { AOM_CDF3(24926, 28948) } },
+             { { AOM_CDF3(21365, 30026) },
+                { AOM_CDF3(30512, 32423) },
+                { AOM_CDF3(31658, 32621) },
+                { AOM_CDF3(29630, 31881) } } },
+            { { { AOM_CDF3(5717, 26477) },
+                { AOM_CDF3(30491, 31703) },
+                { AOM_CDF3(31550, 32158) },
+                { AOM_CDF3(29648, 31491) } },
+              { { AOM_CDF3(12608, 27820) },
+                { AOM_CDF3(30680, 32225) },
+                { AOM_CDF3(30809, 32335) },
+                { AOM_CDF3(31299, 32423) } } },
+            { { { AOM_CDF3(1786, 12612) },
+                { AOM_CDF3(30663, 31625) },
+                { AOM_CDF3(32339, 32468) },
+                { AOM_CDF3(31148, 31833) } },
+              { { AOM_CDF3(18857, 23865) },
+                { AOM_CDF3(31428, 32428) },
+                { AOM_CDF3(31744, 32373) },
+                { AOM_CDF3(31775, 32526) } } },
+            { { { AOM_CDF3(1787, 2532) },
+                { AOM_CDF3(30832, 31662) },
+                { AOM_CDF3(31824, 32682) },
+                { AOM_CDF3(32133, 32569) } },
+              { { AOM_CDF3(13751, 22235) },
+                { AOM_CDF3(32089, 32409) },
+                { AOM_CDF3(27084, 27920) },
+                { AOM_CDF3(29291, 32594) } } },
+            { { { AOM_CDF3(1725, 3449) },
+                { AOM_CDF3(31102, 31935) },
+                { AOM_CDF3(32457, 32613) },
+                { AOM_CDF3(32412, 32649) } },
+              { { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) } } }
+        }, .base_tok = {
+            { { { AOM_CDF4(4034, 8930, 12727) },
+                { AOM_CDF4(18082, 29741, 31877) },
+                { AOM_CDF4(12596, 26124, 30493) },
+                { AOM_CDF4(9446, 21118, 27005) },
+                { AOM_CDF4(6308, 15141, 21279) },
+                { AOM_CDF4(2463, 6357, 9783) },
+                { AOM_CDF4(20667, 30546, 31929) },
+                { AOM_CDF4(13043, 26123, 30134) },
+                { AOM_CDF4(8151, 18757, 24778) },
+                { AOM_CDF4(5255, 12839, 18632) },
+                { AOM_CDF4(2820, 7206, 11161) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(15736, 27553, 30604) },
+                { AOM_CDF4(11210, 23794, 28787) },
+                { AOM_CDF4(5947, 13874, 19701) },
+                { AOM_CDF4(4215, 9323, 13891) },
+                { AOM_CDF4(2833, 6462, 10059) },
+                { AOM_CDF4(19605, 30393, 31582) },
+                { AOM_CDF4(13523, 26252, 30248) },
+                { AOM_CDF4(8446, 18622, 24512) },
+                { AOM_CDF4(3818, 10343, 15974) },
+                { AOM_CDF4(1481, 4117, 6796) },
+                { AOM_CDF4(22649, 31302, 32190) },
+                { AOM_CDF4(14829, 27127, 30449) },
+                { AOM_CDF4(8313, 17702, 23304) },
+                { AOM_CDF4(3022, 8301, 12786) },
+                { AOM_CDF4(1536, 4412, 7184) },
+                { AOM_CDF4(22354, 29774, 31372) },
+                { AOM_CDF4(14723, 25472, 29214) },
+                { AOM_CDF4(6673, 13745, 18662) },
+                { AOM_CDF4(2068, 5766, 9322) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(6302, 16444, 21761) },
+                { AOM_CDF4(23040, 31538, 32475) },
+                { AOM_CDF4(15196, 28452, 31496) },
+                { AOM_CDF4(10020, 22946, 28514) },
+                { AOM_CDF4(6533, 16862, 23501) },
+                { AOM_CDF4(3538, 9816, 15076) },
+                { AOM_CDF4(24444, 31875, 32525) },
+                { AOM_CDF4(15881, 28924, 31635) },
+                { AOM_CDF4(9922, 22873, 28466) },
+                { AOM_CDF4(6527, 16966, 23691) },
+                { AOM_CDF4(4114, 11303, 17220) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(20201, 30770, 32209) },
+                { AOM_CDF4(14754, 28071, 31258) },
+                { AOM_CDF4(8378, 20186, 26517) },
+                { AOM_CDF4(5916, 15299, 21978) },
+                { AOM_CDF4(4268, 11583, 17901) },
+                { AOM_CDF4(24361, 32025, 32581) },
+                { AOM_CDF4(18673, 30105, 31943) },
+                { AOM_CDF4(10196, 22244, 27576) },
+                { AOM_CDF4(5495, 14349, 20417) },
+                { AOM_CDF4(2676, 7415, 11498) },
+                { AOM_CDF4(24678, 31958, 32585) },
+                { AOM_CDF4(18629, 29906, 31831) },
+                { AOM_CDF4(9364, 20724, 26315) },
+                { AOM_CDF4(4641, 12318, 18094) },
+                { AOM_CDF4(2758, 7387, 11579) },
+                { AOM_CDF4(25433, 31842, 32469) },
+                { AOM_CDF4(18795, 29289, 31411) },
+                { AOM_CDF4(7644, 17584, 23592) },
+                { AOM_CDF4(3408, 9014, 15047) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(4536, 10072, 14001) },
+                { AOM_CDF4(25459, 31416, 32206) },
+                { AOM_CDF4(16605, 28048, 30818) },
+                { AOM_CDF4(11008, 22857, 27719) },
+                { AOM_CDF4(6915, 16268, 22315) },
+                { AOM_CDF4(2625, 6812, 10537) },
+                { AOM_CDF4(24257, 31788, 32499) },
+                { AOM_CDF4(16880, 29454, 31879) },
+                { AOM_CDF4(11958, 25054, 29778) },
+                { AOM_CDF4(7916, 18718, 25084) },
+                { AOM_CDF4(3383, 8777, 13446) },
+                { AOM_CDF4(22720, 31603, 32393) },
+                { AOM_CDF4(14960, 28125, 31335) },
+                { AOM_CDF4(9731, 22210, 27928) },
+                { AOM_CDF4(6304, 15832, 22277) },
+                { AOM_CDF4(2910, 7818, 12166) },
+                { AOM_CDF4(20375, 30627, 32131) },
+                { AOM_CDF4(13904, 27284, 30887) },
+                { AOM_CDF4(9368, 21558, 27144) },
+                { AOM_CDF4(5937, 14966, 21119) },
+                { AOM_CDF4(2667, 7225, 11319) },
+                { AOM_CDF4(23970, 31470, 32378) },
+                { AOM_CDF4(17173, 29734, 32018) },
+                { AOM_CDF4(12795, 25441, 29965) },
+                { AOM_CDF4(8981, 19680, 25893) },
+                { AOM_CDF4(4728, 11372, 16902) },
+                { AOM_CDF4(24287, 31797, 32439) },
+                { AOM_CDF4(16703, 29145, 31696) },
+                { AOM_CDF4(10833, 23554, 28725) },
+                { AOM_CDF4(6468, 16566, 23057) },
+                { AOM_CDF4(2415, 6562, 10278) },
+                { AOM_CDF4(26610, 32395, 32659) },
+                { AOM_CDF4(18590, 30498, 32117) },
+                { AOM_CDF4(12420, 25756, 29950) },
+                { AOM_CDF4(7639, 18746, 24710) },
+                { AOM_CDF4(3001, 8086, 12347) },
+                { AOM_CDF4(25076, 32064, 32580) },
+                { AOM_CDF4(17946, 30128, 32028) },
+                { AOM_CDF4(12024, 24985, 29378) },
+                { AOM_CDF4(7517, 18390, 24304) },
+                { AOM_CDF4(3243, 8781, 13331) } },
+              { { AOM_CDF4(6037, 16771, 21957) },
+                { AOM_CDF4(24774, 31704, 32426) },
+                { AOM_CDF4(16830, 28589, 31056) },
+                { AOM_CDF4(10602, 22828, 27760) },
+                { AOM_CDF4(6733, 16829, 23071) },
+                { AOM_CDF4(3250, 8914, 13556) },
+                { AOM_CDF4(25582, 32220, 32668) },
+                { AOM_CDF4(18659, 30342, 32223) },
+                { AOM_CDF4(12546, 26149, 30515) },
+                { AOM_CDF4(8420, 20451, 26801) },
+                { AOM_CDF4(4636, 12420, 18344) },
+                { AOM_CDF4(27581, 32362, 32639) },
+                { AOM_CDF4(18987, 30083, 31978) },
+                { AOM_CDF4(11327, 24248, 29084) },
+                { AOM_CDF4(7264, 17719, 24120) },
+                { AOM_CDF4(3995, 10768, 16169) },
+                { AOM_CDF4(25893, 31831, 32487) },
+                { AOM_CDF4(16577, 28587, 31379) },
+                { AOM_CDF4(10189, 22748, 28182) },
+                { AOM_CDF4(6832, 17094, 23556) },
+                { AOM_CDF4(3708, 10110, 15334) },
+                { AOM_CDF4(25904, 32282, 32656) },
+                { AOM_CDF4(19721, 30792, 32276) },
+                { AOM_CDF4(12819, 26243, 30411) },
+                { AOM_CDF4(8572, 20614, 26891) },
+                { AOM_CDF4(5364, 14059, 20467) },
+                { AOM_CDF4(26580, 32438, 32677) },
+                { AOM_CDF4(20852, 31225, 32340) },
+                { AOM_CDF4(12435, 25700, 29967) },
+                { AOM_CDF4(8691, 20825, 26976) },
+                { AOM_CDF4(4446, 12209, 17269) },
+                { AOM_CDF4(27350, 32429, 32696) },
+                { AOM_CDF4(21372, 30977, 32272) },
+                { AOM_CDF4(12673, 25270, 29853) },
+                { AOM_CDF4(9208, 20925, 26640) },
+                { AOM_CDF4(5018, 13351, 18732) },
+                { AOM_CDF4(27351, 32479, 32713) },
+                { AOM_CDF4(21398, 31209, 32387) },
+                { AOM_CDF4(12162, 25047, 29842) },
+                { AOM_CDF4(7896, 18691, 25319) },
+                { AOM_CDF4(4670, 12882, 18881) } } },
+            { { { AOM_CDF4(5487, 10460, 13708) },
+                { AOM_CDF4(21597, 28303, 30674) },
+                { AOM_CDF4(11037, 21953, 26476) },
+                { AOM_CDF4(8147, 17962, 22952) },
+                { AOM_CDF4(5242, 13061, 18532) },
+                { AOM_CDF4(1889, 5208, 8182) },
+                { AOM_CDF4(26774, 32133, 32590) },
+                { AOM_CDF4(17844, 29564, 31767) },
+                { AOM_CDF4(11690, 24438, 29171) },
+                { AOM_CDF4(7542, 18215, 24459) },
+                { AOM_CDF4(2993, 8050, 12319) },
+                { AOM_CDF4(28023, 32328, 32591) },
+                { AOM_CDF4(18651, 30126, 31954) },
+                { AOM_CDF4(12164, 25146, 29589) },
+                { AOM_CDF4(7762, 18530, 24771) },
+                { AOM_CDF4(3492, 9183, 13920) },
+                { AOM_CDF4(27591, 32008, 32491) },
+                { AOM_CDF4(17149, 28853, 31510) },
+                { AOM_CDF4(11485, 24003, 28860) },
+                { AOM_CDF4(7697, 18086, 24210) },
+                { AOM_CDF4(3075, 7999, 12218) },
+                { AOM_CDF4(28268, 32482, 32654) },
+                { AOM_CDF4(19631, 31051, 32404) },
+                { AOM_CDF4(13860, 27260, 31020) },
+                { AOM_CDF4(9605, 21613, 27594) },
+                { AOM_CDF4(4876, 12162, 17908) },
+                { AOM_CDF4(27248, 32316, 32576) },
+                { AOM_CDF4(18955, 30457, 32075) },
+                { AOM_CDF4(11824, 23997, 28795) },
+                { AOM_CDF4(7346, 18196, 24647) },
+                { AOM_CDF4(3403, 9247, 14111) },
+                { AOM_CDF4(29711, 32655, 32735) },
+                { AOM_CDF4(21169, 31394, 32417) },
+                { AOM_CDF4(13487, 27198, 30957) },
+                { AOM_CDF4(8828, 21683, 27614) },
+                { AOM_CDF4(4270, 11451, 17038) },
+                { AOM_CDF4(28708, 32578, 32731) },
+                { AOM_CDF4(20120, 31241, 32482) },
+                { AOM_CDF4(13692, 27550, 31321) },
+                { AOM_CDF4(9418, 22514, 28439) },
+                { AOM_CDF4(4999, 13283, 19462) } },
+              { { AOM_CDF4(5673, 14302, 19711) },
+                { AOM_CDF4(26251, 30701, 31834) },
+                { AOM_CDF4(12782, 23783, 27803) },
+                { AOM_CDF4(9127, 20657, 25808) },
+                { AOM_CDF4(6368, 16208, 21462) },
+                { AOM_CDF4(2465, 7177, 10822) },
+                { AOM_CDF4(29961, 32563, 32719) },
+                { AOM_CDF4(18318, 29891, 31949) },
+                { AOM_CDF4(11361, 24514, 29357) },
+                { AOM_CDF4(7900, 19603, 25607) },
+                { AOM_CDF4(4002, 10590, 15546) },
+                { AOM_CDF4(29637, 32310, 32595) },
+                { AOM_CDF4(18296, 29913, 31809) },
+                { AOM_CDF4(10144, 21515, 26871) },
+                { AOM_CDF4(5358, 14322, 20394) },
+                { AOM_CDF4(3067, 8362, 13346) },
+                { AOM_CDF4(28652, 32470, 32676) },
+                { AOM_CDF4(17538, 30771, 32209) },
+                { AOM_CDF4(13924, 26882, 30494) },
+                { AOM_CDF4(10496, 22837, 27869) },
+                { AOM_CDF4(7236, 16396, 21621) },
+                { AOM_CDF4(30743, 32687, 32746) },
+                { AOM_CDF4(23006, 31676, 32489) },
+                { AOM_CDF4(14494, 27828, 31120) },
+                { AOM_CDF4(10174, 22801, 28352) },
+                { AOM_CDF4(6242, 15281, 21043) },
+                { AOM_CDF4(25817, 32243, 32720) },
+                { AOM_CDF4(18618, 31367, 32325) },
+                { AOM_CDF4(13997, 28318, 31878) },
+                { AOM_CDF4(12255, 26534, 31383) },
+                { AOM_CDF4(9561, 21588, 28450) },
+                { AOM_CDF4(28188, 32635, 32724) },
+                { AOM_CDF4(22060, 32365, 32728) },
+                { AOM_CDF4(18102, 30690, 32528) },
+                { AOM_CDF4(14196, 28864, 31999) },
+                { AOM_CDF4(12262, 25792, 30865) },
+                { AOM_CDF4(24176, 32109, 32628) },
+                { AOM_CDF4(18280, 29681, 31963) },
+                { AOM_CDF4(10205, 23703, 29664) },
+                { AOM_CDF4(7889, 20025, 27676) },
+                { AOM_CDF4(6060, 16743, 23970) } } },
+            { { { AOM_CDF4(5141, 7096, 8260) },
+                { AOM_CDF4(27186, 29022, 29789) },
+                { AOM_CDF4(6668, 12568, 15682) },
+                { AOM_CDF4(2172, 6181, 8638) },
+                { AOM_CDF4(1126, 3379, 4531) },
+                { AOM_CDF4(443, 1361, 2254) },
+                { AOM_CDF4(26083, 31153, 32436) },
+                { AOM_CDF4(13486, 24603, 28483) },
+                { AOM_CDF4(6508, 14840, 19910) },
+                { AOM_CDF4(3386, 8800, 13286) },
+                { AOM_CDF4(1530, 4322, 7054) },
+                { AOM_CDF4(29639, 32080, 32548) },
+                { AOM_CDF4(15897, 27552, 30290) },
+                { AOM_CDF4(8588, 20047, 25383) },
+                { AOM_CDF4(4889, 13339, 19269) },
+                { AOM_CDF4(2240, 6871, 10498) },
+                { AOM_CDF4(28165, 32197, 32517) },
+                { AOM_CDF4(20735, 30427, 31568) },
+                { AOM_CDF4(14325, 24671, 27692) },
+                { AOM_CDF4(5119, 12554, 17805) },
+                { AOM_CDF4(1810, 5441, 8261) },
+                { AOM_CDF4(31212, 32724, 32748) },
+                { AOM_CDF4(23352, 31766, 32545) },
+                { AOM_CDF4(14669, 27570, 31059) },
+                { AOM_CDF4(8492, 20894, 27272) },
+                { AOM_CDF4(3644, 10194, 15204) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(2461, 7013, 9371) },
+                { AOM_CDF4(24749, 29600, 30986) },
+                { AOM_CDF4(9466, 19037, 22417) },
+                { AOM_CDF4(3584, 9280, 14400) },
+                { AOM_CDF4(1505, 3929, 5433) },
+                { AOM_CDF4(677, 1500, 2736) },
+                { AOM_CDF4(23987, 30702, 32117) },
+                { AOM_CDF4(13554, 24571, 29263) },
+                { AOM_CDF4(6211, 14556, 21155) },
+                { AOM_CDF4(3135, 10972, 15625) },
+                { AOM_CDF4(2435, 7127, 11427) },
+                { AOM_CDF4(31300, 32532, 32550) },
+                { AOM_CDF4(14757, 30365, 31954) },
+                { AOM_CDF4(4405, 11612, 18553) },
+                { AOM_CDF4(580, 4132, 7322) },
+                { AOM_CDF4(1695, 10169, 14124) },
+                { AOM_CDF4(30008, 32282, 32591) },
+                { AOM_CDF4(19244, 30108, 31748) },
+                { AOM_CDF4(11180, 24158, 29555) },
+                { AOM_CDF4(5650, 14972, 19209) },
+                { AOM_CDF4(2114, 5109, 8456) },
+                { AOM_CDF4(31856, 32716, 32748) },
+                { AOM_CDF4(23012, 31664, 32572) },
+                { AOM_CDF4(13694, 26656, 30636) },
+                { AOM_CDF4(8142, 19508, 26093) },
+                { AOM_CDF4(4253, 10955, 16724) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(601, 983, 1311) },
+                { AOM_CDF4(18725, 23406, 28087) },
+                { AOM_CDF4(5461, 8192, 10923) },
+                { AOM_CDF4(3781, 15124, 21425) },
+                { AOM_CDF4(2587, 7761, 12072) },
+                { AOM_CDF4(106, 458, 810) },
+                { AOM_CDF4(22282, 29710, 31894) },
+                { AOM_CDF4(8508, 20926, 25984) },
+                { AOM_CDF4(3726, 12713, 18083) },
+                { AOM_CDF4(1620, 7112, 10893) },
+                { AOM_CDF4(729, 2236, 3495) },
+                { AOM_CDF4(30163, 32474, 32684) },
+                { AOM_CDF4(18304, 30464, 32000) },
+                { AOM_CDF4(11443, 26526, 29647) },
+                { AOM_CDF4(6007, 15292, 21299) },
+                { AOM_CDF4(2234, 6703, 8937) },
+                { AOM_CDF4(30954, 32177, 32571) },
+                { AOM_CDF4(17363, 29562, 31076) },
+                { AOM_CDF4(9686, 22464, 27410) },
+                { AOM_CDF4(8192, 16384, 21390) },
+                { AOM_CDF4(1755, 8046, 11264) },
+                { AOM_CDF4(31168, 32734, 32748) },
+                { AOM_CDF4(22486, 31441, 32471) },
+                { AOM_CDF4(12833, 25627, 29738) },
+                { AOM_CDF4(6980, 17379, 23122) },
+                { AOM_CDF4(3111, 8887, 13479) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } }
+        }, .dc_sign = {
+            {
+                { AOM_CDF2(128 * 125) },
+                { AOM_CDF2(128 * 102) },
+                { AOM_CDF2(128 * 147) },
+            }, {
+                { AOM_CDF2(128 * 119) },
+                { AOM_CDF2(128 * 101) },
+                { AOM_CDF2(128 * 135) },
+            }
+        }, .br_tok = {
+            { { { AOM_CDF4(14298, 20718, 24174) },
+                { AOM_CDF4(12536, 19601, 23789) },
+                { AOM_CDF4(8712, 15051, 19503) },
+                { AOM_CDF4(6170, 11327, 15434) },
+                { AOM_CDF4(4742, 8926, 12538) },
+                { AOM_CDF4(3803, 7317, 10546) },
+                { AOM_CDF4(1696, 3317, 4871) },
+                { AOM_CDF4(14392, 19951, 22756) },
+                { AOM_CDF4(15978, 23218, 26818) },
+                { AOM_CDF4(12187, 19474, 23889) },
+                { AOM_CDF4(9176, 15640, 20259) },
+                { AOM_CDF4(7068, 12655, 17028) },
+                { AOM_CDF4(5656, 10442, 14472) },
+                { AOM_CDF4(2580, 4992, 7244) },
+                { AOM_CDF4(12136, 18049, 21426) },
+                { AOM_CDF4(13784, 20721, 24481) },
+                { AOM_CDF4(10836, 17621, 21900) },
+                { AOM_CDF4(8372, 14444, 18847) },
+                { AOM_CDF4(6523, 11779, 16000) },
+                { AOM_CDF4(5337, 9898, 13760) },
+                { AOM_CDF4(3034, 5860, 8462) } },
+              { { AOM_CDF4(15967, 22905, 26286) },
+                { AOM_CDF4(13534, 20654, 24579) },
+                { AOM_CDF4(9504, 16092, 20535) },
+                { AOM_CDF4(6975, 12568, 16903) },
+                { AOM_CDF4(5364, 10091, 14020) },
+                { AOM_CDF4(4357, 8370, 11857) },
+                { AOM_CDF4(2506, 4934, 7218) },
+                { AOM_CDF4(23032, 28815, 30936) },
+                { AOM_CDF4(19540, 26704, 29719) },
+                { AOM_CDF4(15158, 22969, 27097) },
+                { AOM_CDF4(11408, 18865, 23650) },
+                { AOM_CDF4(8885, 15448, 20250) },
+                { AOM_CDF4(7108, 12853, 17416) },
+                { AOM_CDF4(4231, 8041, 11480) },
+                { AOM_CDF4(19823, 26490, 29156) },
+                { AOM_CDF4(18890, 25929, 28932) },
+                { AOM_CDF4(15660, 23491, 27433) },
+                { AOM_CDF4(12147, 19776, 24488) },
+                { AOM_CDF4(9728, 16774, 21649) },
+                { AOM_CDF4(7919, 14277, 19066) },
+                { AOM_CDF4(5440, 10170, 14185) } } },
+            { { { AOM_CDF4(14406, 20862, 24414) },
+                { AOM_CDF4(11824, 18907, 23109) },
+                { AOM_CDF4(8257, 14393, 18803) },
+                { AOM_CDF4(5860, 10747, 14778) },
+                { AOM_CDF4(4475, 8486, 11984) },
+                { AOM_CDF4(3606, 6954, 10043) },
+                { AOM_CDF4(1736, 3410, 5048) },
+                { AOM_CDF4(14430, 20046, 22882) },
+                { AOM_CDF4(15593, 22899, 26709) },
+                { AOM_CDF4(12102, 19368, 23811) },
+                { AOM_CDF4(9059, 15584, 20262) },
+                { AOM_CDF4(6999, 12603, 17048) },
+                { AOM_CDF4(5684, 10497, 14553) },
+                { AOM_CDF4(2822, 5438, 7862) },
+                { AOM_CDF4(15785, 21585, 24359) },
+                { AOM_CDF4(18347, 25229, 28266) },
+                { AOM_CDF4(14974, 22487, 26389) },
+                { AOM_CDF4(11423, 18681, 23271) },
+                { AOM_CDF4(8863, 15350, 20008) },
+                { AOM_CDF4(7153, 12852, 17278) },
+                { AOM_CDF4(3707, 7036, 9982) } },
+              { { AOM_CDF4(15460, 21696, 25469) },
+                { AOM_CDF4(12170, 19249, 23191) },
+                { AOM_CDF4(8723, 15027, 19332) },
+                { AOM_CDF4(6428, 11704, 15874) },
+                { AOM_CDF4(4922, 9292, 13052) },
+                { AOM_CDF4(4139, 7695, 11010) },
+                { AOM_CDF4(2291, 4508, 6598) },
+                { AOM_CDF4(19856, 26920, 29828) },
+                { AOM_CDF4(17923, 25289, 28792) },
+                { AOM_CDF4(14278, 21968, 26297) },
+                { AOM_CDF4(10910, 18136, 22950) },
+                { AOM_CDF4(8423, 14815, 19627) },
+                { AOM_CDF4(6771, 12283, 16774) },
+                { AOM_CDF4(4074, 7750, 11081) },
+                { AOM_CDF4(19852, 26074, 28672) },
+                { AOM_CDF4(19371, 26110, 28989) },
+                { AOM_CDF4(16265, 23873, 27663) },
+                { AOM_CDF4(12758, 20378, 24952) },
+                { AOM_CDF4(10095, 17098, 21961) },
+                { AOM_CDF4(8250, 14628, 19451) },
+                { AOM_CDF4(5205, 9745, 13622) } } },
+            { { { AOM_CDF4(10563, 16233, 19763) },
+                { AOM_CDF4(9794, 16022, 19804) },
+                { AOM_CDF4(6750, 11945, 15759) },
+                { AOM_CDF4(4963, 9186, 12752) },
+                { AOM_CDF4(3845, 7435, 10627) },
+                { AOM_CDF4(3051, 6085, 8834) },
+                { AOM_CDF4(1311, 2596, 3830) },
+                { AOM_CDF4(11246, 16404, 19689) },
+                { AOM_CDF4(12315, 18911, 22731) },
+                { AOM_CDF4(10557, 17095, 21289) },
+                { AOM_CDF4(8136, 14006, 18249) },
+                { AOM_CDF4(6348, 11474, 15565) },
+                { AOM_CDF4(5196, 9655, 13400) },
+                { AOM_CDF4(2349, 4526, 6587) },
+                { AOM_CDF4(13337, 18730, 21569) },
+                { AOM_CDF4(19306, 26071, 28882) },
+                { AOM_CDF4(15952, 23540, 27254) },
+                { AOM_CDF4(12409, 19934, 24430) },
+                { AOM_CDF4(9760, 16706, 21389) },
+                { AOM_CDF4(8004, 14220, 18818) },
+                { AOM_CDF4(4138, 7794, 10961) } },
+              { { AOM_CDF4(10870, 16684, 20949) },
+                { AOM_CDF4(9664, 15230, 18680) },
+                { AOM_CDF4(6886, 12109, 15408) },
+                { AOM_CDF4(4825, 8900, 12305) },
+                { AOM_CDF4(3630, 7162, 10314) },
+                { AOM_CDF4(3036, 6429, 9387) },
+                { AOM_CDF4(1671, 3296, 4940) },
+                { AOM_CDF4(13819, 19159, 23026) },
+                { AOM_CDF4(11984, 19108, 23120) },
+                { AOM_CDF4(10690, 17210, 21663) },
+                { AOM_CDF4(7984, 14154, 18333) },
+                { AOM_CDF4(6868, 12294, 16124) },
+                { AOM_CDF4(5274, 8994, 12868) },
+                { AOM_CDF4(2988, 5771, 8424) },
+                { AOM_CDF4(19736, 26647, 29141) },
+                { AOM_CDF4(18933, 26070, 28984) },
+                { AOM_CDF4(15779, 23048, 27200) },
+                { AOM_CDF4(12638, 20061, 24532) },
+                { AOM_CDF4(10692, 17545, 22220) },
+                { AOM_CDF4(9217, 15251, 20054) },
+                { AOM_CDF4(5078, 9284, 12594) } } },
+            { { { AOM_CDF4(2331, 3662, 5244) },
+                { AOM_CDF4(2891, 4771, 6145) },
+                { AOM_CDF4(4598, 7623, 9729) },
+                { AOM_CDF4(3520, 6845, 9199) },
+                { AOM_CDF4(3417, 6119, 9324) },
+                { AOM_CDF4(2601, 5412, 7385) },
+                { AOM_CDF4(600, 1173, 1744) },
+                { AOM_CDF4(7672, 13286, 17469) },
+                { AOM_CDF4(4232, 7792, 10793) },
+                { AOM_CDF4(2915, 5317, 7397) },
+                { AOM_CDF4(2318, 4356, 6152) },
+                { AOM_CDF4(2127, 4000, 5554) },
+                { AOM_CDF4(1850, 3478, 5275) },
+                { AOM_CDF4(977, 1933, 2843) },
+                { AOM_CDF4(18280, 24387, 27989) },
+                { AOM_CDF4(15852, 22671, 26185) },
+                { AOM_CDF4(13845, 20951, 24789) },
+                { AOM_CDF4(11055, 17966, 22129) },
+                { AOM_CDF4(9138, 15422, 19801) },
+                { AOM_CDF4(7454, 13145, 17456) },
+                { AOM_CDF4(3370, 6393, 9013) } },
+              { { AOM_CDF4(5842, 9229, 10838) },
+                { AOM_CDF4(2313, 3491, 4276) },
+                { AOM_CDF4(2998, 6104, 7496) },
+                { AOM_CDF4(2420, 7447, 9868) },
+                { AOM_CDF4(3034, 8495, 10923) },
+                { AOM_CDF4(4076, 8937, 10975) },
+                { AOM_CDF4(1086, 2370, 3299) },
+                { AOM_CDF4(9714, 17254, 20444) },
+                { AOM_CDF4(8543, 13698, 17123) },
+                { AOM_CDF4(4918, 9007, 11910) },
+                { AOM_CDF4(4129, 7532, 10553) },
+                { AOM_CDF4(2364, 5533, 8058) },
+                { AOM_CDF4(1834, 3546, 5563) },
+                { AOM_CDF4(1473, 2908, 4133) },
+                { AOM_CDF4(15405, 21193, 25619) },
+                { AOM_CDF4(15691, 21952, 26561) },
+                { AOM_CDF4(12962, 19194, 24165) },
+                { AOM_CDF4(10272, 17855, 22129) },
+                { AOM_CDF4(8588, 15270, 20718) },
+                { AOM_CDF4(8682, 14669, 19500) },
+                { AOM_CDF4(4870, 9636, 13205) } } }
+        },
+    }, [1] = {
+        .skip = {
+            { { AOM_CDF2(30371) },
+                { AOM_CDF2(7570) },
+                { AOM_CDF2(13155) },
+                { AOM_CDF2(20751) },
+                { AOM_CDF2(20969) },
+                { AOM_CDF2(27067) },
+                { AOM_CDF2(32013) },
+                { AOM_CDF2(5495) },
+                { AOM_CDF2(17942) },
+                { AOM_CDF2(28280) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(31782) },
+                { AOM_CDF2(1836) },
+                { AOM_CDF2(10689) },
+                { AOM_CDF2(17604) },
+                { AOM_CDF2(21622) },
+                { AOM_CDF2(27518) },
+                { AOM_CDF2(32399) },
+                { AOM_CDF2(4419) },
+                { AOM_CDF2(16294) },
+                { AOM_CDF2(28345) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(31901) },
+                { AOM_CDF2(10311) },
+                { AOM_CDF2(18047) },
+                { AOM_CDF2(24806) },
+                { AOM_CDF2(23288) },
+                { AOM_CDF2(27914) },
+                { AOM_CDF2(32296) },
+                { AOM_CDF2(4215) },
+                { AOM_CDF2(15756) },
+                { AOM_CDF2(28341) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(26726) },
+                { AOM_CDF2(1045) },
+                { AOM_CDF2(11703) },
+                { AOM_CDF2(20590) },
+                { AOM_CDF2(18554) },
+                { AOM_CDF2(25970) },
+                { AOM_CDF2(31938) },
+                { AOM_CDF2(5583) },
+                { AOM_CDF2(21313) },
+                { AOM_CDF2(29390) },
+                { AOM_CDF2(641) },
+                { AOM_CDF2(22265) },
+                { AOM_CDF2(31452) } },
+            { { AOM_CDF2(26584) },
+                { AOM_CDF2(188) },
+                { AOM_CDF2(8847) },
+                { AOM_CDF2(24519) },
+                { AOM_CDF2(22938) },
+                { AOM_CDF2(30583) },
+                { AOM_CDF2(32608) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } }
+        }, .eob_bin_16 = {
+            { { AOM_CDF5(2125, 2551, 5165, 8946) },
+              { AOM_CDF5(513, 765, 1859, 6339) } },
+            { { AOM_CDF5(7637, 9498, 14259, 19108) },
+              { AOM_CDF5(2497, 4096, 8866, 16993) } }
+        }, .eob_bin_32 = {
+            { { AOM_CDF6(989, 1249, 2019, 4151, 10785) },
+              { AOM_CDF6(313, 441, 1099, 2917, 8562) } },
+            { { AOM_CDF6(8394, 10352, 13932, 18855, 26014) },
+              { AOM_CDF6(2578, 4124, 8181, 13670, 24234) } }
+        }, .eob_bin_64 = {
+            { { AOM_CDF7(1260, 1446, 2253, 3712, 6652, 13369) },
+              { AOM_CDF7(401, 605, 1029, 2563, 5845, 12626) } },
+            { { AOM_CDF7(8609, 10612, 14624, 18714, 22614, 29024) },
+              { AOM_CDF7(1923, 3127, 5867, 9703, 14277, 27100) } }
+        }, .eob_bin_128 = {
+            { { AOM_CDF8(685, 933, 1488, 2714, 4766, 8562, 19254) },
+              { AOM_CDF8(217, 352, 618, 2303, 5261, 9969, 17472) } },
+            { { AOM_CDF8(8045, 11200, 15497, 19595, 23948, 27408, 30938) },
+              { AOM_CDF8(2310, 4160, 7471, 14997, 17931, 20768, 30240) } }
+        }, .eob_bin_256 = {
+            { { AOM_CDF9(1448, 2109, 4151, 6263, 9329, 13260, 17944, 23300) },
+              { AOM_CDF9(399, 1019, 1749, 3038, 10444, 15546, 22739, 27294) } },
+            { { AOM_CDF9(6402, 8148, 12623, 15072, 18728, 22847, 26447, 29377) },
+              { AOM_CDF9(1674, 3252, 5734, 10159, 22397, 23802, 24821, 30940) } }
+        }, .eob_bin_512 = {
+            { { AOM_CDF10(1230, 2278, 5035, 7776, 11871, 15346, 19590,
+                          24584, 28749) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } },
+            { { AOM_CDF10(7265, 9979, 15819, 19250, 21780, 23846, 26478,
+                          28396, 31811) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } }
+        }, .eob_bin_1024 = {
+            { { AOM_CDF11(696, 948, 3145, 5702, 9706, 13217, 17851,
+                          21856, 25692, 28034) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } },
+            { { AOM_CDF11(2672, 3591, 9330, 17084, 22725, 24284, 26527,
+                          28027, 28377, 30876) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } }
+        }, .eob_hi_bit = {
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(17471) }, { AOM_CDF2(20223) }, { AOM_CDF2(11357) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20335) }, { AOM_CDF2(21667) }, { AOM_CDF2(14818) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20430) }, { AOM_CDF2(20662) }, { AOM_CDF2(15367) },
+                { AOM_CDF2(16970) }, { AOM_CDF2(14657) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(22117) }, { AOM_CDF2(22028) }, { AOM_CDF2(18650) },
+                { AOM_CDF2(16042) }, { AOM_CDF2(15885) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(22409) }, { AOM_CDF2(21012) }, { AOM_CDF2(15650) },
+                { AOM_CDF2(17395) }, { AOM_CDF2(15469) }, { AOM_CDF2(20205) },
+                { AOM_CDF2(19511) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(24220) }, { AOM_CDF2(22480) }, { AOM_CDF2(17737) },
+                { AOM_CDF2(18916) }, { AOM_CDF2(19268) }, { AOM_CDF2(18412) },
+                { AOM_CDF2(18844) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(25991) }, { AOM_CDF2(20314) }, { AOM_CDF2(17731) },
+                { AOM_CDF2(19678) }, { AOM_CDF2(18649) }, { AOM_CDF2(17307) },
+                { AOM_CDF2(21798) }, { AOM_CDF2(17549) }, { AOM_CDF2(15630) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(26585) }, { AOM_CDF2(21469) }, { AOM_CDF2(20432) },
+                { AOM_CDF2(17735) }, { AOM_CDF2(19280) }, { AOM_CDF2(15235) },
+                { AOM_CDF2(20297) }, { AOM_CDF2(22471) }, { AOM_CDF2(28997) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(26605) }, { AOM_CDF2(11304) }, { AOM_CDF2(16726) },
+                { AOM_CDF2(16560) }, { AOM_CDF2(20866) }, { AOM_CDF2(23524) },
+                { AOM_CDF2(19878) }, { AOM_CDF2(13469) }, { AOM_CDF2(23084) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } }
+        }, .eob_base_tok = {
+            { { { AOM_CDF3(17560, 29888) },
+                { AOM_CDF3(29671, 31549) },
+                { AOM_CDF3(31007, 32056) },
+                { AOM_CDF3(27286, 30006) } },
+              { { AOM_CDF3(26594, 31212) },
+                { AOM_CDF3(31208, 32582) },
+                { AOM_CDF3(31835, 32637) },
+                { AOM_CDF3(30595, 32206) } } },
+            { { { AOM_CDF3(15239, 29932) },
+                { AOM_CDF3(31315, 32095) },
+                { AOM_CDF3(32130, 32434) },
+                { AOM_CDF3(30864, 31996) } },
+              { { AOM_CDF3(26279, 30968) },
+                { AOM_CDF3(31142, 32495) },
+                { AOM_CDF3(31713, 32540) },
+                { AOM_CDF3(31929, 32594) } } },
+            { { { AOM_CDF3(2644, 25198) },
+                { AOM_CDF3(32038, 32451) },
+                { AOM_CDF3(32639, 32695) },
+                { AOM_CDF3(32166, 32518) } },
+              { { AOM_CDF3(17187, 27668) },
+                { AOM_CDF3(31714, 32550) },
+                { AOM_CDF3(32283, 32678) },
+                { AOM_CDF3(31930, 32563) } } },
+            { { { AOM_CDF3(1044, 2257) },
+                { AOM_CDF3(30755, 31923) },
+                { AOM_CDF3(32208, 32693) },
+                { AOM_CDF3(32244, 32615) } },
+              { { AOM_CDF3(21317, 26207) },
+                { AOM_CDF3(29133, 30868) },
+                { AOM_CDF3(29311, 31231) },
+                { AOM_CDF3(29657, 31087) } } },
+            { { { AOM_CDF3(478, 1834) },
+                { AOM_CDF3(31005, 31987) },
+                { AOM_CDF3(32317, 32724) },
+                { AOM_CDF3(30865, 32648) } },
+              { { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) } } }
+        }, .base_tok = {
+            { { { AOM_CDF4(6041, 11854, 15927) },
+                { AOM_CDF4(20326, 30905, 32251) },
+                { AOM_CDF4(14164, 26831, 30725) },
+                { AOM_CDF4(9760, 20647, 26585) },
+                { AOM_CDF4(6416, 14953, 21219) },
+                { AOM_CDF4(2966, 7151, 10891) },
+                { AOM_CDF4(23567, 31374, 32254) },
+                { AOM_CDF4(14978, 27416, 30946) },
+                { AOM_CDF4(9434, 20225, 26254) },
+                { AOM_CDF4(6658, 14558, 20535) },
+                { AOM_CDF4(3916, 8677, 12989) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(18088, 29545, 31587) },
+                { AOM_CDF4(13062, 25843, 30073) },
+                { AOM_CDF4(8940, 16827, 22251) },
+                { AOM_CDF4(7654, 13220, 17973) },
+                { AOM_CDF4(5733, 10316, 14456) },
+                { AOM_CDF4(22879, 31388, 32114) },
+                { AOM_CDF4(15215, 27993, 30955) },
+                { AOM_CDF4(9397, 19445, 24978) },
+                { AOM_CDF4(3442, 9813, 15344) },
+                { AOM_CDF4(1368, 3936, 6532) },
+                { AOM_CDF4(25494, 32033, 32406) },
+                { AOM_CDF4(16772, 27963, 30718) },
+                { AOM_CDF4(9419, 18165, 23260) },
+                { AOM_CDF4(2677, 7501, 11797) },
+                { AOM_CDF4(1516, 4344, 7170) },
+                { AOM_CDF4(26556, 31454, 32101) },
+                { AOM_CDF4(17128, 27035, 30108) },
+                { AOM_CDF4(8324, 15344, 20249) },
+                { AOM_CDF4(1903, 5696, 9469) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(8455, 19003, 24368) },
+                { AOM_CDF4(23563, 32021, 32604) },
+                { AOM_CDF4(16237, 29446, 31935) },
+                { AOM_CDF4(10724, 23999, 29358) },
+                { AOM_CDF4(6725, 17528, 24416) },
+                { AOM_CDF4(3927, 10927, 16825) },
+                { AOM_CDF4(26313, 32288, 32634) },
+                { AOM_CDF4(17430, 30095, 32095) },
+                { AOM_CDF4(11116, 24606, 29679) },
+                { AOM_CDF4(7195, 18384, 25269) },
+                { AOM_CDF4(4726, 12852, 19315) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(22822, 31648, 32483) },
+                { AOM_CDF4(16724, 29633, 31929) },
+                { AOM_CDF4(10261, 23033, 28725) },
+                { AOM_CDF4(7029, 17840, 24528) },
+                { AOM_CDF4(4867, 13886, 21502) },
+                { AOM_CDF4(25298, 31892, 32491) },
+                { AOM_CDF4(17809, 29330, 31512) },
+                { AOM_CDF4(9668, 21329, 26579) },
+                { AOM_CDF4(4774, 12956, 18976) },
+                { AOM_CDF4(2322, 7030, 11540) },
+                { AOM_CDF4(25472, 31920, 32543) },
+                { AOM_CDF4(17957, 29387, 31632) },
+                { AOM_CDF4(9196, 20593, 26400) },
+                { AOM_CDF4(4680, 12705, 19202) },
+                { AOM_CDF4(2917, 8456, 13436) },
+                { AOM_CDF4(26471, 32059, 32574) },
+                { AOM_CDF4(18458, 29783, 31909) },
+                { AOM_CDF4(8400, 19464, 25956) },
+                { AOM_CDF4(3812, 10973, 17206) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(6779, 13743, 17678) },
+                { AOM_CDF4(24806, 31797, 32457) },
+                { AOM_CDF4(17616, 29047, 31372) },
+                { AOM_CDF4(11063, 23175, 28003) },
+                { AOM_CDF4(6521, 16110, 22324) },
+                { AOM_CDF4(2764, 7504, 11654) },
+                { AOM_CDF4(25266, 32367, 32637) },
+                { AOM_CDF4(19054, 30553, 32175) },
+                { AOM_CDF4(12139, 25212, 29807) },
+                { AOM_CDF4(7311, 18162, 24704) },
+                { AOM_CDF4(3397, 9164, 14074) },
+                { AOM_CDF4(25988, 32208, 32522) },
+                { AOM_CDF4(16253, 28912, 31526) },
+                { AOM_CDF4(9151, 21387, 27372) },
+                { AOM_CDF4(5688, 14915, 21496) },
+                { AOM_CDF4(2717, 7627, 12004) },
+                { AOM_CDF4(23144, 31855, 32443) },
+                { AOM_CDF4(16070, 28491, 31325) },
+                { AOM_CDF4(8702, 20467, 26517) },
+                { AOM_CDF4(5243, 13956, 20367) },
+                { AOM_CDF4(2621, 7335, 11567) },
+                { AOM_CDF4(26636, 32340, 32630) },
+                { AOM_CDF4(19990, 31050, 32341) },
+                { AOM_CDF4(13243, 26105, 30315) },
+                { AOM_CDF4(8588, 19521, 25918) },
+                { AOM_CDF4(4717, 11585, 17304) },
+                { AOM_CDF4(25844, 32292, 32582) },
+                { AOM_CDF4(19090, 30635, 32097) },
+                { AOM_CDF4(11963, 24546, 28939) },
+                { AOM_CDF4(6218, 16087, 22354) },
+                { AOM_CDF4(2340, 6608, 10426) },
+                { AOM_CDF4(28046, 32576, 32694) },
+                { AOM_CDF4(21178, 31313, 32296) },
+                { AOM_CDF4(13486, 26184, 29870) },
+                { AOM_CDF4(7149, 17871, 23723) },
+                { AOM_CDF4(2833, 7958, 12259) },
+                { AOM_CDF4(27710, 32528, 32686) },
+                { AOM_CDF4(20674, 31076, 32268) },
+                { AOM_CDF4(12413, 24955, 29243) },
+                { AOM_CDF4(6676, 16927, 23097) },
+                { AOM_CDF4(2966, 8333, 12919) } },
+              { { AOM_CDF4(8639, 19339, 24429) },
+                { AOM_CDF4(24404, 31837, 32525) },
+                { AOM_CDF4(16997, 29425, 31784) },
+                { AOM_CDF4(11253, 24234, 29149) },
+                { AOM_CDF4(6751, 17394, 24028) },
+                { AOM_CDF4(3490, 9830, 15191) },
+                { AOM_CDF4(26283, 32471, 32714) },
+                { AOM_CDF4(19599, 31168, 32442) },
+                { AOM_CDF4(13146, 26954, 30893) },
+                { AOM_CDF4(8214, 20588, 26890) },
+                { AOM_CDF4(4699, 13081, 19300) },
+                { AOM_CDF4(28212, 32458, 32669) },
+                { AOM_CDF4(18594, 30316, 32100) },
+                { AOM_CDF4(11219, 24408, 29234) },
+                { AOM_CDF4(6865, 17656, 24149) },
+                { AOM_CDF4(3678, 10362, 16006) },
+                { AOM_CDF4(25825, 32136, 32616) },
+                { AOM_CDF4(17313, 29853, 32021) },
+                { AOM_CDF4(11197, 24471, 29472) },
+                { AOM_CDF4(6947, 17781, 24405) },
+                { AOM_CDF4(3768, 10660, 16261) },
+                { AOM_CDF4(27352, 32500, 32706) },
+                { AOM_CDF4(20850, 31468, 32469) },
+                { AOM_CDF4(14021, 27707, 31133) },
+                { AOM_CDF4(8964, 21748, 27838) },
+                { AOM_CDF4(5437, 14665, 21187) },
+                { AOM_CDF4(26304, 32492, 32698) },
+                { AOM_CDF4(20409, 31380, 32385) },
+                { AOM_CDF4(13682, 27222, 30632) },
+                { AOM_CDF4(8974, 21236, 26685) },
+                { AOM_CDF4(4234, 11665, 16934) },
+                { AOM_CDF4(26273, 32357, 32711) },
+                { AOM_CDF4(20672, 31242, 32441) },
+                { AOM_CDF4(14172, 27254, 30902) },
+                { AOM_CDF4(9870, 21898, 27275) },
+                { AOM_CDF4(5164, 13506, 19270) },
+                { AOM_CDF4(26725, 32459, 32728) },
+                { AOM_CDF4(20991, 31442, 32527) },
+                { AOM_CDF4(13071, 26434, 30811) },
+                { AOM_CDF4(8184, 20090, 26742) },
+                { AOM_CDF4(4803, 13255, 19895) } } },
+            { { { AOM_CDF4(7555, 14942, 18501) },
+                { AOM_CDF4(24410, 31178, 32287) },
+                { AOM_CDF4(14394, 26738, 30253) },
+                { AOM_CDF4(8413, 19554, 25195) },
+                { AOM_CDF4(4766, 12924, 18785) },
+                { AOM_CDF4(2029, 5806, 9207) },
+                { AOM_CDF4(26776, 32364, 32663) },
+                { AOM_CDF4(18732, 29967, 31931) },
+                { AOM_CDF4(11005, 23786, 28852) },
+                { AOM_CDF4(6466, 16909, 23510) },
+                { AOM_CDF4(3044, 8638, 13419) },
+                { AOM_CDF4(29208, 32582, 32704) },
+                { AOM_CDF4(20068, 30857, 32208) },
+                { AOM_CDF4(12003, 25085, 29595) },
+                { AOM_CDF4(6947, 17750, 24189) },
+                { AOM_CDF4(3245, 9103, 14007) },
+                { AOM_CDF4(27359, 32465, 32669) },
+                { AOM_CDF4(19421, 30614, 32174) },
+                { AOM_CDF4(11915, 25010, 29579) },
+                { AOM_CDF4(6950, 17676, 24074) },
+                { AOM_CDF4(3007, 8473, 13096) },
+                { AOM_CDF4(29002, 32676, 32735) },
+                { AOM_CDF4(22102, 31849, 32576) },
+                { AOM_CDF4(14408, 28009, 31405) },
+                { AOM_CDF4(9027, 21679, 27931) },
+                { AOM_CDF4(4694, 12678, 18748) },
+                { AOM_CDF4(28216, 32528, 32682) },
+                { AOM_CDF4(20849, 31264, 32318) },
+                { AOM_CDF4(12756, 25815, 29751) },
+                { AOM_CDF4(7565, 18801, 24923) },
+                { AOM_CDF4(3509, 9533, 14477) },
+                { AOM_CDF4(30133, 32687, 32739) },
+                { AOM_CDF4(23063, 31910, 32515) },
+                { AOM_CDF4(14588, 28051, 31132) },
+                { AOM_CDF4(9085, 21649, 27457) },
+                { AOM_CDF4(4261, 11654, 17264) },
+                { AOM_CDF4(29518, 32691, 32748) },
+                { AOM_CDF4(22451, 31959, 32613) },
+                { AOM_CDF4(14864, 28722, 31700) },
+                { AOM_CDF4(9695, 22964, 28716) },
+                { AOM_CDF4(4932, 13358, 19502) } },
+              { { AOM_CDF4(6465, 16958, 21688) },
+                { AOM_CDF4(25199, 31514, 32360) },
+                { AOM_CDF4(14774, 27149, 30607) },
+                { AOM_CDF4(9257, 21438, 26972) },
+                { AOM_CDF4(5723, 15183, 21882) },
+                { AOM_CDF4(3150, 8879, 13731) },
+                { AOM_CDF4(26989, 32262, 32682) },
+                { AOM_CDF4(17396, 29937, 32085) },
+                { AOM_CDF4(11387, 24901, 29784) },
+                { AOM_CDF4(7289, 18821, 25548) },
+                { AOM_CDF4(3734, 10577, 16086) },
+                { AOM_CDF4(29728, 32501, 32695) },
+                { AOM_CDF4(17431, 29701, 31903) },
+                { AOM_CDF4(9921, 22826, 28300) },
+                { AOM_CDF4(5896, 15434, 22068) },
+                { AOM_CDF4(3430, 9646, 14757) },
+                { AOM_CDF4(28614, 32511, 32705) },
+                { AOM_CDF4(19364, 30638, 32263) },
+                { AOM_CDF4(13129, 26254, 30402) },
+                { AOM_CDF4(8754, 20484, 26440) },
+                { AOM_CDF4(4378, 11607, 17110) },
+                { AOM_CDF4(30292, 32671, 32744) },
+                { AOM_CDF4(21780, 31603, 32501) },
+                { AOM_CDF4(14314, 27829, 31291) },
+                { AOM_CDF4(9611, 22327, 28263) },
+                { AOM_CDF4(4890, 13087, 19065) },
+                { AOM_CDF4(25862, 32567, 32733) },
+                { AOM_CDF4(20794, 32050, 32567) },
+                { AOM_CDF4(17243, 30625, 32254) },
+                { AOM_CDF4(13283, 27628, 31474) },
+                { AOM_CDF4(9669, 22532, 28918) },
+                { AOM_CDF4(27435, 32697, 32748) },
+                { AOM_CDF4(24922, 32390, 32714) },
+                { AOM_CDF4(21449, 31504, 32536) },
+                { AOM_CDF4(16392, 29729, 31832) },
+                { AOM_CDF4(11692, 24884, 29076) },
+                { AOM_CDF4(24193, 32290, 32735) },
+                { AOM_CDF4(18909, 31104, 32563) },
+                { AOM_CDF4(12236, 26841, 31403) },
+                { AOM_CDF4(8171, 21840, 29082) },
+                { AOM_CDF4(7224, 17280, 25275) } } },
+            { { { AOM_CDF4(3078, 6839, 9890) },
+                { AOM_CDF4(13837, 20450, 24479) },
+                { AOM_CDF4(5914, 14222, 19328) },
+                { AOM_CDF4(3866, 10267, 14762) },
+                { AOM_CDF4(2612, 7208, 11042) },
+                { AOM_CDF4(1067, 2991, 4776) },
+                { AOM_CDF4(25817, 31646, 32529) },
+                { AOM_CDF4(13708, 26338, 30385) },
+                { AOM_CDF4(7328, 18585, 24870) },
+                { AOM_CDF4(4691, 13080, 19276) },
+                { AOM_CDF4(1825, 5253, 8352) },
+                { AOM_CDF4(29386, 32315, 32624) },
+                { AOM_CDF4(17160, 29001, 31360) },
+                { AOM_CDF4(9602, 21862, 27396) },
+                { AOM_CDF4(5915, 15772, 22148) },
+                { AOM_CDF4(2786, 7779, 12047) },
+                { AOM_CDF4(29246, 32450, 32663) },
+                { AOM_CDF4(18696, 29929, 31818) },
+                { AOM_CDF4(10510, 23369, 28560) },
+                { AOM_CDF4(6229, 16499, 23125) },
+                { AOM_CDF4(2608, 7448, 11705) },
+                { AOM_CDF4(30753, 32710, 32748) },
+                { AOM_CDF4(21638, 31487, 32503) },
+                { AOM_CDF4(12937, 26854, 30870) },
+                { AOM_CDF4(8182, 20596, 26970) },
+                { AOM_CDF4(3637, 10269, 15497) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(5244, 12150, 16906) },
+                { AOM_CDF4(20486, 26858, 29701) },
+                { AOM_CDF4(7756, 18317, 23735) },
+                { AOM_CDF4(3452, 9256, 13146) },
+                { AOM_CDF4(2020, 5206, 8229) },
+                { AOM_CDF4(1801, 4993, 7903) },
+                { AOM_CDF4(27051, 31858, 32531) },
+                { AOM_CDF4(15988, 27531, 30619) },
+                { AOM_CDF4(9188, 21484, 26719) },
+                { AOM_CDF4(6273, 17186, 23800) },
+                { AOM_CDF4(3108, 9355, 14764) },
+                { AOM_CDF4(31076, 32520, 32680) },
+                { AOM_CDF4(18119, 30037, 31850) },
+                { AOM_CDF4(10244, 22969, 27472) },
+                { AOM_CDF4(4692, 14077, 19273) },
+                { AOM_CDF4(3694, 11677, 17556) },
+                { AOM_CDF4(30060, 32581, 32720) },
+                { AOM_CDF4(21011, 30775, 32120) },
+                { AOM_CDF4(11931, 24820, 29289) },
+                { AOM_CDF4(7119, 17662, 24356) },
+                { AOM_CDF4(3833, 10706, 16304) },
+                { AOM_CDF4(31954, 32731, 32748) },
+                { AOM_CDF4(23913, 31724, 32489) },
+                { AOM_CDF4(15520, 28060, 31286) },
+                { AOM_CDF4(11517, 23008, 28571) },
+                { AOM_CDF4(6193, 14508, 20629) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(1035, 2807, 4156) },
+                { AOM_CDF4(13162, 18138, 20939) },
+                { AOM_CDF4(2696, 6633, 8755) },
+                { AOM_CDF4(1373, 4161, 6853) },
+                { AOM_CDF4(1099, 2746, 4716) },
+                { AOM_CDF4(340, 1021, 1599) },
+                { AOM_CDF4(22826, 30419, 32135) },
+                { AOM_CDF4(10395, 21762, 26942) },
+                { AOM_CDF4(4726, 12407, 17361) },
+                { AOM_CDF4(2447, 7080, 10593) },
+                { AOM_CDF4(1227, 3717, 6011) },
+                { AOM_CDF4(28156, 31424, 31934) },
+                { AOM_CDF4(16915, 27754, 30373) },
+                { AOM_CDF4(9148, 20990, 26431) },
+                { AOM_CDF4(5950, 15515, 21148) },
+                { AOM_CDF4(2492, 7327, 11526) },
+                { AOM_CDF4(30602, 32477, 32670) },
+                { AOM_CDF4(20026, 29955, 31568) },
+                { AOM_CDF4(11220, 23628, 28105) },
+                { AOM_CDF4(6652, 17019, 22973) },
+                { AOM_CDF4(3064, 8536, 13043) },
+                { AOM_CDF4(31769, 32724, 32748) },
+                { AOM_CDF4(22230, 30887, 32373) },
+                { AOM_CDF4(12234, 25079, 29731) },
+                { AOM_CDF4(7326, 18816, 25353) },
+                { AOM_CDF4(3933, 10907, 16616) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } }
+        }, .dc_sign = {
+            {
+                { AOM_CDF2(128 * 125) },
+                { AOM_CDF2(128 * 102) },
+                { AOM_CDF2(128 * 147) },
+            },
+            {
+                { AOM_CDF2(128 * 119) },
+                { AOM_CDF2(128 * 101) },
+                { AOM_CDF2(128 * 135) },
+            }
+        }, .br_tok = {
+            { { { AOM_CDF4(14995, 21341, 24749) },
+                { AOM_CDF4(13158, 20289, 24601) },
+                { AOM_CDF4(8941, 15326, 19876) },
+                { AOM_CDF4(6297, 11541, 15807) },
+                { AOM_CDF4(4817, 9029, 12776) },
+                { AOM_CDF4(3731, 7273, 10627) },
+                { AOM_CDF4(1847, 3617, 5354) },
+                { AOM_CDF4(14472, 19659, 22343) },
+                { AOM_CDF4(16806, 24162, 27533) },
+                { AOM_CDF4(12900, 20404, 24713) },
+                { AOM_CDF4(9411, 16112, 20797) },
+                { AOM_CDF4(7056, 12697, 17148) },
+                { AOM_CDF4(5544, 10339, 14460) },
+                { AOM_CDF4(2954, 5704, 8319) },
+                { AOM_CDF4(12464, 18071, 21354) },
+                { AOM_CDF4(15482, 22528, 26034) },
+                { AOM_CDF4(12070, 19269, 23624) },
+                { AOM_CDF4(8953, 15406, 20106) },
+                { AOM_CDF4(7027, 12730, 17220) },
+                { AOM_CDF4(5887, 10913, 15140) },
+                { AOM_CDF4(3793, 7278, 10447) } },
+              { { AOM_CDF4(15571, 22232, 25749) },
+                { AOM_CDF4(14506, 21575, 25374) },
+                { AOM_CDF4(10189, 17089, 21569) },
+                { AOM_CDF4(7316, 13301, 17915) },
+                { AOM_CDF4(5783, 10912, 15190) },
+                { AOM_CDF4(4760, 9155, 13088) },
+                { AOM_CDF4(2993, 5966, 8774) },
+                { AOM_CDF4(23424, 28903, 30778) },
+                { AOM_CDF4(20775, 27666, 30290) },
+                { AOM_CDF4(16474, 24410, 28299) },
+                { AOM_CDF4(12471, 20180, 24987) },
+                { AOM_CDF4(9410, 16487, 21439) },
+                { AOM_CDF4(7536, 13614, 18529) },
+                { AOM_CDF4(5048, 9586, 13549) },
+                { AOM_CDF4(21090, 27290, 29756) },
+                { AOM_CDF4(20796, 27402, 30026) },
+                { AOM_CDF4(17819, 25485, 28969) },
+                { AOM_CDF4(13860, 21909, 26462) },
+                { AOM_CDF4(11002, 18494, 23529) },
+                { AOM_CDF4(8953, 15929, 20897) },
+                { AOM_CDF4(6448, 11918, 16454) } } },
+            { { { AOM_CDF4(15999, 22208, 25449) },
+                { AOM_CDF4(13050, 19988, 24122) },
+                { AOM_CDF4(8594, 14864, 19378) },
+                { AOM_CDF4(6033, 11079, 15238) },
+                { AOM_CDF4(4554, 8683, 12347) },
+                { AOM_CDF4(3672, 7139, 10337) },
+                { AOM_CDF4(1900, 3771, 5576) },
+                { AOM_CDF4(15788, 21340, 23949) },
+                { AOM_CDF4(16825, 24235, 27758) },
+                { AOM_CDF4(12873, 20402, 24810) },
+                { AOM_CDF4(9590, 16363, 21094) },
+                { AOM_CDF4(7352, 13209, 17733) },
+                { AOM_CDF4(5960, 10989, 15184) },
+                { AOM_CDF4(3232, 6234, 9007) },
+                { AOM_CDF4(15761, 20716, 23224) },
+                { AOM_CDF4(19318, 25989, 28759) },
+                { AOM_CDF4(15529, 23094, 26929) },
+                { AOM_CDF4(11662, 18989, 23641) },
+                { AOM_CDF4(8955, 15568, 20366) },
+                { AOM_CDF4(7281, 13106, 17708) },
+                { AOM_CDF4(4248, 8059, 11440) } },
+              { { AOM_CDF4(14899, 21217, 24503) },
+                { AOM_CDF4(13519, 20283, 24047) },
+                { AOM_CDF4(9429, 15966, 20365) },
+                { AOM_CDF4(6700, 12355, 16652) },
+                { AOM_CDF4(5088, 9704, 13716) },
+                { AOM_CDF4(4243, 8154, 11731) },
+                { AOM_CDF4(2702, 5364, 7861) },
+                { AOM_CDF4(22745, 28388, 30454) },
+                { AOM_CDF4(20235, 27146, 29922) },
+                { AOM_CDF4(15896, 23715, 27637) },
+                { AOM_CDF4(11840, 19350, 24131) },
+                { AOM_CDF4(9122, 15932, 20880) },
+                { AOM_CDF4(7488, 13581, 18362) },
+                { AOM_CDF4(5114, 9568, 13370) },
+                { AOM_CDF4(20845, 26553, 28932) },
+                { AOM_CDF4(20981, 27372, 29884) },
+                { AOM_CDF4(17781, 25335, 28785) },
+                { AOM_CDF4(13760, 21708, 26297) },
+                { AOM_CDF4(10975, 18415, 23365) },
+                { AOM_CDF4(9045, 15789, 20686) },
+                { AOM_CDF4(6130, 11199, 15423) } } },
+            { { { AOM_CDF4(13549, 19724, 23158) },
+                { AOM_CDF4(11844, 18382, 22246) },
+                { AOM_CDF4(7919, 13619, 17773) },
+                { AOM_CDF4(5486, 10143, 13946) },
+                { AOM_CDF4(4166, 7983, 11324) },
+                { AOM_CDF4(3364, 6506, 9427) },
+                { AOM_CDF4(1598, 3160, 4674) },
+                { AOM_CDF4(15281, 20979, 23781) },
+                { AOM_CDF4(14939, 22119, 25952) },
+                { AOM_CDF4(11363, 18407, 22812) },
+                { AOM_CDF4(8609, 14857, 19370) },
+                { AOM_CDF4(6737, 12184, 16480) },
+                { AOM_CDF4(5506, 10263, 14262) },
+                { AOM_CDF4(2990, 5786, 8380) },
+                { AOM_CDF4(20249, 25253, 27417) },
+                { AOM_CDF4(21070, 27518, 30001) },
+                { AOM_CDF4(16854, 24469, 28074) },
+                { AOM_CDF4(12864, 20486, 25000) },
+                { AOM_CDF4(9962, 16978, 21778) },
+                { AOM_CDF4(8074, 14338, 19048) },
+                { AOM_CDF4(4494, 8479, 11906) } },
+              { { AOM_CDF4(13960, 19617, 22829) },
+                { AOM_CDF4(11150, 17341, 21228) },
+                { AOM_CDF4(7150, 12964, 17190) },
+                { AOM_CDF4(5331, 10002, 13867) },
+                { AOM_CDF4(4167, 7744, 11057) },
+                { AOM_CDF4(3480, 6629, 9646) },
+                { AOM_CDF4(1883, 3784, 5686) },
+                { AOM_CDF4(18752, 25660, 28912) },
+                { AOM_CDF4(16968, 24586, 28030) },
+                { AOM_CDF4(13520, 21055, 25313) },
+                { AOM_CDF4(10453, 17626, 22280) },
+                { AOM_CDF4(8386, 14505, 19116) },
+                { AOM_CDF4(6742, 12595, 17008) },
+                { AOM_CDF4(4273, 8140, 11499) },
+                { AOM_CDF4(22120, 27827, 30233) },
+                { AOM_CDF4(20563, 27358, 29895) },
+                { AOM_CDF4(17076, 24644, 28153) },
+                { AOM_CDF4(13362, 20942, 25309) },
+                { AOM_CDF4(10794, 17965, 22695) },
+                { AOM_CDF4(9014, 15652, 20319) },
+                { AOM_CDF4(5708, 10512, 14497) } } },
+            { { { AOM_CDF4(5705, 10930, 15725) },
+                { AOM_CDF4(7946, 12765, 16115) },
+                { AOM_CDF4(6801, 12123, 16226) },
+                { AOM_CDF4(5462, 10135, 14200) },
+                { AOM_CDF4(4189, 8011, 11507) },
+                { AOM_CDF4(3191, 6229, 9408) },
+                { AOM_CDF4(1057, 2137, 3212) },
+                { AOM_CDF4(10018, 17067, 21491) },
+                { AOM_CDF4(7380, 12582, 16453) },
+                { AOM_CDF4(6068, 10845, 14339) },
+                { AOM_CDF4(5098, 9198, 12555) },
+                { AOM_CDF4(4312, 8010, 11119) },
+                { AOM_CDF4(3700, 6966, 9781) },
+                { AOM_CDF4(1693, 3326, 4887) },
+                { AOM_CDF4(18757, 24930, 27774) },
+                { AOM_CDF4(17648, 24596, 27817) },
+                { AOM_CDF4(14707, 22052, 26026) },
+                { AOM_CDF4(11720, 18852, 23292) },
+                { AOM_CDF4(9357, 15952, 20525) },
+                { AOM_CDF4(7810, 13753, 18210) },
+                { AOM_CDF4(3879, 7333, 10328) } },
+              { { AOM_CDF4(8278, 13242, 15922) },
+                { AOM_CDF4(10547, 15867, 18919) },
+                { AOM_CDF4(9106, 15842, 20609) },
+                { AOM_CDF4(6833, 13007, 17218) },
+                { AOM_CDF4(4811, 9712, 13923) },
+                { AOM_CDF4(3985, 7352, 11128) },
+                { AOM_CDF4(1688, 3458, 5262) },
+                { AOM_CDF4(12951, 21861, 26510) },
+                { AOM_CDF4(9788, 16044, 20276) },
+                { AOM_CDF4(6309, 11244, 14870) },
+                { AOM_CDF4(5183, 9349, 12566) },
+                { AOM_CDF4(4389, 8229, 11492) },
+                { AOM_CDF4(3633, 6945, 10620) },
+                { AOM_CDF4(3600, 6847, 9907) },
+                { AOM_CDF4(21748, 28137, 30255) },
+                { AOM_CDF4(19436, 26581, 29560) },
+                { AOM_CDF4(16359, 24201, 27953) },
+                { AOM_CDF4(13961, 21693, 25871) },
+                { AOM_CDF4(11544, 18686, 23322) },
+                { AOM_CDF4(9372, 16462, 20952) },
+                { AOM_CDF4(6138, 11210, 15390) } } }
+        },
+    }, [2] = {
+        .skip = {
+            { { AOM_CDF2(29614) },
+                { AOM_CDF2(9068) },
+                { AOM_CDF2(12924) },
+                { AOM_CDF2(19538) },
+                { AOM_CDF2(17737) },
+                { AOM_CDF2(24619) },
+                { AOM_CDF2(30642) },
+                { AOM_CDF2(4119) },
+                { AOM_CDF2(16026) },
+                { AOM_CDF2(25657) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(31957) },
+                { AOM_CDF2(3230) },
+                { AOM_CDF2(11153) },
+                { AOM_CDF2(18123) },
+                { AOM_CDF2(20143) },
+                { AOM_CDF2(26536) },
+                { AOM_CDF2(31986) },
+                { AOM_CDF2(3050) },
+                { AOM_CDF2(14603) },
+                { AOM_CDF2(25155) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(32363) },
+                { AOM_CDF2(10692) },
+                { AOM_CDF2(19090) },
+                { AOM_CDF2(24357) },
+                { AOM_CDF2(24442) },
+                { AOM_CDF2(28312) },
+                { AOM_CDF2(32169) },
+                { AOM_CDF2(3648) },
+                { AOM_CDF2(15690) },
+                { AOM_CDF2(26815) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(30669) },
+                { AOM_CDF2(3832) },
+                { AOM_CDF2(11663) },
+                { AOM_CDF2(18889) },
+                { AOM_CDF2(19782) },
+                { AOM_CDF2(23313) },
+                { AOM_CDF2(31330) },
+                { AOM_CDF2(5124) },
+                { AOM_CDF2(18719) },
+                { AOM_CDF2(28468) },
+                { AOM_CDF2(3082) },
+                { AOM_CDF2(20982) },
+                { AOM_CDF2(29443) } },
+            { { AOM_CDF2(28573) },
+                { AOM_CDF2(3183) },
+                { AOM_CDF2(17802) },
+                { AOM_CDF2(25977) },
+                { AOM_CDF2(26677) },
+                { AOM_CDF2(27832) },
+                { AOM_CDF2(32387) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } }
+        }, .eob_bin_16 = {
+            { { AOM_CDF5(4016, 4897, 8881, 14968) },
+              { AOM_CDF5(716, 1105, 2646, 10056) } },
+            { { AOM_CDF5(11139, 13270, 18241, 23566) },
+              { AOM_CDF5(3192, 5032, 10297, 19755) } }
+        }, .eob_bin_32 = {
+            { { AOM_CDF6(2515, 3003, 4452, 8162, 16041) },
+              { AOM_CDF6(574, 821, 1836, 5089, 13128) } },
+            { { AOM_CDF6(13468, 16303, 20361, 25105, 29281) },
+              { AOM_CDF6(3542, 5502, 10415, 16760, 25644) } }
+        }, .eob_bin_64 = {
+            { { AOM_CDF7(2374, 2772, 4583, 7276, 12288, 19706) },
+              { AOM_CDF7(497, 810, 1315, 3000, 7004, 15641) } },
+            { { AOM_CDF7(15050, 17126, 21410, 24886, 28156, 30726) },
+              { AOM_CDF7(4034, 6290, 10235, 14982, 21214, 28491) } }
+        }, .eob_bin_128 = {
+            { { AOM_CDF8(1366, 1738, 2527, 5016, 9355, 15797, 24643) },
+              { AOM_CDF8(354, 558, 944, 2760, 7287, 14037, 21779) } },
+            { { AOM_CDF8(13627, 16246, 20173, 24429, 27948, 30415, 31863) },
+              { AOM_CDF8(6275, 9889, 14769, 23164, 27988, 30493, 32272) } }
+        }, .eob_bin_256 = {
+            { { AOM_CDF9(3089, 3920, 6038, 9460, 14266, 19881, 25766, 29176) },
+              { AOM_CDF9(1084, 2358, 3488, 5122, 11483, 18103, 26023, 29799) } },
+            { { AOM_CDF9(11514, 13794, 17480, 20754, 24361, 27378, 29492, 31277) },
+              { AOM_CDF9(6571, 9610, 15516, 21826, 29092, 30829, 31842, 32708) } }
+        }, .eob_bin_512 = {
+            { { AOM_CDF10(2624, 3936, 6480, 9686, 13979, 17726, 23267,
+                          28410, 31078) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } },
+            { { AOM_CDF10(12015, 14769, 19588, 22052, 24222, 25812,
+                          27300, 29219, 32114) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } }
+        }, .eob_bin_1024 = {
+            { { AOM_CDF11(2784, 3831, 7041, 10521, 14847, 18844, 23155,
+                          26682, 29229, 31045) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } },
+            { { AOM_CDF11(9577, 12466, 17739, 20750, 22061, 23215, 24601,
+                          25483, 25843, 32056) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } }
+        }, .eob_hi_bit = {
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(18983) }, { AOM_CDF2(20512) }, { AOM_CDF2(14885) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20090) }, { AOM_CDF2(19444) }, { AOM_CDF2(17286) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(19139) }, { AOM_CDF2(21487) }, { AOM_CDF2(18959) },
+                { AOM_CDF2(20910) }, { AOM_CDF2(19089) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20536) }, { AOM_CDF2(20664) }, { AOM_CDF2(20625) },
+                { AOM_CDF2(19123) }, { AOM_CDF2(14862) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(19833) }, { AOM_CDF2(21502) }, { AOM_CDF2(17485) },
+                { AOM_CDF2(20267) }, { AOM_CDF2(18353) }, { AOM_CDF2(23329) },
+                { AOM_CDF2(21478) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(22041) }, { AOM_CDF2(23434) }, { AOM_CDF2(20001) },
+                { AOM_CDF2(20554) }, { AOM_CDF2(20951) }, { AOM_CDF2(20145) },
+                { AOM_CDF2(15562) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(23312) }, { AOM_CDF2(21607) }, { AOM_CDF2(16526) },
+                { AOM_CDF2(18957) }, { AOM_CDF2(18034) }, { AOM_CDF2(18934) },
+                { AOM_CDF2(24247) }, { AOM_CDF2(16921) }, { AOM_CDF2(17080) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(26579) }, { AOM_CDF2(24910) }, { AOM_CDF2(18637) },
+                { AOM_CDF2(19800) }, { AOM_CDF2(20388) }, { AOM_CDF2(9887) },
+                { AOM_CDF2(15642) }, { AOM_CDF2(30198) }, { AOM_CDF2(24721) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(26998) }, { AOM_CDF2(16737) }, { AOM_CDF2(17838) },
+                { AOM_CDF2(18922) }, { AOM_CDF2(19515) }, { AOM_CDF2(18636) },
+                { AOM_CDF2(17333) }, { AOM_CDF2(15776) }, { AOM_CDF2(22658) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } }
+        }, .eob_base_tok = {
+            { { { AOM_CDF3(20092, 30774) },
+                { AOM_CDF3(30695, 32020) },
+                { AOM_CDF3(31131, 32103) },
+                { AOM_CDF3(28666, 30870) } },
+              { { AOM_CDF3(27258, 31095) },
+                { AOM_CDF3(31804, 32623) },
+                { AOM_CDF3(31763, 32528) },
+                { AOM_CDF3(31438, 32506) } } },
+            { { { AOM_CDF3(18049, 30489) },
+                { AOM_CDF3(31706, 32286) },
+                { AOM_CDF3(32163, 32473) },
+                { AOM_CDF3(31550, 32184) } },
+              { { AOM_CDF3(27116, 30842) },
+                { AOM_CDF3(31971, 32598) },
+                { AOM_CDF3(32088, 32576) },
+                { AOM_CDF3(32067, 32664) } } },
+            { { { AOM_CDF3(12854, 29093) },
+                { AOM_CDF3(32272, 32558) },
+                { AOM_CDF3(32667, 32729) },
+                { AOM_CDF3(32306, 32585) } },
+              { { AOM_CDF3(25476, 30366) },
+                { AOM_CDF3(32169, 32687) },
+                { AOM_CDF3(32479, 32689) },
+                { AOM_CDF3(31673, 32634) } } },
+            { { { AOM_CDF3(2809, 19301) },
+                { AOM_CDF3(32205, 32622) },
+                { AOM_CDF3(32338, 32730) },
+                { AOM_CDF3(31786, 32616) } },
+              { { AOM_CDF3(22737, 29105) },
+                { AOM_CDF3(30810, 32362) },
+                { AOM_CDF3(30014, 32627) },
+                { AOM_CDF3(30528, 32574) } } },
+            { { { AOM_CDF3(935, 3382) },
+                { AOM_CDF3(30789, 31909) },
+                { AOM_CDF3(32466, 32756) },
+                { AOM_CDF3(30860, 32513) } },
+              { { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) } } }
+        }, .base_tok = {
+            { { { AOM_CDF4(8896, 16227, 20630) },
+                { AOM_CDF4(23629, 31782, 32527) },
+                { AOM_CDF4(15173, 27755, 31321) },
+                { AOM_CDF4(10158, 21233, 27382) },
+                { AOM_CDF4(6420, 14857, 21558) },
+                { AOM_CDF4(3269, 8155, 12646) },
+                { AOM_CDF4(24835, 32009, 32496) },
+                { AOM_CDF4(16509, 28421, 31579) },
+                { AOM_CDF4(10957, 21514, 27418) },
+                { AOM_CDF4(7881, 15930, 22096) },
+                { AOM_CDF4(5388, 10960, 15918) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(20745, 30773, 32093) },
+                { AOM_CDF4(15200, 27221, 30861) },
+                { AOM_CDF4(13032, 20873, 25667) },
+                { AOM_CDF4(12285, 18663, 23494) },
+                { AOM_CDF4(11563, 17481, 21489) },
+                { AOM_CDF4(26260, 31982, 32320) },
+                { AOM_CDF4(15397, 28083, 31100) },
+                { AOM_CDF4(9742, 19217, 24824) },
+                { AOM_CDF4(3261, 9629, 15362) },
+                { AOM_CDF4(1480, 4322, 7499) },
+                { AOM_CDF4(27599, 32256, 32460) },
+                { AOM_CDF4(16857, 27659, 30774) },
+                { AOM_CDF4(9551, 18290, 23748) },
+                { AOM_CDF4(3052, 8933, 14103) },
+                { AOM_CDF4(2021, 5910, 9787) },
+                { AOM_CDF4(29005, 32015, 32392) },
+                { AOM_CDF4(17677, 27694, 30863) },
+                { AOM_CDF4(9204, 17356, 23219) },
+                { AOM_CDF4(2403, 7516, 12814) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(10808, 22056, 26896) },
+                { AOM_CDF4(25739, 32313, 32676) },
+                { AOM_CDF4(17288, 30203, 32221) },
+                { AOM_CDF4(11359, 24878, 29896) },
+                { AOM_CDF4(6949, 17767, 24893) },
+                { AOM_CDF4(4287, 11796, 18071) },
+                { AOM_CDF4(27880, 32521, 32705) },
+                { AOM_CDF4(19038, 31004, 32414) },
+                { AOM_CDF4(12564, 26345, 30768) },
+                { AOM_CDF4(8269, 19947, 26779) },
+                { AOM_CDF4(5674, 14657, 21674) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(25742, 32319, 32671) },
+                { AOM_CDF4(19557, 31164, 32454) },
+                { AOM_CDF4(13381, 26381, 30755) },
+                { AOM_CDF4(10101, 21466, 26722) },
+                { AOM_CDF4(9209, 19650, 26825) },
+                { AOM_CDF4(27107, 31917, 32432) },
+                { AOM_CDF4(18056, 28893, 31203) },
+                { AOM_CDF4(10200, 21434, 26764) },
+                { AOM_CDF4(4660, 12913, 19502) },
+                { AOM_CDF4(2368, 6930, 12504) },
+                { AOM_CDF4(26960, 32158, 32613) },
+                { AOM_CDF4(18628, 30005, 32031) },
+                { AOM_CDF4(10233, 22442, 28232) },
+                { AOM_CDF4(5471, 14630, 21516) },
+                { AOM_CDF4(3235, 10767, 17109) },
+                { AOM_CDF4(27696, 32440, 32692) },
+                { AOM_CDF4(20032, 31167, 32438) },
+                { AOM_CDF4(8700, 21341, 28442) },
+                { AOM_CDF4(5662, 14831, 21795) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(9704, 17294, 21132) },
+                { AOM_CDF4(26762, 32278, 32633) },
+                { AOM_CDF4(18382, 29620, 31819) },
+                { AOM_CDF4(10891, 23475, 28723) },
+                { AOM_CDF4(6358, 16583, 23309) },
+                { AOM_CDF4(3248, 9118, 14141) },
+                { AOM_CDF4(27204, 32573, 32699) },
+                { AOM_CDF4(19818, 30824, 32329) },
+                { AOM_CDF4(11772, 25120, 30041) },
+                { AOM_CDF4(6995, 18033, 25039) },
+                { AOM_CDF4(3752, 10442, 16098) },
+                { AOM_CDF4(27222, 32256, 32559) },
+                { AOM_CDF4(15356, 28399, 31475) },
+                { AOM_CDF4(8821, 20635, 27057) },
+                { AOM_CDF4(5511, 14404, 21239) },
+                { AOM_CDF4(2935, 8222, 13051) },
+                { AOM_CDF4(24875, 32120, 32529) },
+                { AOM_CDF4(15233, 28265, 31445) },
+                { AOM_CDF4(8605, 20570, 26932) },
+                { AOM_CDF4(5431, 14413, 21196) },
+                { AOM_CDF4(2994, 8341, 13223) },
+                { AOM_CDF4(28201, 32604, 32700) },
+                { AOM_CDF4(21041, 31446, 32456) },
+                { AOM_CDF4(13221, 26213, 30475) },
+                { AOM_CDF4(8255, 19385, 26037) },
+                { AOM_CDF4(4930, 12585, 18830) },
+                { AOM_CDF4(28768, 32448, 32627) },
+                { AOM_CDF4(19705, 30561, 32021) },
+                { AOM_CDF4(11572, 23589, 28220) },
+                { AOM_CDF4(5532, 15034, 21446) },
+                { AOM_CDF4(2460, 7150, 11456) },
+                { AOM_CDF4(29874, 32619, 32699) },
+                { AOM_CDF4(21621, 31071, 32201) },
+                { AOM_CDF4(12511, 24747, 28992) },
+                { AOM_CDF4(6281, 16395, 22748) },
+                { AOM_CDF4(3246, 9278, 14497) },
+                { AOM_CDF4(29715, 32625, 32712) },
+                { AOM_CDF4(20958, 31011, 32283) },
+                { AOM_CDF4(11233, 23671, 28806) },
+                { AOM_CDF4(6012, 16128, 22868) },
+                { AOM_CDF4(3427, 9851, 15414) } },
+              { { AOM_CDF4(11016, 22111, 26794) },
+                { AOM_CDF4(25946, 32357, 32677) },
+                { AOM_CDF4(17890, 30452, 32252) },
+                { AOM_CDF4(11678, 25142, 29816) },
+                { AOM_CDF4(6720, 17534, 24584) },
+                { AOM_CDF4(4230, 11665, 17820) },
+                { AOM_CDF4(28400, 32623, 32747) },
+                { AOM_CDF4(21164, 31668, 32575) },
+                { AOM_CDF4(13572, 27388, 31182) },
+                { AOM_CDF4(8234, 20750, 27358) },
+                { AOM_CDF4(5065, 14055, 20897) },
+                { AOM_CDF4(28981, 32547, 32705) },
+                { AOM_CDF4(18681, 30543, 32239) },
+                { AOM_CDF4(10919, 24075, 29286) },
+                { AOM_CDF4(6431, 17199, 24077) },
+                { AOM_CDF4(3819, 10464, 16618) },
+                { AOM_CDF4(26870, 32467, 32693) },
+                { AOM_CDF4(19041, 30831, 32347) },
+                { AOM_CDF4(11794, 25211, 30016) },
+                { AOM_CDF4(6888, 18019, 24970) },
+                { AOM_CDF4(4370, 12363, 18992) },
+                { AOM_CDF4(29578, 32670, 32744) },
+                { AOM_CDF4(23159, 32007, 32613) },
+                { AOM_CDF4(15315, 28669, 31676) },
+                { AOM_CDF4(9298, 22607, 28782) },
+                { AOM_CDF4(6144, 15913, 22968) },
+                { AOM_CDF4(28110, 32499, 32669) },
+                { AOM_CDF4(21574, 30937, 32015) },
+                { AOM_CDF4(12759, 24818, 28727) },
+                { AOM_CDF4(6545, 16761, 23042) },
+                { AOM_CDF4(3649, 10597, 16833) },
+                { AOM_CDF4(28163, 32552, 32728) },
+                { AOM_CDF4(22101, 31469, 32464) },
+                { AOM_CDF4(13160, 25472, 30143) },
+                { AOM_CDF4(7303, 18684, 25468) },
+                { AOM_CDF4(5241, 13975, 20955) },
+                { AOM_CDF4(28400, 32631, 32744) },
+                { AOM_CDF4(22104, 31793, 32603) },
+                { AOM_CDF4(13557, 26571, 30846) },
+                { AOM_CDF4(7749, 19861, 26675) },
+                { AOM_CDF4(4873, 14030, 21234) } } },
+            { { { AOM_CDF4(9800, 17635, 21073) },
+                { AOM_CDF4(26153, 31885, 32527) },
+                { AOM_CDF4(15038, 27852, 31006) },
+                { AOM_CDF4(8718, 20564, 26486) },
+                { AOM_CDF4(5128, 14076, 20514) },
+                { AOM_CDF4(2636, 7566, 11925) },
+                { AOM_CDF4(27551, 32504, 32701) },
+                { AOM_CDF4(18310, 30054, 32100) },
+                { AOM_CDF4(10211, 23420, 29082) },
+                { AOM_CDF4(6222, 16876, 23916) },
+                { AOM_CDF4(3462, 9954, 15498) },
+                { AOM_CDF4(29991, 32633, 32721) },
+                { AOM_CDF4(19883, 30751, 32201) },
+                { AOM_CDF4(11141, 24184, 29285) },
+                { AOM_CDF4(6420, 16940, 23774) },
+                { AOM_CDF4(3392, 9753, 15118) },
+                { AOM_CDF4(28465, 32616, 32712) },
+                { AOM_CDF4(19850, 30702, 32244) },
+                { AOM_CDF4(10983, 24024, 29223) },
+                { AOM_CDF4(6294, 16770, 23582) },
+                { AOM_CDF4(3244, 9283, 14509) },
+                { AOM_CDF4(30023, 32717, 32748) },
+                { AOM_CDF4(22940, 32032, 32626) },
+                { AOM_CDF4(14282, 27928, 31473) },
+                { AOM_CDF4(8562, 21327, 27914) },
+                { AOM_CDF4(4846, 13393, 19919) },
+                { AOM_CDF4(29981, 32590, 32695) },
+                { AOM_CDF4(20465, 30963, 32166) },
+                { AOM_CDF4(11479, 23579, 28195) },
+                { AOM_CDF4(5916, 15648, 22073) },
+                { AOM_CDF4(3031, 8605, 13398) },
+                { AOM_CDF4(31146, 32691, 32739) },
+                { AOM_CDF4(23106, 31724, 32444) },
+                { AOM_CDF4(13783, 26738, 30439) },
+                { AOM_CDF4(7852, 19468, 25807) },
+                { AOM_CDF4(3860, 11124, 16853) },
+                { AOM_CDF4(31014, 32724, 32748) },
+                { AOM_CDF4(23629, 32109, 32628) },
+                { AOM_CDF4(14747, 28115, 31403) },
+                { AOM_CDF4(8545, 21242, 27478) },
+                { AOM_CDF4(4574, 12781, 19067) } },
+              { { AOM_CDF4(9185, 19694, 24688) },
+                { AOM_CDF4(26081, 31985, 32621) },
+                { AOM_CDF4(16015, 29000, 31787) },
+                { AOM_CDF4(10542, 23690, 29206) },
+                { AOM_CDF4(6732, 17945, 24677) },
+                { AOM_CDF4(3916, 11039, 16722) },
+                { AOM_CDF4(28224, 32566, 32744) },
+                { AOM_CDF4(19100, 31138, 32485) },
+                { AOM_CDF4(12528, 26620, 30879) },
+                { AOM_CDF4(7741, 20277, 26885) },
+                { AOM_CDF4(4566, 12845, 18990) },
+                { AOM_CDF4(29933, 32593, 32718) },
+                { AOM_CDF4(17670, 30333, 32155) },
+                { AOM_CDF4(10385, 23600, 28909) },
+                { AOM_CDF4(6243, 16236, 22407) },
+                { AOM_CDF4(3976, 10389, 16017) },
+                { AOM_CDF4(28377, 32561, 32738) },
+                { AOM_CDF4(19366, 31175, 32482) },
+                { AOM_CDF4(13327, 27175, 31094) },
+                { AOM_CDF4(8258, 20769, 27143) },
+                { AOM_CDF4(4703, 13198, 19527) },
+                { AOM_CDF4(31086, 32706, 32748) },
+                { AOM_CDF4(22853, 31902, 32583) },
+                { AOM_CDF4(14759, 28186, 31419) },
+                { AOM_CDF4(9284, 22382, 28348) },
+                { AOM_CDF4(5585, 15192, 21868) },
+                { AOM_CDF4(28291, 32652, 32746) },
+                { AOM_CDF4(19849, 32107, 32571) },
+                { AOM_CDF4(14834, 26818, 29214) },
+                { AOM_CDF4(10306, 22594, 28672) },
+                { AOM_CDF4(6615, 17384, 23384) },
+                { AOM_CDF4(28947, 32604, 32745) },
+                { AOM_CDF4(25625, 32289, 32646) },
+                { AOM_CDF4(18758, 28672, 31403) },
+                { AOM_CDF4(10017, 23430, 28523) },
+                { AOM_CDF4(6862, 15269, 22131) },
+                { AOM_CDF4(23933, 32509, 32739) },
+                { AOM_CDF4(19927, 31495, 32631) },
+                { AOM_CDF4(11903, 26023, 30621) },
+                { AOM_CDF4(7026, 20094, 27252) },
+                { AOM_CDF4(5998, 18106, 24437) } } },
+            { { { AOM_CDF4(4456, 11274, 15533) },
+                { AOM_CDF4(21219, 29079, 31616) },
+                { AOM_CDF4(11173, 23774, 28567) },
+                { AOM_CDF4(7282, 18293, 24263) },
+                { AOM_CDF4(4890, 13286, 19115) },
+                { AOM_CDF4(1890, 5508, 8659) },
+                { AOM_CDF4(26651, 32136, 32647) },
+                { AOM_CDF4(14630, 28254, 31455) },
+                { AOM_CDF4(8716, 21287, 27395) },
+                { AOM_CDF4(5615, 15331, 22008) },
+                { AOM_CDF4(2675, 7700, 12150) },
+                { AOM_CDF4(29954, 32526, 32690) },
+                { AOM_CDF4(16126, 28982, 31633) },
+                { AOM_CDF4(9030, 21361, 27352) },
+                { AOM_CDF4(5411, 14793, 21271) },
+                { AOM_CDF4(2943, 8422, 13163) },
+                { AOM_CDF4(29539, 32601, 32730) },
+                { AOM_CDF4(18125, 30385, 32201) },
+                { AOM_CDF4(10422, 24090, 29468) },
+                { AOM_CDF4(6468, 17487, 24438) },
+                { AOM_CDF4(2970, 8653, 13531) },
+                { AOM_CDF4(30912, 32715, 32748) },
+                { AOM_CDF4(20666, 31373, 32497) },
+                { AOM_CDF4(12509, 26640, 30917) },
+                { AOM_CDF4(8058, 20629, 27290) },
+                { AOM_CDF4(4231, 12006, 18052) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(10202, 20633, 25484) },
+                { AOM_CDF4(27336, 31445, 32352) },
+                { AOM_CDF4(12420, 24384, 28552) },
+                { AOM_CDF4(7648, 18115, 23856) },
+                { AOM_CDF4(5662, 14341, 19902) },
+                { AOM_CDF4(3611, 10328, 15390) },
+                { AOM_CDF4(30945, 32616, 32736) },
+                { AOM_CDF4(18682, 30505, 32253) },
+                { AOM_CDF4(11513, 25336, 30203) },
+                { AOM_CDF4(7449, 19452, 26148) },
+                { AOM_CDF4(4482, 13051, 18886) },
+                { AOM_CDF4(32022, 32690, 32747) },
+                { AOM_CDF4(18578, 30501, 32146) },
+                { AOM_CDF4(11249, 23368, 28631) },
+                { AOM_CDF4(5645, 16958, 22158) },
+                { AOM_CDF4(5009, 11444, 16637) },
+                { AOM_CDF4(31357, 32710, 32748) },
+                { AOM_CDF4(21552, 31494, 32504) },
+                { AOM_CDF4(13891, 27677, 31340) },
+                { AOM_CDF4(9051, 22098, 28172) },
+                { AOM_CDF4(5190, 13377, 19486) },
+                { AOM_CDF4(32364, 32740, 32748) },
+                { AOM_CDF4(24839, 31907, 32551) },
+                { AOM_CDF4(17160, 28779, 31696) },
+                { AOM_CDF4(12452, 24137, 29602) },
+                { AOM_CDF4(6165, 15389, 22477) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(2575, 7281, 11077) },
+                { AOM_CDF4(14002, 20866, 25402) },
+                { AOM_CDF4(6343, 15056, 19658) },
+                { AOM_CDF4(4474, 11858, 17041) },
+                { AOM_CDF4(2865, 8299, 12534) },
+                { AOM_CDF4(1344, 3949, 6391) },
+                { AOM_CDF4(24720, 31239, 32459) },
+                { AOM_CDF4(12585, 25356, 29968) },
+                { AOM_CDF4(7181, 18246, 24444) },
+                { AOM_CDF4(5025, 13667, 19885) },
+                { AOM_CDF4(2521, 7304, 11605) },
+                { AOM_CDF4(29908, 32252, 32584) },
+                { AOM_CDF4(17421, 29156, 31575) },
+                { AOM_CDF4(9889, 22188, 27782) },
+                { AOM_CDF4(5878, 15647, 22123) },
+                { AOM_CDF4(2814, 8665, 13323) },
+                { AOM_CDF4(30183, 32568, 32713) },
+                { AOM_CDF4(18528, 30195, 32049) },
+                { AOM_CDF4(10982, 24606, 29657) },
+                { AOM_CDF4(6957, 18165, 25231) },
+                { AOM_CDF4(3508, 10118, 15468) },
+                { AOM_CDF4(31761, 32736, 32748) },
+                { AOM_CDF4(21041, 31328, 32546) },
+                { AOM_CDF4(12568, 26732, 31166) },
+                { AOM_CDF4(8052, 20720, 27733) },
+                { AOM_CDF4(4336, 12192, 18396) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } }
+        }, .dc_sign = {
+            {
+                { AOM_CDF2(128 * 125) },
+                { AOM_CDF2(128 * 102) },
+                { AOM_CDF2(128 * 147) },
+            },
+            {
+                { AOM_CDF2(128 * 119) },
+                { AOM_CDF2(128 * 101) },
+                { AOM_CDF2(128 * 135) },
+            }
+        }, .br_tok = {
+            { { { AOM_CDF4(16138, 22223, 25509) },
+                { AOM_CDF4(15347, 22430, 26332) },
+                { AOM_CDF4(9614, 16736, 21332) },
+                { AOM_CDF4(6600, 12275, 16907) },
+                { AOM_CDF4(4811, 9424, 13547) },
+                { AOM_CDF4(3748, 7809, 11420) },
+                { AOM_CDF4(2254, 4587, 6890) },
+                { AOM_CDF4(15196, 20284, 23177) },
+                { AOM_CDF4(18317, 25469, 28451) },
+                { AOM_CDF4(13918, 21651, 25842) },
+                { AOM_CDF4(10052, 17150, 21995) },
+                { AOM_CDF4(7499, 13630, 18587) },
+                { AOM_CDF4(6158, 11417, 16003) },
+                { AOM_CDF4(4014, 7785, 11252) },
+                { AOM_CDF4(15048, 21067, 24384) },
+                { AOM_CDF4(18202, 25346, 28553) },
+                { AOM_CDF4(14302, 22019, 26356) },
+                { AOM_CDF4(10839, 18139, 23166) },
+                { AOM_CDF4(8715, 15744, 20806) },
+                { AOM_CDF4(7536, 13576, 18544) },
+                { AOM_CDF4(5413, 10335, 14498) } },
+              { { AOM_CDF4(17394, 24501, 27895) },
+                { AOM_CDF4(15889, 23420, 27185) },
+                { AOM_CDF4(11561, 19133, 23870) },
+                { AOM_CDF4(8285, 14812, 19844) },
+                { AOM_CDF4(6496, 12043, 16550) },
+                { AOM_CDF4(4771, 9574, 13677) },
+                { AOM_CDF4(3603, 6830, 10144) },
+                { AOM_CDF4(21656, 27704, 30200) },
+                { AOM_CDF4(21324, 27915, 30511) },
+                { AOM_CDF4(17327, 25336, 28997) },
+                { AOM_CDF4(13417, 21381, 26033) },
+                { AOM_CDF4(10132, 17425, 22338) },
+                { AOM_CDF4(8580, 15016, 19633) },
+                { AOM_CDF4(5694, 11477, 16411) },
+                { AOM_CDF4(24116, 29780, 31450) },
+                { AOM_CDF4(23853, 29695, 31591) },
+                { AOM_CDF4(20085, 27614, 30428) },
+                { AOM_CDF4(15326, 24335, 28575) },
+                { AOM_CDF4(11814, 19472, 24810) },
+                { AOM_CDF4(10221, 18611, 24767) },
+                { AOM_CDF4(7689, 14558, 20321) } } },
+            { { { AOM_CDF4(16214, 22380, 25770) },
+                { AOM_CDF4(14213, 21304, 25295) },
+                { AOM_CDF4(9213, 15823, 20455) },
+                { AOM_CDF4(6395, 11758, 16139) },
+                { AOM_CDF4(4779, 9187, 13066) },
+                { AOM_CDF4(3821, 7501, 10953) },
+                { AOM_CDF4(2293, 4567, 6795) },
+                { AOM_CDF4(15859, 21283, 23820) },
+                { AOM_CDF4(18404, 25602, 28726) },
+                { AOM_CDF4(14325, 21980, 26206) },
+                { AOM_CDF4(10669, 17937, 22720) },
+                { AOM_CDF4(8297, 14642, 19447) },
+                { AOM_CDF4(6746, 12389, 16893) },
+                { AOM_CDF4(4324, 8251, 11770) },
+                { AOM_CDF4(16532, 21631, 24475) },
+                { AOM_CDF4(20667, 27150, 29668) },
+                { AOM_CDF4(16728, 24510, 28175) },
+                { AOM_CDF4(12861, 20645, 25332) },
+                { AOM_CDF4(10076, 17361, 22417) },
+                { AOM_CDF4(8395, 14940, 19963) },
+                { AOM_CDF4(5731, 10683, 14912) } },
+              { { AOM_CDF4(14433, 21155, 24938) },
+                { AOM_CDF4(14658, 21716, 25545) },
+                { AOM_CDF4(9923, 16824, 21557) },
+                { AOM_CDF4(6982, 13052, 17721) },
+                { AOM_CDF4(5419, 10503, 15050) },
+                { AOM_CDF4(4852, 9162, 13014) },
+                { AOM_CDF4(3271, 6395, 9630) },
+                { AOM_CDF4(22210, 27833, 30109) },
+                { AOM_CDF4(20750, 27368, 29821) },
+                { AOM_CDF4(16894, 24828, 28573) },
+                { AOM_CDF4(13247, 21276, 25757) },
+                { AOM_CDF4(10038, 17265, 22563) },
+                { AOM_CDF4(8587, 14947, 20327) },
+                { AOM_CDF4(5645, 11371, 15252) },
+                { AOM_CDF4(22027, 27526, 29714) },
+                { AOM_CDF4(23098, 29146, 31221) },
+                { AOM_CDF4(19886, 27341, 30272) },
+                { AOM_CDF4(15609, 23747, 28046) },
+                { AOM_CDF4(11993, 20065, 24939) },
+                { AOM_CDF4(9637, 18267, 23671) },
+                { AOM_CDF4(7625, 13801, 19144) } } },
+            { { { AOM_CDF4(14438, 20798, 24089) },
+                { AOM_CDF4(12621, 19203, 23097) },
+                { AOM_CDF4(8177, 14125, 18402) },
+                { AOM_CDF4(5674, 10501, 14456) },
+                { AOM_CDF4(4236, 8239, 11733) },
+                { AOM_CDF4(3447, 6750, 9806) },
+                { AOM_CDF4(1986, 3950, 5864) },
+                { AOM_CDF4(16208, 22099, 24930) },
+                { AOM_CDF4(16537, 24025, 27585) },
+                { AOM_CDF4(12780, 20381, 24867) },
+                { AOM_CDF4(9767, 16612, 21416) },
+                { AOM_CDF4(7686, 13738, 18398) },
+                { AOM_CDF4(6333, 11614, 15964) },
+                { AOM_CDF4(3941, 7571, 10836) },
+                { AOM_CDF4(22819, 27422, 29202) },
+                { AOM_CDF4(22224, 28514, 30721) },
+                { AOM_CDF4(17660, 25433, 28913) },
+                { AOM_CDF4(13574, 21482, 26002) },
+                { AOM_CDF4(10629, 17977, 22938) },
+                { AOM_CDF4(8612, 15298, 20265) },
+                { AOM_CDF4(5607, 10491, 14596) } },
+              { { AOM_CDF4(13569, 19800, 23206) },
+                { AOM_CDF4(13128, 19924, 23869) },
+                { AOM_CDF4(8329, 14841, 19403) },
+                { AOM_CDF4(6130, 10976, 15057) },
+                { AOM_CDF4(4682, 8839, 12518) },
+                { AOM_CDF4(3656, 7409, 10588) },
+                { AOM_CDF4(2577, 5099, 7412) },
+                { AOM_CDF4(22427, 28684, 30585) },
+                { AOM_CDF4(20913, 27750, 30139) },
+                { AOM_CDF4(15840, 24109, 27834) },
+                { AOM_CDF4(12308, 20029, 24569) },
+                { AOM_CDF4(10216, 16785, 21458) },
+                { AOM_CDF4(8309, 14203, 19113) },
+                { AOM_CDF4(6043, 11168, 15307) },
+                { AOM_CDF4(23166, 28901, 30998) },
+                { AOM_CDF4(21899, 28405, 30751) },
+                { AOM_CDF4(18413, 26091, 29443) },
+                { AOM_CDF4(15233, 23114, 27352) },
+                { AOM_CDF4(12683, 20472, 25288) },
+                { AOM_CDF4(10702, 18259, 23409) },
+                { AOM_CDF4(8125, 14464, 19226) } } },
+            { { { AOM_CDF4(9040, 14786, 18360) },
+                { AOM_CDF4(9979, 15718, 19415) },
+                { AOM_CDF4(7913, 13918, 18311) },
+                { AOM_CDF4(5859, 10889, 15184) },
+                { AOM_CDF4(4593, 8677, 12510) },
+                { AOM_CDF4(3820, 7396, 10791) },
+                { AOM_CDF4(1730, 3471, 5192) },
+                { AOM_CDF4(11803, 18365, 22709) },
+                { AOM_CDF4(11419, 18058, 22225) },
+                { AOM_CDF4(9418, 15774, 20243) },
+                { AOM_CDF4(7539, 13325, 17657) },
+                { AOM_CDF4(6233, 11317, 15384) },
+                { AOM_CDF4(5137, 9656, 13545) },
+                { AOM_CDF4(2977, 5774, 8349) },
+                { AOM_CDF4(21207, 27246, 29640) },
+                { AOM_CDF4(19547, 26578, 29497) },
+                { AOM_CDF4(16169, 23871, 27690) },
+                { AOM_CDF4(12820, 20458, 25018) },
+                { AOM_CDF4(10224, 17332, 22214) },
+                { AOM_CDF4(8526, 15048, 19884) },
+                { AOM_CDF4(5037, 9410, 13118) } },
+              { { AOM_CDF4(12339, 17329, 20140) },
+                { AOM_CDF4(13505, 19895, 23225) },
+                { AOM_CDF4(9847, 16944, 21564) },
+                { AOM_CDF4(7280, 13256, 18348) },
+                { AOM_CDF4(4712, 10009, 14454) },
+                { AOM_CDF4(4361, 7914, 12477) },
+                { AOM_CDF4(2870, 5628, 7995) },
+                { AOM_CDF4(20061, 25504, 28526) },
+                { AOM_CDF4(15235, 22878, 26145) },
+                { AOM_CDF4(12985, 19958, 24155) },
+                { AOM_CDF4(9782, 16641, 21403) },
+                { AOM_CDF4(9456, 16360, 20760) },
+                { AOM_CDF4(6855, 12940, 18557) },
+                { AOM_CDF4(5661, 10564, 15002) },
+                { AOM_CDF4(25656, 30602, 31894) },
+                { AOM_CDF4(22570, 29107, 31092) },
+                { AOM_CDF4(18917, 26423, 29541) },
+                { AOM_CDF4(15940, 23649, 27754) },
+                { AOM_CDF4(12803, 20581, 25219) },
+                { AOM_CDF4(11082, 18695, 23376) },
+                { AOM_CDF4(7939, 14373, 19005) } } }
+        },
+    }, [3] = {
+        .skip = {
+            { { AOM_CDF2(26887) },
+                { AOM_CDF2(6729) },
+                { AOM_CDF2(10361) },
+                { AOM_CDF2(17442) },
+                { AOM_CDF2(15045) },
+                { AOM_CDF2(22478) },
+                { AOM_CDF2(29072) },
+                { AOM_CDF2(2713) },
+                { AOM_CDF2(11861) },
+                { AOM_CDF2(20773) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(31903) },
+                { AOM_CDF2(2044) },
+                { AOM_CDF2(7528) },
+                { AOM_CDF2(14618) },
+                { AOM_CDF2(16182) },
+                { AOM_CDF2(24168) },
+                { AOM_CDF2(31037) },
+                { AOM_CDF2(2786) },
+                { AOM_CDF2(11194) },
+                { AOM_CDF2(20155) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(32510) },
+                { AOM_CDF2(8430) },
+                { AOM_CDF2(17318) },
+                { AOM_CDF2(24154) },
+                { AOM_CDF2(23674) },
+                { AOM_CDF2(28789) },
+                { AOM_CDF2(32139) },
+                { AOM_CDF2(3440) },
+                { AOM_CDF2(13117) },
+                { AOM_CDF2(22702) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } },
+            { { AOM_CDF2(31671) },
+                { AOM_CDF2(2056) },
+                { AOM_CDF2(11746) },
+                { AOM_CDF2(16852) },
+                { AOM_CDF2(18635) },
+                { AOM_CDF2(24715) },
+                { AOM_CDF2(31484) },
+                { AOM_CDF2(4656) },
+                { AOM_CDF2(16074) },
+                { AOM_CDF2(24704) },
+                { AOM_CDF2(1806) },
+                { AOM_CDF2(14645) },
+                { AOM_CDF2(25336) } },
+            { { AOM_CDF2(31539) },
+                { AOM_CDF2(8433) },
+                { AOM_CDF2(20576) },
+                { AOM_CDF2(27904) },
+                { AOM_CDF2(27852) },
+                { AOM_CDF2(30026) },
+                { AOM_CDF2(32441) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) } }
+        }, .eob_bin_16 = {
+            { { AOM_CDF5(6708, 8958, 14746, 22133) },
+              { AOM_CDF5(1222, 2074, 4783, 15410) } },
+            { { AOM_CDF5(19575, 21766, 26044, 29709) },
+              { AOM_CDF5(7297, 10767, 19273, 28194) } }
+        }, .eob_bin_32 = {
+            { { AOM_CDF6(4617, 5709, 8446, 13584, 23135) },
+              { AOM_CDF6(1156, 1702, 3675, 9274, 20539) } },
+            { { AOM_CDF6(22086, 24282, 27010, 29770, 31743) },
+              { AOM_CDF6(7699, 10897, 20891, 26926, 31628) } }
+        }, .eob_bin_64 = {
+            { { AOM_CDF7(6307, 7541, 12060, 16358, 22553, 27865) },
+              { AOM_CDF7(1289, 2320, 3971, 7926, 14153, 24291) } },
+            { { AOM_CDF7(24212, 25708, 28268, 30035, 31307, 32049) },
+              { AOM_CDF7(8726, 12378, 19409, 26450, 30038, 32462) } }
+        }, .eob_bin_128 = {
+            { { AOM_CDF8(3472, 4885, 7489, 12481, 18517, 24536, 29635) },
+              { AOM_CDF8(886, 1731, 3271, 8469, 15569, 22126, 28383) } },
+            { { AOM_CDF8(24313, 26062, 28385, 30107, 31217, 31898, 32345) },
+              { AOM_CDF8(9165, 13282, 21150, 30286, 31894, 32571, 32712) } }
+        }, .eob_bin_256 = {
+            { { AOM_CDF9(5348, 7113, 11820, 15924, 22106, 26777, 30334, 31757) },
+              { AOM_CDF9(2453, 4474, 6307, 8777, 16474, 22975, 29000, 31547) } },
+            { { AOM_CDF9(23110, 24597, 27140, 28894, 30167, 30927, 31392, 32094) },
+              { AOM_CDF9(9998, 17661, 25178, 28097, 31308, 32038, 32403, 32695) } }
+        }, .eob_bin_512 = {
+            { { AOM_CDF10(5927, 7809, 10923, 14597, 19439, 24135, 28456,
+                          31142, 32060) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } },
+            { { AOM_CDF10(21093, 23043, 25742, 27658, 29097, 29716,
+                          30073, 30820, 31956) },
+              { AOM_CDF10(3277, 6554, 9830, 13107, 16384, 19661, 22938,
+                          26214, 29491) } }
+        }, .eob_bin_1024 = {
+            { { AOM_CDF11(6698, 8334, 11961, 15762, 20186, 23862, 27434,
+                          29326, 31082, 32050) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } },
+            { { AOM_CDF11(20569, 22426, 25569, 26859, 28053, 28913,
+                          29486, 29724, 29807, 32570) },
+              { AOM_CDF11(2979, 5958, 8937, 11916, 14895, 17873, 20852,
+                          23831, 26810, 29789) } }
+        }, .eob_hi_bit = {
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20177) }, { AOM_CDF2(20789) }, { AOM_CDF2(20262) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(21416) }, { AOM_CDF2(20855) }, { AOM_CDF2(23410) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20238) }, { AOM_CDF2(21057) }, { AOM_CDF2(19159) },
+                { AOM_CDF2(22337) }, { AOM_CDF2(20159) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(20125) }, { AOM_CDF2(20559) }, { AOM_CDF2(21707) },
+                { AOM_CDF2(22296) }, { AOM_CDF2(17333) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(19941) }, { AOM_CDF2(20527) }, { AOM_CDF2(21470) },
+                { AOM_CDF2(22487) }, { AOM_CDF2(19558) }, { AOM_CDF2(22354) },
+                { AOM_CDF2(20331) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(22752) }, { AOM_CDF2(25006) }, { AOM_CDF2(22075) },
+                { AOM_CDF2(21576) }, { AOM_CDF2(17740) }, { AOM_CDF2(21690) },
+                { AOM_CDF2(19211) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(21442) }, { AOM_CDF2(22358) }, { AOM_CDF2(18503) },
+                { AOM_CDF2(20291) }, { AOM_CDF2(19945) }, { AOM_CDF2(21294) },
+                { AOM_CDF2(21178) }, { AOM_CDF2(19400) }, { AOM_CDF2(10556) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(24648) }, { AOM_CDF2(24949) }, { AOM_CDF2(20708) },
+                { AOM_CDF2(23905) }, { AOM_CDF2(20501) }, { AOM_CDF2(9558) },
+                { AOM_CDF2(9423) },  { AOM_CDF2(30365) }, { AOM_CDF2(19253) } } },
+            { { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(26064) }, { AOM_CDF2(22098) }, { AOM_CDF2(19613) },
+                { AOM_CDF2(20525) }, { AOM_CDF2(17595) }, { AOM_CDF2(16618) },
+                { AOM_CDF2(20497) }, { AOM_CDF2(18989) }, { AOM_CDF2(15513) } },
+              { { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) },
+                { AOM_CDF2(16384) }, { AOM_CDF2(16384) }, { AOM_CDF2(16384) } } }
+        }, .eob_base_tok = {
+            { { { AOM_CDF3(22497, 31198) },
+                { AOM_CDF3(31715, 32495) },
+                { AOM_CDF3(31606, 32337) },
+                { AOM_CDF3(30388, 31990) } },
+              { { AOM_CDF3(27877, 31584) },
+                { AOM_CDF3(32170, 32728) },
+                { AOM_CDF3(32155, 32688) },
+                { AOM_CDF3(32219, 32702) } } },
+            { { { AOM_CDF3(21457, 31043) },
+                { AOM_CDF3(31951, 32483) },
+                { AOM_CDF3(32153, 32562) },
+                { AOM_CDF3(31473, 32215) } },
+              { { AOM_CDF3(27558, 31151) },
+                { AOM_CDF3(32020, 32640) },
+                { AOM_CDF3(32097, 32575) },
+                { AOM_CDF3(32242, 32719) } } },
+            { { { AOM_CDF3(19980, 30591) },
+                { AOM_CDF3(32219, 32597) },
+                { AOM_CDF3(32581, 32706) },
+                { AOM_CDF3(31803, 32287) } },
+              { { AOM_CDF3(26473, 30507) },
+                { AOM_CDF3(32431, 32723) },
+                { AOM_CDF3(32196, 32611) },
+                { AOM_CDF3(31588, 32528) } } },
+            { { { AOM_CDF3(24647, 30463) },
+                { AOM_CDF3(32412, 32695) },
+                { AOM_CDF3(32468, 32720) },
+                { AOM_CDF3(31269, 32523) } },
+              { { AOM_CDF3(28482, 31505) },
+                { AOM_CDF3(32152, 32701) },
+                { AOM_CDF3(31732, 32598) },
+                { AOM_CDF3(31767, 32712) } } },
+            { { { AOM_CDF3(12358, 24977) },
+                { AOM_CDF3(31331, 32385) },
+                { AOM_CDF3(32634, 32756) },
+                { AOM_CDF3(30411, 32548) } },
+              { { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) },
+                { AOM_CDF3(10923, 21845) } } }
+        }, .base_tok = {
+            { { { AOM_CDF4(7062, 16472, 22319) },
+                { AOM_CDF4(24538, 32261, 32674) },
+                { AOM_CDF4(13675, 28041, 31779) },
+                { AOM_CDF4(8590, 20674, 27631) },
+                { AOM_CDF4(5685, 14675, 22013) },
+                { AOM_CDF4(3655, 9898, 15731) },
+                { AOM_CDF4(26493, 32418, 32658) },
+                { AOM_CDF4(16376, 29342, 32090) },
+                { AOM_CDF4(10594, 22649, 28970) },
+                { AOM_CDF4(8176, 17170, 24303) },
+                { AOM_CDF4(5605, 12694, 19139) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(23888, 31902, 32542) },
+                { AOM_CDF4(18612, 29687, 31987) },
+                { AOM_CDF4(16245, 24852, 29249) },
+                { AOM_CDF4(15765, 22608, 27559) },
+                { AOM_CDF4(19895, 24699, 27510) },
+                { AOM_CDF4(28401, 32212, 32457) },
+                { AOM_CDF4(15274, 27825, 30980) },
+                { AOM_CDF4(9364, 18128, 24332) },
+                { AOM_CDF4(2283, 8193, 15082) },
+                { AOM_CDF4(1228, 3972, 7881) },
+                { AOM_CDF4(29455, 32469, 32620) },
+                { AOM_CDF4(17981, 28245, 31388) },
+                { AOM_CDF4(10921, 20098, 26240) },
+                { AOM_CDF4(3743, 11829, 18657) },
+                { AOM_CDF4(2374, 9593, 15715) },
+                { AOM_CDF4(31068, 32466, 32635) },
+                { AOM_CDF4(20321, 29572, 31971) },
+                { AOM_CDF4(10771, 20255, 27119) },
+                { AOM_CDF4(2795, 10410, 17361) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(9320, 22102, 27840) },
+                { AOM_CDF4(27057, 32464, 32724) },
+                { AOM_CDF4(16331, 30268, 32309) },
+                { AOM_CDF4(10319, 23935, 29720) },
+                { AOM_CDF4(6189, 16448, 24106) },
+                { AOM_CDF4(3589, 10884, 18808) },
+                { AOM_CDF4(29026, 32624, 32748) },
+                { AOM_CDF4(19226, 31507, 32587) },
+                { AOM_CDF4(12692, 26921, 31203) },
+                { AOM_CDF4(7049, 19532, 27635) },
+                { AOM_CDF4(7727, 15669, 23252) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(28056, 32625, 32748) },
+                { AOM_CDF4(22383, 32075, 32669) },
+                { AOM_CDF4(15417, 27098, 31749) },
+                { AOM_CDF4(18127, 26493, 27190) },
+                { AOM_CDF4(5461, 16384, 21845) },
+                { AOM_CDF4(27982, 32091, 32584) },
+                { AOM_CDF4(19045, 29868, 31972) },
+                { AOM_CDF4(10397, 22266, 27932) },
+                { AOM_CDF4(5990, 13697, 21500) },
+                { AOM_CDF4(1792, 6912, 15104) },
+                { AOM_CDF4(28198, 32501, 32718) },
+                { AOM_CDF4(21534, 31521, 32569) },
+                { AOM_CDF4(11109, 25217, 30017) },
+                { AOM_CDF4(5671, 15124, 26151) },
+                { AOM_CDF4(4681, 14043, 18725) },
+                { AOM_CDF4(28688, 32580, 32741) },
+                { AOM_CDF4(22576, 32079, 32661) },
+                { AOM_CDF4(10627, 22141, 28340) },
+                { AOM_CDF4(9362, 14043, 28087) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(7754, 16948, 22142) },
+                { AOM_CDF4(25670, 32330, 32691) },
+                { AOM_CDF4(15663, 29225, 31994) },
+                { AOM_CDF4(9878, 23288, 29158) },
+                { AOM_CDF4(6419, 17088, 24336) },
+                { AOM_CDF4(3859, 11003, 17039) },
+                { AOM_CDF4(27562, 32595, 32725) },
+                { AOM_CDF4(17575, 30588, 32399) },
+                { AOM_CDF4(10819, 24838, 30309) },
+                { AOM_CDF4(7124, 18686, 25916) },
+                { AOM_CDF4(4479, 12688, 19340) },
+                { AOM_CDF4(28385, 32476, 32673) },
+                { AOM_CDF4(15306, 29005, 31938) },
+                { AOM_CDF4(8937, 21615, 28322) },
+                { AOM_CDF4(5982, 15603, 22786) },
+                { AOM_CDF4(3620, 10267, 16136) },
+                { AOM_CDF4(27280, 32464, 32667) },
+                { AOM_CDF4(15607, 29160, 32004) },
+                { AOM_CDF4(9091, 22135, 28740) },
+                { AOM_CDF4(6232, 16632, 24020) },
+                { AOM_CDF4(4047, 11377, 17672) },
+                { AOM_CDF4(29220, 32630, 32718) },
+                { AOM_CDF4(19650, 31220, 32462) },
+                { AOM_CDF4(13050, 26312, 30827) },
+                { AOM_CDF4(9228, 20870, 27468) },
+                { AOM_CDF4(6146, 15149, 21971) },
+                { AOM_CDF4(30169, 32481, 32623) },
+                { AOM_CDF4(17212, 29311, 31554) },
+                { AOM_CDF4(9911, 21311, 26882) },
+                { AOM_CDF4(4487, 13314, 20372) },
+                { AOM_CDF4(2570, 7772, 12889) },
+                { AOM_CDF4(30924, 32613, 32708) },
+                { AOM_CDF4(19490, 30206, 32107) },
+                { AOM_CDF4(11232, 23998, 29276) },
+                { AOM_CDF4(6769, 17955, 25035) },
+                { AOM_CDF4(4398, 12623, 19214) },
+                { AOM_CDF4(30609, 32627, 32722) },
+                { AOM_CDF4(19370, 30582, 32287) },
+                { AOM_CDF4(10457, 23619, 29409) },
+                { AOM_CDF4(6443, 17637, 24834) },
+                { AOM_CDF4(4645, 13236, 20106) } },
+              { { AOM_CDF4(8626, 20271, 26216) },
+                { AOM_CDF4(26707, 32406, 32711) },
+                { AOM_CDF4(16999, 30329, 32286) },
+                { AOM_CDF4(11445, 25123, 30286) },
+                { AOM_CDF4(6411, 18828, 25601) },
+                { AOM_CDF4(6801, 12458, 20248) },
+                { AOM_CDF4(29918, 32682, 32748) },
+                { AOM_CDF4(20649, 31739, 32618) },
+                { AOM_CDF4(12879, 27773, 31581) },
+                { AOM_CDF4(7896, 21751, 28244) },
+                { AOM_CDF4(5260, 14870, 23698) },
+                { AOM_CDF4(29252, 32593, 32731) },
+                { AOM_CDF4(17072, 30460, 32294) },
+                { AOM_CDF4(10653, 24143, 29365) },
+                { AOM_CDF4(6536, 17490, 23983) },
+                { AOM_CDF4(4929, 13170, 20085) },
+                { AOM_CDF4(28137, 32518, 32715) },
+                { AOM_CDF4(18171, 30784, 32407) },
+                { AOM_CDF4(11437, 25436, 30459) },
+                { AOM_CDF4(7252, 18534, 26176) },
+                { AOM_CDF4(4126, 13353, 20978) },
+                { AOM_CDF4(31162, 32726, 32748) },
+                { AOM_CDF4(23017, 32222, 32701) },
+                { AOM_CDF4(15629, 29233, 32046) },
+                { AOM_CDF4(9387, 22621, 29480) },
+                { AOM_CDF4(6922, 17616, 25010) },
+                { AOM_CDF4(28838, 32265, 32614) },
+                { AOM_CDF4(19701, 30206, 31920) },
+                { AOM_CDF4(11214, 22410, 27933) },
+                { AOM_CDF4(5320, 14177, 23034) },
+                { AOM_CDF4(5049, 12881, 17827) },
+                { AOM_CDF4(27484, 32471, 32734) },
+                { AOM_CDF4(21076, 31526, 32561) },
+                { AOM_CDF4(12707, 26303, 31211) },
+                { AOM_CDF4(8169, 21722, 28219) },
+                { AOM_CDF4(6045, 19406, 27042) },
+                { AOM_CDF4(27753, 32572, 32745) },
+                { AOM_CDF4(20832, 31878, 32653) },
+                { AOM_CDF4(13250, 27356, 31674) },
+                { AOM_CDF4(7718, 21508, 29858) },
+                { AOM_CDF4(7209, 18350, 25559) } } },
+            { { { AOM_CDF4(7876, 16901, 21741) },
+                { AOM_CDF4(24001, 31898, 32625) },
+                { AOM_CDF4(14529, 27959, 31451) },
+                { AOM_CDF4(8273, 20818, 27258) },
+                { AOM_CDF4(5278, 14673, 21510) },
+                { AOM_CDF4(2983, 8843, 14039) },
+                { AOM_CDF4(28016, 32574, 32732) },
+                { AOM_CDF4(17471, 30306, 32301) },
+                { AOM_CDF4(10224, 24063, 29728) },
+                { AOM_CDF4(6602, 17954, 25052) },
+                { AOM_CDF4(4002, 11585, 17759) },
+                { AOM_CDF4(30190, 32634, 32739) },
+                { AOM_CDF4(17497, 30282, 32270) },
+                { AOM_CDF4(10229, 23729, 29538) },
+                { AOM_CDF4(6344, 17211, 24440) },
+                { AOM_CDF4(3849, 11189, 17108) },
+                { AOM_CDF4(28570, 32583, 32726) },
+                { AOM_CDF4(17521, 30161, 32238) },
+                { AOM_CDF4(10153, 23565, 29378) },
+                { AOM_CDF4(6455, 17341, 24443) },
+                { AOM_CDF4(3907, 11042, 17024) },
+                { AOM_CDF4(30689, 32715, 32748) },
+                { AOM_CDF4(21546, 31840, 32610) },
+                { AOM_CDF4(13547, 27581, 31459) },
+                { AOM_CDF4(8912, 21757, 28309) },
+                { AOM_CDF4(5548, 15080, 22046) },
+                { AOM_CDF4(30783, 32540, 32685) },
+                { AOM_CDF4(17540, 29528, 31668) },
+                { AOM_CDF4(10160, 21468, 26783) },
+                { AOM_CDF4(4724, 13393, 20054) },
+                { AOM_CDF4(2702, 8174, 13102) },
+                { AOM_CDF4(31648, 32686, 32742) },
+                { AOM_CDF4(20954, 31094, 32337) },
+                { AOM_CDF4(12420, 25698, 30179) },
+                { AOM_CDF4(7304, 19320, 26248) },
+                { AOM_CDF4(4366, 12261, 18864) },
+                { AOM_CDF4(31581, 32723, 32748) },
+                { AOM_CDF4(21373, 31586, 32525) },
+                { AOM_CDF4(12744, 26625, 30885) },
+                { AOM_CDF4(7431, 20322, 26950) },
+                { AOM_CDF4(4692, 13323, 20111) } },
+              { { AOM_CDF4(7833, 18369, 24095) },
+                { AOM_CDF4(26650, 32273, 32702) },
+                { AOM_CDF4(16371, 29961, 32191) },
+                { AOM_CDF4(11055, 24082, 29629) },
+                { AOM_CDF4(6892, 18644, 25400) },
+                { AOM_CDF4(5006, 13057, 19240) },
+                { AOM_CDF4(29834, 32666, 32748) },
+                { AOM_CDF4(19577, 31335, 32570) },
+                { AOM_CDF4(12253, 26509, 31122) },
+                { AOM_CDF4(7991, 20772, 27711) },
+                { AOM_CDF4(5677, 15910, 23059) },
+                { AOM_CDF4(30109, 32532, 32720) },
+                { AOM_CDF4(16747, 30166, 32252) },
+                { AOM_CDF4(10134, 23542, 29184) },
+                { AOM_CDF4(5791, 16176, 23556) },
+                { AOM_CDF4(4362, 10414, 17284) },
+                { AOM_CDF4(29492, 32626, 32748) },
+                { AOM_CDF4(19894, 31402, 32525) },
+                { AOM_CDF4(12942, 27071, 30869) },
+                { AOM_CDF4(8346, 21216, 27405) },
+                { AOM_CDF4(6572, 17087, 23859) },
+                { AOM_CDF4(32035, 32735, 32748) },
+                { AOM_CDF4(22957, 31838, 32618) },
+                { AOM_CDF4(14724, 28572, 31772) },
+                { AOM_CDF4(10364, 23999, 29553) },
+                { AOM_CDF4(7004, 18433, 25655) },
+                { AOM_CDF4(27528, 32277, 32681) },
+                { AOM_CDF4(16959, 31171, 32096) },
+                { AOM_CDF4(10486, 23593, 27962) },
+                { AOM_CDF4(8192, 16384, 23211) },
+                { AOM_CDF4(8937, 17873, 20852) },
+                { AOM_CDF4(27715, 32002, 32615) },
+                { AOM_CDF4(15073, 29491, 31676) },
+                { AOM_CDF4(11264, 24576, 28672) },
+                { AOM_CDF4(2341, 18725, 23406) },
+                { AOM_CDF4(7282, 18204, 25486) },
+                { AOM_CDF4(28547, 32213, 32657) },
+                { AOM_CDF4(20788, 29773, 32239) },
+                { AOM_CDF4(6780, 21469, 30508) },
+                { AOM_CDF4(5958, 14895, 23831) },
+                { AOM_CDF4(16384, 21845, 27307) } } },
+            { { { AOM_CDF4(5992, 14304, 19765) },
+                { AOM_CDF4(22612, 31238, 32456) },
+                { AOM_CDF4(13456, 27162, 31087) },
+                { AOM_CDF4(8001, 20062, 26504) },
+                { AOM_CDF4(5168, 14105, 20764) },
+                { AOM_CDF4(2632, 7771, 12385) },
+                { AOM_CDF4(27034, 32344, 32709) },
+                { AOM_CDF4(15850, 29415, 31997) },
+                { AOM_CDF4(9494, 22776, 28841) },
+                { AOM_CDF4(6151, 16830, 23969) },
+                { AOM_CDF4(3461, 10039, 15722) },
+                { AOM_CDF4(30134, 32569, 32731) },
+                { AOM_CDF4(15638, 29422, 31945) },
+                { AOM_CDF4(9150, 21865, 28218) },
+                { AOM_CDF4(5647, 15719, 22676) },
+                { AOM_CDF4(3402, 9772, 15477) },
+                { AOM_CDF4(28530, 32586, 32735) },
+                { AOM_CDF4(17139, 30298, 32292) },
+                { AOM_CDF4(10200, 24039, 29685) },
+                { AOM_CDF4(6419, 17674, 24786) },
+                { AOM_CDF4(3544, 10225, 15824) },
+                { AOM_CDF4(31333, 32726, 32748) },
+                { AOM_CDF4(20618, 31487, 32544) },
+                { AOM_CDF4(12901, 27217, 31232) },
+                { AOM_CDF4(8624, 21734, 28171) },
+                { AOM_CDF4(5104, 14191, 20748) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(11206, 21090, 26561) },
+                { AOM_CDF4(28759, 32279, 32671) },
+                { AOM_CDF4(14171, 27952, 31569) },
+                { AOM_CDF4(9743, 22907, 29141) },
+                { AOM_CDF4(6871, 17886, 24868) },
+                { AOM_CDF4(4960, 13152, 19315) },
+                { AOM_CDF4(31077, 32661, 32748) },
+                { AOM_CDF4(19400, 31195, 32515) },
+                { AOM_CDF4(12752, 26858, 31040) },
+                { AOM_CDF4(8370, 22098, 28591) },
+                { AOM_CDF4(5457, 15373, 22298) },
+                { AOM_CDF4(31697, 32706, 32748) },
+                { AOM_CDF4(17860, 30657, 32333) },
+                { AOM_CDF4(12510, 24812, 29261) },
+                { AOM_CDF4(6180, 19124, 24722) },
+                { AOM_CDF4(5041, 13548, 17959) },
+                { AOM_CDF4(31552, 32716, 32748) },
+                { AOM_CDF4(21908, 31769, 32623) },
+                { AOM_CDF4(14470, 28201, 31565) },
+                { AOM_CDF4(9493, 22982, 28608) },
+                { AOM_CDF4(6858, 17240, 24137) },
+                { AOM_CDF4(32543, 32752, 32756) },
+                { AOM_CDF4(24286, 32097, 32666) },
+                { AOM_CDF4(15958, 29217, 32024) },
+                { AOM_CDF4(10207, 24234, 29958) },
+                { AOM_CDF4(6929, 18305, 25652) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } },
+            { { { AOM_CDF4(4137, 10847, 15682) },
+                { AOM_CDF4(17824, 27001, 30058) },
+                { AOM_CDF4(10204, 22796, 28291) },
+                { AOM_CDF4(6076, 15935, 22125) },
+                { AOM_CDF4(3852, 10937, 16816) },
+                { AOM_CDF4(2252, 6324, 10131) },
+                { AOM_CDF4(25840, 32016, 32662) },
+                { AOM_CDF4(15109, 28268, 31531) },
+                { AOM_CDF4(9385, 22231, 28340) },
+                { AOM_CDF4(6082, 16672, 23479) },
+                { AOM_CDF4(3318, 9427, 14681) },
+                { AOM_CDF4(30594, 32574, 32718) },
+                { AOM_CDF4(16836, 29552, 31859) },
+                { AOM_CDF4(9556, 22542, 28356) },
+                { AOM_CDF4(6305, 16725, 23540) },
+                { AOM_CDF4(3376, 9895, 15184) },
+                { AOM_CDF4(29383, 32617, 32745) },
+                { AOM_CDF4(18891, 30809, 32401) },
+                { AOM_CDF4(11688, 25942, 30687) },
+                { AOM_CDF4(7468, 19469, 26651) },
+                { AOM_CDF4(3909, 11358, 17012) },
+                { AOM_CDF4(31564, 32736, 32748) },
+                { AOM_CDF4(20906, 31611, 32600) },
+                { AOM_CDF4(13191, 27621, 31537) },
+                { AOM_CDF4(8768, 22029, 28676) },
+                { AOM_CDF4(5079, 14109, 20906) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } },
+              { { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) },
+                { AOM_CDF4(8192, 16384, 24576) } } }
+        }, .dc_sign = {
+            {
+                { AOM_CDF2(128 * 125) },
+                { AOM_CDF2(128 * 102) },
+                { AOM_CDF2(128 * 147) },
+            },
+            {
+                { AOM_CDF2(128 * 119) },
+                { AOM_CDF2(128 * 101) },
+                { AOM_CDF2(128 * 135) },
+            }
+        }, .br_tok = {
+            { { { AOM_CDF4(18315, 24289, 27551) },
+                { AOM_CDF4(16854, 24068, 27835) },
+                { AOM_CDF4(10140, 17927, 23173) },
+                { AOM_CDF4(6722, 12982, 18267) },
+                { AOM_CDF4(4661, 9826, 14706) },
+                { AOM_CDF4(3832, 8165, 12294) },
+                { AOM_CDF4(2795, 6098, 9245) },
+                { AOM_CDF4(17145, 23326, 26672) },
+                { AOM_CDF4(20733, 27680, 30308) },
+                { AOM_CDF4(16032, 24461, 28546) },
+                { AOM_CDF4(11653, 20093, 25081) },
+                { AOM_CDF4(9290, 16429, 22086) },
+                { AOM_CDF4(7796, 14598, 19982) },
+                { AOM_CDF4(6502, 12378, 17441) },
+                { AOM_CDF4(21681, 27732, 30320) },
+                { AOM_CDF4(22389, 29044, 31261) },
+                { AOM_CDF4(19027, 26731, 30087) },
+                { AOM_CDF4(14739, 23755, 28624) },
+                { AOM_CDF4(11358, 20778, 25511) },
+                { AOM_CDF4(10995, 18073, 24190) },
+                { AOM_CDF4(9162, 14990, 20617) } },
+              { { AOM_CDF4(21425, 27952, 30388) },
+                { AOM_CDF4(18062, 25838, 29034) },
+                { AOM_CDF4(11956, 19881, 24808) },
+                { AOM_CDF4(7718, 15000, 20980) },
+                { AOM_CDF4(5702, 11254, 16143) },
+                { AOM_CDF4(4898, 9088, 16864) },
+                { AOM_CDF4(3679, 6776, 11907) },
+                { AOM_CDF4(23294, 30160, 31663) },
+                { AOM_CDF4(24397, 29896, 31836) },
+                { AOM_CDF4(19245, 27128, 30593) },
+                { AOM_CDF4(13202, 19825, 26404) },
+                { AOM_CDF4(11578, 19297, 23957) },
+                { AOM_CDF4(8073, 13297, 21370) },
+                { AOM_CDF4(5461, 10923, 19745) },
+                { AOM_CDF4(27367, 30521, 31934) },
+                { AOM_CDF4(24904, 30671, 31940) },
+                { AOM_CDF4(23075, 28460, 31299) },
+                { AOM_CDF4(14400, 23658, 30417) },
+                { AOM_CDF4(13885, 23882, 28325) },
+                { AOM_CDF4(14746, 22938, 27853) },
+                { AOM_CDF4(5461, 16384, 27307) } } },
+            { { { AOM_CDF4(18274, 24813, 27890) },
+                { AOM_CDF4(15537, 23149, 27003) },
+                { AOM_CDF4(9449, 16740, 21827) },
+                { AOM_CDF4(6700, 12498, 17261) },
+                { AOM_CDF4(4988, 9866, 14198) },
+                { AOM_CDF4(4236, 8147, 11902) },
+                { AOM_CDF4(2867, 5860, 8654) },
+                { AOM_CDF4(17124, 23171, 26101) },
+                { AOM_CDF4(20396, 27477, 30148) },
+                { AOM_CDF4(16573, 24629, 28492) },
+                { AOM_CDF4(12749, 20846, 25674) },
+                { AOM_CDF4(10233, 17878, 22818) },
+                { AOM_CDF4(8525, 15332, 20363) },
+                { AOM_CDF4(6283, 11632, 16255) },
+                { AOM_CDF4(20466, 26511, 29286) },
+                { AOM_CDF4(23059, 29174, 31191) },
+                { AOM_CDF4(19481, 27263, 30241) },
+                { AOM_CDF4(15458, 23631, 28137) },
+                { AOM_CDF4(12416, 20608, 25693) },
+                { AOM_CDF4(10261, 18011, 23261) },
+                { AOM_CDF4(8016, 14655, 19666) } },
+              { { AOM_CDF4(17616, 24586, 28112) },
+                { AOM_CDF4(15809, 23299, 27155) },
+                { AOM_CDF4(10767, 18890, 23793) },
+                { AOM_CDF4(7727, 14255, 18865) },
+                { AOM_CDF4(6129, 11926, 16882) },
+                { AOM_CDF4(4482, 9704, 14861) },
+                { AOM_CDF4(3277, 7452, 11522) },
+                { AOM_CDF4(22956, 28551, 30730) },
+                { AOM_CDF4(22724, 28937, 30961) },
+                { AOM_CDF4(18467, 26324, 29580) },
+                { AOM_CDF4(13234, 20713, 25649) },
+                { AOM_CDF4(11181, 17592, 22481) },
+                { AOM_CDF4(8291, 18358, 24576) },
+                { AOM_CDF4(7568, 11881, 14984) },
+                { AOM_CDF4(24948, 29001, 31147) },
+                { AOM_CDF4(25674, 30619, 32151) },
+                { AOM_CDF4(20841, 26793, 29603) },
+                { AOM_CDF4(14669, 24356, 28666) },
+                { AOM_CDF4(11334, 23593, 28219) },
+                { AOM_CDF4(8922, 14762, 22873) },
+                { AOM_CDF4(8301, 13544, 20535) } } },
+            { { { AOM_CDF4(17113, 23733, 27081) },
+                { AOM_CDF4(14139, 21406, 25452) },
+                { AOM_CDF4(8552, 15002, 19776) },
+                { AOM_CDF4(5871, 11120, 15378) },
+                { AOM_CDF4(4455, 8616, 12253) },
+                { AOM_CDF4(3469, 6910, 10386) },
+                { AOM_CDF4(2255, 4553, 6782) },
+                { AOM_CDF4(18224, 24376, 27053) },
+                { AOM_CDF4(19290, 26710, 29614) },
+                { AOM_CDF4(14936, 22991, 27184) },
+                { AOM_CDF4(11238, 18951, 23762) },
+                { AOM_CDF4(8786, 15617, 20588) },
+                { AOM_CDF4(7317, 13228, 18003) },
+                { AOM_CDF4(5101, 9512, 13493) },
+                { AOM_CDF4(22639, 28222, 30210) },
+                { AOM_CDF4(23216, 29331, 31307) },
+                { AOM_CDF4(19075, 26762, 29895) },
+                { AOM_CDF4(15014, 23113, 27457) },
+                { AOM_CDF4(11938, 19857, 24752) },
+                { AOM_CDF4(9942, 17280, 22282) },
+                { AOM_CDF4(7167, 13144, 17752) } },
+              { { AOM_CDF4(15820, 22738, 26488) },
+                { AOM_CDF4(13530, 20885, 25216) },
+                { AOM_CDF4(8395, 15530, 20452) },
+                { AOM_CDF4(6574, 12321, 16380) },
+                { AOM_CDF4(5353, 10419, 14568) },
+                { AOM_CDF4(4613, 8446, 12381) },
+                { AOM_CDF4(3440, 7158, 9903) },
+                { AOM_CDF4(24247, 29051, 31224) },
+                { AOM_CDF4(22118, 28058, 30369) },
+                { AOM_CDF4(16498, 24768, 28389) },
+                { AOM_CDF4(12920, 21175, 26137) },
+                { AOM_CDF4(10730, 18619, 25352) },
+                { AOM_CDF4(10187, 16279, 22791) },
+                { AOM_CDF4(9310, 14631, 22127) },
+                { AOM_CDF4(24970, 30558, 32057) },
+                { AOM_CDF4(24801, 29942, 31698) },
+                { AOM_CDF4(22432, 28453, 30855) },
+                { AOM_CDF4(19054, 25680, 29580) },
+                { AOM_CDF4(14392, 23036, 28109) },
+                { AOM_CDF4(12495, 20947, 26650) },
+                { AOM_CDF4(12442, 20326, 26214) } } },
+            { { { AOM_CDF4(12162, 18785, 22648) },
+                { AOM_CDF4(12749, 19697, 23806) },
+                { AOM_CDF4(8580, 15297, 20346) },
+                { AOM_CDF4(6169, 11749, 16543) },
+                { AOM_CDF4(4836, 9391, 13448) },
+                { AOM_CDF4(3821, 7711, 11613) },
+                { AOM_CDF4(2228, 4601, 7070) },
+                { AOM_CDF4(16319, 24725, 28280) },
+                { AOM_CDF4(15698, 23277, 27168) },
+                { AOM_CDF4(12726, 20368, 25047) },
+                { AOM_CDF4(9912, 17015, 21976) },
+                { AOM_CDF4(7888, 14220, 19179) },
+                { AOM_CDF4(6777, 12284, 17018) },
+                { AOM_CDF4(4492, 8590, 12252) },
+                { AOM_CDF4(23249, 28904, 30947) },
+                { AOM_CDF4(21050, 27908, 30512) },
+                { AOM_CDF4(17440, 25340, 28949) },
+                { AOM_CDF4(14059, 22018, 26541) },
+                { AOM_CDF4(11288, 18903, 23898) },
+                { AOM_CDF4(9411, 16342, 21428) },
+                { AOM_CDF4(6278, 11588, 15944) } },
+              { { AOM_CDF4(13981, 20067, 23226) },
+                { AOM_CDF4(16922, 23580, 26783) },
+                { AOM_CDF4(11005, 19039, 24487) },
+                { AOM_CDF4(7389, 14218, 19798) },
+                { AOM_CDF4(5598, 11505, 17206) },
+                { AOM_CDF4(6090, 11213, 15659) },
+                { AOM_CDF4(3820, 7371, 10119) },
+                { AOM_CDF4(21082, 26925, 29675) },
+                { AOM_CDF4(21262, 28627, 31128) },
+                { AOM_CDF4(18392, 26454, 30437) },
+                { AOM_CDF4(14870, 22910, 27096) },
+                { AOM_CDF4(12620, 19484, 24908) },
+                { AOM_CDF4(9290, 16553, 22802) },
+                { AOM_CDF4(6668, 14288, 20004) },
+                { AOM_CDF4(27704, 31055, 31949) },
+                { AOM_CDF4(24709, 29978, 31788) },
+                { AOM_CDF4(21668, 29264, 31657) },
+                { AOM_CDF4(18295, 26968, 30074) },
+                { AOM_CDF4(16399, 24422, 29313) },
+                { AOM_CDF4(14347, 23026, 28104) },
+                { AOM_CDF4(12370, 19806, 24477) } } }
+        },
+    }
+};
+
+/* Map a frame base quantizer index to one of four coarse quality
+ * categories, used to pick a matching set of default coefficient CDFs. */
+static inline int get_qcat_idx(int q) {
+    if (q <= 20) return 0;
+    if (q <= 60) return 1;
+    if (q <= 120) return 2;
+    return 3;
+}
+
+/* Lazily-populated cache of default CDF contexts, one per quality
+ * category; entries are allocated on first use by av1_init_states().
+ * NOTE(review): file-scope mutable state with no locking visible here —
+ * presumably only touched from a single thread; confirm at call sites. */
+static CdfThreadContext cdf_init[4] = {
+    [0] = { .cdf = NULL },
+    [1] = { .cdf = NULL },
+    [2] = { .cdf = NULL },
+    [3] = { .cdf = NULL },
+};
+
+/* Initialize cdf with the default symbol probabilities for the quality
+ * category of qidx. The defaults for each category are built once,
+ * cached in cdf_init[], and handed out by reference counting thereafter.
+ * NOTE(review): the cdf_thread_alloc() result is used unchecked — if the
+ * underlying ref allocation can fail this dereferences NULL; confirm. */
+void av1_init_states(CdfThreadContext *const cdf, const int qidx) {
+    const int qcat = get_qcat_idx(qidx);
+    if (cdf_init[qcat].cdf) {
+        /* Cache hit: just take another reference on the shared copy. */
+        cdf_thread_ref(cdf, &cdf_init[qcat]);
+        return;
+    }
+
+    /* First use of this category: allocate (untracked, t == NULL) and
+     * fill in the default mode, keyframe-y-mode, coef and mv tables. */
+    cdf_thread_alloc(&cdf_init[qcat], NULL);
+    cdf_init[qcat].cdf->m = av1_default_cdf;
+    memcpy(cdf_init[qcat].cdf->kfym, default_kf_y_mode_cdf,
+           sizeof(default_kf_y_mode_cdf));
+    cdf_init[qcat].cdf->coef = av1_default_coef_cdf[qcat];
+    /* Intra-block-copy MVs (dmv) start from the same defaults as inter MVs. */
+    cdf_init[qcat].cdf->mv = default_mv_cdf;
+    cdf_init[qcat].cdf->dmv = default_mv_cdf;
+    cdf_thread_ref(cdf, &cdf_init[qcat]);
+}
+
+/* Copy the adapted symbol CDFs from a finished tile context (src) into
+ * the frame-level context (dst), table by table. For each table only the
+ * meaningful entries are copied, and the slot one past the end — used as
+ * the adaptation-rate counter — is reset to 0.
+ * For intra frames (!(hdr->frame_type & 1)) only the intra/intrabc
+ * tables are refreshed; the function returns before the inter tables. */
+void av1_update_tile_cdf(const Av1FrameHeader *const hdr,
+                         CdfContext *const dst,
+                         const CdfContext *const src)
+{
+    int i, j, k, l;
+
+/* Copy an n1d-entry CDF from src to dst and zero the trailing counter.
+ * NOTE(review): the assert checks the last copied entry is 0 —
+ * presumably the CDF terminator invariant; confirm table layout. */
+#define update_cdf_1d(n1d, name) \
+    do { \
+        memcpy(dst->name, src->name, sizeof(*dst->name) * n1d); \
+        assert(!dst->name[n1d - 1]); \
+        dst->name[n1d] = 0; \
+    } while (0)
+
+/* Multi-dimensional wrappers; note that the inner size expressions may
+ * reference the outer loop counters (e.g. "!k", "k + 2" below). */
+#define update_cdf_2d(n1d, n2d, name) \
+    for (j = 0; j < (n1d); j++) update_cdf_1d(n2d, name[j])
+#define update_cdf_3d(n1d, n2d, n3d, name) \
+    for (k = 0; k < (n1d); k++) update_cdf_2d(n2d, n3d, name[k])
+#define update_cdf_4d(n1d, n2d, n3d, n4d, name) \
+    for (l = 0; l < (n1d); l++) update_cdf_3d(n2d, n3d, n4d, name[l])
+/* NOTE(review): update_cdf_6d uses loop variables n and m that are not
+ * declared in this function; it is never expanded below, so this only
+ * compiles because unused macros are not instantiated. */
+#define update_cdf_6d(n1d, n2d, n3d, n4d, n5d, n6d, name) \
+    for (n = 0; n < (n1d); n++) \
+        for (m = 0; m < (n2d); m++) \
+            update_cdf_4d(n3d, n4d, n5d, n6d, name[n][m])
+
+/* Binary (2-entry) CDFs: copy the probability, zero the counter slot. */
+#define update_bit_0d(name) \
+    do { \
+        dst->name[0] = src->name[0]; \
+        dst->name[1] = 0; \
+    } while (0)
+
+#define update_bit_1d(n1d, name) \
+    for (i = 0; i < (n1d); i++) update_bit_0d(name[i])
+#define update_bit_2d(n1d, n2d, name) \
+    for (j = 0; j < (n1d); j++) update_bit_1d(n2d, name[j])
+#define update_bit_3d(n1d, n2d, n3d, name) \
+    for (k = 0; k < (n1d); k++) update_bit_2d(n2d, n3d, name[k])
+
+    /* Tables used by both intra and inter frames. */
+    update_bit_1d(N_BS_SIZES, m.use_filter_intra);
+    update_cdf_1d(5, m.filter_intra);
+    update_cdf_3d(2, N_INTRA_PRED_MODES, N_UV_INTRA_PRED_MODES - !k, m.uv_mode);
+    update_cdf_2d(8, 7, m.angle_delta);
+    update_cdf_3d(N_TX_SIZES - 1, 3, imin(k + 2, 3), m.txsz);
+    update_cdf_3d(N_TX_SIZES - 1, N_INTRA_PRED_MODES, 7, m.txtp_intra[1]);
+    update_cdf_3d(N_TX_SIZES - 1, N_INTRA_PRED_MODES, 5, m.txtp_intra[2]);
+    update_bit_1d(3, m.skip);
+    /* Partition symbol count varies per block level (no 4-way splits at
+     * 128x128, reduced set at 8x8). */
+    static const uint8_t n_partitions[N_BL_LEVELS] = {
+        [BL_128X128] = N_PARTITIONS - 2,
+        [BL_64X64]   = N_PARTITIONS,
+        [BL_32X32]   = N_PARTITIONS,
+        [BL_16X16]   = N_PARTITIONS,
+        [BL_8X8]     = N_SUB8X8_PARTITIONS,
+    };
+    update_cdf_3d(N_BL_LEVELS, 4, n_partitions[k], m.partition);
+    /* Coefficient coding CDFs. */
+    update_bit_2d(N_TX_SIZES, 13, coef.skip);
+    update_cdf_3d(2, 2, 5, coef.eob_bin_16);
+    update_cdf_3d(2, 2, 6, coef.eob_bin_32);
+    update_cdf_3d(2, 2, 7, coef.eob_bin_64);
+    update_cdf_3d(2, 2, 8, coef.eob_bin_128);
+    update_cdf_3d(2, 2, 9, coef.eob_bin_256);
+    update_cdf_3d(2, 2, 10, coef.eob_bin_512);
+    update_cdf_3d(2, 2, 11, coef.eob_bin_1024);
+    update_bit_3d(N_TX_SIZES, 2, 11 /*22*/, coef.eob_hi_bit);
+    update_cdf_4d(N_TX_SIZES, 2, 4, 3, coef.eob_base_tok);
+    update_cdf_4d(N_TX_SIZES, 2, 41 /*42*/, 4, coef.base_tok);
+    update_bit_2d(2, 3, coef.dc_sign);
+    update_cdf_4d(4, 2, 21, 4, coef.br_tok);
+    update_cdf_2d(3, NUM_SEGMENTS, m.seg_id);
+    update_cdf_1d(8, m.cfl_sign);
+    update_cdf_2d(6, 16, m.cfl_alpha);
+    update_bit_0d(m.restore_wiener);
+    update_bit_0d(m.restore_sgrproj);
+    update_cdf_1d(3, m.restore_switchable);
+    update_cdf_1d(4, m.delta_q);
+    update_cdf_2d(5, 4, m.delta_lf);
+    update_bit_2d(7, 3, m.pal_y);
+    update_bit_1d(2, m.pal_uv);
+    update_cdf_3d(2, 7, 7, m.pal_sz);
+    update_cdf_4d(2, 7, 5, k + 2, m.color_map);
+
+    if (!(hdr->frame_type & 1)) {
+        /* Intra frame: only intrabc and its MV CDFs remain. */
+        update_bit_0d(m.intrabc);
+
+        update_cdf_1d(N_MV_JOINTS, dmv.joint);
+        for (k = 0; k < 2; k++) {
+            update_cdf_1d(11, dmv.comp[k].classes);
+            update_bit_0d(dmv.comp[k].class0);
+            update_bit_1d(10, dmv.comp[k].classN);
+            update_bit_0d(dmv.comp[k].sign);
+        }
+        return;
+    }
+
+    /* Inter-frame-only tables. */
+    update_bit_1d(3, m.skip_mode);
+    update_cdf_2d(4, N_INTRA_PRED_MODES, m.y_mode);
+    update_cdf_3d(2, 8, N_SWITCHABLE_FILTERS, m.filter);
+    update_bit_1d(6, m.newmv_mode);
+    update_bit_1d(2, m.globalmv_mode);
+    update_bit_1d(6, m.refmv_mode);
+    update_bit_1d(3, m.drl_bit);
+    update_cdf_2d(8, N_COMP_INTER_PRED_MODES, m.comp_inter_mode);
+    update_bit_1d(4, m.intra);
+    update_bit_1d(5, m.comp);
+    update_bit_1d(5, m.comp_dir);
+    update_bit_1d(6, m.jnt_comp);
+    update_bit_1d(6, m.mask_comp);
+    update_bit_1d(9, m.wedge_comp);
+    update_cdf_2d(9, 16, m.wedge_idx);
+    update_bit_2d(6, 3, m.ref);
+    update_bit_2d(3, 3, m.comp_fwd_ref);
+    update_bit_2d(2, 3, m.comp_bwd_ref);
+    update_bit_2d(3, 3, m.comp_uni_ref);
+    update_bit_2d(7, 3, m.txpart);
+    update_cdf_2d(N_TX_SIZES - 1, 16, m.txtp_inter[1]);
+    update_cdf_2d(N_TX_SIZES - 1, 12, m.txtp_inter[2]);
+    update_cdf_2d(N_TX_SIZES - 1,  2, m.txtp_inter[3]);
+    update_bit_1d(3, m.seg_pred);
+    update_bit_1d(4, m.interintra);
+    update_bit_1d(7, m.interintra_wedge);
+    update_cdf_2d(4, 4, m.interintra_mode);
+    update_cdf_2d(N_BS_SIZES, 3, m.motion_mode);
+    update_bit_1d(N_BS_SIZES, m.obmc);
+
+    /* Inter MV CDFs; note these carry more sub-tables (fractional and
+     * high-precision bits) than the intrabc dmv set above. */
+    update_cdf_1d(N_MV_JOINTS, mv.joint);
+    for (k = 0; k < 2; k++) {
+        update_cdf_1d(11, mv.comp[k].classes);
+        update_bit_0d(mv.comp[k].class0);
+        update_bit_1d(10, mv.comp[k].classN);
+        update_cdf_2d(2, 4, mv.comp[k].class0_fp);
+        update_cdf_1d(4, mv.comp[k].classN_fp);
+        update_bit_0d(mv.comp[k].class0_hp);
+        update_bit_0d(mv.comp[k].classN_hp);
+        update_bit_0d(mv.comp[k].sign);
+    }
+}
+
+/*
+ * CDF threading wrappers.
+ */
+/* Allocate a ref-counted CdfContext. When t is non-NULL, an extra
+ * atomic_uint progress word is appended to the allocation (placed right
+ * after the CdfContext) and threading signal/wait is enabled.
+ * NOTE(review): dav1d_ref_create() is dereferenced unchecked here,
+ * unlike dav1d_data_create() which tests the same call for NULL —
+ * confirm whether allocation failure is possible on this path. */
+void cdf_thread_alloc(CdfThreadContext *const cdf, struct thread_data *const t) {
+    cdf->ref = dav1d_ref_create(sizeof(CdfContext) +
+                                (t != NULL) * sizeof(atomic_uint));
+    cdf->cdf = cdf->ref->data;
+    if (t) {
+        /* Progress word lives in the tail of the same allocation. */
+        cdf->progress = (atomic_uint *) &cdf->cdf[1];
+        atomic_init(cdf->progress, 0);
+        cdf->t = t;
+    }
+}
+
+/* Take an additional reference on src's allocation and make dst a
+ * shallow copy of src (both then point at the same CdfContext). */
+void cdf_thread_ref(CdfThreadContext *const dst, CdfThreadContext *const src) {
+    dav1d_ref_inc(src->ref);
+    *dst = *src;
+}
+
+/* Drop one reference and clear the context so stale pointers cannot be
+ * reused after the unref.
+ * NOTE(review): presumably dav1d_ref_dec tolerates the freeing of the
+ * last reference while cdf->cdf is still set; confirm in ref.c. */
+void cdf_thread_unref(CdfThreadContext *const cdf) {
+    dav1d_ref_dec(cdf->ref);
+    memset(cdf, 0, sizeof(*cdf));
+}
+
+/* Block until cdf_thread_signal() has been called on this context.
+ * No-op for untracked (t == NULL) contexts. Does an unlocked atomic
+ * fast-path check first, then re-checks under the mutex before waiting
+ * on the condition variable (guards against missed wakeups). */
+void cdf_thread_wait(CdfThreadContext *const cdf) {
+    if (!cdf->t) return;
+
+    if (atomic_load(cdf->progress)) return;
+    pthread_mutex_lock(&cdf->t->lock);
+    while (!atomic_load(cdf->progress))
+        pthread_cond_wait(&cdf->t->cond, &cdf->t->lock);
+    pthread_mutex_unlock(&cdf->t->lock);
+}
+
+/* Mark this context as done and wake all waiters in cdf_thread_wait().
+ * The store happens under the mutex so a waiter cannot check the flag
+ * and sleep between our store and broadcast. No-op when t == NULL. */
+void cdf_thread_signal(CdfThreadContext *const cdf) {
+    if (!cdf->t) return;
+
+    pthread_mutex_lock(&cdf->t->lock);
+    atomic_store(cdf->progress, 1);
+    pthread_cond_broadcast(&cdf->t->cond);
+    pthread_mutex_unlock(&cdf->t->lock);
+}
--- /dev/null
+++ b/src/cdf.h
@@ -1,0 +1,148 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __AV1_CDF_H__
+#define __AV1_CDF_H__
+
+#include <stdint.h>
+
+#include "src/levels.h"
+#include "src/ref.h"
+#include "src/thread_data.h"
+
+/* Adaptive CDFs for block-mode, partition, palette and loop-restoration
+ * symbols. Trailing dimensions are sized one larger than the symbol
+ * count: the extra slot holds the adaptation counter, which
+ * av1_update_tile_cdf() zeroes on each refresh. */
+typedef struct CdfModeContext {
+    uint16_t y_mode[4][N_INTRA_PRED_MODES + 1];
+    uint16_t use_filter_intra[N_BS_SIZES][2];
+    uint16_t filter_intra[5 + 1];
+    uint16_t uv_mode[2][N_INTRA_PRED_MODES][N_UV_INTRA_PRED_MODES + 1];
+    uint16_t angle_delta[8][8];
+    uint16_t filter[2][8][N_SWITCHABLE_FILTERS + 1];
+    uint16_t newmv_mode[6][2];
+    uint16_t globalmv_mode[2][2];
+    uint16_t refmv_mode[6][2];
+    uint16_t drl_bit[3][2];
+    uint16_t comp_inter_mode[8][N_COMP_INTER_PRED_MODES + 1];
+    uint16_t intra[4][2];
+    uint16_t comp[5][2];
+    uint16_t comp_dir[5][2];
+    uint16_t jnt_comp[6][2];
+    uint16_t mask_comp[6][2];
+    uint16_t wedge_comp[9][2];
+    uint16_t wedge_idx[9][16 + 1];
+    uint16_t interintra[7][2];
+    uint16_t interintra_mode[4][5];
+    uint16_t interintra_wedge[7][2];
+    uint16_t ref[6][3][2];
+    uint16_t comp_fwd_ref[3][3][2];
+    uint16_t comp_bwd_ref[2][3][2];
+    uint16_t comp_uni_ref[3][3][2];
+    uint16_t txsz[N_TX_SIZES - 1][3][4];
+    uint16_t txpart[7][3][2];
+    uint16_t txtp_inter[4][N_TX_SIZES][N_TX_TYPES + 1];
+    uint16_t txtp_intra[3][N_TX_SIZES][N_INTRA_PRED_MODES][N_TX_TYPES + 1];
+    uint16_t skip[3][2];
+    uint16_t skip_mode[3][2];
+    uint16_t partition[N_BL_LEVELS][4][N_PARTITIONS + 1];
+    uint16_t seg_pred[3][2];
+    uint16_t seg_id[3][NUM_SEGMENTS + 1];
+    uint16_t cfl_sign[8 + 1];
+    uint16_t cfl_alpha[6][16 + 1];
+    uint16_t restore_wiener[2];
+    uint16_t restore_sgrproj[2];
+    uint16_t restore_switchable[3 + 1];
+    uint16_t delta_q[4 + 1];
+    uint16_t delta_lf[5][4 + 1];
+    uint16_t obmc[N_BS_SIZES][2];
+    uint16_t motion_mode[N_BS_SIZES][3 + 1];
+    uint16_t pal_y[7][3][2];
+    uint16_t pal_uv[2][2];
+    uint16_t pal_sz[2][7][7 + 1];
+    uint16_t color_map[2][7][5][8 + 1];
+    uint16_t intrabc[2];
+} CdfModeContext;
+
+/* Adaptive CDFs for transform-coefficient coding: end-of-block position
+ * (per transform-unit size class), base/high-range token values and DC
+ * sign. Indexed defaults come from av1_default_coef_cdf[qcat]. */
+typedef struct CdfCoefContext {
+    uint16_t skip[N_TX_SIZES][13][2];
+    uint16_t eob_bin_16[2][2][6];
+    uint16_t eob_bin_32[2][2][7];
+    uint16_t eob_bin_64[2][2][8];
+    uint16_t eob_bin_128[2][2][9];
+    uint16_t eob_bin_256[2][2][10];
+    uint16_t eob_bin_512[2][2][11];
+    uint16_t eob_bin_1024[2][2][12];
+    uint16_t eob_hi_bit[N_TX_SIZES][2][11 /*22*/][2];
+    uint16_t eob_base_tok[N_TX_SIZES][2][4][4];
+    uint16_t base_tok[N_TX_SIZES][2][41][5];
+    uint16_t dc_sign[2][3][2];
+    uint16_t br_tok[4 /*5*/][2][21][5];
+} CdfCoefContext;
+
+/* Adaptive CDFs for one motion-vector component (row or column):
+ * magnitude class, per-class bits, fractional and high-precision bits,
+ * and sign. */
+typedef struct CdfMvComponent {
+    uint16_t classes[11 + 1];
+    uint16_t class0[2];
+    uint16_t classN[10][2];
+    uint16_t class0_fp[2][4 + 1];
+    uint16_t classN_fp[4 + 1];
+    uint16_t class0_hp[2];
+    uint16_t classN_hp[2];
+    uint16_t sign[2];
+} CdfMvComponent;
+
+/* Full motion-vector CDF set: joint (which components are nonzero)
+ * plus the two per-component tables ([0]/[1]). */
+typedef struct CdfMvContext {
+    CdfMvComponent comp[2];
+    uint16_t joint[N_MV_JOINTS + 1];
+} CdfMvContext;
+
+/* Complete per-frame CDF state: mode symbols, keyframe y-mode table
+ * (indexed by above/left mode contexts), coefficient tables, and the
+ * inter (mv) and intra-block-copy (dmv) motion-vector tables. */
+typedef struct CdfContext {
+    CdfModeContext m;
+    uint16_t kfym[5][5][N_INTRA_PRED_MODES + 1];
+    CdfCoefContext coef;
+    CdfMvContext mv, dmv;
+} CdfContext;
+
+/* Ref-counted, optionally thread-shared wrapper around a CdfContext.
+ * t/progress are NULL for untracked contexts (see cdf_thread_alloc).
+ * NOTE(review): atomic_uint needs <stdatomic.h>, which this header does
+ * not include directly — presumably pulled in via thread_data.h; verify. */
+typedef struct CdfThreadContext {
+    CdfContext *cdf;
+    Dav1dRef *ref; ///< allocation origin
+    struct thread_data *t;
+    atomic_uint *progress;
+} CdfThreadContext;
+
+/* Fill cdf with the defaults for qidx's quality category (ref-counted). */
+void av1_init_states(CdfThreadContext *cdf, int qidx);
+/* Copy adapted tile CDFs (src) into the frame context (dst). */
+void av1_update_tile_cdf(const Av1FrameHeader *hdr, CdfContext *dst,
+                         const CdfContext *src);
+
+/* Allocation / reference management; see cdf.c for ownership rules. */
+void cdf_thread_alloc(CdfThreadContext *cdf, struct thread_data *t);
+void cdf_thread_ref(CdfThreadContext *dst, CdfThreadContext *src);
+void cdf_thread_unref(CdfThreadContext *cdf);
+
+/*
+ * These are binary signals (so a signal is either "done" or "not done").
+ */
+void cdf_thread_wait(CdfThreadContext *cdf);
+void cdf_thread_signal(CdfThreadContext *cdf);
+
+#endif /* __AV1_CDF_H__ */
--- /dev/null
+++ b/src/data.c
@@ -1,0 +1,54 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dav1d/data.h"
+
+#include "common/validate.h"
+
+#include "src/ref.h"
+
+/* Allocate a ref-counted data buffer of sz bytes and attach it to buf.
+ * Returns 0 on success, -EINVAL if buf is NULL, -ENOMEM on allocation
+ * failure. On success the caller owns one reference (dav1d_data_unref). */
+int dav1d_data_create(Dav1dData *const buf, const size_t sz) {
+    validate_input_or_ret(buf != NULL, -EINVAL);
+
+    buf->ref = dav1d_ref_create(sz);
+    if (!buf->ref) return -ENOMEM;
+    buf->data = buf->ref->data;
+    buf->sz = sz;
+
+    return 0;
+}
+
+/* Release the caller's reference and zero the struct so the pointers
+ * cannot be reused.
+ * NOTE(review): unlike dav1d_data_create(), buf is not validated here,
+ * and buf->ref is passed to dav1d_ref_dec() unconditionally — confirm
+ * ref_dec tolerates NULL or that callers never pass an empty buf. */
+void dav1d_data_unref(Dav1dData *const buf) {
+    dav1d_ref_dec(buf->ref);
+    memset(buf, 0, sizeof(*buf));
+}
--- /dev/null
+++ b/src/data.h
@@ -1,0 +1,35 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_DATA_H__
+#define __DAV1D_SRC_DATA_H__
+
+#include "dav1d/data.h"
+
+/* Internal re-declaration of the public unref entry point (data.c). */
+void dav1d_data_unref(Dav1dData *buf);
+
+#endif /* __DAV1D_SRC_DATA_H__ */
--- /dev/null
+++ b/src/decode.c
@@ -1,0 +1,2908 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "dav1d/data.h"
+
+#include "common/intops.h"
+#include "common/mem.h"
+
+#include "src/decode.h"
+#include "src/dequant_tables.h"
+#include "src/env.h"
+#include "src/qm.h"
+#include "src/recon.h"
+#include "src/ref.h"
+#include "src/tables.h"
+#include "src/thread_task.h"
+#include "src/warpmv.h"
+
+/*
+ * Build the per-segment dequantization factor table.
+ *
+ * For each active segment (8 when segmentation is enabled, otherwise a
+ * single entry), derive the effective quantizer index per plane and
+ * coefficient type from the base qidx, the segment's delta_q and the frame
+ * header's per-plane DC/AC deltas, then look the factors up in
+ * dav1d_dq_tbl. Layout: dq[seg][plane][0] = DC, dq[seg][plane][1] = AC,
+ * with plane 0 = Y, 1 = U, 2 = V; the first dav1d_dq_tbl index selects
+ * the 8bpc vs. high-bitdepth table.
+ */
+static void init_quant_tables(const Av1SequenceHeader *const seq_hdr,
+                              const Av1FrameHeader *const frame_hdr,
+                              const int qidx, uint16_t (*dq)[3][2])
+{
+    for (int i = 0; i < (frame_hdr->segmentation.enabled ? 8 : 1); i++) {
+        // the segment delta applies to the luma AC index; all other
+        // indices are offsets from it
+        const int yac = frame_hdr->segmentation.enabled ?
+            iclip_u8(qidx + frame_hdr->segmentation.seg_data.d[i].delta_q) : qidx;
+        const int ydc = iclip_u8(yac + frame_hdr->quant.ydc_delta);
+        const int uac = iclip_u8(yac + frame_hdr->quant.uac_delta);
+        const int udc = iclip_u8(yac + frame_hdr->quant.udc_delta);
+        const int vac = iclip_u8(yac + frame_hdr->quant.vac_delta);
+        const int vdc = iclip_u8(yac + frame_hdr->quant.vdc_delta);
+
+        dq[i][0][0] = dav1d_dq_tbl[seq_hdr->bpc > 8][ydc][0];
+        dq[i][0][1] = dav1d_dq_tbl[seq_hdr->bpc > 8][yac][1];
+        dq[i][1][0] = dav1d_dq_tbl[seq_hdr->bpc > 8][udc][0];
+        dq[i][1][1] = dav1d_dq_tbl[seq_hdr->bpc > 8][uac][1];
+        dq[i][2][0] = dav1d_dq_tbl[seq_hdr->bpc > 8][vdc][0];
+        dq[i][2][1] = dav1d_dq_tbl[seq_hdr->bpc > 8][vac][1];
+    }
+}
+
+/*
+ * Decode one motion vector component residual (x or y) in 1/8-pel units.
+ *
+ * Syntax order: sign bit, magnitude class symbol (0-10), then for class 0
+ * a single integer bit, or for class N > 0 one refinement bit per class
+ * level; finally a 2-bit fractional part (fp) and a high-precision bit
+ * (hp) when allowed by have_fp / the frame header. With have_fp == 0 the
+ * fractional part defaults to its maximum (fp = 3, hp = 1), i.e. full-pel
+ * coding. Returns the signed difference to add to the predicted component.
+ */
+static int read_mv_component_diff(Dav1dTileContext *const t,
+                                  CdfMvComponent *const mv_comp,
+                                  const int have_fp)
+{
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dFrameContext *const f = t->f;
+    const int have_hp = f->frame_hdr.hp;
+    const int sign = msac_decode_bool_adapt(&ts->msac, mv_comp->sign);
+    const int cl = msac_decode_symbol_adapt(&ts->msac, mv_comp->classes, 11);
+    int up, fp, hp;
+
+    if (!cl) {
+        up = msac_decode_bool_adapt(&ts->msac, mv_comp->class0);
+        if (have_fp) {
+            fp = msac_decode_symbol_adapt(&ts->msac, mv_comp->class0_fp[up], 4);
+            hp = have_hp ? msac_decode_bool_adapt(&ts->msac, mv_comp->class0_hp) : 1;
+        } else {
+            fp = 3;
+            hp = 1;
+        }
+    } else {
+        // integer part: implicit top bit for this class plus cl coded bits
+        up = 1 << cl;
+        for (int n = 0; n < cl; n++)
+            up |= msac_decode_bool_adapt(&ts->msac, mv_comp->classN[n]) << n;
+        if (have_fp) {
+            fp = msac_decode_symbol_adapt(&ts->msac, mv_comp->classN_fp, 4);
+            hp = have_hp ? msac_decode_bool_adapt(&ts->msac, mv_comp->classN_hp) : 1;
+        } else {
+            fp = 3;
+            hp = 1;
+        }
+    }
+
+    // assemble the 1/8-pel magnitude; +1 because a zero residual is
+    // signalled through the joint symbol, never through the components
+    const int diff = ((up << 3) | (fp << 1) | hp) + 1;
+
+    return sign ? -diff : diff;
+}
+
+/*
+ * Decode the mv joint symbol and add the coded component residuals to
+ * *ref_mv in place. When both components are present the vertical (y)
+ * component is decoded before the horizontal (x) one; MV_JOINT_ZERO
+ * codes no residual at all.
+ */
+static void read_mv_residual(Dav1dTileContext *const t, mv *const ref_mv,
+                             CdfMvContext *const mv_cdf, const int have_fp)
+{
+    switch (msac_decode_symbol_adapt(&t->ts->msac, t->ts->cdf.mv.joint, N_MV_JOINTS)) {
+    case MV_JOINT_HV:
+        ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
+        ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
+        break;
+    case MV_JOINT_H:
+        ref_mv->x += read_mv_component_diff(t, &mv_cdf->comp[1], have_fp);
+        break;
+    case MV_JOINT_V:
+        ref_mv->y += read_mv_component_diff(t, &mv_cdf->comp[0], have_fp);
+        break;
+    default:
+        // MV_JOINT_ZERO: nothing coded
+        break;
+    }
+}
+
+/*
+ * Recursively decode the transform split tree for one max-size transform
+ * unit of an inter block.
+ *
+ * At each node down to depth 2 a context-coded split flag is read (the
+ * context combines recursion depth with whether the above/left neighbour
+ * transforms are smaller than this node); splits are recorded in
+ * masks[depth] as bit (y_off * 4 + x_off) for reconstruction to consume.
+ * Trees whose sub-size would drop below TX_8X8 are not recursed into.
+ * At leaves, the above/left tx context arrays are updated with the
+ * resulting size (TX_4X4 when the leaf itself was split).
+ */
+static void read_tx_tree(Dav1dTileContext *const t,
+                         const enum RectTxfmSize from,
+                         const int depth, uint16_t *const masks,
+                         const int x_off, const int y_off)
+{
+    const Dav1dFrameContext *const f = t->f;
+    const int bx4 = t->bx & 31, by4 = t->by & 31;
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[from];
+    const int txw = t_dim->lw, txh = t_dim->lh;
+    int is_split;
+
+    if (depth < 2 && from > (int) TX_4X4) {
+        const int cat = 2 * (TX_64X64 - t_dim->max) - depth;
+        const int a = t->a->tx[bx4] < txw;
+        const int l = t->l.tx[by4] < txh;
+
+        is_split = msac_decode_bool_adapt(&t->ts->msac, t->ts->cdf.m.txpart[cat][a + l]);
+        if (is_split)
+            masks[depth] |= 1 << (y_off * 4 + x_off);
+    } else {
+        is_split = 0;
+    }
+
+    if (is_split && t_dim->max > TX_8X8) {
+        const enum RectTxfmSize sub = t_dim->sub;
+        const TxfmInfo *const sub_t_dim = &av1_txfm_dimensions[sub];
+        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
+
+        // recurse into up to four children, walking t->bx/t->by and
+        // restoring them afterwards; right/bottom children are skipped
+        // at the frame edge
+        read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 0);
+        t->bx += txsw;
+        if (txw >= txh && t->bx < f->bw)
+            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 1, y_off * 2 + 0);
+        t->bx -= txsw;
+        t->by += txsh;
+        if (txh >= txw && t->by < f->bh) {
+            read_tx_tree(t, sub, depth + 1, masks, x_off * 2 + 0, y_off * 2 + 1);
+            t->bx += txsw;
+            if (txw >= txh && t->bx < f->bw)
+                read_tx_tree(t, sub, depth + 1, masks,
+                             x_off * 2 + 1, y_off * 2 + 1);
+            t->bx -= txsw;
+        }
+        t->by -= txsh;
+    } else {
+        // leaf: publish the resulting transform size to the contexts
+        memset(&t->a->tx[bx4], is_split ? TX_4X4 : txw, t_dim->w);
+        memset(&t->l.tx[by4], is_split ? TX_4X4 : txh, t_dim->h);
+    }
+}
+
/*
 * Inverse of the encoder-side "neg interleave": map a decoded distance
 * code (diff) back to an absolute value in [0, max), given the predicted
 * value (ref). Small codes alternate above/below the prediction; once the
 * shorter side is exhausted, the remaining codes map linearly onto the
 * longer side. Used for segment-id decoding.
 */
int av1_neg_deinterleave(int diff, int ref, int max) {
    if (ref == 0)
        return diff;               // no room below the prediction: identity
    if (ref >= max - 1)
        return max - diff - 1;     // no room above: mirrored mapping

    // number of codes that alternate around ref before one side runs out
    const int near = 2 * ref < max ? 2 * ref : 2 * (max - ref - 1);
    if (diff <= near)
        return (diff & 1) ? ref + ((diff + 1) >> 1)
                          : ref - (diff >> 1);

    // tail codes land on whichever side still has values left
    return 2 * ref < max ? diff : max - (diff + 1);
}
+
+/*
+ * Scan the top and left neighbour blocks (plus the top-left and top-right
+ * corners, when available) for single-reference blocks using the same
+ * reference frame, to collect candidate warp-motion samples.
+ *
+ * masks[0] collects top-edge hits (one bit per neighbour block along the
+ * edge, bit 32 = top-right corner); masks[1] collects left-edge hits
+ * (bit 32 = top-left corner). Collection stops after 8 matches, the
+ * maximum number of warp samples.
+ *
+ * NOTE: the bs() macro is deliberately left defined here; derive_warpmv()
+ * below reuses it and performs the matching #undef.
+ */
+static void find_matching_ref(const Dav1dTileContext *const t,
+                              const enum EdgeFlags intra_edge_flags,
+                              const int bw4, const int bh4,
+                              const int w4, const int h4,
+                              const int have_left, const int have_top,
+                              const int ref, uint64_t masks[2])
+{
+    const Dav1dFrameContext *const f = t->f;
+    const ptrdiff_t b4_stride = f->b4_stride;
+    const refmvs *const r = &f->mvs[t->by * b4_stride + t->bx];
+    int count = 0;
+    int have_topleft = have_top && have_left;
+    int have_topright = imax(bw4, bh4) < 32 &&
+                        have_top && t->bx + bw4 < t->ts->tiling.col_end &&
+                        (intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT);
+
+#define bs(rp) av1_block_dimensions[sbtype_to_bs[(rp)->sb_type]]
+#define matches(rp) ((rp)->ref[0] == ref + 1 && (rp)->ref[1] == -1)
+
+    if (have_top) {
+        const refmvs *r2 = &r[-b4_stride];
+        if (matches(r2)) {
+            masks[0] |= 1;
+            count = 1;
+        }
+        int aw4 = bs(r2)[0];
+        if (aw4 >= bw4) {
+            // single above neighbour covers the whole edge; its alignment
+            // decides whether the corners are still usable
+            const int off = t->bx & (aw4 - 1);
+            if (off) have_topleft = 0;
+            if (aw4 - off > bw4) have_topright = 0;
+        } else {
+            // walk the remaining above neighbours along the edge
+            unsigned mask = 1 << aw4;
+            for (int x = aw4; x < w4; x += aw4) {
+                r2 += aw4;
+                if (matches(r2)) {
+                    masks[0] |= mask;
+                    if (++count >= 8) return;
+                }
+                aw4 = bs(r2)[0];
+                mask <<= aw4;
+            }
+        }
+    }
+    if (have_left) {
+        const refmvs *r2 = &r[-1];
+        if (matches(r2)) {
+            masks[1] |= 1;
+            if (++count >= 8) return;
+        }
+        int lh4 = bs(r2)[1];
+        if (lh4 >= bh4) {
+            if (t->by & (lh4 - 1)) have_topleft = 0;
+        } else {
+            // walk the remaining left neighbours down the edge
+            unsigned mask = 1 << lh4;
+            for (int y = lh4; y < h4; y += lh4) {
+                r2 += lh4 * b4_stride;
+                if (matches(r2)) {
+                    masks[1] |= mask;
+                    if (++count >= 8) return;
+                }
+                lh4 = bs(r2)[1];
+                mask <<= lh4;
+            }
+        }
+    }
+    if (have_topleft && matches(&r[-(1 + b4_stride)])) {
+        masks[1] |= 1ULL << 32;
+        if (++count >= 8) return;
+    }
+    if (have_topright && matches(&r[bw4 - b4_stride])) {
+        masks[0] |= 1ULL << 32;
+    }
+#undef matches
+}
+
+/*
+ * Derive local warp (affine) motion parameters from neighbouring motion
+ * vectors.
+ *
+ * Candidate (source position, projected position) sample pairs are
+ * gathered from the edge blocks flagged in masks[] (see
+ * find_matching_ref(); the bs() macro defined there is reused here).
+ * Samples whose mv differs from the block mv by more than a
+ * block-size-dependent threshold are discarded, the survivors are
+ * compacted to the front of pts[], and find_affine_int() /
+ * get_shear_params() compute the model. On any failure wmp->type falls
+ * back to WM_TYPE_IDENTITY.
+ */
+static void derive_warpmv(const Dav1dTileContext *const t,
+                          const int bw4, const int bh4,
+                          const uint64_t masks[2], const struct mv mv,
+                          WarpedMotionParams *const wmp)
+{
+    int pts[8][2 /* in, out */][2 /* x, y */], np = 0;
+    const Dav1dFrameContext *const f = t->f;
+    const ptrdiff_t b4_stride = f->b4_stride;
+    const refmvs *const r = &f->mvs[t->by * b4_stride + t->bx];
+
+// record one sample: source point is the neighbour block centre (in
+// 1/16-pel), projected point adds that block's first mv
+#define add_sample(dx, dy, sx, sy, rp) do { \
+    pts[np][0][0] = 16 * (2 * dx + sx * bs(rp)[0]) - 8; \
+    pts[np][0][1] = 16 * (2 * dy + sy * bs(rp)[1]) - 8; \
+    pts[np][1][0] = pts[np][0][0] + (rp)->mv[0].x; \
+    pts[np][1][1] = pts[np][0][1] + (rp)->mv[0].y; \
+    np++; \
+} while (0)
+
+    // use masks[] to find the projectable motion vectors in the edges
+    if ((unsigned) masks[0] == 1 && !(masks[1] >> 32)) {
+        const int off = t->bx & (bs(&r[-b4_stride])[0] - 1);
+        add_sample(-off, 0, 1, -1, &r[-b4_stride]);
+    } else for (unsigned off = 0, xmask = masks[0]; np < 8 && xmask;) { // top
+        const int tz = __builtin_ctz(xmask);
+        off += tz;
+        add_sample(off, 0, 1, -1, &r[off - b4_stride]);
+        xmask >>= tz + 1;
+        off += 1;
+    }
+    if (np < 8 && masks[1] == 1) {
+        const int off = t->by & (bs(&r[-1])[1] - 1);
+        add_sample(0, -off, -1, 1, &r[-1 - off * b4_stride]);
+    } else for (unsigned off = 0, ymask = masks[1]; np < 8 && ymask;) { // left
+        const int tz = __builtin_ctz(ymask);
+        off += tz;
+        add_sample(0, off, -1, 1, &r[off * b4_stride - 1]);
+        ymask >>= tz + 1;
+        off += 1;
+    }
+    if (np < 8 && masks[1] >> 32) // top/left
+        add_sample(0, 0, -1, -1, &r[-(1 + b4_stride)]);
+    if (np < 8 && masks[0] >> 32) // top/right
+        add_sample(bw4, 0, 1, -1, &r[bw4 - b4_stride]);
+    assert(np > 0 && np <= 8);
+#undef bs
+
+    // select according to motion vector difference against a threshold
+    int mvd[8], ret = 0;
+    const int thresh = 4 * iclip(imax(bw4, bh4), 4, 28);
+    for (int i = 0; i < np; i++) {
+        mvd[i] = labs(pts[i][1][0] - pts[i][0][0] - mv.x) +
+                 labs(pts[i][1][1] - pts[i][0][1] - mv.y);
+        if (mvd[i] > thresh)
+            mvd[i] = -1;
+        else
+            ret++;
+    }
+    if (!ret) {
+        // no sample passed the threshold: keep the first one regardless
+        ret = 1;
+    } else for (int i = 0, j = np - 1, k = 0; k < np - ret; k++, i++, j--) {
+        // compact: overwrite discarded samples (mvd == -1) from the front
+        // with surviving samples taken from the back
+        while (mvd[i] != -1) i++;
+        while (mvd[j] == -1) j--;
+        assert(i != j);
+        if (i > j) break;
+        // replace the discarded samples;
+        mvd[i] = mvd[j];
+        memcpy(pts[i], pts[j], sizeof(*pts));
+    }
+
+    if (!find_affine_int(pts, ret, bw4, bh4, mv, wmp, t->bx, t->by) &&
+        !get_shear_params(wmp))
+    {
+        wmp->type = WM_TYPE_AFFINE;
+    } else
+        wmp->type = WM_TYPE_IDENTITY;
+}
+
/* Return 1 if any byte at an even offset (0, 2, ..., 2*(len-1)) of buf is
 * zero, 0 otherwise. Callers typically pass a pointer offset into an
 * interleaved context array, so the scanned entries are the "odd" ones
 * from the caller's point of view — presumably the reason for the name. */
static inline int findoddzero(const uint8_t *buf, int len) {
    const int end = 2 * len;
    for (int i = 0; i < end; i += 2) {
        if (buf[i] == 0)
            return 1;
    }
    return 0;
}
+
+/*
+ * Decode the palette for one plane (pl = 0: luma, pl = 1: chroma U).
+ *
+ * The palette size (2-8 entries) is context-coded first. Entries are then
+ * built from two sorted sources kept in merged sorted order: a "cache" of
+ * deduplicated above/left neighbour palette entries (the above palette is
+ * only reused within the same SB64 row), of which individual entries are
+ * flagged as reused by one coded bit each, and newly coded entries (a raw
+ * base value followed by ascending deltas with shrinking bit width).
+ * The result is written to the frame-thread palette buffer or the tile
+ * context's scratch palette depending on the decoding pass.
+ */
+static void read_pal_plane(Dav1dTileContext *const t, Av1Block *const b,
+                           const int pl, const int sz_ctx,
+                           const int bx4, const int by4)
+{
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dFrameContext *const f = t->f;
+    const int pal_sz = b->pal_sz[pl] = 2 + msac_decode_symbol_adapt(&ts->msac,
+                                                 ts->cdf.m.pal_sz[pl][sz_ctx], 7);
+    uint16_t cache[16], used_cache[8];
+    int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
+    int n_cache = 0;
+    // don't reuse above palette outside SB64 boundaries
+    int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
+    const uint16_t *l = t->al_pal[1][by4][pl], *a = t->al_pal[0][bx4][pl];
+
+    // fill/sort cache: merge the two already-sorted neighbour palettes,
+    // dropping duplicates
+    while (l_cache && a_cache) {
+        if (*l < *a) {
+            if (!n_cache || cache[n_cache - 1] != *l)
+                cache[n_cache++] = *l;
+            l++;
+            l_cache--;
+        } else {
+            if (*a == *l) {
+                l++;
+                l_cache--;
+            }
+            if (!n_cache || cache[n_cache - 1] != *a)
+                cache[n_cache++] = *a;
+            a++;
+            a_cache--;
+        }
+    }
+    if (l_cache) {
+        do {
+            if (!n_cache || cache[n_cache - 1] != *l)
+                cache[n_cache++] = *l;
+            l++;
+        } while (--l_cache > 0);
+    } else if (a_cache) {
+        do {
+            if (!n_cache || cache[n_cache - 1] != *a)
+                cache[n_cache++] = *a;
+            a++;
+        } while (--a_cache > 0);
+    }
+
+    // find reused cache entries (one unadapted bool per cache entry)
+    int i = 0;
+    for (int n = 0; n < n_cache && i < pal_sz; n++)
+        if (msac_decode_bool(&ts->msac, 128 << 7))
+            used_cache[i++] = cache[n];
+    const int n_used_cache = i;
+
+    // parse new entries
+    uint16_t *const pal = f->frame_thread.pass ?
+        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                            ((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];
+    if (i < pal_sz) {
+        int prev = pal[i++] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+
+        if (i < pal_sz) {
+            int bits = f->cur.p.p.bpc - 3 + msac_decode_bools(&ts->msac, 2);
+            const int max = (1 << f->cur.p.p.bpc) - 1;
+
+            do {
+                // deltas are strictly ascending for luma (+!pl), so the
+                // usable range — and thus the delta bit width — shrinks
+                const int delta = msac_decode_bools(&ts->msac, bits);
+                prev = pal[i++] = imin(prev + delta + !pl, max);
+                if (prev + !pl >= max) {
+                    for (; i < pal_sz; i++)
+                        pal[i] = pal[i - 1];
+                    break;
+                }
+                bits = imin(bits, 1 + ulog2(max - prev - !pl));
+            } while (i < pal_sz);
+        }
+
+        // merge cache+new entries; both lists are sorted, the new entries
+        // were parsed into pal[] starting at index n_used_cache
+        int n = 0, m = n_used_cache;
+        for (i = 0; i < pal_sz; i++) {
+            if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
+                pal[i] = used_cache[n++];
+            } else {
+                assert(m < pal_sz);
+                pal[i] = pal[m++];
+            }
+        }
+    } else {
+        memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
+    }
+
+    if (DEBUG_BLOCK_INFO) {
+        printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
+               pl, b->pal_sz[pl], n_cache, n_used_cache, ts->msac.rng);
+        for (int n = 0; n < n_cache; n++)
+            printf("%c%02x", n ? ' ' : '[', cache[n]);
+        printf("%s, pal=", n_cache ? "]" : "[]");
+        // fix: print this plane's palette size, not luma's (pal_sz[0])
+        for (int n = 0; n < b->pal_sz[pl]; n++)
+            printf("%c%02x", n ? ' ' : '[', pal[n]);
+        printf("]\n");
+    }
+}
+
+/*
+ * Decode the U and V palettes for a chroma-paletted block. U (pl = 1)
+ * reuses the generic plane coder; V (pl = 2, same size as U) has its own
+ * coding: either delta-coded with an explicit sign bit per nonzero delta
+ * and wraparound at the bit depth, or raw bpc-bit values.
+ */
+static void read_pal_uv(Dav1dTileContext *const t, Av1Block *const b,
+                        const int sz_ctx, const int cbx4, const int cby4)
+{
+    read_pal_plane(t, b, 1, sz_ctx, cbx4, cby4);
+
+    // V pal coding
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dFrameContext *const f = t->f;
+    uint16_t *const pal = f->frame_thread.pass ?
+        f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                            ((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
+    if (msac_decode_bool(&ts->msac, 128 << 7)) {
+        // delta-coded: signed deltas from the previous entry, wrapping at
+        // the bit depth (& max)
+        const int bits = f->cur.p.p.bpc - 4 + msac_decode_bools(&ts->msac, 2);
+        int prev = pal[0] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+        const int max = (1 << f->cur.p.p.bpc) - 1;
+        for (int i = 1; i < b->pal_sz[1]; i++) {
+            int delta = msac_decode_bools(&ts->msac, bits);
+            if (delta && msac_decode_bool(&ts->msac, 128 << 7)) delta = -delta;
+            prev = pal[i] = (prev + delta) & max;
+        }
+    } else {
+        // raw values
+        for (int i = 0; i < b->pal_sz[1]; i++)
+            pal[i] = msac_decode_bools(&ts->msac, f->cur.p.p.bpc);
+    }
+    if (DEBUG_BLOCK_INFO) {
+        printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
+        for (int n = 0; n < b->pal_sz[1]; n++)
+            printf("%c%02x", n ? ' ' : '[', pal[n]);
+        printf("]\n");
+    }
+}
+
+// meant to be SIMD'able, so that theoretical complexity of this function
+// times block size goes from w4*h4 to w4+h4-1
+//
+// Computes, for each pixel on anti-diagonal i of the palette index map,
+// the most-probable-first symbol ordering (order[n], all 8 palette
+// indices, neighbours first) and the entropy context ctx[n], from the
+// top/left/top-left neighbouring indices:
+//   ctx 0: only one neighbour available (top or left)
+//   ctx 4: top == left == top-left
+//   ctx 3: top == left != top-left
+//   ctx 2: top-left equals exactly one of top/left
+//   ctx 1: all three differ
+// The diagonal runs from (first, i-first) down-left to (last, i-last);
+// first/last clamp it to the block.
+static void order_palette(const uint8_t *pal_idx, const ptrdiff_t stride,
+                          const int i, const int first, const int last,
+                          uint8_t (*const order)[8], uint8_t *const ctx)
+{
+    int have_top = i > first;
+
+    pal_idx += first + (i - first) * stride;
+    for (int j = first, n = 0; j >= last; have_top = 1, j--, n++, pal_idx += stride - 1) {
+        const int have_left = j > 0;
+
+        assert(have_left || have_top);
+
+// append palette index v to the order list and mark it as placed
+#define add(v_in) do { \
+        const int v = v_in; \
+        assert(v < 8U); \
+        order[n][o_idx++] = v; \
+        mask |= 1 << v; \
+    } while (0)
+
+        unsigned mask = 0;
+        int o_idx = 0;
+        if (!have_left) {
+            ctx[n] = 0;
+            add(pal_idx[-stride]);
+        } else if (!have_top) {
+            ctx[n] = 0;
+            add(pal_idx[-1]);
+        } else {
+            const int l = pal_idx[-1], t = pal_idx[-stride], tl = pal_idx[-(stride + 1)];
+            const int same_t_l = t == l;
+            const int same_t_tl = t == tl;
+            const int same_l_tl = l == tl;
+            const int same_all = same_t_l & same_t_tl & same_l_tl;
+
+            if (same_all) {
+                ctx[n] = 4;
+                add(t);
+            } else if (same_t_l) {
+                ctx[n] = 3;
+                add(t);
+                add(tl);
+            } else if (same_t_tl | same_l_tl) {
+                ctx[n] = 2;
+                add(tl);
+                add(same_t_tl ? l : t);
+            } else {
+                ctx[n] = 1;
+                add(imin(t, l));
+                add(imax(t, l));
+                add(tl);
+            }
+        }
+        // fill the remaining order slots with the unused indices, ascending
+        for (unsigned m = 1, bit = 0; m < 0x100; m <<= 1, bit++)
+            if (!(mask & m))
+                order[n][o_idx++] = bit;
+        assert(o_idx == 8);
+#undef add
+    }
+}
+
+/*
+ * Decode the per-pixel palette index map for one plane into pal_idx
+ * (stride = bw4 * 4). The first (top-left) pixel is uniform-coded; the
+ * remaining pixels are coded along anti-diagonals ("wave-front"), each
+ * with a neighbour-derived context and symbol reordering from
+ * order_palette(). Indices in the invisible right/bottom padding — where
+ * the block extends past the visible frame (w4/h4 < bw4/bh4) — are
+ * replicated from the last visible column/row.
+ */
+static void read_pal_indices(Dav1dTileContext *const t,
+                             uint8_t *const pal_idx,
+                             const Av1Block *const b, const int pl,
+                             const int w4, const int h4,
+                             const int bw4, const int bh4)
+{
+    Dav1dTileState *const ts = t->ts;
+    const ptrdiff_t stride = bw4 * 4;
+    pal_idx[0] = msac_decode_uniform(&ts->msac, b->pal_sz[pl]);
+    uint16_t (*const color_map_cdf)[8 + 1] =
+        ts->cdf.m.color_map[pl][b->pal_sz[pl] - 2];
+    for (int i = 1; i < 4 * (w4 + h4) - 1; i++) {
+        // top/left-to-bottom/right diagonals ("wave-front")
+        uint8_t order[64][8], ctx[64];
+        const int first = imin(i, w4 * 4 - 1);
+        const int last = imax(0, i - h4 * 4 + 1);
+        order_palette(pal_idx, stride, i, first, last, order, ctx);
+        for (int j = first, m = 0; j >= last; j--, m++) {
+            const int color_idx =
+                msac_decode_symbol_adapt(&ts->msac, color_map_cdf[ctx[m]],
+                                         b->pal_sz[pl]);
+            pal_idx[(i - j) * stride + j] = order[m][color_idx];
+        }
+    }
+    // fill invisible edges
+    if (bw4 > w4)
+        for (int y = 0; y < 4 * h4; y++)
+            memset(&pal_idx[y * stride + 4 * w4],
+                   pal_idx[y * stride + 4 * w4 - 1], 4 * (bw4 - w4));
+    if (h4 < bh4) {
+        const uint8_t *const src = &pal_idx[stride * (4 * h4 - 1)];
+        for (int y = h4 * 4; y < bh4 * 4; y++)
+            memcpy(&pal_idx[y * stride], src, bw4 * 4);
+    }
+}
+
+/*
+ * Select or decode the transform sizes for an inter block.
+ *
+ * Lossless segments and blocks whose maximum transform is already TX_4X4
+ * force TX_4X4 everywhere; non-switchable txfm modes and skipped blocks
+ * use the largest allowed transform; otherwise the per-transform-unit
+ * split tree is entropy-decoded via read_tx_tree() into b->tx_split[].
+ * Also sets the chroma transform size (b->uvtx) and keeps the above/left
+ * tx context arrays up to date (read_tx_tree() updates them itself in the
+ * decoded case).
+ */
+static void read_vartx_tree(Dav1dTileContext *const t,
+                            Av1Block *const b, const enum BlockSize bs,
+                            const int bx4, const int by4)
+{
+    const Dav1dFrameContext *const f = t->f;
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bw4 = b_dim[0], bh4 = b_dim[1];
+
+    // var-tx tree coding
+    b->tx_split[0] = b->tx_split[1] = 0;
+    b->max_ytx = av1_max_txfm_size_for_bs[bs][0];
+    if (f->frame_hdr.segmentation.lossless[b->seg_id] ||
+        b->max_ytx == TX_4X4)
+    {
+        b->max_ytx = b->uvtx = TX_4X4;
+        if (f->frame_hdr.txfm_mode == TX_SWITCHABLE) {
+            memset(&t->a->tx[bx4], TX_4X4, bw4);
+            memset(&t->l.tx[by4], TX_4X4, bh4);
+        }
+    } else if (f->frame_hdr.txfm_mode != TX_SWITCHABLE || b->skip) {
+        if (f->frame_hdr.txfm_mode == TX_SWITCHABLE) {
+            memset(&t->a->tx[bx4], b_dim[2], bw4);
+            memset(&t->l.tx[by4], b_dim[3], bh4);
+        } else {
+            assert(f->frame_hdr.txfm_mode == TX_LARGEST);
+        }
+        b->uvtx = av1_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+    } else {
+        assert(imin(bw4, bh4) <= 16 || b->max_ytx == TX_64X64);
+        // iterate over all max-size transform units in the block and
+        // decode a split tree for each, walking t->bx/t->by along
+        int y, x, y_off, x_off;
+        const TxfmInfo *const ytx = &av1_txfm_dimensions[b->max_ytx];
+        for (y = 0, y_off = 0; y < bh4; y += ytx->h, y_off++) {
+            for (x = 0, x_off = 0; x < bw4; x += ytx->w, x_off++) {
+                read_tx_tree(t, b->max_ytx, 0, b->tx_split, x_off, y_off);
+                // contexts are updated inside read_tx_tree()
+                t->bx += ytx->w;
+            }
+            t->bx -= x;
+            t->by += ytx->h;
+        }
+        t->by -= y;
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-vartxtree[%x/%x]: r=%d\n",
+                   b->tx_split[0], b->tx_split[1], t->ts->msac.rng);
+        b->uvtx = av1_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+    }
+}
+
+/*
+ * Predict this block's segment id from the previous frame's segmentation
+ * map: the minimum segment id over the co-located w4*h4 area of 4x4
+ * units. With threaded decoding, first waits until the primary reference
+ * frame's block data is available up to the rows this block covers.
+ */
+static inline unsigned get_prev_frame_segid(const Dav1dFrameContext *const f,
+                                            const int by, const int bx,
+                                            const int w4, int h4,
+                                            const uint8_t *ref_seg_map,
+                                            const ptrdiff_t stride)
+{
+    unsigned seg_id = 8; // start above the maximum valid id (7)
+
+    assert(f->frame_hdr.primary_ref_frame != PRIMARY_REF_NONE);
+    dav1d_thread_picture_wait(&f->refp[f->frame_hdr.primary_ref_frame],
+                              (by + h4) * 4, PLANE_TYPE_BLOCK);
+
+    ref_seg_map += by * stride + bx;
+    do {
+        for (int x = 0; x < w4; x++)
+            seg_id = imin(seg_id, ref_seg_map[x]);
+        ref_seg_map += stride;
+    } while (--h4 > 0);
+    assert(seg_id < 8);
+
+    return seg_id;
+}
+
+static void decode_b(Dav1dTileContext *const t,
+                     const enum BlockLevel bl,
+                     const enum BlockSize bs,
+                     const enum BlockPartition bp,
+                     const enum EdgeFlags intra_edge_flags)
+{
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dFrameContext *const f = t->f;
+    Av1Block b_mem, *const b = f->frame_thread.pass ?
+        &f->frame_thread.b[t->by * f->b4_stride + t->bx] : &b_mem;
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bx4 = t->bx & 31, by4 = t->by & 31;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
+    const int bw4 = b_dim[0], bh4 = b_dim[1];
+    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
+    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
+    const int have_left = t->bx > ts->tiling.col_start;
+    const int have_top = t->by > ts->tiling.row_start;
+    const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
+                           (bw4 > ss_hor || t->bx & 1) &&
+                           (bh4 > ss_ver || t->by & 1);
+
+    if (f->frame_thread.pass == 2) {
+        if (b->intra) {
+            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
+
+            if (has_chroma) {
+                memset(&t->l.uvmode[cby4], b->uv_mode, cbh4);
+                memset(&t->a->uvmode[cbx4], b->uv_mode, cbw4);
+            }
+            const enum IntraPredMode y_mode_nofilt =
+                b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
+            memset(&t->l.mode[by4], y_mode_nofilt, bh4);
+            memset(&t->a->mode[bx4], y_mode_nofilt, bw4);
+        } else {
+            if (b->comp_type == COMP_INTER_NONE && b->motion_mode == MM_WARP) {
+                uint64_t mask[2] = { 0, 0 };
+                find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
+                                  have_left, have_top, b->ref[0], mask);
+                derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
+            }
+            f->bd_fn.recon_b_inter(t, bs, b);
+
+            const uint8_t *const filter = eve_av1_filter_dir[b->filter2d];
+            memset(&t->l.filter[0][by4], filter[0], bh4);
+            memset(&t->a->filter[0][bx4], filter[0], bw4);
+            memset(&t->l.filter[1][by4], filter[1], bh4);
+            memset(&t->a->filter[1][bx4], filter[1], bw4);
+            if (has_chroma) {
+                memset(&t->l.uvmode[cby4], DC_PRED, cbh4);
+                memset(&t->a->uvmode[cbx4], DC_PRED, cbw4);
+            }
+        }
+        memset(&t->l.intra[by4], b->intra, bh4);
+        memset(&t->a->intra[bx4], b->intra, bw4);
+        return;
+    }
+
+    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
+
+    b->bl = bl;
+    b->bp = bp;
+    b->bs = bs;
+
+    // skip_mode
+    if (f->frame_hdr.skip_mode_enabled && imin(bw4, bh4) > 1) {
+        const int smctx = t->a->skip_mode[bx4] + t->l.skip_mode[by4];
+        b->skip_mode = msac_decode_bool_adapt(&ts->msac,
+                                              ts->cdf.m.skip_mode[smctx]);
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-skipmode[%d]: r=%d\n", b->skip_mode, ts->msac.rng);
+    } else {
+        b->skip_mode = 0;
+    }
+
+    // segment_id (if seg_feature for skip/ref/gmv is enabled)
+    int seg_pred = 0;
+    if (f->frame_hdr.segmentation.enabled) {
+        if (!f->frame_hdr.segmentation.update_map) {
+            b->seg_id = f->prev_segmap ?
+                        get_prev_frame_segid(f, t->by, t->bx, w4, h4,
+                                             f->prev_segmap, f->b4_stride) : 0;
+        } else if (f->frame_hdr.segmentation.seg_data.preskip) {
+            if (f->frame_hdr.segmentation.temporal &&
+                (seg_pred = msac_decode_bool_adapt(&ts->msac,
+                                       ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
+                                                          t->l.seg_pred[by4]])))
+            {
+                // temporal predicted seg_id
+                b->seg_id = f->prev_segmap ?
+                            get_prev_frame_segid(f, t->by, t->bx, w4, h4,
+                                                 f->prev_segmap, f->b4_stride) : 0;
+            } else {
+                int seg_ctx;
+                const unsigned pred_seg_id =
+                    get_cur_frame_segid(t->by, t->bx, have_top, have_left,
+                                        &seg_ctx, f->cur_segmap, f->b4_stride);
+                const unsigned diff = msac_decode_symbol_adapt(&ts->msac,
+                                                   ts->cdf.m.seg_id[seg_ctx],
+                                                   NUM_SEGMENTS);
+                const unsigned last_active_seg_id =
+                    f->frame_hdr.segmentation.seg_data.last_active_segid;
+                b->seg_id = av1_neg_deinterleave(diff, pred_seg_id,
+                                                 last_active_seg_id + 1);
+                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
+            }
+
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-segid[preskip;%d]: r=%d\n",
+                       b->seg_id, ts->msac.rng);
+        }
+    } else {
+        b->seg_id = 0;
+    }
+
+    // skip
+    const int sctx = t->a->skip[bx4] + t->l.skip[by4];
+    b->skip = b->skip_mode ? 1 :
+              msac_decode_bool_adapt(&ts->msac, ts->cdf.m.skip[sctx]);
+    if (DEBUG_BLOCK_INFO)
+        printf("Post-skip[%d]: r=%d\n", b->skip, ts->msac.rng);
+
+    // segment_id
+    if (f->frame_hdr.segmentation.enabled &&
+        f->frame_hdr.segmentation.update_map &&
+        !f->frame_hdr.segmentation.seg_data.preskip)
+    {
+        if (!b->skip && f->frame_hdr.segmentation.temporal &&
+            (seg_pred = msac_decode_bool_adapt(&ts->msac,
+                                   ts->cdf.m.seg_pred[t->a->seg_pred[bx4] +
+                                                      t->l.seg_pred[by4]])))
+        {
+            // temporal predicted seg_id
+            b->seg_id = f->prev_segmap ?
+                        get_prev_frame_segid(f, t->by, t->bx, w4, h4,
+                                             f->prev_segmap, f->b4_stride) : 0;
+        } else {
+            int seg_ctx;
+            const unsigned pred_seg_id =
+                get_cur_frame_segid(t->by, t->bx, have_top, have_left,
+                                    &seg_ctx, f->cur_segmap, f->b4_stride);
+            if (b->skip) {
+                b->seg_id = pred_seg_id;
+            } else {
+                const unsigned diff = msac_decode_symbol_adapt(&ts->msac,
+                                                   ts->cdf.m.seg_id[seg_ctx],
+                                                   NUM_SEGMENTS);
+                const unsigned last_active_seg_id =
+                    f->frame_hdr.segmentation.seg_data.last_active_segid;
+                b->seg_id = av1_neg_deinterleave(diff, pred_seg_id,
+                                                 last_active_seg_id + 1);
+                if (b->seg_id > last_active_seg_id) b->seg_id = 0; // error?
+            }
+        }
+
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-segid[postskip;%d]: r=%d\n",
+                   b->seg_id, ts->msac.rng);
+    }
+
+    // cdef index
+    if (!b->skip) {
+        const int idx = f->seq_hdr.sb128 ? ((t->bx & 16) >> 4) +
+                                           ((t->by & 16) >> 3) : 0;
+        if (t->cur_sb_cdef_idx_ptr[idx] == -1) {
+            const int v = msac_decode_bools(&ts->msac, f->frame_hdr.cdef.n_bits);
+            t->cur_sb_cdef_idx_ptr[idx] = v;
+            if (bw4 > 16) t->cur_sb_cdef_idx_ptr[idx + 1] = v;
+            if (bh4 > 16) t->cur_sb_cdef_idx_ptr[idx + 2] = v;
+            if (bw4 == 32 && bh4 == 32) t->cur_sb_cdef_idx_ptr[idx + 3] = v;
+
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-cdef_idx[%d]: r=%d\n",
+                        *t->cur_sb_cdef_idx_ptr, ts->msac.rng);
+        }
+    }
+
+    // delta-q/lf
+    if (!(t->bx & (31 >> !f->seq_hdr.sb128)) &&
+        !(t->by & (31 >> !f->seq_hdr.sb128)))
+    {
+        const int prev_qidx = ts->last_qidx;
+        const int have_delta_q = f->frame_hdr.delta_q_present &&
+            (bs != (f->seq_hdr.sb128 ? BS_128x128 : BS_64x64) || !b->skip);
+        if (have_delta_q) {
+            int delta_q = msac_decode_symbol_adapt(&ts->msac, ts->cdf.m.delta_q, 4);
+            if (delta_q == 3) {
+                const int n_bits = 1 + msac_decode_bools(&ts->msac, 3);
+                delta_q = msac_decode_bools(&ts->msac, n_bits) + 1 + (1 << n_bits);
+            }
+            if (delta_q) {
+                if (msac_decode_bool(&ts->msac, 128 << 7)) delta_q = -delta_q;
+                delta_q *= 1 << f->frame_hdr.delta_q_res_log2;
+            }
+            ts->last_qidx = iclip(ts->last_qidx + delta_q, 1, 255);
+            if (have_delta_q && DEBUG_BLOCK_INFO)
+                printf("Post-delta_q[%d->%d]: r=%d\n",
+                       delta_q, ts->last_qidx, ts->msac.rng);
+        }
+        if (ts->last_qidx == f->frame_hdr.quant.yac) {
+            // assign frame-wide q values to this sb
+            ts->dq = f->dq;
+        } else if (ts->last_qidx != prev_qidx) {
+            // find sb-specific quant parameters
+            init_quant_tables(&f->seq_hdr, &f->frame_hdr, ts->last_qidx, ts->dqmem);
+            ts->dq = ts->dqmem;
+        }
+
+        // delta_lf
+        int8_t prev_delta_lf[4];
+        memcpy(prev_delta_lf, ts->last_delta_lf, 4);
+        if (have_delta_q && f->frame_hdr.delta_lf_present) {
+            const int n_lfs = f->frame_hdr.delta_lf_multi ?
+                f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 ? 4 : 2 : 1;
+
+            for (int i = 0; i < n_lfs; i++) {
+                int delta_lf = msac_decode_symbol_adapt(&ts->msac,
+                                ts->cdf.m.delta_lf[i + f->frame_hdr.delta_lf_multi], 4);
+                if (delta_lf == 3) {
+                    const int n_bits = 1 + msac_decode_bools(&ts->msac, 3);
+                    delta_lf = msac_decode_bools(&ts->msac, n_bits) + 1 + (1 << n_bits);
+                }
+                if (delta_lf) {
+                    if (msac_decode_bool(&ts->msac, 128 << 7)) delta_lf = -delta_lf;
+                    delta_lf *= 1 << f->frame_hdr.delta_lf_res_log2;
+                }
+                ts->last_delta_lf[i] = iclip(ts->last_delta_lf[i] + delta_lf, -63, 63);
+                if (have_delta_q && DEBUG_BLOCK_INFO)
+                    printf("Post-delta_lf[%d:%d]: r=%d\n", i, delta_lf, ts->msac.rng);
+            }
+        }
+        if (!memcmp(ts->last_delta_lf, (int8_t[4]) { 0, 0, 0, 0 }, 4)) {
+            // assign frame-wide lf values to this sb
+            ts->lflvl = f->lf.lvl;
+        } else if (memcmp(ts->last_delta_lf, prev_delta_lf, 4)) {
+            // find sb-specific lf lvl parameters
+            dav1d_calc_lf_values(ts->lflvlmem, &f->frame_hdr, ts->last_delta_lf);
+            ts->lflvl = ts->lflvlmem;
+        }
+    }
+
+    if (b->skip_mode) {
+        b->intra = 0;
+    } else if (f->frame_hdr.frame_type & 1) {
+        const int ictx = get_intra_ctx(t->a, &t->l, by4, bx4,
+                                       have_top, have_left);
+        b->intra = !msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intra[ictx]);
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-intra[%d]: r=%d\n", b->intra, ts->msac.rng);
+    } else if (f->frame_hdr.allow_intrabc) {
+        b->intra = !msac_decode_bool_adapt(&ts->msac, ts->cdf.m.intrabc);
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-intrabcflag[%d]: r=%d\n", b->intra, ts->msac.rng);
+    } else {
+        b->intra = 1;
+    }
+
+    // intra/inter-specific stuff
+    if (b->intra) {
+        uint16_t *const ymode_cdf = f->frame_hdr.frame_type & 1 ?
+            ts->cdf.m.y_mode[av1_ymode_size_context[bs]] :
+            ts->cdf.kfym[intra_mode_context[t->a->mode[bx4]]]
+                        [intra_mode_context[t->l.mode[by4]]];
+        b->y_mode = msac_decode_symbol_adapt(&ts->msac, ymode_cdf,
+                                              N_INTRA_PRED_MODES);
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-ymode[%d]: r=%d\n", b->y_mode, ts->msac.rng);
+
+        // angle delta
+        if (b_dim[2] + b_dim[3] >= 2 && b->y_mode >= VERT_PRED &&
+            b->y_mode <= VERT_LEFT_PRED)
+        {
+            uint16_t *const acdf = ts->cdf.m.angle_delta[b->y_mode - VERT_PRED];
+            const int angle = msac_decode_symbol_adapt(&ts->msac, acdf, 7);
+            b->y_angle = angle - 3;
+        } else {
+            b->y_angle = 0;
+        }
+
+        if (has_chroma) {
+            const int cfl_allowed = !!(cfl_allowed_mask & (1 << bs));
+            uint16_t *const uvmode_cdf = ts->cdf.m.uv_mode[cfl_allowed][b->y_mode];
+            b->uv_mode = msac_decode_symbol_adapt(&ts->msac, uvmode_cdf,
+                                         N_UV_INTRA_PRED_MODES - !cfl_allowed);
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-uvmode[%d]: r=%d\n", b->uv_mode, ts->msac.rng);
+
+            if (b->uv_mode == CFL_PRED) {
+#define SIGN(a) (!!(a) + ((a) > 0))
+                const int sign =
+                    msac_decode_symbol_adapt(&ts->msac, ts->cdf.m.cfl_sign, 8) + 1;
+                const int sign_u = sign * 0x56 >> 8, sign_v = sign - sign_u * 3;
+                assert(sign_u == sign / 3);
+                if (sign_u) {
+                    const int ctx = (sign_u == 2) * 3 + sign_v;
+                    b->cfl_alpha[0] = msac_decode_symbol_adapt(&ts->msac,
+                                            ts->cdf.m.cfl_alpha[ctx], 16) + 1;
+                    if (sign_u == 1) b->cfl_alpha[0] = -b->cfl_alpha[0];
+                } else {
+                    b->cfl_alpha[0] = 0;
+                }
+                if (sign_v) {
+                    const int ctx = (sign_v == 2) * 3 + sign_u;
+                    b->cfl_alpha[1] = msac_decode_symbol_adapt(&ts->msac,
+                                            ts->cdf.m.cfl_alpha[ctx], 16) + 1;
+                    if (sign_v == 1) b->cfl_alpha[1] = -b->cfl_alpha[1];
+                } else {
+                    b->cfl_alpha[1] = 0;
+                }
+#undef SIGN
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-uvalphas[%d/%d]: r=%d\n",
+                           b->cfl_alpha[0], b->cfl_alpha[1], ts->msac.rng);
+            } else if (b_dim[2] + b_dim[3] >= 2 && b->uv_mode >= VERT_PRED &&
+                       b->uv_mode <= VERT_LEFT_PRED)
+            {
+                uint16_t *const acdf = ts->cdf.m.angle_delta[b->uv_mode - VERT_PRED];
+                const int angle = msac_decode_symbol_adapt(&ts->msac, acdf, 7);
+                b->uv_angle = angle - 3;
+            } else {
+                b->uv_angle = 0;
+            }
+        }
+
+        b->pal_sz[0] = b->pal_sz[1] = 0;
+        if (f->frame_hdr.allow_screen_content_tools &&
+            imax(bw4, bh4) <= 16 && bw4 + bh4 >= 4)
+        {
+            const int sz_ctx = b_dim[2] + b_dim[3] - 2;
+            if (b->y_mode == DC_PRED) {
+                const int pal_ctx = (t->a->pal_sz[bx4] > 0) + (t->l.pal_sz[by4] > 0);
+                const int use_y_pal =
+                    msac_decode_bool_adapt(&ts->msac, ts->cdf.m.pal_y[sz_ctx][pal_ctx]);
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-y_pal[%d]: r=%d\n", use_y_pal, ts->msac.rng);
+                if (use_y_pal)
+                    read_pal_plane(t, b, 0, sz_ctx, bx4, by4);
+            }
+
+            if (has_chroma && b->uv_mode == DC_PRED) {
+                const int pal_ctx = b->pal_sz[0] > 0;
+                const int use_uv_pal =
+                    msac_decode_bool_adapt(&ts->msac, ts->cdf.m.pal_uv[pal_ctx]);
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-uv_pal[%d]: r=%d\n", use_uv_pal, ts->msac.rng);
+                if (use_uv_pal)
+                    read_pal_uv(t, b, sz_ctx, cbx4, cby4);
+            }
+        }
+
+        if (b->y_mode == DC_PRED && !b->pal_sz[0] &&
+            imax(b_dim[2], b_dim[3]) <= 3 && f->seq_hdr.filter_intra)
+        {
+            const int is_filter = msac_decode_bool_adapt(&ts->msac,
+                                            ts->cdf.m.use_filter_intra[bs]);
+            if (is_filter) {
+                b->y_mode = FILTER_PRED;
+                b->y_angle = msac_decode_symbol_adapt(&ts->msac,
+                                                  ts->cdf.m.filter_intra, 5);
+            }
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-filterintramode[%d/%d]: r=%d\n",
+                       b->y_mode, b->y_angle, ts->msac.rng);
+        }
+
+        if (b->pal_sz[0]) {
+            uint8_t *pal_idx;
+            if (f->frame_thread.pass) {
+                pal_idx = ts->frame_thread.pal_idx;
+                ts->frame_thread.pal_idx += bw4 * bh4 * 16;
+            } else
+                pal_idx = t->scratch.pal_idx;
+            read_pal_indices(t, pal_idx, b, 0, w4, h4, bw4, bh4);
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-y-pal-indices: r=%d\n", ts->msac.rng);
+        }
+
+        if (has_chroma && b->pal_sz[1]) {
+            uint8_t *pal_idx;
+            if (f->frame_thread.pass) {
+                pal_idx = ts->frame_thread.pal_idx;
+                ts->frame_thread.pal_idx += cbw4 * cbh4 * 16;
+            } else
+                pal_idx = &t->scratch.pal_idx[bw4 * bh4 * 16];
+            read_pal_indices(t, pal_idx, b, 1, cw4, ch4, cbw4, cbh4);
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-uv-pal-indices: r=%d\n", ts->msac.rng);
+        }
+
+        const TxfmInfo *t_dim;
+        if (f->frame_hdr.segmentation.lossless[b->seg_id]) {
+            b->tx = b->uvtx = (int) TX_4X4;
+            t_dim = &av1_txfm_dimensions[TX_4X4];
+        } else {
+            b->tx = av1_max_txfm_size_for_bs[bs][0];
+            b->uvtx = av1_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+            t_dim = &av1_txfm_dimensions[b->tx];
+            if (f->frame_hdr.txfm_mode == TX_SWITCHABLE && t_dim->max > TX_4X4) {
+                const int tctx = get_tx_ctx(t->a, &t->l, t_dim, by4, bx4,
+                                            have_top, have_left);
+                uint16_t *const tx_cdf = ts->cdf.m.txsz[t_dim->max - 1][tctx];
+                int depth = msac_decode_symbol_adapt(&ts->msac, tx_cdf,
+                                                     imin(t_dim->max + 1, 3));
+
+                while (depth--) {
+                    b->tx = t_dim->sub;
+                    t_dim = &av1_txfm_dimensions[b->tx];
+                }
+            }
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-tx[%d]: r=%d\n", b->tx, ts->msac.rng);
+        }
+
+        // reconstruction
+        if (f->frame_thread.pass == 1) {
+            f->bd_fn.read_coef_blocks(t, bs, b);
+        } else {
+            f->bd_fn.recon_b_intra(t, bs, intra_edge_flags, b);
+        }
+
+        dav1d_create_lf_mask_intra(t->lf_mask, f->lf.level, f->b4_stride,
+                                   &f->frame_hdr, (const uint8_t (*)[8][2])
+                                   &ts->lflvl[b->seg_id][0][0][0],
+                                   t->bx, t->by, f->bw, f->bh, bs,
+                                   b->tx, b->uvtx, f->cur.p.p.layout,
+                                   &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
+                                   has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
+                                   has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
+
+        // update contexts
+        memset(&t->a->tx_intra[bx4], t_dim->lw, bw4);
+        memset(&t->l.tx_intra[by4], t_dim->lh, bh4);
+        const enum IntraPredMode y_mode_nofilt =
+            b->y_mode == FILTER_PRED ? DC_PRED : b->y_mode;
+        memset(&t->l.mode[by4], y_mode_nofilt, bh4);
+        memset(&t->a->mode[bx4], y_mode_nofilt, bw4);
+        memset(&t->l.pal_sz[by4], b->pal_sz[0], bh4);
+        memset(&t->a->pal_sz[bx4], b->pal_sz[0], bw4);
+        if (b->pal_sz[0]) {
+            uint16_t *const pal = f->frame_thread.pass ?
+                f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                                    ((t->bx >> 1) + (t->by & 1))][0] : t->pal[0];
+            for (int x = 0; x < bw4; x++)
+                memcpy(t->al_pal[0][bx4 + x][0], pal, 16);
+            for (int y = 0; y < bh4; y++)
+                memcpy(t->al_pal[1][by4 + y][0], pal, 16);
+        }
+        if (has_chroma) {
+            memset(&t->l.uvmode[cby4], b->uv_mode, cbh4);
+            memset(&t->a->uvmode[cbx4], b->uv_mode, cbw4);
+            memset(&t->pal_sz_uv[1][cby4], b->pal_sz[1], cbh4);
+            memset(&t->pal_sz_uv[0][cbx4], b->pal_sz[1], cbw4);
+            if (b->pal_sz[1]) for (int pl = 1; pl < 3; pl++) {
+                uint16_t *const pal = f->frame_thread.pass ?
+                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                                        ((t->bx >> 1) + (t->by & 1))][pl] : t->pal[pl];
+                for (int x = 0; x < cbw4; x++)
+                    memcpy(t->al_pal[0][cbx4 + x][pl], pal, 16);
+                for (int y = 0; y < cbh4; y++)
+                    memcpy(t->al_pal[1][cby4 + y][pl], pal, 16);
+            }
+        }
+        if ((f->frame_hdr.frame_type & 1) || f->frame_hdr.allow_intrabc) {
+            memset(&t->a->tx[bx4], t_dim->lw, bw4);
+            memset(&t->l.tx[by4], t_dim->lh, bh4);
+            splat_intraref(f->mvs, f->b4_stride, t->by, t->bx, bs,
+                           y_mode_nofilt);
+        }
+        if (f->frame_hdr.frame_type & 1) {
+            memset(&t->l.comp_type[by4], COMP_INTER_NONE, bh4);
+            memset(&t->a->comp_type[bx4], COMP_INTER_NONE, bw4);
+            memset(&t->l.ref[0][by4], -1, bh4);
+            memset(&t->a->ref[0][bx4], -1, bw4);
+            memset(&t->l.ref[1][by4], -1, bh4);
+            memset(&t->a->ref[1][bx4], -1, bw4);
+            memset(&t->l.filter[0][by4], N_SWITCHABLE_FILTERS, bh4);
+            memset(&t->a->filter[0][bx4], N_SWITCHABLE_FILTERS, bw4);
+            memset(&t->l.filter[1][by4], N_SWITCHABLE_FILTERS, bh4);
+            memset(&t->a->filter[1][bx4], N_SWITCHABLE_FILTERS, bw4);
+        }
+    } else if (!(f->frame_hdr.frame_type & 1)) {
+        // intra block copy
+        candidate_mv mvstack[8];
+        int n_mvs;
+        mv mvlist[2][2];
+        av1_find_ref_mvs(mvstack, &n_mvs, mvlist, NULL,
+                         (int[2]) { -1, -1 }, f->bw, f->bh,
+                         bs, bp, t->by, t->bx, ts->tiling.col_start,
+                         ts->tiling.col_end, ts->tiling.row_start,
+                         ts->tiling.row_end, f->libaom_cm);
+
+        if (mvlist[0][0].y | mvlist[0][0].x)
+            b->mv[0] = mvlist[0][0];
+        else if (mvlist[0][1].y | mvlist[0][1].x)
+            b->mv[0] = mvlist[0][1];
+        else {
+            if (t->by - (16 << f->seq_hdr.sb128) < ts->tiling.row_start) {
+                b->mv[0].y = 0;
+                b->mv[0].x = -(512 << f->seq_hdr.sb128) - 2048;
+            } else {
+                b->mv[0].y = -(512 << f->seq_hdr.sb128);
+                b->mv[0].x = 0;
+            }
+        }
+
+        const struct mv ref = b->mv[0];
+        read_mv_residual(t, &b->mv[0], &ts->cdf.dmv, 0);
+        if (DEBUG_BLOCK_INFO)
+            printf("Post-dmv[%d/%d,ref=%d/%d|%d/%d]: r=%d\n",
+                   b->mv[0].y, b->mv[0].x, ref.y, ref.x,
+                   mvlist[0][0].y, mvlist[0][0].x, ts->msac.rng);
+        read_vartx_tree(t, b, bs, bx4, by4);
+
+        // reconstruction
+        if (f->frame_thread.pass == 1) {
+            f->bd_fn.read_coef_blocks(t, bs, b);
+        } else {
+            f->bd_fn.recon_b_inter(t, bs, b);
+        }
+
+        splat_intrabc_mv(f->mvs, f->b4_stride, t->by, t->bx, bs, b->mv[0]);
+
+        memset(&t->a->tx_intra[bx4], b_dim[2], bw4);
+        memset(&t->l.tx_intra[by4], b_dim[3], bh4);
+        memset(&t->l.mode[by4], DC_PRED, bh4);
+        memset(&t->a->mode[bx4], DC_PRED, bw4);
+        memset(&t->l.pal_sz[by4], 0, bh4);
+        memset(&t->a->pal_sz[bx4], 0, bw4);
+        if (has_chroma) {
+            memset(&t->l.uvmode[cby4], DC_PRED, cbh4);
+            memset(&t->a->uvmode[cbx4], DC_PRED, cbw4);
+            memset(&t->pal_sz_uv[1][cby4], 0, cbh4);
+            memset(&t->pal_sz_uv[0][cbx4], 0, cbw4);
+        }
+    } else {
+        // inter-specific mode/mv coding
+        int is_comp, has_subpel_filter;
+
+        if (b->skip_mode) {
+            is_comp = 1;
+        } else if (f->frame_hdr.switchable_comp_refs && imin(bw4, bh4) > 1) {
+            const int ctx = get_comp_ctx(t->a, &t->l, by4, bx4,
+                                         have_top, have_left);
+            is_comp = msac_decode_bool_adapt(&ts->msac, ts->cdf.m.comp[ctx]);
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-compflag[%d]: r=%d\n", is_comp, ts->msac.rng);
+        } else {
+            is_comp = 0;
+        }
+
+        if (b->skip_mode) {
+            b->ref[0] = f->frame_hdr.skip_mode_refs[0];
+            b->ref[1] = f->frame_hdr.skip_mode_refs[1];
+            b->comp_type = COMP_INTER_AVG;
+            b->inter_mode = NEARESTMV_NEARESTMV;
+            b->drl_idx = 0;
+            has_subpel_filter = 0;
+
+            candidate_mv mvstack[8];
+            int n_mvs, ctx;
+            mv mvlist[2][2];
+            av1_find_ref_mvs(mvstack, &n_mvs, mvlist, &ctx,
+                             (int[2]) { b->ref[0], b->ref[1] }, f->bw, f->bh,
+                             bs, bp, t->by, t->bx, ts->tiling.col_start,
+                             ts->tiling.col_end, ts->tiling.row_start,
+                             ts->tiling.row_end, f->libaom_cm);
+
+            b->mv[0] = mvstack[0].this_mv;
+            b->mv[1] = mvstack[0].comp_mv;
+            if (!f->frame_hdr.hp) {
+                unset_hp_bit(&b->mv[0]);
+                unset_hp_bit(&b->mv[1]);
+            }
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-skipmodeblock[mv=1:y=%d,x=%d,2:y=%d,x=%d,refs=%d+%d\n",
+                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
+                       b->ref[0], b->ref[1]);
+        } else if (is_comp) {
+            const int dir_ctx = get_comp_dir_ctx(t->a, &t->l, by4, bx4,
+                                                 have_top, have_left);
+            if (msac_decode_bool_adapt(&ts->msac, ts->cdf.m.comp_dir[dir_ctx])) {
+                // bidir - first reference (fw)
+                const int ctx1 = av1_get_fwd_ref_ctx(t->a, &t->l, by4, bx4,
+                                                     have_top, have_left);
+                if (msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.comp_fwd_ref[0][ctx1]))
+                {
+                    const int ctx2 = av1_get_fwd_ref_2_ctx(t->a, &t->l, by4, bx4,
+                                                           have_top, have_left);
+                    b->ref[0] = 2 + msac_decode_bool_adapt(&ts->msac,
+                                            ts->cdf.m.comp_fwd_ref[2][ctx2]);
+                } else {
+                    const int ctx2 = av1_get_fwd_ref_1_ctx(t->a, &t->l, by4, bx4,
+                                                           have_top, have_left);
+                    b->ref[0] = msac_decode_bool_adapt(&ts->msac,
+                                            ts->cdf.m.comp_fwd_ref[1][ctx2]);
+                }
+
+                // second reference (bw)
+                const int ctx3 = av1_get_bwd_ref_ctx(t->a, &t->l, by4, bx4,
+                                                     have_top, have_left);
+                if (msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.comp_bwd_ref[0][ctx3]))
+                {
+                    b->ref[1] = 6;
+                } else {
+                    const int ctx4 = av1_get_bwd_ref_1_ctx(t->a, &t->l, by4, bx4,
+                                                           have_top, have_left);
+                    b->ref[1] = 4 + msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.comp_bwd_ref[1][ctx4]);
+                }
+            } else {
+                // unidir
+                const int uctx_p = av1_get_uni_p_ctx(t->a, &t->l, by4, bx4,
+                                                     have_top, have_left);
+                if (msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.comp_uni_ref[0][uctx_p]))
+                {
+                    b->ref[0] = 4;
+                    b->ref[1] = 6;
+                } else {
+                    const int uctx_p1 = av1_get_uni_p1_ctx(t->a, &t->l, by4, bx4,
+                                                           have_top, have_left);
+                    b->ref[0] = 0;
+                    b->ref[1] = 1 + msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.comp_uni_ref[1][uctx_p1]);
+                    if (b->ref[1] == 2) {
+                        const int uctx_p2 = av1_get_uni_p2_ctx(t->a, &t->l, by4, bx4,
+                                                               have_top, have_left);
+                        b->ref[1] += msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.comp_uni_ref[2][uctx_p2]);
+                    }
+                }
+            }
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-refs[%d/%d]: r=%d\n",
+                       b->ref[0], b->ref[1], ts->msac.rng);
+
+            candidate_mv mvstack[8];
+            int n_mvs, ctx;
+            mv mvlist[2][2];
+            av1_find_ref_mvs(mvstack, &n_mvs, mvlist, &ctx,
+                             (int[2]) { b->ref[0], b->ref[1] }, f->bw, f->bh,
+                             bs, bp, t->by, t->bx, ts->tiling.col_start,
+                             ts->tiling.col_end, ts->tiling.row_start,
+                             ts->tiling.row_end, f->libaom_cm);
+
+            b->inter_mode = msac_decode_symbol_adapt(&ts->msac,
+                                             ts->cdf.m.comp_inter_mode[ctx],
+                                             N_COMP_INTER_PRED_MODES);
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-compintermode[%d,ctx=%d,n_mvs=%d]: r=%d\n",
+                       b->inter_mode, ctx, n_mvs, ts->msac.rng);
+
+            const uint8_t *const im = av1_comp_inter_pred_modes[b->inter_mode];
+            b->drl_idx = 0;
+            if (b->inter_mode == NEWMV_NEWMV) {
+                if (n_mvs > 1) {
+                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
+                    b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                             ts->cdf.m.drl_bit[drl_ctx_v1]);
+                    if (b->drl_idx == 1 && n_mvs > 2) {
+                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
+                        b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                             ts->cdf.m.drl_bit[drl_ctx_v2]);
+                    }
+                    if (DEBUG_BLOCK_INFO)
+                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
+                               b->drl_idx, n_mvs, ts->msac.rng);
+                }
+            } else if (im[0] == NEARMV || im[1] == NEARMV) {
+                b->drl_idx = 1;
+                if (n_mvs > 2) {
+                    const int drl_ctx_v2 = get_drl_context(mvstack, 1);
+                    b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                             ts->cdf.m.drl_bit[drl_ctx_v2]);
+                    if (b->drl_idx == 2 && n_mvs > 3) {
+                        const int drl_ctx_v3 = get_drl_context(mvstack, 2);
+                        b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                             ts->cdf.m.drl_bit[drl_ctx_v3]);
+                    }
+                    if (DEBUG_BLOCK_INFO)
+                        printf("Post-drlidx[%d,n_mvs=%d]: r=%d\n",
+                               b->drl_idx, n_mvs, ts->msac.rng);
+                }
+            }
+
+#define assign_comp_mv(idx, pfx) \
+            switch (im[idx]) { \
+            case NEARMV: \
+            case NEARESTMV: \
+                b->mv[idx] = mvstack[b->drl_idx].pfx##_mv; \
+                if (!f->frame_hdr.hp) unset_hp_bit(&b->mv[idx]); \
+                break; \
+            case GLOBALMV: \
+                has_subpel_filter |= \
+                    f->frame_hdr.gmv[b->ref[idx]].type == WM_TYPE_TRANSLATION; \
+                b->mv[idx] = get_gmv_2d(&f->frame_hdr.gmv[b->ref[idx]], \
+                                        t->bx, t->by, bw4, bh4, &f->frame_hdr); \
+                break; \
+            case NEWMV: \
+                b->mv[idx] = mvstack[b->drl_idx].pfx##_mv; \
+                read_mv_residual(t, &b->mv[idx], &ts->cdf.mv, \
+                                 !f->frame_hdr.force_integer_mv); \
+                break; \
+            }
+            has_subpel_filter = imin(bw4, bh4) == 1 ||
+                                b->inter_mode != GLOBALMV_GLOBALMV;
+            assign_comp_mv(0, this);
+            assign_comp_mv(1, comp);
+#undef assign_comp_mv
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-residual_mv[1:y=%d,x=%d,2:y=%d,x=%d]: r=%d\n",
+                       b->mv[0].y, b->mv[0].x, b->mv[1].y, b->mv[1].x,
+                       ts->msac.rng);
+
+            // jnt_comp vs. seg vs. wedge
+            int is_segwedge = 0;
+            if (f->seq_hdr.masked_compound) {
+                const int mask_ctx = get_mask_comp_ctx(t->a, &t->l, by4, bx4);
+
+                is_segwedge = msac_decode_bool_adapt(&ts->msac,
+                                                 ts->cdf.m.mask_comp[mask_ctx]);
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-segwedge_vs_jntavg[%d,ctx=%d]: r=%d\n",
+                           is_segwedge, mask_ctx, ts->msac.rng);
+            }
+
+            if (!is_segwedge) {
+                if (f->seq_hdr.jnt_comp) {
+                    const int jnt_ctx =
+                        get_jnt_comp_ctx(f->seq_hdr.order_hint_n_bits,
+                                         f->cur.p.poc, f->refp[b->ref[0]].p.poc,
+                                         f->refp[b->ref[1]].p.poc, t->a, &t->l,
+                                         by4, bx4);
+                    b->comp_type = COMP_INTER_WEIGHTED_AVG +
+                        msac_decode_bool_adapt(&ts->msac,
+                                               ts->cdf.m.jnt_comp[jnt_ctx]);
+                    if (DEBUG_BLOCK_INFO)
+                        printf("Post-jnt_comp[%d,ctx=%d[ac:%d,ar:%d,lc:%d,lr:%d]]: r=%d\n",
+                               b->comp_type == COMP_INTER_AVG,
+                               jnt_ctx, t->a->comp_type[bx4], t->a->ref[0][bx4],
+                               t->l.comp_type[by4], t->l.ref[0][by4],
+                               ts->msac.rng);
+                } else {
+                    b->comp_type = COMP_INTER_AVG;
+                }
+            } else {
+                if (wedge_allowed_mask & (1 << bs)) {
+                    const int ctx = av1_wedge_ctx_lut[bs];
+                    b->comp_type = COMP_INTER_WEDGE -
+                        msac_decode_bool_adapt(&ts->msac,
+                                               ts->cdf.m.wedge_comp[ctx]);
+                    if (b->comp_type == COMP_INTER_WEDGE)
+                        b->wedge_idx = msac_decode_symbol_adapt(&ts->msac,
+                                                ts->cdf.m.wedge_idx[ctx], 16);
+                } else {
+                    b->comp_type = COMP_INTER_SEG;
+                }
+                b->mask_sign = msac_decode_bool(&ts->msac, 128 << 7);
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-seg/wedge[%d,wedge_idx=%d,sign=%d]: r=%d\n",
+                           b->comp_type == COMP_INTER_WEDGE,
+                           b->wedge_idx, b->mask_sign, ts->msac.rng);
+            }
+        } else {
+            b->comp_type = COMP_INTER_NONE;
+
+            // ref
+            const int ctx1 = av1_get_ref_ctx(t->a, &t->l, by4, bx4,
+                                             have_top, have_left);
+            if (msac_decode_bool_adapt(&ts->msac, ts->cdf.m.ref[0][ctx1])) {
+                const int ctx2 = av1_get_ref_2_ctx(t->a, &t->l, by4, bx4,
+                                                   have_top, have_left);
+                if (msac_decode_bool_adapt(&ts->msac, ts->cdf.m.ref[1][ctx2])) {
+                    b->ref[0] = 6;
+                } else {
+                    const int ctx3 = av1_get_ref_6_ctx(t->a, &t->l, by4, bx4,
+                                                       have_top, have_left);
+                    b->ref[0] = 4 + msac_decode_bool_adapt(&ts->msac,
+                                                       ts->cdf.m.ref[5][ctx3]);
+                }
+            } else {
+                const int ctx2 = av1_get_ref_3_ctx(t->a, &t->l, by4, bx4,
+                                                   have_top, have_left);
+                if (msac_decode_bool_adapt(&ts->msac, ts->cdf.m.ref[2][ctx2])) {
+                    const int ctx3 = av1_get_ref_5_ctx(t->a, &t->l, by4, bx4,
+                                                       have_top, have_left);
+                    b->ref[0] = 2 + msac_decode_bool_adapt(&ts->msac,
+                                                       ts->cdf.m.ref[4][ctx3]);
+                } else {
+                    const int ctx3 = av1_get_ref_4_ctx(t->a, &t->l, by4, bx4,
+                                                       have_top, have_left);
+                    b->ref[0] = msac_decode_bool_adapt(&ts->msac,
+                                                       ts->cdf.m.ref[3][ctx3]);
+                }
+            }
+            b->ref[1] = -1;
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-ref[%d]: r=%d\n", b->ref[0], ts->msac.rng);
+
+            candidate_mv mvstack[8];
+            int n_mvs, ctx;
+            mv mvlist[2][2];
+            av1_find_ref_mvs(mvstack, &n_mvs, mvlist, &ctx,
+                             (int[2]) { b->ref[0], -1 }, f->bw, f->bh, bs, bp,
+                             t->by, t->bx, ts->tiling.col_start,
+                             ts->tiling.col_end, ts->tiling.row_start,
+                             ts->tiling.row_end, f->libaom_cm);
+
+            // mode parsing and mv derivation from ref_mvs
+            if (msac_decode_bool_adapt(&ts->msac, ts->cdf.m.newmv_mode[ctx & 7])) {
+                if (!msac_decode_bool_adapt(&ts->msac,
+                                        ts->cdf.m.globalmv_mode[(ctx >> 3) & 1]))
+                {
+                    b->inter_mode = GLOBALMV;
+                    b->mv[0] = get_gmv_2d(&f->frame_hdr.gmv[b->ref[0]],
+                                          t->bx, t->by, bw4, bh4, &f->frame_hdr);
+                    has_subpel_filter = imin(bw4, bh4) == 1 ||
+                        f->frame_hdr.gmv[b->ref[0]].type == WM_TYPE_TRANSLATION;
+                } else {
+                    has_subpel_filter = 1;
+                    if (msac_decode_bool_adapt(&ts->msac,
+                                       ts->cdf.m.refmv_mode[(ctx >> 4) & 15]))
+                    {
+                        b->inter_mode = NEARMV;
+                        b->drl_idx = 1;
+                        if (n_mvs > 2) {
+                            const int drl_ctx_v2 = get_drl_context(mvstack, 1);
+                            b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                                 ts->cdf.m.drl_bit[drl_ctx_v2]);
+                            if (b->drl_idx == 2 && n_mvs > 3) {
+                                const int drl_ctx_v3 =
+                                    get_drl_context(mvstack, 2);
+                                b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                                 ts->cdf.m.drl_bit[drl_ctx_v3]);
+                            }
+                        }
+                    } else {
+                        b->inter_mode = NEARESTMV;
+                        b->drl_idx = 0;
+                    }
+                    if (b->drl_idx >= 2) {
+                        b->mv[0] = mvstack[b->drl_idx].this_mv;
+                    } else {
+                        b->mv[0] = mvlist[0][b->drl_idx];
+                        if (!f->frame_hdr.hp) unset_hp_bit(&b->mv[0]);
+                    }
+                }
+
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-intermode[%d,drl=%d,mv=y:%d,x:%d,n_mvs=%d]: r=%d\n",
+                           b->inter_mode, b->drl_idx, b->mv[0].y, b->mv[0].x, n_mvs,
+                           ts->msac.rng);
+            } else {
+                has_subpel_filter = 1;
+                b->inter_mode = NEWMV;
+                b->drl_idx = 0;
+                if (n_mvs > 1) {
+                    const int drl_ctx_v1 = get_drl_context(mvstack, 0);
+                    b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                                 ts->cdf.m.drl_bit[drl_ctx_v1]);
+                    if (b->drl_idx == 1 && n_mvs > 2) {
+                        const int drl_ctx_v2 = get_drl_context(mvstack, 1);
+                        b->drl_idx += msac_decode_bool_adapt(&ts->msac,
+                                                 ts->cdf.m.drl_bit[drl_ctx_v2]);
+                    }
+                }
+                if (n_mvs > 1) {
+                    b->mv[0] = mvstack[b->drl_idx].this_mv;
+                } else {
+                    b->mv[0] = mvlist[0][0];
+                    if (!f->frame_hdr.hp) unset_hp_bit(&b->mv[0]);
+                }
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-intermode[%d,drl=%d]: r=%d\n",
+                           b->inter_mode, b->drl_idx, ts->msac.rng);
+                read_mv_residual(t, &b->mv[0], &ts->cdf.mv,
+                                 !f->frame_hdr.force_integer_mv);
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-residualmv[mv=y:%d,x:%d]: r=%d\n",
+                           b->mv[0].y, b->mv[0].x, ts->msac.rng);
+            }
+
+            // interintra flags
+            const int ii_sz_grp = av1_ymode_size_context[bs];
+            if (f->seq_hdr.inter_intra &&
+                interintra_allowed_mask & (1 << bs) &&
+                msac_decode_bool_adapt(&ts->msac, ts->cdf.m.interintra[ii_sz_grp]))
+            {
+                b->interintra_mode = msac_decode_symbol_adapt(&ts->msac,
+                                          ts->cdf.m.interintra_mode[ii_sz_grp],
+                                          N_INTER_INTRA_PRED_MODES);
+                const int wedge_ctx = av1_wedge_ctx_lut[bs];
+                b->interintra_type = INTER_INTRA_BLEND +
+                    msac_decode_bool_adapt(&ts->msac,
+                                           ts->cdf.m.interintra_wedge[wedge_ctx]);
+                if (b->interintra_type == INTER_INTRA_WEDGE)
+                    b->wedge_idx = msac_decode_symbol_adapt(&ts->msac,
+                                            ts->cdf.m.wedge_idx[wedge_ctx], 16);
+            } else {
+                b->interintra_type = INTER_INTRA_NONE;
+            }
+            if (DEBUG_BLOCK_INFO && f->seq_hdr.inter_intra &&
+                interintra_allowed_mask & (1 << bs))
+            {
+                printf("Post-interintra[t=%d,m=%d,w=%d]: r=%d\n",
+                       b->interintra_type, b->interintra_mode,
+                       b->wedge_idx, ts->msac.rng);
+            }
+
+            // motion variation
+            if (f->frame_hdr.switchable_motion_mode &&
+                b->interintra_type == INTER_INTRA_NONE && imin(bw4, bh4) >= 2 &&
+                // is not warped global motion
+                !(!f->frame_hdr.force_integer_mv && b->inter_mode == GLOBALMV &&
+                  f->frame_hdr.gmv[b->ref[0]].type > WM_TYPE_TRANSLATION) &&
+                // has overlappable neighbours
+                ((have_left && findoddzero(&t->l.intra[by4 + 1], h4 >> 1)) ||
+                 (have_top && findoddzero(&t->a->intra[bx4 + 1], w4 >> 1))))
+            {
+                // reaching here means the block allows obmc - check warp by
+                // finding matching-ref blocks in top/left edges
+                uint64_t mask[2] = { 0, 0 };
+                find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
+                                  have_left, have_top, b->ref[0], mask);
+                const int allow_warp = !f->frame_hdr.force_integer_mv &&
+                    f->frame_hdr.warp_motion && (mask[0] | mask[1]);
+
+                b->motion_mode = allow_warp ?
+                    msac_decode_symbol_adapt(&ts->msac, ts->cdf.m.motion_mode[bs], 3) :
+                    msac_decode_bool_adapt(&ts->msac, ts->cdf.m.obmc[bs]);
+                if (b->motion_mode == MM_WARP) {
+                    has_subpel_filter = 0;
+                    derive_warpmv(t, bw4, bh4, mask, b->mv[0], &t->warpmv);
+#define signabs(v) v < 0 ? '-' : ' ', abs(v)
+                    if (DEBUG_BLOCK_INFO)
+                        printf("[ %c%x %c%x %c%x\n  %c%x %c%x %c%x ]\n"
+                               "alpha=%c%x, beta=%c%x, gamma=%c%x, delta=%c%x\n",
+                               signabs(t->warpmv.matrix[0]),
+                               signabs(t->warpmv.matrix[1]),
+                               signabs(t->warpmv.matrix[2]),
+                               signabs(t->warpmv.matrix[3]),
+                               signabs(t->warpmv.matrix[4]),
+                               signabs(t->warpmv.matrix[5]),
+                               signabs(t->warpmv.alpha),
+                               signabs(t->warpmv.beta),
+                               signabs(t->warpmv.gamma),
+                               signabs(t->warpmv.delta));
+#undef signabs
+                }
+
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-motionmode[%d]: r=%d [mask: 0x%" PRIu64 "x/0x%"
+                           PRIu64 "x]\n", b->motion_mode, ts->msac.rng, mask[0],
+                            mask[1]);
+            } else {
+                b->motion_mode = MM_TRANSLATION;
+            }
+        }
+
+        // subpel filter
+        enum FilterMode filter[2];
+        if (f->frame_hdr.subpel_filter_mode == FILTER_SWITCHABLE) {
+            if (has_subpel_filter) {
+                const int comp = b->comp_type != COMP_INTER_NONE;
+                const int ctx1 = get_filter_ctx(t->a, &t->l, comp, 0, b->ref[0],
+                                                by4, bx4);
+                filter[0] = msac_decode_symbol_adapt(&ts->msac,
+                    ts->cdf.m.filter[0][ctx1], N_SWITCHABLE_FILTERS);
+                if (f->seq_hdr.dual_filter) {
+                    const int ctx2 = get_filter_ctx(t->a, &t->l, comp, 1,
+                                                    b->ref[0], by4, bx4);
+                    if (DEBUG_BLOCK_INFO)
+                        printf("Post-subpel_filter1[%d,ctx=%d]: r=%d\n",
+                               filter[0], ctx1, ts->msac.rng);
+                    filter[1] = msac_decode_symbol_adapt(&ts->msac,
+                        ts->cdf.m.filter[1][ctx2], N_SWITCHABLE_FILTERS);
+                    if (DEBUG_BLOCK_INFO)
+                        printf("Post-subpel_filter2[%d,ctx=%d]: r=%d\n",
+                               filter[1], ctx2, ts->msac.rng);
+                } else {
+                    filter[1] = filter[0];
+                    if (DEBUG_BLOCK_INFO)
+                        printf("Post-subpel_filter[%d,ctx=%d]: r=%d\n",
+                               filter[0], ctx1, ts->msac.rng);
+                }
+            } else {
+                filter[0] = filter[1] = FILTER_8TAP_REGULAR;
+            }
+        } else {
+            filter[0] = filter[1] = f->frame_hdr.subpel_filter_mode;
+        }
+        b->filter2d = av1_filter_2d[filter[1]][filter[0]];
+
+        read_vartx_tree(t, b, bs, bx4, by4);
+
+        // reconstruction
+        if (f->frame_thread.pass == 1) {
+            f->bd_fn.read_coef_blocks(t, bs, b);
+        } else {
+            f->bd_fn.recon_b_inter(t, bs, b);
+        }
+
+        const int is_globalmv =
+            b->inter_mode == (is_comp ? GLOBALMV_GLOBALMV : GLOBALMV);
+        const uint8_t (*const lf_lvls)[8][2] = (const uint8_t (*)[8][2])
+            &ts->lflvl[b->seg_id][0][b->ref[0] + 1][!is_globalmv];
+        dav1d_create_lf_mask_inter(t->lf_mask, f->lf.level, f->b4_stride,
+                                   &f->frame_hdr, lf_lvls, t->bx, t->by,
+                                   f->bw, f->bh, b->skip, bs, b->tx_split,
+                                   b->uvtx, f->cur.p.p.layout,
+                                   &t->a->tx_lpf_y[bx4], &t->l.tx_lpf_y[by4],
+                                   has_chroma ? &t->a->tx_lpf_uv[cbx4] : NULL,
+                                   has_chroma ? &t->l.tx_lpf_uv[cby4] : NULL);
+
+        // context updates
+        if (is_comp) {
+            splat_tworef_mv(f->mvs, f->b4_stride, t->by, t->bx, bs,
+                            b->inter_mode, b->ref[0], b->ref[1],
+                            b->mv[0], b->mv[1]);
+        } else {
+            splat_oneref_mv(f->mvs, f->b4_stride, t->by, t->bx, bs,
+                            b->inter_mode, b->ref[0], b->mv[0],
+                            b->interintra_type);
+        }
+        memset(&t->l.pal_sz[by4], 0, bh4);
+        memset(&t->a->pal_sz[bx4], 0, bw4);
+        if (has_chroma) {
+            memset(&t->l.uvmode[cby4], DC_PRED, cbh4);
+            memset(&t->a->uvmode[cbx4], DC_PRED, cbw4);
+            memset(&t->pal_sz_uv[1][cby4], 0, cbh4);
+            memset(&t->pal_sz_uv[0][cbx4], 0, cbw4);
+        }
+        memset(&t->a->tx_intra[bx4], b_dim[2], bw4);
+        memset(&t->l.tx_intra[by4], b_dim[3], bh4);
+        memset(&t->l.comp_type[by4], b->comp_type, bh4);
+        memset(&t->a->comp_type[bx4], b->comp_type, bw4);
+        memset(&t->l.filter[0][by4], filter[0], bh4);
+        memset(&t->a->filter[0][bx4], filter[0], bw4);
+        memset(&t->l.filter[1][by4], filter[1], bh4);
+        memset(&t->a->filter[1][bx4], filter[1], bw4);
+        memset(&t->l.mode[by4], b->inter_mode, bh4);
+        memset(&t->a->mode[bx4], b->inter_mode, bw4);
+        memset(&t->l.ref[0][by4], b->ref[0], bh4);
+        memset(&t->a->ref[0][bx4], b->ref[0], bw4);
+        memset(&t->l.ref[1][by4], b->ref[1], bh4);
+        memset(&t->a->ref[1][bx4], b->ref[1], bw4);
+    }
+
+    // update contexts
+    if (f->frame_hdr.segmentation.enabled &&
+        f->frame_hdr.segmentation.update_map)
+    {
+        uint8_t *seg_ptr = &f->cur_segmap[t->by * f->b4_stride + t->bx];
+        for (int y = 0; y < bh4; y++) {
+            memset(seg_ptr, b->seg_id, bw4);
+            seg_ptr += f->b4_stride;
+        }
+    }
+    memset(&t->l.seg_pred[by4], seg_pred, bh4);
+    memset(&t->a->seg_pred[bx4], seg_pred, bw4);
+    memset(&t->l.skip_mode[by4], b->skip_mode, bh4);
+    memset(&t->a->skip_mode[bx4], b->skip_mode, bw4);
+    memset(&t->l.intra[by4], b->intra, bh4);
+    memset(&t->a->intra[bx4], b->intra, bw4);
+    memset(&t->l.skip[by4], b->skip, bh4);
+    memset(&t->a->skip[bx4], b->skip, bw4);
+    if (!b->skip) {
+        uint32_t *noskip_mask = &t->lf_mask->noskip_mask[by4];
+        const unsigned mask = ((1ULL << bw4) - 1) << bx4;
+        for (int y = 0; y < bh4; y++)
+            *noskip_mask++ |= mask;
+    }
+}
+
+/* Recursively decode one square superblock partition at level 'bl' rooted at
+ * (t->bx, t->by). In frame-thread pass 2 the partition decision is replayed
+ * from frame_thread.b[] instead of being entropy-decoded. Leaf blocks are
+ * handed to decode_b(); sub-partitions recurse via decode_sb().
+ * Returns 0 on success, 1 on an invalid bitstream (vertical-only partition
+ * types that are disallowed for 4:2:2 chroma subsampling). */
+static int decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl,
+                     const EdgeNode *const node)
+{
+    const Dav1dFrameContext *const f = t->f;
+    // Half the block size at this level, in units of 4px blocks.
+    const int hsz = 16 >> bl;
+    const int have_h_split = f->bw > t->bx + hsz;
+    const int have_v_split = f->bh > t->by + hsz;
+
+    // Block extends past both frame edges: split is implicit, no symbol coded.
+    if (!have_h_split && !have_v_split) {
+        assert(bl < BL_8X8);
+        return decode_sb(t, bl + 1, ((const EdgeBranch *) node)->split[0]);
+    }
+
+    uint16_t *pc;
+    enum BlockPartition bp;
+    int ctx, bx8, by8;
+    // Passes 0/1 entropy-decode the partition symbol; pass 2 replays it, so
+    // the CDF/context setup below is skipped there.
+    if (f->frame_thread.pass != 2) {
+        if (0 && bl == BL_64X64)
+            printf("poc=%d,y=%d,x=%d,bl=%d,r=%d\n",
+                   f->frame_hdr.frame_offset, t->by, t->bx, bl, t->ts->msac.rng);
+        bx8 = (t->bx & 31) >> 1;
+        by8 = (t->by & 31) >> 1;
+        ctx = get_partition_ctx(t->a, &t->l, bl, by8, bx8);
+        pc = t->ts->cdf.m.partition[bl][ctx];
+    }
+
+    // Fully inside the frame: the full set of partition types is available.
+    if (have_h_split && have_v_split) {
+        if (f->frame_thread.pass == 2) {
+            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
+            bp = b->bl == bl ? b->bp : PARTITION_SPLIT;
+        } else {
+            // BL_8X8 and BL_128X128 support reduced partition sets.
+            const unsigned n_part = bl == BL_8X8 ? N_SUB8X8_PARTITIONS :
+                bl == BL_128X128 ? N_PARTITIONS - 2 : N_PARTITIONS;
+            bp = msac_decode_symbol_adapt(&t->ts->msac, pc, n_part);
+            // 4:2:2 chroma cannot represent these vertical-only partitions;
+            // treat them as a bitstream error.
+            if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 &&
+                (bp == PARTITION_V || bp == PARTITION_V4 ||
+                 bp == PARTITION_T_LEFT_SPLIT || bp == PARTITION_T_RIGHT_SPLIT))
+            {
+                return 1;
+            }
+            if (DEBUG_BLOCK_INFO)
+                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
+                       f->frame_hdr.frame_offset, t->by, t->bx, bl, ctx, bp,
+                       t->ts->msac.rng);
+        }
+        // Per-partition list of leaf block sizes.
+        const uint8_t *const b = av1_block_sizes[bl][bp];
+
+        switch (bp) {
+        case PARTITION_NONE:
+            decode_b(t, bl, b[0], PARTITION_NONE, node->o);
+            break;
+        case PARTITION_H:
+            decode_b(t, bl, b[0], PARTITION_H, node->h[0]);
+            t->by += hsz;
+            decode_b(t, bl, b[0], PARTITION_H, node->h[1]);
+            t->by -= hsz;
+            break;
+        case PARTITION_V:
+            decode_b(t, bl, b[0], PARTITION_V, node->v[0]);
+            t->bx += hsz;
+            decode_b(t, bl, b[0], PARTITION_V, node->v[1]);
+            t->bx -= hsz;
+            break;
+        case PARTITION_SPLIT:
+            if (bl == BL_8X8) {
+                // Smallest level: four 4x4 leaves, no further recursion.
+                const EdgeTip *const tip = (const EdgeTip *) node;
+                assert(hsz == 1);
+                decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]);
+                // Preserve the top-left 4x4 filter across the second row,
+                // where the bottom-right block needs the original value.
+                const enum Filter2d tl_filter = t->tl_4x4_filter;
+                t->bx++;
+                decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]);
+                t->bx--;
+                t->by++;
+                decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]);
+                t->bx++;
+                t->tl_4x4_filter = tl_filter;
+                decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[3]);
+                t->bx--;
+                t->by--;
+            } else {
+                const EdgeBranch *const branch = (const EdgeBranch *) node;
+                if (decode_sb(t, bl + 1, branch->split[0])) return 1;
+                t->bx += hsz;
+                if (decode_sb(t, bl + 1, branch->split[1])) return 1;
+                t->bx -= hsz;
+                t->by += hsz;
+                if (decode_sb(t, bl + 1, branch->split[2])) return 1;
+                t->bx += hsz;
+                if (decode_sb(t, bl + 1, branch->split[3])) return 1;
+                t->bx -= hsz;
+                t->by -= hsz;
+            }
+            break;
+        case PARTITION_T_TOP_SPLIT: {
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[0]);
+            t->bx += hsz;
+            decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[1]);
+            t->bx -= hsz;
+            t->by += hsz;
+            decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, branch->tts[2]);
+            t->by -= hsz;
+            break;
+        }
+        case PARTITION_T_BOTTOM_SPLIT: {
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, branch->tbs[0]);
+            t->by += hsz;
+            decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[1]);
+            t->bx += hsz;
+            decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[2]);
+            t->bx -= hsz;
+            t->by -= hsz;
+            break;
+        }
+        case PARTITION_T_LEFT_SPLIT: {
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[0]);
+            t->by += hsz;
+            decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[1]);
+            t->by -= hsz;
+            t->bx += hsz;
+            decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, branch->tls[2]);
+            t->bx -= hsz;
+            break;
+        }
+        case PARTITION_T_RIGHT_SPLIT: {
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, branch->trs[0]);
+            t->bx += hsz;
+            decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[1]);
+            t->by += hsz;
+            decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[2]);
+            t->by -= hsz;
+            t->bx -= hsz;
+            break;
+        }
+        case PARTITION_H4: {
+            // Four quarter-height strips; the last strip is only decoded if
+            // it is inside the frame.
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            decode_b(t, bl, b[0], PARTITION_H4, branch->h4[0]);
+            t->by += hsz >> 1;
+            decode_b(t, bl, b[0], PARTITION_H4, branch->h4[1]);
+            t->by += hsz >> 1;
+            decode_b(t, bl, b[0], PARTITION_H4, branch->h4[2]);
+            t->by += hsz >> 1;
+            if (t->by < f->bh)
+                decode_b(t, bl, b[0], PARTITION_H4, branch->h4[3]);
+            t->by -= hsz * 3 >> 1;
+            break;
+        }
+        case PARTITION_V4: {
+            // Four quarter-width strips, mirroring PARTITION_H4.
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            decode_b(t, bl, b[0], PARTITION_V4, branch->v4[0]);
+            t->bx += hsz >> 1;
+            decode_b(t, bl, b[0], PARTITION_V4, branch->v4[1]);
+            t->bx += hsz >> 1;
+            decode_b(t, bl, b[0], PARTITION_V4, branch->v4[2]);
+            t->bx += hsz >> 1;
+            if (t->bx < f->bw)
+                decode_b(t, bl, b[0], PARTITION_V4, branch->v4[3]);
+            t->bx -= hsz * 3 >> 1;
+            break;
+        }
+        default: assert(0);
+        }
+    } else if (have_h_split) {
+        // Bottom edge of the frame: only "split" vs "H" is signalled, as a
+        // single bool derived from the partial partition CDF.
+        unsigned is_split;
+        if (f->frame_thread.pass == 2) {
+            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
+            is_split = b->bl != bl;
+        } else {
+            const unsigned p = gather_top_partition_prob(pc, bl);
+            is_split = msac_decode_bool(&t->ts->msac, p);
+            if (DEBUG_BLOCK_INFO)
+                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
+                       f->frame_hdr.frame_offset, t->by, t->bx, bl, ctx,
+                       is_split ? PARTITION_SPLIT : PARTITION_H, t->ts->msac.rng);
+        }
+
+        assert(bl < BL_8X8);
+        if (is_split) {
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            bp = PARTITION_SPLIT;
+            // Only the two in-frame (top) quadrants are decoded.
+            if (decode_sb(t, bl + 1, branch->split[0])) return 1;
+            t->bx += hsz;
+            if (decode_sb(t, bl + 1, branch->split[1])) return 1;
+            t->bx -= hsz;
+        } else {
+            bp = PARTITION_H;
+            decode_b(t, bl, av1_block_sizes[bl][PARTITION_H][0], PARTITION_H,
+                     node->h[0]);
+        }
+    } else {
+        // Right edge of the frame: only "split" vs "V" is signalled.
+        assert(have_v_split);
+        unsigned is_split;
+        if (f->frame_thread.pass == 2) {
+            const Av1Block *const b = &f->frame_thread.b[t->by * f->b4_stride + t->bx];
+            is_split = b->bl != bl;
+        } else {
+            const unsigned p = gather_left_partition_prob(pc, bl);
+            is_split = msac_decode_bool(&t->ts->msac, p);
+            // PARTITION_V is invalid with 4:2:2 chroma subsampling.
+            if (f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I422 && !is_split)
+                return 1;
+            if (DEBUG_BLOCK_INFO)
+                printf("poc=%d,y=%d,x=%d,bl=%d,ctx=%d,bp=%d: r=%d\n",
+                       f->frame_hdr.frame_offset, t->by, t->bx, bl, ctx,
+                       is_split ? PARTITION_SPLIT : PARTITION_V, t->ts->msac.rng);
+        }
+
+        assert(bl < BL_8X8);
+        if (is_split) {
+            const EdgeBranch *const branch = (const EdgeBranch *) node;
+            bp = PARTITION_SPLIT;
+            // Only the two in-frame (left) quadrants are decoded.
+            if (decode_sb(t, bl + 1, branch->split[0])) return 1;
+            t->by += hsz;
+            if (decode_sb(t, bl + 1, branch->split[2])) return 1;
+            t->by -= hsz;
+        } else {
+            bp = PARTITION_V;
+            decode_b(t, bl, av1_block_sizes[bl][PARTITION_V][0], PARTITION_V,
+                     node->v[0]);
+        }
+    }
+
+    // Update the above/left partition context for future symbol decoding;
+    // split partitions propagate context from the recursion except at BL_8X8.
+    if (f->frame_thread.pass != 2 && (bp != PARTITION_SPLIT || bl == BL_8X8)) {
+        memset(&t->a->partition[bx8], av1_al_part_ctx[0][bl][bp], hsz);
+        memset(&t->l.partition[by8], av1_al_part_ctx[1][bl][bp], hsz);
+    }
+
+    return 0;
+}
+
+/* Reset an above/left BlockContext to its frame-start defaults.
+ * keyframe: non-zero for intra-only frames (everything is intra, DC_PRED);
+ * pass: frame-thread pass — pass 2 (reconstruction) only needs the
+ * intra/uvmode/mode fields, so the rest of the reset is skipped. */
+static void reset_context(BlockContext *const ctx, const int keyframe, const int pass) {
+    memset(ctx->intra, keyframe, sizeof(ctx->intra));
+    memset(ctx->uvmode, DC_PRED, sizeof(ctx->uvmode));
+    if (keyframe)
+        memset(ctx->mode, DC_PRED, sizeof(ctx->mode));
+
+    if (pass == 2) return;
+
+    memset(ctx->partition, 0, sizeof(ctx->partition));
+    memset(ctx->skip, 0, sizeof(ctx->skip));
+    memset(ctx->skip_mode, 0, sizeof(ctx->skip_mode));
+    memset(ctx->tx_lpf_y, 2, sizeof(ctx->tx_lpf_y));
+    memset(ctx->tx_lpf_uv, 1, sizeof(ctx->tx_lpf_uv));
+    memset(ctx->tx_intra, -1, sizeof(ctx->tx_intra));
+    memset(ctx->tx, TX_64X64, sizeof(ctx->tx));
+    // Inter frames additionally reset reference/compound/mode defaults.
+    if (!keyframe) {
+        memset(ctx->ref, -1, sizeof(ctx->ref));
+        memset(ctx->comp_type, 0, sizeof(ctx->comp_type));
+        memset(ctx->mode, NEARESTMV, sizeof(ctx->mode));
+    }
+    // 0x40 marks coefficient contexts as "unset" for the entropy coder.
+    memset(ctx->lcoef, 0x40, sizeof(ctx->lcoef));
+    memset(ctx->ccoef, 0x40, sizeof(ctx->ccoef));
+    memset(ctx->filter, N_SWITCHABLE_FILTERS, sizeof(ctx->filter));
+    memset(ctx->seg_pred, 0, sizeof(ctx->seg_pred));
+    memset(ctx->pal_sz, 0, sizeof(ctx->pal_sz));
+}
+
+/* Initialize per-tile decoding state: frame-thread scratch pointers, a copy
+ * of the starting CDFs, the msac bitreader over this tile's coded data, the
+ * tile's pixel-block boundaries, and the loop-restoration reference filters
+ * used as predictors for subexp-coded filter deltas.
+ * data/sz: this tile's slice of the coded bitstream;
+ * tile_start_off: offset of this tile into the frame-thread buffers. */
+static void setup_tile(Dav1dTileState *const ts,
+                       const Dav1dFrameContext *const f,
+                       const uint8_t *const data, const size_t sz,
+                       const int tile_row, const int tile_col,
+                       const int tile_start_off)
+{
+    const int col_sb_start = f->frame_hdr.tiling.col_start_sb[tile_col];
+    const int col_sb128_start = col_sb_start >> !f->seq_hdr.sb128;
+    const int col_sb_end = f->frame_hdr.tiling.col_start_sb[tile_col + 1];
+    const int row_sb_start = f->frame_hdr.tiling.row_start_sb[tile_row];
+    const int row_sb_end = f->frame_hdr.tiling.row_start_sb[tile_row + 1];
+    const int sb_shift = f->sb_shift;
+
+    ts->frame_thread.pal_idx = &f->frame_thread.pal_idx[tile_start_off * 2];
+    ts->frame_thread.cf = &((int32_t *) f->frame_thread.cf)[tile_start_off * 3];
+    // Each tile starts from the frame's input CDFs and adapts independently.
+    ts->cdf = *f->in_cdf.cdf;
+    ts->last_qidx = f->frame_hdr.quant.yac;
+    memset(ts->last_delta_lf, 0, sizeof(ts->last_delta_lf));
+
+    msac_init(&ts->msac, data, sz);
+
+    // Tile boundaries in 4px block units, clipped to the frame size.
+    ts->tiling.row = tile_row;
+    ts->tiling.col = tile_col;
+    ts->tiling.col_start = col_sb_start << sb_shift;
+    ts->tiling.col_end = imin(col_sb_end << sb_shift, f->bw);
+    ts->tiling.row_start = row_sb_start << sb_shift;
+    ts->tiling.row_end = imin(row_sb_end << sb_shift, f->bh);
+
+    // Reference Restoration Unit (used for exp coding)
+    Av1Filter *const lf_mask =
+        f->lf.mask + (ts->tiling.row_start >> 5) * f->sb128w + col_sb128_start;
+    const int unit_idx = ((ts->tiling.row_start & 16) >> 3) +
+                         ((ts->tiling.col_start & 16) >> 4);
+    // Seed the per-plane restoration-filter predictors with the default
+    // Wiener taps (3, -7, 15) and SGR projection weights (-32, 31).
+    for (int p = 0; p < 3; p++) {
+        ts->lr_ref[p] = &lf_mask->lr[p][unit_idx];
+        ts->lr_ref[p]->filter_v[0] = 3;
+        ts->lr_ref[p]->filter_v[1] = -7;
+        ts->lr_ref[p]->filter_v[2] = 15;
+        ts->lr_ref[p]->filter_h[0] = 3;
+        ts->lr_ref[p]->filter_h[1] = -7;
+        ts->lr_ref[p]->filter_h[2] = 15;
+        ts->lr_ref[p]->sgr_weights[0] = -32;
+        ts->lr_ref[p]->sgr_weights[1] = 31;
+    }
+
+    // With multiple tile-thread consumers, progress is tracked atomically.
+    if (f->n_tc > 1)
+        atomic_init(&ts->progress, 0);
+}
+
+int decode_tile_sbrow(Dav1dTileContext *const t) {
+    const Dav1dFrameContext *const f = t->f;
+    const enum BlockLevel root_bl = f->seq_hdr.sb128 ? BL_128X128 : BL_64X64;
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dContext *const c = f->c;
+    const int sb_step = f->sb_step;
+    const int tile_row = ts->tiling.row, tile_col = ts->tiling.col;
+    const int col_sb_start = f->frame_hdr.tiling.col_start_sb[tile_col];
+    const int col_sb128_start = col_sb_start >> !f->seq_hdr.sb128;
+
+    reset_context(&t->l, !(f->frame_hdr.frame_type & 1), f->frame_thread.pass);
+    if (f->frame_thread.pass == 2) {
+        for (t->bx = ts->tiling.col_start,
+             t->a = f->a + col_sb128_start + tile_row * f->sb128w;
+             t->bx < ts->tiling.col_end; t->bx += sb_step)
+        {
+            if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
+                return 1;
+            if (t->bx & 16 || f->seq_hdr.sb128)
+                t->a++;
+        }
+        f->bd_fn.backup_ipred_edge(t);
+        return 0;
+    }
+
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+
+    if (c->n_fc > 1 && f->frame_hdr.use_ref_frame_mvs) {
+        for (int n = 0; n < 7; n++)
+            dav1d_thread_picture_wait(&f->refp[n], 4 * (t->by + sb_step),
+                                      PLANE_TYPE_BLOCK);
+        av1_init_ref_mv_tile_row(f->libaom_cm,
+                                 ts->tiling.col_start, ts->tiling.col_end,
+                                 t->by, imin(t->by + sb_step, f->bh));
+    }
+    memset(t->pal_sz_uv[1], 0, sizeof(*t->pal_sz_uv));
+    const int sb128y = t->by >> 5;
+    for (t->bx = ts->tiling.col_start, t->a = f->a + col_sb128_start + tile_row * f->sb128w,
+         t->lf_mask = f->lf.mask + sb128y * f->sb128w + col_sb128_start;
+         t->bx < ts->tiling.col_end; t->bx += sb_step)
+    {
+        if (root_bl == BL_128X128) {
+            t->cur_sb_cdef_idx_ptr = t->lf_mask->cdef_idx;
+            t->cur_sb_cdef_idx_ptr[0] = -1;
+            t->cur_sb_cdef_idx_ptr[1] = -1;
+            t->cur_sb_cdef_idx_ptr[2] = -1;
+            t->cur_sb_cdef_idx_ptr[3] = -1;
+        } else {
+            t->cur_sb_cdef_idx_ptr =
+                &t->lf_mask->cdef_idx[((t->bx & 16) >> 4) +
+                                      ((t->by & 16) >> 3)];
+            t->cur_sb_cdef_idx_ptr[0] = -1;
+        }
+        // Restoration filter
+        for (int p = 0; p < 3; p++) {
+            if (f->frame_hdr.restoration.type[p] == RESTORATION_NONE)
+                continue;
+            const int by = t->by >> (ss_ver & !!p);
+            const int bx = t->bx >> (ss_hor & !!p);
+            const int bh = f->bh >> (ss_ver & !!p);
+            const int bw = f->bw >> (ss_hor & !!p);
+
+            const int unit_size_log2 =
+                f->frame_hdr.restoration.unit_size[!!p];
+            // 4pel unit size
+            const int b_unit_size = 1 << (unit_size_log2 - 2);
+            const unsigned mask = b_unit_size - 1;
+            if (by & mask || bx & mask) continue;
+            const int half_unit = b_unit_size >> 1;
+            // Round half up at frame boundaries, if there's more than one
+            // restoration unit
+            const int bottom_round = by && by + half_unit > bh;
+            const int right_round = bx && bx + half_unit > bw;
+            if (bottom_round || right_round) continue;
+            const int unit_idx = ((t->by & 16) >> 3) + ((t->bx & 16) >> 4);
+            Av1RestorationUnit *const lr = &t->lf_mask->lr[p][unit_idx];
+            const enum RestorationType frame_type =
+                f->frame_hdr.restoration.type[p];
+
+            if (frame_type == RESTORATION_SWITCHABLE) {
+                const int filter =
+                    msac_decode_symbol_adapt(&ts->msac,
+                                             ts->cdf.m.restore_switchable, 3);
+                lr->type = filter ? filter == 2 ? RESTORATION_SGRPROJ :
+                                                  RESTORATION_WIENER :
+                                    RESTORATION_NONE;
+            } else {
+                const unsigned type =
+                    msac_decode_bool_adapt(&ts->msac,
+                                           frame_type == RESTORATION_WIENER ?
+                                               ts->cdf.m.restore_wiener :
+                                               ts->cdf.m.restore_sgrproj);
+                lr->type = type ? frame_type : RESTORATION_NONE;
+            }
+
+            if (lr->type == RESTORATION_WIENER) {
+                lr->filter_v[0] =
+                    !p ? msac_decode_subexp(&ts->msac,
+                                            ts->lr_ref[p]->filter_v[0] + 5, 16,
+                                            1) - 5:
+                         0;
+                lr->filter_v[1] =
+                    msac_decode_subexp(&ts->msac,
+                                       ts->lr_ref[p]->filter_v[1] + 23, 32,
+                                       2) - 23;
+                lr->filter_v[2] =
+                    msac_decode_subexp(&ts->msac,
+                                       ts->lr_ref[p]->filter_v[2] + 17, 64,
+                                       3) - 17;
+
+                lr->filter_h[0] =
+                    !p ? msac_decode_subexp(&ts->msac,
+                                            ts->lr_ref[p]->filter_h[0] + 5, 16,
+                                            1) - 5:
+                        0;
+                lr->filter_h[1] =
+                    msac_decode_subexp(&ts->msac,
+                                       ts->lr_ref[p]->filter_h[1] + 23, 32,
+                                       2) - 23;
+                lr->filter_h[2] =
+                    msac_decode_subexp(&ts->msac,
+                                       ts->lr_ref[p]->filter_h[2] + 17, 64,
+                                       3) - 17;
+                memcpy(lr->sgr_weights, ts->lr_ref[p]->sgr_weights, sizeof(lr->sgr_weights));
+                ts->lr_ref[p] = lr;
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-lr_wiener[pl=%d,v[%d,%d,%d],h[%d,%d,%d]]: r=%d\n",
+                           p, lr->filter_v[0], lr->filter_v[1],
+                           lr->filter_v[2], lr->filter_h[0],
+                           lr->filter_h[1], lr->filter_h[2], ts->msac.rng);
+            } else if (lr->type == RESTORATION_SGRPROJ) {
+                const unsigned idx = msac_decode_bools(&ts->msac, 4);
+                lr->sgr_idx = idx;
+                lr->sgr_weights[0] = sgr_params[idx][0] ?
+                    msac_decode_subexp(&ts->msac,
+                                       ts->lr_ref[p]->sgr_weights[0] + 96, 128,
+                                       4) - 96 :
+                    0;
+                lr->sgr_weights[1] = sgr_params[idx][1] ?
+                    msac_decode_subexp(&ts->msac,
+                                       ts->lr_ref[p]->sgr_weights[1] + 32, 128,
+                                       4) - 32 :
+                    iclip(128 - lr->sgr_weights[0], -32, 95);
+                memcpy(lr->filter_v, ts->lr_ref[p]->filter_v, sizeof(lr->filter_v));
+                memcpy(lr->filter_h, ts->lr_ref[p]->filter_h, sizeof(lr->filter_h));
+                ts->lr_ref[p] = lr;
+                if (DEBUG_BLOCK_INFO)
+                    printf("Post-lr_sgrproj[pl=%d,idx=%d,w[%d,%d]]: r=%d\n",
+                           p, lr->sgr_idx, lr->sgr_weights[0],
+                           lr->sgr_weights[1], ts->msac.rng);
+            }
+        }
+        if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
+            return 1;
+        if (t->bx & 16 || f->seq_hdr.sb128) {
+            t->a++;
+            t->lf_mask++;
+        }
+    }
+
+    // backup pre-loopfilter pixels for intra prediction of the next sbrow
+    if (f->frame_thread.pass != 1)
+        f->bd_fn.backup_ipred_edge(t);
+
+    // backup t->a/l.tx_lpf_y/uv at tile boundaries to use them to "fix"
+    // up the initial value in neighbour tiles when running the loopfilter
+    int align_h = (f->bh + 31) & ~31;
+    memcpy(&f->lf.tx_lpf_right_edge[0][align_h * tile_col + t->by],
+           &t->l.tx_lpf_y[t->by & 16], sb_step);
+    align_h >>= 1;
+    memcpy(&f->lf.tx_lpf_right_edge[1][align_h * tile_col + (t->by >> 1)],
+           &t->l.tx_lpf_uv[(t->by & 16) >> 1], sb_step >> 1);
+
+    return 0;
+}
+
+int decode_frame(Dav1dFrameContext *const f) {
+    const Dav1dContext *const c = f->c;
+
+    if (f->n_tc > 1) {
+        if (f->frame_hdr.tiling.cols * f->sbh > f->tile_thread.titsati_sz) {
+            f->tile_thread.task_idx_to_sby_and_tile_idx =
+                malloc(sizeof(*f->tile_thread.task_idx_to_sby_and_tile_idx) *
+                       f->frame_hdr.tiling.cols * f->sbh);
+            if (!f->tile_thread.task_idx_to_sby_and_tile_idx) return -ENOMEM;
+            f->tile_thread.titsati_sz = f->frame_hdr.tiling.cols * f->sbh;
+        }
+        if (f->tile_thread.titsati_init[0] != f->frame_hdr.tiling.cols ||
+            f->tile_thread.titsati_init[1] != f->sbh)
+        {
+            for (int tile_row = 0, tile_idx = 0;
+                 tile_row < f->frame_hdr.tiling.rows; tile_row++)
+            {
+                for (int sby = f->frame_hdr.tiling.row_start_sb[tile_row];
+                     sby < f->frame_hdr.tiling.row_start_sb[tile_row + 1]; sby++)
+                {
+                    for (int tile_col = 0; tile_col < f->frame_hdr.tiling.cols;
+                         tile_col++, tile_idx++)
+                    {
+                        f->tile_thread.task_idx_to_sby_and_tile_idx[tile_idx][0] = sby;
+                        f->tile_thread.task_idx_to_sby_and_tile_idx[tile_idx][1] =
+                            tile_row * f->frame_hdr.tiling.cols + tile_col;
+                    }
+                }
+            }
+            f->tile_thread.titsati_init[0] = f->frame_hdr.tiling.cols;
+            f->tile_thread.titsati_init[1] = f->sbh;
+        }
+    }
+
+    if (f->frame_hdr.tiling.cols * f->frame_hdr.tiling.rows > f->n_ts) {
+        f->ts = realloc(f->ts, f->frame_hdr.tiling.cols *
+                               f->frame_hdr.tiling.rows * sizeof(*f->ts));
+        if (!f->ts) return -ENOMEM;
+        for (int n = f->n_ts;
+             n < f->frame_hdr.tiling.cols * f->frame_hdr.tiling.rows; n++)
+        {
+            Dav1dTileState *const ts = &f->ts[n];
+            pthread_mutex_init(&ts->tile_thread.lock, NULL);
+            pthread_cond_init(&ts->tile_thread.cond, NULL);
+        }
+        if (c->n_fc > 1) {
+            freep(&f->frame_thread.tile_start_off);
+            f->frame_thread.tile_start_off =
+                malloc(sizeof(*f->frame_thread.tile_start_off) *
+                       f->frame_hdr.tiling.cols * f->frame_hdr.tiling.rows);
+        }
+        f->n_ts = f->frame_hdr.tiling.cols * f->frame_hdr.tiling.rows;
+    }
+
+    if (c->n_fc > 1) {
+        int tile_idx = 0;
+        for (int tile_row = 0; tile_row < f->frame_hdr.tiling.rows; tile_row++) {
+            int row_off = f->frame_hdr.tiling.row_start_sb[tile_row] *
+                          f->sb_step * 4 * f->sb128w * 128;
+            int b_diff = (f->frame_hdr.tiling.row_start_sb[tile_row + 1] -
+                          f->frame_hdr.tiling.row_start_sb[tile_row]) * f->sb_step * 4;
+            for (int tile_col = 0; tile_col < f->frame_hdr.tiling.cols; tile_col++) {
+                f->frame_thread.tile_start_off[tile_idx++] = row_off + b_diff *
+                    f->frame_hdr.tiling.col_start_sb[tile_col] * f->sb_step * 4;
+            }
+        }
+    }
+
+    if (f->sb128w * f->frame_hdr.tiling.rows > f->a_sz) {
+        freep(&f->a);
+        f->a = malloc(f->sb128w * f->frame_hdr.tiling.rows * sizeof(*f->a));
+        if (!f->a) return -ENOMEM;
+        f->a_sz = f->sb128w * f->frame_hdr.tiling.rows;
+    }
+
+    // update allocation of block contexts for above
+    if (f->sb128w > f->lf.line_sz) {
+        dav1d_freep_aligned(&f->lf.cdef_line);
+        dav1d_freep_aligned(&f->lf.lr_lpf_line);
+
+        // note that we allocate all pixel arrays as if we were dealing with
+        // 10 bits/component data
+        uint16_t *ptr = f->lf.cdef_line =
+            dav1d_alloc_aligned(f->b4_stride * 4 * 12 * sizeof(uint16_t), 32);
+
+        uint16_t *lr_ptr = f->lf.lr_lpf_line =
+            dav1d_alloc_aligned(f->b4_stride * 4 * 3 * 12 * sizeof(uint16_t), 32);
+
+        for (int pl = 0; pl <= 2; pl++) {
+            f->lf.cdef_line_ptr[0][pl][0] = ptr + f->b4_stride * 4 * 0;
+            f->lf.cdef_line_ptr[0][pl][1] = ptr + f->b4_stride * 4 * 1;
+            f->lf.cdef_line_ptr[1][pl][0] = ptr + f->b4_stride * 4 * 2;
+            f->lf.cdef_line_ptr[1][pl][1] = ptr + f->b4_stride * 4 * 3;
+            ptr += f->b4_stride * 4 * 4;
+
+            f->lf.lr_lpf_line_ptr[pl] = lr_ptr;
+            lr_ptr += f->b4_stride * 4 * 12;
+        }
+
+        f->lf.line_sz = f->sb128w;
+    }
+
+    // update allocation for loopfilter masks
+    if (f->sb128w * f->sb128h > f->lf.mask_sz) {
+        freep(&f->lf.mask);
+        freep(&f->lf.level);
+        freep(&f->frame_thread.b);
+        f->lf.mask = malloc(f->sb128w * f->sb128h * sizeof(*f->lf.mask));
+        f->lf.level = malloc(f->sb128w * f->sb128h * 32 * 32 *
+                             sizeof(*f->lf.level));
+        if (!f->lf.mask || !f->lf.level) return -ENOMEM;
+        if (c->n_fc > 1) {
+            freep(&f->frame_thread.b);
+            freep(&f->frame_thread.cbi);
+            dav1d_freep_aligned(&f->frame_thread.cf);
+            dav1d_freep_aligned(&f->frame_thread.pal_idx);
+            freep(&f->frame_thread.pal);
+            f->frame_thread.b = malloc(sizeof(*f->frame_thread.b) *
+                                       f->sb128w * f->sb128h * 32 * 32);
+            f->frame_thread.pal = malloc(sizeof(*f->frame_thread.pal) *
+                                         f->sb128w * f->sb128h * 16 * 16);
+            f->frame_thread.pal_idx =
+                dav1d_alloc_aligned(sizeof(*f->frame_thread.pal_idx) *
+                                    f->sb128w * f->sb128h * 128 * 128 * 2, 32);
+            f->frame_thread.cbi = malloc(sizeof(*f->frame_thread.cbi) *
+                                         f->sb128w * f->sb128h * 32 * 32);
+            f->frame_thread.cf =
+                dav1d_alloc_aligned(sizeof(int32_t) * 3 *
+                                    f->sb128w * f->sb128h * 128 * 128, 32);
+            if (!f->frame_thread.b || !f->frame_thread.pal_idx ||
+                !f->frame_thread.cf)
+            {
+                return -ENOMEM;
+            }
+            memset(f->frame_thread.cf, 0,
+                   sizeof(int32_t) * 3 * f->sb128w * f->sb128h * 128 * 128);
+        }
+        f->lf.mask_sz = f->sb128w * f->sb128h;
+    }
+    if (f->frame_hdr.loopfilter.sharpness != f->lf.last_sharpness) {
+        dav1d_calc_eih(&f->lf.lim_lut, f->frame_hdr.loopfilter.sharpness);
+        f->lf.last_sharpness = f->frame_hdr.loopfilter.sharpness;
+    }
+    dav1d_calc_lf_values(f->lf.lvl, &f->frame_hdr, (int8_t[4]) { 0, 0, 0, 0 });
+    memset(f->lf.mask, 0, sizeof(*f->lf.mask) * f->sb128w * f->sb128h);
+
+    if (f->sbh * f->sb128w * 128 > f->ipred_edge_sz) {
+        dav1d_freep_aligned(&f->ipred_edge[0]);
+        uint16_t *ptr = f->ipred_edge[0] =
+            dav1d_alloc_aligned(f->sb128w * 128 * f->sbh * 3 * sizeof(uint16_t), 32);
+        if (!f->ipred_edge[0]) return -ENOMEM;
+        f->ipred_edge_sz = f->sbh * f->sb128w * 128;
+        f->ipred_edge[1] = &ptr[f->ipred_edge_sz];
+        f->ipred_edge[2] = &ptr[f->ipred_edge_sz * 2];
+    }
+
+    if (f->sb128h > f->lf.re_sz) {
+        freep(&f->lf.tx_lpf_right_edge[0]);
+        f->lf.tx_lpf_right_edge[0] = malloc((f->sb128h * 32 * 2) *
+                                            f->frame_hdr.tiling.cols);
+        if (!f->lf.tx_lpf_right_edge[0]) return -ENOMEM;
+        f->lf.tx_lpf_right_edge[1] = f->lf.tx_lpf_right_edge[0] +
+                                     f->sb128h * 32 * f->frame_hdr.tiling.cols;
+        f->lf.re_sz = f->sb128h;
+    }
+
+    // init ref mvs
+    if ((f->frame_hdr.frame_type & 1) || f->frame_hdr.allow_intrabc) {
+        f->mvs = f->mvs_ref->data;
+        const int order_hint_n_bits = f->seq_hdr.order_hint * f->seq_hdr.order_hint_n_bits;
+        av1_init_ref_mv_common(f->libaom_cm, f->bw >> 1, f->bh >> 1,
+                               f->b4_stride, f->seq_hdr.sb128,
+                               f->mvs, f->ref_mvs, f->cur.p.poc, f->refpoc,
+                               f->refrefpoc, f->frame_hdr.gmv,
+                               f->frame_hdr.hp, f->frame_hdr.force_integer_mv,
+                               f->frame_hdr.use_ref_frame_mvs,
+                               order_hint_n_bits);
+        if (c->n_fc == 1 && f->frame_hdr.use_ref_frame_mvs)
+            av1_init_ref_mv_tile_row(f->libaom_cm, 0, f->bw, 0, f->bh);
+    }
+
+    // setup dequant tables
+    init_quant_tables(&f->seq_hdr, &f->frame_hdr, f->frame_hdr.quant.yac, f->dq);
+    if (f->frame_hdr.quant.qm)
+        for (int j = 0; j < N_RECT_TX_SIZES; j++) {
+            f->qm[0][j][0] = av1_qm_tbl[f->frame_hdr.quant.qm_y][0][j];
+            f->qm[0][j][1] = av1_qm_tbl[f->frame_hdr.quant.qm_u][1][j];
+            f->qm[0][j][2] = av1_qm_tbl[f->frame_hdr.quant.qm_v][1][j];
+        }
+    for (int i = f->frame_hdr.quant.qm; i < 2; i++)
+        for (int tx = 0; tx < N_RECT_TX_SIZES; tx++)
+            for (int pl = 0; pl < 3; pl++)
+                f->qm[i][tx][pl] = av1_qm_tbl[15][!!pl][tx];
+
+    // setup jnt_comp weights
+    if (f->frame_hdr.switchable_comp_refs) {
+        for (int i = 0; i < 7; i++) {
+            const unsigned ref0poc = f->refp[i].p.poc;
+
+            for (int j = i + 1; j < 7; j++) {
+                const unsigned ref1poc = f->refp[j].p.poc;
+
+                const unsigned d1 = imin(abs(get_poc_diff(f->seq_hdr.order_hint_n_bits,
+                                                          ref0poc, f->cur.p.poc)), 31);
+                const unsigned d0 = imin(abs(get_poc_diff(f->seq_hdr.order_hint_n_bits,
+                                                          ref1poc, f->cur.p.poc)), 31);
+                const int order = d0 <= d1;
+
+                static const uint8_t quant_dist_weight[3][2] = {
+                    { 2, 3 }, { 2, 5 }, { 2, 7 }
+                };
+                static const uint8_t quant_dist_lookup_table[4][2] = {
+                    { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 }
+                };
+
+                int k;
+                for (k = 0; k < 3; k++) {
+                    const int c0 = quant_dist_weight[k][order];
+                    const int c1 = quant_dist_weight[k][!order];
+                    const int d0_c0 = d0 * c0;
+                    const int d1_c1 = d1 * c1;
+                    if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
+                }
+
+                f->jnt_weights[i][j] = quant_dist_lookup_table[k][order];
+            }
+        }
+    }
+
+    // init loopfilter pointers
+    f->lf.mask_ptr = f->lf.mask;
+    f->lf.p[0] = f->cur.p.data[0];
+    f->lf.p[1] = f->cur.p.data[1];
+    f->lf.p[2] = f->cur.p.data[2];
+    f->lf.tile_row = 1;
+
+    cdf_thread_wait(&f->in_cdf);
+
+    // parse individual tiles per tile group
+    int update_set = 0, tile_idx = 0;
+    const unsigned tile_col_mask = (1 << f->frame_hdr.tiling.log2_cols) - 1;
+    for (int i = 0; i < f->n_tile_data; i++) {
+        const uint8_t *data = f->tile[i].data.data;
+        size_t size = f->tile[i].data.sz;
+
+        const int last_tile_row_plus1 = 1 + (f->tile[i].end >> f->frame_hdr.tiling.log2_cols);
+        const int last_tile_col_plus1 = 1 + (f->tile[i].end & tile_col_mask);
+        const int empty_tile_cols = imax(0, last_tile_col_plus1 - f->frame_hdr.tiling.cols);
+        const int empty_tile_rows = imax(0, last_tile_row_plus1 - f->frame_hdr.tiling.rows);
+        const int empty_tiles =
+            (empty_tile_rows << f->frame_hdr.tiling.log2_cols) + empty_tile_cols;
+        for (int j = f->tile[i].start; j <= f->tile[i].end - empty_tiles; j++) {
+            const int tile_row = j >> f->frame_hdr.tiling.log2_cols;
+            const int tile_col = j & tile_col_mask;
+
+            if (tile_col >= f->frame_hdr.tiling.cols) continue;
+            if (tile_row >= f->frame_hdr.tiling.rows) continue;
+
+            size_t tile_sz;
+            if (j == f->tile[i].end - empty_tiles) {
+                tile_sz = size;
+            } else {
+                tile_sz = 0;
+                for (int k = 0; k < f->frame_hdr.tiling.n_bytes; k++)
+                    tile_sz |= *data++ << (k * 8);
+                tile_sz++;
+                size -= f->frame_hdr.tiling.n_bytes;
+                if (tile_sz > size) goto error;
+            }
+
+            setup_tile(&f->ts[tile_row * f->frame_hdr.tiling.cols + tile_col],
+                       f, data, tile_sz, tile_row, tile_col,
+                       c->n_fc > 1 ? f->frame_thread.tile_start_off[tile_idx++] : 0);
+            if (j == f->frame_hdr.tiling.update && f->frame_hdr.refresh_context)
+                update_set = 1;
+            data += tile_sz;
+            size -= tile_sz;
+        }
+    }
+
+    cdf_thread_unref(&f->in_cdf);
+
+    // 2-pass decoding:
+    // - enabled for frame-threading, so that one frame can do symbol parsing
+    //   as another (or multiple) are doing reconstruction. One advantage here
+    //   is that although reconstruction is limited by reference availability,
+    //   symbol parsing is not. Therefore, symbol parsing can effectively use
+    //   row and col tile threading, but reconstruction only col tile threading;
+    // - pass 0 means no 2-pass;
+    // - pass 1 means symbol parsing only;
+    // - pass 2 means reconstruction and loop filtering.
+
+    const int uses_2pass = c->n_fc > 1 && f->frame_hdr.refresh_context;
+    for (f->frame_thread.pass = uses_2pass;
+         f->frame_thread.pass <= 2 * uses_2pass; f->frame_thread.pass++)
+    {
+        const enum PlaneType progress_plane_type =
+            f->frame_thread.pass == 0 ? PLANE_TYPE_ALL :
+            f->frame_thread.pass == 1 ? PLANE_TYPE_BLOCK : PLANE_TYPE_Y;
+
+        for (int n = 0; n < f->sb128w * f->frame_hdr.tiling.rows; n++)
+            reset_context(&f->a[n], !(f->frame_hdr.frame_type & 1), f->frame_thread.pass);
+
+        if (f->n_tc == 1) {
+            Dav1dTileContext *const t = f->tc;
+
+            // no tile threading - we explicitly interleave tile/sbrow decoding
+            // and post-filtering, so that the full process runs in-line, so
+            // that frame threading is still possible
+            for (int tile_row = 0; tile_row < f->frame_hdr.tiling.rows; tile_row++) {
+                for (int sby = f->frame_hdr.tiling.row_start_sb[tile_row];
+                     sby < f->frame_hdr.tiling.row_start_sb[tile_row + 1]; sby++)
+                {
+                    t->by = sby << (4 + f->seq_hdr.sb128);
+                    for (int tile_col = 0; tile_col < f->frame_hdr.tiling.cols; tile_col++) {
+                        t->ts = &f->ts[tile_row * f->frame_hdr.tiling.rows + tile_col];
+
+                        int res;
+                        if ((res = decode_tile_sbrow(t)))
+                            return res;
+                    }
+
+                    // loopfilter + cdef + restoration
+                    if (f->frame_thread.pass != 1)
+                        f->bd_fn.filter_sbrow(f, sby);
+                    dav1d_thread_picture_signal(&f->cur, (sby + 1) * f->sb_step * 4,
+                                                progress_plane_type);
+                }
+            }
+        } else {
+            // signal available tasks to worker threads
+            int num_tasks;
+
+            const uint64_t all_mask = ~0ULL >> (64 - f->n_tc);
+            pthread_mutex_lock(&f->tile_thread.lock);
+            while (f->tile_thread.available != all_mask)
+                pthread_cond_wait(&f->tile_thread.icond, &f->tile_thread.lock);
+            assert(!f->tile_thread.tasks_left);
+            if (f->frame_thread.pass == 1 || f->n_tc >= f->frame_hdr.tiling.cols) {
+                // we can (or in fact, if >, we need to) do full tile decoding.
+                // loopfilter happens below
+                num_tasks = f->frame_hdr.tiling.cols * f->frame_hdr.tiling.rows;
+            } else {
+                // we need to interleave sbrow decoding for all tile cols in a
+                // tile row, since otherwise subsequent threads will be blocked
+                // waiting for the post-filter to complete
+                num_tasks = f->sbh * f->frame_hdr.tiling.cols;
+            }
+            f->tile_thread.num_tasks = f->tile_thread.tasks_left = num_tasks;
+            pthread_cond_broadcast(&f->tile_thread.cond);
+            pthread_mutex_unlock(&f->tile_thread.lock);
+
+            // loopfilter + cdef + restoration
+            for (int tile_row = 0; tile_row < f->frame_hdr.tiling.rows; tile_row++) {
+                for (int sby = f->frame_hdr.tiling.row_start_sb[tile_row];
+                     sby < f->frame_hdr.tiling.row_start_sb[tile_row + 1]; sby++)
+                {
+                    for (int tile_col = 0; tile_col < f->frame_hdr.tiling.cols;
+                         tile_col++)
+                    {
+                        Dav1dTileState *const ts =
+                            &f->ts[tile_row * f->frame_hdr.tiling.cols + tile_col];
+
+                        if (atomic_load(&ts->progress) <= sby) {
+                            pthread_mutex_lock(&ts->tile_thread.lock);
+                            while (atomic_load(&ts->progress) <= sby)
+                                pthread_cond_wait(&ts->tile_thread.cond,
+                                                  &ts->tile_thread.lock);
+                            pthread_mutex_unlock(&ts->tile_thread.lock);
+                        }
+                    }
+
+                    // loopfilter + cdef + restoration
+                    if (f->frame_thread.pass != 1)
+                        f->bd_fn.filter_sbrow(f, sby);
+                    dav1d_thread_picture_signal(&f->cur, (sby + 1) * f->sb_step * 4,
+                                                progress_plane_type);
+                }
+            }
+        }
+
+        if (f->frame_thread.pass <= 1 && f->frame_hdr.refresh_context) {
+            // cdf update
+            if (update_set)
+                av1_update_tile_cdf(&f->frame_hdr, f->out_cdf.cdf,
+                                    &f->ts[f->frame_hdr.tiling.update].cdf);
+            cdf_thread_signal(&f->out_cdf);
+            cdf_thread_unref(&f->out_cdf);
+        }
+        if (f->frame_thread.pass == 1) {
+            assert(c->n_fc > 1);
+            for (int tile_idx = 0;
+                 tile_idx < f->frame_hdr.tiling.rows * f->frame_hdr.tiling.cols;
+                 tile_idx++)
+            {
+                Dav1dTileState *const ts = &f->ts[tile_idx];
+                const int tile_start_off = f->frame_thread.tile_start_off[tile_idx];
+                ts->frame_thread.pal_idx = &f->frame_thread.pal_idx[tile_start_off * 2];
+                ts->frame_thread.cf = &((int32_t *) f->frame_thread.cf)[tile_start_off * 3];
+                if (f->n_tc > 0)
+                    atomic_init(&ts->progress, 0);
+            }
+        }
+    }
+
+    dav1d_thread_picture_signal(&f->cur, UINT_MAX, PLANE_TYPE_ALL);
+
+    for (int i = 0; i < 7; i++) {
+        if (f->refp[i].p.data[0])
+            dav1d_thread_picture_unref(&f->refp[i]);
+        if (f->ref_mvs_ref[i])
+            dav1d_ref_dec(f->ref_mvs_ref[i]);
+    }
+
+    dav1d_thread_picture_unref(&f->cur);
+    if (f->cur_segmap_ref)
+        dav1d_ref_dec(f->cur_segmap_ref);
+    if (f->prev_segmap_ref)
+        dav1d_ref_dec(f->prev_segmap_ref);
+    if (f->mvs_ref)
+        dav1d_ref_dec(f->mvs_ref);
+
+    for (int i = 0; i < f->n_tile_data; i++)
+        dav1d_data_unref(&f->tile[i].data);
+
+    return 0;
+
+error:
+    for (int i = 0; i < f->n_tile_data; i++)
+        dav1d_data_unref(&f->tile[i].data);
+
+    return -EINVAL;
+}
+
+int submit_frame(Dav1dContext *const c) {
+    Dav1dFrameContext *f;
+    int res;
+
+    // wait for c->out_delayed[next] and move into c->out if visible
+    Dav1dThreadPicture *out_delayed;
+    if (c->n_fc > 1) {
+        const unsigned next = c->frame_thread.next++;
+        if (c->frame_thread.next == c->n_fc)
+            c->frame_thread.next = 0;
+
+        f = &c->fc[next];
+        pthread_mutex_lock(&f->frame_thread.td.lock);
+        while (f->n_tile_data > 0)
+            pthread_cond_wait(&f->frame_thread.td.cond,
+                              &f->frame_thread.td.lock);
+        out_delayed = &c->frame_thread.out_delayed[next];
+        if (out_delayed->p.data[0]) {
+            if (out_delayed->visible)
+                dav1d_picture_ref(&c->out, &out_delayed->p);
+            dav1d_thread_picture_unref(out_delayed);
+        }
+    } else {
+        f = c->fc;
+    }
+
+    f->seq_hdr = c->seq_hdr;
+    f->frame_hdr = c->frame_hdr;
+    const int bd_idx = (f->seq_hdr.bpc - 8) >> 1;
+    f->dsp = &c->dsp[bd_idx];
+
+    if (!f->dsp->ipred.intra_pred[TX_4X4][DC_PRED]) {
+        Dav1dDSPContext *const dsp = &c->dsp[bd_idx];
+
+        switch (f->seq_hdr.bpc) {
+#define assign_bitdepth_case(bd) \
+        case bd: \
+            dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
+            dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
+            dav1d_itx_dsp_init_##bd##bpc(&dsp->itx); \
+            dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
+            dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr); \
+            dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
+            break
+#if CONFIG_8BPC
+        assign_bitdepth_case(8);
+#endif
+#if CONFIG_10BPC
+        assign_bitdepth_case(10);
+#endif
+#undef assign_bitdepth_case
+        default:
+            fprintf(stderr, "Compiled without support for %d-bit decoding\n",
+                    f->seq_hdr.bpc);
+            return -ENOPROTOOPT;
+        }
+    }
+
+#define assign_bitdepth_case(bd) \
+        f->bd_fn.recon_b_inter = recon_b_inter_##bd##bpc; \
+        f->bd_fn.recon_b_intra = recon_b_intra_##bd##bpc; \
+        f->bd_fn.filter_sbrow = filter_sbrow_##bd##bpc; \
+        f->bd_fn.backup_ipred_edge = backup_ipred_edge_##bd##bpc; \
+        f->bd_fn.read_coef_blocks = read_coef_blocks_##bd##bpc
+    if (f->seq_hdr.bpc <= 8) {
+#if CONFIG_8BPC
+        assign_bitdepth_case(8);
+#endif
+    } else {
+#if CONFIG_10BPC
+        assign_bitdepth_case(16);
+#endif
+    }
+#undef assign_bitdepth_case
+
+    if (f->frame_hdr.frame_type & 1)
+        for (int i = 0; i < 7; i++) {
+            const int refidx = f->frame_hdr.refidx[i];
+            dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
+        }
+
+    // setup entropy
+    if (f->frame_hdr.primary_ref_frame == PRIMARY_REF_NONE) {
+        av1_init_states(&f->in_cdf, f->frame_hdr.quant.yac);
+    } else {
+        const int pri_ref = f->frame_hdr.refidx[f->frame_hdr.primary_ref_frame];
+        cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
+    }
+    if (f->frame_hdr.refresh_context) {
+        cdf_thread_alloc(&f->out_cdf, c->n_fc > 1 ? &f->frame_thread.td : NULL);
+        memcpy(f->out_cdf.cdf, f->in_cdf.cdf, sizeof(*f->in_cdf.cdf));
+    }
+
+    // FIXME qsort so tiles are in order (for frame threading)
+    memcpy(f->tile, c->tile, c->n_tile_data * sizeof(*f->tile));
+    f->n_tile_data = c->n_tile_data;
+    c->n_tile_data = 0;
+
+    // allocate frame
+    if ((res = dav1d_thread_picture_alloc(&f->cur, f->frame_hdr.width,
+                                          f->frame_hdr.height,
+                                          f->seq_hdr.layout, f->seq_hdr.bpc,
+                                          c->n_fc > 1 ? &f->frame_thread.td : NULL,
+                                          f->frame_hdr.show_frame)) < 0)
+    {
+        return res;
+    }
+
+    f->cur.p.poc = f->frame_hdr.frame_offset;
+    f->cur.p.p.pri = f->seq_hdr.pri;
+    f->cur.p.p.trc = f->seq_hdr.trc;
+    f->cur.p.p.mtrx = f->seq_hdr.mtrx;
+    f->cur.p.p.chr = f->seq_hdr.chr;
+    f->cur.p.p.fullrange = f->seq_hdr.color_range;
+
+    // move f->cur into output queue
+    if (c->n_fc == 1) {
+        if (f->frame_hdr.show_frame)
+            dav1d_picture_ref(&c->out, &f->cur.p);
+    } else {
+        dav1d_thread_picture_ref(out_delayed, &f->cur);
+    }
+
+    f->bw = (f->frame_hdr.width + 3) >> 2;
+    f->bh = (f->frame_hdr.height + 3) >> 2;
+    f->sb128w = (f->bw + 31) >> 5;
+    f->sb128h = (f->bh + 31) >> 5;
+    f->sb_shift = 4 + f->seq_hdr.sb128;
+    f->sb_step = 16 << f->seq_hdr.sb128;
+    f->sbh = (f->bh + f->sb_step - 1) >> f->sb_shift;
+    f->b4_stride = (f->bw + 31) & ~31;
+
+    // ref_mvs
+    if ((f->frame_hdr.frame_type & 1) || f->frame_hdr.allow_intrabc) {
+        f->mvs_ref = dav1d_ref_create(f->sb128h * 32 * f->b4_stride *
+                                      sizeof(*f->mvs));
+        f->mvs = f->mvs_ref->data;
+        if (f->frame_hdr.use_ref_frame_mvs) {
+            for (int i = 0; i < 7; i++) {
+                const int refidx = f->frame_hdr.refidx[i];
+                f->refpoc[i] = f->refp[i].p.poc;
+                if (c->refs[refidx].refmvs != NULL &&
+                    f->refp[i].p.p.w == f->cur.p.p.w &&
+                    f->refp[i].p.p.h == f->cur.p.p.h)
+                {
+                    f->ref_mvs_ref[i] = c->refs[refidx].refmvs;
+                    dav1d_ref_inc(f->ref_mvs_ref[i]);
+                    f->ref_mvs[i] = c->refs[refidx].refmvs->data;
+                } else {
+                    f->ref_mvs[i] = NULL;
+                    f->ref_mvs_ref[i] = NULL;
+                }
+                memcpy(f->refrefpoc[i], c->refs[refidx].refpoc,
+                       sizeof(*f->refrefpoc));
+            }
+        } else {
+            memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
+        }
+    } else {
+        f->mvs_ref = NULL;
+        memset(f->ref_mvs_ref, 0, sizeof(f->ref_mvs_ref));
+    }
+
+    // segmap
+    if (f->frame_hdr.segmentation.enabled) {
+        if (f->frame_hdr.segmentation.temporal) {
+            const int pri_ref = f->frame_hdr.primary_ref_frame;
+            assert(pri_ref != PRIMARY_REF_NONE);
+            const int ref_w = (f->refp[pri_ref].p.p.w + 3) >> 2;
+            const int ref_h = (f->refp[pri_ref].p.p.h + 3) >> 2;
+            if (ref_w == f->bw && ref_h == f->bh) {
+                f->prev_segmap_ref = c->refs[f->frame_hdr.refidx[pri_ref]].segmap;
+                dav1d_ref_inc(f->prev_segmap_ref);
+                f->prev_segmap = f->prev_segmap_ref->data;
+            } else {
+                f->prev_segmap_ref = NULL;
+                f->prev_segmap = NULL;
+            }
+        } else {
+            f->prev_segmap_ref = NULL;
+            f->prev_segmap = NULL;
+        }
+        if (f->frame_hdr.segmentation.update_map) {
+            f->cur_segmap_ref = dav1d_ref_create(f->b4_stride * 32 * f->sb128h);
+            f->cur_segmap = f->cur_segmap_ref->data;
+        } else {
+            f->cur_segmap_ref = f->prev_segmap_ref;
+            dav1d_ref_inc(f->cur_segmap_ref);
+            f->cur_segmap = f->prev_segmap_ref->data;
+        }
+    } else {
+        f->cur_segmap = NULL;
+        f->cur_segmap_ref = NULL;
+        f->prev_segmap_ref = NULL;
+    }
+
+    // update references etc.
+    for (int i = 0; i < 8; i++) {
+        if (f->frame_hdr.refresh_frame_flags & (1 << i)) {
+            if (c->refs[i].p.p.data[0])
+                dav1d_thread_picture_unref(&c->refs[i].p);
+            dav1d_thread_picture_ref(&c->refs[i].p, &f->cur);
+
+            if (c->cdf[i].cdf) cdf_thread_unref(&c->cdf[i]);
+            if (f->frame_hdr.refresh_context) {
+                cdf_thread_ref(&c->cdf[i], &f->out_cdf);
+            } else {
+                cdf_thread_ref(&c->cdf[i], &f->in_cdf);
+            }
+            c->refs[i].lf_mode_ref_deltas =
+                f->frame_hdr.loopfilter.mode_ref_deltas;
+            c->refs[i].seg_data = f->frame_hdr.segmentation.seg_data;
+            memcpy(c->refs[i].gmv, f->frame_hdr.gmv, sizeof(c->refs[i].gmv));
+            c->refs[i].film_grain = f->frame_hdr.film_grain.data;
+
+            if (c->refs[i].segmap)
+                dav1d_ref_dec(c->refs[i].segmap);
+            c->refs[i].segmap = f->cur_segmap_ref;
+            if (f->cur_segmap_ref)
+                dav1d_ref_inc(f->cur_segmap_ref);
+            if (c->refs[i].refmvs)
+                dav1d_ref_dec(c->refs[i].refmvs);
+            if (f->frame_hdr.allow_intrabc) {
+                c->refs[i].refmvs = NULL;
+            } else {
+                c->refs[i].refmvs = f->mvs_ref;
+                if (f->mvs_ref)
+                    dav1d_ref_inc(f->mvs_ref);
+            }
+            memcpy(c->refs[i].refpoc, f->refpoc, sizeof(f->refpoc));
+        }
+    }
+
+    if (c->n_fc == 1) {
+        if ((res = decode_frame(f)) < 0)
+            return res;
+    } else {
+        pthread_cond_signal(&f->frame_thread.td.cond);
+        pthread_mutex_unlock(&f->frame_thread.td.lock);
+    }
+
+    return 0;
+}
--- /dev/null
+++ b/src/decode.h
@@ -1,0 +1,35 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_DECODE_H__
+#define __DAV1D_SRC_DECODE_H__
+
+#include "src/internal.h"
+
+/* Submit the next pending frame for decoding. With a single frame context
+ * this decodes synchronously and returns decode_frame()'s (negative) error
+ * code on failure; with frame threading it wakes the frame thread instead.
+ * Returns 0 on success. */
+int submit_frame(Dav1dContext *c);
+
+#endif /* __DAV1D_SRC_DECODE_H__ */
--- /dev/null
+++ b/src/dequant_tables.c
@@ -1,0 +1,164 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "src/dequant_tables.h"
+
+/* Dequantization step-size lookup table, indexed as
+ * dav1d_dq_tbl[table][qindex][0 (DC) / 1 (AC)].
+ * NOTE(review): the first index presumably selects bitdepth (8bpc vs.
+ * higher) — confirm against the callers; the per-qindex {DC, AC} layout
+ * follows the AV1 dc_q/ac_q tables. QINDEX_RANGE entries per table
+ * (64 rows x 4 pairs = 256). */
+const uint16_t dav1d_dq_tbl[][QINDEX_RANGE][2] = {
+    {
+        {    4,    4, }, {    8,    8, }, {    8,    9, }, {    9,   10, },
+        {   10,   11, }, {   11,   12, }, {   12,   13, }, {   12,   14, },
+        {   13,   15, }, {   14,   16, }, {   15,   17, }, {   16,   18, },
+        {   17,   19, }, {   18,   20, }, {   19,   21, }, {   19,   22, },
+        {   20,   23, }, {   21,   24, }, {   22,   25, }, {   23,   26, },
+        {   24,   27, }, {   25,   28, }, {   26,   29, }, {   26,   30, },
+        {   27,   31, }, {   28,   32, }, {   29,   33, }, {   30,   34, },
+        {   31,   35, }, {   32,   36, }, {   32,   37, }, {   33,   38, },
+        {   34,   39, }, {   35,   40, }, {   36,   41, }, {   37,   42, },
+        {   38,   43, }, {   38,   44, }, {   39,   45, }, {   40,   46, },
+        {   41,   47, }, {   42,   48, }, {   43,   49, }, {   43,   50, },
+        {   44,   51, }, {   45,   52, }, {   46,   53, }, {   47,   54, },
+        {   48,   55, }, {   48,   56, }, {   49,   57, }, {   50,   58, },
+        {   51,   59, }, {   52,   60, }, {   53,   61, }, {   53,   62, },
+        {   54,   63, }, {   55,   64, }, {   56,   65, }, {   57,   66, },
+        {   57,   67, }, {   58,   68, }, {   59,   69, }, {   60,   70, },
+        {   61,   71, }, {   62,   72, }, {   62,   73, }, {   63,   74, },
+        {   64,   75, }, {   65,   76, }, {   66,   77, }, {   66,   78, },
+        {   67,   79, }, {   68,   80, }, {   69,   81, }, {   70,   82, },
+        {   70,   83, }, {   71,   84, }, {   72,   85, }, {   73,   86, },
+        {   74,   87, }, {   74,   88, }, {   75,   89, }, {   76,   90, },
+        {   77,   91, }, {   78,   92, }, {   78,   93, }, {   79,   94, },
+        {   80,   95, }, {   81,   96, }, {   81,   97, }, {   82,   98, },
+        {   83,   99, }, {   84,  100, }, {   85,  101, }, {   85,  102, },
+        {   87,  104, }, {   88,  106, }, {   90,  108, }, {   92,  110, },
+        {   93,  112, }, {   95,  114, }, {   96,  116, }, {   98,  118, },
+        {   99,  120, }, {  101,  122, }, {  102,  124, }, {  104,  126, },
+        {  105,  128, }, {  107,  130, }, {  108,  132, }, {  110,  134, },
+        {  111,  136, }, {  113,  138, }, {  114,  140, }, {  116,  142, },
+        {  117,  144, }, {  118,  146, }, {  120,  148, }, {  121,  150, },
+        {  123,  152, }, {  125,  155, }, {  127,  158, }, {  129,  161, },
+        {  131,  164, }, {  134,  167, }, {  136,  170, }, {  138,  173, },
+        {  140,  176, }, {  142,  179, }, {  144,  182, }, {  146,  185, },
+        {  148,  188, }, {  150,  191, }, {  152,  194, }, {  154,  197, },
+        {  156,  200, }, {  158,  203, }, {  161,  207, }, {  164,  211, },
+        {  166,  215, }, {  169,  219, }, {  172,  223, }, {  174,  227, },
+        {  177,  231, }, {  180,  235, }, {  182,  239, }, {  185,  243, },
+        {  187,  247, }, {  190,  251, }, {  192,  255, }, {  195,  260, },
+        {  199,  265, }, {  202,  270, }, {  205,  275, }, {  208,  280, },
+        {  211,  285, }, {  214,  290, }, {  217,  295, }, {  220,  300, },
+        {  223,  305, }, {  226,  311, }, {  230,  317, }, {  233,  323, },
+        {  237,  329, }, {  240,  335, }, {  243,  341, }, {  247,  347, },
+        {  250,  353, }, {  253,  359, }, {  257,  366, }, {  261,  373, },
+        {  265,  380, }, {  269,  387, }, {  272,  394, }, {  276,  401, },
+        {  280,  408, }, {  284,  416, }, {  288,  424, }, {  292,  432, },
+        {  296,  440, }, {  300,  448, }, {  304,  456, }, {  309,  465, },
+        {  313,  474, }, {  317,  483, }, {  322,  492, }, {  326,  501, },
+        {  330,  510, }, {  335,  520, }, {  340,  530, }, {  344,  540, },
+        {  349,  550, }, {  354,  560, }, {  359,  571, }, {  364,  582, },
+        {  369,  593, }, {  374,  604, }, {  379,  615, }, {  384,  627, },
+        {  389,  639, }, {  395,  651, }, {  400,  663, }, {  406,  676, },
+        {  411,  689, }, {  417,  702, }, {  423,  715, }, {  429,  729, },
+        {  435,  743, }, {  441,  757, }, {  447,  771, }, {  454,  786, },
+        {  461,  801, }, {  467,  816, }, {  475,  832, }, {  482,  848, },
+        {  489,  864, }, {  497,  881, }, {  505,  898, }, {  513,  915, },
+        {  522,  933, }, {  530,  951, }, {  539,  969, }, {  549,  988, },
+        {  559, 1007, }, {  569, 1026, }, {  579, 1046, }, {  590, 1066, },
+        {  602, 1087, }, {  614, 1108, }, {  626, 1129, }, {  640, 1151, },
+        {  654, 1173, }, {  668, 1196, }, {  684, 1219, }, {  700, 1243, },
+        {  717, 1267, }, {  736, 1292, }, {  755, 1317, }, {  775, 1343, },
+        {  796, 1369, }, {  819, 1396, }, {  843, 1423, }, {  869, 1451, },
+        {  896, 1479, }, {  925, 1508, }, {  955, 1537, }, {  988, 1567, },
+        { 1022, 1597, }, { 1058, 1628, }, { 1098, 1660, }, { 1139, 1692, },
+        { 1184, 1725, }, { 1232, 1759, }, { 1282, 1793, }, { 1336, 1828, },
+    }, {
+        /* second table (see NOTE above re: index meaning) */
+        {    4,    4, }, {    9,    9, }, {   10,   11, }, {   13,   13, },
+        {   15,   16, }, {   17,   18, }, {   20,   21, }, {   22,   24, },
+        {   25,   27, }, {   28,   30, }, {   31,   33, }, {   34,   37, },
+        {   37,   40, }, {   40,   44, }, {   43,   48, }, {   47,   51, },
+        {   50,   55, }, {   53,   59, }, {   57,   63, }, {   60,   67, },
+        {   64,   71, }, {   68,   75, }, {   71,   79, }, {   75,   83, },
+        {   78,   88, }, {   82,   92, }, {   86,   96, }, {   90,  100, },
+        {   93,  105, }, {   97,  109, }, {  101,  114, }, {  105,  118, },
+        {  109,  122, }, {  113,  127, }, {  116,  131, }, {  120,  136, },
+        {  124,  140, }, {  128,  145, }, {  132,  149, }, {  136,  154, },
+        {  140,  158, }, {  143,  163, }, {  147,  168, }, {  151,  172, },
+        {  155,  177, }, {  159,  181, }, {  163,  186, }, {  166,  190, },
+        {  170,  195, }, {  174,  199, }, {  178,  204, }, {  182,  208, },
+        {  185,  213, }, {  189,  217, }, {  193,  222, }, {  197,  226, },
+        {  200,  231, }, {  204,  235, }, {  208,  240, }, {  212,  244, },
+        {  215,  249, }, {  219,  253, }, {  223,  258, }, {  226,  262, },
+        {  230,  267, }, {  233,  271, }, {  237,  275, }, {  241,  280, },
+        {  244,  284, }, {  248,  289, }, {  251,  293, }, {  255,  297, },
+        {  259,  302, }, {  262,  306, }, {  266,  311, }, {  269,  315, },
+        {  273,  319, }, {  276,  324, }, {  280,  328, }, {  283,  332, },
+        {  287,  337, }, {  290,  341, }, {  293,  345, }, {  297,  349, },
+        {  300,  354, }, {  304,  358, }, {  307,  362, }, {  310,  367, },
+        {  314,  371, }, {  317,  375, }, {  321,  379, }, {  324,  384, },
+        {  327,  388, }, {  331,  392, }, {  334,  396, }, {  337,  401, },
+        {  343,  409, }, {  350,  417, }, {  356,  425, }, {  362,  433, },
+        {  369,  441, }, {  375,  449, }, {  381,  458, }, {  387,  466, },
+        {  394,  474, }, {  400,  482, }, {  406,  490, }, {  412,  498, },
+        {  418,  506, }, {  424,  514, }, {  430,  523, }, {  436,  531, },
+        {  442,  539, }, {  448,  547, }, {  454,  555, }, {  460,  563, },
+        {  466,  571, }, {  472,  579, }, {  478,  588, }, {  484,  596, },
+        {  490,  604, }, {  499,  616, }, {  507,  628, }, {  516,  640, },
+        {  525,  652, }, {  533,  664, }, {  542,  676, }, {  550,  688, },
+        {  559,  700, }, {  567,  713, }, {  576,  725, }, {  584,  737, },
+        {  592,  749, }, {  601,  761, }, {  609,  773, }, {  617,  785, },
+        {  625,  797, }, {  634,  809, }, {  644,  825, }, {  655,  841, },
+        {  666,  857, }, {  676,  873, }, {  687,  889, }, {  698,  905, },
+        {  708,  922, }, {  718,  938, }, {  729,  954, }, {  739,  970, },
+        {  749,  986, }, {  759, 1002, }, {  770, 1018, }, {  782, 1038, },
+        {  795, 1058, }, {  807, 1078, }, {  819, 1098, }, {  831, 1118, },
+        {  844, 1138, }, {  856, 1158, }, {  868, 1178, }, {  880, 1198, },
+        {  891, 1218, }, {  906, 1242, }, {  920, 1266, }, {  933, 1290, },
+        {  947, 1314, }, {  961, 1338, }, {  975, 1362, }, {  988, 1386, },
+        { 1001, 1411, }, { 1015, 1435, }, { 1030, 1463, }, { 1045, 1491, },
+        { 1061, 1519, }, { 1076, 1547, }, { 1090, 1575, }, { 1105, 1603, },
+        { 1120, 1631, }, { 1137, 1663, }, { 1153, 1695, }, { 1170, 1727, },
+        { 1186, 1759, }, { 1202, 1791, }, { 1218, 1823, }, { 1236, 1859, },
+        { 1253, 1895, }, { 1271, 1931, }, { 1288, 1967, }, { 1306, 2003, },
+        { 1323, 2039, }, { 1342, 2079, }, { 1361, 2119, }, { 1379, 2159, },
+        { 1398, 2199, }, { 1416, 2239, }, { 1436, 2283, }, { 1456, 2327, },
+        { 1476, 2371, }, { 1496, 2415, }, { 1516, 2459, }, { 1537, 2507, },
+        { 1559, 2555, }, { 1580, 2603, }, { 1601, 2651, }, { 1624, 2703, },
+        { 1647, 2755, }, { 1670, 2807, }, { 1692, 2859, }, { 1717, 2915, },
+        { 1741, 2971, }, { 1766, 3027, }, { 1791, 3083, }, { 1817, 3143, },
+        { 1844, 3203, }, { 1871, 3263, }, { 1900, 3327, }, { 1929, 3391, },
+        { 1958, 3455, }, { 1990, 3523, }, { 2021, 3591, }, { 2054, 3659, },
+        { 2088, 3731, }, { 2123, 3803, }, { 2159, 3876, }, { 2197, 3952, },
+        { 2236, 4028, }, { 2276, 4104, }, { 2319, 4184, }, { 2363, 4264, },
+        { 2410, 4348, }, { 2458, 4432, }, { 2508, 4516, }, { 2561, 4604, },
+        { 2616, 4692, }, { 2675, 4784, }, { 2737, 4876, }, { 2802, 4972, },
+        { 2871, 5068, }, { 2944, 5168, }, { 3020, 5268, }, { 3102, 5372, },
+        { 3188, 5476, }, { 3280, 5584, }, { 3375, 5692, }, { 3478, 5804, },
+        { 3586, 5916, }, { 3702, 6032, }, { 3823, 6148, }, { 3953, 6268, },
+        { 4089, 6388, }, { 4236, 6512, }, { 4394, 6640, }, { 4559, 6768, },
+        { 4737, 6900, }, { 4929, 7036, }, { 5130, 7172, }, { 5347, 7312, },
+    }
+};
--- /dev/null
+++ b/src/dequant_tables.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_DEQUANT_TABLES_H__
+#define __DAV1D_SRC_DEQUANT_TABLES_H__
+
+#include <stdint.h>
+
+#include "src/levels.h"
+
+/* Dequantizer step sizes, [table][qindex][0 = DC, 1 = AC];
+ * see dequant_tables.c for the definition. */
+extern const uint16_t dav1d_dq_tbl[][QINDEX_RANGE][2];
+
+#endif /* __DAV1D_SRC_DEQUANT_TABLES_H__ */
--- /dev/null
+++ b/src/env.h
@@ -1,0 +1,754 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_ENV_H__
+#define __DAV1D_SRC_ENV_H__
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "src/levels.h"
+#include "src/ref_mvs.h"
+#include "src/tables.h"
+
+/* Per-edge coding context for one superblock row ("above") or column
+ * ("left"), consulted by the context-derivation helpers below. Most arrays
+ * are indexed per 4px unit (via the yb4/xb4 parameters below); partition[]
+ * is indexed per 8px unit (yb8/xb8).
+ * NOTE(review): unit interpretation inferred from the array sizes and the
+ * indexing in this header — confirm against the writers in decode.c. */
+typedef struct BlockContext {
+    uint8_t mode[32];
+    uint8_t lcoef[32];
+    uint8_t ccoef[2][32];
+    uint8_t seg_pred[32];
+    uint8_t skip[32];
+    uint8_t skip_mode[32];
+    uint8_t intra[32];
+    uint8_t comp_type[32];
+    int8_t ref[2][32]; // -1 means intra
+    uint8_t filter[2][32]; // 3 means unset
+    int8_t tx_intra[32];
+    int8_t tx[32];
+    uint8_t tx_lpf_y[32];
+    uint8_t tx_lpf_uv[32];
+    uint8_t partition[16];
+    uint8_t uvmode[32];
+    uint8_t pal_sz[32];
+} BlockContext;
+
+/* Context (0..3) for the intra/inter flag: the sum of the neighbors' intra
+ * flags, bumped from 2 to 3 when both neighbors are intra; with a single
+ * available neighbor, twice its flag; 0 with no neighbors. */
+static inline int get_intra_ctx(const BlockContext *const a,
+                                const BlockContext *const l,
+                                const int yb4, const int xb4,
+                                const int have_top, const int have_left)
+{
+    if (have_left) {
+        if (have_top) {
+            const int ctx = l->intra[yb4] + a->intra[xb4];
+            return ctx + (ctx == 2); // both intra -> 3, mixed -> 1, neither -> 0
+        } else
+            return l->intra[yb4] * 2;
+    } else {
+        return have_top ? a->intra[xb4] * 2 : 0;
+    }
+}
+
+/* Context (0..2) for the tx-size symbol: counts how many of the two
+ * neighbors used an intra tx at least as large as the maximum allowed
+ * (left compared against log2-height, above against log2-width).
+ * Note: have_top/have_left are accepted for signature symmetry but unused;
+ * the neighbor arrays are assumed to hold valid defaults. */
+static inline int get_tx_ctx(const BlockContext *const a,
+                             const BlockContext *const l,
+                             const TxfmInfo *const max_tx,
+                             const int yb4, const int xb4,
+                             const int have_top, const int have_left)
+{
+    return (l->tx_intra[yb4] >= max_tx->lh) + (a->tx_intra[xb4] >= max_tx->lw);
+}
+
+/* Context (0..3) for the partition symbol: bit 0 is whether the above
+ * neighbor split at this block level, bit 1 whether the left neighbor did
+ * (each level stored as one bit of the partition[] bitmask, level bl
+ * mapping to bit 4 - bl). */
+static inline int get_partition_ctx(const BlockContext *const a,
+                                    const BlockContext *const l,
+                                    const enum BlockLevel bl,
+                                    const int yb8, const int xb8)
+{
+    return ((a->partition[xb8] >> (4 - bl)) & 1) +
+          (((l->partition[yb8] >> (4 - bl)) & 1) << 1);
+}
+
+/* Probability mass (15-bit scale) of element e in an inverted CDF, i.e.
+ * cdf[e - 1] - cdf[e] with an implicit cdf[-1] of 32768. */
+static inline unsigned cdf_element_prob(const uint16_t *const cdf, const int e) {
+    return (e > 0 ? cdf[e - 1] : 32768) - cdf[e];
+}
+
+/* Total probability mass of the partition types {H, H4 (except at
+ * 128x128, which has no H4), SPLIT, T_TOP_SPLIT, T_BOTTOM_SPLIT,
+ * T_LEFT_SPLIT}, computed by subtracting each from 32768 and inverting.
+ * NOTE(review): presumably used to synthesize a binary split/no-split
+ * probability at frame edges — confirm at the call site. */
+static inline unsigned gather_left_partition_prob(const uint16_t *const in,
+                                                  const enum BlockLevel bl)
+{
+    unsigned out = 32768;
+    out -= cdf_element_prob(in, PARTITION_H);
+    if (bl != BL_128X128)
+        out -= cdf_element_prob(in, PARTITION_H4);
+    out -= cdf_element_prob(in, PARTITION_SPLIT);
+    out -= cdf_element_prob(in, PARTITION_T_TOP_SPLIT);
+    out -= cdf_element_prob(in, PARTITION_T_BOTTOM_SPLIT);
+    out -= cdf_element_prob(in, PARTITION_T_LEFT_SPLIT);
+    return 32768 - out;
+}
+
+/* Counterpart of gather_left_partition_prob for the vertical direction:
+ * total probability mass of {V, V4 (except at 128x128), SPLIT,
+ * T_TOP_SPLIT, T_LEFT_SPLIT, T_RIGHT_SPLIT}. */
+static inline unsigned gather_top_partition_prob(const uint16_t *const in,
+                                                 const enum BlockLevel bl)
+{
+    unsigned out = 32768;
+    out -= cdf_element_prob(in, PARTITION_V);
+    if (bl != BL_128X128)
+        out -= cdf_element_prob(in, PARTITION_V4);
+    out -= cdf_element_prob(in, PARTITION_SPLIT);
+    out -= cdf_element_prob(in, PARTITION_T_TOP_SPLIT);
+    out -= cdf_element_prob(in, PARTITION_T_LEFT_SPLIT);
+    out -= cdf_element_prob(in, PARTITION_T_RIGHT_SPLIT);
+    return 32768 - out;
+}
+
+/* Select the transform-type set for a block of rect-tx size tx:
+ * - lossless segments use the dedicated lossless set (4x4 only);
+ * - transforms with a 64x64 (or larger) dimension are DCT-only;
+ * - 32x32 adds identity for inter blocks;
+ * - otherwise the reduced_txtp_set header flag and the minimum square
+ *   size (16x16 vs. smaller) pick among the remaining sets, which also
+ *   differ between inter and intra. */
+static inline enum TxfmTypeSet get_ext_txtp_set(const enum RectTxfmSize tx,
+                                                const int inter,
+                                                const Av1FrameHeader *const hdr,
+                                                const int seg_id)
+{
+    if (hdr->segmentation.lossless[seg_id]) {
+        assert(tx == (int) TX_4X4);
+        return TXTP_SET_LOSSLESS;
+    }
+
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+
+    if (t_dim->max >= TX_64X64)
+        return TXTP_SET_DCT;
+
+    if (t_dim->max == TX_32X32)
+        return inter ? TXTP_SET_DCT_ID : TXTP_SET_DCT;
+
+    if (hdr->reduced_txtp_set)
+        return inter ? TXTP_SET_DCT_ID : TXTP_SET_DT4_ID;
+
+    const enum TxfmSize txsqsz = t_dim->min;
+
+    if (inter)
+        return txsqsz == TX_16X16 ? TXTP_SET_DT9_ID_1D : TXTP_SET_ALL;
+    else
+        return txsqsz == TX_16X16 ? TXTP_SET_DT4_ID : TXTP_SET_DT4_ID_1D;
+}
+
+/* Transform type for an intra chroma block: WHT for lossless segments
+ * (4x4 only), DCT when any transform dimension reaches 32, otherwise the
+ * type implied by the chroma intra prediction mode. */
+static inline enum TxfmType get_uv_intra_txtp(const enum IntraPredMode uv_mode,
+                                              const enum RectTxfmSize tx,
+                                              const Av1FrameHeader *const hdr,
+                                              const int seg_id)
+{
+    if (hdr->segmentation.lossless[seg_id]) {
+        assert(tx == (int) TX_4X4);
+        return WHT_WHT;
+    }
+
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+
+    return t_dim->max == TX_32X32 ? DCT_DCT : av1_txtp_from_uvmode[uv_mode];
+}
+
+/* Transform type for an inter chroma block, derived from the luma type:
+ * WHT for lossless segments; at 32x32, identity is kept but anything else
+ * becomes DCT; at a minimum square size of 16x16, the four 1D
+ * ADST/FLIPADST variants fall back to DCT; otherwise the luma type is
+ * used as-is. */
+static inline enum TxfmType get_uv_inter_txtp(const TxfmInfo *const uvt_dim,
+                                              const enum TxfmType ytxtp,
+                                              const Av1FrameHeader *const hdr,
+                                              const int seg_id)
+{
+    if (hdr->segmentation.lossless[seg_id]) {
+        assert(uvt_dim->max == TX_4X4);
+        return WHT_WHT;
+    }
+
+    if (uvt_dim->max == TX_32X32)
+        return ytxtp == IDTX ? IDTX : DCT_DCT;
+    if (uvt_dim->min == TX_16X16 &&
+        ((1 << ytxtp) & ((1 << H_FLIPADST) | (1 << V_FLIPADST) |
+                         (1 << H_ADST) | (1 << V_ADST))))
+    {
+        return DCT_DCT;
+    }
+
+    return ytxtp;
+}
+
+/* Context for the interpolation-filter symbol in direction dir. A neighbor
+ * only contributes its filter if it predicts from the same reference ref;
+ * otherwise it contributes the "unset" value N_SWITCHABLE_FILTERS. Equal or
+ * single contributions select that filter; conflicting ones yield the
+ * unset value. The result is offset by comp * 4 to separate the compound
+ * context space from the single-prediction one. */
+static inline int get_filter_ctx(const BlockContext *const a,
+                                 const BlockContext *const l,
+                                 const int comp, const int dir, const int ref,
+                                 const int yb4, const int xb4)
+{
+    const int a_filter = (a->ref[0][xb4] == ref || a->ref[1][xb4] == ref) ?
+                         a->filter[dir][xb4] : N_SWITCHABLE_FILTERS;
+    const int l_filter = (l->ref[0][yb4] == ref || l->ref[1][yb4] == ref) ?
+                         l->filter[dir][yb4] : N_SWITCHABLE_FILTERS;
+
+    if (a_filter == l_filter) {
+        return comp * 4 + a_filter;
+    } else if (a_filter == N_SWITCHABLE_FILTERS) {
+        return comp * 4 + l_filter;
+    } else if (l_filter == N_SWITCHABLE_FILTERS) {
+        return comp * 4 + a_filter;
+    } else {
+        return comp * 4 + N_SWITCHABLE_FILTERS;
+    }
+}
+
+/* Context (0..4) for the compound-vs-single prediction flag, based on
+ * whether each neighbor is compound and, if not, whether its first
+ * reference is forward (< 4) or backward/intra (the >= 4U trick below
+ * also catches intra's -1 via unsigned comparison). */
+static inline int get_comp_ctx(const BlockContext *const a,
+                               const BlockContext *const l,
+                               const int yb4, const int xb4,
+                               const int have_top, const int have_left)
+{
+    if (have_top) {
+        if (have_left) {
+            if (a->comp_type[xb4]) {
+                if (l->comp_type[yb4]) {
+                    return 4;
+                } else {
+                    // 4U means intra (-1) or bwd (>= 4)
+                    return 2 + (l->ref[0][yb4] >= 4U);
+                }
+            } else if (l->comp_type[yb4]) {
+                // 4U means intra (-1) or bwd (>= 4)
+                return 2 + (a->ref[0][xb4] >= 4U);
+            } else {
+                return (l->ref[0][yb4] >= 4) ^ (a->ref[0][xb4] >= 4);
+            }
+        } else {
+            return a->comp_type[xb4] ? 3 : a->ref[0][xb4] >= 4;
+        }
+    } else if (have_left) {
+        return l->comp_type[yb4] ? 3 : l->ref[0][yb4] >= 4;
+    } else {
+        return 1;
+    }
+}
+
+/* Context (0..4) for the uni- vs. bi-directional compound-reference type.
+ * has_uni_comp tests whether a neighbor's two references point in the
+ * same temporal direction (both forward, < 4, or both backward, >= 4). */
+static inline int get_comp_dir_ctx(const BlockContext *const a,
+                                   const BlockContext *const l,
+                                   const int yb4, const int xb4,
+                                   const int have_top, const int have_left)
+{
+#define has_uni_comp(edge, off) \
+    ((edge->ref[0][off] < 4) == (edge->ref[1][off] < 4))
+
+    if (have_top && have_left) {
+        const int a_intra = a->intra[xb4], l_intra = l->intra[yb4];
+
+        if (a_intra && l_intra) return 2;
+        if (a_intra || l_intra) {
+            // one intra neighbor: classify by the inter one
+            const BlockContext *const edge = a_intra ? l : a;
+            const int off = a_intra ? yb4 : xb4;
+
+            if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
+            return 1 + 2 * has_uni_comp(edge, off);
+        }
+
+        const int a_comp = a->comp_type[xb4] != COMP_INTER_NONE;
+        const int l_comp = l->comp_type[yb4] != COMP_INTER_NONE;
+        const int a_ref0 = a->ref[0][xb4], l_ref0 = l->ref[0][yb4];
+
+        if (!a_comp && !l_comp) {
+            // both single-prediction: 3 if same direction, else 1
+            return 1 + 2 * ((a_ref0 >= 4) == (l_ref0 >= 4));
+        } else if (!a_comp || !l_comp) {
+            const BlockContext *const edge = a_comp ? a : l;
+            const int off = a_comp ? xb4 : yb4;
+
+            if (!has_uni_comp(edge, off)) return 1;
+            return 3 + ((a_ref0 >= 4) == (l_ref0 >= 4));
+        } else {
+            // both compound
+            const int a_uni = has_uni_comp(a, xb4), l_uni = has_uni_comp(l, yb4);
+
+            if (!a_uni && !l_uni) return 0;
+            if (!a_uni || !l_uni) return 2;
+            return 3 + ((a_ref0 == 4) == (l_ref0 == 4));
+        }
+    } else if (have_top || have_left) {
+        const BlockContext *const edge = have_left ? l : a;
+        const int off = have_left ? yb4 : xb4;
+
+        if (edge->intra[off]) return 2;
+        if (edge->comp_type[off] == COMP_INTER_NONE) return 2;
+        return 4 * has_uni_comp(edge, off);
+    } else {
+        return 2;
+    }
+}
+
+/* Signed difference poc0 - poc1 in the order_hint_n_bits-wide circular
+ * order-hint space; the mask trick sign-extends the wrapped value into
+ * [-(1 << (bits - 1)), (1 << (bits - 1)) - 1]. */
+static inline int get_poc_diff(const int order_hint_n_bits,
+                               const int poc0, const int poc1)
+{
+    const int mask = 1 << (order_hint_n_bits - 1);
+    const int diff = poc0 - poc1;
+    return (diff & (mask - 1)) - (diff & mask);
+}
+
+/* Context (0..5) for the distance-weighted (joint) compound flag: 3 when
+ * the two references are temporally equidistant from the current frame
+ * (wrapped order-hint distance), plus one per neighbor that uses an
+ * averaging-or-later compound type or references index 6.
+ * NOTE(review): ref index 6 presumably denotes ALTREF — confirm against
+ * the reference-frame enum. */
+static inline int get_jnt_comp_ctx(const int order_hint_n_bits,
+                                   const unsigned poc, const unsigned ref0poc,
+                                   const unsigned ref1poc,
+                                   const BlockContext *const a,
+                                   const BlockContext *const l,
+                                   const int yb4, const int xb4)
+{
+    const unsigned d0 = abs(get_poc_diff(order_hint_n_bits, ref0poc, poc));
+    const unsigned d1 = abs(get_poc_diff(order_hint_n_bits, ref1poc, poc));
+    const int offset = d0 == d1;
+    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_AVG ||
+                      a->ref[0][xb4] == 6;
+    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_AVG ||
+                      l->ref[0][yb4] == 6;
+
+    return 3 * offset + a_ctx + l_ctx;
+}
+
+/* Context (0..5) for the masked-compound flag: each neighbor contributes
+ * 1 if it uses a seg/wedge compound type, 3 if it is a single prediction
+ * from reference index 6, else 0; the sum is clamped to 5. */
+static inline int get_mask_comp_ctx(const BlockContext *const a,
+                                    const BlockContext *const l,
+                                    const int yb4, const int xb4)
+{
+    const int a_ctx = a->comp_type[xb4] >= COMP_INTER_SEG ? 1 :
+                      a->ref[0][xb4] == 6 ? 3 : 0;
+    const int l_ctx = l->comp_type[yb4] >= COMP_INTER_SEG ? 1 :
+                      l->ref[0][yb4] == 6 ? 3 : 0;
+
+    return imin(a_ctx + l_ctx, 5);
+}
+
+/* Several reference-frame bit symbols share a context derivation;
+ * alias them onto the canonical implementations below. */
+#define av1_get_ref_2_ctx av1_get_bwd_ref_ctx
+#define av1_get_ref_3_ctx av1_get_fwd_ref_ctx
+#define av1_get_ref_4_ctx av1_get_fwd_ref_1_ctx
+#define av1_get_ref_5_ctx av1_get_fwd_ref_2_ctx
+#define av1_get_ref_6_ctx av1_get_bwd_ref_1_ctx
+#define av1_get_uni_p_ctx av1_get_ref_ctx
+#define av1_get_uni_p2_ctx av1_get_fwd_ref_2_ctx
+
+/* Context (0..2) for the first reference bit: counts forward (< 4) vs.
+ * backward (>= 4) references among the non-intra neighbors (both refs of
+ * a compound neighbor count). 1 on a tie, 0 when backward refs dominate,
+ * 2 when forward refs dominate. */
+static inline int av1_get_ref_ctx(const BlockContext *const a,
+                                  const BlockContext *const l,
+                                  const int yb4, const int xb4,
+                                  int have_top, int have_left)
+{
+    int cnt[2] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        cnt[a->ref[0][xb4] >= 4]++;
+        if (a->comp_type[xb4]) cnt[a->ref[1][xb4] >= 4]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        cnt[l->ref[0][yb4] >= 4]++;
+        if (l->comp_type[yb4]) cnt[l->ref[1][yb4] >= 4]++;
+    }
+
+    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+/* Context (0..2) among the forward references: compares neighbor usage of
+ * refs {0,1} against refs {2,3}. 1 on a tie, 0 when {2,3} dominate,
+ * 2 when {0,1} dominate. */
+static inline int av1_get_fwd_ref_ctx(const BlockContext *const a,
+                                      const BlockContext *const l,
+                                      const int yb4, const int xb4,
+                                      const int have_top, const int have_left)
+{
+    int cnt[4] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        if (a->ref[0][xb4] < 4) cnt[a->ref[0][xb4]]++;
+        if (a->comp_type[xb4] && a->ref[1][xb4] < 4) cnt[a->ref[1][xb4]]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        if (l->ref[0][yb4] < 4) cnt[l->ref[0][yb4]]++;
+        if (l->comp_type[yb4] && l->ref[1][yb4] < 4) cnt[l->ref[1][yb4]]++;
+    }
+
+    // collapse to {0,1} vs. {2,3}
+    cnt[0] += cnt[1];
+    cnt[2] += cnt[3];
+
+    return cnt[0] == cnt[2] ? 1 : cnt[0] < cnt[2] ? 0 : 2;
+}
+
+/* Context (0..2) between forward refs 0 and 1: counts neighbor usage of
+ * each; 1 on a tie, 0 when ref 1 dominates, 2 when ref 0 dominates. */
+static inline int av1_get_fwd_ref_1_ctx(const BlockContext *const a,
+                                        const BlockContext *const l,
+                                        const int yb4, const int xb4,
+                                        const int have_top, const int have_left)
+{
+    int cnt[2] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        if (a->ref[0][xb4] < 2) cnt[a->ref[0][xb4]]++;
+        if (a->comp_type[xb4] && a->ref[1][xb4] < 2) cnt[a->ref[1][xb4]]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        if (l->ref[0][yb4] < 2) cnt[l->ref[0][yb4]]++;
+        if (l->comp_type[yb4] && l->ref[1][yb4] < 2) cnt[l->ref[1][yb4]]++;
+    }
+
+    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+/* Context (0..2) between forward refs 2 and 3; "ref - 2U < 2" is an
+ * unsigned range check for ref in {2,3} that also rejects intra's -1.
+ * 1 on a tie, 0 when ref 3 dominates, 2 when ref 2 dominates. */
+static inline int av1_get_fwd_ref_2_ctx(const BlockContext *const a,
+                                        const BlockContext *const l,
+                                        const int yb4, const int xb4,
+                                        const int have_top, const int have_left)
+{
+    int cnt[2] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        if (a->ref[0][xb4] - 2U < 2) cnt[a->ref[0][xb4] - 2]++;
+        if (a->comp_type[xb4] && a->ref[1][xb4] - 2U < 2) cnt[a->ref[1][xb4] - 2]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        if (l->ref[0][yb4] - 2U < 2) cnt[l->ref[0][yb4] - 2]++;
+        if (l->comp_type[yb4] && l->ref[1][yb4] - 2U < 2) cnt[l->ref[1][yb4] - 2]++;
+    }
+
+    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+/* Context (0..2) among the backward references (4..6): compares neighbor
+ * usage of refs {4,5} combined against ref 6. 1 on a tie, 0 when ref 6
+ * dominates, 2 when {4,5} dominate. */
+static inline int av1_get_bwd_ref_ctx(const BlockContext *const a,
+                                      const BlockContext *const l,
+                                      const int yb4, const int xb4,
+                                      const int have_top, const int have_left)
+{
+    int cnt[3] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
+        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
+        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
+    }
+
+    // collapse refs 4 and 5 into one bucket
+    cnt[1] += cnt[0];
+
+    return cnt[2] == cnt[1] ? 1 : cnt[1] < cnt[2] ? 0 : 2;
+}
+
+/* Context (0..2) between backward refs 4 and 5 (ref 6 lands in cnt[2],
+ * which the return expression ignores). 1 on a tie, 0 when ref 5
+ * dominates, 2 when ref 4 dominates. */
+static inline int av1_get_bwd_ref_1_ctx(const BlockContext *const a,
+                                        const BlockContext *const l,
+                                        const int yb4, const int xb4,
+                                        const int have_top, const int have_left)
+{
+    int cnt[3] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        if (a->ref[0][xb4] >= 4) cnt[a->ref[0][xb4] - 4]++;
+        if (a->comp_type[xb4] && a->ref[1][xb4] >= 4) cnt[a->ref[1][xb4] - 4]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        if (l->ref[0][yb4] >= 4) cnt[l->ref[0][yb4] - 4]++;
+        if (l->comp_type[yb4] && l->ref[1][yb4] >= 4) cnt[l->ref[1][yb4] - 4]++;
+    }
+
+    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+// Derive the symbol context for the uni-directional compound prediction
+// "p1" decision. Counts neighbor usage of refs 1..3 ("ref - 1U < 3" is an
+// unsigned-compare trick for 1 <= ref <= 3), then compares ref 1 against
+// refs 2+3 combined.
+static inline int av1_get_uni_p1_ctx(const BlockContext *const a,
+                                     const BlockContext *const l,
+                                     const int yb4, const int xb4,
+                                     const int have_top, const int have_left)
+{
+    int cnt[3] = { 0 };
+
+    if (have_top && !a->intra[xb4]) {
+        if (a->ref[0][xb4] - 1U < 3) cnt[a->ref[0][xb4] - 1]++;
+        if (a->comp_type[xb4] && a->ref[1][xb4] - 1U < 3) cnt[a->ref[1][xb4] - 1]++;
+    }
+
+    if (have_left && !l->intra[yb4]) {
+        if (l->ref[0][yb4] - 1U < 3) cnt[l->ref[0][yb4] - 1]++;
+        if (l->comp_type[yb4] && l->ref[1][yb4] - 1U < 3) cnt[l->ref[1][yb4] - 1]++;
+    }
+
+    // merge counts for refs 2 and 3 before comparing against ref 1
+    cnt[1] += cnt[2];
+
+    return cnt[0] == cnt[1] ? 1 : cnt[0] < cnt[1] ? 0 : 2;
+}
+
+// Derive the dynamic-reference-list (DRL) index context from the weights of
+// two adjacent candidate MVs in the ref-MV stack. 640 appears to be the
+// weight threshold separating "strong" from "weak" candidates (presumably
+// REF_CAT_LEVEL from libaom — confirm against ref_mvs.c).
+// Note: the (weight < 640 && weight >= 640) combination falls through to the
+// final return 0.
+static inline int get_drl_context(const candidate_mv *const ref_mv_stack,
+                                  const int ref_idx)
+{
+    if (ref_mv_stack[ref_idx].weight >= 640 &&
+        ref_mv_stack[ref_idx + 1].weight >= 640)
+        return 0;
+
+    if (ref_mv_stack[ref_idx].weight >= 640 &&
+        ref_mv_stack[ref_idx + 1].weight < 640)
+        return 1;
+
+    if (ref_mv_stack[ref_idx].weight < 640 &&
+        ref_mv_stack[ref_idx + 1].weight < 640)
+        return 2;
+
+    return 0;
+}
+
+// Predict the segment ID for block (by, bx) from already-decoded neighbors
+// in the current frame's segmentation map, and compute the spatial context
+// (*seg_ctx in 0..2, higher = neighbors agree more) used to code the
+// actual segment ID.
+static inline unsigned get_cur_frame_segid(const int by, const int bx,
+                                           const int have_top,
+                                           const int have_left,
+                                           int *const seg_ctx,
+                                           const uint8_t *cur_seg_map,
+                                           const ptrdiff_t stride)
+{
+    cur_seg_map += bx + by * stride;
+    if (have_left && have_top) {
+        const int l = cur_seg_map[-1];          // left neighbor
+        const int a = cur_seg_map[-stride];     // above neighbor
+        const int al = cur_seg_map[-(stride + 1)]; // above-left neighbor
+
+        if (l == a && al == l) *seg_ctx = 2;
+        else if (l == a || al == l || a == al) *seg_ctx = 1;
+        else *seg_ctx = 0;
+        // prefer "above" when it agrees with above-left, otherwise "left"
+        return a == al ? a : l;
+    } else {
+        *seg_ctx = 0;
+        return have_left ? cur_seg_map[-1] : have_top ? cur_seg_map[-stride] : 0;
+    }
+}
+
+// Derive the all-coefficients-skip (txb_skip) context for a transform block
+// from the per-4x4 coded-coefficient flags stored in the a (above) and
+// l (left) context buffers. Chroma and luma use different context ranges.
+static inline int get_coef_skip_ctx(const TxfmInfo *const t_dim,
+                                    const enum BlockSize bs,
+                                    const uint8_t *const a,
+                                    const uint8_t *const l,
+                                    const int chroma,
+                                    const enum Dav1dPixelLayout layout)
+{
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+
+    if (chroma) {
+        const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+        const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+        // whether the (subsampled) block covers more than one transform unit
+        const int not_one_blk = b_dim[2] - (!!b_dim[2] && ss_hor) > t_dim->lw ||
+                                b_dim[3] - (!!b_dim[3] && ss_ver) > t_dim->lh;
+        int ca, cl;
+
+        // Read the whole edge in one typed load sized to the tx dimension;
+        // the trailing "break" makes each switch case terminate. 0x3F masks
+        // the per-4x4 "levels" bits, excluding the DC-sign bits (bits 6-7).
+#define MERGE_CTX(dir, type, mask) \
+        c##dir = !!((*(const type *) dir) & mask); \
+        break
+        switch (t_dim->lw) {
+        case TX_4X4:   MERGE_CTX(a, uint8_t,  0x3F);
+        case TX_8X8:   MERGE_CTX(a, uint16_t, 0x3F3F);
+        case TX_16X16: MERGE_CTX(a, uint32_t, 0x3F3F3F3FU);
+        case TX_32X32: MERGE_CTX(a, uint64_t, 0x3F3F3F3F3F3F3F3FULL);
+        default: abort();
+        }
+        switch (t_dim->lh) {
+        case TX_4X4:   MERGE_CTX(l, uint8_t,  0x3F);
+        case TX_8X8:   MERGE_CTX(l, uint16_t, 0x3F3F);
+        case TX_16X16: MERGE_CTX(l, uint32_t, 0x3F3F3F3FU);
+        case TX_32X32: MERGE_CTX(l, uint64_t, 0x3F3F3F3F3F3F3F3FULL);
+        default: abort();
+        }
+#undef MERGE_CTX
+
+        // chroma contexts occupy 7..12
+        return 7 + not_one_blk * 3 + ca + cl;
+    } else if (b_dim[2] == t_dim->lw && b_dim[3] == t_dim->lh) {
+        // luma, transform covers the whole block: fixed context 0
+        return 0;
+    } else {
+        // luma, partial-block transform: context from min/max of the merged
+        // above/left edge magnitudes via a small lookup table
+        static const uint8_t skip_contexts[5][5] = {
+            { 1, 2, 2, 2, 3 },
+            { 1, 4, 4, 4, 5 },
+            { 1, 4, 4, 4, 5 },
+            { 1, 4, 4, 4, 5 },
+            { 1, 4, 4, 4, 6 }
+        };
+        uint64_t la, ll;
+
+        // Load the edge, OR-fold it down to a single byte, then mask to the
+        // level bits. TX_64X64 needs a second load since its edge spans two
+        // 64-bit words.
+#define MERGE_CTX(dir, type, tx) do { \
+            l##dir = *(const type *) dir; \
+            if (tx == TX_64X64) \
+                l##dir |= *(const type *) &dir[sizeof(type)]; \
+            if (tx >= TX_32X32) l##dir |= l##dir >> 32; \
+            if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
+            if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
+            l##dir &= 0x3F; \
+        } while (0); \
+        break
+        switch (t_dim->lw) {
+        case TX_4X4:   MERGE_CTX(a, uint8_t,  TX_4X4);
+        case TX_8X8:   MERGE_CTX(a, uint16_t, TX_8X8);
+        case TX_16X16: MERGE_CTX(a, uint32_t, TX_16X16);
+        case TX_32X32: MERGE_CTX(a, uint64_t, TX_32X32);
+        case TX_64X64: MERGE_CTX(a, uint64_t, TX_64X64);
+        }
+        switch (t_dim->lh) {
+        case TX_4X4:   MERGE_CTX(l, uint8_t,  TX_4X4);
+        case TX_8X8:   MERGE_CTX(l, uint16_t, TX_8X8);
+        case TX_16X16: MERGE_CTX(l, uint32_t, TX_16X16);
+        case TX_32X32: MERGE_CTX(l, uint64_t, TX_32X32);
+        case TX_64X64: MERGE_CTX(l, uint64_t, TX_64X64);
+        }
+#undef MERGE_CTX
+
+        const int max = imin(la | ll, 4);
+        const int min = imin(imin(la, ll), 4);
+
+        return skip_contexts[min][max];
+    }
+}
+
+// Derive the non-zero (base level) coefficient context for the coefficient
+// at raster coordinate rc in scan position scan_idx. For EOB coefficients a
+// coarse 4-way context based on scan position is used; otherwise the context
+// is derived from the magnitudes of up to 5 already-decoded neighbors in the
+// levels buffer, with the neighbor pattern depending on the tx class.
+static inline int get_coef_nz_ctx(uint8_t *const levels, const int scan_idx,
+                                  const int rc, const int is_eob,
+                                  const enum RectTxfmSize tx,
+                                  const enum TxClass tx_class)
+{
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+
+    if (is_eob) {
+        if (scan_idx == 0)         return 0;
+        const int eighth_sz = imin(t_dim->w, 8) * imin(t_dim->h, 8) * 2;
+        if (scan_idx <= eighth_sz) return 1;
+        const int quart_sz = eighth_sz * 2;
+        if (scan_idx <= quart_sz)  return 2;
+        return 3;
+    }
+
+    // split rc into column (x) and row (y); heights are clamped to 32 (8*4)
+    const int x = rc >> (2 + imin(t_dim->lh, 3));
+    const int y = rc & (4 * imin(t_dim->h, 8) - 1);
+    // levels buffer has one extra padding row per column
+    const ptrdiff_t stride = 4 * (imin(t_dim->h, 8) + 1);
+    static const uint8_t offsets[3][5][2 /* x, y */] = {
+        [TX_CLASS_2D] = {
+            { 0, 1 }, { 1, 0 }, { 2, 0 }, { 0, 2 }, { 1, 1 }
+        }, [TX_CLASS_V] = {
+            { 0, 1 }, { 1, 0 }, { 0, 2 }, { 0, 3 }, { 0, 4 }
+        }, [TX_CLASS_H] = {
+            { 0, 1 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }
+        }
+    };
+    const uint8_t (*const off)[2] = offsets[tx_class];
+    int mag = 0;
+    // sum neighbor magnitudes, each clamped to 3
+    for (int i = 0; i < 5; i++)
+        mag += imin(levels[(x + off[i][0]) * stride + (y + off[i][1])], 3);
+    const int ctx = imin((mag + 1) >> 1, 4);
+    if (tx_class == TX_CLASS_2D) {
+        return !rc ? 0 :
+            av1_nz_map_ctx_offset[tx][imin(y, 4)][imin(x, 4)] + ctx;
+    } else {
+        // 1D transforms: context depends on distance along the transform axis
+        return 26 + imin((tx_class == TX_CLASS_V) ? y : x, 2) * 5 + ctx;
+    }
+}
+
+// Derive the DC-sign coding context from the accumulated DC-sign info of
+// above (a) and left (l) neighbors. The per-4x4 context bytes store the
+// sign info in bits 6-7, hence the ">> 6" and 0x03 masks; the byte-wise
+// sums are folded down to a single byte and compared against the neutral
+// value (t_dim->w + t_dim->h).
+static inline int get_dc_sign_ctx(const TxfmInfo *const t_dim,
+                                  const uint8_t *const a,
+                                  const uint8_t *const l)
+{
+    uint64_t sa, sl;
+
+    // typed load sized to the tx edge, then horizontal byte-sum via shifts;
+    // the trailing "break" terminates each switch case
+#define MERGE_CTX(dir, type, tx, mask) do { \
+        s##dir = ((*(const type *) dir) >> 6) & mask; \
+        if (tx == TX_64X64) \
+            s##dir += ((*(const type *) &dir[sizeof(type)]) >> 6) & mask; \
+        if (tx >= TX_32X32) s##dir += s##dir >> 32; \
+        if (tx >= TX_16X16) s##dir += s##dir >> 16; \
+        if (tx >= TX_8X8)   s##dir += s##dir >> 8; \
+    } while (0); \
+    break
+    switch (t_dim->lw) {
+    case TX_4X4:   MERGE_CTX(a, uint8_t,  TX_4X4,   0x03);
+    case TX_8X8:   MERGE_CTX(a, uint16_t, TX_8X8,   0x0303);
+    case TX_16X16: MERGE_CTX(a, uint32_t, TX_16X16, 0x03030303U);
+    case TX_32X32: MERGE_CTX(a, uint64_t, TX_32X32, 0x0303030303030303ULL);
+    case TX_64X64: MERGE_CTX(a, uint64_t, TX_64X64, 0x0303030303030303ULL);
+    }
+    switch (t_dim->lh) {
+    case TX_4X4:   MERGE_CTX(l, uint8_t,  TX_4X4,   0x03);
+    case TX_8X8:   MERGE_CTX(l, uint16_t, TX_8X8,   0x0303);
+    case TX_16X16: MERGE_CTX(l, uint32_t, TX_16X16, 0x03030303U);
+    case TX_32X32: MERGE_CTX(l, uint64_t, TX_32X32, 0x0303030303030303ULL);
+    case TX_64X64: MERGE_CTX(l, uint64_t, TX_64X64, 0x0303030303030303ULL);
+    }
+#undef MERGE_CTX
+    const int s = ((int) ((sa + sl) & 0xFF)) - (t_dim->w + t_dim->h);
+
+    // 1 = mostly negative, 2 = mostly positive, 0 = balanced
+    return s < 0 ? 1 : s > 0 ? 2 : 0;
+}
+
+// Derive the "base range" (coefficient level > 2) context for the
+// coefficient at raster coordinate rc, from the magnitudes of 3 neighbors
+// in the levels buffer. The neighbor pattern depends on the tx class, and
+// the result is offset by 0 / 7 / 14 depending on position within the block.
+static inline int get_br_ctx(const uint8_t *const levels,
+                             const int rc, const enum RectTxfmSize tx,
+                             const enum TxClass tx_class)
+{
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+    const int x = rc >> (imin(t_dim->lh, 3) + 2);
+    const int y = rc & (4 * imin(t_dim->h, 8) - 1);
+    const int stride = 4 * (imin(t_dim->h, 8) + 1);
+    int mag = 0;
+    static const uint8_t offsets_from_txclass[3][3][2] = {
+        [TX_CLASS_2D] = { { 0, 1 }, { 1, 0 }, { 1, 1 } },
+        [TX_CLASS_H]  = { { 0, 1 }, { 1, 0 }, { 0, 2 } },
+        [TX_CLASS_V]  = { { 0, 1 }, { 1, 0 }, { 2, 0 } }
+    };
+    const uint8_t (*const offsets)[2] = offsets_from_txclass[tx_class];
+    // NB: offsets[i][1] indexes the column and offsets[i][0] the row here
+    for (int i = 0; i < 3; i++)
+        mag += levels[(x + offsets[i][1]) * stride + y + offsets[i][0]];
+
+    mag = imin((mag + 1) >> 1, 6);
+    if (rc == 0) return mag;          // DC coefficient: no positional offset
+    switch (tx_class) {
+    case TX_CLASS_2D:
+        if (y < 2 && x < 2) return mag + 7;
+        break;
+    case TX_CLASS_H:
+        if (x == 0) return mag + 7;
+        break;
+    case TX_CLASS_V:
+        if (y == 0) return mag + 7;
+        break;
+    }
+    return mag + 14;
+}
+
+// Project the global motion model gmv onto the center of the block at
+// (bx4, by4) with size (bw4, bh4) in 4px units, returning a 2D MV.
+// Affine/rot-zoom models evaluate the warp matrix at the block center;
+// translation models just scale down the stored offsets. Precision is
+// reduced by one bit when high-precision MVs (hdr->hp) are disabled.
+static inline mv get_gmv_2d(const WarpedMotionParams *const gmv,
+                            const int bx4, const int by4,
+                            const int bw4, const int bh4,
+                            const Av1FrameHeader *const hdr)
+{
+    switch (gmv->type) {
+    case WM_TYPE_ROT_ZOOM:
+        // rot-zoom is a constrained affine model; verify the constraints
+        assert(gmv->matrix[5] ==  gmv->matrix[2]);
+        assert(gmv->matrix[4] == -gmv->matrix[3]);
+        // fall-through
+    default:
+    case WM_TYPE_AFFINE: {
+        // block center position in full pixels (4px units * 4)
+        const int x = bx4 * 4 + bw4 * 2 - 1;
+        const int y = by4 * 4 + bh4 * 2 - 1;
+        // matrix is in 16.16 fixed point; subtract the identity (1 << 16)
+        const int xc = (gmv->matrix[2] - (1 << 16)) * x +
+                       gmv->matrix[3] * y + gmv->matrix[0];
+        const int yc = (gmv->matrix[5] - (1 << 16)) * y +
+                       gmv->matrix[4] * x + gmv->matrix[1];
+        const int shift = 16 - (3 - !hdr->hp);
+        const int round = (1 << shift) >> 1;
+        return (mv) {
+            // round-half-away-from-zero, then re-scale for non-hp precision
+            .y = apply_sign(((abs(yc) + round) >> shift) << !hdr->hp, yc),
+            .x = apply_sign(((abs(xc) + round) >> shift) << !hdr->hp, xc),
+        };
+    }
+    case WM_TYPE_TRANSLATION:
+        return (mv) {
+            .y = gmv->matrix[0] >> 13,
+            .x = gmv->matrix[1] >> 13,
+        };
+    case WM_TYPE_IDENTITY:
+        return (mv) { .x = 0, .y = 0 };
+    }
+}
+
+#endif /* __DAV1D_SRC_ENV_H__ */
--- /dev/null
+++ b/src/getbits.c
@@ -1,0 +1,129 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+
+#include "common/intops.h"
+
+#include "src/getbits.h"
+
+// Initialize a bit reader over the buffer [data, data + sz).
+// The caller retains ownership of the buffer; it must outlive the reader.
+void init_get_bits(GetBits *const c,
+                   const uint8_t *const data, const size_t sz)
+{
+    c->ptr = c->ptr_start = data;
+    c->ptr_end = &c->ptr_start[sz];
+    c->bits_left = 0;
+    c->state = 0;
+    c->error = 0;
+    c->eof = 0;
+}
+
+// Refill the 64-bit shift register so at least n bits are available.
+// Bits are stored MSB-aligned in c->state. Reading past the end of the
+// buffer sets eof (and error on a second over-read) and feeds zero bytes.
+static void refill(GetBits *const c, const unsigned n) {
+    assert(c->bits_left <= 56);
+    uint64_t state = 0;
+    do {
+        state <<= 8;
+        c->bits_left += 8;
+        if (!c->eof)
+            state |= *c->ptr++;
+        if (c->ptr >= c->ptr_end) {
+            c->error = c->eof;  // error only once we read *past* eof
+            c->eof = 1;
+        }
+    } while (n > c->bits_left);
+    // merge the new bytes below the bits already held in state
+    c->state |= state << (64 - c->bits_left);
+}
+
+// Read and return the next n bits (MSB first). n must be <= 32.
+unsigned get_bits(GetBits *const c, const unsigned n) {
+    assert(n <= 32 /* can go up to 57 if we change return type */);
+
+    if (n > c->bits_left) refill(c, n);
+
+    const uint64_t state = c->state;
+    c->bits_left -= n;
+    c->state <<= n;
+
+    // state is MSB-aligned, so the requested bits are the top n bits
+    return state >> (64 - n);
+}
+
+// Read a signed value: n magnitude bits followed by implicit sign handling
+// via the shift-left/arithmetic-shift-right sign-extension idiom.
+// Reads n + 1 bits total.
+int get_sbits(GetBits *const c, const unsigned n) {
+    const int shift = 31 - n;
+    const int res = get_bits(c, n + 1) << shift;
+    return res >> shift;  // arithmetic shift sign-extends
+}
+
+// Read a value uniformly distributed in [0, n) using the minimal number of
+// bits (AV1 "ns(n)" descriptor): short codewords of l-1 bits for the first
+// m values, l bits otherwise.
+unsigned get_uniform(GetBits *const c, const unsigned n) {
+    assert(n > 0);
+    const int l = ulog2(n) + 1;  // bits needed to represent n - 1, plus 1
+    assert(l > 0);
+    const int m = (1 << l) - n;  // number of short (l-1 bit) codewords
+    const int v = get_bits(c, l - 1);
+    return v < m ? v : (v << 1) - m + get_bits(c, 1);
+}
+
+// Read an unsigned Exp-Golomb-style VLC: n_bits leading zeros followed by
+// an n_bits-wide suffix. 32+ leading zeros yields the max value sentinel.
+unsigned get_vlc(GetBits *const c) {
+    int n_bits = 0;
+    while (!get_bits(c, 1)) n_bits++;
+    if (n_bits >= 32) return 0xFFFFFFFFU;
+    return ((1 << n_bits) - 1) + get_bits(c, n_bits);
+}
+
+// Read an unsigned subexponential code in [0, n], recentered around ref
+// (AV1 "su"/"subexp" descriptor): successive chunks of doubling width,
+// with the tail coded uniformly once the remaining range is small.
+static unsigned get_bits_subexp_u(GetBits *const c, const unsigned ref,
+                                  const unsigned n)
+{
+    unsigned v = 0;
+
+    for (int i = 0;; i++) {
+        // chunk width: 3 for the first chunk, then grows by 1 per chunk
+        const int b = i ? 3 + i - 1 : 3;
+
+        if (n < v + 3 * (1 << b)) {
+            // remaining range is small: code the tail uniformly
+            v += get_uniform(c, n - v + 1);
+            break;
+        }
+
+        if (!get_bits(c, 1)) {
+            // continuation bit 0: value lies within this chunk
+            v += get_bits(c, b);
+            break;
+        }
+
+        v += 1 << b;
+    }
+
+    // recenter around ref (mirrored when ref is in the upper half of [0, n])
+    return ref * 2 <= n ? inv_recenter(ref, v) : n - inv_recenter(n - ref, v);
+}
+
+// Signed wrapper around get_bits_subexp_u(): biases ref and the range by
+// 1 << n so the result covers [-(1 << n), 1 << n].
+int get_bits_subexp(GetBits *const c, const int ref, const unsigned n) {
+    return (int) get_bits_subexp_u(c, ref + (1 << n), 2 << n) - (1 << n);
+}
+
+// Discard any buffered bits and return the current byte pointer, i.e. the
+// first byte after those consumed by refill() (byte-aligned resume point).
+const uint8_t *flush_get_bits(GetBits *c) {
+    c->bits_left = 0;
+    c->state = 0;
+    return c->ptr;
+}
--- /dev/null
+++ b/src/getbits.h
@@ -1,0 +1,49 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_GETBITS_H__
+#define __DAV1D_SRC_GETBITS_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+// MSB-first bit reader state over a caller-owned byte buffer.
+typedef struct GetBits {
+    int error, eof;                 // error: read past eof; eof: end reached
+    uint64_t state;                 // MSB-aligned bit cache
+    unsigned bits_left;             // number of valid bits in state
+    const uint8_t *ptr, *ptr_start, *ptr_end;
+} GetBits;
+
+void init_get_bits(GetBits *c, const uint8_t *data, size_t sz);
+unsigned get_bits(GetBits *c, unsigned n);
+int get_sbits(GetBits *c, unsigned n);
+unsigned get_uniform(GetBits *c, unsigned range);
+unsigned get_vlc(GetBits *c);
+int get_bits_subexp(GetBits *c, int ref, unsigned n);
+const uint8_t *flush_get_bits(GetBits *c);
+
+#endif /* __DAV1D_SRC_GETBITS_H__ */
--- /dev/null
+++ b/src/internal.h
@@ -1,0 +1,275 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_INTERNAL_H__
+#define __DAV1D_SRC_INTERNAL_H__
+
+#include <pthread.h>
+#include <stdatomic.h>
+
+#include "dav1d/data.h"
+
+typedef struct Dav1dFrameContext Dav1dFrameContext;
+typedef struct Dav1dTileState Dav1dTileState;
+typedef struct Dav1dTileContext Dav1dTileContext;
+
+#include "common/attributes.h"
+
+#include "src/cdef.h"
+#include "src/cdf.h"
+#include "src/env.h"
+#include "src/intra_edge.h"
+#include "src/ipred.h"
+#include "src/itx.h"
+#include "src/levels.h"
+#include "src/lf_mask.h"
+#include "src/loopfilter.h"
+#include "src/looprestoration.h"
+#include "src/mc.h"
+#include "src/msac.h"
+#include "src/picture.h"
+#include "src/recon.h"
+#include "src/ref_mvs.h"
+
+// Per-bitdepth collection of all DSP (SIMD-dispatchable) function tables.
+typedef struct Dav1dDSPContext {
+    Dav1dIntraPredDSPContext ipred;
+    Dav1dMCDSPContext mc;
+    Dav1dInvTxfmDSPContext itx;
+    Dav1dLoopFilterDSPContext lf;
+    Dav1dCdefDSPContext cdef;
+    Dav1dLoopRestorationDSPContext lr;
+} Dav1dDSPContext;
+
+// Top-level decoder context: owns the frame worker contexts, the pending
+// OBU/tile cache, the output picture queue, and the cross-frame reference
+// and entropy state.
+struct Dav1dContext {
+    Dav1dFrameContext *fc;
+    int n_fc;
+
+    // cache of OBUs that make up a single frame before we submit them
+    // to a frame worker to be decoded
+    struct {
+        Dav1dData data;
+        int start, end;
+    } tile[256];
+    int n_tile_data, have_seq_hdr, have_frame_hdr;
+    unsigned tile_mask;
+    Av1SequenceHeader seq_hdr; // FIXME make ref?
+    Av1FrameHeader frame_hdr; // FIXME make ref?
+
+    // decoded output picture queue
+    Dav1dPicture out;
+    struct {
+        Dav1dThreadPicture *out_delayed;
+        unsigned next;
+    } frame_thread;
+
+    // reference/entropy state
+    struct {
+        Dav1dThreadPicture p;
+        Dav1dRef *segmap;
+        Av1SegmentationDataSet seg_data;
+        Dav1dRef *refmvs;
+        unsigned refpoc[7];
+        WarpedMotionParams gmv[7];
+        Av1LoopfilterModeRefDeltas lf_mode_ref_deltas;
+        Av1FilmGrainData film_grain;
+    } refs[8];
+    CdfThreadContext cdf[8];
+
+    Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];
+
+    // tree to keep track of which edges are available
+    struct {
+        EdgeNode *root[2 /* BL_128X128 vs. BL_64X64 */];
+        EdgeBranch branch_sb128[1 + 4 + 16 + 64];
+        EdgeBranch branch_sb64[1 + 4 + 16];
+        EdgeTip tip_sb128[256];
+        EdgeTip tip_sb64[64];
+    } intra_edge;
+};
+
+// Per-frame decoding state: headers, reference pictures, MV buffers,
+// segmentation maps, per-frame CDFs, tile data, loop-filter state and
+// frame/tile threading data. One instance per frame worker.
+struct Dav1dFrameContext {
+    Av1SequenceHeader seq_hdr;
+    Av1FrameHeader frame_hdr;
+    Dav1dThreadPicture refp[7], cur;
+    Dav1dRef *mvs_ref;
+    refmvs *mvs, *ref_mvs[7];
+    Dav1dRef *ref_mvs_ref[7];
+    Dav1dRef *cur_segmap_ref, *prev_segmap_ref;
+    uint8_t *cur_segmap;
+    const uint8_t *prev_segmap;
+    unsigned refpoc[7], refrefpoc[7][7];
+    CdfThreadContext in_cdf, out_cdf;
+    struct {
+        Dav1dData data;
+        int start, end;
+    } tile[256];
+    int n_tile_data;
+
+    const Dav1dContext *c;
+    Dav1dTileContext *tc;
+    int n_tc;
+    Dav1dTileState *ts;
+    int n_ts;
+    const Dav1dDSPContext *dsp;
+    // bitdepth-templated reconstruction entry points for this frame
+    struct {
+        recon_b_intra_fn recon_b_intra;
+        recon_b_inter_fn recon_b_inter;
+        filter_sbrow_fn filter_sbrow;
+        backup_ipred_edge_fn backup_ipred_edge;
+        read_coef_blocks_fn read_coef_blocks;
+    } bd_fn;
+
+    int ipred_edge_sz;
+    pixel *ipred_edge[3];
+    ptrdiff_t b4_stride;
+    int bw, bh, sb128w, sb128h, sbh, sb_shift, sb_step;
+    uint16_t dq[NUM_SEGMENTS][3 /* plane */][2 /* dc/ac */];
+    const uint8_t *qm[2 /* is_1d */][N_RECT_TX_SIZES][3 /* plane */];
+    BlockContext *a;
+    int a_sz /* w*tile_rows */;
+    AV1_COMMON *libaom_cm; // FIXME
+    uint8_t jnt_weights[7][7];
+
+    struct {
+        struct thread_data td;
+        int pass, die;
+        // indexed using t->by * f->b4_stride + t->bx
+        Av1Block *b;
+        struct CodedBlockInfo {
+            int16_t eob[3 /* plane */];
+            uint8_t txtp[3 /* plane */];
+        } *cbi;
+        int8_t *txtp;
+        // indexed using (t->by >> 1) * (f->b4_stride >> 1) + (t->bx >> 1)
+        uint16_t (*pal)[3 /* plane */][8 /* idx */];
+        // iterated over inside tile state
+        uint8_t *pal_idx;
+        coef *cf;
+        // start offsets per tile
+        int *tile_start_off;
+    } frame_thread;
+
+    // loopfilter
+    struct {
+        uint8_t (*level)[4];
+        Av1Filter *mask;
+        int top_pre_cdef_toggle;
+        int mask_sz /* w*h */, line_sz /* w */, re_sz /* h */;
+        Av1FilterLUT lim_lut;
+        int last_sharpness;
+        uint8_t lvl[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
+        uint8_t *tx_lpf_right_edge[2];
+        pixel *cdef_line;
+        pixel *cdef_line_ptr[2 /* pre, post */][3 /* plane */][2 /* y */];
+        pixel *lr_lpf_line;
+        pixel *lr_lpf_line_ptr[3 /* plane */];
+
+        // in-loop filter per-frame state keeping
+        int tile_row; // for carry-over at tile row edges
+        pixel *p[3];
+        Av1Filter *mask_ptr, *prev_mask_ptr;
+    } lf;
+
+    // threading (refer to tc[] for per-thread things)
+    struct FrameTileThreadData {
+        uint64_t available;
+        pthread_mutex_t lock;
+        pthread_cond_t cond, icond;
+        int tasks_left, num_tasks;
+        int (*task_idx_to_sby_and_tile_idx)[2];
+        int titsati_sz, titsati_init[2];
+    } tile_thread;
+};
+
+// Per-tile decoding state shared across the threads working on one tile:
+// tiling geometry, entropy coder state, progress tracking, and the
+// per-tile quantizer / loop-filter level tables.
+struct Dav1dTileState {
+    struct {
+        int col_start, col_end, row_start, row_end; // in 4px units
+        int col, row; // in tile units
+    } tiling;
+
+    CdfContext cdf;
+    MsacContext msac;
+
+    atomic_int progress; // in sby units
+    struct {
+        pthread_mutex_t lock;
+        pthread_cond_t cond;
+    } tile_thread;
+    struct {
+        uint8_t *pal_idx;
+        coef *cf;
+    } frame_thread;
+
+    uint16_t dqmem[NUM_SEGMENTS][3 /* plane */][2 /* dc/ac */];
+    const uint16_t (*dq)[3][2];
+    int last_qidx;
+
+    int8_t last_delta_lf[4];
+    uint8_t lflvlmem[8 /* seg_id */][4 /* dir */][8 /* ref */][2 /* is_gmv */];
+    const uint8_t (*lflvl)[4][8][2];
+
+    Av1RestorationUnit *lr_ref[3];
+};
+
+// Per-worker-thread decoding context: current block position, left/above
+// block contexts, scratch buffers for reconstruction, and tile-thread
+// bookkeeping.
+struct Dav1dTileContext {
+    const Dav1dFrameContext *f;
+    Dav1dTileState *ts;
+    int bx, by;
+    BlockContext l, *a;
+    coef *cf;
+    pixel *emu_edge; // stride=160
+    // FIXME types can be changed to pixel (and dynamically allocated)
+    // which would make copy/assign operations slightly faster?
+    uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
+    uint16_t pal[3 /* plane */][8 /* palette_idx */];
+    uint8_t pal_sz_uv[2 /* a/l */][32 /* bx4/by4 */];
+    uint8_t txtp_map[32 * 32]; // inter-only
+    WarpedMotionParams warpmv;
+    // overlapping scratch space, reused across reconstruction stages
+    union {
+        void *mem;
+        uint8_t *pal_idx;
+        int16_t *ac;
+        pixel *interintra, *lap;
+        coef *compinter;
+    } scratch;
+    ALIGN(uint8_t scratch_seg_mask[128 * 128], 32);
+
+    Av1Filter *lf_mask;
+    int8_t *cur_sb_cdef_idx_ptr;
+    // for chroma sub8x8, we need to know the filter for all 4 subblocks in
+    // a 4x4 area, but the top/left one can go out of cache already, so this
+    // keeps it accessible
+    enum Filter2d tl_4x4_filter;
+
+    struct {
+        struct thread_data td;
+        struct FrameTileThreadData *fttd;
+        int die;
+    } tile_thread;
+};
+
+#endif /* __DAV1D_SRC_INTERNAL_H__ */
--- /dev/null
+++ b/src/intra_edge.c
@@ -1,0 +1,166 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+
+#include "src/intra_edge.h"
+#include "src/levels.h"
+
+// Bump allocator cursors for building the intra-edge mode tree: one branch
+// cursor per intermediate level and one cursor for the leaf tips.
+struct ModeSelMem {
+    EdgeBranch *nwc[3 /* 64x64, 32x32, 16x16 */];
+    EdgeTip *nt;
+};
+
+// Precompute, for one node of the edge-availability tree, the edge flags
+// (top-has-right / left-has-bottom per chroma layout) that apply to each
+// partition arrangement: horizontal splits (h), vertical splits (v), and —
+// depending on node type — 4-way splits or the T-shaped partitions.
+static void init_edges(EdgeNode *const node,
+                       const enum BlockLevel bl,
+                       const enum EdgeFlags edge_flags)
+{
+    node->o = edge_flags;
+
+// shorthand: the given flag for all three chroma layouts at once
+#define ALL_FL(t) (EDGE_I444_##t | EDGE_I422_##t | EDGE_I420_##t)
+    if (bl == BL_8X8) {
+        // leaf level: node is actually an EdgeTip with 4 split entries
+        EdgeTip *const nt = (EdgeTip *) node;
+
+        node->h[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
+        node->h[1] = edge_flags & (ALL_FL(LEFT_HAS_BOTTOM) |
+                                   EDGE_I420_TOP_HAS_RIGHT);
+
+        node->v[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
+        node->v[1] = edge_flags & (ALL_FL(TOP_HAS_RIGHT) |
+                                   EDGE_I420_LEFT_HAS_BOTTOM |
+                                   EDGE_I422_LEFT_HAS_BOTTOM);
+
+        nt->split[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
+        nt->split[1] = (edge_flags & ALL_FL(TOP_HAS_RIGHT)) |
+                       EDGE_I422_LEFT_HAS_BOTTOM;
+        nt->split[2] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
+        nt->split[3] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
+                                     EDGE_I420_LEFT_HAS_BOTTOM |
+                                     EDGE_I422_LEFT_HAS_BOTTOM);
+    } else {
+        // branch level: node is an EdgeBranch with h4/v4 and T-partitions
+        EdgeBranch *const nwc = (EdgeBranch *) node;
+
+        node->h[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
+        node->h[1] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
+
+        node->v[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
+        node->v[1] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
+
+        nwc->h4[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->h4[1] =
+        nwc->h4[2] = ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->h4[3] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
+        if (bl == BL_16X16)
+            nwc->h4[1] |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;
+
+        nwc->v4[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
+        nwc->v4[1] =
+        nwc->v4[2] = ALL_FL(TOP_HAS_RIGHT);
+        nwc->v4[3] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
+        if (bl == BL_16X16)
+            nwc->v4[1] |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
+                                        EDGE_I422_LEFT_HAS_BOTTOM);
+
+        nwc->tls[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->tls[1] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->tls[2] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
+
+        nwc->trs[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
+        nwc->trs[1] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->trs[2] = 0;
+
+        nwc->tts[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->tts[1] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
+        nwc->tts[2] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
+
+        nwc->tbs[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
+        nwc->tbs[1] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
+        nwc->tbs[2] = 0;
+    }
+}
+
+// Recursively build the edge-availability tree below nwc: initialize this
+// node's flags, then allocate and initialize its 4 children (EdgeTips at
+// BL_16X16, EdgeBranches otherwise) from the bump allocators in mem.
+// Child n's top-right/left-bottom availability follows the standard
+// z-order quadrant rules.
+static void init_mode_node(EdgeBranch *const nwc,
+                           const enum BlockLevel bl,
+                           struct ModeSelMem *const mem,
+                           const int top_has_right,
+                           const int left_has_bottom)
+{
+    int n;
+
+    init_edges(&nwc->node, bl,
+               (top_has_right ? ALL_FL(TOP_HAS_RIGHT) : 0) |
+               (left_has_bottom ? ALL_FL(LEFT_HAS_BOTTOM) : 0));
+    if (bl == BL_16X16) {
+        // children are leaf tips (8x8 level)
+        for (n = 0; n < 4; n++) {
+            EdgeTip *const nt = mem->nt++;
+            nwc->split[n] = &nt->node;
+            init_edges(&nt->node, bl + 1,
+                       ((n == 3 || (n == 1 && !top_has_right)) ? 0 :
+                        ALL_FL(TOP_HAS_RIGHT)) |
+                       (!(n == 0 || (n == 2 && left_has_bottom)) ? 0 :
+                        ALL_FL(LEFT_HAS_BOTTOM)));
+        }
+    } else {
+        // children are intermediate branches; recurse one level down
+        for (n = 0; n < 4; n++) {
+            EdgeBranch *const nwc_child = mem->nwc[bl]++;
+            nwc->split[n] = &nwc_child->node;
+            init_mode_node(nwc_child, bl + 1, mem,
+                           !(n == 3 || (n == 1 && !top_has_right)),
+                           n == 0 || (n == 2 && left_has_bottom));
+        }
+    }
+}
+
+// Build a full edge-availability tree rooted at root_node, using the
+// caller-provided arrays (root_node points into an EdgeBranch array laid
+// out breadth-first; nt holds the leaf tips). The tree starts at 128x128
+// or 64x64 superblock level depending on allow_sb128; the asserts check
+// that the bump allocators consumed exactly the expected array sizes.
+void init_mode_tree(EdgeNode *const root_node, EdgeTip *const nt,
+                    const int allow_sb128)
+{
+    EdgeBranch *const root = (EdgeBranch *) root_node;
+    struct ModeSelMem mem;
+
+    if (allow_sb128) {
+        // level offsets: 1 root, 4 at 64x64, 16 at 32x32, 64 at 16x16
+        mem.nwc[BL_128X128] = &root[1];
+        mem.nwc[BL_64X64] = &root[1 + 4];
+        mem.nwc[BL_32X32] = &root[1 + 4 + 16];
+        mem.nt = nt;
+        init_mode_node(root, BL_128X128, &mem, 1, 0);
+        assert(mem.nwc[BL_128X128] == &root[1 + 4]);
+        assert(mem.nwc[BL_64X64] == &root[1 + 4 + 16]);
+        assert(mem.nwc[BL_32X32] == &root[1 + 4 + 16 + 64]);
+        assert(mem.nt == &nt[256]);
+    } else {
+        mem.nwc[BL_128X128] = NULL;
+        mem.nwc[BL_64X64] = &root[1];
+        mem.nwc[BL_32X32] = &root[1 + 4];
+        mem.nt = nt;
+        init_mode_node(root, BL_64X64, &mem, 1, 0);
+        assert(mem.nwc[BL_64X64] == &root[1 + 4]);
+        assert(mem.nwc[BL_32X32] == &root[1 + 4 + 16]);
+        assert(mem.nt == &nt[64]);
+    }
+}
--- /dev/null
+++ b/src/intra_edge.h
@@ -1,0 +1,57 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
// NOTE(review): identifiers starting with double underscore are reserved by
// the C standard; the guard name follows the project-wide convention though.
#ifndef __DAV1D_SRC_INTRA_EDGE_H__
#define __DAV1D_SRC_INTRA_EDGE_H__

// Availability of neighboring reconstructed samples for intra prediction,
// one TOP_HAS_RIGHT and one LEFT_HAS_BOTTOM bit per chroma layout
// (4:4:4, 4:2:2, 4:2:0).
enum EdgeFlags {
    EDGE_I444_TOP_HAS_RIGHT = 1 << 0,
    EDGE_I422_TOP_HAS_RIGHT = 1 << 1,
    EDGE_I420_TOP_HAS_RIGHT = 1 << 2,
    EDGE_I444_LEFT_HAS_BOTTOM = 1 << 3,
    EDGE_I422_LEFT_HAS_BOTTOM = 1 << 4,
    EDGE_I420_LEFT_HAS_BOTTOM = 1 << 5,
};

// Common node header: flags for the unsplit block (o) and for the two
// halves of a horizontal (h) or vertical (v) split.
typedef struct EdgeNode EdgeNode;
struct EdgeNode {
    enum EdgeFlags o, h[2], v[2];
};
// Leaf node: flags for each of the 4 quadrants of the smallest tracked level.
typedef struct EdgeTip {
    EdgeNode node;
    enum EdgeFlags split[4];
} EdgeTip;
// Interior node with pointers to its 4 children.
// NOTE(review): tts/tbs/tls/trs presumably cover the T-shaped 3-way splits
// and h4/v4 the 4-way strip splits — confirm against the consumer.
typedef struct EdgeBranch {
    EdgeNode node;
    enum EdgeFlags tts[3], tbs[3], tls[3], trs[3], h4[4], v4[4];
    EdgeNode *split[4];
} EdgeBranch;

// Build the edge-availability tree (see intra_edge.c); root must provide
// enough EdgeBranch storage for the superblock size chosen by allow_sb128,
// nt receives the leaf tips.
void init_mode_tree(EdgeNode *const root, EdgeTip *const nt,
                    const int allow_sb128);

#endif /* __DAV1D_SRC_INTRA_EDGE_H__ */
--- /dev/null
+++ b/src/ipred.c
@@ -1,0 +1,1010 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/ipred.h"
+
// Instantiate l_fn(width, height) for every block size intra prediction
// supports: both dimensions in [4, 64] with an aspect ratio of at most 4:1.
#define sz_grid(l_fn) \
l_fn( 4,  4); \
l_fn( 4,  8); \
l_fn( 4, 16); \
l_fn( 8,  4); \
l_fn( 8,  8); \
l_fn( 8, 16); \
l_fn( 8, 32); \
l_fn(16,  4); \
l_fn(16,  8); \
l_fn(16, 16); \
l_fn(16, 32); \
l_fn(16, 64); \
l_fn(32,  8); \
l_fn(32, 16); \
l_fn(32, 32); \
l_fn(32, 64); \
l_fn(64, 16); \
l_fn(64, 32); \
l_fn(64, 64)
+
/* Fill a w x h block with the constant value dc.
 * Rows are written in 8-byte (or 4-byte, for w == 4 at 8 bpc) chunks via
 * wide integer stores. NOTE(review): this type-puns the pixel buffer and
 * assumes dst rows are suitably aligned and w is a power of two >= 4 —
 * guaranteed by the callers' block geometry, verify before reusing. */
static __attribute__((noinline)) void
splat_dc_c(pixel *dst, const ptrdiff_t stride,
           const int w, const int h, const unsigned dc)
{
    assert(dc <= (1 << BITDEPTH) - 1);
#if BITDEPTH == 8
    if (w > 4) {
        // Replicate the 8-bit dc into all 8 lanes of a 64-bit word.
        const uint64_t dcN = dc * 0x0101010101010101ULL;
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x += sizeof(dcN))
                *((uint64_t *) &dst[x]) = dcN;
            dst += PXSTRIDE(stride);
        }
    } else {
        // w == 4: one 32-bit store per row.
        const unsigned dcN = dc * 0x01010101U;
        for (int y = 0; y < h; y++) {
            for (int x = 0; x < w; x += sizeof(dcN))
                *((unsigned *) &dst[x]) = dcN;
            dst += PXSTRIDE(stride);
        }
    }
#else
    // 16 bpc: four 16-bit pixels per 64-bit store.
    const uint64_t dcN = dc * 0x0001000100010001ULL;
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x += sizeof(dcN) >> 1)
            *((uint64_t *) &dst[x]) = dcN;
        dst += PXSTRIDE(stride);
    }
#endif
}
+
// Emit one DC-flavor predictor: dc_gen computes `dc`, which is then splatted
// across the block. The trailing int argument is unused but keeps the
// signature uniform with the angular predictors.
#define dc_lfn(w, h, dir, dc_gen) \
static void dc##dir##_##w##x##h##_c(pixel *dst, const ptrdiff_t stride, \
                                    const pixel *const topleft, const int a) \
{ \
    dc_gen; \
    splat_dc_c(dst, stride, w, h, dc); \
}

// 1-D DC predictors: dctop averages only the top row, dcleft only the left
// column. sh1 = log2(width), sh2 = log2(height); the width>>1 / height>>1
// seeds implement round-to-nearest.
#define dc1d_lfns(width, height, sh1, sh2) \
dc_lfn(width, height, top, unsigned dc = width >> 1; \
                           for (int i = 0; i < width; i++) \
                               dc += topleft[1 + i]; \
                           dc >>= sh1); \
dc_lfn(width, height, left, unsigned dc = height >> 1; \
                            for (int i = 0; i < height; i++) \
                                dc += topleft[-(1 + i)]; \
                            dc >>= sh2)

dc1d_lfns( 4,  4, 2, 2);
dc1d_lfns( 4,  8, 2, 3);
dc1d_lfns( 4, 16, 2, 4);
dc1d_lfns( 8,  4, 3, 2);
dc1d_lfns( 8,  8, 3, 3);
dc1d_lfns( 8, 16, 3, 4);
dc1d_lfns( 8, 32, 3, 5);
dc1d_lfns(16,  4, 4, 2);
dc1d_lfns(16,  8, 4, 3);
dc1d_lfns(16, 16, 4, 4);
dc1d_lfns(16, 32, 4, 5);
dc1d_lfns(16, 64, 4, 6);
dc1d_lfns(32,  8, 5, 3);
dc1d_lfns(32, 16, 5, 4);
dc1d_lfns(32, 32, 5, 5);
dc1d_lfns(32, 64, 5, 6);
dc1d_lfns(64, 16, 6, 4);
dc1d_lfns(64, 32, 6, 5);
dc1d_lfns(64, 64, 6, 6);
+
// 2-D DC predictor: average the top row and left column. For square blocks
// the divisor is a power of two; for rectangular blocks, division by the
// non-power-of-2 sample count is done with a fixed-point reciprocal:
// 0x5556 / 2^18 ~= 1/12 and 0x3334 / 2^18 ~= 1/20 (i.e. 1/(3*2^k), 1/(5*2^k)
// at the respective shifts).
#define dc2d_lfn(width, height, dc_gen) \
dc_lfn(width, height,, unsigned dc = (width + height) >> 1; \
                       for (int i = 0; i < width; i++) \
                           dc += topleft[i + 1]; \
                       for (int i = 0; i < height; i++) \
                           dc += topleft[-(i + 1)]; \
                       dc_gen)

dc2d_lfn( 4,  4, dc >>= 3);
dc2d_lfn( 4,  8, dc = iclip_pixel(0x5556 * dc >> 18));
dc2d_lfn( 4, 16, dc = iclip_pixel(0x3334 * dc >> 18));
dc2d_lfn( 8,  4, dc = iclip_pixel(0x5556 * dc >> 18));
dc2d_lfn( 8,  8, dc >>= 4);
dc2d_lfn( 8, 16, dc = iclip_pixel(0x5556 * dc >> 19));
dc2d_lfn( 8, 32, dc = iclip_pixel(0x3334 * dc >> 19));
dc2d_lfn(16,  4, dc = iclip_pixel(0x3334 * dc >> 18));
dc2d_lfn(16,  8, dc = iclip_pixel(0x5556 * dc >> 19));
dc2d_lfn(16, 16, dc >>= 5);
dc2d_lfn(16, 32, dc = iclip_pixel(0x5556 * dc >> 20));
dc2d_lfn(16, 64, dc = iclip_pixel(0x3334 * dc >> 20));
dc2d_lfn(32,  8, dc = iclip_pixel(0x3334 * dc >> 19));
dc2d_lfn(32, 16, dc = iclip_pixel(0x5556 * dc >> 20));
dc2d_lfn(32, 32, dc >>= 6);
dc2d_lfn(32, 64, dc = iclip_pixel(0x5556 * dc >> 21));
dc2d_lfn(64, 16, dc = iclip_pixel(0x3334 * dc >> 20));
dc2d_lfn(64, 32, dc = iclip_pixel(0x5556 * dc >> 21));
dc2d_lfn(64, 64, dc >>= 7);
+
// DC predictor when no neighbors are available: use the bitdepth midpoint.
#define dc128_lfn(width, height) \
dc_lfn(width, height, 128, const unsigned dc = (1 << BITDEPTH) >> 1)

sz_grid(dc128_lfn);
+
+static __attribute__((noinline)) void
+v_c(pixel *dst, const ptrdiff_t stride,
+    const pixel *const topleft, const int width, const int height)
+{
+    for (int y = 0; y < height; y++) {
+        pixel_copy(dst, topleft + 1, width);
+        dst += PXSTRIDE(stride);
+    }
+}
+
+#define v_lfn(width, height) \
+static void v_##width##x##height##_##c(pixel *dst, const ptrdiff_t stride, \
+                                       const pixel *const topleft, const int a) \
+{ \
+    v_c(dst, stride, topleft, width, height); \
+}
+
+sz_grid(v_lfn);
+
/* Horizontal predictor: fill each row y with the left-neighbor sample
 * topleft[-(1 + y)]. */
static __attribute__((noinline)) void
h_c(pixel *dst, const ptrdiff_t stride,
    const pixel *const topleft, const int width, const int height)
{
    for (int y = 0; y < height; y++) {
        pixel_set(dst, topleft[-(1 + y)], width);
        dst += PXSTRIDE(stride);
    }
}

// Size-specialized wrappers; the unused int argument keeps the prototype
// uniform with the angular predictors.
#define h_lfn(width, height) \
static void h_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                     const pixel *const topleft, const int a) \
{ \
    h_c(dst, stride, topleft, width, height); \
}

sz_grid(h_lfn);
+
/* Paeth predictor: for each pixel choose left, top or topleft — whichever is
 * closest to the gradient estimate base = left + top - topleft — with ties
 * resolved in the order left, top, topleft. */
static __attribute__((noinline)) void
paeth_c(pixel *dst, const ptrdiff_t stride, const pixel *const tl_ptr,
        const int width, const int height)
{
    const int topleft = tl_ptr[0];
    for (int y = 0; y < height; y++) {
        const int left = tl_ptr[-(y + 1)];
        for (int x = 0; x < width; x++) {
            const int top = tl_ptr[1 + x];
            const int base = left + top - topleft;
            const int ldiff = abs(left - base);
            const int tdiff = abs(top - base);
            const int tldiff = abs(topleft - base);

            dst[x] = ldiff <= tdiff && ldiff <= tldiff ? left :
                     tdiff <= tldiff ? top : topleft;
        }
        dst += PXSTRIDE(stride);
    }
}

// Size-specialized wrappers; the unused int argument keeps the prototype
// uniform with the angular predictors.
#define paeth_lfn(width, height) \
static void paeth_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                         const pixel *const topleft, \
                                         const int a) \
{ \
    paeth_c(dst, stride, topleft, width, height); \
}

sz_grid(paeth_lfn);
+
// Distance-based blending weights for the SMOOTH predictors, concatenated by
// block size; consumers index at offset `size` (see &sm_weight_arrays[width]).
// Each group decays from 255 toward 0 across the block.
static const uint8_t sm_weight_arrays[] = {
    // Unused, because we always offset by bs, which is at least 2.
    0, 0,
    // bs = 2
    255, 128,
    // bs = 4
    255, 149, 85, 64,
    // bs = 8
    255, 197, 146, 105, 73, 50, 37, 32,
    // bs = 16
    255, 225, 196, 170, 145, 123, 102, 84, 68, 54, 43, 33, 26, 20, 17, 16,
    // bs = 32
    255, 240, 225, 210, 196, 182, 169, 157, 145, 133, 122, 111, 101, 92, 83, 74,
    66, 59, 52, 45, 39, 34, 29, 25, 21, 17, 14, 12, 10, 9, 8, 8,
    // bs = 64
    255, 248, 240, 233, 225, 218, 210, 203, 196, 189, 182, 176, 169, 163, 156,
    150, 144, 138, 133, 127, 121, 116, 111, 106, 101, 96, 91, 86, 82, 77, 73, 69,
    65, 61, 57, 54, 50, 47, 44, 41, 38, 35, 32, 29, 27, 25, 22, 20, 18, 16, 15,
    13, 12, 10, 9, 8, 7, 6, 6, 5, 5, 4, 4, 4,
};
+
/* SMOOTH predictor: blend the top row against the bottom-left sample and the
 * left column against the top-right sample, weighted by distance. The two
 * weight pairs each sum to 256, so the total weight is 512; (pred + 256) >> 9
 * is the rounded result. */
static __attribute__((noinline)) void
smooth_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft,
         const int width, const int height)
{
    const uint8_t *const weights_hor = &sm_weight_arrays[width];
    const uint8_t *const weights_ver = &sm_weight_arrays[height];
    // Corner anchors: top-right and bottom-left reconstructed samples.
    const int right = topleft[width], bottom = topleft[-height];

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const int pred = weights_ver[y]  * topleft[1 + x] +
                      (256 - weights_ver[y]) * bottom +
                             weights_hor[x]  * topleft[-(1 + y)] +
                      (256 - weights_hor[x]) * right;
            dst[x] = (pred + 256) >> 9;
        }
        dst += PXSTRIDE(stride);
    }
}

// Size-specialized wrappers; the unused int argument keeps the prototype
// uniform with the angular predictors.
#define smooth_lfn(width, height) \
static void smooth_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                          const pixel *const topleft, \
                                          const int a) \
{ \
    smooth_c(dst, stride, topleft, width, height); \
}

sz_grid(smooth_lfn);
+
/* SMOOTH_V predictor: vertical-only blend of the top row against the
 * bottom-left sample; weights sum to 256, hence the (pred + 128) >> 8 round. */
static __attribute__((noinline)) void
smooth_v_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft,
           const int width, const int height)
{
    const uint8_t *const weights_ver = &sm_weight_arrays[height];
    const int bottom = topleft[-height];

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const int pred = weights_ver[y]  * topleft[1 + x] +
                      (256 - weights_ver[y]) * bottom;
            dst[x] = (pred + 128) >> 8;
        }
        dst += PXSTRIDE(stride);
    }
}

// Size-specialized wrappers; the unused int argument keeps the prototype
// uniform with the angular predictors.
#define smooth_v_lfn(width, height) \
static void smooth_v_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                            const pixel *const topleft, \
                                            const int a) \
{ \
    smooth_v_c(dst, stride, topleft, width, height); \
}

sz_grid(smooth_v_lfn);
+
/* SMOOTH_H predictor: horizontal-only blend of the left column against the
 * top-right sample; weights sum to 256, hence the (pred + 128) >> 8 round. */
static __attribute__((noinline)) void
smooth_h_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft,
           const int width, const int height)
{
    const uint8_t *const weights_hor = &sm_weight_arrays[width];
    const int right = topleft[width];

    for (int y = 0; y < height; y++) {
        for (int x = 0; x < width; x++) {
            const int pred = weights_hor[x]  * topleft[-(y + 1)] +
                      (256 - weights_hor[x]) * right;
            dst[x] = (pred + 128) >> 8;
        }
        dst += PXSTRIDE(stride);
    }
}

// Size-specialized wrappers; the unused int argument keeps the prototype
// uniform with the angular predictors.
#define smooth_h_lfn(width, height) \
static void smooth_h_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                            const pixel *const topleft, \
                                            const int a) \
{ \
    smooth_h_c(dst, stride, topleft, width, height); \
}

sz_grid(smooth_h_lfn);
+
// Tangent table for directional prediction: entry [a] is the per-row/column
// step (dx or dy) in 1/64th-pixel units for a prediction angle of `a`
// degrees; only the angles actually reachable by the syntax are populated
// (see the per-line comments), all other entries stay 0 and are never read.
static const int16_t dr_intra_derivative[90] = {
  // More evenly spread out angles and limited to 10-bit
  // Values that are 0 will never be used
  //                    Approx angle
  0,    0, 0,        //
  1023, 0, 0,        // 3, ...
  547,  0, 0,        // 6, ...
  372,  0, 0, 0, 0,  // 9, ...
  273,  0, 0,        // 14, ...
  215,  0, 0,        // 17, ...
  178,  0, 0,        // 20, ...
  151,  0, 0,        // 23, ... (113 & 203 are base angles)
  132,  0, 0,        // 26, ...
  116,  0, 0,        // 29, ...
  102,  0, 0, 0,     // 32, ...
  90,   0, 0,        // 36, ...
  80,   0, 0,        // 39, ...
  71,   0, 0,        // 42, ...
  64,   0, 0,        // 45, ... (45 & 135 are base angles)
  57,   0, 0,        // 48, ...
  51,   0, 0,        // 51, ...
  45,   0, 0, 0,     // 54, ...
  40,   0, 0,        // 58, ...
  35,   0, 0,        // 61, ...
  31,   0, 0,        // 64, ...
  27,   0, 0,        // 67, ... (67 & 157 are base angles)
  23,   0, 0,        // 70, ...
  19,   0, 0,        // 73, ...
  15,   0, 0, 0, 0,  // 76, ...
  11,   0, 0,        // 81, ...
  7,    0, 0,        // 84, ...
  3,    0, 0,        // 87, ...
};
+
// Edge-filter strength (0..3) as a function of the block size (blk_wh =
// width + height) and the angle's distance d from the nearest base angle.
// type distinguishes the two threshold tables (0: normal, 1: smooth).
static int get_filter_strength(const unsigned blk_wh, const unsigned d,
                               const int type)
{
    if (type == 0) {
        if (blk_wh <= 8)
            return d >= 56 ? 1 : 0;
        if (blk_wh <= 16)  // the 12- and 16-sample buckets share a threshold
            return d >= 40 ? 1 : 0;
        if (blk_wh <= 24) {
            if (d >= 32) return 3;
            if (d >= 16) return 2;
            return d >= 8 ? 1 : 0;
        }
        if (blk_wh <= 32) {
            if (d >= 32) return 3;
            if (d >= 4)  return 2;
            return d >= 1 ? 1 : 0;
        }
        return d >= 1 ? 3 : 0;
    }

    if (blk_wh <= 8) {
        if (d >= 64) return 2;
        return d >= 40 ? 1 : 0;
    }
    if (blk_wh <= 16) {
        if (d >= 48) return 2;
        return d >= 20 ? 1 : 0;
    }
    if (blk_wh <= 24)
        return d >= 4 ? 3 : 0;
    return d >= 1 ? 3 : 0;
}
+
+static void filter_edge(pixel *const out, const int sz,
+                        const pixel *const in, const int from, const int to,
+                        const unsigned strength)
+{
+    const uint8_t kernel[3][5] = {
+        { 0, 4, 8, 4, 0 },
+        { 0, 5, 6, 5, 0 },
+        { 2, 4, 4, 4, 2 }
+    };
+
+    assert(strength > 0);
+    for (int i = 0; i < sz; i++) {
+        int s = 0;
+        for (int j = 0; j < 5; j++)
+            s += in[iclip(i - 2 + j, from, to - 1)] * kernel[strength - 1][j];
+        out[i] = (s + 8) >> 4;
+    }
+}
+
// Decide whether the intra edge should be 2x upsampled: only for small
// blocks (blk_wh = w + h at most 16, or 8 with the smooth filter type) and
// only at sharp angle deltas (d < 40).
static int get_upsample(const int blk_wh, const unsigned d, const int type) {
    const int size_limit = type ? 8 : 16;
    return d < 40 && blk_wh <= size_limit;
}
+
+static void upsample_edge(pixel *const out, const int hsz,
+                          const pixel *const in, const int from, const int to)
+{
+    const int8_t kernel[4] = { -1, 9, 9, -1 };
+    int i;
+    for (i = 0; i < hsz - 1; i++) {
+        out[i * 2] = in[iclip(i, from, to - 1)];
+
+        int s = 0;
+        for (int j = 0; j < 4; j++)
+            s += in[iclip(i + j - 1, from, to - 1)] * kernel[j];
+        out[i * 2 + 1] = iclip_pixel((s + 8) >> 4);
+    }
+    out[i * 2] = in[iclip(i, from, to - 1)];
+}
+
/* Directional prediction, zone 1 (angle < 90 degrees): every pixel is
 * interpolated from the top edge only. Bit 9 of `angle` carries the
 * smooth-filter flag; the edge is optionally 2x upsampled or
 * strength-filtered into top_out first. xpos advances by dx (1/64th-pel
 * units) per row; columns past max_base_x replicate the last edge sample. */
static __attribute__((noinline)) void
z1_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
     int angle, const int width, const int height)
{
    const int is_sm = angle >> 9;
    angle &= 511;
    assert(angle < 90);
    const int dx = dr_intra_derivative[angle];
    pixel top_out[(width + height) * 2];
    const pixel *top;
    int max_base_x;
    const int upsample_above = get_upsample(width + height, 90 - angle, is_sm);
    if (upsample_above) {
        upsample_edge(top_out, width + height,
                      &topleft_in[1], -1, width + imin(width, height));
        top = top_out;
        max_base_x = 2 * (width + height) - 2;
    } else {
        const int filter_strength =
            get_filter_strength(width + height, 90 - angle, is_sm);

        if (filter_strength) {
            filter_edge(top_out, width + height,
                        &topleft_in[1], -1, width + imin(width, height),
                        filter_strength);
            top = top_out;
            max_base_x = width + height - 1;
        } else {
            top = &topleft_in[1];
            max_base_x = width + imin(width, height) - 1;
        }
    }
    // 6 fractional position bits normally, 5 when the edge is 2x upsampled.
    const int frac_bits = 6 - upsample_above;
    const int base_inc = 1 << upsample_above;
    for (int y = 0, xpos = dx; y < height;
         y++, dst += PXSTRIDE(stride), xpos += dx)
    {
        int base = xpos >> frac_bits;
        const int frac = ((xpos << upsample_above) & 0x3F) >> 1;

        for (int x = 0; x < width; x++, base += base_inc) {
            if (base < max_base_x) {
                // 2-tap linear interpolation between adjacent edge samples.
                const int v = top[base] * (32 - frac) + top[base + 1] * frac;
                dst[x] = iclip_pixel((v + 16) >> 5);
            } else {
                // Past the prepared edge: fill the rest of the row and stop.
                pixel_set(&dst[x], top[max_base_x], width - x);
                break;
            }
        }
    }
}

// Size-specialized wrappers; `angle` packs the angle and the smooth flag.
#define z1_lfn(width, height) \
static void z1_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                      const pixel *const topleft, \
                                      const int angle) \
{ \
    z1_c(dst, stride, topleft, angle, width, height); \
}

sz_grid(z1_lfn);
+
/* Directional prediction, zone 2 (90 < angle < 180 degrees): pixels left of
 * the projected diagonal interpolate from the left edge, the rest from the
 * top edge. Both edges are filtered/upsampled into a local buffer laid out
 * as [left edge (reversed) | topleft | top edge], with the top-left sample
 * at edge[height * 2]. */
static __attribute__((noinline)) void
z2_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
     int angle, const int width, const int height)
{
    const int is_sm = angle >> 9;
    angle &= 511;
    assert(angle > 90 && angle < 180);
    const int dy = dr_intra_derivative[angle - 90];
    const int dx = dr_intra_derivative[180 - angle];
    const int upsample_left = get_upsample(width + height, 180 - angle, is_sm);
    const int upsample_above = get_upsample(width + height, angle - 90, is_sm);
    pixel edge[width * 2 + height * 2 + 1];
    pixel *const topleft = &edge[height * 2];

    if (upsample_above) {
        upsample_edge(topleft, width + 1, topleft_in, 0, width + 1);
    } else {
        const int filter_strength =
            get_filter_strength(width + height, angle - 90, is_sm);

        if (filter_strength) {
            filter_edge(&topleft[1], width, &topleft_in[1], -1, width,
                        filter_strength);
        } else {
            pixel_copy(&topleft[1], &topleft_in[1], width);
        }
    }
    if (upsample_left) {
        upsample_edge(edge, height + 1, &topleft_in[-height], 0, height + 1);
    } else {
        const int filter_strength =
            get_filter_strength(width + height, 180 - angle, is_sm);

        if (filter_strength) {
            filter_edge(&topleft[-height], height, &topleft_in[-height],
                        0, height + 1, filter_strength);
        } else {
            pixel_copy(&topleft[-height], &topleft_in[-height], height);
        }
    }
    *topleft = *topleft_in;

    const int min_base_x = -(1 << upsample_above);
    const int frac_bits_y = 6 - upsample_left, frac_bits_x = 6 - upsample_above;
    const int base_inc_x = 1 << upsample_above;
    const pixel *const left = &topleft[-(1 << upsample_left)];
    const pixel *const top = &topleft[1 << upsample_above];
    // xpos walks left (negative) per row; ypos walks up per column.
    for (int y = 0, xpos = -dx; y < height;
         y++, xpos -= dx, dst += PXSTRIDE(stride))
    {
        int base_x = xpos >> frac_bits_x;
        const int frac_x = ((xpos * (1 << upsample_above)) & 0x3F) >> 1;

        for (int x = 0, ypos = (y << 6) - dy; x < width;
             x++, base_x += base_inc_x, ypos -= dy)
        {
            int v;

            if (base_x >= min_base_x) {
                // Right of the diagonal: interpolate along the top edge.
                v = top[base_x] * (32 - frac_x) + top[base_x + 1] * frac_x;
            } else {
                // Left of the diagonal: interpolate along the left edge.
                const int base_y = ypos >> frac_bits_y;
                assert(base_y >= -(1 << upsample_left));
                const int frac_y = ((ypos * (1 << upsample_left)) & 0x3F) >> 1;
                v = left[-base_y] * (32 - frac_y) + left[-(base_y + 1)] * frac_y;
            }
            dst[x] = iclip_pixel((v + 16) >> 5);
        }
    }
}

// Size-specialized wrappers; `angle` packs the angle and the smooth flag.
#define z2_lfn(width, height) \
static void z2_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                      const pixel *const topleft, \
                                      const int angle) \
{ \
    z2_c(dst, stride, topleft, angle, width, height); \
}

sz_grid(z2_lfn);
+
/* Directional prediction, zone 3 (angle > 180 degrees): every pixel is
 * interpolated from the left edge only, iterating column by column; rows
 * past max_base_y replicate the furthest edge sample. The (optionally
 * filtered/upsampled) edge is addressed downward via negative offsets. */
static __attribute__((noinline)) void
z3_c(pixel *dst, const ptrdiff_t stride, const pixel *const topleft_in,
     int angle, const int width, const int height)
{
    const int is_sm = angle >> 9;
    angle &= 511;
    assert(angle > 180);
    const int dy = dr_intra_derivative[270 - angle];
    pixel left_out[(width + height) * 2];
    const pixel *left;
    int max_base_y;
    const int upsample_left = get_upsample(width + height, angle - 180, is_sm);
    if (upsample_left) {
        upsample_edge(left_out, width + height,
                      &topleft_in[-(width + height)],
                      imax(width - height, 0), width + height + 1);
        left = &left_out[2 * (width + height) - 2];
        max_base_y = 2 * (width + height) - 2;
    } else {
        const int filter_strength =
            get_filter_strength(width + height, angle - 180, is_sm);

        if (filter_strength) {
            filter_edge(left_out, width + height,
                        &topleft_in[-(width + height)],
                        imax(width - height, 0), width + height + 1,
                        filter_strength);
            left = &left_out[width + height - 1];
            max_base_y = width + height - 1;
        } else {
            left = &topleft_in[-1];
            max_base_y = height + imin(width, height) - 1;
        }
    }
    const int frac_bits = 6 - upsample_left;
    const int base_inc = 1 << upsample_left;
    for (int x = 0, ypos = dy; x < width; x++, ypos += dy) {
        int base = ypos >> frac_bits;
        const int frac = ((ypos << upsample_left) & 0x3F) >> 1;

        for (int y = 0; y < height; y++, base += base_inc) {
            if (base < max_base_y) {
                // 2-tap linear interpolation between adjacent edge samples.
                const int v = left[-base] * (32 - frac) +
                              left[-(base + 1)] * frac;
                dst[y * PXSTRIDE(stride) + x] = iclip_pixel((v + 16) >> 5);
            } else {
                // Past the prepared edge: fill the rest of the column.
                do {
                    dst[y * PXSTRIDE(stride) + x] = left[-max_base_y];
                } while (++y < height);
                break;
            }
        }
    }
}

// Size-specialized wrappers; `angle` packs the angle and the smooth flag.
#define z3_lfn(width, height) \
static void z3_##width##x##height##_c(pixel *dst, const ptrdiff_t stride, \
                                      const pixel *const topleft, \
                                      const int angle) \
{ \
    z3_c(dst, stride, topleft, angle, width, height); \
}

sz_grid(z3_lfn);
+
// FILTER_INTRA taps: 5 filter modes x 8 output positions (2 rows x 4 cols
// per tile) x 7 input samples (p0 = tile top-left, p1..p4 = top row,
// p5..p6 = left column) plus a zero pad column; each 8-tap row sums to 16,
// matching the (acc + 8) >> 4 normalization in filter_intra_c.
static const int8_t av1_filter_intra_taps[5][8][8] = {
    {
        { -6, 10,  0,  0,  0, 12,  0, 0 },
        { -5,  2, 10,  0,  0,  9,  0, 0 },
        { -3,  1,  1, 10,  0,  7,  0, 0 },
        { -3,  1,  1,  2, 10,  5,  0, 0 },
        { -4,  6,  0,  0,  0,  2, 12, 0 },
        { -3,  2,  6,  0,  0,  2,  9, 0 },
        { -3,  2,  2,  6,  0,  2,  7, 0 },
        { -3,  1,  2,  2,  6,  3,  5, 0 },
    }, {
        { -10, 16,  0,  0,  0, 10,  0, 0 },
        {  -6,  0, 16,  0,  0,  6,  0, 0 },
        {  -4,  0,  0, 16,  0,  4,  0, 0 },
        {  -2,  0,  0,  0, 16,  2,  0, 0 },
        { -10, 16,  0,  0,  0,  0, 10, 0 },
        {  -6,  0, 16,  0,  0,  0,  6, 0 },
        {  -4,  0,  0, 16,  0,  0,  4, 0 },
        {  -2,  0,  0,  0, 16,  0,  2, 0 },
    }, {
        { -8, 8, 0, 0, 0, 16,  0, 0 },
        { -8, 0, 8, 0, 0, 16,  0, 0 },
        { -8, 0, 0, 8, 0, 16,  0, 0 },
        { -8, 0, 0, 0, 8, 16,  0, 0 },
        { -4, 4, 0, 0, 0,  0, 16, 0 },
        { -4, 0, 4, 0, 0,  0, 16, 0 },
        { -4, 0, 0, 4, 0,  0, 16, 0 },
        { -4, 0, 0, 0, 4,  0, 16, 0 },
    }, {
        { -2, 8, 0, 0, 0, 10,  0, 0 },
        { -1, 3, 8, 0, 0,  6,  0, 0 },
        { -1, 2, 3, 8, 0,  4,  0, 0 },
        {  0, 1, 2, 3, 8,  2,  0, 0 },
        { -1, 4, 0, 0, 0,  3, 10, 0 },
        { -1, 3, 4, 0, 0,  4,  6, 0 },
        { -1, 2, 3, 4, 0,  4,  4, 0 },
        { -1, 2, 2, 3, 4,  3,  3, 0 },
    }, {
        { -12, 14,  0,  0,  0, 14,  0, 0 },
        { -10,  0, 14,  0,  0, 12,  0, 0 },
        {  -9,  0,  0, 14,  0, 11,  0, 0 },
        {  -8,  0,  0,  0, 14, 10,  0, 0 },
        { -10, 12,  0,  0,  0,  0, 14, 0 },
        {  -9,  1, 12,  0,  0,  0, 12, 0 },
        {  -8,  0,  0, 12,  0,  1, 11, 0 },
        {  -7,  0,  0,  1, 12,  1,  9, 0 },
    },
};
+
/* FILTER_INTRA: predict the block in 4x2 tiles. Each output pixel is a
 * 7-tap combination (taps from av1_filter_intra_taps[filt_idx]) of the
 * tile's top-left sample, its 4 top samples and 2 left samples. Tiles after
 * the first in a row, and rows after the first, read their top/left inputs
 * from already-predicted pixels rather than from the original edge. */
static __attribute__((noinline)) void
filter_intra_c(pixel *dst, const ptrdiff_t stride,
               const pixel *const topleft_in,
               int filt_idx, const int width, const int height)
{
    filt_idx &= 511;  // strip flag bits packed above the mode index
    assert(filt_idx < 5);

    const int8_t (*const filter)[8] = av1_filter_intra_taps[filt_idx];
    int x, y;
    ptrdiff_t left_stride;
    const pixel *left, *topleft, *top;

    top = &topleft_in[1];
    for (y = 0; y < height; y += 2) {
        // First tile of the row reads its left inputs from the edge
        // (consecutive edge samples, hence stride -1).
        topleft = &topleft_in[-y];
        left = &topleft[-1];
        left_stride = -1;
        for (x = 0; x < width; x += 4) {
            const int p0 = *topleft;
            const int p1 = top[0], p2 = top[1], p3 = top[2], p4 = top[3];
            const int p5 = left[0 * left_stride], p6 = left[1 * left_stride];
            pixel *ptr = &dst[x];
            const int8_t (*flt_ptr)[8] = filter;

            for (int yy = 0; yy < 2; yy++) {
                for (int xx = 0; xx < 4; xx++, flt_ptr++) {
                    int acc = flt_ptr[0][0] * p0 + flt_ptr[0][1] * p1 +
                              flt_ptr[0][2] * p2 + flt_ptr[0][3] * p3 +
                              flt_ptr[0][4] * p4 + flt_ptr[0][5] * p5 +
                              flt_ptr[0][6] * p6;
                    ptr[xx] = iclip_pixel((acc + 8) >> 4);
                }
                ptr += PXSTRIDE(stride);
            }

            // Subsequent tiles take their left column from the pixels just
            // written, walking down the destination (stride = picture pitch).
            left = &dst[x + 4 - 1];
            left_stride = PXSTRIDE(stride);
            top += 4;
            topleft = &top[-1];
        }
        // Next tile row reads its top inputs from the row just predicted.
        top = &dst[PXSTRIDE(stride)];
        dst = &dst[PXSTRIDE(stride) * 2];
    }
}

// Size-specialized wrappers (only sizes with both dimensions <= 32 and
// min dimension <= 32 that FILTER_INTRA supports).
#define filter_lfn(width, height) \
static void filter_##width##x##height##_c(pixel *const dst, \
                                          const ptrdiff_t stride, \
                                          const pixel *const topleft, \
                                          const int filt_idx) \
{ \
    filter_intra_c(dst, stride, topleft, filt_idx, width, height); \
}

filter_lfn( 4,  4);
filter_lfn( 8,  4);
filter_lfn(16,  4);
filter_lfn( 4,  8);
filter_lfn( 8,  8);
filter_lfn(16,  8);
filter_lfn(32,  8);
filter_lfn( 4, 16);
filter_lfn( 8, 16);
filter_lfn(16, 16);
filter_lfn(32, 16);
filter_lfn( 8, 32);
filter_lfn(16, 32);
filter_lfn(32, 32);
+
+static __attribute__((noinline)) void
+cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
+         const int w_pad, const int h_pad, const int width, const int height,
+         const int ss_hor, const int ss_ver, const int log2sz)
+{
+    int y, x;
+    int16_t *const ac_orig = ac;
+
+    assert(w_pad >= 0 && w_pad * 4 < width);
+    assert(h_pad >= 0 && h_pad * 4 < height);
+
+    for (y = 0; y < height - 4 * h_pad; y++) {
+        for (x = 0; x < width - 4 * w_pad; x++) {
+            int ac_sum = ypx[x << ss_hor];
+            if (ss_hor) ac_sum += ypx[x * 2 + 1];
+            if (ss_ver) {
+                ac_sum += ypx[(x << ss_hor) + PXSTRIDE(stride)];
+                if (ss_hor) ac_sum += ypx[x * 2 + 1 + PXSTRIDE(stride)];
+            }
+            ac[x] = ac_sum << (1 + !ss_ver + !ss_hor);
+        }
+        for (; x < width; x++)
+            ac[x] = ac[x - 1];
+        ac += width;
+        ypx += PXSTRIDE(stride) << ss_ver;
+    }
+    for (; y < height; y++) {
+        memcpy(ac, &ac[-32], width * sizeof(*ac));
+        ac += width;
+    }
+
+    int sum = (1 << log2sz) >> 1;
+    for (ac = ac_orig, y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            sum += ac[x];
+        ac += width;
+    }
+    sum >>= log2sz;
+
+    // subtract DC
+    for (ac = ac_orig, y = 0; y < height; y++) {
+        for (x = 0; x < width; x++)
+            ac[x] -= sum;
+        ac += width;
+    }
+}
+
// Wrappers named after the luma (lw x lh) and chroma (cw x ch) block sizes,
// instantiated per subsampling layout: (ss_hor, ss_ver) = (1,1) for 4:2:0,
// (1,0) for 4:2:2, (0,0) for 4:4:4.
#define cfl_ac_fn(lw, lh, cw, ch, ss_hor, ss_ver, log2sz) \
static void cfl_ac_##lw##x##lh##_to_##cw##x##ch##_c(int16_t *const ac, \
                                                    const pixel *const ypx, \
                                                    const ptrdiff_t stride, \
                                                    const int w_pad, \
                                                    const int h_pad) \
{ \
    cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver, log2sz); \
}

cfl_ac_fn( 8,  8,  4,  4, 1, 1, 4);
cfl_ac_fn( 8, 16,  4,  8, 1, 1, 5);
cfl_ac_fn( 8, 32,  4, 16, 1, 1, 6);
cfl_ac_fn(16,  8,  8,  4, 1, 1, 5);
cfl_ac_fn(16, 16,  8,  8, 1, 1, 6);
cfl_ac_fn(16, 32,  8, 16, 1, 1, 7);
cfl_ac_fn(32,  8, 16,  4, 1, 1, 6);
cfl_ac_fn(32, 16, 16,  8, 1, 1, 7);
cfl_ac_fn(32, 32, 16, 16, 1, 1, 8);

cfl_ac_fn( 8,  4,  4,  4, 1, 0, 4);
cfl_ac_fn( 8,  8,  4,  8, 1, 0, 5);
cfl_ac_fn(16,  4,  8,  4, 1, 0, 5);
cfl_ac_fn(16,  8,  8,  8, 1, 0, 6);
cfl_ac_fn(16, 16,  8, 16, 1, 0, 7);
cfl_ac_fn(32,  8, 16,  8, 1, 0, 7);
cfl_ac_fn(32, 16, 16, 16, 1, 0, 8);
cfl_ac_fn(32, 32, 16, 32, 1, 0, 9);

cfl_ac_fn( 4,  4,  4,  4, 0, 0, 4);
cfl_ac_fn( 4,  8,  4,  8, 0, 0, 5);
cfl_ac_fn( 4, 16,  4, 16, 0, 0, 6);
cfl_ac_fn( 8,  4,  8,  4, 0, 0, 5);
cfl_ac_fn( 8,  8,  8,  8, 0, 0, 6);
cfl_ac_fn( 8, 16,  8, 16, 0, 0, 7);
cfl_ac_fn( 8, 32,  8, 32, 0, 0, 8);
cfl_ac_fn(16,  4, 16,  4, 0, 0, 6);
cfl_ac_fn(16,  8, 16,  8, 0, 0, 7);
cfl_ac_fn(16, 16, 16, 16, 0, 0, 8);
cfl_ac_fn(16, 32, 16, 32, 0, 0, 9);
cfl_ac_fn(32,  8, 32,  8, 0, 0, 8);
cfl_ac_fn(32, 16, 32, 16, 0, 0, 9);
cfl_ac_fn(32, 32, 32, 32, 0, 0, 10);
+
+static __attribute__((noinline)) void
+cfl_pred_c(pixel *dstU, pixel *dstV, const ptrdiff_t stride,
+           const int16_t *ac, const pixel *const dc_pred,
+           const int8_t *const alphas, const int width, const int height)
+{
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            const int diff1 = alphas[0] * ac[x];
+            dstU[x] = iclip_pixel(dc_pred[ 0] + apply_sign((abs(diff1) + 32) >> 6,
+                                                           diff1));
+            const int diff2 = alphas[1] * ac[x];
+            dstV[x] = iclip_pixel(dc_pred[32] + apply_sign((abs(diff2) + 32) >> 6,
+                                                           diff2));
+        }
+        ac += width;
+        dstU += PXSTRIDE(stride);
+        dstV += PXSTRIDE(stride);
+    }
+}
+
+#define cfl_pred_fn(width) \
+static void cfl_pred_##width##xN_c(pixel *const dstU, \
+                                   pixel *const dstV, \
+                                   const ptrdiff_t stride, \
+                                   const int16_t *const ac, \
+                                   const pixel *const dc_pred, \
+                                   const int8_t *const alphas, \
+                                   const int height) \
+{ \
+    cfl_pred_c(dstU, dstV, stride, ac, dc_pred, alphas, width, height); \
+}
+
+cfl_pred_fn( 4);
+cfl_pred_fn( 8);
+cfl_pred_fn(16);
+cfl_pred_fn(32);
+
+static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
+                       const uint16_t *const pal, const uint8_t *idx,
+                       const int w, const int h)
+{
+    for (int y = 0; y < h; y++) {
+        for (int x = 0; x < w; x++)
+            dst[x] = pal[idx[x]];
+        idx += w;
+        dst += PXSTRIDE(stride);
+    }
+}
+
+void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
+#define assign_lfn(w, h, p1, p2, pfx) \
+    c->intra_pred[pfx##TX_##w##X##h][p1##_PRED] = p2##_##w##x##h##_c
+#define assign_fns(p1, p2) \
+    assign_lfn( 4,  4, p1, p2,); \
+    assign_lfn( 4,  8, p1, p2, R); \
+    assign_lfn( 4, 16, p1, p2, R); \
+    assign_lfn( 8,  4, p1, p2, R); \
+    assign_lfn( 8,  8, p1, p2,); \
+    assign_lfn( 8, 16, p1, p2, R); \
+    assign_lfn( 8, 32, p1, p2, R); \
+    assign_lfn(16,  4, p1, p2, R); \
+    assign_lfn(16,  8, p1, p2, R); \
+    assign_lfn(16, 16, p1, p2,); \
+    assign_lfn(16, 32, p1, p2, R); \
+    assign_lfn(16, 64, p1, p2, R); \
+    assign_lfn(32,  8, p1, p2, R); \
+    assign_lfn(32, 16, p1, p2, R); \
+    assign_lfn(32, 32, p1, p2,); \
+    assign_lfn(32, 64, p1, p2, R); \
+    assign_lfn(64, 16, p1, p2, R); \
+    assign_lfn(64, 32, p1, p2, R); \
+    assign_lfn(64, 64, p1, p2,); \
+
+    assign_fns(DC, dc);
+    assign_fns(DC_128, dc128);
+    assign_fns(TOP_DC,  dctop);
+    assign_fns(LEFT_DC, dcleft);
+    assign_fns(HOR, h);
+    assign_fns(VERT, v);
+    assign_fns(PAETH, paeth);
+    assign_fns(SMOOTH, smooth);
+    assign_fns(SMOOTH_V, smooth_v);
+    assign_fns(SMOOTH_H, smooth_h);
+    assign_fns(Z1, z1);
+    assign_fns(Z2, z2);
+    assign_fns(Z3, z3);
+
+    assign_lfn( 4,  4, FILTER, filter,);
+    assign_lfn( 8,  4, FILTER, filter, R);
+    assign_lfn(16,  4, FILTER, filter, R);
+    assign_lfn( 4,  8, FILTER, filter, R);
+    assign_lfn( 8,  8, FILTER, filter,);
+    assign_lfn(16,  8, FILTER, filter, R);
+    assign_lfn(32,  8, FILTER, filter, R);
+    assign_lfn( 4, 16, FILTER, filter, R);
+    assign_lfn( 8, 16, FILTER, filter, R);
+    assign_lfn(16, 16, FILTER, filter,);
+    assign_lfn(32, 16, FILTER, filter, R);
+    assign_lfn( 8, 32, FILTER, filter, R);
+    assign_lfn(16, 32, FILTER, filter, R);
+    assign_lfn(32, 32, FILTER, filter,);
+
+    // cfl functions are split per chroma subsampling type
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_4X4  ] = cfl_ac_8x8_to_4x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X8  ] = cfl_ac_8x16_to_4x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X16 ] = cfl_ac_8x32_to_4x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X4  ] = cfl_ac_16x8_to_8x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_8X8  ] = cfl_ac_16x16_to_8x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X16 ] = cfl_ac_16x32_to_8x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X4 ] = cfl_ac_32x8_to_16x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X8 ] = cfl_ac_32x16_to_16x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_16X16] = cfl_ac_32x32_to_16x16_c;
+
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_4X4  ] = cfl_ac_8x4_to_4x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_4X8  ] = cfl_ac_8x8_to_4x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4 ] = cfl_ac_16x4_to_8x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8 ] = cfl_ac_16x8_to_8x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16] = cfl_ac_16x16_to_8x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X8 ] = cfl_ac_32x8_to_16x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_16X16] = cfl_ac_32x16_to_16x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X32] = cfl_ac_32x32_to_16x32_c;
+
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_4X4  ] = cfl_ac_4x4_to_4x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X8  ] = cfl_ac_4x8_to_4x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X16 ] = cfl_ac_4x16_to_4x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X4  ] = cfl_ac_8x4_to_8x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_8X8  ] = cfl_ac_8x8_to_8x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X16 ] = cfl_ac_8x16_to_8x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X32 ] = cfl_ac_8x32_to_8x32_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X4 ] = cfl_ac_16x4_to_16x4_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X8 ] = cfl_ac_16x8_to_16x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_16X16] = cfl_ac_16x16_to_16x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X32] = cfl_ac_16x32_to_16x32_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
+
+    c->cfl_pred[0] = cfl_pred_4xN_c;
+    c->cfl_pred[1] = cfl_pred_8xN_c;
+    c->cfl_pred[2] = cfl_pred_16xN_c;
+    c->cfl_pred[3] = cfl_pred_32xN_c;
+
+    c->pal_pred = pal_pred_c;
+}
--- /dev/null
+++ b/src/ipred.h
@@ -1,0 +1,92 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_IPRED_H__
+#define __DAV1D_SRC_IPRED_H__
+
+#include "common/bitdepth.h"
+
+#include "src/levels.h"
+
+/*
+ * Intra prediction.
+ * - a is the angle (in degrees) for directional intra predictors. For other
+ *   modes, it is ignored;
+ * - topleft is the same as the argument given to dav1d_prepare_intra_edges(),
+ *   see ipred_prepare.h for more detailed documentation.
+ */
+#define decl_angular_ipred_fn(name) \
+void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, int angle);
+typedef decl_angular_ipred_fn(*angular_ipred_fn);
+
+/*
+ * Create a subsampled Y plane with the DC subtracted.
+ * - w/h_pad is the edge of the width/height that extends outside the visible
+ *   portion of the frame in 4px units;
+ * - ac has a stride of 16.
+ */
+#define decl_cfl_ac_fn(name) \
+void (name)(int16_t *ac, const pixel *y, ptrdiff_t stride, \
+            int w_pad, int h_pad)
+typedef decl_cfl_ac_fn(*cfl_ac_fn);
+
+/*
+ * dst[plane][x,y] = dc[plane] + alpha[plane] * ac[x,y]
+ * - alphas contains two q3 scalars (one for each plane) in [-16,16] range;
+ * - dc_pred[] is the first line of each plane's DC prediction, the second plane
+ *   starting at an offset of 16 * sizeof(pixel) bytes.
+ */
+#define decl_cfl_pred_fn(name) \
+void (name)(pixel *u_dst, pixel *v_dst, ptrdiff_t stride, \
+            const int16_t *ac, const pixel *dc_pred, \
+            const int8_t *const alphas, const int height)
+typedef decl_cfl_pred_fn(*cfl_pred_fn);
+
+/*
+ * dst[x,y] = pal[idx[x,y]]
+ * - palette indices are [0-7]
+ */
+#define decl_pal_pred_fn(name) \
+void (name)(pixel *dst, ptrdiff_t stride, const uint16_t *pal, \
+            const uint8_t *idx, const int w, const int h)
+typedef decl_pal_pred_fn(*pal_pred_fn);
+
+typedef struct Dav1dIntraPredDSPContext {
+    angular_ipred_fn intra_pred[N_RECT_TX_SIZES][N_IMPL_INTRA_PRED_MODES];
+
+    // chroma-from-luma
+    cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
+    cfl_pred_fn cfl_pred[4];
+
+    // palette
+    pal_pred_fn pal_pred;
+} Dav1dIntraPredDSPContext;
+
+void dav1d_intra_pred_dsp_init_8bpc(Dav1dIntraPredDSPContext *c);
+void dav1d_intra_pred_dsp_init_10bpc(Dav1dIntraPredDSPContext *c);
+
+#endif /* __DAV1D_SRC_IPRED_H__ */
--- /dev/null
+++ b/src/ipred_prepare.c
@@ -1,0 +1,209 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/ipred_prepare.h"
+
+static const uint8_t av1_mode_conv[N_INTRA_PRED_MODES]
+                                  [2 /* have_left */][2 /* have_top */] =
+{
+    [DC_PRED]    = { { DC_128_PRED,  TOP_DC_PRED },
+                     { LEFT_DC_PRED, DC_PRED     } },
+    [PAETH_PRED] = { { DC_128_PRED,  VERT_PRED   },
+                     { HOR_PRED,     PAETH_PRED  } },
+};
+
+static const uint8_t av1_mode_to_angle_map[8] = {
+    90, 180, 45, 135, 113, 157, 203, 67
+};
+
+static const struct {
+    uint8_t needs_left:1;
+    uint8_t needs_top:1;
+    uint8_t needs_topleft:1;
+    uint8_t needs_topright:1;
+    uint8_t needs_bottomleft:1;
+} av1_intra_prediction_edges[N_IMPL_INTRA_PRED_MODES] = {
+    [DC_PRED]       = { .needs_top  = 1, .needs_left = 1 },
+    [VERT_PRED]     = { .needs_top  = 1 },
+    [HOR_PRED]      = { .needs_left = 1 },
+    [LEFT_DC_PRED]  = { .needs_left = 1 },
+    [TOP_DC_PRED]   = { .needs_top  = 1 },
+    [DC_128_PRED]   = { 0 },
+    [Z1_PRED]       = { .needs_top = 1, .needs_topright = 1,
+                        .needs_topleft = 1 },
+    [Z2_PRED]       = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+    [Z3_PRED]       = { .needs_left = 1, .needs_bottomleft = 1,
+                        .needs_topleft = 1 },
+    [SMOOTH_PRED]   = { .needs_left = 1, .needs_top = 1 },
+    [SMOOTH_V_PRED] = { .needs_left = 1, .needs_top = 1 },
+    [SMOOTH_H_PRED] = { .needs_left = 1, .needs_top = 1 },
+    [PAETH_PRED]    = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+    [FILTER_PRED]   = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
+};
+
+enum IntraPredMode
+bytefn(prepare_intra_edges)(const int x, const int have_left,
+                            const int y, const int have_top,
+                            const int w, const int h,
+                            const enum EdgeFlags edge_flags,
+                            const pixel *const dst,
+                            const ptrdiff_t stride,
+                            const pixel *prefilter_toplevel_sb_edge,
+                            enum IntraPredMode mode, int *const angle,
+                            const int tw, const int th,
+                            pixel *const topleft_out)
+{
+    assert(y < h && x < w);
+
+    switch (mode) {
+    case VERT_PRED:
+    case HOR_PRED:
+    case DIAG_DOWN_LEFT_PRED:
+    case DIAG_DOWN_RIGHT_PRED:
+    case VERT_RIGHT_PRED:
+    case HOR_DOWN_PRED:
+    case HOR_UP_PRED:
+    case VERT_LEFT_PRED: {
+        *angle = av1_mode_to_angle_map[mode - VERT_PRED] + 3 * *angle;
+
+        if (*angle < 90) {
+            mode = have_top ? Z1_PRED : VERT_PRED;
+        } else if (*angle == 90) {
+            mode = VERT_PRED;
+        } else if (*angle < 180) {
+            mode = Z2_PRED;
+        } else if (*angle == 180) {
+            mode = HOR_PRED;
+        } else {
+            mode = have_left ? Z3_PRED : HOR_PRED;
+        }
+        break;
+    }
+    case DC_PRED:
+    case PAETH_PRED:
+        mode = av1_mode_conv[mode][have_left][have_top];
+        break;
+    default:
+        break;
+    }
+
+    const pixel *dst_top;
+    if (have_top &&
+        (av1_intra_prediction_edges[mode].needs_top ||
+         av1_intra_prediction_edges[mode].needs_topleft ||
+         (av1_intra_prediction_edges[mode].needs_left && !have_left)))
+    {
+        if (prefilter_toplevel_sb_edge) {
+            dst_top = &prefilter_toplevel_sb_edge[x * 4];
+        } else {
+            dst_top = &dst[-PXSTRIDE(stride)];
+        }
+    }
+
+    if (av1_intra_prediction_edges[mode].needs_left) {
+        const int sz = th << 2;
+        pixel *const left = &topleft_out[-sz];
+
+        if (have_left) {
+            const int px_have = imin(sz, (h - y) << 2);
+
+            for (int i = 0; i < px_have; i++)
+                left[sz - 1 - i] = dst[PXSTRIDE(stride) * i - 1];
+            if (px_have < sz)
+                pixel_set(left, left[sz - px_have], sz - px_have);
+        } else {
+            pixel_set(left, have_top ? *dst_top : ((1 << BITDEPTH) >> 1) + 1, sz);
+        }
+
+        if (av1_intra_prediction_edges[mode].needs_bottomleft) {
+            const int have_bottomleft = (!have_left || y + th >= h) ? 0 :
+                                        (edge_flags & EDGE_I444_LEFT_HAS_BOTTOM);
+
+            if (have_bottomleft) {
+                const int px_have = imin(sz, (h - y - th) << 2);
+
+                for (int i = 0; i < px_have; i++)
+                    left[-(i + 1)] = dst[(sz + i) * PXSTRIDE(stride) - 1];
+                if (px_have < sz)
+                    pixel_set(left - sz, left[-px_have], sz - px_have);
+            } else {
+                pixel_set(left - sz, left[0], sz);
+            }
+        }
+    }
+
+    if (av1_intra_prediction_edges[mode].needs_top) {
+        const int sz = tw << 2;
+        pixel *const top = &topleft_out[1];
+
+        if (have_top) {
+            const int px_have = imin(sz, (w - x) << 2);
+            pixel_copy(top, dst_top, px_have);
+            if (px_have < sz)
+                pixel_set(top + px_have, top[px_have - 1], sz - px_have);
+        } else {
+            pixel_set(top, have_left ? dst[-1] : ((1 << BITDEPTH) >> 1) - 1, sz);
+        }
+
+        if (av1_intra_prediction_edges[mode].needs_topright) {
+            const int have_topright = (!have_top || x + tw >= w) ? 0 :
+                                      (edge_flags & EDGE_I444_TOP_HAS_RIGHT);
+
+            if (have_topright) {
+                const int px_have = imin(sz, (w - x - tw) << 2);
+
+                pixel_copy(top + sz, &dst_top[sz], px_have);
+                if (px_have < sz)
+                    pixel_set(top + sz + px_have, top[sz + px_have - 1],
+                              sz - px_have);
+            } else {
+                pixel_set(top + sz, top[sz - 1], sz);
+            }
+        }
+    }
+
+    if (av1_intra_prediction_edges[mode].needs_topleft) {
+        if (have_left) {
+            *topleft_out = have_top ? dst_top[-1] : dst[-1];
+        } else {
+            *topleft_out = have_top ? *dst_top : (1 << BITDEPTH) >> 1;
+        }
+        if (mode == Z2_PRED && tw + th >= 6)
+            *topleft_out = (topleft_out[-1] * 5 + topleft_out[0] * 6 +
+                            topleft_out[1] * 5 + 8) >> 4;
+    }
+
+    return mode;
+}
--- /dev/null
+++ b/src/ipred_prepare.h
@@ -1,0 +1,102 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_IPRED_PREPARE_H__
+#define __DAV1D_SRC_IPRED_PREPARE_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "common/bitdepth.h"
+
+#include "src/env.h"
+#include "src/intra_edge.h"
+#include "src/levels.h"
+
+/*
+ * Luma intra edge preparation.
+ *
+ * x/y/start/w/h are in luma block (4px) units:
+ * - x and y are the absolute block positions in the image;
+ * - start/w/h are the *dependent tile* boundary positions. In practice, start
+ *   is the horizontal tile start, w is the horizontal tile end, the vertical
+ *   tile start is assumed to be 0 and h is the vertical image end.
+ *
+ * edge_flags signals which edges are available for this transform-block inside
+ * the given partition, as well as for the partition inside the superblock
+ * structure.
+ *
+ * dst and stride are pointers to the top/left position of the current block,
+ * and can be used to locate the top, left, top/left, top/right and bottom/left
+ * edge pointers also.
+ *
+ * angle is the angle_delta [-3..3] on input, and the absolute angle on output.
+ *
+ * mode is the intra prediction mode as coded in the bitstream. The return value
+ * is this same mode, converted to an index in the DSP functions.
+ *
+ * tw/th are the size of the transform block in block (4px) units.
+ *
+ * topleft_out is a pointer to scratch memory that will be filled with the edge
+ * pixels. The memory array should have space to be indexed in the [-2*w,2*w]
+ * range, in the following order:
+ * - [0] will be the top/left edge pixel;
+ * - [1..w] will be the top edge pixels (1 being left-most, w being right-most);
+ * - [w+1..w*w] will be the top/right edge pixels;
+ * - [-1..-w] will be the left edge pixels (-1 being top-most, -w being bottom-
+ *   most);
+ * - [-w-1..-2*w] will be the bottom/left edge pixels.
+ * Each edge may remain uninitialized if it is not used by the returned mode
+ * index. If edges are not available (because the edge position is outside the
+ * tile dimensions or because edge_flags indicates lack of edge availability),
+ * they will be extended from nearby edges as defined by the av1 spec.
+ */
+enum IntraPredMode
+    bytefn(prepare_intra_edges)(int x, int have_left, int y, int have_top,
+                                int w, int h, enum EdgeFlags edge_flags,
+                                const pixel *dst, ptrdiff_t stride,
+                                const pixel *prefilter_toplevel_sb_edge,
+                                enum IntraPredMode mode, int *angle,
+                                int tw, int th, pixel *topleft_out);
+
+// is or'ed with the angle argument into intra predictors to signal that edges
+// are smooth and should use reduced filter strength
+#define ANGLE_SMOOTH_EDGE_FLAG 512
+static inline int sm_flag(const BlockContext *const b, const int idx) {
+    if (!b->intra[idx]) return 0;
+    const enum IntraPredMode m = b->mode[idx];
+    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
+            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
+}
+
+static inline int sm_uv_flag(const BlockContext *const b, const int idx) {
+    const enum IntraPredMode m = b->uvmode[idx];
+    return (m == SMOOTH_PRED || m == SMOOTH_H_PRED ||
+            m == SMOOTH_V_PRED) ? ANGLE_SMOOTH_EDGE_FLAG : 0;
+}
+
+#endif /* __DAV1D_SRC_IPRED_PREPARE_H__ */
--- /dev/null
+++ b/src/itx.c
@@ -1,0 +1,224 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/itx.h"
+
+#include "src/itx_1d.c"
+
+typedef void (*itx_1d_fn)(const coef *in, ptrdiff_t in_s,
+                          coef *out, ptrdiff_t out_s);
+
+static void __attribute__((noinline))
+inv_txfm_add_c(pixel *dst, const ptrdiff_t stride,
+               coef *const coeff, const int eob,
+               const int w, const int h, const int shift1, const int shift2,
+               const itx_1d_fn first_1d_fn, const itx_1d_fn second_1d_fn)
+{
+    int i, j;
+    const ptrdiff_t sh = imin(h, 32), sw = imin(w, 32);
+    coef tmp[w * h], out[h], in_mem[w];
+    const int is_rect2 = w * 2 == h || h * 2 == w;
+
+    if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem));
+    const int rnd1 = (1 << shift1) >> 1;
+    for (i = 0; i < sh; i++) {
+        if (w != sw || is_rect2) {
+            for (j = 0; j < sw; j++) {
+                in_mem[j] = coeff[i + j * sh];
+                if (is_rect2)
+                    in_mem[j] = (in_mem[j] * 2896 + 2048) >> 12;
+            }
+            first_1d_fn(in_mem, 1, &tmp[i * w], 1);
+        } else {
+            first_1d_fn(&coeff[i], sh, &tmp[i * w], 1);
+        }
+        for (j = 0; j < w; j++)
+            tmp[i * w + j] = (tmp[i * w + j] + (rnd1)) >> shift1;
+    }
+
+    if (h != sh) memset(&tmp[sh * w], 0, w * (h - sh) * sizeof(*tmp));
+    const int rnd2 = (1 << shift2) >> 1;
+    for (i = 0; i < w; i++) {
+        second_1d_fn(&tmp[i], w, out, 1);
+        for (j = 0; j < h; j++)
+            dst[i + j * PXSTRIDE(stride)] =
+                iclip_pixel(dst[i + j * PXSTRIDE(stride)] +
+                            ((out[j] + (rnd2)) >> shift2));
+    }
+    memset(coeff, 0, sizeof(*coeff) * sh * sw);
+}
+
+#define inv_txfm_fn(type1, type2, w, h, shift1, shift2) \
+static void \
+inv_txfm_add_##type1##_##type2##_##w##x##h##_c(pixel *dst, \
+                                               const ptrdiff_t stride, \
+                                               coef *const coeff, \
+                                               const int eob) \
+{ \
+    inv_txfm_add_c(dst, stride, coeff, eob, w, h, shift1, shift2, \
+                   inv_##type1##w##_1d, inv_##type2##h##_1d); \
+}
+
+#define inv_txfm_fn64(w, h, shift1, shift2) \
+inv_txfm_fn(dct, dct, w, h, shift1, shift2)
+
+#define inv_txfm_fn32(w, h, shift1, shift2) \
+inv_txfm_fn64(w, h, shift1, shift2); \
+inv_txfm_fn(identity, identity, w, h, shift1, shift2)
+
+#define inv_txfm_fn16(w, h, shift1, shift2) \
+inv_txfm_fn32(w, h, shift1, shift2); \
+inv_txfm_fn(adst,     dct,      w, h, shift1, shift2); \
+inv_txfm_fn(dct,      adst,     w, h, shift1, shift2); \
+inv_txfm_fn(adst,     adst,     w, h, shift1, shift2); \
+inv_txfm_fn(dct,      flipadst, w, h, shift1, shift2); \
+inv_txfm_fn(flipadst, dct,      w, h, shift1, shift2); \
+inv_txfm_fn(adst,     flipadst, w, h, shift1, shift2); \
+inv_txfm_fn(flipadst, adst,     w, h, shift1, shift2); \
+inv_txfm_fn(flipadst, flipadst, w, h, shift1, shift2); \
+inv_txfm_fn(identity, dct,      w, h, shift1, shift2); \
+inv_txfm_fn(dct,      identity, w, h, shift1, shift2); \
+
+#define inv_txfm_fn84(w, h, shift1, shift2) \
+inv_txfm_fn16(w, h, shift1, shift2); \
+inv_txfm_fn(identity, flipadst, w, h, shift1, shift2); \
+inv_txfm_fn(flipadst, identity, w, h, shift1, shift2); \
+inv_txfm_fn(identity, adst,     w, h, shift1, shift2); \
+inv_txfm_fn(adst,     identity, w, h, shift1, shift2); \
+
+inv_txfm_fn84( 4,  4, 0, 4);
+inv_txfm_fn84( 4,  8, 0, 4);
+inv_txfm_fn84( 4, 16, 1, 4);
+inv_txfm_fn84( 8,  4, 0, 4);
+inv_txfm_fn84( 8,  8, 1, 4);
+inv_txfm_fn84( 8, 16, 1, 4);
+inv_txfm_fn32( 8, 32, 2, 4);
+inv_txfm_fn84(16,  4, 1, 4);
+inv_txfm_fn84(16,  8, 1, 4);
+inv_txfm_fn16(16, 16, 2, 4);
+inv_txfm_fn32(16, 32, 1, 4);
+inv_txfm_fn64(16, 64, 2, 4);
+inv_txfm_fn32(32,  8, 2, 4);
+inv_txfm_fn32(32, 16, 1, 4);
+inv_txfm_fn32(32, 32, 2, 4);
+inv_txfm_fn64(32, 64, 1, 4);
+inv_txfm_fn64(64, 16, 2, 4);
+inv_txfm_fn64(64, 32, 1, 4);
+inv_txfm_fn64(64, 64, 2, 4);
+
+static void inv_txfm_add_wht_wht_4x4_c(pixel *dst, const ptrdiff_t stride,
+                                       coef *const coeff, const int eob)
+{
+    int i, j;
+    coef tmp[4 * 4], out[4];
+
+    for (i = 0; i < 4; i++)
+        inv_wht4_1d(&coeff[i], 4, &tmp[i * 4], 1, 0);
+
+    for (i = 0; i < 4; i++) {
+        inv_wht4_1d(&tmp[i], 4, out, 1, 1);
+        for (j = 0; j < 4; j++)
+            dst[i + j * PXSTRIDE(stride)] =
+                iclip_pixel(dst[i + j * PXSTRIDE(stride)] + out[j]);
+    }
+    memset(coeff, 0, sizeof(*coeff) * 4 * 4);
+}
+
+void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c) {
+#define assign_itx_all_fn64(w, h, pfx) \
+    c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
+        inv_txfm_add_dct_dct_##w##x##h##_c
+
+#define assign_itx_all_fn32(w, h, pfx) \
+    assign_itx_all_fn64(w, h, pfx); \
+    c->itxfm_add[pfx##TX_##w##X##h][IDTX] = \
+        inv_txfm_add_identity_identity_##w##x##h##_c
+
+#define assign_itx_all_fn16(w, h, pfx) \
+    assign_itx_all_fn32(w, h, pfx); \
+    c->itxfm_add[pfx##TX_##w##X##h][DCT_ADST ] = \
+        inv_txfm_add_adst_dct_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][ADST_DCT ] = \
+        inv_txfm_add_dct_adst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][ADST_ADST] = \
+        inv_txfm_add_adst_adst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][ADST_FLIPADST] = \
+        inv_txfm_add_flipadst_adst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_ADST] = \
+        inv_txfm_add_adst_flipadst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][DCT_FLIPADST] = \
+        inv_txfm_add_flipadst_dct_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_DCT] = \
+        inv_txfm_add_dct_flipadst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][FLIPADST_FLIPADST] = \
+        inv_txfm_add_flipadst_flipadst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][H_DCT] = \
+        inv_txfm_add_dct_identity_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][V_DCT] = \
+        inv_txfm_add_identity_dct_##w##x##h##_c
+
+#define assign_itx_all_fn84(w, h, pfx) \
+    assign_itx_all_fn16(w, h, pfx); \
+    c->itxfm_add[pfx##TX_##w##X##h][H_FLIPADST] = \
+        inv_txfm_add_flipadst_identity_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][V_FLIPADST] = \
+        inv_txfm_add_identity_flipadst_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][H_ADST] = \
+        inv_txfm_add_adst_identity_##w##x##h##_c; \
+    c->itxfm_add[pfx##TX_##w##X##h][V_ADST] = \
+        inv_txfm_add_identity_adst_##w##x##h##_c; \
+
+    c->itxfm_add[TX_4X4][WHT_WHT] = inv_txfm_add_wht_wht_4x4_c;
+    assign_itx_all_fn84( 4,  4, );
+    assign_itx_all_fn84( 4,  8, R);
+    assign_itx_all_fn84( 4, 16, R);
+    assign_itx_all_fn84( 8,  4, R);
+    assign_itx_all_fn84( 8,  8, );
+    assign_itx_all_fn84( 8, 16, R);
+    assign_itx_all_fn32( 8, 32, R);
+    assign_itx_all_fn84(16,  4, R);
+    assign_itx_all_fn84(16,  8, R);
+    assign_itx_all_fn16(16, 16, );
+    assign_itx_all_fn32(16, 32, R);
+    assign_itx_all_fn64(16, 64, R);
+    assign_itx_all_fn32(32,  8, R);
+    assign_itx_all_fn32(32, 16, R);
+    assign_itx_all_fn32(32, 32, );
+    assign_itx_all_fn64(32, 64, R);
+    assign_itx_all_fn64(64, 16, R);
+    assign_itx_all_fn64(64, 32, R);
+    assign_itx_all_fn64(64, 64, );
+}
--- /dev/null
+++ b/src/itx.h
@@ -1,0 +1,46 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_ITX_H__
+#define __DAV1D_SRC_ITX_H__
+
+#include "common/bitdepth.h"
+
+#include "src/levels.h"
+
/* Shared prototype of every inverse-transform "add" function.  Judging by
 * the *_add naming, these reconstruct the residual from `coeff` and add it
 * onto the destination at dst/dst_stride; `eob` is presumably the
 * end-of-block (last nonzero coefficient) index — confirm against callers. */
#define decl_itx_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, coef *coeff, int eob)
typedef decl_itx_fn(*itxfm_fn);

/* Function-pointer table indexed by rectangular transform size and transform
 * type, filled in by the bitdepth-specific init functions declared below. */
typedef struct Dav1dInvTxfmDSPContext {
    itxfm_fn itxfm_add[N_RECT_TX_SIZES][N_TX_TYPES_PLUS_LL];
} Dav1dInvTxfmDSPContext;

void dav1d_itx_dsp_init_8bpc(Dav1dInvTxfmDSPContext *c);
void dav1d_itx_dsp_init_10bpc(Dav1dInvTxfmDSPContext *c);
+
+#endif /* __DAV1D_SRC_ITX_H__ */
--- /dev/null
+++ b/src/itx_1d.c
@@ -1,0 +1,861 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stddef.h>
+#include <stdint.h>
+
+static void __attribute__((noinline))
+inv_dct4_1d(const coef *const in, const ptrdiff_t in_s,
+            coef *const out, const ptrdiff_t out_s)
+{
+    const int in0 = in[0 * in_s], in1 = in[1 * in_s];
+    const int in2 = in[2 * in_s], in3 = in[3 * in_s];
+
+    int t0 = ((in0 + in2) * 2896 + 2048) >> 12;
+    int t1 = ((in0 - in2) * 2896 + 2048) >> 12;
+    int t2 = (in1 * 1567 - in3 * 3784 + 2048) >> 12;
+    int t3 = (in1 * 3784 + in3 * 1567 + 2048) >> 12;
+
+    out[0 * out_s] = t0 + t3;
+    out[1 * out_s] = t1 + t2;
+    out[2 * out_s] = t1 - t2;
+    out[3 * out_s] = t0 - t3;
+}
+
+static void __attribute__((noinline))
+inv_dct8_1d(const coef *const in, const ptrdiff_t in_s,
+            coef *const out, const ptrdiff_t out_s)
+{
+    coef tmp[4];
+
+    inv_dct4_1d(in, in_s * 2, tmp, 1);
+
+    const int in1 = in[1 * in_s], in3 = in[3 * in_s];
+    const int in5 = in[5 * in_s], in7 = in[7 * in_s];
+
+    int t4a = (in1 *  799 - in7 * 4017 + 2048) >> 12;
+    int t5a = (in5 * 3406 - in3 * 2276 + 2048) >> 12;
+    int t6a = (in5 * 2276 + in3 * 3406 + 2048) >> 12;
+    int t7a = (in1 * 4017 + in7 *  799 + 2048) >> 12;
+
+    int t4  = t4a + t5a;
+        t5a = t4a - t5a;
+    int t7  = t7a + t6a;
+        t6a = t7a - t6a;
+
+    int t5  = ((t6a - t5a) * 2896 + 2048) >> 12;
+    int t6  = ((t6a + t5a) * 2896 + 2048) >> 12;
+
+    out[0 * out_s] = tmp[0] + t7;
+    out[1 * out_s] = tmp[1] + t6;
+    out[2 * out_s] = tmp[2] + t5;
+    out[3 * out_s] = tmp[3] + t4;
+    out[4 * out_s] = tmp[3] - t4;
+    out[5 * out_s] = tmp[2] - t5;
+    out[6 * out_s] = tmp[1] - t6;
+    out[7 * out_s] = tmp[0] - t7;
+}
+
/* 16-point inverse DCT (12-bit fixed-point constants).  The even half
 * reuses the 8-point DCT on coefficients 0,2,...,14; the odd half is
 * computed explicitly below and merged with a final butterfly. */
static void __attribute__((noinline))
inv_dct16_1d(const coef *const in, const ptrdiff_t in_s,
             coef *const out, const ptrdiff_t out_s)
{
    coef tmp[8];

    /* even half: recurse at twice the input stride */
    inv_dct8_1d(in, in_s * 2, tmp, 1);

    const int in1  = in[ 1 * in_s], in3  = in[ 3 * in_s];
    const int in5  = in[ 5 * in_s], in7  = in[ 7 * in_s];
    const int in9  = in[ 9 * in_s], in11 = in[11 * in_s];
    const int in13 = in[13 * in_s], in15 = in[15 * in_s];

    /* odd half, stage 1: paired rotations */
    int t8a  = (in1  *  401 - in15 * 4076 + 2048) >> 12;
    int t15a = (in1  * 4076 + in15 *  401 + 2048) >> 12;
    int t9a  = (in9  * 3166 - in7  * 2598 + 2048) >> 12;
    int t14a = (in9  * 2598 + in7  * 3166 + 2048) >> 12;
    int t10a = (in5  * 1931 - in11 * 3612 + 2048) >> 12;
    int t13a = (in5  * 3612 + in11 * 1931 + 2048) >> 12;
    int t11a = (in13 * 3920 - in3  * 1189 + 2048) >> 12;
    int t12a = (in13 * 1189 + in3  * 3920 + 2048) >> 12;

    /* stage 2: butterflies */
    int t8  = t8a  + t9a;
    int t9  = t8a  - t9a;
    int t10 = t11a - t10a;
    int t11 = t11a + t10a;
    int t12 = t12a + t13a;
    int t13 = t12a - t13a;
    int t14 = t15a - t14a;
    int t15 = t15a + t14a;

    /* stage 3: rotations on the middle terms */
    t9a  = (  t14 * 1567 - t9  * 3784  + 2048) >> 12;
    t14a = (  t14 * 3784 + t9  * 1567  + 2048) >> 12;
    t10a = (-(t13 * 3784 + t10 * 1567) + 2048) >> 12;
    t13a = (  t13 * 1567 - t10 * 3784  + 2048) >> 12;

    /* stage 4: butterflies */
    t8a  = t8   + t11;
    t9   = t9a  + t10a;
    t10  = t9a  - t10a;
    t11a = t8   - t11;
    t12a = t15  - t12;
    t13  = t14a - t13a;
    t14  = t14a + t13a;
    t15a = t15  + t12;

    /* stage 5: 2896/4096 ~= 1/sqrt(2) rotations */
    t10a = ((t13  - t10)  * 2896 + 2048) >> 12;
    t13a = ((t13  + t10)  * 2896 + 2048) >> 12;
    t11  = ((t12a - t11a) * 2896 + 2048) >> 12;
    t12  = ((t12a + t11a) * 2896 + 2048) >> 12;

    /* final butterfly of the even half (tmp) against the odd half */
    out[ 0 * out_s] = tmp[0] + t15a;
    out[ 1 * out_s] = tmp[1] + t14;
    out[ 2 * out_s] = tmp[2] + t13a;
    out[ 3 * out_s] = tmp[3] + t12;
    out[ 4 * out_s] = tmp[4] + t11;
    out[ 5 * out_s] = tmp[5] + t10a;
    out[ 6 * out_s] = tmp[6] + t9;
    out[ 7 * out_s] = tmp[7] + t8a;
    out[ 8 * out_s] = tmp[7] - t8a;
    out[ 9 * out_s] = tmp[6] - t9;
    out[10 * out_s] = tmp[5] - t10a;
    out[11 * out_s] = tmp[4] - t11;
    out[12 * out_s] = tmp[3] - t12;
    out[13 * out_s] = tmp[2] - t13a;
    out[14 * out_s] = tmp[1] - t14;
    out[15 * out_s] = tmp[0] - t15a;
}
+
/* 32-point inverse DCT (12-bit fixed-point constants).  The even half
 * reuses the 16-point DCT on coefficients 0,2,...,30; the odd half goes
 * through the staged butterfly/rotation network below.  The "a" suffix on
 * temporaries alternates per stage to reuse storage. */
static void __attribute__((noinline))
inv_dct32_1d(const coef *const in, const ptrdiff_t in_s,
             coef *const out, const ptrdiff_t out_s)
{
    coef tmp[16];

    /* even half: recurse at twice the input stride */
    inv_dct16_1d(in, in_s * 2, tmp, 1);

    const int in1  = in[ 1 * in_s], in3  = in[ 3 * in_s];
    const int in5  = in[ 5 * in_s], in7  = in[ 7 * in_s];
    const int in9  = in[ 9 * in_s], in11 = in[11 * in_s];
    const int in13 = in[13 * in_s], in15 = in[15 * in_s];
    const int in17 = in[17 * in_s], in19 = in[19 * in_s];
    const int in21 = in[21 * in_s], in23 = in[23 * in_s];
    const int in25 = in[25 * in_s], in27 = in[27 * in_s];
    const int in29 = in[29 * in_s], in31 = in[31 * in_s];

    /* odd half, stage 1: paired rotations */
    int t16a = (in1  *  201 - in31 * 4091 + 2048) >> 12;
    int t31a = (in1  * 4091 + in31 *  201 + 2048) >> 12;
    int t17a = (in17 * 3035 - in15 * 2751 + 2048) >> 12;
    int t30a = (in17 * 2751 + in15 * 3035 + 2048) >> 12;
    int t18a = (in9  * 1751 - in23 * 3703 + 2048) >> 12;
    int t29a = (in9  * 3703 + in23 * 1751 + 2048) >> 12;
    int t19a = (in25 * 3857 - in7  * 1380 + 2048) >> 12;
    int t28a = (in25 * 1380 + in7  * 3857 + 2048) >> 12;
    int t20a = (in5  *  995 - in27 * 3973 + 2048) >> 12;
    int t27a = (in5  * 3973 + in27 *  995 + 2048) >> 12;
    int t21a = (in21 * 3513 - in11 * 2106 + 2048) >> 12;
    int t26a = (in21 * 2106 + in11 * 3513 + 2048) >> 12;
    int t22a = (in13 * 2440 - in19 * 3290 + 2048) >> 12;
    int t25a = (in13 * 3290 + in19 * 2440 + 2048) >> 12;
    int t23a = (in29 * 4052 - in3  *  601 + 2048) >> 12;
    int t24a = (in29 *  601 + in3  * 4052 + 2048) >> 12;

    /* stage 2: butterflies */
    int t16 = t16a + t17a;
    int t17 = t16a - t17a;
    int t18 = t19a - t18a;
    int t19 = t19a + t18a;
    int t20 = t20a + t21a;
    int t21 = t20a - t21a;
    int t22 = t23a - t22a;
    int t23 = t23a + t22a;
    int t24 = t24a + t25a;
    int t25 = t24a - t25a;
    int t26 = t27a - t26a;
    int t27 = t27a + t26a;
    int t28 = t28a + t29a;
    int t29 = t28a - t29a;
    int t30 = t31a - t30a;
    int t31 = t31a + t30a;

    /* stage 3: rotations */
    t17a = (  t30 *  799 - t17 * 4017  + 2048) >> 12;
    t30a = (  t30 * 4017 + t17 *  799  + 2048) >> 12;
    t18a = (-(t29 * 4017 + t18 *  799) + 2048) >> 12;
    t29a = (  t29 *  799 - t18 * 4017  + 2048) >> 12;
    t21a = (  t26 * 3406 - t21 * 2276  + 2048) >> 12;
    t26a = (  t26 * 2276 + t21 * 3406  + 2048) >> 12;
    t22a = (-(t25 * 2276 + t22 * 3406) + 2048) >> 12;
    t25a = (  t25 * 3406 - t22 * 2276  + 2048) >> 12;

    /* stage 4: butterflies */
    t16a = t16  + t19;
    t17  = t17a + t18a;
    t18  = t17a - t18a;
    t19a = t16  - t19;
    t20a = t23  - t20;
    t21  = t22a - t21a;
    t22  = t22a + t21a;
    t23a = t23  + t20;
    t24a = t24  + t27;
    t25  = t25a + t26a;
    t26  = t25a - t26a;
    t27a = t24  - t27;
    t28a = t31  - t28;
    t29  = t30a - t29a;
    t30  = t30a + t29a;
    t31a = t31  + t28;

    /* stage 5: rotations */
    t18a = (  t29  * 1567 - t18  * 3784  + 2048) >> 12;
    t29a = (  t29  * 3784 + t18  * 1567  + 2048) >> 12;
    t19  = (  t28a * 1567 - t19a * 3784  + 2048) >> 12;
    t28  = (  t28a * 3784 + t19a * 1567  + 2048) >> 12;
    t20  = (-(t27a * 3784 + t20a * 1567) + 2048) >> 12;
    t27  = (  t27a * 1567 - t20a * 3784  + 2048) >> 12;
    t21a = (-(t26  * 3784 + t21  * 1567) + 2048) >> 12;
    t26a = (  t26  * 1567 - t21  * 3784  + 2048) >> 12;

    /* stage 6: butterflies */
    t16  = t16a + t23a;
    t17a = t17  + t22;
    t18  = t18a + t21a;
    t19a = t19  + t20;
    t20a = t19  - t20;
    t21  = t18a - t21a;
    t22a = t17  - t22;
    t23  = t16a - t23a;
    t24  = t31a - t24a;
    t25a = t30  - t25;
    t26  = t29a - t26a;
    t27a = t28  - t27;
    t28a = t28  + t27;
    t29  = t29a + t26a;
    t30a = t30  + t25;
    t31  = t31a + t24a;

    /* stage 7: 2896/4096 ~= 1/sqrt(2) rotations */
    t20  = ((t27a - t20a) * 2896 + 2048) >> 12;
    t27  = ((t27a + t20a) * 2896 + 2048) >> 12;
    t21a = ((t26  - t21 ) * 2896 + 2048) >> 12;
    t26a = ((t26  + t21 ) * 2896 + 2048) >> 12;
    t22  = ((t25a - t22a) * 2896 + 2048) >> 12;
    t25  = ((t25a + t22a) * 2896 + 2048) >> 12;
    t23a = ((t24  - t23 ) * 2896 + 2048) >> 12;
    t24a = ((t24  + t23 ) * 2896 + 2048) >> 12;

    /* final butterfly of the even half (tmp) against the odd half */
    out[ 0 * out_s] = tmp[ 0] + t31;
    out[ 1 * out_s] = tmp[ 1] + t30a;
    out[ 2 * out_s] = tmp[ 2] + t29;
    out[ 3 * out_s] = tmp[ 3] + t28a;
    out[ 4 * out_s] = tmp[ 4] + t27;
    out[ 5 * out_s] = tmp[ 5] + t26a;
    out[ 6 * out_s] = tmp[ 6] + t25;
    out[ 7 * out_s] = tmp[ 7] + t24a;
    out[ 8 * out_s] = tmp[ 8] + t23a;
    out[ 9 * out_s] = tmp[ 9] + t22;
    out[10 * out_s] = tmp[10] + t21a;
    out[11 * out_s] = tmp[11] + t20;
    out[12 * out_s] = tmp[12] + t19a;
    out[13 * out_s] = tmp[13] + t18;
    out[14 * out_s] = tmp[14] + t17a;
    out[15 * out_s] = tmp[15] + t16;
    out[16 * out_s] = tmp[15] - t16;
    out[17 * out_s] = tmp[14] - t17a;
    out[18 * out_s] = tmp[13] - t18;
    out[19 * out_s] = tmp[12] - t19a;
    out[20 * out_s] = tmp[11] - t20;
    out[21 * out_s] = tmp[10] - t21a;
    out[22 * out_s] = tmp[ 9] - t22;
    out[23 * out_s] = tmp[ 8] - t23a;
    out[24 * out_s] = tmp[ 7] - t24a;
    out[25 * out_s] = tmp[ 6] - t25;
    out[26 * out_s] = tmp[ 5] - t26a;
    out[27 * out_s] = tmp[ 4] - t27;
    out[28 * out_s] = tmp[ 3] - t28a;
    out[29 * out_s] = tmp[ 2] - t29;
    out[30 * out_s] = tmp[ 1] - t30a;
    out[31 * out_s] = tmp[ 0] - t31;
}
+
/* 64-point inverse DCT (12-bit fixed-point constants).  The even half
 * reuses the 32-point DCT on coefficients 0,2,...,62; the odd half goes
 * through the staged butterfly/rotation network below.  The "a" suffix on
 * temporaries alternates per stage to reuse storage. */
static void __attribute__((noinline))
inv_dct64_1d(const coef *const in, const ptrdiff_t in_s,
             coef *const out, const ptrdiff_t out_s)
{
    coef tmp[32];

    /* even half: recurse at twice the input stride */
    inv_dct32_1d(in, in_s * 2, tmp, 1);

    const int in1  = in[ 1 * in_s], in3  = in[ 3 * in_s];
    const int in5  = in[ 5 * in_s], in7  = in[ 7 * in_s];
    const int in9  = in[ 9 * in_s], in11 = in[11 * in_s];
    const int in13 = in[13 * in_s], in15 = in[15 * in_s];
    const int in17 = in[17 * in_s], in19 = in[19 * in_s];
    const int in21 = in[21 * in_s], in23 = in[23 * in_s];
    const int in25 = in[25 * in_s], in27 = in[27 * in_s];
    const int in29 = in[29 * in_s], in31 = in[31 * in_s];
    const int in33 = in[33 * in_s], in35 = in[35 * in_s];
    const int in37 = in[37 * in_s], in39 = in[39 * in_s];
    const int in41 = in[41 * in_s], in43 = in[43 * in_s];
    const int in45 = in[45 * in_s], in47 = in[47 * in_s];
    const int in49 = in[49 * in_s], in51 = in[51 * in_s];
    const int in53 = in[53 * in_s], in55 = in[55 * in_s];
    const int in57 = in[57 * in_s], in59 = in[59 * in_s];
    const int in61 = in[61 * in_s], in63 = in[63 * in_s];

    /* odd half, stage 1: paired rotations */
    int t32a = (in1  *  101 - in63 * 4095 + 2048) >> 12;
    int t33a = (in33 * 2967 - in31 * 2824 + 2048) >> 12;
    int t34a = (in17 * 1660 - in47 * 3745 + 2048) >> 12;
    int t35a = (in49 * 3822 - in15 * 1474 + 2048) >> 12;
    int t36a = (in9  *  897 - in55 * 3996 + 2048) >> 12;
    int t37a = (in41 * 3461 - in23 * 2191 + 2048) >> 12;
    int t38a = (in25 * 2359 - in39 * 3349 + 2048) >> 12;
    int t39a = (in57 * 4036 - in7  *  700 + 2048) >> 12;
    int t40a = (in5  *  501 - in59 * 4065 + 2048) >> 12;
    int t41a = (in37 * 3229 - in27 * 2520 + 2048) >> 12;
    int t42a = (in21 * 2019 - in43 * 3564 + 2048) >> 12;
    int t43a = (in53 * 3948 - in11 * 1092 + 2048) >> 12;
    int t44a = (in13 * 1285 - in51 * 3889 + 2048) >> 12;
    int t45a = (in45 * 3659 - in19 * 1842 + 2048) >> 12;
    int t46a = (in29 * 2675 - in35 * 3102 + 2048) >> 12;
    int t47a = (in61 * 4085 - in3  *  301 + 2048) >> 12;
    int t48a = (in61 *  301 + in3  * 4085 + 2048) >> 12;
    int t49a = (in29 * 3102 + in35 * 2675 + 2048) >> 12;
    int t50a = (in45 * 1842 + in19 * 3659 + 2048) >> 12;
    int t51a = (in13 * 3889 + in51 * 1285 + 2048) >> 12;
    int t52a = (in53 * 1092 + in11 * 3948 + 2048) >> 12;
    int t53a = (in21 * 3564 + in43 * 2019 + 2048) >> 12;
    int t54a = (in37 * 2520 + in27 * 3229 + 2048) >> 12;
    int t55a = (in5  * 4065 + in59 *  501 + 2048) >> 12;
    int t56a = (in57 *  700 + in7  * 4036 + 2048) >> 12;
    int t57a = (in25 * 3349 + in39 * 2359 + 2048) >> 12;
    int t58a = (in41 * 2191 + in23 * 3461 + 2048) >> 12;
    int t59a = (in9  * 3996 + in55 *  897 + 2048) >> 12;
    int t60a = (in49 * 1474 + in15 * 3822 + 2048) >> 12;
    int t61a = (in17 * 3745 + in47 * 1660 + 2048) >> 12;
    int t62a = (in33 * 2824 + in31 * 2967 + 2048) >> 12;
    int t63a = (in1  * 4095 + in63 *  101 + 2048) >> 12;

    /* stage 2: butterflies */
    int t32 = t32a + t33a;
    int t33 = t32a - t33a;
    int t34 = t35a - t34a;
    int t35 = t35a + t34a;
    int t36 = t36a + t37a;
    int t37 = t36a - t37a;
    int t38 = t39a - t38a;
    int t39 = t39a + t38a;
    int t40 = t40a + t41a;
    int t41 = t40a - t41a;
    int t42 = t43a - t42a;
    int t43 = t43a + t42a;
    int t44 = t44a + t45a;
    int t45 = t44a - t45a;
    int t46 = t47a - t46a;
    int t47 = t47a + t46a;
    int t48 = t48a + t49a;
    int t49 = t48a - t49a;
    int t50 = t51a - t50a;
    int t51 = t51a + t50a;
    int t52 = t52a + t53a;
    int t53 = t52a - t53a;
    int t54 = t55a - t54a;
    int t55 = t55a + t54a;
    int t56 = t56a + t57a;
    int t57 = t56a - t57a;
    int t58 = t59a - t58a;
    int t59 = t59a + t58a;
    int t60 = t60a + t61a;
    int t61 = t60a - t61a;
    int t62 = t63a - t62a;
    int t63 = t63a + t62a;

    /* stage 3: rotations */
    t33a = (t33 * -4076 + t62 *   401 + 2048) >> 12;
    t34a = (t34 * - 401 + t61 * -4076 + 2048) >> 12;
    t37a = (t37 * -2598 + t58 *  3166 + 2048) >> 12;
    t38a = (t38 * -3166 + t57 * -2598 + 2048) >> 12;
    t41a = (t41 * -3612 + t54 *  1931 + 2048) >> 12;
    t42a = (t42 * -1931 + t53 * -3612 + 2048) >> 12;
    t45a = (t45 * -1189 + t50 *  3920 + 2048) >> 12;
    t46a = (t46 * -3920 + t49 * -1189 + 2048) >> 12;
    t49a = (t46 * -1189 + t49 *  3920 + 2048) >> 12;
    t50a = (t45 *  3920 + t50 *  1189 + 2048) >> 12;
    t53a = (t42 * -3612 + t53 *  1931 + 2048) >> 12;
    t54a = (t41 *  1931 + t54 *  3612 + 2048) >> 12;
    t57a = (t38 * -2598 + t57 *  3166 + 2048) >> 12;
    t58a = (t37 *  3166 + t58 *  2598 + 2048) >> 12;
    t61a = (t34 * -4076 + t61 *   401 + 2048) >> 12;
    t62a = (t33 *   401 + t62 *  4076 + 2048) >> 12;

    /* stage 4: butterflies */
    t32a = t32  + t35;
    t33  = t33a + t34a;
    t34  = t33a - t34a;
    t35a = t32  - t35;
    t36a = t39  - t36;
    t37  = t38a - t37a;
    t38  = t38a + t37a;
    t39a = t39  + t36;
    t40a = t40  + t43;
    t41  = t41a + t42a;
    t42  = t41a - t42a;
    t43a = t40  - t43;
    t44a = t47  - t44;
    t45  = t46a - t45a;
    t46  = t46a + t45a;
    t47a = t47  + t44;
    t48a = t48  + t51;
    t49  = t49a + t50a;
    t50  = t49a - t50a;
    t51a = t48  - t51;
    t52a = t55  - t52;
    t53  = t54a - t53a;
    t54  = t54a + t53a;
    t55a = t55  + t52;
    t56a = t56  + t59;
    t57  = t57a + t58a;
    t58  = t57a - t58a;
    t59a = t56  - t59;
    t60a = t63  - t60;
    t61  = t62a - t61a;
    t62  = t62a + t61a;
    t63a = t63  + t60;

    /* stage 5: rotations */
    t34a = (t34  * -4017 + t61  *   799 + 2048) >> 12;
    t35  = (t35a * -4017 + t60a *   799 + 2048) >> 12;
    t36  = (t36a * - 799 + t59a * -4017 + 2048) >> 12;
    t37a = (t37  * - 799 + t58  * -4017 + 2048) >> 12;
    t42a = (t42  * -2276 + t53  *  3406 + 2048) >> 12;
    t43  = (t43a * -2276 + t52a *  3406 + 2048) >> 12;
    t44  = (t44a * -3406 + t51a * -2276 + 2048) >> 12;
    t45a = (t45  * -3406 + t50  * -2276 + 2048) >> 12;
    t50a = (t45  * -2276 + t50  *  3406 + 2048) >> 12;
    t51  = (t44a * -2276 + t51a *  3406 + 2048) >> 12;
    t52  = (t43a *  3406 + t52a *  2276 + 2048) >> 12;
    t53a = (t42  *  3406 + t53  *  2276 + 2048) >> 12;
    t58a = (t37  * -4017 + t58  *   799 + 2048) >> 12;
    t59  = (t36a * -4017 + t59a *   799 + 2048) >> 12;
    t60  = (t35a *   799 + t60a *  4017 + 2048) >> 12;
    t61a = (t34  *   799 + t61  *  4017 + 2048) >> 12;

    /* stage 6: butterflies */
    t32  = t32a + t39a;
    t33a = t33  + t38;
    t34  = t34a + t37a;
    t35a = t35  + t36;
    t36a = t35  - t36;
    t37  = t34a - t37a;
    t38a = t33  - t38;
    t39  = t32a - t39a;
    t40  = t47a - t40a;
    t41a = t46  - t41;
    t42  = t45a - t42a;
    t43a = t44  - t43;
    t44a = t44  + t43;
    t45  = t45a + t42a;
    t46a = t46  + t41;
    t47  = t47a + t40a;
    t48  = t48a + t55a;
    t49a = t49  + t54;
    t50  = t50a + t53a;
    t51a = t51  + t52;
    t52a = t51  - t52;
    t53  = t50a - t53a;
    t54a = t49  - t54;
    t55  = t48a - t55a;
    t56  = t63a - t56a;
    t57a = t62  - t57;
    t58  = t61a - t58a;
    t59a = t60  - t59;
    t60a = t60  + t59;
    t61  = t61a + t58a;
    t62a = t62  + t57;
    t63  = t63a + t56a;

    /* stage 7: rotations */
    t36  = (t36a * -3784 + t59a *  1567 + 2048) >> 12;
    t37a = (t37  * -3784 + t58  *  1567 + 2048) >> 12;
    t38  = (t38a * -3784 + t57a *  1567 + 2048) >> 12;
    t39a = (t39  * -3784 + t56  *  1567 + 2048) >> 12;
    t40a = (t40  * -1567 + t55  * -3784 + 2048) >> 12;
    t41  = (t41a * -1567 + t54a * -3784 + 2048) >> 12;
    t42a = (t42  * -1567 + t53  * -3784 + 2048) >> 12;
    t43  = (t43a * -1567 + t52a * -3784 + 2048) >> 12;
    t52  = (t43a * -3784 + t52a *  1567 + 2048) >> 12;
    t53a = (t42  * -3784 + t53  *  1567 + 2048) >> 12;
    t54  = (t41a * -3784 + t54a *  1567 + 2048) >> 12;
    t55a = (t40  * -3784 + t55  *  1567 + 2048) >> 12;
    t56a = (t39  *  1567 + t56  *  3784 + 2048) >> 12;
    t57  = (t38a *  1567 + t57a *  3784 + 2048) >> 12;
    t58a = (t37  *  1567 + t58  *  3784 + 2048) >> 12;
    t59  = (t36a *  1567 + t59a *  3784 + 2048) >> 12;

    /* stage 8: butterflies */
    t32a = t32  + t47;
    t33  = t33a + t46a;
    t34a = t34  + t45;
    t35  = t35a + t44a;
    t36a = t36  + t43;
    t37  = t37a + t42a;
    t38a = t38  + t41;
    t39  = t39a + t40a;
    t40  = t39a - t40a;
    t41a = t38  - t41;
    t42  = t37a - t42a;
    t43a = t36  - t43;
    t44  = t35a - t44a;
    t45a = t34  - t45;
    t46  = t33a - t46a;
    t47a = t32  - t47;
    t48a = t63  - t48;
    t49  = t62a - t49a;
    t50a = t61  - t50;
    t51  = t60a - t51a;
    t52a = t59  - t52;
    t53  = t58a - t53a;
    t54a = t57  - t54;
    t55  = t56a - t55a;
    t56  = t56a + t55a;
    t57a = t57  + t54;
    t58  = t58a + t53a;
    t59a = t59  + t52;
    t60  = t60a + t51a;
    t61a = t61  + t50;
    t62  = t62a + t49a;
    t63a = t63  + t48;

    /* stage 9: 2896/4096 ~= 1/sqrt(2) rotations */
    t40a = (t40  * -2896 + t55  * 2896 + 2048) >> 12;
    t41  = (t41a * -2896 + t54a * 2896 + 2048) >> 12;
    t42a = (t42  * -2896 + t53  * 2896 + 2048) >> 12;
    t43  = (t43a * -2896 + t52a * 2896 + 2048) >> 12;
    t44a = (t44  * -2896 + t51  * 2896 + 2048) >> 12;
    t45  = (t45a * -2896 + t50a * 2896 + 2048) >> 12;
    t46a = (t46  * -2896 + t49  * 2896 + 2048) >> 12;
    t47  = (t47a * -2896 + t48a * 2896 + 2048) >> 12;
    t48  = (t47a *  2896 + t48a * 2896 + 2048) >> 12;
    t49a = (t46  *  2896 + t49  * 2896 + 2048) >> 12;
    t50  = (t45a *  2896 + t50a * 2896 + 2048) >> 12;
    t51a = (t44  *  2896 + t51  * 2896 + 2048) >> 12;
    t52  = (t43a *  2896 + t52a * 2896 + 2048) >> 12;
    t53a = (t42  *  2896 + t53  * 2896 + 2048) >> 12;
    t54  = (t41a *  2896 + t54a * 2896 + 2048) >> 12;
    t55a = (t40  *  2896 + t55  * 2896 + 2048) >> 12;

    /* final butterfly of the even half (tmp) against the odd half */
    out[ 0 * out_s] = tmp[ 0] + t63a;
    out[ 1 * out_s] = tmp[ 1] + t62;
    out[ 2 * out_s] = tmp[ 2] + t61a;
    out[ 3 * out_s] = tmp[ 3] + t60;
    out[ 4 * out_s] = tmp[ 4] + t59a;
    out[ 5 * out_s] = tmp[ 5] + t58;
    out[ 6 * out_s] = tmp[ 6] + t57a;
    out[ 7 * out_s] = tmp[ 7] + t56;
    out[ 8 * out_s] = tmp[ 8] + t55a;
    out[ 9 * out_s] = tmp[ 9] + t54;
    out[10 * out_s] = tmp[10] + t53a;
    out[11 * out_s] = tmp[11] + t52;
    out[12 * out_s] = tmp[12] + t51a;
    out[13 * out_s] = tmp[13] + t50;
    out[14 * out_s] = tmp[14] + t49a;
    out[15 * out_s] = tmp[15] + t48;
    out[16 * out_s] = tmp[16] + t47;
    out[17 * out_s] = tmp[17] + t46a;
    out[18 * out_s] = tmp[18] + t45;
    out[19 * out_s] = tmp[19] + t44a;
    out[20 * out_s] = tmp[20] + t43;
    out[21 * out_s] = tmp[21] + t42a;
    out[22 * out_s] = tmp[22] + t41;
    out[23 * out_s] = tmp[23] + t40a;
    out[24 * out_s] = tmp[24] + t39;
    out[25 * out_s] = tmp[25] + t38a;
    out[26 * out_s] = tmp[26] + t37;
    out[27 * out_s] = tmp[27] + t36a;
    out[28 * out_s] = tmp[28] + t35;
    out[29 * out_s] = tmp[29] + t34a;
    out[30 * out_s] = tmp[30] + t33;
    out[31 * out_s] = tmp[31] + t32a;
    out[32 * out_s] = tmp[31] - t32a;
    out[33 * out_s] = tmp[30] - t33;
    out[34 * out_s] = tmp[29] - t34a;
    out[35 * out_s] = tmp[28] - t35;
    out[36 * out_s] = tmp[27] - t36a;
    out[37 * out_s] = tmp[26] - t37;
    out[38 * out_s] = tmp[25] - t38a;
    out[39 * out_s] = tmp[24] - t39;
    out[40 * out_s] = tmp[23] - t40a;
    out[41 * out_s] = tmp[22] - t41;
    out[42 * out_s] = tmp[21] - t42a;
    out[43 * out_s] = tmp[20] - t43;
    out[44 * out_s] = tmp[19] - t44a;
    out[45 * out_s] = tmp[18] - t45;
    out[46 * out_s] = tmp[17] - t46a;
    out[47 * out_s] = tmp[16] - t47;
    out[48 * out_s] = tmp[15] - t48;
    out[49 * out_s] = tmp[14] - t49a;
    out[50 * out_s] = tmp[13] - t50;
    out[51 * out_s] = tmp[12] - t51a;
    out[52 * out_s] = tmp[11] - t52;
    out[53 * out_s] = tmp[10] - t53a;
    out[54 * out_s] = tmp[ 9] - t54;
    out[55 * out_s] = tmp[ 8] - t55a;
    out[56 * out_s] = tmp[ 7] - t56;
    out[57 * out_s] = tmp[ 6] - t57a;
    out[58 * out_s] = tmp[ 5] - t58;
    out[59 * out_s] = tmp[ 4] - t59a;
    out[60 * out_s] = tmp[ 3] - t60;
    out[61 * out_s] = tmp[ 2] - t61a;
    out[62 * out_s] = tmp[ 1] - t62;
    out[63 * out_s] = tmp[ 0] - t63a;
}
+
+static void __attribute__((noinline))
+inv_adst4_1d(const coef *const in, const ptrdiff_t in_s,
+             coef *const out, const ptrdiff_t out_s)
+{
+    const int in0 = in[0 * in_s], in1 = in[1 * in_s];
+    const int in2 = in[2 * in_s], in3 = in[3 * in_s];
+
+    int t0 = 1321 * in0 + 3803 * in2 + 2482 * in3;
+    int t1 = 2482 * in0 - 1321 * in2 - 3803 * in3;
+    int t2 = 3344 * (in0 - in2 + in3);
+    int t3 = 3344 * in1;
+
+    out[0 * out_s] = (t0 + t3      + 2048) >> 12;
+    out[1 * out_s] = (t1 + t3      + 2048) >> 12;
+    out[2 * out_s] = (t2           + 2048) >> 12;
+    out[3 * out_s] = (t0 + t1 - t3 + 2048) >> 12;
+}
+
/* 8-point inverse ADST (12-bit fixed-point constants).  Note the output
 * ordering and the negated odd-position outputs, which implement the
 * sign-alternation of the ADST basis. */
static void __attribute__((noinline))
inv_adst8_1d(const coef *const in, const ptrdiff_t in_s,
             coef *const out, const ptrdiff_t out_s)
{
    const int in0 = in[0 * in_s], in1 = in[1 * in_s];
    const int in2 = in[2 * in_s], in3 = in[3 * in_s];
    const int in4 = in[4 * in_s], in5 = in[5 * in_s];
    const int in6 = in[6 * in_s], in7 = in[7 * in_s];

    /* stage 1: paired rotations */
    int t0a = (4076 * in7 +  401 * in0 + 2048) >> 12;
    int t1a = ( 401 * in7 - 4076 * in0 + 2048) >> 12;
    int t2a = (3612 * in5 + 1931 * in2 + 2048) >> 12;
    int t3a = (1931 * in5 - 3612 * in2 + 2048) >> 12;
    int t4a = (2598 * in3 + 3166 * in4 + 2048) >> 12;
    int t5a = (3166 * in3 - 2598 * in4 + 2048) >> 12;
    int t6a = (1189 * in1 + 3920 * in6 + 2048) >> 12;
    int t7a = (3920 * in1 - 1189 * in6 + 2048) >> 12;

    /* stage 2: butterflies */
    int t0 = t0a + t4a;
    int t1 = t1a + t5a;
    int t2 = t2a + t6a;
    int t3 = t3a + t7a;
    int t4 = t0a - t4a;
    int t5 = t1a - t5a;
    int t6 = t2a - t6a;
    int t7 = t3a - t7a;

    /* stage 3: rotations on the lower half */
    t4a = (3784 * t4 + 1567 * t5 + 2048) >> 12;
    t5a = (1567 * t4 - 3784 * t5 + 2048) >> 12;
    t6a = (3784 * t7 - 1567 * t6 + 2048) >> 12;
    t7a = (1567 * t7 + 3784 * t6 + 2048) >> 12;

    /* stage 4: partial outputs, remaining terms kept for stage 5 */
    out[0 * out_s] =   t0 + t2;
    out[7 * out_s] = -(t1 + t3);
    t2             =   t0 - t2;
    t3             =   t1 - t3;

    out[1 * out_s] = -(t4a + t6a);
    out[6 * out_s] =   t5a + t7a;
    t6             =   t4a - t6a;
    t7             =   t5a - t7a;

    /* stage 5: 2896/4096 ~= 1/sqrt(2) scaling of the middle outputs */
    out[3 * out_s] = -(((t2 + t3) * 2896 + 2048) >> 12);
    out[4 * out_s] =   ((t2 - t3) * 2896 + 2048) >> 12;
    out[2 * out_s] =   ((t6 + t7) * 2896 + 2048) >> 12;
    out[5 * out_s] = -(((t6 - t7) * 2896 + 2048) >> 12);
}
+
/* 16-point inverse ADST (12-bit fixed-point constants).  Same structure as
 * the 8-point ADST with one extra butterfly/rotation stage; odd-position
 * outputs are negated to realize the ADST sign alternation. */
static void __attribute__((noinline))
inv_adst16_1d(const coef *const in, const ptrdiff_t in_s,
              coef *const out, const ptrdiff_t out_s)
{
    const int in0  = in[ 0 * in_s], in1  = in[ 1 * in_s];
    const int in2  = in[ 2 * in_s], in3  = in[ 3 * in_s];
    const int in4  = in[ 4 * in_s], in5  = in[ 5 * in_s];
    const int in6  = in[ 6 * in_s], in7  = in[ 7 * in_s];
    const int in8  = in[ 8 * in_s], in9  = in[ 9 * in_s];
    const int in10 = in[10 * in_s], in11 = in[11 * in_s];
    const int in12 = in[12 * in_s], in13 = in[13 * in_s];
    const int in14 = in[14 * in_s], in15 = in[15 * in_s];

    /* stage 1: paired rotations */
    int t0  = (in15 * 4091 + in0  *  201 + 2048) >> 12;
    int t1  = (in15 *  201 - in0  * 4091 + 2048) >> 12;
    int t2  = (in13 * 3973 + in2  *  995 + 2048) >> 12;
    int t3  = (in13 *  995 - in2  * 3973 + 2048) >> 12;
    int t4  = (in11 * 3703 + in4  * 1751 + 2048) >> 12;
    int t5  = (in11 * 1751 - in4  * 3703 + 2048) >> 12;
    int t6  = (in9  * 3290 + in6  * 2440 + 2048) >> 12;
    int t7  = (in9  * 2440 - in6  * 3290 + 2048) >> 12;
    int t8  = (in7  * 2751 + in8  * 3035 + 2048) >> 12;
    int t9  = (in7  * 3035 - in8  * 2751 + 2048) >> 12;
    int t10 = (in5  * 2106 + in10 * 3513 + 2048) >> 12;
    int t11 = (in5  * 3513 - in10 * 2106 + 2048) >> 12;
    int t12 = (in3  * 1380 + in12 * 3857 + 2048) >> 12;
    int t13 = (in3  * 3857 - in12 * 1380 + 2048) >> 12;
    int t14 = (in1  *  601 + in14 * 4052 + 2048) >> 12;
    int t15 = (in1  * 4052 - in14 *  601 + 2048) >> 12;

    /* stage 2: butterflies */
    int t0a  = t0 + t8;
    int t1a  = t1 + t9;
    int t2a  = t2 + t10;
    int t3a  = t3 + t11;
    int t4a  = t4 + t12;
    int t5a  = t5 + t13;
    int t6a  = t6 + t14;
    int t7a  = t7 + t15;
    int t8a  = t0 - t8;
    int t9a  = t1 - t9;
    int t10a = t2 - t10;
    int t11a = t3 - t11;
    int t12a = t4 - t12;
    int t13a = t5 - t13;
    int t14a = t6 - t14;
    int t15a = t7 - t15;

    /* stage 3: rotations on the upper half */
    t8   = (t8a  * 4017 + t9a  *  799 + 2048) >> 12;
    t9   = (t8a  *  799 - t9a  * 4017 + 2048) >> 12;
    t10  = (t10a * 2276 + t11a * 3406 + 2048) >> 12;
    t11  = (t10a * 3406 - t11a * 2276 + 2048) >> 12;
    t12  = (t13a * 4017 - t12a *  799 + 2048) >> 12;
    t13  = (t13a *  799 + t12a * 4017 + 2048) >> 12;
    t14  = (t15a * 2276 - t14a * 3406 + 2048) >> 12;
    t15  = (t15a * 3406 + t14a * 2276 + 2048) >> 12;

    /* stage 4: butterflies */
    t0   = t0a + t4a;
    t1   = t1a + t5a;
    t2   = t2a + t6a;
    t3   = t3a + t7a;
    t4   = t0a - t4a;
    t5   = t1a - t5a;
    t6   = t2a - t6a;
    t7   = t3a - t7a;
    t8a  = t8  + t12;
    t9a  = t9  + t13;
    t10a = t10 + t14;
    t11a = t11 + t15;
    t12a = t8  - t12;
    t13a = t9  - t13;
    t14a = t10 - t14;
    t15a = t11 - t15;

    /* stage 5: rotations */
    t4a  = (t4   * 3784 + t5   * 1567 + 2048) >> 12;
    t5a  = (t4   * 1567 - t5   * 3784 + 2048) >> 12;
    t6a  = (t7   * 3784 - t6   * 1567 + 2048) >> 12;
    t7a  = (t7   * 1567 + t6   * 3784 + 2048) >> 12;
    t12  = (t12a * 3784 + t13a * 1567 + 2048) >> 12;
    t13  = (t12a * 1567 - t13a * 3784 + 2048) >> 12;
    t14  = (t15a * 3784 - t14a * 1567 + 2048) >> 12;
    t15  = (t15a * 1567 + t14a * 3784 + 2048) >> 12;

    /* stage 6: partial outputs, remaining terms kept for stage 7 */
    out[ 0 * out_s] =   t0  + t2;
    out[15 * out_s] = -(t1  + t3);
    t2a             =   t0  - t2;
    t3a             =   t1  - t3;
    out[ 3 * out_s] = -(t4a + t6a);
    out[12 * out_s] =   t5a + t7a;
    t6              =   t4a - t6a;
    t7              =   t5a - t7a;
    out[ 1 * out_s] = -(t8a + t10a);
    out[14 * out_s] =   t9a + t11a;
    t10             =   t8a - t10a;
    t11             =   t9a - t11a;
    out[ 2 * out_s] =   t12 + t14;
    out[13 * out_s] = -(t13 + t15);
    t14a            =   t12 - t14;
    t15a            =   t13 - t15;

    /* stage 7: 2896/4096 ~= 1/sqrt(2) scaling of the middle outputs */
    out[ 7 * out_s] = -(((t2a  + t3a)  * 2896 + 2048) >> 12);
    out[ 8 * out_s] =   ((t2a  - t3a)  * 2896 + 2048) >> 12;
    out[ 4 * out_s] =   ((t6   + t7)   * 2896 + 2048) >> 12;
    out[11 * out_s] = -(((t6   - t7)   * 2896 + 2048) >> 12);
    out[ 6 * out_s] =   ((t10  + t11)  * 2896 + 2048) >> 12;
    out[ 9 * out_s] = -(((t10  - t11)  * 2896 + 2048) >> 12);
    out[ 5 * out_s] = -(((t14a + t15a) * 2896 + 2048) >> 12);
    out[10 * out_s] =   ((t14a - t15a) * 2896 + 2048) >> 12;
}
+
/* inv_flipadstN_1d: an ADST whose N outputs are written in reversed order.
 * Implemented by pointing the output at the last element and negating the
 * output stride, so the matching inv_adstN_1d body is reused unchanged. */
#define flip_inv_adst(sz) \
static void inv_flipadst##sz##_1d(const coef *const in, const ptrdiff_t in_s, \
                                  coef *const out, const ptrdiff_t out_s) \
{ \
    inv_adst##sz##_1d(in, in_s, &out[(sz - 1) * out_s], -out_s); \
}

flip_inv_adst(4);
flip_inv_adst(8);
flip_inv_adst(16);

#undef flip_inv_adst
+
+static void __attribute__((noinline))
+inv_identity4_1d(const coef *const in, const ptrdiff_t in_s,
+                 coef *const out, const ptrdiff_t out_s)
+{
+    for (int i = 0; i < 4; i++)
+        out[out_s * i] = (in[in_s * i] * 5793 + 2048) >> 12;
+}
+
+static void __attribute__((noinline))
+inv_identity8_1d(const coef *const in, const ptrdiff_t in_s,
+                 coef *const out, const ptrdiff_t out_s)
+{
+    for (int i = 0; i < 8; i++)
+        out[out_s * i] = in[in_s * i] * 2;
+}
+
+static void __attribute__((noinline))
+inv_identity16_1d(const coef *const in, const ptrdiff_t in_s,
+                  coef *const out, const ptrdiff_t out_s)
+{
+    for (int i = 0; i < 16; i++)
+        out[out_s * i] = (in[in_s * i] * 2 * 5793 + 2048) >> 12;
+}
+
+static void __attribute__((noinline))
+inv_identity32_1d(const coef *const in, const ptrdiff_t in_s,
+                  coef *const out, const ptrdiff_t out_s)
+{
+    for (int i = 0; i < 32; i++)
+        out[out_s * i] = in[in_s * i] * 4;
+}
+
/* 4-point inverse Walsh-Hadamard transform, used for the lossless
 * (WHT_WHT) 4x4 mode.  The first pass (pass == 0) pre-shifts the input
 * down by 2 — presumably undoing encoder-side scaling per the AV1
 * lossless WHT definition; the second pass takes the input as-is.
 * All arithmetic is integer-exact (lossless). */
static void __attribute__((noinline))
inv_wht4_1d(const coef *const in, const ptrdiff_t in_s,
            coef *const out, const ptrdiff_t out_s,
            const int pass)
{
    const int sh = 2 * !pass;  /* shift by 2 on the first pass only */
    const int in0 = in[0 * in_s] >> sh, in1 = in[1 * in_s] >> sh;
    const int in2 = in[2 * in_s] >> sh, in3 = in[3 * in_s] >> sh;
    /* butterfly network; t4 is the shared mid-point term */
    const int t0 = in0 + in1;
    const int t2 = in2 - in3;
    const int t4 = (t0 - t2) >> 1;
    const int t3 = t4 - in3;
    const int t1 = t4 - in1;

    out[0 * out_s] = t0 - t3;
    out[1 * out_s] = t3;
    out[2 * out_s] = t1;
    out[3 * out_s] = t2 + t1;
}
--- /dev/null
+++ b/src/levels.h
@@ -1,0 +1,525 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_LEVELS_H__
+#define __DAV1D_SRC_LEVELS_H__
+
+#include "dav1d/picture.h"
+
+// Open Bitstream Unit (OBU) types; the values match the obu_type field
+// as coded in the AV1 bitstream (note: values 0 and 7-14 are unused here).
+enum ObuType {
+    OBU_SEQ_HDR   = 1,
+    OBU_TD        = 2,
+    OBU_FRAME_HDR = 3,
+    OBU_TILE_GRP  = 4,
+    OBU_METADATA  = 5,
+    OBU_FRAME     = 6,
+    OBU_PADDING   = 15,
+};
+
+// Frame types, matching the frame_type field coded in the frame header.
+// KEY_FRAME and INTRAONLY_FRAME are the intra-only types.
+enum FrameType {
+    KEY_FRAME       = 0,
+    INTER_FRAME     = 1,
+    INTRAONLY_FRAME = 2,
+    S_FRAME         = 3,
+};
+
+// Square transform sizes, in increasing order (4x4 .. 64x64).
+enum TxfmSize {
+    TX_4X4,
+    TX_8X8,
+    TX_16X16,
+    TX_32X32,
+    TX_64X64,
+    N_TX_SIZES,
+};
+
+// Block partitioning levels, from the largest (128x128 superblock)
+// down to 8x8.
+enum BlockLevel {
+    BL_128X128,
+    BL_64X64,
+    BL_32X32,
+    BL_16X16,
+    BL_8X8,
+    N_BL_LEVELS,
+};
+
+// Frame-level transform-size signaling mode.
+enum TxfmMode {
+    TX_4X4_ONLY,
+    TX_LARGEST,
+    TX_SWITCHABLE,
+    N_TX_MODES,
+};
+
+// Rectangular transform sizes; numbering continues after the square
+// sizes (starts at N_TX_SIZES) so both share one value space.
+enum RectTxfmSize {
+    RTX_4X8 = N_TX_SIZES,
+    RTX_8X4,
+    RTX_8X16,
+    RTX_16X8,
+    RTX_16X32,
+    RTX_32X16,
+    RTX_32X64,
+    RTX_64X32,
+    RTX_4X16,
+    RTX_16X4,
+    RTX_8X32,
+    RTX_32X8,
+    RTX_16X64,
+    RTX_64X16,
+    N_RECT_TX_SIZES
+};
+
+// 2D transform type combinations (vertical x horizontal 1D transforms).
+// WHT_WHT sits past N_TX_TYPES; "LL" presumably stands for lossless.
+enum TxfmType {
+    DCT_DCT,    // DCT  in both horizontal and vertical
+    ADST_DCT,   // ADST in vertical, DCT in horizontal
+    DCT_ADST,   // DCT  in vertical, ADST in horizontal
+    ADST_ADST,  // ADST in both directions
+    FLIPADST_DCT,
+    DCT_FLIPADST,
+    FLIPADST_FLIPADST,
+    ADST_FLIPADST,
+    FLIPADST_ADST,
+    IDTX,
+    V_DCT,
+    H_DCT,
+    V_ADST,
+    H_ADST,
+    V_FLIPADST,
+    H_FLIPADST,
+    N_TX_TYPES,
+    WHT_WHT = N_TX_TYPES,
+    N_TX_TYPES_PLUS_LL,
+};
+
+// Sets of allowed transform types, by restrictiveness.
+enum TxfmTypeSet {
+    TXTP_SET_DCT,
+    TXTP_SET_DCT_ID,
+    TXTP_SET_DT4_ID,
+    TXTP_SET_DT4_ID_1D,
+    TXTP_SET_DT9_ID_1D,
+    TXTP_SET_ALL,
+    TXTP_SET_LOSSLESS,
+    N_TXTP_SETS
+};
+
+// Transform classes: true 2D, or purely horizontal/vertical 1D.
+enum TxClass {
+    TX_CLASS_2D,
+    TX_CLASS_H,
+    TX_CLASS_V,
+};
+
+// Intra prediction modes. Entries from LEFT_DC_PRED onward alias the
+// value space of the directional modes above (LEFT_DC_PRED is assigned
+// DIAG_DOWN_LEFT_PRED's value); these are implementation-internal
+// variants, not independently coded modes.
+enum IntraPredMode {
+    DC_PRED,
+    VERT_PRED,
+    HOR_PRED,
+    DIAG_DOWN_LEFT_PRED,
+    DIAG_DOWN_RIGHT_PRED,
+    VERT_RIGHT_PRED,
+    HOR_DOWN_PRED,
+    HOR_UP_PRED,
+    VERT_LEFT_PRED,
+    SMOOTH_PRED,
+    SMOOTH_V_PRED,
+    SMOOTH_H_PRED,
+    PAETH_PRED,
+    N_INTRA_PRED_MODES,
+    CFL_PRED = N_INTRA_PRED_MODES,
+    N_UV_INTRA_PRED_MODES,
+    N_IMPL_INTRA_PRED_MODES = N_UV_INTRA_PRED_MODES,
+    LEFT_DC_PRED = DIAG_DOWN_LEFT_PRED,
+    TOP_DC_PRED,
+    DC_128_PRED,
+    Z1_PRED,
+    Z2_PRED,
+    Z3_PRED,
+    FILTER_PRED = N_INTRA_PRED_MODES,
+};
+
+// Intra modes usable for the intra part of inter-intra prediction.
+enum InterIntraPredMode {
+    II_DC_PRED,
+    II_VERT_PRED,
+    II_HOR_PRED,
+    II_SMOOTH_PRED,
+    N_INTER_INTRA_PRED_MODES,
+};
+
+// Block partition types; diagrams in the comments sketch the split shape.
+enum BlockPartition {
+    PARTITION_NONE,     // [ ] <-.
+    PARTITION_H,        // [-]   |
+    PARTITION_V,        // [|]   |
+    PARTITION_SPLIT,    // [+] --'
+    PARTITION_T_TOP_SPLIT,    // [⊥] i.e. split top, H bottom
+    PARTITION_T_BOTTOM_SPLIT, // [т] i.e. H top, split bottom
+    PARTITION_T_LEFT_SPLIT,   // [-|] i.e. split left, V right
+    PARTITION_T_RIGHT_SPLIT,  // [|-] i.e. V left, split right
+    PARTITION_H4,       // [Ⲷ]
+    PARTITION_V4,       // [Ⲽ]
+    N_PARTITIONS,
+    N_SUB8X8_PARTITIONS = PARTITION_T_TOP_SPLIT,
+};
+
+// Coding block sizes (width x height), largest first.
+enum BlockSize {
+    BS_128x128,
+    BS_128x64,
+    BS_64x128,
+    BS_64x64,
+    BS_64x32,
+    BS_64x16,
+    BS_32x64,
+    BS_32x32,
+    BS_32x16,
+    BS_32x8,
+    BS_16x64,
+    BS_16x32,
+    BS_16x16,
+    BS_16x8,
+    BS_16x4,
+    BS_8x32,
+    BS_8x16,
+    BS_8x8,
+    BS_8x4,
+    BS_4x16,
+    BS_4x8,
+    BS_4x4,
+    N_BS_SIZES,
+};
+
+// Interpolation filter families. FILTER_SWITCHABLE (== N_FILTERS) is the
+// sentinel signaling per-block filter selection.
+enum FilterMode {
+    FILTER_8TAP_REGULAR,
+    FILTER_8TAP_SMOOTH,
+    FILTER_8TAP_SHARP,
+    N_SWITCHABLE_FILTERS,
+    FILTER_BILINEAR = N_SWITCHABLE_FILTERS,
+    N_FILTERS,
+    FILTER_SWITCHABLE = N_FILTERS,
+};
+
+// Resolved 2D (horizontal+vertical) filter combinations.
+enum Filter2d { // order is horizontal, vertical
+    FILTER_2D_8TAP_REGULAR,
+    FILTER_2D_8TAP_REGULAR_SMOOTH,
+    FILTER_2D_8TAP_REGULAR_SHARP,
+    FILTER_2D_8TAP_SHARP_REGULAR,
+    FILTER_2D_8TAP_SHARP_SMOOTH,
+    FILTER_2D_8TAP_SHARP,
+    FILTER_2D_8TAP_SMOOTH_REGULAR,
+    FILTER_2D_8TAP_SMOOTH,
+    FILTER_2D_8TAP_SMOOTH_SHARP,
+    FILTER_2D_BILINEAR,
+    N_2D_FILTERS,
+};
+
+// Motion-vector joint classes: which of the two MV components are nonzero.
+enum MVJoint {
+    MV_JOINT_ZERO,
+    MV_JOINT_H,
+    MV_JOINT_V,
+    MV_JOINT_HV,
+    N_MV_JOINTS,
+};
+
+// Single-reference inter prediction modes.
+enum InterPredMode {
+    NEARESTMV,
+    NEARMV,
+    GLOBALMV,
+    NEWMV,
+    N_INTER_PRED_MODES,
+};
+
+// Compound (two-reference) inter prediction mode combinations.
+enum CompInterPredMode {
+    NEARESTMV_NEARESTMV,
+    NEARMV_NEARMV,
+    NEARESTMV_NEWMV,
+    NEWMV_NEARESTMV,
+    NEARMV_NEWMV,
+    NEWMV_NEARMV,
+    GLOBALMV_GLOBALMV,
+    NEWMV_NEWMV,
+    N_COMP_INTER_PRED_MODES,
+};
+
+// How the two predictions of a compound block are combined.
+enum CompInterType {
+    COMP_INTER_NONE,
+    COMP_INTER_WEIGHTED_AVG,
+    COMP_INTER_AVG,
+    COMP_INTER_SEG,
+    COMP_INTER_WEDGE,
+};
+
+// Inter-intra combination type.
+enum InterIntraType {
+    INTER_INTRA_NONE,
+    INTER_INTRA_BLEND,
+    INTER_INTRA_WEDGE,
+};
+
+// Tri-state flag: feature off, on, or chosen adaptively per frame.
+enum AdaptiveBoolean {
+    OFF = 0,
+    ON = 1,
+    ADAPTIVE = 2,
+};
+
+// Loop restoration filter types.
+enum RestorationType {
+    RESTORATION_NONE,
+    RESTORATION_SWITCHABLE,
+    RESTORATION_WIENER,
+    RESTORATION_SGRPROJ,
+};
+
+// Motion vector; note the vertical (y) component comes first.
+typedef struct mv {
+    int16_t y, x;
+} mv;
+
+// Global/warped motion model types, by increasing parameter count.
+enum WarpedMotionType {
+    WM_TYPE_IDENTITY,
+    WM_TYPE_TRANSLATION,
+    WM_TYPE_ROT_ZOOM,
+    WM_TYPE_AFFINE,
+};
+
+// Warped motion model: a 6-coefficient matrix plus derived shear
+// parameters (alpha..delta), also addressable as the abcd[] array.
+typedef struct WarpedMotionParams {
+    enum WarpedMotionType type;
+    int32_t matrix[6];
+    union {
+        struct {
+            int16_t alpha, beta, gamma, delta;
+        };
+        int16_t abcd[4];
+    };
+} WarpedMotionParams;
+
+// Per-block motion compensation mode.
+enum MotionMode {
+    MM_TRANSLATION,
+    MM_OBMC,
+    MM_WARP,
+};
+
+// Parsed AV1 sequence header (OBU_SEQ_HDR). Field order follows the
+// parse order of the bitstream syntax elements.
+typedef struct Av1SequenceHeader {
+    int profile;
+    int still_picture;
+    int reduced_still_picture_header;
+    int timing_info_present;
+    int num_units_in_tick;
+    int time_scale;
+    int equal_picture_interval;
+    int num_ticks_per_picture;
+    int decoder_model_info_present;
+    int bitrate_scale;
+    int buffer_size_scale;
+    int encoder_decoder_buffer_delay_length;
+    int num_units_in_decoding_tick;
+    int buffer_removal_delay_length;
+    int frame_presentation_delay_length;
+    int display_model_info_present;
+    int num_operating_points;
+    struct Av1SequenceHeaderOperatingPoint {
+        int idc;
+        int major_level, minor_level;
+        int tier;
+        int decoder_model_param_present;
+        int bitrate;
+        int buffer_size;
+        int cbr;
+        int decoder_buffer_delay;
+        int encoder_buffer_delay;
+        int low_delay_mode;
+        int display_model_param_present;
+        int initial_display_delay;
+    } operating_points[32];
+    int max_width, max_height, width_n_bits, height_n_bits;
+    int frame_id_numbers_present;
+    int delta_frame_id_n_bits;
+    int frame_id_n_bits;
+    int sb128; // 1 = 128x128 superblocks, 0 = 64x64 superblocks
+    int filter_intra;
+    int intra_edge_filter;
+    int inter_intra;
+    int masked_compound;
+    int warped_motion;
+    int dual_filter;
+    int order_hint;
+    int jnt_comp;
+    int ref_frame_mvs;
+    enum AdaptiveBoolean screen_content_tools;
+    enum AdaptiveBoolean force_integer_mv;
+    int order_hint_n_bits;
+    int super_res;
+    int cdef;
+    int restoration;
+    int bpc; // bits per component (presumably 8/10/12 -- confirm in parser)
+    int hbd;
+    int color_description_present;
+    enum Dav1dPixelLayout layout;
+    enum Dav1dColorPrimaries pri;
+    enum Dav1dTransferCharacteristics trc;
+    enum Dav1dMatrixCoefficients mtrx;
+    enum Dav1dChromaSamplePosition chr;
+    int color_range;
+    int separate_uv_delta_q;
+    int film_grain_present;
+} Av1SequenceHeader;
+
+#define NUM_SEGMENTS 8
+
+// Per-segment feature values: quantizer delta, loop-filter level deltas
+// (luma vertical/horizontal, U, V), and the ref/skip/globalmv features.
+typedef struct Av1SegmentationData {
+    int delta_q;
+    int delta_lf_y_v, delta_lf_y_h, delta_lf_u, delta_lf_v;
+    int ref;
+    int skip;
+    int globalmv;
+} Av1SegmentationData;
+
+// Segmentation data for all segments, plus bookkeeping parsed/derived
+// from the frame header.
+typedef struct Av1SegmentationDataSet {
+    Av1SegmentationData d[NUM_SEGMENTS];
+    int preskip;
+    int last_active_segid;
+} Av1SegmentationDataSet;
+
+// Loop-filter level deltas, indexed by prediction mode resp. reference.
+typedef struct Av1LoopfilterModeRefDeltas {
+    int mode_delta[2];
+    int ref_delta[8];
+} Av1LoopfilterModeRefDeltas;
+
+// Film grain synthesis parameters as parsed from the bitstream.
+typedef struct Av1FilmGrainData {
+    int num_y_points;
+    uint8_t y_points[14][2 /* value, scaling */];
+    int chroma_scaling_from_luma;
+    int num_uv_points[2];
+    uint8_t uv_points[2][10][2 /* value, scaling */];
+    int scaling_shift;
+    int ar_coeff_lag;
+    int8_t ar_coeffs_y[24];
+    int8_t ar_coeffs_uv[2][25];
+    int ar_coeff_shift;
+    int grain_scale_shift;
+    int uv_mult[2];
+    int uv_luma_mult[2];
+    int uv_offset[2];
+    int overlap_flag;
+    int clip_to_restricted_range;
+} Av1FilmGrainData;
+
+// Parsed AV1 frame header (OBU_FRAME_HDR / the header part of OBU_FRAME).
+// Field order follows the parse order of the bitstream syntax elements.
+typedef struct Av1FrameHeader {
+    int show_existing_frame;
+    int existing_frame_idx;
+    int frame_id;
+    enum FrameType frame_type;
+    int show_frame;
+    int showable_frame;
+    int error_resilient_mode;
+    int disable_cdf_update;
+    int allow_screen_content_tools;
+    int force_integer_mv;
+    int frame_size_override;
+#define PRIMARY_REF_NONE 7
+    int primary_ref_frame;
+    int frame_offset;
+    int refresh_frame_flags;
+    int width, height;
+    int render_width, render_height;
+    int super_res;
+    int have_render_size;
+    int allow_intrabc;
+    int frame_ref_short_signaling;
+    int refidx[7];
+    int hp;
+    enum FilterMode subpel_filter_mode;
+    int switchable_motion_mode;
+    int use_ref_frame_mvs;
+    int refresh_context;
+    struct {
+        int uniform;
+        int n_bytes;
+        // tile column/row start positions in superblock units; the entry
+        // after the last tile terminates the list
+        int min_log2_cols, max_log2_cols, log2_cols, cols;
+        int col_start_sb[1025];
+        int min_log2_rows, max_log2_rows, log2_rows, rows;
+        int row_start_sb[1025];
+        int update;
+    } tiling;
+    struct {
+        int yac;
+        int ydc_delta;
+        int udc_delta, uac_delta, vdc_delta, vac_delta;
+        int qm, qm_y, qm_u, qm_v;
+    } quant;
+    struct {
+        int enabled, update_map, temporal, update_data;
+        Av1SegmentationDataSet seg_data;
+        int lossless[NUM_SEGMENTS], qidx[NUM_SEGMENTS];
+    } segmentation;
+    int delta_q_present;
+    int delta_q_res_log2;
+    int delta_lf_present;
+    int delta_lf_res_log2;
+    int delta_lf_multi;
+    int all_lossless;
+    struct {
+        // deblocking loop-filter levels: [0]/[1] luma vertical/horizontal
+        // edges, plus one level each for U and V
+        int level_y[2];
+        int level_u, level_v;
+        int mode_ref_delta_enabled;
+        int mode_ref_delta_update;
+        Av1LoopfilterModeRefDeltas mode_ref_deltas;
+        int sharpness;
+    } loopfilter;
+    struct {
+        int damping;
+        int n_bits;
+        int y_strength[8];
+        int uv_strength[8];
+    } cdef;
+    struct {
+        enum RestorationType type[3];
+        int unit_size[2];
+    } restoration;
+    enum TxfmMode txfm_mode;
+    int switchable_comp_refs;
+    int skip_mode_allowed, skip_mode_enabled, skip_mode_refs[2];
+    int warp_motion;
+    int reduced_txtp_set;
+    WarpedMotionParams gmv[7];
+    struct {
+        int present, update, seed;
+        Av1FilmGrainData data;
+    } film_grain;
+} Av1FrameHeader;
+
+#define QINDEX_RANGE 256
+
+// Per-block mode information. The intra-specific and inter-specific
+// field groups share storage through an anonymous union, since a block
+// is either intra or inter (selected by the `intra` flag).
+typedef struct Av1Block {
+    uint8_t bl, bs, bp;
+    uint8_t intra, seg_id, skip_mode, skip, uvtx;
+    union {
+        struct {
+            uint8_t y_mode, uv_mode, tx, pal_sz[2];
+            int8_t y_angle, uv_angle, cfl_alpha[2];
+        }; // intra
+        struct {
+            int8_t ref[2];
+            uint8_t comp_type, wedge_idx, mask_sign, inter_mode, drl_idx;
+            uint8_t interintra_type, interintra_mode, motion_mode;
+            uint8_t max_ytx, filter2d;
+            uint16_t tx_split[2];
+            mv mv[2];
+        }; // inter
+    };
+} Av1Block;
+
+#endif /* __DAV1D_SRC_LEVELS_H__ */
--- /dev/null
+++ b/src/lf_apply.c
@@ -1,0 +1,330 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/lf_apply.h"
+
+// Return a's filter level for direction `diridx` if it is nonzero;
+// otherwise fall back to b's level when a neighbor exists (have_b),
+// else return 0.
+static inline int maxifzero(const uint8_t (*const a)[4], const int have_b,
+                            const uint8_t (*const b)[4], const int diridx)
+{
+    const int lvl = (*a)[diridx];
+    return lvl ? lvl : have_b ? (*b)[diridx] : 0;
+}
+
+// Deblock the vertical edges (edges between horizontally adjacent blocks)
+// of one superblock row of the luma plane. mask[y] holds three bitmasks
+// of 4px-column positions to filter, one per filter-width class; lvl
+// gives per-4x4-block filter levels (index 0 = luma column edges).
+static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
+                                       const int have_left,
+                                       const uint8_t (*lvl)[4],
+                                       const ptrdiff_t b4_stride,
+                                       const uint32_t (*const mask)[3],
+                                       pixel *dst, const ptrdiff_t ls,
+                                       const int starty4, const int endy4)
+{
+    const Dav1dDSPContext *const dsp = f->dsp;
+
+    // filter edges between columns (e.g. block1 | block2)
+    for (int y = starty4; y < endy4;
+         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
+    {
+        pixel *ptr = dst;
+        const uint8_t (*l)[4] = lvl;
+        const uint32_t *const hmask = mask[y];
+        // union of all width classes: loop while any edge bit remains
+        const unsigned hm = hmask[0] | hmask[1] | hmask[2];
+
+        for (unsigned x = 1; hm & ~(x - 1); l++, x <<= 1, ptr += 4) {
+            if ((have_left || x > 1) && (hm & x)) {
+                // level from this block, or its left neighbor if zero
+                const int L = maxifzero(l, have_left || x > 1, &l[-1], 0);
+                if (!L) continue;
+                const int H = L >> 4;
+                const int E = f->lf.lim_lut.e[L], I = f->lf.lim_lut.i[L];
+                // pick the widest filter class whose mask bit is set
+                const int idx = (hmask[2] & x) ? 2 : !!(hmask[1] & x);
+
+                dsp->lf.loop_filter[idx][0](ptr, ls, E, I, H);
+            }
+        }
+    }
+}
+
+// Deblock the horizontal edges (edges between vertically adjacent blocks)
+// of one superblock row of the luma plane. Mirror of filter_plane_cols_y,
+// but using level index 1 (luma row edges) and the [1] (horizontal-edge)
+// loop-filter variant.
+static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
+                                       const int have_top,
+                                       const uint8_t (*lvl)[4],
+                                       const ptrdiff_t b4_stride,
+                                       const uint32_t (*const mask)[3],
+                                       pixel *dst, const ptrdiff_t ls,
+                                       const int starty4, const int endy4)
+{
+    const Dav1dDSPContext *const dsp = f->dsp;
+
+    //                                 block1
+    // filter edges between rows (e.g. ------)
+    //                                 block2
+    for (int y = starty4; y < endy4;
+         y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
+    {
+        pixel *ptr = dst;
+        const uint8_t (*l)[4] = lvl;
+        const uint32_t *const vmask = mask[y];
+        const unsigned vm = vmask[0] | vmask[1] | vmask[2];
+
+        for (unsigned x = 1; vm & ~(x - 1); x <<= 1, ptr += 4, l++) {
+            if ((have_top || y) && (vm & x)) {
+                // level from this block, or the block above if zero
+                const int L = maxifzero(l, have_top || y, &l[-b4_stride], 1);
+                if (!L) continue;
+                const int H = L >> 4;
+                const int E = f->lf.lim_lut.e[L], I = f->lf.lim_lut.i[L];
+                const int idx = (vmask[2] & x) ? 2 : !!(vmask[1] & x);
+
+                dsp->lf.loop_filter[idx][1](ptr, ls, E, I, H);
+            }
+        }
+    }
+}
+
+// Deblock the vertical edges of one superblock row for both chroma
+// planes (u and v share masks but use separate levels: indices 2 and 3).
+// Chroma masks have only two width classes. The level array is stored at
+// luma 4x4 granularity, hence the ss_hor/ss_ver stepping.
+static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
+                                        const int have_left,
+                                        const uint8_t (*lvl)[4],
+                                        const ptrdiff_t b4_stride,
+                                        const uint32_t (*const mask)[2],
+                                        pixel *const u, pixel *const v,
+                                        const ptrdiff_t ls,
+                                        const int starty4, const int endy4)
+{
+    const Dav1dDSPContext *const dsp = f->dsp;
+    int y;
+    ptrdiff_t off_l;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int hstep = 1 << ss_hor;
+
+    // filter edges between columns (e.g. block1 | block2)
+    lvl += ss_hor + ss_ver * b4_stride;
+    for (off_l = 0, y = starty4; y < endy4;
+         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride << ss_ver)
+    {
+        ptrdiff_t off = off_l;
+        const uint8_t (*l)[4] = lvl;
+        const uint32_t *const hmask = mask[y];
+        const unsigned hm = hmask[0] | hmask[1];
+
+        for (unsigned x = 1; hm & ~(x - 1); l += hstep, x <<= 1, off += 4) {
+            if ((have_left || x > 1) && (hm & x)) {
+                const int idx = !!(hmask[1] & x);
+
+                // u level (index 2), falling back to the left neighbor
+                const int Lu = maxifzero(l, have_left || x > 1, &l[-hstep], 2);
+                if (Lu) {
+                    const int H = Lu >> 4;
+                    const int E = f->lf.lim_lut.e[Lu], I = f->lf.lim_lut.i[Lu];
+
+                    dsp->lf.loop_filter_uv[idx][0](&u[off], ls, E, I, H);
+                }
+
+                // v level (index 3), falling back to the left neighbor
+                const int Lv = maxifzero(l, have_left || x > 1, &l[-hstep], 3);
+                if (Lv) {
+                    const int H = Lv >> 4;
+                    const int E = f->lf.lim_lut.e[Lv], I = f->lf.lim_lut.i[Lv];
+
+                    dsp->lf.loop_filter_uv[idx][0](&v[off], ls, E, I, H);
+                }
+            }
+        }
+    }
+}
+
+// Deblock the horizontal edges of one superblock row for both chroma
+// planes. Mirror of filter_plane_cols_uv, using the [1] (horizontal-edge)
+// chroma loop-filter variant and the block above as level fallback.
+static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
+                                        const int have_top,
+                                        const uint8_t (*lvl)[4],
+                                        const ptrdiff_t b4_stride,
+                                        const uint32_t (*const mask)[2],
+                                        pixel *const u, pixel *const v,
+                                        const ptrdiff_t ls,
+                                        const int starty4, const int endy4)
+{
+    const Dav1dDSPContext *const dsp = f->dsp;
+    int y;
+    ptrdiff_t off_l;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int hstep = 1 << ss_hor;
+
+    //                                 block1
+    // filter edges between rows (e.g. ------)
+    //                                 block2
+    lvl += ss_ver * b4_stride + ss_hor;
+    for (off_l = 0, y = starty4; y < endy4;
+         y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride << ss_ver)
+    {
+        ptrdiff_t off = off_l;
+        const uint8_t (*l)[4] = lvl;
+        const uint32_t *const vmask = mask[y];
+        const unsigned vm = vmask[0] | vmask[1];
+
+        for (unsigned x = 1; vm & ~(x - 1); x <<= 1, off += 4, l += hstep) {
+            if ((have_top || y) && (vm & x)) {
+                const int idx = !!(vmask[1] & x);
+
+                const int Lu = maxifzero(l, have_top || y,
+                                         &l[-(b4_stride << ss_ver)], 2);
+                if (Lu) {
+                    const int H = Lu >> 4;
+                    const int E = f->lf.lim_lut.e[Lu], I = f->lf.lim_lut.i[Lu];
+
+                    dsp->lf.loop_filter_uv[idx][1](&u[off], ls, E, I, H);
+                }
+
+                const int Lv = maxifzero(l, have_top || y,
+                                         &l[-(b4_stride << ss_ver)], 3);
+                if (Lv) {
+                    const int H = Lv >> 4;
+                    const int E = f->lf.lim_lut.e[Lv], I = f->lf.lim_lut.i[Lv];
+
+                    dsp->lf.loop_filter_uv[idx][1](&v[off], ls, E, I, H);
+                }
+            }
+        }
+    }
+}
+
+// Apply the in-loop deblocking filter to one superblock row (index sby)
+// of planes p[0..2]. First clamps the per-edge filter-width masks at tile
+// column and tile row boundaries (where the neighboring tile's transform
+// sizes were not known when the masks were built), then filters luma
+// column edges, luma row edges and, when chroma filtering is enabled,
+// chroma column and row edges.
+void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *const f,
+                                    pixel *const p[3], Av1Filter *const lflvl,
+                                    int sby, const int start_of_tile_row)
+{
+    int x, have_left;
+    // don't filter outside the frame
+    const int have_top = sby > 0;
+    const int is_sb64 = !f->seq_hdr.sb128;
+    // odd 64x64 sb rows start halfway into the 32-entry (128px) masks
+    const int starty4 = (sby & is_sb64) << 4;
+    const int sbsz = 32 >> is_sb64;
+    const int sbl2 = 5 - is_sb64;
+    const int endy4 = starty4 + imin(f->bh - sby * f->sb_step, sbsz);
+    const int halign = (f->bh + 31) & ~31;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+
+    // fix lpf strength at tile col boundaries
+    const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
+    const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
+    for (int tile_col = 1;; tile_col++) {
+        x = f->frame_hdr.tiling.col_start_sb[tile_col];
+        if ((x << sbl2) >= f->bw) break;
+        const int mask = x & is_sb64 ? 1 << 16 : 1;
+        const int uv_mask = x & is_sb64 ? 1 << (16 >> ss_hor) : 1;
+        x >>= is_sb64;
+        // clamp the filter-width class to what the right edge of the
+        // left tile allows (recorded in tx_lpf_right_edge)
+        for (int y = starty4; y < endy4; y++) {
+            const int idx = 2 * !!(lflvl[x].filter_y[0][y][2] & mask) +
+                                !!(lflvl[x].filter_y[0][y][1] & mask);
+            lflvl[x].filter_y[0][y][2] &= ~mask;
+            lflvl[x].filter_y[0][y][1] &= ~mask;
+            lflvl[x].filter_y[0][y][0] &= ~mask;
+            lflvl[x].filter_y[0][y][imin(idx, lpf_y[y - starty4])] |= mask;
+        }
+        for (int y = starty4 >> ss_ver; y < (endy4 >> ss_ver); y++) {
+            const int idx = !!(lflvl[x].filter_uv[0][y][1] & uv_mask);
+            lflvl[x].filter_uv[0][y][1] &= ~uv_mask;
+            lflvl[x].filter_uv[0][y][0] &= ~uv_mask;
+            lflvl[x].filter_uv[0][y][imin(idx, lpf_uv[y - (starty4 >> ss_ver)])] |= uv_mask;
+        }
+        lpf_y  += halign;
+        lpf_uv += halign >> ss_ver;
+    }
+
+    // fix lpf strength at tile row boundaries
+    if (start_of_tile_row) {
+        const BlockContext *a;
+        for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
+             x < f->sb128w; x++, a++)
+        {
+            uint32_t *const y_vmask = lflvl[x].filter_y[1][starty4];
+            const unsigned y_vm = y_vmask[0] | y_vmask[1] | y_vmask[2];
+
+            // clamp to what the bottom edge of the tile above allows
+            for (unsigned mask = 1, i = 0; i < 32; mask <<= 1, i++) {
+                if (!(y_vm & mask)) continue;
+                const int idx = 2 * !!(y_vmask[2] & mask) + !!(y_vmask[1] & mask);
+                y_vmask[2] &= ~mask;
+                y_vmask[1] &= ~mask;
+                y_vmask[0] &= ~mask;
+                y_vmask[imin(idx, a->tx_lpf_y[i])] |= mask;
+            }
+
+            uint32_t *const uv_vmask = lflvl[x].filter_uv[1][starty4 >> ss_ver];
+            const unsigned uv_vm = uv_vmask[0] | uv_vmask[1];
+            for (unsigned mask = 1, i = 0; i < (32 >> ss_hor); mask <<= 1, i++) {
+                if (!(uv_vm & mask)) continue;
+                const int idx = !!(uv_vmask[1] & mask);
+                uv_vmask[1] &= ~mask;
+                uv_vmask[0] &= ~mask;
+                uv_vmask[imin(idx, a->tx_lpf_uv[i])] |= mask;
+            }
+        }
+    }
+
+    // luma column edges, one 128px-wide lflvl unit at a time
+    pixel *ptr;
+    uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
+    for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
+         x++, have_left = 1, ptr += 128, level_ptr += 32)
+    {
+        filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
+                            lflvl[x].filter_y[0],
+                            ptr, f->cur.p.stride[0], starty4, endy4);
+    }
+
+    // luma row edges
+    level_ptr = f->lf.level + f->b4_stride * sby * sbsz;
+    for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
+        filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
+                            lflvl[x].filter_y[1],
+                            ptr, f->cur.p.stride[0], starty4, endy4);
+    }
+
+    if (!f->frame_hdr.loopfilter.level_u && !f->frame_hdr.loopfilter.level_v)
+        return;
+
+    // chroma column edges
+    ptrdiff_t uv_off;
+    level_ptr = f->lf.level + f->b4_stride * sby * sbsz;
+    for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
+         x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32)
+    {
+        filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
+                             lflvl[x].filter_uv[0],
+                             &p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],
+                             starty4 >> ss_ver, endy4 >> ss_ver);
+    }
+
+    // chroma row edges
+    level_ptr = f->lf.level + f->b4_stride * sby * sbsz;
+    for (uv_off = 0, x = 0; x < f->sb128w;
+         x++, uv_off += 128 >> ss_hor, level_ptr += 32)
+    {
+        filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
+                             lflvl[x].filter_uv[1],
+                             &p[1][uv_off], &p[2][uv_off], f->cur.p.stride[1],
+                             starty4 >> ss_ver, endy4 >> ss_ver);
+    }
+}
--- /dev/null
+++ b/src/lf_apply.h
@@ -1,0 +1,42 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_LF_APPLY_H__
+#define __DAV1D_SRC_LF_APPLY_H__
+
+#include <stdint.h>
+
+#include "common/bitdepth.h"
+
+#include "src/internal.h"
+#include "src/levels.h"
+
+// Apply the in-loop deblocking filter to one superblock row (sby) of
+// planes p, using the edge masks/levels in lflvl. start_of_tile_row is
+// nonzero when this superblock row begins a new tile row.
+void bytefn(dav1d_loopfilter_sbrow)(const Dav1dFrameContext *f,
+                                    pixel *const p[3], Av1Filter *lflvl,
+                                    int sby, int start_of_tile_row);
+
+#endif /* __DAV1D_SRC_LF_APPLY_H__ */
--- /dev/null
+++ b/src/lf_mask.c
@@ -1,0 +1,402 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/levels.h"
+#include "src/lf_mask.h"
+#include "src/tables.h"
+
+static void decomp_tx(uint8_t (*txa)[2 /* txsz, step */][32 /* y */][32 /* x */],
+                      const enum RectTxfmSize from,
+                      const int depth,
+                      const int y_off, const int x_off,
+                      const uint16_t *const tx_masks)
+{
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[from];
+    int is_split;
+
+    if (depth > 1) {
+        is_split = 0;
+    } else {
+        const int off = y_off * 4 + x_off;
+        is_split = (tx_masks[depth] >> off) & 1;
+    }
+
+    if (is_split) {
+        const enum RectTxfmSize sub = t_dim->sub;
+        const int htw4 = t_dim->w >> 1, hth4 = t_dim->h >> 1;
+
+        decomp_tx(txa, sub, depth + 1, y_off * 2 + 0, x_off * 2 + 0, tx_masks);
+        if (t_dim->w >= t_dim->h)
+            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][0][htw4],
+                      sub, depth + 1, y_off * 2 + 0, x_off * 2 + 1, tx_masks);
+        if (t_dim->h >= t_dim->w) {
+            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][0],
+                      sub, depth + 1, y_off * 2 + 1, x_off * 2 + 0, tx_masks);
+            if (t_dim->w >= t_dim->h)
+                decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][hth4][htw4],
+                          sub, depth + 1, y_off * 2 + 1, x_off * 2 + 1, tx_masks);
+        }
+    } else {
+        const int lw = imin(2, t_dim->lw), lh = imin(2, t_dim->lh);
+        int y;
+
+        for (y = 0; y < t_dim->h; y++) {
+            memset(txa[0][0][y], lw, t_dim->w);
+            memset(txa[1][0][y], lh, t_dim->w);
+            txa[0][1][y][0] = t_dim->w;
+        }
+        memset(txa[1][1][0], t_dim->h, t_dim->w);
+    }
+}
+
+static inline void mask_edges_inter(uint32_t (*masks)[32][3],
+                                    const int by4, const int bx4,
+                                    const int w4, const int h4, const int skip,
+                                    const enum RectTxfmSize max_tx,
+                                    const uint16_t *const tx_masks,
+                                    uint8_t *const a, uint8_t *const l)
+{
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[max_tx];
+    int y, x;
+
+    uint8_t txa[2 /* edge */][2 /* txsz, step */][32 /* y */][32 /* x */];
+    int y_off, x_off;
+    for (y_off = 0, y = 0; y < h4; y += t_dim->h, y_off++)
+        for (x_off = 0, x = 0; x < w4; x += t_dim->w, x_off++)
+            decomp_tx((uint8_t(*)[2][32][32]) &txa[0][0][y][x],
+                      max_tx, 0, y_off, x_off, tx_masks);
+
+    // left block edge
+    unsigned mask = 1U << bx4;
+    for (y = 0; y < h4; y++)
+        masks[0][by4 + y][imin(txa[0][0][y][0], l[y])] |= mask;
+
+    // top block edge
+    for (x = 0; x < w4; x++, mask <<= 1)
+        masks[1][by4][imin(txa[1][0][0][x], a[x])] |= mask;
+
+    if (!skip) {
+        // inner (tx) left|right edges
+        for (y = 0; y < h4; y++) {
+            int ltx = txa[0][0][y][0];
+            int step = txa[0][1][y][0];
+            for (x = step, mask = 1U << (bx4 + step);
+                 x < w4; x += step, mask <<= step)
+            {
+                const int rtx = txa[0][0][y][x];
+                masks[0][by4 + y][imin(rtx, ltx)] |= mask;
+                ltx = rtx;
+                step = txa[0][1][y][x];
+            }
+        }
+
+        //            top
+        // inner (tx) --- edges
+        //           bottom
+        for (x = 0, mask = 1U << bx4; x < w4; x++, mask <<= 1) {
+            int ttx = txa[1][0][0][x];
+            int step = txa[1][1][0][x];
+            for (y = step; y < h4; y += step) {
+                const int btx = txa[1][0][y][x];
+                masks[1][by4 + y][imin(ttx, btx)] |= mask;
+                ttx = btx;
+                step = txa[1][1][y][x];
+            }
+        }
+    }
+
+    for (y = 0; y < h4; y++)
+        l[y] = txa[0][0][y][w4 - 1];
+    memcpy(a, txa[1][0][h4 - 1], w4);
+}
+
+static inline void mask_edges_intra(uint32_t (*const masks)[32][3],
+                                    const int by4, const int bx4,
+                                    const int w4, const int h4,
+                                    const enum RectTxfmSize tx,
+                                    uint8_t *const a, uint8_t *const l)
+{
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
+    const int twl4c = imin(2, twl4), thl4c = imin(2, thl4);
+    int y, x;
+
+    // left block edge
+    unsigned mask = 1U << bx4;
+    for (y = 0; y < h4; y++)
+        masks[0][by4 + y][imin(twl4c, l[y])] |= mask;
+
+    // top block edge
+    for (mask = 1U << bx4, x = 0; x < w4; x++, mask <<= 1)
+        masks[1][by4][imin(thl4c, a[x])] |= mask;
+
+    static const uint32_t hstep[] = {
+        0xffffffff, 0x55555555, 0x11111111, 0x01010101, 0x00010001
+    };
+
+    // inner (tx) left|right edges
+    const unsigned t = 1U << bx4;
+    const unsigned inner = (((uint64_t) t) << w4) - t;
+    mask = (inner - t) & hstep[twl4];
+    for (y = 0; y < h4; y++)
+        masks[0][by4 + y][twl4c] |= mask;
+
+    //            top
+    // inner (tx) --- edges
+    //           bottom
+    const int vstep = t_dim->h;
+    for (y = vstep; y < h4; y += vstep)
+        masks[1][by4 + y][thl4c] |= inner;
+
+    memset(a, thl4c, w4);
+    memset(l, twl4c, h4);
+}
+
+static inline void mask_edges_chroma(uint32_t (*const masks)[32][2],
+                                     const int cby4, const int cbx4,
+                                     const int cw4, const int ch4,
+                                     const int skip_inter,
+                                     const enum RectTxfmSize tx,
+                                     uint8_t *const a, uint8_t *const l)
+{
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+    const int twl4 = t_dim->lw, thl4 = t_dim->lh;
+    const int twl4c = !!twl4, thl4c = !!thl4;
+    int y, x;
+
+    // left block edge
+    unsigned mask = 1U << cbx4;
+    for (y = 0; y < ch4; y++)
+        masks[0][cby4 + y][imin(twl4c, l[y])] |= mask;
+
+    // top block edge
+    for (mask = 1U << cbx4, x = 0; x < cw4; x++, mask <<= 1)
+        masks[1][cby4][imin(thl4c, a[x])] |= mask;
+
+    if (!skip_inter) {
+        static const uint32_t hstep[] = {
+            0xffffffff, 0x55555555, 0x11111111, 0x01010101
+        };
+
+        // inner (tx) left|right edges
+        const int t = 1U << cbx4;
+        const unsigned inner = (((uint64_t) t) << cw4) - t;
+        mask = (inner - t) & hstep[twl4];
+        for (y = 0; y < ch4; y++)
+            masks[0][cby4 + y][twl4c] |= mask;
+
+        //            top
+        // inner (tx) --- edges
+        //           bottom
+        const int vstep = t_dim->h;
+        for (y = vstep; y < ch4; y += vstep)
+            masks[1][cby4 + y][thl4c] |= inner;
+    }
+
+    memset(a, thl4c, cw4);
+    memset(l, twl4c, ch4);
+}
+
+void dav1d_create_lf_mask_intra(Av1Filter *const lflvl,
+                                uint8_t (*level_cache)[4],
+                                const ptrdiff_t b4_stride,
+                                const Av1FrameHeader *const hdr,
+                                const uint8_t (*filter_level)[8][2],
+                                const int bx, const int by,
+                                const int iw, const int ih,
+                                const enum BlockSize bs,
+                                const enum RectTxfmSize ytx,
+                                const enum RectTxfmSize uvtx,
+                                const enum Dav1dPixelLayout layout,
+                                uint8_t *const ay, uint8_t *const ly,
+                                uint8_t *const auv, uint8_t *const luv)
+{
+    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1])
+        return;
+
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bw4 = imin(iw - bx, b_dim[0]);
+    const int bh4 = imin(ih - by, b_dim[1]);
+    const int bx4 = bx & 31;
+    const int by4 = by & 31;
+
+    level_cache += by * b4_stride + bx;
+    for (int y = 0; y < bh4; y++) {
+        for (int x = 0; x < bw4; x++) {
+            level_cache[x][0] = filter_level[0][0][0];
+            level_cache[x][1] = filter_level[1][0][0];
+            level_cache[x][2] = filter_level[2][0][0];
+            level_cache[x][3] = filter_level[3][0][0];
+        }
+        level_cache += b4_stride;
+    }
+
+    mask_edges_intra(lflvl->filter_y, by4, bx4, bw4, bh4, ytx, ay, ly);
+
+    if (!auv) return;
+
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int cbw4 = (bw4 + ss_hor) >> ss_hor;
+    const int cbh4 = (bh4 + ss_ver) >> ss_ver;
+    const int cbx4 = bx4 >> ss_hor;
+    const int cby4 = by4 >> ss_ver;
+
+    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, 0, uvtx, auv, luv);
+}
+
+void dav1d_create_lf_mask_inter(Av1Filter *const lflvl,
+                                uint8_t (*level_cache)[4],
+                                const ptrdiff_t b4_stride,
+                                const Av1FrameHeader *const hdr,
+                                const uint8_t (*filter_level)[8][2],
+                                const int bx, const int by,
+                                const int iw, const int ih,
+                                const int skip, const enum BlockSize bs,
+                                const uint16_t *const tx_masks,
+                                const enum RectTxfmSize uvtx,
+                                const enum Dav1dPixelLayout layout,
+                                uint8_t *const ay, uint8_t *const ly,
+                                uint8_t *const auv, uint8_t *const luv)
+{
+    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1])
+        return;
+
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bw4 = imin(iw - bx, b_dim[0]);
+    const int bh4 = imin(ih - by, b_dim[1]);
+    const int bx4 = bx & 31;
+    const int by4 = by & 31;
+
+    level_cache += by * b4_stride + bx;
+    for (int y = 0; y < bh4; y++) {
+        for (int x = 0; x < bw4; x++) {
+            level_cache[x][0] = filter_level[0][0][0];
+            level_cache[x][1] = filter_level[1][0][0];
+            level_cache[x][2] = filter_level[2][0][0];
+            level_cache[x][3] = filter_level[3][0][0];
+        }
+        level_cache += b4_stride;
+    }
+
+    mask_edges_inter(lflvl->filter_y, by4, bx4, bw4, bh4, skip,
+                     av1_max_txfm_size_for_bs[bs][0], tx_masks, ay, ly);
+
+    if (!auv) return;
+
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int cbw4 = (bw4 + ss_hor) >> ss_hor;
+    const int cbh4 = (bh4 + ss_ver) >> ss_ver;
+    const int cbx4 = bx4 >> ss_hor;
+    const int cby4 = by4 >> ss_ver;
+
+    mask_edges_chroma(lflvl->filter_uv, cby4, cbx4, cbw4, cbh4, skip, uvtx, auv, luv);
+}
+
+void dav1d_calc_eih(Av1FilterLUT *const lim_lut, const int filter_sharpness) {
+    int level;
+
+    // set E/I/H values from loopfilter level
+    for (level = 0; level < 64; level++) {
+        const int sharp = filter_sharpness;
+        int limit = level;
+
+        if (sharp > 0) {
+            limit >>= (sharp + 3) >> 2;
+            limit = imin(limit, 9 - sharp);
+        }
+        limit = imax(limit, 1);
+
+        lim_lut->i[level] = limit;
+        lim_lut->e[level] = 2 * (level + 2) + limit;
+    }
+}
+
+static void dav1d_calc_lf_value(uint8_t (*const lflvl_values)[2],
+                                const int is_chroma, const int base_lvl,
+                                const int lf_delta, const int seg_delta,
+                                const Av1LoopfilterModeRefDeltas *const mr_delta)
+{
+    const int base = iclip(iclip(base_lvl + lf_delta, 0, 63) + seg_delta, 0, 63);
+
+    if (!base_lvl && is_chroma) {
+        memset(lflvl_values, 0, 8 * 2);
+    } else if (!mr_delta) {
+        memset(lflvl_values, base, 8 * 2);
+    } else {
+        const int sh = base >= 32;
+        lflvl_values[0][0] = lflvl_values[0][1] =
+            iclip(base + (mr_delta->ref_delta[0] * (1 << sh)), 0, 63);
+        for (int r = 1; r < 8; r++) {
+            for (int m = 0; m < 2; m++) {
+                const int delta =
+                    mr_delta->mode_delta[m] + mr_delta->ref_delta[r];
+                lflvl_values[r][m] = iclip(base + (delta * (1 << sh)), 0, 63);
+            }
+        }
+    }
+}
+
+void dav1d_calc_lf_values(uint8_t (*const lflvl_values)[4][8][2],
+                          const Av1FrameHeader *const hdr,
+                          const int8_t lf_delta[4])
+{
+    const int n_seg = hdr->segmentation.enabled ? 8 : 1;
+
+    if (!hdr->loopfilter.level_y[0] && !hdr->loopfilter.level_y[1]) {
+        memset(lflvl_values, 0, 8 * 4 * 2 * n_seg);
+        return;
+    }
+
+    const Av1LoopfilterModeRefDeltas *const mr_deltas =
+        hdr->loopfilter.mode_ref_delta_enabled ?
+        &hdr->loopfilter.mode_ref_deltas : NULL;
+    for (int s = 0; s < n_seg; s++) {
+        const Av1SegmentationData *const segd =
+            hdr->segmentation.enabled ? &hdr->segmentation.seg_data.d[s] : NULL;
+
+        dav1d_calc_lf_value(lflvl_values[s][0], 0, hdr->loopfilter.level_y[0],
+                            lf_delta[0], segd ? segd->delta_lf_y_v : 0, mr_deltas);
+        dav1d_calc_lf_value(lflvl_values[s][1], 0, hdr->loopfilter.level_y[1],
+                            lf_delta[hdr->delta_lf_multi ? 1 : 0],
+                            segd ? segd->delta_lf_y_h : 0, mr_deltas);
+        dav1d_calc_lf_value(lflvl_values[s][2], 1, hdr->loopfilter.level_u,
+                            lf_delta[hdr->delta_lf_multi ? 2 : 0],
+                            segd ? segd->delta_lf_u : 0, mr_deltas);
+        dav1d_calc_lf_value(lflvl_values[s][3], 1, hdr->loopfilter.level_v,
+                            lf_delta[hdr->delta_lf_multi ? 3 : 0],
+                            segd ? segd->delta_lf_v : 0, mr_deltas);
+    }
+}
--- /dev/null
+++ b/src/lf_mask.h
@@ -1,0 +1,80 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_LF_MASK_H__
+#define __DAV1D_SRC_LF_MASK_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "src/levels.h"
+
+typedef struct Av1FilterLUT {
+    uint8_t e[64];
+    uint8_t i[64];
+} Av1FilterLUT;
+
+typedef struct Av1RestorationUnit {
+    enum RestorationType type;
+    int16_t filter_h[3];
+    int16_t filter_v[3];
+    uint8_t sgr_idx;
+    int16_t sgr_weights[2];
+} Av1RestorationUnit;
+
+// each struct describes one 128x128 area (1 or 4 SBs)
+typedef struct Av1Filter {
+    // each bit is 1 col
+    uint32_t filter_y[2 /* 0=col, 1=row */][32][3];
+    uint32_t filter_uv[2 /* 0=col, 1=row */][32][2];
+    int8_t cdef_idx[4]; // -1 means "unset"
+    uint32_t noskip_mask[32];
+    Av1RestorationUnit lr[3][4];
+} Av1Filter;
+
+void dav1d_create_lf_mask_intra(Av1Filter *lflvl, uint8_t (*level_cache)[4],
+                                const ptrdiff_t b4_stride,
+                                const Av1FrameHeader *hdr,
+                                const uint8_t (*level)[8][2], int bx, int by,
+                                int iw, int ih, enum BlockSize bs,
+                                enum RectTxfmSize ytx, enum RectTxfmSize uvtx,
+                                enum Dav1dPixelLayout layout, uint8_t *ay,
+                                uint8_t *ly, uint8_t *auv, uint8_t *luv);
+void dav1d_create_lf_mask_inter(Av1Filter *lflvl, uint8_t (*level_cache)[4],
+                                const ptrdiff_t b4_stride,
+                                const Av1FrameHeader *hdr,
+                                const uint8_t (*level)[8][2], int bx, int by,
+                                int iw, int ih, int skip_inter,
+                                enum BlockSize bs, const uint16_t *tx_mask,
+                                enum RectTxfmSize uvtx,
+                                enum Dav1dPixelLayout layout, uint8_t *ay,
+                                uint8_t *ly, uint8_t *auv, uint8_t *luv);
+void dav1d_calc_eih(Av1FilterLUT *lim_lut, int filter_sharpness);
+void dav1d_calc_lf_values(uint8_t (*values)[4][8][2], const Av1FrameHeader *hdr,
+                          const int8_t lf_delta[4]);
+
+#endif /* __DAV1D_SRC_LF_MASK_H__ */
--- /dev/null
+++ b/src/lib.c
@@ -1,0 +1,297 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <string.h>
+
+#include "dav1d/dav1d.h"
+#include "dav1d/data.h"
+
+#include "include/version.h"
+
+#include "common/mem.h"
+#include "common/validate.h"
+
+#include "src/internal.h"
+#include "src/obu.h"
+#include "src/qm.h"
+#include "src/ref.h"
+#include "src/thread_task.h"
+#include "src/wedge.h"
+
+void dav1d_init(void) {
+    av1_init_wedge_masks();
+    av1_init_interintra_masks();
+    av1_init_qm_tables();
+}
+
+const char *dav1d_version(void) {
+    return DAV1D_VERSION;
+}
+
+void dav1d_default_settings(Dav1dSettings *const s) {
+    s->n_frame_threads = 1;
+    s->n_tile_threads = 1;
+}
+
+int dav1d_open(Dav1dContext **const c_out,
+               const Dav1dSettings *const s)
+{
+    validate_input_or_ret(c_out != NULL, -EINVAL);
+    validate_input_or_ret(s != NULL, -EINVAL);
+    validate_input_or_ret(s->n_tile_threads >= 1 &&
+                          s->n_tile_threads <= 64, -EINVAL);
+    validate_input_or_ret(s->n_frame_threads >= 1 &&
+                          s->n_frame_threads <= 256, -EINVAL);
+
+    Dav1dContext *const c = *c_out = dav1d_alloc_aligned(sizeof(*c), 32);
+    if (!c) goto error;
+    memset(c, 0, sizeof(*c));
+
+    c->n_fc = s->n_frame_threads;
+    c->fc = dav1d_alloc_aligned(sizeof(*c->fc) * s->n_frame_threads, 32);
+    if (!c->fc) goto error;
+    memset(c->fc, 0, sizeof(*c->fc) * s->n_frame_threads);
+    if (c->n_fc > 1) {
+        c->frame_thread.out_delayed =
+            malloc(sizeof(*c->frame_thread.out_delayed) * c->n_fc);
+        memset(c->frame_thread.out_delayed, 0,
+               sizeof(*c->frame_thread.out_delayed) * c->n_fc);
+    }
+    for (int n = 0; n < s->n_frame_threads; n++) {
+        Dav1dFrameContext *const f = &c->fc[n];
+        f->c = c;
+        f->lf.last_sharpness = -1;
+        f->n_tc = s->n_tile_threads;
+        f->tc = dav1d_alloc_aligned(sizeof(*f->tc) * s->n_tile_threads, 32);
+        if (!f->tc) goto error;
+        memset(f->tc, 0, sizeof(*f->tc) * s->n_tile_threads);
+        if (f->n_tc > 1) {
+            pthread_mutex_init(&f->tile_thread.lock, NULL);
+            pthread_cond_init(&f->tile_thread.cond, NULL);
+            pthread_cond_init(&f->tile_thread.icond, NULL);
+        }
+        for (int m = 0; m < s->n_tile_threads; m++) {
+            Dav1dTileContext *const t = &f->tc[m];
+            t->f = f;
+            t->cf = dav1d_alloc_aligned(32 * 32 * sizeof(int32_t), 32);
+            if (!t->cf) goto error;
+            t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 8, 32);
+            if (!t->scratch.mem) goto error;
+            memset(t->cf, 0, 32 * 32 * sizeof(int32_t));
+            t->emu_edge =
+                dav1d_alloc_aligned(160 * (128 + 7) * sizeof(uint16_t), 32);
+            if (!t->emu_edge) goto error;
+            if (f->n_tc > 1) {
+                pthread_mutex_init(&t->tile_thread.td.lock, NULL);
+                pthread_cond_init(&t->tile_thread.td.cond, NULL);
+                t->tile_thread.fttd = &f->tile_thread;
+                pthread_create(&t->tile_thread.td.thread, NULL, dav1d_tile_task, t);
+            }
+        }
+        f->libaom_cm = av1_alloc_ref_mv_common();
+        if (c->n_fc > 1) {
+            pthread_mutex_init(&f->frame_thread.td.lock, NULL);
+            pthread_cond_init(&f->frame_thread.td.cond, NULL);
+            pthread_create(&f->frame_thread.td.thread, NULL, dav1d_frame_task, f);
+        }
+    }
+
+    // intra edge tree
+    c->intra_edge.root[BL_128X128] = &c->intra_edge.branch_sb128[0].node;
+    init_mode_tree(c->intra_edge.root[BL_128X128], c->intra_edge.tip_sb128, 1);
+    c->intra_edge.root[BL_64X64] = &c->intra_edge.branch_sb64[0].node;
+    init_mode_tree(c->intra_edge.root[BL_64X64], c->intra_edge.tip_sb64, 0);
+
+    return 0;
+
+error:
+    if (c) {
+        if (c->fc) {
+            for (int n = 0; n < c->n_fc; n++)
+                if (c->fc[n].tc)
+                    free(c->fc[n].tc);
+            free(c->fc);
+        }
+        free(c);
+    }
+    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+    return -ENOMEM;
+}
+
+int dav1d_decode(Dav1dContext *const c,
+                 Dav1dData *const in, Dav1dPicture *const out)
+{
+    int res;
+
+    validate_input_or_ret(c != NULL, -EINVAL);
+    validate_input_or_ret(out != NULL, -EINVAL);
+
+    while (!in) {
+        if (c->n_fc == 1) return -EAGAIN;
+
+        // flush
+        const unsigned next = c->frame_thread.next;
+        Dav1dFrameContext *const f = &c->fc[next];
+
+        pthread_mutex_lock(&f->frame_thread.td.lock);
+        while (f->n_tile_data > 0)
+            pthread_cond_wait(&f->frame_thread.td.cond,
+                              &f->frame_thread.td.lock);
+        pthread_mutex_unlock(&f->frame_thread.td.lock);
+        Dav1dThreadPicture *const out_delayed =
+            &c->frame_thread.out_delayed[next];
+        if (out_delayed->p.data[0]) {
+            if (++c->frame_thread.next == c->n_fc)
+                c->frame_thread.next = 0;
+            if (out_delayed->visible) {
+                dav1d_picture_ref(out, &out_delayed->p);
+            }
+            dav1d_thread_picture_unref(out_delayed);
+            if (out->data[0]) {
+                return 0;
+            }
+            // else continue
+        } else {
+            return -EAGAIN;
+        }
+    }
+
+    while (in->sz > 0) {
+        if ((res = parse_obus(c, in)) < 0)
+            return res;
+
+        assert(res <= in->sz);
+        in->sz -= res;
+        in->data += res;
+        if (c->out.data[0]) {
+            if (!in->sz) dav1d_data_unref(in);
+            dav1d_picture_ref(out, &c->out);
+            dav1d_picture_unref(&c->out);
+            return 0;
+        }
+    }
+
+    if (c->out.data[0]) {
+        dav1d_picture_ref(out, &c->out);
+        dav1d_picture_unref(&c->out);
+        return 0;
+    }
+
+    return -EAGAIN;
+}
+
+void dav1d_close(Dav1dContext *const c) {
+    validate_input(c != NULL);
+
+    for (int n = 0; n < c->n_fc; n++) {
+        Dav1dFrameContext *const f = &c->fc[n];
+
+        // clean-up threading stuff
+        if (c->n_fc > 1) {
+            pthread_mutex_lock(&f->frame_thread.td.lock);
+            f->frame_thread.die = 1;
+            pthread_cond_signal(&f->frame_thread.td.cond);
+            pthread_mutex_unlock(&f->frame_thread.td.lock);
+            pthread_join(f->frame_thread.td.thread, NULL);
+            freep(&f->frame_thread.b);
+            dav1d_free_aligned(&f->frame_thread.pal_idx);
+            dav1d_free_aligned(&f->frame_thread.cf);
+            freep(&f->frame_thread.tile_start_off);
+            freep(&f->frame_thread.pal);
+            freep(&f->frame_thread.cbi);
+            pthread_mutex_destroy(&f->frame_thread.td.lock);
+            pthread_cond_destroy(&f->frame_thread.td.cond);
+        }
+        if (f->n_tc > 1) {
+            pthread_mutex_lock(&f->tile_thread.lock);
+            for (int m = 0; m < f->n_tc; m++) {
+                Dav1dTileContext *const t = &f->tc[m];
+                t->tile_thread.die = 1;
+            }
+            pthread_cond_broadcast(&f->tile_thread.cond);
+            while (f->tile_thread.available != (1 << f->n_tc) - 1)
+                pthread_cond_wait(&f->tile_thread.icond,
+                                  &f->tile_thread.lock);
+            pthread_mutex_unlock(&f->tile_thread.lock);
+            for (int m = 0; m < f->n_tc; m++) {
+                Dav1dTileContext *const t = &f->tc[m];
+                if (f->n_tc > 1) {
+                    pthread_join(t->tile_thread.td.thread, NULL);
+                    pthread_mutex_destroy(&t->tile_thread.td.lock);
+                    pthread_cond_destroy(&t->tile_thread.td.cond);
+                }
+            }
+            pthread_mutex_destroy(&f->tile_thread.lock);
+            pthread_cond_destroy(&f->tile_thread.cond);
+            pthread_cond_destroy(&f->tile_thread.icond);
+        }
+        for (int m = 0; m < f->n_tc; m++) {
+            Dav1dTileContext *const t = &f->tc[m];
+            dav1d_free_aligned(t->cf);
+            dav1d_free_aligned(t->scratch.mem);
+            dav1d_free_aligned(t->emu_edge);
+        }
+        for (int m = 0; m < f->n_ts; m++) {
+            Dav1dTileState *const ts = &f->ts[m];
+            pthread_cond_destroy(&ts->tile_thread.cond);
+            pthread_mutex_destroy(&ts->tile_thread.lock);
+        }
+        free(f->ts);
+        dav1d_free_aligned(f->tc);
+        dav1d_free_aligned(f->ipred_edge[0]);
+        free(f->a);
+        free(f->lf.mask);
+        free(f->lf.level);
+        free(f->lf.tx_lpf_right_edge[0]);
+        av1_free_ref_mv_common(f->libaom_cm);
+        dav1d_free_aligned(f->lf.cdef_line);
+        dav1d_free_aligned(f->lf.lr_lpf_line);
+    }
+    dav1d_free_aligned(c->fc);
+    if (c->n_fc > 1) {
+        for (int n = 0; n < c->n_fc; n++)
+            if (c->frame_thread.out_delayed[n].p.data[0])
+                dav1d_thread_picture_unref(&c->frame_thread.out_delayed[n]);
+        free(c->frame_thread.out_delayed);
+    }
+    for (int n = 0; n < c->n_tile_data; n++)
+        dav1d_data_unref(&c->tile[n].data);
+    for (int n = 0; n < 8; n++) {
+        if (c->cdf[n].cdf)
+            cdf_thread_unref(&c->cdf[n]);
+        if (c->refs[n].p.p.data[0])
+            dav1d_thread_picture_unref(&c->refs[n].p);
+        if (c->refs[n].refmvs)
+            dav1d_ref_dec(c->refs[n].refmvs);
+        if (c->refs[n].segmap)
+            dav1d_ref_dec(c->refs[n].segmap);
+    }
+    dav1d_free_aligned(c);
+}
--- /dev/null
+++ b/src/loopfilter.c
@@ -1,0 +1,195 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+
+#include "common/intops.h"
+
+#include "src/loopfilter.h"
+
+// Core AV1 in-loop deblocking filter, applied across one block edge for a
+// run of 4 adjacent pixel positions.
+//
+// dst points at the first pixel on the q side of the edge. stridea advances
+// along the edge (one step per filtered position); strideb steps across the
+// edge, so dst[strideb * -1] is p0 and dst[strideb * +0] is q0. Exactly one
+// of the two strides is 1, selected by the lf_4_fn() wrappers below.
+//
+// E, I and H are the edge / inner / high-edge-variance thresholds derived
+// from the filter level; they arrive in 8-bit scale and are shifted up to
+// the working bitdepth here. wd is the filter width in pixels (4, 6, 8 or
+// 16 for luma; 4 or 6 for chroma).
+//
+// NOTE(review): __attribute__((noinline)) is GCC/Clang-specific syntax;
+// consider routing it through a portability macro (common/attributes.h) —
+// confirm what that header provides in this tree.
+static __attribute__((noinline)) void
+loop_filter(pixel *dst, int E, int I, int H,
+            const ptrdiff_t stridea, const ptrdiff_t strideb, const int wd)
+{
+    int i, F = 1 << (BITDEPTH - 8); // flatness threshold (1 at 8bpc)
+
+    E <<= BITDEPTH - 8;
+    I <<= BITDEPTH - 8;
+    H <<= BITDEPTH - 8;
+
+    for (i = 0; i < 4; i++, dst += stridea) {
+        int p6, p5, p4, p3, p2;
+        int p1 = dst[strideb * -2], p0 = dst[strideb * -1];
+        int q0 = dst[strideb * +0], q1 = dst[strideb * +1];
+        int q2, q3, q4, q5, q6;
+        int fm, flat8out, flat8in;
+
+        // Filter mask: only filter when the gradients next to and across
+        // the edge are below the inner (I) and edge (E) thresholds.
+        fm = abs(p1 - p0) <= I && abs(q1 - q0) <= I &&
+             abs(p0 - q0) * 2 + (abs(p1 - q1) >> 1) <= E;
+
+        // Wider filters additionally gate on the p2/q2 (and p3/q3) gradients.
+        if (wd > 4) {
+            p2 = dst[strideb * -3];
+            q2 = dst[strideb * +2];
+
+            fm &= abs(p2 - p1) <= I && abs(q2 - q1) <= I;
+
+            if (wd > 6) {
+                p3 = dst[strideb * -4];
+                q3 = dst[strideb * +3];
+
+                fm &= abs(p3 - p2) <= I && abs(q3 - q2) <= I;
+            }
+        }
+
+        if (!fm) continue;
+
+        // flat8out: the outer samples (p6..p4 / q4..q6) are near-flat,
+        // allowing the full 16-wide filter to be applied.
+        if (wd >= 16) {
+            p6 = dst[strideb * -7];
+            p5 = dst[strideb * -6];
+            p4 = dst[strideb * -5];
+            q4 = dst[strideb * +4];
+            q5 = dst[strideb * +5];
+            q6 = dst[strideb * +6];
+
+            flat8out = abs(p6 - p0) <= F && abs(p5 - p0) <= F &&
+                       abs(p4 - p0) <= F && abs(q4 - q0) <= F &&
+                       abs(q5 - q0) <= F && abs(q6 - q0) <= F;
+        }
+
+        // flat8in: the inner samples are near-flat. Note flat8in/flat8out
+        // are only ever read behind the same wd guards that set them, so
+        // the short-circuit below never reads them uninitialized.
+        if (wd >= 6)
+            flat8in = abs(p2 - p0) <= F && abs(p1 - p0) <= F &&
+                      abs(q1 - q0) <= F && abs(q2 - q0) <= F;
+
+        if (wd >= 8)
+            flat8in &= abs(p3 - p0) <= F && abs(q3 - q0) <= F;
+
+        if (wd >= 16 && (flat8out & flat8in)) {
+            // 16-wide filter: low-pass over 13 samples, edge-replicated.
+            dst[strideb * -6] = (p6 + p6 + p6 + p6 + p6 + p6 * 2 + p5 * 2 +
+                                 p4 * 2 + p3 + p2 + p1 + p0 + q0 + 8) >> 4;
+            dst[strideb * -5] = (p6 + p6 + p6 + p6 + p6 + p5 * 2 + p4 * 2 +
+                                 p3 * 2 + p2 + p1 + p0 + q0 + q1 + 8) >> 4;
+            dst[strideb * -4] = (p6 + p6 + p6 + p6 + p5 + p4 * 2 + p3 * 2 +
+                                 p2 * 2 + p1 + p0 + q0 + q1 + q2 + 8) >> 4;
+            dst[strideb * -3] = (p6 + p6 + p6 + p5 + p4 + p3 * 2 + p2 * 2 +
+                                 p1 * 2 + p0 + q0 + q1 + q2 + q3 + 8) >> 4;
+            dst[strideb * -2] = (p6 + p6 + p5 + p4 + p3 + p2 * 2 + p1 * 2 +
+                                 p0 * 2 + q0 + q1 + q2 + q3 + q4 + 8) >> 4;
+            dst[strideb * -1] = (p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
+                                 q0 * 2 + q1 + q2 + q3 + q4 + q5 + 8) >> 4;
+            dst[strideb * +0] = (p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
+                                 q1 * 2 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
+            dst[strideb * +1] = (p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
+                                 q2 * 2 + q3 + q4 + q5 + q6 + q6 + 8) >> 4;
+            dst[strideb * +2] = (p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 +
+                                 q3 * 2 + q4 + q5 + q6 + q6 + q6 + 8) >> 4;
+            dst[strideb * +3] = (p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 +
+                                 q4 * 2 + q5 + q6 + q6 + q6 + q6 + 8) >> 4;
+            dst[strideb * +4] = (p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 +
+                                 q5 * 2 + q6 + q6 + q6 + q6 + q6 + 8) >> 4;
+            dst[strideb * +5] = (p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 +
+                                 q6 * 2 + q6 + q6 + q6 + q6 + q6 + 8) >> 4;
+        } else if (wd >= 8 && flat8in) {
+            // 8-wide filter: low-pass over p3..q3.
+            dst[strideb * -3] = (p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
+            dst[strideb * -2] = (p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
+            dst[strideb * -1] = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
+            dst[strideb * +0] = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
+            dst[strideb * +1] = (p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3 + 4) >> 3;
+            dst[strideb * +2] = (p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3 + 4) >> 3;
+        } else if (wd == 6 && flat8in) {
+            // 6-wide (chroma) filter: low-pass over p2..q2.
+            dst[strideb * -2] = (p2 + 2 * p2 + 2 * p1 + 2 * p0 + q0 + 4) >> 3;
+            dst[strideb * -1] = (p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3;
+            dst[strideb * +0] = (p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3;
+            dst[strideb * +1] = (p0 + 2 * q0 + 2 * q1 + 2 * q2 + q2 + 4) >> 3;
+        } else {
+            // Narrow 4-tap filter; hev (high edge variance) selects the
+            // stronger variant that only adjusts p0/q0.
+            int hev = abs(p1 - p0) > H || abs(q1 - q0) > H;
+
+#define iclip_diff(v) iclip(v, -128 * (1 << (BITDEPTH - 8)), \
+                                127 * (1 << (BITDEPTH - 8)))
+
+            if (hev) {
+                int f = iclip_diff(p1 - q1), f1, f2;
+                f = iclip_diff(3 * (q0 - p0) + f);
+
+                f1 = imin(f + 4, 127) >> 3;
+                f2 = imin(f + 3, 127) >> 3;
+
+                dst[strideb * -1] = iclip_pixel(p0 + f2);
+                dst[strideb * +0] = iclip_pixel(q0 - f1);
+            } else {
+                int f = iclip_diff(3 * (q0 - p0)), f1, f2;
+
+                f1 = imin(f + 4, 127) >> 3;
+                f2 = imin(f + 3, 127) >> 3;
+
+                dst[strideb * -1] = iclip_pixel(p0 + f2);
+                dst[strideb * +0] = iclip_pixel(q0 - f1);
+
+                // The weak filter also nudges p1/q1 by half the q0/p0 delta.
+                f = (f1 + 1) >> 1;
+                dst[strideb * -2] = iclip_pixel(p1 + f);
+                dst[strideb * +1] = iclip_pixel(q1 - f);
+            }
+#undef iclip_diff
+        }
+    }
+}
+
+// Instantiate thin per-direction wrappers around loop_filter(). 'h' filters
+// a column edge: positions along the edge advance by one row
+// (stridea = stride) while samples across the edge are horizontally
+// adjacent (strideb = 1); 'v' is the transpose. One pair is generated per
+// supported filter width (4/6/8/16).
+#define lf_4_fn(dir, wd, stridea, strideb) \
+static void loop_filter_##dir##_##wd##wd_4px_c(pixel *const dst, \
+                                               const ptrdiff_t stride, \
+                                               const int E, const int I, \
+                                               const int H) \
+{ \
+    loop_filter(dst, E, I, H, stridea, strideb, wd); \
+}
+
+#define lf_4_fns(wd) \
+lf_4_fn(h, wd, PXSTRIDE(stride), 1) \
+lf_4_fn(v, wd, 1, PXSTRIDE(stride))
+
+lf_4_fns(4)
+lf_4_fns(6)
+lf_4_fns(8)
+lf_4_fns(16)
+
+#undef lf_4_fn
+#undef lf_4_fns
+
+// Populate the loop-filter DSP table with the C reference implementations.
+// First index selects the filter width, second index the edge direction
+// (0 = column edge / h, 1 = row edge / v).
+void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
+    // Luma column-edge filters, by increasing filter width (4/8/16).
+    c->loop_filter[0][0] = loop_filter_h_4wd_4px_c;
+    c->loop_filter[1][0] = loop_filter_h_8wd_4px_c;
+    c->loop_filter[2][0] = loop_filter_h_16wd_4px_c;
+    // Luma row-edge filters.
+    c->loop_filter[0][1] = loop_filter_v_4wd_4px_c;
+    c->loop_filter[1][1] = loop_filter_v_8wd_4px_c;
+    c->loop_filter[2][1] = loop_filter_v_16wd_4px_c;
+
+    // Chroma filters: width 4 and 6 only.
+    c->loop_filter_uv[0][0] = loop_filter_h_4wd_4px_c;
+    c->loop_filter_uv[0][1] = loop_filter_v_4wd_4px_c;
+    c->loop_filter_uv[1][0] = loop_filter_h_6wd_4px_c;
+    c->loop_filter_uv[1][1] = loop_filter_v_6wd_4px_c;
+}
--- /dev/null
+++ b/src/loopfilter.h
@@ -1,0 +1,56 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_LOOPFILTER_H__
+#define __DAV1D_SRC_LOOPFILTER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "common/bitdepth.h"
+
+#include "src/levels.h"
+
+// Signature shared by all loop-filter entry points: filters 4 adjacent
+// positions along one block edge. mb_lim/lim/hev_thr are the E/I/H
+// thresholds (edge limit, inner limit, high-edge-variance threshold).
+#define decl_loopfilter_fn(name) \
+void (name)(pixel *dst, ptrdiff_t stride, int mb_lim, int lim, int hev_thr);
+typedef decl_loopfilter_fn(*loopfilter_fn);
+
+typedef struct Dav1dLoopFilterDSPContext {
+    /*
+     * dimension 1: filter taps (0=4, 1=8, 2=16 for luma; 0=4, 1=6 for chroma)
+     * dimension 2: 0=col-edge filter (h), 1=row-edge filter (v)
+     *
+     * dst/stride are aligned by 4
+     */
+    loopfilter_fn loop_filter[3][2];
+    loopfilter_fn loop_filter_uv[2][2];
+} Dav1dLoopFilterDSPContext;
+
+// Per-bitdepth initializers; fill the table with C (and later SIMD) versions.
+void dav1d_loop_filter_dsp_init_8bpc(Dav1dLoopFilterDSPContext *c);
+void dav1d_loop_filter_dsp_init_10bpc(Dav1dLoopFilterDSPContext *c);
+
+#endif /* __DAV1D_SRC_LOOPFILTER_H__ */
--- /dev/null
+++ b/src/looprestoration.c
@@ -1,0 +1,577 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+
+#include "common/intops.h"
+
+#include "src/looprestoration.h"
+#include "src/tables.h"
+
+
+// TODO Reuse p when no padding is needed (add and remove lpf pixels in p)
+// TODO Chroma only requires 2 rows of padding.
+// Build a padded copy of a restoration unit: the unit_w x stripe_h source
+// pixels surrounded by a 3-pixel border on all four sides. Where
+// neighbouring data exists (per `edges`), the border is taken from the
+// frame (`p`) or from the saved pre-loopfilter rows (`lpf`); otherwise the
+// nearest row/column is replicated.
+static void padding(pixel *dst, const ptrdiff_t dst_stride,
+                    const pixel *p, const ptrdiff_t p_stride,
+                    const pixel *lpf, const ptrdiff_t lpf_stride,
+                    int unit_w, const int stripe_h, const enum LrEdgeFlags edges)
+{
+    const int have_left = !!(edges & LR_HAVE_LEFT);
+    const int have_right = !!(edges & LR_HAVE_RIGHT);
+
+    // Copy more pixels if we don't have to pad them
+    unit_w += 3 * have_left + 3 * have_right;
+    // Leave 3 columns of left padding open when there is no left neighbour;
+    // they are replicated at the bottom of this function.
+    pixel *dst_l = dst + 3 * !have_left;
+    p -= 3 * have_left;
+    lpf -= 3 * have_left;
+
+    if (edges & LR_HAVE_TOP) {
+        // Copy previous loop filtered rows (above_1 is used for the two
+        // topmost padding rows, above_2 for the third)
+        const pixel *const above_1 = lpf;
+        const pixel *const above_2 = above_1 + PXSTRIDE(lpf_stride);
+        pixel_copy(dst_l, above_1, unit_w);
+        pixel_copy(dst_l + PXSTRIDE(dst_stride), above_1, unit_w);
+        pixel_copy(dst_l + 2 * PXSTRIDE(dst_stride), above_2, unit_w);
+    } else {
+        // Pad with first row
+        pixel_copy(dst_l, p, unit_w);
+        pixel_copy(dst_l + PXSTRIDE(dst_stride), p, unit_w);
+        pixel_copy(dst_l + 2 * PXSTRIDE(dst_stride), p, unit_w);
+    }
+
+    pixel *dst_tl = dst_l + 3 * PXSTRIDE(dst_stride);
+    if (edges & LR_HAVE_BOTTOM) {
+        // Copy next loop filtered rows (below_2 fills the two bottom rows)
+        const pixel *const below_1 = lpf + 6 * PXSTRIDE(lpf_stride);
+        const pixel *const below_2 = below_1 + PXSTRIDE(lpf_stride);
+        pixel_copy(dst_tl + stripe_h * PXSTRIDE(dst_stride), below_1, unit_w);
+        pixel_copy(dst_tl + (stripe_h + 1) * PXSTRIDE(dst_stride), below_2, unit_w);
+        pixel_copy(dst_tl + (stripe_h + 2) * PXSTRIDE(dst_stride), below_2, unit_w);
+    } else {
+        // Pad with last row
+        const pixel *const src = p + (stripe_h - 1) * PXSTRIDE(p_stride);
+        pixel_copy(dst_tl + stripe_h * PXSTRIDE(dst_stride), src, unit_w);
+        pixel_copy(dst_tl + (stripe_h + 1) * PXSTRIDE(dst_stride), src, unit_w);
+        pixel_copy(dst_tl + (stripe_h + 2) * PXSTRIDE(dst_stride), src, unit_w);
+    }
+
+    // Inner UNIT_WxSTRIPE_H
+    for (int j = 0; j < stripe_h; j++) {
+        pixel_copy(dst_tl, p, unit_w);
+        dst_tl += PXSTRIDE(dst_stride);
+        p += PXSTRIDE(p_stride);
+    }
+
+    if (!have_right) {
+        pixel *pad = dst_l + unit_w;
+        pixel *row_last = &dst_l[unit_w - 1];
+        // Pad 3x(STRIPE_H+6) with last column
+        for (int j = 0; j < stripe_h + 6; j++) {
+            pixel_set(pad, *row_last, 3);
+            pad += PXSTRIDE(dst_stride);
+            row_last += PXSTRIDE(dst_stride);
+        }
+    }
+
+    if (!have_left) {
+        // Pad 3x(STRIPE_H+6) with first column
+        for (int j = 0; j < stripe_h + 6; j++) {
+            pixel_set(dst, *dst_l, 3);
+            dst += PXSTRIDE(dst_stride);
+            dst_l += PXSTRIDE(dst_stride);
+        }
+    }
+}
+
+// Wiener (separable 7-tap) loop restoration filter, C reference version.
+//
+// p/p_stride:      pixels to restore in place (w x h).
+// lpf/lpf_stride:  saved pre-loopfilter rows, used by padding() for the
+//                  3-pixel border above/below the stripe.
+// filterh/filterv: 7-tap horizontal/vertical coefficients (spec 7.17.2).
+// edges:           which neighbouring data is available (LrEdgeFlags).
+//
+// FIXME Could split into luma and chroma specific functions,
+// (since first and last tops are always 0 for chroma)
+// FIXME Could implement a version that requires less temporary memory
+// (should be possible to implement with only 6 rows of temp storage)
+static void wiener_c(pixel *p, const ptrdiff_t p_stride,
+                     const pixel *lpf, const ptrdiff_t lpf_stride,
+                     const int w, const int h,
+                     const int16_t filterh[7], const int16_t filterv[7],
+                     const enum LrEdgeFlags edges)
+{
+    // padding is 3 pixels above and 3 pixels below
+    const ptrdiff_t tmp_stride = sizeof(pixel) * (w + 6);
+    pixel tmp[(h + 6) * PXSTRIDE(tmp_stride)];
+    pixel *tmp_ptr = tmp;
+
+    padding(tmp, tmp_stride, p, p_stride, lpf, lpf_stride, w, h, edges);
+
+    // Values stored between horizontal and vertical filtering don't
+    // fit in a uint8_t.
+    uint16_t hor[(h + 6 /*padding*/) * w];
+    uint16_t *hor_ptr = hor;
+
+    // Horizontal pass: the filter is applied with a +128 offset on the
+    // center tap and a bitdepth-dependent rounding bias, then clipped to
+    // the intermediate range [0, clip_limit - 1] (the clip maximum is
+    // inclusive, hence the -1; matches the reference clamp).
+    const int round_bits_h = 3 + (BITDEPTH == 12) * 2;
+    const int rounding_off_h = 1 << (round_bits_h - 1);
+    const int clip_limit = 1 << ((BITDEPTH) + 1 + 7 - round_bits_h);
+    for (int j = 0; j < h + 6; j++) {
+        for (int i = 0; i < w; i++) {
+            int sum = (tmp_ptr[i + 3] << 7) + (1 << (BITDEPTH + 6));
+
+            for (int k = 0; k < 7; k++) {
+                sum += tmp_ptr[i + k] * filterh[k];
+            }
+
+            hor_ptr[i] =
+                iclip((sum + rounding_off_h) >> round_bits_h, 0, clip_limit - 1);
+        }
+        tmp_ptr += PXSTRIDE(tmp_stride);
+        hor_ptr += w;
+    }
+
+    // Vertical pass: undo the intermediate offset (round_offset) and
+    // narrow back to pixel range.
+    const int round_bits_v = 11 - (BITDEPTH == 12) * 2;
+    const int rounding_off_v = 1 << (round_bits_v - 1);
+    const int round_offset = 1 << (BITDEPTH + (round_bits_v - 1));
+    for (int i = 0; i < w; i++) {
+        for (int j = 0; j < h; j++) {
+            int sum = (hor[w * (j + 3) + i] << 7) - round_offset;
+
+            for (int k = 0; k < 7; k++) {
+                sum += hor[(j + k) * w + i] * filterv[k];
+            }
+
+            p[j * PXSTRIDE(p_stride) + i] =
+                iclip_pixel((sum + rounding_off_v) >> round_bits_v);
+        }
+    }
+}
+
+// Sum over a 3x3 area
+// The dst and src pointers are positioned 3 pixels above and 3 pixels to the
+// left of the top left corner. However, the self guided filter only needs 1
+// pixel above and one pixel to the left. As for the pixels below and to the
+// right they must be computed in the sums, but don't need to be stored.
+//
+// Example for a 4x4 block:
+//      x x x x x x x x x x
+//      x c c c c c c c c x
+//      x i s s s s s s i x
+//      x i s s s s s s i x
+//      x i s s s s s s i x
+//      x i s s s s s s i x
+//      x i s s s s s s i x
+//      x i s s s s s s i x
+//      x c c c c c c c c x
+//      x x x x x x x x x x
+//
+// s: Pixel summed and stored
+// i: Pixel summed and stored (between loops)
+// c: Pixel summed not stored
+// x: Pixel not summed not stored
+static void boxsum3(coef *dst, const ptrdiff_t dst_stride,
+                    const pixel *src, ptrdiff_t src_stride,
+                    const int w, const int h)
+{
+    src_stride = PXSTRIDE(src_stride);
+    // We skip the first row, as it is never used
+    src += src_stride;
+    dst += dst_stride;
+
+    // First pass: vertical 3-tap sums (sliding window a+b+c per column).
+    // We skip the first and last columns, as they are never used
+    for (int x = 1; x < w - 1; x++) {
+        coef *ds = dst + x;
+        const pixel *s = src + x;
+        int a = s[0], b = s[src_stride];
+
+        // We skip the first 2 rows, as they are skipped in the next loop,
+        // and we don't need the last 2 rows as they are skipped there too
+        for (int y = 2; y < h - 2; y++) {
+            s += src_stride;
+            const int c = s[src_stride];
+            ds += dst_stride;
+            *ds = a + b + c;
+            a = b;
+            b = c;
+        }
+     }
+
+    // Second pass: horizontal 3-tap sums over the vertical sums, in place.
+    // We skip the first 2 rows as they are never read
+    dst += dst_stride;
+    // We skip the last 2 rows as they are never read
+    for (int y = 2; y < h - 2; y++) {
+        int a = dst[1], b = dst[2];
+
+        // We don't store the first column as it is never read and
+        // we don't store the last 2 columns as they are never read
+        for (int x = 2; x < w - 2; x++) {
+            const int c = dst[x + 1];
+            dst[x] = a + b + c;
+            a = b;
+            b = c;
+        }
+        dst += dst_stride;
+    }
+}
+
+// Sum over a 5x5 area
+// The dst and src pointers are positioned 3 pixels above and 3 pixels to the
+// left of the top left corner. However, the self guided filter only needs 1
+// pixel above and one pixel to the left. As for the pixels below and to the
+// right they must be computed in the sums, but don't need to be stored.
+//
+// Example for a 4x4 block:
+//      c c c c c c c c c c
+//      c c c c c c c c c c
+//      i i s s s s s s i i
+//      i i s s s s s s i i
+//      i i s s s s s s i i
+//      i i s s s s s s i i
+//      i i s s s s s s i i
+//      i i s s s s s s i i
+//      c c c c c c c c c c
+//      c c c c c c c c c c
+//
+// s: Pixel summed and stored
+// i: Pixel summed and stored (between loops)
+// c: Pixel summed not stored
+// x: Pixel not summed not stored
+static void boxsum5(coef *dst, const ptrdiff_t dst_stride,
+                    const pixel *const src, ptrdiff_t src_stride,
+                    const int w, const int h)
+{
+    src_stride = PXSTRIDE(src_stride);
+
+    // We skip the first row, as it is never used
+    dst += dst_stride;
+
+    // First pass: vertical 5-tap sums (sliding window a+b+c+d+e per column).
+    for (int x = 0; x < w; x++) {
+        coef *ds = dst + x;
+        const pixel *s = src + 3 * src_stride + x;
+        int a = s[-3 * src_stride];
+        int b = s[-2 * src_stride];
+        int c = s[-1 * src_stride];
+        int d = s[0];
+
+        // We skip the first 2 rows, as they are skipped in the next loop,
+        // and we don't need the last 2 rows as they are skipped there too
+        for (int y = 2; y < h - 2; y++) {
+            s += src_stride;
+            const int e = *s;
+            ds += dst_stride;
+            *ds = a + b + c + d + e;
+            a = b;
+            b = c;
+            c = d;
+            d = e;
+        }
+    }
+
+    // Second pass: horizontal 5-tap sums over the vertical sums, in place.
+    // We skip the first 2 rows as they are never read
+    dst += dst_stride;
+    for (int y = 2; y < h - 2; y++) {
+        int a = dst[0];
+        int b = dst[1];
+        int c = dst[2];
+        int d = dst[3];
+
+        for (int x = 2; x < w - 2; x++) {
+            const int e = dst[x + 2];
+            dst[x] = a + b + c + d + e;
+            a = b;
+            b = c;
+            c = d;
+            d = e;
+        }
+        dst += dst_stride;
+    }
+}
+
+// Sum of squared pixels over a 3x3 area.
+// See boxsum3 function comments for details on row and column skipping
+static void boxsum3sqr(int32_t *dst, const ptrdiff_t dst_stride,
+                       const pixel *src, ptrdiff_t src_stride,
+                       const int w, const int h)
+{
+    src_stride = PXSTRIDE(src_stride);
+
+    // We skip the first row, as it is never used
+    src += src_stride;
+    dst += dst_stride;
+
+    // First pass: vertical 3-tap sums of squares, per column.
+    // We skip the first and last columns, as they are never used
+    for (int x = 1; x < w - 1; x++) {
+        int *ds = dst + x;
+        const pixel *s = src + x;
+        int a = s[0] * s[0];
+        int b = s[src_stride] * s[src_stride];
+
+        // We skip the first 2 rows, as they are skipped in the next loop,
+        // and we don't need the last 2 rows as they are skipped there too
+        for (int y = 2; y < h - 2; y++) {
+            s += src_stride;
+            const int c = s[src_stride] * s[src_stride];
+            ds += dst_stride;
+            *ds = a + b + c;
+            a = b;
+            b = c;
+        }
+     }
+
+    // Second pass: horizontal 3-tap sums over the vertical sums, in place.
+    // We skip the first 2 rows as they are never read
+    dst += dst_stride;
+    // We skip the last 2 rows as they are never read
+    for (int y = 2; y < h - 2; y++) {
+        int a = dst[1], b = dst[2];
+
+        // We don't store the first column as it is never read and
+        // we don't store the last 2 columns as they are never read
+        for (int x = 2; x < w - 2; x++) {
+            const int c = dst[x + 1];
+            dst[x] = a + b + c;
+            a = b;
+            b = c;
+        }
+        dst += dst_stride;
+    }
+}
+
+// Sum of squared pixels over a 5x5 area.
+// See boxsum5 function comments for details on row and column skipping
+static void boxsum5sqr(int32_t *dst, const ptrdiff_t dst_stride,
+                       const pixel *const src, ptrdiff_t src_stride,
+                       const int w, const int h)
+{
+    src_stride = PXSTRIDE(src_stride);
+
+    // We skip the first row, as it is never used
+    dst += dst_stride;
+
+    // First pass: vertical 5-tap sums of squares, per column.
+    for (int x = 0; x < w; x++) {
+        int *ds = dst + x;
+        const pixel *s = src + 3 * src_stride + x;
+        int a = s[-3 * src_stride] * s[-3 * src_stride];
+        int b = s[-2 * src_stride] * s[-2 * src_stride];
+        int c = s[-1 * src_stride] * s[-1 * src_stride];
+        int d = s[0] * s[0];
+
+        // We skip the first 2 rows, as they are skipped in the next loop,
+        // and we don't need the last 2 rows as they are skipped there too
+        for (int y = 2; y < h - 2; y++) {
+            s += src_stride;
+            const int e = s[0] * s[0];
+            ds += dst_stride;
+            *ds = a + b + c + d + e;
+            a = b;
+            b = c;
+            c = d;
+            d = e;
+        }
+    }
+
+    // Second pass: horizontal 5-tap sums over the vertical sums, in place.
+    // We skip the first 2 rows as they are never read
+    dst += dst_stride;
+    for (int y = 2; y < h - 2; y++) {
+        int a = dst[0];
+        int b = dst[1];
+        int c = dst[2];
+        int d = dst[3];
+
+        for (int x = 2; x < w - 2; x++) {
+            const int e = dst[x + 2];
+            dst[x] = a + b + c + d + e;
+            a = b;
+            b = c;
+            c = d;
+            d = e;
+        }
+        dst += dst_stride;
+    }
+}
+
+// Core of the self-guided restoration filter: computes the box-filtered
+// output for one pass into dst (4-bit headroom over pixel range).
+// n is the box area (9 for radius 1, 25 for radius 2), s the strength
+// parameter. For n == 25 the A/B coefficients are only computed on every
+// other row (step == 2) and shared by the row pair, per the spec.
+static void selfguided_filter(int32_t *dst, const ptrdiff_t dst_stride,
+                              const pixel *src, const ptrdiff_t src_stride,
+                              const int w, const int h, const int n, const int s)
+{
+    const int tmp_stride = w + 6;
+    // FIXME Replace array with scratch memory
+    int32_t A_[(h + 6) * tmp_stride];
+    int32_t *A = A_ + 3 * tmp_stride + 3;
+    // By inverting A and B after the boxsums, B can be of size coef instead
+    // of int32_t
+    coef B_[(h + 6) * tmp_stride];
+    coef *B = B_ + 3 * tmp_stride + 3;
+
+    const int step = (n == 25) + 1;
+    // B_ holds box sums of pixels, A_ box sums of squared pixels.
+    if (n == 25) {
+        boxsum5(B_, tmp_stride, src, src_stride, w + 6, h + 6);
+        boxsum5sqr(A_, tmp_stride, src, src_stride, w + 6, h + 6);
+    } else {
+        boxsum3(B_, tmp_stride, src, src_stride, w + 6, h + 6);
+        boxsum3sqr(A_, tmp_stride, src, src_stride, w + 6, h + 6);
+    }
+
+    // Convert the sums into the a/b filter coefficients: the variance
+    // estimate p = n*E[x^2] - E[x]^2 is mapped through the strength s and
+    // the sgr_x_by_xplus1 lookup table.
+    int32_t *AA = A - tmp_stride;
+    coef *BB = B - tmp_stride;
+    for (int j = -1; j < h + 1; j+= step) {
+        for (int i = -1; i < w + 1; i++) {
+            // Round sums down to 8-bit scale at higher bitdepths.
+            const int a =
+                (AA[i] + (1 << (2 * (BITDEPTH - 8)) >> 1)) >> (2 * (BITDEPTH - 8));
+            const int b =
+                (BB[i] + (1 << (BITDEPTH - 8) >> 1)) >> (BITDEPTH - 8);
+
+            // Clamp the variance at 0 without branching.
+            const uint32_t p = (a * n >= b * b) * (a * n - b * b);
+            const uint32_t z = (p * s + (1 << 19)) >> 20;
+
+            const int x = sgr_x_by_xplus1[imin(z, 255)];
+            // This is where we invert A and B, so that B is of size coef.
+            AA[i] = (((1 << 8) - x) * BB[i] * sgr_one_by_x[n - 1] + (1 << 11)) >> 12;
+            BB[i] = x;
+        }
+        AA += step * tmp_stride;
+        BB += step * tmp_stride;
+    }
+
+    // Apply the coefficients: output = (a * src + b) with a 3x3 smoothing
+    // of a/b. Weights differ between the stored (even) and interpolated
+    // (odd) rows in the n == 25 case.
+    src += 3 * PXSTRIDE(src_stride) + 3;
+    if (n == 25) {
+        for (int j = 0; j < h; j+=2) {
+            // Even row: coefficients from the rows above/below (weights 6/5).
+            for (int i = 0; i < w; i++) {
+                const int32_t a = (B[i - tmp_stride] +
+                                   B[i + tmp_stride]) * 6 +
+                                  (B[i - 1 - tmp_stride] +
+                                   B[i - 1 + tmp_stride] +
+                                   B[i + 1 - tmp_stride] +
+                                   B[i + 1 + tmp_stride]) * 5;
+                const int32_t b = (A[i - tmp_stride] +
+                                   A[i + tmp_stride]) * 6 +
+                                  (A[i - 1 - tmp_stride] +
+                                   A[i - 1 + tmp_stride] +
+                                   A[i + 1 - tmp_stride] +
+                                   A[i + 1 + tmp_stride]) * 5;
+                dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
+            }
+            dst += dst_stride;
+            src += PXSTRIDE(src_stride);
+            B += tmp_stride;
+            A += tmp_stride;
+            // Odd row: coefficients from the same row only (weights 6/5).
+            for (int i = 0; i < w; i++) {
+                const int32_t a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
+                const int32_t b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
+                dst[i] = (a * src[i] + b + (1 << 7)) >> 8;
+            }
+            dst += dst_stride;
+            src += PXSTRIDE(src_stride);
+            B += tmp_stride;
+            A += tmp_stride;
+        }
+    } else {
+        // n == 9: full 3x3 neighbourhood with 4/3 weights on every row.
+        for (int j = 0; j < h; j++) {
+            for (int i = 0; i < w; i++) {
+                const int32_t a =
+                    (B[i] + B[i - 1] + B[i + 1] +
+                     B[i - tmp_stride] +
+                     B[i + tmp_stride]) * 4 +
+                    (B[i - 1 - tmp_stride] +
+                     B[i - 1 + tmp_stride] +
+                     B[i + 1 - tmp_stride] +
+                     B[i + 1 + tmp_stride]) * 3;
+
+                const int32_t b =
+                    (A[i] + A[i - 1] + A[i + 1] +
+                     A[i - tmp_stride] +
+                     A[i + tmp_stride]) * 4 +
+                    (A[i - 1 - tmp_stride] +
+                     A[i - 1 + tmp_stride] +
+                     A[i + 1 - tmp_stride] +
+                     A[i + 1 + tmp_stride]) * 3;
+
+                dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
+            }
+            dst += dst_stride;
+            src += PXSTRIDE(src_stride);
+            B += tmp_stride;
+            A += tmp_stride;
+        }
+    }
+}
+
+// Self-guided loop restoration entry point, C reference version. Restores
+// the w x h block at p in place, blending up to two selfguided_filter()
+// passes (radius 2 / n=25 and radius 1 / n=9) with the signalled weights
+// sgr_w. sgr_idx selects the parameter set from the sgr_params table; a
+// zero radius entry disables the corresponding pass.
+static void selfguided_c(pixel *p, const ptrdiff_t p_stride,
+                         const pixel *lpf, const ptrdiff_t lpf_stride,
+                         const int w, const int h, const int sgr_idx,
+                         const int16_t sgr_w[2], const enum LrEdgeFlags edges)
+{
+    // padding is 3 pixels above and 3 pixels below
+    // (ptrdiff_t for consistency with wiener_c; the stride is in bytes)
+    const ptrdiff_t tmp_stride = sizeof(pixel) * (w + 6);
+    pixel tmp[(h + 6) * PXSTRIDE(tmp_stride)];
+
+    padding(tmp, tmp_stride, p, p_stride, lpf, lpf_stride, w, h, edges);
+
+    // both r1 and r0 can't be zero
+    if (!sgr_params[sgr_idx][0]) {
+        // Only the radius-1 (n=9) pass is enabled.
+        int32_t dst[h * w];
+        const int s1 = sgr_params[sgr_idx][3];
+        selfguided_filter(dst, w, tmp, tmp_stride, w, h, 9, s1);
+        const int w1 = (1 << 7) - sgr_w[1];
+        for (int j = 0; j < h; j++) {
+            for (int i = 0; i < w; i++) {
+                const int32_t u = (p[i] << 4);
+                const int32_t v = (u << 7) + w1 * (dst[j * w + i] - u);
+                p[i] = iclip_pixel((v + (1 << 10)) >> 11);
+            }
+            p += PXSTRIDE(p_stride);
+        }
+    } else if (!sgr_params[sgr_idx][1]) {
+        // Only the radius-2 (n=25) pass is enabled.
+        int32_t dst[h * w];
+        const int s0 = sgr_params[sgr_idx][2];
+        selfguided_filter(dst, w, tmp, tmp_stride, w, h, 25, s0);
+        const int w0 = sgr_w[0];
+        for (int j = 0; j < h; j++) {
+            for (int i = 0; i < w; i++) {
+                const int32_t u = (p[i] << 4);
+                const int32_t v = (u << 7) + w0 * (dst[j * w + i] - u);
+                p[i] = iclip_pixel((v + (1 << 10)) >> 11);
+            }
+            p += PXSTRIDE(p_stride);
+        }
+    } else {
+        // Both passes enabled; blend with weights w0/w1 (7-bit fixed point).
+        int32_t dst0[h * w];
+        int32_t dst1[h * w];
+        const int s0 = sgr_params[sgr_idx][2];
+        const int s1 = sgr_params[sgr_idx][3];
+        const int w0 = sgr_w[0];
+        const int w1 = (1 << 7) - w0 - sgr_w[1];
+        selfguided_filter(dst0, w, tmp, tmp_stride, w, h, 25, s0);
+        selfguided_filter(dst1, w, tmp, tmp_stride, w, h, 9, s1);
+        for (int j = 0; j < h; j++) {
+            for (int i = 0; i < w; i++) {
+                const int32_t u = (p[i] << 4);
+                const int32_t v = (u << 7) + w0 * (dst0[j * w + i] - u) +
+                                  w1 * (dst1[j * w + i] - u);
+                p[i] = iclip_pixel((v + (1 << 10)) >> 11);
+            }
+            p += PXSTRIDE(p_stride);
+        }
+    }
+}
+
+// Populate the loop-restoration DSP table with the C reference versions.
+void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c) {
+    c->selfguided = selfguided_c;
+    c->wiener = wiener_c;
+}
--- /dev/null
+++ b/src/looprestoration.h
@@ -1,0 +1,65 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_LOOPRESTORATION_H__
+#define __DAV1D_SRC_LOOPRESTORATION_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "common/bitdepth.h"
+
// Bitmask telling a restoration filter which neighbouring pixels exist;
// missing edges occur at frame and stripe boundaries.
enum LrEdgeFlags {
    LR_HAVE_LEFT = 1 << 0,
    LR_HAVE_RIGHT = 1 << 1,
    LR_HAVE_TOP = 1 << 2,
    LR_HAVE_BOTTOM = 1 << 3,
};

// Although the spec applies restoration filters over 4x4 blocks, the wiener
// filter can be applied to a bigger surface.
//    * w is constrained by the restoration unit size (w <= 256)
//    * h is constrained by the stripe height (h <= 64)
// filterh/filterv hold the 7 horizontal/vertical filter taps; lpf points to
// the backed-up rows used across stripe boundaries (see lr_apply.c).
typedef void (*wienerfilter_fn)(pixel *dst, ptrdiff_t dst_stride,
                                const pixel *lpf, ptrdiff_t lpf_stride,
                                int w, int h, const int16_t filterh[7],
                                const int16_t filterv[7], enum LrEdgeFlags edges);

// Self-guided restoration; sgr_idx selects the parameter set and sgr_w
// carries the two blending weights.
typedef void (*selfguided_fn)(pixel *dst, ptrdiff_t dst_stride,
                              const pixel *lpf, ptrdiff_t lpf_stride,
                              int w, int h, int sgr_idx, const int16_t sgr_w[2],
                              const enum LrEdgeFlags edges);

// Per-bitdepth function table for loop restoration.
typedef struct Dav1dLoopRestorationDSPContext {
    wienerfilter_fn wiener;
    selfguided_fn selfguided;
} Dav1dLoopRestorationDSPContext;

void dav1d_loop_restoration_dsp_init_8bpc(Dav1dLoopRestorationDSPContext *c);
void dav1d_loop_restoration_dsp_init_10bpc(Dav1dLoopRestorationDSPContext *c);

#endif /* __DAV1D_SRC_LOOPRESTORATION_H__ */
--- /dev/null
+++ b/src/lr_apply.c
@@ -1,0 +1,302 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdio.h>
+
+#include "common/intops.h"
+
+#include "src/lr_apply.h"
+
+
+enum LrRestorePlanes {
+    LR_RESTORE_Y = 1 << 0,
+    LR_RESTORE_U = 1 << 1,
+    LR_RESTORE_V = 1 << 2,
+};
+
// Back up, for every stripe in [row, row_h), the 4 rows that straddle the
// stripe's bottom edge (its last 2 rows plus the first 2 rows of the next
// stripe) into the line buffer `dst`. first_stripe_h is the (possibly
// shortened) height of this sb row's first stripe; next_stripe_h the height
// of all following ones.
// NOTE(review): the `h` parameter is unused in this body — confirm whether
// it can be dropped from the interface.
static void backup_lpf(pixel *dst, ptrdiff_t dst_stride,
                       const pixel *src, ptrdiff_t src_stride,
                       const int first_stripe_h, const int next_stripe_h,
                       int row, const int row_h, const int w, const int h)
{
    // Convert byte strides to pixel-unit strides.
    src_stride = PXSTRIDE(src_stride);
    dst_stride = PXSTRIDE(dst_stride);
    if (row) {
        // Copy the top part of the stored loop filtered pixels from the
        // previous sb row needed above the first stripe of this sb row.
        pixel_copy(&dst[dst_stride *  0], &dst[dst_stride *  8], w);
        pixel_copy(&dst[dst_stride *  1], &dst[dst_stride *  9], w);
        pixel_copy(&dst[dst_stride *  2], &dst[dst_stride * 10], w);
        pixel_copy(&dst[dst_stride *  3], &dst[dst_stride * 11], w);
    }

    int stripe_h = first_stripe_h;
    dst += 4 * dst_stride;
    // Position src 2 rows above the first stripe's bottom edge.
    src += (stripe_h - 2) * src_stride;
    for (; row + stripe_h <= row_h; row += stripe_h) {
        for (int i = 0; i < 4; i++) {
            pixel_copy(dst, src, w);
            dst += dst_stride;
            src += src_stride;
        }
        stripe_h = next_stripe_h;
        // Jump to 2 rows above the next stripe's bottom edge; the copy loop
        // above already advanced src by 4 rows.
        src += (stripe_h - 4) * src_stride;
    }
}
+
// For each plane with restoration enabled, save the rows of superblock row
// `sby` that dav1d_lr_sbrow will later need across stripe boundaries.
// Must run before subsequent in-place filtering overwrites them.
void bytefn(dav1d_lr_copy_lpf)(Dav1dFrameContext *const f,
                               /*const*/ pixel *const src[3], const int sby)
{
    // The first stripe of a frame is 8 luma rows shorter than the rest.
    const int stripe_h = 64 - (8 * !sby);
    // All sb rows except the first start 8 luma rows above the sb boundary.
    const ptrdiff_t offset = 8 * !!sby;
    const ptrdiff_t *const src_stride = f->cur.p.stride;

    // TODO Also check block level restore type to reduce copying.
    const int restore_planes =
        ((f->frame_hdr.restoration.type[0] != RESTORATION_NONE) << 0) +
        ((f->frame_hdr.restoration.type[1] != RESTORATION_NONE) << 1) +
        ((f->frame_hdr.restoration.type[2] != RESTORATION_NONE) << 2);

    if (restore_planes & LR_RESTORE_Y) {
        const int h = f->bh << 2;
        const int w = f->bw << 2;
        const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);
        const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset;
        backup_lpf(f->lf.lr_lpf_line_ptr[0], sizeof(pixel) * f->b4_stride * 4,
                   src[0] - offset * PXSTRIDE(src_stride[0]),
                   src_stride[0], stripe_h, 64, y_stripe, row_h, w, h);
    }
    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
        const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
        const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
        const int h = f->bh << (2 - ss_ver);
        const int w = f->bw << (2 - ss_hor);
        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);
        const int stripe_h_uv = stripe_h >> ss_ver;
        const ptrdiff_t offset_uv = offset >> ss_ver;
        const int y_stripe =
            (sby << ((6 - ss_ver) + f->seq_hdr.sb128)) - offset_uv;

        if (restore_planes & LR_RESTORE_U) {
            backup_lpf(f->lf.lr_lpf_line_ptr[1], sizeof(pixel) * f->b4_stride * 4,
                       src[1] - offset_uv * PXSTRIDE(src_stride[1]),
                       src_stride[1], stripe_h_uv, 32, y_stripe,
                       row_h, w, h);
        }
        if (restore_planes & LR_RESTORE_V) {
            // src_stride[1] is shared by both chroma planes.
            backup_lpf(f->lf.lr_lpf_line_ptr[2], sizeof(pixel) * f->b4_stride * 4,
                       src[2] - offset_uv * PXSTRIDE(src_stride[1]),
                       src_stride[1], stripe_h_uv, 32, y_stripe,
                       row_h, w, h);
        }
    }
}
+
+
// Filter one restoration unit, stripe by stripe, from row y down to row_h.
// p points at the unit's top-left pixel; x is its horizontal position used
// to index the backed-up lpf line buffer; unit_w its width in pixels.
static void lr_stripe(const Dav1dFrameContext *const f, pixel *p, int x, int y,
                      const int plane, const int unit_w,
                      const int first_stripe_h, const int next_stripe_h,
                      const int row_h, const Av1RestorationUnit *const lr,
                      enum LrEdgeFlags edges)
{
    const Dav1dDSPContext *const dsp = f->dsp;
    // Remember whether this sb row has a bottom neighbour before the loop
    // below starts toggling LR_HAVE_BOTTOM per stripe.
    const int sbrow_has_bottom = (edges & LR_HAVE_BOTTOM);
    const pixel *lpf = f->lf.lr_lpf_line_ptr[plane] + x;
    const ptrdiff_t p_stride = f->cur.p.stride[!!plane];
    const ptrdiff_t lpf_stride = sizeof(pixel) * f->b4_stride * 4;

    int stripe_h = first_stripe_h;

    // FIXME [8] might be easier for SIMD
    // Expand the 3 stored taps into the full symmetric 7-tap wiener filter;
    // the centre tap is derived from the outer taps.
    int16_t filterh[7], filterv[7];
    if (lr->type == RESTORATION_WIENER) {
        filterh[0] = filterh[6] = lr->filter_h[0];
        filterh[1] = filterh[5] = lr->filter_h[1];
        filterh[2] = filterh[4] = lr->filter_h[2];
        filterh[3] = -((filterh[0] + filterh[1] + filterh[2]) * 2);

        filterv[0] = filterv[6] = lr->filter_v[0];
        filterv[1] = filterv[5] = lr->filter_v[1];
        filterv[2] = filterv[4] = lr->filter_v[2];
        filterv[3] = -((filterv[0] + filterv[1] + filterv[2]) * 2);
    }

    while (y + stripe_h <= row_h) {
        // TODO Look into getting rid of the this if
        // The last stripe of the sb row has no rows below it.
        if (y + stripe_h == row_h) {
            edges &= ~LR_HAVE_BOTTOM;
        } else {
            edges |= LR_HAVE_BOTTOM;
        }
        if (lr->type == RESTORATION_WIENER) {
            dsp->lr.wiener(p, p_stride, lpf, lpf_stride, unit_w, stripe_h,
                           filterh, filterv, edges);
        } else {
            assert(lr->type == RESTORATION_SGRPROJ);
            dsp->lr.selfguided(p, p_stride, lpf, lpf_stride, unit_w, stripe_h,
                               lr->sgr_idx, lr->sgr_weights, edges);
        }

        y += stripe_h;
        edges |= LR_HAVE_TOP;
        // A partial final stripe is left to the next sb row when one exists.
        if (y + stripe_h > row_h && sbrow_has_bottom) break;
        p += stripe_h * PXSTRIDE(p_stride);
        stripe_h = imin(next_stripe_h, row_h - y);
        if (stripe_h == 0) break;
        // Each stripe owns 4 backed-up rows in the lpf line buffer.
        lpf += 4 * PXSTRIDE(lpf_stride);
    }
}
+
// Save a 3-pixel-wide, u-row-tall column from a strided image into a
// densely packed 3xU scratch buffer.
static void backup3xU(pixel *dst, const pixel *src, const ptrdiff_t src_stride,
                      int u)
{
    for (; u > 0; u--, dst += 3, src += PXSTRIDE(src_stride))
        pixel_copy(dst, src, 3);
}
+
+static void restore3xU(pixel *dst, const ptrdiff_t dst_stride, const pixel *src,
+                       int u)
+{
+    for (; u > 0; u--, dst += PXSTRIDE(dst_stride), src += 3)
+        pixel_copy(dst, src, 3);
+}
+
+static void lr_sbrow(const Dav1dFrameContext *const f, pixel *p, const int y,
+                     const int w, const int h, const int row_h, const int plane)
+{
+    const int ss_ver = !!plane * f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const ptrdiff_t p_stride = f->cur.p.stride[!!plane];
+
+    const int unit_size_log2 = f->frame_hdr.restoration.unit_size[!!plane];
+    const int unit_size = 1 << unit_size_log2;
+    const int half_unit_size = unit_size >> 1;
+    const int max_unit_size = unit_size + half_unit_size;
+
+    const int row_y = y + ((8 >> ss_ver) * !!y);
+
+    // FIXME This is an ugly hack to lookup the proper AV1Filter unit for
+    // chroma planes. Question: For Multithreaded decoding, is it better
+    // to store the chroma LR information with collocated Luma information?
+    // In other words. For a chroma restoration unit locate at 128,128 and
+    // with a 4:2:0 chroma subsampling, do we store the filter information at
+    // the AV1Filter unit located at (128,128) or (256,256)
+    // TODO Support chroma subsampling.
+    const int shift = plane ? 6 : 7;
+
+    int ruy = (row_y >> unit_size_log2);
+    // Merge last restoration unit if its height is < half_unit_size
+    if (ruy > 0) ruy -= (ruy << unit_size_log2) + half_unit_size > h;
+
+    const int proc_h = 64 >> ss_ver;
+    const int stripe_h = proc_h - ((8 >> ss_ver) * !y);
+    const int filter_h = imin(stripe_h + proc_h * f->seq_hdr.sb128, h - y);
+
+    pixel pre_lr_border[filter_h * 3];
+    pixel post_lr_border[filter_h * 3];
+
+    int unit_w = unit_size;
+
+    enum LrEdgeFlags edges = (y > 0 ? LR_HAVE_TOP : 0) |
+                             (row_h < h ? LR_HAVE_BOTTOM : 0);
+
+    for (int x = 0, rux = 0; x < w; x+= unit_w, rux++, edges |= LR_HAVE_LEFT) {
+        // TODO Clean up this if statement.
+        if (x + max_unit_size > w) {
+            unit_w = w - x;
+            edges &= ~LR_HAVE_RIGHT;
+        } else {
+            edges |= LR_HAVE_RIGHT;
+        }
+
+        // Based on the position of the restoration unit, find the corresponding
+        // AV1Filter unit.
+        const int unit_idx = ((ruy & 16) >> 3) + ((rux & 16) >> 4);
+        const Av1RestorationUnit *const lr =
+            &f->lf.mask[(((ruy << unit_size_log2) >> shift) * f->sb128w) +
+                        (x >> shift)].lr[plane][unit_idx];
+
+        if (edges & LR_HAVE_LEFT) {
+            restore3xU(p - 3, p_stride, pre_lr_border, filter_h);
+        }
+        // FIXME Don't backup if the next restoration unit is RESTORE_NONE
+        // This also requires not restoring in the same conditions.
+        if (edges & LR_HAVE_RIGHT) {
+            backup3xU(pre_lr_border, p + unit_w - 3, p_stride, filter_h);
+        }
+        if (lr->type != RESTORATION_NONE) {
+            lr_stripe(f, p, x, y, plane, unit_w, stripe_h, proc_h,
+                      row_h, lr, edges);
+        }
+        if (edges & LR_HAVE_LEFT) {
+            restore3xU(p - 3, p_stride, post_lr_border, filter_h);
+        }
+        if (edges & LR_HAVE_RIGHT) {
+            backup3xU(post_lr_border, p + unit_w - 3, p_stride, filter_h);
+        }
+        p += unit_w;
+    }
+}
+
// Entry point: apply loop restoration to superblock row `sby` of every
// plane whose frame-level restoration type is enabled. dst points at the
// top of the sb row; processing starts 8 (luma) rows above it except for
// the first sb row of the frame.
void bytefn(dav1d_lr_sbrow)(Dav1dFrameContext *const f, pixel *const dst[3],
                            const int sby)
{
    const ptrdiff_t offset_y = 8 * !!sby;
    const ptrdiff_t *const dst_stride = f->cur.p.stride;

    const int restore_planes =
        ((f->frame_hdr.restoration.type[0] != RESTORATION_NONE) << 0) +
        ((f->frame_hdr.restoration.type[1] != RESTORATION_NONE) << 1) +
        ((f->frame_hdr.restoration.type[2] != RESTORATION_NONE) << 2);

    if (restore_planes & LR_RESTORE_Y) {
        const int h = f->bh << 2;
        const int w = f->bw << 2;
        const int row_h = imin((sby + 1) << (6 + f->seq_hdr.sb128), h);
        const int y_stripe = (sby << (6 + f->seq_hdr.sb128)) - offset_y;
        lr_sbrow(f, dst[0] - offset_y * PXSTRIDE(dst_stride[0]), y_stripe, w,
                 h, row_h, 0);
    }
    if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) {
        const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
        const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
        const int h = f->bh << (2 - ss_ver);
        const int w = f->bw << (2 - ss_hor);
        const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr.sb128), h);
        const ptrdiff_t offset_uv = offset_y >> ss_ver;
        const int y_stripe =
            (sby << ((6 - ss_ver) + f->seq_hdr.sb128)) - offset_uv;
        // dst_stride[1] is shared by both chroma planes.
        if (restore_planes & LR_RESTORE_U)
            lr_sbrow(f, dst[1] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
                     w, h, row_h, 1);

        if (restore_planes & LR_RESTORE_V)
            lr_sbrow(f, dst[2] - offset_uv * PXSTRIDE(dst_stride[1]), y_stripe,
                     w, h, row_h, 2);
    }
}
--- /dev/null
+++ b/src/lr_apply.h
@@ -1,0 +1,44 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
#ifndef __DAV1D_SRC_LR_APPLY_H__
#define __DAV1D_SRC_LR_APPLY_H__

#include <stdint.h>
#include <stddef.h>

#include "common/bitdepth.h"

#include "src/internal.h"

// Back up the rows of superblock row `sby` that loop restoration will need
// across stripe boundaries, before in-place filtering overwrites them.
void bytefn(dav1d_lr_copy_lpf)(Dav1dFrameContext *const f,
                               /*const*/ pixel *const src[3], int sby);

// Apply loop restoration to all enabled planes of superblock row `sby`.
void bytefn(dav1d_lr_sbrow)(Dav1dFrameContext *const f, pixel *const dst[3],
                            int sby);

#endif /* __DAV1D_SRC_LR_APPLY_H__ */
--- /dev/null
+++ b/src/mc.c
@@ -1,0 +1,533 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/mc.h"
+#include "src/tables.h"
+
+static __attribute__((noinline)) void
+put_c(pixel *dst, const ptrdiff_t dst_stride,
+      const pixel *src, const ptrdiff_t src_stride, const int w, int h)
+{
+    do {
+        pixel_copy(dst, src, w);
+
+        dst += dst_stride;
+        src += src_stride;
+    } while (--h);
+}
+
+static __attribute__((noinline)) void
+prep_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride,
+       const int w, int h)
+{
+    do {
+        for (int x = 0; x < w; x++)
+            tmp[x] = src[x] << 4;
+
+        tmp += w;
+        src += src_stride;
+    } while (--h);
+}
+
// 8-tap FIR filter at src[x]: taps F[0..7] applied to the samples at
// offsets -3..+4 in units of `stride`.
#define FILTER_8TAP(src, x, F, stride) \
    (F[0] * src[x + -3 * stride] + \
     F[1] * src[x + -2 * stride] + \
     F[2] * src[x + -1 * stride] + \
     F[3] * src[x + +0 * stride] + \
     F[4] * src[x + +1 * stride] + \
     F[5] * src[x + +2 * stride] + \
     F[6] * src[x + +3 * stride] + \
     F[7] * src[x + +4 * stride])

// As above, with a rounding right-shift by sh.
#define FILTER_8TAP_RND(src, x, F, stride, sh) \
    ((FILTER_8TAP(src, x, F, stride) + ((1 << sh) >> 1)) >> sh)

// Rounded and clipped to the valid pixel range.
#define FILTER_8TAP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(FILTER_8TAP_RND(src, x, F, stride, sh))

// Select the horizontal (fh) and vertical (fv) subpel tap tables; NULL
// means full-pel, no filtering in that direction. Blocks with w <= 4
// (resp. h <= 4) use the alternate tables at indices 3/4 — presumably the
// short-tap variants; confirm against the tables in tables.c.
#define GET_FILTERS() \
    const int8_t *const fh = !mx ? NULL : w > 4 ? \
        dav1d_mc_subpel_filters[filter_type & 3][mx - 1] : \
        dav1d_mc_subpel_filters[3 + (filter_type & 1)][mx - 1]; \
    const int8_t *const fv = !my ? NULL : h > 4 ? \
        dav1d_mc_subpel_filters[filter_type >> 2][my - 1] : \
        dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][my - 1]; \
+
// 8-tap subpel motion compensation into pixel output. mx/my are the subpel
// positions (0 = full-pel, no filtering in that direction); filter_type
// packs the horizontal filter type in bits 0-1 and the vertical type in
// bits 2-3.
static __attribute__((noinline)) void
put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
           const pixel *src, ptrdiff_t src_stride,
           const int w, int h, const int mx, const int my,
           const int filter_type)
{
    GET_FILTERS();
    dst_stride = PXSTRIDE(dst_stride);
    src_stride = PXSTRIDE(src_stride);

    if (fh) {
        if (fv) {
            // Separable 2D filtering: horizontal pass into a temporary
            // buffer of h + 7 rows (3 above and 4 below for the vertical
            // filter's footprint), then the vertical pass into dst.
            int tmp_h = h + 7;
            coef mid[128 * 135], *mid_ptr = mid;

            src -= src_stride * 3;
            do {
                for (int x = 0; x < w; x++)
                    mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);

                mid_ptr += 128;
                src += src_stride;
            } while (--tmp_h);

            mid_ptr = mid + 128 * 3;
            do {
                for (int x = 0; x < w; x++)
                    dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 11);

                mid_ptr += 128;
                dst += dst_stride;
            } while (--h);
        } else {
            // Horizontal-only: shift by 3 then 4 (7 total, like the
            // vertical-only path) with rounding at each step.
            do {
                for (int x = 0; x < w; x++) {
                    const int px = FILTER_8TAP_RND(src, x, fh, 1, 3);
                    dst[x] = iclip_pixel((px + 8) >> 4);
                }

                dst += dst_stride;
                src += src_stride;
            } while (--h);
        }
    } else if (fv) {
        do {
            for (int x = 0; x < w; x++)
                dst[x] = FILTER_8TAP_CLIP(src, x, fv, src_stride, 7);

            dst += dst_stride;
            src += src_stride;
        } while (--h);
    } else
        // Full-pel in both directions: plain copy.
        put_c(dst, dst_stride, src, src_stride, w, h);
}
+
// 8-tap subpel motion compensation into a packed coef buffer (stride == w),
// keeping extra intermediate precision and skipping the final clip; mirrors
// put_8tap_c otherwise.
static __attribute__((noinline)) void
prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
            const int w, int h, const int mx, const int my,
            const int filter_type)
{
    GET_FILTERS();
    src_stride = PXSTRIDE(src_stride);

    if (fh) {
        if (fv) {
            // Separable 2D filtering via an h + 7 row temporary buffer.
            int tmp_h = h + 7;
            coef mid[128 * 135], *mid_ptr = mid;

            src -= src_stride * 3;
            do {
                for (int x = 0; x < w; x++)
                    mid_ptr[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);

                mid_ptr += 128;
                src += src_stride;
            } while (--tmp_h);

            mid_ptr = mid + 128 * 3;
            do {
                for (int x = 0; x < w; x++)
                    tmp[x] = FILTER_8TAP_RND(mid_ptr, x, fv, 128, 7);

                mid_ptr += 128;
                tmp += w;
            } while (--h);
        } else {
            do {
                for (int x = 0; x < w; x++)
                    tmp[x] = FILTER_8TAP_RND(src, x, fh, 1, 3);

                tmp += w;
                src += src_stride;
            } while (--h);
        }
    } else if (fv) {
        do {
            for (int x = 0; x < w; x++)
                tmp[x] = FILTER_8TAP_RND(src, x, fv, src_stride, 3);

            tmp += w;
            src += src_stride;
        } while (--h);
    } else
        // Full-pel: store the source scaled to intermediate precision.
        prep_c(tmp, src, src_stride, w, h);
}
+
// Generate thin wrappers binding a (horizontal, vertical) filter-type pair
// for both the put (pixel output) and prep (coef output) paths.
#define filter_fns(type, type_h, type_v) \
static void put_8tap_##type##_c(pixel *const dst, \
                                const ptrdiff_t dst_stride, \
                                const pixel *const src, \
                                const ptrdiff_t src_stride, \
                                const int w, const int h, \
                                const int mx, const int my) \
{ \
    put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \
               type_h | (type_v << 2)); \
} \
static void prep_8tap_##type##_c(coef *const tmp, \
                                 const pixel *const src, \
                                 const ptrdiff_t src_stride, \
                                 const int w, const int h, \
                                 const int mx, const int my) \
{ \
    prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
                type_h | (type_v << 2)); \
}

// One wrapper pair per horizontal/vertical filter combination.
filter_fns(regular,        FILTER_8TAP_REGULAR, FILTER_8TAP_REGULAR)
filter_fns(regular_sharp,  FILTER_8TAP_REGULAR, FILTER_8TAP_SHARP)
filter_fns(regular_smooth, FILTER_8TAP_REGULAR, FILTER_8TAP_SMOOTH)
filter_fns(smooth,         FILTER_8TAP_SMOOTH,  FILTER_8TAP_SMOOTH)
filter_fns(smooth_regular, FILTER_8TAP_SMOOTH,  FILTER_8TAP_REGULAR)
filter_fns(smooth_sharp,   FILTER_8TAP_SMOOTH,  FILTER_8TAP_SHARP)
filter_fns(sharp,          FILTER_8TAP_SHARP,   FILTER_8TAP_SHARP)
filter_fns(sharp_regular,  FILTER_8TAP_SHARP,   FILTER_8TAP_REGULAR)
filter_fns(sharp_smooth,   FILTER_8TAP_SHARP,   FILTER_8TAP_SMOOTH)
+
// 2-tap bilinear interpolation between src[x] and src[x + stride]; the
// result is scaled by 16 (mxy is the subpel fraction).
#define FILTER_BILIN(src, x, mxy, stride) \
    (16 * src[x] + (mxy * (src[x + stride] - src[x])))

// As above, with a rounding right-shift by sh.
#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
    ((FILTER_BILIN(src, x, mxy, stride) + ((1 << sh) >> 1)) >> sh)

// Rounded and clipped to the valid pixel range.
#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
    iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))
+
// Bilinear subpel motion compensation into pixel output; mx/my are the
// subpel fractions (0 = full-pel, no filtering in that direction).
static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride,
                        const pixel *src, ptrdiff_t src_stride,
                        const int w, int h, const int mx, const int my)
{
    dst_stride = PXSTRIDE(dst_stride);
    src_stride = PXSTRIDE(src_stride);

    if (mx) {
        if (my) {
            // Separable 2D filtering: horizontal pass (unshifted, 4-bit
            // scale) into a temporary of h + 1 rows, then vertical pass
            // shifting out the combined 8 bits of scale.
            coef mid[128 * 129], *mid_ptr = mid;
            int tmp_h = h + 1;

            do {
                for (int x = 0; x < w; x++)
                    mid_ptr[x] = FILTER_BILIN(src, x, mx, 1);

                mid_ptr += 128;
                src += src_stride;
            } while (--tmp_h);

            mid_ptr = mid;
            do {
                for (int x = 0; x < w; x++)
                    dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128, 8);

                mid_ptr += 128;
                dst += dst_stride;
            } while (--h);
        } else {
            do {
                for (int x = 0; x < w; x++)
                    dst[x] = FILTER_BILIN_CLIP(src, x, mx, 1, 4);

                dst += dst_stride;
                src += src_stride;
            } while (--h);
        }
    } else if (my) {
        do {
            for (int x = 0; x < w; x++)
                dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4);

            dst += dst_stride;
            src += src_stride;
        } while (--h);
    } else
        // Full-pel in both directions: plain copy.
        put_c(dst, dst_stride, src, src_stride, w, h);
}
+
// Bilinear subpel motion compensation into a packed coef buffer
// (stride == w), keeping the 4-bit intermediate scale and skipping the
// clip; mirrors put_bilin_c otherwise.
static void prep_bilin_c(coef *tmp,
                         const pixel *src, ptrdiff_t src_stride,
                         const int w, int h, const int mx, const int my)
{
    src_stride = PXSTRIDE(src_stride);

    if (mx) {
        if (my) {
            // Separable 2D filtering via an h + 1 row temporary buffer.
            coef mid[128 * 129], *mid_ptr = mid;
            int tmp_h = h + 1;

            do {
                for (int x = 0; x < w; x++)
                    mid_ptr[x] = FILTER_BILIN(src, x, mx, 1);

                mid_ptr += 128;
                src += src_stride;
            } while (--tmp_h);

            mid_ptr = mid;
            do {
                for (int x = 0; x < w; x++)
                    tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4);

                mid_ptr += 128;
                tmp += w;
            } while (--h);
        } else {
            do {
                for (int x = 0; x < w; x++)
                    tmp[x] = FILTER_BILIN(src, x, mx, 1);

                tmp += w;
                src += src_stride;
            } while (--h);
        }
    } else if (my) {
        do {
            for (int x = 0; x < w; x++)
                tmp[x] = FILTER_BILIN(src, x, my, src_stride);

            tmp += w;
            src += src_stride;
        } while (--h);
    } else
        // Full-pel: store the source scaled to intermediate precision.
        prep_c(tmp, src, src_stride, w, h);
}
+
+static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
+                  const coef *tmp1, const coef *tmp2, const int w, int h)
+{
+    do {
+        for (int x = 0; x < w; x++)
+            dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + 16) >> 5);
+
+        tmp1 += w;
+        tmp2 += w;
+        dst += PXSTRIDE(dst_stride);
+    } while (--h);
+}
+
+static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
+                    const coef *tmp1, const coef *tmp2, const int w, int h,
+                    const int weight)
+{
+    do {
+        for (int x = 0; x < w; x++)
+            dst[x] = iclip_pixel((tmp1[x] * weight +
+                                  tmp2[x] * (16 - weight) + 128) >> 8);
+
+        tmp1 += w;
+        tmp2 += w;
+        dst += PXSTRIDE(dst_stride);
+    } while (--h);
+}
+
+static void mask_c(pixel *dst, const ptrdiff_t dst_stride,
+                   const coef *tmp1, const coef *tmp2, const int w, int h,
+                   const uint8_t *mask)
+{
+    do {
+        for (int x = 0; x < w; x++)
+            dst[x] = iclip_pixel((tmp1[x] * mask[x] +
+                                  tmp2[x] * (64 - mask[x]) + 512) >> 10);
+
+        tmp1 += w;
+        tmp2 += w;
+        mask += w;
+        dst += PXSTRIDE(dst_stride);
+    } while (--h);
+}
+
+static void blend_c(pixel *dst, const ptrdiff_t dst_stride,
+                    const pixel *tmp, const ptrdiff_t tmp_stride,
+                    const int w, const int h,
+                    const uint8_t *mask, const ptrdiff_t m_stride)
+{
+    for (int y = 0; y < h; y++) {
+        for (int x = 0; x < w; x++) {
+#define blend_px(a, b, m) (((a * (64 - m) + b * m) + 32) >> 6)
+            dst[x] = blend_px(dst[x], tmp[x], mask[m_stride == 1 ? 0 : x]);
+        }
+        dst += PXSTRIDE(dst_stride);
+        tmp += PXSTRIDE(tmp_stride);
+        mask += m_stride;
+    }
+}
+
// Blend two packed coef buffers using a mask derived from their per-pixel
// difference: m = min(38 + ((|a - b| + rnd) >> BITDEPTH), 64), then
// dst = clip((a * m + b * (64 - m) + 512) >> 10). The derived mask is also
// written out, subsampled horizontally (ss_hor) and/or vertically (ss_ver).
static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
                     const coef *tmp1, const coef *tmp2, const int w, int h,
                     uint8_t *mask, const int sign,
                     const int ss_hor, const int ss_ver)
{
    // store mask at 2x2 resolution, i.e. store 2x1 sum for even rows,
    // and then load this intermediate to calculate final value for odd rows
    const int rnd = 8 << (BITDEPTH - 8);
    do {
        for (int x = 0; x < w; x++) {
            const int m = imin(38 + ((abs(tmp1[x] - tmp2[x]) + rnd) >> BITDEPTH), 64);
            dst[x] = iclip_pixel((tmp1[x] * m +
                                  tmp2[x] * (64 - m) + 512) >> 10);

            if (ss_hor) {
                // Horizontal subsampling: process pixel pairs, storing one
                // mask value per pair.
                x++;

                const int n = imin(38 + ((abs(tmp1[x] - tmp2[x]) + rnd) >> BITDEPTH), 64);
                dst[x] = iclip_pixel((tmp1[x] * n +
                                      tmp2[x] * (64 - n) + 512) >> 10);

                if (h & ss_ver) {
                    // Second row of a vertical pair: average the stored 2x1
                    // sum with this row's pair (rounded, sign-adjusted).
                    mask[x >> 1] = (m + n + mask[x >> 1] + 2 - sign) >> 2;
                } else if (ss_ver) {
                    // First row of a vertical pair: stash the 2x1 sum.
                    mask[x >> 1] = m + n;
                } else {
                    // No vertical subsampling: 2x1 average only.
                    mask[x >> 1] = (m + n + 1 - sign) >> 1;
                }
            } else {
                mask[x] = m;
            }
        }

        tmp1 += w;
        tmp2 += w;
        dst += PXSTRIDE(dst_stride);
        // With vertical subsampling, only advance the mask row every other
        // source row.
        if (!ss_ver || (h & 1)) mask += w >> ss_hor;
    } while (--h);
}
+
// Generate one w_mask wrapper per chroma layout, binding the horizontal
// and vertical mask-subsampling flags.
#define w_mask_fns(ssn, ss_hor, ss_ver) \
static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \
                             const coef *const tmp1, const coef *const tmp2, \
                             const int w, const int h, uint8_t *mask, \
                             const int sign) \
{ \
    w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver); \
}

w_mask_fns(444, 0, 0);
w_mask_fns(422, 1, 0);
w_mask_fns(420, 1, 1);

#undef w_mask_fns
+
// Warped motion compensation of one 8x8 block into pixel output.
// Horizontal pass filters 15 source rows (3 above, 4 below the 8 output
// rows) into a temporary; each pixel selects its own 8-tap filter from
// dav1d_mc_warp_filter by its warped position, which advances by abcd[0]
// per column / abcd[1] per row horizontally and abcd[2] / abcd[3]
// vertically.
static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
                              const pixel *src, const ptrdiff_t src_stride,
                              const int16_t *const abcd, int mx, int my)
{
    coef mid[15 * 8], *mid_ptr = mid;

    src -= 3 * PXSTRIDE(src_stride);
    for (int y = 0; y < 15; y++, mx += abcd[1]) {
        for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) {
            const int8_t *const filter =
                dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)];

            mid_ptr[x] = FILTER_8TAP_RND(src, x, filter, 1, 3);
        }
        src += PXSTRIDE(src_stride);
        mid_ptr += 8;
    }

    // Vertical pass over the temporary, shifting out the remaining scale
    // and clipping to pixel range.
    mid_ptr = &mid[3 * 8];
    for (int y = 0; y < 8; y++, my += abcd[3]) {
        for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) {
            const int8_t *const filter =
                dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)];

            dst[x] = FILTER_8TAP_CLIP(mid_ptr, x, filter, 8, 11);
        }
        mid_ptr += 8;
        dst += PXSTRIDE(dst_stride);
    }
}
+
// Same warped motion compensation as warp_affine_8x8_c, but writing
// unclipped intermediate-precision values (vertical shift 7 instead of 11)
// into a coef buffer with an explicit stride.
static void warp_affine_8x8t_c(coef *tmp, const ptrdiff_t tmp_stride,
                               const pixel *src, const ptrdiff_t src_stride,
                               const int16_t *const abcd, int mx, int my)
{
    coef mid[15 * 8], *mid_ptr = mid;

    src -= 3 * PXSTRIDE(src_stride);
    for (int y = 0; y < 15; y++, mx += abcd[1]) {
        for (int x = 0, tmx = mx; x < 8; x++, tmx += abcd[0]) {
            const int8_t *const filter =
                dav1d_mc_warp_filter[64 + ((tmx + 512) >> 10)];

            mid_ptr[x] = FILTER_8TAP_RND(src, x, filter, 1, 3);
        }
        src += PXSTRIDE(src_stride);
        mid_ptr += 8;
    }

    mid_ptr = &mid[3 * 8];
    for (int y = 0; y < 8; y++, my += abcd[3]) {
        for (int x = 0, tmy = my; x < 8; x++, tmy += abcd[2]) {
            const int8_t *const filter =
                dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)];

            tmp[x] = FILTER_8TAP_RND(mid_ptr, x, filter, 8, 7);
        }
        mid_ptr += 8;
        tmp += tmp_stride;
    }
}
+
// Populate the motion-compensation DSP table with the C reference
// implementations for this bitdepth.
void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
#define init_mc_fns(type, name) do { \
    c->mc [type] = put_##name##_c; \
    c->mct[type] = prep_##name##_c; \
} while (0)

    init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
    init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
    init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp);
    init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular);
    init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth);
    init_mc_fns(FILTER_2D_8TAP_SHARP,          8tap_sharp);
    init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
    init_mc_fns(FILTER_2D_8TAP_SMOOTH,         8tap_smooth);
    init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp);
    init_mc_fns(FILTER_2D_BILINEAR,            bilin);

    // Compound prediction, blending and warp helpers.
    c->avg      = avg_c;
    c->w_avg    = w_avg_c;
    c->mask     = mask_c;
    c->blend    = blend_c;
    c->w_mask[0] = w_mask_444_c;
    c->w_mask[1] = w_mask_422_c;
    c->w_mask[2] = w_mask_420_c;
    c->warp8x8  = warp_affine_8x8_c;
    c->warp8x8t = warp_affine_8x8t_c;
}
--- /dev/null
+++ b/src/mc.h
@@ -1,0 +1,104 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_MC_H__
+#define __DAV1D_SRC_MC_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "common/bitdepth.h"
+
+#include "src/levels.h"
+
+/* Motion compensation into a pixel destination (unidirectional "put"). */
+#define decl_mc_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const pixel *src, ptrdiff_t src_stride, \
+            int w, int h, int mx, int my)
+typedef decl_mc_fn(*mc_fn);
+
+/* 8x8 warped motion compensation into a pixel destination.
+   abcd holds the non-translational affine coefficients — presumably the
+   2x2 matrix part of the warp model; TODO confirm against warpmv.h. */
+#define decl_warp8x8_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const pixel *src, ptrdiff_t src_stride, \
+            const int16_t *abcd, int mx, int my)
+typedef decl_warp8x8_fn(*warp8x8_fn);
+
+/* "prep" variant: writes intermediate coefficients for compound prediction,
+   later combined by avg/w_avg/mask/w_mask. */
+#define decl_mct_fn(name) \
+void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \
+            int w, int h, int mx, int my)
+typedef decl_mct_fn(*mct_fn);
+
+/* Warp variant of the "prep" path, writing coefficients at tmp_stride. */
+#define decl_warp8x8t_fn(name) \
+void (name)(coef *tmp, const ptrdiff_t tmp_stride, \
+            const pixel *src, ptrdiff_t src_stride, \
+            const int16_t *abcd, int mx, int my)
+typedef decl_warp8x8t_fn(*warp8x8t_fn);
+
+/* Plain average of two coefficient buffers into pixels. */
+#define decl_avg_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const coef *tmp1, const coef *tmp2, int w, int h)
+typedef decl_avg_fn(*avg_fn);
+
+/* Weighted average of two coefficient buffers into pixels. */
+#define decl_w_avg_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const coef *tmp1, const coef *tmp2, int w, int h, int weight)
+typedef decl_w_avg_fn(*w_avg_fn);
+
+/* Per-pixel masked combination of two coefficient buffers. */
+#define decl_mask_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const coef *tmp1, const coef *tmp2, int w, int h, \
+            const uint8_t *mask)
+typedef decl_mask_fn(*mask_fn);
+
+/* Like mask_fn, but the mask is an output (derived from the two
+   predictions); one variant per chroma subsampling, see w_mask[] below. */
+#define decl_w_mask_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const coef *tmp1, const coef *tmp2, int w, int h, \
+            uint8_t *mask, int sign)
+typedef decl_w_mask_fn(*w_mask_fn);
+
+/* Blend of pixel buffers with an explicit mask (mstride-spaced rows). */
+#define decl_blend_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const pixel *tmp, ptrdiff_t tmp_stride, int w, int h, \
+            const uint8_t *mask, ptrdiff_t mstride)
+typedef decl_blend_fn(*blend_fn);
+
+/* Function-pointer table for all MC primitives; filled per bit depth by
+   dav1d_mc_dsp_init_{8,10}bpc(). */
+typedef struct Dav1dMCDSPContext {
+    mc_fn mc[N_2D_FILTERS];
+    mct_fn mct[N_2D_FILTERS];
+    avg_fn avg;
+    w_avg_fn w_avg;
+    mask_fn mask;
+    w_mask_fn w_mask[3 /* 444, 422, 420 */];
+    blend_fn blend;
+    warp8x8_fn warp8x8;
+    warp8x8t_fn warp8x8t;
+} Dav1dMCDSPContext;
+
+void dav1d_mc_dsp_init_8bpc(Dav1dMCDSPContext *c);
+void dav1d_mc_dsp_init_10bpc(Dav1dMCDSPContext *c);
+
+#endif /* __DAV1D_SRC_MC_H__ */
--- /dev/null
+++ b/src/msac.c
@@ -1,0 +1,318 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <limits.h>
+
+#include "common/intops.h"
+
+#include "src/msac.h"
+
+typedef MsacContext od_ec_dec;
+
+//#define CDF_SIZE(x) ((x) + 1)
+#define CDF_PROB_BITS 15
+#define CDF_PROB_TOP (1 << CDF_PROB_BITS)
+//#define CDF_INIT_TOP 32768
+#define CDF_SHIFT (15 - CDF_PROB_BITS)
+
+#define OD_CLZ0 (1)
+#define OD_CLZ(x) (-get_msb(x))
+#define OD_ILOG_NZ(x) (OD_CLZ0 - OD_CLZ(x))
+
+/* Index of the most significant set bit of n, i.e. floor(log2(n)).
+   n must be non-zero. */
+static inline int get_msb(unsigned int n) {
+    assert(n != 0);
+    return 31 ^ __builtin_clz(n);
+}
+
+#define EC_PROB_SHIFT 6
+#define EC_MIN_PROB 4  // must be <= (1<<EC_PROB_SHIFT)/16
+
+/*OPT: od_ec_window must be at least 32 bits, but if you have fast arithmetic
+ on a larger type, you can speed up the decoder by using it here.*/
+typedef uint32_t od_ec_window;
+
+#define OD_EC_WINDOW_SIZE ((int)sizeof(od_ec_window) * CHAR_BIT)
+
+/*The resolution of fractional-precision bit usage measurements, i.e.,
+ 3 => 1/8th bits.*/
+#define OD_BITRES (3)
+
+/* Inverse-CDF convention: probabilities are stored as (32768 - cdf). */
+#define OD_ICDF AOM_ICDF
+
+#define AOM_ICDF(a) (32768-(a))
+
+/*A range decoder.
+  This is an entropy decoder based upon \cite{Mar79}, which is itself a
+   rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}.
+  It is very similar to arithmetic encoding, except that encoding is done with
+   digits in any base, instead of with bits, and so it is faster when using
+   larger bases (i.e.: a byte).
+  The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$
+   is the base, longer than the theoretical optimum, but to my knowledge there
+   is no published justification for this claim.
+  This only seems true when using near-infinite precision arithmetic so that
+   the process is carried out with no rounding errors.
+
+  An excellent description of implementation details is available at
+   http://www.arturocampos.com/ac_range.html
+  A recent work \cite{MNW98} which proposes several changes to arithmetic
+   encoding for efficiency actually re-discovers many of the principles
+   behind range encoding, and presents a good theoretical analysis of them.
+
+  End of stream is handled by writing out the smallest number of bits that
+   ensures that the stream will be correctly decoded regardless of the value of
+   any subsequent bits.
+  od_ec_dec_tell() can be used to determine how many bits were needed to decode
+   all the symbols thus far; other data can be packed in the remaining bits of
+   the input buffer.
+  @PHDTHESIS{Pas76,
+    author="Richard Clark Pasco",
+    title="Source coding algorithms for fast data compression",
+    school="Dept. of Electrical Engineering, Stanford University",
+    address="Stanford, CA",
+    month=May,
+    year=1976,
+    URL="http://www.richpasco.org/scaffdc.pdf"
+  }
+  @INPROCEEDINGS{Mar79,
+   author="Martin, G.N.N.",
+   title="Range encoding: an algorithm for removing redundancy from a digitised
+    message",
+   booktitle="Video & Data Recording Conference",
+   year=1979,
+   address="Southampton",
+   month=Jul,
+   URL="http://www.compressconsult.com/rangecoder/rngcod.pdf.gz"
+  }
+  @ARTICLE{MNW98,
+   author="Alistair Moffat and Radford Neal and Ian H. Witten",
+   title="Arithmetic Coding Revisited",
+   journal="{ACM} Transactions on Information Systems",
+   year=1998,
+   volume=16,
+   number=3,
+   pages="256--294",
+   month=Jul,
+   URL="http://researchcommons.waikato.ac.nz/bitstream/handle/10289/78/content.pdf"
+  }*/
+
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+#define OD_EC_LOTS_OF_BITS (0x4000)
+
+/*Reads more bytes from the input buffer into the low bits of the window dif
+   until at least 15 bits of lookahead are available, or the buffer runs out
+   (in which case cnt is padded so decoding can proceed past the end).*/
+static void od_ec_dec_refill(od_ec_dec *dec) {
+  int s;
+  od_ec_window dif;
+  int16_t cnt;
+  const unsigned char *bptr;
+  const unsigned char *end;
+  dif = dec->dif;
+  cnt = dec->cnt;
+  bptr = dec->bptr;
+  end = dec->end;
+  s = OD_EC_WINDOW_SIZE - 9 - (cnt + 15);
+  for (; s >= 0 && bptr < end; s -= 8, bptr++) {
+    assert(s <= OD_EC_WINDOW_SIZE - 8);
+    /*XOR rather than OR: the bits being refilled are currently all 1s (set by
+       init/normalize), so this stores the complement of the input byte.*/
+    dif ^= (od_ec_window)bptr[0] << s;
+    cnt += 8;
+  }
+  if (bptr >= end) {
+    /*Out of input: pretend we have lots of bits so tell() stays consistent.*/
+    dec->tell_offs += OD_EC_LOTS_OF_BITS - cnt;
+    cnt = OD_EC_LOTS_OF_BITS;
+  }
+  dec->dif = dif;
+  dec->cnt = cnt;
+  dec->bptr = bptr;
+}
+
+/*Takes updated dif and range values, renormalizes them so that
+   32768 <= rng < 65536 (reading more bytes from the stream into dif if
+   necessary), and stores them back in the decoder context.
+  dif: The new value of dif.
+  rng: The new value of the range.
+  ret: The value to return.
+  Return: ret.
+          This allows the compiler to jump to this function via a tail-call.*/
+static int od_ec_dec_normalize(od_ec_dec *dec, od_ec_window dif, unsigned rng,
+                               int ret) {
+  int d;
+  assert(rng <= 65535U);
+  /*Number of leading zero bits in the 16-bit range = shift needed to bring
+     rng back into [32768, 65536).*/
+  d = 16 - OD_ILOG_NZ(rng);
+  dec->cnt -= d;
+  /*This is equivalent to shifting in 1's instead of 0's.*/
+  dec->dif = ((dif + 1) << d) - 1;
+  dec->rng = rng << d;
+  if (dec->cnt < 0) od_ec_dec_refill(dec);
+  return ret;
+}
+
+/*Initializes the decoder.
+  buf: The input buffer to use.
+  storage: The size of the input buffer, in bytes.*/
+void od_ec_dec_init(od_ec_dec *dec, const unsigned char *buf,
+                    uint32_t storage) {
+  dec->buf = buf;
+  /*Offset used by bit-usage accounting, chosen so the initial refill and
+     window setup are counted correctly.*/
+  dec->tell_offs = 10 - (OD_EC_WINDOW_SIZE - 8);
+  dec->end = buf + storage;
+  dec->bptr = buf;
+  /*Window starts as all 1s; refill XORs input bytes into it.*/
+  dec->dif = ((od_ec_window)1 << (OD_EC_WINDOW_SIZE - 1)) - 1;
+  dec->rng = 0x8000;
+  dec->cnt = -15;
+  dec->error = 0;
+  od_ec_dec_refill(dec);
+}
+
+/*Decode a single binary value.
+  f: The probability that the bit is one, scaled by 32768.
+  Return: The value decoded (0 or 1).*/
+int od_ec_decode_bool_q15(od_ec_dec *dec, unsigned f) {
+  od_ec_window dif;
+  od_ec_window vw;
+  unsigned r;
+  unsigned r_new;
+  unsigned v;
+  int ret;
+  assert(0 < f);
+  assert(f < 32768U);
+  dif = dec->dif;
+  r = dec->rng;
+  assert(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  assert(32768U <= r);
+  /*Scale f (Q15) into the current range; EC_MIN_PROB keeps the split away
+     from the range boundaries.*/
+  v = ((r >> 8) * (uint32_t)(f >> EC_PROB_SHIFT) >> (7 - EC_PROB_SHIFT));
+  v += EC_MIN_PROB;
+  vw = (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+  ret = 1;
+  r_new = v;
+  /*Select the subinterval containing the coded value and shrink the range.*/
+  if (dif >= vw) {
+    r_new = r - v;
+    dif -= vw;
+    ret = 0;
+  }
+  return od_ec_dec_normalize(dec, dif, r_new, ret);
+}
+
+/*Decodes a symbol given an inverse cumulative distribution function (CDF)
+   table in Q15.
+  icdf: CDF_PROB_TOP minus the CDF, such that symbol s falls in the range
+         [s > 0 ? (CDF_PROB_TOP - icdf[s - 1]) : 0, CDF_PROB_TOP - icdf[s]).
+        The values must be monotonically non-increasing, and icdf[nsyms - 1]
+         must be 0.
+  nsyms: The number of symbols in the alphabet.
+         This should be at most 16.
+  Return: The decoded symbol s.*/
+int od_ec_decode_cdf_q15(od_ec_dec *dec, const uint16_t *icdf, int nsyms) {
+  od_ec_window dif;
+  unsigned r;
+  unsigned c;
+  unsigned u;
+  unsigned v;
+  int ret;
+  (void)nsyms;  /* NOTE(review): redundant — nsyms is used below. */
+  dif = dec->dif;
+  r = dec->rng;
+  const int N = nsyms - 1;
+
+  assert(dif >> (OD_EC_WINDOW_SIZE - 16) < r);
+  assert(icdf[nsyms - 1] == OD_ICDF(CDF_PROB_TOP));
+  assert(32768U <= r);
+  assert(7 - EC_PROB_SHIFT - CDF_SHIFT >= 0);
+  /*Top 16 bits of the window: the coded value within the current range.*/
+  c = (unsigned)(dif >> (OD_EC_WINDOW_SIZE - 16));
+  v = r;
+  ret = -1;
+  /*Linear search for the symbol whose scaled interval [v, u) contains c;
+     EC_MIN_PROB reserves a minimum slot per remaining symbol.*/
+  do {
+    u = v;
+    v = ((r >> 8) * (uint32_t)(icdf[++ret] >> EC_PROB_SHIFT) >>
+         (7 - EC_PROB_SHIFT - CDF_SHIFT));
+    v += EC_MIN_PROB * (N - ret);
+  } while (c < v);
+  assert(v < u);
+  assert(u <= r);
+  r = u - v;
+  dif -= (od_ec_window)v << (OD_EC_WINDOW_SIZE - 16);
+  return od_ec_dec_normalize(dec, dif, r, ret);
+}
+
+/* Thin wrapper: initializes the msac (range) decoder over [data, data+sz). */
+void msac_init(MsacContext *const c,
+               const uint8_t *const data, const size_t sz)
+{
+    od_ec_dec_init(c, data, sz);
+}
+
+/* Decodes one symbol from an inverse-CDF with n_symbols entries, without
+ * adapting the CDF (see msac_decode_symbol_adapt for the adapting form). */
+unsigned msac_decode_symbol(MsacContext *const c, const uint16_t *const cdf,
+                            const unsigned n_symbols)
+{
+    return od_ec_decode_cdf_q15(c, cdf, n_symbols);
+}
+
+/* Decodes a single bit with fixed probability cdf (Q15), no adaptation. */
+unsigned msac_decode_bool(MsacContext *const c, const unsigned cdf) {
+    return od_ec_decode_bool_q15(c, cdf);
+}
+
+/* Reads l equiprobable bits, MSB first (128 << 7 == 0.5 in Q15). */
+unsigned msac_decode_bools(MsacContext *const c, const unsigned l) {
+    int v = 0;
+    for (int n = (int) l - 1; n >= 0; n--)
+        v = (v << 1) | msac_decode_bool(c, 128 << 7);
+    return v;
+}
+
+/* Decodes a value in [0, n) with a subexponential code re-centered around
+ * ref; k is the initial bucket-size exponent. Values near ref take fewer
+ * bits. */
+int msac_decode_subexp(MsacContext *const c, const int ref,
+                       const unsigned n, const unsigned k)
+{
+    int i = 0;
+    int a = 0;
+    int b = k;
+    /* Grow the bucket (b bits starting at offset a) while continuation bits
+       are read and the doubled bucket still fits inside n. */
+    while ((2 << b) < n) {
+        if (!msac_decode_bool(c, 128 << 7)) break;
+        b = k + i++;
+        a = (1 << b);
+    }
+    const unsigned v = msac_decode_bools(c, b) + a;
+    /* Map the symmetric deviation v back around ref into [0, n). */
+    return ref * 2 <= n ? inv_recenter(ref, v) :
+                          n - 1 - inv_recenter(n - 1 - ref, v);
+}
+
+/* Decodes a value uniformly distributed in [0, n): the first m values use
+ * l-1 bits, the remaining values use one extra bit. */
+int msac_decode_uniform(MsacContext *const c, const unsigned n) {
+    assert(n > 0);
+    const int l = ulog2(n) + 1;
+    assert(l > 1);
+    const int m = (1 << l) - n;
+    const int v = msac_decode_bools(c, l - 1);
+    return v < m ? v : (v << 1) - m + msac_decode_bool(c, 128 << 7);
+}
+
+/* Adapts cdf in-place towards the just-decoded symbol val.
+ * cdf[nsymbs] is a saturating update counter (capped at 32) that, together
+ * with the alphabet size, selects the adaptation rate. */
+void update_cdf(uint16_t *cdf, unsigned val, unsigned nsymbs) {
+    int rate;
+    int i, tmp;
+
+    static const int nsymbs2speed[17] = {
+        0, 0, 1, 1, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2
+    };
+    assert(nsymbs < 17);
+    rate = 3 + (cdf[nsymbs] > 15) + (cdf[nsymbs] > 31) + nsymbs2speed[nsymbs];
+    tmp = 32768;
+
+    // Single loop (faster)
+    for (i = 0; i < nsymbs - 1; ++i) {
+        // tmp drops from 32768 to 0 once i reaches val and stays 0, so
+        // entries before val move towards 32768 and the rest towards 0.
+        tmp = (i == val) ? 0 : tmp;
+        if (tmp < cdf[i]) {
+            cdf[i] -= ((cdf[i] - tmp) >> rate);
+        } else {
+            cdf[i] += ((tmp - cdf[i]) >> rate);
+        }
+    }
+
+    cdf[nsymbs] += (cdf[nsymbs] < 32);
+}
--- /dev/null
+++ b/src/msac.h
@@ -1,0 +1,56 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef __DAV1D_SRC_MSAC_H__
+#define __DAV1D_SRC_MSAC_H__
+
+#include <stdint.h>
+#include <stdlib.h>
+
+/* Multi-symbol adaptive arithmetic (range) decoder state. */
+typedef struct MsacContext {
+    const uint8_t *buf, *end, *bptr; // input buffer start, end, read pointer
+    int32_t tell_offs;               // bit-usage accounting offset
+    uint32_t dif;                    // bit window holding the coded value
+    uint16_t rng;                    // current range, 32768 <= rng < 65536
+    int16_t cnt;                     // bits of lookahead available in dif
+    int error;
+} MsacContext;
+
+void msac_init(MsacContext *c, const uint8_t *data, size_t sz);
+unsigned msac_decode_symbol(MsacContext *c, const uint16_t *cdf,
+                            const unsigned n_symbols);
+unsigned msac_decode_bool(MsacContext *c, unsigned cdf);
+unsigned msac_decode_bools(MsacContext *c, unsigned l);
+int msac_decode_subexp(MsacContext *c, int ref, unsigned n, unsigned k);
+int msac_decode_uniform(MsacContext *c, unsigned n);
+void update_cdf(uint16_t *cdf, unsigned val, unsigned nsymbs);
+
+/* Decodes one symbol and adapts the CDF towards the decoded value. */
+static inline unsigned msac_decode_symbol_adapt(MsacContext *const c,
+                                                uint16_t *const cdf,
+                                                const unsigned n_symbols)
+{
+    const unsigned val = msac_decode_symbol(c, cdf, n_symbols);
+    update_cdf(cdf, val, n_symbols);
+    return val;
+}
+
+/* Decodes one adaptively-coded bit. cdf[] here is { probability, counter },
+ * while update_cdf() expects the counter at index nsymbs, so the pair is
+ * expanded into a 3-entry scratch CDF for the update and copied back. */
+static inline unsigned msac_decode_bool_adapt(MsacContext *const c,
+                                              uint16_t *const cdf)
+{
+    const unsigned bit = msac_decode_bool(c, *cdf);
+    uint16_t bak_cdf[3] = { cdf[0], 0, cdf[1] };
+    update_cdf(bak_cdf, bit, 2);
+    cdf[0] = bak_cdf[0];
+    cdf[1] = bak_cdf[2];
+    return bit;
+}
+
+#endif /* __DAV1D_SRC_MSAC_H__ */
--- /dev/null
+++ b/src/obu.c
@@ -1,0 +1,1102 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "dav1d/data.h"
+
+#include "common/intops.h"
+
+#include "src/decode.h"
+#include "src/getbits.h"
+#include "src/levels.h"
+#include "src/obu.h"
+#include "src/ref.h"
+#include "src/warpmv.h"
+
+/* Parses an AV1 sequence header OBU into c->seq_hdr.
+ * Returns the number of bytes consumed on success, or -EINVAL on a
+ * malformed header. */
+static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb) {
+    const uint8_t *const init_ptr = gb->ptr;
+    Av1SequenceHeader *const hdr = &c->seq_hdr;
+
+#define DEBUG_SEQ_HDR 0
+
+    hdr->profile = get_bits(gb, 3);
+    if (hdr->profile > 2U) goto error;
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-profile: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    hdr->still_picture = get_bits(gb, 1);
+    hdr->reduced_still_picture_header = get_bits(gb, 1);
+    if (hdr->reduced_still_picture_header && !hdr->still_picture) goto error;
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-stillpicture_flags: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    if (hdr->reduced_still_picture_header) {
+        // Reduced headers fix most fields to defaults and carry only a level.
+        hdr->timing_info_present = 0;
+        hdr->decoder_model_info_present = 0;
+        hdr->display_model_info_present = 0;
+        hdr->num_operating_points = 1;
+        hdr->operating_points[0].idc = 0;
+        hdr->operating_points[0].major_level = get_bits(gb, 3);
+        hdr->operating_points[0].minor_level = get_bits(gb, 2);
+        hdr->operating_points[0].tier = 0;
+        hdr->operating_points[0].decoder_model_param_present = 0;
+        hdr->operating_points[0].display_model_param_present = 0;
+    } else {
+        hdr->timing_info_present = get_bits(gb, 1);
+        if (hdr->timing_info_present) {
+            hdr->num_units_in_tick = get_bits(gb, 32);
+            hdr->time_scale = get_bits(gb, 32);
+            hdr->equal_picture_interval = get_bits(gb, 1);
+            if (hdr->equal_picture_interval)
+                hdr->num_ticks_per_picture = get_vlc(gb) + 1;
+
+            hdr->decoder_model_info_present = get_bits(gb, 1);
+            if (hdr->decoder_model_info_present) {
+                hdr->bitrate_scale = get_bits(gb, 4);
+                hdr->buffer_size_scale = get_bits(gb, 4);
+                hdr->encoder_decoder_buffer_delay_length = get_bits(gb, 5) + 1;
+                hdr->num_units_in_decoding_tick = get_bits(gb, 32);
+                hdr->buffer_removal_delay_length = get_bits(gb, 5) + 1;
+                hdr->frame_presentation_delay_length = get_bits(gb, 5) + 1;
+            }
+        } else {
+            hdr->decoder_model_info_present = 0;
+        }
+#if DEBUG_SEQ_HDR
+        printf("SEQHDR: post-timinginfo: off=%ld\n",
+               (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+        hdr->display_model_info_present = get_bits(gb, 1);
+        // NOTE(review): c->seq_hdr is the same object as *hdr here.
+        hdr->num_operating_points = get_bits(gb, 5) + 1;
+        for (int i = 0; i < c->seq_hdr.num_operating_points; i++) {
+            struct Av1SequenceHeaderOperatingPoint *const op =
+                &hdr->operating_points[i];
+            op->idc = get_bits(gb, 12);
+            op->major_level = 2 + get_bits(gb, 3);
+            op->minor_level = get_bits(gb, 2);
+            op->tier = op->major_level > 3 ? get_bits(gb, 1) : 0;
+            op->decoder_model_param_present =
+                hdr->decoder_model_info_present && get_bits(gb, 1);
+            if (op->decoder_model_param_present) {
+                op->bitrate = get_vlc(gb) + 1;
+                op->buffer_size = get_vlc(gb) + 1;
+                op->cbr = get_bits(gb, 1);
+                op->decoder_buffer_delay =
+                    get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
+                op->encoder_buffer_delay =
+                    get_bits(gb, hdr->encoder_decoder_buffer_delay_length);
+                op->low_delay_mode = get_bits(gb, 1);
+            }
+            op->display_model_param_present =
+                hdr->display_model_info_present && get_bits(gb, 1);
+            if (op->display_model_param_present) {
+                op->initial_display_delay = get_bits(gb, 4) + 1;
+            }
+        }
+#if DEBUG_SEQ_HDR
+        printf("SEQHDR: post-operating-points: off=%ld\n",
+               (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    }
+
+    // Maximum frame dimensions (per-frame sizes may be smaller).
+    hdr->width_n_bits = get_bits(gb, 4) + 1;
+    hdr->height_n_bits = get_bits(gb, 4) + 1;
+    hdr->max_width = get_bits(gb, hdr->width_n_bits) + 1;
+    hdr->max_height = get_bits(gb, hdr->height_n_bits) + 1;
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-size: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->frame_id_numbers_present =
+        hdr->reduced_still_picture_header ? 0 : get_bits(gb, 1);
+    if (hdr->frame_id_numbers_present) {
+        hdr->delta_frame_id_n_bits = get_bits(gb, 4) + 2;
+        hdr->frame_id_n_bits = get_bits(gb, 3) + hdr->delta_frame_id_n_bits + 1;
+    }
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-frame-id-numbers-present: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // Coding tool feature flags.
+    hdr->sb128 = get_bits(gb, 1);
+    hdr->filter_intra = get_bits(gb, 1);
+    hdr->intra_edge_filter = get_bits(gb, 1);
+    if (hdr->reduced_still_picture_header) {
+        hdr->inter_intra = 0;
+        hdr->masked_compound = 0;
+        hdr->warped_motion = 0;
+        hdr->dual_filter = 0;
+        hdr->order_hint = 0;
+        hdr->jnt_comp = 0;
+        hdr->ref_frame_mvs = 0;
+        hdr->order_hint_n_bits = 0;
+        hdr->screen_content_tools = ADAPTIVE;
+        hdr->force_integer_mv = ADAPTIVE;
+    } else {
+        hdr->inter_intra = get_bits(gb, 1);
+        hdr->masked_compound = get_bits(gb, 1);
+        hdr->warped_motion = get_bits(gb, 1);
+        hdr->dual_filter = get_bits(gb, 1);
+        hdr->order_hint = get_bits(gb, 1);
+        if (hdr->order_hint) {
+            hdr->jnt_comp = get_bits(gb, 1);
+            hdr->ref_frame_mvs = get_bits(gb, 1);
+        } else {
+            hdr->jnt_comp = 0;
+            hdr->ref_frame_mvs = 0;
+            hdr->order_hint_n_bits = 0;
+        }
+        // "select" flag first; if unset, an explicit enable bit follows.
+        hdr->screen_content_tools = get_bits(gb, 1) ? ADAPTIVE : get_bits(gb, 1);
+    #if DEBUG_SEQ_HDR
+        printf("SEQHDR: post-screentools: off=%ld\n",
+               (gb->ptr - init_ptr) * 8 - gb->bits_left);
+    #endif
+        hdr->force_integer_mv = hdr->screen_content_tools ?
+                                get_bits(gb, 1) ? ADAPTIVE : get_bits(gb, 1) : 2;
+        if (hdr->order_hint)
+            hdr->order_hint_n_bits = get_bits(gb, 3) + 1;
+    }
+    hdr->super_res = get_bits(gb, 1);
+    hdr->cdef = get_bits(gb, 1);
+    hdr->restoration = get_bits(gb, 1);
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-featurebits: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // Color config: bit depth, monochrome, primaries/transfer/matrix, layout.
+    const int hbd = get_bits(gb, 1);
+    hdr->bpc = hdr->profile == 2 && hbd ? 10 + 2 * get_bits(gb, 1): 8 + 2 * hbd;
+    hdr->hbd = hdr->bpc > 8;
+    const int monochrome = hdr->profile != 1 ? get_bits(gb, 1) : 0;
+    hdr->color_description_present = get_bits(gb, 1);
+    if (hdr->color_description_present) {
+        hdr->pri = get_bits(gb, 8);
+        hdr->trc = get_bits(gb, 8);
+        hdr->mtrx = get_bits(gb, 8);
+    } else {
+        hdr->pri = DAV1D_COLOR_PRI_UNKNOWN;
+        hdr->trc = DAV1D_TRC_UNKNOWN;
+        hdr->mtrx = DAV1D_MC_UNKNOWN;
+    }
+    if (monochrome) {
+        hdr->color_range = get_bits(gb, 1);
+        hdr->layout = DAV1D_PIXEL_LAYOUT_I400;
+        hdr->chr = DAV1D_CHR_UNKNOWN;
+        hdr->separate_uv_delta_q = 0;
+    } else if (hdr->pri == DAV1D_COLOR_PRI_BT709 &&
+               hdr->trc == DAV1D_TRC_SRGB &&
+               hdr->mtrx == DAV1D_MC_IDENTITY)
+    {
+        // sRGB implies full-range 4:4:4 and restricts the allowed profiles.
+        hdr->layout = DAV1D_PIXEL_LAYOUT_I444;
+        hdr->color_range = 1;
+        if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->bpc == 12))
+            goto error;
+    } else {
+        hdr->color_range = get_bits(gb, 1);
+        switch (hdr->profile) {
+        case 0: hdr->layout = DAV1D_PIXEL_LAYOUT_I420; break;
+        case 1: hdr->layout = DAV1D_PIXEL_LAYOUT_I444; break;
+        case 2:
+            if (hdr->bpc == 12) {
+                hdr->layout = get_bits(gb, 1) ?
+                              get_bits(gb, 1) ? DAV1D_PIXEL_LAYOUT_I420 :
+                                                DAV1D_PIXEL_LAYOUT_I422 :
+                                                DAV1D_PIXEL_LAYOUT_I444;
+            } else
+                hdr->layout = DAV1D_PIXEL_LAYOUT_I422;
+            break;
+        }
+        // NOTE(review): hdr->chr is only assigned for 4:2:0 here; it is left
+        // unset for 4:2:2/4:4:4 layouts — confirm intended.
+        if (hdr->layout == DAV1D_PIXEL_LAYOUT_I420)
+            hdr->chr = get_bits(gb, 2);
+        hdr->separate_uv_delta_q = get_bits(gb, 1);
+    }
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-colorinfo: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    c->seq_hdr.film_grain_present = get_bits(gb, 1);
+#if DEBUG_SEQ_HDR
+    printf("SEQHDR: post-filmgrain: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    get_bits(gb, 1); // dummy bit
+
+    // Bytes consumed from the start of the header.
+    return flush_get_bits(gb) - init_ptr;
+
+error:
+    fprintf(stderr, "Error parsing sequence header\n");
+    return -EINVAL;
+}
+
+/* Reads the frame (and render) dimensions into c->frame_hdr.
+ * If use_ref, the sizes may instead be inherited from a reference frame.
+ * Returns 0 on success, -1 on error or unsupported features (superres). */
+static int read_frame_size(Dav1dContext *const c, GetBits *const gb,
+                           const int use_ref)
+{
+    const Av1SequenceHeader *const seqhdr = &c->seq_hdr;
+    Av1FrameHeader *const hdr = &c->frame_hdr;
+
+    if (use_ref) {
+        // One flag per reference: first set flag selects that ref's size.
+        for (int i = 0; i < 7; i++) {
+            if (get_bits(gb, 1)) {
+                Dav1dThreadPicture *const ref =
+                    &c->refs[c->frame_hdr.refidx[i]].p;
+                if (!ref->p.data[0]) return -1;
+                // FIXME render_* may be wrong
+                hdr->render_width = hdr->width = ref->p.p.w;
+                hdr->render_height = hdr->height = ref->p.p.h;
+                hdr->super_res = 0; // FIXME probably wrong
+                return 0;
+            }
+        }
+    }
+
+    if (hdr->frame_size_override) {
+        hdr->width = get_bits(gb, seqhdr->width_n_bits) + 1;
+        hdr->height = get_bits(gb, seqhdr->height_n_bits) + 1;
+    } else {
+        hdr->width = seqhdr->max_width;
+        hdr->height = seqhdr->max_height;
+    }
+    hdr->super_res = seqhdr->super_res && get_bits(gb, 1);
+    if (hdr->super_res) return -1; // FIXME
+    hdr->have_render_size = get_bits(gb, 1);
+    if (hdr->have_render_size) {
+        hdr->render_width = get_bits(gb, seqhdr->width_n_bits) + 1;
+        hdr->render_height = get_bits(gb, seqhdr->height_n_bits) + 1;
+    } else {
+        hdr->render_width = hdr->width;
+        hdr->render_height = hdr->height;
+    }
+    return 0;
+}
+
+/* Smallest k such that (sz << k) >= tgt. */
+static inline int tile_log2(int sz, int tgt) {
+    int k;
+    for (k = 0; (sz << k) < tgt; k++) ;
+    return k;
+}
+
+/* Default loopfilter mode/ref deltas, used when the frame header does not
+   inherit deltas from a previous frame. */
+static const Av1LoopfilterModeRefDeltas default_mode_ref_deltas = {
+    .mode_delta = { 0, 0 },
+    .ref_delta = { 1, 0, 0, 0, -1, 0, -1, -1 },
+};
+
+static int parse_frame_hdr(Dav1dContext *const c, GetBits *const gb,
+                           const int have_trailing_bit)
+{
+    const uint8_t *const init_ptr = gb->ptr;
+    const Av1SequenceHeader *const seqhdr = &c->seq_hdr;
+    Av1FrameHeader *const hdr = &c->frame_hdr;
+    int res;
+
+#define DEBUG_FRAME_HDR 0
+
+    hdr->show_existing_frame =
+        !seqhdr->reduced_still_picture_header && get_bits(gb, 1);
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-show_existing_frame: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    if (hdr->show_existing_frame) {
+        hdr->existing_frame_idx = get_bits(gb, 3);
+        if (seqhdr->frame_id_numbers_present)
+            hdr->frame_id = get_bits(gb, seqhdr->frame_id_n_bits);
+        goto end;
+    }
+
+    hdr->frame_type = seqhdr->reduced_still_picture_header ? KEY_FRAME : get_bits(gb, 2);
+    hdr->show_frame = seqhdr->reduced_still_picture_header || get_bits(gb, 1);
+    if (!hdr->show_frame)
+        hdr->showable_frame = get_bits(gb, 1);
+    hdr->error_resilient_mode =
+        (hdr->frame_type == KEY_FRAME && hdr->show_frame) ||
+        hdr->frame_type == S_FRAME ||
+        seqhdr->reduced_still_picture_header || get_bits(gb, 1);
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-frametype_bits: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->disable_cdf_update = get_bits(gb, 1);
+    hdr->allow_screen_content_tools = seqhdr->screen_content_tools == ADAPTIVE ?
+                                 get_bits(gb, 1) : seqhdr->screen_content_tools;
+    if (hdr->allow_screen_content_tools)
+        hdr->force_integer_mv = seqhdr->force_integer_mv == ADAPTIVE ?
+                                get_bits(gb, 1) : seqhdr->force_integer_mv;
+
+    if (seqhdr->frame_id_numbers_present)
+        hdr->frame_id = get_bits(gb, seqhdr->frame_id_n_bits);
+
+    hdr->frame_size_override = seqhdr->reduced_still_picture_header ? 0 :
+                               hdr->frame_type == S_FRAME ? 1 : get_bits(gb, 1);
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-frame_size_override_flag: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->frame_offset = seqhdr->order_hint ?
+                        get_bits(gb, seqhdr->order_hint_n_bits) : 0;
+    hdr->primary_ref_frame = !hdr->error_resilient_mode && hdr->frame_type & 1 ?
+                             get_bits(gb, 3) : PRIMARY_REF_NONE;
+
+    if (hdr->frame_type == KEY_FRAME) {
+        hdr->refresh_frame_flags = hdr->show_frame ? 0xff : get_bits(gb, 8);
+        if ((res = read_frame_size(c, gb, 0)) < 0) goto error;
+        hdr->allow_intrabc = hdr->allow_screen_content_tools &&
+                             /* FIXME: no superres scaling && */ get_bits(gb, 1);
+        hdr->use_ref_frame_mvs = 0;
+    } else {
+        if (hdr->error_resilient_mode && seqhdr->order_hint)
+            for (int i = 0; i < 8; i++)
+                get_bits(gb, seqhdr->order_hint_n_bits);
+
+        if (hdr->frame_type == INTRAONLY_FRAME) {
+            hdr->refresh_frame_flags = get_bits(gb, 8);
+            if ((res = read_frame_size(c, gb, 0)) < 0) goto error;
+            hdr->allow_intrabc = hdr->allow_screen_content_tools &&
+                             /* FIXME: no superres scaling && */ get_bits(gb, 1);
+        } else {
+            hdr->allow_intrabc = 0;
+            hdr->refresh_frame_flags = hdr->frame_type == S_FRAME ? 0xff :
+                                       get_bits(gb, 8);
+            hdr->frame_ref_short_signaling =
+                seqhdr->order_hint && get_bits(gb, 1);
+            if (hdr->frame_ref_short_signaling) goto error; // FIXME
+            for (int i = 0; i < 7; i++) {
+                hdr->refidx[i] = get_bits(gb, 3);
+                if (seqhdr->frame_id_numbers_present)
+                    get_bits(gb, seqhdr->delta_frame_id_n_bits);
+            }
+            const int use_ref = !hdr->error_resilient_mode &&
+                                hdr->frame_size_override;
+            if ((res = read_frame_size(c, gb, use_ref)) < 0) goto error;
+            hdr->hp = !hdr->force_integer_mv && get_bits(gb, 1);
+            hdr->subpel_filter_mode = get_bits(gb, 1) ? FILTER_SWITCHABLE :
+                                                        get_bits(gb, 2);
+            hdr->switchable_motion_mode = get_bits(gb, 1);
+            hdr->use_ref_frame_mvs = !hdr->error_resilient_mode &&
+                seqhdr->ref_frame_mvs && seqhdr->order_hint &&
+                hdr->frame_type & 1 && get_bits(gb, 1);
+        }
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-frametype-specific-bits: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    hdr->refresh_context = !seqhdr->reduced_still_picture_header &&
+                           !hdr->disable_cdf_update && !get_bits(gb, 1);
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-refresh_context: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // tile data
+    hdr->tiling.uniform = get_bits(gb, 1);
+    const int sbsz_min1 = (64 << seqhdr->sb128) - 1;
+    int sbsz_log2 = 6 + seqhdr->sb128;
+    int sbw = (hdr->width + sbsz_min1) >> sbsz_log2;
+    int sbh = (hdr->height + sbsz_min1) >> sbsz_log2;
+    int max_tile_width_sb = 4096 >> sbsz_log2, max_tile_height_sb;
+    int max_tile_area_sb = 4096 * 2304 >> (2 * sbsz_log2);
+    hdr->tiling.min_log2_cols = tile_log2(max_tile_width_sb, sbw);
+    hdr->tiling.max_log2_cols = tile_log2(1, imin(sbw, 1024));
+    hdr->tiling.max_log2_rows = tile_log2(1, imin(sbh, 1024));
+    int min_log2_tiles = imax(tile_log2(max_tile_area_sb, sbw * sbh),
+                              hdr->tiling.min_log2_cols);
+    if (hdr->tiling.uniform) {
+        for (hdr->tiling.log2_cols = hdr->tiling.min_log2_cols;
+             hdr->tiling.log2_cols < hdr->tiling.max_log2_cols && get_bits(gb, 1);
+             hdr->tiling.log2_cols++) ;
+        const int tile_w = 1 + ((sbw - 1) >> hdr->tiling.log2_cols);
+        hdr->tiling.cols = 0;
+        for (int sbx = 0; sbx < sbw; sbx += tile_w, hdr->tiling.cols++)
+            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
+        hdr->tiling.min_log2_rows =
+            imax(min_log2_tiles - hdr->tiling.log2_cols, 0);
+        max_tile_height_sb = sbh >> hdr->tiling.min_log2_rows;
+
+        for (hdr->tiling.log2_rows = hdr->tiling.min_log2_rows;
+             hdr->tiling.log2_rows < hdr->tiling.max_log2_rows && get_bits(gb, 1);
+             hdr->tiling.log2_rows++) ;
+        const int tile_h = 1 + ((sbh - 1) >> hdr->tiling.log2_rows);
+        hdr->tiling.rows = 0;
+        for (int sby = 0; sby < sbh; sby += tile_h, hdr->tiling.rows++)
+            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
+    } else {
+        hdr->tiling.cols = 0;
+        int widest_tile = 0, max_tile_area_sb = sbw * sbh;
+        for (int sbx = 0; sbx < sbw; hdr->tiling.cols++) {
+            const int tile_w = get_uniform(gb, imin(sbw - sbx,
+                                                    max_tile_width_sb));
+            hdr->tiling.col_start_sb[hdr->tiling.cols] = sbx;
+            sbx += tile_w;
+            widest_tile = imax(widest_tile, tile_w);
+        }
+        hdr->tiling.log2_cols = tile_log2(1, hdr->tiling.cols);
+        if (min_log2_tiles) max_tile_area_sb >>= min_log2_tiles + 1;
+        max_tile_height_sb = imax(max_tile_area_sb / widest_tile, 1);
+
+        hdr->tiling.rows = 0;
+        for (int sby = 0; sby < sbh; hdr->tiling.rows++) {
+            const int tile_h = get_uniform(gb, imin(sbh - sby,
+                                                    max_tile_height_sb));
+            hdr->tiling.row_start_sb[hdr->tiling.rows] = sby;
+            sby += tile_h;
+        }
+        hdr->tiling.log2_rows = tile_log2(1, hdr->tiling.rows);
+    }
+    hdr->tiling.col_start_sb[hdr->tiling.cols] = sbw;
+    hdr->tiling.row_start_sb[hdr->tiling.rows] = sbh;
+    if (hdr->tiling.log2_cols || hdr->tiling.log2_rows) {
+        hdr->tiling.update = get_bits(gb, hdr->tiling.log2_cols +
+                                          hdr->tiling.log2_rows);
+        hdr->tiling.n_bytes = get_bits(gb, 2) + 1;
+    } else {
+        hdr->tiling.n_bytes = hdr->tiling.update = 0;
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-tiling: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // quant data
+    hdr->quant.yac = get_bits(gb, 8);
+    hdr->quant.ydc_delta = get_bits(gb, 1) ? get_sbits(gb, 6) : 0;
+    if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400) {
+        hdr->quant.udc_delta = get_bits(gb, 1) ? get_sbits(gb, 6) : 0;
+        hdr->quant.uac_delta = get_bits(gb, 1) ? get_sbits(gb, 6) : 0;
+        if (seqhdr->separate_uv_delta_q) {
+            hdr->quant.vdc_delta = get_bits(gb, 1) ? get_sbits(gb, 6) : 0;
+            hdr->quant.vac_delta = get_bits(gb, 1) ? get_sbits(gb, 6) : 0;
+        } else {
+            hdr->quant.vdc_delta = hdr->quant.udc_delta;
+            hdr->quant.vac_delta = hdr->quant.uac_delta;
+        }
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-quant: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->quant.qm = get_bits(gb, 1);
+    if (hdr->quant.qm) {
+        hdr->quant.qm_y = get_bits(gb, 4);
+        hdr->quant.qm_u = get_bits(gb, 4);
+        hdr->quant.qm_v = seqhdr->separate_uv_delta_q ? get_bits(gb, 4) :
+                                                        hdr->quant.qm_u;
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-qm: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // segmentation data
+    hdr->segmentation.enabled = get_bits(gb, 1);
+    if (hdr->segmentation.enabled) {
+        if (hdr->primary_ref_frame == PRIMARY_REF_NONE) {
+            hdr->segmentation.update_map = 1;
+            hdr->segmentation.temporal = 0;
+            hdr->segmentation.update_data = 1;
+        } else {
+            hdr->segmentation.update_map = get_bits(gb, 1);
+            hdr->segmentation.temporal =
+                hdr->segmentation.update_map ? get_bits(gb, 1) : 0;
+            hdr->segmentation.update_data = get_bits(gb, 1);
+        }
+
+        if (hdr->segmentation.update_data) {
+            hdr->segmentation.seg_data.preskip = 0;
+            hdr->segmentation.seg_data.last_active_segid = -1;
+            for (int i = 0; i < NUM_SEGMENTS; i++) {
+                Av1SegmentationData *const seg =
+                    &hdr->segmentation.seg_data.d[i];
+                if (get_bits(gb, 1)) {
+                    seg->delta_q = get_sbits(gb, 8);
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                } else {
+                    seg->delta_q = 0;
+                }
+                if (get_bits(gb, 1)) {
+                    seg->delta_lf_y_v = get_sbits(gb, 6);
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                } else {
+                    seg->delta_lf_y_v = 0;
+                }
+                if (get_bits(gb, 1)) {
+                    seg->delta_lf_y_h = get_sbits(gb, 6);
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                } else {
+                    seg->delta_lf_y_h = 0;
+                }
+                if (get_bits(gb, 1)) {
+                    seg->delta_lf_u = get_sbits(gb, 6);
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                } else {
+                    seg->delta_lf_u = 0;
+                }
+                if (get_bits(gb, 1)) {
+                    seg->delta_lf_v = get_sbits(gb, 6);
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                } else {
+                    seg->delta_lf_v = 0;
+                }
+                if (get_bits(gb, 1)) {
+                    seg->ref = get_bits(gb, 3);
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                    hdr->segmentation.seg_data.preskip = 1;
+                } else {
+                    seg->ref = -1;
+                }
+                if ((seg->skip = get_bits(gb, 1))) {
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                    hdr->segmentation.seg_data.preskip = 1;
+                }
+                if ((seg->globalmv = get_bits(gb, 1))) {
+                    hdr->segmentation.seg_data.last_active_segid = i;
+                    hdr->segmentation.seg_data.preskip = 1;
+                }
+            }
+        } else {
+            const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
+            hdr->segmentation.seg_data = c->refs[pri_ref].seg_data;
+        }
+    } else {
+        const int pri_ref = hdr->refidx[hdr->primary_ref_frame];
+        hdr->segmentation.seg_data = c->refs[pri_ref].seg_data;
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-segmentation: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // delta q
+    hdr->delta_q_present = hdr->quant.yac ? get_bits(gb, 1) : 0;
+    hdr->delta_q_res_log2 = hdr->delta_q_present ? get_bits(gb, 2) : 0;
+    hdr->delta_lf_present = hdr->delta_q_present && !hdr->allow_intrabc &&
+                            get_bits(gb, 1);
+    hdr->delta_lf_res_log2 = hdr->delta_lf_present ? get_bits(gb, 2) : 0;
+    hdr->delta_lf_multi = hdr->delta_lf_present ? get_bits(gb, 1) : 0;
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-delta_q_lf_flags: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // derive lossless flags
+    const int delta_lossless = !hdr->quant.ydc_delta && !hdr->quant.udc_delta &&
+        !hdr->quant.uac_delta && !hdr->quant.vdc_delta && !hdr->quant.vac_delta;
+    hdr->all_lossless = 1;
+    for (int i = 0; i < NUM_SEGMENTS; i++) {
+        hdr->segmentation.qidx[i] = hdr->segmentation.enabled ?
+            iclip_u8(hdr->quant.yac + hdr->segmentation.seg_data.d[i].delta_q) :
+            hdr->quant.yac;
+        hdr->segmentation.lossless[i] =
+            !hdr->segmentation.qidx[i] && delta_lossless;
+        hdr->all_lossless &= hdr->segmentation.lossless[i];
+    }
+
+    // loopfilter
+    if (hdr->all_lossless || hdr->allow_intrabc) {
+        hdr->loopfilter.level_y[0] = hdr->loopfilter.level_y[1] = 0;
+        hdr->loopfilter.level_u = hdr->loopfilter.level_v = 0;
+        hdr->loopfilter.sharpness = 0;
+        hdr->loopfilter.mode_ref_delta_enabled = 1;
+        hdr->loopfilter.mode_ref_delta_update = 1;
+        hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
+    } else {
+        hdr->loopfilter.level_y[0] = get_bits(gb, 6);
+        hdr->loopfilter.level_y[1] = get_bits(gb, 6);
+        if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400 &&
+            (hdr->loopfilter.level_y[0] || hdr->loopfilter.level_y[1]))
+        {
+            hdr->loopfilter.level_u = get_bits(gb, 6);
+            hdr->loopfilter.level_v = get_bits(gb, 6);
+        }
+        hdr->loopfilter.sharpness = get_bits(gb, 3);
+
+        if (hdr->primary_ref_frame == PRIMARY_REF_NONE) {
+            hdr->loopfilter.mode_ref_deltas = default_mode_ref_deltas;
+        } else {
+            const int ref = hdr->refidx[hdr->primary_ref_frame];
+            hdr->loopfilter.mode_ref_deltas = c->refs[ref].lf_mode_ref_deltas;
+        }
+        hdr->loopfilter.mode_ref_delta_enabled = get_bits(gb, 1);
+        if (hdr->loopfilter.mode_ref_delta_enabled) {
+            hdr->loopfilter.mode_ref_delta_update = get_bits(gb, 1);
+            if (hdr->loopfilter.mode_ref_delta_update) {
+                for (int i = 0; i < 8; i++)
+                    if (get_bits(gb, 1))
+                        hdr->loopfilter.mode_ref_deltas.ref_delta[i] =
+                            get_sbits(gb, 6);
+                for (int i = 0; i < 2; i++)
+                    if (get_bits(gb, 1))
+                        hdr->loopfilter.mode_ref_deltas.mode_delta[i] =
+                            get_sbits(gb, 6);
+            }
+        }
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-lpf: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // cdef
+    if (!hdr->all_lossless && seqhdr->cdef && !hdr->allow_intrabc) {
+        hdr->cdef.damping = get_bits(gb, 2) + 3;
+        hdr->cdef.n_bits = get_bits(gb, 2);
+        for (int i = 0; i < (1 << hdr->cdef.n_bits); i++) {
+            hdr->cdef.y_strength[i] = get_bits(gb, 6);
+            if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400)
+                hdr->cdef.uv_strength[i] = get_bits(gb, 6);
+        }
+    } else {
+        hdr->cdef.n_bits = 0;
+        hdr->cdef.y_strength[0] = 0;
+        hdr->cdef.uv_strength[0] = 0;
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-cdef: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    // restoration
+    if (!hdr->all_lossless && seqhdr->restoration && !hdr->allow_intrabc) {
+        hdr->restoration.type[0] = get_bits(gb, 2);
+        if (seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400) {
+            hdr->restoration.type[1] = get_bits(gb, 2);
+            hdr->restoration.type[2] = get_bits(gb, 2);
+        }
+
+        if (hdr->restoration.type[0] || hdr->restoration.type[1] ||
+            hdr->restoration.type[2])
+        {
+            // Log2 of the restoration unit size.
+            hdr->restoration.unit_size[0] = 6 + seqhdr->sb128;
+            if (get_bits(gb, 1)) {
+                hdr->restoration.unit_size[0]++;
+                if (!seqhdr->sb128)
+                    hdr->restoration.unit_size[0] += get_bits(gb, 1);
+            }
+            hdr->restoration.unit_size[1] = hdr->restoration.unit_size[0];
+            if ((hdr->restoration.type[1] || hdr->restoration.type[2]) &&
+                seqhdr->layout == DAV1D_PIXEL_LAYOUT_I420)
+            {
+                hdr->restoration.unit_size[1] -= get_bits(gb, 1);
+            }
+        } else {
+            hdr->restoration.unit_size[0] = 8;
+        }
+    } else {
+        hdr->restoration.type[0] = RESTORATION_NONE;
+        hdr->restoration.type[1] = RESTORATION_NONE;
+        hdr->restoration.type[2] = RESTORATION_NONE;
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-restoration: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    hdr->txfm_mode = hdr->all_lossless ? TX_4X4_ONLY :
+                     get_bits(gb, 1) ? TX_SWITCHABLE : TX_LARGEST;
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-txfmmode: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->switchable_comp_refs = hdr->frame_type & 1 ? get_bits(gb, 1) : 0;
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-refmode: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->skip_mode_allowed = 0;
+    if (hdr->switchable_comp_refs && hdr->frame_type & 1) {
+        const unsigned poc = hdr->frame_offset;
+        unsigned off_before[2] = { 0xFFFFFFFF, 0xFFFFFFFF };
+        int off_after = -1;
+        int off_before_idx[2], off_after_idx;
+        for (int i = 0; i < 7; i++) {
+            const unsigned refpoc = c->refs[hdr->refidx[i]].p.p.poc;
+
+            const int diff = get_poc_diff(seqhdr->order_hint_n_bits, refpoc, poc);
+            if (diff > 0) {
+                if (off_after == -1 || get_poc_diff(seqhdr->order_hint_n_bits,
+                                                    off_after, refpoc) > 0)
+                {
+                    off_after = refpoc;
+                    off_after_idx = i;
+                }
+            } else if (diff < 0) {
+                if (off_before[0] == 0xFFFFFFFFU ||
+                    get_poc_diff(seqhdr->order_hint_n_bits,
+                                 refpoc, off_before[0]) > 0)
+                {
+                    off_before[1] = off_before[0];
+                    off_before[0] = refpoc;
+                    off_before_idx[1] = off_before_idx[0];
+                    off_before_idx[0] = i;
+                } else if (refpoc != off_before[0] &&
+                           (off_before[1] == 0xFFFFFFFFU ||
+                            get_poc_diff(seqhdr->order_hint_n_bits,
+                                         refpoc, off_before[1]) > 0))
+                {
+                    off_before[1] = refpoc;
+                    off_before_idx[1] = i;
+                }
+            }
+        }
+
+        if (off_before[0] != 0xFFFFFFFFU && off_after != -1) {
+            hdr->skip_mode_refs[0] = off_before_idx[0];
+            hdr->skip_mode_refs[1] = off_after_idx;
+            hdr->skip_mode_allowed = 1;
+        } else if (off_before[0] != 0xFFFFFFFFU &&
+                   off_before[1] != 0xFFFFFFFFU)
+        {
+            hdr->skip_mode_refs[0] = off_before_idx[0];
+            hdr->skip_mode_refs[1] = off_before_idx[1];
+            hdr->skip_mode_allowed = 1;
+        }
+    }
+    hdr->skip_mode_enabled = hdr->skip_mode_allowed ? get_bits(gb, 1) : 0;
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-extskip: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->warp_motion = !hdr->error_resilient_mode && hdr->frame_type & 1 &&
+        seqhdr->warped_motion && get_bits(gb, 1);
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-warpmotionbit: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+    hdr->reduced_txtp_set = get_bits(gb, 1);
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-reducedtxtpset: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    for (int i = 0; i < 7; i++)
+        hdr->gmv[i] = default_wm_params;
+
+    if (hdr->frame_type & 1) {
+        for (int i = 0; i < 7; i++) {
+            hdr->gmv[i].type = !get_bits(gb, 1) ? WM_TYPE_IDENTITY :
+                                get_bits(gb, 1) ? WM_TYPE_ROT_ZOOM :
+                                get_bits(gb, 1) ? WM_TYPE_TRANSLATION :
+                                                  WM_TYPE_AFFINE;
+
+            if (hdr->gmv[i].type == WM_TYPE_IDENTITY) continue;
+
+            const WarpedMotionParams *const ref_gmv =
+                hdr->primary_ref_frame == PRIMARY_REF_NONE ? &default_wm_params :
+                &c->refs[hdr->refidx[hdr->primary_ref_frame]].gmv[i];
+            int32_t *const mat = hdr->gmv[i].matrix;
+            const int32_t *const ref_mat = ref_gmv->matrix;
+            int bits, shift;
+
+            if (hdr->gmv[i].type >= WM_TYPE_ROT_ZOOM) {
+                mat[2] = (1 << 16) + 2 *
+                    get_bits_subexp(gb, (ref_mat[2] - (1 << 16)) >> 1, 12);
+                mat[3] = 2 * get_bits_subexp(gb, ref_mat[3] >> 1, 12);
+
+                bits = 12;
+                shift = 10;
+            } else {
+                bits = 9 - !hdr->hp;
+                shift = 13 + !hdr->hp;
+            }
+
+            if (hdr->gmv[i].type == WM_TYPE_AFFINE) {
+                mat[4] = 2 * get_bits_subexp(gb, ref_mat[4] >> 1, 12);
+                mat[5] = (1 << 16) + 2 *
+                    get_bits_subexp(gb, (ref_mat[5] - (1 << 16)) >> 1, 12);
+            } else {
+                mat[4] = -mat[3];
+                mat[5] = mat[2];
+            }
+
+            mat[0] = get_bits_subexp(gb, ref_mat[0] >> shift, bits) * (1 << shift);
+            mat[1] = get_bits_subexp(gb, ref_mat[1] >> shift, bits) * (1 << shift);
+
+            if (get_shear_params(&hdr->gmv[i]))
+                goto error;
+        }
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-gmv: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+    hdr->film_grain.present = seqhdr->film_grain_present && get_bits(gb, 1);
+    if (hdr->film_grain.present) {
+        hdr->film_grain.seed = get_bits(gb, 16);
+        hdr->film_grain.update = hdr->frame_type != INTER_FRAME || get_bits(gb, 1);
+        if (!hdr->film_grain.update) {
+            const int refidx = get_bits(gb, 3);
+            int i;
+            for (i = 0; i < 7; i++)
+                if (hdr->refidx[i] == refidx)
+                    break;
+            if (i == 7) goto error;
+            hdr->film_grain.data = c->refs[refidx].film_grain;
+        } else {
+            Av1FilmGrainData *const fgd = &hdr->film_grain.data;
+
+            fgd->num_y_points = get_bits(gb, 4);
+            if (fgd->num_y_points > 14) goto error;
+            for (int i = 0; i < fgd->num_y_points; i++) {
+                fgd->y_points[i][0] = get_bits(gb, 8);
+                if (i && fgd->y_points[i - 1][0] >= fgd->y_points[i][0])
+                    goto error;
+                fgd->y_points[i][1] = get_bits(gb, 8);
+            }
+
+            fgd->chroma_scaling_from_luma =
+                seqhdr->layout != DAV1D_PIXEL_LAYOUT_I400 && get_bits(gb, 1);
+            if (seqhdr->layout == DAV1D_PIXEL_LAYOUT_I400 ||
+                fgd->chroma_scaling_from_luma ||
+                (seqhdr->layout == DAV1D_PIXEL_LAYOUT_I420 && !fgd->num_y_points))
+            {
+                fgd->num_uv_points[0] = fgd->num_uv_points[1] = 0;
+            } else for (int pl = 0; pl < 2; pl++) {
+                fgd->num_uv_points[pl] = get_bits(gb, 4);
+                if (fgd->num_uv_points[pl] > 10) goto error;
+                for (int i = 0; i < fgd->num_uv_points[pl]; i++) {
+                    fgd->uv_points[pl][i][0] = get_bits(gb, 8);
+                    if (i && fgd->uv_points[pl][i - 1][0] >= fgd->uv_points[pl][i][0])
+                        goto error;
+                    fgd->uv_points[pl][i][1] = get_bits(gb, 8);
+                }
+            }
+
+            if (seqhdr->layout == DAV1D_PIXEL_LAYOUT_I420 &&
+                !!fgd->num_uv_points[0] != !!fgd->num_uv_points[1])
+            {
+                goto error;
+            }
+
+            fgd->scaling_shift = get_bits(gb, 2) + 8;
+            fgd->ar_coeff_lag = get_bits(gb, 2);
+            const int num_y_pos = 2 * fgd->ar_coeff_lag * (fgd->ar_coeff_lag + 1);
+            if (fgd->num_y_points)
+                for (int i = 0; i < num_y_pos; i++)
+                    fgd->ar_coeffs_y[i] = get_bits(gb, 8) - 128;
+            for (int pl = 0; pl < 2; pl++)
+                if (fgd->num_uv_points[pl] || fgd->chroma_scaling_from_luma) {
+                    const int num_uv_pos = num_y_pos + !!fgd->num_y_points;
+                    for (int i = 0; i < num_uv_pos; i++)
+                        fgd->ar_coeffs_uv[pl][i] = get_bits(gb, 8) - 128;
+                }
+            fgd->ar_coeff_shift = get_bits(gb, 2) + 6;
+            fgd->grain_scale_shift = get_bits(gb, 2);
+            for (int pl = 0; pl < 2; pl++)
+                if (fgd->num_uv_points[pl]) {
+                    fgd->uv_mult[pl] = get_bits(gb, 8);
+                    fgd->uv_luma_mult[pl] = get_bits(gb, 8);
+                    fgd->uv_offset[pl] = get_bits(gb, 9);
+                }
+            fgd->overlap_flag = get_bits(gb, 1);
+            fgd->clip_to_restricted_range = get_bits(gb, 1);
+        }
+    } else {
+        memset(&hdr->film_grain.data, 0, sizeof(hdr->film_grain));
+    }
+#if DEBUG_FRAME_HDR
+    printf("HDR: post-filmgrain: off=%ld\n",
+           (gb->ptr - init_ptr) * 8 - gb->bits_left);
+#endif
+
+end:
+
+    if (have_trailing_bit)
+        get_bits(gb, 1); // dummy bit
+
+    return flush_get_bits(gb) - init_ptr;
+
+error:
+    fprintf(stderr, "Error parsing frame header\n");
+    return -EINVAL;
+}
+
+/* Parse a tile group header from the bitstream.
+ *
+ * Reads the optional tile-position flag and, when set, explicit start/end
+ * tile indices for this tile group; otherwise the group covers every tile
+ * in the frame. The resulting range is stored in c->tile[c->n_tile_data]
+ * (the caller advances n_tile_data afterwards).
+ *
+ * Returns the number of bytes consumed from the bitstream.
+ */
+static int parse_tile_hdr(Dav1dContext *const c, GetBits *const gb) {
+    const uint8_t *const init_ptr = gb->ptr;
+
+    int have_tile_pos = 0;
+    // Number of bits used to code a tile index: log2 of the total tile
+    // count taken from the most recently parsed frame header.
+    const int n_bits = c->frame_hdr.tiling.log2_cols +
+                       c->frame_hdr.tiling.log2_rows;
+    if (n_bits)
+        have_tile_pos = get_bits(gb, 1);
+
+    if (have_tile_pos) {
+        // Explicit [start, end] tile range for this tile group.
+        c->tile[c->n_tile_data].start = get_bits(gb, n_bits);
+        c->tile[c->n_tile_data].end = get_bits(gb, n_bits);
+    } else {
+        // Single tile group spanning all tiles of the frame.
+        c->tile[c->n_tile_data].start = 0;
+        c->tile[c->n_tile_data].end = (1 << n_bits) - 1;
+    }
+
+    return flush_get_bits(gb) - init_ptr;
+}
+
+/* Parse one OBU (open bitstream unit) from the input data.
+ *
+ * Reads the OBU header and the variable-length size field, then dispatches
+ * on the OBU type: sequence headers and frame headers update decoder state,
+ * frame/tile-group OBUs collect tile payload references, and padding,
+ * temporal-delimiter and metadata OBUs are skipped. When tile data for a
+ * complete frame has been gathered, the frame is submitted for decoding;
+ * a show_existing_frame header instead re-outputs a reference picture.
+ *
+ * Returns the total number of bytes consumed (header + payload), or
+ * -EINVAL on malformed input.
+ */
+int parse_obus(Dav1dContext *const c, Dav1dData *const in) {
+    GetBits gb;
+    int res;
+
+    init_get_bits(&gb, in->data, in->sz);
+
+    // obu header
+    get_bits(&gb, 1); // obu_forbidden_bit
+    const enum ObuType type = get_bits(&gb, 4);
+    const int has_extension = get_bits(&gb, 1);
+    const int has_length_field = get_bits(&gb, 1);
+    // Only OBUs carrying an explicit size field are supported here.
+    if (!has_length_field) goto error;
+    get_bits(&gb, 1); // reserved
+    if (has_extension) {
+        get_bits(&gb, 3); // temporal_layer_id
+        get_bits(&gb, 2); // enhancement_layer_id
+        get_bits(&gb, 3); // reserved
+    }
+
+    // obu length field
+    // Variable-length size: a continuation bit followed by 7 value bits per
+    // byte, little-endian groups; capped at 8 groups to bound the loop.
+    int len = 0, more, i = 0;
+    do {
+        more = get_bits(&gb, 1);
+        len |= get_bits(&gb, 7) << (i * 7);
+        if (more && ++i == 8) goto error;
+    } while (more);
+    if (gb.error) goto error;
+
+    // Byte offset of the OBU payload within the input buffer.
+    int off = flush_get_bits(&gb) - in->data;
+    const int init_off = off;
+    // Reject size fields that claim more payload than the buffer holds.
+    if (len > in->sz - off) goto error;
+
+    switch (type) {
+    case OBU_SEQ_HDR:
+        if ((res = parse_seq_hdr(c, &gb)) < 0)
+            return res;
+        // The sequence header must consume exactly the signaled length.
+        if (res != len) goto error;
+        c->have_seq_hdr = 1;
+        // A new sequence invalidates any pending frame header.
+        c->have_frame_hdr = 0;
+        break;
+    case OBU_FRAME:
+    case OBU_FRAME_HDR:
+        if (!c->have_seq_hdr) goto error;
+        if ((res = parse_frame_hdr(c, &gb, type == OBU_FRAME_HDR)) < 0)
+            return res;
+        c->have_frame_hdr = 1;
+        // Drop tile data left over from any previous (incomplete) frame.
+        for (int n = 0; n < c->n_tile_data; n++)
+            dav1d_data_unref(&c->tile[n].data);
+        c->n_tile_data = 0;
+        c->tile_mask = 0;
+        if (type == OBU_FRAME_HDR) break;
+        // OBU_FRAME: tile group data follows the header in the same OBU.
+        off += res;
+        // fall-through
+    case OBU_TILE_GRP:
+        if (!c->have_frame_hdr) goto error;
+        if (c->n_tile_data >= 256) goto error;
+        if ((res = parse_tile_hdr(c, &gb)) < 0)
+            return res;
+        off += res;
+        // Keep the input buffer alive while the tile payload references it.
+        dav1d_ref_inc(in->ref);
+        c->tile[c->n_tile_data].data.ref = in->ref;
+        c->tile[c->n_tile_data].data.data = in->data + off;
+        c->tile[c->n_tile_data].data.sz = len + init_off - off;
+        if (c->tile[c->n_tile_data].start > c->tile[c->n_tile_data].end)
+            goto error;
+// Bitmask with tiles [start, end] of this tile group set.
+#define mask(a) ((1 << (a)) - 1)
+        const unsigned tile_mask = mask(c->tile[c->n_tile_data].end + 1) -
+                                   mask(c->tile[c->n_tile_data].start);
+#undef mask
+        if (tile_mask & c->tile_mask) goto error; // tile overlap
+        c->tile_mask |= tile_mask;
+        c->n_tile_data++;
+        break;
+    case OBU_PADDING:
+    case OBU_TD:
+    case OBU_METADATA:
+        // ignore OBUs we don't care about
+        break;
+    default:
+        fprintf(stderr, "Unknown OBU type %d of size %d\n", type, len);
+        return -EINVAL;
+    }
+
+    // Submit the frame once every tile of the current frame has arrived.
+    const int n_tiles = 1 << (c->frame_hdr.tiling.log2_cols +
+                              c->frame_hdr.tiling.log2_rows);
+    if (c->have_seq_hdr && c->have_frame_hdr &&
+        c->tile_mask == (1 << n_tiles) - 1)
+    {
+        assert(c->n_tile_data);
+        submit_frame(c);
+        assert(!c->n_tile_data);
+        c->have_frame_hdr = 0;
+        c->tile_mask = 0;
+    } else if (c->have_seq_hdr && c->have_frame_hdr &&
+               c->frame_hdr.show_existing_frame)
+    {
+        // No tile data follows: re-output a previously decoded reference.
+        if (c->n_fc == 1) {
+            // Single frame context: output the reference directly.
+            dav1d_picture_ref(&c->out,
+                              &c->refs[c->frame_hdr.existing_frame_idx].p.p);
+        } else {
+            // need to append this to the frame output queue
+            const unsigned next = c->frame_thread.next++;
+            if (c->frame_thread.next == c->n_fc)
+                c->frame_thread.next = 0;
+
+            Dav1dFrameContext *const f = &c->fc[next];
+            // Wait until this frame slot has finished with its tile data.
+            pthread_mutex_lock(&f->frame_thread.td.lock);
+            while (f->n_tile_data > 0)
+                pthread_cond_wait(&f->frame_thread.td.cond,
+                                  &f->frame_thread.td.lock);
+            Dav1dThreadPicture *const out_delayed =
+                &c->frame_thread.out_delayed[next];
+            if (out_delayed->p.data[0]) {
+                // Flush the picture previously queued in this slot.
+                if (out_delayed->visible)
+                    dav1d_picture_ref(&c->out, &out_delayed->p);
+                dav1d_thread_picture_unref(out_delayed);
+            }
+            dav1d_thread_picture_ref(out_delayed,
+                                     &c->refs[c->frame_hdr.existing_frame_idx].p);
+            out_delayed->visible = 1;
+            pthread_mutex_unlock(&f->frame_thread.td.lock);
+        }
+        c->have_frame_hdr = 0;
+    }
+
+    return len + init_off;
+
+error:
+    fprintf(stderr, "Error parsing OBU data\n");
+    return -EINVAL;
+}
--- /dev/null
+++ b/src/obu.h
@@ -1,0 +1,36 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_OBU_H__
+#define __DAV1D_SRC_OBU_H__
+
+#include "dav1d/data.h"
+#include "src/internal.h"
+
+int parse_obus(Dav1dContext *c, Dav1dData *in);
+
+#endif /* __DAV1D_SRC_OBU_H__ */
--- /dev/null
+++ b/src/picture.c
@@ -1,0 +1,187 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/intops.h"
+#include "common/mem.h"
+#include "common/validate.h"
+
+#include "src/picture.h"
+#include "src/ref.h"
+
+static int picture_alloc_with_edges(Dav1dPicture *const p,
+                                    const int w, const int h,
+                                    const enum Dav1dPixelLayout layout,
+                                    const int bpc,
+                                    const int extra, void **const extra_ptr)
+{
+    int aligned_h;
+
+    if (p->data[0]) {
+        fprintf(stderr, "Picture already allocated!\n");
+        return -1;
+    }
+    assert(bpc > 0 && bpc <= 16);
+
+    const int hbd = bpc > 8;
+    const int aligned_w = (w + 127) & ~127;
+    const int has_chroma = layout != DAV1D_PIXEL_LAYOUT_I400;
+    const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
+    p->stride[0] = aligned_w << hbd;
+    p->stride[1] = has_chroma ? (aligned_w >> ss_hor) << hbd : 0;
+    p->p.w = w;
+    p->p.h = h;
+    p->p.pri = DAV1D_COLOR_PRI_UNKNOWN;
+    p->p.trc = DAV1D_TRC_UNKNOWN;
+    p->p.mtrx = DAV1D_MC_UNKNOWN;
+    p->p.chr = DAV1D_CHR_UNKNOWN;
+    aligned_h = (h + 127) & ~127;
+    p->p.layout = layout;
+    p->p.bpc = bpc;
+    const size_t y_sz = p->stride[0] * aligned_h;
+    const size_t uv_sz = p->stride[1] * (aligned_h >> ss_ver);
+    if (!(p->ref = dav1d_ref_create(y_sz + 2 * uv_sz + extra))) {
+        fprintf(stderr, "Failed to allocate memory of size %zu: %s\n",
+                y_sz + 2 * uv_sz + extra, strerror(errno));
+        return -ENOMEM;
+    }
+    uint8_t *data = p->ref->data;
+    p->data[0] = data;
+    p->data[1] = has_chroma ? data + y_sz : NULL;
+    p->data[2] = has_chroma ? data + y_sz + uv_sz : NULL;
+
+    if (extra)
+        *extra_ptr = &data[y_sz + uv_sz * 2];
+
+    return 0;
+}
+
+int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p,
+                               const int w, const int h,
+                               const enum Dav1dPixelLayout layout, const int bpc,
+                               struct thread_data *const t, const int visible)
+{
+    p->t = t;
+
+    const int res =
+        picture_alloc_with_edges(&p->p, w, h, layout, bpc,
+                                 t != NULL ? sizeof(atomic_int) * 2 : 0,
+                                 (void **) &p->progress);
+
+    p->visible = visible;
+    if (t) {
+        atomic_init(&p->progress[0], 0);
+        atomic_init(&p->progress[1], 0);
+    }
+    return res;
+}
+
+void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) {
+    validate_input(dst != NULL);
+    validate_input(dst->data[0] == NULL);
+    validate_input(src != NULL);
+
+    if (src->ref) {
+        validate_input(src->data[0] != NULL);
+        dav1d_ref_inc(src->ref);
+    }
+    *dst = *src;
+}
+
+void dav1d_thread_picture_ref(Dav1dThreadPicture *dst,
+                              const Dav1dThreadPicture *src)
+{
+    dav1d_picture_ref(&dst->p, &src->p);
+    dst->t = src->t;
+    dst->visible = src->visible;
+    dst->progress = src->progress;
+}
+
+void dav1d_picture_unref(Dav1dPicture *const p) {
+    validate_input(p != NULL);
+
+    if (p->ref) {
+        validate_input(p->data[0] != NULL);
+        dav1d_ref_dec(p->ref);
+    }
+    memset(p, 0, sizeof(*p));
+}
+
+void dav1d_thread_picture_unref(Dav1dThreadPicture *const p) {
+    dav1d_picture_unref(&p->p);
+
+    p->t = NULL;
+    p->progress = NULL;
+}
+
+void dav1d_thread_picture_wait(const Dav1dThreadPicture *const p,
+                               int y_unclipped, const enum PlaneType plane_type)
+{
+    assert(plane_type != PLANE_TYPE_ALL);
+
+    if (!p->t)
+        return;
+
+    // convert to luma units; include plane delay from loopfilters; clip
+    const int ss_ver = p->p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    y_unclipped <<= plane_type & ss_ver; // we rely here on PLANE_TYPE_UV being 1
+    y_unclipped += (plane_type != PLANE_TYPE_BLOCK) * 8; // delay imposed by loopfilter
+    const int y = iclip(y_unclipped, 0, p->p.p.h - 1);
+    atomic_uint *const progress = &p->progress[plane_type != PLANE_TYPE_BLOCK];
+
+    if (atomic_load_explicit(progress, memory_order_acquire) >= y)
+        return;
+
+    pthread_mutex_lock(&p->t->lock);
+    while (atomic_load_explicit(progress, memory_order_relaxed) < y)
+        pthread_cond_wait(&p->t->cond, &p->t->lock);
+    pthread_mutex_unlock(&p->t->lock);
+}
+
+void dav1d_thread_picture_signal(const Dav1dThreadPicture *const p,
+                                 const int y, // in pixel units
+                                 const enum PlaneType plane_type)
+{
+    assert(plane_type != PLANE_TYPE_UV);
+
+    if (!p->t)
+        return;
+
+    pthread_mutex_lock(&p->t->lock);
+    if (plane_type != PLANE_TYPE_Y) atomic_store(&p->progress[0], y);
+    if (plane_type != PLANE_TYPE_BLOCK) atomic_store(&p->progress[1], y);
+    pthread_cond_broadcast(&p->t->cond);
+    pthread_mutex_unlock(&p->t->lock);
+}
--- /dev/null
+++ b/src/picture.h
@@ -1,0 +1,91 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_PICTURE_H__
+#define __DAV1D_SRC_PICTURE_H__
+
+#include <pthread.h>
+#include <stdatomic.h>
+
+#include "dav1d/picture.h"
+
+#include "src/thread_data.h"
+
+enum PlaneType {
+    PLANE_TYPE_Y,
+    PLANE_TYPE_UV,
+    PLANE_TYPE_BLOCK,
+    PLANE_TYPE_ALL,
+};
+
+typedef struct Dav1dThreadPicture {
+    Dav1dPicture p;
+    int visible;
+    struct thread_data *t;
+    // [0] block data (including segmentation map and motion vectors)
+    // [1] pixel data
+    atomic_uint *progress;
+} Dav1dThreadPicture;
+
+/*
+ * Allocate a picture with custom border size.
+ */
+int dav1d_thread_picture_alloc(Dav1dThreadPicture *p, int w, int h,
+                               enum Dav1dPixelLayout layout, int bpc,
+                               struct thread_data *t, int visible);
+
+/**
+ * Create a copy of a picture.
+ */
+void dav1d_picture_ref(Dav1dPicture *dst, const Dav1dPicture *src);
+void dav1d_thread_picture_ref(Dav1dThreadPicture *dst,
+                              const Dav1dThreadPicture *src);
+void dav1d_thread_picture_unref(Dav1dThreadPicture *p);
+
+/**
+ * Wait for picture to reach a certain stage.
+ *
+ * y is in full-pixel units. If pt is not UV, this is in luma
+ * units, else it is in chroma units.
+ * plane_type is used to determine how many pixels delay are
+ * introduced by loopfilter processes.
+ */
+void dav1d_thread_picture_wait(const Dav1dThreadPicture *p, int y,
+                               enum PlaneType plane_type);
+
+/**
+ * Signal decoding progress.
+ *
+ * y is in full-pixel luma units.
+ * plane_type denotes whether we have completed block data (pass 1;
+ * PLANE_TYPE_BLOCK), pixel data (pass 2, PLANE_TYPE_Y) or both (no
+ * 2-pass decoding; PLANE_TYPE_ALL).
+ */
+void dav1d_thread_picture_signal(const Dav1dThreadPicture *p, int y,
+                                 enum PlaneType plane_type);
+
+#endif /* __DAV1D_SRC_PICTURE_H__ */
--- /dev/null
+++ b/src/qm.c
@@ -1,0 +1,3152 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <string.h>
+
+#include "src/qm.h"
+
+static const uint8_t qm_tbl_4x4_t[][2][10] = {
+    {
+        {
+             32,
+             43,  67,
+             73,  94, 137,
+             97, 110, 150, 200,
+        }, {
+             35,
+             46,  60,
+             57,  69,  90,
+             66,  71,  90, 109,
+        },
+    }, {
+        {
+             32,
+             41,  63,
+             69,  88, 127,
+             92, 103, 140, 184,
+        }, {
+             33,
+             45,  58,
+             56,  66,  86,
+             64,  69,  87, 105,
+        },
+    }, {
+        {
+             32,
+             38,  56,
+             63,  78, 113,
+             86,  97, 130, 169,
+        }, {
+             32,
+             45,  55,
+             53,  62,  80,
+             63,  67,  84, 101,
+        },
+    }, {
+        {
+             32,
+             37,  54,
+             58,  72, 102,
+             81,  91, 121, 156,
+        }, {
+             32,
+             45,  54,
+             51,  59,  75,
+             61,  65,  81,  97,
+        },
+    }, {
+        {
+             32,
+             34,  49,
+             53,  64,  91,
+             75,  81, 112, 140,
+        }, {
+             32,
+             46,  53,
+             49,  55,  70,
+             58,  62,  78,  91,
+        },
+    }, {
+        {
+             32,
+             34,  48,
+             49,  60,  82,
+             72,  79, 104, 134,
+        }, {
+             32,
+             46,  53,
+             47,  54,  66,
+             57,  60,  75,  89,
+        },
+    }, {
+        {
+             32,
+             33,  39,
+             45,  51,  71,
+             62,  64,  87, 108,
+        }, {
+             31,
+             42,  48,
+             47,  50,  61,
+             53,  54,  67,  78,
+        },
+    }, {
+        {
+             32,
+             33,  38,
+             42,  46,  63,
+             55,  57,  75,  92,
+        }, {
+             31,
+             41,  48,
+             46,  48,  58,
+             51,  51,  62,  71,
+        },
+    }, {
+        {
+             32,
+             32,  35,
+             38,  40,  54,
+             51,  49,  64,  81,
+        }, {
+             31,
+             38,  47,
+             47,  46,  54,
+             49,  46,  57,  66,
+        },
+    }, {
+        {
+             32,
+             32,  34,
+             35,  37,  48,
+             43,  43,  54,  65,
+        }, {
+             31,
+             37,  44,
+             47,  47,  53,
+             47,  45,  53,  59,
+        },
+    }, {
+        {
+             32,
+             32,  33,
+             34,  35,  39,
+             38,  39,  45,  54,
+        }, {
+             31,
+             34,  39,
+             42,  45,  48,
+             47,  46,  49,  54,
+        },
+    }, {
+        {
+             32,
+             32,  32,
+             32,  33,  35,
+             35,  35,  38,  46,
+        }, {
+             31,
+             32,  34,
+             38,  41,  47,
+             46,  46,  47,  52,
+        },
+    }, {
+        {
+             31,
+             32,  32,
+             32,  32,  33,
+             32,  33,  34,  35,
+        }, {
+             31,
+             31,  32,
+             34,  35,  39,
+             38,  40,  43,  47,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             31,  32,  32,
+             32,  32,  32,  33,
+        }, {
+             31,
+             31,  31,
+             31,  31,  32,
+             34,  35,  35,  39,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+        }, {
+             31,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_8x4[][2][32] = {
+    {
+        {
+             32,  33,  37,  49,  65,  80,  91, 104,
+             42,  42,  58,  71,  84,  97, 100, 112,
+             75,  69,  84, 103, 125, 142, 145, 146,
+             91,  86,  91, 110, 128, 152, 178, 190,
+        }, {
+             31,  40,  46,  48,  54,  61,  64,  68,
+             47,  45,  56,  61,  65,  69,  68,  71,
+             60,  54,  64,  75,  85,  92,  90,  87,
+             66,  61,  64,  73,  82,  92, 102, 105,
+        },
+    }, {
+        {
+             32,  33,  36,  46,  60,  75,  86,  98,
+             42,  42,  56,  67,  79,  92,  95, 105,
+             69,  64,  77,  93, 112, 130, 136, 136,
+             88,  83,  88, 105, 122, 144, 167, 177,
+        }, {
+             31,  40,  46,  47,  52,  59,  63,  66,
+             47,  45,  55,  60,  64,  68,  66,  69,
+             57,  52,  61,  70,  79,  87,  88,  85,
+             65,  61,  63,  72,  81,  90,  99, 102,
+        },
+    }, {
+        {
+             32,  32,  34,  44,  54,  72,  82,  92,
+             38,  40,  51,  61,  69,  84,  89,  98,
+             62,  58,  68,  85,  98, 118, 129, 127,
+             86,  80,  85, 101, 117, 136, 157, 165,
+        }, {
+             31,  38,  46,  46,  50,  57,  61,  65,
+             47,  46,  53,  56,  59,  64,  65,  67,
+             54,  50,  57,  66,  74,  82,  85,  82,
+             64,  60,  62,  71,  79,  88,  97,  99,
+        },
+    }, {
+        {
+             32,  32,  34,  41,  51,  65,  75,  86,
+             35,  36,  47,  53,  61,  73,  81,  92,
+             59,  57,  65,  78,  92, 108, 117, 119,
+             83,  78,  82,  97, 111, 129, 148, 154,
+        }, {
+             31,  36,  46,  45,  49,  54,  59,  63,
+             47,  47,  52,  53,  55,  58,  61,  65,
+             53,  50,  55,  63,  71,  77,  81,  80,
+             63,  59,  61,  70,  77,  86,  94,  95,
+        },
+    }, {
+        {
+             32,  32,  34,  38,  48,  60,  72,  81,
+             35,  36,  42,  51,  59,  68,  79,  86,
+             51,  50,  54,  67,  80,  92, 104, 112,
+             77,  72,  75,  87, 103, 119, 135, 144,
+        }, {
+             31,  36,  43,  45,  47,  52,  57,  61,
+             47,  47,  50,  53,  54,  56,  60,  63,
+             50,  47,  50,  58,  66,  70,  75,  77,
+             61,  57,  58,  65,  74,  82,  90,  93,
+        },
+    }, {
+        {
+             32,  32,  34,  37,  45,  54,  65,  75,
+             35,  36,  42,  50,  56,  63,  73,  81,
+             51,  50,  54,  65,  76,  87,  97, 106,
+             75,  71,  73,  84,  96, 110, 125, 136,
+        }, {
+             31,  36,  43,  46,  46,  50,  54,  59,
+             47,  47,  50,  53,  54,  55,  58,  61,
+             50,  47,  50,  57,  64,  68,  72,  75,
+             60,  56,  57,  64,  71,  78,  85,  90,
+        },
+    }, {
+        {
+             32,  32,  33,  35,  41,  49,  57,  66,
+             34,  34,  37,  43,  48,  54,  60,  68,
+             43,  42,  44,  54,  64,  71,  78,  86,
+             62,  59,  58,  68,  79,  91, 101, 111,
+        }, {
+             31,  33,  40,  47,  45,  48,  51,  55,
+             42,  44,  47,  50,  49,  50,  52,  55,
+             47,  45,  46,  54,  59,  61,  63,  66,
+             54,  51,  50,  57,  64,  70,  75,  79,
+        },
+    }, {
+        {
+             32,  32,  32,  34,  38,  44,  50,  61,
+             32,  33,  35,  37,  40,  45,  50,  58,
+             42,  41,  42,  50,  58,  66,  71,  79,
+             56,  53,  52,  59,  68,  78,  86,  97,
+        }, {
+             31,  32,  39,  44,  46,  47,  48,  53,
+             38,  40,  47,  47,  47,  46,  47,  50,
+             47,  45,  45,  51,  56,  59,  61,  64,
+             52,  49,  48,  53,  58,  64,  68,  73,
+        },
+    }, {
+        {
+             32,  32,  32,  34,  35,  40,  46,  52,
+             32,  33,  34,  37,  38,  42,  46,  51,
+             37,  36,  38,  44,  49,  55,  59,  64,
+             52,  49,  49,  54,  60,  69,  76,  83,
+        }, {
+             31,  31,  36,  42,  47,  46,  48,  50,
+             38,  40,  44,  47,  48,  46,  46,  48,
+             47,  46,  47,  50,  53,  54,  55,  56,
+             50,  48,  47,  50,  54,  60,  64,  67,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  34,  37,  42,  46,
+             32,  33,  34,  35,  37,  40,  43,  46,
+             35,  34,  36,  38,  43,  49,  53,  56,
+             43,  41,  42,  42,  49,  56,  63,  67,
+        }, {
+             31,  31,  35,  39,  43,  47,  46,  48,
+             38,  40,  43,  47,  47,  47,  46,  46,
+             47,  46,  47,  47,  50,  53,  53,  54,
+             48,  45,  46,  45,  50,  55,  58,  59,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  33,  34,  37,  40,
+             32,  32,  33,  33,  34,  36,  38,  40,
+             34,  34,  34,  36,  38,  41,  44,  46,
+             39,  38,  38,  40,  42,  47,  52,  56,
+        }, {
+             31,  31,  33,  36,  40,  45,  47,  47,
+             34,  35,  37,  41,  44,  46,  47,  46,
+             42,  42,  44,  46,  48,  49,  50,  49,
+             48,  46,  46,  46,  48,  51,  54,  55,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  32,  33,  34,  35,
+             31,  32,  32,  32,  33,  33,  34,  34,
+             32,  32,  33,  34,  35,  36,  37,  38,
+             35,  35,  34,  36,  38,  40,  42,  48,
+        }, {
+             31,  31,  31,  34,  37,  39,  42,  48,
+             31,  31,  32,  36,  39,  41,  43,  46,
+             37,  38,  40,  43,  46,  47,  47,  48,
+             48,  47,  46,  47,  47,  48,  50,  53,
+        },
+    }, {
+        {
+             31,  31,  32,  32,  32,  32,  32,  33,
+             31,  32,  32,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  33,  34,  34,  35,
+             32,  32,  32,  33,  34,  34,  35,  36,
+        }, {
+             31,  31,  31,  31,  34,  35,  38,  41,
+             31,  31,  32,  32,  36,  37,  40,  42,
+             35,  36,  37,  37,  40,  42,  45,  45,
+             37,  38,  39,  40,  43,  44,  47,  47,
+        },
+    }, {
+        {
+             31,  31,  31,  31,  31,  31,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,
+             32,  32,  32,  32,  32,  33,  33,  33,
+        }, {
+             31,  31,  31,  31,  31,  31,  34,  34,
+             31,  31,  31,  32,  32,  33,  36,  36,
+             31,  31,  31,  32,  32,  33,  36,  36,
+             34,  35,  35,  36,  36,  37,  40,  40,
+        },
+    }, {
+        {
+             31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,
+        }, {
+             31,  31,  31,  31,  31,  31,  31,  30,
+             31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_8x8_t[][2][36] = {
+    {
+        {
+             32,
+             32,  35,
+             38,  40,  54,
+             51,  49,  65,  82,
+             68,  63,  78,  97, 117,
+             84,  76,  91, 111, 134, 152,
+             95,  89,  98, 113, 138, 159, 183,
+            109, 102, 106, 121, 142, 168, 199, 220,
+        }, {
+             31,
+             38,  47,
+             47,  46,  54,
+             50,  47,  57,  66,
+             57,  52,  61,  72,  82,
+             63,  57,  66,  77,  88,  96,
+             67,  62,  67,  75,  86,  95, 104,
+             71,  67,  68,  75,  84,  95, 107, 113,
+        },
+    }, {
+        {
+             32,
+             32,  35,
+             37,  39,  51,
+             47,  46,  60,  73,
+             62,  58,  71,  87, 105,
+             78,  72,  84, 100, 121, 140,
+             90,  84,  93, 106, 129, 148, 169,
+            102,  96, 100, 113, 132, 155, 183, 201,
+        }, {
+             31,
+             38,  47,
+             47,  47,  53,
+             48,  46,  55,  62,
+             54,  50,  58,  67,  76,
+             61,  55,  63,  72,  83,  91,
+             66,  61,  65,  73,  84,  92, 101,
+             69,  65,  66,  73,  82,  92, 103, 109,
+        },
+    }, {
+        {
+             32,
+             32,  34,
+             35,  37,  48,
+             46,  45,  56,  70,
+             57,  54,  64,  80,  93,
+             76,  70,  79,  96, 111, 134,
+             85,  79,  87, 100, 121, 138, 156,
+             96,  90,  93, 105, 122, 144, 168, 184,
+        }, {
+             31,
+             36,  43,
+             47,  47,  53,
+             48,  46,  54,  61,
+             52,  49,  55,  65,  71,
+             60,  55,  60,  70,  78,  89,
+             64,  59,  63,  71,  81,  89,  97,
+             67,  63,  64,  71,  79,  89,  99, 104,
+        },
+    }, {
+        {
+             32,
+             32,  33,
+             35,  36,  46,
+             42,  42,  52,  63,
+             53,  51,  60,  73,  86,
+             68,  64,  72,  84, 100, 117,
+             78,  74,  80,  92, 109, 128, 140,
+             90,  84,  87,  98, 114, 133, 155, 168,
+        }, {
+             31,
+             34,  39,
+             46,  47,  52,
+             47,  45,  52,  58,
+             50,  48,  54,  62,  68,
+             57,  53,  58,  65,  73,  82,
+             61,  57,  61,  68,  77,  86,  91,
+             65,  61,  62,  68,  76,  86,  95, 100,
+        },
+    }, {
+        {
+             32,
+             32,  33,
+             34,  35,  39,
+             39,  40,  46,  56,
+             50,  48,  53,  65,  78,
+             62,  59,  63,  75,  90, 105,
+             76,  71,  74,  86, 101, 118, 134,
+             84,  79,  81,  92, 106, 123, 142, 153,
+        }, {
+             31,
+             34,  39,
+             42,  45,  48,
+             47,  46,  49,  55,
+             49,  47,  50,  58,  65,
+             54,  51,  53,  61,  69,  76,
+             60,  56,  57,  65,  73,  82,  89,
+             64,  59,  60,  66,  74,  83,  92,  96,
+        },
+    }, {
+        {
+             32,
+             32,  33,
+             34,  35,  39,
+             38,  39,  45,  54,
+             46,  45,  51,  61,  71,
+             56,  54,  58,  69,  80,  92,
+             68,  64,  68,  78,  90, 103, 117,
+             78,  74,  76,  86,  99, 113, 128, 140,
+        }, {
+             31,
+             34,  39,
+             42,  45,  48,
+             47,  46,  49,  54,
+             48,  46,  50,  56,  61,
+             52,  49,  52,  58,  65,  71,
+             57,  53,  55,  61,  68,  75,  82,
+             61,  57,  58,  64,  71,  79,  86,  91,
+        },
+    }, {
+        {
+             31,
+             32,  32,
+             32,  33,  35,
+             35,  35,  38,  48,
+             42,  41,  43,  54,  63,
+             51,  49,  49,  59,  71,  81,
+             59,  56,  56,  66,  77,  89,  98,
+             69,  65,  64,  73,  85,  97, 108, 119,
+        }, {
+             31,
+             32,  35,
+             38,  42,  47,
+             48,  47,  48,  53,
+             47,  45,  45,  53,  58,
+             50,  47,  47,  54,  61,  66,
+             53,  50,  49,  56,  63,  69,  73,
+             57,  54,  52,  58,  65,  72,  77,  82,
+        },
+    }, {
+        {
+             31,
+             32,  32,
+             32,  32,  35,
+             34,  34,  37,  42,
+             38,  37,  40,  47,  54,
+             46,  44,  45,  52,  60,  69,
+             52,  49,  49,  56,  65,  75,  82,
+             63,  59,  58,  65,  73,  84,  92, 105,
+        }, {
+             31,
+             31,  32,
+             38,  40,  47,
+             44,  44,  47,  50,
+             47,  45,  46,  51,  54,
+             48,  46,  46,  51,  56,  61,
+             50,  47,  47,  52,  57,  63,  66,
+             55,  52,  50,  54,  60,  66,  70,  76,
+        },
+    }, {
+        {
+             31,
+             32,  32,
+             32,  32,  34,
+             34,  33,  35,  39,
+             35,  34,  37,  42,  48,
+             41,  40,  41,  47,  53,  60,
+             47,  44,  45,  51,  57,  65,  71,
+             53,  50,  51,  55,  61,  70,  77,  85,
+        }, {
+             31,
+             31,  32,
+             35,  36,  41,
+             42,  42,  45,  48,
+             48,  46,  47,  50,  53,
+             47,  45,  45,  49,  53,  57,
+             49,  46,  46,  50,  54,  59,  61,
+             51,  48,  48,  51,  54,  60,  64,  68,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             32,  32,  33,
+             32,  32,  34,  35,
+             34,  34,  35,  37,  41,
+             37,  36,  38,  39,  45,  51,
+             43,  41,  42,  42,  49,  56,  63,
+             47,  44,  45,  46,  52,  59,  67,  71,
+        }, {
+             31,
+             31,  32,
+             34,  35,  39,
+             37,  40,  43,  47,
+             43,  43,  45,  47,  49,
+             48,  46,  46,  47,  50,  53,
+             47,  45,  45,  45,  50,  55,  58,
+             49,  46,  46,  46,  50,  55,  60,  61,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             32,  32,  32,
+             32,  32,  33,  34,
+             33,  33,  34,  35,  37,
+             34,  34,  35,  36,  39,  43,
+             37,  36,  37,  38,  41,  46,  51,
+             41,  39,  40,  41,  44,  49,  54,  58,
+        }, {
+             31,
+             31,  31,
+             32,  33,  35,
+             35,  37,  39,  43,
+             39,  41,  42,  45,  47,
+             45,  44,  45,  47,  48,  50,
+             48,  46,  46,  47,  48,  51,  53,
+             48,  46,  45,  46,  47,  51,  54,  56,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             31,  32,  32,
+             32,  32,  32,  33,
+             32,  32,  32,  34,  35,
+             32,  33,  33,  34,  35,  36,
+             34,  34,  33,  35,  36,  38,  39,
+             35,  35,  34,  36,  38,  40,  42,  48,
+        }, {
+             31,
+             31,  31,
+             30,  31,  32,
+             34,  34,  35,  39,
+             36,  37,  39,  42,  46,
+             39,  40,  41,  44,  47,  47,
+             42,  42,  42,  45,  47,  48,  48,
+             48,  47,  46,  47,  47,  49,  50,  53,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             32,  32,  32,  32,  33,
+             32,  32,  32,  32,  33,  34,
+             32,  32,  32,  32,  34,  34,  35,
+             33,  33,  33,  33,  35,  35,  36,  38,
+        }, {
+             31,
+             31,  31,
+             31,  31,  31,
+             30,  31,  31,  32,
+             34,  34,  35,  35,  39,
+             35,  35,  36,  36,  40,  41,
+             37,  38,  39,  40,  43,  44,  47,
+             40,  41,  41,  42,  44,  45,  47,  48,
+        },
+    }, {
+        {
+             31,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,
+             32,  32,  32,  32,  32,  32,  33,
+             32,  32,  32,  32,  32,  32,  33,  33,
+        }, {
+             31,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             30,  31,  31,  31,  32,
+             31,  32,  32,  32,  32,  33,
+             33,  34,  34,  35,  35,  36,  39,
+             33,  34,  34,  35,  35,  36,  39,  39,
+        },
+    }, {
+        {
+             31,
+             31,  31,
+             31,  31,  31,
+             31,  31,  32,  32,
+             31,  31,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,
+        }, {
+             31,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  31,  31,  31,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_16x4[][2][64] = { /* 16x4 quantizer-matrix tables: [qm level][plane][64 weights as 4 rows of 16]; plane split presumably luma/chroma and values presumably per the AV1 spec QM tables - TODO confirm against spec */
+    {
+        { /* plane 0 (presumably luma - verify) */
+             31,  32,  32,  34,  34,  41,  45,  54,  60,  72,  75,  83,  88,  94, 101, 108,
+             44,  41,  42,  48,  54,  63,  67,  75,  79,  90,  92, 100, 100, 101, 108, 115,
+             79,  72,  71,  73,  78,  90,  96, 110, 118, 133, 136, 142, 140, 144, 141, 151,
+             96,  90,  86,  83,  89,  95, 102, 111, 123, 135, 149, 160, 173, 180, 188, 197,
+        }, { /* plane 1 (presumably chroma - verify) */
+             31,  32,  36,  43,  46,  45,  46,  50,  52,  57,  59,  62,  63,  65,  67,  69,
+             49,  45,  46,  49,  53,  58,  59,  62,  64,  67,  68,  71,  69,  68,  70,  72,
+             63,  57,  56,  57,  60,  67,  71,  78,  82,  89,  90,  91,  89,  89,  86,  88,
+             69,  65,  62,  60,  63,  66,  70,  74,  80,  85,  91,  96, 101, 103, 105, 107,
+        },
+    }, {
+        {
+             32,  31,  32,  33,  34,  37,  44,  49,  56,  65,  72,  78,  84,  89,  95, 101,
+             44,  41,  42,  44,  54,  58,  66,  71,  77,  84,  90,  95,  95,  95, 101, 108,
+             73,  67,  65,  66,  74,  79,  90,  99, 107, 119, 127, 133, 132, 136, 132, 141,
+             93,  87,  83,  81,  86,  92,  98, 107, 117, 129, 141, 151, 163, 169, 175, 183,
+        }, {
+             31,  32,  36,  41,  46,  46,  46,  48,  51,  54,  57,  60,  62,  64,  65,  67,
+             49,  45,  46,  47,  53,  56,  59,  61,  63,  65,  67,  69,  67,  66,  68,  70,
+             61,  55,  54,  54,  59,  62,  68,  73,  77,  82,  86,  88,  86,  87,  83,  86,
+             69,  64,  61,  59,  62,  65,  68,  73,  78,  84,  89,  93,  98, 100, 102, 103,
+        },
+    }, {
+        {
+             31,  32,  32,  33,  34,  37,  41,  46,  53,  60,  65,  74,  79,  84,  89,  94,
+             39,  38,  39,  40,  47,  54,  58,  62,  68,  73,  78,  85,  90,  90,  95, 101,
+             65,  60,  59,  58,  65,  73,  79,  86,  97, 105, 111, 120, 125, 128, 124, 131,
+             90,  84,  81,  78,  83,  89,  94, 102, 112, 123, 134, 143, 154, 158, 164, 170,
+        }, {
+             31,  32,  36,  40,  44,  46,  45,  47,  49,  52,  54,  58,  60,  62,  64,  65,
+             48,  46,  46,  46,  51,  54,  56,  57,  58,  60,  62,  64,  66,  64,  66,  68,
+             57,  53,  51,  50,  54,  60,  64,  68,  73,  76,  79,  82,  84,  84,  81,  83,
+             68,  63,  60,  58,  61,  64,  67,  71,  77,  82,  87,  91,  95,  97,  99, 100,
+        },
+    }, {
+        {
+             31,  32,  32,  33,  34,  34,  39,  44,  49,  54,  60,  68,  75,  79,  84,  88,
+             36,  35,  36,  38,  42,  48,  51,  56,  60,  63,  68,  75,  81,  85,  89,  94,
+             62,  58,  57,  56,  61,  66,  74,  82,  90,  95, 102, 110, 117, 120, 116, 123,
+             88,  82,  79,  76,  81,  85,  91,  98, 107, 117, 127, 135, 145, 148, 153, 159,
+        }, {
+             31,  32,  35,  40,  43,  46,  45,  46,  48,  50,  52,  56,  58,  60,  62,  63,
+             48,  46,  47,  47,  50,  53,  53,  54,  54,  55,  56,  59,  61,  63,  64,  66,
+             56,  52,  50,  49,  53,  56,  61,  65,  70,  72,  75,  79,  81,  82,  79,  81,
+             67,  62,  60,  57,  60,  63,  66,  70,  75,  80,  85,  89,  93,  94,  96,  97,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  33,  34,  37,  41,  45,  49,  54,  61,  68,  74,  78,  83,
+             36,  35,  35,  37,  41,  48,  50,  53,  56,  60,  63,  69,  75,  80,  84,  88,
+             53,  51,  49,  49,  53,  60,  65,  71,  76,  82,  87,  93, 100, 105, 109, 114,
+             81,  76,  73,  71,  74,  80,  85,  91,  98, 105, 112, 121, 130, 137, 142, 148,
+        }, {
+             31,  31,  33,  38,  42,  46,  46,  45,  46,  48,  50,  52,  56,  58,  60,  62,
+             48,  47,  46,  47,  49,  53,  53,  53,  54,  54,  55,  57,  59,  61,  62,  64,
+             52,  49,  48,  47,  50,  54,  57,  61,  64,  66,  68,  71,  73,  75,  76,  78,
+             64,  60,  57,  56,  57,  61,  64,  68,  71,  75,  78,  83,  87,  90,  92,  94,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  33,  34,  34,  37,  41,  45,  49,  54,  60,  65,  72,  75,
+             36,  35,  34,  36,  38,  42,  48,  50,  53,  56,  60,  63,  68,  73,  79,  81,
+             53,  51,  49,  50,  49,  54,  60,  65,  71,  76,  82,  87,  92,  97, 104, 106,
+             79,  75,  72,  71,  69,  73,  78,  84,  90,  96, 103, 110, 118, 125, 133, 136,
+        }, {
+             31,  31,  32,  36,  40,  43,  46,  46,  45,  46,  48,  50,  52,  54,  57,  59,
+             48,  47,  46,  47,  47,  50,  53,  53,  53,  54,  54,  55,  56,  58,  60,  61,
+             52,  50,  48,  47,  47,  50,  54,  57,  61,  64,  66,  68,  70,  72,  75,  75,
+             63,  60,  57,  56,  54,  57,  60,  64,  67,  71,  75,  78,  82,  85,  89,  90,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  32,  34,  34,  36,  39,  42,  45,  50,  54,  60,  66,  73,
+             34,  34,  33,  35,  37,  39,  42,  44,  46,  48,  51,  54,  58,  63,  68,  74,
+             44,  43,  41,  43,  43,  48,  53,  57,  60,  64,  67,  72,  76,  80,  85,  91,
+             65,  62,  59,  59,  58,  63,  67,  71,  76,  81,  85,  92,  98, 105, 111, 118,
+        }, {
+             31,  31,  32,  35,  40,  43,  46,  46,  46,  46,  47,  48,  50,  52,  55,  58,
+             42,  42,  42,  45,  47,  48,  50,  50,  49,  49,  50,  50,  52,  53,  55,  58,
+             49,  47,  45,  46,  46,  49,  53,  55,  57,  59,  60,  61,  63,  64,  66,  68,
+             57,  54,  52,  51,  50,  53,  56,  58,  61,  64,  67,  71,  73,  76,  79,  82,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  32,  32,  34,  35,  37,  39,  41,  45,  50,  54,  57,  61,
+             32,  32,  33,  34,  34,  35,  37,  38,  40,  41,  43,  46,  50,  53,  56,  58,
+             44,  42,  41,  42,  42,  42,  48,  54,  57,  60,  63,  67,  71,  74,  77,  79,
+             58,  55,  53,  53,  53,  52,  57,  63,  67,  70,  74,  79,  86,  90,  93,  97,
+        }, {
+             31,  31,  32,  34,  37,  39,  42,  47,  46,  46,  46,  47,  48,  50,  51,  53,
+             37,  38,  40,  42,  44,  47,  47,  48,  47,  46,  46,  46,  47,  48,  49,  50,
+             49,  47,  45,  45,  45,  45,  49,  53,  55,  57,  58,  59,  61,  62,  63,  64,
+             54,  51,  49,  49,  48,  48,  51,  55,  58,  60,  62,  65,  68,  70,  71,  73,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  32,  32,  33,  34,  35,  37,  38,  42,  45,  47,  51,  55,
+             32,  32,  32,  33,  34,  35,  36,  37,  38,  40,  40,  43,  45,  47,  50,  54,
+             38,  37,  36,  36,  38,  39,  41,  44,  49,  51,  52,  56,  58,  60,  63,  67,
+             53,  51,  49,  49,  50,  49,  51,  54,  60,  63,  65,  71,  75,  77,  82,  87,
+        }, {
+             31,  31,  31,  32,  35,  39,  40,  42,  47,  47,  46,  46,  47,  48,  49,  51,
+             37,  38,  39,  40,  43,  47,  47,  47,  48,  47,  47,  46,  46,  47,  47,  49,
+             48,  47,  46,  46,  46,  47,  48,  50,  53,  53,  54,  55,  55,  55,  56,  57,
+             52,  50,  48,  48,  47,  47,  48,  50,  54,  56,  57,  61,  63,  64,  66,  68,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  32,  32,  32,  33,  34,  35,  35,  38,  40,  42,  45,  46,
+             32,  32,  32,  33,  34,  34,  35,  36,  37,  38,  38,  40,  41,  43,  45,  46,
+             36,  35,  35,  34,  36,  36,  38,  40,  42,  47,  48,  50,  51,  53,  56,  56,
+             44,  42,  41,  41,  42,  42,  42,  44,  48,  52,  54,  58,  60,  63,  66,  67,
+        }, {
+             31,  31,  31,  31,  34,  35,  39,  40,  42,  46,  47,  47,  47,  46,  48,  48,
+             37,  38,  39,  40,  42,  43,  47,  47,  47,  48,  48,  47,  46,  46,  46,  46,
+             48,  47,  46,  46,  47,  47,  47,  48,  50,  52,  53,  53,  53,  53,  54,  54,
+             49,  47,  46,  45,  45,  46,  45,  47,  49,  53,  53,  56,  57,  58,  59,  59,
+        },
+    }, {
+        {
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  34,  34,  35,  35,  38,  38,  42,
+             32,  32,  32,  32,  32,  33,  33,  34,  34,  35,  35,  36,  36,  39,  39,  42,
+             34,  34,  34,  33,  33,  35,  35,  37,  37,  39,  39,  43,  43,  45,  45,  48,
+             39,  38,  38,  37,  37,  39,  39,  40,  40,  45,  45,  51,  51,  54,  54,  58,
+        }, {
+             31,  31,  31,  31,  31,  34,  34,  38,  38,  42,  42,  48,  48,  47,  47,  47,
+             33,  34,  34,  35,  35,  39,  39,  43,  43,  45,  45,  47,  47,  46,  46,  45,
+             42,  42,  42,  42,  42,  45,  45,  47,  47,  48,  48,  50,  50,  50,  50,  49,
+             48,  47,  47,  45,  45,  46,  46,  46,  46,  50,  50,  53,  53,  54,  54,  56,
+        },
+    }, {
+        {
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  35,  35,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,
+             32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  38,  38,
+             36,  35,  35,  35,  34,  34,  36,  36,  37,  38,  38,  41,  42,  44,  48,  48,
+        }, {
+             31,  31,  31,  31,  31,  31,  34,  34,  36,  38,  38,  41,  42,  44,  48,  48,
+             31,  31,  31,  32,  32,  32,  35,  36,  37,  40,  40,  42,  43,  44,  46,  46,
+             37,  38,  38,  39,  40,  40,  42,  43,  44,  47,  47,  47,  47,  47,  48,  48,
+             48,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  49,  50,  51,  53,  53,
+        },
+    }, {
+        {
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  35,  36,
+             32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  37,
+        }, {
+             31,  31,  31,  31,  31,  31,  31,  31,  33,  34,  34,  36,  38,  38,  39,  42,
+             31,  31,  31,  31,  32,  32,  32,  33,  35,  36,  36,  38,  40,  40,  41,  43,
+             35,  35,  36,  36,  36,  37,  37,  38,  40,  40,  40,  43,  45,  45,  45,  46,
+             37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,  45,  47,  47,  47,  47,
+        },
+    }, {
+        {
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,
+        }, {
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  34,  34,  34,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,
+             34,  34,  35,  35,  35,  35,  36,  36,  36,  36,  36,  37,  39,  40,  40,  40,
+        },
+    }, {
+        {
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        }, {
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_16x8[][2][128] = {
+    {
+        {
+             32,  31,  32,  34,  36,  44,  48,  58,  65,  79,  82,  91,  97, 103, 110, 118,
+             32,  33,  34,  37,  38,  43,  46,  54,  58,  70,  72,  80,  86,  93, 100, 107,
+             36,  34,  36,  42,  48,  53,  56,  63,  68,  79,  81,  88,  94,  98, 101, 105,
+             53,  49,  50,  54,  60,  71,  76,  87,  92, 104, 106, 106, 107, 114, 117, 118,
+             65,  59,  59,  63,  68,  79,  85,  98, 105, 118, 121, 130, 128, 131, 138, 136,
+             87,  78,  77,  79,  84,  95, 102, 116, 124, 141, 144, 148, 157, 150, 161, 157,
+             93,  86,  82,  80,  86,  94, 105, 112, 122, 135, 149, 162, 167, 174, 183, 182,
+             99,  93,  89,  88,  90,  97, 105, 115, 124, 135, 146, 159, 171, 186, 193, 203,
+        }, {
+             32,  30,  33,  42,  49,  49,  50,  54,  57,  63,  64,  68,  70,  72,  74,  76,
+             37,  40,  43,  47,  48,  46,  46,  49,  50,  55,  56,  59,  62,  64,  67,  69,
+             48,  46,  47,  50,  53,  53,  54,  55,  56,  60,  61,  64,  66,  66,  66,  67,
+             52,  48,  47,  50,  54,  61,  64,  68,  70,  75,  75,  74,  73,  75,  74,  73,
+             57,  52,  51,  53,  57,  64,  67,  73,  76,  82,  83,  86,  83,  83,  84,  82,
+             66,  60,  59,  60,  62,  69,  73,  80,  84,  92,  93,  94,  96,  92,  94,  91,
+             68,  63,  60,  59,  62,  66,  72,  76,  80,  87,  93,  98,  99, 101, 103, 101,
+             71,  66,  63,  62,  62,  66,  70,  75,  79,  84,  89,  94,  98, 104, 106, 109,
+        },
+    }, {
+        {
+             32,  31,  32,  32,  36,  39,  47,  53,  61,  71,  79,  86,  92,  98, 104, 110,
+             32,  32,  34,  35,  37,  40,  45,  50,  56,  64,  70,  76,  82,  88,  94, 100,
+             36,  35,  36,  40,  48,  50,  56,  60,  65,  73,  79,  84,  89,  93,  95,  98,
+             47,  44,  45,  47,  56,  60,  69,  75,  81,  89,  95, 100, 101, 108, 110, 111,
+             65,  60,  59,  60,  68,  73,  84,  92, 100, 111, 118, 124, 121, 124, 129, 127,
+             79,  72,  71,  71,  78,  84,  95, 103, 113, 125, 133, 140, 148, 141, 151, 147,
+             90,  84,  80,  78,  83,  91, 101, 108, 116, 129, 142, 153, 157, 163, 171, 169,
+             96,  90,  87,  85,  87,  94, 101, 110, 118, 129, 138, 150, 161, 174, 181, 188,
+        }, {
+             32,  30,  33,  39,  49,  48,  50,  52,  55,  60,  63,  66,  68,  70,  72,  74,
+             35,  38,  41,  46,  48,  46,  46,  47,  49,  53,  55,  58,  60,  62,  65,  67,
+             48,  46,  47,  48,  53,  53,  54,  54,  56,  58,  60,  62,  64,  65,  65,  65,
+             50,  46,  46,  47,  54,  56,  61,  63,  65,  68,  70,  72,  71,  73,  72,  71,
+             57,  52,  51,  51,  57,  60,  66,  71,  74,  79,  82,  84,  81,  81,  82,  79,
+             63,  58,  56,  55,  60,  64,  70,  75,  79,  85,  89,  91,  94,  89,  92,  89,
+             68,  63,  60,  58,  61,  65,  71,  75,  79,  85,  91,  95,  97,  98, 100,  98,
+             70,  65,  63,  61,  61,  65,  69,  74,  78,  82,  87,  91,  96, 101, 103, 105,
+        },
+    }, {
+        {
+             32,  31,  32,  32,  34,  39,  44,  49,  57,  65,  71,  81,  87,  92,  98, 103,
+             32,  32,  33,  34,  36,  39,  42,  46,  53,  59,  64,  72,  77,  83,  88,  94,
+             36,  35,  36,  38,  44,  50,  53,  57,  63,  68,  73,  80,  85,  88,  89,  92,
+             44,  41,  42,  42,  50,  58,  63,  67,  74,  79,  84,  91,  96, 102, 103, 103,
+             58,  54,  53,  52,  59,  68,  74,  81,  90,  97, 102, 110, 114, 117, 121, 119,
+             79,  73,  71,  69,  75,  84,  90,  97, 108, 118, 125, 135, 140, 133, 141, 137,
+             88,  81,  78,  76,  81,  88,  97, 104, 111, 123, 135, 145, 148, 153, 160, 158,
+             93,  88,  84,  82,  84,  90,  97, 105, 113, 122, 131, 141, 151, 163, 169, 175,
+        }, {
+             32,  31,  33,  37,  44,  48,  49,  51,  54,  57,  60,  64,  66,  68,  70,  72,
+             34,  36,  40,  44,  46,  46,  45,  47,  49,  51,  53,  57,  59,  61,  63,  65,
+             48,  46,  47,  47,  51,  53,  53,  54,  55,  56,  58,  61,  63,  63,  63,  63,
+             49,  46,  46,  45,  51,  56,  58,  60,  62,  64,  65,  68,  69,  71,  70,  69,
+             54,  50,  49,  48,  53,  58,  62,  65,  70,  73,  75,  78,  79,  79,  80,  77,
+             63,  58,  56,  54,  59,  64,  67,  71,  77,  82,  85,  89,  91,  87,  89,  86,
+             67,  62,  59,  57,  60,  64,  70,  73,  77,  83,  89,  93,  94,  96,  97,  95,
+             69,  65,  62,  60,  61,  64,  68,  72,  76,  81,  85,  89,  93,  98, 100, 102,
+        },
+    }, {
+        {
+             32,  31,  31,  32,  34,  36,  41,  47,  53,  58,  65,  74,  82,  87,  92,  97,
+             31,  32,  33,  34,  35,  36,  40,  44,  50,  54,  59,  67,  73,  78,  83,  88,
+             35,  34,  35,  37,  41,  46,  49,  53,  57,  61,  66,  73,  79,  83,  84,  86,
+             44,  41,  42,  42,  48,  54,  60,  66,  71,  75,  79,  86,  92,  96,  97,  97,
+             53,  50,  49,  49,  54,  60,  67,  75,  82,  87,  92, 100, 105, 110, 114, 111,
+             65,  61,  59,  58,  63,  68,  76,  84,  92,  98, 105, 113, 120, 125, 132, 128,
+             82,  76,  73,  71,  76,  80,  88,  97, 106, 112, 120, 131, 139, 144, 150, 147,
+             90,  85,  81,  79,  81,  87,  93, 101, 108, 116, 124, 134, 142, 153, 157, 163,
+        }, {
+             32,  31,  33,  37,  42,  49,  48,  50,  52,  54,  57,  61,  64,  66,  68,  70,
+             33,  34,  37,  43,  44,  47,  46,  46,  47,  49,  51,  55,  57,  59,  61,  63,
+             45,  45,  46,  47,  49,  52,  51,  52,  53,  54,  55,  58,  60,  61,  61,  61,
+             49,  46,  45,  45,  49,  53,  57,  59,  61,  62,  64,  66,  68,  69,  68,  67,
+             52,  49,  47,  47,  50,  54,  59,  63,  66,  68,  70,  73,  75,  77,  77,  75,
+             57,  53,  51,  50,  53,  57,  61,  66,  71,  73,  76,  80,  83,  84,  86,  83,
+             64,  60,  57,  55,  58,  61,  66,  71,  75,  79,  83,  87,  91,  93,  94,  92,
+             68,  64,  61,  59,  60,  63,  67,  71,  74,  79,  83,  87,  91,  95,  97,  98,
+        },
+    }, {
+        {
+             32,  31,  31,  32,  33,  36,  39,  44,  48,  53,  58,  66,  74,  81,  86,  91,
+             31,  32,  32,  33,  34,  35,  38,  41,  45,  49,  54,  60,  67,  73,  78,  82,
+             33,  33,  34,  36,  38,  42,  44,  46,  50,  53,  57,  63,  69,  75,  78,  80,
+             40,  39,  38,  40,  44,  51,  54,  59,  62,  66,  70,  75,  81,  86,  90,  90,
+             51,  49,  47,  48,  52,  58,  63,  69,  74,  79,  84,  90,  97, 102, 106, 103,
+             65,  61,  59,  58,  62,  68,  73,  79,  85,  92,  98, 106, 113, 120, 124, 119,
+             79,  74,  71,  69,  72,  78,  84,  90,  96, 103, 110, 119, 128, 135, 140, 137,
+             87,  82,  79,  77,  78,  84,  89,  96, 103, 111, 118, 126, 134, 143, 147, 151,
+        }, {
+             32,  31,  31,  35,  41,  49,  48,  49,  50,  52,  54,  57,  61,  64,  66,  68,
+             32,  33,  35,  39,  43,  47,  46,  45,  46,  48,  50,  52,  55,  58,  59,  61,
+             40,  41,  43,  46,  48,  50,  49,  48,  49,  50,  51,  53,  56,  58,  59,  59,
+             49,  47,  46,  46,  49,  53,  54,  56,  57,  58,  59,  61,  63,  65,  66,  65,
+             51,  49,  47,  47,  49,  54,  57,  61,  63,  65,  67,  69,  72,  73,  75,  72,
+             57,  54,  51,  50,  52,  57,  60,  64,  67,  71,  73,  77,  80,  82,  84,  81,
+             63,  59,  57,  55,  57,  60,  64,  67,  71,  75,  78,  82,  86,  89,  91,  89,
+             67,  63,  60,  58,  59,  62,  65,  69,  73,  77,  81,  85,  88,  92,  94,  95,
+        },
+    }, {
+        {
+             32,  31,  31,  32,  32,  34,  36,  39,  44,  48,  53,  58,  65,  71,  79,  82,
+             31,  32,  32,  32,  33,  34,  34,  37,  41,  45,  49,  54,  60,  65,  72,  75,
+             32,  32,  33,  34,  35,  37,  38,  40,  43,  46,  50,  54,  58,  63,  70,  72,
+             36,  35,  34,  36,  38,  42,  48,  50,  53,  56,  60,  63,  68,  73,  79,  81,
+             44,  42,  41,  42,  42,  48,  54,  58,  63,  67,  71,  75,  79,  84,  90,  92,
+             53,  51,  49,  50,  49,  54,  60,  65,  71,  76,  82,  87,  92,  97, 104, 106,
+             65,  62,  59,  59,  58,  63,  68,  73,  79,  85,  92,  98, 105, 111, 118, 121,
+             79,  75,  72,  71,  69,  73,  78,  84,  90,  96, 103, 110, 118, 125, 133, 136,
+        }, {
+             32,  31,  30,  33,  37,  42,  49,  48,  49,  50,  52,  54,  57,  60,  63,  64,
+             31,  31,  32,  36,  40,  43,  46,  46,  45,  46,  48,  50,  52,  54,  57,  59,
+             37,  38,  40,  43,  47,  47,  48,  47,  46,  46,  47,  49,  50,  52,  55,  56,
+             48,  47,  46,  47,  47,  50,  53,  53,  53,  54,  54,  55,  56,  58,  60,  61,
+             49,  47,  45,  46,  45,  49,  53,  56,  58,  59,  61,  62,  64,  65,  67,  68,
+             52,  50,  48,  47,  47,  50,  54,  57,  61,  64,  66,  68,  70,  72,  75,  75,
+             57,  54,  52,  51,  50,  53,  57,  60,  64,  67,  71,  73,  76,  79,  82,  83,
+             63,  60,  57,  56,  54,  57,  60,  64,  67,  71,  75,  78,  82,  85,  89,  90,
+        },
+    }, {
+        {
+             32,  31,  31,  32,  32,  34,  35,  38,  41,  44,  48,  53,  58,  65,  71,  79,
+             31,  32,  32,  32,  33,  34,  34,  36,  39,  42,  45,  49,  54,  60,  65,  72,
+             32,  32,  33,  34,  35,  37,  38,  40,  41,  43,  46,  50,  54,  58,  63,  70,
+             36,  35,  34,  36,  38,  42,  47,  49,  51,  54,  56,  60,  63,  68,  73,  79,
+             44,  42,  41,  42,  42,  48,  52,  56,  60,  64,  67,  71,  75,  79,  84,  90,
+             53,  51,  49,  50,  49,  54,  59,  63,  67,  72,  76,  82,  87,  92,  97, 104,
+             62,  59,  57,  57,  56,  61,  65,  69,  74,  79,  83,  90,  95, 102, 108, 115,
+             73,  69,  66,  65,  64,  69,  73,  77,  81,  86,  91,  99, 105, 112, 119, 127,
+        }, {
+             32,  31,  30,  33,  37,  42,  47,  48,  48,  49,  50,  52,  54,  57,  60,  63,
+             31,  31,  32,  36,  40,  43,  46,  46,  45,  45,  46,  48,  50,  52,  54,  57,
+             37,  38,  40,  43,  47,  47,  48,  47,  46,  46,  46,  47,  49,  50,  52,  55,
+             48,  47,  46,  47,  47,  50,  52,  53,  53,  53,  54,  54,  55,  56,  58,  60,
+             49,  47,  45,  46,  45,  49,  53,  55,  57,  58,  59,  61,  62,  64,  65,  67,
+             52,  50,  48,  47,  47,  50,  53,  56,  59,  62,  64,  66,  68,  70,  72,  75,
+             56,  53,  51,  50,  49,  53,  55,  58,  61,  64,  66,  70,  72,  75,  77,  80,
+             61,  57,  55,  54,  52,  56,  58,  61,  63,  66,  69,  73,  76,  79,  82,  86,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  32,  32,  34,  36,  38,  41,  44,  48,  53,  57,  61,  65,
+             31,  32,  32,  32,  32,  33,  34,  34,  37,  39,  41,  45,  49,  53,  56,  60,
+             32,  32,  33,  34,  34,  35,  37,  38,  40,  41,  43,  46,  50,  53,  56,  58,
+             35,  35,  34,  35,  36,  37,  41,  46,  47,  49,  51,  54,  57,  60,  63,  66,
+             39,  38,  37,  38,  39,  40,  44,  50,  52,  54,  57,  60,  64,  67,  69,  72,
+             44,  42,  41,  42,  42,  42,  48,  54,  57,  60,  63,  67,  71,  74,  77,  79,
+             53,  51,  49,  49,  49,  49,  54,  60,  64,  67,  71,  76,  82,  86,  89,  92,
+             65,  62,  59,  59,  58,  58,  63,  68,  72,  76,  79,  85,  92,  97, 100, 105,
+        }, {
+             32,  31,  30,  33,  35,  37,  42,  49,  48,  48,  49,  50,  52,  54,  55,  57,
+             31,  31,  32,  35,  37,  40,  43,  46,  46,  45,  45,  46,  48,  49,  51,  52,
+             37,  38,  40,  42,  44,  47,  47,  48,  47,  46,  46,  46,  47,  48,  49,  50,
+             45,  45,  44,  46,  46,  47,  49,  52,  51,  51,  51,  52,  53,  54,  54,  55,
+             48,  47,  45,  46,  46,  47,  50,  53,  54,  54,  55,  56,  57,  58,  58,  59,
+             49,  47,  45,  45,  45,  45,  49,  53,  55,  57,  58,  59,  61,  62,  63,  64,
+             52,  50,  48,  47,  47,  47,  50,  54,  57,  59,  61,  64,  66,  68,  69,  70,
+             57,  54,  52,  51,  51,  50,  53,  57,  59,  61,  64,  67,  71,  73,  74,  76,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  32,  32,  32,  34,  36,  38,  39,  44,  47,  49,  53,  58,
+             31,  32,  32,  32,  32,  33,  33,  34,  34,  36,  37,  41,  44,  46,  49,  54,
+             32,  32,  32,  33,  34,  35,  35,  36,  37,  39,  40,  42,  45,  47,  50,  54,
+             32,  33,  33,  33,  34,  36,  36,  38,  40,  41,  42,  45,  47,  48,  51,  55,
+             36,  35,  35,  35,  36,  38,  40,  42,  48,  49,  50,  53,  56,  57,  60,  63,
+             44,  42,  41,  41,  42,  42,  44,  48,  54,  56,  58,  63,  66,  67,  71,  75,
+             47,  45,  44,  44,  45,  45,  47,  50,  56,  58,  60,  66,  69,  71,  75,  79,
+             53,  51,  49,  49,  50,  49,  51,  54,  60,  63,  65,  71,  75,  77,  82,  87,
+        }, {
+             32,  31,  30,  31,  33,  37,  39,  42,  49,  48,  48,  49,  50,  51,  52,  54,
+             31,  31,  32,  33,  36,  40,  41,  43,  46,  46,  46,  45,  46,  47,  48,  50,
+             35,  37,  38,  38,  41,  45,  46,  46,  48,  47,  46,  45,  46,  47,  47,  49,
+             38,  40,  40,  41,  44,  47,  47,  48,  49,  48,  48,  47,  48,  48,  48,  50,
+             48,  47,  46,  46,  47,  47,  48,  50,  53,  53,  53,  53,  54,  54,  54,  55,
+             49,  47,  45,  45,  46,  45,  47,  49,  53,  55,  56,  58,  59,  60,  61,  62,
+             50,  48,  46,  46,  46,  46,  47,  50,  54,  55,  56,  59,  61,  61,  63,  65,
+             52,  50,  48,  48,  47,  47,  48,  50,  54,  56,  57,  61,  63,  64,  66,  68,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  34,  35,  36,  39,  41,  44,  47,  48,
+             31,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  37,  39,  41,  44,  45,
+             31,  32,  32,  32,  33,  33,  34,  34,  35,  36,  36,  39,  40,  42,  44,  45,
+             32,  32,  32,  33,  34,  34,  35,  36,  37,  38,  38,  40,  41,  43,  45,  46,
+             35,  35,  34,  34,  35,  36,  37,  39,  41,  45,  46,  48,  49,  51,  53,  54,
+             36,  35,  35,  34,  36,  36,  38,  40,  42,  47,  48,  50,  51,  53,  56,  56,
+             44,  42,  41,  41,  42,  42,  42,  44,  48,  52,  54,  58,  60,  63,  66,  67,
+             47,  45,  45,  44,  44,  45,  45,  47,  50,  55,  56,  60,  62,  66,  69,  70,
+        }, {
+             32,  31,  31,  30,  33,  33,  37,  39,  42,  47,  49,  48,  48,  49,  50,  50,
+             31,  31,  32,  32,  35,  36,  40,  41,  43,  46,  46,  46,  45,  45,  46,  46,
+             33,  34,  34,  35,  37,  38,  43,  43,  44,  46,  47,  46,  46,  45,  46,  46,
+             37,  38,  39,  40,  42,  43,  47,  47,  47,  48,  48,  47,  46,  46,  46,  46,
+             45,  45,  45,  44,  46,  46,  47,  48,  49,  51,  52,  51,  51,  51,  52,  52,
+             48,  47,  46,  46,  47,  47,  47,  48,  50,  52,  53,  53,  53,  53,  54,  54,
+             49,  47,  46,  45,  45,  46,  45,  47,  49,  53,  53,  56,  57,  58,  59,  59,
+             50,  48,  47,  46,  46,  46,  46,  47,  50,  53,  54,  56,  57,  59,  61,  61,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  32,  34,  34,  36,  36,  39,  39,  44,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  37,  37,  41,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  37,  37,  41,
+             32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  43,
+             32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  43,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,  48,  50,  50,  53,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,  48,  50,  50,  53,
+             44,  42,  42,  41,  41,  42,  42,  42,  42,  48,  48,  54,  54,  58,  58,  63,
+        }, {
+             32,  31,  31,  30,  30,  33,  33,  37,  37,  42,  42,  49,  49,  48,  48,  49,
+             31,  31,  31,  32,  32,  36,  36,  40,  40,  43,  43,  46,  46,  46,  46,  45,
+             31,  31,  31,  32,  32,  36,  36,  40,  40,  43,  43,  46,  46,  46,  46,  45,
+             37,  38,  38,  40,  40,  43,  43,  47,  47,  47,  47,  48,  48,  47,  47,  46,
+             37,  38,  38,  40,  40,  43,  43,  47,  47,  47,  47,  48,  48,  47,  47,  46,
+             48,  47,  47,  46,  46,  47,  47,  47,  47,  50,  50,  53,  53,  53,  53,  53,
+             48,  47,  47,  46,  46,  47,  47,  47,  47,  50,  50,  53,  53,  53,  53,  53,
+             49,  47,  47,  45,  45,  46,  46,  45,  45,  49,  49,  53,  53,  56,  56,  58,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  33,  34,  34,  36,  36,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  35,  35,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,
+             31,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  35,  35,  35,  36,  36,
+             32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  38,  38,
+             32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  38,  38,
+             35,  35,  35,  34,  34,  34,  35,  36,  36,  37,  37,  40,  41,  43,  46,  46,
+             36,  35,  35,  35,  34,  34,  36,  36,  37,  38,  38,  41,  42,  44,  48,  48,
+        }, {
+             32,  31,  31,  31,  30,  30,  33,  33,  35,  37,  37,  41,  42,  44,  49,  49,
+             31,  31,  31,  31,  32,  32,  34,  35,  37,  39,  39,  42,  42,  44,  47,  47,
+             31,  31,  31,  32,  32,  32,  35,  36,  37,  40,  40,  42,  43,  44,  46,  46,
+             33,  34,  34,  34,  35,  35,  37,  38,  40,  43,  43,  44,  44,  45,  47,  47,
+             37,  38,  38,  39,  40,  40,  42,  43,  44,  47,  47,  47,  47,  47,  48,  48,
+             37,  38,  38,  39,  40,  40,  42,  43,  44,  47,  47,  47,  47,  47,  48,  48,
+             45,  45,  45,  45,  44,  44,  46,  46,  46,  47,  47,  49,  49,  50,  52,  52,
+             48,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  49,  50,  51,  53,  53,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  35,
+             32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  37,
+             32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  37,
+             32,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  35,  36,  36,  36,  38,
+        }, {
+             32,  31,  31,  31,  31,  30,  30,  31,  33,  33,  33,  35,  37,  37,  39,  42,
+             31,  31,  31,  31,  31,  31,  31,  32,  34,  35,  35,  37,  39,  39,  40,  42,
+             31,  31,  31,  31,  32,  32,  32,  33,  35,  36,  36,  38,  40,  40,  41,  43,
+             31,  31,  31,  31,  32,  32,  32,  33,  35,  36,  36,  38,  40,  40,  41,  43,
+             33,  33,  34,  34,  34,  35,  35,  35,  37,  38,  38,  41,  43,  43,  43,  44,
+             37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,  45,  47,  47,  47,  47,
+             37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,  45,  47,  47,  47,  47,
+             38,  39,  40,  40,  40,  41,  41,  41,  43,  44,  44,  46,  47,  47,  47,  48,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  31,  33,  33,  33,  33,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  34,  34,  34,  34,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  33,  34,  35,  35,  35,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,
+             33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  36,  37,  38,  38,  38,
+             35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,  40,  41,  41,  41,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_32x8[][2][256] = {
+    {
+        {
+             32,  31,  31,  31,  32,  32,  34,  35,  36,  39,  44,  46,  48,  53,  58,  61,  65,  71,  79,  81,  82,  88,  91,  94,  97, 100, 103, 107, 110, 114, 118, 122,
+             32,  32,  33,  33,  34,  35,  37,  37,  38,  40,  43,  44,  46,  50,  54,  56,  58,  63,  70,  71,  72,  77,  80,  83,  86,  89,  93,  96, 100, 104, 107, 111,
+             36,  35,  34,  35,  36,  38,  42,  45,  48,  50,  53,  55,  56,  60,  63,  66,  68,  73,  79,  80,  81,  85,  88,  91,  94,  97,  98, 100, 101, 103, 105, 107,
+             53,  51,  49,  49,  50,  49,  54,  57,  60,  65,  71,  73,  76,  82,  87,  89,  92,  97, 104, 105, 106, 108, 106, 105, 107, 111, 114, 117, 117, 117, 118, 119,
+             65,  62,  59,  59,  59,  58,  63,  65,  68,  73,  79,  82,  85,  92,  98, 101, 105, 111, 118, 119, 121, 126, 130, 131, 128, 127, 131, 136, 138, 137, 136, 136,
+             87,  82,  78,  78,  77,  75,  79,  82,  84,  89,  95,  98, 102, 109, 116, 120, 124, 132, 141, 142, 144, 149, 148, 153, 157, 152, 150, 155, 161, 159, 157, 156,
+             93,  88,  86,  84,  82,  82,  80,  84,  86,  91,  94,  98, 105, 107, 112, 119, 122, 130, 135, 140, 149, 153, 162, 165, 167, 173, 174, 177, 183, 185, 182, 179,
+             99,  94,  93,  90,  89,  89,  88,  87,  90,  93,  97,  99, 105, 107, 115, 116, 124, 127, 135, 139, 146, 152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204,
+        }, {
+             32,  31,  30,  32,  33,  37,  42,  45,  49,  48,  49,  49,  50,  52,  54,  55,  57,  60,  63,  64,  64,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
+             37,  38,  40,  41,  43,  47,  47,  47,  48,  47,  46,  46,  46,  47,  49,  49,  50,  52,  55,  55,  56,  58,  59,  60,  62,  63,  64,  65,  67,  68,  69,  70,
+             48,  47,  46,  46,  47,  47,  50,  51,  53,  53,  53,  53,  54,  54,  55,  56,  56,  58,  60,  61,  61,  63,  64,  65,  66,  67,  66,  66,  66,  66,  67,  67,
+             52,  50,  48,  48,  47,  47,  50,  52,  54,  57,  61,  62,  64,  66,  68,  69,  70,  72,  75,  75,  75,  76,  74,  72,  73,  74,  75,  75,  74,  74,  73,  73,
+             57,  54,  52,  51,  51,  50,  53,  55,  57,  60,  64,  65,  67,  71,  73,  75,  76,  79,  82,  82,  83,  85,  86,  85,  83,  82,  83,  84,  84,  83,  82,  81,
+             66,  63,  60,  59,  59,  57,  60,  61,  62,  66,  69,  71,  73,  77,  80,  82,  84,  88,  92,  92,  93,  95,  94,  95,  96,  93,  92,  93,  94,  93,  91,  90,
+             68,  65,  63,  62,  60,  60,  59,  61,  62,  65,  66,  68,  72,  73,  76,  79,  80,  84,  87,  89,  93,  94,  98,  99,  99, 102, 101, 102, 103, 103, 101,  99,
+             71,  67,  66,  64,  63,  62,  62,  61,  62,  64,  66,  67,  70,  71,  75,  76,  79,  81,  84,  86,  89,  91,  94,  97,  98, 102, 104, 106, 106, 109, 109, 108,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  32,  32,  32,  34,  36,  38,  39,  44,  47,  49,  53,  58,  61,  65,  71,  76,  79,  82,  86,  89,  92,  95,  98, 101, 104, 107, 110, 114,
+             32,  32,  32,  33,  34,  35,  35,  36,  37,  39,  40,  42,  45,  47,  50,  54,  56,  59,  64,  68,  70,  73,  76,  79,  82,  85,  88,  91,  94,  97, 100, 104,
+             36,  35,  35,  35,  36,  38,  40,  42,  48,  49,  50,  53,  56,  57,  60,  63,  65,  68,  73,  76,  79,  81,  84,  87,  89,  92,  93,  94,  95,  96,  98, 100,
+             47,  45,  44,  44,  45,  45,  47,  50,  56,  58,  60,  66,  69,  71,  75,  79,  81,  84,  89,  92,  95,  97, 100,  99, 101, 105, 108, 110, 110, 110, 111, 111,
+             65,  62,  60,  59,  59,  58,  60,  63,  68,  71,  73,  79,  84,  86,  92,  98, 100, 105, 111, 115, 118, 121, 124, 124, 121, 120, 124, 128, 129, 128, 127, 127,
+             79,  75,  72,  71,  71,  69,  71,  73,  78,  81,  84,  90,  95,  97, 103, 110, 113, 118, 125, 130, 133, 136, 140, 145, 148, 143, 141, 146, 151, 149, 147, 145,
+             90,  86,  84,  82,  80,  80,  78,  82,  83,  88,  91,  94, 101, 103, 108, 114, 116, 124, 129, 134, 142, 145, 153, 156, 157, 163, 163, 166, 171, 173, 169, 166,
+             96,  91,  90,  87,  87,  86,  85,  84,  87,  90,  94,  96, 101, 102, 110, 111, 118, 121, 129, 132, 138, 144, 150, 156, 161, 171, 174, 179, 181, 188, 188, 190,
+        }, {
+             32,  31,  30,  31,  33,  37,  39,  42,  49,  48,  48,  49,  50,  51,  52,  54,  55,  57,  60,  62,  63,  64,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,
+             35,  37,  38,  38,  41,  45,  46,  46,  48,  47,  46,  45,  46,  47,  47,  49,  49,  50,  53,  54,  55,  56,  58,  59,  60,  61,  62,  64,  65,  66,  67,  68,
+             48,  47,  46,  46,  47,  47,  48,  50,  53,  53,  53,  53,  54,  54,  54,  55,  56,  56,  58,  60,  60,  61,  62,  63,  64,  65,  65,  65,  65,  65,  65,  65,
+             50,  48,  46,  46,  46,  46,  47,  50,  54,  55,  56,  59,  61,  61,  63,  65,  65,  66,  68,  69,  70,  71,  72,  71,  71,  72,  73,  73,  72,  72,  71,  71,
+             57,  54,  52,  52,  51,  50,  51,  53,  57,  58,  60,  64,  66,  68,  71,  73,  74,  76,  79,  81,  82,  83,  84,  83,  81,  80,  81,  82,  82,  81,  79,  78,
+             63,  60,  58,  57,  56,  54,  55,  57,  60,  62,  64,  67,  70,  71,  75,  78,  79,  82,  85,  87,  89,  90,  91,  93,  94,  91,  89,  90,  92,  90,  89,  87,
+             68,  64,  63,  61,  60,  59,  58,  60,  61,  64,  65,  67,  71,  72,  75,  78,  79,  83,  85,  87,  91,  92,  95,  96,  97,  99,  98,  99, 100, 100,  98,  96,
+             70,  66,  65,  63,  63,  62,  61,  60,  61,  63,  65,  66,  69,  70,  74,  74,  78,  79,  82,  84,  87,  89,  91,  94,  96, 100, 101, 103, 103, 105, 105, 105,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  32,  32,  32,  34,  34,  36,  39,  40,  44,  47,  49,  53,  57,  59,  65,  69,  71,  79,  81,  82,  87,  90,  92,  95,  98, 100, 103, 106,
+             32,  32,  32,  32,  33,  34,  34,  35,  36,  37,  39,  40,  42,  45,  46,  50,  53,  54,  59,  62,  64,  71,  72,  73,  77,  80,  83,  85,  88,  91,  94,  97,
+             36,  35,  35,  34,  36,  37,  38,  42,  44,  48,  50,  51,  53,  56,  57,  60,  63,  64,  68,  71,  73,  79,  80,  81,  85,  87,  88,  88,  89,  90,  92,  93,
+             44,  42,  41,  41,  42,  42,  42,  48,  50,  54,  58,  59,  63,  66,  67,  71,  74,  75,  79,  83,  84,  90,  91,  92,  96,  99, 102, 103, 103, 103, 103, 104,
+             58,  55,  54,  53,  53,  53,  52,  57,  59,  63,  68,  70,  74,  79,  81,  86,  90,  91,  97, 100, 102, 109, 110, 111, 114, 113, 117, 120, 121, 120, 119, 118,
+             79,  75,  73,  72,  71,  70,  69,  73,  75,  78,  84,  85,  90,  95,  97, 103, 108, 111, 118, 122, 125, 133, 135, 136, 140, 135, 133, 137, 141, 139, 137, 135,
+             88,  83,  81,  79,  78,  77,  76,  79,  81,  85,  88,  91,  97,  99, 104, 109, 111, 119, 123, 127, 135, 137, 145, 147, 148, 153, 153, 155, 160, 161, 158, 155,
+             93,  88,  88,  84,  84,  83,  82,  81,  84,  86,  90,  92,  97,  98, 105, 106, 113, 115, 122, 125, 131, 136, 141, 147, 151, 160, 163, 168, 169, 175, 175, 176,
+        }, {
+             32,  31,  31,  30,  33,  35,  37,  42,  44,  49,  48,  48,  49,  50,  51,  52,  54,  54,  57,  59,  60,  63,  64,  64,  66,  67,  68,  69,  70,  71,  72,  73,
+             34,  35,  36,  36,  40,  42,  44,  45,  46,  47,  46,  46,  45,  46,  47,  47,  49,  49,  51,  52,  53,  56,  57,  57,  59,  60,  61,  62,  63,  64,  65,  66,
+             48,  47,  46,  46,  47,  47,  47,  50,  51,  53,  53,  53,  53,  54,  54,  54,  55,  55,  56,  58,  58,  60,  61,  61,  63,  63,  63,  63,  63,  63,  63,  63,
+             49,  47,  46,  45,  46,  45,  45,  49,  51,  53,  56,  56,  58,  59,  60,  61,  62,  62,  64,  65,  65,  67,  68,  68,  69,  70,  71,  71,  70,  70,  69,  69,
+             54,  51,  50,  49,  49,  48,  48,  51,  53,  55,  58,  59,  62,  65,  65,  68,  70,  70,  73,  74,  75,  77,  78,  78,  79,  78,  79,  80,  80,  78,  77,  76,
+             63,  60,  58,  57,  56,  55,  54,  57,  59,  60,  64,  65,  67,  70,  71,  75,  77,  78,  82,  84,  85,  89,  89,  90,  91,  88,  87,  88,  89,  88,  86,  84,
+             67,  63,  62,  60,  59,  58,  57,  59,  60,  63,  64,  66,  70,  70,  73,  76,  77,  81,  83,  85,  89,  90,  93,  94,  94,  96,  96,  96,  97,  97,  95,  93,
+             69,  65,  65,  62,  62,  61,  60,  59,  61,  62,  64,  65,  68,  68,  72,  73,  76,  77,  81,  82,  85,  87,  89,  92,  93,  97,  98, 100, 100, 102, 102, 101,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  34,  35,  36,  39,  41,  44,  47,  48,  53,  55,  58,  63,  65,  71,  74,  79,  82,  82,  87,  89,  92,  94,  97,  99,
+             31,  32,  32,  32,  33,  33,  34,  34,  35,  36,  36,  39,  40,  42,  44,  45,  50,  51,  54,  58,  59,  64,  67,  71,  73,  74,  78,  81,  83,  85,  88,  91,
+             35,  35,  34,  34,  35,  36,  37,  39,  41,  45,  46,  48,  49,  51,  53,  54,  57,  59,  61,  65,  66,  71,  73,  77,  79,  79,  83,  83,  84,  85,  86,  87,
+             44,  42,  41,  41,  42,  42,  42,  44,  48,  52,  54,  58,  60,  63,  66,  67,  71,  72,  75,  78,  79,  84,  86,  90,  92,  92,  96,  97,  97,  97,  97,  97,
+             53,  51,  50,  49,  49,  50,  49,  51,  54,  59,  60,  65,  67,  71,  75,  76,  82,  84,  87,  91,  92,  97, 100, 104, 105, 106, 110, 113, 114, 112, 111, 110,
+             65,  62,  61,  59,  59,  59,  58,  60,  63,  67,  68,  73,  76,  79,  84,  85,  92,  94,  98, 103, 105, 111, 113, 118, 120, 121, 125, 128, 132, 130, 128, 126,
+             82,  78,  76,  74,  73,  73,  71,  73,  76,  79,  80,  86,  88,  92,  97,  98, 106, 108, 112, 118, 120, 127, 131, 136, 139, 139, 144, 145, 150, 151, 147, 144,
+             90,  86,  85,  82,  81,  80,  79,  78,  81,  83,  87,  88,  93,  94, 101, 101, 108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153, 157, 157, 163, 163, 163,
+        }, {
+             32,  31,  31,  30,  33,  33,  37,  39,  42,  47,  49,  48,  48,  49,  50,  50,  52,  53,  54,  56,  57,  60,  61,  63,  64,  64,  66,  67,  68,  69,  70,  70,
+             33,  34,  34,  35,  37,  38,  43,  43,  44,  46,  47,  46,  46,  45,  46,  46,  47,  48,  49,  51,  51,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
+             45,  45,  45,  44,  46,  46,  47,  48,  49,  51,  52,  51,  51,  51,  52,  52,  53,  53,  54,  55,  55,  57,  58,  59,  60,  60,  61,  61,  61,  61,  61,  61,
+             49,  47,  46,  45,  45,  46,  45,  47,  49,  53,  53,  56,  57,  58,  59,  59,  61,  61,  62,  63,  64,  65,  66,  67,  68,  68,  69,  69,  68,  68,  67,  67,
+             52,  50,  49,  48,  47,  47,  47,  48,  50,  53,  54,  57,  59,  61,  63,  64,  66,  67,  68,  70,  70,  72,  73,  75,  75,  75,  77,  78,  77,  76,  75,  74,
+             57,  54,  53,  52,  51,  51,  50,  51,  53,  56,  57,  60,  61,  64,  66,  67,  71,  72,  73,  76,  76,  79,  80,  82,  83,  83,  84,  85,  86,  85,  83,  82,
+             64,  61,  60,  58,  57,  57,  55,  56,  58,  61,  61,  64,  66,  68,  71,  71,  75,  77,  79,  82,  83,  86,  87,  90,  91,  91,  93,  93,  94,  94,  92,  90,
+             68,  64,  64,  61,  61,  60,  59,  58,  60,  61,  63,  64,  67,  67,  71,  71,  74,  75,  79,  80,  83,  85,  87,  89,  91,  94,  95,  97,  97,  99,  98,  98,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  33,  34,  36,  36,  39,  40,  44,  46,  48,  52,  53,  58,  58,  65,  66,  71,  74,  79,  81,  82,  86,  88,  91,  93,
+             31,  32,  32,  32,  32,  33,  33,  33,  34,  34,  35,  35,  38,  39,  41,  43,  45,  48,  49,  53,  54,  59,  60,  65,  67,  72,  73,  74,  78,  80,  82,  85,
+             33,  33,  33,  33,  34,  35,  36,  36,  38,  39,  42,  42,  44,  45,  46,  48,  50,  52,  53,  57,  57,  62,  63,  67,  69,  73,  75,  75,  78,  80,  80,  81,
+             40,  39,  39,  38,  38,  39,  40,  41,  44,  45,  51,  51,  54,  56,  59,  60,  62,  65,  66,  69,  70,  74,  75,  79,  81,  85,  86,  87,  90,  90,  90,  90,
+             51,  49,  49,  47,  47,  48,  48,  48,  52,  53,  58,  59,  63,  65,  69,  72,  74,  78,  79,  83,  84,  89,  90,  94,  97, 101, 102, 103, 106, 105, 103, 103,
+             65,  62,  61,  59,  59,  59,  58,  58,  62,  63,  68,  68,  73,  75,  79,  82,  85,  90,  92,  97,  98, 105, 106, 111, 113, 118, 120, 121, 124, 122, 119, 117,
+             79,  75,  74,  72,  71,  71,  69,  69,  72,  73,  78,  79,  84,  85,  90,  93,  96, 101, 103, 109, 110, 118, 119, 125, 128, 133, 135, 136, 140, 140, 137, 134,
+             87,  83,  82,  79,  79,  78,  77,  75,  78,  80,  84,  85,  89,  90,  96,  97, 103, 105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151, 152,
+        }, {
+             32,  31,  31,  30,  31,  33,  35,  37,  41,  42,  49,  49,  48,  48,  49,  49,  50,  51,  52,  54,  54,  57,  57,  60,  61,  63,  64,  64,  66,  67,  68,  68,
+             32,  33,  33,  33,  35,  37,  39,  41,  43,  43,  47,  47,  46,  46,  45,  46,  46,  47,  48,  49,  50,  52,  52,  54,  55,  57,  58,  58,  59,  60,  61,  62,
+             40,  41,  41,  42,  43,  44,  46,  47,  48,  48,  50,  50,  49,  49,  48,  49,  49,  49,  50,  51,  51,  52,  53,  55,  56,  57,  58,  58,  59,  59,  59,  59,
+             49,  47,  47,  45,  46,  46,  46,  46,  49,  49,  53,  53,  54,  55,  56,  57,  57,  58,  58,  59,  59,  60,  61,  62,  63,  64,  65,  65,  66,  66,  65,  65,
+             51,  49,  49,  47,  47,  47,  47,  46,  49,  50,  54,  54,  57,  58,  61,  62,  63,  64,  65,  67,  67,  69,  69,  71,  72,  73,  73,  74,  75,  74,  72,  71,
+             57,  54,  54,  52,  51,  51,  50,  50,  52,  53,  57,  57,  60,  61,  64,  65,  67,  69,  71,  73,  73,  76,  77,  79,  80,  82,  82,  83,  84,  82,  81,  79,
+             63,  60,  59,  57,  57,  56,  55,  54,  57,  57,  60,  61,  64,  65,  67,  69,  71,  73,  75,  77,  78,  82,  82,  85,  86,  89,  89,  90,  91,  91,  89,  87,
+             67,  63,  63,  60,  60,  59,  58,  57,  59,  60,  62,  63,  65,  66,  69,  70,  73,  74,  77,  78,  81,  83,  85,  87,  88,  92,  92,  94,  94,  96,  95,  95,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  32,  34,  34,  36,  36,  39,  39,  44,  44,  48,  48,  53,  53,  58,  58,  65,  65,  71,  71,  79,  79,  82,  82,  87,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  37,  37,  41,  41,  45,  45,  49,  49,  54,  54,  60,  60,  65,  65,  72,  72,  75,  75,  79,
+             32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  43,  43,  46,  46,  50,  50,  54,  54,  58,  58,  63,  63,  70,  70,  72,  72,  76,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,  48,  50,  50,  53,  53,  56,  56,  60,  60,  63,  63,  68,  68,  73,  73,  79,  79,  81,  81,  84,
+             44,  42,  42,  41,  41,  42,  42,  42,  42,  48,  48,  54,  54,  58,  58,  63,  63,  67,  67,  71,  71,  75,  75,  79,  79,  84,  84,  90,  90,  92,  92,  96,
+             53,  51,  51,  49,  49,  50,  50,  49,  49,  54,  54,  60,  60,  65,  65,  71,  71,  76,  76,  82,  82,  87,  87,  92,  92,  97,  97, 104, 104, 106, 106, 109,
+             65,  62,  62,  59,  59,  59,  59,  58,  58,  63,  63,  68,  68,  73,  73,  79,  79,  85,  85,  92,  92,  98,  98, 105, 105, 111, 111, 118, 118, 121, 121, 124,
+             79,  75,  75,  72,  72,  71,  71,  69,  69,  73,  73,  78,  78,  84,  84,  90,  90,  96,  96, 103, 103, 110, 110, 118, 118, 125, 125, 133, 133, 136, 136, 141,
+        }, {
+             32,  31,  31,  30,  30,  33,  33,  37,  37,  42,  42,  49,  49,  48,  48,  49,  49,  50,  50,  52,  52,  54,  54,  57,  57,  60,  60,  63,  63,  64,  64,  66,
+             31,  31,  31,  32,  32,  36,  36,  40,  40,  43,  43,  46,  46,  46,  46,  45,  45,  46,  46,  48,  48,  50,  50,  52,  52,  54,  54,  57,  57,  59,  59,  60,
+             37,  38,  38,  40,  40,  43,  43,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  47,  47,  49,  49,  50,  50,  52,  52,  55,  55,  56,  56,  57,
+             48,  47,  47,  46,  46,  47,  47,  47,  47,  50,  50,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  55,  55,  56,  56,  58,  58,  60,  60,  61,  61,  63,
+             49,  47,  47,  45,  45,  46,  46,  45,  45,  49,  49,  53,  53,  56,  56,  58,  58,  59,  59,  61,  61,  62,  62,  64,  64,  65,  65,  67,  67,  68,  68,  69,
+             52,  50,  50,  48,  48,  47,  47,  47,  47,  50,  50,  54,  54,  57,  57,  61,  61,  64,  64,  66,  66,  68,  68,  70,  70,  72,  72,  75,  75,  75,  75,  77,
+             57,  54,  54,  52,  52,  51,  51,  50,  50,  53,  53,  57,  57,  60,  60,  64,  64,  67,  67,  71,  71,  73,  73,  76,  76,  79,  79,  82,  82,  83,  83,  84,
+             63,  60,  60,  57,  57,  56,  56,  54,  54,  57,  57,  60,  60,  64,  64,  67,  67,  71,  71,  75,  75,  78,  78,  82,  82,  85,  85,  89,  89,  90,  90,  92,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  34,  35,  36,  38,  39,  41,  44,  44,  48,  48,  53,  53,  57,  58,  61,  65,  67,  71,  72,  79,  79,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  36,  37,  39,  41,  42,  45,  45,  49,  49,  52,  54,  57,  60,  61,  65,  66,  72,  72,
+             32,  32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  41,  43,  43,  46,  46,  49,  50,  52,  54,  56,  58,  60,  63,  64,  70,  70,
+             36,  35,  35,  35,  34,  35,  36,  37,  38,  39,  42,  42,  47,  48,  49,  50,  51,  53,  54,  56,  56,  59,  60,  62,  63,  66,  68,  69,  73,  73,  79,  79,
+             44,  42,  42,  41,  41,  41,  42,  42,  42,  43,  48,  48,  52,  54,  56,  58,  60,  63,  64,  67,  67,  71,  71,  74,  75,  77,  79,  81,  84,  85,  90,  90,
+             53,  51,  51,  50,  49,  49,  50,  49,  49,  50,  54,  54,  59,  60,  63,  65,  67,  71,  72,  76,  76,  81,  82,  85,  87,  89,  92,  94,  97,  98, 104, 104,
+             62,  60,  59,  58,  57,  57,  57,  56,  56,  56,  61,  61,  65,  66,  69,  71,  74,  78,  79,  83,  83,  89,  90,  94,  95,  98, 102, 103, 108, 108, 115, 115,
+             73,  70,  69,  67,  66,  66,  65,  65,  64,  64,  69,  69,  73,  74,  77,  79,  81,  85,  86,  91,  91,  98,  99, 103, 105, 108, 112, 114, 119, 119, 127, 127,
+        }, {
+             32,  31,  31,  30,  30,  32,  33,  34,  37,  37,  42,  42,  47,  49,  48,  48,  48,  49,  49,  50,  50,  52,  52,  53,  54,  55,  57,  58,  60,  60,  63,  63,
+             31,  31,  31,  32,  32,  34,  36,  37,  40,  40,  43,  43,  46,  46,  46,  46,  45,  45,  45,  46,  46,  48,  48,  49,  50,  51,  52,  53,  54,  55,  57,  57,
+             37,  38,  38,  39,  40,  41,  43,  44,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  46,  47,  47,  48,  49,  49,  50,  51,  52,  53,  55,  55,
+             48,  47,  47,  46,  46,  46,  47,  47,  47,  48,  50,  50,  52,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  55,  55,  56,  56,  57,  58,  59,  60,  60,
+             49,  47,  47,  46,  45,  45,  46,  45,  45,  46,  49,  49,  53,  53,  55,  56,  57,  58,  58,  59,  59,  61,  61,  62,  62,  63,  64,  64,  65,  65,  67,  67,
+             52,  50,  50,  48,  48,  48,  47,  47,  47,  47,  50,  50,  53,  54,  56,  57,  59,  61,  62,  64,  64,  66,  66,  68,  68,  69,  70,  71,  72,  73,  75,  75,
+             56,  54,  53,  52,  51,  51,  50,  50,  49,  49,  53,  53,  55,  56,  58,  59,  61,  63,  64,  66,  66,  69,  70,  71,  72,  74,  75,  76,  77,  78,  80,  80,
+             61,  58,  57,  56,  55,  54,  54,  53,  52,  53,  56,  56,  58,  59,  61,  62,  63,  66,  66,  69,  69,  72,  73,  75,  76,  78,  79,  80,  82,  83,  86,  86,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  33,  34,  34,  36,  36,  38,  39,  41,  44,  44,  47,  48,  50,  53,  53,  57,  58,  61,  65,  65,  70,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  37,  37,  39,  41,  41,  44,  45,  46,  49,  49,  53,  54,  56,  60,  60,  64,
+             32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  38,  38,  40,  40,  41,  43,  43,  45,  46,  47,  50,  50,  53,  54,  56,  58,  58,  62,
+             35,  35,  35,  34,  34,  34,  35,  36,  36,  37,  37,  40,  41,  43,  46,  46,  47,  48,  49,  51,  51,  53,  54,  55,  57,  57,  60,  61,  63,  66,  66,  70,
+             39,  38,  38,  37,  37,  37,  38,  38,  39,  40,  40,  43,  44,  46,  50,  50,  52,  53,  54,  57,  57,  59,  60,  61,  64,  64,  67,  68,  69,  72,  72,  76,
+             44,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  46,  48,  50,  54,  54,  57,  58,  60,  63,  63,  66,  67,  68,  71,  71,  74,  75,  77,  79,  79,  83,
+             53,  52,  51,  50,  49,  49,  49,  50,  49,  49,  49,  53,  54,  56,  60,  60,  64,  65,  67,  71,  71,  75,  76,  78,  82,  82,  86,  87,  89,  92,  92,  96,
+             65,  63,  62,  61,  59,  59,  59,  59,  58,  58,  58,  62,  63,  65,  68,  68,  72,  73,  76,  79,  79,  84,  85,  88,  92,  92,  97,  98, 100, 105, 105, 109,
+        }, {
+             32,  31,  31,  31,  30,  30,  33,  33,  35,  37,  37,  41,  42,  44,  49,  49,  48,  48,  48,  49,  49,  50,  50,  51,  52,  52,  54,  54,  55,  57,  57,  59,
+             31,  31,  31,  32,  32,  32,  35,  36,  37,  40,  40,  42,  43,  44,  46,  46,  46,  46,  45,  45,  45,  46,  46,  47,  48,  48,  49,  50,  51,  52,  52,  54,
+             37,  38,  38,  39,  40,  40,  42,  43,  44,  47,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  48,  49,  49,  50,  50,  52,
+             45,  45,  45,  45,  44,  44,  46,  46,  46,  47,  47,  49,  49,  50,  52,  52,  51,  51,  51,  51,  51,  52,  52,  52,  53,  53,  54,  54,  54,  55,  55,  57,
+             48,  47,  47,  46,  45,  45,  46,  46,  46,  47,  47,  49,  50,  51,  53,  53,  54,  54,  54,  55,  55,  56,  56,  56,  57,  57,  58,  58,  58,  59,  59,  61,
+             49,  47,  47,  46,  45,  45,  45,  46,  45,  45,  45,  48,  49,  51,  53,  53,  55,  56,  57,  58,  58,  59,  59,  60,  61,  61,  62,  62,  63,  64,  64,  65,
+             52,  50,  50,  49,  48,  48,  47,  47,  47,  47,  47,  50,  50,  52,  54,  54,  57,  57,  59,  61,  61,  63,  64,  65,  66,  66,  68,  68,  69,  70,  70,  72,
+             57,  55,  54,  53,  52,  52,  51,  51,  51,  50,  50,  52,  53,  54,  57,  57,  59,  60,  61,  64,  64,  66,  67,  68,  71,  71,  73,  73,  74,  76,  76,  78,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  34,  34,  35,  36,  36,  38,  39,  39,  42,  44,  44,  47,  48,  49,  53,  53,  55,  58,  58,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  36,  37,  37,  40,  41,  41,  44,  45,  46,  49,  49,  51,  54,  54,
+             32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  35,  35,  35,  36,  36,  37,  37,  37,  39,  40,  40,  42,  42,  43,  45,  46,  47,  49,  50,  51,  54,  54,
+             32,  33,  33,  33,  33,  33,  33,  34,  34,  35,  36,  36,  36,  38,  38,  39,  40,  40,  41,  42,  42,  44,  45,  45,  47,  48,  48,  51,  51,  53,  55,  55,
+             36,  35,  35,  35,  35,  34,  35,  36,  36,  37,  38,  38,  40,  42,  42,  45,  48,  48,  49,  50,  50,  52,  53,  54,  56,  56,  57,  59,  60,  61,  63,  63,
+             44,  43,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  44,  48,  48,  50,  54,  54,  56,  58,  58,  61,  63,  63,  66,  67,  67,  71,  71,  72,  75,  75,
+             47,  46,  45,  45,  44,  44,  44,  45,  45,  45,  45,  45,  47,  50,  50,  53,  56,  56,  58,  60,  60,  64,  66,  66,  69,  70,  71,  74,  75,  76,  79,  79,
+             53,  52,  51,  51,  49,  49,  49,  49,  50,  49,  49,  49,  51,  54,  54,  57,  60,  60,  63,  65,  65,  69,  71,  72,  75,  76,  77,  81,  82,  83,  87,  87,
+        }, {
+             32,  31,  31,  31,  30,  30,  31,  33,  33,  34,  37,  37,  39,  42,  42,  45,  49,  49,  48,  48,  48,  49,  49,  49,  50,  50,  51,  52,  52,  53,  54,  54,
+             31,  31,  31,  31,  32,  32,  33,  35,  36,  37,  40,  40,  41,  43,  43,  44,  46,  46,  46,  46,  46,  45,  45,  45,  46,  46,  47,  48,  48,  48,  50,  50,
+             35,  36,  37,  37,  38,  38,  38,  41,  41,  42,  45,  45,  46,  46,  46,  47,  48,  48,  47,  46,  46,  46,  45,  46,  46,  46,  47,  47,  47,  48,  49,  49,
+             38,  39,  40,  40,  40,  41,  41,  43,  44,  45,  47,  47,  47,  48,  48,  48,  49,  49,  48,  48,  48,  47,  47,  47,  48,  48,  48,  48,  48,  49,  50,  50,
+             48,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  48,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  54,  54,  55,  55,
+             49,  48,  47,  47,  45,  45,  45,  45,  46,  45,  45,  45,  47,  49,  49,  51,  53,  53,  55,  56,  56,  57,  58,  58,  59,  59,  60,  61,  61,  61,  62,  62,
+             50,  49,  48,  48,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  52,  54,  54,  55,  56,  56,  58,  59,  60,  61,  61,  61,  63,  63,  63,  65,  65,
+             52,  50,  50,  50,  48,  48,  48,  47,  47,  47,  47,  47,  48,  50,  50,  52,  54,  54,  56,  57,  57,  60,  61,  61,  63,  64,  64,  66,  66,  67,  68,  68,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  34,  34,  34,  35,  36,  36,  38,  39,  39,  41,  44,  44,  44,  47,  48,  48,  51,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  34,  36,  37,  37,  39,  41,  41,  42,  44,  45,  45,  47,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  36,  36,  37,  39,  39,  40,  42,  42,  42,  44,  45,  45,  48,
+             32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  38,  38,  38,  40,  40,  40,  41,  43,  43,  43,  45,  46,  46,  48,
+             35,  35,  35,  35,  34,  34,  34,  34,  35,  36,  36,  37,  37,  37,  39,  41,  41,  42,  45,  46,  46,  47,  48,  48,  49,  51,  51,  51,  53,  54,  54,  56,
+             36,  35,  35,  35,  35,  34,  34,  35,  36,  36,  36,  37,  38,  38,  40,  42,  42,  43,  47,  48,  48,  49,  50,  50,  51,  53,  53,  54,  56,  56,  56,  58,
+             44,  43,  42,  42,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  44,  48,  48,  49,  52,  54,  54,  56,  58,  58,  60,  63,  63,  64,  66,  67,  67,  69,
+             47,  46,  45,  45,  45,  44,  44,  44,  44,  45,  45,  45,  45,  45,  47,  50,  50,  51,  55,  56,  56,  58,  60,  60,  62,  66,  66,  67,  69,  70,  70,  73,
+        }, {
+             32,  31,  31,  31,  31,  30,  30,  31,  33,  33,  33,  35,  37,  37,  39,  42,  42,  43,  47,  49,  49,  48,  48,  48,  48,  49,  49,  49,  50,  50,  50,  51,
+             31,  31,  31,  31,  32,  32,  32,  33,  35,  36,  36,  38,  40,  40,  41,  43,  43,  43,  46,  46,  46,  46,  46,  46,  45,  45,  45,  45,  46,  46,  46,  47,
+             33,  33,  34,  34,  34,  35,  35,  35,  37,  38,  38,  41,  43,  43,  43,  44,  44,  45,  46,  47,  47,  46,  46,  46,  46,  45,  45,  45,  46,  46,  46,  47,
+             37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,  45,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  46,  46,  46,  46,  46,  46,  46,  47,
+             45,  45,  45,  45,  45,  44,  44,  45,  46,  46,  46,  47,  47,  47,  48,  49,  49,  50,  51,  52,  52,  52,  51,  51,  51,  51,  51,  52,  52,  52,  52,  52,
+             48,  47,  47,  47,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  48,  50,  50,  50,  52,  53,  53,  53,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,
+             49,  48,  47,  47,  46,  45,  45,  45,  45,  46,  46,  45,  45,  45,  47,  49,  49,  50,  53,  53,  53,  55,  56,  56,  57,  58,  58,  58,  59,  59,  59,  60,
+             50,  49,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  50,  53,  54,  54,  55,  56,  56,  57,  59,  59,  60,  61,  61,  61,  62,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,  36,  36,  36,  37,  39,  39,  39,  41,  44,  44,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  36,  37,  37,  37,  39,  41,  41,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  36,  37,  37,  37,  39,  41,  41,
+             32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,
+             32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,  49,  50,  50,  50,  52,  53,  53,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,  49,  50,  50,  50,  52,  53,  53,
+             44,  43,  42,  42,  42,  41,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  42,  45,  48,  48,  48,  50,  54,  54,  54,  56,  58,  58,  58,  60,  63,  63,
+        }, {
+             32,  31,  31,  31,  31,  31,  30,  30,  30,  32,  33,  33,  33,  35,  37,  37,  37,  39,  42,  42,  42,  45,  49,  49,  49,  48,  48,  48,  48,  48,  49,  49,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  36,  36,  36,  38,  40,  40,  40,  41,  43,  43,  43,  44,  46,  46,  46,  46,  46,  46,  46,  45,  45,  45,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  36,  36,  36,  38,  40,  40,  40,  41,  43,  43,  43,  44,  46,  46,  46,  46,  46,  46,  46,  45,  45,  45,
+             37,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  47,  46,  46,  46,
+             37,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  47,  46,  46,  46,
+             48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  49,  50,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  53,  53,
+             48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  49,  50,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  53,  53,
+             49,  48,  47,  47,  47,  46,  45,  45,  45,  45,  46,  46,  46,  45,  45,  45,  45,  47,  49,  49,  49,  51,  53,  53,  53,  54,  56,  56,  56,  57,  58,  58,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  34,  35,  36,  36,  36,  37,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  36,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  36,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  35,  35,  36,  36,  36,  36,  37,
+             32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,
+             32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,
+             35,  35,  35,  35,  35,  35,  34,  34,  34,  34,  34,  35,  35,  36,  36,  36,  36,  37,  37,  37,  37,  39,  40,  41,  41,  41,  43,  45,  46,  46,  46,  46,
+             36,  35,  35,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  41,  42,  42,  42,  44,  47,  48,  48,  48,  49,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  31,  33,  33,  33,  33,  35,  36,  37,  37,  37,  39,  41,  42,  42,  42,  44,  47,  49,  49,  49,  49,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  33,  34,  35,  35,  35,  37,  38,  39,  39,  39,  40,  42,  42,  42,  42,  44,  46,  47,  47,  47,  47,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,  37,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  46,  46,  46,  46,
+             33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  36,  37,  38,  38,  38,  40,  42,  43,  43,  43,  43,  44,  44,  44,  44,  45,  46,  47,  47,  47,  47,
+             37,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  48,  47,
+             37,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  48,  47,
+             45,  45,  45,  45,  45,  45,  45,  44,  44,  44,  44,  45,  46,  46,  46,  46,  46,  47,  47,  47,  47,  48,  49,  49,  49,  49,  50,  51,  52,  52,  52,  52,
+             48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  49,  50,  50,  50,  51,  52,  53,  53,  53,  53,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,
+             32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  36,  36,  36,  36,  36,  36,  37,  38,  38,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  30,  31,  32,  33,  33,  33,  33,  33,  34,  35,  36,  37,  37,  37,  37,  39,  40,  42,  42,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  34,  35,  35,  35,  35,  36,  37,  38,  39,  39,  39,  39,  40,  41,  42,  42,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  33,  34,  35,  36,  36,  36,  36,  37,  38,  39,  40,  40,  40,  40,  41,  42,  43,  43,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  33,  34,  35,  36,  36,  36,  36,  37,  38,  39,  40,  40,  40,  40,  41,  42,  43,  43,
+             33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  36,  37,  38,  38,  38,  38,  39,  41,  42,  43,  43,  43,  43,  43,  44,  44,  44,
+             37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  45,  47,  47,  47,  47,  47,  47,  47,  47,  47,
+             37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  45,  47,  47,  47,  47,  47,  47,  47,  47,  47,
+             38,  39,  39,  40,  40,  40,  40,  40,  40,  40,  41,  41,  41,  41,  41,  42,  43,  44,  44,  44,  44,  45,  46,  47,  47,  47,  47,  47,  47,  47,  48,  48,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  30,  30,  30,  31,  31,  32,  33,  33,  33,  33,  33,  33,  33,  34,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  34,  34,  34,  34,  34,  34,  34,  35,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  35,  35,  35,  35,  35,  35,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  38,  38,  38,  38,  38,  39,
+             35,  35,  36,  36,  36,  37,  37,  37,  37,  37,  37,  37,  37,  37,  38,  38,  38,  38,  38,  38,  38,  38,  39,  40,  40,  41,  41,  41,  41,  41,  41,  42,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  30,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_32x16[][2][512] = {
+    {
+        {
+             32,  31,  31,  31,  32,  32,  34,  35,  36,  39,  44,  46,  48,  53,  58,  61,  65,  71,  79,  81,  82,  88,  91,  94,  97, 100, 103, 107, 110, 114, 118, 122,
+             31,  32,  32,  32,  32,  33,  34,  34,  34,  37,  41,  43,  45,  49,  54,  57,  60,  65,  72,  74,  75,  80,  83,  85,  88,  91,  94,  97, 101, 104, 108, 111,
+             32,  32,  33,  33,  34,  35,  37,  37,  38,  40,  43,  44,  46,  50,  54,  56,  58,  63,  70,  71,  72,  77,  80,  83,  86,  89,  93,  96, 100, 104, 107, 111,
+             34,  34,  33,  34,  35,  37,  39,  41,  43,  45,  48,  49,  51,  54,  58,  60,  63,  68,  74,  75,  76,  80,  81,  82,  85,  87,  90,  93,  97, 100, 103, 107,
+             36,  35,  34,  35,  36,  38,  42,  45,  48,  50,  53,  55,  56,  60,  63,  66,  68,  73,  79,  80,  81,  85,  88,  91,  94,  97,  98, 100, 101, 103, 105, 107,
+             44,  42,  41,  41,  42,  42,  48,  50,  54,  58,  63,  65,  67,  71,  75,  77,  79,  84,  90,  91,  92,  97, 100, 100, 100, 100, 101, 104, 108, 112, 115, 119,
+             53,  51,  49,  49,  50,  49,  54,  57,  60,  65,  71,  73,  76,  82,  87,  89,  92,  97, 104, 105, 106, 108, 106, 105, 107, 111, 114, 117, 117, 117, 118, 119,
+             59,  56,  54,  54,  54,  53,  58,  61,  64,  69,  75,  78,  80,  87,  92,  95,  98, 103, 110, 112, 113, 115, 114, 118, 123, 121, 120, 119, 123, 127, 131, 136,
+             65,  62,  59,  59,  59,  58,  63,  65,  68,  73,  79,  82,  85,  92,  98, 101, 105, 111, 118, 119, 121, 126, 130, 131, 128, 127, 131, 136, 138, 137, 136, 136,
+             79,  75,  72,  71,  71,  69,  73,  76,  78,  84,  90,  93,  96, 103, 110, 114, 118, 125, 133, 135, 136, 142, 142, 137, 140, 145, 144, 142, 141, 146, 151, 156,
+             87,  82,  78,  78,  77,  75,  79,  82,  84,  89,  95,  98, 102, 109, 116, 120, 124, 132, 141, 142, 144, 149, 148, 153, 157, 152, 150, 155, 161, 159, 157, 156,
+             90,  85,  82,  81,  80,  78,  78,  83,  87,  89,  93, 100, 102, 107, 115, 118, 123, 132, 136, 140, 151, 153, 155, 160, 161, 164, 170, 168, 165, 167, 172, 178,
+             93,  88,  86,  84,  82,  82,  80,  84,  86,  91,  94,  98, 105, 107, 112, 119, 122, 130, 135, 140, 149, 153, 162, 165, 167, 173, 174, 177, 183, 185, 182, 179,
+             96,  91,  90,  87,  86,  86,  83,  84,  89,  91,  95, 100, 102, 110, 111, 118, 123, 128, 135, 138, 149, 152, 160, 167, 173, 178, 180, 187, 188, 190, 197, 203,
+             99,  94,  93,  90,  89,  89,  88,  87,  90,  93,  97,  99, 105, 107, 115, 116, 124, 127, 135, 139, 146, 152, 159, 166, 171, 182, 186, 191, 193, 201, 203, 204,
+            102,  97,  97,  93,  93,  92,  92,  90,  90,  96,  97, 103, 104, 111, 112, 120, 121, 130, 131, 142, 143, 154, 155, 168, 169, 181, 183, 198, 200, 206, 208, 217,
+        }, {
+             32,  31,  30,  32,  33,  37,  42,  45,  49,  48,  49,  49,  50,  52,  54,  55,  57,  60,  63,  64,  64,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
+             31,  31,  32,  34,  36,  40,  43,  44,  46,  46,  45,  46,  46,  48,  50,  51,  52,  54,  57,  58,  59,  61,  62,  62,  63,  64,  65,  66,  67,  68,  69,  70,
+             37,  38,  40,  41,  43,  47,  47,  47,  48,  47,  46,  46,  46,  47,  49,  49,  50,  52,  55,  55,  56,  58,  59,  60,  62,  63,  64,  65,  67,  68,  69,  70,
+             42,  42,  42,  44,  45,  47,  48,  49,  50,  50,  49,  49,  50,  50,  52,  52,  53,  55,  58,  58,  58,  60,  60,  60,  60,  61,  62,  63,  64,  65,  66,  67,
+             48,  47,  46,  46,  47,  47,  50,  51,  53,  53,  53,  53,  54,  54,  55,  56,  56,  58,  60,  61,  61,  63,  64,  65,  66,  67,  66,  66,  66,  66,  67,  67,
+             49,  47,  45,  45,  46,  45,  49,  51,  53,  56,  58,  59,  59,  61,  62,  63,  64,  65,  67,  68,  68,  69,  71,  70,  69,  68,  68,  69,  70,  71,  72,  73,
+             52,  50,  48,  48,  47,  47,  50,  52,  54,  57,  61,  62,  64,  66,  68,  69,  70,  72,  75,  75,  75,  76,  74,  72,  73,  74,  75,  75,  74,  74,  73,  73,
+             54,  52,  50,  49,  49,  48,  52,  54,  55,  59,  62,  64,  65,  68,  71,  72,  73,  75,  78,  78,  79,  79,  78,  79,  81,  79,  78,  76,  77,  78,  80,  81,
+             57,  54,  52,  51,  51,  50,  53,  55,  57,  60,  64,  65,  67,  71,  73,  75,  76,  79,  82,  82,  83,  85,  86,  85,  83,  82,  83,  84,  84,  83,  82,  81,
+             63,  60,  57,  57,  56,  54,  57,  59,  60,  64,  67,  69,  71,  75,  78,  80,  82,  85,  89,  89,  90,  92,  91,  88,  89,  90,  89,  87,  86,  87,  88,  90,
+             66,  63,  60,  59,  59,  57,  60,  61,  62,  66,  69,  71,  73,  77,  80,  82,  84,  88,  92,  92,  93,  95,  94,  95,  96,  93,  92,  93,  94,  93,  91,  90,
+             67,  64,  62,  61,  60,  58,  58,  61,  63,  65,  67,  70,  72,  74,  78,  80,  82,  86,  88,  90,  95,  96,  96,  98,  97,  98, 100,  98,  96,  96,  97,  99,
+             68,  65,  63,  62,  60,  60,  59,  61,  62,  65,  66,  68,  72,  73,  76,  79,  80,  84,  87,  89,  93,  94,  98,  99,  99, 102, 101, 102, 103, 103, 101,  99,
+             69,  66,  65,  63,  62,  61,  60,  60,  63,  64,  66,  68,  70,  73,  74,  78,  80,  82,  85,  87,  91,  92,  96,  98, 101, 102, 103, 105, 105, 105, 107, 108,
+             71,  67,  66,  64,  63,  62,  62,  61,  62,  64,  66,  67,  70,  71,  75,  76,  79,  81,  84,  86,  89,  91,  94,  97,  98, 102, 104, 106, 106, 109, 109, 108,
+             72,  68,  68,  65,  65,  63,  63,  61,  62,  65,  65,  68,  69,  72,  73,  77,  77,  81,  81,  86,  87,  91,  91,  96,  97, 101, 102, 107, 107, 109, 110, 113,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  32,  32,  32,  34,  36,  38,  39,  44,  47,  49,  53,  58,  61,  65,  71,  76,  79,  82,  86,  89,  92,  95,  98, 101, 104, 107, 110, 114,
+             31,  32,  32,  32,  32,  33,  33,  34,  34,  36,  37,  41,  44,  46,  49,  54,  56,  60,  65,  69,  72,  75,  78,  81,  84,  86,  89,  92,  95,  98, 101, 104,
+             32,  32,  32,  33,  34,  35,  35,  36,  37,  39,  40,  42,  45,  47,  50,  54,  56,  59,  64,  68,  70,  73,  76,  79,  82,  85,  88,  91,  94,  97, 100, 104,
+             32,  33,  33,  33,  34,  36,  36,  38,  40,  41,  42,  45,  47,  48,  51,  55,  57,  60,  65,  69,  71,  74,  77,  78,  80,  83,  85,  88,  91,  94,  97, 100,
+             36,  35,  35,  35,  36,  38,  40,  42,  48,  49,  50,  53,  56,  57,  60,  63,  65,  68,  73,  76,  79,  81,  84,  87,  89,  92,  93,  94,  95,  96,  98, 100,
+             44,  42,  41,  41,  42,  42,  44,  48,  54,  56,  58,  63,  66,  67,  71,  75,  77,  79,  84,  88,  90,  92,  95,  95,  95,  95,  95,  98, 101, 105, 108, 111,
+             47,  45,  44,  44,  45,  45,  47,  50,  56,  58,  60,  66,  69,  71,  75,  79,  81,  84,  89,  92,  95,  97, 100,  99, 101, 105, 108, 110, 110, 110, 111, 111,
+             53,  51,  49,  49,  50,  49,  51,  54,  60,  63,  65,  71,  75,  77,  82,  87,  89,  92,  97, 101, 104, 106, 109, 112, 116, 114, 113, 112, 115, 119, 123, 126,
+             65,  62,  60,  59,  59,  58,  60,  63,  68,  71,  73,  79,  84,  86,  92,  98, 100, 105, 111, 115, 118, 121, 124, 124, 121, 120, 124, 128, 129, 128, 127, 127,
+             73,  69,  67,  66,  65,  64,  66,  69,  74,  77,  79,  85,  90,  93,  99, 105, 107, 112, 119, 123, 127, 130, 133, 130, 132, 136, 136, 133, 132, 136, 141, 145,
+             79,  75,  72,  71,  71,  69,  71,  73,  78,  81,  84,  90,  95,  97, 103, 110, 113, 118, 125, 130, 133, 136, 140, 145, 148, 143, 141, 146, 151, 149, 147, 145,
+             87,  83,  80,  79,  78,  76,  76,  80,  84,  86,  90,  96,  99, 103, 111, 114, 118, 126, 130, 134, 143, 146, 147, 152, 151, 155, 160, 158, 154, 156, 161, 166,
+             90,  86,  84,  82,  80,  80,  78,  82,  83,  88,  91,  94, 101, 103, 108, 114, 116, 124, 129, 134, 142, 145, 153, 156, 157, 163, 163, 166, 171, 173, 169, 166,
+             93,  88,  87,  84,  83,  83,  81,  81,  86,  88,  92,  96,  98, 105, 107, 113, 117, 122, 129, 131, 141, 144, 151, 157, 163, 167, 169, 175, 175, 177, 183, 189,
+             96,  91,  90,  87,  87,  86,  85,  84,  87,  90,  94,  96, 101, 102, 110, 111, 118, 121, 129, 132, 138, 144, 150, 156, 161, 171, 174, 179, 181, 188, 188, 190,
+             99,  94,  94,  90,  90,  88,  89,  86,  87,  93,  93,  99,  99, 106, 107, 115, 116, 124, 125, 135, 136, 145, 146, 158, 159, 170, 171, 185, 186, 192, 193, 201,
+        }, {
+             32,  31,  30,  31,  33,  37,  39,  42,  49,  48,  48,  49,  50,  51,  52,  54,  55,  57,  60,  62,  63,  64,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,
+             31,  31,  32,  33,  36,  40,  41,  43,  46,  46,  46,  45,  46,  47,  48,  50,  51,  52,  54,  56,  57,  59,  60,  61,  62,  63,  64,  65,  65,  66,  67,  68,
+             35,  37,  38,  38,  41,  45,  46,  46,  48,  47,  46,  45,  46,  47,  47,  49,  49,  50,  53,  54,  55,  56,  58,  59,  60,  61,  62,  64,  65,  66,  67,  68,
+             38,  40,  40,  41,  44,  47,  47,  48,  49,  48,  48,  47,  48,  48,  48,  50,  50,  51,  53,  55,  56,  57,  58,  58,  59,  60,  60,  61,  62,  63,  64,  65,
+             48,  47,  46,  46,  47,  47,  48,  50,  53,  53,  53,  53,  54,  54,  54,  55,  56,  56,  58,  60,  60,  61,  62,  63,  64,  65,  65,  65,  65,  65,  65,  65,
+             49,  47,  45,  45,  46,  45,  47,  49,  53,  55,  56,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  68,  67,  66,  66,  67,  68,  69,  70,  71,
+             50,  48,  46,  46,  46,  46,  47,  50,  54,  55,  56,  59,  61,  61,  63,  65,  65,  66,  68,  69,  70,  71,  72,  71,  71,  72,  73,  73,  72,  72,  71,  71,
+             52,  50,  48,  48,  47,  47,  48,  50,  54,  56,  57,  61,  63,  64,  66,  68,  69,  70,  72,  74,  75,  75,  76,  78,  79,  77,  76,  74,  75,  76,  77,  78,
+             57,  54,  52,  52,  51,  50,  51,  53,  57,  58,  60,  64,  66,  68,  71,  73,  74,  76,  79,  81,  82,  83,  84,  83,  81,  80,  81,  82,  82,  81,  79,  78,
+             61,  57,  55,  55,  54,  52,  54,  56,  59,  61,  62,  66,  68,  70,  73,  76,  77,  79,  82,  84,  86,  87,  88,  86,  86,  88,  87,  85,  83,  85,  86,  87,
+             63,  60,  58,  57,  56,  54,  55,  57,  60,  62,  64,  67,  70,  71,  75,  78,  79,  82,  85,  87,  89,  90,  91,  93,  94,  91,  89,  90,  92,  90,  89,  87,
+             67,  63,  61,  60,  59,  57,  57,  60,  63,  64,  66,  69,  71,  73,  77,  79,  81,  85,  87,  88,  92,  93,  94,  96,  95,  96,  97,  95,  93,  93,  94,  96,
+             68,  64,  63,  61,  60,  59,  58,  60,  61,  64,  65,  67,  71,  72,  75,  78,  79,  83,  85,  87,  91,  92,  95,  96,  97,  99,  98,  99, 100, 100,  98,  96,
+             69,  65,  64,  62,  61,  61,  59,  59,  62,  63,  65,  67,  68,  72,  73,  76,  78,  81,  84,  85,  89,  90,  93,  96,  98,  99, 100, 102, 102, 102, 103, 105,
+             70,  66,  65,  63,  63,  62,  61,  60,  61,  63,  65,  66,  69,  70,  74,  74,  78,  79,  82,  84,  87,  89,  91,  94,  96, 100, 101, 103, 103, 105, 105, 105,
+             71,  67,  67,  64,  64,  62,  62,  60,  61,  64,  64,  67,  67,  71,  71,  75,  75,  79,  80,  84,  84,  89,  89,  94,  94,  98,  99, 104, 104, 106, 106, 109,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  32,  32,  32,  34,  34,  36,  39,  40,  44,  47,  49,  53,  57,  59,  65,  69,  71,  79,  81,  82,  87,  90,  92,  95,  98, 100, 103, 106,
+             31,  32,  32,  32,  32,  32,  33,  34,  34,  34,  37,  38,  41,  44,  46,  49,  53,  54,  60,  63,  65,  72,  74,  75,  79,  82,  84,  87,  89,  92,  94,  97,
+             32,  32,  32,  32,  33,  34,  34,  35,  36,  37,  39,  40,  42,  45,  46,  50,  53,  54,  59,  62,  64,  71,  72,  73,  77,  80,  83,  85,  88,  91,  94,  97,
+             32,  32,  32,  33,  34,  34,  35,  37,  37,  38,  40,  41,  43,  46,  47,  50,  53,  54,  58,  62,  63,  70,  71,  72,  76,  78,  81,  83,  85,  88,  90,  93,
+             36,  35,  35,  34,  36,  37,  38,  42,  44,  48,  50,  51,  53,  56,  57,  60,  63,  64,  68,  71,  73,  79,  80,  81,  85,  87,  88,  88,  89,  90,  92,  93,
+             39,  38,  38,  37,  39,  40,  40,  45,  47,  51,  54,  55,  58,  61,  62,  65,  68,  69,  73,  76,  78,  84,  85,  86,  90,  89,  90,  92,  95,  98, 101, 104,
+             44,  42,  41,  41,  42,  42,  42,  48,  50,  54,  58,  59,  63,  66,  67,  71,  74,  75,  79,  83,  84,  90,  91,  92,  96,  99, 102, 103, 103, 103, 103, 104,
+             53,  51,  50,  49,  50,  49,  49,  54,  56,  60,  65,  67,  71,  75,  77,  82,  86,  87,  92,  96,  97, 104, 105, 106, 110, 108, 106, 105, 108, 111, 114, 118,
+             58,  55,  54,  53,  53,  53,  52,  57,  59,  63,  68,  70,  74,  79,  81,  86,  90,  91,  97, 100, 102, 109, 110, 111, 114, 113, 117, 120, 121, 120, 119, 118,
+             65,  62,  60,  59,  59,  58,  58,  63,  65,  68,  73,  75,  79,  85,  86,  92,  97,  98, 105, 109, 111, 118, 120, 121, 125, 129, 128, 125, 124, 127, 131, 135,
+             79,  75,  73,  72,  71,  70,  69,  73,  75,  78,  84,  85,  90,  95,  97, 103, 108, 111, 118, 122, 125, 133, 135, 136, 140, 135, 133, 137, 141, 139, 137, 135,
+             81,  77,  75,  74,  72,  71,  70,  75,  77,  80,  85,  87,  91,  97,  99, 105, 110, 112, 119, 124, 127, 135, 137, 139, 143, 146, 150, 148, 144, 146, 150, 154,
+             88,  83,  81,  79,  78,  77,  76,  79,  81,  85,  88,  91,  97,  99, 104, 109, 111, 119, 123, 127, 135, 137, 145, 147, 148, 153, 153, 155, 160, 161, 158, 155,
+             90,  86,  84,  82,  81,  80,  78,  79,  83,  85,  89,  92,  94, 101, 102, 108, 112, 117, 123, 125, 134, 136, 143, 148, 154, 157, 158, 164, 164, 165, 170, 175,
+             93,  88,  88,  84,  84,  83,  82,  81,  84,  86,  90,  92,  97,  98, 105, 106, 113, 115, 122, 125, 131, 136, 141, 147, 151, 160, 163, 168, 169, 175, 175, 176,
+             96,  91,  91,  87,  87,  85,  86,  83,  84,  89,  89,  95,  95, 102, 102, 110, 110, 118, 119, 128, 129, 137, 138, 149, 149, 159, 160, 173, 174, 179, 180, 187,
+        }, {
+             32,  31,  31,  30,  33,  35,  37,  42,  44,  49,  48,  48,  49,  50,  51,  52,  54,  54,  57,  59,  60,  63,  64,  64,  66,  67,  68,  69,  70,  71,  72,  73,
+             31,  31,  32,  32,  36,  38,  40,  43,  44,  46,  46,  45,  45,  46,  47,  48,  49,  50,  52,  54,  54,  57,  58,  59,  60,  61,  62,  63,  64,  65,  65,  66,
+             34,  35,  36,  36,  40,  42,  44,  45,  46,  47,  46,  46,  45,  46,  47,  47,  49,  49,  51,  52,  53,  56,  57,  57,  59,  60,  61,  62,  63,  64,  65,  66,
+             37,  38,  39,  40,  43,  45,  47,  47,  47,  48,  47,  46,  46,  46,  47,  47,  48,  49,  50,  52,  52,  55,  55,  56,  57,  58,  59,  60,  60,  61,  62,  63,
+             48,  47,  46,  46,  47,  47,  47,  50,  51,  53,  53,  53,  53,  54,  54,  54,  55,  55,  56,  58,  58,  60,  61,  61,  63,  63,  63,  63,  63,  63,  63,  63,
+             48,  47,  46,  45,  46,  46,  46,  50,  51,  53,  54,  55,  56,  56,  57,  57,  58,  59,  60,  61,  62,  64,  64,  65,  66,  65,  64,  65,  66,  67,  68,  69,
+             49,  47,  46,  45,  46,  45,  45,  49,  51,  53,  56,  56,  58,  59,  60,  61,  62,  62,  64,  65,  65,  67,  68,  68,  69,  70,  71,  71,  70,  70,  69,  69,
+             52,  50,  48,  48,  47,  47,  47,  50,  52,  54,  57,  58,  61,  63,  64,  66,  68,  68,  70,  72,  72,  75,  75,  75,  77,  75,  74,  72,  73,  74,  75,  76,
+             54,  51,  50,  49,  49,  48,  48,  51,  53,  55,  58,  59,  62,  65,  65,  68,  70,  70,  73,  74,  75,  77,  78,  78,  79,  78,  79,  80,  80,  78,  77,  76,
+             57,  54,  53,  52,  51,  50,  50,  53,  54,  57,  60,  61,  64,  66,  68,  71,  73,  74,  76,  78,  79,  82,  82,  83,  84,  85,  84,  82,  81,  82,  83,  84,
+             63,  60,  58,  57,  56,  55,  54,  57,  59,  60,  64,  65,  67,  70,  71,  75,  77,  78,  82,  84,  85,  89,  89,  90,  91,  88,  87,  88,  89,  88,  86,  84,
+             64,  61,  59,  58,  57,  56,  55,  58,  59,  61,  64,  65,  68,  71,  72,  75,  78,  79,  82,  85,  86,  90,  90,  91,  93,  93,  94,  93,  90,  90,  92,  93,
+             67,  63,  62,  60,  59,  58,  57,  59,  60,  63,  64,  66,  70,  70,  73,  76,  77,  81,  83,  85,  89,  90,  93,  94,  94,  96,  96,  96,  97,  97,  95,  93,
+             68,  64,  63,  61,  60,  60,  58,  58,  61,  62,  64,  66,  67,  71,  71,  75,  77,  79,  82,  83,  87,  88,  91,  93,  95,  97,  97,  99,  99,  99, 100, 101,
+             69,  65,  65,  62,  62,  61,  60,  59,  61,  62,  64,  65,  68,  68,  72,  73,  76,  77,  81,  82,  85,  87,  89,  92,  93,  97,  98, 100, 100, 102, 102, 101,
+             69,  66,  66,  63,  63,  61,  61,  59,  60,  63,  63,  66,  66,  70,  70,  73,  74,  78,  78,  82,  82,  86,  87,  91,  91,  95,  96, 101, 101, 103, 103, 105,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  34,  35,  36,  39,  41,  44,  47,  48,  53,  55,  58,  63,  65,  71,  74,  79,  82,  82,  87,  89,  92,  94,  97,  99,
+             31,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  37,  39,  41,  44,  45,  49,  51,  54,  58,  60,  65,  68,  72,  75,  75,  79,  82,  84,  86,  88,  91,
+             31,  32,  32,  32,  33,  33,  34,  34,  35,  36,  36,  39,  40,  42,  44,  45,  50,  51,  54,  58,  59,  64,  67,  71,  73,  74,  78,  81,  83,  85,  88,  91,
+             32,  32,  32,  33,  34,  34,  35,  36,  37,  38,  38,  40,  41,  43,  45,  46,  50,  51,  54,  57,  58,  63,  66,  70,  72,  72,  76,  78,  80,  82,  85,  87,
+             35,  35,  34,  34,  35,  36,  37,  39,  41,  45,  46,  48,  49,  51,  53,  54,  57,  59,  61,  65,  66,  71,  73,  77,  79,  79,  83,  83,  84,  85,  86,  87,
+             36,  35,  35,  34,  36,  36,  38,  40,  42,  47,  48,  50,  51,  53,  56,  56,  60,  61,  63,  67,  68,  73,  75,  79,  81,  81,  85,  87,  89,  92,  94,  97,
+             44,  42,  41,  41,  42,  42,  42,  44,  48,  52,  54,  58,  60,  63,  66,  67,  71,  72,  75,  78,  79,  84,  86,  90,  92,  92,  96,  97,  97,  97,  97,  97,
+             47,  45,  45,  44,  44,  45,  45,  47,  50,  55,  56,  60,  62,  66,  69,  70,  75,  77,  79,  83,  84,  89,  91,  95,  97,  97, 100,  99, 101, 104, 107, 110,
+             53,  51,  50,  49,  49,  50,  49,  51,  54,  59,  60,  65,  67,  71,  75,  76,  82,  84,  87,  91,  92,  97, 100, 104, 105, 106, 110, 113, 114, 112, 111, 110,
+             62,  59,  58,  57,  57,  57,  56,  58,  61,  65,  66,  71,  74,  78,  82,  83,  90,  92,  95, 100, 102, 108, 110, 115, 117, 117, 120, 118, 116, 119, 123, 126,
+             65,  62,  61,  59,  59,  59,  58,  60,  63,  67,  68,  73,  76,  79,  84,  85,  92,  94,  98, 103, 105, 111, 113, 118, 120, 121, 125, 128, 132, 130, 128, 126,
+             79,  75,  74,  72,  71,  71,  69,  71,  73,  77,  78,  84,  86,  90,  95,  96, 103, 106, 110, 116, 118, 125, 128, 133, 136, 136, 141, 139, 135, 136, 140, 144,
+             82,  78,  76,  74,  73,  73,  71,  73,  76,  79,  80,  86,  88,  92,  97,  98, 106, 108, 112, 118, 120, 127, 131, 136, 139, 139, 144, 145, 150, 151, 147, 144,
+             88,  83,  82,  79,  79,  78,  76,  76,  81,  82,  85,  89,  91,  97,  98, 104, 107, 111, 117, 119, 127, 129, 135, 140, 145, 148, 148, 153, 153, 154, 159, 163,
+             90,  86,  85,  82,  81,  80,  79,  78,  81,  83,  87,  88,  93,  94, 101, 101, 108, 110, 116, 119, 124, 129, 134, 139, 142, 150, 153, 157, 157, 163, 163, 163,
+             93,  88,  88,  84,  84,  82,  83,  80,  80,  86,  86,  91,  91,  97,  98, 105, 105, 112, 113, 121, 122, 130, 130, 140, 140, 149, 150, 161, 162, 166, 167, 173,
+        }, {
+             32,  31,  31,  30,  33,  33,  37,  39,  42,  47,  49,  48,  48,  49,  50,  50,  52,  53,  54,  56,  57,  60,  61,  63,  64,  64,  66,  67,  68,  69,  70,  70,
+             31,  31,  32,  32,  35,  36,  40,  41,  43,  46,  46,  46,  45,  45,  46,  46,  48,  49,  50,  51,  52,  54,  56,  57,  58,  59,  60,  61,  62,  63,  63,  64,
+             33,  34,  34,  35,  37,  38,  43,  43,  44,  46,  47,  46,  46,  45,  46,  46,  47,  48,  49,  51,  51,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
+             37,  38,  39,  40,  42,  43,  47,  47,  47,  48,  48,  47,  46,  46,  46,  46,  47,  48,  49,  50,  50,  52,  53,  55,  56,  56,  57,  58,  59,  59,  60,  61,
+             45,  45,  45,  44,  46,  46,  47,  48,  49,  51,  52,  51,  51,  51,  52,  52,  53,  53,  54,  55,  55,  57,  58,  59,  60,  60,  61,  61,  61,  61,  61,  61,
+             48,  47,  46,  46,  47,  47,  47,  48,  50,  52,  53,  53,  53,  53,  54,  54,  54,  55,  55,  56,  56,  58,  59,  60,  61,  61,  63,  63,  64,  65,  66,  67,
+             49,  47,  46,  45,  45,  46,  45,  47,  49,  53,  53,  56,  57,  58,  59,  59,  61,  61,  62,  63,  64,  65,  66,  67,  68,  68,  69,  69,  68,  68,  67,  67,
+             50,  48,  47,  46,  46,  46,  46,  47,  50,  53,  54,  56,  57,  59,  61,  61,  63,  64,  65,  66,  66,  68,  69,  70,  71,  71,  72,  70,  71,  72,  73,  74,
+             52,  50,  49,  48,  47,  47,  47,  48,  50,  53,  54,  57,  59,  61,  63,  64,  66,  67,  68,  70,  70,  72,  73,  75,  75,  75,  77,  78,  77,  76,  75,  74,
+             56,  53,  52,  51,  50,  50,  49,  50,  53,  55,  56,  59,  61,  63,  65,  66,  70,  71,  72,  74,  75,  77,  79,  80,  81,  81,  82,  80,  79,  80,  81,  82,
+             57,  54,  53,  52,  51,  51,  50,  51,  53,  56,  57,  60,  61,  64,  66,  67,  71,  72,  73,  76,  76,  79,  80,  82,  83,  83,  84,  85,  86,  85,  83,  82,
+             63,  60,  59,  57,  56,  56,  54,  55,  57,  60,  60,  64,  65,  67,  70,  71,  75,  76,  78,  81,  82,  85,  86,  89,  90,  90,  92,  90,  88,  88,  89,  90,
+             64,  61,  60,  58,  57,  57,  55,  56,  58,  61,  61,  64,  66,  68,  71,  71,  75,  77,  79,  82,  83,  86,  87,  90,  91,  91,  93,  93,  94,  94,  92,  90,
+             67,  63,  62,  60,  60,  59,  57,  57,  60,  61,  63,  65,  66,  70,  70,  73,  75,  77,  80,  81,  85,  86,  89,  91,  93,  94,  94,  96,  96,  95,  97,  98,
+             68,  64,  64,  61,  61,  60,  59,  58,  60,  61,  63,  64,  67,  67,  71,  71,  74,  75,  79,  80,  83,  85,  87,  89,  91,  94,  95,  97,  97,  99,  98,  98,
+             68,  65,  65,  62,  62,  60,  61,  59,  59,  62,  62,  65,  65,  68,  68,  72,  72,  76,  76,  80,  80,  84,  84,  89,  89,  93,  93,  97,  98,  99,  99, 102,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  33,  34,  36,  36,  39,  40,  44,  46,  48,  52,  53,  58,  58,  65,  66,  71,  74,  79,  81,  82,  86,  88,  91,  93,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  35,  37,  38,  41,  43,  45,  48,  49,  53,  54,  60,  61,  65,  68,  72,  74,  75,  78,  81,  83,  85,
+             31,  32,  32,  32,  32,  33,  33,  33,  34,  34,  35,  35,  38,  39,  41,  43,  45,  48,  49,  53,  54,  59,  60,  65,  67,  72,  73,  74,  78,  80,  82,  85,
+             32,  32,  32,  33,  33,  34,  35,  35,  36,  37,  38,  38,  40,  41,  43,  44,  46,  49,  50,  53,  54,  58,  59,  63,  66,  70,  71,  72,  75,  77,  79,  81,
+             33,  33,  33,  33,  34,  35,  36,  36,  38,  39,  42,  42,  44,  45,  46,  48,  50,  52,  53,  57,  57,  62,  63,  67,  69,  73,  75,  75,  78,  80,  80,  81,
+             36,  35,  35,  34,  35,  36,  37,  38,  41,  42,  48,  48,  50,  51,  53,  55,  56,  59,  60,  63,  63,  68,  69,  73,  75,  79,  80,  81,  84,  86,  88,  90,
+             40,  39,  39,  38,  38,  39,  40,  41,  44,  45,  51,  51,  54,  56,  59,  60,  62,  65,  66,  69,  70,  74,  75,  79,  81,  85,  86,  87,  90,  90,  90,  90,
+             44,  42,  42,  41,  41,  42,  42,  42,  46,  48,  54,  54,  58,  59,  63,  65,  67,  70,  71,  74,  75,  79,  80,  84,  86,  90,  91,  92,  95,  98, 100, 102,
+             51,  49,  49,  47,  47,  48,  48,  48,  52,  53,  58,  59,  63,  65,  69,  72,  74,  78,  79,  83,  84,  89,  90,  94,  97, 101, 102, 103, 106, 105, 103, 103,
+             53,  51,  51,  49,  49,  50,  49,  49,  53,  54,  60,  60,  65,  67,  71,  73,  76,  80,  82,  86,  87,  92,  93,  97, 100, 104, 105, 106, 109, 112, 114, 117,
+             65,  62,  61,  59,  59,  59,  58,  58,  62,  63,  68,  68,  73,  75,  79,  82,  85,  90,  92,  97,  98, 105, 106, 111, 113, 118, 120, 121, 124, 122, 119, 117,
+             66,  63,  62,  60,  60,  60,  59,  59,  63,  64,  69,  69,  74,  76,  80,  83,  86,  91,  93,  98,  99, 106, 107, 112, 115, 119, 121, 122, 125, 127, 130, 134,
+             79,  75,  74,  72,  71,  71,  69,  69,  72,  73,  78,  79,  84,  85,  90,  93,  96, 101, 103, 109, 110, 118, 119, 125, 128, 133, 135, 136, 140, 140, 137, 134,
+             81,  77,  76,  74,  73,  72,  71,  70,  74,  75,  80,  80,  85,  87,  91,  94,  98, 103, 105, 111, 112, 119, 121, 127, 130, 135, 137, 139, 142, 144, 148, 151,
+             87,  83,  82,  79,  79,  78,  77,  75,  78,  80,  84,  85,  89,  90,  96,  97, 103, 105, 111, 113, 118, 122, 126, 131, 134, 141, 143, 147, 147, 152, 151, 152,
+             90,  85,  85,  81,  81,  80,  80,  77,  78,  83,  83,  87,  88,  93,  93, 100, 100, 107, 107, 115, 115, 123, 123, 132, 132, 140, 140, 151, 151, 155, 155, 160,
+        }, {
+             32,  31,  31,  30,  31,  33,  35,  37,  41,  42,  49,  49,  48,  48,  49,  49,  50,  51,  52,  54,  54,  57,  57,  60,  61,  63,  64,  64,  66,  67,  68,  68,
+             31,  31,  31,  32,  33,  36,  38,  40,  42,  43,  46,  46,  46,  45,  45,  46,  46,  47,  48,  50,  50,  52,  52,  54,  56,  57,  58,  59,  60,  61,  62,  62,
+             32,  33,  33,  33,  35,  37,  39,  41,  43,  43,  47,  47,  46,  46,  45,  46,  46,  47,  48,  49,  50,  52,  52,  54,  55,  57,  58,  58,  59,  60,  61,  62,
+             37,  38,  38,  40,  41,  43,  45,  47,  47,  47,  48,  48,  47,  46,  46,  46,  46,  47,  47,  48,  49,  50,  51,  52,  53,  55,  55,  56,  57,  58,  58,  59,
+             40,  41,  41,  42,  43,  44,  46,  47,  48,  48,  50,  50,  49,  49,  48,  49,  49,  49,  50,  51,  51,  52,  53,  55,  56,  57,  58,  58,  59,  59,  59,  59,
+             48,  47,  47,  46,  46,  47,  47,  47,  49,  50,  53,  53,  53,  53,  53,  53,  54,  54,  54,  55,  55,  56,  57,  58,  59,  60,  61,  61,  62,  63,  64,  65,
+             49,  47,  47,  45,  46,  46,  46,  46,  49,  49,  53,  53,  54,  55,  56,  57,  57,  58,  58,  59,  59,  60,  61,  62,  63,  64,  65,  65,  66,  66,  65,  65,
+             49,  47,  47,  45,  45,  46,  45,  45,  48,  49,  53,  54,  56,  56,  58,  59,  59,  61,  61,  62,  62,  64,  64,  65,  66,  67,  68,  68,  69,  70,  71,  71,
+             51,  49,  49,  47,  47,  47,  47,  46,  49,  50,  54,  54,  57,  58,  61,  62,  63,  64,  65,  67,  67,  69,  69,  71,  72,  73,  73,  74,  75,  74,  72,  71,
+             52,  50,  49,  48,  48,  47,  47,  47,  50,  50,  54,  55,  57,  58,  61,  62,  64,  66,  66,  68,  68,  70,  71,  72,  73,  75,  75,  75,  76,  77,  78,  79,
+             57,  54,  54,  52,  51,  51,  50,  50,  52,  53,  57,  57,  60,  61,  64,  65,  67,  69,  71,  73,  73,  76,  77,  79,  80,  82,  82,  83,  84,  82,  81,  79,
+             58,  55,  54,  52,  52,  52,  51,  50,  53,  54,  57,  57,  60,  61,  64,  66,  67,  70,  71,  73,  74,  77,  77,  79,  81,  82,  83,  83,  85,  85,  86,  87,
+             63,  60,  59,  57,  57,  56,  55,  54,  57,  57,  60,  61,  64,  65,  67,  69,  71,  73,  75,  77,  78,  82,  82,  85,  86,  89,  89,  90,  91,  91,  89,  87,
+             64,  61,  60,  58,  57,  57,  56,  55,  57,  58,  61,  61,  64,  65,  68,  69,  71,  74,  75,  78,  78,  82,  83,  86,  87,  90,  90,  91,  92,  93,  94,  95,
+             67,  63,  63,  60,  60,  59,  58,  57,  59,  60,  62,  63,  65,  66,  69,  70,  73,  74,  77,  78,  81,  83,  85,  87,  88,  92,  92,  94,  94,  96,  95,  95,
+             67,  64,  64,  61,  61,  60,  60,  58,  58,  61,  61,  64,  64,  67,  67,  70,  71,  74,  74,  78,  78,  82,  82,  86,  86,  90,  90,  95,  95,  96,  96,  98,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  32,  32,  32,  32,  34,  34,  36,  36,  39,  39,  44,  44,  48,  48,  53,  53,  58,  58,  65,  65,  71,  71,  79,  79,  82,  82,  87,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  37,  37,  41,  41,  45,  45,  49,  49,  54,  54,  60,  60,  65,  65,  72,  72,  75,  75,  79,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  37,  37,  41,  41,  45,  45,  49,  49,  54,  54,  60,  60,  65,  65,  72,  72,  75,  75,  79,
+             32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  43,  43,  46,  46,  50,  50,  54,  54,  58,  58,  63,  63,  70,  70,  72,  72,  76,
+             32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  43,  43,  46,  46,  50,  50,  54,  54,  58,  58,  63,  63,  70,  70,  72,  72,  76,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,  48,  50,  50,  53,  53,  56,  56,  60,  60,  63,  63,  68,  68,  73,  73,  79,  79,  81,  81,  84,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,  48,  50,  50,  53,  53,  56,  56,  60,  60,  63,  63,  68,  68,  73,  73,  79,  79,  81,  81,  84,
+             44,  42,  42,  41,  41,  42,  42,  42,  42,  48,  48,  54,  54,  58,  58,  63,  63,  67,  67,  71,  71,  75,  75,  79,  79,  84,  84,  90,  90,  92,  92,  96,
+             44,  42,  42,  41,  41,  42,  42,  42,  42,  48,  48,  54,  54,  58,  58,  63,  63,  67,  67,  71,  71,  75,  75,  79,  79,  84,  84,  90,  90,  92,  92,  96,
+             53,  51,  51,  49,  49,  50,  50,  49,  49,  54,  54,  60,  60,  65,  65,  71,  71,  76,  76,  82,  82,  87,  87,  92,  92,  97,  97, 104, 104, 106, 106, 109,
+             53,  51,  51,  49,  49,  50,  50,  49,  49,  54,  54,  60,  60,  65,  65,  71,  71,  76,  76,  82,  82,  87,  87,  92,  92,  97,  97, 104, 104, 106, 106, 109,
+             65,  62,  62,  59,  59,  59,  59,  58,  58,  63,  63,  68,  68,  73,  73,  79,  79,  85,  85,  92,  92,  98,  98, 105, 105, 111, 111, 118, 118, 121, 121, 124,
+             65,  62,  62,  59,  59,  59,  59,  58,  58,  63,  63,  68,  68,  73,  73,  79,  79,  85,  85,  92,  92,  98,  98, 105, 105, 111, 111, 118, 118, 121, 121, 124,
+             79,  75,  75,  72,  72,  71,  71,  69,  69,  73,  73,  78,  78,  84,  84,  90,  90,  96,  96, 103, 103, 110, 110, 118, 118, 125, 125, 133, 133, 136, 136, 141,
+             79,  75,  75,  72,  72,  71,  71,  69,  69,  73,  73,  78,  78,  84,  84,  90,  90,  96,  96, 103, 103, 110, 110, 118, 118, 125, 125, 133, 133, 136, 136, 141,
+             87,  82,  82,  78,  78,  77,  77,  75,  75,  79,  79,  84,  84,  89,  89,  95,  95, 102, 102, 109, 109, 116, 116, 124, 124, 132, 132, 141, 141, 144, 144, 149,
+        }, {
+             32,  31,  31,  30,  30,  33,  33,  37,  37,  42,  42,  49,  49,  48,  48,  49,  49,  50,  50,  52,  52,  54,  54,  57,  57,  60,  60,  63,  63,  64,  64,  66,
+             31,  31,  31,  32,  32,  36,  36,  40,  40,  43,  43,  46,  46,  46,  46,  45,  45,  46,  46,  48,  48,  50,  50,  52,  52,  54,  54,  57,  57,  59,  59,  60,
+             31,  31,  31,  32,  32,  36,  36,  40,  40,  43,  43,  46,  46,  46,  46,  45,  45,  46,  46,  48,  48,  50,  50,  52,  52,  54,  54,  57,  57,  59,  59,  60,
+             37,  38,  38,  40,  40,  43,  43,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  47,  47,  49,  49,  50,  50,  52,  52,  55,  55,  56,  56,  57,
+             37,  38,  38,  40,  40,  43,  43,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  47,  47,  49,  49,  50,  50,  52,  52,  55,  55,  56,  56,  57,
+             48,  47,  47,  46,  46,  47,  47,  47,  47,  50,  50,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  55,  55,  56,  56,  58,  58,  60,  60,  61,  61,  63,
+             48,  47,  47,  46,  46,  47,  47,  47,  47,  50,  50,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  55,  55,  56,  56,  58,  58,  60,  60,  61,  61,  63,
+             49,  47,  47,  45,  45,  46,  46,  45,  45,  49,  49,  53,  53,  56,  56,  58,  58,  59,  59,  61,  61,  62,  62,  64,  64,  65,  65,  67,  67,  68,  68,  69,
+             49,  47,  47,  45,  45,  46,  46,  45,  45,  49,  49,  53,  53,  56,  56,  58,  58,  59,  59,  61,  61,  62,  62,  64,  64,  65,  65,  67,  67,  68,  68,  69,
+             52,  50,  50,  48,  48,  47,  47,  47,  47,  50,  50,  54,  54,  57,  57,  61,  61,  64,  64,  66,  66,  68,  68,  70,  70,  72,  72,  75,  75,  75,  75,  77,
+             52,  50,  50,  48,  48,  47,  47,  47,  47,  50,  50,  54,  54,  57,  57,  61,  61,  64,  64,  66,  66,  68,  68,  70,  70,  72,  72,  75,  75,  75,  75,  77,
+             57,  54,  54,  52,  52,  51,  51,  50,  50,  53,  53,  57,  57,  60,  60,  64,  64,  67,  67,  71,  71,  73,  73,  76,  76,  79,  79,  82,  82,  83,  83,  84,
+             57,  54,  54,  52,  52,  51,  51,  50,  50,  53,  53,  57,  57,  60,  60,  64,  64,  67,  67,  71,  71,  73,  73,  76,  76,  79,  79,  82,  82,  83,  83,  84,
+             63,  60,  60,  57,  57,  56,  56,  54,  54,  57,  57,  60,  60,  64,  64,  67,  67,  71,  71,  75,  75,  78,  78,  82,  82,  85,  85,  89,  89,  90,  90,  92,
+             63,  60,  60,  57,  57,  56,  56,  54,  54,  57,  57,  60,  60,  64,  64,  67,  67,  71,  71,  75,  75,  78,  78,  82,  82,  85,  85,  89,  89,  90,  90,  92,
+             66,  63,  63,  60,  60,  59,  59,  57,  57,  60,  60,  62,  62,  66,  66,  69,  69,  73,  73,  77,  77,  80,  80,  84,  84,  88,  88,  92,  92,  93,  93,  95,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  34,  35,  36,  38,  39,  41,  44,  44,  48,  48,  53,  53,  57,  58,  61,  65,  67,  71,  72,  79,  79,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,  36,  38,  39,  41,  42,  45,  45,  49,  50,  53,  54,  57,  60,  62,  66,  66,  73,  73,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  36,  37,  39,  41,  42,  45,  45,  49,  49,  52,  54,  57,  60,  61,  65,  66,  72,  72,
+             32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  36,  36,  37,  37,  38,  40,  41,  42,  43,  46,  46,  49,  50,  52,  54,  56,  59,  60,  64,  64,  71,  71,
+             32,  32,  32,  32,  33,  33,  34,  34,  35,  35,  37,  37,  38,  38,  40,  40,  41,  43,  43,  46,  46,  49,  50,  52,  54,  56,  58,  60,  63,  64,  70,  70,
+             34,  34,  34,  33,  33,  34,  35,  35,  37,  37,  39,  39,  42,  43,  44,  45,  46,  48,  48,  51,  51,  54,  54,  57,  58,  60,  63,  64,  68,  68,  74,  74,
+             36,  35,  35,  35,  34,  35,  36,  37,  38,  39,  42,  42,  47,  48,  49,  50,  51,  53,  54,  56,  56,  59,  60,  62,  63,  66,  68,  69,  73,  73,  79,  79,
+             38,  37,  37,  36,  36,  37,  38,  38,  39,  40,  44,  44,  48,  49,  51,  52,  54,  56,  56,  59,  59,  62,  63,  65,  67,  69,  71,  72,  76,  76,  82,  82,
+             44,  42,  42,  41,  41,  41,  42,  42,  42,  43,  48,  48,  52,  54,  56,  58,  60,  63,  64,  67,  67,  71,  71,  74,  75,  77,  79,  81,  84,  85,  90,  90,
+             44,  43,  43,  42,  41,  42,  43,  43,  43,  44,  48,  48,  53,  54,  57,  58,  60,  64,  64,  67,  67,  71,  72,  75,  76,  78,  80,  82,  85,  86,  91,  91,
+             53,  51,  51,  50,  49,  49,  50,  49,  49,  50,  54,  54,  59,  60,  63,  65,  67,  71,  72,  76,  76,  81,  82,  85,  87,  89,  92,  94,  97,  98, 104, 104,
+             53,  51,  51,  50,  49,  49,  50,  49,  49,  50,  54,  54,  59,  60,  63,  65,  67,  71,  72,  76,  76,  81,  82,  85,  87,  89,  92,  94,  97,  98, 104, 104,
+             62,  60,  59,  58,  57,  57,  57,  56,  56,  56,  61,  61,  65,  66,  69,  71,  74,  78,  79,  83,  83,  89,  90,  94,  95,  98, 102, 103, 108, 108, 115, 115,
+             65,  62,  62,  60,  59,  59,  59,  59,  58,  58,  63,  63,  67,  68,  71,  73,  76,  79,  81,  85,  85,  91,  92,  96,  98, 101, 105, 106, 111, 111, 118, 118,
+             73,  70,  69,  67,  66,  66,  65,  65,  64,  64,  69,  69,  73,  74,  77,  79,  81,  85,  86,  91,  91,  98,  99, 103, 105, 108, 112, 114, 119, 119, 127, 127,
+             79,  75,  75,  73,  72,  71,  71,  70,  69,  69,  73,  73,  77,  78,  81,  84,  86,  90,  91,  96,  96, 103, 103, 108, 110, 114, 118, 120, 125, 125, 133, 133,
+        }, {
+             32,  31,  31,  30,  30,  32,  33,  34,  37,  37,  42,  42,  47,  49,  48,  48,  48,  49,  49,  50,  50,  52,  52,  53,  54,  55,  57,  58,  60,  60,  63,  63,
+             31,  31,  31,  32,  32,  33,  35,  37,  40,  40,  43,  43,  46,  47,  46,  46,  46,  45,  46,  47,  47,  48,  48,  50,  50,  51,  52,  53,  55,  55,  58,  58,
+             31,  31,  31,  32,  32,  34,  36,  37,  40,  40,  43,  43,  46,  46,  46,  46,  45,  45,  45,  46,  46,  48,  48,  49,  50,  51,  52,  53,  54,  55,  57,  57,
+             35,  36,  36,  37,  37,  39,  40,  42,  45,  45,  46,  46,  47,  47,  47,  46,  46,  45,  46,  46,  46,  47,  47,  48,  49,  50,  51,  51,  53,  53,  56,  56,
+             37,  38,  38,  39,  40,  41,  43,  44,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  46,  47,  47,  48,  49,  49,  50,  51,  52,  53,  55,  55,
+             42,  42,  42,  42,  42,  44,  45,  45,  47,  47,  48,  48,  50,  50,  50,  50,  49,  49,  49,  50,  50,  50,  50,  51,  52,  52,  53,  54,  55,  55,  58,  58,
+             48,  47,  47,  46,  46,  46,  47,  47,  47,  48,  50,  50,  52,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  55,  55,  56,  56,  57,  58,  59,  60,  60,
+             48,  47,  47,  46,  46,  46,  46,  47,  47,  47,  50,  50,  52,  53,  53,  54,  54,  55,  55,  55,  55,  56,  56,  57,  57,  58,  58,  59,  60,  60,  62,  62,
+             49,  47,  47,  46,  45,  45,  46,  45,  45,  46,  49,  49,  53,  53,  55,  56,  57,  58,  58,  59,  59,  61,  61,  62,  62,  63,  64,  64,  65,  65,  67,  67,
+             49,  47,  47,  46,  45,  45,  46,  46,  46,  46,  49,  49,  53,  54,  55,  56,  57,  59,  59,  60,  60,  61,  61,  62,  63,  63,  64,  65,  66,  66,  68,  68,
+             52,  50,  50,  48,  48,  48,  47,  47,  47,  47,  50,  50,  53,  54,  56,  57,  59,  61,  62,  64,  64,  66,  66,  68,  68,  69,  70,  71,  72,  73,  75,  75,
+             52,  50,  50,  48,  48,  48,  47,  47,  47,  47,  50,  50,  53,  54,  56,  57,  59,  61,  62,  64,  64,  66,  66,  68,  68,  69,  70,  71,  72,  73,  75,  75,
+             56,  54,  53,  52,  51,  51,  50,  50,  49,  49,  53,  53,  55,  56,  58,  59,  61,  63,  64,  66,  66,  69,  70,  71,  72,  74,  75,  76,  77,  78,  80,  80,
+             57,  54,  54,  52,  52,  51,  51,  51,  50,  50,  53,  53,  56,  57,  58,  60,  61,  64,  64,  67,  67,  70,  71,  72,  73,  75,  76,  77,  79,  79,  82,  82,
+             61,  58,  57,  56,  55,  54,  54,  53,  52,  53,  56,  56,  58,  59,  61,  62,  63,  66,  66,  69,  69,  72,  73,  75,  76,  78,  79,  80,  82,  83,  86,  86,
+             63,  60,  60,  58,  57,  57,  56,  55,  54,  55,  57,  57,  60,  60,  62,  64,  65,  67,  68,  71,  71,  74,  75,  77,  78,  80,  82,  83,  85,  85,  89,  89,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  33,  34,  34,  36,  36,  38,  39,  41,  44,  44,  47,  48,  50,  53,  53,  57,  58,  61,  65,  65,  70,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  35,  35,  37,  38,  39,  41,  41,  44,  45,  47,  50,  50,  54,  55,  57,  61,  61,  65,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  37,  37,  39,  41,  41,  44,  45,  46,  49,  49,  53,  54,  56,  60,  60,  64,
+             31,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  35,  35,  35,  36,  36,  38,  39,  40,  42,  42,  44,  45,  47,  50,  50,  53,  54,  56,  59,  59,  63,
+             32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  38,  38,  40,  40,  41,  43,  43,  45,  46,  47,  50,  50,  53,  54,  56,  58,  58,  62,
+             32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  38,  38,  40,  40,  41,  43,  43,  45,  46,  47,  50,  50,  53,  54,  56,  58,  58,  62,
+             35,  35,  35,  34,  34,  34,  35,  36,  36,  37,  37,  40,  41,  43,  46,  46,  47,  48,  49,  51,  51,  53,  54,  55,  57,  57,  60,  61,  63,  66,  66,  70,
+             36,  35,  35,  35,  34,  34,  36,  36,  37,  38,  38,  41,  42,  44,  48,  48,  50,  50,  51,  53,  53,  56,  56,  58,  60,  60,  63,  63,  65,  68,  68,  72,
+             39,  38,  38,  37,  37,  37,  38,  38,  39,  40,  40,  43,  44,  46,  50,  50,  52,  53,  54,  57,  57,  59,  60,  61,  64,  64,  67,  68,  69,  72,  72,  76,
+             44,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  46,  48,  50,  54,  54,  57,  58,  60,  63,  63,  66,  67,  68,  71,  71,  74,  75,  77,  79,  79,  83,
+             44,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  46,  48,  50,  54,  54,  57,  58,  60,  63,  63,  66,  67,  68,  71,  71,  74,  75,  77,  79,  79,  83,
+             51,  49,  49,  48,  47,  47,  48,  48,  48,  48,  48,  52,  53,  55,  58,  58,  62,  63,  66,  69,  69,  73,  74,  76,  79,  79,  83,  84,  86,  89,  89,  93,
+             53,  52,  51,  50,  49,  49,  49,  50,  49,  49,  49,  53,  54,  56,  60,  60,  64,  65,  67,  71,  71,  75,  76,  78,  82,  82,  86,  87,  89,  92,  92,  96,
+             58,  56,  55,  54,  53,  53,  53,  53,  53,  52,  52,  56,  57,  59,  63,  63,  67,  68,  70,  74,  74,  78,  79,  82,  86,  86,  90,  91,  93,  97,  97, 101,
+             65,  63,  62,  61,  59,  59,  59,  59,  58,  58,  58,  62,  63,  65,  68,  68,  72,  73,  76,  79,  79,  84,  85,  88,  92,  92,  97,  98, 100, 105, 105, 109,
+             65,  63,  62,  61,  59,  59,  59,  59,  58,  58,  58,  62,  63,  65,  68,  68,  72,  73,  76,  79,  79,  84,  85,  88,  92,  92,  97,  98, 100, 105, 105, 109,
+        }, {
+             32,  31,  31,  31,  30,  30,  33,  33,  35,  37,  37,  41,  42,  44,  49,  49,  48,  48,  48,  49,  49,  50,  50,  51,  52,  52,  54,  54,  55,  57,  57,  59,
+             31,  31,  31,  31,  32,  32,  34,  35,  37,  39,  39,  42,  42,  44,  47,  47,  46,  46,  46,  46,  46,  47,  47,  48,  48,  48,  50,  51,  51,  53,  53,  55,
+             31,  31,  31,  32,  32,  32,  35,  36,  37,  40,  40,  42,  43,  44,  46,  46,  46,  46,  45,  45,  45,  46,  46,  47,  48,  48,  49,  50,  51,  52,  52,  54,
+             33,  34,  34,  34,  35,  35,  37,  38,  40,  43,  43,  44,  44,  45,  47,  47,  46,  46,  46,  45,  45,  46,  46,  47,  47,  47,  49,  49,  50,  51,  51,  53,
+             37,  38,  38,  39,  40,  40,  42,  43,  44,  47,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  48,  49,  49,  50,  50,  52,
+             37,  38,  38,  39,  40,  40,  42,  43,  44,  47,  47,  47,  47,  47,  48,  48,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  48,  49,  49,  50,  50,  52,
+             45,  45,  45,  45,  44,  44,  46,  46,  46,  47,  47,  49,  49,  50,  52,  52,  51,  51,  51,  51,  51,  52,  52,  52,  53,  53,  54,  54,  54,  55,  55,  57,
+             48,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  49,  50,  51,  53,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  54,  55,  55,  56,  56,  56,  58,
+             48,  47,  47,  46,  45,  45,  46,  46,  46,  47,  47,  49,  50,  51,  53,  53,  54,  54,  54,  55,  55,  56,  56,  56,  57,  57,  58,  58,  58,  59,  59,  61,
+             49,  47,  47,  46,  45,  45,  45,  46,  45,  45,  45,  48,  49,  51,  53,  53,  55,  56,  57,  58,  58,  59,  59,  60,  61,  61,  62,  62,  63,  64,  64,  65,
+             49,  47,  47,  46,  45,  45,  45,  46,  45,  45,  45,  48,  49,  51,  53,  53,  55,  56,  57,  58,  58,  59,  59,  60,  61,  61,  62,  62,  63,  64,  64,  65,
+             51,  50,  49,  48,  47,  47,  47,  47,  47,  46,  46,  49,  50,  52,  54,  54,  56,  57,  58,  61,  61,  62,  63,  64,  65,  65,  67,  67,  68,  69,  69,  70,
+             52,  50,  50,  49,  48,  48,  47,  47,  47,  47,  47,  50,  50,  52,  54,  54,  57,  57,  59,  61,  61,  63,  64,  65,  66,  66,  68,  68,  69,  70,  70,  72,
+             54,  52,  51,  51,  49,  49,  49,  49,  48,  48,  48,  51,  51,  53,  55,  55,  58,  58,  60,  62,  62,  64,  65,  66,  68,  68,  70,  70,  71,  73,  73,  74,
+             57,  55,  54,  53,  52,  52,  51,  51,  51,  50,  50,  52,  53,  54,  57,  57,  59,  60,  61,  64,  64,  66,  67,  68,  71,  71,  73,  73,  74,  76,  76,  78,
+             57,  55,  54,  53,  52,  52,  51,  51,  51,  50,  50,  52,  53,  54,  57,  57,  59,  60,  61,  64,  64,  66,  67,  68,  71,  71,  73,  73,  74,  76,  76,  78,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  34,  34,  35,  36,  36,  38,  39,  39,  42,  44,  44,  47,  48,  49,  53,  53,  55,  58,  58,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,  35,  37,  38,  38,  40,  42,  42,  45,  46,  47,  50,  51,  52,  55,  55,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  36,  37,  37,  40,  41,  41,  44,  45,  46,  49,  49,  51,  54,  54,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  34,  34,  34,  35,  35,  37,  38,  38,  40,  41,  41,  44,  45,  46,  49,  49,  51,  54,  54,
+             32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  35,  35,  35,  36,  36,  37,  37,  37,  39,  40,  40,  42,  42,  43,  45,  46,  47,  49,  50,  51,  54,  54,
+             32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  35,  35,  36,  37,  37,  37,  38,  38,  40,  40,  40,  42,  43,  43,  45,  46,  47,  49,  50,  51,  54,  54,
+             32,  33,  33,  33,  33,  33,  33,  34,  34,  35,  36,  36,  36,  38,  38,  39,  40,  40,  41,  42,  42,  44,  45,  45,  47,  48,  48,  51,  51,  53,  55,  55,
+             35,  35,  35,  35,  34,  34,  35,  36,  36,  37,  38,  38,  39,  42,  42,  44,  47,  47,  48,  49,  49,  51,  52,  52,  54,  55,  56,  58,  59,  60,  62,  62,
+             36,  35,  35,  35,  35,  34,  35,  36,  36,  37,  38,  38,  40,  42,  42,  45,  48,  48,  49,  50,  50,  52,  53,  54,  56,  56,  57,  59,  60,  61,  63,  63,
+             38,  37,  37,  37,  36,  36,  36,  38,  38,  38,  39,  39,  41,  44,  44,  46,  49,  49,  51,  52,  52,  55,  56,  56,  58,  59,  60,  62,  63,  64,  67,  67,
+             44,  43,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  44,  48,  48,  50,  54,  54,  56,  58,  58,  61,  63,  63,  66,  67,  67,  71,  71,  72,  75,  75,
+             44,  43,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  44,  48,  48,  50,  54,  54,  56,  58,  58,  61,  63,  63,  66,  67,  67,  71,  71,  72,  75,  75,
+             47,  46,  45,  45,  44,  44,  44,  45,  45,  45,  45,  45,  47,  50,  50,  53,  56,  56,  58,  60,  60,  64,  66,  66,  69,  70,  71,  74,  75,  76,  79,  79,
+             53,  52,  51,  51,  49,  49,  49,  49,  50,  49,  49,  49,  51,  54,  54,  57,  60,  60,  63,  65,  65,  69,  71,  72,  75,  76,  77,  81,  82,  83,  87,  87,
+             53,  52,  51,  51,  49,  49,  49,  49,  50,  49,  49,  49,  51,  54,  54,  57,  60,  60,  63,  65,  65,  69,  71,  72,  75,  76,  77,  81,  82,  83,  87,  87,
+             59,  57,  56,  56,  54,  54,  54,  54,  54,  54,  53,  53,  55,  58,  58,  61,  64,  64,  67,  69,  69,  73,  75,  76,  79,  80,  81,  86,  87,  88,  92,  92,
+        }, {
+             32,  31,  31,  31,  30,  30,  31,  33,  33,  34,  37,  37,  39,  42,  42,  45,  49,  49,  48,  48,  48,  49,  49,  49,  50,  50,  51,  52,  52,  53,  54,  54,
+             31,  31,  31,  31,  31,  31,  32,  35,  35,  36,  39,  39,  40,  42,  42,  45,  47,  47,  47,  46,  46,  46,  46,  46,  47,  48,  48,  49,  49,  50,  51,  51,
+             31,  31,  31,  31,  32,  32,  33,  35,  36,  37,  40,  40,  41,  43,  43,  44,  46,  46,  46,  46,  46,  45,  45,  45,  46,  46,  47,  48,  48,  48,  50,  50,
+             31,  32,  32,  32,  32,  33,  33,  36,  36,  37,  41,  41,  42,  43,  43,  45,  47,  47,  46,  46,  46,  45,  45,  45,  46,  46,  47,  48,  48,  48,  50,  50,
+             35,  36,  37,  37,  38,  38,  38,  41,  41,  42,  45,  45,  46,  46,  46,  47,  48,  48,  47,  46,  46,  46,  45,  46,  46,  46,  47,  47,  47,  48,  49,  49,
+             37,  38,  38,  38,  39,  40,  40,  43,  43,  44,  47,  47,  47,  47,  47,  47,  48,  48,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  48,  49,  49,
+             38,  39,  40,  40,  40,  41,  41,  43,  44,  45,  47,  47,  47,  48,  48,  48,  49,  49,  48,  48,  48,  47,  47,  47,  48,  48,  48,  48,  48,  49,  50,  50,
+             47,  46,  46,  46,  45,  45,  45,  46,  46,  47,  47,  47,  48,  50,  50,  51,  52,  52,  52,  52,  52,  52,  52,  52,  53,  53,  53,  53,  53,  54,  55,  55,
+             48,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  48,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,  54,  54,  55,  55,
+             48,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  48,  50,  50,  51,  53,  53,  53,  54,  54,  54,  55,  55,  55,  55,  55,  56,  56,  56,  57,  57,
+             49,  48,  47,  47,  45,  45,  45,  45,  46,  45,  45,  45,  47,  49,  49,  51,  53,  53,  55,  56,  56,  57,  58,  58,  59,  59,  60,  61,  61,  61,  62,  62,
+             49,  48,  47,  47,  45,  45,  45,  45,  46,  45,  45,  45,  47,  49,  49,  51,  53,  53,  55,  56,  56,  57,  58,  58,  59,  59,  60,  61,  61,  61,  62,  62,
+             50,  49,  48,  48,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  52,  54,  54,  55,  56,  56,  58,  59,  60,  61,  61,  61,  63,  63,  63,  65,  65,
+             52,  50,  50,  50,  48,  48,  48,  47,  47,  47,  47,  47,  48,  50,  50,  52,  54,  54,  56,  57,  57,  60,  61,  61,  63,  64,  64,  66,  66,  67,  68,  68,
+             52,  50,  50,  50,  48,  48,  48,  47,  47,  47,  47,  47,  48,  50,  50,  52,  54,  54,  56,  57,  57,  60,  61,  61,  63,  64,  64,  66,  66,  67,  68,  68,
+             54,  53,  52,  52,  50,  50,  50,  49,  49,  49,  48,  48,  50,  52,  52,  54,  55,  55,  57,  59,  59,  61,  62,  63,  65,  65,  66,  68,  68,  69,  71,  71,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  34,  34,  34,  35,  36,  36,  38,  39,  39,  41,  44,  44,  44,  47,  48,  48,  51,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,  35,  35,  37,  38,  38,  40,  42,  42,  43,  45,  46,  46,  49,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  34,  36,  37,  37,  39,  41,  41,  42,  44,  45,  45,  47,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  34,  36,  37,  37,  39,  41,  41,  42,  44,  45,  45,  47,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  36,  36,  37,  39,  39,  40,  42,  42,  42,  44,  45,  45,  48,
+             32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  38,  38,  38,  40,  40,  40,  41,  43,  43,  43,  45,  46,  46,  48,
+             32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  38,  38,  38,  40,  40,  40,  41,  43,  43,  43,  45,  46,  46,  48,
+             32,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  35,  36,  36,  36,  38,  38,  38,  39,  40,  40,  41,  42,  42,  43,  45,  45,  45,  47,  48,  48,  50,
+             35,  35,  35,  35,  34,  34,  34,  34,  35,  36,  36,  37,  37,  37,  39,  41,  41,  42,  45,  46,  46,  47,  48,  48,  49,  51,  51,  51,  53,  54,  54,  56,
+             36,  35,  35,  35,  35,  34,  34,  35,  36,  36,  36,  37,  38,  38,  40,  42,  42,  43,  47,  48,  48,  49,  50,  50,  51,  53,  53,  54,  56,  56,  56,  58,
+             36,  35,  35,  35,  35,  34,  34,  35,  36,  36,  36,  37,  38,  38,  40,  42,  42,  43,  47,  48,  48,  49,  50,  50,  51,  53,  53,  54,  56,  56,  56,  58,
+             40,  39,  39,  39,  39,  38,  38,  38,  39,  39,  39,  40,  41,  41,  42,  45,  45,  46,  50,  51,  51,  53,  54,  54,  56,  59,  59,  59,  61,  62,  62,  64,
+             44,  43,  42,  42,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  44,  48,  48,  49,  52,  54,  54,  56,  58,  58,  60,  63,  63,  64,  66,  67,  67,  69,
+             44,  43,  42,  42,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  44,  48,  48,  49,  52,  54,  54,  56,  58,  58,  60,  63,  63,  64,  66,  67,  67,  69,
+             47,  46,  45,  45,  45,  44,  44,  44,  44,  45,  45,  45,  45,  45,  47,  50,  50,  51,  55,  56,  56,  58,  60,  60,  62,  66,  66,  67,  69,  70,  70,  73,
+             53,  52,  51,  51,  50,  49,  49,  49,  49,  50,  50,  49,  49,  49,  51,  54,  54,  55,  59,  60,  60,  63,  65,  65,  67,  71,  71,  72,  75,  76,  76,  79,
+        }, {
+             32,  31,  31,  31,  31,  30,  30,  31,  33,  33,  33,  35,  37,  37,  39,  42,  42,  43,  47,  49,  49,  48,  48,  48,  48,  49,  49,  49,  50,  50,  50,  51,
+             31,  31,  31,  31,  31,  31,  31,  32,  34,  35,  35,  37,  39,  39,  40,  42,  42,  43,  46,  47,  47,  47,  47,  47,  47,  46,  46,  47,  48,  48,  48,  49,
+             31,  31,  31,  31,  32,  32,  32,  33,  35,  36,  36,  38,  40,  40,  41,  43,  43,  43,  46,  46,  46,  46,  46,  46,  45,  45,  45,  45,  46,  46,  46,  47,
+             31,  31,  31,  31,  32,  32,  32,  33,  35,  36,  36,  38,  40,  40,  41,  43,  43,  43,  46,  46,  46,  46,  46,  46,  45,  45,  45,  45,  46,  46,  46,  47,
+             33,  33,  34,  34,  34,  35,  35,  35,  37,  38,  38,  41,  43,  43,  43,  44,  44,  45,  46,  47,  47,  46,  46,  46,  46,  45,  45,  45,  46,  46,  46,  47,
+             37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,  45,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  46,  46,  46,  46,  46,  46,  46,  47,
+             37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,  45,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  46,  46,  46,  46,  46,  46,  46,  47,
+             38,  39,  40,  40,  40,  41,  41,  41,  43,  44,  44,  46,  47,  47,  47,  48,  48,  48,  48,  49,  49,  48,  48,  48,  47,  47,  47,  47,  48,  48,  48,  48,
+             45,  45,  45,  45,  45,  44,  44,  45,  46,  46,  46,  47,  47,  47,  48,  49,  49,  50,  51,  52,  52,  52,  51,  51,  51,  51,  51,  52,  52,  52,  52,  52,
+             48,  47,  47,  47,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  48,  50,  50,  50,  52,  53,  53,  53,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,
+             48,  47,  47,  47,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  48,  50,  50,  50,  52,  53,  53,  53,  53,  53,  53,  53,  53,  53,  54,  54,  54,  54,
+             49,  48,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  47,  49,  49,  50,  52,  53,  53,  54,  54,  54,  55,  56,  56,  56,  57,  57,  57,  58,
+             49,  48,  47,  47,  46,  45,  45,  45,  45,  46,  46,  45,  45,  45,  47,  49,  49,  50,  53,  53,  53,  55,  56,  56,  57,  58,  58,  58,  59,  59,  59,  60,
+             49,  48,  47,  47,  46,  45,  45,  45,  45,  46,  46,  45,  45,  45,  47,  49,  49,  50,  53,  53,  53,  55,  56,  56,  57,  58,  58,  58,  59,  59,  59,  60,
+             50,  49,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  50,  53,  54,  54,  55,  56,  56,  57,  59,  59,  60,  61,  61,  61,  62,
+             52,  51,  50,  50,  49,  48,  48,  48,  47,  47,  47,  47,  47,  47,  48,  50,  50,  51,  53,  54,  54,  56,  57,  57,  59,  61,  61,  62,  63,  64,  64,  65,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,  36,  36,  36,  37,  39,  39,  39,  41,  44,  44,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  34,  35,  35,  35,  37,  38,  38,  38,  40,  42,  42,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  36,  37,  37,  37,  39,  41,  41,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  36,  37,  37,  37,  39,  41,  41,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  36,  37,  37,  37,  39,  41,  41,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  36,  36,  36,  36,  38,  39,  39,  39,  40,  42,  42,
+             32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,
+             32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,
+             32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  36,  37,  37,  37,  37,  38,  38,  38,  39,  40,  40,  40,  42,  43,  43,
+             34,  34,  34,  34,  34,  34,  33,  33,  33,  34,  35,  35,  35,  36,  37,  37,  37,  38,  39,  39,  39,  41,  43,  43,  43,  44,  45,  45,  45,  46,  48,  48,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,  49,  50,  50,  50,  52,  53,  53,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,  49,  50,  50,  50,  52,  53,  53,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,  49,  50,  50,  50,  52,  53,  53,
+             39,  39,  38,  38,  38,  38,  37,  37,  37,  38,  39,  39,  39,  40,  40,  40,  40,  42,  45,  45,  45,  47,  51,  51,  51,  52,  54,  54,  54,  56,  58,  58,
+             44,  43,  42,  42,  42,  41,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  42,  45,  48,  48,  48,  50,  54,  54,  54,  56,  58,  58,  58,  60,  63,  63,
+             44,  43,  42,  42,  42,  41,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  42,  45,  48,  48,  48,  50,  54,  54,  54,  56,  58,  58,  58,  60,  63,  63,
+        }, {
+             32,  31,  31,  31,  31,  31,  30,  30,  30,  32,  33,  33,  33,  35,  37,  37,  37,  39,  42,  42,  42,  45,  49,  49,  49,  48,  48,  48,  48,  48,  49,  49,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  33,  34,  34,  34,  36,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,  47,  47,  47,  47,  47,  47,  47,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  36,  36,  36,  38,  40,  40,  40,  41,  43,  43,  43,  44,  46,  46,  46,  46,  46,  46,  46,  45,  45,  45,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  36,  36,  36,  38,  40,  40,  40,  41,  43,  43,  43,  44,  46,  46,  46,  46,  46,  46,  46,  45,  45,  45,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  34,  36,  36,  36,  38,  40,  40,  40,  41,  43,  43,  43,  44,  46,  46,  46,  46,  46,  46,  46,  45,  45,  45,
+             33,  34,  34,  34,  34,  35,  35,  35,  35,  37,  39,  39,  39,  41,  43,  43,  43,  44,  45,  45,  45,  46,  47,  47,  47,  47,  46,  46,  46,  46,  45,  45,
+             37,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  47,  46,  46,  46,
+             37,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  47,  46,  46,  46,
+             37,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  47,  47,  47,  47,  46,  46,  46,
+             42,  42,  42,  42,  42,  42,  42,  42,  42,  44,  45,  45,  45,  46,  47,  47,  47,  48,  48,  48,  48,  49,  50,  50,  50,  50,  50,  50,  50,  49,  49,  49,
+             48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  49,  50,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  53,  53,
+             48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  49,  50,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  53,  53,
+             48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  49,  50,  50,  50,  51,  53,  53,  53,  53,  53,  53,  53,  53,  53,  53,
+             48,  48,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  46,  46,  48,  50,  50,  50,  51,  53,  53,  53,  54,  54,  54,  54,  55,  56,  56,
+             49,  48,  47,  47,  47,  46,  45,  45,  45,  45,  46,  46,  46,  45,  45,  45,  45,  47,  49,  49,  49,  51,  53,  53,  53,  54,  56,  56,  56,  57,  58,  58,
+             49,  48,  47,  47,  47,  46,  45,  45,  45,  45,  46,  46,  46,  45,  45,  45,  45,  47,  49,  49,  49,  51,  53,  53,  53,  54,  56,  56,  56,  57,  58,  58,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  34,  35,  36,  36,  36,  37,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  35,  35,  35,  35,  36,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  36,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  36,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  36,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  36,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  35,  35,  35,  36,  36,  36,  36,  37,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  36,  36,  36,  36,  37,  37,  37,  37,  37,  38,
+             32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,
+             32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,
+             32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,
+             33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  35,  35,  35,  35,  36,  36,  36,  36,  37,  38,  39,  39,  39,  40,  41,  42,  42,  42,  42,
+             35,  35,  35,  35,  35,  35,  34,  34,  34,  34,  34,  35,  35,  36,  36,  36,  36,  37,  37,  37,  37,  39,  40,  41,  41,  41,  43,  45,  46,  46,  46,  46,
+             36,  35,  35,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  41,  42,  42,  42,  44,  47,  48,  48,  48,  49,
+             36,  35,  35,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  41,  42,  42,  42,  44,  47,  48,  48,  48,  49,
+             36,  35,  35,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  41,  42,  42,  42,  44,  47,  48,  48,  48,  49,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  31,  33,  33,  33,  33,  35,  36,  37,  37,  37,  39,  41,  42,  42,  42,  44,  47,  49,  49,  49,  49,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  34,  34,  34,  34,  36,  37,  38,  38,  38,  39,  41,  42,  42,  42,  44,  46,  48,  48,  48,  48,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  33,  34,  35,  35,  35,  37,  38,  39,  39,  39,  40,  42,  42,  42,  42,  44,  46,  47,  47,  47,  47,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,  37,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  46,  46,  46,  46,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,  37,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  46,  46,  46,  46,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  33,  35,  36,  36,  36,  37,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  46,  46,  46,  46,
+             33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  36,  37,  38,  38,  38,  40,  42,  43,  43,  43,  43,  44,  44,  44,  44,  45,  46,  47,  47,  47,  47,
+             35,  36,  36,  37,  37,  37,  37,  38,  38,  38,  38,  39,  40,  41,  41,  41,  43,  44,  45,  45,  45,  46,  46,  46,  46,  46,  47,  47,  48,  48,  48,  47,
+             37,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  48,  47,
+             37,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  48,  47,
+             37,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  44,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  48,  48,  48,  47,
+             40,  41,  41,  41,  41,  41,  41,  42,  42,  42,  42,  43,  44,  44,  44,  44,  45,  47,  47,  47,  47,  48,  48,  48,  48,  48,  49,  49,  50,  50,  50,  49,
+             45,  45,  45,  45,  45,  45,  45,  44,  44,  44,  44,  45,  46,  46,  46,  46,  46,  47,  47,  47,  47,  48,  49,  49,  49,  49,  50,  51,  52,  52,  52,  52,
+             48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  49,  50,  50,  50,  51,  52,  53,  53,  53,  53,
+             48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  49,  50,  50,  50,  51,  52,  53,  53,  53,  53,
+             48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  47,  47,  48,  49,  50,  50,  50,  51,  52,  53,  53,  53,  53,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  36,  36,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,
+             32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  36,  36,  36,  36,  36,  36,  37,  38,  38,
+             34,  34,  34,  34,  34,  34,  34,  34,  34,  33,  33,  33,  33,  33,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  39,  39,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  30,  31,  32,  33,  33,  33,  33,  33,  34,  35,  36,  37,  37,  37,  37,  39,  40,  42,  42,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  34,  34,  34,  34,  35,  36,  37,  38,  38,  38,  38,  39,  41,  42,  42,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  34,  35,  35,  35,  35,  36,  37,  38,  39,  39,  39,  39,  40,  41,  42,  42,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  33,  35,  35,  35,  35,  35,  37,  38,  39,  40,  40,  40,  40,  41,  42,  43,  43,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  33,  34,  35,  36,  36,  36,  36,  37,  38,  39,  40,  40,  40,  40,  41,  42,  43,  43,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  33,  34,  35,  36,  36,  36,  36,  37,  38,  39,  40,  40,  40,  40,  41,  42,  43,  43,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  33,  34,  35,  36,  36,  36,  36,  37,  38,  39,  40,  40,  40,  40,  41,  42,  43,  43,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  34,  35,  36,  36,  36,  36,  37,  39,  40,  41,  41,  41,  41,  42,  42,  43,  43,
+             33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  36,  37,  38,  38,  38,  38,  39,  41,  42,  43,  43,  43,  43,  43,  44,  44,  44,
+             35,  35,  35,  36,  36,  36,  36,  36,  36,  37,  37,  37,  37,  37,  38,  39,  40,  40,  40,  40,  40,  42,  43,  44,  45,  45,  45,  45,  45,  45,  46,  46,
+             37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  45,  47,  47,  47,  47,  47,  47,  47,  47,  47,
+             37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  45,  47,  47,  47,  47,  47,  47,  47,  47,  47,
+             37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  45,  47,  47,  47,  47,  47,  47,  47,  47,  47,
+             37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  45,  47,  47,  47,  47,  47,  47,  47,  47,  47,
+             38,  39,  39,  40,  40,  40,  40,  40,  40,  40,  41,  41,  41,  41,  41,  42,  43,  44,  44,  44,  44,  45,  46,  47,  47,  47,  47,  47,  47,  47,  48,  48,
+             42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  43,  44,  44,  45,  45,  45,  45,  45,  46,  47,  47,  47,  47,  47,  48,  48,  48,  48,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  30,  30,  30,  31,  31,  32,  33,  33,  33,  33,  33,  33,  33,  34,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  33,  34,  34,  34,  34,  34,  34,  34,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  34,  34,  34,  34,  34,  34,  34,  35,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  33,  33,  34,  35,  35,  35,  35,  35,  35,  35,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  35,  35,  35,  35,  35,  35,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  35,  36,  36,  36,  36,  36,  36,  36,
+             32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  35,  35,  36,  37,  37,  37,  37,  37,  37,  38,
+             33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  38,  38,  38,  38,  38,  39,
+             34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  36,  36,  36,  36,  36,  36,  36,  36,  37,  37,  38,  39,  40,  40,  40,  40,  40,  40,  40,
+             35,  35,  36,  36,  36,  37,  37,  37,  37,  37,  37,  37,  37,  37,  38,  38,  38,  38,  38,  38,  38,  38,  39,  40,  40,  41,  41,  41,  41,  41,  41,  42,
+             37,  37,  37,  38,  38,  38,  38,  38,  38,  38,  38,  38,  39,  39,  39,  40,  40,  40,  40,  40,  40,  40,  41,  41,  42,  43,  43,  43,  43,  43,  43,  44,
+        },
+    }, {
+        {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        }, {
+             32,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,  30,  30,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  30,  30,  30,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        },
+    },
+};
+
+static const uint8_t qm_tbl_32x32_t[][2][528] = {
+    {
+        {
+             32,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  33,  33,
+             32,  32,  32,  33,  34,  35,
+             34,  34,  33,  34,  35,  37,  39,
+             35,  34,  34,  35,  36,  37,  41,  43,
+             36,  35,  34,  35,  36,  38,  42,  45,  48,
+             39,  38,  37,  38,  39,  40,  45,  47,  50,  54,
+             44,  42,  41,  41,  42,  42,  47,  50,  54,  58,  63,
+             46,  44,  42,  43,  44,  44,  49,  52,  55,  59,  65,  67,
+             48,  46,  44,  45,  45,  46,  51,  53,  57,  61,  67,  69,  71,
+             54,  51,  49,  49,  50,  49,  54,  57,  60,  65,  71,  74,  76,  82,
+             59,  56,  54,  54,  54,  53,  58,  61,  64,  69,  75,  78,  80,  87,  92,
+             62,  59,  56,  56,  56,  55,  60,  63,  66,  71,  77,  80,  83,  89,  95,  98,
+             65,  62,  59,  59,  59,  58,  63,  65,  68,  73,  79,  82,  85,  92,  98, 101, 105,
+             71,  68,  65,  64,  64,  63,  68,  70,  73,  78,  84,  87,  90,  97, 103, 107, 111, 117,
+             80,  76,  72,  72,  71,  69,  74,  76,  79,  84,  90,  93,  96, 104, 110, 114, 118, 125, 134,
+             81,  77,  73,  73,  72,  70,  75,  77,  80,  85,  91,  94,  97, 105, 111, 115, 119, 126, 135, 137,
+             83,  78,  75,  74,  74,  72,  76,  79,  81,  86,  92,  95,  99, 106, 113, 117, 121, 128, 137, 138, 140,
+             88,  84,  80,  79,  78,  76,  80,  82,  85,  91,  95,  98, 103, 111, 115, 119, 126, 134, 139, 144, 147, 152,
+             91,  86,  83,  82,  81,  79,  81,  84,  88,  92,  95, 100, 107, 110, 115, 123, 127, 132, 140, 147, 151, 154, 159,
+             94,  89,  86,  85,  84,  82,  82,  86,  90,  92,  97, 103, 105, 111, 119, 121, 128, 136, 139, 146, 156, 158, 161, 166,
+             97,  92,  90,  88,  86,  85,  84,  89,  91,  95, 100, 102, 108, 114, 116, 125, 130, 133, 143, 148, 152, 163, 166, 168, 174,
+            101,  95,  93,  91,  89,  89,  87,  91,  93,  98, 101, 105, 111, 113, 120, 126, 130, 138, 142, 149, 157, 159, 171, 174, 176, 183,
+            104,  99,  97,  94,  93,  93,  90,  92,  96, 100, 102, 108, 111, 116, 122, 125, 134, 137, 144, 151, 155, 165, 169, 179, 182, 184, 191,
+            107, 102, 101,  97,  96,  96,  93,  93,  99, 101, 105, 110, 113, 120, 122, 129, 133, 140, 146, 150, 161, 163, 173, 178, 187, 191, 193, 200,
+            111, 105, 104, 101, 100,  99,  97,  96, 102, 103, 109, 111, 117, 120, 125, 131, 135, 143, 146, 156, 158, 168, 173, 180, 189, 195, 200, 202, 210,
+            115, 109, 108, 104, 104, 102, 101, 100, 103, 106, 111, 113, 119, 121, 129, 131, 140, 142, 151, 155, 162, 168, 176, 183, 188, 199, 204, 210, 212, 220,
+            119, 113, 112, 107, 107, 106, 105, 103, 105, 110, 112, 117, 120, 125, 130, 135, 140, 145, 152, 157, 165, 169, 179, 183, 193, 197, 210, 214, 220, 222, 231,
+            123, 116, 116, 111, 111, 109, 110, 107, 107, 114, 114, 121, 122, 130, 130, 140, 140, 150, 151, 163, 164, 176, 177, 190, 191, 204, 206, 222, 224, 230, 232, 242,
+        }, {
+             32,
+             31,  31,
+             30,  31,  32,
+             32,  33,  33,  35,
+             33,  34,  35,  37,  39,
+             36,  38,  40,  41,  43,  47,
+             41,  42,  42,  43,  45,  47,  48,
+             45,  45,  44,  45,  46,  47,  49,  50,
+             49,  47,  46,  47,  47,  48,  50,  51,  53,
+             48,  47,  45,  46,  46,  46,  49,  51,  53,  54,
+             49,  47,  45,  45,  45,  45,  49,  51,  53,  55,  58,
+             50,  47,  45,  46,  46,  46,  49,  51,  54,  56,  59,  60,
+             50,  48,  46,  46,  46,  46,  50,  52,  54,  56,  60,  60,  61,
+             52,  50,  47,  47,  47,  47,  50,  52,  54,  57,  61,  62,  63,  66,
+             54,  52,  49,  49,  49,  48,  52,  53,  55,  58,  62,  64,  65,  68,  71,
+             56,  53,  51,  50,  50,  49,  52,  54,  56,  59,  63,  64,  66,  69,  72,  73,
+             57,  54,  52,  51,  51,  50,  53,  55,  56,  60,  63,  65,  67,  70,  73,  75,  76,
+             60,  57,  54,  54,  53,  52,  55,  57,  58,  61,  65,  67,  68,  72,  75,  77,  79,  82,
+             63,  60,  57,  57,  56,  54,  57,  59,  60,  63,  67,  69,  71,  75,  78,  80,  82,  85,  89,
+             64,  61,  58,  57,  57,  55,  58,  59,  61,  64,  67,  69,  71,  75,  78,  80,  82,  85,  89,  90,
+             65,  61,  58,  58,  57,  55,  58,  60,  61,  64,  68,  70,  71,  75,  79,  81,  83,  86,  90,  91,  91,
+             67,  63,  61,  60,  59,  57,  60,  61,  63,  66,  69,  70,  73,  77,  79,  81,  85,  88,  90,  92,  94,  96,
+             68,  64,  62,  61,  60,  58,  59,  61,  64,  66,  67,  71,  74,  75,  78,  82,  84,  86,  90,  93,  94,  96,  98,
+             69,  65,  63,  62,  61,  59,  59,  62,  64,  65,  68,  71,  72,  75,  79,  80,  83,  87,  89,  92,  96,  97,  98, 100,
+             70,  66,  64,  63,  62,  61,  60,  63,  64,  66,  69,  70,  73,  76,  77,  81,  84,  85,  89,  92,  93,  98,  99, 100, 102,
+             71,  67,  66,  64,  63,  62,  61,  63,  64,  67,  68,  70,  74,  75,  78,  81,  83,  86,  88,  91,  94,  95, 100, 101, 102, 104,
+             72,  68,  67,  65,  64,  64,  61,  63,  65,  67,  68,  71,  73,  75,  78,  79,  84,  85,  88,  91,  93,  97,  98, 102, 103, 104, 106,
+             73,  69,  68,  66,  65,  65,  63,  63,  66,  67,  69,  71,  73,  76,  77,  81,  82,  85,  88,  90,  94,  95,  99, 101, 104, 105, 106, 109,
+             74,  70,  70,  67,  66,  66,  64,  63,  66,  67,  70,  71,  74,  75,  78,  80,  82,  86,  87,  91,  92,  96,  98, 101, 104, 106, 108, 108, 111,
+             75,  71,  71,  68,  68,  67,  66,  64,  66,  68,  70,  71,  74,  75,  79,  79,  84,  84,  88,  90,  93,  95,  98, 101, 103, 107, 108, 110, 111, 113,
+             76,  72,  72,  69,  69,  68,  67,  65,  66,  69,  70,  72,  74,  76,  78,  81,  83,  85,  88,  90,  93,  95,  98, 100, 104, 105, 109, 111, 112, 113, 116,
+             78,  74,  74,  70,  70,  69,  69,  66,  66,  70,  70,  74,  74,  77,  78,  82,  82,  86,  87,  92,  92,  96,  97, 102, 102, 107, 107, 112, 113, 115, 115, 118,
+        },
+    }, {
+        {
+             32,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  33,
+             32,  32,  32,  33,  34,  35,
+             32,  33,  33,  33,  34,  36,  36,
+             34,  34,  33,  34,  35,  37,  38,  39,
+             36,  35,  34,  35,  36,  38,  40,  42,  48,
+             38,  37,  36,  36,  38,  39,  41,  44,  50,  51,
+             39,  38,  37,  38,  39,  40,  42,  45,  50,  52,  54,
+             44,  42,  41,  41,  42,  42,  44,  47,  54,  56,  58,  63,
+             47,  45,  44,  44,  45,  45,  47,  50,  56,  58,  60,  66,  69,
+             49,  47,  46,  45,  46,  46,  48,  51,  57,  60,  62,  68,  71,  73,
+             54,  51,  50,  49,  50,  49,  51,  54,  60,  63,  65,  71,  75,  77,  82,
+             59,  56,  54,  54,  54,  53,  55,  58,  64,  67,  69,  75,  79,  81,  87,  92,
+             61,  58,  56,  56,  56,  55,  57,  60,  65,  68,  70,  77,  81,  83,  89,  94,  97,
+             65,  62,  60,  59,  59,  58,  60,  63,  68,  71,  73,  79,  84,  87,  92,  98, 101, 105,
+             71,  68,  65,  65,  64,  63,  65,  68,  73,  76,  78,  84,  89,  92,  97, 103, 106, 111, 117,
+             76,  72,  70,  69,  68,  66,  68,  71,  76,  79,  81,  88,  92,  95, 101, 107, 110, 115, 122, 127,
+             80,  76,  73,  72,  71,  69,  71,  74,  79,  82,  84,  90,  95,  98, 104, 110, 113, 118, 125, 130, 134,
+             83,  78,  76,  75,  74,  72,  73,  76,  81,  84,  86,  92,  97, 100, 106, 113, 116, 121, 128, 133, 137, 140,
+             86,  82,  79,  78,  77,  74,  76,  79,  84,  87,  89,  95, 100, 103, 109, 116, 119, 124, 131, 136, 140, 144, 147,
+             89,  85,  82,  81,  79,  78,  78,  82,  86,  87,  92,  97, 100, 105, 112, 114, 120, 128, 131, 136, 146, 147, 150, 155,
+             92,  88,  85,  84,  82,  81,  80,  85,  86,  90,  95,  97, 102, 107, 110, 117, 122, 125, 134, 138, 142, 152, 154, 156, 162,
+             95,  90,  88,  86,  85,  84,  82,  86,  88,  93,  95,  99, 105, 106, 113, 118, 121, 129, 132, 139, 146, 148, 159, 161, 163, 169,
+             98,  93,  91,  89,  88,  87,  85,  87,  90,  94,  96, 102, 104, 109, 114, 117, 126, 128, 134, 141, 145, 154, 157, 166, 168, 170, 176,
+            101,  96,  95,  92,  91,  90,  88,  88,  93,  95,  99, 103, 106, 112, 114, 121, 124, 131, 136, 140, 149, 151, 160, 165, 173, 176, 178, 184,
+            104,  99,  98,  95,  94,  93,  91,  90,  95,  96, 102, 103, 109, 112, 117, 122, 125, 133, 136, 145, 146, 156, 160, 167, 174, 180, 184, 186, 193,
+            108, 102, 101,  98,  97,  96,  95,  93,  97, 100, 104, 106, 111, 113, 121, 122, 130, 132, 140, 143, 150, 155, 162, 169, 174, 183, 188, 192, 194, 201,
+            111, 105, 105, 101, 100,  99,  98,  96,  98, 103, 105, 109, 112, 117, 121, 125, 130, 135, 141, 146, 152, 156, 165, 169, 178, 181, 193, 196, 201, 202, 210,
+            114, 109, 109, 104, 104, 102, 102,  99, 100, 106, 106, 113, 113, 120, 121, 129, 130, 139, 140, 151, 151, 162, 162, 175, 176, 187, 188, 203, 204, 210, 211, 219,
+        }, {
+             32,
+             31,  31,
+             30,  31,  31,
+             31,  32,  32,  33,
+             33,  34,  35,  36,  39,
+             36,  38,  39,  40,  43,  47,
+             38,  40,  41,  41,  44,  47,  47,
+             41,  42,  42,  43,  45,  47,  48,  48,
+             49,  47,  46,  46,  47,  48,  49,  50,  53,
+             49,  47,  46,  46,  46,  47,  48,  50,  53,  53,
+             48,  47,  46,  45,  46,  46,  48,  49,  53,  54,  54,
+             49,  47,  45,  45,  45,  45,  47,  49,  53,  55,  55,  58,
+             50,  48,  46,  46,  46,  46,  47,  50,  54,  55,  56,  59,  61,
+             51,  48,  47,  46,  47,  46,  47,  50,  54,  55,  56,  60,  61,  62,
+             52,  50,  48,  47,  47,  47,  48,  50,  54,  56,  57,  61,  63,  64,  66,
+             54,  52,  50,  49,  49,  48,  49,  52,  55,  57,  58,  62,  64,  66,  68,  71,
+             55,  53,  51,  50,  50,  49,  50,  52,  56,  58,  59,  63,  65,  66,  69,  72,  73,
+             57,  54,  52,  51,  51,  50,  51,  53,  56,  58,  60,  63,  66,  67,  70,  73,  74,  76,
+             60,  57,  55,  54,  53,  52,  53,  55,  58,  60,  61,  65,  68,  69,  72,  75,  77,  79,  82,
+             62,  59,  57,  56,  55,  53,  54,  56,  59,  61,  63,  66,  69,  70,  74,  77,  78,  80,  84,  86,
+             63,  60,  58,  57,  56,  54,  55,  57,  60,  62,  63,  67,  70,  71,  75,  78,  79,  82,  85,  87,  89,
+             65,  61,  59,  58,  57,  55,  56,  58,  61,  63,  64,  68,  71,  72,  75,  79,  80,  83,  86,  88,  90,  91,
+             66,  63,  60,  59,  58,  56,  58,  59,  62,  64,  65,  69,  72,  73,  76,  80,  81,  84,  87,  90,  91,  93,  94,
+             67,  64,  62,  61,  59,  58,  58,  60,  63,  64,  66,  69,  71,  73,  77,  78,  81,  85,  86,  89,  93,  94,  95,  97,
+             68,  65,  63,  62,  60,  59,  58,  61,  62,  64,  67,  68,  71,  74,  75,  79,  81,  83,  87,  89,  91,  95,  96,  97,  99,
+             69,  66,  64,  63,  61,  61,  59,  61,  62,  65,  66,  68,  72,  73,  76,  78,  80,  84,  85,  88,  91,  92,  97,  98,  98, 101,
+             70,  67,  65,  63,  62,  62,  60,  61,  63,  65,  66,  69,  71,  73,  76,  77,  81,  83,  85,  88,  90,  94,  95,  99, 100, 100, 103,
+             71,  67,  67,  64,  63,  63,  61,  61,  64,  65,  67,  69,  71,  74,  75,  78,  80,  83,  85,  87,  91,  92,  95,  97, 100, 102, 102, 105,
+             72,  68,  68,  65,  65,  64,  62,  62,  64,  65,  68,  69,  72,  73,  76,  78,  80,  83,  84,  88,  89,  93,  95,  97, 100, 102, 104, 104, 107,
+             73,  69,  69,  66,  66,  65,  64,  63,  64,  66,  68,  69,  72,  73,  77,  77,  81,  82,  86,  87,  90,  92,  95,  97,  99, 103, 104, 106, 106, 109,
+             74,  70,  70,  67,  67,  66,  65,  63,  64,  67,  68,  70,  72,  74,  76,  78,  80,  82,  85,  87,  90,  91,  95,  96, 100, 101, 105, 106, 108, 108, 111,
+             75,  71,  71,  68,  68,  66,  66,  64,  64,  68,  68,  71,  71,  75,  75,  79,  79,  83,  84,  88,  89,  93,  93,  98,  98, 102, 103, 108, 108, 110, 110, 113,
+        },
+    }, {
+        {
+             32,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  33,
+             32,  32,  32,  32,  33,  34,
+             32,  32,  32,  32,  34,  34,  35,
+             34,  34,  33,  33,  35,  36,  37,  39,
+             34,  34,  34,  34,  36,  36,  37,  41,  42,
+             36,  35,  34,  34,  36,  37,  38,  42,  45,  48,
+             39,  38,  38,  37,  39,  40,  40,  45,  47,  50,  54,
+             41,  39,  39,  38,  40,  40,  41,  46,  48,  51,  55,  56,
+             44,  42,  41,  41,  42,  42,  42,  47,  50,  54,  58,  59,  63,
+             48,  46,  45,  44,  45,  45,  45,  50,  53,  56,  61,  62,  66,  70,
+             49,  47,  46,  45,  46,  46,  46,  51,  53,  57,  62,  63,  68,  71,  73,
+             54,  51,  50,  49,  50,  49,  49,  54,  56,  60,  65,  67,  71,  76,  77,  82,
+             58,  55,  54,  53,  53,  53,  52,  57,  59,  63,  68,  70,  74,  79,  81,  86,  90,
+             59,  57,  55,  54,  54,  54,  54,  59,  61,  64,  69,  71,  75,  80,  82,  87,  91,  93,
+             65,  62,  60,  59,  59,  58,  58,  63,  65,  68,  73,  75,  79,  85,  87,  92,  97,  99, 105,
+             69,  66,  64,  63,  63,  62,  61,  66,  68,  71,  76,  78,  83,  88,  90,  96, 100, 102, 109, 113,
+             71,  68,  66,  65,  64,  63,  63,  68,  70,  73,  78,  80,  84,  90,  92,  97, 102, 104, 111, 115, 117,
+             80,  76,  73,  72,  71,  70,  69,  74,  76,  79,  84,  86,  90,  96,  98, 104, 109, 111, 118, 123, 125, 134,
+             81,  77,  75,  74,  73,  72,  71,  75,  77,  80,  85,  87,  91,  97,  99, 105, 110, 112, 120, 125, 127, 136, 137,
+             83,  78,  76,  75,  74,  73,  72,  76,  78,  81,  86,  88,  92,  98, 100, 106, 111, 113, 121, 126, 128, 137, 139, 140,
+             87,  83,  81,  79,  78,  77,  75,  80,  82,  85,  90,  91,  96, 101, 103, 110, 114, 117, 125, 129, 133, 142, 143, 145, 150,
+             90,  85,  83,  81,  80,  79,  78,  81,  83,  87,  89,  93,  98, 100, 106, 110, 114, 121, 124, 130, 136, 138, 148, 149, 151, 156,
+             93,  88,  86,  84,  83,  82,  80,  82,  85,  89,  90,  96,  98, 102, 107, 109, 118, 120, 125, 131, 134, 143, 145, 153, 156, 157, 163,
+             95,  90,  89,  86,  85,  85,  83,  83,  88,  89,  93,  97,  99, 105, 106, 113, 116, 122, 127, 130, 139, 140, 148, 153, 159, 162, 164, 169,
+             98,  93,  92,  89,  88,  87,  86,  85,  89,  90,  96,  97, 102, 105, 109, 114, 117, 124, 126, 134, 136, 144, 148, 154, 160, 166, 169, 170, 176,
+            101,  96,  95,  91,  91,  90,  89,  87,  90,  93,  97,  99, 104, 105, 112, 113, 121, 122, 130, 133, 139, 144, 150, 155, 160, 168, 172, 176, 177, 184,
+            104,  99,  98,  94,  94,  92,  92,  90,  92,  96,  98, 102, 104, 109, 112, 116, 121, 125, 130, 135, 141, 144, 152, 155, 163, 166, 177, 179, 184, 185, 191,
+            107, 101, 101,  97,  97,  95,  95,  93,  93,  99,  99, 105, 105, 112, 112, 120, 120, 129, 129, 139, 140, 149, 149, 161, 161, 172, 172, 185, 186, 191, 192, 199,
+        }, {
+             32,
+             31,  31,
+             30,  31,  31,
+             30,  31,  31,  32,
+             33,  34,  35,  35,  39,
+             35,  36,  37,  37,  41,  43,
+             36,  38,  39,  40,  43,  45,  47,
+             41,  42,  42,  42,  45,  46,  47,  48,
+             44,  44,  44,  44,  46,  46,  47,  49,  50,
+             49,  47,  47,  46,  47,  47,  48,  50,  51,  53,
+             48,  47,  46,  45,  46,  46,  46,  49,  51,  53,  54,
+             48,  47,  46,  45,  46,  46,  46,  49,  51,  53,  54,  55,
+             49,  47,  46,  45,  45,  45,  45,  49,  51,  53,  55,  56,  58,
+             50,  48,  47,  46,  46,  46,  46,  50,  51,  54,  56,  57,  59,  61,
+             51,  48,  47,  46,  47,  46,  46,  50,  51,  54,  56,  57,  60,  62,  62,
+             52,  50,  48,  47,  47,  47,  47,  50,  52,  54,  57,  58,  61,  63,  64,  66,
+             54,  51,  50,  49,  49,  48,  48,  51,  53,  55,  58,  59,  62,  64,  65,  68,  70,
+             55,  52,  51,  50,  49,  49,  48,  52,  53,  55,  59,  60,  62,  65,  66,  68,  70,  71,
+             57,  54,  53,  52,  51,  50,  50,  53,  54,  56,  60,  61,  63,  66,  67,  70,  73,  73,  76,
+             59,  56,  54,  53,  53,  52,  51,  54,  56,  58,  61,  62,  65,  68,  69,  72,  74,  75,  78,  80,
+             60,  57,  55,  54,  53,  53,  52,  55,  56,  58,  61,  63,  65,  68,  69,  72,  75,  76,  79,  81,  82,
+             63,  60,  58,  57,  56,  55,  54,  57,  59,  60,  63,  65,  67,  70,  71,  75,  77,  78,  82,  84,  85,  89,
+             64,  61,  59,  58,  57,  56,  55,  58,  59,  61,  64,  65,  68,  71,  72,  75,  78,  79,  82,  85,  86,  89,  90,
+             65,  61,  60,  58,  57,  56,  55,  58,  59,  61,  64,  65,  68,  71,  72,  75,  78,  79,  83,  85,  86,  90,  91,  91,
+             67,  63,  61,  60,  59,  58,  57,  60,  61,  63,  65,  66,  69,  72,  73,  77,  79,  80,  84,  86,  88,  92,  93,  93,  95,
+             68,  64,  63,  61,  60,  59,  58,  60,  61,  63,  65,  67,  70,  71,  74,  76,  78,  81,  83,  86,  88,  89,  94,  94,  95,  97,
+             68,  65,  64,  62,  61,  60,  58,  59,  61,  64,  64,  68,  69,  71,  74,  75,  79,  80,  83,  86,  87,  91,  92,  95,  96,  97,  99,
+             69,  66,  65,  63,  62,  61,  59,  59,  62,  63,  65,  67,  69,  72,  72,  76,  78,  80,  83,  84,  88,  89,  92,  94,  97,  98,  99, 101,
+             70,  67,  66,  63,  63,  62,  61,  60,  63,  63,  66,  67,  69,  71,  73,  76,  77,  81,  82,  85,  86,  90,  91,  94,  96,  99, 100, 100, 103,
+             71,  67,  67,  64,  64,  63,  62,  61,  62,  64,  66,  67,  70,  71,  74,  74,  78,  79,  83,  84,  87,  89,  91,  94,  95,  99, 100, 102, 102, 104,
+             72,  68,  68,  65,  65,  64,  63,  61,  62,  65,  66,  68,  69,  71,  73,  75,  77,  79,  82,  84,  87,  88,  92,  93,  96,  97, 101, 102, 104, 104, 106,
+             73,  69,  69,  66,  66,  64,  64,  62,  62,  66,  66,  69,  69,  72,  73,  76,  77,  81,  81,  85,  85,  89,  90,  94,  94,  99,  99, 104, 104, 106, 106, 108,
+        },
+    }, {
+        {
+             32,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  33,
+             31,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  33,  34,  35,
+             32,  33,  33,  33,  34,  34,  36,  36,
+             34,  34,  34,  33,  35,  35,  37,  38,  39,
+             35,  35,  34,  34,  36,  36,  38,  39,  42,  46,
+             36,  35,  35,  34,  36,  36,  38,  40,  42,  47,  48,
+             39,  38,  38,  37,  39,  39,  40,  42,  45,  49,  50,  54,
+             41,  40,  39,  38,  40,  40,  41,  43,  46,  50,  52,  55,  57,
+             44,  42,  42,  41,  42,  42,  42,  44,  47,  52,  54,  58,  60,  63,
+             47,  45,  45,  44,  44,  45,  45,  47,  50,  55,  56,  60,  62,  66,  69,
+             48,  46,  45,  44,  45,  45,  46,  47,  51,  55,  57,  61,  63,  67,  70,  71,
+             54,  51,  50,  49,  49,  50,  49,  51,  54,  59,  60,  65,  67,  71,  75,  76,  82,
+             56,  53,  52,  51,  51,  51,  51,  53,  56,  60,  61,  66,  69,  73,  77,  78,  84,  86,
+             59,  56,  55,  54,  54,  54,  53,  55,  58,  62,  64,  69,  71,  75,  79,  80,  87,  89,  92,
+             64,  61,  60,  58,  58,  58,  57,  59,  62,  66,  67,  72,  75,  79,  83,  84,  91,  93,  97, 102,
+             65,  62,  61,  59,  59,  59,  58,  60,  63,  67,  68,  73,  75,  79,  84,  85,  92,  94,  98, 103, 105,
+             71,  68,  67,  65,  64,  64,  63,  65,  68,  72,  73,  78,  80,  84,  89,  90,  97, 100, 103, 109, 111, 117,
+             74,  71,  69,  68,  67,  67,  65,  67,  70,  74,  75,  80,  83,  86,  91,  93, 100, 102, 106, 112, 114, 120, 123,
+             80,  76,  74,  72,  71,  71,  69,  71,  74,  78,  79,  84,  86,  90,  95,  96, 104, 106, 110, 116, 118, 125, 128, 134,
+             82,  78,  76,  74,  73,  73,  71,  73,  76,  79,  80,  86,  88,  92,  97,  98, 106, 108, 112, 118, 120, 127, 131, 136, 139,
+             83,  78,  77,  75,  74,  74,  72,  73,  76,  80,  81,  86,  89,  92,  97,  99, 106, 109, 113, 119, 121, 128, 131, 137, 139, 140,
+             87,  83,  81,  79,  78,  78,  75,  77,  80,  83,  85,  90,  92,  96, 100, 102, 110, 112, 117, 122, 125, 133, 135, 142, 144, 145, 150,
+             90,  85,  84,  81,  80,  80,  78,  78,  82,  84,  87,  91,  93,  98,  99, 106, 108, 113, 118, 121, 129, 130, 137, 141, 147, 150, 151, 156,
+             92,  88,  87,  84,  83,  82,  80,  80,  84,  85,  90,  91,  95,  98, 102, 106, 109, 115, 117, 125, 126, 134, 137, 142, 148, 152, 155, 156, 162,
+             95,  90,  89,  86,  85,  84,  83,  82,  85,  87,  91,  92,  97,  98, 105, 105, 112, 114, 121, 123, 129, 133, 138, 143, 147, 155, 158, 161, 162, 168,
+             97,  92,  92,  88,  88,  86,  86,  84,  85,  90,  91,  95,  97, 101, 104, 108, 112, 116, 121, 125, 130, 133, 140, 143, 150, 152, 162, 164, 168, 168, 174,
+            100,  95,  95,  90,  90,  89,  89,  86,  86,  92,  92,  97,  98, 104, 104, 111, 111, 119, 119, 128, 129, 137, 137, 147, 148, 157, 158, 169, 170, 174, 175, 181,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             30,  31,  31,  32,
+             33,  34,  34,  34,  37,
+             33,  34,  35,  35,  38,  39,
+             36,  38,  39,  40,  42,  43,  47,
+             38,  40,  40,  41,  43,  44,  47,  47,
+             41,  42,  42,  42,  44,  45,  47,  48,  48,
+             47,  46,  46,  45,  46,  47,  47,  48,  50,  52,
+             49,  47,  47,  46,  47,  47,  48,  49,  50,  52,  53,
+             48,  47,  46,  45,  46,  46,  46,  48,  49,  52,  53,  54,
+             49,  47,  46,  45,  46,  46,  46,  47,  49,  52,  53,  55,  55,
+             49,  47,  46,  45,  45,  45,  45,  47,  49,  52,  53,  55,  57,  58,
+             50,  48,  47,  46,  46,  46,  46,  47,  50,  53,  54,  56,  57,  59,  61,
+             50,  48,  47,  46,  46,  46,  46,  47,  50,  53,  54,  56,  58,  60,  61,  61,
+             52,  50,  49,  47,  47,  47,  47,  48,  50,  53,  54,  57,  59,  61,  63,  63,  66,
+             53,  50,  50,  48,  48,  48,  47,  49,  51,  54,  55,  58,  59,  62,  64,  64,  67,  68,
+             54,  52,  51,  49,  49,  49,  48,  49,  52,  55,  55,  58,  60,  62,  64,  65,  68,  69,  71,
+             56,  54,  53,  51,  51,  51,  49,  51,  53,  55,  56,  59,  61,  63,  66,  66,  70,  71,  73,  75,
+             57,  54,  53,  52,  51,  51,  50,  51,  53,  56,  56,  60,  61,  63,  66,  67,  70,  71,  73,  76,  76,
+             60,  57,  56,  54,  53,  53,  52,  53,  55,  58,  58,  61,  63,  65,  68,  68,  72,  73,  75,  78,  79,  82,
+             61,  58,  57,  55,  55,  54,  53,  54,  56,  58,  59,  62,  64,  66,  69,  69,  73,  74,  76,  79,  80,  83,  84,
+             63,  60,  59,  57,  56,  56,  54,  55,  57,  60,  60,  63,  65,  67,  70,  71,  75,  76,  78,  81,  82,  85,  86,  89,
+             64,  61,  60,  58,  57,  57,  55,  56,  58,  60,  61,  64,  66,  68,  70,  71,  75,  77,  79,  82,  82,  86,  87,  90,  91,
+             65,  61,  60,  58,  57,  57,  55,  56,  58,  61,  61,  64,  66,  68,  71,  71,  75,  77,  79,  82,  83,  86,  88,  90,  91,  91,
+             67,  63,  62,  60,  59,  59,  57,  58,  60,  62,  63,  66,  67,  69,  72,  73,  77,  78,  80,  83,  84,  88,  89,  92,  93,  93,  95,
+             67,  64,  63,  61,  60,  60,  58,  58,  61,  61,  63,  65,  67,  70,  70,  74,  75,  78,  80,  81,  85,  86,  89,  91,  93,  94,  95,  97,
+             68,  65,  64,  62,  61,  60,  59,  58,  61,  61,  64,  65,  67,  69,  71,  73,  75,  78,  79,  83,  83,  87,  88,  91,  93,  95,  96,  97,  99,
+             69,  65,  65,  62,  62,  61,  60,  59,  61,  62,  64,  65,  68,  68,  72,  72,  76,  76,  80,  81,  84,  86,  88,  90,  92,  95,  96,  98,  98, 100,
+             70,  66,  66,  63,  63,  62,  61,  60,  60,  63,  64,  66,  67,  69,  71,  73,  75,  77,  79,  81,  84,  85,  88,  89,  93,  93,  97,  98, 100, 100, 102,
+             71,  67,  67,  64,  64,  62,  62,  60,  60,  64,  64,  67,  67,  70,  70,  74,  74,  78,  78,  82,  82,  86,  86,  91,  91,  95,  95, 100, 100, 101, 101, 104,
+        },
+    }, {
+        {
+             32,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  32,
+             31,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  33,  33,  34,
+             32,  32,  32,  32,  33,  34,  35,  35,
+             33,  33,  33,  33,  34,  35,  36,  36,  38,
+             34,  34,  34,  33,  34,  35,  36,  37,  39,  39,
+             36,  35,  35,  34,  35,  36,  37,  38,  42,  42,  48,
+             36,  35,  35,  34,  35,  36,  38,  38,  42,  43,  48,  49,
+             39,  38,  38,  37,  38,  39,  40,  40,  44,  45,  50,  51,  54,
+             41,  39,  39,  38,  39,  40,  40,  41,  45,  46,  51,  52,  55,  56,
+             44,  42,  42,  41,  41,  42,  42,  42,  46,  47,  54,  54,  58,  59,  63,
+             46,  44,  44,  42,  43,  44,  44,  44,  48,  49,  55,  55,  59,  61,  65,  67,
+             48,  46,  46,  44,  45,  45,  45,  46,  50,  51,  57,  57,  61,  63,  67,  69,  71,
+             52,  50,  49,  48,  48,  48,  48,  48,  52,  53,  59,  59,  64,  65,  70,  72,  74,  78,
+             54,  51,  51,  49,  49,  50,  49,  49,  53,  54,  60,  60,  65,  67,  71,  74,  76,  80,  82,
+             58,  56,  55,  53,  53,  53,  53,  53,  57,  58,  63,  64,  68,  70,  75,  77,  80,  84,  86,  91,
+             59,  56,  56,  54,  54,  54,  53,  53,  57,  58,  64,  64,  69,  70,  75,  78,  80,  85,  87,  91,  92,
+             65,  62,  61,  59,  59,  59,  58,  58,  62,  63,  68,  68,  73,  75,  79,  82,  85,  90,  92,  97,  98, 105,
+             66,  63,  63,  60,  60,  60,  59,  59,  63,  64,  69,  69,  74,  76,  80,  83,  86,  91,  93,  98,  99, 106, 107,
+             71,  68,  67,  65,  65,  64,  63,  63,  67,  68,  73,  73,  78,  80,  84,  87,  90,  95,  97, 103, 103, 111, 112, 117,
+             74,  71,  70,  68,  67,  67,  66,  65,  69,  70,  75,  75,  80,  82,  86,  89,  93,  97, 100, 105, 106, 114, 115, 120, 123,
+             80,  76,  75,  72,  72,  71,  70,  69,  73,  74,  79,  79,  84,  86,  90,  93,  96, 101, 104, 110, 110, 118, 119, 125, 128, 134,
+             81,  77,  77,  74,  73,  73,  71,  71,  74,  75,  80,  80,  85,  87,  91,  94,  98, 103, 105, 111, 112, 120, 121, 127, 130, 136, 137,
+             83,  78,  78,  75,  74,  74,  72,  72,  75,  76,  81,  81,  86,  88,  92,  95,  99, 104, 106, 112, 113, 121, 122, 128, 131, 137, 139, 140,
+             86,  82,  81,  78,  77,  77,  75,  74,  78,  79,  84,  84,  89,  91,  95,  98, 101, 106, 109, 115, 116, 124, 125, 131, 135, 140, 142, 144, 147,
+             89,  84,  84,  80,  80,  79,  78,  77,  79,  81,  85,  86,  91,  92,  97,  98, 104, 106, 112, 114, 119, 123, 128, 132, 135, 142, 145, 148, 149, 153,
+             91,  86,  86,  82,  82,  81,  80,  79,  80,  84,  85,  88,  91,  94,  97, 100, 104, 107, 112, 115, 120, 123, 129, 132, 138, 140, 148, 150, 153, 154, 159,
+             93,  88,  88,  84,  84,  83,  83,  80,  81,  86,  86,  91,  91,  96,  97, 103, 103, 110, 110, 118, 119, 126, 126, 135, 136, 144, 144, 155, 155, 159, 159, 164,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             30,  31,  31,  32,
+             31,  32,  32,  33,  34,
+             33,  34,  35,  35,  37,  39,
+             35,  37,  37,  38,  39,  41,  44,
+             36,  38,  39,  40,  41,  43,  46,  47,
+             40,  41,  41,  42,  43,  44,  46,  47,  48,
+             41,  42,  42,  42,  43,  45,  46,  47,  48,  48,
+             49,  47,  47,  46,  46,  47,  47,  48,  50,  50,  53,
+             49,  47,  47,  46,  46,  47,  47,  47,  49,  50,  53,  53,
+             48,  47,  47,  45,  46,  46,  46,  46,  49,  49,  53,  53,  54,
+             48,  47,  46,  45,  45,  46,  46,  46,  49,  49,  53,  53,  54,  55,
+             49,  47,  46,  45,  45,  45,  45,  45,  48,  49,  53,  54,  55,  56,  58,
+             50,  47,  47,  45,  46,  46,  46,  46,  49,  49,  54,  54,  56,  57,  59,  60,
+             50,  48,  48,  46,  46,  46,  46,  46,  49,  50,  54,  54,  56,  57,  60,  60,  61,
+             52,  49,  49,  47,  47,  47,  47,  46,  49,  50,  54,  54,  57,  58,  61,  62,  63,  65,
+             52,  50,  49,  47,  47,  47,  47,  47,  49,  50,  54,  54,  57,  58,  61,  62,  63,  65,  66,
+             54,  52,  51,  49,  49,  49,  48,  48,  51,  52,  55,  55,  58,  59,  62,  63,  65,  67,  68,  70,
+             54,  52,  51,  49,  49,  49,  48,  48,  51,  52,  55,  56,  58,  60,  62,  64,  65,  67,  68,  70,  71,
+             57,  54,  54,  52,  51,  51,  50,  50,  52,  53,  56,  57,  60,  61,  63,  65,  67,  69,  70,  73,  73,  76,
+             57,  55,  54,  52,  52,  51,  51,  50,  53,  53,  57,  57,  60,  61,  64,  65,  67,  70,  71,  73,  74,  77,  77,
+             60,  57,  56,  54,  54,  53,  52,  52,  54,  55,  58,  59,  61,  63,  65,  67,  68,  71,  72,  75,  75,  79,  79,  82,
+             61,  58,  57,  55,  55,  54,  53,  53,  55,  56,  59,  59,  62,  63,  66,  68,  69,  72,  73,  76,  76,  80,  80,  83,  84,
+             63,  60,  59,  57,  57,  56,  55,  54,  57,  57,  60,  61,  63,  65,  67,  69,  71,  73,  75,  78,  78,  82,  82,  85,  86,  89,
+             64,  61,  60,  58,  57,  57,  56,  55,  57,  58,  61,  61,  64,  65,  68,  69,  71,  74,  75,  78,  78,  82,  83,  86,  87,  89,  90,
+             65,  61,  61,  58,  58,  57,  56,  55,  58,  58,  61,  62,  64,  65,  68,  70,  71,  74,  75,  78,  79,  83,  83,  86,  88,  90,  91,  91,
+             66,  63,  62,  60,  59,  58,  57,  56,  59,  59,  62,  63,  65,  66,  69,  70,  72,  75,  76,  79,  80,  84,  84,  87,  89,  91,  92,  93,  94,
+             67,  64,  63,  61,  60,  59,  58,  57,  59,  60,  62,  63,  66,  66,  70,  70,  73,  74,  77,  78,  81,  83,  85,  87,  89,  92,  93,  94,  94,  96,
+             68,  64,  64,  61,  61,  60,  59,  58,  59,  61,  62,  64,  65,  67,  69,  71,  72,  74,  77,  78,  81,  82,  85,  86,  89,  90,  94,  94,  96,  96,  98,
+             69,  65,  65,  62,  62,  61,  61,  58,  59,  62,  62,  65,  65,  68,  68,  71,  71,  75,  75,  79,  79,  83,  83,  87,  87,  91,  91,  96,  96,  97,  97,  99,
+        },
+    }, {
+        {
+             32,
+             31,  32,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  33,
+             31,  32,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  32,  34,  34,  35,
+             32,  32,  32,  32,  32,  34,  34,  35,  35,
+             34,  34,  34,  33,  33,  35,  35,  37,  37,  39,
+             34,  34,  34,  33,  33,  35,  35,  37,  37,  39,  39,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,
+             36,  35,  35,  34,  34,  36,  36,  38,  38,  42,  42,  48,  48,
+             39,  38,  38,  37,  37,  39,  39,  40,  40,  45,  45,  50,  50,  54,
+             39,  38,  38,  37,  37,  39,  39,  40,  40,  45,  45,  50,  50,  54,  54,
+             44,  42,  42,  41,  41,  42,  42,  42,  42,  47,  47,  54,  54,  58,  58,  63,
+             44,  42,  42,  41,  41,  42,  42,  42,  42,  47,  47,  54,  54,  58,  58,  63,  63,
+             48,  46,  46,  44,  44,  45,  45,  46,  46,  51,  51,  57,  57,  61,  61,  67,  67,  71,
+             48,  46,  46,  44,  44,  45,  45,  46,  46,  51,  51,  57,  57,  61,  61,  67,  67,  71,  71,
+             54,  51,  51,  49,  49,  50,  50,  49,  49,  54,  54,  60,  60,  65,  65,  71,  71,  76,  76,  82,
+             54,  51,  51,  49,  49,  50,  50,  49,  49,  54,  54,  60,  60,  65,  65,  71,  71,  76,  76,  82,  82,
+             59,  56,  56,  54,  54,  54,  54,  53,  53,  58,  58,  64,  64,  69,  69,  75,  75,  80,  80,  87,  87,  92,
+             59,  56,  56,  54,  54,  54,  54,  53,  53,  58,  58,  64,  64,  69,  69,  75,  75,  80,  80,  87,  87,  92,  92,
+             65,  62,  62,  59,  59,  59,  59,  58,  58,  63,  63,  68,  68,  73,  73,  79,  79,  85,  85,  92,  92,  98,  98, 105,
+             65,  62,  62,  59,  59,  59,  59,  58,  58,  63,  63,  68,  68,  73,  73,  79,  79,  85,  85,  92,  92,  98,  98, 105, 105,
+             71,  68,  68,  65,  65,  64,  64,  63,  63,  68,  68,  73,  73,  78,  78,  84,  84,  90,  90,  97,  97, 103, 103, 111, 111, 117,
+             71,  68,  68,  65,  65,  64,  64,  63,  63,  68,  68,  73,  73,  78,  78,  84,  84,  90,  90,  97,  97, 103, 103, 111, 111, 117, 117,
+             80,  76,  76,  72,  72,  71,  71,  69,  69,  74,  74,  79,  79,  84,  84,  90,  90,  96,  96, 104, 104, 110, 110, 118, 118, 125, 125, 134,
+             80,  76,  76,  72,  72,  71,  71,  69,  69,  74,  74,  79,  79,  84,  84,  90,  90,  96,  96, 104, 104, 110, 110, 118, 118, 125, 125, 134, 134,
+             83,  78,  78,  75,  75,  74,  74,  72,  72,  76,  76,  81,  81,  86,  86,  92,  92,  99,  99, 106, 106, 113, 113, 121, 121, 128, 128, 137, 137, 140,
+             83,  78,  78,  75,  75,  74,  74,  72,  72,  76,  76,  81,  81,  86,  86,  92,  92,  99,  99, 106, 106, 113, 113, 121, 121, 128, 128, 137, 137, 140, 140,
+             87,  83,  83,  79,  79,  77,  77,  75,  75,  80,  80,  84,  84,  90,  90,  96,  96, 102, 102, 109, 109, 116, 116, 124, 124, 132, 132, 141, 141, 144, 144, 149,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             30,  31,  31,  32,
+             30,  31,  31,  32,  32,
+             33,  34,  34,  35,  35,  39,
+             33,  34,  34,  35,  35,  39,  39,
+             36,  38,  38,  40,  40,  43,  43,  47,
+             36,  38,  38,  40,  40,  43,  43,  47,  47,
+             41,  42,  42,  42,  42,  45,  45,  47,  47,  48,
+             41,  42,  42,  42,  42,  45,  45,  47,  47,  48,  48,
+             49,  47,  47,  46,  46,  47,  47,  48,  48,  50,  50,  53,
+             49,  47,  47,  46,  46,  47,  47,  48,  48,  50,  50,  53,  53,
+             48,  47,  47,  45,  45,  46,  46,  46,  46,  49,  49,  53,  53,  54,
+             48,  47,  47,  45,  45,  46,  46,  46,  46,  49,  49,  53,  53,  54,  54,
+             49,  47,  47,  45,  45,  45,  45,  45,  45,  49,  49,  53,  53,  55,  55,  58,
+             49,  47,  47,  45,  45,  45,  45,  45,  45,  49,  49,  53,  53,  55,  55,  58,  58,
+             50,  48,  48,  46,  46,  46,  46,  46,  46,  50,  50,  54,  54,  56,  56,  60,  60,  61,
+             50,  48,  48,  46,  46,  46,  46,  46,  46,  50,  50,  54,  54,  56,  56,  60,  60,  61,  61,
+             52,  50,  50,  47,  47,  47,  47,  47,  47,  50,  50,  54,  54,  57,  57,  61,  61,  63,  63,  66,
+             52,  50,  50,  47,  47,  47,  47,  47,  47,  50,  50,  54,  54,  57,  57,  61,  61,  63,  63,  66,  66,
+             54,  52,  52,  49,  49,  49,  49,  48,  48,  52,  52,  55,  55,  58,  58,  62,  62,  65,  65,  68,  68,  71,
+             54,  52,  52,  49,  49,  49,  49,  48,  48,  52,  52,  55,  55,  58,  58,  62,  62,  65,  65,  68,  68,  71,  71,
+             57,  54,  54,  52,  52,  51,  51,  50,  50,  53,  53,  56,  56,  60,  60,  63,  63,  67,  67,  70,  70,  73,  73,  76,
+             57,  54,  54,  52,  52,  51,  51,  50,  50,  53,  53,  56,  56,  60,  60,  63,  63,  67,  67,  70,  70,  73,  73,  76,  76,
+             60,  57,  57,  54,  54,  53,  53,  52,  52,  55,  55,  58,  58,  61,  61,  65,  65,  68,  68,  72,  72,  75,  75,  79,  79,  82,
+             60,  57,  57,  54,  54,  53,  53,  52,  52,  55,  55,  58,  58,  61,  61,  65,  65,  68,  68,  72,  72,  75,  75,  79,  79,  82,  82,
+             63,  60,  60,  57,  57,  56,  56,  54,  54,  57,  57,  60,  60,  63,  63,  67,  67,  71,  71,  75,  75,  78,  78,  82,  82,  85,  85,  89,
+             63,  60,  60,  57,  57,  56,  56,  54,  54,  57,  57,  60,  60,  63,  63,  67,  67,  71,  71,  75,  75,  78,  78,  82,  82,  85,  85,  89,  89,
+             65,  61,  61,  58,  58,  57,  57,  55,  55,  58,  58,  61,  61,  64,  64,  68,  68,  71,  71,  75,  75,  79,  79,  83,  83,  86,  86,  90,  90,  91,
+             65,  61,  61,  58,  58,  57,  57,  55,  55,  58,  58,  61,  61,  64,  64,  68,  68,  71,  71,  75,  75,  79,  79,  83,  83,  86,  86,  90,  90,  91,  91,
+             67,  63,  63,  60,  60,  59,  59,  57,  57,  60,  60,  62,  62,  66,  66,  69,  69,  72,  72,  76,  76,  80,  80,  84,  84,  88,  88,  92,  92,  93,  93,  95,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  32,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  32,  33,  33,  34,
+             32,  32,  32,  32,  32,  33,  34,  34,  35,
+             32,  32,  32,  32,  33,  33,  34,  34,  35,  35,
+             34,  34,  34,  33,  33,  34,  35,  35,  37,  37,  39,
+             34,  34,  34,  33,  33,  34,  35,  35,  37,  37,  39,  39,
+             35,  35,  35,  34,  34,  35,  36,  36,  38,  38,  42,  42,  46,
+             36,  35,  35,  34,  34,  35,  36,  37,  38,  38,  42,  42,  47,  48,
+             38,  37,  37,  36,  36,  37,  38,  38,  39,  40,  44,  44,  48,  50,  51,
+             39,  38,  38,  38,  37,  38,  39,  39,  40,  41,  45,  45,  49,  50,  52,  54,
+             41,  40,  40,  39,  38,  39,  40,  40,  41,  41,  46,  46,  50,  52,  54,  55,  57,
+             44,  42,  42,  41,  41,  41,  42,  42,  42,  43,  47,  47,  52,  54,  56,  58,  60,  63,
+             45,  43,  43,  42,  41,  42,  42,  43,  43,  43,  48,  48,  53,  54,  57,  58,  60,  64,  65,
+             48,  46,  46,  45,  44,  45,  45,  45,  46,  46,  51,  51,  55,  57,  59,  61,  63,  67,  68,  71,
+             48,  46,  46,  45,  44,  45,  45,  45,  46,  46,  51,  51,  55,  57,  59,  61,  63,  67,  68,  71,  71,
+             53,  51,  51,  49,  49,  49,  49,  49,  49,  49,  54,  54,  58,  59,  62,  64,  67,  71,  72,  75,  75,  81,
+             54,  52,  51,  50,  49,  49,  50,  49,  49,  50,  54,  54,  59,  60,  63,  65,  67,  71,  72,  76,  76,  81,  82,
+             57,  55,  55,  53,  52,  52,  52,  52,  52,  52,  57,  57,  61,  62,  65,  67,  70,  74,  75,  79,  79,  85,  85,  89,
+             59,  56,  56,  54,  54,  54,  54,  54,  53,  54,  58,  58,  62,  64,  67,  69,  71,  75,  76,  80,  80,  86,  87,  90,  92,
+             62,  59,  59,  57,  56,  56,  56,  56,  55,  56,  60,  60,  64,  66,  69,  71,  73,  77,  78,  83,  83,  89,  89,  93,  95,  98,
+             65,  62,  62,  60,  59,  59,  59,  59,  58,  58,  63,  63,  67,  68,  71,  73,  75,  79,  81,  85,  85,  91,  92,  96,  98, 101, 105,
+             67,  64,  64,  62,  61,  61,  60,  60,  59,  60,  64,  64,  68,  69,  72,  74,  77,  81,  82,  87,  87,  93,  94,  98,  99, 103, 106, 108,
+             71,  68,  68,  66,  65,  64,  64,  64,  63,  63,  68,  68,  72,  73,  76,  78,  80,  84,  85,  90,  90,  97,  97, 102, 103, 107, 111, 113, 117,
+             72,  69,  69,  66,  65,  65,  65,  64,  63,  64,  68,  68,  72,  73,  76,  78,  81,  85,  86,  91,  91,  97,  98, 102, 104, 108, 111, 113, 118, 119,
+             80,  76,  76,  73,  72,  72,  71,  70,  69,  70,  74,  74,  78,  79,  82,  84,  86,  90,  91,  96,  96, 103, 104, 108, 110, 114, 118, 120, 125, 126, 134,
+             80,  76,  76,  73,  72,  72,  71,  70,  69,  70,  74,  74,  78,  79,  82,  84,  86,  90,  91,  96,  96, 103, 104, 108, 110, 114, 118, 120, 125, 126, 134, 134,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             30,  31,  31,  31,
+             30,  31,  31,  31,  32,
+             32,  32,  33,  33,  33,  35,
+             33,  34,  34,  35,  35,  37,  39,
+             34,  35,  35,  36,  36,  38,  40,  41,
+             36,  38,  38,  39,  40,  41,  43,  44,  47,
+             37,  38,  39,  40,  40,  42,  43,  44,  47,  47,
+             41,  42,  42,  42,  42,  43,  45,  45,  47,  47,  48,
+             41,  42,  42,  42,  42,  43,  45,  45,  47,  47,  48,  48,
+             47,  46,  46,  46,  45,  46,  47,  47,  47,  48,  50,  50,  52,
+             49,  48,  47,  47,  46,  47,  47,  47,  48,  48,  50,  50,  52,  53,
+             49,  47,  47,  46,  46,  46,  46,  47,  47,  47,  50,  50,  52,  53,  53,
+             48,  47,  47,  46,  45,  46,  46,  46,  46,  47,  49,  49,  52,  53,  54,  54,
+             49,  47,  47,  46,  45,  45,  46,  46,  46,  46,  49,  49,  52,  53,  54,  55,  55,
+             49,  47,  47,  45,  45,  45,  45,  45,  45,  45,  49,  49,  52,  53,  55,  55,  57,  58,
+             49,  47,  47,  46,  45,  45,  45,  45,  45,  46,  49,  49,  52,  53,  55,  56,  57,  59,  59,
+             50,  48,  48,  47,  46,  46,  46,  46,  46,  46,  50,  50,  53,  54,  55,  56,  58,  60,  60,  61,
+             50,  48,  48,  47,  46,  46,  46,  46,  46,  46,  50,  50,  53,  54,  55,  56,  58,  60,  60,  61,  61,
+             52,  50,  49,  48,  47,  47,  47,  47,  46,  47,  50,  50,  53,  54,  56,  57,  59,  61,  61,  63,  63,  66,
+             52,  50,  50,  48,  47,  47,  47,  47,  47,  47,  50,  50,  53,  54,  56,  57,  59,  61,  61,  63,  63,  66,  66,
+             54,  51,  51,  50,  49,  49,  49,  48,  48,  48,  51,  51,  54,  55,  57,  58,  60,  62,  62,  65,  65,  67,  68,  69,
+             54,  52,  52,  50,  49,  49,  49,  49,  48,  48,  52,  52,  55,  55,  57,  58,  60,  62,  63,  65,  65,  68,  68,  70,  71,
+             56,  53,  53,  51,  51,  50,  50,  50,  49,  49,  52,  52,  55,  56,  58,  59,  61,  63,  63,  66,  66,  69,  69,  71,  72,  73,
+             57,  54,  54,  52,  52,  51,  51,  51,  50,  50,  53,  53,  56,  56,  58,  60,  61,  63,  64,  67,  67,  70,  70,  72,  73,  75,  76,
+             58,  55,  55,  53,  52,  52,  52,  51,  50,  51,  54,  54,  56,  57,  59,  60,  62,  64,  65,  67,  67,  71,  71,  73,  74,  75,  77,  78,
+             60,  57,  57,  55,  54,  54,  53,  53,  52,  52,  55,  55,  58,  58,  60,  61,  63,  65,  66,  68,  68,  72,  72,  74,  75,  77,  79,  80,  82,
+             60,  57,  57,  55,  54,  54,  54,  53,  52,  52,  55,  55,  58,  58,  60,  62,  63,  65,  66,  69,  69,  72,  73,  75,  76,  77,  79,  80,  82,  82,
+             63,  60,  60,  58,  57,  57,  56,  55,  54,  55,  57,  57,  60,  60,  62,  63,  65,  67,  68,  71,  71,  74,  75,  77,  78,  80,  82,  83,  85,  85,  89,
+             63,  60,  60,  58,  57,  57,  56,  55,  54,  55,  57,  57,  60,  60,  62,  63,  65,  67,  68,  71,  71,  74,  75,  77,  78,  80,  82,  83,  85,  85,  89,  89,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  32,
+             31,  32,  32,  32,
+             31,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  33,
+             31,  32,  32,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  32,  32,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  33,  34,  34,  35,
+             32,  32,  32,  32,  32,  32,  33,  34,  34,  35,  35,
+             33,  33,  33,  33,  33,  33,  34,  35,  35,  36,  36,  38,
+             34,  34,  34,  34,  33,  33,  35,  35,  36,  37,  37,  39,  39,
+             34,  34,  34,  34,  34,  34,  35,  36,  36,  37,  37,  40,  41,  42,
+             36,  35,  35,  35,  34,  34,  36,  36,  37,  38,  38,  42,  42,  45,  48,
+             36,  35,  35,  35,  34,  34,  36,  36,  37,  38,  38,  42,  42,  45,  48,  48,
+             38,  38,  38,  37,  37,  37,  38,  38,  39,  40,  40,  43,  44,  46,  50,  50,  52,
+             39,  38,  38,  38,  37,  37,  39,  39,  39,  40,  40,  44,  45,  47,  50,  50,  53,  54,
+             41,  40,  40,  39,  38,  38,  40,  40,  40,  41,  41,  45,  46,  48,  52,  52,  54,  55,  57,
+             44,  42,  42,  42,  41,  41,  42,  42,  42,  42,  42,  46,  47,  50,  54,  54,  57,  58,  60,  63,
+             44,  42,  42,  42,  41,  41,  42,  42,  42,  42,  42,  46,  47,  50,  54,  54,  57,  58,  60,  63,  63,
+             47,  46,  45,  45,  44,  44,  44,  45,  45,  45,  45,  49,  50,  52,  56,  56,  59,  60,  62,  66,  66,  69,
+             48,  47,  46,  45,  44,  44,  45,  45,  45,  46,  46,  50,  51,  53,  57,  57,  60,  61,  63,  67,  67,  70,  71,
+             50,  49,  48,  47,  46,  46,  47,  47,  47,  47,  47,  51,  52,  54,  58,  58,  61,  62,  65,  68,  68,  72,  73,  75,
+             54,  52,  51,  50,  49,  49,  49,  50,  49,  49,  49,  53,  54,  56,  60,  60,  64,  65,  67,  71,  71,  75,  76,  78,  82,
+             54,  52,  51,  50,  49,  49,  49,  50,  49,  49,  49,  53,  54,  56,  60,  60,  64,  65,  67,  71,  71,  75,  76,  78,  82,  82,
+             58,  56,  55,  54,  53,  53,  53,  53,  53,  52,  52,  56,  57,  59,  63,  63,  67,  68,  70,  74,  74,  78,  79,  82,  86,  86,  90,
+             59,  57,  56,  55,  54,  54,  54,  54,  54,  53,  53,  57,  58,  60,  64,  64,  68,  69,  71,  75,  75,  79,  80,  83,  87,  87,  91,  92,
+             61,  59,  58,  57,  56,  56,  56,  56,  55,  55,  55,  59,  60,  62,  65,  65,  69,  70,  73,  77,  77,  81,  82,  85,  89,  89,  93,  94,  97,
+             65,  63,  62,  61,  59,  59,  59,  59,  59,  58,  58,  62,  63,  65,  68,  68,  72,  73,  75,  79,  79,  84,  85,  88,  92,  92,  97,  98, 101, 105,
+             65,  63,  62,  61,  59,  59,  59,  59,  59,  58,  58,  62,  63,  65,  68,  68,  72,  73,  75,  79,  79,  84,  85,  88,  92,  92,  97,  98, 101, 105, 105,
+             70,  67,  67,  65,  64,  64,  63,  63,  63,  62,  62,  66,  67,  69,  72,  72,  76,  77,  79,  83,  83,  88,  89,  92,  96,  96, 101, 102, 105, 109, 109, 114,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             30,  31,  31,  31,  32,
+             30,  31,  31,  31,  32,  32,
+             33,  33,  34,  34,  34,  34,  37,
+             33,  34,  34,  35,  35,  35,  38,  39,
+             34,  36,  36,  36,  37,  37,  40,  40,  42,
+             36,  38,  38,  39,  40,  40,  42,  43,  45,  47,
+             36,  38,  38,  39,  40,  40,  42,  43,  45,  47,  47,
+             40,  41,  41,  41,  42,  42,  44,  44,  45,  47,  47,  48,
+             41,  42,  42,  42,  42,  42,  44,  45,  46,  47,  47,  48,  48,
+             44,  44,  44,  44,  44,  44,  45,  46,  46,  47,  47,  49,  49,  50,
+             49,  48,  47,  47,  46,  46,  47,  47,  47,  48,  48,  50,  50,  51,  53,
+             49,  48,  47,  47,  46,  46,  47,  47,  47,  48,  48,  50,  50,  51,  53,  53,
+             48,  47,  47,  46,  45,  45,  46,  46,  46,  47,  47,  49,  50,  51,  53,  53,  54,
+             48,  47,  47,  46,  45,  45,  46,  46,  46,  46,  46,  49,  49,  51,  53,  53,  54,  54,
+             49,  47,  47,  46,  45,  45,  46,  46,  46,  46,  46,  49,  49,  51,  53,  53,  54,  55,  55,
+             49,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  48,  49,  51,  53,  53,  55,  55,  57,  58,
+             49,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  48,  49,  51,  53,  53,  55,  55,  57,  58,  58,
+             50,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  49,  50,  51,  54,  54,  56,  56,  57,  59,  59,  61,
+             50,  49,  48,  47,  46,  46,  46,  46,  46,  46,  46,  49,  50,  51,  54,  54,  56,  56,  58,  60,  60,  61,  61,
+             51,  49,  49,  48,  47,  47,  47,  47,  47,  46,  46,  49,  50,  51,  54,  54,  56,  57,  58,  60,  60,  62,  62,  63,
+             52,  50,  50,  49,  47,  47,  47,  47,  47,  47,  47,  49,  50,  52,  54,  54,  57,  57,  59,  61,  61,  63,  63,  65,  66,
+             52,  50,  50,  49,  47,  47,  47,  47,  47,  47,  47,  49,  50,  52,  54,  54,  57,  57,  59,  61,  61,  63,  63,  65,  66,  66,
+             54,  52,  51,  50,  49,  49,  49,  49,  48,  48,  48,  51,  51,  53,  55,  55,  58,  58,  60,  62,  62,  64,  65,  66,  68,  68,  70,
+             54,  52,  52,  51,  49,  49,  49,  49,  49,  48,  48,  51,  52,  53,  55,  55,  58,  58,  60,  62,  62,  64,  65,  66,  68,  68,  70,  71,
+             55,  53,  53,  52,  50,  50,  50,  50,  49,  49,  49,  51,  52,  54,  56,  56,  58,  59,  60,  63,  63,  65,  66,  67,  69,  69,  71,  72,  73,
+             57,  55,  54,  53,  52,  52,  51,  51,  50,  50,  50,  52,  53,  54,  56,  56,  59,  60,  61,  63,  63,  66,  67,  68,  70,  70,  73,  73,  74,  76,
+             57,  55,  54,  53,  52,  52,  51,  51,  50,  50,  50,  52,  53,  54,  56,  56,  59,  60,  61,  63,  63,  66,  67,  68,  70,  70,  73,  73,  74,  76,  76,
+             59,  57,  56,  55,  54,  54,  53,  53,  52,  51,  51,  54,  55,  56,  58,  58,  60,  61,  63,  65,  65,  67,  68,  70,  72,  72,  74,  75,  76,  78,  78,  80,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  32,
+             31,  31,  32,  32,
+             31,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  33,
+             31,  32,  32,  32,  32,  32,  32,  33,  33,
+             32,  32,  32,  32,  32,  32,  33,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,
+             32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  35,  35,
+             32,  33,  33,  33,  33,  33,  33,  34,  34,  35,  36,  36,  36,
+             34,  34,  34,  34,  33,  33,  34,  35,  35,  35,  37,  37,  38,  39,
+             34,  34,  34,  34,  33,  33,  34,  35,  35,  35,  37,  37,  38,  39,  39,
+             35,  34,  34,  34,  34,  34,  34,  35,  36,  36,  37,  37,  39,  41,  41,  43,
+             36,  35,  35,  35,  34,  34,  35,  36,  36,  37,  38,  38,  40,  42,  42,  45,  48,
+             36,  35,  35,  35,  34,  34,  35,  36,  36,  37,  38,  38,  40,  42,  42,  45,  48,  48,
+             38,  37,  37,  37,  36,  36,  36,  38,  38,  38,  39,  39,  41,  44,  44,  47,  50,  50,  51,
+             39,  39,  38,  38,  37,  37,  38,  39,  39,  39,  40,  40,  42,  45,  45,  47,  50,  50,  52,  54,
+             39,  39,  38,  38,  37,  37,  38,  39,  39,  39,  40,  40,  42,  45,  45,  47,  50,  50,  52,  54,  54,
+             42,  41,  41,  41,  40,  40,  40,  41,  41,  41,  42,  42,  44,  47,  47,  49,  53,  53,  55,  56,  56,  60,
+             44,  43,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  44,  47,  47,  50,  54,  54,  56,  58,  58,  61,  63,
+             44,  43,  43,  42,  41,  41,  41,  42,  42,  42,  43,  43,  45,  48,  48,  51,  54,  54,  56,  58,  58,  62,  64,  64,
+             47,  46,  45,  45,  44,  44,  44,  44,  45,  45,  45,  45,  47,  50,  50,  53,  56,  56,  58,  60,  60,  64,  66,  66,  69,
+             48,  47,  46,  46,  45,  44,  45,  45,  45,  45,  46,  46,  47,  51,  51,  53,  57,  57,  59,  61,  61,  65,  67,  67,  70,  71,
+             49,  48,  47,  47,  46,  45,  45,  46,  46,  46,  46,  46,  48,  51,  51,  54,  57,  57,  60,  62,  62,  66,  68,  68,  71,  72,  73,
+             53,  51,  51,  51,  49,  49,  49,  49,  49,  49,  49,  49,  51,  54,  54,  57,  59,  59,  62,  64,  64,  69,  71,  71,  74,  75,  77,  81,
+             54,  52,  51,  51,  50,  49,  49,  50,  50,  49,  49,  49,  51,  54,  54,  57,  60,  60,  63,  65,  65,  69,  71,  72,  75,  76,  77,  81,  82,
+             55,  53,  53,  52,  51,  50,  50,  51,  51,  51,  50,  50,  52,  55,  55,  58,  61,  61,  64,  66,  66,  70,  72,  73,  76,  77,  78,  83,  83,  85,
+             59,  57,  56,  56,  54,  54,  54,  54,  54,  54,  53,  53,  55,  58,  58,  61,  64,  64,  67,  69,  69,  73,  75,  76,  79,  80,  81,  86,  87,  88,  92,
+             59,  57,  56,  56,  54,  54,  54,  54,  54,  54,  53,  53,  55,  58,  58,  61,  64,  64,  67,  69,  69,  73,  75,  76,  79,  80,  81,  86,  87,  88,  92,  92,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             30,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  32,
+             31,  31,  32,  32,  32,  32,  33,
+             33,  34,  34,  34,  35,  35,  35,  38,
+             33,  34,  34,  34,  35,  35,  36,  38,  39,
+             34,  35,  35,  36,  36,  36,  37,  40,  40,  41,
+             36,  38,  38,  38,  39,  40,  40,  43,  43,  44,  47,
+             36,  38,  38,  38,  39,  40,  40,  43,  43,  44,  47,  47,
+             38,  39,  40,  40,  41,  41,  41,  43,  44,  45,  47,  47,  47,
+             41,  42,  42,  42,  42,  42,  43,  44,  45,  45,  47,  47,  48,  48,
+             41,  42,  42,  42,  42,  42,  43,  44,  45,  45,  47,  47,  48,  48,  48,
+             45,  45,  45,  45,  44,  44,  44,  46,  46,  46,  47,  47,  48,  49,  49,  50,
+             49,  48,  47,  47,  46,  46,  46,  47,  47,  47,  48,  48,  49,  50,  50,  51,  53,
+             49,  48,  47,  47,  46,  46,  46,  47,  47,  47,  48,  48,  49,  50,  50,  51,  53,  53,
+             49,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  48,  50,  50,  51,  53,  53,  53,
+             48,  47,  47,  47,  46,  45,  45,  46,  46,  46,  46,  46,  48,  49,  49,  51,  53,  53,  54,  54,
+             48,  47,  47,  47,  46,  45,  45,  46,  46,  46,  46,  46,  48,  49,  49,  51,  53,  53,  54,  54,  54,
+             49,  47,  47,  47,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  51,  53,  53,  54,  55,  55,  57,
+             49,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  51,  53,  53,  55,  55,  55,  57,  58,
+             49,  47,  47,  47,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  51,  53,  53,  55,  56,  56,  58,  58,  59,
+             50,  49,  48,  48,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  52,  54,  54,  55,  56,  56,  58,  59,  59,  61,
+             50,  49,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  52,  54,  54,  55,  56,  56,  59,  60,  60,  61,  61,
+             51,  49,  48,  48,  47,  46,  46,  47,  47,  46,  46,  46,  47,  50,  50,  52,  54,  54,  55,  56,  56,  59,  60,  60,  61,  62,  62,
+             52,  50,  49,  49,  48,  47,  47,  47,  47,  47,  46,  46,  48,  50,  50,  52,  54,  54,  56,  57,  57,  60,  61,  61,  63,  63,  64,  66,
+             52,  50,  50,  49,  48,  47,  47,  47,  47,  47,  47,  47,  48,  50,  50,  52,  54,  54,  56,  57,  57,  60,  61,  61,  63,  63,  64,  66,  66,
+             53,  51,  50,  50,  48,  48,  48,  48,  48,  48,  47,  47,  48,  51,  51,  52,  54,  54,  56,  58,  58,  60,  61,  62,  63,  64,  64,  67,  67,  68,
+             54,  53,  52,  52,  50,  49,  49,  49,  49,  49,  48,  48,  49,  52,  52,  53,  55,  55,  57,  58,  58,  61,  62,  63,  64,  65,  66,  68,  68,  69,  71,
+             54,  53,  52,  52,  50,  49,  49,  49,  49,  49,  48,  48,  49,  52,  52,  53,  55,  55,  57,  58,  58,  61,  62,  63,  64,  65,  66,  68,  68,  69,  71,  71,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  32,
+             31,  31,  32,  32,
+             31,  31,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  35,  35,  35,
+             32,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  35,  36,  36,  36,
+             34,  34,  34,  34,  34,  33,  33,  34,  35,  35,  35,  36,  37,  37,  38,  39,
+             34,  34,  34,  34,  34,  33,  33,  34,  35,  35,  35,  36,  37,  37,  38,  39,  39,
+             34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  36,  37,  37,  38,  40,  40,  41,
+             35,  35,  35,  35,  34,  34,  34,  34,  36,  36,  36,  37,  38,  38,  39,  42,  42,  43,  46,
+             36,  35,  35,  35,  35,  34,  34,  35,  36,  36,  36,  37,  38,  38,  40,  42,  42,  44,  47,  48,
+             36,  35,  35,  35,  35,  34,  34,  35,  36,  36,  36,  37,  38,  38,  40,  42,  42,  44,  47,  48,  48,
+             38,  37,  37,  37,  36,  36,  36,  36,  37,  38,  38,  39,  39,  39,  41,  44,  44,  45,  48,  50,  50,  51,
+             39,  39,  38,  38,  38,  37,  37,  38,  39,  39,  39,  40,  40,  40,  42,  45,  45,  46,  49,  50,  50,  52,  54,
+             39,  39,  38,  38,  38,  37,  37,  38,  39,  39,  39,  40,  40,  40,  42,  45,  45,  46,  49,  50,  50,  52,  54,  54,
+             41,  40,  40,  40,  39,  38,  38,  39,  40,  40,  40,  41,  41,  41,  43,  46,  46,  47,  50,  52,  52,  54,  55,  55,  57,
+             44,  43,  42,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  42,  44,  47,  47,  49,  52,  54,  54,  56,  58,  58,  60,  63,
+             44,  43,  42,  42,  42,  41,  41,  41,  42,  42,  42,  42,  42,  42,  44,  47,  47,  49,  52,  54,  54,  56,  58,  58,  60,  63,  63,
+             45,  44,  43,  43,  42,  41,  41,  42,  42,  42,  42,  43,  43,  43,  45,  48,  48,  49,  53,  54,  54,  57,  58,  58,  60,  64,  64,  65,
+             47,  46,  45,  45,  45,  44,  44,  44,  44,  45,  45,  45,  45,  45,  47,  50,  50,  51,  55,  56,  56,  58,  60,  60,  62,  66,  66,  67,  69,
+             48,  47,  46,  46,  45,  44,  44,  45,  45,  45,  45,  45,  46,  46,  47,  51,  51,  52,  55,  57,  57,  59,  61,  61,  63,  67,  67,  68,  70,  71,
+             48,  47,  46,  46,  45,  44,  44,  45,  45,  45,  45,  45,  46,  46,  47,  51,  51,  52,  55,  57,  57,  59,  61,  61,  63,  67,  67,  68,  70,  71,  71,
+             51,  50,  49,  49,  48,  47,  47,  47,  48,  48,  48,  48,  48,  48,  50,  53,  53,  54,  57,  58,  58,  61,  63,  63,  66,  69,  69,  70,  73,  74,  74,  77,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  32,
+             30,  31,  31,  31,  31,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  33,
+             33,  33,  34,  34,  34,  34,  34,  35,  37,
+             33,  34,  34,  34,  35,  35,  35,  36,  38,  39,
+             33,  34,  34,  34,  35,  35,  35,  36,  38,  39,  39,
+             35,  36,  37,  37,  37,  38,  38,  38,  41,  41,  41,  44,
+             36,  37,  38,  38,  39,  40,  40,  40,  42,  43,  43,  46,  47,
+             36,  37,  38,  38,  39,  40,  40,  40,  42,  43,  43,  46,  47,  47,
+             38,  39,  40,  40,  40,  41,  41,  41,  43,  44,  44,  46,  47,  47,  47,
+             41,  42,  42,  42,  42,  42,  42,  43,  44,  45,  45,  46,  47,  47,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  43,  44,  45,  45,  46,  47,  47,  48,  48,  48,
+             43,  43,  43,  43,  43,  43,  43,  43,  45,  45,  45,  46,  47,  47,  48,  49,  49,  49,
+             47,  47,  46,  46,  46,  45,  45,  46,  46,  47,  47,  47,  47,  47,  48,  50,  50,  50,  52,
+             49,  48,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  48,  48,  49,  50,  50,  51,  52,  53,
+             49,  48,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  48,  48,  49,  50,  50,  51,  52,  53,  53,
+             49,  48,  47,  47,  46,  46,  46,  46,  46,  46,  46,  47,  47,  47,  48,  50,  50,  50,  52,  53,  53,  53,
+             48,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  48,  49,  49,  50,  52,  53,  53,  54,  54,
+             48,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  48,  49,  49,  50,  52,  53,  53,  54,  54,  54,
+             49,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  47,  49,  49,  50,  52,  53,  53,  54,  55,  55,  55,
+             49,  47,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  50,  52,  53,  53,  55,  55,  55,  57,  58,
+             49,  47,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  50,  52,  53,  53,  55,  55,  55,  57,  58,  58,
+             49,  48,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  50,  52,  53,  53,  55,  56,  56,  57,  59,  59,  59,
+             50,  49,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  50,  53,  54,  54,  55,  56,  56,  57,  59,  59,  60,  61,
+             50,  49,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  50,  53,  54,  54,  55,  56,  56,  58,  60,  60,  60,  61,  61,
+             50,  49,  48,  48,  47,  46,  46,  46,  46,  46,  46,  46,  46,  46,  47,  50,  50,  50,  53,  54,  54,  55,  56,  56,  58,  60,  60,  60,  61,  61,  61,
+             51,  50,  49,  49,  48,  47,  47,  47,  47,  47,  47,  47,  46,  46,  48,  50,  50,  51,  53,  54,  54,  56,  57,  57,  58,  60,  60,  61,  62,  63,  63,  64,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  32,
+             31,  31,  32,  32,
+             31,  31,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  34,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  34,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  34,  34,  34,  34,  35,  35,  35,
+             33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  35,  36,  36,  36,  37,
+             34,  34,  34,  34,  34,  34,  33,  33,  33,  34,  35,  35,  35,  36,  37,  37,  37,  38,  39,
+             34,  34,  34,  34,  34,  34,  33,  33,  33,  34,  35,  35,  35,  36,  37,  37,  37,  38,  39,  39,
+             34,  34,  34,  34,  34,  34,  33,  33,  33,  34,  35,  35,  35,  36,  37,  37,  37,  38,  39,  39,  39,
+             35,  34,  34,  34,  34,  34,  34,  34,  34,  35,  36,  36,  36,  36,  37,  37,  37,  39,  41,  41,  41,  43,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,
+             36,  35,  35,  35,  35,  35,  34,  34,  34,  35,  36,  36,  36,  37,  38,  38,  38,  40,  42,  42,  42,  45,  48,  48,  48,
+             37,  37,  37,  37,  37,  36,  36,  36,  36,  37,  38,  38,  38,  38,  39,  39,  39,  41,  44,  44,  44,  46,  49,  49,  49,  51,
+             39,  39,  38,  38,  38,  38,  37,  37,  37,  38,  39,  39,  39,  40,  40,  40,  40,  42,  45,  45,  45,  47,  50,  50,  50,  52,  54,
+             39,  39,  38,  38,  38,  38,  37,  37,  37,  38,  39,  39,  39,  40,  40,  40,  40,  42,  45,  45,  45,  47,  50,  50,  50,  52,  54,  54,
+             39,  39,  38,  38,  38,  38,  37,  37,  37,  38,  39,  39,  39,  40,  40,  40,  40,  42,  45,  45,  45,  47,  50,  50,  50,  52,  54,  54,  54,
+             41,  41,  40,  40,  40,  39,  39,  39,  39,  40,  40,  40,  40,  41,  41,  41,  41,  44,  46,  46,  46,  49,  52,  52,  52,  54,  56,  56,  56,  58,
+             44,  43,  42,  42,  42,  41,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  42,  45,  47,  47,  47,  50,  54,  54,  54,  56,  58,  58,  58,  60,  63,
+             44,  43,  42,  42,  42,  41,  41,  41,  41,  41,  42,  42,  42,  42,  42,  42,  42,  45,  47,  47,  47,  50,  54,  54,  54,  56,  58,  58,  58,  60,  63,  63,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  31,  32,
+             30,  31,  31,  31,  31,  31,  32,  32,
+             30,  31,  31,  31,  31,  31,  32,  32,  32,
+             32,  32,  33,  33,  33,  33,  33,  33,  33,  35,
+             33,  34,  34,  34,  34,  35,  35,  35,  35,  37,  39,
+             33,  34,  34,  34,  34,  35,  35,  35,  35,  37,  39,  39,
+             33,  34,  34,  34,  34,  35,  35,  35,  35,  37,  39,  39,  39,
+             35,  35,  36,  36,  36,  37,  37,  37,  37,  39,  41,  41,  41,  43,
+             36,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,
+             36,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,
+             36,  37,  38,  38,  38,  39,  40,  40,  40,  41,  43,  43,  43,  45,  47,  47,  47,
+             39,  39,  40,  40,  40,  41,  41,  41,  41,  42,  44,  44,  44,  45,  47,  47,  47,  47,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  43,  45,  45,  45,  46,  47,  47,  47,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  43,  45,  45,  45,  46,  47,  47,  47,  48,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  43,  45,  45,  45,  46,  47,  47,  47,  48,  48,  48,  48,
+             45,  45,  45,  45,  45,  44,  44,  44,  44,  45,  46,  46,  46,  47,  47,  47,  47,  48,  49,  49,  49,  50,
+             49,  48,  47,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  48,  48,  48,  49,  50,  50,  50,  51,  53,
+             49,  48,  47,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  48,  48,  48,  49,  50,  50,  50,  51,  53,  53,
+             49,  48,  47,  47,  47,  47,  46,  46,  46,  47,  47,  47,  47,  47,  48,  48,  48,  49,  50,  50,  50,  51,  53,  53,  53,
+             49,  48,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  48,  50,  50,  50,  51,  53,  53,  53,  53,
+             48,  48,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  46,  46,  48,  49,  49,  49,  51,  53,  53,  53,  53,  54,
+             48,  48,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  46,  46,  48,  49,  49,  49,  51,  53,  53,  53,  53,  54,  54,
+             48,  48,  47,  47,  47,  46,  45,  45,  45,  46,  46,  46,  46,  46,  46,  46,  46,  48,  49,  49,  49,  51,  53,  53,  53,  53,  54,  54,  54,
+             49,  48,  47,  47,  47,  46,  45,  45,  45,  45,  46,  46,  46,  46,  46,  46,  46,  47,  49,  49,  49,  51,  53,  53,  53,  54,  55,  55,  55,  56,
+             49,  48,  47,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  49,  51,  53,  53,  53,  54,  55,  55,  55,  57,  58,
+             49,  48,  47,  47,  47,  46,  45,  45,  45,  45,  45,  45,  45,  45,  45,  45,  45,  47,  49,  49,  49,  51,  53,  53,  53,  54,  55,  55,  55,  57,  58,  58,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  32,
+             31,  31,  31,  32,  32,
+             31,  31,  31,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  35,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  34,  34,  34,  34,  35,  35,  35,  35,
+             32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  35,  35,  36,  36,  36,  36,
+             33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  35,  35,  35,  35,  36,  36,  36,  36,  37,  38,
+             34,  34,  34,  34,  34,  34,  34,  33,  33,  33,  33,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  38,  39,  39,
+             34,  34,  34,  34,  34,  34,  34,  33,  33,  33,  33,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  38,  39,  39,  39,
+             34,  34,  34,  34,  34,  34,  34,  33,  33,  33,  33,  34,  35,  35,  35,  35,  36,  36,  37,  37,  37,  38,  39,  39,  39,  39,
+             34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  36,  36,  36,  36,  37,  37,  37,  37,  38,  40,  41,  41,  41,  42,
+             35,  35,  35,  35,  35,  35,  34,  34,  34,  34,  34,  35,  36,  36,  36,  36,  37,  37,  38,  38,  38,  39,  41,  42,  42,  42,  44,  46,
+             36,  35,  35,  35,  35,  35,  35,  34,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  42,  42,  42,  42,  45,  47,  48,
+             36,  35,  35,  35,  35,  35,  35,  34,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  42,  42,  42,  42,  45,  47,  48,  48,
+             36,  35,  35,  35,  35,  35,  35,  34,  34,  34,  34,  35,  36,  36,  36,  36,  37,  38,  38,  38,  38,  40,  42,  42,  42,  42,  45,  47,  48,  48,  48,
+             37,  37,  36,  36,  36,  36,  36,  35,  35,  35,  35,  36,  37,  37,  37,  37,  38,  39,  39,  39,  39,  41,  42,  43,  43,  43,  45,  48,  49,  49,  49,  50,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  31,  31,  31,  31,  31,  31,  32,
+             30,  30,  31,  31,  31,  31,  31,  31,  32,  32,
+             30,  30,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  34,
+             33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  36,  37,
+             33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  37,  38,  39,
+             33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  37,  38,  39,  39,
+             33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  37,  38,  39,  39,  39,
+             34,  35,  36,  36,  36,  36,  36,  37,  37,  37,  37,  38,  40,  40,  40,  40,  42,
+             36,  36,  37,  37,  37,  37,  38,  38,  39,  39,  39,  40,  41,  42,  42,  42,  44,  46,
+             36,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  45,  46,  47,
+             36,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  45,  46,  47,  47,
+             36,  37,  38,  38,  38,  38,  39,  39,  40,  40,  40,  41,  42,  43,  43,  43,  45,  46,  47,  47,  47,
+             38,  39,  39,  40,  40,  40,  40,  41,  41,  41,  41,  42,  43,  44,  44,  44,  45,  47,  47,  47,  47,  47,
+             40,  41,  41,  41,  41,  41,  41,  42,  42,  42,  42,  43,  44,  44,  44,  44,  45,  47,  47,  47,  47,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  43,  44,  45,  45,  45,  46,  47,  47,  47,  47,  48,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  43,  44,  45,  45,  45,  46,  47,  47,  47,  47,  48,  48,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  43,  44,  45,  45,  45,  46,  47,  47,  47,  47,  48,  48,  48,  48,  48,
+             44,  44,  44,  44,  44,  44,  44,  44,  44,  44,  44,  44,  45,  46,  46,  46,  46,  47,  47,  47,  47,  48,  49,  49,  49,  49,  50,
+             47,  47,  46,  46,  46,  46,  46,  46,  45,  45,  45,  46,  46,  47,  47,  47,  47,  47,  47,  47,  47,  48,  49,  50,  50,  50,  51,  52,
+             49,  48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  48,  48,  48,  49,  50,  50,  50,  50,  51,  52,  53,
+             49,  48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  48,  48,  48,  49,  50,  50,  50,  50,  51,  52,  53,  53,
+             49,  48,  48,  47,  47,  47,  47,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  48,  48,  48,  49,  50,  50,  50,  50,  51,  52,  53,  53,  53,
+             49,  48,  47,  47,  47,  47,  47,  46,  46,  46,  46,  46,  46,  47,  47,  47,  47,  47,  47,  47,  47,  48,  49,  50,  50,  50,  51,  52,  53,  53,  53,  53,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  32,  32,
+             31,  31,  31,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,
+             32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  35,  35,  35,  36,  36,  36,  36,  36,
+             33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  36,  36,  36,  37,  38,
+             34,  34,  34,  34,  34,  34,  34,  34,  34,  33,  33,  33,  33,  33,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  39,
+             34,  34,  34,  34,  34,  34,  34,  34,  34,  33,  33,  33,  33,  33,  34,  34,  35,  35,  35,  35,  35,  35,  36,  36,  37,  37,  37,  37,  38,  38,  39,  39,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  32,
+             30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,
+             30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,
+             32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  35,
+             33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  36,  37,
+             33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  37,  38,  39,
+             33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  37,  38,  39,  39,
+             33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  37,  38,  39,  39,  39,
+             33,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  36,  37,  38,  39,  39,  39,  39,
+             34,  35,  35,  35,  35,  35,  35,  36,  36,  36,  36,  36,  36,  36,  37,  38,  39,  40,  40,  40,  40,  41,
+             35,  36,  36,  36,  37,  37,  37,  37,  37,  37,  38,  38,  38,  38,  38,  39,  41,  41,  41,  41,  41,  42,  44,
+             36,  37,  37,  38,  38,  38,  38,  38,  38,  39,  39,  39,  39,  39,  40,  41,  42,  43,  43,  43,  43,  44,  45,  46,
+             36,  37,  37,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  46,  47,  47,
+             36,  37,  37,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  46,  47,  47,  47,
+             36,  37,  37,  38,  38,  38,  38,  38,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  44,  46,  47,  47,  47,  47,
+             37,  37,  38,  38,  39,  39,  39,  39,  39,  40,  40,  40,  40,  40,  41,  42,  43,  43,  43,  43,  43,  44,  46,  47,  47,  47,  47,  47,
+             38,  39,  39,  40,  40,  40,  40,  40,  40,  40,  41,  41,  41,  41,  41,  42,  43,  44,  44,  44,  44,  45,  46,  47,  47,  47,  47,  47,  47,
+             40,  40,  40,  41,  41,  41,  41,  41,  41,  41,  42,  42,  42,  42,  42,  43,  44,  44,  44,  44,  44,  45,  46,  47,  47,  47,  47,  47,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  43,  43,  44,  45,  45,  45,  45,  45,  46,  47,  47,  47,  47,  47,  48,  48,  48,
+             41,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  42,  43,  43,  44,  45,  45,  45,  45,  45,  46,  47,  47,  47,  47,  47,  48,  48,  48,  48,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  32,
+             31,  31,  31,  31,  31,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,
+             31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,
+             32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,
+             31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,
+             32,  32,  32,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  35,  36,
+             33,  33,  33,  33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  36,  37,  37,
+             33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  39,
+             33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  39,  39,
+             33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  39,  39,  39,
+             33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  39,  39,  39,  39,
+             33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  39,  39,  39,  39,  39,
+             33,  33,  34,  34,  34,  34,  34,  34,  34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  37,  37,  38,  39,  39,  39,  39,  39,  39,
+             34,  34,  34,  35,  35,  35,  35,  35,  35,  35,  35,  35,  35,  36,  36,  36,  36,  36,  36,  36,  36,  37,  37,  38,  39,  40,  40,  40,  40,  40,  40,  40,
+        },
+    }, {
+        {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+             31,  31,  31,  31,  31,  31,  31,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
+        }, {
+             32,
+             31,  31,
+             31,  31,  31,
+             31,  31,  31,  31,
+             31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,
+             30,  30,  30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,
+             30,  30,  30,  30,  30,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  31,  32,  32,
+        },
+    },
+};
+
+// Final lookup table, populated by av1_init_qm_tables(): for each of the 16
+// quantizer-matrix levels (index 15 is the flat all-32 matrix), 2 plane types
+// and every (rectangular) transform size, a pointer to the coefficient array.
+const uint8_t *av1_qm_tbl[16][2][N_RECT_TX_SIZES];
+// Flat matrix (every coefficient == 32) shared by all qm-level-15 entries.
+static uint8_t pb_32x32[32 * 32];
+// Storage for matrices derived at init time from the compacted source tables:
+// square sizes are expanded from triangular storage (16x16 is subsampled from
+// the expanded 32x32), and the "tall" rectangular sizes are transposed copies
+// of the stored "wide" variants. Dimensions are [qm level][plane type][coefs].
+static uint8_t qm_tbl_4x4[15][2][16];
+static uint8_t qm_tbl_4x8[15][2][32];
+static uint8_t qm_tbl_4x16[15][2][64];
+static uint8_t qm_tbl_8x8[15][2][64];
+static uint8_t qm_tbl_8x16[15][2][128];
+static uint8_t qm_tbl_8x32[15][2][256];
+static uint8_t qm_tbl_16x16[15][2][256];
+static uint8_t qm_tbl_16x32[15][2][512];
+static uint8_t qm_tbl_32x32[15][2][1024];
+
+// Extract an sz x sz matrix from a (sz*step) x (sz*step) source by keeping
+// every step'th coefficient in both dimensions (used to derive the 16x16
+// matrices from the expanded 32x32 ones with step == 2).
+static void subsample(uint8_t *const dst, const uint8_t *const src,
+                      const int sz, const int step)
+{
+    for (int y = 0; y < sz; y++)
+        for (int x = 0; x < sz; x++)
+            // src row pitch is sz*step and we skip step rows per dst row,
+            // hence the step*step factor on the row index.
+            dst[y * sz + x] = src[y * sz * step * step + x * step];
+}
+
+// Write the transpose of the w x h matrix src into the h x w matrix dst.
+// The tables store only the "wide" rectangular matrices; the "tall" variants
+// are generated from them with this helper.
+static void transpose(uint8_t *const dst, const uint8_t *const src,
+                      const int w, const int h)
+{
+    // y_off walks src rows (pitch w); x_off walks dst rows (pitch h).
+    for (int y = 0, y_off = 0; y < h; y++, y_off += w)
+        for (int x = 0, x_off = 0; x < w; x++, x_off += h)
+            dst[x_off + y] = src[y_off + x];
+}
+
+// Expand a symmetric matrix stored as a lower triangle (row y holds y+1
+// entries) into a full sz x sz square.
+static void untriangle(uint8_t *dst, const uint8_t *src, const int sz) {
+    for (int y = 0; y < sz; y++) {
+        // Lower half (x <= y) is stored directly in the triangle row.
+        memcpy(dst, src, y + 1);
+        // Upper half (x > y) mirrors element (x, y): walk down column y of
+        // the triangle. Triangle row x starts x elements after row x-1, so
+        // stepping src_ptr by x lands on element y of row x each iteration.
+        const uint8_t *src_ptr = &src[y];
+        for (int x = y + 1; x < sz; x++) {
+            src_ptr += x;
+            dst[x] = *src_ptr;
+        }
+        dst += sz;      // next output row
+        src += y + 1;   // next (one element longer) triangle row
+    }
+}
+
+// One-time setup of av1_qm_tbl: derives the transposed, expanded and
+// subsampled matrices from the compacted source tables and wires up every
+// [qm level][plane type][tx size] pointer.
+// NOTE(review): the 'done' guard is a plain static int, so concurrent first
+// calls would race — confirm the library serializes initialization.
+void av1_init_qm_tables(void) {
+    static int done = 0;
+    if (done) return;
+    done = 1;
+
+    for (int i = 0; i < 15; i++)
+        for (int j = 0; j < 2; j++) {
+            // note that the w/h in the assignment is inverted, this is on purpose
+            // because we store coefficients transposed
+            av1_qm_tbl[i][j][RTX_4X8  ] = qm_tbl_8x4[i][j];
+            av1_qm_tbl[i][j][RTX_8X4  ] = qm_tbl_4x8[i][j];
+            transpose(qm_tbl_4x8[i][j], qm_tbl_8x4[i][j], 8, 4);
+            av1_qm_tbl[i][j][RTX_4X16 ] = qm_tbl_16x4[i][j];
+            av1_qm_tbl[i][j][RTX_16X4 ] = qm_tbl_4x16[i][j];
+            transpose(qm_tbl_4x16[i][j], qm_tbl_16x4[i][j], 16, 4);
+            av1_qm_tbl[i][j][RTX_8X16 ] = qm_tbl_16x8[i][j];
+            av1_qm_tbl[i][j][RTX_16X8 ] = qm_tbl_8x16[i][j];
+            transpose(qm_tbl_8x16[i][j], qm_tbl_16x8[i][j], 16, 8);
+            av1_qm_tbl[i][j][RTX_8X32 ] = qm_tbl_32x8[i][j];
+            av1_qm_tbl[i][j][RTX_32X8 ] = qm_tbl_8x32[i][j];
+            transpose(qm_tbl_8x32[i][j], qm_tbl_32x8[i][j], 32, 8);
+            av1_qm_tbl[i][j][RTX_16X32] = qm_tbl_32x16[i][j];
+            av1_qm_tbl[i][j][RTX_32X16] = qm_tbl_16x32[i][j];
+            transpose(qm_tbl_16x32[i][j], qm_tbl_32x16[i][j], 32, 16);
+
+            // Square sizes: expand the triangular source tables; 16x16 is
+            // subsampled from the expanded 32x32 instead of being stored.
+            av1_qm_tbl[i][j][ TX_4X4  ] = qm_tbl_4x4[i][j];
+            av1_qm_tbl[i][j][ TX_8X8  ] = qm_tbl_8x8[i][j];
+            av1_qm_tbl[i][j][ TX_16X16] = qm_tbl_16x16[i][j];
+            av1_qm_tbl[i][j][ TX_32X32] = qm_tbl_32x32[i][j];
+            untriangle(qm_tbl_4x4[i][j], qm_tbl_4x4_t[i][j], 4);
+            untriangle(qm_tbl_8x8[i][j], qm_tbl_8x8_t[i][j], 8);
+            untriangle(qm_tbl_32x32[i][j], qm_tbl_32x32_t[i][j], 32);
+            subsample(qm_tbl_16x16[i][j], qm_tbl_32x32[i][j], 16, 2);
+
+            // 64-wide/-tall sizes reuse the 32-point matrices.
+            av1_qm_tbl[i][j][ TX_64X64] = av1_qm_tbl[i][j][ TX_32X32];
+            av1_qm_tbl[i][j][RTX_64X32] = av1_qm_tbl[i][j][ TX_32X32];
+            av1_qm_tbl[i][j][RTX_64X16] = av1_qm_tbl[i][j][RTX_32X16];
+            av1_qm_tbl[i][j][RTX_32X64] = av1_qm_tbl[i][j][ TX_32X32];
+            av1_qm_tbl[i][j][RTX_16X64] = av1_qm_tbl[i][j][RTX_16X32];
+        }
+
+    // qm level 15 disables weighting: every size points at the flat matrix.
+    memset(pb_32x32, 32, sizeof(pb_32x32));
+    for (int j = 0; j < 2; j++)
+        for (int k = 0; k < N_RECT_TX_SIZES; k++)
+            av1_qm_tbl[15][j][k] = pb_32x32;
+}
--- /dev/null
+++ b/src/qm.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_QM_H__
+#define __DAV1D_SRC_QM_H__
+
+#include "src/levels.h"
+
+extern const uint8_t *av1_qm_tbl[16][2][N_RECT_TX_SIZES];
+
+void av1_init_qm_tables(void);
+
+#endif /* __DAV1D_SRC_QM_H__ */
--- /dev/null
+++ b/src/recon.c
@@ -1,0 +1,1503 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <string.h>
+#include <stdio.h>
+
+#include "common/attributes.h"
+#include "common/bitdepth.h"
+#include "common/dump.h"
+#include "common/intops.h"
+#include "common/mem.h"
+
+#include "src/cdef_apply.h"
+#include "src/ipred_prepare.h"
+#include "src/lf_apply.h"
+#include "src/lr_apply.h"
+#include "src/recon.h"
+#include "src/scan.h"
+#include "src/tables.h"
+#include "src/wedge.h"
+
+// Read an exp-Golomb-coded value from the arithmetic decoder.
+// Each bit is decoded as a raw, non-adaptive bool (constant probability
+// 128 << 7 on msac's scale); the prefix of zeros gives the magnitude
+// class (capped at 32 bits), the suffix the value. Returns value - 1,
+// matching the caller's use as an increment on top of token 15.
+static unsigned read_golomb(MsacContext *const msac) {
+    int len = 0;
+    unsigned val = 1;
+
+    while (!msac_decode_bool(msac, 128 << 7) && len < 32) len++;
+    while (len--) val = (val << 1) | msac_decode_bool(msac, 128 << 7);
+
+    return val - 1;
+}
+
+/*
+ * Decode all coefficients of one transform block.
+ *
+ * a/l point into the above/left coefficient-context arrays for this plane;
+ * tx is the (possibly rectangular) transform size, intra selects intra vs
+ * inter context derivation, plane 0 is luma and 1/2 chroma. cf receives the
+ * dequantized coefficients. On entry, for chroma inter blocks, *txtp holds
+ * the co-located luma transform type (read below); on exit *txtp holds this
+ * block's transform type. *res_ctx receives the packed context byte stored
+ * into a/l by the caller: min(cul_level, 63) | dc_sign << 6.
+ *
+ * Returns the end-of-block index (eob), or -1 if the block is coded as
+ * all-zero (in which case *res_ctx is 0x40 and *txtp is still set).
+ *
+ * NOTE(review): the order of msac_decode_* calls is the bitstream syntax;
+ * it must not be reordered.
+ */
+static int decode_coefs(Dav1dTileContext *const t,
+                        uint8_t *const a, uint8_t *const l,
+                        const enum RectTxfmSize tx, const enum BlockSize bs,
+                        const Av1Block *const b, const int intra,
+                        const int plane, coef *cf,
+                        enum TxfmType *const txtp, uint8_t *res_ctx)
+{
+    Dav1dTileState *const ts = t->ts;
+    const int chroma = !!plane;
+    const Dav1dFrameContext *const f = t->f;
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[tx];
+    // debug tracing is compiled in but force-disabled by the trailing "&& 0"
+    const int dbg = DEBUG_BLOCK_INFO && plane && 0;
+
+    if (dbg) printf("Start: r=%d\n", ts->msac.rng);
+
+    // does this block have any non-zero coefficients
+    const int sctx = get_coef_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.p.layout);
+    const int all_skip =
+        msac_decode_bool_adapt(&ts->msac, ts->cdf.coef.skip[t_dim->ctx][sctx]);
+    if (dbg)
+    printf("Post-non-zero[%d][%d][%d]: r=%d\n",
+           t_dim->ctx, sctx, all_skip, ts->msac.rng);
+    if (all_skip) {
+        // 0x40 = cul_level 0 with the "no dc sign" marker bit set
+        *res_ctx = 0x40;
+        *txtp = f->frame_hdr.segmentation.lossless[b->seg_id] ? WHT_WHT :
+                                                                DCT_DCT;
+        return -1;
+    }
+
+    // transform type (chroma: derived, luma: explicitly coded)
+    if (chroma) {
+        if (intra) {
+            *txtp = get_uv_intra_txtp(b->uv_mode, tx, &f->frame_hdr, b->seg_id);
+        } else {
+            // chroma inter derives its type from the co-located luma type
+            // that the caller passed in through *txtp
+            const enum TxfmType y_txtp = *txtp;
+            *txtp = get_uv_inter_txtp(t_dim, y_txtp, &f->frame_hdr, b->seg_id);
+        }
+    } else {
+        const enum TxfmTypeSet set = get_ext_txtp_set(tx, !intra,
+                                                      &f->frame_hdr, b->seg_id);
+        const unsigned set_cnt = av1_tx_type_count[set];
+        unsigned idx;
+        if (set_cnt == 1) {
+            idx = 0;
+        } else {
+            const int set_idx = av1_tx_type_set_index[!intra][set];
+            // filter-intra blocks context on the underlying directional mode
+            const enum IntraPredMode y_mode_nofilt = b->y_mode == FILTER_PRED ?
+                av1_filter_mode_to_y_mode[b->y_angle] : b->y_mode;
+            uint16_t *const txtp_cdf = intra ?
+                       ts->cdf.m.txtp_intra[set_idx][t_dim->min][y_mode_nofilt] :
+                       ts->cdf.m.txtp_inter[set_idx][t_dim->min];
+            idx = msac_decode_symbol_adapt(&ts->msac, txtp_cdf, set_cnt);
+            if (dbg)
+            printf("Post-txtp[%d->%d][%d->%d][%d][%d->%d]: r=%d\n",
+                   set, set_idx, tx, t_dim->min, b->intra ? y_mode_nofilt : -1,
+                   idx, av1_tx_types_per_set[set][idx], ts->msac.rng);
+        }
+        *txtp = av1_tx_types_per_set[set][idx];
+    }
+
+    // find end-of-block (eob)
+    int eob_bin;
+    // 2D-size context: sum of clamped log2 width/height selects which
+    // eob_bin_{16..1024} CDF table applies
+    const int tx2dszctx = imin(t_dim->lw, TX_32X32) + imin(t_dim->lh, TX_32X32);
+    const enum TxClass tx_class = av1_tx_type_class[*txtp];
+    const int is_1d = tx_class != TX_CLASS_2D;
+    switch (tx2dszctx) {
+#define case_sz(sz, bin) \
+    case sz: { \
+        uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma][is_1d]; \
+        eob_bin = msac_decode_symbol_adapt(&ts->msac, eob_bin_cdf, 5 + sz); \
+        break; \
+    }
+    case_sz(0,   16);
+    case_sz(1,   32);
+    case_sz(2,   64);
+    case_sz(3,  128);
+    case_sz(4,  256);
+    case_sz(5,  512);
+    case_sz(6, 1024);
+#undef case_sz
+    }
+    if (dbg)
+    printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
+           16 << tx2dszctx, chroma, is_1d, eob_bin, ts->msac.rng);
+    int eob;
+    if (eob_bin > 1) {
+        // eob_bin is an exponential class; one adaptive bit plus raw bits
+        // refine the exact position inside the class
+        eob = 1 << (eob_bin - 1);
+        uint16_t *const eob_hi_bit_cdf =
+            ts->cdf.coef.eob_hi_bit[t_dim->ctx][chroma][eob_bin];
+        const int eob_hi_bit = msac_decode_bool_adapt(&ts->msac, eob_hi_bit_cdf);
+        if (dbg)
+        printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
+               t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
+        unsigned mask = eob >> 1;
+        if (eob_hi_bit) eob |= mask;
+        for (mask >>= 1; mask; mask >>= 1) {
+            const int eob_bit = msac_decode_bool(&ts->msac, 128 << 7);
+            if (eob_bit) eob |= mask;
+        }
+        if (dbg)
+        printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
+    } else {
+        eob = eob_bin;
+    }
+
+    // base tokens
+    uint16_t (*const br_cdf)[5] =
+        ts->cdf.coef.br_tok[imin(t_dim->ctx, 3)][chroma];
+    const int16_t *const scan = av1_scans[tx][tx_class];
+    // levels[] is a padded (clamped to 32x32, +1 col/row) scratch map of
+    // magnitudes used for neighbor-based context derivation below
+    uint8_t levels[36 * 36];
+    ptrdiff_t stride = 4 * (imin(t_dim->h, 8) + 1);
+    memset(levels, 0, stride * 4 * (imin(t_dim->w, 8) + 1));
+    // shift/mask split a scan position rc into x (column) and y (row)
+    const int shift = 2 + imin(t_dim->lh, 3), mask = 4 * imin(t_dim->h, 8) - 1;
+    unsigned cul_level = 0;
+    // walk from eob back to DC; only the eob coefficient uses the
+    // eob_base_tok CDF (which cannot code a zero)
+    for (int i = eob, is_last = 1; i >= 0; i--, is_last = 0) {
+        const int rc = scan[i], x = rc >> shift, y = rc & mask;
+
+        // lo tok
+        const int ctx = get_coef_nz_ctx(levels, i, rc, is_last, tx, tx_class);
+        uint16_t *const lo_cdf = is_last ?
+            ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma][ctx] :
+            ts->cdf.coef.base_tok[t_dim->ctx][chroma][ctx];
+        int tok = msac_decode_symbol_adapt(&ts->msac, lo_cdf,
+                                           4 - is_last) + is_last;
+        if (dbg)
+        printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n",
+               t_dim->ctx, chroma, ctx, i, rc, tok, ts->msac.rng);
+        if (!tok) continue;
+
+        // hi tok
+        if (tok == 3) {
+            // base token saturated: decode up to 4 range increments of 0..3
+            // each, stopping early on a partial increment, capped at 15
+            // (the remainder is golomb-coded in the second pass)
+            const int br_ctx = get_br_ctx(levels, rc, tx, tx_class);
+            do {
+                const int tok_br =
+                    msac_decode_symbol_adapt(&ts->msac, br_cdf[br_ctx], 4);
+                if (dbg)
+                printf("Post-hi_tok[%d][%d][%d][%d=%d=%d->%d]: r=%d\n",
+                       imin(t_dim->ctx, 3), chroma, br_ctx,
+                       i, rc, tok_br, tok, ts->msac.rng);
+                tok += tok_br;
+                if (tok_br < 3) break;
+            } while (tok < 15);
+        }
+
+        levels[x * stride + y] = cf[rc] = tok;
+    }
+
+    // residual and sign
+    int dc_sign = 1;
+    const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
+    const uint8_t *const qm_tbl = f->qm[is_1d || *txtp == IDTX][tx][plane];
+    // larger transforms downshift the dequantized value
+    const int dq_shift = imax(0, t_dim->ctx - 2);
+    // second pass, in forward scan order: signs, golomb tails, dequant
+    for (int i = 0; i <= eob; i++) {
+        const int rc = scan[i];
+        int tok = cf[rc];
+        if (!tok) continue;
+        int dq;
+
+        // sign
+        int sign;
+        if (i == 0) {
+            // DC sign is context-coded; AC signs are raw bits
+            const int dc_sign_ctx = get_dc_sign_ctx(t_dim, a, l);
+            uint16_t *const dc_sign_cdf =
+                ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
+            sign = msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
+            if (dbg)
+            printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
+                   chroma, dc_sign_ctx, sign, ts->msac.rng);
+            // dc_sign encodes negative/none/positive as 0/1/2 for *res_ctx
+            dc_sign = sign ? 0 : 2;
+            dq = (dq_tbl[0] * qm_tbl[0] + 16) >> 5;
+        } else {
+            sign = msac_decode_bool(&ts->msac, 128 << 7);
+            if (dbg)
+            printf("Post-sign[%d=%d=%d]: r=%d\n", i, rc, sign, ts->msac.rng);
+            dq = (dq_tbl[1] * qm_tbl[rc] + 16) >> 5;
+        }
+
+        // residual
+        if (tok == 15) {
+            tok += read_golomb(&ts->msac);
+            if (dbg)
+            printf("Post-residual[%d=%d=%d->%d]: r=%d\n",
+                   i, rc, tok - 15, tok, ts->msac.rng);
+        }
+
+        // dequant
+        cul_level += tok;
+        tok *= dq;
+        tok >>= dq_shift;
+        cf[rc] = sign ? -tok : tok;
+    }
+
+    // context
+    *res_ctx = imin(cul_level, 63) | (dc_sign << 6);
+
+    return eob;
+}
+
+/*
+ * Recursively walk the transform-split tree of an inter luma block,
+ * decoding (and, outside frame-thread pass 1, reconstructing) each leaf
+ * transform block.
+ *
+ * tx_split holds one split bitmask per depth (max depth 2); bit
+ * (y_off * 4 + x_off) decides whether this node splits further. t->bx/t->by
+ * are temporarily advanced to address sub-blocks and always restored before
+ * returning. dst may be NULL in pass 1 (coefficient-only pass); it is
+ * asserted non-NULL whenever reconstruction actually runs.
+ */
+static void read_coef_tree(Dav1dTileContext *const t,
+                           const enum BlockSize bs, const Av1Block *const b,
+                           const enum RectTxfmSize ytx, const int depth,
+                           const uint16_t *const tx_split,
+                           const int x_off, const int y_off, pixel *dst)
+{
+    const Dav1dFrameContext *const f = t->f;
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dDSPContext *const dsp = f->dsp;
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[ytx];
+    const int txw = t_dim->w, txh = t_dim->h;
+
+    if (depth < 2 && tx_split[depth] & (1 << (y_off * 4 + x_off))) {
+        const enum RectTxfmSize sub = t_dim->sub;
+        const TxfmInfo *const sub_t_dim = &av1_txfm_dimensions[sub];
+        const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
+
+        // recurse into up to 4 sub-transforms; the second column/row is
+        // only visited if it exists for this aspect ratio and lies inside
+        // the frame
+        read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
+                       x_off * 2 + 0, y_off * 2 + 0, dst);
+        t->bx += txsw;
+        if (txw >= txh && t->bx < f->bw)
+            read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
+                           y_off * 2 + 0, dst ? &dst[4 * txsw] : NULL);
+        t->bx -= txsw;
+        t->by += txsh;
+        if (txh >= txw && t->by < f->bh) {
+            if (dst)
+                dst += 4 * txsh * PXSTRIDE(f->cur.p.stride[0]);
+            read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
+                           x_off * 2 + 0, y_off * 2 + 1, dst);
+            t->bx += txsw;
+            if (txw >= txh && t->bx < f->bw)
+                read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
+                               y_off * 2 + 1, dst ? &dst[4 * txsw] : NULL);
+            t->bx -= txsw;
+        }
+        t->by -= txsh;
+    } else {
+        // leaf: decode one transform block
+        const int bx4 = t->bx & 31, by4 = t->by & 31;
+        enum TxfmType txtp;
+        uint8_t cf_ctx;
+        int eob;
+        coef *cf;
+        struct CodedBlockInfo *cbi;
+
+        // cbi is only initialized when frame-threading is active; all its
+        // uses below are guarded by f->frame_thread.pass != 0
+        if (f->frame_thread.pass) {
+            cf = ts->frame_thread.cf;
+            // coefficient buffer is clamped to 32x32 per leaf (larger
+            // transforms store at most 32x32 coefficients)
+            ts->frame_thread.cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
+            cbi = &f->frame_thread.cbi[t->by * f->b4_stride + t->bx];
+        } else {
+            cf = t->cf;
+        }
+        if (f->frame_thread.pass != 2) {
+            eob = decode_coefs(t, &t->a->lcoef[bx4], &t->l.lcoef[by4],
+                               ytx, bs, b, 0, 0, cf, &txtp, &cf_ctx);
+            if (DEBUG_BLOCK_INFO)
+                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
+                       ytx, txtp, eob, ts->msac.rng);
+            // propagate context to the above/left arrays, clipped at the
+            // frame edge, and record the luma txtp for chroma derivation
+            memset(&t->a->lcoef[bx4], cf_ctx, imin(txw, f->bw - t->bx));
+            memset(&t->l.lcoef[by4], cf_ctx, imin(txh, f->bh - t->by));
+            for (int y = 0; y < txh; y++)
+                memset(&t->txtp_map[(by4 + y) * 32 + bx4], txtp, txw);
+            if (f->frame_thread.pass == 1) {
+                cbi->eob[0] = eob;
+                cbi->txtp[0] = txtp;
+            }
+        } else {
+            // pass 2: reuse eob/txtp recorded by pass 1
+            eob = cbi->eob[0];
+            txtp = cbi->txtp[0];
+        }
+        // reconstruction runs in pass 0 (single-threaded) and pass 2
+        if (!(f->frame_thread.pass & 1)) {
+            assert(dst);
+            if (eob >= 0) {
+                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                    coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
+                dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.p.stride[0], cf, eob);
+                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                    hex_dump(dst, f->cur.p.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
+            }
+        }
+    }
+}
+
+/*
+ * Frame-thread pass-1 entry point: decode all luma and chroma coefficients
+ * of one block into the per-tile frame-thread coefficient buffer, recording
+ * eob/txtp per transform block in f->frame_thread.cbi for pass 2 to
+ * reconstruct later. Asserted to run only in pass 1.
+ *
+ * For skipped blocks, only the above/left coefficient contexts are reset
+ * (0x40 = "all zero") and nothing is decoded.
+ */
+void bytefn(read_coef_blocks)(Dav1dTileContext *const t,
+                              const enum BlockSize bs, const Av1Block *const b)
+{
+    const Dav1dFrameContext *const f = t->f;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int bx4 = t->bx & 31, by4 = t->by & 31;
+    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bw4 = b_dim[0], bh4 = b_dim[1];
+    const int cbw4 = (bw4 + 1) >> ss_hor, cbh4 = (bh4 + 1) >> ss_ver;
+    // chroma is only coded for this block if it covers the chroma grid
+    // position (small blocks share one chroma block with their neighbor)
+    // NOTE(review): layout read from seq_hdr here vs cur.p.p above — verify
+    // these always agree
+    const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
+                           (bw4 > ss_hor || t->bx & 1) &&
+                           (bh4 > ss_ver || t->by & 1);
+
+    if (b->skip) {
+        memset(&t->a->lcoef[bx4], 0x40, bw4);
+        memset(&t->l.lcoef[by4], 0x40, bh4);
+        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
+            memset(&t->a->ccoef[pl][cbx4], 0x40, cbw4);
+            memset(&t->l.ccoef[pl][cby4], 0x40, cbh4);
+        }
+        return;
+    }
+
+    Dav1dTileState *const ts = t->ts;
+    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
+    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
+    assert(f->frame_thread.pass == 1);
+    assert(!b->skip);
+    const TxfmInfo *const uv_t_dim = &av1_txfm_dimensions[b->uvtx];
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[b->intra ? b->tx : b->max_ytx];
+
+    // blocks larger than 64x64 are processed in 64x64 (16 4px units) chunks
+    for (int init_y = 0; init_y < h4; init_y += 16) {
+        for (int init_x = 0; init_x < w4; init_x += 16) {
+            const int sub_h4 = imin(h4, 16 + init_y);
+            const int sub_w4 = imin(w4, init_x + 16);
+            int y_off = !!init_y, y, x;
+            // t->bx/t->by track the current transform block position and
+            // are restored after each loop
+            for (y = init_y, t->by += init_y; y < sub_h4;
+                 y += t_dim->h, t->by += t_dim->h, y_off++)
+            {
+                struct CodedBlockInfo *const cbi =
+                    &f->frame_thread.cbi[t->by * f->b4_stride];
+                int x_off = !!init_x;
+                for (x = init_x, t->bx += init_x; x < sub_w4;
+                     x += t_dim->w, t->bx += t_dim->w, x_off++)
+                {
+                    if (!b->intra) {
+                        // inter: descend the tx-split tree (dst=NULL, this
+                        // pass only decodes coefficients)
+                        read_coef_tree(t, bs, b, b->max_ytx, 0, b->tx_split,
+                                       x_off, y_off, NULL);
+                    } else {
+                        uint8_t cf_ctx = 0x40;
+                        enum TxfmType txtp;
+                        const int eob = cbi[t->bx].eob[0] =
+                            decode_coefs(t, &t->a->lcoef[bx4 + x],
+                                         &t->l.lcoef[by4 + y], b->tx, bs, b, 1,
+                                         0, ts->frame_thread.cf, &txtp, &cf_ctx);
+                        if (DEBUG_BLOCK_INFO)
+                            printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
+                                   b->tx, txtp, eob, ts->msac.rng);
+                        cbi[t->bx].txtp[0] = txtp;
+                        // advance the shared coefficient buffer (clamped to
+                        // 32x32 worth of coefficients per transform)
+                        ts->frame_thread.cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
+                        memset(&t->a->lcoef[bx4 + x], cf_ctx,
+                               imin(t_dim->w, f->bw - t->bx));
+                        memset(&t->l.lcoef[by4 + y], cf_ctx,
+                               imin(t_dim->h, f->bh - t->by));
+                    }
+                }
+                t->bx -= x;
+            }
+            t->by -= y;
+
+            if (!has_chroma) continue;
+
+            // chroma: loop counters are in chroma 4px units, t->bx/t->by in
+            // luma units (hence the << ss_hor / << ss_ver adjustments)
+            const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
+            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
+            for (int pl = 0; pl < 2; pl++) {
+                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
+                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
+                {
+                    struct CodedBlockInfo *const cbi =
+                        &f->frame_thread.cbi[t->by * f->b4_stride];
+                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
+                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
+                    {
+                        uint8_t cf_ctx = 0x40;
+                        enum TxfmType txtp;
+                        // chroma inter txtp derives from the co-located luma
+                        // txtp recorded in txtp_map; decode_coefs reads it
+                        // via *txtp
+                        if (!b->intra)
+                            txtp = t->txtp_map[(by4 + (y << ss_ver)) * 32 +
+                                                bx4 + (x << ss_hor)];
+                        const int eob = cbi[t->bx].eob[1 + pl] =
+                            decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
+                                         &t->l.ccoef[pl][cby4 + y], b->uvtx, bs,
+                                         b, b->intra, 1 + pl, ts->frame_thread.cf,
+                                         &txtp, &cf_ctx);
+                        if (DEBUG_BLOCK_INFO)
+                            printf("Post-uv-cf-blk[pl=%d,tx=%d,"
+                                   "txtp=%d,eob=%d]: r=%d\n",
+                                   pl, b->uvtx, txtp, eob, ts->msac.rng);
+                        cbi[t->bx].txtp[1 + pl] = txtp;
+                        ts->frame_thread.cf += uv_t_dim->w * uv_t_dim->h * 16;
+                        memset(&t->a->ccoef[pl][cbx4 + x], cf_ctx,
+                               imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor));
+                        memset(&t->l.ccoef[pl][cby4 + y], cf_ctx,
+                               imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver));
+                    }
+                    t->bx -= x << ss_hor;
+                }
+                t->by -= y << ss_ver;
+            }
+        }
+    }
+}
+
+/*
+ * Emulated edge extension: copy a bw x bh block at position (x, y) from a
+ * reference plane of size iw x ih into dst, replicating edge pixels for any
+ * part of the block that falls outside the reference. Used before motion
+ * compensation when the filter footprint crosses the frame border.
+ * ref_stride/dst_stride are byte strides (converted via PXSTRIDE).
+ */
+static void emu_edge(pixel *dst, const ptrdiff_t dst_stride,
+                     const pixel *ref, const ptrdiff_t ref_stride,
+                     const int bw, const int bh,
+                     const int iw, const int ih,
+                     const int x, const int y)
+{
+    // find offset in reference of visible block to copy
+    ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride) + iclip(x, 0, iw - 1);
+
+    // number of pixels to extend (left, right, top, bottom)
+    const int left_ext = iclip(-x, 0, bw - 1);
+    const int right_ext = iclip(x + bw - iw, 0, bw - 1);
+    assert(left_ext + right_ext < bw);
+    const int top_ext = iclip(-y, 0, bh - 1);
+    const int bottom_ext = iclip(y + bh - ih, 0, bh - 1);
+    assert(top_ext + bottom_ext < bh);
+
+    // copy visible portion first
+    pixel *blk = dst + top_ext * PXSTRIDE(dst_stride);
+    const int center_w = bw - left_ext - right_ext;
+    const int center_h = bh - top_ext - bottom_ext;
+    for (int y = 0; y < center_h; y++) {
+        pixel_copy(blk + left_ext, ref, center_w);
+        // extend left edge for this line
+        if (left_ext)
+            pixel_set(blk, blk[left_ext], left_ext);
+        // extend right edge for this line
+        if (right_ext)
+            pixel_set(blk + left_ext + center_w, blk[left_ext + center_w - 1],
+                      right_ext);
+        ref += PXSTRIDE(ref_stride);
+        blk += PXSTRIDE(dst_stride);
+    }
+
+    // copy top: replicate the first fully-extended row upwards
+    blk = dst + top_ext * PXSTRIDE(dst_stride);
+    for (int y = 0; y < top_ext; y++) {
+        pixel_copy(dst, blk, bw);
+        dst += PXSTRIDE(dst_stride);
+    }
+
+    // copy bottom: replicate the previous row downwards
+    dst += center_h * PXSTRIDE(dst_stride);
+    for (int y = 0; y < bottom_ext; y++) {
+        pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], bw);
+        dst += PXSTRIDE(dst_stride);
+    }
+}
+
+/*
+ * Translational motion compensation for one block.
+ *
+ * Exactly one of dst8 (final pixel output) / dst16 (intermediate
+ * coefficient-precision output for compound prediction) must be non-NULL.
+ * bw4/bh4 are block dimensions in 4px luma units; bx/by the block position;
+ * mv is in 1/8-pel luma units. Waits on the reference picture's progress
+ * (frame threading) before reading from it, and falls back to emu_edge
+ * when the filter footprint (3 left/top, 4 right/bottom) crosses the frame.
+ */
+static void mc(Dav1dTileContext *const t,
+               pixel *const dst8, coef *const dst16, const ptrdiff_t dst_stride,
+               const int bw4, const int bh4,
+               const int bx, const int by, const int pl,
+               const mv mv, const Dav1dThreadPicture *const refp,
+               const enum Filter2d filter_2d)
+{
+    assert((dst8 != NULL) ^ (dst16 != NULL));
+    const Dav1dFrameContext *const f = t->f;
+    const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
+    const int mvx = mv.x, mvy = mv.y;
+    // split mv into subpel phase (mx/my) and integer offset (dx/dy),
+    // accounting for chroma subsampling
+    const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
+    const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
+    const int dy = by * v_mul + (mvy >> (3 + ss_ver));
+    ptrdiff_t ref_stride = refp->p.stride[!!pl];
+    const pixel *ref;
+
+    // block until the reference rows we read have been reconstructed
+    dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
+                              PLANE_TYPE_Y + !!pl);
+    if (dx < 3 || dx + bw4 * h_mul + 4 > f->bw * h_mul ||
+        dy < 3 || dy + bh4 * v_mul + 4 > f->bh * v_mul)
+    {
+        // out-of-frame reads: build a padded copy (block + 7 filter taps),
+        // then point ref 3 rows/cols in, at the block origin
+        emu_edge(t->emu_edge, 160 * sizeof(pixel), refp->p.data[pl], ref_stride,
+                 bw4 * h_mul + 7, bh4 * v_mul + 7, f->bw * h_mul, f->bh * v_mul,
+                 dx - 3, dy - 3);
+        ref = &t->emu_edge[160 * 3 + 3];
+        ref_stride = 160 * sizeof(pixel);
+    } else {
+        ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
+    }
+
+    if (dst8 != NULL) {
+        f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
+                                 bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+    } else {
+        f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
+                                  bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+    }
+}
+
+/*
+ * Overlapped block motion compensation: blend predictions from the inter
+ * neighbors above and to the left of the current block into its top/left
+ * region, using fixed per-size blend masks (covering at most half of the
+ * block). The neighbor predictions are rendered into t->scratch.lap with mc()
+ * and then blended via dsp->mc.blend. Only neighbors with ref[0] > 0 (i.e.
+ * inter-coded) contribute; at most 4 neighbors per edge are used.
+ */
+static void obmc(Dav1dTileContext *const t,
+                 pixel *const dst, const ptrdiff_t dst_stride,
+                 const uint8_t *const b_dim, const int pl,
+                 const int bx4, const int by4, const int w4, const int h4)
+{
+    assert(!(t->bx & 1) && !(t->by & 1));
+    const Dav1dFrameContext *const f = t->f;
+    const refmvs *const r = &f->mvs[t->by * f->b4_stride + t->bx];
+    pixel *const lap = t->scratch.lap;
+    // blend weights (out of 64, presumably — TODO confirm against
+    // dsp->mc.blend) indexed by distance from the shared edge
+    static const uint8_t obmc_mask_2[2] = { 19,  0 };
+    static const uint8_t obmc_mask_4[4] = { 25, 14,  5,  0 };
+    static const uint8_t obmc_mask_8[8] = { 28, 22, 16, 11,  7,  3,  0,  0 };
+    static const uint8_t obmc_mask_16[16] = { 30, 27, 24, 21, 18, 15, 12, 10,
+                                               8,  6,  4,  3,  0,  0,  0,  0 };
+    static const uint8_t obmc_mask_32[32] = { 31, 29, 28, 26, 24, 23, 21, 20,
+                                              19, 17, 16, 14, 13, 12, 11,  9,
+                                               8,  7,  6,  5,  4,  4,  3,  2,
+                                               0,  0,  0,  0,  0,  0,  0,  0 };
+    static const uint8_t *const obmc_masks[] = {
+        obmc_mask_2, obmc_mask_4, obmc_mask_8, obmc_mask_16, obmc_mask_32
+    };
+    const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
+
+    // above neighbors (skipped for small chroma blocks)
+    if (t->by > t->ts->tiling.row_start &&
+        (!pl || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
+    {
+        for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
+            // only odd blocks are considered for overlap handling, hence +1
+            const refmvs *const a_r = &r[x - f->b4_stride + 1];
+            const uint8_t *const a_b_dim =
+                av1_block_dimensions[sbtype_to_bs[a_r->sb_type]];
+
+            if (a_r->ref[0] > 0) {
+                // predict the neighbor's mv into the top half of our block,
+                // using the neighbor's interpolation filters
+                mc(t, lap, NULL, 128 * sizeof(pixel),
+                   iclip(a_b_dim[0], 2, b_dim[0]), imin(b_dim[1], 16) >> 1,
+                   t->bx + x, t->by, pl, a_r->mv[0],
+                   &f->refp[a_r->ref[0] - 1],
+                   av1_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
+                f->dsp->mc.blend(&dst[x * h_mul], dst_stride,
+                                 lap, 128 * sizeof(pixel),
+                                 h_mul * iclip(a_b_dim[0], 2, b_dim[0]),
+                                 v_mul * imin(b_dim[1], 16) >> 1,
+                                 obmc_masks[imin(b_dim[3], 4) - ss_ver], 1);
+                i++;
+            }
+            x += imax(a_b_dim[0], 2);
+        }
+    }
+
+    // left neighbors
+    if (t->bx > t->ts->tiling.col_start)
+        for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
+            // only odd blocks are considered for overlap handling, hence +1
+            const refmvs *const l_r = &r[(y + 1) * f->b4_stride - 1];
+            const uint8_t *const l_b_dim =
+                av1_block_dimensions[sbtype_to_bs[l_r->sb_type]];
+
+            if (l_r->ref[0] > 0) {
+                mc(t, lap, NULL, 32 * sizeof(pixel),
+                   imin(b_dim[0], 16) >> 1,
+                   iclip(l_b_dim[1], 2, b_dim[1]),
+                   t->bx, t->by + y, pl, l_r->mv[0],
+                   &f->refp[l_r->ref[0] - 1],
+                   av1_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
+                f->dsp->mc.blend(&dst[y * v_mul * PXSTRIDE(dst_stride)], dst_stride,
+                                 lap, 32 * sizeof(pixel),
+                                 h_mul * imin(b_dim[0], 16) >> 1,
+                                 v_mul * iclip(l_b_dim[1], 2, b_dim[1]),
+                                 obmc_masks[imin(b_dim[2], 4) - ss_hor], 0);
+                i++;
+            }
+            y += imax(l_b_dim[1], 2);
+        }
+}
+
+/*
+ * Warped (affine) motion compensation for one block, processed in 8x8
+ * sub-blocks. For each 8x8 tile the affine matrix wmp->matrix is evaluated
+ * at the tile center to get a 16.16 fixed-point source position; the
+ * fractional part (minus the alpha/beta/gamma/delta shear offsets,
+ * truncated to 1/64-pel) is handed to the dsp warp kernel along with the
+ * integer-aligned source pointer.
+ *
+ * Exactly one of dst8 (pixels) / dst16 (intermediate precision, stride in
+ * coef units) must be non-NULL. Falls back to emu_edge for the 15x15
+ * footprint (8x8 block + 7 filter taps) when it crosses the frame.
+ */
+static void warp_affine(Dav1dTileContext *const t,
+                        pixel *dst8, coef *dst16, const ptrdiff_t dstride,
+                        const uint8_t *const b_dim, const int pl,
+                        const Dav1dThreadPicture *const refp,
+                        const WarpedMotionParams *const wmp)
+{
+    assert((dst8 != NULL) ^ (dst16 != NULL));
+    const Dav1dFrameContext *const f = t->f;
+    const Dav1dDSPContext *const dsp = f->dsp;
+    const int ss_ver = !!pl && f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = !!pl && f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
+    // block dimensions must be a multiple of 8 pixels in this plane
+    assert(!((b_dim[0] * h_mul) & 7) && !((b_dim[1] * v_mul) & 7));
+    const int32_t *const mat = wmp->matrix;
+    const int width = f->bw * h_mul, height = f->bh * v_mul;
+
+    for (int y = 0; y < b_dim[1] * v_mul; y += 8) {
+        for (int x = 0; x < b_dim[0] * h_mul; x += 8) {
+            // calculate transformation relative to center of 8x8 block in
+            // luma pixel units
+            const int src_x = t->bx * 4 + ((x + 4) << ss_hor);
+            const int src_y = t->by * 4 + ((y + 4) << ss_ver);
+            const int mvx = (mat[2] * src_x + mat[3] * src_y + mat[0]) >> ss_hor;
+            const int mvy = (mat[4] * src_x + mat[5] * src_y + mat[1]) >> ss_ver;
+
+            // integer position, stepped back 4 so the kernel reads the
+            // 8-tap footprint; fractional part truncated to 1/64-pel
+            const int dx = (mvx >> 16) - 4;
+            const int mx = ((mvx & 0xffff) - wmp->alpha * 4 -
+                                             wmp->beta  * 7) & ~0x3f;
+            const int dy = (mvy >> 16) - 4;
+            const int my = ((mvy & 0xffff) - wmp->gamma * 4 -
+                                             wmp->delta * 4) & ~0x3f;
+
+            const pixel *ref_ptr;
+            ptrdiff_t ref_stride = refp->p.stride[!!pl];
+
+            // wait for the reference rows this 8x8 tile reads
+            dav1d_thread_picture_wait(refp, dy + 4 + 8,
+                                      PLANE_TYPE_Y + !!pl);
+            if (dx < 3 || dx + 8 + 4 > width || dy < 3 || dy + 8 + 4 > height) {
+                emu_edge(t->emu_edge, 160 * sizeof(pixel), refp->p.data[pl],
+                         ref_stride, 15, 15, width, height, dx - 3, dy - 3);
+                ref_ptr = &t->emu_edge[160 * 3 + 3];
+                ref_stride = 160 * sizeof(pixel);
+            } else {
+                ref_ptr = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
+            }
+            if (dst16 != NULL)
+                dsp->mc.warp8x8t(&dst16[x], dstride, ref_ptr, ref_stride,
+                                 wmp->abcd, mx, my);
+            else
+                dsp->mc.warp8x8(&dst8[x], dstride, ref_ptr, ref_stride,
+                                wmp->abcd, mx, my);
+        }
+        // dst16's stride is in coef elements, dst8's in bytes (PXSTRIDE)
+        if (dst8) dst8  += 8 * PXSTRIDE(dstride);
+        else      dst16 += 8 * dstride;
+    }
+}
+
+/* Reconstruct one intra-coded block: run intra prediction (regular
+ * directional/DC, palette, or chroma-from-luma) for the luma and chroma
+ * planes, decode the residual coefficients, and add the inverse transform
+ * on top of the prediction.
+ *
+ * t                - tile context; block position is carried in t->bx/t->by
+ *                    (in 4px units) and is temporarily advanced while
+ *                    iterating over transform blocks, then restored
+ * bs               - block size enum for this block
+ * intra_edge_flags - top-right/bottom-left edge availability at the
+ *                    superblock level, consumed by prepare_intra_edges()
+ * b                - decoded mode info for this block
+ */
+void bytefn(recon_b_intra)(Dav1dTileContext *const t, const enum BlockSize bs,
+                           const enum EdgeFlags intra_edge_flags,
+                           const Av1Block *const b)
+{
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dFrameContext *const f = t->f;
+    const Dav1dDSPContext *const dsp = f->dsp;
+    const int bx4 = t->bx & 31, by4 = t->by & 31;
+    // chroma subsampling shifts: vertical only for 4:2:0, horizontal for
+    // everything except 4:4:4
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bw4 = b_dim[0], bh4 = b_dim[1];
+    // visible block size in 4px units, clipped against the frame edge
+    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
+    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
+    // NOTE(review): in subsampled layouts, blocks smaller than the chroma
+    // sampling grid only carry chroma at odd block positions — presumably
+    // the last luma block of each chroma-sharing cluster; confirm vs. spec
+    const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
+                           (bw4 > ss_hor || t->bx & 1) &&
+                           (bh4 > ss_ver || t->by & 1);
+    const TxfmInfo *const t_dim = &av1_txfm_dimensions[b->tx];
+    const TxfmInfo *const uv_t_dim = &av1_txfm_dimensions[b->uvtx];
+
+    // intra-prediction edge buffer: left neighbours are stored below
+    // edge[0] (see "edge - h*4" below), the top-left pixel at edge[0],
+    // and top neighbours above it (edge + 1)
+    pixel edge_mem[257], *const edge = &edge_mem[128];
+    const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
+
+    // large blocks are processed in 64x64-pixel (16 4px-unit) sub-units
+    for (int init_y = 0; init_y < h4; init_y += 16) {
+        for (int init_x = 0; init_x < w4; init_x += 16) {
+            // luma palette mode: the whole block is predicted in one call
+            // from the palette and the per-pixel palette indices
+            if (b->pal_sz[0]) {
+                pixel *dst = ((pixel *) f->cur.p.data[0]) +
+                             4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);
+                const uint8_t *pal_idx;
+                if (f->frame_thread.pass) {
+                    // frame-threading: indices were parsed in an earlier
+                    // pass and are consumed from the per-tile ring buffer
+                    pal_idx = ts->frame_thread.pal_idx;
+                    ts->frame_thread.pal_idx += bw4 * bh4 * 16;
+                } else {
+                    pal_idx = t->scratch.pal_idx;
+                }
+                const uint16_t *const pal = f->frame_thread.pass ?
+                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                                        ((t->bx >> 1) + (t->by & 1))][0] : t->pal[0];
+                f->dsp->ipred.pal_pred(dst, f->cur.p.stride[0], pal,
+                                       pal_idx, bw4 * 4, bh4 * 4);
+                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                    hex_dump(dst, PXSTRIDE(f->cur.p.stride[0]),
+                             bw4 * 4, bh4 * 4, "y-pal-pred");
+            }
+
+            // smoothness flag from the above/left neighbour contexts,
+            // OR-ed into the angle parameter of the prediction call
+            const int sm_fl = sm_flag(t->a, bx4) | sm_flag(&t->l, by4);
+            // top-right/bottom-left availability for this 64x64 sub-unit;
+            // interior sub-units derive it from their position, edge ones
+            // from the superblock-level flags passed in by the caller
+            const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
+                              intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
+            const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
+                              intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
+            int y, x;
+            const int sub_h4 = imin(h4, 16 + init_y);
+            const int sub_w4 = imin(w4, init_x + 16);
+            // iterate over luma transform blocks inside the sub-unit;
+            // t->by/t->bx track the current transform block position and
+            // are rewound after each loop
+            for (y = init_y, t->by += init_y; y < sub_h4;
+                 y += t_dim->h, t->by += t_dim->h)
+            {
+                pixel *dst = ((pixel *) f->cur.p.data[0]) +
+                               4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) +
+                                    t->bx + init_x);
+                for (x = init_x, t->bx += init_x; x < sub_w4;
+                     x += t_dim->w, t->bx += t_dim->w)
+                {
+                    // palette blocks were predicted above; only the
+                    // residual decode below still applies
+                    if (b->pal_sz[0]) goto skip_y_pred;
+
+                    int angle = b->y_angle;
+                    const enum EdgeFlags edge_flags =
+                        (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
+                             0 : EDGE_I444_TOP_HAS_RIGHT) |
+                        ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
+                             0 : EDGE_I444_LEFT_HAS_BOTTOM);
+                    // at a superblock top edge, top neighbours come from
+                    // the saved ipred_edge row of the superblock above
+                    const pixel *top_sb_edge = NULL;
+                    if (!(t->by & (f->sb_step - 1))) {
+                        top_sb_edge = f->ipred_edge[0];
+                        const int sby = t->by >> f->sb_shift;
+                        top_sb_edge += f->sb128w * 128 * (sby - 1);
+                    }
+                    // gather/filter neighbour pixels into edge[]; may also
+                    // remap the prediction mode and angle
+                    const enum IntraPredMode m =
+                        bytefn(prepare_intra_edges)(t->bx,
+                                                    t->bx > ts->tiling.col_start,
+                                                    t->by,
+                                                    t->by > ts->tiling.row_start,
+                                                    ts->tiling.col_end,
+                                                    ts->tiling.row_end,
+                                                    edge_flags, dst,
+                                                    f->cur.p.stride[0], top_sb_edge,
+                                                    b->y_mode, &angle,
+                                                    t_dim->w, t_dim->h, edge);
+                    dsp->ipred.intra_pred[b->tx][m](dst, f->cur.p.stride[0],
+                                                    edge, angle | sm_fl);
+
+                    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
+                        hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
+                                 t_dim->h * 4, 2, "l");
+                        hex_dump(edge, 0, 1, 1, "tl");
+                        hex_dump(edge + 1, t_dim->w * 4,
+                                 t_dim->w * 4, 2, "t");
+                        hex_dump(dst, f->cur.p.stride[0],
+                                 t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
+                    }
+
+                skip_y_pred: {}
+                    // residual: decode coefficients (or fetch them from the
+                    // frame-thread buffer) and add the inverse transform
+                    if (!b->skip) {
+                        coef *cf;
+                        int eob;
+                        enum TxfmType txtp;
+                        if (f->frame_thread.pass) {
+                            // reconstruction pass: coefficients, eob and
+                            // transform type were stored by the parse pass
+                            cf = ts->frame_thread.cf;
+                            ts->frame_thread.cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
+                            const struct CodedBlockInfo *const cbi =
+                                &f->frame_thread.cbi[t->by * f->b4_stride + t->bx];
+                            eob = cbi->eob[0];
+                            txtp = cbi->txtp[0];
+                        } else {
+                            uint8_t cf_ctx;
+                            cf = t->cf;
+                            eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
+                                               &t->l.lcoef[by4 + y], b->tx, bs,
+                                               b, 1, 0, cf, &txtp, &cf_ctx);
+                            if (DEBUG_BLOCK_INFO)
+                                printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
+                                       b->tx, txtp, eob, ts->msac.rng);
+                            // propagate the new coef context to the above/
+                            // left context arrays, clipped to the frame
+                            memset(&t->a->lcoef[bx4 + x], cf_ctx,
+                                   imin(t_dim->w, f->bw - t->bx));
+                            memset(&t->l.lcoef[by4 + y], cf_ctx,
+                                   imin(t_dim->h, f->bh - t->by));
+                        }
+                        if (eob >= 0) {
+                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                                coef_dump(cf, imin(t_dim->h, 8) * 4,
+                                          imin(t_dim->w, 8) * 4, 3, "dq");
+                            dsp->itx.itxfm_add[b->tx]
+                                              [txtp](dst,
+                                                     f->cur.p.stride[0],
+                                                     cf, eob);
+                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                                hex_dump(dst, f->cur.p.stride[0],
+                                         t_dim->w * 4, t_dim->h * 4, "recon");
+                        }
+                    } else if (!f->frame_thread.pass) {
+                        // skip block: reset the coef contexts (0x40 is the
+                        // "all-skip" context value used throughout)
+                        memset(&t->a->lcoef[bx4 + x], 0x40, t_dim->w);
+                        memset(&t->l.lcoef[by4 + y], 0x40, t_dim->h);
+                    }
+                    dst += 4 * t_dim->w;
+                }
+                t->bx -= x;
+            }
+            t->by -= y;
+
+            if (!has_chroma) continue;
+
+            const ptrdiff_t stride = f->cur.p.stride[1];
+
+            // chroma-from-luma: predict both chroma planes from a DC base
+            // prediction plus scaled luma AC contributions
+            if (b->uv_mode == CFL_PRED) {
+                assert(!init_x && !init_y);
+
+                int16_t *const ac = t->scratch.ac;
+                ALIGN_STK_32(pixel, uv_pred, 2 * 32,);
+                pixel *y_src = ((pixel *) f->cur.p.data[0]) + 4 * (t->bx & ~ss_hor) +
+                                 4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.p.stride[0]);
+                const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
+                                              (t->by >> ss_ver) * PXSTRIDE(stride));
+                pixel *const uv_dst[2] = { ((pixel *) f->cur.p.data[1]) + uv_off,
+                                           ((pixel *) f->cur.p.data[2]) + uv_off };
+                // cfl_uvtx can be different from uvtx in case of lossless
+                const enum RectTxfmSize cfl_uvtx =
+                    av1_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+                const TxfmInfo *const cfl_uv_t_dim =
+                    &av1_txfm_dimensions[cfl_uvtx];
+
+                // per-plane DC base prediction into uv_pred (stride 0:
+                // only the single base row is produced)
+                for (int pl = 0; pl < 2; pl++) {
+                    int angle = 0;
+                    const pixel *top_sb_edge = NULL;
+                    if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
+                        top_sb_edge = f->ipred_edge[pl + 1];
+                        const int sby = t->by >> f->sb_shift;
+                        top_sb_edge += f->sb128w * 128 * (sby - 1);
+                    }
+                    const enum IntraPredMode m =
+                        bytefn(prepare_intra_edges)(t->bx >> ss_hor,
+                                                    (t->bx >> ss_hor) >
+                                                        (ts->tiling.col_start >> ss_hor),
+                                                    t->by >> ss_ver,
+                                                    (t->by >> ss_ver) >
+                                                        (ts->tiling.row_start >> ss_ver),
+                                                    ts->tiling.col_end >> ss_hor,
+                                                    ts->tiling.row_end >> ss_ver,
+                                                    0, uv_dst[pl], stride,
+                                                    top_sb_edge, DC_PRED, &angle,
+                                                    cfl_uv_t_dim->w,
+                                                    cfl_uv_t_dim->h, edge);
+                    dsp->ipred.intra_pred[cfl_uvtx][m](&uv_pred[32 * pl],
+                                                       0, edge, 0);
+                }
+                // extract luma AC samples, excluding padding beyond the
+                // last full transform unit, then apply the CFL alphas
+                const int furthest_r =
+                    ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
+                const int furthest_b =
+                    ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
+                dsp->ipred.cfl_ac[f->cur.p.p.layout - 1]
+                                 [cfl_uvtx](ac, y_src, f->cur.p.stride[0],
+                                            cbw4 - (furthest_r >> ss_hor),
+                                            cbh4 - (furthest_b >> ss_ver));
+                dsp->ipred.cfl_pred[cfl_uv_t_dim->lw](uv_dst[0],
+                                                      uv_dst[1], stride,
+                                                      ac, uv_pred,
+                                                      b->cfl_alpha,
+                                                      cbh4 * 4);
+                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
+                    hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
+                    hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
+                }
+            } else if (b->pal_sz[1]) {
+                // chroma palette prediction; U and V share the index map
+                // (stored after the luma indices in the scratch buffer)
+                ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
+                                           (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));
+                const uint8_t *pal_idx;
+                if (f->frame_thread.pass) {
+                    pal_idx = ts->frame_thread.pal_idx;
+                    ts->frame_thread.pal_idx += cbw4 * cbh4 * 16;
+                } else {
+                    pal_idx = &t->scratch.pal_idx[bw4 * bh4 * 16];
+                }
+                const uint16_t *const pal_u = f->frame_thread.pass ?
+                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                                        ((t->bx >> 1) + (t->by & 1))][1] : t->pal[1];
+                f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[1]) + uv_dstoff,
+                                       f->cur.p.stride[1], pal_u,
+                                       pal_idx, cbw4 * 4, cbh4 * 4);
+                const uint16_t *const pal_v = f->frame_thread.pass ?
+                    f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
+                                        ((t->bx >> 1) + (t->by & 1))][2] : t->pal[2];
+                f->dsp->ipred.pal_pred(((pixel *) f->cur.p.data[2]) + uv_dstoff,
+                                       f->cur.p.stride[1], pal_v,
+                                       pal_idx, cbw4 * 4, cbh4 * 4);
+                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
+                    hex_dump(((pixel *) f->cur.p.data[1]) + uv_dstoff,
+                             PXSTRIDE(f->cur.p.stride[1]),
+                             cbw4 * 4, cbh4 * 4, "u-pal-pred");
+                    hex_dump(((pixel *) f->cur.p.data[2]) + uv_dstoff,
+                             PXSTRIDE(f->cur.p.stride[1]),
+                             cbw4 * 4, cbh4 * 4, "v-pal-pred");
+                }
+            }
+
+            // chroma prediction + residual, mirroring the luma loop above
+            // but on the subsampled grid
+            const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
+                                 sm_uv_flag(&t->l, cby4);
+            const int uv_sb_has_tr =
+                ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
+                intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.p.layout - 1));
+            const int uv_sb_has_bl =
+                init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
+                intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.p.layout - 1));
+            const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
+            const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
+            for (int pl = 0; pl < 2; pl++) {
+                for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
+                     y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
+                {
+                    pixel *dst = ((pixel *) f->cur.p.data[1 + pl]) +
+                                   4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
+                                        ((t->bx + init_x) >> ss_hor));
+                    for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
+                         x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
+                    {
+                        // CFL/palette blocks were already predicted above;
+                        // only the residual decode still applies
+                        if (b->uv_mode == CFL_PRED || b->pal_sz[1])
+                            goto skip_uv_pred;
+
+                        int angle = b->uv_angle;
+                        // this probably looks weird because we're using
+                        // luma flags in a chroma loop, but that's because
+                        // prepare_intra_edges() expects luma flags as input
+                        const enum EdgeFlags edge_flags =
+                            (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
+                              (x + uv_t_dim->w >= sub_cw4)) ?
+                                 0 : EDGE_I444_TOP_HAS_RIGHT) |
+                            ((x > (init_x >> ss_hor) ||
+                              (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
+                                 0 : EDGE_I444_LEFT_HAS_BOTTOM);
+                        const pixel *top_sb_edge = NULL;
+                        if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
+                            top_sb_edge = f->ipred_edge[1 + pl];
+                            const int sby = t->by >> f->sb_shift;
+                            top_sb_edge += f->sb128w * 128 * (sby - 1);
+                        }
+                        const enum IntraPredMode m =
+                            bytefn(prepare_intra_edges)(t->bx >> ss_hor,
+                                                        (t->bx >> ss_hor) >
+                                                            (ts->tiling.col_start >> ss_hor),
+                                                        t->by >> ss_ver,
+                                                        (t->by >> ss_ver) >
+                                                            (ts->tiling.row_start >> ss_ver),
+                                                        ts->tiling.col_end >> ss_hor,
+                                                        ts->tiling.row_end >> ss_ver,
+                                                        edge_flags, dst, stride,
+                                                        top_sb_edge, b->uv_mode,
+                                                        &angle, uv_t_dim->w,
+                                                        uv_t_dim->h, edge);
+                        dsp->ipred.intra_pred[b->uvtx][m](dst, stride,
+                                                          edge, angle | sm_uv_fl);
+                        if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
+                            hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
+                                     uv_t_dim->h * 4, 2, "l");
+                            hex_dump(edge, 0, 1, 1, "tl");
+                            hex_dump(edge + 1, uv_t_dim->w * 4,
+                                     uv_t_dim->w * 4, 2, "t");
+                            hex_dump(dst, stride, uv_t_dim->w * 4,
+                                     uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
+                        }
+
+                    skip_uv_pred: {}
+                        if (!b->skip) {
+                            enum TxfmType txtp;
+                            int eob;
+                            coef *cf;
+                            if (f->frame_thread.pass) {
+                                cf = ts->frame_thread.cf;
+                                ts->frame_thread.cf += uv_t_dim->w * uv_t_dim->h * 16;
+                                const struct CodedBlockInfo *const cbi =
+                                    &f->frame_thread.cbi[t->by * f->b4_stride + t->bx];
+                                eob = cbi->eob[pl + 1];
+                                txtp = cbi->txtp[pl + 1];
+                            } else {
+                                uint8_t cf_ctx;
+                                cf = t->cf;
+                                eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
+                                                   &t->l.ccoef[pl][cby4 + y],
+                                                   b->uvtx, bs, b, 1, 1 + pl, cf,
+                                                   &txtp, &cf_ctx);
+                                if (DEBUG_BLOCK_INFO)
+                                    printf("Post-uv-cf-blk[pl=%d,tx=%d,"
+                                           "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
+                                           pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
+                                memset(&t->a->ccoef[pl][cbx4 + x], cf_ctx,
+                                       imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor));
+                                memset(&t->l.ccoef[pl][cby4 + y], cf_ctx,
+                                       imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver));
+                            }
+                            if (eob >= 0) {
+                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                                    coef_dump(cf, uv_t_dim->h * 4,
+                                              uv_t_dim->w * 4, 3, "dq");
+                                dsp->itx.itxfm_add[b->uvtx]
+                                                  [txtp](dst, stride,
+                                                         cf, eob);
+                                if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                                    hex_dump(dst, stride, uv_t_dim->w * 4,
+                                             uv_t_dim->h * 4, "recon");
+                            }
+                        } else if (!f->frame_thread.pass) {
+                            memset(&t->a->ccoef[pl][cbx4 + x], 0x40, uv_t_dim->w);
+                            memset(&t->l.ccoef[pl][cby4 + y], 0x40, uv_t_dim->h);
+                        }
+                        dst += uv_t_dim->w * 4;
+                    }
+                    t->bx -= x << ss_hor;
+                }
+                t->by -= y << ss_ver;
+            }
+        }
+    }
+}
+
+void bytefn(recon_b_inter)(Dav1dTileContext *const t, const enum BlockSize bs,
+                           const Av1Block *const b)
+{
+    Dav1dTileState *const ts = t->ts;
+    const Dav1dFrameContext *const f = t->f;
+    const Dav1dDSPContext *const dsp = f->dsp;
+    const int bx4 = t->bx & 31, by4 = t->by & 31;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
+    const uint8_t *const b_dim = av1_block_dimensions[bs];
+    const int bw4 = b_dim[0], bh4 = b_dim[1];
+    const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
+    const int has_chroma = f->seq_hdr.layout != DAV1D_PIXEL_LAYOUT_I400 &&
+                           (bw4 > ss_hor || t->bx & 1) &&
+                           (bh4 > ss_ver || t->by & 1);
+    const int chr_layout_idx = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
+                               DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.p.layout;
+
+    // prediction
+    const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
+    pixel *dst = ((pixel *) f->cur.p.data[0]) +
+        4 * (t->by * PXSTRIDE(f->cur.p.stride[0]) + t->bx);
+    const ptrdiff_t uvdstoff =
+        4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.p.stride[1]));
+    if (!(f->frame_hdr.frame_type & 1)) {
+        // intrabc
+        mc(t, dst, NULL, f->cur.p.stride[0],
+           bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, FILTER_2D_BILINEAR);
+        if (has_chroma) for (int pl = 1; pl < 3; pl++)
+            mc(t, ((pixel *) f->cur.p.data[pl]) + uvdstoff, NULL, f->cur.p.stride[1],
+               bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
+               t->bx & ~ss_hor, t->by & ~ss_ver,
+               pl, b->mv[0], &f->cur, FILTER_2D_BILINEAR);
+    } else if (b->comp_type == COMP_INTER_NONE) {
+        const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
+        const enum Filter2d filter_2d = b->filter2d;
+
+        if (imin(bw4, bh4) > 1 &&
+            ((b->inter_mode == GLOBALMV &&
+              f->frame_hdr.gmv[b->ref[0]].type > WM_TYPE_TRANSLATION) ||
+             (b->motion_mode == MM_WARP &&
+              t->warpmv.type > WM_TYPE_TRANSLATION)))
+        {
+            warp_affine(t, dst, NULL, f->cur.p.stride[0], b_dim, 0, refp,
+                        b->motion_mode == MM_WARP ? &t->warpmv :
+                            &f->frame_hdr.gmv[b->ref[0]]);
+        } else {
+            mc(t, dst, NULL, f->cur.p.stride[0],
+               bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, filter_2d);
+            if (b->motion_mode == MM_OBMC)
+                obmc(t, dst, f->cur.p.stride[0], b_dim, 0, bx4, by4, w4, h4);
+        }
+        if (b->interintra_type) {
+            const enum RectTxfmSize ii_tx = av1_max_txfm_size_for_bs[bs][0];
+            pixel tl_edge_px[65], *const tl_edge = &tl_edge_px[32];
+            enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
+                                   SMOOTH_PRED : b->interintra_mode;
+            pixel *const tmp = t->scratch.interintra;
+            int angle = 0;
+            const pixel *top_sb_edge = NULL;
+            if (!(t->by & (f->sb_step - 1))) {
+                top_sb_edge = f->ipred_edge[0];
+                const int sby = t->by >> f->sb_shift;
+                top_sb_edge += f->sb128w * 128 * (sby - 1);
+            }
+            m = bytefn(prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
+                                            t->by, t->by > ts->tiling.row_start,
+                                            ts->tiling.col_end, ts->tiling.row_end,
+                                            0, dst, f->cur.p.stride[0], top_sb_edge,
+                                            m, &angle, bw4, bh4, tl_edge);
+            dsp->ipred.intra_pred[ii_tx][m](tmp, 4 * bw4 * sizeof(pixel), tl_edge, 0);
+            const uint8_t *const ii_mask =
+                b->interintra_type == INTER_INTRA_BLEND ?
+                     ii_masks[bs][0][b->interintra_mode] :
+                     wedge_masks[bs][0][0][b->wedge_idx];
+            dsp->mc.blend(dst, f->cur.p.stride[0], tmp, bw4 * 4 * sizeof(pixel),
+                          bw4 * 4, bh4 * 4, ii_mask, bw4 * 4);
+        }
+
+        if (!has_chroma) goto skip_inter_chroma_pred;
+
+        // sub8x8 derivation
+        int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
+        refmvs *r;
+        if (is_sub8x8) {
+            assert(ss_hor == 1);
+            r = &f->mvs[t->by * f->b4_stride + t->bx];
+            if (bw4 == 1) is_sub8x8 &= r[-1].ref[0] > 0;
+            if (bh4 == ss_ver) is_sub8x8 &= r[-f->b4_stride].ref[0] > 0;
+            if (bw4 == 1 && bh4 == ss_ver)
+                is_sub8x8 &= r[-(1 + f->b4_stride)].ref[0] > 0;
+        }
+
+        // chroma prediction
+        if (is_sub8x8) {
+            assert(ss_hor == 1);
+            int h_off = 0, v_off = 0;
+            if (bw4 == 1 && bh4 == ss_ver) {
+                for (int pl = 0; pl < 2; pl++)
+                    mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
+                       NULL, f->cur.p.stride[1],
+                       bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
+                       r[-(f->b4_stride + 1)].mv[0],
+                       &f->refp[r[-(f->b4_stride + 1)].ref[0] - 1],
+                       f->frame_thread.pass != 2 ? t->tl_4x4_filter :
+                           f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
+                v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);
+                h_off = 2;
+            }
+            if (bw4 == 1) {
+                const enum Filter2d left_filter_2d =
+                    av1_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
+                for (int pl = 0; pl < 2; pl++)
+                    mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + v_off, NULL,
+                       f->cur.p.stride[1], bw4, bh4, t->bx - 1,
+                       t->by, 1 + pl, r[-1].mv[0], &f->refp[r[-1].ref[0] - 1],
+                       f->frame_thread.pass != 2 ? left_filter_2d :
+                           f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
+                h_off = 2;
+            }
+            if (bh4 == ss_ver) {
+                const enum Filter2d top_filter_2d =
+                    av1_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
+                for (int pl = 0; pl < 2; pl++)
+                    mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off, NULL,
+                       f->cur.p.stride[1], bw4, bh4, t->bx, t->by - 1,
+                       1 + pl, r[-f->b4_stride].mv[0],
+                       &f->refp[r[-f->b4_stride].ref[0] - 1],
+                       f->frame_thread.pass != 2 ? top_filter_2d :
+                           f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
+                v_off = 2 * PXSTRIDE(f->cur.p.stride[1]);
+            }
+            for (int pl = 0; pl < 2; pl++)
+                mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.p.stride[1],
+                   bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0], refp, filter_2d);
+        } else {
+            if (imin(cbw4, cbh4) > 1 &&
+                ((b->inter_mode == GLOBALMV &&
+                  f->frame_hdr.gmv[b->ref[0]].type > WM_TYPE_TRANSLATION) ||
+                 (b->motion_mode == MM_WARP &&
+                  t->warpmv.type > WM_TYPE_TRANSLATION)))
+            {
+                for (int pl = 0; pl < 2; pl++)
+                    warp_affine(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff, NULL,
+                                f->cur.p.stride[1], b_dim, 1 + pl, refp,
+                                b->motion_mode == MM_WARP ? &t->warpmv :
+                                    &f->frame_hdr.gmv[b->ref[0]]);
+            } else {
+                for (int pl = 0; pl < 2; pl++) {
+                    mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
+                       NULL, f->cur.p.stride[1],
+                       bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
+                       t->bx & ~ss_hor, t->by & ~ss_ver,
+                       1 + pl, b->mv[0], refp, filter_2d);
+                    if (b->motion_mode == MM_OBMC)
+                        obmc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
+                             f->cur.p.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
+                }
+            }
+            if (b->interintra_type) {
+                // FIXME for 8x32 with 4:2:2 subsampling, this probably does
+                // the wrong thing since it will select 4x16, not 4x32, as a
+                // transform size...
+                const enum RectTxfmSize ii_tx =
+                    av1_max_txfm_size_for_bs[bs][f->cur.p.p.layout];
+                const uint8_t *const ii_mask =
+                    b->interintra_type == INTER_INTRA_BLEND ?
+                         ii_masks[bs][chr_layout_idx][b->interintra_mode] :
+                         wedge_masks[bs][chr_layout_idx][0][b->wedge_idx];
+
+                for (int pl = 0; pl < 2; pl++) {
+                    pixel *const tmp = t->scratch.interintra;
+                    pixel tl_edge_px[65], *const tl_edge = &tl_edge_px[32];
+                    enum IntraPredMode m =
+                        b->interintra_mode == II_SMOOTH_PRED ?
+                        SMOOTH_PRED : b->interintra_mode;
+                    int angle = 0;
+                    pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;
+                    const pixel *top_sb_edge = NULL;
+                    if (!(t->by & (f->sb_step - 1))) {
+                        top_sb_edge = f->ipred_edge[pl + 1];
+                        const int sby = t->by >> f->sb_shift;
+                        top_sb_edge += f->sb128w * 128 * (sby - 1);
+                    }
+                    m = bytefn(prepare_intra_edges)(t->bx >> ss_hor,
+                                                    (t->bx >> ss_hor) >
+                                                        (ts->tiling.col_start >> ss_hor),
+                                                    t->by >> ss_ver,
+                                                    (t->by >> ss_ver) >
+                                                        (ts->tiling.row_start >> ss_ver),
+                                                    ts->tiling.col_end >> ss_hor,
+                                                    ts->tiling.row_end >> ss_ver,
+                                                    0, uvdst, f->cur.p.stride[1],
+                                                    top_sb_edge, m,
+                                                    &angle, cbw4, cbh4, tl_edge);
+                    dsp->ipred.intra_pred[ii_tx][m](tmp, cbw4 * 4, tl_edge, 0);
+                    dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp, cbw4 * 4,
+                                  cbw4 * 4, cbh4 * 4, ii_mask, cbw4 * 4);
+                }
+            }
+        }
+
+    skip_inter_chroma_pred: {}
+        t->tl_4x4_filter = filter_2d;
+    } else {
+        const enum Filter2d filter_2d = b->filter2d;
+        coef (*tmp)[bw4 * bh4 * 16] = (coef (*)[bw4 * bh4 * 16]) t->scratch.compinter;
+        int jnt_weight;
+        uint8_t *const seg_mask = t->scratch_seg_mask;
+        const uint8_t *mask;
+
+        for (int i = 0; i < 2; i++) {
+            const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
+
+            if (b->inter_mode == GLOBALMV_GLOBALMV &&
+                f->frame_hdr.gmv[b->ref[i]].type > WM_TYPE_TRANSLATION)
+            {
+                warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
+                            &f->frame_hdr.gmv[b->ref[i]]);
+            } else {
+                mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
+                   b->mv[i], refp, filter_2d);
+            }
+        }
+        switch (b->comp_type) {
+        case COMP_INTER_AVG:
+            dsp->mc.avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],
+                        bw4 * 4, bh4 * 4);
+            break;
+        case COMP_INTER_WEIGHTED_AVG:
+            jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
+            dsp->mc.w_avg(dst, f->cur.p.stride[0], tmp[0], tmp[1],
+                          bw4 * 4, bh4 * 4, jnt_weight);
+            break;
+        case COMP_INTER_SEG:
+            dsp->mc.w_mask[chr_layout_idx](dst, f->cur.p.stride[0],
+                                           tmp[b->mask_sign], tmp[!b->mask_sign],
+                                           bw4 * 4, bh4 * 4, seg_mask, b->mask_sign);
+            mask = seg_mask;
+            break;
+        case COMP_INTER_WEDGE:
+            mask = wedge_masks[bs][0][0][b->wedge_idx];
+            dsp->mc.mask(dst, f->cur.p.stride[0],
+                         tmp[b->mask_sign], tmp[!b->mask_sign],
+                         bw4 * 4, bh4 * 4, mask);
+            if (has_chroma)
+                mask = wedge_masks[bs][chr_layout_idx][b->mask_sign][b->wedge_idx];
+            break;
+        }
+
+        // chroma
+        if (has_chroma) for (int pl = 0; pl < 2; pl++) {
+            for (int i = 0; i < 2; i++) {
+                const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
+                if (b->inter_mode == GLOBALMV_GLOBALMV &&
+                    imin(cbw4, cbh4) > 1 &&
+                    f->frame_hdr.gmv[b->ref[i]].type > WM_TYPE_TRANSLATION)
+                {
+                    warp_affine(t, NULL, tmp[i], bw4 * 2, b_dim, 1 + pl,
+                                refp, &f->frame_hdr.gmv[b->ref[i]]);
+                } else {
+                    mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
+                       1 + pl, b->mv[i], refp, filter_2d);
+                }
+            }
+            pixel *const uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff;
+            switch (b->comp_type) {
+            case COMP_INTER_AVG:
+                dsp->mc.avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],
+                            bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver);
+                break;
+            case COMP_INTER_WEIGHTED_AVG:
+                dsp->mc.w_avg(uvdst, f->cur.p.stride[1], tmp[0], tmp[1],
+                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight);
+                break;
+            case COMP_INTER_WEDGE:
+            case COMP_INTER_SEG:
+                dsp->mc.mask(uvdst, f->cur.p.stride[1],
+                             tmp[b->mask_sign], tmp[!b->mask_sign],
+                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask);
+                break;
+            }
+        }
+    }
+
+    if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
+        hex_dump(dst, f->cur.p.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
+        if (has_chroma) {
+            hex_dump(&((pixel *) f->cur.p.data[1])[uvdstoff], f->cur.p.stride[1],
+                     cbw4 * 4, cbh4 * 4, "u-pred");
+            hex_dump(&((pixel *) f->cur.p.data[2])[uvdstoff], f->cur.p.stride[1],
+                     cbw4 * 4, cbh4 * 4, "v-pred");
+        }
+    }
+
+    const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
+
+    if (b->skip) {
+        // reset coef contexts
+        memset(&t->a->lcoef[bx4], 0x40, w4);
+        memset(&t->l.lcoef[by4], 0x40, h4);
+        if (has_chroma) {
+            memset(&t->a->ccoef[0][cbx4], 0x40, cw4);
+            memset(&t->l.ccoef[0][cby4], 0x40, ch4);
+            memset(&t->a->ccoef[1][cbx4], 0x40, cw4);
+            memset(&t->l.ccoef[1][cby4], 0x40, ch4);
+        }
+        return;
+    }
+
+    const TxfmInfo *const uvtx = &av1_txfm_dimensions[b->uvtx];
+    const TxfmInfo *const ytx = &av1_txfm_dimensions[b->max_ytx];
+
+    for (int init_y = 0; init_y < bh4; init_y += 16) {
+        for (int init_x = 0; init_x < bw4; init_x += 16) {
+            // coefficient coding & inverse transforms
+            int y_off = !!init_y, y;
+            dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * init_y;
+            for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
+                 y += ytx->h, y_off++)
+            {
+                int x, x_off = !!init_x;
+                for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
+                     x += ytx->w, x_off++)
+                {
+                    read_coef_tree(t, bs, b, b->max_ytx, 0, b->tx_split,
+                                   x_off, y_off, &dst[x * 4]);
+                    t->bx += ytx->w;
+                }
+                dst += PXSTRIDE(f->cur.p.stride[0]) * 4 * ytx->h;
+                t->bx -= x;
+                t->by += ytx->h;
+            }
+            dst -= PXSTRIDE(f->cur.p.stride[0]) * 4 * y;
+            t->by -= y;
+
+            // chroma coefs and inverse transform
+            if (has_chroma) for (int pl = 0; pl < 2; pl++) {
+                pixel *uvdst = ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff +
+                    (PXSTRIDE(f->cur.p.stride[1]) * init_y * 4 >> ss_ver);
+                for (y = init_y >> ss_ver, t->by += init_y;
+                     y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
+                {
+                    int x;
+                    for (x = init_x >> ss_hor, t->bx += init_x;
+                         x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
+                    {
+                        coef *cf;
+                        int eob;
+                        enum TxfmType txtp;
+                        if (f->frame_thread.pass) {
+                            cf = ts->frame_thread.cf;
+                            ts->frame_thread.cf += uvtx->w * uvtx->h * 16;
+                            const struct CodedBlockInfo *const cbi =
+                                &f->frame_thread.cbi[t->by * f->b4_stride + t->bx];
+                            eob = cbi->eob[1 + pl];
+                            txtp = cbi->txtp[1 + pl];
+                        } else {
+                            uint8_t cf_ctx;
+                            cf = t->cf;
+                            txtp = t->txtp_map[(by4 + (y << ss_ver)) * 32 +
+                                                bx4 + (x << ss_hor)];
+                            eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
+                                               &t->l.ccoef[pl][cby4 + y],
+                                               b->uvtx, bs, b, 0, 1 + pl,
+                                               cf, &txtp, &cf_ctx);
+                            if (DEBUG_BLOCK_INFO)
+                                printf("Post-uv-cf-blk[pl=%d,tx=%d,"
+                                       "txtp=%d,eob=%d]: r=%d\n",
+                                       pl, b->uvtx, txtp, eob, ts->msac.rng);
+                            memset(&t->a->ccoef[pl][cbx4 + x], cf_ctx,
+                                   imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor));
+                            memset(&t->l.ccoef[pl][cby4 + y], cf_ctx,
+                                   imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver));
+                        }
+                        if (eob >= 0) {
+                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                                coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
+                            dsp->itx.itxfm_add[b->uvtx]
+                                              [txtp](&uvdst[4 * x],
+                                                     f->cur.p.stride[1],
+                                                     cf, eob);
+                            if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
+                                hex_dump(&uvdst[4 * x], f->cur.p.stride[1],
+                                         uvtx->w * 4, uvtx->h * 4, "recon");
+                        }
+                        t->bx += uvtx->w << ss_hor;
+                    }
+                    uvdst += PXSTRIDE(f->cur.p.stride[1]) * 4 * uvtx->h;
+                    t->bx -= x << ss_hor;
+                    t->by += uvtx->h << ss_ver;
+                }
+                t->by -= y << ss_ver;
+            }
+        }
+    }
+}
+
+// Run the in-loop post-filters over one superblock row of the current frame,
+// in order: deblocking, CDEF, loop restoration. Afterwards the per-row filter
+// state in f->lf is advanced to the next superblock row.
+// sby is the superblock-row index.
+void bytefn(filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int sbsz = f->sb_step, sbh = f->sbh;
+
+    // Deblocking, skipped entirely when both luma filter levels are zero.
+    if (f->frame_hdr.loopfilter.level_y[0] ||
+        f->frame_hdr.loopfilter.level_y[1])
+    {
+        int start_of_tile_row = 0;
+        if (f->frame_hdr.tiling.row_start_sb[f->lf.tile_row] == sby)
+            start_of_tile_row = f->lf.tile_row++;
+        bytefn(dav1d_loopfilter_sbrow)(f, f->lf.p, f->lf.mask_ptr, sby,
+                                       start_of_tile_row);
+    }
+
+    if (f->seq_hdr.restoration) {
+        // Store loop filtered pixels required by loop restoration
+        bytefn(dav1d_lr_copy_lpf)(f, f->lf.p, sby);
+    }
+    if (f->seq_hdr.cdef) {
+        // CDEF for the bottom two 4-pixel block rows of each superblock row is
+        // deferred to the next call (see n_blks below) — presumably because it
+        // needs filtered pixels from the row below; catch up on them here.
+        if (sby) {
+            pixel *p_up[3] = {
+                f->lf.p[0] - 8 * PXSTRIDE(f->cur.p.stride[0]),
+                f->lf.p[1] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),
+                f->lf.p[2] - (8 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver),
+            };
+            bytefn(dav1d_cdef_brow)(f, p_up, f->lf.prev_mask_ptr,
+                                    sby * sbsz - 2, sby * sbsz);
+        }
+        // Filter this row minus its last two block rows, unless this is the
+        // final superblock row of the frame.
+        const int n_blks = sbsz - 2 * (sby + 1 < sbh);
+        bytefn(dav1d_cdef_brow)(f, f->lf.p, f->lf.mask_ptr, sby * sbsz,
+                                imin(sby * sbsz + n_blks, f->bh));
+    }
+    if (f->seq_hdr.restoration) {
+        bytefn(dav1d_lr_sbrow)(f, f->lf.p, sby);
+    }
+
+    // Advance the plane pointers by one superblock row (sbsz 4-pixel units).
+    f->lf.p[0] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[0]);
+    f->lf.p[1] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+    f->lf.p[2] += sbsz * 4 * PXSTRIDE(f->cur.p.stride[1]) >> ss_ver;
+    f->lf.prev_mask_ptr = f->lf.mask_ptr;
+    // Masks are laid out per 128-sample unit (sb128w stride), so with
+    // 64-sample superblocks the mask pointer advances only every other row.
+    if ((sby & 1) || f->seq_hdr.sb128) {
+        f->lf.mask_ptr += f->sb128w;
+    }
+}
+
+// Copy the bottom row of reconstructed pixels of the current superblock row
+// (all three planes, across this tile's column span) into f->ipred_edge,
+// where it serves as the top intra-prediction edge when decoding reaches the
+// next superblock row (see the top_sb_edge lookups in the reconstruction code).
+void bytefn(backup_ipred_edge)(Dav1dTileContext *const t) {
+    const Dav1dFrameContext *const f = t->f;
+    Dav1dTileState *const ts = t->ts;
+    const int ss_ver = f->cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
+    const int ss_hor = f->cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
+    const int sby = t->by >> f->sb_shift;
+    const int sby_off = f->sb128w * 128 * sby; // this sb row's slot in ipred_edge
+    const int x_off = ts->tiling.col_start;    // tile's first 4x4 column
+
+    // Luma: last pixel row of this sb row is ((t->by + f->sb_step) * 4 - 1).
+    const pixel *const y =
+        ((const pixel *) f->cur.p.data[0]) + x_off * 4 +
+                    ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.p.stride[0]);
+    pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
+               4 * (ts->tiling.col_end - x_off));
+    // Chroma: same row/column computation, scaled by the subsampling factors.
+    const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
+        (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.p.stride[1]);
+    for (int pl = 1; pl <= 2; pl++)
+        pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
+                   &((const pixel *) f->cur.p.data[pl])[uv_off],
+                   4 * (ts->tiling.col_end - x_off) >> ss_hor);
+}
--- /dev/null
+++ b/src/recon.h
@@ -1,0 +1,75 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_RECON_H__
+#define __DAV1D_SRC_RECON_H__
+
+#include "src/internal.h"
+#include "src/levels.h"
+
+// Compile-time debug gates. The leading "0 &&" keeps them disabled; change it
+// to "1 &&" to restrict verbose block tracing to the coordinates given in the
+// rest of the expression (f and t must be in scope at every expansion site).
+#define DEBUG_BLOCK_INFO 0 && \
+        f->frame_hdr.frame_offset == 2 && t->by >= 0 && t->by < 4 && \
+        t->bx >= 8 && t->bx < 12
+#define DEBUG_B_PIXELS 0
+
+// Each reconstruction entry point below exists as an 8 bpc and a 16 bpc
+// variant; the decl_*_fn macros declare a prototype, and the accompanying
+// typedefs provide function-pointer types for runtime bitdepth dispatch.
+
+// Reconstruct one intra-coded block.
+#define decl_recon_b_intra_fn(name) \
+void (name)(Dav1dTileContext *t, enum BlockSize bs, \
+            enum EdgeFlags intra_edge_flags, const Av1Block *b)
+typedef decl_recon_b_intra_fn(*recon_b_intra_fn);
+
+// Reconstruct one inter-coded block.
+#define decl_recon_b_inter_fn(name) \
+void (name)(Dav1dTileContext *t, enum BlockSize bs, const Av1Block *b)
+typedef decl_recon_b_inter_fn(*recon_b_inter_fn);
+
+// Apply the post-filters (deblock/CDEF/restoration) to one superblock row.
+#define decl_filter_sbrow_fn(name) \
+void (name)(Dav1dFrameContext *f, int sby)
+typedef decl_filter_sbrow_fn(*filter_sbrow_fn);
+
+// Save the bottom pixel row of a superblock row as the next row's intra edge.
+#define decl_backup_ipred_edge_fn(name) \
+void (name)(Dav1dTileContext *t)
+typedef decl_backup_ipred_edge_fn(*backup_ipred_edge_fn);
+
+// Read the coded coefficients for one block (implementation not in view here).
+#define decl_read_coef_blocks_fn(name) \
+void (name)(Dav1dTileContext *t, enum BlockSize bs, const Av1Block *b)
+typedef decl_read_coef_blocks_fn(*read_coef_blocks_fn);
+
+decl_recon_b_intra_fn(recon_b_intra_8bpc);
+decl_recon_b_intra_fn(recon_b_intra_16bpc);
+
+decl_recon_b_inter_fn(recon_b_inter_8bpc);
+decl_recon_b_inter_fn(recon_b_inter_16bpc);
+
+decl_filter_sbrow_fn(filter_sbrow_8bpc);
+decl_filter_sbrow_fn(filter_sbrow_16bpc);
+
+decl_backup_ipred_edge_fn(backup_ipred_edge_8bpc);
+decl_backup_ipred_edge_fn(backup_ipred_edge_16bpc);
+
+decl_read_coef_blocks_fn(read_coef_blocks_8bpc);
+decl_read_coef_blocks_fn(read_coef_blocks_16bpc);
+
+#endif /* __DAV1D_SRC_RECON_H__ */
--- /dev/null
+++ b/src/ref.c
@@ -1,0 +1,60 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "common/mem.h"
+
+#include "src/ref.h"
+
+/*
+ * Allocate a Dav1dRef wrapping a 32-byte-aligned buffer of `size` bytes,
+ * with its reference count initialized to 1.
+ *
+ * Returns NULL if either allocation fails; any partially acquired memory
+ * is released again in that case.
+ */
+Dav1dRef *dav1d_ref_create(const size_t size) {
+    Dav1dRef *const res = malloc(sizeof(Dav1dRef));
+    void *const data = dav1d_alloc_aligned(size, 32);
+
+    if (!res || !data) {
+        free(res); // free(NULL) is a no-op, no guard needed
+        // The payload comes from dav1d_alloc_aligned(), so it must be released
+        // with the matching dav1d_free_aligned() (as dav1d_ref_dec() does),
+        // not plain free() — the two are not interchangeable on all platforms.
+        if (data) dav1d_free_aligned(data);
+        return NULL;
+    }
+
+    res->size = size;
+    atomic_init(&res->ref_cnt, 1);
+    res->data = data;
+
+    return res;
+}
+
+// Take out one additional reference on @ref (atomic, thread-safe).
+void dav1d_ref_inc(Dav1dRef *const ref) {
+    atomic_fetch_add_explicit(&ref->ref_cnt, 1, memory_order_seq_cst);
+}
+
+// Give back one reference on @ref; whoever drops the count to zero also
+// releases the payload and the wrapper itself.
+void dav1d_ref_dec(Dav1dRef *const ref) {
+    const int old_cnt = atomic_fetch_sub(&ref->ref_cnt, 1);
+    if (old_cnt == 1) {
+        dav1d_free_aligned(ref->data);
+        free(ref);
+    }
+}
--- /dev/null
+++ b/src/ref.h
@@ -1,0 +1,46 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_REF_H__
+#define __DAV1D_SRC_REF_H__
+
+#include "dav1d/dav1d.h"
+
+#include <stdatomic.h>
+#include <stddef.h>
+
+// Atomically reference-counted heap buffer (implemented in src/ref.c).
+struct Dav1dRef {
+    void *data;         // payload (aligned allocation, owned by this ref)
+    size_t size;        // payload size in bytes
+    atomic_int ref_cnt; // number of outstanding references
+};
+
+// Allocate a buffer of `size` bytes with ref_cnt == 1; NULL on alloc failure.
+Dav1dRef *dav1d_ref_create(size_t size);
+// Increment the reference count.
+void dav1d_ref_inc(Dav1dRef *ref);
+// Decrement the count; the last holder frees data and the ref itself.
+void dav1d_ref_dec(Dav1dRef *ref);
+
+#endif /* __DAV1D_SRC_REF_H__ */
--- /dev/null
+++ b/src/ref_mvs.c
@@ -1,0 +1,3550 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+/*
+ * Changes made compared to libaom version:
+ * - we disable TMV and enable MV_COMPRESS so that the
+ *   input array for prev_frames can be at 4x4 instead of
+ *   8x8 resolution, and therefore shared between cur_frame
+ *   and prev_frame. To make enc/dec behave consistent, we
+ *   also make this change around line 2580:
+#if 0
+                AOMMIN(((mi_row >> 1) << 1) + 1 + (((xd->n8_h - 1) >> 1) << 1),
+                       mi_row_end - 1) *
+                    prev_frame_mvs_stride +
+                AOMMIN(((mi_col >> 1) << 1) + 1 + (((xd->n8_w - 1) >> 1) << 1),
+                       mi_col_end - 1)
+#else
+                (((mi_row >> 1) << 1) + 1) * prev_frame_mvs_stride +
+                (((mi_col >> 1) << 1) + 1)
+#endif
+ *   and the same change (swap mi_cols from prev_frame.mv_stride) on line 2407
+ * - we disable rect-block overhanging edge inclusion (see
+ *   line 2642):
+  if (num_8x8_blocks_wide == num_8x8_blocks_high || 1) {
+    mv_ref_search[5].row = -1;
+    mv_ref_search[5].col = 0;
+    mv_ref_search[6].row = 0;
+    mv_ref_search[6].col = -1;
+  } else {
+    mv_ref_search[5].row = -1;
+    mv_ref_search[5].col = num_8x8_blocks_wide;
+    mv_ref_search[6].row = num_8x8_blocks_high;
+    mv_ref_search[6].col = -1;
+  }
+ *   Note that this is a bitstream change and needs the same
+ *   change on the decoder side also.
+ * - we change xd->mi to be a pointer instead of a double ptr.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#define av1_zero(a) memset(a, 0, sizeof(a))
+
+#define ATTRIBUTE_PACKED
+#define INLINE inline
+#define IMPLIES(a, b) (!(a) || (b))  //  Logical 'a implies b' (or 'a -> b')
+
+#define ROUND_POWER_OF_TWO(value, n) (((value) + (((1 << (n)) >> 1))) >> (n))
+#define ROUND_POWER_OF_TWO_SIGNED(value, n)           \
+  (((value) < 0) ? -ROUND_POWER_OF_TWO(-(value), (n)) \
+                 : ROUND_POWER_OF_TWO((value), (n)))
+#define NELEMENTS(x) (int)(sizeof(x) / sizeof(x[0]))
+
+#define MAX_MV_REF_CANDIDATES 2
+
+#define MAX_REF_MV_STACK_SIZE 8
+#define REF_CAT_LEVEL 640
+
+#define FRAME_OFFSET_BITS 5
+#define MAX_FRAME_DISTANCE ((1 << FRAME_OFFSET_BITS) - 1)
+#define INVALID_MV 0x80008000
+
+#define COMP_NEWMV_CTXS 5
+#define REFMV_OFFSET 4
+#define REFMV_CTX_MASK ((1 << (8 - REFMV_OFFSET)) - 1)
+
+#define MV_IN_USE_BITS 14
+#define MV_UPP (1 << MV_IN_USE_BITS)
+#define MV_LOW (-(1 << MV_IN_USE_BITS))
+
+typedef struct MV {
+    int16_t row;
+    int16_t col;
+} MV;
+typedef union int_mv {
+    uint32_t as_int;
+    MV as_mv;
+} int_mv;
+typedef int8_t MV_REFERENCE_FRAME;
+#define MFMV_STACK_SIZE 3
+typedef struct {
+  int_mv mfmv0;
+  uint8_t ref_frame_offset;
+} TPL_MV_REF;
+typedef struct {
+    int_mv mv[2];
+    MV_REFERENCE_FRAME ref_frame[2];
+    int8_t mode, sb_type;
+} MV_REF;
+#define MB_MODE_INFO MV_REF
+
+#define AOMMAX(a,b) ((a)>(b)?(a):(b))
+#define AOMMIN(a,b) ((a)<(b)?(a):(b))
+
+typedef struct candidate_mv {
+    int_mv this_mv;
+    int_mv comp_mv;
+    int weight;
+} CANDIDATE_MV;
+#define NONE_FRAME -1
+#define INTRA_FRAME 0
+#define LAST_FRAME 1
+
+#define LAST2_FRAME 2
+#define LAST3_FRAME 3
+#define GOLDEN_FRAME 4
+#define BWDREF_FRAME 5
+#define ALTREF2_FRAME 6
+#define ALTREF_FRAME 7
+#define LAST_REF_FRAMES (LAST3_FRAME - LAST_FRAME + 1)
+
+#define INTER_REFS_PER_FRAME (ALTREF_FRAME - LAST_FRAME + 1)
+#define TOTAL_REFS_PER_FRAME (ALTREF_FRAME - INTRA_FRAME + 1)
+
+#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1)
+#define FWD_RF_OFFSET(ref) (ref - LAST_FRAME)
+#define BWD_REFS (ALTREF_FRAME - BWDREF_FRAME + 1)
+#define BWD_RF_OFFSET(ref) (ref - BWDREF_FRAME)
+#define FWD_REFS (GOLDEN_FRAME - LAST_FRAME + 1)
+#define SINGLE_REFS (FWD_REFS + BWD_REFS)
+typedef enum ATTRIBUTE_PACKED {
+  LAST_LAST2_FRAMES,      // { LAST_FRAME, LAST2_FRAME }
+  LAST_LAST3_FRAMES,      // { LAST_FRAME, LAST3_FRAME }
+  LAST_GOLDEN_FRAMES,     // { LAST_FRAME, GOLDEN_FRAME }
+  BWDREF_ALTREF_FRAMES,   // { BWDREF_FRAME, ALTREF_FRAME }
+  LAST2_LAST3_FRAMES,     // { LAST2_FRAME, LAST3_FRAME }
+  LAST2_GOLDEN_FRAMES,    // { LAST2_FRAME, GOLDEN_FRAME }
+  LAST3_GOLDEN_FRAMES,    // { LAST3_FRAME, GOLDEN_FRAME }
+  BWDREF_ALTREF2_FRAMES,  // { BWDREF_FRAME, ALTREF2_FRAME }
+  ALTREF2_ALTREF_FRAMES,  // { ALTREF2_FRAME, ALTREF_FRAME }
+  TOTAL_UNIDIR_COMP_REFS,
+  // NOTE: UNIDIR_COMP_REFS is the number of uni-directional reference pairs
+  //       that are explicitly signaled.
+  UNIDIR_COMP_REFS = BWDREF_ALTREF_FRAMES + 1,
+} UNIDIR_COMP_REF;
+#define TOTAL_COMP_REFS (FWD_REFS * BWD_REFS + TOTAL_UNIDIR_COMP_REFS)
+#define MODE_CTX_REF_FRAMES (TOTAL_REFS_PER_FRAME + TOTAL_COMP_REFS)
+
+#define GLOBALMV_OFFSET 3
+#define NEWMV_CTX_MASK ((1 << GLOBALMV_OFFSET) - 1)
+#define GLOBALMV_CTX_MASK ((1 << (REFMV_OFFSET - GLOBALMV_OFFSET)) - 1)
+#define MI_SIZE_LOG2 2
+#define MI_SIZE (1 << MI_SIZE_LOG2)
+#define MAX_SB_SIZE_LOG2 7
+#define MAX_MIB_SIZE_LOG2 (MAX_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+#define MIN_MIB_SIZE_LOG2 (MIN_SB_SIZE_LOG2 - MI_SIZE_LOG2)
+#define MAX_MIB_SIZE (1 << MAX_MIB_SIZE_LOG2)
+#define MI_SIZE_64X64 (64 >> MI_SIZE_LOG2)
+#define MI_SIZE_128X128 (128 >> MI_SIZE_LOG2)
+#define REFMV_OFFSET 4
+
+typedef enum ATTRIBUTE_PACKED {
+  BLOCK_4X4,
+  BLOCK_4X8,
+  BLOCK_8X4,
+  BLOCK_8X8,
+  BLOCK_8X16,
+  BLOCK_16X8,
+  BLOCK_16X16,
+  BLOCK_16X32,
+  BLOCK_32X16,
+  BLOCK_32X32,
+  BLOCK_32X64,
+  BLOCK_64X32,
+  BLOCK_64X64,
+  BLOCK_64X128,
+  BLOCK_128X64,
+  BLOCK_128X128,
+  BLOCK_4X16,
+  BLOCK_16X4,
+  BLOCK_8X32,
+  BLOCK_32X8,
+  BLOCK_16X64,
+  BLOCK_64X16,
+  BLOCK_32X128,
+  BLOCK_128X32,
+  BLOCK_SIZES_ALL,
+  BLOCK_SIZES = BLOCK_4X16,
+  BLOCK_INVALID = 255,
+  BLOCK_LARGEST = (BLOCK_SIZES - 1)
+} BLOCK_SIZE;
+
+typedef enum ATTRIBUTE_PACKED {
+  PARTITION_NONE,
+  PARTITION_HORZ,
+  PARTITION_VERT,
+  PARTITION_SPLIT,
+  PARTITION_HORZ_A,  // HORZ split and the top partition is split again
+  PARTITION_HORZ_B,  // HORZ split and the bottom partition is split again
+  PARTITION_VERT_A,  // VERT split and the left partition is split again
+  PARTITION_VERT_B,  // VERT split and the right partition is split again
+  PARTITION_HORZ_4,  // 4:1 horizontal partition
+  PARTITION_VERT_4,  // 4:1 vertical partition
+  EXT_PARTITION_TYPES,
+  PARTITION_TYPES = PARTITION_SPLIT + 1,
+  PARTITION_INVALID = 255
+} PARTITION_TYPE;
+typedef struct CUR_MODE_INFO {
+  PARTITION_TYPE partition;
+} CUR_MODE_INFO ;
+
+typedef enum ATTRIBUTE_PACKED {
+  DC_PRED,        // Average of above and left pixels
+  V_PRED,         // Vertical
+  H_PRED,         // Horizontal
+  D45_PRED,       // Directional 45  deg = round(arctan(1/1) * 180/pi)
+  D135_PRED,      // Directional 135 deg = 180 - 45
+  D117_PRED,      // Directional 117 deg = 180 - 63
+  D153_PRED,      // Directional 153 deg = 180 - 27
+  D207_PRED,      // Directional 207 deg = 180 + 27
+  D63_PRED,       // Directional 63  deg = round(arctan(2/1) * 180/pi)
+  SMOOTH_PRED,    // Combination of horizontal and vertical interpolation
+  SMOOTH_V_PRED,  // Vertical interpolation
+  SMOOTH_H_PRED,  // Horizontal interpolation
+  PAETH_PRED,     // Predict from the direction of smallest gradient
+  NEARESTMV,
+  NEARMV,
+  GLOBALMV,
+  NEWMV,
+  // Compound ref compound modes
+  NEAREST_NEARESTMV,
+  NEAR_NEARMV,
+  NEAREST_NEWMV,
+  NEW_NEARESTMV,
+  NEAR_NEWMV,
+  NEW_NEARMV,
+  GLOBAL_GLOBALMV,
+  NEW_NEWMV,
+  MB_MODE_COUNT,
+  INTRA_MODES = PAETH_PRED + 1,  // PAETH_PRED has to be the last intra mode.
+  INTRA_INVALID = MB_MODE_COUNT  // For uv_mode in inter blocks
+} PREDICTION_MODE;
+typedef enum {
+  IDENTITY = 0,      // identity transformation, 0-parameter
+  TRANSLATION = 1,   // translational motion 2-parameter
+  ROTZOOM = 2,       // simplified affine with rotation + zoom only, 4-parameter
+  AFFINE = 3,        // affine, 6-parameter
+  TRANS_TYPES,
+} TransformationType;
+#if 0
+typedef enum {
+  KEY_FRAME = 0,
+  INTER_FRAME = 1,
+#if CONFIG_OBU
+  INTRA_ONLY_FRAME = 2,  // replaces intra-only
+  S_FRAME = 3,
+#endif
+  FRAME_TYPES,
+} FRAME_TYPE;
+#endif
+
+#define LEAST_SQUARES_SAMPLES_MAX_BITS 3
+#define LEAST_SQUARES_SAMPLES_MAX (1 << LEAST_SQUARES_SAMPLES_MAX_BITS)
+#define SAMPLES_ARRAY_SIZE (LEAST_SQUARES_SAMPLES_MAX * 2)
+
+static const uint8_t mi_size_wide[BLOCK_SIZES_ALL] = {
+  1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16,
+  16, 32, 32,  1, 4, 2, 8, 4, 16, 8, 32
+};
+static const uint8_t mi_size_high[BLOCK_SIZES_ALL] = {
+  1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16,
+  32, 16, 32,  4, 1, 8, 2, 16, 4, 32, 8
+};
+
+static const uint8_t block_size_wide[BLOCK_SIZES_ALL] = {
+  4,  4,
+  8,  8,
+  8,  16,
+  16, 16,
+  32, 32,
+  32, 64,
+  64, 64, 128, 128, 4,
+  16, 8,
+  32, 16,
+  64, 32, 128
+};
+
+static const uint8_t block_size_high[BLOCK_SIZES_ALL] = {
+  4,  8,
+  4,  8,
+  16, 8,
+  16, 32,
+  16, 32,
+  64, 32,
+  64, 128, 64, 128, 16,
+  4,  32,
+  8,  64,
+  16, 128, 32
+};
+
+static const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES_ALL] = {
+  1, 1,
+  1, 1,
+  1, 2,
+  2, 2,
+  4, 4,
+  4, 8,
+  8, 8, 16, 16, 1,
+  2, 1,
+  4, 2,
+  8, 4, 16
+};
+static const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES_ALL] = {
+  1, 1,
+  1, 1,
+  2, 1,
+  2, 4,
+  2, 4,
+  8, 4,
+  8, 16, 8, 16, 2,
+  1, 4,
+  1, 8,
+  2, 16, 4
+};
+
+// Whether this block uses a global-motion mode (GLOBALMV / GLOBAL_GLOBALMV)
+// with a warp model beyond pure translation; such modes only qualify when the
+// block's smaller dimension is at least 8 pixels. (Vendored from libaom; code
+// kept byte-identical to upstream.)
+static INLINE int is_global_mv_block(const MB_MODE_INFO *const mbmi,
+                                     TransformationType type) {
+  const PREDICTION_MODE mode = mbmi->mode;
+  const BLOCK_SIZE bsize = mbmi->sb_type;
+  const int block_size_allowed =
+      AOMMIN(block_size_wide[bsize], block_size_high[bsize]) >= 8;
+  return (mode == GLOBALMV || mode == GLOBAL_GLOBALMV) && type > TRANSLATION &&
+         block_size_allowed;
+}
+
+typedef struct {
+  TransformationType wmtype;
+  int32_t wmmat[6];
+  int16_t alpha, beta, gamma, delta;
+} WarpedMotionParams;
+
+#define WARPEDMODEL_PREC_BITS 16
+static const WarpedMotionParams default_warp_params = {
+  IDENTITY,
+  { 0, 0, (1 << WARPEDMODEL_PREC_BITS), 0, 0, (1 << WARPEDMODEL_PREC_BITS) },
+  0, 0, 0, 0,
+};
+
+#define REF_FRAMES_LOG2 3
+#define REF_FRAMES (1 << REF_FRAMES_LOG2)
+#define FRAME_BUFFERS (REF_FRAMES + 7)
+typedef struct {
+#if 0
+  int ref_count;
+#endif
+
+  unsigned int cur_frame_offset;
+  unsigned int ref_frame_offset[INTER_REFS_PER_FRAME];
+
+  MV_REF *mvs;
+  ptrdiff_t mv_stride;
+#if 0
+#if CONFIG_SEGMENT_PRED_LAST
+  uint8_t *seg_map;
+#endif
+#endif
+  int mi_rows;
+  int mi_cols;
+#if 0
+  // Width and height give the size of the buffer (before any upscaling, unlike
+  // the sizes that can be derived from the buf structure)
+  int width;
+  int height;
+  WarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
+#if CONFIG_FILM_GRAIN_SHOWEX
+  int showable_frame;  // frame can be used as show existing frame in future
+#endif
+#if CONFIG_FILM_GRAIN
+  int film_grain_params_present;
+  aom_film_grain_t film_grain_params;
+#endif
+  aom_codec_frame_buffer_t raw_frame_buffer;
+  YV12_BUFFER_CONFIG buf;
+#if CONFIG_HASH_ME
+  hash_table hash_table;
+#endif
+#endif
+  uint8_t intra_only;
+#if 0
+  FRAME_TYPE frame_type;
+  // The Following variables will only be used in frame parallel decode.
+
+  // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
+  // that no FrameWorker owns, or is decoding, this buffer.
+  AVxWorker *frame_worker_owner;
+
+  // row and col indicate which position frame has been decoded to in real
+  // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX
+  // when the frame is fully decoded.
+  int row;
+  int col;
+#endif
+} RefCntBuffer;
+
+#define INVALID_IDX -1  // Invalid buffer index.
+typedef struct TileInfo {
+  int mi_row_start, mi_row_end;
+  int mi_col_start, mi_col_end;
+  int tg_horz_boundary;
+} TileInfo;
+// Per-block decode context ("xd" in upstream libaom).  Only the members
+// used by the MV-reference code here are live; the rest of the upstream
+// struct is kept under #if 0.
+typedef struct macroblockd {
+#if 0
+  struct macroblockd_plane plane[MAX_MB_PLANE];
+  uint8_t bmode_blocks_wl;
+  uint8_t bmode_blocks_hl;
+
+  FRAME_COUNTS *counts;
+#endif
+  // Tile containing the current block.
+  TileInfo tile;
+  // Stride of the mi grid, in MODE_INFO units.
+  int mi_stride;
+
+  // Mode info of the current block, and a pointer into the frame-wide mi
+  // grid at the current position.
+  CUR_MODE_INFO cur_mi;
+  MB_MODE_INFO *mi;
+#if 0
+  MODE_INFO *left_mi;
+  MODE_INFO *above_mi;
+  MB_MODE_INFO *left_mbmi;
+  MB_MODE_INFO *above_mbmi;
+  MB_MODE_INFO *chroma_left_mbmi;
+  MB_MODE_INFO *chroma_above_mbmi;
+#endif
+  // Nonzero when a decoded neighbour exists above / to the left.
+  int up_available;
+  int left_available;
+#if 0
+  int chroma_up_available;
+  int chroma_left_available;
+#endif
+  /* Distance of MB away from frame edges in subpixels (1/8th pixel)  */
+  int mb_to_left_edge;
+  int mb_to_right_edge;
+  int mb_to_top_edge;
+  int mb_to_bottom_edge;
+#if 0
+  FRAME_CONTEXT *fc;
+
+  /* pointers to reference frames */
+  const RefBuffer *block_refs[2];
+
+  /* pointer to current frame */
+  const YV12_BUFFER_CONFIG *cur_buf;
+
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  ENTROPY_CONTEXT left_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+
+  PARTITION_CONTEXT *above_seg_context;
+  PARTITION_CONTEXT left_seg_context[MAX_MIB_SIZE];
+
+  TXFM_CONTEXT *above_txfm_context;
+  TXFM_CONTEXT *left_txfm_context;
+  TXFM_CONTEXT left_txfm_context_buffer[2 * MAX_MIB_SIZE];
+
+#if CONFIG_LOOP_RESTORATION
+  WienerInfo wiener_info[MAX_MB_PLANE];
+  SgrprojInfo sgrproj_info[MAX_MB_PLANE];
+#endif  // CONFIG_LOOP_RESTORATION
+#endif
+  // block dimension in the unit of mode_info.
+  uint8_t n8_w, n8_h;
+#if 0
+  uint8_t ref_mv_count[MODE_CTX_REF_FRAMES];
+  CANDIDATE_MV ref_mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+#endif
+  // NOTE(review): upstream this flags the second part of a rectangular
+  // partition -- confirm where it is set in this port.
+  uint8_t is_sec_rect;
+
+#if 0
+  // Counts of each reference frame in the above and left neighboring blocks.
+  // NOTE: Take into account both single and comp references.
+  uint8_t neighbors_ref_counts[TOTAL_REFS_PER_FRAME];
+
+  FRAME_CONTEXT *tile_ctx;
+  /* Bit depth: 8, 10, 12 */
+  int bd;
+
+  int qindex[MAX_SEGMENTS];
+  int lossless[MAX_SEGMENTS];
+  int corrupted;
+  int cur_frame_force_integer_mv;
+// same with that in AV1_COMMON
+  struct aom_internal_error_info *error_info;
+  const WarpedMotionParams *global_motion;
+  int prev_qindex;
+  int delta_qindex;
+  int current_qindex;
+#if CONFIG_EXT_DELTA_Q
+  // Since actual frame level loop filtering level value is not available
+  // at the beginning of the tile (only available during actual filtering)
+  // at encoder side.we record the delta_lf (against the frame level loop
+  // filtering level) and code the delta between previous superblock's delta
+  // lf and current delta lf. It is equivalent to the delta between previous
+  // superblock's actual lf and current lf.
+  int prev_delta_lf_from_base;
+  int current_delta_lf_from_base;
+  // For this experiment, we have four frame filter levels for different plane
+  // and direction. So, to support the per superblock update, we need to add
+  // a few more params as below.
+  // 0: delta loop filter level for y plane vertical
+  // 1: delta loop filter level for y plane horizontal
+  // 2: delta loop filter level for u plane
+  // 3: delta loop filter level for v plane
+  // To make it consistent with the reference to each filter level in segment,
+  // we need to -1, since
+  // SEG_LVL_ALT_LF_Y_V = 1;
+  // SEG_LVL_ALT_LF_Y_H = 2;
+  // SEG_LVL_ALT_LF_U   = 3;
+  // SEG_LVL_ALT_LF_V   = 4;
+  int prev_delta_lf[FRAME_LF_COUNT];
+  int curr_delta_lf[FRAME_LF_COUNT];
+#endif
+
+  DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
+
+  CFL_CTX cfl;
+
+  JNT_COMP_PARAMS jcp_param;
+
+  int all_one_sided_refs;
+#endif
+} MACROBLOCKD;
+// One entry of a frame's active reference list; idx selects a slot in
+// BufferPool.frame_bufs.  The upstream scaling/map fields are disabled.
+typedef struct RefBuffer {
+  int idx;  // frame buf idx
+#if 0
+  int map_idx;  // frame map idx
+  YV12_BUFFER_CONFIG *buf;
+  struct scale_factors sf;
+#endif
+} RefBuffer;
+// Pool of frame buffer slots shared by current and reference frames.  The
+// upstream locking and external frame-buffer callback machinery is disabled.
+typedef struct BufferPool {
+#if 0
+// Protect BufferPool from being accessed by several FrameWorkers at
+// the same time during frame parallel decode.
+// TODO(hkuang): Try to use atomic variable instead of locking the whole pool.
+#if CONFIG_MULTITHREAD
+  pthread_mutex_t pool_mutex;
+#endif
+
+  // Private data associated with the frame buffer callbacks.
+  void *cb_priv;
+
+  aom_get_frame_buffer_cb_fn_t get_fb_cb;
+  aom_release_frame_buffer_cb_fn_t release_fb_cb;
+#endif
+  // Fixed array of buffer slots, indexed by RefBuffer.idx.
+  RefCntBuffer frame_bufs[FRAME_BUFFERS];
+#if 0
+  // Frame buffers allocated internally by the codec.
+  InternalFrameBufferList int_frame_buffers;
+#endif
+} BufferPool;
+// Frame-level decoder state ("cm" in upstream libaom).  As with the other
+// structs in this file, only a small subset of the upstream fields is live;
+// the remainder is preserved under #if 0 for reference.
+typedef struct AV1Common {
+#if 0
+  struct aom_internal_error_info error;
+  aom_color_primaries_t color_primaries;
+  aom_transfer_characteristics_t transfer_characteristics;
+  aom_matrix_coefficients_t matrix_coefficients;
+  int color_range;
+  int width;
+  int height;
+  int render_width;
+  int render_height;
+  int last_width;
+  int last_height;
+  int timing_info_present;
+  uint32_t num_units_in_tick;
+  uint32_t time_scale;
+  int equal_picture_interval;
+  uint32_t num_ticks_per_picture;
+
+  // TODO(jkoleszar): this implies chroma ss right now, but could vary per
+  // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to
+  // support additional planes.
+  int subsampling_x;
+  int subsampling_y;
+
+  int largest_tile_id;
+  size_t largest_tile_size;
+
+  // Scale of the current frame with respect to itself.
+  struct scale_factors sf_identity;
+
+  // Marks if we need to use 16bit frame buffers (1: yes, 0: no).
+  int use_highbitdepth;
+  YV12_BUFFER_CONFIG *frame_to_show;
+#endif
+
+  // TODO(hkuang): Combine this with cur_buf in macroblockd.
+  RefCntBuffer cur_frame;
+#if 0
+  int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */
+
+  // Prepare ref_frame_map for the next frame.
+  // Only used in frame parallel decode.
+  int next_ref_frame_map[REF_FRAMES];
+
+  // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and
+  // roll new_fb_idx into it.
+#endif
+
+  // Each Inter frame can reference INTER_REFS_PER_FRAME buffers
+  RefBuffer frame_refs[INTER_REFS_PER_FRAME];
+
+#if 0
+  int is_skip_mode_allowed;
+  int skip_mode_flag;
+  int ref_frame_idx_0;
+  int ref_frame_idx_1;
+
+  int new_fb_idx;
+
+  FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/
+  FRAME_TYPE frame_type;
+
+  int show_frame;
+#if CONFIG_FILM_GRAIN_SHOWEX
+  int showable_frame;  // frame can be used as show existing frame in future
+#endif
+  int last_show_frame;
+  int show_existing_frame;
+  // Flag for a frame used as a reference - not written to the bitstream
+  int is_reference_frame;
+
+#if CONFIG_FWD_KF
+  int reset_decoder_state;
+#endif  // CONFIG_FWD_KF
+
+  // Flag signaling that the frame is encoded using only INTRA modes.
+  uint8_t intra_only;
+  uint8_t last_intra_only;
+
+#if CONFIG_CDF_UPDATE_MODE
+  uint8_t disable_cdf_update;
+#endif  // CONFIG_CDF_UPDATE_MODE
+#endif
+  // MV precision flags from the frame header.
+  int allow_high_precision_mv;
+  int cur_frame_force_integer_mv;  // 0 the default in AOM, 1 only integer
+#if 0
+  int disable_intra_edge_filter;  // 1 - disable corner/edge/upsampling
+  int allow_screen_content_tools;
+  int allow_intrabc;
+  int allow_interintra_compound;
+  int allow_masked_compound;
+
+#if !CONFIG_NO_FRAME_CONTEXT_SIGNALING
+  // Flag signaling which frame contexts should be reset to default values.
+  RESET_FRAME_CONTEXT_MODE reset_frame_context;
+#endif
+
+  // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in
+  // MODE_INFO (8-pixel) units.
+  int MBs;
+  int mb_rows, mi_rows;
+  int mb_cols, mi_cols;
+#endif
+  // Frame size in MODE_INFO units, and the stride of the mi grid.
+  int mi_rows;
+  int mi_cols;
+  int mi_stride;
+
+#if 0
+  /* profile settings */
+  TX_MODE tx_mode;
+
+  int base_qindex;
+  int y_dc_delta_q;
+  int u_dc_delta_q;
+  int v_dc_delta_q;
+  int u_ac_delta_q;
+  int v_ac_delta_q;
+
+  int separate_uv_delta_q;
+
+  // The dequantizers below are true dequntizers used only in the
+  // dequantization process.  They have the same coefficient
+  // shift/scale as TX.
+  int16_t y_dequant_QTX[MAX_SEGMENTS][2];
+  int16_t u_dequant_QTX[MAX_SEGMENTS][2];
+  int16_t v_dequant_QTX[MAX_SEGMENTS][2];
+
+  // Global quant matrix tables
+  const qm_val_t *giqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL];
+  const qm_val_t *gqmatrix[NUM_QM_LEVELS][3][TX_SIZES_ALL];
+
+  // Local quant matrix tables for each frame
+  const qm_val_t *y_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+  const qm_val_t *u_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+  const qm_val_t *v_iqmatrix[MAX_SEGMENTS][TX_SIZES_ALL];
+
+  // Encoder
+  int using_qmatrix;
+#if CONFIG_AOM_QM_EXT
+  int qm_y;
+  int qm_u;
+  int qm_v;
+#endif  // CONFIG_AOM_QM_EXT
+  int min_qmlevel;
+  int max_qmlevel;
+
+  /* We allocate a MODE_INFO struct for each macroblock, together with
+     an extra row on top and column on the left to simplify prediction. */
+  int mi_alloc_size;
+  MODE_INFO *mip; /* Base of allocated array */
+  MODE_INFO *mi;  /* Corresponds to upper left visible macroblock */
+
+  // TODO(agrange): Move prev_mi into encoder structure.
+  // prev_mip and prev_mi will only be allocated in encoder.
+  MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */
+  MODE_INFO *prev_mi;  /* 'mi' from last frame (points into prev_mip) */
+
+  // Separate mi functions between encoder and decoder.
+  int (*alloc_mi)(struct AV1Common *cm, int mi_size);
+  void (*free_mi)(struct AV1Common *cm);
+  void (*setup_mi)(struct AV1Common *cm);
+
+  // Grid of pointers to 8x8 MODE_INFO structs.  Any 8x8 not in the visible
+  // area will be NULL.
+  MODE_INFO **mi_grid_base;
+  MODE_INFO **mi_grid_visible;
+  MODE_INFO **prev_mi_grid_base;
+  MODE_INFO **prev_mi_grid_visible;
+#endif
+  // Whether to use previous frame's motion vectors for prediction.
+  int allow_ref_frame_mvs;
+
+#if 0
+#if !CONFIG_SEGMENT_PRED_LAST
+  // Persistent mb segment id map used in prediction.
+  int seg_map_idx;
+  int prev_seg_map_idx;
+
+  uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS];
+#endif
+  uint8_t *last_frame_seg_map;
+  uint8_t *current_frame_seg_map;
+  int seg_map_alloc_size;
+
+  InterpFilter interp_filter;
+
+  int switchable_motion_mode;
+
+  loop_filter_info_n lf_info;
+  // The denominator of the superres scale; the numerator is fixed.
+  uint8_t superres_scale_denominator;
+  int superres_upscaled_width;
+  int superres_upscaled_height;
+  RestorationInfo rst_info[MAX_MB_PLANE];
+
+  // rst_end_stripe[i] is one more than the index of the bottom stripe
+  // for tile row i.
+  int rst_end_stripe[MAX_TILE_ROWS];
+
+  // Pointer to a scratch buffer used by self-guided restoration
+  int32_t *rst_tmpbuf;
+
+  // Flag signaling how frame contexts should be updated at the end of
+  // a frame decode
+  REFRESH_FRAME_CONTEXT_MODE refresh_frame_context;
+#endif
+  int ref_frame_sign_bias[TOTAL_REFS_PER_FRAME]; /* Two state 0, 1 */
+#if 0
+  struct loopfilter lf;
+  struct segmentation seg;
+  int all_lossless;
+#endif
+  int frame_parallel_decode;  // frame-based threading.
+#if 0
+  int reduced_tx_set_used;
+
+  // Context probabilities for reference frame prediction
+  MV_REFERENCE_FRAME comp_fwd_ref[FWD_REFS];
+  MV_REFERENCE_FRAME comp_bwd_ref[BWD_REFS];
+  REFERENCE_MODE reference_mode;
+
+  FRAME_CONTEXT *fc;              /* this frame entropy */
+  FRAME_CONTEXT *frame_contexts;  // FRAME_CONTEXTS
+  FRAME_CONTEXT *pre_fc;          // Context referenced in this frame
+  unsigned int frame_context_idx; /* Context to use/update */
+#if CONFIG_NO_FRAME_CONTEXT_SIGNALING
+  int fb_of_context_type[REF_FRAMES];
+  int primary_ref_frame;
+#endif
+  FRAME_COUNTS counts;
+#endif
+
+  // Order hint of the current frame.
+  unsigned int frame_offset;
+
+#if 0
+  unsigned int current_video_frame;
+  BITSTREAM_PROFILE profile;
+
+  // AOM_BITS_8 in profile 0 or 1, AOM_BITS_10 or AOM_BITS_12 in profile 2 or 3.
+  aom_bit_depth_t bit_depth;
+  aom_bit_depth_t dequant_bit_depth;  // bit_depth of current dequantizer
+
+  int error_resilient_mode;
+
+  int tile_cols, tile_rows;
+  int last_tile_cols, last_tile_rows;
+
+  BOUNDARY_TYPE *boundary_info;
+  int boundary_info_alloc_size;
+
+#if CONFIG_MAX_TILE
+  int min_log2_tile_cols;
+  int max_log2_tile_cols;
+  int max_log2_tile_rows;
+  int min_log2_tile_rows;
+  int min_log2_tiles;
+  int max_tile_width_sb;
+  int max_tile_height_sb;
+  int uniform_tile_spacing_flag;
+  int log2_tile_cols;                        // only valid for uniform tiles
+  int log2_tile_rows;                        // only valid for uniform tiles
+  int tile_col_start_sb[MAX_TILE_COLS + 1];  // valid for 0 <= i <= tile_cols
+  int tile_row_start_sb[MAX_TILE_ROWS + 1];  // valid for 0 <= i <= tile_rows
+#if CONFIG_DEPENDENT_HORZTILES
+  int tile_row_independent[MAX_TILE_ROWS];  // valid for 0 <= i <  tile_rows
+#endif
+  int tile_width, tile_height;  // In MI units
+#else
+  int log2_tile_cols, log2_tile_rows;  // Used in non-large_scale_tile_coding.
+  int tile_width, tile_height;         // In MI units
+#endif  // CONFIG_MAX_TILE
+
+#if CONFIG_EXT_TILE
+  unsigned int large_scale_tile;
+  unsigned int single_tile_decoding;
+#endif  // CONFIG_EXT_TILE
+
+#if CONFIG_DEPENDENT_HORZTILES
+  int dependent_horz_tiles;
+  int tile_group_start_row[MAX_TILE_ROWS][MAX_TILE_COLS];
+  int tile_group_start_col[MAX_TILE_ROWS][MAX_TILE_COLS];
+#endif
+#if CONFIG_LOOPFILTERING_ACROSS_TILES
+#if CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+  int loop_filter_across_tiles_v_enabled;
+  int loop_filter_across_tiles_h_enabled;
+#else
+  int loop_filter_across_tiles_enabled;
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES_EXT
+#endif  // CONFIG_LOOPFILTERING_ACROSS_TILES
+
+  int byte_alignment;
+  int skip_loop_filter;
+
+  // Private data associated with the frame buffer callbacks.
+  void *cb_priv;
+  aom_get_frame_buffer_cb_fn_t get_fb_cb;
+  aom_release_frame_buffer_cb_fn_t release_fb_cb;
+
+  // Handles memory for the codec.
+  InternalFrameBufferList int_frame_buffers;
+#endif
+  // External BufferPool passed from outside.
+  BufferPool buffer_pool;
+#if 0
+  PARTITION_CONTEXT *above_seg_context;
+  ENTROPY_CONTEXT *above_context[MAX_MB_PLANE];
+  TXFM_CONTEXT *above_txfm_context;
+  TXFM_CONTEXT *top_txfm_context[MAX_MB_PLANE];
+  TXFM_CONTEXT left_txfm_context[MAX_MB_PLANE][2 * MAX_MIB_SIZE];
+  int above_context_alloc_cols;
+#endif
+
+  // Global motion parameters, one set per reference-frame slot.
+  WarpedMotionParams global_motion[TOTAL_REFS_PER_FRAME];
+#if 0
+#if CONFIG_FILM_GRAIN
+  int film_grain_params_present;
+  aom_film_grain_t film_grain_params;
+#endif
+  int cdef_pri_damping;
+  int cdef_sec_damping;
+  int nb_cdef_strengths;
+  int cdef_strengths[CDEF_MAX_STRENGTHS];
+  int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
+  int cdef_bits;
+  int cdef_preset[4];
+
+  int delta_q_present_flag;
+  // Resolution of delta quant
+  int delta_q_res;
+#if CONFIG_EXT_DELTA_Q
+  int delta_lf_present_flag;
+  // Resolution of delta lf level
+  int delta_lf_res;
+  // This is a flag for number of deltas of loop filter level
+  // 0: use 1 delta, for y_vertical, y_horizontal, u, and v
+  // 1: use separate deltas for each filter level
+  int delta_lf_multi;
+#endif
+  int num_tg;
+#endif
+  // Subset of the sequence header needed by the code in this file.
+  struct {
+    BLOCK_SIZE sb_size;
+    int enable_order_hint;
+    int order_hint_bits_minus1;
+  } seq_params;
+#if 0
+  SequenceHeader seq_params;
+  int current_frame_id;
+  int ref_frame_id[REF_FRAMES];
+  int valid_for_referencing[REF_FRAMES];
+  int refresh_mask;
+  int invalid_delta_frame_id_minus1;
+  LV_MAP_CTX_TABLE coeff_ctx_table;
+#endif
+  // Projected temporal MVs (presumably filled by the motion-field setup;
+  // see av1_setup_motion_field upstream) -- TODO confirm allocator/owner.
+  TPL_MV_REF *tpl_mvs;
+#if 0
+  int tpl_mvs_mem_size;
+#endif
+  // TODO(jingning): This can be combined with sign_bias later.
+  int8_t ref_frame_side[TOTAL_REFS_PER_FRAME];
+
+#if 0
+  int frame_refs_short_signaling;
+
+#if CONFIG_SCALABILITY
+  int temporal_layer_id;
+  int enhancement_layer_id;
+  int enhancement_layers_cnt;
+#endif
+#if TXCOEFF_TIMER
+  int64_t cum_txcoeff_timer;
+  int64_t txcoeff_timer;
+  int txb_count;
+#endif
+
+#if TXCOEFF_COST_TIMER
+  int64_t cum_txcoeff_cost_timer;
+  int64_t txcoeff_cost_timer;
+  int64_t txcoeff_cost_count;
+#endif
+  const cfg_options_t *options;
+#endif
+
+    // Per-reference buffer-pool index and coded order hint.
+    int ref_buf_idx[INTER_REFS_PER_FRAME];
+    int ref_order_hint[INTER_REFS_PER_FRAME];
+} AV1_COMMON;
+
+// Force *mv to fullpel precision: round each component (stored in 1/8th-pel
+// units) to the nearest multiple of 8.  A remainder of exactly 4 rounds
+// toward zero.
+static INLINE void integer_mv_precision(MV *mv) {
+  int rem = mv->row % 8;
+  if (rem) {
+    mv->row -= rem;
+    if (abs(rem) > 4) mv->row += (rem > 0) ? 8 : -8;
+  }
+
+  rem = mv->col % 8;
+  if (rem) {
+    mv->col -= rem;
+    if (abs(rem) > 4) mv->col += (rem > 0) ? 8 : -8;
+  }
+}
+
+// Clamp value into the inclusive range [low, high] (assumes low <= high).
+static INLINE int clamp(int value, int low, int high) {
+  if (value < low) return low;
+  if (value > high) return high;
+  return value;
+}
+
+// Clamp both components of *mv into their inclusive [min, max] ranges.
+static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
+                            int max_row) {
+  const int col = mv->col;
+  const int row = mv->row;
+  mv->col = clamp(col, min_col, max_col);
+  mv->row = clamp(row, min_row, max_row);
+}
+
+#if 0
+static INLINE int frame_is_intra_only(const AV1_COMMON *const cm) {
+  return cm->frame_type == KEY_FRAME || cm->intra_only;
+}
+#endif
+
+// Returns 1 if the block uses intra block copy.  Upstream mbmi carried an
+// explicit use_intrabc flag (see the commented-out line); this port infers
+// intrabc from an INTRA_FRAME reference whose mv[0].row differs from the
+// -0x8000 sentinel.
+// NOTE(review): the -0x8000 "no MV" sentinel is assumed from this file
+// alone -- confirm against where mv[0] is initialised for intra blocks.
+static INLINE int is_intrabc_block(const MB_MODE_INFO *mbmi) {
+  return mbmi->ref_frame[0] == INTRA_FRAME && mbmi->mv[0].as_mv.row != -0x8000;
+  //return mbmi->use_intrabc;
+}
+
+// A block is inter-coded if it uses intra block copy or references a real
+// (non-intra) reference frame.
+static INLINE int is_inter_block(const MB_MODE_INFO *mbmi) {
+  return is_intrabc_block(mbmi) || mbmi->ref_frame[0] > INTRA_FRAME;
+}
+
+// Returns 1 for compound-reference blocks (a valid second reference frame).
+static INLINE int has_second_ref(const MB_MODE_INFO *mbmi) {
+  const int second_ref = mbmi->ref_frame[1];
+  return second_ref > INTRA_FRAME;
+}
+
+// First reference frame of each unidirectional compound pair, indexed by
+// the uni-comp index produced by get_uni_comp_ref_idx().  Table order must
+// match comp_ref1() and the UNIDIR_COMP pair enumeration.
+static INLINE MV_REFERENCE_FRAME comp_ref0(int ref_idx) {
+  static const MV_REFERENCE_FRAME lut[] = {
+    LAST_FRAME,     // LAST_LAST2_FRAMES,
+    LAST_FRAME,     // LAST_LAST3_FRAMES,
+    LAST_FRAME,     // LAST_GOLDEN_FRAMES,
+    BWDREF_FRAME,   // BWDREF_ALTREF_FRAMES,
+    LAST2_FRAME,    // LAST2_LAST3_FRAMES
+    LAST2_FRAME,    // LAST2_GOLDEN_FRAMES,
+    LAST3_FRAME,    // LAST3_GOLDEN_FRAMES,
+    BWDREF_FRAME,   // BWDREF_ALTREF2_FRAMES,
+    ALTREF2_FRAME,  // ALTREF2_ALTREF_FRAMES,
+  };
+  assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS);
+  return lut[ref_idx];
+}
+
+// Second reference frame of each unidirectional compound pair; companion
+// table to comp_ref0() (same index order).
+static INLINE MV_REFERENCE_FRAME comp_ref1(int ref_idx) {
+  static const MV_REFERENCE_FRAME lut[] = {
+    LAST2_FRAME,    // LAST_LAST2_FRAMES,
+    LAST3_FRAME,    // LAST_LAST3_FRAMES,
+    GOLDEN_FRAME,   // LAST_GOLDEN_FRAMES,
+    ALTREF_FRAME,   // BWDREF_ALTREF_FRAMES,
+    LAST3_FRAME,    // LAST2_LAST3_FRAMES
+    GOLDEN_FRAME,   // LAST2_GOLDEN_FRAMES,
+    GOLDEN_FRAME,   // LAST3_GOLDEN_FRAMES,
+    ALTREF2_FRAME,  // BWDREF_ALTREF2_FRAMES,
+    ALTREF_FRAME,   // ALTREF2_ALTREF_FRAMES,
+  };
+  assert(NELEMENTS(lut) == TOTAL_UNIDIR_COMP_REFS);
+  return lut[ref_idx];
+}
+
+#define WARPEDMODEL_PREC_BITS 16
+#define GM_TRANS_ONLY_PREC_DIFF (WARPEDMODEL_PREC_BITS - 3)
+#define WARPEDMODEL_ROW3HOMO_PREC_BITS 16
+
+// Reduce a translation component from WARPEDMODEL_PREC_BITS fractional bits
+// to 3 fractional bits (1/8th pel).  Without high-precision MVs the result
+// is forced even (1/4th-pel precision) by rounding at one bit less and
+// doubling.
+static INLINE int convert_to_trans_prec(int allow_hp, int coor) {
+  return allow_hp
+             ? ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 3)
+             : ROUND_POWER_OF_TWO_SIGNED(coor, WARPEDMODEL_PREC_BITS - 2) * 2;
+}
+
+// Horizontal pixel coordinate of the centre of the block whose top-left MI
+// column is mi_col.
+static INLINE int block_center_x(int mi_col, BLOCK_SIZE bs) {
+  const int half_bw = block_size_wide[bs] >> 1;
+  return mi_col * MI_SIZE + half_bw - 1;
+}
+
+// Vertical pixel coordinate of the centre of the block whose top-left MI
+// row is mi_row.
+static INLINE int block_center_y(int mi_row, BLOCK_SIZE bs) {
+  const int half_bh = block_size_high[bs] >> 1;
+  return mi_row * MI_SIZE + half_bh - 1;
+}
+
+#if 0
+static INLINE MV_REFERENCE_FRAME comp_ref0(int ref_idx) {
+  static const MV_REFERENCE_FRAME lut[] = {
+    LAST_FRAME,    // LAST_LAST2_FRAMES,
+    LAST_FRAME,    // LAST_LAST3_FRAMES,
+    LAST_FRAME,    // LAST_GOLDEN_FRAMES,
+    BWDREF_FRAME,  // BWDREF_ALTREF_FRAMES,
+  };
+  assert(NELEMENTS(lut) == UNIDIR_COMP_REFS);
+  return lut[ref_idx];
+}
+
+static INLINE MV_REFERENCE_FRAME comp_ref1(int ref_idx) {
+  static const MV_REFERENCE_FRAME lut[] = {
+    LAST2_FRAME,   // LAST_LAST2_FRAMES,
+    LAST3_FRAME,   // LAST_LAST3_FRAMES,
+    GOLDEN_FRAME,  // LAST_GOLDEN_FRAMES,
+    ALTREF_FRAME,  // BWDREF_ALTREF_FRAMES,
+  };
+  assert(NELEMENTS(lut) == UNIDIR_COMP_REFS);
+  return lut[ref_idx];
+}
+#endif
+
+// Convert a global motion vector into a motion vector at the centre of the
+// given block.
+//
+// The resulting motion vector will have three fractional bits of precision. If
+// allow_hp is zero, the bottom bit will always be zero. If CONFIG_AMVR and
+// is_integer is true, the bottom three bits will be zero (so the motion vector
+// represents an integer)
+static INLINE int_mv gm_get_motion_vector(const WarpedMotionParams *gm,
+                                          int allow_hp, BLOCK_SIZE bsize,
+                                          int mi_col, int mi_row,
+                                          int is_integer) {
+  int_mv res;
+  const int32_t *mat = gm->wmmat;
+  int x, y, tx, ty;
+
+  if (gm->wmtype == TRANSLATION) {
+    // All global motion vectors are stored with WARPEDMODEL_PREC_BITS (16)
+    // bits of fractional precision. The offset for a translation is stored in
+    // entries 0 and 1. For translations, all but the top three (two if
+    // cm->allow_high_precision_mv is false) fractional bits are always zero.
+    //
+    // After the right shifts, there are 3 fractional bits of precision. If
+    // allow_hp is false, the bottom bit is always zero (so we don't need a
+    // call to convert_to_trans_prec here)
+    res.as_mv.row = gm->wmmat[0] >> GM_TRANS_ONLY_PREC_DIFF;
+    res.as_mv.col = gm->wmmat[1] >> GM_TRANS_ONLY_PREC_DIFF;
+    assert(IMPLIES(1 & (res.as_mv.row | res.as_mv.col), allow_hp));
+    if (is_integer) {
+      integer_mv_precision(&res.as_mv);
+    }
+    return res;
+  }
+
+  // Evaluate the warp model at the block centre (in pixels).
+  x = block_center_x(mi_col, bsize);
+  y = block_center_y(mi_row, bsize);
+
+  if (gm->wmtype == ROTZOOM) {
+    assert(gm->wmmat[5] == gm->wmmat[2]);
+    assert(gm->wmmat[4] == -gm->wmmat[3]);
+  }
+  if (gm->wmtype > AFFINE) {
+    // Projective models: apply the full 3x3 matrix, including the
+    // perspective divide by Z (rounded to nearest, halves away from zero).
+    int xc = (int)((int64_t)mat[2] * x + (int64_t)mat[3] * y + mat[0]);
+    int yc = (int)((int64_t)mat[4] * x + (int64_t)mat[5] * y + mat[1]);
+    const int Z = (int)((int64_t)mat[6] * x + (int64_t)mat[7] * y +
+                        (1 << WARPEDMODEL_ROW3HOMO_PREC_BITS));
+    // Rescale between the row-3 and model precisions; with both defined as
+    // 16 bits in this file the shift amount is 0 and these are no-ops.
+    xc *= 1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS - WARPEDMODEL_PREC_BITS);
+    yc *= 1 << (WARPEDMODEL_ROW3HOMO_PREC_BITS - WARPEDMODEL_PREC_BITS);
+    xc = (int)(xc > 0 ? ((int64_t)xc + Z / 2) / Z : ((int64_t)xc - Z / 2) / Z);
+    yc = (int)(yc > 0 ? ((int64_t)yc + Z / 2) / Z : ((int64_t)yc - Z / 2) / Z);
+    tx = convert_to_trans_prec(allow_hp, xc) - (x << 3);
+    ty = convert_to_trans_prec(allow_hp, yc) - (y << 3);
+  } else {
+    // Affine/rotzoom models: linear part relative to the identity matrix
+    // (hence the subtraction of 1 << WARPEDMODEL_PREC_BITS on the diagonal),
+    // so xc/yc are already displacements rather than absolute positions.
+    const int xc =
+        (mat[2] - (1 << WARPEDMODEL_PREC_BITS)) * x + mat[3] * y + mat[0];
+    const int yc =
+        mat[4] * x + (mat[5] - (1 << WARPEDMODEL_PREC_BITS)) * y + mat[1];
+    tx = convert_to_trans_prec(allow_hp, xc);
+    ty = convert_to_trans_prec(allow_hp, yc);
+  }
+
+  res.as_mv.row = ty;
+  res.as_mv.col = tx;
+
+  if (is_integer) {
+    integer_mv_precision(&res.as_mv);
+  }
+  return res;
+}
+
+// Returns 1 when the inter mode codes at least one explicit (NEW) motion
+// vector.
+static INLINE int have_newmv_in_inter_mode(PREDICTION_MODE mode) {
+  switch (mode) {
+    case NEWMV:
+    case NEW_NEWMV:
+    case NEAREST_NEWMV:
+    case NEW_NEARESTMV:
+    case NEAR_NEWMV:
+    case NEW_NEARMV: return 1;
+    default: return 0;
+  }
+}
+
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+#ifndef AV1_COMMON_MVREF_COMMON_H_
+#define AV1_COMMON_MVREF_COMMON_H_
+
+//#include "av1/common/onyxc_int.h"
+//#include "av1/common/blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MVREF_ROW_COLS 3
+
+// Set the upper limit of the motion vector component magnitude.
+// This would make a motion vector fit in 26 bits. Plus 3 bits for the
+// reference frame index. A tuple of motion vector can hence be stored within
+// 32 bit range for efficient load/store operations.
+#define REFMVS_LIMIT ((1 << 12) - 1)
+
+// Relative (row, col) offset of a candidate MV position, in MI units,
+// added to the current block's mi_row/mi_col (see is_inside()).
+typedef struct position {
+  int row;
+  int col;
+} POSITION;
+
+// clamp_mv_ref
+#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
+
+// Signed distance a - b between two order hints, which are coded modulo
+// 2^bits.  The mask arithmetic below sign-extends the bits-wide difference
+// so the result lies in [-2^(bits-1), 2^(bits-1) - 1].  Returns 0 when
+// order hints are disabled in the sequence header.
+static INLINE int get_relative_dist(const AV1_COMMON *cm, int a, int b) {
+  if (!cm->seq_params.enable_order_hint) return 0;
+
+  const int bits = cm->seq_params.order_hint_bits_minus1 + 1;
+
+  assert(bits >= 1);
+  assert(a >= 0 && a < (1 << bits));
+  assert(b >= 0 && b < (1 << bits));
+
+  int diff = a - b;
+  int m = 1 << (bits - 1);
+  // Sign-extend from `bits` bits: keep the low bits, subtract the sign bit.
+  diff = (diff & (m - 1)) - (diff & m);
+  return diff;
+}
+
+// Clamp a candidate MV so the bw x bh block stays within MV_BORDER pels of
+// the frame (edge distances in xd are in 1/8th-pel units, hence * 8).
+static INLINE void clamp_mv_ref(MV *mv, int bw, int bh, const MACROBLOCKD *xd) {
+  const int bw8 = bw * 8;
+  const int bh8 = bh * 8;
+  clamp_mv(mv, xd->mb_to_left_edge - bw8 - MV_BORDER,
+           xd->mb_to_right_edge + bw8 + MV_BORDER,
+           xd->mb_to_top_edge - bh8 - MV_BORDER,
+           xd->mb_to_bottom_edge + bh8 + MV_BORDER);
+}
+
+// This function returns either the appropriate sub block or block's mv
+// on whether the block_size < 8x8 and we have check_sub_blocks set.
+// In AV1 a block stores a single MV pair, so the candidate's block MV is
+// returned directly and search_col (a leftover of sub-8x8 sub-block MV
+// selection) is ignored.
+static INLINE int_mv get_sub_block_mv(const MB_MODE_INFO *candidate,
+                                      int which_mv, int search_col) {
+  (void)search_col;
+  return candidate->mv[which_mv];
+}
+
+// Same as get_sub_block_mv(): AV1 keeps one MV pair per block, so the
+// block-level MV is returned and search_col is unused.
+static INLINE int_mv get_sub_block_pred_mv(const MB_MODE_INFO *candidate,
+                                           int which_mv, int search_col) {
+  (void)search_col;
+  return candidate->mv[which_mv];
+}
+
+// Returns the candidate's MV for slot `ref`, with both components negated
+// when the candidate's reference frame and this_ref_frame have opposite
+// sign bias (i.e. lie on opposite temporal sides of the current frame).
+static INLINE int_mv scale_mv(const MB_MODE_INFO *mbmi, int ref,
+                              const MV_REFERENCE_FRAME this_ref_frame,
+                              const int *ref_sign_bias) {
+  int_mv mv = mbmi->mv[ref];
+  const int invert =
+      ref_sign_bias[mbmi->ref_frame[ref]] != ref_sign_bias[this_ref_frame];
+  if (invert) {
+    mv.as_mv.row = -mv.as_mv.row;
+    mv.as_mv.col = -mv.as_mv.col;
+  }
+  return mv;
+}
+
+// Checks that the given mi_row, mi_col and search point
+// are inside the borders of the tile.
+// Note: dependent_horz_tile_flag is hard-coded to 0 (the dependent
+// horizontal tiles experiment is not supported), so only the else branch
+// below -- a plain tile-bounds test -- is ever taken; the mi_rows
+// parameter is consequently unused on the live path.
+static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row,
+                            int mi_rows, const POSITION *mi_pos) {
+  const int dependent_horz_tile_flag = 0;
+  if (dependent_horz_tile_flag && !tile->tg_horz_boundary) {
+    return !(mi_row + mi_pos->row < 0 ||
+             mi_col + mi_pos->col < tile->mi_col_start ||
+             mi_row + mi_pos->row >= mi_rows ||
+             mi_col + mi_pos->col >= tile->mi_col_end);
+  } else {
+    return !(mi_row + mi_pos->row < tile->mi_row_start ||
+             mi_col + mi_pos->col < tile->mi_col_start ||
+             mi_row + mi_pos->row >= tile->mi_row_end ||
+             mi_col + mi_pos->col >= tile->mi_col_end);
+  }
+}
+
+// Clamp row_offset so that mi_row + row_offset stays inside the tile's row
+// range.  The dependent-horizontal-tiles branch inherited from libaom was
+// dead code (its flag was hard-coded to 0) and has been removed; the
+// mi_rows parameter is kept, and ignored, so call sites are unchanged.
+static INLINE int find_valid_row_offset(const TileInfo *const tile, int mi_row,
+                                        int mi_rows, int row_offset) {
+  (void)mi_rows;
+  return clamp(row_offset, tile->mi_row_start - mi_row,
+               tile->mi_row_end - mi_row - 1);
+}
+
+// Clamp col_offset so that mi_col + col_offset stays inside the tile's
+// column range.
+static INLINE int find_valid_col_offset(const TileInfo *const tile, int mi_col,
+                                        int col_offset) {
+  const int min_offset = tile->mi_col_start - mi_col;
+  const int max_offset = tile->mi_col_end - mi_col - 1;
+  return clamp(col_offset, min_offset, max_offset);
+}
+
+// Reduce MV precision: to integer pel when is_integer is set; otherwise,
+// when high-precision MVs are disabled, clear the lowest 1/8th-pel bit by
+// stepping each odd component toward zero.
+static INLINE void lower_mv_precision(MV *mv, int allow_hp,
+                                      int is_integer) {
+  if (is_integer) {
+    integer_mv_precision(mv);
+    return;
+  }
+  if (allow_hp) return;
+  if (mv->row & 1) mv->row += (mv->row > 0) ? -1 : 1;
+  if (mv->col & 1) mv->col += (mv->col > 0) ? -1 : 1;
+}
+
+// Maps a reference-frame pair to its unidirectional-compound index
+// (matching the comp_ref0()/comp_ref1() tables), or -1 when the pair is
+// single-reference or a (forward, backward) bidirectional pair.
+static INLINE int8_t get_uni_comp_ref_idx(const MV_REFERENCE_FRAME *const rf) {
+  // Single ref pred
+  if (rf[1] <= INTRA_FRAME) return -1;
+
+  // Bi-directional comp ref pred
+  if ((rf[0] < BWDREF_FRAME) && (rf[1] >= BWDREF_FRAME)) return -1;
+
+  for (int8_t ref_idx = 0; ref_idx < TOTAL_UNIDIR_COMP_REFS; ++ref_idx) {
+    if (rf[0] == comp_ref0(ref_idx) && rf[1] == comp_ref1(ref_idx))
+      return ref_idx;
+  }
+  return -1;
+}
+
+// Packs a reference-frame pair into a single int8_t "type":
+//   [0, REF_FRAMES)                      single reference (rf[0] itself),
+//   [REF_FRAMES, REF_FRAMES+FWD*BWD)     bidirectional compound pairs,
+//   above that                           unidirectional compound pairs.
+// Inverse of av1_set_ref_frame().
+static INLINE int8_t av1_ref_frame_type(const MV_REFERENCE_FRAME *const rf) {
+  if (rf[1] > INTRA_FRAME) {
+    const int8_t uni_comp_ref_idx = get_uni_comp_ref_idx(rf);
+    if (uni_comp_ref_idx >= 0) {
+      assert((REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx) <
+             MODE_CTX_REF_FRAMES);
+      return REF_FRAMES + FWD_REFS * BWD_REFS + uni_comp_ref_idx;
+    } else {
+      return REF_FRAMES + FWD_RF_OFFSET(rf[0]) +
+             BWD_RF_OFFSET(rf[1]) * FWD_REFS;
+    }
+  }
+
+  return rf[0];
+}
+
+// Compound reference pairs, indexed by (ref_frame_type - REF_FRAMES).
+// The ordering must match the packing in av1_ref_frame_type(); the inverse
+// lookup is done by av1_set_ref_frame().
+// clang-format off
+static MV_REFERENCE_FRAME ref_frame_map[TOTAL_COMP_REFS][2] = {
+  { LAST_FRAME, BWDREF_FRAME },  { LAST2_FRAME, BWDREF_FRAME },
+  { LAST3_FRAME, BWDREF_FRAME }, { GOLDEN_FRAME, BWDREF_FRAME },
+
+  { LAST_FRAME, ALTREF2_FRAME },  { LAST2_FRAME, ALTREF2_FRAME },
+  { LAST3_FRAME, ALTREF2_FRAME }, { GOLDEN_FRAME, ALTREF2_FRAME },
+
+  { LAST_FRAME, ALTREF_FRAME },  { LAST2_FRAME, ALTREF_FRAME },
+  { LAST3_FRAME, ALTREF_FRAME }, { GOLDEN_FRAME, ALTREF_FRAME },
+
+  { LAST_FRAME, LAST2_FRAME }, { LAST_FRAME, LAST3_FRAME },
+  { LAST_FRAME, GOLDEN_FRAME }, { BWDREF_FRAME, ALTREF_FRAME },
+
+  // NOTE: Following reference frame pairs are not supported to be explicitly
+  //       signalled, but they are possibly chosen by the use of skip_mode,
+  //       which may use the most recent one-sided reference frame pair.
+  { LAST2_FRAME, LAST3_FRAME }, { LAST2_FRAME, GOLDEN_FRAME },
+  { LAST3_FRAME, GOLDEN_FRAME }, {BWDREF_FRAME, ALTREF2_FRAME},
+  { ALTREF2_FRAME, ALTREF_FRAME }
+};
+// clang-format on
+
+// Expand a packed reference-frame type (see av1_ref_frame_type()) into an
+// {rf[0], rf[1]} pair.  Values >= REF_FRAMES index the compound-pair table;
+// smaller values denote a single reference with NONE_FRAME second.
+static INLINE void av1_set_ref_frame(MV_REFERENCE_FRAME *rf,
+                                     int8_t ref_frame_type) {
+  if (ref_frame_type < REF_FRAMES) {
+    assert(ref_frame_type > NONE_FRAME);
+    rf[0] = ref_frame_type;
+    rf[1] = NONE_FRAME;
+  } else {
+    const MV_REFERENCE_FRAME *const pair =
+        ref_frame_map[ref_frame_type - REF_FRAMES];
+    rf[0] = pair[0];
+    rf[1] = pair[1];
+  }
+}
+
+// Compound inter-mode context table, indexed by [refmv_ctx >> 1]
+// [min(newmv_ctx, COMP_NEWMV_CTXS - 1)]; see av1_mode_context_analyzer().
+static uint16_t compound_mode_ctx_map[3][COMP_NEWMV_CTXS] = {
+  { 0, 1, 1, 1, 1 },
+  { 1, 2, 3, 4, 4 },
+  { 4, 4, 5, 6, 7 },
+};
+
+// Derive the inter-mode coding context for the given reference pair.
+// Single-reference blocks return the mode_context entry unchanged; compound
+// blocks unpack the NEWMV and REFMV sub-contexts from that entry and
+// combine them through compound_mode_ctx_map.
+static INLINE int16_t av1_mode_context_analyzer(
+    const int16_t *const mode_context, const MV_REFERENCE_FRAME *const rf) {
+  const int8_t ref_frame = av1_ref_frame_type(rf);
+
+  if (rf[1] <= INTRA_FRAME) return mode_context[ref_frame];
+
+  // Bit-packed sub-contexts: NEWMV in the low bits, REFMV above it.
+  const int16_t newmv_ctx = mode_context[ref_frame] & NEWMV_CTX_MASK;
+  const int16_t refmv_ctx =
+      (mode_context[ref_frame] >> REFMV_OFFSET) & REFMV_CTX_MASK;
+
+  const int16_t comp_ctx = compound_mode_ctx_map[refmv_ctx >> 1][AOMMIN(
+      newmv_ctx, COMP_NEWMV_CTXS - 1)];
+  return comp_ctx;
+}
+
+// Context for coding the dynamic-reference-list index, derived from whether
+// the candidates at ref_idx and ref_idx + 1 are "strong" (weight >=
+// REF_CAT_LEVEL): both strong or weak/strong -> 0, strong/weak -> 1,
+// both weak -> 2.
+static INLINE uint8_t av1_drl_ctx(const CANDIDATE_MV *ref_mv_stack,
+                                  int ref_idx) {
+  const int strong0 = ref_mv_stack[ref_idx].weight >= REF_CAT_LEVEL;
+  const int strong1 = ref_mv_stack[ref_idx + 1].weight >= REF_CAT_LEVEL;
+
+  if (strong0) return strong1 ? 0 : 1;
+  return strong1 ? 0 : 2;
+}
+
+void av1_setup_frame_buf_refs(AV1_COMMON *cm);
+void av1_setup_frame_sign_bias(AV1_COMMON *cm);
+void av1_setup_skip_mode_allowed(AV1_COMMON *cm);
+
+#if 0
+void av1_setup_motion_field(AV1_COMMON *cm);
+void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx, int gld_map_idx);
+#endif  // 0 (upstream: CONFIG_FRAME_REFS_SIGNALING)
+
+#if 0
+// (Disabled via #if 0.)  Tallies, into xd->neighbors_ref_counts, how many
+// times each reference frame is used by the above and left neighbor blocks
+// (counting both references of a compound neighbor).
+static INLINE void av1_collect_neighbors_ref_counts(MACROBLOCKD *const xd) {
+  av1_zero(xd->neighbors_ref_counts);
+
+  uint8_t *const ref_counts = xd->neighbors_ref_counts;
+
+  const MB_MODE_INFO *const above_mbmi = xd->above_mbmi;
+  const MB_MODE_INFO *const left_mbmi = xd->left_mbmi;
+  const int above_in_image = xd->up_available;
+  const int left_in_image = xd->left_available;
+
+  // Above neighbor
+  if (above_in_image && is_inter_block(above_mbmi)) {
+    ref_counts[above_mbmi->ref_frame[0]]++;
+    if (has_second_ref(above_mbmi)) {
+      ref_counts[above_mbmi->ref_frame[1]]++;
+    }
+  }
+
+  // Left neighbor
+  if (left_in_image && is_inter_block(left_mbmi)) {
+    ref_counts[left_mbmi->ref_frame[0]]++;
+    if (has_second_ref(left_mbmi)) {
+      ref_counts[left_mbmi->ref_frame[1]]++;
+    }
+  }
+}
+#endif
+
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MB_MODE_INFO *mi,
+                        int mi_row, int mi_col, int x_mis, int y_mis);
+
+void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                      MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+                      uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
+                      CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+                      int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
+                      int_mv *global_mvs, int mi_row, int mi_col,
+                      int16_t *mode_context);
+
+// check a list of motion vectors by sad score using a number rows of pixels
+// above and a number cols of pixels in the left to select the one with best
+// score to use as ref motion vector
+void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+                           int_mv *near_mv, int is_integer);
+
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize);
+int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
+                int *pts, int *pts_inref);
+
+#define INTRABC_DELAY_PIXELS 256  //  Delay of 256 pixels
+#define INTRABC_DELAY_SB64 (INTRABC_DELAY_PIXELS / 64)
+#define USE_WAVE_FRONT 1  // Use only top left area of frame for reference.
+
+// Default displacement vector for intra block copy when no predictor is
+// available: if there is a superblock row above within the tile, point one
+// superblock up; otherwise point left, past the INTRABC_DELAY_PIXELS hazard
+// window.  The result is converted to 1/8-pel units (*= 8).
+static INLINE void av1_find_ref_dv(int_mv *ref_dv, const TileInfo *const tile,
+                                   int mib_size, int mi_row, int mi_col) {
+  (void)mi_col;
+  if (mi_row - mib_size < tile->mi_row_start) {
+    ref_dv->as_mv.row = 0;
+    ref_dv->as_mv.col = -MI_SIZE * mib_size - INTRABC_DELAY_PIXELS;
+  } else {
+    ref_dv->as_mv.row = -MI_SIZE * mib_size;
+    ref_dv->as_mv.col = 0;
+  }
+  ref_dv->as_mv.row *= 8;
+  ref_dv->as_mv.col *= 8;
+}
+
+// Validates an intra-block-copy displacement vector dv for the block at
+// (mi_row, mi_col): it must be whole-pel, the source rectangle must lie
+// entirely inside the current tile, and it may only reference area already
+// decoded, accounting for the INTRABC_DELAY_SB64 hardware pipeline delay
+// and (if USE_WAVE_FRONT) the wavefront constraint.  Returns 1 if valid.
+static INLINE int av1_is_dv_valid(const MV dv, const AV1_COMMON *cm,
+                                  const MACROBLOCKD *xd, int mi_row, int mi_col,
+                                  BLOCK_SIZE bsize, int mib_size_log2) {
+  const int bw = block_size_wide[bsize];
+  const int bh = block_size_high[bsize];
+  const int SCALE_PX_TO_MV = 8;
+  // Disallow subpixel for now
+  // SUBPEL_MASK is not the correct scale
+  if (((dv.row & (SCALE_PX_TO_MV - 1)) || (dv.col & (SCALE_PX_TO_MV - 1))))
+    return 0;
+
+  const TileInfo *const tile = &xd->tile;
+  // Is the source top-left inside the current tile?
+  const int src_top_edge = mi_row * MI_SIZE * SCALE_PX_TO_MV + dv.row;
+  const int tile_top_edge = tile->mi_row_start * MI_SIZE * SCALE_PX_TO_MV;
+  if (src_top_edge < tile_top_edge) return 0;
+  const int src_left_edge = mi_col * MI_SIZE * SCALE_PX_TO_MV + dv.col;
+  const int tile_left_edge = tile->mi_col_start * MI_SIZE * SCALE_PX_TO_MV;
+  if (src_left_edge < tile_left_edge) return 0;
+  // Is the bottom right inside the current tile?
+  const int src_bottom_edge = (mi_row * MI_SIZE + bh) * SCALE_PX_TO_MV + dv.row;
+  const int tile_bottom_edge = tile->mi_row_end * MI_SIZE * SCALE_PX_TO_MV;
+  if (src_bottom_edge > tile_bottom_edge) return 0;
+  const int src_right_edge = (mi_col * MI_SIZE + bw) * SCALE_PX_TO_MV + dv.col;
+  const int tile_right_edge = tile->mi_col_end * MI_SIZE * SCALE_PX_TO_MV;
+  if (src_right_edge > tile_right_edge) return 0;
+
+#if 0
+  // Special case for sub 8x8 chroma cases, to prevent referring to chroma
+  // pixels outside current tile.
+  for (int plane = 1; plane < av1_num_planes(cm); ++plane) {
+    const struct macroblockd_plane *const pd = &xd->plane[plane];
+    if (is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
+                            pd->subsampling_y)) {
+      if (bw < 8 && pd->subsampling_x)
+        if (src_left_edge < tile_left_edge + 4 * SCALE_PX_TO_MV) return 0;
+      if (bh < 8 && pd->subsampling_y)
+        if (src_top_edge < tile_top_edge + 4 * SCALE_PX_TO_MV) return 0;
+    }
+  }
+#endif
+
+  // Is the bottom right within an already coded SB? Also consider additional
+  // constraints to facilitate HW decoder.
+  const int max_mib_size = 1 << mib_size_log2;
+  const int active_sb_row = mi_row >> mib_size_log2;
+  const int active_sb64_col = (mi_col * MI_SIZE) >> 6;
+  const int sb_size = max_mib_size * MI_SIZE;
+  // >> 3 converts the 1/8-pel edge coordinates back to pixels.
+  const int src_sb_row = ((src_bottom_edge >> 3) - 1) / sb_size;
+  const int src_sb64_col = ((src_right_edge >> 3) - 1) >> 6;
+  const int total_sb64_per_row =
+      ((tile->mi_col_end - tile->mi_col_start - 1) >> 4) + 1;
+  const int active_sb64 = active_sb_row * total_sb64_per_row + active_sb64_col;
+  const int src_sb64 = src_sb_row * total_sb64_per_row + src_sb64_col;
+  if (src_sb64 >= active_sb64 - INTRABC_DELAY_SB64) return 0;
+
+#if USE_WAVE_FRONT
+  const int gradient = 1 + INTRABC_DELAY_SB64 + (sb_size > 64);
+  const int wf_offset = gradient * (active_sb_row - src_sb_row);
+  if (src_sb_row > active_sb_row ||
+      src_sb64_col >= active_sb64_col - INTRABC_DELAY_SB64 + wf_offset)
+    return 0;
+#endif
+
+  return 1;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // AV1_COMMON_MVREF_COMMON_H_
+
+/*
+ * Copyright (c) 2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include <stdlib.h>
+
+//#include "av1/common/mvref_common.h"
+//#include "av1/common/warped_motion.h"
+
+// Although we assign 32 bit integers, all the values are strictly under 14
+// bits.
+// div_mult[d] == (1 << 14) / d (truncated): fixed-point reciprocals used by
+// get_mv_projection() to replace a division by the frame distance.
+static int div_mult[32] = { 0,    16384, 8192, 5461, 4096, 3276, 2730, 2340,
+                            2048, 1820,  1638, 1489, 1365, 1260, 1170, 1092,
+                            1024, 963,   910,  862,  819,  780,  744,  712,
+                            682,  655,   630,  606,  585,  564,  546,  528 };
+
+// TODO(jingning): Consider the use of lookup table for (num / den)
+// altogether.
+// Scales the motion vector ref by the frame-distance ratio num/den, using
+// the div_mult reciprocal table (14-bit fixed point) instead of a division.
+// Both distances are clamped to +/-MAX_FRAME_DISTANCE and the result to the
+// legal MV range (MV_LOW, MV_UPP).
+static void get_mv_projection(MV *output, MV ref, int num, int den) {
+  den = AOMMIN(den, MAX_FRAME_DISTANCE);
+  num = num > 0 ? AOMMIN(num, MAX_FRAME_DISTANCE)
+                : AOMMAX(num, -MAX_FRAME_DISTANCE);
+  int mv_row = ROUND_POWER_OF_TWO_SIGNED(ref.row * num * div_mult[den], 14);
+  int mv_col = ROUND_POWER_OF_TWO_SIGNED(ref.col * num * div_mult[den], 14);
+  const int clamp_max = MV_UPP - 1;
+  const int clamp_min = MV_LOW + 1;
+  output->row = (int16_t)clamp(mv_row, clamp_min, clamp_max);
+  output->col = (int16_t)clamp(mv_col, clamp_min, clamp_max);
+}
+
+#if 0
+// (Disabled via #if 0.)  Stores the block's motion vectors into the
+// frame-level MV buffer at 8x8 (2-mi) granularity for temporal MV
+// prediction by future frames.  MVs whose reference has
+// cm->ref_frame_side[] set, or whose magnitude exceeds REFMVS_LIMIT, are
+// skipped; otherwise the later reference index (idx == 1) wins.
+void av1_copy_frame_mvs(const AV1_COMMON *const cm, MB_MODE_INFO *mi,
+                        int mi_row, int mi_col, int x_mis, int y_mis) {
+  const int frame_mvs_stride = ROUND_POWER_OF_TWO(cm->mi_cols, 1);
+  MV_REF *frame_mvs =
+      cm->cur_frame.mvs + (mi_row >> 1) * frame_mvs_stride + (mi_col >> 1);
+  x_mis = ROUND_POWER_OF_TWO(x_mis, 1);
+  y_mis = ROUND_POWER_OF_TWO(y_mis, 1);
+  int w, h;
+
+  for (h = 0; h < y_mis; h++) {
+    MV_REF *mv = frame_mvs;
+    for (w = 0; w < x_mis; w++) {
+      mv->ref_frame = NONE_FRAME;
+      mv->mv.as_int = 0;
+
+      for (int idx = 0; idx < 2; ++idx) {
+        MV_REFERENCE_FRAME ref_frame = mi->ref_frame[idx];
+        if (ref_frame > INTRA_FRAME) {
+          int8_t ref_idx = cm->ref_frame_side[ref_frame];
+          if (ref_idx) continue;
+          if ((abs(mi->mv[idx].as_mv.row) > REFMVS_LIMIT) ||
+              (abs(mi->mv[idx].as_mv.col) > REFMVS_LIMIT))
+            continue;
+          mv->ref_frame = ref_frame;
+          mv->mv.as_int = mi->mv[idx].as_int;
+        }
+      }
+      mv++;
+    }
+    frame_mvs += frame_mvs_stride;
+  }
+}
+#endif
+
+// Adds the motion vector(s) of the neighboring block `candidate` to the
+// reference MV stack when its reference frame(s) match rf[].  A candidate MV
+// already on the stack has `weight` added to its entry; otherwise a new
+// entry is appended (up to MAX_REF_MV_STACK_SIZE).  Global-motion blocks
+// contribute the precomputed gm_mv_candidates instead of their stored MVs.
+// newmv_count and ref_match_count feed the mode-context derivation in
+// setup_ref_mv_list().
+static void add_ref_mv_candidate(
+    const MB_MODE_INFO *const candidate, const MV_REFERENCE_FRAME rf[2],
+    uint8_t *refmv_count, uint8_t *ref_match_count, uint8_t *newmv_count,
+    CANDIDATE_MV *ref_mv_stack, int_mv *gm_mv_candidates,
+    const WarpedMotionParams *gm_params, int col, int weight) {
+  if (!is_inter_block(candidate)) return;  // for intrabc
+  int index = 0, ref;
+  assert(weight % 2 == 0);
+
+  if (rf[1] == NONE_FRAME) {
+    // single reference frame
+    for (ref = 0; ref < 2; ++ref) {
+      if (candidate->ref_frame[ref] == rf[0]) {
+        int_mv this_refmv;
+        if (is_global_mv_block(candidate, gm_params[rf[0]].wmtype))
+          this_refmv = gm_mv_candidates[0];
+        else
+          this_refmv = get_sub_block_mv(candidate, ref, col);
+
+        for (index = 0; index < *refmv_count; ++index)
+          if (ref_mv_stack[index].this_mv.as_int == this_refmv.as_int) break;
+
+        if (index < *refmv_count) ref_mv_stack[index].weight += weight;
+
+        // Add a new item to the list.
+        if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+          ref_mv_stack[index].this_mv = this_refmv;
+          ref_mv_stack[index].weight = weight;
+          ++(*refmv_count);
+        }
+        if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
+        ++*ref_match_count;
+      }
+    }
+  } else {
+    // compound reference frame: both references must match exactly.
+    if (candidate->ref_frame[0] == rf[0] && candidate->ref_frame[1] == rf[1]) {
+      int_mv this_refmv[2];
+
+      for (ref = 0; ref < 2; ++ref) {
+        if (is_global_mv_block(candidate, gm_params[rf[ref]].wmtype))
+          this_refmv[ref] = gm_mv_candidates[ref];
+        else
+          this_refmv[ref] = get_sub_block_mv(candidate, ref, col);
+      }
+
+      for (index = 0; index < *refmv_count; ++index)
+        if ((ref_mv_stack[index].this_mv.as_int == this_refmv[0].as_int) &&
+            (ref_mv_stack[index].comp_mv.as_int == this_refmv[1].as_int))
+          break;
+
+      if (index < *refmv_count) ref_mv_stack[index].weight += weight;
+
+      // Add a new item to the list.
+      if (index == *refmv_count && *refmv_count < MAX_REF_MV_STACK_SIZE) {
+        ref_mv_stack[index].this_mv = this_refmv[0];
+        ref_mv_stack[index].comp_mv = this_refmv[1];
+        ref_mv_stack[index].weight = weight;
+        ++(*refmv_count);
+      }
+      if (have_newmv_in_inter_mode(candidate->mode)) ++*newmv_count;
+      ++*ref_match_count;
+    }
+  }
+}
+
+// Scans one row of mode info at vertical offset row_offset (negative =
+// above the current block) for reference MV candidates, stepping by each
+// candidate's width (at least 8x8 / 16x16 units depending on distance and
+// block size).  Candidate weight scales with the covered length;
+// *processed_rows records rows already covered by wide candidates so outer
+// scans can skip them.
+static void scan_row_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                          int mi_row, int mi_col,
+                          const MV_REFERENCE_FRAME rf[2], int row_offset,
+                          CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
+                          uint8_t *ref_match_count, uint8_t *newmv_count,
+                          int_mv *gm_mv_candidates, int max_row_offset,
+                          int *processed_rows) {
+  int end_mi = AOMMIN(xd->n8_w, cm->mi_cols - mi_col);
+  end_mi = AOMMIN(end_mi, mi_size_wide[BLOCK_64X64]);
+  const int n8_w_8 = mi_size_wide[BLOCK_8X8];
+  const int n8_w_16 = mi_size_wide[BLOCK_16X16];
+  int i;
+  int col_offset = 0;
+  const int shift = 0;
+  // TODO(jingning): Revisit this part after cb4x4 is stable.
+  if (abs(row_offset) > 1) {
+    col_offset = 1;
+    if ((mi_col & 0x01) && xd->n8_w < n8_w_8) --col_offset;
+  }
+  const int use_step_16 = (xd->n8_w >= 16);
+  MB_MODE_INFO *const candidate_mi0 = xd->mi + row_offset * xd->mi_stride;
+  (void)mi_row;
+
+  for (i = 0; i < end_mi;) {
+    const MB_MODE_INFO *const candidate = &candidate_mi0[col_offset + i];
+    const int candidate_bsize = candidate->sb_type;
+    const int n8_w = mi_size_wide[candidate_bsize];
+    int len = AOMMIN(xd->n8_w, n8_w);
+    if (use_step_16)
+      len = AOMMAX(n8_w_16, len);
+    else if (abs(row_offset) > 1)
+      len = AOMMAX(len, n8_w_8);
+
+    int weight = 2;
+    if (xd->n8_w >= n8_w_8 && xd->n8_w <= n8_w) {
+      int inc = AOMMIN(-max_row_offset + row_offset + 1,
+                       mi_size_high[candidate_bsize]);
+      // Obtain range used in weight calculation.
+      weight = AOMMAX(weight, (inc << shift));
+      // Update processed rows.
+      *processed_rows = inc - row_offset - 1;
+    }
+
+    add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
+                         newmv_count, ref_mv_stack, gm_mv_candidates,
+                         cm->global_motion, col_offset + i, len * weight);
+
+    i += len;
+  }
+}
+
+// Column-direction counterpart of scan_row_mbmi(): scans one column of mode
+// info at horizontal offset col_offset (negative = left of the current
+// block), stepping by each candidate's height.  Candidate weight scales
+// with the covered length; *processed_cols records columns already covered
+// by tall candidates so outer scans can skip them.
+static void scan_col_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                          int mi_row, int mi_col,
+                          const MV_REFERENCE_FRAME rf[2], int col_offset,
+                          CANDIDATE_MV *ref_mv_stack, uint8_t *refmv_count,
+                          uint8_t *ref_match_count, uint8_t *newmv_count,
+                          int_mv *gm_mv_candidates, int max_col_offset,
+                          int *processed_cols) {
+  int end_mi = AOMMIN(xd->n8_h, cm->mi_rows - mi_row);
+  end_mi = AOMMIN(end_mi, mi_size_high[BLOCK_64X64]);
+  const int n8_h_8 = mi_size_high[BLOCK_8X8];
+  const int n8_h_16 = mi_size_high[BLOCK_16X16];
+  int i;
+  int row_offset = 0;
+  const int shift = 0;
+  if (abs(col_offset) > 1) {
+    row_offset = 1;
+    if ((mi_row & 0x01) && xd->n8_h < n8_h_8) --row_offset;
+  }
+  const int use_step_16 = (xd->n8_h >= 16);
+  (void)mi_col;
+
+  for (i = 0; i < end_mi;) {
+    const MB_MODE_INFO *const candidate =
+        &xd->mi[(row_offset + i) * xd->mi_stride + col_offset];
+    const int candidate_bsize = candidate->sb_type;
+    const int n8_h = mi_size_high[candidate_bsize];
+    int len = AOMMIN(xd->n8_h, n8_h);
+    if (use_step_16)
+      len = AOMMAX(n8_h_16, len);
+    else if (abs(col_offset) > 1)
+      len = AOMMAX(len, n8_h_8);
+
+    int weight = 2;
+    if (xd->n8_h >= n8_h_8 && xd->n8_h <= n8_h) {
+      int inc = AOMMIN(-max_col_offset + col_offset + 1,
+                       mi_size_wide[candidate_bsize]);
+      // Obtain range used in weight calculation.
+      weight = AOMMAX(weight, (inc << shift));
+      // Update processed cols.
+      *processed_cols = inc - col_offset - 1;
+    }
+
+    add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
+                         newmv_count, ref_mv_stack, gm_mv_candidates,
+                         cm->global_motion, col_offset, len * weight);
+
+    i += len;
+  }
+}
+
+// Examines the single 8x8 block at (row_offset, col_offset) relative to the
+// current block — bounds-checked against the tile via is_inside() — and adds
+// its MVs as candidates with a fixed weight of 2 * mi_size_wide[BLOCK_8X8].
+static void scan_blk_mbmi(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                          const int mi_row, const int mi_col,
+                          const MV_REFERENCE_FRAME rf[2], int row_offset,
+                          int col_offset, CANDIDATE_MV *ref_mv_stack,
+                          uint8_t *ref_match_count, uint8_t *newmv_count,
+                          int_mv *gm_mv_candidates,
+                          uint8_t refmv_count[MODE_CTX_REF_FRAMES]) {
+  const TileInfo *const tile = &xd->tile;
+  POSITION mi_pos;
+
+  mi_pos.row = row_offset;
+  mi_pos.col = col_offset;
+
+  if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) {
+    const MB_MODE_INFO *const candidate =
+        &xd->mi[mi_pos.row * xd->mi_stride + mi_pos.col];
+    const int len = mi_size_wide[BLOCK_8X8];
+
+    add_ref_mv_candidate(candidate, rf, refmv_count, ref_match_count,
+                         newmv_count, ref_mv_stack, gm_mv_candidates,
+                         cm->global_motion, mi_pos.col, 2 * len);
+  }  // Analyze a single 8x8 block motion information.
+}
+
+// Returns whether the block's top-right neighbor has already been decoded
+// and may therefore supply an MV candidate.  `bs` is the larger of the
+// block's dimensions in mi units; the decision walks the recursive
+// partition structure within the superblock and then applies special cases
+// for rectangular blocks and PARTITION_VERT_A.
+static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                         int mi_row, int mi_col, int bs) {
+  const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
+  const int mask_row = mi_row & (sb_mi_size - 1);
+  const int mask_col = mi_col & (sb_mi_size - 1);
+
+  if (bs > mi_size_wide[BLOCK_64X64]) return 0;
+
+  // In a split partition all apart from the bottom right has a top right
+  int has_tr = !((mask_row & bs) && (mask_col & bs));
+
+  // bs > 0 and bs is a power of 2
+  assert(bs > 0 && !(bs & (bs - 1)));
+
+  // For each 4x4 group of blocks, when the bottom right is decoded the blocks
+  // to the right have not been decoded therefore the bottom right does
+  // not have a top right
+  while (bs < sb_mi_size) {
+    if (mask_col & bs) {
+      if ((mask_col & (2 * bs)) && (mask_row & (2 * bs))) {
+        has_tr = 0;
+        break;
+      }
+    } else {
+      break;
+    }
+    bs <<= 1;
+  }
+
+  // The left hand of two vertical rectangles always has a top right (as the
+  // block above will have been decoded)
+  if (xd->n8_w < xd->n8_h)
+    if (!xd->is_sec_rect) has_tr = 1;
+
+  // The bottom of two horizontal rectangles never has a top right (as the block
+  // to the right won't have been decoded)
+  if (xd->n8_w > xd->n8_h)
+    if (xd->is_sec_rect) has_tr = 0;
+
+  // The bottom left square of a Vertical A (in the old format) does
+  // not have a top right as it is decoded before the right hand
+  // rectangle of the partition
+  if (xd->cur_mi.partition == PARTITION_VERT_A) {
+    if (xd->n8_w == xd->n8_h)
+      if (mask_row & bs) has_tr = 0;
+  }
+
+  return has_tr;
+}
+
+// Returns 1 if (mi_row + row_offset, mi_col + col_offset) stays inside the
+// same 64x64 block as (mi_row, mi_col); used to bound the extended temporal
+// MV sample positions in setup_ref_mv_list().
+static int check_sb_border(const int mi_row, const int mi_col,
+                           const int row_offset, const int col_offset) {
+  const int sb_mi_size = mi_size_wide[BLOCK_64X64];
+  const int row = mi_row & (sb_mi_size - 1);
+  const int col = mi_col & (sb_mi_size - 1);
+
+  if (row + row_offset < 0 || row + row_offset >= sb_mi_size ||
+      col + col_offset < 0 || col + col_offset >= sb_mi_size)
+    return 0;
+
+  return 1;
+}
+
+// Adds a temporal (previous-frame) MV candidate for the 8x8 unit at
+// (blk_row, blk_col), read from cm->tpl_mvs and projected by the relative
+// frame distance(s) via get_mv_projection().  Handles both single and
+// compound references.  Returns 1 if a projected MV was available at that
+// position (cm->tpl_mvs entry != INVALID_MV), 0 otherwise.  As a side
+// effect, at the block origin (blk_row == 0 && blk_col == 0) the GLOBALMV
+// context bit is set when the projected MV differs notably (>= 16) from the
+// global-motion candidate.
+static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                          int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame,
+                          int blk_row, int blk_col, int_mv *gm_mv_candidates,
+                          uint8_t refmv_count[MODE_CTX_REF_FRAMES],
+                          CANDIDATE_MV ref_mv_stacks[][MAX_REF_MV_STACK_SIZE],
+                          int16_t *mode_context) {
+  POSITION mi_pos;
+  int idx;
+  const int weight_unit = 1;  // mi_size_wide[BLOCK_8X8];
+
+  // Snap the sample position to the 8x8 (2-mi) grid of the temporal MV
+  // buffer.
+  mi_pos.row = (mi_row & 0x01) ? blk_row : blk_row + 1;
+  mi_pos.col = (mi_col & 0x01) ? blk_col : blk_col + 1;
+
+  if (!is_inside(&xd->tile, mi_col, mi_row, cm->mi_rows, &mi_pos)) return 0;
+
+  const TPL_MV_REF *prev_frame_mvs =
+      cm->tpl_mvs + ((mi_row + mi_pos.row) >> 1) * (cm->mi_stride >> 1) +
+      ((mi_col + mi_pos.col) >> 1);
+
+  MV_REFERENCE_FRAME rf[2];
+  av1_set_ref_frame(rf, ref_frame);
+
+  if (rf[1] == NONE_FRAME) {
+    int cur_frame_index = cm->cur_frame.cur_frame_offset;
+    int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx;
+    int frame0_index = cm->buffer_pool.frame_bufs[buf_idx_0].cur_frame_offset;
+    int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index);
+    CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[rf[0]];
+
+    if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) {
+      int_mv this_refmv;
+
+      get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
+                        cur_offset_0, prev_frame_mvs->ref_frame_offset);
+      lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
+                         cm->cur_frame_force_integer_mv);
+
+      if (blk_row == 0 && blk_col == 0)
+        if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 ||
+            abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16)
+          mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
+
+      for (idx = 0; idx < refmv_count[rf[0]]; ++idx)
+        if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int) break;
+
+      if (idx < refmv_count[rf[0]]) ref_mv_stack[idx].weight += 2 * weight_unit;
+
+      if (idx == refmv_count[rf[0]] &&
+          refmv_count[rf[0]] < MAX_REF_MV_STACK_SIZE) {
+        ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+        ref_mv_stack[idx].weight = 2 * weight_unit;
+        ++(refmv_count[rf[0]]);
+      }
+
+      return 1;
+    }
+  } else {
+    // Process compound inter mode
+    int cur_frame_index = cm->cur_frame.cur_frame_offset;
+    int buf_idx_0 = cm->frame_refs[FWD_RF_OFFSET(rf[0])].idx;
+    int frame0_index = cm->buffer_pool.frame_bufs[buf_idx_0].cur_frame_offset;
+
+    int cur_offset_0 = get_relative_dist(cm, cur_frame_index, frame0_index);
+    int buf_idx_1 = cm->frame_refs[FWD_RF_OFFSET(rf[1])].idx;
+    int frame1_index = cm->buffer_pool.frame_bufs[buf_idx_1].cur_frame_offset;
+    int cur_offset_1 = get_relative_dist(cm, cur_frame_index, frame1_index);
+    CANDIDATE_MV *ref_mv_stack = ref_mv_stacks[ref_frame];
+
+    if (prev_frame_mvs->mfmv0.as_int != INVALID_MV) {
+      int_mv this_refmv;
+      int_mv comp_refmv;
+      // Project the same stored MV once per reference distance.
+      get_mv_projection(&this_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
+                        cur_offset_0, prev_frame_mvs->ref_frame_offset);
+      get_mv_projection(&comp_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
+                        cur_offset_1, prev_frame_mvs->ref_frame_offset);
+
+      lower_mv_precision(&this_refmv.as_mv, cm->allow_high_precision_mv,
+                         cm->cur_frame_force_integer_mv);
+      lower_mv_precision(&comp_refmv.as_mv, cm->allow_high_precision_mv,
+                         cm->cur_frame_force_integer_mv);
+
+      if (blk_row == 0 && blk_col == 0)
+        if (abs(this_refmv.as_mv.row - gm_mv_candidates[0].as_mv.row) >= 16 ||
+            abs(this_refmv.as_mv.col - gm_mv_candidates[0].as_mv.col) >= 16 ||
+            abs(comp_refmv.as_mv.row - gm_mv_candidates[1].as_mv.row) >= 16 ||
+            abs(comp_refmv.as_mv.col - gm_mv_candidates[1].as_mv.col) >= 16)
+          mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
+
+      for (idx = 0; idx < refmv_count[ref_frame]; ++idx)
+        if (this_refmv.as_int == ref_mv_stack[idx].this_mv.as_int &&
+            comp_refmv.as_int == ref_mv_stack[idx].comp_mv.as_int)
+          break;
+
+      if (idx < refmv_count[ref_frame])
+        ref_mv_stack[idx].weight += 2 * weight_unit;
+
+      if (idx == refmv_count[ref_frame] &&
+          refmv_count[ref_frame] < MAX_REF_MV_STACK_SIZE) {
+        ref_mv_stack[idx].this_mv.as_int = this_refmv.as_int;
+        ref_mv_stack[idx].comp_mv.as_int = comp_refmv.as_int;
+        ref_mv_stack[idx].weight = 2 * weight_unit;
+        ++(refmv_count[ref_frame]);
+      }
+      return 1;
+    }
+  }
+  return 0;
+}
+
+static void setup_ref_mv_list(
+    const AV1_COMMON *cm, const MACROBLOCKD *xd, MV_REFERENCE_FRAME ref_frame,
+    uint8_t refmv_count[MODE_CTX_REF_FRAMES],
+    CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+    int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES], int_mv *gm_mv_candidates,
+    int mi_row, int mi_col, int16_t *mode_context) {
+  const int bs = AOMMAX(xd->n8_w, xd->n8_h);
+  const int has_tr = has_top_right(cm, xd, mi_row, mi_col, bs);
+  MV_REFERENCE_FRAME rf[2];
+
+  const TileInfo *const tile = &xd->tile;
+  int max_row_offset = 0, max_col_offset = 0;
+  const int row_adj = (xd->n8_h < mi_size_high[BLOCK_8X8]) && (mi_row & 0x01);
+  const int col_adj = (xd->n8_w < mi_size_wide[BLOCK_8X8]) && (mi_col & 0x01);
+  int processed_rows = 0;
+  int processed_cols = 0;
+
+  av1_set_ref_frame(rf, ref_frame);
+  mode_context[ref_frame] = 0;
+  refmv_count[ref_frame] = 0;
+
+  // Find valid maximum row/col offset.
+  if (xd->up_available) {
+    max_row_offset = -(MVREF_ROW_COLS << 1) + row_adj;
+
+    if (xd->n8_h < mi_size_high[BLOCK_8X8])
+      max_row_offset = -(2 << 1) + row_adj;
+
+    max_row_offset =
+        find_valid_row_offset(tile, mi_row, cm->mi_rows, max_row_offset);
+  }
+
+  if (xd->left_available) {
+    max_col_offset = -(MVREF_ROW_COLS << 1) + col_adj;
+
+    if (xd->n8_w < mi_size_wide[BLOCK_8X8])
+      max_col_offset = -(2 << 1) + col_adj;
+
+    max_col_offset = find_valid_col_offset(tile, mi_col, max_col_offset);
+  }
+
+  uint8_t col_match_count = 0;
+  uint8_t row_match_count = 0;
+  uint8_t newmv_count = 0;
+
+  // Scan the first above row mode info. row_offset = -1;
+  if (abs(max_row_offset) >= 1)
+    scan_row_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame],
+                  &refmv_count[ref_frame], &row_match_count, &newmv_count,
+                  gm_mv_candidates, max_row_offset, &processed_rows);
+  // Scan the first left column mode info. col_offset = -1;
+  if (abs(max_col_offset) >= 1)
+    scan_col_mbmi(cm, xd, mi_row, mi_col, rf, -1, ref_mv_stack[ref_frame],
+                  &refmv_count[ref_frame], &col_match_count, &newmv_count,
+                  gm_mv_candidates, max_col_offset, &processed_cols);
+  // Check top-right boundary
+  if (has_tr)
+    scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, xd->n8_w,
+                  ref_mv_stack[ref_frame], &row_match_count, &newmv_count,
+                  gm_mv_candidates, &refmv_count[ref_frame]);
+
+  uint8_t nearest_match = (row_match_count > 0) + (col_match_count > 0);
+  uint8_t nearest_refmv_count = refmv_count[ref_frame];
+
+  // TODO(yunqing): for comp_search, do it for all 3 cases.
+  for (int idx = 0; idx < nearest_refmv_count; ++idx)
+    ref_mv_stack[ref_frame][idx].weight += REF_CAT_LEVEL;
+
+  if (cm->allow_ref_frame_mvs) {
+    int is_available = 0;
+    const int voffset = AOMMAX(mi_size_high[BLOCK_8X8], xd->n8_h);
+    const int hoffset = AOMMAX(mi_size_wide[BLOCK_8X8], xd->n8_w);
+    const int blk_row_end = AOMMIN(xd->n8_h, mi_size_high[BLOCK_64X64]);
+    const int blk_col_end = AOMMIN(xd->n8_w, mi_size_wide[BLOCK_64X64]);
+
+    const int tpl_sample_pos[3][2] = {
+      { voffset, -2 },
+      { voffset, hoffset },
+      { voffset - 2, hoffset },
+    };
+    const int allow_extension = (xd->n8_h >= mi_size_high[BLOCK_8X8]) &&
+                                (xd->n8_h < mi_size_high[BLOCK_64X64]) &&
+                                (xd->n8_w >= mi_size_wide[BLOCK_8X8]) &&
+                                (xd->n8_w < mi_size_wide[BLOCK_64X64]);
+
+    int step_h = (xd->n8_h >= mi_size_high[BLOCK_64X64])
+                     ? mi_size_high[BLOCK_16X16]
+                     : mi_size_high[BLOCK_8X8];
+    int step_w = (xd->n8_w >= mi_size_wide[BLOCK_64X64])
+                     ? mi_size_wide[BLOCK_16X16]
+                     : mi_size_wide[BLOCK_8X8];
+
+    for (int blk_row = 0; blk_row < blk_row_end; blk_row += step_h) {
+      for (int blk_col = 0; blk_col < blk_col_end; blk_col += step_w) {
+        int ret = add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row,
+                                 blk_col, gm_mv_candidates, refmv_count,
+                                 ref_mv_stack, mode_context);
+        if (blk_row == 0 && blk_col == 0) is_available = ret;
+      }
+    }
+
+    if (is_available == 0) mode_context[ref_frame] |= (1 << GLOBALMV_OFFSET);
+
+    for (int i = 0; i < 3 && allow_extension; ++i) {
+      const int blk_row = tpl_sample_pos[i][0];
+      const int blk_col = tpl_sample_pos[i][1];
+
+      if (!check_sb_border(mi_row, mi_col, blk_row, blk_col)) continue;
+      add_tpl_ref_mv(cm, xd, mi_row, mi_col, ref_frame, blk_row, blk_col,
+                     gm_mv_candidates, refmv_count, ref_mv_stack, mode_context);
+    }
+  }
+
+  uint8_t dummy_newmv_count = 0;
+
+  // Scan the second outer area.
+  scan_blk_mbmi(cm, xd, mi_row, mi_col, rf, -1, -1, ref_mv_stack[ref_frame],
+                &row_match_count, &dummy_newmv_count, gm_mv_candidates,
+                &refmv_count[ref_frame]);
+
+  for (int idx = 2; idx <= MVREF_ROW_COLS; ++idx) {
+    const int row_offset = -(idx << 1) + 1 + row_adj;
+    const int col_offset = -(idx << 1) + 1 + col_adj;
+
+    if (abs(row_offset) <= abs(max_row_offset) &&
+        abs(row_offset) > processed_rows)
+      scan_row_mbmi(cm, xd, mi_row, mi_col, rf, row_offset,
+                    ref_mv_stack[ref_frame], &refmv_count[ref_frame],
+                    &row_match_count, &dummy_newmv_count, gm_mv_candidates,
+                    max_row_offset, &processed_rows);
+
+    if (abs(col_offset) <= abs(max_col_offset) &&
+        abs(col_offset) > processed_cols)
+      scan_col_mbmi(cm, xd, mi_row, mi_col, rf, col_offset,
+                    ref_mv_stack[ref_frame], &refmv_count[ref_frame],
+                    &col_match_count, &dummy_newmv_count, gm_mv_candidates,
+                    max_col_offset, &processed_cols);
+  }
+
+  uint8_t ref_match_count = (row_match_count > 0) + (col_match_count > 0);
+
+  switch (nearest_match) {
+    case 0:
+      mode_context[ref_frame] |= 0;
+      if (ref_match_count >= 1) mode_context[ref_frame] |= 1;
+      if (ref_match_count == 1)
+        mode_context[ref_frame] |= (1 << REFMV_OFFSET);
+      else if (ref_match_count >= 2)
+        mode_context[ref_frame] |= (2 << REFMV_OFFSET);
+      break;
+    case 1:
+      mode_context[ref_frame] |= (newmv_count > 0) ? 2 : 3;
+      if (ref_match_count == 1)
+        mode_context[ref_frame] |= (3 << REFMV_OFFSET);
+      else if (ref_match_count >= 2)
+        mode_context[ref_frame] |= (4 << REFMV_OFFSET);
+      break;
+    case 2:
+    default:
+      if (newmv_count >= 1)
+        mode_context[ref_frame] |= 4;
+      else
+        mode_context[ref_frame] |= 5;
+
+      mode_context[ref_frame] |= (5 << REFMV_OFFSET);
+      break;
+  }
+
+  // Rank the likelihood and assign nearest and near mvs.
+  int len = nearest_refmv_count;
+  while (len > 0) {
+    int nr_len = 0;
+    for (int idx = 1; idx < len; ++idx) {
+      if (ref_mv_stack[ref_frame][idx - 1].weight <
+          ref_mv_stack[ref_frame][idx].weight) {
+        CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1];
+        ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx];
+        ref_mv_stack[ref_frame][idx] = tmp_mv;
+        nr_len = idx;
+      }
+    }
+    len = nr_len;
+  }
+
+  len = refmv_count[ref_frame];
+  while (len > nearest_refmv_count) {
+    int nr_len = nearest_refmv_count;
+    for (int idx = nearest_refmv_count + 1; idx < len; ++idx) {
+      if (ref_mv_stack[ref_frame][idx - 1].weight <
+          ref_mv_stack[ref_frame][idx].weight) {
+        CANDIDATE_MV tmp_mv = ref_mv_stack[ref_frame][idx - 1];
+        ref_mv_stack[ref_frame][idx - 1] = ref_mv_stack[ref_frame][idx];
+        ref_mv_stack[ref_frame][idx] = tmp_mv;
+        nr_len = idx;
+      }
+    }
+    len = nr_len;
+  }
+
+  if (rf[1] > NONE_FRAME) {
+    // TODO(jingning, yunqing): Refactor and consolidate the compound and
+    // single reference frame modes. Reduce unnecessary redundancy.
+    if (refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES) {
+      int_mv ref_id[2][2], ref_diff[2][2];
+      int ref_id_count[2] = { 0 }, ref_diff_count[2] = { 0 };
+
+      int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n8_w);
+      mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
+      int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n8_h);
+      mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
+      int mi_size = AOMMIN(mi_width, mi_height);
+
+      for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size;) {
+        const MB_MODE_INFO *const candidate = &xd->mi[-xd->mi_stride + idx];
+        const int candidate_bsize = candidate->sb_type;
+
+        for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+          MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
+
+          for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
+            if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
+              ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
+              ++ref_id_count[cmp_idx];
+            } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
+              int_mv this_mv = candidate->mv[rf_idx];
+              if (cm->ref_frame_sign_bias[can_rf] !=
+                  cm->ref_frame_sign_bias[rf[cmp_idx]]) {
+                this_mv.as_mv.row = -this_mv.as_mv.row;
+                this_mv.as_mv.col = -this_mv.as_mv.col;
+              }
+              ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
+              ++ref_diff_count[cmp_idx];
+            }
+          }
+        }
+        idx += mi_size_wide[candidate_bsize];
+      }
+
+      for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size;) {
+        const MB_MODE_INFO *const candidate = &xd->mi[idx * xd->mi_stride - 1];
+        const int candidate_bsize = candidate->sb_type;
+
+        for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+          MV_REFERENCE_FRAME can_rf = candidate->ref_frame[rf_idx];
+
+          for (int cmp_idx = 0; cmp_idx < 2; ++cmp_idx) {
+            if (can_rf == rf[cmp_idx] && ref_id_count[cmp_idx] < 2) {
+              ref_id[cmp_idx][ref_id_count[cmp_idx]] = candidate->mv[rf_idx];
+              ++ref_id_count[cmp_idx];
+            } else if (can_rf > INTRA_FRAME && ref_diff_count[cmp_idx] < 2) {
+              int_mv this_mv = candidate->mv[rf_idx];
+              if (cm->ref_frame_sign_bias[can_rf] !=
+                  cm->ref_frame_sign_bias[rf[cmp_idx]]) {
+                this_mv.as_mv.row = -this_mv.as_mv.row;
+                this_mv.as_mv.col = -this_mv.as_mv.col;
+              }
+              ref_diff[cmp_idx][ref_diff_count[cmp_idx]] = this_mv;
+              ++ref_diff_count[cmp_idx];
+            }
+          }
+        }
+        idx += mi_size_high[candidate_bsize];
+      }
+
+      // Build up the compound mv predictor
+      int_mv comp_list[3][2];
+
+      for (int idx = 0; idx < 2; ++idx) {
+        int comp_idx = 0;
+        for (int list_idx = 0; list_idx < ref_id_count[idx] && comp_idx < 2;
+             ++list_idx, ++comp_idx)
+          comp_list[comp_idx][idx] = ref_id[idx][list_idx];
+        for (int list_idx = 0; list_idx < ref_diff_count[idx] && comp_idx < 2;
+             ++list_idx, ++comp_idx)
+          comp_list[comp_idx][idx] = ref_diff[idx][list_idx];
+        for (; comp_idx < 3; ++comp_idx)
+          comp_list[comp_idx][idx] = gm_mv_candidates[idx];
+      }
+
+      if (refmv_count[ref_frame]) {
+        assert(refmv_count[ref_frame] == 1);
+        if (comp_list[0][0].as_int ==
+                ref_mv_stack[ref_frame][0].this_mv.as_int &&
+            comp_list[0][1].as_int ==
+                ref_mv_stack[ref_frame][0].comp_mv.as_int) {
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+              comp_list[1][0];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+              comp_list[1][1];
+        } else {
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+              comp_list[0][0];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+              comp_list[0][1];
+        }
+        ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
+        ++refmv_count[ref_frame];
+      } else {
+        for (int idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx) {
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].this_mv =
+              comp_list[idx][0];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].comp_mv =
+              comp_list[idx][1];
+          ref_mv_stack[ref_frame][refmv_count[ref_frame]].weight = 2;
+          ++refmv_count[ref_frame];
+        }
+      }
+    }
+
+    assert(refmv_count[ref_frame] >= 2);
+
+    for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
+      clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
+                   xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);
+      clamp_mv_ref(&ref_mv_stack[ref_frame][idx].comp_mv.as_mv,
+                   xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);
+    }
+  } else {
+    // Handle single reference frame extension
+    int mi_width = AOMMIN(mi_size_wide[BLOCK_64X64], xd->n8_w);
+    mi_width = AOMMIN(mi_width, cm->mi_cols - mi_col);
+    int mi_height = AOMMIN(mi_size_high[BLOCK_64X64], xd->n8_h);
+    mi_height = AOMMIN(mi_height, cm->mi_rows - mi_row);
+    int mi_size = AOMMIN(mi_width, mi_height);
+
+    for (int idx = 0; abs(max_row_offset) >= 1 && idx < mi_size &&
+                      refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) {
+      const MB_MODE_INFO *const candidate = &xd->mi[-xd->mi_stride + idx];
+      const int candidate_bsize = candidate->sb_type;
+
+      // TODO(jingning): Refactor the following code.
+      for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+        if (candidate->ref_frame[rf_idx] > INTRA_FRAME) {
+          int_mv this_mv = candidate->mv[rf_idx];
+          if (cm->ref_frame_sign_bias[candidate->ref_frame[rf_idx]] !=
+              cm->ref_frame_sign_bias[ref_frame]) {
+            this_mv.as_mv.row = -this_mv.as_mv.row;
+            this_mv.as_mv.col = -this_mv.as_mv.col;
+          }
+          int stack_idx;
+          for (stack_idx = 0; stack_idx < refmv_count[ref_frame]; ++stack_idx) {
+            int_mv stack_mv = ref_mv_stack[ref_frame][stack_idx].this_mv;
+            if (this_mv.as_int == stack_mv.as_int) break;
+          }
+
+          if (stack_idx == refmv_count[ref_frame]) {
+            ref_mv_stack[ref_frame][stack_idx].this_mv = this_mv;
+
+            // TODO(jingning): Set an arbitrary small number here. The weight
+            // doesn't matter as long as it is properly initialized.
+            ref_mv_stack[ref_frame][stack_idx].weight = 2;
+            ++refmv_count[ref_frame];
+          }
+        }
+      }
+      idx += mi_size_wide[candidate_bsize];
+    }
+
+    for (int idx = 0; abs(max_col_offset) >= 1 && idx < mi_size &&
+                      refmv_count[ref_frame] < MAX_MV_REF_CANDIDATES;) {
+      const MB_MODE_INFO *const candidate = &xd->mi[idx * xd->mi_stride - 1];
+      const int candidate_bsize = candidate->sb_type;
+
+      // TODO(jingning): Refactor the following code.
+      for (int rf_idx = 0; rf_idx < 2; ++rf_idx) {
+        if (candidate->ref_frame[rf_idx] > INTRA_FRAME) {
+          int_mv this_mv = candidate->mv[rf_idx];
+          if (cm->ref_frame_sign_bias[candidate->ref_frame[rf_idx]] !=
+              cm->ref_frame_sign_bias[ref_frame]) {
+            this_mv.as_mv.row = -this_mv.as_mv.row;
+            this_mv.as_mv.col = -this_mv.as_mv.col;
+          }
+          int stack_idx;
+          for (stack_idx = 0; stack_idx < refmv_count[ref_frame]; ++stack_idx) {
+            int_mv stack_mv = ref_mv_stack[ref_frame][stack_idx].this_mv;
+            if (this_mv.as_int == stack_mv.as_int) break;
+          }
+
+          if (stack_idx == refmv_count[ref_frame]) {
+            ref_mv_stack[ref_frame][stack_idx].this_mv = this_mv;
+
+            // TODO(jingning): Set an arbitrary small number here. The weight
+            // doesn't matter as long as it is properly initialized.
+            ref_mv_stack[ref_frame][stack_idx].weight = 2;
+            ++refmv_count[ref_frame];
+          }
+        }
+      }
+      idx += mi_size_high[candidate_bsize];
+    }
+
+    for (int idx = 0; idx < refmv_count[ref_frame]; ++idx) {
+      clamp_mv_ref(&ref_mv_stack[ref_frame][idx].this_mv.as_mv,
+                   xd->n8_w << MI_SIZE_LOG2, xd->n8_h << MI_SIZE_LOG2, xd);
+    }
+
+    if (mv_ref_list != NULL) {
+      for (int idx = refmv_count[ref_frame]; idx < MAX_MV_REF_CANDIDATES; ++idx)
+        mv_ref_list[rf[0]][idx].as_int = gm_mv_candidates[0].as_int;
+
+      for (int idx = 0;
+           idx < AOMMIN(MAX_MV_REF_CANDIDATES, refmv_count[ref_frame]); ++idx) {
+        mv_ref_list[rf[0]][idx].as_int =
+            ref_mv_stack[ref_frame][idx].this_mv.as_int;
+      }
+    }
+  }
+}
+
+// Entry point for motion-vector prediction of one block.
+//
+// Computes the global-motion candidate MV(s) for 'ref_frame' (stored in
+// global_mvs[] for single references, INVALID_MV for INTRA_FRAME) and then
+// calls setup_ref_mv_list() to scan neighbouring blocks and fill
+// ref_mv_count / ref_mv_stack / mv_ref_list / mode_context.
+void av1_find_mv_refs(const AV1_COMMON *cm, const MACROBLOCKD *xd,
+                      MB_MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+                      uint8_t ref_mv_count[MODE_CTX_REF_FRAMES],
+                      CANDIDATE_MV ref_mv_stack[][MAX_REF_MV_STACK_SIZE],
+                      int_mv mv_ref_list[][MAX_MV_REF_CANDIDATES],
+                      int_mv *global_mvs, int mi_row, int mi_col,
+                      int16_t *mode_context) {
+  int_mv zeromv[2];
+  BLOCK_SIZE bsize = mi->sb_type;
+  MV_REFERENCE_FRAME rf[2];
+  av1_set_ref_frame(rf, ref_frame);
+
+  // Only single references (< REF_FRAMES) get an entry in global_mvs[];
+  // compound reference indices are skipped here.
+  if (ref_frame < REF_FRAMES) {
+    if (ref_frame != INTRA_FRAME) {
+      global_mvs[ref_frame] = gm_get_motion_vector(
+          &cm->global_motion[ref_frame], cm->allow_high_precision_mv, bsize,
+          mi_col, mi_row, cm->cur_frame_force_integer_mv);
+    } else {
+      global_mvs[ref_frame].as_int = INVALID_MV;
+    }
+  }
+
+  // "zeromv" holds the global-motion candidate for each of the (up to two)
+  // references of this block; it seeds the MV stack built below.
+  if (ref_frame != INTRA_FRAME) {
+    zeromv[0].as_int =
+        gm_get_motion_vector(&cm->global_motion[rf[0]],
+                             cm->allow_high_precision_mv, bsize, mi_col, mi_row,
+                             cm->cur_frame_force_integer_mv)
+            .as_int;
+    zeromv[1].as_int =
+        (rf[1] != NONE_FRAME)
+            ? gm_get_motion_vector(&cm->global_motion[rf[1]],
+                                   cm->allow_high_precision_mv, bsize, mi_col,
+                                   mi_row, cm->cur_frame_force_integer_mv)
+                  .as_int
+            : 0;
+  } else {
+    zeromv[0].as_int = zeromv[1].as_int = 0;
+  }
+
+  setup_ref_mv_list(cm, xd, ref_frame, ref_mv_count, ref_mv_stack, mv_ref_list,
+                    zeromv, mi_row, mi_col, mode_context);
+}
+
+// Finalize the two top-ranked candidate MVs: reduce every candidate to the
+// precision the frame allows (high-precision bit and/or forced integer
+// pel), then hand back the first two as the NEAREST and NEAR predictors.
+void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
+                           int_mv *near_mv, int is_integer) {
+  for (int idx = 0; idx < MAX_MV_REF_CANDIDATES; ++idx)
+    lower_mv_precision(&mvlist[idx].as_mv, allow_hp, is_integer);
+
+  *nearest_mv = mvlist[0];
+  *near_mv = mvlist[1];
+}
+
+// Snapshot order hints into the current frame buffer: record this frame's
+// own order hint, plus the order hint of every valid reference slot, so
+// that later frames can compute relative distances against them.
+void av1_setup_frame_buf_refs(AV1_COMMON *cm) {
+  cm->cur_frame.cur_frame_offset = cm->frame_offset;
+
+  for (MV_REFERENCE_FRAME rf = LAST_FRAME; rf <= ALTREF_FRAME; ++rf) {
+    const int slot = rf - LAST_FRAME;
+    const int buf_idx = cm->frame_refs[slot].idx;
+    if (buf_idx < 0) continue;  // unused reference slot: leave untouched
+    cm->cur_frame.ref_frame_offset[slot] =
+        cm->buffer_pool.frame_bufs[buf_idx].cur_frame_offset;
+  }
+}
+
+#if 0
+// Disabled: derives cm->ref_frame_sign_bias[] from order hints (a
+// reference lying after the current frame in display order gets bias 1).
+// NOTE(review): this copy dereferences cm->buffer_pool->frame_bufs while
+// the live code in this file uses cm->buffer_pool.frame_bufs (member, not
+// pointer) -- adjust before re-enabling.
+void av1_setup_frame_sign_bias(AV1_COMMON *cm) {
+  MV_REFERENCE_FRAME ref_frame;
+  for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+    const int buf_idx = cm->frame_refs[ref_frame - LAST_FRAME].idx;
+    if (cm->seq_params.enable_order_hint && buf_idx != INVALID_IDX) {
+      const int ref_frame_offset =
+          cm->buffer_pool->frame_bufs[buf_idx].cur_frame_offset;
+      cm->ref_frame_sign_bias[ref_frame] =
+          (get_relative_dist(cm, ref_frame_offset, (int)cm->frame_offset) <= 0)
+              ? 0
+              : 1;
+    } else {
+      cm->ref_frame_sign_bias[ref_frame] = 0;
+    }
+  }
+}
+#endif
+
+// Maximum distance (in luma samples) a projected MV may move a block from
+// its source position: 64 horizontally, 0 vertically (the projected row
+// must stay within the source 64x64 row band).
+#define MAX_OFFSET_WIDTH 64
+#define MAX_OFFSET_HEIGHT 0
+
+// Map an 8x8-unit block position (blk_row, blk_col) plus a motion vector
+// onto the 8x8-unit position (*mi_r, *mi_c) it projects to.
+//
+// sign_bias == 1 projects against the MV direction, otherwise along it.
+// Returns 1 if the target lies inside the frame and within the
+// MAX_OFFSET_* window around the source 64x64 region, 0 otherwise.
+static int get_block_position(AV1_COMMON *cm, int *mi_r, int *mi_c, int blk_row,
+                              int blk_col, MV mv, int sign_bias) {
+  // Top-left of the 64x64 region containing the source block (8x8 units).
+  const int base_blk_row = (blk_row >> 3) << 3;
+  const int base_blk_col = (blk_col >> 3) << 3;
+
+  // MV components are 1/8-pel; >> (4 + MI_SIZE_LOG2) converts them to
+  // 8x8-block units, rounding toward zero for negative values.
+  const int row_offset = (mv.row >= 0) ? (mv.row >> (4 + MI_SIZE_LOG2))
+                                       : -((-mv.row) >> (4 + MI_SIZE_LOG2));
+
+  const int col_offset = (mv.col >= 0) ? (mv.col >> (4 + MI_SIZE_LOG2))
+                                       : -((-mv.col) >> (4 + MI_SIZE_LOG2));
+
+  int row = (sign_bias == 1) ? blk_row - row_offset : blk_row + row_offset;
+  int col = (sign_bias == 1) ? blk_col - col_offset : blk_col + col_offset;
+
+  // Reject positions outside the frame. mi_rows/mi_cols are in 4x4 units,
+  // so >> 1 gives the frame size in 8x8 units.
+  if (row < 0 || row >= (cm->mi_rows >> 1) || col < 0 ||
+      col >= (cm->mi_cols >> 1))
+    return 0;
+
+  // Reject positions that land too far from the source 64x64 region
+  // (8 == 64 luma samples expressed in 8x8 units).
+  if (row < base_blk_row - (MAX_OFFSET_HEIGHT >> 3) ||
+      row >= base_blk_row + 8 + (MAX_OFFSET_HEIGHT >> 3) ||
+      col < base_blk_col - (MAX_OFFSET_WIDTH >> 3) ||
+      col >= base_blk_col + 8 + (MAX_OFFSET_WIDTH >> 3))
+    return 0;
+
+  *mi_r = row;
+  *mi_c = col;
+
+  return 1;
+}
+
+// Project the motion field of reference 'ref_frame' onto the current
+// frame's temporal MV buffer (cm->tpl_mvs) for the rectangle
+// [from_x4, to_x4) x [from_y4, to_y4), coordinates in 4x4-block units.
+//
+// dir == 0: reference precedes the current frame; dir == 2: it follows
+// (ref_to_cur is negated and get_block_position() is told to flip the
+// projection direction via dir >> 1).
+//
+// Returns 1 if the reference was usable, 0 if it has no buffer, is
+// intra-only, or its mi geometry does not match the current frame.
+static int motion_field_projection(AV1_COMMON *cm, MV_REFERENCE_FRAME ref_frame,
+                                   int dir,
+                                   const int from_x4, const int to_x4,
+                                   const int from_y4, const int to_y4) {
+  TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
+  int ref_offset[TOTAL_REFS_PER_FRAME] = { 0 };
+  int ref_sign[TOTAL_REFS_PER_FRAME] = { 0 };
+
+  // NOTE(review): leftover suppression -- 'dir' IS used below (ref_to_cur
+  // negation and the sign_bias passed to get_block_position()).
+  (void)dir;
+
+  int ref_frame_idx = cm->frame_refs[FWD_RF_OFFSET(ref_frame)].idx;
+  if (ref_frame_idx < 0) return 0;
+
+  if (cm->buffer_pool.frame_bufs[ref_frame_idx].intra_only) return 0;
+
+  if (cm->buffer_pool.frame_bufs[ref_frame_idx].mi_rows != cm->mi_rows ||
+      cm->buffer_pool.frame_bufs[ref_frame_idx].mi_cols != cm->mi_cols)
+    return 0;
+
+  int ref_frame_index =
+      cm->buffer_pool.frame_bufs[ref_frame_idx].cur_frame_offset;
+  unsigned int *ref_rf_idx =
+      &cm->buffer_pool.frame_bufs[ref_frame_idx].ref_frame_offset[0];
+   int cur_frame_index = cm->cur_frame.cur_frame_offset;
+  int ref_to_cur = get_relative_dist(cm, ref_frame_index, cur_frame_index);
+
+  // Distance from the projected reference frame to each of ITS OWN
+  // references; ref_sign[] marks the references lying on its far side.
+  for (MV_REFERENCE_FRAME rf = LAST_FRAME; rf <= INTER_REFS_PER_FRAME; ++rf) {
+    ref_offset[rf] =
+        get_relative_dist(cm, ref_frame_index, ref_rf_idx[rf - LAST_FRAME]);
+    // note the inverted sign
+    ref_sign[rf] =
+        get_relative_dist(cm, ref_rf_idx[rf - LAST_FRAME], ref_frame_index) < 0;
+  }
+
+  if (dir == 2) ref_to_cur = -ref_to_cur;
+
+  MV_REF *mv_ref_base = cm->buffer_pool.frame_bufs[ref_frame_idx].mvs;
+  const ptrdiff_t mv_stride =
+    cm->buffer_pool.frame_bufs[ref_frame_idx].mv_stride;
+  const int mvs_rows = (cm->mi_rows + 1) >> 1;
+  const int mvs_cols = (cm->mi_cols + 1) >> 1;
+
+  // Iterate in 8x8 units; columns are widened by MAX_OFFSET_WIDTH so MVs
+  // originating just outside the column range can still project into it.
+  assert(from_y4 >= 0);
+  const int row_start8 = from_y4 >> 1;
+  const int row_end8 = imin(to_y4 >> 1, mvs_rows);
+  const int col_start8 = imax((from_x4 - (MAX_OFFSET_WIDTH >> 2)) >> 1, 0);
+  const int col_end8 = imin((to_x4 + (MAX_OFFSET_WIDTH >> 2)) >> 1, mvs_cols);
+  for (int blk_row = row_start8; blk_row < row_end8; ++blk_row) {
+    for (int blk_col = col_start8; blk_col < col_end8; ++blk_col) {
+      // Sample the MV stored at the bottom-right 4x4 of each 8x8 unit.
+      MV_REF *mv_ref = &mv_ref_base[((blk_row << 1) + 1) * mv_stride +
+                                     (blk_col << 1) + 1];
+      int diridx;
+      const int ref0 = mv_ref->ref_frame[0], ref1 = mv_ref->ref_frame[1];
+      // Prefer mv[1] if its reference has the matching sign and the MV is
+      // small enough (< 2^12 in 1/8-pel units); otherwise try mv[0].
+      if (ref1 > 0 && ref_sign[ref1] &&
+          abs(mv_ref->mv[1].as_mv.row) < (1 << 12) &&
+          abs(mv_ref->mv[1].as_mv.col) < (1 << 12))
+      {
+        diridx = 1;
+      } else if (ref0 > 0 && ref_sign[ref0] &&
+                 abs(mv_ref->mv[0].as_mv.row) < (1 << 12) &&
+                 abs(mv_ref->mv[0].as_mv.col) < (1 << 12))
+      {
+        diridx = 0;
+      } else {
+        continue;
+      }
+      MV fwd_mv = mv_ref->mv[diridx].as_mv;
+
+      if (mv_ref->ref_frame[diridx] > INTRA_FRAME) {
+        int_mv this_mv;
+        int mi_r, mi_c;
+        const int ref_frame_offset = ref_offset[mv_ref->ref_frame[diridx]];
+
+        // Only project over bounded, strictly-forward distances.
+        int pos_valid = abs(ref_frame_offset) <= MAX_FRAME_DISTANCE &&
+                        ref_frame_offset > 0 &&
+                        abs(ref_to_cur) <= MAX_FRAME_DISTANCE;
+
+        if (pos_valid) {
+          get_mv_projection(&this_mv.as_mv, fwd_mv, ref_to_cur,
+                            ref_frame_offset);
+          pos_valid = get_block_position(cm, &mi_r, &mi_c, blk_row, blk_col,
+                                         this_mv.as_mv, dir >> 1);
+        }
+
+        // Store only hits landing inside the requested column range.
+        if (pos_valid && mi_c >= (from_x4 >> 1) && mi_c < (to_x4 >> 1)) {
+          int mi_offset = mi_r * (cm->mi_stride >> 1) + mi_c;
+
+          tpl_mvs_base[mi_offset].mfmv0.as_mv.row = fwd_mv.row;
+          tpl_mvs_base[mi_offset].mfmv0.as_mv.col = fwd_mv.col;
+          tpl_mvs_base[mi_offset].ref_frame_offset = ref_frame_offset;
+        }
+      }
+    }
+  }
+
+  return 1;
+}
+
+#if 0
+// Disabled: original whole-frame motion-field setup. The live code below
+// splits it into av1_setup_motion_field() (per-frame init) and
+// av1_fill_motion_field() (per-region projection) so projection can run
+// on sub-rectangles. NOTE(review): still calls motion_field_projection()
+// with the old 3-argument signature and uses cm->buffer_pool as a
+// pointer -- both would need updating before re-enabling.
+void av1_setup_motion_field(AV1_COMMON *cm) {
+  memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side));
+  if (!cm->seq_params.enable_order_hint) return;
+
+  TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
+  int size = ((cm->mi_rows + MAX_MIB_SIZE) >> 1) * (cm->mi_stride >> 1);
+  for (int idx = 0; idx < size; ++idx) {
+    tpl_mvs_base[idx].mfmv0.as_int = INVALID_MV;
+    tpl_mvs_base[idx].ref_frame_offset = 0;
+  }
+
+  const int cur_order_hint = cm->cur_frame.cur_frame_offset;
+  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+
+  int ref_buf_idx[INTER_REFS_PER_FRAME];
+  int ref_order_hint[INTER_REFS_PER_FRAME];
+
+  for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+    const int ref_idx = ref_frame - LAST_FRAME;
+    const int buf_idx = cm->frame_refs[ref_idx].idx;
+    int order_hint = 0;
+
+    if (buf_idx >= 0) order_hint = frame_bufs[buf_idx].cur_frame_offset;
+
+    ref_buf_idx[ref_idx] = buf_idx;
+    ref_order_hint[ref_idx] = order_hint;
+
+    if (get_relative_dist(cm, order_hint, cur_order_hint) > 0)
+      cm->ref_frame_side[ref_frame] = 1;
+    else if (order_hint == cur_order_hint)
+      cm->ref_frame_side[ref_frame] = -1;
+  }
+
+  int ref_stamp = MFMV_STACK_SIZE - 1;
+
+  if (ref_buf_idx[LAST_FRAME - LAST_FRAME] >= 0) {
+    const int alt_of_lst_order_hint =
+        frame_bufs[ref_buf_idx[LAST_FRAME - LAST_FRAME]]
+            .ref_frame_offset[ALTREF_FRAME - LAST_FRAME];
+
+    const int is_lst_overlay =
+        (alt_of_lst_order_hint == ref_order_hint[GOLDEN_FRAME - LAST_FRAME]);
+    if (!is_lst_overlay) motion_field_projection(cm, LAST_FRAME, 2);
+    --ref_stamp;
+  }
+
+  if (get_relative_dist(cm, ref_order_hint[BWDREF_FRAME - LAST_FRAME],
+                        cur_order_hint) > 0) {
+    if (motion_field_projection(cm, BWDREF_FRAME, 0)) --ref_stamp;
+  }
+
+  if (get_relative_dist(cm, ref_order_hint[ALTREF2_FRAME - LAST_FRAME],
+                        cur_order_hint) > 0) {
+    if (motion_field_projection(cm, ALTREF2_FRAME, 0)) --ref_stamp;
+  }
+
+  if (get_relative_dist(cm, ref_order_hint[ALTREF_FRAME - LAST_FRAME],
+                        cur_order_hint) > 0 &&
+      ref_stamp >= 0)
+    if (motion_field_projection(cm, ALTREF_FRAME, 0)) --ref_stamp;
+
+  if (ref_stamp >= 0 && ref_buf_idx[LAST2_FRAME - LAST_FRAME] >= 0)
+    if (motion_field_projection(cm, LAST2_FRAME, 2)) --ref_stamp;
+}
+#endif
+
+// Per-frame initialization for temporal MV prediction: invalidate every
+// entry of cm->tpl_mvs, clear ref_frame_side, and cache each reference's
+// buffer index and order hint. ref_frame_side[rf] is 1 for references
+// after the current frame in display order, -1 for references with the
+// same order hint, 0 otherwise. The actual MV projection is done later,
+// per region, by av1_fill_motion_field().
+void av1_setup_motion_field(AV1_COMMON *cm) {
+  if (!cm->seq_params.enable_order_hint) return;
+
+  TPL_MV_REF *tpl_mvs_base = cm->tpl_mvs;
+  // mi_rows rounded up to a multiple of 32 (4x4 units), halved to 8x8
+  // units, times the tpl stride in 8x8 units.
+  int size = (((cm->mi_rows + 31) & ~31) >> 1) * (cm->mi_stride >> 1);
+  for (int idx = 0; idx < size; ++idx) {
+    tpl_mvs_base[idx].mfmv0.as_int = INVALID_MV;
+    tpl_mvs_base[idx].ref_frame_offset = 0;
+  }
+
+  memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side));
+  RefCntBuffer *const frame_bufs = cm->buffer_pool.frame_bufs;
+
+  const int cur_order_hint = cm->cur_frame.cur_frame_offset;
+  int *const ref_buf_idx = cm->ref_buf_idx;
+  int *const ref_order_hint = cm->ref_order_hint;
+
+  for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+    const int ref_idx = ref_frame - LAST_FRAME;
+    const int buf_idx = cm->frame_refs[ref_idx].idx;
+    int order_hint = 0;
+
+    if (buf_idx >= 0) order_hint = frame_bufs[buf_idx].cur_frame_offset;
+
+    ref_buf_idx[ref_idx] = buf_idx;
+    ref_order_hint[ref_idx] = order_hint;
+
+    if (get_relative_dist(cm, order_hint, cur_order_hint) > 0)
+      cm->ref_frame_side[ref_frame] = 1;
+    else if (order_hint == cur_order_hint)
+      cm->ref_frame_side[ref_frame] = -1;
+  }
+}
+
+// Populate the temporal MV buffer (cm->tpl_mvs) for the given tile-column
+// span x row span (4x4-block units) by projecting motion from up to
+// MFMV_STACK_SIZE reference frames. Requires av1_setup_motion_field() to
+// have cached cm->ref_buf_idx / cm->ref_order_hint for this frame.
+//
+// Projection order: LAST (unless it is an overlay of the GOLDEN frame's
+// ALTREF), then BWDREF / ALTREF2 / ALTREF when they lie after the current
+// frame in display order, and finally LAST2 if a slot remains.
+void av1_fill_motion_field(AV1_COMMON *cm,
+                           const int tile_col_start4, const int tile_col_end4,
+                           const int row_start4, int row_end4)
+{
+  RefCntBuffer *const frame_bufs = cm->buffer_pool.frame_bufs;
+  const int cur_order_hint = cm->cur_frame.cur_frame_offset;
+  int *const ref_buf_idx = cm->ref_buf_idx;
+  int *const ref_order_hint = cm->ref_order_hint;
+
+  // Number of reference frames we may still project from.
+  int ref_stamp = MFMV_STACK_SIZE - 1;
+
+  if (ref_buf_idx[LAST_FRAME - LAST_FRAME] >= 0) {
+    const int alt_of_lst_order_hint =
+        frame_bufs[ref_buf_idx[LAST_FRAME - LAST_FRAME]]
+            .ref_frame_offset[ALTREF_FRAME - LAST_FRAME];
+
+    // Skip LAST if it is the overlay of the GOLDEN frame's ALTREF; the
+    // projection slot is consumed either way.
+    const int is_lst_overlay =
+        (alt_of_lst_order_hint == ref_order_hint[GOLDEN_FRAME - LAST_FRAME]);
+    if (!is_lst_overlay)
+      motion_field_projection(cm, LAST_FRAME, 2, tile_col_start4,
+                              tile_col_end4, row_start4, row_end4);
+    --ref_stamp;
+  }
+
+  if (get_relative_dist(cm, ref_order_hint[BWDREF_FRAME - LAST_FRAME],
+                        cur_order_hint) > 0) {
+    if (motion_field_projection(cm, BWDREF_FRAME, 0, tile_col_start4,
+                                tile_col_end4, row_start4, row_end4))
+      --ref_stamp;
+  }
+
+  if (get_relative_dist(cm, ref_order_hint[ALTREF2_FRAME - LAST_FRAME],
+                        cur_order_hint) > 0) {
+    if (motion_field_projection(cm, ALTREF2_FRAME, 0, tile_col_start4,
+                                tile_col_end4, row_start4, row_end4))
+      --ref_stamp;
+  }
+
+  if (get_relative_dist(cm, ref_order_hint[ALTREF_FRAME - LAST_FRAME],
+                        cur_order_hint) > 0 &&
+      ref_stamp >= 0) {
+    if (motion_field_projection(cm, ALTREF_FRAME, 0, tile_col_start4,
+                                tile_col_end4, row_start4, row_end4))
+      --ref_stamp;
+  }
+
+  if (ref_stamp >= 0 && ref_buf_idx[LAST2_FRAME - LAST_FRAME] >= 0) {
+    if (motion_field_projection(cm, LAST2_FRAME, 2, tile_col_start4,
+                                tile_col_end4, row_start4, row_end4))
+      --ref_stamp;
+  }
+}
+
+#if 0
+// ---- Disabled region: warped-motion sample gathering plus skip-mode and
+// frame-reference setup, not yet wired into this decoder. ----
+
+// Record one warped-motion correspondence sample: the neighbour block's
+// center point in 1/8-pel coordinates relative to the current block
+// (pts), and that point displaced by the neighbour's first MV (pts_inref).
+// row_offset/col_offset are in mi units; sign_r/sign_c select which side
+// of the neighbour the center is measured from.
+static INLINE void record_samples(MB_MODE_INFO *mbmi, int *pts, int *pts_inref,
+                                  int row_offset, int sign_r, int col_offset,
+                                  int sign_c) {
+  int bw = block_size_wide[mbmi->sb_type];
+  int bh = block_size_high[mbmi->sb_type];
+  int x = col_offset * MI_SIZE + sign_c * AOMMAX(bw, MI_SIZE) / 2 - 1;
+  int y = row_offset * MI_SIZE + sign_r * AOMMAX(bh, MI_SIZE) / 2 - 1;
+
+  pts[0] = (x * 8);
+  pts[1] = (y * 8);
+  pts_inref[0] = (x * 8) + mbmi->mv[0].as_mv.col;
+  pts_inref[1] = (y * 8) + mbmi->mv[0].as_mv.row;
+}
+
+// Select samples according to the motion vector difference.
+// Discards samples whose projected displacement differs from the block MV
+// by more than a block-size-dependent threshold, compacting the surviving
+// (pts, pts_inref) pairs to the front of the arrays. Returns the number
+// of samples kept (at least 1, even if all exceeded the threshold).
+int selectSamples(MV *mv, int *pts, int *pts_inref, int len, BLOCK_SIZE bsize) {
+  const int bw = block_size_wide[bsize];
+  const int bh = block_size_high[bsize];
+  const int thresh = clamp(AOMMAX(bw, bh), 16, 112);
+  int pts_mvd[SAMPLES_ARRAY_SIZE] = { 0 };
+  int i, j, k, l = len;
+  int ret = 0;
+  assert(len <= LEAST_SQUARES_SAMPLES_MAX);
+
+  // Obtain the motion vector difference. Samples over the threshold are
+  // marked -1 for removal; ret counts the survivors.
+  for (i = 0; i < len; ++i) {
+    pts_mvd[i] = abs(pts_inref[2 * i] - pts[2 * i] - mv->col) +
+                 abs(pts_inref[2 * i + 1] - pts[2 * i + 1] - mv->row);
+
+    if (pts_mvd[i] > thresh)
+      pts_mvd[i] = -1;
+    else
+      ret++;
+  }
+
+  // Keep at least 1 sample.
+  if (!ret) return 1;
+
+  // Two-pointer compaction: overwrite each discarded sample (scanned from
+  // the front) with a surviving one (scanned from the back).
+  i = 0;
+  j = l - 1;
+  for (k = 0; k < l - ret; k++) {
+    while (pts_mvd[i] != -1) i++;
+    while (pts_mvd[j] == -1) j--;
+    assert(i != j);
+    if (i > j) break;
+
+    // Replace the discarded samples;
+    pts_mvd[i] = pts_mvd[j];
+    pts[2 * i] = pts[2 * j];
+    pts[2 * i + 1] = pts[2 * j + 1];
+    pts_inref[2 * i] = pts_inref[2 * j];
+    pts_inref[2 * i + 1] = pts_inref[2 * j + 1];
+    i++;
+    j--;
+  }
+
+  return ret;
+}
+
+// Note: Samples returned are at 1/8-pel precision.
+// Samples are the neighbour block center point's coordinates relative to
+// the left-top pixel of the current block.
+// Gathers warped-motion samples from the above row, left column, top-left
+// and top-right neighbours that use the same single reference frame.
+// Returns the number of samples written (up to LEAST_SQUARES_SAMPLES_MAX).
+int findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col,
+                int *pts, int *pts_inref) {
+  MB_MODE_INFO *const mbmi0 = xd->mi[0];
+  int ref_frame = mbmi0->ref_frame[0];
+  int up_available = xd->up_available;
+  int left_available = xd->left_available;
+  int i, mi_step = 1, np = 0;
+
+  const TileInfo *const tile = &xd->tile;
+  int do_tl = 1;
+  int do_tr = 1;
+
+  // scan the nearest above rows
+  if (up_available) {
+    int mi_row_offset = -1;
+    MB_MODE_INFO *mbmi = xd->mi[mi_row_offset * xd->mi_stride];
+    uint8_t n8_w = mi_size_wide[mbmi->sb_type];
+
+    if (xd->n8_w <= n8_w) {
+      // Handle "current block width <= above block width" case.
+      int col_offset = -mi_col % n8_w;
+
+      if (col_offset < 0) do_tl = 0;
+      if (col_offset + n8_w > xd->n8_w) do_tr = 0;
+
+      if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+        record_samples(mbmi, pts, pts_inref, 0, -1, col_offset, 1);
+        pts += 2;
+        pts_inref += 2;
+        np++;
+        if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
+      }
+    } else {
+      // Handle "current block width > above block width" case.
+      for (i = 0; i < AOMMIN(xd->n8_w, cm->mi_cols - mi_col); i += mi_step) {
+        int mi_col_offset = i;
+        // NOTE(review): 'mi' is not declared anywhere in this function and
+        // '&mi->mbmi' assumes the old libaom MODE_INFO wrapper layout --
+        // these two lines will not compile if this #if 0 block is enabled.
+        mi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+        mbmi = &mi->mbmi;
+        n8_w = mi_size_wide[mbmi->sb_type];
+        mi_step = AOMMIN(xd->n8_w, n8_w);
+
+        if (mbmi->ref_frame[0] == ref_frame &&
+            mbmi->ref_frame[1] == NONE_FRAME) {
+          record_samples(mbmi, pts, pts_inref, 0, -1, i, 1);
+          pts += 2;
+          pts_inref += 2;
+          np++;
+          if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
+        }
+      }
+    }
+  }
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
+
+  // scan the nearest left columns
+  if (left_available) {
+    int mi_col_offset = -1;
+
+    MB_MODE_INFO *mi = xd->mi[mi_col_offset];
+    // NOTE(review): 'mbmi' is not declared in this scope (the local above
+    // is named 'mi') -- rename before re-enabling this block.
+    uint8_t n8_h = mi_size_high[mbmi->sb_type];
+
+    if (xd->n8_h <= n8_h) {
+      // Handle "current block height <= above block height" case.
+      int row_offset = -mi_row % n8_h;
+
+      if (row_offset < 0) do_tl = 0;
+
+      if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+        record_samples(mbmi, pts, pts_inref, row_offset, 1, 0, -1);
+        pts += 2;
+        pts_inref += 2;
+        np++;
+        if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
+      }
+    } else {
+      // Handle "current block height > above block height" case.
+      for (i = 0; i < AOMMIN(xd->n8_h, cm->mi_rows - mi_row); i += mi_step) {
+        int mi_row_offset = i;
+        mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+        n8_h = mi_size_high[mbmi->sb_type];
+        mi_step = AOMMIN(xd->n8_h, n8_h);
+
+        if (mbmi->ref_frame[0] == ref_frame &&
+            mbmi->ref_frame[1] == NONE_FRAME) {
+          record_samples(mbmi, pts, pts_inref, i, 1, 0, -1);
+          pts += 2;
+          pts_inref += 2;
+          np++;
+          if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
+        }
+      }
+    }
+  }
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
+
+  // Top-left block
+  if (do_tl && left_available && up_available) {
+    int mi_row_offset = -1;
+    int mi_col_offset = -1;
+
+    MB_MODE_INFO *mbmi = xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+
+    if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+      record_samples(mbmi, pts, pts_inref, 0, -1, 0, -1);
+      pts += 2;
+      pts_inref += 2;
+      np++;
+      if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
+    }
+  }
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
+
+  // Top-right block
+  if (do_tr &&
+      has_top_right(cm, xd, mi_row, mi_col, AOMMAX(xd->n8_w, xd->n8_h))) {
+    POSITION trb_pos = { -1, xd->n8_w };
+
+    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, &trb_pos)) {
+      int mi_row_offset = -1;
+      int mi_col_offset = xd->n8_w;
+
+      MB_MODE_INFO *mbmi =
+          xd->mi[mi_col_offset + mi_row_offset * xd->mi_stride];
+
+      if (mbmi->ref_frame[0] == ref_frame && mbmi->ref_frame[1] == NONE_FRAME) {
+        record_samples(mbmi, pts, pts_inref, 0, -1, xd->n8_w, 1);
+        np++;
+        if (np >= LEAST_SQUARES_SAMPLES_MAX) return LEAST_SQUARES_SAMPLES_MAX;
+      }
+    }
+  }
+  assert(np <= LEAST_SQUARES_SAMPLES_MAX);
+
+  return np;
+}
+
+// Decide whether skip mode is allowed for this frame and, if so, which
+// two reference frames it uses: the nearest forward + nearest backward
+// reference, or -- failing that -- the two nearest forward references.
+// Requires order hints; disabled for intra-only / single-reference frames.
+void av1_setup_skip_mode_allowed(AV1_COMMON *cm) {
+  cm->is_skip_mode_allowed = 0;
+  cm->ref_frame_idx_0 = cm->ref_frame_idx_1 = INVALID_IDX;
+
+  if (!cm->seq_params.enable_order_hint || frame_is_intra_only(cm) ||
+      cm->reference_mode == SINGLE_REFERENCE)
+    return;
+
+  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+  const int cur_frame_offset = cm->frame_offset;
+  // [0] = best forward candidate (order hint, start -1);
+  // [1] = best backward candidate (order hint, start INT_MAX).
+  int ref_frame_offset[2] = { -1, INT_MAX };
+  int ref_idx[2] = { INVALID_IDX, INVALID_IDX };
+
+  // Identify the nearest forward and backward references.
+  for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+    const int buf_idx = cm->frame_refs[i].idx;
+    if (buf_idx == INVALID_IDX) continue;
+
+    const int ref_offset = frame_bufs[buf_idx].cur_frame_offset;
+    if (get_relative_dist(cm, ref_offset, cur_frame_offset) < 0) {
+      // Forward reference
+      if (ref_frame_offset[0] == -1 ||
+          get_relative_dist(cm, ref_offset, ref_frame_offset[0]) > 0) {
+        ref_frame_offset[0] = ref_offset;
+        ref_idx[0] = i;
+      }
+    } else if (get_relative_dist(cm, ref_offset, cur_frame_offset) > 0) {
+      // Backward reference
+      if (ref_frame_offset[1] == INT_MAX ||
+          get_relative_dist(cm, ref_offset, ref_frame_offset[1]) < 0) {
+        ref_frame_offset[1] = ref_offset;
+        ref_idx[1] = i;
+      }
+    }
+  }
+
+  if (ref_idx[0] != INVALID_IDX && ref_idx[1] != INVALID_IDX) {
+    // == Bi-directional prediction ==
+    cm->is_skip_mode_allowed = 1;
+    cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]);
+    cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]);
+  } else if (ref_idx[0] != INVALID_IDX && ref_idx[1] == INVALID_IDX) {
+    // == Forward prediction only ==
+    // Identify the second nearest forward reference.
+    ref_frame_offset[1] = -1;
+    for (int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
+      const int buf_idx = cm->frame_refs[i].idx;
+      if (buf_idx == INVALID_IDX) continue;
+
+      const int ref_offset = frame_bufs[buf_idx].cur_frame_offset;
+      if ((ref_frame_offset[0] != -1 &&
+           get_relative_dist(cm, ref_offset, ref_frame_offset[0]) < 0) &&
+          (ref_frame_offset[1] == -1 ||
+           get_relative_dist(cm, ref_offset, ref_frame_offset[1]) > 0)) {
+        // Second closest forward reference
+        ref_frame_offset[1] = ref_offset;
+        ref_idx[1] = i;
+      }
+    }
+    if (ref_frame_offset[1] != -1) {
+      cm->is_skip_mode_allowed = 1;
+      cm->ref_frame_idx_0 = AOMMIN(ref_idx[0], ref_idx[1]);
+      cm->ref_frame_idx_1 = AOMMAX(ref_idx[0], ref_idx[1]);
+    }
+  }
+}
+
+// Descriptor used to order reference buffers by temporal position.
+typedef struct {
+  int map_idx;   // frame map index
+  int buf_idx;   // frame buffer index
+  int sort_idx;  // index based on the offset to be used for sorting
+} REF_FRAME_INFO;
+
+// qsort comparator: ascending sort_idx, with ties broken by ascending
+// map_idx so the resulting order is deterministic.
+static int compare_ref_frame_info(const void *arg_a, const void *arg_b) {
+  const REF_FRAME_INFO *info_a = (REF_FRAME_INFO *)arg_a;
+  const REF_FRAME_INFO *info_b = (REF_FRAME_INFO *)arg_b;
+
+  if (info_a->sort_idx < info_b->sort_idx) return -1;
+  if (info_a->sort_idx > info_b->sort_idx) return 1;
+  return (info_a->map_idx < info_b->map_idx)
+             ? -1
+             : ((info_a->map_idx > info_b->map_idx) ? 1 : 0);
+}
+
+// Bind logical reference slot 'frame_idx' to the physical buffer described
+// by ref_info: buffer index, picture buffer pointer and map index.
+static void set_ref_frame_info(AV1_COMMON *const cm, int frame_idx,
+                               REF_FRAME_INFO *ref_info) {
+  assert(frame_idx >= 0 && frame_idx <= INTER_REFS_PER_FRAME);
+
+  const int buf_idx = ref_info->buf_idx;
+
+  cm->frame_refs[frame_idx].idx = buf_idx;
+  cm->frame_refs[frame_idx].buf = &cm->buffer_pool->frame_bufs[buf_idx].buf;
+  cm->frame_refs[frame_idx].map_idx = ref_info->map_idx;
+}
+
+// Assign the INTER_REFS_PER_FRAME reference slots (LAST..ALTREF) from the
+// REF_FRAMES-entry reference map, given the signalled map indices for LAST
+// and GOLDEN. References are ordered by order-hint distance: the furthest
+// and nearest future frames become ALTREF/BWDREF (then ALTREF2), and the
+// remaining past frames fill LAST2/LAST3/BWDREF/ALTREF2/ALTREF in
+// anti-chronological order.
+void av1_set_frame_refs(AV1_COMMON *const cm, int lst_map_idx,
+                        int gld_map_idx) {
+  BufferPool *const pool = cm->buffer_pool;
+  RefCntBuffer *const frame_bufs = pool->frame_bufs;
+
+  assert(cm->seq_params.enable_order_hint);
+  // NOTE(review): this field is spelled order_hint_bits_minus1 (no second
+  // underscore) elsewhere in this file — confirm which spelling the struct
+  // actually declares.
+  assert(cm->seq_params.order_hint_bits_minus_1 >= 0);
+  const int cur_frame_offset = (int)cm->frame_offset;
+  const int cur_frame_sort_idx = 1 << cm->seq_params.order_hint_bits_minus_1;
+
+  REF_FRAME_INFO ref_frame_info[REF_FRAMES];
+  int ref_flag_list[INTER_REFS_PER_FRAME] = { 0, 0, 0, 0, 0, 0, 0 };
+  // Fix: these were read below without ever being declared or initialized;
+  // -1 marks "not seen" so an invalid LAST/GOLDEN reference is detected.
+  int lst_frame_sort_idx = -1;
+  int gld_frame_sort_idx = -1;
+
+  for (int i = 0; i < REF_FRAMES; ++i) {
+    const int map_idx = i;
+
+    ref_frame_info[i].map_idx = map_idx;
+    ref_frame_info[i].sort_idx = -1;
+
+    const int buf_idx = cm->ref_frame_map[map_idx];
+    ref_frame_info[i].buf_idx = buf_idx;
+
+    if (buf_idx < 0 || buf_idx >= FRAME_BUFFERS) continue;
+    // TODO([email protected]): To verify the checking on ref_count.
+    if (frame_bufs[buf_idx].ref_count <= 0) continue;
+
+    const int offset = (int)frame_bufs[buf_idx].cur_frame_offset;
+    ref_frame_info[i].sort_idx =
+        (offset == -1) ? -1
+                       : cur_frame_sort_idx +
+                             get_relative_dist(cm, offset, cur_frame_offset);
+    assert(ref_frame_info[i].sort_idx >= -1);
+
+    if (map_idx == lst_map_idx) lst_frame_sort_idx = ref_frame_info[i].sort_idx;
+    if (map_idx == gld_map_idx) gld_frame_sort_idx = ref_frame_info[i].sort_idx;
+  }
+
+  // Confirm both LAST_FRAME and GOLDEN_FRAME are valid forward reference
+  // frames.
+  if (lst_frame_sort_idx == -1 || lst_frame_sort_idx >= cur_frame_sort_idx) {
+    aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+                       "Inter frame requests a look-ahead frame as LAST");
+  }
+  if (gld_frame_sort_idx == -1 || gld_frame_sort_idx >= cur_frame_sort_idx) {
+    aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+                       "Inter frame requests a look-ahead frame as GOLDEN");
+  }
+
+  // Sort ref frames based on their frame_offset values.
+  qsort(ref_frame_info, REF_FRAMES, sizeof(REF_FRAME_INFO),
+        compare_ref_frame_info);
+
+  // Identify forward and backward reference frames.
+  // Forward  reference: offset < cur_frame_offset
+  // Backward reference: offset >= cur_frame_offset
+  int fwd_start_idx = 0, fwd_end_idx = REF_FRAMES - 1;
+
+  for (int i = 0; i < REF_FRAMES; i++) {
+    if (ref_frame_info[i].sort_idx == -1) {
+      fwd_start_idx++;
+      continue;
+    }
+
+    if (ref_frame_info[i].sort_idx >= cur_frame_sort_idx) {
+      fwd_end_idx = i - 1;
+      break;
+    }
+  }
+
+  int bwd_start_idx = fwd_end_idx + 1;
+  int bwd_end_idx = REF_FRAMES - 1;
+
+  // === Backward Reference Frames ===
+
+  // == ALTREF_FRAME == (the furthest future frame)
+  if (bwd_start_idx <= bwd_end_idx) {
+    set_ref_frame_info(cm, ALTREF_FRAME - LAST_FRAME,
+                       &ref_frame_info[bwd_end_idx]);
+    ref_flag_list[ALTREF_FRAME - LAST_FRAME] = 1;
+    bwd_end_idx--;
+  }
+
+  // == BWDREF_FRAME == (the nearest future frame)
+  if (bwd_start_idx <= bwd_end_idx) {
+    set_ref_frame_info(cm, BWDREF_FRAME - LAST_FRAME,
+                       &ref_frame_info[bwd_start_idx]);
+    ref_flag_list[BWDREF_FRAME - LAST_FRAME] = 1;
+    bwd_start_idx++;
+  }
+
+  // == ALTREF2_FRAME ==
+  if (bwd_start_idx <= bwd_end_idx) {
+    set_ref_frame_info(cm, ALTREF2_FRAME - LAST_FRAME,
+                       &ref_frame_info[bwd_start_idx]);
+    ref_flag_list[ALTREF2_FRAME - LAST_FRAME] = 1;
+  }
+
+  // === Forward Reference Frames ===
+
+  for (int i = fwd_start_idx; i <= fwd_end_idx; ++i) {
+    // == LAST_FRAME ==
+    if (ref_frame_info[i].map_idx == lst_map_idx) {
+      set_ref_frame_info(cm, LAST_FRAME - LAST_FRAME, &ref_frame_info[i]);
+      ref_flag_list[LAST_FRAME - LAST_FRAME] = 1;
+    }
+
+    // == GOLDEN_FRAME ==
+    if (ref_frame_info[i].map_idx == gld_map_idx) {
+      set_ref_frame_info(cm, GOLDEN_FRAME - LAST_FRAME, &ref_frame_info[i]);
+      ref_flag_list[GOLDEN_FRAME - LAST_FRAME] = 1;
+    }
+  }
+
+  assert(ref_flag_list[LAST_FRAME - LAST_FRAME] == 1 &&
+         ref_flag_list[GOLDEN_FRAME - LAST_FRAME] == 1);
+
+  // == LAST2_FRAME ==
+  // == LAST3_FRAME ==
+  // == BWDREF_FRAME ==
+  // == ALTREF2_FRAME ==
+  // == ALTREF_FRAME ==
+
+  // Set up the reference frames in the anti-chronological order.
+  static const MV_REFERENCE_FRAME ref_frame_list[INTER_REFS_PER_FRAME - 2] = {
+    LAST2_FRAME, LAST3_FRAME, BWDREF_FRAME, ALTREF2_FRAME, ALTREF_FRAME
+  };
+
+  int ref_idx;
+  for (ref_idx = 0; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) {
+    const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx];
+
+    if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue;
+
+    // Skip the forward refs already taken by LAST/GOLDEN.
+    while (fwd_start_idx <= fwd_end_idx &&
+           (ref_frame_info[fwd_end_idx].map_idx == lst_map_idx ||
+            ref_frame_info[fwd_end_idx].map_idx == gld_map_idx)) {
+      fwd_end_idx--;
+    }
+    if (fwd_start_idx > fwd_end_idx) break;
+
+    set_ref_frame_info(cm, ref_frame - LAST_FRAME,
+                       &ref_frame_info[fwd_end_idx]);
+    ref_flag_list[ref_frame - LAST_FRAME] = 1;
+
+    fwd_end_idx--;
+  }
+
+  // Assign all the remaining frame(s), if any, to the earliest reference frame.
+  for (; ref_idx < (INTER_REFS_PER_FRAME - 2); ref_idx++) {
+    const MV_REFERENCE_FRAME ref_frame = ref_frame_list[ref_idx];
+    if (ref_flag_list[ref_frame - LAST_FRAME] == 1) continue;
+    set_ref_frame_info(cm, ref_frame - LAST_FRAME,
+                       &ref_frame_info[fwd_start_idx]);
+    ref_flag_list[ref_frame - LAST_FRAME] = 1;
+  }
+
+  for (int i = 0; i < INTER_REFS_PER_FRAME; i++) {
+    assert(ref_flag_list[i] == 1);
+  }
+}
+#endif
+
+// dav1d block sizes, ordered from largest (128x128) to smallest (4x4).
+// Used as index into av1_block_dimensions[] and the bs_to_sbtype[] /
+// sbtype_to_bs[] mapping tables below.
+enum BlockSize {
+    BS_128x128,
+    BS_128x64,
+    BS_64x128,
+    BS_64x64,
+    BS_64x32,
+    BS_64x16,
+    BS_32x64,
+    BS_32x32,
+    BS_32x16,
+    BS_32x8,
+    BS_16x64,
+    BS_16x32,
+    BS_16x16,
+    BS_16x8,
+    BS_16x4,
+    BS_8x32,
+    BS_8x16,
+    BS_8x8,
+    BS_8x4,
+    BS_4x16,
+    BS_4x8,
+    BS_4x4,
+    N_BS_SIZES,
+};
+// Per-BlockSize geometry; [0]/[1] are width/height in 4px units (see the
+// splat_*() helpers) — meaning of [2]/[3] not visible here, confirm in tables.c.
+extern const uint8_t av1_block_dimensions[N_BS_SIZES][4];
+// Maps dav1d's enum BlockSize to the imported libaom BLOCK_SIZE values.
+const uint8_t bs_to_sbtype[N_BS_SIZES] = {
+    [BS_128x128] = BLOCK_128X128,
+    [BS_128x64] = BLOCK_128X64,
+    [BS_64x128] = BLOCK_64X128,
+    [BS_64x64] = BLOCK_64X64,
+    [BS_64x32] = BLOCK_64X32,
+    [BS_64x16] = BLOCK_64X16,
+    [BS_32x64] = BLOCK_32X64,
+    [BS_32x32] = BLOCK_32X32,
+    [BS_32x16] = BLOCK_32X16,
+    [BS_32x8] = BLOCK_32X8,
+    [BS_16x64] = BLOCK_16X64,
+    [BS_16x32] = BLOCK_16X32,
+    [BS_16x16] = BLOCK_16X16,
+    [BS_16x8] = BLOCK_16X8,
+    [BS_16x4] = BLOCK_16X4,
+    [BS_8x32] = BLOCK_8X32,
+    [BS_8x16] = BLOCK_8X16,
+    [BS_8x8] = BLOCK_8X8,
+    [BS_8x4] = BLOCK_8X4,
+    [BS_4x16] = BLOCK_4X16,
+    [BS_4x8] = BLOCK_4X8,
+    [BS_4x4] = BLOCK_4X4,
+};
+// Inverse of bs_to_sbtype: maps libaom BLOCK_SIZE to dav1d's enum BlockSize.
+const uint8_t sbtype_to_bs[BLOCK_SIZES_ALL] = {
+    [BLOCK_128X128] = BS_128x128,
+    [BLOCK_128X64] = BS_128x64,
+    [BLOCK_64X128] = BS_64x128,
+    [BLOCK_64X64] = BS_64x64,
+    [BLOCK_64X32] = BS_64x32,
+    [BLOCK_64X16] = BS_64x16,
+    [BLOCK_32X64] = BS_32x64,
+    [BLOCK_32X32] = BS_32x32,
+    [BLOCK_32X16] = BS_32x16,
+    [BLOCK_32X8] = BS_32x8,
+    [BLOCK_16X64] = BS_16x64,
+    [BLOCK_16X32] = BS_16x32,
+    [BLOCK_16X16] = BS_16x16,
+    [BLOCK_16X8] = BS_16x8,
+    [BLOCK_16X4] = BS_16x4,
+    [BLOCK_8X32] = BS_8x32,
+    [BLOCK_8X16] = BS_8x16,
+    [BLOCK_8X8] = BS_8x8,
+    [BLOCK_8X4] = BS_8x4,
+    [BLOCK_4X16] = BS_4x16,
+    [BLOCK_4X8] = BS_4x8,
+    [BLOCK_4X4] = BS_4x4,
+};
+
+// Clamp a motion vector (1/8-pel units) so the prediction stays within the
+// allowed range around the frame: positions are in 4px units, hence the
+// "* 4 * 8" conversion to 1/8-pel; the "+ 4" allows the MV to point up to
+// 16 pixels beyond the frame edge. The asymmetry (bw4/bh4 on the low bound,
+// "0 * bw4" on the high bound) mirrors the spec formula — presumably
+// intentional; the dead multiply is kept for symmetry of the expression.
+static inline struct MV av1_clamp_mv(const struct MV mv,
+                                    const int bx4, const int by4,
+                                    const int bw4, const int bh4,
+                                    const int iw4, const int ih4)
+{
+    const int left = -(bx4 + bw4 + 4) * 4 * 8;
+    const int right = (iw4 - bx4 + 0 * bw4 + 4) * 4 * 8;
+    const int top = -(by4 + bh4 + 4) * 4 * 8;
+    const int bottom = (ih4 - by4 + 0 * bh4 + 4) * 4 * 8;
+
+    return (struct MV) { .col = iclip(mv.col, left, right),
+                         .row = iclip(mv.row, top, bottom) };
+}
+
+#include <stdio.h>
+
+// Collect the MV prediction candidates for one block by wrapping dav1d's
+// state in a minimal libaom MACROBLOCKD and calling av1_find_mv_refs().
+//
+// mvstack/cnt:  receive the candidate stack for the (possibly compound)
+//               reference pair and its entry count.
+// mvlist:       receives the per-reference 2-entry clamped MV lists.
+// ctx:          if non-NULL, receives the prediction mode context.
+// refidx_dav1d: dav1d reference indices ([1] == -1 means single reference);
+//               libaom reference types are these values + 1.
+// w4/h4:        frame width/height in 4px units; by4/bx4: block position;
+//               bs/bp: block size/partition; tile_*4: tile bounds, 4px units.
+void av1_find_ref_mvs(CANDIDATE_MV *mvstack, int *cnt, int_mv (*mvlist)[2],
+                      int *ctx, int refidx_dav1d[2],
+                      int w4, int h4, int bs, int bp, int by4, int bx4,
+                      int tile_col_start4, int tile_col_end4,
+                      int tile_row_start4, int tile_row_end4,
+                      AV1_COMMON *cm)
+{
+    const int bw4 = av1_block_dimensions[bs][0];
+    const int bh4 = av1_block_dimensions[bs][1];
+    int stride = cm->cur_frame.mv_stride;
+    MACROBLOCKD xd = (MACROBLOCKD) {
+        .n8_w = bw4,
+        .n8_h = bh4,
+        .mi_stride = stride,
+        .up_available = by4 > tile_row_start4,
+        .left_available = bx4 > tile_col_start4,
+        .tile = {
+            .mi_col_end = AOMMIN(w4, tile_col_end4),
+            .mi_row_end = AOMMIN(h4, tile_row_end4),
+            .tg_horz_boundary = 0,
+            .mi_row_start = tile_row_start4,
+            .mi_col_start = tile_col_start4,
+        },
+        .mi = (MB_MODE_INFO *) &cm->cur_frame.mvs[by4 * stride + bx4],
+        // Edge distances in 1/8-pel units (4px units * 32).
+        .mb_to_bottom_edge = (h4 - bh4 - by4) * 32,
+        .mb_to_left_edge = -bx4 * 32,
+        .mb_to_right_edge = (w4 - bw4 - bx4) * 32,
+        .mb_to_top_edge = -by4 * 32,
+        .is_sec_rect = 0,
+        .cur_mi = {
+            .partition = bp,
+        },
+    };
+    xd.mi->sb_type = bs_to_sbtype[bs];
+    if (xd.n8_w < xd.n8_h) {
+        // Only mark is_sec_rect as 1 for the last block.
+        // For PARTITION_VERT_4, it would be (0, 0, 0, 1);
+        // For other partitions, it would be (0, 1).
+        if (!((bx4 + xd.n8_w) & (xd.n8_h - 1))) xd.is_sec_rect = 1;
+    }
+
+    if (xd.n8_w > xd.n8_h)
+        if (by4 & (xd.n8_w - 1)) xd.is_sec_rect = 1;
+
+    MV_REFERENCE_FRAME rf[2] = { refidx_dav1d[0] + 1, refidx_dav1d[1] + 1 };
+    const int refidx = av1_ref_frame_type(rf);
+    int16_t single_context[MODE_CTX_REF_FRAMES];
+    uint8_t mv_cnt[MODE_CTX_REF_FRAMES] = { 0 };
+    CANDIDATE_MV mv_stack[MODE_CTX_REF_FRAMES][MAX_REF_MV_STACK_SIZE];
+    memset(mv_stack, 0, sizeof(mv_stack));
+    int_mv mv_list[MODE_CTX_REF_FRAMES][MAX_MV_REF_CANDIDATES] = { { { 0 } } };
+    int_mv gmvs[MODE_CTX_REF_FRAMES];
+    av1_find_mv_refs(cm, &xd, xd.mi, refidx, mv_cnt,
+                     mv_stack, mv_list, gmvs, by4, bx4,
+                     single_context);
+#if !defined(NDEBUG)
+    // Cross-check that dav1d's av1_clamp_mv() agrees with libaom's
+    // clamp_mv_ref() on the candidates we are about to return.
+    if (refidx_dav1d[1] == -1 && mv_cnt[refidx] >= 1) {
+        int_mv tmpa = { .as_int = mv_stack[refidx][0].this_mv.as_int };
+        clamp_mv_ref(&tmpa.as_mv, bw4 * 4, bh4 * 4, &xd);
+        int_mv tmp1 = { .as_mv =
+                 av1_clamp_mv(mv_stack[refidx][0].this_mv.as_mv,
+                              bx4, by4, bw4, bh4, w4, h4) };
+        assert(tmpa.as_int == tmp1.as_int);
+        assert(tmp1.as_int == mv_list[refidx][0].as_int);
+        if (mv_cnt[refidx] >= 2) {
+            int_mv tmpb = { .as_int = mv_stack[refidx][1].this_mv.as_int };
+            clamp_mv_ref(&tmpb.as_mv, bw4 * 4, bh4 * 4, &xd);
+            int_mv tmp2 = { .as_mv =
+                     av1_clamp_mv(mv_stack[refidx][1].this_mv.as_mv,
+                                  bx4, by4, bw4, bh4, w4, h4) };
+            assert(tmp2.as_int == tmpb.as_int);
+            assert(tmp2.as_int == mv_list[refidx][1].as_int);
+        }
+    }
+#endif
+    // Export the stack and lists for the selected reference (pair).
+    for (int i = 0; i < mv_cnt[refidx]; i++)
+        mvstack[i] = mv_stack[refidx][i];
+    *cnt = mv_cnt[refidx];
+
+    mvlist[0][0] = mv_list[refidx_dav1d[0] + 1][0];
+    mvlist[0][1] = mv_list[refidx_dav1d[0] + 1][1];
+    if (refidx_dav1d[1] != -1) {
+        mvlist[1][0] = mv_list[refidx_dav1d[1] + 1][0];
+        mvlist[1][1] = mv_list[refidx_dav1d[1] + 1][1];
+    }
+
+    if (ctx) {
+        if (refidx_dav1d[1] == -1)
+            *ctx = single_context[refidx_dav1d[0] + 1];
+        else
+            *ctx = av1_mode_context_analyzer(single_context, rf);
+    }
+    // (Removed: a large `if (0 && ...)` debug-printf block that was dead
+    // code, plus an #if 0 copy of the av1_find_mv_refs() prototype.)
+}
+
+// Per-frame initialization of the ref-MV shim: (re)allocates the temporal
+// MV buffer when the frame geometry changed, then copies the frame's POCs,
+// global motion parameters and reference metadata into AV1_COMMON, and
+// runs the libaom frame-buffer-refs / motion-field setup.
+// w8/h8: frame size in 8px units; stride: MV grid stride in 4px units.
+void av1_init_ref_mv_common(AV1_COMMON *cm,
+                            const int w8, const int h8,
+                            const ptrdiff_t stride,
+                            const int allow_sb128,
+                            MV_REF *cur,
+                            MV_REF *ref_mvs[7],
+                            const unsigned cur_poc,
+                            const unsigned ref_poc[7],
+                            const unsigned ref_ref_poc[7][7],
+                            const WarpedMotionParams gmv[7],
+                            const int allow_hp,
+                            const int force_int_mv,
+                            const int allow_ref_frame_mvs,
+                            const int order_hint)
+{
+    // Reallocate the temporal MV buffer only when the dimensions change.
+    if (cm->mi_cols != (w8 << 1) || cm->mi_rows != (h8 << 1)) {
+        const int align_h = (h8 + 15) & ~15;
+        free(cm->tpl_mvs); // free(NULL) is a no-op, no guard needed
+        // NOTE(review): allocation failure is not handled; a NULL tpl_mvs
+        // would fault later — TODO propagate an error to the caller.
+        cm->tpl_mvs = malloc(sizeof(*cm->tpl_mvs) * (stride >> 1) * align_h);
+        for (int i = 0; i < 7; i++)
+            cm->frame_refs[i].idx = i;
+        cm->mi_cols = w8 << 1;
+        cm->mi_rows = h8 << 1;
+        cm->mi_stride = stride;
+        for (int i = 0; i < 7; i++) {
+            cm->buffer_pool.frame_bufs[i].mi_rows = cm->mi_rows;
+            cm->buffer_pool.frame_bufs[i].mi_cols = cm->mi_cols;
+            cm->buffer_pool.frame_bufs[i].mv_stride = stride;
+        }
+        cm->cur_frame.mv_stride = stride;
+    }
+
+    cm->allow_high_precision_mv = allow_hp;
+    cm->seq_params.sb_size = allow_sb128 ? BLOCK_128X128 : BLOCK_64X64;
+
+    cm->seq_params.enable_order_hint = !!order_hint;
+    cm->seq_params.order_hint_bits_minus1 = order_hint - 1;
+    // FIXME get these from the sequence/frame headers instead of hardcoding
+    cm->frame_parallel_decode = 0;
+    cm->cur_frame_force_integer_mv = force_int_mv;
+
+    // global_motion[0] (INTRA_FRAME) is left untouched.
+    memcpy(&cm->global_motion[1], gmv, sizeof(*gmv) * 7);
+
+    cm->frame_offset = cur_poc;
+    cm->allow_ref_frame_mvs = allow_ref_frame_mvs;
+    cm->cur_frame.mvs = cur;
+    for (int i = 0; i < 7; i++) {
+        cm->buffer_pool.frame_bufs[i].mvs = ref_mvs[i];
+        // A reference without saved MVs is treated as intra-only.
+        cm->buffer_pool.frame_bufs[i].intra_only = ref_mvs[i] == NULL;
+        cm->buffer_pool.frame_bufs[i].cur_frame_offset = ref_poc[i];
+        for (int j = 0; j < 7; j++)
+            cm->buffer_pool.frame_bufs[i].ref_frame_offset[j] =
+                ref_ref_poc[i][j];
+    }
+    av1_setup_frame_buf_refs(cm);
+    for (int i = 0; i < 7; i++) {
+        const int ref_poc = cm->buffer_pool.frame_bufs[i].cur_frame_offset;
+        // Sign bias: set when the reference lies in the future.
+        cm->ref_frame_sign_bias[1 + i] = get_relative_dist(cm, ref_poc, cur_poc) > 0;
+    }
+    av1_setup_motion_field(cm);
+}
+
+// Per-sbrow/tile entry point: fills the projected (temporal) motion field
+// for the given tile-column and row range (4px units). Pure forwarder to
+// the imported libaom av1_fill_motion_field().
+void av1_init_ref_mv_tile_row(AV1_COMMON *cm,
+                              int tile_col_start4, int tile_col_end4,
+                              int row_start4, int row_end4)
+{
+    av1_fill_motion_field(cm, tile_col_start4, tile_col_end4,
+                          row_start4, row_end4);
+}
+
+// Allocate a zero-initialized ref-MV context (one per frame thread).
+// Returns NULL on allocation failure (the previous malloc+memset version
+// dereferenced the unchecked pointer and crashed on OOM).
+AV1_COMMON *av1_alloc_ref_mv_common(void) {
+    AV1_COMMON *const cm = calloc(1, sizeof(*cm));
+    return cm;
+}
+
+// Release a ref-MV context and its temporal MV buffer.
+// NULL-tolerant, mirroring free() semantics.
+void av1_free_ref_mv_common(AV1_COMMON *cm) {
+    if (!cm) return;
+    free(cm->tpl_mvs); // free(NULL) is a no-op, no guard needed
+    free(cm);
+}
--- /dev/null
+++ b/src/ref_mvs.h
@@ -1,0 +1,185 @@
+/*
+ * Copyright (c) 2001-2016, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef __DAV1D_SRC_REF_MVS_H__
+#define __DAV1D_SRC_REF_MVS_H__
+
+#include <stddef.h>
+
+#include "src/levels.h"
+#include "src/tables.h"
+
+// One entry of the per-4x4-unit motion vector grid.
+typedef struct refmvs {
+    mv mv[2];      // MV per reference slot; [1] unused for single-ref blocks
+    int8_t ref[2]; // [0] = 0: intra=1, [1] = -1: comp=0
+    int8_t mode, sb_type; // prediction mode and libaom BLOCK_SIZE of the block
+} refmvs;
+
+// One entry of the reference-MV candidate stack: the candidate MV, the
+// second reference's MV for compound prediction, and a popularity weight.
+typedef struct candidate_mv {
+    mv this_mv;
+    mv comp_mv;
+    int weight;
+} candidate_mv;
+
+// Opaque handle around the imported libaom state.
+typedef struct AV1_COMMON AV1_COMMON;
+
+// call once per frame thread
+AV1_COMMON *av1_alloc_ref_mv_common(void);
+void av1_free_ref_mv_common(AV1_COMMON *cm);
+
+// call once per frame
+// NOTE(review): the .c definition spells these parameter types MV_REF /
+// int_mv where this header uses refmvs / mv — presumably aliased via
+// macros or identical layout; confirm.
+void av1_init_ref_mv_common(AV1_COMMON *cm,
+                            int w8, int h8,
+                            ptrdiff_t stride,
+                            int allow_sb128,
+                            refmvs *cur,
+                            refmvs *ref_mvs[7],
+                            unsigned cur_poc,
+                            const unsigned ref_poc[7],
+                            const unsigned ref_ref_poc[7][7],
+                            const WarpedMotionParams gmv[7],
+                            int allow_hp, int force_int_mv,
+                            int allow_ref_frame_mvs, int order_hint);
+
+// call for start of each sbrow per tile
+void av1_init_ref_mv_tile_row(AV1_COMMON *cm,
+                              int tile_col_start4, int tile_col_end4,
+                              int row_start4, int row_end4);
+
+// call for each block
+void av1_find_ref_mvs(candidate_mv *mvstack, int *cnt, mv (*mvlist)[2],
+                      int *ctx, int refidx[2], int w4, int h4,
+                      enum BlockSize bs, enum BlockPartition bp,
+                      int by4, int bx4, int tile_col_start4,
+                      int tile_col_end4, int tile_row_start4,
+                      int tile_row_end4, AV1_COMMON *cm);
+
+// Lookup tables mapping between dav1d BlockSize and libaom BLOCK_SIZE.
+extern const uint8_t bs_to_sbtype[];
+extern const uint8_t sbtype_to_bs[];
+// Fill the bs-sized region at (bx4,by4) of the MV grid with a single-
+// reference inter entry (second ref slot: 0 for inter-intra, -1 otherwise).
+static inline void splat_oneref_mv(refmvs *r, const ptrdiff_t stride,
+                                   const int by4, const int bx4,
+                                   const enum BlockSize bs,
+                                   const enum InterPredMode mode,
+                                   const int ref, const mv mv,
+                                   const int is_interintra)
+{
+    const int w4 = av1_block_dimensions[bs][0];
+    const int h4 = av1_block_dimensions[bs][1];
+    const refmvs entry = (refmvs) {
+        .ref = { ref + 1, is_interintra ? 0 : -1 },
+        .mv = { mv },
+        .sb_type = bs_to_sbtype[bs],
+        .mode = N_INTRA_PRED_MODES + mode,
+    };
+
+    refmvs *row = &r[by4 * stride + bx4];
+    for (int y = 0; y < h4; y++, row += stride)
+        for (int x = 0; x < w4; x++)
+            row[x] = entry;
+}
+
+// Fill the bs-sized region at (bx4,by4) of the MV grid with an intra
+// block-copy entry (intra ref, DC_PRED mode, with the block-copy vector).
+static inline void splat_intrabc_mv(refmvs *r, const ptrdiff_t stride,
+                                    const int by4, const int bx4,
+                                    const enum BlockSize bs, const mv mv)
+{
+    const int w4 = av1_block_dimensions[bs][0];
+    const int h4 = av1_block_dimensions[bs][1];
+    const refmvs entry = (refmvs) {
+        .ref = { 0, -1 },
+        .mv = { mv },
+        .sb_type = bs_to_sbtype[bs],
+        .mode = DC_PRED,
+    };
+
+    refmvs *row = &r[by4 * stride + bx4];
+    for (int y = 0; y < h4; y++, row += stride)
+        for (int x = 0; x < w4; x++)
+            row[x] = entry;
+}
+
+// Fill the bs-sized region at (bx4,by4) of the MV grid with a compound
+// (two-reference) inter entry.
+static inline void splat_tworef_mv(refmvs *r, const ptrdiff_t stride,
+                                   const int by4, const int bx4,
+                                   const enum BlockSize bs,
+                                   const enum CompInterPredMode mode,
+                                   const int ref1, const int ref2,
+                                   const mv mv1, const mv mv2)
+{
+    const int w4 = av1_block_dimensions[bs][0];
+    const int h4 = av1_block_dimensions[bs][1];
+    const refmvs entry = (refmvs) {
+        .ref = { ref1 + 1, ref2 + 1 },
+        .mv = { mv1, mv2 },
+        .sb_type = bs_to_sbtype[bs],
+        .mode = N_INTRA_PRED_MODES + N_INTER_PRED_MODES + mode,
+    };
+
+    refmvs *row = &r[by4 * stride + bx4];
+    for (int y = 0; y < h4; y++, row += stride)
+        for (int x = 0; x < w4; x++)
+            row[x] = entry;
+}
+
+// Fill the bs-sized region at (bx4,by4) of the MV grid with an intra
+// entry; the sentinel MV (-0x8000,-0x8000) marks "no motion vector".
+static inline void splat_intraref(refmvs *r, const ptrdiff_t stride,
+                                  const int by4, const int bx4,
+                                  const enum BlockSize bs,
+                                  const enum IntraPredMode mode)
+{
+    const int w4 = av1_block_dimensions[bs][0];
+    const int h4 = av1_block_dimensions[bs][1];
+    const refmvs entry = (refmvs) {
+        .ref = { 0, -1 },
+        .mv = { [0] = { .y = -0x8000, .x = -0x8000 }, },
+        .sb_type = bs_to_sbtype[bs],
+        .mode = mode,
+    };
+
+    refmvs *row = &r[by4 * stride + bx4];
+    for (int y = 0; y < h4; y++, row += stride)
+        for (int x = 0; x < w4; x++)
+            row[x] = entry;
+}
+
+// FIXME integer_mv
+// Drop the high-precision (odd) bit of each MV component, rounding the
+// component towards zero so the vector lands on the half-pel grid.
+static inline void unset_hp_bit(mv *const a) {
+    if (a->x & 1)
+        a->x += a->x < 0 ? 1 : -1;
+    if (a->y & 1)
+        a->y += a->y < 0 ? 1 : -1;
+}
+
+// Clamp a motion vector (1/8-pel units) to the allowed range around the
+// frame: positions/sizes are in 4px units, hence "* 4 * 8" to 1/8-pel, and
+// "+ 4" allows pointing up to 16 pixels beyond the edge. The asymmetry
+// (bw4/bh4 on the low bound, "0 * bw4" on the high bound) mirrors the spec
+// formula; the dead multiply is kept for expression symmetry. Duplicates
+// the struct-MV variant in ref_mvs.c for dav1d's own mv type.
+static inline mv av1_clamp_mv(const mv mv,
+                              const int bx4, const int by4,
+                              const int bw4, const int bh4,
+                              const int iw4, const int ih4)
+{
+    const int left = -(bx4 + bw4 + 4) * 4 * 8;
+    const int right = (iw4 - bx4 + 0 * bw4 + 4) * 4 * 8;
+    const int top = -(by4 + bh4 + 4) * 4 * 8;
+    const int bottom = (ih4 - by4 + 0 * bh4 + 4) * 4 * 8;
+
+    return (struct mv) { .x = iclip(mv.x, left, right),
+                         .y = iclip(mv.y, top, bottom) };
+}
+
+#endif /* __DAV1D_SRC_REF_MVS_H__ */
--- /dev/null
+++ b/src/scan.c
@@ -1,0 +1,527 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "src/scan.h"
+
+// Coefficient scan-order tables: each value is a coefficient position in
+// raster order; "default" is the zig-zag order, "mrow"/"mcol" the
+// row-/column-major variants. The aligned(32) attribute is GCC/Clang-
+// specific — presumably for SIMD loads; confirm portability requirements.
+static const int16_t av1_default_scan_4x4[] __attribute__((aligned(32))) = {
+     0,  4,  1,  2,
+     5,  8, 12,  9,
+     6,  3,  7, 10,
+    13, 14, 11, 15,
+};
+static const int16_t av1_mrow_scan_4x4[] __attribute__((aligned(32))) = {
+     0,  4,  8, 12,
+     1,  5,  9, 13,
+     2,  6, 10, 14,
+     3,  7, 11, 15,
+};
+static const int16_t av1_mcol_scan_4x4[] __attribute__((aligned(32))) = {
+     0,  1,  2,  3,
+     4,  5,  6,  7,
+     8,  9, 10, 11,
+    12, 13, 14, 15,
+};
+// 4x8 transform scans.
+static const int16_t av1_default_scan_4x8[] __attribute__((aligned(32))) = {
+     0,  8,  1, 16,
+     9,  2, 24, 17,
+    10,  3, 25, 18,
+    11,  4, 26, 19,
+    12,  5, 27, 20,
+    13,  6, 28, 21,
+    14,  7, 29, 22,
+    15, 30, 23, 31,
+};
+static const int16_t av1_mrow_scan_4x8[] __attribute__((aligned(32))) = {
+     0,  8, 16, 24,
+     1,  9, 17, 25,
+     2, 10, 18, 26,
+     3, 11, 19, 27,
+     4, 12, 20, 28,
+     5, 13, 21, 29,
+     6, 14, 22, 30,
+     7, 15, 23, 31,
+};
+static const int16_t av1_mcol_scan_4x8[] __attribute__((aligned(32))) = {
+     0,  1,  2,  3,
+     4,  5,  6,  7,
+     8,  9, 10, 11,
+    12, 13, 14, 15,
+    16, 17, 18, 19,
+    20, 21, 22, 23,
+    24, 25, 26, 27,
+    28, 29, 30, 31,
+};
+// 4x16 transform scans (see the comment on the 4x4 tables above for the
+// default/mrow/mcol naming).
+static const int16_t av1_default_scan_4x16[] __attribute__((aligned(32))) = {
+     0, 16,  1, 32,
+    17,  2, 48, 33,
+    18,  3, 49, 34,
+    19,  4, 50, 35,
+    20,  5, 51, 36,
+    21,  6, 52, 37,
+    22,  7, 53, 38,
+    23,  8, 54, 39,
+    24,  9, 55, 40,
+    25, 10, 56, 41,
+    26, 11, 57, 42,
+    27, 12, 58, 43,
+    28, 13, 59, 44,
+    29, 14, 60, 45,
+    30, 15, 61, 46,
+    31, 62, 47, 63,
+};
+static const int16_t av1_mrow_scan_4x16[] __attribute__((aligned(32))) = {
+     0, 16, 32, 48,
+     1, 17, 33, 49,
+     2, 18, 34, 50,
+     3, 19, 35, 51,
+     4, 20, 36, 52,
+     5, 21, 37, 53,
+     6, 22, 38, 54,
+     7, 23, 39, 55,
+     8, 24, 40, 56,
+     9, 25, 41, 57,
+    10, 26, 42, 58,
+    11, 27, 43, 59,
+    12, 28, 44, 60,
+    13, 29, 45, 61,
+    14, 30, 46, 62,
+    15, 31, 47, 63,
+};
+static const int16_t av1_mcol_scan_4x16[] __attribute__((aligned(32))) = {
+     0,  1,  2,  3,
+     4,  5,  6,  7,
+     8,  9, 10, 11,
+    12, 13, 14, 15,
+    16, 17, 18, 19,
+    20, 21, 22, 23,
+    24, 25, 26, 27,
+    28, 29, 30, 31,
+    32, 33, 34, 35,
+    36, 37, 38, 39,
+    40, 41, 42, 43,
+    44, 45, 46, 47,
+    48, 49, 50, 51,
+    52, 53, 54, 55,
+    56, 57, 58, 59,
+    60, 61, 62, 63,
+};
+// 8x4 and 8x8 transform scans (see the comment on the 4x4 tables above for
+// the default/mrow/mcol naming).
+static const int16_t av1_default_scan_8x4[] __attribute__((aligned(32))) = {
+     0,  1,  4,  2,  5,  8,  3,  6,
+     9, 12,  7, 10, 13, 16, 11, 14,
+    17, 20, 15, 18, 21, 24, 19, 22,
+    25, 28, 23, 26, 29, 27, 30, 31,
+};
+static const int16_t av1_mrow_scan_8x4[] __attribute__((aligned(32))) = {
+     0,  4,  8, 12, 16, 20, 24, 28,
+     1,  5,  9, 13, 17, 21, 25, 29,
+     2,  6, 10, 14, 18, 22, 26, 30,
+     3,  7, 11, 15, 19, 23, 27, 31,
+};
+static const int16_t av1_mcol_scan_8x4[] __attribute__((aligned(32))) = {
+     0,  1,  2,  3,  4,  5,  6,  7,
+     8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23,
+    24, 25, 26, 27, 28, 29, 30, 31,
+};
+static const int16_t av1_default_scan_8x8[] __attribute__((aligned(32))) = {
+     0,  8,  1,  2,  9, 16, 24, 17,
+    10,  3,  4, 11, 18, 25, 32, 40,
+    33, 26, 19, 12,  5,  6, 13, 20,
+    27, 34, 41, 48, 56, 49, 42, 35,
+    28, 21, 14,  7, 15, 22, 29, 36,
+    43, 50, 57, 58, 51, 44, 37, 30,
+    23, 31, 38, 45, 52, 59, 60, 53,
+    46, 39, 47, 54, 61, 62, 55, 63,
+};
+static const int16_t av1_mrow_scan_8x8[] __attribute__((aligned(32))) = {
+     0,  8, 16, 24, 32, 40, 48, 56,
+     1,  9, 17, 25, 33, 41, 49, 57,
+     2, 10, 18, 26, 34, 42, 50, 58,
+     3, 11, 19, 27, 35, 43, 51, 59,
+     4, 12, 20, 28, 36, 44, 52, 60,
+     5, 13, 21, 29, 37, 45, 53, 61,
+     6, 14, 22, 30, 38, 46, 54, 62,
+     7, 15, 23, 31, 39, 47, 55, 63,
+};
+static const int16_t av1_mcol_scan_8x8[] __attribute__((aligned(32))) = {
+     0,  1,  2,  3,  4,  5,  6,  7,
+     8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23,
+    24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39,
+    40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55,
+    56, 57, 58, 59, 60, 61, 62, 63,
+};
+// 8x16 transform scans (see the comment on the 4x4 tables above for the
+// default/mrow/mcol naming).
+static const int16_t av1_default_scan_8x16[] __attribute__((aligned(32))) = {
+      0,  16,   1,  32,  17,   2,  48,  33,
+     18,   3,  64,  49,  34,  19,   4,  80,
+     65,  50,  35,  20,   5,  96,  81,  66,
+     51,  36,  21,   6, 112,  97,  82,  67,
+     52,  37,  22,   7, 113,  98,  83,  68,
+     53,  38,  23,   8, 114,  99,  84,  69,
+     54,  39,  24,   9, 115, 100,  85,  70,
+     55,  40,  25,  10, 116, 101,  86,  71,
+     56,  41,  26,  11, 117, 102,  87,  72,
+     57,  42,  27,  12, 118, 103,  88,  73,
+     58,  43,  28,  13, 119, 104,  89,  74,
+     59,  44,  29,  14, 120, 105,  90,  75,
+     60,  45,  30,  15, 121, 106,  91,  76,
+     61,  46,  31, 122, 107,  92,  77,  62,
+     47, 123, 108,  93,  78,  63, 124, 109,
+     94,  79, 125, 110,  95, 126, 111, 127,
+};
+static const int16_t av1_mrow_scan_8x16[] __attribute__((aligned(32))) = {
+      0,  16,  32,  48,  64,  80,  96, 112,
+      1,  17,  33,  49,  65,  81,  97, 113,
+      2,  18,  34,  50,  66,  82,  98, 114,
+      3,  19,  35,  51,  67,  83,  99, 115,
+      4,  20,  36,  52,  68,  84, 100, 116,
+      5,  21,  37,  53,  69,  85, 101, 117,
+      6,  22,  38,  54,  70,  86, 102, 118,
+      7,  23,  39,  55,  71,  87, 103, 119,
+      8,  24,  40,  56,  72,  88, 104, 120,
+      9,  25,  41,  57,  73,  89, 105, 121,
+     10,  26,  42,  58,  74,  90, 106, 122,
+     11,  27,  43,  59,  75,  91, 107, 123,
+     12,  28,  44,  60,  76,  92, 108, 124,
+     13,  29,  45,  61,  77,  93, 109, 125,
+     14,  30,  46,  62,  78,  94, 110, 126,
+     15,  31,  47,  63,  79,  95, 111, 127,
+};
+static const int16_t av1_mcol_scan_8x16[] __attribute__((aligned(32))) = {
+      0,   1,   2,   3,   4,   5,   6,   7,
+      8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,
+     24,  25,  26,  27,  28,  29,  30,  31,
+     32,  33,  34,  35,  36,  37,  38,  39,
+     40,  41,  42,  43,  44,  45,  46,  47,
+     48,  49,  50,  51,  52,  53,  54,  55,
+     56,  57,  58,  59,  60,  61,  62,  63,
+     64,  65,  66,  67,  68,  69,  70,  71,
+     72,  73,  74,  75,  76,  77,  78,  79,
+     80,  81,  82,  83,  84,  85,  86,  87,
+     88,  89,  90,  91,  92,  93,  94,  95,
+     96,  97,  98,  99, 100, 101, 102, 103,
+    104, 105, 106, 107, 108, 109, 110, 111,
+    112, 113, 114, 115, 116, 117, 118, 119,
+    120, 121, 122, 123, 124, 125, 126, 127,
+};
+static const int16_t av1_default_scan_8x32[] __attribute__((aligned(32))) = {
+      0,  32,   1,  64,  33,   2,  96,  65,
+     34,   3, 128,  97,  66,  35,   4, 160,
+    129,  98,  67,  36,   5, 192, 161, 130,
+     99,  68,  37,   6, 224, 193, 162, 131,
+    100,  69,  38,   7, 225, 194, 163, 132,
+    101,  70,  39,   8, 226, 195, 164, 133,
+    102,  71,  40,   9, 227, 196, 165, 134,
+    103,  72,  41,  10, 228, 197, 166, 135,
+    104,  73,  42,  11, 229, 198, 167, 136,
+    105,  74,  43,  12, 230, 199, 168, 137,
+    106,  75,  44,  13, 231, 200, 169, 138,
+    107,  76,  45,  14, 232, 201, 170, 139,
+    108,  77,  46,  15, 233, 202, 171, 140,
+    109,  78,  47,  16, 234, 203, 172, 141,
+    110,  79,  48,  17, 235, 204, 173, 142,
+    111,  80,  49,  18, 236, 205, 174, 143,
+    112,  81,  50,  19, 237, 206, 175, 144,
+    113,  82,  51,  20, 238, 207, 176, 145,
+    114,  83,  52,  21, 239, 208, 177, 146,
+    115,  84,  53,  22, 240, 209, 178, 147,
+    116,  85,  54,  23, 241, 210, 179, 148,
+    117,  86,  55,  24, 242, 211, 180, 149,
+    118,  87,  56,  25, 243, 212, 181, 150,
+    119,  88,  57,  26, 244, 213, 182, 151,
+    120,  89,  58,  27, 245, 214, 183, 152,
+    121,  90,  59,  28, 246, 215, 184, 153,
+    122,  91,  60,  29, 247, 216, 185, 154,
+    123,  92,  61,  30, 248, 217, 186, 155,
+    124,  93,  62,  31, 249, 218, 187, 156,
+    125,  94,  63, 250, 219, 188, 157, 126,
+     95, 251, 220, 189, 158, 127, 252, 221,
+    190, 159, 253, 222, 191, 254, 223, 255,
+};
+static const int16_t av1_default_scan_16x4[] __attribute__((aligned(32))) = {
+     0,  1,  4,  2,  5,  8,  3,  6,  9, 12,  7, 10, 13, 16, 11, 14,
+    17, 20, 15, 18, 21, 24, 19, 22, 25, 28, 23, 26, 29, 32, 27, 30,
+    33, 36, 31, 34, 37, 40, 35, 38, 41, 44, 39, 42, 45, 48, 43, 46,
+    49, 52, 47, 50, 53, 56, 51, 54, 57, 60, 55, 58, 61, 59, 62, 63,
+};
+static const int16_t av1_mrow_scan_16x4[] __attribute__((aligned(32))) = {
+     0,  4,  8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60,
+     1,  5,  9, 13, 17, 21, 25, 29, 33, 37, 41, 45, 49, 53, 57, 61,
+     2,  6, 10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, 54, 58, 62,
+     3,  7, 11, 15, 19, 23, 27, 31, 35, 39, 43, 47, 51, 55, 59, 63,
+};
+static const int16_t av1_mcol_scan_16x4[] __attribute__((aligned(32))) = {
+     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+};
+static const int16_t av1_default_scan_16x8[] __attribute__((aligned(32))) = {
+      0,   1,   8,   2,   9,  16,   3,  10,  17,  24,   4,  11,  18,  25,  32,   5,
+     12,  19,  26,  33,  40,   6,  13,  20,  27,  34,  41,  48,   7,  14,  21,  28,
+     35,  42,  49,  56,  15,  22,  29,  36,  43,  50,  57,  64,  23,  30,  37,  44,
+     51,  58,  65,  72,  31,  38,  45,  52,  59,  66,  73,  80,  39,  46,  53,  60,
+     67,  74,  81,  88,  47,  54,  61,  68,  75,  82,  89,  96,  55,  62,  69,  76,
+     83,  90,  97, 104,  63,  70,  77,  84,  91,  98, 105, 112,  71,  78,  85,  92,
+     99, 106, 113, 120,  79,  86,  93, 100, 107, 114, 121,  87,  94, 101, 108, 115,
+    122,  95, 102, 109, 116, 123, 103, 110, 117, 124, 111, 118, 125, 119, 126, 127,
+};
+static const int16_t av1_mrow_scan_16x8[] __attribute__((aligned(32))) = {
+      0,   8,  16,  24,  32,  40,  48,  56,  64,  72,  80,  88,  96, 104, 112, 120,
+      1,   9,  17,  25,  33,  41,  49,  57,  65,  73,  81,  89,  97, 105, 113, 121,
+      2,  10,  18,  26,  34,  42,  50,  58,  66,  74,  82,  90,  98, 106, 114, 122,
+      3,  11,  19,  27,  35,  43,  51,  59,  67,  75,  83,  91,  99, 107, 115, 123,
+      4,  12,  20,  28,  36,  44,  52,  60,  68,  76,  84,  92, 100, 108, 116, 124,
+      5,  13,  21,  29,  37,  45,  53,  61,  69,  77,  85,  93, 101, 109, 117, 125,
+      6,  14,  22,  30,  38,  46,  54,  62,  70,  78,  86,  94, 102, 110, 118, 126,
+      7,  15,  23,  31,  39,  47,  55,  63,  71,  79,  87,  95, 103, 111, 119, 127,
+};
+static const int16_t av1_mcol_scan_16x8[] __attribute__((aligned(32))) = {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
+     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
+     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
+     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+};
+static const int16_t av1_default_scan_16x16[] __attribute__((aligned(32))) = {
+      0,  16,   1,   2,  17,  32,  48,  33,  18,   3,   4,  19,  34,  49,  64,  80,
+     65,  50,  35,  20,   5,   6,  21,  36,  51,  66,  81,  96, 112,  97,  82,  67,
+     52,  37,  22,   7,   8,  23,  38,  53,  68,  83,  98, 113, 128, 144, 129, 114,
+     99,  84,  69,  54,  39,  24,   9,  10,  25,  40,  55,  70,  85, 100, 115, 130,
+    145, 160, 176, 161, 146, 131, 116, 101,  86,  71,  56,  41,  26,  11,  12,  27,
+     42,  57,  72,  87, 102, 117, 132, 147, 162, 177, 192, 208, 193, 178, 163, 148,
+    133, 118, 103,  88,  73,  58,  43,  28,  13,  14,  29,  44,  59,  74,  89, 104,
+    119, 134, 149, 164, 179, 194, 209, 224, 240, 225, 210, 195, 180, 165, 150, 135,
+    120, 105,  90,  75,  60,  45,  30,  15,  31,  46,  61,  76,  91, 106, 121, 136,
+    151, 166, 181, 196, 211, 226, 241, 242, 227, 212, 197, 182, 167, 152, 137, 122,
+    107,  92,  77,  62,  47,  63,  78,  93, 108, 123, 138, 153, 168, 183, 198, 213,
+    228, 243, 244, 229, 214, 199, 184, 169, 154, 139, 124, 109,  94,  79,  95, 110,
+    125, 140, 155, 170, 185, 200, 215, 230, 245, 246, 231, 216, 201, 186, 171, 156,
+    141, 126, 111, 127, 142, 157, 172, 187, 202, 217, 232, 247, 248, 233, 218, 203,
+    188, 173, 158, 143, 159, 174, 189, 204, 219, 234, 249, 250, 235, 220, 205, 190,
+    175, 191, 206, 221, 236, 251, 252, 237, 222, 207, 223, 238, 253, 254, 239, 255,
+};
+static const int16_t av1_mrow_scan_16x16[] __attribute__((aligned(32))) = {
+      0,  16,  32,  48,  64,  80,  96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
+      1,  17,  33,  49,  65,  81,  97, 113, 129, 145, 161, 177, 193, 209, 225, 241,
+      2,  18,  34,  50,  66,  82,  98, 114, 130, 146, 162, 178, 194, 210, 226, 242,
+      3,  19,  35,  51,  67,  83,  99, 115, 131, 147, 163, 179, 195, 211, 227, 243,
+      4,  20,  36,  52,  68,  84, 100, 116, 132, 148, 164, 180, 196, 212, 228, 244,
+      5,  21,  37,  53,  69,  85, 101, 117, 133, 149, 165, 181, 197, 213, 229, 245,
+      6,  22,  38,  54,  70,  86, 102, 118, 134, 150, 166, 182, 198, 214, 230, 246,
+      7,  23,  39,  55,  71,  87, 103, 119, 135, 151, 167, 183, 199, 215, 231, 247,
+      8,  24,  40,  56,  72,  88, 104, 120, 136, 152, 168, 184, 200, 216, 232, 248,
+      9,  25,  41,  57,  73,  89, 105, 121, 137, 153, 169, 185, 201, 217, 233, 249,
+     10,  26,  42,  58,  74,  90, 106, 122, 138, 154, 170, 186, 202, 218, 234, 250,
+     11,  27,  43,  59,  75,  91, 107, 123, 139, 155, 171, 187, 203, 219, 235, 251,
+     12,  28,  44,  60,  76,  92, 108, 124, 140, 156, 172, 188, 204, 220, 236, 252,
+     13,  29,  45,  61,  77,  93, 109, 125, 141, 157, 173, 189, 205, 221, 237, 253,
+     14,  30,  46,  62,  78,  94, 110, 126, 142, 158, 174, 190, 206, 222, 238, 254,
+     15,  31,  47,  63,  79,  95, 111, 127, 143, 159, 175, 191, 207, 223, 239, 255,
+};
+static const int16_t av1_mcol_scan_16x16[] __attribute__((aligned(32))) = {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
+     32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
+     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
+     64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
+     80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
+    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+};
+static const int16_t av1_default_scan_16x32[] __attribute__((aligned(32))) = {
+      0,  32,   1,  64,  33,   2,  96,  65,  34,   3, 128,  97,  66,  35,   4, 160,
+    129,  98,  67,  36,   5, 192, 161, 130,  99,  68,  37,   6, 224, 193, 162, 131,
+    100,  69,  38,   7, 256, 225, 194, 163, 132, 101,  70,  39,   8, 288, 257, 226,
+    195, 164, 133, 102,  71,  40,   9, 320, 289, 258, 227, 196, 165, 134, 103,  72,
+     41,  10, 352, 321, 290, 259, 228, 197, 166, 135, 104,  73,  42,  11, 384, 353,
+    322, 291, 260, 229, 198, 167, 136, 105,  74,  43,  12, 416, 385, 354, 323, 292,
+    261, 230, 199, 168, 137, 106,  75,  44,  13, 448, 417, 386, 355, 324, 293, 262,
+    231, 200, 169, 138, 107,  76,  45,  14, 480, 449, 418, 387, 356, 325, 294, 263,
+    232, 201, 170, 139, 108,  77,  46,  15, 481, 450, 419, 388, 357, 326, 295, 264,
+    233, 202, 171, 140, 109,  78,  47,  16, 482, 451, 420, 389, 358, 327, 296, 265,
+    234, 203, 172, 141, 110,  79,  48,  17, 483, 452, 421, 390, 359, 328, 297, 266,
+    235, 204, 173, 142, 111,  80,  49,  18, 484, 453, 422, 391, 360, 329, 298, 267,
+    236, 205, 174, 143, 112,  81,  50,  19, 485, 454, 423, 392, 361, 330, 299, 268,
+    237, 206, 175, 144, 113,  82,  51,  20, 486, 455, 424, 393, 362, 331, 300, 269,
+    238, 207, 176, 145, 114,  83,  52,  21, 487, 456, 425, 394, 363, 332, 301, 270,
+    239, 208, 177, 146, 115,  84,  53,  22, 488, 457, 426, 395, 364, 333, 302, 271,
+    240, 209, 178, 147, 116,  85,  54,  23, 489, 458, 427, 396, 365, 334, 303, 272,
+    241, 210, 179, 148, 117,  86,  55,  24, 490, 459, 428, 397, 366, 335, 304, 273,
+    242, 211, 180, 149, 118,  87,  56,  25, 491, 460, 429, 398, 367, 336, 305, 274,
+    243, 212, 181, 150, 119,  88,  57,  26, 492, 461, 430, 399, 368, 337, 306, 275,
+    244, 213, 182, 151, 120,  89,  58,  27, 493, 462, 431, 400, 369, 338, 307, 276,
+    245, 214, 183, 152, 121,  90,  59,  28, 494, 463, 432, 401, 370, 339, 308, 277,
+    246, 215, 184, 153, 122,  91,  60,  29, 495, 464, 433, 402, 371, 340, 309, 278,
+    247, 216, 185, 154, 123,  92,  61,  30, 496, 465, 434, 403, 372, 341, 310, 279,
+    248, 217, 186, 155, 124,  93,  62,  31, 497, 466, 435, 404, 373, 342, 311, 280,
+    249, 218, 187, 156, 125,  94,  63, 498, 467, 436, 405, 374, 343, 312, 281, 250,
+    219, 188, 157, 126,  95, 499, 468, 437, 406, 375, 344, 313, 282, 251, 220, 189,
+    158, 127, 500, 469, 438, 407, 376, 345, 314, 283, 252, 221, 190, 159, 501, 470,
+    439, 408, 377, 346, 315, 284, 253, 222, 191, 502, 471, 440, 409, 378, 347, 316,
+    285, 254, 223, 503, 472, 441, 410, 379, 348, 317, 286, 255, 504, 473, 442, 411,
+    380, 349, 318, 287, 505, 474, 443, 412, 381, 350, 319, 506, 475, 444, 413, 382,
+    351, 507, 476, 445, 414, 383, 508, 477, 446, 415, 509, 478, 447, 510, 479, 511,
+};
+static const int16_t av1_default_scan_32x8[] __attribute__((aligned(32))) = {
+      0,   1,   8,   2,   9,  16,   3,  10,  17,  24,   4,  11,  18,  25,  32,   5,  12,  19,  26,  33,  40,   6,  13,  20,  27,  34,  41,  48,   7,  14,  21,  28,
+     35,  42,  49,  56,  15,  22,  29,  36,  43,  50,  57,  64,  23,  30,  37,  44,  51,  58,  65,  72,  31,  38,  45,  52,  59,  66,  73,  80,  39,  46,  53,  60,
+     67,  74,  81,  88,  47,  54,  61,  68,  75,  82,  89,  96,  55,  62,  69,  76,  83,  90,  97, 104,  63,  70,  77,  84,  91,  98, 105, 112,  71,  78,  85,  92,
+     99, 106, 113, 120,  79,  86,  93, 100, 107, 114, 121, 128,  87,  94, 101, 108, 115, 122, 129, 136,  95, 102, 109, 116, 123, 130, 137, 144, 103, 110, 117, 124,
+    131, 138, 145, 152, 111, 118, 125, 132, 139, 146, 153, 160, 119, 126, 133, 140, 147, 154, 161, 168, 127, 134, 141, 148, 155, 162, 169, 176, 135, 142, 149, 156,
+    163, 170, 177, 184, 143, 150, 157, 164, 171, 178, 185, 192, 151, 158, 165, 172, 179, 186, 193, 200, 159, 166, 173, 180, 187, 194, 201, 208, 167, 174, 181, 188,
+    195, 202, 209, 216, 175, 182, 189, 196, 203, 210, 217, 224, 183, 190, 197, 204, 211, 218, 225, 232, 191, 198, 205, 212, 219, 226, 233, 240, 199, 206, 213, 220,
+    227, 234, 241, 248, 207, 214, 221, 228, 235, 242, 249, 215, 222, 229, 236, 243, 250, 223, 230, 237, 244, 251, 231, 238, 245, 252, 239, 246, 253, 247, 254, 255,
+};
+static const int16_t av1_default_scan_32x16[] __attribute__((aligned(32))) = {
+      0,   1,  16,   2,  17,  32,   3,  18,  33,  48,   4,  19,  34,  49,  64,   5,  20,  35,  50,  65,  80,   6,  21,  36,  51,  66,  81,  96,   7,  22,  37,  52,
+     67,  82,  97, 112,   8,  23,  38,  53,  68,  83,  98, 113, 128,   9,  24,  39,  54,  69,  84,  99, 114, 129, 144,  10,  25,  40,  55,  70,  85, 100, 115, 130,
+    145, 160,  11,  26,  41,  56,  71,  86, 101, 116, 131, 146, 161, 176,  12,  27,  42,  57,  72,  87, 102, 117, 132, 147, 162, 177, 192,  13,  28,  43,  58,  73,
+     88, 103, 118, 133, 148, 163, 178, 193, 208,  14,  29,  44,  59,  74,  89, 104, 119, 134, 149, 164, 179, 194, 209, 224,  15,  30,  45,  60,  75,  90, 105, 120,
+    135, 150, 165, 180, 195, 210, 225, 240,  31,  46,  61,  76,  91, 106, 121, 136, 151, 166, 181, 196, 211, 226, 241, 256,  47,  62,  77,  92, 107, 122, 137, 152,
+    167, 182, 197, 212, 227, 242, 257, 272,  63,  78,  93, 108, 123, 138, 153, 168, 183, 198, 213, 228, 243, 258, 273, 288,  79,  94, 109, 124, 139, 154, 169, 184,
+    199, 214, 229, 244, 259, 274, 289, 304,  95, 110, 125, 140, 155, 170, 185, 200, 215, 230, 245, 260, 275, 290, 305, 320, 111, 126, 141, 156, 171, 186, 201, 216,
+    231, 246, 261, 276, 291, 306, 321, 336, 127, 142, 157, 172, 187, 202, 217, 232, 247, 262, 277, 292, 307, 322, 337, 352, 143, 158, 173, 188, 203, 218, 233, 248,
+    263, 278, 293, 308, 323, 338, 353, 368, 159, 174, 189, 204, 219, 234, 249, 264, 279, 294, 309, 324, 339, 354, 369, 384, 175, 190, 205, 220, 235, 250, 265, 280,
+    295, 310, 325, 340, 355, 370, 385, 400, 191, 206, 221, 236, 251, 266, 281, 296, 311, 326, 341, 356, 371, 386, 401, 416, 207, 222, 237, 252, 267, 282, 297, 312,
+    327, 342, 357, 372, 387, 402, 417, 432, 223, 238, 253, 268, 283, 298, 313, 328, 343, 358, 373, 388, 403, 418, 433, 448, 239, 254, 269, 284, 299, 314, 329, 344,
+    359, 374, 389, 404, 419, 434, 449, 464, 255, 270, 285, 300, 315, 330, 345, 360, 375, 390, 405, 420, 435, 450, 465, 480, 271, 286, 301, 316, 331, 346, 361, 376,
+    391, 406, 421, 436, 451, 466, 481, 496, 287, 302, 317, 332, 347, 362, 377, 392, 407, 422, 437, 452, 467, 482, 497, 303, 318, 333, 348, 363, 378, 393, 408, 423,
+    438, 453, 468, 483, 498, 319, 334, 349, 364, 379, 394, 409, 424, 439, 454, 469, 484, 499, 335, 350, 365, 380, 395, 410, 425, 440, 455, 470, 485, 500, 351, 366,
+    381, 396, 411, 426, 441, 456, 471, 486, 501, 367, 382, 397, 412, 427, 442, 457, 472, 487, 502, 383, 398, 413, 428, 443, 458, 473, 488, 503, 399, 414, 429, 444,
+    459, 474, 489, 504, 415, 430, 445, 460, 475, 490, 505, 431, 446, 461, 476, 491, 506, 447, 462, 477, 492, 507, 463, 478, 493, 508, 479, 494, 509, 495, 510, 511,
+};
+static const int16_t av1_default_scan_32x32[] __attribute__((aligned(32))) = {
+       0,   32,    1,    2,   33,   64,   96,   65,   34,    3,    4,   35,   66,   97,  128,  160,  129,   98,   67,   36,    5,    6,   37,   68,   99,  130,  161,  192,  224,  193,  162,  131,
+     100,   69,   38,    7,    8,   39,   70,  101,  132,  163,  194,  225,  256,  288,  257,  226,  195,  164,  133,  102,   71,   40,    9,   10,   41,   72,  103,  134,  165,  196,  227,  258,
+     289,  320,  352,  321,  290,  259,  228,  197,  166,  135,  104,   73,   42,   11,   12,   43,   74,  105,  136,  167,  198,  229,  260,  291,  322,  353,  384,  416,  385,  354,  323,  292,
+     261,  230,  199,  168,  137,  106,   75,   44,   13,   14,   45,   76,  107,  138,  169,  200,  231,  262,  293,  324,  355,  386,  417,  448,  480,  449,  418,  387,  356,  325,  294,  263,
+     232,  201,  170,  139,  108,   77,   46,   15,   16,   47,   78,  109,  140,  171,  202,  233,  264,  295,  326,  357,  388,  419,  450,  481,  512,  544,  513,  482,  451,  420,  389,  358,
+     327,  296,  265,  234,  203,  172,  141,  110,   79,   48,   17,   18,   49,   80,  111,  142,  173,  204,  235,  266,  297,  328,  359,  390,  421,  452,  483,  514,  545,  576,  608,  577,
+     546,  515,  484,  453,  422,  391,  360,  329,  298,  267,  236,  205,  174,  143,  112,   81,   50,   19,   20,   51,   82,  113,  144,  175,  206,  237,  268,  299,  330,  361,  392,  423,
+     454,  485,  516,  547,  578,  609,  640,  672,  641,  610,  579,  548,  517,  486,  455,  424,  393,  362,  331,  300,  269,  238,  207,  176,  145,  114,   83,   52,   21,   22,   53,   84,
+     115,  146,  177,  208,  239,  270,  301,  332,  363,  394,  425,  456,  487,  518,  549,  580,  611,  642,  673,  704,  736,  705,  674,  643,  612,  581,  550,  519,  488,  457,  426,  395,
+     364,  333,  302,  271,  240,  209,  178,  147,  116,   85,   54,   23,   24,   55,   86,  117,  148,  179,  210,  241,  272,  303,  334,  365,  396,  427,  458,  489,  520,  551,  582,  613,
+     644,  675,  706,  737,  768,  800,  769,  738,  707,  676,  645,  614,  583,  552,  521,  490,  459,  428,  397,  366,  335,  304,  273,  242,  211,  180,  149,  118,   87,   56,   25,   26,
+      57,   88,  119,  150,  181,  212,  243,  274,  305,  336,  367,  398,  429,  460,  491,  522,  553,  584,  615,  646,  677,  708,  739,  770,  801,  832,  864,  833,  802,  771,  740,  709,
+     678,  647,  616,  585,  554,  523,  492,  461,  430,  399,  368,  337,  306,  275,  244,  213,  182,  151,  120,   89,   58,   27,   28,   59,   90,  121,  152,  183,  214,  245,  276,  307,
+     338,  369,  400,  431,  462,  493,  524,  555,  586,  617,  648,  679,  710,  741,  772,  803,  834,  865,  896,  928,  897,  866,  835,  804,  773,  742,  711,  680,  649,  618,  587,  556,
+     525,  494,  463,  432,  401,  370,  339,  308,  277,  246,  215,  184,  153,  122,   91,   60,   29,   30,   61,   92,  123,  154,  185,  216,  247,  278,  309,  340,  371,  402,  433,  464,
+     495,  526,  557,  588,  619,  650,  681,  712,  743,  774,  805,  836,  867,  898,  929,  960,  992,  961,  930,  899,  868,  837,  806,  775,  744,  713,  682,  651,  620,  589,  558,  527,
+     496,  465,  434,  403,  372,  341,  310,  279,  248,  217,  186,  155,  124,   93,   62,   31,   63,   94,  125,  156,  187,  218,  249,  280,  311,  342,  373,  404,  435,  466,  497,  528,
+     559,  590,  621,  652,  683,  714,  745,  776,  807,  838,  869,  900,  931,  962,  993,  994,  963,  932,  901,  870,  839,  808,  777,  746,  715,  684,  653,  622,  591,  560,  529,  498,
+     467,  436,  405,  374,  343,  312,  281,  250,  219,  188,  157,  126,   95,  127,  158,  189,  220,  251,  282,  313,  344,  375,  406,  437,  468,  499,  530,  561,  592,  623,  654,  685,
+     716,  747,  778,  809,  840,  871,  902,  933,  964,  995,  996,  965,  934,  903,  872,  841,  810,  779,  748,  717,  686,  655,  624,  593,  562,  531,  500,  469,  438,  407,  376,  345,
+     314,  283,  252,  221,  190,  159,  191,  222,  253,  284,  315,  346,  377,  408,  439,  470,  501,  532,  563,  594,  625,  656,  687,  718,  749,  780,  811,  842,  873,  904,  935,  966,
+     997,  998,  967,  936,  905,  874,  843,  812,  781,  750,  719,  688,  657,  626,  595,  564,  533,  502,  471,  440,  409,  378,  347,  316,  285,  254,  223,  255,  286,  317,  348,  379,
+     410,  441,  472,  503,  534,  565,  596,  627,  658,  689,  720,  751,  782,  813,  844,  875,  906,  937,  968,  999, 1000,  969,  938,  907,  876,  845,  814,  783,  752,  721,  690,  659,
+     628,  597,  566,  535,  504,  473,  442,  411,  380,  349,  318,  287,  319,  350,  381,  412,  443,  474,  505,  536,  567,  598,  629,  660,  691,  722,  753,  784,  815,  846,  877,  908,
+     939,  970, 1001, 1002,  971,  940,  909,  878,  847,  816,  785,  754,  723,  692,  661,  630,  599,  568,  537,  506,  475,  444,  413,  382,  351,  383,  414,  445,  476,  507,  538,  569,
+     600,  631,  662,  693,  724,  755,  786,  817,  848,  879,  910,  941,  972, 1003, 1004,  973,  942,  911,  880,  849,  818,  787,  756,  725,  694,  663,  632,  601,  570,  539,  508,  477,
+     446,  415,  447,  478,  509,  540,  571,  602,  633,  664,  695,  726,  757,  788,  819,  850,  881,  912,  943,  974, 1005, 1006,  975,  944,  913,  882,  851,  820,  789,  758,  727,  696,
+     665,  634,  603,  572,  541,  510,  479,  511,  542,  573,  604,  635,  666,  697,  728,  759,  790,  821,  852,  883,  914,  945,  976, 1007, 1008,  977,  946,  915,  884,  853,  822,  791,
+     760,  729,  698,  667,  636,  605,  574,  543,  575,  606,  637,  668,  699,  730,  761,  792,  823,  854,  885,  916,  947,  978, 1009, 1010,  979,  948,  917,  886,  855,  824,  793,  762,
+     731,  700,  669,  638,  607,  639,  670,  701,  732,  763,  794,  825,  856,  887,  918,  949,  980, 1011, 1012,  981,  950,  919,  888,  857,  826,  795,  764,  733,  702,  671,  703,  734,
+     765,  796,  827,  858,  889,  920,  951,  982, 1013, 1014,  983,  952,  921,  890,  859,  828,  797,  766,  735,  767,  798,  829,  860,  891,  922,  953,  984, 1015, 1016,  985,  954,  923,
+     892,  861,  830,  799,  831,  862,  893,  924,  955,  986, 1017, 1018,  987,  956,  925,  894,  863,  895,  926,  957,  988, 1019, 1020,  989,  958,  927,  959,  990, 1021, 1022,  991, 1023,
+};
+
+const int16_t *const av1_scans[N_RECT_TX_SIZES][3] = {
+    [TX_4X4] = {
+        [TX_CLASS_2D] = av1_default_scan_4x4,
+        [TX_CLASS_V]  = av1_mrow_scan_4x4,
+        [TX_CLASS_H]  = av1_mcol_scan_4x4,
+    }, [TX_8X8] = {
+        [TX_CLASS_2D] = av1_default_scan_8x8,
+        [TX_CLASS_V]  = av1_mrow_scan_8x8,
+        [TX_CLASS_H]  = av1_mcol_scan_8x8,
+    }, [TX_16X16] = {
+        [TX_CLASS_2D] = av1_default_scan_16x16,
+        [TX_CLASS_V]  = av1_mrow_scan_16x16,
+        [TX_CLASS_H]  = av1_mcol_scan_16x16,
+    }, [TX_32X32] = {
+        [TX_CLASS_2D] = av1_default_scan_32x32,
+    }, [TX_64X64] = {
+        [TX_CLASS_2D] = av1_default_scan_32x32,
+    }, [RTX_4X8] = {
+        [TX_CLASS_2D] = av1_default_scan_4x8,
+        [TX_CLASS_V]  = av1_mrow_scan_4x8,
+        [TX_CLASS_H]  = av1_mcol_scan_4x8,
+    }, [RTX_8X4] = {
+        [TX_CLASS_2D] = av1_default_scan_8x4,
+        [TX_CLASS_V]  = av1_mrow_scan_8x4,
+        [TX_CLASS_H]  = av1_mcol_scan_8x4,
+    }, [RTX_8X16] = {
+        [TX_CLASS_2D] = av1_default_scan_8x16,
+        [TX_CLASS_V]  = av1_mrow_scan_8x16,
+        [TX_CLASS_H]  = av1_mcol_scan_8x16,
+    }, [RTX_16X8] = {
+        [TX_CLASS_2D] = av1_default_scan_16x8,
+        [TX_CLASS_V]  = av1_mrow_scan_16x8,
+        [TX_CLASS_H]  = av1_mcol_scan_16x8,
+    }, [RTX_16X32] = {
+        [TX_CLASS_2D] = av1_default_scan_16x32,
+    }, [RTX_32X16] = {
+        [TX_CLASS_2D] = av1_default_scan_32x16,
+    }, [RTX_32X64] = {
+        [TX_CLASS_2D] = av1_default_scan_32x32,
+    }, [RTX_64X32] = {
+        [TX_CLASS_2D] = av1_default_scan_32x32,
+    }, [RTX_4X16] = {
+        [TX_CLASS_2D] = av1_default_scan_4x16,
+        [TX_CLASS_V]  = av1_mrow_scan_4x16,
+        [TX_CLASS_H]  = av1_mcol_scan_4x16,
+    }, [RTX_16X4] = {
+        [TX_CLASS_2D] = av1_default_scan_16x4,
+        [TX_CLASS_V]  = av1_mrow_scan_16x4,
+        [TX_CLASS_H]  = av1_mcol_scan_16x4,
+    }, [RTX_8X32] = {
+        [TX_CLASS_2D] = av1_default_scan_8x32,
+    }, [RTX_32X8] = {
+        [TX_CLASS_2D] = av1_default_scan_32x8,
+    }, [RTX_16X64] = {
+        [TX_CLASS_2D] = av1_default_scan_16x32,
+    }, [RTX_64X16] = {
+        [TX_CLASS_2D] = av1_default_scan_32x16,
+    },
+};
--- /dev/null
+++ b/src/scan.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SCAN_H__
+#define __DAV1D_SCAN_H__
+
+#include <stdint.h>
+
+#include "src/levels.h"
+
+extern const int16_t *const av1_scans[N_RECT_TX_SIZES][3];
+
+#endif /* __DAV1D_SCAN_H__ */
--- /dev/null
+++ b/src/tables.c
@@ -1,0 +1,756 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+
+#include "src/levels.h"
+#include "src/tables.h"
+
+const uint8_t av1_al_part_ctx[2][N_BL_LEVELS][N_PARTITIONS] = {
+    {
+        // partitions:
+        // none,  h,    v, split,  tts,  tbs,  tls,  trs,   h4,   v4
+        { 0x00, 0x00, 0x10,   -1, 0x00, 0x10, 0x10, 0x10,   -1,   -1 }, // bl128
+        { 0x10, 0x10, 0x18,   -1, 0x10, 0x18, 0x18, 0x18, 0x10, 0x1c }, // bl64
+        { 0x18, 0x18, 0x1c,   -1, 0x18, 0x1c, 0x1c, 0x1c, 0x18, 0x1e }, // bl32
+        { 0x1c, 0x1c, 0x1e,   -1, 0x1c, 0x1e, 0x1e, 0x1e, 0x1c, 0x1f }, // bl16
+        { 0x1e, 0x1e, 0x1f, 0x1f,   -1,   -1,   -1,   -1,   -1,   -1 }, // bl8
+    }, {
+        { 0x00, 0x10, 0x00,   -1, 0x10, 0x10, 0x00, 0x10,   -1,   -1 }, // bl128
+        { 0x10, 0x18, 0x10,   -1, 0x18, 0x18, 0x10, 0x18, 0x1c, 0x10 }, // bl64
+        { 0x18, 0x1c, 0x18,   -1, 0x1c, 0x1c, 0x18, 0x1c, 0x1e, 0x18 }, // bl32
+        { 0x1c, 0x1e, 0x1c,   -1, 0x1e, 0x1e, 0x1c, 0x1e, 0x1f, 0x1c }, // bl16
+        { 0x1e, 0x1f, 0x1e, 0x1f,   -1,   -1,   -1,   -1,   -1,   -1 }, // bl8
+    }
+};
+
+const uint8_t /* enum BlockSize */
+    av1_block_sizes[N_BL_LEVELS][N_PARTITIONS][2] =
+{
+    [BL_128X128] = {
+        [PARTITION_NONE]           = { BS_128x128 },
+        [PARTITION_H]              = { BS_128x64 },
+        [PARTITION_V]              = { BS_64x128 },
+        [PARTITION_T_TOP_SPLIT]    = { BS_64x64, BS_128x64 },
+        [PARTITION_T_BOTTOM_SPLIT] = { BS_128x64, BS_64x64 },
+        [PARTITION_T_LEFT_SPLIT]   = { BS_64x64, BS_64x128 },
+        [PARTITION_T_RIGHT_SPLIT]  = { BS_64x128, BS_64x64 },
+    }, [BL_64X64] = {
+        [PARTITION_NONE]           = { BS_64x64 },
+        [PARTITION_H]              = { BS_64x32 },
+        [PARTITION_V]              = { BS_32x64 },
+        [PARTITION_T_TOP_SPLIT]    = { BS_32x32, BS_64x32 },
+        [PARTITION_T_BOTTOM_SPLIT] = { BS_64x32, BS_32x32 },
+        [PARTITION_T_LEFT_SPLIT]   = { BS_32x32, BS_32x64 },
+        [PARTITION_T_RIGHT_SPLIT]  = { BS_32x64, BS_32x32 },
+        [PARTITION_H4]             = { BS_64x16 },
+        [PARTITION_V4]             = { BS_16x64 },
+    }, [BL_32X32] = {
+        [PARTITION_NONE]           = { BS_32x32 },
+        [PARTITION_H]              = { BS_32x16 },
+        [PARTITION_V]              = { BS_16x32 },
+        [PARTITION_T_TOP_SPLIT]    = { BS_16x16, BS_32x16 },
+        [PARTITION_T_BOTTOM_SPLIT] = { BS_32x16, BS_16x16 },
+        [PARTITION_T_LEFT_SPLIT]   = { BS_16x16, BS_16x32 },
+        [PARTITION_T_RIGHT_SPLIT]  = { BS_16x32, BS_16x16 },
+        [PARTITION_H4]             = { BS_32x8  },
+        [PARTITION_V4]             = { BS_8x32  },
+    }, [BL_16X16] = {
+        [PARTITION_NONE]           = { BS_16x16 },
+        [PARTITION_H]              = { BS_16x8  },
+        [PARTITION_V]              = { BS_8x16  },
+        [PARTITION_T_TOP_SPLIT]    = { BS_8x8,   BS_16x8  },
+        [PARTITION_T_BOTTOM_SPLIT] = { BS_16x8,  BS_8x8   },
+        [PARTITION_T_LEFT_SPLIT]   = { BS_8x8,   BS_8x16  },
+        [PARTITION_T_RIGHT_SPLIT]  = { BS_8x16,  BS_8x8   },
+        [PARTITION_H4]             = { BS_16x4  },
+        [PARTITION_V4]             = { BS_4x16  },
+    }, [BL_8X8] = {
+        [PARTITION_NONE]           = { BS_8x8   },
+        [PARTITION_H]              = { BS_8x4   },
+        [PARTITION_V]              = { BS_4x8   },
+        [PARTITION_SPLIT]          = { BS_4x4   },
+    }
+};
+
+const uint8_t av1_block_dimensions[N_BS_SIZES][4] = {
+    [BS_128x128] = { 32, 32, 5, 5 },
+    [BS_128x64]  = { 32, 16, 5, 4 },
+    [BS_64x128]  = { 16, 32, 4, 5 },
+    [BS_64x64]   = { 16, 16, 4, 4 },
+    [BS_64x32]   = { 16,  8, 4, 3 },
+    [BS_64x16]   = { 16,  4, 4, 2 },
+    [BS_32x64]   = {  8, 16, 3, 4 },
+    [BS_32x32]   = {  8,  8, 3, 3 },
+    [BS_32x16]   = {  8,  4, 3, 2 },
+    [BS_32x8]    = {  8,  2, 3, 1 },
+    [BS_16x64]   = {  4, 16, 2, 4 },
+    [BS_16x32]   = {  4,  8, 2, 3 },
+    [BS_16x16]   = {  4,  4, 2, 2 },
+    [BS_16x8]    = {  4,  2, 2, 1 },
+    [BS_16x4]    = {  4,  1, 2, 0 },
+    [BS_8x32]    = {  2,  8, 1, 3 },
+    [BS_8x16]    = {  2,  4, 1, 2 },
+    [BS_8x8]     = {  2,  2, 1, 1 },
+    [BS_8x4]     = {  2,  1, 1, 0 },
+    [BS_4x16]    = {  1,  4, 0, 2 },
+    [BS_4x8]     = {  1,  2, 0, 1 },
+    [BS_4x4]     = {  1,  1, 0, 0 },
+};
+
+const TxfmInfo av1_txfm_dimensions[N_RECT_TX_SIZES] = {
+    [ TX_4X4]   = { .w = 1, .h = 1, .lw = 0, .lh = 0,
+                    .min = 0, .max = 0, .ctx = 0 },
+    [ TX_8X8]   = { .w = 2, .h = 2, .lw = 1, .lh = 1,
+                    .min = 1, .max = 1, .sub = TX_4X4, .ctx = 1 },
+    [ TX_16X16] = { .w = 4, .h = 4, .lw = 2, .lh = 2,
+                    .min = 2, .max = 2, .sub = TX_8X8, .ctx = 2 },
+    [ TX_32X32] = { .w = 8, .h = 8, .lw = 3, .lh = 3,
+                    .min = 3, .max = 3, .sub = TX_16X16, .ctx = 3 },
+    [ TX_64X64] = { .w = 16, .h = 16, .lw = 4, .lh = 4,
+                    .min = 4, .max = 4, .sub = TX_32X32, .ctx = 4 },
+    [RTX_4X8]   = { .w = 1, .h = 2, .lw = 0, .lh = 1,
+                    .min = 0, .max = 1, .sub = TX_4X4, .ctx = 1 },
+    [RTX_8X4]   = { .w = 2, .h = 1, .lw = 1, .lh = 0,
+                    .min = 0, .max = 1, .sub = TX_4X4, .ctx = 1 },
+    [RTX_8X16]  = { .w = 2, .h = 4, .lw = 1, .lh = 2,
+                    .min = 1, .max = 2, .sub = TX_8X8, .ctx = 2 },
+    [RTX_16X8]  = { .w = 4, .h = 2, .lw = 2, .lh = 1,
+                    .min = 1, .max = 2, .sub = TX_8X8, .ctx = 2 },
+    [RTX_16X32] = { .w = 4, .h = 8, .lw = 2, .lh = 3,
+                    .min = 2, .max = 3, .sub = TX_16X16, .ctx = 3 },
+    [RTX_32X16] = { .w = 8, .h = 4, .lw = 3, .lh = 2,
+                    .min = 2, .max = 3, .sub = TX_16X16, .ctx = 3 },
+    [RTX_32X64] = { .w = 8, .h = 16, .lw = 3, .lh = 4,
+                    .min = 3, .max = 4, .sub = TX_32X32, .ctx = 4 },
+    [RTX_64X32] = { .w = 16, .h = 8, .lw = 4, .lh = 3,
+                    .min = 3, .max = 4, .sub = TX_32X32, .ctx = 4 },
+    [RTX_4X16]  = { .w = 1, .h = 4, .lw = 0, .lh = 2,
+                    .min = 0, .max = 2, .sub = RTX_4X8, .ctx = 1 },
+    [RTX_16X4]  = { .w = 4, .h = 1, .lw = 2, .lh = 0,
+                    .min = 0, .max = 2, .sub = RTX_8X4, .ctx = 1 },
+    [RTX_8X32]  = { .w = 2, .h = 8, .lw = 1, .lh = 3,
+                    .min = 1, .max = 3, .sub = RTX_8X16, .ctx = 2 },
+    [RTX_32X8]  = { .w = 8, .h = 2, .lw = 3, .lh = 1,
+                    .min = 1, .max = 3, .sub = RTX_16X8, .ctx = 2 },
+    [RTX_16X64] = { .w = 4, .h = 16, .lw = 2, .lh = 4,
+                    .min = 2, .max = 4, .sub = RTX_16X32, .ctx = 3 },
+    [RTX_64X16] = { .w = 16, .h = 4, .lw = 4, .lh = 2,
+                    .min = 2, .max = 4, .sub = RTX_32X16, .ctx = 3 },
+};
+
+const uint8_t /* enum (Rect)TxfmSize */
+    av1_max_txfm_size_for_bs[N_BS_SIZES][4 /* y, 420, 422, 444 */] =
+{
+    [BS_128x128] = {  TX_64X64,  TX_32X32,  TX_32X32,  TX_32X32 },
+    [BS_128x64]  = {  TX_64X64,  TX_32X32,  TX_32X32,  TX_32X32 },
+    [BS_64x128]  = {  TX_64X64,  TX_32X32,       0,    TX_32X32 },
+    [BS_64x64]   = {  TX_64X64,  TX_32X32,  TX_32X32,  TX_32X32 },
+    [BS_64x32]   = { RTX_64X32, RTX_32X16,  TX_32X32,  TX_32X32 },
+    [BS_64x16]   = { RTX_64X16, RTX_32X8,  RTX_32X16, RTX_32X16 },
+    [BS_32x64]   = { RTX_32X64, RTX_16X32,       0,    TX_32X32 },
+    [BS_32x32]   = {  TX_32X32,  TX_16X16, RTX_16X32,  TX_32X32 },
+    [BS_32x16]   = { RTX_32X16, RTX_16X8,   TX_16X16, RTX_32X16 },
+    [BS_32x8]    = { RTX_32X8,  RTX_16X4,  RTX_16X8,  RTX_32X8  },
+    [BS_16x64]   = { RTX_16X64, RTX_8X32,        0,   RTX_16X32 },
+    [BS_16x32]   = { RTX_16X32, RTX_8X16,        0,   RTX_16X32 },
+    [BS_16x16]   = {  TX_16X16,  TX_8X8,   RTX_8X16,   TX_16X16 },
+    [BS_16x8]    = { RTX_16X8,  RTX_8X4,    TX_8X8,   RTX_16X8  },
+    [BS_16x4]    = { RTX_16X4,  RTX_8X4,   RTX_8X4,   RTX_16X4  },
+    [BS_8x32]    = { RTX_8X32,  RTX_4X16,       0,    RTX_8X32  },
+    [BS_8x16]    = { RTX_8X16,  RTX_4X8,        0,    RTX_8X16  },
+    [BS_8x8]     = {  TX_8X8,    TX_4X4,   RTX_4X8,    TX_8X8   },
+    [BS_8x4]     = { RTX_8X4,    TX_4X4,    TX_4X4,   RTX_8X4   },
+    [BS_4x16]    = { RTX_4X16,  RTX_4X8,        0,    RTX_4X16  },
+    [BS_4x8]     = { RTX_4X8,    TX_4X4,        0,    RTX_4X8   },
+    [BS_4x4]     = {  TX_4X4,    TX_4X4,    TX_4X4,    TX_4X4   },
+};
+
+const uint8_t /* enum TxfmType */
+    av1_txtp_from_uvmode[N_UV_INTRA_PRED_MODES] =
+{
+    [DC_PRED]              = DCT_DCT,
+    [VERT_PRED]            = ADST_DCT,
+    [HOR_PRED]             = DCT_ADST,
+    [DIAG_DOWN_LEFT_PRED]  = DCT_DCT,
+    [DIAG_DOWN_RIGHT_PRED] = ADST_ADST,
+    [VERT_RIGHT_PRED]      = ADST_DCT,
+    [HOR_DOWN_PRED]        = DCT_ADST,
+    [HOR_UP_PRED]          = DCT_ADST,
+    [VERT_LEFT_PRED]       = ADST_DCT,
+    [SMOOTH_PRED]          = ADST_ADST,
+    [SMOOTH_V_PRED]        = ADST_DCT,
+    [SMOOTH_H_PRED]        = DCT_ADST,
+    [PAETH_PRED]           = ADST_ADST,
+};
+
+const uint8_t /* enum InterPredMode */
+    av1_comp_inter_pred_modes[N_COMP_INTER_PRED_MODES][2] =
+{
+    [NEARESTMV_NEARESTMV] = { NEARESTMV, NEARESTMV },
+    [NEARMV_NEARMV]       = { NEARMV,    NEARMV    },
+    [NEWMV_NEWMV]         = { NEWMV,     NEWMV     },
+    [GLOBALMV_GLOBALMV]   = { GLOBALMV,  GLOBALMV  },
+    [NEWMV_NEARESTMV]     = { NEWMV,     NEARESTMV },
+    [NEWMV_NEARMV]        = { NEWMV,     NEARMV    },
+    [NEARESTMV_NEWMV]     = { NEARESTMV, NEWMV     },
+    [NEARMV_NEWMV]        = { NEARMV,    NEWMV     },
+};
+
+const uint8_t av1_tx_type_count[N_TXTP_SETS] = {
+    [TXTP_SET_DCT] = 1,
+    [TXTP_SET_DCT_ID] = 2,
+    [TXTP_SET_DT4_ID] = 5,
+    [TXTP_SET_DT4_ID_1D] = 7,
+    [TXTP_SET_DT9_ID_1D] = 12,
+    [TXTP_SET_ALL] = 16,
+    [TXTP_SET_LOSSLESS] = 1,
+};
+
+const uint8_t /* enum TxfmType */
+              av1_tx_types_per_set[N_TXTP_SETS][N_TX_TYPES] =
+{
+    [TXTP_SET_DCT]       = { DCT_DCT },
+    [TXTP_SET_DCT_ID]    = { IDTX, DCT_DCT },
+    [TXTP_SET_DT4_ID]    = { IDTX, DCT_DCT, ADST_ADST, ADST_DCT, DCT_ADST },
+    [TXTP_SET_DT4_ID_1D] = { IDTX, DCT_DCT, V_DCT, H_DCT, ADST_ADST, ADST_DCT,
+                             DCT_ADST },
+    [TXTP_SET_DT9_ID_1D] = { IDTX, V_DCT, H_DCT, DCT_DCT, ADST_DCT, DCT_ADST,
+                             FLIPADST_DCT, DCT_FLIPADST, ADST_ADST,
+                             FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST },
+    [TXTP_SET_ALL]       = { IDTX, V_DCT, H_DCT, V_ADST, H_ADST, V_FLIPADST,
+                             H_FLIPADST, DCT_DCT, ADST_DCT, DCT_ADST,
+                             FLIPADST_DCT, DCT_FLIPADST, ADST_ADST,
+                             FLIPADST_FLIPADST, ADST_FLIPADST, FLIPADST_ADST },
+    [TXTP_SET_LOSSLESS]  = { WHT_WHT },
+};
+
+const uint8_t av1_tx_type_set_index[2][N_TXTP_SETS] = {
+    { 0, -1,  2,  1, -1, -1, 3 },
+    { 0,  3, -1, -1,  2,  1, 4 },
+};
+
+const uint8_t av1_ymode_size_context[N_BS_SIZES] = {
+    [BS_128x128] = 3,
+    [BS_128x64]  = 3,
+    [BS_64x128]  = 3,
+    [BS_64x64]   = 3,
+    [BS_64x32]   = 3,
+    [BS_64x16]   = 2,
+    [BS_32x64]   = 3,
+    [BS_32x32]   = 3,
+    [BS_32x16]   = 2,
+    [BS_32x8 ]   = 1,
+    [BS_16x64]   = 2,
+    [BS_16x32]   = 2,
+    [BS_16x16]   = 2,
+    [BS_16x8 ]   = 1,
+    [BS_16x4 ]   = 0,
+    [BS_8x32 ]   = 1,
+    [BS_8x16 ]   = 1,
+    [BS_8x8  ]   = 1,
+    [BS_8x4  ]   = 0,
+    [BS_4x16 ]   = 0,
+    [BS_4x8  ]   = 0,
+    [BS_4x4  ]   = 0,
+};
+
+const uint8_t av1_nz_map_ctx_offset[N_RECT_TX_SIZES][5][5] = {
+    [TX_4X4] = {
+        { 0, 1, 6, 6 },
+        { 1, 6, 6, 21 },
+        { 6, 6, 21, 21 },
+        { 6, 21, 21, 21 },
+    }, [TX_8X8] = {
+        { 0, 1, 6, 6, 21 },
+        { 1, 6, 6, 21, 21 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [TX_16X16] = {
+        { 0, 1, 6, 6, 21 },
+        { 1, 6, 6, 21, 21 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [TX_32X32] = {
+        { 0, 1, 6, 6, 21 },
+        { 1, 6, 6, 21, 21 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [TX_64X64] = {
+        { 0, 1, 6, 6, 21 },
+        { 1, 6, 6, 21, 21 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [RTX_4X8] = {
+        { 0, 11, 11, 11 },
+        { 11, 11, 11, 11 },
+        { 6, 6, 21, 21 },
+        { 6, 21, 21, 21 },
+        { 21, 21, 21, 21 }
+    }, [RTX_8X4] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+    }, [RTX_8X16] = {
+        { 0, 11, 11, 11, 11 },
+        { 11, 11, 11, 11, 11 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [RTX_16X8] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 }
+    }, [RTX_16X32] = {
+        { 0, 11, 11, 11, 11 },
+        { 11, 11, 11, 11, 11 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [RTX_32X16] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 }
+    }, [RTX_32X64] = {
+        { 0, 11, 11, 11, 11 },
+        { 11, 11, 11, 11, 11 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [RTX_64X32] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 }
+    }, [RTX_4X16] = {
+        { 0, 11, 11, 11 },
+        { 11, 11, 11, 11 },
+        { 6, 6, 21, 21 },
+        { 6, 21, 21, 21 },
+        { 21, 21, 21, 21 }
+    }, [RTX_16X4] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+    }, [RTX_8X32] = {
+        { 0, 11, 11, 11, 11 },
+        { 11, 11, 11, 11, 11 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [RTX_32X8] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 }
+    }, [RTX_16X64] = {
+        { 0, 11, 11, 11, 11 },
+        { 11, 11, 11, 11, 11 },
+        { 6, 6, 21, 21, 21 },
+        { 6, 21, 21, 21, 21 },
+        { 21, 21, 21, 21, 21 }
+    }, [RTX_64X16] = {
+        { 0, 16, 6, 6, 21 },
+        { 16, 16, 6, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 },
+        { 16, 16, 21, 21, 21 }
+    }
+};
+
+const uint8_t /* enum TxClass */ av1_tx_type_class[N_TX_TYPES_PLUS_LL] = {
+    [DCT_DCT]           = TX_CLASS_2D,
+    [ADST_DCT]          = TX_CLASS_2D,
+    [DCT_ADST]          = TX_CLASS_2D,
+    [ADST_ADST]         = TX_CLASS_2D,
+    [FLIPADST_DCT]      = TX_CLASS_2D,
+    [DCT_FLIPADST]      = TX_CLASS_2D,
+    [FLIPADST_FLIPADST] = TX_CLASS_2D,
+    [ADST_FLIPADST]     = TX_CLASS_2D,
+    [FLIPADST_ADST]     = TX_CLASS_2D,
+    [IDTX]              = TX_CLASS_2D,
+    [V_DCT]             = TX_CLASS_V,
+    [H_DCT]             = TX_CLASS_H,
+    [V_ADST]            = TX_CLASS_V,
+    [H_ADST]            = TX_CLASS_H,
+    [V_FLIPADST]        = TX_CLASS_V,
+    [H_FLIPADST]        = TX_CLASS_H,
+    [WHT_WHT]           = TX_CLASS_2D,
+};
+
+const uint8_t /* enum Filter2d */ av1_filter_2d[N_FILTERS][N_FILTERS] = {
+    [FILTER_8TAP_REGULAR] = {
+        [FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_REGULAR,
+        [FILTER_8TAP_SHARP]   = FILTER_2D_8TAP_REGULAR_SHARP,
+        [FILTER_8TAP_SMOOTH]  = FILTER_2D_8TAP_REGULAR_SMOOTH,
+    }, [FILTER_8TAP_SHARP] = {
+        [FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SHARP_REGULAR,
+        [FILTER_8TAP_SHARP]   = FILTER_2D_8TAP_SHARP,
+        [FILTER_8TAP_SMOOTH]  = FILTER_2D_8TAP_SHARP_SMOOTH,
+    }, [FILTER_8TAP_SMOOTH] = {
+        [FILTER_8TAP_REGULAR] = FILTER_2D_8TAP_SMOOTH_REGULAR,
+        [FILTER_8TAP_SHARP]   = FILTER_2D_8TAP_SMOOTH_SHARP,
+        [FILTER_8TAP_SMOOTH]  = FILTER_2D_8TAP_SMOOTH,
+    }, [FILTER_BILINEAR] = {
+        [FILTER_BILINEAR]     = FILTER_2D_BILINEAR,
+    }
+};
+
+const uint8_t /* enum FilterMode */ eve_av1_filter_dir[N_2D_FILTERS][2] = {
+    [FILTER_2D_8TAP_REGULAR]        = { FILTER_8TAP_REGULAR, FILTER_8TAP_REGULAR },
+    [FILTER_2D_8TAP_REGULAR_SMOOTH] = { FILTER_8TAP_SMOOTH,  FILTER_8TAP_REGULAR },
+    [FILTER_2D_8TAP_REGULAR_SHARP]  = { FILTER_8TAP_SHARP,   FILTER_8TAP_REGULAR },
+    [FILTER_2D_8TAP_SHARP_REGULAR]  = { FILTER_8TAP_REGULAR, FILTER_8TAP_SHARP   },
+    [FILTER_2D_8TAP_SHARP_SMOOTH]   = { FILTER_8TAP_SMOOTH,  FILTER_8TAP_SHARP   },
+    [FILTER_2D_8TAP_SHARP]          = { FILTER_8TAP_SHARP,   FILTER_8TAP_SHARP   },
+    [FILTER_2D_8TAP_SMOOTH_REGULAR] = { FILTER_8TAP_REGULAR, FILTER_8TAP_SMOOTH  },
+    [FILTER_2D_8TAP_SMOOTH]         = { FILTER_8TAP_SMOOTH,  FILTER_8TAP_SMOOTH  },
+    [FILTER_2D_8TAP_SMOOTH_SHARP]   = { FILTER_8TAP_SHARP,   FILTER_8TAP_SMOOTH  },
+    [FILTER_2D_BILINEAR]            = { FILTER_2D_BILINEAR,  FILTER_2D_BILINEAR  },
+};
+
+const uint8_t av1_filter_mode_to_y_mode[5] = {
+    DC_PRED, VERT_PRED, HOR_PRED, HOR_DOWN_PRED, DC_PRED
+};
+
+const uint8_t intra_mode_context[N_INTRA_PRED_MODES] = {
+    [DC_PRED]              = 0,
+    [VERT_PRED]            = 1,
+    [HOR_PRED]             = 2,
+    [DIAG_DOWN_LEFT_PRED]  = 3,
+    [DIAG_DOWN_RIGHT_PRED] = 4,
+    [VERT_RIGHT_PRED]      = 4,
+    [HOR_DOWN_PRED]        = 4,
+    [HOR_UP_PRED]          = 4,
+    [VERT_LEFT_PRED]       = 3,
+    [SMOOTH_PRED]          = 0,
+    [SMOOTH_V_PRED]        = 1,
+    [SMOOTH_H_PRED]        = 2,
+    [PAETH_PRED]           = 0,
+};
+
+const unsigned cfl_allowed_mask =
+    (1 << BS_32x32) |
+    (1 << BS_32x16) |
+    (1 << BS_32x8) |
+    (1 << BS_16x32) |
+    (1 << BS_16x16) |
+    (1 << BS_16x8) |
+    (1 << BS_16x4) |
+    (1 << BS_8x32) |
+    (1 << BS_8x16) |
+    (1 << BS_8x8) |
+    (1 << BS_8x4) |
+    (1 << BS_4x16) |
+    (1 << BS_4x8) |
+    (1 << BS_4x4);
+
+const unsigned wedge_allowed_mask =
+    (1 << BS_32x32) |
+    (1 << BS_32x16) |
+    (1 << BS_32x8) |
+    (1 << BS_16x32) |
+    (1 << BS_16x16) |
+    (1 << BS_16x8) |
+    (1 << BS_8x32) |
+    (1 << BS_8x16) |
+    (1 << BS_8x8);
+
+const unsigned interintra_allowed_mask =
+    (1 << BS_32x32) |
+    (1 << BS_32x16) |
+    (1 << BS_16x32) |
+    (1 << BS_16x16) |
+    (1 << BS_16x8) |
+    (1 << BS_8x16) |
+    (1 << BS_8x8);
+
+const uint8_t av1_wedge_ctx_lut[N_BS_SIZES] = {
+    [BS_32x32] = 6,
+    [BS_32x16] = 5,
+    [BS_32x8]  = 8,
+    [BS_16x32] = 4,
+    [BS_16x16] = 3,
+    [BS_16x8]  = 2,
+    [BS_8x32]  = 7,
+    [BS_8x16]  = 1,
+    [BS_8x8]   = 0,
+};
+
+const WarpedMotionParams default_wm_params = {
+    .type = WM_TYPE_IDENTITY,
+    .matrix = {
+        0, 0, 1 << 16,
+        0, 0, 1 << 16,
+    },
+    .alpha = 0,
+    .beta = 0,
+    .gamma = 0,
+    .delta = 0,
+};
+
+const int16_t sgr_params[16][4] = { // r0, r1, e0, e1
+    { 2, 1, 140, 3236 }, { 2, 1, 112, 2158 }, { 2, 1, 93, 1618 },
+    { 2, 1,  80, 1438 }, { 2, 1,  70, 1295 }, { 2, 1, 58, 1177 },
+    { 2, 1,  47, 1079 }, { 2, 1,  37,  996 }, { 2, 1, 30,  925 },
+    { 2, 1,  25,  863 }, { 0, 1,  -1, 2589 }, { 0, 1, -1, 1618 },
+    { 0, 1,  -1, 1177 }, { 0, 1,  -1,  925 }, { 2, 0, 56,   -1 },
+    { 2, 0,  22,   -1 },
+};
+
+const int16_t sgr_x_by_xplus1[256] = {
+  1,   128, 171, 192, 205, 213, 219, 224, 228, 230, 233, 235, 236, 238, 239,
+  240, 241, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 247, 247,
+  248, 248, 248, 248, 249, 249, 249, 249, 249, 250, 250, 250, 250, 250, 250,
+  250, 251, 251, 251, 251, 251, 251, 251, 251, 251, 251, 252, 252, 252, 252,
+  252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 253, 253,
+  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253,
+  253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 253, 254, 254, 254,
+  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+  254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254,
+  254, 254, 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  256,
+};
+
+const int16_t sgr_one_by_x[] = {
+  4096, 2048, 1365, 1024, 819, 683, 585, 512, 455, 410, 372, 341, 315,
+  293,  273,  256,  241,  228, 216, 205, 195, 186, 178, 171, 164,
+};
+
+const int8_t dav1d_mc_subpel_filters[5][15][8] = {
+    [FILTER_8TAP_REGULAR] = {
+        { 0, 2,  -6, 126,   8,  -2, 0, 0 },
+        { 0, 2, -10, 122,  18,  -4, 0, 0 },
+        { 0, 2, -12, 116,  28,  -8, 2, 0 },
+        { 0, 2, -14, 110,  38, -10, 2, 0 },
+        { 0, 2, -14, 102,  48, -12, 2, 0 },
+        { 0, 2, -16,  94,  58, -12, 2, 0 },
+        { 0, 2, -14,  84,  66, -12, 2, 0 },
+        { 0, 2, -14,  76,  76, -14, 2, 0 },
+        { 0, 2, -12,  66,  84, -14, 2, 0 },
+        { 0, 2, -12,  58,  94, -16, 2, 0 },
+        { 0, 2, -12,  48, 102, -14, 2, 0 },
+        { 0, 2, -10,  38, 110, -14, 2, 0 },
+        { 0, 2,  -8,  28, 116, -12, 2, 0 },
+        { 0, 0,  -4,  18, 122, -10, 2, 0 },
+        { 0, 0,  -2,   8, 126,  -6, 2, 0 }
+    }, [FILTER_8TAP_SHARP] = {
+        { -2,  2,  -6, 126,   8,  -2,  2,  0 },
+        { -2,  6, -12, 124,  16,  -6,  4, -2 },
+        { -2,  8, -18, 120,  26, -10,  6, -2 },
+        { -4, 10, -22, 116,  38, -14,  6, -2 },
+        { -4, 10, -22, 108,  48, -18,  8, -2 },
+        { -4, 10, -24, 100,  60, -20,  8, -2 },
+        { -4, 10, -24,  90,  70, -22, 10, -2 },
+        { -4, 12, -24,  80,  80, -24, 12, -4 },
+        { -2, 10, -22,  70,  90, -24, 10, -4 },
+        { -2,  8, -20,  60, 100, -24, 10, -4 },
+        { -2,  8, -18,  48, 108, -22, 10, -4 },
+        { -2,  6, -14,  38, 116, -22, 10, -4 },
+        { -2,  6, -10,  26, 120, -18,  8, -2 },
+        { -2,  4,  -6,  16, 124, -12,  6, -2 },
+        {  0,  2,  -2,   8, 126,  -6,  2, -2 }
+    }, [FILTER_8TAP_SMOOTH] = {
+        { 0,  2, 28,  62, 34,  2,  0, 0 },
+        { 0,  0, 26,  62, 36,  4,  0, 0 },
+        { 0,  0, 22,  62, 40,  4,  0, 0 },
+        { 0,  0, 20,  60, 42,  6,  0, 0 },
+        { 0,  0, 18,  58, 44,  8,  0, 0 },
+        { 0,  0, 16,  56, 46, 10,  0, 0 },
+        { 0, -2, 16,  54, 48, 12,  0, 0 },
+        { 0, -2, 14,  52, 52, 14, -2, 0 },
+        { 0,  0, 12,  48, 54, 16, -2, 0 },
+        { 0,  0, 10,  46, 56, 16,  0, 0 },
+        { 0,  0,  8,  44, 58, 18,  0, 0 },
+        { 0,  0,  6,  42, 60, 20,  0, 0 },
+        { 0,  0,  4,  40, 62, 22,  0, 0 },
+        { 0,  0,  4,  36, 62, 26,  0, 0 },
+        { 0,  0,  2,  34, 62, 28,  2, 0 },
+    },
+    /* width <= 4 */
+    [3 + FILTER_8TAP_REGULAR] = {
+        { 0, 0,  -4, 126,   8,  -2, 0, 0 },
+        { 0, 0,  -8, 122,  18,  -4, 0, 0 },
+        { 0, 0, -10, 116,  28,  -6, 0, 0 },
+        { 0, 0, -12, 110,  38,  -8, 0, 0 },
+        { 0, 0, -12, 102,  48, -10, 0, 0 },
+        { 0, 0, -14,  94,  58, -10, 0, 0 },
+        { 0, 0, -12,  84,  66, -10, 0, 0 },
+        { 0, 0, -12,  76,  76, -12, 0, 0 },
+        { 0, 0, -10,  66,  84, -12, 0, 0 },
+        { 0, 0, -10,  58,  94, -14, 0, 0 },
+        { 0, 0, -10,  48, 102, -12, 0, 0 },
+        { 0, 0,  -8,  38, 110, -12, 0, 0 },
+        { 0, 0,  -6,  28, 116, -10, 0, 0 },
+        { 0, 0,  -4,  18, 122,  -8, 0, 0 },
+        { 0, 0,  -2,   8, 126,  -4, 0, 0 }
+    }, [3 + FILTER_8TAP_SMOOTH] = {
+        { 0, 0, 30,  62, 34,  2, 0, 0 },
+        { 0, 0, 26,  62, 36,  4, 0, 0 },
+        { 0, 0, 22,  62, 40,  4, 0, 0 },
+        { 0, 0, 20,  60, 42,  6, 0, 0 },
+        { 0, 0, 18,  58, 44,  8, 0, 0 },
+        { 0, 0, 16,  56, 46, 10, 0, 0 },
+        { 0, 0, 14,  54, 48, 12, 0, 0 },
+        { 0, 0, 12,  52, 52, 12, 0, 0 },
+        { 0, 0, 12,  48, 54, 14, 0, 0 },
+        { 0, 0, 10,  46, 56, 16, 0, 0 },
+        { 0, 0,  8,  44, 58, 18, 0, 0 },
+        { 0, 0,  6,  42, 60, 20, 0, 0 },
+        { 0, 0,  4,  40, 62, 22, 0, 0 },
+        { 0, 0,  4,  36, 62, 26, 0, 0 },
+        { 0, 0,  2,  34, 62, 30, 0, 0 }
+    }
+};
+
+const int8_t dav1d_mc_warp_filter[][8] = {
+    // [-1, 0)
+    { 0,   0, 127,   1,   0, 0, 0, 0 }, { 0, - 1, 127,   2,   0, 0, 0, 0 },
+    { 1, - 3, 127,   4, - 1, 0, 0, 0 }, { 1, - 4, 126,   6, - 2, 1, 0, 0 },
+    { 1, - 5, 126,   8, - 3, 1, 0, 0 }, { 1, - 6, 125,  11, - 4, 1, 0, 0 },
+    { 1, - 7, 124,  13, - 4, 1, 0, 0 }, { 2, - 8, 123,  15, - 5, 1, 0, 0 },
+    { 2, - 9, 122,  18, - 6, 1, 0, 0 }, { 2, -10, 121,  20, - 6, 1, 0, 0 },
+    { 2, -11, 120,  22, - 7, 2, 0, 0 }, { 2, -12, 119,  25, - 8, 2, 0, 0 },
+    { 3, -13, 117,  27, - 8, 2, 0, 0 }, { 3, -13, 116,  29, - 9, 2, 0, 0 },
+    { 3, -14, 114,  32, -10, 3, 0, 0 }, { 3, -15, 113,  35, -10, 2, 0, 0 },
+    { 3, -15, 111,  37, -11, 3, 0, 0 }, { 3, -16, 109,  40, -11, 3, 0, 0 },
+    { 3, -16, 108,  42, -12, 3, 0, 0 }, { 4, -17, 106,  45, -13, 3, 0, 0 },
+    { 4, -17, 104,  47, -13, 3, 0, 0 }, { 4, -17, 102,  50, -14, 3, 0, 0 },
+    { 4, -17, 100,  52, -14, 3, 0, 0 }, { 4, -18,  98,  55, -15, 4, 0, 0 },
+    { 4, -18,  96,  58, -15, 3, 0, 0 }, { 4, -18,  94,  60, -16, 4, 0, 0 },
+    { 4, -18,  91,  63, -16, 4, 0, 0 }, { 4, -18,  89,  65, -16, 4, 0, 0 },
+    { 4, -18,  87,  68, -17, 4, 0, 0 }, { 4, -18,  85,  70, -17, 4, 0, 0 },
+    { 4, -18,  82,  73, -17, 4, 0, 0 }, { 4, -18,  80,  75, -17, 4, 0, 0 },
+    { 4, -18,  78,  78, -18, 4, 0, 0 }, { 4, -17,  75,  80, -18, 4, 0, 0 },
+    { 4, -17,  73,  82, -18, 4, 0, 0 }, { 4, -17,  70,  85, -18, 4, 0, 0 },
+    { 4, -17,  68,  87, -18, 4, 0, 0 }, { 4, -16,  65,  89, -18, 4, 0, 0 },
+    { 4, -16,  63,  91, -18, 4, 0, 0 }, { 4, -16,  60,  94, -18, 4, 0, 0 },
+    { 3, -15,  58,  96, -18, 4, 0, 0 }, { 4, -15,  55,  98, -18, 4, 0, 0 },
+    { 3, -14,  52, 100, -17, 4, 0, 0 }, { 3, -14,  50, 102, -17, 4, 0, 0 },
+    { 3, -13,  47, 104, -17, 4, 0, 0 }, { 3, -13,  45, 106, -17, 4, 0, 0 },
+    { 3, -12,  42, 108, -16, 3, 0, 0 }, { 3, -11,  40, 109, -16, 3, 0, 0 },
+    { 3, -11,  37, 111, -15, 3, 0, 0 }, { 2, -10,  35, 113, -15, 3, 0, 0 },
+    { 3, -10,  32, 114, -14, 3, 0, 0 }, { 2, - 9,  29, 116, -13, 3, 0, 0 },
+    { 2, - 8,  27, 117, -13, 3, 0, 0 }, { 2, - 8,  25, 119, -12, 2, 0, 0 },
+    { 2, - 7,  22, 120, -11, 2, 0, 0 }, { 1, - 6,  20, 121, -10, 2, 0, 0 },
+    { 1, - 6,  18, 122, - 9, 2, 0, 0 }, { 1, - 5,  15, 123, - 8, 2, 0, 0 },
+    { 1, - 4,  13, 124, - 7, 1, 0, 0 }, { 1, - 4,  11, 125, - 6, 1, 0, 0 },
+    { 1, - 3,   8, 126, - 5, 1, 0, 0 }, { 1, - 2,   6, 126, - 4, 1, 0, 0 },
+    { 0, - 1,   4, 127, - 3, 1, 0, 0 }, { 0,   0,   2, 127, - 1, 0, 0, 0 },
+
+    // [0, 1)
+    { 0,  0,   0, 127,   1,   0,  0,  0}, { 0,  0,  -1, 127,   2,   0,  0,  0},
+    { 0,  1,  -3, 127,   4,  -2,  1,  0}, { 0,  1,  -5, 127,   6,  -2,  1,  0},
+    { 0,  2,  -6, 126,   8,  -3,  1,  0}, {-1,  2,  -7, 126,  11,  -4,  2, -1},
+    {-1,  3,  -8, 125,  13,  -5,  2, -1}, {-1,  3, -10, 124,  16,  -6,  3, -1},
+    {-1,  4, -11, 123,  18,  -7,  3, -1}, {-1,  4, -12, 122,  20,  -7,  3, -1},
+    {-1,  4, -13, 121,  23,  -8,  3, -1}, {-2,  5, -14, 120,  25,  -9,  4, -1},
+    {-1,  5, -15, 119,  27, -10,  4, -1}, {-1,  5, -16, 118,  30, -11,  4, -1},
+    {-2,  6, -17, 116,  33, -12,  5, -1}, {-2,  6, -17, 114,  35, -12,  5, -1},
+    {-2,  6, -18, 113,  38, -13,  5, -1}, {-2,  7, -19, 111,  41, -14,  6, -2},
+    {-2,  7, -19, 110,  43, -15,  6, -2}, {-2,  7, -20, 108,  46, -15,  6, -2},
+    {-2,  7, -20, 106,  49, -16,  6, -2}, {-2,  7, -21, 104,  51, -16,  7, -2},
+    {-2,  7, -21, 102,  54, -17,  7, -2}, {-2,  8, -21, 100,  56, -18,  7, -2},
+    {-2,  8, -22,  98,  59, -18,  7, -2}, {-2,  8, -22,  96,  62, -19,  7, -2},
+    {-2,  8, -22,  94,  64, -19,  7, -2}, {-2,  8, -22,  91,  67, -20,  8, -2},
+    {-2,  8, -22,  89,  69, -20,  8, -2}, {-2,  8, -22,  87,  72, -21,  8, -2},
+    {-2,  8, -21,  84,  74, -21,  8, -2}, {-2,  8, -22,  82,  77, -21,  8, -2},
+    {-2,  8, -21,  79,  79, -21,  8, -2}, {-2,  8, -21,  77,  82, -22,  8, -2},
+    {-2,  8, -21,  74,  84, -21,  8, -2}, {-2,  8, -21,  72,  87, -22,  8, -2},
+    {-2,  8, -20,  69,  89, -22,  8, -2}, {-2,  8, -20,  67,  91, -22,  8, -2},
+    {-2,  7, -19,  64,  94, -22,  8, -2}, {-2,  7, -19,  62,  96, -22,  8, -2},
+    {-2,  7, -18,  59,  98, -22,  8, -2}, {-2,  7, -18,  56, 100, -21,  8, -2},
+    {-2,  7, -17,  54, 102, -21,  7, -2}, {-2,  7, -16,  51, 104, -21,  7, -2},
+    {-2,  6, -16,  49, 106, -20,  7, -2}, {-2,  6, -15,  46, 108, -20,  7, -2},
+    {-2,  6, -15,  43, 110, -19,  7, -2}, {-2,  6, -14,  41, 111, -19,  7, -2},
+    {-1,  5, -13,  38, 113, -18,  6, -2}, {-1,  5, -12,  35, 114, -17,  6, -2},
+    {-1,  5, -12,  33, 116, -17,  6, -2}, {-1,  4, -11,  30, 118, -16,  5, -1},
+    {-1,  4, -10,  27, 119, -15,  5, -1}, {-1,  4,  -9,  25, 120, -14,  5, -2},
+    {-1,  3,  -8,  23, 121, -13,  4, -1}, {-1,  3,  -7,  20, 122, -12,  4, -1},
+    {-1,  3,  -7,  18, 123, -11,  4, -1}, {-1,  3,  -6,  16, 124, -10,  3, -1},
+    {-1,  2,  -5,  13, 125,  -8,  3, -1}, {-1,  2,  -4,  11, 126,  -7,  2, -1},
+    { 0,  1,  -3,   8, 126,  -6,  2,  0}, { 0,  1,  -2,   6, 127,  -5,  1,  0},
+    { 0,  1,  -2,   4, 127,  -3,  1,  0}, { 0,  0,   0,   2, 127,  -1,  0,  0},
+
+    // [1, 2)
+    { 0, 0, 0,   1, 127,   0,   0, 0 }, { 0, 0, 0, - 1, 127,   2,   0, 0 },
+    { 0, 0, 1, - 3, 127,   4, - 1, 0 }, { 0, 0, 1, - 4, 126,   6, - 2, 1 },
+    { 0, 0, 1, - 5, 126,   8, - 3, 1 }, { 0, 0, 1, - 6, 125,  11, - 4, 1 },
+    { 0, 0, 1, - 7, 124,  13, - 4, 1 }, { 0, 0, 2, - 8, 123,  15, - 5, 1 },
+    { 0, 0, 2, - 9, 122,  18, - 6, 1 }, { 0, 0, 2, -10, 121,  20, - 6, 1 },
+    { 0, 0, 2, -11, 120,  22, - 7, 2 }, { 0, 0, 2, -12, 119,  25, - 8, 2 },
+    { 0, 0, 3, -13, 117,  27, - 8, 2 }, { 0, 0, 3, -13, 116,  29, - 9, 2 },
+    { 0, 0, 3, -14, 114,  32, -10, 3 }, { 0, 0, 3, -15, 113,  35, -10, 2 },
+    { 0, 0, 3, -15, 111,  37, -11, 3 }, { 0, 0, 3, -16, 109,  40, -11, 3 },
+    { 0, 0, 3, -16, 108,  42, -12, 3 }, { 0, 0, 4, -17, 106,  45, -13, 3 },
+    { 0, 0, 4, -17, 104,  47, -13, 3 }, { 0, 0, 4, -17, 102,  50, -14, 3 },
+    { 0, 0, 4, -17, 100,  52, -14, 3 }, { 0, 0, 4, -18,  98,  55, -15, 4 },
+    { 0, 0, 4, -18,  96,  58, -15, 3 }, { 0, 0, 4, -18,  94,  60, -16, 4 },
+    { 0, 0, 4, -18,  91,  63, -16, 4 }, { 0, 0, 4, -18,  89,  65, -16, 4 },
+    { 0, 0, 4, -18,  87,  68, -17, 4 }, { 0, 0, 4, -18,  85,  70, -17, 4 },
+    { 0, 0, 4, -18,  82,  73, -17, 4 }, { 0, 0, 4, -18,  80,  75, -17, 4 },
+    { 0, 0, 4, -18,  78,  78, -18, 4 }, { 0, 0, 4, -17,  75,  80, -18, 4 },
+    { 0, 0, 4, -17,  73,  82, -18, 4 }, { 0, 0, 4, -17,  70,  85, -18, 4 },
+    { 0, 0, 4, -17,  68,  87, -18, 4 }, { 0, 0, 4, -16,  65,  89, -18, 4 },
+    { 0, 0, 4, -16,  63,  91, -18, 4 }, { 0, 0, 4, -16,  60,  94, -18, 4 },
+    { 0, 0, 3, -15,  58,  96, -18, 4 }, { 0, 0, 4, -15,  55,  98, -18, 4 },
+    { 0, 0, 3, -14,  52, 100, -17, 4 }, { 0, 0, 3, -14,  50, 102, -17, 4 },
+    { 0, 0, 3, -13,  47, 104, -17, 4 }, { 0, 0, 3, -13,  45, 106, -17, 4 },
+    { 0, 0, 3, -12,  42, 108, -16, 3 }, { 0, 0, 3, -11,  40, 109, -16, 3 },
+    { 0, 0, 3, -11,  37, 111, -15, 3 }, { 0, 0, 2, -10,  35, 113, -15, 3 },
+    { 0, 0, 3, -10,  32, 114, -14, 3 }, { 0, 0, 2, - 9,  29, 116, -13, 3 },
+    { 0, 0, 2, - 8,  27, 117, -13, 3 }, { 0, 0, 2, - 8,  25, 119, -12, 2 },
+    { 0, 0, 2, - 7,  22, 120, -11, 2 }, { 0, 0, 1, - 6,  20, 121, -10, 2 },
+    { 0, 0, 1, - 6,  18, 122, - 9, 2 }, { 0, 0, 1, - 5,  15, 123, - 8, 2 },
+    { 0, 0, 1, - 4,  13, 124, - 7, 1 }, { 0, 0, 1, - 4,  11, 125, - 6, 1 },
+    { 0, 0, 1, - 3,   8, 126, - 5, 1 }, { 0, 0, 1, - 2,   6, 126, - 4, 1 },
+    { 0, 0, 0, - 1,   4, 127, - 3, 1 }, { 0, 0, 0,   0,   2, 127, - 1, 0 },
+
+    // dummy (replicate row index 191)
+    { 0, 0, 0,   0,   2, 127, - 1, 0 },
+};
--- /dev/null
+++ b/src/tables.h
@@ -1,0 +1,84 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_TABLES_H__
+#define __DAV1D_SRC_TABLES_H__
+
+#include <stdint.h>
+
+#include "common/intops.h"
+
+#include "src/levels.h"
+
+// Constant lookup tables; the values live in the matching src/tables.c.
+
+// Block partitioning / block size derivation tables.
+extern const uint8_t av1_al_part_ctx[2][N_BL_LEVELS][N_PARTITIONS];
+extern const uint8_t /* enum BlockSize */
+                     av1_block_sizes[N_BL_LEVELS][N_PARTITIONS][2];
+// width, height (in 4px blocks), log2 versions of these two
+extern const uint8_t av1_block_dimensions[N_BS_SIZES][4];
+typedef struct TxfmInfo {
+    // width, height (in 4px blocks), log2 of them, min/max of log2, sub, pad
+    uint8_t w, h, lw, lh, min, max, sub, ctx;
+} TxfmInfo;
+extern const TxfmInfo av1_txfm_dimensions[N_RECT_TX_SIZES];
+extern const uint8_t /* enum (Rect)TxfmSize */
+                     av1_max_txfm_size_for_bs[N_BS_SIZES][4 /* y, 420, 422, 444 */];
+extern const uint8_t /* enum TxfmType */
+                     av1_txtp_from_uvmode[N_UV_INTRA_PRED_MODES];
+
+extern const uint8_t /* enum InterPredMode */
+                     av1_comp_inter_pred_modes[N_COMP_INTER_PRED_MODES][2];
+
+// Transform-type set tables used during coefficient parsing.
+extern const uint8_t av1_tx_type_count[N_TXTP_SETS];
+extern const uint8_t /* enum TxfmType */
+                     av1_tx_types_per_set[N_TXTP_SETS][N_TX_TYPES];
+extern const uint8_t av1_tx_type_set_index[2][N_TXTP_SETS];
+
+// Mode / context derivation tables.
+extern const uint8_t av1_filter_mode_to_y_mode[5];
+extern const uint8_t av1_ymode_size_context[N_BS_SIZES];
+extern const uint8_t av1_nz_map_ctx_offset[N_RECT_TX_SIZES][5][5];
+extern const uint8_t /* enum TxClass */
+                     av1_tx_type_class[N_TX_TYPES_PLUS_LL];
+extern const uint8_t /* enum Filter2d */
+                     av1_filter_2d[N_FILTERS /* h */][N_FILTERS /* v */];
+extern const uint8_t /* enum FilterMode */ eve_av1_filter_dir[N_2D_FILTERS][2];
+extern const uint8_t intra_mode_context[N_INTRA_PRED_MODES];
+extern const uint8_t av1_wedge_ctx_lut[N_BS_SIZES];
+
+// Bitmasks over enum BlockSize: bit N set = tool allowed for block size N.
+extern const unsigned cfl_allowed_mask;
+extern const unsigned wedge_allowed_mask;
+extern const unsigned interintra_allowed_mask;
+
+extern const WarpedMotionParams default_wm_params;
+
+// Self-guided restoration (SGR) parameter/LUT tables.
+extern const int16_t sgr_params[16][4];
+extern const int16_t sgr_x_by_xplus1[256];
+extern const int16_t sgr_one_by_x[25];
+
+// Subpel MC filter coefficients and the warp filter coefficient table.
+extern const int8_t dav1d_mc_subpel_filters[5][15][8];
+extern const int8_t dav1d_mc_warp_filter[][8];
+
+#endif /* __DAV1D_SRC_TABLES_H__ */
--- /dev/null
+++ b/src/thread_data.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_THREAD_DATA_H__
+#define __DAV1D_SRC_THREAD_DATA_H__
+
+// Required for pthread_t / pthread_cond_t / pthread_mutex_t below; without
+// it this header only compiles if every includer pulls in pthread.h first.
+#include <pthread.h>
+
+// Generic worker-thread bundle: the thread handle plus the condition
+// variable and mutex used to hand it work and wait for completion.
+struct thread_data {
+    pthread_t thread;
+    pthread_cond_t cond;
+    pthread_mutex_t lock;
+};
+
+#endif /* __DAV1D_SRC_THREAD_DATA_H__ */
--- /dev/null
+++ b/src/thread_task.c
@@ -1,0 +1,137 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "src/thread_task.h"
+
+// Entry point for a frame worker thread. Loops forever: under the frame
+// thread lock, waits until the submitting thread posts tile data
+// (f->n_tile_data != 0) or requests termination, then decodes the frame.
+void *dav1d_frame_task(void *const data) {
+    Dav1dFrameContext *const f = data;
+
+    for (;;) {
+        pthread_mutex_lock(&f->frame_thread.td.lock);
+        // mark previous work as consumed; the submitter refills this
+        f->n_tile_data = 0;
+        int did_signal = 0;
+        while (!f->n_tile_data && !f->frame_thread.die) {
+            // signal exactly once per idle period so the submitting thread
+            // knows this worker is ready, then block until work arrives
+            if (!did_signal) {
+                did_signal = 1;
+                pthread_cond_signal(&f->frame_thread.td.cond);
+            }
+            pthread_cond_wait(&f->frame_thread.td.cond,
+                              &f->frame_thread.td.lock);
+        }
+        if (f->frame_thread.die) {
+            pthread_mutex_unlock(&f->frame_thread.td.lock);
+            break;
+        }
+        pthread_mutex_unlock(&f->frame_thread.td.lock);
+
+        decode_frame(f); // NOTE(review): return value ignored — confirm errors are reported elsewhere
+    }
+
+    pthread_exit(NULL);
+    return NULL;
+}
+
+// Entry point for a tile worker thread. Pulls task indices off the shared
+// FrameTileThreadData work queue and decodes either a whole tile (all of
+// its sbrows) or one interleaved tile-sbrow, depending on the number of
+// tile threads relative to the number of tile columns.
+void *dav1d_tile_task(void *const data) {
+    Dav1dTileContext *const t = data;
+    struct FrameTileThreadData *const fttd = t->tile_thread.fttd;
+    const Dav1dFrameContext *const f = t->f;
+    const int tile_thread_idx = t - f->tc;
+    const uint64_t mask = 1ULL << tile_thread_idx;
+
+    for (;;) {
+        pthread_mutex_lock(&fttd->lock);
+        fttd->available |= mask; // advertise this thread as idle
+        int did_signal = 0;
+        while (!fttd->tasks_left && !t->tile_thread.die) {
+            // wake the submitter once so it sees an idle worker, then
+            // wait for new tasks (or a termination request)
+            if (!did_signal) {
+                did_signal = 1;
+                pthread_cond_signal(&fttd->icond);
+            }
+            pthread_cond_wait(&fttd->cond, &fttd->lock);
+        }
+        if (t->tile_thread.die) {
+            pthread_mutex_unlock(&fttd->lock);
+            break;
+        }
+        fttd->available &= ~mask;
+        // claim the next task index; tasks are numbered front-to-back
+        const int task_idx = fttd->num_tasks - fttd->tasks_left--;
+        pthread_mutex_unlock(&fttd->lock);
+
+        if (f->frame_thread.pass == 1 || f->n_tc >= f->frame_hdr.tiling.cols) {
+            // we can (or in fact, if >, we need to) do full tile decoding.
+            // loopfilter happens in the main thread
+            Dav1dTileState *const ts = t->ts = &f->ts[task_idx];
+            for (t->by = ts->tiling.row_start; t->by < ts->tiling.row_end;
+                 t->by += f->sb_step)
+            {
+                decode_tile_sbrow(t);
+
+                // signal progress
+                pthread_mutex_lock(&ts->tile_thread.lock);
+                atomic_store(&ts->progress, 1 + (t->by >> f->sb_shift));
+                pthread_cond_signal(&ts->tile_thread.cond);
+                pthread_mutex_unlock(&ts->tile_thread.lock);
+            }
+        } else {
+            // per-sbrow task: look up which sbrow of which tile this is
+            const int sby = f->tile_thread.task_idx_to_sby_and_tile_idx[task_idx][0];
+            const int tile_idx = f->tile_thread.task_idx_to_sby_and_tile_idx[task_idx][1];
+            Dav1dTileState *const ts = &f->ts[tile_idx];
+
+            // the interleaved decoding can sometimes cause dependency issues
+            // if one part of the frame decodes significantly faster than others.
+            // Ideally, we'd "skip" tile_sbrows where dependencies are missing,
+            // and resume them later as dependencies are met. This also would
+            // solve the broadcast() below and allow us to use signal(). However,
+            // for now, we use linear dependency tracking because it's simpler.
+            if (atomic_load(&ts->progress) < sby) {
+                pthread_mutex_lock(&ts->tile_thread.lock);
+                while (atomic_load(&ts->progress) < sby)
+                    pthread_cond_wait(&ts->tile_thread.cond,
+                                      &ts->tile_thread.lock);
+                pthread_mutex_unlock(&ts->tile_thread.lock);
+            }
+
+            // we need to interleave sbrow decoding for all tile cols in a
+            // tile row, since otherwise subsequent threads will be blocked
+            // waiting for the post-filter to complete
+            t->ts = ts;
+            t->by = sby << f->sb_shift;
+            decode_tile_sbrow(t);
+
+            // signal progress
+            pthread_mutex_lock(&ts->tile_thread.lock);
+            atomic_store(&ts->progress, 1 + sby);
+            pthread_cond_broadcast(&ts->tile_thread.cond);
+            pthread_mutex_unlock(&ts->tile_thread.lock);
+        }
+    }
+
+    pthread_exit(NULL);
+    return NULL;
+}
--- /dev/null
+++ b/src/thread_task.h
@@ -1,0 +1,39 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_THREAD_TASK_H__
+#define __DAV1D_SRC_THREAD_TASK_H__
+
+#include "src/internal.h"
+
+// Implemented by the decoder proper; called from the worker loops below.
+int decode_frame(Dav1dFrameContext *f);
+// pthread entry point for the per-frame worker thread (thread_task.c).
+void *dav1d_frame_task(void *data);
+
+int decode_tile_sbrow(Dav1dTileContext *t);
+// pthread entry point for the tile worker threads (thread_task.c).
+void *dav1d_tile_task(void *data);
+
+#endif /* __DAV1D_SRC_THREAD_TASK_H__ */
--- /dev/null
+++ b/src/warpmv.c
@@ -1,0 +1,194 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include "common/intops.h"
+
+#include "src/warpmv.h"
+
+// Fixed-point reciprocal table used by resolve_divisor_{32,64}():
+// div_lut[i] == round(2^14 * 256 / (256 + i)) for i in [0, 256]
+// (endpoints: div_lut[0] == 2^14 == 16384, div_lut[256] == 2^13 == 8192).
+// 257 entries so that index 256 (assert(f <= 256) at the callers) is valid.
+static const uint16_t div_lut[257] = {
+    16384, 16320, 16257, 16194, 16132, 16070, 16009, 15948, 15888, 15828, 15768,
+    15709, 15650, 15592, 15534, 15477, 15420, 15364, 15308, 15252, 15197, 15142,
+    15087, 15033, 14980, 14926, 14873, 14821, 14769, 14717, 14665, 14614, 14564,
+    14513, 14463, 14413, 14364, 14315, 14266, 14218, 14170, 14122, 14075, 14028,
+    13981, 13935, 13888, 13843, 13797, 13752, 13707, 13662, 13618, 13574, 13530,
+    13487, 13443, 13400, 13358, 13315, 13273, 13231, 13190, 13148, 13107, 13066,
+    13026, 12985, 12945, 12906, 12866, 12827, 12788, 12749, 12710, 12672, 12633,
+    12596, 12558, 12520, 12483, 12446, 12409, 12373, 12336, 12300, 12264, 12228,
+    12193, 12157, 12122, 12087, 12053, 12018, 11984, 11950, 11916, 11882, 11848,
+    11815, 11782, 11749, 11716, 11683, 11651, 11619, 11586, 11555, 11523, 11491,
+    11460, 11429, 11398, 11367, 11336, 11305, 11275, 11245, 11215, 11185, 11155,
+    11125, 11096, 11067, 11038, 11009, 10980, 10951, 10923, 10894, 10866, 10838,
+    10810, 10782, 10755, 10727, 10700, 10673, 10645, 10618, 10592, 10565, 10538,
+    10512, 10486, 10460, 10434, 10408, 10382, 10356, 10331, 10305, 10280, 10255,
+    10230, 10205, 10180, 10156, 10131, 10107, 10082, 10058, 10034, 10010,  9986,
+     9963,  9939,  9916,  9892,  9869,  9846,  9823,  9800,  9777,  9754,  9732,
+     9709,  9687,  9664,  9642,  9620,  9598,  9576,  9554,  9533,  9511,  9489,
+     9468,  9447,  9425,  9404,  9383,  9362,  9341,  9321,  9300,  9279,  9259,
+     9239,  9218,  9198,  9178,  9158,  9138,  9118,  9098,  9079,  9059,  9039,
+     9020,  9001,  8981,  8962,  8943,  8924,  8905,  8886,  8867,  8849,  8830,
+     8812,  8793,  8775,  8756,  8738,  8720,  8702,  8684,  8666,  8648,  8630,
+     8613,  8595,  8577,  8560,  8542,  8525,  8508,  8490,  8473,  8456,  8439,
+     8422,  8405,  8389,  8372,  8355,  8339,  8322,  8306,  8289,  8273,  8257,
+     8240,  8224,  8208,  8192,
+};
+
+// Clamp a warp parameter to int16 range, then round its magnitude to the
+// nearest multiple of 64, preserving the sign.
+static inline int iclip_wmp(const int v) {
+    const int clipped = iclip(v, INT16_MIN, INT16_MAX);
+    const int rounded = ((abs(clipped) + 32) >> 6) << 6;
+
+    return clipped < 0 ? -rounded : rounded;
+}
+
+// Transfer the sign of the 64-bit value s onto v.
+static inline int apply_sign64(const int v, const int64_t s) {
+    if (s < 0)
+        return -v;
+    return v;
+}
+
+// Compute a fixed-point reciprocal of d: returns a multiplier m and sets
+// *shift such that x / d ~= (x * m) >> *shift. The 8 fractional bits of
+// d after normalization (f in [0, 256]) index the div_lut table above.
+static inline int resolve_divisor_32(const unsigned d, int *const shift) {
+    *shift = ulog2(d);
+    // e = remainder of d below its highest power of two
+    const int e = d - (1 << *shift);
+    // round/scale the remainder to an 8-bit fraction
+    const int f = *shift > 8 ? (e + (1 << (*shift - 9))) >> (*shift - 8) :
+                               e << (8 - *shift);
+    assert(f <= 256);
+    *shift += 14; // div_lut values carry 14 fractional bits
+    // Use f as lookup into the precomputed table of multipliers
+    return div_lut[f];
+}
+
+// Derive the shear parameters (alpha, beta, gamma, delta) from the affine
+// matrix in wm->matrix. Returns nonzero if the matrix is unusable for
+// warped prediction (non-positive diagonal, or shear out of range),
+// zero on success.
+int get_shear_params(WarpedMotionParams *const wm) {
+    const int32_t *const mat = wm->matrix;
+
+    if (mat[2] <= 0) return 1;
+
+    wm->alpha = iclip_wmp(mat[2] - 0x10000);
+    wm->beta = iclip_wmp(mat[3]);
+
+    int shift;
+    // y ~= 2^shift / mat[2], sign-matched to mat[2]
+    const int y = apply_sign(resolve_divisor_32(abs(mat[2]), &shift), mat[2]);
+    // gamma ~= mat[4] * 2^16 / mat[2], rounded
+    const int64_t v1 = ((int64_t) mat[4] * 0x10000) * y;
+    const int rnd = (1 << shift) >> 1;
+    wm->gamma = iclip_wmp(apply_sign64((llabs(v1) + rnd) >> shift, v1));
+    // delta ~= mat[5] - mat[3] * mat[4] / mat[2] - 2^16, rounded
+    const int64_t v2 = ((int64_t) mat[3] * mat[4]) * y;
+    wm->delta = iclip_wmp(mat[5] -
+                          (int) apply_sign64((llabs(v2) + rnd) >> shift, v2) -
+                          0x10000);
+
+    // reject shears too large for the warp filter's range
+    return (4 * abs(wm->alpha) + 7 * abs(wm->beta) >= 0x10000) ||
+           (4 * abs(wm->gamma) + 4 * abs(wm->delta) >= 0x10000);
+}
+
+// 64-bit variant of resolve_divisor_32(): returns a fixed-point
+// reciprocal multiplier for d and sets *shift accordingly.
+static int resolve_divisor_64(const uint64_t d, int *const shift) {
+    *shift = u64log2(d);
+    // e = remainder of d below its highest power of two
+    const int64_t e = d - (1LL << *shift);
+    // round/scale the remainder to an 8-bit fraction, f in [0, 256]
+    const int64_t f = *shift > 8 ? (e + (1LL << (*shift - 9))) >> (*shift - 8) :
+                                   e << (8 - *shift);
+    assert(f <= 256);
+    *shift += 14; // div_lut values carry 14 fractional bits
+    // Use f as lookup into the precomputed table of multipliers
+    return div_lut[f];
+}
+
+// Multiply px by the inverse-determinant idet, round-shift down by
+// 'shift', and clamp to the valid range for an off-diagonal warp
+// matrix entry (+/-0x1fff).
+static int get_mult_shift_ndiag(const int64_t px,
+                                const int idet, const int shift)
+{
+    const int64_t prod = px * idet;
+    const int64_t half = (1LL << shift) >> 1;
+    const int rounded = apply_sign64((llabs(prod) + half) >> shift, prod);
+    return iclip(rounded, -0x1fff, 0x1fff);
+}
+
+// Multiply px by the inverse-determinant idet, round-shift down by
+// 'shift', and clamp to the valid range for a diagonal warp matrix
+// entry: 0x10000 +/- 0x1fff (i.e. near 1.0 in 16.16 fixed point).
+static int get_mult_shift_diag(const int64_t px,
+                               const int idet, const int shift)
+{
+    const int64_t prod = px * idet;
+    const int64_t half = (1LL << shift) >> 1;
+    const int rounded = apply_sign64((llabs(prod) + half) >> shift, prod);
+    return iclip(rounded, 0xe001, 0x11fff);
+}
+
+// Estimate an affine warp model from candidate source/destination point
+// pairs (pts) via integer least-squares. Returns 1 if the normal-equation
+// system is singular (caller must fall back to a simpler model), 0 on
+// success with wm->matrix[0..5] filled in.
+int find_affine_int(const int (*pts)[2][2], const int np,
+                    const int bw4, const int bh4,
+                    const mv mv, WarpedMotionParams *const wm,
+                    const int bx4, const int by4)
+{
+    int32_t *const mat = wm->matrix;
+    int a[2][2] = { { 0, 0 }, { 0, 0 } };  // normal-equation matrix (symmetric)
+    int bx[2] = { 0, 0 };                  // right-hand side, x component
+    int by[2] = { 0, 0 };                  // right-hand side, y component
+    // block center offsets; rsu* in block-relative units, su*/du* in the
+    // units of pts[] (presumably 1/8 pel — TODO confirm against callers)
+    const int rsuy = 2 * bh4 - 1;
+    const int rsux = 2 * bw4 - 1;
+    const int suy = rsuy * 8;
+    const int sux = rsux * 8;
+    const int duy = suy + mv.y;
+    const int dux = sux + mv.x;
+    const int isuy = by4 * 4 + rsuy;       // absolute block center position
+    const int isux = bx4 * 4 + rsux;
+
+    for (int i = 0; i < np; i++) {
+        // source/destination coordinates relative to the block centers
+        const int dx = pts[i][1][0] - dux;
+        const int dy = pts[i][1][1] - duy;
+        const int sx = pts[i][0][0] - sux;
+        const int sy = pts[i][0][1] - suy;
+        // only accumulate candidates with a small enough displacement
+        if (abs(sx - dx) < 256 && abs(sy - dy) < 256) {
+            a[0][0] += ((sx * sx) >> 2) + sx * 2 + 8;
+            a[0][1] += ((sx * sy) >> 2) + sx + sy + 4;
+            a[1][1] += ((sy * sy) >> 2) + sy * 2 + 8;
+            bx[0] += ((sx * dx) >> 2) + sx + dx + 8;
+            bx[1] += ((sy * dx) >> 2) + sy + dx + 4;
+            by[0] += ((sx * dy) >> 2) + sx + dy + 4;
+            by[1] += ((sy * dy) >> 2) + sy + dy + 8;
+        }
+    }
+
+    // compute determinant of a
+    const int64_t det = (int64_t) a[0][0] * a[1][1] - (int64_t) a[0][1] * a[0][1];
+    if (det == 0) return 1;
+    // idet ~= 2^shift / det (fixed-point reciprocal, sign-matched)
+    int shift, idet = apply_sign64(resolve_divisor_64(llabs(det), &shift), det);
+    shift -= 16;
+    if (shift < 0) {
+        idet <<= -shift;
+        shift = 0;
+    }
+
+    // solve the least-squares
+    mat[2] = get_mult_shift_diag((int64_t) a[1][1] * bx[0] -
+                                 (int64_t) a[0][1] * bx[1], idet, shift);
+    mat[3] = get_mult_shift_ndiag((int64_t) a[0][0] * bx[1] -
+                                  (int64_t) a[0][1] * bx[0], idet, shift);
+    mat[4] = get_mult_shift_ndiag((int64_t) a[1][1] * by[0] -
+                                  (int64_t) a[0][1] * by[1], idet, shift);
+    mat[5] = get_mult_shift_diag((int64_t) a[0][0] * by[1] -
+                                 (int64_t) a[0][1] * by[0], idet, shift);
+
+    // translation terms: chosen so the block center maps to center + mv,
+    // clamped to +/-2^23
+    mat[0] = iclip(mv.x * 0x2000 - (isux * (mat[2] - 0x10000) + isuy * mat[3]),
+                   -0x800000, 0x7fffff);
+    mat[1] = iclip(mv.y * 0x2000 - (isux * mat[4] + isuy * (mat[5] - 0x10000)),
+                   -0x800000, 0x7fffff);
+
+    return 0;
+}
--- /dev/null
+++ b/src/warpmv.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_WARPMV_H__
+#define __DAV1D_SRC_WARPMV_H__
+
+#include "src/levels.h"
+
+// Derive shear parameters from wm->matrix; returns nonzero if the warp
+// is invalid for prediction (see warpmv.c).
+int get_shear_params(WarpedMotionParams *wm);
+
+// Integer least-squares affine model estimation from candidate MV point
+// pairs. Parameter names match the definition in warpmv.c: the block
+// position is passed as (bx4, by4), in that order.
+int find_affine_int(const int (*pts)[2][2], int np, int bw4, int bh4,
+                    mv mv, WarpedMotionParams *wm, int bx4, int by4);
+
+#endif /* __DAV1D_SRC_WARPMV_H__ */
--- /dev/null
+++ b/src/wedge.c
@@ -1,0 +1,334 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "src/wedge.h"
+
+// Wedge partition line orientations (the names presumably encode the
+// line angle in degrees — matches the libaom/AV1 wedge naming).
+enum WedgeDirectionType {
+    WEDGE_HORIZONTAL = 0,
+    WEDGE_VERTICAL = 1,
+    WEDGE_OBLIQUE27 = 2,
+    WEDGE_OBLIQUE63 = 3,
+    WEDGE_OBLIQUE117 = 4,
+    WEDGE_OBLIQUE153 = 5,
+    N_WEDGE_DIRECTIONS
+};
+
+// One wedge codebook entry: line direction plus the position of the line
+// within the block, in eighths of the block width/height (see copy2d use).
+typedef struct {
+    enum WedgeDirectionType direction;
+    int x_offset;
+    int y_offset;
+} wedge_code_type;
+
+// 16-entry wedge codebooks, one per block aspect-ratio class:
+// height > width.
+static const wedge_code_type wedge_codebook_16_hgtw[16] = {
+    { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
+    { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
+    { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+    { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+// height < width.
+static const wedge_code_type wedge_codebook_16_hltw[16] = {
+    { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+    { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
+    { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
+    { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+    { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+// height == width.
+static const wedge_code_type wedge_codebook_16_heqw[16] = {
+    { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
+    { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
+    { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
+    { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
+    { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
+    { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
+    { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
+    { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
+};
+
+// Static backing storage for wedge masks: [sign][codebook index][pixels].
+// 444 masks are at luma resolution; 422 are horizontally halved; 420 are
+// halved in both dimensions. Filled once by av1_init_wedge_masks().
+static uint8_t wedge_masks_444_32x32[2][16][32 * 32];
+static uint8_t wedge_masks_444_32x16[2][16][32 * 16];
+static uint8_t wedge_masks_444_32x8[2][16][32 * 8];
+static uint8_t wedge_masks_444_16x32[2][16][16 * 32];
+static uint8_t wedge_masks_444_16x16[2][16][16 * 16];
+static uint8_t wedge_masks_444_16x8[2][16][16 * 8];
+static uint8_t wedge_masks_444_8x32[2][16][8 * 32];
+static uint8_t wedge_masks_444_8x16[2][16][8 * 16];
+static uint8_t wedge_masks_444_8x8[2][16][8 * 8];
+
+static uint8_t wedge_masks_422_16x32[2][16][16 * 32];
+static uint8_t wedge_masks_422_16x16[2][16][16 * 16];
+static uint8_t wedge_masks_422_16x8[2][16][16 * 8];
+static uint8_t wedge_masks_422_8x32[2][16][8 * 32];
+static uint8_t wedge_masks_422_8x16[2][16][8 * 16];
+static uint8_t wedge_masks_422_8x8[2][16][8 * 8];
+static uint8_t wedge_masks_422_4x32[2][16][4 * 32];
+static uint8_t wedge_masks_422_4x16[2][16][4 * 16];
+static uint8_t wedge_masks_422_4x8[2][16][4 * 8];
+
+static uint8_t wedge_masks_420_16x16[2][16][16 * 16];
+static uint8_t wedge_masks_420_16x8[2][16][16 * 8];
+static uint8_t wedge_masks_420_16x4[2][16][16 * 4];
+static uint8_t wedge_masks_420_8x16[2][16][8 * 16];
+static uint8_t wedge_masks_420_8x8[2][16][8 * 8];
+static uint8_t wedge_masks_420_8x4[2][16][8 * 4];
+static uint8_t wedge_masks_420_4x16[2][16][4 * 16];
+static uint8_t wedge_masks_420_4x8[2][16][4 * 8];
+static uint8_t wedge_masks_420_4x4[2][16][4 * 4];
+
+// Externally visible table of mask pointers:
+// [block size][444/422/420][sign][codebook index].
+const uint8_t *wedge_masks[N_BS_SIZES][3][2][16];
+
+// Write one 64-px row of a master wedge template: zeros left of the 8-px
+// transition band centered at 'ctr', the band itself from src, and 64s
+// to the right of it — with all three segments clamped to the row edges.
+static void insert_border(uint8_t *const dst, const uint8_t *src,
+                          const int ctr)
+{
+    if (ctr > 4) memset(dst, 0, ctr - 4);
+    memcpy(dst + imax(ctr, 4) - 4, src + imax(4 - ctr, 0), imin(64 - ctr, 8));
+    if (ctr < 64 - 4)
+        memset(dst + ctr + 4, 64, 64 - 4 - ctr);
+}
+
+// 64x64 matrix transpose: dst[x][y] = src[y][x].
+static void transpose(uint8_t *const dst, const uint8_t *const src) {
+    for (int row = 0; row < 64; row++)
+        for (int col = 0; col < 64; col++)
+            dst[col * 64 + row] = src[row * 64 + col];
+}
+
+// Mirror each 64-px row of src horizontally into dst.
+static void hflip(uint8_t *const dst, const uint8_t *const src) {
+    for (int row = 0; row < 64; row++) {
+        const uint8_t *const s = &src[row * 64];
+        uint8_t *const d = &dst[row * 64];
+        for (int x = 0; x < 64; x++)
+            d[63 - x] = s[x];
+    }
+}
+
+// Complement a tightly-packed w x h mask against the maximum weight 64.
+static void invert(uint8_t *const dst, const uint8_t *const src,
+                   const int w, const int h)
+{
+    const int n = w * h;
+    for (int i = 0; i < n; i++)
+        dst[i] = 64 - src[i];
+}
+
+// Copy a w x h window out of a 64-px-pitch source into a tightly packed
+// destination, starting at (x_off, y_off) in the source.
+static void copy2d(uint8_t *dst, const uint8_t *src,
+                   const int w, const int h, const int x_off, const int y_off)
+{
+    const uint8_t *row = &src[y_off * 64 + x_off];
+    for (int y = 0; y < h; y++, row += 64, dst += w)
+        memcpy(dst, row, w);
+}
+
+// Derive a chroma wedge mask from a luma mask by averaging 2x1 (ss_ver=0,
+// i.e. 4:2:2) or 2x2 (ss_ver=1, i.e. 4:2:0) luma samples. 'sign' biases
+// the rounding — presumably so the two masks of a sign pair stay
+// complementary (sum to 64) after downsampling; TODO confirm vs. blend.
+// NOTE(review): assumes w and h are even — confirm at call sites.
+static void init_chroma(uint8_t *chroma, const uint8_t *luma,
+                        const int sign, const int w, const int h, const int ss_ver)
+{
+    for (int y = 0; y < h; y += 1 + ss_ver) {
+        for (int x = 0; x < w; x += 2) {
+            // +1 per averaged pair provides round-to-nearest
+            int sum = luma[x] + luma[x + 1] + 1;
+            if (ss_ver) sum += luma[w + x] + luma[w + x + 1] + 1;
+            chroma[x >> 1] = (sum - sign) >> (1 + ss_ver);
+        }
+        luma += w << ss_ver;
+        chroma += w >> 1;
+    }
+}
+
+// Build the 16 wedge masks (and their 64-complement inverses) for one
+// block size by cropping the 64x64 master templates, register pointers
+// for the 444/422/420 variants in the externally visible wedge_masks[]
+// table, and derive the chroma-resolution masks.
+static void fill2d_16x2(uint8_t *dst, const int w, const int h,
+                        const enum BlockSize bs,
+                        const uint8_t (*const master)[64 * 64],
+                        const wedge_code_type *const cb,
+                        uint8_t (*masks_444)[16][w * h],
+                        uint8_t (*masks_422)[16][w * h >> 1],
+                        uint8_t (*masks_420)[16][w * h >> 2],
+                        const unsigned signs)
+{
+    uint8_t *ptr = dst;
+    // crop the w x h window for each codebook entry out of its master;
+    // x/y offsets are in eighths of the block dimensions
+    for (int n = 0; n < 16; n++) {
+        copy2d(ptr, master[cb[n].direction], w, h,
+               32 - (w * cb[n].x_offset >> 3), 32 - (h * cb[n].y_offset >> 3));
+        ptr += w * h;
+    }
+    // second half of dst receives the complemented (64 - m) masks
+    for (int n = 0, off = 0; n < 16; n++, off += w * h)
+        invert(ptr + off, dst + off, w, h);
+    // assign pointers in externally visible array
+    for (int n = 0; n < 16; n++) {
+        const int sign = (signs >> n) & 1;
+        wedge_masks[bs][0][0][n] = masks_444[ sign][n];
+        // NOTE(review): both 444 entries deliberately share the same mask
+        // (no !sign) — presumably the 444 blend derives the complement
+        // implicitly; confirm against the mc blend code
+        wedge_masks[bs][0][1][n] = masks_444[ sign][n];
+        wedge_masks[bs][1][0][n] = masks_422[ sign][n];
+        wedge_masks[bs][1][1][n] = masks_422[!sign][n];
+        wedge_masks[bs][2][0][n] = masks_420[ sign][n];
+        wedge_masks[bs][2][1][n] = masks_420[!sign][n];
+    }
+    for (int n = 0; n < 16; n++) {
+        // since the pointers come from inside, we know that
+        // violation of the const is OK here. Any other approach
+        // means we would have to duplicate the sign correction
+        // logic in two places, which isn't very nice, or mark
+        // the externally-facing table as non-const, which also sucks
+        init_chroma((uint8_t *) wedge_masks[bs][1][0][n],
+                    wedge_masks[bs][0][0][n], 0, w, h, 0);
+        init_chroma((uint8_t *) wedge_masks[bs][1][1][n],
+                    wedge_masks[bs][0][0][n], 1, w, h, 0);
+        init_chroma((uint8_t *) wedge_masks[bs][2][0][n],
+                    wedge_masks[bs][0][0][n], 0, w, h, 1);
+        init_chroma((uint8_t *) wedge_masks[bs][2][1][n],
+                    wedge_masks[bs][0][0][n], 1, w, h, 1);
+    }
+}
+
+// One-time global initialization of the wedge_masks[] tables for every
+// supported block size.
+// NOTE(review): the static `done` guard is not thread-safe; assumes the
+// first call completes before any concurrent caller -- confirm.
+void av1_init_wedge_masks(void) {
+    static int done = 0;
+    if (done) return;
+    done = 1;
+
+    enum WedgeMasterLineType {
+        WEDGE_MASTER_LINE_ODD,
+        WEDGE_MASTER_LINE_EVEN,
+        WEDGE_MASTER_LINE_VERT,
+        N_WEDGE_MASTER_LINES,
+    };
+    // 8-entry transition profiles used to stamp one row of a master mask.
+    static const uint8_t wedge_master_border[N_WEDGE_MASTER_LINES][8] = {
+        [WEDGE_MASTER_LINE_ODD]  = {  1,  2,  6, 18, 37, 53, 60, 63 },
+        [WEDGE_MASTER_LINE_EVEN] = {  1,  4, 11, 27, 46, 58, 62, 63 },
+        [WEDGE_MASTER_LINE_VERT] = {  0,  2,  7, 21, 43, 57, 62, 64 },
+    };
+    // 64x64 master templates, indexed by the WEDGE_* direction enums.
+    uint8_t master[6][64 * 64];
+
+    // create master templates
+    // Vertical: the same profile on every row at a fixed position (32).
+    for (int y = 0, off = 0; y < 64; y++, off += 64)
+        insert_border(&master[WEDGE_VERTICAL][off],
+                      wedge_master_border[WEDGE_MASTER_LINE_VERT], 32);
+    // Oblique 63 degrees: the position (ctr) shifts by one every two
+    // rows, alternating the even/odd profiles for a smooth slope.
+    for (int y = 0, off = 0, ctr = 48; y < 64; y += 2, off += 128, ctr--)
+    {
+        insert_border(&master[WEDGE_OBLIQUE63][off],
+                      wedge_master_border[WEDGE_MASTER_LINE_EVEN], ctr);
+        insert_border(&master[WEDGE_OBLIQUE63][off + 64],
+                      wedge_master_border[WEDGE_MASTER_LINE_ODD], ctr - 1);
+    }
+
+    // The remaining four directions are reflections of the two above.
+    transpose(master[WEDGE_OBLIQUE27], master[WEDGE_OBLIQUE63]);
+    transpose(master[WEDGE_HORIZONTAL], master[WEDGE_VERTICAL]);
+    hflip(master[WEDGE_OBLIQUE117], master[WEDGE_OBLIQUE63]);
+    hflip(master[WEDGE_OBLIQUE153], master[WEDGE_OBLIQUE27]);
+
+    // Per-size fill: selects the 444 storage/codebook by dimensions plus
+    // the matching subsampled 422/420 storage arrays and the sign bits.
+#define fill(w, h, sz_422, sz_420, hvsw, signs) \
+    fill2d_16x2((uint8_t *) wedge_masks_444_##w##x##h,  w, h, BS_##w##x##h, \
+                master, wedge_codebook_16_##hvsw, wedge_masks_444_##w##x##h, \
+                wedge_masks_422_##sz_422, wedge_masks_420_##sz_420, signs)
+
+    fill(32, 32, 16x32, 16x16, heqw, 0x7bfb);
+    fill(32, 16, 16x16, 16x8,  hltw, 0x7beb);
+    fill(32,  8, 16x8,  16x4,  hltw, 0x6beb);
+    fill(16, 32,  8x32,  8x16, hgtw, 0x7beb);
+    fill(16, 16,  8x16,  8x8,  heqw, 0x7bfb);
+    fill(16,  8,  8x8,   8x4,  hltw, 0x7beb);
+    fill( 8, 32,  4x32,  4x16, hgtw, 0x7aeb);
+    fill( 8, 16,  4x16,  4x8,  hgtw, 0x7beb);
+    fill( 8,  8,  4x8,   4x4,  heqw, 0x7bfb);
+#undef fill
+}
+
+// Inter-intra blending mask storage.  The DC mask is flat (filled with
+// 32 by av1_init_interintra_masks()) and shared between all sizes; the
+// non-DC masks are per-size with one plane per non-DC prediction mode
+// (vert/hor/smooth).
+static uint8_t ii_dc_mask[32 * 32];
+static uint8_t ii_nondc_mask_32x32[N_INTER_INTRA_PRED_MODES - 1][32 * 32];
+static uint8_t ii_nondc_mask_16x32[N_INTER_INTRA_PRED_MODES - 1][16 * 32];
+static uint8_t ii_nondc_mask_16x16[N_INTER_INTRA_PRED_MODES - 1][16 * 16];
+static uint8_t ii_nondc_mask_8x32[N_INTER_INTRA_PRED_MODES - 1][8 * 32];
+static uint8_t ii_nondc_mask_8x16[N_INTER_INTRA_PRED_MODES - 1][8 * 16];
+static uint8_t ii_nondc_mask_8x8[N_INTER_INTRA_PRED_MODES - 1][8 * 8];
+static uint8_t ii_nondc_mask_4x16[N_INTER_INTRA_PRED_MODES - 1][4 * 16];
+static uint8_t ii_nondc_mask_4x8[N_INTER_INTRA_PRED_MODES - 1][4 * 8];
+static uint8_t ii_nondc_mask_4x4[N_INTER_INTRA_PRED_MODES - 1][4 * 4];
+
+// Map one storage size to the four prediction modes (DC shares the flat
+// ii_dc_mask).
+#define set1(sz) \
+    [II_DC_PRED] = ii_dc_mask, \
+    [II_VERT_PRED] = ii_nondc_mask_##sz[II_VERT_PRED - 1], \
+    [II_HOR_PRED] = ii_nondc_mask_##sz[II_HOR_PRED - 1], \
+    [II_SMOOTH_PRED] = ii_nondc_mask_##sz[II_SMOOTH_PRED - 1]
+// One row per chroma layout: 444/luma, 422, 420.
+#define set(sz_444, sz_422, sz_420) \
+    { { set1(sz_444) }, { set1(sz_422) }, { set1(sz_420) } }
+// Externally visible lookup: [block size][chroma layout][pred mode].
+// NOTE(review): BS_16x8 maps its 444 slot to the 16x16 masks and its 420
+// slot to 8x8 -- looks deliberate (no 16x8/8x4 storage exists) but worth
+// confirming against the AV1 spec.
+const uint8_t *const ii_masks[N_BS_SIZES][3][N_INTER_INTRA_PRED_MODES] = {
+    [BS_8x8]   = set( 8x8,   4x8,   4x4),
+    [BS_8x16]  = set( 8x16,  4x16,  4x8),
+    [BS_16x8]  = set(16x16,  8x8,   8x8),
+    [BS_16x16] = set(16x16,  8x16,  8x8),
+    [BS_16x32] = set(16x32,  8x32,  8x16),
+    [BS_32x16] = set(32x32, 16x16, 16x16),
+    [BS_32x32] = set(32x32, 16x32, 16x16),
+};
+#undef set
+#undef set1
+
// Fill the vertical, horizontal and smooth inter-intra masks for one
// w x h block.  `step` scales the 32-entry weight table down to the
// block dimension (e.g. step 8 for 4-wide blocks).
static void build_nondc_ii_masks(uint8_t *const mask_v,
                                 uint8_t *const mask_h,
                                 uint8_t *const mask_sm,
                                 const int w, const int h, const int step)
{
    // 1D blending weights, indexed by scaled distance from the edge.
    static const uint8_t ii_weights_1d[] = {
        60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
         6,  6,  5,  4,  4,  3,  3,  2,  2,  2,  2,  1,  1,  1,  1,  1,
    };

    int row = 0;
    for (int y = 0; y < h; y++, row += w) {
        const uint8_t wy = ii_weights_1d[y * step];
        uint8_t *const vp = &mask_v[row];
        uint8_t *const hp = &mask_h[row];
        uint8_t *const sp = &mask_sm[row];
        for (int x = 0; x < w; x++) {
            const uint8_t wx = ii_weights_1d[x * step];
            vp[x] = wy;               // vertical: constant per row
            hp[x] = wx;               // horizontal: constant per column
            sp[x] = x < y ? wx : wy;  // smooth: weight of the nearer edge
        }
    }
}
+
+// One-time initialization of the inter-intra masks declared above.
+// NOTE(review): like av1_init_wedge_masks(), the static `done` guard is
+// not thread-safe.
+void av1_init_interintra_masks(void) {
+    static int done = 0;
+    if (done) return;
+    done = 1;
+
+    // DC: a flat blend (weight 32, presumably out of 64 -- confirm).
+    memset(ii_dc_mask, 32, 32 * 32);
+    // Expand one storage array into its vert/hor/smooth planes; `step`
+    // scales the 32-entry weight table down to the block dimension.
+#define set(a) a[II_VERT_PRED - 1], a[II_HOR_PRED - 1], a[II_SMOOTH_PRED - 1]
+    build_nondc_ii_masks(set(ii_nondc_mask_32x32), 32, 32, 1);
+    build_nondc_ii_masks(set(ii_nondc_mask_16x32), 16, 32, 1);
+    build_nondc_ii_masks(set(ii_nondc_mask_16x16), 16, 16, 2);
+    build_nondc_ii_masks(set(ii_nondc_mask_8x32),   8, 32, 1);
+    build_nondc_ii_masks(set(ii_nondc_mask_8x16),   8, 16, 2);
+    build_nondc_ii_masks(set(ii_nondc_mask_8x8),    8,  8, 4);
+    build_nondc_ii_masks(set(ii_nondc_mask_4x16),   4, 16, 2);
+    build_nondc_ii_masks(set(ii_nondc_mask_4x8),    4,  8, 4);
+    build_nondc_ii_masks(set(ii_nondc_mask_4x4),    4,  4, 8);
+#undef set
+}
--- /dev/null
+++ b/src/wedge.h
@@ -1,0 +1,41 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_WEDGE_H__
+#define __DAV1D_SRC_WEDGE_H__
+
+#include "src/levels.h"
+
+// Build the global wedge mask tables below; call once before decoding.
+void av1_init_wedge_masks(void);
+// [block size][chroma layout][sign][wedge index] -> per-pixel blend
+// weights (presumably 0..64 -- confirm), filled by av1_init_wedge_masks().
+extern const uint8_t *wedge_masks[N_BS_SIZES][3 /* 444/luma, 422, 420 */]
+                                 [2 /* sign */][16 /* wedge_idx */];
+
+// Build the inter-intra masks; call once before decoding.
+void av1_init_interintra_masks(void);
+// [block size][chroma layout][pred mode] -> blend weights for
+// inter-intra prediction; filled by av1_init_interintra_masks().
+extern const uint8_t *const ii_masks[N_BS_SIZES][3 /* 444/luma, 422, 420 */]
+                                    [N_INTER_INTRA_PRED_MODES];
+
+#endif /* __DAV1D_SRC_WEDGE_H__ */
--- /dev/null
+++ b/tools/dav1d.c
@@ -1,0 +1,173 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "dav1d/data.h"
+
+#include "include/version.h"
+
+#include "input/input.h"
+
+#include "output/output.h"
+
+#include "dav1d_cli_parse.h"
+
// Print decode progress to stderr.  On a terminal the line is rewritten
// in place with '\r'; otherwise each update is a full line so logs stay
// readable.  num == 0xFFFFFFFF means the total frame count is unknown.
static void print_stats(const int istty, const unsigned n,
                        const unsigned num)
{
    const char *head = "", *tail = "\n";
    if (istty) {
        head = "\r";
        tail = "";
    }

    if (num == 0xFFFFFFFFU) {
        fprintf(stderr, "%sDecoded %u frames%s", head, n, tail);
    } else {
        fprintf(stderr, "%sDecoded %u/%u frames (%.1lf%%)%s",
                head, n, num, 100.0 * n / num, tail);
    }
}
+
+// CLI entry point: open the input demuxer, decode every frame with
+// libdav1d, write decoded pictures through the selected muxer, and print
+// progress.  Returns 0 on success, non-zero on failure.
+int main(const int argc, char *const *const argv) {
+    const int istty = isatty(fileno(stderr));
+    int res = 0;
+    CLISettings cli_settings;
+    Dav1dSettings lib_settings;
+    DemuxerContext *in;
+    MuxerContext *out = NULL;
+    Dav1dPicture p;
+    Dav1dContext *c;
+    Dav1dData data;
+    unsigned n_out = 0, total, fps[2];
+    const char *version = dav1d_version();
+
+    // Guard against running against a mismatched shared library build.
+    if (strcmp(version, DAV1D_VERSION)) {
+        fprintf(stderr, "Version mismatch (library: %s, executable: %s)\n",
+                version, DAV1D_VERSION);
+        return -1;
+    }
+
+    dav1d_init();
+    init_demuxers();
+    init_muxers();
+    parse(argc, argv, &cli_settings, &lib_settings);
+
+    if ((res = input_open(&in, cli_settings.inputfile,
+                          fps, &total)) < 0)
+    {
+        return res;
+    }
+    // Skip the first `skip` packets; the loop reads skip+1 packets and
+    // keeps the last one in `data` as the first packet to decode.
+    for (unsigned i = 0; i <= cli_settings.skip; i++) {
+        if ((res = input_read(in, &data)) < 0)
+            return res;
+        if (i < cli_settings.skip) dav1d_data_unref(&data);
+    }
+
+    if (!cli_settings.quiet)
+        fprintf(stderr, "Dav1d %s - by Two Orioles\n", DAV1D_VERSION);
+
+    //getc(stdin);
+    if (cli_settings.limit != 0 && cli_settings.limit < total)
+        total = cli_settings.limit;
+
+    if ((res = dav1d_open(&c, &lib_settings)))
+        return res;
+
+    // Main loop: feed one packet at a time and collect output pictures.
+    // A -EAGAIN return is treated as "decoder needs more input" and is
+    // not an error.
+    do {
+        memset(&p, 0, sizeof(p));
+        if ((res = dav1d_decode(c, &data, &p)) < 0) {
+            if (res != -EAGAIN) {
+                fprintf(stderr, "Error decoding frame: %s\n",
+                        strerror(-res));
+                break;
+            }
+            res = 0;
+        } else {
+            // Open the muxer lazily, once the first picture's parameters
+            // (dimensions, layout) are known.
+            if (!n_out) {
+                if ((res = output_open(&out, cli_settings.muxer,
+                                       cli_settings.outputfile,
+                                       &p.p, fps)) < 0)
+                {
+                    return res;
+                }
+            }
+            if ((res = output_write(out, &p)) < 0)
+                break;
+            n_out++;
+            if (!cli_settings.quiet)
+                print_stats(istty, n_out, total);
+        }
+
+        if (cli_settings.limit && n_out == cli_settings.limit)
+            break;
+    } while (data.sz > 0 || !input_read(in, &data));
+
+    // flush
+    // Drain pictures still buffered inside the decoder (NULL input).
+    // NOTE(review): unlike the main loop, `p` is not memset before each
+    // dav1d_decode() call here -- confirm the decoder fully initializes
+    // the picture on success.
+    if (res == 0) while (!cli_settings.limit || n_out < cli_settings.limit) {
+        if ((res = dav1d_decode(c, NULL, &p)) < 0) {
+            if (res != -EAGAIN) {
+                fprintf(stderr, "Error decoding frame: %s\n",
+                        strerror(-res));
+            } else
+                res = 0;
+            break;
+        } else {
+            if (!n_out) {
+                if ((res = output_open(&out, cli_settings.muxer,
+                                       cli_settings.outputfile,
+                                       &p.p, fps)) < 0)
+                {
+                    return res;
+                }
+            }
+            if ((res = output_write(out, &p)) < 0)
+                break;
+            n_out++;
+            if (!cli_settings.quiet)
+                print_stats(istty, n_out, total);
+        }
+    }
+
+    input_close(in);
+    if (out) {
+        // Finish the in-place progress line before closing the muxer.
+        if (!cli_settings.quiet && istty)
+            fprintf(stderr, "\n");
+        output_close(out);
+    } else {
+        fprintf(stderr, "No data decoded\n");
+        res = 1;
+    }
+    dav1d_close(c);
+
+    return res;
+}
--- /dev/null
+++ b/tools/dav1d_cli_parse.c
@@ -1,0 +1,160 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <getopt.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "dav1d_cli_parse.h"
+
// getopt tables for the dav1d CLI.

// Short options.  "s:" is included so the documented -s/--skip short
// form actually works (it was previously only reachable as --skip
// because 's' was missing from this string).
static const char short_opts[] = "i:o:vql:s:";

// Values for long-only options.  Starting at 256 keeps them outside the
// unsigned-char range, so they can never collide with short-option
// characters and error() can tell the two forms apart with `val < 256`
// (with the old 0-based values, ARG_MUXER etc. passed that test and the
// message printed a garbage short-option name).
enum {
    ARG_MUXER = 256,
    ARG_FRAME_THREADS,
    ARG_TILE_THREADS,
};

static const struct option long_opts[] = {
    { "input",          1, NULL, 'i' },
    { "output",         1, NULL, 'o' },
    { "quiet",          0, NULL, 'q' },
    { "muxer",          1, NULL, ARG_MUXER },
    { "version",        0, NULL, 'v' },
    { "limit",          1, NULL, 'l' },
    { "skip",           1, NULL, 's' },
    { "framethreads",   1, NULL, ARG_FRAME_THREADS },
    { "tilethreads",    1, NULL, ARG_TILE_THREADS },
    { NULL },
};
+
// Print an optional printf-style reason followed by the option summary,
// then terminate the process with status 1 (this function never returns).
static void usage(const char *const app, const char *const reason, ...) {
    if (reason) {
        va_list ap;
        va_start(ap, reason);
        vfprintf(stderr, reason, ap);
        va_end(ap);
        fprintf(stderr, "\n\n");
    }
    fprintf(stderr, "Usage: %s [options]\n\n", app);
    fputs("Supported options:\n"
          " --input/-i  $file:   input file\n"
          " --output/-o $file:   output file\n"
          " --muxer $name:       force muxer type (default: detect from extension)\n"
          " --quiet/-q:          disable status messages\n"
          " --limit/-l $num:     stop decoding after $num frames\n"
          " --skip/-s $num:      skip decoding of the first $num frames\n"
          " --version/-v:        print version and exit\n"
          " --framethreads $num: number of frame threads (default: 1)\n"
          " --tilethreads $num:  number of tile threads (default: 1)\n",
          stderr);
    exit(1);
}
+
+static void error(const char *const app, const char *const optarg,
+                  const int option, const char *const shouldbe)
+{
+    char optname[256];
+    int n;
+
+    for (n = 0; long_opts[n].name; n++)
+        if (long_opts[n].val == option)
+            break;
+    assert(long_opts[n].name);
+    if (long_opts[n].val < 256) {
+        sprintf(optname, "-%c/--%s", long_opts[n].val, long_opts[n].name);
+    } else {
+        sprintf(optname, "--%s", long_opts[n].name);
+    }
+
+    usage(app, "Invalid argument \"%s\" for option %s; should be %s",
+          optarg, optname, shouldbe);
+}
+
// Parse a non-negative integer option argument (base auto-detected by
// strtoul).  On empty input, trailing junk, or a value that does not fit
// in an unsigned, reports via error() -- which exits and does not return.
static unsigned parse_unsigned(char *optarg, const int option, const char *app) {
    char *end;
    // Keep the result in unsigned long, not double: a double silently
    // loses precision above 2^53 and is the wrong type for strtoul.
    const unsigned long res = strtoul(optarg, &end, 0);
    if (*end || end == optarg || (unsigned) res != res)
        error(app, optarg, option, "an integer");
    return (unsigned) res;
}
+
+// Parse argv into tool settings (cli_settings) and decoder settings
+// (lib_settings).  Exits through usage()/error() on invalid or missing
+// required arguments; getopt_long() supplies `optarg` for each value.
+void parse(const int argc, char *const *const argv,
+           CLISettings *const cli_settings, Dav1dSettings *const lib_settings)
+{
+    int o;
+
+    // Zeroed CLISettings means: no muxer override, no limit, no skip,
+    // not quiet, and no input/output (checked as required below).
+    memset(cli_settings, 0, sizeof(*cli_settings));
+    dav1d_default_settings(lib_settings);
+
+    while ((o = getopt_long(argc, argv, short_opts, long_opts, NULL)) >= 0) {
+        switch (o) {
+        case 'o':
+            cli_settings->outputfile = optarg;
+            break;
+        case 'i':
+            cli_settings->inputfile = optarg;
+            break;
+        case 'q':
+            cli_settings->quiet = 1;
+            break;
+        case 'l':
+            cli_settings->limit = parse_unsigned(optarg, 'l', argv[0]);
+            break;
+        case 's':
+            cli_settings->skip = parse_unsigned(optarg, 's', argv[0]);
+            break;
+        case ARG_MUXER:
+            cli_settings->muxer = optarg;
+            break;
+        case ARG_FRAME_THREADS:
+            lib_settings->n_frame_threads =
+                parse_unsigned(optarg, ARG_FRAME_THREADS, argv[0]);
+            break;
+        case ARG_TILE_THREADS:
+            lib_settings->n_tile_threads =
+                parse_unsigned(optarg, ARG_TILE_THREADS, argv[0]);
+            break;
+        case 'v':
+            // Version request: print and exit immediately.
+            fprintf(stderr, "%s\n", dav1d_version());
+            exit(0);
+        default:
+            break;
+        }
+    }
+
+    // Input and output paths are mandatory; usage() exits.
+    if (!cli_settings->inputfile)
+        usage(argv[0], "Input file (-i/--input) is required");
+    if (!cli_settings->outputfile)
+        usage(argv[0], "Output file (-o/--output) is required");
+}
--- /dev/null
+++ b/tools/dav1d_cli_parse.h
@@ -1,0 +1,44 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_CLI_PARSE_H__
+#define __DAV1D_CLI_PARSE_H__
+
+#include "dav1d.h"
+
+// Options consumed by the dav1d CLI tool itself; decoder options live in
+// Dav1dSettings.
+typedef struct {
+    const char *outputfile;  // -o/--output path (required)
+    const char *inputfile;   // -i/--input path (required)
+    const char *muxer;       // --muxer; NULL = detect from extension
+    unsigned limit, skip;    // --limit / --skip frame counts (0 = none)
+    int quiet;               // -q/--quiet: suppress status messages
+} CLISettings;
+
+// Fill both settings structs from argv; exits on invalid usage.
+void parse(const int argc, char *const *const argv,
+           CLISettings *const cli_settings, Dav1dSettings *const lib_settings);
+
+#endif /* __DAV1D_CLI_PARSE_H__ */
--- /dev/null
+++ b/tools/input/demuxer.h
@@ -1,0 +1,44 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_INPUT_DEMUXER_H__
+#define __DAV1D_INPUT_DEMUXER_H__
+
+#include "data.h"
+
+// Each demuxer implementation defines its own private state under this
+// (otherwise opaque) struct tag.
+typedef struct DemuxerPriv DemuxerPriv;
+// Demuxer vtable.  Implementations are registered in input.c and chosen
+// by matching `extension` against the input filename's extension.
+typedef struct Demuxer {
+    int priv_data_size;      // bytes to allocate for the DemuxerPriv
+    const char *name;
+    const char *extension;   // filename extension handled (e.g. "ivf")
+    // Open `filename`; reports the stream's fps pair and frame count.
+    int (*open)(DemuxerPriv *ctx, const char *filename,
+                unsigned fps[2], unsigned *num_frames);
+    // Read the next frame into `data`; non-zero on EOF or error.
+    int (*read)(DemuxerPriv *ctx, Dav1dData *data);
+    void (*close)(DemuxerPriv *ctx);
+} Demuxer;
+
+#endif /* __DAV1D_INPUT_DEMUXER_H__ */
--- /dev/null
+++ b/tools/input/input.c
@@ -1,0 +1,124 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "input/input.h"
+#include "input/demuxer.h"
+
+// A demuxer instance: the impl vtable plus its private data, which is
+// allocated immediately after this struct (see input_open()).
+struct DemuxerContext {
+    DemuxerPriv *data;
+    const Demuxer *impl;
+};
+
+#define MAX_NUM_DEMUXERS 1
+// Registry of available demuxers, filled once by init_demuxers().
+static const Demuxer *demuxers[MAX_NUM_DEMUXERS];
+static int num_demuxers = 0;
+
+// Declare a demuxer defined in another translation unit and append it
+// to the registry.
+#define register_demuxer(impl) { \
+    extern const Demuxer impl; \
+    assert(num_demuxers < MAX_NUM_DEMUXERS); \
+    demuxers[num_demuxers++] = &impl; \
+}
+
+// Register all built-in demuxers; call once at startup.
+void init_demuxers(void) {
+    register_demuxer(ivf_demuxer);
+}
+
// Return a pointer to the filename's extension (the characters after the
// final '.'), or NULL if there is none.  Only [A-Za-z0-9] characters
// count as extension characters, and a '.' directly after a '/' (or at
// the start of the name) is not treated as an extension separator.
static const char *find_extension(const char *const f) {
    const size_t l = strlen(f);

    if (l == 0) return NULL;

    // Scan backwards over the trailing alphanumeric run, but never step
    // before the start of the string: the previous version kept
    // decrementing unconditionally, which walked off the front of an
    // all-alphanumeric name -- undefined behavior.
    const char *const end = &f[l - 1], *step = end;
    while (step > f &&
           ((*step >= 'a' && *step <= 'z') ||
            (*step >= 'A' && *step <= 'Z') ||
            (*step >= '0' && *step <= '9')))
    {
        step--;
    }

    return (step < end && step > f && *step == '.' && step[-1] != '/') ?
           &step[1] : NULL;
}
+
+int input_open(DemuxerContext **const c_out, const char *const filename,
+               unsigned fps[2], unsigned *const num_frames)
+{
+    const Demuxer *impl;
+    DemuxerContext *c;
+    int res, i;
+
+    const char *const ext = find_extension(filename);
+    if (!ext) {
+        fprintf(stderr, "No extension found for file %s\n", filename);
+        return -1;
+    }
+
+    for (i = 0; i < num_demuxers; i++) {
+        if (!strcmp(demuxers[i]->extension, ext)) {
+            impl = demuxers[i];
+            break;
+        }
+    }
+    if (i == num_demuxers) {
+        fprintf(stderr,
+                "Failed to find demuxer for file %s (\"%s\")\n",
+                filename, ext);
+        return -ENOPROTOOPT;
+    }
+
+    if (!(c = malloc(sizeof(DemuxerContext) + impl->priv_data_size))) {
+        fprintf(stderr, "Failed to allocate memory\n");
+        return -ENOMEM;
+    }
+    memset(c, 0, sizeof(DemuxerContext) + impl->priv_data_size);
+    c->impl = impl;
+    c->data = (DemuxerPriv *) &c[1];
+    if ((res = impl->open(c->data, filename, fps, num_frames)) < 0) {
+        free(c);
+        return res;
+    }
+    *c_out = c;
+
+    return 0;
+}
+
+// Read the next frame from the demuxer into `data`; forwards the
+// implementation's return value (non-zero on EOF/error).
+int input_read(DemuxerContext *const ctx, Dav1dData *const data) {
+    return ctx->impl->read(ctx->data, data);
+}
+
+// Close the underlying input and free the context (the private data
+// lives in the same allocation, so one free() releases both).
+void input_close(DemuxerContext *const ctx) {
+    ctx->impl->close(ctx->data);
+    free(ctx);
+}
--- /dev/null
+++ b/tools/input/input.h
@@ -1,0 +1,41 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_INPUT_INPUT_H__
+#define __DAV1D_INPUT_INPUT_H__
+
+#include "data.h"
+
+typedef struct DemuxerContext DemuxerContext;
+
+void init_demuxers(void);
+int input_open(DemuxerContext **c, const char *filename,
+               unsigned fps[2], unsigned *num_frames);
+int input_read(DemuxerContext *ctx, Dav1dData *data);
+void input_close(DemuxerContext *ctx);
+
+#endif /* __DAV1D_INPUT_INPUT_H__ */
--- /dev/null
+++ b/tools/input/ivf.c
@@ -1,0 +1,113 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "common/intops.h"
+
+#include "input/demuxer.h"
+
typedef struct DemuxerPriv {
    FILE *f;  // opened IVF file, positioned at the next frame header
} IvfInputContext;

// Open an IVF file and validate its DKIF/AV01 headers.  Reports
// fps[0] = rate numerator * frame count and fps[1] = rate denominator *
// duration (so fps[0]/fps[1] is the stream's average frame rate), and
// counts the frames by walking the frame headers once.
static int ivf_open(IvfInputContext *const c, const char *const file,
                    unsigned fps[2], unsigned *const num_frames)
{
    int res;
    uint8_t hdr[32];

    memset(c, 0, sizeof(*c));
    // "rb", not "r": the payload is binary; in text mode Windows would
    // translate CRLF sequences and corrupt the stream.
    if (!(c->f = fopen(file, "rb"))) {
        fprintf(stderr, "Failed to open %s: %s\n", file, strerror(errno));
        return -1;
    } else if ((res = fread(hdr, 32, 1, c->f)) != 1) {
        fprintf(stderr, "Failed to read stream header: %s\n", strerror(errno));
        fclose(c->f);
        return -1;
    } else if (memcmp(hdr, "DKIF", 4)) {
        // "%.4s" (precision), not "%4s" (minimum width): the tag is not
        // NUL-terminated, so printing must stop after four bytes.
        fprintf(stderr, "%s is not an IVF file [tag=%.4s|0x%02x%02x%02x%02x]\n",
                file, hdr, hdr[0], hdr[1], hdr[2], hdr[3]);
        fclose(c->f);
        return -1;
    } else if (memcmp(&hdr[8], "AV01", 4)) {
        fprintf(stderr, "%s is not an AV1 file [tag=%.4s|0x%02x%02x%02x%02x]\n",
                file, &hdr[8], hdr[8], hdr[9], hdr[10], hdr[11]);
        fclose(c->f);
        return -1;
    }

    fps[0] = rl32(&hdr[16]);
    fps[1] = rl32(&hdr[20]);
    const unsigned duration = rl32(&hdr[24]);

    // Count frames by hopping over each 12-byte frame header + payload.
    uint8_t data[4];
    for (*num_frames = 0;; (*num_frames)++) {
        if ((res = fread(data, 4, 1, c->f)) != 1)
            break; // EOF
        fseek(c->f, rl32(data) + 8, SEEK_CUR);
    }
    fps[0] *= *num_frames;
    fps[1] *= duration;
    fseek(c->f, 32, SEEK_SET); // rewind to the first frame header

    return 0;
}
+
+static int ivf_read(IvfInputContext *const c, Dav1dData *const buf) {
+    uint8_t data[4];
+    int res;
+
+    if ((res = fread(data, 4, 1, c->f)) != 1)
+        return -1; // EOF
+    fseek(c->f, 8, SEEK_CUR); // skip timestamp
+    const ptrdiff_t sz = rl32(data);
+    dav1d_data_create(buf, sz);
+    if ((res = fread(buf->data, sz, 1, c->f)) != 1)
+        fprintf(stderr, "Failed to read frame data: %s\n", strerror(errno));
+
+    return 0;
+}
+
+static void ivf_close(IvfInputContext *const c) {
+    fclose(c->f);
+}
+
+// Demuxer vtable entry; picked up by register_demuxer() in input.c and
+// selected for files with an "ivf" extension.
+const Demuxer ivf_demuxer = {
+    .priv_data_size = sizeof(IvfInputContext),
+    .name = "ivf",
+    .extension = "ivf",
+    .open = ivf_open,
+    .read = ivf_read,
+    .close = ivf_close,
+};
--- /dev/null
+++ b/tools/output/md5.c
@@ -1,0 +1,220 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "common/intops.h"
+
+#include "output/muxer.h"
+
+// MD5 per-round left-rotation amounts (RFC 1321), indexed [i >> 4][i & 3].
+static const uint8_t s[][4] = {
+    { 7, 12, 17, 22, },
+    { 5,  9, 14, 20, },
+    { 4, 11, 16, 23, },
+    { 6, 10, 15, 21, },
+};
+
+// MD5 additive constants K[i] = floor(2^32 * abs(sin(i + 1))) (RFC 1321).
+static const unsigned k[] = {
+    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
+    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
+    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
+    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
+    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
+    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
+    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
+    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
+    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
+    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
+    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
+    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
+    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
+    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
+    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
+    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391,
+};
+
+/* Private state of the MD5 muxer: streaming MD5 plus the output stream. */
+typedef struct MuxerPriv {
+    unsigned abcd[4];  // running MD5 state words A, B, C, D
+    uint8_t data[64];  // buffer holding a partially-filled 64-byte block
+    uint64_t len;      // total message bytes hashed so far
+    FILE *f;           // stream the final hex digest is printed to
+} MD5Context;
+
+/* Open the digest output ("-" selects stdout) and reset the MD5 state
+ * to the RFC 1321 initial values. Returns 0 on success, -1 on failure. */
+static int md5_open(MD5Context *const md5, const char *const file,
+                    const Dav1dPictureParameters *const p,
+                    const unsigned fps[2])
+{
+    if (strcmp(file, "-")) {
+        md5->f = fopen(file, "w");
+        if (!md5->f) {
+            fprintf(stderr, "Failed to open %s: %s\n", file, strerror(errno));
+            return -1;
+        }
+    } else {
+        md5->f = stdout;
+    }
+
+    // RFC 1321 §3.3 initial chaining values.
+    md5->abcd[0] = 0x67452301;
+    md5->abcd[1] = 0xefcdab89;
+    md5->abcd[2] = 0x98badcfe;
+    md5->abcd[3] = 0x10325476;
+    md5->len = 0;
+
+    return 0;
+}
+
+/* Rotate the 32-bit value x left by c bits (callers pass 4 <= c <= 23). */
+static inline unsigned leftrotate(const unsigned x, const unsigned c) {
+    const unsigned spilled = x >> (32 - c);
+    return (x << c) | spilled;
+}
+
+/* Compress one 64-byte message block into the running MD5 state. */
+static void md5_body(MD5Context *md5, const uint8_t *const _data) {
+    // Decode the block into sixteen little-endian 32-bit words using byte
+    // loads. The previous `(uint32_t *)_data` cast was undefined behavior
+    // (misaligned access + strict-aliasing violation — md5_update() passes
+    // arbitrary byte offsets) and produced wrong digests on big-endian.
+    uint32_t data[16];
+    for (int n = 0; n < 16; n++)
+        data[n] = (uint32_t)_data[4 * n + 0]         |
+                  ((uint32_t)_data[4 * n + 1] <<  8) |
+                  ((uint32_t)_data[4 * n + 2] << 16) |
+                  ((uint32_t)_data[4 * n + 3] << 24);
+
+    unsigned a = md5->abcd[0];
+    unsigned b = md5->abcd[1];
+    unsigned c = md5->abcd[2];
+    unsigned d = md5->abcd[3];
+    unsigned i;
+
+    // 64 steps; the round (i >> 4) selects the mixing function f and the
+    // message-word schedule g, per RFC 1321 §3.4.
+    for (i = 0; i < 64; i++) {
+        unsigned f, g, tmp;
+
+        if (i < 16) {
+            f = (b & c) | (~b & d);
+            g = i;
+        } else if (i < 32) {
+            f = (d & b) | (~d & c);
+            g = (5 * i + 1) & 15;
+        } else if (i < 48) {
+            f = b ^ c ^ d;
+            g = (3 * i + 5) & 15;
+        } else {
+            f = c ^ (b | ~d);
+            g = (7 * i) & 15;
+        }
+
+        tmp = d;
+        d = c;
+        c = b;
+        b += leftrotate(a + f + k[i] + data[g], s[i >> 4][i & 3]);
+        a = tmp;
+    }
+
+    md5->abcd[0] += a;
+    md5->abcd[1] += b;
+    md5->abcd[2] += c;
+    md5->abcd[3] += d;
+}
+
+/* Append len bytes to the MD5 stream: complete any buffered partial block,
+ * compress full 64-byte blocks directly from data, and buffer the tail. */
+static void md5_update(MD5Context *const md5, const uint8_t *data, unsigned len) {
+    if (!len) return;
+
+    // Top up a previously-buffered partial block first.
+    if (md5->len & 63) {
+        const unsigned tmp = imin(len, 64 - (md5->len & 63));
+
+        memcpy(&md5->data[md5->len & 63], data, tmp);
+        len -= tmp;
+        data += tmp;
+        md5->len += tmp;
+        // Compress only once the internal buffer reaches 64 bytes.
+        if (!(md5->len & 63))
+            md5_body(md5, md5->data);
+    }
+
+    // Compress whole blocks straight from the caller's buffer.
+    while (len >= 64) {
+        md5_body(md5, data);
+        md5->len += 64;
+        data += 64;
+        len -= 64;
+    }
+
+    // Stash any remaining tail (< 64 bytes) for the next call.
+    if (len) {
+        memcpy(md5->data, data, len);
+        md5->len += len;
+    }
+}
+
+/* Hash every visible pixel row of the picture into the running MD5
+ * (rows are fed individually because strides may include padding),
+ * then release the picture. Always returns 0. */
+static int md5_write(MD5Context *const md5, Dav1dPicture *const p) {
+    const int shift = p->p.bpc > 8; // 1 when samples occupy two bytes
+    const int width = p->p.w, height = p->p.h;
+
+    uint8_t *row = p->data[0];
+    for (int y = 0; y < height; y++, row += p->stride[0])
+        md5_update(md5, row, width << shift);
+
+    if (p->p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+        // Chroma dimensions follow the layout's subsampling factors.
+        const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+        const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
+        const int cw = (width + ss_hor) >> ss_hor;
+        const int ch = (height + ss_ver) >> ss_ver;
+        for (int pl = 1; pl <= 2; pl++) {
+            row = p->data[pl];
+            for (int y = 0; y < ch; y++, row += p->stride[1])
+                md5_update(md5, row, cw << shift); // both chroma planes share stride[1]
+        }
+    }
+
+    dav1d_picture_unref(p);
+
+    return 0;
+}
+
+/* Finalize the MD5 (RFC 1321 padding + bit-length), print the digest as
+ * lowercase hex, and close the output stream if it isn't a std stream. */
+static void md5_close(MD5Context *const md5) {
+    static const uint8_t bit[2] = { 0x80, 0x00 };
+    const uint64_t len = md5->len << 3; // message length in bits
+
+    // Padding: a single 1-bit, zeros up to 56 mod 64, then the 64-bit
+    // little-endian bit count.
+    md5_update(md5, &bit[0], 1);
+    while ((md5->len & 63) != 56)
+        md5_update(md5, &bit[1], 1);
+    // Serialize the length explicitly as little-endian bytes; writing the
+    // raw uint64_t only worked on little-endian hosts.
+    uint8_t len_le[8];
+    for (int i = 0; i < 8; i++)
+        len_le[i] = (len >> (i * 8)) & 0xff;
+    md5_update(md5, len_le, 8);
+    for (int i = 0; i < 4; i++)
+        fprintf(md5->f, "%2.2x%2.2x%2.2x%2.2x",
+                md5->abcd[i] & 0xff,
+                (md5->abcd[i] >> 8) & 0xff,
+                (md5->abcd[i] >> 16) & 0xff,
+                md5->abcd[i] >> 24);
+    fprintf(md5->f, "\n");
+
+    // md5_open() maps "-" to stdout; the old check compared against stdin
+    // and so fclosed stdout. Guard both standard streams.
+    if (md5->f != stdout && md5->f != stdin)
+        fclose(md5->f);
+}
+
+/* MD5 muxer: hashes the raw decoded planes and prints one hex digest. */
+const Muxer md5_muxer = {
+    .priv_data_size = sizeof(MD5Context),
+    .name = "md5",
+    .extension = "md5",
+    .write_header = md5_open,
+    .write_picture = md5_write,
+    .write_trailer = md5_close,
+};
--- /dev/null
+++ b/tools/output/muxer.h
@@ -1,0 +1,44 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_OUTPUT_MUXER_H__
+#define __DAV1D_OUTPUT_MUXER_H__
+
+#include "picture.h"
+
+/* Opaque per-muxer state; each muxer .c file defines its own layout. */
+typedef struct MuxerPriv MuxerPriv;
+/* Output muxer interface; one instance per supported output format. */
+typedef struct Muxer {
+    int priv_data_size;    // bytes of MuxerPriv state to allocate
+    const char *name;      // identifier matched against output_open()'s name
+    const char *extension; // filename extension used when no name is given
+    int (*write_header)(MuxerPriv *ctx, const char *filename,
+                        const Dav1dPictureParameters *p, const unsigned fps[2]);
+    int (*write_picture)(MuxerPriv *ctx, Dav1dPicture *p);
+    void (*write_trailer)(MuxerPriv *ctx);
+} Muxer;
+
+#endif /* __DAV1D_OUTPUT_MUXER_H__ */
--- /dev/null
+++ b/tools/output/output.c
@@ -1,0 +1,141 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "output/output.h"
+#include "output/muxer.h"
+
+/* Pairs a selected muxer implementation with its private state. */
+struct MuxerContext {
+    MuxerPriv *data;   // points just past this struct (single allocation)
+    const Muxer *impl; // selected muxer's callback table
+};
+
+// Static registry of available muxers, filled in by init_muxers().
+#define MAX_NUM_MUXERS 3
+static const Muxer *muxers[MAX_NUM_MUXERS];
+static int num_muxers = 0;
+
+// Declare the muxer object (defined in its own .c file) and append it to
+// the registry; asserts that the fixed-size table still has room.
+#define register_muxer(impl) { \
+    extern const Muxer impl; \
+    assert(num_muxers < MAX_NUM_MUXERS); \
+    muxers[num_muxers++] = &impl; \
+}
+
+/* Register all built-in muxers; call once before output_open(). */
+void init_muxers(void) {
+    register_muxer(md5_muxer);
+    register_muxer(yuv_muxer);
+    register_muxer(y4m2_muxer);
+}
+
+/* Return a pointer to the extension part of filename f (past the '.'),
+ * or NULL if f has no usable extension. */
+static const char *find_extension(const char *const f) {
+    const size_t l = strlen(f);
+
+    if (l == 0) return NULL;
+
+    // Walk backwards over the trailing alphanumeric run. The `step > f`
+    // bound is required: an all-alphanumeric filename previously walked
+    // step past the start of the string (out-of-bounds read, UB).
+    const char *const end = &f[l - 1], *step = end;
+    while (step > f &&
+           ((*step >= 'a' && *step <= 'z') ||
+            (*step >= 'A' && *step <= 'Z') ||
+            (*step >= '0' && *step <= '9')))
+    {
+        step--;
+    }
+
+    // Valid only if we stopped on a '.' that is neither the first
+    // character nor directly preceded by a path separator.
+    return (step < end && step > f && *step == '.' && step[-1] != '/') ?
+           &step[1] : NULL;
+}
+
+/* Select a muxer (by explicit name, or by the filename's extension),
+ * allocate its context, and write the stream header.
+ * Returns 0 on success or a negative error code. */
+int output_open(MuxerContext **const c_out,
+                const char *const name, const char *const filename,
+                const Dav1dPictureParameters *const p, const unsigned fps[2])
+{
+    const Muxer *impl = NULL; // NULL-init: clearer sentinel than i == num_muxers
+    MuxerContext *c;
+    int res, i;
+
+    if (name) {
+        for (i = 0; i < num_muxers; i++) {
+            if (!strcmp(muxers[i]->name, name)) {
+                impl = muxers[i];
+                break;
+            }
+        }
+        if (!impl) {
+            fprintf(stderr, "Failed to find muxer named \"%s\"\n", name);
+            return -ENOPROTOOPT;
+        }
+    } else {
+        const char *ext = find_extension(filename);
+        if (!ext) {
+            fprintf(stderr, "No extension found for file %s\n", filename);
+            return -1;
+        }
+        for (i = 0; i < num_muxers; i++) {
+            if (!strcmp(muxers[i]->extension, ext)) {
+                impl = muxers[i];
+                break;
+            }
+        }
+        if (!impl) {
+            fprintf(stderr, "Failed to find muxer for extension \"%s\"\n", ext);
+            return -ENOPROTOOPT;
+        }
+    }
+
+    // Single allocation: context header followed by the muxer's private data.
+    if (!(c = malloc(sizeof(MuxerContext) + impl->priv_data_size))) {
+        fprintf(stderr, "Failed to allocate memory\n");
+        return -ENOMEM;
+    }
+    c->impl = impl;
+    c->data = (MuxerPriv *) &c[1];
+    if ((res = impl->write_header(c->data, filename, p, fps)) < 0) {
+        free(c);
+        return res;
+    }
+    *c_out = c;
+
+    return 0;
+}
+
+/* Hand the picture to the muxer implementation. Propagates any negative
+ * error code from the muxer; normalizes success to 0. */
+int output_write(MuxerContext *const ctx, Dav1dPicture *const p) {
+    const int res = ctx->impl->write_picture(ctx->data, p);
+
+    return (res < 0) ? res : 0;
+}
+
+/* Write the muxer's trailer, then free the context allocation. */
+void output_close(MuxerContext *const ctx) {
+    ctx->impl->write_trailer(ctx->data);
+    free(ctx);
+}
--- /dev/null
+++ b/tools/output/output.h
@@ -1,0 +1,41 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_OUTPUT_OUTPUT_H__
+#define __DAV1D_OUTPUT_OUTPUT_H__
+
+#include "picture.h"
+
+typedef struct MuxerContext MuxerContext;
+
+void init_muxers(void);
+int output_open(MuxerContext **c, const char *name, const char *filename,
+                const Dav1dPictureParameters *p, const unsigned fps[2]);
+int output_write(MuxerContext *ctx, Dav1dPicture *pic);
+void output_close(MuxerContext *ctx);
+
+#endif /* __DAV1D_OUTPUT_OUTPUT_H__ */
--- /dev/null
+++ b/tools/output/y4m2.c
@@ -1,0 +1,114 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "output/muxer.h"
+
+/* Private state of the YUV4MPEG2 muxer. */
+typedef struct MuxerPriv {
+    FILE *f; // destination stream
+} Y4m2OutputContext;
+
+/* Open the YUV4MPEG2 output ("-" selects stdout) and emit the stream
+ * header. Returns 0 on success, -1 on failure. */
+static int y4m2_open(Y4m2OutputContext *const c, const char *const file,
+                     const Dav1dPictureParameters *p, const unsigned fps[2])
+{
+    if (!strcmp(file, "-")) {
+        // This is an output muxer: "-" must map to stdout (the original
+        // code assigned stdin, writing the stream to standard input).
+        c->f = stdout;
+    } else if (!(c->f = fopen(file, "w"))) {
+        fprintf(stderr, "Failed to open %s: %s\n", file, strerror(errno));
+        return -1;
+    }
+
+    // Colorspace tag per layout, with the 10-bit variant in column 1.
+    static const char *const ss_name[][2] = {
+        [DAV1D_PIXEL_LAYOUT_I400] = { "mono", "mono10" },
+        [DAV1D_PIXEL_LAYOUT_I420] = { "420jpeg", "420p10" },
+        [DAV1D_PIXEL_LAYOUT_I422] = { "422", "422p10" },
+        [DAV1D_PIXEL_LAYOUT_I444] = { "444", "444p10" }
+    };
+    fprintf(c->f, "YUV4MPEG2 W%d H%d C%s Ip F%d:%d\n",
+            p->w, p->h, ss_name[p->layout][p->bpc > 8], fps[0], fps[1]);
+
+    return 0;
+}
+
+/* Emit one YUV4MPEG2 frame: FRAME marker followed by the raw planes,
+ * written row by row (strides may exceed the visible width).
+ * Consumes the picture; returns 0 on success, -1 on write failure. */
+static int y4m2_write(Y4m2OutputContext *const c, Dav1dPicture *const p) {
+    fprintf(c->f, "FRAME\n");
+
+    const int shift = p->p.bpc > 8; // 1 when samples occupy two bytes
+
+    uint8_t *row = p->data[0];
+    for (int y = 0; y < p->p.h; y++, row += p->stride[0])
+        if (fwrite(row, p->p.w << shift, 1, c->f) != 1)
+            goto error;
+
+    if (p->p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+        // Chroma dimensions follow the layout's subsampling factors.
+        const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+        const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
+        const int cw = (p->p.w + ss_hor) >> ss_hor;
+        const int ch = (p->p.h + ss_ver) >> ss_ver;
+        for (int pl = 1; pl <= 2; pl++) {
+            row = p->data[pl];
+            for (int y = 0; y < ch; y++, row += p->stride[1])
+                if (fwrite(row, cw << shift, 1, c->f) != 1)
+                    goto error;
+        }
+    }
+
+    dav1d_picture_unref(p);
+    return 0;
+
+error:
+    dav1d_picture_unref(p);
+    fprintf(stderr, "Failed to write frame data: %s\n", strerror(errno));
+    return -1;
+}
+
+/* Close the output stream unless it is a standard stream. */
+static void y4m2_close(Y4m2OutputContext *const c) {
+    // Never fclose() a std stream: "-" may have mapped c->f to stdout
+    // (or, with the earlier open bug, stdin), so guard against both.
+    if (c->f != stdin && c->f != stdout)
+        fclose(c->f);
+}
+
+/* YUV4MPEG2 muxer: headered raw-video container (.y4m). */
+const Muxer y4m2_muxer = {
+    .priv_data_size = sizeof(Y4m2OutputContext),
+    .name = "yuv4mpeg2",
+    .extension = "y4m",
+    .write_header = y4m2_open,
+    .write_picture = y4m2_write,
+    .write_trailer = y4m2_close,
+};
--- /dev/null
+++ b/tools/output/yuv.c
@@ -1,0 +1,104 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "output/muxer.h"
+
+/* Private state of the raw-YUV muxer. */
+typedef struct MuxerPriv {
+    FILE *f; // destination stream
+} YuvOutputContext;
+
+/* Open the raw-YUV output ("-" selects stdout).
+ * Returns 0 on success, -1 on failure. */
+static int yuv_open(YuvOutputContext *const c, const char *const file,
+                    const Dav1dPictureParameters *const p,
+                    const unsigned fps[2])
+{
+    if (!strcmp(file, "-")) {
+        // This is an output muxer: "-" must map to stdout (the original
+        // code assigned stdin, writing the stream to standard input).
+        c->f = stdout;
+    } else if (!(c->f = fopen(file, "w"))) {
+        fprintf(stderr, "Failed to open %s: %s\n", file, strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Write the picture's planes as headerless raw YUV, one row at a time
+ * (strides may exceed the visible width). Consumes the picture;
+ * returns 0 on success, -1 on write failure. */
+static int yuv_write(YuvOutputContext *const c, Dav1dPicture *const p) {
+    const int shift = p->p.bpc > 8; // 1 when samples occupy two bytes
+
+    uint8_t *row = p->data[0];
+    int y = 0;
+    while (y++ < p->p.h) {
+        if (fwrite(row, p->p.w << shift, 1, c->f) != 1)
+            goto error;
+        row += p->stride[0];
+    }
+
+    if (p->p.layout != DAV1D_PIXEL_LAYOUT_I400) {
+        // Chroma dimensions follow the layout's subsampling factors.
+        const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420;
+        const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444;
+        const int cw = (p->p.w + ss_hor) >> ss_hor;
+        const int ch = (p->p.h + ss_ver) >> ss_ver;
+        for (int pl = 1; pl <= 2; pl++) {
+            row = p->data[pl];
+            for (int cy = 0; cy < ch; cy++) {
+                if (fwrite(row, cw << shift, 1, c->f) != 1)
+                    goto error;
+                row += p->stride[1]; // both chroma planes share stride[1]
+            }
+        }
+    }
+
+    dav1d_picture_unref(p);
+    return 0;
+
+error:
+    dav1d_picture_unref(p);
+    fprintf(stderr, "Failed to write frame data: %s\n", strerror(errno));
+    return -1;
+}
+
+/* Close the output stream unless it is a standard stream. */
+static void yuv_close(YuvOutputContext *const c) {
+    // Never fclose() a std stream: "-" may have mapped c->f to stdout
+    // (or, with the earlier open bug, stdin), so guard against both.
+    if (c->f != stdin && c->f != stdout)
+        fclose(c->f);
+}
+
+/* Raw-YUV muxer: headerless planar output (.yuv). */
+const Muxer yuv_muxer = {
+    .priv_data_size = sizeof(YuvOutputContext),
+    .name = "yuv",
+    .extension = "yuv",
+    .write_header = yuv_open,
+    .write_picture = yuv_write,
+    .write_trailer = yuv_close,
+};