shithub: dav1d

--- a/src/mc_tmpl.c

+++ b/src/mc_tmpl.c

@@ -39,9 +39,14 @@

 #if BITDEPTH == 8

 #define get_intermediate_bits(bitdepth_max) 4

+// Output in interval [-5132, 9212], fits in int16_t as is

+#define PREP_BIAS 0

 #else

 // 4 for 10 bits/component, 2 for 12 bits/component

 #define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))

+// Output in interval [-20588, 36956] (10-bit), [-20602, 36983] (12-bit)

+// Subtract a bias to ensure the output fits in int16_t

+#define PREP_BIAS 8192

 #endif

 static NOINLINE void

@@ -63,7 +68,7 @@

     const int intermediate_bits = get_intermediate_bits(bitdepth_max);

     do {

         for (int x = 0; x < w; x++)

-            tmp[x] = src[x] << intermediate_bits;

+            tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS;

         tmp += w;

         src += src_stride;

@@ -237,8 +242,12 @@

             mid_ptr = mid + 128 * 3;

             do {

-                for (int x = 0; x < w; x++)

-                    tmp[x] = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6);

+                for (int x = 0; x < w; x++) {

+                    int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) -

+                                  PREP_BIAS;

+                    assert(t >= INT16_MIN && t <= INT16_MAX);

+                    tmp[x] = t;

+                }

                 mid_ptr += 128;

                 tmp += w;

@@ -247,7 +256,8 @@

             do {

                 for (int x = 0; x < w; x++)

                     tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,

-                                                   6 - intermediate_bits);

+                                                   6 - intermediate_bits) -

+                             PREP_BIAS;

                 tmp += w;

                 src += src_stride;

@@ -257,7 +267,8 @@

         do {

             for (int x = 0; x < w; x++)

                 tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride,

-                                               6 - intermediate_bits);

+                                               6 - intermediate_bits) -

+                         PREP_BIAS;

             tmp += w;

             src += src_stride;

@@ -302,7 +313,8 @@

         GET_V_FILTER(my >> 6);

         for (x = 0; x < w; x++)

-            tmp[x] = fv ? DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) : mid_ptr[x];

+            tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6)

+                         : mid_ptr[x]) - PREP_BIAS;

         my += dy;

         mid_ptr += (my >> 10) * 128;

@@ -499,7 +511,8 @@

             mid_ptr = mid;

             do {

                 for (int x = 0; x < w; x++)

-                    tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4);

+                    tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my, 128, 4) -

+                             PREP_BIAS;

                 mid_ptr += 128;

                 tmp += w;

@@ -508,7 +521,8 @@

             do {

                 for (int x = 0; x < w; x++)

                     tmp[x] = FILTER_BILIN_RND(src, x, mx, 1,

-                                              4 - intermediate_bits);

+                                              4 - intermediate_bits) -

+                             PREP_BIAS;

                 tmp += w;

                 src += src_stride;

@@ -518,7 +532,7 @@

         do {

             for (int x = 0; x < w; x++)

                 tmp[x] = FILTER_BILIN_RND(src, x, my, src_stride,

-                                          4 - intermediate_bits);

+                                          4 - intermediate_bits) - PREP_BIAS;

             tmp += w;

             src += src_stride;

@@ -557,7 +571,7 @@

         int x;

         for (x = 0; x < w; x++)

-            tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my >> 6, 128, 4);

+            tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my >> 6, 128, 4) - PREP_BIAS;

         my += dy;

         mid_ptr += (my >> 10) * 128;

@@ -571,7 +585,8 @@

                   HIGHBD_DECL_SUFFIX)

     const int intermediate_bits = get_intermediate_bits(bitdepth_max);

-    const int sh = intermediate_bits + 1, rnd = 1 << intermediate_bits;

+    const int sh = intermediate_bits + 1;

+    const int rnd = (1 << intermediate_bits) + PREP_BIAS * 2;

     do {

         for (int x = 0; x < w; x++)

             dst[x] = iclip_pixel((tmp1[x] + tmp2[x] + rnd) >> sh);

@@ -587,7 +602,8 @@

                     const int weight HIGHBD_DECL_SUFFIX)

     const int intermediate_bits = get_intermediate_bits(bitdepth_max);

-    const int sh = intermediate_bits + 4, rnd = 8 << intermediate_bits;

+    const int sh = intermediate_bits + 4;

+    const int rnd = (8 << intermediate_bits) + PREP_BIAS * 16;

     do {

         for (int x = 0; x < w; x++)

             dst[x] = iclip_pixel((tmp1[x] * weight +

@@ -604,7 +620,8 @@

                    const uint8_t *mask HIGHBD_DECL_SUFFIX)

     const int intermediate_bits = get_intermediate_bits(bitdepth_max);

-    const int sh = intermediate_bits + 6, rnd = 32 << intermediate_bits;

+    const int sh = intermediate_bits + 6;

+    const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64;

     do {

         for (int x = 0; x < w; x++)

             dst[x] = iclip_pixel((tmp1[x] * mask[x] +

@@ -668,7 +685,8 @@

     // and then load this intermediate to calculate final value for odd rows

     const int intermediate_bits = get_intermediate_bits(bitdepth_max);

     const int bitdepth = bitdepth_from_max(bitdepth_max);

-    const int sh = intermediate_bits + 6, rnd = 32 << intermediate_bits;

+    const int sh = intermediate_bits + 6;

+    const int rnd = (32 << intermediate_bits) + PREP_BIAS * 64;

     const int mask_sh = bitdepth + intermediate_bits - 4;

     const int mask_rnd = 1 << (mask_sh - 5);

     do {

@@ -797,7 +815,7 @@

             const int8_t *const filter =

                 dav1d_mc_warp_filter[64 + ((tmy + 512) >> 10)];

-            tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7);

+            tmp[x] = FILTER_WARP_RND(mid_ptr, x, filter, 8, 7) - PREP_BIAS;

         mid_ptr += 8;

         tmp += tmp_stride;

--- a/tests/checkasm/mc.c

+++ b/tests/checkasm/mc.c

@@ -84,6 +84,17 @@

     report("mc");

+/* Generate worst-case input in the top-left 8x8 corner, randomize the rest */

+static void generate_mct_input(pixel *const buf, const int bitdepth_max) {

+    static const int8_t pattern[8] = { -1,  0, -1,  0,  0, -1,  0, -1 }; // per-coordinate masks: all bits set (-1) or clear (0)

+    const int sign = -(rnd() & 1); // 0 or -1, drawn from rnd(): XORing with -1 inverts the whole corner pattern

+    for (int y = 0; y < 135; y++) // buf is written as 135 rows of 135 pixels

+        for (int x = 0; x < 135; x++)

+            buf[135*y+x] = ((x | y) < 8 ? (pattern[x] ^ pattern[y] ^ sign) // true only when both x < 8 and y < 8; XOR yields 0 or -1

+                                        : rnd()) & bitdepth_max; // masking turns the corner into 0 or bitdepth_max, the rest into rnd() & bitdepth_max

+}

 static void check_mct(Dav1dMCDSPContext *const c) {

     ALIGN_STK_32(pixel, src_buf, 135 * 135,);

     ALIGN_STK_32(int16_t, c_tmp,   128 * 128,);

@@ -107,10 +118,8 @@

 #else

                         const int bitdepth_max = 0xff;

 #endif

+                        generate_mct_input(src_buf, bitdepth_max);

-                        for (int i = 0; i < 135 * 135; i++)

-                            src_buf[i] = rnd() & bitdepth_max;

                         call_ref(c_tmp, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);

                         call_new(a_tmp, src, w, w, h, mx, my HIGHBD_TAIL_SUFFIX);

                         if (memcmp(c_tmp, a_tmp, w * h * sizeof(*c_tmp)))

@@ -127,12 +136,10 @@

                      int16_t (*const tmp)[128 * 128], const int bitdepth_max)

     for (int i = 0; i < 2; i++) {

-        for (int j = 0; j < 135 * 135; j++)

-            buf[j] = rnd() & bitdepth_max;

-        c->mct[rnd() % N_2D_FILTERS](tmp[i], buf + 135 * 3 + 3,

-                                      128 * sizeof(pixel), 128, 128,

-                                      rnd() & 15, rnd() & 15

-                                      HIGHBD_TAIL_SUFFIX);

+        generate_mct_input(buf, bitdepth_max);

+        c->mct[FILTER_2D_8TAP_SHARP](tmp[i], buf + 135 * 3 + 3,

+                                      135 * sizeof(pixel), 128, 128,

+                                      8, 8 HIGHBD_TAIL_SUFFIX);