shithub: dav1d

Download patch

ref: e54028690a6afaf825b4bd04be6638692b855964
parent: 65b08e23be250c053e60d49fadeaf5b2fe53e31e
author: Luc Trudeau <[email protected]>
date: Mon Oct 1 08:10:36 EDT 2018

Remove VLAs from wedge.c

--- a/src/wedge.c
+++ b/src/wedge.c
@@ -83,35 +83,35 @@
     { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
 };
 
-static uint8_t wedge_masks_444_32x32[2][16][32 * 32];
-static uint8_t wedge_masks_444_32x16[2][16][32 * 16];
-static uint8_t wedge_masks_444_32x8[2][16][32 * 8];
-static uint8_t wedge_masks_444_16x32[2][16][16 * 32];
-static uint8_t wedge_masks_444_16x16[2][16][16 * 16];
-static uint8_t wedge_masks_444_16x8[2][16][16 * 8];
-static uint8_t wedge_masks_444_8x32[2][16][8 * 32];
-static uint8_t wedge_masks_444_8x16[2][16][8 * 16];
-static uint8_t wedge_masks_444_8x8[2][16][8 * 8];
+static uint8_t wedge_masks_444_32x32[2 * 16 * 32 * 32];
+static uint8_t wedge_masks_444_32x16[2 * 16 * 32 * 16];
+static uint8_t wedge_masks_444_32x8[ 2 * 16 * 32 *  8];
+static uint8_t wedge_masks_444_16x32[2 * 16 * 16 * 32];
+static uint8_t wedge_masks_444_16x16[2 * 16 * 16 * 16];
+static uint8_t wedge_masks_444_16x8[ 2 * 16 * 16 *  8];
+static uint8_t wedge_masks_444_8x32[ 2 * 16 *  8 * 32];
+static uint8_t wedge_masks_444_8x16[ 2 * 16 *  8 * 16];
+static uint8_t wedge_masks_444_8x8[  2 * 16 *  8 *  8];
 
-static uint8_t wedge_masks_422_16x32[2][16][16 * 32];
-static uint8_t wedge_masks_422_16x16[2][16][16 * 16];
-static uint8_t wedge_masks_422_16x8[2][16][16 * 8];
-static uint8_t wedge_masks_422_8x32[2][16][8 * 32];
-static uint8_t wedge_masks_422_8x16[2][16][8 * 16];
-static uint8_t wedge_masks_422_8x8[2][16][8 * 8];
-static uint8_t wedge_masks_422_4x32[2][16][4 * 32];
-static uint8_t wedge_masks_422_4x16[2][16][4 * 16];
-static uint8_t wedge_masks_422_4x8[2][16][4 * 8];
+static uint8_t wedge_masks_422_16x32[2 * 16 * 16 * 32];
+static uint8_t wedge_masks_422_16x16[2 * 16 * 16 * 16];
+static uint8_t wedge_masks_422_16x8[ 2 * 16 * 16 *  8];
+static uint8_t wedge_masks_422_8x32[ 2 * 16 *  8 * 32];
+static uint8_t wedge_masks_422_8x16[ 2 * 16 *  8 * 16];
+static uint8_t wedge_masks_422_8x8[  2 * 16 *  8 *  8];
+static uint8_t wedge_masks_422_4x32[ 2 * 16 *  4 * 32];
+static uint8_t wedge_masks_422_4x16[ 2 * 16 *  4 * 16];
+static uint8_t wedge_masks_422_4x8[  2 * 16 *  4 *  8];
 
-static uint8_t wedge_masks_420_16x16[2][16][16 * 16];
-static uint8_t wedge_masks_420_16x8[2][16][16 * 8];
-static uint8_t wedge_masks_420_16x4[2][16][16 * 4];
-static uint8_t wedge_masks_420_8x16[2][16][8 * 16];
-static uint8_t wedge_masks_420_8x8[2][16][8 * 8];
-static uint8_t wedge_masks_420_8x4[2][16][8 * 4];
-static uint8_t wedge_masks_420_4x16[2][16][4 * 16];
-static uint8_t wedge_masks_420_4x8[2][16][4 * 8];
-static uint8_t wedge_masks_420_4x4[2][16][4 * 4];
+static uint8_t wedge_masks_420_16x16[2 * 16 * 16 * 16];
+static uint8_t wedge_masks_420_16x8[ 2 * 16 * 16 *  8];
+static uint8_t wedge_masks_420_16x4[ 2 * 16 * 16 *  4];
+static uint8_t wedge_masks_420_8x16[ 2 * 16 *  8 * 16];
+static uint8_t wedge_masks_420_8x8[  2 * 16 *  8 *  8];
+static uint8_t wedge_masks_420_8x4[  2 * 16 *  8 *  4];
+static uint8_t wedge_masks_420_4x16[ 2 * 16 *  4 * 16];
+static uint8_t wedge_masks_420_4x8[  2 * 16 *  4 *  8];
+static uint8_t wedge_masks_420_4x4[  2 * 16 *  4 *  4];
 
 const uint8_t *wedge_masks[N_BS_SIZES][3][2][16];
 
@@ -173,10 +173,8 @@
                         const enum BlockSize bs,
                         const uint8_t (*const master)[64 * 64],
                         const wedge_code_type *const cb,
-                        uint8_t (*masks_444)[16][w * h],
-                        uint8_t (*masks_422)[16][w * h >> 1],
-                        uint8_t (*masks_420)[16][w * h >> 2],
-                        const unsigned signs)
+                        uint8_t *masks_444, uint8_t *masks_422,
+                        uint8_t *masks_420, const unsigned signs)
 {
     uint8_t *ptr = dst;
     for (int n = 0; n < 16; n++) {
@@ -186,17 +184,28 @@
     }
     for (int n = 0, off = 0; n < 16; n++, off += w * h)
         invert(ptr + off, dst + off, w, h);
+
+    const int n_stride_444 = (w * h);
+    const int n_stride_422 = n_stride_444 >> 1;
+    const int n_stride_420 = n_stride_444 >> 2;
+    const int sign_stride_444 = 16 * n_stride_444;
+    const int sign_stride_422 = 16 * n_stride_422;
+    const int sign_stride_420 = 16 * n_stride_420;
     // assign pointers in externally visible array
     for (int n = 0; n < 16; n++) {
         const int sign = (signs >> n) & 1;
-        wedge_masks[bs][0][0][n] = masks_444[ sign][n];
-        wedge_masks[bs][0][1][n] = masks_444[ sign][n];
-        wedge_masks[bs][1][0][n] = masks_422[ sign][n];
-        wedge_masks[bs][1][1][n] = masks_422[!sign][n];
-        wedge_masks[bs][2][0][n] = masks_420[ sign][n];
-        wedge_masks[bs][2][1][n] = masks_420[!sign][n];
-    }
-    for (int n = 0; n < 16; n++) {
+        wedge_masks[bs][0][0][n] = &masks_444[ sign * sign_stride_444];
+        // not using !sign is intentional here: 444 applies no chroma
+        // subsampling, so no rounding is required.
+        wedge_masks[bs][0][1][n] = &masks_444[ sign * sign_stride_444];
+        wedge_masks[bs][1][0][n] = &masks_422[ sign * sign_stride_422];
+        wedge_masks[bs][1][1][n] = &masks_422[!sign * sign_stride_422];
+        wedge_masks[bs][2][0][n] = &masks_420[ sign * sign_stride_420];
+        wedge_masks[bs][2][1][n] = &masks_420[!sign * sign_stride_420];
+        masks_444 += n_stride_444;
+        masks_422 += n_stride_422;
+        masks_420 += n_stride_420;
+
         // since the pointers come from inside, we know that
         // violation of the const is OK here. Any other approach
         // means we would have to duplicate the sign correction