shithub: dav1d

Download patch

ref: 98ed9be69b08f5438cce7e696b2c8eadfb3ce905
parent: 6ea3fda58c17ec8d55d7fc90eb5305ade3f4ebbc
author: Victorien Le Couviour--Tuffet <[email protected]>
date: Wed Apr 15 14:18:30 EDT 2020

Fix MC masks alignment for sizes >= 64 for AVX-512

These masks need to be 64-byte aligned when w*h >= 64, as we will try to load them 64 bytes at a time.

(Also realigns the 4x4 masks to 16 bytes, as 32-byte alignment is unnecessary for them.)

--- a/src/wedge.c
+++ b/src/wedge.c
@@ -83,35 +83,35 @@
     { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
 };
 
-static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 *  8], 32);
-static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 *  8], 32);
-static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 *  8 * 32], 32);
-static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 *  8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_444_8x8  [2 * 16 *  8 *  8], 32);
+static uint8_t ALIGN(wedge_masks_444_32x32[2 * 16 * 32 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_32x16[2 * 16 * 32 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_32x8 [2 * 16 * 32 *  8], 64);
+static uint8_t ALIGN(wedge_masks_444_16x32[2 * 16 * 16 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_16x8 [2 * 16 * 16 *  8], 64);
+static uint8_t ALIGN(wedge_masks_444_8x32 [2 * 16 *  8 * 32], 64);
+static uint8_t ALIGN(wedge_masks_444_8x16 [2 * 16 *  8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_444_8x8  [2 * 16 *  8 *  8], 64);
 
-static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 *  8], 32);
-static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 *  8 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 *  8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_422_8x8  [2 * 16 *  8 *  8], 32);
-static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 *  4 * 32], 32);
-static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 *  4 * 16], 32);
+static uint8_t ALIGN(wedge_masks_422_16x32[2 * 16 * 16 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_422_16x8 [2 * 16 * 16 *  8], 64);
+static uint8_t ALIGN(wedge_masks_422_8x32 [2 * 16 *  8 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_8x16 [2 * 16 *  8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_422_8x8  [2 * 16 *  8 *  8], 64);
+static uint8_t ALIGN(wedge_masks_422_4x32 [2 * 16 *  4 * 32], 64);
+static uint8_t ALIGN(wedge_masks_422_4x16 [2 * 16 *  4 * 16], 64);
 static uint8_t ALIGN(wedge_masks_422_4x8  [2 * 16 *  4 *  8], 32);
 
-static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 32);
-static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 *  8], 32);
-static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 *  4], 32);
-static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 *  8 * 16], 32);
-static uint8_t ALIGN(wedge_masks_420_8x8  [2 * 16 *  8 *  8], 32);
-static uint8_t ALIGN(wedge_masks_420_8x4  [2 * 16 *  8 *  4], 32);
-static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 *  4 * 16], 32);
+static uint8_t ALIGN(wedge_masks_420_16x16[2 * 16 * 16 * 16], 64);
+static uint8_t ALIGN(wedge_masks_420_16x8 [2 * 16 * 16 *  8], 64);
+static uint8_t ALIGN(wedge_masks_420_16x4 [2 * 16 * 16 *  4], 64);
+static uint8_t ALIGN(wedge_masks_420_8x16 [2 * 16 *  8 * 16], 64);
+static uint8_t ALIGN(wedge_masks_420_8x8  [2 * 16 *  8 *  8], 64);
+static uint8_t ALIGN(wedge_masks_420_8x4  [2 * 16 *  8 *  4], 64);
+static uint8_t ALIGN(wedge_masks_420_4x16 [2 * 16 *  4 * 16], 64);
 static uint8_t ALIGN(wedge_masks_420_4x8  [2 * 16 *  4 *  8], 32);
-static uint8_t ALIGN(wedge_masks_420_4x4  [2 * 16 *  4 *  4], 32);
+static uint8_t ALIGN(wedge_masks_420_4x4  [2 * 16 *  4 *  4], 16);
 
 const uint8_t *dav1d_wedge_masks[N_BS_SIZES][3][2][16];
 
@@ -274,16 +274,16 @@
 }
 
 #define N_II_PRED_MODES (N_INTER_INTRA_PRED_MODES - 1)
-static uint8_t ALIGN(ii_dc_mask[32 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 32);
-static uint8_t ALIGN(ii_nondc_mask_8x8  [N_II_PRED_MODES][ 8 *  8], 32);
-static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 32);
+static uint8_t ALIGN(ii_dc_mask[32 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_32x32[N_II_PRED_MODES][32 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_16x32[N_II_PRED_MODES][16 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_16x16[N_II_PRED_MODES][16 * 16], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x32 [N_II_PRED_MODES][ 8 * 32], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x16 [N_II_PRED_MODES][ 8 * 16], 64);
+static uint8_t ALIGN(ii_nondc_mask_8x8  [N_II_PRED_MODES][ 8 *  8], 64);
+static uint8_t ALIGN(ii_nondc_mask_4x16 [N_II_PRED_MODES][ 4 * 16], 64);
 static uint8_t ALIGN(ii_nondc_mask_4x8  [N_II_PRED_MODES][ 4 *  8], 32);
-static uint8_t ALIGN(ii_nondc_mask_4x4  [N_II_PRED_MODES][ 4 *  4], 32);
+static uint8_t ALIGN(ii_nondc_mask_4x4  [N_II_PRED_MODES][ 4 *  4], 16);
 #undef N_II_PRED_MODES
 
 #define set1(sz) \