ref: 32c62b5f38e9c935454b95e3aa1730ebd00f87cf
parent: c163cbdb8360847b22ab4f0fbd6c7a5723af7bdf
author: Ronald S. Bultje <[email protected]>
date: Tue Oct 23 16:34:35 EDT 2018
Make access into noskip_mask safe across sb64 tile threads
--- a/src/cdef_apply.c
+++ b/src/cdef_apply.c
@@ -152,10 +152,10 @@
// check if this 8x8 block had any coded coefficients; if not,
// go to the next block
- const unsigned bx_mask = 3U << (bx & 30);
- const int by_idx = by & 30;
- if (!((lflvl[sb128x].noskip_mask[by_idx + 0] |
- lflvl[sb128x].noskip_mask[by_idx + 1]) & bx_mask))
+ const unsigned bx_mask = 3U << (bx & 14);
+ const int by_idx = by & 30, bx_idx = (bx & 16) >> 4;
+ if (!((lflvl[sb128x].noskip_mask[by_idx + 0][bx_idx] |
+ lflvl[sb128x].noskip_mask[by_idx + 1][bx_idx]) & bx_mask))
{
last_skip = 1;
goto next_b;
--- a/src/decode.c
+++ b/src/decode.c
@@ -1811,10 +1811,14 @@
memset(&t->l.skip[by4], b->skip, bh4);
memset(&t->a->skip[bx4], b->skip, bw4);
if (!b->skip) {
- uint32_t *noskip_mask = &t->lf_mask->noskip_mask[by4];
- const unsigned mask = ((1ULL << bw4) - 1) << bx4;
- for (int y = 0; y < bh4; y++)
- *noskip_mask++ |= mask;
+ uint16_t (*noskip_mask)[2] = &t->lf_mask->noskip_mask[by4];
+ const unsigned mask = (~0U >> (32 - bw4)) << (bx4 & 15);
+ const int bx_idx = (bx4 & 16) >> 4;
+ for (int y = 0; y < bh4; y++, noskip_mask++) {
+ (*noskip_mask)[bx_idx] |= mask;
+ if (bw4 == 32) // this should be mask >> 16, but it's 0xffffffff anyway
+ (*noskip_mask)[1] |= mask;
+ }
}
return 0;
--- a/src/lf_mask.h
+++ b/src/lf_mask.h
@@ -53,7 +53,7 @@
uint32_t filter_y[2 /* 0=col, 1=row */][32][3];
uint32_t filter_uv[2 /* 0=col, 1=row */][32][2];
int8_t cdef_idx[4]; // -1 means "unset"
- uint32_t noskip_mask[32];
+ uint16_t noskip_mask[32][2];
Av1RestorationUnit lr[3][4];
} Av1Filter;