ref: 5f4e28fe77a77682ac52841308582e74d09ca6e7
parent: 1c88bce602842999e2afda43dea55db3069ad470
author: Martin Storsjö <[email protected]>
date: Sat Apr 11 07:46:17 EDT 2020
itx: Add a bpc parameter to the itx dsp init function
--- a/src/arm/itx_init_tmpl.c
+++ b/src/arm/itx_init_tmpl.c
@@ -77,7 +77,7 @@
decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_neon);
decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_neon);
-COLD void bitfn(dav1d_itx_dsp_init_arm)(Dav1dInvTxfmDSPContext *const c) {
+COLD void bitfn(dav1d_itx_dsp_init_arm)(Dav1dInvTxfmDSPContext *const c, int bpc) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
--- a/src/decode.c
+++ b/src/decode.c
@@ -3302,7 +3302,7 @@
#define assign_bitdepth_case(bd) \
dav1d_cdef_dsp_init_##bd##bpc(&dsp->cdef); \
dav1d_intra_pred_dsp_init_##bd##bpc(&dsp->ipred); \
- dav1d_itx_dsp_init_##bd##bpc(&dsp->itx); \
+ dav1d_itx_dsp_init_##bd##bpc(&dsp->itx, bpc); \
dav1d_loop_filter_dsp_init_##bd##bpc(&dsp->lf); \
dav1d_loop_restoration_dsp_init_##bd##bpc(&dsp->lr, bpc); \
dav1d_mc_dsp_init_##bd##bpc(&dsp->mc); \
--- a/src/itx.h
+++ b/src/itx.h
@@ -43,8 +43,8 @@
itxfm_fn itxfm_add[N_RECT_TX_SIZES][N_TX_TYPES_PLUS_LL];
} Dav1dInvTxfmDSPContext;
-bitfn_decls(void dav1d_itx_dsp_init, Dav1dInvTxfmDSPContext *c);
-bitfn_decls(void dav1d_itx_dsp_init_arm, Dav1dInvTxfmDSPContext *c);
+bitfn_decls(void dav1d_itx_dsp_init, Dav1dInvTxfmDSPContext *c, int bpc);
+bitfn_decls(void dav1d_itx_dsp_init_arm, Dav1dInvTxfmDSPContext *c, int bpc);
bitfn_decls(void dav1d_itx_dsp_init_x86, Dav1dInvTxfmDSPContext *c);
#endif /* DAV1D_SRC_ITX_H */
--- a/src/itx_tmpl.c
+++ b/src/itx_tmpl.c
@@ -180,7 +180,7 @@
dst[x] = iclip_pixel(dst[x] + *c++);
}
-COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c) {
+COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c, int bpc) {
#define assign_itx_all_fn64(w, h, pfx) \
c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT ] = \
inv_txfm_add_dct_dct_##w##x##h##_c
@@ -249,7 +249,7 @@
#if HAVE_ASM
#if ARCH_AARCH64 || ARCH_ARM
- bitfn(dav1d_itx_dsp_init_arm)(c);
+ bitfn(dav1d_itx_dsp_init_arm)(c, bpc);
#endif
#if ARCH_X86
bitfn(dav1d_itx_dsp_init_x86)(c);
--- a/tests/checkasm/itx.c
+++ b/tests/checkasm/itx.c
@@ -223,8 +223,11 @@
}
void bitfn(checkasm_check_itx)(void) {
- Dav1dInvTxfmDSPContext c;
- bitfn(dav1d_itx_dsp_init)(&c);
+#if BITDEPTH == 16
+ const int bpc_min = 10, bpc_max = 12;
+#else
+ const int bpc_min = 8, bpc_max = 8;
+#endif
ALIGN_STK_64(coef, coeff, 2, [32 * 32]);
ALIGN_STK_64(pixel, c_dst, 64 * 64,);
@@ -250,39 +253,39 @@
const int subsh_max = subsh_iters[imax(dav1d_txfm_dimensions[tx].lw,
dav1d_txfm_dimensions[tx].lh)];
- for (enum TxfmType txtp = 0; txtp < N_TX_TYPES_PLUS_LL; txtp++)
- for (int subsh = 0; subsh < subsh_max; subsh++)
- if (check_func(c.itxfm_add[tx][txtp],
- "inv_txfm_add_%dx%d_%s_%s_%d_%dbpc",
- w, h, itx_1d_names[itx_1d_types[txtp][0]],
- itx_1d_names[itx_1d_types[txtp][1]], subsh,
- BITDEPTH))
- {
-#if BITDEPTH == 16
- const int bitdepth_max = rnd() & 1 ? 0x3ff : 0xfff;
-#else
- const int bitdepth_max = 0xff;
-#endif
- const int eob = ftx(coeff[0], tx, txtp, w, h, subsh, bitdepth_max);
- memcpy(coeff[1], coeff[0], sizeof(*coeff));
+ for (int bpc = bpc_min; bpc <= bpc_max; bpc += 2) {
+ Dav1dInvTxfmDSPContext c;
+ bitfn(dav1d_itx_dsp_init)(&c, bpc);
+ for (enum TxfmType txtp = 0; txtp < N_TX_TYPES_PLUS_LL; txtp++)
+ for (int subsh = 0; subsh < subsh_max; subsh++)
+ if (check_func(c.itxfm_add[tx][txtp],
+ "inv_txfm_add_%dx%d_%s_%s_%d_%dbpc",
+ w, h, itx_1d_names[itx_1d_types[txtp][0]],
+ itx_1d_names[itx_1d_types[txtp][1]], subsh,
+ bpc))
+ {
+ const int bitdepth_max = (1 << bpc) - 1;
+ const int eob = ftx(coeff[0], tx, txtp, w, h, subsh, bitdepth_max);
+ memcpy(coeff[1], coeff[0], sizeof(*coeff));
- for (int j = 0; j < w * h; j++)
- c_dst[j] = a_dst[j] = rnd() & bitdepth_max;
+ for (int j = 0; j < w * h; j++)
+ c_dst[j] = a_dst[j] = rnd() & bitdepth_max;
- call_ref(c_dst, w * sizeof(*c_dst), coeff[0], eob
- HIGHBD_TAIL_SUFFIX);
- call_new(a_dst, w * sizeof(*c_dst), coeff[1], eob
- HIGHBD_TAIL_SUFFIX);
+ call_ref(c_dst, w * sizeof(*c_dst), coeff[0], eob
+ HIGHBD_TAIL_SUFFIX);
+ call_new(a_dst, w * sizeof(*c_dst), coeff[1], eob
+ HIGHBD_TAIL_SUFFIX);
- checkasm_check_pixel(c_dst, w * sizeof(*c_dst),
- a_dst, w * sizeof(*a_dst),
- w, h, "dst");
- if (memcmp(coeff[0], coeff[1], sizeof(*coeff)))
- fail();
+ checkasm_check_pixel(c_dst, w * sizeof(*c_dst),
+ a_dst, w * sizeof(*a_dst),
+ w, h, "dst");
+ if (memcmp(coeff[0], coeff[1], sizeof(*coeff)))
+ fail();
- bench_new(a_dst, w * sizeof(*c_dst), coeff[0], eob
- HIGHBD_TAIL_SUFFIX);
- }
+ bench_new(a_dst, w * sizeof(*c_dst), coeff[0], eob
+ HIGHBD_TAIL_SUFFIX);
+ }
+ }
report("add_%dx%d", w, h);
}
}