ref: e214351b0e485f68d0c7a03f726e8a694f0a8cbf
parent: 46e2a2d0cc451e1d6bb929f80088f8a7b8940dd0
author: Janne Grunau <[email protected]>
date: Thu Oct 25 16:03:51 EDT 2018
rename arch specific bitdepth template files. Missed in 46e2a2d0cc. Arm asm will be hard to template, so move the Arm asm files to the plain source list. Fix #96.
--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -28,8 +28,6 @@
#include "src/arm/asm.S"
-#if BITDEPTH == 8
-
.macro avg dst0, dst1, t0, t1, t2, t3
vld1.16 {\t0,\t1}, [r2, :128]!
vld1.16 {\t2,\t3}, [r3, :128]!
@@ -215,5 +213,3 @@
bidir_fn avg
bidir_fn w_avg
bidir_fn mask
-
-#endif /* BITDEPTH == 8 */
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -27,8 +27,6 @@
#include "src/arm/asm.S"
-#if BITDEPTH == 8
-
.macro avg dst, t0, t1
ld1 {\t0\().8h}, [x2], 16
ld1 {\t1\().8h}, [x3], 16
@@ -233,5 +231,3 @@
bidir_fn avg
bidir_fn w_avg
bidir_fn mask
-
-#endif /* BITDEPTH == 8 */
--- a/src/arm/mc_init.c
+++ /dev/null
@@ -1,47 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "src/mc.h"
-#include "src/cpu.h"
-
-decl_avg_fn(dav1d_avg_8bpc_neon);
-decl_w_avg_fn(dav1d_w_avg_8bpc_neon);
-decl_mask_fn(dav1d_mask_8bpc_neon);
-
-void bitfn(dav1d_mc_dsp_init_arm)(Dav1dMCDSPContext *const c) {
- const unsigned flags = dav1d_get_cpu_flags();
-
- if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
-
-#if BITDEPTH == 8
- c->avg = dav1d_avg_8bpc_neon;
- c->w_avg = dav1d_w_avg_8bpc_neon;
- c->mask = dav1d_mask_8bpc_neon;
-#endif
-}
--- /dev/null
+++ b/src/arm/mc_init_tmpl.c
@@ -1,0 +1,47 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "src/mc.h"
+#include "src/cpu.h"
+
+decl_avg_fn(dav1d_avg_8bpc_neon); // prototypes only; presumably implemented in src/arm/{32,64}/mc.S — confirm
+decl_w_avg_fn(dav1d_w_avg_8bpc_neon);
+decl_mask_fn(dav1d_mask_8bpc_neon);
+
+void bitfn(dav1d_mc_dsp_init_arm)(Dav1dMCDSPContext *const c) { // Points c->avg/w_avg/mask at the NEON routines when usable.
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return; // NEON not reported: leave c exactly as the caller passed it
+
+#if BITDEPTH == 8
+ c->avg = dav1d_avg_8bpc_neon; // only *_8bpc_neon symbols are declared in this file, so other bitdepths assign nothing
+ c->w_avg = dav1d_w_avg_8bpc_neon;
+ c->mask = dav1d_mask_8bpc_neon;
+#endif
+}
--- a/src/meson.build
+++ b/src/meson.build
@@ -83,14 +83,14 @@
'arm/cpu.c',
)
libdav1d_tmpl_sources += files(
- 'arm/mc_init.c',
+ 'arm/mc_init_tmpl.c',
)
if host_machine.cpu_family() == 'aarch64'
- libdav1d_tmpl_sources += files(
+ libdav1d_sources += files(
'arm/64/mc.S',
)
elif host_machine.cpu_family().startswith('arm')
- libdav1d_tmpl_sources += files(
+ libdav1d_sources += files(
'arm/32/mc.S',
)
endif
@@ -101,11 +101,11 @@
)
libdav1d_tmpl_sources += files(
- 'x86/ipred_init.c',
- 'x86/itx_init.c',
- 'x86/loopfilter_init.c',
- 'x86/looprestoration_init.c',
- 'x86/mc_init.c',
+ 'x86/ipred_init_tmpl.c',
+ 'x86/itx_init_tmpl.c',
+ 'x86/loopfilter_init_tmpl.c',
+ 'x86/looprestoration_init_tmpl.c',
+ 'x86/mc_init_tmpl.c',
)
# NASM source files
--- a/src/x86/ipred_init.c
+++ /dev/null
@@ -1,75 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/ipred.h"
-
-decl_angular_ipred_fn(dav1d_ipred_dc_avx2);
-decl_angular_ipred_fn(dav1d_ipred_dc_128_avx2);
-decl_angular_ipred_fn(dav1d_ipred_dc_top_avx2);
-decl_angular_ipred_fn(dav1d_ipred_dc_left_avx2);
-decl_angular_ipred_fn(dav1d_ipred_h_avx2);
-decl_angular_ipred_fn(dav1d_ipred_v_avx2);
-decl_angular_ipred_fn(dav1d_ipred_paeth_avx2);
-decl_angular_ipred_fn(dav1d_ipred_smooth_avx2);
-decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2);
-decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2);
-decl_angular_ipred_fn(dav1d_ipred_filter_avx2);
-
-decl_cfl_pred_fn(dav1d_ipred_cfl_avx2);
-decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
-decl_cfl_pred_fn(dav1d_ipred_cfl_top_avx2);
-decl_cfl_pred_fn(dav1d_ipred_cfl_left_avx2);
-
-decl_pal_pred_fn(dav1d_pal_pred_avx2);
-
-void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
- const unsigned flags = dav1d_get_cpu_flags();
-
- if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
- c->intra_pred[DC_PRED] = dav1d_ipred_dc_avx2;
- c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_avx2;
- c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_avx2;
- c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_avx2;
- c->intra_pred[HOR_PRED] = dav1d_ipred_h_avx2;
- c->intra_pred[VERT_PRED] = dav1d_ipred_v_avx2;
- c->intra_pred[PAETH_PRED] = dav1d_ipred_paeth_avx2;
- c->intra_pred[SMOOTH_PRED] = dav1d_ipred_smooth_avx2;
- c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2;
- c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2;
- c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_avx2;
-
- c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_avx2;
- c->cfl_pred[DC_128_PRED] = dav1d_ipred_cfl_128_avx2;
- c->cfl_pred[TOP_DC_PRED] = dav1d_ipred_cfl_top_avx2;
- c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_avx2;
-
- c->pal_pred = dav1d_pal_pred_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/ipred_init_tmpl.c
@@ -1,0 +1,75 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/ipred.h"
+
+decl_angular_ipred_fn(dav1d_ipred_dc_avx2); // AVX2 prototypes only; the definitions are not in this file
+decl_angular_ipred_fn(dav1d_ipred_dc_128_avx2);
+decl_angular_ipred_fn(dav1d_ipred_dc_top_avx2);
+decl_angular_ipred_fn(dav1d_ipred_dc_left_avx2);
+decl_angular_ipred_fn(dav1d_ipred_h_avx2);
+decl_angular_ipred_fn(dav1d_ipred_v_avx2);
+decl_angular_ipred_fn(dav1d_ipred_paeth_avx2);
+decl_angular_ipred_fn(dav1d_ipred_smooth_avx2);
+decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2);
+decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2);
+decl_angular_ipred_fn(dav1d_ipred_filter_avx2);
+
+decl_cfl_pred_fn(dav1d_ipred_cfl_avx2); // cfl counterparts of the four dc variants above (plain, _128, _top, _left)
+decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
+decl_cfl_pred_fn(dav1d_ipred_cfl_top_avx2);
+decl_cfl_pred_fn(dav1d_ipred_cfl_left_avx2);
+
+decl_pal_pred_fn(dav1d_pal_pred_avx2);
+
+void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) { // Fills c->intra_pred[], c->cfl_pred[] and c->pal_pred with the AVX2 routines when usable.
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; // AVX2 not reported: leave c exactly as the caller passed it
+
+#if BITDEPTH == 8 && ARCH_X86_64
+ c->intra_pred[DC_PRED] = dav1d_ipred_dc_avx2; // compiled only for BITDEPTH == 8 on x86-64; other builds keep just the flag check above
+ c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_avx2;
+ c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_avx2;
+ c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_avx2;
+ c->intra_pred[HOR_PRED] = dav1d_ipred_h_avx2;
+ c->intra_pred[VERT_PRED] = dav1d_ipred_v_avx2;
+ c->intra_pred[PAETH_PRED] = dav1d_ipred_paeth_avx2;
+ c->intra_pred[SMOOTH_PRED] = dav1d_ipred_smooth_avx2;
+ c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2;
+ c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2;
+ c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_avx2;
+
+ c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_avx2; // cfl_pred[] is indexed with the same *_PRED enumerators as the four DC entries above
+ c->cfl_pred[DC_128_PRED] = dav1d_ipred_cfl_128_avx2;
+ c->cfl_pred[TOP_DC_PRED] = dav1d_ipred_cfl_top_avx2;
+ c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_avx2;
+
+ c->pal_pred = dav1d_pal_pred_avx2;
+#endif
+}
--- a/src/x86/itx_init.c
+++ /dev/null
@@ -1,141 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/itx.h"
-
-#define decl_itx2_fns(w, h, opt) \
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_identity_##w##x##h##_##opt)
-
-#define decl_itx12_fns(w, h, opt) \
-decl_itx2_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_dct_##w##x##h##_##opt)
-
-#define decl_itx16_fns(w, h, opt) \
-decl_itx12_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_flipadst_##w##x##h##_##opt)
-
-#define decl_itx17_fns(w, h, opt) \
-decl_itx16_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_wht_wht_##w##x##h##_##opt)
-
-decl_itx17_fns( 4, 4, avx2);
-decl_itx16_fns( 4, 8, avx2);
-decl_itx16_fns( 4, 16, avx2);
-decl_itx16_fns( 8, 4, avx2);
-decl_itx16_fns( 8, 8, avx2);
-decl_itx16_fns( 8, 16, avx2);
-decl_itx2_fns ( 8, 32, avx2);
-decl_itx16_fns(16, 4, avx2);
-decl_itx16_fns(16, 8, avx2);
-decl_itx12_fns(16, 16, avx2);
-decl_itx2_fns (16, 32, avx2);
-decl_itx2_fns (32, 8, avx2);
-decl_itx2_fns (32, 16, avx2);
-decl_itx2_fns (32, 32, avx2);
-
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_16x64_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_32x64_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x16_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_avx2);
-
-void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
-#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
- c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
- dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
-
-#define assign_itx1_fn(pfx, w, h, ext) \
- assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext)
-
-#define assign_itx2_fn(pfx, w, h, ext) \
- assign_itx1_fn(pfx, w, h, ext); \
- assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext)
-
-#define assign_itx12_fn(pfx, w, h, ext) \
- assign_itx2_fn(pfx, w, h, ext); \
- assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
- assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
- assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
- assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
- assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
- assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
- assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
- assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
- assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
- assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext)
-
-#define assign_itx16_fn(pfx, w, h, ext) \
- assign_itx12_fn(pfx, w, h, ext); \
- assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
- assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
- assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
- assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext)
-
-#define assign_itx17_fn(pfx, w, h, ext) \
- assign_itx16_fn(pfx, w, h, ext); \
- assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext)
-
- const unsigned flags = dav1d_get_cpu_flags();
-
- if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
- assign_itx17_fn( , 4, 4, avx2);
- assign_itx16_fn(R, 4, 8, avx2);
- assign_itx16_fn(R, 4, 16, avx2);
- assign_itx16_fn(R, 8, 4, avx2);
- assign_itx16_fn( , 8, 8, avx2);
- assign_itx16_fn(R, 8, 16, avx2);
- assign_itx2_fn (R, 8, 32, avx2);
- assign_itx16_fn(R, 16, 4, avx2);
- assign_itx16_fn(R, 16, 8, avx2);
- assign_itx12_fn( , 16, 16, avx2);
- assign_itx2_fn (R, 16, 32, avx2);
- assign_itx1_fn (R, 16, 64, avx2);
- assign_itx2_fn (R, 32, 8, avx2);
- assign_itx2_fn (R, 32, 16, avx2);
- assign_itx2_fn ( , 32, 32, avx2);
- assign_itx1_fn (R, 32, 64, avx2);
- assign_itx1_fn (R, 64, 16, avx2);
- assign_itx1_fn (R, 64, 32, avx2);
- assign_itx1_fn ( , 64, 64, avx2);
-#endif
-}
--- /dev/null
+++ b/src/x86/itx_init_tmpl.c
@@ -1,0 +1,141 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/itx.h"
+
+#define decl_itx2_fns(w, h, opt) \
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_identity_##w##x##h##_##opt) // 2 prototypes: dct_dct + identity_identity
+
+#define decl_itx12_fns(w, h, opt) \
+decl_itx2_fns(w, h, opt); \
+decl_itx_fn(dav1d_inv_txfm_add_dct_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_dct_flipadst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_dct_identity_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_dct_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_flipadst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_dct_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_dct_##w##x##h##_##opt) // 12 prototypes: the 2 from decl_itx2_fns + these 10
+
+#define decl_itx16_fns(w, h, opt) \
+decl_itx12_fns(w, h, opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_identity_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_identity_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_flipadst_##w##x##h##_##opt) // 16 prototypes: the 12 above + identity paired with adst/flipadst
+
+#define decl_itx17_fns(w, h, opt) \
+decl_itx16_fns(w, h, opt); \
+decl_itx_fn(dav1d_inv_txfm_add_wht_wht_##w##x##h##_##opt) // 17 prototypes: the 16 above + wht_wht
+
+decl_itx17_fns( 4, 4, avx2); // each use must match the assign_itx*_fn call for the same size in the init function below
+decl_itx16_fns( 4, 8, avx2);
+decl_itx16_fns( 4, 16, avx2);
+decl_itx16_fns( 8, 4, avx2);
+decl_itx16_fns( 8, 8, avx2);
+decl_itx16_fns( 8, 16, avx2);
+decl_itx2_fns ( 8, 32, avx2);
+decl_itx16_fns(16, 4, avx2);
+decl_itx16_fns(16, 8, avx2);
+decl_itx12_fns(16, 16, avx2);
+decl_itx2_fns (16, 32, avx2);
+decl_itx2_fns (32, 8, avx2);
+decl_itx2_fns (32, 16, avx2);
+decl_itx2_fns (32, 32, avx2);
+
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_16x64_avx2); // sizes with a 64 dimension: only dct_dct is declared
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_32x64_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x16_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_avx2);
+
+void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) { // Fills c->itxfm_add[][] with the dav1d_inv_txfm_add_*_avx2 routines when usable.
+#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
+ c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
+ dav1d_inv_txfm_add_##type##_##w##x##h##_##ext // sets one slot: itxfm_add[<pfx>TX_<w>X<h>][type_enum]
+
+#define assign_itx1_fn(pfx, w, h, ext) \
+ assign_itx_fn(pfx, w, h, dct_dct, DCT_DCT, ext) // 1 slot: dct_dct only
+
+#define assign_itx2_fn(pfx, w, h, ext) \
+ assign_itx1_fn(pfx, w, h, ext); \
+ assign_itx_fn(pfx, w, h, identity_identity, IDTX, ext) // 2 slots: dct_dct + identity_identity
+
+#define assign_itx12_fn(pfx, w, h, ext) \
+ assign_itx2_fn(pfx, w, h, ext); \
+ assign_itx_fn(pfx, w, h, dct_adst, ADST_DCT, ext); \
+ assign_itx_fn(pfx, w, h, dct_flipadst, FLIPADST_DCT, ext); \
+ assign_itx_fn(pfx, w, h, dct_identity, H_DCT, ext); \
+ assign_itx_fn(pfx, w, h, adst_dct, DCT_ADST, ext); \
+ assign_itx_fn(pfx, w, h, adst_adst, ADST_ADST, ext); \
+ assign_itx_fn(pfx, w, h, adst_flipadst, FLIPADST_ADST, ext); \
+ assign_itx_fn(pfx, w, h, flipadst_dct, DCT_FLIPADST, ext); \
+ assign_itx_fn(pfx, w, h, flipadst_adst, ADST_FLIPADST, ext); \
+ assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
+ assign_itx_fn(pfx, w, h, identity_dct, V_DCT, ext) // 12 slots; note the enum names the pair in the opposite order to the symbol (dct_adst -> ADST_DCT)
+
+#define assign_itx16_fn(pfx, w, h, ext) \
+ assign_itx12_fn(pfx, w, h, ext); \
+ assign_itx_fn(pfx, w, h, adst_identity, H_ADST, ext); \
+ assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST, ext); \
+ assign_itx_fn(pfx, w, h, identity_adst, V_ADST, ext); \
+ assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST, ext) // 16 slots: the 12 above + the H_/V_ ADST and FLIPADST entries
+
+#define assign_itx17_fn(pfx, w, h, ext) \
+ assign_itx16_fn(pfx, w, h, ext); \
+ assign_itx_fn(pfx, w, h, wht_wht, WHT_WHT, ext) // 17 slots: the 16 above + WHT_WHT
+
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; // AVX2 not reported: leave c exactly as the caller passed it
+
+#if BITDEPTH == 8 && ARCH_X86_64
+ assign_itx17_fn( , 4, 4, avx2); // empty pfx pastes to TX_<w>X<h>; pfx R (used for w != h below) pastes to RTX_<w>X<h>
+ assign_itx16_fn(R, 4, 8, avx2);
+ assign_itx16_fn(R, 4, 16, avx2);
+ assign_itx16_fn(R, 8, 4, avx2);
+ assign_itx16_fn( , 8, 8, avx2);
+ assign_itx16_fn(R, 8, 16, avx2);
+ assign_itx2_fn (R, 8, 32, avx2);
+ assign_itx16_fn(R, 16, 4, avx2);
+ assign_itx16_fn(R, 16, 8, avx2);
+ assign_itx12_fn( , 16, 16, avx2);
+ assign_itx2_fn (R, 16, 32, avx2);
+ assign_itx1_fn (R, 16, 64, avx2); // every size with a 64 dimension gets dct_dct only
+ assign_itx2_fn (R, 32, 8, avx2);
+ assign_itx2_fn (R, 32, 16, avx2);
+ assign_itx2_fn ( , 32, 32, avx2);
+ assign_itx1_fn (R, 32, 64, avx2);
+ assign_itx1_fn (R, 64, 16, avx2);
+ assign_itx1_fn (R, 64, 32, avx2);
+ assign_itx1_fn ( , 64, 64, avx2);
+#endif
+}
--- a/src/x86/loopfilter_init.c
+++ /dev/null
@@ -1,47 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/loopfilter.h"
-
-decl_loopfilter_sb_fn(dav1d_lpf_h_sb_y_avx2);
-decl_loopfilter_sb_fn(dav1d_lpf_v_sb_y_avx2);
-decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_avx2);
-decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_avx2);
-
-void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
- const unsigned flags = dav1d_get_cpu_flags();
-
- if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
- c->loop_filter_sb[0][0] = dav1d_lpf_h_sb_y_avx2;
- c->loop_filter_sb[0][1] = dav1d_lpf_v_sb_y_avx2;
- c->loop_filter_sb[1][0] = dav1d_lpf_h_sb_uv_avx2;
- c->loop_filter_sb[1][1] = dav1d_lpf_v_sb_uv_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/loopfilter_init_tmpl.c
@@ -1,0 +1,47 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/loopfilter.h"
+
+decl_loopfilter_sb_fn(dav1d_lpf_h_sb_y_avx2); // AVX2 prototypes only; the definitions are not in this file
+decl_loopfilter_sb_fn(dav1d_lpf_v_sb_y_avx2);
+decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_avx2);
+decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_avx2);
+
+void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) { // Fills c->loop_filter_sb[][] with the AVX2 routines when usable.
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; // AVX2 not reported: leave c exactly as the caller passed it
+
+#if BITDEPTH == 8 && ARCH_X86_64
+ c->loop_filter_sb[0][0] = dav1d_lpf_h_sb_y_avx2; // per the symbol names: first index 0 = y, 1 = uv; second index 0 = h, 1 = v
+ c->loop_filter_sb[0][1] = dav1d_lpf_v_sb_y_avx2;
+ c->loop_filter_sb[1][0] = dav1d_lpf_h_sb_uv_avx2;
+ c->loop_filter_sb[1][1] = dav1d_lpf_v_sb_uv_avx2;
+#endif
+}
--- a/src/x86/looprestoration_init.c
+++ /dev/null
@@ -1,86 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/looprestoration.h"
-
-#include "common/attributes.h"
-#include "common/intops.h"
-
-#if BITDEPTH == 8 && ARCH_X86_64
-void dav1d_wiener_filter_h_avx2(int16_t *dst, const pixel (*left)[4],
- const pixel *src, ptrdiff_t stride,
- const int16_t fh[7], const intptr_t w,
- int h, enum LrEdgeFlags edges);
-void dav1d_wiener_filter_v_avx2(pixel *dst, ptrdiff_t stride,
- const int16_t *mid, int w, int h,
- const int16_t fv[7], enum LrEdgeFlags edges);
-
-// Future potential optimizations:
-// - special chroma versions which don't filter [0]/[6];
-// - running filter_h_avx2 transposed (one col of 32 pixels per iteration, top
-// to bottom) instead of scanline-ordered should be faster since then the
-// if (have_left) and similar conditions run only once instead of per line;
-// - filter_v_avx2 currently runs 16 pixels per iteration, it should be possible
-// to run 32 (like filter_h_avx2), and then all vpermqs can go;
-// - maybe split out the top/bottom filter_h_avx2 from the main body filter_h_avx2,
-// since then the have_left condition can be inlined;
-// - consider having the wrapper (wiener_filter_avx2) also in hand-written
-// assembly, so the setup overhead is minimized.
-
-static void wiener_filter_avx2(pixel *const dst, const ptrdiff_t dst_stride,
- const pixel (*const left)[4],
- const pixel *lpf, const ptrdiff_t lpf_stride,
- const int w, const int h, const int16_t fh[7],
- const int16_t fv[7], const enum LrEdgeFlags edges)
-{
- ALIGN_STK_32(int16_t, mid, 68 * 384,);
-
- // horizontal filter
- dav1d_wiener_filter_h_avx2(&mid[2 * 384], left, dst, dst_stride,
- fh, w, h, edges);
- if (edges & LR_HAVE_TOP)
- dav1d_wiener_filter_h_avx2(mid, NULL, lpf, lpf_stride,
- fh, w, 2, edges);
- if (edges & LR_HAVE_BOTTOM)
- dav1d_wiener_filter_h_avx2(&mid[(2 + h) * 384], NULL,
- lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
- fh, w, 2, edges);
-
- dav1d_wiener_filter_v_avx2(dst, dst_stride, &mid[2*384], w, h, fv, edges);
-}
-#endif
-
-void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
- const unsigned flags = dav1d_get_cpu_flags();
-
- if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
- c->wiener = wiener_filter_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/looprestoration_init_tmpl.c
@@ -1,0 +1,86 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/looprestoration.h"
+
+#include "common/attributes.h"
+#include "common/intops.h"
+
+#if BITDEPTH == 8 && ARCH_X86_64
+void dav1d_wiener_filter_h_avx2(int16_t *dst, const pixel (*left)[4], // horizontal pass: reads pixels from src, writes int16_t values to dst
+ const pixel *src, ptrdiff_t stride,
+ const int16_t fh[7], const intptr_t w, // fh: 7-entry horizontal filter array
+ int h, enum LrEdgeFlags edges); // prototype only; no body in this file
+void dav1d_wiener_filter_v_avx2(pixel *dst, ptrdiff_t stride, // vertical pass: reads int16_t values from mid, writes pixels to dst
+ const int16_t *mid, int w, int h,
+ const int16_t fv[7], enum LrEdgeFlags edges); // fv: 7-entry vertical filter array; prototype only
+
+// Future potential optimizations:
+// - special chroma versions which don't filter [0]/[6];
+// - running filter_h_avx2 transposed (one col of 32 pixels per iteration, top
+// to bottom) instead of scanline-ordered should be faster since then the
+// if (have_left) and similar conditions run only once instead of per line;
+// - filter_v_avx2 currently runs 16 pixels per iteration, it should be possible
+// to run 32 (like filter_h_avx2), and then all vpermqs can go;
+// - maybe split out the top/bottom filter_h_avx2 from the main body filter_h_avx2,
+// since then the have_left condition can be inlined;
+// - consider having the wrapper (wiener_filter_avx2) also in hand-written
+// assembly, so the setup overhead is minimized.
+
+static void wiener_filter_avx2(pixel *const dst, const ptrdiff_t dst_stride, // two-pass wrapper: h-filter dst into mid, then v-filter mid back into dst
+ const pixel (*const left)[4],
+ const pixel *lpf, const ptrdiff_t lpf_stride,
+ const int w, const int h, const int16_t fh[7],
+ const int16_t fv[7], const enum LrEdgeFlags edges)
+{
+ ALIGN_STK_32(int16_t, mid, 68 * 384,); // int16_t scratch buffer, indexed below in rows of 384 entries; presumably 32-byte aligned (macro defined elsewhere)
+
+ // horizontal filter: dst is the pixel source here, output starts at row 2 of mid
+ dav1d_wiener_filter_h_avx2(&mid[2 * 384], left, dst, dst_stride,
+ fh, w, h, edges);
+ if (edges & LR_HAVE_TOP) // top edge available: filter from lpf with h = 2 into the start of mid
+ dav1d_wiener_filter_h_avx2(mid, NULL, lpf, lpf_stride,
+ fh, w, 2, edges);
+ if (edges & LR_HAVE_BOTTOM) // bottom edge available: filter from lpf + 6 strides with h = 2 into row (2 + h) of mid
+ dav1d_wiener_filter_h_avx2(&mid[(2 + h) * 384], NULL,
+ lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
+ fh, w, 2, edges);
+
+ dav1d_wiener_filter_v_avx2(dst, dst_stride, &mid[2*384], w, h, fv, edges); // vertical pass: reads mid from row 2, writes the result into dst
+}
+#endif
+
+void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) { // installs the x86 Wiener override into c; bitfn() is defined elsewhere
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; // AVX2 flag not set: leave c unchanged
+
+#if BITDEPTH == 8 && ARCH_X86_64
+ c->wiener = wiener_filter_avx2; // compiled in only when BITDEPTH == 8 and ARCH_X86_64 is set
+#endif /* BITDEPTH == 8 && ARCH_X86_64 */
+}
--- a/src/x86/mc_init.c
+++ /dev/null
@@ -1,95 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/mc.h"
-
-decl_mc_fn(dav1d_put_8tap_regular_avx2);
-decl_mc_fn(dav1d_put_8tap_regular_smooth_avx2);
-decl_mc_fn(dav1d_put_8tap_regular_sharp_avx2);
-decl_mc_fn(dav1d_put_8tap_smooth_avx2);
-decl_mc_fn(dav1d_put_8tap_smooth_regular_avx2);
-decl_mc_fn(dav1d_put_8tap_smooth_sharp_avx2);
-decl_mc_fn(dav1d_put_8tap_sharp_avx2);
-decl_mc_fn(dav1d_put_8tap_sharp_regular_avx2);
-decl_mc_fn(dav1d_put_8tap_sharp_smooth_avx2);
-decl_mc_fn(dav1d_put_bilin_avx2);
-
-decl_mct_fn(dav1d_prep_8tap_regular_avx2);
-decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2);
-decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx2);
-decl_mct_fn(dav1d_prep_8tap_smooth_avx2);
-decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx2);
-decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx2);
-decl_mct_fn(dav1d_prep_8tap_sharp_avx2);
-decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx2);
-decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
-decl_mct_fn(dav1d_prep_bilin_avx2);
-
-decl_avg_fn(dav1d_avg_avx2);
-decl_w_avg_fn(dav1d_w_avg_avx2);
-decl_mask_fn(dav1d_mask_avx2);
-decl_w_mask_fn(dav1d_w_mask_420_avx2);
-
-void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
-#define init_mc_fn(type, name, suffix) \
- c->mc[type] = dav1d_put_##name##_##suffix
-#define init_mct_fn(type, name, suffix) \
- c->mct[type] = dav1d_prep_##name##_##suffix
- const unsigned flags = dav1d_get_cpu_flags();
-
- if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
- init_mc_fn (FILTER_2D_8TAP_REGULAR, 8tap_regular, avx2);
- init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
- init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, avx2);
- init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
- init_mc_fn (FILTER_2D_8TAP_SMOOTH, 8tap_smooth, avx2);
- init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, avx2);
- init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, avx2);
- init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, avx2);
- init_mc_fn (FILTER_2D_8TAP_SHARP, 8tap_sharp, avx2);
- init_mc_fn (FILTER_2D_BILINEAR, bilin, avx2);
-
- init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, avx2);
- init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
- init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, avx2);
- init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
- init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, avx2);
- init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, avx2);
- init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, avx2);
- init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, avx2);
- init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, avx2);
- init_mct_fn(FILTER_2D_BILINEAR, bilin, avx2);
-
- c->avg = dav1d_avg_avx2;
- c->w_avg = dav1d_w_avg_avx2;
- c->mask = dav1d_mask_avx2;
- c->w_mask[2] = dav1d_w_mask_420_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/mc_init_tmpl.c
@@ -1,0 +1,95 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/mc.h"
+
+decl_mc_fn(dav1d_put_8tap_regular_avx2); // dav1d_put_* symbols: assigned to c->mc[] by the init function below
+decl_mc_fn(dav1d_put_8tap_regular_smooth_avx2);
+decl_mc_fn(dav1d_put_8tap_regular_sharp_avx2);
+decl_mc_fn(dav1d_put_8tap_smooth_avx2);
+decl_mc_fn(dav1d_put_8tap_smooth_regular_avx2);
+decl_mc_fn(dav1d_put_8tap_smooth_sharp_avx2);
+decl_mc_fn(dav1d_put_8tap_sharp_avx2);
+decl_mc_fn(dav1d_put_8tap_sharp_regular_avx2);
+decl_mc_fn(dav1d_put_8tap_sharp_smooth_avx2);
+decl_mc_fn(dav1d_put_bilin_avx2);
+
+decl_mct_fn(dav1d_prep_8tap_regular_avx2); // dav1d_prep_* symbols: assigned to c->mct[] by the init function below
+decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2);
+decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx2);
+decl_mct_fn(dav1d_prep_8tap_smooth_avx2);
+decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx2);
+decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx2);
+decl_mct_fn(dav1d_prep_8tap_sharp_avx2);
+decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx2);
+decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
+decl_mct_fn(dav1d_prep_bilin_avx2);
+
+decl_avg_fn(dav1d_avg_avx2); // assigned to c->avg below
+decl_w_avg_fn(dav1d_w_avg_avx2); // assigned to c->w_avg below
+decl_mask_fn(dav1d_mask_avx2); // assigned to c->mask below
+decl_w_mask_fn(dav1d_w_mask_420_avx2); // assigned to c->w_mask[2] below; all declarations here are unconditional, their uses are not
+
+void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) { // installs the AVX2 function pointers into c; bitfn() is defined elsewhere
+#define init_mc_fn(type, name, suffix) \
+ c->mc[type] = dav1d_put_##name##_##suffix // pastes the symbol name, e.g. dav1d_put_bilin_avx2
+#define init_mct_fn(type, name, suffix) \
+ c->mct[type] = dav1d_prep_##name##_##suffix // pastes the symbol name, e.g. dav1d_prep_bilin_avx2
+ const unsigned flags = dav1d_get_cpu_flags();
+
+ if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; // AVX2 flag not set: leave c unchanged
+
+#if BITDEPTH == 8 && ARCH_X86_64
+ init_mc_fn (FILTER_2D_8TAP_REGULAR, 8tap_regular, avx2); // c->mc[] entries, one per FILTER_2D_* index
+ init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
+ init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, avx2);
+ init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
+ init_mc_fn (FILTER_2D_8TAP_SMOOTH, 8tap_smooth, avx2);
+ init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, avx2);
+ init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, avx2);
+ init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, avx2);
+ init_mc_fn (FILTER_2D_8TAP_SHARP, 8tap_sharp, avx2);
+ init_mc_fn (FILTER_2D_BILINEAR, bilin, avx2);
+
+ init_mct_fn(FILTER_2D_8TAP_REGULAR, 8tap_regular, avx2); // c->mct[] entries, same FILTER_2D_* indices as above
+ init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
+ init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp, avx2);
+ init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
+ init_mct_fn(FILTER_2D_8TAP_SMOOTH, 8tap_smooth, avx2);
+ init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp, avx2);
+ init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular, avx2);
+ init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth, avx2);
+ init_mct_fn(FILTER_2D_8TAP_SHARP, 8tap_sharp, avx2);
+ init_mct_fn(FILTER_2D_BILINEAR, bilin, avx2);
+
+ c->avg = dav1d_avg_avx2;
+ c->w_avg = dav1d_w_avg_avx2;
+ c->mask = dav1d_mask_avx2;
+ c->w_mask[2] = dav1d_w_mask_420_avx2; // only index 2 of w_mask is set here
+#endif /* BITDEPTH == 8 && ARCH_X86_64 */
+}