shithub: dav1d

Download patch

ref: 8d2dd439005f72e2f73fc6155f0c2245cbf3227f
parent: d400361524ce739db30d552a9e54809d812710c6
author: Henrik Gramner <[email protected]>
date: Thu May 9 16:39:08 EDT 2019

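Add __attribute__((cold)) to rarely used functions

Introduce a COLD macro in common/attributes.h (expanding to nothing when
__GNUC__ is not defined) and apply it to the DSP init, CPU flag detection,
library open/close, logging, table/mask initialization and Win32 thread
wrapper functions. src/cpu.h now includes common/attributes.h, so the
redundant includes in some *_init_tmpl.c files are dropped. Also remove the
stray trailing semicolon from the ATTR_FORMAT_PRINTF definition.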

--- a/include/common/attributes.h
+++ b/include/common/attributes.h
@@ -34,10 +34,12 @@
 
 #ifdef __GNUC__
 #define ATTR_ALIAS __attribute__((may_alias))
-#define ATTR_FORMAT_PRINTF(fmt, attr) __attribute__((__format__(__printf__, fmt, attr)));
+#define ATTR_FORMAT_PRINTF(fmt, attr) __attribute__((__format__(__printf__, fmt, attr)))
+#define COLD __attribute__((cold))
 #else
 #define ATTR_ALIAS
 #define ATTR_FORMAT_PRINTF(fmt, attr)
+#define COLD
 #endif
 
 #if ARCH_X86_64
--- a/src/arm/cdef_init_tmpl.c
+++ b/src/arm/cdef_init_tmpl.c
@@ -24,7 +24,6 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include "common/attributes.h"
 #include "src/cpu.h"
 #include "src/cdef.h"
 
@@ -72,7 +71,7 @@
 #endif
 
 
-void bitfn(dav1d_cdef_dsp_init_arm)(Dav1dCdefDSPContext *const c) {
+COLD void bitfn(dav1d_cdef_dsp_init_arm)(Dav1dCdefDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
--- a/src/arm/cpu.c
+++ b/src/arm/cpu.c
@@ -27,6 +27,8 @@
 
 #include "config.h"
 
+#include "common/attributes.h"
+
 #include "src/arm/cpu.h"
 
 #if defined(HAVE_GETAUXVAL) && ARCH_ARM
@@ -73,7 +75,7 @@
 }
 #endif
 
-unsigned dav1d_get_cpu_flags_arm(void) {
+COLD unsigned dav1d_get_cpu_flags_arm(void) {
     unsigned flags = 0;
 #if ARCH_AARCH64
     flags |= DAV1D_ARM_CPU_FLAG_NEON;
--- a/src/arm/loopfilter_init_tmpl.c
+++ b/src/arm/loopfilter_init_tmpl.c
@@ -33,7 +33,7 @@
 decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_neon);
 decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_neon);
 
-void bitfn(dav1d_loop_filter_dsp_init_arm)(Dav1dLoopFilterDSPContext *const c) {
+COLD void bitfn(dav1d_loop_filter_dsp_init_arm)(Dav1dLoopFilterDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
--- a/src/arm/looprestoration_init_tmpl.c
+++ b/src/arm/looprestoration_init_tmpl.c
@@ -27,8 +27,6 @@
 
 #include "src/cpu.h"
 #include "src/looprestoration.h"
-
-#include "common/attributes.h"
 #include "src/tables.h"
 
 #if BITDEPTH == 8
@@ -258,7 +256,7 @@
 #endif // ARCH_AARCH64
 #endif // BITDEPTH == 8
 
-void bitfn(dav1d_loop_restoration_dsp_init_arm)(Dav1dLoopRestorationDSPContext *const c) {
+COLD void bitfn(dav1d_loop_restoration_dsp_init_arm)(Dav1dLoopRestorationDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
--- a/src/cdef_tmpl.c
+++ b/src/cdef_tmpl.c
@@ -254,7 +254,7 @@
     return best_dir;
 }
 
-void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
+COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
     c->dir = cdef_find_dir_c;
     c->fb[0] = cdef_filter_block_8x8_c;
     c->fb[1] = cdef_filter_block_4x8_c;
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -32,7 +32,7 @@
 
 static unsigned flags_mask = -1;
 
-unsigned dav1d_get_cpu_flags(void) {
+COLD unsigned dav1d_get_cpu_flags(void) {
     static unsigned flags;
     static uint8_t checked = 0;
 
@@ -49,6 +49,6 @@
     return flags & flags_mask;
 }
 
-void dav1d_set_cpu_flags_mask(const unsigned mask) {
+COLD void dav1d_set_cpu_flags_mask(const unsigned mask) {
     flags_mask = mask;
 }
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -30,6 +30,8 @@
 
 #include "config.h"
 
+#include "common/attributes.h"
+
 #include "dav1d/common.h"
 
 #if ARCH_AARCH64 || ARCH_ARM
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -725,7 +725,7 @@
     }
 }
 
-void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
+COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
     c->intra_pred[DC_PRED      ] = ipred_dc_c;
     c->intra_pred[DC_128_PRED  ] = ipred_dc_128_c;
     c->intra_pred[TOP_DC_PRED  ] = ipred_dc_top_c;
--- a/src/itx_tmpl.c
+++ b/src/itx_tmpl.c
@@ -193,7 +193,7 @@
     memset(coeff, 0, sizeof(*coeff) * 4 * 4);
 }
 
-void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c) {
+COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c) {
 #define assign_itx_all_fn64(w, h, pfx) \
     c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT  ] = \
         inv_txfm_add_dct_dct_##w##x##h##_c
--- a/src/lib.c
+++ b/src/lib.c
@@ -46,17 +46,17 @@
 #include "src/wedge.h"
 #include "src/film_grain.h"
 
-static void init_internal(void) {
+static COLD void init_internal(void) {
     dav1d_init_wedge_masks();
     dav1d_init_interintra_masks();
     dav1d_init_qm_tables();
 }
 
-const char *dav1d_version(void) {
+COLD const char *dav1d_version(void) {
     return DAV1D_VERSION;
 }
 
-void dav1d_default_settings(Dav1dSettings *const s) {
+COLD void dav1d_default_settings(Dav1dSettings *const s) {
     s->n_frame_threads = 1;
     s->n_tile_threads = 1;
     s->apply_grain = 1;
@@ -71,9 +71,7 @@
 
 static void close_internal(Dav1dContext **const c_out, int flush);
 
-int dav1d_open(Dav1dContext **const c_out,
-               const Dav1dSettings *const s)
-{
+COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) {
     static pthread_once_t initted = PTHREAD_ONCE_INIT;
     pthread_once(&initted, init_internal);
 
@@ -432,12 +430,12 @@
     c->frame_thread.next = 0;
 }
 
-void dav1d_close(Dav1dContext **const c_out) {
+COLD void dav1d_close(Dav1dContext **const c_out) {
     validate_input(c_out != NULL);
     close_internal(c_out, 1);
 }
 
-static void close_internal(Dav1dContext **const c_out, int flush) {
+static COLD void close_internal(Dav1dContext **const c_out, int flush) {
     Dav1dContext *const c = *c_out;
     if (!c) return;
 
--- a/src/log.c
+++ b/src/log.c
@@ -36,14 +36,14 @@
 #include "src/internal.h"
 #include "src/log.h"
 
-void dav1d_log_default_callback(void *const cookie,
-                                const char *const format, va_list ap)
+COLD void dav1d_log_default_callback(void *const cookie,
+                                     const char *const format, va_list ap)
 {
     vfprintf(stderr, format, ap);
 }
 
 #if CONFIG_LOG
-void dav1d_log(Dav1dContext *const c, const char *const format, ...) {
+COLD void dav1d_log(Dav1dContext *const c, const char *const format, ...) {
     validate_input(c != NULL);
 
     if (!c->logger.callback)
--- a/src/loopfilter_tmpl.c
+++ b/src/loopfilter_tmpl.c
@@ -244,7 +244,7 @@
     }
 }
 
-void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
+COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
     c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
     c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
     c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
--- a/src/looprestoration_tmpl.c
+++ b/src/looprestoration_tmpl.c
@@ -573,7 +573,7 @@
     }
 }
 
-void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c) {
+COLD void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c) {
     c->wiener = wiener_c;
     c->selfguided = selfguided_c;
 
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -912,7 +912,7 @@
     } while (--h);
 }
 
-void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
+COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
 #define init_mc_fns(type, name) do { \
     c->mc        [type] = put_##name##_c; \
     c->mc_scaled [type] = put_##name##_scaled_c; \
--- a/src/qm.c
+++ b/src/qm.c
@@ -29,6 +29,8 @@
 
 #include <string.h>
 
+#include "common/attributes.h"
+
 #include "src/qm.h"
 
 static const uint8_t qm_tbl_4x4_t[][2][10] = {
@@ -3104,7 +3106,7 @@
     }
 }
 
-void dav1d_init_qm_tables(void) {
+COLD void dav1d_init_qm_tables(void) {
     // This function is guaranteed to be called only once
 
     for (int i = 0; i < 15; i++)
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -155,7 +155,7 @@
     }
 }
 
-static void init_chroma(uint8_t *chroma, const uint8_t *luma,
+static COLD void init_chroma(uint8_t *chroma, const uint8_t *luma,
                         const int sign, const int w, const int h, const int ss_ver)
 {
     for (int y = 0; y < h; y += 1 + ss_ver) {
@@ -169,12 +169,12 @@
     }
 }
 
-static void fill2d_16x2(uint8_t *dst, const int w, const int h,
-                        const enum BlockSize bs,
-                        const uint8_t (*const master)[64 * 64],
-                        const wedge_code_type *const cb,
-                        uint8_t *masks_444, uint8_t *masks_422,
-                        uint8_t *masks_420, const unsigned signs)
+static COLD void fill2d_16x2(uint8_t *dst, const int w, const int h,
+                             const enum BlockSize bs,
+                             const uint8_t (*const master)[64 * 64],
+                             const wedge_code_type *const cb,
+                             uint8_t *masks_444, uint8_t *masks_422,
+                             uint8_t *masks_420, const unsigned signs)
 {
     uint8_t *ptr = dst;
     for (int n = 0; n < 16; n++) {
@@ -222,7 +222,7 @@
     }
 }
 
-void dav1d_init_wedge_masks(void) {
+COLD void dav1d_init_wedge_masks(void) {
     // This function is guaranteed to be called only once
 
     enum WedgeMasterLineType {
@@ -304,10 +304,10 @@
 #undef set
 #undef set1
 
-static void build_nondc_ii_masks(uint8_t *const mask_v,
-                                 uint8_t *const mask_h,
-                                 uint8_t *const mask_sm,
-                                 const int w, const int h, const int step)
+static COLD void build_nondc_ii_masks(uint8_t *const mask_v,
+                                      uint8_t *const mask_h,
+                                      uint8_t *const mask_sm,
+                                      const int w, const int h, const int step)
 {
     static const uint8_t ii_weights_1d[] = {
         60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10,  8,  7,
@@ -323,7 +323,7 @@
     }
 }
 
-void dav1d_init_interintra_masks(void) {
+COLD void dav1d_init_interintra_masks(void) {
     // This function is guaranteed to be called only once
 
     memset(ii_dc_mask, 32, 32 * 32);
--- a/src/win32/thread.c
+++ b/src/win32/thread.c
@@ -33,17 +33,19 @@
 #include <stdlib.h>
 #include <windows.h>
 
+#include "common/attributes.h"
+
 #include "src/thread.h"
 
-static unsigned __stdcall thread_entrypoint(void *const data) {
+static COLD unsigned __stdcall thread_entrypoint(void *const data) {
     pthread_t *const t = data;
     t->arg = t->func(t->arg);
     return 0;
 }
 
-int dav1d_pthread_create(pthread_t *const thread,
-                         const pthread_attr_t *const attr,
-                         void *(*const func)(void*), void *const arg)
+COLD int dav1d_pthread_create(pthread_t *const thread,
+                              const pthread_attr_t *const attr,
+                              void *(*const func)(void*), void *const arg)
 {
     const unsigned stack_size = attr ? attr->stack_size : 0;
     thread->func = func;
@@ -53,7 +55,7 @@
     return !thread->h;
 }
 
-int dav1d_pthread_join(pthread_t *const thread, void **const res) {
+COLD int dav1d_pthread_join(pthread_t *const thread, void **const res) {
     if (WaitForSingleObject(thread->h, INFINITE))
         return 1;
 
@@ -63,8 +65,8 @@
     return !CloseHandle(thread->h);
 }
 
-int dav1d_pthread_once(pthread_once_t *const once_control,
-                       void (*const init_routine)(void))
+COLD int dav1d_pthread_once(pthread_once_t *const once_control,
+                            void (*const init_routine)(void))
 {
     BOOL pending = FALSE;
 
--- a/src/x86/cdef_init_tmpl.c
+++ b/src/x86/cdef_init_tmpl.c
@@ -44,7 +44,7 @@
 decl_cdef_dir_fn(dav1d_cdef_dir_sse4);
 decl_cdef_dir_fn(dav1d_cdef_dir_ssse3);
 
-void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
+COLD void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -29,12 +29,14 @@
 
 #include <stdint.h>
 
+#include "common/attributes.h"
+
 #include "src/x86/cpu.h"
 
 void dav1d_cpu_cpuid(uint32_t *info, int leaf);
 uint64_t dav1d_cpu_xgetbv(int xcr);
 
-unsigned dav1d_get_cpu_flags_x86(void) {
+COLD unsigned dav1d_get_cpu_flags_x86(void) {
     uint32_t info[4] = {0}, n_ids;
     unsigned flags = 0;
 
--- a/src/x86/ipred_init_tmpl.c
+++ b/src/x86/ipred_init_tmpl.c
@@ -75,7 +75,7 @@
 
 decl_pal_pred_fn(dav1d_pal_pred_ssse3);
 
-void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
+COLD void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
--- a/src/x86/itx_init_tmpl.c
+++ b/src/x86/itx_init_tmpl.c
@@ -98,7 +98,7 @@
 decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_ssse3);
 decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_ssse3);
 
-void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
+COLD void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
 #define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
     c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
         dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
--- a/src/x86/loopfilter_init_tmpl.c
+++ b/src/x86/loopfilter_init_tmpl.c
@@ -33,7 +33,7 @@
 decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_avx2);
 decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_avx2);
 
-void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
+COLD void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
--- a/src/x86/looprestoration_init_tmpl.c
+++ b/src/x86/looprestoration_init_tmpl.c
@@ -28,7 +28,6 @@
 #include "src/cpu.h"
 #include "src/looprestoration.h"
 
-#include "common/attributes.h"
 #include "common/intops.h"
 #include "src/tables.h"
 
@@ -211,7 +210,7 @@
 # endif
 #endif
 
-void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
+COLD void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
 
     if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
--- a/src/x86/mc_init_tmpl.c
+++ b/src/x86/mc_init_tmpl.c
@@ -93,7 +93,7 @@
 decl_emu_edge_fn(dav1d_emu_edge_avx2);
 decl_emu_edge_fn(dav1d_emu_edge_ssse3);
 
-void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
+COLD void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
 #define init_mc_fn(type, name, suffix) \
     c->mc[type] = dav1d_put_##name##_##suffix
 #define init_mct_fn(type, name, suffix) \