shithub: dav1d

ref: 35e777f3780327ea055339408bf7f9c1313254ce
parent: e72c2e45748a9f80223d57333772419f00fc4e7e
author: Janne Grunau <[email protected]>
date: Sat Sep 29 09:57:29 EDT 2018

build: add support for arm/aarch64 asm and integrate checkasm

--- a/meson.build
+++ b/meson.build
@@ -59,7 +59,9 @@
 
 # ASM option
 is_asm_enabled = (get_option('build_asm') == true and
-    host_machine.cpu_family().startswith('x86'))
+    (host_machine.cpu_family().startswith('x86') or
+     host_machine.cpu_family() == 'aarch64'      or
+     host_machine.cpu_family().startswith('arm')))
 cdata.set10('HAVE_ASM', is_asm_enabled)
 
 
@@ -183,6 +185,21 @@
     endif
 endif
 
+cdata.set10('ARCH_AARCH64', host_machine.cpu_family() == 'aarch64')
+cdata.set10('ARCH_ARM',     host_machine.cpu_family().startswith('arm'))
+if (is_asm_enabled and
+    (host_machine.cpu_family() == 'aarch64' or
+     host_machine.cpu_family().startswith('arm')))
+
+    as_func_code = '''__asm__ (
+".func meson_test"
+".endfunc"
+);
+'''
+    have_as_func = cc.compiles(as_func_code)
+    cdata.set10('HAVE_AS_FUNC', have_as_func)
+endif
+
 if host_machine.cpu_family().startswith('x86')
     cdata.set10('ARCH_X86', true)
     if host_machine.cpu_family() == 'x86_64'
@@ -205,6 +222,7 @@
 endif
 
 if cc.symbols_have_underscore_prefix()
+    cdata.set10('PREFIX', true)
     cdata_asm.set10('PREFIX', true)
 endif
 
@@ -216,7 +234,7 @@
 #
 # ASM specific stuff
 #
-if is_asm_enabled
+if is_asm_enabled and host_machine.cpu_family().startswith('x86')
 
     # Generate config.asm
     config_asm_target = configure_file(output: 'config.asm', output_format: 'nasm', configuration: cdata_asm)
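
Note on the probe: .func/.endfunc are gas directives that not every assembler accepts (Clang's integrated assembler is a common example), so support is detected rather than assumed and the directives are gated behind HAVE_AS_FUNC in src/arm/asm.S. A sketch of the translation unit cc.compiles() builds from as_func_code:

/* compile-only probe (sketch): if the assembler rejects the
 * .func/.endfunc directives, this translation unit fails to build
 * and HAVE_AS_FUNC ends up 0 */
__asm__ (
".func meson_test"
".endfunc"
);
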
--- /dev/null
+++ b/src/arm/32/util.S
@@ -1,0 +1,50 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#ifndef __DAV1D_SRC_ARM_32_UTIL_S__
+#define __DAV1D_SRC_ARM_32_UTIL_S__
+
+#include "config.h"
+#include "src/arm/asm.S"
+
+.macro movrel rd, val
+#if defined(PIC)
+    ldr         \rd,  1f
+    b           2f
+1:
+@ FIXME: thumb
+    .word       \val - (2f + 8)
+2:
+    add         \rd,  \rd,  pc
+#else
+    movw        \rd, #:lower16:\val
+    movt        \rd, #:upper16:\val
+#endif
+.endm
+
+#endif /* __DAV1D_SRC_ARM_32_UTIL_S__ */
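
How movrel resolves on 32-bit ARM: without PIC it builds the absolute address from two 16-bit halves (movw/movt); with PIC it loads a literal holding val - (2f + 8) and adds pc, which on ARM reads as the address of the add instruction plus 8. The PIC arithmetic as a C sketch (movrel_pic and its parameters are illustrative, not part of the patch):

#include <stdint.h>

/* sketch of the PIC branch: the assembler stores val - (2f + 8) in
 * the literal word; at run time "add rd, rd, pc" reads pc as the
 * address of label 2 plus 8, so the sum is val again */
uintptr_t movrel_pic(uintptr_t val, uintptr_t label2_addr) {
    intptr_t literal = (intptr_t)(val - (label2_addr + 8));
    uintptr_t pc     = label2_addr + 8;
    return (uintptr_t)(literal + pc);  /* == val, position-independently */
}
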
--- /dev/null
+++ b/src/arm/64/util.S
@@ -1,0 +1,62 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#ifndef __DAV1D_SRC_ARM_64_UTIL_S__
+#define __DAV1D_SRC_ARM_64_UTIL_S__
+
+#include "config.h"
+#include "src/arm/asm.S"
+
+.macro  movrel rd, val, offset=0
+#if defined(PIC) && defined(__APPLE__)
+  .if \offset < 0
+    adrp        \rd, \val@PAGE
+    add         \rd, \rd, \val@PAGEOFF
+    sub         \rd, \rd, -(\offset)
+  .else
+    adrp        \rd, \val+(\offset)@PAGE
+    add         \rd, \rd, \val+(\offset)@PAGEOFF
+  .endif
+#elif defined(PIC) && defined(_WIN32)
+  .if \offset < 0
+    adrp        \rd, \val
+    add         \rd, \rd, :lo12:\val
+    sub         \rd, \rd, -(\offset)
+  .else
+    adrp        \rd, \val+(\offset)
+    add         \rd, \rd, :lo12:\val+(\offset)
+  .endif
+#elif defined(PIC)
+    adrp        \rd, \val+(\offset)
+    add         \rd, \rd, :lo12:\val+(\offset)
+#else
+    ldr         \rd, =\val+\offset
+#endif
+.endm
+
+#endif /* __DAV1D_SRC_ARM_64_UTIL_S__ */
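
The generic ELF PIC path is the usual adrp/add pair: adrp yields the 4 KiB page of the symbol as a PC-relative page offset, and the :lo12: relocation on the add fills in the remaining 12 bits. A C sketch of that arithmetic (movrel_a64 and its parameters are illustrative):

#include <stdint.h>

/* sketch: adrp computes (pc & ~0xfff) + (page_delta << 12);
 * the following add supplies the low 12 bits of the address */
uintptr_t movrel_a64(uintptr_t pc, intptr_t page_delta, uintptr_t sym_lo12) {
    uintptr_t page = (pc & ~(uintptr_t)0xfff) + ((uintptr_t)page_delta << 12);
    return page + (sym_lo12 & 0xfff);
}
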
--- /dev/null
+++ b/src/arm/asm.S
@@ -1,0 +1,94 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_ARM_ASM_S__
+#define __DAV1D_SRC_ARM_ASM_S__
+
+#include "config.h"
+
+#ifndef PRIVATE_PREFIX
+#define PRIVATE_PREFIX dav1d_
+#endif
+
+#define PASTE(a,b) a ## b
+#define CONCAT(a,b) PASTE(a,b)
+
+#ifdef PREFIX
+#define EXTERN CONCAT(_,PRIVATE_PREFIX)
+#else
+#define EXTERN PRIVATE_PREFIX
+#endif
+
+.macro function name, export=0, align=2
+    .macro endfunc
+#ifdef __ELF__
+        .size   \name, . - \name
+#endif
+#if HAVE_AS_FUNC
+        .endfunc
+#endif
+        .purgem endfunc
+    .endm
+    .text
+    .align \align
+  .if \export
+    .global EXTERN\name
+#ifdef __ELF__
+    .type   EXTERN\name, %function
+#endif
+#if HAVE_AS_FUNC
+    .func   EXTERN\name
+#endif
+EXTERN\name:
+  .else
+#ifdef __ELF__
+    .type \name, %function
+#endif
+#if HAVE_AS_FUNC
+    .func \name
+#endif
+  .endif
+\name:
+.endm
+
+.macro  const   name, align=2
+    .macro endconst
+#ifdef __ELF__
+        .size   \name, . - \name
+#endif
+        .purgem endconst
+    .endm
+#if !defined(__MACH__)
+        .section        .rodata
+#else
+        .const_data
+#endif
+        .align          \align
+\name:
+.endm
+
+#endif /* __DAV1D_SRC_ARM_ASM_S__ */
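
EXTERN reproduces the C compiler's symbol mangling: when meson.build detects cc.symbols_have_underscore_prefix() it defines PREFIX, and exported labels gain a leading underscore, e.g. _dav1d_foo instead of dav1d_foo. A small C demo of the same token pasting (the STR helpers and the foo name are illustrative):

#include <stdio.h>

#define PASTE(a,b) a ## b
#define CONCAT(a,b) PASTE(a,b)
#define PRIVATE_PREFIX dav1d_
#define STR_(x) #x
#define STR(x) STR_(x)

int main(void) {
    /* with PREFIX defined, EXTERN is CONCAT(_, PRIVATE_PREFIX), so
     * "function foo, export=1" emits the label _dav1d_foo */
    puts(STR(CONCAT(CONCAT(_, PRIVATE_PREFIX), foo)));  /* _dav1d_foo */
    return 0;
}
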
--- /dev/null
+++ b/src/arm/cpu.c
@@ -1,0 +1,32 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/arm/cpu.h"
+
+unsigned dav1d_get_cpu_flags_arm(void) {
+    return DAV1D_ARM_CPU_FLAG_NEON;
+}
--- /dev/null
+++ b/src/arm/cpu.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_ARM_CPU_H__
+#define __DAV1D_SRC_ARM_CPU_H__
+
+enum CpuFlags {
+    DAV1D_ARM_CPU_FLAG_NEON = 1 << 0,
+};
+
+unsigned dav1d_get_cpu_flags_arm(void);
+
+#endif /* __DAV1D_SRC_ARM_CPU_H__ */
--- /dev/null
+++ b/src/cpu.c
@@ -1,0 +1,54 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "src/cpu.h"
+
+static unsigned flags_mask = -1;
+
+unsigned dav1d_get_cpu_flags(void) {
+    static unsigned flags;
+    static uint8_t checked = 0;
+
+    if (!checked) {
+#if ARCH_AARCH64 || ARCH_ARM
+        flags = dav1d_get_cpu_flags_arm();
+#elif ARCH_X86 && HAVE_ASM
+        flags = dav1d_get_cpu_flags_x86();
+#else
+        flags = 0;
+#endif
+        checked = 1;
+    }
+    return flags & flags_mask;
+}
+
+void dav1d_set_cpu_flags_mask(const unsigned mask) {
+    flags_mask = mask;
+}
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -30,11 +30,13 @@
 
 #include "config.h"
 
-#if ARCH_X86
+#if ARCH_AARCH64 || ARCH_ARM
+#include "src/arm/cpu.h"
+#elif ARCH_X86
 #include "src/x86/cpu.h"
-#else
-#define dav1d_get_cpu_flags 0
-#define dav1d_set_cpu_flags_mask(mask) while (0)
 #endif
+
+unsigned dav1d_get_cpu_flags(void);
+void dav1d_set_cpu_flags_mask(const unsigned mask);
 
 #endif /* __DAV1D_SRC_CPU_H__ */
--- a/src/meson.build
+++ b/src/meson.build
@@ -29,6 +29,7 @@
 # libdav1d source files
 libdav1d_sources = files(
     'picture.c',
+    'cpu.c',
     'data.c',
     'ref.c',
     'getbits.c',
@@ -73,26 +74,42 @@
 )
 
 # ASM specific sources
+libdav1d_nasm_objs = []
 if is_asm_enabled
+    if (host_machine.cpu_family() == 'aarch64' or
+        host_machine.cpu_family().startswith('arm'))
 
-    libdav1d_sources += files(
-        'x86/cpu.c',
-    )
+        libdav1d_sources += files(
+            'arm/cpu.c',
+        )
+        libdav1d_tmpl_sources += files(
+        )
+        if host_machine.cpu_family() == 'aarch64'
+            libdav1d_tmpl_sources += files(
+            )
+        elif host_machine.cpu_family().startswith('arm')
+            libdav1d_tmpl_sources += files(
+            )
+        endif
+    elif host_machine.cpu_family().startswith('x86')
 
-    libdav1d_tmpl_sources += files(
-        'x86/mc_init.c',
-    )
+        libdav1d_sources += files(
+            'x86/cpu.c',
+        )
 
-    # NASM source files
-    libdav1d_sources_asm = files(
-        'x86/cpuid.asm',
-        'x86/mc.asm',
-    )
+        libdav1d_tmpl_sources += files(
+            'x86/mc_init.c',
+        )
 
-    # Compile the ASM sources with NASM
-    libdav1d_nasm_objs = nasm_gen.process(libdav1d_sources_asm)
-else
-    libdav1d_nasm_objs = []
+        # NASM source files
+        libdav1d_sources_asm = files(
+            'x86/cpuid.asm',
+            'x86/mc.asm',
+        )
+
+        # Compile the ASM sources with NASM
+        libdav1d_nasm_objs = nasm_gen.process(libdav1d_sources_asm)
+    endif
 endif
 
 
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -32,7 +32,7 @@
 void dav1d_cpu_cpuid(uint32_t *info, int leaf);
 uint64_t dav1d_cpu_xgetbv(int xcr);
 
-static unsigned get_cpu_flags(void) {
+unsigned dav1d_get_cpu_flags_x86(void) {
     uint32_t info[4] = {0}, n_ids;
     unsigned flags = 0;
 
@@ -64,21 +64,4 @@
     }
 
     return flags;
-}
-
-static unsigned flags_mask = -1;
-
-unsigned dav1d_get_cpu_flags(void) {
-    static unsigned flags;
-    static uint8_t checked = 0;
-
-    if (!checked) {
-        flags = get_cpu_flags();
-        checked = 1;
-    }
-    return flags & flags_mask;
-}
-
-void dav1d_set_cpu_flags_mask(const unsigned mask) {
-    flags_mask = mask;
 }
--- a/src/x86/cpu.h
+++ b/src/x86/cpu.h
@@ -40,7 +40,6 @@
     DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8, /* F + CD + BW + DQ + VL */
 };
 
-unsigned dav1d_get_cpu_flags(void);
-void dav1d_set_cpu_flags_mask(unsigned mask);
+unsigned dav1d_get_cpu_flags_x86(void);
 
 #endif /* __DAV1D_SRC_X86_CPU_H__ */
--- /dev/null
+++ b/tests/checkasm/arm/checkasm_32.S
@@ -1,0 +1,172 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#define PRIVATE_PREFIX checkasm_
+
+#include "src/arm/asm.S"
+#include "src/arm/32/util.S"
+
+const register_init, align=3
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+endconst
+
+const error_message_fpscr
+    .asciz "failed to preserve register FPSCR, changed bits: %x"
+error_message_gpr:
+    .asciz "failed to preserve register r%d"
+error_message_vfp:
+    .asciz "failed to preserve register d%d"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 4)
+
+@ align the used stack space to 8 to preserve the stack alignment
+#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)
+
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checked_call_\variant, export=1
+    push        {r4-r11, lr}
+.ifc \variant, vfp
+    vpush       {d8-d15}
+    fmrx        r4,  FPSCR
+    push        {r4}
+.equ pushed, pushed + 16*4 + 4
+.endif
+
+    movrel      r12, register_init
+.ifc \variant, vfp
+    vldm        r12, {d8-d15}
+.endif
+    ldm         r12, {r4-r11}
+
+    sub         sp,  sp,  #ARG_STACK_A
+.equ pos, 0
+.rept MAX_ARGS-4
+    ldr         r12, [sp, #ARG_STACK_A + pushed + 8 + pos]
+    str         r12, [sp, #pos]
+.equ pos, pos + 4
+.endr
+
+    mov         r12, r0
+    mov         r0,  r2
+    mov         r1,  r3
+    ldrd        r2,  r3,  [sp, #ARG_STACK_A + pushed]
+    blx         r12
+    add         sp,  sp,  #ARG_STACK_A
+
+    push        {r0, r1}
+    movrel      r12, register_init
+.ifc \variant, vfp
+.macro check_reg_vfp, dreg, offset
+    ldrd        r2,  r3,  [r12, #8 * (\offset)]
+    vmov        r0,  lr,  \dreg
+    eor         r2,  r2,  r0
+    eor         r3,  r3,  lr
+    orrs        r2,  r2,  r3
+    bne         4f
+.endm
+
+.irp n, 8, 9, 10, 11, 12, 13, 14, 15
+    @ keep track of the checked double/SIMD register
+    mov         r1,  #\n
+    check_reg_vfp d\n, \n-8
+.endr
+.purgem check_reg_vfp
+
+    fmrx        r1,  FPSCR
+    ldr         r3,  [sp, #8]
+    eor         r1,  r1,  r3
+    @ Ignore changes in bits 0-4 and 7
+    bic         r1,  r1,  #0x9f
+    @ Ignore changes in the topmost 5 bits
+    bics        r1,  r1,  #0xf8000000
+    bne         3f
+.endif
+
+    @ keep track of the checked GPR
+    mov         r1,  #4
+.macro check_reg reg1, reg2=
+    ldrd        r2,  r3,  [r12], #8
+    eors        r2,  r2,  \reg1
+    bne         2f
+    add         r1,  r1,  #1
+.ifnb \reg2
+    eors        r3,  r3,  \reg2
+    bne         2f
+.endif
+    add         r1,  r1,  #1
+.endm
+    check_reg   r4,  r5
+    check_reg   r6,  r7
+@ r9 is a volatile register in the iOS ABI
+#ifdef __APPLE__
+    check_reg   r8
+#else
+    check_reg   r8,  r9
+#endif
+    check_reg   r10, r11
+.purgem check_reg
+
+    b           0f
+4:
+    movrel      r0, error_message_vfp
+    b           1f
+3:
+    movrel      r0, error_message_fpscr
+    b           1f
+2:
+    movrel      r0, error_message_gpr
+1:
+#ifdef PREFIX
+    blx         _checkasm_fail_func
+#else
+    blx         checkasm_fail_func
+#endif
+0:
+    pop         {r0, r1}
+.ifc \variant, vfp
+    pop         {r2}
+    fmxr        FPSCR, r2
+    vpop        {d8-d15}
+.endif
+    pop         {r4-r11, pc}
+endfunc
+.endm
+
+clobbercheck vfp
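
ARG_STACK_A rounds the argument spill area so that sp stays 8-byte aligned after the register pushes, as the AAPCS requires. The same computation as a C sketch (arg_stack_a is an illustrative name; the two pushed values match the gpr-only and vfp variants above):

#include <stdio.h>

#define MAX_ARGS  15
#define ARG_STACK (4 * (MAX_ARGS - 4))

/* round (spill area + bytes already pushed) up to 8, then drop the
 * pushed bytes again, so that after "sub sp, sp, #ARG_STACK_A" the
 * stack pointer is 8-byte aligned */
static int arg_stack_a(int pushed) {
    return ((ARG_STACK + pushed + 7) & ~7) - pushed;
}

int main(void) {
    printf("%d\n", arg_stack_a(4 * 9));            /* gpr-only variant */
    printf("%d\n", arg_stack_a(4 * 9 + 16*4 + 4)); /* vfp variant */
    return 0;
}
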
--- /dev/null
+++ b/tests/checkasm/arm/checkasm_64.S
@@ -1,0 +1,170 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#define PRIVATE_PREFIX checkasm_
+
+#include "src/arm/asm.S"
+#include "src/arm/64/util.S"
+
+const register_init, align=4
+    .quad 0x21f86d66c8ca00ce
+    .quad 0x75b6ba21077c48ad
+    .quad 0xed56bb2dcb3c7736
+    .quad 0x8bda43d3fd1a7e06
+    .quad 0xb64a9c9e5d318408
+    .quad 0xdf9a54b303f1d3a3
+    .quad 0x4a75479abd64e097
+    .quad 0x249214109d5d1c88
+    .quad 0x1a1b2550a612b48c
+    .quad 0x79445c159ce79064
+    .quad 0x2eed899d5a28ddcd
+    .quad 0x86b2536fcd8cf636
+    .quad 0xb0856806085e7943
+    .quad 0x3f2bf84fc0fcca4e
+    .quad 0xacbd382dcf5b8de2
+    .quad 0xd229e1f5b281303f
+    .quad 0x71aeaff20b095fd9
+    .quad 0xab63e2e11fa38ed9
+endconst
+
+
+const error_message
+    .asciz "failed to preserve register"
+endconst
+
+
+// max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
+
+function stack_clobber, export=1
+    mov         x3,  sp
+    mov         x2,  #CLOBBER_STACK
+1:
+    stp         x0,  x1,  [sp, #-16]!
+    subs        x2,  x2,  #16
+    b.gt        1b
+    mov         sp,  x3
+    ret
+endfunc
+
+#define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15)
+
+function checked_call, export=1
+    stp         x29, x30, [sp, #-16]!
+    mov         x29, sp
+    stp         x19, x20, [sp, #-16]!
+    stp         x21, x22, [sp, #-16]!
+    stp         x23, x24, [sp, #-16]!
+    stp         x25, x26, [sp, #-16]!
+    stp         x27, x28, [sp, #-16]!
+    stp         d8,  d9,  [sp, #-16]!
+    stp         d10, d11, [sp, #-16]!
+    stp         d12, d13, [sp, #-16]!
+    stp         d14, d15, [sp, #-16]!
+
+    movrel      x9, register_init
+    ldp         d8,  d9,  [x9], #16
+    ldp         d10, d11, [x9], #16
+    ldp         d12, d13, [x9], #16
+    ldp         d14, d15, [x9], #16
+    ldp         x19, x20, [x9], #16
+    ldp         x21, x22, [x9], #16
+    ldp         x23, x24, [x9], #16
+    ldp         x25, x26, [x9], #16
+    ldp         x27, x28, [x9], #16
+
+    sub         sp,  sp,  #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-8
+    // Skip the first 8 args, which are loaded into registers
+    ldr         x9, [x29, #16 + 8*8 + pos]
+    str         x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+
+    mov         x12, x0
+    ldp         x0,  x1,  [x29, #16]
+    ldp         x2,  x3,  [x29, #32]
+    ldp         x4,  x5,  [x29, #48]
+    ldp         x6,  x7,  [x29, #64]
+    blr         x12
+    add         sp,  sp,  #ARG_STACK
+    stp         x0,  x1,  [sp, #-16]!
+    movrel      x9, register_init
+    movi        v3.8h,  #0
+
+.macro check_reg_neon reg1, reg2
+    ldr         q0,  [x9], #16
+    uzp1        v1.2d,  v\reg1\().2d, v\reg2\().2d
+    eor         v0.16b, v0.16b, v1.16b
+    orr         v3.16b, v3.16b, v0.16b
+.endm
+    check_reg_neon  8,  9
+    check_reg_neon  10, 11
+    check_reg_neon  12, 13
+    check_reg_neon  14, 15
+    uqxtn       v3.8b,  v3.8h
+    umov        x3,  v3.d[0]
+
+.macro check_reg reg1, reg2
+    ldp         x0,  x1,  [x9], #16
+    eor         x0,  x0,  \reg1
+    eor         x1,  x1,  \reg2
+    orr         x3,  x3,  x0
+    orr         x3,  x3,  x1
+.endm
+    check_reg   x19, x20
+    check_reg   x21, x22
+    check_reg   x23, x24
+    check_reg   x25, x26
+    check_reg   x27, x28
+
+    cbz         x3,  0f
+
+    movrel      x0, error_message
+#ifdef PREFIX
+    bl          _checkasm_fail_func
+#else
+    bl          checkasm_fail_func
+#endif
+0:
+    ldp         x0,  x1,  [sp], #16
+    ldp         d14, d15, [sp], #16
+    ldp         d12, d13, [sp], #16
+    ldp         d10, d11, [sp], #16
+    ldp         d8,  d9,  [sp], #16
+    ldp         x27, x28, [sp], #16
+    ldp         x25, x26, [sp], #16
+    ldp         x23, x24, [sp], #16
+    ldp         x21, x22, [sp], #16
+    ldp         x19, x20, [sp], #16
+    ldp         x29, x30, [sp], #16
+    ret
+endfunc
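
The check accumulates differences instead of branching per register: each restored callee-saved value is XORed against its register_init pattern, the results are ORed together (uzp1/eor/orr for the NEON half, eor/orr for the GPRs), and a single cbz decides pass or fail. The same idea as a C analogue (check_regs is illustrative):

#include <stdint.h>

/* C analogue of the accumulate-then-branch check above: any bit that
 * differs from the register_init pattern survives the ORs, and one
 * final test reports the clobber */
static uint64_t check_regs(const uint64_t *saved, const uint64_t *init,
                           int n) {
    uint64_t diff = 0;
    for (int i = 0; i < n; i++)
        diff |= saved[i] ^ init[i];
    return diff;  /* nonzero => some register was not preserved */
}
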
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -73,6 +73,8 @@
     { "AVX",     "avx",    DAV1D_X86_CPU_FLAG_AVX },
     { "AVX2",    "avx2",   DAV1D_X86_CPU_FLAG_AVX2 },
     { "AVX-512", "avx512", DAV1D_X86_CPU_FLAG_AVX512 },
+#elif ARCH_AARCH64 || ARCH_ARM
+    { "NEON",    "neon",   DAV1D_ARM_CPU_FLAG_NEON },
 #endif
     { 0 }
 };
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -92,6 +92,7 @@
 }
 #define readtime readtime
 #endif
+#endif
 
 /* Verifies that clobbered callee-saved registers
  * are properly saved and restored */
@@ -122,13 +123,34 @@
 #define declare_new(ret, ...)\
     ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
 #define call_new(...) checked_call(func_new, __VA_ARGS__)
+#elif ARCH_ARM
+/* Use a dummy argument so the real parameters are offset by 2 slots
+ * rather than 1. This keeps any 8-byte alignment of the parameters
+ * the same as in a direct call without the wrapper arguments. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, int dummy, __VA_ARGS__) =\
+    (void *)checkasm_checked_call_vfp;
+#define call_new(...) checked_call(func_new, 0, __VA_ARGS__)
+#elif ARCH_AARCH64 && !defined(__APPLE__)
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, int, int, int, int, int, int, int,\
+                        __VA_ARGS__) =\
+    (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...)\
+    (checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB),\
+     checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__))
 #endif
-#else
+#else /* HAVE_ASM */
 #define declare_new(ret, ...)
 /* Call the function */
 #define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
-#endif
-#endif
+#endif /* HAVE_ASM */
 
 /* Benchmark the function */
 #ifdef readtime
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -49,7 +49,14 @@
         checkasm_bitdepth_objs += checkasm_bitdepth_lib.extract_all_objects()
     endforeach
 
-    checkasm_nasm_objs = nasm_gen.process(files('checkasm/x86/checkasm.asm'))
+    checkasm_nasm_objs = []
+    if host_machine.cpu_family() == 'aarch64'
+        checkasm_sources += files('checkasm/arm/checkasm_64.S')
+    elif host_machine.cpu_family().startswith('arm')
+        checkasm_sources += files('checkasm/arm/checkasm_32.S')
+    elif host_machine.cpu_family().startswith('x86')
+        checkasm_nasm_objs = nasm_gen.process(files('checkasm/x86/checkasm.asm'))
+    endif
 
     checkasm = executable('checkasm', checkasm_sources, checkasm_nasm_objs,
         objects: [checkasm_bitdepth_objs],