ref: 0f25db1ab9fad46e134a037b85342a0d886d2ec7
parent: 276973ecebea1fa7657253af26f6af3ac1e85513
author: Henrik Gramner <[email protected]>
date: Wed Dec 19 19:43:42 EST 2018
Use 16-byte stack alignment on 32-bit x86. We don't support AVX2 on x86-32, so having 32-byte alignment there is redundant.
--- a/include/common/attributes.h
+++ b/include/common/attributes.h
@@ -38,15 +38,16 @@
#define ATTR_ALIAS
#endif
-#if ARCH_X86
+#if ARCH_X86_64
+/* x86-64 needs 32-byte alignment for AVX2. */
#define ALIGN_32_VAL 32
#define ALIGN_16_VAL 16
-#elif ARCH_ARM || ARCH_AARCH64
-// ARM doesn't benefit from anything more than 16 byte alignment.
+#elif ARCH_X86_32 || ARCH_ARM || ARCH_AARCH64
+/* No AVX2 on 32-bit x86, and ARM doesn't benefit from more than 16-byte alignment. */
#define ALIGN_32_VAL 16
#define ALIGN_16_VAL 16
#else
-// No need for extra alignment on platforms without assembly.
+/* No need for extra alignment on platforms without assembly. */
#define ALIGN_32_VAL 8
#define ALIGN_16_VAL 8
#endif
--- a/meson.build
+++ b/meson.build
@@ -194,30 +194,35 @@
stackrealign_flag = []
if host_machine.cpu_family().startswith('x86')
- if cc.has_argument('-mpreferred-stack-boundary=5')
- stackalign_flag = ['-mpreferred-stack-boundary=5']
- stackrealign_flag = ['-mincoming-stack-boundary=4']
- cdata_asm.set('STACK_ALIGNMENT', 32)
- cdata.set('STACK_ALIGNMENT', 32)
- elif cc.has_argument('-mpreferred-stack-boundary=4')
- stackalign_flag = ['-mpreferred-stack-boundary=4']
- stackrealign_flag = ['-mincoming-stack-boundary=4']
- cdata_asm.set('STACK_ALIGNMENT', 16)
- cdata.set('STACK_ALIGNMENT', 16)
- elif cc.has_argument('-mstack-alignment=32')
- stackalign_flag = ['-mstack-alignment=32']
- stackrealign_flag = ['-mstackrealign']
- cdata_asm.set('STACK_ALIGNMENT', 32)
- cdata.set('STACK_ALIGNMENT', 32)
+ if host_machine.cpu_family() == 'x86_64'
+ if cc.has_argument('-mpreferred-stack-boundary=5')
+ stackalign_flag = ['-mpreferred-stack-boundary=5']
+ stackrealign_flag = ['-mincoming-stack-boundary=4']
+ stack_alignment = 32
+ elif cc.has_argument('-mstack-alignment=32')
+ stackalign_flag = ['-mstack-alignment=32']
+ stackrealign_flag = ['-mstackrealign']
+ stack_alignment = 32
+ else
+ stack_alignment = 16
+ endif
else
- if host_machine.cpu_family() == 'x86_64'
- cdata_asm.set('STACK_ALIGNMENT', 16)
- cdata.set('STACK_ALIGNMENT', 16)
+ if host_machine.system() == 'linux' or host_machine.system() == 'darwin'
+ stack_alignment = 16
+ elif cc.has_argument('-mpreferred-stack-boundary=4')
+ stackalign_flag = ['-mpreferred-stack-boundary=4']
+ stackrealign_flag = ['-mincoming-stack-boundary=2']
+ stack_alignment = 16
+ elif cc.has_argument('-mstack-alignment=16')
+ stackalign_flag = ['-mstack-alignment=16']
+ stackrealign_flag = ['-mstackrealign']
+ stack_alignment = 16
else
- cdata_asm.set('STACK_ALIGNMENT', 4)
- cdata.set('STACK_ALIGNMENT', 4)
+ stack_alignment = 4
endif
endif
+ cdata_asm.set('STACK_ALIGNMENT', stack_alignment)
+ cdata.set('STACK_ALIGNMENT', stack_alignment)
endif
cdata.set10('ARCH_AARCH64', host_machine.cpu_family() == 'aarch64')
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -25,6 +25,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include "config.h"
+
#include <stdint.h>
#include "src/x86/cpu.h"
@@ -47,6 +49,8 @@
if (info[2] & (1 << 9)) flags |= DAV1D_X86_CPU_FLAG_SSSE3;
if (info[2] & (1 << 19)) flags |= DAV1D_X86_CPU_FLAG_SSE41;
if (info[2] & (1 << 20)) flags |= DAV1D_X86_CPU_FLAG_SSE42;
+#if ARCH_X86_64
+ /* We only support >128-bit SIMD on x86-64. */
if (info[2] & (1 << 27)) /* OSXSAVE */ {
uint64_t xcr = dav1d_cpu_xgetbv(0);
if ((xcr & 0x00000006) == 0x00000006) /* XMM/YMM */ {
@@ -61,6 +65,7 @@
}
}
}
+#endif
}
return flags;