ref: cd4c8249bc0e091789495a09b8942d28b687273c
parent: aa6a1a16ad84a58484d4550d661118fe7b8bb310
author: Aurélien Zanelli <[email protected]>
date: Fri May 31 11:07:00 EDT 2013
Add run-time CPU detection and support for ARM architecture Run-time CPU detection (RTCD) is enabled by default if the target platform supports it. It can be disabled at compile time with the --disable-rtcd option. Add RTCD support for the ARM architecture. Thanks to Timothy B. Terriberry for help and code review. Signed-off-by: Timothy B. Terriberry <[email protected]>
--- a/Makefile.am
+++ b/Makefile.am
@@ -18,6 +18,10 @@
OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
endif
+# Add the sources listed in CELT_SOURCES_ARM to the CELT build only when the
+# CPU_ARM automake conditional is true.
+if CPU_ARM
+CELT_SOURCES += $(CELT_SOURCES_ARM)
+endif
+
include celt_headers.mk
include silk_headers.mk
include opus_headers.mk
--- /dev/null
+++ b/celt/arm/armcpu.c
@@ -1,0 +1,166 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora, modified to suit Opus */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef OPUS_HAVE_RTCD
+
+#include "armcpu.h"
+#include "cpu_support.h"
+#include "os_support.h"
+#include "opus_types.h"
+
+/* Bit flags describing the ARM features reported by opus_cpu_capabilities().
+ * opus_select_arch() tests EDSP, MEDIA and NEON, in that order, to pick an
+ * architecture index.
+ * OPUS_CPU_ARM_V4 is defined for completeness; nothing visible in this file
+ * sets or tests it. */
+#define OPUS_CPU_ARM_V4 (1)
+#define OPUS_CPU_ARM_EDSP (1<<1)
+#define OPUS_CPU_ARM_MEDIA (1<<2)
+#define OPUS_CPU_ARM_NEON (1<<3)
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+/* Probe the running CPU for optional ARM features (MSVC build).
+ * Each probe executes a single instruction from the feature's instruction set
+ * inside a structured-exception block.
+ * If the instruction executes, the matching OPUS_CPU_ARM_* bit is set; if it
+ * raises EXCEPTION_ILLEGAL_INSTRUCTION the exception is swallowed and the bit
+ * stays clear.
+ * Any other exception code is not handled here.
+ * The probes are nested at preprocessing time, so a feature is only tested
+ * when support for it and for every lower level has been compiled in.
+ * Returns the bitwise OR of the detected OPUS_CPU_ARM_* flags. */
+static inline opus_uint32 opus_cpu_capabilities(void){
+  opus_uint32 flags;
+  flags=0;
+  /* MSVC has no inline __asm support for ARM, but it does let you __emit
+   * instructions via their assembled hex code.
+   * All of these instructions should be essentially nops. */
+# if defined(ARMv5E_ASM)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OPUS_CPU_ARM_EDSP;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+  /* ARMv6_ASM is the macro the build system defines for the ARMv6 media
+   * instructions; guarding on any other name would compile this probe out
+   * and leave OPUS_CPU_ARM_MEDIA permanently clear. */
+#  if defined(ARMv6_ASM)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OPUS_CPU_ARM_MEDIA;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   if defined(ARM_HAVE_NEON)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OPUS_CPU_ARM_NEON;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Ignore exception.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+/* Linux based */
+/* Detect optional ARM features by parsing /proc/cpuinfo line by line.
+ *  - On a line starting with "Features", the tokens " edsp" and " neon"
+ *    (followed by a space or a newline) set OPUS_CPU_ARM_EDSP and
+ *    OPUS_CPU_ARM_NEON.
+ *  - On a line starting with "CPU architecture:", a version number of 6 or
+ *    more sets OPUS_CPU_ARM_MEDIA.
+ * Returns the bitwise OR of the detected OPUS_CPU_ARM_* flags; the result is
+ * 0 when /proc/cpuinfo cannot be opened. */
+opus_uint32 opus_cpu_capabilities(void)
+{
+  opus_uint32 flags = 0;
+  FILE *cpuinfo;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+
+  if(cpuinfo != NULL)
+  {
+    /* 512 should be enough for anybody (it's even enough for all the flags that
+     * x86 has accumulated... so far). */
+    char buf[512];
+
+    while(fgets(buf, sizeof(buf), cpuinfo) != NULL)
+    {
+      /* Search for edsp and neon flag.
+       * strncmp() stops at the terminating NUL, so a line shorter than the
+       * prefix never makes us look at stale or uninitialised buffer bytes. */
+      if(strncmp(buf, "Features", 8) == 0)
+      {
+        char *p;
+        p = strstr(buf, " edsp");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_EDSP;
+
+        p = strstr(buf, " neon");
+        if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
+          flags |= OPUS_CPU_ARM_NEON;
+      }
+
+      /* Search for media capabilities (>= ARMv6) */
+      if(strncmp(buf, "CPU architecture:", 17) == 0)
+      {
+        /* strtol() is well defined for out-of-range input, unlike atoi().
+         * Text with no leading number yields 0, i.e. "no media support". */
+        long version;
+        version = strtol(buf+17, NULL, 10);
+
+        if(version >= 6)
+          flags |= OPUS_CPU_ARM_MEDIA;
+      }
+    }
+
+    fclose(cpuinfo);
+  }
+  return flags;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in privileged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+ "your platform. Reconfigure with --disable-rtcd (or send patches)."
+#endif
+
+/* Translate the flags from opus_cpu_capabilities() into an architecture
+ * index.
+ * The levels are cumulative: the search stops at the first missing feature,
+ * so a higher level is only reported when every lower one is present.
+ * Returns 0 without EDSP, 1 with EDSP only, 2 with EDSP and MEDIA, and 3 with
+ * EDSP, MEDIA and NEON. */
+int opus_select_arch(void)
+{
+  const opus_uint32 caps = opus_cpu_capabilities();
+
+  if((caps & OPUS_CPU_ARM_EDSP) == 0)
+    return 0;
+
+  if((caps & OPUS_CPU_ARM_MEDIA) == 0)
+    return 1;
+
+  if((caps & OPUS_CPU_ARM_NEON) == 0)
+    return 2;
+
+  return 3;
+}
+
+#endif
--- /dev/null
+++ b/celt/arm/armcpu.h
@@ -1,0 +1,35 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora, modified to suit Opus */
+
+#ifndef ARMCPU_H
+#define ARMCPU_H
+
+/* Detects the features of the running ARM CPU and returns an architecture
+ * index from 0 (baseline) to 3 (all optional features detected). */
+int opus_select_arch(void);
+
+#endif
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -33,6 +33,7 @@
#define CELT_DECODER_C
+#include "cpu_support.h"
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@@ -69,6 +70,7 @@
int downsample;
int start, end;
int signalling;
+ int arch;
/* Everything beyond this point gets cleared on a reset */
#define DECODER_RESET_START rng
@@ -157,6 +159,7 @@
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
+ st->arch = opus_select_arch();
st->loss_count = 0;
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -33,6 +33,7 @@
#define CELT_ENCODER_C
+#include "cpu_support.h"
#include "os_support.h"
#include "mdct.h"
#include <math.h>
@@ -75,6 +76,7 @@
int lsb_depth;
int variable_duration;
int lfe;
+ int arch;
/* Everything beyond this point gets cleared on a reset */
#define ENCODER_RESET_START rng
@@ -187,6 +189,8 @@
st->start = 0;
st->end = st->mode->effEBands;
st->signalling = 1;
+
+ st->arch = opus_select_arch();
st->constrained_vbr = 1;
st->clip = 1;
--- /dev/null
+++ b/celt/cpu_support.h
@@ -1,0 +1,51 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
+#include "arm/armcpu.h"
+
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+
+#else
+#define OPUS_ARCHMASK 0
+
+/* Fallback used when the ARM run-time detection above is not compiled in:
+ * OPUS_ARCHMASK is 0, so the only architecture index is 0. */
+static inline int opus_select_arch(void)
+{
+ return 0;
+}
+#endif
+
+#endif
--- a/celt_headers.mk
+++ b/celt_headers.mk
@@ -2,6 +2,7 @@
celt/arch.h \
celt/bands.h \
celt/celt.h \
+celt/cpu_support.h \
include/opus_types.h \
include/opus_defines.h \
include/opus_custom.h \
@@ -29,6 +30,7 @@
celt/vq.h \
celt/static_modes_float.h \
celt/static_modes_fixed.h \
+celt/arm/armcpu.h \
celt/arm/fixed_armv4.h \
celt/arm/fixed_armv5e.h \
celt/arm/kiss_fft_armv4.h \
--- a/celt_sources.mk
+++ b/celt_sources.mk
@@ -16,3 +16,6 @@
celt/quant_bands.c \
celt/rate.c \
celt/vq.c
+
+# ARM-only CELT sources, kept apart from CELT_SOURCES so that a build can add
+# them conditionally.
+CELT_SOURCES_ARM = \
+celt/arm/armcpu.c
--- a/configure.ac
+++ b/configure.ac
@@ -155,6 +155,7 @@
AC_DEFINE([FLOAT_APPROX], , [Float approximations])
fi
+rtcd_support=no
cpu_arm=no
AC_ARG_ENABLE(asm,
AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]),
@@ -167,6 +168,7 @@
AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"],
[asm_optimization="disabled"])
if test "x${asm_optimization}" = "xARM" ; then
+ rtcd_support=yes
AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations])
AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0])
if test "x${ARMv5E_ASM}" = "x1" ; then
@@ -178,6 +180,11 @@
AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations])
asm_optimization="${asm_optimization} (Media)"
fi
+ AS_ASM_ARM_NEON([ARM_HAVE_NEON=1],[ARM_HAVE_NEON=0])
+ if test "x${ARM_HAVE_NEON}" = "x1" ; then
+ AC_DEFINE([ARM_HAVE_NEON], 1, [Use ARM NEON optimizations])
+ asm_optimization="${asm_optimization} (NEON)"
+ fi
fi
;;
esac
@@ -185,6 +192,17 @@
asm_optimization="disabled"
fi
+dnl Run-time CPU detection is requested by default; --disable-rtcd turns it off.
+dnl OPUS_HAVE_RTCD is defined only when it is both requested and supported.
+dnl Otherwise rtcd_support is rewritten into the text shown in the summary.
+AC_ARG_ENABLE(rtcd,
+  AS_HELP_STRING([--disable-rtcd], [Disable run-time CPU capabilities detection]),
+  [ ac_enable_rtcd=$enableval ], [ ac_enable_rtcd=yes] )
+dnl Two test commands joined by && are used because test -a is not portable.
+if test "x${ac_enable_rtcd}" = xyes && test "x${rtcd_support}" = xyes ; then
+  AC_DEFINE([OPUS_HAVE_RTCD], 1, [Use run-time CPU capabilities detection])
+elif test "x${rtcd_support}" = xno ; then
+  rtcd_support="no rtcd for your platform, please send patches"
+else
+  rtcd_support="no"
+fi
+
ac_enable_assertions="no"
AC_ARG_ENABLE(assertions, [ --enable-assertions enable additional software error checking],
[if test "$enableval" = yes; then
@@ -281,6 +299,7 @@
AM_CONDITIONAL([FIXED_POINT], [test x$ac_enable_fixed = xyes])
AM_CONDITIONAL([CUSTOM_MODES], [test x$ac_enable_custom_modes = xyes])
AM_CONDITIONAL([EXTRA_PROGRAMS], [test x$ac_enable_extra_programs = xyes])
+AM_CONDITIONAL([CPU_ARM], [test x$cpu_arm = xyes])
dnl subsitutions for the pkg-config files
if test x$ac_enable_float = xyes; then
@@ -321,6 +340,7 @@
Fast float approximations: ..... ${float_approx}
Fixed point debugging: ......... ${ac_enable_fixed_debug}
Assembly optimization: ......... ${asm_optimization}
+ Run-time CPU detection: ........ ${rtcd_support}
Custom modes: .................. ${ac_enable_custom_modes}
Assertion checking: ............ ${ac_enable_assertions}
Fuzzing: ....................... ${ac_enable_fuzzing}
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -46,6 +46,7 @@
#include "structs.h"
#include "define.h"
#include "mathops.h"
+#include "cpu_support.h"
struct OpusDecoder {
int celt_dec_offset;
@@ -70,6 +71,7 @@
#endif
opus_uint32 rangeFinal;
+ int arch;
};
#ifdef FIXED_POINT
@@ -119,6 +121,7 @@
st->Fs = Fs;
st->DecControl.API_sampleRate = st->Fs;
st->DecControl.nChannelsAPI = st->channels;
+ st->arch = opus_select_arch();
/* Reset decoder */
ret = silk_InitDecoder( silk_dec );
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -40,6 +40,7 @@
#include "arch.h"
#include "opus_private.h"
#include "os_support.h"
+#include "cpu_support.h"
#include "analysis.h"
#include "mathops.h"
#include "tuning_parameters.h"
@@ -103,6 +104,7 @@
int analysis_offset;
#endif
opus_uint32 rangeFinal;
+ int arch;
};
/* Transition tables for the voice and music. First column is the
@@ -183,6 +185,8 @@
st->stream_channels = st->channels = channels;
st->Fs = Fs;
+
+ st->arch = opus_select_arch();
ret = silk_InitEncoder( silk_enc, &st->silk_mode );
if(ret)return OPUS_INTERNAL_ERROR;