shithub: opus

Download patch

ref: cd4c8249bc0e091789495a09b8942d28b687273c
parent: aa6a1a16ad84a58484d4550d661118fe7b8bb310
author: Aurélien Zanelli <[email protected]>
date: Fri May 31 11:07:00 EDT 2013

Add run-time CPU detection and support for ARM architecture

Run-time CPU detection (RTCD) is enabled by default if the target platform
supports it.
It can be disabled at compile time with the --disable-rtcd option.

Add RTCD support for ARM architecture.

Thanks to Timothy B. Terriberry for help and code review

Signed-off-by: Timothy B. Terriberry <[email protected]>

--- a/Makefile.am
+++ b/Makefile.am
@@ -18,6 +18,10 @@
 OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
 endif
 
+if CPU_ARM
+CELT_SOURCES += $(CELT_SOURCES_ARM)
+endif
+
 include celt_headers.mk
 include silk_headers.mk
 include opus_headers.mk
--- /dev/null
+++ b/celt/arm/armcpu.c
@@ -1,0 +1,166 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora, modified to suit Opus */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef OPUS_HAVE_RTCD
+
+#include "armcpu.h"
+#include "cpu_support.h"
+#include "os_support.h"
+#include "opus_types.h"
+/* Bit flags describing ARM CPU capabilities (see opus_cpu_capabilities()). */
+#define OPUS_CPU_ARM_V4    (1)
+#define OPUS_CPU_ARM_EDSP  (1<<1)
+#define OPUS_CPU_ARM_MEDIA (1<<2)
+#define OPUS_CPU_ARM_NEON  (1<<3)
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+# define WIN32_LEAN_AND_MEAN
+# define WIN32_EXTRA_LEAN
+# include <windows.h>
+
+/* Probe ARM features by executing one harmless instruction per level and
+ * catching the illegal-instruction exception; returns OPUS_CPU_ARM_* bits. */
+static inline opus_uint32 opus_cpu_capabilities(void){
+  opus_uint32 flags=0;
+  /* MSVC has no inline __asm support for ARM, but __emit accepts the assembled
+   * hex code of an instruction. All of these should be essentially nops. */
+# if defined(ARMv5E_ASM)
+  __try{
+    /*PLD [r13]*/
+    __emit(0xF5DDF000);
+    flags|=OPUS_CPU_ARM_EDSP;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Instruction rejected: leave the EDSP bit clear.*/
+  }
+#  if defined(ARMv6_ASM)
+  __try{
+    /*SHADD8 r3,r3,r3*/
+    __emit(0xE6333F93);
+    flags|=OPUS_CPU_ARM_MEDIA;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Instruction rejected: leave the MEDIA bit clear.*/
+  }
+#   if defined(ARM_HAVE_NEON)
+  __try{
+    /*VORR q0,q0,q0*/
+    __emit(0xF2200150);
+    flags|=OPUS_CPU_ARM_NEON;
+  }
+  __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
+    /*Instruction rejected: leave the NEON bit clear.*/
+  }
+#   endif
+#  endif
+# endif
+  return flags;
+}
+
+#elif defined(__linux__)
+/* Return nonzero if `name` (written with its leading space, e.g. " neon")
+ * occurs in `line` as a whole word: followed by ' ', '\n' or the end. */
+static int opus_cpuinfo_has_flag(const char *line, const char *name)
+{
+  size_t len = strlen(name);
+  const char *p;
+  for(p = strstr(line, name); p != NULL; p = strstr(p + 1, name))
+  {
+    if(p[len] == ' ' || p[len] == '\n' || p[len] == '\0')
+      return 1;
+  }
+  return 0;
+}
+
+/* Linux based: build the OPUS_CPU_ARM_* flag set from /proc/cpuinfo.
+ * Returns 0 (no optional features) when the file cannot be opened. */
+opus_uint32 opus_cpu_capabilities(void)
+{
+  opus_uint32 flags = 0;
+  char buf[512];
+  FILE *cpuinfo;
+
+  /* Reading /proc/self/auxv would be easier, but that doesn't work reliably on
+   * Android */
+  cpuinfo = fopen("/proc/cpuinfo", "r");
+  if(cpuinfo == NULL)
+    return flags;
+
+  while(fgets(buf, sizeof(buf), cpuinfo) != NULL)
+  {
+    if(strncmp(buf, "Features", 8) == 0)
+    {
+      if(opus_cpuinfo_has_flag(buf, " edsp"))
+        flags |= OPUS_CPU_ARM_EDSP;
+      if(opus_cpuinfo_has_flag(buf, " neon"))
+        flags |= OPUS_CPU_ARM_NEON;
+    }
+    /* Media capabilities are implied by architecture version >= 6 */
+    if(strncmp(buf, "CPU architecture:", 17) == 0
+     && strtol(buf + 17, NULL, 10) >= 6)
+      flags |= OPUS_CPU_ARM_MEDIA;
+  }
+
+  fclose(cpuinfo);
+  return flags;
+}
+#else
+/* The feature registers which can tell us what the processor supports are
+ * accessible in privileged modes only, so we can't have a general user-space
+ * detection method like on x86.*/
+# error "Configured to use ARM asm but no CPU detection method available for " \
+   "your platform.  Reconfigure with --disable-rtcd (or send patches)."
+#endif
+
+/* Convert the detected capability flags into an arch index: 0 is the
+ * baseline, and each of EDSP, MEDIA and NEON (in that order) adds one. A
+ * missing feature stops the count, so later features are then ignored. */
+int opus_select_arch(void)
+{
+  static const opus_uint32 levels[3] = {
+    OPUS_CPU_ARM_EDSP, OPUS_CPU_ARM_MEDIA, OPUS_CPU_ARM_NEON
+  };
+  const opus_uint32 caps = opus_cpu_capabilities();
+  int level;
+
+  /* Climb the feature ladder until the first unsupported step. */
+  for(level = 0; level < 3; level++)
+  {
+    if(!(caps & levels[level]))
+      break;
+  }
+  return level;
+}
+
+#endif
--- /dev/null
+++ b/celt/arm/armcpu.h
@@ -1,0 +1,35 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* Original code from libtheora, modified to suit Opus */
+
+#ifndef ARMCPU_H
+#define ARMCPU_H
+/* Run-time CPU detection; returns arch index 0 (ARMv4) .. 3 (NEON). */
+int opus_select_arch(void);
+
+#endif
--- a/celt/celt_decoder.c
+++ b/celt/celt_decoder.c
@@ -33,6 +33,7 @@
 
 #define CELT_DECODER_C
 
+#include "cpu_support.h"
 #include "os_support.h"
 #include "mdct.h"
 #include <math.h>
@@ -69,6 +70,7 @@
    int downsample;
    int start, end;
    int signalling;
+   int arch;
 
    /* Everything beyond this point gets cleared on a reset */
 #define DECODER_RESET_START rng
@@ -157,6 +159,7 @@
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
+   st->arch = opus_select_arch();
 
    st->loss_count = 0;
 
--- a/celt/celt_encoder.c
+++ b/celt/celt_encoder.c
@@ -33,6 +33,7 @@
 
 #define CELT_ENCODER_C
 
+#include "cpu_support.h"
 #include "os_support.h"
 #include "mdct.h"
 #include <math.h>
@@ -75,6 +76,7 @@
    int lsb_depth;
    int variable_duration;
    int lfe;
+   int arch;
 
    /* Everything beyond this point gets cleared on a reset */
 #define ENCODER_RESET_START rng
@@ -187,6 +189,8 @@
    st->start = 0;
    st->end = st->mode->effEBands;
    st->signalling = 1;
+
+   st->arch = opus_select_arch();
 
    st->constrained_vbr = 1;
    st->clip = 1;
--- /dev/null
+++ b/celt/cpu_support.h
@@ -1,0 +1,51 @@
+/* Copyright (c) 2010 Xiph.Org Foundation
+ * Copyright (c) 2013 Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CPU_SUPPORT_H
+#define CPU_SUPPORT_H
+
+#if defined(OPUS_HAVE_RTCD) && defined(ARMv4_ASM)
+#include "arm/armcpu.h"
+
+/* We currently support 4 ARM variants:
+ * arch[0] -> ARMv4
+ * arch[1] -> ARMv5E
+ * arch[2] -> ARMv6
+ * arch[3] -> NEON
+ */
+#define OPUS_ARCHMASK 3
+
+#else
+#define OPUS_ARCHMASK 0
+/* Fallback when RTCD is off or the target is not ARM asm: always arch 0. */
+static inline int opus_select_arch(void)
+{
+  return 0;
+}
+#endif
+
+#endif
--- a/celt_headers.mk
+++ b/celt_headers.mk
@@ -2,6 +2,7 @@
 celt/arch.h \
 celt/bands.h \
 celt/celt.h \
+celt/cpu_support.h \
 include/opus_types.h \
 include/opus_defines.h \
 include/opus_custom.h \
@@ -29,6 +30,7 @@
 celt/vq.h \
 celt/static_modes_float.h \
 celt/static_modes_fixed.h \
+celt/arm/armcpu.h \
 celt/arm/fixed_armv4.h \
 celt/arm/fixed_armv5e.h \
 celt/arm/kiss_fft_armv4.h \
--- a/celt_sources.mk
+++ b/celt_sources.mk
@@ -16,3 +16,6 @@
 celt/quant_bands.c \
 celt/rate.c \
 celt/vq.c
+
+CELT_SOURCES_ARM = \
+celt/arm/armcpu.c
--- a/configure.ac
+++ b/configure.ac
@@ -155,6 +155,7 @@
     AC_DEFINE([FLOAT_APPROX], , [Float approximations])
 fi
 
+rtcd_support=no
 cpu_arm=no
 AC_ARG_ENABLE(asm,
     AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]),
@@ -167,6 +168,7 @@
         AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"],
             [asm_optimization="disabled"])
         if test "x${asm_optimization}" = "xARM" ; then
+            rtcd_support=yes
             AC_DEFINE([ARMv4_ASM], 1, [Use generic ARMv4 asm optimizations])
             AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0])
             if test "x${ARMv5E_ASM}" = "x1" ; then
@@ -178,6 +180,11 @@
                 AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations])
                 asm_optimization="${asm_optimization} (Media)"
             fi
+            AS_ASM_ARM_NEON([ARM_HAVE_NEON=1],[ARM_HAVE_NEON=0])
+            if test "x${ARM_HAVE_NEON}" = "x1" ; then
+              AC_DEFINE([ARM_HAVE_NEON], 1, [Use ARM NEON optimizations])
+              asm_optimization="${asm_optimization} (NEON)"
+            fi
         fi
         ;;
     esac
@@ -185,6 +192,17 @@
     asm_optimization="disabled"
 fi
 
+AC_ARG_ENABLE(rtcd,
+    AS_HELP_STRING([--disable-rtcd], [Disable run-time CPU capabilities detection]),
+    [ ac_enable_rtcd=$enableval ], [ ac_enable_rtcd=yes ] )
+if test "x${ac_enable_rtcd}" = xyes && test "x${rtcd_support}" = xyes ; then
+    AC_DEFINE([OPUS_HAVE_RTCD], 1, [Use run-time CPU capabilities detection])
+elif test "x${rtcd_support}" = xno ; then   # no detection method for this target
+    rtcd_support="no rtcd for your platform, please send patches"
+else                                        # supported, but disabled by the user
+    rtcd_support="no"
+fi
+
 ac_enable_assertions="no"
 AC_ARG_ENABLE(assertions, [  --enable-assertions        enable additional software error checking],
 [if test "$enableval" = yes; then
@@ -281,6 +299,7 @@
 AM_CONDITIONAL([FIXED_POINT], [test x$ac_enable_fixed = xyes])
 AM_CONDITIONAL([CUSTOM_MODES], [test x$ac_enable_custom_modes = xyes])
 AM_CONDITIONAL([EXTRA_PROGRAMS], [test x$ac_enable_extra_programs = xyes])
+AM_CONDITIONAL([CPU_ARM], [test x$cpu_arm = xyes])
 
 dnl subsitutions for the pkg-config files
 if test x$ac_enable_float = xyes; then
@@ -321,6 +340,7 @@
       Fast float approximations: ..... ${float_approx}
       Fixed point debugging: ......... ${ac_enable_fixed_debug}
       Assembly optimization: ......... ${asm_optimization}
+      Run-time CPU detection: ........ ${rtcd_support}
       Custom modes: .................. ${ac_enable_custom_modes}
       Assertion checking: ............ ${ac_enable_assertions}
       Fuzzing: ....................... ${ac_enable_fuzzing}
--- a/src/opus_decoder.c
+++ b/src/opus_decoder.c
@@ -46,6 +46,7 @@
 #include "structs.h"
 #include "define.h"
 #include "mathops.h"
+#include "cpu_support.h"
 
 struct OpusDecoder {
    int          celt_dec_offset;
@@ -70,6 +71,7 @@
 #endif
 
    opus_uint32  rangeFinal;
+   int arch;
 };
 
 #ifdef FIXED_POINT
@@ -119,6 +121,7 @@
    st->Fs = Fs;
    st->DecControl.API_sampleRate = st->Fs;
    st->DecControl.nChannelsAPI      = st->channels;
+   st->arch = opus_select_arch();
 
    /* Reset decoder */
    ret = silk_InitDecoder( silk_dec );
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -40,6 +40,7 @@
 #include "arch.h"
 #include "opus_private.h"
 #include "os_support.h"
+#include "cpu_support.h"
 #include "analysis.h"
 #include "mathops.h"
 #include "tuning_parameters.h"
@@ -103,6 +104,7 @@
     int          analysis_offset;
 #endif
     opus_uint32  rangeFinal;
+    int arch;
 };
 
 /* Transition tables for the voice and music. First column is the
@@ -183,6 +185,8 @@
     st->stream_channels = st->channels = channels;
 
     st->Fs = Fs;
+
+    st->arch = opus_select_arch();
 
     ret = silk_InitEncoder( silk_enc, &st->silk_mode );
     if(ret)return OPUS_INTERNAL_ERROR;