ref: 972a34ec2c79d241318af24389b8ee042d10556a
parent: b7bd4c20acfd951ba46647e07411285997d952f4
author: Timothy B. Terriberry <[email protected]>
date: Sun May 19 13:11:17 EDT 2013

Add ARMv4/ARMv5E macros.

Original patch by Aurélien Zanelli <[email protected]>:
 http://lists.xiph.org/pipermail/opus/2013-May/002078.html

Revised version:
- Add autoconf detection (ported from libtheora).
- Rename ARM5E to ARMv5E (an ARM5 is not the same thing as ARMv5!).
- Use actual macros so they can still be selectively overridden.
- Split out ARMv4 parts and add a few more ARMv4 macros.
- Label blocks to make them easy to find in generated assembly.
- Fix MULT16_32_Q15() so we can pass make check.
  The MDCT test passes in values larger than 2**30 for b.
  The new version should be just as fast (or faster, since it's
   easier to merge the shift with following instructions), and
   there's no appreciable impact on accuracy (FFT/MDCT SNR actually
   goes up in most cases). A portable-C sketch of the exact
   semantics follows this list.
- Fix register constraints.
  We were using early-clobber flags in a bunch of places that
   didn't need them, and commutative-pair flags in a bunch of
   places that weren't actually commutative; both flags are
   illustrated in a sketch after this list.
  This was Jean-Marc's fault (the original code came from Speex).
- Simplify silk_CLZ16().
- Port over iFFT C_MULC asm by Andree Buschmann
   <[email protected]> from Rockbox.
- Speed up the C_MULC asm by using LDRD, allowing more flexible
   addressing, re-ordering instructions to avoid some stalls,
   allowing more flexible register allocation, and getting things
   out of the inline asm block so the compiler can schedule them
   better. (A C sketch of what C_MULC computes follows this list.)
- Add C_MUL and C_MUL4 asm for the FFT to the encoder, based on the
   new C_MULC.
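
For orientation, here is a portable-C sketch (mine, not part of the
patch) of the exact Q15 semantics MULT16_32_Q15() targets; the 64-bit
intermediate keeps it exact even when |b| exceeds 2**30, the case the
old asm got wrong. Note that the asm versions in this patch
deliberately drop the lowest result bit instead of OR-ing in the high
bit of the low word, trading a tiny amount of accuracy for speed:

    /* Sketch only: assumes the opus_val16/opus_val32/opus_int64 types
       from opus_types.h and arch.h. */
    static inline opus_val32 MULT16_32_Q15_ref(opus_val16 a, opus_val32 b)
    {
        return (opus_val32)(((opus_int64)a*(opus_int64)b) >> 15);
    }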
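
Both constraint flags, shown in a standalone sketch (a hypothetical
example, not code from the patch): '&' marks an early-clobber output,
i.e. one written before the asm has consumed all of its inputs, and
'%' marks an input that may be swapped with the operand following it:

    static inline int constraint_demo(int a, int b)
    {
        int t, r;
        /* t is written by the first instruction before b is read by the
           second, so t must not share a register with b: early-clobber. */
        __asm__("mov %0, %1\n\t"
                "add %0, %0, %2"
                : "=&r"(t)
                : "r"(a), "r"(b));
        /* ADD really is commutative, so '%' lets the register allocator
           swap t and b if that saves a move. */
        __asm__("add %0, %1, %2"
                : "=r"(r)
                : "%r"(t), "r"(b));
        return r;
    }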
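
For reading the C_MULC blocks below: C_MULC computes a times the
conjugate of b. A fixed-point C sketch of those semantics, written
with the S_MUL/ADD32/SUB32 helpers from the surrounding header (S_MUL
is a Q15 multiply), consistent with the generic kiss_fft definition:

    /* m = a * conj(b) in Q15; sketch of what the asm implements. */
    #define C_MULC_ref(m,a,b) \
        do{ (m).r = ADD32(S_MUL((a).r,(b).r), S_MUL((a).i,(b).i)); \
            (m).i = SUB32(S_MUL((a).i,(b).r), S_MUL((a).r,(b).i)); \
        }while(0)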

In total, this patch gives a 22.3% speed-up on test_opus_encoder on
 a 600 MHz Cortex A8 using gcc 4.2.1.
When restricted to ARMv4 optimizations, it gives a 9.6% speed-up
 on the same processor/compiler.
On the conformance test vectors:
 Average mono quality is 97.0583 %
 Average stereo quality is 97.775 %

--- a/autogen.sh
+++ b/autogen.sh
@@ -135,6 +135,7 @@
 
 echo "Generating configuration files for $package, please wait...."
 
+ACLOCAL_FLAGS="-I m4"
 echo "  $ACLOCAL $ACLOCAL_FLAGS"
 $ACLOCAL $ACLOCAL_FLAGS || exit 1
 echo "  autoheader"
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -94,6 +94,179 @@
     do {(res).r = ADD32((res).r,(a).r);  (res).i = SUB32((res).i,(a).i); \
     }while(0)
 
+#if defined(ARMv4_ASM)
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull r0, %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal r0, %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #17\n\t" \
+            "mov r0, r0, lsr #15\n\t" \
+            "orr %[mr], r0, %[mr], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MUL4\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mi], r1, %[br]\n\t" \
+            "smlal %[tt], %[mi], r0, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull r0, %[mr], r0, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #17\n\t" \
+            "smlal r0, %[mr], r1, %[bi]\n\t" \
+            "orr %[mi], %[tt], %[mi], lsl #15\n\t" \
+            "mov r0, r0, lsr #17\n\t" \
+            "orr %[mr], r0, %[mr], lsl #15\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+       int br__; \
+       int bi__; \
+       int tt__; \
+        __asm__ __volatile__( \
+            "#C_MULC\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
+            "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldrsh %[bi], [%[bp], #2]\n\t" \
+            "smull %[tt], %[mr], r0, %[br]\n\t" \
+            "smlal %[tt], %[mr], r1, %[bi]\n\t" \
+            "rsb %[bi], %[bi], #0\n\t" \
+            "smull r1, %[mi], r1, %[br]\n\t" \
+            "mov %[tt], %[tt], lsr #15\n\t" \
+            "smlal r1, %[mi], r0, %[bi]\n\t" \
+            "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
+            "mov r1, r1, lsr #15\n\t" \
+            "orr %[mi], r1, %[mi], lsl #17\n\t" \
+            : [mr]"=r"((m).r), [mi]"=r"((m).i), \
+              [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
+            : [ap]"r"(&(a)), [bp]"r"(&(b)) \
+            : "r0", "r1" \
+        ); \
+    } \
+    while(0)
+
+#endif /* ARMv4_ASM */
+
+#if defined(ARMv5E_ASM)
+
+#if defined(__thumb__)||defined(__thumb2__)
+#define LDRD_CONS "Q"
+#else
+#define LDRD_CONS "Uq"
+#endif
+
+#undef C_MUL
+#define C_MUL(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHL32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MUL4
+#define C_MUL4(m,a,b) \
+    do{ \
+        int mr1__; \
+        int mr2__; \
+        int mi__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MUL4\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mi], %H[aval], %[bval]\n\t" \
+            "smulwb %[mr1], %[aval], %[bval]\n\t" \
+            "smulwt %[mr2], %H[aval], %[bval]\n\t" \
+            "smlawt %[mi], %[aval], %[bval], %[mi]\n\t" \
+            : [mr1]"=r"(mr1__), [mr2]"=r"(mr2__), [mi]"=r"(mi__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHR32(SUB32(mr1__, mr2__), 1); \
+        (m).i = SHR32(mi__, 1); \
+    } \
+    while(0)
+
+#undef C_MULC
+#define C_MULC(m,a,b) \
+    do{ \
+        int mr__; \
+        int mi1__; \
+        int mi2__; \
+        long long aval__; \
+        int bval__; \
+        __asm__( \
+            "#C_MULC\n\t" \
+            "ldrd %[aval], %H[aval], %[ap]\n\t" \
+            "ldr %[bval], %[bp]\n\t" \
+            "smulwb %[mr], %[aval], %[bval]\n\t" \
+            "smulwb %[mi1], %H[aval], %[bval]\n\t" \
+            "smulwt %[mi2], %[aval], %[bval]\n\t" \
+            "smlawt %[mr], %H[aval], %[bval], %[mr]\n\t" \
+            : [mr]"=r"(mr__), [mi1]"=r"(mi1__), [mi2]"=r"(mi2__), \
+              [aval]"=&r"(aval__), [bval]"=r"(bval__) \
+            : [ap]LDRD_CONS(a), [bp]"m"(b) \
+        ); \
+        (m).r = SHL32(mr__, 1); \
+        (m).i = SHL32(SUB32(mi1__, mi2__), 1); \
+    } \
+    while(0)
+
+#endif /* ARMv5E_ASM */
+
 #else  /* not FIXED_POINT*/
 
 #   define S_MUL(a,b) ( (a)*(b) )
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -112,10 +112,10 @@
 
 #include "fixed_generic.h"
 
-#ifdef ARM5E_ASM
-#include "fixed_arm5e.h"
-#elif defined (ARM4_ASM)
-#include "fixed_arm4.h"
+#ifdef ARMv5E_ASM
+#include "fixed_armv5e.h"
+#elif defined (ARMv4_ASM)
+#include "fixed_armv4.h"
 #elif defined (BFIN_ASM)
 #include "fixed_bfin.h"
 #elif defined (TI_C5X_ASM)
--- /dev/null
+++ b/celt/fixed_armv4.h
@@ -1,0 +1,71 @@
+/* Copyright (C) 2013 Xiph.Org Foundation and contributors */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv4_H
+#define FIXED_ARMv4_H
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */
+#undef MULT16_32_Q16
+static inline opus_val32 MULT16_32_Q16_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=r"(rd_lo), "=r"(rd_hi)
+      : "r"(b),"r"(a<<16)
+  );
+  return rd_hi;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv4(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits */
+#undef MULT16_32_Q15
+static inline opus_val32 MULT16_32_Q15_armv4(opus_val16 a, opus_val32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=r"(rd_lo), "=r"(rd_hi)
+      : "%r"(b), "r"(a<<16)
+  );
+  /*We intentionally don't OR in the high bit of rd_lo for speed.*/
+  return rd_hi<<1;
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv4(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+
+#endif
--- /dev/null
+++ b/celt/fixed_armv5e.h
@@ -1,0 +1,127 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+   Copyright (C) 2003-2008 Jean-Marc Valin
+   Copyright (C) 2007-2008 CSIRO
+   Copyright (C) 2013      Parrot */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef FIXED_ARMv5E_H
+#define FIXED_ARMv5E_H
+
+#include "fixed_armv4.h"
+
+/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */
+#undef MULT16_32_Q16
+static inline opus_val32 MULT16_32_Q16_armv5e(opus_val16 a, opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_32_Q16\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(b),"r"(a)
+  );
+  return res;
+}
+#define MULT16_32_Q16(a, b) (MULT16_32_Q16_armv5e(a, b))
+
+
+/** 16x32 multiplication, followed by a 15-bit shift right. Result fits in 32 bits */
+#undef MULT16_32_Q15
+static inline opus_val32 MULT16_32_Q15_armv5e(opus_val16 a, opus_val32 b)
+{
+#if 0
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=r"(rd_lo), "=r"(rd_hi)
+      : "%r"(b), "r"(a<<16)
+  );
+  return (rd_lo>>31)|(rd_hi<<1);
+#else
+  int res;
+  __asm__(
+      "#MULT16_32_Q15\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(b), "r"(a)
+  );
+  return res<<1;
+#endif
+}
+#define MULT16_32_Q15(a, b) (MULT16_32_Q15_armv5e(a, b))
+
+
+/** 16x32 multiply, followed by a 15-bit shift right and 32-bit add.
+    b must fit in 31 bits.
+    Result fits in 32 bits. */
+#undef MAC16_32_Q15
+static inline opus_val32 MAC16_32_Q15_armv5e(opus_val32 c, opus_val16 a,
+ opus_val32 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_32_Q15\n\t"
+      "smlawb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(b<<1), "r"(a), "r"(c)
+  );
+  return res;
+}
+#define MAC16_32_Q15(c, a, b) (MAC16_32_Q15_armv5e(c, a, b))
+
+/** 16x16 multiply-add where the result fits in 32 bits */
+#undef MAC16_16
+static inline opus_val32 MAC16_16_armv5e(opus_val32 c, opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MAC16_16\n\t"
+      "smlabb %0, %1, %2, %3;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b), "r"(c)
+  );
+  return res;
+}
+#define MAC16_16(c, a, b) (MAC16_16_armv5e(c, a, b))
+
+/** 16x16 multiplication where the result fits in 32 bits */
+#undef MULT16_16
+static inline opus_val32 MULT16_16_armv5e(opus_val16 a, opus_val16 b)
+{
+  int res;
+  __asm__(
+      "#MULT16_16\n\t"
+      "smulbb %0, %1, %2;\n"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define MULT16_16(a, b) (MULT16_16_armv5e(a, b))
+
+#endif
--- a/configure.ac
+++ b/configure.ac
@@ -18,7 +18,6 @@
 dnl enable silent rules on automake 1.11 and later
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
-
 # For libtool.
 dnl Please update these for releases.
 OPUS_LT_CURRENT=4
@@ -155,6 +154,36 @@
     AC_DEFINE([FLOAT_APPROX], , [Float approximations])
 fi
 
+cpu_arm=no
+AC_ARG_ENABLE(asm,
+    AS_HELP_STRING([--disable-asm], [Disable assembly optimizations]),
+    [ ac_enable_asm=$enableval ], [ ac_enable_asm=yes] )
+if test "x${ac_enable_asm}" = xyes ; then
+    asm_optimization="no asm for your platform, please send patches"
+    case $host_cpu in
+    arm*)
+        cpu_arm=yes
+        AS_GCC_INLINE_ASSEMBLY([asm_optimization="ARM"],
+            [asm_optimization="disabled"])
+        if test "x${asm_optimization}" = "xARM" ; then
+            AC_DEFINE([ARMv4_ASM], [], [Use generic ARMv4 asm optimizations])
+            AS_ASM_ARM_EDSP([ARMv5E_ASM=1],[ARMv5E_ASM=0])
+            if test "x${ARMv5E_ASM}" = "x1" ; then
+                AC_DEFINE(ARMv5E_ASM, 1, [Use ARMv5E asm optimizations])
+                asm_optimization="${asm_optimization} (EDSP)"
+            fi
+            AS_ASM_ARM_MEDIA([ARMv6_ASM=1],[ARMv6_ASM=0])
+            if test "x${ARMv6_ASM}" = "x1" ; then
+                AC_DEFINE(ARMv6_ASM, 1, [Use ARMv6 asm optimizations])
+                asm_optimization="${asm_optimization} (Media)"
+            fi
+        fi
+        ;;
+    esac
+else
+    asm_optimization="disabled"
+fi
+
 ac_enable_assertions="no"
 AC_ARG_ENABLE(assertions, [  --enable-assertions        enable additional software error checking],
 [if test "$enableval" = yes; then
@@ -281,6 +310,7 @@
       Floating point support: ........ ${ac_enable_float}
       Fast float approximations: ..... ${float_approx}
       Fixed point debugging: ......... ${ac_enable_fixed_debug}
+      Assembly optimization: ......... ${asm_optimization}
       Custom modes: .................. ${ac_enable_custom_modes}
       Assertion checking: ............ ${ac_enable_assertions}
       Fuzzing: ....................... ${ac_enable_fuzzing}
--- /dev/null
+++ b/m4/as-gcc-inline-assembly.m4
@@ -1,0 +1,106 @@
+dnl as-gcc-inline-assembly.m4 0.1.0
+
+dnl autostars m4 macro for detection of gcc inline assembly
+
+dnl David Schleef <[email protected]>
+
+dnl $Id$
+
+dnl AS_GCC_INLINE_ASSEMBLY(ACTION-IF-SUPPORTED, [ACTION-IF-NOT-SUPPORTED])
+dnl Checks whether the compiler supports gcc-style inline assembly.
+dnl Runs ACTION-IF-SUPPORTED if it does, and ACTION-IF-NOT-SUPPORTED
+dnl otherwise.
+
+AC_DEFUN([AS_GCC_INLINE_ASSEMBLY],
+[
+  AC_MSG_CHECKING([if compiler supports gcc-style inline assembly])
+
+  AC_TRY_COMPILE([], [
+#ifdef __GNUC_MINOR__
+#if (__GNUC__ * 1000 + __GNUC_MINOR__) < 3004
+#error GCC before 3.4 has critical bugs compiling inline assembly
+#endif
+#endif
+__asm__ (""::) ], [flag_ok=yes], [flag_ok=no])
+
+  if test "X$flag_ok" = Xyes ; then
+    $1
+    true
+  else
+    $2
+    true
+  fi
+  AC_MSG_RESULT([$flag_ok])
+])
+
+AC_DEFUN([AC_TRY_ASSEMBLE],
+[ac_c_ext=$ac_ext
+ ac_ext=${ac_s_ext-s}
+ cat > conftest.$ac_ext <<EOF
+ .file "configure"
+[$1]
+EOF
+if AC_TRY_EVAL(ac_compile); then
+  ac_ext=$ac_c_ext
+  ifelse([$2], , :, [  $2
+  rm -rf conftest*])
+else
+  echo "configure: failed program was:" >&AC_FD_CC
+  cat conftest.$ac_ext >&AC_FD_CC
+  ac_ext=$ac_c_ext
+ifelse([$3], , , [  rm -rf conftest*
+  $3
+])dnl
+fi
+rm -rf conftest*])
+
+
+AC_DEFUN([AS_ASM_ARM_NEON],
+[
+  AC_MSG_CHECKING([if assembler supports NEON instructions on ARM])
+
+  AC_TRY_ASSEMBLE([vorr d0,d0,d0], [flag_ok=yes], [flag_ok=no])
+
+  if test "X$flag_ok" = Xyes ; then
+    $1
+    true
+  else
+    $2
+    true
+  fi
+  AC_MSG_RESULT([$flag_ok])
+])
+
+
+AC_DEFUN([AS_ASM_ARM_MEDIA],
+[
+  AC_MSG_CHECKING([if assembler supports ARMv6 media instructions on ARM])
+
+  AC_TRY_ASSEMBLE([shadd8 r3,r3,r3], [flag_ok=yes], [flag_ok=no])
+
+  if test "X$flag_ok" = Xyes ; then
+    $1
+    true
+  else
+    $2
+    true
+  fi
+  AC_MSG_RESULT([$flag_ok])
+])
+
+
+AC_DEFUN([AS_ASM_ARM_EDSP],
+[
+  AC_MSG_CHECKING([if assembler supports EDSP instructions on ARM])
+
+  AC_TRY_ASSEMBLE([qadd r3,r3,r3], [flag_ok=yes], [flag_ok=no])
+
+  if test "X$flag_ok" = Xyes ; then
+    $1
+    true
+  else
+    $2
+    true
+  fi
+  AC_MSG_RESULT([$flag_ok])
+])
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -576,6 +576,14 @@
 #include "MacroCount.h"
 #include "MacroDebug.h"
 
+#ifdef ARMv4_ASM
+#include "SigProc_FIX_armv4.h"
+#endif
+
+#ifdef ARMv5E_ASM
+#include "SigProc_FIX_armv5e.h"
+#endif
+
 #ifdef  __cplusplus
 }
 #endif
--- /dev/null
+++ b/silk/SigProc_FIX_armv4.h
@@ -1,0 +1,47 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv4_H
+#define SILK_SIGPROC_FIX_ARMv4_H
+
+#undef silk_MLA
+static inline opus_int32 silk_MLA_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_MLA\n\t"
+      "mla %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_MLA(a, b, c) (silk_MLA_armv4(a, b, c))
+
+#endif
--- /dev/null
+++ b/silk/SigProc_FIX_armv5e.h
@@ -1,0 +1,61 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_ARMv5E_H
+#define SILK_SIGPROC_FIX_ARMv5E_H
+
+#undef silk_SMULTT
+static inline opus_int32 silk_SMULTT_armv5e(opus_int32 a, opus_int32 b)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_SMULTT\n\t"
+      "smultt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULTT(a, b) (silk_SMULTT_armv5e(a, b))
+
+#undef silk_SMLATT
+static inline opus_int32 silk_SMLATT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  opus_int32 res;
+  __asm__(
+      "#silk_SMLATT\n\t"
+      "smlatt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLATT(a, b, c) (silk_SMLATT_armv5e(a, b, c))
+
+#endif
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -134,5 +134,13 @@
     (*((Matrix_base_adr) + ((row)+(M)*(column))))
 #endif
 
+#ifdef ARMv4_ASM
+#include "macros_armv4.h"
+#endif
+
+#ifdef ARMv5E_ASM
+#include "macros_armv5e.h"
+#endif
+
 #endif /* SILK_MACROS_H */
 
--- /dev/null
+++ b/silk/macros_armv4.h
@@ -1,0 +1,103 @@
+/***********************************************************************
+Copyright (C) 2013 Xiph.Org Foundation and contributors.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv4_H
+#define SILK_MACROS_ARMv4_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; output has to be a 32-bit int */
+#undef silk_SMULWB
+static inline opus_int32 silk_SMULWB_armv4(opus_int32 a, opus_int16 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=r"(rd_lo), "=r"(rd_hi)
+      : "%r"(a), "r"(b<<16)
+  );
+  return rd_hi;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv4(a, b))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16; output has to be a 32-bit int */
+#undef silk_SMLAWB
+#define silk_SMLAWB(a, b, c) ((a) + silk_SMULWB(b, c))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#undef silk_SMULWT
+static inline opus_int32 silk_SMULWT_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smull %0, %1, %2, %3\n\t"
+      : "=r"(rd_lo), "=r"(rd_hi)
+      : "%r"(a), "r"(b&~0xFFFF)
+  );
+  return rd_hi;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv4(a, b))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#undef silk_SMLAWT
+#define silk_SMLAWT(a, b, c) ((a) + silk_SMULWT(b, c))
+
+/* (a32 * b32) >> 16 */
+#undef silk_SMULWW
+static inline opus_int32 silk_SMULWW_armv4(opus_int32 a, opus_int32 b)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+    "#silk_SMULWW\n\t"
+    "smull %0, %1, %2, %3\n\t"
+    : "=r"(rd_lo), "=r"(rd_hi)
+    : "%r"(a), "r"(b)
+  );
+  return (rd_lo>>16)|(rd_hi<<16);
+}
+#define silk_SMULWW(a, b) (silk_SMULWW_armv4(a, b))
+
+#undef silk_SMLAWW
+static inline opus_int32 silk_SMLAWW_armv4(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  unsigned rd_lo;
+  int rd_hi;
+  __asm__(
+    "#silk_SMULWW\n\t"
+    "smull %0, %1, %2, %3\n\t"
+    : "=r"(rd_lo), "=r"(rd_hi)
+    : "%r"(b), "r"(c)
+  );
+  return a+((rd_lo>>16)|(rd_hi<<16));
+}
+#define silk_SMLAWW(a, b, c) (silk_SMLAWW_armv4(a, b, c))
+
+#endif /* SILK_MACROS_ARMv4_H */
--- /dev/null
+++ b/silk/macros_armv5e.h
@@ -1,0 +1,213 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Copyright (c) 2013       Parrot
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_MACROS_ARMv5E_H
+#define SILK_MACROS_ARMv5E_H
+
+/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16; output has to be a 32-bit int */
+#undef silk_SMULWB
+static inline opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULWB\n\t"
+      "smulwb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
+
+/* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16; output has to be a 32-bit int */
+#undef silk_SMLAWB
+static inline opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int16 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLAWB\n\t"
+      "smlawb %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
+
+/* (a32 * (b32 >> 16)) >> 16 */
+#undef silk_SMULWT
+static inline opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULWT\n\t"
+      "smulwt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
+
+/* a32 + (b32 * (c32 >> 16)) >> 16 */
+#undef silk_SMLAWT
+static inline opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLAWT\n\t"
+      "smlawt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a32)) * (opus_int32)((opus_int16)(b32)); output has to be a 32-bit int */
+#undef silk_SMULBB
+static inline opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULBB\n\t"
+      "smulbb %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)); output has to be a 32-bit int */
+#undef silk_SMLABB
+static inline opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLABB\n\t"
+      "smlabb %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "%r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
+
+/* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
+#undef silk_SMULBT
+static inline opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SMULBT\n\t"
+      "smulbt %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
+
+/* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
+#undef silk_SMLABT
+static inline opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
+ opus_int32 c)
+{
+  int res;
+  __asm__(
+      "#silk_SMLABT\n\t"
+      "smlabt %0, %1, %2, %3\n\t"
+      : "=r"(res)
+      : "r"(b), "r"(c), "r"(a)
+  );
+  return res;
+}
+#define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
+
+/* add/subtract with output saturated */
+#undef silk_ADD_SAT32
+static inline opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_ADD_SAT32\n\t"
+      "qadd %0, %1, %2\n\t"
+      : "=r"(res)
+      : "%r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
+
+#undef silk_SUB_SAT32
+static inline opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
+{
+  int res;
+  __asm__(
+      "#silk_SUB_SAT32\n\t"
+      "qsub %0, %1, %2\n\t"
+      : "=r"(res)
+      : "r"(a), "r"(b)
+  );
+  return res;
+}
+#define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
+
+#undef silk_CLZ16
+static inline opus_int32 silk_CLZ16_armv5(opus_int16 in16)
+{
+  int res;
+  __asm__(
+      "#silk_CLZ16\n\t"
+      "clz %0, %1;\n"
+      : "=r"(res)
+      : "r"(in16<<16|0x8000)
+  );
+  return res;
+}
+#define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
+
+#undef silk_CLZ32
+static inline opus_int32 silk_CLZ32_armv5(opus_int32 in32)
+{
+  int res;
+  __asm__(
+      "#silk_CLZ32\n\t"
+      "clz %0, %1\n\t"
+      : "=&r"(res)
+      : "r"(in32)
+  );
+  return res;
+}
+#define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
+
+#endif /* SILK_MACROS_ARMv5E_H */