ref: b9df8895869afef6fa362390da277ae72f407185
parent: f662fd42eba24a3f3ba4a4f0a0f238516aa44221
parent: 62351fa834571f6c7ab40d7a80458aefa6d7f6ce
author: Jean-Marc Valin <[email protected]>
date: Fri Jun 20 14:14:08 EDT 2014
Merge branch 'exp_mips_opt'
--- /dev/null
+++ b/Makefile.mips
@@ -1,0 +1,156 @@
+#################### COMPILE OPTIONS #######################
+
+# Comment this out for a floating-point build
+FIXED_POINT=1
+
+# It is strongly recommended to enable exactly one of these:
+# VAR_ARRAYS: Use C99 variable-length arrays for stack allocation
+# USE_ALLOCA: Use alloca() for stack allocation
+# If neither is defined, the fallback is a non-threadsafe global array
+CFLAGS := -DUSE_ALLOCA $(CFLAGS)
+#CFLAGS := -DVAR_ARRAYS $(CFLAGS)
+
+# These options affect performance
+# HAVE_LRINTF: Use C99 intrinsics to speed up float-to-int conversion
+#CFLAGS := -DHAVE_LRINTF $(CFLAGS)
+
+###################### END OF OPTIONS ######################
+
+-include package_version
+
+include silk_sources.mk
+include celt_sources.mk
+include opus_sources.mk
+
+ifdef FIXED_POINT
+SILK_SOURCES += $(SILK_SOURCES_FIXED)
+else
+SILK_SOURCES += $(SILK_SOURCES_FLOAT)
+OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
+endif
+
+EXESUFFIX =
+LIBPREFIX = lib
+LIBSUFFIX = .a
+OBJSUFFIX = .o
+
+CC = $(TOOLCHAIN_PREFIX)cc$(TOOLCHAIN_SUFFIX)
+AR = $(TOOLCHAIN_PREFIX)ar
+RANLIB = $(TOOLCHAIN_PREFIX)ranlib
+CP = $(TOOLCHAIN_PREFIX)cp
+
+cppflags-from-defines = $(addprefix -D,$(1))
+cppflags-from-includes = $(addprefix -I,$(1))
+ldflags-from-ldlibdirs = $(addprefix -L,$(1))
+ldlibs-from-libs = $(addprefix -l,$(1))
+
+WARNINGS = -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow
+
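+# -mdspr2 enables the MIPS DSP ASE used by the inline-assembly kernels,
+# -march/-mtune target the 74K core, and -DMIPSr1_ASM selects the
+# MIPS-specific headers under celt/mips and the silk mips directories.
+# Adjust -march/-mtune for other MIPS32r2 cores as needed.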
+CFLAGS += -mips32r2 -mno-mips16 -std=gnu99 -O2 -g $(WARNINGS) -DENABLE_ASSERTIONS -DMIPSr1_ASM -DOPUS_BUILD -mdspr2 -march=74kc -mtune=74kc -mmt -mgp32
+
+CINCLUDES = include silk celt
+
+ifdef FIXED_POINT
+CFLAGS += -DFIXED_POINT=1 -DDISABLE_FLOAT_API
+CINCLUDES += silk/fixed
+else
+CINCLUDES += silk/float
+endif
+
+
+LIBS = m
+
+LDLIBDIRS = ./
+
+CFLAGS += $(call cppflags-from-defines,$(CDEFINES))
+CFLAGS += $(call cppflags-from-includes,$(CINCLUDES))
+LDFLAGS += $(call ldflags-from-ldlibdirs,$(LDLIBDIRS))
+LDLIBS += $(call ldlibs-from-libs,$(LIBS))
+
+COMPILE.c.cmdline = $(CC) -c $(CFLAGS) -o $@ $<
+LINK.o = $(CC) $(LDPREFLAGS) $(LDFLAGS)
+LINK.o.cmdline = $(LINK.o) $^ $(LDLIBS) -o $@$(EXESUFFIX)
+
+ARCHIVE.cmdline = $(AR) $(ARFLAGS) $@ $^ && $(RANLIB) $@
+
+%$(OBJSUFFIX):%.c
+ $(COMPILE.c.cmdline)
+
+%$(OBJSUFFIX):%.cpp
+ $(COMPILE.cpp.cmdline)
+
+# Directives
+
+
+# Variable definitions
+LIB_NAME = opus
+TARGET = $(LIBPREFIX)$(LIB_NAME)$(LIBSUFFIX)
+
+SRCS_C = $(SILK_SOURCES) $(CELT_SOURCES) $(OPUS_SOURCES)
+
+OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(SRCS_C))
+
+OPUSDEMO_SRCS_C = src/opus_demo.c
+OPUSDEMO_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSDEMO_SRCS_C))
+
+TESTOPUSAPI_SRCS_C = tests/test_opus_api.c
+TESTOPUSAPI_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSAPI_SRCS_C))
+
+TESTOPUSDECODE_SRCS_C = tests/test_opus_decode.c
+TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
+
+TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c
+TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))
+
+TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
+TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))
+
+OPUSCOMPARE_SRCS_C = src/opus_compare.c
+OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))
+
+# Rules
+all: lib opus_demo opus_compare test_opus_api test_opus_decode test_opus_encode test_opus_padding
+
+lib: $(TARGET)
+
+$(TARGET): $(OBJS)
+ $(ARCHIVE.cmdline)
+
+opus_demo$(EXESUFFIX): $(OPUSDEMO_OBJS) $(TARGET)
+ $(LINK.o.cmdline)
+
+test_opus_api$(EXESUFFIX): $(TESTOPUSAPI_OBJS) $(TARGET)
+ $(LINK.o.cmdline)
+
+test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
+ $(LINK.o.cmdline)
+
+test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
+ $(LINK.o.cmdline)
+
+test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
+ $(LINK.o.cmdline)
+
+opus_compare$(EXESUFFIX): $(OPUSCOMPARE_OBJS)
+ $(LINK.o.cmdline)
+
+celt/celt.o: CFLAGS += -DPACKAGE_VERSION='$(PACKAGE_VERSION)'
+celt/celt.o: package_version
+
+package_version: force
+ @if [ -x ./update_version ]; then \
+ ./update_version || true; \
+ elif [ ! -e ./package_version ]; then \
+ echo 'PACKAGE_VERSION="unknown"' > ./package_version; \
+ fi
+
+force:
+
+clean:
+ rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
+ test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
+ test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
+ $(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
+ $(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
+
+.PHONY: all lib clean
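+
+# Example cross-compile invocation (the toolchain prefix below is only a
+# placeholder; adjust it so that $(CC) resolves to your compiler):
+#   TOOLCHAIN_PREFIX=mips-linux-gnu- make -f Makefile.mips lib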
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -97,6 +97,9 @@
#if defined(OPUS_ARM_INLINE_EDSP)
#include "arm/kiss_fft_armv5e.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/kiss_fft_mipsr1.h"
+#endif
#else /* not FIXED_POINT*/
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -54,7 +54,11 @@
#define PACKAGE_VERSION "unknown"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/celt_mipsr1.h"
+#endif
+
int resampling_factor(opus_int32 rate)
{
int ret;
@@ -169,6 +173,7 @@
#endif
#endif
+#ifndef OVERRIDE_comb_filter
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap)
@@ -231,6 +236,7 @@
/* Compute the part with the constant filter. */
comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
}
+#endif /* OVERRIDE_comb_filter */
const signed char tf_select_table[4][8] = {
{0, -1, 0, -1, 0,-1, 0,-1},
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -135,6 +135,10 @@
/** Divide a 32-bit value by a 32-bit value. Result fits in 32 bits */
#define DIV32(a,b) (((opus_val32)(a))/((opus_val32)(b)))
+#if defined(MIPSr1_ASM)
+#include "mips/fixed_generic_mipsr1.h"
+#endif
+
static OPUS_INLINE opus_val16 SIG2WORD16_generic(celt_sig x)
{
x = PSHR32(x, SIG_SHIFT);
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -231,6 +231,7 @@
}
+#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -305,6 +306,7 @@
}
}
}
+#endif /* OVERRIDE_kf_bfly5 */
#endif
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -53,6 +53,11 @@
#include "mathops.h"
#include "stack_alloc.h"
+#if defined(MIPSr1_ASM)
+#include "mips/mdct_mipsr1.h"
+#endif
+
+
#ifdef CUSTOM_MODES
int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
@@ -110,6 +115,7 @@
#endif /* CUSTOM_MODES */
/* Forward MDCT trashes the input array */
+#ifndef OVERRIDE_clt_mdct_forward
void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 *window, int overlap, int shift, int stride)
{
@@ -229,7 +235,9 @@
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_clt_mdct_forward */
+#ifndef OVERRIDE_clt_mdct_backward
void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
{
@@ -330,3 +338,4 @@
}
}
}
+#endif /* OVERRIDE_clt_mdct_backward */
--- /dev/null
+++ b/celt/mips/celt_mipsr1.h
@@ -1,0 +1,149 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2010 Xiph.Org Foundation
+ Copyright (c) 2008 Gregory Maxwell
+ Written by Jean-Marc Valin and Gregory Maxwell */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CELT_MIPSR1_H__
+#define __CELT_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#define CELT_C
+
+#include "os_support.h"
+#include "mdct.h"
+#include <math.h>
+#include "celt.h"
+#include "pitch.h"
+#include "bands.h"
+#include "modes.h"
+#include "entcode.h"
+#include "quant_bands.h"
+#include "rate.h"
+#include "stack_alloc.h"
+#include "mathops.h"
+#include "float_cast.h"
+#include <stdarg.h>
+#include "celt_lpc.h"
+#include "vq.h"
+
+#define OVERRIDE_comb_filter
+void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
+ opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
+ const opus_val16 *window, int overlap)
+{
+ int i;
+ opus_val32 x0, x1, x2, x3, x4;
+
+ /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
+ opus_val16 g00, g01, g02, g10, g11, g12;
+ static const opus_val16 gains[3][3] = {
+ {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
+ {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)},
+ {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}};
+
+ if (g0==0 && g1==0)
+ {
+ /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+ if (x!=y)
+ OPUS_MOVE(y, x, N);
+ return;
+ }
+
+ g00 = MULT16_16_P15(g0, gains[tapset0][0]);
+ g01 = MULT16_16_P15(g0, gains[tapset0][1]);
+ g02 = MULT16_16_P15(g0, gains[tapset0][2]);
+ g10 = MULT16_16_P15(g1, gains[tapset1][0]);
+ g11 = MULT16_16_P15(g1, gains[tapset1][1]);
+ g12 = MULT16_16_P15(g1, gains[tapset1][2]);
+ x1 = x[-T1+1];
+ x2 = x[-T1 ];
+ x3 = x[-T1-1];
+ x4 = x[-T1-2];
+ /* If the filter didn't change, we don't need the overlap */
+ if (g0==g1 && T0==T1 && tapset0==tapset1)
+ overlap=0;
+
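+   /* Cross-fade region: each output sample accumulates six Q15 products in
+      DSP accumulator $ac1 (three taps around lag T0 weighted by 1-f, three
+      taps around lag T1 weighted by the squared window f) and extracts the
+      Q15 result with EXTR.W, matching the generic comb_filter() in celt.c. */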
+ for (i=0;i<overlap;i++)
+ {
+ opus_val16 f;
+ opus_val32 res;
+ f = MULT16_16_Q15(window[i],window[i]);
+ x0= x[i-T1+2];
+
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g00)), "r" ((int)x[i-T0]));
+
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g01)), "r" ((int)ADD32(x[i-T0-1],x[i-T0+1])));
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15((Q15ONE-f),g02)), "r" ((int)ADD32(x[i-T0-2],x[i-T0+2])));
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g10)), "r" ((int)x2));
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g11)), "r" ((int)ADD32(x3,x1)));
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)MULT16_16_Q15(f,g12)), "r" ((int)ADD32(x4,x0)));
+
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+
+ y[i] = x[i] + res;
+
+ x4=x3;
+ x3=x2;
+ x2=x1;
+ x1=x0;
+ }
+
+ x4 = x[i-T1-2];
+ x3 = x[i-T1-1];
+ x2 = x[i-T1];
+ x1 = x[i-T1+1];
+
+ if (g1==0)
+ {
+ /* OPT: Happens to work without the OPUS_MOVE(), but only because the current encoder already copies x to y */
+ if (x!=y)
+ OPUS_MOVE(y+overlap, x+overlap, N-overlap);
+ return;
+ }
+
+ for (i=overlap;i<N;i++)
+ {
+ opus_val32 res;
+ x0=x[i-T1+2];
+
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)g10), "r" ((int)x2));
+
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g11), "r" ((int)ADD32(x3,x1)));
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)g12), "r" ((int)ADD32(x4,x0)));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (res): "i" (15));
+ y[i] = x[i] + res;
+ x4=x3;
+ x3=x2;
+ x2=x1;
+ x1=x0;
+ }
+}
+
+#endif /* __CELT_MIPSR1_H__ */
--- /dev/null
+++ b/celt/mips/fixed_generic_mipsr1.h
@@ -1,0 +1,126 @@
+/* Copyright (C) 2007-2009 Xiph.Org Foundation
+ Copyright (C) 2003-2008 Jean-Marc Valin
+ Copyright (C) 2007-2008 CSIRO */
+/**
+ @file fixed_generic.h
+ @brief Generic fixed-point operations
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef CELT_FIXED_GENERIC_MIPSR1_H
+#define CELT_FIXED_GENERIC_MIPSR1_H
+
+#undef MULT16_32_Q15_ADD
+static inline int MULT16_32_Q15_ADD(int a, int b, int c, int d) {
+ int m;
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+ asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+ return m;
+}
+
+#undef MULT16_32_Q15_SUB
+static inline int MULT16_32_Q15_SUB(int a, int b, int c, int d) {
+ int m;
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+ asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+ return m;
+}
+
+#undef MULT16_16_Q15_ADD
+static inline int MULT16_16_Q15_ADD(int a, int b, int c, int d) {
+ int m;
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+ asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+ return m;
+}
+
+#undef MULT16_16_Q15_SUB
+static inline int MULT16_16_Q15_SUB(int a, int b, int c, int d) {
+ int m;
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+ asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+ return m;
+}
+
+
+#undef MULT16_32_Q16
+static inline int MULT16_32_Q16(int a, int b)
+{
+ int c;
+ asm volatile("MULT $ac1,%0, %1" : : "r" (a), "r" (b));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (16));
+ return c;
+}
+
+#undef MULT16_32_P16
+static inline int MULT16_32_P16(int a, int b)
+{
+ int c;
+ asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+ asm volatile("EXTR_R.W %0,$ac1, %1" : "=r" (c): "i" (16));
+ return c;
+}
+
+#undef MULT16_32_Q15
+static inline int MULT16_32_Q15(int a, int b)
+{
+ int c;
+ asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (c): "i" (15));
+ return c;
+}
+
+#undef MULT32_32_Q31
+static inline int MULT32_32_Q31(int a, int b)
+{
+ int r;
+ asm volatile("MULT $ac1, %0, %1" : : "r" (a), "r" (b));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (r): "i" (31));
+ return r;
+}
+
+#undef PSHR32
+static inline int PSHR32(int a, int shift)
+{
+ int r;
+ asm volatile ("SHRAV_R.W %0, %1, %2" :"=r" (r): "r" (a), "r" (shift));
+ return r;
+}
+
+#undef MULT16_16_P15
+static inline int MULT16_16_P15(int a, int b)
+{
+ int r;
+ asm volatile ("mul %0, %1, %2" :"=r" (r): "r" (a), "r" (b));
+ asm volatile ("SHRA_R.W %0, %1, %2" : "+r" (r): "0" (r), "i"(15));
+ return r;
+}
+
+#endif /* CELT_FIXED_GENERIC_MIPSR1_H */
--- /dev/null
+++ b/celt/mips/kiss_fft_mipsr1.h
@@ -1,0 +1,167 @@
+/*Copyright (c) 2013, Xiph.Org Foundation and contributors.
+
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ POSSIBILITY OF SUCH DAMAGE.*/
+
+#ifndef KISS_FFT_MIPSR1_H
+#define KISS_FFT_MIPSR1_H
+
+#if !defined(KISS_FFT_GUTS_H)
+#error "This file should only be included from _kiss_fft_guts.h"
+#endif
+
+#ifdef FIXED_POINT
+
+#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
+#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
+
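+/* The portable forms above are immediately replaced by versions that keep the
+   running sum in DSP accumulator $ac1 and extract the Q15 result with EXTR.W. */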
+#undef S_MUL_ADD
+static inline int S_MUL_ADD(int a, int b, int c, int d) {
+ int m;
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+ asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+ return m;
+}
+
+#undef S_MUL_SUB
+static inline int S_MUL_SUB(int a, int b, int c, int d) {
+ int m;
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
+ asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
+ return m;
+}
+
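+/* Q15 complex multiplies: C_MUL(m,a,b) computes m = a*b and C_MULC(m,a,b)
+   computes m = a*conj(b), each accumulating its two partial products per
+   component in $ac1. */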
+#undef C_MUL
+# define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
+static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
+ kiss_fft_cpx m;
+
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
+ asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
+ asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
+
+ return m;
+}
+#undef C_MULC
+# define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
+static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
+ kiss_fft_cpx m;
+
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
+ asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
+ asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
+ asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
+ asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
+
+ return m;
+}
+
+#endif /* FIXED_POINT */
+
+#define OVERRIDE_kf_bfly5
+static void kf_bfly5(
+ kiss_fft_cpx * Fout,
+ const size_t fstride,
+ const kiss_fft_state *st,
+ int m,
+ int N,
+ int mm
+ )
+{
+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
+ int i, u;
+ kiss_fft_cpx scratch[13];
+
+ const kiss_twiddle_cpx *tw;
+ kiss_twiddle_cpx ya,yb;
+ kiss_fft_cpx * Fout_beg = Fout;
+
+#ifdef FIXED_POINT
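+   /* Q15 constants for the radix-5 twiddles ya = exp(-2*pi*i/5) (about
+      0.3090 - 0.9511i) and yb = exp(-4*pi*i/5) (about -0.8090 - 0.5878i);
+      the float path below reads the same twiddles from st->twiddles. */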
+ ya.r = 10126;
+ ya.i = -31164;
+ yb.r = -26510;
+ yb.i = -19261;
+#else
+ ya = st->twiddles[fstride*m];
+ yb = st->twiddles[fstride*2*m];
+#endif
+
+ tw=st->twiddles;
+
+ for (i=0;i<N;i++)
+ {
+ Fout = Fout_beg + i*mm;
+ Fout0=Fout;
+ Fout1=Fout0+m;
+ Fout2=Fout0+2*m;
+ Fout3=Fout0+3*m;
+ Fout4=Fout0+4*m;
+
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
+ for ( u=0; u<m; ++u ) {
+ scratch[0] = *Fout0;
+
+
+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
+
+ C_ADD( scratch[7],scratch[1],scratch[4]);
+ C_SUB( scratch[10],scratch[1],scratch[4]);
+ C_ADD( scratch[8],scratch[2],scratch[3]);
+ C_SUB( scratch[9],scratch[2],scratch[3]);
+
+ Fout0->r += scratch[7].r + scratch[8].r;
+ Fout0->i += scratch[7].i + scratch[8].i;
+ scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
+ scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
+
+ scratch[6].r = S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
+ scratch[6].i = -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
+
+ C_SUB(*Fout1,scratch[5],scratch[6]);
+ C_ADD(*Fout4,scratch[5],scratch[6]);
+
+ scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
+ scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
+
+ scratch[12].r = S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
+ scratch[12].i = S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
+
+ C_ADD(*Fout2,scratch[11],scratch[12]);
+ C_SUB(*Fout3,scratch[11],scratch[12]);
+
+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
+ }
+ }
+}
+
+
+#endif /* KISS_FFT_MIPSR1_H */
--- /dev/null
+++ b/celt/mips/mdct_mipsr1.h
@@ -1,0 +1,283 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2008 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/* This is a simple MDCT implementation that uses an N/4 complex FFT
+   to do most of the work. It should be relatively straightforward to
+   plug in pretty much any FFT here.
+
+   This replaces the Vorbis FFT (and uses the exact same API), which
+   was a bit too messy and was ending up duplicating code
+   (might as well use the same FFT everywhere).
+
+   The algorithm is similar to (and inspired by) Fabrice Bellard's
+   MDCT implementation in FFMPEG, but has differences in signs, ordering
+   and scaling in many places.
+*/
+#ifndef __MDCT_MIPSR1_H__
+#define __MDCT_MIPSR1_H__
+
+#ifndef SKIP_CONFIG_H
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#endif
+
+#include "mdct.h"
+#include "kiss_fft.h"
+#include "_kiss_fft_guts.h"
+#include <math.h>
+#include "os_support.h"
+#include "mathops.h"
+#include "stack_alloc.h"
+
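+/* These overrides follow the generic clt_mdct_forward()/clt_mdct_backward()
+   in celt/mdct.c, but fuse pairs of S_MUL() products into the S_MUL_ADD()
+   and S_MUL_SUB() helpers from kiss_fft_mipsr1.h so that each rotation maps
+   onto a single DSP multiply-accumulate sequence. */
+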
+/* Forward MDCT trashes the input array */
+#define OVERRIDE_clt_mdct_forward
+void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window, int overlap, int shift, int stride)
+{
+ int i;
+ int N, N2, N4;
+ VARDECL(kiss_fft_scalar, f);
+ VARDECL(kiss_fft_cpx, f2);
+ const kiss_fft_state *st = l->kfft[shift];
+ const kiss_twiddle_scalar *trig;
+ opus_val16 scale;
+#ifdef FIXED_POINT
+ /* Allows us to scale with MULT16_32_Q16(), which is faster than
+ MULT16_32_Q15() on ARM. */
+ int scale_shift = st->scale_shift-1;
+#endif
+ SAVE_STACK;
+ scale = st->scale;
+
+ N = l->n;
+ trig = l->trig;
+ for (i=0;i<shift;i++)
+ {
+ N >>= 1;
+ trig += N;
+ }
+ N2 = N>>1;
+ N4 = N>>2;
+
+ ALLOC(f, N2, kiss_fft_scalar);
+ ALLOC(f2, N4, kiss_fft_cpx);
+
+ /* Consider the input to be composed of four blocks: [a, b, c, d] */
+ /* Window, shuffle, fold */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
+ const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
+ const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
+ for(i=0;i<((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
+ *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);
+ *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ wp1 = window;
+ wp2 = window+overlap-1;
+ for(;i<N4-((overlap+3)>>2);i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = *xp2;
+ *yp++ = *xp1;
+ xp1+=2;
+ xp2-=2;
+ }
+ for(;i<N4;i++)
+ {
+ /* Real part arranged as a-bR, Imag part arranged as -c-dR */
+ *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);
+ *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);
+ xp1+=2;
+ xp2-=2;
+ wp1+=2;
+ wp2-=2;
+ }
+ }
+ /* Pre-rotation */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT yp = f;
+ const kiss_twiddle_scalar *t = &trig[0];
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_cpx yc;
+ kiss_twiddle_scalar t0, t1;
+ kiss_fft_scalar re, im, yr, yi;
+ t0 = t[i];
+ t1 = t[N4+i];
+ re = *yp++;
+ im = *yp++;
+
+ yr = S_MUL_SUB(re,t0,im,t1);
+ yi = S_MUL_ADD(im,t0,re,t1);
+
+ yc.r = yr;
+ yc.i = yi;
+ yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
+ yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
+ f2[st->bitrev[i]] = yc;
+ }
+ }
+
+ /* N/4 complex FFT, does not downscale anymore */
+ opus_fft_impl(st, f2);
+
+ /* Post-rotate */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+ kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
+ const kiss_twiddle_scalar *t = &trig[0];
+ for(i=0;i<N4;i++)
+ {
+ kiss_fft_scalar yr, yi;
+ yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);
+ yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);
+ *yp1 = yr;
+ *yp2 = yi;
+ fp++;
+ yp1 += 2*stride;
+ yp2 -= 2*stride;
+ }
+ }
+ RESTORE_STACK;
+}
+
+#define OVERRIDE_clt_mdct_backward
+void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
+{
+ int i;
+ int N, N2, N4;
+ const kiss_twiddle_scalar *trig;
+
+ N = l->n;
+ trig = l->trig;
+ for (i=0;i<shift;i++)
+ {
+ N >>= 1;
+ trig += N;
+ }
+ N2 = N>>1;
+ N4 = N>>2;
+
+ /* Pre-rotate */
+ {
+ /* Temp pointers to make it really clear to the compiler what we're doing */
+ const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
+ const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
+ kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
+ const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
+ const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
+ for(i=0;i<N4;i++)
+ {
+ int rev;
+ kiss_fft_scalar yr, yi;
+ rev = *bitrev++;
+ yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);
+ yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);
+ /* We swap real and imag because we use an FFT instead of an IFFT. */
+ yp[2*rev+1] = yr;
+ yp[2*rev] = yi;
+ /* Storing the pre-rotation directly in the bitrev order. */
+ xp1+=2*stride;
+ xp2-=2*stride;
+ }
+ }
+
+ opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
+
+ /* Post-rotate and de-shuffle from both ends of the buffer at once to make
+ it in-place. */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;
+ const kiss_twiddle_scalar *t = &trig[0];
+ /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
+ middle pair will be computed twice. */
+ for(i=0;i<(N4+1)>>1;i++)
+ {
+ kiss_fft_scalar re, im, yr, yi;
+ kiss_twiddle_scalar t0, t1;
+ /* We swap real and imag because we're using an FFT instead of an IFFT. */
+ re = yp0[1];
+ im = yp0[0];
+ t0 = t[i];
+ t1 = t[N4+i];
+ /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+ yr = S_MUL_ADD(re,t0 , im,t1);
+ yi = S_MUL_SUB(re,t1 , im,t0);
+ /* We swap real and imag because we're using an FFT instead of an IFFT. */
+ re = yp1[1];
+ im = yp1[0];
+ yp0[0] = yr;
+ yp1[1] = yi;
+
+ t0 = t[(N4-i-1)];
+ t1 = t[(N2-i-1)];
+ /* We'd scale up by 2 here, but instead it's done when mixing the windows */
+ yr = S_MUL_ADD(re,t0,im,t1);
+ yi = S_MUL_SUB(re,t1,im,t0);
+ yp1[0] = yr;
+ yp0[1] = yi;
+ yp0 += 2;
+ yp1 -= 2;
+ }
+ }
+
+ /* Mirror on both sides for TDAC */
+ {
+ kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
+ kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
+ const opus_val16 * OPUS_RESTRICT wp1 = window;
+ const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
+
+ for(i = 0; i < overlap/2; i++)
+ {
+ kiss_fft_scalar x1, x2;
+ x1 = *xp1;
+ x2 = *yp1;
+ *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
+ *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+ wp1++;
+ wp2--;
+ }
+ }
+}
+#endif /* __MDCT_MIPSR1_H__ */
--- /dev/null
+++ b/celt/mips/pitch_mipsr1.h
@@ -1,0 +1,154 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/**
+ @file pitch.h
+ @brief Pitch analysis
+ */
+
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef PITCH_MIPSR1_H
+#define PITCH_MIPSR1_H
+
+#define OVERRIDE_DUAL_INNER_PROD
+static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
+ int N, opus_val32 *xy1, opus_val32 *xy2)
+{
+ int j;
+ opus_val32 xy01=0;
+ opus_val32 xy02=0;
+ asm volatile("MULT $ac1, $0, $0");
+ asm volatile("MULT $ac2, $0, $0");
+   /* Compute both inner products xy1 = sum(x[j]*y01[j]) and xy2 = sum(x[j]*y02[j])
+      in parallel, two samples per loop iteration (N is assumed to be even). */
+ for (j=0;j<N;j++)
+ {
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
+ asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+ ++j;
+ asm volatile("MADD $ac1, %0, %1" : : "r" ((int)x[j]), "r" ((int)y01[j]));
+ asm volatile("MADD $ac2, %0, %1" : : "r" ((int)x[j]), "r" ((int)y02[j]));
+ }
+ asm volatile ("mflo %0, $ac1": "=r"(xy01));
+ asm volatile ("mflo %0, $ac2": "=r"(xy02));
+ *xy1 = xy01;
+ *xy2 = xy02;
+}
+
+#define OVERRIDE_XCORR_KERNEL
+static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+{
+ int j;
+ opus_val16 y_0, y_1, y_2, y_3;
+
+ opus_int64 sum_0, sum_1, sum_2, sum_3;
+ sum_0 = (opus_int64)sum[0];
+ sum_1 = (opus_int64)sum[1];
+ sum_2 = (opus_int64)sum[2];
+ sum_3 = (opus_int64)sum[3];
+
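+   /* __builtin_mips_madd(acc, a, b) returns acc + (opus_int64)a*b using the
+      multiply-accumulate unit; the unrolled loop below consumes four x[]
+      samples per iteration while rotating y_0..y_3. */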
+ y_3=0; /* gcc doesn't realize that y_3 can't be used uninitialized */
+ y_0=*y++;
+ y_1=*y++;
+ y_2=*y++;
+ for (j=0;j<len-3;j+=4)
+ {
+ opus_val16 tmp;
+ tmp = *x++;
+ y_3=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_0);
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_1);
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
+
+ tmp=*x++;
+ y_0=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
+
+ tmp=*x++;
+ y_1=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
+
+
+ tmp=*x++;
+ y_2=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_3 );
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_0 );
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_1);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_2);
+
+ }
+ if (j++<len)
+ {
+ opus_val16 tmp = *x++;
+ y_3=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_0 );
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_1 );
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_2);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_3);
+ }
+
+ if (j++<len)
+ {
+ opus_val16 tmp=*x++;
+ y_0=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_1 );
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_2 );
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_3);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_0);
+ }
+
+ if (j<len)
+ {
+ opus_val16 tmp=*x++;
+ y_1=*y++;
+
+ sum_0 = __builtin_mips_madd( sum_0, tmp, y_2 );
+ sum_1 = __builtin_mips_madd( sum_1, tmp, y_3 );
+ sum_2 = __builtin_mips_madd( sum_2, tmp, y_0);
+ sum_3 = __builtin_mips_madd( sum_3, tmp, y_1);
+
+ }
+
+ sum[0] = (opus_val32)sum_0;
+ sum[1] = (opus_val32)sum_1;
+ sum[2] = (opus_val32)sum_2;
+ sum[3] = (opus_val32)sum_3;
+}
+
+#endif /* PITCH_MIPSR1_H */
--- /dev/null
+++ b/celt/mips/vq_mipsr1.h
@@ -1,0 +1,123 @@
+/* Copyright (c) 2007-2008 CSIRO
+ Copyright (c) 2007-2009 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __VQ_MIPSR1_H__
+#define __VQ_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "mathops.h"
+#include "cwrs.h"
+#include "vq.h"
+#include "arch.h"
+#include "os_support.h"
+#include "bands.h"
+#include "rate.h"
+
+static unsigned extract_collapse_mask(int *iy, int N, int B);
+static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, int N, opus_val32 Ryy, opus_val16 gain);
+static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread);
+
+#define OVERRIDE_vq_exp_rotation1
+static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
+{
+ int i;
+ opus_val16 ms;
+ celt_norm *Xptr;
+ Xptr = X;
+ ms = NEG16(s);
+ for (i=0;i<len-stride;i++)
+ {
+ celt_norm x1, x2;
+ x1 = Xptr[0];
+ x2 = Xptr[stride];
+ Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
+ *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+ }
+ Xptr = &X[len-2*stride-1];
+ for (i=len-2*stride-1;i>=0;i--)
+ {
+ celt_norm x1, x2;
+ x1 = Xptr[0];
+ x2 = Xptr[stride];
+ Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
+ *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
+ }
+}
+
+#define OVERRIDE_renormalise_vector
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
+{
+ int i;
+#ifdef FIXED_POINT
+ int k;
+#endif
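+   /* Note: this override assumes a fixed-point build; k is declared only
+      under FIXED_POINT but is used unconditionally further down. */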
+ opus_val32 E = EPSILON;
+ opus_val16 g;
+ opus_val32 t;
+ celt_norm *xptr = X;
+
+ int X0, X2, X3, X1;
+ asm volatile("mult $ac1, $0, $0");
+ asm volatile("MTLO %0, $ac1" : :"r" (E));
+ /*if(N %4)
+ printf("error");*/
+ for (i=0;i<N-2;i+=2)
+ {
+ X0 = (int)*xptr++;
+ asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+
+ X1 = (int)*xptr++;
+ asm volatile("MADD $ac1, %0, %1" : : "r" (X1), "r" (X1));
+ }
+
+ for (;i<N;i++)
+ {
+ X0 = (int)*xptr++;
+ asm volatile("MADD $ac1, %0, %1" : : "r" (X0), "r" (X0));
+ }
+
+ asm volatile("MFLO %0, $ac1" : "=r" (E));
+#ifdef FIXED_POINT
+ k = celt_ilog2(E)>>1;
+#endif
+ t = VSHR32(E, 2*(k-7));
+ g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
+
+ xptr = X;
+ for (i=0;i<N;i++)
+ {
+ *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
+ xptr++;
+ }
+ /*return celt_sqrt(E);*/
+}
+
+#endif /* __VQ_MIPSR1_H__ */
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -41,6 +41,10 @@
#include "x86/pitch_sse.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/pitch_mipsr1.h"
+#endif
+
#if defined(OPUS_ARM_ASM) && defined(FIXED_POINT)
# include "arm/pitch_arm.h"
#endif
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -39,6 +39,11 @@
#include "rate.h"
#include "pitch.h"
+#if defined(MIPSr1_ASM)
+#include "mips/vq_mipsr1.h"
+#endif
+
+#ifndef OVERRIDE_vq_exp_rotation1
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
{
int i;
@@ -64,6 +69,7 @@
*Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
}
}
+#endif /* OVERRIDE_vq_exp_rotation1 */
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
{
@@ -343,6 +349,7 @@
return collapse_mask;
}
+#ifndef OVERRIDE_renormalise_vector
void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
{
int i;
@@ -368,6 +375,7 @@
}
/*return celt_sqrt(E);*/
}
+#endif /* OVERRIDE_renormalise_vector */
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N)
{
--- a/silk/NSQ_del_dec.c
+++ b/silk/NSQ_del_dec.c
@@ -57,6 +57,9 @@
typedef NSQ_sample_struct NSQ_sample_pair[ 2 ];
+#if defined(MIPSr1_ASM)
+#include "mips/NSQ_del_dec_mipsr1.h"
+#endif
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@@ -303,6 +306,7 @@
/******************************************/
/* Noise shape quantizer for one subframe */
/******************************************/
+#ifndef OVERRIDE_silk_noise_shape_quantizer_del_dec
static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
silk_nsq_state *NSQ, /* I/O NSQ state */
NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
@@ -629,6 +633,7 @@
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_silk_noise_shape_quantizer_del_dec */
static OPUS_INLINE void silk_nsq_del_dec_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */
--- a/silk/SigProc_FIX.h
+++ b/silk/SigProc_FIX.h
@@ -587,6 +587,11 @@
#include "arm/SigProc_FIX_armv5e.h"
#endif
+#if defined(MIPSr1_ASM)
+#include "mips/sigproc_fix_mipsr1.h"
+#endif
+
+
#ifdef __cplusplus
}
#endif
--- /dev/null
+++ b/silk/fixed/mips/noise_shape_analysis_FIX_mipsr1.h
@@ -1,0 +1,336 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+
+/**************************************************************/
+/* Compute noise shaping coefficients and initial gain values */
+/**************************************************************/
+#define OVERRIDE_silk_noise_shape_analysis_FIX
+
+void silk_noise_shape_analysis_FIX(
+ silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
+ silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
+ const opus_int16 *pitch_res, /* I LPC residual from pitch analysis */
+ const opus_int16 *x, /* I Input signal [ frame_length + la_shape ] */
+ int arch /* I Run-time architecture */
+)
+{
+ silk_shape_state_FIX *psShapeSt = &psEnc->sShape;
+ opus_int k, i, nSamples, Qnrg, b_Q14, warping_Q16, scale = 0;
+ opus_int32 SNR_adj_dB_Q7, HarmBoost_Q16, HarmShapeGain_Q16, Tilt_Q16, tmp32;
+ opus_int32 nrg, pre_nrg_Q30, log_energy_Q7, log_energy_prev_Q7, energy_variation_Q7;
+ opus_int32 delta_Q16, BWExp1_Q16, BWExp2_Q16, gain_mult_Q16, gain_add_Q16, strength_Q16, b_Q8;
+ opus_int32 auto_corr[ MAX_SHAPE_LPC_ORDER + 1 ];
+ opus_int32 refl_coef_Q16[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 AR1_Q24[ MAX_SHAPE_LPC_ORDER ];
+ opus_int32 AR2_Q24[ MAX_SHAPE_LPC_ORDER ];
+ VARDECL( opus_int16, x_windowed );
+ const opus_int16 *x_ptr, *pitch_res_ptr;
+ SAVE_STACK;
+
+ /* Point to start of first LPC analysis block */
+ x_ptr = x - psEnc->sCmn.la_shape;
+
+ /****************/
+ /* GAIN CONTROL */
+ /****************/
+ SNR_adj_dB_Q7 = psEnc->sCmn.SNR_dB_Q7;
+
+ /* Input quality is the average of the quality in the lowest two VAD bands */
+ psEncCtrl->input_quality_Q14 = ( opus_int )silk_RSHIFT( (opus_int32)psEnc->sCmn.input_quality_bands_Q15[ 0 ]
+ + psEnc->sCmn.input_quality_bands_Q15[ 1 ], 2 );
+
+ /* Coding quality level, between 0.0_Q0 and 1.0_Q0, but in Q14 */
+ psEncCtrl->coding_quality_Q14 = silk_RSHIFT( silk_sigm_Q15( silk_RSHIFT_ROUND( SNR_adj_dB_Q7 -
+ SILK_FIX_CONST( 20.0, 7 ), 4 ) ), 1 );
+
+ /* Reduce coding SNR during low speech activity */
+ if( psEnc->sCmn.useCBR == 0 ) {
+ b_Q8 = SILK_FIX_CONST( 1.0, 8 ) - psEnc->sCmn.speech_activity_Q8;
+ b_Q8 = silk_SMULWB( silk_LSHIFT( b_Q8, 8 ), b_Q8 );
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+ silk_SMULBB( SILK_FIX_CONST( -BG_SNR_DECR_dB, 7 ) >> ( 4 + 1 ), b_Q8 ), /* Q11*/
+ silk_SMULWB( SILK_FIX_CONST( 1.0, 14 ) + psEncCtrl->input_quality_Q14, psEncCtrl->coding_quality_Q14 ) ); /* Q12*/
+ }
+
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce gains for periodic signals */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( HARM_SNR_INCR_dB, 8 ), psEnc->LTPCorr_Q15 );
+ } else {
+ /* For unvoiced signals and low-quality input, adjust the quality slower than SNR_dB setting */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7,
+ silk_SMLAWB( SILK_FIX_CONST( 6.0, 9 ), -SILK_FIX_CONST( 0.4, 18 ), psEnc->sCmn.SNR_dB_Q7 ),
+ SILK_FIX_CONST( 1.0, 14 ) - psEncCtrl->input_quality_Q14 );
+ }
+
+ /*************************/
+ /* SPARSENESS PROCESSING */
+ /*************************/
+ /* Set quantizer offset */
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Initially set to 0; may be overruled in process_gains(..) */
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ psEncCtrl->sparseness_Q8 = 0;
+ } else {
+ /* Sparseness measure, based on relative fluctuations of energy per 2 milliseconds */
+ nSamples = silk_LSHIFT( psEnc->sCmn.fs_kHz, 1 );
+ energy_variation_Q7 = 0;
+ log_energy_prev_Q7 = 0;
+ pitch_res_ptr = pitch_res;
+ for( k = 0; k < silk_SMULBB( SUB_FRAME_LENGTH_MS, psEnc->sCmn.nb_subfr ) / 2; k++ ) {
+ silk_sum_sqr_shift( &nrg, &scale, pitch_res_ptr, nSamples );
+ nrg += silk_RSHIFT( nSamples, scale ); /* Q(-scale)*/
+
+ log_energy_Q7 = silk_lin2log( nrg );
+ if( k > 0 ) {
+ energy_variation_Q7 += silk_abs( log_energy_Q7 - log_energy_prev_Q7 );
+ }
+ log_energy_prev_Q7 = log_energy_Q7;
+ pitch_res_ptr += nSamples;
+ }
+
+ psEncCtrl->sparseness_Q8 = silk_RSHIFT( silk_sigm_Q15( silk_SMULWB( energy_variation_Q7 -
+ SILK_FIX_CONST( 5.0, 7 ), SILK_FIX_CONST( 0.1, 16 ) ) ), 7 );
+
+ /* Set quantization offset depending on sparseness measure */
+ if( psEncCtrl->sparseness_Q8 > SILK_FIX_CONST( SPARSENESS_THRESHOLD_QNT_OFFSET, 8 ) ) {
+ psEnc->sCmn.indices.quantOffsetType = 0;
+ } else {
+ psEnc->sCmn.indices.quantOffsetType = 1;
+ }
+
+ /* Increase coding SNR for sparse signals */
+ SNR_adj_dB_Q7 = silk_SMLAWB( SNR_adj_dB_Q7, SILK_FIX_CONST( SPARSE_SNR_INCR_dB, 15 ), psEncCtrl->sparseness_Q8 - SILK_FIX_CONST( 0.5, 8 ) );
+ }
+
+ /*******************************/
+ /* Control bandwidth expansion */
+ /*******************************/
+ /* More BWE for signals with high prediction gain */
+ strength_Q16 = silk_SMULWB( psEncCtrl->predGain_Q16, SILK_FIX_CONST( FIND_PITCH_WHITE_NOISE_FRACTION, 16 ) );
+ BWExp1_Q16 = BWExp2_Q16 = silk_DIV32_varQ( SILK_FIX_CONST( BANDWIDTH_EXPANSION, 16 ),
+ silk_SMLAWW( SILK_FIX_CONST( 1.0, 16 ), strength_Q16, strength_Q16 ), 16 );
+ delta_Q16 = silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - silk_SMULBB( 3, psEncCtrl->coding_quality_Q14 ),
+ SILK_FIX_CONST( LOW_RATE_BANDWIDTH_EXPANSION_DELTA, 16 ) );
+ BWExp1_Q16 = silk_SUB32( BWExp1_Q16, delta_Q16 );
+ BWExp2_Q16 = silk_ADD32( BWExp2_Q16, delta_Q16 );
+ /* BWExp1 will be applied after BWExp2, so make it relative */
+ BWExp1_Q16 = silk_DIV32_16( silk_LSHIFT( BWExp1_Q16, 14 ), silk_RSHIFT( BWExp2_Q16, 2 ) );
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Slightly more warping in analysis will move quantization noise up in frequency, where it's better masked */
+ warping_Q16 = silk_SMLAWB( psEnc->sCmn.warping_Q16, (opus_int32)psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( 0.01, 18 ) );
+ } else {
+ warping_Q16 = 0;
+ }
+
+ /********************************************/
+ /* Compute noise shaping AR coefs and gains */
+ /********************************************/
+ ALLOC( x_windowed, psEnc->sCmn.shapeWinLength, opus_int16 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ /* Apply window: sine slope followed by flat part followed by cosine slope */
+ opus_int shift, slope_part, flat_part;
+ flat_part = psEnc->sCmn.fs_kHz * 3;
+ slope_part = silk_RSHIFT( psEnc->sCmn.shapeWinLength - flat_part, 1 );
+
+ silk_apply_sine_window( x_windowed, x_ptr, 1, slope_part );
+ shift = slope_part;
+ silk_memcpy( x_windowed + shift, x_ptr + shift, flat_part * sizeof(opus_int16) );
+ shift += flat_part;
+ silk_apply_sine_window( x_windowed + shift, x_ptr + shift, 2, slope_part );
+
+ /* Update pointer: next LPC analysis block */
+ x_ptr += psEnc->sCmn.subfr_length;
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Calculate warped auto correlation */
+ silk_warped_autocorrelation_FIX( auto_corr, &scale, x_windowed, warping_Q16, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder );
+ } else {
+ /* Calculate regular auto correlation */
+ silk_autocorr( auto_corr, &scale, x_windowed, psEnc->sCmn.shapeWinLength, psEnc->sCmn.shapingLPCOrder + 1, arch );
+ }
+
+ /* Add white noise, as a fraction of energy */
+ auto_corr[0] = silk_ADD32( auto_corr[0], silk_max_32( silk_SMULWB( silk_RSHIFT( auto_corr[ 0 ], 4 ),
+ SILK_FIX_CONST( SHAPE_WHITE_NOISE_FRACTION, 20 ) ), 1 ) );
+
+ /* Calculate the reflection coefficients using schur */
+ nrg = silk_schur64( refl_coef_Q16, auto_corr, psEnc->sCmn.shapingLPCOrder );
+ silk_assert( nrg >= 0 );
+
+ /* Convert reflection coefficients to prediction coefficients */
+ silk_k2a_Q16( AR2_Q24, refl_coef_Q16, psEnc->sCmn.shapingLPCOrder );
+
+ Qnrg = -scale; /* range: -12...30*/
+ silk_assert( Qnrg >= -12 );
+ silk_assert( Qnrg <= 30 );
+
+ /* Make sure that Qnrg is an even number */
+ if( Qnrg & 1 ) {
+ Qnrg -= 1;
+ nrg >>= 1;
+ }
+
+ tmp32 = silk_SQRT_APPROX( nrg );
+ Qnrg >>= 1; /* range: -6...15*/
+
+ psEncCtrl->Gains_Q16[ k ] = (silk_LSHIFT32( silk_LIMIT( (tmp32), silk_RSHIFT32( silk_int32_MIN, (16 - Qnrg) ), \
+ silk_RSHIFT32( silk_int32_MAX, (16 - Qnrg) ) ), (16 - Qnrg) ));
+
+ if( psEnc->sCmn.warping_Q16 > 0 ) {
+ /* Adjust gain for warping */
+ gain_mult_Q16 = warped_gain( AR2_Q24, warping_Q16, psEnc->sCmn.shapingLPCOrder );
+ silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+ if ( silk_SMULWW( silk_RSHIFT_ROUND( psEncCtrl->Gains_Q16[ k ], 1 ), gain_mult_Q16 ) >= ( silk_int32_MAX >> 1 ) ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_int32_MAX;
+ } else {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+ }
+ }
+
+ /* Bandwidth expansion for synthesis filter shaping */
+ silk_bwexpander_32( AR2_Q24, psEnc->sCmn.shapingLPCOrder, BWExp2_Q16 );
+
+ /* Compute noise shaping filter coefficients */
+ silk_memcpy( AR1_Q24, AR2_Q24, psEnc->sCmn.shapingLPCOrder * sizeof( opus_int32 ) );
+
+ /* Bandwidth expansion for analysis filter shaping */
+ silk_assert( BWExp1_Q16 <= SILK_FIX_CONST( 1.0, 16 ) );
+ silk_bwexpander_32( AR1_Q24, psEnc->sCmn.shapingLPCOrder, BWExp1_Q16 );
+
+ /* Ratio of prediction gains, in energy domain */
+ pre_nrg_Q30 = silk_LPC_inverse_pred_gain_Q24( AR2_Q24, psEnc->sCmn.shapingLPCOrder );
+ nrg = silk_LPC_inverse_pred_gain_Q24( AR1_Q24, psEnc->sCmn.shapingLPCOrder );
+
+ /*psEncCtrl->GainsPre[ k ] = 1.0f - 0.7f * ( 1.0f - pre_nrg / nrg ) = 0.3f + 0.7f * pre_nrg / nrg;*/
+ pre_nrg_Q30 = silk_LSHIFT32( silk_SMULWB( pre_nrg_Q30, SILK_FIX_CONST( 0.7, 15 ) ), 1 );
+ psEncCtrl->GainsPre_Q14[ k ] = ( opus_int ) SILK_FIX_CONST( 0.3, 14 ) + silk_DIV32_varQ( pre_nrg_Q30, nrg, 14 );
+
+ /* Convert to monic warped prediction coefficients and limit absolute values */
+ limit_warped_coefs( AR2_Q24, AR1_Q24, warping_Q16, SILK_FIX_CONST( 3.999, 24 ), psEnc->sCmn.shapingLPCOrder );
+
+ /* Convert from Q24 to Q13 and store in int16 */
+ for( i = 0; i < psEnc->sCmn.shapingLPCOrder; i++ ) {
+ psEncCtrl->AR1_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR1_Q24[ i ], 11 ) );
+ psEncCtrl->AR2_Q13[ k * MAX_SHAPE_LPC_ORDER + i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( AR2_Q24[ i ], 11 ) );
+ }
+ }
+
+ /*****************/
+ /* Gain tweaking */
+ /*****************/
+ /* Increase gains during low speech activity and put lower limit on gains */
+ gain_mult_Q16 = silk_log2lin( -silk_SMLAWB( -SILK_FIX_CONST( 16.0, 7 ), SNR_adj_dB_Q7, SILK_FIX_CONST( 0.16, 16 ) ) );
+ gain_add_Q16 = silk_log2lin( silk_SMLAWB( SILK_FIX_CONST( 16.0, 7 ), SILK_FIX_CONST( MIN_QGAIN_DB, 7 ), SILK_FIX_CONST( 0.16, 16 ) ) );
+ silk_assert( gain_mult_Q16 > 0 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->Gains_Q16[ k ] = silk_SMULWW( psEncCtrl->Gains_Q16[ k ], gain_mult_Q16 );
+ silk_assert( psEncCtrl->Gains_Q16[ k ] >= 0 );
+ psEncCtrl->Gains_Q16[ k ] = silk_ADD_POS_SAT32( psEncCtrl->Gains_Q16[ k ], gain_add_Q16 );
+ }
+
+ gain_mult_Q16 = SILK_FIX_CONST( 1.0, 16 ) + silk_RSHIFT_ROUND( silk_MLA( SILK_FIX_CONST( INPUT_TILT, 26 ),
+ psEncCtrl->coding_quality_Q14, SILK_FIX_CONST( HIGH_RATE_INPUT_TILT, 12 ) ), 10 );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->GainsPre_Q14[ k ] = silk_SMULWB( gain_mult_Q16, psEncCtrl->GainsPre_Q14[ k ] );
+ }
+
+ /************************************************/
+ /* Control low-frequency shaping and noise tilt */
+ /************************************************/
+ /* Less low frequency shaping for noisy inputs */
+ strength_Q16 = silk_MUL( SILK_FIX_CONST( LOW_FREQ_SHAPING, 4 ), silk_SMLAWB( SILK_FIX_CONST( 1.0, 12 ),
+ SILK_FIX_CONST( LOW_QUALITY_LOW_FREQ_SHAPING_DECR, 13 ), psEnc->sCmn.input_quality_bands_Q15[ 0 ] - SILK_FIX_CONST( 1.0, 15 ) ) );
+ strength_Q16 = silk_RSHIFT( silk_MUL( strength_Q16, psEnc->sCmn.speech_activity_Q8 ), 8 );
+ if( psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* Reduce low frequencies quantization noise for periodic signals, depending on pitch lag */
+ /*f = 400; freqz([1, -0.98 + 2e-4 * f], [1, -0.97 + 7e-4 * f], 2^12, Fs); axis([0, 1000, -10, 1])*/
+ opus_int fs_kHz_inv = silk_DIV32_16( SILK_FIX_CONST( 0.2, 14 ), psEnc->sCmn.fs_kHz );
+ for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) {
+ b_Q14 = fs_kHz_inv + silk_DIV32_16( SILK_FIX_CONST( 3.0, 14 ), psEncCtrl->pitchL[ k ] );
+ /* Pack two coefficients in one int32 */
+ psEncCtrl->LF_shp_Q14[ k ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 - silk_SMULWB( strength_Q16, b_Q14 ), 16 );
+ psEncCtrl->LF_shp_Q14[ k ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+ }
+ silk_assert( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ) < SILK_FIX_CONST( 0.5, 24 ) ); /* Guarantees that second argument to SMULWB() is within range of an opus_int16*/
+ Tilt_Q16 = - SILK_FIX_CONST( HP_NOISE_COEF, 16 ) -
+ silk_SMULWB( SILK_FIX_CONST( 1.0, 16 ) - SILK_FIX_CONST( HP_NOISE_COEF, 16 ),
+ silk_SMULWB( SILK_FIX_CONST( HARM_HP_NOISE_COEF, 24 ), psEnc->sCmn.speech_activity_Q8 ) );
+ } else {
+ b_Q14 = silk_DIV32_16( 21299, psEnc->sCmn.fs_kHz ); /* 1.3_Q0 = 21299_Q14*/
+ /* Pack two coefficients in one int32 */
+ psEncCtrl->LF_shp_Q14[ 0 ] = silk_LSHIFT( SILK_FIX_CONST( 1.0, 14 ) - b_Q14 -
+ silk_SMULWB( strength_Q16, silk_SMULWB( SILK_FIX_CONST( 0.6, 16 ), b_Q14 ) ), 16 );
+ psEncCtrl->LF_shp_Q14[ 0 ] |= (opus_uint16)( b_Q14 - SILK_FIX_CONST( 1.0, 14 ) );
+ for( k = 1; k < psEnc->sCmn.nb_subfr; k++ ) {
+ psEncCtrl->LF_shp_Q14[ k ] = psEncCtrl->LF_shp_Q14[ 0 ];
+ }
+ Tilt_Q16 = -SILK_FIX_CONST( HP_NOISE_COEF, 16 );
+ }
+
+ /****************************/
+ /* HARMONIC SHAPING CONTROL */
+ /****************************/
+ /* Control boosting of harmonic frequencies */
+ HarmBoost_Q16 = silk_SMULWB( silk_SMULWB( SILK_FIX_CONST( 1.0, 17 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 3 ),
+ psEnc->LTPCorr_Q15 ), SILK_FIX_CONST( LOW_RATE_HARMONIC_BOOST, 16 ) );
+
+ /* More harmonic boost for noisy input signals */
+ HarmBoost_Q16 = silk_SMLAWB( HarmBoost_Q16,
+ SILK_FIX_CONST( 1.0, 16 ) - silk_LSHIFT( psEncCtrl->input_quality_Q14, 2 ), SILK_FIX_CONST( LOW_INPUT_QUALITY_HARMONIC_BOOST, 16 ) );
+
+ if( USE_HARM_SHAPING && psEnc->sCmn.indices.signalType == TYPE_VOICED ) {
+ /* More harmonic noise shaping for high bitrates or noisy input */
+ HarmShapeGain_Q16 = silk_SMLAWB( SILK_FIX_CONST( HARMONIC_SHAPING, 16 ),
+ SILK_FIX_CONST( 1.0, 16 ) - silk_SMULWB( SILK_FIX_CONST( 1.0, 18 ) - silk_LSHIFT( psEncCtrl->coding_quality_Q14, 4 ),
+ psEncCtrl->input_quality_Q14 ), SILK_FIX_CONST( HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING, 16 ) );
+
+ /* Less harmonic noise shaping for less periodic signals */
+ HarmShapeGain_Q16 = silk_SMULWB( silk_LSHIFT( HarmShapeGain_Q16, 1 ),
+ silk_SQRT_APPROX( silk_LSHIFT( psEnc->LTPCorr_Q15, 15 ) ) );
+ } else {
+ HarmShapeGain_Q16 = 0;
+ }
+
+ /*************************/
+ /* Smooth over subframes */
+ /*************************/
+ for( k = 0; k < MAX_NB_SUBFR; k++ ) {
+ psShapeSt->HarmBoost_smth_Q16 =
+ silk_SMLAWB( psShapeSt->HarmBoost_smth_Q16, HarmBoost_Q16 - psShapeSt->HarmBoost_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+ psShapeSt->HarmShapeGain_smth_Q16 =
+ silk_SMLAWB( psShapeSt->HarmShapeGain_smth_Q16, HarmShapeGain_Q16 - psShapeSt->HarmShapeGain_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+ psShapeSt->Tilt_smth_Q16 =
+ silk_SMLAWB( psShapeSt->Tilt_smth_Q16, Tilt_Q16 - psShapeSt->Tilt_smth_Q16, SILK_FIX_CONST( SUBFR_SMTH_COEF, 16 ) );
+
+ psEncCtrl->HarmBoost_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmBoost_smth_Q16, 2 );
+ psEncCtrl->HarmShapeGain_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->HarmShapeGain_smth_Q16, 2 );
+ psEncCtrl->Tilt_Q14[ k ] = ( opus_int )silk_RSHIFT_ROUND( psShapeSt->Tilt_smth_Q16, 2 );
+ }
+ RESTORE_STACK;
+}
--- /dev/null
+++ b/silk/fixed/mips/prefilter_FIX_mipsr1.h
@@ -1,0 +1,181 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+#ifndef __PREFILTER_FIX_MIPSR1_H__
+#define __PREFILTER_FIX_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+#include "stack_alloc.h"
+#include "tuning_parameters.h"
+
+#define OVERRIDE_silk_warped_LPC_analysis_filter_FIX
+void silk_warped_LPC_analysis_filter_FIX(
+ opus_int32 state[], /* I/O State [order + 1] */
+ opus_int32 res_Q2[], /* O Residual signal [length] */
+ const opus_int16 coef_Q13[], /* I Coefficients [order] */
+ const opus_int16 input[], /* I Input signal [length] */
+ const opus_int16 lambda_Q16, /* I Warping factor */
+ const opus_int length, /* I Length of input signal */
+ const opus_int order /* I Filter order (even) */
+)
+{
+ opus_int n, i;
+ opus_int32 acc_Q11, acc_Q22, tmp1, tmp2, tmp3, tmp4;
+ opus_int32 state_cur, state_next;
+
+ /* Order must be even */
+    /* Length must be even */
+
+ silk_assert( ( order & 1 ) == 0 );
+ silk_assert( ( length & 1 ) == 0 );
+
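+    /* Process two samples per iteration: the even sample accumulates into acc_Q11 and the odd
+       sample into acc_Q22, running one all-pass section behind so it can reuse the states just written */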
+ for( n = 0; n < length; n+=2 ) {
+ /* Output of lowpass section */
+ tmp2 = silk_SMLAWB( state[ 0 ], state[ 1 ], lambda_Q16 );
+ state_cur = silk_LSHIFT( input[ n ], 14 );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ 1 ], state[ 2 ] - tmp2, lambda_Q16 );
+ state_next = tmp2;
+ acc_Q11 = silk_RSHIFT( order, 1 );
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ 0 ] );
+
+
+ /* Output of lowpass section */
+ tmp4 = silk_SMLAWB( state_cur, state_next, lambda_Q16 );
+ state[ 0 ] = silk_LSHIFT( input[ n+1 ], 14 );
+ /* Output of allpass section */
+ tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
+ state[ 1 ] = tmp4;
+ acc_Q22 = silk_RSHIFT( order, 1 );
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ 0 ] );
+
+ /* Loop over allpass sections */
+ for( i = 2; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2 = silk_SMLAWB( state[ i ], state[ i + 1 ] - tmp1, lambda_Q16 );
+ state_cur = tmp1;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ i - 1 ] );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( state[ i + 1 ], state[ i + 2 ] - tmp2, lambda_Q16 );
+ state_next = tmp2;
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp2, coef_Q13[ i ] );
+
+
+ /* Output of allpass section */
+ tmp4 = silk_SMLAWB( state_cur, state_next - tmp3, lambda_Q16 );
+ state[ i ] = tmp3;
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ i - 1 ] );
+ /* Output of allpass section */
+ tmp3 = silk_SMLAWB( state_next, tmp1 - tmp4, lambda_Q16 );
+ state[ i + 1 ] = tmp4;
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp4, coef_Q13[ i ] );
+ }
+ acc_Q11 = silk_SMLAWB( acc_Q11, tmp1, coef_Q13[ order - 1 ] );
+ res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
+
+ state[ order ] = tmp3;
+ acc_Q22 = silk_SMLAWB( acc_Q22, tmp3, coef_Q13[ order - 1 ] );
+ res_Q2[ n+1 ] = silk_LSHIFT( (opus_int32)input[ n+1 ], 2 ) - silk_RSHIFT_ROUND( acc_Q22, 9 );
+ }
+}
+
+
+
+/* Prefilter for finding Quantizer input signal */
+#define OVERRIDE_silk_prefilt_FIX
+static inline void silk_prefilt_FIX(
+ silk_prefilter_state_FIX *P, /* I/O state */
+ opus_int32 st_res_Q12[], /* I short term residual signal */
+ opus_int32 xw_Q3[], /* O prefiltered signal */
+    opus_int32                  HarmShapeFIRPacked_Q12,     /* I    Harmonic shaping coefficients           */
+    opus_int                    Tilt_Q14,                   /* I    Tilt shaping coefficient                */
+    opus_int32                  LF_shp_Q14,                 /* I    Low-frequency shaping coefficients      */
+ opus_int lag, /* I Lag for harmonic shaping */
+ opus_int length /* I Length of signals */
+)
+{
+ opus_int i, idx, LTP_shp_buf_idx;
+ opus_int32 n_LTP_Q12, n_Tilt_Q10, n_LF_Q10;
+ opus_int32 sLF_MA_shp_Q12, sLF_AR_shp_Q12;
+ opus_int16 *LTP_shp_buf;
+
+    /* For speed, use local temporaries instead of repeatedly accessing the struct members */
+ LTP_shp_buf = P->sLTP_shp;
+ LTP_shp_buf_idx = P->sLTP_shp_buf_idx;
+ sLF_AR_shp_Q12 = P->sLF_AR_shp_Q12;
+ sLF_MA_shp_Q12 = P->sLF_MA_shp_Q12;
+
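+    /* Harmonic (LTP) shaping is only applied when a pitch lag is available */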
+ if( lag > 0 ) {
+ for( i = 0; i < length; i++ ) {
+ /* unrolled loop */
+ silk_assert( HARM_SHAPE_FIR_TAPS == 3 );
+ idx = lag + LTP_shp_buf_idx;
+ n_LTP_Q12 = silk_SMULBB( LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 - 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+ n_LTP_Q12 = silk_SMLABT( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 ) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+ n_LTP_Q12 = silk_SMLABB( n_LTP_Q12, LTP_shp_buf[ ( idx - HARM_SHAPE_FIR_TAPS / 2 + 1) & LTP_MASK ], HarmShapeFIRPacked_Q12 );
+
+ n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+ n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+ sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+ sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
+
+ LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+ LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+ xw_Q3[i] = silk_RSHIFT_ROUND( silk_SUB32( sLF_MA_shp_Q12, n_LTP_Q12 ), 9 );
+ }
+ }
+ else
+ {
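+        /* No pitch lag available: apply only the tilt and low-frequency shaping */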
+ for( i = 0; i < length; i++ ) {
+
+ n_LTP_Q12 = 0;
+
+ n_Tilt_Q10 = silk_SMULWB( sLF_AR_shp_Q12, Tilt_Q14 );
+ n_LF_Q10 = silk_SMLAWB( silk_SMULWT( sLF_AR_shp_Q12, LF_shp_Q14 ), sLF_MA_shp_Q12, LF_shp_Q14 );
+
+ sLF_AR_shp_Q12 = silk_SUB32( st_res_Q12[ i ], silk_LSHIFT( n_Tilt_Q10, 2 ) );
+ sLF_MA_shp_Q12 = silk_SUB32( sLF_AR_shp_Q12, silk_LSHIFT( n_LF_Q10, 2 ) );
+
+ LTP_shp_buf_idx = ( LTP_shp_buf_idx - 1 ) & LTP_MASK;
+ LTP_shp_buf[ LTP_shp_buf_idx ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 12 ) );
+
+ xw_Q3[i] = silk_RSHIFT_ROUND( sLF_MA_shp_Q12, 9 );
+ }
+ }
+
+ /* Copy temp variable back to state */
+ P->sLF_AR_shp_Q12 = sLF_AR_shp_Q12;
+ P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
+ P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
+}
+
+#endif /* __PREFILTER_FIX_MIPSR1_H__ */
--- /dev/null
+++ b/silk/fixed/mips/warped_autocorrelation_FIX_mipsr1.h
@@ -1,0 +1,165 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
+#define __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main_FIX.h"
+
+#undef QC
+#define QC 10
+
+#undef QS
+#define QS 14
+
+/* Autocorrelations for a warped frequency axis */
+#define OVERRIDE_silk_warped_autocorrelation_FIX
+void silk_warped_autocorrelation_FIX(
+ opus_int32 *corr, /* O Result [order + 1] */
+ opus_int *scale, /* O Scaling of the correlation vector */
+ const opus_int16 *input, /* I Input data to correlate */
+ const opus_int warping_Q16, /* I Warping coefficient */
+ const opus_int length, /* I Length of input */
+ const opus_int order /* I Correlation order (even) */
+)
+{
+ opus_int n, i, lsh;
+ opus_int32 tmp1_QS=0, tmp2_QS=0, tmp3_QS=0, tmp4_QS=0, tmp5_QS=0, tmp6_QS=0, tmp7_QS=0, tmp8_QS=0, start_1=0, start_2=0, start_3=0;
+ opus_int32 state_QS[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+ opus_int64 corr_QC[ MAX_SHAPE_LPC_ORDER + 1 ] = { 0 };
+ opus_int64 temp64;
+    opus_int32 val = 2 * QS - QC;
+
+ /* Order must be even */
+ silk_assert( ( order & 1 ) == 0 );
+ silk_assert( 2 * QS - QC >= 0 );
+
+ /* Loop over samples */
+ for( n = 0; n < length; n=n+4 ) {
+
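+        /* Load four consecutive input samples in QS format; the first three are also kept in
+           start_1..start_3 for the correlation updates against each section output */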
+ tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+ start_1 = tmp1_QS;
+ tmp3_QS = silk_LSHIFT32( (opus_int32)input[ n+1], QS );
+ start_2 = tmp3_QS;
+ tmp5_QS = silk_LSHIFT32( (opus_int32)input[ n+2], QS );
+ start_3 = tmp5_QS;
+ tmp7_QS = silk_LSHIFT32( (opus_int32)input[ n+3], QS );
+
+ /* Loop over allpass sections */
+ for( i = 0; i < order; i += 2 ) {
+ /* Output of allpass section */
+ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, start_1);
+
+ tmp4_QS = silk_SMLAWB( tmp1_QS, tmp2_QS - tmp3_QS, warping_Q16 );
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp3_QS, start_2);
+
+ tmp6_QS = silk_SMLAWB( tmp3_QS, tmp4_QS - tmp5_QS, warping_Q16 );
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp5_QS, start_3);
+
+ tmp8_QS = silk_SMLAWB( tmp5_QS, tmp6_QS - tmp7_QS, warping_Q16 );
+ state_QS[ i ] = tmp7_QS;
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp7_QS, state_QS[0]);
+
+ /* Output of allpass section */
+ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, start_1);
+
+ tmp3_QS = silk_SMLAWB( tmp2_QS, tmp1_QS - tmp4_QS, warping_Q16 );
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp4_QS, start_2);
+
+ tmp5_QS = silk_SMLAWB( tmp4_QS, tmp3_QS - tmp6_QS, warping_Q16 );
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp6_QS, start_3);
+
+ tmp7_QS = silk_SMLAWB( tmp6_QS, tmp5_QS - tmp8_QS, warping_Q16 );
+ state_QS[ i + 1 ] = tmp8_QS;
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp8_QS, state_QS[ 0 ]);
+
+ }
+ state_QS[ order ] = tmp7_QS;
+
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, start_1);
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp3_QS, start_2);
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp5_QS, start_3);
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp7_QS, state_QS[ 0 ]);
+ }
+
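+    /* Process any samples left over from the four-way unrolled loop one at a time */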
+    for( ; n < length; n++ ) {
+
+ tmp1_QS = silk_LSHIFT32( (opus_int32)input[ n ], QS );
+
+ /* Loop over allpass sections */
+ for( i = 0; i < order; i += 2 ) {
+
+ /* Output of allpass section */
+ tmp2_QS = silk_SMLAWB( state_QS[ i ], state_QS[ i + 1 ] - tmp1_QS, warping_Q16 );
+ state_QS[ i ] = tmp1_QS;
+ corr_QC[ i ] = __builtin_mips_madd( corr_QC[ i ], tmp1_QS, state_QS[ 0 ]);
+
+ /* Output of allpass section */
+ tmp1_QS = silk_SMLAWB( state_QS[ i + 1 ], state_QS[ i + 2 ] - tmp2_QS, warping_Q16 );
+ state_QS[ i + 1 ] = tmp2_QS;
+ corr_QC[ i+1 ] = __builtin_mips_madd( corr_QC[ i+1 ], tmp2_QS, state_QS[ 0 ]);
+ }
+ state_QS[ order ] = tmp1_QS;
+ corr_QC[ order ] = __builtin_mips_madd( corr_QC[ order ], tmp1_QS, state_QS[ 0 ]);
+ }
+
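+    /* Normalization: convert the lag-0 correlation from Q(2*QS) to QC and find the left shift
+       that makes the whole correlation vector fit in 32 bits */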
+ temp64 = corr_QC[ 0 ];
+ temp64 = __builtin_mips_shilo(temp64, val);
+
+ lsh = silk_CLZ64( temp64 ) - 35;
+ lsh = silk_LIMIT( lsh, -12 - QC, 30 - QC );
+ *scale = -( QC + lsh );
+ silk_assert( *scale >= -30 && *scale <= 12 );
+ if( lsh >= 0 ) {
+ for( i = 0; i < order + 1; i++ ) {
+ temp64 = corr_QC[ i ];
+ //temp64 = __builtin_mips_shilo(temp64, val);
+ temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
+ corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
+ }
+ } else {
+ for( i = 0; i < order + 1; i++ ) {
+ temp64 = corr_QC[ i ];
+ //temp64 = __builtin_mips_shilo(temp64, val);
+ temp64 = (val >= 0) ? (temp64 >> val) : (temp64 << -val);
+ corr[ i ] = (opus_int32)silk_CHECK_FIT32( __builtin_mips_shilo( temp64, -lsh ) );
+ }
+ }
+
+ corr_QC[ 0 ] = __builtin_mips_shilo(corr_QC[ 0 ], val);
+
+ silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
+}
+#endif /* __WARPED_AUTOCORRELATION_FIX_MIPSR1_H__ */
--- a/silk/fixed/noise_shape_analysis_FIX.c
+++ b/silk/fixed/noise_shape_analysis_FIX.c
@@ -138,9 +138,14 @@
silk_assert( 0 );
}
+#if defined(MIPSr1_ASM)
+#include "mips/noise_shape_analysis_FIX_mipsr1.h"
+#endif
+
/**************************************************************/
/* Compute noise shaping coefficients and initial gain values */
/**************************************************************/
+#ifndef OVERRIDE_silk_noise_shape_analysis_FIX
void silk_noise_shape_analysis_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state FIX */
silk_encoder_control_FIX *psEncCtrl, /* I/O Encoder control FIX */
@@ -443,3 +448,4 @@
}
RESTORE_STACK;
}
+#endif /* OVERRIDE_silk_noise_shape_analysis_FIX */
--- a/silk/fixed/prefilter_FIX.c
+++ b/silk/fixed/prefilter_FIX.c
@@ -33,6 +33,11 @@
#include "stack_alloc.h"
#include "tuning_parameters.h"
+#if defined(MIPSr1_ASM)
+#include "mips/prefilter_FIX_mipsr1.h"
+#endif
+
+
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
@@ -45,6 +50,7 @@
opus_int length /* I Length of signals */
);
+#ifndef OVERRIDE_silk_warped_LPC_analysis_filter_FIX
void silk_warped_LPC_analysis_filter_FIX(
opus_int32 state[], /* I/O State [order + 1] */
opus_int32 res_Q2[], /* O Residual signal [length] */
@@ -86,6 +92,7 @@
res_Q2[ n ] = silk_LSHIFT( (opus_int32)input[ n ], 2 ) - silk_RSHIFT_ROUND( acc_Q11, 9 );
}
}
+#endif /* OVERRIDE_silk_warped_LPC_analysis_filter_FIX */
void silk_prefilter_FIX(
silk_encoder_state_FIX *psEnc, /* I/O Encoder state */
@@ -155,6 +162,7 @@
RESTORE_STACK;
}
+#ifndef OVERRIDE_silk_prefilt_FIX
/* Prefilter for finding Quantizer input signal */
static OPUS_INLINE void silk_prefilt_FIX(
silk_prefilter_state_FIX *P, /* I/O state */
@@ -207,3 +215,4 @@
P->sLF_MA_shp_Q12 = sLF_MA_shp_Q12;
P->sLTP_shp_buf_idx = LTP_shp_buf_idx;
}
+#endif /* OVERRIDE_silk_prefilt_FIX */
--- a/silk/fixed/warped_autocorrelation_FIX.c
+++ b/silk/fixed/warped_autocorrelation_FIX.c
@@ -34,6 +34,12 @@
#define QC 10
#define QS 14
+#if defined(MIPSr1_ASM)
+#include "mips/warped_autocorrelation_FIX_mipsr1.h"
+#endif
+
+
+#ifndef OVERRIDE_silk_warped_autocorrelation_FIX
/* Autocorrelations for a warped frequency axis */
void silk_warped_autocorrelation_FIX(
opus_int32 *corr, /* O Result [order + 1] */
@@ -86,3 +92,4 @@
}
silk_assert( corr_QC[ 0 ] >= 0 ); /* If breaking, decrease QC*/
}
+#endif /* OVERRIDE_silk_warped_autocorrelation_FIX */
--- a/silk/macros.h
+++ b/silk/macros.h
@@ -79,17 +79,24 @@
(( (a) & ((b)^0x80000000) & 0x80000000) ? silk_int32_MIN : (a)-(b)) : \
((((a)^0x80000000) & (b) & 0x80000000) ? silk_int32_MAX : (a)-(b)) )
-#include "ecintrin.h"
+#if defined(MIPSr1_ASM)
+#include "mips/macros_mipsr1.h"
+#endif
+#include "ecintrin.h"
+#ifndef OVERRIDE_silk_CLZ16
static OPUS_INLINE opus_int32 silk_CLZ16(opus_int16 in16)
{
return 32 - EC_ILOG(in16<<16|0x8000);
}
+#endif
+#ifndef OVERRIDE_silk_CLZ32
static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
{
return in32 ? 32 - EC_ILOG(in32) : 32;
}
+#endif
/* Row based */
#define matrix_ptr(Matrix_base_adr, row, column, N) \
--- /dev/null
+++ b/silk/mips/NSQ_del_dec_mipsr1.h
@@ -1,0 +1,405 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef __NSQ_DEL_DEC_MIPSR1_H__
+#define __NSQ_DEL_DEC_MIPSR1_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "main.h"
+#include "stack_alloc.h"
+
+#define OVERRIDE_silk_noise_shape_quantizer_del_dec
+static inline void silk_noise_shape_quantizer_del_dec(
+ silk_nsq_state *NSQ, /* I/O NSQ state */
+ NSQ_del_dec_struct psDelDec[], /* I/O Delayed decision states */
+ opus_int signalType, /* I Signal type */
+ const opus_int32 x_Q10[], /* I */
+ opus_int8 pulses[], /* O */
+ opus_int16 xq[], /* O */
+ opus_int32 sLTP_Q15[], /* I/O LTP filter state */
+ opus_int32 delayedGain_Q10[], /* I/O Gain delay buffer */
+ const opus_int16 a_Q12[], /* I Short term prediction coefs */
+ const opus_int16 b_Q14[], /* I Long term prediction coefs */
+ const opus_int16 AR_shp_Q13[], /* I Noise shaping coefs */
+ opus_int lag, /* I Pitch lag */
+ opus_int32 HarmShapeFIRPacked_Q14, /* I */
+ opus_int Tilt_Q14, /* I Spectral tilt */
+ opus_int32 LF_shp_Q14, /* I */
+ opus_int32 Gain_Q16, /* I */
+ opus_int Lambda_Q10, /* I */
+ opus_int offset_Q10, /* I */
+ opus_int length, /* I Input length */
+ opus_int subfr, /* I Subframe number */
+ opus_int shapingLPCOrder, /* I Shaping LPC filter order */
+ opus_int predictLPCOrder, /* I Prediction filter order */
+ opus_int warping_Q16, /* I */
+ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
+ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
+ opus_int decisionDelay /* I */
+)
+{
+ opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
+ opus_int32 Winner_rand_state;
+ opus_int32 LTP_pred_Q14, LPC_pred_Q14, n_AR_Q14, n_LTP_Q14;
+ opus_int32 n_LF_Q14, r_Q10, rr_Q10, rd1_Q10, rd2_Q10, RDmin_Q10, RDmax_Q10;
+ opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
+ opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
+ opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
+ NSQ_sample_struct psSampleState[ MAX_DEL_DEC_STATES ][ 2 ];
+ NSQ_del_dec_struct *psDD;
+ NSQ_sample_struct *psSS;
+ opus_int16 b_Q14_0, b_Q14_1, b_Q14_2, b_Q14_3, b_Q14_4;
+ opus_int16 a_Q12_0, a_Q12_1, a_Q12_2, a_Q12_3, a_Q12_4, a_Q12_5, a_Q12_6;
+ opus_int16 a_Q12_7, a_Q12_8, a_Q12_9, a_Q12_10, a_Q12_11, a_Q12_12, a_Q12_13;
+ opus_int16 a_Q12_14, a_Q12_15;
+
+    opus_int32 cur, prev, next;
+    opus_int64 temp64;
+
+    /* Initialize b_Q14 variables */
+ b_Q14_0 = b_Q14[ 0 ];
+ b_Q14_1 = b_Q14[ 1 ];
+ b_Q14_2 = b_Q14[ 2 ];
+ b_Q14_3 = b_Q14[ 3 ];
+ b_Q14_4 = b_Q14[ 4 ];
+
+    /* Initialize a_Q12 variables */
+ a_Q12_0 = a_Q12[0];
+ a_Q12_1 = a_Q12[1];
+ a_Q12_2 = a_Q12[2];
+ a_Q12_3 = a_Q12[3];
+ a_Q12_4 = a_Q12[4];
+ a_Q12_5 = a_Q12[5];
+ a_Q12_6 = a_Q12[6];
+ a_Q12_7 = a_Q12[7];
+ a_Q12_8 = a_Q12[8];
+ a_Q12_9 = a_Q12[9];
+ a_Q12_10 = a_Q12[10];
+ a_Q12_11 = a_Q12[11];
+ a_Q12_12 = a_Q12[12];
+ a_Q12_13 = a_Q12[13];
+ a_Q12_14 = a_Q12[14];
+ a_Q12_15 = a_Q12[15];
+
+ silk_assert( nStatesDelayedDecision > 0 );
+
+ shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
+ pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
+ Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
+
+ for( i = 0; i < length; i++ ) {
+ /* Perform common calculations used in all states */
+
+ /* Long-term prediction */
+ if( signalType == TYPE_VOICED ) {
+ /* Unrolled loop */
+            /* 64-bit MAC accumulation with explicit rounding avoids the bias of silk_SMLAWB(), which always rounds towards -inf */
+ temp64 = __builtin_mips_mult(pred_lag_ptr[ 0 ], b_Q14_0 );
+ temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -1 ], b_Q14_1 );
+ temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -2 ], b_Q14_2 );
+ temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -3 ], b_Q14_3 );
+ temp64 = __builtin_mips_madd( temp64, pred_lag_ptr[ -4 ], b_Q14_4 );
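+            /* Add the Q16 rounding constant before extracting (acc >> 16), i.e. round to nearest */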
+ temp64 += 32768;
+ LTP_pred_Q14 = __builtin_mips_extr_w(temp64, 16);
+ LTP_pred_Q14 = silk_LSHIFT( LTP_pred_Q14, 1 ); /* Q13 -> Q14 */
+ pred_lag_ptr++;
+ } else {
+ LTP_pred_Q14 = 0;
+ }
+
+ /* Long-term shaping */
+ if( lag > 0 ) {
+ /* Symmetric, packed FIR coefficients */
+ n_LTP_Q14 = silk_SMULWB( silk_ADD32( shp_lag_ptr[ 0 ], shp_lag_ptr[ -2 ] ), HarmShapeFIRPacked_Q14 );
+ n_LTP_Q14 = silk_SMLAWT( n_LTP_Q14, shp_lag_ptr[ -1 ], HarmShapeFIRPacked_Q14 );
+ n_LTP_Q14 = silk_SUB_LSHIFT32( LTP_pred_Q14, n_LTP_Q14, 2 ); /* Q12 -> Q14 */
+ shp_lag_ptr++;
+ } else {
+ n_LTP_Q14 = 0;
+ }
+
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ /* Delayed decision state */
+ psDD = &psDelDec[ k ];
+
+ /* Sample state */
+ psSS = psSampleState[ k ];
+
+ /* Generate dither */
+ psDD->Seed = silk_RAND( psDD->Seed );
+
+ /* Pointer used in short term prediction and shaping */
+ psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
+ /* Short-term prediction */
+ silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 );
+ temp64 = __builtin_mips_mult(psLPC_Q14[ 0 ], a_Q12_0 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -1 ], a_Q12_1 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -2 ], a_Q12_2 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -3 ], a_Q12_3 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -4 ], a_Q12_4 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -5 ], a_Q12_5 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -6 ], a_Q12_6 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -7 ], a_Q12_7 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -8 ], a_Q12_8 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -9 ], a_Q12_9 );
+ if( predictLPCOrder == 16 ) {
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -10 ], a_Q12_10 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -11 ], a_Q12_11 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -12 ], a_Q12_12 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -13 ], a_Q12_13 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -14 ], a_Q12_14 );
+ temp64 = __builtin_mips_madd( temp64, psLPC_Q14[ -15 ], a_Q12_15 );
+ }
+ temp64 += 32768;
+ LPC_pred_Q14 = __builtin_mips_extr_w(temp64, 16);
+
+ LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
+
+ /* Noise shape feedback */
+ silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
+ /* Output of lowpass section */
+ tmp2 = silk_SMLAWB( psLPC_Q14[ 0 ], psDD->sAR2_Q14[ 0 ], warping_Q16 );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( psDD->sAR2_Q14[ 0 ], psDD->sAR2_Q14[ 1 ] - tmp2, warping_Q16 );
+ psDD->sAR2_Q14[ 0 ] = tmp2;
+
+ temp64 = __builtin_mips_mult(tmp2, AR_shp_Q13[ 0 ] );
+
+ prev = psDD->sAR2_Q14[ 1 ];
+
+ /* Loop over allpass sections */
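+            /* prev/cur/next hold neighbouring sAR2_Q14 entries so each state word is loaded only once */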
+ for( j = 2; j < shapingLPCOrder; j += 2 ) {
+ cur = psDD->sAR2_Q14[ j ];
+ next = psDD->sAR2_Q14[ j+1 ];
+ /* Output of allpass section */
+ tmp2 = silk_SMLAWB( prev, cur - tmp1, warping_Q16 );
+ psDD->sAR2_Q14[ j - 1 ] = tmp1;
+ temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ j - 1 ] );
+ temp64 = __builtin_mips_madd( temp64, tmp2, AR_shp_Q13[ j ] );
+ /* Output of allpass section */
+ tmp1 = silk_SMLAWB( cur, next - tmp2, warping_Q16 );
+ psDD->sAR2_Q14[ j + 0 ] = tmp2;
+ prev = next;
+ }
+ psDD->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
+ temp64 = __builtin_mips_madd( temp64, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
+ temp64 += 32768;
+ n_AR_Q14 = __builtin_mips_extr_w(temp64, 16);
+ n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 1 ); /* Q11 -> Q12 */
+ n_AR_Q14 = silk_SMLAWB( n_AR_Q14, psDD->LF_AR_Q14, Tilt_Q14 ); /* Q12 */
+ n_AR_Q14 = silk_LSHIFT( n_AR_Q14, 2 ); /* Q12 -> Q14 */
+
+ n_LF_Q14 = silk_SMULWB( psDD->Shape_Q14[ *smpl_buf_idx ], LF_shp_Q14 ); /* Q12 */
+ n_LF_Q14 = silk_SMLAWT( n_LF_Q14, psDD->LF_AR_Q14, LF_shp_Q14 ); /* Q12 */
+ n_LF_Q14 = silk_LSHIFT( n_LF_Q14, 2 ); /* Q12 -> Q14 */
+
+ /* Input minus prediction plus noise feedback */
+ /* r = x[ i ] - LTP_pred - LPC_pred + n_AR + n_Tilt + n_LF + n_LTP */
+ tmp1 = silk_ADD32( n_AR_Q14, n_LF_Q14 ); /* Q14 */
+ tmp2 = silk_ADD32( n_LTP_Q14, LPC_pred_Q14 ); /* Q13 */
+ tmp1 = silk_SUB32( tmp2, tmp1 ); /* Q13 */
+ tmp1 = silk_RSHIFT_ROUND( tmp1, 4 ); /* Q10 */
+
+ r_Q10 = silk_SUB32( x_Q10[ i ], tmp1 ); /* residual error Q10 */
+
+ /* Flip sign depending on dither */
+ if ( psDD->Seed < 0 ) {
+ r_Q10 = -r_Q10;
+ }
+ r_Q10 = silk_LIMIT_32( r_Q10, -(31 << 10), 30 << 10 );
+
+ /* Find two quantization level candidates and measure their rate-distortion */
+ q1_Q10 = silk_SUB32( r_Q10, offset_Q10 );
+ q1_Q0 = silk_RSHIFT( q1_Q10, 10 );
+ if( q1_Q0 > 0 ) {
+ q1_Q10 = silk_SUB32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+ q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 );
+ rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else if( q1_Q0 == 0 ) {
+ q1_Q10 = offset_Q10;
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+ rd1_Q10 = silk_SMULBB( q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else if( q1_Q0 == -1 ) {
+ q2_Q10 = offset_Q10;
+ q1_Q10 = silk_SUB32( q2_Q10, 1024 - QUANT_LEVEL_ADJUST_Q10 );
+ rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( q2_Q10, Lambda_Q10 );
+ } else { /* q1_Q0 < -1 */
+ q1_Q10 = silk_ADD32( silk_LSHIFT( q1_Q0, 10 ), QUANT_LEVEL_ADJUST_Q10 );
+ q1_Q10 = silk_ADD32( q1_Q10, offset_Q10 );
+ q2_Q10 = silk_ADD32( q1_Q10, 1024 );
+ rd1_Q10 = silk_SMULBB( -q1_Q10, Lambda_Q10 );
+ rd2_Q10 = silk_SMULBB( -q2_Q10, Lambda_Q10 );
+ }
+ rr_Q10 = silk_SUB32( r_Q10, q1_Q10 );
+ rd1_Q10 = silk_RSHIFT( silk_SMLABB( rd1_Q10, rr_Q10, rr_Q10 ), 10 );
+ rr_Q10 = silk_SUB32( r_Q10, q2_Q10 );
+ rd2_Q10 = silk_RSHIFT( silk_SMLABB( rd2_Q10, rr_Q10, rr_Q10 ), 10 );
+
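+            /* Keep the lower rate-distortion candidate in psSS[ 0 ] and the other in psSS[ 1 ] */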
+ if( rd1_Q10 < rd2_Q10 ) {
+ psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+ psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+ psSS[ 0 ].Q_Q10 = q1_Q10;
+ psSS[ 1 ].Q_Q10 = q2_Q10;
+ } else {
+ psSS[ 0 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd2_Q10 );
+ psSS[ 1 ].RD_Q10 = silk_ADD32( psDD->RD_Q10, rd1_Q10 );
+ psSS[ 0 ].Q_Q10 = q2_Q10;
+ psSS[ 1 ].Q_Q10 = q1_Q10;
+ }
+
+ /* Update states for best quantization */
+
+ /* Quantized excitation */
+ exc_Q14 = silk_LSHIFT32( psSS[ 0 ].Q_Q10, 4 );
+ if ( psDD->Seed < 0 ) {
+ exc_Q14 = -exc_Q14;
+ }
+
+ /* Add predictions */
+ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+ xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+ /* Update states */
+ sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
+ psSS[ 0 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+ psSS[ 0 ].LF_AR_Q14 = sLF_AR_shp_Q14;
+ psSS[ 0 ].LPC_exc_Q14 = LPC_exc_Q14;
+ psSS[ 0 ].xq_Q14 = xq_Q14;
+
+ /* Update states for second best quantization */
+
+ /* Quantized excitation */
+ exc_Q14 = silk_LSHIFT32( psSS[ 1 ].Q_Q10, 4 );
+ if ( psDD->Seed < 0 ) {
+ exc_Q14 = -exc_Q14;
+ }
+
+
+ /* Add predictions */
+ LPC_exc_Q14 = silk_ADD32( exc_Q14, LTP_pred_Q14 );
+ xq_Q14 = silk_ADD32( LPC_exc_Q14, LPC_pred_Q14 );
+
+ /* Update states */
+ sLF_AR_shp_Q14 = silk_SUB32( xq_Q14, n_AR_Q14 );
+ psSS[ 1 ].sLTP_shp_Q14 = silk_SUB32( sLF_AR_shp_Q14, n_LF_Q14 );
+ psSS[ 1 ].LF_AR_Q14 = sLF_AR_shp_Q14;
+ psSS[ 1 ].LPC_exc_Q14 = LPC_exc_Q14;
+ psSS[ 1 ].xq_Q14 = xq_Q14;
+ }
+
+ *smpl_buf_idx = ( *smpl_buf_idx - 1 ) & DECISION_DELAY_MASK; /* Index to newest samples */
+ last_smple_idx = ( *smpl_buf_idx + decisionDelay ) & DECISION_DELAY_MASK; /* Index to decisionDelay old samples */
+
+ /* Find winner */
+ RDmin_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+ Winner_ind = 0;
+ for( k = 1; k < nStatesDelayedDecision; k++ ) {
+ if( psSampleState[ k ][ 0 ].RD_Q10 < RDmin_Q10 ) {
+ RDmin_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
+ Winner_ind = k;
+ }
+ }
+
+ /* Increase RD values of expired states */
+ Winner_rand_state = psDelDec[ Winner_ind ].RandState[ last_smple_idx ];
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ if( psDelDec[ k ].RandState[ last_smple_idx ] != Winner_rand_state ) {
+ psSampleState[ k ][ 0 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 0 ].RD_Q10, silk_int32_MAX >> 4 );
+ psSampleState[ k ][ 1 ].RD_Q10 = silk_ADD32( psSampleState[ k ][ 1 ].RD_Q10, silk_int32_MAX >> 4 );
+ silk_assert( psSampleState[ k ][ 0 ].RD_Q10 >= 0 );
+ }
+ }
+
+ /* Find worst in first set and best in second set */
+ RDmax_Q10 = psSampleState[ 0 ][ 0 ].RD_Q10;
+ RDmin_Q10 = psSampleState[ 0 ][ 1 ].RD_Q10;
+ RDmax_ind = 0;
+ RDmin_ind = 0;
+ for( k = 1; k < nStatesDelayedDecision; k++ ) {
+ /* find worst in first set */
+ if( psSampleState[ k ][ 0 ].RD_Q10 > RDmax_Q10 ) {
+ RDmax_Q10 = psSampleState[ k ][ 0 ].RD_Q10;
+ RDmax_ind = k;
+ }
+ /* find best in second set */
+ if( psSampleState[ k ][ 1 ].RD_Q10 < RDmin_Q10 ) {
+ RDmin_Q10 = psSampleState[ k ][ 1 ].RD_Q10;
+ RDmin_ind = k;
+ }
+ }
+
+ /* Replace a state if best from second set outperforms worst in first set */
+ if( RDmin_Q10 < RDmax_Q10 ) {
+ silk_memcpy( ( (opus_int32 *)&psDelDec[ RDmax_ind ] ) + i,
+ ( (opus_int32 *)&psDelDec[ RDmin_ind ] ) + i, sizeof( NSQ_del_dec_struct ) - i * sizeof( opus_int32) );
+ silk_memcpy( &psSampleState[ RDmax_ind ][ 0 ], &psSampleState[ RDmin_ind ][ 1 ], sizeof( NSQ_sample_struct ) );
+ }
+
+ /* Write samples from winner to output and long-term filter states */
+ psDD = &psDelDec[ Winner_ind ];
+ if( subfr > 0 || i >= decisionDelay ) {
+ pulses[ i - decisionDelay ] = (opus_int8)silk_RSHIFT_ROUND( psDD->Q_Q10[ last_smple_idx ], 10 );
+ xq[ i - decisionDelay ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND(
+ silk_SMULWW( psDD->Xq_Q14[ last_smple_idx ], delayedGain_Q10[ last_smple_idx ] ), 8 ) );
+ NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - decisionDelay ] = psDD->Shape_Q14[ last_smple_idx ];
+ sLTP_Q15[ NSQ->sLTP_buf_idx - decisionDelay ] = psDD->Pred_Q15[ last_smple_idx ];
+ }
+ NSQ->sLTP_shp_buf_idx++;
+ NSQ->sLTP_buf_idx++;
+
+ /* Update states */
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ psDD = &psDelDec[ k ];
+ psSS = &psSampleState[ k ][ 0 ];
+ psDD->LF_AR_Q14 = psSS->LF_AR_Q14;
+ psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH + i ] = psSS->xq_Q14;
+ psDD->Xq_Q14[ *smpl_buf_idx ] = psSS->xq_Q14;
+ psDD->Q_Q10[ *smpl_buf_idx ] = psSS->Q_Q10;
+ psDD->Pred_Q15[ *smpl_buf_idx ] = silk_LSHIFT32( psSS->LPC_exc_Q14, 1 );
+ psDD->Shape_Q14[ *smpl_buf_idx ] = psSS->sLTP_shp_Q14;
+ psDD->Seed = silk_ADD32_ovflw( psDD->Seed, silk_RSHIFT_ROUND( psSS->Q_Q10, 10 ) );
+ psDD->RandState[ *smpl_buf_idx ] = psDD->Seed;
+ psDD->RD_Q10 = psSS->RD_Q10;
+ }
+ delayedGain_Q10[ *smpl_buf_idx ] = Gain_Q10;
+ }
+ /* Update LPC states */
+ for( k = 0; k < nStatesDelayedDecision; k++ ) {
+ psDD = &psDelDec[ k ];
+ silk_memcpy( psDD->sLPC_Q14, &psDD->sLPC_Q14[ length ], NSQ_LPC_BUF_LENGTH * sizeof( opus_int32 ) );
+ }
+}
+
+#endif /* __NSQ_DEL_DEC_MIPSR1_H__ */
--- /dev/null
+++ b/silk/mips/macros_mipsr1.h
@@ -1,0 +1,92 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+
+#ifndef __SILK_MACROS_MIPSR1_H__
+#define __SILK_MACROS_MIPSR1_H__
+
+#define mips_clz(x) __builtin_clz(x)
+
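+/* (a32 * (opus_int16)b32) >> 16, computed with one DSP MULT/EXTR.W pair */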
+#undef silk_SMULWB
+static inline int silk_SMULWB(int a, int b)
+{
+ long long ac;
+ int c;
+
+ ac = __builtin_mips_mult(a, (opus_int32)(opus_int16)b);
+ c = __builtin_mips_extr_w(ac, 16);
+
+ return c;
+}
+
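+/* a32 + ((b32 * (opus_int16)c32) >> 16) */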
+#undef silk_SMLAWB
+#define silk_SMLAWB(a32, b32, c32) ((a32) + silk_SMULWB(b32, c32))
+
+#undef silk_SMULWW
+static inline int silk_SMULWW(int a, int b)
+{
+ long long ac;
+ int c;
+
+ ac = __builtin_mips_mult(a, b);
+ c = __builtin_mips_extr_w(ac, 16);
+
+ return c;
+}
+
+#undef silk_SMLAWW
+static inline int silk_SMLAWW(int a, int b, int c)
+{
+ long long ac;
+ int res;
+
+ ac = __builtin_mips_mult(b, c);
+ res = __builtin_mips_extr_w(ac, 16);
+ res += a;
+
+ return res;
+}
+
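+/* Count leading zeros of a 16-bit value; like __builtin_clz, the result is undefined for a zero input */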
+#define OVERRIDE_silk_CLZ16
+static inline opus_int32 silk_CLZ16(opus_int16 in16)
+{
+ int re32;
+    opus_int32 in32 = (opus_int32)in16;
+    re32 = mips_clz(in32);
+    re32 -= 16;
+ return re32;
+}
+
+#define OVERRIDE_silk_CLZ32
+static inline opus_int32 silk_CLZ32(opus_int32 in32)
+{
+ int re32;
+ re32 = mips_clz(in32);
+ return re32;
+}
+
+#endif /* __SILK_MACROS_MIPSR1_H__ */
--- /dev/null
+++ b/silk/mips/sigproc_fix_mipsr1.h
@@ -1,0 +1,65 @@
+/***********************************************************************
+Copyright (c) 2006-2011, Skype Limited. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+- Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+- Neither the name of Internet Society, IETF or IETF Trust, nor the
+names of specific contributors, may be used to endorse or promote
+products derived from this software without specific prior written
+permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef SILK_SIGPROC_FIX_MIPSR1_H
+#define SILK_SIGPROC_FIX_MIPSR1_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
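+/* Saturate to 16 bits: the saturating shift SHLL_S.W clamps (a << 16) into 32-bit range,
+   and the arithmetic >> 16 recovers the clamped 16-bit value */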
+#undef silk_SAT16
+static inline short int silk_SAT16(int a)
+{
+ int c;
+ c = __builtin_mips_shll_s_w(a, 16);
+    c = c >> 16;
+
+ return c;
+}
+
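+/* Saturating left shift, mapped directly to the DSP SHLL_S.W instruction */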
+#undef silk_LSHIFT_SAT32
+static inline int silk_LSHIFT_SAT32(int a, int shift)
+{
+ int r;
+
+ r = __builtin_mips_shll_s_w(a, shift);
+
+ return r;
+}
+
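+/* Arithmetic right shift with rounding, mapped to the DSP SHRA_R.W instruction */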
+#undef silk_RSHIFT_ROUND
+static inline int silk_RSHIFT_ROUND(int a, int shift)
+{
+ int r;
+
+ r = __builtin_mips_shra_r_w(a, shift);
+ return r;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SILK_SIGPROC_FIX_MIPSR1_H */