ref: 70485d895487563b0558ff5c7e52fd2f3d4ee2ef
parent: 85ede2c6aa066da29fce5186394f46927358be3b
author: Nils Wallménius <[email protected]>
date: Wed May 22 19:05:07 EDT 2013
Faster MULT32_32_Q31 for ARM. Uses a C implementation with a 32*32 => 64 multiplication, which ARM has. Speeds up decoding of a 64 kbps test file by 0.5MHz on an ARM7TDMI and 1.0MHz on an ARM9TDMI. 0.2% speedup on a 96 kbps enc+dec test on a Cortex A8. Signed-off-by: Timothy B. Terriberry <[email protected]>
--- a/celt/arm/fixed_armv4.h
+++ b/celt/arm/fixed_armv4.h
@@ -68,4 +68,9 @@
#undef MAC16_32_Q15
#define MAC16_32_Q15(c, a, b) ADD32(c, MULT16_32_Q15(a, b))
+
+/** 32x32 multiplication using a 64-bit intermediate product, followed by a 31-bit shift right. Result fits in 32 bits */
+#undef MULT32_32_Q31
+#define MULT32_32_Q31(a,b) (opus_val32)((((opus_int64)(a)) * ((opus_int64)(b)))>>31)
+
#endif