shithub: libvpx

Download patch

ref: 99adf8b22ed827f81e9501dd5068ca8a5d5d2d2e
parent: 04a6010742b45fa0f9aef1c0f0f5ba79f24ce5e8
parent: d05104b4885a3908b548013ed426dc95ca184355
author: Kaustubh Raste <[email protected]>
date: Wed Oct 12 22:11:59 EDT 2016

Merge "Optimize vpx_get4x4sse_cs_msa function"

--- a/vpx_dsp/mips/variance_msa.c
+++ b/vpx_dsp/mips/variance_msa.c
@@ -489,27 +489,19 @@
 
 uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride,
                               const uint8_t *ref_ptr, int32_t ref_stride) {
-  uint32_t err = 0;
   uint32_t src0, src1, src2, src3;
   uint32_t ref0, ref1, ref2, ref3;
   v16i8 src = { 0 };
   v16i8 ref = { 0 };
-  v16u8 src_vec0, src_vec1;
-  v8i16 diff0, diff1;
   v4i32 err0 = { 0 };
-  v4i32 err1 = { 0 };
 
   LW4(src_ptr, src_stride, src0, src1, src2, src3);
   LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
   INSERT_W4_SB(src0, src1, src2, src3, src);
   INSERT_W4_SB(ref0, ref1, ref2, ref3, ref);
-  ILVRL_B2_UB(src, ref, src_vec0, src_vec1);
-  HSUB_UB2_SH(src_vec0, src_vec1, diff0, diff1);
-  DPADD_SH2_SW(diff0, diff1, diff0, diff1, err0, err1);
-  err = HADD_SW_S32(err0);
-  err += HADD_SW_S32(err1);
+  CALC_MSE_B(src, ref, err0);
 
-  return err;
+  return HADD_SW_S32(err0);
 }
 
 #define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4);