shithub: libvpx

--- a/test/variance_test.cc

+++ b/test/variance_test.cc

@@ -54,11 +54,11 @@

   switch (bit_depth) {

     case VPX_BITS_12:

       *sse = (*sse + 128) >> 8;

-      *se = (*se + 8) >> 4;

+      *se = *se / (1 << 4);

       break;

     case VPX_BITS_10:

       *sse = (*sse + 8) >> 4;

-      *se = (*se + 2) >> 2;

+      *se = *se / (1 << 2);

       break;

     case VPX_BITS_8:

     default:

--- a/vpx_dsp/variance.c

+++ b/vpx_dsp/variance.c

@@ -275,7 +275,7 @@

 #if CONFIG_VP9_HIGHBITDEPTH

 static void highbd_variance64(const uint8_t *a8, int  a_stride,

                               const uint8_t *b8, int  b_stride,

-                              int w, int h, uint64_t *sse, uint64_t *sum) {

+                              int w, int h, uint64_t *sse, int *sum) {

   int i, j;

   uint16_t *a = CONVERT_TO_SHORTPTR(a8);

@@ -298,10 +298,8 @@

                               const uint8_t *b8, int  b_stride,

                               int w, int h, uint32_t *sse, int *sum) {

   uint64_t sse_long = 0;

-  uint64_t sum_long = 0;

-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);

+  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, sum);

   *sse = (uint32_t)sse_long;

-  *sum = (int)sum_long;

 static void highbd_10_variance(const uint8_t *a8, int  a_stride,

@@ -308,10 +306,9 @@

                                const uint8_t *b8, int  b_stride,

                                int w, int h, uint32_t *sse, int *sum) {

   uint64_t sse_long = 0;

-  uint64_t sum_long = 0;

-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);

-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);

-  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);

+  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, sum);

+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 4);

+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 2);

 static void highbd_12_variance(const uint8_t *a8, int  a_stride,

@@ -318,10 +315,9 @@

                                const uint8_t *b8, int  b_stride,

                                int w, int h, uint32_t *sse, int *sum) {

   uint64_t sse_long = 0;

-  uint64_t sum_long = 0;

-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);

-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);

-  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);

+  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, sum);

+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 8);

+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 4);

 #define HIGHBD_VAR(W, H) \

--- a/vpx_dsp/variance.h

+++ b/vpx_dsp/variance.h

@@ -19,6 +19,8 @@

 extern "C" {

 #endif

+#define ROUND_ZERO_POWER_OF_TWO(value, n) ((value) / (1 << (n)))

 #define FILTER_BITS 7

 #define FILTER_WEIGHT 128

--- a/vpx_dsp/x86/highbd_variance_sse2.c

+++ b/vpx_dsp/x86/highbd_variance_sse2.c

@@ -9,6 +9,7 @@

*/

 #include "./vpx_config.h"

+#include "vpx_dsp/variance.h"

 #include "vpx_ports/mem.h"

 typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride,

@@ -62,8 +63,8 @@

       sum_long += sum0;

-  *sum = ROUND_POWER_OF_TWO(sum_long, 2);

-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);

+  *sum = ROUND_ZERO_POWER_OF_TWO(sum_long, 2);

+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 4);

 static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,

@@ -84,8 +85,8 @@

       sum_long += sum0;

-  *sum = ROUND_POWER_OF_TWO(sum_long, 4);

-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);

+  *sum = ROUND_ZERO_POWER_OF_TWO(sum_long, 4);

+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 8);

@@ -106,7 +107,7 @@

   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \

   vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \

                                      sse, sum); \

-  *sum = ROUND_POWER_OF_TWO(*sum, 2); \

+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 2); \

   *sse = ROUND_POWER_OF_TWO(*sse, 4); \

} \

@@ -117,7 +118,7 @@

   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \

   vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \

                                      sse, sum); \

-  *sum = ROUND_POWER_OF_TWO(*sum, 4); \

+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 4); \

   *sse = ROUND_POWER_OF_TWO(*sse, 8); \

@@ -345,7 +346,7 @@

       sse += sse2; \

} \

} \

-  se = ROUND_POWER_OF_TWO(se, 2); \

+  se = ROUND_ZERO_POWER_OF_TWO(se, 2); \

   sse = ROUND_POWER_OF_TWO(sse, 4); \

   *sse_ptr = sse; \

   return sse - ((cast se * se) >> (wlog2 + hlog2)); \

@@ -392,7 +393,7 @@

}\

} \

} \

-  se = ROUND_POWER_OF_TWO(se, 4); \

+  se = ROUND_ZERO_POWER_OF_TWO(se, 4); \

   sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \

   *sse_ptr = sse; \

   return sse - ((cast se * se) >> (wlog2 + hlog2)); \

@@ -514,7 +515,7 @@

       sse += sse2; \

} \

} \

-  se = ROUND_POWER_OF_TWO(se, 2); \

+  se = ROUND_ZERO_POWER_OF_TWO(se, 2); \

   sse = ROUND_POWER_OF_TWO(sse, 4); \

   *sse_ptr = sse; \

   return sse - ((cast se * se) >> (wlog2 + hlog2)); \

@@ -566,7 +567,7 @@

} \

} \

} \

-  se = ROUND_POWER_OF_TWO(se, 4); \

+  se = ROUND_ZERO_POWER_OF_TWO(se, 4); \

   sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \

   *sse_ptr = sse; \

   return sse - ((cast se * se) >> (wlog2 + hlog2)); \