shithub: openh264

Download patch

ref: f066df412c0ced04ca082c73919a958942b330a4
parent: e4220b7b55925ff2feacc2f5944dc528629b57ad
author: Martin Storsjö <[email protected]>
date: Wed Jun 25 07:05:44 EDT 2014

Use WELS_ROUND in the neon version of GeneralBilinearAccurateDownsampler

Also use it in the commented out sse2 version - in case it gets
taken into use at some point. (It only works on 32 bit x86
at the moment.)

This makes sure the scaling factors are rounded similarly to the
in GeneralBilinearAccurateDownsampler_c, making sure the
accelerated versions of these function return identical results
to the C version.

--- a/codec/processing/src/downsample/downsamplefuncs.cpp
+++ b/codec/processing/src/downsample/downsamplefuncs.cpp
@@ -208,8 +208,8 @@
 //  const int32_t kiScaleBitWidth = 16, kiScaleBitHeight = 15;
 //  const uint32_t kuiScaleWidth = (1 << kiScaleBitWidth), kuiScaleHeight = (1 << kiScaleBitHeight);
 //
-//  uint32_t uiScalex = (uint32_t) ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
-//  uint32_t uiScaley = (uint32_t) ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
+//  uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScaleWidth);
+//  uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScaleHeight);
 //
 //  GeneralBilinearFastDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
 //                                       pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
@@ -221,8 +221,8 @@
 //  const int32_t kiScaleBit = 15;
 //  const uint32_t kuiScale = (1 << kiScaleBit);
 //
-//  uint32_t uiScalex = (uint32_t) ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
-//  uint32_t uiScaley = (uint32_t) ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
+//  uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
+//  uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
 //
 //  GeneralBilinearAccurateDownsampler_sse2 (pDst, kiDstStride, kiDstWidth, kiDstHeight,
 //      pSrc, kiSrcStride, kiSrcWidth, kiSrcHeight, uiScalex, uiScaley);
@@ -235,8 +235,8 @@
     uint8_t* pSrc, const int32_t kiSrcStride, const int32_t kiSrcWidth, const int32_t kiSrcHeight) {
   const int32_t kiScaleBit = 15;
   const uint32_t kuiScale = (1 << kiScaleBit);
-  uint32_t uiScalex = (uint32_t) ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
-  uint32_t uiScaley = (uint32_t) ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
+  uint32_t uiScalex = WELS_ROUND ((float)kiSrcWidth / (float)kiDstWidth * kuiScale);
+  uint32_t uiScaley = WELS_ROUND ((float)kiSrcHeight / (float)kiDstHeight * kuiScale);
   GeneralBilinearAccurateDownsampler_neon (pDst, kiDstStride, kiDstWidth, kiDstHeight, pSrc, kiSrcStride, uiScalex,
       uiScaley);
 }