shithub: openh264

Download patch

ref: 23b20fb14cd7eb99fcefa9709517457fb3b5b3e1
parent: 279e14b34e7cb752eb82c0720512e08555a53b88
author: Martin Storsjö <[email protected]>
date: Mon Jan 26 17:57:20 EST 2015

Simplify code in HorFilterInput16bit in MC

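The helper now reads its six filter taps from pSrc[0] through pSrc[5]
instead of pSrc[-2] through pSrc[3], and the callers pass the buffer
pointer without the +2 offset, so the same elements are read as before.
This avoids a gcc optimizer bug (which seems to be present in some
gcc 4.6 and 4.7 versions) at the -O3 level.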

--- a/codec/decoder/core/src/mc.cpp
+++ b/codec/decoder/core/src/mc.cpp
@@ -134,9 +134,9 @@
 //--------------------Luma sample MC------------------//
 
 static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) {
-  int32_t iPix05 = pSrc[-2] + pSrc[3];
-  int32_t iPix14 = pSrc[-1] + pSrc[2];
-  int32_t iPix23 = pSrc[ 0] + pSrc[1];
+  int32_t iPix05 = pSrc[0] + pSrc[5];
+  int32_t iPix14 = pSrc[1] + pSrc[4];
+  int32_t iPix23 = pSrc[2] + pSrc[3];
 
   return (iPix05 - (iPix14 * 5) + (iPix23 * 20));
 }
@@ -213,7 +213,7 @@
       iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride);
     }
     for (k = 0; k < iWidth; k++) {
-      pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[2 + k]) + 512) >> 10);
+      pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10);
     }
     pSrc += iSrcStride;
     pDst += iDstStride;
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -126,9 +126,9 @@
 }
 
 static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) {
-  int32_t iPix05 = pSrc[-2] + pSrc[3];
-  int32_t iPix14 = pSrc[-1] + pSrc[2];
-  int32_t iPix23 = pSrc[ 0] + pSrc[1];
+  int32_t iPix05 = pSrc[0] + pSrc[5];
+  int32_t iPix14 = pSrc[1] + pSrc[4];
+  int32_t iPix23 = pSrc[2] + pSrc[3];
 
   return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2));
 }
@@ -203,7 +203,7 @@
       pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
     }
     for (k = 0; k < 16; k++) {
-      pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
+      pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
     }
     pSrc += iSrcStride;
     pDst += iDstStride;
@@ -342,7 +342,7 @@
       pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
     }
     for (k = 0; k < iWidth; k++) {
-      pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
+      pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
     }
     pSrc += iSrcStride;
     pDst += iDstStride;