ref: 23b20fb14cd7eb99fcefa9709517457fb3b5b3e1
parent: 279e14b34e7cb752eb82c0720512e08555a53b88
author: Martin Storsjö <[email protected]>
date: Mon Jan 26 17:57:20 EST 2015
Simplify code in HorFilterInput16bit in MC. This avoids a gcc optimizer bug (which seems to be present in some gcc 4.6 and 4.7 versions) at the -O3 level.
--- a/codec/decoder/core/src/mc.cpp
+++ b/codec/decoder/core/src/mc.cpp
@@ -134,9 +134,9 @@
//--------------------Luma sample MC------------------//
static inline int32_t HorFilterInput16bit_c (const int16_t* pSrc) {
- int32_t iPix05 = pSrc[-2] + pSrc[3];
- int32_t iPix14 = pSrc[-1] + pSrc[2];
- int32_t iPix23 = pSrc[ 0] + pSrc[1];
+ int32_t iPix05 = pSrc[0] + pSrc[5];
+ int32_t iPix14 = pSrc[1] + pSrc[4];
+ int32_t iPix23 = pSrc[2] + pSrc[3];
return (iPix05 - (iPix14 * 5) + (iPix23 * 20));
}
@@ -213,7 +213,7 @@
iTmp[j] = FilterInput8bitWithStride_c (pSrc - 2 + j, iSrcStride);
}
for (k = 0; k < iWidth; k++) {
- pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[2 + k]) + 512) >> 10);
+ pDst[k] = WelsClip1 ((HorFilterInput16bit_c (&iTmp[k]) + 512) >> 10);
}
pSrc += iSrcStride;
pDst += iDstStride;
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -126,9 +126,9 @@
}
static inline int32_t HorFilterInput16bit1_c (const int16_t* pSrc) {
- int32_t iPix05 = pSrc[-2] + pSrc[3];
- int32_t iPix14 = pSrc[-1] + pSrc[2];
- int32_t iPix23 = pSrc[ 0] + pSrc[1];
+ int32_t iPix05 = pSrc[0] + pSrc[5];
+ int32_t iPix14 = pSrc[1] + pSrc[4];
+ int32_t iPix23 = pSrc[2] + pSrc[3];
return (iPix05 - ((iPix14 << 2) + iPix14) + (iPix23 << 4) + (iPix23 << 2));
}
@@ -203,7 +203,7 @@
pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
}
for (k = 0; k < 16; k++) {
- pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
+ pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
}
pSrc += iSrcStride;
pDst += iDstStride;
@@ -342,7 +342,7 @@
pTmp[j] = VerFilter_c (pSrc - 2 + j, iSrcStride);
}
for (k = 0; k < iWidth; k++) {
- pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[2 + k]) + 512) >> 10);
+ pDst[k] = WelsClip1 ((HorFilterInput16bit1_c (&pTmp[k]) + 512) >> 10);
}
pSrc += iSrcStride;
pDst += iDstStride;