ref: c01b0f4f92e027fb2c500ab4ebbc09c102a561cb
parent: 6a43b8201f77f368786d7ec3c7594edfb1654907
parent: 5ba3ead01527e5f932c82252540e495029468833
author: ruil2 <[email protected]>
date: Mon Apr 21 07:05:38 EDT 2014
Merge pull request #716 from licaiguo/ForJS: Specify accurate alignment information for mc copy and intrapred C functions
--- a/codec/common/inc/ls_defines.h
+++ b/codec/common/inc/ls_defines.h
@@ -51,11 +51,36 @@
#define LD16(a) (((struct tagUnaligned_16 *) (a))->l)
#define LD32(a) (((struct tagUnaligned_32 *) (a))->l)
#define LD64(a) (((struct tagUnaligned_64 *) (a))->l)
+
+#define STRUCTA(size, align) struct tagUnaligned_##size##_##align {\
+ uint##size##_t l; \
+} __attribute__ ((aligned(align)))
+STRUCTA(16,2);
+STRUCTA(32,2);
+STRUCTA(32,4);
+STRUCTA(64,2);
+STRUCTA(64,4);
+STRUCTA(64,8);
//#define _USE_STRUCT_INT_CVT
// #ifdef _USE_STRUCT_INT_CVT
#define ST16(a, b) (((struct tagUnaligned_16 *) (a))->l) = (b)
#define ST32(a, b) (((struct tagUnaligned_32 *) (a))->l) = (b)
#define ST64(a, b) (((struct tagUnaligned_64 *) (a))->l) = (b)
+
+#define LDA(a, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l)
+#define STA(a, b, size, align) (((struct tagUnaligned_##size##_##align *) (a))->l) = (b)
+#define LD16A2(a) LDA(a, 16, 2)
+#define LD32A2(a) LDA(a, 32, 2)
+#define LD32A4(a) LDA(a, 32, 4)
+#define LD64A2(a) LDA(a, 64, 2)
+#define LD64A4(a) LDA(a, 64, 4)
+#define LD64A8(a) LDA(a, 64, 8)
+#define ST16A2(a, b) STA(a, b, 16, 2)
+#define ST32A2(a, b) STA(a, b, 32, 2)
+#define ST32A4(a, b) STA(a, b, 32, 4)
+#define ST64A2(a, b) STA(a, b, 64, 2)
+#define ST64A4(a, b) STA(a, b, 64, 4)
+#define ST64A8(a, b) STA(a, b, 64, 8)
// #else
// inline void __ST16(void *dst, uint16_t v) { memcpy(dst, &v, 2); }
// inline void __ST32(void *dst, uint32_t v) { memcpy(dst, &v, 4); }
@@ -75,6 +100,18 @@
#define ST16(a, b) *((uint16_t*)(a)) = (b)
#define ST32(a, b) *((uint32_t*)(a)) = (b)
#define ST64(a, b) *((uint64_t*)(a)) = (b)
+#define LD16A2 LD16
+#define LD32A2 LD32
+#define LD32A4 LD32
+#define LD64A2 LD64
+#define LD64A4 LD64
+#define LD64A8 LD64
+#define ST16A2 ST16
+#define ST32A2 ST32
+#define ST32A4 ST32
+#define ST64A2 ST64
+#define ST64A4 ST64
+#define ST64A8 ST64
#endif /* !__GNUC__ */
--- a/codec/decoder/core/src/get_intra_predictor.cpp
+++ b/codec/decoder/core/src/get_intra_predictor.cpp
@@ -52,12 +52,12 @@
#define I16x16_COUNT 16
void WelsI4x4LumaPredV_c (uint8_t* pPred, const int32_t kiStride) {
- const uint32_t kuiVal = LD32 (pPred - kiStride);
+ const uint32_t kuiVal = LD32A4 (pPred - kiStride);
- ST32 (pPred , kuiVal);
- ST32 (pPred + kiStride , kuiVal);
- ST32 (pPred + (kiStride << 1) , kuiVal);
- ST32 (pPred + (kiStride << 1) + kiStride , kuiVal);
+ ST32A4 (pPred , kuiVal);
+ ST32A4 (pPred + kiStride , kuiVal);
+ ST32A4 (pPred + (kiStride << 1) , kuiVal);
+ ST32A4 (pPred + (kiStride << 1) + kiStride , kuiVal);
}
void WelsI4x4LumaPredH_c (uint8_t* pPred, const int32_t kiStride) {
@@ -68,10 +68,10 @@
const uint32_t kuiL2 = 0x01010101U * pPred[-1 + kiStride2];
const uint32_t kuiL3 = 0x01010101U * pPred[-1 + kiStride3];
- ST32 (pPred , kuiL0);
- ST32 (pPred + kiStride , kuiL1);
- ST32 (pPred + kiStride2, kuiL2);
- ST32 (pPred + kiStride3, kuiL3);
+ ST32A4 (pPred , kuiL0);
+ ST32A4 (pPred + kiStride , kuiL1);
+ ST32A4 (pPred + kiStride2, kuiL2);
+ ST32A4 (pPred + kiStride3, kuiL3);
}
void WelsI4x4LumaPredDc_c (uint8_t* pPred, const int32_t kiStride) {
@@ -81,10 +81,10 @@
pPred[-kiStride] + pPred[-kiStride + 1] + pPred[-kiStride + 2] + pPred[-kiStride + 3] + 4) >> 3;
const uint32_t kuiMean32 = 0x01010101U * kuiMean;
- ST32 (pPred , kuiMean32);
- ST32 (pPred + kiStride , kuiMean32);
- ST32 (pPred + kiStride2, kuiMean32);
- ST32 (pPred + kiStride3, kuiMean32);
+ ST32A4 (pPred , kuiMean32);
+ ST32A4 (pPred + kiStride , kuiMean32);
+ ST32A4 (pPred + kiStride2, kuiMean32);
+ ST32A4 (pPred + kiStride3, kuiMean32);
}
void WelsI4x4LumaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride) {
@@ -93,10 +93,10 @@
const uint8_t kuiMean = (pPred[-1] + pPred[-1 + kiStride] + pPred[-1 + kiStride2] + pPred[-1 + kiStride3] + 2) >> 2;
const uint32_t kuiMean32 = 0x01010101U * kuiMean;
- ST32 (pPred , kuiMean32);
- ST32 (pPred + kiStride , kuiMean32);
- ST32 (pPred + kiStride2, kuiMean32);
- ST32 (pPred + kiStride3, kuiMean32);
+ ST32A4 (pPred , kuiMean32);
+ ST32A4 (pPred + kiStride , kuiMean32);
+ ST32A4 (pPred + kiStride2, kuiMean32);
+ ST32A4 (pPred + kiStride3, kuiMean32);
}
void WelsI4x4LumaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) {
@@ -106,19 +106,19 @@
2;
const uint32_t kuiMean32 = 0x01010101U * kuiMean;
- ST32 (pPred , kuiMean32);
- ST32 (pPred + kiStride , kuiMean32);
- ST32 (pPred + kiStride2, kuiMean32);
- ST32 (pPred + kiStride3, kuiMean32);
+ ST32A4 (pPred , kuiMean32);
+ ST32A4 (pPred + kiStride , kuiMean32);
+ ST32A4 (pPred + kiStride2, kuiMean32);
+ ST32A4 (pPred + kiStride3, kuiMean32);
}
void WelsI4x4LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride) {
const uint32_t kuiDC32 = 0x80808080U;
- ST32 (pPred , kuiDC32);
- ST32 (pPred + kiStride , kuiDC32);
- ST32 (pPred + (kiStride << 1) , kuiDC32);
- ST32 (pPred + (kiStride << 1) + kiStride, kuiDC32);
+ ST32A4 (pPred , kuiDC32);
+ ST32A4 (pPred + kiStride , kuiDC32);
+ ST32A4 (pPred + (kiStride << 1) , kuiDC32);
+ ST32A4 (pPred + (kiStride << 1) + kiStride, kuiDC32);
}
/*down pLeft*/
@@ -144,10 +144,10 @@
const uint8_t kuiDDL6 = (2 + kuiT6 + kuiT7 + (kuiT7 << 1)) >> 2; // kDDL6
const uint8_t kuiList[8] = { kuiDDL0, kuiDDL1, kuiDDL2, kuiDDL3, kuiDDL4, kuiDDL5, kuiDDL6, 0 };
- ST32 (pPred , LD32 (kuiList));
- ST32 (pPred + kiStride , LD32 (kuiList + 1));
- ST32 (pPred + kiStride2, LD32 (kuiList + 2));
- ST32 (pPred + kiStride3, LD32 (kuiList + 3));
+ ST32A4 (pPred , LD32 (kuiList));
+ ST32A4 (pPred + kiStride , LD32 (kuiList + 1));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 2));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList + 3));
}
/*down pLeft*/
@@ -170,10 +170,10 @@
const uint8_t kuiDLT3 = kuiT33 >> 1; // kDLT3
const uint8_t kuiList[8] = { kuiDLT0, kuiDLT1, kuiDLT2, kuiDLT3, kuiDLT3, kuiDLT3, kuiDLT3 , kuiDLT3 };
- ST32 (pPred, LD32 (kuiList));
- ST32 (pPred + kiStride, LD32 (kuiList + 1));
- ST32 (pPred + kiStride2, LD32 (kuiList + 2));
- ST32 (pPred + kiStride3, LD32 (kuiList + 3));
+ ST32A4 (pPred, LD32 (kuiList));
+ ST32A4 (pPred + kiStride, LD32 (kuiList + 1));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 2));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList + 3));
}
@@ -210,10 +210,10 @@
const uint8_t kuiDDR6 = (kuiL12 + kuiL23) >> 2; // kuiDDR6
const uint8_t kuiList[8] = { kuiDDR6, kuiDDR5, kuiDDR4, kuiDDR0, kuiDDR1, kuiDDR2, kuiDDR3, 0 };
- ST32 (pPred , LD32 (kuiList + 3));
- ST32 (pPred + kiStride , LD32 (kuiList + 2));
- ST32 (pPred + kiStride2, LD32 (kuiList + 1));
- ST32 (pPred + kiStride3, LD32 (kuiList));
+ ST32A4 (pPred , LD32 (kuiList + 3));
+ ST32A4 (pPred + kiStride , LD32 (kuiList + 2));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 1));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList));
}
@@ -248,10 +248,10 @@
const uint8_t kuiVL9 = (kuiT45 + kuiT56) >> 2; // kuiVL9
const uint8_t kuiList[10] = { kuiVL0, kuiVL1, kuiVL2, kuiVL3, kuiVL4, kuiVL5, kuiVL6, kuiVL7, kuiVL8, kuiVL9 };
- ST32 (pPred, LD32 (kuiList));
- ST32 (pPred + kiStride, LD32 (kuiList + 5));
- ST32 (pPred + kiStride2, LD32 (kuiList + 1));
- ST32 (pPred + kiStride3, LD32 (kuiList + 6));
+ ST32A4 (pPred, LD32 (kuiList));
+ ST32A4 (pPred + kiStride, LD32 (kuiList + 5));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 1));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList + 6));
}
/*vertical pLeft*/
@@ -278,10 +278,10 @@
const uint8_t kuiVL7 = kuiVL3;
const uint8_t kuiList[10] = { kuiVL0, kuiVL1, kuiVL2, kuiVL3, kuiVL3, kuiVL4, kuiVL5, kuiVL6, kuiVL7, kuiVL7 };
- ST32 (pPred , LD32 (kuiList));
- ST32 (pPred + kiStride , LD32 (kuiList + 5));
- ST32 (pPred + kiStride2, LD32 (kuiList + 1));
- ST32 (pPred + kiStride3, LD32 (kuiList + 6));
+ ST32A4 (pPred , LD32 (kuiList));
+ ST32A4 (pPred + kiStride , LD32 (kuiList + 5));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 1));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList + 6));
}
@@ -310,10 +310,10 @@
const uint8_t kuiVR9 = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2; // kuiVR9
const uint8_t kuiList[10] = { kuiVR8, kuiVR0, kuiVR1, kuiVR2, kuiVR3, kuiVR9, kuiVR4, kuiVR5, kuiVR6, kuiVR7 };
- ST32 (pPred , LD32 (kuiList + 1));
- ST32 (pPred + kiStride , LD32 (kuiList + 6));
- ST32 (pPred + kiStride2, LD32 (kuiList));
- ST32 (pPred + kiStride3, LD32 (kuiList + 5));
+ ST32A4 (pPred , LD32 (kuiList + 1));
+ ST32A4 (pPred + kiStride , LD32 (kuiList + 6));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList + 5));
}
/*horizontal up*/
@@ -336,10 +336,10 @@
const uint8_t kuiHU5 = (1 + kuiL23 + (kuiL3 << 1)) >> 2;
const uint8_t kuiList[10] = { kuiHU0, kuiHU1, kuiHU2, kuiHU3, kuiHU4, kuiHU5, kuiL3, kuiL3, kuiL3, kuiL3 };
- ST32 (pPred , LD32 (kuiList));
- ST32 (pPred + kiStride , LD32 (kuiList + 2));
- ST32 (pPred + kiStride2, LD32 (kuiList + 4));
- ST32 (pPred + kiStride3, LD32 (kuiList + 6));
+ ST32A4 (pPred , LD32 (kuiList));
+ ST32A4 (pPred + kiStride , LD32 (kuiList + 2));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 4));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList + 6));
}
/*horizontal down*/
@@ -374,25 +374,25 @@
const uint8_t kuiHD9 = (kuiL12 + kuiL23) >> 2;
const uint8_t kuiList[10] = { kuiHD8, kuiHD9, kuiHD6, kuiHD7, kuiHD4, kuiHD5, kuiHD0, kuiHD1, kuiHD2, kuiHD3 };
- ST32 (pPred , LD32 (kuiList + 6));
- ST32 (pPred + kiStride , LD32 (kuiList + 4));
- ST32 (pPred + kiStride2, LD32 (kuiList + 2));
- ST32 (pPred + kiStride3, LD32 (kuiList));
+ ST32A4 (pPred , LD32 (kuiList + 6));
+ ST32A4 (pPred + kiStride , LD32 (kuiList + 4));
+ ST32A4 (pPred + kiStride2, LD32 (kuiList + 2));
+ ST32A4 (pPred + kiStride3, LD32 (kuiList));
}
void WelsIChromaPredV_c (uint8_t* pPred, const int32_t kiStride) {
- const uint64_t kuiVal64 = LD64 (&pPred[-kiStride]);
+ const uint64_t kuiVal64 = LD64A8 (&pPred[-kiStride]);
const int32_t kiStride2 = kiStride << 1;
const int32_t kiStride4 = kiStride2 << 1;
- ST64 (pPred , kuiVal64);
- ST64 (pPred + kiStride , kuiVal64);
- ST64 (pPred + kiStride2 , kuiVal64);
- ST64 (pPred + kiStride2 + kiStride , kuiVal64);
- ST64 (pPred + kiStride4 , kuiVal64);
- ST64 (pPred + kiStride4 + kiStride , kuiVal64);
- ST64 (pPred + kiStride4 + kiStride2 , kuiVal64);
- ST64 (pPred + (kiStride << 3) - kiStride , kuiVal64);
+ ST64A8 (pPred , kuiVal64);
+ ST64A8 (pPred + kiStride , kuiVal64);
+ ST64A8 (pPred + kiStride2 , kuiVal64);
+ ST64A8 (pPred + kiStride2 + kiStride , kuiVal64);
+ ST64A8 (pPred + kiStride4 , kuiVal64);
+ ST64A8 (pPred + kiStride4 + kiStride , kuiVal64);
+ ST64A8 (pPred + kiStride4 + kiStride2 , kuiVal64);
+ ST64A8 (pPred + (kiStride << 3) - kiStride , kuiVal64);
}
void WelsIChromaPredH_c (uint8_t* pPred, const int32_t kiStride) {
@@ -403,7 +403,7 @@
const uint8_t kuiVal8 = pPred[iTmp - 1];
const uint64_t kuiVal64 = 0x0101010101010101ULL * kuiVal8;
- ST64 (pPred + iTmp, kuiVal64);
+ ST64A8 (pPred + iTmp, kuiVal64);
iTmp -= kiStride;
} while (i-- > 0);
@@ -457,14 +457,14 @@
const uint64_t kuiUP64 = LD64 (kuiMUP);
const uint64_t kuiDN64 = LD64 (kuiMDown);
- ST64 (pPred , kuiUP64);
- ST64 (pPred + kiL1 + 1, kuiUP64);
- ST64 (pPred + kiL2 + 1, kuiUP64);
- ST64 (pPred + kiL3 + 1, kuiUP64);
- ST64 (pPred + kiL4 + 1, kuiDN64);
- ST64 (pPred + kiL5 + 1, kuiDN64);
- ST64 (pPred + kiL6 + 1, kuiDN64);
- ST64 (pPred + kiL7 + 1, kuiDN64);
+ ST64A8 (pPred , kuiUP64);
+ ST64A8 (pPred + kiL1 + 1, kuiUP64);
+ ST64A8 (pPred + kiL2 + 1, kuiUP64);
+ ST64A8 (pPred + kiL3 + 1, kuiUP64);
+ ST64A8 (pPred + kiL4 + 1, kuiDN64);
+ ST64A8 (pPred + kiL5 + 1, kuiDN64);
+ ST64A8 (pPred + kiL6 + 1, kuiDN64);
+ ST64A8 (pPred + kiL7 + 1, kuiDN64);
}
void WelsIChromaPredDcLeft_c (uint8_t* pPred, const int32_t kiStride) {
@@ -481,14 +481,14 @@
const uint64_t kuiUP64 = 0x0101010101010101ULL * kuiMUP;
const uint64_t kuiDN64 = 0x0101010101010101ULL * kuiMDown;
- ST64 (pPred , kuiUP64);
- ST64 (pPred + kiL1 + 1, kuiUP64);
- ST64 (pPred + kiL2 + 1, kuiUP64);
- ST64 (pPred + kiL3 + 1, kuiUP64);
- ST64 (pPred + kiL4 + 1, kuiDN64);
- ST64 (pPred + kiL5 + 1, kuiDN64);
- ST64 (pPred + kiL6 + 1, kuiDN64);
- ST64 (pPred + kiL7 + 1, kuiDN64);
+ ST64A8 (pPred , kuiUP64);
+ ST64A8 (pPred + kiL1 + 1, kuiUP64);
+ ST64A8 (pPred + kiL2 + 1, kuiUP64);
+ ST64A8 (pPred + kiL3 + 1, kuiUP64);
+ ST64A8 (pPred + kiL4 + 1, kuiDN64);
+ ST64A8 (pPred + kiL5 + 1, kuiDN64);
+ ST64A8 (pPred + kiL6 + 1, kuiDN64);
+ ST64A8 (pPred + kiL7 + 1, kuiDN64);
}
void WelsIChromaPredDcTop_c (uint8_t* pPred, const int32_t kiStride) {
@@ -502,7 +502,7 @@
uint8_t i = 7;
do {
- ST64 (pPred + iTmp, LD64 (kuiM));
+ ST64A8 (pPred + iTmp, LD64 (kuiM));
iTmp -= kiStride;
} while (i-- > 0);
@@ -514,7 +514,7 @@
uint8_t i = 7;
do {
- ST64 (pPred + iTmp, kuiDC64);
+ ST64A8 (pPred + iTmp, kuiDC64);
iTmp -= kiStride;
} while (i-- > 0);
@@ -522,13 +522,13 @@
void WelsI16x16LumaPredV_c (uint8_t* pPred, const int32_t kiStride) {
int32_t iTmp = (kiStride << 4) - kiStride;
- const uint64_t kuiTop1 = LD64 (pPred - kiStride);
- const uint64_t kuiTop2 = LD64 (pPred - kiStride + 8);
+ const uint64_t kuiTop1 = LD64A8 (pPred - kiStride);
+ const uint64_t kuiTop2 = LD64A8 (pPred - kiStride + 8);
uint8_t i = 15;
do {
- ST64 (pPred + iTmp , kuiTop1);
- ST64 (pPred + iTmp + 8, kuiTop2);
+ ST64A8 (pPred + iTmp , kuiTop1);
+ ST64A8 (pPred + iTmp + 8, kuiTop2);
iTmp -= kiStride;
} while (i-- > 0);
@@ -542,8 +542,8 @@
const uint8_t kuiVal8 = pPred[iTmp - 1];
const uint64_t kuiVal64 = 0x0101010101010101ULL * kuiVal8;
- ST64 (pPred + iTmp , kuiVal64);
- ST64 (pPred + iTmp + 8, kuiVal64);
+ ST64A8 (pPred + iTmp , kuiVal64);
+ ST64A8 (pPred + iTmp + 8, kuiVal64);
iTmp -= kiStride;
} while (i-- > 0);
@@ -633,8 +633,8 @@
iTmp = (kiStride << 4) - kiStride;
i = 15;
do {
- ST64 (pPred + iTmp , uiMean64);
- ST64 (pPred + iTmp + 8, uiMean64);
+ ST64A8 (pPred + iTmp , uiMean64);
+ ST64A8 (pPred + iTmp + 8, uiMean64);
iTmp -= kiStride;
} while (i-- > 0);
@@ -646,8 +646,8 @@
uint8_t i = 15;
do {
- ST64 (pPred + iTmp, kuiDC64);
- ST64 (pPred + iTmp + 8, kuiDC64);
+ ST64A8 (pPred + iTmp, kuiDC64);
+ ST64A8 (pPred + iTmp + 8, kuiDC64);
iTmp -= kiStride;
} while (i-- > 0);
--- a/codec/decoder/core/src/mc.cpp
+++ b/codec/decoder/core/src/mc.cpp
@@ -94,7 +94,7 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) { // iWidth == 2 only for chroma
- ST16 (pDst, LD16 (pSrc));
+ ST16A2 (pDst, LD16 (pSrc));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -104,7 +104,7 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) {
- ST32 (pDst, LD32 (pSrc));
+ ST32A4 (pDst, LD32 (pSrc));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -114,7 +114,7 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) {
- ST64 (pDst, LD64 (pSrc));
+ ST64A8 (pDst, LD64 (pSrc));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -124,8 +124,8 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) {
- ST64 (pDst , LD64 (pSrc));
- ST64 (pDst + 8, LD64 (pSrc + 8));
+ ST64A8 (pDst , LD64 (pSrc));
+ ST64A8 (pDst + 8, LD64 (pSrc + 8));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -202,7 +202,7 @@
static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
int32_t iHeight) {
- int16_t iTmp[16 + 5] = {0}; //16
+ int16_t iTmp[16 + 5]; //16
int32_t i, j, k;
for (i = 0; i < iHeight; i++) {