ref: 039a54780478b233626007993b49d43b51709ff1
parent: 427da1c990923fa68632f4a7b99294da78cbfeb2
author: Licai Guo <[email protected]>
date: Fri Apr 18 20:33:23 EDT 2014
Give accurate alignment information for the MC copy functions. This can improve performance on targets like JavaScript.
--- a/codec/common/inc/ls_defines.h
+++ b/codec/common/inc/ls_defines.h
@@ -51,11 +51,36 @@
#define LD16(a) (((struct tagUnaligned_16 *) (a))->l)
#define LD32(a) (((struct tagUnaligned_32 *) (a))->l)
#define LD64(a) (((struct tagUnaligned_64 *) (a))->l)
+
+// STRUCTA(size, align) declares struct tagUnaligned_<size>_<align>, a wrapper
+// around one uint<size>_t member `l` whose alignment is exactly <align> bytes.
+// GCC's aligned attribute on its own can only raise a type's alignment, so
+// packed is required for <align> values below the member's natural alignment
+// (64/2, 64/4, 32/2) to take effect. may_alias permits accessing byte buffers
+// through these structs without violating strict-aliasing rules.
+#define STRUCTA(size, align) struct tagUnaligned_##size##_##align {\
+ uint##size##_t l; \
+} __attribute__ ((packed, aligned(align), may_alias))
+STRUCTA(16,2);
+STRUCTA(32,2);
+STRUCTA(32,4);
+STRUCTA(64,2);
+STRUCTA(64,4);
+STRUCTA(64,8);
//#define _USE_STRUCT_INT_CVT
// #ifdef _USE_STRUCT_INT_CVT
#define ST16(a, b) (((struct tagUnaligned_16 *) (a))->l) = (b)
#define ST32(a, b) (((struct tagUnaligned_32 *) (a))->l) = (b)
#define ST64(a, b) (((struct tagUnaligned_64 *) (a))->l) = (b)
+
+// Generic accessors: view address `p` as struct tagUnaligned_<size>_<align>
+// and read (LDA) or assign (STA) its member `l`. STA expands to a bare
+// assignment expression, so use it as a statement.
+#define LDA(p, size, align) (((struct tagUnaligned_##size##_##align *) (p))->l)
+#define STA(p, v, size, align) (((struct tagUnaligned_##size##_##align *) (p))->l) = (v)
+// LD<bits>A<n>(p): load a <bits>-bit value through the <n>-aligned struct type.
+#define LD16A2(p) LDA(p, 16, 2)
+#define LD32A2(p) LDA(p, 32, 2)
+#define LD32A4(p) LDA(p, 32, 4)
+#define LD64A2(p) LDA(p, 64, 2)
+#define LD64A4(p) LDA(p, 64, 4)
+#define LD64A8(p) LDA(p, 64, 8)
+// ST<bits>A<n>(p, v): store v as a <bits>-bit value through the <n>-aligned struct type.
+#define ST16A2(p, v) STA(p, v, 16, 2)
+#define ST32A2(p, v) STA(p, v, 32, 2)
+#define ST32A4(p, v) STA(p, v, 32, 4)
+#define ST64A2(p, v) STA(p, v, 64, 2)
+#define ST64A4(p, v) STA(p, v, 64, 4)
+#define ST64A8(p, v) STA(p, v, 64, 8)
// #else
// inline void __ST16(void *dst, uint16_t v) { memcpy(dst, &v, 2); }
// inline void __ST32(void *dst, uint32_t v) { memcpy(dst, &v, 4); }
@@ -75,6 +100,18 @@
#define ST16(a, b) *((uint16_t*)(a)) = (b)
#define ST32(a, b) *((uint32_t*)(a)) = (b)
#define ST64(a, b) *((uint64_t*)(a)) = (b)
+#define LD16A2 LD16 // non-GNUC branch: aligned load names are plain aliases of LD16/LD32/LD64
+#define LD32A2 LD32
+#define LD32A4 LD32
+#define LD64A2 LD64
+#define LD64A4 LD64
+#define LD64A8 LD64
+#define ST16A2 ST16 // aligned store names likewise alias ST16/ST32/ST64; the alignment suffix conveys nothing here
+#define ST32A2 ST32
+#define ST32A4 ST32
+#define ST64A2 ST64
+#define ST64A4 ST64
+#define ST64A8 ST64
#endif /* !__GNUC__ */
--- a/codec/decoder/core/src/mc.cpp
+++ b/codec/decoder/core/src/mc.cpp
@@ -94,7 +94,7 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) { // iWidth == 2 only for chroma
- ST16 (pDst, LD16 (pSrc));
+ ST16A2 (pDst, LD16 (pSrc));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -104,7 +104,7 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) {
- ST32 (pDst, LD32 (pSrc));
+ ST32A4 (pDst, LD32 (pSrc));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -114,7 +114,7 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) {
- ST64 (pDst, LD64 (pSrc));
+ ST64A8 (pDst, LD64 (pSrc));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -124,8 +124,8 @@
int32_t iHeight) {
int32_t i;
for (i = 0; i < iHeight; i++) {
- ST64 (pDst , LD64 (pSrc));
- ST64 (pDst + 8, LD64 (pSrc + 8));
+ ST64A8 (pDst , LD64 (pSrc));
+ ST64A8 (pDst + 8, LD64 (pSrc + 8));
pDst += iDstStride;
pSrc += iSrcStride;
}
@@ -202,7 +202,7 @@
static inline void McHorVer22_c (const uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride, int32_t iWidth,
int32_t iHeight) {
- int16_t iTmp[16 + 5] = {0}; //16
+ int16_t iTmp[16 + 5]; //16
int32_t i, j, k;
for (i = 0; i < iHeight; i++) {