shithub: openh264

Download patch

ref: a4f59bc0d7505b35535e002be1cdcebeb3622207
parent: 4062fa9d34c6e9bc3d5b7ea491bcf9096d323011
author: dongzhang <[email protected]>
date: Thu Apr 17 11:16:04 EDT 2014

Modify the ARM32 NEON code for ExpandPictureChroma to handle the case where UVWidth % 16 == 8.

--- a/codec/common/arm/expand_picture_neon.S
+++ b/codec/common/arm/expand_picture_neon.S
@@ -87,7 +87,7 @@
 
 
 WELS_ASM_FUNC_BEGIN ExpandPictureChroma_neon
-    stmdb sp!, {r4-r8}
+    stmdb sp!, {r4-r9}
 	//Save the dst
 	mov r7, r0
 	mov r8, r3
@@ -109,12 +109,14 @@
 
 	//for the top and bottom expand
 	add r2, #32
+        mov r9, r2
+        bic r2, #15
 	sub r0, #16
 	mla r4, r1, r3, r0
 	sub r4, r1
 _expand_picture_chroma_loop0:
 	mov r5, #16
-    mls r5, r5, r1, r0
+        mls r5, r5, r1, r0
 	add r6, r4, r1
 	vld1.8 {q0}, [r0]!
 	vld1.8 {q1}, [r4]!
@@ -124,7 +126,7 @@
 	vst1.8 {q0}, [r5], r1
 	vst1.8 {q1}, [r6], r1
 	subs r8, #1
-    bne _expand_picture_chroma_loop1
+        bne _expand_picture_chroma_loop1
 
 	subs r2, #16
 	bne	_expand_picture_chroma_loop0
@@ -131,7 +133,23 @@
 
     //vldreq.32 d0, [r0]
 
-	ldmia sp!, {r4-r8}
+        and r9, #15
+        cmp r9, #8
+        bne _expand_picture_chroma_end
+	mov r5, #16
+        mls r5, r5, r1, r0
+	add r6, r4, r1
+	vld1.8 {d0}, [r0]!
+	vld1.8 {d2}, [r4]!
+	mov r8, #16
+_expand_picture_chroma_loop3:
+	vst1.8 {d0}, [r5], r1
+	vst1.8 {d2}, [r6], r1
+	subs r8, #1
+        bne _expand_picture_chroma_loop3
+_expand_picture_chroma_end:
+
+	ldmia sp!, {r4-r9}
 WELS_ASM_FUNC_END
 
 #endif
--- a/codec/encoder/core/src/expand_pic.cpp
+++ b/codec/encoder/core/src/expand_pic.cpp
@@ -132,7 +132,7 @@
 #if defined(HAVE_NEON)
   if (kuiCPUFlag & WELS_CPU_NEON) {
     pFuncList->pfExpandLumaPicture	= ExpandPictureLuma_neon;
-    pFuncList->pfExpandChromaPicture[0] = ExpandPictureChroma_c;
+    pFuncList->pfExpandChromaPicture[0] = ExpandPictureChroma_neon;
     pFuncList->pfExpandChromaPicture[1] = ExpandPictureChroma_neon;
   }
 #endif//HAVE_NEON