ref: 86b3a999d548ae9d544029d262b56ad0b30132f0
parent: 3a1cc63649759469ebbec91da3aaa21719b9a96f
author: Martin Storsjö <[email protected]>
date: Tue Jul 8 05:29:23 EDT 2014
Use mov.16b instead of mov.8h. According to the ARM Architecture Reference Manual, the mov (vector) instruction can only use the arrangement specifiers '8b' and '16b'. The Apple tools still accept the '8h' form, but it assembles into the same encoding as '16b'. (When copying one vector register to another, the element size of the vectors doesn't matter.) This fixes building with GNU binutils.
--- a/codec/encoder/core/arm64/reconstruct_aarch64_neon.S
+++ b/codec/encoder/core/arm64/reconstruct_aarch64_neon.S
@@ -73,7 +73,7 @@
cmgt $4.8h, $0.8h, #0 // if true, location of coef == 11111111
bif $3.16b, $1.16b, $4.16b // if (x<0) reserved part; else keep 0 untouched
shl $3.8h, $3.8h, #1
- mov.8h $6, $1
+ mov.16b $6, $1
sub $1.8h, $1.8h, $3.8h // if x > 0, -= 0; else x-= 2x
// }
.endm
@@ -315,7 +315,7 @@
cmgt \arg4\().8h, \arg0\().8h, #0 // if true, location of coef == 11111111
bif \arg3\().16b, \arg1\().16b, \arg4\().16b // if (x<0) reserved part; else keep 0 untouched
shl \arg3\().8h, \arg3\().8h, #1
-mov \arg6\().8h, \arg1\().8h
+mov \arg6\().16b, \arg1\().16b
sub \arg1\().8h, \arg1\().8h, \arg3\().8h // if x > 0, -= 0; else x-= 2x
// }
.endm
@@ -533,7 +533,7 @@
ld1 {v2.8h}, [x1]
ld1 {v0.8h, v1.8h}, [x0]
ld1 {v3.8h}, [x2]
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS v0, v2, v3, v5, v6, v7
st1 {v2.8h}, [x0], #16
NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7
@@ -545,7 +545,7 @@
ld1 {v0.8h, v1.8h}, [x0]
dup v2.8h, w1 // even ff range [0, 768]
dup v3.8h, w2
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS v0, v2, v3, v5, v6, v7
st1 {v2.8h}, [x0], #16
NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7
@@ -559,10 +559,10 @@
.rept 4
ld1 {v0.8h, v1.8h}, [x0], #32
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS v0, v4, v3, v5, v6, v7
st1 {v4.8h}, [x1], #16
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS v1, v4, v3, v5, v6, v7
st1 {v4.8h}, [x1], #16
.endr
@@ -575,18 +575,18 @@
mov x1, x0
ld1 {v0.8h, v1.8h}, [x0], #32
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v16
st1 {v4.8h}, [x1], #16
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v17
st1 {v4.8h}, [x1], #16 // then 1st 16 elem in v16 & v17
ld1 {v0.8h, v1.8h}, [x0], #32
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v18
st1 {v4.8h}, [x1], #16
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v19
st1 {v4.8h}, [x1], #16 // then 2st 16 elem in v18 & v19
@@ -593,18 +593,18 @@
SELECT_MAX_IN_ABS_COEF v16, v17, v18, v19, h20, h21
ld1 {v0.8h, v1.8h}, [x0], #32
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v16
st1 {v4.8h}, [x1], #16
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v17
st1 {v4.8h}, [x1], #16 // then 1st 16 elem in v16 & v17
ld1 {v0.8h, v1.8h}, [x0], #32
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v0, v4, v3, v5, v6, v7, v18
st1 {v4.8h}, [x1], #16
- mov.8h v4, v2
+ mov.16b v4, v2
NEWQUANT_COEF_EACH_16BITS_MAX v1, v4, v3, v5, v6, v7, v19
st1 {v4.8h}, [x1], #16 // then 2st 16 elem in v18 & v19
@@ -944,4 +944,4 @@
st1 {v3.16b}, [x0], x1
.endr
WELS_ASM_AARCH64_FUNC_END
-#endif
\ No newline at end of file
+#endif