ref: 36a2d2caa090cbcbedbca33dd207ccd9f9bee170
parent: e214351b0e485f68d0c7a03f726e8a694f0a8cbf
author: Janne Grunau <[email protected]>
date: Thu Oct 25 17:13:18 EDT 2018
arm: reverse jump tables. Instead of bit-reversing the register, subtract 24 from the clz result. This avoids two empty filler values in the jump table while keeping the same instruction count.
--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -72,6 +72,7 @@
push {r4-r6,lr}
ldr r4, [sp, #16]
ldr r5, [sp, #20]
+ clz r4, r4
.ifnc \type, avg
ldr lr, [sp, #24]
.endif
@@ -83,9 +84,8 @@
.ifc \type, mask
vmov.i8 q15, #256-2
.endif
- rbit r4, r4
adr r12, L(\type\()_tbl)
- clz r4, r4
+ sub r4, r4, #24
ldr r4, [r12, r4, lsl #2]
\type d16, d17, q0, q1, q2, q3
add r12, r12, r4
@@ -92,13 +92,12 @@
bx r12
.align 2
L(\type\()_tbl):
- .word 0, 0
- .word 4f - L(\type\()_tbl) + CONFIG_THUMB
- .word 80f - L(\type\()_tbl) + CONFIG_THUMB
- .word 160f - L(\type\()_tbl) + CONFIG_THUMB
- .word 320f - L(\type\()_tbl) + CONFIG_THUMB
- .word 640f - L(\type\()_tbl) + CONFIG_THUMB
.word 1280f - L(\type\()_tbl) + CONFIG_THUMB
+ .word 640f - L(\type\()_tbl) + CONFIG_THUMB
+ .word 320f - L(\type\()_tbl) + CONFIG_THUMB
+ .word 160f - L(\type\()_tbl) + CONFIG_THUMB
+ .word 80f - L(\type\()_tbl) + CONFIG_THUMB
+ .word 4f - L(\type\()_tbl) + CONFIG_THUMB
4:
add r6, r0, r1
lsl r1, r1, #1
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -96,6 +96,7 @@
.macro bidir_fn type
function \type\()_8bpc_neon, export=1
+ clz w4, w4
.ifc \type, w_avg
dup v30.8h, w6
neg v30.8h, v30.8h
@@ -104,9 +105,8 @@
.ifc \type, mask
movi v31.16b, #256-2
.endif
- rbit w4, w4
adr x7, L(\type\()_tbl)
- clz w4, w4
+ sub w4, w4, #24
\type v4, v0, v1
ldrh w4, [x7, x4, lsl #1]
\type v5, v2, v3
@@ -218,13 +218,12 @@
0:
ret
L(\type\()_tbl):
- .hword 0, 0
- .hword L(\type\()_tbl) - 4b
- .hword L(\type\()_tbl) - 8b
- .hword L(\type\()_tbl) - 160b
- .hword L(\type\()_tbl) - 320b
- .hword L(\type\()_tbl) - 640b
.hword L(\type\()_tbl) - 1280b
+ .hword L(\type\()_tbl) - 640b
+ .hword L(\type\()_tbl) - 320b
+ .hword L(\type\()_tbl) - 160b
+ .hword L(\type\()_tbl) - 8b
+ .hword L(\type\()_tbl) - 4b
endfunc
.endm