shithub: dav1d

Download patch

ref: a6711a5c2b12b74e4bc887c525c25a6981158930
parent: 39d6c599352bff68038500756488e80f9cd31295
author: Martin Storsjö <[email protected]>
date: Mon May 4 04:58:12 EDT 2020

arm64: itx: Fix the eob checking for dct_dct_64x16

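Before this, we never took the early exit from the first pass: the
eob threshold was loaded into w12 only after the comparison that
uses it. Load it at the end of the preceding iteration instead.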

Before:                               Cortex A53      A72      A73
inv_txfm_add_64x16_dct_dct_1_8bpc_neon:   7275.7   5198.3   5250.9
inv_txfm_add_64x16_dct_dct_2_8bpc_neon:   7276.1   5197.0   5251.3
inv_txfm_add_64x16_dct_dct_3_8bpc_neon:   7275.8   5196.2   5254.5
inv_txfm_add_64x16_dct_dct_4_8bpc_neon:   7273.6   5198.8   5254.2
After:
inv_txfm_add_64x16_dct_dct_1_8bpc_neon:   5187.8   3763.8   3735.0
inv_txfm_add_64x16_dct_dct_2_8bpc_neon:   7280.6   5185.6   5256.3
inv_txfm_add_64x16_dct_dct_3_8bpc_neon:   7270.7   5179.8   5250.3
inv_txfm_add_64x16_dct_dct_4_8bpc_neon:   7271.7   5212.4   5256.4

The other related variants didn't have this bug and properly exited
early when possible.

--- a/src/arm/64/itx.S
+++ b/src/arm/64/itx.S
@@ -3218,7 +3218,6 @@
         mov             w8,  #(16 - \i)
         cmp             w3,  w12
         b.lt            1f
-        ldrh            w12, [x13], #2
 .endif
         add             x7,  x2,  #(\i*2)
         mov             x8,  #16*2
@@ -3226,6 +3225,9 @@
         bl              inv_txfm_dct_clear_8x64_neon
         add             x6,  x4,  #(\i*64*2)
         bl              inv_txfm_horz_dct_64x8_neon
+.if \i < 8
+        ldrh            w12, [x13], #2
+.endif
 .endr
         b               3f