ref: 94f2907dc40a6415a10c252cb9ba3971f1f7e838
dir: /third_party/boringssl/src/gen/bcm/aesv8-armv7-linux.S/
// This file is generated from a similarly-named Perl script in the BoringSSL // source tree. Do not edit by hand. #include <openssl/asm_base.h> #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_ARM) && defined(__ELF__) #include <openssl/arm_arch.h> #if __ARM_MAX_ARCH__>=7 .text .arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-) .fpu neon .code 32 #undef __thumb2__ .align 5 .Lrcon: .long 0x01,0x01,0x01,0x01 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat .long 0x1b,0x1b,0x1b,0x1b .text .globl aes_hw_set_encrypt_key .hidden aes_hw_set_encrypt_key .type aes_hw_set_encrypt_key,%function .align 5 aes_hw_set_encrypt_key: .Lenc_key: mov r3,#-2 cmp r1,#128 blt .Lenc_key_abort cmp r1,#256 bgt .Lenc_key_abort tst r1,#0x3f bne .Lenc_key_abort adr r3,.Lrcon cmp r1,#192 veor q0,q0,q0 vld1.8 {q3},[r0]! mov r1,#8 @ reuse r1 vld1.32 {q1,q2},[r3]! blt .Loop128 beq .L192 b .L256 .align 4 .Loop128: vtbl.8 d20,{q3},d4 vtbl.8 d21,{q3},d5 vext.8 q9,q0,q3,#12 vst1.32 {q3},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 subs r1,r1,#1 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 vshl.u8 q1,q1,#1 veor q3,q3,q10 bne .Loop128 vld1.32 {q1},[r3] vtbl.8 d20,{q3},d4 vtbl.8 d21,{q3},d5 vext.8 q9,q0,q3,#12 vst1.32 {q3},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 vshl.u8 q1,q1,#1 veor q3,q3,q10 vtbl.8 d20,{q3},d4 vtbl.8 d21,{q3},d5 vext.8 q9,q0,q3,#12 vst1.32 {q3},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 veor q3,q3,q10 vst1.32 {q3},[r2] add r2,r2,#0x50 mov r12,#10 b .Ldone .align 4 .L192: vld1.8 {d16},[r0]! vmov.i8 q10,#8 @ borrow q10 vst1.32 {q3},[r2]! vsub.i8 q2,q2,q10 @ adjust the mask .Loop192: vtbl.8 d20,{q8},d4 vtbl.8 d21,{q8},d5 vext.8 q9,q0,q3,#12 vst1.32 {d16},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 subs r1,r1,#1 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vdup.32 q9,d7[1] veor q9,q9,q8 veor q10,q10,q1 vext.8 q8,q0,q8,#12 vshl.u8 q1,q1,#1 veor q8,q8,q9 veor q3,q3,q10 veor q8,q8,q10 vst1.32 {q3},[r2]! bne .Loop192 mov r12,#12 add r2,r2,#0x20 b .Ldone .align 4 .L256: vld1.8 {q8},[r0] mov r1,#7 mov r12,#14 vst1.32 {q3},[r2]! .Loop256: vtbl.8 d20,{q8},d4 vtbl.8 d21,{q8},d5 vext.8 q9,q0,q3,#12 vst1.32 {q8},[r2]! .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 subs r1,r1,#1 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q3,q3,q9 vext.8 q9,q0,q9,#12 veor q10,q10,q1 veor q3,q3,q9 vshl.u8 q1,q1,#1 veor q3,q3,q10 vst1.32 {q3},[r2]! beq .Ldone vdup.32 q10,d7[1] vext.8 q9,q0,q8,#12 .byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 veor q8,q8,q9 vext.8 q9,q0,q9,#12 veor q8,q8,q9 vext.8 q9,q0,q9,#12 veor q8,q8,q9 veor q8,q8,q10 b .Loop256 .Ldone: str r12,[r2] mov r3,#0 .Lenc_key_abort: mov r0,r3 @ return value bx lr .size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key .globl aes_hw_set_decrypt_key .hidden aes_hw_set_decrypt_key .type aes_hw_set_decrypt_key,%function .align 5 aes_hw_set_decrypt_key: stmdb sp!,{r4,lr} bl .Lenc_key cmp r0,#0 bne .Ldec_key_abort sub r2,r2,#240 @ restore original r2 mov r4,#-16 add r0,r2,r12,lsl#4 @ end of key schedule vld1.32 {q0},[r2] vld1.32 {q1},[r0] vst1.32 {q0},[r0],r4 vst1.32 {q1},[r2]! .Loop_imc: vld1.32 {q0},[r2] vld1.32 {q1},[r0] .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 vst1.32 {q0},[r0],r4 vst1.32 {q1},[r2]! cmp r0,r2 bhi .Loop_imc vld1.32 {q0},[r2] .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 vst1.32 {q0},[r0] eor r0,r0,r0 @ return value .Ldec_key_abort: ldmia sp!,{r4,pc} .size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key .globl aes_hw_encrypt .hidden aes_hw_encrypt .type aes_hw_encrypt,%function .align 5 aes_hw_encrypt: AARCH64_VALID_CALL_TARGET ldr r3,[r2,#240] vld1.32 {q0},[r2]! vld1.8 {q2},[r0] sub r3,r3,#2 vld1.32 {q1},[r2]! .Loop_enc: .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 vld1.32 {q1},[r2]! bgt .Loop_enc .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 vld1.32 {q0},[r2] .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 veor q2,q2,q0 vst1.8 {q2},[r1] bx lr .size aes_hw_encrypt,.-aes_hw_encrypt .globl aes_hw_decrypt .hidden aes_hw_decrypt .type aes_hw_decrypt,%function .align 5 aes_hw_decrypt: AARCH64_VALID_CALL_TARGET ldr r3,[r2,#240] vld1.32 {q0},[r2]! vld1.8 {q2},[r0] sub r3,r3,#2 vld1.32 {q1},[r2]! .Loop_dec: .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 vld1.32 {q1},[r2]! bgt .Loop_dec .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 vld1.32 {q0},[r2] .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 veor q2,q2,q0 vst1.8 {q2},[r1] bx lr .size aes_hw_decrypt,.-aes_hw_decrypt .globl aes_hw_cbc_encrypt .hidden aes_hw_cbc_encrypt .type aes_hw_cbc_encrypt,%function .align 5 aes_hw_cbc_encrypt: mov ip,sp stmdb sp!,{r4,r5,r6,r7,r8,lr} vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ldmia ip,{r4,r5} @ load remaining args subs r2,r2,#16 mov r8,#16 blo .Lcbc_abort moveq r8,#0 cmp r5,#0 @ en- or decrypting? ldr r5,[r3,#240] and r2,r2,#-16 vld1.8 {q6},[r4] vld1.8 {q0},[r0],r8 vld1.32 {q8,q9},[r3] @ load key schedule... sub r5,r5,#6 add r7,r3,r5,lsl#4 @ pointer to last 7 round keys sub r5,r5,#2 vld1.32 {q10,q11},[r7]! vld1.32 {q12,q13},[r7]! vld1.32 {q14,q15},[r7]! vld1.32 {q7},[r7] add r7,r3,#32 mov r6,r5 beq .Lcbc_dec cmp r5,#2 veor q0,q0,q6 veor q5,q8,q7 beq .Lcbc_enc128 vld1.32 {q2,q3},[r7] add r7,r3,#16 add r6,r3,#16*4 add r12,r3,#16*5 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 add r14,r3,#16*6 add r3,r3,#16*7 b .Lenter_cbc_enc .align 4 .Loop_cbc_enc: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vst1.8 {q6},[r1]! .Lenter_cbc_enc: .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.32 {q8},[r6] cmp r5,#4 .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.32 {q9},[r12] beq .Lcbc_enc192 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.32 {q8},[r14] .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.32 {q9},[r3] nop .Lcbc_enc192: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 subs r2,r2,#16 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 moveq r8,#0 .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.8 {q8},[r0],r8 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 veor q8,q8,q5 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.32 {q9},[r7] @ re-pre-load rndkey[1] .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 veor q6,q0,q7 bhs .Loop_cbc_enc vst1.8 {q6},[r1]! b .Lcbc_done .align 5 .Lcbc_enc128: vld1.32 {q2,q3},[r7] .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 b .Lenter_cbc_enc128 .Loop_cbc_enc128: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vst1.8 {q6},[r1]! .Lenter_cbc_enc128: .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 subs r2,r2,#16 .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 moveq r8,#0 .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.8 {q8},[r0],r8 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 veor q8,q8,q5 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 veor q6,q0,q7 bhs .Loop_cbc_enc128 vst1.8 {q6},[r1]! b .Lcbc_done .align 5 .Lcbc_dec: vld1.8 {q10},[r0]! subs r2,r2,#32 @ bias add r6,r5,#2 vorr q3,q0,q0 vorr q1,q0,q0 vorr q11,q10,q10 blo .Lcbc_dec_tail vorr q1,q10,q10 vld1.8 {q10},[r0]! vorr q2,q0,q0 vorr q3,q1,q1 vorr q11,q10,q10 .Loop3x_cbc_dec: .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.32 {q9},[r7]! bgt .Loop3x_cbc_dec .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q4,q6,q7 subs r2,r2,#0x30 veor q5,q2,q7 movlo r6,r2 @ r6, r6, is zero at this point .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q9,q3,q7 add r0,r0,r6 @ r0 is adjusted in such way that @ at exit from the loop q1-q10 @ are loaded with last "words" vorr q6,q11,q11 mov r7,r3 .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.8 {q2},[r0]! .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.8 {q3},[r0]! .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.8 {q11},[r0]! .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] add r6,r5,#2 veor q4,q4,q0 veor q5,q5,q1 veor q10,q10,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] vst1.8 {q4},[r1]! vorr q0,q2,q2 vst1.8 {q5},[r1]! vorr q1,q3,q3 vst1.8 {q10},[r1]! vorr q10,q11,q11 bhs .Loop3x_cbc_dec cmn r2,#0x30 beq .Lcbc_done nop .Lcbc_dec_tail: .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.32 {q9},[r7]! bgt .Lcbc_dec_tail .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 cmn r2,#0x20 .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q5,q6,q7 .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q9,q3,q7 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 beq .Lcbc_dec_one veor q5,q5,q1 veor q9,q9,q10 vorr q6,q11,q11 vst1.8 {q5},[r1]! vst1.8 {q9},[r1]! b .Lcbc_done .Lcbc_dec_one: veor q5,q5,q10 vorr q6,q11,q11 vst1.8 {q5},[r1]! .Lcbc_done: vst1.8 {q6},[r4] .Lcbc_abort: vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ldmia sp!,{r4,r5,r6,r7,r8,pc} .size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt .globl aes_hw_ctr32_encrypt_blocks .hidden aes_hw_ctr32_encrypt_blocks .type aes_hw_ctr32_encrypt_blocks,%function .align 5 aes_hw_ctr32_encrypt_blocks: mov ip,sp stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ldr r4, [ip] @ load remaining arg ldr r5,[r3,#240] ldr r8, [r4, #12] vld1.32 {q0},[r4] vld1.32 {q8,q9},[r3] @ load key schedule... sub r5,r5,#4 mov r12,#16 cmp r2,#2 add r7,r3,r5,lsl#4 @ pointer to last 5 round keys sub r5,r5,#2 vld1.32 {q12,q13},[r7]! vld1.32 {q14,q15},[r7]! vld1.32 {q7},[r7] add r7,r3,#32 mov r6,r5 movlo r12,#0 @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are @ affected by silicon errata #1742098 [0] and #1655431 [1], @ respectively, where the second instruction of an aese/aesmc @ instruction pair may execute twice if an interrupt is taken right @ after the first instruction consumes an input register of which a @ single 32-bit lane has been updated the last time it was modified. @ @ This function uses a counter in one 32-bit lane. The @ could write to q1 and q10 directly, but that trips this bugs. @ We write to q6 and copy to the final register as a workaround. @ @ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice @ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice #ifndef __ARMEB__ rev r8, r8 #endif add r10, r8, #1 vorr q6,q0,q0 rev r10, r10 vmov.32 d13[1],r10 add r8, r8, #2 vorr q1,q6,q6 bls .Lctr32_tail rev r12, r8 vmov.32 d13[1],r12 sub r2,r2,#3 @ bias vorr q10,q6,q6 b .Loop3x_ctr32 .align 4 .Loop3x_ctr32: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.32 {q9},[r7]! bgt .Loop3x_ctr32 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 vld1.8 {q2},[r0]! add r9,r8,#1 .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.8 {q3},[r0]! rev r9,r9 .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 vld1.8 {q11},[r0]! mov r7,r3 .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 veor q2,q2,q7 add r10,r8,#2 .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 veor q3,q3,q7 add r8,r8,#3 .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 @ Note the logic to update q0, q1, and q1 is written to work @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in @ 32-bit mode. See the comment above. veor q11,q11,q7 vmov.32 d13[1], r9 .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vorr q0,q6,q6 rev r10,r10 .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 vmov.32 d13[1], r10 rev r12,r8 .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 vorr q1,q6,q6 vmov.32 d13[1], r12 .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vorr q10,q6,q6 subs r2,r2,#3 .byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 veor q2,q2,q4 vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] vst1.8 {q2},[r1]! veor q3,q3,q5 mov r6,r5 vst1.8 {q3},[r1]! veor q11,q11,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] vst1.8 {q11},[r1]! bhs .Loop3x_ctr32 adds r2,r2,#3 beq .Lctr32_done cmp r2,#1 mov r12,#16 moveq r12,#0 .Lctr32_tail: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.32 {q9},[r7]! bgt .Lctr32_tail .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.8 {q2},[r0],r12 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.8 {q3},[r0] .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 veor q2,q2,q7 .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 veor q3,q3,q7 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 .byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 cmp r2,#1 veor q2,q2,q0 veor q3,q3,q1 vst1.8 {q2},[r1]! beq .Lctr32_done vst1.8 {q3},[r1] .Lctr32_done: vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} .size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks #endif #endif // !OPENSSL_NO_ASM && defined(OPENSSL_ARM) && defined(__ELF__)