.text
.arch armv8-a+crypto
/*
 * load_round_keys rk, nr, tmp
 *
 * Load the AES round keys into a fixed set of vector registers.
 *
 *   rk   X register holding the address of the first round key
 *   nr   W register holding the number of rounds
 *   tmp  X register used as scratch (clobbered)
 *
 * Layout after the macro (each round key is 16 bytes):
 *   v10-v13        round keys 0..3
 *   v14-v21, v3-v5 the eleven round keys starting at index (nr - 10)
 *
 * Anchoring the second group at (nr - 10) means that, for a schedule of
 * nr + 1 keys, the last eleven keys always end up in the same registers
 * whatever the value of nr, with v5 holding the very last one.
 *
 * `w\tmp` glues a 'w' in front of the X register name passed as tmp
 * (e.g. "wx10"); this relies on wxN register aliases that are not defined
 * in this part of the file -- NOTE(review): confirm they are provided by
 * an included header.
 */
.macro	load_round_keys, rk, nr, tmp
	sub	w\tmp, \nr, #10			/* index of first "tail" key   */
	add	\tmp, \rk, w\tmp, sxtw #4	/* tmp = rk + 16 * (nr - 10)   */
	ld1	{v10.4s-v13.4s}, [\rk]
	ld1	{v14.4s-v17.4s}, [\tmp], #64	/* 4 keys, advance by 4 * 16   */
	ld1	{v18.4s-v21.4s}, [\tmp], #64
	ld1	{v3.4s-v5.4s}, [\tmp]
.endm
/*
 * dround va, vb, vk
 *
 * Apply one full AES round (AESE followed by AESMC) with round key vk to
 * each of the two state registers va and vb, va first. All three arguments
 * are bare vector register names (e.g. v0); the .16b arrangement is added
 * here.
 */
.macro	dround, va, vb, vk
	.irp	state, \va, \vb
	aese	\state\().16b, \vk\().16b
	aesmc	\state\().16b, \state\().16b
	.endr
.endm
/*
 * aes_encrypt va, vb, nr
 *
 * Run two AES states (va, vb) through the cipher in parallel using the
 * round keys set up by load_round_keys, EXCEPT for the final AddRoundKey:
 * the last key (v5) is not applied here, callers fold it in themselves.
 *
 *   va, vb  bare vector register names holding the two states
 *   nr      W register holding the number of rounds
 *
 * The round count is decoded from individual bits of nr, which assumes
 * nr is one of 10 (0b1010), 12 (0b1100) or 14 (0b1110):
 *   bit 2 clear        -> nr == 10: skip v10-v13 entirely
 *   bit 2 set, 1 clear -> nr == 12: use v10, v11 only
 *   bits 2 and 1 set   -> nr == 14: use v10-v13
 * followed in all cases by nine full rounds (v14-v21, v3) and a final
 * AESE with v4 (no MixColumns).
 *
 * .L\@ yields a label unique to each expansion of this macro.
 */
.macro	aes_encrypt, va, vb, nr
	tbz	\nr, #2, .L\@
	dround	\va, \vb, v10
	dround	\va, \vb, v11
	tbz	\nr, #1, .L\@
	dround	\va, \vb, v12
	dround	\va, \vb, v13
.L\@:	.irp	v, v14, v15, v16, v17, v18, v19, v20, v21, v3
	dround	\va, \vb, \v
	.endr
	aese	\va\().16b, v4.16b
	aese	\vb\().16b, v4.16b
.endm
/*
 * aes_ccm_do_crypt enc
 *
 * Body shared by the CCM encrypt (enc == 1) and decrypt (enc == 0) entry
 * points. Processes the data 16 bytes at a time, producing the CTR-mode
 * output and updating the CBC-MAC in the same pass.
 *
 * Register usage, as established by the code below:
 *   x0  output pointer (post-incremented by 16 per block)
 *   x1  input pointer  (post-incremented by 16 per block)
 *   w2  number of bytes left to process
 *   x3  round key array, w4 number of rounds (see load_round_keys)
 *   x5  16-byte MAC, loaded on entry and stored back on exit
 *   x6  16-byte counter block; its second 8 bytes are updated on exit
 *   x7  block used to encrypt the final MAC, or NULL to skip that step
 *   v0  running MAC, v1 counter block / key stream, v5 last round key
 *   v22 must be set by the caller; only used by ce_aes_ccm_crypt_tail
 *
 * Exits:
 *   - w2 reaches exactly 0: store the counter, then either jump to
 *     ce_aes_ccm_final (x7 != NULL) or store the MAC and return.
 *   - w2 goes negative (partial last block): jump to
 *     ce_aes_ccm_crypt_tail. The counter is not written back on that path.
 *
 * CPU_LE() is defined outside this view; presumably it emits its argument
 * only on little-endian builds, so that x8 holds the counter as a native
 * integer -- NOTE(review): confirm.
 *
 * NOTE(review): the initial zero check tests all 64 bits of x2 while the
 * loop only ever uses w2; confirm callers pass a zero-extended length.
 */
.macro	aes_ccm_do_crypt,enc
	load_round_keys	x3, w4, x10

	ld1	{v0.16b}, [x5]			/* v0 = current MAC */
	cbz	x2, ce_aes_ccm_final		/* nothing to process */
	ldr	x8, [x6, #8]			/* second 8 bytes of the counter */
CPU_LE(	rev	x8, x8			)
0:
	ld1	{v1.8b}, [x6]			/* first 8 bytes of the counter */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1			/* bump the counter */
	rev	x9, x8
	ins	v1.d[1], x9			/* v1 = first half : new second half */

	aes_encrypt	v0, v1, w4		/* both still lack the v5 key */

	subs	w2, w2, #16
	bmi	ce_aes_ccm_crypt_tail		/* fewer than 16 bytes were left */
	ld1	{v2.16b}, [x1], #16		/* next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* v2 = plaintext ^ last key */
	eor	v6.16b, v1.16b, v2.16b		/* v6 = ciphertext */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* v2 = plaintext ^ last key */
	eor	v6.16b, v2.16b, v5.16b		/* v6 = plaintext */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* finish MAC round, fold in plaintext */
	st1	{v6.16b}, [x0], #16		/* write the output block */
	bne	0b				/* flags still from the subs above */
CPU_LE(	rev	x8, x8			)
	str	x8, [x6, #8]			/* write back the updated counter half */
	cbnz	x7, ce_aes_ccm_final
	st1	{v0.16b}, [x5]			/* store the MAC */
	ret
.endm
/*
 * ce_aes_ccm_crypt_tail - handle a final partial block (1..15 bytes).
 *
 * Reached only through the "bmi" in aes_ccm_do_crypt, so on entry:
 *   w2 = remaining - 16, i.e. negative
 *   v0 = MAC state and v1 = key stream block, both straight out of
 *        aes_encrypt and therefore still missing the last round key (v5)
 *   v6 = output block written by the previous loop iteration
 *   v22 = byte mask chosen by the entry point (see the bif below)
 *
 * Rather than touching memory byte by byte, the code steps the input and
 * output pointers back by -w2 bytes so that a full 16-byte load/store ends
 * exactly at the end of the data, and uses table lookups on windows of
 * .Lpermute to shuffle the useful bytes into place.
 *
 * NOTE(review): if the very first block is already a partial one, v6 has
 * not been written by this code and the rewound pointers address memory in
 * front of the buffers; presumably the caller guarantees this cannot
 * happen -- confirm.
 *
 * ce_aes_ccm_final (inner label, also a jump target of aes_ccm_do_crypt)
 * encrypts the MAC with the block at x7 and stores it to x5.
 */
SYM_FUNC_START_LOCAL(ce_aes_ccm_crypt_tail)
eor v0.16b, v0.16b, v5.16b	/* apply the last round key to the MAC */
eor v1.16b, v1.16b, v5.16b	/* ... and to the key stream */
add x1, x1, w2, sxtw	/* step input pointer back (w2 < 0) */
add x0, x0, w2, sxtw	/* step output pointer back */
adr_l x8, .Lpermute
add x9, x8, w2, sxtw	/* window starting -w2 bytes before the table */
sub x8, x8, w2, sxtw	/* window starting -w2 bytes into the table */
/* v7: 0xff x (-w2), then 0,1,2,...   v8: the following 16 bytes */
ld1 {v7.16b-v8.16b}, [x9]
/* v9: indices -w2..15, then 0xff padding */
ld1 {v9.16b}, [x8]
ld1 {v2.16b}, [x1]	/* full block whose last bytes are the input */
/* tbl yields 0 for index 0xff: key stream moves to the end of v1 */
tbl v1.16b, {v1.16b}, v7.16b
eor v7.16b, v2.16b, v1.16b	/* en-/decrypt the trailing input bytes */
/* where v22 is 0 take v7 (the result), where it is 1 keep v2 (the input) */
bif v2.16b, v7.16b, v22.16b
/* tbx leaves bytes with index 0xff alone: refill the leading bytes of v7
 * with the end of the previous output block (v6) */
tbx v7.16b, {v6.16b}, v8.16b
/* move the trailing bytes of v2 to the front, zero the rest */
tbl v2.16b, {v2.16b}, v9.16b
eor v0.16b, v0.16b, v2.16b	/* fold them into the MAC */
st1 {v7.16b}, [x0]	/* overlapping store of the last 16 output bytes */
cbz x7, 0f	/* no final block supplied: just store the MAC */
SYM_INNER_LABEL(ce_aes_ccm_final, SYM_L_LOCAL)
ld1 {v1.16b}, [x7]
aes_encrypt v0, v1, w4
/* both operands lack the same last round key, so it cancels in the xor */
eor v0.16b, v0.16b, v1.16b
0: st1 {v0.16b}, [x5]	/* store the resulting MAC */
ret
SYM_FUNC_END(ce_aes_ccm_crypt_tail)
/*
 * ce_aes_ccm_encrypt - CCM encryption entry point.
 *
 * Arguments are consumed by aes_ccm_do_crypt: x0 output, x1 input,
 * w2 byte count, x3 round keys, w4 number of rounds, x5 MAC, x6 counter
 * block, x7 block used to encrypt the final MAC (may be NULL).
 *
 * v22 is the select mask for the "bif" in ce_aes_ccm_crypt_tail. All ones
 * makes it keep the input bytes, which are the plaintext when encrypting,
 * as the data folded into the MAC.
 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	movi	v22.16b, #255
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)
/*
 * ce_aes_ccm_decrypt - CCM decryption entry point.
 *
 * Arguments are consumed by aes_ccm_do_crypt: x0 output, x1 input,
 * w2 byte count, x3 round keys, w4 number of rounds, x5 MAC, x6 counter
 * block, x7 block used to encrypt the final MAC (may be NULL).
 *
 * v22 is the select mask for the "bif" in ce_aes_ccm_crypt_tail. All
 * zeroes makes it take the decrypted bytes, which are the plaintext when
 * decrypting, as the data folded into the MAC.
 */
SYM_FUNC_START(ce_aes_ccm_decrypt)
	movi	v22.16b, #0
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)
/*
 * Index table for the tbl/tbx shuffles in ce_aes_ccm_crypt_tail: the
 * identity permutation 0..15 with 15 bytes of 0xff on either side, so that
 * any 16-byte window starting up to 15 bytes before .Lpermute, or up to
 * 15 bytes into it, can be loaded. The 64-byte alignment (2^6) applies to
 * the start of the leading padding.
 */
.section ".rodata", "a"
	.p2align 6
	.skip	15, 0xff
.Lpermute:
	.byte	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
	.skip	15, 0xff