.arch armv8-a+crypto
.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
.set .Lv\b\().4s, \b
.endr
.macro sm4e, vd, vn
.inst 0xcec08400 | (.L\vn << 5) | .L\vd
.endm
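/* Register macros */
#define RMAC v16 /* MAC accumulator */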
/* Helper macros */
/* emit the current counter (x7:x8) into vctr in big-endian byte order, then increment the 128-bit counter */
#define inc_le128(vctr) \
mov vctr.d[1], x8; \
mov vctr.d[0], x7; \
adds x8, x8, #1; \
rev64 vctr.16b, vctr.16b; \
adc x7, x7, xzr;
.align 3
SYM_FUNC_START(sm4_ce_cbcmac_update)
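/* input:
 * x0: round key array, CTX
 * x1: mac
 * x2: src
 * w3: nblocks
 */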
SM4_PREPARE(x0)
ld1 {RMAC.16b}, [x1]
.Lcbcmac_loop_4x:
cmp w3, #4
blt .Lcbcmac_loop_1x
sub w3, w3, #4
ld1 {v0.16b-v3.16b}, [x2], #64
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v0.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v1.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v2.16b
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v3.16b
cbz w3, .Lcbcmac_end
b .Lcbcmac_loop_4x
.Lcbcmac_loop_1x:
sub w3, w3, #1
ld1 {v0.16b}, [x2], #16
SM4_CRYPT_BLK(RMAC)
eor RMAC.16b, RMAC.16b, v0.16b
cbnz w3, .Lcbcmac_loop_1x
.Lcbcmac_end:
st1 {RMAC.16b}, [x1]
ret
SYM_FUNC_END(sm4_ce_cbcmac_update)
.align 3
SYM_FUNC_START(sm4_ce_ccm_final)
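/* input:
 * x0: round key array, CTX
 * x1: ctr0 (big endian, 128 bit)
 * x2: mac
 */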
SM4_PREPARE(x0)
ld1 {RMAC.16b}, [x2]
ld1 {v0.16b}, [x1]
SM4_CRYPT_BLK2(RMAC, v0)
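/* en-/decrypt the MAC with ctr0 */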
eor RMAC.16b, RMAC.16b, v0.16b
st1 {RMAC.16b}, [x2]
ret
SYM_FUNC_END(sm4_ce_ccm_final)
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_enc)
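/* input:
 * x0: round key array, CTX
 * x1: dst
 * x2: src
 * x3: ctr (big endian, 128 bit)
 * w4: nbytes
 * x5: mac
 */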
SM4_PREPARE(x0)
ldp x7, x8, [x3]
rev x7, x7
rev x8, x8
ld1 {RMAC.16b}, [x5]
.Lccm_enc_loop_4x:
cmp w4, #(4 * 16)
blt .Lccm_enc_loop_1x
sub w4, w4, #(4 * 16)
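/* construct CTRs */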
inc_le128(v8)
inc_le128(v9)
inc_le128(v10)
inc_le128(v11)
ld1 {v0.16b-v3.16b}, [x2], #64
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v0.16b
SM4_CRYPT_BLK2(v9, RMAC)
eor v9.16b, v9.16b, v1.16b
eor RMAC.16b, RMAC.16b, v1.16b
SM4_CRYPT_BLK2(v10, RMAC)
eor v10.16b, v10.16b, v2.16b
eor RMAC.16b, RMAC.16b, v2.16b
SM4_CRYPT_BLK2(v11, RMAC)
eor v11.16b, v11.16b, v3.16b
eor RMAC.16b, RMAC.16b, v3.16b
st1 {v8.16b-v11.16b}, [x1], #64
cbz w4, .Lccm_enc_end
b .Lccm_enc_loop_4x
.Lccm_enc_loop_1x:
cmp w4, #16
blt .Lccm_enc_tail
sub w4, w4, #16
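/* construct CTR */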
inc_le128(v8)
ld1 {v0.16b}, [x2], #16
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v0.16b
st1 {v8.16b}, [x1], #16
cbz w4, .Lccm_enc_end
b .Lccm_enc_loop_1x
.Lccm_enc_tail:
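/* construct CTR */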
inc_le128(v8)
SM4_CRYPT_BLK2(RMAC, v8)
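/* store new MAC */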
st1 {RMAC.16b}, [x5]
.Lccm_enc_tail_loop:
ldrb w0, [x2], #1 /* get 1 byte of input */
umov w9, v8.b[0] /* get top byte of encrypted CTR block */
umov w6, RMAC.b[0] /* get top byte of MAC */
eor w9, w9, w0 /* w9 = CTR ^ input */
eor w6, w6, w0 /* w6 = MAC ^ input */
strb w9, [x1], #1 /* store output byte */
strb w6, [x5], #1 /* store MAC byte */
subs w4, w4, #1
beq .Lccm_enc_ret
/* shift out one byte */
ext RMAC.16b, RMAC.16b, RMAC.16b, #1
ext v8.16b, v8.16b, v8.16b, #1
b .Lccm_enc_tail_loop
.Lccm_enc_end:
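/* store new MAC */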
st1 {RMAC.16b}, [x5]
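/* store new CTR */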
rev x7, x7
rev x8, x8
stp x7, x8, [x3]
.Lccm_enc_ret:
ret
SYM_FUNC_END(sm4_ce_ccm_enc)
.align 3
SYM_TYPED_FUNC_START(sm4_ce_ccm_dec)
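/* input:
 * x0: round key array, CTX
 * x1: dst
 * x2: src
 * x3: ctr (big endian, 128 bit)
 * w4: nbytes
 * x5: mac
 */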
SM4_PREPARE(x0)
ldp x7, x8, [x3]
rev x7, x7
rev x8, x8
ld1 {RMAC.16b}, [x5]
.Lccm_dec_loop_4x:
cmp w4, #(4 * 16)
blt .Lccm_dec_loop_1x
sub w4, w4, #(4 * 16)
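/* construct CTRs */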
inc_le128(v8)
inc_le128(v9)
inc_le128(v10)
inc_le128(v11)
ld1 {v0.16b-v3.16b}, [x2], #64
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v8.16b
SM4_CRYPT_BLK2(v9, RMAC)
eor v9.16b, v9.16b, v1.16b
eor RMAC.16b, RMAC.16b, v9.16b
SM4_CRYPT_BLK2(v10, RMAC)
eor v10.16b, v10.16b, v2.16b
eor RMAC.16b, RMAC.16b, v10.16b
SM4_CRYPT_BLK2(v11, RMAC)
eor v11.16b, v11.16b, v3.16b
eor RMAC.16b, RMAC.16b, v11.16b
st1 {v8.16b-v11.16b}, [x1], #64
cbz w4, .Lccm_dec_end
b .Lccm_dec_loop_4x
.Lccm_dec_loop_1x:
cmp w4, #16
blt .Lccm_dec_tail
sub w4, w4, #16
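/* construct CTR */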
inc_le128(v8)
ld1 {v0.16b}, [x2], #16
SM4_CRYPT_BLK2(v8, RMAC)
eor v8.16b, v8.16b, v0.16b
eor RMAC.16b, RMAC.16b, v8.16b
st1 {v8.16b}, [x1], #16
cbz w4, .Lccm_dec_end
b .Lccm_dec_loop_1x
.Lccm_dec_tail:
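/* construct CTR */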
inc_le128(v8)
SM4_CRYPT_BLK2(RMAC, v8)
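/* store new MAC */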
st1 {RMAC.16b}, [x5]
.Lccm_dec_tail_loop:
ldrb w0, [x2], #1 /* get 1 byte of input */
umov w9, v8.b[0] /* get top byte of encrypted CTR block */
umov w6, RMAC.b[0] /* get top byte of MAC */
eor w9, w9, w0 /* w9 = CTR ^ input, recover plaintext byte */
eor w6, w6, w9 /* w6 = MAC ^ plaintext */
strb w9, [x1], #1 /* store output byte */
strb w6, [x5], #1 /* store MAC byte */
subs w4, w4, #1
beq .Lccm_dec_ret
/* shift out one byte */
ext RMAC.16b, RMAC.16b, RMAC.16b, #1
ext v8.16b, v8.16b, v8.16b, #1
b .Lccm_dec_tail_loop
.Lccm_dec_end:
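/* store new MAC */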
st1 {RMAC.16b}, [x5]
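/* store new CTR */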
rev x7, x7
rev x8, x8
stp x7, x8, [x3]
.Lccm_dec_ret:
ret
SYM_FUNC_END(sm4_ce_ccm_dec)