Path: blob/main/sys/crypto/openssl/aarch64/aesv8-armx.S
39536 views
/* Do not modify. This file is auto-generated from aesv8-armx.pl. */
// NOTE: the text below is the listing restored to one statement per line;
// instruction order, operands, labels and directives are unchanged.
// Syntax: GNU as, AArch64. Macros such as AARCH64_VALID_CALL_TARGET come
// from arm_arch.h.
#include "arm_arch.h"

#if __ARM_MAX_ARCH__>=7
.arch	armv8-a+crypto
.text
.section	.rodata
.align	5
// Constants consumed by the key-expansion code below: v1 is loaded with the
// first row, v2 with the tbl mask, and the last row is reloaded into v1
// after the AES-128 loop.
.Lrcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b
.previous

//----------------------------------------------------------------------
// aes_v8_set_encrypt_key
// In:   x0 = key bytes to read, w1 = key length in bits, x2 = schedule out
// Out:  x0 = 0 on success,
//            -1 if x0 or x2 is zero,
//            -2 if w1 < 128, w1 > 256, or w1 is not a multiple of 64
// Effect: writes the round keys starting at [x2] and the round count
//         (10, 12 or 14) as a 32-bit word at offset 240 of the schedule.
// Internal entry .Lenc_key (used via bl by aes_v8_set_decrypt_key) leaves
// x2 = schedule + 240 and w12 = round count on success.
//----------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	AARCH64_VALID_CALL_TARGET
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x3,#-1
	cmp	x0,#0
	b.eq	.Lenc_key_abort
	cmp	x2,#0
	b.eq	.Lenc_key_abort
	mov	x3,#-2
	cmp	w1,#128
	b.lt	.Lenc_key_abort
	cmp	w1,#256
	b.gt	.Lenc_key_abort
	tst	w1,#0x3f
	b.ne	.Lenc_key_abort

	adrp	x3,.Lrcon
	add	x3,x3,#:lo12:.Lrcon
	cmp	w1,#192

	eor	v0.16b,v0.16b,v0.16b
	ld1	{v3.16b},[x0],#16
	mov	w1,#8		// reuse w1
	ld1	{v1.4s,v2.4s},[x3],#32

	b.lt	.Loop128
	b.eq	.L192
	b	.L256

.align	4
.Loop128:
	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	b.ne	.Loop128

	ld1	{v1.4s},[x3]

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b

	tbl	v6.16b,{v3.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v3.4s},[x2],#16
	aese	v6.16b,v0.16b

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2]
	add	x2,x2,#0x50

	mov	w12,#10
	b	.Ldone

.align	4
.L192:
	ld1	{v4.8b},[x0],#8
	movi	v6.16b,#8			// borrow v6.16b
	st1	{v3.4s},[x2],#16
	sub	v2.16b,v2.16b,v6.16b	// adjust the mask

.Loop192:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
#ifdef __AARCH64EB__
	st1	{v4.4s},[x2],#16
	sub	x2,x2,#8
#else
	st1	{v4.8b},[x2],#8
#endif
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b

	dup	v5.4s,v3.s[3]
	eor	v5.16b,v5.16b,v4.16b
	eor	v6.16b,v6.16b,v1.16b
	ext	v4.16b,v0.16b,v4.16b,#12
	shl	v1.16b,v1.16b,#1
	eor	v4.16b,v4.16b,v5.16b
	eor	v3.16b,v3.16b,v6.16b
	eor	v4.16b,v4.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.ne	.Loop192

	mov	w12,#12
	add	x2,x2,#0x20
	b	.Ldone

.align	4
.L256:
	ld1	{v4.16b},[x0]
	mov	w1,#7
	mov	w12,#14
	st1	{v3.4s},[x2],#16

.Loop256:
	tbl	v6.16b,{v4.16b},v2.16b
	ext	v5.16b,v0.16b,v3.16b,#12
	st1	{v4.4s},[x2],#16
	aese	v6.16b,v0.16b
	subs	w1,w1,#1

	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v3.16b,v3.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v6.16b,v6.16b,v1.16b
	eor	v3.16b,v3.16b,v5.16b
	shl	v1.16b,v1.16b,#1
	eor	v3.16b,v3.16b,v6.16b
	st1	{v3.4s},[x2],#16
	b.eq	.Ldone

	dup	v6.4s,v3.s[3]		// just splat
	ext	v5.16b,v0.16b,v4.16b,#12
	aese	v6.16b,v0.16b

	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b
	ext	v5.16b,v0.16b,v5.16b,#12
	eor	v4.16b,v4.16b,v5.16b

	eor	v4.16b,v4.16b,v6.16b
	b	.Loop256

.Ldone:
	str	w12,[x2]		// round count; x2 is schedule+240 on every path here
	mov	x3,#0

.Lenc_key_abort:
	mov	x0,x3			// return value
	ldr	x29,[sp],#16
	ret
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

//----------------------------------------------------------------------
// aes_v8_set_decrypt_key
// In:   same registers as aes_v8_set_encrypt_key (x0, w1, x2)
// Out:  x0 = 0 on success, otherwise the error code from .Lenc_key
// Effect: builds the encryption schedule via .Lenc_key, then swaps the
//         round keys end-for-end in place; every key except the two
//         outermost ones is passed through aesimc.
//----------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	bl	.Lenc_key

	cmp	x0,#0
	b.ne	.Ldec_key_abort

	sub	x2,x2,#240		// restore original x2
	mov	x4,#-16
	add	x0,x2,x12,lsl#4	// end of key schedule

	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16

.Loop_imc:
	ld1	{v0.4s},[x2]
	ld1	{v1.4s},[x0]
	aesimc	v0.16b,v0.16b
	aesimc	v1.16b,v1.16b
	st1	{v0.4s},[x0],x4
	st1	{v1.4s},[x2],#16
	cmp	x0,x2
	b.hi	.Loop_imc

	ld1	{v0.4s},[x2]
	aesimc	v0.16b,v0.16b
	st1	{v0.4s},[x0]

	eor	x0,x0,x0		// return value
.Ldec_key_abort:
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

//----------------------------------------------------------------------
// aes_v8_encrypt
// In:   x0 = 16-byte input block, x1 = 16-byte output block,
//       x2 = key schedule (round count read from offset 240)
// Clobbers: x2, w3, v0-v2, flags. Leaf routine, no stack use.
//----------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_enc:
	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aese	v2.16b,v1.16b
	aesmc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_enc

	aese	v2.16b,v0.16b
	aesmc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aese	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_encrypt,.-aes_v8_encrypt

//----------------------------------------------------------------------
// aes_v8_decrypt
// In:   x0 = 16-byte input block, x1 = 16-byte output block,
//       x2 = key schedule (round count read from offset 240)
// Clobbers: x2, w3, v0-v2, flags. Leaf routine, no stack use.
//----------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	AARCH64_VALID_CALL_TARGET
	ldr	w3,[x2,#240]
	ld1	{v0.4s},[x2],#16
	ld1	{v2.16b},[x0]
	sub	w3,w3,#2
	ld1	{v1.4s},[x2],#16

.Loop_dec:
	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2],#16
	subs	w3,w3,#2
	aesd	v2.16b,v1.16b
	aesimc	v2.16b,v2.16b
	ld1	{v1.4s},[x2],#16
	b.gt	.Loop_dec

	aesd	v2.16b,v0.16b
	aesimc	v2.16b,v2.16b
	ld1	{v0.4s},[x2]
	aesd	v2.16b,v1.16b
	eor	v2.16b,v2.16b,v0.16b

	st1	{v2.16b},[x1]
	ret
.size	aes_v8_decrypt,.-aes_v8_decrypt

//----------------------------------------------------------------------
// aes_v8_ecb_encrypt
// In:   x0 = input, x1 = output, x2 = length in bytes,
//       x3 = key schedule (round count at offset 240),
//       w4 = direction: zero selects the aesd paths, non-zero the aese paths
// Behaviour visible here:
//   - length == 16 takes a dedicated single-block path with no stack frame;
//   - length  < 16 returns without reading or writing data;
//   - otherwise (length-16) is rounded down to a multiple of 16 and blocks
//     are processed 5 at a time, then 3 at a time, then a 1-2 block tail.
//----------------------------------------------------------------------
.globl	aes_v8_ecb_encrypt
.type	aes_v8_ecb_encrypt,%function
.align	5
aes_v8_ecb_encrypt:
	AARCH64_VALID_CALL_TARGET
	subs	x2,x2,#16
	// Original input data size bigger than 16, jump to big size processing.
	b.ne	.Lecb_big_size
	ld1	{v0.16b},[x0]
	cmp	w4,#0					// en- or decrypting?
	ldr	w5,[x3,#240]
	ld1	{v5.4s,v6.4s},[x3],#32			// load key schedule...

	b.eq	.Lecb_small_dec
	aese	v0.16b,v5.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32			// load key schedule...
	aese	v0.16b,v6.16b
	aesmc	v0.16b,v0.16b
	subs	w5,w5,#10			// if rounds==10, jump to aes-128-ecb processing
	b.eq	.Lecb_128_enc
.Lecb_round_loop:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16				// load key schedule...
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16				// load key schedule...
	subs	w5,w5,#2			// bias
	b.gt	.Lecb_round_loop
.Lecb_128_enc:
	ld1	{v18.4s,v19.4s},[x3],#32		// load key schedule...
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v20.4s,v21.4s},[x3],#32		// load key schedule...
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v22.4s,v23.4s},[x3],#32		// load key schedule...
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v7.4s},[x3]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v0.16b,v0.16b,v7.16b
	st1	{v0.16b},[x1]
	b	.Lecb_Final_abort
.Lecb_small_dec:
	aesd	v0.16b,v5.16b
	aesimc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32			// load key schedule...
	aesd	v0.16b,v6.16b
	aesimc	v0.16b,v0.16b
	subs	w5,w5,#10			// bias
	b.eq	.Lecb_128_dec
.Lecb_dec_round_loop:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16				// load key schedule...
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16				// load key schedule...
	subs	w5,w5,#2			// bias
	b.gt	.Lecb_dec_round_loop
.Lecb_128_dec:
	ld1	{v18.4s,v19.4s},[x3],#32		// load key schedule...
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	ld1	{v20.4s,v21.4s},[x3],#32		// load key schedule...
	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	ld1	{v22.4s,v23.4s},[x3],#32		// load key schedule...
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	ld1	{v7.4s},[x3]
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v23.16b
	eor	v0.16b,v0.16b,v7.16b
	st1	{v0.16b},[x1]
	b	.Lecb_Final_abort
.Lecb_big_size:
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	mov	x8,#16
	b.lo	.Lecb_done		// flags still from "subs x2,x2,#16" at entry
	csel	x8,xzr,x8,eq

	cmp	w4,#0					// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]				// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4				// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lecb_dec

	ld1	{v1.16b},[x0],#16
	subs	x2,x2,#32				// bias
	add	w6,w5,#2
	orr	v3.16b,v1.16b,v1.16b
	orr	v24.16b,v1.16b,v1.16b
	orr	v1.16b,v0.16b,v0.16b
	b.lo	.Lecb_enc_tail

	orr	v1.16b,v3.16b,v3.16b
	ld1	{v24.16b},[x0],#16
	cmp	x2,#32
	b.lo	.Loop3x_ecb_enc

	ld1	{v25.16b},[x0],#16
	ld1	{v26.16b},[x0],#16
	sub	x2,x2,#32				// bias
	mov	w6,w5

.Loop5x_ecb_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v16.16b
	aesmc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v17.16b
	aesmc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_ecb_enc

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v16.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v16.16b
	aesmc	v26.16b,v26.16b
	cmp	x2,#0x40					// because .Lecb_enc_tail4x
	sub	x2,x2,#0x50

	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v17.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v17.16b
	aesmc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt		// borrow x6, w6, "gt" is not typo
	mov	x7,x3

	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v18.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v18.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v18.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v18.16b
	aesmc	v26.16b,v26.16b
	add	x0,x0,x6				// x0 is adjusted in such way that
							// at exit from the loop v1.16b-v26.16b
							// are loaded with last "words"
	add	x6,x2,#0x60		// because .Lecb_enc_tail4x

	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v19.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v19.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v19.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v19.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v20.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v20.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v21.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v21.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v22.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v22.16b
	aesmc	v26.16b,v26.16b

	aese	v0.16b,v23.16b
	ld1	{v2.16b},[x0],#16
	aese	v1.16b,v23.16b
	ld1	{v3.16b},[x0],#16
	aese	v24.16b,v23.16b
	ld1	{v27.16b},[x0],#16
	aese	v25.16b,v23.16b
	ld1	{v28.16b},[x0],#16
	aese	v26.16b,v23.16b
	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lecb_enc_tail4x
	ld1	{v16.4s},[x7],#16			// re-pre-load rndkey[0]
	eor	v4.16b,v7.16b,v0.16b
	orr	v0.16b,v2.16b,v2.16b
	eor	v5.16b,v7.16b,v1.16b
	orr	v1.16b,v3.16b,v3.16b
	eor	v17.16b,v7.16b,v24.16b
	orr	v24.16b,v27.16b,v27.16b
	eor	v30.16b,v7.16b,v25.16b
	orr	v25.16b,v28.16b,v28.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v4.16b},[x1],#16
	orr	v26.16b,v29.16b,v29.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16			// re-pre-load rndkey[1]
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	b.hs	.Loop5x_ecb_enc

	add	x2,x2,#0x50
	cbz	x2,.Lecb_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	orr	v0.16b,v27.16b,v27.16b
	orr	v1.16b,v28.16b,v28.16b
	orr	v24.16b,v29.16b,v29.16b
	b.lo	.Lecb_enc_tail

	b	.Loop3x_ecb_enc

.align	4
.Lecb_enc_tail4x:
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	eor	v30.16b,v7.16b,v25.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16

	b	.Lecb_done
.align	4
.Loop3x_ecb_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ecb_enc

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	subs	x2,x2,#0x30
	csel	x6,x2,x6,lo				// x6, w6, is zero at this point
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	add	x0,x0,x6			// x0 is adjusted in such way that
						// at exit from the loop v1.16b-v24.16b
						// are loaded with last "words"
	mov	x7,x3
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	ld1	{v2.16b},[x0],#16
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	ld1	{v3.16b},[x0],#16
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	ld1	{v27.16b},[x0],#16
	aese	v0.16b,v23.16b
	aese	v1.16b,v23.16b
	aese	v24.16b,v23.16b
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v7.16b,v0.16b
	eor	v5.16b,v7.16b,v1.16b
	eor	v24.16b,v24.16b,v7.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v24.16b},[x1],#16
	orr	v24.16b,v27.16b,v27.16b
	b.hs	.Loop3x_ecb_enc

	cmn	x2,#0x30
	b.eq	.Lecb_done
	nop

.Lecb_enc_tail:
	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lecb_enc_tail

	aese	v1.16b,v16.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v16.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v17.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v17.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v20.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v20.16b
	aesmc	v24.16b,v24.16b
	cmn	x2,#0x20
	aese	v1.16b,v21.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v21.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v22.16b
	aesmc	v1.16b,v1.16b
	aese	v24.16b,v22.16b
	aesmc	v24.16b,v24.16b
	aese	v1.16b,v23.16b
	aese	v24.16b,v23.16b
	b.eq	.Lecb_enc_one
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lecb_done

.Lecb_enc_one:
	eor	v5.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16
	b	.Lecb_done
.align	5
.Lecb_dec:
	ld1	{v1.16b},[x0],#16
	subs	x2,x2,#32			// bias
	add	w6,w5,#2
	orr	v3.16b,v1.16b,v1.16b
	orr	v24.16b,v1.16b,v1.16b
	orr	v1.16b,v0.16b,v0.16b
	b.lo	.Lecb_dec_tail

	orr	v1.16b,v3.16b,v3.16b
	ld1	{v24.16b},[x0],#16
	cmp	x2,#32
	b.lo	.Loop3x_ecb_dec

	ld1	{v25.16b},[x0],#16
	ld1	{v26.16b},[x0],#16
	sub	x2,x2,#32				// bias
	mov	w6,w5

.Loop5x_ecb_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_ecb_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	cmp	x2,#0x40				// because .Lecb_tail4x
	sub	x2,x2,#0x50

	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt		// borrow x6, w6, "gt" is not typo
	mov	x7,x3

	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v18.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v18.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v18.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v18.16b
	aesimc	v26.16b,v26.16b
	add	x0,x0,x6				// x0 is adjusted in such way that
							// at exit from the loop v1.16b-v26.16b
							// are loaded with last "words"
	add	x6,x2,#0x60			// because .Lecb_tail4x

	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v19.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v19.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v19.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v19.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v20.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v20.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v21.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v21.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v22.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v22.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v23.16b
	ld1	{v2.16b},[x0],#16
	aesd	v1.16b,v23.16b
	ld1	{v3.16b},[x0],#16
	aesd	v24.16b,v23.16b
	ld1	{v27.16b},[x0],#16
	aesd	v25.16b,v23.16b
	ld1	{v28.16b},[x0],#16
	aesd	v26.16b,v23.16b
	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lecb_tail4x
	ld1	{v16.4s},[x7],#16			// re-pre-load rndkey[0]
	eor	v4.16b,v7.16b,v0.16b
	orr	v0.16b,v2.16b,v2.16b
	eor	v5.16b,v7.16b,v1.16b
	orr	v1.16b,v3.16b,v3.16b
	eor	v17.16b,v7.16b,v24.16b
	orr	v24.16b,v27.16b,v27.16b
	eor	v30.16b,v7.16b,v25.16b
	orr	v25.16b,v28.16b,v28.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v4.16b},[x1],#16
	orr	v26.16b,v29.16b,v29.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16			// re-pre-load rndkey[1]
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	b.hs	.Loop5x_ecb_dec

	add	x2,x2,#0x50
	cbz	x2,.Lecb_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	orr	v0.16b,v27.16b,v27.16b
	orr	v1.16b,v28.16b,v28.16b
	orr	v24.16b,v29.16b,v29.16b
	b.lo	.Lecb_dec_tail

	b	.Loop3x_ecb_dec

.align	4
.Lecb_tail4x:
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	eor	v30.16b,v7.16b,v25.16b
	eor	v31.16b,v7.16b,v26.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16

	b	.Lecb_done
.align	4
.Loop3x_ecb_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_ecb_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	subs	x2,x2,#0x30
	csel	x6,x2,x6,lo				// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	add	x0,x0,x6			// x0 is adjusted in such way that
						// at exit from the loop v1.16b-v24.16b
						// are loaded with last "words"
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	ld1	{v27.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	ld1	{v16.4s},[x7],#16		// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v7.16b,v0.16b
	eor	v5.16b,v7.16b,v1.16b
	eor	v24.16b,v24.16b,v7.16b
	ld1	{v17.4s},[x7],#16		// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v24.16b},[x1],#16
	orr	v24.16b,v27.16b,v27.16b
	b.hs	.Loop3x_ecb_dec

	cmn	x2,#0x30
	b.eq	.Lecb_done
	nop

.Lecb_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lecb_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	cmn	x2,#0x20
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	b.eq	.Lecb_dec_one
	eor	v5.16b,v7.16b,v1.16b
	eor	v17.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lecb_done

.Lecb_dec_one:
	eor	v5.16b,v7.16b,v24.16b
	st1	{v5.16b},[x1],#16

.Lecb_done:
	ldr	x29,[sp],#16
.Lecb_Final_abort:
	ret
.size	aes_v8_ecb_encrypt,.-aes_v8_ecb_encrypt

//----------------------------------------------------------------------
// aes_v8_cbc_encrypt
// In:   x0 = input, x1 = output, x2 = length in bytes,
//       x3 = key schedule (round count at offset 240),
//       x4 = 16-byte chaining value, read on entry and written at .Lcbc_done,
//       w5 = direction: zero branches to .Lcbc_dec, non-zero encrypts
// Behaviour visible here:
//   - length < 16 returns immediately; [x4] is not touched on that path;
//   - (length-16) is rounded down to a multiple of 16;
//   - encryption is one block at a time (separate 128-bit-key loop);
//     decryption runs 5 blocks, then 3 blocks, then a 1-2 block tail.
//----------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	AARCH64_VALID_CALL_TARGET
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0
	subs	x2,x2,#16
	mov	x8,#16
	b.lo	.Lcbc_abort
	csel	x8,xzr,x8,eq

	cmp	w5,#0			// en- or decrypting?
	ldr	w5,[x3,#240]
	and	x2,x2,#-16
	ld1	{v6.16b},[x4]
	ld1	{v0.16b},[x0],x8

	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
	sub	w5,w5,#6
	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
	sub	w5,w5,#2
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]

	add	x7,x3,#32
	mov	w6,w5
	b.eq	.Lcbc_dec

	cmp	w5,#2
	eor	v0.16b,v0.16b,v6.16b
	eor	v5.16b,v16.16b,v7.16b
	b.eq	.Lcbc_enc128

	ld1	{v2.4s,v3.4s},[x7]
	add	x7,x3,#16
	add	x6,x3,#16*4
	add	x12,x3,#16*5
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	add	x14,x3,#16*6
	add	x3,x3,#16*7
	b	.Lenter_cbc_enc

.align	4
.Loop_cbc_enc:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x6]
	cmp	w5,#4
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x12]
	b.eq	.Lcbc_enc192

	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.4s},[x14]
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x3]
	nop

.Lcbc_enc192:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done

.align	5
.Lcbc_enc128:
	ld1	{v2.4s,v3.4s},[x7]
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	aese	v0.16b,v16.16b
	aesmc	v0.16b,v0.16b
	st1	{v6.16b},[x1],#16
.Lenter_cbc_enc128:
	aese	v0.16b,v17.16b
	aesmc	v0.16b,v0.16b
	subs	x2,x2,#16
	aese	v0.16b,v2.16b
	aesmc	v0.16b,v0.16b
	csel	x8,xzr,x8,eq
	aese	v0.16b,v3.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v18.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v19.16b
	aesmc	v0.16b,v0.16b
	ld1	{v16.16b},[x0],x8
	aese	v0.16b,v20.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v21.16b
	aesmc	v0.16b,v0.16b
	aese	v0.16b,v22.16b
	aesmc	v0.16b,v0.16b
	eor	v16.16b,v16.16b,v5.16b
	aese	v0.16b,v23.16b
	eor	v6.16b,v0.16b,v7.16b
	b.hs	.Loop_cbc_enc128

	st1	{v6.16b},[x1],#16
	b	.Lcbc_done
.align	5
.Lcbc_dec:
	ld1	{v24.16b},[x0],#16
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v27.16b,v24.16b,v24.16b
	b.lo	.Lcbc_dec_tail

	orr	v1.16b,v24.16b,v24.16b
	ld1	{v24.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	orr	v27.16b,v24.16b,v24.16b
	cmp	x2,#32
	b.lo	.Loop3x_cbc_dec

	ld1	{v25.16b},[x0],#16
	ld1	{v26.16b},[x0],#16
	sub	x2,x2,#32		// bias
	mov	w6,w5
	orr	v28.16b,v25.16b,v25.16b
	orr	v29.16b,v26.16b,v26.16b

.Loop5x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop5x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	cmp	x2,#0x40		// because .Lcbc_tail4x
	sub	x2,x2,#0x50

	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt		// borrow x6, w6, "gt" is not typo
	mov	x7,x3

	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v18.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v18.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v18.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v18.16b
	aesimc	v26.16b,v26.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v26.16b
					// are loaded with last "words"
	add	x6,x2,#0x60		// because .Lcbc_tail4x

	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v19.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v19.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v19.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v19.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v20.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v20.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v21.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v21.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v22.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v22.16b
	aesimc	v26.16b,v26.16b

	eor	v4.16b,v6.16b,v7.16b
	aesd	v0.16b,v23.16b
	eor	v5.16b,v2.16b,v7.16b
	ld1	{v2.16b},[x0],#16
	aesd	v1.16b,v23.16b
	eor	v17.16b,v3.16b,v7.16b
	ld1	{v3.16b},[x0],#16
	aesd	v24.16b,v23.16b
	eor	v30.16b,v27.16b,v7.16b
	ld1	{v27.16b},[x0],#16
	aesd	v25.16b,v23.16b
	eor	v31.16b,v28.16b,v7.16b
	ld1	{v28.16b},[x0],#16
	aesd	v26.16b,v23.16b
	orr	v6.16b,v29.16b,v29.16b
	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lcbc_tail4x
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	eor	v4.16b,v4.16b,v0.16b
	orr	v0.16b,v2.16b,v2.16b
	eor	v5.16b,v5.16b,v1.16b
	orr	v1.16b,v3.16b,v3.16b
	eor	v17.16b,v17.16b,v24.16b
	orr	v24.16b,v27.16b,v27.16b
	eor	v30.16b,v30.16b,v25.16b
	orr	v25.16b,v28.16b,v28.16b
	eor	v31.16b,v31.16b,v26.16b
	st1	{v4.16b},[x1],#16
	orr	v26.16b,v29.16b,v29.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	b.hs	.Loop5x_cbc_dec

	add	x2,x2,#0x50
	cbz	x2,.Lcbc_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	orr	v0.16b,v27.16b,v27.16b
	orr	v2.16b,v27.16b,v27.16b
	orr	v1.16b,v28.16b,v28.16b
	orr	v3.16b,v28.16b,v28.16b
	orr	v24.16b,v29.16b,v29.16b
	orr	v27.16b,v29.16b,v29.16b
	b.lo	.Lcbc_dec_tail

	b	.Loop3x_cbc_dec

.align	4
.Lcbc_tail4x:
	eor	v5.16b,v4.16b,v1.16b
	eor	v17.16b,v17.16b,v24.16b
	eor	v30.16b,v30.16b,v25.16b
	eor	v31.16b,v31.16b,v26.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16

	b	.Lcbc_done
.align	4
.Loop3x_cbc_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Loop3x_cbc_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	eor	v4.16b,v6.16b,v7.16b
	subs	x2,x2,#0x30
	eor	v5.16b,v2.16b,v7.16b
	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	eor	v17.16b,v3.16b,v7.16b
	add	x0,x0,x6		// x0 is adjusted in such way that
					// at exit from the loop v1.16b-v24.16b
					// are loaded with last "words"
	orr	v6.16b,v27.16b,v27.16b
	mov	x7,x3
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	ld1	{v2.16b},[x0],#16
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	ld1	{v3.16b},[x0],#16
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	ld1	{v27.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v24.16b,v24.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
	st1	{v4.16b},[x1],#16
	orr	v0.16b,v2.16b,v2.16b
	st1	{v5.16b},[x1],#16
	orr	v1.16b,v3.16b,v3.16b
	st1	{v24.16b},[x1],#16
	orr	v24.16b,v27.16b,v27.16b
	b.hs	.Loop3x_cbc_dec

	cmn	x2,#0x30
	b.eq	.Lcbc_done
	nop

.Lcbc_dec_tail:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lcbc_dec_tail

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	cmn	x2,#0x20
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	eor	v17.16b,v3.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	b.eq	.Lcbc_dec_one
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v24.16b
	orr	v6.16b,v27.16b,v27.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	b	.Lcbc_done

.Lcbc_dec_one:
	eor	v5.16b,v5.16b,v24.16b
	orr	v6.16b,v27.16b,v27.16b
	st1	{v5.16b},[x1],#16

.Lcbc_done:
	st1	{v6.16b},[x4]
.Lcbc_abort:
	ldr	x29,[sp],#16
	ret
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

// aes_v8_ctr32_encrypt_blocks_unroll12_eor3: only the head of this routine
// is visible in this chunk; it continues past the last line below.
.globl	aes_v8_ctr32_encrypt_blocks_unroll12_eor3
.type	aes_v8_ctr32_encrypt_blocks_unroll12_eor3,%function
.align	5
aes_v8_ctr32_encrypt_blocks_unroll12_eor3:
	AARCH64_VALID_CALL_TARGET
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
	stp	x29,x30,[sp,#-80]!
	stp	d8,d9,[sp, #16]
	stp	d10,d11,[sp, #32]
	stp	d12,d13,[sp, #48]
	stp	d14,d15,[sp, #64]
	add	x29,sp,#0

	ldr	w5,[x3,#240]

	ldr	w8, [x4, #12]
#ifdef __AARCH64EB__
	ld1	{v24.16b},[x4]
#else
	ld1	{v24.4s},[x4]
#endif
	ld1	{v2.4s,v3.4s},[x3]		// load key schedule...
	sub	w5,w5,#4
	cmp	x2,#2
	add	x7,x3,x5,lsl#4	// pointer to last round key
	sub	w5,w5,#2
	add	x7, x7, #64
	ld1	{v1.4s},[x7]
	add	x7,x3,#32
	mov	w6,w5
#ifndef __AARCH64EB__
	rev	w8, w8
#endif

	orr	v25.16b,v24.16b,v24.16b
	add	w10, w8, #1
	orr	v26.16b,v24.16b,v24.16b
	add	w8, w8, #2
	orr	v0.16b,v24.16b,v24.16b
	rev	w10, w10
	mov	v25.s[3],w10
	b.ls	.Lctr32_tail_unroll
	cmp	x2,#6
	rev	w12, w8
	sub	x2,x2,#3		// bias
	mov	v26.s[3],w12
	b.lo	.Loop3x_ctr32_unroll
	cmp	x2,#9
	orr	v27.16b,v24.16b,v24.16b
	add	w11, w8, #1
	orr	v28.16b,v24.16b,v24.16b
	add	w13, w8, #2
	rev	w11, w11
	orr	v29.16b,v24.16b,v24.16b
	add	w8, w8, #3
	rev	w13, w13
	mov	v27.s[3],w11
	rev	w14, w8
	mov	v28.s[3],w13
	mov	v29.s[3],w14
	sub	x2,x2,#3
	b.lo	.Loop6x_ctr32_unroll

	// push regs to stack when 12 data chunks are interleaved
	stp	x19,x20,[sp,#-16]!
	stp	x21,x22,[sp,#-16]!
	stp	x23,x24,[sp,#-16]!
	stp	d8,d9,[sp,#-32]!
	stp	d10,d11,[sp,#-32]!

	add	w15,w8,#1
	add	w19,w8,#2
	add	w20,w8,#3
	add	w21,w8,#4
	add	w22,w8,#5
	add	w8,w8,#6
	orr	v30.16b,v24.16b,v24.16b
	rev	w15,w15
	orr	v31.16b,v24.16b,v24.16b
	rev	w19,w19
	orr	v8.16b,v24.16b,v24.16b
	rev	w20,w20
	orr	v9.16b,v24.16b,v24.16b
	rev	w21,w21
	orr	v10.16b,v24.16b,v24.16b
	rev	w22,w22
	orr	v11.16b,v24.16b,v24.16b
	rev	w23,w8

	sub	x2,x2,#6		// bias
	mov	v30.s[3],w15
	mov	v31.s[3],w19
	mov	v8.s[3],w20
	mov	v9.s[3],w21
	mov	v10.s[3],w22
	mov	v11.s[3],w23
	b	.Loop12x_ctr32_unroll

.align	4
.Loop12x_ctr32_unroll:
	aese	v24.16b,v2.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v2.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v2.16b
	aesmc	v26.16b,v26.16b
	aese	v27.16b,v2.16b
	aesmc	v27.16b,v27.16b
	aese	v28.16b,v2.16b
	aesmc	v28.16b,v28.16b
	aese	v29.16b,v2.16b
	aesmc	v29.16b,v29.16b
	aese	v30.16b,v2.16b
	aesmc	v30.16b,v30.16b
	aese	v31.16b,v2.16b
	aesmc	v31.16b,v31.16b
	aese	v8.16b,v2.16b
	aesmc	v8.16b,v8.16b
	aese	v9.16b,v2.16b
	aesmc	v9.16b,v9.16b
	aese	v10.16b,v2.16b
	aesmc	v10.16b,v10.16b
	aese	v11.16b,v2.16b
	aesmc	v11.16b,v11.16b
	ld1	{v2.4s},[x7],#16
	subs	w6,w6,#2
	aese	v24.16b,v3.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v3.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v3.16b
	aesmc	v26.16b,v26.16b
	aese	v27.16b,v3.16b
	aesmc	v27.16b,v27.16b
	aese	v28.16b,v3.16b
	aesmc	v28.16b,v28.16b
	aese	v29.16b,v3.16b
	aesmc	v29.16b,v29.16b
	aese	v30.16b,v3.16b
	aesmc	v30.16b,v30.16b
	aese	v31.16b,v3.16b
	aesmc	v31.16b,v31.16b
	aese	v8.16b,v3.16b
	aesmc	v8.16b,v8.16b
	aese	v9.16b,v3.16b
	aesmc	v9.16b,v9.16b
	aese	v10.16b,v3.16b
	aesmc	v10.16b,v10.16b
	aese	v11.16b,v3.16b
	aesmc	v11.16b,v11.16b
	ld1	{v3.4s},[x7],#16
	b.gt	.Loop12x_ctr32_unroll

	aese	v24.16b,v2.16b
	aesmc	v24.16b,v24.16b
	aese	v25.16b,v2.16b
	aesmc	v25.16b,v25.16b
	aese	v26.16b,v2.16b
	aesmc	v26.16b,v26.16b
	aese	v27.16b,v2.16b
	aesmc	v27.16b,v27.16b
	aese	v28.16b,v2.16b
	aesmc	
v28.16b,v28.16b1676aese v29.16b,v2.16b1677aesmc v29.16b,v29.16b1678aese v30.16b,v2.16b1679aesmc v30.16b,v30.16b1680aese v31.16b,v2.16b1681aesmc v31.16b,v31.16b1682aese v8.16b,v2.16b1683aesmc v8.16b,v8.16b1684aese v9.16b,v2.16b1685aesmc v9.16b,v9.16b1686aese v10.16b,v2.16b1687aesmc v10.16b,v10.16b1688aese v11.16b,v2.16b1689aesmc v11.16b,v11.16b1690ld1 {v2.4s},[x7],#1616911692aese v24.16b,v3.16b1693aesmc v24.16b,v24.16b1694aese v25.16b,v3.16b1695aesmc v25.16b,v25.16b1696aese v26.16b,v3.16b1697aesmc v26.16b,v26.16b1698aese v27.16b,v3.16b1699aesmc v27.16b,v27.16b1700aese v28.16b,v3.16b1701aesmc v28.16b,v28.16b1702aese v29.16b,v3.16b1703aesmc v29.16b,v29.16b1704aese v30.16b,v3.16b1705aesmc v30.16b,v30.16b1706aese v31.16b,v3.16b1707aesmc v31.16b,v31.16b1708aese v8.16b,v3.16b1709aesmc v8.16b,v8.16b1710aese v9.16b,v3.16b1711aesmc v9.16b,v9.16b1712aese v10.16b,v3.16b1713aesmc v10.16b,v10.16b1714aese v11.16b,v3.16b1715aesmc v11.16b,v11.16b1716ld1 {v3.4s},[x7],#1617171718aese v24.16b,v2.16b1719aesmc v24.16b,v24.16b1720add w9,w8,#11721add w10,w8,#21722aese v25.16b,v2.16b1723aesmc v25.16b,v25.16b1724add w12,w8,#31725add w11,w8,#41726aese v26.16b,v2.16b1727aesmc v26.16b,v26.16b1728add w13,w8,#51729add w14,w8,#61730rev w9,w91731aese v27.16b,v2.16b1732aesmc v27.16b,v27.16b1733add w15,w8,#71734add w19,w8,#81735rev w10,w101736rev w12,w121737aese v28.16b,v2.16b1738aesmc v28.16b,v28.16b1739add w20,w8,#91740add w21,w8,#101741rev w11,w111742rev w13,w131743aese v29.16b,v2.16b1744aesmc v29.16b,v29.16b1745add w22,w8,#111746add w23,w8,#121747rev w14,w141748rev w15,w151749aese v30.16b,v2.16b1750aesmc v30.16b,v30.16b1751rev w19,w191752rev w20,w201753aese v31.16b,v2.16b1754aesmc v31.16b,v31.16b1755rev w21,w211756rev w22,w221757aese v8.16b,v2.16b1758aesmc v8.16b,v8.16b1759rev w23,w231760aese v9.16b,v2.16b1761aesmc v9.16b,v9.16b1762aese v10.16b,v2.16b1763aesmc v10.16b,v10.16b1764aese v11.16b,v2.16b1765aesmc v11.16b,v11.16b1766ld1 {v2.4s},[x7],#1617671768aese v24.16b,v3.16b1769aesmc 
v24.16b,v24.16b1770aese v25.16b,v3.16b1771aesmc v25.16b,v25.16b1772aese v26.16b,v3.16b1773aesmc v26.16b,v26.16b1774aese v27.16b,v3.16b1775aesmc v27.16b,v27.16b1776ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#641777aese v28.16b,v3.16b1778aesmc v28.16b,v28.16b1779aese v29.16b,v3.16b1780aesmc v29.16b,v29.16b1781aese v30.16b,v3.16b1782aesmc v30.16b,v30.16b1783aese v31.16b,v3.16b1784aesmc v31.16b,v31.16b1785ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#641786aese v8.16b,v3.16b1787aesmc v8.16b,v8.16b1788aese v9.16b,v3.16b1789aesmc v9.16b,v9.16b1790aese v10.16b,v3.16b1791aesmc v10.16b,v10.16b1792aese v11.16b,v3.16b1793aesmc v11.16b,v11.16b1794ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#641795ld1 {v3.4s},[x7],#1617961797mov x7, x31798aese v24.16b,v2.16b1799aesmc v24.16b,v24.16b1800aese v25.16b,v2.16b1801aesmc v25.16b,v25.16b1802aese v26.16b,v2.16b1803aesmc v26.16b,v26.16b1804aese v27.16b,v2.16b1805aesmc v27.16b,v27.16b1806aese v28.16b,v2.16b1807aesmc v28.16b,v28.16b1808aese v29.16b,v2.16b1809aesmc v29.16b,v29.16b1810aese v30.16b,v2.16b1811aesmc v30.16b,v30.16b1812aese v31.16b,v2.16b1813aesmc v31.16b,v31.16b1814aese v8.16b,v2.16b1815aesmc v8.16b,v8.16b1816aese v9.16b,v2.16b1817aesmc v9.16b,v9.16b1818aese v10.16b,v2.16b1819aesmc v10.16b,v10.16b1820aese v11.16b,v2.16b1821aesmc v11.16b,v11.16b1822ld1 {v2.4s},[x7],#16 // re-pre-load rndkey[0]18231824aese v24.16b,v3.16b1825.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b1826orr v24.16b,v0.16b,v0.16b1827aese v25.16b,v3.16b1828.inst 0xce0164a5 //eor3 v5.16b,v5.16b,v1.16b,v25.16b1829orr v25.16b,v0.16b,v0.16b1830aese v26.16b,v3.16b1831.inst 0xce0168c6 //eor3 v6.16b,v6.16b,v1.16b,v26.16b1832orr v26.16b,v0.16b,v0.16b1833aese v27.16b,v3.16b1834.inst 0xce016ce7 //eor3 v7.16b,v7.16b,v1.16b,v27.16b1835orr v27.16b,v0.16b,v0.16b1836aese v28.16b,v3.16b1837.inst 0xce017210 //eor3 v16.16b,v16.16b,v1.16b,v28.16b1838orr v28.16b,v0.16b,v0.16b1839aese v29.16b,v3.16b1840.inst 0xce017631 //eor3 v17.16b,v17.16b,v1.16b,v29.16b1841orr 
v29.16b,v0.16b,v0.16b1842aese v30.16b,v3.16b1843.inst 0xce017a52 //eor3 v18.16b,v18.16b,v1.16b,v30.16b1844orr v30.16b,v0.16b,v0.16b1845aese v31.16b,v3.16b1846.inst 0xce017e73 //eor3 v19.16b,v19.16b,v1.16b,v31.16b1847orr v31.16b,v0.16b,v0.16b1848aese v8.16b,v3.16b1849.inst 0xce012294 //eor3 v20.16b,v20.16b,v1.16b,v8.16b1850orr v8.16b,v0.16b,v0.16b1851aese v9.16b,v3.16b1852.inst 0xce0126b5 //eor3 v21.16b,v21.16b,v1.16b,v9.16b1853orr v9.16b,v0.16b,v0.16b1854aese v10.16b,v3.16b1855.inst 0xce012ad6 //eor3 v22.16b,v22.16b,v1.16b,v10.16b1856orr v10.16b,v0.16b,v0.16b1857aese v11.16b,v3.16b1858.inst 0xce012ef7 //eor3 v23.16b,v23.16b,v1.16b,v11.16b1859orr v11.16b,v0.16b,v0.16b1860ld1 {v3.4s},[x7],#16 // re-pre-load rndkey[1]18611862mov v24.s[3],w91863mov v25.s[3],w101864mov v26.s[3],w121865mov v27.s[3],w111866st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#641867mov v28.s[3],w131868mov v29.s[3],w141869mov v30.s[3],w151870mov v31.s[3],w191871st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#641872mov v8.s[3],w201873mov v9.s[3],w211874mov v10.s[3],w221875mov v11.s[3],w231876st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#6418771878mov w6,w518791880add w8,w8,#121881subs x2,x2,#121882b.hs .Loop12x_ctr32_unroll18831884// pop regs from stack when 12 data chunks are interleaved1885ldp d10,d11,[sp],#321886ldp d8,d9,[sp],#321887ldp x23,x24,[sp],#161888ldp x21,x22,[sp],#161889ldp x19,x20,[sp],#1618901891add x2,x2,#121892cbz x2,.Lctr32_done_unroll1893sub w8,w8,#1218941895cmp x2,#21896b.ls .Lctr32_tail_unroll18971898cmp x2,#61899sub x2,x2,#3 // bias1900add w8,w8,#31901b.lo .Loop3x_ctr32_unroll19021903sub x2,x2,#31904add w8,w8,#31905b.lo .Loop6x_ctr32_unroll19061907.align 41908.Loop6x_ctr32_unroll:1909aese v24.16b,v2.16b1910aesmc v24.16b,v24.16b1911aese v25.16b,v2.16b1912aesmc v25.16b,v25.16b1913aese v26.16b,v2.16b1914aesmc v26.16b,v26.16b1915aese v27.16b,v2.16b1916aesmc v27.16b,v27.16b1917aese v28.16b,v2.16b1918aesmc v28.16b,v28.16b1919aese v29.16b,v2.16b1920aesmc v29.16b,v29.16b1921ld1 
{v2.4s},[x7],#161922subs w6,w6,#21923aese v24.16b,v3.16b1924aesmc v24.16b,v24.16b1925aese v25.16b,v3.16b1926aesmc v25.16b,v25.16b1927aese v26.16b,v3.16b1928aesmc v26.16b,v26.16b1929aese v27.16b,v3.16b1930aesmc v27.16b,v27.16b1931aese v28.16b,v3.16b1932aesmc v28.16b,v28.16b1933aese v29.16b,v3.16b1934aesmc v29.16b,v29.16b1935ld1 {v3.4s},[x7],#161936b.gt .Loop6x_ctr32_unroll19371938aese v24.16b,v2.16b1939aesmc v24.16b,v24.16b1940aese v25.16b,v2.16b1941aesmc v25.16b,v25.16b1942aese v26.16b,v2.16b1943aesmc v26.16b,v26.16b1944aese v27.16b,v2.16b1945aesmc v27.16b,v27.16b1946aese v28.16b,v2.16b1947aesmc v28.16b,v28.16b1948aese v29.16b,v2.16b1949aesmc v29.16b,v29.16b1950ld1 {v2.4s},[x7],#1619511952aese v24.16b,v3.16b1953aesmc v24.16b,v24.16b1954aese v25.16b,v3.16b1955aesmc v25.16b,v25.16b1956aese v26.16b,v3.16b1957aesmc v26.16b,v26.16b1958aese v27.16b,v3.16b1959aesmc v27.16b,v27.16b1960aese v28.16b,v3.16b1961aesmc v28.16b,v28.16b1962aese v29.16b,v3.16b1963aesmc v29.16b,v29.16b1964ld1 {v3.4s},[x7],#1619651966aese v24.16b,v2.16b1967aesmc v24.16b,v24.16b1968add w9,w8,#11969add w10,w8,#21970aese v25.16b,v2.16b1971aesmc v25.16b,v25.16b1972add w12,w8,#31973add w11,w8,#41974aese v26.16b,v2.16b1975aesmc v26.16b,v26.16b1976add w13,w8,#51977add w14,w8,#61978rev w9,w91979aese v27.16b,v2.16b1980aesmc v27.16b,v27.16b1981rev w10,w101982rev w12,w121983aese v28.16b,v2.16b1984aesmc v28.16b,v28.16b1985rev w11,w111986rev w13,w131987aese v29.16b,v2.16b1988aesmc v29.16b,v29.16b1989rev w14,w141990ld1 {v2.4s},[x7],#1619911992aese v24.16b,v3.16b1993aesmc v24.16b,v24.16b1994aese v25.16b,v3.16b1995aesmc v25.16b,v25.16b1996ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#641997aese v26.16b,v3.16b1998aesmc v26.16b,v26.16b1999aese v27.16b,v3.16b2000aesmc v27.16b,v27.16b2001ld1 {v16.16b,v17.16b},[x0],#322002aese v28.16b,v3.16b2003aesmc v28.16b,v28.16b2004aese v29.16b,v3.16b2005aesmc v29.16b,v29.16b2006ld1 {v3.4s},[x7],#1620072008mov x7, x32009aese v24.16b,v2.16b2010aesmc v24.16b,v24.16b2011aese 
v25.16b,v2.16b2012aesmc v25.16b,v25.16b2013aese v26.16b,v2.16b2014aesmc v26.16b,v26.16b2015aese v27.16b,v2.16b2016aesmc v27.16b,v27.16b2017aese v28.16b,v2.16b2018aesmc v28.16b,v28.16b2019aese v29.16b,v2.16b2020aesmc v29.16b,v29.16b2021ld1 {v2.4s},[x7],#16 // re-pre-load rndkey[0]20222023aese v24.16b,v3.16b2024.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b2025aese v25.16b,v3.16b2026.inst 0xce0164a5 //eor3 v5.16b,v5.16b,v1.16b,v25.16b2027aese v26.16b,v3.16b2028.inst 0xce0168c6 //eor3 v6.16b,v6.16b,v1.16b,v26.16b2029aese v27.16b,v3.16b2030.inst 0xce016ce7 //eor3 v7.16b,v7.16b,v1.16b,v27.16b2031aese v28.16b,v3.16b2032.inst 0xce017210 //eor3 v16.16b,v16.16b,v1.16b,v28.16b2033aese v29.16b,v3.16b2034.inst 0xce017631 //eor3 v17.16b,v17.16b,v1.16b,v29.16b2035ld1 {v3.4s},[x7],#16 // re-pre-load rndkey[1]20362037orr v24.16b,v0.16b,v0.16b2038orr v25.16b,v0.16b,v0.16b2039orr v26.16b,v0.16b,v0.16b2040orr v27.16b,v0.16b,v0.16b2041orr v28.16b,v0.16b,v0.16b2042orr v29.16b,v0.16b,v0.16b20432044mov v24.s[3],w92045mov v25.s[3],w102046st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#642047mov v26.s[3],w122048mov v27.s[3],w112049st1 {v16.16b,v17.16b},[x1],#322050mov v28.s[3],w132051mov v29.s[3],w1420522053cbz x2,.Lctr32_done_unroll2054mov w6,w520552056cmp x2,#22057b.ls .Lctr32_tail_unroll20582059sub x2,x2,#3 // bias2060add w8,w8,#32061b .Loop3x_ctr32_unroll20622063.align 42064.Loop3x_ctr32_unroll:2065aese v24.16b,v2.16b2066aesmc v24.16b,v24.16b2067aese v25.16b,v2.16b2068aesmc v25.16b,v25.16b2069aese v26.16b,v2.16b2070aesmc v26.16b,v26.16b2071ld1 {v2.4s},[x7],#162072subs w6,w6,#22073aese v24.16b,v3.16b2074aesmc v24.16b,v24.16b2075aese v25.16b,v3.16b2076aesmc v25.16b,v25.16b2077aese v26.16b,v3.16b2078aesmc v26.16b,v26.16b2079ld1 {v3.4s},[x7],#162080b.gt .Loop3x_ctr32_unroll20812082aese v24.16b,v2.16b2083aesmc v9.16b,v24.16b2084aese v25.16b,v2.16b2085aesmc v10.16b,v25.16b2086ld1 {v4.16b,v5.16b,v6.16b},[x0],#482087orr v24.16b,v0.16b,v0.16b2088aese v26.16b,v2.16b2089aesmc v26.16b,v26.16b2090ld1 
{v2.4s},[x7],#162091orr v25.16b,v0.16b,v0.16b2092aese v9.16b,v3.16b2093aesmc v9.16b,v9.16b2094aese v10.16b,v3.16b2095aesmc v10.16b,v10.16b2096aese v26.16b,v3.16b2097aesmc v11.16b,v26.16b2098ld1 {v3.4s},[x7],#162099orr v26.16b,v0.16b,v0.16b2100add w9,w8,#12101aese v9.16b,v2.16b2102aesmc v9.16b,v9.16b2103aese v10.16b,v2.16b2104aesmc v10.16b,v10.16b2105add w10,w8,#22106aese v11.16b,v2.16b2107aesmc v11.16b,v11.16b2108ld1 {v2.4s},[x7],#162109add w8,w8,#32110aese v9.16b,v3.16b2111aesmc v9.16b,v9.16b2112aese v10.16b,v3.16b2113aesmc v10.16b,v10.16b21142115rev w9,w92116aese v11.16b,v3.16b2117aesmc v11.16b,v11.16b2118ld1 {v3.4s},[x7],#162119mov v24.s[3], w92120mov x7,x32121rev w10,w102122aese v9.16b,v2.16b2123aesmc v9.16b,v9.16b21242125aese v10.16b,v2.16b2126aesmc v10.16b,v10.16b2127mov v25.s[3], w102128rev w12,w82129aese v11.16b,v2.16b2130aesmc v11.16b,v11.16b2131mov v26.s[3], w1221322133aese v9.16b,v3.16b2134aese v10.16b,v3.16b2135aese v11.16b,v3.16b21362137.inst 0xce012484 //eor3 v4.16b,v4.16b,v1.16b,v9.16b2138ld1 {v2.4s},[x7],#16 // re-pre-load rndkey[0]2139.inst 0xce0128a5 //eor3 v5.16b,v5.16b,v1.16b,v10.16b2140mov w6,w52141.inst 0xce012cc6 //eor3 v6.16b,v6.16b,v1.16b,v11.16b2142ld1 {v3.4s},[x7],#16 // re-pre-load rndkey[1]2143st1 {v4.16b,v5.16b,v6.16b},[x1],#4821442145cbz x2,.Lctr32_done_unroll21462147.Lctr32_tail_unroll:2148cmp x2,#12149b.eq .Lctr32_tail_1_unroll21502151.Lctr32_tail_2_unroll:2152aese v24.16b,v2.16b2153aesmc v24.16b,v24.16b2154aese v25.16b,v2.16b2155aesmc v25.16b,v25.16b2156ld1 {v2.4s},[x7],#162157subs w6,w6,#22158aese v24.16b,v3.16b2159aesmc v24.16b,v24.16b2160aese v25.16b,v3.16b2161aesmc v25.16b,v25.16b2162ld1 {v3.4s},[x7],#162163b.gt .Lctr32_tail_2_unroll21642165aese v24.16b,v2.16b2166aesmc v24.16b,v24.16b2167aese v25.16b,v2.16b2168aesmc v25.16b,v25.16b2169ld1 {v2.4s},[x7],#162170aese v24.16b,v3.16b2171aesmc v24.16b,v24.16b2172aese v25.16b,v3.16b2173aesmc v25.16b,v25.16b2174ld1 {v3.4s},[x7],#162175ld1 {v4.16b,v5.16b},[x0],#322176aese 
v24.16b,v2.16b2177aesmc v24.16b,v24.16b2178aese v25.16b,v2.16b2179aesmc v25.16b,v25.16b2180ld1 {v2.4s},[x7],#162181aese v24.16b,v3.16b2182aesmc v24.16b,v24.16b2183aese v25.16b,v3.16b2184aesmc v25.16b,v25.16b2185ld1 {v3.4s},[x7],#162186aese v24.16b,v2.16b2187aesmc v24.16b,v24.16b2188aese v25.16b,v2.16b2189aesmc v25.16b,v25.16b2190aese v24.16b,v3.16b2191aese v25.16b,v3.16b21922193.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b2194.inst 0xce0164a5 //eor3 v5.16b,v5.16b,v1.16b,v25.16b2195st1 {v4.16b,v5.16b},[x1],#322196b .Lctr32_done_unroll21972198.Lctr32_tail_1_unroll:2199aese v24.16b,v2.16b2200aesmc v24.16b,v24.16b2201ld1 {v2.4s},[x7],#162202subs w6,w6,#22203aese v24.16b,v3.16b2204aesmc v24.16b,v24.16b2205ld1 {v3.4s},[x7],#162206b.gt .Lctr32_tail_1_unroll22072208aese v24.16b,v2.16b2209aesmc v24.16b,v24.16b2210ld1 {v2.4s},[x7],#162211aese v24.16b,v3.16b2212aesmc v24.16b,v24.16b2213ld1 {v3.4s},[x7],#162214ld1 {v4.16b},[x0]2215aese v24.16b,v2.16b2216aesmc v24.16b,v24.16b2217ld1 {v2.4s},[x7],#162218aese v24.16b,v3.16b2219aesmc v24.16b,v24.16b2220ld1 {v3.4s},[x7],#162221aese v24.16b,v2.16b2222aesmc v24.16b,v24.16b2223aese v24.16b,v3.16b22242225.inst 0xce016084 //eor3 v4.16b,v4.16b,v1.16b,v24.16b2226st1 {v4.16b},[x1],#1622272228.Lctr32_done_unroll:2229ldp d8,d9,[sp, #16]2230ldp d10,d11,[sp, #32]2231ldp d12,d13,[sp, #48]2232ldp d14,d15,[sp, #64]2233ldr x29,[sp],#802234ret2235.size aes_v8_ctr32_encrypt_blocks_unroll12_eor3,.-aes_v8_ctr32_encrypt_blocks_unroll12_eor32236.globl aes_v8_ctr32_encrypt_blocks2237.type aes_v8_ctr32_encrypt_blocks,%function2238.align 52239aes_v8_ctr32_encrypt_blocks:2240AARCH64_VALID_CALL_TARGET2241// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.2242stp x29,x30,[sp,#-16]!2243add x29,sp,#02244ldr w5,[x3,#240]22452246ldr w8, [x4, #12]2247#ifdef __AARCH64EB__2248ld1 {v0.16b},[x4]2249#else2250ld1 {v0.4s},[x4]2251#endif2252ld1 {v16.4s,v17.4s},[x3] // load key schedule...2253sub w5,w5,#42254mov x12,#162255cmp x2,#22256add 
x7,x3,x5,lsl#4 // pointer to last 5 round keys2257sub w5,w5,#22258ld1 {v20.4s,v21.4s},[x7],#322259ld1 {v22.4s,v23.4s},[x7],#322260ld1 {v7.4s},[x7]2261add x7,x3,#322262mov w6,w52263csel x12,xzr,x12,lo2264#ifndef __AARCH64EB__2265rev w8, w82266#endif2267orr v1.16b,v0.16b,v0.16b2268add w10, w8, #12269orr v18.16b,v0.16b,v0.16b2270add w8, w8, #22271orr v6.16b,v0.16b,v0.16b2272rev w10, w102273mov v1.s[3],w102274b.ls .Lctr32_tail2275rev w12, w82276sub x2,x2,#3 // bias2277mov v18.s[3],w122278cmp x2,#322279b.lo .Loop3x_ctr3222802281add w13,w8,#12282add w14,w8,#22283orr v24.16b,v0.16b,v0.16b2284rev w13,w132285orr v25.16b,v0.16b,v0.16b2286rev w14,w142287mov v24.s[3],w132288sub x2,x2,#2 // bias2289mov v25.s[3],w142290add w8,w8,#22291b .Loop5x_ctr3222922293.align 42294.Loop5x_ctr32:2295aese v0.16b,v16.16b2296aesmc v0.16b,v0.16b2297aese v1.16b,v16.16b2298aesmc v1.16b,v1.16b2299aese v18.16b,v16.16b2300aesmc v18.16b,v18.16b2301aese v24.16b,v16.16b2302aesmc v24.16b,v24.16b2303aese v25.16b,v16.16b2304aesmc v25.16b,v25.16b2305ld1 {v16.4s},[x7],#162306subs w6,w6,#22307aese v0.16b,v17.16b2308aesmc v0.16b,v0.16b2309aese v1.16b,v17.16b2310aesmc v1.16b,v1.16b2311aese v18.16b,v17.16b2312aesmc v18.16b,v18.16b2313aese v24.16b,v17.16b2314aesmc v24.16b,v24.16b2315aese v25.16b,v17.16b2316aesmc v25.16b,v25.16b2317ld1 {v17.4s},[x7],#162318b.gt .Loop5x_ctr3223192320mov x7,x32321aese v0.16b,v16.16b2322aesmc v0.16b,v0.16b2323aese v1.16b,v16.16b2324aesmc v1.16b,v1.16b2325aese v18.16b,v16.16b2326aesmc v18.16b,v18.16b2327aese v24.16b,v16.16b2328aesmc v24.16b,v24.16b2329aese v25.16b,v16.16b2330aesmc v25.16b,v25.16b2331ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]23322333aese v0.16b,v17.16b2334aesmc v0.16b,v0.16b2335aese v1.16b,v17.16b2336aesmc v1.16b,v1.16b2337aese v18.16b,v17.16b2338aesmc v18.16b,v18.16b2339aese v24.16b,v17.16b2340aesmc v24.16b,v24.16b2341aese v25.16b,v17.16b2342aesmc v25.16b,v25.16b2343ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]23442345aese v0.16b,v20.16b2346aesmc 
v0.16b,v0.16b2347add w9,w8,#12348add w10,w8,#22349aese v1.16b,v20.16b2350aesmc v1.16b,v1.16b2351add w12,w8,#32352add w13,w8,#42353aese v18.16b,v20.16b2354aesmc v18.16b,v18.16b2355add w14,w8,#52356rev w9,w92357aese v24.16b,v20.16b2358aesmc v24.16b,v24.16b2359rev w10,w102360rev w12,w122361aese v25.16b,v20.16b2362aesmc v25.16b,v25.16b2363rev w13,w132364rev w14,w1423652366aese v0.16b,v21.16b2367aesmc v0.16b,v0.16b2368aese v1.16b,v21.16b2369aesmc v1.16b,v1.16b2370aese v18.16b,v21.16b2371aesmc v18.16b,v18.16b2372aese v24.16b,v21.16b2373aesmc v24.16b,v24.16b2374aese v25.16b,v21.16b2375aesmc v25.16b,v25.16b23762377aese v0.16b,v22.16b2378aesmc v0.16b,v0.16b2379ld1 {v2.16b},[x0],#162380aese v1.16b,v22.16b2381aesmc v1.16b,v1.16b2382ld1 {v3.16b},[x0],#162383aese v18.16b,v22.16b2384aesmc v18.16b,v18.16b2385ld1 {v19.16b},[x0],#162386aese v24.16b,v22.16b2387aesmc v24.16b,v24.16b2388ld1 {v26.16b},[x0],#162389aese v25.16b,v22.16b2390aesmc v25.16b,v25.16b2391ld1 {v27.16b},[x0],#1623922393aese v0.16b,v23.16b2394eor v2.16b,v2.16b,v7.16b2395aese v1.16b,v23.16b2396eor v3.16b,v3.16b,v7.16b2397aese v18.16b,v23.16b2398eor v19.16b,v19.16b,v7.16b2399aese v24.16b,v23.16b2400eor v26.16b,v26.16b,v7.16b2401aese v25.16b,v23.16b2402eor v27.16b,v27.16b,v7.16b24032404eor v2.16b,v2.16b,v0.16b2405orr v0.16b,v6.16b,v6.16b2406eor v3.16b,v3.16b,v1.16b2407orr v1.16b,v6.16b,v6.16b2408eor v19.16b,v19.16b,v18.16b2409orr v18.16b,v6.16b,v6.16b2410eor v26.16b,v26.16b,v24.16b2411orr v24.16b,v6.16b,v6.16b2412eor v27.16b,v27.16b,v25.16b2413orr v25.16b,v6.16b,v6.16b24142415st1 {v2.16b},[x1],#162416mov v0.s[3],w92417st1 {v3.16b},[x1],#162418mov v1.s[3],w102419st1 {v19.16b},[x1],#162420mov v18.s[3],w122421st1 {v26.16b},[x1],#162422mov v24.s[3],w132423st1 {v27.16b},[x1],#162424mov v25.s[3],w1424252426mov w6,w52427cbz x2,.Lctr32_done24282429add w8,w8,#52430subs x2,x2,#52431b.hs .Loop5x_ctr3224322433add x2,x2,#52434sub w8,w8,#524352436cmp x2,#22437mov x12,#162438csel x12,xzr,x12,lo2439b.ls .Lctr32_tail24402441sub 
x2,x2,#3 // bias2442add w8,w8,#32443b .Loop3x_ctr3224442445.align 42446.Loop3x_ctr32:2447aese v0.16b,v16.16b2448aesmc v0.16b,v0.16b2449aese v1.16b,v16.16b2450aesmc v1.16b,v1.16b2451aese v18.16b,v16.16b2452aesmc v18.16b,v18.16b2453ld1 {v16.4s},[x7],#162454subs w6,w6,#22455aese v0.16b,v17.16b2456aesmc v0.16b,v0.16b2457aese v1.16b,v17.16b2458aesmc v1.16b,v1.16b2459aese v18.16b,v17.16b2460aesmc v18.16b,v18.16b2461ld1 {v17.4s},[x7],#162462b.gt .Loop3x_ctr3224632464aese v0.16b,v16.16b2465aesmc v4.16b,v0.16b2466aese v1.16b,v16.16b2467aesmc v5.16b,v1.16b2468ld1 {v2.16b},[x0],#162469orr v0.16b,v6.16b,v6.16b2470aese v18.16b,v16.16b2471aesmc v18.16b,v18.16b2472ld1 {v3.16b},[x0],#162473orr v1.16b,v6.16b,v6.16b2474aese v4.16b,v17.16b2475aesmc v4.16b,v4.16b2476aese v5.16b,v17.16b2477aesmc v5.16b,v5.16b2478ld1 {v19.16b},[x0],#162479mov x7,x32480aese v18.16b,v17.16b2481aesmc v17.16b,v18.16b2482orr v18.16b,v6.16b,v6.16b2483add w9,w8,#12484aese v4.16b,v20.16b2485aesmc v4.16b,v4.16b2486aese v5.16b,v20.16b2487aesmc v5.16b,v5.16b2488eor v2.16b,v2.16b,v7.16b2489add w10,w8,#22490aese v17.16b,v20.16b2491aesmc v17.16b,v17.16b2492eor v3.16b,v3.16b,v7.16b2493add w8,w8,#32494aese v4.16b,v21.16b2495aesmc v4.16b,v4.16b2496aese v5.16b,v21.16b2497aesmc v5.16b,v5.16b2498eor v19.16b,v19.16b,v7.16b2499rev w9,w92500aese v17.16b,v21.16b2501aesmc v17.16b,v17.16b2502mov v0.s[3], w92503rev w10,w102504aese v4.16b,v22.16b2505aesmc v4.16b,v4.16b2506aese v5.16b,v22.16b2507aesmc v5.16b,v5.16b2508mov v1.s[3], w102509rev w12,w82510aese v17.16b,v22.16b2511aesmc v17.16b,v17.16b2512mov v18.s[3], w122513subs x2,x2,#32514aese v4.16b,v23.16b2515aese v5.16b,v23.16b2516aese v17.16b,v23.16b25172518eor v2.16b,v2.16b,v4.16b2519ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]2520st1 {v2.16b},[x1],#162521eor v3.16b,v3.16b,v5.16b2522mov w6,w52523st1 {v3.16b},[x1],#162524eor v19.16b,v19.16b,v17.16b2525ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]2526st1 {v19.16b},[x1],#162527b.hs .Loop3x_ctr3225282529adds x2,x2,#32530b.eq 
.Lctr32_done2531cmp x2,#12532mov x12,#162533csel x12,xzr,x12,eq25342535.Lctr32_tail:2536aese v0.16b,v16.16b2537aesmc v0.16b,v0.16b2538aese v1.16b,v16.16b2539aesmc v1.16b,v1.16b2540ld1 {v16.4s},[x7],#162541subs w6,w6,#22542aese v0.16b,v17.16b2543aesmc v0.16b,v0.16b2544aese v1.16b,v17.16b2545aesmc v1.16b,v1.16b2546ld1 {v17.4s},[x7],#162547b.gt .Lctr32_tail25482549aese v0.16b,v16.16b2550aesmc v0.16b,v0.16b2551aese v1.16b,v16.16b2552aesmc v1.16b,v1.16b2553aese v0.16b,v17.16b2554aesmc v0.16b,v0.16b2555aese v1.16b,v17.16b2556aesmc v1.16b,v1.16b2557ld1 {v2.16b},[x0],x122558aese v0.16b,v20.16b2559aesmc v0.16b,v0.16b2560aese v1.16b,v20.16b2561aesmc v1.16b,v1.16b2562ld1 {v3.16b},[x0]2563aese v0.16b,v21.16b2564aesmc v0.16b,v0.16b2565aese v1.16b,v21.16b2566aesmc v1.16b,v1.16b2567eor v2.16b,v2.16b,v7.16b2568aese v0.16b,v22.16b2569aesmc v0.16b,v0.16b2570aese v1.16b,v22.16b2571aesmc v1.16b,v1.16b2572eor v3.16b,v3.16b,v7.16b2573aese v0.16b,v23.16b2574aese v1.16b,v23.16b25752576cmp x2,#12577eor v2.16b,v2.16b,v0.16b2578eor v3.16b,v3.16b,v1.16b2579st1 {v2.16b},[x1],#162580b.eq .Lctr32_done2581st1 {v3.16b},[x1]25822583.Lctr32_done:2584ldr x29,[sp],#162585ret2586.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks2587.globl aes_v8_xts_encrypt2588.type aes_v8_xts_encrypt,%function2589.align 52590aes_v8_xts_encrypt:2591AARCH64_VALID_CALL_TARGET2592cmp x2,#162593// Original input data size bigger than 16, jump to big size processing.2594b.ne .Lxts_enc_big_size2595// Encrypt the iv with key2, as the first XEX iv.2596ldr w6,[x4,#240]2597ld1 {v0.4s},[x4],#162598ld1 {v6.16b},[x5]2599sub w6,w6,#22600ld1 {v1.4s},[x4],#1626012602.Loop_enc_iv_enc:2603aese v6.16b,v0.16b2604aesmc v6.16b,v6.16b2605ld1 {v0.4s},[x4],#162606subs w6,w6,#22607aese v6.16b,v1.16b2608aesmc v6.16b,v6.16b2609ld1 {v1.4s},[x4],#162610b.gt .Loop_enc_iv_enc26112612aese v6.16b,v0.16b2613aesmc v6.16b,v6.16b2614ld1 {v0.4s},[x4]2615aese v6.16b,v1.16b2616eor v6.16b,v6.16b,v0.16b26172618ld1 {v0.16b},[x0]2619eor 
v0.16b,v6.16b,v0.16b26202621ldr w6,[x3,#240]2622ld1 {v28.4s,v29.4s},[x3],#32 // load key schedule...26232624aese v0.16b,v28.16b2625aesmc v0.16b,v0.16b2626ld1 {v16.4s,v17.4s},[x3],#32 // load key schedule...2627aese v0.16b,v29.16b2628aesmc v0.16b,v0.16b2629subs w6,w6,#10 // if rounds==10, jump to aes-128-xts processing2630b.eq .Lxts_128_enc2631.Lxts_enc_round_loop:2632aese v0.16b,v16.16b2633aesmc v0.16b,v0.16b2634ld1 {v16.4s},[x3],#16 // load key schedule...2635aese v0.16b,v17.16b2636aesmc v0.16b,v0.16b2637ld1 {v17.4s},[x3],#16 // load key schedule...2638subs w6,w6,#2 // bias2639b.gt .Lxts_enc_round_loop2640.Lxts_128_enc:2641ld1 {v18.4s,v19.4s},[x3],#32 // load key schedule...2642aese v0.16b,v16.16b2643aesmc v0.16b,v0.16b2644aese v0.16b,v17.16b2645aesmc v0.16b,v0.16b2646ld1 {v20.4s,v21.4s},[x3],#32 // load key schedule...2647aese v0.16b,v18.16b2648aesmc v0.16b,v0.16b2649aese v0.16b,v19.16b2650aesmc v0.16b,v0.16b2651ld1 {v22.4s,v23.4s},[x3],#32 // load key schedule...2652aese v0.16b,v20.16b2653aesmc v0.16b,v0.16b2654aese v0.16b,v21.16b2655aesmc v0.16b,v0.16b2656ld1 {v7.4s},[x3]2657aese v0.16b,v22.16b2658aesmc v0.16b,v0.16b2659aese v0.16b,v23.16b2660eor v0.16b,v0.16b,v7.16b2661eor v0.16b,v0.16b,v6.16b2662st1 {v0.16b},[x1]2663b .Lxts_enc_final_abort26642665.align 42666.Lxts_enc_big_size:2667stp x19,x20,[sp,#-64]!2668stp x21,x22,[sp,#48]2669stp d8,d9,[sp,#32]2670stp d10,d11,[sp,#16]26712672// tailcnt store the tail value of length%16.2673and x21,x2,#0xf2674and x2,x2,#-162675subs x2,x2,#162676mov x8,#162677b.lo .Lxts_abort2678csel x8,xzr,x8,eq26792680// Firstly, encrypt the iv with key2, as the first iv of XEX.2681ldr w6,[x4,#240]2682ld1 {v0.4s},[x4],#162683ld1 {v6.16b},[x5]2684sub w6,w6,#22685ld1 {v1.4s},[x4],#1626862687.Loop_iv_enc:2688aese v6.16b,v0.16b2689aesmc v6.16b,v6.16b2690ld1 {v0.4s},[x4],#162691subs w6,w6,#22692aese v6.16b,v1.16b2693aesmc v6.16b,v6.16b2694ld1 {v1.4s},[x4],#162695b.gt .Loop_iv_enc26962697aese v6.16b,v0.16b2698aesmc v6.16b,v6.16b2699ld1 
{v0.4s},[x4]2700aese v6.16b,v1.16b2701eor v6.16b,v6.16b,v0.16b27022703// The iv for second block2704// x9- iv(low), x10 - iv(high)2705// the five ivs stored into, v6.16b,v8.16b,v9.16b,v10.16b,v11.16b2706fmov x9,d62707fmov x10,v6.d[1]2708mov w19,#0x872709extr x22,x10,x10,#322710extr x10,x10,x9,#632711and w11,w19,w22,asr#312712eor x9,x11,x9,lsl#12713fmov d8,x92714fmov v8.d[1],x1027152716ldr w5,[x3,#240] // next starting point2717ld1 {v0.16b},[x0],x827182719ld1 {v16.4s,v17.4s},[x3] // load key schedule...2720sub w5,w5,#62721add x7,x3,x5,lsl#4 // pointer to last 7 round keys2722sub w5,w5,#22723ld1 {v18.4s,v19.4s},[x7],#322724ld1 {v20.4s,v21.4s},[x7],#322725ld1 {v22.4s,v23.4s},[x7],#322726ld1 {v7.4s},[x7]27272728add x7,x3,#322729mov w6,w527302731// Encryption2732.Lxts_enc:2733ld1 {v24.16b},[x0],#162734subs x2,x2,#32 // bias2735add w6,w5,#22736orr v3.16b,v0.16b,v0.16b2737orr v1.16b,v0.16b,v0.16b2738orr v28.16b,v0.16b,v0.16b2739orr v27.16b,v24.16b,v24.16b2740orr v29.16b,v24.16b,v24.16b2741b.lo .Lxts_inner_enc_tail2742eor v0.16b,v0.16b,v6.16b // before encryption, xor with iv2743eor v24.16b,v24.16b,v8.16b27442745// The iv for third block2746extr x22,x10,x10,#322747extr x10,x10,x9,#632748and w11,w19,w22,asr#312749eor x9,x11,x9,lsl#12750fmov d9,x92751fmov v9.d[1],x10275227532754orr v1.16b,v24.16b,v24.16b2755ld1 {v24.16b},[x0],#162756orr v2.16b,v0.16b,v0.16b2757orr v3.16b,v1.16b,v1.16b2758eor v27.16b,v24.16b,v9.16b // the third block2759eor v24.16b,v24.16b,v9.16b2760cmp x2,#322761b.lo .Lxts_outer_enc_tail27622763// The iv for fourth block2764extr x22,x10,x10,#322765extr x10,x10,x9,#632766and w11,w19,w22,asr#312767eor x9,x11,x9,lsl#12768fmov d10,x92769fmov v10.d[1],x1027702771ld1 {v25.16b},[x0],#162772// The iv for fifth block2773extr x22,x10,x10,#322774extr x10,x10,x9,#632775and w11,w19,w22,asr#312776eor x9,x11,x9,lsl#12777fmov d11,x92778fmov v11.d[1],x1027792780ld1 {v26.16b},[x0],#162781eor v25.16b,v25.16b,v10.16b // the fourth block2782eor v26.16b,v26.16b,v11.16b2783sub 
x2,x2,#32 // bias2784mov w6,w52785b .Loop5x_xts_enc27862787.align 42788.Loop5x_xts_enc:2789aese v0.16b,v16.16b2790aesmc v0.16b,v0.16b2791aese v1.16b,v16.16b2792aesmc v1.16b,v1.16b2793aese v24.16b,v16.16b2794aesmc v24.16b,v24.16b2795aese v25.16b,v16.16b2796aesmc v25.16b,v25.16b2797aese v26.16b,v16.16b2798aesmc v26.16b,v26.16b2799ld1 {v16.4s},[x7],#162800subs w6,w6,#22801aese v0.16b,v17.16b2802aesmc v0.16b,v0.16b2803aese v1.16b,v17.16b2804aesmc v1.16b,v1.16b2805aese v24.16b,v17.16b2806aesmc v24.16b,v24.16b2807aese v25.16b,v17.16b2808aesmc v25.16b,v25.16b2809aese v26.16b,v17.16b2810aesmc v26.16b,v26.16b2811ld1 {v17.4s},[x7],#162812b.gt .Loop5x_xts_enc28132814aese v0.16b,v16.16b2815aesmc v0.16b,v0.16b2816aese v1.16b,v16.16b2817aesmc v1.16b,v1.16b2818aese v24.16b,v16.16b2819aesmc v24.16b,v24.16b2820aese v25.16b,v16.16b2821aesmc v25.16b,v25.16b2822aese v26.16b,v16.16b2823aesmc v26.16b,v26.16b2824subs x2,x2,#0x50 // because .Lxts_enc_tail4x28252826aese v0.16b,v17.16b2827aesmc v0.16b,v0.16b2828aese v1.16b,v17.16b2829aesmc v1.16b,v1.16b2830aese v24.16b,v17.16b2831aesmc v24.16b,v24.16b2832aese v25.16b,v17.16b2833aesmc v25.16b,v25.16b2834aese v26.16b,v17.16b2835aesmc v26.16b,v26.16b2836csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo2837mov x7,x328382839aese v0.16b,v18.16b2840aesmc v0.16b,v0.16b2841aese v1.16b,v18.16b2842aesmc v1.16b,v1.16b2843aese v24.16b,v18.16b2844aesmc v24.16b,v24.16b2845aese v25.16b,v18.16b2846aesmc v25.16b,v25.16b2847aese v26.16b,v18.16b2848aesmc v26.16b,v26.16b2849add x0,x0,x6 // x0 is adjusted in such way that2850// at exit from the loop v1.16b-v26.16b2851// are loaded with last "words"2852add x6,x2,#0x60 // because .Lxts_enc_tail4x28532854aese v0.16b,v19.16b2855aesmc v0.16b,v0.16b2856aese v1.16b,v19.16b2857aesmc v1.16b,v1.16b2858aese v24.16b,v19.16b2859aesmc v24.16b,v24.16b2860aese v25.16b,v19.16b2861aesmc v25.16b,v25.16b2862aese v26.16b,v19.16b2863aesmc v26.16b,v26.16b28642865aese v0.16b,v20.16b2866aesmc v0.16b,v0.16b2867aese 
v1.16b,v20.16b2868aesmc v1.16b,v1.16b2869aese v24.16b,v20.16b2870aesmc v24.16b,v24.16b2871aese v25.16b,v20.16b2872aesmc v25.16b,v25.16b2873aese v26.16b,v20.16b2874aesmc v26.16b,v26.16b28752876aese v0.16b,v21.16b2877aesmc v0.16b,v0.16b2878aese v1.16b,v21.16b2879aesmc v1.16b,v1.16b2880aese v24.16b,v21.16b2881aesmc v24.16b,v24.16b2882aese v25.16b,v21.16b2883aesmc v25.16b,v25.16b2884aese v26.16b,v21.16b2885aesmc v26.16b,v26.16b28862887aese v0.16b,v22.16b2888aesmc v0.16b,v0.16b2889aese v1.16b,v22.16b2890aesmc v1.16b,v1.16b2891aese v24.16b,v22.16b2892aesmc v24.16b,v24.16b2893aese v25.16b,v22.16b2894aesmc v25.16b,v25.16b2895aese v26.16b,v22.16b2896aesmc v26.16b,v26.16b28972898eor v4.16b,v7.16b,v6.16b2899aese v0.16b,v23.16b2900// The iv for first block of one iteration2901extr x22,x10,x10,#322902extr x10,x10,x9,#632903and w11,w19,w22,asr#312904eor x9,x11,x9,lsl#12905fmov d6,x92906fmov v6.d[1],x102907eor v5.16b,v7.16b,v8.16b2908ld1 {v2.16b},[x0],#162909aese v1.16b,v23.16b2910// The iv for second block2911extr x22,x10,x10,#322912extr x10,x10,x9,#632913and w11,w19,w22,asr#312914eor x9,x11,x9,lsl#12915fmov d8,x92916fmov v8.d[1],x102917eor v17.16b,v7.16b,v9.16b2918ld1 {v3.16b},[x0],#162919aese v24.16b,v23.16b2920// The iv for third block2921extr x22,x10,x10,#322922extr x10,x10,x9,#632923and w11,w19,w22,asr#312924eor x9,x11,x9,lsl#12925fmov d9,x92926fmov v9.d[1],x102927eor v30.16b,v7.16b,v10.16b2928ld1 {v27.16b},[x0],#162929aese v25.16b,v23.16b2930// The iv for fourth block2931extr x22,x10,x10,#322932extr x10,x10,x9,#632933and w11,w19,w22,asr#312934eor x9,x11,x9,lsl#12935fmov d10,x92936fmov v10.d[1],x102937eor v31.16b,v7.16b,v11.16b2938ld1 {v28.16b},[x0],#162939aese v26.16b,v23.16b29402941// The iv for fifth block2942extr x22,x10,x10,#322943extr x10,x10,x9,#632944and w11,w19,w22,asr #312945eor x9,x11,x9,lsl #12946fmov d11,x92947fmov v11.d[1],x1029482949ld1 {v29.16b},[x0],#162950cbz x6,.Lxts_enc_tail4x2951ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]2952eor 
v4.16b,v4.16b,v0.16b2953eor v0.16b,v2.16b,v6.16b2954eor v5.16b,v5.16b,v1.16b2955eor v1.16b,v3.16b,v8.16b2956eor v17.16b,v17.16b,v24.16b2957eor v24.16b,v27.16b,v9.16b2958eor v30.16b,v30.16b,v25.16b2959eor v25.16b,v28.16b,v10.16b2960eor v31.16b,v31.16b,v26.16b2961st1 {v4.16b},[x1],#162962eor v26.16b,v29.16b,v11.16b2963st1 {v5.16b},[x1],#162964mov w6,w52965st1 {v17.16b},[x1],#162966ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]2967st1 {v30.16b},[x1],#162968st1 {v31.16b},[x1],#162969b.hs .Loop5x_xts_enc297029712972// If left 4 blocks, borrow the five block's processing.2973cmn x2,#0x102974b.ne .Loop5x_enc_after2975orr v11.16b,v10.16b,v10.16b2976orr v10.16b,v9.16b,v9.16b2977orr v9.16b,v8.16b,v8.16b2978orr v8.16b,v6.16b,v6.16b2979fmov x9,d112980fmov x10,v11.d[1]2981eor v0.16b,v6.16b,v2.16b2982eor v1.16b,v8.16b,v3.16b2983eor v24.16b,v27.16b,v9.16b2984eor v25.16b,v28.16b,v10.16b2985eor v26.16b,v29.16b,v11.16b2986b.eq .Loop5x_xts_enc29872988.Loop5x_enc_after:2989add x2,x2,#0x502990cbz x2,.Lxts_enc_done29912992add w6,w5,#22993subs x2,x2,#0x302994b.lo .Lxts_inner_enc_tail29952996eor v0.16b,v6.16b,v27.16b2997eor v1.16b,v8.16b,v28.16b2998eor v24.16b,v29.16b,v9.16b2999b .Lxts_outer_enc_tail30003001.align 43002.Lxts_enc_tail4x:3003add x0,x0,#163004eor v5.16b,v1.16b,v5.16b3005st1 {v5.16b},[x1],#163006eor v17.16b,v24.16b,v17.16b3007st1 {v17.16b},[x1],#163008eor v30.16b,v25.16b,v30.16b3009eor v31.16b,v26.16b,v31.16b3010st1 {v30.16b,v31.16b},[x1],#3230113012b .Lxts_enc_done3013.align 43014.Lxts_outer_enc_tail:3015aese v0.16b,v16.16b3016aesmc v0.16b,v0.16b3017aese v1.16b,v16.16b3018aesmc v1.16b,v1.16b3019aese v24.16b,v16.16b3020aesmc v24.16b,v24.16b3021ld1 {v16.4s},[x7],#163022subs w6,w6,#23023aese v0.16b,v17.16b3024aesmc v0.16b,v0.16b3025aese v1.16b,v17.16b3026aesmc v1.16b,v1.16b3027aese v24.16b,v17.16b3028aesmc v24.16b,v24.16b3029ld1 {v17.4s},[x7],#163030b.gt .Lxts_outer_enc_tail30313032aese v0.16b,v16.16b3033aesmc v0.16b,v0.16b3034aese v1.16b,v16.16b3035aesmc v1.16b,v1.16b3036aese 
v24.16b,v16.16b3037aesmc v24.16b,v24.16b3038eor v4.16b,v6.16b,v7.16b3039subs x2,x2,#0x303040// The iv for first block3041fmov x9,d93042fmov x10,v9.d[1]3043//mov w19,#0x873044extr x22,x10,x10,#323045extr x10,x10,x9,#633046and w11,w19,w22,asr#313047eor x9,x11,x9,lsl#13048fmov d6,x93049fmov v6.d[1],x103050eor v5.16b,v8.16b,v7.16b3051csel x6,x2,x6,lo // x6, w6, is zero at this point3052aese v0.16b,v17.16b3053aesmc v0.16b,v0.16b3054aese v1.16b,v17.16b3055aesmc v1.16b,v1.16b3056aese v24.16b,v17.16b3057aesmc v24.16b,v24.16b3058eor v17.16b,v9.16b,v7.16b30593060add x6,x6,#0x203061add x0,x0,x63062mov x7,x330633064aese v0.16b,v20.16b3065aesmc v0.16b,v0.16b3066aese v1.16b,v20.16b3067aesmc v1.16b,v1.16b3068aese v24.16b,v20.16b3069aesmc v24.16b,v24.16b3070aese v0.16b,v21.16b3071aesmc v0.16b,v0.16b3072aese v1.16b,v21.16b3073aesmc v1.16b,v1.16b3074aese v24.16b,v21.16b3075aesmc v24.16b,v24.16b3076aese v0.16b,v22.16b3077aesmc v0.16b,v0.16b3078aese v1.16b,v22.16b3079aesmc v1.16b,v1.16b3080aese v24.16b,v22.16b3081aesmc v24.16b,v24.16b3082aese v0.16b,v23.16b3083aese v1.16b,v23.16b3084aese v24.16b,v23.16b3085ld1 {v27.16b},[x0],#163086add w6,w5,#23087ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]3088eor v4.16b,v4.16b,v0.16b3089eor v5.16b,v5.16b,v1.16b3090eor v24.16b,v24.16b,v17.16b3091ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]3092st1 {v4.16b},[x1],#163093st1 {v5.16b},[x1],#163094st1 {v24.16b},[x1],#163095cmn x2,#0x303096b.eq .Lxts_enc_done3097.Lxts_encxor_one:3098orr v28.16b,v3.16b,v3.16b3099orr v29.16b,v27.16b,v27.16b3100nop31013102.Lxts_inner_enc_tail:3103cmn x2,#0x103104eor v1.16b,v28.16b,v6.16b3105eor v24.16b,v29.16b,v8.16b3106b.eq .Lxts_enc_tail_loop3107eor v24.16b,v29.16b,v6.16b3108.Lxts_enc_tail_loop:3109aese v1.16b,v16.16b3110aesmc v1.16b,v1.16b3111aese v24.16b,v16.16b3112aesmc v24.16b,v24.16b3113ld1 {v16.4s},[x7],#163114subs w6,w6,#23115aese v1.16b,v17.16b3116aesmc v1.16b,v1.16b3117aese v24.16b,v17.16b3118aesmc v24.16b,v24.16b3119ld1 {v17.4s},[x7],#163120b.gt 
.Lxts_enc_tail_loop31213122aese v1.16b,v16.16b3123aesmc v1.16b,v1.16b3124aese v24.16b,v16.16b3125aesmc v24.16b,v24.16b3126aese v1.16b,v17.16b3127aesmc v1.16b,v1.16b3128aese v24.16b,v17.16b3129aesmc v24.16b,v24.16b3130aese v1.16b,v20.16b3131aesmc v1.16b,v1.16b3132aese v24.16b,v20.16b3133aesmc v24.16b,v24.16b3134cmn x2,#0x203135aese v1.16b,v21.16b3136aesmc v1.16b,v1.16b3137aese v24.16b,v21.16b3138aesmc v24.16b,v24.16b3139eor v5.16b,v6.16b,v7.16b3140aese v1.16b,v22.16b3141aesmc v1.16b,v1.16b3142aese v24.16b,v22.16b3143aesmc v24.16b,v24.16b3144eor v17.16b,v8.16b,v7.16b3145aese v1.16b,v23.16b3146aese v24.16b,v23.16b3147b.eq .Lxts_enc_one3148eor v5.16b,v5.16b,v1.16b3149st1 {v5.16b},[x1],#163150eor v17.16b,v17.16b,v24.16b3151orr v6.16b,v8.16b,v8.16b3152st1 {v17.16b},[x1],#163153fmov x9,d83154fmov x10,v8.d[1]3155mov w19,#0x873156extr x22,x10,x10,#323157extr x10,x10,x9,#633158and w11,w19,w22,asr #313159eor x9,x11,x9,lsl #13160fmov d6,x93161fmov v6.d[1],x103162b .Lxts_enc_done31633164.Lxts_enc_one:3165eor v5.16b,v5.16b,v24.16b3166orr v6.16b,v6.16b,v6.16b3167st1 {v5.16b},[x1],#163168fmov x9,d63169fmov x10,v6.d[1]3170mov w19,#0x873171extr x22,x10,x10,#323172extr x10,x10,x9,#633173and w11,w19,w22,asr #313174eor x9,x11,x9,lsl #13175fmov d6,x93176fmov v6.d[1],x103177b .Lxts_enc_done3178.align 53179.Lxts_enc_done:3180// Process the tail block with cipher stealing.3181tst x21,#0xf3182b.eq .Lxts_abort31833184mov x20,x03185mov x13,x13186sub x1,x1,#163187.composite_enc_loop:3188subs x21,x21,#13189ldrb w15,[x1,x21]3190ldrb w14,[x20,x21]3191strb w15,[x13,x21]3192strb w14,[x1,x21]3193b.gt .composite_enc_loop3194.Lxts_enc_load_done:3195ld1 {v26.16b},[x1]3196eor v26.16b,v26.16b,v6.16b31973198// Encrypt the composite block to get the last second encrypted text block3199ldr w6,[x3,#240] // load key schedule...3200ld1 {v0.4s},[x3],#163201sub w6,w6,#23202ld1 {v1.4s},[x3],#16 // load key schedule...3203.Loop_final_enc:3204aese v26.16b,v0.16b3205aesmc v26.16b,v26.16b3206ld1 
{v0.4s},[x3],#16	// operands of the ld1 whose mnemonic ends the preceding line
	subs	w6,w6,#2
	aese	v26.16b,v1.16b
	aesmc	v26.16b,v26.16b
	ld1	{v1.4s},[x3],#16
	b.gt	.Loop_final_enc

	// Closing rounds: the last aese is not followed by aesmc; the key
	// block loaded last is xored in instead, then v6 is xored in.
	aese	v26.16b,v0.16b
	aesmc	v26.16b,v26.16b
	ld1	{v0.4s},[x3]
	aese	v26.16b,v1.16b
	eor	v26.16b,v26.16b,v0.16b
	eor	v26.16b,v26.16b,v6.16b
	st1	{v26.16b},[x1]

.Lxts_abort:
	// Reload x19-x22 and d8-d11 from the stack and release 64 bytes.
	ldp	x21,x22,[sp,#48]
	ldp	d8,d9,[sp,#32]
	ldp	d10,d11,[sp,#16]
	ldp	x19,x20,[sp],#64
.Lxts_enc_final_abort:
	ret
.size	aes_v8_xts_encrypt,.-aes_v8_xts_encrypt

// ---------------------------------------------------------------------
// aes_v8_xts_decrypt
//
// Register use on entry, as read by the code below:
//   x0  input pointer  (16-byte blocks are loaded from it)
//   x1  output pointer (16-byte blocks are stored to it)
//   x2  length in bytes
//   x3  key schedule applied to the data with aesd/aesimc; the word at
//       offset 240 is used as the round count
//   x4  key schedule applied to the initial tweak with aese/aesmc; the
//       word at offset 240 is used as the round count
//   x5  pointer to the 16-byte initial tweak (iv)
//
// A length of exactly 16 is handled inline with volatile registers only.
// Any other length goes through .Lxts_dec_big_size, which saves x19-x22
// and d8-d11 in a 64-byte stack frame and restores them at
// .Lxts_dec_abort.
//
// Tweak update (repeated many times below): x10:x9 holds the 128-bit
// tweak.  "extr x22,x10,x10,#32" puts the high word of x10 into w22, so
// "w22,asr #31" is all-ones exactly when the top bit of x10 is set.
// The pair is then shifted left by one bit and w19 (0x87) is xored into
// x9 when that mask is set.
// ---------------------------------------------------------------------
.globl	aes_v8_xts_decrypt
.type	aes_v8_xts_decrypt,%function
.align	5
aes_v8_xts_decrypt:
	AARCH64_VALID_CALL_TARGET
	cmp	x2,#16
	// Any length other than 16 bytes is handled by the general path.
	b.ne	.Lxts_dec_big_size

	// ---- exactly one block --------------------------------------
	// Encrypt the iv from [x5] with the x4 key schedule; result in v6.
	ldr	w6,[x4,#240]
	ld1	{v0.4s},[x4],#16
	ld1	{v6.16b},[x5]
	sub	w6,w6,#2
	ld1	{v1.4s},[x4],#16

.Loop_dec_small_iv_enc:
	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4],#16
	subs	w6,w6,#2
	aese	v6.16b,v1.16b
	aesmc	v6.16b,v6.16b
	ld1	{v1.4s},[x4],#16
	b.gt	.Loop_dec_small_iv_enc

	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4]
	aese	v6.16b,v1.16b
	eor	v6.16b,v6.16b,v0.16b

	// Load the block and xor it with the tweak before decrypting.
	ld1	{v0.16b},[x0]
	eor	v0.16b,v6.16b,v0.16b

	ldr	w6,[x3,#240]
	ld1	{v28.4s,v29.4s},[x3],#32	// first two key blocks

	aesd	v0.16b,v28.16b
	aesimc	v0.16b,v0.16b
	ld1	{v16.4s,v17.4s},[x3],#32	// next two key blocks
	aesd	v0.16b,v29.16b
	aesimc	v0.16b,v0.16b
	subs	w6,w6,#10			// zero when the round count is 10
	b.eq	.Lxts_128_dec
.Lxts_dec_round_loop:
	// Two extra rounds per pass while w6 stays positive.
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	ld1	{v16.4s},[x3],#16
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	ld1	{v17.4s},[x3],#16
	subs	w6,w6,#2
	b.gt	.Lxts_dec_round_loop
.Lxts_128_dec:
	// Remaining rounds, key blocks streamed from [x3] into v18-v23, v7.
	ld1	{v18.4s,v19.4s},[x3],#32
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	ld1	{v20.4s,v21.4s},[x3],#32
	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	ld1	{v22.4s,v23.4s},[x3],#32
	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	ld1	{v7.4s},[x3]
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v0.16b,v23.16b			// last round: no aesimc
	eor	v0.16b,v0.16b,v7.16b		// xor with the last key block
	eor	v0.16b,v6.16b,v0.16b		// xor with the tweak again
	st1	{v0.16b},[x1]
	b	.Lxts_dec_final_abort		// no frame was pushed on this path

	// ---- general path -------------------------------------------
.Lxts_dec_big_size:
	// 64-byte frame holding x19-x22 and d8-d11.
	stp	x19,x20,[sp,#-64]!
	stp	x21,x22,[sp,#48]
	stp	d8,d9,[sp,#32]
	stp	d10,d11,[sp,#16]

	and	x21,x2,#0xf		// x21 = length mod 16 (tail byte count)
	and	x2,x2,#-16		// x2  = length rounded down to 16
	subs	x2,x2,#16
	mov	x8,#16			// post-increment used by the first load
	b.lo	.Lxts_dec_abort		// less than one whole block: nothing to do

	// Encrypt the iv from [x5] with the x4 key schedule; result in v6.
	ldr	w6,[x4,#240]
	ld1	{v0.4s},[x4],#16
	ld1	{v6.16b},[x5]
	sub	w6,w6,#2
	ld1	{v1.4s},[x4],#16

.Loop_dec_iv_enc:
	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4],#16
	subs	w6,w6,#2
	aese	v6.16b,v1.16b
	aesmc	v6.16b,v6.16b
	ld1	{v1.4s},[x4],#16
	b.gt	.Loop_dec_iv_enc

	aese	v6.16b,v0.16b
	aesmc	v6.16b,v6.16b
	ld1	{v0.4s},[x4]
	aese	v6.16b,v1.16b
	eor	v6.16b,v6.16b,v0.16b

	// Tweaks are kept in v6, v8, v9, v10, v11; x9 = low half and
	// x10 = high half of the most recently derived one.
	// Tweak for the second block -> v8.
	fmov	x9,d6
	fmov	x10,v6.d[1]
	mov	w19,#0x87
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d8,x9
	fmov	v8.d[1],x10

	ldr	w5,[x3,#240]			// round count of the data key

	// Tweak for the third block -> v9.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d9,x9
	fmov	v9.d[1],x10

	ld1	{v16.4s,v17.4s},[x3]		// first two key blocks
	sub	w5,w5,#6
	add	x7,x3,x5,lsl #4			// x7 -> last seven key blocks
	sub	w5,w5,#2			// w5 = rounds - 8, loop bias
	ld1	{v18.4s,v19.4s},[x7],#32
	ld1	{v20.4s,v21.4s},[x7],#32
	ld1	{v22.4s,v23.4s},[x7],#32
	ld1	{v7.4s},[x7]			// last key block

	// Tweak for the fourth block -> v10.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d10,x9
	fmov	v10.d[1],x10

	add	x7,x3,#32			// x7 -> third key block
	mov	w6,w5
	b	.Lxts_dec

	// ---- decryption ---------------------------------------------
.align	5
.Lxts_dec:
	tst	x21,#0xf
	b.eq	.Lxts_dec_begin
	// There are tail bytes: take one whole block out of the count.
	subs	x2,x2,#16
	csel	x8,xzr,x8,eq		// x8 = 0 when x2 is now zero
	ld1	{v0.16b},[x0],#16
	b.lo	.Lxts_done		// x2 went below zero: straight to the tail code
	sub	x0,x0,#16		// otherwise undo the post-increment
.Lxts_dec_begin:
	ld1	{v0.16b},[x0],x8	// advances x0 by x8 (16 or 0)
	subs	x2,x2,#32		// bias
	add	w6,w5,#2
	orr	v3.16b,v0.16b,v0.16b
	orr	v1.16b,v0.16b,v0.16b
	orr	v28.16b,v0.16b,v0.16b
	ld1	{v24.16b},[x0],#16
	orr	v27.16b,v24.16b,v24.16b
	orr	v29.16b,v24.16b,v24.16b
	b.lo	.Lxts_inner_dec_tail
	eor	v0.16b,v0.16b,v6.16b	// xor with the tweak before decrypting
	eor	v24.16b,v24.16b,v8.16b

	orr	v1.16b,v24.16b,v24.16b
	ld1	{v24.16b},[x0],#16
	orr	v2.16b,v0.16b,v0.16b
	orr	v3.16b,v1.16b,v1.16b
	eor	v27.16b,v24.16b,v9.16b	// third block xored with the third tweak
	eor	v24.16b,v24.16b,v9.16b
	cmp	x2,#32
	b.lo	.Lxts_outer_dec_tail

	ld1	{v25.16b},[x0],#16

	// Tweak for the fifth block -> v11.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d11,x9
	fmov	v11.d[1],x10

	ld1	{v26.16b},[x0],#16
	eor	v25.16b,v25.16b,v10.16b	// fourth block
	eor	v26.16b,v26.16b,v11.16b	// fifth block
	sub	x2,x2,#32		// bias
	mov	w6,w5
	b	.Loop5x_xts_dec

	// Five blocks (v0, v1, v24, v25, v26) per pass.
.align	4
.Loop5x_xts_dec:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	ld1	{v16.4s},[x7],#16	// next key block
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	ld1	{v17.4s},[x7],#16	// next key block
	b.gt	.Loop5x_xts_dec

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v16.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v16.16b
	aesimc	v26.16b,v26.16b
	// The flags set here are consumed by the csel below and by the
	// b.hs at the bottom of the loop; nothing in between writes flags.
	subs	x2,x2,#0x50

	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v17.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v17.16b
	aesimc	v26.16b,v26.16b
	csel	x6,xzr,x2,gt		// x6 = 0 if x2 > 0, else x2 ("gt" is intended)
	mov	x7,x3

	aesd	v0.16b,v18.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v18.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v18.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v18.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v18.16b
	aesimc	v26.16b,v26.16b
	add	x0,x0,x6		// pull x0 back by the shortfall so the
					// five loads below stay inside the input
	add	x6,x2,#0x60		// zero exactly when x2 == -0x60 (tail4x case)

	aesd	v0.16b,v19.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v19.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v19.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v19.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v19.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v20.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v20.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v21.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v21.16b
	aesimc	v26.16b,v26.16b

	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	aesd	v25.16b,v22.16b
	aesimc	v25.16b,v25.16b
	aesd	v26.16b,v22.16b
	aesimc	v26.16b,v26.16b

	// Last round, interleaved with: (a) forming last-key-block ^ tweak
	// in v4, v5, v17, v30, v31, (b) deriving the next five tweaks, and
	// (c) loading the next five input blocks.
	eor	v4.16b,v7.16b,v6.16b
	aesd	v0.16b,v23.16b
	// Next tweak for the first block -> v6.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d6,x9
	fmov	v6.d[1],x10
	eor	v5.16b,v7.16b,v8.16b
	ld1	{v2.16b},[x0],#16
	aesd	v1.16b,v23.16b
	// Next tweak for the second block -> v8.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d8,x9
	fmov	v8.d[1],x10
	eor	v17.16b,v7.16b,v9.16b
	ld1	{v3.16b},[x0],#16
	aesd	v24.16b,v23.16b
	// Next tweak for the third block -> v9.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d9,x9
	fmov	v9.d[1],x10
	eor	v30.16b,v7.16b,v10.16b
	ld1	{v27.16b},[x0],#16
	aesd	v25.16b,v23.16b
	// Next tweak for the fourth block -> v10.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d10,x9
	fmov	v10.d[1],x10
	eor	v31.16b,v7.16b,v11.16b
	ld1	{v28.16b},[x0],#16
	aesd	v26.16b,v23.16b

	// Next tweak for the fifth block -> v11.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d11,x9
	fmov	v11.d[1],x10

	ld1	{v29.16b},[x0],#16
	cbz	x6,.Lxts_dec_tail4x
	ld1	{v16.4s},[x7],#16	// reload first key block
	// Finish the five outputs and pre-xor the next five inputs.
	eor	v4.16b,v4.16b,v0.16b
	eor	v0.16b,v2.16b,v6.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v1.16b,v3.16b,v8.16b
	eor	v17.16b,v17.16b,v24.16b
	eor	v24.16b,v27.16b,v9.16b
	eor	v30.16b,v30.16b,v25.16b
	eor	v25.16b,v28.16b,v10.16b
	eor	v31.16b,v31.16b,v26.16b
	st1	{v4.16b},[x1],#16
	eor	v26.16b,v29.16b,v11.16b
	st1	{v5.16b},[x1],#16
	mov	w6,w5
	st1	{v17.16b},[x1],#16
	ld1	{v17.4s},[x7],#16	// reload second key block
	st1	{v30.16b},[x1],#16
	st1	{v31.16b},[x1],#16
	b.hs	.Loop5x_xts_dec		// flags from "subs x2,x2,#0x50" above

	cmn	x2,#0x10
	b.ne	.Loop5x_dec_after
	// x2 == -0x10: four blocks remain.  Shift the tweaks down one slot
	// (so the tweak set becomes v6, v6, v8, v9, v10 in old terms) and
	// run the five-block loop once more; .Lxts_dec_tail4x then stores
	// only the last four results.
	orr	v11.16b,v10.16b,v10.16b
	orr	v10.16b,v9.16b,v9.16b
	orr	v9.16b,v8.16b,v8.16b
	orr	v8.16b,v6.16b,v6.16b
	fmov	x9,d11
	fmov	x10,v11.d[1]
	eor	v0.16b,v6.16b,v2.16b
	eor	v1.16b,v8.16b,v3.16b
	eor	v24.16b,v27.16b,v9.16b
	eor	v25.16b,v28.16b,v10.16b
	eor	v26.16b,v29.16b,v11.16b
	b.eq	.Loop5x_xts_dec		// flags still from the cmn: always taken here

.Loop5x_dec_after:
	add	x2,x2,#0x50
	cbz	x2,.Lxts_done

	add	w6,w5,#2
	subs	x2,x2,#0x30
	b.lo	.Lxts_inner_dec_tail

	eor	v0.16b,v6.16b,v27.16b
	eor	v1.16b,v8.16b,v28.16b
	eor	v24.16b,v29.16b,v9.16b
	b	.Lxts_outer_dec_tail

.align	4
.Lxts_dec_tail4x:
	// Store the results held in v1, v24, v25, v26 (v0's is not stored).
	add	x0,x0,#16
	tst	x21,#0xf		// flags consumed by the b.eq below
	eor	v5.16b,v1.16b,v4.16b
	st1	{v5.16b},[x1],#16
	eor	v17.16b,v24.16b,v17.16b
	st1	{v17.16b},[x1],#16
	eor	v30.16b,v25.16b,v30.16b
	eor	v31.16b,v26.16b,v31.16b
	st1	{v30.16b,v31.16b},[x1],#32

	b.eq	.Lxts_dec_abort		// no tail bytes: finished
	ld1	{v0.16b},[x0],#16
	b	.Lxts_done

	// Three blocks (v0, v1, v24) at a time.
.align	4
.Lxts_outer_dec_tail:
	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lxts_outer_dec_tail

	aesd	v0.16b,v16.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	eor	v4.16b,v6.16b,v7.16b
	subs	x2,x2,#0x30
	// Next tweak for the first block -> v6, derived from v9.
	fmov	x9,d9
	fmov	x10,v9.d[1]
	mov	w19,#0x87
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d6,x9
	fmov	v6.d[1],x10
	eor	v5.16b,v8.16b,v7.16b
	csel	x6,x2,x6,lo		// x6 = x2 on borrow, else keep x6 (the
					// loop above exits when w6 reaches zero)
	aesd	v0.16b,v17.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	eor	v17.16b,v9.16b,v7.16b
	// Next tweak for the second block -> v8.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d8,x9
	fmov	v8.d[1],x10

	add	x6,x6,#0x20
	add	x0,x0,x6		// reposition x0 for the load of v27 below

	mov	x7,x3

	// Next tweak for the third block -> v9.
	extr	x22,x10,x10,#32
	extr	x10,x10,x9,#63
	and	w11,w19,w22,asr #31
	eor	x9,x11,x9,lsl #1
	fmov	d9,x9
	fmov	v9.d[1],x10

	aesd	v0.16b,v20.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	aesd	v0.16b,v21.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	aesd	v0.16b,v22.16b
	aesimc	v0.16b,v0.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	ld1	{v27.16b},[x0],#16
	aesd	v0.16b,v23.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	ld1	{v16.4s},[x7],#16	// reload first key block
	add	w6,w5,#2
	eor	v4.16b,v4.16b,v0.16b
	eor	v5.16b,v5.16b,v1.16b
	eor	v24.16b,v24.16b,v17.16b
	ld1	{v17.4s},[x7],#16	// reload second key block
	st1	{v4.16b},[x1],#16
	st1	{v5.16b},[x1],#16
	st1	{v24.16b},[x1],#16

	cmn	x2,#0x30		// flags for the b.eq; add/sub below keep them
	add	x2,x2,#0x30
	b.eq	.Lxts_done		// taken with x2 == 0
	sub	x2,x2,#0x30		// not taken: restore x2
	orr	v28.16b,v3.16b,v3.16b
	orr	v29.16b,v27.16b,v27.16b
	nop

	// One or two blocks (v1, v24).
.Lxts_inner_dec_tail:
	// x2 == -0x10 means two blocks are left.
	cmn	x2,#0x10
	eor	v1.16b,v28.16b,v6.16b
	eor	v24.16b,v29.16b,v8.16b
	b.eq	.Lxts_dec_tail_loop
	eor	v24.16b,v29.16b,v6.16b	// otherwise v24 uses the first tweak
.Lxts_dec_tail_loop:
	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	ld1	{v16.4s},[x7],#16
	subs	w6,w6,#2
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	ld1	{v17.4s},[x7],#16
	b.gt	.Lxts_dec_tail_loop

	aesd	v1.16b,v16.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v16.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v17.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v17.16b
	aesimc	v24.16b,v24.16b
	aesd	v1.16b,v20.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v20.16b
	aesimc	v24.16b,v24.16b
	cmn	x2,#0x20		// flags consumed by "b.eq .Lxts_dec_one"
	aesd	v1.16b,v21.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v21.16b
	aesimc	v24.16b,v24.16b
	eor	v5.16b,v6.16b,v7.16b
	aesd	v1.16b,v22.16b
	aesimc	v1.16b,v1.16b
	aesd	v24.16b,v22.16b
	aesimc	v24.16b,v24.16b
	eor	v17.16b,v8.16b,v7.16b
	aesd	v1.16b,v23.16b
	aesd	v24.16b,v23.16b
	b.eq	.Lxts_dec_one
	// Two blocks: store both, then leave the next two tweaks in v6/v8.
	eor	v5.16b,v5.16b,v1.16b
	eor	v17.16b,v17.16b,v24.16b
	orr	v6.16b,v9.16b,v9.16b
	orr	v8.16b,v10.16b,v10.16b
	st1	{v5.16b},[x1],#16
	st1	{v17.16b},[x1],#16
	add	x2,x2,#16
	b	.Lxts_done

.Lxts_dec_one:
	// One block: store v24's result, then leave the next two tweaks
	// in v6/v8.
	eor	v5.16b,v5.16b,v24.16b
	orr	v6.16b,v8.16b,v8.16b
	orr	v8.16b,v9.16b,v9.16b
	st1	{v5.16b},[x1],#16
	add	x2,x2,#32

	// ---- tail: ciphertext stealing ------------------------------
.Lxts_done:
	tst	x21,#0xf
	b.eq	.Lxts_dec_abort		// no tail bytes: finished
	// Handle the last whole block together with the x21 tail bytes.
	mov	x7,x3			// keep a copy of the key pointer for the second pass
	cbnz	x2,.Lxts_dec_1st_done	// x2 != 0: v0 is used as already loaded
	ld1	{v0.16b},[x0],#16

	// First pass: decrypt v0 under tweak v8 and store it at [x1].
.Lxts_dec_1st_done:
	eor	v26.16b,v0.16b,v8.16b
	ldr	w6,[x3,#240]
	ld1	{v0.4s},[x3],#16
	sub	w6,w6,#2
	ld1	{v1.4s},[x3],#16
.Loop_final_2nd_dec:
	aesd	v26.16b,v0.16b
	aesimc	v26.16b,v26.16b
	ld1	{v0.4s},[x3],#16	// next key block
	subs	w6,w6,#2
	aesd	v26.16b,v1.16b
	aesimc	v26.16b,v26.16b
	ld1	{v1.4s},[x3],#16	// next key block
	b.gt	.Loop_final_2nd_dec

	aesd	v26.16b,v0.16b
	aesimc	v26.16b,v26.16b
	ld1	{v0.4s},[x3]
	aesd	v26.16b,v1.16b
	eor	v26.16b,v26.16b,v0.16b
	eor	v26.16b,v26.16b,v8.16b
	st1	{v26.16b},[x1]

	mov	x20,x0			// x20 -> tail bytes of the input
	add	x13,x1,#16		// x13 -> output position after this block

	// For each tail index (x21-1 down to 0): move the byte just written
	// at [x1] to [x13], and replace it with the input tail byte, so that
	// [x1] becomes the composite block and [x13] receives the short
	// final output.
.composite_dec_loop:
	subs	x21,x21,#1
	ldrb	w15,[x1,x21]
	ldrb	w14,[x20,x21]
	strb	w15,[x13,x21]
	strb	w14,[x1,x21]
	b.gt	.composite_dec_loop
.Lxts_dec_load_done:
	ld1	{v26.16b},[x1]
	eor	v26.16b,v26.16b,v6.16b

	// Second pass: decrypt the composite block under tweak v6, using
	// the key pointer saved in x7, and store it back at [x1].
	ldr	w6,[x7,#240]
	ld1	{v0.4s},[x7],#16
	sub	w6,w6,#2
	ld1	{v1.4s},[x7],#16
.Loop_final_dec:
	aesd	v26.16b,v0.16b
	aesimc	v26.16b,v26.16b
	ld1	{v0.4s},[x7],#16	// next key block
	subs	w6,w6,#2
	aesd	v26.16b,v1.16b
	aesimc	v26.16b,v26.16b
	ld1	{v1.4s},[x7],#16	// next key block
	b.gt	.Loop_final_dec

	aesd	v26.16b,v0.16b
	aesimc	v26.16b,v26.16b
	ld1	{v0.4s},[x7]
	aesd	v26.16b,v1.16b
	eor	v26.16b,v26.16b,v0.16b
	eor	v26.16b,v26.16b,v6.16b
	st1	{v26.16b},[x1]

.Lxts_dec_abort:
	// Restore x19-x22 and d8-d11 saved at .Lxts_dec_big_size and pop
	// the 64-byte frame.
	ldp	x21,x22,[sp,#48]
	ldp	d8,d9,[sp,#32]
	ldp	d10,d11,[sp,#16]
	ldp	x19,x20,[sp],#64

.Lxts_dec_final_abort:
	ret
.size	aes_v8_xts_decrypt,.-aes_v8_xts_decrypt
#endif