Path: blob/main/sys/crypto/openssl/aarch64/aes-gcm-armv8-unroll8_64.S
39507 views
/* Do not modify. This file is auto-generated from aes-gcm-armv8-unroll8_64.pl. */1#include "arm_arch.h"23#if __ARM_MAX_ARCH__>=84.arch armv8-a+crypto5.text6.globl unroll8_eor3_aes_gcm_enc_128_kernel7.type unroll8_eor3_aes_gcm_enc_128_kernel,%function8.align 49unroll8_eor3_aes_gcm_enc_128_kernel:10AARCH64_VALID_CALL_TARGET11cbz x1, .L128_enc_ret12stp d8, d9, [sp, #-80]!13lsr x9, x1, #314mov x16, x415mov x8, x516stp d10, d11, [sp, #16]17stp d12, d13, [sp, #32]18stp d14, d15, [sp, #48]19mov x5, #0xc20000000000000020stp x5, xzr, [sp, #64]21add x10, sp, #642223mov x15, #0x100000000 //set up counter increment24movi v31.16b, #0x025mov v31.d[1], x1526mov x5, x927ld1 { v0.16b}, [x16] //CTR block 02829sub x5, x5, #1 //byte_len - 13031and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)3233rev32 v30.16b, v0.16b //set up reversed counter3435add v30.4s, v30.4s, v31.4s //CTR block 03637rev32 v1.16b, v30.16b //CTR block 138add v30.4s, v30.4s, v31.4s //CTR block 13940rev32 v2.16b, v30.16b //CTR block 241add v30.4s, v30.4s, v31.4s //CTR block 24243rev32 v3.16b, v30.16b //CTR block 344add v30.4s, v30.4s, v31.4s //CTR block 34546rev32 v4.16b, v30.16b //CTR block 447add v30.4s, v30.4s, v31.4s //CTR block 44849rev32 v5.16b, v30.16b //CTR block 550add v30.4s, v30.4s, v31.4s //CTR block 551ldp q26, q27, [x8, #0] //load rk0, rk15253rev32 v6.16b, v30.16b //CTR block 654add v30.4s, v30.4s, v31.4s //CTR block 65556rev32 v7.16b, v30.16b //CTR block 757add v30.4s, v30.4s, v31.4s //CTR block 75859aese v4.16b, v26.16b60aesmc v4.16b, v4.16b //AES block 4 - round 061aese v6.16b, v26.16b62aesmc v6.16b, v6.16b //AES block 6 - round 063aese v3.16b, v26.16b64aesmc v3.16b, v3.16b //AES block 3 - round 06566aese v0.16b, v26.16b67aesmc v0.16b, v0.16b //AES block 0 - round 068aese v1.16b, v26.16b69aesmc v1.16b, v1.16b //AES block 1 - round 070aese v2.16b, v26.16b71aesmc v2.16b, v2.16b //AES block 2 - round 07273aese v7.16b, v26.16b74aesmc v7.16b, v7.16b //AES block 7 - round 075aese v5.16b, v26.16b76aesmc v5.16b, v5.16b //AES block 5 - round 077ldp q28, q26, [x8, #32] //load rk2, rk37879aese v3.16b, v27.16b80aesmc v3.16b, v3.16b //AES block 3 - round 18182aese v7.16b, v27.16b83aesmc v7.16b, v7.16b //AES block 7 - round 184aese v5.16b, v27.16b85aesmc v5.16b, v5.16b //AES block 5 - round 186aese v4.16b, v27.16b87aesmc v4.16b, v4.16b //AES block 4 - round 18889aese v2.16b, v27.16b90aesmc v2.16b, v2.16b //AES block 2 - round 191aese v6.16b, v27.16b92aesmc v6.16b, v6.16b //AES block 6 - round 193aese v0.16b, v27.16b94aesmc v0.16b, v0.16b //AES block 0 - round 19596aese v5.16b, v28.16b97aesmc v5.16b, v5.16b //AES block 5 - round 298aese v1.16b, v27.16b99aesmc v1.16b, v1.16b //AES block 1 - round 1100aese v0.16b, v28.16b101aesmc v0.16b, v0.16b //AES block 0 - round 2102103aese v2.16b, v28.16b104aesmc v2.16b, v2.16b //AES block 2 - round 2105aese v3.16b, v28.16b106aesmc v3.16b, v3.16b //AES block 3 - round 2107aese v7.16b, v28.16b108aesmc v7.16b, v7.16b //AES block 7 - round 2109110aese v1.16b, v28.16b111aesmc v1.16b, v1.16b //AES block 1 - round 2112aese v6.16b, v28.16b113aesmc v6.16b, v6.16b //AES block 6 - round 2114aese v4.16b, v28.16b115aesmc v4.16b, v4.16b //AES block 4 - round 2116117aese v2.16b, v26.16b118aesmc v2.16b, v2.16b //AES block 2 - round 3119120ldp q27, q28, [x8, #64] //load rk4, rk5121aese v5.16b, v26.16b122aesmc v5.16b, v5.16b //AES block 5 - round 3123aese v0.16b, v26.16b124aesmc v0.16b, v0.16b //AES block 0 - round 3125126aese v4.16b, v26.16b127aesmc v4.16b, v4.16b //AES block 4 - round 3128aese v3.16b, v26.16b129aesmc v3.16b, v3.16b //AES block 3 - round 3130aese v6.16b, v26.16b131aesmc v6.16b, v6.16b //AES block 6 - round 3132133aese v7.16b, v26.16b134aesmc v7.16b, v7.16b //AES block 7 - round 3135136aese v6.16b, v27.16b137aesmc v6.16b, v6.16b //AES block 6 - round 4138aese v1.16b, v26.16b139aesmc v1.16b, v1.16b //AES block 1 - round 3140aese v5.16b, v27.16b141aesmc v5.16b, v5.16b //AES block 5 - round 4142143aese v7.16b, v27.16b144aesmc v7.16b, v7.16b //AES block 7 - round 4145aese v4.16b, v27.16b146aesmc v4.16b, v4.16b //AES block 4 - round 4147aese v0.16b, v27.16b148aesmc v0.16b, v0.16b //AES block 0 - round 4149150aese v1.16b, v27.16b151aesmc v1.16b, v1.16b //AES block 1 - round 4152aese v2.16b, v27.16b153aesmc v2.16b, v2.16b //AES block 2 - round 4154aese v3.16b, v27.16b155aesmc v3.16b, v3.16b //AES block 3 - round 4156157aese v7.16b, v28.16b158aesmc v7.16b, v7.16b //AES block 7 - round 5159aese v0.16b, v28.16b160aesmc v0.16b, v0.16b //AES block 0 - round 5161ldp q26, q27, [x8, #96] //load rk6, rk7162163aese v1.16b, v28.16b164aesmc v1.16b, v1.16b //AES block 1 - round 5165aese v3.16b, v28.16b166aesmc v3.16b, v3.16b //AES block 3 - round 5167aese v2.16b, v28.16b168aesmc v2.16b, v2.16b //AES block 2 - round 5169170aese v4.16b, v28.16b171aesmc v4.16b, v4.16b //AES block 4 - round 5172aese v5.16b, v28.16b173aesmc v5.16b, v5.16b //AES block 5 - round 5174aese v6.16b, v28.16b175aesmc v6.16b, v6.16b //AES block 6 - round 5176177aese v4.16b, v26.16b178aesmc v4.16b, v4.16b //AES block 4 - round 6179aese v3.16b, v26.16b180aesmc v3.16b, v3.16b //AES block 3 - round 6181aese v2.16b, v26.16b182aesmc v2.16b, v2.16b //AES block 2 - round 6183184aese v7.16b, v26.16b185aesmc v7.16b, v7.16b //AES block 7 - round 6186aese v6.16b, v26.16b187aesmc v6.16b, v6.16b //AES block 6 - round 6188aese v5.16b, v26.16b189aesmc v5.16b, v5.16b //AES block 5 - round 6190191aese v0.16b, v26.16b192aesmc v0.16b, v0.16b //AES block 0 - round 6193aese v1.16b, v26.16b194aesmc v1.16b, v1.16b //AES block 1 - round 6195ldp q28, q26, [x8, #128] //load rk8, rk9196197aese v5.16b, v27.16b198aesmc v5.16b, v5.16b //AES block 5 - round 7199200ld1 { v19.16b}, [x3]201ext v19.16b, v19.16b, v19.16b, #8202rev64 v19.16b, v19.16b203204aese v7.16b, v27.16b205aesmc v7.16b, v7.16b //AES block 7 - round 7206207aese v4.16b, v27.16b208aesmc v4.16b, v4.16b //AES block 4 - round 7209aese v3.16b, v27.16b210aesmc v3.16b, v3.16b //AES block 3 - round 7211aese v6.16b, v27.16b212aesmc v6.16b, v6.16b //AES block 6 - round 7213214aese v1.16b, v27.16b215aesmc v1.16b, v1.16b //AES block 1 - round 7216aese v2.16b, v27.16b217aesmc v2.16b, v2.16b //AES block 2 - round 7218aese v0.16b, v27.16b219aesmc v0.16b, v0.16b //AES block 0 - round 7220221aese v3.16b, v28.16b222aesmc v3.16b, v3.16b //AES block 8k+11 - round 8223aese v6.16b, v28.16b224aesmc v6.16b, v6.16b //AES block 8k+14 - round 8225aese v2.16b, v28.16b226aesmc v2.16b, v2.16b //AES block 8k+10 - round 8227228aese v7.16b, v28.16b229aesmc v7.16b, v7.16b //AES block 8k+15 - round 8230aese v0.16b, v28.16b231aesmc v0.16b, v0.16b //AES block 8k+8 - round 8232ldr q27, [x8, #160] //load rk10233234aese v3.16b, v26.16b //AES block 8k+11 - round 9235aese v4.16b, v28.16b236aesmc v4.16b, v4.16b //AES block 8k+12 - round 8237aese v2.16b, v26.16b //AES block 8k+10 - round 9238239aese v5.16b, v28.16b240aesmc v5.16b, v5.16b //AES block 8k+13 - round 8241aese v1.16b, v28.16b242aesmc v1.16b, v1.16b //AES block 8k+9 - round 8243aese v6.16b, v26.16b //AES block 8k+14 - round 9244245aese v4.16b, v26.16b //AES block 8k+12 - round 9246add x5, x5, x0247aese v0.16b, v26.16b //AES block 8k+8 - round 9248249aese v7.16b, v26.16b //AES block 8k+15 - round 9250aese v5.16b, v26.16b //AES block 8k+13 - round 9251aese v1.16b, v26.16b //AES block 8k+9 - round 9252253add x4, x0, x1, lsr #3 //end_input_ptr254cmp x0, x5 //check if we have <= 8 blocks255b.ge .L128_enc_tail //handle tail256257ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext258259ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext260261ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext262263ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext264cmp x0, x5 //check if we have <= 8 blocks265266.inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result267rev32 v0.16b, v30.16b //CTR block 8268add v30.4s, v30.4s, v31.4s //CTR block 8269270.inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result271stp q8, q9, [x2], #32 //AES block 0, 1 - store result272273rev32 v1.16b, v30.16b //CTR block 9274.inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result275add v30.4s, v30.4s, v31.4s //CTR block 9276277.inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result278.inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result279.inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result280281rev32 v2.16b, v30.16b //CTR block 10282add v30.4s, v30.4s, v31.4s //CTR block 10283284.inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result285.inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b,v27.16b //AES block 7 - result286stp q10, q11, [x2], #32 //AES block 2, 3 - store result287288rev32 v3.16b, v30.16b //CTR block 11289add v30.4s, v30.4s, v31.4s //CTR block 11290stp q12, q13, [x2], #32 //AES block 4, 5 - store result291292stp q14, q15, [x2], #32 //AES block 6, 7 - store result293294rev32 v4.16b, v30.16b //CTR block 12295add v30.4s, v30.4s, v31.4s //CTR block 12296b.ge .L128_enc_prepretail //do prepretail297298.L128_enc_main_loop: //main loop start299rev32 v5.16b, v30.16b //CTR block 8k+13300ldr q20, [x3, #128] //load h5l | h5h301ext v20.16b, v20.16b, v20.16b, #8302ldr q22, [x3, #160] //load h6l | h6h303ext v22.16b, v22.16b, v22.16b, #8304add v30.4s, v30.4s, v31.4s //CTR block 8k+13305306rev64 v9.16b, v9.16b //GHASH block 8k+1307rev64 v8.16b, v8.16b //GHASH block 8k308ldr q23, [x3, #176] //load h7l | h7h309ext v23.16b, v23.16b, v23.16b, #8310ldr q25, [x3, #208] //load h8l | h8h311ext v25.16b, v25.16b, v25.16b, #8312313rev32 v6.16b, v30.16b //CTR block 8k+14314add v30.4s, v30.4s, v31.4s //CTR block 8k+14315ext v19.16b, v19.16b, v19.16b, #8 //PRE 0316317ldr q21, [x3, #144] //load h6k | h5k318ldr q24, [x3, #192] //load h8k | h7k319rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free)320rev64 v11.16b, v11.16b //GHASH block 8k+3321322ldp q26, q27, [x8, #0] //load rk0, rk1323eor v8.16b, v8.16b, v19.16b //PRE 1324rev32 v7.16b, v30.16b //CTR block 8k+15325326rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free)327328pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high329rev64 v10.16b, v10.16b //GHASH block 8k+2330pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high331332pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low333trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid334pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low335336trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid337pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high338pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high339340eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low341ldr q23, [x3, #80] //load h3l | h3h342ext v23.16b, v23.16b, v23.16b, #8343ldr q25, [x3, #112] //load h3l | h3h344ext v25.16b, v25.16b, v25.16b, #8345aese v5.16b, v26.16b346aesmc v5.16b, v5.16b //AES block 8k+13 - round 0347348aese v1.16b, v26.16b349aesmc v1.16b, v1.16b //AES block 8k+9 - round 0350aese v4.16b, v26.16b351aesmc v4.16b, v4.16b //AES block 8k+12 - round 0352eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high353354add v30.4s, v30.4s, v31.4s //CTR block 8k+15355aese v2.16b, v26.16b356aesmc v2.16b, v2.16b //AES block 8k+10 - round 0357eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid358359aese v6.16b, v26.16b360aesmc v6.16b, v6.16b //AES block 8k+14 - round 0361aese v1.16b, v27.16b362aesmc v1.16b, v1.16b //AES block 8k+9 - round 1363aese v0.16b, v26.16b364aesmc v0.16b, v0.16b //AES block 8k+8 - round 0365366aese v2.16b, v27.16b367aesmc v2.16b, v2.16b //AES block 8k+10 - round 1368aese v3.16b, v26.16b369aesmc v3.16b, v3.16b //AES block 8k+11 - round 0370pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low371372aese v5.16b, v27.16b373aesmc v5.16b, v5.16b //AES block 8k+13 - round 1374aese v7.16b, v26.16b375aesmc v7.16b, v7.16b //AES block 8k+15 - round 0376aese v0.16b, v27.16b377aesmc v0.16b, v0.16b //AES block 8k+8 - round 1378379.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b,v9.16b //GHASH block 8k+2, 8k+3 - high380trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid381trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid382383ldp q28, q26, [x8, #32] //load rk2, rk3384aese v4.16b, v27.16b385aesmc v4.16b, v4.16b //AES block 8k+12 - round 1386aese v3.16b, v27.16b387aesmc v3.16b, v3.16b //AES block 8k+11 - round 1388389pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low390aese v7.16b, v27.16b391aesmc v7.16b, v7.16b //AES block 8k+15 - round 1392aese v6.16b, v27.16b393aesmc v6.16b, v6.16b //AES block 8k+14 - round 1394395pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid396eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid397pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid398399rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free)400.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low401402pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid403eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid404pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid405406aese v5.16b, v28.16b407aesmc v5.16b, v5.16b //AES block 8k+13 - round 2408aese v4.16b, v28.16b409aesmc v4.16b, v4.16b //AES block 8k+12 - round 2410aese v2.16b, v28.16b411aesmc v2.16b, v2.16b //AES block 8k+10 - round 2412413aese v1.16b, v28.16b414aesmc v1.16b, v1.16b //AES block 8k+9 - round 2415.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid416aese v6.16b, v28.16b417aesmc v6.16b, v6.16b //AES block 8k+14 - round 2418419aese v0.16b, v28.16b420aesmc v0.16b, v0.16b //AES block 8k+8 - round 2421aese v3.16b, v28.16b422aesmc v3.16b, v3.16b //AES block 8k+11 - round 2423aese v7.16b, v28.16b424aesmc v7.16b, v7.16b //AES block 8k+15 - round 2425426aese v6.16b, v26.16b427aesmc v6.16b, v6.16b //AES block 8k+14 - round 3428ldr q21, [x3, #48] //load h2k | h1k429ldr q24, [x3, #96] //load h4k | h3k430rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free)431432ldp q27, q28, [x8, #64] //load rk4, rk5433aese v2.16b, v26.16b434aesmc v2.16b, v2.16b //AES block 8k+10 - round 3435aese v1.16b, v26.16b436aesmc v1.16b, v1.16b //AES block 8k+9 - round 3437438ldr q20, [x3, #32] //load h1l | h1h439ext v20.16b, v20.16b, v20.16b, #8440ldr q22, [x3, #64] //load h1l | h1h441ext v22.16b, v22.16b, v22.16b, #8442pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high443pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low444445trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid446trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid447448aese v0.16b, v26.16b449aesmc v0.16b, v0.16b //AES block 8k+8 - round 3450aese v3.16b, v26.16b451aesmc v3.16b, v3.16b //AES block 8k+11 - round 3452453aese v7.16b, v26.16b454aesmc v7.16b, v7.16b //AES block 8k+15 - round 3455aese v4.16b, v26.16b456aesmc v4.16b, v4.16b //AES block 8k+12 - round 3457458aese v5.16b, v26.16b459aesmc v5.16b, v5.16b //AES block 8k+13 - round 3460aese v0.16b, v27.16b461aesmc v0.16b, v0.16b //AES block 8k+8 - round 4462463aese v7.16b, v27.16b464aesmc v7.16b, v7.16b //AES block 8k+15 - round 4465aese v3.16b, v27.16b466aesmc v3.16b, v3.16b //AES block 8k+11 - round 4467aese v4.16b, v27.16b468aesmc v4.16b, v4.16b //AES block 8k+12 - round 4469470aese v5.16b, v27.16b471aesmc v5.16b, v5.16b //AES block 8k+13 - round 4472aese v6.16b, v27.16b473aesmc v6.16b, v6.16b //AES block 8k+14 - round 4474aese v1.16b, v27.16b475aesmc v1.16b, v1.16b //AES block 8k+9 - round 4476477pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high478eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid479pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low480481aese v2.16b, v27.16b482aesmc v2.16b, v2.16b //AES block 8k+10 - round 4483ldp q26, q27, [x8, #96] //load rk6, rk7484trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid485486pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid487pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid488pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high489490pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high491aese v2.16b, v28.16b492aesmc v2.16b, v2.16b //AES block 8k+10 - round 5493aese v5.16b, v28.16b494aesmc v5.16b, v5.16b //AES block 8k+13 - round 5495496pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low497.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high498trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid499500.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low501aese v6.16b, v28.16b502aesmc v6.16b, v6.16b //AES block 8k+14 - round 5503504eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid505aese v7.16b, v28.16b506aesmc v7.16b, v7.16b //AES block 8k+15 - round 5507aese v1.16b, v28.16b508aesmc v1.16b, v1.16b //AES block 8k+9 - round 5509510aese v3.16b, v28.16b511aesmc v3.16b, v3.16b //AES block 8k+11 - round 5512aese v4.16b, v28.16b513aesmc v4.16b, v4.16b //AES block 8k+12 - round 5514aese v0.16b, v28.16b515aesmc v0.16b, v0.16b //AES block 8k+8 - round 5516517.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid518ldr d16, [x10] //MODULO - load modulo constant519pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low520521aese v7.16b, v26.16b522aesmc v7.16b, v7.16b //AES block 8k+15 - round 6523aese v5.16b, v26.16b524aesmc v5.16b, v5.16b //AES block 8k+13 - round 6525526pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid527aese v1.16b, v26.16b528aesmc v1.16b, v1.16b //AES block 8k+9 - round 6529aese v2.16b, v26.16b530aesmc v2.16b, v2.16b //AES block 8k+10 - round 6531532pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid533.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low534ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext535536aese v3.16b, v26.16b537aesmc v3.16b, v3.16b //AES block 8k+11 - round 6538rev32 v20.16b, v30.16b //CTR block 8k+16539add v30.4s, v30.4s, v31.4s //CTR block 8k+16540541aese v4.16b, v26.16b542aesmc v4.16b, v4.16b //AES block 8k+12 - round 6543aese v0.16b, v26.16b544aesmc v0.16b, v0.16b //AES block 8k+8 - round 6545aese v6.16b, v26.16b546aesmc v6.16b, v6.16b //AES block 8k+14 - round 6547548.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid549ldp q28, q26, [x8, #128] //load rk8, rk9550.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high551552aese v2.16b, v27.16b553aesmc v2.16b, v2.16b //AES block 8k+10 - round 7554aese v7.16b, v27.16b555aesmc v7.16b, v7.16b //AES block 8k+15 - round 7556ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext557558aese v5.16b, v27.16b559aesmc v5.16b, v5.16b //AES block 8k+13 - round 7560aese v6.16b, v27.16b561aesmc v6.16b, v6.16b //AES block 8k+14 - round 7562aese v1.16b, v27.16b563aesmc v1.16b, v1.16b //AES block 8k+9 - round 7564565pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid566aese v0.16b, v27.16b567aesmc v0.16b, v0.16b //AES block 8k+8 - round 7568aese v4.16b, v27.16b569aesmc v4.16b, v4.16b //AES block 8k+12 - round 7570571rev32 v22.16b, v30.16b //CTR block 8k+17572aese v3.16b, v27.16b573aesmc v3.16b, v3.16b //AES block 8k+11 - round 7574575aese v5.16b, v28.16b576aesmc v5.16b, v5.16b //AES block 8k+13 - round 8577ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext578add v30.4s, v30.4s, v31.4s //CTR block 8k+17579580aese v2.16b, v28.16b581aesmc v2.16b, v2.16b //AES block 8k+10 - round 8582aese v1.16b, v28.16b583aesmc v1.16b, v1.16b //AES block 8k+9 - round 8584aese v7.16b, v28.16b585aesmc v7.16b, v7.16b //AES block 8k+15 - round 8586587aese v4.16b, v28.16b588aesmc v4.16b, v4.16b //AES block 8k+12 - round 8589.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up590ldr q27, [x8, #160] //load rk10591592ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment593rev32 v23.16b, v30.16b //CTR block 8k+18594add v30.4s, v30.4s, v31.4s //CTR block 8k+18595aese v3.16b, v28.16b596aesmc v3.16b, v3.16b //AES block 8k+11 - round 8597598aese v0.16b, v28.16b599aesmc v0.16b, v0.16b //AES block 8k+8 - round 8600.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid601aese v6.16b, v28.16b602aesmc v6.16b, v6.16b //AES block 8k+14 - round 8603604aese v2.16b, v26.16b //AES block 8k+10 - round 9605aese v4.16b, v26.16b //AES block 8k+12 - round 9606aese v1.16b, v26.16b //AES block 8k+9 - round 9607608ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext609rev32 v25.16b, v30.16b //CTR block 8k+19610add v30.4s, v30.4s, v31.4s //CTR block 8k+19611612cmp x0, x5 //.LOOP CONTROL613.inst 0xce046d8c //eor3 v12.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result614aese v7.16b, v26.16b //AES block 8k+15 - round 9615616aese v6.16b, v26.16b //AES block 8k+14 - round 9617aese v3.16b, v26.16b //AES block 8k+11 - round 9618619.inst 0xce026d4a //eor3 v10.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result620621mov v2.16b, v23.16b //CTR block 8k+18622aese v0.16b, v26.16b //AES block 8k+8 - round 9623624rev32 v4.16b, v30.16b //CTR block 8k+20625add v30.4s, v30.4s, v31.4s //CTR block 8k+20626627.inst 0xce076def //eor3 v15.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result628aese v5.16b, v26.16b //AES block 8k+13 - round 9629pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low630631.inst 0xce016d29 //eor3 v9.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result632.inst 0xce036d6b //eor3 v11.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result633mov v3.16b, v25.16b //CTR block 8k+19634635ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment636.inst 0xce056dad //eor3 v13.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result637mov v1.16b, v22.16b //CTR block 8k+17638639.inst 0xce006d08 //eor3 v8.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result640mov v0.16b, v20.16b //CTR block 8k+16641stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result642643stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result644.inst 0xce066dce //eor3 v14.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result645646stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result647.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low648649stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result650b.lt .L128_enc_main_loop651652.L128_enc_prepretail: //PREPRETAIL653rev32 v5.16b, v30.16b //CTR block 8k+13654ldr q23, [x3, #176] //load h7l | h7h655ext v23.16b, v23.16b, v23.16b, #8656ldr q25, [x3, #208] //load h8l | h8h657ext v25.16b, v25.16b, v25.16b, #8658ext v19.16b, v19.16b, v19.16b, #8 //PRE 0659660ldr q20, [x3, #128] //load h5l | h5h661ext v20.16b, v20.16b, v20.16b, #8662ldr q22, [x3, #160] //load h6l | h6h663ext v22.16b, v22.16b, v22.16b, #8664rev64 v8.16b, v8.16b //GHASH block 8k665rev64 v9.16b, v9.16b //GHASH block 8k+1666667ldr q21, [x3, #144] //load h6k | h5k668ldr q24, [x3, #192] //load h6k | h5k669add v30.4s, v30.4s, v31.4s //CTR block 8k+13670rev64 v11.16b, v11.16b //GHASH block 8k+3671672rev64 v10.16b, v10.16b //GHASH block 8k+2673eor v8.16b, v8.16b, v19.16b //PRE 1674675rev32 v6.16b, v30.16b //CTR block 8k+14676677pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high678pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low679pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high680681rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free)682trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid683684pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low685eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high686trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid687688eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low689eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid690691ldp q26, q27, [x8, #0] //load rk0, rk1692add v30.4s, v30.4s, v31.4s //CTR block 8k+14693694pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid695pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid696697rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free)698rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free)699700eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid701702rev32 v7.16b, v30.16b //CTR block 8k+15703704rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free)705706aese v2.16b, v26.16b707aesmc v2.16b, v2.16b //AES block 8k+10 - round 0708709pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high710pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high711712aese v6.16b, v26.16b713aesmc v6.16b, v6.16b //AES block 8k+14 - round 0714aese v3.16b, v26.16b715aesmc v3.16b, v3.16b //AES block 8k+11 - round 0716717pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low718aese v1.16b, v26.16b719aesmc v1.16b, v1.16b //AES block 8k+9 - round 0720721.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high722trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid723trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid724725aese v5.16b, v26.16b726aesmc v5.16b, v5.16b //AES block 8k+13 - round 0727aese v7.16b, v26.16b728aesmc v7.16b, v7.16b //AES block 8k+15 - round 0729730eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid731aese v4.16b, v26.16b732aesmc v4.16b, v4.16b //AES block 8k+12 - round 0733aese v0.16b, v26.16b734aesmc v0.16b, v0.16b //AES block 8k+8 - round 0735736aese v3.16b, v27.16b737aesmc v3.16b, v3.16b //AES block 8k+11 - round 1738pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low739740ldr q23, [x3, #80] //load h3l | h3h741ext v23.16b, v23.16b, v23.16b, #8742ldr q25, [x3, #112] //load h4l | h4h743ext v25.16b, v25.16b, v25.16b, #8744745ldp q28, q26, [x8, #32] //load rk2, rk3746aese v5.16b, v27.16b747aesmc v5.16b, v5.16b //AES block 8k+13 - round 1748pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid749750.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low751pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid752753aese v1.16b, v27.16b754aesmc v1.16b, v1.16b //AES block 8k+9 - round 1755aese v0.16b, v27.16b756aesmc v0.16b, v0.16b //AES block 8k+8 - round 1757758.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid759ldr q21, [x3, #48] //load h2k | h1k760ldr q24, [x3, #96] //load h4k | h3k761aese v2.16b, v27.16b762aesmc v2.16b, v2.16b //AES block 8k+10 - round 1763764aese v4.16b, v27.16b765aesmc v4.16b, v4.16b //AES block 8k+12 - round 1766aese v7.16b, v27.16b767aesmc v7.16b, v7.16b //AES block 8k+15 - round 1768769aese v5.16b, v28.16b770aesmc v5.16b, v5.16b //AES block 8k+13 - round 2771aese v2.16b, v28.16b772aesmc v2.16b, v2.16b //AES block 8k+10 - round 2773aese v3.16b, v28.16b774aesmc v3.16b, v3.16b //AES block 8k+11 - round 2775776aese v1.16b, v28.16b777aesmc v1.16b, v1.16b //AES block 8k+9 - round 2778aese v6.16b, v27.16b779aesmc v6.16b, v6.16b //AES block 8k+14 - round 1780aese v4.16b, v28.16b781aesmc v4.16b, v4.16b //AES block 8k+12 - round 2782783aese v5.16b, v26.16b784aesmc v5.16b, v5.16b //AES block 8k+13 - round 3785aese v0.16b, v28.16b786aesmc v0.16b, v0.16b //AES block 8k+8 - round 2787788aese v6.16b, v28.16b789aesmc v6.16b, v6.16b //AES block 8k+14 - round 2790aese v7.16b, v28.16b791aesmc v7.16b, v7.16b //AES block 8k+15 - round 2792ldp q27, q28, [x8, #64] //load rk4, rk5793794ldr q20, [x3, #32] //load h1l | h1h795ext v20.16b, v20.16b, v20.16b, #8796ldr q22, [x3, #64] //load h1l | h1h797ext v22.16b, v22.16b, v22.16b, #8798trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid799aese v0.16b, v26.16b800aesmc v0.16b, v0.16b //AES block 8k+8 - round 3801802pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high803aese v6.16b, v26.16b804aesmc v6.16b, v6.16b //AES block 8k+14 - round 3805aese v3.16b, v26.16b806aesmc v3.16b, v3.16b //AES block 8k+11 - round 3807808pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low809trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid810pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high811812aese v2.16b, v26.16b813aesmc v2.16b, v2.16b //AES block 8k+10 - round 3814add v30.4s, v30.4s, v31.4s //CTR block 8k+15815816aese v7.16b, v26.16b817aesmc v7.16b, v7.16b //AES block 8k+15 - round 3818aese v1.16b, v26.16b819aesmc v1.16b, v1.16b //AES block 8k+9 - round 3820eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid821822pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low823aese v4.16b, v26.16b824aesmc v4.16b, v4.16b //AES block 8k+12 - round 3825pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high826827trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid828pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low829trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid830831aese v1.16b, v27.16b832aesmc v1.16b, v1.16b //AES block 8k+9 - round 4833aese v3.16b, v27.16b834aesmc v3.16b, v3.16b //AES block 8k+11 - round 4835.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high836837.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low838eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid839pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid840841aese v1.16b, v28.16b842aesmc v1.16b, v1.16b //AES block 8k+9 - round 5843aese v6.16b, v27.16b844aesmc v6.16b, v6.16b //AES block 8k+14 - round 4845aese v0.16b, v27.16b846aesmc v0.16b, v0.16b //AES block 8k+8 - round 4847848aese v7.16b, v27.16b849aesmc v7.16b, v7.16b //AES block 8k+15 - round 4850aese v2.16b, v27.16b851aesmc v2.16b, v2.16b //AES block 8k+10 - round 4852853pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid854aese v4.16b, v27.16b855aesmc v4.16b, v4.16b //AES block 8k+12 - round 4856aese v5.16b, v27.16b857aesmc v5.16b, v5.16b //AES block 8k+13 - round 4858859pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high860ldp q26, q27, [x8, #96] //load rk6, rk7861pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low862863.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid864pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid865pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid866867aese v0.16b, v28.16b868aesmc v0.16b, v0.16b //AES block 8k+8 - round 5869aese v7.16b, v28.16b870aesmc v7.16b, v7.16b //AES block 8k+15 - round 5871ldr d16, [x10] //MODULO - load modulo constant872873aese v2.16b, v28.16b874aesmc v2.16b, v2.16b //AES block 8k+10 - round 5875aese v4.16b, v28.16b876aesmc v4.16b, v4.16b //AES block 8k+12 - round 5877878.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high879aese v5.16b, v28.16b880aesmc v5.16b, v5.16b //AES block 8k+13 - round 5881aese v6.16b, v28.16b882aesmc v6.16b, v6.16b //AES block 8k+14 - round 5883884aese v3.16b, v28.16b885aesmc v3.16b, v3.16b //AES block 8k+11 - round 5886aese v4.16b, v26.16b887aesmc v4.16b, v4.16b //AES block 8k+12 - round 6888889aese v5.16b, v26.16b890aesmc v5.16b, v5.16b //AES block 8k+13 - round 6891aese v2.16b, v26.16b892aesmc v2.16b, v2.16b //AES block 8k+10 - round 6893aese v0.16b, v26.16b894aesmc v0.16b, v0.16b //AES block 8k+8 - round 6895896aese v3.16b, v26.16b897aesmc v3.16b, v3.16b //AES block 8k+11 - round 6898.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low899.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid900901aese v6.16b, v26.16b902aesmc v6.16b, v6.16b //AES block 8k+14 - round 6903aese v1.16b, v26.16b904aesmc v1.16b, v1.16b //AES block 8k+9 - round 6905aese v7.16b, v26.16b906aesmc v7.16b, v7.16b //AES block 8k+15 - round 6907908pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid909.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up910ldp q28, q26, [x8, #128] //load rk8, rk9911912aese v3.16b, v27.16b913aesmc v3.16b, v3.16b //AES block 8k+11 - round 7914aese v6.16b, v27.16b915aesmc v6.16b, v6.16b //AES block 8k+14 - round 7916aese v1.16b, v27.16b917aesmc v1.16b, v1.16b //AES block 8k+9 - round 7918ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment919920aese v5.16b, v27.16b921aesmc v5.16b, v5.16b //AES block 8k+13 - round 7922aese v0.16b, v27.16b923aesmc v0.16b, v0.16b //AES block 8k+8 - round 7924.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid925926aese v2.16b, v27.16b927aesmc v2.16b, v2.16b //AES block 8k+10 - round 7928aese v7.16b, v27.16b929aesmc v7.16b, v7.16b //AES block 8k+15 - round 7930931pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low932aese v4.16b, v27.16b933aesmc v4.16b, v4.16b //AES block 8k+12 - round 7934935aese v7.16b, v28.16b936aesmc v7.16b, v7.16b //AES block 8k+15 - round 8937aese v2.16b, v28.16b938aesmc v2.16b, v2.16b //AES block 8k+10 - round 8939aese v1.16b, v28.16b940aesmc v1.16b, v1.16b //AES block 8k+9 - round 8941ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment942943aese v6.16b, v28.16b944aesmc v6.16b, v6.16b //AES block 8k+14 - round 8945.inst 0xce114a73 //eor3 v19.16b, v19.16b, v17.16b, v18.16b //MODULO - fold into low946aese v4.16b, v28.16b947aesmc v4.16b, v4.16b //AES block 8k+12 - round 8948949aese v3.16b, v28.16b950aesmc v3.16b, v3.16b //AES block 8k+11 - round 8951aese v0.16b, v28.16b952aesmc v0.16b, v0.16b //AES block 8k+8 - round 8953aese v5.16b, v28.16b954aesmc v5.16b, v5.16b //AES block 8k+13 - round 8955956ldr q27, [x8, #160] //load rk10957aese v6.16b, v26.16b //AES block 8k+14 - round 9958aese v2.16b, v26.16b //AES block 8k+10 - round 9959960aese v0.16b, v26.16b //AES block 8k+8 - round 9961aese v1.16b, v26.16b //AES block 8k+9 - round 9962963aese v3.16b, v26.16b //AES block 8k+11 - round 9964aese v5.16b, v26.16b //AES block 8k+13 - round 9965966aese v4.16b, v26.16b //AES block 8k+12 - round 9967aese v7.16b, v26.16b //AES block 8k+15 - round 9968.L128_enc_tail: //TAIL969970sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process971ldr q8, [x0], #16 //AES block 8k+8 - load plaintext972973mov v29.16b, v27.16b974ldp q20, q21, [x3, #128] //load h5l | h5h975ext v20.16b, v20.16b, v20.16b, #8976977.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result978ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag979ldp q22, q23, [x3, #160] //load h6l | h6h980ext v22.16b, v22.16b, v22.16b, #8981ext v23.16b, v23.16b, v23.16b, #8982983ldp q24, q25, [x3, #192] //load h8k | h7k984ext v25.16b, v25.16b, v25.16b, #8985cmp x5, #112986b.gt .L128_enc_blocks_more_than_7987988mov v7.16b, v6.16b989mov v6.16b, v5.16b990movi v17.8b, #0991992cmp x5, #96993sub v30.4s, v30.4s, v31.4s994mov v5.16b, v4.16b995996mov v4.16b, v3.16b997mov v3.16b, v2.16b998mov v2.16b, v1.16b9991000movi v19.8b, #01001movi v18.8b, #01002b.gt .L128_enc_blocks_more_than_610031004mov v7.16b, v6.16b1005cmp x5, #8010061007sub v30.4s, v30.4s, v31.4s1008mov v6.16b, v5.16b1009mov v5.16b, v4.16b10101011mov v4.16b, v3.16b1012mov v3.16b, v1.16b1013b.gt .L128_enc_blocks_more_than_510141015cmp x5, #641016sub v30.4s, v30.4s, v31.4s10171018mov v7.16b, v6.16b1019mov v6.16b, v5.16b10201021mov v5.16b, v4.16b1022mov v4.16b, v1.16b1023b.gt .L128_enc_blocks_more_than_410241025mov v7.16b, v6.16b1026sub v30.4s, v30.4s, v31.4s1027mov v6.16b, v5.16b10281029mov v5.16b, v1.16b1030cmp x5, #481031b.gt .L128_enc_blocks_more_than_310321033sub v30.4s, v30.4s, v31.4s1034mov v7.16b, v6.16b1035mov v6.16b, v1.16b10361037cmp x5, #321038ldr q24, [x3, #96] //load h4k | h3k1039b.gt .L128_enc_blocks_more_than_210401041cmp x5, #1610421043sub v30.4s, v30.4s, v31.4s1044mov v7.16b, v1.16b1045b.gt .L128_enc_blocks_more_than_110461047ldr q21, [x3, #48] //load h2k | h1k1048sub v30.4s, v30.4s, v31.4s1049b .L128_enc_blocks_less_than_11050.L128_enc_blocks_more_than_7: //blocks left > 71051st1 { v9.16b}, [x2], #16 //AES final-7 block - store result10521053rev64 v8.16b, v9.16b //GHASH final-7 block1054ldr q9, [x0], #16 //AES final-6 block - load plaintext10551056eor v8.16b, v8.16b, v16.16b //feed in partial tag10571058ins v27.d[0], v8.d[1] //GHASH final-7 block - mid10591060pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high10611062ins v18.d[0], v24.d[1] //GHASH final-7 block - mid10631064eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid1065movi v16.8b, #0 //suppress further partial tag feed in10661067.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result10681069pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid1070pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low1071.L128_enc_blocks_more_than_6: //blocks left > 610721073st1 { v9.16b}, [x2], #16 //AES final-6 block - store result10741075rev64 v8.16b, v9.16b //GHASH final-6 block1076ldr q9, [x0], #16 //AES final-5 block - load plaintext10771078eor v8.16b, v8.16b, v16.16b //feed in partial tag10791080ins v27.d[0], v8.d[1] //GHASH final-6 block - mid10811082.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result1083pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low10841085eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid1086movi v16.8b, #0 //suppress further partial tag feed in10871088pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid1089pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high10901091eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low10921093eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid1094eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high1095.L128_enc_blocks_more_than_5: //blocks left > 510961097st1 { v9.16b}, [x2], #16 //AES final-5 block - store result10981099rev64 v8.16b, v9.16b //GHASH final-5 block11001101eor v8.16b, v8.16b, v16.16b //feed in partial tag11021103ins v27.d[0], v8.d[1] //GHASH final-5 block - mid1104ldr q9, [x0], #16 //AES final-4 block - load plaintext1105pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high11061107eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high11081109eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid11101111ins v27.d[1], v27.d[0] //GHASH final-5 block - mid11121113.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result1114pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low1115movi v16.8b, #0 //suppress further partial tag feed in11161117pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid1118eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low11191120eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid1121.L128_enc_blocks_more_than_4: //blocks left > 411221123st1 { v9.16b}, [x2], #16 //AES final-4 block - store result11241125rev64 v8.16b, v9.16b //GHASH final-4 block11261127ldr q9, [x0], #16 //AES final-3 block - load plaintext11281129eor v8.16b, v8.16b, v16.16b //feed in partial tag11301131ins v27.d[0], v8.d[1] //GHASH final-4 block - mid1132movi v16.8b, #0 //suppress further partial tag feed in1133pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high11341135eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid11361137pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low11381139eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high1140pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid11411142eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low11431144.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result1145eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid1146.L128_enc_blocks_more_than_3: //blocks left > 311471148st1 { v9.16b}, [x2], #16 //AES final-3 block - store result11491150ldr q25, [x3, #112] //load h4l | h4h1151ext v25.16b, v25.16b, v25.16b, #811521153rev64 v8.16b, v9.16b //GHASH final-3 block11541155eor v8.16b, v8.16b, v16.16b //feed in partial tag1156movi v16.8b, #0 //suppress further partial tag feed in11571158ins v27.d[0], v8.d[1] //GHASH final-3 block - mid1159ldr q24, [x3, #96] //load h4k | h3k1160pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low11611162ldr q9, [x0], #16 //AES final-2 block - load plaintext11631164eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid11651166ins v27.d[1], v27.d[0] //GHASH final-3 block - mid1167eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low11681169.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result11701171pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid1172pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high11731174eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid1175eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high1176.L128_enc_blocks_more_than_2: //blocks left > 211771178st1 { v9.16b}, [x2], #16 //AES final-2 block - store result11791180rev64 v8.16b, v9.16b //GHASH final-2 block11811182eor v8.16b, v8.16b, v16.16b //feed in partial tag11831184ldr q9, [x0], #16 //AES final-1 block - load plaintext11851186ins v27.d[0], v8.d[1] //GHASH final-2 block - mid1187ldr q23, [x3, #80] //load h3l | h3h1188ext v23.16b, v23.16b, v23.16b, #81189movi v16.8b, #0 //suppress further partial tag feed in11901191eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid1192.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result11931194pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high11951196pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low1197pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid11981199eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high12001201eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid1202eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low1203.L128_enc_blocks_more_than_1: //blocks left > 112041205st1 { v9.16b}, [x2], #16 //AES final-1 block - store result12061207ldr q22, [x3, #64] //load h2l | h2h1208ext v22.16b, v22.16b, v22.16b, #81209rev64 v8.16b, v9.16b //GHASH final-1 block1210ldr q9, [x0], #16 //AES final block - load plaintext12111212eor v8.16b, v8.16b, v16.16b //feed in partial tag12131214movi v16.8b, #0 //suppress further partial tag feed in1215ins v27.d[0], v8.d[1] //GHASH final-1 block - mid1216.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result12171218pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high12191220eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid12211222ldr q21, [x3, #48] //load h2k | h1k12231224ins v27.d[1], v27.d[0] //GHASH final-1 block - mid12251226pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low1227pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid12281229eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high12301231eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid1232eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low1233.L128_enc_blocks_less_than_1: //blocks left <= 112341235rev32 v30.16b, v30.16b1236str q30, [x16] //store the updated counter1237and x1, x1, #127 //bit_length %= 12812381239sub x1, x1, #128 //bit_length -= 12812401241neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])12421243mvn x6, xzr //temp0_x = 0xffffffffffffffff1244ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored1245and x1, x1, #127 //bit_length %= 12812461247lsr x6, x6, x1 //temp0_x is mask for top 64b of last block1248mvn x7, xzr //temp1_x = 0xffffffffffffffff1249cmp x1, #6412501251csel x13, x7, x6, lt1252csel x14, x6, xzr, lt12531254mov v0.d[1], x141255mov v0.d[0], x13 //ctr0b is mask for last block12561257and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits12581259rev64 v8.16b, v9.16b //GHASH final block12601261bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing1262st1 { v9.16b}, [x2] //store all 16B12631264eor v8.16b, v8.16b, v16.16b //feed in partial tag12651266ins v16.d[0], v8.d[1] //GHASH final block - mid12671268eor v16.8b, v16.8b, v8.8b //GHASH final block - mid1269ldr q20, [x3, #32] //load h1l | h1h1270ext v20.16b, v20.16b, v20.16b, #812711272pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid12731274pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high1275eor v18.16b, v18.16b, v16.16b //GHASH final block - mid1276ldr d16, [x10] //MODULO - load modulo constant12771278pmull v26.1q, v8.1d, v20.1d //GHASH final block - low12791280eor v17.16b, v17.16b, v28.16b //GHASH final block - high12811282eor v19.16b, v19.16b, v26.16b //GHASH final block - low12831284ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment1285pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid12861287.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up12881289.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid12901291pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low1292ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment12931294.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low1295ext v19.16b, v19.16b, v19.16b, #81296rev64 v19.16b, v19.16b1297st1 { v19.16b }, [x3]1298mov x0, x912991300ldp d10, d11, [sp, #16]1301ldp d12, d13, [sp, #32]1302ldp d14, d15, [sp, #48]1303ldp d8, d9, [sp], #801304ret13051306.L128_enc_ret:1307mov w0, #0x01308ret1309.size unroll8_eor3_aes_gcm_enc_128_kernel,.-unroll8_eor3_aes_gcm_enc_128_kernel1310.globl unroll8_eor3_aes_gcm_dec_128_kernel1311.type unroll8_eor3_aes_gcm_dec_128_kernel,%function1312.align 41313unroll8_eor3_aes_gcm_dec_128_kernel:1314AARCH64_VALID_CALL_TARGET1315cbz x1, .L128_dec_ret1316stp d8, d9, [sp, #-80]!1317lsr x9, x1, #31318mov x16, x41319mov x8, x51320stp d10, d11, [sp, #16]1321stp d12, d13, [sp, #32]1322stp d14, d15, [sp, #48]1323mov x5, #0xc2000000000000001324stp x5, xzr, [sp, #64]1325add x10, sp, #6413261327mov x5, x91328ld1 { v0.16b}, [x16] //CTR block 013291330ldp q26, q27, [x8, #0] //load rk0, rk11331sub x5, x5, #1 //byte_len - 113321333mov x15, #0x100000000 //set up counter increment1334movi v31.16b, #0x01335mov v31.d[1], x151336ld1 { v19.16b}, [x3]1337ext v19.16b, v19.16b, v19.16b, #81338rev64 v19.16b, v19.16b13391340rev32 v30.16b, v0.16b //set up reversed counter13411342aese v0.16b, v26.16b1343aesmc v0.16b, v0.16b //AES block 0 - round 013441345add v30.4s, v30.4s, v31.4s //CTR block 013461347rev32 v1.16b, v30.16b //CTR block 11348add v30.4s, v30.4s, v31.4s //CTR block 113491350and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)13511352rev32 v2.16b, v30.16b //CTR block 21353add v30.4s, v30.4s, v31.4s //CTR block 21354aese v1.16b, v26.16b1355aesmc v1.16b, v1.16b //AES block 1 - round 013561357rev32 v3.16b, v30.16b //CTR block 31358add v30.4s, v30.4s, v31.4s //CTR block 313591360aese v0.16b, v27.16b1361aesmc v0.16b, v0.16b //AES block 0 - round 11362aese v1.16b, v27.16b1363aesmc v1.16b, v1.16b //AES block 1 - round 113641365rev32 v4.16b, v30.16b //CTR block 41366add v30.4s, v30.4s, v31.4s //CTR block 413671368rev32 v5.16b, v30.16b //CTR block 51369add v30.4s, v30.4s, v31.4s //CTR block 513701371aese v2.16b, v26.16b1372aesmc v2.16b, v2.16b //AES block 2 - round 013731374rev32 v6.16b, v30.16b //CTR block 61375add v30.4s, v30.4s, v31.4s //CTR block 61376aese v5.16b, v26.16b1377aesmc v5.16b, v5.16b //AES block 5 - round 013781379aese v3.16b, v26.16b1380aesmc v3.16b, v3.16b //AES block 3 - round 01381aese v4.16b, v26.16b1382aesmc v4.16b, v4.16b //AES block 4 - round 013831384rev32 v7.16b, v30.16b //CTR block 713851386aese v6.16b, v26.16b1387aesmc v6.16b, v6.16b //AES block 6 - round 01388aese v2.16b, v27.16b1389aesmc v2.16b, v2.16b //AES block 2 - round 113901391aese v7.16b, v26.16b1392aesmc v7.16b, v7.16b //AES block 7 - round 013931394ldp q28, q26, [x8, #32] //load rk2, rk313951396aese v6.16b, v27.16b1397aesmc v6.16b, v6.16b //AES block 6 - round 11398aese v5.16b, v27.16b1399aesmc v5.16b, v5.16b //AES block 5 - round 114001401aese v4.16b, v27.16b1402aesmc v4.16b, v4.16b //AES block 4 - round 11403aese v7.16b, v27.16b1404aesmc v7.16b, v7.16b //AES block 7 - round 114051406aese v7.16b, v28.16b1407aesmc v7.16b, v7.16b //AES block 7 - round 21408aese v0.16b, v28.16b1409aesmc v0.16b, v0.16b //AES block 0 - round 21410aese v3.16b, v27.16b1411aesmc v3.16b, v3.16b //AES block 3 - round 114121413aese v6.16b, v28.16b1414aesmc v6.16b, v6.16b //AES block 6 - round 21415aese v2.16b, v28.16b1416aesmc v2.16b, v2.16b //AES block 2 - round 21417aese v5.16b, v28.16b1418aesmc v5.16b, v5.16b //AES block 5 - round 214191420aese v4.16b, v28.16b1421aesmc v4.16b, v4.16b //AES block 4 - round 21422aese v3.16b, v28.16b1423aesmc v3.16b, v3.16b //AES block 3 - round 21424aese v1.16b, v28.16b1425aesmc v1.16b, v1.16b //AES block 1 - round 214261427aese v6.16b, v26.16b1428aesmc v6.16b, v6.16b //AES block 6 - round 31429aese v2.16b, v26.16b1430aesmc v2.16b, v2.16b //AES block 2 - round 314311432ldp q27, q28, [x8, #64] //load rk4, rk51433aese v5.16b, v26.16b1434aesmc v5.16b, v5.16b //AES block 5 - round 314351436aese v0.16b, v26.16b1437aesmc v0.16b, v0.16b //AES block 0 - round 31438aese v7.16b, v26.16b1439aesmc v7.16b, v7.16b //AES block 7 - round 314401441aese v3.16b, v26.16b1442aesmc v3.16b, v3.16b //AES block 3 - round 31443aese v1.16b, v26.16b1444aesmc v1.16b, v1.16b //AES block 1 - round 314451446aese v0.16b, v27.16b1447aesmc v0.16b, v0.16b //AES block 0 - round 41448aese v7.16b, v27.16b1449aesmc v7.16b, v7.16b //AES block 7 - round 41450aese v4.16b, v26.16b1451aesmc v4.16b, v4.16b //AES block 4 - round 314521453aese v6.16b, v27.16b1454aesmc v6.16b, v6.16b //AES block 6 - round 41455aese v1.16b, v27.16b1456aesmc v1.16b, v1.16b //AES block 1 - round 41457aese v3.16b, v27.16b1458aesmc v3.16b, v3.16b //AES block 3 - round 414591460aese v5.16b, v27.16b1461aesmc v5.16b, v5.16b //AES block 5 - round 41462aese v4.16b, v27.16b1463aesmc v4.16b, v4.16b //AES block 4 - round 41464aese v2.16b, v27.16b1465aesmc v2.16b, v2.16b //AES block 2 - round 414661467ldp q26, q27, [x8, #96] //load rk6, rk71468aese v2.16b, v28.16b1469aesmc v2.16b, v2.16b //AES block 2 - round 51470aese v3.16b, v28.16b1471aesmc v3.16b, v3.16b //AES block 3 - round 514721473aese v6.16b, v28.16b1474aesmc v6.16b, v6.16b //AES block 6 - round 51475aese v1.16b, v28.16b1476aesmc v1.16b, v1.16b //AES block 1 - round 514771478aese v7.16b, v28.16b1479aesmc v7.16b, v7.16b //AES block 7 - round 51480aese v5.16b, v28.16b1481aesmc v5.16b, v5.16b //AES block 5 - round 514821483aese v4.16b, v28.16b1484aesmc v4.16b, v4.16b //AES block 4 - round 514851486aese v3.16b, v26.16b1487aesmc v3.16b, v3.16b //AES block 3 - round 61488aese v2.16b, v26.16b1489aesmc v2.16b, v2.16b //AES block 2 - round 61490aese v0.16b, v28.16b1491aesmc v0.16b, v0.16b //AES block 0 - round 514921493aese v5.16b, v26.16b1494aesmc v5.16b, v5.16b //AES block 5 - round 61495aese v4.16b, v26.16b1496aesmc v4.16b, v4.16b //AES block 4 - round 61497aese v1.16b, v26.16b1498aesmc v1.16b, v1.16b //AES block 1 - round 614991500aese v0.16b, v26.16b1501aesmc v0.16b, v0.16b //AES block 0 - round 61502aese v7.16b, v26.16b1503aesmc v7.16b, v7.16b //AES block 7 - round 61504aese v6.16b, v26.16b1505aesmc v6.16b, v6.16b //AES block 6 - round 615061507aese v3.16b, v27.16b1508aesmc v3.16b, v3.16b //AES block 3 - round 71509aese v4.16b, v27.16b1510aesmc v4.16b, v4.16b //AES block 4 - round 71511aese v1.16b, v27.16b1512aesmc v1.16b, v1.16b //AES block 1 - round 715131514aese v7.16b, v27.16b1515aesmc v7.16b, v7.16b //AES block 7 - round 71516aese v5.16b, v27.16b1517aesmc v5.16b, v5.16b //AES block 5 - round 71518ldp q28, q26, [x8, #128] //load rk8, rk915191520aese v6.16b, v27.16b1521aesmc v6.16b, v6.16b //AES block 6 - round 71522aese v2.16b, v27.16b1523aesmc v2.16b, v2.16b //AES block 2 - round 71524aese v0.16b, v27.16b1525aesmc v0.16b, v0.16b //AES block 0 - round 715261527add x5, x5, x01528add v30.4s, v30.4s, v31.4s //CTR block 715291530aese v6.16b, v28.16b1531aesmc v6.16b, v6.16b //AES block 6 - round 81532aese v0.16b, v28.16b1533aesmc v0.16b, v0.16b //AES block 0 - round 815341535aese v1.16b, v28.16b1536aesmc v1.16b, v1.16b //AES block 1 - round 81537aese v7.16b, v28.16b1538aesmc v7.16b, v7.16b //AES block 7 - round 81539aese v3.16b, v28.16b1540aesmc v3.16b, v3.16b //AES block 3 - round 815411542aese v5.16b, v28.16b1543aesmc v5.16b, v5.16b //AES block 5 - round 81544aese v2.16b, v28.16b1545aesmc v2.16b, v2.16b //AES block 2 - round 81546aese v4.16b, v28.16b1547aesmc v4.16b, v4.16b //AES block 4 - round 815481549aese v0.16b, v26.16b //AES block 0 - round 91550aese v1.16b, v26.16b //AES block 1 - round 91551aese v6.16b, v26.16b //AES block 6 - round 915521553ldr q27, [x8, #160] //load rk101554aese v4.16b, v26.16b //AES block 4 - round 91555aese v3.16b, v26.16b //AES block 3 - round 915561557aese v2.16b, v26.16b //AES block 2 - round 91558aese v5.16b, v26.16b //AES block 5 - round 91559aese v7.16b, v26.16b //AES block 7 - round 915601561add x4, x0, x1, lsr #3 //end_input_ptr1562cmp x0, x5 //check if we have <= 8 blocks1563b.ge .L128_dec_tail //handle tail15641565ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext15661567.inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 0 - result1568.inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 1 - result1569stp q0, q1, [x2], #32 //AES block 0, 1 - store result15701571rev32 v0.16b, v30.16b //CTR block 81572add v30.4s, v30.4s, v31.4s //CTR block 81573ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext15741575ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext15761577rev32 v1.16b, v30.16b //CTR block 91578add v30.4s, v30.4s, v31.4s //CTR block 91579ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext15801581.inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 3 - result1582.inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 2 - result1583stp q2, q3, [x2], #32 //AES block 2, 3 - store result15841585rev32 v2.16b, v30.16b //CTR block 101586add v30.4s, v30.4s, v31.4s //CTR block 1015871588.inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 6 - result15891590rev32 v3.16b, v30.16b //CTR block 111591add v30.4s, v30.4s, v31.4s //CTR block 1115921593.inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 4 - result1594.inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 5 - result1595stp q4, q5, [x2], #32 //AES block 4, 5 - store result15961597.inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 7 - result1598stp q6, q7, [x2], #32 //AES block 6, 7 - store result1599rev32 v4.16b, v30.16b //CTR block 1216001601cmp x0, x5 //check if we have <= 8 blocks1602add v30.4s, v30.4s, v31.4s //CTR block 121603b.ge .L128_dec_prepretail //do prepretail16041605.L128_dec_main_loop: //main loop start1606ldr q23, [x3, #176] //load h7l | h7h1607ext v23.16b, v23.16b, v23.16b, #81608ldr q25, [x3, #208] //load h8l | h8h1609ext v25.16b, v25.16b, v25.16b, #816101611rev64 v9.16b, v9.16b //GHASH block 8k+11612rev64 v8.16b, v8.16b //GHASH block 8k1613ext v19.16b, v19.16b, v19.16b, #8 //PRE 016141615rev64 v14.16b, v14.16b //GHASH block 8k+61616ldr q20, [x3, #128] //load h5l | h5h1617ext v20.16b, v20.16b, v20.16b, #81618ldr q22, [x3, #160] //load h6l | h6h1619ext v22.16b, v22.16b, v22.16b, #816201621eor v8.16b, v8.16b, v19.16b //PRE 11622rev32 v5.16b, v30.16b //CTR block 8k+131623add v30.4s, v30.4s, v31.4s //CTR block 8k+1316241625rev64 v10.16b, v10.16b //GHASH block 8k+21626rev64 v12.16b, v12.16b //GHASH block 8k+41627ldp q26, q27, [x8, #0] //load rk0, rk116281629rev32 v6.16b, v30.16b //CTR block 8k+141630add v30.4s, v30.4s, v31.4s //CTR block 8k+141631ldr q21, [x3, #144] //load h6k | h5k1632ldr q24, [x3, #192] //load h8k | h7k16331634pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high1635pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high1636rev64 v11.16b, v11.16b //GHASH block 8k+316371638rev32 v7.16b, v30.16b //CTR block 8k+151639trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid1640rev64 v13.16b, v13.16b //GHASH block 8k+516411642pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low1643pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low1644trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid16451646pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high1647aese v4.16b, v26.16b1648aesmc v4.16b, v4.16b //AES block 8k+12 - round 01649pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high16501651aese v6.16b, v26.16b1652aesmc v6.16b, v6.16b //AES block 8k+14 - round 01653aese v5.16b, v26.16b1654aesmc v5.16b, v5.16b //AES block 8k+13 - round 01655aese v7.16b, v26.16b1656aesmc v7.16b, v7.16b //AES block 8k+15 - round 016571658aese v3.16b, v26.16b1659aesmc v3.16b, v3.16b //AES block 8k+11 - round 01660aese v2.16b, v26.16b1661aesmc v2.16b, v2.16b //AES block 8k+10 - round 01662eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high16631664aese v1.16b, v26.16b1665aesmc v1.16b, v1.16b //AES block 8k+9 - round 01666eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid1667aese v0.16b, v26.16b1668aesmc v0.16b, v0.16b //AES block 8k+8 - round 016691670aese v2.16b, v27.16b1671aesmc v2.16b, v2.16b //AES block 8k+10 - round 11672eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low1673.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high16741675ldp q28, q26, [x8, #32] //load rk2, rk31676trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid1677aese v7.16b, v27.16b1678aesmc v7.16b, v7.16b //AES block 8k+15 - round 116791680pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low1681trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid1682pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid16831684ldr q23, [x3, #80] //load h3l | h3h1685ext v23.16b, v23.16b, v23.16b, #81686ldr q25, [x3, #112] //load h4l | h4h1687ext v25.16b, v25.16b, v25.16b, #81688pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid1689aese v6.16b, v27.16b1690aesmc v6.16b, v6.16b //AES block 8k+14 - round 116911692aese v4.16b, v27.16b1693aesmc v4.16b, v4.16b //AES block 8k+12 - round 11694aese v5.16b, v27.16b1695aesmc v5.16b, v5.16b //AES block 8k+13 - round 11696pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low16971698aese v3.16b, v27.16b1699aesmc v3.16b, v3.16b //AES block 8k+11 - round 11700aese v0.16b, v27.16b1701aesmc v0.16b, v0.16b //AES block 8k+8 - round 11702aese v1.16b, v27.16b1703aesmc v1.16b, v1.16b //AES block 8k+9 - round 117041705aese v7.16b, v28.16b1706aesmc v7.16b, v7.16b //AES block 8k+15 - round 21707aese v2.16b, v28.16b1708aesmc v2.16b, v2.16b //AES block 8k+10 - round 21709.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low17101711aese v4.16b, v28.16b1712aesmc v4.16b, v4.16b //AES block 8k+12 - round 21713eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid1714ldr q20, [x3, #32] //load h1l | h1h1715ext v20.16b, v20.16b, v20.16b, #81716ldr q22, [x3, #64] //load h2l | h2h1717ext v22.16b, v22.16b, v22.16b, #817181719eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid1720aese v1.16b, v28.16b1721aesmc v1.16b, v1.16b //AES block 8k+9 - round 21722aese v3.16b, v28.16b1723aesmc v3.16b, v3.16b //AES block 8k+11 - round 217241725trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid1726aese v5.16b, v28.16b1727aesmc v5.16b, v5.16b //AES block 8k+13 - round 21728aese v0.16b, v28.16b1729aesmc v0.16b, v0.16b //AES block 8k+8 - round 217301731aese v6.16b, v28.16b1732aesmc v6.16b, v6.16b //AES block 8k+14 - round 21733pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid1734pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid17351736aese v7.16b, v26.16b1737aesmc v7.16b, v7.16b //AES block 8k+15 - round 31738rev64 v15.16b, v15.16b //GHASH block 8k+71739pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high17401741ldp q27, q28, [x8, #64] //load rk4, rk51742pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low1743.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid17441745ldr q21, [x3, #48] //load h2k | h1k1746ldr q24, [x3, #96] //load h4k | h3k1747aese v2.16b, v26.16b1748aesmc v2.16b, v2.16b //AES block 8k+10 - round 31749trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid17501751aese v4.16b, v26.16b1752aesmc v4.16b, v4.16b //AES block 8k+12 - round 31753aese v3.16b, v26.16b1754aesmc v3.16b, v3.16b //AES block 8k+11 - round 31755aese v1.16b, v26.16b1756aesmc v1.16b, v1.16b //AES block 8k+9 - round 317571758aese v0.16b, v26.16b1759aesmc v0.16b, v0.16b //AES block 8k+8 - round 31760aese v6.16b, v26.16b1761aesmc v6.16b, v6.16b //AES block 8k+14 - round 31762aese v5.16b, v26.16b1763aesmc v5.16b, v5.16b //AES block 8k+13 - round 317641765pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high1766pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low1767pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high17681769pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low1770aese v0.16b, v27.16b1771aesmc v0.16b, v0.16b //AES block 8k+8 - round 41772aese v7.16b, v27.16b1773aesmc v7.16b, v7.16b //AES block 8k+15 - round 417741775eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid1776trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid1777aese v3.16b, v27.16b1778aesmc v3.16b, v3.16b //AES block 8k+11 - round 417791780aese v1.16b, v27.16b1781aesmc v1.16b, v1.16b //AES block 8k+9 - round 41782aese v5.16b, v27.16b1783aesmc v5.16b, v5.16b //AES block 8k+13 - round 41784aese v6.16b, v27.16b1785aesmc v6.16b, v6.16b //AES block 8k+14 - round 417861787aese v2.16b, v27.16b1788aesmc v2.16b, v2.16b //AES block 8k+10 - round 41789aese v4.16b, v27.16b1790aesmc v4.16b, v4.16b //AES block 8k+12 - round 41791trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid17921793ldp q26, q27, [x8, #96] //load rk6, rk71794aese v0.16b, v28.16b1795aesmc v0.16b, v0.16b //AES block 8k+8 - round 51796pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid17971798aese v2.16b, v28.16b1799aesmc v2.16b, v2.16b //AES block 8k+10 - round 51800eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid1801aese v1.16b, v28.16b1802aesmc v1.16b, v1.16b //AES block 8k+9 - round 518031804pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid1805aese v6.16b, v28.16b1806aesmc v6.16b, v6.16b //AES block 8k+14 - round 51807aese v7.16b, v28.16b1808aesmc v7.16b, v7.16b //AES block 8k+15 - round 518091810aese v3.16b, v28.16b1811aesmc v3.16b, v3.16b //AES block 8k+11 - round 51812aese v5.16b, v28.16b1813aesmc v5.16b, v5.16b //AES block 8k+13 - round 51814aese v4.16b, v28.16b1815aesmc v4.16b, v4.16b //AES block 8k+12 - round 518161817pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high1818.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid1819.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low18201821aese v3.16b, v26.16b1822aesmc v3.16b, v3.16b //AES block 8k+11 - round 61823.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high1824aese v7.16b, v26.16b1825aesmc v7.16b, v7.16b //AES block 8k+15 - round 618261827aese v1.16b, v26.16b1828aesmc v1.16b, v1.16b //AES block 8k+9 - round 61829pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid1830aese v6.16b, v26.16b1831aesmc v6.16b, v6.16b //AES block 8k+14 - round 618321833aese v2.16b, v26.16b1834aesmc v2.16b, v2.16b //AES block 8k+10 - round 61835aese v5.16b, v26.16b1836aesmc v5.16b, v5.16b //AES block 8k+13 - round 61837pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low18381839pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid1840aese v0.16b, v26.16b1841aesmc v0.16b, v0.16b //AES block 8k+8 - round 61842add v30.4s, v30.4s, v31.4s //CTR block 8k+1518431844.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high1845aese v4.16b, v26.16b1846aesmc v4.16b, v4.16b //AES block 8k+12 - round 61847ldp q28, q26, [x8, #128] //load rk8, rk918481849ldr d16, [x10] //MODULO - load modulo constant1850.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low1851aese v5.16b, v27.16b1852aesmc v5.16b, v5.16b //AES block 8k+13 - round 718531854rev32 v20.16b, v30.16b //CTR block 8k+161855.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid1856add v30.4s, v30.4s, v31.4s //CTR block 8k+1618571858aese v6.16b, v27.16b1859aesmc v6.16b, v6.16b //AES block 8k+14 - round 71860aese v3.16b, v27.16b1861aesmc v3.16b, v3.16b //AES block 8k+11 - round 71862aese v7.16b, v27.16b1863aesmc v7.16b, v7.16b //AES block 8k+15 - round 718641865aese v2.16b, v27.16b1866aesmc v2.16b, v2.16b //AES block 8k+10 - round 71867aese v1.16b, v27.16b1868aesmc v1.16b, v1.16b //AES block 8k+9 - round 71869rev32 v22.16b, v30.16b //CTR block 8k+1718701871aese v4.16b, v27.16b1872aesmc v4.16b, v4.16b //AES block 8k+12 - round 71873ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment1874pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid18751876.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up1877aese v0.16b, v27.16b1878aesmc v0.16b, v0.16b //AES block 8k+8 - round 71879add v30.4s, v30.4s, v31.4s //CTR block 8k+1718801881aese v5.16b, v28.16b1882aesmc v5.16b, v5.16b //AES block 8k+13 - round 81883aese v1.16b, v28.16b1884aesmc v1.16b, v1.16b //AES block 8k+9 - round 81885ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext18861887ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext1888aese v0.16b, v28.16b1889aesmc v0.16b, v0.16b //AES block 8k+8 - round 81890rev32 v23.16b, v30.16b //CTR block 8k+1818911892ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext1893aese v4.16b, v28.16b1894aesmc v4.16b, v4.16b //AES block 8k+12 - round 81895.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid18961897ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext1898aese v3.16b, v28.16b1899aesmc v3.16b, v3.16b //AES block 8k+11 - round 81900add v30.4s, v30.4s, v31.4s //CTR block 8k+1819011902aese v7.16b, v28.16b1903aesmc v7.16b, v7.16b //AES block 8k+15 - round 81904aese v2.16b, v28.16b1905aesmc v2.16b, v2.16b //AES block 8k+10 - round 81906aese v6.16b, v28.16b1907aesmc v6.16b, v6.16b //AES block 8k+14 - round 819081909aese v0.16b, v26.16b //AES block 8k+8 - round 91910aese v1.16b, v26.16b //AES block 8k+9 - round 91911ldr q27, [x8, #160] //load rk1019121913aese v6.16b, v26.16b //AES block 8k+14 - round 91914pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low1915aese v2.16b, v26.16b //AES block 8k+10 - round 919161917aese v7.16b, v26.16b //AES block 8k+15 - round 91918aese v4.16b, v26.16b //AES block 8k+12 - round 91919ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment19201921rev32 v25.16b, v30.16b //CTR block 8k+191922add v30.4s, v30.4s, v31.4s //CTR block 8k+1919231924aese v3.16b, v26.16b //AES block 8k+11 - round 91925aese v5.16b, v26.16b //AES block 8k+13 - round 91926.inst 0xce016d21 //eor3 v1.16b, v9.16b, v1.16b, v27.16b //AES block 8k+9 - result19271928.inst 0xce006d00 //eor3 v0.16b, v8.16b, v0.16b, v27.16b //AES block 8k+8 - result1929.inst 0xce076de7 //eor3 v7.16b, v15.16b, v7.16b, v27.16b //AES block 8k+15 - result1930.inst 0xce066dc6 //eor3 v6.16b, v14.16b, v6.16b, v27.16b //AES block 8k+14 - result19311932.inst 0xce026d42 //eor3 v2.16b, v10.16b, v2.16b, v27.16b //AES block 8k+10 - result1933stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result1934mov v1.16b, v22.16b //CTR block 8k+1719351936.inst 0xce046d84 //eor3 v4.16b, v12.16b, v4.16b, v27.16b //AES block 8k+12 - result1937.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low1938mov v0.16b, v20.16b //CTR block 8k+1619391940.inst 0xce036d63 //eor3 v3.16b, v11.16b, v3.16b, v27.16b //AES block 8k+11 - result1941cmp x0, x5 //.LOOP CONTROL1942stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result19431944.inst 0xce056da5 //eor3 v5.16b, v13.16b, v5.16b, v27.16b //AES block 8k+13 - result1945mov v2.16b, v23.16b //CTR block 8k+1819461947stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result1948rev32 v4.16b, v30.16b //CTR block 8k+201949add v30.4s, v30.4s, v31.4s //CTR block 8k+2019501951stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result1952mov v3.16b, v25.16b //CTR block 8k+191953b.lt .L128_dec_main_loop19541955.L128_dec_prepretail: //PREPRETAIL1956rev64 v11.16b, v11.16b //GHASH block 8k+31957ext v19.16b, v19.16b, v19.16b, #8 //PRE 01958rev64 v8.16b, v8.16b //GHASH block 8k19591960rev64 v10.16b, v10.16b //GHASH block 8k+21961rev32 v5.16b, v30.16b //CTR block 8k+131962ldp q26, q27, [x8, #0] //load rk0, rk119631964ldr q23, [x3, #176] //load h7l | h7h1965ext v23.16b, v23.16b, v23.16b, #81966ldr q25, [x3, #208] //load h8l | h8h1967ext v25.16b, v25.16b, v25.16b, #81968eor v8.16b, v8.16b, v19.16b //PRE 11969rev64 v9.16b, v9.16b //GHASH block 8k+119701971add v30.4s, v30.4s, v31.4s //CTR block 8k+131972ldr q20, [x3, #128] //load h5l | h5h1973ext v20.16b, v20.16b, v20.16b, #81974ldr q22, [x3, #160] //load h6l | h6h1975ext v22.16b, v22.16b, v22.16b, #81976rev64 v13.16b, v13.16b //GHASH block 8k+519771978rev64 v12.16b, v12.16b //GHASH block 8k+419791980rev64 v14.16b, v14.16b //GHASH block 8k+619811982ldr q21, [x3, #144] //load h6k | h5k1983ldr q24, [x3, #192] //load h8k | h7k1984rev32 v6.16b, v30.16b //CTR block 8k+141985add v30.4s, v30.4s, v31.4s //CTR block 8k+1419861987pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high1988pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low1989pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high19901991trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid1992trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid1993pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high19941995pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low1996pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high1997aese v0.16b, v26.16b1998aesmc v0.16b, v0.16b //AES block 8k+8 - round 019992000eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high2001aese v4.16b, v26.16b2002aesmc v4.16b, v4.16b //AES block 8k+12 - round 02003eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid20042005pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low2006rev32 v7.16b, v30.16b //CTR block 8k+152007aese v3.16b, v26.16b2008aesmc v3.16b, v3.16b //AES block 8k+11 - round 020092010.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high2011trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid2012trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid20132014aese v2.16b, v26.16b2015aesmc v2.16b, v2.16b //AES block 8k+10 - round 02016aese v1.16b, v26.16b2017aesmc v1.16b, v1.16b //AES block 8k+9 - round 02018aese v5.16b, v26.16b2019aesmc v5.16b, v5.16b //AES block 8k+13 - round 020202021pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid2022pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid2023pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low20242025aese v2.16b, v27.16b2026aesmc v2.16b, v2.16b //AES block 8k+10 - round 12027aese v7.16b, v26.16b2028aesmc v7.16b, v7.16b //AES block 8k+15 - round 02029aese v6.16b, v26.16b2030aesmc v6.16b, v6.16b //AES block 8k+14 - round 020312032eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low2033eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid2034eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid20352036aese v6.16b, v27.16b2037aesmc v6.16b, v6.16b //AES block 8k+14 - round 12038aese v4.16b, v27.16b2039aesmc v4.16b, v4.16b //AES block 8k+12 - round 12040aese v5.16b, v27.16b2041aesmc v5.16b, v5.16b //AES block 8k+13 - round 120422043ldp q28, q26, [x8, #32] //load rk2, rk32044.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low2045pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid20462047ldr q23, [x3, #80] //load h3l | h3h2048ext v23.16b, v23.16b, v23.16b, #82049ldr q25, [x3, #112] //load h4l | h4h2050ext v25.16b, v25.16b, v25.16b, #82051aese v1.16b, v27.16b2052aesmc v1.16b, v1.16b //AES block 8k+9 - round 12053pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid20542055aese v3.16b, v27.16b2056aesmc v3.16b, v3.16b //AES block 8k+11 - round 12057aese v7.16b, v27.16b2058aesmc v7.16b, v7.16b //AES block 8k+15 - round 12059aese v0.16b, v27.16b2060aesmc v0.16b, v0.16b //AES block 8k+8 - round 120612062ldr q20, [x3, #32] //load h1l | h1h2063ext v20.16b, v20.16b, v20.16b, #82064ldr q22, [x3, #64] //load h2l | h2h2065ext v22.16b, v22.16b, v22.16b, #82066.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid20672068aese v0.16b, v28.16b2069aesmc v0.16b, v0.16b //AES block 8k+8 - round 22070aese v6.16b, v28.16b2071aesmc v6.16b, v6.16b //AES block 8k+14 - round 22072aese v2.16b, v28.16b2073aesmc v2.16b, v2.16b //AES block 8k+10 - round 220742075aese v4.16b, v28.16b2076aesmc v4.16b, v4.16b //AES block 8k+12 - round 22077trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid2078aese v7.16b, v28.16b2079aesmc v7.16b, v7.16b //AES block 8k+15 - round 220802081aese v1.16b, v28.16b2082aesmc v1.16b, v1.16b //AES block 8k+9 - round 22083aese v5.16b, v28.16b2084aesmc v5.16b, v5.16b //AES block 8k+13 - round 22085aese v3.16b, v28.16b2086aesmc v3.16b, v3.16b //AES block 8k+11 - round 220872088pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high2089pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low2090trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid20912092ldp q27, q28, [x8, #64] //load rk4, rk52093rev64 v15.16b, v15.16b //GHASH block 8k+72094aese v6.16b, v26.16b2095aesmc v6.16b, v6.16b //AES block 8k+14 - round 320962097ldr q21, [x3, #48] //load h2k | h1k2098ldr q24, [x3, #96] //load h4k | h3k2099pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high2100pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low21012102aese v2.16b, v26.16b2103aesmc v2.16b, v2.16b //AES block 8k+10 - round 32104aese v0.16b, v26.16b2105aesmc v0.16b, v0.16b //AES block 8k+8 - round 32106trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid21072108pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high2109pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low2110trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid21112112aese v4.16b, v26.16b2113aesmc v4.16b, v4.16b //AES block 8k+12 - round 32114aese v3.16b, v26.16b2115aesmc v3.16b, v3.16b //AES block 8k+11 - round 32116aese v7.16b, v26.16b2117aesmc v7.16b, v7.16b //AES block 8k+15 - round 321182119aese v1.16b, v26.16b2120aesmc v1.16b, v1.16b //AES block 8k+9 - round 32121aese v5.16b, v26.16b2122aesmc v5.16b, v5.16b //AES block 8k+13 - round 32123eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid21242125.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high2126aese v0.16b, v27.16b2127aesmc v0.16b, v0.16b //AES block 8k+8 - round 42128aese v2.16b, v27.16b2129aesmc v2.16b, v2.16b //AES block 8k+10 - round 421302131eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid2132aese v5.16b, v27.16b2133aesmc v5.16b, v5.16b //AES block 8k+13 - round 42134pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid21352136aese v1.16b, v27.16b2137aesmc v1.16b, v1.16b //AES block 8k+9 - round 42138aese v6.16b, v27.16b2139aesmc v6.16b, v6.16b //AES block 8k+14 - round 42140aese v4.16b, v27.16b2141aesmc v4.16b, v4.16b //AES block 8k+12 - round 421422143aese v7.16b, v27.16b2144aesmc v7.16b, v7.16b //AES block 8k+15 - round 42145aese v3.16b, v27.16b2146aesmc v3.16b, v3.16b //AES block 8k+11 - round 42147pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid21482149pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high2150pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid2151pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid21522153ldp q26, q27, [x8, #96] //load rk6, rk72154.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid2155aese v6.16b, v28.16b2156aesmc v6.16b, v6.16b //AES block 8k+14 - round 521572158ldr d16, [x10] //MODULO - load modulo constant2159pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low2160.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low21612162aese v0.16b, v28.16b2163aesmc v0.16b, v0.16b //AES block 8k+8 - round 52164aese v2.16b, v28.16b2165aesmc v2.16b, v2.16b //AES block 8k+10 - round 52166aese v4.16b, v28.16b2167aesmc v4.16b, v4.16b //AES block 8k+12 - round 521682169aese v3.16b, v28.16b2170aesmc v3.16b, v3.16b //AES block 8k+11 - round 52171aese v1.16b, v28.16b2172aesmc v1.16b, v1.16b //AES block 8k+9 - round 52173aese v5.16b, v28.16b2174aesmc v5.16b, v5.16b //AES block 8k+13 - round 521752176aese v7.16b, v28.16b2177aesmc v7.16b, v7.16b //AES block 8k+15 - round 52178.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid2179.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low21802181aese v4.16b, v26.16b2182aesmc v4.16b, v4.16b //AES block 8k+12 - round 62183aese v1.16b, v26.16b2184aesmc v1.16b, v1.16b //AES block 8k+9 - round 62185aese v2.16b, v26.16b2186aesmc v2.16b, v2.16b //AES block 8k+10 - round 621872188.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high2189aese v5.16b, v26.16b2190aesmc v5.16b, v5.16b //AES block 8k+13 - round 62191aese v0.16b, v26.16b2192aesmc v0.16b, v0.16b //AES block 8k+8 - round 621932194aese v3.16b, v26.16b2195aesmc v3.16b, v3.16b //AES block 8k+11 - round 62196aese v6.16b, v26.16b2197aesmc v6.16b, v6.16b //AES block 8k+14 - round 62198aese v7.16b, v26.16b2199aesmc v7.16b, v7.16b //AES block 8k+15 - round 622002201aese v4.16b, v27.16b2202aesmc v4.16b, v4.16b //AES block 8k+12 - round 72203.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up2204ldp q28, q26, [x8, #128] //load rk8, rk922052206pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid2207aese v3.16b, v27.16b2208aesmc v3.16b, v3.16b //AES block 8k+11 - round 72209ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment22102211aese v5.16b, v27.16b2212aesmc v5.16b, v5.16b //AES block 8k+13 - round 72213aese v6.16b, v27.16b2214aesmc v6.16b, v6.16b //AES block 8k+14 - round 72215aese v0.16b, v27.16b2216aesmc v0.16b, v0.16b //AES block 8k+8 - round 722172218aese v7.16b, v27.16b2219aesmc v7.16b, v7.16b //AES block 8k+15 - round 72220aese v1.16b, v27.16b2221aesmc v1.16b, v1.16b //AES block 8k+9 - round 72222aese v2.16b, v27.16b2223aesmc v2.16b, v2.16b //AES block 8k+10 - round 722242225.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid2226ldr q27, [x8, #160] //load rk1022272228aese v3.16b, v28.16b2229aesmc v3.16b, v3.16b //AES block 8k+11 - round 82230aese v0.16b, v28.16b2231aesmc v0.16b, v0.16b //AES block 8k+8 - round 822322233pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low2234aese v6.16b, v28.16b2235aesmc v6.16b, v6.16b //AES block 8k+14 - round 82236ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment22372238aese v2.16b, v28.16b2239aesmc v2.16b, v2.16b //AES block 8k+10 - round 82240aese v1.16b, v28.16b2241aesmc v1.16b, v1.16b //AES block 8k+9 - round 82242aese v7.16b, v28.16b2243aesmc v7.16b, v7.16b //AES block 8k+15 - round 822442245aese v6.16b, v26.16b //AES block 8k+14 - round 92246aese v5.16b, v28.16b2247aesmc v5.16b, v5.16b //AES block 8k+13 - round 82248aese v4.16b, v28.16b2249aesmc v4.16b, v4.16b //AES block 8k+12 - round 822502251.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low2252add v30.4s, v30.4s, v31.4s //CTR block 8k+152253aese v2.16b, v26.16b //AES block 8k+10 - round 922542255aese v3.16b, v26.16b //AES block 8k+11 - round 92256aese v5.16b, v26.16b //AES block 8k+13 - round 92257aese v0.16b, v26.16b //AES block 8k+8 - round 922582259aese v4.16b, v26.16b //AES block 8k+12 - round 92260aese v1.16b, v26.16b //AES block 8k+9 - round 92261aese v7.16b, v26.16b //AES block 8k+15 - round 922622263.L128_dec_tail: //TAIL22642265mov v29.16b, v27.16b2266sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process22672268cmp x5, #11222692270ldp q24, q25, [x3, #192] //load h8k | h7k2271ext v25.16b, v25.16b, v25.16b, #82272ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext22732274ldp q20, q21, [x3, #128] //load h5l | h5h2275ext v20.16b, v20.16b, v20.16b, #82276ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag22772278ldp q22, q23, [x3, #160] //load h6l | h6h2279ext v22.16b, v22.16b, v22.16b, #82280ext v23.16b, v23.16b, v23.16b, #822812282.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result2283b.gt .L128_dec_blocks_more_than_722842285cmp x5, #962286mov v7.16b, v6.16b2287movi v19.8b, #022882289movi v17.8b, #02290mov v6.16b, v5.16b2291mov v5.16b, v4.16b22922293mov v4.16b, v3.16b2294mov v3.16b, v2.16b2295mov v2.16b, v1.16b22962297movi v18.8b, #02298sub v30.4s, v30.4s, v31.4s2299b.gt .L128_dec_blocks_more_than_623002301cmp x5, #802302sub v30.4s, v30.4s, v31.4s23032304mov v7.16b, v6.16b2305mov v6.16b, v5.16b2306mov v5.16b, v4.16b23072308mov v4.16b, v3.16b2309mov v3.16b, v1.16b2310b.gt .L128_dec_blocks_more_than_523112312cmp x5, #6423132314mov v7.16b, v6.16b2315mov v6.16b, v5.16b2316mov v5.16b, v4.16b23172318mov v4.16b, v1.16b2319sub v30.4s, v30.4s, v31.4s2320b.gt .L128_dec_blocks_more_than_423212322sub v30.4s, v30.4s, v31.4s2323mov v7.16b, v6.16b2324mov v6.16b, v5.16b23252326mov v5.16b, v1.16b2327cmp x5, #482328b.gt .L128_dec_blocks_more_than_323292330sub v30.4s, v30.4s, v31.4s2331mov v7.16b, v6.16b2332cmp x5, #3223332334ldr q24, [x3, #96] //load h4k | h3k2335mov v6.16b, v1.16b2336b.gt .L128_dec_blocks_more_than_223372338cmp x5, #1623392340mov v7.16b, v1.16b2341sub v30.4s, v30.4s, v31.4s2342b.gt .L128_dec_blocks_more_than_123432344sub v30.4s, v30.4s, v31.4s2345ldr q21, [x3, #48] //load h2k | h1k2346b .L128_dec_blocks_less_than_12347.L128_dec_blocks_more_than_7: //blocks left > 72348rev64 v8.16b, v9.16b //GHASH final-7 block23492350eor v8.16b, v8.16b, v16.16b //feed in partial tag23512352ins v18.d[0], v24.d[1] //GHASH final-7 block - mid23532354pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low2355ins v27.d[0], v8.d[1] //GHASH final-7 block - mid23562357movi v16.8b, #0 //suppress further partial tag feed in2358ldr q9, [x0], #16 //AES final-6 block - load ciphertext23592360eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid23612362pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high2363st1 { v12.16b}, [x2], #16 //AES final-7 block - store result2364.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result23652366pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid2367.L128_dec_blocks_more_than_6: //blocks left > 623682369rev64 v8.16b, v9.16b //GHASH final-6 block23702371eor v8.16b, v8.16b, v16.16b //feed in partial tag23722373ins v27.d[0], v8.d[1] //GHASH final-6 block - mid23742375eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid23762377pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low2378ldr q9, [x0], #16 //AES final-5 block - load ciphertext2379movi v16.8b, #0 //suppress further partial tag feed in23802381pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid2382st1 { v12.16b}, [x2], #16 //AES final-6 block - store result2383pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high23842385eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low2386eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high23872388eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid2389.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result2390.L128_dec_blocks_more_than_5: //blocks left > 523912392rev64 v8.16b, v9.16b //GHASH final-5 block23932394ldr q9, [x0], #16 //AES final-4 block - load ciphertext2395st1 { v12.16b}, [x2], #16 //AES final-5 block - store result23962397eor v8.16b, v8.16b, v16.16b //feed in partial tag23982399ins v27.d[0], v8.d[1] //GHASH final-5 block - mid24002401.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result24022403eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid24042405ins v27.d[1], v27.d[0] //GHASH final-5 block - mid2406pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low2407movi v16.8b, #0 //suppress further partial tag feed in24082409pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid2410pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high2411eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low24122413eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid2414eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high2415.L128_dec_blocks_more_than_4: //blocks left > 424162417rev64 v8.16b, v9.16b //GHASH final-4 block24182419eor v8.16b, v8.16b, v16.16b //feed in partial tag2420ldr q9, [x0], #16 //AES final-3 block - load ciphertext24212422ins v27.d[0], v8.d[1] //GHASH final-4 block - mid2423movi v16.8b, #0 //suppress further partial tag feed in2424pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high24252426pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low24272428eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high24292430st1 { v12.16b}, [x2], #16 //AES final-4 block - store result2431eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid24322433.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result2434eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low24352436pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid24372438eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid2439.L128_dec_blocks_more_than_3: //blocks left > 324402441st1 { v12.16b}, [x2], #16 //AES final-3 block - store result2442rev64 v8.16b, v9.16b //GHASH final-3 block24432444eor v8.16b, v8.16b, v16.16b //feed in partial tag24452446ins v27.d[0], v8.d[1] //GHASH final-3 block - mid24472448ldr q25, [x3, #112] //load h4l | h4h2449ext v25.16b, v25.16b, v25.16b, #82450ldr q24, [x3, #96] //load h4k | h3k24512452eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid24532454ldr q9, [x0], #16 //AES final-2 block - load ciphertext24552456ins v27.d[1], v27.d[0] //GHASH final-3 block - mid2457pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low2458pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high24592460movi v16.8b, #0 //suppress further partial tag feed in2461.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result2462eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low24632464pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid24652466eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high2467eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid2468.L128_dec_blocks_more_than_2: //blocks left > 224692470rev64 v8.16b, v9.16b //GHASH final-2 block24712472st1 { v12.16b}, [x2], #16 //AES final-2 block - store result24732474eor v8.16b, v8.16b, v16.16b //feed in partial tag2475ldr q23, [x3, #80] //load h3l | h3h2476ext v23.16b, v23.16b, v23.16b, #82477movi v16.8b, #0 //suppress further partial tag feed in24782479ins v27.d[0], v8.d[1] //GHASH final-2 block - mid24802481eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid24822483pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low24842485pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high2486pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid2487ldr q9, [x0], #16 //AES final-1 block - load ciphertext24882489eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid24902491eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low24922493.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result2494eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high2495.L128_dec_blocks_more_than_1: //blocks left > 124962497st1 { v12.16b}, [x2], #16 //AES final-1 block - store result2498rev64 v8.16b, v9.16b //GHASH final-1 block24992500ldr q22, [x3, #64] //load h2l | h2h2501ext v22.16b, v22.16b, v22.16b, #825022503eor v8.16b, v8.16b, v16.16b //feed in partial tag25042505movi v16.8b, #0 //suppress further partial tag feed in25062507ins v27.d[0], v8.d[1] //GHASH final-1 block - mid25082509ldr q9, [x0], #16 //AES final block - load ciphertext2510pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high25112512eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid2513eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high2514ldr q21, [x3, #48] //load h2k | h1k25152516ins v27.d[1], v27.d[0] //GHASH final-1 block - mid2517.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result25182519pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low25202521pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid25222523eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low25242525eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid2526.L128_dec_blocks_less_than_1: //blocks left <= 125272528and x1, x1, #127 //bit_length %= 12825292530sub x1, x1, #128 //bit_length -= 12825312532neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])25332534mvn x6, xzr //temp0_x = 0xffffffffffffffff2535and x1, x1, #127 //bit_length %= 12825362537lsr x6, x6, x1 //temp0_x is mask for top 64b of last block2538cmp x1, #642539mvn x7, xzr //temp1_x = 0xffffffffffffffff25402541csel x13, x7, x6, lt2542csel x14, x6, xzr, lt25432544mov v0.d[1], x142545mov v0.d[0], x13 //ctr0b is mask for last block25462547ldr q20, [x3, #32] //load h1l | h1h2548ext v20.16b, v20.16b, v20.16b, #82549ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored25502551and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits25522553rev64 v8.16b, v9.16b //GHASH final block25542555eor v8.16b, v8.16b, v16.16b //feed in partial tag25562557pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high2558ins v16.d[0], v8.d[1] //GHASH final block - mid25592560eor v17.16b, v17.16b, v28.16b //GHASH final block - high2561eor v16.8b, v16.8b, v8.8b //GHASH final block - mid25622563bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing25642565pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid2566st1 { v12.16b}, [x2] //store all 16B25672568pmull v26.1q, v8.1d, v20.1d //GHASH final block - low25692570eor v18.16b, v18.16b, v16.16b //GHASH final block - mid2571ldr d16, [x10] //MODULO - load modulo constant25722573eor v19.16b, v19.16b, v26.16b //GHASH final block - low25742575eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up25762577pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid2578ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment25792580eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up25812582.inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid25832584pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low2585ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment25862587.inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low2588ext v19.16b, v19.16b, v19.16b, #82589rev64 v19.16b, v19.16b2590st1 { v19.16b }, [x3]2591rev32 v30.16b, v30.16b25922593str q30, [x16] //store the updated counter25942595mov x0, x925962597ldp d10, d11, [sp, #16]2598ldp d12, d13, [sp, #32]2599ldp d14, d15, [sp, #48]2600ldp d8, d9, [sp], #802601ret2602.L128_dec_ret:2603mov w0, #0x02604ret2605.size unroll8_eor3_aes_gcm_dec_128_kernel,.-unroll8_eor3_aes_gcm_dec_128_kernel2606.globl unroll8_eor3_aes_gcm_enc_192_kernel2607.type unroll8_eor3_aes_gcm_enc_192_kernel,%function2608.align 42609unroll8_eor3_aes_gcm_enc_192_kernel:2610AARCH64_VALID_CALL_TARGET2611cbz x1, .L192_enc_ret2612stp d8, d9, [sp, #-80]!2613lsr x9, x1, #32614mov x16, x42615mov x8, x52616stp d10, d11, [sp, #16]2617stp d12, d13, [sp, #32]2618stp d14, d15, [sp, #48]2619mov x5, #0xc2000000000000002620stp x5, xzr, [sp, #64]2621add x10, sp, #6426222623mov x5, x92624ld1 { v0.16b}, [x16] //CTR block 026252626mov x15, #0x100000000 //set up counter increment2627movi v31.16b, #0x02628mov v31.d[1], x1526292630rev32 v30.16b, v0.16b //set up reversed counter26312632add v30.4s, v30.4s, v31.4s //CTR block 026332634rev32 v1.16b, v30.16b //CTR block 12635add v30.4s, v30.4s, v31.4s //CTR block 126362637rev32 v2.16b, v30.16b //CTR block 22638add v30.4s, v30.4s, v31.4s //CTR block 226392640rev32 v3.16b, v30.16b //CTR block 32641add v30.4s, v30.4s, v31.4s //CTR block 326422643rev32 v4.16b, v30.16b //CTR block 42644add v30.4s, v30.4s, v31.4s //CTR block 42645sub x5, x5, #1 //byte_len - 126462647and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)26482649rev32 v5.16b, v30.16b //CTR block 52650add v30.4s, v30.4s, v31.4s //CTR block 52651ldp q26, q27, [x8, #0] //load rk0, rk126522653add x5, x5, x026542655rev32 v6.16b, v30.16b //CTR block 62656add v30.4s, v30.4s, v31.4s //CTR block 626572658rev32 v7.16b, v30.16b //CTR block 726592660aese v5.16b, v26.16b2661aesmc v5.16b, v5.16b //AES block 5 - round 02662aese v4.16b, v26.16b2663aesmc v4.16b, v4.16b //AES block 4 - round 02664aese v3.16b, v26.16b2665aesmc v3.16b, v3.16b //AES block 3 - round 026662667aese v0.16b, v26.16b2668aesmc v0.16b, v0.16b //AES block 0 - round 02669aese v1.16b, v26.16b2670aesmc v1.16b, v1.16b //AES block 1 - round 02671aese v7.16b, v26.16b2672aesmc v7.16b, v7.16b //AES block 7 - round 026732674aese v6.16b, v26.16b2675aesmc v6.16b, v6.16b //AES block 6 - round 02676aese v2.16b, v26.16b2677aesmc v2.16b, v2.16b //AES block 2 - round 02678ldp q28, q26, [x8, #32] //load rk2, rk326792680aese v5.16b, v27.16b2681aesmc v5.16b, v5.16b //AES block 5 - round 12682aese v7.16b, v27.16b2683aesmc v7.16b, v7.16b //AES block 7 - round 126842685aese v2.16b, v27.16b2686aesmc v2.16b, v2.16b //AES block 2 - round 12687aese v3.16b, v27.16b2688aesmc v3.16b, v3.16b //AES block 3 - round 12689aese v6.16b, v27.16b2690aesmc v6.16b, v6.16b //AES block 6 - round 126912692aese v5.16b, v28.16b2693aesmc v5.16b, v5.16b //AES block 5 - round 22694aese v4.16b, v27.16b2695aesmc v4.16b, v4.16b //AES block 4 - round 12696aese v0.16b, v27.16b2697aesmc v0.16b, v0.16b //AES block 0 - round 126982699aese v1.16b, v27.16b2700aesmc v1.16b, v1.16b //AES block 1 - round 12701aese v7.16b, v28.16b2702aesmc v7.16b, v7.16b //AES block 7 - round 22703aese v3.16b, v28.16b2704aesmc v3.16b, v3.16b //AES block 3 - round 227052706aese v2.16b, v28.16b2707aesmc v2.16b, v2.16b //AES block 2 - round 22708aese v0.16b, v28.16b2709aesmc v0.16b, v0.16b //AES block 0 - round 227102711aese v1.16b, v28.16b2712aesmc v1.16b, v1.16b //AES block 1 - round 22713aese v4.16b, v28.16b2714aesmc v4.16b, v4.16b //AES block 4 - round 22715aese v6.16b, v28.16b2716aesmc v6.16b, v6.16b //AES block 6 - round 227172718ldp q27, q28, [x8, #64] //load rk4, rk52719aese v4.16b, v26.16b2720aesmc v4.16b, v4.16b //AES block 4 - round 327212722aese v7.16b, v26.16b2723aesmc v7.16b, v7.16b //AES block 7 - round 32724aese v3.16b, v26.16b2725aesmc v3.16b, v3.16b //AES block 3 - round 32726aese v2.16b, v26.16b2727aesmc v2.16b, v2.16b //AES block 2 - round 327282729aese v1.16b, v26.16b2730aesmc v1.16b, v1.16b //AES block 1 - round 327312732aese v0.16b, v26.16b2733aesmc v0.16b, v0.16b //AES block 0 - round 327342735aese v6.16b, v26.16b2736aesmc v6.16b, v6.16b //AES block 6 - round 327372738aese v0.16b, v27.16b2739aesmc v0.16b, v0.16b //AES block 0 - round 42740aese v1.16b, v27.16b2741aesmc v1.16b, v1.16b //AES block 1 - round 42742aese v5.16b, v26.16b2743aesmc v5.16b, v5.16b //AES block 5 - round 327442745aese v3.16b, v27.16b2746aesmc v3.16b, v3.16b //AES block 3 - round 42747aese v2.16b, v27.16b2748aesmc v2.16b, v2.16b //AES block 2 - round 42749aese v4.16b, v27.16b2750aesmc v4.16b, v4.16b //AES block 4 - round 427512752aese v6.16b, v27.16b2753aesmc v6.16b, v6.16b //AES block 6 - round 42754aese v7.16b, v27.16b2755aesmc v7.16b, v7.16b //AES block 7 - round 42756aese v5.16b, v27.16b2757aesmc v5.16b, v5.16b //AES block 5 - round 427582759aese v1.16b, v28.16b2760aesmc v1.16b, v1.16b //AES block 1 - round 52761ldp q26, q27, [x8, #96] //load rk6, rk72762aese v2.16b, v28.16b2763aesmc v2.16b, v2.16b //AES block 2 - round 527642765aese v4.16b, v28.16b2766aesmc v4.16b, v4.16b //AES block 4 - round 52767aese v7.16b, v28.16b2768aesmc v7.16b, v7.16b //AES block 7 - round 52769aese v0.16b, v28.16b2770aesmc v0.16b, v0.16b //AES block 0 - round 527712772aese v5.16b, v28.16b2773aesmc v5.16b, v5.16b //AES block 5 - round 52774aese v6.16b, v28.16b2775aesmc v6.16b, v6.16b //AES block 6 - round 52776aese v3.16b, v28.16b2777aesmc v3.16b, v3.16b //AES block 3 - round 527782779add v30.4s, v30.4s, v31.4s //CTR block 727802781aese v5.16b, v26.16b2782aesmc v5.16b, v5.16b //AES block 5 - round 62783aese v4.16b, v26.16b2784aesmc v4.16b, v4.16b //AES block 4 - round 62785aese v3.16b, v26.16b2786aesmc v3.16b, v3.16b //AES block 3 - round 627872788aese v2.16b, v26.16b2789aesmc v2.16b, v2.16b //AES block 2 - round 62790aese v6.16b, v26.16b2791aesmc v6.16b, v6.16b //AES block 6 - round 62792aese v1.16b, v26.16b2793aesmc v1.16b, v1.16b //AES block 1 - round 627942795aese v0.16b, v26.16b2796aesmc v0.16b, v0.16b //AES block 0 - round 62797aese v7.16b, v26.16b2798aesmc v7.16b, v7.16b //AES block 7 - round 62799ldp q28, q26, [x8, #128] //load rk8, rk928002801aese v6.16b, v27.16b2802aesmc v6.16b, v6.16b //AES block 6 - round 72803aese v3.16b, v27.16b2804aesmc v3.16b, v3.16b //AES block 3 - round 728052806aese v4.16b, v27.16b2807aesmc v4.16b, v4.16b //AES block 4 - round 72808aese v0.16b, v27.16b2809aesmc v0.16b, v0.16b //AES block 0 - round 728102811aese v7.16b, v27.16b2812aesmc v7.16b, v7.16b //AES block 7 - round 72813aese v1.16b, v27.16b2814aesmc v1.16b, v1.16b //AES block 1 - round 728152816aese v2.16b, v27.16b2817aesmc v2.16b, v2.16b //AES block 2 - round 72818aese v5.16b, v27.16b2819aesmc v5.16b, v5.16b //AES block 5 - round 728202821aese v7.16b, v28.16b2822aesmc v7.16b, v7.16b //AES block 7 - round 82823aese v0.16b, v28.16b2824aesmc v0.16b, v0.16b //AES block 0 - round 828252826aese v4.16b, v28.16b2827aesmc v4.16b, v4.16b //AES block 4 - round 82828aese v3.16b, v28.16b2829aesmc v3.16b, v3.16b //AES block 3 - round 82830aese v5.16b, v28.16b2831aesmc v5.16b, v5.16b //AES block 5 - round 828322833aese v2.16b, v28.16b2834aesmc v2.16b, v2.16b //AES block 2 - round 82835aese v1.16b, v28.16b2836aesmc v1.16b, v1.16b //AES block 1 - round 82837aese v6.16b, v28.16b2838aesmc v6.16b, v6.16b //AES block 6 - round 828392840add x4, x0, x1, lsr #3 //end_input_ptr2841cmp x0, x5 //check if we have <= 8 blocks2842aese v3.16b, v26.16b2843aesmc v3.16b, v3.16b //AES block 3 - round 928442845ld1 { v19.16b}, [x3]2846ext v19.16b, v19.16b, v19.16b, #82847rev64 v19.16b, v19.16b2848ldp q27, q28, [x8, #160] //load rk10, rk1128492850aese v6.16b, v26.16b2851aesmc v6.16b, v6.16b //AES block 6 - round 92852aese v1.16b, v26.16b2853aesmc v1.16b, v1.16b //AES block 1 - round 928542855aese v5.16b, v26.16b2856aesmc v5.16b, v5.16b //AES block 5 - round 92857aese v2.16b, v26.16b2858aesmc v2.16b, v2.16b //AES block 2 - round 928592860aese v0.16b, v26.16b2861aesmc v0.16b, v0.16b //AES block 0 - round 92862aese v4.16b, v26.16b2863aesmc v4.16b, v4.16b //AES block 4 - round 928642865aese v6.16b, v27.16b2866aesmc v6.16b, v6.16b //AES block 14 - round 102867aese v7.16b, v26.16b2868aesmc v7.16b, v7.16b //AES block 7 - round 92869aese v3.16b, v27.16b2870aesmc v3.16b, v3.16b //AES block 11 - round 1028712872aese v1.16b, v27.16b2873aesmc v1.16b, v1.16b //AES block 9 - round 102874aese v5.16b, v27.16b2875aesmc v5.16b, v5.16b //AES block 13 - round 102876aese v4.16b, v27.16b2877aesmc v4.16b, v4.16b //AES block 12 - round 1028782879aese v0.16b, v27.16b2880aesmc v0.16b, v0.16b //AES block 8 - round 102881aese v2.16b, v27.16b2882aesmc v2.16b, v2.16b //AES block 10 - round 102883aese v7.16b, v27.16b2884aesmc v7.16b, v7.16b //AES block 15 - round 1028852886aese v6.16b, v28.16b //AES block 14 - round 112887aese v3.16b, v28.16b //AES block 11 - round 1128882889aese v4.16b, v28.16b //AES block 12 - round 112890aese v7.16b, v28.16b //AES block 15 - round 112891ldr q26, [x8, #192] //load rk1228922893aese v1.16b, v28.16b //AES block 9 - round 112894aese v5.16b, v28.16b //AES block 13 - round 1128952896aese v2.16b, v28.16b //AES block 10 - round 112897aese v0.16b, v28.16b //AES block 8 - round 112898b.ge .L192_enc_tail //handle tail28992900ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext29012902ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext29032904ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext29052906ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext29072908.inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result2909rev32 v0.16b, v30.16b //CTR block 82910add v30.4s, v30.4s, v31.4s //CTR block 829112912.inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result2913.inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result29142915rev32 v1.16b, v30.16b //CTR block 92916add v30.4s, v30.4s, v31.4s //CTR block 92917.inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result29182919.inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result2920.inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result2921stp q8, q9, [x2], #32 //AES block 0, 1 - store result29222923.inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result2924rev32 v2.16b, v30.16b //CTR block 102925add v30.4s, v30.4s, v31.4s //CTR block 1029262927stp q10, q11, [x2], #32 //AES block 2, 3 - store result2928cmp x0, x5 //check if we have <= 8 blocks29292930rev32 v3.16b, v30.16b //CTR block 112931add v30.4s, v30.4s, v31.4s //CTR block 112932.inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result29332934stp q12, q13, [x2], #32 //AES block 4, 5 - store result29352936rev32 v4.16b, v30.16b //CTR block 122937stp q14, q15, [x2], #32 //AES block 6, 7 - store result2938add v30.4s, v30.4s, v31.4s //CTR block 1229392940b.ge .L192_enc_prepretail //do prepretail29412942.L192_enc_main_loop: //main loop start2943rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free)2944ldp q26, q27, [x8, #0] //load rk0, rk12945rev64 v10.16b, v10.16b //GHASH block 8k+229462947rev32 v5.16b, v30.16b //CTR block 8k+132948add v30.4s, v30.4s, v31.4s //CTR block 8k+132949ldr q23, [x3, #176] //load h7l | h7h2950ext v23.16b, v23.16b, v23.16b, #82951ldr q25, [x3, #208] //load h8l | h8h2952ext v25.16b, v25.16b, v25.16b, #829532954ext v19.16b, v19.16b, v19.16b, #8 //PRE 02955rev64 v8.16b, v8.16b //GHASH block 8k2956ldr q20, [x3, #128] //load h5l | h5h2957ext v20.16b, v20.16b, v20.16b, #82958ldr q22, [x3, #160] //load h6l | h6h2959ext v22.16b, v22.16b, v22.16b, #829602961rev64 v9.16b, v9.16b //GHASH block 8k+12962rev32 v6.16b, v30.16b //CTR block 8k+142963add v30.4s, v30.4s, v31.4s //CTR block 8k+1429642965eor v8.16b, v8.16b, v19.16b //PRE 12966rev64 v11.16b, v11.16b //GHASH block 8k+32967rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free)29682969aese v0.16b, v26.16b2970aesmc v0.16b, v0.16b //AES block 8k+8 - round 02971rev32 v7.16b, v30.16b //CTR block 8k+152972aese v1.16b, v26.16b2973aesmc v1.16b, v1.16b //AES block 8k+9 - round 029742975aese v3.16b, v26.16b2976aesmc v3.16b, v3.16b //AES block 8k+11 - round 02977aese v5.16b, v26.16b2978aesmc v5.16b, v5.16b //AES block 8k+13 - round 02979aese v2.16b, v26.16b2980aesmc v2.16b, v2.16b //AES block 8k+10 - round 029812982aese v7.16b, v26.16b2983aesmc v7.16b, v7.16b //AES block 8k+15 - round 02984aese v4.16b, v26.16b2985aesmc v4.16b, v4.16b //AES block 8k+12 - round 02986aese v6.16b, v26.16b2987aesmc v6.16b, v6.16b //AES block 8k+14 - round 029882989ldp q28, q26, [x8, #32] //load rk2, rk32990pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high2991aese v0.16b, v27.16b2992aesmc v0.16b, v0.16b //AES block 8k+8 - round 129932994aese v4.16b, v27.16b2995aesmc v4.16b, v4.16b //AES block 8k+12 - round 12996pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high2997pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low29982999trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid3000aese v3.16b, v27.16b3001aesmc v3.16b, v3.16b //AES block 8k+11 - round 13002ldr q21, [x3, #144] //load h6k | h5k3003ldr q24, [x3, #192] //load h8k | h7k30043005pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high3006pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low3007trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid30083009aese v1.16b, v27.16b3010aesmc v1.16b, v1.16b //AES block 8k+9 - round 13011aese v2.16b, v27.16b3012aesmc v2.16b, v2.16b //AES block 8k+10 - round 13013aese v5.16b, v27.16b3014aesmc v5.16b, v5.16b //AES block 8k+13 - round 130153016eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high3017aese v6.16b, v27.16b3018aesmc v6.16b, v6.16b //AES block 8k+14 - round 13019aese v7.16b, v27.16b3020aesmc v7.16b, v7.16b //AES block 8k+15 - round 130213022pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high3023eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid3024aese v1.16b, v28.16b3025aesmc v1.16b, v1.16b //AES block 8k+9 - round 230263027aese v3.16b, v28.16b3028aesmc v3.16b, v3.16b //AES block 8k+11 - round 23029aese v4.16b, v28.16b3030aesmc v4.16b, v4.16b //AES block 8k+12 - round 23031aese v6.16b, v28.16b3032aesmc v6.16b, v6.16b //AES block 8k+14 - round 230333034aese v5.16b, v28.16b3035aesmc v5.16b, v5.16b //AES block 8k+13 - round 23036aese v1.16b, v26.16b3037aesmc v1.16b, v1.16b //AES block 8k+9 - round 33038.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high30393040pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low3041aese v7.16b, v28.16b3042aesmc v7.16b, v7.16b //AES block 8k+15 - round 23043aese v4.16b, v26.16b3044aesmc v4.16b, v4.16b //AES block 8k+12 - round 330453046aese v2.16b, v28.16b3047aesmc v2.16b, v2.16b //AES block 8k+10 - round 23048trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid3049aese v0.16b, v28.16b3050aesmc v0.16b, v0.16b //AES block 8k+8 - round 230513052trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid3053aese v3.16b, v26.16b3054aesmc v3.16b, v3.16b //AES block 8k+11 - round 33055ldp q27, q28, [x8, #64] //load rk4, rk530563057aese v0.16b, v26.16b3058aesmc v0.16b, v0.16b //AES block 8k+8 - round 33059eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low3060ldr q23, [x3, #80] //load h3l | h3h3061ext v23.16b, v23.16b, v23.16b, #83062ldr q25, [x3, #112] //load h4l | h4h3063ext v25.16b, v25.16b, v25.16b, #830643065pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid3066pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid3067pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low30683069aese v5.16b, v26.16b3070aesmc v5.16b, v5.16b //AES block 8k+13 - round 33071eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid3072trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid30733074eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid3075aese v6.16b, v26.16b3076aesmc v6.16b, v6.16b //AES block 8k+14 - round 33077.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low30783079aese v1.16b, v27.16b3080aesmc v1.16b, v1.16b //AES block 8k+9 - round 43081aese v3.16b, v27.16b3082aesmc v3.16b, v3.16b //AES block 8k+11 - round 43083aese v7.16b, v26.16b3084aesmc v7.16b, v7.16b //AES block 8k+15 - round 330853086pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid3087aese v6.16b, v27.16b3088aesmc v6.16b, v6.16b //AES block 8k+14 - round 43089aese v2.16b, v26.16b3090aesmc v2.16b, v2.16b //AES block 8k+10 - round 330913092pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid3093aese v0.16b, v27.16b3094aesmc v0.16b, v0.16b //AES block 8k+8 - round 43095aese v4.16b, v27.16b3096aesmc v4.16b, v4.16b //AES block 8k+12 - round 430973098aese v2.16b, v27.16b3099aesmc v2.16b, v2.16b //AES block 8k+10 - round 43100aese v5.16b, v27.16b3101aesmc v5.16b, v5.16b //AES block 8k+13 - round 43102aese v7.16b, v27.16b3103aesmc v7.16b, v7.16b //AES block 8k+15 - round 431043105.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid3106aese v4.16b, v28.16b3107aesmc v4.16b, v4.16b //AES block 8k+12 - round 53108ldr q20, [x3, #32] //load h1l | h1h3109ext v20.16b, v20.16b, v20.16b, #83110ldr q22, [x3, #64] //load h2l | h2h3111ext v22.16b, v22.16b, v22.16b, #831123113ldp q26, q27, [x8, #96] //load rk6, rk73114aese v2.16b, v28.16b3115aesmc v2.16b, v2.16b //AES block 8k+10 - round 53116rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free)31173118rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free)3119pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high3120pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low31213122aese v5.16b, v28.16b3123aesmc v5.16b, v5.16b //AES block 8k+13 - round 53124trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid31253126aese v6.16b, v28.16b3127aesmc v6.16b, v6.16b //AES block 8k+14 - round 53128ldr q21, [x3, #48] //load h2k | h1k3129ldr q24, [x3, #96] //load h4k | h3k31303131aese v1.16b, v28.16b3132aesmc v1.16b, v1.16b //AES block 8k+9 - round 53133pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high3134eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid31353136aese v3.16b, v28.16b3137aesmc v3.16b, v3.16b //AES block 8k+11 - round 53138aese v7.16b, v28.16b3139aesmc v7.16b, v7.16b //AES block 8k+15 - round 53140aese v0.16b, v28.16b3141aesmc v0.16b, v0.16b //AES block 8k+8 - round 531423143pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low3144aese v4.16b, v26.16b3145aesmc v4.16b, v4.16b //AES block 8k+12 - round 63146trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid31473148aese v0.16b, v26.16b3149aesmc v0.16b, v0.16b //AES block 8k+8 - round 63150aese v3.16b, v26.16b3151aesmc v3.16b, v3.16b //AES block 8k+11 - round 63152pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high31533154pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low3155trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid3156aese v2.16b, v26.16b3157aesmc v2.16b, v2.16b //AES block 8k+10 - round 631583159aese v6.16b, v26.16b3160aesmc v6.16b, v6.16b //AES block 8k+14 - round 63161aese v5.16b, v26.16b3162aesmc v5.16b, v5.16b //AES block 8k+13 - round 631633164aese v7.16b, v26.16b3165aesmc v7.16b, v7.16b //AES block 8k+15 - round 63166aese v2.16b, v27.16b3167aesmc v2.16b, v2.16b //AES block 8k+10 - round 73168aese v1.16b, v26.16b3169aesmc v1.16b, v1.16b //AES block 8k+9 - round 631703171aese v6.16b, v27.16b3172aesmc v6.16b, v6.16b //AES block 8k+14 - round 73173eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid31743175pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid3176ldp q28, q26, [x8, #128] //load rk8, rk93177pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid31783179aese v4.16b, v27.16b3180aesmc v4.16b, v4.16b //AES block 8k+12 - round 73181pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high3182aese v5.16b, v27.16b3183aesmc v5.16b, v5.16b //AES block 8k+13 - round 731843185.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid3186aese v7.16b, v27.16b3187aesmc v7.16b, v7.16b //AES block 8k+15 - round 73188add v30.4s, v30.4s, v31.4s //CTR block 8k+1531893190ldr d16, [x10] //MODULO - load modulo constant3191.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high3192aese v0.16b, v27.16b3193aesmc v0.16b, v0.16b //AES block 8k+8 - round 731943195pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid3196pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low3197aese v3.16b, v27.16b3198aesmc v3.16b, v3.16b //AES block 8k+11 - round 731993200aese v5.16b, v28.16b3201aesmc v5.16b, v5.16b //AES block 8k+13 - round 83202aese v4.16b, v28.16b3203aesmc v4.16b, v4.16b //AES block 8k+12 - round 83204aese v0.16b, v28.16b3205aesmc v0.16b, v0.16b //AES block 8k+8 - round 832063207aese v6.16b, v28.16b3208aesmc v6.16b, v6.16b //AES block 8k+14 - round 83209.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low3210aese v1.16b, v27.16b3211aesmc v1.16b, v1.16b //AES block 8k+9 - round 732123213aese v7.16b, v28.16b3214aesmc v7.16b, v7.16b //AES block 8k+15 - round 83215aese v2.16b, v28.16b3216aesmc v2.16b, v2.16b //AES block 8k+10 - round 83217pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid32183219aese v1.16b, v28.16b3220aesmc v1.16b, v1.16b //AES block 8k+9 - round 83221aese v3.16b, v28.16b3222aesmc v3.16b, v3.16b //AES block 8k+11 - round 83223ldp q27, q28, [x8, #160] //load rk10, rk1132243225.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low3226rev32 v20.16b, v30.16b //CTR block 8k+163227add v30.4s, v30.4s, v31.4s //CTR block 8k+1632283229aese v2.16b, v26.16b3230aesmc v2.16b, v2.16b //AES block 8k+10 - round 93231.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid3232.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high32333234aese v6.16b, v26.16b3235aesmc v6.16b, v6.16b //AES block 8k+14 - round 93236aese v3.16b, v26.16b3237aesmc v3.16b, v3.16b //AES block 8k+11 - round 93238ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext32393240pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid3241rev32 v22.16b, v30.16b //CTR block 8k+173242aese v0.16b, v26.16b3243aesmc v0.16b, v0.16b //AES block 8k+8 - round 932443245aese v4.16b, v26.16b3246aesmc v4.16b, v4.16b //AES block 8k+12 - round 93247aese v1.16b, v26.16b3248aesmc v1.16b, v1.16b //AES block 8k+9 - round 93249aese v7.16b, v26.16b3250aesmc v7.16b, v7.16b //AES block 8k+15 - round 932513252.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up3253aese v5.16b, v26.16b3254aesmc v5.16b, v5.16b //AES block 8k+13 - round 93255add v30.4s, v30.4s, v31.4s //CTR block 8k+1732563257aese v2.16b, v27.16b3258aesmc v2.16b, v2.16b //AES block 8k+10 - round 103259aese v4.16b, v27.16b3260aesmc v4.16b, v4.16b //AES block 8k+12 - round 103261ldr q26, [x8, #192] //load rk123262ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment32633264aese v0.16b, v27.16b3265aesmc v0.16b, v0.16b //AES block 8k+8 - round 103266aese v7.16b, v27.16b3267aesmc v7.16b, v7.16b //AES block 8k+15 - round 103268ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext32693270aese v4.16b, v28.16b //AES block 8k+12 - round 113271.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid3272ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load plaintext32733274ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load plaintext3275aese v2.16b, v28.16b //AES block 8k+10 - round 113276aese v1.16b, v27.16b3277aesmc v1.16b, v1.16b //AES block 8k+9 - round 1032783279rev32 v23.16b, v30.16b //CTR block 8k+183280aese v5.16b, v27.16b3281aesmc v5.16b, v5.16b //AES block 8k+13 - round 1032823283aese v3.16b, v27.16b3284aesmc v3.16b, v3.16b //AES block 8k+11 - round 103285pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low32863287aese v6.16b, v27.16b3288aesmc v6.16b, v6.16b //AES block 8k+14 - round 103289aese v5.16b, v28.16b //AES block 8k+13 - round 113290add v30.4s, v30.4s, v31.4s //CTR block 8k+1832913292aese v7.16b, v28.16b //AES block 8k+15 - round 113293aese v0.16b, v28.16b //AES block 8k+8 - round 113294.inst 0xce04698c //eor3 v12.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result32953296aese v6.16b, v28.16b //AES block 8k+14 - round 113297aese v3.16b, v28.16b //AES block 8k+11 - round 113298aese v1.16b, v28.16b //AES block 8k+9 - round 1132993300rev32 v25.16b, v30.16b //CTR block 8k+193301add v30.4s, v30.4s, v31.4s //CTR block 8k+193302.inst 0xce0769ef //eor3 v15.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result33033304.inst 0xce02694a //eor3 v10.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result3305.inst 0xce006908 //eor3 v8.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result3306mov v2.16b, v23.16b //CTR block 8k+1833073308.inst 0xce016929 //eor3 v9.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result3309mov v1.16b, v22.16b //CTR block 8k+173310stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result3311ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment33123313.inst 0xce0669ce //eor3 v14.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result3314mov v0.16b, v20.16b //CTR block 8k+163315rev32 v4.16b, v30.16b //CTR block 8k+2033163317add v30.4s, v30.4s, v31.4s //CTR block 8k+203318.inst 0xce0569ad //eor3 v13.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result3319.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low33203321.inst 0xce03696b //eor3 v11.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result3322mov v3.16b, v25.16b //CTR block 8k+1933233324stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result33253326stp q12, q13, [x2], #32 //AES block 8k+12, 8k+13 - store result33273328cmp x0, x5 //.LOOP CONTROL3329stp q14, q15, [x2], #32 //AES block 8k+14, 8k+15 - store result3330b.lt .L192_enc_main_loop33313332.L192_enc_prepretail: //PREPRETAIL3333rev32 v5.16b, v30.16b //CTR block 8k+133334ldp q26, q27, [x8, #0] //load rk0, rk13335add v30.4s, v30.4s, v31.4s //CTR block 8k+1333363337ldr q23, [x3, #176] //load h7l | h7h3338ext v23.16b, v23.16b, v23.16b, #83339ldr q25, [x3, #208] //load h8l | h8h3340ext v25.16b, v25.16b, v25.16b, #83341rev64 v8.16b, v8.16b //GHASH block 8k3342ext v19.16b, v19.16b, v19.16b, #8 //PRE 033433344rev32 v6.16b, v30.16b //CTR block 8k+143345add v30.4s, v30.4s, v31.4s //CTR block 8k+143346ldr q21, [x3, #144] //load h6k | h5k3347ldr q24, [x3, #192] //load h8k | h7k33483349rev64 v11.16b, v11.16b //GHASH block 8k+33350rev64 v10.16b, v10.16b //GHASH block 8k+23351ldr q20, [x3, #128] //load h5l | h5h3352ext v20.16b, v20.16b, v20.16b, #83353ldr q22, [x3, #160] //load h6l | h6h3354ext v22.16b, v22.16b, v22.16b, #833553356eor v8.16b, v8.16b, v19.16b //PRE 13357rev32 v7.16b, v30.16b //CTR block 8k+153358rev64 v9.16b, v9.16b //GHASH block 8k+133593360aese v5.16b, v26.16b3361aesmc v5.16b, v5.16b //AES block 8k+13 - round 03362aese v2.16b, v26.16b3363aesmc v2.16b, v2.16b //AES block 8k+10 - round 03364aese v3.16b, v26.16b3365aesmc v3.16b, v3.16b //AES block 8k+11 - round 033663367pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high3368aese v0.16b, v26.16b3369aesmc v0.16b, v0.16b //AES block 8k+8 - round 03370aese v6.16b, v26.16b3371aesmc v6.16b, v6.16b //AES block 8k+14 - round 033723373aese v1.16b, v26.16b3374aesmc v1.16b, v1.16b //AES block 8k+9 - round 03375aese v4.16b, v26.16b3376aesmc v4.16b, v4.16b //AES block 8k+12 - round 03377pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high33783379aese v6.16b, v27.16b3380aesmc v6.16b, v6.16b //AES block 8k+14 - round 13381pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low3382trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid33833384trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid3385aese v7.16b, v26.16b3386aesmc v7.16b, v7.16b //AES block 8k+15 - round 03387ldp q28, q26, [x8, #32] //load rk2, rk333883389pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low3390eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high3391aese v2.16b, v27.16b3392aesmc v2.16b, v2.16b //AES block 8k+10 - round 133933394aese v5.16b, v27.16b3395aesmc v5.16b, v5.16b //AES block 8k+13 - round 13396eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid3397aese v1.16b, v27.16b3398aesmc v1.16b, v1.16b //AES block 8k+9 - round 133993400aese v7.16b, v27.16b3401aesmc v7.16b, v7.16b //AES block 8k+15 - round 13402pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high3403pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high34043405aese v3.16b, v27.16b3406aesmc v3.16b, v3.16b //AES block 8k+11 - round 13407aese v0.16b, v27.16b3408aesmc v0.16b, v0.16b //AES block 8k+8 - round 13409aese v4.16b, v27.16b3410aesmc v4.16b, v4.16b //AES block 8k+12 - round 134113412pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low3413aese v5.16b, v28.16b3414aesmc v5.16b, v5.16b //AES block 8k+13 - round 23415eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low34163417pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low3418aese v7.16b, v28.16b3419aesmc v7.16b, v7.16b //AES block 8k+15 - round 23420.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high34213422aese v5.16b, v26.16b3423aesmc v5.16b, v5.16b //AES block 8k+13 - round 33424trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid3425aese v6.16b, v28.16b3426aesmc v6.16b, v6.16b //AES block 8k+14 - round 234273428aese v0.16b, v28.16b3429aesmc v0.16b, v0.16b //AES block 8k+8 - round 23430pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid3431trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid34323433aese v3.16b, v28.16b3434aesmc v3.16b, v3.16b //AES block 8k+11 - round 23435rev64 v13.16b, v13.16b //GHASH block 8k+5 (t0, t1, t2 and t3 free)3436rev64 v14.16b, v14.16b //GHASH block 8k+6 (t0, t1, and t2 free)34373438aese v2.16b, v28.16b3439aesmc v2.16b, v2.16b //AES block 8k+10 - round 23440aese v1.16b, v28.16b3441aesmc v1.16b, v1.16b //AES block 8k+9 - round 23442aese v4.16b, v28.16b3443aesmc v4.16b, v4.16b //AES block 8k+12 - round 234443445eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid3446pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid3447ldp q27, q28, [x8, #64] //load rk4, rk534483449aese v1.16b, v26.16b3450aesmc v1.16b, v1.16b //AES block 8k+9 - round 33451aese v6.16b, v26.16b3452aesmc v6.16b, v6.16b //AES block 8k+14 - round 33453aese v2.16b, v26.16b3454aesmc v2.16b, v2.16b //AES block 8k+10 - round 334553456eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid3457.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low3458aese v7.16b, v26.16b3459aesmc v7.16b, v7.16b //AES block 8k+15 - round 334603461ldr q23, [x3, #80] //load h3l | h3h3462ext v23.16b, v23.16b, v23.16b, #83463ldr q25, [x3, #112] //load h4l | h4h3464ext v25.16b, v25.16b, v25.16b, #83465aese v3.16b, v26.16b3466aesmc v3.16b, v3.16b //AES block 8k+11 - round 33467pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid34683469ldr q20, [x3, #32] //load h1l | h1h3470ext v20.16b, v20.16b, v20.16b, #83471ldr q22, [x3, #64] //load h2l | h2h3472ext v22.16b, v22.16b, v22.16b, #83473aese v4.16b, v26.16b3474aesmc v4.16b, v4.16b //AES block 8k+12 - round 33475rev64 v12.16b, v12.16b //GHASH block 8k+4 (t0, t1, and t2 free)34763477aese v0.16b, v26.16b3478aesmc v0.16b, v0.16b //AES block 8k+8 - round 33479pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid3480aese v6.16b, v27.16b3481aesmc v6.16b, v6.16b //AES block 8k+14 - round 434823483trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid3484aese v7.16b, v27.16b3485aesmc v7.16b, v7.16b //AES block 8k+15 - round 43486aese v5.16b, v27.16b3487aesmc v5.16b, v5.16b //AES block 8k+13 - round 434883489.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid3490aese v3.16b, v27.16b3491aesmc v3.16b, v3.16b //AES block 8k+11 - round 43492aese v0.16b, v27.16b3493aesmc v0.16b, v0.16b //AES block 8k+8 - round 434943495aese v1.16b, v27.16b3496aesmc v1.16b, v1.16b //AES block 8k+9 - round 43497aese v4.16b, v27.16b3498aesmc v4.16b, v4.16b //AES block 8k+12 - round 43499aese v2.16b, v27.16b3500aesmc v2.16b, v2.16b //AES block 8k+10 - round 435013502aese v0.16b, v28.16b3503aesmc v0.16b, v0.16b //AES block 8k+8 - round 53504rev64 v15.16b, v15.16b //GHASH block 8k+7 (t0, t1, t2 and t3 free)3505ldr q21, [x3, #48] //load h2k | h1k3506ldr q24, [x3, #96] //load h4k | h3k35073508aese v1.16b, v28.16b3509aesmc v1.16b, v1.16b //AES block 8k+9 - round 53510aese v2.16b, v28.16b3511aesmc v2.16b, v2.16b //AES block 8k+10 - round 53512ldp q26, q27, [x8, #96] //load rk6, rk735133514pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high3515pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high3516pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low35173518aese v4.16b, v28.16b3519aesmc v4.16b, v4.16b //AES block 8k+12 - round 53520trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid35213522pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high3523pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low3524pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low35253526trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid3527eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid3528trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid35293530aese v5.16b, v28.16b3531aesmc v5.16b, v5.16b //AES block 8k+13 - round 53532aese v1.16b, v26.16b3533aesmc v1.16b, v1.16b //AES block 8k+9 - round 63534aese v7.16b, v28.16b3535aesmc v7.16b, v7.16b //AES block 8k+15 - round 535363537aese v6.16b, v28.16b3538aesmc v6.16b, v6.16b //AES block 8k+14 - round 53539eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid3540aese v3.16b, v28.16b3541aesmc v3.16b, v3.16b //AES block 8k+11 - round 535423543pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid3544pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid35453546aese v4.16b, v26.16b3547aesmc v4.16b, v4.16b //AES block 8k+12 - round 63548aese v5.16b, v26.16b3549aesmc v5.16b, v5.16b //AES block 8k+13 - round 63550aese v1.16b, v27.16b3551aesmc v1.16b, v1.16b //AES block 8k+9 - round 735523553aese v0.16b, v26.16b3554aesmc v0.16b, v0.16b //AES block 8k+8 - round 63555aese v7.16b, v26.16b3556aesmc v7.16b, v7.16b //AES block 8k+15 - round 63557.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid35583559aese v2.16b, v26.16b3560aesmc v2.16b, v2.16b //AES block 8k+10 - round 63561.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high3562aese v5.16b, v27.16b3563aesmc v5.16b, v5.16b //AES block 8k+13 - round 735643565aese v6.16b, v26.16b3566aesmc v6.16b, v6.16b //AES block 8k+14 - round 63567ldr d16, [x10] //MODULO - load modulo constant3568aese v3.16b, v26.16b3569aesmc v3.16b, v3.16b //AES block 8k+11 - round 635703571pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid3572aese v0.16b, v27.16b3573aesmc v0.16b, v0.16b //AES block 8k+8 - round 73574.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low35753576pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high3577pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid3578pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low35793580aese v4.16b, v27.16b3581aesmc v4.16b, v4.16b //AES block 8k+12 - round 73582aese v2.16b, v27.16b3583aesmc v2.16b, v2.16b //AES block 8k+10 - round 73584ldp q28, q26, [x8, #128] //load rk8, rk935853586aese v3.16b, v27.16b3587aesmc v3.16b, v3.16b //AES block 8k+11 - round 73588.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid35893590.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low3591.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high35923593.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up3594ext v29.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment3595aese v7.16b, v27.16b3596aesmc v7.16b, v7.16b //AES block 8k+15 - round 73597pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid35983599aese v5.16b, v28.16b3600aesmc v5.16b, v5.16b //AES block 8k+13 - round 83601aese v1.16b, v28.16b3602aesmc v1.16b, v1.16b //AES block 8k+9 - round 836033604aese v6.16b, v27.16b3605aesmc v6.16b, v6.16b //AES block 8k+14 - round 73606aese v2.16b, v28.16b3607aesmc v2.16b, v2.16b //AES block 8k+10 - round 83608.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid36093610aese v3.16b, v28.16b3611aesmc v3.16b, v3.16b //AES block 8k+11 - round 83612aese v5.16b, v26.16b3613aesmc v5.16b, v5.16b //AES block 8k+13 - round 93614aese v4.16b, v28.16b3615aesmc v4.16b, v4.16b //AES block 8k+12 - round 836163617aese v0.16b, v28.16b3618aesmc v0.16b, v0.16b //AES block 8k+8 - round 83619aese v7.16b, v28.16b3620aesmc v7.16b, v7.16b //AES block 8k+15 - round 83621aese v6.16b, v28.16b3622aesmc v6.16b, v6.16b //AES block 8k+14 - round 836233624aese v3.16b, v26.16b3625aesmc v3.16b, v3.16b //AES block 8k+11 - round 93626ldp q27, q28, [x8, #160] //load rk10, rk113627aese v4.16b, v26.16b3628aesmc v4.16b, v4.16b //AES block 8k+12 - round 936293630aese v2.16b, v26.16b3631aesmc v2.16b, v2.16b //AES block 8k+10 - round 93632aese v7.16b, v26.16b3633aesmc v7.16b, v7.16b //AES block 8k+15 - round 936343635ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment3636aese v6.16b, v26.16b3637aesmc v6.16b, v6.16b //AES block 8k+14 - round 93638aese v0.16b, v26.16b3639aesmc v0.16b, v0.16b //AES block 8k+8 - round 93640aese v1.16b, v26.16b3641aesmc v1.16b, v1.16b //AES block 8k+9 - round 936423643pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low3644ldr q26, [x8, #192] //load rk1236453646aese v7.16b, v27.16b3647aesmc v7.16b, v7.16b //AES block 8k+15 - round 103648aese v1.16b, v27.16b3649aesmc v1.16b, v1.16b //AES block 8k+9 - round 103650aese v2.16b, v27.16b3651aesmc v2.16b, v2.16b //AES block 8k+10 - round 1036523653.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low3654aese v0.16b, v27.16b3655aesmc v0.16b, v0.16b //AES block 8k+8 - round 103656aese v3.16b, v27.16b3657aesmc v3.16b, v3.16b //AES block 8k+11 - round 1036583659aese v1.16b, v28.16b //AES block 8k+9 - round 113660aese v7.16b, v28.16b //AES block 8k+15 - round 1136613662aese v4.16b, v27.16b3663aesmc v4.16b, v4.16b //AES block 8k+12 - round 103664aese v3.16b, v28.16b //AES block 8k+11 - round 1136653666aese v5.16b, v27.16b3667aesmc v5.16b, v5.16b //AES block 8k+13 - round 103668aese v6.16b, v27.16b3669aesmc v6.16b, v6.16b //AES block 8k+14 - round 1036703671add v30.4s, v30.4s, v31.4s //CTR block 8k+153672aese v2.16b, v28.16b //AES block 8k+10 - round 113673aese v0.16b, v28.16b //AES block 8k+8 - round 1136743675aese v6.16b, v28.16b //AES block 8k+14 - round 113676aese v4.16b, v28.16b //AES block 8k+12 - round 113677aese v5.16b, v28.16b //AES block 8k+13 - round 1136783679.L192_enc_tail: //TAIL36803681ldp q20, q21, [x3, #128] //load h5l | h5h3682ext v20.16b, v20.16b, v20.16b, #83683sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process36843685ldr q8, [x0], #16 //AES block 8k+8 - l3ad plaintext36863687ldp q24, q25, [x3, #192] //load h8k | h7k3688ext v25.16b, v25.16b, v25.16b, #836893690mov v29.16b, v26.16b36913692ldp q22, q23, [x3, #160] //load h6l | h6h3693ext v22.16b, v22.16b, v22.16b, #83694ext v23.16b, v23.16b, v23.16b, #83695cmp x5, #11236963697.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result3698ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag3699b.gt .L192_enc_blocks_more_than_737003701cmp x5, #963702mov v7.16b, v6.16b3703movi v17.8b, #037043705mov v6.16b, v5.16b3706movi v19.8b, #03707sub v30.4s, v30.4s, v31.4s37083709mov v5.16b, v4.16b3710mov v4.16b, v3.16b3711mov v3.16b, v2.16b37123713mov v2.16b, v1.16b3714movi v18.8b, #03715b.gt .L192_enc_blocks_more_than_637163717mov v7.16b, v6.16b3718cmp x5, #8037193720mov v6.16b, v5.16b3721mov v5.16b, v4.16b3722mov v4.16b, v3.16b37233724mov v3.16b, v1.16b3725sub v30.4s, v30.4s, v31.4s3726b.gt .L192_enc_blocks_more_than_537273728cmp x5, #643729sub v30.4s, v30.4s, v31.4s37303731mov v7.16b, v6.16b3732mov v6.16b, v5.16b3733mov v5.16b, v4.16b37343735mov v4.16b, v1.16b3736b.gt .L192_enc_blocks_more_than_437373738mov v7.16b, v6.16b3739mov v6.16b, v5.16b3740mov v5.16b, v1.16b37413742sub v30.4s, v30.4s, v31.4s3743cmp x5, #483744b.gt .L192_enc_blocks_more_than_337453746mov v7.16b, v6.16b3747mov v6.16b, v1.16b3748sub v30.4s, v30.4s, v31.4s37493750ldr q24, [x3, #96] //load h4k | h3k3751cmp x5, #323752b.gt .L192_enc_blocks_more_than_237533754sub v30.4s, v30.4s, v31.4s37553756cmp x5, #163757mov v7.16b, v1.16b3758b.gt .L192_enc_blocks_more_than_137593760sub v30.4s, v30.4s, v31.4s3761ldr q21, [x3, #48] //load h2k | h1k3762b .L192_enc_blocks_less_than_13763.L192_enc_blocks_more_than_7: //blocks left > 73764st1 { v9.16b}, [x2], #16 //AES final-7 block - store result37653766rev64 v8.16b, v9.16b //GHASH final-7 block3767ins v18.d[0], v24.d[1] //GHASH final-7 block - mid37683769eor v8.16b, v8.16b, v16.16b //feed in partial tag37703771ins v27.d[0], v8.d[1] //GHASH final-7 block - mid37723773ldr q9, [x0], #16 //AES final-6 block - load plaintext37743775eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid3776movi v16.8b, #0 //suppress further partial tag feed in3777pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low37783779pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high37803781pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid3782.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result3783.L192_enc_blocks_more_than_6: //blocks left > 637843785st1 { v9.16b}, [x2], #16 //AES final-6 block - store result37863787rev64 v8.16b, v9.16b //GHASH final-6 block37883789ldr q9, [x0], #16 //AES final-5 block - load plaintext37903791eor v8.16b, v8.16b, v16.16b //feed in partial tag37923793ins v27.d[0], v8.d[1] //GHASH final-6 block - mid37943795pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low3796.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result37973798movi v16.8b, #0 //suppress further partial tag feed in3799pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high3800eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid38013802pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid38033804eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high3805eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low38063807eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid3808.L192_enc_blocks_more_than_5: //blocks left > 538093810st1 { v9.16b}, [x2], #16 //AES final-5 block - store result38113812rev64 v8.16b, v9.16b //GHASH final-5 block38133814eor v8.16b, v8.16b, v16.16b //feed in partial tag38153816ins v27.d[0], v8.d[1] //GHASH final-5 block - mid38173818ldr q9, [x0], #16 //AES final-4 block - load plaintext3819pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high38203821eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid3822eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high38233824ins v27.d[1], v27.d[0] //GHASH final-5 block - mid3825pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low38263827eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low3828pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid38293830.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result3831movi v16.8b, #0 //suppress further partial tag feed in38323833eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid3834.L192_enc_blocks_more_than_4: //blocks left > 438353836st1 { v9.16b}, [x2], #16 //AES final-4 block - store result38373838rev64 v8.16b, v9.16b //GHASH final-4 block38393840eor v8.16b, v8.16b, v16.16b //feed in partial tag38413842ldr q9, [x0], #16 //AES final-3 block - load plaintext3843pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high3844ins v27.d[0], v8.d[1] //GHASH final-4 block - mid38453846pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low3847eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high38483849eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid38503851movi v16.8b, #0 //suppress further partial tag feed in3852eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low38533854pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid38553856eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid3857.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result3858.L192_enc_blocks_more_than_3: //blocks left > 338593860ldr q24, [x3, #96] //load h4k | h3k3861st1 { v9.16b}, [x2], #16 //AES final-3 block - store result38623863rev64 v8.16b, v9.16b //GHASH final-3 block38643865eor v8.16b, v8.16b, v16.16b //feed in partial tag3866movi v16.8b, #0 //suppress further partial tag feed in38673868ldr q9, [x0], #16 //AES final-2 block - load plaintext3869ldr q25, [x3, #112] //load h4l | h4h3870ext v25.16b, v25.16b, v25.16b, #838713872ins v27.d[0], v8.d[1] //GHASH final-3 block - mid38733874.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result3875eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid38763877ins v27.d[1], v27.d[0] //GHASH final-3 block - mid3878pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low38793880pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high3881pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid38823883eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low38843885eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid3886eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high3887.L192_enc_blocks_more_than_2: //blocks left > 238883889st1 { v9.16b}, [x2], #16 //AES final-2 block - store result38903891rev64 v8.16b, v9.16b //GHASH final-2 block3892ldr q23, [x3, #80] //load h3l | h3h3893ext v23.16b, v23.16b, v23.16b, #838943895eor v8.16b, v8.16b, v16.16b //feed in partial tag38963897ldr q9, [x0], #16 //AES final-1 block - load plaintext3898ins v27.d[0], v8.d[1] //GHASH final-2 block - mid38993900eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid39013902pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low3903pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high3904movi v16.8b, #0 //suppress further partial tag feed in39053906pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid39073908eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low3909eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high39103911eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid3912.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result3913.L192_enc_blocks_more_than_1: //blocks left > 139143915ldr q22, [x3, #64] //load h1l | h1h3916ext v22.16b, v22.16b, v22.16b, #83917st1 { v9.16b}, [x2], #16 //AES final-1 block - store result39183919rev64 v8.16b, v9.16b //GHASH final-1 block39203921eor v8.16b, v8.16b, v16.16b //feed in partial tag39223923ins v27.d[0], v8.d[1] //GHASH final-1 block - mid3924pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low39253926eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low3927pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high3928eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid39293930ldr q9, [x0], #16 //AES final block - load plaintext3931ldr q21, [x3, #48] //load h2k | h1k39323933ins v27.d[1], v27.d[0] //GHASH final-1 block - mid39343935.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result3936pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid39373938movi v16.8b, #0 //suppress further partial tag feed in39393940eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid3941eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high3942.L192_enc_blocks_less_than_1: //blocks left <= 139433944mvn x6, xzr //temp0_x = 0xffffffffffffffff3945and x1, x1, #127 //bit_length %= 12839463947sub x1, x1, #128 //bit_length -= 12839483949neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])39503951and x1, x1, #127 //bit_length %= 12839523953lsr x6, x6, x1 //temp0_x is mask for top 64b of last block3954cmp x1, #643955mvn x7, xzr //temp1_x = 0xffffffffffffffff39563957csel x13, x7, x6, lt3958csel x14, x6, xzr, lt39593960mov v0.d[1], x143961ldr q20, [x3, #32] //load h1l | h1h3962ext v20.16b, v20.16b, v20.16b, #839633964ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored3965mov v0.d[0], x13 //ctr0b is mask for last block39663967and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits39683969rev64 v8.16b, v9.16b //GHASH final block3970bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing39713972st1 { v9.16b}, [x2] //store all 16B39733974eor v8.16b, v8.16b, v16.16b //feed in partial tag39753976ins v16.d[0], v8.d[1] //GHASH final block - mid3977pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high39783979eor v17.16b, v17.16b, v28.16b //GHASH final block - high3980pmull v26.1q, v8.1d, v20.1d //GHASH final block - low39813982eor v16.8b, v16.8b, v8.8b //GHASH final block - mid39833984pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid39853986eor v18.16b, v18.16b, v16.16b //GHASH final block - mid3987ldr d16, [x10] //MODULO - load modulo constant39883989eor v19.16b, v19.16b, v26.16b //GHASH final block - low3990ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment39913992rev32 v30.16b, v30.16b39933994str q30, [x16] //store the updated counter3995.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up39963997pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid39983999.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid40004001pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low4002ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment40034004.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low4005ext v19.16b, v19.16b, v19.16b, #84006rev64 v19.16b, v19.16b4007st1 { v19.16b }, [x3]40084009mov x0, x9 //return sizes40104011ldp d10, d11, [sp, #16]4012ldp d12, d13, [sp, #32]4013ldp d14, d15, [sp, #48]4014ldp d8, d9, [sp], #804015ret40164017.L192_enc_ret:4018mov w0, #0x04019ret4020.size unroll8_eor3_aes_gcm_enc_192_kernel,.-unroll8_eor3_aes_gcm_enc_192_kernel4021.globl unroll8_eor3_aes_gcm_dec_192_kernel4022.type unroll8_eor3_aes_gcm_dec_192_kernel,%function4023.align 44024unroll8_eor3_aes_gcm_dec_192_kernel:4025AARCH64_VALID_CALL_TARGET4026cbz x1, .L192_dec_ret4027stp d8, d9, [sp, #-80]!4028lsr x9, x1, #34029mov x16, x44030mov x8, x54031stp d10, d11, [sp, #16]4032stp d12, d13, [sp, #32]4033stp d14, d15, [sp, #48]4034mov x5, #0xc2000000000000004035stp x5, xzr, [sp, #64]4036add x10, sp, #6440374038mov x5, x94039ld1 { v0.16b}, [x16] //CTR block 04040ld1 { v19.16b}, [x3]40414042mov x15, #0x100000000 //set up counter increment4043movi v31.16b, #0x04044mov v31.d[1], x1540454046rev32 v30.16b, v0.16b //set up reversed counter40474048add v30.4s, v30.4s, v31.4s //CTR block 040494050rev32 v1.16b, v30.16b //CTR block 14051add v30.4s, v30.4s, v31.4s //CTR block 140524053rev32 v2.16b, v30.16b //CTR block 24054add v30.4s, v30.4s, v31.4s //CTR block 240554056rev32 v3.16b, v30.16b //CTR block 34057add v30.4s, v30.4s, v31.4s //CTR block 340584059rev32 v4.16b, v30.16b //CTR block 44060add v30.4s, v30.4s, v31.4s //CTR block 440614062rev32 v5.16b, v30.16b //CTR block 54063add v30.4s, v30.4s, v31.4s //CTR block 54064ldp q26, q27, [x8, #0] //load rk0, rk140654066rev32 v6.16b, v30.16b //CTR block 64067add v30.4s, v30.4s, v31.4s //CTR block 640684069rev32 v7.16b, v30.16b //CTR block 740704071aese v3.16b, v26.16b4072aesmc v3.16b, v3.16b //AES block 3 - round 04073aese v6.16b, v26.16b4074aesmc v6.16b, v6.16b //AES block 6 - round 04075aese v5.16b, v26.16b4076aesmc v5.16b, v5.16b //AES block 5 - round 040774078aese v0.16b, v26.16b4079aesmc v0.16b, v0.16b //AES block 0 - round 04080aese v1.16b, v26.16b4081aesmc v1.16b, v1.16b //AES block 1 - round 04082aese v7.16b, v26.16b4083aesmc v7.16b, v7.16b //AES block 7 - round 040844085aese v2.16b, v26.16b4086aesmc v2.16b, v2.16b //AES block 2 - round 04087aese v4.16b, v26.16b4088aesmc v4.16b, v4.16b //AES block 4 - round 04089ldp q28, q26, [x8, #32] //load rk2, rk340904091aese v1.16b, v27.16b4092aesmc v1.16b, v1.16b //AES block 1 - round 140934094aese v2.16b, v27.16b4095aesmc v2.16b, v2.16b //AES block 2 - round 140964097aese v0.16b, v27.16b4098aesmc v0.16b, v0.16b //AES block 0 - round 14099aese v3.16b, v27.16b4100aesmc v3.16b, v3.16b //AES block 3 - round 14101aese v7.16b, v27.16b4102aesmc v7.16b, v7.16b //AES block 7 - round 141034104aese v5.16b, v27.16b4105aesmc v5.16b, v5.16b //AES block 5 - round 14106aese v6.16b, v27.16b4107aesmc v6.16b, v6.16b //AES block 6 - round 141084109aese v7.16b, v28.16b4110aesmc v7.16b, v7.16b //AES block 7 - round 24111aese v0.16b, v28.16b4112aesmc v0.16b, v0.16b //AES block 0 - round 24113aese v4.16b, v27.16b4114aesmc v4.16b, v4.16b //AES block 4 - round 141154116aese v5.16b, v28.16b4117aesmc v5.16b, v5.16b //AES block 5 - round 24118aese v1.16b, v28.16b4119aesmc v1.16b, v1.16b //AES block 1 - round 24120aese v2.16b, v28.16b4121aesmc v2.16b, v2.16b //AES block 2 - round 241224123aese v3.16b, v28.16b4124aesmc v3.16b, v3.16b //AES block 3 - round 24125aese v4.16b, v28.16b4126aesmc v4.16b, v4.16b //AES block 4 - round 24127aese v6.16b, v28.16b4128aesmc v6.16b, v6.16b //AES block 6 - round 241294130aese v7.16b, v26.16b4131aesmc v7.16b, v7.16b //AES block 7 - round 341324133ldp q27, q28, [x8, #64] //load rk4, rk54134aese v2.16b, v26.16b4135aesmc v2.16b, v2.16b //AES block 2 - round 34136aese v5.16b, v26.16b4137aesmc v5.16b, v5.16b //AES block 5 - round 341384139aese v0.16b, v26.16b4140aesmc v0.16b, v0.16b //AES block 0 - round 34141aese v3.16b, v26.16b4142aesmc v3.16b, v3.16b //AES block 3 - round 341434144aese v4.16b, v26.16b4145aesmc v4.16b, v4.16b //AES block 4 - round 34146aese v1.16b, v26.16b4147aesmc v1.16b, v1.16b //AES block 1 - round 34148aese v6.16b, v26.16b4149aesmc v6.16b, v6.16b //AES block 6 - round 341504151aese v3.16b, v27.16b4152aesmc v3.16b, v3.16b //AES block 3 - round 44153aese v2.16b, v27.16b4154aesmc v2.16b, v2.16b //AES block 2 - round 44155aese v5.16b, v27.16b4156aesmc v5.16b, v5.16b //AES block 5 - round 441574158aese v1.16b, v27.16b4159aesmc v1.16b, v1.16b //AES block 1 - round 44160aese v7.16b, v27.16b4161aesmc v7.16b, v7.16b //AES block 7 - round 44162aese v6.16b, v27.16b4163aesmc v6.16b, v6.16b //AES block 6 - round 441644165aese v0.16b, v27.16b4166aesmc v0.16b, v0.16b //AES block 0 - round 44167aese v5.16b, v28.16b4168aesmc v5.16b, v5.16b //AES block 5 - round 54169aese v4.16b, v27.16b4170aesmc v4.16b, v4.16b //AES block 4 - round 441714172aese v6.16b, v28.16b4173aesmc v6.16b, v6.16b //AES block 6 - round 54174ldp q26, q27, [x8, #96] //load rk6, rk741754176aese v0.16b, v28.16b4177aesmc v0.16b, v0.16b //AES block 0 - round 54178aese v4.16b, v28.16b4179aesmc v4.16b, v4.16b //AES block 4 - round 54180aese v1.16b, v28.16b4181aesmc v1.16b, v1.16b //AES block 1 - round 541824183aese v3.16b, v28.16b4184aesmc v3.16b, v3.16b //AES block 3 - round 54185aese v2.16b, v28.16b4186aesmc v2.16b, v2.16b //AES block 2 - round 54187aese v7.16b, v28.16b4188aesmc v7.16b, v7.16b //AES block 7 - round 541894190sub x5, x5, #1 //byte_len - 141914192aese v4.16b, v26.16b4193aesmc v4.16b, v4.16b //AES block 4 - round 64194aese v5.16b, v26.16b4195aesmc v5.16b, v5.16b //AES block 5 - round 64196aese v1.16b, v26.16b4197aesmc v1.16b, v1.16b //AES block 1 - round 641984199aese v0.16b, v26.16b4200aesmc v0.16b, v0.16b //AES block 0 - round 64201aese v3.16b, v26.16b4202aesmc v3.16b, v3.16b //AES block 3 - round 64203aese v6.16b, v26.16b4204aesmc v6.16b, v6.16b //AES block 6 - round 642054206aese v7.16b, v26.16b4207aesmc v7.16b, v7.16b //AES block 7 - round 64208aese v2.16b, v26.16b4209aesmc v2.16b, v2.16b //AES block 2 - round 64210ldp q28, q26, [x8, #128] //load rk8, rk942114212add v30.4s, v30.4s, v31.4s //CTR block 742134214aese v3.16b, v27.16b4215aesmc v3.16b, v3.16b //AES block 3 - round 74216aese v7.16b, v27.16b4217aesmc v7.16b, v7.16b //AES block 7 - round 742184219aese v2.16b, v27.16b4220aesmc v2.16b, v2.16b //AES block 2 - round 74221aese v1.16b, v27.16b4222aesmc v1.16b, v1.16b //AES block 1 - round 74223aese v4.16b, v27.16b4224aesmc v4.16b, v4.16b //AES block 4 - round 742254226aese v6.16b, v27.16b4227aesmc v6.16b, v6.16b //AES block 6 - round 74228aese v0.16b, v27.16b4229aesmc v0.16b, v0.16b //AES block 0 - round 74230aese v5.16b, v27.16b4231aesmc v5.16b, v5.16b //AES block 5 - round 742324233aese v1.16b, v28.16b4234aesmc v1.16b, v1.16b //AES block 1 - round 84235aese v2.16b, v28.16b4236aesmc v2.16b, v2.16b //AES block 2 - round 84237and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)42384239aese v7.16b, v28.16b4240aesmc v7.16b, v7.16b //AES block 7 - round 84241aese v6.16b, v28.16b4242aesmc v6.16b, v6.16b //AES block 6 - round 84243aese v5.16b, v28.16b4244aesmc v5.16b, v5.16b //AES block 5 - round 842454246aese v4.16b, v28.16b4247aesmc v4.16b, v4.16b //AES block 4 - round 84248aese v3.16b, v28.16b4249aesmc v3.16b, v3.16b //AES block 3 - round 84250aese v0.16b, v28.16b4251aesmc v0.16b, v0.16b //AES block 0 - round 842524253add x4, x0, x1, lsr #3 //end_input_ptr4254aese v6.16b, v26.16b4255aesmc v6.16b, v6.16b //AES block 6 - round 942564257ld1 { v19.16b}, [x3]4258ext v19.16b, v19.16b, v19.16b, #84259rev64 v19.16b, v19.16b42604261ldp q27, q28, [x8, #160] //load rk10, rk1142624263aese v0.16b, v26.16b4264aesmc v0.16b, v0.16b //AES block 0 - round 94265add x5, x5, x042664267aese v1.16b, v26.16b4268aesmc v1.16b, v1.16b //AES block 1 - round 94269aese v7.16b, v26.16b4270aesmc v7.16b, v7.16b //AES block 7 - round 94271aese v4.16b, v26.16b4272aesmc v4.16b, v4.16b //AES block 4 - round 942734274cmp x0, x5 //check if we have <= 8 blocks4275aese v3.16b, v26.16b4276aesmc v3.16b, v3.16b //AES block 3 - round 942774278aese v5.16b, v26.16b4279aesmc v5.16b, v5.16b //AES block 5 - round 94280aese v2.16b, v26.16b4281aesmc v2.16b, v2.16b //AES block 2 - round 942824283aese v3.16b, v27.16b4284aesmc v3.16b, v3.16b //AES block 3 - round 104285aese v1.16b, v27.16b4286aesmc v1.16b, v1.16b //AES block 1 - round 104287aese v7.16b, v27.16b4288aesmc v7.16b, v7.16b //AES block 7 - round 1042894290aese v4.16b, v27.16b4291aesmc v4.16b, v4.16b //AES block 4 - round 104292aese v0.16b, v27.16b4293aesmc v0.16b, v0.16b //AES block 0 - round 104294aese v2.16b, v27.16b4295aesmc v2.16b, v2.16b //AES block 2 - round 1042964297aese v6.16b, v27.16b4298aesmc v6.16b, v6.16b //AES block 6 - round 104299aese v5.16b, v27.16b4300aesmc v5.16b, v5.16b //AES block 5 - round 104301ldr q26, [x8, #192] //load rk1243024303aese v0.16b, v28.16b //AES block 0 - round 114304aese v1.16b, v28.16b //AES block 1 - round 114305aese v4.16b, v28.16b //AES block 4 - round 1143064307aese v6.16b, v28.16b //AES block 6 - round 114308aese v5.16b, v28.16b //AES block 5 - round 114309aese v7.16b, v28.16b //AES block 7 - round 1143104311aese v2.16b, v28.16b //AES block 2 - round 114312aese v3.16b, v28.16b //AES block 3 - round 114313b.ge .L192_dec_tail //handle tail43144315ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext43164317ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext43184319ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext43204321.inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 1 - result4322.inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 0 - result4323stp q0, q1, [x2], #32 //AES block 0, 1 - store result43244325rev32 v0.16b, v30.16b //CTR block 84326add v30.4s, v30.4s, v31.4s //CTR block 843274328rev32 v1.16b, v30.16b //CTR block 94329add v30.4s, v30.4s, v31.4s //CTR block 94330.inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 3 - result43314332.inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 2 - result4333stp q2, q3, [x2], #32 //AES block 2, 3 - store result4334ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext43354336rev32 v2.16b, v30.16b //CTR block 104337add v30.4s, v30.4s, v31.4s //CTR block 1043384339.inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 4 - result43404341rev32 v3.16b, v30.16b //CTR block 114342add v30.4s, v30.4s, v31.4s //CTR block 1143434344.inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 5 - result4345stp q4, q5, [x2], #32 //AES block 4, 5 - store result4346cmp x0, x5 //check if we have <= 8 blocks43474348.inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 6 - result4349.inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 7 - result4350rev32 v4.16b, v30.16b //CTR block 1243514352add v30.4s, v30.4s, v31.4s //CTR block 124353stp q6, q7, [x2], #32 //AES block 6, 7 - store result4354b.ge .L192_dec_prepretail //do prepretail43554356.L192_dec_main_loop: //main loop start4357rev64 v9.16b, v9.16b //GHASH block 8k+14358ldp q26, q27, [x8, #0] //load rk0, rk14359ext v19.16b, v19.16b, v19.16b, #8 //PRE 043604361rev64 v8.16b, v8.16b //GHASH block 8k4362rev32 v5.16b, v30.16b //CTR block 8k+134363add v30.4s, v30.4s, v31.4s //CTR block 8k+1343644365ldr q23, [x3, #176] //load h7l | h7h4366ext v23.16b, v23.16b, v23.16b, #84367ldr q25, [x3, #208] //load h8l | h8h4368ext v25.16b, v25.16b, v25.16b, #84369rev64 v12.16b, v12.16b //GHASH block 8k+44370rev64 v11.16b, v11.16b //GHASH block 8k+343714372eor v8.16b, v8.16b, v19.16b //PRE 14373rev32 v6.16b, v30.16b //CTR block 8k+144374add v30.4s, v30.4s, v31.4s //CTR block 8k+1443754376rev64 v13.16b, v13.16b //GHASH block 8k+543774378rev32 v7.16b, v30.16b //CTR block 8k+154379aese v1.16b, v26.16b4380aesmc v1.16b, v1.16b //AES block 8k+9 - round 04381aese v6.16b, v26.16b4382aesmc v6.16b, v6.16b //AES block 8k+14 - round 043834384aese v5.16b, v26.16b4385aesmc v5.16b, v5.16b //AES block 8k+13 - round 04386aese v4.16b, v26.16b4387aesmc v4.16b, v4.16b //AES block 8k+12 - round 04388aese v0.16b, v26.16b4389aesmc v0.16b, v0.16b //AES block 8k+8 - round 043904391aese v7.16b, v26.16b4392aesmc v7.16b, v7.16b //AES block 8k+15 - round 04393aese v2.16b, v26.16b4394aesmc v2.16b, v2.16b //AES block 8k+10 - round 04395aese v3.16b, v26.16b4396aesmc v3.16b, v3.16b //AES block 8k+11 - round 043974398pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low4399pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high4400ldp q28, q26, [x8, #32] //load rk2, rk344014402aese v6.16b, v27.16b4403aesmc v6.16b, v6.16b //AES block 8k+14 - round 14404pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low4405ldr q20, [x3, #128] //load h5l | h5h4406ext v20.16b, v20.16b, v20.16b, #84407ldr q22, [x3, #160] //load h6l | h6h4408ext v22.16b, v22.16b, v22.16b, #844094410aese v0.16b, v27.16b4411aesmc v0.16b, v0.16b //AES block 8k+8 - round 14412aese v3.16b, v27.16b4413aesmc v3.16b, v3.16b //AES block 8k+11 - round 14414aese v7.16b, v27.16b4415aesmc v7.16b, v7.16b //AES block 8k+15 - round 144164417pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high4418aese v2.16b, v27.16b4419aesmc v2.16b, v2.16b //AES block 8k+10 - round 14420aese v4.16b, v27.16b4421aesmc v4.16b, v4.16b //AES block 8k+12 - round 144224423trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid4424rev64 v10.16b, v10.16b //GHASH block 8k+24425aese v1.16b, v27.16b4426aesmc v1.16b, v1.16b //AES block 8k+9 - round 144274428aese v5.16b, v27.16b4429aesmc v5.16b, v5.16b //AES block 8k+13 - round 14430ldr q21, [x3, #144] //load h6k | h5k4431ldr q24, [x3, #192] //load h8k | h7k4432trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid44334434eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high4435pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high4436pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high44374438eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid4439eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low4440aese v6.16b, v28.16b4441aesmc v6.16b, v6.16b //AES block 8k+14 - round 244424443aese v2.16b, v28.16b4444aesmc v2.16b, v2.16b //AES block 8k+10 - round 24445pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low4446.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high44474448aese v1.16b, v28.16b4449aesmc v1.16b, v1.16b //AES block 8k+9 - round 24450aese v6.16b, v26.16b4451aesmc v6.16b, v6.16b //AES block 8k+14 - round 34452aese v4.16b, v28.16b4453aesmc v4.16b, v4.16b //AES block 8k+12 - round 244544455aese v0.16b, v28.16b4456aesmc v0.16b, v0.16b //AES block 8k+8 - round 24457aese v7.16b, v28.16b4458aesmc v7.16b, v7.16b //AES block 8k+15 - round 24459aese v3.16b, v28.16b4460aesmc v3.16b, v3.16b //AES block 8k+11 - round 244614462ldr q23, [x3, #80] //load h3l | h3h4463ext v23.16b, v23.16b, v23.16b, #84464ldr q25, [x3, #112] //load h4l | h4h4465ext v25.16b, v25.16b, v25.16b, #84466aese v5.16b, v28.16b4467aesmc v5.16b, v5.16b //AES block 8k+13 - round 24468aese v2.16b, v26.16b4469aesmc v2.16b, v2.16b //AES block 8k+10 - round 344704471pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low4472trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid4473trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid44744475aese v3.16b, v26.16b4476aesmc v3.16b, v3.16b //AES block 8k+11 - round 34477aese v4.16b, v26.16b4478aesmc v4.16b, v4.16b //AES block 8k+12 - round 344794480aese v0.16b, v26.16b4481aesmc v0.16b, v0.16b //AES block 8k+8 - round 34482aese v7.16b, v26.16b4483aesmc v7.16b, v7.16b //AES block 8k+15 - round 34484ldp q27, q28, [x8, #64] //load rk4, rk544854486eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid4487.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low4488aese v1.16b, v26.16b4489aesmc v1.16b, v1.16b //AES block 8k+9 - round 344904491trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid4492add v30.4s, v30.4s, v31.4s //CTR block 8k+1544934494pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid4495pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid4496pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid44974498aese v5.16b, v26.16b4499aesmc v5.16b, v5.16b //AES block 8k+13 - round 34500pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid4501pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high45024503aese v4.16b, v27.16b4504aesmc v4.16b, v4.16b //AES block 8k+12 - round 44505aese v6.16b, v27.16b4506aesmc v6.16b, v6.16b //AES block 8k+14 - round 44507eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid45084509aese v5.16b, v27.16b4510aesmc v5.16b, v5.16b //AES block 8k+13 - round 44511aese v1.16b, v27.16b4512aesmc v1.16b, v1.16b //AES block 8k+9 - round 44513aese v3.16b, v27.16b4514aesmc v3.16b, v3.16b //AES block 8k+11 - round 445154516aese v2.16b, v27.16b4517aesmc v2.16b, v2.16b //AES block 8k+10 - round 44518aese v0.16b, v27.16b4519aesmc v0.16b, v0.16b //AES block 8k+8 - round 44520aese v7.16b, v27.16b4521aesmc v7.16b, v7.16b //AES block 8k+15 - round 445224523ldr q20, [x3, #32] //load h1l | h1h4524ext v20.16b, v20.16b, v20.16b, #84525ldr q22, [x3, #64] //load h2l | h2h4526ext v22.16b, v22.16b, v22.16b, #84527aese v3.16b, v28.16b4528aesmc v3.16b, v3.16b //AES block 8k+11 - round 54529aese v5.16b, v28.16b4530aesmc v5.16b, v5.16b //AES block 8k+13 - round 545314532ldp q26, q27, [x8, #96] //load rk6, rk74533aese v7.16b, v28.16b4534aesmc v7.16b, v7.16b //AES block 8k+15 - round 54535rev64 v15.16b, v15.16b //GHASH block 8k+745364537aese v4.16b, v28.16b4538aesmc v4.16b, v4.16b //AES block 8k+12 - round 54539.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid4540aese v1.16b, v28.16b4541aesmc v1.16b, v1.16b //AES block 8k+9 - round 545424543pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low4544trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid4545aese v2.16b, v28.16b4546aesmc v2.16b, v2.16b //AES block 8k+10 - round 545474548aese v6.16b, v28.16b4549aesmc v6.16b, v6.16b //AES block 8k+14 - round 54550aese v0.16b, v28.16b4551aesmc v0.16b, v0.16b //AES block 8k+8 - round 54552rev64 v14.16b, v14.16b //GHASH block 8k+645534554ldr q21, [x3, #48] //load h2k | h1k4555ldr q24, [x3, #96] //load h4k | h3k4556pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high4557pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low45584559aese v0.16b, v26.16b4560aesmc v0.16b, v0.16b //AES block 8k+8 - round 64561eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid4562trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid45634564aese v7.16b, v26.16b4565aesmc v7.16b, v7.16b //AES block 8k+15 - round 64566aese v2.16b, v26.16b4567aesmc v2.16b, v2.16b //AES block 8k+10 - round 64568aese v6.16b, v26.16b4569aesmc v6.16b, v6.16b //AES block 8k+14 - round 645704571pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high4572aese v3.16b, v26.16b4573aesmc v3.16b, v3.16b //AES block 8k+11 - round 64574aese v1.16b, v26.16b4575aesmc v1.16b, v1.16b //AES block 8k+9 - round 645764577aese v2.16b, v27.16b4578aesmc v2.16b, v2.16b //AES block 8k+10 - round 74579aese v6.16b, v27.16b4580aesmc v6.16b, v6.16b //AES block 8k+14 - round 74581aese v5.16b, v26.16b4582aesmc v5.16b, v5.16b //AES block 8k+13 - round 645834584pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid4585.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high4586.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low45874588pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low4589trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid4590aese v4.16b, v26.16b4591aesmc v4.16b, v4.16b //AES block 8k+12 - round 645924593aese v5.16b, v27.16b4594aesmc v5.16b, v5.16b //AES block 8k+13 - round 74595ldp q28, q26, [x8, #128] //load rk8, rk94596aese v3.16b, v27.16b4597aesmc v3.16b, v3.16b //AES block 8k+11 - round 745984599eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid4600pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid4601aese v1.16b, v27.16b4602aesmc v1.16b, v1.16b //AES block 8k+9 - round 746034604aese v4.16b, v27.16b4605aesmc v4.16b, v4.16b //AES block 8k+12 - round 74606aese v0.16b, v27.16b4607aesmc v0.16b, v0.16b //AES block 8k+8 - round 74608aese v7.16b, v27.16b4609aesmc v7.16b, v7.16b //AES block 8k+15 - round 746104611.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid4612pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid4613pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high46144615pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid4616ldr d16, [x10] //MODULO - load modulo constant4617pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low46184619aese v2.16b, v28.16b4620aesmc v2.16b, v2.16b //AES block 8k+10 - round 84621aese v5.16b, v28.16b4622aesmc v5.16b, v5.16b //AES block 8k+13 - round 84623aese v7.16b, v28.16b4624aesmc v7.16b, v7.16b //AES block 8k+15 - round 846254626aese v0.16b, v28.16b4627aesmc v0.16b, v0.16b //AES block 8k+8 - round 84628aese v3.16b, v28.16b4629aesmc v3.16b, v3.16b //AES block 8k+11 - round 84630.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low46314632aese v4.16b, v28.16b4633aesmc v4.16b, v4.16b //AES block 8k+12 - round 84634aese v1.16b, v28.16b4635aesmc v1.16b, v1.16b //AES block 8k+9 - round 84636aese v6.16b, v28.16b4637aesmc v6.16b, v6.16b //AES block 8k+14 - round 846384639.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high4640rev32 v20.16b, v30.16b //CTR block 8k+164641add v30.4s, v30.4s, v31.4s //CTR block 8k+1646424643aese v5.16b, v26.16b4644aesmc v5.16b, v5.16b //AES block 8k+13 - round 94645.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid4646aese v1.16b, v26.16b4647aesmc v1.16b, v1.16b //AES block 8k+9 - round 946484649aese v3.16b, v26.16b4650aesmc v3.16b, v3.16b //AES block 8k+11 - round 94651aese v7.16b, v26.16b4652aesmc v7.16b, v7.16b //AES block 8k+15 - round 94653ldp q27, q28, [x8, #160] //load rk10, rk1146544655.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up4656ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext46574658aese v2.16b, v26.16b4659aesmc v2.16b, v2.16b //AES block 8k+10 - round 94660aese v0.16b, v26.16b4661aesmc v0.16b, v0.16b //AES block 8k+8 - round 94662ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext46634664rev32 v22.16b, v30.16b //CTR block 8k+174665pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid4666add v30.4s, v30.4s, v31.4s //CTR block 8k+1746674668aese v6.16b, v26.16b4669aesmc v6.16b, v6.16b //AES block 8k+14 - round 94670aese v4.16b, v26.16b4671aesmc v4.16b, v4.16b //AES block 8k+12 - round 94672ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment46734674aese v3.16b, v27.16b4675aesmc v3.16b, v3.16b //AES block 8k+11 - round 104676aese v7.16b, v27.16b4677aesmc v7.16b, v7.16b //AES block 8k+15 - round 104678ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext46794680rev32 v23.16b, v30.16b //CTR block 8k+184681add v30.4s, v30.4s, v31.4s //CTR block 8k+184682.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid46834684aese v0.16b, v27.16b4685aesmc v0.16b, v0.16b //AES block 8k+8 - round 104686aese v1.16b, v27.16b4687aesmc v1.16b, v1.16b //AES block 8k+9 - round 104688ldr q26, [x8, #192] //load rk1246894690ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext4691aese v4.16b, v27.16b4692aesmc v4.16b, v4.16b //AES block 8k+12 - round 104693aese v6.16b, v27.16b4694aesmc v6.16b, v6.16b //AES block 8k+14 - round 1046954696aese v0.16b, v28.16b //AES block 8k+8 - round 114697ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment4698aese v1.16b, v28.16b //AES block 8k+9 - round 1146994700aese v2.16b, v27.16b4701aesmc v2.16b, v2.16b //AES block 8k+10 - round 104702aese v6.16b, v28.16b //AES block 8k+14 - round 114703aese v3.16b, v28.16b //AES block 8k+11 - round 1147044705.inst 0xce006900 //eor3 v0.16b, v8.16b, v0.16b, v26.16b //AES block 8k+8 - result4706rev32 v25.16b, v30.16b //CTR block 8k+194707aese v5.16b, v27.16b4708aesmc v5.16b, v5.16b //AES block 8k+13 - round 1047094710aese v4.16b, v28.16b //AES block 8k+12 - round 114711aese v2.16b, v28.16b //AES block 8k+10 - round 114712add v30.4s, v30.4s, v31.4s //CTR block 8k+1947134714aese v7.16b, v28.16b //AES block 8k+15 - round 114715aese v5.16b, v28.16b //AES block 8k+13 - round 114716pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low47174718.inst 0xce016921 //eor3 v1.16b, v9.16b, v1.16b, v26.16b //AES block 8k+9 - result4719stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result4720.inst 0xce036963 //eor3 v3.16b, v11.16b, v3.16b, v26.16b //AES block 8k+11 - result47214722.inst 0xce026942 //eor3 v2.16b, v10.16b, v2.16b, v26.16b //AES block 8k+10 - result4723.inst 0xce0769e7 //eor3 v7.16b, v15.16b, v7.16b, v26.16b //AES block 8k+15 - result4724stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result47254726.inst 0xce0569a5 //eor3 v5.16b, v13.16b, v5.16b, v26.16b //AES block 8k+13 - result4727.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low4728mov v3.16b, v25.16b //CTR block 8k+1947294730.inst 0xce046984 //eor3 v4.16b, v12.16b, v4.16b, v26.16b //AES block 8k+12 - result4731stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result4732cmp x0, x5 //.LOOP CONTROL47334734.inst 0xce0669c6 //eor3 v6.16b, v14.16b, v6.16b, v26.16b //AES block 8k+14 - result4735stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result4736mov v0.16b, v20.16b //CTR block 8k+1647374738mov v1.16b, v22.16b //CTR block 8k+174739mov v2.16b, v23.16b //CTR block 8k+1847404741rev32 v4.16b, v30.16b //CTR block 8k+204742add v30.4s, v30.4s, v31.4s //CTR block 8k+204743b.lt .L192_dec_main_loop47444745.L192_dec_prepretail: //PREPRETAIL4746ldp q26, q27, [x8, #0] //load rk0, rk14747rev32 v5.16b, v30.16b //CTR block 8k+134748add v30.4s, v30.4s, v31.4s //CTR block 8k+1347494750ldr q23, [x3, #176] //load h7l | h7h4751ext v23.16b, v23.16b, v23.16b, #84752ldr q25, [x3, #208] //load h8l | h8h4753ext v25.16b, v25.16b, v25.16b, #84754rev64 v8.16b, v8.16b //GHASH block 8k4755ext v19.16b, v19.16b, v19.16b, #8 //PRE 047564757rev64 v11.16b, v11.16b //GHASH block 8k+34758rev32 v6.16b, v30.16b //CTR block 8k+144759add v30.4s, v30.4s, v31.4s //CTR block 8k+1447604761eor v8.16b, v8.16b, v19.16b //PRE 14762rev64 v10.16b, v10.16b //GHASH block 8k+24763rev64 v9.16b, v9.16b //GHASH block 8k+147644765ldr q20, [x3, #128] //load h5l | h5h4766ext v20.16b, v20.16b, v20.16b, #84767ldr q22, [x3, #160] //load h6l | h6h4768ext v22.16b, v22.16b, v22.16b, #84769rev32 v7.16b, v30.16b //CTR block 8k+1547704771aese v0.16b, v26.16b4772aesmc v0.16b, v0.16b //AES block 8k+8 - round 04773aese v6.16b, v26.16b4774aesmc v6.16b, v6.16b //AES block 8k+14 - round 04775aese v5.16b, v26.16b4776aesmc v5.16b, v5.16b //AES block 8k+13 - round 047774778aese v3.16b, v26.16b4779aesmc v3.16b, v3.16b //AES block 8k+11 - round 04780aese v2.16b, v26.16b4781aesmc v2.16b, v2.16b //AES block 8k+10 - round 04782pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high47834784aese v4.16b, v26.16b4785aesmc v4.16b, v4.16b //AES block 8k+12 - round 04786pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high4787aese v1.16b, v26.16b4788aesmc v1.16b, v1.16b //AES block 8k+9 - round 047894790aese v6.16b, v27.16b4791aesmc v6.16b, v6.16b //AES block 8k+14 - round 14792aese v7.16b, v26.16b4793aesmc v7.16b, v7.16b //AES block 8k+15 - round 04794ldp q28, q26, [x8, #32] //load rk2, rk347954796aese v4.16b, v27.16b4797aesmc v4.16b, v4.16b //AES block 8k+12 - round 14798pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high4799pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low48004801pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low4802eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high4803aese v3.16b, v27.16b4804aesmc v3.16b, v3.16b //AES block 8k+11 - round 148054806pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low4807aese v7.16b, v27.16b4808aesmc v7.16b, v7.16b //AES block 8k+15 - round 14809aese v0.16b, v27.16b4810aesmc v0.16b, v0.16b //AES block 8k+8 - round 148114812trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid4813trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid4814pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high48154816aese v2.16b, v27.16b4817aesmc v2.16b, v2.16b //AES block 8k+10 - round 14818aese v1.16b, v27.16b4819aesmc v1.16b, v1.16b //AES block 8k+9 - round 14820aese v5.16b, v27.16b4821aesmc v5.16b, v5.16b //AES block 8k+13 - round 148224823ldr q21, [x3, #144] //load h6k | h5k4824ldr q24, [x3, #192] //load h8k | h7k4825aese v3.16b, v28.16b4826aesmc v3.16b, v3.16b //AES block 8k+11 - round 24827eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid48284829aese v6.16b, v28.16b4830aesmc v6.16b, v6.16b //AES block 8k+14 - round 24831rev64 v13.16b, v13.16b //GHASH block 8k+54832pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low48334834.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high4835aese v4.16b, v28.16b4836aesmc v4.16b, v4.16b //AES block 8k+12 - round 24837aese v5.16b, v28.16b4838aesmc v5.16b, v5.16b //AES block 8k+13 - round 248394840trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid4841aese v3.16b, v26.16b4842aesmc v3.16b, v3.16b //AES block 8k+11 - round 34843aese v7.16b, v28.16b4844aesmc v7.16b, v7.16b //AES block 8k+15 - round 248454846aese v0.16b, v28.16b4847aesmc v0.16b, v0.16b //AES block 8k+8 - round 24848aese v2.16b, v28.16b4849aesmc v2.16b, v2.16b //AES block 8k+10 - round 24850trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid48514852pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid4853aese v1.16b, v28.16b4854aesmc v1.16b, v1.16b //AES block 8k+9 - round 24855pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid48564857aese v5.16b, v26.16b4858aesmc v5.16b, v5.16b //AES block 8k+13 - round 34859eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid4860eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low48614862aese v7.16b, v26.16b4863aesmc v7.16b, v7.16b //AES block 8k+15 - round 34864aese v6.16b, v26.16b4865aesmc v6.16b, v6.16b //AES block 8k+14 - round 34866aese v4.16b, v26.16b4867aesmc v4.16b, v4.16b //AES block 8k+12 - round 348684869.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low4870ldp q27, q28, [x8, #64] //load rk4, rk54871aese v0.16b, v26.16b4872aesmc v0.16b, v0.16b //AES block 8k+8 - round 348734874ldr q23, [x3, #80] //load h3l | h3h4875ext v23.16b, v23.16b, v23.16b, #84876ldr q25, [x3, #112] //load h4l | h4h4877ext v25.16b, v25.16b, v25.16b, #84878pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid4879pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid48804881ldr q20, [x3, #32] //load h1l | h1h4882ext v20.16b, v20.16b, v20.16b, #84883ldr q22, [x3, #64] //load h2l | h2h4884ext v22.16b, v22.16b, v22.16b, #84885eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid4886aese v2.16b, v26.16b4887aesmc v2.16b, v2.16b //AES block 8k+10 - round 348884889rev64 v15.16b, v15.16b //GHASH block 8k+748904891.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid4892rev64 v12.16b, v12.16b //GHASH block 8k+448934894aese v5.16b, v27.16b4895aesmc v5.16b, v5.16b //AES block 8k+13 - round 44896aese v4.16b, v27.16b4897aesmc v4.16b, v4.16b //AES block 8k+12 - round 44898aese v1.16b, v26.16b4899aesmc v1.16b, v1.16b //AES block 8k+9 - round 349004901aese v2.16b, v27.16b4902aesmc v2.16b, v2.16b //AES block 8k+10 - round 44903aese v0.16b, v27.16b4904aesmc v0.16b, v0.16b //AES block 8k+8 - round 44905aese v3.16b, v27.16b4906aesmc v3.16b, v3.16b //AES block 8k+11 - round 449074908aese v1.16b, v27.16b4909aesmc v1.16b, v1.16b //AES block 8k+9 - round 44910aese v6.16b, v27.16b4911aesmc v6.16b, v6.16b //AES block 8k+14 - round 44912aese v7.16b, v27.16b4913aesmc v7.16b, v7.16b //AES block 8k+15 - round 449144915rev64 v14.16b, v14.16b //GHASH block 8k+64916ldr q21, [x3, #48] //load h2k | h1k4917ldr q24, [x3, #96] //load h4k | h3k4918trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid49194920aese v7.16b, v28.16b4921aesmc v7.16b, v7.16b //AES block 8k+15 - round 54922aese v1.16b, v28.16b4923aesmc v1.16b, v1.16b //AES block 8k+9 - round 54924aese v2.16b, v28.16b4925aesmc v2.16b, v2.16b //AES block 8k+10 - round 549264927ldp q26, q27, [x8, #96] //load rk6, rk74928aese v6.16b, v28.16b4929aesmc v6.16b, v6.16b //AES block 8k+14 - round 54930aese v5.16b, v28.16b4931aesmc v5.16b, v5.16b //AES block 8k+13 - round 549324933pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high4934pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high4935pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low49364937aese v4.16b, v28.16b4938aesmc v4.16b, v4.16b //AES block 8k+12 - round 549394940pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low4941trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid4942pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high49434944pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low4945trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid4946aese v0.16b, v28.16b4947aesmc v0.16b, v0.16b //AES block 8k+8 - round 549484949trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid4950aese v3.16b, v28.16b4951aesmc v3.16b, v3.16b //AES block 8k+11 - round 54952eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid49534954aese v4.16b, v26.16b4955aesmc v4.16b, v4.16b //AES block 8k+12 - round 64956aese v2.16b, v26.16b4957aesmc v2.16b, v2.16b //AES block 8k+10 - round 649584959eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid4960aese v1.16b, v26.16b4961aesmc v1.16b, v1.16b //AES block 8k+9 - round 64962aese v7.16b, v26.16b4963aesmc v7.16b, v7.16b //AES block 8k+15 - round 649644965pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid4966pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid4967aese v0.16b, v26.16b4968aesmc v0.16b, v0.16b //AES block 8k+8 - round 649694970pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid4971aese v5.16b, v26.16b4972aesmc v5.16b, v5.16b //AES block 8k+13 - round 64973pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high49744975.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid4976aese v4.16b, v27.16b4977aesmc v4.16b, v4.16b //AES block 8k+12 - round 74978.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low49794980aese v3.16b, v26.16b4981aesmc v3.16b, v3.16b //AES block 8k+11 - round 64982aese v6.16b, v26.16b4983aesmc v6.16b, v6.16b //AES block 8k+14 - round 64984aese v5.16b, v27.16b4985aesmc v5.16b, v5.16b //AES block 8k+13 - round 749864987ldp q28, q26, [x8, #128] //load rk8, rk94988pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid4989aese v2.16b, v27.16b4990aesmc v2.16b, v2.16b //AES block 8k+10 - round 749914992ldr d16, [x10] //MODULO - load modulo constant4993.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high4994pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low49954996aese v1.16b, v27.16b4997aesmc v1.16b, v1.16b //AES block 8k+9 - round 74998aese v7.16b, v27.16b4999aesmc v7.16b, v7.16b //AES block 8k+15 - round 75000aese v6.16b, v27.16b5001aesmc v6.16b, v6.16b //AES block 8k+14 - round 750025003.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high5004.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low5005.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid50065007aese v0.16b, v27.16b5008aesmc v0.16b, v0.16b //AES block 8k+8 - round 75009aese v3.16b, v27.16b5010aesmc v3.16b, v3.16b //AES block 8k+11 - round 750115012.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up5013ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment5014aese v2.16b, v28.16b5015aesmc v2.16b, v2.16b //AES block 8k+10 - round 850165017aese v6.16b, v28.16b5018aesmc v6.16b, v6.16b //AES block 8k+14 - round 85019aese v7.16b, v28.16b5020aesmc v7.16b, v7.16b //AES block 8k+15 - round 85021aese v1.16b, v28.16b5022aesmc v1.16b, v1.16b //AES block 8k+9 - round 850235024aese v3.16b, v28.16b5025aesmc v3.16b, v3.16b //AES block 8k+11 - round 85026pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid5027aese v0.16b, v28.16b5028aesmc v0.16b, v0.16b //AES block 8k+8 - round 850295030aese v5.16b, v28.16b5031aesmc v5.16b, v5.16b //AES block 8k+13 - round 85032aese v4.16b, v28.16b5033aesmc v4.16b, v4.16b //AES block 8k+12 - round 85034ldp q27, q28, [x8, #160] //load rk10, rk1150355036.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid5037aese v7.16b, v26.16b5038aesmc v7.16b, v7.16b //AES block 8k+15 - round 95039aese v6.16b, v26.16b5040aesmc v6.16b, v6.16b //AES block 8k+14 - round 950415042aese v5.16b, v26.16b5043aesmc v5.16b, v5.16b //AES block 8k+13 - round 95044aese v2.16b, v26.16b5045aesmc v2.16b, v2.16b //AES block 8k+10 - round 95046aese v3.16b, v26.16b5047aesmc v3.16b, v3.16b //AES block 8k+11 - round 950485049aese v0.16b, v26.16b5050aesmc v0.16b, v0.16b //AES block 8k+8 - round 95051aese v1.16b, v26.16b5052aesmc v1.16b, v1.16b //AES block 8k+9 - round 95053aese v4.16b, v26.16b5054aesmc v4.16b, v4.16b //AES block 8k+12 - round 950555056pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low5057ldr q26, [x8, #192] //load rk125058ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment50595060aese v2.16b, v27.16b5061aesmc v2.16b, v2.16b //AES block 8k+10 - round 105062aese v5.16b, v27.16b5063aesmc v5.16b, v5.16b //AES block 8k+13 - round 105064aese v0.16b, v27.16b5065aesmc v0.16b, v0.16b //AES block 8k+8 - round 1050665067aese v4.16b, v27.16b5068aesmc v4.16b, v4.16b //AES block 8k+12 - round 105069aese v6.16b, v27.16b5070aesmc v6.16b, v6.16b //AES block 8k+14 - round 105071aese v7.16b, v27.16b5072aesmc v7.16b, v7.16b //AES block 8k+15 - round 1050735074aese v0.16b, v28.16b //AES block 8k+8 - round 115075.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low5076aese v5.16b, v28.16b //AES block 8k+13 - round 1150775078aese v2.16b, v28.16b //AES block 8k+10 - round 115079aese v3.16b, v27.16b5080aesmc v3.16b, v3.16b //AES block 8k+11 - round 105081aese v1.16b, v27.16b5082aesmc v1.16b, v1.16b //AES block 8k+9 - round 1050835084aese v6.16b, v28.16b //AES block 8k+14 - round 115085aese v4.16b, v28.16b //AES block 8k+12 - round 115086add v30.4s, v30.4s, v31.4s //CTR block 8k+1550875088aese v3.16b, v28.16b //AES block 8k+11 - round 115089aese v1.16b, v28.16b //AES block 8k+9 - round 115090aese v7.16b, v28.16b //AES block 8k+15 - round 1150915092.L192_dec_tail: //TAIL50935094sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process50955096ldp q20, q21, [x3, #128] //load h5l | h5h5097ext v20.16b, v20.16b, v20.16b, #85098ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext50995100ldp q24, q25, [x3, #192] //load h8k | h7k5101ext v25.16b, v25.16b, v25.16b, #851025103mov v29.16b, v26.16b51045105ldp q22, q23, [x3, #160] //load h6l | h6h5106ext v22.16b, v22.16b, v22.16b, #85107ext v23.16b, v23.16b, v23.16b, #85108ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag51095110.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result5111cmp x5, #1125112b.gt .L192_dec_blocks_more_than_751135114mov v7.16b, v6.16b5115movi v17.8b, #05116sub v30.4s, v30.4s, v31.4s51175118mov v6.16b, v5.16b5119mov v5.16b, v4.16b5120mov v4.16b, v3.16b51215122cmp x5, #965123movi v19.8b, #05124mov v3.16b, v2.16b51255126mov v2.16b, v1.16b5127movi v18.8b, #05128b.gt .L192_dec_blocks_more_than_651295130mov v7.16b, v6.16b5131mov v6.16b, v5.16b5132mov v5.16b, v4.16b51335134mov v4.16b, v3.16b5135mov v3.16b, v1.16b51365137sub v30.4s, v30.4s, v31.4s5138cmp x5, #805139b.gt .L192_dec_blocks_more_than_551405141mov v7.16b, v6.16b5142mov v6.16b, v5.16b51435144mov v5.16b, v4.16b5145mov v4.16b, v1.16b5146cmp x5, #6451475148sub v30.4s, v30.4s, v31.4s5149b.gt .L192_dec_blocks_more_than_451505151sub v30.4s, v30.4s, v31.4s5152mov v7.16b, v6.16b5153mov v6.16b, v5.16b51545155mov v5.16b, v1.16b5156cmp x5, #485157b.gt .L192_dec_blocks_more_than_351585159sub v30.4s, v30.4s, v31.4s5160mov v7.16b, v6.16b5161cmp x5, #3251625163mov v6.16b, v1.16b5164ldr q24, [x3, #96] //load h4k | h3k5165b.gt .L192_dec_blocks_more_than_251665167sub v30.4s, v30.4s, v31.4s51685169mov v7.16b, v1.16b5170cmp x5, #165171b.gt .L192_dec_blocks_more_than_151725173sub v30.4s, v30.4s, v31.4s5174ldr q21, [x3, #48] //load h2k | h1k5175b .L192_dec_blocks_less_than_15176.L192_dec_blocks_more_than_7: //blocks left > 75177rev64 v8.16b, v9.16b //GHASH final-7 block51785179ins v18.d[0], v24.d[1] //GHASH final-7 block - mid5180eor v8.16b, v8.16b, v16.16b //feed in partial tag51815182pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high5183ins v27.d[0], v8.d[1] //GHASH final-7 block - mid5184ldr q9, [x0], #16 //AES final-6 block - load ciphertext51855186pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low51875188eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid5189st1 { v12.16b}, [x2], #16 //AES final-7 block - store result51905191.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result51925193pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid5194movi v16.8b, #0 //suppress further partial tag feed in5195.L192_dec_blocks_more_than_6: //blocks left > 651965197rev64 v8.16b, v9.16b //GHASH final-6 block51985199eor v8.16b, v8.16b, v16.16b //feed in partial tag52005201ldr q9, [x0], #16 //AES final-5 block - load ciphertext5202ins v27.d[0], v8.d[1] //GHASH final-6 block - mid52035204eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid5205movi v16.8b, #0 //suppress further partial tag feed in5206pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high52075208st1 { v12.16b}, [x2], #16 //AES final-6 block - store result5209.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result52105211eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high5212pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid5213pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low52145215eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid5216eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low5217.L192_dec_blocks_more_than_5: //blocks left > 552185219rev64 v8.16b, v9.16b //GHASH final-5 block52205221eor v8.16b, v8.16b, v16.16b //feed in partial tag52225223ins v27.d[0], v8.d[1] //GHASH final-5 block - mid52245225eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid52265227ins v27.d[1], v27.d[0] //GHASH final-5 block - mid5228pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high52295230ldr q9, [x0], #16 //AES final-4 block - load ciphertext52315232eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high5233pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low52345235pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid52365237eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low5238movi v16.8b, #0 //suppress further partial tag feed in5239st1 { v12.16b}, [x2], #16 //AES final-5 block - store result52405241eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid5242.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result5243.L192_dec_blocks_more_than_4: //blocks left > 452445245rev64 v8.16b, v9.16b //GHASH final-4 block52465247eor v8.16b, v8.16b, v16.16b //feed in partial tag5248movi v16.8b, #0 //suppress further partial tag feed in52495250ldr q9, [x0], #16 //AES final-3 block - load ciphertext5251ins v27.d[0], v8.d[1] //GHASH final-4 block - mid5252pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low52535254eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid52555256eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low52575258pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid5259st1 { v12.16b}, [x2], #16 //AES final-4 block - store result5260pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high52615262.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result52635264eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid5265eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high5266.L192_dec_blocks_more_than_3: //blocks left > 352675268ldr q25, [x3, #112] //load h4l | h4h5269ext v25.16b, v25.16b, v25.16b, #85270rev64 v8.16b, v9.16b //GHASH final-3 block5271ldr q9, [x0], #16 //AES final-2 block - load ciphertext52725273eor v8.16b, v8.16b, v16.16b //feed in partial tag52745275ins v27.d[0], v8.d[1] //GHASH final-3 block - mid5276pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high52775278eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high5279movi v16.8b, #0 //suppress further partial tag feed in5280pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low52815282st1 { v12.16b}, [x2], #16 //AES final-3 block - store result5283eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid5284.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result52855286eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low5287ldr q24, [x3, #96] //load h4k | h3k52885289ins v27.d[1], v27.d[0] //GHASH final-3 block - mid52905291pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid52925293eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid5294.L192_dec_blocks_more_than_2: //blocks left > 252955296rev64 v8.16b, v9.16b //GHASH final-2 block5297ldr q23, [x3, #80] //load h3l | h3h5298ext v23.16b, v23.16b, v23.16b, #852995300eor v8.16b, v8.16b, v16.16b //feed in partial tag53015302ins v27.d[0], v8.d[1] //GHASH final-2 block - mid5303ldr q9, [x0], #16 //AES final-1 block - load ciphertext53045305pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high53065307eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid53085309eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high5310pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low53115312pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid5313movi v16.8b, #0 //suppress further partial tag feed in53145315eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low5316st1 { v12.16b}, [x2], #16 //AES final-2 block - store result53175318eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid5319.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result5320.L192_dec_blocks_more_than_1: //blocks left > 153215322rev64 v8.16b, v9.16b //GHASH final-1 block5323ldr q9, [x0], #16 //AES final block - load ciphertext5324ldr q22, [x3, #64] //load h1l | h1h5325ext v22.16b, v22.16b, v22.16b, #853265327eor v8.16b, v8.16b, v16.16b //feed in partial tag5328movi v16.8b, #0 //suppress further partial tag feed in5329ldr q21, [x3, #48] //load h2k | h1k53305331pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low5332ins v27.d[0], v8.d[1] //GHASH final-1 block - mid5333st1 { v12.16b}, [x2], #16 //AES final-1 block - store result53345335pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high53365337.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result53385339eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid53405341ins v27.d[1], v27.d[0] //GHASH final-1 block - mid53425343pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid53445345eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low53465347eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid5348eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high5349.L192_dec_blocks_less_than_1: //blocks left <= 153505351rev32 v30.16b, v30.16b5352and x1, x1, #127 //bit_length %= 12853535354sub x1, x1, #128 //bit_length -= 1285355str q30, [x16] //store the updated counter53565357neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])5358mvn x6, xzr //temp0_x = 0xffffffffffffffff53595360and x1, x1, #127 //bit_length %= 12853615362mvn x7, xzr //temp1_x = 0xffffffffffffffff5363lsr x6, x6, x1 //temp0_x is mask for top 64b of last block5364cmp x1, #6453655366csel x13, x7, x6, lt5367csel x14, x6, xzr, lt5368ldr q20, [x3, #32] //load h1l | h1h5369ext v20.16b, v20.16b, v20.16b, #853705371mov v0.d[1], x145372ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored53735374mov v0.d[0], x13 //ctr0b is mask for last block53755376and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits5377bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing53785379rev64 v8.16b, v9.16b //GHASH final block53805381st1 { v12.16b}, [x2] //store all 16B53825383eor v8.16b, v8.16b, v16.16b //feed in partial tag53845385ins v16.d[0], v8.d[1] //GHASH final block - mid5386pmull v26.1q, v8.1d, v20.1d //GHASH final block - low53875388eor v16.8b, v16.8b, v8.8b //GHASH final block - mid5389pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high5390eor v19.16b, v19.16b, v26.16b //GHASH final block - low53915392pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid5393eor v17.16b, v17.16b, v28.16b //GHASH final block - high53945395eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up5396eor v18.16b, v18.16b, v16.16b //GHASH final block - mid5397ldr d16, [x10] //MODULO - load modulo constant53985399pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid5400ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment54015402eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up54035404.inst 0xce115652 //eor3 v18.16b, v18.16b, v17.16b, v21.16b //MODULO - fold into mid54055406pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low5407ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment54085409.inst 0xce124673 //eor3 v19.16b, v19.16b, v18.16b, v17.16b //MODULO - fold into low5410ext v19.16b, v19.16b, v19.16b, #85411rev64 v19.16b, v19.16b5412st1 { v19.16b }, [x3]54135414mov x0, x954155416ldp d10, d11, [sp, #16]5417ldp d12, d13, [sp, #32]5418ldp d14, d15, [sp, #48]5419ldp d8, d9, [sp], #805420ret54215422.L192_dec_ret:5423mov w0, #0x05424ret5425.size unroll8_eor3_aes_gcm_dec_192_kernel,.-unroll8_eor3_aes_gcm_dec_192_kernel5426.globl unroll8_eor3_aes_gcm_enc_256_kernel5427.type unroll8_eor3_aes_gcm_enc_256_kernel,%function5428.align 45429unroll8_eor3_aes_gcm_enc_256_kernel:5430AARCH64_VALID_CALL_TARGET5431cbz x1, .L256_enc_ret5432stp d8, d9, [sp, #-80]!5433lsr x9, x1, #35434mov x16, x45435mov x8, x55436stp d10, d11, [sp, #16]5437stp d12, d13, [sp, #32]5438stp d14, d15, [sp, #48]5439mov x5, #0xc2000000000000005440stp x5, xzr, [sp, #64]5441add x10, sp, #6454425443ld1 { v0.16b}, [x16] //CTR block 054445445mov x5, x954465447mov x15, #0x100000000 //set up counter increment5448movi v31.16b, #0x05449mov v31.d[1], x155450sub x5, x5, #1 //byte_len - 154515452and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)54535454add x5, x5, x054555456rev32 v30.16b, v0.16b //set up reversed counter54575458add v30.4s, v30.4s, v31.4s //CTR block 054595460rev32 v1.16b, v30.16b //CTR block 15461add v30.4s, v30.4s, v31.4s //CTR block 154625463rev32 v2.16b, v30.16b //CTR block 25464add v30.4s, v30.4s, v31.4s //CTR block 254655466rev32 v3.16b, v30.16b //CTR block 35467add v30.4s, v30.4s, v31.4s //CTR block 354685469rev32 v4.16b, v30.16b //CTR block 45470add v30.4s, v30.4s, v31.4s //CTR block 454715472rev32 v5.16b, v30.16b //CTR block 55473add v30.4s, v30.4s, v31.4s //CTR block 55474ldp q26, q27, [x8, #0] //load rk0, rk154755476rev32 v6.16b, v30.16b //CTR block 65477add v30.4s, v30.4s, v31.4s //CTR block 654785479rev32 v7.16b, v30.16b //CTR block 754805481aese v3.16b, v26.16b5482aesmc v3.16b, v3.16b //AES block 3 - round 05483aese v4.16b, v26.16b5484aesmc v4.16b, v4.16b //AES block 4 - round 05485aese v2.16b, v26.16b5486aesmc v2.16b, v2.16b //AES block 2 - round 054875488aese v0.16b, v26.16b5489aesmc v0.16b, v0.16b //AES block 0 - round 05490aese v1.16b, v26.16b5491aesmc v1.16b, v1.16b //AES block 1 - round 05492aese v6.16b, v26.16b5493aesmc v6.16b, v6.16b //AES block 6 - round 054945495aese v5.16b, v26.16b5496aesmc v5.16b, v5.16b //AES block 5 - round 05497aese v7.16b, v26.16b5498aesmc v7.16b, v7.16b //AES block 7 - round 05499ldp q28, q26, [x8, #32] //load rk2, rk355005501aese v4.16b, v27.16b5502aesmc v4.16b, v4.16b //AES block 4 - round 15503aese v1.16b, v27.16b5504aesmc v1.16b, v1.16b //AES block 1 - round 15505aese v3.16b, v27.16b5506aesmc v3.16b, v3.16b //AES block 3 - round 155075508aese v6.16b, v27.16b5509aesmc v6.16b, v6.16b //AES block 6 - round 15510aese v5.16b, v27.16b5511aesmc v5.16b, v5.16b //AES block 5 - round 155125513aese v2.16b, v27.16b5514aesmc v2.16b, v2.16b //AES block 2 - round 155155516aese v7.16b, v27.16b5517aesmc v7.16b, v7.16b //AES block 7 - round 155185519aese v2.16b, v28.16b5520aesmc v2.16b, v2.16b //AES block 2 - round 25521aese v3.16b, v28.16b5522aesmc v3.16b, v3.16b //AES block 3 - round 25523aese v0.16b, v27.16b5524aesmc v0.16b, v0.16b //AES block 0 - round 155255526aese v7.16b, v28.16b5527aesmc v7.16b, v7.16b //AES block 7 - round 25528aese v6.16b, v28.16b5529aesmc v6.16b, v6.16b //AES block 6 - round 25530aese v5.16b, v28.16b5531aesmc v5.16b, v5.16b //AES block 5 - round 255325533aese v4.16b, v28.16b5534aesmc v4.16b, v4.16b //AES block 4 - round 25535aese v0.16b, v28.16b5536aesmc v0.16b, v0.16b //AES block 0 - round 25537aese v1.16b, v28.16b5538aesmc v1.16b, v1.16b //AES block 1 - round 255395540aese v5.16b, v26.16b5541aesmc v5.16b, v5.16b //AES block 5 - round 35542aese v3.16b, v26.16b5543aesmc v3.16b, v3.16b //AES block 3 - round 35544ldp q27, q28, [x8, #64] //load rk4, rk555455546aese v4.16b, v26.16b5547aesmc v4.16b, v4.16b //AES block 4 - round 355485549aese v1.16b, v26.16b5550aesmc v1.16b, v1.16b //AES block 1 - round 35551aese v6.16b, v26.16b5552aesmc v6.16b, v6.16b //AES block 6 - round 35553aese v7.16b, v26.16b5554aesmc v7.16b, v7.16b //AES block 7 - round 355555556aese v2.16b, v26.16b5557aesmc v2.16b, v2.16b //AES block 2 - round 35558aese v0.16b, v26.16b5559aesmc v0.16b, v0.16b //AES block 0 - round 355605561aese v4.16b, v27.16b5562aesmc v4.16b, v4.16b //AES block 4 - round 45563aese v6.16b, v27.16b5564aesmc v6.16b, v6.16b //AES block 6 - round 45565aese v1.16b, v27.16b5566aesmc v1.16b, v1.16b //AES block 1 - round 455675568aese v2.16b, v27.16b5569aesmc v2.16b, v2.16b //AES block 2 - round 45570aese v0.16b, v27.16b5571aesmc v0.16b, v0.16b //AES block 0 - round 455725573aese v3.16b, v27.16b5574aesmc v3.16b, v3.16b //AES block 3 - round 45575aese v7.16b, v27.16b5576aesmc v7.16b, v7.16b //AES block 7 - round 45577aese v5.16b, v27.16b5578aesmc v5.16b, v5.16b //AES block 5 - round 455795580aese v0.16b, v28.16b5581aesmc v0.16b, v0.16b //AES block 0 - round 55582aese v2.16b, v28.16b5583aesmc v2.16b, v2.16b //AES block 2 - round 55584ldp q26, q27, [x8, #96] //load rk6, rk755855586aese v1.16b, v28.16b5587aesmc v1.16b, v1.16b //AES block 1 - round 55588aese v4.16b, v28.16b5589aesmc v4.16b, v4.16b //AES block 4 - round 55590aese v5.16b, v28.16b5591aesmc v5.16b, v5.16b //AES block 5 - round 555925593aese v3.16b, v28.16b5594aesmc v3.16b, v3.16b //AES block 3 - round 55595aese v6.16b, v28.16b5596aesmc v6.16b, v6.16b //AES block 6 - round 55597aese v7.16b, v28.16b5598aesmc v7.16b, v7.16b //AES block 7 - round 555995600aese v1.16b, v26.16b5601aesmc v1.16b, v1.16b //AES block 1 - round 65602aese v5.16b, v26.16b5603aesmc v5.16b, v5.16b //AES block 5 - round 65604aese v4.16b, v26.16b5605aesmc v4.16b, v4.16b //AES block 4 - round 656065607aese v2.16b, v26.16b5608aesmc v2.16b, v2.16b //AES block 2 - round 65609aese v6.16b, v26.16b5610aesmc v6.16b, v6.16b //AES block 6 - round 65611aese v0.16b, v26.16b5612aesmc v0.16b, v0.16b //AES block 0 - round 656135614aese v7.16b, v26.16b5615aesmc v7.16b, v7.16b //AES block 7 - round 65616aese v3.16b, v26.16b5617aesmc v3.16b, v3.16b //AES block 3 - round 65618ldp q28, q26, [x8, #128] //load rk8, rk956195620aese v2.16b, v27.16b5621aesmc v2.16b, v2.16b //AES block 2 - round 75622aese v0.16b, v27.16b5623aesmc v0.16b, v0.16b //AES block 0 - round 756245625aese v7.16b, v27.16b5626aesmc v7.16b, v7.16b //AES block 7 - round 75627aese v6.16b, v27.16b5628aesmc v6.16b, v6.16b //AES block 6 - round 75629aese v1.16b, v27.16b5630aesmc v1.16b, v1.16b //AES block 1 - round 756315632aese v5.16b, v27.16b5633aesmc v5.16b, v5.16b //AES block 5 - round 75634aese v3.16b, v27.16b5635aesmc v3.16b, v3.16b //AES block 3 - round 756365637aese v4.16b, v27.16b5638aesmc v4.16b, v4.16b //AES block 4 - round 756395640aese v6.16b, v28.16b5641aesmc v6.16b, v6.16b //AES block 6 - round 85642aese v1.16b, v28.16b5643aesmc v1.16b, v1.16b //AES block 1 - round 856445645aese v3.16b, v28.16b5646aesmc v3.16b, v3.16b //AES block 3 - round 85647aese v0.16b, v28.16b5648aesmc v0.16b, v0.16b //AES block 0 - round 85649aese v7.16b, v28.16b5650aesmc v7.16b, v7.16b //AES block 7 - round 856515652aese v5.16b, v28.16b5653aesmc v5.16b, v5.16b //AES block 5 - round 85654aese v4.16b, v28.16b5655aesmc v4.16b, v4.16b //AES block 4 - round 85656aese v2.16b, v28.16b5657aesmc v2.16b, v2.16b //AES block 2 - round 856585659ld1 { v19.16b}, [x3]5660ext v19.16b, v19.16b, v19.16b, #85661rev64 v19.16b, v19.16b5662ldp q27, q28, [x8, #160] //load rk10, rk1156635664aese v6.16b, v26.16b5665aesmc v6.16b, v6.16b //AES block 6 - round 95666aese v7.16b, v26.16b5667aesmc v7.16b, v7.16b //AES block 7 - round 95668aese v3.16b, v26.16b5669aesmc v3.16b, v3.16b //AES block 3 - round 956705671aese v4.16b, v26.16b5672aesmc v4.16b, v4.16b //AES block 4 - round 95673aese v5.16b, v26.16b5674aesmc v5.16b, v5.16b //AES block 5 - round 95675aese v2.16b, v26.16b5676aesmc v2.16b, v2.16b //AES block 2 - round 956775678aese v1.16b, v26.16b5679aesmc v1.16b, v1.16b //AES block 1 - round 956805681aese v7.16b, v27.16b5682aesmc v7.16b, v7.16b //AES block 7 - round 105683aese v4.16b, v27.16b5684aesmc v4.16b, v4.16b //AES block 4 - round 105685aese v0.16b, v26.16b5686aesmc v0.16b, v0.16b //AES block 0 - round 956875688aese v1.16b, v27.16b5689aesmc v1.16b, v1.16b //AES block 1 - round 105690aese v5.16b, v27.16b5691aesmc v5.16b, v5.16b //AES block 5 - round 105692aese v3.16b, v27.16b5693aesmc v3.16b, v3.16b //AES block 3 - round 1056945695aese v2.16b, v27.16b5696aesmc v2.16b, v2.16b //AES block 2 - round 105697aese v0.16b, v27.16b5698aesmc v0.16b, v0.16b //AES block 0 - round 105699aese v6.16b, v27.16b5700aesmc v6.16b, v6.16b //AES block 6 - round 1057015702aese v4.16b, v28.16b5703aesmc v4.16b, v4.16b //AES block 4 - round 115704ldp q26, q27, [x8, #192] //load rk12, rk135705aese v5.16b, v28.16b5706aesmc v5.16b, v5.16b //AES block 5 - round 1157075708aese v2.16b, v28.16b5709aesmc v2.16b, v2.16b //AES block 2 - round 115710aese v6.16b, v28.16b5711aesmc v6.16b, v6.16b //AES block 6 - round 115712aese v1.16b, v28.16b5713aesmc v1.16b, v1.16b //AES block 1 - round 1157145715aese v0.16b, v28.16b5716aesmc v0.16b, v0.16b //AES block 0 - round 115717aese v3.16b, v28.16b5718aesmc v3.16b, v3.16b //AES block 3 - round 115719aese v7.16b, v28.16b5720aesmc v7.16b, v7.16b //AES block 7 - round 1157215722add v30.4s, v30.4s, v31.4s //CTR block 75723ldr q28, [x8, #224] //load rk1457245725aese v4.16b, v26.16b5726aesmc v4.16b, v4.16b //AES block 4 - round 125727aese v2.16b, v26.16b5728aesmc v2.16b, v2.16b //AES block 2 - round 125729aese v1.16b, v26.16b5730aesmc v1.16b, v1.16b //AES block 1 - round 1257315732aese v0.16b, v26.16b5733aesmc v0.16b, v0.16b //AES block 0 - round 125734aese v5.16b, v26.16b5735aesmc v5.16b, v5.16b //AES block 5 - round 125736aese v3.16b, v26.16b5737aesmc v3.16b, v3.16b //AES block 3 - round 1257385739aese v2.16b, v27.16b //AES block 2 - round 135740aese v1.16b, v27.16b //AES block 1 - round 135741aese v4.16b, v27.16b //AES block 4 - round 1357425743aese v6.16b, v26.16b5744aesmc v6.16b, v6.16b //AES block 6 - round 125745aese v7.16b, v26.16b5746aesmc v7.16b, v7.16b //AES block 7 - round 1257475748aese v0.16b, v27.16b //AES block 0 - round 135749aese v5.16b, v27.16b //AES block 5 - round 1357505751aese v6.16b, v27.16b //AES block 6 - round 135752aese v7.16b, v27.16b //AES block 7 - round 135753aese v3.16b, v27.16b //AES block 3 - round 1357545755add x4, x0, x1, lsr #3 //end_input_ptr5756cmp x0, x5 //check if we have <= 8 blocks5757b.ge .L256_enc_tail //handle tail57585759ldp q8, q9, [x0], #32 //AES block 0, 1 - load plaintext57605761ldp q10, q11, [x0], #32 //AES block 2, 3 - load plaintext57625763.inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result5764rev32 v0.16b, v30.16b //CTR block 85765add v30.4s, v30.4s, v31.4s //CTR block 857665767.inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result5768.inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result57695770rev32 v1.16b, v30.16b //CTR block 95771add v30.4s, v30.4s, v31.4s //CTR block 95772ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext57735774ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext5775.inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result5776cmp x0, x5 //check if we have <= 8 blocks57775778rev32 v2.16b, v30.16b //CTR block 105779add v30.4s, v30.4s, v31.4s //CTR block 105780stp q8, q9, [x2], #32 //AES block 0, 1 - store result57815782stp q10, q11, [x2], #32 //AES block 2, 3 - store result57835784rev32 v3.16b, v30.16b //CTR block 115785add v30.4s, v30.4s, v31.4s //CTR block 1157865787.inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result57885789.inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result5790.inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result5791.inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result57925793stp q12, q13, [x2], #32 //AES block 4, 5 - store result5794rev32 v4.16b, v30.16b //CTR block 1257955796stp q14, q15, [x2], #32 //AES block 6, 7 - store result5797add v30.4s, v30.4s, v31.4s //CTR block 125798b.ge .L256_enc_prepretail //do prepretail57995800.L256_enc_main_loop: //main loop start5801ldp q26, q27, [x8, #0] //load rk0, rk158025803rev32 v5.16b, v30.16b //CTR block 8k+135804add v30.4s, v30.4s, v31.4s //CTR block 8k+135805ldr q21, [x3, #144] //load h6k | h5k5806ldr q24, [x3, #192] //load h8k | h7k58075808rev64 v11.16b, v11.16b //GHASH block 8k+35809ldr q20, [x3, #128] //load h5l | h5h5810ext v20.16b, v20.16b, v20.16b, #85811ldr q22, [x3, #160] //load h6l | h6h5812ext v22.16b, v22.16b, v22.16b, #85813rev64 v9.16b, v9.16b //GHASH block 8k+158145815rev32 v6.16b, v30.16b //CTR block 8k+145816add v30.4s, v30.4s, v31.4s //CTR block 8k+145817rev64 v8.16b, v8.16b //GHASH block 8k58185819rev64 v12.16b, v12.16b //GHASH block 8k+45820ext v19.16b, v19.16b, v19.16b, #8 //PRE 05821ldr q23, [x3, #176] //load h7l | h7h5822ext v23.16b, v23.16b, v23.16b, #85823ldr q25, [x3, #208] //load h8l | h8h5824ext v25.16b, v25.16b, v25.16b, #858255826aese v3.16b, v26.16b5827aesmc v3.16b, v3.16b //AES block 8k+11 - round 05828aese v5.16b, v26.16b5829aesmc v5.16b, v5.16b //AES block 8k+13 - round 05830rev32 v7.16b, v30.16b //CTR block 8k+1558315832aese v0.16b, v26.16b5833aesmc v0.16b, v0.16b //AES block 8k+8 - round 05834aese v1.16b, v26.16b5835aesmc v1.16b, v1.16b //AES block 8k+9 - round 05836aese v6.16b, v26.16b5837aesmc v6.16b, v6.16b //AES block 8k+14 - round 058385839aese v7.16b, v26.16b5840aesmc v7.16b, v7.16b //AES block 8k+15 - round 05841aese v2.16b, v26.16b5842aesmc v2.16b, v2.16b //AES block 8k+10 - round 05843aese v4.16b, v26.16b5844aesmc v4.16b, v4.16b //AES block 8k+12 - round 058455846ldp q28, q26, [x8, #32] //load rk2, rk35847eor v8.16b, v8.16b, v19.16b //PRE 15848aese v6.16b, v27.16b5849aesmc v6.16b, v6.16b //AES block 8k+14 - round 158505851aese v2.16b, v27.16b5852aesmc v2.16b, v2.16b //AES block 8k+10 - round 15853aese v1.16b, v27.16b5854aesmc v1.16b, v1.16b //AES block 8k+9 - round 15855aese v0.16b, v27.16b5856aesmc v0.16b, v0.16b //AES block 8k+8 - round 158575858aese v4.16b, v27.16b5859aesmc v4.16b, v4.16b //AES block 8k+12 - round 15860aese v3.16b, v27.16b5861aesmc v3.16b, v3.16b //AES block 8k+11 - round 15862aese v5.16b, v27.16b5863aesmc v5.16b, v5.16b //AES block 8k+13 - round 158645865pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high5866pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low5867pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high58685869trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid5870trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid5871aese v7.16b, v27.16b5872aesmc v7.16b, v7.16b //AES block 8k+15 - round 158735874aese v1.16b, v28.16b5875aesmc v1.16b, v1.16b //AES block 8k+9 - round 25876aese v5.16b, v28.16b5877aesmc v5.16b, v5.16b //AES block 8k+13 - round 25878aese v6.16b, v28.16b5879aesmc v6.16b, v6.16b //AES block 8k+14 - round 258805881aese v2.16b, v28.16b5882aesmc v2.16b, v2.16b //AES block 8k+10 - round 25883pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low5884aese v4.16b, v28.16b5885aesmc v4.16b, v4.16b //AES block 8k+12 - round 258865887aese v5.16b, v26.16b5888aesmc v5.16b, v5.16b //AES block 8k+13 - round 35889aese v6.16b, v26.16b5890aesmc v6.16b, v6.16b //AES block 8k+14 - round 35891aese v0.16b, v28.16b5892aesmc v0.16b, v0.16b //AES block 8k+8 - round 258935894aese v1.16b, v26.16b5895aesmc v1.16b, v1.16b //AES block 8k+9 - round 35896aese v7.16b, v28.16b5897aesmc v7.16b, v7.16b //AES block 8k+15 - round 25898aese v3.16b, v28.16b5899aesmc v3.16b, v3.16b //AES block 8k+11 - round 259005901aese v4.16b, v26.16b5902aesmc v4.16b, v4.16b //AES block 8k+12 - round 35903rev64 v14.16b, v14.16b //GHASH block 8k+65904pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high59055906aese v3.16b, v26.16b5907aesmc v3.16b, v3.16b //AES block 8k+11 - round 35908ldp q27, q28, [x8, #64] //load rk4, rk55909rev64 v10.16b, v10.16b //GHASH block 8k+259105911aese v2.16b, v26.16b5912aesmc v2.16b, v2.16b //AES block 8k+10 - round 35913aese v7.16b, v26.16b5914aesmc v7.16b, v7.16b //AES block 8k+15 - round 35915aese v0.16b, v26.16b5916aesmc v0.16b, v0.16b //AES block 8k+8 - round 359175918eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high5919pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high5920rev64 v13.16b, v13.16b //GHASH block 8k+559215922pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low5923eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low5924ldr q23, [x3, #80] //load h3l | h3h5925ext v23.16b, v23.16b, v23.16b, #85926ldr q25, [x3, #112] //load h4l | h4h5927ext v25.16b, v25.16b, v25.16b, #859285929trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid5930.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high5931pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low59325933aese v4.16b, v27.16b5934aesmc v4.16b, v4.16b //AES block 8k+12 - round 45935aese v1.16b, v27.16b5936aesmc v1.16b, v1.16b //AES block 8k+9 - round 45937aese v5.16b, v27.16b5938aesmc v5.16b, v5.16b //AES block 8k+13 - round 459395940aese v7.16b, v27.16b5941aesmc v7.16b, v7.16b //AES block 8k+15 - round 45942aese v3.16b, v27.16b5943aesmc v3.16b, v3.16b //AES block 8k+11 - round 45944aese v2.16b, v27.16b5945aesmc v2.16b, v2.16b //AES block 8k+10 - round 459465947trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid5948aese v6.16b, v27.16b5949aesmc v6.16b, v6.16b //AES block 8k+14 - round 45950aese v0.16b, v27.16b5951aesmc v0.16b, v0.16b //AES block 8k+8 - round 459525953trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid5954eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid5955ldp q26, q27, [x8, #96] //load rk6, rk759565957aese v5.16b, v28.16b5958aesmc v5.16b, v5.16b //AES block 8k+13 - round 55959aese v7.16b, v28.16b5960aesmc v7.16b, v7.16b //AES block 8k+15 - round 55961aese v4.16b, v28.16b5962aesmc v4.16b, v4.16b //AES block 8k+12 - round 559635964eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid5965aese v2.16b, v28.16b5966aesmc v2.16b, v2.16b //AES block 8k+10 - round 55967rev64 v15.16b, v15.16b //GHASH block 8k+759685969aese v3.16b, v28.16b5970aesmc v3.16b, v3.16b //AES block 8k+11 - round 55971aese v6.16b, v28.16b5972aesmc v6.16b, v6.16b //AES block 8k+14 - round 55973aese v1.16b, v28.16b5974aesmc v1.16b, v1.16b //AES block 8k+9 - round 559755976pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid5977pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid5978aese v0.16b, v28.16b5979aesmc v0.16b, v0.16b //AES block 8k+8 - round 559805981pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid5982aese v4.16b, v26.16b5983aesmc v4.16b, v4.16b //AES block 8k+12 - round 65984aese v2.16b, v26.16b5985aesmc v2.16b, v2.16b //AES block 8k+10 - round 659865987aese v6.16b, v26.16b5988aesmc v6.16b, v6.16b //AES block 8k+14 - round 65989aese v1.16b, v26.16b5990aesmc v1.16b, v1.16b //AES block 8k+9 - round 65991aese v7.16b, v26.16b5992aesmc v7.16b, v7.16b //AES block 8k+15 - round 659935994eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid5995pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid5996aese v5.16b, v26.16b5997aesmc v5.16b, v5.16b //AES block 8k+13 - round 659985999.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low6000aese v3.16b, v26.16b6001aesmc v3.16b, v3.16b //AES block 8k+11 - round 66002aese v0.16b, v26.16b6003aesmc v0.16b, v0.16b //AES block 8k+8 - round 660046005ldp q28, q26, [x8, #128] //load rk8, rk96006pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high6007aese v5.16b, v27.16b6008aesmc v5.16b, v5.16b //AES block 8k+13 - round 760096010ldr q20, [x3, #32] //load h1l | h1h6011ext v20.16b, v20.16b, v20.16b, #86012ldr q22, [x3, #64] //load h2l | h2h6013ext v22.16b, v22.16b, v22.16b, #86014aese v2.16b, v27.16b6015aesmc v2.16b, v2.16b //AES block 8k+10 - round 76016.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid60176018ldr q21, [x3, #48] //load h2k | h1k6019ldr q24, [x3, #96] //load h4k | h3k6020aese v6.16b, v27.16b6021aesmc v6.16b, v6.16b //AES block 8k+14 - round 76022aese v3.16b, v27.16b6023aesmc v3.16b, v3.16b //AES block 8k+11 - round 760246025aese v0.16b, v27.16b6026aesmc v0.16b, v0.16b //AES block 8k+8 - round 76027aese v7.16b, v27.16b6028aesmc v7.16b, v7.16b //AES block 8k+15 - round 76029pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low60306031trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid6032aese v4.16b, v27.16b6033aesmc v4.16b, v4.16b //AES block 8k+12 - round 76034aese v1.16b, v27.16b6035aesmc v1.16b, v1.16b //AES block 8k+9 - round 760366037pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high6038aese v7.16b, v28.16b6039aesmc v7.16b, v7.16b //AES block 8k+15 - round 86040aese v0.16b, v28.16b6041aesmc v0.16b, v0.16b //AES block 8k+8 - round 860426043pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low6044trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid6045eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid60466047aese v3.16b, v28.16b6048aesmc v3.16b, v3.16b //AES block 8k+11 - round 86049aese v0.16b, v26.16b6050aesmc v0.16b, v0.16b //AES block 8k+8 - round 96051aese v1.16b, v28.16b6052aesmc v1.16b, v1.16b //AES block 8k+9 - round 860536054pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid6055pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid6056aese v2.16b, v28.16b6057aesmc v2.16b, v2.16b //AES block 8k+10 - round 860586059aese v5.16b, v28.16b6060aesmc v5.16b, v5.16b //AES block 8k+13 - round 86061pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high6062pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low60636064aese v6.16b, v28.16b6065aesmc v6.16b, v6.16b //AES block 8k+14 - round 86066trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid6067aese v4.16b, v28.16b6068aesmc v4.16b, v4.16b //AES block 8k+12 - round 860696070.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid6071aese v7.16b, v26.16b6072aesmc v7.16b, v7.16b //AES block 8k+15 - round 96073aese v5.16b, v26.16b6074aesmc v5.16b, v5.16b //AES block 8k+13 - round 960756076eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid6077aese v6.16b, v26.16b6078aesmc v6.16b, v6.16b //AES block 8k+14 - round 96079aese v4.16b, v26.16b6080aesmc v4.16b, v4.16b //AES block 8k+12 - round 960816082ldp q27, q28, [x8, #160] //load rk10, rk116083aese v2.16b, v26.16b6084aesmc v2.16b, v2.16b //AES block 8k+10 - round 96085aese v3.16b, v26.16b6086aesmc v3.16b, v3.16b //AES block 8k+11 - round 960876088pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high6089.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low6090pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low60916092ldr d16, [x10] //MODULO - load modulo constant6093pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid6094pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid60956096aese v1.16b, v26.16b6097aesmc v1.16b, v1.16b //AES block 8k+9 - round 960986099.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid6100.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low6101.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high61026103aese v4.16b, v27.16b6104aesmc v4.16b, v4.16b //AES block 8k+12 - round 106105aese v3.16b, v27.16b6106aesmc v3.16b, v3.16b //AES block 8k+11 - round 106107aese v5.16b, v27.16b6108aesmc v5.16b, v5.16b //AES block 8k+13 - round 1061096110aese v0.16b, v27.16b6111aesmc v0.16b, v0.16b //AES block 8k+8 - round 106112aese v2.16b, v27.16b6113aesmc v2.16b, v2.16b //AES block 8k+10 - round 106114add v30.4s, v30.4s, v31.4s //CTR block 8k+1561156116aese v1.16b, v27.16b6117aesmc v1.16b, v1.16b //AES block 8k+9 - round 106118aese v7.16b, v27.16b6119aesmc v7.16b, v7.16b //AES block 8k+15 - round 106120aese v6.16b, v27.16b6121aesmc v6.16b, v6.16b //AES block 8k+14 - round 1061226123.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high61246125ldp q26, q27, [x8, #192] //load rk12, rk136126rev32 v20.16b, v30.16b //CTR block 8k+1661276128ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment6129ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load plaintext6130aese v2.16b, v28.16b6131aesmc v2.16b, v2.16b //AES block 8k+10 - round 1161326133aese v6.16b, v28.16b6134aesmc v6.16b, v6.16b //AES block 8k+14 - round 116135add v30.4s, v30.4s, v31.4s //CTR block 8k+166136aese v3.16b, v28.16b6137aesmc v3.16b, v3.16b //AES block 8k+11 - round 1161386139aese v0.16b, v28.16b6140aesmc v0.16b, v0.16b //AES block 8k+8 - round 116141aese v7.16b, v28.16b6142aesmc v7.16b, v7.16b //AES block 8k+15 - round 1161436144pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid6145aese v1.16b, v28.16b6146aesmc v1.16b, v1.16b //AES block 8k+9 - round 1161476148aese v7.16b, v26.16b6149aesmc v7.16b, v7.16b //AES block 8k+15 - round 126150aese v5.16b, v28.16b6151aesmc v5.16b, v5.16b //AES block 8k+13 - round 1161526153aese v3.16b, v26.16b6154aesmc v3.16b, v3.16b //AES block 8k+11 - round 126155aese v6.16b, v26.16b6156aesmc v6.16b, v6.16b //AES block 8k+14 - round 126157rev32 v22.16b, v30.16b //CTR block 8k+1761586159add v30.4s, v30.4s, v31.4s //CTR block 8k+176160aese v4.16b, v28.16b6161aesmc v4.16b, v4.16b //AES block 8k+12 - round 116162.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up61636164aese v5.16b, v26.16b6165aesmc v5.16b, v5.16b //AES block 8k+13 - round 126166ldr q28, [x8, #224] //load rk146167aese v7.16b, v27.16b //AES block 8k+15 - round 1361686169ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load plaintext6170aese v2.16b, v26.16b6171aesmc v2.16b, v2.16b //AES block 8k+10 - round 126172aese v4.16b, v26.16b6173aesmc v4.16b, v4.16b //AES block 8k+12 - round 1261746175.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid6176aese v1.16b, v26.16b6177aesmc v1.16b, v1.16b //AES block 8k+9 - round 126178ldp q12, q13, [x0], #32 //AES block 4, 5 - load plaintext61796180ldp q14, q15, [x0], #32 //AES block 6, 7 - load plaintext6181aese v2.16b, v27.16b //AES block 8k+10 - round 136182aese v4.16b, v27.16b //AES block 8k+12 - round 1361836184rev32 v23.16b, v30.16b //CTR block 8k+186185add v30.4s, v30.4s, v31.4s //CTR block 8k+186186aese v5.16b, v27.16b //AES block 8k+13 - round 1361876188aese v0.16b, v26.16b6189aesmc v0.16b, v0.16b //AES block 8k+8 - round 126190aese v3.16b, v27.16b //AES block 8k+11 - round 136191cmp x0, x5 //.LOOP CONTROL61926193.inst 0xce02714a //eor3 v10.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result6194rev32 v25.16b, v30.16b //CTR block 8k+196195add v30.4s, v30.4s, v31.4s //CTR block 8k+1961966197aese v0.16b, v27.16b //AES block 8k+8 - round 136198aese v6.16b, v27.16b //AES block 8k+14 - round 136199.inst 0xce0571ad //eor3 v13.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result62006201ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment6202pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low6203aese v1.16b, v27.16b //AES block 8k+9 - round 1362046205.inst 0xce04718c //eor3 v12.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result6206rev32 v4.16b, v30.16b //CTR block 8k+206207.inst 0xce03716b //eor3 v11.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result62086209mov v3.16b, v25.16b //CTR block 8k+196210.inst 0xce017129 //eor3 v9.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result6211.inst 0xce007108 //eor3 v8.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result62126213add v30.4s, v30.4s, v31.4s //CTR block 8k+206214stp q8, q9, [x2], #32 //AES block 8k+8, 8k+9 - store result6215mov v2.16b, v23.16b //CTR block 8k+1862166217.inst 0xce0771ef //eor3 v15.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result6218.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low6219stp q10, q11, [x2], #32 //AES block 8k+10, 8k+11 - store result62206221.inst 0xce0671ce //eor3 v14.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result6222mov v1.16b, v22.16b //CTR block 8k+176223stp q12, q13, [x2], #32 //AES block 4, 5 - store result62246225stp q14, q15, [x2], #32 //AES block 6, 7 - store result6226mov v0.16b, v20.16b //CTR block 8k+166227b.lt .L256_enc_main_loop62286229.L256_enc_prepretail: //PREPRETAIL6230rev32 v5.16b, v30.16b //CTR block 8k+136231ldp q26, q27, [x8, #0] //load rk0, rk16232add v30.4s, v30.4s, v31.4s //CTR block 8k+1362336234rev64 v10.16b, v10.16b //GHASH block 8k+262356236rev32 v6.16b, v30.16b //CTR block 8k+146237add v30.4s, v30.4s, v31.4s //CTR block 8k+1462386239rev64 v13.16b, v13.16b //GHASH block 8k+56240ldr q21, [x3, #144] //load h6k | h5k6241ldr q24, [x3, #192] //load h8k | h7k62426243rev32 v7.16b, v30.16b //CTR block 8k+1562446245aese v6.16b, v26.16b6246aesmc v6.16b, v6.16b //AES block 8k+14 - round 06247aese v4.16b, v26.16b6248aesmc v4.16b, v4.16b //AES block 8k+12 - round 06249aese v1.16b, v26.16b6250aesmc v1.16b, v1.16b //AES block 8k+9 - round 062516252aese v5.16b, v26.16b6253aesmc v5.16b, v5.16b //AES block 8k+13 - round 06254aese v0.16b, v26.16b6255aesmc v0.16b, v0.16b //AES block 8k+8 - round 062566257aese v2.16b, v26.16b6258aesmc v2.16b, v2.16b //AES block 8k+10 - round 06259aese v7.16b, v26.16b6260aesmc v7.16b, v7.16b //AES block 8k+15 - round 06261aese v3.16b, v26.16b6262aesmc v3.16b, v3.16b //AES block 8k+11 - round 062636264ext v19.16b, v19.16b, v19.16b, #8 //PRE 06265rev64 v8.16b, v8.16b //GHASH block 8k6266aese v1.16b, v27.16b6267aesmc v1.16b, v1.16b //AES block 8k+9 - round 162686269rev64 v9.16b, v9.16b //GHASH block 8k+16270ldp q28, q26, [x8, #32] //load rk2, rk36271aese v3.16b, v27.16b6272aesmc v3.16b, v3.16b //AES block 8k+11 - round 162736274ldr q23, [x3, #176] //load h7l | h7h6275ext v23.16b, v23.16b, v23.16b, #86276ldr q25, [x3, #208] //load h8l | h8h6277ext v25.16b, v25.16b, v25.16b, #86278aese v2.16b, v27.16b6279aesmc v2.16b, v2.16b //AES block 8k+10 - round 162806281ldr q20, [x3, #128] //load h5l | h5h6282ext v20.16b, v20.16b, v20.16b, #86283ldr q22, [x3, #160] //load h6l | h6h6284ext v22.16b, v22.16b, v22.16b, #86285aese v0.16b, v27.16b6286aesmc v0.16b, v0.16b //AES block 8k+8 - round 16287aese v5.16b, v27.16b6288aesmc v5.16b, v5.16b //AES block 8k+13 - round 162896290aese v4.16b, v27.16b6291aesmc v4.16b, v4.16b //AES block 8k+12 - round 16292eor v8.16b, v8.16b, v19.16b //PRE 162936294rev64 v11.16b, v11.16b //GHASH block 8k+36295aese v6.16b, v27.16b6296aesmc v6.16b, v6.16b //AES block 8k+14 - round 162976298aese v1.16b, v28.16b6299aesmc v1.16b, v1.16b //AES block 8k+9 - round 26300aese v2.16b, v28.16b6301aesmc v2.16b, v2.16b //AES block 8k+10 - round 26302aese v7.16b, v27.16b6303aesmc v7.16b, v7.16b //AES block 8k+15 - round 163046305aese v4.16b, v28.16b6306aesmc v4.16b, v4.16b //AES block 8k+12 - round 26307aese v0.16b, v28.16b6308aesmc v0.16b, v0.16b //AES block 8k+8 - round 26309aese v6.16b, v28.16b6310aesmc v6.16b, v6.16b //AES block 8k+14 - round 263116312aese v5.16b, v28.16b6313aesmc v5.16b, v5.16b //AES block 8k+13 - round 26314aese v7.16b, v28.16b6315aesmc v7.16b, v7.16b //AES block 8k+15 - round 26316aese v3.16b, v28.16b6317aesmc v3.16b, v3.16b //AES block 8k+11 - round 263186319ldp q27, q28, [x8, #64] //load rk4, rk56320trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid6321pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high63226323rev64 v14.16b, v14.16b //GHASH block 8k+66324aese v4.16b, v26.16b6325aesmc v4.16b, v4.16b //AES block 8k+12 - round 36326pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high63276328aese v7.16b, v26.16b6329aesmc v7.16b, v7.16b //AES block 8k+15 - round 36330pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low6331trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid63326333pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high6334aese v6.16b, v26.16b6335aesmc v6.16b, v6.16b //AES block 8k+14 - round 363366337aese v2.16b, v26.16b6338aesmc v2.16b, v2.16b //AES block 8k+10 - round 36339aese v3.16b, v26.16b6340aesmc v3.16b, v3.16b //AES block 8k+11 - round 36341eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high63426343pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low6344pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high6345aese v1.16b, v26.16b6346aesmc v1.16b, v1.16b //AES block 8k+9 - round 363476348aese v0.16b, v26.16b6349aesmc v0.16b, v0.16b //AES block 8k+8 - round 36350eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid6351aese v5.16b, v26.16b6352aesmc v5.16b, v5.16b //AES block 8k+13 - round 363536354pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low6355aese v1.16b, v27.16b6356aesmc v1.16b, v1.16b //AES block 8k+9 - round 46357aese v6.16b, v27.16b6358aesmc v6.16b, v6.16b //AES block 8k+14 - round 463596360aese v0.16b, v27.16b6361aesmc v0.16b, v0.16b //AES block 8k+8 - round 46362aese v2.16b, v27.16b6363aesmc v2.16b, v2.16b //AES block 8k+10 - round 46364aese v4.16b, v27.16b6365aesmc v4.16b, v4.16b //AES block 8k+12 - round 463666367aese v6.16b, v28.16b6368aesmc v6.16b, v6.16b //AES block 8k+14 - round 56369pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid6370.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high63716372aese v7.16b, v27.16b6373aesmc v7.16b, v7.16b //AES block 8k+15 - round 46374trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid6375trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid63766377aese v5.16b, v27.16b6378aesmc v5.16b, v5.16b //AES block 8k+13 - round 46379eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low6380aese v3.16b, v27.16b6381aesmc v3.16b, v3.16b //AES block 8k+11 - round 463826383pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low6384pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid6385eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid63866387rev64 v12.16b, v12.16b //GHASH block 8k+46388aese v1.16b, v28.16b6389aesmc v1.16b, v1.16b //AES block 8k+9 - round 56390aese v0.16b, v28.16b6391aesmc v0.16b, v0.16b //AES block 8k+8 - round 563926393aese v7.16b, v28.16b6394aesmc v7.16b, v7.16b //AES block 8k+15 - round 56395aese v4.16b, v28.16b6396aesmc v4.16b, v4.16b //AES block 8k+12 - round 56397ldp q26, q27, [x8, #96] //load rk6, rk763986399ldr q23, [x3, #80] //load h3l | h3h6400ext v23.16b, v23.16b, v23.16b, #86401ldr q25, [x3, #112] //load h4l | h4h6402ext v25.16b, v25.16b, v25.16b, #86403pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid6404pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid64056406.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low6407eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid64086409aese v5.16b, v28.16b6410aesmc v5.16b, v5.16b //AES block 8k+13 - round 56411rev64 v15.16b, v15.16b //GHASH block 8k+76412trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid64136414aese v3.16b, v28.16b6415aesmc v3.16b, v3.16b //AES block 8k+11 - round 56416aese v2.16b, v28.16b6417aesmc v2.16b, v2.16b //AES block 8k+10 - round 56418.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid64196420aese v7.16b, v26.16b6421aesmc v7.16b, v7.16b //AES block 8k+15 - round 66422aese v4.16b, v26.16b6423aesmc v4.16b, v4.16b //AES block 8k+12 - round 66424aese v6.16b, v26.16b6425aesmc v6.16b, v6.16b //AES block 8k+14 - round 664266427ldr q21, [x3, #48] //load h2k | h1k6428ldr q24, [x3, #96] //load h4k | h3k6429aese v5.16b, v26.16b6430aesmc v5.16b, v5.16b //AES block 8k+13 - round 66431aese v3.16b, v26.16b6432aesmc v3.16b, v3.16b //AES block 8k+11 - round 664336434aese v0.16b, v26.16b6435aesmc v0.16b, v0.16b //AES block 8k+8 - round 66436aese v1.16b, v26.16b6437aesmc v1.16b, v1.16b //AES block 8k+9 - round 66438aese v2.16b, v26.16b6439aesmc v2.16b, v2.16b //AES block 8k+10 - round 664406441pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high6442pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low6443ldr q20, [x3, #32] //load h1l | h1h6444ext v20.16b, v20.16b, v20.16b, #86445ldr q22, [x3, #64] //load h2l | h2h6446ext v22.16b, v22.16b, v22.16b, #864476448ldp q28, q26, [x8, #128] //load rk8, rk96449aese v1.16b, v27.16b6450aesmc v1.16b, v1.16b //AES block 8k+9 - round 76451aese v4.16b, v27.16b6452aesmc v4.16b, v4.16b //AES block 8k+12 - round 764536454pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high6455trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid64566457aese v5.16b, v27.16b6458aesmc v5.16b, v5.16b //AES block 8k+13 - round 76459aese v6.16b, v27.16b6460aesmc v6.16b, v6.16b //AES block 8k+14 - round 76461pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low64626463aese v7.16b, v27.16b6464aesmc v7.16b, v7.16b //AES block 8k+15 - round 76465aese v3.16b, v27.16b6466aesmc v3.16b, v3.16b //AES block 8k+11 - round 76467eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid64686469pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high6470pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low6471aese v2.16b, v27.16b6472aesmc v2.16b, v2.16b //AES block 8k+10 - round 764736474trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid6475trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid6476aese v0.16b, v27.16b6477aesmc v0.16b, v0.16b //AES block 8k+8 - round 764786479aese v7.16b, v28.16b6480aesmc v7.16b, v7.16b //AES block 8k+15 - round 86481.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low6482aese v2.16b, v28.16b6483aesmc v2.16b, v2.16b //AES block 8k+10 - round 864846485aese v6.16b, v28.16b6486aesmc v6.16b, v6.16b //AES block 8k+14 - round 86487aese v4.16b, v28.16b6488aesmc v4.16b, v4.16b //AES block 8k+12 - round 86489aese v3.16b, v28.16b6490aesmc v3.16b, v3.16b //AES block 8k+11 - round 864916492aese v5.16b, v28.16b6493aesmc v5.16b, v5.16b //AES block 8k+13 - round 86494eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid6495aese v0.16b, v28.16b6496aesmc v0.16b, v0.16b //AES block 8k+8 - round 864976498pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid6499pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid6500aese v1.16b, v28.16b6501aesmc v1.16b, v1.16b //AES block 8k+9 - round 865026503pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high6504pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid6505pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid65066507pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low6508.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid6509.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high65106511ldp q27, q28, [x8, #160] //load rk10, rk116512aese v1.16b, v26.16b6513aesmc v1.16b, v1.16b //AES block 8k+9 - round 96514aese v0.16b, v26.16b6515aesmc v0.16b, v0.16b //AES block 8k+8 - round 965166517.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high6518.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid6519ldr d16, [x10] //MODULO - load modulo constant65206521.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low65226523aese v3.16b, v26.16b6524aesmc v3.16b, v3.16b //AES block 8k+11 - round 96525aese v7.16b, v26.16b6526aesmc v7.16b, v7.16b //AES block 8k+15 - round 96527aese v5.16b, v26.16b6528aesmc v5.16b, v5.16b //AES block 8k+13 - round 965296530aese v2.16b, v26.16b6531aesmc v2.16b, v2.16b //AES block 8k+10 - round 96532aese v6.16b, v26.16b6533aesmc v6.16b, v6.16b //AES block 8k+14 - round 965346535aese v5.16b, v27.16b6536aesmc v5.16b, v5.16b //AES block 8k+13 - round 106537aese v1.16b, v27.16b6538aesmc v1.16b, v1.16b //AES block 8k+9 - round 106539aese v4.16b, v26.16b6540aesmc v4.16b, v4.16b //AES block 8k+12 - round 965416542aese v7.16b, v27.16b6543aesmc v7.16b, v7.16b //AES block 8k+15 - round 106544aese v6.16b, v27.16b6545aesmc v6.16b, v6.16b //AES block 8k+14 - round 106546aese v3.16b, v27.16b6547aesmc v3.16b, v3.16b //AES block 8k+11 - round 1065486549aese v4.16b, v27.16b6550aesmc v4.16b, v4.16b //AES block 8k+12 - round 106551aese v0.16b, v27.16b6552aesmc v0.16b, v0.16b //AES block 8k+8 - round 106553aese v2.16b, v27.16b6554aesmc v2.16b, v2.16b //AES block 8k+10 - round 1065556556pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid6557.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up6558aese v7.16b, v28.16b6559aesmc v7.16b, v7.16b //AES block 8k+15 - round 1165606561ldp q26, q27, [x8, #192] //load rk12, rk136562ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment6563aese v2.16b, v28.16b6564aesmc v2.16b, v2.16b //AES block 8k+10 - round 1165656566.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid6567aese v1.16b, v28.16b6568aesmc v1.16b, v1.16b //AES block 8k+9 - round 116569aese v6.16b, v28.16b6570aesmc v6.16b, v6.16b //AES block 8k+14 - round 1165716572aese v0.16b, v28.16b6573aesmc v0.16b, v0.16b //AES block 8k+8 - round 116574aese v4.16b, v28.16b6575aesmc v4.16b, v4.16b //AES block 8k+12 - round 116576aese v5.16b, v28.16b6577aesmc v5.16b, v5.16b //AES block 8k+13 - round 1165786579pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low6580aese v3.16b, v28.16b6581aesmc v3.16b, v3.16b //AES block 8k+11 - round 116582ldr q28, [x8, #224] //load rk1465836584aese v1.16b, v26.16b6585aesmc v1.16b, v1.16b //AES block 8k+9 - round 126586aese v2.16b, v26.16b6587aesmc v2.16b, v2.16b //AES block 8k+10 - round 126588aese v0.16b, v26.16b6589aesmc v0.16b, v0.16b //AES block 8k+8 - round 1265906591aese v6.16b, v26.16b6592aesmc v6.16b, v6.16b //AES block 8k+14 - round 126593aese v5.16b, v26.16b6594aesmc v5.16b, v5.16b //AES block 8k+13 - round 126595ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment65966597aese v4.16b, v26.16b6598aesmc v4.16b, v4.16b //AES block 8k+12 - round 126599add v30.4s, v30.4s, v31.4s //CTR block 8k+1566006601aese v3.16b, v26.16b6602aesmc v3.16b, v3.16b //AES block 8k+11 - round 126603aese v7.16b, v26.16b6604aesmc v7.16b, v7.16b //AES block 8k+15 - round 126605aese v0.16b, v27.16b //AES block 8k+8 - round 1366066607.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low6608aese v5.16b, v27.16b //AES block 8k+13 - round 136609aese v1.16b, v27.16b //AES block 8k+9 - round 1366106611aese v3.16b, v27.16b //AES block 8k+11 - round 136612aese v4.16b, v27.16b //AES block 8k+12 - round 136613aese v7.16b, v27.16b //AES block 8k+15 - round 1366146615aese v2.16b, v27.16b //AES block 8k+10 - round 136616aese v6.16b, v27.16b //AES block 8k+14 - round 136617.L256_enc_tail: //TAIL66186619ldp q24, q25, [x3, #192] //load h8l | h8h6620ext v25.16b, v25.16b, v25.16b, #86621sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process66226623ldr q8, [x0], #16 //AES block 8k+8 - load plaintext66246625ldp q20, q21, [x3, #128] //load h5l | h5h6626ext v20.16b, v20.16b, v20.16b, #866276628ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag6629ldp q22, q23, [x3, #160] //load h6l | h6h6630ext v22.16b, v22.16b, v22.16b, #86631ext v23.16b, v23.16b, v23.16b, #86632mov v29.16b, v28.16b66336634cmp x5, #1126635.inst 0xce007509 //eor3 v9.16b, v8.16b, v0.16b, v29.16b //AES block 8k+8 - result6636b.gt .L256_enc_blocks_more_than_766376638movi v19.8b, #06639mov v7.16b, v6.16b6640movi v17.8b, #066416642mov v6.16b, v5.16b6643mov v5.16b, v4.16b6644mov v4.16b, v3.16b66456646mov v3.16b, v2.16b6647sub v30.4s, v30.4s, v31.4s6648mov v2.16b, v1.16b66496650movi v18.8b, #06651cmp x5, #966652b.gt .L256_enc_blocks_more_than_666536654mov v7.16b, v6.16b6655mov v6.16b, v5.16b6656cmp x5, #8066576658mov v5.16b, v4.16b6659mov v4.16b, v3.16b6660mov v3.16b, v1.16b66616662sub v30.4s, v30.4s, v31.4s6663b.gt .L256_enc_blocks_more_than_566646665mov v7.16b, v6.16b6666sub v30.4s, v30.4s, v31.4s66676668mov v6.16b, v5.16b6669mov v5.16b, v4.16b66706671cmp x5, #646672mov v4.16b, v1.16b6673b.gt .L256_enc_blocks_more_than_466746675cmp x5, #486676mov v7.16b, v6.16b6677mov v6.16b, v5.16b66786679mov v5.16b, v1.16b6680sub v30.4s, v30.4s, v31.4s6681b.gt .L256_enc_blocks_more_than_366826683cmp x5, #326684mov v7.16b, v6.16b6685ldr q24, [x3, #96] //load h4k | h3k66866687mov v6.16b, v1.16b6688sub v30.4s, v30.4s, v31.4s6689b.gt .L256_enc_blocks_more_than_266906691mov v7.16b, v1.16b66926693sub v30.4s, v30.4s, v31.4s6694cmp x5, #166695b.gt .L256_enc_blocks_more_than_166966697sub v30.4s, v30.4s, v31.4s6698ldr q21, [x3, #48] //load h2k | h1k6699b .L256_enc_blocks_less_than_16700.L256_enc_blocks_more_than_7: //blocks left > 76701st1 { v9.16b}, [x2], #16 //AES final-7 block - store result67026703rev64 v8.16b, v9.16b //GHASH final-7 block67046705eor v8.16b, v8.16b, v16.16b //feed in partial tag67066707ldr q9, [x0], #16 //AES final-6 block - load plaintext67086709pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high6710ins v27.d[0], v8.d[1] //GHASH final-7 block - mid6711ins v18.d[0], v24.d[1] //GHASH final-7 block - mid67126713movi v16.8b, #0 //suppress further partial tag feed in67146715eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid6716.inst 0xce017529 //eor3 v9.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result67176718pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid6719pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low6720.L256_enc_blocks_more_than_6: //blocks left > 667216722st1 { v9.16b}, [x2], #16 //AES final-6 block - store result67236724rev64 v8.16b, v9.16b //GHASH final-6 block67256726eor v8.16b, v8.16b, v16.16b //feed in partial tag67276728pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low6729ins v27.d[0], v8.d[1] //GHASH final-6 block - mid6730pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high67316732ldr q9, [x0], #16 //AES final-5 block - load plaintext67336734eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low67356736eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid67376738pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid6739.inst 0xce027529 //eor3 v9.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result67406741movi v16.8b, #0 //suppress further partial tag feed in67426743eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid6744eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high6745.L256_enc_blocks_more_than_5: //blocks left > 567466747st1 { v9.16b}, [x2], #16 //AES final-5 block - store result67486749rev64 v8.16b, v9.16b //GHASH final-5 block67506751eor v8.16b, v8.16b, v16.16b //feed in partial tag67526753ins v27.d[0], v8.d[1] //GHASH final-5 block - mid67546755pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high67566757eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high6758eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid67596760ins v27.d[1], v27.d[0] //GHASH final-5 block - mid67616762ldr q9, [x0], #16 //AES final-4 block - load plaintext6763pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low67646765pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid6766movi v16.8b, #0 //suppress further partial tag feed in6767eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low67686769eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid6770.inst 0xce037529 //eor3 v9.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result6771.L256_enc_blocks_more_than_4: //blocks left > 467726773st1 { v9.16b}, [x2], #16 //AES final-4 block - store result67746775rev64 v8.16b, v9.16b //GHASH final-4 block67766777ldr q9, [x0], #16 //AES final-3 block - load plaintext67786779eor v8.16b, v8.16b, v16.16b //feed in partial tag67806781ins v27.d[0], v8.d[1] //GHASH final-4 block - mid6782pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high67836784.inst 0xce047529 //eor3 v9.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result6785pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low67866787eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid6788eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low67896790pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid67916792movi v16.8b, #0 //suppress further partial tag feed in67936794eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid6795eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high6796.L256_enc_blocks_more_than_3: //blocks left > 367976798st1 { v9.16b}, [x2], #16 //AES final-3 block - store result67996800ldr q25, [x3, #112] //load h4l | h4h6801ext v25.16b, v25.16b, v25.16b, #86802rev64 v8.16b, v9.16b //GHASH final-3 block68036804eor v8.16b, v8.16b, v16.16b //feed in partial tag68056806ins v27.d[0], v8.d[1] //GHASH final-3 block - mid6807pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high68086809eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high6810eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid6811ldr q24, [x3, #96] //load h4k | h3k68126813ins v27.d[1], v27.d[0] //GHASH final-3 block - mid6814ldr q9, [x0], #16 //AES final-2 block - load plaintext68156816pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid6817pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low68186819.inst 0xce057529 //eor3 v9.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result6820movi v16.8b, #0 //suppress further partial tag feed in68216822eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid6823eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low6824.L256_enc_blocks_more_than_2: //blocks left > 268256826ldr q23, [x3, #80] //load h3l | h3h6827ext v23.16b, v23.16b, v23.16b, #868286829st1 { v9.16b}, [x2], #16 //AES final-2 block - store result68306831rev64 v8.16b, v9.16b //GHASH final-2 block6832ldr q9, [x0], #16 //AES final-1 block - load plaintext68336834eor v8.16b, v8.16b, v16.16b //feed in partial tag68356836ins v27.d[0], v8.d[1] //GHASH final-2 block - mid68376838movi v16.8b, #0 //suppress further partial tag feed in68396840pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high6841.inst 0xce067529 //eor3 v9.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result68426843eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid68446845eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high68466847pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid6848pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low68496850eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid6851eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low6852.L256_enc_blocks_more_than_1: //blocks left > 168536854st1 { v9.16b}, [x2], #16 //AES final-1 block - store result68556856ldr q22, [x3, #64] //load h2l | h2h6857ext v22.16b, v22.16b, v22.16b, #86858rev64 v8.16b, v9.16b //GHASH final-1 block6859ldr q9, [x0], #16 //AES final block - load plaintext68606861eor v8.16b, v8.16b, v16.16b //feed in partial tag6862movi v16.8b, #0 //suppress further partial tag feed in68636864ins v27.d[0], v8.d[1] //GHASH final-1 block - mid6865pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high68666867.inst 0xce077529 //eor3 v9.16b, v9.16b, v7.16b, v29.16b //AES final block - result6868eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high68696870pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low6871eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid68726873ldr q21, [x3, #48] //load h2k | h1k68746875eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low6876ins v27.d[1], v27.d[0] //GHASH final-1 block - mid68776878pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid68796880eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid6881.L256_enc_blocks_less_than_1: //blocks left <= 168826883and x1, x1, #127 //bit_length %= 12868846885sub x1, x1, #128 //bit_length -= 12868866887neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])68886889mvn x6, xzr //temp0_x = 0xffffffffffffffff6890and x1, x1, #127 //bit_length %= 12868916892lsr x6, x6, x1 //temp0_x is mask for top 64b of last block6893cmp x1, #646894mvn x7, xzr //temp1_x = 0xffffffffffffffff68956896csel x14, x6, xzr, lt6897csel x13, x7, x6, lt68986899mov v0.d[0], x13 //ctr0b is mask for last block6900ldr q20, [x3, #32] //load h1l | h1h6901ext v20.16b, v20.16b, v20.16b, #869026903ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored6904mov v0.d[1], x1469056906and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits69076908rev64 v8.16b, v9.16b //GHASH final block69096910rev32 v30.16b, v30.16b6911bif v9.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing6912str q30, [x16] //store the updated counter69136914eor v8.16b, v8.16b, v16.16b //feed in partial tag6915st1 { v9.16b}, [x2] //store all 16B69166917ins v16.d[0], v8.d[1] //GHASH final block - mid6918pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high6919pmull v26.1q, v8.1d, v20.1d //GHASH final block - low69206921eor v17.16b, v17.16b, v28.16b //GHASH final block - high6922eor v19.16b, v19.16b, v26.16b //GHASH final block - low69236924eor v16.8b, v16.8b, v8.8b //GHASH final block - mid69256926pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid69276928eor v18.16b, v18.16b, v16.16b //GHASH final block - mid6929ldr d16, [x10] //MODULO - load modulo constant69306931ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment69326933.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up6934pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid69356936.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid69376938pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low6939ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment69406941.inst 0xce115673 //eor3 v19.16b, v19.16b, v17.16b, v21.16b //MODULO - fold into low6942ext v19.16b, v19.16b, v19.16b, #86943rev64 v19.16b, v19.16b6944st1 { v19.16b }, [x3]6945mov x0, x9 //return sizes69466947ldp d10, d11, [sp, #16]6948ldp d12, d13, [sp, #32]6949ldp d14, d15, [sp, #48]6950ldp d8, d9, [sp], #806951ret69526953.L256_enc_ret:6954mov w0, #0x06955ret6956.size unroll8_eor3_aes_gcm_enc_256_kernel,.-unroll8_eor3_aes_gcm_enc_256_kernel6957.globl unroll8_eor3_aes_gcm_dec_256_kernel6958.type unroll8_eor3_aes_gcm_dec_256_kernel,%function6959.align 46960unroll8_eor3_aes_gcm_dec_256_kernel:6961AARCH64_VALID_CALL_TARGET6962cbz x1, .L256_dec_ret6963stp d8, d9, [sp, #-80]!6964lsr x9, x1, #36965mov x16, x46966mov x8, x56967stp d10, d11, [sp, #16]6968stp d12, d13, [sp, #32]6969stp d14, d15, [sp, #48]6970mov x5, #0xc2000000000000006971stp x5, xzr, [sp, #64]6972add x10, sp, #6469736974ld1 { v0.16b}, [x16] //CTR block 069756976mov x15, #0x100000000 //set up counter increment6977movi v31.16b, #0x06978mov v31.d[1], x156979mov x5, x969806981sub x5, x5, #1 //byte_len - 169826983rev32 v30.16b, v0.16b //set up reversed counter69846985add v30.4s, v30.4s, v31.4s //CTR block 069866987rev32 v1.16b, v30.16b //CTR block 16988add v30.4s, v30.4s, v31.4s //CTR block 169896990rev32 v2.16b, v30.16b //CTR block 26991add v30.4s, v30.4s, v31.4s //CTR block 26992ldp q26, q27, [x8, #0] //load rk0, rk169936994rev32 v3.16b, v30.16b //CTR block 36995add v30.4s, v30.4s, v31.4s //CTR block 369966997rev32 v4.16b, v30.16b //CTR block 46998add v30.4s, v30.4s, v31.4s //CTR block 469997000aese v0.16b, v26.16b7001aesmc v0.16b, v0.16b //AES block 0 - round 070027003rev32 v5.16b, v30.16b //CTR block 57004add v30.4s, v30.4s, v31.4s //CTR block 570057006aese v1.16b, v26.16b7007aesmc v1.16b, v1.16b //AES block 1 - round 07008aese v2.16b, v26.16b7009aesmc v2.16b, v2.16b //AES block 2 - round 070107011rev32 v6.16b, v30.16b //CTR block 67012add v30.4s, v30.4s, v31.4s //CTR block 670137014rev32 v7.16b, v30.16b //CTR block 77015aese v4.16b, v26.16b7016aesmc v4.16b, v4.16b //AES block 4 - round 070177018aese v6.16b, v26.16b7019aesmc v6.16b, v6.16b //AES block 6 - round 07020aese v5.16b, v26.16b7021aesmc v5.16b, v5.16b //AES block 5 - round 070227023aese v3.16b, v26.16b7024aesmc v3.16b, v3.16b //AES block 3 - round 07025aese v7.16b, v26.16b7026aesmc v7.16b, v7.16b //AES block 7 - round 07027ldp q28, q26, [x8, #32] //load rk2, rk370287029aese v6.16b, v27.16b7030aesmc v6.16b, v6.16b //AES block 6 - round 17031aese v4.16b, v27.16b7032aesmc v4.16b, v4.16b //AES block 4 - round 17033aese v0.16b, v27.16b7034aesmc v0.16b, v0.16b //AES block 0 - round 170357036aese v5.16b, v27.16b7037aesmc v5.16b, v5.16b //AES block 5 - round 17038aese v7.16b, v27.16b7039aesmc v7.16b, v7.16b //AES block 7 - round 17040aese v1.16b, v27.16b7041aesmc v1.16b, v1.16b //AES block 1 - round 170427043aese v2.16b, v27.16b7044aesmc v2.16b, v2.16b //AES block 2 - round 17045aese v3.16b, v27.16b7046aesmc v3.16b, v3.16b //AES block 3 - round 170477048aese v3.16b, v28.16b7049aesmc v3.16b, v3.16b //AES block 3 - round 27050aese v2.16b, v28.16b7051aesmc v2.16b, v2.16b //AES block 2 - round 27052aese v6.16b, v28.16b7053aesmc v6.16b, v6.16b //AES block 6 - round 270547055aese v1.16b, v28.16b7056aesmc v1.16b, v1.16b //AES block 1 - round 27057aese v7.16b, v28.16b7058aesmc v7.16b, v7.16b //AES block 7 - round 27059aese v5.16b, v28.16b7060aesmc v5.16b, v5.16b //AES block 5 - round 270617062aese v0.16b, v28.16b7063aesmc v0.16b, v0.16b //AES block 0 - round 27064aese v4.16b, v28.16b7065aesmc v4.16b, v4.16b //AES block 4 - round 27066ldp q27, q28, [x8, #64] //load rk4, rk570677068aese v1.16b, v26.16b7069aesmc v1.16b, v1.16b //AES block 1 - round 37070aese v2.16b, v26.16b7071aesmc v2.16b, v2.16b //AES block 2 - round 370727073aese v3.16b, v26.16b7074aesmc v3.16b, v3.16b //AES block 3 - round 37075aese v4.16b, v26.16b7076aesmc v4.16b, v4.16b //AES block 4 - round 370777078aese v5.16b, v26.16b7079aesmc v5.16b, v5.16b //AES block 5 - round 37080aese v7.16b, v26.16b7081aesmc v7.16b, v7.16b //AES block 7 - round 37082aese v0.16b, v26.16b7083aesmc v0.16b, v0.16b //AES block 0 - round 370847085aese v6.16b, v26.16b7086aesmc v6.16b, v6.16b //AES block 6 - round 370877088aese v7.16b, v27.16b7089aesmc v7.16b, v7.16b //AES block 7 - round 47090aese v3.16b, v27.16b7091aesmc v3.16b, v3.16b //AES block 3 - round 470927093aese v6.16b, v27.16b7094aesmc v6.16b, v6.16b //AES block 6 - round 47095aese v2.16b, v27.16b7096aesmc v2.16b, v2.16b //AES block 2 - round 47097aese v0.16b, v27.16b7098aesmc v0.16b, v0.16b //AES block 0 - round 470997100aese v4.16b, v27.16b7101aesmc v4.16b, v4.16b //AES block 4 - round 47102aese v1.16b, v27.16b7103aesmc v1.16b, v1.16b //AES block 1 - round 47104aese v5.16b, v27.16b7105aesmc v5.16b, v5.16b //AES block 5 - round 471067107aese v0.16b, v28.16b7108aesmc v0.16b, v0.16b //AES block 0 - round 57109aese v6.16b, v28.16b7110aesmc v6.16b, v6.16b //AES block 6 - round 571117112ldp q26, q27, [x8, #96] //load rk6, rk77113aese v4.16b, v28.16b7114aesmc v4.16b, v4.16b //AES block 4 - round 57115aese v7.16b, v28.16b7116aesmc v7.16b, v7.16b //AES block 7 - round 571177118aese v5.16b, v28.16b7119aesmc v5.16b, v5.16b //AES block 5 - round 571207121aese v2.16b, v28.16b7122aesmc v2.16b, v2.16b //AES block 2 - round 57123aese v3.16b, v28.16b7124aesmc v3.16b, v3.16b //AES block 3 - round 571257126aese v1.16b, v28.16b7127aesmc v1.16b, v1.16b //AES block 1 - round 571287129aese v4.16b, v26.16b7130aesmc v4.16b, v4.16b //AES block 4 - round 67131aese v3.16b, v26.16b7132aesmc v3.16b, v3.16b //AES block 3 - round 67133aese v7.16b, v26.16b7134aesmc v7.16b, v7.16b //AES block 7 - round 671357136aese v6.16b, v26.16b7137aesmc v6.16b, v6.16b //AES block 6 - round 67138aese v0.16b, v26.16b7139aesmc v0.16b, v0.16b //AES block 0 - round 67140aese v5.16b, v26.16b7141aesmc v5.16b, v5.16b //AES block 5 - round 671427143aese v2.16b, v26.16b7144aesmc v2.16b, v2.16b //AES block 2 - round 67145aese v1.16b, v26.16b7146aesmc v1.16b, v1.16b //AES block 1 - round 67147ldp q28, q26, [x8, #128] //load rk8, rk971487149aese v5.16b, v27.16b7150aesmc v5.16b, v5.16b //AES block 5 - round 77151aese v0.16b, v27.16b7152aesmc v0.16b, v0.16b //AES block 0 - round 771537154aese v3.16b, v27.16b7155aesmc v3.16b, v3.16b //AES block 3 - round 77156aese v2.16b, v27.16b7157aesmc v2.16b, v2.16b //AES block 2 - round 77158aese v7.16b, v27.16b7159aesmc v7.16b, v7.16b //AES block 7 - round 771607161aese v4.16b, v27.16b7162aesmc v4.16b, v4.16b //AES block 4 - round 77163aese v1.16b, v27.16b7164aesmc v1.16b, v1.16b //AES block 1 - round 77165aese v6.16b, v27.16b7166aesmc v6.16b, v6.16b //AES block 6 - round 771677168and x5, x5, #0xffffffffffffff80 //number of bytes to be processed in main loop (at least 1 byte must be handled by tail)7169aese v7.16b, v28.16b7170aesmc v7.16b, v7.16b //AES block 7 - round 87171aese v5.16b, v28.16b7172aesmc v5.16b, v5.16b //AES block 5 - round 871737174aese v0.16b, v28.16b7175aesmc v0.16b, v0.16b //AES block 0 - round 87176aese v1.16b, v28.16b7177aesmc v1.16b, v1.16b //AES block 1 - round 87178aese v2.16b, v28.16b7179aesmc v2.16b, v2.16b //AES block 2 - round 871807181aese v4.16b, v28.16b7182aesmc v4.16b, v4.16b //AES block 4 - round 87183aese v3.16b, v28.16b7184aesmc v3.16b, v3.16b //AES block 3 - round 87185aese v6.16b, v28.16b7186aesmc v6.16b, v6.16b //AES block 6 - round 871877188aese v2.16b, v26.16b7189aesmc v2.16b, v2.16b //AES block 2 - round 971907191ld1 { v19.16b}, [x3]7192ext v19.16b, v19.16b, v19.16b, #87193rev64 v19.16b, v19.16b7194ldp q27, q28, [x8, #160] //load rk10, rk117195add x4, x0, x1, lsr #3 //end_input_ptr7196add x5, x5, x071977198aese v3.16b, v26.16b7199aesmc v3.16b, v3.16b //AES block 3 - round 97200aese v6.16b, v26.16b7201aesmc v6.16b, v6.16b //AES block 6 - round 972027203aese v4.16b, v26.16b7204aesmc v4.16b, v4.16b //AES block 4 - round 97205aese v5.16b, v26.16b7206aesmc v5.16b, v5.16b //AES block 5 - round 972077208aese v7.16b, v26.16b7209aesmc v7.16b, v7.16b //AES block 7 - round 972107211aese v0.16b, v26.16b7212aesmc v0.16b, v0.16b //AES block 0 - round 97213aese v1.16b, v26.16b7214aesmc v1.16b, v1.16b //AES block 1 - round 972157216aese v4.16b, v27.16b7217aesmc v4.16b, v4.16b //AES block 4 - round 107218aese v7.16b, v27.16b7219aesmc v7.16b, v7.16b //AES block 7 - round 107220aese v5.16b, v27.16b7221aesmc v5.16b, v5.16b //AES block 5 - round 1072227223aese v1.16b, v27.16b7224aesmc v1.16b, v1.16b //AES block 1 - round 107225aese v2.16b, v27.16b7226aesmc v2.16b, v2.16b //AES block 2 - round 107227aese v0.16b, v27.16b7228aesmc v0.16b, v0.16b //AES block 0 - round 1072297230aese v6.16b, v27.16b7231aesmc v6.16b, v6.16b //AES block 6 - round 107232aese v3.16b, v27.16b7233aesmc v3.16b, v3.16b //AES block 3 - round 107234ldp q26, q27, [x8, #192] //load rk12, rk1372357236aese v0.16b, v28.16b7237aesmc v0.16b, v0.16b //AES block 0 - round 117238add v30.4s, v30.4s, v31.4s //CTR block 772397240aese v7.16b, v28.16b7241aesmc v7.16b, v7.16b //AES block 7 - round 117242aese v3.16b, v28.16b7243aesmc v3.16b, v3.16b //AES block 3 - round 117244aese v1.16b, v28.16b7245aesmc v1.16b, v1.16b //AES block 1 - round 1172467247aese v5.16b, v28.16b7248aesmc v5.16b, v5.16b //AES block 5 - round 117249aese v4.16b, v28.16b7250aesmc v4.16b, v4.16b //AES block 4 - round 117251aese v2.16b, v28.16b7252aesmc v2.16b, v2.16b //AES block 2 - round 1172537254aese v6.16b, v28.16b7255aesmc v6.16b, v6.16b //AES block 6 - round 117256ldr q28, [x8, #224] //load rk1472577258aese v1.16b, v26.16b7259aesmc v1.16b, v1.16b //AES block 1 - round 127260aese v4.16b, v26.16b7261aesmc v4.16b, v4.16b //AES block 4 - round 127262aese v5.16b, v26.16b7263aesmc v5.16b, v5.16b //AES block 5 - round 1272647265cmp x0, x5 //check if we have <= 8 blocks7266aese v3.16b, v26.16b7267aesmc v3.16b, v3.16b //AES block 3 - round 127268aese v2.16b, v26.16b7269aesmc v2.16b, v2.16b //AES block 2 - round 1272707271aese v6.16b, v26.16b7272aesmc v6.16b, v6.16b //AES block 6 - round 127273aese v0.16b, v26.16b7274aesmc v0.16b, v0.16b //AES block 0 - round 127275aese v7.16b, v26.16b7276aesmc v7.16b, v7.16b //AES block 7 - round 1272777278aese v5.16b, v27.16b //AES block 5 - round 137279aese v1.16b, v27.16b //AES block 1 - round 137280aese v2.16b, v27.16b //AES block 2 - round 1372817282aese v0.16b, v27.16b //AES block 0 - round 137283aese v4.16b, v27.16b //AES block 4 - round 137284aese v6.16b, v27.16b //AES block 6 - round 1372857286aese v3.16b, v27.16b //AES block 3 - round 137287aese v7.16b, v27.16b //AES block 7 - round 137288b.ge .L256_dec_tail //handle tail72897290ldp q8, q9, [x0], #32 //AES block 0, 1 - load ciphertext72917292ldp q10, q11, [x0], #32 //AES block 2, 3 - load ciphertext72937294ldp q12, q13, [x0], #32 //AES block 4, 5 - load ciphertext72957296ldp q14, q15, [x0], #32 //AES block 6, 7 - load ciphertext7297cmp x0, x5 //check if we have <= 8 blocks72987299.inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 1 - result7300.inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 0 - result7301stp q0, q1, [x2], #32 //AES block 0, 1 - store result73027303rev32 v0.16b, v30.16b //CTR block 87304add v30.4s, v30.4s, v31.4s //CTR block 87305.inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 3 - result73067307.inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 5 - result73087309.inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 4 - result7310rev32 v1.16b, v30.16b //CTR block 97311add v30.4s, v30.4s, v31.4s //CTR block 973127313.inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 2 - result7314stp q2, q3, [x2], #32 //AES block 2, 3 - store result73157316rev32 v2.16b, v30.16b //CTR block 107317add v30.4s, v30.4s, v31.4s //CTR block 1073187319.inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 6 - result73207321rev32 v3.16b, v30.16b //CTR block 117322add v30.4s, v30.4s, v31.4s //CTR block 117323stp q4, q5, [x2], #32 //AES block 4, 5 - store result73247325.inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 7 - result7326stp q6, q7, [x2], #32 //AES block 6, 7 - store result73277328rev32 v4.16b, v30.16b //CTR block 127329add v30.4s, v30.4s, v31.4s //CTR block 127330b.ge .L256_dec_prepretail //do prepretail73317332.L256_dec_main_loop: //main loop start7333rev32 v5.16b, v30.16b //CTR block 8k+137334ldp q26, q27, [x8, #0] //load rk0, rk17335add v30.4s, v30.4s, v31.4s //CTR block 8k+1373367337rev64 v9.16b, v9.16b //GHASH block 8k+17338ldr q23, [x3, #176] //load h7l | h7h7339ext v23.16b, v23.16b, v23.16b, #87340ldr q25, [x3, #208] //load h8l | h8h7341ext v25.16b, v25.16b, v25.16b, #873427343rev32 v6.16b, v30.16b //CTR block 8k+147344add v30.4s, v30.4s, v31.4s //CTR block 8k+147345rev64 v8.16b, v8.16b //GHASH block 8k73467347ext v19.16b, v19.16b, v19.16b, #8 //PRE 07348rev64 v12.16b, v12.16b //GHASH block 8k+47349rev64 v11.16b, v11.16b //GHASH block 8k+373507351rev32 v7.16b, v30.16b //CTR block 8k+157352rev64 v15.16b, v15.16b //GHASH block 8k+773537354aese v3.16b, v26.16b7355aesmc v3.16b, v3.16b //AES block 8k+11 - round 07356aese v6.16b, v26.16b7357aesmc v6.16b, v6.16b //AES block 8k+14 - round 07358aese v2.16b, v26.16b7359aesmc v2.16b, v2.16b //AES block 8k+10 - round 073607361aese v7.16b, v26.16b7362aesmc v7.16b, v7.16b //AES block 8k+15 - round 07363aese v0.16b, v26.16b7364aesmc v0.16b, v0.16b //AES block 8k+8 - round 07365aese v5.16b, v26.16b7366aesmc v5.16b, v5.16b //AES block 8k+13 - round 073677368aese v4.16b, v26.16b7369aesmc v4.16b, v4.16b //AES block 8k+12 - round 07370aese v1.16b, v26.16b7371aesmc v1.16b, v1.16b //AES block 8k+9 - round 07372ldp q28, q26, [x8, #32] //load rk2, rk373737374eor v8.16b, v8.16b, v19.16b //PRE 17375ldr q20, [x3, #128] //load h5l | h5h7376ext v20.16b, v20.16b, v20.16b, #87377ldr q22, [x3, #160] //load h6l | h6h7378ext v22.16b, v22.16b, v22.16b, #87379aese v6.16b, v27.16b7380aesmc v6.16b, v6.16b //AES block 8k+14 - round 173817382aese v4.16b, v27.16b7383aesmc v4.16b, v4.16b //AES block 8k+12 - round 17384rev64 v10.16b, v10.16b //GHASH block 8k+27385aese v3.16b, v27.16b7386aesmc v3.16b, v3.16b //AES block 8k+11 - round 173877388aese v0.16b, v27.16b7389aesmc v0.16b, v0.16b //AES block 8k+8 - round 17390aese v5.16b, v27.16b7391aesmc v5.16b, v5.16b //AES block 8k+13 - round 17392aese v2.16b, v27.16b7393aesmc v2.16b, v2.16b //AES block 8k+10 - round 173947395trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid7396aese v7.16b, v27.16b7397aesmc v7.16b, v7.16b //AES block 8k+15 - round 17398aese v1.16b, v27.16b7399aesmc v1.16b, v1.16b //AES block 8k+9 - round 174007401aese v4.16b, v28.16b7402aesmc v4.16b, v4.16b //AES block 8k+12 - round 27403aese v0.16b, v28.16b7404aesmc v0.16b, v0.16b //AES block 8k+8 - round 27405aese v3.16b, v28.16b7406aesmc v3.16b, v3.16b //AES block 8k+11 - round 274077408aese v6.16b, v28.16b7409aesmc v6.16b, v6.16b //AES block 8k+14 - round 27410aese v7.16b, v28.16b7411aesmc v7.16b, v7.16b //AES block 8k+15 - round 27412pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low74137414aese v5.16b, v28.16b7415aesmc v5.16b, v5.16b //AES block 8k+13 - round 27416aese v2.16b, v28.16b7417aesmc v2.16b, v2.16b //AES block 8k+10 - round 27418aese v1.16b, v28.16b7419aesmc v1.16b, v1.16b //AES block 8k+9 - round 274207421ldp q27, q28, [x8, #64] //load rk4, rk57422pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high7423aese v3.16b, v26.16b7424aesmc v3.16b, v3.16b //AES block 8k+11 - round 374257426aese v0.16b, v26.16b7427aesmc v0.16b, v0.16b //AES block 8k+8 - round 37428pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high7429pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low74307431aese v5.16b, v26.16b7432aesmc v5.16b, v5.16b //AES block 8k+13 - round 37433aese v6.16b, v26.16b7434aesmc v6.16b, v6.16b //AES block 8k+14 - round 37435pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high74367437aese v4.16b, v26.16b7438aesmc v4.16b, v4.16b //AES block 8k+12 - round 37439aese v1.16b, v26.16b7440aesmc v1.16b, v1.16b //AES block 8k+9 - round 37441trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid74427443pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high7444aese v2.16b, v26.16b7445aesmc v2.16b, v2.16b //AES block 8k+10 - round 37446eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high74477448aese v5.16b, v27.16b7449aesmc v5.16b, v5.16b //AES block 8k+13 - round 47450aese v7.16b, v26.16b7451aesmc v7.16b, v7.16b //AES block 8k+15 - round 37452aese v3.16b, v27.16b7453aesmc v3.16b, v3.16b //AES block 8k+11 - round 474547455aese v2.16b, v27.16b7456aesmc v2.16b, v2.16b //AES block 8k+10 - round 47457aese v0.16b, v27.16b7458aesmc v0.16b, v0.16b //AES block 8k+8 - round 47459aese v1.16b, v27.16b7460aesmc v1.16b, v1.16b //AES block 8k+9 - round 474617462aese v6.16b, v27.16b7463aesmc v6.16b, v6.16b //AES block 8k+14 - round 47464aese v7.16b, v27.16b7465aesmc v7.16b, v7.16b //AES block 8k+15 - round 47466aese v4.16b, v27.16b7467aesmc v4.16b, v4.16b //AES block 8k+12 - round 474687469ldr q21, [x3, #144] //load h6k | h5k7470ldr q24, [x3, #192] //load h8k | h7k7471eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid7472pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low74737474ldp q26, q27, [x8, #96] //load rk6, rk77475aese v5.16b, v28.16b7476aesmc v5.16b, v5.16b //AES block 8k+13 - round 57477eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low74787479aese v0.16b, v28.16b7480aesmc v0.16b, v0.16b //AES block 8k+8 - round 57481aese v3.16b, v28.16b7482aesmc v3.16b, v3.16b //AES block 8k+11 - round 57483aese v7.16b, v28.16b7484aesmc v7.16b, v7.16b //AES block 8k+15 - round 574857486aese v1.16b, v28.16b7487aesmc v1.16b, v1.16b //AES block 8k+9 - round 57488aese v2.16b, v28.16b7489aesmc v2.16b, v2.16b //AES block 8k+10 - round 57490aese v6.16b, v28.16b7491aesmc v6.16b, v6.16b //AES block 8k+14 - round 574927493.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high7494trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid7495rev64 v13.16b, v13.16b //GHASH block 8k+574967497pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid7498pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid7499trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid75007501aese v3.16b, v26.16b7502aesmc v3.16b, v3.16b //AES block 8k+11 - round 67503aese v0.16b, v26.16b7504aesmc v0.16b, v0.16b //AES block 8k+8 - round 67505aese v4.16b, v28.16b7506aesmc v4.16b, v4.16b //AES block 8k+12 - round 575077508trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid7509aese v1.16b, v26.16b7510aesmc v1.16b, v1.16b //AES block 8k+9 - round 67511aese v6.16b, v26.16b7512aesmc v6.16b, v6.16b //AES block 8k+14 - round 675137514eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid7515pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low7516aese v4.16b, v26.16b7517aesmc v4.16b, v4.16b //AES block 8k+12 - round 675187519aese v2.16b, v26.16b7520aesmc v2.16b, v2.16b //AES block 8k+10 - round 67521aese v5.16b, v26.16b7522aesmc v5.16b, v5.16b //AES block 8k+13 - round 67523aese v7.16b, v26.16b7524aesmc v7.16b, v7.16b //AES block 8k+15 - round 675257526pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid7527pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid7528.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low75297530ldr q23, [x3, #80] //load h3l | h3h7531ext v23.16b, v23.16b, v23.16b, #87532ldr q25, [x3, #112] //load h4l | h4h7533ext v25.16b, v25.16b, v25.16b, #87534rev64 v14.16b, v14.16b //GHASH block 8k+67535eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid75367537aese v2.16b, v27.16b7538aesmc v2.16b, v2.16b //AES block 8k+10 - round 77539aese v5.16b, v27.16b7540aesmc v5.16b, v5.16b //AES block 8k+13 - round 77541ldp q28, q26, [x8, #128] //load rk8, rk975427543ldr q20, [x3, #32] //load h1l | h1h7544ext v20.16b, v20.16b, v20.16b, #87545ldr q22, [x3, #64] //load h2l | h2h7546ext v22.16b, v22.16b, v22.16b, #87547.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid7548aese v7.16b, v27.16b7549aesmc v7.16b, v7.16b //AES block 8k+15 - round 775507551aese v1.16b, v27.16b7552aesmc v1.16b, v1.16b //AES block 8k+9 - round 77553aese v3.16b, v27.16b7554aesmc v3.16b, v3.16b //AES block 8k+11 - round 77555aese v6.16b, v27.16b7556aesmc v6.16b, v6.16b //AES block 8k+14 - round 775577558ldr q21, [x3, #48] //load h2k | h1k7559ldr q24, [x3, #96] //load h4k | h3k7560aese v0.16b, v27.16b7561aesmc v0.16b, v0.16b //AES block 8k+8 - round 77562aese v4.16b, v27.16b7563aesmc v4.16b, v4.16b //AES block 8k+12 - round 775647565pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high7566pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low7567trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid75687569aese v5.16b, v28.16b7570aesmc v5.16b, v5.16b //AES block 8k+13 - round 87571pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high7572aese v2.16b, v28.16b7573aesmc v2.16b, v2.16b //AES block 8k+10 - round 875747575aese v6.16b, v28.16b7576aesmc v6.16b, v6.16b //AES block 8k+14 - round 87577pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low7578aese v1.16b, v28.16b7579aesmc v1.16b, v1.16b //AES block 8k+9 - round 875807581aese v4.16b, v28.16b7582aesmc v4.16b, v4.16b //AES block 8k+12 - round 87583aese v0.16b, v28.16b7584aesmc v0.16b, v0.16b //AES block 8k+8 - round 87585pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high75867587trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid7588aese v3.16b, v28.16b7589aesmc v3.16b, v3.16b //AES block 8k+11 - round 87590aese v7.16b, v28.16b7591aesmc v7.16b, v7.16b //AES block 8k+15 - round 875927593ldp q27, q28, [x8, #160] //load rk10, rk117594pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low7595trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid75967597add v30.4s, v30.4s, v31.4s //CTR block 8k+157598.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high7599aese v3.16b, v26.16b7600aesmc v3.16b, v3.16b //AES block 8k+11 - round 976017602aese v6.16b, v26.16b7603aesmc v6.16b, v6.16b //AES block 8k+14 - round 97604eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid7605aese v5.16b, v26.16b7606aesmc v5.16b, v5.16b //AES block 8k+13 - round 976077608ldp q8, q9, [x0], #32 //AES block 8k+8, 8k+9 - load ciphertext7609eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid7610aese v7.16b, v26.16b7611aesmc v7.16b, v7.16b //AES block 8k+15 - round 976127613pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid7614aese v2.16b, v26.16b7615aesmc v2.16b, v2.16b //AES block 8k+10 - round 97616aese v1.16b, v26.16b7617aesmc v1.16b, v1.16b //AES block 8k+9 - round 976187619pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid7620pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid7621pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high76227623pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low7624aese v3.16b, v27.16b7625aesmc v3.16b, v3.16b //AES block 8k+11 - round 107626aese v6.16b, v27.16b7627aesmc v6.16b, v6.16b //AES block 8k+14 - round 1076287629pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid7630aese v0.16b, v26.16b7631aesmc v0.16b, v0.16b //AES block 8k+8 - round 97632.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low76337634aese v4.16b, v26.16b7635aesmc v4.16b, v4.16b //AES block 8k+12 - round 97636.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid7637.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high76387639aese v2.16b, v27.16b7640aesmc v2.16b, v2.16b //AES block 8k+10 - round 107641aese v5.16b, v27.16b7642aesmc v5.16b, v5.16b //AES block 8k+13 - round 107643aese v7.16b, v27.16b7644aesmc v7.16b, v7.16b //AES block 8k+15 - round 1076457646aese v1.16b, v27.16b7647aesmc v1.16b, v1.16b //AES block 8k+9 - round 107648aese v0.16b, v27.16b7649aesmc v0.16b, v0.16b //AES block 8k+8 - round 107650aese v4.16b, v27.16b7651aesmc v4.16b, v4.16b //AES block 8k+12 - round 1076527653.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low7654rev32 v20.16b, v30.16b //CTR block 8k+167655ldr d16, [x10] //MODULO - load modulo constant76567657add v30.4s, v30.4s, v31.4s //CTR block 8k+167658aese v1.16b, v28.16b7659aesmc v1.16b, v1.16b //AES block 8k+9 - round 117660ldp q26, q27, [x8, #192] //load rk12, rk1376617662aese v0.16b, v28.16b7663aesmc v0.16b, v0.16b //AES block 8k+8 - round 117664aese v6.16b, v28.16b7665aesmc v6.16b, v6.16b //AES block 8k+14 - round 1176667667.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid7668rev32 v22.16b, v30.16b //CTR block 8k+177669aese v2.16b, v28.16b7670aesmc v2.16b, v2.16b //AES block 8k+10 - round 1176717672ldp q10, q11, [x0], #32 //AES block 8k+10, 8k+11 - load ciphertext7673aese v7.16b, v28.16b7674aesmc v7.16b, v7.16b //AES block 8k+15 - round 117675ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment76767677aese v5.16b, v28.16b7678aesmc v5.16b, v5.16b //AES block 8k+13 - round 117679add v30.4s, v30.4s, v31.4s //CTR block 8k+177680aese v3.16b, v28.16b7681aesmc v3.16b, v3.16b //AES block 8k+11 - round 1176827683aese v2.16b, v26.16b7684aesmc v2.16b, v2.16b //AES block 8k+10 - round 127685aese v7.16b, v26.16b7686aesmc v7.16b, v7.16b //AES block 8k+15 - round 127687aese v6.16b, v26.16b7688aesmc v6.16b, v6.16b //AES block 8k+14 - round 1276897690rev32 v23.16b, v30.16b //CTR block 8k+187691add v30.4s, v30.4s, v31.4s //CTR block 8k+187692pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid76937694.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up7695aese v1.16b, v26.16b7696aesmc v1.16b, v1.16b //AES block 8k+9 - round 127697aese v4.16b, v28.16b7698aesmc v4.16b, v4.16b //AES block 8k+12 - round 1176997700ldr q28, [x8, #224] //load rk147701aese v5.16b, v26.16b7702aesmc v5.16b, v5.16b //AES block 8k+13 - round 127703aese v3.16b, v26.16b7704aesmc v3.16b, v3.16b //AES block 8k+11 - round 1277057706.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid7707aese v0.16b, v26.16b7708aesmc v0.16b, v0.16b //AES block 8k+8 - round 127709aese v4.16b, v26.16b7710aesmc v4.16b, v4.16b //AES block 8k+12 - round 1277117712ldp q12, q13, [x0], #32 //AES block 8k+12, 8k+13 - load ciphertext7713aese v1.16b, v27.16b //AES block 8k+9 - round 137714aese v2.16b, v27.16b //AES block 8k+10 - round 1377157716ldp q14, q15, [x0], #32 //AES block 8k+14, 8k+15 - load ciphertext7717aese v0.16b, v27.16b //AES block 8k+8 - round 137718aese v5.16b, v27.16b //AES block 8k+13 - round 1377197720rev32 v25.16b, v30.16b //CTR block 8k+197721.inst 0xce027142 //eor3 v2.16b, v10.16b, v2.16b, v28.16b //AES block 8k+10 - result7722.inst 0xce017121 //eor3 v1.16b, v9.16b, v1.16b, v28.16b //AES block 8k+9 - result77237724ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment7725aese v7.16b, v27.16b //AES block 8k+15 - round 1377267727add v30.4s, v30.4s, v31.4s //CTR block 8k+197728pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low7729aese v4.16b, v27.16b //AES block 8k+12 - round 1377307731.inst 0xce0571a5 //eor3 v5.16b, v13.16b, v5.16b, v28.16b //AES block 8k+13 - result7732.inst 0xce007100 //eor3 v0.16b, v8.16b, v0.16b, v28.16b //AES block 8k+8 - result7733aese v3.16b, v27.16b //AES block 8k+11 - round 1377347735stp q0, q1, [x2], #32 //AES block 8k+8, 8k+9 - store result7736mov v0.16b, v20.16b //CTR block 8k+167737.inst 0xce047184 //eor3 v4.16b, v12.16b, v4.16b, v28.16b //AES block 8k+12 - result77387739.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low7740.inst 0xce037163 //eor3 v3.16b, v11.16b, v3.16b, v28.16b //AES block 8k+11 - result7741stp q2, q3, [x2], #32 //AES block 8k+10, 8k+11 - store result77427743mov v3.16b, v25.16b //CTR block 8k+197744mov v2.16b, v23.16b //CTR block 8k+187745aese v6.16b, v27.16b //AES block 8k+14 - round 1377467747mov v1.16b, v22.16b //CTR block 8k+177748stp q4, q5, [x2], #32 //AES block 8k+12, 8k+13 - store result7749.inst 0xce0771e7 //eor3 v7.16b, v15.16b, v7.16b, v28.16b //AES block 8k+15 - result77507751.inst 0xce0671c6 //eor3 v6.16b, v14.16b, v6.16b, v28.16b //AES block 8k+14 - result7752rev32 v4.16b, v30.16b //CTR block 8k+207753add v30.4s, v30.4s, v31.4s //CTR block 8k+2077547755cmp x0, x5 //.LOOP CONTROL7756stp q6, q7, [x2], #32 //AES block 8k+14, 8k+15 - store result7757b.lt .L256_dec_main_loop77587759.L256_dec_prepretail: //PREPRETAIL7760ldp q26, q27, [x8, #0] //load rk0, rk17761rev32 v5.16b, v30.16b //CTR block 8k+137762add v30.4s, v30.4s, v31.4s //CTR block 8k+1377637764rev64 v12.16b, v12.16b //GHASH block 8k+47765ldr q21, [x3, #144] //load h6k | h5k7766ldr q24, [x3, #192] //load h8k | h7k77677768rev32 v6.16b, v30.16b //CTR block 8k+147769rev64 v8.16b, v8.16b //GHASH block 8k7770add v30.4s, v30.4s, v31.4s //CTR block 8k+1477717772ext v19.16b, v19.16b, v19.16b, #8 //PRE 07773ldr q23, [x3, #176] //load h7l | h7h7774ext v23.16b, v23.16b, v23.16b, #87775ldr q25, [x3, #208] //load h8l | h8h7776ext v25.16b, v25.16b, v25.16b, #87777rev64 v9.16b, v9.16b //GHASH block 8k+177787779rev32 v7.16b, v30.16b //CTR block 8k+157780rev64 v10.16b, v10.16b //GHASH block 8k+27781ldr q20, [x3, #128] //load h5l | h5h7782ext v20.16b, v20.16b, v20.16b, #87783ldr q22, [x3, #160] //load h6l | h6h7784ext v22.16b, v22.16b, v22.16b, #877857786aese v0.16b, v26.16b7787aesmc v0.16b, v0.16b //AES block 8k+8 - round 07788aese v1.16b, v26.16b7789aesmc v1.16b, v1.16b //AES block 8k+9 - round 07790aese v4.16b, v26.16b7791aesmc v4.16b, v4.16b //AES block 8k+12 - round 077927793aese v3.16b, v26.16b7794aesmc v3.16b, v3.16b //AES block 8k+11 - round 07795aese v5.16b, v26.16b7796aesmc v5.16b, v5.16b //AES block 8k+13 - round 07797aese v6.16b, v26.16b7798aesmc v6.16b, v6.16b //AES block 8k+14 - round 077997800aese v4.16b, v27.16b7801aesmc v4.16b, v4.16b //AES block 8k+12 - round 17802aese v7.16b, v26.16b7803aesmc v7.16b, v7.16b //AES block 8k+15 - round 07804aese v2.16b, v26.16b7805aesmc v2.16b, v2.16b //AES block 8k+10 - round 078067807ldp q28, q26, [x8, #32] //load rk2, rk37808aese v0.16b, v27.16b7809aesmc v0.16b, v0.16b //AES block 8k+8 - round 17810eor v8.16b, v8.16b, v19.16b //PRE 178117812aese v7.16b, v27.16b7813aesmc v7.16b, v7.16b //AES block 8k+15 - round 17814aese v6.16b, v27.16b7815aesmc v6.16b, v6.16b //AES block 8k+14 - round 17816aese v2.16b, v27.16b7817aesmc v2.16b, v2.16b //AES block 8k+10 - round 178187819aese v3.16b, v27.16b7820aesmc v3.16b, v3.16b //AES block 8k+11 - round 17821aese v1.16b, v27.16b7822aesmc v1.16b, v1.16b //AES block 8k+9 - round 17823aese v5.16b, v27.16b7824aesmc v5.16b, v5.16b //AES block 8k+13 - round 178257826pmull2 v16.1q, v9.2d, v23.2d //GHASH block 8k+1 - high7827trn1 v18.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid7828pmull v19.1q, v8.1d, v25.1d //GHASH block 8k - low78297830rev64 v11.16b, v11.16b //GHASH block 8k+37831pmull v23.1q, v9.1d, v23.1d //GHASH block 8k+1 - low78327833aese v5.16b, v28.16b7834aesmc v5.16b, v5.16b //AES block 8k+13 - round 27835aese v7.16b, v28.16b7836aesmc v7.16b, v7.16b //AES block 8k+15 - round 27837aese v1.16b, v28.16b7838aesmc v1.16b, v1.16b //AES block 8k+9 - round 278397840aese v3.16b, v28.16b7841aesmc v3.16b, v3.16b //AES block 8k+11 - round 27842aese v6.16b, v28.16b7843aesmc v6.16b, v6.16b //AES block 8k+14 - round 27844pmull2 v17.1q, v8.2d, v25.2d //GHASH block 8k - high78457846aese v0.16b, v28.16b7847aesmc v0.16b, v0.16b //AES block 8k+8 - round 27848aese v7.16b, v26.16b7849aesmc v7.16b, v7.16b //AES block 8k+15 - round 378507851aese v5.16b, v26.16b7852aesmc v5.16b, v5.16b //AES block 8k+13 - round 37853rev64 v14.16b, v14.16b //GHASH block 8k+678547855aese v0.16b, v26.16b7856aesmc v0.16b, v0.16b //AES block 8k+8 - round 37857aese v2.16b, v28.16b7858aesmc v2.16b, v2.16b //AES block 8k+10 - round 27859aese v6.16b, v26.16b7860aesmc v6.16b, v6.16b //AES block 8k+14 - round 378617862pmull2 v29.1q, v10.2d, v22.2d //GHASH block 8k+2 - high7863trn2 v8.2d, v9.2d, v8.2d //GHASH block 8k, 8k+1 - mid7864aese v4.16b, v28.16b7865aesmc v4.16b, v4.16b //AES block 8k+12 - round 278667867ldp q27, q28, [x8, #64] //load rk4, rk57868aese v1.16b, v26.16b7869aesmc v1.16b, v1.16b //AES block 8k+9 - round 37870pmull2 v9.1q, v11.2d, v20.2d //GHASH block 8k+3 - high78717872aese v2.16b, v26.16b7873aesmc v2.16b, v2.16b //AES block 8k+10 - round 37874eor v17.16b, v17.16b, v16.16b //GHASH block 8k+1 - high7875eor v8.16b, v8.16b, v18.16b //GHASH block 8k, 8k+1 - mid78767877aese v4.16b, v26.16b7878aesmc v4.16b, v4.16b //AES block 8k+12 - round 37879pmull v22.1q, v10.1d, v22.1d //GHASH block 8k+2 - low7880aese v3.16b, v26.16b7881aesmc v3.16b, v3.16b //AES block 8k+11 - round 378827883.inst 0xce1d2631 //eor3 v17.16b, v17.16b, v29.16b, v9.16b //GHASH block 8k+2, 8k+3 - high7884trn1 v29.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid7885trn2 v10.2d, v11.2d, v10.2d //GHASH block 8k+2, 8k+3 - mid78867887pmull2 v18.1q, v8.2d, v24.2d //GHASH block 8k - mid7888pmull v20.1q, v11.1d, v20.1d //GHASH block 8k+3 - low7889eor v19.16b, v19.16b, v23.16b //GHASH block 8k+1 - low78907891pmull v24.1q, v8.1d, v24.1d //GHASH block 8k+1 - mid7892aese v5.16b, v27.16b7893aesmc v5.16b, v5.16b //AES block 8k+13 - round 47894aese v0.16b, v27.16b7895aesmc v0.16b, v0.16b //AES block 8k+8 - round 478967897.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+2, 8k+3 - low7898ldr q20, [x3, #32] //load h1l | h1h7899ext v20.16b, v20.16b, v20.16b, #87900ldr q22, [x3, #64] //load h2l | h2h7901ext v22.16b, v22.16b, v22.16b, #87902aese v7.16b, v27.16b7903aesmc v7.16b, v7.16b //AES block 8k+15 - round 479047905aese v2.16b, v27.16b7906aesmc v2.16b, v2.16b //AES block 8k+10 - round 47907aese v6.16b, v27.16b7908aesmc v6.16b, v6.16b //AES block 8k+14 - round 47909eor v18.16b, v18.16b, v24.16b //GHASH block 8k+1 - mid79107911eor v10.16b, v10.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid7912aese v7.16b, v28.16b7913aesmc v7.16b, v7.16b //AES block 8k+15 - round 57914aese v1.16b, v27.16b7915aesmc v1.16b, v1.16b //AES block 8k+9 - round 479167917aese v2.16b, v28.16b7918aesmc v2.16b, v2.16b //AES block 8k+10 - round 57919aese v3.16b, v27.16b7920aesmc v3.16b, v3.16b //AES block 8k+11 - round 47921aese v4.16b, v27.16b7922aesmc v4.16b, v4.16b //AES block 8k+12 - round 479237924aese v1.16b, v28.16b7925aesmc v1.16b, v1.16b //AES block 8k+9 - round 57926pmull2 v29.1q, v10.2d, v21.2d //GHASH block 8k+2 - mid7927aese v6.16b, v28.16b7928aesmc v6.16b, v6.16b //AES block 8k+14 - round 579297930aese v4.16b, v28.16b7931aesmc v4.16b, v4.16b //AES block 8k+12 - round 57932aese v3.16b, v28.16b7933aesmc v3.16b, v3.16b //AES block 8k+11 - round 57934pmull v21.1q, v10.1d, v21.1d //GHASH block 8k+3 - mid79357936aese v0.16b, v28.16b7937aesmc v0.16b, v0.16b //AES block 8k+8 - round 57938aese v5.16b, v28.16b7939aesmc v5.16b, v5.16b //AES block 8k+13 - round 57940ldp q26, q27, [x8, #96] //load rk6, rk779417942ldr q23, [x3, #80] //load h3l | h3h7943ext v23.16b, v23.16b, v23.16b, #87944ldr q25, [x3, #112] //load h4l | h4h7945ext v25.16b, v25.16b, v25.16b, #87946rev64 v15.16b, v15.16b //GHASH block 8k+77947rev64 v13.16b, v13.16b //GHASH block 8k+579487949.inst 0xce157652 //eor3 v18.16b, v18.16b, v21.16b, v29.16b //GHASH block 8k+2, 8k+3 - mid79507951trn1 v16.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid79527953aese v0.16b, v26.16b7954aesmc v0.16b, v0.16b //AES block 8k+8 - round 67955ldr q21, [x3, #48] //load h2k | h1k7956ldr q24, [x3, #96] //load h4k | h3k7957aese v6.16b, v26.16b7958aesmc v6.16b, v6.16b //AES block 8k+14 - round 679597960aese v5.16b, v26.16b7961aesmc v5.16b, v5.16b //AES block 8k+13 - round 67962aese v7.16b, v26.16b7963aesmc v7.16b, v7.16b //AES block 8k+15 - round 679647965pmull2 v8.1q, v12.2d, v25.2d //GHASH block 8k+4 - high7966pmull2 v10.1q, v13.2d, v23.2d //GHASH block 8k+5 - high7967pmull v25.1q, v12.1d, v25.1d //GHASH block 8k+4 - low79687969trn2 v12.2d, v13.2d, v12.2d //GHASH block 8k+4, 8k+5 - mid7970pmull v23.1q, v13.1d, v23.1d //GHASH block 8k+5 - low7971trn1 v13.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid79727973aese v7.16b, v27.16b7974aesmc v7.16b, v7.16b //AES block 8k+15 - round 77975pmull2 v11.1q, v14.2d, v22.2d //GHASH block 8k+6 - high7976aese v1.16b, v26.16b7977aesmc v1.16b, v1.16b //AES block 8k+9 - round 679787979aese v2.16b, v26.16b7980aesmc v2.16b, v2.16b //AES block 8k+10 - round 67981aese v3.16b, v26.16b7982aesmc v3.16b, v3.16b //AES block 8k+11 - round 67983aese v4.16b, v26.16b7984aesmc v4.16b, v4.16b //AES block 8k+12 - round 679857986ldp q28, q26, [x8, #128] //load rk8, rk97987pmull v22.1q, v14.1d, v22.1d //GHASH block 8k+6 - low7988aese v5.16b, v27.16b7989aesmc v5.16b, v5.16b //AES block 8k+13 - round 779907991aese v1.16b, v27.16b7992aesmc v1.16b, v1.16b //AES block 8k+9 - round 77993aese v4.16b, v27.16b7994aesmc v4.16b, v4.16b //AES block 8k+12 - round 779957996aese v6.16b, v27.16b7997aesmc v6.16b, v6.16b //AES block 8k+14 - round 77998aese v2.16b, v27.16b7999aesmc v2.16b, v2.16b //AES block 8k+10 - round 78000.inst 0xce082a31 //eor3 v17.16b, v17.16b, v8.16b, v10.16b //GHASH block 8k+4, 8k+5 - high80018002aese v0.16b, v27.16b8003aesmc v0.16b, v0.16b //AES block 8k+8 - round 78004trn2 v14.2d, v15.2d, v14.2d //GHASH block 8k+6, 8k+7 - mid8005aese v3.16b, v27.16b8006aesmc v3.16b, v3.16b //AES block 8k+11 - round 780078008aese v0.16b, v28.16b8009aesmc v0.16b, v0.16b //AES block 8k+8 - round 88010aese v7.16b, v28.16b8011aesmc v7.16b, v7.16b //AES block 8k+15 - round 88012aese v4.16b, v28.16b8013aesmc v4.16b, v4.16b //AES block 8k+12 - round 880148015aese v1.16b, v28.16b8016aesmc v1.16b, v1.16b //AES block 8k+9 - round 88017aese v5.16b, v28.16b8018aesmc v5.16b, v5.16b //AES block 8k+13 - round 88019aese v6.16b, v28.16b8020aesmc v6.16b, v6.16b //AES block 8k+14 - round 880218022aese v3.16b, v28.16b8023aesmc v3.16b, v3.16b //AES block 8k+11 - round 88024aese v4.16b, v26.16b8025aesmc v4.16b, v4.16b //AES block 8k+12 - round 98026eor v12.16b, v12.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid80278028aese v0.16b, v26.16b8029aesmc v0.16b, v0.16b //AES block 8k+8 - round 98030aese v1.16b, v26.16b8031aesmc v1.16b, v1.16b //AES block 8k+9 - round 98032eor v14.16b, v14.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid80338034aese v6.16b, v26.16b8035aesmc v6.16b, v6.16b //AES block 8k+14 - round 98036aese v7.16b, v26.16b8037aesmc v7.16b, v7.16b //AES block 8k+15 - round 98038pmull2 v16.1q, v12.2d, v24.2d //GHASH block 8k+4 - mid80398040aese v2.16b, v28.16b8041aesmc v2.16b, v2.16b //AES block 8k+10 - round 88042pmull v24.1q, v12.1d, v24.1d //GHASH block 8k+5 - mid8043pmull2 v12.1q, v15.2d, v20.2d //GHASH block 8k+7 - high80448045pmull2 v13.1q, v14.2d, v21.2d //GHASH block 8k+6 - mid8046pmull v21.1q, v14.1d, v21.1d //GHASH block 8k+7 - mid8047pmull v20.1q, v15.1d, v20.1d //GHASH block 8k+7 - low80488049ldp q27, q28, [x8, #160] //load rk10, rk118050.inst 0xce195e73 //eor3 v19.16b, v19.16b, v25.16b, v23.16b //GHASH block 8k+4, 8k+5 - low8051.inst 0xce184252 //eor3 v18.16b, v18.16b, v24.16b, v16.16b //GHASH block 8k+4, 8k+5 - mid80528053aese v2.16b, v26.16b8054aesmc v2.16b, v2.16b //AES block 8k+10 - round 98055aese v3.16b, v26.16b8056aesmc v3.16b, v3.16b //AES block 8k+11 - round 98057aese v5.16b, v26.16b8058aesmc v5.16b, v5.16b //AES block 8k+13 - round 980598060.inst 0xce0b3231 //eor3 v17.16b, v17.16b, v11.16b, v12.16b //GHASH block 8k+6, 8k+7 - high8061.inst 0xce165273 //eor3 v19.16b, v19.16b, v22.16b, v20.16b //GHASH block 8k+6, 8k+7 - low8062ldr d16, [x10] //MODULO - load modulo constant80638064.inst 0xce153652 //eor3 v18.16b, v18.16b, v21.16b, v13.16b //GHASH block 8k+6, 8k+7 - mid80658066aese v4.16b, v27.16b8067aesmc v4.16b, v4.16b //AES block 8k+12 - round 108068aese v6.16b, v27.16b8069aesmc v6.16b, v6.16b //AES block 8k+14 - round 108070aese v5.16b, v27.16b8071aesmc v5.16b, v5.16b //AES block 8k+13 - round 1080728073aese v0.16b, v27.16b8074aesmc v0.16b, v0.16b //AES block 8k+8 - round 108075aese v2.16b, v27.16b8076aesmc v2.16b, v2.16b //AES block 8k+10 - round 108077aese v3.16b, v27.16b8078aesmc v3.16b, v3.16b //AES block 8k+11 - round 1080798080.inst 0xce114e52 //eor3 v18.16b, v18.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up80818082aese v7.16b, v27.16b8083aesmc v7.16b, v7.16b //AES block 8k+15 - round 108084aese v1.16b, v27.16b8085aesmc v1.16b, v1.16b //AES block 8k+9 - round 108086ldp q26, q27, [x8, #192] //load rk12, rk1380878088ext v21.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment80898090aese v2.16b, v28.16b8091aesmc v2.16b, v2.16b //AES block 8k+10 - round 118092aese v1.16b, v28.16b8093aesmc v1.16b, v1.16b //AES block 8k+9 - round 118094aese v0.16b, v28.16b8095aesmc v0.16b, v0.16b //AES block 8k+8 - round 1180968097pmull v29.1q, v17.1d, v16.1d //MODULO - top 64b align with mid8098aese v3.16b, v28.16b8099aesmc v3.16b, v3.16b //AES block 8k+11 - round 1181008101aese v7.16b, v28.16b8102aesmc v7.16b, v7.16b //AES block 8k+15 - round 118103aese v6.16b, v28.16b8104aesmc v6.16b, v6.16b //AES block 8k+14 - round 118105aese v4.16b, v28.16b8106aesmc v4.16b, v4.16b //AES block 8k+12 - round 1181078108aese v5.16b, v28.16b8109aesmc v5.16b, v5.16b //AES block 8k+13 - round 118110aese v3.16b, v26.16b8111aesmc v3.16b, v3.16b //AES block 8k+11 - round 1281128113.inst 0xce1d5652 //eor3 v18.16b, v18.16b, v29.16b, v21.16b //MODULO - fold into mid81148115aese v3.16b, v27.16b //AES block 8k+11 - round 138116aese v2.16b, v26.16b8117aesmc v2.16b, v2.16b //AES block 8k+10 - round 128118aese v6.16b, v26.16b8119aesmc v6.16b, v6.16b //AES block 8k+14 - round 1281208121pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low8122aese v4.16b, v26.16b8123aesmc v4.16b, v4.16b //AES block 8k+12 - round 128124aese v7.16b, v26.16b8125aesmc v7.16b, v7.16b //AES block 8k+15 - round 1281268127aese v0.16b, v26.16b8128aesmc v0.16b, v0.16b //AES block 8k+8 - round 128129ldr q28, [x8, #224] //load rk148130aese v1.16b, v26.16b8131aesmc v1.16b, v1.16b //AES block 8k+9 - round 1281328133aese v4.16b, v27.16b //AES block 8k+12 - round 138134ext v21.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment8135aese v5.16b, v26.16b8136aesmc v5.16b, v5.16b //AES block 8k+13 - round 1281378138aese v6.16b, v27.16b //AES block 8k+14 - round 138139aese v2.16b, v27.16b //AES block 8k+10 - round 138140aese v1.16b, v27.16b //AES block 8k+9 - round 1381418142aese v5.16b, v27.16b //AES block 8k+13 - round 138143.inst 0xce154673 //eor3 v19.16b, v19.16b, v21.16b, v17.16b //MODULO - fold into low8144add v30.4s, v30.4s, v31.4s //CTR block 8k+1581458146aese v7.16b, v27.16b //AES block 8k+15 - round 138147aese v0.16b, v27.16b //AES block 8k+8 - round 138148.L256_dec_tail: //TAIL81498150ext v16.16b, v19.16b, v19.16b, #8 //prepare final partial tag8151sub x5, x4, x0 //main_end_input_ptr is number of bytes left to process8152cmp x5, #11281538154ldr q9, [x0], #16 //AES block 8k+8 - load ciphertext81558156ldp q24, q25, [x3, #192] //load h8k | h7k8157ext v25.16b, v25.16b, v25.16b, #88158mov v29.16b, v28.16b81598160ldp q20, q21, [x3, #128] //load h5l | h5h8161ext v20.16b, v20.16b, v20.16b, #881628163.inst 0xce00752c //eor3 v12.16b, v9.16b, v0.16b, v29.16b //AES block 8k+8 - result8164ldp q22, q23, [x3, #160] //load h6l | h6h8165ext v22.16b, v22.16b, v22.16b, #88166ext v23.16b, v23.16b, v23.16b, #88167b.gt .L256_dec_blocks_more_than_781688169mov v7.16b, v6.16b8170sub v30.4s, v30.4s, v31.4s8171mov v6.16b, v5.16b81728173mov v5.16b, v4.16b8174mov v4.16b, v3.16b8175movi v19.8b, #081768177movi v17.8b, #08178movi v18.8b, #08179mov v3.16b, v2.16b81808181cmp x5, #968182mov v2.16b, v1.16b8183b.gt .L256_dec_blocks_more_than_681848185mov v7.16b, v6.16b8186mov v6.16b, v5.16b81878188mov v5.16b, v4.16b8189cmp x5, #808190sub v30.4s, v30.4s, v31.4s81918192mov v4.16b, v3.16b8193mov v3.16b, v1.16b8194b.gt .L256_dec_blocks_more_than_581958196cmp x5, #648197mov v7.16b, v6.16b8198sub v30.4s, v30.4s, v31.4s81998200mov v6.16b, v5.16b82018202mov v5.16b, v4.16b8203mov v4.16b, v1.16b8204b.gt .L256_dec_blocks_more_than_482058206sub v30.4s, v30.4s, v31.4s8207mov v7.16b, v6.16b8208cmp x5, #4882098210mov v6.16b, v5.16b8211mov v5.16b, v1.16b8212b.gt .L256_dec_blocks_more_than_382138214ldr q24, [x3, #96] //load h4k | h3k8215sub v30.4s, v30.4s, v31.4s8216mov v7.16b, v6.16b82178218cmp x5, #328219mov v6.16b, v1.16b8220b.gt .L256_dec_blocks_more_than_282218222sub v30.4s, v30.4s, v31.4s82238224mov v7.16b, v1.16b8225cmp x5, #168226b.gt .L256_dec_blocks_more_than_182278228sub v30.4s, v30.4s, v31.4s8229ldr q21, [x3, #48] //load h2k | h1k8230b .L256_dec_blocks_less_than_18231.L256_dec_blocks_more_than_7: //blocks left > 78232rev64 v8.16b, v9.16b //GHASH final-7 block8233ldr q9, [x0], #16 //AES final-6 block - load ciphertext8234st1 { v12.16b}, [x2], #16 //AES final-7 block - store result82358236ins v18.d[0], v24.d[1] //GHASH final-7 block - mid82378238eor v8.16b, v8.16b, v16.16b //feed in partial tag82398240ins v27.d[0], v8.d[1] //GHASH final-7 block - mid8241.inst 0xce01752c //eor3 v12.16b, v9.16b, v1.16b, v29.16b //AES final-6 block - result82428243pmull2 v17.1q, v8.2d, v25.2d //GHASH final-7 block - high82448245eor v27.8b, v27.8b, v8.8b //GHASH final-7 block - mid8246movi v16.8b, #0 //suppress further partial tag feed in82478248pmull v19.1q, v8.1d, v25.1d //GHASH final-7 block - low8249pmull v18.1q, v27.1d, v18.1d //GHASH final-7 block - mid8250.L256_dec_blocks_more_than_6: //blocks left > 682518252rev64 v8.16b, v9.16b //GHASH final-6 block82538254eor v8.16b, v8.16b, v16.16b //feed in partial tag8255ldr q9, [x0], #16 //AES final-5 block - load ciphertext8256movi v16.8b, #0 //suppress further partial tag feed in82578258ins v27.d[0], v8.d[1] //GHASH final-6 block - mid8259st1 { v12.16b}, [x2], #16 //AES final-6 block - store result8260pmull2 v28.1q, v8.2d, v23.2d //GHASH final-6 block - high82618262pmull v26.1q, v8.1d, v23.1d //GHASH final-6 block - low82638264.inst 0xce02752c //eor3 v12.16b, v9.16b, v2.16b, v29.16b //AES final-5 block - result8265eor v19.16b, v19.16b, v26.16b //GHASH final-6 block - low8266eor v27.8b, v27.8b, v8.8b //GHASH final-6 block - mid82678268pmull v27.1q, v27.1d, v24.1d //GHASH final-6 block - mid82698270eor v18.16b, v18.16b, v27.16b //GHASH final-6 block - mid8271eor v17.16b, v17.16b, v28.16b //GHASH final-6 block - high8272.L256_dec_blocks_more_than_5: //blocks left > 582738274rev64 v8.16b, v9.16b //GHASH final-5 block82758276eor v8.16b, v8.16b, v16.16b //feed in partial tag82778278pmull2 v28.1q, v8.2d, v22.2d //GHASH final-5 block - high8279ins v27.d[0], v8.d[1] //GHASH final-5 block - mid82808281ldr q9, [x0], #16 //AES final-4 block - load ciphertext82828283eor v27.8b, v27.8b, v8.8b //GHASH final-5 block - mid8284st1 { v12.16b}, [x2], #16 //AES final-5 block - store result82858286pmull v26.1q, v8.1d, v22.1d //GHASH final-5 block - low8287ins v27.d[1], v27.d[0] //GHASH final-5 block - mid82888289pmull2 v27.1q, v27.2d, v21.2d //GHASH final-5 block - mid82908291eor v17.16b, v17.16b, v28.16b //GHASH final-5 block - high8292.inst 0xce03752c //eor3 v12.16b, v9.16b, v3.16b, v29.16b //AES final-4 block - result8293eor v19.16b, v19.16b, v26.16b //GHASH final-5 block - low82948295eor v18.16b, v18.16b, v27.16b //GHASH final-5 block - mid8296movi v16.8b, #0 //suppress further partial tag feed in8297.L256_dec_blocks_more_than_4: //blocks left > 482988299rev64 v8.16b, v9.16b //GHASH final-4 block83008301eor v8.16b, v8.16b, v16.16b //feed in partial tag83028303ins v27.d[0], v8.d[1] //GHASH final-4 block - mid8304ldr q9, [x0], #16 //AES final-3 block - load ciphertext83058306movi v16.8b, #0 //suppress further partial tag feed in83078308pmull v26.1q, v8.1d, v20.1d //GHASH final-4 block - low8309pmull2 v28.1q, v8.2d, v20.2d //GHASH final-4 block - high83108311eor v27.8b, v27.8b, v8.8b //GHASH final-4 block - mid83128313eor v17.16b, v17.16b, v28.16b //GHASH final-4 block - high83148315pmull v27.1q, v27.1d, v21.1d //GHASH final-4 block - mid83168317eor v19.16b, v19.16b, v26.16b //GHASH final-4 block - low8318st1 { v12.16b}, [x2], #16 //AES final-4 block - store result83198320eor v18.16b, v18.16b, v27.16b //GHASH final-4 block - mid8321.inst 0xce04752c //eor3 v12.16b, v9.16b, v4.16b, v29.16b //AES final-3 block - result8322.L256_dec_blocks_more_than_3: //blocks left > 383238324ldr q25, [x3, #112] //load h4l | h4h8325ext v25.16b, v25.16b, v25.16b, #88326rev64 v8.16b, v9.16b //GHASH final-3 block83278328eor v8.16b, v8.16b, v16.16b //feed in partial tag8329ldr q9, [x0], #16 //AES final-2 block - load ciphertext8330ldr q24, [x3, #96] //load h4k | h3k83318332ins v27.d[0], v8.d[1] //GHASH final-3 block - mid8333st1 { v12.16b}, [x2], #16 //AES final-3 block - store result83348335.inst 0xce05752c //eor3 v12.16b, v9.16b, v5.16b, v29.16b //AES final-2 block - result83368337eor v27.8b, v27.8b, v8.8b //GHASH final-3 block - mid83388339ins v27.d[1], v27.d[0] //GHASH final-3 block - mid8340pmull v26.1q, v8.1d, v25.1d //GHASH final-3 block - low8341pmull2 v28.1q, v8.2d, v25.2d //GHASH final-3 block - high83428343movi v16.8b, #0 //suppress further partial tag feed in8344pmull2 v27.1q, v27.2d, v24.2d //GHASH final-3 block - mid8345eor v19.16b, v19.16b, v26.16b //GHASH final-3 block - low83468347eor v17.16b, v17.16b, v28.16b //GHASH final-3 block - high83488349eor v18.16b, v18.16b, v27.16b //GHASH final-3 block - mid8350.L256_dec_blocks_more_than_2: //blocks left > 283518352rev64 v8.16b, v9.16b //GHASH final-2 block83538354ldr q23, [x3, #80] //load h3l | h3h8355ext v23.16b, v23.16b, v23.16b, #88356ldr q9, [x0], #16 //AES final-1 block - load ciphertext83578358eor v8.16b, v8.16b, v16.16b //feed in partial tag83598360ins v27.d[0], v8.d[1] //GHASH final-2 block - mid83618362pmull v26.1q, v8.1d, v23.1d //GHASH final-2 block - low8363st1 { v12.16b}, [x2], #16 //AES final-2 block - store result8364.inst 0xce06752c //eor3 v12.16b, v9.16b, v6.16b, v29.16b //AES final-1 block - result83658366eor v27.8b, v27.8b, v8.8b //GHASH final-2 block - mid8367eor v19.16b, v19.16b, v26.16b //GHASH final-2 block - low8368movi v16.8b, #0 //suppress further partial tag feed in83698370pmull v27.1q, v27.1d, v24.1d //GHASH final-2 block - mid8371pmull2 v28.1q, v8.2d, v23.2d //GHASH final-2 block - high83728373eor v18.16b, v18.16b, v27.16b //GHASH final-2 block - mid8374eor v17.16b, v17.16b, v28.16b //GHASH final-2 block - high8375.L256_dec_blocks_more_than_1: //blocks left > 183768377rev64 v8.16b, v9.16b //GHASH final-1 block83788379eor v8.16b, v8.16b, v16.16b //feed in partial tag83808381ins v27.d[0], v8.d[1] //GHASH final-1 block - mid8382ldr q22, [x3, #64] //load h2l | h2h8383ext v22.16b, v22.16b, v22.16b, #883848385eor v27.8b, v27.8b, v8.8b //GHASH final-1 block - mid8386ldr q9, [x0], #16 //AES final block - load ciphertext8387st1 { v12.16b}, [x2], #16 //AES final-1 block - store result83888389ldr q21, [x3, #48] //load h2k | h1k8390pmull v26.1q, v8.1d, v22.1d //GHASH final-1 block - low83918392ins v27.d[1], v27.d[0] //GHASH final-1 block - mid83938394eor v19.16b, v19.16b, v26.16b //GHASH final-1 block - low83958396.inst 0xce07752c //eor3 v12.16b, v9.16b, v7.16b, v29.16b //AES final block - result8397pmull2 v28.1q, v8.2d, v22.2d //GHASH final-1 block - high83988399pmull2 v27.1q, v27.2d, v21.2d //GHASH final-1 block - mid84008401movi v16.8b, #0 //suppress further partial tag feed in8402eor v17.16b, v17.16b, v28.16b //GHASH final-1 block - high84038404eor v18.16b, v18.16b, v27.16b //GHASH final-1 block - mid8405.L256_dec_blocks_less_than_1: //blocks left <= 184068407ld1 { v26.16b}, [x2] //load existing bytes where the possibly partial last block is to be stored8408mvn x6, xzr //temp0_x = 0xffffffffffffffff8409and x1, x1, #127 //bit_length %= 12884108411sub x1, x1, #128 //bit_length -= 1288412rev32 v30.16b, v30.16b8413str q30, [x16] //store the updated counter84148415neg x1, x1 //bit_length = 128 - #bits in input (in range [1,128])84168417and x1, x1, #127 //bit_length %= 12884188419lsr x6, x6, x1 //temp0_x is mask for top 64b of last block8420cmp x1, #648421mvn x7, xzr //temp1_x = 0xffffffffffffffff84228423csel x14, x6, xzr, lt8424csel x13, x7, x6, lt84258426mov v0.d[0], x13 //ctr0b is mask for last block8427mov v0.d[1], x1484288429and v9.16b, v9.16b, v0.16b //possibly partial last block has zeroes in highest bits8430ldr q20, [x3, #32] //load h1l | h1h8431ext v20.16b, v20.16b, v20.16b, #88432bif v12.16b, v26.16b, v0.16b //insert existing bytes in top end of result before storing84338434rev64 v8.16b, v9.16b //GHASH final block84358436eor v8.16b, v8.16b, v16.16b //feed in partial tag84378438ins v16.d[0], v8.d[1] //GHASH final block - mid8439pmull2 v28.1q, v8.2d, v20.2d //GHASH final block - high84408441eor v16.8b, v16.8b, v8.8b //GHASH final block - mid84428443pmull v26.1q, v8.1d, v20.1d //GHASH final block - low8444eor v17.16b, v17.16b, v28.16b //GHASH final block - high84458446pmull v16.1q, v16.1d, v21.1d //GHASH final block - mid84478448eor v18.16b, v18.16b, v16.16b //GHASH final block - mid8449ldr d16, [x10] //MODULO - load modulo constant8450eor v19.16b, v19.16b, v26.16b //GHASH final block - low84518452pmull v21.1q, v17.1d, v16.1d //MODULO - top 64b align with mid8453eor v14.16b, v17.16b, v19.16b //MODULO - karatsuba tidy up84548455ext v17.16b, v17.16b, v17.16b, #8 //MODULO - other top alignment8456st1 { v12.16b}, [x2] //store all 16B84578458eor v18.16b, v18.16b, v14.16b //MODULO - karatsuba tidy up84598460eor v21.16b, v17.16b, v21.16b //MODULO - fold into mid8461eor v18.16b, v18.16b, v21.16b //MODULO - fold into mid84628463pmull v17.1q, v18.1d, v16.1d //MODULO - mid 64b align with low84648465ext v18.16b, v18.16b, v18.16b, #8 //MODULO - other mid alignment8466eor v19.16b, v19.16b, v17.16b //MODULO - fold into low84678468eor v19.16b, v19.16b, v18.16b //MODULO - fold into low8469ext v19.16b, v19.16b, v19.16b, #88470rev64 v19.16b, v19.16b8471st1 { v19.16b }, [x3]8472mov x0, x984738474ldp d10, d11, [sp, #16]8475ldp d12, d13, [sp, #32]8476ldp d14, d15, [sp, #48]8477ldp d8, d9, [sp], #808478ret84798480.L256_dec_ret:8481mov w0, #0x08482ret8483.size unroll8_eor3_aes_gcm_dec_256_kernel,.-unroll8_eor3_aes_gcm_dec_256_kernel8484.byte 65,69,83,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,65,82,77,118,56,44,32,83,80,68,88,32,66,83,68,45,51,45,67,108,97,117,115,101,32,98,121,32,60,120,105,97,111,107,97,110,103,46,113,105,97,110,64,97,114,109,46,99,111,109,62,08485.align 28486.align 28487#endif848884898490