// Path: blob/main/sys/crypto/openssl/aarch64/vpsm4-armv8.S
// 39536 views
/* Do not modify. This file is auto-generated from vpsm4-armv8.pl. */
// Copyright 2020-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

//
// This module implements SM4 with ASIMD on aarch64
//
// Feb 2022
//

// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#include "arm_arch.h"
.arch	armv8-a
.text

// Read-only constants shared by every routine in this file:
//   .Lsbox      256-byte substitution table (loaded into v16-v31 by callers)
//   .Lck        32 words, consumed one per iteration by _vpsm4_set_key
//   .Lfk        16 bytes XORed into the user key by _vpsm4_set_key
//   .Lshuffles  tbl index vector that rotates the four 32-bit lanes by one
//   .Lxts_magic not referenced in the part of the file shown here
.section	.rodata
.type	_vpsm4_consts,%object
.align	7
_vpsm4_consts:
.Lsbox:
.byte	0xD6,0x90,0xE9,0xFE,0xCC,0xE1,0x3D,0xB7,0x16,0xB6,0x14,0xC2,0x28,0xFB,0x2C,0x05
.byte	0x2B,0x67,0x9A,0x76,0x2A,0xBE,0x04,0xC3,0xAA,0x44,0x13,0x26,0x49,0x86,0x06,0x99
.byte	0x9C,0x42,0x50,0xF4,0x91,0xEF,0x98,0x7A,0x33,0x54,0x0B,0x43,0xED,0xCF,0xAC,0x62
.byte	0xE4,0xB3,0x1C,0xA9,0xC9,0x08,0xE8,0x95,0x80,0xDF,0x94,0xFA,0x75,0x8F,0x3F,0xA6
.byte	0x47,0x07,0xA7,0xFC,0xF3,0x73,0x17,0xBA,0x83,0x59,0x3C,0x19,0xE6,0x85,0x4F,0xA8
.byte	0x68,0x6B,0x81,0xB2,0x71,0x64,0xDA,0x8B,0xF8,0xEB,0x0F,0x4B,0x70,0x56,0x9D,0x35
.byte	0x1E,0x24,0x0E,0x5E,0x63,0x58,0xD1,0xA2,0x25,0x22,0x7C,0x3B,0x01,0x21,0x78,0x87
.byte	0xD4,0x00,0x46,0x57,0x9F,0xD3,0x27,0x52,0x4C,0x36,0x02,0xE7,0xA0,0xC4,0xC8,0x9E
.byte	0xEA,0xBF,0x8A,0xD2,0x40,0xC7,0x38,0xB5,0xA3,0xF7,0xF2,0xCE,0xF9,0x61,0x15,0xA1
.byte	0xE0,0xAE,0x5D,0xA4,0x9B,0x34,0x1A,0x55,0xAD,0x93,0x32,0x30,0xF5,0x8C,0xB1,0xE3
.byte	0x1D,0xF6,0xE2,0x2E,0x82,0x66,0xCA,0x60,0xC0,0x29,0x23,0xAB,0x0D,0x53,0x4E,0x6F
.byte	0xD5,0xDB,0x37,0x45,0xDE,0xFD,0x8E,0x2F,0x03,0xFF,0x6A,0x72,0x6D,0x6C,0x5B,0x51
.byte	0x8D,0x1B,0xAF,0x92,0xBB,0xDD,0xBC,0x7F,0x11,0xD9,0x5C,0x41,0x1F,0x10,0x5A,0xD8
.byte	0x0A,0xC1,0x31,0x88,0xA5,0xCD,0x7B,0xBD,0x2D,0x74,0xD0,0x12,0xB8,0xE5,0xB4,0xB0
.byte	0x89,0x69,0x97,0x4A,0x0C,0x96,0x77,0x7E,0x65,0xB9,0xF1,0x09,0xC5,0x6E,0xC6,0x84
.byte	0x18,0xF0,0x7D,0xEC,0x3A,0xDC,0x4D,0x20,0x79,0xEE,0x5F,0x3E,0xD7,0xCB,0x39,0x48
.Lck:
.long	0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
.long	0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
.long	0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
.long	0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
.long	0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
.long	0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
.long	0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
.long	0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
.Lfk:
.quad	0x56aa3350a3b1bac6,0xb27022dc677d9197
.Lshuffles:
.quad	0x0B0A090807060504,0x030201000F0E0D0C
.Lxts_magic:
.quad	0x0101010101010187,0x0101010101010101

.size	_vpsm4_consts,.-_vpsm4_consts

.previous

// _vpsm4_set_key: expand a 16-byte user key into 32 round-key words.
//   In:   x0 = address of the 16-byte key
//         x1 = round-key buffer (32 words are written)
//         w2 = direction flag: non-zero stores words ascending from x1,
//              zero starts at x1+124 and stores descending
//   Out:  x1 is advanced/retreated by the post-indexed stores
//   Clobbers: x5-x8, x10, v0, v1, v4-v7, v16-v31, flags
.type	_vpsm4_set_key,%function
.align	4
_vpsm4_set_key:
	AARCH64_VALID_CALL_TARGET
	ld1	{v5.4s},[x0]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
	adrp	x5,.Lshuffles
	add	x5,x5,#:lo12:.Lshuffles
	ld1	{v7.2d},[x5]
	adrp	x5,.Lfk
	add	x5,x5,#:lo12:.Lfk
	ld1	{v6.2d},[x5]
	eor	v5.16b,v5.16b,v6.16b
	mov	x6,#32
	adrp	x5,.Lck
	add	x5,x5,#:lo12:.Lck
	movi	v0.16b,#64
	cbnz	w2,1f
	add	x1,x1,124
1:
	mov	w7,v5.s[1]
	ldr	w8,[x5],#4
	eor	w8,w8,w7
	mov	w7,v5.s[2]
	eor	w8,w8,w7
	mov	w7,v5.s[3]
	eor	w8,w8,w7
	// sbox lookup
	// The 256-byte table is split over four 64-byte register groups; the
	// index is lowered by 64 before each tbx so exactly one group matches
	// (tbx leaves the destination untouched for out-of-range indices).
	mov	v4.s[0],w8
	tbl	v1.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v4.16b
	sub	v4.16b,v4.16b,v0.16b
	tbx	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v4.16b
	sub	v4.16b,v4.16b,v0.16b
	tbx	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v4.16b
	sub	v4.16b,v4.16b,v0.16b
	tbx	v1.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v4.16b
	mov	w7,v1.s[0]
	// w8 = w7 ^ rol(w7,13) ^ rol(w7,23)   (ror 19 / ror 9)
	eor	w8,w7,w7,ror #19
	eor	w8,w8,w7,ror #9
	mov	w7,v5.s[0]
	eor	w8,w8,w7
	mov	v5.s[0],w8
	cbz	w2,2f
	str	w8,[x1],#4
	b	3f
2:
	str	w8,[x1],#-4
3:
	// rotate the four key words by one lane for the next iteration
	tbl	v5.16b,{v5.16b},v7.16b
	subs	x6,x6,#1
	b.ne	1b
	ret
.size	_vpsm4_set_key,.-_vpsm4_set_key

// _vpsm4_enc_4blks: run the 32 rounds on four blocks in parallel.
//   In:   v4-v7  = state words B0-B3, one block per 32-bit lane
//         x3     = round-key pointer (64-bit pairs read with ldp, 8 iterations
//                  of 4 rounds each)
//         v16-v31 = .Lsbox, already loaded by the caller
//   Out:  v0-v3  = v7,v6,v5,v4 (word order reversed), byte-swapped per word
//                  on little-endian builds
//   Clobbers: x10, w7, w8, w11, v0-v7, v12-v14, flags
//   The four tbl results are summed: tbl yields 0 for an out-of-range index,
//   so only the matching 64-byte group contributes.
.type	_vpsm4_enc_4blks,%function
.align	4
_vpsm4_enc_4blks:
	AARCH64_VALID_CALL_TARGET
	mov	x10,x3
	mov	w11,#8
10:
	ldp	w7,w8,[x10],8
	dup	v12.4s,w7
	dup	v13.4s,w8

	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	v14.16b,v6.16b,v7.16b
	eor	v12.16b,v5.16b,v12.16b
	eor	v12.16b,v14.16b,v12.16b
	movi	v0.16b,#64
	movi	v1.16b,#128
	movi	v2.16b,#192
	sub	v0.16b,v12.16b,v0.16b
	sub	v1.16b,v12.16b,v1.16b
	sub	v2.16b,v12.16b,v2.16b
	tbl	v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v0.2d,v0.2d,v1.2d
	add	v2.2d,v2.2d,v12.2d
	add	v12.2d,v0.2d,v2.2d

	ushr	v0.4s,v12.4s,32-2
	sli	v0.4s,v12.4s,2
	ushr	v2.4s,v12.4s,32-10
	eor	v1.16b,v0.16b,v12.16b
	sli	v2.4s,v12.4s,10
	eor	v1.16b,v2.16b,v1.16b
	ushr	v0.4s,v12.4s,32-18
	sli	v0.4s,v12.4s,18
	ushr	v2.4s,v12.4s,32-24
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v12.4s,24
	eor	v12.16b,v2.16b,v1.16b
	eor	v4.16b,v4.16b,v12.16b

	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	v14.16b,v14.16b,v4.16b
	eor	v13.16b,v14.16b,v13.16b
	movi	v0.16b,#64
	movi	v1.16b,#128
	movi	v2.16b,#192
	sub	v0.16b,v13.16b,v0.16b
	sub	v1.16b,v13.16b,v1.16b
	sub	v2.16b,v13.16b,v2.16b
	tbl	v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v0.2d,v0.2d,v1.2d
	add	v2.2d,v2.2d,v13.2d
	add	v13.2d,v0.2d,v2.2d

	ushr	v0.4s,v13.4s,32-2
	sli	v0.4s,v13.4s,2
	ushr	v2.4s,v13.4s,32-10
	eor	v1.16b,v0.16b,v13.16b
	sli	v2.4s,v13.4s,10
	eor	v1.16b,v2.16b,v1.16b
	ushr	v0.4s,v13.4s,32-18
	sli	v0.4s,v13.4s,18
	ushr	v2.4s,v13.4s,32-24
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,24
	eor	v13.16b,v2.16b,v1.16b
	ldp	w7,w8,[x10],8
	eor	v5.16b,v5.16b,v13.16b

	dup	v12.4s,w7
	dup	v13.4s,w8

	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	v14.16b,v4.16b,v5.16b
	eor	v12.16b,v7.16b,v12.16b
	eor	v12.16b,v14.16b,v12.16b
	movi	v0.16b,#64
	movi	v1.16b,#128
	movi	v2.16b,#192
	sub	v0.16b,v12.16b,v0.16b
	sub	v1.16b,v12.16b,v1.16b
	sub	v2.16b,v12.16b,v2.16b
	tbl	v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v0.2d,v0.2d,v1.2d
	add	v2.2d,v2.2d,v12.2d
	add	v12.2d,v0.2d,v2.2d

	ushr	v0.4s,v12.4s,32-2
	sli	v0.4s,v12.4s,2
	ushr	v2.4s,v12.4s,32-10
	eor	v1.16b,v0.16b,v12.16b
	sli	v2.4s,v12.4s,10
	eor	v1.16b,v2.16b,v1.16b
	ushr	v0.4s,v12.4s,32-18
	sli	v0.4s,v12.4s,18
	ushr	v2.4s,v12.4s,32-24
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v12.4s,24
	eor	v12.16b,v2.16b,v1.16b
	eor	v6.16b,v6.16b,v12.16b

	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	v14.16b,v14.16b,v6.16b
	eor	v13.16b,v14.16b,v13.16b
	movi	v0.16b,#64
	movi	v1.16b,#128
	movi	v2.16b,#192
	sub	v0.16b,v13.16b,v0.16b
	sub	v1.16b,v13.16b,v1.16b
	sub	v2.16b,v13.16b,v2.16b
	tbl	v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v0.2d,v0.2d,v1.2d
	add	v2.2d,v2.2d,v13.2d
	add	v13.2d,v0.2d,v2.2d

	ushr	v0.4s,v13.4s,32-2
	sli	v0.4s,v13.4s,2
	ushr	v2.4s,v13.4s,32-10
	eor	v1.16b,v0.16b,v13.16b
	sli	v2.4s,v13.4s,10
	eor	v1.16b,v2.16b,v1.16b
	ushr	v0.4s,v13.4s,32-18
	sli	v0.4s,v13.4s,18
	ushr	v2.4s,v13.4s,32-24
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,24
	eor	v13.16b,v2.16b,v1.16b
	eor	v7.16b,v7.16b,v13.16b
	subs	w11,w11,#1
	b.ne	10b
#ifndef __AARCH64EB__
	rev32	v3.16b,v4.16b
#else
	mov	v3.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v2.16b,v5.16b
#else
	mov	v2.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v1.16b,v6.16b
#else
	mov	v1.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v0.16b,v7.16b
#else
	mov	v0.16b,v7.16b
#endif
	ret
.size	_vpsm4_enc_4blks,.-_vpsm4_enc_4blks

// _vpsm4_enc_8blks: run the 32 rounds on eight blocks (two groups of four).
//   In:   v4-v7  = state words B0-B3 of blocks 0-3
//         v8-v11 = state words B0-B3 of blocks 4-7
//         x3     = round-key pointer, v16-v31 = .Lsbox
//   Out:  v0-v3  = v7,v6,v5,v4 and v4-v7 = v11,v10,v9,v8, byte-swapped per
//                  word on little-endian builds
//   Clobbers: x10, w7, w8, w11, v0-v15, flags
//   v3 holds the constant 64 for the index adjustment but is reused as a
//   temporary by the rotate sequence, hence the movi at the top of each round.
.type	_vpsm4_enc_8blks,%function
.align	4
_vpsm4_enc_8blks:
	AARCH64_VALID_CALL_TARGET
	mov	x10,x3
	mov	w11,#8
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	dup	v12.4s,w7
	eor	v14.16b,v6.16b,v7.16b
	eor	v15.16b,v10.16b,v11.16b
	eor	v0.16b,v5.16b,v12.16b
	eor	v1.16b,v9.16b,v12.16b
	eor	v12.16b,v14.16b,v0.16b
	eor	v13.16b,v15.16b,v1.16b
	movi	v3.16b,#64
	sub	v0.16b,v12.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v12.2d,v2.2d,v12.2d
	add	v12.2d,v1.2d,v12.2d

	sub	v0.16b,v13.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v13.2d,v2.2d,v13.2d
	add	v13.2d,v1.2d,v13.2d

	ushr	v0.4s,v12.4s,32-2
	sli	v0.4s,v12.4s,2
	ushr	v2.4s,v13.4s,32-2
	eor	v1.16b,v0.16b,v12.16b
	sli	v2.4s,v13.4s,2

	ushr	v0.4s,v12.4s,32-10
	eor	v3.16b,v2.16b,v13.16b
	sli	v0.4s,v12.4s,10
	ushr	v2.4s,v13.4s,32-10
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,10

	ushr	v0.4s,v12.4s,32-18
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,18
	ushr	v2.4s,v13.4s,32-18
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,18

	ushr	v0.4s,v12.4s,32-24
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,24
	ushr	v2.4s,v13.4s,32-24
	eor	v12.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,24
	eor	v13.16b,v2.16b,v3.16b
	eor	v4.16b,v4.16b,v12.16b
	eor	v8.16b,v8.16b,v13.16b

	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	dup	v13.4s,w8
	eor	v14.16b,v14.16b,v4.16b
	eor	v15.16b,v15.16b,v8.16b
	eor	v12.16b,v14.16b,v13.16b
	eor	v13.16b,v15.16b,v13.16b
	movi	v3.16b,#64
	sub	v0.16b,v12.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v12.2d,v2.2d,v12.2d
	add	v12.2d,v1.2d,v12.2d

	sub	v0.16b,v13.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v13.2d,v2.2d,v13.2d
	add	v13.2d,v1.2d,v13.2d

	ushr	v0.4s,v12.4s,32-2
	sli	v0.4s,v12.4s,2
	ushr	v2.4s,v13.4s,32-2
	eor	v1.16b,v0.16b,v12.16b
	sli	v2.4s,v13.4s,2

	ushr	v0.4s,v12.4s,32-10
	eor	v3.16b,v2.16b,v13.16b
	sli	v0.4s,v12.4s,10
	ushr	v2.4s,v13.4s,32-10
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,10

	ushr	v0.4s,v12.4s,32-18
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,18
	ushr	v2.4s,v13.4s,32-18
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,18

	ushr	v0.4s,v12.4s,32-24
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,24
	ushr	v2.4s,v13.4s,32-24
	eor	v12.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,24
	eor	v13.16b,v2.16b,v3.16b
	ldp	w7,w8,[x10],8
	eor	v5.16b,v5.16b,v12.16b
	eor	v9.16b,v9.16b,v13.16b

	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	dup	v12.4s,w7
	eor	v14.16b,v4.16b,v5.16b
	eor	v15.16b,v8.16b,v9.16b
	eor	v0.16b,v7.16b,v12.16b
	eor	v1.16b,v11.16b,v12.16b
	eor	v12.16b,v14.16b,v0.16b
	eor	v13.16b,v15.16b,v1.16b
	movi	v3.16b,#64
	sub	v0.16b,v12.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v12.2d,v2.2d,v12.2d
	add	v12.2d,v1.2d,v12.2d

	sub	v0.16b,v13.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v13.2d,v2.2d,v13.2d
	add	v13.2d,v1.2d,v13.2d

	ushr	v0.4s,v12.4s,32-2
	sli	v0.4s,v12.4s,2
	ushr	v2.4s,v13.4s,32-2
	eor	v1.16b,v0.16b,v12.16b
	sli	v2.4s,v13.4s,2

	ushr	v0.4s,v12.4s,32-10
	eor	v3.16b,v2.16b,v13.16b
	sli	v0.4s,v12.4s,10
	ushr	v2.4s,v13.4s,32-10
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,10

	ushr	v0.4s,v12.4s,32-18
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,18
	ushr	v2.4s,v13.4s,32-18
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,18

	ushr	v0.4s,v12.4s,32-24
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,24
	ushr	v2.4s,v13.4s,32-24
	eor	v12.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,24
	eor	v13.16b,v2.16b,v3.16b
	eor	v6.16b,v6.16b,v12.16b
	eor	v10.16b,v10.16b,v13.16b

	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	dup	v13.4s,w8
	eor	v14.16b,v14.16b,v6.16b
	eor	v15.16b,v15.16b,v10.16b
	eor	v12.16b,v14.16b,v13.16b
	eor	v13.16b,v15.16b,v13.16b
	movi	v3.16b,#64
	sub	v0.16b,v12.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v12.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v12.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v12.2d,v2.2d,v12.2d
	add	v12.2d,v1.2d,v12.2d

	sub	v0.16b,v13.16b,v3.16b
	sub	v1.16b,v0.16b,v3.16b
	sub	v2.16b,v1.16b,v3.16b
	tbl	v13.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v13.16b
	tbl	v0.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v0.16b
	tbl	v1.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v1.16b
	tbl	v2.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v2.16b
	add	v1.2d,v0.2d,v1.2d
	add	v13.2d,v2.2d,v13.2d
	add	v13.2d,v1.2d,v13.2d

	ushr	v0.4s,v12.4s,32-2
	sli	v0.4s,v12.4s,2
	ushr	v2.4s,v13.4s,32-2
	eor	v1.16b,v0.16b,v12.16b
	sli	v2.4s,v13.4s,2

	ushr	v0.4s,v12.4s,32-10
	eor	v3.16b,v2.16b,v13.16b
	sli	v0.4s,v12.4s,10
	ushr	v2.4s,v13.4s,32-10
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,10

	ushr	v0.4s,v12.4s,32-18
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,18
	ushr	v2.4s,v13.4s,32-18
	eor	v1.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,18

	ushr	v0.4s,v12.4s,32-24
	eor	v3.16b,v2.16b,v3.16b
	sli	v0.4s,v12.4s,24
	ushr	v2.4s,v13.4s,32-24
	eor	v12.16b,v0.16b,v1.16b
	sli	v2.4s,v13.4s,24
	eor	v13.16b,v2.16b,v3.16b
	eor	v7.16b,v7.16b,v12.16b
	eor	v11.16b,v11.16b,v13.16b
	subs	w11,w11,#1
	b.ne	10b
#ifndef __AARCH64EB__
	rev32	v3.16b,v4.16b
#else
	mov	v3.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v2.16b,v5.16b
#else
	mov	v2.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v1.16b,v6.16b
#else
	mov	v1.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v0.16b,v7.16b
#else
	mov	v0.16b,v7.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v8.16b
#else
	mov	v7.16b,v8.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v9.16b
#else
	mov	v6.16b,v9.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v10.16b
#else
	mov	v5.16b,v10.16b
#endif
#ifndef __AARCH64EB__
	rev32	v4.16b,v11.16b
#else
	mov	v4.16b,v11.16b
#endif
	ret
.size	_vpsm4_enc_8blks,.-_vpsm4_enc_8blks

// vpsm4_set_encrypt_key: call _vpsm4_set_key with w2 = 1 (round keys stored
// in ascending order). x0 and x1 are passed through unchanged.
.globl	vpsm4_set_encrypt_key
.type	vpsm4_set_encrypt_key,%function
.align	5
vpsm4_set_encrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	mov	w2,1
	bl	_vpsm4_set_key
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	vpsm4_set_encrypt_key,.-vpsm4_set_encrypt_key

// vpsm4_set_decrypt_key: call _vpsm4_set_key with w2 = 0 (round keys stored
// in descending order, starting at x1+124). x0 and x1 are passed through.
.globl	vpsm4_set_decrypt_key
.type	vpsm4_set_decrypt_key,%function
.align	5
vpsm4_set_decrypt_key:
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-16]!
	mov	w2,0
	bl	_vpsm4_set_key
	ldp	x29,x30,[sp],#16
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	vpsm4_set_decrypt_key,.-vpsm4_set_decrypt_key

// vpsm4_encrypt: transform one 16-byte block with the scalar round loop.
//   In:   x0 = input block, x1 = output block, x2 = round-key pointer
//   State words live in w12-w15; each of the 8 loop iterations does 4 rounds.
//   Clobbers: x3, x6-x15, v0-v4, v16-v31, flags
.globl	vpsm4_encrypt
.type	vpsm4_encrypt,%function
.align	5
vpsm4_encrypt:
	AARCH64_VALID_CALL_TARGET
	ld1	{v4.4s},[x0]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	mov	x3,x2
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	// write the state back in reversed word order
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	st1	{v4.4s},[x1]
	ret
.size	vpsm4_encrypt,.-vpsm4_encrypt

// vpsm4_decrypt: same instruction sequence as vpsm4_encrypt.
//   In:   x0 = input block, x1 = output block, x2 = round-key pointer
//   NOTE(review): presumably the direction comes solely from the round-key
//   order written by vpsm4_set_decrypt_key - confirm against callers.
.globl	vpsm4_decrypt
.type	vpsm4_decrypt,%function
.align	5
vpsm4_decrypt:
	AARCH64_VALID_CALL_TARGET
	ld1	{v4.4s},[x0]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	mov	x3,x2
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	st1	{v4.4s},[x1]
	ret
.size	vpsm4_decrypt,.-vpsm4_decrypt

// vpsm4_ecb_encrypt: process a buffer block by block.
//   In:   x0 = input, x1 = output, x2 = length in bytes (shifted right by 4
//         to get a block count), x3 = round-key pointer
//   Strategy: 8 blocks at a time via _vpsm4_enc_8blks, then 4 via
//   _vpsm4_enc_4blks, then a 1-, 2- or 3-block tail.
//   d8-d15 and x29/x30 are saved in an 80-byte frame because the helpers
//   overwrite v8-v15.
//   NOTE(review): the block count is compared through w2 with signed
//   conditions, so only the low 32 bits of the count are examined.
.globl	vpsm4_ecb_encrypt
.type	vpsm4_ecb_encrypt,%function
.align	5
vpsm4_ecb_encrypt:
	AARCH64_SIGN_LINK_REGISTER
	// convert length into blocks
	lsr	x2,x2,4
	stp	d8,d9,[sp,#-80]!
	stp	d10,d11,[sp,#16]
	stp	d12,d13,[sp,#32]
	stp	d14,d15,[sp,#48]
	stp	x29,x30,[sp,#64]
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
.Lecb_8_blocks_process:
	cmp	w2,#8
	b.lt	.Lecb_4_blocks_process
	// ld4 de-interleaves: word i of every block lands in register 4+i / 8+i
	ld4	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
	ld4	{v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
#ifndef __AARCH64EB__
	rev32	v8.16b,v8.16b
#endif
#ifndef __AARCH64EB__
	rev32	v9.16b,v9.16b
#endif
#ifndef __AARCH64EB__
	rev32	v10.16b,v10.16b
#endif
#ifndef __AARCH64EB__
	rev32	v11.16b,v11.16b
#endif
	bl	_vpsm4_enc_8blks
	st4	{v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64
	st4	{v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64
	subs	w2,w2,#8
	b.gt	.Lecb_8_blocks_process
	b	100f
.Lecb_4_blocks_process:
	cmp	w2,#4
	b.lt	1f
	ld4	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	bl	_vpsm4_enc_4blks
	st4	{v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64
	sub	w2,w2,#4
1:
	// process last block
	cmp	w2,#1
	b.lt	100f
	b.gt	1f
	ld1	{v4.4s},[x0]
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	st1	{v4.4s},[x1]
	b	100f
1:	// process last 2 blocks
	// lanes 0 and 1 are loaded unconditionally; lane 2 only for 3 blocks
	ld4	{v4.s,v5.s,v6.s,v7.s}[0],[x0],#16
	ld4	{v4.s,v5.s,v6.s,v7.s}[1],[x0],#16
	cmp	w2,#2
	b.gt	1f
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	bl	_vpsm4_enc_4blks
	st4	{v0.s,v1.s,v2.s,v3.s}[0],[x1],#16
	st4	{v0.s,v1.s,v2.s,v3.s}[1],[x1]
	b	100f
1:	// process last 3 blocks
	ld4	{v4.s,v5.s,v6.s,v7.s}[2],[x0],#16
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	bl	_vpsm4_enc_4blks
	st4	{v0.s,v1.s,v2.s,v3.s}[0],[x1],#16
	st4	{v0.s,v1.s,v2.s,v3.s}[1],[x1],#16
	st4	{v0.s,v1.s,v2.s,v3.s}[2],[x1]
100:
	ldp	d10,d11,[sp,#16]
	ldp	d12,d13,[sp,#32]
	ldp	d14,d15,[sp,#48]
	ldp	x29,x30,[sp,#64]
	ldp	d8,d9,[sp],#80
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	vpsm4_ecb_encrypt,.-vpsm4_ecb_encrypt

// vpsm4_cbc_encrypt: only the beginning of this routine is visible here.
//   Visible behaviour: x2 is shifted right by 4; w5 == 0 branches to .Ldec
//   (not shown); otherwise 16 bytes at x4 are loaded into v3 and XORed into
//   the first block, and each finished block is XORed into the next one
//   before it goes through the scalar round loop with round keys from x3.
//   NOTE(review): x4 is presumably the IV and w5 the encrypt flag - confirm
//   against the C prototype.
.globl	vpsm4_cbc_encrypt
.type	vpsm4_cbc_encrypt,%function
.align	5
vpsm4_cbc_encrypt:
	AARCH64_VALID_CALL_TARGET
	lsr	x2,x2,4
	adrp	x10,.Lsbox
	add	x10,x10,#:lo12:.Lsbox
	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
	ld1	{v28.16b,v29.16b,v30.16b,v31.16b},[x10]
	cbz	w5,.Ldec
	ld1	{v3.4s},[x4]
.Lcbc_4_blocks_enc:
	cmp	w2,#4
	b.lt	1f
	ld1	{v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
	eor	v4.16b,v4.16b,v3.16b
#ifndef __AARCH64EB__
	rev32	v5.16b,v5.16b
#endif
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
#ifndef __AARCH64EB__
	rev32	v6.16b,v6.16b
#endif
#ifndef __AARCH64EB__
	rev32	v7.16b,v7.16b
#endif
	mov	x10,x3
	mov	w11,#8
	mov	w12,v4.s[0]
	mov	w13,v4.s[1]
	mov	w14,v4.s[2]
	mov	w15,v4.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	mov	v4.s[0],w15
	mov	v4.s[1],w14
	mov	v4.s[2],w13
	mov	v4.s[3],w12
	// chain: result of block 0 is XORed into block 1
	eor	v5.16b,v5.16b,v4.16b
	mov	x10,x3
	mov	w11,#8
	mov	w12,v5.s[0]
	mov	w13,v5.s[1]
	mov	w14,v5.s[2]
	mov	w15,v5.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w12,w12,w6
	// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
	eor	w6,w14,w15
	eor	w9,w12,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	ldp	w7,w8,[x10],8
	eor	w13,w13,w6
	// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
	eor	w6,w12,w13
	eor	w9,w7,w15
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w14,w14,w6
	// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
	eor	w6,w12,w13
	eor	w9,w14,w8
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	mov	w9,v2.s[0]
	add	w7,w6,w7
	mov	w6,v3.s[0]
	add	w7,w7,w9
	add	w7,w7,w6

	eor	w6,w7,w7,ror #32-2
	eor	w6,w6,w7,ror #32-10
	eor	w6,w6,w7,ror #32-18
	eor	w6,w6,w7,ror #32-24
	eor	w15,w15,w6
	subs	w11,w11,#1
	b.ne	10b
	mov	v5.s[0],w15
	mov	v5.s[1],w14
	mov	v5.s[2],w13
	mov	v5.s[3],w12
#ifndef __AARCH64EB__
	rev32	v4.16b,v4.16b
#endif
	// chain: result of block 1 is XORed into block 2
	eor	v6.16b,v6.16b,v5.16b
	mov	x10,x3
	mov	w11,#8
	mov	w12,v6.s[0]
	mov	w13,v6.s[1]
	mov	w14,v6.s[2]
	mov	w15,v6.s[3]
10:
	ldp	w7,w8,[x10],8
	// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
	eor	w6,w14,w15
	eor	w9,w7,w13
	eor	w6,w6,w9
	movi	v1.16b,#64
	movi	v2.16b,#128
	movi	v3.16b,#192
	mov	v0.s[0],w6

	sub	v1.16b,v0.16b,v1.16b
	sub	v2.16b,v0.16b,v2.16b
	sub	v3.16b,v0.16b,v3.16b

	tbl	v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
	tbl	v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
	tbl	v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
	tbl	v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

	mov	w6,v0.s[0]
	mov	w7,v1.s[0]
	// NOTE(review): the visible text stops in the middle of the next
	// instruction; its operands are on the following (unseen) line.
	mov 
w9,v2.s[0]1540add w7,w6,w71541mov w6,v3.s[0]1542add w7,w7,w91543add w7,w7,w615441545eor w6,w7,w7,ror #32-21546eor w6,w6,w7,ror #32-101547eor w6,w6,w7,ror #32-181548eor w6,w6,w7,ror #32-241549eor w12,w12,w61550// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)1551eor w6,w14,w151552eor w9,w12,w81553eor w6,w6,w91554movi v1.16b,#641555movi v2.16b,#1281556movi v3.16b,#1921557mov v0.s[0],w615581559sub v1.16b,v0.16b,v1.16b1560sub v2.16b,v0.16b,v2.16b1561sub v3.16b,v0.16b,v3.16b15621563tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1564tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1565tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1566tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b15671568mov w6,v0.s[0]1569mov w7,v1.s[0]1570mov w9,v2.s[0]1571add w7,w6,w71572mov w6,v3.s[0]1573add w7,w7,w91574add w7,w7,w615751576eor w6,w7,w7,ror #32-21577eor w6,w6,w7,ror #32-101578eor w6,w6,w7,ror #32-181579eor w6,w6,w7,ror #32-241580ldp w7,w8,[x10],81581eor w13,w13,w61582// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)1583eor w6,w12,w131584eor w9,w7,w151585eor w6,w6,w91586movi v1.16b,#641587movi v2.16b,#1281588movi v3.16b,#1921589mov v0.s[0],w615901591sub v1.16b,v0.16b,v1.16b1592sub v2.16b,v0.16b,v2.16b1593sub v3.16b,v0.16b,v3.16b15941595tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1596tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1597tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1598tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b15991600mov w6,v0.s[0]1601mov w7,v1.s[0]1602mov w9,v2.s[0]1603add w7,w6,w71604mov w6,v3.s[0]1605add w7,w7,w91606add w7,w7,w616071608eor w6,w7,w7,ror #32-21609eor w6,w6,w7,ror #32-101610eor w6,w6,w7,ror #32-181611eor w6,w6,w7,ror #32-241612eor w14,w14,w61613// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)1614eor w6,w12,w131615eor w9,w14,w81616eor w6,w6,w91617movi v1.16b,#641618movi v2.16b,#1281619movi v3.16b,#1921620mov v0.s[0],w616211622sub v1.16b,v0.16b,v1.16b1623sub v2.16b,v0.16b,v2.16b1624sub v3.16b,v0.16b,v3.16b16251626tbl 
v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1627tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1628tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1629tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b16301631mov w6,v0.s[0]1632mov w7,v1.s[0]1633mov w9,v2.s[0]1634add w7,w6,w71635mov w6,v3.s[0]1636add w7,w7,w91637add w7,w7,w616381639eor w6,w7,w7,ror #32-21640eor w6,w6,w7,ror #32-101641eor w6,w6,w7,ror #32-181642eor w6,w6,w7,ror #32-241643eor w15,w15,w61644subs w11,w11,#11645b.ne 10b1646mov v6.s[0],w151647mov v6.s[1],w141648mov v6.s[2],w131649mov v6.s[3],w121650#ifndef __AARCH64EB__1651rev32 v5.16b,v5.16b1652#endif1653eor v7.16b,v7.16b,v6.16b1654mov x10,x31655mov w11,#81656mov w12,v7.s[0]1657mov w13,v7.s[1]1658mov w14,v7.s[2]1659mov w15,v7.s[3]166010:1661ldp w7,w8,[x10],81662// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)1663eor w6,w14,w151664eor w9,w7,w131665eor w6,w6,w91666movi v1.16b,#641667movi v2.16b,#1281668movi v3.16b,#1921669mov v0.s[0],w616701671sub v1.16b,v0.16b,v1.16b1672sub v2.16b,v0.16b,v2.16b1673sub v3.16b,v0.16b,v3.16b16741675tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1676tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1677tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1678tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b16791680mov w6,v0.s[0]1681mov w7,v1.s[0]1682mov w9,v2.s[0]1683add w7,w6,w71684mov w6,v3.s[0]1685add w7,w7,w91686add w7,w7,w616871688eor w6,w7,w7,ror #32-21689eor w6,w6,w7,ror #32-101690eor w6,w6,w7,ror #32-181691eor w6,w6,w7,ror #32-241692eor w12,w12,w61693// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)1694eor w6,w14,w151695eor w9,w12,w81696eor w6,w6,w91697movi v1.16b,#641698movi v2.16b,#1281699movi v3.16b,#1921700mov v0.s[0],w617011702sub v1.16b,v0.16b,v1.16b1703sub v2.16b,v0.16b,v2.16b1704sub v3.16b,v0.16b,v3.16b17051706tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1707tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1708tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1709tbl 
v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b17101711mov w6,v0.s[0]1712mov w7,v1.s[0]1713mov w9,v2.s[0]1714add w7,w6,w71715mov w6,v3.s[0]1716add w7,w7,w91717add w7,w7,w617181719eor w6,w7,w7,ror #32-21720eor w6,w6,w7,ror #32-101721eor w6,w6,w7,ror #32-181722eor w6,w6,w7,ror #32-241723ldp w7,w8,[x10],81724eor w13,w13,w61725// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)1726eor w6,w12,w131727eor w9,w7,w151728eor w6,w6,w91729movi v1.16b,#641730movi v2.16b,#1281731movi v3.16b,#1921732mov v0.s[0],w617331734sub v1.16b,v0.16b,v1.16b1735sub v2.16b,v0.16b,v2.16b1736sub v3.16b,v0.16b,v3.16b17371738tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1739tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1740tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1741tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b17421743mov w6,v0.s[0]1744mov w7,v1.s[0]1745mov w9,v2.s[0]1746add w7,w6,w71747mov w6,v3.s[0]1748add w7,w7,w91749add w7,w7,w617501751eor w6,w7,w7,ror #32-21752eor w6,w6,w7,ror #32-101753eor w6,w6,w7,ror #32-181754eor w6,w6,w7,ror #32-241755eor w14,w14,w61756// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)1757eor w6,w12,w131758eor w9,w14,w81759eor w6,w6,w91760movi v1.16b,#641761movi v2.16b,#1281762movi v3.16b,#1921763mov v0.s[0],w617641765sub v1.16b,v0.16b,v1.16b1766sub v2.16b,v0.16b,v2.16b1767sub v3.16b,v0.16b,v3.16b17681769tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1770tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1771tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1772tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b17731774mov w6,v0.s[0]1775mov w7,v1.s[0]1776mov w9,v2.s[0]1777add w7,w6,w71778mov w6,v3.s[0]1779add w7,w7,w91780add w7,w7,w617811782eor w6,w7,w7,ror #32-21783eor w6,w6,w7,ror #32-101784eor w6,w6,w7,ror #32-181785eor w6,w6,w7,ror #32-241786eor w15,w15,w61787subs w11,w11,#11788b.ne 10b1789mov v7.s[0],w151790mov v7.s[1],w141791mov v7.s[2],w131792mov v7.s[3],w121793#ifndef __AARCH64EB__1794rev32 v6.16b,v6.16b1795#endif1796#ifndef __AARCH64EB__1797rev32 
v7.16b,v7.16b1798#endif1799orr v3.16b,v7.16b,v7.16b1800st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#641801subs w2,w2,#41802b.ne .Lcbc_4_blocks_enc1803b 2f18041:1805subs w2,w2,#11806b.lt 2f1807ld1 {v4.4s},[x0],#161808eor v3.16b,v3.16b,v4.16b1809#ifndef __AARCH64EB__1810rev32 v3.16b,v3.16b1811#endif1812mov x10,x31813mov w11,#81814mov w12,v3.s[0]1815mov w13,v3.s[1]1816mov w14,v3.s[2]1817mov w15,v3.s[3]181810:1819ldp w7,w8,[x10],81820// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)1821eor w6,w14,w151822eor w9,w7,w131823eor w6,w6,w91824movi v1.16b,#641825movi v2.16b,#1281826movi v3.16b,#1921827mov v0.s[0],w618281829sub v1.16b,v0.16b,v1.16b1830sub v2.16b,v0.16b,v2.16b1831sub v3.16b,v0.16b,v3.16b18321833tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1834tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1835tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1836tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b18371838mov w6,v0.s[0]1839mov w7,v1.s[0]1840mov w9,v2.s[0]1841add w7,w6,w71842mov w6,v3.s[0]1843add w7,w7,w91844add w7,w7,w618451846eor w6,w7,w7,ror #32-21847eor w6,w6,w7,ror #32-101848eor w6,w6,w7,ror #32-181849eor w6,w6,w7,ror #32-241850eor w12,w12,w61851// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)1852eor w6,w14,w151853eor w9,w12,w81854eor w6,w6,w91855movi v1.16b,#641856movi v2.16b,#1281857movi v3.16b,#1921858mov v0.s[0],w618591860sub v1.16b,v0.16b,v1.16b1861sub v2.16b,v0.16b,v2.16b1862sub v3.16b,v0.16b,v3.16b18631864tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1865tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1866tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1867tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b18681869mov w6,v0.s[0]1870mov w7,v1.s[0]1871mov w9,v2.s[0]1872add w7,w6,w71873mov w6,v3.s[0]1874add w7,w7,w91875add w7,w7,w618761877eor w6,w7,w7,ror #32-21878eor w6,w6,w7,ror #32-101879eor w6,w6,w7,ror #32-181880eor w6,w6,w7,ror #32-241881ldp w7,w8,[x10],81882eor w13,w13,w61883// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)1884eor w6,w12,w131885eor w9,w7,w151886eor w6,w6,w91887movi 
v1.16b,#641888movi v2.16b,#1281889movi v3.16b,#1921890mov v0.s[0],w618911892sub v1.16b,v0.16b,v1.16b1893sub v2.16b,v0.16b,v2.16b1894sub v3.16b,v0.16b,v3.16b18951896tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1897tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1898tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1899tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b19001901mov w6,v0.s[0]1902mov w7,v1.s[0]1903mov w9,v2.s[0]1904add w7,w6,w71905mov w6,v3.s[0]1906add w7,w7,w91907add w7,w7,w619081909eor w6,w7,w7,ror #32-21910eor w6,w6,w7,ror #32-101911eor w6,w6,w7,ror #32-181912eor w6,w6,w7,ror #32-241913eor w14,w14,w61914// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)1915eor w6,w12,w131916eor w9,w14,w81917eor w6,w6,w91918movi v1.16b,#641919movi v2.16b,#1281920movi v3.16b,#1921921mov v0.s[0],w619221923sub v1.16b,v0.16b,v1.16b1924sub v2.16b,v0.16b,v2.16b1925sub v3.16b,v0.16b,v3.16b19261927tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b1928tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b1929tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b1930tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b19311932mov w6,v0.s[0]1933mov w7,v1.s[0]1934mov w9,v2.s[0]1935add w7,w6,w71936mov w6,v3.s[0]1937add w7,w7,w91938add w7,w7,w619391940eor w6,w7,w7,ror #32-21941eor w6,w6,w7,ror #32-101942eor w6,w6,w7,ror #32-181943eor w6,w6,w7,ror #32-241944eor w15,w15,w61945subs w11,w11,#11946b.ne 10b1947mov v3.s[0],w151948mov v3.s[1],w141949mov v3.s[2],w131950mov v3.s[3],w121951#ifndef __AARCH64EB__1952rev32 v3.16b,v3.16b1953#endif1954st1 {v3.4s},[x1],#161955b 1b19562:1957// save back IV1958st1 {v3.4s},[x4]1959ret19601961.Ldec:1962// decryption mode starts1963AARCH64_SIGN_LINK_REGISTER1964stp d8,d9,[sp,#-80]!1965stp d10,d11,[sp,#16]1966stp d12,d13,[sp,#32]1967stp d14,d15,[sp,#48]1968stp x29,x30,[sp,#64]1969.Lcbc_8_blocks_dec:1970cmp w2,#81971b.lt 1f1972ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0]1973add x10,x0,#641974ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x10]1975#ifndef __AARCH64EB__1976rev32 
v4.16b,v4.16b1977#endif1978#ifndef __AARCH64EB__1979rev32 v5.16b,v5.16b1980#endif1981#ifndef __AARCH64EB__1982rev32 v6.16b,v6.16b1983#endif1984#ifndef __AARCH64EB__1985rev32 v7.16b,v7.16b1986#endif1987#ifndef __AARCH64EB__1988rev32 v8.16b,v8.16b1989#endif1990#ifndef __AARCH64EB__1991rev32 v9.16b,v9.16b1992#endif1993#ifndef __AARCH64EB__1994rev32 v10.16b,v10.16b1995#endif1996#ifndef __AARCH64EB__1997rev32 v11.16b,v11.16b1998#endif1999bl _vpsm4_enc_8blks2000zip1 v8.4s,v0.4s,v1.4s2001zip2 v9.4s,v0.4s,v1.4s2002zip1 v10.4s,v2.4s,v3.4s2003zip2 v11.4s,v2.4s,v3.4s2004zip1 v0.2d,v8.2d,v10.2d2005zip2 v1.2d,v8.2d,v10.2d2006zip1 v2.2d,v9.2d,v11.2d2007zip2 v3.2d,v9.2d,v11.2d2008zip1 v8.4s,v4.4s,v5.4s2009zip2 v9.4s,v4.4s,v5.4s2010zip1 v10.4s,v6.4s,v7.4s2011zip2 v11.4s,v6.4s,v7.4s2012zip1 v4.2d,v8.2d,v10.2d2013zip2 v5.2d,v8.2d,v10.2d2014zip1 v6.2d,v9.2d,v11.2d2015zip2 v7.2d,v9.2d,v11.2d2016ld1 {v15.4s},[x4]2017ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#642018// note ivec1 and vtmpx[3] are reusing the same register2019// care needs to be taken to avoid conflict2020eor v0.16b,v0.16b,v15.16b2021ld1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#642022eor v1.16b,v1.16b,v8.16b2023eor v2.16b,v2.16b,v9.16b2024eor v3.16b,v3.16b,v10.16b2025// save back IV2026st1 {v15.4s}, [x4]2027eor v4.16b,v4.16b,v11.16b2028eor v5.16b,v5.16b,v12.16b2029eor v6.16b,v6.16b,v13.16b2030eor v7.16b,v7.16b,v14.16b2031st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#642032st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#642033subs w2,w2,#82034b.gt .Lcbc_8_blocks_dec2035b.eq 100f20361:2037ld1 {v15.4s},[x4]2038.Lcbc_4_blocks_dec:2039cmp w2,#42040b.lt 1f2041ld4 {v4.4s,v5.4s,v6.4s,v7.4s},[x0]2042#ifndef __AARCH64EB__2043rev32 v4.16b,v4.16b2044#endif2045#ifndef __AARCH64EB__2046rev32 v5.16b,v5.16b2047#endif2048#ifndef __AARCH64EB__2049rev32 v6.16b,v6.16b2050#endif2051#ifndef __AARCH64EB__2052rev32 v7.16b,v7.16b2053#endif2054bl _vpsm4_enc_4blks2055ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#642056zip1 v8.4s,v0.4s,v1.4s2057zip2 v9.4s,v0.4s,v1.4s2058zip1 
v10.4s,v2.4s,v3.4s2059zip2 v11.4s,v2.4s,v3.4s2060zip1 v0.2d,v8.2d,v10.2d2061zip2 v1.2d,v8.2d,v10.2d2062zip1 v2.2d,v9.2d,v11.2d2063zip2 v3.2d,v9.2d,v11.2d2064eor v0.16b,v0.16b,v15.16b2065eor v1.16b,v1.16b,v4.16b2066orr v15.16b,v7.16b,v7.16b2067eor v2.16b,v2.16b,v5.16b2068eor v3.16b,v3.16b,v6.16b2069st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#642070subs w2,w2,#42071b.gt .Lcbc_4_blocks_dec2072// save back IV2073st1 {v7.4s}, [x4]2074b 100f20751: // last block2076subs w2,w2,#12077b.lt 100f2078b.gt 1f2079ld1 {v4.4s},[x0],#162080// save back IV2081st1 {v4.4s}, [x4]2082#ifndef __AARCH64EB__2083rev32 v8.16b,v4.16b2084#else2085mov v8.16b,v4.16b2086#endif2087mov x10,x32088mov w11,#82089mov w12,v8.s[0]2090mov w13,v8.s[1]2091mov w14,v8.s[2]2092mov w15,v8.s[3]209310:2094ldp w7,w8,[x10],82095// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)2096eor w6,w14,w152097eor w9,w7,w132098eor w6,w6,w92099movi v1.16b,#642100movi v2.16b,#1282101movi v3.16b,#1922102mov v0.s[0],w621032104sub v1.16b,v0.16b,v1.16b2105sub v2.16b,v0.16b,v2.16b2106sub v3.16b,v0.16b,v3.16b21072108tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2109tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2110tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2111tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b21122113mov w6,v0.s[0]2114mov w7,v1.s[0]2115mov w9,v2.s[0]2116add w7,w6,w72117mov w6,v3.s[0]2118add w7,w7,w92119add w7,w7,w621202121eor w6,w7,w7,ror #32-22122eor w6,w6,w7,ror #32-102123eor w6,w6,w7,ror #32-182124eor w6,w6,w7,ror #32-242125eor w12,w12,w62126// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)2127eor w6,w14,w152128eor w9,w12,w82129eor w6,w6,w92130movi v1.16b,#642131movi v2.16b,#1282132movi v3.16b,#1922133mov v0.s[0],w621342135sub v1.16b,v0.16b,v1.16b2136sub v2.16b,v0.16b,v2.16b2137sub v3.16b,v0.16b,v3.16b21382139tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2140tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2141tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2142tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b21432144mov 
w6,v0.s[0]2145mov w7,v1.s[0]2146mov w9,v2.s[0]2147add w7,w6,w72148mov w6,v3.s[0]2149add w7,w7,w92150add w7,w7,w621512152eor w6,w7,w7,ror #32-22153eor w6,w6,w7,ror #32-102154eor w6,w6,w7,ror #32-182155eor w6,w6,w7,ror #32-242156ldp w7,w8,[x10],82157eor w13,w13,w62158// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)2159eor w6,w12,w132160eor w9,w7,w152161eor w6,w6,w92162movi v1.16b,#642163movi v2.16b,#1282164movi v3.16b,#1922165mov v0.s[0],w621662167sub v1.16b,v0.16b,v1.16b2168sub v2.16b,v0.16b,v2.16b2169sub v3.16b,v0.16b,v3.16b21702171tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2172tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2173tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2174tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b21752176mov w6,v0.s[0]2177mov w7,v1.s[0]2178mov w9,v2.s[0]2179add w7,w6,w72180mov w6,v3.s[0]2181add w7,w7,w92182add w7,w7,w621832184eor w6,w7,w7,ror #32-22185eor w6,w6,w7,ror #32-102186eor w6,w6,w7,ror #32-182187eor w6,w6,w7,ror #32-242188eor w14,w14,w62189// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)2190eor w6,w12,w132191eor w9,w14,w82192eor w6,w6,w92193movi v1.16b,#642194movi v2.16b,#1282195movi v3.16b,#1922196mov v0.s[0],w621972198sub v1.16b,v0.16b,v1.16b2199sub v2.16b,v0.16b,v2.16b2200sub v3.16b,v0.16b,v3.16b22012202tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2203tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2204tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2205tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b22062207mov w6,v0.s[0]2208mov w7,v1.s[0]2209mov w9,v2.s[0]2210add w7,w6,w72211mov w6,v3.s[0]2212add w7,w7,w92213add w7,w7,w622142215eor w6,w7,w7,ror #32-22216eor w6,w6,w7,ror #32-102217eor w6,w6,w7,ror #32-182218eor w6,w6,w7,ror #32-242219eor w15,w15,w62220subs w11,w11,#12221b.ne 10b2222mov v8.s[0],w152223mov v8.s[1],w142224mov v8.s[2],w132225mov v8.s[3],w122226#ifndef __AARCH64EB__2227rev32 v8.16b,v8.16b2228#endif2229eor v8.16b,v8.16b,v15.16b2230st1 {v8.4s},[x1],#162231b 100f22321: // last two blocks2233ld4 
{v4.s,v5.s,v6.s,v7.s}[0],[x0]2234add x10,x0,#162235ld4 {v4.s,v5.s,v6.s,v7.s}[1],[x10],#162236subs w2,w2,12237b.gt 1f2238#ifndef __AARCH64EB__2239rev32 v4.16b,v4.16b2240#endif2241#ifndef __AARCH64EB__2242rev32 v5.16b,v5.16b2243#endif2244#ifndef __AARCH64EB__2245rev32 v6.16b,v6.16b2246#endif2247#ifndef __AARCH64EB__2248rev32 v7.16b,v7.16b2249#endif2250bl _vpsm4_enc_4blks2251ld1 {v4.4s,v5.4s},[x0],#322252zip1 v8.4s,v0.4s,v1.4s2253zip2 v9.4s,v0.4s,v1.4s2254zip1 v10.4s,v2.4s,v3.4s2255zip2 v11.4s,v2.4s,v3.4s2256zip1 v0.2d,v8.2d,v10.2d2257zip2 v1.2d,v8.2d,v10.2d2258zip1 v2.2d,v9.2d,v11.2d2259zip2 v3.2d,v9.2d,v11.2d2260eor v0.16b,v0.16b,v15.16b2261eor v1.16b,v1.16b,v4.16b2262st1 {v0.4s,v1.4s},[x1],#322263// save back IV2264st1 {v5.4s}, [x4]2265b 100f22661: // last 3 blocks2267ld4 {v4.s,v5.s,v6.s,v7.s}[2],[x10]2268#ifndef __AARCH64EB__2269rev32 v4.16b,v4.16b2270#endif2271#ifndef __AARCH64EB__2272rev32 v5.16b,v5.16b2273#endif2274#ifndef __AARCH64EB__2275rev32 v6.16b,v6.16b2276#endif2277#ifndef __AARCH64EB__2278rev32 v7.16b,v7.16b2279#endif2280bl _vpsm4_enc_4blks2281ld1 {v4.4s,v5.4s,v6.4s},[x0],#482282zip1 v8.4s,v0.4s,v1.4s2283zip2 v9.4s,v0.4s,v1.4s2284zip1 v10.4s,v2.4s,v3.4s2285zip2 v11.4s,v2.4s,v3.4s2286zip1 v0.2d,v8.2d,v10.2d2287zip2 v1.2d,v8.2d,v10.2d2288zip1 v2.2d,v9.2d,v11.2d2289zip2 v3.2d,v9.2d,v11.2d2290eor v0.16b,v0.16b,v15.16b2291eor v1.16b,v1.16b,v4.16b2292eor v2.16b,v2.16b,v5.16b2293st1 {v0.4s,v1.4s,v2.4s},[x1],#482294// save back IV2295st1 {v6.4s}, [x4]2296100:2297ldp d10,d11,[sp,#16]2298ldp d12,d13,[sp,#32]2299ldp d14,d15,[sp,#48]2300ldp x29,x30,[sp,#64]2301ldp d8,d9,[sp],#802302AARCH64_VALIDATE_LINK_REGISTER2303ret2304.size vpsm4_cbc_encrypt,.-vpsm4_cbc_encrypt2305.globl vpsm4_ctr32_encrypt_blocks2306.type vpsm4_ctr32_encrypt_blocks,%function2307.align 52308vpsm4_ctr32_encrypt_blocks:2309AARCH64_VALID_CALL_TARGET2310ld1 {v3.4s},[x4]2311#ifndef __AARCH64EB__2312rev32 v3.16b,v3.16b2313#endif2314adrp x10,.Lsbox2315add x10,x10,#:lo12:.Lsbox2316ld1 
{v16.16b,v17.16b,v18.16b,v19.16b},[x10],#642317ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#642318ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#642319ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10]2320cmp w2,#12321b.ne 1f2322// fast processing for one single block without2323// context saving overhead2324mov x10,x32325mov w11,#82326mov w12,v3.s[0]2327mov w13,v3.s[1]2328mov w14,v3.s[2]2329mov w15,v3.s[3]233010:2331ldp w7,w8,[x10],82332// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)2333eor w6,w14,w152334eor w9,w7,w132335eor w6,w6,w92336movi v1.16b,#642337movi v2.16b,#1282338movi v3.16b,#1922339mov v0.s[0],w623402341sub v1.16b,v0.16b,v1.16b2342sub v2.16b,v0.16b,v2.16b2343sub v3.16b,v0.16b,v3.16b23442345tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2346tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2347tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2348tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b23492350mov w6,v0.s[0]2351mov w7,v1.s[0]2352mov w9,v2.s[0]2353add w7,w6,w72354mov w6,v3.s[0]2355add w7,w7,w92356add w7,w7,w623572358eor w6,w7,w7,ror #32-22359eor w6,w6,w7,ror #32-102360eor w6,w6,w7,ror #32-182361eor w6,w6,w7,ror #32-242362eor w12,w12,w62363// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)2364eor w6,w14,w152365eor w9,w12,w82366eor w6,w6,w92367movi v1.16b,#642368movi v2.16b,#1282369movi v3.16b,#1922370mov v0.s[0],w623712372sub v1.16b,v0.16b,v1.16b2373sub v2.16b,v0.16b,v2.16b2374sub v3.16b,v0.16b,v3.16b23752376tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2377tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2378tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2379tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b23802381mov w6,v0.s[0]2382mov w7,v1.s[0]2383mov w9,v2.s[0]2384add w7,w6,w72385mov w6,v3.s[0]2386add w7,w7,w92387add w7,w7,w623882389eor w6,w7,w7,ror #32-22390eor w6,w6,w7,ror #32-102391eor w6,w6,w7,ror #32-182392eor w6,w6,w7,ror #32-242393ldp w7,w8,[x10],82394eor w13,w13,w62395// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)2396eor w6,w12,w132397eor w9,w7,w152398eor 
w6,w6,w92399movi v1.16b,#642400movi v2.16b,#1282401movi v3.16b,#1922402mov v0.s[0],w624032404sub v1.16b,v0.16b,v1.16b2405sub v2.16b,v0.16b,v2.16b2406sub v3.16b,v0.16b,v3.16b24072408tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2409tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2410tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2411tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b24122413mov w6,v0.s[0]2414mov w7,v1.s[0]2415mov w9,v2.s[0]2416add w7,w6,w72417mov w6,v3.s[0]2418add w7,w7,w92419add w7,w7,w624202421eor w6,w7,w7,ror #32-22422eor w6,w6,w7,ror #32-102423eor w6,w6,w7,ror #32-182424eor w6,w6,w7,ror #32-242425eor w14,w14,w62426// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)2427eor w6,w12,w132428eor w9,w14,w82429eor w6,w6,w92430movi v1.16b,#642431movi v2.16b,#1282432movi v3.16b,#1922433mov v0.s[0],w624342435sub v1.16b,v0.16b,v1.16b2436sub v2.16b,v0.16b,v2.16b2437sub v3.16b,v0.16b,v3.16b24382439tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2440tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2441tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2442tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b24432444mov w6,v0.s[0]2445mov w7,v1.s[0]2446mov w9,v2.s[0]2447add w7,w6,w72448mov w6,v3.s[0]2449add w7,w7,w92450add w7,w7,w624512452eor w6,w7,w7,ror #32-22453eor w6,w6,w7,ror #32-102454eor w6,w6,w7,ror #32-182455eor w6,w6,w7,ror #32-242456eor w15,w15,w62457subs w11,w11,#12458b.ne 10b2459mov v3.s[0],w152460mov v3.s[1],w142461mov v3.s[2],w132462mov v3.s[3],w122463#ifndef __AARCH64EB__2464rev32 v3.16b,v3.16b2465#endif2466ld1 {v4.4s},[x0]2467eor v4.16b,v4.16b,v3.16b2468st1 {v4.4s},[x1]2469ret24701:2471AARCH64_SIGN_LINK_REGISTER2472stp d8,d9,[sp,#-80]!2473stp d10,d11,[sp,#16]2474stp d12,d13,[sp,#32]2475stp d14,d15,[sp,#48]2476stp x29,x30,[sp,#64]2477mov w12,v3.s[0]2478mov w13,v3.s[1]2479mov w14,v3.s[2]2480mov w5,v3.s[3]2481.Lctr32_4_blocks_process:2482cmp w2,#42483b.lt 1f2484dup v4.4s,w122485dup v5.4s,w132486dup v6.4s,w142487mov v7.s[0],w52488add w5,w5,#12489mov 
v7.s[1],w52490add w5,w5,#12491mov v7.s[2],w52492add w5,w5,#12493mov v7.s[3],w52494add w5,w5,#12495cmp w2,#82496b.ge .Lctr32_8_blocks_process2497bl _vpsm4_enc_4blks2498ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#642499eor v0.16b,v0.16b,v12.16b2500eor v1.16b,v1.16b,v13.16b2501eor v2.16b,v2.16b,v14.16b2502eor v3.16b,v3.16b,v15.16b2503st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#642504subs w2,w2,#42505b.ne .Lctr32_4_blocks_process2506b 100f2507.Lctr32_8_blocks_process:2508dup v8.4s,w122509dup v9.4s,w132510dup v10.4s,w142511mov v11.s[0],w52512add w5,w5,#12513mov v11.s[1],w52514add w5,w5,#12515mov v11.s[2],w52516add w5,w5,#12517mov v11.s[3],w52518add w5,w5,#12519bl _vpsm4_enc_8blks2520ld4 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#642521ld4 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#642522eor v0.16b,v0.16b,v12.16b2523eor v1.16b,v1.16b,v13.16b2524eor v2.16b,v2.16b,v14.16b2525eor v3.16b,v3.16b,v15.16b2526eor v4.16b,v4.16b,v8.16b2527eor v5.16b,v5.16b,v9.16b2528eor v6.16b,v6.16b,v10.16b2529eor v7.16b,v7.16b,v11.16b2530st4 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#642531st4 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#642532subs w2,w2,#82533b.ne .Lctr32_4_blocks_process2534b 100f25351: // last block processing2536subs w2,w2,#12537b.lt 100f2538b.gt 1f2539mov v3.s[0],w122540mov v3.s[1],w132541mov v3.s[2],w142542mov v3.s[3],w52543mov x10,x32544mov w11,#82545mov w12,v3.s[0]2546mov w13,v3.s[1]2547mov w14,v3.s[2]2548mov w15,v3.s[3]254910:2550ldp w7,w8,[x10],82551// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)2552eor w6,w14,w152553eor w9,w7,w132554eor w6,w6,w92555movi v1.16b,#642556movi v2.16b,#1282557movi v3.16b,#1922558mov v0.s[0],w625592560sub v1.16b,v0.16b,v1.16b2561sub v2.16b,v0.16b,v2.16b2562sub v3.16b,v0.16b,v3.16b25632564tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2565tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2566tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2567tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b25682569mov w6,v0.s[0]2570mov w7,v1.s[0]2571mov w9,v2.s[0]2572add w7,w6,w72573mov w6,v3.s[0]2574add 
w7,w7,w92575add w7,w7,w625762577eor w6,w7,w7,ror #32-22578eor w6,w6,w7,ror #32-102579eor w6,w6,w7,ror #32-182580eor w6,w6,w7,ror #32-242581eor w12,w12,w62582// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)2583eor w6,w14,w152584eor w9,w12,w82585eor w6,w6,w92586movi v1.16b,#642587movi v2.16b,#1282588movi v3.16b,#1922589mov v0.s[0],w625902591sub v1.16b,v0.16b,v1.16b2592sub v2.16b,v0.16b,v2.16b2593sub v3.16b,v0.16b,v3.16b25942595tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2596tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2597tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2598tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b25992600mov w6,v0.s[0]2601mov w7,v1.s[0]2602mov w9,v2.s[0]2603add w7,w6,w72604mov w6,v3.s[0]2605add w7,w7,w92606add w7,w7,w626072608eor w6,w7,w7,ror #32-22609eor w6,w6,w7,ror #32-102610eor w6,w6,w7,ror #32-182611eor w6,w6,w7,ror #32-242612ldp w7,w8,[x10],82613eor w13,w13,w62614// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)2615eor w6,w12,w132616eor w9,w7,w152617eor w6,w6,w92618movi v1.16b,#642619movi v2.16b,#1282620movi v3.16b,#1922621mov v0.s[0],w626222623sub v1.16b,v0.16b,v1.16b2624sub v2.16b,v0.16b,v2.16b2625sub v3.16b,v0.16b,v3.16b26262627tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2628tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2629tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2630tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b26312632mov w6,v0.s[0]2633mov w7,v1.s[0]2634mov w9,v2.s[0]2635add w7,w6,w72636mov w6,v3.s[0]2637add w7,w7,w92638add w7,w7,w626392640eor w6,w7,w7,ror #32-22641eor w6,w6,w7,ror #32-102642eor w6,w6,w7,ror #32-182643eor w6,w6,w7,ror #32-242644eor w14,w14,w62645// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)2646eor w6,w12,w132647eor w9,w14,w82648eor w6,w6,w92649movi v1.16b,#642650movi v2.16b,#1282651movi v3.16b,#1922652mov v0.s[0],w626532654sub v1.16b,v0.16b,v1.16b2655sub v2.16b,v0.16b,v2.16b2656sub v3.16b,v0.16b,v3.16b26572658tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b2659tbl 
v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b2660tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b2661tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b26622663mov w6,v0.s[0]2664mov w7,v1.s[0]2665mov w9,v2.s[0]2666add w7,w6,w72667mov w6,v3.s[0]2668add w7,w7,w92669add w7,w7,w626702671eor w6,w7,w7,ror #32-22672eor w6,w6,w7,ror #32-102673eor w6,w6,w7,ror #32-182674eor w6,w6,w7,ror #32-242675eor w15,w15,w62676subs w11,w11,#12677b.ne 10b2678mov v3.s[0],w152679mov v3.s[1],w142680mov v3.s[2],w132681mov v3.s[3],w122682#ifndef __AARCH64EB__2683rev32 v3.16b,v3.16b2684#endif2685ld1 {v4.4s},[x0]2686eor v4.16b,v4.16b,v3.16b2687st1 {v4.4s},[x1]2688b 100f26891: // last 2 blocks processing2690dup v4.4s,w122691dup v5.4s,w132692dup v6.4s,w142693mov v7.s[0],w52694add w5,w5,#12695mov v7.s[1],w52696subs w2,w2,#12697b.ne 1f2698bl _vpsm4_enc_4blks2699ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#162700ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#162701eor v0.16b,v0.16b,v12.16b2702eor v1.16b,v1.16b,v13.16b2703eor v2.16b,v2.16b,v14.16b2704eor v3.16b,v3.16b,v15.16b2705st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#162706st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#162707b 100f27081: // last 3 blocks processing2709add w5,w5,#12710mov v7.s[2],w52711bl _vpsm4_enc_4blks2712ld4 {v12.s,v13.s,v14.s,v15.s}[0],[x0],#162713ld4 {v12.s,v13.s,v14.s,v15.s}[1],[x0],#162714ld4 {v12.s,v13.s,v14.s,v15.s}[2],[x0],#162715eor v0.16b,v0.16b,v12.16b2716eor v1.16b,v1.16b,v13.16b2717eor v2.16b,v2.16b,v14.16b2718eor v3.16b,v3.16b,v15.16b2719st4 {v0.s,v1.s,v2.s,v3.s}[0],[x1],#162720st4 {v0.s,v1.s,v2.s,v3.s}[1],[x1],#162721st4 {v0.s,v1.s,v2.s,v3.s}[2],[x1],#162722100:2723ldp d10,d11,[sp,#16]2724ldp d12,d13,[sp,#32]2725ldp d14,d15,[sp,#48]2726ldp x29,x30,[sp,#64]2727ldp d8,d9,[sp],#802728AARCH64_VALIDATE_LINK_REGISTER2729ret2730.size vpsm4_ctr32_encrypt_blocks,.-vpsm4_ctr32_encrypt_blocks

// ---------------------------------------------------------------------
// vpsm4_xts_encrypt_gb
//
// XTS-mode bulk routine built on the table-lookup SM4 core.  In this
// "_gb" flavour the running tweak is kept with the bits of every byte
// reversed (rbit) while it is being doubled, and is reversed back
// before it is XORed into the data.
//
// Registers on entry, as consumed by the code below:
//   x0  input pointer, advanced as blocks are read
//   x1  output pointer, advanced as blocks are written
//   x2  length in bytes; split into x2 = len / 16 and x29 = len % 16.
//       Nothing is processed when len < 16.
//   x3  round keys used for the data blocks (parked in x26 while the
//       initial tweak is encrypted)
//   x4  round keys used once, to encrypt the initial tweak
//   x5  16-byte tweak buffer: the initial tweak is loaded from it and
//       the last tweak that was used is stored back into it
//   w6  1 = encrypt, otherwise decrypt (kept in w28); it is only
//       consulted to order the two tweaks of the partial-block tail
//
// Working state:
//   v16-v31   the 256-byte S-box loaded from .Lsbox
//   x12-x27   eight consecutive tweaks, one (low, high) pair each
//   v8-v15    tweak vectors; d8-d15 are saved and restored here
//   w6-w11    scratch
//
// Calls _vpsm4_enc_4blks and _vpsm4_enc_8blks, which are defined
// elsewhere in this file.  v8-v11 are used after the call to
// _vpsm4_enc_4blks, so that helper is presumed to preserve them.
//
// NOTE(review): x18 is used as a tweak register.  It is saved and
// restored, but some platforms reserve x18 at all times - confirm this
// routine is never built for such a target.
//
// Fix relative to the generated code: in the 8-block loop the tweaks
// rebuilt after _vpsm4_enc_8blks were XORed into the output without the
// rbit that had been applied to them before encryption.  The pre- and
// post-whitening values therefore differed, and the tweak stored at
// [x5] was left bit-reversed although .last_2blks_tweak_gb reverses it
// again.  The missing rbit instructions are added below.  This file is
// generated from vpsm4-armv8.pl, so the same change belongs in the
// generator.
// ---------------------------------------------------------------------

// w6 = L(Sbox(w6)).  The 256-entry S-box is split over four 64-byte tbl
// tables; an out-of-range tbl index yields zero, so looking up x, x-64,
// x-128 and x-192 and adding the four results selects the right
// quarter.  L(b) = b ^ rol(b,2) ^ rol(b,10) ^ rol(b,18) ^ rol(b,24).
// Clobbers w7, w9 and v0-v3.
.macro vpsm4_xts_gb_sbox_lt
        movi    v1.16b,#64
        movi    v2.16b,#128
        movi    v3.16b,#192
        mov     v0.s[0],w6

        sub     v1.16b,v0.16b,v1.16b
        sub     v2.16b,v0.16b,v2.16b
        sub     v3.16b,v0.16b,v3.16b

        tbl     v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b
        tbl     v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b
        tbl     v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b
        tbl     v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b

        mov     w6,v0.s[0]
        mov     w7,v1.s[0]
        mov     w9,v2.s[0]
        add     w7,w6,w7
        mov     w6,v3.s[0]
        add     w7,w7,w9
        add     w7,w7,w6

        eor     w6,w7,w7,ror #32-2
        eor     w6,w6,w7,ror #32-10
        eor     w6,w6,w7,ror #32-18
        eor     w6,w6,w7,ror #32-24
.endm

// Run the block held in \blk through all 32 SM4 rounds (8 iterations of
// 4 rounds) with the round keys at x3.  The four words are written back
// in reverse order and, on little-endian builds, byte-swapped.
// Clobbers x10, w6-w9, w11-w15 and v0-v3.
.macro vpsm4_xts_gb_enc_1blk blk
        mov     x10,x3
        mov     w11,#8
        mov     w12,\blk\().s[0]
        mov     w13,\blk\().s[1]
        mov     w14,\blk\().s[2]
        mov     w15,\blk\().s[3]
10:
        ldp     w7,w8,[x10],8
        // B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)
        eor     w6,w14,w15
        eor     w9,w7,w13
        eor     w6,w6,w9
        vpsm4_xts_gb_sbox_lt
        eor     w12,w12,w6
        // B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)
        eor     w6,w14,w15
        eor     w9,w12,w8
        eor     w6,w6,w9
        vpsm4_xts_gb_sbox_lt
        // w7 was consumed as scratch above; fetch RK2 and RK3
        ldp     w7,w8,[x10],8
        eor     w13,w13,w6
        // B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)
        eor     w6,w12,w13
        eor     w9,w7,w15
        eor     w6,w6,w9
        vpsm4_xts_gb_sbox_lt
        eor     w14,w14,w6
        // B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)
        eor     w6,w12,w13
        eor     w9,w14,w8
        eor     w6,w6,w9
        vpsm4_xts_gb_sbox_lt
        eor     w15,w15,w6
        subs    w11,w11,#1
        b.ne    10b
        mov     \blk\().s[0],w15
        mov     \blk\().s[1],w14
        mov     \blk\().s[2],w13
        mov     \blk\().s[3],w12
#ifndef __AARCH64EB__
        rev32   \blk\().16b,\blk\().16b
#endif
.endm

// (\lo_out,\hi_out) = (\lo_in,\hi_in) shifted left by one bit as a
// 128-bit value, with 0x87 XORed into the low byte when the bit shifted
// out of the top was set.  Clobbers w7, x8 and x9.
.macro vpsm4_xts_gb_next_tweak lo_out, hi_out, lo_in, hi_in
        mov     w7,0x87
        extr    x9,\hi_in,\hi_in,#32
        extr    \hi_out,\hi_in,\lo_in,#63
        and     w8,w7,w9,asr#31
        eor     \lo_out,x8,\lo_in,lsl#1
.endm

// Copy the tweak pair (\lo,\hi) into vector \vd (byte-swapped per word
// on big-endian builds).
.macro vpsm4_xts_gb_tweak_to_vec vd, lo, hi
        mov     \vd\().d[0],\lo
        mov     \vd\().d[1],\hi
#ifdef __AARCH64EB__
        rev32   \vd\().16b,\vd\().16b
#endif
.endm

// Transpose the 4x4 matrix of 32-bit words held in \blk0..\blk3 in
// place, using \tmp0..\tmp3 as scratch.
.macro vpsm4_xts_gb_transpose blk0, blk1, blk2, blk3, tmp0, tmp1, tmp2, tmp3
        zip1    \tmp0\().4s,\blk0\().4s,\blk1\().4s
        zip2    \tmp1\().4s,\blk0\().4s,\blk1\().4s
        zip1    \tmp2\().4s,\blk2\().4s,\blk3\().4s
        zip2    \tmp3\().4s,\blk2\().4s,\blk3\().4s
        zip1    \blk0\().2d,\tmp0\().2d,\tmp2\().2d
        zip2    \blk1\().2d,\tmp0\().2d,\tmp2\().2d
        zip1    \blk2\().2d,\tmp1\().2d,\tmp3\().2d
        zip2    \blk3\().2d,\tmp1\().2d,\tmp3\().2d
.endm

.globl vpsm4_xts_encrypt_gb
.type vpsm4_xts_encrypt_gb,%function
.align 5
vpsm4_xts_encrypt_gb:
        AARCH64_SIGN_LINK_REGISTER
        stp     x15, x16, [sp, #-0x10]!
        stp     x17, x18, [sp, #-0x10]!
        stp     x19, x20, [sp, #-0x10]!
        stp     x21, x22, [sp, #-0x10]!
        stp     x23, x24, [sp, #-0x10]!
        stp     x25, x26, [sp, #-0x10]!
        stp     x27, x28, [sp, #-0x10]!
        stp     x29, x30, [sp, #-0x10]!
        stp     d8, d9, [sp, #-0x10]!
        stp     d10, d11, [sp, #-0x10]!
        stp     d12, d13, [sp, #-0x10]!
        stp     d14, d15, [sp, #-0x10]!
        mov     x26,x3                  // park the data round keys
        mov     x27,x4
        mov     w28,w6                  // keep the direction flag
        ld1     {v8.4s}, [x5]           // initial tweak
        mov     x3,x27                  // tweak is encrypted with the x4 keys
        adrp    x10,.Lsbox
        add     x10,x10,#:lo12:.Lsbox
        ld1     {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#64
        ld1     {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#64
        ld1     {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#64
        ld1     {v28.16b,v29.16b,v30.16b,v31.16b},[x10]
#ifndef __AARCH64EB__
        rev32   v8.16b,v8.16b
#endif
        vpsm4_xts_gb_enc_1blk v8
        mov     x3,x26                  // data round keys from here on
        and     x29,x2,#0x0F            // x29 = bytes in the partial tail
        // convert length into blocks
        lsr     x2,x2,4
        cmp     x2,#1
        b.lt    .return_gb

        cmp     x29,0
        // If the length is a multiple of 16, all blocks are processed
        // in .xts_encrypt_blocks_gb.
        b.eq    .xts_encrypt_blocks_gb

        // If the length is not a multiple of 16, the last two blocks
        // are processed with the tweaks from .last_2blks_tweak_gb or
        // .only_2blks_tweak_gb, and the other blocks are processed in
        // .xts_encrypt_blocks_gb.
        subs    x2,x2,#1
        b.eq    .only_2blks_tweak_gb
.xts_encrypt_blocks_gb:
        // Switch to the bit-reversed form used for doubling and derive
        // the next seven tweaks.
        rbit    v8.16b,v8.16b
#ifdef __AARCH64EB__
        rev32   v8.16b,v8.16b
#endif
        mov     x12,v8.d[0]
        mov     x13,v8.d[1]
        vpsm4_xts_gb_next_tweak x14,x15,x12,x13
        vpsm4_xts_gb_next_tweak x16,x17,x14,x15
        vpsm4_xts_gb_next_tweak x18,x19,x16,x17
        vpsm4_xts_gb_next_tweak x20,x21,x18,x19
        vpsm4_xts_gb_next_tweak x22,x23,x20,x21
        vpsm4_xts_gb_next_tweak x24,x25,x22,x23
        vpsm4_xts_gb_next_tweak x26,x27,x24,x25
.Lxts_8_blocks_process_gb:
        cmp     x2,#8
        b.lt    .Lxts_4_blocks_process_gb
        vpsm4_xts_gb_tweak_to_vec v0,x12,x13
        vpsm4_xts_gb_tweak_to_vec v1,x14,x15
        vpsm4_xts_gb_tweak_to_vec v2,x16,x17
        vpsm4_xts_gb_tweak_to_vec v3,x18,x19
        vpsm4_xts_gb_tweak_to_vec v12,x20,x21
        vpsm4_xts_gb_tweak_to_vec v13,x22,x23
        vpsm4_xts_gb_tweak_to_vec v14,x24,x25
        vpsm4_xts_gb_tweak_to_vec v15,x26,x27
        // pre-whitening: input ^= tweak (tweak reversed back first)
        ld1     {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
        rbit    v0.16b,v0.16b
        rbit    v1.16b,v1.16b
        rbit    v2.16b,v2.16b
        rbit    v3.16b,v3.16b
        eor     v4.16b, v4.16b, v0.16b
        eor     v5.16b, v5.16b, v1.16b
        eor     v6.16b, v6.16b, v2.16b
        eor     v7.16b, v7.16b, v3.16b
        ld1     {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64
        rbit    v12.16b,v12.16b
        rbit    v13.16b,v13.16b
        rbit    v14.16b,v14.16b
        rbit    v15.16b,v15.16b
        eor     v8.16b, v8.16b, v12.16b
        eor     v9.16b, v9.16b, v13.16b
        eor     v10.16b, v10.16b, v14.16b
        eor     v11.16b, v11.16b, v15.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
        rev32   v5.16b,v5.16b
        rev32   v6.16b,v6.16b
        rev32   v7.16b,v7.16b
        rev32   v8.16b,v8.16b
        rev32   v9.16b,v9.16b
        rev32   v10.16b,v10.16b
        rev32   v11.16b,v11.16b
#endif
        vpsm4_xts_gb_transpose v4,v5,v6,v7,v0,v1,v2,v3
        vpsm4_xts_gb_transpose v8,v9,v10,v11,v0,v1,v2,v3
        bl      _vpsm4_enc_8blks
        vpsm4_xts_gb_transpose v0,v1,v2,v3,v8,v9,v10,v11
        vpsm4_xts_gb_transpose v4,v5,v6,v7,v8,v9,v10,v11
        // Rebuild the eight tweak vectors for the post-whitening and,
        // as each pair is consumed, replace it with the tweak eight
        // positions further on.
        vpsm4_xts_gb_tweak_to_vec v12,x12,x13
        vpsm4_xts_gb_next_tweak x12,x13,x26,x27
        vpsm4_xts_gb_tweak_to_vec v13,x14,x15
        vpsm4_xts_gb_next_tweak x14,x15,x12,x13
        vpsm4_xts_gb_tweak_to_vec v14,x16,x17
        vpsm4_xts_gb_next_tweak x16,x17,x14,x15
        vpsm4_xts_gb_tweak_to_vec v15,x18,x19
        vpsm4_xts_gb_next_tweak x18,x19,x16,x17
        vpsm4_xts_gb_tweak_to_vec v8,x20,x21
        vpsm4_xts_gb_next_tweak x20,x21,x18,x19
        vpsm4_xts_gb_tweak_to_vec v9,x22,x23
        vpsm4_xts_gb_next_tweak x22,x23,x20,x21
        vpsm4_xts_gb_tweak_to_vec v10,x24,x25
        vpsm4_xts_gb_next_tweak x24,x25,x22,x23
        vpsm4_xts_gb_tweak_to_vec v11,x26,x27
        vpsm4_xts_gb_next_tweak x26,x27,x24,x25
        // The tweaks are doubled in bit-reversed form, so they have to
        // be reversed back before the post-whitening XOR, exactly as
        // was done before the pre-whitening XOR above and as the
        // 4-block path does.  This also leaves v11 in the form that
        // .last_2blks_tweak_gb expects to find at [x5].
        rbit    v12.16b,v12.16b
        rbit    v13.16b,v13.16b
        rbit    v14.16b,v14.16b
        rbit    v15.16b,v15.16b
        rbit    v8.16b,v8.16b
        rbit    v9.16b,v9.16b
        rbit    v10.16b,v10.16b
        rbit    v11.16b,v11.16b
        eor     v0.16b, v0.16b, v12.16b
        eor     v1.16b, v1.16b, v13.16b
        eor     v2.16b, v2.16b, v14.16b
        eor     v3.16b, v3.16b, v15.16b
        eor     v4.16b, v4.16b, v8.16b
        eor     v5.16b, v5.16b, v9.16b
        eor     v6.16b, v6.16b, v10.16b
        eor     v7.16b, v7.16b, v11.16b

        // save the last tweak
        st1     {v11.4s},[x5]
        st1     {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64
        st1     {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#64
        subs    x2,x2,#8
        b.gt    .Lxts_8_blocks_process_gb
        b       100f
.Lxts_4_blocks_process_gb:
        vpsm4_xts_gb_tweak_to_vec v8,x12,x13
        vpsm4_xts_gb_tweak_to_vec v9,x14,x15
        vpsm4_xts_gb_tweak_to_vec v10,x16,x17
        vpsm4_xts_gb_tweak_to_vec v11,x18,x19
        cmp     x2,#4
        b.lt    1f
        ld1     {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64
        rbit    v8.16b,v8.16b
        rbit    v9.16b,v9.16b
        rbit    v10.16b,v10.16b
        rbit    v11.16b,v11.16b
        eor     v4.16b, v4.16b, v8.16b
        eor     v5.16b, v5.16b, v9.16b
        eor     v6.16b, v6.16b, v10.16b
        eor     v7.16b, v7.16b, v11.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
        rev32   v5.16b,v5.16b
        rev32   v6.16b,v6.16b
        rev32   v7.16b,v7.16b
#endif
        vpsm4_xts_gb_transpose v4,v5,v6,v7,v0,v1,v2,v3
        bl      _vpsm4_enc_4blks
        vpsm4_xts_gb_transpose v0,v1,v2,v3,v4,v5,v6,v7
        eor     v0.16b, v0.16b, v8.16b
        eor     v1.16b, v1.16b, v9.16b
        eor     v2.16b, v2.16b, v10.16b
        eor     v3.16b, v3.16b, v11.16b
        st1     {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#64
        sub     x2,x2,#4
        // tweaks for up to three blocks that may still follow
        vpsm4_xts_gb_tweak_to_vec v8,x20,x21
        vpsm4_xts_gb_tweak_to_vec v9,x22,x23
        vpsm4_xts_gb_tweak_to_vec v10,x24,x25
        // save the last tweak
        st1     {v11.4s},[x5]
1:
        // process last block
        cmp     x2,#1
        b.lt    100f
        b.gt    1f
        ld1     {v4.4s},[x0],#16
        rbit    v8.16b,v8.16b
        eor     v4.16b, v4.16b, v8.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
#endif
        vpsm4_xts_gb_enc_1blk v4
        eor     v4.16b, v4.16b, v8.16b
        st1     {v4.4s},[x1],#16
        // save the last tweak
        st1     {v8.4s},[x5]
        b       100f
1:      // process last 2 blocks
        cmp     x2,#2
        b.gt    1f
        ld1     {v4.4s,v5.4s},[x0],#32
        rbit    v8.16b,v8.16b
        rbit    v9.16b,v9.16b
        eor     v4.16b, v4.16b, v8.16b
        eor     v5.16b, v5.16b, v9.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
        rev32   v5.16b,v5.16b
#endif
        // v6 and v7 are not loaded here; only the first two result
        // vectors are stored below
        vpsm4_xts_gb_transpose v4,v5,v6,v7,v0,v1,v2,v3
        bl      _vpsm4_enc_4blks
        vpsm4_xts_gb_transpose v0,v1,v2,v3,v4,v5,v6,v7
        eor     v0.16b, v0.16b, v8.16b
        eor     v1.16b, v1.16b, v9.16b
        st1     {v0.4s,v1.4s},[x1],#32
        // save the last tweak
        st1     {v9.4s},[x5]
        b       100f
1:      // process last 3 blocks
        ld1     {v4.4s,v5.4s,v6.4s},[x0],#48
        rbit    v8.16b,v8.16b
        rbit    v9.16b,v9.16b
        rbit    v10.16b,v10.16b
        eor     v4.16b, v4.16b, v8.16b
        eor     v5.16b, v5.16b, v9.16b
        eor     v6.16b, v6.16b, v10.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
        rev32   v5.16b,v5.16b
        rev32   v6.16b,v6.16b
#endif
        // v7 is not loaded here; only the first three result vectors
        // are stored below
        vpsm4_xts_gb_transpose v4,v5,v6,v7,v0,v1,v2,v3
        bl      _vpsm4_enc_4blks
        vpsm4_xts_gb_transpose v0,v1,v2,v3,v4,v5,v6,v7
        eor     v0.16b, v0.16b, v8.16b
        eor     v1.16b, v1.16b, v9.16b
        eor     v2.16b, v2.16b, v10.16b
        st1     {v0.4s,v1.4s,v2.4s},[x1],#48
        // save the last tweak
        st1     {v10.4s},[x5]
100:
        cmp     x29,0
        b.eq    .return_gb

        // This branch calculates the last two tweaks when the length
        // is larger than 32 bytes.  It starts from the tweak saved at
        // [x5], which is stored in its reversed-back form.
.last_2blks_tweak_gb:
        ld1     {v8.4s},[x5]
#ifdef __AARCH64EB__
        rev32   v8.16b,v8.16b
#endif
        rbit    v2.16b,v8.16b
        adrp    x10,.Lxts_magic
        ldr     q0, [x10, #:lo12:.Lxts_magic]
        shl     v9.16b, v2.16b, #1
        ext     v1.16b, v2.16b, v2.16b,#15
        ushr    v1.16b, v1.16b, #7
        mul     v1.16b, v1.16b, v0.16b
        eor     v9.16b, v9.16b, v1.16b
        rbit    v9.16b,v9.16b
        rbit    v2.16b,v9.16b
        adrp    x10,.Lxts_magic
        ldr     q0, [x10, #:lo12:.Lxts_magic]
        shl     v10.16b, v2.16b, #1
        ext     v1.16b, v2.16b, v2.16b,#15
        ushr    v1.16b, v1.16b, #7
        mul     v1.16b, v1.16b, v0.16b
        eor     v10.16b, v10.16b, v1.16b
        rbit    v10.16b,v10.16b
        b       .check_dec_gb


        // This branch calculates the last two tweaks when only one
        // full block precedes the partial one, so only two tweaks are
        // needed: the encrypted initial tweak and its successor.
.only_2blks_tweak_gb:
        mov     v9.16b,v8.16b
#ifdef __AARCH64EB__
        rev32   v9.16b,v9.16b
#endif
        rbit    v2.16b,v9.16b
        adrp    x10,.Lxts_magic
        ldr     q0, [x10, #:lo12:.Lxts_magic]
        shl     v10.16b, v2.16b, #1
        ext     v1.16b, v2.16b, v2.16b,#15
        ushr    v1.16b, v1.16b, #7
        mul     v1.16b, v1.16b, v0.16b
        eor     v10.16b, v10.16b, v1.16b
        rbit    v10.16b,v10.16b
        b       .check_dec_gb


        // Determine whether encryption or decryption is required.
        // The last two tweaks need to be swapped for decryption.
.check_dec_gb:
        // encryption:1 decryption:0
        cmp     w28,1
        b.eq    .process_last_2blks_gb
        mov     v0.16B,v9.16b
        mov     v9.16B,v10.16b
        mov     v10.16B,v0.16b

.process_last_2blks_gb:
#ifdef __AARCH64EB__
        rev32   v9.16b,v9.16b
        rev32   v10.16b,v10.16b
#endif
        // last full block, whitened with v9
        ld1     {v4.4s},[x0],#16
        eor     v4.16b, v4.16b, v9.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
#endif
        vpsm4_xts_gb_enc_1blk v4
        eor     v4.16b, v4.16b, v9.16b
        st1     {v4.4s},[x1],#16

        // Swap the leading x29 bytes of the block just written (at x26)
        // with the remaining input bytes: the old block bytes become
        // the partial output at x1, the input bytes take their place.
        // ldrb/strb leave the flags from subs intact for b.gt.
        sub     x26,x1,16
.loop_gb:
        subs    x29,x29,1
        ldrb    w7,[x26,x29]
        ldrb    w8,[x0,x29]
        strb    w8,[x26,x29]
        strb    w7,[x1,x29]
        b.gt    .loop_gb
        // process the merged block in place, whitened with v10
        ld1     {v4.4s}, [x26]
        eor     v4.16b, v4.16b, v10.16b
#ifndef __AARCH64EB__
        rev32   v4.16b,v4.16b
#endif
        vpsm4_xts_gb_enc_1blk v4
        eor     v4.16b, v4.16b, v10.16b
        st1     {v4.4s}, [x26]
.return_gb:
        ldp     d14, d15, [sp], #0x10
        ldp     d12, d13, [sp], #0x10
        ldp     d10, d11, [sp], #0x10
        ldp     d8, d9, [sp], #0x10
        ldp     x29, x30, [sp], #0x10
        ldp     x27, x28, [sp], #0x10
        ldp     x25, x26, [sp], #0x10
        ldp     x23, x24, [sp], #0x10
        ldp     x21, x22, [sp], #0x10
        ldp     x19, x20, [sp], #0x10
        ldp     x17, x18, [sp], #0x10
        ldp     x15, x16, [sp], #0x10
        AARCH64_VALIDATE_LINK_REGISTER
        ret
.size vpsm4_xts_encrypt_gb,.-vpsm4_xts_encrypt_gb

// The helper macros are private to vpsm4_xts_encrypt_gb.
.purgem vpsm4_xts_gb_sbox_lt
.purgem vpsm4_xts_gb_enc_1blk
.purgem vpsm4_xts_gb_next_tweak
.purgem vpsm4_xts_gb_tweak_to_vec
.purgem vpsm4_xts_gb_transpose
3887.globl vpsm4_xts_encrypt3888.type vpsm4_xts_encrypt,%function3889.align 53890vpsm4_xts_encrypt:3891AARCH64_SIGN_LINK_REGISTER3892stp x15, x16, [sp, #-0x10]!3893stp x17, x18, [sp, #-0x10]!3894stp x19, x20, [sp, #-0x10]!3895stp x21, x22, [sp, #-0x10]!3896stp x23, x24, [sp, #-0x10]!3897stp x25, x26, [sp, #-0x10]!3898stp x27, x28, [sp, #-0x10]!3899stp x29, x30, [sp, #-0x10]!3900stp d8, d9, [sp, #-0x10]!3901stp d10, d11, [sp, #-0x10]!3902stp d12, d13, [sp, #-0x10]!3903stp d14, d15, [sp, #-0x10]!3904mov x26,x33905mov x27,x43906mov w28,w63907ld1 {v8.4s}, [x5]3908mov x3,x273909adrp x10,.Lsbox3910add x10,x10,#:lo12:.Lsbox3911ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x10],#643912ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x10],#643913ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x10],#643914ld1 {v28.16b,v29.16b,v30.16b,v31.16b},[x10]3915#ifndef __AARCH64EB__3916rev32 v8.16b,v8.16b3917#endif3918mov x10,x33919mov w11,#83920mov w12,v8.s[0]3921mov w13,v8.s[1]3922mov w14,v8.s[2]3923mov w15,v8.s[3]392410:3925ldp w7,w8,[x10],83926// B0 ^= 
SBOX(B1 ^ B2 ^ B3 ^ RK0)3927eor w6,w14,w153928eor w9,w7,w133929eor w6,w6,w93930movi v1.16b,#643931movi v2.16b,#1283932movi v3.16b,#1923933mov v0.s[0],w639343935sub v1.16b,v0.16b,v1.16b3936sub v2.16b,v0.16b,v2.16b3937sub v3.16b,v0.16b,v3.16b39383939tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b3940tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b3941tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b3942tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b39433944mov w6,v0.s[0]3945mov w7,v1.s[0]3946mov w9,v2.s[0]3947add w7,w6,w73948mov w6,v3.s[0]3949add w7,w7,w93950add w7,w7,w639513952eor w6,w7,w7,ror #32-23953eor w6,w6,w7,ror #32-103954eor w6,w6,w7,ror #32-183955eor w6,w6,w7,ror #32-243956eor w12,w12,w63957// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)3958eor w6,w14,w153959eor w9,w12,w83960eor w6,w6,w93961movi v1.16b,#643962movi v2.16b,#1283963movi v3.16b,#1923964mov v0.s[0],w639653966sub v1.16b,v0.16b,v1.16b3967sub v2.16b,v0.16b,v2.16b3968sub v3.16b,v0.16b,v3.16b39693970tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b3971tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b3972tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b3973tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b39743975mov w6,v0.s[0]3976mov w7,v1.s[0]3977mov w9,v2.s[0]3978add w7,w6,w73979mov w6,v3.s[0]3980add w7,w7,w93981add w7,w7,w639823983eor w6,w7,w7,ror #32-23984eor w6,w6,w7,ror #32-103985eor w6,w6,w7,ror #32-183986eor w6,w6,w7,ror #32-243987ldp w7,w8,[x10],83988eor w13,w13,w63989// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)3990eor w6,w12,w133991eor w9,w7,w153992eor w6,w6,w93993movi v1.16b,#643994movi v2.16b,#1283995movi v3.16b,#1923996mov v0.s[0],w639973998sub v1.16b,v0.16b,v1.16b3999sub v2.16b,v0.16b,v2.16b4000sub v3.16b,v0.16b,v3.16b40014002tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4003tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4004tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4005tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b40064007mov w6,v0.s[0]4008mov w7,v1.s[0]4009mov 
w9,v2.s[0]4010add w7,w6,w74011mov w6,v3.s[0]4012add w7,w7,w94013add w7,w7,w640144015eor w6,w7,w7,ror #32-24016eor w6,w6,w7,ror #32-104017eor w6,w6,w7,ror #32-184018eor w6,w6,w7,ror #32-244019eor w14,w14,w64020// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)4021eor w6,w12,w134022eor w9,w14,w84023eor w6,w6,w94024movi v1.16b,#644025movi v2.16b,#1284026movi v3.16b,#1924027mov v0.s[0],w640284029sub v1.16b,v0.16b,v1.16b4030sub v2.16b,v0.16b,v2.16b4031sub v3.16b,v0.16b,v3.16b40324033tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4034tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4035tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4036tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b40374038mov w6,v0.s[0]4039mov w7,v1.s[0]4040mov w9,v2.s[0]4041add w7,w6,w74042mov w6,v3.s[0]4043add w7,w7,w94044add w7,w7,w640454046eor w6,w7,w7,ror #32-24047eor w6,w6,w7,ror #32-104048eor w6,w6,w7,ror #32-184049eor w6,w6,w7,ror #32-244050eor w15,w15,w64051subs w11,w11,#14052b.ne 10b4053mov v8.s[0],w154054mov v8.s[1],w144055mov v8.s[2],w134056mov v8.s[3],w124057#ifndef __AARCH64EB__4058rev32 v8.16b,v8.16b4059#endif4060mov x3,x264061and x29,x2,#0x0F4062// convert length into blocks4063lsr x2,x2,44064cmp x2,#14065b.lt .return40664067cmp x29,04068// If the encryption/decryption Length is N times of 16,4069// the all blocks are encrypted/decrypted in .xts_encrypt_blocks4070b.eq .xts_encrypt_blocks40714072// If the encryption/decryption length is not N times of 16,4073// the last two blocks are encrypted/decrypted in .last_2blks_tweak or .only_2blks_tweak4074// the other blocks are encrypted/decrypted in .xts_encrypt_blocks4075subs x2,x2,#14076b.eq .only_2blks_tweak4077.xts_encrypt_blocks:4078#ifdef __AARCH64EB__4079rev32 v8.16b,v8.16b4080#endif4081mov x12,v8.d[0]4082mov x13,v8.d[1]4083mov w7,0x874084extr x9,x13,x13,#324085extr x15,x13,x12,#634086and w8,w7,w9,asr#314087eor x14,x8,x12,lsl#14088mov w7,0x874089extr x9,x15,x15,#324090extr x17,x15,x14,#634091and w8,w7,w9,asr#314092eor 
x16,x8,x14,lsl#14093mov w7,0x874094extr x9,x17,x17,#324095extr x19,x17,x16,#634096and w8,w7,w9,asr#314097eor x18,x8,x16,lsl#14098mov w7,0x874099extr x9,x19,x19,#324100extr x21,x19,x18,#634101and w8,w7,w9,asr#314102eor x20,x8,x18,lsl#14103mov w7,0x874104extr x9,x21,x21,#324105extr x23,x21,x20,#634106and w8,w7,w9,asr#314107eor x22,x8,x20,lsl#14108mov w7,0x874109extr x9,x23,x23,#324110extr x25,x23,x22,#634111and w8,w7,w9,asr#314112eor x24,x8,x22,lsl#14113mov w7,0x874114extr x9,x25,x25,#324115extr x27,x25,x24,#634116and w8,w7,w9,asr#314117eor x26,x8,x24,lsl#14118.Lxts_8_blocks_process:4119cmp x2,#84120b.lt .Lxts_4_blocks_process4121mov v0.d[0],x124122mov v0.d[1],x134123#ifdef __AARCH64EB__4124rev32 v0.16b,v0.16b4125#endif4126mov v1.d[0],x144127mov v1.d[1],x154128#ifdef __AARCH64EB__4129rev32 v1.16b,v1.16b4130#endif4131mov v2.d[0],x164132mov v2.d[1],x174133#ifdef __AARCH64EB__4134rev32 v2.16b,v2.16b4135#endif4136mov v3.d[0],x184137mov v3.d[1],x194138#ifdef __AARCH64EB__4139rev32 v3.16b,v3.16b4140#endif4141mov v12.d[0],x204142mov v12.d[1],x214143#ifdef __AARCH64EB__4144rev32 v12.16b,v12.16b4145#endif4146mov v13.d[0],x224147mov v13.d[1],x234148#ifdef __AARCH64EB__4149rev32 v13.16b,v13.16b4150#endif4151mov v14.d[0],x244152mov v14.d[1],x254153#ifdef __AARCH64EB__4154rev32 v14.16b,v14.16b4155#endif4156mov v15.d[0],x264157mov v15.d[1],x274158#ifdef __AARCH64EB__4159rev32 v15.16b,v15.16b4160#endif4161ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#644162eor v4.16b, v4.16b, v0.16b4163eor v5.16b, v5.16b, v1.16b4164eor v6.16b, v6.16b, v2.16b4165eor v7.16b, v7.16b, v3.16b4166ld1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#644167eor v8.16b, v8.16b, v12.16b4168eor v9.16b, v9.16b, v13.16b4169eor v10.16b, v10.16b, v14.16b4170eor v11.16b, v11.16b, v15.16b4171#ifndef __AARCH64EB__4172rev32 v4.16b,v4.16b4173#endif4174#ifndef __AARCH64EB__4175rev32 v5.16b,v5.16b4176#endif4177#ifndef __AARCH64EB__4178rev32 v6.16b,v6.16b4179#endif4180#ifndef __AARCH64EB__4181rev32 v7.16b,v7.16b4182#endif4183#ifndef 
__AARCH64EB__4184rev32 v8.16b,v8.16b4185#endif4186#ifndef __AARCH64EB__4187rev32 v9.16b,v9.16b4188#endif4189#ifndef __AARCH64EB__4190rev32 v10.16b,v10.16b4191#endif4192#ifndef __AARCH64EB__4193rev32 v11.16b,v11.16b4194#endif4195zip1 v0.4s,v4.4s,v5.4s4196zip2 v1.4s,v4.4s,v5.4s4197zip1 v2.4s,v6.4s,v7.4s4198zip2 v3.4s,v6.4s,v7.4s4199zip1 v4.2d,v0.2d,v2.2d4200zip2 v5.2d,v0.2d,v2.2d4201zip1 v6.2d,v1.2d,v3.2d4202zip2 v7.2d,v1.2d,v3.2d4203zip1 v0.4s,v8.4s,v9.4s4204zip2 v1.4s,v8.4s,v9.4s4205zip1 v2.4s,v10.4s,v11.4s4206zip2 v3.4s,v10.4s,v11.4s4207zip1 v8.2d,v0.2d,v2.2d4208zip2 v9.2d,v0.2d,v2.2d4209zip1 v10.2d,v1.2d,v3.2d4210zip2 v11.2d,v1.2d,v3.2d4211bl _vpsm4_enc_8blks4212zip1 v8.4s,v0.4s,v1.4s4213zip2 v9.4s,v0.4s,v1.4s4214zip1 v10.4s,v2.4s,v3.4s4215zip2 v11.4s,v2.4s,v3.4s4216zip1 v0.2d,v8.2d,v10.2d4217zip2 v1.2d,v8.2d,v10.2d4218zip1 v2.2d,v9.2d,v11.2d4219zip2 v3.2d,v9.2d,v11.2d4220zip1 v8.4s,v4.4s,v5.4s4221zip2 v9.4s,v4.4s,v5.4s4222zip1 v10.4s,v6.4s,v7.4s4223zip2 v11.4s,v6.4s,v7.4s4224zip1 v4.2d,v8.2d,v10.2d4225zip2 v5.2d,v8.2d,v10.2d4226zip1 v6.2d,v9.2d,v11.2d4227zip2 v7.2d,v9.2d,v11.2d4228mov v12.d[0],x124229mov v12.d[1],x134230#ifdef __AARCH64EB__4231rev32 v12.16b,v12.16b4232#endif4233mov w7,0x874234extr x9,x27,x27,#324235extr x13,x27,x26,#634236and w8,w7,w9,asr#314237eor x12,x8,x26,lsl#14238mov v13.d[0],x144239mov v13.d[1],x154240#ifdef __AARCH64EB__4241rev32 v13.16b,v13.16b4242#endif4243mov w7,0x874244extr x9,x13,x13,#324245extr x15,x13,x12,#634246and w8,w7,w9,asr#314247eor x14,x8,x12,lsl#14248mov v14.d[0],x164249mov v14.d[1],x174250#ifdef __AARCH64EB__4251rev32 v14.16b,v14.16b4252#endif4253mov w7,0x874254extr x9,x15,x15,#324255extr x17,x15,x14,#634256and w8,w7,w9,asr#314257eor x16,x8,x14,lsl#14258mov v15.d[0],x184259mov v15.d[1],x194260#ifdef __AARCH64EB__4261rev32 v15.16b,v15.16b4262#endif4263mov w7,0x874264extr x9,x17,x17,#324265extr x19,x17,x16,#634266and w8,w7,w9,asr#314267eor x18,x8,x16,lsl#14268mov v8.d[0],x204269mov v8.d[1],x214270#ifdef 
__AARCH64EB__4271rev32 v8.16b,v8.16b4272#endif4273mov w7,0x874274extr x9,x19,x19,#324275extr x21,x19,x18,#634276and w8,w7,w9,asr#314277eor x20,x8,x18,lsl#14278mov v9.d[0],x224279mov v9.d[1],x234280#ifdef __AARCH64EB__4281rev32 v9.16b,v9.16b4282#endif4283mov w7,0x874284extr x9,x21,x21,#324285extr x23,x21,x20,#634286and w8,w7,w9,asr#314287eor x22,x8,x20,lsl#14288mov v10.d[0],x244289mov v10.d[1],x254290#ifdef __AARCH64EB__4291rev32 v10.16b,v10.16b4292#endif4293mov w7,0x874294extr x9,x23,x23,#324295extr x25,x23,x22,#634296and w8,w7,w9,asr#314297eor x24,x8,x22,lsl#14298mov v11.d[0],x264299mov v11.d[1],x274300#ifdef __AARCH64EB__4301rev32 v11.16b,v11.16b4302#endif4303mov w7,0x874304extr x9,x25,x25,#324305extr x27,x25,x24,#634306and w8,w7,w9,asr#314307eor x26,x8,x24,lsl#14308eor v0.16b, v0.16b, v12.16b4309eor v1.16b, v1.16b, v13.16b4310eor v2.16b, v2.16b, v14.16b4311eor v3.16b, v3.16b, v15.16b4312eor v4.16b, v4.16b, v8.16b4313eor v5.16b, v5.16b, v9.16b4314eor v6.16b, v6.16b, v10.16b4315eor v7.16b, v7.16b, v11.16b43164317// save the last tweak4318st1 {v11.4s},[x5]4319st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#644320st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x1],#644321subs x2,x2,#84322b.gt .Lxts_8_blocks_process4323b 100f4324.Lxts_4_blocks_process:4325mov v8.d[0],x124326mov v8.d[1],x134327#ifdef __AARCH64EB__4328rev32 v8.16b,v8.16b4329#endif4330mov v9.d[0],x144331mov v9.d[1],x154332#ifdef __AARCH64EB__4333rev32 v9.16b,v9.16b4334#endif4335mov v10.d[0],x164336mov v10.d[1],x174337#ifdef __AARCH64EB__4338rev32 v10.16b,v10.16b4339#endif4340mov v11.d[0],x184341mov v11.d[1],x194342#ifdef __AARCH64EB__4343rev32 v11.16b,v11.16b4344#endif4345cmp x2,#44346b.lt 1f4347ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#644348eor v4.16b, v4.16b, v8.16b4349eor v5.16b, v5.16b, v9.16b4350eor v6.16b, v6.16b, v10.16b4351eor v7.16b, v7.16b, v11.16b4352#ifndef __AARCH64EB__4353rev32 v4.16b,v4.16b4354#endif4355#ifndef __AARCH64EB__4356rev32 v5.16b,v5.16b4357#endif4358#ifndef __AARCH64EB__4359rev32 
v6.16b,v6.16b4360#endif4361#ifndef __AARCH64EB__4362rev32 v7.16b,v7.16b4363#endif4364zip1 v0.4s,v4.4s,v5.4s4365zip2 v1.4s,v4.4s,v5.4s4366zip1 v2.4s,v6.4s,v7.4s4367zip2 v3.4s,v6.4s,v7.4s4368zip1 v4.2d,v0.2d,v2.2d4369zip2 v5.2d,v0.2d,v2.2d4370zip1 v6.2d,v1.2d,v3.2d4371zip2 v7.2d,v1.2d,v3.2d4372bl _vpsm4_enc_4blks4373zip1 v4.4s,v0.4s,v1.4s4374zip2 v5.4s,v0.4s,v1.4s4375zip1 v6.4s,v2.4s,v3.4s4376zip2 v7.4s,v2.4s,v3.4s4377zip1 v0.2d,v4.2d,v6.2d4378zip2 v1.2d,v4.2d,v6.2d4379zip1 v2.2d,v5.2d,v7.2d4380zip2 v3.2d,v5.2d,v7.2d4381eor v0.16b, v0.16b, v8.16b4382eor v1.16b, v1.16b, v9.16b4383eor v2.16b, v2.16b, v10.16b4384eor v3.16b, v3.16b, v11.16b4385st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x1],#644386sub x2,x2,#44387mov v8.d[0],x204388mov v8.d[1],x214389#ifdef __AARCH64EB__4390rev32 v8.16b,v8.16b4391#endif4392mov v9.d[0],x224393mov v9.d[1],x234394#ifdef __AARCH64EB__4395rev32 v9.16b,v9.16b4396#endif4397mov v10.d[0],x244398mov v10.d[1],x254399#ifdef __AARCH64EB__4400rev32 v10.16b,v10.16b4401#endif4402// save the last tweak4403st1 {v11.4s},[x5]44041:4405// process last block4406cmp x2,#14407b.lt 100f4408b.gt 1f4409ld1 {v4.4s},[x0],#164410eor v4.16b, v4.16b, v8.16b4411#ifndef __AARCH64EB__4412rev32 v4.16b,v4.16b4413#endif4414mov x10,x34415mov w11,#84416mov w12,v4.s[0]4417mov w13,v4.s[1]4418mov w14,v4.s[2]4419mov w15,v4.s[3]442010:4421ldp w7,w8,[x10],84422// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)4423eor w6,w14,w154424eor w9,w7,w134425eor w6,w6,w94426movi v1.16b,#644427movi v2.16b,#1284428movi v3.16b,#1924429mov v0.s[0],w644304431sub v1.16b,v0.16b,v1.16b4432sub v2.16b,v0.16b,v2.16b4433sub v3.16b,v0.16b,v3.16b44344435tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4436tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4437tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4438tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b44394440mov w6,v0.s[0]4441mov w7,v1.s[0]4442mov w9,v2.s[0]4443add w7,w6,w74444mov w6,v3.s[0]4445add w7,w7,w94446add w7,w7,w644474448eor w6,w7,w7,ror #32-24449eor w6,w6,w7,ror 
#32-104450eor w6,w6,w7,ror #32-184451eor w6,w6,w7,ror #32-244452eor w12,w12,w64453// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)4454eor w6,w14,w154455eor w9,w12,w84456eor w6,w6,w94457movi v1.16b,#644458movi v2.16b,#1284459movi v3.16b,#1924460mov v0.s[0],w644614462sub v1.16b,v0.16b,v1.16b4463sub v2.16b,v0.16b,v2.16b4464sub v3.16b,v0.16b,v3.16b44654466tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4467tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4468tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4469tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b44704471mov w6,v0.s[0]4472mov w7,v1.s[0]4473mov w9,v2.s[0]4474add w7,w6,w74475mov w6,v3.s[0]4476add w7,w7,w94477add w7,w7,w644784479eor w6,w7,w7,ror #32-24480eor w6,w6,w7,ror #32-104481eor w6,w6,w7,ror #32-184482eor w6,w6,w7,ror #32-244483ldp w7,w8,[x10],84484eor w13,w13,w64485// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)4486eor w6,w12,w134487eor w9,w7,w154488eor w6,w6,w94489movi v1.16b,#644490movi v2.16b,#1284491movi v3.16b,#1924492mov v0.s[0],w644934494sub v1.16b,v0.16b,v1.16b4495sub v2.16b,v0.16b,v2.16b4496sub v3.16b,v0.16b,v3.16b44974498tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4499tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4500tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4501tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b45024503mov w6,v0.s[0]4504mov w7,v1.s[0]4505mov w9,v2.s[0]4506add w7,w6,w74507mov w6,v3.s[0]4508add w7,w7,w94509add w7,w7,w645104511eor w6,w7,w7,ror #32-24512eor w6,w6,w7,ror #32-104513eor w6,w6,w7,ror #32-184514eor w6,w6,w7,ror #32-244515eor w14,w14,w64516// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)4517eor w6,w12,w134518eor w9,w14,w84519eor w6,w6,w94520movi v1.16b,#644521movi v2.16b,#1284522movi v3.16b,#1924523mov v0.s[0],w645244525sub v1.16b,v0.16b,v1.16b4526sub v2.16b,v0.16b,v2.16b4527sub v3.16b,v0.16b,v3.16b45284529tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4530tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4531tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4532tbl 
v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b45334534mov w6,v0.s[0]4535mov w7,v1.s[0]4536mov w9,v2.s[0]4537add w7,w6,w74538mov w6,v3.s[0]4539add w7,w7,w94540add w7,w7,w645414542eor w6,w7,w7,ror #32-24543eor w6,w6,w7,ror #32-104544eor w6,w6,w7,ror #32-184545eor w6,w6,w7,ror #32-244546eor w15,w15,w64547subs w11,w11,#14548b.ne 10b4549mov v4.s[0],w154550mov v4.s[1],w144551mov v4.s[2],w134552mov v4.s[3],w124553#ifndef __AARCH64EB__4554rev32 v4.16b,v4.16b4555#endif4556eor v4.16b, v4.16b, v8.16b4557st1 {v4.4s},[x1],#164558// save the last tweak4559st1 {v8.4s},[x5]4560b 100f45611: // process last 2 blocks4562cmp x2,#24563b.gt 1f4564ld1 {v4.4s,v5.4s},[x0],#324565eor v4.16b, v4.16b, v8.16b4566eor v5.16b, v5.16b, v9.16b4567#ifndef __AARCH64EB__4568rev32 v4.16b,v4.16b4569#endif4570#ifndef __AARCH64EB__4571rev32 v5.16b,v5.16b4572#endif4573zip1 v0.4s,v4.4s,v5.4s4574zip2 v1.4s,v4.4s,v5.4s4575zip1 v2.4s,v6.4s,v7.4s4576zip2 v3.4s,v6.4s,v7.4s4577zip1 v4.2d,v0.2d,v2.2d4578zip2 v5.2d,v0.2d,v2.2d4579zip1 v6.2d,v1.2d,v3.2d4580zip2 v7.2d,v1.2d,v3.2d4581bl _vpsm4_enc_4blks4582zip1 v4.4s,v0.4s,v1.4s4583zip2 v5.4s,v0.4s,v1.4s4584zip1 v6.4s,v2.4s,v3.4s4585zip2 v7.4s,v2.4s,v3.4s4586zip1 v0.2d,v4.2d,v6.2d4587zip2 v1.2d,v4.2d,v6.2d4588zip1 v2.2d,v5.2d,v7.2d4589zip2 v3.2d,v5.2d,v7.2d4590eor v0.16b, v0.16b, v8.16b4591eor v1.16b, v1.16b, v9.16b4592st1 {v0.4s,v1.4s},[x1],#324593// save the last tweak4594st1 {v9.4s},[x5]4595b 100f45961: // process last 3 blocks4597ld1 {v4.4s,v5.4s,v6.4s},[x0],#484598eor v4.16b, v4.16b, v8.16b4599eor v5.16b, v5.16b, v9.16b4600eor v6.16b, v6.16b, v10.16b4601#ifndef __AARCH64EB__4602rev32 v4.16b,v4.16b4603#endif4604#ifndef __AARCH64EB__4605rev32 v5.16b,v5.16b4606#endif4607#ifndef __AARCH64EB__4608rev32 v6.16b,v6.16b4609#endif4610zip1 v0.4s,v4.4s,v5.4s4611zip2 v1.4s,v4.4s,v5.4s4612zip1 v2.4s,v6.4s,v7.4s4613zip2 v3.4s,v6.4s,v7.4s4614zip1 v4.2d,v0.2d,v2.2d4615zip2 v5.2d,v0.2d,v2.2d4616zip1 v6.2d,v1.2d,v3.2d4617zip2 v7.2d,v1.2d,v3.2d4618bl _vpsm4_enc_4blks4619zip1 
v4.4s,v0.4s,v1.4s4620zip2 v5.4s,v0.4s,v1.4s4621zip1 v6.4s,v2.4s,v3.4s4622zip2 v7.4s,v2.4s,v3.4s4623zip1 v0.2d,v4.2d,v6.2d4624zip2 v1.2d,v4.2d,v6.2d4625zip1 v2.2d,v5.2d,v7.2d4626zip2 v3.2d,v5.2d,v7.2d4627eor v0.16b, v0.16b, v8.16b4628eor v1.16b, v1.16b, v9.16b4629eor v2.16b, v2.16b, v10.16b4630st1 {v0.4s,v1.4s,v2.4s},[x1],#484631// save the last tweak4632st1 {v10.4s},[x5]4633100:4634cmp x29,04635b.eq .return46364637// This branch calculates the last two tweaks,4638// while the encryption/decryption length is larger than 324639.last_2blks_tweak:4640ld1 {v8.4s},[x5]4641#ifdef __AARCH64EB__4642rev32 v8.16b,v8.16b4643#endif4644mov v2.16b,v8.16b4645adrp x10,.Lxts_magic4646ldr q0, [x10, #:lo12:.Lxts_magic]4647shl v9.16b, v2.16b, #14648ext v1.16b, v2.16b, v2.16b,#154649ushr v1.16b, v1.16b, #74650mul v1.16b, v1.16b, v0.16b4651eor v9.16b, v9.16b, v1.16b4652mov v2.16b,v9.16b4653adrp x10,.Lxts_magic4654ldr q0, [x10, #:lo12:.Lxts_magic]4655shl v10.16b, v2.16b, #14656ext v1.16b, v2.16b, v2.16b,#154657ushr v1.16b, v1.16b, #74658mul v1.16b, v1.16b, v0.16b4659eor v10.16b, v10.16b, v1.16b4660b .check_dec466146624663// This branch calculates the last two tweaks,4664// while the encryption/decryption length is equal to 32, who only need two tweaks4665.only_2blks_tweak:4666mov v9.16b,v8.16b4667#ifdef __AARCH64EB__4668rev32 v9.16b,v9.16b4669#endif4670mov v2.16b,v9.16b4671adrp x10,.Lxts_magic4672ldr q0, [x10, #:lo12:.Lxts_magic]4673shl v10.16b, v2.16b, #14674ext v1.16b, v2.16b, v2.16b,#154675ushr v1.16b, v1.16b, #74676mul v1.16b, v1.16b, v0.16b4677eor v10.16b, v10.16b, v1.16b4678b .check_dec467946804681// Determine whether encryption or decryption is required.4682// The last two tweaks need to be swapped for decryption.4683.check_dec:4684// encryption:1 decryption:04685cmp w28,14686b.eq .process_last_2blks4687mov v0.16B,v9.16b4688mov v9.16B,v10.16b4689mov v10.16B,v0.16b46904691.process_last_2blks:4692#ifdef __AARCH64EB__4693rev32 v9.16b,v9.16b4694#endif4695#ifdef __AARCH64EB__4696rev32 
v10.16b,v10.16b4697#endif4698ld1 {v4.4s},[x0],#164699eor v4.16b, v4.16b, v9.16b4700#ifndef __AARCH64EB__4701rev32 v4.16b,v4.16b4702#endif4703mov x10,x34704mov w11,#84705mov w12,v4.s[0]4706mov w13,v4.s[1]4707mov w14,v4.s[2]4708mov w15,v4.s[3]470910:4710ldp w7,w8,[x10],84711// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)4712eor w6,w14,w154713eor w9,w7,w134714eor w6,w6,w94715movi v1.16b,#644716movi v2.16b,#1284717movi v3.16b,#1924718mov v0.s[0],w647194720sub v1.16b,v0.16b,v1.16b4721sub v2.16b,v0.16b,v2.16b4722sub v3.16b,v0.16b,v3.16b47234724tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4725tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4726tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4727tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b47284729mov w6,v0.s[0]4730mov w7,v1.s[0]4731mov w9,v2.s[0]4732add w7,w6,w74733mov w6,v3.s[0]4734add w7,w7,w94735add w7,w7,w647364737eor w6,w7,w7,ror #32-24738eor w6,w6,w7,ror #32-104739eor w6,w6,w7,ror #32-184740eor w6,w6,w7,ror #32-244741eor w12,w12,w64742// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)4743eor w6,w14,w154744eor w9,w12,w84745eor w6,w6,w94746movi v1.16b,#644747movi v2.16b,#1284748movi v3.16b,#1924749mov v0.s[0],w647504751sub v1.16b,v0.16b,v1.16b4752sub v2.16b,v0.16b,v2.16b4753sub v3.16b,v0.16b,v3.16b47544755tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4756tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4757tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4758tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b47594760mov w6,v0.s[0]4761mov w7,v1.s[0]4762mov w9,v2.s[0]4763add w7,w6,w74764mov w6,v3.s[0]4765add w7,w7,w94766add w7,w7,w647674768eor w6,w7,w7,ror #32-24769eor w6,w6,w7,ror #32-104770eor w6,w6,w7,ror #32-184771eor w6,w6,w7,ror #32-244772ldp w7,w8,[x10],84773eor w13,w13,w64774// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)4775eor w6,w12,w134776eor w9,w7,w154777eor w6,w6,w94778movi v1.16b,#644779movi v2.16b,#1284780movi v3.16b,#1924781mov v0.s[0],w647824783sub v1.16b,v0.16b,v1.16b4784sub v2.16b,v0.16b,v2.16b4785sub 
v3.16b,v0.16b,v3.16b47864787tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4788tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4789tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4790tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b47914792mov w6,v0.s[0]4793mov w7,v1.s[0]4794mov w9,v2.s[0]4795add w7,w6,w74796mov w6,v3.s[0]4797add w7,w7,w94798add w7,w7,w647994800eor w6,w7,w7,ror #32-24801eor w6,w6,w7,ror #32-104802eor w6,w6,w7,ror #32-184803eor w6,w6,w7,ror #32-244804eor w14,w14,w64805// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)4806eor w6,w12,w134807eor w9,w14,w84808eor w6,w6,w94809movi v1.16b,#644810movi v2.16b,#1284811movi v3.16b,#1924812mov v0.s[0],w648134814sub v1.16b,v0.16b,v1.16b4815sub v2.16b,v0.16b,v2.16b4816sub v3.16b,v0.16b,v3.16b48174818tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4819tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4820tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4821tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b48224823mov w6,v0.s[0]4824mov w7,v1.s[0]4825mov w9,v2.s[0]4826add w7,w6,w74827mov w6,v3.s[0]4828add w7,w7,w94829add w7,w7,w648304831eor w6,w7,w7,ror #32-24832eor w6,w6,w7,ror #32-104833eor w6,w6,w7,ror #32-184834eor w6,w6,w7,ror #32-244835eor w15,w15,w64836subs w11,w11,#14837b.ne 10b4838mov v4.s[0],w154839mov v4.s[1],w144840mov v4.s[2],w134841mov v4.s[3],w124842#ifndef __AARCH64EB__4843rev32 v4.16b,v4.16b4844#endif4845eor v4.16b, v4.16b, v9.16b4846st1 {v4.4s},[x1],#1648474848sub x26,x1,164849.loop:4850subs x29,x29,14851ldrb w7,[x26,x29]4852ldrb w8,[x0,x29]4853strb w8,[x26,x29]4854strb w7,[x1,x29]4855b.gt .loop4856ld1 {v4.4s}, [x26]4857eor v4.16b, v4.16b, v10.16b4858#ifndef __AARCH64EB__4859rev32 v4.16b,v4.16b4860#endif4861mov x10,x34862mov w11,#84863mov w12,v4.s[0]4864mov w13,v4.s[1]4865mov w14,v4.s[2]4866mov w15,v4.s[3]486710:4868ldp w7,w8,[x10],84869// B0 ^= SBOX(B1 ^ B2 ^ B3 ^ RK0)4870eor w6,w14,w154871eor w9,w7,w134872eor w6,w6,w94873movi v1.16b,#644874movi v2.16b,#1284875movi v3.16b,#1924876mov 
v0.s[0],w648774878sub v1.16b,v0.16b,v1.16b4879sub v2.16b,v0.16b,v2.16b4880sub v3.16b,v0.16b,v3.16b48814882tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4883tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4884tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4885tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b48864887mov w6,v0.s[0]4888mov w7,v1.s[0]4889mov w9,v2.s[0]4890add w7,w6,w74891mov w6,v3.s[0]4892add w7,w7,w94893add w7,w7,w648944895eor w6,w7,w7,ror #32-24896eor w6,w6,w7,ror #32-104897eor w6,w6,w7,ror #32-184898eor w6,w6,w7,ror #32-244899eor w12,w12,w64900// B1 ^= SBOX(B0 ^ B2 ^ B3 ^ RK1)4901eor w6,w14,w154902eor w9,w12,w84903eor w6,w6,w94904movi v1.16b,#644905movi v2.16b,#1284906movi v3.16b,#1924907mov v0.s[0],w649084909sub v1.16b,v0.16b,v1.16b4910sub v2.16b,v0.16b,v2.16b4911sub v3.16b,v0.16b,v3.16b49124913tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4914tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4915tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4916tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b49174918mov w6,v0.s[0]4919mov w7,v1.s[0]4920mov w9,v2.s[0]4921add w7,w6,w74922mov w6,v3.s[0]4923add w7,w7,w94924add w7,w7,w649254926eor w6,w7,w7,ror #32-24927eor w6,w6,w7,ror #32-104928eor w6,w6,w7,ror #32-184929eor w6,w6,w7,ror #32-244930ldp w7,w8,[x10],84931eor w13,w13,w64932// B2 ^= SBOX(B0 ^ B1 ^ B3 ^ RK2)4933eor w6,w12,w134934eor w9,w7,w154935eor w6,w6,w94936movi v1.16b,#644937movi v2.16b,#1284938movi v3.16b,#1924939mov v0.s[0],w649404941sub v1.16b,v0.16b,v1.16b4942sub v2.16b,v0.16b,v2.16b4943sub v3.16b,v0.16b,v3.16b49444945tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4946tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4947tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4948tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b49494950mov w6,v0.s[0]4951mov w7,v1.s[0]4952mov w9,v2.s[0]4953add w7,w6,w74954mov w6,v3.s[0]4955add w7,w7,w94956add w7,w7,w649574958eor w6,w7,w7,ror #32-24959eor w6,w6,w7,ror #32-104960eor 
w6,w6,w7,ror #32-184961eor w6,w6,w7,ror #32-244962eor w14,w14,w64963// B3 ^= SBOX(B0 ^ B1 ^ B2 ^ RK3)4964eor w6,w12,w134965eor w9,w14,w84966eor w6,w6,w94967movi v1.16b,#644968movi v2.16b,#1284969movi v3.16b,#1924970mov v0.s[0],w649714972sub v1.16b,v0.16b,v1.16b4973sub v2.16b,v0.16b,v2.16b4974sub v3.16b,v0.16b,v3.16b49754976tbl v0.16b,{v16.16b,v17.16b,v18.16b,v19.16b},v0.16b4977tbl v1.16b,{v20.16b,v21.16b,v22.16b,v23.16b},v1.16b4978tbl v2.16b,{v24.16b,v25.16b,v26.16b,v27.16b},v2.16b4979tbl v3.16b,{v28.16b,v29.16b,v30.16b,v31.16b},v3.16b49804981mov w6,v0.s[0]4982mov w7,v1.s[0]4983mov w9,v2.s[0]4984add w7,w6,w74985mov w6,v3.s[0]4986add w7,w7,w94987add w7,w7,w649884989eor w6,w7,w7,ror #32-24990eor w6,w6,w7,ror #32-104991eor w6,w6,w7,ror #32-184992eor w6,w6,w7,ror #32-244993eor w15,w15,w64994subs w11,w11,#14995b.ne 10b4996mov v4.s[0],w154997mov v4.s[1],w144998mov v4.s[2],w134999mov v4.s[3],w125000#ifndef __AARCH64EB__5001rev32 v4.16b,v4.16b5002#endif5003eor v4.16b, v4.16b, v10.16b5004st1 {v4.4s}, [x26]5005.return:5006ldp d14, d15, [sp], #0x105007ldp d12, d13, [sp], #0x105008ldp d10, d11, [sp], #0x105009ldp d8, d9, [sp], #0x105010ldp x29, x30, [sp], #0x105011ldp x27, x28, [sp], #0x105012ldp x25, x26, [sp], #0x105013ldp x23, x24, [sp], #0x105014ldp x21, x22, [sp], #0x105015ldp x19, x20, [sp], #0x105016ldp x17, x18, [sp], #0x105017ldp x15, x16, [sp], #0x105018AARCH64_VALIDATE_LINK_REGISTER5019ret5020.size vpsm4_xts_encrypt,.-vpsm4_xts_encrypt502150225023