Path: blob/main/sys/crypto/openssl/aarch64/sha512-armv8.S
39507 views
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html

// ====================================================================
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
//
// Performance in cycles per processed byte and improvement coefficient
// over code generated with "default" compiler:
//
//		SHA256-hw	SHA256(*)	SHA512
// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
// Denver	2.01		10.5 (+26%)	6.70 (+8%)
// X-Gene			20.0 (+100%)	12.8 (+300%(***))
// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
// Kryo		1.92		17.4 (+30%)	11.2 (+8%)
// ThunderX2	2.54		13.2 (+40%)	8.40 (+18%)
//
// (*)	Software SHA256 results are of lesser relevance, presented
//	mostly for informational purposes.
// (**)	The result is a trade-off: it's possible to improve it by
//	10% (or by 1 cycle per round), but at the cost of 20% loss
//	on Cortex-A53 (or by 4 cycles per round).
// (***)	Super-impressive coefficients over gcc-generated code are
//	indication of some compiler "pathology", most notably code
//	generated with -mgeneral-regs-only is significantly faster
//	and the gap is only 40-90%.
//
// October 2016.
//
// Originally it was reckoned that it makes no sense to implement NEON
// version of SHA256 for 64-bit processors. This is because performance
// improvement on most wide-spread Cortex-A5x processors was observed
// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
// observed that 32-bit NEON SHA256 performs significantly better than
// 64-bit scalar version on *some* of the more recent processors. As
// result 64-bit NEON version of SHA256 was added to provide best
// all-round performance. For example it executes ~30% faster on X-Gene
// and Mongoose. [For reference, NEON version of SHA512 is bound to
// deliver much less improvement, likely *negative* on Cortex-A5x.
// Which is why NEON support is limited to SHA256.]

// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#include "arm_arch.h"
#ifndef __KERNEL__

.hidden	OPENSSL_armcap_P
#endif

.text

// ---------------------------------------------------------------------
// sha512_block_data_order -- scalar SHA-512 compression of whole
// 128-byte blocks.
//
// NOTE(review): this file is generated; any lasting change belongs in
// sha512-armv8.pl rather than here.
//
// In:	x0 = pointer to eight 64-bit chaining words (read on entry,
//	     updated in place after every block)
//	x1 = input pointer
//	x2 = number of 128-byte blocks (turned into an end pointer,
//	     x1 + x2*128)
//	The block loop tests for the end only after a block has been
//	processed, so x2 == 0 is not handled here -- presumably callers
//	never pass 0; TODO confirm.
//
// Unless built with __KERNEL__, the routine first tests
// OPENSSL_armcap_P for ARMV8_SHA512 and, if set, branches to
// .Lv8_entry instead of running the scalar code below.
//
// Register roles in the scalar path:
//	x20-x27		working variables a..h; the roles rotate by one
//			register per round instead of moving the data
//	x3-x15,x0-x2	X[0..15], the 16-word message schedule window
//			(x0-x2 are recycled once their entry values are
//			saved in the frame)
//	x19,x28		alternate between K[i] and a^b (kept for Maj in
//			the following round)
//	x16,x17		scratch (Sigma1 / Ch, then Sigma0)
//	x30		pointer into .LK512
//
// Frame (128 bytes at x29, plus 4*8 bytes below it at sp):
//	[x29,#0]	saved x29,x30
//	[x29,#16..#95]	saved x19-x28
//	[x29,#96]	x0 (state pointer)
//	[x29,#104]	end-of-input pointer
//	[x29,#112]	input pointer, saved after the first 16 bytes
//			of the block have been consumed
//	[sp,#0..#31]	four spill slots for schedule words whose
//			registers are borrowed as scratch
//
// The final "h+=Sigma0(a)" of every round in rounds 0-15 is issued a
// few instructions late, inside the following round; the commented-out
// add marks where it logically belongs.
// ---------------------------------------------------------------------
.globl	sha512_block_data_order
.type	sha512_block_data_order,%function
.align	6
sha512_block_data_order:
	AARCH64_VALID_CALL_TARGET
#ifndef __KERNEL__
	adrp	x16,OPENSSL_armcap_P
	ldr	w16,[x16,#:lo12:OPENSSL_armcap_P]
	tst	w16,#ARMV8_SHA512
	b.ne	.Lv8_entry
#endif
	AARCH64_SIGN_LINK_REGISTER
	stp	x29,x30,[sp,#-128]!
	add	x29,sp,#0

	stp	x19,x20,[sp,#16]
	stp	x21,x22,[sp,#32]
	stp	x23,x24,[sp,#48]
	stp	x25,x26,[sp,#64]
	stp	x27,x28,[sp,#80]
	sub	sp,sp,#4*8

	ldp	x20,x21,[x0]	// load context
	ldp	x22,x23,[x0,#2*8]
	ldp	x24,x25,[x0,#4*8]
	add	x2,x1,x2,lsl#7	// end of input
	ldp	x26,x27,[x0,#6*8]
	adrp	x30,.LK512
	add	x30,x30,#:lo12:.LK512
	stp	x0,x2,[x29,#96]

// One pass through .Loop per 128-byte block.  Rounds 0-15 take X[i]
// straight from the input, byte-reversed unless __AARCH64EB__ is set.
.Loop:
	ldp	x3,x4,[x1],#2*8
	ldr	x19,[x30],#8	// *K++
	eor	x28,x21,x22	// magic seed
	str	x1,[x29,#112]
#ifndef __AARCH64EB__
	rev	x3,x3	// 0
#endif
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x6,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x3	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x4,x4	// 1
#endif
	ldp	x5,x6,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x7,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x4	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x5,x5	// 2
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x8,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x5	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x6,x6	// 3
#endif
	ldp	x7,x8,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x9,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x6	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x7,x7	// 4
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x10,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x7	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x10,ror#18	// Sigma1(e)
	ror	x10,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x10,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x8,x8	// 5
#endif
	ldp	x9,x10,[x1],#2*8
	add	x23,x23,x17	// h+=Sigma0(a)
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x11,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x8	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x11,ror#18	// Sigma1(e)
	ror	x11,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x11,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x9,x9	// 6
#endif
	add	x22,x22,x17	// h+=Sigma0(a)
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x12,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x9	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x12,ror#18	// Sigma1(e)
	ror	x12,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x12,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x10,x10	// 7
#endif
	ldp	x11,x12,[x1],#2*8
	add	x21,x21,x17	// h+=Sigma0(a)
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	eor	x13,x25,x25,ror#23
	and	x17,x26,x25
	bic	x28,x27,x25
	add	x20,x20,x10	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x13,ror#18	// Sigma1(e)
	ror	x13,x21,#28
	add	x20,x20,x17	// h+=Ch(e,f,g)
	eor	x17,x21,x21,ror#5
	add	x20,x20,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x24,x24,x20	// d+=h
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x13,x17,ror#34	// Sigma0(a)
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x20,x20,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x11,x11	// 8
#endif
	add	x20,x20,x17	// h+=Sigma0(a)
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	eor	x14,x24,x24,ror#23
	and	x17,x25,x24
	bic	x19,x26,x24
	add	x27,x27,x11	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x14,ror#18	// Sigma1(e)
	ror	x14,x20,#28
	add	x27,x27,x17	// h+=Ch(e,f,g)
	eor	x17,x20,x20,ror#5
	add	x27,x27,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x23,x23,x27	// d+=h
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x14,x17,ror#34	// Sigma0(a)
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x27,x27,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x12,x12	// 9
#endif
	ldp	x13,x14,[x1],#2*8
	add	x27,x27,x17	// h+=Sigma0(a)
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	eor	x15,x23,x23,ror#23
	and	x17,x24,x23
	bic	x28,x25,x23
	add	x26,x26,x12	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x15,ror#18	// Sigma1(e)
	ror	x15,x27,#28
	add	x26,x26,x17	// h+=Ch(e,f,g)
	eor	x17,x27,x27,ror#5
	add	x26,x26,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x22,x22,x26	// d+=h
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x15,x17,ror#34	// Sigma0(a)
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x26,x26,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x13,x13	// 10
#endif
	add	x26,x26,x17	// h+=Sigma0(a)
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	eor	x0,x22,x22,ror#23
	and	x17,x23,x22
	bic	x19,x24,x22
	add	x25,x25,x13	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x0,ror#18	// Sigma1(e)
	ror	x0,x26,#28
	add	x25,x25,x17	// h+=Ch(e,f,g)
	eor	x17,x26,x26,ror#5
	add	x25,x25,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x21,x21,x25	// d+=h
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x0,x17,ror#34	// Sigma0(a)
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x25,x25,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x14,x14	// 11
#endif
	ldp	x15,x0,[x1],#2*8
	add	x25,x25,x17	// h+=Sigma0(a)
	// From here on the scratch register of each round already holds a
	// live schedule word, so that word is parked in a spill slot first.
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	eor	x6,x21,x21,ror#23
	and	x17,x22,x21
	bic	x28,x23,x21
	add	x24,x24,x14	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x6,ror#18	// Sigma1(e)
	ror	x6,x25,#28
	add	x24,x24,x17	// h+=Ch(e,f,g)
	eor	x17,x25,x25,ror#5
	add	x24,x24,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x20,x20,x24	// d+=h
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x17,ror#34	// Sigma0(a)
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x24,x24,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x15,x15	// 12
#endif
	add	x24,x24,x17	// h+=Sigma0(a)
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	eor	x7,x20,x20,ror#23
	and	x17,x21,x20
	bic	x19,x22,x20
	add	x23,x23,x15	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x7,ror#18	// Sigma1(e)
	ror	x7,x24,#28
	add	x23,x23,x17	// h+=Ch(e,f,g)
	eor	x17,x24,x24,ror#5
	add	x23,x23,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x27,x27,x23	// d+=h
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x17,ror#34	// Sigma0(a)
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x23,x23,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x0,x0	// 13
#endif
	// Last two input words: no write-back, and x1/x2 themselves become
	// X[14]/X[15].  The input and end pointers live on in the frame at
	// [x29,#112] and [x29,#104].
	ldp	x1,x2,[x1]
	add	x23,x23,x17	// h+=Sigma0(a)
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	eor	x8,x27,x27,ror#23
	and	x17,x20,x27
	bic	x28,x21,x27
	add	x22,x22,x0	// h+=X[i]
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x8,ror#18	// Sigma1(e)
	ror	x8,x23,#28
	add	x22,x22,x17	// h+=Ch(e,f,g)
	eor	x17,x23,x23,ror#5
	add	x22,x22,x16	// h+=Sigma1(e)
	and	x19,x19,x28	// (b^c)&=(a^b)
	add	x26,x26,x22	// d+=h
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x17,ror#34	// Sigma0(a)
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	//add	x22,x22,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x1,x1	// 14
#endif
	ldr	x6,[sp,#24]
	add	x22,x22,x17	// h+=Sigma0(a)
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	eor	x9,x26,x26,ror#23
	and	x17,x27,x26
	bic	x19,x20,x26
	add	x21,x21,x1	// h+=X[i]
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x9,ror#18	// Sigma1(e)
	ror	x9,x22,#28
	add	x21,x21,x17	// h+=Ch(e,f,g)
	eor	x17,x22,x22,ror#5
	add	x21,x21,x16	// h+=Sigma1(e)
	and	x28,x28,x19	// (b^c)&=(a^b)
	add	x25,x25,x21	// d+=h
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x17,ror#34	// Sigma0(a)
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	//add	x21,x21,x17	// h+=Sigma0(a)
#ifndef __AARCH64EB__
	rev	x2,x2	// 15
#endif
	ldr	x7,[sp,#0]
	add	x21,x21,x17	// h+=Sigma0(a)
	str	x10,[sp,#24]
	// Round 15 already has the shape used in .Loop_16_xx: besides the
	// round itself it derives the next schedule word,
	// X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9].
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
// Rounds 16-79, sixteen per pass.  Every round consumes the schedule
// word prepared by the previous one and prepares the next.
.Loop_16_xx:
	ldr	x8,[sp,#8]
	str	x11,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x10,x5,#1
	and	x17,x25,x24
	ror	x9,x2,#19
	bic	x19,x26,x24
	ror	x11,x20,#28
	add	x27,x27,x3	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x10,x10,x5,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x11,x11,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x9,x9,x2,ror#61
	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x11,x20,ror#39	// Sigma0(a)
	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
	add	x4,x4,x13
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x4,x4,x10
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x4,x4,x9
	ldr	x9,[sp,#16]
	str	x12,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x11,x6,#1
	and	x17,x24,x23
	ror	x10,x3,#19
	bic	x28,x25,x23
	ror	x12,x27,#28
	add	x26,x26,x4	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x11,x11,x6,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x12,x12,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x10,x10,x3,ror#61
	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x12,x27,ror#39	// Sigma0(a)
	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
	add	x5,x5,x14
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x5,x5,x11
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x5,x5,x10
	ldr	x10,[sp,#24]
	str	x13,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x12,x7,#1
	and	x17,x23,x22
	ror	x11,x4,#19
	bic	x19,x24,x22
	ror	x13,x26,#28
	add	x25,x25,x5	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x12,x12,x7,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x13,x13,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x11,x11,x4,ror#61
	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x13,x26,ror#39	// Sigma0(a)
	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
	add	x6,x6,x15
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x6,x6,x12
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x6,x6,x11
	ldr	x11,[sp,#0]
	str	x14,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x13,x8,#1
	and	x17,x22,x21
	ror	x12,x5,#19
	bic	x28,x23,x21
	ror	x14,x25,#28
	add	x24,x24,x6	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x13,x13,x8,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x14,x14,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x12,x12,x5,ror#61
	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x14,x25,ror#39	// Sigma0(a)
	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
	add	x7,x7,x0
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x7,x7,x13
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x7,x7,x12
	ldr	x12,[sp,#8]
	str	x15,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x14,x9,#1
	and	x17,x21,x20
	ror	x13,x6,#19
	bic	x19,x22,x20
	ror	x15,x24,#28
	add	x23,x23,x7	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x14,x14,x9,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x15,x15,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x13,x13,x6,ror#61
	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x15,x24,ror#39	// Sigma0(a)
	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
	add	x8,x8,x1
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x8,x8,x14
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x8,x8,x13
	ldr	x13,[sp,#16]
	str	x0,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x15,x10,#1
	and	x17,x20,x27
	ror	x14,x7,#19
	bic	x28,x21,x27
	ror	x0,x23,#28
	add	x22,x22,x8	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x15,x15,x10,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x0,x0,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x14,x14,x7,ror#61
	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x0,x23,ror#39	// Sigma0(a)
	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
	add	x9,x9,x2
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x9,x9,x15
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x9,x9,x14
	ldr	x14,[sp,#24]
	str	x1,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x0,x11,#1
	and	x17,x27,x26
	ror	x15,x8,#19
	bic	x19,x20,x26
	ror	x1,x22,#28
	add	x21,x21,x9	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x0,x0,x11,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x1,x1,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x15,x15,x8,ror#61
	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x1,x22,ror#39	// Sigma0(a)
	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
	add	x10,x10,x3
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x10,x10,x0
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x10,x10,x15
	ldr	x15,[sp,#0]
	str	x2,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x1,x12,#1
	and	x17,x26,x25
	ror	x0,x9,#19
	bic	x28,x27,x25
	ror	x2,x21,#28
	add	x20,x20,x10	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x1,x1,x12,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x2,x2,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x0,x0,x9,ror#61
	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x2,x21,ror#39	// Sigma0(a)
	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
	add	x11,x11,x4
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x11,x11,x1
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x11,x11,x0
	ldr	x0,[sp,#8]
	str	x3,[sp,#0]
	ror	x16,x24,#14
	add	x27,x27,x19	// h+=K[i]
	ror	x2,x13,#1
	and	x17,x25,x24
	ror	x1,x10,#19
	bic	x19,x26,x24
	ror	x3,x20,#28
	add	x27,x27,x11	// h+=X[i]
	eor	x16,x16,x24,ror#18
	eor	x2,x2,x13,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x20,x21	// a^b, b^c in next round
	eor	x16,x16,x24,ror#41	// Sigma1(e)
	eor	x3,x3,x20,ror#34
	add	x27,x27,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x1,x1,x10,ror#61
	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
	add	x27,x27,x16	// h+=Sigma1(e)
	eor	x28,x28,x21	// Maj(a,b,c)
	eor	x17,x3,x20,ror#39	// Sigma0(a)
	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
	add	x12,x12,x5
	add	x23,x23,x27	// d+=h
	add	x27,x27,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x12,x12,x2
	add	x27,x27,x17	// h+=Sigma0(a)
	add	x12,x12,x1
	ldr	x1,[sp,#16]
	str	x4,[sp,#8]
	ror	x16,x23,#14
	add	x26,x26,x28	// h+=K[i]
	ror	x3,x14,#1
	and	x17,x24,x23
	ror	x2,x11,#19
	bic	x28,x25,x23
	ror	x4,x27,#28
	add	x26,x26,x12	// h+=X[i]
	eor	x16,x16,x23,ror#18
	eor	x3,x3,x14,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x27,x20	// a^b, b^c in next round
	eor	x16,x16,x23,ror#41	// Sigma1(e)
	eor	x4,x4,x27,ror#34
	add	x26,x26,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x2,x2,x11,ror#61
	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
	add	x26,x26,x16	// h+=Sigma1(e)
	eor	x19,x19,x20	// Maj(a,b,c)
	eor	x17,x4,x27,ror#39	// Sigma0(a)
	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
	add	x13,x13,x6
	add	x22,x22,x26	// d+=h
	add	x26,x26,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x13,x13,x3
	add	x26,x26,x17	// h+=Sigma0(a)
	add	x13,x13,x2
	ldr	x2,[sp,#24]
	str	x5,[sp,#16]
	ror	x16,x22,#14
	add	x25,x25,x19	// h+=K[i]
	ror	x4,x15,#1
	and	x17,x23,x22
	ror	x3,x12,#19
	bic	x19,x24,x22
	ror	x5,x26,#28
	add	x25,x25,x13	// h+=X[i]
	eor	x16,x16,x22,ror#18
	eor	x4,x4,x15,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x26,x27	// a^b, b^c in next round
	eor	x16,x16,x22,ror#41	// Sigma1(e)
	eor	x5,x5,x26,ror#34
	add	x25,x25,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x3,x3,x12,ror#61
	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
	add	x25,x25,x16	// h+=Sigma1(e)
	eor	x28,x28,x27	// Maj(a,b,c)
	eor	x17,x5,x26,ror#39	// Sigma0(a)
	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
	add	x14,x14,x7
	add	x21,x21,x25	// d+=h
	add	x25,x25,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x14,x14,x4
	add	x25,x25,x17	// h+=Sigma0(a)
	add	x14,x14,x3
	ldr	x3,[sp,#0]
	str	x6,[sp,#24]
	ror	x16,x21,#14
	add	x24,x24,x28	// h+=K[i]
	ror	x5,x0,#1
	and	x17,x22,x21
	ror	x4,x13,#19
	bic	x28,x23,x21
	ror	x6,x25,#28
	add	x24,x24,x14	// h+=X[i]
	eor	x16,x16,x21,ror#18
	eor	x5,x5,x0,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x25,x26	// a^b, b^c in next round
	eor	x16,x16,x21,ror#41	// Sigma1(e)
	eor	x6,x6,x25,ror#34
	add	x24,x24,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x4,x4,x13,ror#61
	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
	add	x24,x24,x16	// h+=Sigma1(e)
	eor	x19,x19,x26	// Maj(a,b,c)
	eor	x17,x6,x25,ror#39	// Sigma0(a)
	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
	add	x15,x15,x8
	add	x20,x20,x24	// d+=h
	add	x24,x24,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x15,x15,x5
	add	x24,x24,x17	// h+=Sigma0(a)
	add	x15,x15,x4
	ldr	x4,[sp,#8]
	str	x7,[sp,#0]
	ror	x16,x20,#14
	add	x23,x23,x19	// h+=K[i]
	ror	x6,x1,#1
	and	x17,x21,x20
	ror	x5,x14,#19
	bic	x19,x22,x20
	ror	x7,x24,#28
	add	x23,x23,x15	// h+=X[i]
	eor	x16,x16,x20,ror#18
	eor	x6,x6,x1,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x24,x25	// a^b, b^c in next round
	eor	x16,x16,x20,ror#41	// Sigma1(e)
	eor	x7,x7,x24,ror#34
	add	x23,x23,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x5,x5,x14,ror#61
	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
	add	x23,x23,x16	// h+=Sigma1(e)
	eor	x28,x28,x25	// Maj(a,b,c)
	eor	x17,x7,x24,ror#39	// Sigma0(a)
	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
	add	x0,x0,x9
	add	x27,x27,x23	// d+=h
	add	x23,x23,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x0,x0,x6
	add	x23,x23,x17	// h+=Sigma0(a)
	add	x0,x0,x5
	ldr	x5,[sp,#16]
	str	x8,[sp,#8]
	ror	x16,x27,#14
	add	x22,x22,x28	// h+=K[i]
	ror	x7,x2,#1
	and	x17,x20,x27
	ror	x6,x15,#19
	bic	x28,x21,x27
	ror	x8,x23,#28
	add	x22,x22,x0	// h+=X[i]
	eor	x16,x16,x27,ror#18
	eor	x7,x7,x2,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x23,x24	// a^b, b^c in next round
	eor	x16,x16,x27,ror#41	// Sigma1(e)
	eor	x8,x8,x23,ror#34
	add	x22,x22,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x6,x6,x15,ror#61
	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
	add	x22,x22,x16	// h+=Sigma1(e)
	eor	x19,x19,x24	// Maj(a,b,c)
	eor	x17,x8,x23,ror#39	// Sigma0(a)
	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
	add	x1,x1,x10
	add	x26,x26,x22	// d+=h
	add	x22,x22,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x1,x1,x7
	add	x22,x22,x17	// h+=Sigma0(a)
	add	x1,x1,x6
	ldr	x6,[sp,#24]
	str	x9,[sp,#16]
	ror	x16,x26,#14
	add	x21,x21,x19	// h+=K[i]
	ror	x8,x3,#1
	and	x17,x27,x26
	ror	x7,x0,#19
	bic	x19,x20,x26
	ror	x9,x22,#28
	add	x21,x21,x1	// h+=X[i]
	eor	x16,x16,x26,ror#18
	eor	x8,x8,x3,ror#8
	orr	x17,x17,x19	// Ch(e,f,g)
	eor	x19,x22,x23	// a^b, b^c in next round
	eor	x16,x16,x26,ror#41	// Sigma1(e)
	eor	x9,x9,x22,ror#34
	add	x21,x21,x17	// h+=Ch(e,f,g)
	and	x28,x28,x19	// (b^c)&=(a^b)
	eor	x7,x7,x0,ror#61
	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
	add	x21,x21,x16	// h+=Sigma1(e)
	eor	x28,x28,x23	// Maj(a,b,c)
	eor	x17,x9,x22,ror#39	// Sigma0(a)
	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
	add	x2,x2,x11
	add	x25,x25,x21	// d+=h
	add	x21,x21,x28	// h+=Maj(a,b,c)
	ldr	x28,[x30],#8	// *K++, x19 in next round
	add	x2,x2,x8
	add	x21,x21,x17	// h+=Sigma0(a)
	add	x2,x2,x7
	ldr	x7,[sp,#0]
	str	x10,[sp,#24]
	ror	x16,x25,#14
	add	x20,x20,x28	// h+=K[i]
	ror	x9,x4,#1
	and	x17,x26,x25
	ror	x8,x1,#19
	bic	x28,x27,x25
	ror	x10,x21,#28
	add	x20,x20,x2	// h+=X[i]
	eor	x16,x16,x25,ror#18
	eor	x9,x9,x4,ror#8
	orr	x17,x17,x28	// Ch(e,f,g)
	eor	x28,x21,x22	// a^b, b^c in next round
	eor	x16,x16,x25,ror#41	// Sigma1(e)
	eor	x10,x10,x21,ror#34
	add	x20,x20,x17	// h+=Ch(e,f,g)
	and	x19,x19,x28	// (b^c)&=(a^b)
	eor	x8,x8,x1,ror#61
	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
	add	x20,x20,x16	// h+=Sigma1(e)
	eor	x19,x19,x22	// Maj(a,b,c)
	eor	x17,x10,x21,ror#39	// Sigma0(a)
	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
	add	x3,x3,x12
	add	x24,x24,x20	// d+=h
	add	x20,x20,x19	// h+=Maj(a,b,c)
	ldr	x19,[x30],#8	// *K++, x28 in next round
	add	x3,x3,x9
	add	x20,x20,x17	// h+=Sigma0(a)
	add	x3,x3,x8
	// x19 holds the K word fetched for the next round; the zero
	// terminator stored after the last constant ends the round loop.
	cbnz	x19,.Loop_16_xx

	ldp	x0,x2,[x29,#96]
	ldr	x1,[x29,#112]
	sub	x30,x30,#648	// rewind (80 constants + terminator, 81*8 bytes)

	ldp	x3,x4,[x0]
	ldp	x5,x6,[x0,#2*8]
	add	x1,x1,#14*8	// advance input pointer
	ldp	x7,x8,[x0,#4*8]
	add	x20,x20,x3
	ldp	x9,x10,[x0,#6*8]
	add	x21,x21,x4
	add	x22,x22,x5
	add	x23,x23,x6
	stp	x20,x21,[x0]
	add	x24,x24,x7
	add	x25,x25,x8
	stp	x22,x23,[x0,#2*8]
	add	x26,x26,x9
	add	x27,x27,x10
	cmp	x1,x2
	stp	x24,x25,[x0,#4*8]
	stp	x26,x27,[x0,#6*8]
	b.ne	.Loop

	ldp	x19,x20,[x29,#16]
	add	sp,sp,#4*8
	ldp	x21,x22,[x29,#32]
	ldp	x23,x24,[x29,#48]
	ldp	x25,x26,[x29,#64]
	ldp	x27,x28,[x29,#80]
	ldp	x29,x30,[sp],#128
	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size	sha512_block_data_order,.-sha512_block_data_order

.section	.rodata

// Round constants, followed by a zero quad that the scalar round loop
// uses as its end marker.
.align	6
.type	.LK512,%object
.LK512:
.quad	0x428a2f98d728ae22,0x7137449123ef65cd
.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
.quad	0x3956c25bf348b538,0x59f111f1b605d019
.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
.quad	0xd807aa98a3030242,0x12835b0145706fbe
.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
.quad	0x9bdc06a725c71235,0xc19bf174cf692694
.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
.quad	0x983e5152ee66dfab,0xa831c66d2db43210
.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
.quad	0x06ca6351e003826f,0x142929670a0e6e70
.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
.quad	0x81c2c92e47edaee6,0x92722c851482353b
.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
.quad	0xd192e819d6ef5218,0xd69906245565a910
.quad	0xf40e35855771202a,0x106aa07032bbd1b8
.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
.quad	0x90befffa23631e28,0xa4506cebde82bde9
.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
.quad	0xca273eceea26619c,0xd186b8c721c0c207
.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
.quad	0x113f9804bef90dae,0x1b710b35131c471b
.quad	0x28db77f523047d84,0x32caab7b40c72493
.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
.quad	0	// terminator
.size	.LK512,.-.LK512
// ASCII, NUL-terminated: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2

.text
#ifndef __KERNEL__
.type	sha512_block_armv8,%function
.align
6
//----------------------------------------------------------------------
// sha512_block_armv8 (entry alias: .Lv8_entry)
//
// SHA-512 block transform built on the SHA512H/SHA512H2/SHA512SU0/
// SHA512SU1 instructions, which are emitted as raw .inst words with the
// intended mnemonic in the trailing comment.
// NOTE(review): presumably the caller has already checked that the CPU
// implements these instructions - confirm at the call site.
//
// In:   x0 = context; four 128-bit vectors are loaded from [x0] on entry
//            and stored back on exit
//       x1 = input; 128 bytes are consumed per loop iteration
//       x2 = number of loop iterations (decremented once per pass)
//            NOTE(review): x2 == 0 is not guarded here - the first 128
//            input bytes are loaded and the loop body runs before x2 is
//            tested; presumably callers never pass zero.
// Uses: x3      = cursor into .LK512, rewound by 80*8 bytes every pass
//       x4      = x1 - 128, candidate for the conditional rewind of x1
//       v0-v4   = working state (rotates through these five registers)
//       v5-v7   = ext scratch
//       v16-v23 = byte-swapped input / message schedule
//       v24,v25 = alternating 16-byte constant pairs read from .LK512
//       v26-v29 = copy of the state at loop entry, added back at the end
//
// Each "step" below consumes one 16-byte constant pair; one pass of
// .Loop_hw is 40 steps (40 * 16 = 80*8 bytes of .LK512).
//----------------------------------------------------------------------
sha512_block_armv8:
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// load context
	adrp	x3,.LK512
	add	x3,x3,#:lo12:.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw			// jump over the alignment padding

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1			// sets Z on the final pass
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b			// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	// On the final pass x1 steps back 128 bytes, so the "load next input"
	// loads in steps 32-39 re-read the block just consumed.
	csel	x1,x1,x4,ne			// conditional rewind

	// ---- steps 0-31: each step also regenerates its schedule register
	// ---- in place via sha512su0/sha512su1

	// step 0
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 1
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 2
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 3
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 4
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 5
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 6
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 7
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 8
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 9
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 10
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 11
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 12
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 13
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 14
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 15
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 17
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 18
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 19
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 20
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 21
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 22
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 23
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 24
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 25
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 26
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 27
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 28
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 29
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 30
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 31
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// ---- steps 32-39: no schedule update; each step reloads its
	// ---- schedule register from [x1] and byte-swaps it for the next pass

	// step 32
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v16.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v16.16b,v16.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 33
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v17.2d
	ld1	{v17.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v17.16b,v17.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 34
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v18.2d
	ld1	{v18.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v18.16b,v18.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 35
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v19.2d
	ld1	{v19.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	rev64	v19.16b,v19.16b
	add	v4.2d,v1.2d,v3.2d			// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 36
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v20.2d
	ld1	{v20.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	rev64	v20.16b,v20.16b
	add	v1.2d,v0.2d,v2.2d			// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 37
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v21.2d
	ld1	{v21.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v21.16b,v21.16b
	add	v0.2d,v3.2d,v4.2d			// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 38
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v22.2d
	ld1	{v22.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v22.16b,v22.16b
	add	v3.2d,v2.2d,v1.2d			// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 39 (all 40 constant pairs have been read; reset the cursor)
	sub	x3,x3,#80*8	// rewind
	add	v25.2d,v25.2d,v23.2d
	ld1	{v23.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v23.16b,v23.16b
	add	v2.2d,v4.2d,v0.2d			// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// add the state saved at loop entry (v26-v29) back into v0-v3
	add	v0.2d,v0.2d,v26.2d			// accumulate
	add	v1.2d,v1.2d,v27.2d
	add	v2.2d,v2.2d,v28.2d
	add	v3.2d,v3.2d,v29.2d

	cbnz	x2,.Loop_hw

	st1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// store context

	// restore x29 and release the 16-byte frame; x30 is deliberately
	// not reloaded (see the PAuth note at entry)
	ldr	x29,[sp],#16
	ret
.size	sha512_block_armv8,.-sha512_block_armv8
#endif