/* Path: blob/main/sys/crypto/openssl/aarch64/ecp_sm2p256-armv8.S */
/* 39507 views */
/* Do not modify. This file is auto-generated from ecp_sm2p256-armv8.pl. */
/*
 * SM2 P-256 field and scalar helper routines for AArch64.
 *
 * Syntax : GNU as, AArch64, preprocessed by cpp (note the include below).
 * ABI    : AAPCS64 register usage; pointer arguments arrive in x0, x1, x2.
 * Data   : every operand is a 256-bit integer stored as four 64-bit
 *          little-endian words (word 0 at offset 0, word 3 at offset 24).
 *
 * Since this file is generated, lasting changes belong in the .pl script
 * named in the first line; keep the two in sync.
 *
 * Entry-point macros (AARCH64_VALID_CALL_TARGET, AARCH64_SIGN_LINK_REGISTER,
 * AARCH64_VALIDATE_LINK_REGISTER) come from arm_arch.h. They are presumably
 * BTI / pointer-authentication hooks; confirm against that header.
 */
#include "arm_arch.h"
.arch armv8-a
.section .rodata

.align 5
// The polynomial p (field modulus), least significant word first
.Lpoly:
.quad 0xffffffffffffffff,0xffffffff00000000,0xffffffffffffffff,0xfffffffeffffffff
// The order n, least significant word first
.Lord:
.quad 0x53bbf40939d54123,0x7203df6b21c6052b,0xffffffffffffffff,0xfffffffeffffffff
// (p + 1) / 2
.Lpoly_div_2:
.quad 0x8000000000000000,0xffffffff80000000,0xffffffffffffffff,0x7fffffff7fffffff
// (n + 1) / 2
.Lord_div_2:
.quad 0xa9ddfa049ceaa092,0xb901efb590e30295,0xffffffffffffffff,0x7fffffff7fffffff

.text

/*
 * void bn_rshift1(BN_ULONG *a);
 *
 * In-place logical right shift by one bit of the 256-bit value at a.
 * In   : x0 = a
 * Out  : a[0..3] updated; bit 255 of the result is zero
 * Clobb: x7-x10
 */
.globl bn_rshift1
.type bn_rshift1,%function
.align 5
bn_rshift1:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x0]
	ldp	x9,x10,[x0,#16]

	// Right shift: each word takes its new top bit from the word above
	extr	x7,x8,x7,#1
	extr	x8,x9,x8,#1
	extr	x9,x10,x9,#1
	lsr	x10,x10,#1

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]

	ret
.size bn_rshift1,.-bn_rshift1

/*
 * void bn_sub(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
 *
 * r = a - b over 256 bits. No modular correction is applied and the
 * final borrow is discarded, so the result wraps modulo 2^256.
 * In   : x0 = r, x1 = a, x2 = b
 * Clobb: x7-x14, flags
 */
.globl bn_sub
.type bn_sub,%function
.align 5
bn_sub:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Subtraction with borrow chain; last step does not set flags
	subs	x7,x7,x11
	sbcs	x8,x8,x12
	sbcs	x9,x9,x13
	sbc	x10,x10,x14

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]

	ret
.size bn_sub,.-bn_sub

/*
 * void ecp_sm2p256_div_by_2(BN_ULONG *r,const BN_ULONG *a);
 *
 * r = (a >> 1) + (a is odd ? (p + 1) / 2 : 0), truncated to 256 bits.
 * For odd a this is (a + p) / 2, i.e. a / 2 modulo p.
 * NOTE(review): presumably callers pass a < p so the sum cannot overflow;
 * the carry out of the top word is discarded here.
 * In   : x0 = r, x1 = a
 * Clobb: x2, x3, x7-x14, flags
 */
.globl ecp_sm2p256_div_by_2
.type ecp_sm2p256_div_by_2,%function
.align 5
ecp_sm2p256_div_by_2:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]

	// Save the least significant bit
	mov	x3,x7

	// Right shift 1
	extr	x7,x8,x7,#1
	extr	x8,x9,x8,#1
	extr	x9,x10,x9,#1
	lsr	x10,x10,#1

	// Load mod
	adrp	x2,.Lpoly_div_2
	add	x2,x2,#:lo12:.Lpoly_div_2
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Parity check: even input adds zero instead of (p + 1) / 2
	tst	x3,#1
	csel	x11,xzr,x11,eq
	csel	x12,xzr,x12,eq
	csel	x13,xzr,x13,eq
	csel	x14,xzr,x14,eq

	// Add
	adds	x7,x7,x11
	adcs	x8,x8,x12
	adcs	x9,x9,x13
	adc	x10,x10,x14

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]
	ret
.size ecp_sm2p256_div_by_2,.-ecp_sm2p256_div_by_2

/*
 * void ecp_sm2p256_div_by_2_mod_ord(BN_ULONG *r,const BN_ULONG *a);
 *
 * Same computation as ecp_sm2p256_div_by_2 but with (n + 1) / 2:
 * r = (a >> 1) + (a is odd ? (n + 1) / 2 : 0), truncated to 256 bits.
 * NOTE(review): presumably callers pass a < n; confirm at the call sites.
 * In   : x0 = r, x1 = a
 * Clobb: x2, x3, x7-x14, flags
 */
.globl ecp_sm2p256_div_by_2_mod_ord
.type ecp_sm2p256_div_by_2_mod_ord,%function
.align 5
ecp_sm2p256_div_by_2_mod_ord:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]

	// Save the least significant bit
	mov	x3,x7

	// Right shift 1
	extr	x7,x8,x7,#1
	extr	x8,x9,x8,#1
	extr	x9,x10,x9,#1
	lsr	x10,x10,#1

	// Load mod
	adrp	x2,.Lord_div_2
	add	x2,x2,#:lo12:.Lord_div_2
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Parity check: even input adds zero instead of (n + 1) / 2
	tst	x3,#1
	csel	x11,xzr,x11,eq
	csel	x12,xzr,x12,eq
	csel	x13,xzr,x13,eq
	csel	x14,xzr,x14,eq

	// Add
	adds	x7,x7,x11
	adcs	x8,x8,x12
	adcs	x9,x9,x13
	adc	x10,x10,x14

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]
	ret
.size ecp_sm2p256_div_by_2_mod_ord,.-ecp_sm2p256_div_by_2_mod_ord

/*
 * void ecp_sm2p256_mul_by_3(BN_ULONG *r,const BN_ULONG *a);
 *
 * Computes 2*a with one conditional subtraction of p, then adds a again
 * with a second conditional subtraction of p.
 * NOTE(review): this yields 3*a mod p provided a < p; confirm callers
 * guarantee a reduced input.
 * a is read twice before r is written, so r may alias a.
 * In   : x0 = r, x1 = a
 * Clobb: x2-x15, flags
 */
.globl ecp_sm2p256_mul_by_3
.type ecp_sm2p256_mul_by_3,%function
.align 5
ecp_sm2p256_mul_by_3:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]

	// 2*a, with the bit shifted out of word 3 captured in x15
	adds	x7,x7,x7
	adcs	x8,x8,x8
	adcs	x9,x9,x9
	adcs	x10,x10,x10
	adcs	x15,xzr,xzr

	// Keep the unreduced value for the select below
	mov	x3,x7
	mov	x4,x8
	mov	x5,x9
	mov	x6,x10

	// Sub polynomial
	adrp	x2,.Lpoly
	add	x2,x2,#:lo12:.Lpoly
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]
	subs	x7,x7,x11
	sbcs	x8,x8,x12
	sbcs	x9,x9,x13
	sbcs	x10,x10,x14
	sbcs	x15,x15,xzr

	// Carry set means no borrow: keep the difference, else restore
	csel	x7,x7,x3,cs
	csel	x8,x8,x4,cs
	csel	x9,x9,x5,cs
	csel	x10,x10,x6,cs
	// Clears x15; the adcs x15,xzr,xzr further down overwrites it anyway
	eor	x15,x15,x15

	// 3*a
	ldp	x11,x12,[x1]
	ldp	x13,x14,[x1,#16]
	adds	x7,x7,x11
	adcs	x8,x8,x12
	adcs	x9,x9,x13
	adcs	x10,x10,x14
	adcs	x15,xzr,xzr

	mov	x3,x7
	mov	x4,x8
	mov	x5,x9
	mov	x6,x10

	// Sub polynomial
	adrp	x2,.Lpoly
	add	x2,x2,#:lo12:.Lpoly
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]
	subs	x7,x7,x11
	sbcs	x8,x8,x12
	sbcs	x9,x9,x13
	sbcs	x10,x10,x14
	sbcs	x15,x15,xzr

	csel	x7,x7,x3,cs
	csel	x8,x8,x4,cs
	csel	x9,x9,x5,cs
	csel	x10,x10,x6,cs

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]

	ret
.size ecp_sm2p256_mul_by_3,.-ecp_sm2p256_mul_by_3

/*
 * void ecp_sm2p256_add(BN_ULONG *r,const BN_ULONG *a,const BN_ULONG *b);
 *
 * r = a + b, followed by one conditional subtraction of p: the
 * difference is kept when the 257-bit sum is at least p.
 * NOTE(review): a single subtraction fully reduces only when a, b < p;
 * confirm callers guarantee that.
 * In   : x0 = r, x1 = a, x2 = b
 * Clobb: x2-x15, flags
 */
.globl ecp_sm2p256_add
.type ecp_sm2p256_add,%function
.align 5
ecp_sm2p256_add:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Addition, carry out of word 3 kept in x15
	adds	x7,x7,x11
	adcs	x8,x8,x12
	adcs	x9,x9,x13
	adcs	x10,x10,x14
	adc	x15,xzr,xzr

	// Load polynomial
	adrp	x2,.Lpoly
	add	x2,x2,#:lo12:.Lpoly
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Backup Addition
	mov	x3,x7
	mov	x4,x8
	mov	x5,x9
	mov	x6,x10

	// Sub polynomial (on the copy)
	subs	x3,x3,x11
	sbcs	x4,x4,x12
	sbcs	x5,x5,x13
	sbcs	x6,x6,x14
	sbcs	x15,x15,xzr

	// Select based on carry: carry clear means borrow, keep the plain sum
	csel	x7,x7,x3,cc
	csel	x8,x8,x4,cc
	csel	x9,x9,x5,cc
	csel	x10,x10,x6,cc

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]
	ret
.size ecp_sm2p256_add,.-ecp_sm2p256_add

/*
 * void ecp_sm2p256_sub(BN_ULONG *r,const BN_ULONG *a,const BN_ULONG *b);
 *
 * r = a - b; when the subtraction borrows, p is added back once.
 * NOTE(review): this is a - b mod p only when a, b < p; confirm callers
 * guarantee that.
 * In   : x0 = r, x1 = a, x2 = b
 * Clobb: x2-x15, flags
 */
.globl ecp_sm2p256_sub
.type ecp_sm2p256_sub,%function
.align 5
ecp_sm2p256_sub:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Subtraction; x15 becomes all ones on borrow, zero otherwise
	subs	x7,x7,x11
	sbcs	x8,x8,x12
	sbcs	x9,x9,x13
	sbcs	x10,x10,x14
	sbc	x15,xzr,xzr

	// Load polynomial
	adrp	x2,.Lpoly
	add	x2,x2,#:lo12:.Lpoly
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Backup subtraction
	mov	x3,x7
	mov	x4,x8
	mov	x5,x9
	mov	x6,x10

	// Add polynomial (on the copy)
	adds	x3,x3,x11
	adcs	x4,x4,x12
	adcs	x5,x5,x13
	adcs	x6,x6,x14
	tst	x15,x15

	// Select based on carry: x15 == 0 means no borrow, keep a - b
	csel	x7,x7,x3,eq
	csel	x8,x8,x4,eq
	csel	x9,x9,x5,eq
	csel	x10,x10,x6,eq

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]
	ret
.size ecp_sm2p256_sub,.-ecp_sm2p256_sub

/*
 * void ecp_sm2p256_sub_mod_ord(BN_ULONG *r,const BN_ULONG *a,const BN_ULONG *b);
 *
 * Same as ecp_sm2p256_sub but the value added back on borrow is n.
 * NOTE(review): this is a - b mod n only when a, b < n; confirm callers
 * guarantee that.
 * In   : x0 = r, x1 = a, x2 = b
 * Clobb: x2-x15, flags
 */
.globl ecp_sm2p256_sub_mod_ord
.type ecp_sm2p256_sub_mod_ord,%function
.align 5
ecp_sm2p256_sub_mod_ord:
	AARCH64_VALID_CALL_TARGET
	// Load inputs
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Subtraction; x15 becomes all ones on borrow, zero otherwise
	subs	x7,x7,x11
	sbcs	x8,x8,x12
	sbcs	x9,x9,x13
	sbcs	x10,x10,x14
	sbc	x15,xzr,xzr

	// Load order
	adrp	x2,.Lord
	add	x2,x2,#:lo12:.Lord
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// Backup subtraction
	mov	x3,x7
	mov	x4,x8
	mov	x5,x9
	mov	x6,x10

	// Add order (on the copy)
	adds	x3,x3,x11
	adcs	x4,x4,x12
	adcs	x5,x5,x13
	adcs	x6,x6,x14
	tst	x15,x15

	// Select based on carry: x15 == 0 means no borrow, keep a - b
	csel	x7,x7,x3,eq
	csel	x8,x8,x4,eq
	csel	x9,x9,x5,eq
	csel	x10,x10,x6,eq

	// Store results
	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]
	ret
.size ecp_sm2p256_sub_mod_ord,.-ecp_sm2p256_sub_mod_ord

/*
 * RDC: reduce a 512-bit value modulo p.
 *
 * In   : x7,x8,x9,x10    = s0..s3 (low 256 bits, least significant first)
 *        x11,x12,x13,x14 = s4..s7 (high 256 bits)
 * Out  : x7,x8,x9,x10    = reduced 256-bit result
 * Clobb: x3-x6, x11-x16, flags
 * Stack: uses [sp,#32] .. [sp,#63] as scratch, so the expanding function
 *        must own that part of its frame.
 *
 * Names such as t1, t3, s5 in the comments below are the variable names
 * of the generator script, not registers; the register actually holding
 * each quantity is listed next to the U[] and V[] summaries.
 */
.macro RDC
	// a = | s7   | ... | s0  |, where si are 64-bit quantities
	//   = |a15|a14| ... |a1|a0|, where ai are 32-bit quantities
	// |    s7     |    s6     |    s5     |    s4     |
	// | a15 | a14 | a13 | a12 | a11 | a10 | a9  | a8  |
	// |    s3     |    s2     |    s1     |    s0     |
	// | a7  | a6  | a5  | a4  | a3  | a2  | a1  | a0  |
	// =================================================
	// | a8  | a11 | a10 | a9  | a8  |   0 |    s4     | (+)
	// | a9  | a15 |    s6     | a11 |   0 | a10 | a9  | (+)
	// | a10 |   0 | a14 | a13 | a12 |   0 |    s5     | (+)
	// | a11 |   0 |    s7     | a13 |   0 | a12 | a11 | (+)
	// | a12 |   0 |    s7     | a13 |   0 |    s6     | (+)
	// | a12 |   0 |   0 | a15 | a14 |   0 | a14 | a13 | (+)
	// | a13 |   0 |   0 |   0 | a15 |   0 | a14 | a13 | (+)
	// | a13 |   0 |   0 |   0 |   0 |   0 |    s7     | (+)
	// | a14 |   0 |   0 |   0 |   0 |   0 |    s7     | (+)
	// | a14 |   0 |   0 |   0 |   0 |   0 |   0 | a15 | (+)
	// | a15 |   0 |   0 |   0 |   0 |   0 |   0 | a15 | (+)
	// | a15 |   0 |   0 |   0 |   0 |   0 |   0 |   0 | (+)
	// |    s7     |   0 |   0 |   0 |   0 |   0 |   0 | (+)
	// |   0 |   0 |   0 |   0 |   0 | a8  |   0 |   0 | (-)
	// |   0 |   0 |   0 |   0 |   0 | a9  |   0 |   0 | (-)
	// |   0 |   0 |   0 |   0 |   0 | a13 |   0 |   0 | (-)
	// |   0 |   0 |   0 |   0 |   0 | a14 |   0 |   0 | (-)
	// | U[7]| U[6]| U[5]| U[4]| U[3]| U[2]| U[1]| U[0]|
	// |    V[3]   |    V[2]   |    V[1]   |    V[0]   |

	// 1. 64-bit addition
	// t2=s6+s7+s7          (x5 = low word, x4 = carries)
	adds	x5,x13,x14
	adcs	x4,xzr,xzr
	adds	x5,x5,x14
	adcs	x4,x4,xzr
	// t3=s4+s5+t2          (x6 = low word, x15 = carries)
	adds	x6,x11,x5
	adcs	x15,x4,xzr
	adds	x6,x6,x12
	adcs	x15,x15,xzr
	// sum: fold the whole-word terms into s0..s3, overflow count in x3
	adds	x7,x7,x6
	adcs	x8,x8,x15
	adcs	x9,x9,x5
	adcs	x10,x10,x14
	adcs	x3,xzr,xzr
	adds	x10,x10,x4
	adcs	x3,x3,xzr

	// Park the partial sum; x7-x10 are reused as temporaries below
	stp	x7,x8,[sp,#32]
	stp	x9,x10,[sp,#48]

	// 2. 64-bit to 32-bit spread
	mov	x4,#0xffffffff
	mov	x7,x11
	mov	x8,x12
	mov	x9,x13
	mov	x10,x14
	and	x7,x7,x4	// a8
	and	x8,x8,x4	// a10
	and	x9,x9,x4	// a12
	and	x10,x10,x4	// a14
	lsr	x11,x11,#32	// a9
	lsr	x12,x12,#32	// a11
	lsr	x13,x13,#32	// a13
	lsr	x14,x14,#32	// a15

	// 3. 32-bit addition
	add	x4,x10,x9	// t1 <- a12 + a14
	add	x5,x14,x13	// t2 <- a13 + a15
	add	x6,x7,x11	// t3 <- a8 + a9
	add	x15,x10,x8	// t4 <- a10 + a14
	add	x14,x14,x12	// a15 <- a11 + a15
	add	x9,x5,x4	// a12 <- a12 + a13 + a14 + a15
	add	x8,x8,x9	// a10 <- a10 + a12 + a13 + a14 + a15
	add	x8,x8,x9	// a10 <- a10 + 2*(a12 + a13 + a14 + a15)
	add	x8,x8,x6	// a10 <- a8 + a9 + a10 + 2*(a12 + a13 + a14 + a15)
	add	x8,x8,x12	// a10 <- a8 + a9 + a10 + a11 + 2*(a12 + a13 + a14 + a15)
	add	x9,x9,x13	// a12 <- a12 + 2*a13 + a14 + a15
	add	x9,x9,x12	// a12 <- a11 + a12 + 2*a13 + a14 + a15
	add	x9,x9,x7	// a12 <- a8 + a11 + a12 + 2*a13 + a14 + a15
	add	x6,x6,x10	// t3 <- a8 + a9 + a14
	add	x6,x6,x13	// t3 <- a8 + a9 + a13 + a14
	add	x11,x11,x5	// a9 <- a9 + a13 + a15
	add	x12,x12,x11	// a11 <- a9 + a11 + a13 + a15
	add	x12,x12,x5	// a11 <- a9 + a11 + 2*(a13 + a15)
	add	x4,x4,x15	// t1 <- a10 + a12 + 2*a14

	// U[0]  s5 (x12)  a9 + a11 + 2*(a13 + a15)
	// U[1]  t1 (x4)   a10 + a12 + 2*a14
	// U[2] -t3 (x6)   a8 + a9 + a13 + a14
	// U[3]  s2 (x9)   a8 + a11 + a12 + 2*a13 + a14 + a15
	// U[4]  s4 (x11)  a9 + a13 + a15
	// U[5]  t4 (x15)  a10 + a14
	// U[6]  s7 (x14)  a11 + a15
	// U[7]  s1 (x8)   a8 + a9 + a10 + a11 + 2*(a12 + a13 + a14 + a15)

	// 4. 32-bit to 64-bit: shift the odd-indexed U values up by 32 bits
	//    as one long number spread over x7, x4, x9, x15, x8
	lsl	x7,x4,#32
	extr	x4,x9,x4,#32
	extr	x9,x15,x9,#32
	extr	x15,x8,x15,#32
	lsr	x8,x8,#32

	// 5. 64-bit addition
	adds	x12,x12,x7
	adcs	x4,x4,xzr
	adcs	x11,x11,x9
	adcs	x14,x14,x15
	adcs	x3,x3,x8

	// V[0]  s5 (x12)
	// V[1]  t1 (x4)
	// V[2]  s4 (x11)
	// V[3]  s7 (x14)
	// carry t0 (x3)
	// sub   t3 (x6)

	// 6. Process s0-s3
	ldp	x7,x8,[sp,#32]
	ldp	x9,x10,[sp,#48]
	// add with V0-V3
	adds	x7,x7,x12
	adcs	x8,x8,x4
	adcs	x9,x9,x11
	adcs	x10,x10,x14
	adcs	x3,x3,xzr
	// sub with t3 (U[2] sits in the low half of word 1)
	subs	x8,x8,x6
	sbcs	x9,x9,xzr
	sbcs	x10,x10,xzr
	sbcs	x3,x3,xzr

	// 7. MOD
	// First Mod: fold the overflow count c = x3 back in, using
	// 2^256 = 2^224 + 2^96 - 2^64 + 1 (mod p)
	lsl	x4,x3,#32
	subs	x5,x4,x3

	adds	x7,x7,x3
	adcs	x8,x8,x5
	adcs	x9,x9,xzr
	adcs	x10,x10,x4

	// Last Mod
	// return y - p if y >= p else y
	mov	x11,x7
	mov	x12,x8
	mov	x13,x9
	mov	x14,x10

	// None of the four instructions below sets flags, so the carry
	// from the adcs above is still live for the adcs into x16
	adrp	x3,.Lpoly
	add	x3,x3,#:lo12:.Lpoly
	ldp	x4,x5,[x3]
	ldp	x6,x15,[x3,#16]

	adcs	x16,xzr,xzr

	subs	x7,x7,x4
	sbcs	x8,x8,x5
	sbcs	x9,x9,x6
	sbcs	x10,x10,x15
	sbcs	x16,x16,xzr

	// Carry set means no borrow: keep y - p, else restore y
	csel	x7,x7,x11,cs
	csel	x8,x8,x12,cs
	csel	x9,x9,x13,cs
	csel	x10,x10,x14,cs

.endm

/*
 * void ecp_sm2p256_mul(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b);
 *
 * Forms the full 512-bit product a * b by column-wise schoolbook
 * multiplication and reduces it with RDC; the result is stored to r.
 * In   : x0 = r, x1 = a, x2 = b
 * Clobb: x3-x15, flags (x16, x17, x19, x20 are saved and restored)
 *
 * Frame (80 bytes, allocated by the pre-indexed stp):
 *   [sp,#0]   x29, x30
 *   [sp,#16]  x16, x17
 *   [sp,#32]  32 bytes of scratch used by RDC
 *   [sp,#64]  x19, x20
 */
.globl ecp_sm2p256_mul
.type ecp_sm2p256_mul,%function
.align 5
ecp_sm2p256_mul:
	AARCH64_SIGN_LINK_REGISTER
	// Store scalar registers
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	x16,x17,[sp,#16]
	stp	x19,x20,[sp,#64]

	// Load inputs: a in x7-x10 (s0..s3), b in x11-x14 (s4..s7)
	ldp	x7,x8,[x1]
	ldp	x9,x10,[x1,#16]
	ldp	x11,x12,[x2]
	ldp	x13,x14,[x2,#16]

	// ### multiplication ###
	// ========================
	//             s3 s2 s1 s0
	// *           s7 s6 s5 s4
	// ------------------------
	// +           s0 s0 s0 s0
	//              *  *  *  *
	//             s7 s6 s5 s4
	//          s1 s1 s1 s1
	//           *  *  *  *
	//          s7 s6 s5 s4
	//       s2 s2 s2 s2
	//        *  *  *  *
	//       s7 s6 s5 s4
	//    s3 s3 s3 s3
	//     *  *  *  *
	//    s7 s6 s5 s4
	// ------------------------
	// s7 s6 s5 s4 s3 s2 s1 s0
	// ========================

	// ### s0*s4 ###
	mul	x16,x7,x11
	umulh	x5,x7,x11

	// ### s1*s4 + s0*s5 ###
	mul	x3,x8,x11
	umulh	x4,x8,x11
	adds	x5,x5,x3
	adcs	x6,x4,xzr

	mul	x3,x7,x12
	umulh	x4,x7,x12
	adds	x5,x5,x3
	adcs	x6,x6,x4
	adcs	x15,xzr,xzr

	// ### s2*s4 + s1*s5 + s0*s6 ###
	mul	x3,x9,x11
	umulh	x4,x9,x11
	adds	x6,x6,x3
	adcs	x15,x15,x4

	mul	x3,x8,x12
	umulh	x4,x8,x12
	adds	x6,x6,x3
	adcs	x15,x15,x4
	adcs	x17,xzr,xzr

	mul	x3,x7,x13
	umulh	x4,x7,x13
	adds	x6,x6,x3
	adcs	x15,x15,x4
	adcs	x17,x17,xzr

	// ### s3*s4 + s2*s5 + s1*s6 + s0*s7 ###
	mul	x3,x10,x11
	umulh	x4,x10,x11
	adds	x15,x15,x3
	adcs	x17,x17,x4
	adcs	x19,xzr,xzr

	mul	x3,x9,x12
	umulh	x4,x9,x12
	adds	x15,x15,x3
	adcs	x17,x17,x4
	adcs	x19,x19,xzr

	mul	x3,x8,x13
	umulh	x4,x8,x13
	adds	x15,x15,x3
	adcs	x17,x17,x4
	adcs	x19,x19,xzr

	mul	x3,x7,x14
	umulh	x4,x7,x14
	adds	x15,x15,x3
	adcs	x17,x17,x4
	adcs	x19,x19,xzr

	// ### s3*s5 + s2*s6 + s1*s7 ###
	mul	x3,x10,x12
	umulh	x4,x10,x12
	adds	x17,x17,x3
	adcs	x19,x19,x4
	adcs	x20,xzr,xzr

	mul	x3,x9,x13
	umulh	x4,x9,x13
	adds	x17,x17,x3
	adcs	x19,x19,x4
	adcs	x20,x20,xzr

	// x11 (s4 of b) is no longer needed: it receives product word 4
	mul	x3,x8,x14
	umulh	x4,x8,x14
	adds	x11,x17,x3
	adcs	x19,x19,x4
	adcs	x20,x20,xzr

	// ### s3*s6 + s2*s7 ###
	mul	x3,x10,x13
	umulh	x4,x10,x13
	adds	x19,x19,x3
	adcs	x20,x20,x4
	adcs	x17,xzr,xzr

	// x12 (s5 of b) is no longer needed: it receives product word 5
	mul	x3,x9,x14
	umulh	x4,x9,x14
	adds	x12,x19,x3
	adcs	x20,x20,x4
	adcs	x17,x17,xzr

	// ### s3*s7 ###
	// x3, x4 are computed before x13, x14 are overwritten with
	// product words 6 and 7
	mul	x3,x10,x14
	umulh	x4,x10,x14
	adds	x13,x20,x3
	adcs	x14,x17,x4

	// Move product words 0..3 into the registers RDC expects
	mov	x7,x16
	mov	x8,x5
	mov	x9,x6
	mov	x10,x15

	// result of mul: s7 s6 s5 s4 s3 s2 s1 s0

	// ### Reduction ###
	RDC

	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]

	// Restore scalar registers
	ldp	x16,x17,[sp,#16]
	ldp	x19,x20,[sp,#64]
	ldp	x29,x30,[sp],#80

	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size ecp_sm2p256_mul,.-ecp_sm2p256_mul

/*
 * void ecp_sm2p256_sqr(BN_ULONG *r, const BN_ULONG *a);
 *
 * Forms the full 512-bit square of a (cross products once, doubled,
 * plus the four diagonal squares) and reduces it with RDC; the result
 * is stored to r.
 * In   : x0 = r, x1 = a
 * Clobb: x3-x15, flags (x16, x17, x19, x20 are saved and restored)
 *
 * Frame (80 bytes, allocated by the pre-indexed stp):
 *   [sp,#0]   x29, x30
 *   [sp,#16]  x16, x17
 *   [sp,#32]  32 bytes of scratch used by RDC
 *   [sp,#64]  x19, x20
 */
.globl ecp_sm2p256_sqr
.type ecp_sm2p256_sqr,%function
.align 5

ecp_sm2p256_sqr:
	AARCH64_SIGN_LINK_REGISTER
	// Store scalar registers
	stp	x29,x30,[sp,#-80]!
	add	x29,sp,#0
	stp	x16,x17,[sp,#16]
	stp	x19,x20,[sp,#64]

	// Load inputs: a in x11-x14 (named s4..s7 in the comments below)
	ldp	x11,x12,[x1]
	ldp	x13,x14,[x1,#16]

	// ### square ###
	// ========================
	//             s7 s6 s5 s4
	// *           s7 s6 s5 s4
	// ------------------------
	// +           s4 s4 s4 s4
	//              *  *  *  *
	//             s7 s6 s5 s4
	//          s5 s5 s5 s5
	//           *  *  *  *
	//          s7 s6 s5 s4
	//       s6 s6 s6 s6
	//        *  *  *  *
	//       s7 s6 s5 s4
	//    s7 s7 s7 s7
	//     *  *  *  *
	//    s7 s6 s5 s4
	// ------------------------
	// s7 s6 s5 s4 s3 s2 s1 s0
	// ========================

	// ### s4*s5 ###
	mul	x8,x11,x12
	umulh	x9,x11,x12

	// ### s4*s6 ###
	mul	x3,x13,x11
	umulh	x10,x13,x11
	adds	x9,x9,x3
	adcs	x10,x10,xzr

	// ### s4*s7 + s5*s6 ###
	mul	x3,x14,x11
	umulh	x4,x14,x11
	adds	x10,x10,x3
	adcs	x7,x4,xzr

	mul	x3,x13,x12
	umulh	x4,x13,x12
	adds	x10,x10,x3
	adcs	x7,x7,x4
	adcs	x5,xzr,xzr

	// ### s5*s7 ###
	mul	x3,x14,x12
	umulh	x4,x14,x12
	adds	x7,x7,x3
	adcs	x5,x5,x4

	// ### s6*s7 ###
	mul	x3,x14,x13
	umulh	x4,x14,x13
	adds	x5,x5,x3
	adcs	x6,x4,xzr

	// ### 2*(t3,t2,s0,s3,s2,s1) ###
	// Doubles the cross products held in x8,x9,x10,x7,x5,x6
	// (low to high); the bit shifted out is kept in x15
	adds	x8,x8,x8
	adcs	x9,x9,x9
	adcs	x10,x10,x10
	adcs	x7,x7,x7
	adcs	x5,x5,x5
	adcs	x6,x6,x6
	adcs	x15,xzr,xzr

	// ### s4*s4 ###
	mul	x16,x11,x11
	umulh	x17,x11,x11

	// ### s5*s5 ###
	// The low product overwrites x11 first (x11 is not a source here);
	// the umulh then consumes x12 before replacing it
	mul	x11,x12,x12
	umulh	x12,x12,x12

	// ### s6*s6 ###
	mul	x3,x13,x13
	umulh	x4,x13,x13

	// ### s7*s7 ###
	mul	x19,x14,x14
	umulh	x20,x14,x14

	// Add the diagonal squares to the doubled cross products
	adds	x8,x8,x17
	adcs	x9,x9,x11
	adcs	x10,x10,x12
	adcs	x7,x7,x3
	adcs	x5,x5,x4
	adcs	x6,x6,x19
	adcs	x15,x15,x20

	// Move product words into the registers RDC expects:
	// low half in x7-x10, high half in x11-x14
	mov	x11,x7
	mov	x7,x16
	mov	x12,x5
	mov	x13,x6
	mov	x14,x15

	// result of mul: s7 s6 s5 s4 s3 s2 s1 s0

	// ### Reduction ###
	RDC

	stp	x7,x8,[x0]
	stp	x9,x10,[x0,#16]

	// Restore scalar registers
	ldp	x16,x17,[sp,#16]
	ldp	x19,x20,[sp,#64]
	ldp	x29,x30,[sp],#80

	AARCH64_VALIDATE_LINK_REGISTER
	ret
.size ecp_sm2p256_sqr,.-ecp_sm2p256_sqr