Path: blob/main/sys/crypto/openssl/aarch64/chacha-armv8-sve.S
39536 views
/* Do not modify. This file is auto-generated from chacha-armv8-sve.pl. */1// Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved.2//3// Licensed under the Apache License 2.0 (the "License"). You may not use4// this file except in compliance with the License. You can obtain a copy5// in the file LICENSE in the source distribution or at6// https://www.openssl.org/source/license.html7//8//9// ChaCha20 for ARMv8 via SVE10//11// $output is the last argument if it looks like a file (it has an extension)12// $flavour is the first argument if it doesn't look like a file13#include "arm_arch.h"1415.arch armv8-a161718.hidden OPENSSL_armcap_P1920.text2122.section .rodata23.align 524.type _chacha_sve_consts,%object25_chacha_sve_consts:26.Lchacha20_consts:27.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral28.Lrot8:29.word 0x02010003,0x04040404,0x02010003,0x0404040430.size _chacha_sve_consts,.-_chacha_sve_consts3132.previous3334.globl ChaCha20_ctr32_sve35.type ChaCha20_ctr32_sve,%function36.align 537ChaCha20_ctr32_sve:38AARCH64_VALID_CALL_TARGET39.inst 0x04a0e3e5 //cntw x5, ALL, MUL #140cmp x2,x5,lsl #641b.lt .Lreturn42mov x7,043adrp x6,OPENSSL_armcap_P44ldr w6,[x6,#:lo12:OPENSSL_armcap_P]45tst w6,#ARMV8_SVE246b.eq 1f47mov x7,148b 2f491:50cmp x5,451b.le .Lreturn52adrp x6,.Lrot853add x6,x6,#:lo12:.Lrot854ldp w9,w10,[x6]55.inst 0x04aa4d3f //index z31.s,w9,w10562:57AARCH64_SIGN_LINK_REGISTER58stp d8,d9,[sp,-192]!59stp d10,d11,[sp,16]60stp d12,d13,[sp,32]61stp d14,d15,[sp,48]62stp x16,x17,[sp,64]63stp x18,x19,[sp,80]64stp x20,x21,[sp,96]65stp x22,x23,[sp,112]66stp x24,x25,[sp,128]67stp x26,x27,[sp,144]68stp x28,x29,[sp,160]69str x30,[sp,176]7071adrp x6,.Lchacha20_consts72add x6,x6,#:lo12:.Lchacha20_consts73ldp x23,x24,[x6]74ldp x25,x26,[x3]75ldp x27,x28,[x3, 16]76ldp x29,x30,[x4]77.inst 0x2599e3e0 //ptrues p0.s,ALL78#ifdef __AARCH64EB__79ror x25,x25,#3280ror x26,x26,#3281ror x27,x27,#3282ror x28,x28,#3283ror x29,x29,#3284ror x30,x30,#3285#endif86cbz x7, 1f87.align 588100:89subs x7,x2,x5,lsl #690b.lt 110f91mov x2,x792b.eq 101f93cmp x2,6494b.lt 101f95mixin=196lsr x8,x23,#3297.inst 0x05a03ae0 //dup z0.s,w2398.inst 0x05a03af9 //dup z25.s,w2399.if mixin == 1100mov w7,w23101.endif102.inst 0x05a03904 //dup z4.s,w8103.inst 0x05a0391a //dup z26.s,w8104lsr x10,x24,#32105.inst 0x05a03b08 //dup z8.s,w24106.inst 0x05a03b1b //dup z27.s,w24107.if mixin == 1108mov w9,w24109.endif110.inst 0x05a0394c //dup z12.s,w10111.inst 0x05a0395c //dup z28.s,w10112lsr x12,x25,#32113.inst 0x05a03b21 //dup z1.s,w25114.inst 0x05a03b3d //dup z29.s,w25115.if mixin == 1116mov w11,w25117.endif118.inst 0x05a03985 //dup z5.s,w12119.inst 0x05a0399e //dup z30.s,w12120lsr x14,x26,#32121.inst 0x05a03b49 //dup z9.s,w26122.inst 0x05a03b55 //dup z21.s,w26123.if mixin == 1124mov w13,w26125.endif126.inst 0x05a039cd //dup z13.s,w14127.inst 0x05a039d6 //dup z22.s,w14128lsr x16,x27,#32129.inst 0x05a03b62 //dup z2.s,w27130.inst 0x05a03b77 //dup z23.s,w27131.if mixin == 1132mov w15,w27133.endif134.inst 0x05a03a06 //dup z6.s,w16135.inst 0x05a03a18 //dup z24.s,w16136lsr x18,x28,#32137.inst 0x05a03b8a //dup z10.s,w28138.inst 0x05a03b91 //dup z17.s,w28139.if mixin == 1140mov w17,w28141.endif142.inst 0x05a03a4e //dup z14.s,w18143.inst 0x05a03a52 //dup z18.s,w18144lsr x22,x30,#32145.inst 0x05a03bcb //dup z11.s,w30146.inst 0x05a03bd4 //dup z20.s,w30147.if mixin == 1148mov w21,w30149.endif150.inst 0x05a03acf //dup z15.s,w22151.inst 0x05a03adf //dup z31.s,w22152.if mixin == 1153add w20,w29,#1154mov w19,w29155.inst 0x04a14690 //index z16.s,w20,1156.inst 0x04a14683 //index z3.s,w20,1157.else158.inst 0x04a147b0 //index z16.s,w29,1159.inst 0x04a147a3 //index z3.s,w29,1160.endif161lsr x20,x29,#32162.inst 0x05a03a87 //dup z7.s,w20163.inst 0x05a03a93 //dup z19.s,w20164mov x6,#1016510:166.align 5167.inst 0x04a10000 //add z0.s,z0.s,z1.s168.if mixin == 1169add w7,w7,w11170.endif171.inst 0x04a50084 //add z4.s,z4.s,z5.s172.if mixin == 1173add w8,w8,w12174.endif175.inst 0x04a90108 //add z8.s,z8.s,z9.s176.if mixin == 1177add w9,w9,w13178.endif179.inst 0x04ad018c //add z12.s,z12.s,z13.s180.if mixin == 1181add w10,w10,w14182.endif183.if mixin == 1184eor w19,w19,w7185.endif186.inst 0x04703403 //xar z3.s,z3.s,z0.s,16187.if mixin == 1188ror w19,w19,16189.endif190.if mixin == 1191eor w20,w20,w8192.endif193.inst 0x04703487 //xar z7.s,z7.s,z4.s,16194.if mixin == 1195ror w20,w20,16196.endif197.if mixin == 1198eor w21,w21,w9199.endif200.inst 0x0470350b //xar z11.s,z11.s,z8.s,16201.if mixin == 1202ror w21,w21,16203.endif204.if mixin == 1205eor w22,w22,w10206.endif207.inst 0x0470358f //xar z15.s,z15.s,z12.s,16208.if mixin == 1209ror w22,w22,16210.endif211.inst 0x04a30042 //add z2.s,z2.s,z3.s212.if mixin == 1213add w15,w15,w19214.endif215.inst 0x04a700c6 //add z6.s,z6.s,z7.s216.if mixin == 1217add w16,w16,w20218.endif219.inst 0x04ab014a //add z10.s,z10.s,z11.s220.if mixin == 1221add w17,w17,w21222.endif223.inst 0x04af01ce //add z14.s,z14.s,z15.s224.if mixin == 1225add w18,w18,w22226.endif227.if mixin == 1228eor w11,w11,w15229.endif230.inst 0x046c3441 //xar z1.s,z1.s,z2.s,20231.if mixin == 1232ror w11,w11,20233.endif234.if mixin == 1235eor w12,w12,w16236.endif237.inst 0x046c34c5 //xar z5.s,z5.s,z6.s,20238.if mixin == 1239ror w12,w12,20240.endif241.if mixin == 1242eor w13,w13,w17243.endif244.inst 0x046c3549 //xar z9.s,z9.s,z10.s,20245.if mixin == 1246ror w13,w13,20247.endif248.if mixin == 1249eor w14,w14,w18250.endif251.inst 0x046c35cd //xar z13.s,z13.s,z14.s,20252.if mixin == 1253ror w14,w14,20254.endif255.inst 0x04a10000 //add z0.s,z0.s,z1.s256.if mixin == 1257add w7,w7,w11258.endif259.inst 0x04a50084 //add z4.s,z4.s,z5.s260.if mixin == 1261add w8,w8,w12262.endif263.inst 0x04a90108 //add z8.s,z8.s,z9.s264.if mixin == 1265add w9,w9,w13266.endif267.inst 0x04ad018c //add z12.s,z12.s,z13.s268.if mixin == 1269add w10,w10,w14270.endif271.if mixin == 1272eor w19,w19,w7273.endif274.inst 0x04683403 //xar z3.s,z3.s,z0.s,24275.if mixin == 1276ror w19,w19,24277.endif278.if mixin == 1279eor w20,w20,w8280.endif281.inst 0x04683487 //xar z7.s,z7.s,z4.s,24282.if mixin == 1283ror w20,w20,24284.endif285.if mixin == 1286eor w21,w21,w9287.endif288.inst 0x0468350b //xar z11.s,z11.s,z8.s,24289.if mixin == 1290ror w21,w21,24291.endif292.if mixin == 1293eor w22,w22,w10294.endif295.inst 0x0468358f //xar z15.s,z15.s,z12.s,24296.if mixin == 1297ror w22,w22,24298.endif299.inst 0x04a30042 //add z2.s,z2.s,z3.s300.if mixin == 1301add w15,w15,w19302.endif303.inst 0x04a700c6 //add z6.s,z6.s,z7.s304.if mixin == 1305add w16,w16,w20306.endif307.inst 0x04ab014a //add z10.s,z10.s,z11.s308.if mixin == 1309add w17,w17,w21310.endif311.inst 0x04af01ce //add z14.s,z14.s,z15.s312.if mixin == 1313add w18,w18,w22314.endif315.if mixin == 1316eor w11,w11,w15317.endif318.inst 0x04673441 //xar z1.s,z1.s,z2.s,25319.if mixin == 1320ror w11,w11,25321.endif322.if mixin == 1323eor w12,w12,w16324.endif325.inst 0x046734c5 //xar z5.s,z5.s,z6.s,25326.if mixin == 1327ror w12,w12,25328.endif329.if mixin == 1330eor w13,w13,w17331.endif332.inst 0x04673549 //xar z9.s,z9.s,z10.s,25333.if mixin == 1334ror w13,w13,25335.endif336.if mixin == 1337eor w14,w14,w18338.endif339.inst 0x046735cd //xar z13.s,z13.s,z14.s,25340.if mixin == 1341ror w14,w14,25342.endif343.inst 0x04a50000 //add z0.s,z0.s,z5.s344.if mixin == 1345add w7,w7,w12346.endif347.inst 0x04a90084 //add z4.s,z4.s,z9.s348.if mixin == 1349add w8,w8,w13350.endif351.inst 0x04ad0108 //add z8.s,z8.s,z13.s352.if mixin == 1353add w9,w9,w14354.endif355.inst 0x04a1018c //add z12.s,z12.s,z1.s356.if mixin == 1357add w10,w10,w11358.endif359.if mixin == 1360eor w22,w22,w7361.endif362.inst 0x0470340f //xar z15.s,z15.s,z0.s,16363.if mixin == 1364ror w22,w22,16365.endif366.if mixin == 1367eor w19,w19,w8368.endif369.inst 0x04703483 //xar z3.s,z3.s,z4.s,16370.if mixin == 1371ror w19,w19,16372.endif373.if mixin == 1374eor w20,w20,w9375.endif376.inst 0x04703507 //xar z7.s,z7.s,z8.s,16377.if mixin == 1378ror w20,w20,16379.endif380.if mixin == 1381eor w21,w21,w10382.endif383.inst 0x0470358b //xar z11.s,z11.s,z12.s,16384.if mixin == 1385ror w21,w21,16386.endif387.inst 0x04af014a //add z10.s,z10.s,z15.s388.if mixin == 1389add w17,w17,w22390.endif391.inst 0x04a301ce //add z14.s,z14.s,z3.s392.if mixin == 1393add w18,w18,w19394.endif395.inst 0x04a70042 //add z2.s,z2.s,z7.s396.if mixin == 1397add w15,w15,w20398.endif399.inst 0x04ab00c6 //add z6.s,z6.s,z11.s400.if mixin == 1401add w16,w16,w21402.endif403.if mixin == 1404eor w12,w12,w17405.endif406.inst 0x046c3545 //xar z5.s,z5.s,z10.s,20407.if mixin == 1408ror w12,w12,20409.endif410.if mixin == 1411eor w13,w13,w18412.endif413.inst 0x046c35c9 //xar z9.s,z9.s,z14.s,20414.if mixin == 1415ror w13,w13,20416.endif417.if mixin == 1418eor w14,w14,w15419.endif420.inst 0x046c344d //xar z13.s,z13.s,z2.s,20421.if mixin == 1422ror w14,w14,20423.endif424.if mixin == 1425eor w11,w11,w16426.endif427.inst 0x046c34c1 //xar z1.s,z1.s,z6.s,20428.if mixin == 1429ror w11,w11,20430.endif431.inst 0x04a50000 //add z0.s,z0.s,z5.s432.if mixin == 1433add w7,w7,w12434.endif435.inst 0x04a90084 //add z4.s,z4.s,z9.s436.if mixin == 1437add w8,w8,w13438.endif439.inst 0x04ad0108 //add z8.s,z8.s,z13.s440.if mixin == 1441add w9,w9,w14442.endif443.inst 0x04a1018c //add z12.s,z12.s,z1.s444.if mixin == 1445add w10,w10,w11446.endif447.if mixin == 1448eor w22,w22,w7449.endif450.inst 0x0468340f //xar z15.s,z15.s,z0.s,24451.if mixin == 1452ror w22,w22,24453.endif454.if mixin == 1455eor w19,w19,w8456.endif457.inst 0x04683483 //xar z3.s,z3.s,z4.s,24458.if mixin == 1459ror w19,w19,24460.endif461.if mixin == 1462eor w20,w20,w9463.endif464.inst 0x04683507 //xar z7.s,z7.s,z8.s,24465.if mixin == 1466ror w20,w20,24467.endif468.if mixin == 1469eor w21,w21,w10470.endif471.inst 0x0468358b //xar z11.s,z11.s,z12.s,24472.if mixin == 1473ror w21,w21,24474.endif475.inst 0x04af014a //add z10.s,z10.s,z15.s476.if mixin == 1477add w17,w17,w22478.endif479.inst 0x04a301ce //add z14.s,z14.s,z3.s480.if mixin == 1481add w18,w18,w19482.endif483.inst 0x04a70042 //add z2.s,z2.s,z7.s484.if mixin == 1485add w15,w15,w20486.endif487.inst 0x04ab00c6 //add z6.s,z6.s,z11.s488.if mixin == 1489add w16,w16,w21490.endif491.if mixin == 1492eor w12,w12,w17493.endif494.inst 0x04673545 //xar z5.s,z5.s,z10.s,25495.if mixin == 1496ror w12,w12,25497.endif498.if mixin == 1499eor w13,w13,w18500.endif501.inst 0x046735c9 //xar z9.s,z9.s,z14.s,25502.if mixin == 1503ror w13,w13,25504.endif505.if mixin == 1506eor w14,w14,w15507.endif508.inst 0x0467344d //xar z13.s,z13.s,z2.s,25509.if mixin == 1510ror w14,w14,25511.endif512.if mixin == 1513eor w11,w11,w16514.endif515.inst 0x046734c1 //xar z1.s,z1.s,z6.s,25516.if mixin == 1517ror w11,w11,25518.endif519sub x6,x6,1520cbnz x6,10b521.if mixin == 1522add w7,w7,w23523.endif524.inst 0x04b90000 //add z0.s,z0.s,z25.s525.if mixin == 1526add x8,x8,x23,lsr #32527.endif528.inst 0x04ba0084 //add z4.s,z4.s,z26.s529.if mixin == 1530add x7,x7,x8,lsl #32 // pack531.endif532.if mixin == 1533add w9,w9,w24534.endif535.inst 0x04bb0108 //add z8.s,z8.s,z27.s536.if mixin == 1537add x10,x10,x24,lsr #32538.endif539.inst 0x04bc018c //add z12.s,z12.s,z28.s540.if mixin == 1541add x9,x9,x10,lsl #32 // pack542.endif543.if mixin == 1544ldp x8,x10,[x1],#16545.endif546.if mixin == 1547add w11,w11,w25548.endif549.inst 0x04bd0021 //add z1.s,z1.s,z29.s550.if mixin == 1551add x12,x12,x25,lsr #32552.endif553.inst 0x04be00a5 //add z5.s,z5.s,z30.s554.if mixin == 1555add x11,x11,x12,lsl #32 // pack556.endif557.if mixin == 1558add w13,w13,w26559.endif560.inst 0x04b50129 //add z9.s,z9.s,z21.s561.if mixin == 1562add x14,x14,x26,lsr #32563.endif564.inst 0x04b601ad //add z13.s,z13.s,z22.s565.if mixin == 1566add x13,x13,x14,lsl #32 // pack567.endif568.if mixin == 1569ldp x12,x14,[x1],#16570.endif571.if mixin == 1572add w15,w15,w27573.endif574.inst 0x04b70042 //add z2.s,z2.s,z23.s575.if mixin == 1576add x16,x16,x27,lsr #32577.endif578.inst 0x04b800c6 //add z6.s,z6.s,z24.s579.if mixin == 1580add x15,x15,x16,lsl #32 // pack581.endif582.if mixin == 1583add w17,w17,w28584.endif585.inst 0x04b1014a //add z10.s,z10.s,z17.s586.if mixin == 1587add x18,x18,x28,lsr #32588.endif589.inst 0x04b201ce //add z14.s,z14.s,z18.s590.if mixin == 1591add x17,x17,x18,lsl #32 // pack592.endif593.if mixin == 1594ldp x16,x18,[x1],#16595.endif596.if mixin == 1597add w19,w19,w29598.endif599.inst 0x04b00063 //add z3.s,z3.s,z16.s600.if mixin == 1601add x20,x20,x29,lsr #32602.endif603.inst 0x04b300e7 //add z7.s,z7.s,z19.s604.if mixin == 1605add x19,x19,x20,lsl #32 // pack606.endif607.if mixin == 1608add w21,w21,w30609.endif610.inst 0x04b4016b //add z11.s,z11.s,z20.s611.if mixin == 1612add x22,x22,x30,lsr #32613.endif614.inst 0x04bf01ef //add z15.s,z15.s,z31.s615.if mixin == 1616add x21,x21,x22,lsl #32 // pack617.endif618.if mixin == 1619ldp x20,x22,[x1],#16620.endif621#ifdef __AARCH64EB__622rev x7,x7623.inst 0x05a48000 //revb z0.s,p0/m,z0.s624.inst 0x05a48084 //revb z4.s,p0/m,z4.s625rev x9,x9626.inst 0x05a48108 //revb z8.s,p0/m,z8.s627.inst 0x05a4818c //revb z12.s,p0/m,z12.s628rev x11,x11629.inst 0x05a48021 //revb z1.s,p0/m,z1.s630.inst 0x05a480a5 //revb z5.s,p0/m,z5.s631rev x13,x13632.inst 0x05a48129 //revb z9.s,p0/m,z9.s633.inst 0x05a481ad //revb z13.s,p0/m,z13.s634rev x15,x15635.inst 0x05a48042 //revb z2.s,p0/m,z2.s636.inst 0x05a480c6 //revb z6.s,p0/m,z6.s637rev x17,x17638.inst 0x05a4814a //revb z10.s,p0/m,z10.s639.inst 0x05a481ce //revb z14.s,p0/m,z14.s640rev x19,x19641.inst 0x05a48063 //revb z3.s,p0/m,z3.s642.inst 0x05a480e7 //revb z7.s,p0/m,z7.s643rev x21,x21644.inst 0x05a4816b //revb z11.s,p0/m,z11.s645.inst 0x05a481ef //revb z15.s,p0/m,z15.s646#endif647.if mixin == 1648add x29,x29,#1649.endif650cmp x5,4651b.ne 200f652.if mixin == 1653eor x7,x7,x8654.endif655.if mixin == 1656eor x9,x9,x10657.endif658.if mixin == 1659eor x11,x11,x12660.endif661.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s662.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s663.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s664.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s665666.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s667.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s668.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s669.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s670671.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d672.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d673.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d674.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d675676.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d677.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d678.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d679.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d680.if mixin == 1681eor x13,x13,x14682.endif683.if mixin == 1684eor x15,x15,x16685.endif686.if mixin == 1687eor x17,x17,x18688.endif689.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s690.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s691.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s692.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s693694.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s695.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s696.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s697.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s698699.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d700.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d701.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d702.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d703704.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d705.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d706.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d707.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d708.if mixin == 1709eor x19,x19,x20710.endif711.if mixin == 1712eor x21,x21,x22713.endif714ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64715ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64716.inst 0x04b13000 //eor z0.d,z0.d,z17.d717.inst 0x04b23021 //eor z1.d,z1.d,z18.d718.inst 0x04b33042 //eor z2.d,z2.d,z19.d719.inst 0x04b43063 //eor z3.d,z3.d,z20.d720.inst 0x04b53084 //eor z4.d,z4.d,z21.d721.inst 0x04b630a5 //eor z5.d,z5.d,z22.d722.inst 0x04b730c6 //eor z6.d,z6.d,z23.d723.inst 0x04b830e7 //eor z7.d,z7.d,z24.d724ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#64725ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#64726.if mixin == 1727stp x7,x9,[x0],#16728.endif729.inst 0x04b13108 //eor z8.d,z8.d,z17.d730.inst 0x04b23129 //eor z9.d,z9.d,z18.d731.if mixin == 1732stp x11,x13,[x0],#16733.endif734.inst 0x04b3314a //eor z10.d,z10.d,z19.d735.inst 0x04b4316b //eor z11.d,z11.d,z20.d736.if mixin == 1737stp x15,x17,[x0],#16738.endif739.inst 0x04b5318c //eor z12.d,z12.d,z21.d740.inst 0x04b631ad //eor z13.d,z13.d,z22.d741.if mixin == 1742stp x19,x21,[x0],#16743.endif744.inst 0x04b731ce //eor z14.d,z14.d,z23.d745.inst 0x04b831ef //eor z15.d,z15.d,z24.d746st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#64747st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#64748st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#64749st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#64750b 210f751200:752.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s753.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s754.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s755.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s756757.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s758.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s759.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s760.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s761762.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d763.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d764.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d765.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d766767.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d768.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d769.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d770.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d771.if mixin == 1772eor x7,x7,x8773.endif774.if mixin == 1775eor x9,x9,x10776.endif777.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s778.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s779.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s780.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s781782.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s783.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s784.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s785.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s786787.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d788.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d789.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d790.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d791792.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d793.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d794.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d795.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d796.if mixin == 1797eor x11,x11,x12798.endif799.if mixin == 1800eor x13,x13,x14801.endif802.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s803.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s804.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s805.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s806807.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s808.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s809.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s810.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s811812.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d813.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d814.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d815.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d816817.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d818.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d819.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d820.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d821.if mixin == 1822eor x15,x15,x16823.endif824.if mixin == 1825eor x17,x17,x18826.endif827.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s828.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s829.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s830.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s831832.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s833.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s834.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s835.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s836837.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d838.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d839.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d840.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d841842.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d843.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d844.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d845.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d846.if mixin == 1847eor x19,x19,x20848.endif849.if mixin == 1850eor x21,x21,x22851.endif852.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]853.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]854.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]855.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]856.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]857.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]858.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]859.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]860.inst 0x04215101 //addvl x1,x1,8861.inst 0x04b13000 //eor z0.d,z0.d,z17.d862.inst 0x04b23084 //eor z4.d,z4.d,z18.d863.inst 0x04b33108 //eor z8.d,z8.d,z19.d864.inst 0x04b4318c //eor z12.d,z12.d,z20.d865.inst 0x04b53021 //eor z1.d,z1.d,z21.d866.inst 0x04b630a5 //eor z5.d,z5.d,z22.d867.inst 0x04b73129 //eor z9.d,z9.d,z23.d868.inst 0x04b831ad //eor z13.d,z13.d,z24.d869.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]870.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]871.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]872.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]873.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]874.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]875.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]876.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]877.inst 0x04215101 //addvl x1,x1,8878.if mixin == 1879stp x7,x9,[x0],#16880.endif881.inst 0x04b13042 //eor z2.d,z2.d,z17.d882.inst 0x04b230c6 //eor z6.d,z6.d,z18.d883.if mixin == 1884stp x11,x13,[x0],#16885.endif886.inst 0x04b3314a //eor z10.d,z10.d,z19.d887.inst 0x04b431ce //eor z14.d,z14.d,z20.d888.if mixin == 1889stp x15,x17,[x0],#16890.endif891.inst 0x04b53063 //eor z3.d,z3.d,z21.d892.inst 0x04b630e7 //eor z7.d,z7.d,z22.d893.if mixin == 1894stp x19,x21,[x0],#16895.endif896.inst 0x04b7316b //eor z11.d,z11.d,z23.d897.inst 0x04b831ef //eor z15.d,z15.d,z24.d898.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL]899.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL]900.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL]901.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL]902.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL]903.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL]904.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL]905.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL]906.inst 0x04205100 //addvl x0,x0,8907.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL]908.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL]909.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL]910.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL]911.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL]912.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL]913.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL]914.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL]915.inst 0x04205100 //addvl x0,x0,8916210:917.inst 0x04b0e3fd //incw x29, ALL, MUL #1918subs x2,x2,64919b.gt 100b920b 110f921101:922mixin=0923lsr x8,x23,#32924.inst 0x05a03ae0 //dup z0.s,w23925.inst 0x05a03af9 //dup z25.s,w23926.if mixin == 1927mov w7,w23928.endif929.inst 0x05a03904 //dup z4.s,w8930.inst 0x05a0391a //dup z26.s,w8931lsr x10,x24,#32932.inst 0x05a03b08 //dup z8.s,w24933.inst 0x05a03b1b //dup z27.s,w24934.if mixin == 1935mov w9,w24936.endif937.inst 0x05a0394c //dup z12.s,w10938.inst 0x05a0395c //dup z28.s,w10939lsr x12,x25,#32940.inst 0x05a03b21 //dup z1.s,w25941.inst 0x05a03b3d //dup z29.s,w25942.if mixin == 1943mov w11,w25944.endif945.inst 0x05a03985 //dup z5.s,w12946.inst 0x05a0399e //dup z30.s,w12947lsr x14,x26,#32948.inst 0x05a03b49 //dup z9.s,w26949.inst 0x05a03b55 //dup z21.s,w26950.if mixin == 1951mov w13,w26952.endif953.inst 0x05a039cd //dup z13.s,w14954.inst 0x05a039d6 //dup z22.s,w14955lsr x16,x27,#32956.inst 0x05a03b62 //dup z2.s,w27957.inst 0x05a03b77 //dup z23.s,w27958.if mixin == 1959mov w15,w27960.endif961.inst 0x05a03a06 //dup z6.s,w16962.inst 0x05a03a18 //dup z24.s,w16963lsr x18,x28,#32964.inst 0x05a03b8a //dup z10.s,w28965.inst 0x05a03b91 //dup z17.s,w28966.if mixin == 1967mov w17,w28968.endif969.inst 0x05a03a4e //dup z14.s,w18970.inst 0x05a03a52 //dup z18.s,w18971lsr x22,x30,#32972.inst 0x05a03bcb //dup z11.s,w30973.inst 0x05a03bd4 //dup z20.s,w30974.if mixin == 1975mov w21,w30976.endif977.inst 0x05a03acf //dup z15.s,w22978.inst 0x05a03adf //dup z31.s,w22979.if mixin == 1980add w20,w29,#1981mov w19,w29982.inst 0x04a14690 //index z16.s,w20,1983.inst 0x04a14683 //index z3.s,w20,1984.else985.inst 0x04a147b0 //index z16.s,w29,1986.inst 0x04a147a3 //index z3.s,w29,1987.endif988lsr x20,x29,#32989.inst 0x05a03a87 //dup z7.s,w20990.inst 0x05a03a93 //dup z19.s,w20991mov x6,#1099210:993.align 5994.inst 0x04a10000 //add z0.s,z0.s,z1.s995.if mixin == 1996add w7,w7,w11997.endif998.inst 0x04a50084 //add z4.s,z4.s,z5.s999.if mixin == 11000add w8,w8,w121001.endif1002.inst 0x04a90108 //add z8.s,z8.s,z9.s1003.if mixin == 11004add w9,w9,w131005.endif1006.inst 0x04ad018c //add z12.s,z12.s,z13.s1007.if mixin == 11008add w10,w10,w141009.endif1010.if mixin == 11011eor w19,w19,w71012.endif1013.inst 0x04703403 //xar z3.s,z3.s,z0.s,161014.if mixin == 11015ror w19,w19,161016.endif1017.if mixin == 11018eor w20,w20,w81019.endif1020.inst 0x04703487 //xar z7.s,z7.s,z4.s,161021.if mixin == 11022ror w20,w20,161023.endif1024.if mixin == 11025eor w21,w21,w91026.endif1027.inst 0x0470350b //xar z11.s,z11.s,z8.s,161028.if mixin == 11029ror w21,w21,161030.endif1031.if mixin == 11032eor w22,w22,w101033.endif1034.inst 0x0470358f //xar z15.s,z15.s,z12.s,161035.if mixin == 11036ror w22,w22,161037.endif1038.inst 0x04a30042 //add z2.s,z2.s,z3.s1039.if mixin == 11040add w15,w15,w191041.endif1042.inst 0x04a700c6 //add z6.s,z6.s,z7.s1043.if mixin == 11044add w16,w16,w201045.endif1046.inst 0x04ab014a //add z10.s,z10.s,z11.s1047.if mixin == 11048add w17,w17,w211049.endif1050.inst 0x04af01ce //add z14.s,z14.s,z15.s1051.if mixin == 11052add w18,w18,w221053.endif1054.if mixin == 11055eor w11,w11,w151056.endif1057.inst 0x046c3441 //xar z1.s,z1.s,z2.s,201058.if mixin == 11059ror w11,w11,201060.endif1061.if mixin == 11062eor w12,w12,w161063.endif1064.inst 0x046c34c5 //xar z5.s,z5.s,z6.s,201065.if mixin == 11066ror w12,w12,201067.endif1068.if mixin == 11069eor w13,w13,w171070.endif1071.inst 0x046c3549 //xar z9.s,z9.s,z10.s,201072.if mixin == 11073ror w13,w13,201074.endif1075.if mixin == 11076eor w14,w14,w181077.endif1078.inst 0x046c35cd //xar z13.s,z13.s,z14.s,201079.if mixin == 11080ror w14,w14,201081.endif1082.inst 0x04a10000 //add z0.s,z0.s,z1.s1083.if mixin == 11084add w7,w7,w111085.endif1086.inst 0x04a50084 //add z4.s,z4.s,z5.s1087.if mixin == 11088add w8,w8,w121089.endif1090.inst 0x04a90108 //add z8.s,z8.s,z9.s1091.if mixin == 11092add w9,w9,w131093.endif1094.inst 0x04ad018c //add z12.s,z12.s,z13.s1095.if mixin == 11096add w10,w10,w141097.endif1098.if mixin == 11099eor w19,w19,w71100.endif1101.inst 0x04683403 //xar z3.s,z3.s,z0.s,241102.if mixin == 11103ror w19,w19,241104.endif1105.if mixin == 11106eor w20,w20,w81107.endif1108.inst 0x04683487 //xar z7.s,z7.s,z4.s,241109.if mixin == 11110ror w20,w20,241111.endif1112.if mixin == 11113eor w21,w21,w91114.endif1115.inst 0x0468350b //xar z11.s,z11.s,z8.s,241116.if mixin == 11117ror w21,w21,241118.endif1119.if mixin == 11120eor w22,w22,w101121.endif1122.inst 0x0468358f //xar z15.s,z15.s,z12.s,241123.if mixin == 11124ror w22,w22,241125.endif1126.inst 0x04a30042 //add z2.s,z2.s,z3.s1127.if mixin == 11128add w15,w15,w191129.endif1130.inst 0x04a700c6 //add z6.s,z6.s,z7.s1131.if mixin == 11132add w16,w16,w201133.endif1134.inst 0x04ab014a //add z10.s,z10.s,z11.s1135.if mixin == 11136add w17,w17,w211137.endif1138.inst 0x04af01ce //add z14.s,z14.s,z15.s1139.if mixin == 11140add w18,w18,w221141.endif1142.if mixin == 11143eor w11,w11,w151144.endif1145.inst 0x04673441 //xar z1.s,z1.s,z2.s,251146.if mixin == 11147ror w11,w11,251148.endif1149.if mixin == 11150eor w12,w12,w161151.endif1152.inst 0x046734c5 //xar z5.s,z5.s,z6.s,251153.if mixin == 11154ror w12,w12,251155.endif1156.if mixin == 11157eor w13,w13,w171158.endif1159.inst 0x04673549 //xar z9.s,z9.s,z10.s,251160.if mixin == 11161ror w13,w13,251162.endif1163.if mixin == 11164eor w14,w14,w181165.endif1166.inst 0x046735cd //xar z13.s,z13.s,z14.s,251167.if mixin == 11168ror w14,w14,251169.endif1170.inst 0x04a50000 //add z0.s,z0.s,z5.s1171.if mixin == 11172add w7,w7,w121173.endif1174.inst 0x04a90084 //add z4.s,z4.s,z9.s1175.if mixin == 11176add w8,w8,w131177.endif1178.inst 0x04ad0108 //add z8.s,z8.s,z13.s1179.if mixin == 11180add w9,w9,w141181.endif1182.inst 0x04a1018c //add z12.s,z12.s,z1.s1183.if mixin == 11184add w10,w10,w111185.endif1186.if mixin == 11187eor w22,w22,w71188.endif1189.inst 0x0470340f //xar z15.s,z15.s,z0.s,161190.if mixin == 11191ror w22,w22,161192.endif1193.if mixin == 11194eor w19,w19,w81195.endif1196.inst 0x04703483 //xar z3.s,z3.s,z4.s,161197.if mixin == 11198ror w19,w19,161199.endif1200.if mixin == 11201eor w20,w20,w91202.endif1203.inst 0x04703507 //xar z7.s,z7.s,z8.s,161204.if mixin == 11205ror w20,w20,161206.endif1207.if mixin == 11208eor w21,w21,w101209.endif1210.inst 0x0470358b //xar z11.s,z11.s,z12.s,161211.if mixin == 11212ror w21,w21,161213.endif1214.inst 0x04af014a //add z10.s,z10.s,z15.s1215.if mixin == 11216add w17,w17,w221217.endif1218.inst 0x04a301ce //add z14.s,z14.s,z3.s1219.if mixin == 11220add w18,w18,w191221.endif1222.inst 0x04a70042 //add z2.s,z2.s,z7.s1223.if mixin == 11224add w15,w15,w201225.endif1226.inst 0x04ab00c6 //add z6.s,z6.s,z11.s1227.if mixin == 11228add w16,w16,w211229.endif1230.if mixin == 11231eor w12,w12,w171232.endif1233.inst 0x046c3545 //xar z5.s,z5.s,z10.s,201234.if mixin == 11235ror w12,w12,201236.endif1237.if mixin == 11238eor w13,w13,w181239.endif1240.inst 0x046c35c9 //xar z9.s,z9.s,z14.s,201241.if mixin == 11242ror w13,w13,201243.endif1244.if mixin == 11245eor w14,w14,w151246.endif1247.inst 0x046c344d //xar z13.s,z13.s,z2.s,201248.if mixin == 11249ror w14,w14,201250.endif1251.if mixin == 11252eor w11,w11,w161253.endif1254.inst 0x046c34c1 //xar z1.s,z1.s,z6.s,201255.if mixin == 11256ror w11,w11,201257.endif1258.inst 0x04a50000 //add z0.s,z0.s,z5.s1259.if mixin == 11260add w7,w7,w121261.endif1262.inst 0x04a90084 //add z4.s,z4.s,z9.s1263.if mixin == 11264add w8,w8,w131265.endif1266.inst 0x04ad0108 //add z8.s,z8.s,z13.s1267.if mixin == 11268add w9,w9,w141269.endif1270.inst 0x04a1018c //add z12.s,z12.s,z1.s1271.if mixin == 11272add w10,w10,w111273.endif1274.if mixin == 11275eor w22,w22,w71276.endif1277.inst 0x0468340f //xar z15.s,z15.s,z0.s,241278.if mixin == 11279ror w22,w22,241280.endif1281.if mixin == 11282eor w19,w19,w81283.endif1284.inst 0x04683483 //xar z3.s,z3.s,z4.s,241285.if mixin == 11286ror w19,w19,241287.endif1288.if mixin == 11289eor w20,w20,w91290.endif1291.inst 0x04683507 //xar z7.s,z7.s,z8.s,241292.if mixin == 11293ror w20,w20,241294.endif1295.if mixin == 11296eor w21,w21,w101297.endif1298.inst 0x0468358b //xar z11.s,z11.s,z12.s,241299.if mixin == 11300ror w21,w21,241301.endif1302.inst 0x04af014a //add z10.s,z10.s,z15.s1303.if mixin == 11304add w17,w17,w221305.endif1306.inst 0x04a301ce //add z14.s,z14.s,z3.s1307.if mixin == 11308add w18,w18,w191309.endif1310.inst 0x04a70042 //add z2.s,z2.s,z7.s1311.if mixin == 11312add w15,w15,w201313.endif1314.inst 0x04ab00c6 //add z6.s,z6.s,z11.s1315.if mixin == 11316add w16,w16,w211317.endif1318.if mixin == 11319eor w12,w12,w171320.endif1321.inst 0x04673545 //xar z5.s,z5.s,z10.s,251322.if mixin == 11323ror w12,w12,251324.endif1325.if mixin == 11326eor w13,w13,w181327.endif1328.inst 0x046735c9 //xar z9.s,z9.s,z14.s,251329.if mixin == 11330ror w13,w13,251331.endif1332.if mixin == 11333eor w14,w14,w151334.endif1335.inst 0x0467344d //xar z13.s,z13.s,z2.s,251336.if mixin == 11337ror w14,w14,251338.endif1339.if mixin == 11340eor w11,w11,w161341.endif1342.inst 0x046734c1 //xar z1.s,z1.s,z6.s,251343.if mixin == 11344ror w11,w11,251345.endif1346sub x6,x6,11347cbnz x6,10b1348.if mixin == 11349add w7,w7,w231350.endif1351.inst 0x04b90000 //add z0.s,z0.s,z25.s1352.if mixin == 11353add x8,x8,x23,lsr #321354.endif1355.inst 0x04ba0084 //add z4.s,z4.s,z26.s1356.if mixin == 11357add x7,x7,x8,lsl #32 // pack1358.endif1359.if mixin == 11360add w9,w9,w241361.endif1362.inst 0x04bb0108 //add z8.s,z8.s,z27.s1363.if mixin == 11364add x10,x10,x24,lsr #321365.endif1366.inst 0x04bc018c //add z12.s,z12.s,z28.s1367.if mixin == 11368add x9,x9,x10,lsl #32 // pack1369.endif1370.if mixin == 11371ldp x8,x10,[x1],#161372.endif1373.if mixin == 11374add w11,w11,w251375.endif1376.inst 0x04bd0021 //add z1.s,z1.s,z29.s1377.if mixin == 11378add x12,x12,x25,lsr #321379.endif1380.inst 0x04be00a5 //add z5.s,z5.s,z30.s1381.if mixin == 11382add x11,x11,x12,lsl #32 // pack1383.endif1384.if mixin == 11385add w13,w13,w261386.endif1387.inst 0x04b50129 //add z9.s,z9.s,z21.s1388.if mixin == 11389add x14,x14,x26,lsr #321390.endif1391.inst 0x04b601ad //add z13.s,z13.s,z22.s1392.if mixin == 11393add x13,x13,x14,lsl #32 // pack1394.endif1395.if mixin == 11396ldp x12,x14,[x1],#161397.endif1398.if mixin == 11399add w15,w15,w271400.endif1401.inst 0x04b70042 //add z2.s,z2.s,z23.s1402.if mixin == 11403add x16,x16,x27,lsr #321404.endif1405.inst 0x04b800c6 //add z6.s,z6.s,z24.s1406.if mixin == 11407add x15,x15,x16,lsl #32 // pack1408.endif1409.if mixin == 11410add w17,w17,w281411.endif1412.inst 0x04b1014a //add z10.s,z10.s,z17.s1413.if mixin == 11414add x18,x18,x28,lsr #321415.endif1416.inst 0x04b201ce //add z14.s,z14.s,z18.s1417.if mixin == 11418add x17,x17,x18,lsl #32 // pack1419.endif1420.if mixin == 11421ldp x16,x18,[x1],#161422.endif1423.if mixin == 11424add w19,w19,w291425.endif1426.inst 0x04b00063 //add z3.s,z3.s,z16.s1427.if mixin == 11428add x20,x20,x29,lsr #321429.endif1430.inst 0x04b300e7 //add z7.s,z7.s,z19.s1431.if mixin == 11432add x19,x19,x20,lsl #32 // pack1433.endif1434.if mixin == 11435add w21,w21,w301436.endif1437.inst 0x04b4016b //add z11.s,z11.s,z20.s1438.if mixin == 11439add x22,x22,x30,lsr #321440.endif1441.inst 0x04bf01ef //add z15.s,z15.s,z31.s1442.if mixin == 11443add x21,x21,x22,lsl #32 // pack1444.endif1445.if mixin == 11446ldp x20,x22,[x1],#161447.endif1448#ifdef __AARCH64EB__1449rev x7,x71450.inst 0x05a48000 //revb z0.s,p0/m,z0.s1451.inst 0x05a48084 //revb z4.s,p0/m,z4.s1452rev x9,x91453.inst 0x05a48108 //revb z8.s,p0/m,z8.s1454.inst 0x05a4818c //revb z12.s,p0/m,z12.s1455rev x11,x111456.inst 0x05a48021 //revb z1.s,p0/m,z1.s1457.inst 0x05a480a5 //revb z5.s,p0/m,z5.s1458rev x13,x131459.inst 0x05a48129 //revb z9.s,p0/m,z9.s1460.inst 0x05a481ad //revb z13.s,p0/m,z13.s1461rev x15,x151462.inst 0x05a48042 //revb z2.s,p0/m,z2.s1463.inst 0x05a480c6 //revb z6.s,p0/m,z6.s1464rev x17,x171465.inst 0x05a4814a //revb z10.s,p0/m,z10.s1466.inst 0x05a481ce //revb z14.s,p0/m,z14.s1467rev x19,x191468.inst 0x05a48063 //revb z3.s,p0/m,z3.s1469.inst 0x05a480e7 //revb z7.s,p0/m,z7.s1470rev x21,x211471.inst 0x05a4816b //revb z11.s,p0/m,z11.s1472.inst 0x05a481ef //revb z15.s,p0/m,z15.s1473#endif1474.if mixin == 11475add x29,x29,#11476.endif1477cmp x5,41478b.ne 200f1479.if mixin == 11480eor x7,x7,x81481.endif1482.if mixin == 11483eor x9,x9,x101484.endif1485.if mixin == 11486eor x11,x11,x121487.endif1488.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s1489.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s1490.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s1491.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s14921493.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s1494.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s1495.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s1496.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s14971498.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d1499.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d1500.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d1501.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d15021503.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d1504.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d1505.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d1506.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d1507.if mixin == 11508eor x13,x13,x141509.endif1510.if mixin == 11511eor x15,x15,x161512.endif1513.if mixin == 11514eor x17,x17,x181515.endif1516.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s1517.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s1518.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s1519.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s15201521.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s1522.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s1523.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s1524.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s15251526.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d1527.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d1528.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d1529.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d15301531.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d1532.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d1533.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d1534.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d1535.if mixin == 11536eor x19,x19,x201537.endif1538.if mixin == 11539eor x21,x21,x221540.endif1541ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#641542ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#641543.inst 0x04b13000 //eor z0.d,z0.d,z17.d1544.inst 0x04b23021 //eor z1.d,z1.d,z18.d1545.inst 0x04b33042 //eor z2.d,z2.d,z19.d1546.inst 0x04b43063 //eor z3.d,z3.d,z20.d1547.inst 0x04b53084 //eor z4.d,z4.d,z21.d1548.inst 0x04b630a5 //eor z5.d,z5.d,z22.d1549.inst 0x04b730c6 //eor z6.d,z6.d,z23.d1550.inst 0x04b830e7 //eor z7.d,z7.d,z24.d1551ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#641552ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#641553.if mixin == 11554stp x7,x9,[x0],#161555.endif1556.inst 0x04b13108 //eor z8.d,z8.d,z17.d1557.inst 0x04b23129 //eor z9.d,z9.d,z18.d1558.if mixin == 11559stp x11,x13,[x0],#161560.endif1561.inst 0x04b3314a //eor z10.d,z10.d,z19.d1562.inst 0x04b4316b //eor z11.d,z11.d,z20.d1563.if mixin == 11564stp x15,x17,[x0],#161565.endif1566.inst 0x04b5318c //eor z12.d,z12.d,z21.d1567.inst 0x04b631ad //eor z13.d,z13.d,z22.d1568.if mixin == 11569stp x19,x21,[x0],#161570.endif1571.inst 0x04b731ce //eor z14.d,z14.d,z23.d1572.inst 0x04b831ef //eor z15.d,z15.d,z24.d1573st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#641574st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#641575st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#641576st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#641577b 210f1578200:1579.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s1580.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s1581.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s1582.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s15831584.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s1585.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s1586.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s1587.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s15881589.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d1590.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d1591.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d1592.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d15931594.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d1595.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d1596.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d1597.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d1598.if mixin == 11599eor x7,x7,x81600.endif1601.if mixin == 11602eor x9,x9,x101603.endif1604.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s1605.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s1606.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s1607.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s16081609.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s1610.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s1611.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s1612.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s16131614.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d1615.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d1616.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d1617.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d16181619.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d1620.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d1621.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d1622.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d1623.if mixin == 11624eor x11,x11,x121625.endif1626.if mixin == 11627eor x13,x13,x141628.endif1629.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s1630.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s1631.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s1632.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s16331634.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s1635.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s1636.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s1637.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s16381639.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d1640.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d1641.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d1642.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d16431644.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d1645.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d1646.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d1647.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d1648.if mixin == 11649eor x15,x15,x161650.endif1651.if mixin == 11652eor x17,x17,x181653.endif1654.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s1655.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s1656.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s1657.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s16581659.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s1660.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s1661.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s1662.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s16631664.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d1665.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d1666.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d1667.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d16681669.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d1670.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d1671.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d1672.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d1673.if mixin == 11674eor x19,x19,x201675.endif1676.if mixin == 11677eor x21,x21,x221678.endif1679.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]1680.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]1681.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]1682.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]1683.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]1684.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]1685.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]1686.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]1687.inst 0x04215101 //addvl x1,x1,81688.inst 0x04b13000 //eor z0.d,z0.d,z17.d1689.inst 0x04b23084 //eor z4.d,z4.d,z18.d1690.inst 0x04b33108 //eor z8.d,z8.d,z19.d1691.inst 0x04b4318c //eor z12.d,z12.d,z20.d1692.inst 0x04b53021 //eor z1.d,z1.d,z21.d1693.inst 0x04b630a5 //eor z5.d,z5.d,z22.d1694.inst 0x04b73129 //eor z9.d,z9.d,z23.d1695.inst 0x04b831ad //eor z13.d,z13.d,z24.d1696.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]1697.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]1698.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]1699.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]1700.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]1701.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]1702.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]1703.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]1704.inst 0x04215101 //addvl x1,x1,81705.if mixin == 11706stp x7,x9,[x0],#161707.endif1708.inst 0x04b13042 //eor z2.d,z2.d,z17.d1709.inst 0x04b230c6 //eor z6.d,z6.d,z18.d1710.if mixin == 11711stp x11,x13,[x0],#161712.endif1713.inst 0x04b3314a //eor z10.d,z10.d,z19.d1714.inst 0x04b431ce //eor z14.d,z14.d,z20.d1715.if mixin == 11716stp x15,x17,[x0],#161717.endif1718.inst 0x04b53063 //eor z3.d,z3.d,z21.d1719.inst 0x04b630e7 //eor z7.d,z7.d,z22.d1720.if mixin == 11721stp x19,x21,[x0],#161722.endif1723.inst 0x04b7316b //eor z11.d,z11.d,z23.d1724.inst 0x04b831ef //eor z15.d,z15.d,z24.d1725.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL]1726.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL]1727.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL]1728.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL]1729.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL]1730.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL]1731.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL]1732.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL]1733.inst 0x04205100 //addvl x0,x0,81734.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL]1735.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL]1736.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL]1737.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL]1738.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL]1739.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL]1740.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL]1741.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL]1742.inst 0x04205100 //addvl x0,x0,81743210:1744.inst 0x04b0e3fd //incw x29, ALL, MUL #11745110:1746b 2f17471:1748.align 51749100:1750subs x7,x2,x5,lsl #61751b.lt 110f1752mov x2,x71753b.eq 101f1754cmp x2,641755b.lt 101f1756mixin=11757lsr x8,x23,#321758.inst 0x05a03ae0 //dup z0.s,w231759.inst 0x05a03af9 //dup z25.s,w231760.if mixin == 11761mov w7,w231762.endif1763.inst 0x05a03904 //dup z4.s,w81764.inst 0x05a0391a //dup z26.s,w81765lsr x10,x24,#321766.inst 0x05a03b08 //dup z8.s,w241767.inst 0x05a03b1b //dup z27.s,w241768.if mixin == 11769mov w9,w241770.endif1771.inst 0x05a0394c //dup z12.s,w101772.inst 0x05a0395c //dup z28.s,w101773lsr x12,x25,#321774.inst 0x05a03b21 //dup z1.s,w251775.inst 0x05a03b3d //dup z29.s,w251776.if mixin == 11777mov w11,w251778.endif1779.inst 0x05a03985 //dup z5.s,w121780.inst 0x05a0399e //dup z30.s,w121781lsr x14,x26,#321782.inst 0x05a03b49 //dup z9.s,w261783.inst 0x05a03b55 //dup z21.s,w261784.if mixin == 11785mov w13,w261786.endif1787.inst 0x05a039cd //dup z13.s,w141788.inst 0x05a039d6 //dup z22.s,w141789lsr x16,x27,#321790.inst 0x05a03b62 //dup z2.s,w271791.inst 0x05a03b77 //dup z23.s,w271792.if mixin == 11793mov w15,w271794.endif1795.inst 0x05a03a06 //dup z6.s,w161796.inst 0x05a03a18 //dup z24.s,w161797lsr x18,x28,#321798.inst 0x05a03b8a //dup z10.s,w281799.if mixin == 11800mov w17,w281801.endif1802.inst 0x05a03a4e //dup z14.s,w181803lsr x22,x30,#321804.inst 0x05a03bcb //dup z11.s,w301805.if mixin == 11806mov w21,w301807.endif1808.inst 0x05a03acf //dup z15.s,w221809.if mixin == 11810add w20,w29,#11811mov w19,w291812.inst 0x04a14690 //index z16.s,w20,11813.inst 0x04a14683 //index z3.s,w20,11814.else1815.inst 0x04a147b0 //index z16.s,w29,11816.inst 0x04a147a3 //index z3.s,w29,11817.endif1818lsr x20,x29,#321819.inst 0x05a03a87 //dup z7.s,w201820mov x6,#10182110:1822.align 51823.inst 0x04a10000 //add z0.s,z0.s,z1.s1824.if mixin == 11825add w7,w7,w111826.endif1827.inst 0x04a50084 //add z4.s,z4.s,z5.s1828.if mixin == 11829add w8,w8,w121830.endif1831.inst 0x04a90108 //add z8.s,z8.s,z9.s1832.if mixin == 11833add w9,w9,w131834.endif1835.inst 0x04ad018c //add z12.s,z12.s,z13.s1836.if mixin == 11837add w10,w10,w141838.endif1839.inst 0x04a03063 //eor z3.d,z3.d,z0.d1840.if mixin == 11841eor w19,w19,w71842.endif1843.inst 0x04a430e7 //eor z7.d,z7.d,z4.d1844.if mixin == 11845eor w20,w20,w81846.endif1847.inst 0x04a8316b //eor z11.d,z11.d,z8.d1848.if mixin == 11849eor w21,w21,w91850.endif1851.inst 0x04ac31ef //eor z15.d,z15.d,z12.d1852.if mixin == 11853eor w22,w22,w101854.endif1855.inst 0x05a58063 //revh z3.s,p0/m,z3.s1856.if mixin == 11857ror w19,w19,#161858.endif1859.inst 0x05a580e7 //revh z7.s,p0/m,z7.s1860.if mixin == 11861ror w20,w20,#161862.endif1863.inst 0x05a5816b //revh z11.s,p0/m,z11.s1864.if mixin == 11865ror w21,w21,#161866.endif1867.inst 0x05a581ef //revh z15.s,p0/m,z15.s1868.if mixin == 11869ror w22,w22,#161870.endif1871.inst 0x04a30042 //add z2.s,z2.s,z3.s1872.if mixin == 11873add w15,w15,w191874.endif1875.inst 0x04a700c6 //add z6.s,z6.s,z7.s1876.if mixin == 11877add w16,w16,w201878.endif1879.inst 0x04ab014a //add z10.s,z10.s,z11.s1880.if mixin == 11881add w17,w17,w211882.endif1883.inst 0x04af01ce //add z14.s,z14.s,z15.s1884.if mixin == 11885add w18,w18,w221886.endif1887.inst 0x04a23021 //eor z1.d,z1.d,z2.d1888.if mixin == 11889eor w11,w11,w151890.endif1891.inst 0x04a630a5 //eor z5.d,z5.d,z6.d1892.if mixin == 11893eor w12,w12,w161894.endif1895.inst 0x04aa3129 //eor z9.d,z9.d,z10.d1896.if mixin == 11897eor w13,w13,w171898.endif1899.inst 0x04ae31ad //eor z13.d,z13.d,z14.d1900.if mixin == 11901eor w14,w14,w181902.endif1903.inst 0x046c9c31 //lsl z17.s,z1.s,121904.inst 0x046c9cb2 //lsl z18.s,z5.s,121905.inst 0x046c9d33 //lsl z19.s,z9.s,121906.inst 0x046c9db4 //lsl z20.s,z13.s,121907.inst 0x046c9421 //lsr z1.s,z1.s,201908.if mixin == 11909ror w11,w11,201910.endif1911.inst 0x046c94a5 //lsr z5.s,z5.s,201912.if mixin == 11913ror w12,w12,201914.endif1915.inst 0x046c9529 //lsr z9.s,z9.s,201916.if mixin == 11917ror w13,w13,201918.endif1919.inst 0x046c95ad //lsr z13.s,z13.s,201920.if mixin == 11921ror w14,w14,201922.endif1923.inst 0x04713021 //orr z1.d,z1.d,z17.d1924.inst 0x047230a5 //orr z5.d,z5.d,z18.d1925.inst 0x04733129 //orr z9.d,z9.d,z19.d1926.inst 0x047431ad //orr z13.d,z13.d,z20.d1927.inst 0x04a10000 //add z0.s,z0.s,z1.s1928.if mixin == 11929add w7,w7,w111930.endif1931.inst 0x04a50084 //add z4.s,z4.s,z5.s1932.if mixin == 11933add w8,w8,w121934.endif1935.inst 0x04a90108 //add z8.s,z8.s,z9.s1936.if mixin == 11937add w9,w9,w131938.endif1939.inst 0x04ad018c //add z12.s,z12.s,z13.s1940.if mixin == 11941add w10,w10,w141942.endif1943.inst 0x04a03063 //eor z3.d,z3.d,z0.d1944.if mixin == 11945eor w19,w19,w71946.endif1947.inst 0x04a430e7 //eor z7.d,z7.d,z4.d1948.if mixin == 11949eor w20,w20,w81950.endif1951.inst 0x04a8316b //eor z11.d,z11.d,z8.d1952.if mixin == 11953eor w21,w21,w91954.endif1955.inst 0x04ac31ef //eor z15.d,z15.d,z12.d1956.if mixin == 11957eor w22,w22,w101958.endif1959.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b1960.if mixin == 11961ror w19,w19,#241962.endif1963.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b1964.if mixin == 11965ror w20,w20,#241966.endif1967.inst 0x053f316b //tbl z11.b,{z11.b},z31.b1968.if mixin == 11969ror w21,w21,#241970.endif1971.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b1972.if mixin == 11973ror w22,w22,#241974.endif1975.inst 0x04a30042 //add z2.s,z2.s,z3.s1976.if mixin == 11977add w15,w15,w191978.endif1979.inst 0x04a700c6 //add z6.s,z6.s,z7.s1980.if mixin == 11981add w16,w16,w201982.endif1983.inst 0x04ab014a //add z10.s,z10.s,z11.s1984.if mixin == 11985add w17,w17,w211986.endif1987.inst 0x04af01ce //add z14.s,z14.s,z15.s1988.if mixin == 11989add w18,w18,w221990.endif1991.inst 0x04a23021 //eor z1.d,z1.d,z2.d1992.if mixin == 11993eor w11,w11,w151994.endif1995.inst 0x04a630a5 //eor z5.d,z5.d,z6.d1996.if mixin == 11997eor w12,w12,w161998.endif1999.inst 0x04aa3129 //eor z9.d,z9.d,z10.d2000.if mixin == 12001eor w13,w13,w172002.endif2003.inst 0x04ae31ad //eor z13.d,z13.d,z14.d2004.if mixin == 12005eor w14,w14,w182006.endif2007.inst 0x04679c31 //lsl z17.s,z1.s,72008.inst 0x04679cb2 //lsl z18.s,z5.s,72009.inst 0x04679d33 //lsl z19.s,z9.s,72010.inst 0x04679db4 //lsl z20.s,z13.s,72011.inst 0x04679421 //lsr z1.s,z1.s,252012.if mixin == 12013ror w11,w11,252014.endif2015.inst 0x046794a5 //lsr z5.s,z5.s,252016.if mixin == 12017ror w12,w12,252018.endif2019.inst 0x04679529 //lsr z9.s,z9.s,252020.if mixin == 12021ror w13,w13,252022.endif2023.inst 0x046795ad //lsr z13.s,z13.s,252024.if mixin == 12025ror w14,w14,252026.endif2027.inst 0x04713021 //orr z1.d,z1.d,z17.d2028.inst 0x047230a5 //orr z5.d,z5.d,z18.d2029.inst 0x04733129 //orr z9.d,z9.d,z19.d2030.inst 0x047431ad //orr z13.d,z13.d,z20.d2031.inst 0x04a50000 //add z0.s,z0.s,z5.s2032.if mixin == 12033add w7,w7,w122034.endif2035.inst 0x04a90084 //add z4.s,z4.s,z9.s2036.if mixin == 12037add w8,w8,w132038.endif2039.inst 0x04ad0108 //add z8.s,z8.s,z13.s2040.if mixin == 12041add w9,w9,w142042.endif2043.inst 0x04a1018c //add z12.s,z12.s,z1.s2044.if mixin == 12045add w10,w10,w112046.endif2047.inst 0x04a031ef //eor z15.d,z15.d,z0.d2048.if mixin == 12049eor w22,w22,w72050.endif2051.inst 0x04a43063 //eor z3.d,z3.d,z4.d2052.if mixin == 12053eor w19,w19,w82054.endif2055.inst 0x04a830e7 //eor z7.d,z7.d,z8.d2056.if mixin == 12057eor w20,w20,w92058.endif2059.inst 0x04ac316b //eor z11.d,z11.d,z12.d2060.if mixin == 12061eor w21,w21,w102062.endif2063.inst 0x05a581ef //revh z15.s,p0/m,z15.s2064.if mixin == 12065ror w22,w22,#162066.endif2067.inst 0x05a58063 //revh z3.s,p0/m,z3.s2068.if mixin == 12069ror w19,w19,#162070.endif2071.inst 0x05a580e7 //revh z7.s,p0/m,z7.s2072.if mixin == 12073ror w20,w20,#162074.endif2075.inst 0x05a5816b //revh z11.s,p0/m,z11.s2076.if mixin == 12077ror w21,w21,#162078.endif2079.inst 0x04af014a //add z10.s,z10.s,z15.s2080.if mixin == 12081add w17,w17,w222082.endif2083.inst 0x04a301ce //add z14.s,z14.s,z3.s2084.if mixin == 12085add w18,w18,w192086.endif2087.inst 0x04a70042 //add z2.s,z2.s,z7.s2088.if mixin == 12089add w15,w15,w202090.endif2091.inst 0x04ab00c6 //add z6.s,z6.s,z11.s2092.if mixin == 12093add w16,w16,w212094.endif2095.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d2096.if mixin == 12097eor w12,w12,w172098.endif2099.inst 0x04ae3129 //eor z9.d,z9.d,z14.d2100.if mixin == 12101eor w13,w13,w182102.endif2103.inst 0x04a231ad //eor z13.d,z13.d,z2.d2104.if mixin == 12105eor w14,w14,w152106.endif2107.inst 0x04a63021 //eor z1.d,z1.d,z6.d2108.if mixin == 12109eor w11,w11,w162110.endif2111.inst 0x046c9cb1 //lsl z17.s,z5.s,122112.inst 0x046c9d32 //lsl z18.s,z9.s,122113.inst 0x046c9db3 //lsl z19.s,z13.s,122114.inst 0x046c9c34 //lsl z20.s,z1.s,122115.inst 0x046c94a5 //lsr z5.s,z5.s,202116.if mixin == 12117ror w12,w12,202118.endif2119.inst 0x046c9529 //lsr z9.s,z9.s,202120.if mixin == 12121ror w13,w13,202122.endif2123.inst 0x046c95ad //lsr z13.s,z13.s,202124.if mixin == 12125ror w14,w14,202126.endif2127.inst 0x046c9421 //lsr z1.s,z1.s,202128.if mixin == 12129ror w11,w11,202130.endif2131.inst 0x047130a5 //orr z5.d,z5.d,z17.d2132.inst 0x04723129 //orr z9.d,z9.d,z18.d2133.inst 0x047331ad //orr z13.d,z13.d,z19.d2134.inst 0x04743021 //orr z1.d,z1.d,z20.d2135.inst 0x04a50000 //add z0.s,z0.s,z5.s2136.if mixin == 12137add w7,w7,w122138.endif2139.inst 0x04a90084 //add z4.s,z4.s,z9.s2140.if mixin == 12141add w8,w8,w132142.endif2143.inst 0x04ad0108 //add z8.s,z8.s,z13.s2144.if mixin == 12145add w9,w9,w142146.endif2147.inst 0x04a1018c //add z12.s,z12.s,z1.s2148.if mixin == 12149add w10,w10,w112150.endif2151.inst 0x04a031ef //eor z15.d,z15.d,z0.d2152.if mixin == 12153eor w22,w22,w72154.endif2155.inst 0x04a43063 //eor z3.d,z3.d,z4.d2156.if mixin == 12157eor w19,w19,w82158.endif2159.inst 0x04a830e7 //eor z7.d,z7.d,z8.d2160.if mixin == 12161eor w20,w20,w92162.endif2163.inst 0x04ac316b //eor z11.d,z11.d,z12.d2164.if mixin == 12165eor w21,w21,w102166.endif2167.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b2168.if mixin == 12169ror w22,w22,#242170.endif2171.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b2172.if mixin == 12173ror w19,w19,#242174.endif2175.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b2176.if mixin == 12177ror w20,w20,#242178.endif2179.inst 0x053f316b //tbl z11.b,{z11.b},z31.b2180.if mixin == 12181ror w21,w21,#242182.endif2183.inst 0x04af014a //add z10.s,z10.s,z15.s2184.if mixin == 12185add w17,w17,w222186.endif2187.inst 0x04a301ce //add z14.s,z14.s,z3.s2188.if mixin == 12189add w18,w18,w192190.endif2191.inst 0x04a70042 //add z2.s,z2.s,z7.s2192.if mixin == 12193add w15,w15,w202194.endif2195.inst 0x04ab00c6 //add z6.s,z6.s,z11.s2196.if mixin == 12197add w16,w16,w212198.endif2199.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d2200.if mixin == 12201eor w12,w12,w172202.endif2203.inst 0x04ae3129 //eor z9.d,z9.d,z14.d2204.if mixin == 12205eor w13,w13,w182206.endif2207.inst 0x04a231ad //eor z13.d,z13.d,z2.d2208.if mixin == 12209eor w14,w14,w152210.endif2211.inst 0x04a63021 //eor z1.d,z1.d,z6.d2212.if mixin == 12213eor w11,w11,w162214.endif2215.inst 0x04679cb1 //lsl z17.s,z5.s,72216.inst 0x04679d32 //lsl z18.s,z9.s,72217.inst 0x04679db3 //lsl z19.s,z13.s,72218.inst 0x04679c34 //lsl z20.s,z1.s,72219.inst 0x046794a5 //lsr z5.s,z5.s,252220.if mixin == 12221ror w12,w12,252222.endif2223.inst 0x04679529 //lsr z9.s,z9.s,252224.if mixin == 12225ror w13,w13,252226.endif2227.inst 0x046795ad //lsr z13.s,z13.s,252228.if mixin == 12229ror w14,w14,252230.endif2231.inst 0x04679421 //lsr z1.s,z1.s,252232.if mixin == 12233ror w11,w11,252234.endif2235.inst 0x047130a5 //orr z5.d,z5.d,z17.d2236.inst 0x04723129 //orr z9.d,z9.d,z18.d2237.inst 0x047331ad //orr z13.d,z13.d,z19.d2238.inst 0x04743021 //orr z1.d,z1.d,z20.d2239sub x6,x6,12240cbnz x6,10b2241lsr x6,x28,#322242.inst 0x05a03b91 //dup z17.s,w282243.inst 0x05a038d2 //dup z18.s,w62244lsr x6,x29,#322245.inst 0x05a038d3 //dup z19.s,w62246lsr x6,x30,#322247.if mixin == 12248add w7,w7,w232249.endif2250.inst 0x04b90000 //add z0.s,z0.s,z25.s2251.if mixin == 12252add x8,x8,x23,lsr #322253.endif2254.inst 0x04ba0084 //add z4.s,z4.s,z26.s2255.if mixin == 12256add x7,x7,x8,lsl #32 // pack2257.endif2258.if mixin == 12259add w9,w9,w242260.endif2261.inst 0x04bb0108 //add z8.s,z8.s,z27.s2262.if mixin == 12263add x10,x10,x24,lsr #322264.endif2265.inst 0x04bc018c //add z12.s,z12.s,z28.s2266.if mixin == 12267add x9,x9,x10,lsl #32 // pack2268.endif2269.if mixin == 12270ldp x8,x10,[x1],#162271.endif2272.if mixin == 12273add w11,w11,w252274.endif2275.inst 0x04bd0021 //add z1.s,z1.s,z29.s2276.if mixin == 12277add x12,x12,x25,lsr #322278.endif2279.inst 0x04be00a5 //add z5.s,z5.s,z30.s2280.if mixin == 12281add x11,x11,x12,lsl #32 // pack2282.endif2283.if mixin == 12284add w13,w13,w262285.endif2286.inst 0x04b50129 //add z9.s,z9.s,z21.s2287.if mixin == 12288add x14,x14,x26,lsr #322289.endif2290.inst 0x04b601ad //add z13.s,z13.s,z22.s2291.if mixin == 12292add x13,x13,x14,lsl #32 // pack2293.endif2294.if mixin == 12295ldp x12,x14,[x1],#162296.endif2297.if mixin == 12298add w15,w15,w272299.endif2300.inst 0x04b70042 //add z2.s,z2.s,z23.s2301.if mixin == 12302add x16,x16,x27,lsr #322303.endif2304.inst 0x04b800c6 //add z6.s,z6.s,z24.s2305.if mixin == 12306add x15,x15,x16,lsl #32 // pack2307.endif2308.if mixin == 12309add w17,w17,w282310.endif2311.inst 0x04b1014a //add z10.s,z10.s,z17.s2312.if mixin == 12313add x18,x18,x28,lsr #322314.endif2315.inst 0x04b201ce //add z14.s,z14.s,z18.s2316.if mixin == 12317add x17,x17,x18,lsl #32 // pack2318.endif2319.if mixin == 12320ldp x16,x18,[x1],#162321.endif2322.inst 0x05a03bd4 //dup z20.s,w302323.inst 0x05a038d9 //dup z25.s,w6 // bak[15] not available for SVE2324.if mixin == 12325add w19,w19,w292326.endif2327.inst 0x04b00063 //add z3.s,z3.s,z16.s2328.if mixin == 12329add x20,x20,x29,lsr #322330.endif2331.inst 0x04b300e7 //add z7.s,z7.s,z19.s2332.if mixin == 12333add x19,x19,x20,lsl #32 // pack2334.endif2335.if mixin == 12336add w21,w21,w302337.endif2338.inst 0x04b4016b //add z11.s,z11.s,z20.s2339.if mixin == 12340add x22,x22,x30,lsr #322341.endif2342.inst 0x04b901ef //add z15.s,z15.s,z25.s2343.if mixin == 12344add x21,x21,x22,lsl #32 // pack2345.endif2346.if mixin == 12347ldp x20,x22,[x1],#162348.endif2349#ifdef __AARCH64EB__2350rev x7,x72351.inst 0x05a48000 //revb z0.s,p0/m,z0.s2352.inst 0x05a48084 //revb z4.s,p0/m,z4.s2353rev x9,x92354.inst 0x05a48108 //revb z8.s,p0/m,z8.s2355.inst 0x05a4818c //revb z12.s,p0/m,z12.s2356rev x11,x112357.inst 0x05a48021 //revb z1.s,p0/m,z1.s2358.inst 0x05a480a5 //revb z5.s,p0/m,z5.s2359rev x13,x132360.inst 0x05a48129 //revb z9.s,p0/m,z9.s2361.inst 0x05a481ad //revb z13.s,p0/m,z13.s2362rev x15,x152363.inst 0x05a48042 //revb z2.s,p0/m,z2.s2364.inst 0x05a480c6 //revb z6.s,p0/m,z6.s2365rev x17,x172366.inst 0x05a4814a //revb z10.s,p0/m,z10.s2367.inst 0x05a481ce //revb z14.s,p0/m,z14.s2368rev x19,x192369.inst 0x05a48063 //revb z3.s,p0/m,z3.s2370.inst 0x05a480e7 //revb z7.s,p0/m,z7.s2371rev x21,x212372.inst 0x05a4816b //revb z11.s,p0/m,z11.s2373.inst 0x05a481ef //revb z15.s,p0/m,z15.s2374#endif2375.if mixin == 12376add x29,x29,#12377.endif2378cmp x5,42379b.ne 200f2380.if mixin == 12381eor x7,x7,x82382.endif2383.if mixin == 12384eor x9,x9,x102385.endif2386.if mixin == 12387eor x11,x11,x122388.endif2389.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s2390.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s2391.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s2392.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s23932394.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s2395.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s2396.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s2397.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s23982399.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d2400.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d2401.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d2402.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d24032404.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d2405.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d2406.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d2407.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d2408.if mixin == 12409eor x13,x13,x142410.endif2411.if mixin == 12412eor x15,x15,x162413.endif2414.if mixin == 12415eor x17,x17,x182416.endif2417.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s2418.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s2419.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s2420.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s24212422.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s2423.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s2424.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s2425.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s24262427.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d2428.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d2429.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d2430.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d24312432.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d2433.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d2434.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d2435.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d2436.if mixin == 12437eor x19,x19,x202438.endif2439.if mixin == 12440eor x21,x21,x222441.endif2442ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#642443ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#642444.inst 0x04b13000 //eor z0.d,z0.d,z17.d2445.inst 0x04b23021 //eor z1.d,z1.d,z18.d2446.inst 0x04b33042 //eor z2.d,z2.d,z19.d2447.inst 0x04b43063 //eor z3.d,z3.d,z20.d2448.inst 0x04b53084 //eor z4.d,z4.d,z21.d2449.inst 0x04b630a5 //eor z5.d,z5.d,z22.d2450.inst 0x04b730c6 //eor z6.d,z6.d,z23.d2451.inst 0x04b830e7 //eor z7.d,z7.d,z24.d2452ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#642453ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#642454.if mixin == 12455stp x7,x9,[x0],#162456.endif2457.inst 0x04b13108 //eor z8.d,z8.d,z17.d2458.inst 0x04b23129 //eor z9.d,z9.d,z18.d2459.if mixin == 12460stp x11,x13,[x0],#162461.endif2462.inst 0x04b3314a //eor z10.d,z10.d,z19.d2463.inst 0x04b4316b //eor z11.d,z11.d,z20.d2464.if mixin == 12465stp x15,x17,[x0],#162466.endif2467.inst 0x04b5318c //eor z12.d,z12.d,z21.d2468.inst 0x04b631ad //eor z13.d,z13.d,z22.d2469.if mixin == 12470stp x19,x21,[x0],#162471.endif2472.inst 0x04b731ce //eor z14.d,z14.d,z23.d2473.inst 0x04b831ef //eor z15.d,z15.d,z24.d2474st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#642475st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#642476st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#642477st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#642478b 210f2479200:2480.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s2481.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s2482.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s2483.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s24842485.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s2486.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s2487.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s2488.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s24892490.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d2491.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d2492.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d2493.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d24942495.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d2496.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d2497.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d2498.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d2499.if mixin == 12500eor x7,x7,x82501.endif2502.if mixin == 12503eor x9,x9,x102504.endif2505.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s2506.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s2507.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s2508.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s25092510.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s2511.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s2512.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s2513.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s25142515.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d2516.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d2517.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d2518.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d25192520.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d2521.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d2522.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d2523.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d2524.if mixin == 12525eor x11,x11,x122526.endif2527.if mixin == 12528eor x13,x13,x142529.endif2530.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s2531.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s2532.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s2533.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s25342535.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s2536.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s2537.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s2538.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s25392540.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d2541.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d2542.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d2543.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d25442545.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d2546.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d2547.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d2548.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d2549.if mixin == 12550eor x15,x15,x162551.endif2552.if mixin == 12553eor x17,x17,x182554.endif2555.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s2556.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s2557.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s2558.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s25592560.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s2561.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s2562.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s2563.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s25642565.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d2566.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d2567.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d2568.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d25692570.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d2571.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d2572.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d2573.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d2574.if mixin == 12575eor x19,x19,x202576.endif2577.if mixin == 12578eor x21,x21,x222579.endif2580.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]2581.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]2582.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]2583.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]2584.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]2585.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]2586.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]2587.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]2588.inst 0x04215101 //addvl x1,x1,82589.inst 0x04b13000 //eor z0.d,z0.d,z17.d2590.inst 0x04b23084 //eor z4.d,z4.d,z18.d2591.inst 0x04b33108 //eor z8.d,z8.d,z19.d2592.inst 0x04b4318c //eor z12.d,z12.d,z20.d2593.inst 0x04b53021 //eor z1.d,z1.d,z21.d2594.inst 0x04b630a5 //eor z5.d,z5.d,z22.d2595.inst 0x04b73129 //eor z9.d,z9.d,z23.d2596.inst 0x04b831ad //eor z13.d,z13.d,z24.d2597.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]2598.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]2599.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]2600.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]2601.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]2602.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]2603.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]2604.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]2605.inst 0x04215101 //addvl x1,x1,82606.if mixin == 12607stp x7,x9,[x0],#162608.endif2609.inst 0x04b13042 //eor z2.d,z2.d,z17.d2610.inst 0x04b230c6 //eor z6.d,z6.d,z18.d2611.if mixin == 12612stp x11,x13,[x0],#162613.endif2614.inst 0x04b3314a //eor z10.d,z10.d,z19.d2615.inst 0x04b431ce //eor z14.d,z14.d,z20.d2616.if mixin == 12617stp x15,x17,[x0],#162618.endif2619.inst 0x04b53063 //eor z3.d,z3.d,z21.d2620.inst 0x04b630e7 //eor z7.d,z7.d,z22.d2621.if mixin == 12622stp x19,x21,[x0],#162623.endif2624.inst 0x04b7316b //eor z11.d,z11.d,z23.d2625.inst 0x04b831ef //eor z15.d,z15.d,z24.d2626.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL]2627.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL]2628.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL]2629.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL]2630.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL]2631.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL]2632.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL]2633.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL]2634.inst 0x04205100 //addvl x0,x0,82635.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL]2636.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL]2637.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL]2638.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL]2639.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL]2640.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL]2641.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL]2642.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL]2643.inst 0x04205100 //addvl x0,x0,82644210:2645.inst 0x04b0e3fd //incw x29, ALL, MUL #12646subs x2,x2,642647b.gt 100b2648b 110f2649101:2650mixin=02651lsr x8,x23,#322652.inst 0x05a03ae0 //dup z0.s,w232653.inst 0x05a03af9 //dup z25.s,w232654.if mixin == 12655mov w7,w232656.endif2657.inst 0x05a03904 //dup z4.s,w82658.inst 0x05a0391a //dup z26.s,w82659lsr x10,x24,#322660.inst 0x05a03b08 //dup z8.s,w242661.inst 0x05a03b1b //dup z27.s,w242662.if mixin == 12663mov w9,w242664.endif2665.inst 0x05a0394c //dup z12.s,w102666.inst 0x05a0395c //dup z28.s,w102667lsr x12,x25,#322668.inst 0x05a03b21 //dup z1.s,w252669.inst 0x05a03b3d //dup z29.s,w252670.if mixin == 12671mov w11,w252672.endif2673.inst 0x05a03985 //dup z5.s,w122674.inst 0x05a0399e //dup z30.s,w122675lsr x14,x26,#322676.inst 0x05a03b49 //dup z9.s,w262677.inst 0x05a03b55 //dup z21.s,w262678.if mixin == 12679mov w13,w262680.endif2681.inst 0x05a039cd //dup z13.s,w142682.inst 0x05a039d6 //dup z22.s,w142683lsr x16,x27,#322684.inst 0x05a03b62 //dup z2.s,w272685.inst 0x05a03b77 //dup z23.s,w272686.if mixin == 12687mov w15,w272688.endif2689.inst 0x05a03a06 //dup z6.s,w162690.inst 0x05a03a18 //dup z24.s,w162691lsr x18,x28,#322692.inst 0x05a03b8a //dup z10.s,w282693.if mixin == 12694mov w17,w282695.endif2696.inst 0x05a03a4e //dup z14.s,w182697lsr x22,x30,#322698.inst 0x05a03bcb //dup z11.s,w302699.if mixin == 12700mov w21,w302701.endif2702.inst 0x05a03acf //dup z15.s,w222703.if mixin == 12704add w20,w29,#12705mov w19,w292706.inst 0x04a14690 //index z16.s,w20,12707.inst 0x04a14683 //index z3.s,w20,12708.else2709.inst 0x04a147b0 //index z16.s,w29,12710.inst 0x04a147a3 //index z3.s,w29,12711.endif2712lsr x20,x29,#322713.inst 0x05a03a87 //dup z7.s,w202714mov x6,#10271510:2716.align 52717.inst 0x04a10000 //add z0.s,z0.s,z1.s2718.if mixin == 12719add w7,w7,w112720.endif2721.inst 0x04a50084 //add z4.s,z4.s,z5.s2722.if mixin == 12723add w8,w8,w122724.endif2725.inst 0x04a90108 //add z8.s,z8.s,z9.s2726.if mixin == 12727add w9,w9,w132728.endif2729.inst 0x04ad018c //add z12.s,z12.s,z13.s2730.if mixin == 12731add w10,w10,w142732.endif2733.inst 0x04a03063 //eor z3.d,z3.d,z0.d2734.if mixin == 12735eor w19,w19,w72736.endif2737.inst 0x04a430e7 //eor z7.d,z7.d,z4.d2738.if mixin == 12739eor w20,w20,w82740.endif2741.inst 0x04a8316b //eor z11.d,z11.d,z8.d2742.if mixin == 12743eor w21,w21,w92744.endif2745.inst 0x04ac31ef //eor z15.d,z15.d,z12.d2746.if mixin == 12747eor w22,w22,w102748.endif2749.inst 0x05a58063 //revh z3.s,p0/m,z3.s2750.if mixin == 12751ror w19,w19,#162752.endif2753.inst 0x05a580e7 //revh z7.s,p0/m,z7.s2754.if mixin == 12755ror w20,w20,#162756.endif2757.inst 0x05a5816b //revh z11.s,p0/m,z11.s2758.if mixin == 12759ror w21,w21,#162760.endif2761.inst 0x05a581ef //revh z15.s,p0/m,z15.s2762.if mixin == 12763ror w22,w22,#162764.endif2765.inst 0x04a30042 //add z2.s,z2.s,z3.s2766.if mixin == 12767add w15,w15,w192768.endif2769.inst 0x04a700c6 //add z6.s,z6.s,z7.s2770.if mixin == 12771add w16,w16,w202772.endif2773.inst 0x04ab014a //add z10.s,z10.s,z11.s2774.if mixin == 12775add w17,w17,w212776.endif2777.inst 0x04af01ce //add z14.s,z14.s,z15.s2778.if mixin == 12779add w18,w18,w222780.endif2781.inst 0x04a23021 //eor z1.d,z1.d,z2.d2782.if mixin == 12783eor w11,w11,w152784.endif2785.inst 0x04a630a5 //eor z5.d,z5.d,z6.d2786.if mixin == 12787eor w12,w12,w162788.endif2789.inst 0x04aa3129 //eor z9.d,z9.d,z10.d2790.if mixin == 12791eor w13,w13,w172792.endif2793.inst 0x04ae31ad //eor z13.d,z13.d,z14.d2794.if mixin == 12795eor w14,w14,w182796.endif2797.inst 0x046c9c31 //lsl z17.s,z1.s,122798.inst 0x046c9cb2 //lsl z18.s,z5.s,122799.inst 0x046c9d33 //lsl z19.s,z9.s,122800.inst 0x046c9db4 //lsl z20.s,z13.s,122801.inst 0x046c9421 //lsr z1.s,z1.s,202802.if mixin == 12803ror w11,w11,202804.endif2805.inst 0x046c94a5 //lsr z5.s,z5.s,202806.if mixin == 12807ror w12,w12,202808.endif2809.inst 0x046c9529 //lsr z9.s,z9.s,202810.if mixin == 12811ror w13,w13,202812.endif2813.inst 0x046c95ad //lsr z13.s,z13.s,202814.if mixin == 12815ror w14,w14,202816.endif2817.inst 0x04713021 //orr z1.d,z1.d,z17.d2818.inst 0x047230a5 //orr z5.d,z5.d,z18.d2819.inst 0x04733129 //orr z9.d,z9.d,z19.d2820.inst 0x047431ad //orr z13.d,z13.d,z20.d2821.inst 0x04a10000 //add z0.s,z0.s,z1.s2822.if mixin == 12823add w7,w7,w112824.endif2825.inst 0x04a50084 //add z4.s,z4.s,z5.s2826.if mixin == 12827add w8,w8,w122828.endif2829.inst 0x04a90108 //add z8.s,z8.s,z9.s2830.if mixin == 12831add w9,w9,w132832.endif2833.inst 0x04ad018c //add z12.s,z12.s,z13.s2834.if mixin == 12835add w10,w10,w142836.endif2837.inst 0x04a03063 //eor z3.d,z3.d,z0.d2838.if mixin == 12839eor w19,w19,w72840.endif2841.inst 0x04a430e7 //eor z7.d,z7.d,z4.d2842.if mixin == 12843eor w20,w20,w82844.endif2845.inst 0x04a8316b //eor z11.d,z11.d,z8.d2846.if mixin == 12847eor w21,w21,w92848.endif2849.inst 0x04ac31ef //eor z15.d,z15.d,z12.d2850.if mixin == 12851eor w22,w22,w102852.endif2853.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b2854.if mixin == 12855ror w19,w19,#242856.endif2857.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b2858.if mixin == 12859ror w20,w20,#242860.endif2861.inst 0x053f316b //tbl z11.b,{z11.b},z31.b2862.if mixin == 12863ror w21,w21,#242864.endif2865.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b2866.if mixin == 12867ror w22,w22,#242868.endif2869.inst 0x04a30042 //add z2.s,z2.s,z3.s2870.if mixin == 12871add w15,w15,w192872.endif2873.inst 0x04a700c6 //add z6.s,z6.s,z7.s2874.if mixin == 12875add w16,w16,w202876.endif2877.inst 0x04ab014a //add z10.s,z10.s,z11.s2878.if mixin == 12879add w17,w17,w212880.endif2881.inst 0x04af01ce //add z14.s,z14.s,z15.s2882.if mixin == 12883add w18,w18,w222884.endif2885.inst 0x04a23021 //eor z1.d,z1.d,z2.d2886.if mixin == 12887eor w11,w11,w152888.endif2889.inst 0x04a630a5 //eor z5.d,z5.d,z6.d2890.if mixin == 12891eor w12,w12,w162892.endif2893.inst 0x04aa3129 //eor z9.d,z9.d,z10.d2894.if mixin == 12895eor w13,w13,w172896.endif2897.inst 0x04ae31ad //eor z13.d,z13.d,z14.d2898.if mixin == 12899eor w14,w14,w182900.endif2901.inst 0x04679c31 //lsl z17.s,z1.s,72902.inst 0x04679cb2 //lsl z18.s,z5.s,72903.inst 0x04679d33 //lsl z19.s,z9.s,72904.inst 0x04679db4 //lsl z20.s,z13.s,72905.inst 0x04679421 //lsr z1.s,z1.s,252906.if mixin == 12907ror w11,w11,252908.endif2909.inst 0x046794a5 //lsr z5.s,z5.s,252910.if mixin == 12911ror w12,w12,252912.endif2913.inst 0x04679529 //lsr z9.s,z9.s,252914.if mixin == 12915ror w13,w13,252916.endif2917.inst 0x046795ad //lsr z13.s,z13.s,252918.if mixin == 12919ror w14,w14,252920.endif2921.inst 0x04713021 //orr z1.d,z1.d,z17.d2922.inst 0x047230a5 //orr z5.d,z5.d,z18.d2923.inst 0x04733129 //orr z9.d,z9.d,z19.d2924.inst 0x047431ad //orr z13.d,z13.d,z20.d2925.inst 0x04a50000 //add z0.s,z0.s,z5.s2926.if mixin == 12927add w7,w7,w122928.endif2929.inst 0x04a90084 //add z4.s,z4.s,z9.s2930.if mixin == 12931add w8,w8,w132932.endif2933.inst 0x04ad0108 //add z8.s,z8.s,z13.s2934.if mixin == 12935add w9,w9,w142936.endif2937.inst 0x04a1018c //add z12.s,z12.s,z1.s2938.if mixin == 12939add w10,w10,w112940.endif2941.inst 0x04a031ef //eor z15.d,z15.d,z0.d2942.if mixin == 12943eor w22,w22,w72944.endif2945.inst 0x04a43063 //eor z3.d,z3.d,z4.d2946.if mixin == 12947eor w19,w19,w82948.endif2949.inst 0x04a830e7 //eor z7.d,z7.d,z8.d2950.if mixin == 12951eor w20,w20,w92952.endif2953.inst 0x04ac316b //eor z11.d,z11.d,z12.d2954.if mixin == 12955eor w21,w21,w102956.endif2957.inst 0x05a581ef //revh z15.s,p0/m,z15.s2958.if mixin == 12959ror w22,w22,#162960.endif2961.inst 0x05a58063 //revh z3.s,p0/m,z3.s2962.if mixin == 12963ror w19,w19,#162964.endif2965.inst 0x05a580e7 //revh z7.s,p0/m,z7.s2966.if mixin == 12967ror w20,w20,#162968.endif2969.inst 0x05a5816b //revh z11.s,p0/m,z11.s2970.if mixin == 12971ror w21,w21,#162972.endif2973.inst 0x04af014a //add z10.s,z10.s,z15.s2974.if mixin == 12975add w17,w17,w222976.endif2977.inst 0x04a301ce //add z14.s,z14.s,z3.s2978.if mixin == 12979add w18,w18,w192980.endif2981.inst 0x04a70042 //add z2.s,z2.s,z7.s2982.if mixin == 12983add w15,w15,w202984.endif2985.inst 0x04ab00c6 //add z6.s,z6.s,z11.s2986.if mixin == 12987add w16,w16,w212988.endif2989.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d2990.if mixin == 12991eor w12,w12,w172992.endif2993.inst 0x04ae3129 //eor z9.d,z9.d,z14.d2994.if mixin == 12995eor w13,w13,w182996.endif2997.inst 0x04a231ad //eor z13.d,z13.d,z2.d2998.if mixin == 12999eor w14,w14,w153000.endif3001.inst 0x04a63021 //eor z1.d,z1.d,z6.d3002.if mixin == 13003eor w11,w11,w163004.endif3005.inst 0x046c9cb1 //lsl z17.s,z5.s,123006.inst 0x046c9d32 //lsl z18.s,z9.s,123007.inst 0x046c9db3 //lsl z19.s,z13.s,123008.inst 0x046c9c34 //lsl z20.s,z1.s,123009.inst 0x046c94a5 //lsr z5.s,z5.s,203010.if mixin == 13011ror w12,w12,203012.endif3013.inst 0x046c9529 //lsr z9.s,z9.s,203014.if mixin == 13015ror w13,w13,203016.endif3017.inst 0x046c95ad //lsr z13.s,z13.s,203018.if mixin == 13019ror w14,w14,203020.endif3021.inst 0x046c9421 //lsr z1.s,z1.s,203022.if mixin == 13023ror w11,w11,203024.endif3025.inst 0x047130a5 //orr z5.d,z5.d,z17.d3026.inst 0x04723129 //orr z9.d,z9.d,z18.d3027.inst 0x047331ad //orr z13.d,z13.d,z19.d3028.inst 0x04743021 //orr z1.d,z1.d,z20.d3029.inst 0x04a50000 //add z0.s,z0.s,z5.s3030.if mixin == 13031add w7,w7,w123032.endif3033.inst 0x04a90084 //add z4.s,z4.s,z9.s3034.if mixin == 13035add w8,w8,w133036.endif3037.inst 0x04ad0108 //add z8.s,z8.s,z13.s3038.if mixin == 13039add w9,w9,w143040.endif3041.inst 0x04a1018c //add z12.s,z12.s,z1.s3042.if mixin == 13043add w10,w10,w113044.endif3045.inst 0x04a031ef //eor z15.d,z15.d,z0.d3046.if mixin == 13047eor w22,w22,w73048.endif3049.inst 0x04a43063 //eor z3.d,z3.d,z4.d3050.if mixin == 13051eor w19,w19,w83052.endif3053.inst 0x04a830e7 //eor z7.d,z7.d,z8.d3054.if mixin == 13055eor w20,w20,w93056.endif3057.inst 0x04ac316b //eor z11.d,z11.d,z12.d3058.if mixin == 13059eor w21,w21,w103060.endif3061.inst 0x053f31ef //tbl z15.b,{z15.b},z31.b3062.if mixin == 13063ror w22,w22,#243064.endif3065.inst 0x053f3063 //tbl z3.b,{z3.b},z31.b3066.if mixin == 13067ror w19,w19,#243068.endif3069.inst 0x053f30e7 //tbl z7.b,{z7.b},z31.b3070.if mixin == 13071ror w20,w20,#243072.endif3073.inst 0x053f316b //tbl z11.b,{z11.b},z31.b3074.if mixin == 13075ror w21,w21,#243076.endif3077.inst 0x04af014a //add z10.s,z10.s,z15.s3078.if mixin == 13079add w17,w17,w223080.endif3081.inst 0x04a301ce //add z14.s,z14.s,z3.s3082.if mixin == 13083add w18,w18,w193084.endif3085.inst 0x04a70042 //add z2.s,z2.s,z7.s3086.if mixin == 13087add w15,w15,w203088.endif3089.inst 0x04ab00c6 //add z6.s,z6.s,z11.s3090.if mixin == 13091add w16,w16,w213092.endif3093.inst 0x04aa30a5 //eor z5.d,z5.d,z10.d3094.if mixin == 13095eor w12,w12,w173096.endif3097.inst 0x04ae3129 //eor z9.d,z9.d,z14.d3098.if mixin == 13099eor w13,w13,w183100.endif3101.inst 0x04a231ad //eor z13.d,z13.d,z2.d3102.if mixin == 13103eor w14,w14,w153104.endif3105.inst 0x04a63021 //eor z1.d,z1.d,z6.d3106.if mixin == 13107eor w11,w11,w163108.endif3109.inst 0x04679cb1 //lsl z17.s,z5.s,73110.inst 0x04679d32 //lsl z18.s,z9.s,73111.inst 0x04679db3 //lsl z19.s,z13.s,73112.inst 0x04679c34 //lsl z20.s,z1.s,73113.inst 0x046794a5 //lsr z5.s,z5.s,253114.if mixin == 13115ror w12,w12,253116.endif3117.inst 0x04679529 //lsr z9.s,z9.s,253118.if mixin == 13119ror w13,w13,253120.endif3121.inst 0x046795ad //lsr z13.s,z13.s,253122.if mixin == 13123ror w14,w14,253124.endif3125.inst 0x04679421 //lsr z1.s,z1.s,253126.if mixin == 13127ror w11,w11,253128.endif3129.inst 0x047130a5 //orr z5.d,z5.d,z17.d3130.inst 0x04723129 //orr z9.d,z9.d,z18.d3131.inst 0x047331ad //orr z13.d,z13.d,z19.d3132.inst 0x04743021 //orr z1.d,z1.d,z20.d3133sub x6,x6,13134cbnz x6,10b3135lsr x6,x28,#323136.inst 0x05a03b91 //dup z17.s,w283137.inst 0x05a038d2 //dup z18.s,w63138lsr x6,x29,#323139.inst 0x05a038d3 //dup z19.s,w63140lsr x6,x30,#323141.if mixin == 13142add w7,w7,w233143.endif3144.inst 0x04b90000 //add z0.s,z0.s,z25.s3145.if mixin == 13146add x8,x8,x23,lsr #323147.endif3148.inst 0x04ba0084 //add z4.s,z4.s,z26.s3149.if mixin == 13150add x7,x7,x8,lsl #32 // pack3151.endif3152.if mixin == 13153add w9,w9,w243154.endif3155.inst 0x04bb0108 //add z8.s,z8.s,z27.s3156.if mixin == 13157add x10,x10,x24,lsr #323158.endif3159.inst 0x04bc018c //add z12.s,z12.s,z28.s3160.if mixin == 13161add x9,x9,x10,lsl #32 // pack3162.endif3163.if mixin == 13164ldp x8,x10,[x1],#163165.endif3166.if mixin == 13167add w11,w11,w253168.endif3169.inst 0x04bd0021 //add z1.s,z1.s,z29.s3170.if mixin == 13171add x12,x12,x25,lsr #323172.endif3173.inst 0x04be00a5 //add z5.s,z5.s,z30.s3174.if mixin == 13175add x11,x11,x12,lsl #32 // pack3176.endif3177.if mixin == 13178add w13,w13,w263179.endif3180.inst 0x04b50129 //add z9.s,z9.s,z21.s3181.if mixin == 13182add x14,x14,x26,lsr #323183.endif3184.inst 0x04b601ad //add z13.s,z13.s,z22.s3185.if mixin == 13186add x13,x13,x14,lsl #32 // pack3187.endif3188.if mixin == 13189ldp x12,x14,[x1],#163190.endif3191.if mixin == 13192add w15,w15,w273193.endif3194.inst 0x04b70042 //add z2.s,z2.s,z23.s3195.if mixin == 13196add x16,x16,x27,lsr #323197.endif3198.inst 0x04b800c6 //add z6.s,z6.s,z24.s3199.if mixin == 13200add x15,x15,x16,lsl #32 // pack3201.endif3202.if mixin == 13203add w17,w17,w283204.endif3205.inst 0x04b1014a //add z10.s,z10.s,z17.s3206.if mixin == 13207add x18,x18,x28,lsr #323208.endif3209.inst 0x04b201ce //add z14.s,z14.s,z18.s3210.if mixin == 13211add x17,x17,x18,lsl #32 // pack3212.endif3213.if mixin == 13214ldp x16,x18,[x1],#163215.endif3216.inst 0x05a03bd4 //dup z20.s,w303217.inst 0x05a038d9 //dup z25.s,w6 // bak[15] not available for SVE3218.if mixin == 13219add w19,w19,w293220.endif3221.inst 0x04b00063 //add z3.s,z3.s,z16.s3222.if mixin == 13223add x20,x20,x29,lsr #323224.endif3225.inst 0x04b300e7 //add z7.s,z7.s,z19.s3226.if mixin == 13227add x19,x19,x20,lsl #32 // pack3228.endif3229.if mixin == 13230add w21,w21,w303231.endif3232.inst 0x04b4016b //add z11.s,z11.s,z20.s3233.if mixin == 13234add x22,x22,x30,lsr #323235.endif3236.inst 0x04b901ef //add z15.s,z15.s,z25.s3237.if mixin == 13238add x21,x21,x22,lsl #32 // pack3239.endif3240.if mixin == 13241ldp x20,x22,[x1],#163242.endif3243#ifdef __AARCH64EB__3244rev x7,x73245.inst 0x05a48000 //revb z0.s,p0/m,z0.s3246.inst 0x05a48084 //revb z4.s,p0/m,z4.s3247rev x9,x93248.inst 0x05a48108 //revb z8.s,p0/m,z8.s3249.inst 0x05a4818c //revb z12.s,p0/m,z12.s3250rev x11,x113251.inst 0x05a48021 //revb z1.s,p0/m,z1.s3252.inst 0x05a480a5 //revb z5.s,p0/m,z5.s3253rev x13,x133254.inst 0x05a48129 //revb z9.s,p0/m,z9.s3255.inst 0x05a481ad //revb z13.s,p0/m,z13.s3256rev x15,x153257.inst 0x05a48042 //revb z2.s,p0/m,z2.s3258.inst 0x05a480c6 //revb z6.s,p0/m,z6.s3259rev x17,x173260.inst 0x05a4814a //revb z10.s,p0/m,z10.s3261.inst 0x05a481ce //revb z14.s,p0/m,z14.s3262rev x19,x193263.inst 0x05a48063 //revb z3.s,p0/m,z3.s3264.inst 0x05a480e7 //revb z7.s,p0/m,z7.s3265rev x21,x213266.inst 0x05a4816b //revb z11.s,p0/m,z11.s3267.inst 0x05a481ef //revb z15.s,p0/m,z15.s3268#endif3269.if mixin == 13270add x29,x29,#13271.endif3272cmp x5,43273b.ne 200f3274.if mixin == 13275eor x7,x7,x83276.endif3277.if mixin == 13278eor x9,x9,x103279.endif3280.if mixin == 13281eor x11,x11,x123282.endif3283.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s3284.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s3285.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s3286.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s32873288.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s3289.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s3290.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s3291.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s32923293.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d3294.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d3295.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d3296.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d32973298.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d3299.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d3300.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d3301.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d3302.if mixin == 13303eor x13,x13,x143304.endif3305.if mixin == 13306eor x15,x15,x163307.endif3308.if mixin == 13309eor x17,x17,x183310.endif3311.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s3312.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s3313.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s3314.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s33153316.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s3317.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s3318.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s3319.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s33203321.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d3322.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d3323.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d3324.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d33253326.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d3327.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d3328.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d3329.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d3330.if mixin == 13331eor x19,x19,x203332.endif3333.if mixin == 13334eor x21,x21,x223335.endif3336ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#643337ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#643338.inst 0x04b13000 //eor z0.d,z0.d,z17.d3339.inst 0x04b23021 //eor z1.d,z1.d,z18.d3340.inst 0x04b33042 //eor z2.d,z2.d,z19.d3341.inst 0x04b43063 //eor z3.d,z3.d,z20.d3342.inst 0x04b53084 //eor z4.d,z4.d,z21.d3343.inst 0x04b630a5 //eor z5.d,z5.d,z22.d3344.inst 0x04b730c6 //eor z6.d,z6.d,z23.d3345.inst 0x04b830e7 //eor z7.d,z7.d,z24.d3346ld1 {v17.4s,v18.4s,v19.4s,v20.4s},[x1],#643347ld1 {v21.4s,v22.4s,v23.4s,v24.4s},[x1],#643348.if mixin == 13349stp x7,x9,[x0],#163350.endif3351.inst 0x04b13108 //eor z8.d,z8.d,z17.d3352.inst 0x04b23129 //eor z9.d,z9.d,z18.d3353.if mixin == 13354stp x11,x13,[x0],#163355.endif3356.inst 0x04b3314a //eor z10.d,z10.d,z19.d3357.inst 0x04b4316b //eor z11.d,z11.d,z20.d3358.if mixin == 13359stp x15,x17,[x0],#163360.endif3361.inst 0x04b5318c //eor z12.d,z12.d,z21.d3362.inst 0x04b631ad //eor z13.d,z13.d,z22.d3363.if mixin == 13364stp x19,x21,[x0],#163365.endif3366.inst 0x04b731ce //eor z14.d,z14.d,z23.d3367.inst 0x04b831ef //eor z15.d,z15.d,z24.d3368st1 {v0.4s,v1.4s,v2.4s,v3.4s},[x0],#643369st1 {v4.4s,v5.4s,v6.4s,v7.4s},[x0],#643370st1 {v8.4s,v9.4s,v10.4s,v11.4s},[x0],#643371st1 {v12.4s,v13.4s,v14.4s,v15.4s},[x0],#643372b 210f3373200:3374.inst 0x05a16011 //zip1 z17.s,z0.s,z1.s3375.inst 0x05a16412 //zip2 z18.s,z0.s,z1.s3376.inst 0x05a36053 //zip1 z19.s,z2.s,z3.s3377.inst 0x05a36454 //zip2 z20.s,z2.s,z3.s33783379.inst 0x05a56095 //zip1 z21.s,z4.s,z5.s3380.inst 0x05a56496 //zip2 z22.s,z4.s,z5.s3381.inst 0x05a760d7 //zip1 z23.s,z6.s,z7.s3382.inst 0x05a764d8 //zip2 z24.s,z6.s,z7.s33833384.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d3385.inst 0x05f36621 //zip2 z1.d,z17.d,z19.d3386.inst 0x05f46242 //zip1 z2.d,z18.d,z20.d3387.inst 0x05f46643 //zip2 z3.d,z18.d,z20.d33883389.inst 0x05f762a4 //zip1 z4.d,z21.d,z23.d3390.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d3391.inst 0x05f862c6 //zip1 z6.d,z22.d,z24.d3392.inst 0x05f866c7 //zip2 z7.d,z22.d,z24.d3393.if mixin == 13394eor x7,x7,x83395.endif3396.if mixin == 13397eor x9,x9,x103398.endif3399.inst 0x05a96111 //zip1 z17.s,z8.s,z9.s3400.inst 0x05a96512 //zip2 z18.s,z8.s,z9.s3401.inst 0x05ab6153 //zip1 z19.s,z10.s,z11.s3402.inst 0x05ab6554 //zip2 z20.s,z10.s,z11.s34033404.inst 0x05ad6195 //zip1 z21.s,z12.s,z13.s3405.inst 0x05ad6596 //zip2 z22.s,z12.s,z13.s3406.inst 0x05af61d7 //zip1 z23.s,z14.s,z15.s3407.inst 0x05af65d8 //zip2 z24.s,z14.s,z15.s34083409.inst 0x05f36228 //zip1 z8.d,z17.d,z19.d3410.inst 0x05f36629 //zip2 z9.d,z17.d,z19.d3411.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d3412.inst 0x05f4664b //zip2 z11.d,z18.d,z20.d34133414.inst 0x05f762ac //zip1 z12.d,z21.d,z23.d3415.inst 0x05f766ad //zip2 z13.d,z21.d,z23.d3416.inst 0x05f862ce //zip1 z14.d,z22.d,z24.d3417.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d3418.if mixin == 13419eor x11,x11,x123420.endif3421.if mixin == 13422eor x13,x13,x143423.endif3424.inst 0x05a46011 //zip1 z17.s,z0.s,z4.s3425.inst 0x05a46412 //zip2 z18.s,z0.s,z4.s3426.inst 0x05ac6113 //zip1 z19.s,z8.s,z12.s3427.inst 0x05ac6514 //zip2 z20.s,z8.s,z12.s34283429.inst 0x05a56035 //zip1 z21.s,z1.s,z5.s3430.inst 0x05a56436 //zip2 z22.s,z1.s,z5.s3431.inst 0x05ad6137 //zip1 z23.s,z9.s,z13.s3432.inst 0x05ad6538 //zip2 z24.s,z9.s,z13.s34333434.inst 0x05f36220 //zip1 z0.d,z17.d,z19.d3435.inst 0x05f36624 //zip2 z4.d,z17.d,z19.d3436.inst 0x05f46248 //zip1 z8.d,z18.d,z20.d3437.inst 0x05f4664c //zip2 z12.d,z18.d,z20.d34383439.inst 0x05f762a1 //zip1 z1.d,z21.d,z23.d3440.inst 0x05f766a5 //zip2 z5.d,z21.d,z23.d3441.inst 0x05f862c9 //zip1 z9.d,z22.d,z24.d3442.inst 0x05f866cd //zip2 z13.d,z22.d,z24.d3443.if mixin == 13444eor x15,x15,x163445.endif3446.if mixin == 13447eor x17,x17,x183448.endif3449.inst 0x05a66051 //zip1 z17.s,z2.s,z6.s3450.inst 0x05a66452 //zip2 z18.s,z2.s,z6.s3451.inst 0x05ae6153 //zip1 z19.s,z10.s,z14.s3452.inst 0x05ae6554 //zip2 z20.s,z10.s,z14.s34533454.inst 0x05a76075 //zip1 z21.s,z3.s,z7.s3455.inst 0x05a76476 //zip2 z22.s,z3.s,z7.s3456.inst 0x05af6177 //zip1 z23.s,z11.s,z15.s3457.inst 0x05af6578 //zip2 z24.s,z11.s,z15.s34583459.inst 0x05f36222 //zip1 z2.d,z17.d,z19.d3460.inst 0x05f36626 //zip2 z6.d,z17.d,z19.d3461.inst 0x05f4624a //zip1 z10.d,z18.d,z20.d3462.inst 0x05f4664e //zip2 z14.d,z18.d,z20.d34633464.inst 0x05f762a3 //zip1 z3.d,z21.d,z23.d3465.inst 0x05f766a7 //zip2 z7.d,z21.d,z23.d3466.inst 0x05f862cb //zip1 z11.d,z22.d,z24.d3467.inst 0x05f866cf //zip2 z15.d,z22.d,z24.d3468.if mixin == 13469eor x19,x19,x203470.endif3471.if mixin == 13472eor x21,x21,x223473.endif3474.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]3475.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]3476.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]3477.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]3478.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]3479.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]3480.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]3481.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]3482.inst 0x04215101 //addvl x1,x1,83483.inst 0x04b13000 //eor z0.d,z0.d,z17.d3484.inst 0x04b23084 //eor z4.d,z4.d,z18.d3485.inst 0x04b33108 //eor z8.d,z8.d,z19.d3486.inst 0x04b4318c //eor z12.d,z12.d,z20.d3487.inst 0x04b53021 //eor z1.d,z1.d,z21.d3488.inst 0x04b630a5 //eor z5.d,z5.d,z22.d3489.inst 0x04b73129 //eor z9.d,z9.d,z23.d3490.inst 0x04b831ad //eor z13.d,z13.d,z24.d3491.inst 0xa540a031 //ld1w {z17.s},p0/z,[x1,#0,MUL VL]3492.inst 0xa541a032 //ld1w {z18.s},p0/z,[x1,#1,MUL VL]3493.inst 0xa542a033 //ld1w {z19.s},p0/z,[x1,#2,MUL VL]3494.inst 0xa543a034 //ld1w {z20.s},p0/z,[x1,#3,MUL VL]3495.inst 0xa544a035 //ld1w {z21.s},p0/z,[x1,#4,MUL VL]3496.inst 0xa545a036 //ld1w {z22.s},p0/z,[x1,#5,MUL VL]3497.inst 0xa546a037 //ld1w {z23.s},p0/z,[x1,#6,MUL VL]3498.inst 0xa547a038 //ld1w {z24.s},p0/z,[x1,#7,MUL VL]3499.inst 0x04215101 //addvl x1,x1,83500.if mixin == 13501stp x7,x9,[x0],#163502.endif3503.inst 0x04b13042 //eor z2.d,z2.d,z17.d3504.inst 0x04b230c6 //eor z6.d,z6.d,z18.d3505.if mixin == 13506stp x11,x13,[x0],#163507.endif3508.inst 0x04b3314a //eor z10.d,z10.d,z19.d3509.inst 0x04b431ce //eor z14.d,z14.d,z20.d3510.if mixin == 13511stp x15,x17,[x0],#163512.endif3513.inst 0x04b53063 //eor z3.d,z3.d,z21.d3514.inst 0x04b630e7 //eor z7.d,z7.d,z22.d3515.if mixin == 13516stp x19,x21,[x0],#163517.endif3518.inst 0x04b7316b //eor z11.d,z11.d,z23.d3519.inst 0x04b831ef //eor z15.d,z15.d,z24.d3520.inst 0xe540e000 //st1w {z0.s},p0,[x0,#0,MUL VL]3521.inst 0xe541e004 //st1w {z4.s},p0,[x0,#1,MUL VL]3522.inst 0xe542e008 //st1w {z8.s},p0,[x0,#2,MUL VL]3523.inst 0xe543e00c //st1w {z12.s},p0,[x0,#3,MUL VL]3524.inst 0xe544e001 //st1w {z1.s},p0,[x0,#4,MUL VL]3525.inst 0xe545e005 //st1w {z5.s},p0,[x0,#5,MUL VL]3526.inst 0xe546e009 //st1w {z9.s},p0,[x0,#6,MUL VL]3527.inst 0xe547e00d //st1w {z13.s},p0,[x0,#7,MUL VL]3528.inst 0x04205100 //addvl x0,x0,83529.inst 0xe540e002 //st1w {z2.s},p0,[x0,#0,MUL VL]3530.inst 0xe541e006 //st1w {z6.s},p0,[x0,#1,MUL VL]3531.inst 0xe542e00a //st1w {z10.s},p0,[x0,#2,MUL VL]3532.inst 0xe543e00e //st1w {z14.s},p0,[x0,#3,MUL VL]3533.inst 0xe544e003 //st1w {z3.s},p0,[x0,#4,MUL VL]3534.inst 0xe545e007 //st1w {z7.s},p0,[x0,#5,MUL VL]3535.inst 0xe546e00b //st1w {z11.s},p0,[x0,#6,MUL VL]3536.inst 0xe547e00f //st1w {z15.s},p0,[x0,#7,MUL VL]3537.inst 0x04205100 //addvl x0,x0,83538210:3539.inst 0x04b0e3fd //incw x29, ALL, MUL #13540110:35412:3542str w29,[x4]3543ldp d10,d11,[sp,16]3544ldp d12,d13,[sp,32]3545ldp d14,d15,[sp,48]3546ldp x16,x17,[sp,64]3547ldp x18,x19,[sp,80]3548ldp x20,x21,[sp,96]3549ldp x22,x23,[sp,112]3550ldp x24,x25,[sp,128]3551ldp x26,x27,[sp,144]3552ldp x28,x29,[sp,160]3553ldr x30,[sp,176]3554ldp d8,d9,[sp],1923555AARCH64_VALIDATE_LINK_REGISTER3556.Lreturn:3557ret3558.size ChaCha20_ctr32_sve,.-ChaCha20_ctr32_sve355935603561