// Path: blob/main/sys/crypto/openssl/aarch64/sm3-armv8.S
// (web viewer residue: 39536 views)
/* Do not modify. This file is auto-generated from sm3-armv8.pl. */
// Copyright 2021-2025 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You may not use
// this file except in compliance with the License. You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// This module implements support for Armv8 SM3 instructions

// $output is the last argument if it looks like a file (it has an extension)
// $flavour is the first argument if it doesn't look like a file
#include "arm_arch.h"
.text

//----------------------------------------------------------------------
// ossl_hwsm3_block_data_order
//
// Folds consecutive 64-byte input blocks into a 256-bit state using the
// Armv8 SM3 instructions. The SM3 instructions are emitted as raw .inst
// words; the intended mnemonic is given in the comment on each line.
//
// NOTE(review): the C prototype is not visible in this file; presumably
//   void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
// -- confirm against the declaring header.
//
// In:    x0 = state, eight 32-bit words, read on entry, written on exit
//        x1 = input, advanced by 64 bytes per block
//        w2 = number of blocks
// Notes: - Only the low 32 bits of the count are examined (w2).
//        - The count is decremented before it is tested, so the loop
//          body always runs at least once; a count of zero is not
//          special-cased.
// Uses:  x8, v0-v6, v16-v23 (none are callee-saved under AAPCS64).
//        The stack is not touched.
//
// Register roles inside the loop:
//   v5, v6    working state
//   v18, v19  copy of the state at block entry, XORed back at the end
//   v0-v4     sliding window of message words W[j] (five vectors)
//   v16, v17  the two words loaded from .Tj (lane 0 of each)
//   v20, v21  current round constant in lane 3, rotated left by one bit
//             per round, alternating between the two registers
//   v22, v23  temporaries
//----------------------------------------------------------------------
.globl	ossl_hwsm3_block_data_order
.type	ossl_hwsm3_block_data_order,%function
.align	5
ossl_hwsm3_block_data_order:
	AARCH64_VALID_CALL_TARGET
	// load state
	ld1	{v5.4s,v6.4s}, [x0]
	// rev64 followed by ext #8 reverses the order of the four 32-bit
	// lanes of each state vector.
	rev64	v5.4s, v5.4s
	rev64	v6.4s, v6.4s
	ext	v5.16b, v5.16b, v5.16b, #8
	ext	v6.16b, v6.16b, v6.16b, #8
	// s16 = first word of .Tj, s17 = second word of .Tj
	adrp	x8, .Tj
	add	x8, x8, #:lo12:.Tj
	ldp	s16, s17, [x8]

.Loop:
	// load input
	ld1	{v0.4s,v1.4s,v2.4s,v3.4s}, [x1], #64
	sub	w2, w2, #1

	// keep the incoming state for the final XOR
	mov	v18.16b, v5.16b
	mov	v19.16b, v6.16b

#ifndef __AARCH64EB__
	// little-endian build: byte-swap every 32-bit message word
	rev32	v0.16b, v0.16b
	rev32	v1.16b, v1.16b
	rev32	v2.16b, v2.16b
	rev32	v3.16b, v3.16b
#endif

	// Move the first .Tj word from lane 0 to lane 3 of v20.
	ext	v20.16b, v16.16b, v16.16b, #4

	// Each group below performs four rounds. A round is
	// sm3ss1 + sm3tt1x + sm3tt2x on (v5, v6); in between, the constant
	// for the following round is produced by rotating every 32-bit lane
	// left by one bit (shl #1 then sri #31), ping-ponging v20 <-> v21.
	// While the schedule is still being extended, a group first derives
	// the next four message words with sm3partw1 / sm3partw2.

	// Rounds 0-3: W0..W3 in v0; expand W16..W19 into v4.
	ext	v4.16b, v1.16b, v2.16b, #12	// v4  = W7..W10
	ext	v22.16b, v0.16b, v1.16b, #12	// v22 = W3..W6
	ext	v23.16b, v2.16b, v3.16b, #8	// v23 = W10..W13
	.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
	.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
	eor	v22.16b, v0.16b, v1.16b		// v22 = W[j] ^ W[j+4]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
	.inst	0xce408ae6	//sm3tt2a v6.4s, v23.4s, v0.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
	.inst	0xce409ae6	//sm3tt2a v6.4s, v23.4s, v0.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
	.inst	0xce40aae6	//sm3tt2a v6.4s, v23.4s, v0.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
	.inst	0xce40bae6	//sm3tt2a v6.4s, v23.4s, v0.4s[3]

	// Rounds 4-7: W4..W7 in v1; expand W20..W23 into v0.
	ext	v0.16b, v2.16b, v3.16b, #12	// v0  = W11..W14
	ext	v22.16b, v1.16b, v2.16b, #12	// v22 = W7..W10
	ext	v23.16b, v3.16b, v4.16b, #8	// v23 = W14..W17
	.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
	.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
	eor	v22.16b, v1.16b, v2.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
	.inst	0xce418ae6	//sm3tt2a v6.4s, v23.4s, v1.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
	.inst	0xce419ae6	//sm3tt2a v6.4s, v23.4s, v1.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
	.inst	0xce41aae6	//sm3tt2a v6.4s, v23.4s, v1.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
	.inst	0xce41bae6	//sm3tt2a v6.4s, v23.4s, v1.4s[3]

	// Rounds 8-11: W8..W11 in v2; expand W24..W27 into v1.
	ext	v1.16b, v3.16b, v4.16b, #12	// v1  = W15..W18
	ext	v22.16b, v2.16b, v3.16b, #12	// v22 = W11..W14
	ext	v23.16b, v4.16b, v0.16b, #8	// v23 = W18..W21
	.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
	.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
	eor	v22.16b, v2.16b, v3.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
	.inst	0xce428ae6	//sm3tt2a v6.4s, v23.4s, v2.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
	.inst	0xce429ae6	//sm3tt2a v6.4s, v23.4s, v2.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
	.inst	0xce42aae6	//sm3tt2a v6.4s, v23.4s, v2.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
	.inst	0xce42bae6	//sm3tt2a v6.4s, v23.4s, v2.4s[3]

	// Rounds 12-15: W12..W15 in v3; expand W28..W31 into v2.
	ext	v2.16b, v4.16b, v0.16b, #12	// v2  = W19..W22
	ext	v22.16b, v3.16b, v4.16b, #12	// v22 = W15..W18
	ext	v23.16b, v0.16b, v1.16b, #8	// v23 = W22..W25
	.inst	0xce61c062	//sm3partw1 v2.4s, v3.4s, v1.4s
	.inst	0xce76c6e2	//sm3partw2 v2.4s, v23.4s, v22.4s
	eor	v22.16b, v3.16b, v4.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5682e5	//sm3tt1a v5.4s, v23.4s, v22.4s[0]
	.inst	0xce438ae6	//sm3tt2a v6.4s, v23.4s, v3.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5692e5	//sm3tt1a v5.4s, v23.4s, v22.4s[1]
	.inst	0xce439ae6	//sm3tt2a v6.4s, v23.4s, v3.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[2]
	.inst	0xce43aae6	//sm3tt2a v6.4s, v23.4s, v3.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b2e5	//sm3tt1a v5.4s, v23.4s, v22.4s[3]
	.inst	0xce43bae6	//sm3tt2a v6.4s, v23.4s, v3.4s[3]

	// From round 16 on: restart the constant from the second .Tj word
	// (moved to lane 3) and use the "b" forms of sm3tt1 / sm3tt2.
	ext	v20.16b, v17.16b, v17.16b, #4

	// Rounds 16-19: W16..W19 in v4; expand W32..W35 into v3.
	ext	v3.16b, v0.16b, v1.16b, #12	// v3  = W23..W26
	ext	v22.16b, v4.16b, v0.16b, #12	// v22 = W19..W22
	ext	v23.16b, v1.16b, v2.16b, #8	// v23 = W26..W29
	.inst	0xce62c083	//sm3partw1 v3.4s, v4.4s, v2.4s
	.inst	0xce76c6e3	//sm3partw2 v3.4s, v23.4s, v22.4s
	eor	v22.16b, v4.16b, v0.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]

	// Rounds 20-23: W20..W23 in v0; expand W36..W39 into v4.
	ext	v4.16b, v1.16b, v2.16b, #12	// v4  = W27..W30
	ext	v22.16b, v0.16b, v1.16b, #12	// v22 = W23..W26
	ext	v23.16b, v2.16b, v3.16b, #8	// v23 = W30..W33
	.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
	.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
	eor	v22.16b, v0.16b, v1.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]

	// Rounds 24-27: W24..W27 in v1; expand W40..W43 into v0.
	ext	v0.16b, v2.16b, v3.16b, #12	// v0  = W31..W34
	ext	v22.16b, v1.16b, v2.16b, #12	// v22 = W27..W30
	ext	v23.16b, v3.16b, v4.16b, #8	// v23 = W34..W37
	.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
	.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
	eor	v22.16b, v1.16b, v2.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce418ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce419ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce41aee6	//sm3tt2b v6.4s, v23.4s, v1.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce41bee6	//sm3tt2b v6.4s, v23.4s, v1.4s[3]

	// Rounds 28-31: W28..W31 in v2; expand W44..W47 into v1.
	ext	v1.16b, v3.16b, v4.16b, #12	// v1  = W35..W38
	ext	v22.16b, v2.16b, v3.16b, #12	// v22 = W31..W34
	ext	v23.16b, v4.16b, v0.16b, #8	// v23 = W38..W41
	.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
	.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
	eor	v22.16b, v2.16b, v3.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce428ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce429ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce42aee6	//sm3tt2b v6.4s, v23.4s, v2.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce42bee6	//sm3tt2b v6.4s, v23.4s, v2.4s[3]

	// Rounds 32-35: W32..W35 in v3; expand W48..W51 into v2.
	ext	v2.16b, v4.16b, v0.16b, #12	// v2  = W39..W42
	ext	v22.16b, v3.16b, v4.16b, #12	// v22 = W35..W38
	ext	v23.16b, v0.16b, v1.16b, #8	// v23 = W42..W45
	.inst	0xce61c062	//sm3partw1 v2.4s, v3.4s, v1.4s
	.inst	0xce76c6e2	//sm3partw2 v2.4s, v23.4s, v22.4s
	eor	v22.16b, v3.16b, v4.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce438ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce439ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce43aee6	//sm3tt2b v6.4s, v23.4s, v3.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce43bee6	//sm3tt2b v6.4s, v23.4s, v3.4s[3]

	// Rounds 36-39: W36..W39 in v4; expand W52..W55 into v3.
	ext	v3.16b, v0.16b, v1.16b, #12	// v3  = W43..W46
	ext	v22.16b, v4.16b, v0.16b, #12	// v22 = W39..W42
	ext	v23.16b, v1.16b, v2.16b, #8	// v23 = W46..W49
	.inst	0xce62c083	//sm3partw1 v3.4s, v4.4s, v2.4s
	.inst	0xce76c6e3	//sm3partw2 v3.4s, v23.4s, v22.4s
	eor	v22.16b, v4.16b, v0.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]

	// Rounds 40-43: W40..W43 in v0; expand W56..W59 into v4.
	ext	v4.16b, v1.16b, v2.16b, #12	// v4  = W47..W50
	ext	v22.16b, v0.16b, v1.16b, #12	// v22 = W43..W46
	ext	v23.16b, v2.16b, v3.16b, #8	// v23 = W50..W53
	.inst	0xce63c004	//sm3partw1 v4.4s, v0.4s, v3.4s
	.inst	0xce76c6e4	//sm3partw2 v4.4s, v23.4s, v22.4s
	eor	v22.16b, v0.16b, v1.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]

	// Rounds 44-47: W44..W47 in v1; expand W60..W63 into v0.
	ext	v0.16b, v2.16b, v3.16b, #12	// v0  = W51..W54
	ext	v22.16b, v1.16b, v2.16b, #12	// v22 = W47..W50
	ext	v23.16b, v3.16b, v4.16b, #8	// v23 = W54..W57
	.inst	0xce64c020	//sm3partw1 v0.4s, v1.4s, v4.4s
	.inst	0xce76c6e0	//sm3partw2 v0.4s, v23.4s, v22.4s
	eor	v22.16b, v1.16b, v2.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce418ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce419ee6	//sm3tt2b v6.4s, v23.4s, v1.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce41aee6	//sm3tt2b v6.4s, v23.4s, v1.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce41bee6	//sm3tt2b v6.4s, v23.4s, v1.4s[3]

	// Rounds 48-51: W48..W51 in v2; expand W64..W67 into v1
	// (the last expansion step).
	ext	v1.16b, v3.16b, v4.16b, #12	// v1  = W55..W58
	ext	v22.16b, v2.16b, v3.16b, #12	// v22 = W51..W54
	ext	v23.16b, v4.16b, v0.16b, #8	// v23 = W58..W61
	.inst	0xce60c041	//sm3partw1 v1.4s, v2.4s, v0.4s
	.inst	0xce76c6e1	//sm3partw2 v1.4s, v23.4s, v22.4s
	eor	v22.16b, v2.16b, v3.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce428ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce429ee6	//sm3tt2b v6.4s, v23.4s, v2.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce42aee6	//sm3tt2b v6.4s, v23.4s, v2.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce42bee6	//sm3tt2b v6.4s, v23.4s, v2.4s[3]

	// Rounds 52-55: W52..W55 in v3; no further expansion.
	eor	v22.16b, v3.16b, v4.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce438ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce439ee6	//sm3tt2b v6.4s, v23.4s, v3.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce43aee6	//sm3tt2b v6.4s, v23.4s, v3.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce43bee6	//sm3tt2b v6.4s, v23.4s, v3.4s[3]

	// Rounds 56-59: W56..W59 in v4.
	eor	v22.16b, v4.16b, v0.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce448ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce449ee6	//sm3tt2b v6.4s, v23.4s, v4.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce44aee6	//sm3tt2b v6.4s, v23.4s, v4.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce44bee6	//sm3tt2b v6.4s, v23.4s, v4.4s[3]

	// Rounds 60-63: W60..W63 in v0.
	eor	v22.16b, v0.16b, v1.16b
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce5686e5	//sm3tt1b v5.4s, v23.4s, v22.4s[0]
	.inst	0xce408ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[0]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce5696e5	//sm3tt1b v5.4s, v23.4s, v22.4s[1]
	.inst	0xce409ee6	//sm3tt2b v6.4s, v23.4s, v0.4s[1]
	.inst	0xce5418b7	//sm3ss1 v23.4s, v5.4s, v20.4s, v6.4s
	shl	v21.4s, v20.4s, #1
	sri	v21.4s, v20.4s, #31
	.inst	0xce56a6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[2]
	.inst	0xce40aee6	//sm3tt2b v6.4s, v23.4s, v0.4s[2]
	.inst	0xce5518b7	//sm3ss1 v23.4s, v5.4s, v21.4s, v6.4s
	shl	v20.4s, v21.4s, #1
	sri	v20.4s, v21.4s, #31
	.inst	0xce56b6e5	//sm3tt1b v5.4s, v23.4s, v22.4s[3]
	.inst	0xce40bee6	//sm3tt2b v6.4s, v23.4s, v0.4s[3]

	// fold the state saved at block entry back into the result
	eor	v5.16b, v5.16b, v18.16b
	eor	v6.16b, v6.16b, v19.16b

	// any remaining blocks?
	cbnz	w2, .Loop

	// save state (undo the lane reversal applied on entry)
	rev64	v5.4s, v5.4s
	rev64	v6.4s, v6.4s
	ext	v5.16b, v5.16b, v5.16b, #8
	ext	v6.16b, v6.16b, v6.16b, #8
	st1	{v5.4s,v6.4s}, [x0]
	ret
.size	ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
.section	.rodata

// Round-constant seeds, loaded as s16 / s17 at function entry.
// The first word is used for rounds 0-15 and the second for rounds 16-63;
// each is rotated left by one bit after every round.
// NOTE(review): the second word is presumably the SM3 constant for
// rounds 16-63 already rotated for round 16 -- confirm against the SM3
// specification.
.type	_sm3_consts,%object
.align	3
_sm3_consts:
.Tj:
.word	0x79cc4519, 0x9d8a7a87
.size	_sm3_consts,.-_sm3_consts
.previous

