Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S
48775 views
1
// SPDX-License-Identifier: Apache-2.0
2
/*
3
* Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
4
*
5
* Licensed under the Apache License, Version 2.0 (the "License");
6
* you may not use this file except in compliance with the License.
7
* You may obtain a copy of the License at
8
*
9
* https://www.apache.org/licenses/LICENSE-2.0
10
*
11
* Unless required by applicable law or agreed to in writing, software
12
* distributed under the License is distributed on an "AS IS" BASIS,
13
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
* See the License for the specific language governing permissions and
15
* limitations under the License.
16
*/
17
18
/*
19
* Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
20
* - modified assembly to fit into OpenZFS
21
*/
22
23
#if defined(__aarch64__)
24
25
/*
 * ELF GNU property note emitted ahead of the code.
 * NOTE(review): the field meanings given below follow the generic ELF note
 * layout and the AArch64 ELF ABI; the symbolic names are not spelled out in
 * this file, so confirm them against the ABI document.
 */
.section .note.gnu.property,"a",@note
26
.p2align 3 // align the note to 8 bytes
27
.word 4 // size of the name field: GNU plus the NUL from .asciz
28
.word 16 // size of the descriptor: the four .word values after the name
29
.word 5 // note type (presumably NT_GNU_PROPERTY_TYPE_0)
30
.asciz "GNU"
31
.word 3221225472 // 0xc0000000 (presumably GNU_PROPERTY_AARCH64_FEATURE_1_AND)
32
.word 4 // size of the property data that follows
33
.word 3 // bits 0 and 1 set (presumably BTI and PAC; entry points below start with bti c)
34
.word 0 // pads the descriptor to 16 bytes
35
.text
36
37
/*
 * .LK512: table of the 80 64-bit SHA-512 round constants, two per line,
 * followed by one zero quad (81 * 8 = 648 bytes in total).
 *
 * Both block functions in this file take its address with adr.
 * zfs_sha512_block_armv7 reads it one quad at a time through x30, treats the
 * zero quad as the end-of-rounds marker (cbnz x19,.Loop_16_xx) and then
 * rewinds x30 by exactly 648 bytes, so the entry count and the terminator
 * must stay in step with that code.
 *
 * .align 6 requests 64-byte alignment (the operand is a power-of-two
 * exponent for this target).
 */
.align 6
38
.type .LK512,%object
39
.LK512:
40
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
41
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
42
.quad 0x3956c25bf348b538,0x59f111f1b605d019
43
.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
44
.quad 0xd807aa98a3030242,0x12835b0145706fbe
45
.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
46
.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
47
.quad 0x9bdc06a725c71235,0xc19bf174cf692694
48
.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
49
.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
50
.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
51
.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
52
.quad 0x983e5152ee66dfab,0xa831c66d2db43210
53
.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
54
.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
55
.quad 0x06ca6351e003826f,0x142929670a0e6e70
56
.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
57
.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
58
.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
59
.quad 0x81c2c92e47edaee6,0x92722c851482353b
60
.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
61
.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
62
.quad 0xd192e819d6ef5218,0xd69906245565a910
63
.quad 0xf40e35855771202a,0x106aa07032bbd1b8
64
.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
65
.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
66
.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
67
.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
68
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
69
.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
70
.quad 0x90befffa23631e28,0xa4506cebde82bde9
71
.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
72
.quad 0xca273eceea26619c,0xd186b8c721c0c207
73
.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
74
.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
75
.quad 0x113f9804bef90dae,0x1b710b35131c471b
76
.quad 0x28db77f523047d84,0x32caab7b40c72493
77
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
78
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
79
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
80
.quad 0 // terminator
81
.size .LK512,.-.LK512
82
83
/*
 * zfs_sha512_block_armv7: SHA-512 block function written with
 * general-purpose registers only.
 *
 * Arguments, as consumed by the code below:
 *   x0 - pointer to eight 64-bit state words; read here and written back
 *        after every block
 *   x1 - pointer to the input data
 *   x2 - number of 128-byte blocks (shifted left by 7 to form the end pointer)
 * NOTE(review): presumably called from C as
 *   void zfs_sha512_block_armv7(uint64_t state[8], const void *data,
 *       size_t num_blks)
 * confirm against the C prototype.
 * NOTE(review): x2 == 0 is not checked. The block loop is bottom-tested
 * (cmp x1,x2 then b.ne .Loop), so one block is always processed before the
 * comparison; callers presumably guarantee x2 >= 1.
 *
 * Stack frame (x29 = sp right after the first stp):
 *   [x29,#0]        saved x29, x30
 *   [x29,#16..#95]  saved x19-x28
 *   [x29,#96]       x0, the state pointer
 *   [x29,#104]      end-of-input pointer
 *   [x29,#112]      input pointer, stored inside .Loop
 *   [sp,#0..#31]    four 8-byte spill slots used by the round code
 *
 * Register roles in the round code:
 *   x20-x27          working variables a..h (roles rotate every round)
 *   x30              pointer into .LK512 (the real return address is
 *                    reloaded from the frame before ret)
 *   x3-x15,x0,x1,x2  message schedule words
 *   x16,x17,x19,x28  temporaries (x19/x28 alternate as K[i] and Maj state)
 */
.globl zfs_sha512_block_armv7
84
.type zfs_sha512_block_armv7,%function
85
.align 6
86
zfs_sha512_block_armv7:
87
hint #34 // bti c
88
stp x29,x30,[sp,#-128]! // allocate the 128-byte frame, save fp/lr
89
add x29,sp,#0 // x29 = frame base
90
91
stp x19,x20,[sp,#16] // save the registers restored in the epilogue
92
stp x21,x22,[sp,#32]
93
stp x23,x24,[sp,#48]
94
stp x25,x26,[sp,#64]
95
stp x27,x28,[sp,#80]
96
sub sp,sp,#4*8 // four spill slots below the frame
97
98
ldp x20,x21,[x0] // load context
99
ldp x22,x23,[x0,#2*8]
100
ldp x24,x25,[x0,#4*8]
101
add x2,x1,x2,lsl#7 // end of input
102
ldp x26,x27,[x0,#6*8]
103
adr x30,.LK512 // x30 = K pointer from here on
104
stp x0,x2,[x29,#96] // x0 and x2 are reused as scratch by later rounds
105
106
/*
 * .Loop: entry point for each 128-byte block. Rounds 0-15 follow fully
 * unrolled; each takes its message word directly from the input (loaded
 * two at a time with ldp) and byte-swaps it with rev unless __AARCH64EB__
 * is defined. The working variables are never moved: every round uses
 * the registers x20-x27 in a rotated role instead.
 *
 * Round 0 below has a=x20 b=x21 c=x22 d=x23 e=x24 f=x25 g=x26 h=x27.
 */
.Loop:
107
ldp x3,x4,[x1],#2*8 // first two message words, x1 += 16
108
ldr x19,[x30],#8 // *K++
109
eor x28,x21,x22 // magic seed
110
str x1,[x29,#112] // keep the (already advanced) input pointer; x1 is reused later
111
#ifndef __AARCH64EB__
112
rev x3,x3 // 0
113
#endif
114
ror x16,x24,#14 // ror(e,14)
115
add x27,x27,x19 // h+=K[i]
116
eor x6,x24,x24,ror#23 // e ^ ror(e,23); rotating this by 18 yields ror18 ^ ror41
117
and x17,x25,x24 // f & e
118
bic x19,x26,x24 // g & ~e
119
add x27,x27,x3 // h+=X[i]
120
orr x17,x17,x19 // Ch(e,f,g)
121
eor x19,x20,x21 // a^b, b^c in next round
122
eor x16,x16,x6,ror#18 // Sigma1(e)
123
ror x6,x20,#28 // ror(a,28)
124
add x27,x27,x17 // h+=Ch(e,f,g)
125
eor x17,x20,x20,ror#5 // a ^ ror(a,5); rotating this by 34 yields ror34 ^ ror39
126
add x27,x27,x16 // h+=Sigma1(e)
127
and x28,x28,x19 // (b^c)&=(a^b)
128
add x23,x23,x27 // d+=h
129
eor x28,x28,x21 // Maj(a,b,c)
130
eor x17,x6,x17,ror#34 // Sigma0(a)
131
add x27,x27,x28 // h+=Maj(a,b,c)
132
ldr x28,[x30],#8 // *K++, x19 in next round
133
// The Sigma0 add is deferred: the live copy is issued early in the next round.
//add x27,x27,x17 // h+=Sigma0(a)
134
#ifndef __AARCH64EB__
135
rev x4,x4 // 1
136
#endif
137
ldp x5,x6,[x1],#2*8
138
add x27,x27,x17 // h+=Sigma0(a)
139
ror x16,x23,#14
140
add x26,x26,x28 // h+=K[i]
141
eor x7,x23,x23,ror#23
142
and x17,x24,x23
143
bic x28,x25,x23
144
add x26,x26,x4 // h+=X[i]
145
orr x17,x17,x28 // Ch(e,f,g)
146
eor x28,x27,x20 // a^b, b^c in next round
147
eor x16,x16,x7,ror#18 // Sigma1(e)
148
ror x7,x27,#28
149
add x26,x26,x17 // h+=Ch(e,f,g)
150
eor x17,x27,x27,ror#5
151
add x26,x26,x16 // h+=Sigma1(e)
152
and x19,x19,x28 // (b^c)&=(a^b)
153
add x22,x22,x26 // d+=h
154
eor x19,x19,x20 // Maj(a,b,c)
155
eor x17,x7,x17,ror#34 // Sigma0(a)
156
add x26,x26,x19 // h+=Maj(a,b,c)
157
ldr x19,[x30],#8 // *K++, x28 in next round
158
//add x26,x26,x17 // h+=Sigma0(a)
159
#ifndef __AARCH64EB__
160
rev x5,x5 // 2
161
#endif
162
add x26,x26,x17 // h+=Sigma0(a)
163
ror x16,x22,#14
164
add x25,x25,x19 // h+=K[i]
165
eor x8,x22,x22,ror#23
166
and x17,x23,x22
167
bic x19,x24,x22
168
add x25,x25,x5 // h+=X[i]
169
orr x17,x17,x19 // Ch(e,f,g)
170
eor x19,x26,x27 // a^b, b^c in next round
171
eor x16,x16,x8,ror#18 // Sigma1(e)
172
ror x8,x26,#28
173
add x25,x25,x17 // h+=Ch(e,f,g)
174
eor x17,x26,x26,ror#5
175
add x25,x25,x16 // h+=Sigma1(e)
176
and x28,x28,x19 // (b^c)&=(a^b)
177
add x21,x21,x25 // d+=h
178
eor x28,x28,x27 // Maj(a,b,c)
179
eor x17,x8,x17,ror#34 // Sigma0(a)
180
add x25,x25,x28 // h+=Maj(a,b,c)
181
ldr x28,[x30],#8 // *K++, x19 in next round
182
//add x25,x25,x17 // h+=Sigma0(a)
183
#ifndef __AARCH64EB__
184
rev x6,x6 // 3
185
#endif
186
ldp x7,x8,[x1],#2*8
187
add x25,x25,x17 // h+=Sigma0(a)
188
ror x16,x21,#14
189
add x24,x24,x28 // h+=K[i]
190
eor x9,x21,x21,ror#23
191
and x17,x22,x21
192
bic x28,x23,x21
193
add x24,x24,x6 // h+=X[i]
194
orr x17,x17,x28 // Ch(e,f,g)
195
eor x28,x25,x26 // a^b, b^c in next round
196
eor x16,x16,x9,ror#18 // Sigma1(e)
197
ror x9,x25,#28
198
add x24,x24,x17 // h+=Ch(e,f,g)
199
eor x17,x25,x25,ror#5
200
add x24,x24,x16 // h+=Sigma1(e)
201
and x19,x19,x28 // (b^c)&=(a^b)
202
add x20,x20,x24 // d+=h
203
eor x19,x19,x26 // Maj(a,b,c)
204
eor x17,x9,x17,ror#34 // Sigma0(a)
205
add x24,x24,x19 // h+=Maj(a,b,c)
206
ldr x19,[x30],#8 // *K++, x28 in next round
207
//add x24,x24,x17 // h+=Sigma0(a)
208
#ifndef __AARCH64EB__
209
rev x7,x7 // 4
210
#endif
211
add x24,x24,x17 // h+=Sigma0(a)
212
ror x16,x20,#14
213
add x23,x23,x19 // h+=K[i]
214
eor x10,x20,x20,ror#23
215
and x17,x21,x20
216
bic x19,x22,x20
217
add x23,x23,x7 // h+=X[i]
218
orr x17,x17,x19 // Ch(e,f,g)
219
eor x19,x24,x25 // a^b, b^c in next round
220
eor x16,x16,x10,ror#18 // Sigma1(e)
221
ror x10,x24,#28
222
add x23,x23,x17 // h+=Ch(e,f,g)
223
eor x17,x24,x24,ror#5
224
add x23,x23,x16 // h+=Sigma1(e)
225
and x28,x28,x19 // (b^c)&=(a^b)
226
add x27,x27,x23 // d+=h
227
eor x28,x28,x25 // Maj(a,b,c)
228
eor x17,x10,x17,ror#34 // Sigma0(a)
229
add x23,x23,x28 // h+=Maj(a,b,c)
230
ldr x28,[x30],#8 // *K++, x19 in next round
231
//add x23,x23,x17 // h+=Sigma0(a)
232
#ifndef __AARCH64EB__
233
rev x8,x8 // 5
234
#endif
235
ldp x9,x10,[x1],#2*8
236
add x23,x23,x17 // h+=Sigma0(a)
237
ror x16,x27,#14
238
add x22,x22,x28 // h+=K[i]
239
eor x11,x27,x27,ror#23
240
and x17,x20,x27
241
bic x28,x21,x27
242
add x22,x22,x8 // h+=X[i]
243
orr x17,x17,x28 // Ch(e,f,g)
244
eor x28,x23,x24 // a^b, b^c in next round
245
eor x16,x16,x11,ror#18 // Sigma1(e)
246
ror x11,x23,#28
247
add x22,x22,x17 // h+=Ch(e,f,g)
248
eor x17,x23,x23,ror#5
249
add x22,x22,x16 // h+=Sigma1(e)
250
and x19,x19,x28 // (b^c)&=(a^b)
251
add x26,x26,x22 // d+=h
252
eor x19,x19,x24 // Maj(a,b,c)
253
eor x17,x11,x17,ror#34 // Sigma0(a)
254
add x22,x22,x19 // h+=Maj(a,b,c)
255
ldr x19,[x30],#8 // *K++, x28 in next round
256
//add x22,x22,x17 // h+=Sigma0(a)
257
#ifndef __AARCH64EB__
258
rev x9,x9 // 6
259
#endif
260
add x22,x22,x17 // h+=Sigma0(a)
261
ror x16,x26,#14
262
add x21,x21,x19 // h+=K[i]
263
eor x12,x26,x26,ror#23
264
and x17,x27,x26
265
bic x19,x20,x26
266
add x21,x21,x9 // h+=X[i]
267
orr x17,x17,x19 // Ch(e,f,g)
268
eor x19,x22,x23 // a^b, b^c in next round
269
eor x16,x16,x12,ror#18 // Sigma1(e)
270
ror x12,x22,#28
271
add x21,x21,x17 // h+=Ch(e,f,g)
272
eor x17,x22,x22,ror#5
273
add x21,x21,x16 // h+=Sigma1(e)
274
and x28,x28,x19 // (b^c)&=(a^b)
275
add x25,x25,x21 // d+=h
276
eor x28,x28,x23 // Maj(a,b,c)
277
eor x17,x12,x17,ror#34 // Sigma0(a)
278
add x21,x21,x28 // h+=Maj(a,b,c)
279
ldr x28,[x30],#8 // *K++, x19 in next round
280
//add x21,x21,x17 // h+=Sigma0(a)
281
#ifndef __AARCH64EB__
282
rev x10,x10 // 7
283
#endif
284
ldp x11,x12,[x1],#2*8
285
add x21,x21,x17 // h+=Sigma0(a)
286
ror x16,x25,#14
287
add x20,x20,x28 // h+=K[i]
288
eor x13,x25,x25,ror#23
289
and x17,x26,x25
290
bic x28,x27,x25
291
add x20,x20,x10 // h+=X[i]
292
orr x17,x17,x28 // Ch(e,f,g)
293
eor x28,x21,x22 // a^b, b^c in next round
294
eor x16,x16,x13,ror#18 // Sigma1(e)
295
ror x13,x21,#28
296
add x20,x20,x17 // h+=Ch(e,f,g)
297
eor x17,x21,x21,ror#5
298
add x20,x20,x16 // h+=Sigma1(e)
299
and x19,x19,x28 // (b^c)&=(a^b)
300
add x24,x24,x20 // d+=h
301
eor x19,x19,x22 // Maj(a,b,c)
302
eor x17,x13,x17,ror#34 // Sigma0(a)
303
add x20,x20,x19 // h+=Maj(a,b,c)
304
ldr x19,[x30],#8 // *K++, x28 in next round
305
//add x20,x20,x17 // h+=Sigma0(a)
306
#ifndef __AARCH64EB__
307
rev x11,x11 // 8
308
#endif
309
add x20,x20,x17 // h+=Sigma0(a)
310
ror x16,x24,#14
311
add x27,x27,x19 // h+=K[i]
312
eor x14,x24,x24,ror#23
313
and x17,x25,x24
314
bic x19,x26,x24
315
add x27,x27,x11 // h+=X[i]
316
orr x17,x17,x19 // Ch(e,f,g)
317
eor x19,x20,x21 // a^b, b^c in next round
318
eor x16,x16,x14,ror#18 // Sigma1(e)
319
ror x14,x20,#28
320
add x27,x27,x17 // h+=Ch(e,f,g)
321
eor x17,x20,x20,ror#5
322
add x27,x27,x16 // h+=Sigma1(e)
323
and x28,x28,x19 // (b^c)&=(a^b)
324
add x23,x23,x27 // d+=h
325
eor x28,x28,x21 // Maj(a,b,c)
326
eor x17,x14,x17,ror#34 // Sigma0(a)
327
add x27,x27,x28 // h+=Maj(a,b,c)
328
ldr x28,[x30],#8 // *K++, x19 in next round
329
//add x27,x27,x17 // h+=Sigma0(a)
330
#ifndef __AARCH64EB__
331
rev x12,x12 // 9
332
#endif
333
ldp x13,x14,[x1],#2*8
334
add x27,x27,x17 // h+=Sigma0(a)
335
ror x16,x23,#14
336
add x26,x26,x28 // h+=K[i]
337
eor x15,x23,x23,ror#23
338
and x17,x24,x23
339
bic x28,x25,x23
340
add x26,x26,x12 // h+=X[i]
341
orr x17,x17,x28 // Ch(e,f,g)
342
eor x28,x27,x20 // a^b, b^c in next round
343
eor x16,x16,x15,ror#18 // Sigma1(e)
344
ror x15,x27,#28
345
add x26,x26,x17 // h+=Ch(e,f,g)
346
eor x17,x27,x27,ror#5
347
add x26,x26,x16 // h+=Sigma1(e)
348
and x19,x19,x28 // (b^c)&=(a^b)
349
add x22,x22,x26 // d+=h
350
eor x19,x19,x20 // Maj(a,b,c)
351
eor x17,x15,x17,ror#34 // Sigma0(a)
352
add x26,x26,x19 // h+=Maj(a,b,c)
353
ldr x19,[x30],#8 // *K++, x28 in next round
354
//add x26,x26,x17 // h+=Sigma0(a)
355
#ifndef __AARCH64EB__
356
rev x13,x13 // 10
357
#endif
358
add x26,x26,x17 // h+=Sigma0(a)
359
ror x16,x22,#14
360
add x25,x25,x19 // h+=K[i]
361
eor x0,x22,x22,ror#23
362
and x17,x23,x22
363
bic x19,x24,x22
364
add x25,x25,x13 // h+=X[i]
365
orr x17,x17,x19 // Ch(e,f,g)
366
eor x19,x26,x27 // a^b, b^c in next round
367
eor x16,x16,x0,ror#18 // Sigma1(e)
368
ror x0,x26,#28
369
add x25,x25,x17 // h+=Ch(e,f,g)
370
eor x17,x26,x26,ror#5
371
add x25,x25,x16 // h+=Sigma1(e)
372
and x28,x28,x19 // (b^c)&=(a^b)
373
add x21,x21,x25 // d+=h
374
eor x28,x28,x27 // Maj(a,b,c)
375
eor x17,x0,x17,ror#34 // Sigma0(a)
376
add x25,x25,x28 // h+=Maj(a,b,c)
377
ldr x28,[x30],#8 // *K++, x19 in next round
378
//add x25,x25,x17 // h+=Sigma0(a)
379
#ifndef __AARCH64EB__
380
rev x14,x14 // 11
381
#endif
382
ldp x15,x0,[x1],#2*8
383
add x25,x25,x17 // h+=Sigma0(a)
384
str x6,[sp,#24]
385
ror x16,x21,#14
386
add x24,x24,x28 // h+=K[i]
387
eor x6,x21,x21,ror#23
388
and x17,x22,x21
389
bic x28,x23,x21
390
add x24,x24,x14 // h+=X[i]
391
orr x17,x17,x28 // Ch(e,f,g)
392
eor x28,x25,x26 // a^b, b^c in next round
393
eor x16,x16,x6,ror#18 // Sigma1(e)
394
ror x6,x25,#28
395
add x24,x24,x17 // h+=Ch(e,f,g)
396
eor x17,x25,x25,ror#5
397
add x24,x24,x16 // h+=Sigma1(e)
398
and x19,x19,x28 // (b^c)&=(a^b)
399
add x20,x20,x24 // d+=h
400
eor x19,x19,x26 // Maj(a,b,c)
401
eor x17,x6,x17,ror#34 // Sigma0(a)
402
add x24,x24,x19 // h+=Maj(a,b,c)
403
ldr x19,[x30],#8 // *K++, x28 in next round
404
//add x24,x24,x17 // h+=Sigma0(a)
405
#ifndef __AARCH64EB__
406
rev x15,x15 // 12
407
#endif
408
add x24,x24,x17 // h+=Sigma0(a)
409
str x7,[sp,#0]
410
ror x16,x20,#14
411
add x23,x23,x19 // h+=K[i]
412
eor x7,x20,x20,ror#23
413
and x17,x21,x20
414
bic x19,x22,x20
415
add x23,x23,x15 // h+=X[i]
416
orr x17,x17,x19 // Ch(e,f,g)
417
eor x19,x24,x25 // a^b, b^c in next round
418
eor x16,x16,x7,ror#18 // Sigma1(e)
419
ror x7,x24,#28
420
add x23,x23,x17 // h+=Ch(e,f,g)
421
eor x17,x24,x24,ror#5
422
add x23,x23,x16 // h+=Sigma1(e)
423
and x28,x28,x19 // (b^c)&=(a^b)
424
add x27,x27,x23 // d+=h
425
eor x28,x28,x25 // Maj(a,b,c)
426
eor x17,x7,x17,ror#34 // Sigma0(a)
427
add x23,x23,x28 // h+=Maj(a,b,c)
428
ldr x28,[x30],#8 // *K++, x19 in next round
429
//add x23,x23,x17 // h+=Sigma0(a)
430
#ifndef __AARCH64EB__
431
rev x0,x0 // 13
432
#endif
433
ldp x1,x2,[x1]
434
add x23,x23,x17 // h+=Sigma0(a)
435
str x8,[sp,#8]
436
ror x16,x27,#14
437
add x22,x22,x28 // h+=K[i]
438
eor x8,x27,x27,ror#23
439
and x17,x20,x27
440
bic x28,x21,x27
441
add x22,x22,x0 // h+=X[i]
442
orr x17,x17,x28 // Ch(e,f,g)
443
eor x28,x23,x24 // a^b, b^c in next round
444
eor x16,x16,x8,ror#18 // Sigma1(e)
445
ror x8,x23,#28
446
add x22,x22,x17 // h+=Ch(e,f,g)
447
eor x17,x23,x23,ror#5
448
add x22,x22,x16 // h+=Sigma1(e)
449
and x19,x19,x28 // (b^c)&=(a^b)
450
add x26,x26,x22 // d+=h
451
eor x19,x19,x24 // Maj(a,b,c)
452
eor x17,x8,x17,ror#34 // Sigma0(a)
453
add x22,x22,x19 // h+=Maj(a,b,c)
454
ldr x19,[x30],#8 // *K++, x28 in next round
455
//add x22,x22,x17 // h+=Sigma0(a)
456
#ifndef __AARCH64EB__
457
rev x1,x1 // 14
458
#endif
459
ldr x6,[sp,#24]
460
add x22,x22,x17 // h+=Sigma0(a)
461
str x9,[sp,#16]
462
ror x16,x26,#14
463
add x21,x21,x19 // h+=K[i]
464
eor x9,x26,x26,ror#23
465
and x17,x27,x26
466
bic x19,x20,x26
467
add x21,x21,x1 // h+=X[i]
468
orr x17,x17,x19 // Ch(e,f,g)
469
eor x19,x22,x23 // a^b, b^c in next round
470
eor x16,x16,x9,ror#18 // Sigma1(e)
471
ror x9,x22,#28
472
add x21,x21,x17 // h+=Ch(e,f,g)
473
eor x17,x22,x22,ror#5
474
add x21,x21,x16 // h+=Sigma1(e)
475
and x28,x28,x19 // (b^c)&=(a^b)
476
add x25,x25,x21 // d+=h
477
eor x28,x28,x23 // Maj(a,b,c)
478
eor x17,x9,x17,ror#34 // Sigma0(a)
479
add x21,x21,x28 // h+=Maj(a,b,c)
480
ldr x28,[x30],#8 // *K++, x19 in next round
481
//add x21,x21,x17 // h+=Sigma0(a)
482
#ifndef __AARCH64EB__
483
rev x2,x2 // 15
484
#endif
485
ldr x7,[sp,#0]
486
add x21,x21,x17 // h+=Sigma0(a)
487
str x10,[sp,#24]
488
ror x16,x25,#14
489
add x20,x20,x28 // h+=K[i]
490
ror x9,x4,#1
491
and x17,x26,x25
492
ror x8,x1,#19
493
bic x28,x27,x25
494
ror x10,x21,#28
495
add x20,x20,x2 // h+=X[i]
496
eor x16,x16,x25,ror#18
497
eor x9,x9,x4,ror#8
498
orr x17,x17,x28 // Ch(e,f,g)
499
eor x28,x21,x22 // a^b, b^c in next round
500
eor x16,x16,x25,ror#41 // Sigma1(e)
501
eor x10,x10,x21,ror#34
502
add x20,x20,x17 // h+=Ch(e,f,g)
503
and x19,x19,x28 // (b^c)&=(a^b)
504
eor x8,x8,x1,ror#61
505
eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
506
add x20,x20,x16 // h+=Sigma1(e)
507
eor x19,x19,x22 // Maj(a,b,c)
508
eor x17,x10,x21,ror#39 // Sigma0(a)
509
eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
510
add x3,x3,x12
511
add x24,x24,x20 // d+=h
512
add x20,x20,x19 // h+=Maj(a,b,c)
513
ldr x19,[x30],#8 // *K++, x28 in next round
514
add x3,x3,x9
515
add x20,x20,x17 // h+=Sigma0(a)
516
add x3,x3,x8
517
/*
 * .Loop_16_xx: rounds 16-79, sixteen unrolled rounds per pass. On entry
 * x19 already holds the K value for the first round of the pass and x3 the
 * message word for it (both prepared by the preceding round).
 *
 * Each round does two interleaved things:
 *   1. the compression step with the current word, here x3;
 *   2. the in-place update of the word the NEXT round consumes, here
 *        x4 = x4 + sigma0(x5) + x13 + sigma1(x2)
 *      i.e. W[t] = W[t-16] + sigma0(W[t-15]) + W[t-7] + sigma1(W[t-2]),
 *      with sigma0 = ror1 ^ ror8 ^ lsr7 and sigma1 = ror19 ^ ror61 ^ lsr6.
 *
 * The scratch registers for sigma0/sigma1/Sigma0 are themselves schedule
 * registers, so each round first reloads one word from a spill slot and
 * spills the word whose register is about to be used as scratch.
 *
 * First round of the pass: a=x20 b=x21 c=x22 d=x23 e=x24 f=x25 g=x26 h=x27.
 */
.Loop_16_xx:
518
ldr x8,[sp,#8] // reload a schedule word spilled earlier
519
str x11,[sp,#0] // x11 is used as scratch below, keep its schedule word
520
ror x16,x24,#14 // ror(e,14)
521
add x27,x27,x19 // h+=K[i]
522
ror x10,x5,#1 // sigma0 input: ror(x5,1)
523
and x17,x25,x24 // f & e
524
ror x9,x2,#19 // sigma1 input: ror(x2,19)
525
bic x19,x26,x24 // g & ~e
526
ror x11,x20,#28 // ror(a,28)
527
add x27,x27,x3 // h+=X[i]
528
eor x16,x16,x24,ror#18
529
eor x10,x10,x5,ror#8
530
orr x17,x17,x19 // Ch(e,f,g)
531
eor x19,x20,x21 // a^b, b^c in next round
532
eor x16,x16,x24,ror#41 // Sigma1(e)
533
eor x11,x11,x20,ror#34
534
add x27,x27,x17 // h+=Ch(e,f,g)
535
and x28,x28,x19 // (b^c)&=(a^b)
536
eor x9,x9,x2,ror#61
537
eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
538
add x27,x27,x16 // h+=Sigma1(e)
539
eor x28,x28,x21 // Maj(a,b,c)
540
eor x17,x11,x20,ror#39 // Sigma0(a)
541
eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
542
add x4,x4,x13 // start building the word for the next round
543
add x23,x23,x27 // d+=h
544
add x27,x27,x28 // h+=Maj(a,b,c)
545
ldr x28,[x30],#8 // *K++, x19 in next round
546
add x4,x4,x10 // += sigma0
547
add x27,x27,x17 // h+=Sigma0(a)
548
add x4,x4,x9 // += sigma1; x4 is now the next message word
549
ldr x9,[sp,#16]
550
str x12,[sp,#8]
551
ror x16,x23,#14
552
add x26,x26,x28 // h+=K[i]
553
ror x11,x6,#1
554
and x17,x24,x23
555
ror x10,x3,#19
556
bic x28,x25,x23
557
ror x12,x27,#28
558
add x26,x26,x4 // h+=X[i]
559
eor x16,x16,x23,ror#18
560
eor x11,x11,x6,ror#8
561
orr x17,x17,x28 // Ch(e,f,g)
562
eor x28,x27,x20 // a^b, b^c in next round
563
eor x16,x16,x23,ror#41 // Sigma1(e)
564
eor x12,x12,x27,ror#34
565
add x26,x26,x17 // h+=Ch(e,f,g)
566
and x19,x19,x28 // (b^c)&=(a^b)
567
eor x10,x10,x3,ror#61
568
eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
569
add x26,x26,x16 // h+=Sigma1(e)
570
eor x19,x19,x20 // Maj(a,b,c)
571
eor x17,x12,x27,ror#39 // Sigma0(a)
572
eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
573
add x5,x5,x14
574
add x22,x22,x26 // d+=h
575
add x26,x26,x19 // h+=Maj(a,b,c)
576
ldr x19,[x30],#8 // *K++, x28 in next round
577
add x5,x5,x11
578
add x26,x26,x17 // h+=Sigma0(a)
579
add x5,x5,x10
580
ldr x10,[sp,#24]
581
str x13,[sp,#16]
582
ror x16,x22,#14
583
add x25,x25,x19 // h+=K[i]
584
ror x12,x7,#1
585
and x17,x23,x22
586
ror x11,x4,#19
587
bic x19,x24,x22
588
ror x13,x26,#28
589
add x25,x25,x5 // h+=X[i]
590
eor x16,x16,x22,ror#18
591
eor x12,x12,x7,ror#8
592
orr x17,x17,x19 // Ch(e,f,g)
593
eor x19,x26,x27 // a^b, b^c in next round
594
eor x16,x16,x22,ror#41 // Sigma1(e)
595
eor x13,x13,x26,ror#34
596
add x25,x25,x17 // h+=Ch(e,f,g)
597
and x28,x28,x19 // (b^c)&=(a^b)
598
eor x11,x11,x4,ror#61
599
eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
600
add x25,x25,x16 // h+=Sigma1(e)
601
eor x28,x28,x27 // Maj(a,b,c)
602
eor x17,x13,x26,ror#39 // Sigma0(a)
603
eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
604
add x6,x6,x15
605
add x21,x21,x25 // d+=h
606
add x25,x25,x28 // h+=Maj(a,b,c)
607
ldr x28,[x30],#8 // *K++, x19 in next round
608
add x6,x6,x12
609
add x25,x25,x17 // h+=Sigma0(a)
610
add x6,x6,x11
611
ldr x11,[sp,#0]
612
str x14,[sp,#24]
613
ror x16,x21,#14
614
add x24,x24,x28 // h+=K[i]
615
ror x13,x8,#1
616
and x17,x22,x21
617
ror x12,x5,#19
618
bic x28,x23,x21
619
ror x14,x25,#28
620
add x24,x24,x6 // h+=X[i]
621
eor x16,x16,x21,ror#18
622
eor x13,x13,x8,ror#8
623
orr x17,x17,x28 // Ch(e,f,g)
624
eor x28,x25,x26 // a^b, b^c in next round
625
eor x16,x16,x21,ror#41 // Sigma1(e)
626
eor x14,x14,x25,ror#34
627
add x24,x24,x17 // h+=Ch(e,f,g)
628
and x19,x19,x28 // (b^c)&=(a^b)
629
eor x12,x12,x5,ror#61
630
eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
631
add x24,x24,x16 // h+=Sigma1(e)
632
eor x19,x19,x26 // Maj(a,b,c)
633
eor x17,x14,x25,ror#39 // Sigma0(a)
634
eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
635
add x7,x7,x0
636
add x20,x20,x24 // d+=h
637
add x24,x24,x19 // h+=Maj(a,b,c)
638
ldr x19,[x30],#8 // *K++, x28 in next round
639
add x7,x7,x13
640
add x24,x24,x17 // h+=Sigma0(a)
641
add x7,x7,x12
642
ldr x12,[sp,#8]
643
str x15,[sp,#0]
644
ror x16,x20,#14
645
add x23,x23,x19 // h+=K[i]
646
ror x14,x9,#1
647
and x17,x21,x20
648
ror x13,x6,#19
649
bic x19,x22,x20
650
ror x15,x24,#28
651
add x23,x23,x7 // h+=X[i]
652
eor x16,x16,x20,ror#18
653
eor x14,x14,x9,ror#8
654
orr x17,x17,x19 // Ch(e,f,g)
655
eor x19,x24,x25 // a^b, b^c in next round
656
eor x16,x16,x20,ror#41 // Sigma1(e)
657
eor x15,x15,x24,ror#34
658
add x23,x23,x17 // h+=Ch(e,f,g)
659
and x28,x28,x19 // (b^c)&=(a^b)
660
eor x13,x13,x6,ror#61
661
eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
662
add x23,x23,x16 // h+=Sigma1(e)
663
eor x28,x28,x25 // Maj(a,b,c)
664
eor x17,x15,x24,ror#39 // Sigma0(a)
665
eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
666
add x8,x8,x1
667
add x27,x27,x23 // d+=h
668
add x23,x23,x28 // h+=Maj(a,b,c)
669
ldr x28,[x30],#8 // *K++, x19 in next round
670
add x8,x8,x14
671
add x23,x23,x17 // h+=Sigma0(a)
672
add x8,x8,x13
673
ldr x13,[sp,#16]
674
str x0,[sp,#8]
675
ror x16,x27,#14
676
add x22,x22,x28 // h+=K[i]
677
ror x15,x10,#1
678
and x17,x20,x27
679
ror x14,x7,#19
680
bic x28,x21,x27
681
ror x0,x23,#28
682
add x22,x22,x8 // h+=X[i]
683
eor x16,x16,x27,ror#18
684
eor x15,x15,x10,ror#8
685
orr x17,x17,x28 // Ch(e,f,g)
686
eor x28,x23,x24 // a^b, b^c in next round
687
eor x16,x16,x27,ror#41 // Sigma1(e)
688
eor x0,x0,x23,ror#34
689
add x22,x22,x17 // h+=Ch(e,f,g)
690
and x19,x19,x28 // (b^c)&=(a^b)
691
eor x14,x14,x7,ror#61
692
eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
693
add x22,x22,x16 // h+=Sigma1(e)
694
eor x19,x19,x24 // Maj(a,b,c)
695
eor x17,x0,x23,ror#39 // Sigma0(a)
696
eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
697
add x9,x9,x2
698
add x26,x26,x22 // d+=h
699
add x22,x22,x19 // h+=Maj(a,b,c)
700
ldr x19,[x30],#8 // *K++, x28 in next round
701
add x9,x9,x15
702
add x22,x22,x17 // h+=Sigma0(a)
703
add x9,x9,x14
704
ldr x14,[sp,#24]
705
str x1,[sp,#16]
706
ror x16,x26,#14
707
add x21,x21,x19 // h+=K[i]
708
ror x0,x11,#1
709
and x17,x27,x26
710
ror x15,x8,#19
711
bic x19,x20,x26
712
ror x1,x22,#28
713
add x21,x21,x9 // h+=X[i]
714
eor x16,x16,x26,ror#18
715
eor x0,x0,x11,ror#8
716
orr x17,x17,x19 // Ch(e,f,g)
717
eor x19,x22,x23 // a^b, b^c in next round
718
eor x16,x16,x26,ror#41 // Sigma1(e)
719
eor x1,x1,x22,ror#34
720
add x21,x21,x17 // h+=Ch(e,f,g)
721
and x28,x28,x19 // (b^c)&=(a^b)
722
eor x15,x15,x8,ror#61
723
eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
724
add x21,x21,x16 // h+=Sigma1(e)
725
eor x28,x28,x23 // Maj(a,b,c)
726
eor x17,x1,x22,ror#39 // Sigma0(a)
727
eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
728
add x10,x10,x3
729
add x25,x25,x21 // d+=h
730
add x21,x21,x28 // h+=Maj(a,b,c)
731
ldr x28,[x30],#8 // *K++, x19 in next round
732
add x10,x10,x0
733
add x21,x21,x17 // h+=Sigma0(a)
734
add x10,x10,x15
735
ldr x15,[sp,#0]
736
str x2,[sp,#24]
737
ror x16,x25,#14
738
add x20,x20,x28 // h+=K[i]
739
ror x1,x12,#1
740
and x17,x26,x25
741
ror x0,x9,#19
742
bic x28,x27,x25
743
ror x2,x21,#28
744
add x20,x20,x10 // h+=X[i]
745
eor x16,x16,x25,ror#18
746
eor x1,x1,x12,ror#8
747
orr x17,x17,x28 // Ch(e,f,g)
748
eor x28,x21,x22 // a^b, b^c in next round
749
eor x16,x16,x25,ror#41 // Sigma1(e)
750
eor x2,x2,x21,ror#34
751
add x20,x20,x17 // h+=Ch(e,f,g)
752
and x19,x19,x28 // (b^c)&=(a^b)
753
eor x0,x0,x9,ror#61
754
eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
755
add x20,x20,x16 // h+=Sigma1(e)
756
eor x19,x19,x22 // Maj(a,b,c)
757
eor x17,x2,x21,ror#39 // Sigma0(a)
758
eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
759
add x11,x11,x4
760
add x24,x24,x20 // d+=h
761
add x20,x20,x19 // h+=Maj(a,b,c)
762
ldr x19,[x30],#8 // *K++, x28 in next round
763
add x11,x11,x1
764
add x20,x20,x17 // h+=Sigma0(a)
765
add x11,x11,x0
766
ldr x0,[sp,#8]
767
str x3,[sp,#0]
768
ror x16,x24,#14
769
add x27,x27,x19 // h+=K[i]
770
ror x2,x13,#1
771
and x17,x25,x24
772
ror x1,x10,#19
773
bic x19,x26,x24
774
ror x3,x20,#28
775
add x27,x27,x11 // h+=X[i]
776
eor x16,x16,x24,ror#18
777
eor x2,x2,x13,ror#8
778
orr x17,x17,x19 // Ch(e,f,g)
779
eor x19,x20,x21 // a^b, b^c in next round
780
eor x16,x16,x24,ror#41 // Sigma1(e)
781
eor x3,x3,x20,ror#34
782
add x27,x27,x17 // h+=Ch(e,f,g)
783
and x28,x28,x19 // (b^c)&=(a^b)
784
eor x1,x1,x10,ror#61
785
eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
786
add x27,x27,x16 // h+=Sigma1(e)
787
eor x28,x28,x21 // Maj(a,b,c)
788
eor x17,x3,x20,ror#39 // Sigma0(a)
789
eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
790
add x12,x12,x5
791
add x23,x23,x27 // d+=h
792
add x27,x27,x28 // h+=Maj(a,b,c)
793
ldr x28,[x30],#8 // *K++, x19 in next round
794
add x12,x12,x2
795
add x27,x27,x17 // h+=Sigma0(a)
796
add x12,x12,x1
797
ldr x1,[sp,#16]
798
str x4,[sp,#8]
799
ror x16,x23,#14
800
add x26,x26,x28 // h+=K[i]
801
ror x3,x14,#1
802
and x17,x24,x23
803
ror x2,x11,#19
804
bic x28,x25,x23
805
ror x4,x27,#28
806
add x26,x26,x12 // h+=X[i]
807
eor x16,x16,x23,ror#18
808
eor x3,x3,x14,ror#8
809
orr x17,x17,x28 // Ch(e,f,g)
810
eor x28,x27,x20 // a^b, b^c in next round
811
eor x16,x16,x23,ror#41 // Sigma1(e)
812
eor x4,x4,x27,ror#34
813
add x26,x26,x17 // h+=Ch(e,f,g)
814
and x19,x19,x28 // (b^c)&=(a^b)
815
eor x2,x2,x11,ror#61
816
eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
817
add x26,x26,x16 // h+=Sigma1(e)
818
eor x19,x19,x20 // Maj(a,b,c)
819
eor x17,x4,x27,ror#39 // Sigma0(a)
820
eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
821
add x13,x13,x6
822
add x22,x22,x26 // d+=h
823
add x26,x26,x19 // h+=Maj(a,b,c)
824
ldr x19,[x30],#8 // *K++, x28 in next round
825
add x13,x13,x3
826
add x26,x26,x17 // h+=Sigma0(a)
827
add x13,x13,x2
828
ldr x2,[sp,#24]
829
str x5,[sp,#16]
830
ror x16,x22,#14
831
add x25,x25,x19 // h+=K[i]
832
ror x4,x15,#1
833
and x17,x23,x22
834
ror x3,x12,#19
835
bic x19,x24,x22
836
ror x5,x26,#28
837
add x25,x25,x13 // h+=X[i]
838
eor x16,x16,x22,ror#18
839
eor x4,x4,x15,ror#8
840
orr x17,x17,x19 // Ch(e,f,g)
841
eor x19,x26,x27 // a^b, b^c in next round
842
eor x16,x16,x22,ror#41 // Sigma1(e)
843
eor x5,x5,x26,ror#34
844
add x25,x25,x17 // h+=Ch(e,f,g)
845
and x28,x28,x19 // (b^c)&=(a^b)
846
eor x3,x3,x12,ror#61
847
eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
848
add x25,x25,x16 // h+=Sigma1(e)
849
eor x28,x28,x27 // Maj(a,b,c)
850
eor x17,x5,x26,ror#39 // Sigma0(a)
851
eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
852
add x14,x14,x7
853
add x21,x21,x25 // d+=h
854
add x25,x25,x28 // h+=Maj(a,b,c)
855
ldr x28,[x30],#8 // *K++, x19 in next round
856
add x14,x14,x4
857
add x25,x25,x17 // h+=Sigma0(a)
858
add x14,x14,x3
859
ldr x3,[sp,#0]
860
str x6,[sp,#24]
861
ror x16,x21,#14
862
add x24,x24,x28 // h+=K[i]
863
ror x5,x0,#1
864
and x17,x22,x21
865
ror x4,x13,#19
866
bic x28,x23,x21
867
ror x6,x25,#28
868
add x24,x24,x14 // h+=X[i]
869
eor x16,x16,x21,ror#18
870
eor x5,x5,x0,ror#8
871
orr x17,x17,x28 // Ch(e,f,g)
872
eor x28,x25,x26 // a^b, b^c in next round
873
eor x16,x16,x21,ror#41 // Sigma1(e)
874
eor x6,x6,x25,ror#34
875
add x24,x24,x17 // h+=Ch(e,f,g)
876
and x19,x19,x28 // (b^c)&=(a^b)
877
eor x4,x4,x13,ror#61
878
eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
879
add x24,x24,x16 // h+=Sigma1(e)
880
eor x19,x19,x26 // Maj(a,b,c)
881
eor x17,x6,x25,ror#39 // Sigma0(a)
882
eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
883
add x15,x15,x8
884
add x20,x20,x24 // d+=h
885
add x24,x24,x19 // h+=Maj(a,b,c)
886
ldr x19,[x30],#8 // *K++, x28 in next round
887
add x15,x15,x5
888
add x24,x24,x17 // h+=Sigma0(a)
889
add x15,x15,x4
890
ldr x4,[sp,#8]
891
str x7,[sp,#0]
892
ror x16,x20,#14
893
add x23,x23,x19 // h+=K[i]
894
ror x6,x1,#1
895
and x17,x21,x20
896
ror x5,x14,#19
897
bic x19,x22,x20
898
ror x7,x24,#28
899
add x23,x23,x15 // h+=X[i]
900
eor x16,x16,x20,ror#18
901
eor x6,x6,x1,ror#8
902
orr x17,x17,x19 // Ch(e,f,g)
903
eor x19,x24,x25 // a^b, b^c in next round
904
eor x16,x16,x20,ror#41 // Sigma1(e)
905
eor x7,x7,x24,ror#34
906
add x23,x23,x17 // h+=Ch(e,f,g)
907
and x28,x28,x19 // (b^c)&=(a^b)
908
eor x5,x5,x14,ror#61
909
eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
910
add x23,x23,x16 // h+=Sigma1(e)
911
eor x28,x28,x25 // Maj(a,b,c)
912
eor x17,x7,x24,ror#39 // Sigma0(a)
913
eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
914
add x0,x0,x9
915
add x27,x27,x23 // d+=h
916
add x23,x23,x28 // h+=Maj(a,b,c)
917
ldr x28,[x30],#8 // *K++, x19 in next round
918
add x0,x0,x6
919
add x23,x23,x17 // h+=Sigma0(a)
920
add x0,x0,x5
921
ldr x5,[sp,#16]
922
str x8,[sp,#8]
923
ror x16,x27,#14
924
add x22,x22,x28 // h+=K[i]
925
ror x7,x2,#1
926
and x17,x20,x27
927
ror x6,x15,#19
928
bic x28,x21,x27
929
ror x8,x23,#28
930
add x22,x22,x0 // h+=X[i]
931
eor x16,x16,x27,ror#18
932
eor x7,x7,x2,ror#8
933
orr x17,x17,x28 // Ch(e,f,g)
934
eor x28,x23,x24 // a^b, b^c in next round
935
eor x16,x16,x27,ror#41 // Sigma1(e)
936
eor x8,x8,x23,ror#34
937
add x22,x22,x17 // h+=Ch(e,f,g)
938
and x19,x19,x28 // (b^c)&=(a^b)
939
eor x6,x6,x15,ror#61
940
eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
941
add x22,x22,x16 // h+=Sigma1(e)
942
eor x19,x19,x24 // Maj(a,b,c)
943
eor x17,x8,x23,ror#39 // Sigma0(a)
944
eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
945
add x1,x1,x10
946
add x26,x26,x22 // d+=h
947
add x22,x22,x19 // h+=Maj(a,b,c)
948
ldr x19,[x30],#8 // *K++, x28 in next round
949
add x1,x1,x7
950
add x22,x22,x17 // h+=Sigma0(a)
951
add x1,x1,x6
952
ldr x6,[sp,#24]
953
str x9,[sp,#16]
954
ror x16,x26,#14
955
add x21,x21,x19 // h+=K[i]
956
ror x8,x3,#1
957
and x17,x27,x26
958
ror x7,x0,#19
959
bic x19,x20,x26
960
ror x9,x22,#28
961
add x21,x21,x1 // h+=X[i]
962
eor x16,x16,x26,ror#18
963
eor x8,x8,x3,ror#8
964
orr x17,x17,x19 // Ch(e,f,g)
965
eor x19,x22,x23 // a^b, b^c in next round
966
eor x16,x16,x26,ror#41 // Sigma1(e)
967
eor x9,x9,x22,ror#34
968
add x21,x21,x17 // h+=Ch(e,f,g)
969
and x28,x28,x19 // (b^c)&=(a^b)
970
eor x7,x7,x0,ror#61
971
eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
972
add x21,x21,x16 // h+=Sigma1(e)
973
eor x28,x28,x23 // Maj(a,b,c)
974
eor x17,x9,x22,ror#39 // Sigma0(a)
975
eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
976
add x2,x2,x11
977
add x25,x25,x21 // d+=h
978
add x21,x21,x28 // h+=Maj(a,b,c)
979
ldr x28,[x30],#8 // *K++, x19 in next round
980
add x2,x2,x8
981
add x21,x21,x17 // h+=Sigma0(a)
982
add x2,x2,x7
983
ldr x7,[sp,#0]
984
str x10,[sp,#24]
985
ror x16,x25,#14
986
add x20,x20,x28 // h+=K[i]
987
ror x9,x4,#1
988
and x17,x26,x25
989
ror x8,x1,#19
990
bic x28,x27,x25
991
ror x10,x21,#28
992
add x20,x20,x2 // h+=X[i]
993
eor x16,x16,x25,ror#18
994
eor x9,x9,x4,ror#8
995
orr x17,x17,x28 // Ch(e,f,g)
996
eor x28,x21,x22 // a^b, b^c in next round
997
eor x16,x16,x25,ror#41 // Sigma1(e)
998
eor x10,x10,x21,ror#34
999
add x20,x20,x17 // h+=Ch(e,f,g)
1000
and x19,x19,x28 // (b^c)&=(a^b)
1001
eor x8,x8,x1,ror#61
1002
eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
1003
add x20,x20,x16 // h+=Sigma1(e)
1004
eor x19,x19,x22 // Maj(a,b,c)
1005
eor x17,x10,x21,ror#39 // Sigma0(a)
1006
eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
1007
add x3,x3,x12
1008
add x24,x24,x20 // d+=h
1009
add x20,x20,x19 // h+=Maj(a,b,c)
1010
ldr x19,[x30],#8 // *K++, x28 in next round
1011
add x3,x3,x9
1012
add x20,x20,x17 // h+=Sigma0(a)
1013
add x3,x3,x8
1014
// x19 holds the K value just loaded for the next round. It is zero only
// when the load hit the terminator quad after the 80 constants, i.e. when
// all 80 rounds of this block are done.
cbnz x19,.Loop_16_xx
1015
1016
ldp x0,x2,[x29,#96] // restore state pointer and end-of-input pointer
1017
ldr x1,[x29,#112] // input pointer as saved at the top of .Loop
1018
sub x30,x30,#648 // rewind
1019
1020
// Add the previous state (reloaded into x3-x10) to the working variables
// x20-x27 and store the sums back as the new state.
ldp x3,x4,[x0]
1021
ldp x5,x6,[x0,#2*8]
1022
add x1,x1,#14*8 // advance input pointer
1023
ldp x7,x8,[x0,#4*8]
1024
add x20,x20,x3
1025
ldp x9,x10,[x0,#6*8]
1026
add x21,x21,x4
1027
add x22,x22,x5
1028
add x23,x23,x6
1029
stp x20,x21,[x0]
1030
add x24,x24,x7
1031
add x25,x25,x8
1032
stp x22,x23,[x0,#2*8]
1033
add x26,x26,x9
1034
add x27,x27,x10
1035
cmp x1,x2 // reached end of input? (stp does not alter the flags)
1036
stp x24,x25,[x0,#4*8]
1037
stp x26,x27,[x0,#6*8]
1038
b.ne .Loop // more blocks: x20-x27 already hold the updated state
1039
1040
// Epilogue: restore the saved registers, drop the spill slots and the frame.
ldp x19,x20,[x29,#16]
1041
add sp,sp,#4*8 // release the four spill slots
1042
ldp x21,x22,[x29,#32]
1043
ldp x23,x24,[x29,#48]
1044
ldp x25,x26,[x29,#64]
1045
ldp x27,x28,[x29,#80]
1046
ldp x29,x30,[sp],#128 // x30 gets the real return address back (it was the K pointer)
1047
ret
1048
.size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
1049
1050
1051
/*
 * zfs_sha512_block_armv8
 *
 * SHA-512 block routine built on the SHA512 crypto-extension instructions
 * (sha512h, sha512h2, sha512su0, sha512su1).  Those four are emitted as raw
 * .inst words; the mnemonic each word stands for is given in the comment
 * next to it.
 *
 * In:
 *   x0 = address of the 64-byte hash state; loaded into v0-v3 on entry and
 *        stored back from v0-v3 on exit
 *   x1 = address of the input, consumed in 128-byte blocks
 *   x2 = number of 128-byte blocks.  Must be at least 1: the first block is
 *        loaded unconditionally and the counter is decremented before it is
 *        tested, so 0 would wrap around instead of ending the loop.
 *
 * Register roles inside .Loop_hw:
 *   v0-v4    working state; the register playing each role rotates with a
 *            period of five steps
 *   v5-v7    temporaries produced by ext
 *   v16-v23  current 128-byte block / message schedule, two 64-bit words per
 *            vector
 *   v24,v25  alternating holders of the next 16 bytes of .LK512
 *   v26-v29  copy of v0-v3 taken at the top of the iteration, added back at
 *            the bottom
 *   x3       cursor into .LK512; advances 40 * 16 = 80*8 bytes per block and
 *            is rewound by the same amount
 *   x4       x1 - 128, selected into x1 on the last block
 *
 * Modifies: x1-x4, condition flags, v0-v7, v16-v29.  x29 is saved and
 * restored; x30 is saved but never written, so it is not reloaded.
 * No other function is called.
 */
.globl	zfs_sha512_block_armv8
.type	zfs_sha512_block_armv8,%function
.align	6
zfs_sha512_block_armv8:
	hint	#34	// bti c
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	// First block: 128 bytes into v16-v23, x1 now points at block 1.
	ld1	{v16.16b-v19.16b},[x1],#64	// load input
	ld1	{v20.16b-v23.16b},[x1],#64

	ld1	{v0.2d-v3.2d},[x0]	// load context
	adr	x3,.LK512

	// Reverse the bytes inside every 64-bit word of the block.
	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw

.align	4
.Loop_hw:
	// One iteration per 128-byte block.  The flags set by subs are only
	// consumed by the csel below; nothing in between writes them.
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b	// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	// On the last block (x2 == 0) step x1 back by 128 so that the
	// "load next input" loads in steps 32-39 re-read the current block
	// rather than reading beyond the input.
	csel	x1,x1,x4,ne	// conditional rewind

	// Steps 0-31: each step consumes one 16-byte pair from .LK512 and one
	// schedule vector, and refreshes that schedule vector with
	// sha512su0/sha512su1.

	// step 0
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 1
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 2
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 3
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 4
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 5
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 6
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 7
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 8
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 9
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 10
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 11
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 12
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 13
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 14
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 15
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 17
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 18
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 19
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 20
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 21
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 22
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 23
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 24
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 25
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 26
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 27
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 28
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 29
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 30
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 31
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// Steps 32-39: no schedule update any more.  Each schedule vector is
	// consumed one last time and then overwritten with 16 bytes read from
	// x1, which are byte-reversed for the following iteration.

	// step 32
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v16.16b},[x1],#16	// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v16.16b,v16.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 33
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v17.2d
	ld1	{v17.16b},[x1],#16	// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v17.16b,v17.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 34
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v18.2d
	ld1	{v18.16b},[x1],#16	// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v18.16b,v18.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// step 35
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v19.2d
	ld1	{v19.16b},[x1],#16	// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	rev64	v19.16b,v19.16b
	add	v4.2d,v1.2d,v3.2d	// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// step 36
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v20.2d
	ld1	{v20.16b},[x1],#16	// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	rev64	v20.16b,v20.16b
	add	v1.2d,v0.2d,v2.2d	// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// step 37
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v21.2d
	ld1	{v21.16b},[x1],#16	// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v21.16b,v21.16b
	add	v0.2d,v3.2d,v4.2d	// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// step 38
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v22.2d
	ld1	{v22.16b},[x1],#16	// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v22.16b,v22.16b
	add	v3.2d,v2.2d,v1.2d	// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// step 39: the last constant pair is already in v25, so x3 is put
	// back at .LK512 here instead of loading another pair.
	sub	x3,x3,#80*8	// rewind
	add	v25.2d,v25.2d,v23.2d
	ld1	{v23.16b},[x1],#16	// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d	// "T1 + H + K512[i]"
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v23.16b,v23.16b
	add	v2.2d,v4.2d,v0.2d	// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// Add the state saved in v26-v29 at the top of the iteration.
	add	v0.2d,v0.2d,v26.2d	// accumulate
	add	v1.2d,v1.2d,v27.2d
	add	v2.2d,v2.2d,v28.2d
	add	v3.2d,v3.2d,v29.2d

	cbnz	x2,.Loop_hw	// x2 = blocks still to process

	st1	{v0.2d-v3.2d},[x0]	// store context

	// Restore x29 and release the 16-byte frame; the saved x30 slot is
	// discarded unread.
	ldr	x29,[sp],#16
	ret
.size	zfs_sha512_block_armv8,.-zfs_sha512_block_armv8
1571
#endif
1572
1573