Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/aarch64/sha512-armv8.S
39507 views
1
/* Do not modify. This file is auto-generated from sha512-armv8.pl. */
2
// Copyright 2014-2025 The OpenSSL Project Authors. All Rights Reserved.
3
//
4
// Licensed under the Apache License 2.0 (the "License"). You may not use
5
// this file except in compliance with the License. You can obtain a copy
6
// in the file LICENSE in the source distribution or at
7
// https://www.openssl.org/source/license.html
8
9
// ====================================================================
10
// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11
// project. The module is, however, dual licensed under OpenSSL and
12
// CRYPTOGAMS licenses depending on where you obtain it. For further
13
// details see http://www.openssl.org/~appro/cryptogams/.
14
//
15
// Permission to use under GPLv2 terms is granted.
16
// ====================================================================
17
//
18
// SHA256/512 for ARMv8.
19
//
20
// Performance in cycles per processed byte and improvement coefficient
21
// over code generated with "default" compiler:
22
//
23
//              SHA256-hw   SHA256(*)       SHA512
24
// Apple A7     1.97        10.5 (+33%)     6.73 (-1%(**))
25
// Cortex-A53   2.38        15.5 (+115%)    10.0 (+150%(***))
26
// Cortex-A57   2.31        11.6 (+86%)     7.51 (+260%(***))
27
// Denver       2.01        10.5 (+26%)     6.70 (+8%)
28
// X-Gene                   20.0 (+100%)    12.8 (+300%(***))
29
// Mongoose     2.36        13.0 (+50%)     8.36 (+33%)
30
// Kryo         1.92        17.4 (+30%)     11.2 (+8%)
31
// ThunderX2    2.54        13.2 (+40%)     8.40 (+18%)
32
//
33
// (*) Software SHA256 results are of lesser relevance, presented
34
// mostly for informational purposes.
35
// (**) The result is a trade-off: it's possible to improve it by
36
// 10% (or by 1 cycle per round), but at the cost of 20% loss
37
// on Cortex-A53 (or by 4 cycles per round).
38
// (***) Super-impressive coefficients over gcc-generated code are
39
// indication of some compiler "pathology", most notably code
40
// generated with -mgeneral-regs-only is significantly faster
41
// and the gap is only 40-90%.
42
//
43
// October 2016.
44
//
45
// Originally it was reckoned that it makes no sense to implement NEON
46
// version of SHA256 for 64-bit processors. This is because performance
47
// improvement on most wide-spread Cortex-A5x processors was observed
48
// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
49
// observed that 32-bit NEON SHA256 performs significantly better than
50
// 64-bit scalar version on *some* of the more recent processors. As
51
// result 64-bit NEON version of SHA256 was added to provide best
52
// all-round performance. For example it executes ~30% faster on X-Gene
53
// and Mongoose. [For reference, NEON version of SHA512 is bound to
54
// deliver much less improvement, likely *negative* on Cortex-A5x.
55
// Which is why NEON support is limited to SHA256.]
56
57
// $output is the last argument if it looks like a file (it has an extension)
58
// $flavour is the first argument if it doesn't look like a file
59
#include "arm_arch.h"
60
#ifndef __KERNEL__
61
62
.hidden OPENSSL_armcap_P
63
#endif
64
65
.text
66
67
.globl sha512_block_data_order
68
.type sha512_block_data_order,%function
69
.align 6
70
// ---------------------------------------------------------------------
// sha512_block_data_order -- SHA-512 block function using only
// general-purpose registers.
//
// Arguments, as consumed by the code below:
//   x0  pointer to the eight 64-bit hash state words (loaded here,
//       updated after every processed block)
//   x1  pointer to the input data
//   x2  number of 128-byte blocks (shifted left by 7 to form the
//       end-of-input pointer)
//
// Register roles for the rest of the function:
//   x20-x27          working variables a..h; their roles rotate from
//                    round to round by register renaming
//   x3-x15, x0-x2    message words X[0..15] (x0-x2 are reused once the
//                    arguments have been saved in the frame)
//   x16,x17,x19,x28  per-round temporaries and the preloaded K word
//   x30              running pointer into .LK512 (*K++); the return
//                    address lives in the stack frame meanwhile
//
// Stack layout after the prologue:
//   [x29,#0]         saved x29,x30
//   [x29,#16..#95]   saved x19-x28
//   [x29,#96]        x0 (state pointer), [x29,#104] end-of-input pointer
//   [x29,#112]       input pointer, stored at the top of .Loop
//   [sp,#0..#31]     four 8-byte spill slots for X[] words
// ---------------------------------------------------------------------
sha512_block_data_order:
71
// NOTE(review): AARCH64_VALID_CALL_TARGET / AARCH64_SIGN_LINK_REGISTER are
// macros from arm_arch.h; presumably the BTI landing pad and return-address
// signing -- confirm against that header.
AARCH64_VALID_CALL_TARGET
72
#ifndef __KERNEL__
73
// Non-kernel builds: if the ARMV8_SHA512 bit is set in OPENSSL_armcap_P,
// hand over to the implementation at .Lv8_entry instead.
adrp x16,OPENSSL_armcap_P
74
ldr w16,[x16,#:lo12:OPENSSL_armcap_P]
75
tst w16,#ARMV8_SHA512
76
b.ne .Lv8_entry
77
#endif
78
AARCH64_SIGN_LINK_REGISTER
79
// Allocate the 128-byte frame and save the frame pointer / link register.
stp x29,x30,[sp,#-128]!
80
add x29,sp,#0
81
82
// Save x19-x28; all of them are overwritten by the round code.
stp x19,x20,[sp,#16]
83
stp x21,x22,[sp,#32]
84
stp x23,x24,[sp,#48]
85
stp x25,x26,[sp,#64]
86
stp x27,x28,[sp,#80]
87
// Extra 32 bytes below the frame: spill slots addressed as [sp,#0..#24].
sub sp,sp,#4*8
88
89
ldp x20,x21,[x0] // load context
90
ldp x22,x23,[x0,#2*8]
91
ldp x24,x25,[x0,#4*8]
92
add x2,x1,x2,lsl#7 // end of input
93
ldp x26,x27,[x0,#6*8]
94
// x30 = address of the round-constant table.
adrp x30,.LK512
95
add x30,x30,#:lo12:.LK512
96
// Park the state pointer and end pointer; x0 and x2 are reused as X[] later.
stp x0,x2,[x29,#96]
97
98
// ---------------------------------------------------------------------
// .Loop -- executed once per 128-byte input block.  Rounds 0..15 follow,
// fully unrolled; each one consumes a message word straight from the
// input.  Every later round is the same template with the a..h registers
// renamed, so only round 0 is annotated in detail:
//
//   h += K[i] + X[i] + Ch(e,f,g) + Sigma1(e);  d += h;
//   h += Maj(a,b,c) + Sigma0(a)
//
// with a=x20 b=x21 c=x22 d=x23 e=x24 f=x25 g=x26 h=x27 in this round.
//   Ch(e,f,g)  = (e & f) | (g & ~e)                       (and / bic / orr)
//   Maj(a,b,c) = ((a^b) & (b^c)) ^ b; the (b^c) term is the previous
//                round's (a^b), so x19 and x28 swap roles every round.
//                The "magic seed" below supplies b^c for round 0.
//   Sigma1(e)  = ror(e,14) ^ ror(e ^ ror(e,23), 18)  -> rotations 14,18,41
//   Sigma0(a)  = ror(a,28) ^ ror(a ^ ror(a,5), 34)   -> rotations 28,34,39
// ---------------------------------------------------------------------
.Loop:
99
// Fetch X[0],X[1] and the first round constant.
ldp x3,x4,[x1],#2*8
100
ldr x19,[x30],#8 // *K++
101
eor x28,x21,x22 // magic seed
102
// Remember the input pointer (already advanced by 16 bytes); the block
// epilogue reloads it from here because x1 is reused as X[14] later on.
str x1,[x29,#112]
103
// Byte-reverse the message word unless __AARCH64EB__ is defined.
#ifndef __AARCH64EB__
104
rev x3,x3 // 0
105
#endif
106
ror x16,x24,#14
107
add x27,x27,x19 // h+=K[i]
108
// x6 serves as a scratch register here; X[3] is not loaded into it until
// round 1.
eor x6,x24,x24,ror#23
109
and x17,x25,x24
110
bic x19,x26,x24
111
add x27,x27,x3 // h+=X[i]
112
orr x17,x17,x19 // Ch(e,f,g)
113
eor x19,x20,x21 // a^b, b^c in next round
114
eor x16,x16,x6,ror#18 // Sigma1(e)
115
ror x6,x20,#28
116
add x27,x27,x17 // h+=Ch(e,f,g)
117
eor x17,x20,x20,ror#5
118
add x27,x27,x16 // h+=Sigma1(e)
119
and x28,x28,x19 // (b^c)&=(a^b)
120
add x23,x23,x27 // d+=h
121
eor x28,x28,x21 // Maj(a,b,c)
122
eor x17,x6,x17,ror#34 // Sigma0(a)
123
add x27,x27,x28 // h+=Maj(a,b,c)
124
// Preload the next round's constant into the register that held Maj.
ldr x28,[x30],#8 // *K++, x19 in next round
125
// The final accumulation is deliberately left out here; the live copy of
// this add sits a few instructions into round 1.
//add x27,x27,x17 // h+=Sigma0(a)
126
#ifndef __AARCH64EB__
127
rev x4,x4 // 1
128
#endif
129
ldp x5,x6,[x1],#2*8
130
add x27,x27,x17 // h+=Sigma0(a)
131
ror x16,x23,#14
132
add x26,x26,x28 // h+=K[i]
133
eor x7,x23,x23,ror#23
134
and x17,x24,x23
135
bic x28,x25,x23
136
add x26,x26,x4 // h+=X[i]
137
orr x17,x17,x28 // Ch(e,f,g)
138
eor x28,x27,x20 // a^b, b^c in next round
139
eor x16,x16,x7,ror#18 // Sigma1(e)
140
ror x7,x27,#28
141
add x26,x26,x17 // h+=Ch(e,f,g)
142
eor x17,x27,x27,ror#5
143
add x26,x26,x16 // h+=Sigma1(e)
144
and x19,x19,x28 // (b^c)&=(a^b)
145
add x22,x22,x26 // d+=h
146
eor x19,x19,x20 // Maj(a,b,c)
147
eor x17,x7,x17,ror#34 // Sigma0(a)
148
add x26,x26,x19 // h+=Maj(a,b,c)
149
ldr x19,[x30],#8 // *K++, x28 in next round
150
//add x26,x26,x17 // h+=Sigma0(a)
151
#ifndef __AARCH64EB__
152
rev x5,x5 // 2
153
#endif
154
add x26,x26,x17 // h+=Sigma0(a)
155
ror x16,x22,#14
156
add x25,x25,x19 // h+=K[i]
157
eor x8,x22,x22,ror#23
158
and x17,x23,x22
159
bic x19,x24,x22
160
add x25,x25,x5 // h+=X[i]
161
orr x17,x17,x19 // Ch(e,f,g)
162
eor x19,x26,x27 // a^b, b^c in next round
163
eor x16,x16,x8,ror#18 // Sigma1(e)
164
ror x8,x26,#28
165
add x25,x25,x17 // h+=Ch(e,f,g)
166
eor x17,x26,x26,ror#5
167
add x25,x25,x16 // h+=Sigma1(e)
168
and x28,x28,x19 // (b^c)&=(a^b)
169
add x21,x21,x25 // d+=h
170
eor x28,x28,x27 // Maj(a,b,c)
171
eor x17,x8,x17,ror#34 // Sigma0(a)
172
add x25,x25,x28 // h+=Maj(a,b,c)
173
ldr x28,[x30],#8 // *K++, x19 in next round
174
//add x25,x25,x17 // h+=Sigma0(a)
175
#ifndef __AARCH64EB__
176
rev x6,x6 // 3
177
#endif
178
ldp x7,x8,[x1],#2*8
179
add x25,x25,x17 // h+=Sigma0(a)
180
ror x16,x21,#14
181
add x24,x24,x28 // h+=K[i]
182
eor x9,x21,x21,ror#23
183
and x17,x22,x21
184
bic x28,x23,x21
185
add x24,x24,x6 // h+=X[i]
186
orr x17,x17,x28 // Ch(e,f,g)
187
eor x28,x25,x26 // a^b, b^c in next round
188
eor x16,x16,x9,ror#18 // Sigma1(e)
189
ror x9,x25,#28
190
add x24,x24,x17 // h+=Ch(e,f,g)
191
eor x17,x25,x25,ror#5
192
add x24,x24,x16 // h+=Sigma1(e)
193
and x19,x19,x28 // (b^c)&=(a^b)
194
add x20,x20,x24 // d+=h
195
eor x19,x19,x26 // Maj(a,b,c)
196
eor x17,x9,x17,ror#34 // Sigma0(a)
197
add x24,x24,x19 // h+=Maj(a,b,c)
198
ldr x19,[x30],#8 // *K++, x28 in next round
199
//add x24,x24,x17 // h+=Sigma0(a)
200
#ifndef __AARCH64EB__
201
rev x7,x7 // 4
202
#endif
203
add x24,x24,x17 // h+=Sigma0(a)
204
ror x16,x20,#14
205
add x23,x23,x19 // h+=K[i]
206
eor x10,x20,x20,ror#23
207
and x17,x21,x20
208
bic x19,x22,x20
209
add x23,x23,x7 // h+=X[i]
210
orr x17,x17,x19 // Ch(e,f,g)
211
eor x19,x24,x25 // a^b, b^c in next round
212
eor x16,x16,x10,ror#18 // Sigma1(e)
213
ror x10,x24,#28
214
add x23,x23,x17 // h+=Ch(e,f,g)
215
eor x17,x24,x24,ror#5
216
add x23,x23,x16 // h+=Sigma1(e)
217
and x28,x28,x19 // (b^c)&=(a^b)
218
add x27,x27,x23 // d+=h
219
eor x28,x28,x25 // Maj(a,b,c)
220
eor x17,x10,x17,ror#34 // Sigma0(a)
221
add x23,x23,x28 // h+=Maj(a,b,c)
222
ldr x28,[x30],#8 // *K++, x19 in next round
223
//add x23,x23,x17 // h+=Sigma0(a)
224
#ifndef __AARCH64EB__
225
rev x8,x8 // 5
226
#endif
227
ldp x9,x10,[x1],#2*8
228
add x23,x23,x17 // h+=Sigma0(a)
229
ror x16,x27,#14
230
add x22,x22,x28 // h+=K[i]
231
eor x11,x27,x27,ror#23
232
and x17,x20,x27
233
bic x28,x21,x27
234
add x22,x22,x8 // h+=X[i]
235
orr x17,x17,x28 // Ch(e,f,g)
236
eor x28,x23,x24 // a^b, b^c in next round
237
eor x16,x16,x11,ror#18 // Sigma1(e)
238
ror x11,x23,#28
239
add x22,x22,x17 // h+=Ch(e,f,g)
240
eor x17,x23,x23,ror#5
241
add x22,x22,x16 // h+=Sigma1(e)
242
and x19,x19,x28 // (b^c)&=(a^b)
243
add x26,x26,x22 // d+=h
244
eor x19,x19,x24 // Maj(a,b,c)
245
eor x17,x11,x17,ror#34 // Sigma0(a)
246
add x22,x22,x19 // h+=Maj(a,b,c)
247
ldr x19,[x30],#8 // *K++, x28 in next round
248
//add x22,x22,x17 // h+=Sigma0(a)
249
#ifndef __AARCH64EB__
250
rev x9,x9 // 6
251
#endif
252
add x22,x22,x17 // h+=Sigma0(a)
253
ror x16,x26,#14
254
add x21,x21,x19 // h+=K[i]
255
eor x12,x26,x26,ror#23
256
and x17,x27,x26
257
bic x19,x20,x26
258
add x21,x21,x9 // h+=X[i]
259
orr x17,x17,x19 // Ch(e,f,g)
260
eor x19,x22,x23 // a^b, b^c in next round
261
eor x16,x16,x12,ror#18 // Sigma1(e)
262
ror x12,x22,#28
263
add x21,x21,x17 // h+=Ch(e,f,g)
264
eor x17,x22,x22,ror#5
265
add x21,x21,x16 // h+=Sigma1(e)
266
and x28,x28,x19 // (b^c)&=(a^b)
267
add x25,x25,x21 // d+=h
268
eor x28,x28,x23 // Maj(a,b,c)
269
eor x17,x12,x17,ror#34 // Sigma0(a)
270
add x21,x21,x28 // h+=Maj(a,b,c)
271
ldr x28,[x30],#8 // *K++, x19 in next round
272
//add x21,x21,x17 // h+=Sigma0(a)
273
#ifndef __AARCH64EB__
274
rev x10,x10 // 7
275
#endif
276
ldp x11,x12,[x1],#2*8
277
add x21,x21,x17 // h+=Sigma0(a)
278
ror x16,x25,#14
279
add x20,x20,x28 // h+=K[i]
280
eor x13,x25,x25,ror#23
281
and x17,x26,x25
282
bic x28,x27,x25
283
add x20,x20,x10 // h+=X[i]
284
orr x17,x17,x28 // Ch(e,f,g)
285
eor x28,x21,x22 // a^b, b^c in next round
286
eor x16,x16,x13,ror#18 // Sigma1(e)
287
ror x13,x21,#28
288
add x20,x20,x17 // h+=Ch(e,f,g)
289
eor x17,x21,x21,ror#5
290
add x20,x20,x16 // h+=Sigma1(e)
291
and x19,x19,x28 // (b^c)&=(a^b)
292
add x24,x24,x20 // d+=h
293
eor x19,x19,x22 // Maj(a,b,c)
294
eor x17,x13,x17,ror#34 // Sigma0(a)
295
add x20,x20,x19 // h+=Maj(a,b,c)
296
ldr x19,[x30],#8 // *K++, x28 in next round
297
//add x20,x20,x17 // h+=Sigma0(a)
298
#ifndef __AARCH64EB__
299
rev x11,x11 // 8
300
#endif
301
add x20,x20,x17 // h+=Sigma0(a)
302
ror x16,x24,#14
303
add x27,x27,x19 // h+=K[i]
304
eor x14,x24,x24,ror#23
305
and x17,x25,x24
306
bic x19,x26,x24
307
add x27,x27,x11 // h+=X[i]
308
orr x17,x17,x19 // Ch(e,f,g)
309
eor x19,x20,x21 // a^b, b^c in next round
310
eor x16,x16,x14,ror#18 // Sigma1(e)
311
ror x14,x20,#28
312
add x27,x27,x17 // h+=Ch(e,f,g)
313
eor x17,x20,x20,ror#5
314
add x27,x27,x16 // h+=Sigma1(e)
315
and x28,x28,x19 // (b^c)&=(a^b)
316
add x23,x23,x27 // d+=h
317
eor x28,x28,x21 // Maj(a,b,c)
318
eor x17,x14,x17,ror#34 // Sigma0(a)
319
add x27,x27,x28 // h+=Maj(a,b,c)
320
ldr x28,[x30],#8 // *K++, x19 in next round
321
//add x27,x27,x17 // h+=Sigma0(a)
322
#ifndef __AARCH64EB__
323
rev x12,x12 // 9
324
#endif
325
ldp x13,x14,[x1],#2*8
326
add x27,x27,x17 // h+=Sigma0(a)
327
ror x16,x23,#14
328
add x26,x26,x28 // h+=K[i]
329
eor x15,x23,x23,ror#23
330
and x17,x24,x23
331
bic x28,x25,x23
332
add x26,x26,x12 // h+=X[i]
333
orr x17,x17,x28 // Ch(e,f,g)
334
eor x28,x27,x20 // a^b, b^c in next round
335
eor x16,x16,x15,ror#18 // Sigma1(e)
336
ror x15,x27,#28
337
add x26,x26,x17 // h+=Ch(e,f,g)
338
eor x17,x27,x27,ror#5
339
add x26,x26,x16 // h+=Sigma1(e)
340
and x19,x19,x28 // (b^c)&=(a^b)
341
add x22,x22,x26 // d+=h
342
eor x19,x19,x20 // Maj(a,b,c)
343
eor x17,x15,x17,ror#34 // Sigma0(a)
344
add x26,x26,x19 // h+=Maj(a,b,c)
345
ldr x19,[x30],#8 // *K++, x28 in next round
346
//add x26,x26,x17 // h+=Sigma0(a)
347
#ifndef __AARCH64EB__
348
rev x13,x13 // 10
349
#endif
350
add x26,x26,x17 // h+=Sigma0(a)
351
ror x16,x22,#14
352
add x25,x25,x19 // h+=K[i]
353
eor x0,x22,x22,ror#23
354
and x17,x23,x22
355
bic x19,x24,x22
356
add x25,x25,x13 // h+=X[i]
357
orr x17,x17,x19 // Ch(e,f,g)
358
eor x19,x26,x27 // a^b, b^c in next round
359
eor x16,x16,x0,ror#18 // Sigma1(e)
360
ror x0,x26,#28
361
add x25,x25,x17 // h+=Ch(e,f,g)
362
eor x17,x26,x26,ror#5
363
add x25,x25,x16 // h+=Sigma1(e)
364
and x28,x28,x19 // (b^c)&=(a^b)
365
add x21,x21,x25 // d+=h
366
eor x28,x28,x27 // Maj(a,b,c)
367
eor x17,x0,x17,ror#34 // Sigma0(a)
368
add x25,x25,x28 // h+=Maj(a,b,c)
369
ldr x28,[x30],#8 // *K++, x19 in next round
370
//add x25,x25,x17 // h+=Sigma0(a)
371
#ifndef __AARCH64EB__
372
rev x14,x14 // 11
373
#endif
374
ldp x15,x0,[x1],#2*8
375
add x25,x25,x17 // h+=Sigma0(a)
376
str x6,[sp,#24]
377
ror x16,x21,#14
378
add x24,x24,x28 // h+=K[i]
379
eor x6,x21,x21,ror#23
380
and x17,x22,x21
381
bic x28,x23,x21
382
add x24,x24,x14 // h+=X[i]
383
orr x17,x17,x28 // Ch(e,f,g)
384
eor x28,x25,x26 // a^b, b^c in next round
385
eor x16,x16,x6,ror#18 // Sigma1(e)
386
ror x6,x25,#28
387
add x24,x24,x17 // h+=Ch(e,f,g)
388
eor x17,x25,x25,ror#5
389
add x24,x24,x16 // h+=Sigma1(e)
390
and x19,x19,x28 // (b^c)&=(a^b)
391
add x20,x20,x24 // d+=h
392
eor x19,x19,x26 // Maj(a,b,c)
393
eor x17,x6,x17,ror#34 // Sigma0(a)
394
add x24,x24,x19 // h+=Maj(a,b,c)
395
ldr x19,[x30],#8 // *K++, x28 in next round
396
//add x24,x24,x17 // h+=Sigma0(a)
397
#ifndef __AARCH64EB__
398
rev x15,x15 // 12
399
#endif
400
add x24,x24,x17 // h+=Sigma0(a)
401
str x7,[sp,#0]
402
ror x16,x20,#14
403
add x23,x23,x19 // h+=K[i]
404
eor x7,x20,x20,ror#23
405
and x17,x21,x20
406
bic x19,x22,x20
407
add x23,x23,x15 // h+=X[i]
408
orr x17,x17,x19 // Ch(e,f,g)
409
eor x19,x24,x25 // a^b, b^c in next round
410
eor x16,x16,x7,ror#18 // Sigma1(e)
411
ror x7,x24,#28
412
add x23,x23,x17 // h+=Ch(e,f,g)
413
eor x17,x24,x24,ror#5
414
add x23,x23,x16 // h+=Sigma1(e)
415
and x28,x28,x19 // (b^c)&=(a^b)
416
add x27,x27,x23 // d+=h
417
eor x28,x28,x25 // Maj(a,b,c)
418
eor x17,x7,x17,ror#34 // Sigma0(a)
419
add x23,x23,x28 // h+=Maj(a,b,c)
420
ldr x28,[x30],#8 // *K++, x19 in next round
421
//add x23,x23,x17 // h+=Sigma0(a)
422
#ifndef __AARCH64EB__
423
rev x0,x0 // 13
424
#endif
425
ldp x1,x2,[x1]
426
add x23,x23,x17 // h+=Sigma0(a)
427
str x8,[sp,#8]
428
ror x16,x27,#14
429
add x22,x22,x28 // h+=K[i]
430
eor x8,x27,x27,ror#23
431
and x17,x20,x27
432
bic x28,x21,x27
433
add x22,x22,x0 // h+=X[i]
434
orr x17,x17,x28 // Ch(e,f,g)
435
eor x28,x23,x24 // a^b, b^c in next round
436
eor x16,x16,x8,ror#18 // Sigma1(e)
437
ror x8,x23,#28
438
add x22,x22,x17 // h+=Ch(e,f,g)
439
eor x17,x23,x23,ror#5
440
add x22,x22,x16 // h+=Sigma1(e)
441
and x19,x19,x28 // (b^c)&=(a^b)
442
add x26,x26,x22 // d+=h
443
eor x19,x19,x24 // Maj(a,b,c)
444
eor x17,x8,x17,ror#34 // Sigma0(a)
445
add x22,x22,x19 // h+=Maj(a,b,c)
446
ldr x19,[x30],#8 // *K++, x28 in next round
447
//add x22,x22,x17 // h+=Sigma0(a)
448
#ifndef __AARCH64EB__
449
rev x1,x1 // 14
450
#endif
451
ldr x6,[sp,#24]
452
add x22,x22,x17 // h+=Sigma0(a)
453
str x9,[sp,#16]
454
ror x16,x26,#14
455
add x21,x21,x19 // h+=K[i]
456
eor x9,x26,x26,ror#23
457
and x17,x27,x26
458
bic x19,x20,x26
459
add x21,x21,x1 // h+=X[i]
460
orr x17,x17,x19 // Ch(e,f,g)
461
eor x19,x22,x23 // a^b, b^c in next round
462
eor x16,x16,x9,ror#18 // Sigma1(e)
463
ror x9,x22,#28
464
add x21,x21,x17 // h+=Ch(e,f,g)
465
eor x17,x22,x22,ror#5
466
add x21,x21,x16 // h+=Sigma1(e)
467
and x28,x28,x19 // (b^c)&=(a^b)
468
add x25,x25,x21 // d+=h
469
eor x28,x28,x23 // Maj(a,b,c)
470
eor x17,x9,x17,ror#34 // Sigma0(a)
471
add x21,x21,x28 // h+=Maj(a,b,c)
472
ldr x28,[x30],#8 // *K++, x19 in next round
473
//add x21,x21,x17 // h+=Sigma0(a)
474
#ifndef __AARCH64EB__
475
rev x2,x2 // 15
476
#endif
477
ldr x7,[sp,#0]
478
add x21,x21,x17 // h+=Sigma0(a)
479
str x10,[sp,#24]
480
ror x16,x25,#14
481
add x20,x20,x28 // h+=K[i]
482
ror x9,x4,#1
483
and x17,x26,x25
484
ror x8,x1,#19
485
bic x28,x27,x25
486
ror x10,x21,#28
487
add x20,x20,x2 // h+=X[i]
488
eor x16,x16,x25,ror#18
489
eor x9,x9,x4,ror#8
490
orr x17,x17,x28 // Ch(e,f,g)
491
eor x28,x21,x22 // a^b, b^c in next round
492
eor x16,x16,x25,ror#41 // Sigma1(e)
493
eor x10,x10,x21,ror#34
494
add x20,x20,x17 // h+=Ch(e,f,g)
495
and x19,x19,x28 // (b^c)&=(a^b)
496
eor x8,x8,x1,ror#61
497
eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
498
add x20,x20,x16 // h+=Sigma1(e)
499
eor x19,x19,x22 // Maj(a,b,c)
500
eor x17,x10,x21,ror#39 // Sigma0(a)
501
eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
502
add x3,x3,x12
503
add x24,x24,x20 // d+=h
504
add x20,x20,x19 // h+=Maj(a,b,c)
505
ldr x19,[x30],#8 // *K++, x28 in next round
506
add x3,x3,x9
507
add x20,x20,x17 // h+=Sigma0(a)
508
add x3,x3,x8
509
// ---------------------------------------------------------------------
// .Loop_16_xx -- rounds 16 and up, 16 unrolled rounds per iteration.
// Each round does two things, interleaved instruction by instruction:
//   1. the compression round on X[i] (same formula as rounds 0..15, with
//      Sigma1 = ror14^ror18^ror41 and Sigma0 = ror28^ror34^ror39 built
//      from three explicit rotations each);
//   2. the message-schedule update of the word used by the NEXT round,
//      in place in its register:
//        X[j] += sigma0(X[j+1]) + sigma1(X[j+14]) + X[j+9]   (indices mod 16)
//        sigma0(x) = ror(x,1)  ^ ror(x,8)  ^ (x >> 7)
//        sigma1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6)
//      The existing "X[i+1]" / "X[i+14]" comments are relative to that
//      word (x4 in this first round), not to the X[i] added to h.
// The temporaries overlap registers that hold X[] words, so each round
// also reloads one word from, and parks another in, the [sp,#0..#24]
// spill slots.
// The loop repeats until the K word preloaded into x19 is the zero
// terminator at the end of .LK512 (tested by the cbnz that closes the
// loop body).
// ---------------------------------------------------------------------
.Loop_16_xx:
510
// Reload a previously spilled X[] word into x8 and park x11, which is
// reused as the Sigma0 temporary a few lines below.
ldr x8,[sp,#8]
511
str x11,[sp,#0]
512
ror x16,x24,#14
513
add x27,x27,x19 // h+=K[i]
514
ror x10,x5,#1
515
and x17,x25,x24
516
ror x9,x2,#19
517
bic x19,x26,x24
518
ror x11,x20,#28
519
add x27,x27,x3 // h+=X[i]
520
eor x16,x16,x24,ror#18
521
eor x10,x10,x5,ror#8
522
orr x17,x17,x19 // Ch(e,f,g)
523
eor x19,x20,x21 // a^b, b^c in next round
524
eor x16,x16,x24,ror#41 // Sigma1(e)
525
eor x11,x11,x20,ror#34
526
add x27,x27,x17 // h+=Ch(e,f,g)
527
and x28,x28,x19 // (b^c)&=(a^b)
528
eor x9,x9,x2,ror#61
529
eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
530
add x27,x27,x16 // h+=Sigma1(e)
531
eor x28,x28,x21 // Maj(a,b,c)
532
eor x17,x11,x20,ror#39 // Sigma0(a)
533
eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
534
// Schedule update of x4: add the word nine positions ahead (x13) ...
add x4,x4,x13
535
add x23,x23,x27 // d+=h
536
add x27,x27,x28 // h+=Maj(a,b,c)
537
ldr x28,[x30],#8 // *K++, x19 in next round
538
// ... then sigma0 of the following word (x5) ...
add x4,x4,x10
539
add x27,x27,x17 // h+=Sigma0(a)
540
// ... and sigma1 of the word fourteen positions ahead (x2).
add x4,x4,x9
541
ldr x9,[sp,#16]
542
str x12,[sp,#8]
543
ror x16,x23,#14
544
add x26,x26,x28 // h+=K[i]
545
ror x11,x6,#1
546
and x17,x24,x23
547
ror x10,x3,#19
548
bic x28,x25,x23
549
ror x12,x27,#28
550
add x26,x26,x4 // h+=X[i]
551
eor x16,x16,x23,ror#18
552
eor x11,x11,x6,ror#8
553
orr x17,x17,x28 // Ch(e,f,g)
554
eor x28,x27,x20 // a^b, b^c in next round
555
eor x16,x16,x23,ror#41 // Sigma1(e)
556
eor x12,x12,x27,ror#34
557
add x26,x26,x17 // h+=Ch(e,f,g)
558
and x19,x19,x28 // (b^c)&=(a^b)
559
eor x10,x10,x3,ror#61
560
eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
561
add x26,x26,x16 // h+=Sigma1(e)
562
eor x19,x19,x20 // Maj(a,b,c)
563
eor x17,x12,x27,ror#39 // Sigma0(a)
564
eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
565
add x5,x5,x14
566
add x22,x22,x26 // d+=h
567
add x26,x26,x19 // h+=Maj(a,b,c)
568
ldr x19,[x30],#8 // *K++, x28 in next round
569
add x5,x5,x11
570
add x26,x26,x17 // h+=Sigma0(a)
571
add x5,x5,x10
572
ldr x10,[sp,#24]
573
str x13,[sp,#16]
574
ror x16,x22,#14
575
add x25,x25,x19 // h+=K[i]
576
ror x12,x7,#1
577
and x17,x23,x22
578
ror x11,x4,#19
579
bic x19,x24,x22
580
ror x13,x26,#28
581
add x25,x25,x5 // h+=X[i]
582
eor x16,x16,x22,ror#18
583
eor x12,x12,x7,ror#8
584
orr x17,x17,x19 // Ch(e,f,g)
585
eor x19,x26,x27 // a^b, b^c in next round
586
eor x16,x16,x22,ror#41 // Sigma1(e)
587
eor x13,x13,x26,ror#34
588
add x25,x25,x17 // h+=Ch(e,f,g)
589
and x28,x28,x19 // (b^c)&=(a^b)
590
eor x11,x11,x4,ror#61
591
eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
592
add x25,x25,x16 // h+=Sigma1(e)
593
eor x28,x28,x27 // Maj(a,b,c)
594
eor x17,x13,x26,ror#39 // Sigma0(a)
595
eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
596
add x6,x6,x15
597
add x21,x21,x25 // d+=h
598
add x25,x25,x28 // h+=Maj(a,b,c)
599
ldr x28,[x30],#8 // *K++, x19 in next round
600
add x6,x6,x12
601
add x25,x25,x17 // h+=Sigma0(a)
602
add x6,x6,x11
603
ldr x11,[sp,#0]
604
str x14,[sp,#24]
605
ror x16,x21,#14
606
add x24,x24,x28 // h+=K[i]
607
ror x13,x8,#1
608
and x17,x22,x21
609
ror x12,x5,#19
610
bic x28,x23,x21
611
ror x14,x25,#28
612
add x24,x24,x6 // h+=X[i]
613
eor x16,x16,x21,ror#18
614
eor x13,x13,x8,ror#8
615
orr x17,x17,x28 // Ch(e,f,g)
616
eor x28,x25,x26 // a^b, b^c in next round
617
eor x16,x16,x21,ror#41 // Sigma1(e)
618
eor x14,x14,x25,ror#34
619
add x24,x24,x17 // h+=Ch(e,f,g)
620
and x19,x19,x28 // (b^c)&=(a^b)
621
eor x12,x12,x5,ror#61
622
eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
623
add x24,x24,x16 // h+=Sigma1(e)
624
eor x19,x19,x26 // Maj(a,b,c)
625
eor x17,x14,x25,ror#39 // Sigma0(a)
626
eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
627
add x7,x7,x0
628
add x20,x20,x24 // d+=h
629
add x24,x24,x19 // h+=Maj(a,b,c)
630
ldr x19,[x30],#8 // *K++, x28 in next round
631
add x7,x7,x13
632
add x24,x24,x17 // h+=Sigma0(a)
633
add x7,x7,x12
634
ldr x12,[sp,#8]
635
str x15,[sp,#0]
636
ror x16,x20,#14
637
add x23,x23,x19 // h+=K[i]
638
ror x14,x9,#1
639
and x17,x21,x20
640
ror x13,x6,#19
641
bic x19,x22,x20
642
ror x15,x24,#28
643
add x23,x23,x7 // h+=X[i]
644
eor x16,x16,x20,ror#18
645
eor x14,x14,x9,ror#8
646
orr x17,x17,x19 // Ch(e,f,g)
647
eor x19,x24,x25 // a^b, b^c in next round
648
eor x16,x16,x20,ror#41 // Sigma1(e)
649
eor x15,x15,x24,ror#34
650
add x23,x23,x17 // h+=Ch(e,f,g)
651
and x28,x28,x19 // (b^c)&=(a^b)
652
eor x13,x13,x6,ror#61
653
eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
654
add x23,x23,x16 // h+=Sigma1(e)
655
eor x28,x28,x25 // Maj(a,b,c)
656
eor x17,x15,x24,ror#39 // Sigma0(a)
657
eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
658
add x8,x8,x1
659
add x27,x27,x23 // d+=h
660
add x23,x23,x28 // h+=Maj(a,b,c)
661
ldr x28,[x30],#8 // *K++, x19 in next round
662
add x8,x8,x14
663
add x23,x23,x17 // h+=Sigma0(a)
664
add x8,x8,x13
665
ldr x13,[sp,#16]
666
str x0,[sp,#8]
667
ror x16,x27,#14
668
add x22,x22,x28 // h+=K[i]
669
ror x15,x10,#1
670
and x17,x20,x27
671
ror x14,x7,#19
672
bic x28,x21,x27
673
ror x0,x23,#28
674
add x22,x22,x8 // h+=X[i]
675
eor x16,x16,x27,ror#18
676
eor x15,x15,x10,ror#8
677
orr x17,x17,x28 // Ch(e,f,g)
678
eor x28,x23,x24 // a^b, b^c in next round
679
eor x16,x16,x27,ror#41 // Sigma1(e)
680
eor x0,x0,x23,ror#34
681
add x22,x22,x17 // h+=Ch(e,f,g)
682
and x19,x19,x28 // (b^c)&=(a^b)
683
eor x14,x14,x7,ror#61
684
eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
685
add x22,x22,x16 // h+=Sigma1(e)
686
eor x19,x19,x24 // Maj(a,b,c)
687
eor x17,x0,x23,ror#39 // Sigma0(a)
688
eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
689
add x9,x9,x2
690
add x26,x26,x22 // d+=h
691
add x22,x22,x19 // h+=Maj(a,b,c)
692
ldr x19,[x30],#8 // *K++, x28 in next round
693
add x9,x9,x15
694
add x22,x22,x17 // h+=Sigma0(a)
695
add x9,x9,x14
696
ldr x14,[sp,#24]
697
str x1,[sp,#16]
698
ror x16,x26,#14
699
add x21,x21,x19 // h+=K[i]
700
ror x0,x11,#1
701
and x17,x27,x26
702
ror x15,x8,#19
703
bic x19,x20,x26
704
ror x1,x22,#28
705
add x21,x21,x9 // h+=X[i]
706
eor x16,x16,x26,ror#18
707
eor x0,x0,x11,ror#8
708
orr x17,x17,x19 // Ch(e,f,g)
709
eor x19,x22,x23 // a^b, b^c in next round
710
eor x16,x16,x26,ror#41 // Sigma1(e)
711
eor x1,x1,x22,ror#34
712
add x21,x21,x17 // h+=Ch(e,f,g)
713
and x28,x28,x19 // (b^c)&=(a^b)
714
eor x15,x15,x8,ror#61
715
eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
716
add x21,x21,x16 // h+=Sigma1(e)
717
eor x28,x28,x23 // Maj(a,b,c)
718
eor x17,x1,x22,ror#39 // Sigma0(a)
719
eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
720
add x10,x10,x3
721
add x25,x25,x21 // d+=h
722
add x21,x21,x28 // h+=Maj(a,b,c)
723
ldr x28,[x30],#8 // *K++, x19 in next round
724
add x10,x10,x0
725
add x21,x21,x17 // h+=Sigma0(a)
726
add x10,x10,x15
727
ldr x15,[sp,#0]
728
str x2,[sp,#24]
729
ror x16,x25,#14
730
add x20,x20,x28 // h+=K[i]
731
ror x1,x12,#1
732
and x17,x26,x25
733
ror x0,x9,#19
734
bic x28,x27,x25
735
ror x2,x21,#28
736
add x20,x20,x10 // h+=X[i]
737
eor x16,x16,x25,ror#18
738
eor x1,x1,x12,ror#8
739
orr x17,x17,x28 // Ch(e,f,g)
740
eor x28,x21,x22 // a^b, b^c in next round
741
eor x16,x16,x25,ror#41 // Sigma1(e)
742
eor x2,x2,x21,ror#34
743
add x20,x20,x17 // h+=Ch(e,f,g)
744
and x19,x19,x28 // (b^c)&=(a^b)
745
eor x0,x0,x9,ror#61
746
eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
747
add x20,x20,x16 // h+=Sigma1(e)
748
eor x19,x19,x22 // Maj(a,b,c)
749
eor x17,x2,x21,ror#39 // Sigma0(a)
750
eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
751
add x11,x11,x4
752
add x24,x24,x20 // d+=h
753
add x20,x20,x19 // h+=Maj(a,b,c)
754
ldr x19,[x30],#8 // *K++, x28 in next round
755
add x11,x11,x1
756
add x20,x20,x17 // h+=Sigma0(a)
757
add x11,x11,x0
758
ldr x0,[sp,#8]
759
str x3,[sp,#0]
760
ror x16,x24,#14
761
add x27,x27,x19 // h+=K[i]
762
ror x2,x13,#1
763
and x17,x25,x24
764
ror x1,x10,#19
765
bic x19,x26,x24
766
ror x3,x20,#28
767
add x27,x27,x11 // h+=X[i]
768
eor x16,x16,x24,ror#18
769
eor x2,x2,x13,ror#8
770
orr x17,x17,x19 // Ch(e,f,g)
771
eor x19,x20,x21 // a^b, b^c in next round
772
eor x16,x16,x24,ror#41 // Sigma1(e)
773
eor x3,x3,x20,ror#34
774
add x27,x27,x17 // h+=Ch(e,f,g)
775
and x28,x28,x19 // (b^c)&=(a^b)
776
eor x1,x1,x10,ror#61
777
eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
778
add x27,x27,x16 // h+=Sigma1(e)
779
eor x28,x28,x21 // Maj(a,b,c)
780
eor x17,x3,x20,ror#39 // Sigma0(a)
781
eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
782
add x12,x12,x5
783
add x23,x23,x27 // d+=h
784
add x27,x27,x28 // h+=Maj(a,b,c)
785
ldr x28,[x30],#8 // *K++, x19 in next round
786
add x12,x12,x2
787
add x27,x27,x17 // h+=Sigma0(a)
788
add x12,x12,x1
789
ldr x1,[sp,#16]
790
str x4,[sp,#8]
791
ror x16,x23,#14
792
add x26,x26,x28 // h+=K[i]
793
ror x3,x14,#1
794
and x17,x24,x23
795
ror x2,x11,#19
796
bic x28,x25,x23
797
ror x4,x27,#28
798
add x26,x26,x12 // h+=X[i]
799
eor x16,x16,x23,ror#18
800
eor x3,x3,x14,ror#8
801
orr x17,x17,x28 // Ch(e,f,g)
802
eor x28,x27,x20 // a^b, b^c in next round
803
eor x16,x16,x23,ror#41 // Sigma1(e)
804
eor x4,x4,x27,ror#34
805
add x26,x26,x17 // h+=Ch(e,f,g)
806
and x19,x19,x28 // (b^c)&=(a^b)
807
eor x2,x2,x11,ror#61
808
eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
809
add x26,x26,x16 // h+=Sigma1(e)
810
eor x19,x19,x20 // Maj(a,b,c)
811
eor x17,x4,x27,ror#39 // Sigma0(a)
812
eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
813
add x13,x13,x6
814
add x22,x22,x26 // d+=h
815
add x26,x26,x19 // h+=Maj(a,b,c)
816
ldr x19,[x30],#8 // *K++, x28 in next round
817
add x13,x13,x3
818
add x26,x26,x17 // h+=Sigma0(a)
819
add x13,x13,x2
820
ldr x2,[sp,#24]
821
str x5,[sp,#16]
822
ror x16,x22,#14
823
add x25,x25,x19 // h+=K[i]
824
ror x4,x15,#1
825
and x17,x23,x22
826
ror x3,x12,#19
827
bic x19,x24,x22
828
ror x5,x26,#28
829
add x25,x25,x13 // h+=X[i]
830
eor x16,x16,x22,ror#18
831
eor x4,x4,x15,ror#8
832
orr x17,x17,x19 // Ch(e,f,g)
833
eor x19,x26,x27 // a^b, b^c in next round
834
eor x16,x16,x22,ror#41 // Sigma1(e)
835
eor x5,x5,x26,ror#34
836
add x25,x25,x17 // h+=Ch(e,f,g)
837
and x28,x28,x19 // (b^c)&=(a^b)
838
eor x3,x3,x12,ror#61
839
eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
840
add x25,x25,x16 // h+=Sigma1(e)
841
eor x28,x28,x27 // Maj(a,b,c)
842
eor x17,x5,x26,ror#39 // Sigma0(a)
843
eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
844
add x14,x14,x7
845
add x21,x21,x25 // d+=h
846
add x25,x25,x28 // h+=Maj(a,b,c)
847
ldr x28,[x30],#8 // *K++, x19 in next round
848
add x14,x14,x4
849
add x25,x25,x17 // h+=Sigma0(a)
850
add x14,x14,x3
851
ldr x3,[sp,#0]
852
str x6,[sp,#24]
853
ror x16,x21,#14
854
add x24,x24,x28 // h+=K[i]
855
ror x5,x0,#1
856
and x17,x22,x21
857
ror x4,x13,#19
858
bic x28,x23,x21
859
ror x6,x25,#28
860
add x24,x24,x14 // h+=X[i]
861
eor x16,x16,x21,ror#18
862
eor x5,x5,x0,ror#8
863
orr x17,x17,x28 // Ch(e,f,g)
864
eor x28,x25,x26 // a^b, b^c in next round
865
eor x16,x16,x21,ror#41 // Sigma1(e)
866
eor x6,x6,x25,ror#34
867
add x24,x24,x17 // h+=Ch(e,f,g)
868
and x19,x19,x28 // (b^c)&=(a^b)
869
eor x4,x4,x13,ror#61
870
eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
871
add x24,x24,x16 // h+=Sigma1(e)
872
eor x19,x19,x26 // Maj(a,b,c)
873
eor x17,x6,x25,ror#39 // Sigma0(a)
874
eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
875
add x15,x15,x8
876
add x20,x20,x24 // d+=h
877
add x24,x24,x19 // h+=Maj(a,b,c)
878
ldr x19,[x30],#8 // *K++, x28 in next round
879
add x15,x15,x5
880
add x24,x24,x17 // h+=Sigma0(a)
881
add x15,x15,x4
882
ldr x4,[sp,#8]
883
str x7,[sp,#0]
884
ror x16,x20,#14
885
add x23,x23,x19 // h+=K[i]
886
ror x6,x1,#1
887
and x17,x21,x20
888
ror x5,x14,#19
889
bic x19,x22,x20
890
ror x7,x24,#28
891
add x23,x23,x15 // h+=X[i]
892
eor x16,x16,x20,ror#18
893
eor x6,x6,x1,ror#8
894
orr x17,x17,x19 // Ch(e,f,g)
895
eor x19,x24,x25 // a^b, b^c in next round
896
eor x16,x16,x20,ror#41 // Sigma1(e)
897
eor x7,x7,x24,ror#34
898
add x23,x23,x17 // h+=Ch(e,f,g)
899
and x28,x28,x19 // (b^c)&=(a^b)
900
eor x5,x5,x14,ror#61
901
eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
902
add x23,x23,x16 // h+=Sigma1(e)
903
eor x28,x28,x25 // Maj(a,b,c)
904
eor x17,x7,x24,ror#39 // Sigma0(a)
905
eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
906
add x0,x0,x9
907
add x27,x27,x23 // d+=h
908
add x23,x23,x28 // h+=Maj(a,b,c)
909
ldr x28,[x30],#8 // *K++, x19 in next round
910
add x0,x0,x6
911
add x23,x23,x17 // h+=Sigma0(a)
912
add x0,x0,x5
913
ldr x5,[sp,#16]
914
str x8,[sp,#8]
915
ror x16,x27,#14
916
add x22,x22,x28 // h+=K[i]
917
ror x7,x2,#1
918
and x17,x20,x27
919
ror x6,x15,#19
920
bic x28,x21,x27
921
ror x8,x23,#28
922
add x22,x22,x0 // h+=X[i]
923
eor x16,x16,x27,ror#18
924
eor x7,x7,x2,ror#8
925
orr x17,x17,x28 // Ch(e,f,g)
926
eor x28,x23,x24 // a^b, b^c in next round
927
eor x16,x16,x27,ror#41 // Sigma1(e)
928
eor x8,x8,x23,ror#34
929
add x22,x22,x17 // h+=Ch(e,f,g)
930
and x19,x19,x28 // (b^c)&=(a^b)
931
eor x6,x6,x15,ror#61
932
eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
933
add x22,x22,x16 // h+=Sigma1(e)
934
eor x19,x19,x24 // Maj(a,b,c)
935
eor x17,x8,x23,ror#39 // Sigma0(a)
936
eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
937
add x1,x1,x10
938
add x26,x26,x22 // d+=h
939
add x22,x22,x19 // h+=Maj(a,b,c)
940
ldr x19,[x30],#8 // *K++, x28 in next round
941
add x1,x1,x7
942
add x22,x22,x17 // h+=Sigma0(a)
943
add x1,x1,x6
944
ldr x6,[sp,#24]
945
str x9,[sp,#16]
946
ror x16,x26,#14
947
add x21,x21,x19 // h+=K[i]
948
ror x8,x3,#1
949
and x17,x27,x26
950
ror x7,x0,#19
951
bic x19,x20,x26
952
ror x9,x22,#28
953
add x21,x21,x1 // h+=X[i]
954
eor x16,x16,x26,ror#18
955
eor x8,x8,x3,ror#8
956
orr x17,x17,x19 // Ch(e,f,g)
957
eor x19,x22,x23 // a^b, b^c in next round
958
eor x16,x16,x26,ror#41 // Sigma1(e)
959
eor x9,x9,x22,ror#34
960
add x21,x21,x17 // h+=Ch(e,f,g)
961
and x28,x28,x19 // (b^c)&=(a^b)
962
eor x7,x7,x0,ror#61
963
eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
964
add x21,x21,x16 // h+=Sigma1(e)
965
eor x28,x28,x23 // Maj(a,b,c)
966
eor x17,x9,x22,ror#39 // Sigma0(a)
967
eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
968
add x2,x2,x11
969
add x25,x25,x21 // d+=h
970
add x21,x21,x28 // h+=Maj(a,b,c)
971
ldr x28,[x30],#8 // *K++, x19 in next round
972
add x2,x2,x8
973
add x21,x21,x17 // h+=Sigma0(a)
974
add x2,x2,x7
975
ldr x7,[sp,#0]
976
str x10,[sp,#24]
977
ror x16,x25,#14
978
add x20,x20,x28 // h+=K[i]
979
ror x9,x4,#1
980
and x17,x26,x25
981
ror x8,x1,#19
982
bic x28,x27,x25
983
ror x10,x21,#28
984
add x20,x20,x2 // h+=X[i]
985
eor x16,x16,x25,ror#18
986
eor x9,x9,x4,ror#8
987
orr x17,x17,x28 // Ch(e,f,g)
988
eor x28,x21,x22 // a^b, b^c in next round
989
eor x16,x16,x25,ror#41 // Sigma1(e)
990
eor x10,x10,x21,ror#34
991
add x20,x20,x17 // h+=Ch(e,f,g)
992
and x19,x19,x28 // (b^c)&=(a^b)
993
eor x8,x8,x1,ror#61
994
eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
995
add x20,x20,x16 // h+=Sigma1(e)
996
eor x19,x19,x22 // Maj(a,b,c)
997
eor x17,x10,x21,ror#39 // Sigma0(a)
998
eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
999
add x3,x3,x12
1000
add x24,x24,x20 // d+=h
1001
add x20,x20,x19 // h+=Maj(a,b,c)
1002
ldr x19,[x30],#8 // *K++, x28 in next round
1003
add x3,x3,x9
1004
add x20,x20,x17 // h+=Sigma0(a)
1005
add x3,x3,x8
1006
cbnz x19,.Loop_16_xx
1007
1008
// ---------------------------------------------------------------------
// End of one 128-byte block: fold the working variables back into the
// hash state, then either start the next block or tear the frame down.
// ---------------------------------------------------------------------
// Recover the state pointer, end-of-input pointer and input pointer that
// were parked in the frame (x0-x2 were reused for message words).
ldp x0,x2,[x29,#96]
1009
ldr x1,[x29,#112]
1010
// 81 quadwords were read through x30 (80 constants plus the zero
// terminator): 81*8 = 648 bytes back to the start of .LK512.
sub x30,x30,#648 // rewind
1011
1012
// Load the previous state; it is added to a..h (x20-x27) below.
ldp x3,x4,[x0]
1013
ldp x5,x6,[x0,#2*8]
1014
// The saved pointer was taken after the first 16-byte load at .Loop, so
// 16 + 14*8 = 128 bytes brings it to the next block.
add x1,x1,#14*8 // advance input pointer
1015
ldp x7,x8,[x0,#4*8]
1016
add x20,x20,x3
1017
ldp x9,x10,[x0,#6*8]
1018
add x21,x21,x4
1019
add x22,x22,x5
1020
add x23,x23,x6
1021
stp x20,x21,[x0]
1022
add x24,x24,x7
1023
add x25,x25,x8
1024
stp x22,x23,[x0,#2*8]
1025
add x26,x26,x9
1026
add x27,x27,x10
1027
// Reached the end of the input?  The two stp instructions that follow do
// not modify the flags, so b.ne still sees this comparison.
cmp x1,x2
1028
stp x24,x25,[x0,#4*8]
1029
stp x26,x27,[x0,#6*8]
1030
b.ne .Loop
1031
1032
// All blocks done: restore x19-x28, drop the 32-byte spill area, then pop
// the 128-byte frame together with x29/x30.
ldp x19,x20,[x29,#16]
1033
add sp,sp,#4*8
1034
ldp x21,x22,[x29,#32]
1035
ldp x23,x24,[x29,#48]
1036
ldp x25,x26,[x29,#64]
1037
ldp x27,x28,[x29,#80]
1038
ldp x29,x30,[sp],#128
1039
// NOTE(review): macro from arm_arch.h, presumably the counterpart of
// AARCH64_SIGN_LINK_REGISTER used at function entry -- confirm there.
AARCH64_VALIDATE_LINK_REGISTER
1040
ret
1041
.size sha512_block_data_order,.-sha512_block_data_order
1042
1043
.section .rodata
1044
1045
.align 6
1046
.type .LK512,%object
1047
// ---------------------------------------------------------------------
// .LK512 -- SHA-512 round-constant table: 80 quadwords (40 lines of two),
// followed by one zero quadword.
// sha512_block_data_order walks it one entry per round through x30
// ("*K++") and treats the zero entry as the end-of-rounds marker, then
// rewinds its pointer by 81*8 = 648 bytes.  The code at .Lv8_entry reads
// the same table through x3, 16 bytes at a time.
// Keep the entry count and the terminator in sync with that rewind.
// ---------------------------------------------------------------------
.LK512:
1048
.quad 0x428a2f98d728ae22,0x7137449123ef65cd
1049
.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1050
.quad 0x3956c25bf348b538,0x59f111f1b605d019
1051
.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
1052
.quad 0xd807aa98a3030242,0x12835b0145706fbe
1053
.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1054
.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
1055
.quad 0x9bdc06a725c71235,0xc19bf174cf692694
1056
.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
1057
.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1058
.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
1059
.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1060
.quad 0x983e5152ee66dfab,0xa831c66d2db43210
1061
.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
1062
.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
1063
.quad 0x06ca6351e003826f,0x142929670a0e6e70
1064
.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
1065
.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1066
.quad 0x650a73548baf63de,0x766a0abb3c77b2a8
1067
.quad 0x81c2c92e47edaee6,0x92722c851482353b
1068
.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
1069
.quad 0xc24b8b70d0f89791,0xc76c51a30654be30
1070
.quad 0xd192e819d6ef5218,0xd69906245565a910
1071
.quad 0xf40e35855771202a,0x106aa07032bbd1b8
1072
.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
1073
.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1074
.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1075
.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1076
.quad 0x748f82ee5defb2fc,0x78a5636f43172f60
1077
.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
1078
.quad 0x90befffa23631e28,0xa4506cebde82bde9
1079
.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
1080
.quad 0xca273eceea26619c,0xd186b8c721c0c207
1081
.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1082
.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
1083
.quad 0x113f9804bef90dae,0x1b710b35131c471b
1084
.quad 0x28db77f523047d84,0x32caab7b40c72493
1085
.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1086
.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1087
.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1088
.quad 0 // terminator
1089
.size .LK512,.-.LK512
1090
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1091
.align 2
1092
.align 2
1093
1094
.text
1095
#ifndef __KERNEL__
// ----------------------------------------------------------------------
// sha512_block_armv8
//
// Processes x2 consecutive 128-byte input blocks into the context at x0
// using the SHA512 instructions. Those instructions are emitted as raw
// .inst words; the mnemonic each word encodes is given in the comment
// beside it. This function is assembled only when __KERNEL__ is not
// defined.
//
// In:	x0 = context, 64 bytes, read into v0-v3 and written back on exit
//	x1 = input; 128 bytes are consumed per pass of .Loop_hw
//	x2 = number of blocks. It is decremented before it is tested, so
//	     the loop body always runs at least once; a count of 0 is not
//	     treated as "nothing to do".
//
// Register use:
//	x3	cursor into .LK512; every constant load advances it by 16
//		bytes and it is rewound by 80*8 bytes once per block
//	x4	x1-128, selected by csel when the block counter reaches 0
//	v0-v4	working state; the five registers rotate roles from one
//		group of instructions to the next
//	v5-v7	ext temporaries
//	v16-v23	the current 128-byte block after rev64
//	v24,v25	take turns holding the next pair of K512 constants
//	v26-v29	copy of v0-v3 taken at the top of the loop and added back
//		at the bottom
//
// Clobbers: x1-x4, NZCV (subs), v0-v7, v16-v29.
// ----------------------------------------------------------------------
.type	sha512_block_armv8,%function
.align	6
sha512_block_armv8:
.Lv8_entry:
	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
	stp	x29,x30,[sp,#-16]!
	add	x29,sp,#0

	ld1	{v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64	// load input
	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64

	ld1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// load context
	adrp	x3,.LK512
	add	x3,x3,#:lo12:.LK512

	rev64	v16.16b,v16.16b
	rev64	v17.16b,v17.16b
	rev64	v18.16b,v18.16b
	rev64	v19.16b,v19.16b
	rev64	v20.16b,v20.16b
	rev64	v21.16b,v21.16b
	rev64	v22.16b,v22.16b
	rev64	v23.16b,v23.16b
	b	.Loop_hw			// step over any .align padding

.align	4
.Loop_hw:
	ld1	{v24.2d},[x3],#16
	subs	x2,x2,#1
	sub	x4,x1,#128
	orr	v26.16b,v0.16b,v0.16b			// offload
	orr	v27.16b,v1.16b,v1.16b
	orr	v28.16b,v2.16b,v2.16b
	orr	v29.16b,v3.16b,v3.16b
	// When x2 has just reached zero, step x1 back by 128 so that the
	// "load next input" loads further down re-read the block that was
	// just consumed rather than the 128 bytes that follow it.
	csel	x1,x1,x4,ne			// conditional rewind

	// K512[0..1] + v16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[2..3] + v17
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[4..5] + v18
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[6..7] + v19
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[8..9] + v20
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[10..11] + v21
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[12..13] + v22
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[14..15] + v23
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[16..17] + v16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[18..19] + v17
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[20..21] + v18
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[22..23] + v19
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[24..25] + v20
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[26..27] + v21
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[28..29] + v22
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[30..31] + v23
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[32..33] + v16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[34..35] + v17
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[36..37] + v18
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[38..39] + v19
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[40..41] + v20
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[42..43] + v21
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[44..45] + v22
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[46..47] + v23
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[48..49] + v16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08230	//sha512su0 v16.16b,v17.16b
	ext	v7.16b,v20.16b,v21.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[50..51] + v17
	add	v25.2d,v25.2d,v17.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08251	//sha512su0 v17.16b,v18.16b
	ext	v7.16b,v21.16b,v22.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[52..53] + v18
	add	v24.2d,v24.2d,v18.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec08272	//sha512su0 v18.16b,v19.16b
	ext	v7.16b,v22.16b,v23.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[54..55] + v19
	add	v25.2d,v25.2d,v19.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08293	//sha512su0 v19.16b,v20.16b
	ext	v7.16b,v23.16b,v16.16b,#8
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	.inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[56..57] + v20
	add	v24.2d,v24.2d,v20.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
	ext	v7.16b,v16.16b,v17.16b,#8
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	.inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[58..59] + v21
	add	v25.2d,v25.2d,v21.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
	ext	v7.16b,v17.16b,v18.16b,#8
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	.inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[60..61] + v22
	add	v24.2d,v24.2d,v22.2d
	ld1	{v25.2d},[x3],#16
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
	ext	v7.16b,v18.16b,v19.16b,#8
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	.inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[62..63] + v23
	add	v25.2d,v25.2d,v23.2d
	ld1	{v24.2d},[x3],#16
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xcec08217	//sha512su0 v23.16b,v16.16b
	ext	v7.16b,v19.16b,v20.16b,#8
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	.inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// The remaining eight groups contain no sha512su0/sha512su1; each
	// one reloads its v16-v23 register from x1 and byte-reverses it.

	// K512[64..65] + v16
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v16.2d
	ld1	{v16.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v16.16b,v16.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[66..67] + v17
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v17.2d
	ld1	{v17.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v17.16b,v17.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[68..69] + v18
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v18.2d
	ld1	{v18.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v18.16b,v18.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	// K512[70..71] + v19
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v19.2d
	ld1	{v19.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v2.16b,v3.16b,#8
	ext	v6.16b,v1.16b,v2.16b,#8
	add	v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
	rev64	v19.16b,v19.16b
	add	v4.2d,v1.2d,v3.2d		// "D + T1"
	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b

	// K512[72..73] + v20
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v20.2d
	ld1	{v20.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v4.16b,v2.16b,#8
	ext	v6.16b,v0.16b,v4.16b,#8
	add	v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
	rev64	v20.16b,v20.16b
	add	v1.2d,v0.2d,v2.2d		// "D + T1"
	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b

	// K512[74..75] + v21
	ld1	{v24.2d},[x3],#16
	add	v25.2d,v25.2d,v21.2d
	ld1	{v21.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v1.16b,v4.16b,#8
	ext	v6.16b,v3.16b,v1.16b,#8
	add	v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
	rev64	v21.16b,v21.16b
	add	v0.2d,v3.2d,v4.2d		// "D + T1"
	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b

	// K512[76..77] + v22
	ld1	{v25.2d},[x3],#16
	add	v24.2d,v24.2d,v22.2d
	ld1	{v22.16b},[x1],#16		// load next input
	ext	v24.16b,v24.16b,v24.16b,#8
	ext	v5.16b,v0.16b,v1.16b,#8
	ext	v6.16b,v2.16b,v0.16b,#8
	add	v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
	rev64	v22.16b,v22.16b
	add	v3.2d,v2.2d,v1.2d		// "D + T1"
	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b

	// K512[78..79] + v23; x3 goes back to the start of the table
	// (40 loads of 16 bytes = 80*8 bytes)
	sub	x3,x3,#80*8	// rewind
	add	v25.2d,v25.2d,v23.2d
	ld1	{v23.16b},[x1],#16		// load next input
	ext	v25.16b,v25.16b,v25.16b,#8
	ext	v5.16b,v3.16b,v0.16b,#8
	ext	v6.16b,v4.16b,v3.16b,#8
	add	v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
	rev64	v23.16b,v23.16b
	add	v2.2d,v4.2d,v0.2d		// "D + T1"
	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b

	add	v0.2d,v0.2d,v26.2d			// accumulate
	add	v1.2d,v1.2d,v27.2d
	add	v2.2d,v2.2d,v28.2d
	add	v3.2d,v3.2d,v29.2d

	cbnz	x2,.Loop_hw

	st1	{v0.2d,v1.2d,v2.2d,v3.2d},[x0]		// store context

	// Only x29 is reloaded: x30 is never written between the prologue
	// and here (the function makes no calls), so ret uses it as is.
	ldr	x29,[sp],#16
	ret
.size	sha512_block_armv8,.-sha512_block_armv8
#endif
1616
1617