Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/arm/keccak1600-armv4.S
39482 views
1
/* Do not modify. This file is auto-generated from keccak1600-armv4.pl. */
2
#include "arm_arch.h"
3
4
/*
 * Instruction-set selection. When the compiler defines __thumb2__ the
 * file is assembled as Thumb code using unified syntax; otherwise it
 * is assembled as 32-bit ARM code. The instruction stream below
 * contains matching #ifdef __thumb2__ / #ifndef __thumb2__ variants
 * (ldrd/strd pairs versus separate ldr/str instructions).
 */
#if defined(__thumb2__)
5
.syntax unified
6
.thumb
7
#else
8
.code 32
9
#endif
10
11
/* Everything that follows, including the iotas32 table, is emitted into .text. */
.text
12
13
/*
 * iotas32: table of 24 entries, each a pair of 32-bit words
 * (24 * 8 = 192 bytes in total).
 *
 * How the visible code consumes it (KeccakF1600_int):
 *   - a byte counter kept at [sp,#444] is zeroed on entry and used as
 *     the offset into this table (adr r10,iotas32 ; add r14,r10,r12);
 *   - the first half of .Lround2x loads the pair at that offset
 *     (ldmia r14,{r10,r11}), the second half loads the pair at
 *     offset+8 / offset+12, labelled there as iotas[i].lo / iotas[i].hi;
 *   - the counter is then advanced by 16 and compared with 192, i.e.
 *     two entries are consumed per pass and 192 is the table size.
 *
 * The table lives in .text next to the code, so the adr instruction
 * can form its address PC-relatively.
 *
 * NOTE(review): the values look like the Keccak-f[1600] round
 * constants stored in bit-interleaved form (first word = even bits,
 * second word = odd bits of each 64-bit constant) -- confirm against
 * the generator script before relying on this.
 *
 * Do not reorder or resize the rows: they are addressed by raw byte
 * offset. The trailing [n] markers give the entry index; the byte
 * offset of an entry is n * 8.
 */
.type iotas32, %object
14
.align 5
15
iotas32:
16
.long 0x00000001, 0x00000000 /* [0] */
17
.long 0x00000000, 0x00000089 /* [1] */
18
.long 0x00000000, 0x8000008b /* [2] */
19
.long 0x00000000, 0x80008080 /* [3] */
20
.long 0x00000001, 0x0000008b /* [4] */
21
.long 0x00000001, 0x00008000 /* [5] */
22
.long 0x00000001, 0x80008088 /* [6] */
23
.long 0x00000001, 0x80000082 /* [7] */
24
.long 0x00000000, 0x0000000b /* [8] */
25
.long 0x00000000, 0x0000000a /* [9] */
26
.long 0x00000001, 0x00008082 /* [10] */
27
.long 0x00000000, 0x00008003 /* [11] */
28
.long 0x00000001, 0x0000808b /* [12] */
29
.long 0x00000001, 0x8000000b /* [13] */
30
.long 0x00000001, 0x8000008a /* [14] */
31
.long 0x00000001, 0x80000081 /* [15] */
32
.long 0x00000000, 0x80000081 /* [16] */
33
.long 0x00000000, 0x80000008 /* [17] */
34
.long 0x00000000, 0x00000083 /* [18] */
35
.long 0x00000000, 0x80008003 /* [19] */
36
.long 0x00000001, 0x80008088 /* [20] */
37
.long 0x00000000, 0x80000088 /* [21] */
38
.long 0x00000001, 0x00008000 /* [22] */
39
.long 0x00000000, 0x80008082 /* [23] */
40
/* Record the symbol size as the distance from the label to here. */
.size iotas32,.-iotas32
41
42
.type KeccakF1600_int, %function
43
.align 5
44
KeccakF1600_int:
45
add r9,sp,#176
46
add r12,sp,#0
47
add r10,sp,#40
48
ldmia r9,{r4,r5,r6,r7,r8,r9} @ A[4][2..4]
49
KeccakF1600_enter:
50
str lr,[sp,#440]
51
eor r11,r11,r11
52
str r11,[sp,#444]
53
b .Lround2x
54
55
.align 4
56
.Lround2x:
57
ldmia r12,{r0,r1,r2,r3} @ A[0][0..1]
58
ldmia r10,{r10,r11,r12,r14} @ A[1][0..1]
59
#ifdef __thumb2__
60
eor r0,r0,r10
61
eor r1,r1,r11
62
eor r2,r2,r12
63
ldrd r10,r11,[sp,#56]
64
eor r3,r3,r14
65
ldrd r12,r14,[sp,#64]
66
eor r4,r4,r10
67
eor r5,r5,r11
68
eor r6,r6,r12
69
ldrd r10,r11,[sp,#72]
70
eor r7,r7,r14
71
ldrd r12,r14,[sp,#80]
72
eor r8,r8,r10
73
eor r9,r9,r11
74
eor r0,r0,r12
75
ldrd r10,r11,[sp,#88]
76
eor r1,r1,r14
77
ldrd r12,r14,[sp,#96]
78
eor r2,r2,r10
79
eor r3,r3,r11
80
eor r4,r4,r12
81
ldrd r10,r11,[sp,#104]
82
eor r5,r5,r14
83
ldrd r12,r14,[sp,#112]
84
eor r6,r6,r10
85
eor r7,r7,r11
86
eor r8,r8,r12
87
ldrd r10,r11,[sp,#120]
88
eor r9,r9,r14
89
ldrd r12,r14,[sp,#128]
90
eor r0,r0,r10
91
eor r1,r1,r11
92
eor r2,r2,r12
93
ldrd r10,r11,[sp,#136]
94
eor r3,r3,r14
95
ldrd r12,r14,[sp,#144]
96
eor r4,r4,r10
97
eor r5,r5,r11
98
eor r6,r6,r12
99
ldrd r10,r11,[sp,#152]
100
eor r7,r7,r14
101
ldrd r12,r14,[sp,#160]
102
eor r8,r8,r10
103
eor r9,r9,r11
104
eor r0,r0,r12
105
ldrd r10,r11,[sp,#168]
106
eor r1,r1,r14
107
ldrd r12,r14,[sp,#16]
108
eor r2,r2,r10
109
eor r3,r3,r11
110
eor r4,r4,r12
111
ldrd r10,r11,[sp,#24]
112
eor r5,r5,r14
113
ldrd r12,r14,[sp,#32]
114
#else
115
eor r0,r0,r10
116
add r10,sp,#56
117
eor r1,r1,r11
118
eor r2,r2,r12
119
eor r3,r3,r14
120
ldmia r10,{r10,r11,r12,r14} @ A[1][2..3]
121
eor r4,r4,r10
122
add r10,sp,#72
123
eor r5,r5,r11
124
eor r6,r6,r12
125
eor r7,r7,r14
126
ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0]
127
eor r8,r8,r10
128
add r10,sp,#88
129
eor r9,r9,r11
130
eor r0,r0,r12
131
eor r1,r1,r14
132
ldmia r10,{r10,r11,r12,r14} @ A[2][1..2]
133
eor r2,r2,r10
134
add r10,sp,#104
135
eor r3,r3,r11
136
eor r4,r4,r12
137
eor r5,r5,r14
138
ldmia r10,{r10,r11,r12,r14} @ A[2][3..4]
139
eor r6,r6,r10
140
add r10,sp,#120
141
eor r7,r7,r11
142
eor r8,r8,r12
143
eor r9,r9,r14
144
ldmia r10,{r10,r11,r12,r14} @ A[3][0..1]
145
eor r0,r0,r10
146
add r10,sp,#136
147
eor r1,r1,r11
148
eor r2,r2,r12
149
eor r3,r3,r14
150
ldmia r10,{r10,r11,r12,r14} @ A[3][2..3]
151
eor r4,r4,r10
152
add r10,sp,#152
153
eor r5,r5,r11
154
eor r6,r6,r12
155
eor r7,r7,r14
156
ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0]
157
eor r8,r8,r10
158
ldr r10,[sp,#168] @ A[4][1]
159
eor r9,r9,r11
160
ldr r11,[sp,#168+4]
161
eor r0,r0,r12
162
ldr r12,[sp,#16] @ A[0][2]
163
eor r1,r1,r14
164
ldr r14,[sp,#16+4]
165
eor r2,r2,r10
166
add r10,sp,#24
167
eor r3,r3,r11
168
eor r4,r4,r12
169
eor r5,r5,r14
170
ldmia r10,{r10,r11,r12,r14} @ A[0][3..4]
171
#endif
172
eor r6,r6,r10
173
eor r7,r7,r11
174
eor r8,r8,r12
175
eor r9,r9,r14
176
177
eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
178
#ifndef __thumb2__
179
str r10,[sp,#208] @ D[1] = E[0]
180
#endif
181
eor r11,r1,r4
182
#ifndef __thumb2__
183
str r11,[sp,#208+4]
184
#else
185
strd r10,r11,[sp,#208] @ D[1] = E[0]
186
#endif
187
eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
188
eor r14,r7,r0
189
#ifndef __thumb2__
190
str r12,[sp,#232] @ D[4] = E[1]
191
#endif
192
eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
193
#ifndef __thumb2__
194
str r14,[sp,#232+4]
195
#else
196
strd r12,r14,[sp,#232] @ D[4] = E[1]
197
#endif
198
eor r1,r9,r2
199
#ifndef __thumb2__
200
str r0,[sp,#200] @ D[0] = C[0]
201
#endif
202
eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
203
#ifndef __thumb2__
204
ldr r7,[sp,#144]
205
#endif
206
eor r3,r3,r6
207
#ifndef __thumb2__
208
str r1,[sp,#200+4]
209
#else
210
strd r0,r1,[sp,#200] @ D[0] = C[0]
211
#endif
212
#ifndef __thumb2__
213
ldr r6,[sp,#144+4]
214
#else
215
ldrd r7,r6,[sp,#144]
216
#endif
217
#ifndef __thumb2__
218
str r2,[sp,#216] @ D[2] = C[1]
219
#endif
220
eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
221
#ifndef __thumb2__
222
str r3,[sp,#216+4]
223
#else
224
strd r2,r3,[sp,#216] @ D[2] = C[1]
225
#endif
226
eor r5,r5,r8
227
228
#ifndef __thumb2__
229
ldr r8,[sp,#192]
230
#endif
231
#ifndef __thumb2__
232
ldr r9,[sp,#192+4]
233
#else
234
ldrd r8,r9,[sp,#192]
235
#endif
236
#ifndef __thumb2__
237
str r4,[sp,#224] @ D[3] = C[2]
238
#endif
239
eor r7,r7,r4
240
#ifndef __thumb2__
241
str r5,[sp,#224+4]
242
#else
243
strd r4,r5,[sp,#224] @ D[3] = C[2]
244
#endif
245
eor r6,r6,r5
246
#ifndef __thumb2__
247
ldr r4,[sp,#0]
248
#endif
249
@ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
250
@ mov r6,r6,ror#32-11
251
#ifndef __thumb2__
252
ldr r5,[sp,#0+4]
253
#else
254
ldrd r4,r5,[sp,#0]
255
#endif
256
eor r8,r8,r12
257
eor r9,r9,r14
258
#ifndef __thumb2__
259
ldr r12,[sp,#96]
260
#endif
261
eor r0,r0,r4
262
#ifndef __thumb2__
263
ldr r14,[sp,#96+4]
264
#else
265
ldrd r12,r14,[sp,#96]
266
#endif
267
@ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
268
@ mov r9,r9,ror#32-7
269
eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0];
270
eor r12,r12,r2
271
#ifndef __thumb2__
272
ldr r2,[sp,#48]
273
#endif
274
eor r14,r14,r3
275
#ifndef __thumb2__
276
ldr r3,[sp,#48+4]
277
#else
278
ldrd r2,r3,[sp,#48]
279
#endif
280
mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
281
ldr r12,[sp,#444] @ load counter
282
eor r2,r2,r10
283
adr r10,iotas32
284
mov r4,r14,ror#32-22
285
add r14,r10,r12
286
eor r3,r3,r11
287
ldmia r14,{r10,r11} @ iotas[i]
288
bic r12,r4,r2,ror#32-22
289
bic r14,r5,r3,ror#32-22
290
mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
291
mov r3,r3,ror#32-22
292
eor r12,r12,r0
293
eor r14,r14,r1
294
eor r10,r10,r12
295
eor r11,r11,r14
296
#ifndef __thumb2__
297
str r10,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
298
#endif
299
bic r12,r6,r4,ror#11
300
#ifndef __thumb2__
301
str r11,[sp,#240+4]
302
#else
303
strd r10,r11,[sp,#240] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
304
#endif
305
bic r14,r7,r5,ror#10
306
bic r10,r8,r6,ror#32-(11-7)
307
bic r11,r9,r7,ror#32-(10-7)
308
eor r12,r2,r12,ror#32-11
309
#ifndef __thumb2__
310
str r12,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
311
#endif
312
eor r14,r3,r14,ror#32-10
313
#ifndef __thumb2__
314
str r14,[sp,#248+4]
315
#else
316
strd r12,r14,[sp,#248] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
317
#endif
318
eor r10,r4,r10,ror#32-7
319
eor r11,r5,r11,ror#32-7
320
#ifndef __thumb2__
321
str r10,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
322
#endif
323
bic r12,r0,r8,ror#32-7
324
#ifndef __thumb2__
325
str r11,[sp,#256+4]
326
#else
327
strd r10,r11,[sp,#256] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
328
#endif
329
bic r14,r1,r9,ror#32-7
330
eor r12,r12,r6,ror#32-11
331
#ifndef __thumb2__
332
str r12,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
333
#endif
334
eor r14,r14,r7,ror#32-10
335
#ifndef __thumb2__
336
str r14,[sp,#264+4]
337
#else
338
strd r12,r14,[sp,#264] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
339
#endif
340
bic r10,r2,r0
341
add r14,sp,#224
342
#ifndef __thumb2__
343
ldr r0,[sp,#24] @ A[0][3]
344
#endif
345
bic r11,r3,r1
346
#ifndef __thumb2__
347
ldr r1,[sp,#24+4]
348
#else
349
ldrd r0,r1,[sp,#24] @ A[0][3]
350
#endif
351
eor r10,r10,r8,ror#32-7
352
eor r11,r11,r9,ror#32-7
353
#ifndef __thumb2__
354
str r10,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
355
#endif
356
add r9,sp,#200
357
#ifndef __thumb2__
358
str r11,[sp,#272+4]
359
#else
360
strd r10,r11,[sp,#272] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
361
#endif
362
363
ldmia r14,{r10,r11,r12,r14} @ D[3..4]
364
ldmia r9,{r6,r7,r8,r9} @ D[0..1]
365
366
#ifndef __thumb2__
367
ldr r2,[sp,#72] @ A[1][4]
368
#endif
369
eor r0,r0,r10
370
#ifndef __thumb2__
371
ldr r3,[sp,#72+4]
372
#else
373
ldrd r2,r3,[sp,#72] @ A[1][4]
374
#endif
375
eor r1,r1,r11
376
@ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
377
#ifndef __thumb2__
378
ldr r10,[sp,#128] @ A[3][1]
379
#endif
380
@ mov r1,r1,ror#32-14
381
#ifndef __thumb2__
382
ldr r11,[sp,#128+4]
383
#else
384
ldrd r10,r11,[sp,#128] @ A[3][1]
385
#endif
386
387
eor r2,r2,r12
388
#ifndef __thumb2__
389
ldr r4,[sp,#80] @ A[2][0]
390
#endif
391
eor r3,r3,r14
392
#ifndef __thumb2__
393
ldr r5,[sp,#80+4]
394
#else
395
ldrd r4,r5,[sp,#80] @ A[2][0]
396
#endif
397
@ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
398
@ mov r3,r3,ror#32-10
399
400
eor r6,r6,r4
401
#ifndef __thumb2__
402
ldr r12,[sp,#216] @ D[2]
403
#endif
404
eor r7,r7,r5
405
#ifndef __thumb2__
406
ldr r14,[sp,#216+4]
407
#else
408
ldrd r12,r14,[sp,#216] @ D[2]
409
#endif
410
mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
411
mov r4,r7,ror#32-2
412
413
eor r10,r10,r8
414
#ifndef __thumb2__
415
ldr r8,[sp,#176] @ A[4][2]
416
#endif
417
eor r11,r11,r9
418
#ifndef __thumb2__
419
ldr r9,[sp,#176+4]
420
#else
421
ldrd r8,r9,[sp,#176] @ A[4][2]
422
#endif
423
mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
424
mov r6,r11,ror#32-23
425
426
bic r10,r4,r2,ror#32-10
427
bic r11,r5,r3,ror#32-10
428
eor r12,r12,r8
429
eor r14,r14,r9
430
mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
431
mov r8,r14,ror#32-31
432
eor r10,r10,r0,ror#32-14
433
eor r11,r11,r1,ror#32-14
434
#ifndef __thumb2__
435
str r10,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
436
#endif
437
bic r12,r6,r4
438
#ifndef __thumb2__
439
str r11,[sp,#280+4]
440
#else
441
strd r10,r11,[sp,#280] @ R[1][0] = C[0] ^ (~C[1] & C[2])
442
#endif
443
bic r14,r7,r5
444
eor r12,r12,r2,ror#32-10
445
#ifndef __thumb2__
446
str r12,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
447
#endif
448
eor r14,r14,r3,ror#32-10
449
#ifndef __thumb2__
450
str r14,[sp,#288+4]
451
#else
452
strd r12,r14,[sp,#288] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
453
#endif
454
bic r10,r8,r6
455
bic r11,r9,r7
456
bic r12,r0,r8,ror#14
457
bic r14,r1,r9,ror#14
458
eor r10,r10,r4
459
eor r11,r11,r5
460
#ifndef __thumb2__
461
str r10,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
462
#endif
463
bic r2,r2,r0,ror#32-(14-10)
464
#ifndef __thumb2__
465
str r11,[sp,#296+4]
466
#else
467
strd r10,r11,[sp,#296] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
468
#endif
469
eor r12,r6,r12,ror#32-14
470
bic r11,r3,r1,ror#32-(14-10)
471
#ifndef __thumb2__
472
str r12,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
473
#endif
474
eor r14,r7,r14,ror#32-14
475
#ifndef __thumb2__
476
str r14,[sp,#304+4]
477
#else
478
strd r12,r14,[sp,#304] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
479
#endif
480
add r12,sp,#208
481
#ifndef __thumb2__
482
ldr r1,[sp,#8] @ A[0][1]
483
#endif
484
eor r10,r8,r2,ror#32-10
485
#ifndef __thumb2__
486
ldr r0,[sp,#8+4]
487
#else
488
ldrd r1,r0,[sp,#8] @ A[0][1]
489
#endif
490
eor r11,r9,r11,ror#32-10
491
#ifndef __thumb2__
492
str r10,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
493
#endif
494
#ifndef __thumb2__
495
str r11,[sp,#312+4]
496
#else
497
strd r10,r11,[sp,#312] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
498
#endif
499
500
add r9,sp,#224
501
ldmia r12,{r10,r11,r12,r14} @ D[1..2]
502
#ifndef __thumb2__
503
ldr r2,[sp,#56] @ A[1][2]
504
#endif
505
#ifndef __thumb2__
506
ldr r3,[sp,#56+4]
507
#else
508
ldrd r2,r3,[sp,#56] @ A[1][2]
509
#endif
510
ldmia r9,{r6,r7,r8,r9} @ D[3..4]
511
512
eor r1,r1,r10
513
#ifndef __thumb2__
514
ldr r4,[sp,#104] @ A[2][3]
515
#endif
516
eor r0,r0,r11
517
#ifndef __thumb2__
518
ldr r5,[sp,#104+4]
519
#else
520
ldrd r4,r5,[sp,#104] @ A[2][3]
521
#endif
522
mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
523
524
eor r2,r2,r12
525
#ifndef __thumb2__
526
ldr r10,[sp,#152] @ A[3][4]
527
#endif
528
eor r3,r3,r14
529
#ifndef __thumb2__
530
ldr r11,[sp,#152+4]
531
#else
532
ldrd r10,r11,[sp,#152] @ A[3][4]
533
#endif
534
@ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
535
#ifndef __thumb2__
536
ldr r12,[sp,#200] @ D[0]
537
#endif
538
@ mov r3,r3,ror#32-3
539
#ifndef __thumb2__
540
ldr r14,[sp,#200+4]
541
#else
542
ldrd r12,r14,[sp,#200] @ D[0]
543
#endif
544
545
eor r4,r4,r6
546
eor r5,r5,r7
547
@ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
548
@ mov r4,r7,ror#32-13 @ [track reverse order below]
549
550
eor r10,r10,r8
551
#ifndef __thumb2__
552
ldr r8,[sp,#160] @ A[4][0]
553
#endif
554
eor r11,r11,r9
555
#ifndef __thumb2__
556
ldr r9,[sp,#160+4]
557
#else
558
ldrd r8,r9,[sp,#160] @ A[4][0]
559
#endif
560
mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
561
mov r7,r11,ror#32-4
562
563
eor r12,r12,r8
564
eor r14,r14,r9
565
mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
566
mov r9,r14,ror#32-9
567
568
bic r10,r5,r2,ror#13-3
569
bic r11,r4,r3,ror#12-3
570
bic r12,r6,r5,ror#32-13
571
bic r14,r7,r4,ror#32-12
572
eor r10,r0,r10,ror#32-13
573
eor r11,r1,r11,ror#32-12
574
#ifndef __thumb2__
575
str r10,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
576
#endif
577
eor r12,r12,r2,ror#32-3
578
#ifndef __thumb2__
579
str r11,[sp,#320+4]
580
#else
581
strd r10,r11,[sp,#320] @ R[2][0] = C[0] ^ (~C[1] & C[2])
582
#endif
583
eor r14,r14,r3,ror#32-3
584
#ifndef __thumb2__
585
str r12,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
586
#endif
587
bic r10,r8,r6
588
bic r11,r9,r7
589
#ifndef __thumb2__
590
str r14,[sp,#328+4]
591
#else
592
strd r12,r14,[sp,#328] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
593
#endif
594
eor r10,r10,r5,ror#32-13
595
eor r11,r11,r4,ror#32-12
596
#ifndef __thumb2__
597
str r10,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
598
#endif
599
bic r12,r0,r8
600
#ifndef __thumb2__
601
str r11,[sp,#336+4]
602
#else
603
strd r10,r11,[sp,#336] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
604
#endif
605
bic r14,r1,r9
606
eor r12,r12,r6
607
eor r14,r14,r7
608
#ifndef __thumb2__
609
str r12,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
610
#endif
611
bic r10,r2,r0,ror#3
612
#ifndef __thumb2__
613
str r14,[sp,#344+4]
614
#else
615
strd r12,r14,[sp,#344] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
616
#endif
617
bic r11,r3,r1,ror#3
618
#ifndef __thumb2__
619
ldr r1,[sp,#32] @ A[0][4] [in reverse order]
620
#endif
621
eor r10,r8,r10,ror#32-3
622
#ifndef __thumb2__
623
ldr r0,[sp,#32+4]
624
#else
625
ldrd r1,r0,[sp,#32] @ A[0][4] [in reverse order]
626
#endif
627
eor r11,r9,r11,ror#32-3
628
#ifndef __thumb2__
629
str r10,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
630
#endif
631
add r9,sp,#208
632
#ifndef __thumb2__
633
str r11,[sp,#352+4]
634
#else
635
strd r10,r11,[sp,#352] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
636
#endif
637
638
#ifndef __thumb2__
639
ldr r10,[sp,#232] @ D[4]
640
#endif
641
#ifndef __thumb2__
642
ldr r11,[sp,#232+4]
643
#else
644
ldrd r10,r11,[sp,#232] @ D[4]
645
#endif
646
#ifndef __thumb2__
647
ldr r12,[sp,#200] @ D[0]
648
#endif
649
#ifndef __thumb2__
650
ldr r14,[sp,#200+4]
651
#else
652
ldrd r12,r14,[sp,#200] @ D[0]
653
#endif
654
655
ldmia r9,{r6,r7,r8,r9} @ D[1..2]
656
657
eor r1,r1,r10
658
#ifndef __thumb2__
659
ldr r2,[sp,#40] @ A[1][0]
660
#endif
661
eor r0,r0,r11
662
#ifndef __thumb2__
663
ldr r3,[sp,#40+4]
664
#else
665
ldrd r2,r3,[sp,#40] @ A[1][0]
666
#endif
667
@ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
668
#ifndef __thumb2__
669
ldr r4,[sp,#88] @ A[2][1]
670
#endif
671
@ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
672
#ifndef __thumb2__
673
ldr r5,[sp,#88+4]
674
#else
675
ldrd r4,r5,[sp,#88] @ A[2][1]
676
#endif
677
678
eor r2,r2,r12
679
#ifndef __thumb2__
680
ldr r10,[sp,#136] @ A[3][2]
681
#endif
682
eor r3,r3,r14
683
#ifndef __thumb2__
684
ldr r11,[sp,#136+4]
685
#else
686
ldrd r10,r11,[sp,#136] @ A[3][2]
687
#endif
688
@ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
689
#ifndef __thumb2__
690
ldr r12,[sp,#224] @ D[3]
691
#endif
692
@ mov r3,r3,ror#32-18
693
#ifndef __thumb2__
694
ldr r14,[sp,#224+4]
695
#else
696
ldrd r12,r14,[sp,#224] @ D[3]
697
#endif
698
699
eor r6,r6,r4
700
eor r7,r7,r5
701
mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
702
mov r5,r7,ror#32-5
703
704
eor r10,r10,r8
705
#ifndef __thumb2__
706
ldr r8,[sp,#184] @ A[4][3]
707
#endif
708
eor r11,r11,r9
709
#ifndef __thumb2__
710
ldr r9,[sp,#184+4]
711
#else
712
ldrd r8,r9,[sp,#184] @ A[4][3]
713
#endif
714
mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
715
mov r6,r11,ror#32-8
716
717
eor r12,r12,r8
718
eor r14,r14,r9
719
mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
720
mov r9,r14,ror#32-28
721
722
bic r10,r4,r2,ror#32-18
723
bic r11,r5,r3,ror#32-18
724
eor r10,r10,r0,ror#32-14
725
eor r11,r11,r1,ror#32-13
726
#ifndef __thumb2__
727
str r10,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
728
#endif
729
bic r12,r6,r4
730
#ifndef __thumb2__
731
str r11,[sp,#360+4]
732
#else
733
strd r10,r11,[sp,#360] @ R[3][0] = C[0] ^ (~C[1] & C[2])
734
#endif
735
bic r14,r7,r5
736
eor r12,r12,r2,ror#32-18
737
#ifndef __thumb2__
738
str r12,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
739
#endif
740
eor r14,r14,r3,ror#32-18
741
#ifndef __thumb2__
742
str r14,[sp,#368+4]
743
#else
744
strd r12,r14,[sp,#368] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
745
#endif
746
bic r10,r8,r6
747
bic r11,r9,r7
748
bic r12,r0,r8,ror#14
749
bic r14,r1,r9,ror#13
750
eor r10,r10,r4
751
eor r11,r11,r5
752
#ifndef __thumb2__
753
str r10,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
754
#endif
755
bic r2,r2,r0,ror#18-14
756
#ifndef __thumb2__
757
str r11,[sp,#376+4]
758
#else
759
strd r10,r11,[sp,#376] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
760
#endif
761
eor r12,r6,r12,ror#32-14
762
bic r11,r3,r1,ror#18-13
763
eor r14,r7,r14,ror#32-13
764
#ifndef __thumb2__
765
str r12,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
766
#endif
767
#ifndef __thumb2__
768
str r14,[sp,#384+4]
769
#else
770
strd r12,r14,[sp,#384] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
771
#endif
772
add r14,sp,#216
773
#ifndef __thumb2__
774
ldr r0,[sp,#16] @ A[0][2]
775
#endif
776
eor r10,r8,r2,ror#32-18
777
#ifndef __thumb2__
778
ldr r1,[sp,#16+4]
779
#else
780
ldrd r0,r1,[sp,#16] @ A[0][2]
781
#endif
782
eor r11,r9,r11,ror#32-18
783
#ifndef __thumb2__
784
str r10,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
785
#endif
786
#ifndef __thumb2__
787
str r11,[sp,#392+4]
788
#else
789
strd r10,r11,[sp,#392] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
790
#endif
791
792
ldmia r14,{r10,r11,r12,r14} @ D[2..3]
793
#ifndef __thumb2__
794
ldr r2,[sp,#64] @ A[1][3]
795
#endif
796
#ifndef __thumb2__
797
ldr r3,[sp,#64+4]
798
#else
799
ldrd r2,r3,[sp,#64] @ A[1][3]
800
#endif
801
#ifndef __thumb2__
802
ldr r6,[sp,#232] @ D[4]
803
#endif
804
#ifndef __thumb2__
805
ldr r7,[sp,#232+4]
806
#else
807
ldrd r6,r7,[sp,#232] @ D[4]
808
#endif
809
810
eor r0,r0,r10
811
#ifndef __thumb2__
812
ldr r4,[sp,#112] @ A[2][4]
813
#endif
814
eor r1,r1,r11
815
#ifndef __thumb2__
816
ldr r5,[sp,#112+4]
817
#else
818
ldrd r4,r5,[sp,#112] @ A[2][4]
819
#endif
820
@ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
821
#ifndef __thumb2__
822
ldr r8,[sp,#200] @ D[0]
823
#endif
824
@ mov r1,r1,ror#32-31
825
#ifndef __thumb2__
826
ldr r9,[sp,#200+4]
827
#else
828
ldrd r8,r9,[sp,#200] @ D[0]
829
#endif
830
831
eor r12,r12,r2
832
#ifndef __thumb2__
833
ldr r10,[sp,#120] @ A[3][0]
834
#endif
835
eor r14,r14,r3
836
#ifndef __thumb2__
837
ldr r11,[sp,#120+4]
838
#else
839
ldrd r10,r11,[sp,#120] @ A[3][0]
840
#endif
841
mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
842
#ifndef __thumb2__
843
ldr r12,[sp,#208] @ D[1]
844
#endif
845
mov r2,r14,ror#32-28
846
#ifndef __thumb2__
847
ldr r14,[sp,#208+4]
848
#else
849
ldrd r12,r14,[sp,#208] @ D[1]
850
#endif
851
852
eor r6,r6,r4
853
eor r7,r7,r5
854
mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
855
mov r4,r7,ror#32-20
856
857
eor r10,r10,r8
858
#ifndef __thumb2__
859
ldr r8,[sp,#168] @ A[4][1]
860
#endif
861
eor r11,r11,r9
862
#ifndef __thumb2__
863
ldr r9,[sp,#168+4]
864
#else
865
ldrd r8,r9,[sp,#168] @ A[4][1]
866
#endif
867
mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
868
mov r6,r11,ror#32-21
869
870
eor r8,r8,r12
871
eor r9,r9,r14
872
@ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
873
@ mov r9,r3,ror#32-1
874
875
bic r10,r4,r2
876
bic r11,r5,r3
877
eor r10,r10,r0,ror#32-31
878
#ifndef __thumb2__
879
str r10,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
880
#endif
881
eor r11,r11,r1,ror#32-31
882
#ifndef __thumb2__
883
str r11,[sp,#400+4]
884
#else
885
strd r10,r11,[sp,#400] @ R[4][0] = C[0] ^ (~C[1] & C[2])
886
#endif
887
bic r12,r6,r4
888
bic r14,r7,r5
889
eor r12,r12,r2
890
eor r14,r14,r3
891
#ifndef __thumb2__
892
str r12,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
893
#endif
894
bic r10,r8,r6,ror#1
895
#ifndef __thumb2__
896
str r14,[sp,#408+4]
897
#else
898
strd r12,r14,[sp,#408] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
899
#endif
900
bic r11,r9,r7,ror#1
901
bic r12,r0,r8,ror#31-1
902
bic r14,r1,r9,ror#31-1
903
eor r4,r4,r10,ror#32-1
904
#ifndef __thumb2__
905
str r4,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
906
#endif
907
eor r5,r5,r11,ror#32-1
908
#ifndef __thumb2__
909
str r5,[sp,#416+4]
910
#else
911
strd r4,r5,[sp,#416] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
912
#endif
913
eor r6,r6,r12,ror#32-31
914
eor r7,r7,r14,ror#32-31
915
#ifndef __thumb2__
916
str r6,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
917
#endif
918
bic r10,r2,r0,ror#32-31
919
#ifndef __thumb2__
920
str r7,[sp,#424+4]
921
#else
922
strd r6,r7,[sp,#424] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
923
#endif
924
bic r11,r3,r1,ror#32-31
925
add r12,sp,#240
926
eor r8,r10,r8,ror#32-1
927
add r10,sp,#280
928
eor r9,r11,r9,ror#32-1
929
#ifndef __thumb2__
930
str r8,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
931
#endif
932
#ifndef __thumb2__
933
str r9,[sp,#432+4]
934
#else
935
strd r8,r9,[sp,#432] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
936
#endif
937
ldmia r12,{r0,r1,r2,r3} @ A[0][0..1]
938
ldmia r10,{r10,r11,r12,r14} @ A[1][0..1]
939
#ifdef __thumb2__
940
eor r0,r0,r10
941
eor r1,r1,r11
942
eor r2,r2,r12
943
ldrd r10,r11,[sp,#296]
944
eor r3,r3,r14
945
ldrd r12,r14,[sp,#304]
946
eor r4,r4,r10
947
eor r5,r5,r11
948
eor r6,r6,r12
949
ldrd r10,r11,[sp,#312]
950
eor r7,r7,r14
951
ldrd r12,r14,[sp,#320]
952
eor r8,r8,r10
953
eor r9,r9,r11
954
eor r0,r0,r12
955
ldrd r10,r11,[sp,#328]
956
eor r1,r1,r14
957
ldrd r12,r14,[sp,#336]
958
eor r2,r2,r10
959
eor r3,r3,r11
960
eor r4,r4,r12
961
ldrd r10,r11,[sp,#344]
962
eor r5,r5,r14
963
ldrd r12,r14,[sp,#352]
964
eor r6,r6,r10
965
eor r7,r7,r11
966
eor r8,r8,r12
967
ldrd r10,r11,[sp,#360]
968
eor r9,r9,r14
969
ldrd r12,r14,[sp,#368]
970
eor r0,r0,r10
971
eor r1,r1,r11
972
eor r2,r2,r12
973
ldrd r10,r11,[sp,#376]
974
eor r3,r3,r14
975
ldrd r12,r14,[sp,#384]
976
eor r4,r4,r10
977
eor r5,r5,r11
978
eor r6,r6,r12
979
ldrd r10,r11,[sp,#392]
980
eor r7,r7,r14
981
ldrd r12,r14,[sp,#400]
982
eor r8,r8,r10
983
eor r9,r9,r11
984
eor r0,r0,r12
985
ldrd r10,r11,[sp,#408]
986
eor r1,r1,r14
987
ldrd r12,r14,[sp,#256]
988
eor r2,r2,r10
989
eor r3,r3,r11
990
eor r4,r4,r12
991
ldrd r10,r11,[sp,#264]
992
eor r5,r5,r14
993
ldrd r12,r14,[sp,#272]
994
#else
995
eor r0,r0,r10
996
add r10,sp,#296
997
eor r1,r1,r11
998
eor r2,r2,r12
999
eor r3,r3,r14
1000
ldmia r10,{r10,r11,r12,r14} @ A[1][2..3]
1001
eor r4,r4,r10
1002
add r10,sp,#312
1003
eor r5,r5,r11
1004
eor r6,r6,r12
1005
eor r7,r7,r14
1006
ldmia r10,{r10,r11,r12,r14} @ A[1][4]..A[2][0]
1007
eor r8,r8,r10
1008
add r10,sp,#328
1009
eor r9,r9,r11
1010
eor r0,r0,r12
1011
eor r1,r1,r14
1012
ldmia r10,{r10,r11,r12,r14} @ A[2][1..2]
1013
eor r2,r2,r10
1014
add r10,sp,#344
1015
eor r3,r3,r11
1016
eor r4,r4,r12
1017
eor r5,r5,r14
1018
ldmia r10,{r10,r11,r12,r14} @ A[2][3..4]
1019
eor r6,r6,r10
1020
add r10,sp,#360
1021
eor r7,r7,r11
1022
eor r8,r8,r12
1023
eor r9,r9,r14
1024
ldmia r10,{r10,r11,r12,r14} @ A[3][0..1]
1025
eor r0,r0,r10
1026
add r10,sp,#376
1027
eor r1,r1,r11
1028
eor r2,r2,r12
1029
eor r3,r3,r14
1030
ldmia r10,{r10,r11,r12,r14} @ A[3][2..3]
1031
eor r4,r4,r10
1032
add r10,sp,#392
1033
eor r5,r5,r11
1034
eor r6,r6,r12
1035
eor r7,r7,r14
1036
ldmia r10,{r10,r11,r12,r14} @ A[3][4]..A[4][0]
1037
eor r8,r8,r10
1038
ldr r10,[sp,#408] @ A[4][1]
1039
eor r9,r9,r11
1040
ldr r11,[sp,#408+4]
1041
eor r0,r0,r12
1042
ldr r12,[sp,#256] @ A[0][2]
1043
eor r1,r1,r14
1044
ldr r14,[sp,#256+4]
1045
eor r2,r2,r10
1046
add r10,sp,#264
1047
eor r3,r3,r11
1048
eor r4,r4,r12
1049
eor r5,r5,r14
1050
ldmia r10,{r10,r11,r12,r14} @ A[0][3..4]
1051
#endif
1052
eor r6,r6,r10
1053
eor r7,r7,r11
1054
eor r8,r8,r12
1055
eor r9,r9,r14
1056
1057
eor r10,r0,r5,ror#32-1 @ E[0] = ROL64(C[2], 1) ^ C[0];
1058
#ifndef __thumb2__
1059
str r10,[sp,#208] @ D[1] = E[0]
1060
#endif
1061
eor r11,r1,r4
1062
#ifndef __thumb2__
1063
str r11,[sp,#208+4]
1064
#else
1065
strd r10,r11,[sp,#208] @ D[1] = E[0]
1066
#endif
1067
eor r12,r6,r1,ror#32-1 @ E[1] = ROL64(C[0], 1) ^ C[3];
1068
eor r14,r7,r0
1069
#ifndef __thumb2__
1070
str r12,[sp,#232] @ D[4] = E[1]
1071
#endif
1072
eor r0,r8,r3,ror#32-1 @ C[0] = ROL64(C[1], 1) ^ C[4];
1073
#ifndef __thumb2__
1074
str r14,[sp,#232+4]
1075
#else
1076
strd r12,r14,[sp,#232] @ D[4] = E[1]
1077
#endif
1078
eor r1,r9,r2
1079
#ifndef __thumb2__
1080
str r0,[sp,#200] @ D[0] = C[0]
1081
#endif
1082
eor r2,r2,r7,ror#32-1 @ C[1] = ROL64(C[3], 1) ^ C[1];
1083
#ifndef __thumb2__
1084
ldr r7,[sp,#384]
1085
#endif
1086
eor r3,r3,r6
1087
#ifndef __thumb2__
1088
str r1,[sp,#200+4]
1089
#else
1090
strd r0,r1,[sp,#200] @ D[0] = C[0]
1091
#endif
1092
#ifndef __thumb2__
1093
ldr r6,[sp,#384+4]
1094
#else
1095
ldrd r7,r6,[sp,#384]
1096
#endif
1097
#ifndef __thumb2__
1098
str r2,[sp,#216] @ D[2] = C[1]
1099
#endif
1100
eor r4,r4,r9,ror#32-1 @ C[2] = ROL64(C[4], 1) ^ C[2];
1101
#ifndef __thumb2__
1102
str r3,[sp,#216+4]
1103
#else
1104
strd r2,r3,[sp,#216] @ D[2] = C[1]
1105
#endif
1106
eor r5,r5,r8
1107
1108
#ifndef __thumb2__
1109
ldr r8,[sp,#432]
1110
#endif
1111
#ifndef __thumb2__
1112
ldr r9,[sp,#432+4]
1113
#else
1114
ldrd r8,r9,[sp,#432]
1115
#endif
1116
#ifndef __thumb2__
1117
str r4,[sp,#224] @ D[3] = C[2]
1118
#endif
1119
eor r7,r7,r4
1120
#ifndef __thumb2__
1121
str r5,[sp,#224+4]
1122
#else
1123
strd r4,r5,[sp,#224] @ D[3] = C[2]
1124
#endif
1125
eor r6,r6,r5
1126
#ifndef __thumb2__
1127
ldr r4,[sp,#240]
1128
#endif
1129
@ mov r7,r7,ror#32-10 @ C[3] = ROL64(A[3][3] ^ C[2], rhotates[3][3]); /* D[3] */
1130
@ mov r6,r6,ror#32-11
1131
#ifndef __thumb2__
1132
ldr r5,[sp,#240+4]
1133
#else
1134
ldrd r4,r5,[sp,#240]
1135
#endif
1136
eor r8,r8,r12
1137
eor r9,r9,r14
1138
#ifndef __thumb2__
1139
ldr r12,[sp,#336]
1140
#endif
1141
eor r0,r0,r4
1142
#ifndef __thumb2__
1143
ldr r14,[sp,#336+4]
1144
#else
1145
ldrd r12,r14,[sp,#336]
1146
#endif
1147
@ mov r8,r8,ror#32-7 @ C[4] = ROL64(A[4][4] ^ E[1], rhotates[4][4]); /* D[4] */
1148
@ mov r9,r9,ror#32-7
1149
eor r1,r1,r5 @ C[0] = A[0][0] ^ C[0];
1150
eor r12,r12,r2
1151
#ifndef __thumb2__
1152
ldr r2,[sp,#288]
1153
#endif
1154
eor r14,r14,r3
1155
#ifndef __thumb2__
1156
ldr r3,[sp,#288+4]
1157
#else
1158
ldrd r2,r3,[sp,#288]
1159
#endif
1160
mov r5,r12,ror#32-21 @ C[2] = ROL64(A[2][2] ^ C[1], rhotates[2][2]);
1161
ldr r12,[sp,#444] @ load counter
1162
eor r2,r2,r10
1163
adr r10,iotas32
1164
mov r4,r14,ror#32-22
1165
add r14,r10,r12
1166
eor r3,r3,r11
1167
#ifndef __thumb2__
1168
ldr r10,[r14,#8] @ iotas[i].lo
1169
#endif
1170
add r12,r12,#16
1171
#ifndef __thumb2__
1172
ldr r11,[r14,#12] @ iotas[i].hi
1173
#else
1174
ldrd r10,r11,[r14,#8] @ iotas[i].lo
1175
#endif
1176
cmp r12,#192
1177
str r12,[sp,#444] @ store counter
1178
bic r12,r4,r2,ror#32-22
1179
bic r14,r5,r3,ror#32-22
1180
mov r2,r2,ror#32-22 @ C[1] = ROL64(A[1][1] ^ E[0], rhotates[1][1]);
1181
mov r3,r3,ror#32-22
1182
eor r12,r12,r0
1183
eor r14,r14,r1
1184
eor r10,r10,r12
1185
eor r11,r11,r14
1186
#ifndef __thumb2__
1187
str r10,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1188
#endif
1189
bic r12,r6,r4,ror#11
1190
#ifndef __thumb2__
1191
str r11,[sp,#0+4]
1192
#else
1193
strd r10,r11,[sp,#0] @ R[0][0] = C[0] ^ (~C[1] & C[2]) ^ iotas[i];
1194
#endif
1195
bic r14,r7,r5,ror#10
1196
bic r10,r8,r6,ror#32-(11-7)
1197
bic r11,r9,r7,ror#32-(10-7)
1198
eor r12,r2,r12,ror#32-11
1199
#ifndef __thumb2__
1200
str r12,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1201
#endif
1202
eor r14,r3,r14,ror#32-10
1203
#ifndef __thumb2__
1204
str r14,[sp,#8+4]
1205
#else
1206
strd r12,r14,[sp,#8] @ R[0][1] = C[1] ^ (~C[2] & C[3]);
1207
#endif
1208
eor r10,r4,r10,ror#32-7
1209
eor r11,r5,r11,ror#32-7
1210
#ifndef __thumb2__
1211
str r10,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1212
#endif
1213
bic r12,r0,r8,ror#32-7
1214
#ifndef __thumb2__
1215
str r11,[sp,#16+4]
1216
#else
1217
strd r10,r11,[sp,#16] @ R[0][2] = C[2] ^ (~C[3] & C[4]);
1218
#endif
1219
bic r14,r1,r9,ror#32-7
1220
eor r12,r12,r6,ror#32-11
1221
#ifndef __thumb2__
1222
str r12,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1223
#endif
1224
eor r14,r14,r7,ror#32-10
1225
#ifndef __thumb2__
1226
str r14,[sp,#24+4]
1227
#else
1228
strd r12,r14,[sp,#24] @ R[0][3] = C[3] ^ (~C[4] & C[0]);
1229
#endif
1230
bic r10,r2,r0
1231
add r14,sp,#224
1232
#ifndef __thumb2__
1233
ldr r0,[sp,#264] @ A[0][3]
1234
#endif
1235
bic r11,r3,r1
1236
#ifndef __thumb2__
1237
ldr r1,[sp,#264+4]
1238
#else
1239
ldrd r0,r1,[sp,#264] @ A[0][3]
1240
#endif
1241
eor r10,r10,r8,ror#32-7
1242
eor r11,r11,r9,ror#32-7
1243
#ifndef __thumb2__
1244
str r10,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1245
#endif
1246
add r9,sp,#200
1247
#ifndef __thumb2__
1248
str r11,[sp,#32+4]
1249
#else
1250
strd r10,r11,[sp,#32] @ R[0][4] = C[4] ^ (~C[0] & C[1]);
1251
#endif
1252
1253
ldmia r14,{r10,r11,r12,r14} @ D[3..4]
1254
ldmia r9,{r6,r7,r8,r9} @ D[0..1]
1255
1256
#ifndef __thumb2__
1257
ldr r2,[sp,#312] @ A[1][4]
1258
#endif
1259
eor r0,r0,r10
1260
#ifndef __thumb2__
1261
ldr r3,[sp,#312+4]
1262
#else
1263
ldrd r2,r3,[sp,#312] @ A[1][4]
1264
#endif
1265
eor r1,r1,r11
1266
@ mov r0,r0,ror#32-14 @ C[0] = ROL64(A[0][3] ^ D[3], rhotates[0][3]);
1267
#ifndef __thumb2__
1268
ldr r10,[sp,#368] @ A[3][1]
1269
#endif
1270
@ mov r1,r1,ror#32-14
1271
#ifndef __thumb2__
1272
ldr r11,[sp,#368+4]
1273
#else
1274
ldrd r10,r11,[sp,#368] @ A[3][1]
1275
#endif
1276
1277
eor r2,r2,r12
1278
#ifndef __thumb2__
1279
ldr r4,[sp,#320] @ A[2][0]
1280
#endif
1281
eor r3,r3,r14
1282
#ifndef __thumb2__
1283
ldr r5,[sp,#320+4]
1284
#else
1285
ldrd r4,r5,[sp,#320] @ A[2][0]
1286
#endif
1287
@ mov r2,r2,ror#32-10 @ C[1] = ROL64(A[1][4] ^ D[4], rhotates[1][4]);
1288
@ mov r3,r3,ror#32-10
1289
1290
eor r6,r6,r4
1291
#ifndef __thumb2__
1292
ldr r12,[sp,#216] @ D[2]
1293
#endif
1294
eor r7,r7,r5
1295
#ifndef __thumb2__
1296
ldr r14,[sp,#216+4]
1297
#else
1298
ldrd r12,r14,[sp,#216] @ D[2]
1299
#endif
1300
mov r5,r6,ror#32-1 @ C[2] = ROL64(A[2][0] ^ D[0], rhotates[2][0]);
1301
mov r4,r7,ror#32-2
1302
1303
eor r10,r10,r8
1304
#ifndef __thumb2__
1305
ldr r8,[sp,#416] @ A[4][2]
1306
#endif
1307
eor r11,r11,r9
1308
#ifndef __thumb2__
1309
ldr r9,[sp,#416+4]
1310
#else
1311
ldrd r8,r9,[sp,#416] @ A[4][2]
1312
#endif
1313
mov r7,r10,ror#32-22 @ C[3] = ROL64(A[3][1] ^ D[1], rhotates[3][1]);
1314
mov r6,r11,ror#32-23
1315
1316
bic r10,r4,r2,ror#32-10
1317
bic r11,r5,r3,ror#32-10
1318
eor r12,r12,r8
1319
eor r14,r14,r9
1320
mov r9,r12,ror#32-30 @ C[4] = ROL64(A[4][2] ^ D[2], rhotates[4][2]);
1321
mov r8,r14,ror#32-31
1322
eor r10,r10,r0,ror#32-14
1323
eor r11,r11,r1,ror#32-14
1324
#ifndef __thumb2__
1325
str r10,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
1326
#endif
1327
bic r12,r6,r4
1328
#ifndef __thumb2__
1329
str r11,[sp,#40+4]
1330
#else
1331
strd r10,r11,[sp,#40] @ R[1][0] = C[0] ^ (~C[1] & C[2])
1332
#endif
1333
bic r14,r7,r5
1334
eor r12,r12,r2,ror#32-10
1335
#ifndef __thumb2__
1336
str r12,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1337
#endif
1338
eor r14,r14,r3,ror#32-10
1339
#ifndef __thumb2__
1340
str r14,[sp,#48+4]
1341
#else
1342
strd r12,r14,[sp,#48] @ R[1][1] = C[1] ^ (~C[2] & C[3]);
1343
#endif
1344
bic r10,r8,r6
1345
bic r11,r9,r7
1346
bic r12,r0,r8,ror#14
1347
bic r14,r1,r9,ror#14
1348
eor r10,r10,r4
1349
eor r11,r11,r5
1350
#ifndef __thumb2__
1351
str r10,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1352
#endif
1353
bic r2,r2,r0,ror#32-(14-10)
1354
#ifndef __thumb2__
1355
str r11,[sp,#56+4]
1356
#else
1357
strd r10,r11,[sp,#56] @ R[1][2] = C[2] ^ (~C[3] & C[4]);
1358
#endif
1359
eor r12,r6,r12,ror#32-14
1360
bic r11,r3,r1,ror#32-(14-10)
1361
#ifndef __thumb2__
1362
str r12,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1363
#endif
1364
eor r14,r7,r14,ror#32-14
1365
#ifndef __thumb2__
1366
str r14,[sp,#64+4]
1367
#else
1368
strd r12,r14,[sp,#64] @ R[1][3] = C[3] ^ (~C[4] & C[0]);
1369
#endif
1370
add r12,sp,#208
1371
#ifndef __thumb2__
1372
ldr r1,[sp,#248] @ A[0][1]
1373
#endif
1374
eor r10,r8,r2,ror#32-10
1375
#ifndef __thumb2__
1376
ldr r0,[sp,#248+4]
1377
#else
1378
ldrd r1,r0,[sp,#248] @ A[0][1]
1379
#endif
1380
eor r11,r9,r11,ror#32-10
1381
#ifndef __thumb2__
1382
str r10,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1383
#endif
1384
#ifndef __thumb2__
1385
str r11,[sp,#72+4]
1386
#else
1387
strd r10,r11,[sp,#72] @ R[1][4] = C[4] ^ (~C[0] & C[1]);
1388
#endif
1389
1390
add r9,sp,#224
1391
ldmia r12,{r10,r11,r12,r14} @ D[1..2]
1392
#ifndef __thumb2__
1393
ldr r2,[sp,#296] @ A[1][2]
1394
#endif
1395
#ifndef __thumb2__
1396
ldr r3,[sp,#296+4]
1397
#else
1398
ldrd r2,r3,[sp,#296] @ A[1][2]
1399
#endif
1400
ldmia r9,{r6,r7,r8,r9} @ D[3..4]
1401
1402
eor r1,r1,r10
1403
#ifndef __thumb2__
1404
ldr r4,[sp,#344] @ A[2][3]
1405
#endif
1406
eor r0,r0,r11
1407
#ifndef __thumb2__
1408
ldr r5,[sp,#344+4]
1409
#else
1410
ldrd r4,r5,[sp,#344] @ A[2][3]
1411
#endif
1412
mov r0,r0,ror#32-1 @ C[0] = ROL64(A[0][1] ^ D[1], rhotates[0][1]);
1413
1414
eor r2,r2,r12
1415
#ifndef __thumb2__
1416
ldr r10,[sp,#392] @ A[3][4]
1417
#endif
1418
eor r3,r3,r14
1419
#ifndef __thumb2__
1420
ldr r11,[sp,#392+4]
1421
#else
1422
ldrd r10,r11,[sp,#392] @ A[3][4]
1423
#endif
1424
@ mov r2,r2,ror#32-3 @ C[1] = ROL64(A[1][2] ^ D[2], rhotates[1][2]);
1425
#ifndef __thumb2__
1426
ldr r12,[sp,#200] @ D[0]
1427
#endif
1428
@ mov r3,r3,ror#32-3
1429
#ifndef __thumb2__
1430
ldr r14,[sp,#200+4]
1431
#else
1432
ldrd r12,r14,[sp,#200] @ D[0]
1433
#endif
1434
1435
eor r4,r4,r6
1436
eor r5,r5,r7
1437
@ mov r5,r6,ror#32-12 @ C[2] = ROL64(A[2][3] ^ D[3], rhotates[2][3]);
1438
@ mov r4,r7,ror#32-13 @ [track reverse order below]
1439
1440
eor r10,r10,r8
1441
#ifndef __thumb2__
1442
ldr r8,[sp,#400] @ A[4][0]
1443
#endif
1444
eor r11,r11,r9
1445
#ifndef __thumb2__
1446
ldr r9,[sp,#400+4]
1447
#else
1448
ldrd r8,r9,[sp,#400] @ A[4][0]
1449
#endif
1450
mov r6,r10,ror#32-4 @ C[3] = ROL64(A[3][4] ^ D[4], rhotates[3][4]);
1451
mov r7,r11,ror#32-4
1452
1453
eor r12,r12,r8
1454
eor r14,r14,r9
1455
mov r8,r12,ror#32-9 @ C[4] = ROL64(A[4][0] ^ D[0], rhotates[4][0]);
1456
mov r9,r14,ror#32-9
1457
1458
bic r10,r5,r2,ror#13-3
1459
bic r11,r4,r3,ror#12-3
1460
bic r12,r6,r5,ror#32-13
1461
bic r14,r7,r4,ror#32-12
1462
eor r10,r0,r10,ror#32-13
1463
eor r11,r1,r11,ror#32-12
1464
#ifndef __thumb2__
1465
str r10,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
1466
#endif
1467
eor r12,r12,r2,ror#32-3
1468
#ifndef __thumb2__
1469
str r11,[sp,#80+4]
1470
#else
1471
strd r10,r11,[sp,#80] @ R[2][0] = C[0] ^ (~C[1] & C[2])
1472
#endif
1473
eor r14,r14,r3,ror#32-3
1474
#ifndef __thumb2__
1475
str r12,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1476
#endif
1477
bic r10,r8,r6
1478
bic r11,r9,r7
1479
#ifndef __thumb2__
1480
str r14,[sp,#88+4]
1481
#else
1482
strd r12,r14,[sp,#88] @ R[2][1] = C[1] ^ (~C[2] & C[3]);
1483
#endif
1484
eor r10,r10,r5,ror#32-13
1485
eor r11,r11,r4,ror#32-12
1486
#ifndef __thumb2__
1487
str r10,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1488
#endif
1489
bic r12,r0,r8
1490
#ifndef __thumb2__
1491
str r11,[sp,#96+4]
1492
#else
1493
strd r10,r11,[sp,#96] @ R[2][2] = C[2] ^ (~C[3] & C[4]);
1494
#endif
1495
bic r14,r1,r9
1496
eor r12,r12,r6
1497
eor r14,r14,r7
1498
#ifndef __thumb2__
1499
str r12,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1500
#endif
1501
bic r10,r2,r0,ror#3
1502
#ifndef __thumb2__
1503
str r14,[sp,#104+4]
1504
#else
1505
strd r12,r14,[sp,#104] @ R[2][3] = C[3] ^ (~C[4] & C[0]);
1506
#endif
1507
bic r11,r3,r1,ror#3
1508
#ifndef __thumb2__
1509
ldr r1,[sp,#272] @ A[0][4] [in reverse order]
1510
#endif
1511
eor r10,r8,r10,ror#32-3
1512
#ifndef __thumb2__
1513
ldr r0,[sp,#272+4]
1514
#else
1515
ldrd r1,r0,[sp,#272] @ A[0][4] [in reverse order]
1516
#endif
1517
eor r11,r9,r11,ror#32-3
1518
#ifndef __thumb2__
1519
str r10,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1520
#endif
1521
add r9,sp,#208
1522
#ifndef __thumb2__
1523
str r11,[sp,#112+4]
1524
#else
1525
strd r10,r11,[sp,#112] @ R[2][4] = C[4] ^ (~C[0] & C[1]);
1526
#endif
1527
1528
#ifndef __thumb2__
1529
ldr r10,[sp,#232] @ D[4]
1530
#endif
1531
#ifndef __thumb2__
1532
ldr r11,[sp,#232+4]
1533
#else
1534
ldrd r10,r11,[sp,#232] @ D[4]
1535
#endif
1536
#ifndef __thumb2__
1537
ldr r12,[sp,#200] @ D[0]
1538
#endif
1539
#ifndef __thumb2__
1540
ldr r14,[sp,#200+4]
1541
#else
1542
ldrd r12,r14,[sp,#200] @ D[0]
1543
#endif
1544
1545
ldmia r9,{r6,r7,r8,r9} @ D[1..2]
1546
1547
eor r1,r1,r10
1548
#ifndef __thumb2__
1549
ldr r2,[sp,#280] @ A[1][0]
1550
#endif
1551
eor r0,r0,r11
1552
#ifndef __thumb2__
1553
ldr r3,[sp,#280+4]
1554
#else
1555
ldrd r2,r3,[sp,#280] @ A[1][0]
1556
#endif
1557
@ mov r1,r10,ror#32-13 @ C[0] = ROL64(A[0][4] ^ D[4], rhotates[0][4]);
1558
#ifndef __thumb2__
1559
ldr r4,[sp,#328] @ A[2][1]
1560
#endif
1561
@ mov r0,r11,ror#32-14 @ [was loaded in reverse order]
1562
#ifndef __thumb2__
1563
ldr r5,[sp,#328+4]
1564
#else
1565
ldrd r4,r5,[sp,#328] @ A[2][1]
1566
#endif
1567
1568
eor r2,r2,r12
1569
#ifndef __thumb2__
1570
ldr r10,[sp,#376] @ A[3][2]
1571
#endif
1572
eor r3,r3,r14
1573
#ifndef __thumb2__
1574
ldr r11,[sp,#376+4]
1575
#else
1576
ldrd r10,r11,[sp,#376] @ A[3][2]
1577
#endif
1578
@ mov r2,r2,ror#32-18 @ C[1] = ROL64(A[1][0] ^ D[0], rhotates[1][0]);
1579
#ifndef __thumb2__
1580
ldr r12,[sp,#224] @ D[3]
1581
#endif
1582
@ mov r3,r3,ror#32-18
1583
#ifndef __thumb2__
1584
ldr r14,[sp,#224+4]
1585
#else
1586
ldrd r12,r14,[sp,#224] @ D[3]
1587
#endif
1588
1589
eor r6,r6,r4
1590
eor r7,r7,r5
1591
mov r4,r6,ror#32-5 @ C[2] = ROL64(A[2][1] ^ D[1], rhotates[2][1]);
1592
mov r5,r7,ror#32-5
1593
1594
eor r10,r10,r8
1595
#ifndef __thumb2__
1596
ldr r8,[sp,#424] @ A[4][3]
1597
#endif
1598
eor r11,r11,r9
1599
#ifndef __thumb2__
1600
ldr r9,[sp,#424+4]
1601
#else
1602
ldrd r8,r9,[sp,#424] @ A[4][3]
1603
#endif
1604
mov r7,r10,ror#32-7 @ C[3] = ROL64(A[3][2] ^ D[2], rhotates[3][2]);
1605
mov r6,r11,ror#32-8
1606
1607
eor r12,r12,r8
1608
eor r14,r14,r9
1609
mov r8,r12,ror#32-28 @ C[4] = ROL64(A[4][3] ^ D[3], rhotates[4][3]);
1610
mov r9,r14,ror#32-28
1611
1612
bic r10,r4,r2,ror#32-18
1613
bic r11,r5,r3,ror#32-18
1614
eor r10,r10,r0,ror#32-14
1615
eor r11,r11,r1,ror#32-13
1616
#ifndef __thumb2__
1617
str r10,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
1618
#endif
1619
bic r12,r6,r4
1620
#ifndef __thumb2__
1621
str r11,[sp,#120+4]
1622
#else
1623
strd r10,r11,[sp,#120] @ R[3][0] = C[0] ^ (~C[1] & C[2])
1624
#endif
1625
bic r14,r7,r5
1626
eor r12,r12,r2,ror#32-18
1627
#ifndef __thumb2__
1628
str r12,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1629
#endif
1630
eor r14,r14,r3,ror#32-18
1631
#ifndef __thumb2__
1632
str r14,[sp,#128+4]
1633
#else
1634
strd r12,r14,[sp,#128] @ R[3][1] = C[1] ^ (~C[2] & C[3]);
1635
#endif
1636
bic r10,r8,r6
1637
bic r11,r9,r7
1638
bic r12,r0,r8,ror#14
1639
bic r14,r1,r9,ror#13
1640
eor r10,r10,r4
1641
eor r11,r11,r5
1642
#ifndef __thumb2__
1643
str r10,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1644
#endif
1645
bic r2,r2,r0,ror#18-14
1646
#ifndef __thumb2__
1647
str r11,[sp,#136+4]
1648
#else
1649
strd r10,r11,[sp,#136] @ R[3][2] = C[2] ^ (~C[3] & C[4]);
1650
#endif
1651
eor r12,r6,r12,ror#32-14
1652
bic r11,r3,r1,ror#18-13
1653
eor r14,r7,r14,ror#32-13
1654
#ifndef __thumb2__
1655
str r12,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1656
#endif
1657
#ifndef __thumb2__
1658
str r14,[sp,#144+4]
1659
#else
1660
strd r12,r14,[sp,#144] @ R[3][3] = C[3] ^ (~C[4] & C[0]);
1661
#endif
1662
add r14,sp,#216
1663
#ifndef __thumb2__
1664
ldr r0,[sp,#256] @ A[0][2]
1665
#endif
1666
eor r10,r8,r2,ror#32-18
1667
#ifndef __thumb2__
1668
ldr r1,[sp,#256+4]
1669
#else
1670
ldrd r0,r1,[sp,#256] @ A[0][2]
1671
#endif
1672
eor r11,r9,r11,ror#32-18
1673
#ifndef __thumb2__
1674
str r10,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1675
#endif
1676
#ifndef __thumb2__
1677
str r11,[sp,#152+4]
1678
#else
1679
strd r10,r11,[sp,#152] @ R[3][4] = C[4] ^ (~C[0] & C[1]);
1680
#endif
1681
1682
ldmia r14,{r10,r11,r12,r14} @ D[2..3]
1683
#ifndef __thumb2__
1684
ldr r2,[sp,#304] @ A[1][3]
1685
#endif
1686
#ifndef __thumb2__
1687
ldr r3,[sp,#304+4]
1688
#else
1689
ldrd r2,r3,[sp,#304] @ A[1][3]
1690
#endif
1691
#ifndef __thumb2__
1692
ldr r6,[sp,#232] @ D[4]
1693
#endif
1694
#ifndef __thumb2__
1695
ldr r7,[sp,#232+4]
1696
#else
1697
ldrd r6,r7,[sp,#232] @ D[4]
1698
#endif
1699
1700
eor r0,r0,r10
1701
#ifndef __thumb2__
1702
ldr r4,[sp,#352] @ A[2][4]
1703
#endif
1704
eor r1,r1,r11
1705
#ifndef __thumb2__
1706
ldr r5,[sp,#352+4]
1707
#else
1708
ldrd r4,r5,[sp,#352] @ A[2][4]
1709
#endif
1710
@ mov r0,r0,ror#32-31 @ C[0] = ROL64(A[0][2] ^ D[2], rhotates[0][2]);
1711
#ifndef __thumb2__
1712
ldr r8,[sp,#200] @ D[0]
1713
#endif
1714
@ mov r1,r1,ror#32-31
1715
#ifndef __thumb2__
1716
ldr r9,[sp,#200+4]
1717
#else
1718
ldrd r8,r9,[sp,#200] @ D[0]
1719
#endif
1720
1721
eor r12,r12,r2
1722
#ifndef __thumb2__
1723
ldr r10,[sp,#360] @ A[3][0]
1724
#endif
1725
eor r14,r14,r3
1726
#ifndef __thumb2__
1727
ldr r11,[sp,#360+4]
1728
#else
1729
ldrd r10,r11,[sp,#360] @ A[3][0]
1730
#endif
1731
mov r3,r12,ror#32-27 @ C[1] = ROL64(A[1][3] ^ D[3], rhotates[1][3]);
1732
#ifndef __thumb2__
1733
ldr r12,[sp,#208] @ D[1]
1734
#endif
1735
mov r2,r14,ror#32-28
1736
#ifndef __thumb2__
1737
ldr r14,[sp,#208+4]
1738
#else
1739
ldrd r12,r14,[sp,#208] @ D[1]
1740
#endif
1741
1742
eor r6,r6,r4
1743
eor r7,r7,r5
1744
mov r5,r6,ror#32-19 @ C[2] = ROL64(A[2][4] ^ D[4], rhotates[2][4]);
1745
mov r4,r7,ror#32-20
1746
1747
eor r10,r10,r8
1748
#ifndef __thumb2__
1749
ldr r8,[sp,#408] @ A[4][1]
1750
#endif
1751
eor r11,r11,r9
1752
#ifndef __thumb2__
1753
ldr r9,[sp,#408+4]
1754
#else
1755
ldrd r8,r9,[sp,#408] @ A[4][1]
1756
#endif
1757
mov r7,r10,ror#32-20 @ C[3] = ROL64(A[3][0] ^ D[0], rhotates[3][0]);
1758
mov r6,r11,ror#32-21
1759
1760
eor r8,r8,r12
1761
eor r9,r9,r14
1762
@ mov r8,r2,ror#32-1 @ C[4] = ROL64(A[4][1] ^ D[1], rhotates[4][1]);
1763
@ mov r9,r3,ror#32-1
1764
1765
bic r10,r4,r2
1766
bic r11,r5,r3
1767
eor r10,r10,r0,ror#32-31
1768
#ifndef __thumb2__
1769
str r10,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
1770
#endif
1771
eor r11,r11,r1,ror#32-31
1772
#ifndef __thumb2__
1773
str r11,[sp,#160+4]
1774
#else
1775
strd r10,r11,[sp,#160] @ R[4][0] = C[0] ^ (~C[1] & C[2])
1776
#endif
1777
bic r12,r6,r4
1778
bic r14,r7,r5
1779
eor r12,r12,r2
1780
eor r14,r14,r3
1781
#ifndef __thumb2__
1782
str r12,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1783
#endif
1784
bic r10,r8,r6,ror#1
1785
#ifndef __thumb2__
1786
str r14,[sp,#168+4]
1787
#else
1788
strd r12,r14,[sp,#168] @ R[4][1] = C[1] ^ (~C[2] & C[3]);
1789
#endif
1790
bic r11,r9,r7,ror#1
1791
bic r12,r0,r8,ror#31-1
1792
bic r14,r1,r9,ror#31-1
1793
eor r4,r4,r10,ror#32-1
1794
#ifndef __thumb2__
1795
str r4,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1796
#endif
1797
eor r5,r5,r11,ror#32-1
1798
#ifndef __thumb2__
1799
str r5,[sp,#176+4]
1800
#else
1801
strd r4,r5,[sp,#176] @ R[4][2] = C[2] ^= (~C[3] & C[4]);
1802
#endif
1803
eor r6,r6,r12,ror#32-31
1804
eor r7,r7,r14,ror#32-31
1805
#ifndef __thumb2__
1806
str r6,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1807
#endif
1808
bic r10,r2,r0,ror#32-31
1809
#ifndef __thumb2__
1810
str r7,[sp,#184+4]
1811
#else
1812
strd r6,r7,[sp,#184] @ R[4][3] = C[3] ^= (~C[4] & C[0]);
1813
#endif
1814
bic r11,r3,r1,ror#32-31
1815
add r12,sp,#0
1816
eor r8,r10,r8,ror#32-1
1817
add r10,sp,#40
1818
eor r9,r11,r9,ror#32-1
1819
#ifndef __thumb2__
1820
str r8,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1821
#endif
1822
#ifndef __thumb2__
1823
str r9,[sp,#192+4]
1824
#else
1825
strd r8,r9,[sp,#192] @ R[4][4] = C[4] ^= (~C[0] & C[1]);
1826
#endif
1827
blo .Lround2x
1828
1829
#if __ARM_ARCH__>=5
1830
ldr pc,[sp,#440]
1831
#else
1832
ldr lr,[sp,#440]
1833
tst lr,#1
1834
moveq pc,lr @ be binary compatible with V4, yet
1835
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
1836
#endif
1837
.size KeccakF1600_int,.-KeccakF1600_int
1838
1839
@ KeccakF1600: wrapper around the round loop for a state held in memory.
@   In:  r0 = pointer to A[5][5] (copied below as 5 chunks of 10 words,
@             i.e. 5 x 40 bytes).
@   Out: the memory at the original r0 is overwritten with the state found
@        on the stack after KeccakF1600_enter returns.
@   Saves/restores r4-r11; r0-r3, r12 and lr are used as scratch.
@   No .globl directive for this symbol is visible in this part of the file.
@ Frame after the prologue (offsets from sp):
@   0..199    working copy of A[5][5]
@   200..455  remainder of the 456-byte scratch frame; KeccakF1600_enter
@             keeps lr at sp+440 and a zeroed word at sp+444
@   456       saved r0 (pointer to A)
@   460..     saved r4-r11, lr
.type KeccakF1600, %function
1840
.align 5
1841
KeccakF1600:
1842
stmdb sp!,{r0,r4-r11,lr}
1843
sub sp,sp,#440+16 @ space for A[5][5],D[5],T[5][5],...
1844
1845
@ r10 walks the source rows starting at A+40, r11 the stack copy at sp+40;
@ the first 40 bytes go through r0/sp directly.
add r10,r0,#40
1846
add r11,sp,#40
1847
ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack
1848
stmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1849
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1850
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1851
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1852
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1853
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1854
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1855
@ Last chunk (bytes 160..199): after this load r4-r9 hold the words at
@ offsets 176..199, the same values KeccakF1600_int loads from sp+176
@ (A[4][2..4]) before falling into KeccakF1600_enter.
ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1856
@ r12 = sp+0 and r10 = sp+40 mirror the pointer setup done at the top of
@ KeccakF1600_int; they are set before the final store because r10 was the
@ source pointer until the load above.
add r12,sp,#0
1857
add r10,sp,#40
1858
stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1859
1860
bl KeccakF1600_enter
1861
1862
@ Copy the state back. The ldmia r10! sequence below relies on r10 being
@ sp+40 on return (the round loop recomputes r10 = sp+40 just before its
@ closing branch).
ldr r11, [sp,#440+16] @ restore pointer to A
1863
ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1864
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5]
1865
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1866
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1867
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1868
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1869
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1870
stmia r11!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1871
ldmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1872
stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1873
1874
@ Release the 456-byte frame plus the 4-byte saved-r0 slot, leaving sp at
@ the saved r4.
add sp,sp,#440+20
1875
#if __ARM_ARCH__>=5
1876
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1877
#else
1878
@ Pre-ARMv5 return: if bit 0 of lr is clear return with a plain mov,
@ otherwise fall through to the hand-encoded word below (0xe12fff1e is the
@ ARM encoding of bx lr).
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1879
tst lr,#1
1880
moveq pc,lr @ be binary compatible with V4, yet
1881
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
1882
#endif
1883
.size KeccakF1600,.-KeccakF1600
1884
@ SHA3_absorb: XOR whole input blocks into the state and permute after each.
@   In:  r0 = pointer to A[5][5] (200 bytes, stored as pairs of 32-bit words)
@        r1 = inp, r2 = len, r3 = bsz (block size in bytes; consumed 8 bytes
@             at a time by .Loop_block)
@   Out: r0 = number of input bytes left unprocessed (len itself when
@             len < bsz, otherwise the remainder smaller than bsz).
@   Saves/restores r4-r12; the state is written back only when at least one
@   block was absorbed.
@ Frame after the prologue (offsets from sp):
@   0..199    working copy of A[5][5]
@   200..455  scratch used by KeccakF1600_int (it keeps lr at sp+440 and a
@             word at sp+444)
@   456..468  masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff
@   472       saved r0 (A)     476  saved r1 (inp, updated per block)
@   480       saved r2 (len, updated per block)   484  saved r3 (bsz)
@   488..     saved r4-r12, lr
.globl SHA3_absorb
1885
.type SHA3_absorb,%function
1886
.align 5
1887
SHA3_absorb:
1888
stmdb sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1889
sub sp,sp,#456+16
1890
1891
@ r10 = A + 40 (second 40-byte chunk of the state), r12 = len, r14 = bsz.
add r10,r0,#40
1892
@ mov r11,r1
1893
mov r12,r2
1894
mov r14,r3
1895
@ Less than one full block available: skip everything and return len.
cmp r2,r3
1896
blo .Labsorb_abort
1897
1898
add r11,sp,#0
1899
ldmia r0, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ copy A[5][5] to stack
1900
stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1901
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1902
stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1903
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1904
stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1905
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1906
stmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1907
ldmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1908
stmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
1909
1910
@ r1 and r11 were clobbered by the copy; reload inp from the slot where the
@ prologue saved r1.
ldr r11,[sp,#476] @ restore r11
1911
#ifdef __thumb2__
1912
@ Thumb-2 build: the four masks are loaded with single mov instructions.
mov r9,#0x00ff00ff
1913
mov r8,#0x0f0f0f0f
1914
mov r7,#0x33333333
1915
mov r6,#0x55555555
1916
#else
1917
@ ARM build: build the same four masks from 8-bit seeds by replicating
@ them with shifted orr.
mov r6,#0x11 @ compose constants
1918
mov r8,#0x0f
1919
mov r9,#0xff
1920
orr r6,r6,r6,lsl#8
1921
orr r8,r8,r8,lsl#8
1922
orr r6,r6,r6,lsl#16 @ 0x11111111
1923
orr r9,r9,r9,lsl#16 @ 0x00ff00ff
1924
orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
1925
orr r7,r6,r6,lsl#1 @ 0x33333333
1926
orr r6,r6,r6,lsl#2 @ 0x55555555
1927
#endif
1928
@ Park the masks on the stack so they can be reloaded after each call to
@ KeccakF1600_int (see the ldmia following that call).
str r9,[sp,#468]
1929
str r8,[sp,#464]
1930
str r7,[sp,#460]
1931
str r6,[sp,#456]
1932
b .Loop_absorb
1933
1934
.align 4
1935
@ Per-block loop. Here r12 = bytes remaining, r14 = bsz, r11 = inp.
.Loop_absorb:
1936
subs r0,r12,r14
1937
blo .Labsorbed
1938
@ r10 = write cursor into the stack copy of the state (A_flat).
add r10,sp,#0
1939
str r0,[sp,#480] @ save len - bsz
1940
1941
.align 4
1942
@ Per-lane loop: read 8 input bytes, split them into even-numbered and
@ odd-numbered bits and XOR the two 32-bit halves into the next lane.
.Loop_block:
1943
@ Assemble two little-endian 32-bit words: r0 = bytes 0..3, r1 = bytes 4..7.
ldrb r0,[r11],#1
1944
ldrb r1,[r11],#1
1945
ldrb r2,[r11],#1
1946
ldrb r3,[r11],#1
1947
ldrb r4,[r11],#1
1948
orr r0,r0,r1,lsl#8
1949
ldrb r1,[r11],#1
1950
orr r0,r0,r2,lsl#16
1951
ldrb r2,[r11],#1
1952
orr r0,r0,r3,lsl#24 @ lo
1953
ldrb r3,[r11],#1
1954
orr r1,r4,r1,lsl#8
1955
orr r1,r1,r2,lsl#16
1956
orr r1,r1,r3,lsl#24 @ hi
1957
1958
@ Compaction network: r2/r3 collect the even-numbered bits of lo/hi into
@ their low 16 bits, r0/r1 collect the odd-numbered bits of lo/hi into
@ their high 16 bits (mask, fold by 1, 2, 4, then 8 positions).
and r2,r0,r6 @ &=0x55555555
1959
and r0,r0,r6,lsl#1 @ &=0xaaaaaaaa
1960
and r3,r1,r6 @ &=0x55555555
1961
and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
1962
orr r2,r2,r2,lsr#1
1963
orr r0,r0,r0,lsl#1
1964
orr r3,r3,r3,lsr#1
1965
orr r1,r1,r1,lsl#1
1966
and r2,r2,r7 @ &=0x33333333
1967
and r0,r0,r7,lsl#2 @ &=0xcccccccc
1968
and r3,r3,r7 @ &=0x33333333
1969
and r1,r1,r7,lsl#2 @ &=0xcccccccc
1970
orr r2,r2,r2,lsr#2
1971
orr r0,r0,r0,lsl#2
1972
orr r3,r3,r3,lsr#2
1973
orr r1,r1,r1,lsl#2
1974
and r2,r2,r8 @ &=0x0f0f0f0f
1975
and r0,r0,r8,lsl#4 @ &=0xf0f0f0f0
1976
and r3,r3,r8 @ &=0x0f0f0f0f
1977
and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
1978
ldmia r10,{r4,r5} @ A_flat[i]
1979
orr r2,r2,r2,lsr#4
1980
orr r0,r0,r0,lsl#4
1981
orr r3,r3,r3,lsr#4
1982
orr r1,r1,r1,lsl#4
1983
and r2,r2,r9 @ &=0x00ff00ff
1984
and r0,r0,r9,lsl#8 @ &=0xff00ff00
1985
and r3,r3,r9 @ &=0x00ff00ff
1986
and r1,r1,r9,lsl#8 @ &=0xff00ff00
1987
orr r2,r2,r2,lsr#8
1988
orr r0,r0,r0,lsl#8
1989
orr r3,r3,r3,lsr#8
1990
orr r1,r1,r1,lsl#8
1991
1992
@ Merge: r4 (first word of the lane) receives the even bits, low half from
@ lo and high half from hi; r5 (second word) receives the odd bits the
@ same way. The lsl/lsr pairs discard the unused halves of r2 and r1.
mov r2,r2,lsl#16
1993
mov r1,r1,lsr#16
1994
eor r4,r4,r3,lsl#16
1995
eor r5,r5,r0,lsr#16
1996
eor r4,r4,r2,lsr#16
1997
eor r5,r5,r1,lsl#16
1998
stmia r10!,{r4,r5} @ A_flat[i++] ^= BitInterleave(inp[0..7])
1999
2000
@ r14 counts the bytes of the current block still to be absorbed.
subs r14,r14,#8
2001
bhi .Loop_block
2002
2003
@ Save the advanced input pointer; r11 is reloaded from this slot below.
str r11,[sp,#476]
2004
2005
bl KeccakF1600_int
2006
2007
@ Reload r6-r9 = masks, r10 = A, r11 = inp, r12 = len - bsz, r14 = bsz from
@ the eight consecutive slots starting at sp+456.
add r14,sp,#456
2008
ldmia r14,{r6,r7,r8,r9,r10,r11,r12,r14} @ restore constants and variables
2009
b .Loop_absorb
2010
2011
.align 4
2012
@ Fewer than bsz bytes remain. This label is only reached after at least
@ one permutation (the entry check excludes len < bsz), so r10 holds the
@ caller's A pointer reloaded above.
.Labsorbed:
2013
add r11,sp,#40
2014
ldmia sp, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2015
stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ return A[5][5]
2016
ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2017
stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2018
ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2019
stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2020
ldmia r11!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2021
stmia r10!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2022
ldmia r11, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2023
stmia r10, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9}
2024
2025
.Labsorb_abort:
2026
@ Drop the 472-byte frame plus the four saved argument registers r0-r3.
add sp,sp,#456+32
2027
mov r0,r12 @ return value
2028
#if __ARM_ARCH__>=5
2029
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2030
#else
2031
@ Pre-ARMv5 return: plain mov when bit 0 of lr is clear, otherwise the
@ hand-encoded word below (0xe12fff1e is the ARM encoding of bx lr).
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
2032
tst lr,#1
2033
moveq pc,lr @ be binary compatible with V4, yet
2034
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
2035
#endif
2036
.size SHA3_absorb,.-SHA3_absorb
2037
@ SHA3_squeeze: write output bytes taken from the state, permuting the state
@ with KeccakF1600 each time a full block has been emitted.
@   In:  r0 = pointer to A[5][5] (bit-interleaved, two 32-bit words per lane)
@        r1 = output pointer, r2 = number of bytes to produce,
@        r3 = bsz (block size in bytes),
@        first stack argument = flag; when it equals 1 the state is permuted
@        before the first byte is extracted.
@   Saves/restores r4-r10; nothing is placed in r0 as a result.
@ Stack after both pushes (offsets from sp):
@   0..12   masks 0x55555555, 0x33333333, 0x0f0f0f0f, 0x00ff00ff (r6-r9)
@   16      saved r0 (A)      20  saved r3 (bsz)
@   24..    saved r4-r10, lr
.globl SHA3_squeeze
2038
.type SHA3_squeeze,%function
2039
.align 5
2040
SHA3_squeeze:
2041
stmdb sp!,{r0,r3-r10,lr}
2042
2043
@ r10 = read cursor into the state, r4 = output cursor, r5 = bytes still to
@ write, r12 = bytes left in the current block.
mov r10,r0
2044
mov r4,r1
2045
mov r5,r2
2046
mov r12,r3
2047
ldr r0, [sp, #40] @ next is after the 10 pushed registers (10*4)
2048
2049
#ifdef __thumb2__
2050
@ Thumb-2 build: the four masks are loaded with single mov instructions.
mov r9,#0x00ff00ff
2051
mov r8,#0x0f0f0f0f
2052
mov r7,#0x33333333
2053
mov r6,#0x55555555
2054
#else
2055
@ ARM build: build the same four masks from 8-bit seeds by replicating
@ them with shifted orr.
mov r6,#0x11 @ compose constants
2056
mov r8,#0x0f
2057
mov r9,#0xff
2058
orr r6,r6,r6,lsl#8
2059
orr r8,r8,r8,lsl#8
2060
orr r6,r6,r6,lsl#16 @ 0x11111111
2061
orr r9,r9,r9,lsl#16 @ 0x00ff00ff
2062
orr r8,r8,r8,lsl#16 @ 0x0f0f0f0f
2063
orr r7,r6,r6,lsl#1 @ 0x33333333
2064
orr r6,r6,r6,lsl#2 @ 0x55555555
2065
#endif
2066
@ Keep the masks directly below the saved r0/r3 so that one ldmia can
@ reload masks, A and bsz after a call to KeccakF1600.
stmdb sp!,{r6,r7,r8,r9}
2067
2068
@ r14 carries the A pointer into .Lnext_block.
mov r14,r10
2069
cmp r0, #1
2070
beq .Lnext_block
2071
b .Loop_squeeze
2072
2073
.align 4
2074
@ Per-lane loop: load one lane, undo the bit interleaving and emit up to
@ 8 bytes in little-endian order.
.Loop_squeeze:
2075
ldmia r10!,{r0,r1} @ A_flat[i++]
2076
2077
@ Split the two words into 16-bit halves: r2/r0 take the low/high half of
@ the first word (in their low 16 bits), r3/r1 take the low/high half of
@ the second word (in their high 16 bits).
mov r2,r0,lsl#16
2078
mov r3,r1,lsl#16 @ r3 = r1 << 16
2079
mov r2,r2,lsr#16 @ r2 = r0 & 0x0000ffff
2080
mov r1,r1,lsr#16
2081
mov r0,r0,lsr#16 @ r0 = r0 >> 16
2082
mov r1,r1,lsl#16 @ r1 = r1 & 0xffff0000
2083
2084
@ Expansion network (spread by 8, 4, 2, then 1 positions): r2 and r0 end
@ up on the 0x55555555 bit positions, r3 and r1 on the 0xaaaaaaaa ones.
orr r2,r2,r2,lsl#8
2085
orr r3,r3,r3,lsr#8
2086
orr r0,r0,r0,lsl#8
2087
orr r1,r1,r1,lsr#8
2088
and r2,r2,r9 @ &=0x00ff00ff
2089
and r3,r3,r9,lsl#8 @ &=0xff00ff00
2090
and r0,r0,r9 @ &=0x00ff00ff
2091
and r1,r1,r9,lsl#8 @ &=0xff00ff00
2092
orr r2,r2,r2,lsl#4
2093
orr r3,r3,r3,lsr#4
2094
orr r0,r0,r0,lsl#4
2095
orr r1,r1,r1,lsr#4
2096
and r2,r2,r8 @ &=0x0f0f0f0f
2097
and r3,r3,r8,lsl#4 @ &=0xf0f0f0f0
2098
and r0,r0,r8 @ &=0x0f0f0f0f
2099
and r1,r1,r8,lsl#4 @ &=0xf0f0f0f0
2100
orr r2,r2,r2,lsl#2
2101
orr r3,r3,r3,lsr#2
2102
orr r0,r0,r0,lsl#2
2103
orr r1,r1,r1,lsr#2
2104
and r2,r2,r7 @ &=0x33333333
2105
and r3,r3,r7,lsl#2 @ &=0xcccccccc
2106
and r0,r0,r7 @ &=0x33333333
2107
and r1,r1,r7,lsl#2 @ &=0xcccccccc
2108
orr r2,r2,r2,lsl#1
2109
orr r3,r3,r3,lsr#1
2110
orr r0,r0,r0,lsl#1
2111
orr r1,r1,r1,lsr#1
2112
and r2,r2,r6 @ &=0x55555555
2113
and r3,r3,r6,lsl#1 @ &=0xaaaaaaaa
2114
and r0,r0,r6 @ &=0x55555555
2115
and r1,r1,r6,lsl#1 @ &=0xaaaaaaaa
2116
2117
@ r2 = low 32 bits of the lane, r0 = high 32 bits.
orr r2,r2,r3
2118
orr r0,r0,r1
2119
2120
@ Fewer than 8 bytes requested: emit them one at a time in the tail.
cmp r5,#8
2121
blo .Lsqueeze_tail
2122
@ Full lane: store r2 then r0, least significant byte first.
mov r1,r2,lsr#8
2123
strb r2,[r4],#1
2124
mov r3,r2,lsr#16
2125
strb r1,[r4],#1
2126
mov r2,r2,lsr#24
2127
strb r3,[r4],#1
2128
strb r2,[r4],#1
2129
2130
mov r1,r0,lsr#8
2131
strb r0,[r4],#1
2132
mov r3,r0,lsr#16
2133
strb r1,[r4],#1
2134
mov r0,r0,lsr#24
2135
strb r3,[r4],#1
2136
strb r0,[r4],#1
2137
subs r5,r5,#8
2138
beq .Lsqueeze_done
2139
2140
subs r12,r12,#8 @ bsz -= 8
2141
bhi .Loop_squeeze
2142
@ Block exhausted (or permutation requested on entry): permute the state in
@ place, then restart extraction from its first lane.
.Lnext_block:
2143
mov r0,r14 @ original r10
2144
2145
bl KeccakF1600
2146
2147
@ Reload r6-r9 = masks, r10 = A (saved r0) and r12 = bsz (saved r3); r14 was
@ overwritten by the bl, so it is set from r10 again.
ldmia sp,{r6,r7,r8,r9,r10,r12} @ restore constants and variables
2148
mov r14,r10
2149
b .Loop_squeeze
2150
2151
.align 4
2152
@ Tail: 1 to 7 bytes remain (r5 < 8 and nonzero here). Bytes come from r2
@ first (up to 4), then from r0 (up to 3), stopping when r5 reaches zero.
.Lsqueeze_tail:
2153
strb r2,[r4],#1
2154
mov r2,r2,lsr#8
2155
subs r5,r5,#1
2156
beq .Lsqueeze_done
2157
strb r2,[r4],#1
2158
mov r2,r2,lsr#8
2159
subs r5,r5,#1
2160
beq .Lsqueeze_done
2161
strb r2,[r4],#1
2162
mov r2,r2,lsr#8
2163
subs r5,r5,#1
2164
beq .Lsqueeze_done
2165
strb r2,[r4],#1
2166
subs r5,r5,#1
2167
beq .Lsqueeze_done
2168
2169
strb r0,[r4],#1
2170
mov r0,r0,lsr#8
2171
subs r5,r5,#1
2172
beq .Lsqueeze_done
2173
strb r0,[r4],#1
2174
mov r0,r0,lsr#8
2175
subs r5,r5,#1
2176
beq .Lsqueeze_done
2177
strb r0,[r4]
2178
b .Lsqueeze_done
2179
2180
.align 4
2181
.Lsqueeze_done:
2182
@ Drop the four masks (16 bytes) and the saved r0/r3 (8 bytes).
add sp,sp,#24
2183
#if __ARM_ARCH__>=5
2184
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
2185
#else
2186
@ Pre-ARMv5 return: plain mov when bit 0 of lr is clear, otherwise the
@ hand-encoded word below (0xe12fff1e is the ARM encoding of bx lr).
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
2187
tst lr,#1
2188
moveq pc,lr @ be binary compatible with V4, yet
2189
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
2190
#endif
2191
.size SHA3_squeeze,.-SHA3_squeeze
2192
#if __ARM_MAX_ARCH__>=7
2193
.fpu neon
2194
2195
@ iotas64: table of 24 64-bit constants, one per round of KeccakF1600_neon.
@ That routine takes the table address with adr, loads one entry per
@ iteration with a post-incremented 64-bit-aligned load, and XORs it into
@ A[0][0] (the step it labels Iota).
.type iotas64, %object
2196
.align 5
2197
iotas64:
2198
.quad 0x0000000000000001
2199
.quad 0x0000000000008082
2200
.quad 0x800000000000808a
2201
.quad 0x8000000080008000
2202
.quad 0x000000000000808b
2203
.quad 0x0000000080000001
2204
.quad 0x8000000080008081
2205
.quad 0x8000000000008009
2206
.quad 0x000000000000008a
2207
.quad 0x0000000000000088
2208
.quad 0x0000000080008009
2209
.quad 0x000000008000000a
2210
.quad 0x000000008000808b
2211
.quad 0x800000000000008b
2212
.quad 0x8000000000008089
2213
.quad 0x8000000000008003
2214
.quad 0x8000000000008002
2215
.quad 0x8000000000000080
2216
.quad 0x000000000000800a
2217
.quad 0x800000008000000a
2218
.quad 0x8000000080008081
2219
.quad 0x8000000000008080
2220
.quad 0x0000000080000001
2221
.quad 0x8000000080008008
2222
.size iotas64,.-iotas64
2223
2224
@ KeccakF1600_neon: 24 rounds over a state held entirely in NEON registers.
@   In:  r0 = pointer to 64-bit-aligned scratch memory; 16 bytes at [r0] and
@             8 bytes at [r0+16] are written and read back every round.
@        State layout, as used by the comments below:
@          q0..q4   = A[0..1][0..4]  (low d = row 0, high d = row 1)
@          q5..q9   = A[2..3][0..4]  (low d = row 2, high d = row 3)
@          d20..d24 = A[4][0..4]
@   Out: the same registers hold the permuted state.
@   Clobbers r1, r2, r3, d25 and q13-q15; returns with bx lr.
@   d8-d15 are modified and not saved here; SHA3_absorb_neon further down
@   pushes d8-d15 in its prologue. NOTE(review): confirm that every other
@   caller does the same.
.type KeccakF1600_neon, %function
2225
.align 5
2226
KeccakF1600_neon:
2227
@ r1 = second scratch slot, r2 = cursor into iotas64, r3 = rounds left.
add r1, r0, #16
2228
adr r2, iotas64
2229
mov r3, #24 @ loop counter
2230
b .Loop_neon
2231
2232
.align 4
2233
.Loop_neon:
2234
@ Theta
2235
@ q4 and d18 are parked in scratch memory so that the registers can hold
@ the rotated column parities; both are reloaded before they are needed
@ as state again.
vst1.64 {q4}, [r0,:64] @ offload A[0..1][4]
2236
veor q13, q0, q5 @ A[0..1][0]^A[2..3][0]
2237
vst1.64 {d18}, [r1,:64] @ offload A[2][4]
2238
veor q14, q1, q6 @ A[0..1][1]^A[2..3][1]
2239
veor q15, q2, q7 @ A[0..1][2]^A[2..3][2]
2240
veor d26, d26, d27 @ C[0]=A[0][0]^A[1][0]^A[2][0]^A[3][0]
2241
veor d27, d28, d29 @ C[1]=A[0][1]^A[1][1]^A[2][1]^A[3][1]
2242
veor q14, q3, q8 @ A[0..1][3]^A[2..3][3]
2243
veor q4, q4, q9 @ A[0..1][4]^A[2..3][4]
2244
veor d30, d30, d31 @ C[2]=A[0][2]^A[1][2]^A[2][2]^A[3][2]
2245
veor d31, d28, d29 @ C[3]=A[0][3]^A[1][3]^A[2][3]^A[3][3]
2246
veor d25, d8, d9 @ C[4]=A[0][4]^A[1][4]^A[2][4]^A[3][4]
2247
veor q13, q13, q10 @ C[0..1]^=A[4][0..1]
2248
veor q14, q15, q11 @ C[2..3]^=A[4][2..3]
2249
veor d25, d25, d24 @ C[4]^=A[4][4]
2250
2251
@ Rotate left by one: x+x is x<<1, then vsri #63 inserts the old top bit
@ into bit 0.
vadd.u64 q4, q13, q13 @ C[0..1]<<1
2252
vadd.u64 q15, q14, q14 @ C[2..3]<<1
2253
vadd.u64 d18, d25, d25 @ C[4]<<1
2254
vsri.u64 q4, q13, #63 @ ROL64(C[0..1],1)
2255
vsri.u64 q15, q14, #63 @ ROL64(C[2..3],1)
2256
vsri.u64 d18, d25, #63 @ ROL64(C[4],1)
2257
veor d25, d25, d9 @ D[0] = C[4] ^= ROL64(C[1],1)
2258
veor q13, q13, q15 @ D[1..2] = C[0..1] ^ ROL64(C[2..3],1)
2259
veor d28, d28, d18 @ D[3] = C[2] ^= ROL64(C[4],1)
2260
veor d29, d29, d8 @ D[4] = C[3] ^= ROL64(C[0],1)
2261
2262
@ Column 0 update; d25 holds D[0] at this point (see the line that
@ computes it above).
veor d0, d0, d25 @ A[0][0] ^= C[4]
2263
veor d1, d1, d25 @ A[1][0] ^= C[4]
2264
veor d10, d10, d25 @ A[2][0] ^= C[4]
2265
veor d11, d11, d25 @ A[3][0] ^= C[4]
2266
veor d20, d20, d25 @ A[4][0] ^= C[4]
2267
2268
veor d2, d2, d26 @ A[0][1] ^= D[1]
2269
veor d3, d3, d26 @ A[1][1] ^= D[1]
2270
veor d12, d12, d26 @ A[2][1] ^= D[1]
2271
veor d13, d13, d26 @ A[3][1] ^= D[1]
2272
veor d21, d21, d26 @ A[4][1] ^= D[1]
2273
@ D[1] is no longer needed: copy D[2] into d26 so both halves of q13 hold
@ D[2] for the q-register updates of column 2 below.
vmov d26, d27
2274
2275
@ Column 3 update; d28 holds D[3].
veor d6, d6, d28 @ A[0][3] ^= C[2]
2276
veor d7, d7, d28 @ A[1][3] ^= C[2]
2277
veor d16, d16, d28 @ A[2][3] ^= C[2]
2278
veor d17, d17, d28 @ A[3][3] ^= C[2]
2279
veor d23, d23, d28 @ A[4][3] ^= C[2]
2280
vld1.64 {q4}, [r0,:64] @ restore A[0..1][4]
2281
@ Same trick for column 4: both halves of q14 now hold D[4].
vmov d28, d29
2282
2283
vld1.64 {d18}, [r1,:64] @ restore A[2][4]
2284
veor q2, q2, q13 @ A[0..1][2] ^= D[2]
2285
veor q7, q7, q13 @ A[2..3][2] ^= D[2]
2286
veor d22, d22, d27 @ A[4][2] ^= D[2]
2287
2288
veor q4, q4, q14 @ A[0..1][4] ^= C[3]
2289
veor q9, q9, q14 @ A[2..3][4] ^= C[3]
2290
veor d24, d24, d29 @ A[4][4] ^= C[3]
2291
2292
@ Rho + Pi
2293
@ Each rotation is a vshl into the destination followed by a vsri that
@ inserts the wrapped-around bits. Four lanes of row 0 are first copied to
@ d26-d29 because their registers are overwritten early; each later group
@ overwrites registers whose old contents were consumed by the group
@ before it.
vmov d26, d2 @ C[1] = A[0][1]
2294
vshl.u64 d2, d3, #44
2295
vmov d27, d4 @ C[2] = A[0][2]
2296
vshl.u64 d4, d14, #43
2297
vmov d28, d6 @ C[3] = A[0][3]
2298
vshl.u64 d6, d17, #21
2299
vmov d29, d8 @ C[4] = A[0][4]
2300
vshl.u64 d8, d24, #14
2301
vsri.u64 d2, d3, #64-44 @ A[0][1] = ROL64(A[1][1], rhotates[1][1])
2302
vsri.u64 d4, d14, #64-43 @ A[0][2] = ROL64(A[2][2], rhotates[2][2])
2303
vsri.u64 d6, d17, #64-21 @ A[0][3] = ROL64(A[3][3], rhotates[3][3])
2304
vsri.u64 d8, d24, #64-14 @ A[0][4] = ROL64(A[4][4], rhotates[4][4])
2305
2306
vshl.u64 d3, d9, #20
2307
vshl.u64 d14, d16, #25
2308
vshl.u64 d17, d15, #15
2309
vshl.u64 d24, d21, #2
2310
vsri.u64 d3, d9, #64-20 @ A[1][1] = ROL64(A[1][4], rhotates[1][4])
2311
vsri.u64 d14, d16, #64-25 @ A[2][2] = ROL64(A[2][3], rhotates[2][3])
2312
vsri.u64 d17, d15, #64-15 @ A[3][3] = ROL64(A[3][2], rhotates[3][2])
2313
vsri.u64 d24, d21, #64-2 @ A[4][4] = ROL64(A[4][1], rhotates[4][1])
2314
2315
@ Rotations by a whole number of bytes (8 and 56 bits) are done with a
@ single vext.8 byte rotation instead of the vshl/vsri pair; the
@ commented-out vshl lines show the shift they replace.
vshl.u64 d9, d22, #61
2316
@ vshl.u64 d16, d19, #8
2317
vshl.u64 d15, d12, #10
2318
vshl.u64 d21, d7, #55
2319
vsri.u64 d9, d22, #64-61 @ A[1][4] = ROL64(A[4][2], rhotates[4][2])
2320
vext.8 d16, d19, d19, #8-1 @ A[2][3] = ROL64(A[3][4], rhotates[3][4])
2321
vsri.u64 d15, d12, #64-10 @ A[3][2] = ROL64(A[2][1], rhotates[2][1])
2322
vsri.u64 d21, d7, #64-55 @ A[4][1] = ROL64(A[1][3], rhotates[1][3])
2323
2324
vshl.u64 d22, d18, #39
2325
@ vshl.u64 d19, d23, #56
2326
vshl.u64 d12, d5, #6
2327
vshl.u64 d7, d13, #45
2328
vsri.u64 d22, d18, #64-39 @ A[4][2] = ROL64(A[2][4], rhotates[2][4])
2329
vext.8 d19, d23, d23, #8-7 @ A[3][4] = ROL64(A[4][3], rhotates[4][3])
2330
vsri.u64 d12, d5, #64-6 @ A[2][1] = ROL64(A[1][2], rhotates[1][2])
2331
vsri.u64 d7, d13, #64-45 @ A[1][3] = ROL64(A[3][1], rhotates[3][1])
2332
2333
vshl.u64 d18, d20, #18
2334
vshl.u64 d23, d11, #41
2335
vshl.u64 d5, d10, #3
2336
vshl.u64 d13, d1, #36
2337
vsri.u64 d18, d20, #64-18 @ A[2][4] = ROL64(A[4][0], rhotates[4][0])
2338
vsri.u64 d23, d11, #64-41 @ A[4][3] = ROL64(A[3][0], rhotates[3][0])
2339
vsri.u64 d5, d10, #64-3 @ A[1][2] = ROL64(A[2][0], rhotates[2][0])
2340
vsri.u64 d13, d1, #64-36 @ A[3][1] = ROL64(A[1][0], rhotates[1][0])
2341
2342
@ Last group: sources are the row-0 copies made at the start of this step.
vshl.u64 d1, d28, #28
2343
vshl.u64 d10, d26, #1
2344
vshl.u64 d11, d29, #27
2345
vshl.u64 d20, d27, #62
2346
vsri.u64 d1, d28, #64-28 @ A[1][0] = ROL64(C[3], rhotates[0][3])
2347
vsri.u64 d10, d26, #64-1 @ A[2][0] = ROL64(C[1], rhotates[0][1])
2348
vsri.u64 d11, d29, #64-27 @ A[3][0] = ROL64(C[4], rhotates[0][4])
2349
vsri.u64 d20, d27, #64-62 @ A[4][0] = ROL64(C[2], rhotates[0][2])
2350
2351
@ Chi + Iota
2352
@ Rows 0 and 1 are processed together (one row per half of each q
@ register). vbic d,n,m computes n AND NOT m.
vbic q13, q2, q1
2353
vbic q14, q3, q2
2354
vbic q15, q4, q3
2355
veor q13, q13, q0 @ A[0..1][0] ^ (~A[0..1][1] & A[0..1][2])
2356
veor q14, q14, q1 @ A[0..1][1] ^ (~A[0..1][2] & A[0..1][3])
2357
veor q2, q2, q15 @ A[0..1][2] ^= (~A[0..1][3] & A[0..1][4])
2358
@ The new A[0..1][0] is parked in scratch memory: q0 must keep the old
@ value for the two vbic below, and is then reused as a temporary for
@ rows 2 and 3.
vst1.64 {q13}, [r0,:64] @ offload A[0..1][0]
2359
vbic q13, q0, q4
2360
vbic q15, q1, q0
2361
vmov q1, q14 @ A[0..1][1]
2362
veor q3, q3, q13 @ A[0..1][3] ^= (~A[0..1][4] & A[0..1][0])
2363
veor q4, q4, q15 @ A[0..1][4] ^= (~A[0..1][0] & A[0..1][1])
2364
2365
@ Rows 2 and 3: q0 and q15 keep the old A[2..3][0] and A[2..3][1], which
@ are still needed after q5 and q6 have been updated.
vbic q13, q7, q6
2366
vmov q0, q5 @ A[2..3][0]
2367
vbic q14, q8, q7
2368
vmov q15, q6 @ A[2..3][1]
2369
veor q5, q5, q13 @ A[2..3][0] ^= (~A[2..3][1] & A[2..3][2])
2370
vbic q13, q9, q8
2371
veor q6, q6, q14 @ A[2..3][1] ^= (~A[2..3][2] & A[2..3][3])
2372
vbic q14, q0, q9
2373
veor q7, q7, q13 @ A[2..3][2] ^= (~A[2..3][3] & A[2..3][4])
2374
vbic q13, q15, q0
2375
veor q8, q8, q14 @ A[2..3][3] ^= (~A[2..3][4] & A[2..3][0])
2376
@ Row 4: d28/d29 keep the old A[4][0] and A[4][1] for the last two terms.
vmov q14, q10 @ A[4][0..1]
2377
veor q9, q9, q13 @ A[2..3][4] ^= (~A[2..3][0] & A[2..3][1])
2378
2379
vld1.64 d25, [r2,:64]! @ Iota[i++]
2380
vbic d26, d22, d21
2381
vbic d27, d23, d22
2382
vld1.64 {q0}, [r0,:64] @ restore A[0..1][0]
2383
veor d20, d20, d26 @ A[4][0] ^= (~A[4][1] & A[4][2])
2384
vbic d26, d24, d23
2385
veor d21, d21, d27 @ A[4][1] ^= (~A[4][2] & A[4][3])
2386
vbic d27, d28, d24
2387
veor d22, d22, d26 @ A[4][2] ^= (~A[4][3] & A[4][4])
2388
vbic d26, d29, d28
2389
veor d23, d23, d27 @ A[4][3] ^= (~A[4][4] & A[4][0])
2390
veor d0, d0, d25 @ A[0][0] ^= Iota[i]
2391
veor d24, d24, d26 @ A[4][4] ^= (~A[4][0] & A[4][1])
2392
2393
@ One round done; repeat until the counter set on entry reaches zero.
subs r3, r3, #1
2394
bne .Loop_neon
2395
2396
bx lr
2397
.size KeccakF1600_neon,.-KeccakF1600_neon
2398
2399
@ ----------------------------------------------------------------------
@ SHA3_absorb_neon -- XOR whole input blocks into the Keccak state and
@ run the permutation after each block.
@
@ In:   r0 = base of the 25-lane state, lanes stored A[0][0] .. A[4][4],
@            8 bytes each (accessed with a :64 alignment hint)
@       r1 = inp, r2 = len, r3 = bsz (block size in bytes)
@ Out:  r0 = len reduced by bsz for every block absorbed, i.e. the
@            number of trailing bytes that did not fill a whole block
@ Saves r4-r6, lr and d8-d15 on the stack and restores them on return.
@ NOTE(review): the lane-by-lane exits below only line up with bsz when
@ bsz is a multiple of 8 -- presumably guaranteed by the caller, confirm.
@ NOTE(review): relies on KeccakF1600_neon leaving r0 and r4-r6 intact --
@ its entry code is outside this view, confirm.
@
@ Register map while the state is resident:
@   A[0][0..4] = d0,d2,d4,d6,d8        A[1][0..4] = d1,d3,d5,d7,d9
@   A[2][0..4] = d10,d12,d14,d16,d18   A[3][0..4] = d11,d13,d15,d17,d19
@   A[4][0..4] = d20..d24              d31 = scratch for one input lane
@ ----------------------------------------------------------------------
.globl	SHA3_absorb_neon
.type	SHA3_absorb_neon, %function
.align	5
SHA3_absorb_neon:
	stmdb	sp!, {r4,r5,r6,lr}
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	mov	r4, r1			@ inp
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz

	@ Pull the whole state into NEON registers. Every load but the
	@ last post-increments r0.
	vld1.32	{d0}, [r0,:64]!		@ A[0][0]
	vld1.32	{d2}, [r0,:64]!		@ A[0][1]
	vld1.32	{d4}, [r0,:64]!		@ A[0][2]
	vld1.32	{d6}, [r0,:64]!		@ A[0][3]
	vld1.32	{d8}, [r0,:64]!		@ A[0][4]

	vld1.32	{d1}, [r0,:64]!		@ A[1][0]
	vld1.32	{d3}, [r0,:64]!		@ A[1][1]
	vld1.32	{d5}, [r0,:64]!		@ A[1][2]
	vld1.32	{d7}, [r0,:64]!		@ A[1][3]
	vld1.32	{d9}, [r0,:64]!		@ A[1][4]

	vld1.32	{d10}, [r0,:64]!	@ A[2][0]
	vld1.32	{d12}, [r0,:64]!	@ A[2][1]
	vld1.32	{d14}, [r0,:64]!	@ A[2][2]
	vld1.32	{d16}, [r0,:64]!	@ A[2][3]
	vld1.32	{d18}, [r0,:64]!	@ A[2][4]

	vld1.32	{d11}, [r0,:64]!	@ A[3][0]
	vld1.32	{d13}, [r0,:64]!	@ A[3][1]
	vld1.32	{d15}, [r0,:64]!	@ A[3][2]
	vld1.32	{d17}, [r0,:64]!	@ A[3][3]
	vld1.32	{d19}, [r0,:64]!	@ A[3][4]

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..3]
	vld1.32	{d24}, [r0,:64]		@ A[4][4]
	sub	r0, r0, #24*8		@ rewind (20 + 4 lanes were post-incremented)
	b	.Loop_absorb_neon

.align	4
.Loop_absorb_neon:
	subs	r12, r5, r6		@ len - bsz
	blo	.Labsorbed_neon		@ less than one block left
	mov	r5, r12			@ commit len -= bsz

	@ XOR the block into the state one 8-byte lane at a time. Each
	@ cmp against 16*k feeds two exits: blo right away (bsz ended
	@ on the lane just loaded) and beq after the next lane (bsz is
	@ exactly 16*k). The NEON load and veor in between do not touch
	@ the condition flags.
	vld1.8	{d31}, [r4]!		@ endian-neutral loads...
	cmp	r6, #8*2
	veor	d0, d0, d31		@ A[0][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d2, d2, d31		@ A[0][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*4
	veor	d4, d4, d31		@ A[0][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d6, d6, d31		@ A[0][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*6
	veor	d8, d8, d31		@ A[0][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d1, d1, d31		@ A[1][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*8
	veor	d3, d3, d31		@ A[1][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d5, d5, d31		@ A[1][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*10
	veor	d7, d7, d31		@ A[1][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d9, d9, d31		@ A[1][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*12
	veor	d10, d10, d31		@ A[2][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d12, d12, d31		@ A[2][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*14
	veor	d14, d14, d31		@ A[2][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d16, d16, d31		@ A[2][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*16
	veor	d18, d18, d31		@ A[2][4] ^= *inp++
	blo	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	veor	d11, d11, d31		@ A[3][0] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*18
	veor	d13, d13, d31		@ A[3][1] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d15, d15, d31		@ A[3][2] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*20
	veor	d17, d17, d31		@ A[3][3] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d19, d19, d31		@ A[3][4] ^= *inp++
	beq	.Lprocess_neon

	vld1.8	{d31}, [r4]!
	cmp	r6, #8*22
	veor	d20, d20, d31		@ A[4][0] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d21, d21, d31		@ A[4][1] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	cmp	r6, #8*24
	veor	d22, d22, d31		@ A[4][2] ^= *inp++
	blo	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d23, d23, d31		@ A[4][3] ^= *inp++
	beq	.Lprocess_neon
	vld1.8	{d31}, [r4]!
	veor	d24, d24, d31		@ A[4][4] ^= *inp++

.Lprocess_neon:
	bl	KeccakF1600_neon	@ permute the register-resident state
	b	.Loop_absorb_neon

.align	4
.Labsorbed_neon:
	@ Write the state back in the same lane order it was loaded.
	@ r0 is left advanced because it is overwritten with the return
	@ value below.
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vst1.32	{d24}, [r0,:64]

	mov	r0, r5			@ return value
	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_absorb_neon,.-SHA3_absorb_neon
2573
2574
@ ----------------------------------------------------------------------
@ SHA3_squeeze_neon -- copy len bytes of output out of the Keccak state,
@ running the permutation each time bsz bytes have been consumed and
@ more output is still wanted.
@
@ In:   r0 = base of the 25-lane state (A_flat), 8 bytes per lane
@       r1 = out, r2 = len, r3 = bsz (block size in bytes)
@ Out:  nothing is placed in r0 for the caller.
@ Saves r4-r6 and lr for the whole call. d8-d15 are saved only around
@ the permutation, because the copy loop itself uses just d0.
@ NOTE(review): relies on KeccakF1600_neon leaving r0 and r4-r6 intact --
@ its entry code is outside this view, confirm.
@
@ Working registers:
@   r4 = out cursor    r5 = bytes still to write    r6 = bsz
@   r12 = read cursor into the state
@   r14 = bytes left in the current block (reloaded from r6 after each
@         permutation, which also overwrites r14 via bl)
@
@ Change from the generated code: a len == 0 guard was added at entry.
@ Without it the first cmp/blo sends control to the tail, which stores
@ one byte before it tests the length. This file is generated from
@ keccak1600-armv4.pl (see the banner at the top of the file), so the
@ same guard belongs in the generator as well.
@ ----------------------------------------------------------------------
.globl	SHA3_squeeze_neon
.type	SHA3_squeeze_neon, %function
.align	5
SHA3_squeeze_neon:
	stmdb	sp!, {r4,r5,r6,lr}

	mov	r4, r1			@ out
	mov	r5, r2			@ len
	mov	r6, r3			@ bsz
	mov	r12, r0			@ A_flat
	mov	r14, r3			@ bsz
	cmp	r5, #0
	beq	.Lsqueeze_neon_done	@ len == 0, nothing to write
	b	.Loop_squeeze_neon

.align	4
.Loop_squeeze_neon:
	cmp	r5, #8
	blo	.Lsqueeze_neon_tail	@ fewer than 8 bytes wanted
	vld1.32	{d0}, [r12]!
	vst1.8	{d0}, [r4]!		@ endian-neutral store

	subs	r5, r5, #8		@ len -= 8
	beq	.Lsqueeze_neon_done

	subs	r14, r14, #8		@ bsz -= 8
	bhi	.Loop_squeeze_neon	@ block not exhausted yet

	@ Block exhausted with output still pending: load the state into
	@ NEON registers, permute, and write it back.
	vstmdb	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}

	vld1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vld1.32	{d2}, [r0,:64]!
	vld1.32	{d4}, [r0,:64]!
	vld1.32	{d6}, [r0,:64]!
	vld1.32	{d8}, [r0,:64]!

	vld1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vld1.32	{d3}, [r0,:64]!
	vld1.32	{d5}, [r0,:64]!
	vld1.32	{d7}, [r0,:64]!
	vld1.32	{d9}, [r0,:64]!

	vld1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vld1.32	{d12}, [r0,:64]!
	vld1.32	{d14}, [r0,:64]!
	vld1.32	{d16}, [r0,:64]!
	vld1.32	{d18}, [r0,:64]!

	vld1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vld1.32	{d13}, [r0,:64]!
	vld1.32	{d15}, [r0,:64]!
	vld1.32	{d17}, [r0,:64]!
	vld1.32	{d19}, [r0,:64]!

	vld1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	vld1.32	{d24}, [r0,:64]
	sub	r0, r0, #24*8		@ rewind

	bl	KeccakF1600_neon

	mov	r12, r0			@ A_flat
	vst1.32	{d0}, [r0,:64]!		@ A[0][0..4]
	vst1.32	{d2}, [r0,:64]!
	vst1.32	{d4}, [r0,:64]!
	vst1.32	{d6}, [r0,:64]!
	vst1.32	{d8}, [r0,:64]!

	vst1.32	{d1}, [r0,:64]!		@ A[1][0..4]
	vst1.32	{d3}, [r0,:64]!
	vst1.32	{d5}, [r0,:64]!
	vst1.32	{d7}, [r0,:64]!
	vst1.32	{d9}, [r0,:64]!

	vst1.32	{d10}, [r0,:64]!	@ A[2][0..4]
	vst1.32	{d12}, [r0,:64]!
	vst1.32	{d14}, [r0,:64]!
	vst1.32	{d16}, [r0,:64]!
	vst1.32	{d18}, [r0,:64]!

	vst1.32	{d11}, [r0,:64]!	@ A[3][0..4]
	vst1.32	{d13}, [r0,:64]!
	vst1.32	{d15}, [r0,:64]!
	vst1.32	{d17}, [r0,:64]!
	vst1.32	{d19}, [r0,:64]!

	vst1.32	{d20,d21,d22,d23}, [r0,:64]!	@ A[4][0..4]
	mov	r14, r6			@ bsz
	vst1.32	{d24}, [r0,:64]
	mov	r0, r12			@ rewind

	vldmia	sp!, {d8,d9,d10,d11,d12,d13,d14,d15}
	b	.Loop_squeeze_neon

.align	4
.Lsqueeze_neon_tail:
	@ Reached with 1..7 bytes left. The current lane is fetched as two
	@ words and emitted low byte first. Each cmp feeds a blo straight
	@ away and a beq one byte later. strb and the plain mov leave the
	@ condition flags untouched in between.
	ldmia	r12, {r2,r3}
	cmp	r5, #2
	strb	r2, [r4],#1		@ endian-neutral store
	mov	r2, r2, lsr#8
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	mov	r2, r2, lsr#8
	cmp	r5, #4
	blo	.Lsqueeze_neon_done
	strb	r2, [r4], #1
	beq	.Lsqueeze_neon_done

	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	cmp	r5, #6
	blo	.Lsqueeze_neon_done
	strb	r3, [r4], #1
	mov	r3, r3, lsr#8
	beq	.Lsqueeze_neon_done
	strb	r3, [r4], #1

.Lsqueeze_neon_done:
	ldmia	sp!, {r4,r5,r6,pc}
.size	SHA3_squeeze_neon,.-SHA3_squeeze_neon
2694
#endif
2695
.byte 75,101,99,99,97,107,45,49,54,48,48,32,97,98,115,111,114,98,32,97,110,100,32,115,113,117,101,101,122,101,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
2696
.align 2
2697
.align 2
2698
2699