/* lib/crypto/s390/chacha-s390.S — web-viewer chrome removed from scraped copy */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Original implementation written by Andy Polyakov, @dot-asm.
 * This is an adaptation of the original code for kernel use.
 *
 * Copyright (C) 2006-2019 CRYPTOGAMS by <[email protected]>. All Rights Reserved.
 */

#include <linux/linkage.h>
#include <asm/nospec-insn.h>
#include <asm/fpu-insn.h>

/* SP is the s390 stack pointer; FRAME = 16 vector save slots + 4 GPR slots. */
#define SP	%r15
#define FRAME	(16 * 8 + 4 * 8)
.data
.balign	32

/*
 * Constant pool, addressed via %r7 in both functions below:
 *   +0x00  ChaCha "expand 32-byte k" sigma constant (endian-neutral)
 *   +0x10  counter increments 1, 2, 3 (low word only)
 *   +0x40  byte-swap permute mask for VPERM (little-endian output)
 *   +0x50  per-lane counter offsets 0,1,2,3
 *   +0x60  sigma words splatted across all four lanes ("smashed")
 */
SYM_DATA_START_LOCAL(sigma)
	.long	0x61707865,0x3320646e,0x79622d32,0x6b206574	# endian-neutral
	.long	1,0,0,0
	.long	2,0,0,0
	.long	3,0,0,0
	.long	0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c	# byte swap

	.long	0,1,2,3
	.long	0x61707865,0x61707865,0x61707865,0x61707865	# smashed sigma
	.long	0x3320646e,0x3320646e,0x3320646e,0x3320646e
	.long	0x79622d32,0x79622d32,0x79622d32,0x79622d32
	.long	0x6b206574,0x6b206574,0x6b206574,0x6b206574
SYM_DATA_END(sigma)

.previous

	GEN_BR_THUNK %r14

.text
#############################################################################
# void chacha20_vx_4x(u8 *out, const u8 *inp, size_t len,
#		      const u32 *key, const u32 *counter)

/* Argument registers (s390 ELF ABI: args in %r2-%r6). */
#define OUT	%r2
#define INP	%r3
#define LEN	%r4
#define KEY	%r5
#define COUNTER	%r6

#define BEPERM	%v31
#define CTR	%v26

/* Key/state words as loaded (sigma, key lo, key hi, counter+nonce). */
#define K0	%v16
#define K1	%v17
#define K2	%v18
#define K3	%v19

/* Four interleaved ChaCha states, one 32-bit word per vector lane. */
#define XA0	%v0
#define XA1	%v1
#define XA2	%v2
#define XA3	%v3

#define XB0	%v4
#define XB1	%v5
#define XB2	%v6
#define XB3	%v7

#define XC0	%v8
#define XC1	%v9
#define XC2	%v10
#define XC3	%v11

#define XD0	%v12
#define XD1	%v13
#define XD2	%v14
#define XD3	%v15

/* Scratch vectors (transpose temporaries / input blocks). */
#define XT0	%v27
#define XT1	%v28
#define XT2	%v29
#define XT3	%v30
# Produces up to 4 x 64 bytes of ChaCha20 keystream per invocation by
# holding four block states lane-wise in vector registers ("smashed"
# layout: one state word per register, one block per lane).  len need
# not be a multiple of 64: a final partial block is handled byte-wise
# at .Ltail_4x via a stack bounce buffer.
SYM_FUNC_START(chacha20_vx_4x)
	stmg	%r6,%r7,6*8(SP)		# save non-volatile %r6,%r7

	larl	%r7,sigma
	lhi	%r0,10			# 10 double-rounds = 20 rounds
	lhi	%r1,0

	VL	K0,0,,%r7		# load sigma
	VL	K1,0,,KEY		# load key
	VL	K2,16,,KEY
	VL	K3,0,,COUNTER		# load counter

	VL	BEPERM,0x40,,%r7
	VL	CTR,0x50,,%r7

	VLM	XA0,XA3,0x60,%r7,4	# load [smashed] sigma

	VREPF	XB0,K1,0		# smash the key
	VREPF	XB1,K1,1
	VREPF	XB2,K1,2
	VREPF	XB3,K1,3

	VREPF	XD0,K3,0
	VREPF	XD1,K3,1
	VREPF	XD2,K3,2
	VREPF	XD3,K3,3
	VAF	XD0,XD0,CTR		# per-lane counters +0,+1,+2,+3

	VREPF	XC0,K2,0
	VREPF	XC1,K2,1
	VREPF	XC2,K2,2
	VREPF	XC3,K2,3

.Loop_4x:
	# Column round: a += b; d ^= a; d <<<= 16; c += d; b ^= c;
	# b <<<= 12; a += b; d ^= a; d <<<= 8; c += d; b ^= c; b <<<= 7.
	VAF	XA0,XA0,XB0
	VX	XD0,XD0,XA0
	VERLLF	XD0,XD0,16

	VAF	XA1,XA1,XB1
	VX	XD1,XD1,XA1
	VERLLF	XD1,XD1,16

	VAF	XA2,XA2,XB2
	VX	XD2,XD2,XA2
	VERLLF	XD2,XD2,16

	VAF	XA3,XA3,XB3
	VX	XD3,XD3,XA3
	VERLLF	XD3,XD3,16

	VAF	XC0,XC0,XD0
	VX	XB0,XB0,XC0
	VERLLF	XB0,XB0,12

	VAF	XC1,XC1,XD1
	VX	XB1,XB1,XC1
	VERLLF	XB1,XB1,12

	VAF	XC2,XC2,XD2
	VX	XB2,XB2,XC2
	VERLLF	XB2,XB2,12

	VAF	XC3,XC3,XD3
	VX	XB3,XB3,XC3
	VERLLF	XB3,XB3,12

	VAF	XA0,XA0,XB0
	VX	XD0,XD0,XA0
	VERLLF	XD0,XD0,8

	VAF	XA1,XA1,XB1
	VX	XD1,XD1,XA1
	VERLLF	XD1,XD1,8

	VAF	XA2,XA2,XB2
	VX	XD2,XD2,XA2
	VERLLF	XD2,XD2,8

	VAF	XA3,XA3,XB3
	VX	XD3,XD3,XA3
	VERLLF	XD3,XD3,8

	VAF	XC0,XC0,XD0
	VX	XB0,XB0,XC0
	VERLLF	XB0,XB0,7

	VAF	XC1,XC1,XD1
	VX	XB1,XB1,XC1
	VERLLF	XB1,XB1,7

	VAF	XC2,XC2,XD2
	VX	XB2,XB2,XC2
	VERLLF	XB2,XB2,7

	VAF	XC3,XC3,XD3
	VX	XB3,XB3,XC3
	VERLLF	XB3,XB3,7

	# Diagonal round: same quarter-round on rotated b/c/d columns.
	VAF	XA0,XA0,XB1
	VX	XD3,XD3,XA0
	VERLLF	XD3,XD3,16

	VAF	XA1,XA1,XB2
	VX	XD0,XD0,XA1
	VERLLF	XD0,XD0,16

	VAF	XA2,XA2,XB3
	VX	XD1,XD1,XA2
	VERLLF	XD1,XD1,16

	VAF	XA3,XA3,XB0
	VX	XD2,XD2,XA3
	VERLLF	XD2,XD2,16

	VAF	XC2,XC2,XD3
	VX	XB1,XB1,XC2
	VERLLF	XB1,XB1,12

	VAF	XC3,XC3,XD0
	VX	XB2,XB2,XC3
	VERLLF	XB2,XB2,12

	VAF	XC0,XC0,XD1
	VX	XB3,XB3,XC0
	VERLLF	XB3,XB3,12

	VAF	XC1,XC1,XD2
	VX	XB0,XB0,XC1
	VERLLF	XB0,XB0,12

	VAF	XA0,XA0,XB1
	VX	XD3,XD3,XA0
	VERLLF	XD3,XD3,8

	VAF	XA1,XA1,XB2
	VX	XD0,XD0,XA1
	VERLLF	XD0,XD0,8

	VAF	XA2,XA2,XB3
	VX	XD1,XD1,XA2
	VERLLF	XD1,XD1,8

	VAF	XA3,XA3,XB0
	VX	XD2,XD2,XA3
	VERLLF	XD2,XD2,8

	VAF	XC2,XC2,XD3
	VX	XB1,XB1,XC2
	VERLLF	XB1,XB1,7

	VAF	XC3,XC3,XD0
	VX	XB2,XB2,XC3
	VERLLF	XB2,XB2,7

	VAF	XC0,XC0,XD1
	VX	XB3,XB3,XC0
	VERLLF	XB3,XB3,7

	VAF	XC1,XC1,XD2
	VX	XB0,XB0,XC1
	VERLLF	XB0,XB0,7
	brct	%r0,.Loop_4x

	VAF	XD0,XD0,CTR		# re-apply per-lane counter offsets

	# 4x4 transpose: lane-wise states -> four contiguous blocks.
	VMRHF	XT0,XA0,XA1		# transpose data
	VMRHF	XT1,XA2,XA3
	VMRLF	XT2,XA0,XA1
	VMRLF	XT3,XA2,XA3
	VPDI	XA0,XT0,XT1,0b0000
	VPDI	XA1,XT0,XT1,0b0101
	VPDI	XA2,XT2,XT3,0b0000
	VPDI	XA3,XT2,XT3,0b0101

	VMRHF	XT0,XB0,XB1
	VMRHF	XT1,XB2,XB3
	VMRLF	XT2,XB0,XB1
	VMRLF	XT3,XB2,XB3
	VPDI	XB0,XT0,XT1,0b0000
	VPDI	XB1,XT0,XT1,0b0101
	VPDI	XB2,XT2,XT3,0b0000
	VPDI	XB3,XT2,XT3,0b0101

	VMRHF	XT0,XC0,XC1
	VMRHF	XT1,XC2,XC3
	VMRLF	XT2,XC0,XC1
	VMRLF	XT3,XC2,XC3
	VPDI	XC0,XT0,XT1,0b0000
	VPDI	XC1,XT0,XT1,0b0101
	VPDI	XC2,XT2,XT3,0b0000
	VPDI	XC3,XT2,XT3,0b0101

	VMRHF	XT0,XD0,XD1
	VMRHF	XT1,XD2,XD3
	VMRLF	XT2,XD0,XD1
	VMRLF	XT3,XD2,XD3
	VPDI	XD0,XT0,XT1,0b0000
	VPDI	XD1,XT0,XT1,0b0101
	VPDI	XD2,XT2,XT3,0b0000
	VPDI	XD3,XT2,XT3,0b0101

	# Block 0: add original state, byte-swap, XOR with input.
	VAF	XA0,XA0,K0
	VAF	XB0,XB0,K1
	VAF	XC0,XC0,K2
	VAF	XD0,XD0,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40

	# Block 1 (may be the partial tail).
	VAF	XA0,XA1,K0
	VAF	XB0,XB1,K1
	VAF	XC0,XC1,K2
	VAF	XD0,XD1,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_4x

	# Block 2.
	VAF	XA0,XA2,K0
	VAF	XB0,XB2,K1
	VAF	XC0,XC2,K2
	VAF	XD0,XD2,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_4x

	# Block 3.
	VAF	XA0,XA3,K0
	VAF	XB0,XB3,K1
	VAF	XC0,XC3,K2
	VAF	XD0,XD3,K3

	VPERM	XA0,XA0,XA0,BEPERM
	VPERM	XB0,XB0,XB0,BEPERM
	VPERM	XC0,XC0,XC0,BEPERM
	VPERM	XD0,XD0,XD0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_4x

	VLM	XT0,XT3,0,INP,0

	VX	XT0,XT0,XA0
	VX	XT1,XT1,XB0
	VX	XT2,XT2,XC0
	VX	XT3,XT3,XD0

	VSTM	XT0,XT3,0,OUT,0

.Ldone_4x:
	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14

.Ltail_4x:
	# Spill the current keystream block to the stack and XOR the
	# remaining LEN (< 64) bytes one at a time.
	VLR	XT0,XC0
	VLR	XT1,XD0

	VST	XA0,8*8+0x00,,SP
	VST	XB0,8*8+0x10,,SP
	VST	XT0,8*8+0x20,,SP
	VST	XT1,8*8+0x30,,SP

	lghi	%r1,0

.Loop_tail_4x:
	llgc	%r5,0(%r1,INP)
	llgc	%r6,8*8(%r1,SP)
	xr	%r6,%r5
	stc	%r6,0(%r1,OUT)
	la	%r1,1(%r1)
	brct	LEN,.Loop_tail_4x

	lmg	%r6,%r7,6*8(SP)
	BR_EX	%r14
SYM_FUNC_END(chacha20_vx_4x)
#undef OUT
#undef INP
#undef LEN
#undef KEY
#undef COUNTER

#undef BEPERM

#undef K0
#undef K1
#undef K2
#undef K3


#############################################################################
# void chacha20_vx(u8 *out, const u8 *inp, size_t len,
#		   const u32 *key, const u32 *counter)

#define OUT	%r2
#define INP	%r3
#define LEN	%r4
#define KEY	%r5
#define COUNTER	%r6

#define BEPERM	%v31

/* NOTE: K0 aliases T0 (%v27); K0..BEPERM form the contiguous range
 * %v24..%v31 used by VLM to (re)load sigma and the counter increments. */
#define K0	%v27
#define K1	%v24
#define K2	%v25
#define K3	%v26

/* Six independent full ChaCha states (6 x 64 bytes per outer pass). */
#define A0	%v0
#define B0	%v1
#define C0	%v2
#define D0	%v3

#define A1	%v4
#define B1	%v5
#define C1	%v6
#define D1	%v7

#define A2	%v8
#define B2	%v9
#define C2	%v10
#define D2	%v11

#define A3	%v12
#define B3	%v13
#define C3	%v14
#define D3	%v15

#define A4	%v16
#define B4	%v17
#define C4	%v18
#define D4	%v19

#define A5	%v20
#define B5	%v21
#define C5	%v22
#define D5	%v23

#define T0	%v27
#define T1	%v28
#define T2	%v29
#define T3	%v30
# Six-way interleaved ChaCha20: each of the six states lives whole in
# four vector registers (A/B/C/D rows), with VSLDB doing the in-register
# row rotations for the diagonal rounds.  Requests of <= 256 bytes are
# delegated to chacha20_vx_4x; the tail (< 64 bytes) goes byte-wise
# through a stack buffer at .Ltail_vx.
SYM_FUNC_START(chacha20_vx)
	clgfi	LEN,256
	jle	chacha20_vx_4x		# tail-call the 4-way version
	stmg	%r6,%r7,6*8(SP)

	lghi	%r1,-FRAME
	lgr	%r0,SP
	la	SP,0(%r1,SP)
	stg	%r0,0(SP)		# back-chain

	larl	%r7,sigma
	lhi	%r0,10			# 10 double-rounds = 20 rounds

	VLM	K1,K2,0,KEY,0		# load key
	VL	K3,0,,COUNTER		# load counter

	VLM	K0,BEPERM,0,%r7,4	# load sigma, increments, ...

.Loop_outer_vx:
	VLR	A0,K0
	VLR	B0,K1
	VLR	A1,K0
	VLR	B1,K1
	VLR	A2,K0
	VLR	B2,K1
	VLR	A3,K0
	VLR	B3,K1
	VLR	A4,K0
	VLR	B4,K1
	VLR	A5,K0
	VLR	B5,K1

	VLR	D0,K3
	VAF	D1,K3,T1		# K[3]+1
	VAF	D2,K3,T2		# K[3]+2
	VAF	D3,K3,T3		# K[3]+3
	VAF	D4,D2,T2		# K[3]+4
	VAF	D5,D2,T3		# K[3]+5

	VLR	C0,K2
	VLR	C1,K2
	VLR	C2,K2
	VLR	C3,K2
	VLR	C4,K2
	VLR	C5,K2

	# Stash D1..D3 so the post-round feed-forward can re-add them
	# (T0..T3 get clobbered as scratch inside the loop epilogue).
	VLR	T1,D1
	VLR	T2,D2
	VLR	T3,D3

.Loop_vx:
	# Column round across all six states.
	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,16
	VERLLF	D1,D1,16
	VERLLF	D2,D2,16
	VERLLF	D3,D3,16
	VERLLF	D4,D4,16
	VERLLF	D5,D5,16

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,12
	VERLLF	B1,B1,12
	VERLLF	B2,B2,12
	VERLLF	B3,B3,12
	VERLLF	B4,B4,12
	VERLLF	B5,B5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,8
	VERLLF	D1,D1,8
	VERLLF	D2,D2,8
	VERLLF	D3,D3,8
	VERLLF	D4,D4,8
	VERLLF	D5,D5,8

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,7
	VERLLF	B1,B1,7
	VERLLF	B2,B2,7
	VERLLF	B3,B3,7
	VERLLF	B4,B4,7
	VERLLF	B5,B5,7

	# Rotate rows B/C/D within each state for the diagonal round.
	VSLDB	C0,C0,C0,8
	VSLDB	C1,C1,C1,8
	VSLDB	C2,C2,C2,8
	VSLDB	C3,C3,C3,8
	VSLDB	C4,C4,C4,8
	VSLDB	C5,C5,C5,8
	VSLDB	B0,B0,B0,4
	VSLDB	B1,B1,B1,4
	VSLDB	B2,B2,B2,4
	VSLDB	B3,B3,B3,4
	VSLDB	B4,B4,B4,4
	VSLDB	B5,B5,B5,4
	VSLDB	D0,D0,D0,12
	VSLDB	D1,D1,D1,12
	VSLDB	D2,D2,D2,12
	VSLDB	D3,D3,D3,12
	VSLDB	D4,D4,D4,12
	VSLDB	D5,D5,D5,12

	# Diagonal round.
	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,16
	VERLLF	D1,D1,16
	VERLLF	D2,D2,16
	VERLLF	D3,D3,16
	VERLLF	D4,D4,16
	VERLLF	D5,D5,16

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,12
	VERLLF	B1,B1,12
	VERLLF	B2,B2,12
	VERLLF	B3,B3,12
	VERLLF	B4,B4,12
	VERLLF	B5,B5,12

	VAF	A0,A0,B0
	VAF	A1,A1,B1
	VAF	A2,A2,B2
	VAF	A3,A3,B3
	VAF	A4,A4,B4
	VAF	A5,A5,B5
	VX	D0,D0,A0
	VX	D1,D1,A1
	VX	D2,D2,A2
	VX	D3,D3,A3
	VX	D4,D4,A4
	VX	D5,D5,A5
	VERLLF	D0,D0,8
	VERLLF	D1,D1,8
	VERLLF	D2,D2,8
	VERLLF	D3,D3,8
	VERLLF	D4,D4,8
	VERLLF	D5,D5,8

	VAF	C0,C0,D0
	VAF	C1,C1,D1
	VAF	C2,C2,D2
	VAF	C3,C3,D3
	VAF	C4,C4,D4
	VAF	C5,C5,D5
	VX	B0,B0,C0
	VX	B1,B1,C1
	VX	B2,B2,C2
	VX	B3,B3,C3
	VX	B4,B4,C4
	VX	B5,B5,C5
	VERLLF	B0,B0,7
	VERLLF	B1,B1,7
	VERLLF	B2,B2,7
	VERLLF	B3,B3,7
	VERLLF	B4,B4,7
	VERLLF	B5,B5,7

	# Rotate rows back to column orientation.
	VSLDB	C0,C0,C0,8
	VSLDB	C1,C1,C1,8
	VSLDB	C2,C2,C2,8
	VSLDB	C3,C3,C3,8
	VSLDB	C4,C4,C4,8
	VSLDB	C5,C5,C5,8
	VSLDB	B0,B0,B0,12
	VSLDB	B1,B1,B1,12
	VSLDB	B2,B2,B2,12
	VSLDB	B3,B3,B3,12
	VSLDB	B4,B4,B4,12
	VSLDB	B5,B5,B5,12
	VSLDB	D0,D0,D0,4
	VSLDB	D1,D1,D1,4
	VSLDB	D2,D2,D2,4
	VSLDB	D3,D3,D3,4
	VSLDB	D4,D4,D4,4
	VSLDB	D5,D5,D5,4
	brct	%r0,.Loop_vx

	# Feed-forward + output, one 64-byte block at a time; each block
	# checks whether a full block of input remains.
	VAF	A0,A0,K0
	VAF	B0,B0,K1
	VAF	C0,C0,K2
	VAF	D0,D0,K3
	VAF	A1,A1,K0
	VAF	D1,D1,T1		# +K[3]+1

	VPERM	A0,A0,A0,BEPERM
	VPERM	B0,B0,B0,BEPERM
	VPERM	C0,C0,C0,BEPERM
	VPERM	D0,D0,D0,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VAF	D2,D2,T2		# +K[3]+2
	VAF	D3,D3,T3		# +K[3]+3
	VLM	T0,T3,0,INP,0

	VX	A0,A0,T0
	VX	B0,B0,T1
	VX	C0,C0,T2
	VX	D0,D0,T3

	VLM	K0,T3,0,%r7,4		# re-load sigma and increments

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	B1,B1,K1
	VAF	C1,C1,K2

	VPERM	A0,A1,A1,BEPERM
	VPERM	B0,B1,B1,BEPERM
	VPERM	C0,C1,C1,BEPERM
	VPERM	D0,D1,D1,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A2,A2,K0
	VAF	B2,B2,K1
	VAF	C2,C2,K2

	VPERM	A0,A2,A2,BEPERM
	VPERM	B0,B2,B2,BEPERM
	VPERM	C0,C2,C2,BEPERM
	VPERM	D0,D2,D2,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A3,A3,K0
	VAF	B3,B3,K1
	VAF	C3,C3,K2
	VAF	D2,K3,T3		# K[3]+3

	VPERM	A0,A3,A3,BEPERM
	VPERM	B0,B3,B3,BEPERM
	VPERM	C0,C3,C3,BEPERM
	VPERM	D0,D3,D3,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VAF	D3,D2,T1		# K[3]+4
	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A4,A4,K0
	VAF	B4,B4,K1
	VAF	C4,C4,K2
	VAF	D4,D4,D3		# +K[3]+4
	VAF	D3,D3,T1		# K[3]+5
	VAF	K3,D2,T3		# K[3]+=6

	VPERM	A0,A4,A4,BEPERM
	VPERM	B0,B4,B4,BEPERM
	VPERM	C0,C4,C4,BEPERM
	VPERM	D0,D4,D4,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	aghi	LEN,-0x40
	je	.Ldone_vx

	VAF	A5,A5,K0
	VAF	B5,B5,K1
	VAF	C5,C5,K2
	VAF	D5,D5,D3		# +K[3]+5

	VPERM	A0,A5,A5,BEPERM
	VPERM	B0,B5,B5,BEPERM
	VPERM	C0,C5,C5,BEPERM
	VPERM	D0,D5,D5,BEPERM

	clgfi	LEN,0x40
	jl	.Ltail_vx

	VLM	A1,D1,0,INP,0

	VX	A0,A0,A1
	VX	B0,B0,B1
	VX	C0,C0,C1
	VX	D0,D0,D1

	VSTM	A0,D0,0,OUT,0

	la	INP,0x40(INP)
	la	OUT,0x40(OUT)
	lhi	%r0,10			# reset round counter for next pass
	aghi	LEN,-0x40
	jne	.Loop_outer_vx

.Ldone_vx:
	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14

.Ltail_vx:
	# Spill current keystream block (A0..D0, already byte-swapped)
	# and XOR the remaining LEN (< 64) bytes one at a time.
	VSTM	A0,D0,8*8,SP,3
	lghi	%r1,0

.Loop_tail_vx:
	llgc	%r5,0(%r1,INP)
	llgc	%r6,8*8(%r1,SP)
	xr	%r6,%r5
	stc	%r6,0(%r1,OUT)
	la	%r1,1(%r1)
	brct	LEN,.Loop_tail_vx

	lmg	%r6,%r7,FRAME+6*8(SP)
	la	SP,FRAME(SP)
	BR_EX	%r14
SYM_FUNC_END(chacha20_vx)
.previous