Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/serpent-sse2-i586-asm_32.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Serpent Cipher 4-way parallel algorithm (i586/SSE2)
 *
 * Copyright (C) 2011 Jussi Kivilinna <[email protected]>
 *
 * Based on crypto/serpent.c by
 *  Copyright (C) 2002 Dag Arne Osvik <[email protected]>
 *                2003 Herbert Valerio Riedel <[email protected]>
 */
11
12
#include <linux/linkage.h>
13
14
.file "serpent-sse2-i586-asm_32.S"
15
.text
16
17
/*
 * Byte offsets of the stack-passed arguments, relative to %esp as it is on
 * function entry.  Both entry points below read them as arg_xxx(%esp) before
 * touching the stack pointer.  (Offset 0 is presumably the return address,
 * per the i386 stack-argument convention.)
 */
#define arg_ctx 4
#define arg_dst 8
#define arg_src 12
#define arg_xor 16
21
22
/**********************************************************************
  4-way SSE2 serpent
 **********************************************************************/
25
/* Base pointer for round-key loads; get_key() addresses key words off it. */
#define CTX %edx

/*
 * Five working registers.  Every macro invocation below receives four of
 * them as state words and one as a temporary; which physical register plays
 * which role is permuted from one round to the next by the argument order
 * at the call sites.
 */
#define RA %xmm0
#define RB %xmm1
#define RC %xmm2
#define RD %xmm3
#define RE %xmm4

/* Scratch registers used for broadcast key words and block transposition. */
#define RT0 %xmm5
#define RT1 %xmm6

/*
 * All-ones constant: both entry points set it with "pcmpeqd RNOT, RNOT", and
 * the S-box macros XOR with it to complement a register.
 */
#define RNOT %xmm7
37
38
/*
 * get_key(i, j, t): load 32-bit word j of round key i and replicate it.
 *
 * The word is read from byte offset (4*i + j)*4 relative to CTX into the low
 * dword of t; "pshufd $0" then copies that dword into all four lanes of t.
 */
#define get_key(i, j, t) \
	movd (4*(i)+(j))*4(CTX), t; \
	pshufd $0, t, t;
41
42
/*
 * K(x0, x1, x2, x3, x4, i): XOR round key i into the state.
 *
 *   x0 ^= key[i][0];  x1 ^= key[i][1];  x2 ^= key[i][2];  x3 ^= key[i][3]
 *
 * Each key word is broadcast to all four lanes by get_key().
 * Clobbers: x4, RT0, RT1.
 */
#define K(x0, x1, x2, x3, x4, i) \
	get_key(i, 0, x4); \
	get_key(i, 1, RT0); \
	get_key(i, 2, RT1); \
	pxor x4, x0; \
	pxor RT0, x1; \
	pxor RT1, x2; \
	get_key(i, 3, x4); \
	pxor x4, x3;
51
52
/*
 * LK(x0, x1, x2, x3, x4, i): linear mixing of the state followed by an XOR
 * with round key i (encryption direction).
 *
 * Per 32-bit lane, in this order:
 *   x0 = rol(x0, 13)
 *   x2 = rol(x2, 3)
 *   x1 = rol(x1 ^ x0 ^ x2, 1)
 *   x3 = rol(x3 ^ x2 ^ (x0 << 3), 7)
 *   x0 = rol(x0 ^ x1 ^ x3, 5)
 *   x2 = rol(x2 ^ x3 ^ (x1 << 7), 22)
 * The key words are XORed in as each state word reaches its final value
 * (x1 and x3 before the last two rotates are finished, x0 and x2 after).
 *
 * SSE2 has no packed rotate, so every rol is built from a copy, a left
 * shift, a right shift by (32 - n) and an OR.
 * Clobbers: x4, RT0.
 */
#define LK(x0, x1, x2, x3, x4, i) \
	/* x0 = rol(x0, 13) */ \
	movdqa x0, x4; \
	pslld $13, x0; \
	psrld $(32 - 13), x4; \
	por x4, x0; \
	pxor x0, x1; \
	/* x2 = rol(x2, 3) */ \
	movdqa x2, x4; \
	pslld $3, x2; \
	psrld $(32 - 3), x4; \
	por x4, x2; \
	pxor x2, x1; \
	/* x1 = rol(x1, 1) */ \
	movdqa x1, x4; \
	pslld $1, x1; \
	psrld $(32 - 1), x4; \
	por x4, x1; \
	/* x3 ^= x2 ^ (x0 << 3) */ \
	movdqa x0, x4; \
	pslld $3, x4; \
	pxor x2, x3; \
	pxor x4, x3; \
	/* x3 = rol(x3, 7) */ \
	movdqa x3, x4; \
	pslld $7, x3; \
	psrld $(32 - 7), x4; \
	por x4, x3; \
	/* x0 ^= x1 ^ x3;  x2 ^= x3 ^ (x1 << 7) */ \
	movdqa x1, x4; \
	pslld $7, x4; \
	pxor x1, x0; \
	pxor x3, x0; \
	pxor x3, x2; \
	pxor x4, x2; \
	/* x1, x3 are final: mix their key words while x0 is being rotated */ \
	movdqa x0, x4; \
	get_key(i, 1, RT0); \
	pxor RT0, x1; \
	get_key(i, 3, RT0); \
	pxor RT0, x3; \
	/* x0 = rol(x0, 5) */ \
	pslld $5, x0; \
	psrld $(32 - 5), x4; \
	por x4, x0; \
	/* x2 = rol(x2, 22) */ \
	movdqa x2, x4; \
	pslld $22, x2; \
	psrld $(32 - 22), x4; \
	por x4, x2; \
	get_key(i, 0, RT0); \
	pxor RT0, x0; \
	get_key(i, 2, RT0); \
	pxor RT0, x2;
97
98
/*
 * KL(x0, x1, x2, x3, x4, i): XOR round key i into the state, then undo the
 * linear mixing performed by LK (decryption direction).
 *
 * Per 32-bit lane, after the key XOR:
 *   x0 = ror(x0, 5);   x2 = ror(x2, 22)
 *   x2 ^= x3 ^ (x1 << 7);   x0 ^= x3 ^ x1
 *   x1 = ror(x1, 1);   x3 = ror(x3, 7)
 *   x1 ^= x0;   x3 ^= (x0 << 3)
 *   x0 = ror(x0, 13)
 *   x1 ^= x2;   x3 ^= x2
 *   x2 = ror(x2, 3)
 *
 * Each ror is built from a copy, a right shift, a left shift by (32 - n)
 * and an OR.
 * Clobbers: x4, plus RT0 and RT1 through K().
 */
#define KL(x0, x1, x2, x3, x4, i) \
	K(x0, x1, x2, x3, x4, i); \
	/* x0 = ror(x0, 5) */ \
	movdqa x0, x4; \
	psrld $5, x0; \
	pslld $(32 - 5), x4; \
	por x4, x0; \
	/* x2 = ror(x2, 22) */ \
	movdqa x2, x4; \
	psrld $22, x2; \
	pslld $(32 - 22), x4; \
	por x4, x2; \
	/* x2 ^= x3 ^ (x1 << 7);  x0 ^= x3 ^ x1 */ \
	pxor x3, x2; \
	pxor x3, x0; \
	movdqa x1, x4; \
	pslld $7, x4; \
	pxor x1, x0; \
	pxor x4, x2; \
	/* x1 = ror(x1, 1) */ \
	movdqa x1, x4; \
	psrld $1, x1; \
	pslld $(32 - 1), x4; \
	por x4, x1; \
	/* x3 = ror(x3, 7) */ \
	movdqa x3, x4; \
	psrld $7, x3; \
	pslld $(32 - 7), x4; \
	por x4, x3; \
	/* x1 ^= x0;  x3 ^= (x0 << 3) */ \
	pxor x0, x1; \
	movdqa x0, x4; \
	pslld $3, x4; \
	pxor x4, x3; \
	/* x0 = ror(x0, 13) */ \
	movdqa x0, x4; \
	psrld $13, x0; \
	pslld $(32 - 13), x4; \
	por x4, x0; \
	pxor x2, x1; \
	pxor x2, x3; \
	/* x2 = ror(x2, 3) */ \
	movdqa x2, x4; \
	psrld $3, x2; \
	pslld $(32 - 3), x4; \
	por x4, x2;
136
137
/*
 * S0(x0, x1, x2, x3, x4): S-box 0, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x2, x1, x3, x0) as the state and x4 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S0(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	por x0, x3; \
	pxor x4, x0; \
	pxor x2, x4; \
	pxor RNOT, x4; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x4, x1; \
	pxor x0, x2; \
	pxor x3, x0; \
	por x0, x4; \
	pxor x2, x0; \
	pand x1, x2; \
	pxor x2, x3; \
	pxor RNOT, x1; \
	pxor x4, x2; \
	pxor x2, x1;
155
156
/*
 * S1(x0, x1, x2, x3, x4): S-box 1, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x4, x2, x3, x0) as the state and x1 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S1(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x0, x1; \
	pxor x3, x0; \
	pxor RNOT, x3; \
	pand x1, x4; \
	por x1, x0; \
	pxor x2, x3; \
	pxor x3, x0; \
	pxor x3, x1; \
	pxor x4, x3; \
	por x4, x1; \
	pxor x2, x4; \
	pand x0, x2; \
	pxor x1, x2; \
	por x0, x1; \
	pxor RNOT, x0; \
	pxor x2, x0; \
	pxor x1, x4;
175
176
/*
 * S2(x0, x1, x2, x3, x4): S-box 2, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x4, x1, x0, x3) as the state and x2 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S2(x0, x1, x2, x3, x4) \
	pxor RNOT, x3; \
	pxor x0, x1; \
	movdqa x0, x4; \
	pand x2, x0; \
	pxor x3, x0; \
	por x4, x3; \
	pxor x1, x2; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x2, x0; \
	pand x3, x2; \
	por x1, x3; \
	pxor RNOT, x0; \
	pxor x0, x3; \
	pxor x0, x4; \
	pxor x2, x0; \
	por x2, x1;
194
195
/*
 * S3(x0, x1, x2, x3, x4): S-box 3, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  This S-box needs no
 * complement, so RNOT is not used.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x3, x4, x1, x0) as the state and x2 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S3(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x3, x1; \
	por x0, x3; \
	pand x0, x4; \
	pxor x2, x0; \
	pxor x1, x2; \
	pand x3, x1; \
	pxor x3, x2; \
	por x4, x0; \
	pxor x3, x4; \
	pxor x0, x1; \
	pand x3, x0; \
	pand x4, x3; \
	pxor x2, x3; \
	por x1, x4; \
	pand x1, x2; \
	pxor x3, x4; \
	pxor x3, x0; \
	pxor x2, x3;
215
216
/*
 * S4(x0, x1, x2, x3, x4): S-box 4, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x1, x2, x3, x4) as the state and x0 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S4(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pand x0, x3; \
	pxor x4, x0; \
	pxor x2, x3; \
	por x4, x2; \
	pxor x1, x0; \
	pxor x3, x4; \
	por x0, x2; \
	pxor x1, x2; \
	pand x0, x1; \
	pxor x4, x1; \
	pand x2, x4; \
	pxor x3, x2; \
	pxor x0, x4; \
	por x1, x3; \
	pxor RNOT, x1; \
	pxor x0, x3;
234
235
/*
 * S5(x0, x1, x2, x3, x4): S-box 5, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x4, x0, x1, x3) as the state and x2 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S5(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	por x0, x1; \
	pxor x1, x2; \
	pxor RNOT, x3; \
	pxor x0, x4; \
	pxor x2, x0; \
	pand x4, x1; \
	por x3, x4; \
	pxor x0, x4; \
	pand x3, x0; \
	pxor x3, x1; \
	pxor x2, x3; \
	pxor x1, x0; \
	pand x4, x2; \
	pxor x2, x1; \
	pand x0, x2; \
	pxor x2, x3;
253
254
/*
 * S6(x0, x1, x2, x3, x4): S-box 6, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x2, x4, x1, x3) as the state and x0 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S6(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	pxor x0, x3; \
	pxor x2, x1; \
	pxor x0, x2; \
	pand x3, x0; \
	por x3, x1; \
	pxor RNOT, x4; \
	pxor x1, x0; \
	pxor x2, x1; \
	pxor x4, x3; \
	pxor x0, x4; \
	pand x0, x2; \
	pxor x1, x4; \
	pxor x3, x2; \
	pand x1, x3; \
	pxor x0, x3; \
	pxor x2, x1;
272
273
/*
 * S7(x0, x1, x2, x3, x4): S-box 7, evaluated with bitwise operations on the
 * four state registers x0..x3, using x4 as a temporary.  "pxor RNOT, r"
 * complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x4, x2, x3, x0) as the state and x1 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define S7(x0, x1, x2, x3, x4) \
	pxor RNOT, x1; \
	movdqa x1, x4; \
	pxor RNOT, x0; \
	pand x2, x1; \
	pxor x3, x1; \
	por x4, x3; \
	pxor x2, x4; \
	pxor x3, x2; \
	pxor x0, x3; \
	por x1, x0; \
	pand x0, x2; \
	pxor x4, x0; \
	pxor x3, x4; \
	pand x0, x3; \
	pxor x1, x4; \
	pxor x4, x2; \
	pxor x1, x3; \
	por x0, x4; \
	pxor x1, x4;
293
294
/*
 * SI0(x0, x1, x2, x3, x4): inverse S-box 0 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x2, x4, x1, x0) as the state and x3 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI0(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pxor x0, x1; \
	por x1, x3; \
	pxor x1, x4; \
	pxor RNOT, x0; \
	pxor x3, x2; \
	pxor x0, x3; \
	pand x1, x0; \
	pxor x2, x0; \
	pand x3, x2; \
	pxor x4, x3; \
	pxor x3, x2; \
	pxor x3, x1; \
	pand x0, x3; \
	pxor x0, x1; \
	pxor x2, x0; \
	pxor x3, x4;
312
313
/*
 * SI1(x0, x1, x2, x3, x4): inverse S-box 1 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x4, x1, x2, x3) as the state and x0 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI1(x0, x1, x2, x3, x4) \
	pxor x3, x1; \
	movdqa x0, x4; \
	pxor x2, x0; \
	pxor RNOT, x2; \
	por x1, x4; \
	pxor x3, x4; \
	pand x1, x3; \
	pxor x2, x1; \
	pand x4, x2; \
	pxor x1, x4; \
	por x3, x1; \
	pxor x0, x3; \
	pxor x0, x2; \
	por x4, x0; \
	pxor x4, x2; \
	pxor x0, x1; \
	pxor x1, x4;
331
332
/*
 * SI2(x0, x1, x2, x3, x4): inverse S-box 2 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x1, x4, x3, x2) as the state and x0 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI2(x0, x1, x2, x3, x4) \
	pxor x1, x2; \
	movdqa x3, x4; \
	pxor RNOT, x3; \
	por x2, x3; \
	pxor x4, x2; \
	pxor x0, x4; \
	pxor x1, x3; \
	por x2, x1; \
	pxor x0, x2; \
	pxor x4, x1; \
	por x3, x4; \
	pxor x3, x2; \
	pxor x2, x4; \
	pand x1, x2; \
	pxor x3, x2; \
	pxor x4, x3; \
	pxor x0, x4;
350
351
/*
 * SI3(x0, x1, x2, x3, x4): inverse S-box 3 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  This S-box needs no complement, so RNOT is not used.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x2, x0, x4, x3) as the state and x1 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI3(x0, x1, x2, x3, x4) \
	pxor x1, x2; \
	movdqa x1, x4; \
	pand x2, x1; \
	pxor x0, x1; \
	por x4, x0; \
	pxor x3, x4; \
	pxor x3, x0; \
	por x1, x3; \
	pxor x2, x1; \
	pxor x3, x1; \
	pxor x2, x0; \
	pxor x3, x2; \
	pand x1, x3; \
	pxor x0, x1; \
	pand x2, x0; \
	pxor x3, x4; \
	pxor x0, x3; \
	pxor x1, x0;
370
371
/*
 * SI4(x0, x1, x2, x3, x4): inverse S-box 4 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x0, x2, x4, x3) as the state and x1 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI4(x0, x1, x2, x3, x4) \
	pxor x3, x2; \
	movdqa x0, x4; \
	pand x1, x0; \
	pxor x2, x0; \
	por x3, x2; \
	pxor RNOT, x4; \
	pxor x0, x1; \
	pxor x2, x0; \
	pand x4, x2; \
	pxor x0, x2; \
	por x4, x0; \
	pxor x3, x0; \
	pand x2, x3; \
	pxor x3, x4; \
	pxor x1, x3; \
	pand x0, x1; \
	pxor x1, x4; \
	pxor x3, x0;
390
391
/*
 * SI5(x0, x1, x2, x3, x4): inverse S-box 5 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x1, x4, x0, x2) as the state and x3 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI5(x0, x1, x2, x3, x4) \
	movdqa x1, x4; \
	por x2, x1; \
	pxor x4, x2; \
	pxor x3, x1; \
	pand x4, x3; \
	pxor x3, x2; \
	por x0, x3; \
	pxor RNOT, x0; \
	pxor x2, x3; \
	por x0, x2; \
	pxor x1, x4; \
	pxor x4, x2; \
	pand x0, x4; \
	pxor x1, x0; \
	pxor x3, x1; \
	pand x2, x0; \
	pxor x3, x2; \
	pxor x2, x0; \
	pxor x4, x2; \
	pxor x3, x4;
412
413
/*
 * SI6(x0, x1, x2, x3, x4): inverse S-box 6 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x2, x4, x3, x0) as the state and x1 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI6(x0, x1, x2, x3, x4) \
	pxor x2, x0; \
	movdqa x0, x4; \
	pand x3, x0; \
	pxor x3, x2; \
	pxor x2, x0; \
	pxor x1, x3; \
	por x4, x2; \
	pxor x3, x2; \
	pand x0, x3; \
	pxor RNOT, x0; \
	pxor x1, x3; \
	pand x2, x1; \
	pxor x0, x4; \
	pxor x4, x3; \
	pxor x2, x4; \
	pxor x1, x0; \
	pxor x0, x2;
431
432
/*
 * SI7(x0, x1, x2, x3, x4): inverse S-box 7 (decryption), evaluated with
 * bitwise operations on the four state registers x0..x3, using x4 as a
 * temporary.  "pxor RNOT, r" complements r.
 *
 * The results are left in a permuted register order: the callers in this
 * file continue with (x1, x3, x0, x4) as the state and x2 as the free
 * temporary.  The instruction order is significant; do not reorder.
 */
#define SI7(x0, x1, x2, x3, x4) \
	movdqa x3, x4; \
	pand x0, x3; \
	pxor x2, x0; \
	por x4, x2; \
	pxor x1, x4; \
	pxor RNOT, x0; \
	por x3, x1; \
	pxor x0, x4; \
	pand x2, x0; \
	pxor x1, x0; \
	pand x2, x1; \
	pxor x2, x3; \
	pxor x3, x4; \
	pand x3, x2; \
	por x0, x3; \
	pxor x4, x1; \
	pxor x4, x3; \
	pand x0, x4; \
	pxor x2, x4;
452
453
/*
 * transpose_4x4(x0, x1, x2, x3, t0, t1, t2): transpose, in place, the 4x4
 * matrix of 32-bit words whose rows are x0..x3.
 *
 * Dwords are first interleaved pairwise (punpck{l,h}dq), then the resulting
 * 64-bit halves are interleaved (punpck{l,h}qdq), so that on exit register
 * xN holds word N of each of the four input rows.
 *
 * Clobbers: t1, t2.  t0 is accepted for a uniform argument list but is not
 * referenced.
 */
#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	movdqa x0, t2; \
	punpckldq x1, x0; \
	punpckhdq x1, t2; \
	movdqa x2, t1; \
	punpckhdq x3, x2; \
	punpckldq x3, t1; \
	movdqa x0, x1; \
	punpcklqdq t1, x0; \
	punpckhqdq t1, x1; \
	movdqa t2, x3; \
	punpcklqdq x2, t2; \
	punpckhqdq x2, x3; \
	movdqa t2, x2;
467
468
/*
 * read_blocks(in, x0, x1, x2, x3, t0, t1, t2): load 64 bytes from (in) as
 * four 16-byte vectors and transpose them, so that xN ends up holding 32-bit
 * word N of each of the four loaded vectors.
 *
 * movdqu is used, so (in) need not be 16-byte aligned.
 * Clobbers: t1, t2 (through transpose_4x4).
 */
#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
	movdqu (0*4*4)(in), x0; \
	movdqu (1*4*4)(in), x1; \
	movdqu (2*4*4)(in), x2; \
	movdqu (3*4*4)(in), x3; \
	\
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
475
476
/*
 * write_blocks(out, x0, x1, x2, x3, t0, t1, t2): transpose x0..x3 back from
 * word-sliced form and store the four resulting 16-byte vectors to the 64
 * bytes at (out).
 *
 * movdqu is used, so (out) need not be 16-byte aligned.
 * Clobbers: x0..x3 (now transposed), t1, t2.
 */
#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu x0, (0*4*4)(out); \
	movdqu x1, (1*4*4)(out); \
	movdqu x2, (2*4*4)(out); \
	movdqu x3, (3*4*4)(out);
483
484
/*
 * xor_blocks(out, x0, x1, x2, x3, t0, t1, t2): like write_blocks, but each
 * 16-byte vector is XORed with the bytes already present at (out) before it
 * is stored back, i.e. out[n] ^= result[n].
 *
 * t0 is the load temporary for the existing contents of (out).
 * Clobbers: x0..x3, t0, t1, t2.
 */
#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
	\
	movdqu (0*4*4)(out), t0; \
	pxor t0, x0; \
	movdqu x0, (0*4*4)(out); \
	movdqu (1*4*4)(out), t0; \
	pxor t0, x1; \
	movdqu x1, (1*4*4)(out); \
	movdqu (2*4*4)(out), t0; \
	pxor t0, x2; \
	movdqu x2, (2*4*4)(out); \
	movdqu (3*4*4)(out), t0; \
	pxor t0, x3; \
	movdqu x3, (3*4*4)(out);
499
500
SYM_FUNC_START(__serpent_enc_blk_4way)
	/* Encrypt four 16-byte blocks (64 bytes) in parallel.
	 *
	 * input:
	 *	arg_ctx(%esp): ctx, CTX (round keys are read through it)
	 *	arg_dst(%esp): dst
	 *	arg_src(%esp): src
	 *	arg_xor(%esp): bool, if true: xor output
	 *
	 * The result is stored to dst, or XORed into the 64 bytes already
	 * at dst when the xor byte is non-zero.
	 *
	 * Structure: K(0), then 31 rounds of S-box + LK with keys 1..31,
	 * then a final S7 followed by K(32).  The register arguments of
	 * each step follow the permuted output order of the previous one.
	 *
	 * clobbers: %eax, %edx, %xmm0-%xmm7, flags
	 */

	/* RNOT = all ones, used by the S-boxes as the complement mask */
	pcmpeqd RNOT, RNOT;

	movl arg_ctx(%esp), CTX;

	movl arg_src(%esp), %eax;
	read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	K(RA, RB, RC, RD, RE, 0);
	S0(RA, RB, RC, RD, RE); LK(RC, RB, RD, RA, RE, 1);
	S1(RC, RB, RD, RA, RE); LK(RE, RD, RA, RC, RB, 2);
	S2(RE, RD, RA, RC, RB); LK(RB, RD, RE, RC, RA, 3);
	S3(RB, RD, RE, RC, RA); LK(RC, RA, RD, RB, RE, 4);
	S4(RC, RA, RD, RB, RE); LK(RA, RD, RB, RE, RC, 5);
	S5(RA, RD, RB, RE, RC); LK(RC, RA, RD, RE, RB, 6);
	S6(RC, RA, RD, RE, RB); LK(RD, RB, RA, RE, RC, 7);
	S7(RD, RB, RA, RE, RC); LK(RC, RA, RE, RD, RB, 8);
	S0(RC, RA, RE, RD, RB); LK(RE, RA, RD, RC, RB, 9);
	S1(RE, RA, RD, RC, RB); LK(RB, RD, RC, RE, RA, 10);
	S2(RB, RD, RC, RE, RA); LK(RA, RD, RB, RE, RC, 11);
	S3(RA, RD, RB, RE, RC); LK(RE, RC, RD, RA, RB, 12);
	S4(RE, RC, RD, RA, RB); LK(RC, RD, RA, RB, RE, 13);
	S5(RC, RD, RA, RB, RE); LK(RE, RC, RD, RB, RA, 14);
	S6(RE, RC, RD, RB, RA); LK(RD, RA, RC, RB, RE, 15);
	S7(RD, RA, RC, RB, RE); LK(RE, RC, RB, RD, RA, 16);
	S0(RE, RC, RB, RD, RA); LK(RB, RC, RD, RE, RA, 17);
	S1(RB, RC, RD, RE, RA); LK(RA, RD, RE, RB, RC, 18);
	S2(RA, RD, RE, RB, RC); LK(RC, RD, RA, RB, RE, 19);
	S3(RC, RD, RA, RB, RE); LK(RB, RE, RD, RC, RA, 20);
	S4(RB, RE, RD, RC, RA); LK(RE, RD, RC, RA, RB, 21);
	S5(RE, RD, RC, RA, RB); LK(RB, RE, RD, RA, RC, 22);
	S6(RB, RE, RD, RA, RC); LK(RD, RC, RE, RA, RB, 23);
	S7(RD, RC, RE, RA, RB); LK(RB, RE, RA, RD, RC, 24);
	S0(RB, RE, RA, RD, RC); LK(RA, RE, RD, RB, RC, 25);
	S1(RA, RE, RD, RB, RC); LK(RC, RD, RB, RA, RE, 26);
	S2(RC, RD, RB, RA, RE); LK(RE, RD, RC, RA, RB, 27);
	S3(RE, RD, RC, RA, RB); LK(RA, RB, RD, RE, RC, 28);
	S4(RA, RB, RD, RE, RC); LK(RB, RD, RE, RC, RA, 29);
	S5(RB, RD, RE, RC, RA); LK(RA, RB, RD, RC, RE, 30);
	S6(RA, RB, RD, RC, RE); LK(RD, RE, RB, RC, RA, 31);
	S7(RD, RE, RB, RC, RA); K(RA, RB, RC, RD, RE, 32);

	movl arg_dst(%esp), %eax;

	/* non-zero xor flag: combine with the existing contents of dst */
	cmpb $0, arg_xor(%esp);
	jnz .L__enc_xor4;

	write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	RET;

.L__enc_xor4:
	xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	RET;
SYM_FUNC_END(__serpent_enc_blk_4way)
563
564
SYM_FUNC_START(serpent_dec_blk_4way)
	/* Decrypt four 16-byte blocks (64 bytes) in parallel.
	 *
	 * input:
	 *	arg_ctx(%esp): ctx, CTX (round keys are read through it)
	 *	arg_dst(%esp): dst
	 *	arg_src(%esp): src
	 *
	 * Structure: the encryption schedule run backwards.  K(32), then
	 * inverse S-box + KL with keys 31..1, then a final SI0 followed
	 * by K(0).  The register arguments of each step follow the
	 * permuted output order of the previous one, which is why the
	 * result is written from (RC, RD, RB, RE).
	 *
	 * clobbers: %eax, %edx, %xmm0-%xmm7
	 */

	/* RNOT = all ones, used by the S-boxes as the complement mask */
	pcmpeqd RNOT, RNOT;

	movl arg_ctx(%esp), CTX;

	movl arg_src(%esp), %eax;
	read_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);

	K(RA, RB, RC, RD, RE, 32);
	SI7(RA, RB, RC, RD, RE); KL(RB, RD, RA, RE, RC, 31);
	SI6(RB, RD, RA, RE, RC); KL(RA, RC, RE, RB, RD, 30);
	SI5(RA, RC, RE, RB, RD); KL(RC, RD, RA, RE, RB, 29);
	SI4(RC, RD, RA, RE, RB); KL(RC, RA, RB, RE, RD, 28);
	SI3(RC, RA, RB, RE, RD); KL(RB, RC, RD, RE, RA, 27);
	SI2(RB, RC, RD, RE, RA); KL(RC, RA, RE, RD, RB, 26);
	SI1(RC, RA, RE, RD, RB); KL(RB, RA, RE, RD, RC, 25);
	SI0(RB, RA, RE, RD, RC); KL(RE, RC, RA, RB, RD, 24);
	SI7(RE, RC, RA, RB, RD); KL(RC, RB, RE, RD, RA, 23);
	SI6(RC, RB, RE, RD, RA); KL(RE, RA, RD, RC, RB, 22);
	SI5(RE, RA, RD, RC, RB); KL(RA, RB, RE, RD, RC, 21);
	SI4(RA, RB, RE, RD, RC); KL(RA, RE, RC, RD, RB, 20);
	SI3(RA, RE, RC, RD, RB); KL(RC, RA, RB, RD, RE, 19);
	SI2(RC, RA, RB, RD, RE); KL(RA, RE, RD, RB, RC, 18);
	SI1(RA, RE, RD, RB, RC); KL(RC, RE, RD, RB, RA, 17);
	SI0(RC, RE, RD, RB, RA); KL(RD, RA, RE, RC, RB, 16);
	SI7(RD, RA, RE, RC, RB); KL(RA, RC, RD, RB, RE, 15);
	SI6(RA, RC, RD, RB, RE); KL(RD, RE, RB, RA, RC, 14);
	SI5(RD, RE, RB, RA, RC); KL(RE, RC, RD, RB, RA, 13);
	SI4(RE, RC, RD, RB, RA); KL(RE, RD, RA, RB, RC, 12);
	SI3(RE, RD, RA, RB, RC); KL(RA, RE, RC, RB, RD, 11);
	SI2(RA, RE, RC, RB, RD); KL(RE, RD, RB, RC, RA, 10);
	SI1(RE, RD, RB, RC, RA); KL(RA, RD, RB, RC, RE, 9);
	SI0(RA, RD, RB, RC, RE); KL(RB, RE, RD, RA, RC, 8);
	SI7(RB, RE, RD, RA, RC); KL(RE, RA, RB, RC, RD, 7);
	SI6(RE, RA, RB, RC, RD); KL(RB, RD, RC, RE, RA, 6);
	SI5(RB, RD, RC, RE, RA); KL(RD, RA, RB, RC, RE, 5);
	SI4(RD, RA, RB, RC, RE); KL(RD, RB, RE, RC, RA, 4);
	SI3(RD, RB, RE, RC, RA); KL(RE, RD, RA, RC, RB, 3);
	SI2(RE, RD, RA, RC, RB); KL(RD, RB, RC, RA, RE, 2);
	SI1(RD, RB, RC, RA, RE); KL(RE, RB, RC, RA, RD, 1);
	SI0(RE, RB, RC, RA, RD); K(RC, RD, RB, RE, RA, 0);

	movl arg_dst(%esp), %eax;
	write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);

	RET;
SYM_FUNC_END(serpent_dec_blk_4way)
617
618