Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/crypto/camellia-x86_64-asm_64.S
26451 views
1
/* SPDX-License-Identifier: GPL-2.0-or-later */
2
/*
3
* Camellia Cipher Algorithm (x86_64)
4
*
5
* Copyright (C) 2012 Jussi Kivilinna <[email protected]>
6
*/
7
8
#include <linux/linkage.h>
9
#include <linux/cfi_types.h>
10
11
.file "camellia-x86_64-asm_64.S"
12
.text
13
14
/*
 * Lookup tables used by the round macros.  They are defined outside this
 * file (hence .extern).  xor2ror16 indexes them with a scale of 8, i.e. as
 * arrays of 8-byte entries selected by one byte of cipher state.
 */
.extern camellia_sp10011110;
.extern camellia_sp22000222;
.extern camellia_sp03303033;
.extern camellia_sp00444404;
.extern camellia_sp02220222;
.extern camellia_sp30333033;
.extern camellia_sp44044404;
.extern camellia_sp11101110;

/* Short aliases so that the round macros stay readable. */
#define sp10011110 camellia_sp10011110
#define sp22000222 camellia_sp22000222
#define sp03303033 camellia_sp03303033
#define sp00444404 camellia_sp00444404
#define sp02220222 camellia_sp02220222
#define sp30333033 camellia_sp30333033
#define sp44044404 camellia_sp44044404
#define sp11101110 camellia_sp11101110
31
32
/*
 * Byte offset of key_length inside the context.  Presumably this is also
 * the byte size of key_table, which starts at offset 0 - confirm against
 * the C definition of struct camellia_ctx.
 */
#define CAMELLIA_TABLE_BYTE_LEN 272

/* struct camellia_ctx: */
#define key_table 0
#define key_length CAMELLIA_TABLE_BYTE_LEN
37
38
/*
 * register macros
 *
 * CTX  - context pointer (first argument, never modified here).
 * RIO  - pointer the pack/unpack macros load from and store to.
 *
 * RAB<n>/RCD<n> hold the first and second 8 bytes of block <n>: block 0 in
 * the 1-way code, blocks 0 and 1 in the 2-way code.  All four are legacy
 * registers because xor2ror16 needs both their low-byte (bl) and
 * second-byte (bh) forms.
 *
 * RT0..RT2 are scratch.  NOTE: RT0 is %rsi, the same register as RIO, so
 * RIO is destroyed by every round / fls macro and has to be reloaded from
 * RDST before the output is written.  RT1 is %r12, which is callee-saved;
 * the functions park its incoming value in RR12.
 *
 * RXOR - the "bool xor" argument in the encrypt functions; used as plain
 *        scratch / save slot in the decrypt functions.
 * RR12 - holds the caller's %r12 for the duration of a function.
 * RDST - copy of the dst argument.
 */
#define CTX %rdi
#define RIO %rsi
#define RIOd %esi

#define RAB0 %rax
#define RCD0 %rcx
#define RAB1 %rbx
#define RCD1 %rdx

#define RAB0d %eax
#define RCD0d %ecx
#define RAB1d %ebx
#define RCD1d %edx

#define RAB0bl %al
#define RCD0bl %cl
#define RAB1bl %bl
#define RCD1bl %dl

#define RAB0bh %ah
#define RCD0bh %ch
#define RAB1bh %bh
#define RCD1bh %dh

#define RT0 %rsi
#define RT1 %r12
#define RT2 %r8

#define RT0d %esi
#define RT1d %r12d
#define RT2d %r8d

#define RT2bl %r8b

#define RXOR %r9
#define RR12 %r10
#define RDST %r11

#define RXORd %r9d
#define RXORbl %r9b
79
80
/*
 * xor2ror16(T0, T1, tmp1, tmp2, ab, dst)
 *
 * Two table lookups keyed by the two lowest bytes of `ab`:
 *
 *	dst ^= T0[ab & 0xff];		(8-byte table entries)
 *	dst ^= T1[(ab >> 8) & 0xff];
 *	ab   = ror64(ab, 16);
 *
 * The table address is first materialised RIP-relative in a scratch
 * register, since a RIP-relative operand cannot also carry an index.
 *
 * `ab` must be a register macro with matching `bl` and `bh` variants and
 * tmp1/tmp2 must have matching `d` variants.  Clobbers tmp1, tmp2 and the
 * flags.
 */
#define xor2ror16(T0, T1, tmp1, tmp2, ab, dst) \
	leaq T0(%rip), tmp1; \
	movzbl ab ## bl, tmp2 ## d; \
	xorq (tmp1, tmp2, 8), dst; \
	leaq T1(%rip), tmp2; \
	movzbl ab ## bh, tmp1 ## d; /* read before ab is rotated */ \
	rorq $16, ab; \
	xorq (tmp2, tmp1, 8), dst;
88
89
/**********************************************************************
90
1-way camellia
91
**********************************************************************/
92
/*
 * roundsm(ab, subkey, cd) - one round on block 0.
 *
 * Loads the 8-byte subkey at key_table + subkey * 8 into RT2, then feeds
 * all eight bytes of ab##0 through four xor2ror16 steps, accumulating the
 * table entries alternately into cd##0 and RT2.  RT2 is folded into cd##0
 * at the end.  The four 16-bit rotations add up to 64, so ab##0 leaves the
 * macro with the value it had on entry.
 *
 * Clobbers RT0, RT1, RT2 and the flags (RT0 is also RIO).
 */
#define roundsm(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
	xorq RT2, cd ## 0;
101
102
/*
 * fls(l, r, kl, kr) - key-dependent mixing of the two halves of block 0
 * (presumably the Camellia FL / FL^-1 layer - confirm against the spec).
 *
 * With K[n] the 8-byte word at key_table + n * 8, and lo32()/hi32() the
 * low/high 32 bits of a 64-bit value, the four steps are, in order:
 *
 *	hi32(l##0) ^= rol32(lo32(K[kl]) & lo32(l##0), 1);
 *	lo32(r##0) ^= hi32(K[kr] | r##0);
 *	lo32(l##0) ^= hi32(K[kl] | l##0);
 *	hi32(r##0) ^= rol32(lo32(K[kr]) & lo32(r##0), 1);
 *
 * Each step sees the result of the previous ones.  Clobbers RT0, RT1, RT2
 * and the flags (RT0 is also RIO).
 */
#define fls(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT2; \
	orq l ## 0, RT2; \
	shrq $32, RT2; \
	xorq RT2, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT0d; \
	andl r ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, r ## 0;
122
123
/*
 * enc_rounds(i) - six consecutive rounds on block 0 using subkeys
 * i + 2 .. i + 7 in ascending order; RAB and RCD swap roles every round.
 */
#define enc_rounds(i) \
	roundsm(RAB, i + 2, RCD); \
	roundsm(RCD, i + 3, RAB); \
	roundsm(RAB, i + 4, RCD); \
	roundsm(RCD, i + 5, RAB); \
	roundsm(RAB, i + 6, RCD); \
	roundsm(RCD, i + 7, RAB);
130
131
/* enc_fls(i) - fls step for encryption: kl = subkey i, kr = subkey i + 1. */
#define enc_fls(i) \
	fls(RAB, RCD, i + 0, i + 1);
133
134
/*
 * enc_inpack() - load one 16-byte block from (RIO) for encryption.
 *
 * Bytes 0..7 go to RAB0 and bytes 8..15 to RCD0; each is byte-swapped and
 * has its 32-bit halves exchanged (rotation by 32).  The 8-byte word at
 * key_table + 0 is then XORed into RAB0.
 */
#define enc_inpack() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX), RAB0;
142
143
/*
 * enc_outunpack(op, max) - write the encrypted block back through RIO.
 *
 * `max` is a register holding the index of the last subkey; the 8-byte word
 * at key_table + max * 8 is XORed into RCD0 first.  Both halves then have
 * their 32-bit halves exchanged and are byte-swapped, mirroring
 * enc_inpack().  RCD0 goes to bytes 0..7 and RAB0 to bytes 8..15.
 *
 * `op` is pasted with `q` to form the store instruction; the callers in
 * this file pass `mov` (overwrite) or `xor` (XOR into the destination).
 */
#define enc_outunpack(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO);
151
152
/*
 * dec_rounds(i) - six consecutive rounds on block 0 using subkeys
 * i + 7 .. i + 2 in descending order, i.e. enc_rounds(i) with the subkey
 * order reversed.
 */
#define dec_rounds(i) \
	roundsm(RAB, i + 7, RCD); \
	roundsm(RCD, i + 6, RAB); \
	roundsm(RAB, i + 5, RCD); \
	roundsm(RCD, i + 4, RAB); \
	roundsm(RAB, i + 3, RCD); \
	roundsm(RCD, i + 2, RAB);
159
160
/*
 * dec_fls(i) - fls step for decryption: kl = subkey i + 1, kr = subkey i
 * (the two subkeys swapped relative to enc_fls).
 */
#define dec_fls(i) \
	fls(RAB, RCD, i + 1, i + 0);
162
163
/*
 * dec_inpack(max) - load one 16-byte block from (RIO) for decryption.
 *
 * Same load / byte-swap / rotate-by-32 sequence as enc_inpack(), but the
 * word XORed into RAB0 is the one at key_table + max * 8, where `max` is a
 * register holding the index of the last subkey.
 */
#define dec_inpack(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rolq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rorq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0;
171
172
/*
 * dec_outunpack() - write the decrypted block back through RIO.
 *
 * XORs the word at key_table + 0 into RCD0, undoes the rotate / byte-swap
 * done on input, and stores RCD0 to bytes 0..7 and RAB0 to bytes 8..15.
 * Always overwrites the destination (there is no xor variant).
 */
#define dec_outunpack() \
	xorq key_table(CTX), RCD0; \
	rorq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rolq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO);
180
181
SYM_TYPED_FUNC_START(__camellia_enc_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Encrypts one 16-byte block from src.  The result overwrites dst
	 * when xor is zero and is XORed into dst otherwise.
	 *
	 * No stack is used: the caller's %r12 (used here as RT1) is kept in
	 * RR12, and the arguments are moved out of %rsi/%rcx because those
	 * registers double as RT0 and RCD0.
	 */
	movq %r12, RR12;

	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack();

	enc_rounds(0);
	enc_fls(8);
	enc_rounds(8);
	enc_fls(16);
	enc_rounds(16);
	movl $24, RT1d; /* max */

	/*
	 * key_length == 16 stops here; anything else runs one more fls and
	 * six more rounds.  The field is compared as a 32-bit value, the
	 * same way camellia_dec_blk reads it.
	 */
	cmpl $16, key_length(CTX);
	je .L__enc_done;

	enc_fls(24);
	enc_rounds(24);
	movl $32, RT1d; /* max */

.L__enc_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO;		/* RIO was clobbered as RT0; mov keeps flags */

	jnz .L__enc_xor;

	enc_outunpack(mov, RT1);

	movq RR12, %r12;
	RET;

.L__enc_xor:
	enc_outunpack(xor, RT1);

	movq RR12, %r12;
	RET;
SYM_FUNC_END(__camellia_enc_blk)
227
228
SYM_TYPED_FUNC_START(camellia_dec_blk)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Decrypts one 16-byte block from src into dst.
	 *
	 * RT2 receives the index of the last subkey: 24 when key_length is
	 * 16, otherwise 32.  RXORd only serves as the register source that
	 * cmov requires; the two movl in between leave the flags from cmpl
	 * untouched.
	 */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	movq %r12, RR12;	/* %r12 is RT1 and callee-saved */
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack(RT2);

	/* max == 24: skip the extra fls / round group */
	cmpb $24, RT2bl;
	je .L__dec_rounds16;

	dec_rounds(24);
	dec_fls(24);

.L__dec_rounds16:
	dec_rounds(16);
	dec_fls(16);
	dec_rounds(8);
	dec_fls(8);
	dec_rounds(0);

	movq RDST, RIO;		/* RIO was clobbered as RT0 */

	dec_outunpack();

	movq RR12, %r12;
	RET;
SYM_FUNC_END(camellia_dec_blk)
265
266
/**********************************************************************
267
2-way camellia
268
**********************************************************************/
269
/*
 * roundsm2(ab, subkey, cd) - one round on blocks 0 and 1.
 *
 * RT2 is loaded with the 8-byte subkey at key_table + subkey * 8 and
 * XORed into cd##1 straight away.  Block 0 is then processed exactly as in
 * roundsm (table entries accumulate alternately in cd##0 and RT2, RT2 is
 * folded into cd##0), while block 1 accumulates all of its table entries
 * directly in cd##1.  ab##0 and ab##1 are each rotated by 4 * 16 = 64 bits
 * and therefore leave the macro unchanged.
 *
 * Clobbers RT0, RT1, RT2 and the flags (RT0 is also RIO).
 */
#define roundsm2(ab, subkey, cd) \
	movq (key_table + ((subkey) * 2) * 4)(CTX), RT2; \
	xorq RT2, cd ## 1; \
	\
	xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 0, RT2); \
	xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 0, cd ## 0); \
	xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 0, RT2); \
	\
		xor2ror16(sp00444404, sp03303033, RT0, RT1, ab ## 1, cd ## 1); \
		xorq RT2, cd ## 0; \
		xor2ror16(sp22000222, sp10011110, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp11101110, sp44044404, RT0, RT1, ab ## 1, cd ## 1); \
		xor2ror16(sp30333033, sp02220222, RT0, RT1, ab ## 1, cd ## 1);
283
284
/*
 * fls2(l, r, kl, kr) - the fls mixing applied to blocks 0 and 1.
 *
 * With K[n] the 8-byte word at key_table + n * 8, each block <b> goes
 * through these four steps in order:
 *
 *	hi32(l##b) ^= rol32(lo32(K[kl]) & lo32(l##b), 1);
 *	lo32(r##b) ^= hi32(K[kr] | r##b);
 *	lo32(l##b) ^= hi32(K[kl] | l##b);
 *	hi32(r##b) ^= rol32(lo32(K[kr]) & lo32(r##b), 1);
 *
 * The work is interleaved: steps 1-2 for block 0, steps 1-2 for block 1,
 * steps 3-4 for block 0, steps 3-4 for block 1 (block 1 is indented one
 * level deeper).  The temporaries rotate through RT0, RT1 and RT2.
 *
 * Clobbers RT0, RT1, RT2 and the flags (RT0 is also RIO).
 */
#define fls2(l, r, kl, kr) \
	movl (key_table + ((kl) * 2) * 4)(CTX), RT0d; \
	andl l ## 0d, RT0d; \
	roll $1, RT0d; \
	shlq $32, RT0; \
	xorq RT0, l ## 0; \
	movq (key_table + ((kr) * 2) * 4)(CTX), RT1; \
	orq r ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, r ## 0; \
	\
		movl (key_table + ((kl) * 2) * 4)(CTX), RT2d; \
		andl l ## 1d, RT2d; \
		roll $1, RT2d; \
		shlq $32, RT2; \
		xorq RT2, l ## 1; \
		movq (key_table + ((kr) * 2) * 4)(CTX), RT0; \
		orq r ## 1, RT0; \
		shrq $32, RT0; \
		xorq RT0, r ## 1; \
	\
	movq (key_table + ((kl) * 2) * 4)(CTX), RT1; \
	orq l ## 0, RT1; \
	shrq $32, RT1; \
	xorq RT1, l ## 0; \
	movl (key_table + ((kr) * 2) * 4)(CTX), RT2d; \
	andl r ## 0d, RT2d; \
	roll $1, RT2d; \
	shlq $32, RT2; \
	xorq RT2, r ## 0; \
	\
		movq (key_table + ((kl) * 2) * 4)(CTX), RT0; \
		orq l ## 1, RT0; \
		shrq $32, RT0; \
		xorq RT0, l ## 1; \
		movl (key_table + ((kr) * 2) * 4)(CTX), RT1d; \
		andl r ## 1d, RT1d; \
		roll $1, RT1d; \
		shlq $32, RT1; \
		xorq RT1, r ## 1;
324
325
/*
 * enc_rounds2(i) - six consecutive 2-way rounds using subkeys
 * i + 2 .. i + 7 in ascending order; RAB and RCD swap roles every round.
 */
#define enc_rounds2(i) \
	roundsm2(RAB, i + 2, RCD); \
	roundsm2(RCD, i + 3, RAB); \
	roundsm2(RAB, i + 4, RCD); \
	roundsm2(RCD, i + 5, RAB); \
	roundsm2(RAB, i + 6, RCD); \
	roundsm2(RCD, i + 7, RAB);
332
333
/* enc_fls2(i) - 2-way fls for encryption: kl = subkey i, kr = subkey i + 1. */
#define enc_fls2(i) \
	fls2(RAB, RCD, i + 0, i + 1);
335
336
/*
 * enc_inpack2() - load two consecutive 16-byte blocks from (RIO) for
 * encryption.
 *
 * Block 0 (bytes 0..15) goes to RAB0/RCD0 and block 1 (bytes 16..31) to
 * RAB1/RCD1.  Every 8-byte half is byte-swapped and has its 32-bit halves
 * exchanged (rotation by 32); the word at key_table + 0 is XORed into
 * RAB0 and RAB1.
 */
#define enc_inpack2() \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rorq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rolq $32, RCD0; \
	xorq key_table(CTX), RAB0; \
	\
	movq 8*2(RIO), RAB1; \
	bswapq RAB1; \
	rorq $32, RAB1; \
	movq 12*2(RIO), RCD1; \
	bswapq RCD1; \
	rolq $32, RCD1; \
	xorq key_table(CTX), RAB1;
352
353
/*
 * enc_outunpack2(op, max) - write two encrypted blocks back through RIO.
 *
 * `max` is a register holding the index of the last subkey; the word at
 * key_table + max * 8 is XORed into RCD0 and RCD1.  Every half then has
 * its 32-bit halves exchanged and is byte-swapped.  Store order per block
 * is RCD first, RAB second: RCD0 -> 0, RAB0 -> 8, RCD1 -> 16, RAB1 -> 24.
 *
 * `op` is pasted with `q` to form the store instruction; the callers in
 * this file pass `mov` (overwrite) or `xor` (XOR into the destination).
 */
#define enc_outunpack2(op, max) \
	xorq key_table(CTX, max, 8), RCD0; \
	rolq $32, RCD0; \
	bswapq RCD0; \
	op ## q RCD0, (RIO); \
	rorq $32, RAB0; \
	bswapq RAB0; \
	op ## q RAB0, 4*2(RIO); \
	\
	xorq key_table(CTX, max, 8), RCD1; \
	rolq $32, RCD1; \
	bswapq RCD1; \
	op ## q RCD1, 8*2(RIO); \
	rorq $32, RAB1; \
	bswapq RAB1; \
	op ## q RAB1, 12*2(RIO);
369
370
/*
 * dec_rounds2(i) - six consecutive 2-way rounds using subkeys
 * i + 7 .. i + 2 in descending order, i.e. enc_rounds2(i) with the subkey
 * order reversed.
 */
#define dec_rounds2(i) \
	roundsm2(RAB, i + 7, RCD); \
	roundsm2(RCD, i + 6, RAB); \
	roundsm2(RAB, i + 5, RCD); \
	roundsm2(RCD, i + 4, RAB); \
	roundsm2(RAB, i + 3, RCD); \
	roundsm2(RCD, i + 2, RAB);
377
378
/*
 * dec_fls2(i) - 2-way fls for decryption: kl = subkey i + 1, kr = subkey i
 * (the two subkeys swapped relative to enc_fls2).
 */
#define dec_fls2(i) \
	fls2(RAB, RCD, i + 1, i + 0);
380
381
/*
 * dec_inpack2(max) - load two consecutive 16-byte blocks from (RIO) for
 * decryption.
 *
 * Same load / byte-swap / rotate-by-32 sequence as enc_inpack2(), but the
 * word XORed into RAB0 and RAB1 is the one at key_table + max * 8, where
 * `max` is a register holding the index of the last subkey.
 */
#define dec_inpack2(max) \
	movq (RIO), RAB0; \
	bswapq RAB0; \
	rorq $32, RAB0; \
	movq 4*2(RIO), RCD0; \
	bswapq RCD0; \
	rolq $32, RCD0; \
	xorq key_table(CTX, max, 8), RAB0; \
	\
	movq 8*2(RIO), RAB1; \
	bswapq RAB1; \
	rorq $32, RAB1; \
	movq 12*2(RIO), RCD1; \
	bswapq RCD1; \
	rolq $32, RCD1; \
	xorq key_table(CTX, max, 8), RAB1;
397
398
/*
 * dec_outunpack2() - write two decrypted blocks back through RIO.
 *
 * XORs the word at key_table + 0 into RCD0 and RCD1, undoes the rotate /
 * byte-swap done on input, and stores RCD0 -> 0, RAB0 -> 8, RCD1 -> 16,
 * RAB1 -> 24.  Always overwrites the destination.
 */
#define dec_outunpack2() \
	xorq key_table(CTX), RCD0; \
	rolq $32, RCD0; \
	bswapq RCD0; \
	movq RCD0, (RIO); \
	rorq $32, RAB0; \
	bswapq RAB0; \
	movq RAB0, 4*2(RIO); \
	\
	xorq key_table(CTX), RCD1; \
	rolq $32, RCD1; \
	bswapq RCD1; \
	movq RCD1, 8*2(RIO); \
	rorq $32, RAB1; \
	bswapq RAB1; \
	movq RAB1, 12*2(RIO);
414
415
SYM_TYPED_FUNC_START(__camellia_enc_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *	%rcx: bool xor
	 *
	 * Encrypts two consecutive 16-byte blocks from src.  The result
	 * overwrites dst when xor is zero and is XORed into dst otherwise.
	 *
	 * %rbx (RAB1) is callee-saved and goes on the stack because RXOR is
	 * occupied by the xor flag; %r12 (RT1) is kept in RR12.
	 */
	pushq %rbx;

	movq %r12, RR12;
	movq %rcx, RXOR;
	movq %rsi, RDST;
	movq %rdx, RIO;

	enc_inpack2();

	enc_rounds2(0);
	enc_fls2(8);
	enc_rounds2(8);
	enc_fls2(16);
	enc_rounds2(16);
	movl $24, RT2d; /* max */

	/*
	 * key_length == 16 stops here; anything else runs one more fls and
	 * six more rounds.  The field is compared as a 32-bit value, the
	 * same way camellia_dec_blk_2way reads it.
	 */
	cmpl $16, key_length(CTX);
	je .L__enc2_done;

	enc_fls2(24);
	enc_rounds2(24);
	movl $32, RT2d; /* max */

.L__enc2_done:
	testb RXORbl, RXORbl;
	movq RDST, RIO;		/* RIO was clobbered as RT0; mov keeps flags */
	jnz .L__enc2_xor;

	enc_outunpack2(mov, RT2);

	movq RR12, %r12;
	popq %rbx;
	RET;

.L__enc2_xor:
	enc_outunpack2(xor, RT2);

	movq RR12, %r12;
	popq %rbx;
	RET;
SYM_FUNC_END(__camellia_enc_blk_2way)
463
464
SYM_TYPED_FUNC_START(camellia_dec_blk_2way)
	/* input:
	 *	%rdi: ctx, CTX
	 *	%rsi: dst
	 *	%rdx: src
	 *
	 * Decrypts two consecutive 16-byte blocks from src into dst.
	 *
	 * RT2 receives the index of the last subkey: 24 when key_length is
	 * 16, otherwise 32.  RXORd first serves as the register source that
	 * cmov requires (the movl in between leave the flags from cmpl
	 * untouched) and is then reused to hold the caller's %rbx, so no
	 * stack is needed.
	 */
	cmpl $16, key_length(CTX);
	movl $32, RT2d;
	movl $24, RXORd;
	cmovel RXORd, RT2d; /* max */

	movq %rbx, RXOR;	/* %rbx is RAB1 and callee-saved */
	movq %r12, RR12;	/* %r12 is RT1 and callee-saved */
	movq %rsi, RDST;
	movq %rdx, RIO;

	dec_inpack2(RT2);

	/* max == 24: skip the extra fls / round group */
	cmpb $24, RT2bl;
	je .L__dec2_rounds16;

	dec_rounds2(24);
	dec_fls2(24);

.L__dec2_rounds16:
	dec_rounds2(16);
	dec_fls2(16);
	dec_rounds2(8);
	dec_fls2(8);
	dec_rounds2(0);

	movq RDST, RIO;		/* RIO was clobbered as RT0 */

	dec_outunpack2();

	movq RR12, %r12;
	movq RXOR, %rbx;
	RET;
SYM_FUNC_END(camellia_dec_blk_2way)
503
504