/**
 * \file bn_mul.h
 *
 * \brief Multi-precision integer library
 */
/*
 * Copyright The Mbed TLS Contributors
 * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
 */
/*
 * Multiply source vector [s] with b, add result
 * to destination vector [d] and set carry c.
 *
 * Currently supports:
 *
 *  . IA-32 (386+)         . AMD64 / EM64T
 *  . IA-32 (SSE2)         . Motorola 68000
 *  . PowerPC, 32-bit      . MicroBlaze
 *  . PowerPC, 64-bit      . TriCore
 *  . SPARC v8             . ARM v3+
 *  . Alpha                . MIPS32
 *  . C, longlong          . C, generic
 */
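/*
 * Usage sketch (illustrative, not part of this header's API): the bignum
 * multiplication helper expands one INIT/CORE.../STOP triple per inner loop,
 * roughly:
 *
 *     MULADDC_X1_INIT      // bind s, d, b and the carry c to registers
 *     MULADDC_X1_CORE      // one limb: *d = low(*s * b + c + *d); advance
 *     MULADDC_X1_CORE      // ... repeated, or via the X2/X4/X8 variants
 *     MULADDC_X1_STOP      // write the carry and updated pointers back
 *
 * Each CORE step is one double-width multiply-accumulate, with the high
 * half of the product carried into c for the next step.
 */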
#ifndef MBEDTLS_BN_MUL_H
#define MBEDTLS_BN_MUL_H

#include "mbedtls/build_info.h"

#include "mbedtls/bignum.h"


/*
 * Conversion macros for embedded constants:
 * build lists of mbedtls_mpi_uint's from lists of unsigned char's grouped by 8, 4 or 2
 */
#if defined(MBEDTLS_HAVE_INT32)

#define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d)   \
    ((mbedtls_mpi_uint) (a) <<  0) |            \
    ((mbedtls_mpi_uint) (b) <<  8) |            \
    ((mbedtls_mpi_uint) (c) << 16) |            \
    ((mbedtls_mpi_uint) (d) << 24)

#define MBEDTLS_BYTES_TO_T_UINT_2(a, b) \
    MBEDTLS_BYTES_TO_T_UINT_4(a, b, 0, 0)

#define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h)   \
    MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d),                  \
    MBEDTLS_BYTES_TO_T_UINT_4(e, f, g, h)

#else /* 64-bits */

#define MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, e, f, g, h)   \
    ((mbedtls_mpi_uint) (a) <<  0) |                        \
    ((mbedtls_mpi_uint) (b) <<  8) |                        \
    ((mbedtls_mpi_uint) (c) << 16) |                        \
    ((mbedtls_mpi_uint) (d) << 24) |                        \
    ((mbedtls_mpi_uint) (e) << 32) |                        \
    ((mbedtls_mpi_uint) (f) << 40) |                        \
    ((mbedtls_mpi_uint) (g) << 48) |                        \
    ((mbedtls_mpi_uint) (h) << 56)

#define MBEDTLS_BYTES_TO_T_UINT_4(a, b, c, d) \
    MBEDTLS_BYTES_TO_T_UINT_8(a, b, c, d, 0, 0, 0, 0)

#define MBEDTLS_BYTES_TO_T_UINT_2(a, b) \
    MBEDTLS_BYTES_TO_T_UINT_8(a, b, 0, 0, 0, 0, 0, 0)

#endif /* bits in mbedtls_mpi_uint */
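/*
 * Example (illustrative): with 32-bit limbs (MBEDTLS_HAVE_INT32),
 *
 *     MBEDTLS_BYTES_TO_T_UINT_4(0x01, 0x23, 0x45, 0x67)
 *
 * expands to the single little-endian limb 0x67452301, and the 8-byte form
 * yields two comma-separated limbs suitable for an array initialiser.
 */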

/* *INDENT-OFF* */
#if defined(MBEDTLS_HAVE_ASM)

/* armcc5 --gnu defines __GNUC__ but doesn't support GNU's extended asm */
#if defined(__GNUC__) && \
    ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )

/*
 * GCC < 5.0 treated the x86 ebx (which is used for the GOT) as a
 * fixed reserved register when building as PIC, leading to errors
 * like: bn_mul.h:46:13: error: PIC register clobbered by 'ebx' in 'asm'
 *
 * This is fixed by an improved register allocator in GCC 5+. From the
 * release notes:
 * Register allocation improvements: Reuse of the PIC hard register,
 * instead of using a fixed register, was implemented on x86/x86-64
 * targets. This improves generated PIC code performance as more hard
 * registers can be used.
 */
#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
#define MULADDC_CANNOT_USE_EBX
#endif

/*
 * Disable use of the i386 assembly code below if the -O0 option (which
 * disables all compiler optimisations) is passed; this is detected via
 * __OPTIMIZE__. The assembly code below uses more registers than are
 * available at -O0.
 */
#if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX)

#define MULADDC_X1_INIT \
    { mbedtls_mpi_uint t; \
    asm( \
        "movl %%ebx, %0 \n\t" \
        "movl %5, %%esi \n\t" \
        "movl %6, %%edi \n\t" \
        "movl %7, %%ecx \n\t" \
        "movl %8, %%ebx \n\t"

#define MULADDC_X1_CORE \
        "lodsl \n\t" \
        "mull %%ebx \n\t" \
        "addl %%ecx, %%eax \n\t" \
        "adcl $0, %%edx \n\t" \
        "addl (%%edi), %%eax \n\t" \
        "adcl $0, %%edx \n\t" \
        "movl %%edx, %%ecx \n\t" \
        "stosl \n\t"

#define MULADDC_X1_STOP \
        "movl %4, %%ebx \n\t" \
        "movl %%ecx, %1 \n\t" \
        "movl %%edi, %2 \n\t" \
        "movl %%esi, %3 \n\t" \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
        : "eax", "ebx", "ecx", "edx", "esi", "edi" \
    ); }

#if defined(MBEDTLS_HAVE_SSE2)

#define MULADDC_X8_INIT MULADDC_X1_INIT

#define MULADDC_X8_CORE \
        "movd %%ecx, %%mm1 \n\t" \
        "movd %%ebx, %%mm0 \n\t" \
        "movd (%%edi), %%mm3 \n\t" \
        "paddq %%mm3, %%mm1 \n\t" \
        "movd (%%esi), %%mm2 \n\t" \
        "pmuludq %%mm0, %%mm2 \n\t" \
        "movd 4(%%esi), %%mm4 \n\t" \
        "pmuludq %%mm0, %%mm4 \n\t" \
        "movd 8(%%esi), %%mm6 \n\t" \
        "pmuludq %%mm0, %%mm6 \n\t" \
        "movd 12(%%esi), %%mm7 \n\t" \
        "pmuludq %%mm0, %%mm7 \n\t" \
        "paddq %%mm2, %%mm1 \n\t" \
        "movd 4(%%edi), %%mm3 \n\t" \
        "paddq %%mm4, %%mm3 \n\t" \
        "movd 8(%%edi), %%mm5 \n\t" \
        "paddq %%mm6, %%mm5 \n\t" \
        "movd 12(%%edi), %%mm4 \n\t" \
        "paddq %%mm4, %%mm7 \n\t" \
        "movd %%mm1, (%%edi) \n\t" \
        "movd 16(%%esi), %%mm2 \n\t" \
        "pmuludq %%mm0, %%mm2 \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "movd 20(%%esi), %%mm4 \n\t" \
        "pmuludq %%mm0, %%mm4 \n\t" \
        "paddq %%mm3, %%mm1 \n\t" \
        "movd 24(%%esi), %%mm6 \n\t" \
        "pmuludq %%mm0, %%mm6 \n\t" \
        "movd %%mm1, 4(%%edi) \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "movd 28(%%esi), %%mm3 \n\t" \
        "pmuludq %%mm0, %%mm3 \n\t" \
        "paddq %%mm5, %%mm1 \n\t" \
        "movd 16(%%edi), %%mm5 \n\t" \
        "paddq %%mm5, %%mm2 \n\t" \
        "movd %%mm1, 8(%%edi) \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "paddq %%mm7, %%mm1 \n\t" \
        "movd 20(%%edi), %%mm5 \n\t" \
        "paddq %%mm5, %%mm4 \n\t" \
        "movd %%mm1, 12(%%edi) \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "paddq %%mm2, %%mm1 \n\t" \
        "movd 24(%%edi), %%mm5 \n\t" \
        "paddq %%mm5, %%mm6 \n\t" \
        "movd %%mm1, 16(%%edi) \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "paddq %%mm4, %%mm1 \n\t" \
        "movd 28(%%edi), %%mm5 \n\t" \
        "paddq %%mm5, %%mm3 \n\t" \
        "movd %%mm1, 20(%%edi) \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "paddq %%mm6, %%mm1 \n\t" \
        "movd %%mm1, 24(%%edi) \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "paddq %%mm3, %%mm1 \n\t" \
        "movd %%mm1, 28(%%edi) \n\t" \
        "addl $32, %%edi \n\t" \
        "addl $32, %%esi \n\t" \
        "psrlq $32, %%mm1 \n\t" \
        "movd %%mm1, %%ecx \n\t"

#define MULADDC_X8_STOP \
        "emms \n\t" \
        "movl %4, %%ebx \n\t" \
        "movl %%ecx, %1 \n\t" \
        "movl %%edi, %2 \n\t" \
        "movl %%esi, %3 \n\t" \
        : "=m" (t), "=m" (c), "=m" (d), "=m" (s) \
        : "m" (t), "m" (s), "m" (d), "m" (c), "m" (b) \
        : "eax", "ebx", "ecx", "edx", "esi", "edi" \
    ); } \

#endif /* SSE2 */

#endif /* i386 */

#if defined(__amd64__) || defined (__x86_64__)

#define MULADDC_X1_INIT \
    asm( \
        "xorq %%r8, %%r8\n"

#define MULADDC_X1_CORE \
        "movq (%%rsi), %%rax\n" \
        "mulq %%rbx\n" \
        "addq $8, %%rsi\n" \
        "addq %%rcx, %%rax\n" \
        "movq %%r8, %%rcx\n" \
        "adcq $0, %%rdx\n" \
        "nop \n" \
        "addq %%rax, (%%rdi)\n" \
        "adcq %%rdx, %%rcx\n" \
        "addq $8, %%rdi\n"

#define MULADDC_X1_STOP \
        : "+c" (c), "+D" (d), "+S" (s), "+m" (*(uint64_t (*)[16]) d) \
        : "b" (b), "m" (*(const uint64_t (*)[16]) s) \
        : "rax", "rdx", "r8" \
    );

#endif /* AMD64 */
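/*
 * Note (informal): casts such as *(uint64_t (*)[16]) d in the operand lists
 * above use the array-pointer idiom of GNU C extended asm: they tell the
 * compiler exactly which memory the asm block may read or write, so no
 * blanket "memory" clobber is needed and surrounding accesses stay
 * correctly ordered.
 */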

// The following assembly code assumes that a pointer will fit in a 64-bit register
// (including ILP32 __aarch64__ ABIs such as on watchOS, hence the 2^32 - 1)
#if defined(__aarch64__) && (UINTPTR_MAX == 0xfffffffful || UINTPTR_MAX == 0xfffffffffffffffful)

/*
 * There are some issues around different compilers requiring different constraint
 * syntax for updating pointers from assembly code (see notes for
 * MBEDTLS_ASM_AARCH64_PTR_CONSTRAINT in common.h), especially on aarch64_32 (aka ILP32).
 *
 * For this reason we cast the pointers to/from uintptr_t here.
 */
#define MULADDC_X1_INIT \
    do { uintptr_t muladdc_d = (uintptr_t) d, muladdc_s = (uintptr_t) s; asm(

#define MULADDC_X1_CORE \
        "ldr x4, [%x2], #8 \n\t" \
        "ldr x5, [%x1] \n\t" \
        "mul x6, x4, %4 \n\t" \
        "umulh x7, x4, %4 \n\t" \
        "adds x5, x5, x6 \n\t" \
        "adc x7, x7, xzr \n\t" \
        "adds x5, x5, %0 \n\t" \
        "adc %0, x7, xzr \n\t" \
        "str x5, [%x1], #8 \n\t"

#define MULADDC_X1_STOP \
        : "+r" (c), \
          "+r" (muladdc_d), \
          "+r" (muladdc_s), \
          "+m" (*(uint64_t (*)[16]) d) \
        : "r" (b), "m" (*(const uint64_t (*)[16]) s) \
        : "x4", "x5", "x6", "x7", "cc" \
    ); d = (mbedtls_mpi_uint *)muladdc_d; s = (mbedtls_mpi_uint *)muladdc_s; } while (0);

#endif /* Aarch64 */

#if defined(__mc68020__) || defined(__mcpu32__)

#define MULADDC_X1_INIT \
    asm( \
        "movl %3, %%a2 \n\t" \
        "movl %4, %%a3 \n\t" \
        "movl %5, %%d3 \n\t" \
        "movl %6, %%d2 \n\t" \
        "moveq #0, %%d0 \n\t"

#define MULADDC_X1_CORE \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d4:%%d1 \n\t" \
        "addl %%d3, %%d1 \n\t" \
        "addxl %%d0, %%d4 \n\t" \
        "moveq #0, %%d3 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "addxl %%d4, %%d3 \n\t"

#define MULADDC_X1_STOP \
        "movl %%d3, %0 \n\t" \
        "movl %%a3, %1 \n\t" \
        "movl %%a2, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "d0", "d1", "d2", "d3", "d4", "a2", "a3" \
    );

#define MULADDC_X8_INIT MULADDC_X1_INIT

#define MULADDC_X8_CORE \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d4:%%d1 \n\t" \
        "addxl %%d3, %%d1 \n\t" \
        "addxl %%d0, %%d4 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d3:%%d1 \n\t" \
        "addxl %%d4, %%d1 \n\t" \
        "addxl %%d0, %%d3 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d4:%%d1 \n\t" \
        "addxl %%d3, %%d1 \n\t" \
        "addxl %%d0, %%d4 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d3:%%d1 \n\t" \
        "addxl %%d4, %%d1 \n\t" \
        "addxl %%d0, %%d3 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d4:%%d1 \n\t" \
        "addxl %%d3, %%d1 \n\t" \
        "addxl %%d0, %%d4 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d3:%%d1 \n\t" \
        "addxl %%d4, %%d1 \n\t" \
        "addxl %%d0, %%d3 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d4:%%d1 \n\t" \
        "addxl %%d3, %%d1 \n\t" \
        "addxl %%d0, %%d4 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "movel %%a2@+, %%d1 \n\t" \
        "mulul %%d2, %%d3:%%d1 \n\t" \
        "addxl %%d4, %%d1 \n\t" \
        "addxl %%d0, %%d3 \n\t" \
        "addl %%d1, %%a3@+ \n\t" \
        "addxl %%d0, %%d3 \n\t"

#define MULADDC_X8_STOP MULADDC_X1_STOP

#endif /* MC68000 */

#if defined(__powerpc64__) || defined(__ppc64__)

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_X1_INIT \
    asm( \
        "ld r3, %3 \n\t" \
        "ld r4, %4 \n\t" \
        "ld r5, %5 \n\t" \
        "ld r6, %6 \n\t" \
        "addi r3, r3, -8 \n\t" \
        "addi r4, r4, -8 \n\t" \
        "addic r5, r5, 0 \n\t"

#define MULADDC_X1_CORE \
        "ldu r7, 8(r3) \n\t" \
        "mulld r8, r7, r6 \n\t" \
        "mulhdu r9, r7, r6 \n\t" \
        "adde r8, r8, r5 \n\t" \
        "ld r7, 8(r4) \n\t" \
        "addze r5, r9 \n\t" \
        "addc r8, r8, r7 \n\t" \
        "stdu r8, 8(r4) \n\t"

#define MULADDC_X1_STOP \
        "addze r5, r5 \n\t" \
        "addi r4, r4, 8 \n\t" \
        "addi r3, r3, 8 \n\t" \
        "std r5, %0 \n\t" \
        "std r4, %1 \n\t" \
        "std r3, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
    );


#else /* __MACH__ && __APPLE__ */

#define MULADDC_X1_INIT \
    asm( \
        "ld %%r3, %3 \n\t" \
        "ld %%r4, %4 \n\t" \
        "ld %%r5, %5 \n\t" \
        "ld %%r6, %6 \n\t" \
        "addi %%r3, %%r3, -8 \n\t" \
        "addi %%r4, %%r4, -8 \n\t" \
        "addic %%r5, %%r5, 0 \n\t"

#define MULADDC_X1_CORE \
        "ldu %%r7, 8(%%r3) \n\t" \
        "mulld %%r8, %%r7, %%r6 \n\t" \
        "mulhdu %%r9, %%r7, %%r6 \n\t" \
        "adde %%r8, %%r8, %%r5 \n\t" \
        "ld %%r7, 8(%%r4) \n\t" \
        "addze %%r5, %%r9 \n\t" \
        "addc %%r8, %%r8, %%r7 \n\t" \
        "stdu %%r8, 8(%%r4) \n\t"

#define MULADDC_X1_STOP \
        "addze %%r5, %%r5 \n\t" \
        "addi %%r4, %%r4, 8 \n\t" \
        "addi %%r3, %%r3, 8 \n\t" \
        "std %%r5, %0 \n\t" \
        "std %%r4, %1 \n\t" \
        "std %%r3, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
    );

#endif /* __MACH__ && __APPLE__ */

#elif defined(__powerpc__) || defined(__ppc__) /* end PPC64/begin PPC32 */

#if defined(__MACH__) && defined(__APPLE__)

#define MULADDC_X1_INIT \
    asm( \
        "lwz r3, %3 \n\t" \
        "lwz r4, %4 \n\t" \
        "lwz r5, %5 \n\t" \
        "lwz r6, %6 \n\t" \
        "addi r3, r3, -4 \n\t" \
        "addi r4, r4, -4 \n\t" \
        "addic r5, r5, 0 \n\t"

#define MULADDC_X1_CORE \
        "lwzu r7, 4(r3) \n\t" \
        "mullw r8, r7, r6 \n\t" \
        "mulhwu r9, r7, r6 \n\t" \
        "adde r8, r8, r5 \n\t" \
        "lwz r7, 4(r4) \n\t" \
        "addze r5, r9 \n\t" \
        "addc r8, r8, r7 \n\t" \
        "stwu r8, 4(r4) \n\t"

#define MULADDC_X1_STOP \
        "addze r5, r5 \n\t" \
        "addi r4, r4, 4 \n\t" \
        "addi r3, r3, 4 \n\t" \
        "stw r5, %0 \n\t" \
        "stw r4, %1 \n\t" \
        "stw r3, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
    );

#else /* __MACH__ && __APPLE__ */

#define MULADDC_X1_INIT \
    asm( \
        "lwz %%r3, %3 \n\t" \
        "lwz %%r4, %4 \n\t" \
        "lwz %%r5, %5 \n\t" \
        "lwz %%r6, %6 \n\t" \
        "addi %%r3, %%r3, -4 \n\t" \
        "addi %%r4, %%r4, -4 \n\t" \
        "addic %%r5, %%r5, 0 \n\t"

#define MULADDC_X1_CORE \
        "lwzu %%r7, 4(%%r3) \n\t" \
        "mullw %%r8, %%r7, %%r6 \n\t" \
        "mulhwu %%r9, %%r7, %%r6 \n\t" \
        "adde %%r8, %%r8, %%r5 \n\t" \
        "lwz %%r7, 4(%%r4) \n\t" \
        "addze %%r5, %%r9 \n\t" \
        "addc %%r8, %%r8, %%r7 \n\t" \
        "stwu %%r8, 4(%%r4) \n\t"

#define MULADDC_X1_STOP \
        "addze %%r5, %%r5 \n\t" \
        "addi %%r4, %%r4, 4 \n\t" \
        "addi %%r3, %%r3, 4 \n\t" \
        "stw %%r5, %0 \n\t" \
        "stw %%r4, %1 \n\t" \
        "stw %%r3, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r3", "r4", "r5", "r6", "r7", "r8", "r9" \
    );

#endif /* __MACH__ && __APPLE__ */

#endif /* PPC32 */

/*
 * The Sparc(64) assembly is reported to be broken.
 * Disable it for now, until we're able to fix it.
 */
#if 0 && defined(__sparc__)
#if defined(__sparc64__)

#define MULADDC_X1_INIT \
    asm( \
        "ldx %3, %%o0 \n\t" \
        "ldx %4, %%o1 \n\t" \
        "ld %5, %%o2 \n\t" \
        "ld %6, %%o3 \n\t"

#define MULADDC_X1_CORE \
        "ld [%%o0], %%o4 \n\t" \
        "inc 4, %%o0 \n\t" \
        "ld [%%o1], %%o5 \n\t" \
        "umul %%o3, %%o4, %%o4 \n\t" \
        "addcc %%o4, %%o2, %%o4 \n\t" \
        "rd %%y, %%g1 \n\t" \
        "addx %%g1, 0, %%g1 \n\t" \
        "addcc %%o4, %%o5, %%o4 \n\t" \
        "st %%o4, [%%o1] \n\t" \
        "addx %%g1, 0, %%o2 \n\t" \
        "inc 4, %%o1 \n\t"

#define MULADDC_X1_STOP \
        "st %%o2, %0 \n\t" \
        "stx %%o1, %1 \n\t" \
        "stx %%o0, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "g1", "o0", "o1", "o2", "o3", "o4", \
          "o5" \
    );

#else /* __sparc64__ */

#define MULADDC_X1_INIT \
    asm( \
        "ld %3, %%o0 \n\t" \
        "ld %4, %%o1 \n\t" \
        "ld %5, %%o2 \n\t" \
        "ld %6, %%o3 \n\t"

#define MULADDC_X1_CORE \
        "ld [%%o0], %%o4 \n\t" \
        "inc 4, %%o0 \n\t" \
        "ld [%%o1], %%o5 \n\t" \
        "umul %%o3, %%o4, %%o4 \n\t" \
        "addcc %%o4, %%o2, %%o4 \n\t" \
        "rd %%y, %%g1 \n\t" \
        "addx %%g1, 0, %%g1 \n\t" \
        "addcc %%o4, %%o5, %%o4 \n\t" \
        "st %%o4, [%%o1] \n\t" \
        "addx %%g1, 0, %%o2 \n\t" \
        "inc 4, %%o1 \n\t"

#define MULADDC_X1_STOP \
        "st %%o2, %0 \n\t" \
        "st %%o1, %1 \n\t" \
        "st %%o0, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "g1", "o0", "o1", "o2", "o3", "o4", \
          "o5" \
    );

#endif /* __sparc64__ */
#endif /* __sparc__ */

#if defined(__microblaze__) || defined(microblaze)

#define MULADDC_X1_INIT \
    asm( \
        "lwi r3, %3 \n\t" \
        "lwi r4, %4 \n\t" \
        "lwi r5, %5 \n\t" \
        "lwi r6, %6 \n\t" \
        "andi r7, r6, 0xffff \n\t" \
        "bsrli r6, r6, 16 \n\t"

#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define MULADDC_LHUI \
        "lhui r9, r3, 0 \n\t" \
        "addi r3, r3, 2 \n\t" \
        "lhui r8, r3, 0 \n\t"
#else
#define MULADDC_LHUI \
        "lhui r8, r3, 0 \n\t" \
        "addi r3, r3, 2 \n\t" \
        "lhui r9, r3, 0 \n\t"
#endif

#define MULADDC_X1_CORE \
        MULADDC_LHUI \
        "addi r3, r3, 2 \n\t" \
        "mul r10, r9, r6 \n\t" \
        "mul r11, r8, r7 \n\t" \
        "mul r12, r9, r7 \n\t" \
        "mul r13, r8, r6 \n\t" \
        "bsrli r8, r10, 16 \n\t" \
        "bsrli r9, r11, 16 \n\t" \
        "add r13, r13, r8 \n\t" \
        "add r13, r13, r9 \n\t" \
        "bslli r10, r10, 16 \n\t" \
        "bslli r11, r11, 16 \n\t" \
        "add r12, r12, r10 \n\t" \
        "addc r13, r13, r0 \n\t" \
        "add r12, r12, r11 \n\t" \
        "addc r13, r13, r0 \n\t" \
        "lwi r10, r4, 0 \n\t" \
        "add r12, r12, r10 \n\t" \
        "addc r13, r13, r0 \n\t" \
        "add r12, r12, r5 \n\t" \
        "addc r5, r13, r0 \n\t" \
        "swi r12, r4, 0 \n\t" \
        "addi r4, r4, 4 \n\t"

#define MULADDC_X1_STOP \
        "swi r5, %0 \n\t" \
        "swi r4, %1 \n\t" \
        "swi r3, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r3", "r4", "r5", "r6", "r7", "r8", \
          "r9", "r10", "r11", "r12", "r13" \
    );

#endif /* MicroBlaze */

#if defined(__tricore__)

#define MULADDC_X1_INIT \
    asm( \
        "ld.a %%a2, %3 \n\t" \
        "ld.a %%a3, %4 \n\t" \
        "ld.w %%d4, %5 \n\t" \
        "ld.w %%d1, %6 \n\t" \
        "xor %%d5, %%d5 \n\t"

#define MULADDC_X1_CORE \
        "ld.w %%d0, [%%a2+] \n\t" \
        "madd.u %%e2, %%e4, %%d0, %%d1 \n\t" \
        "ld.w %%d0, [%%a3] \n\t" \
        "addx %%d2, %%d2, %%d0 \n\t" \
        "addc %%d3, %%d3, 0 \n\t" \
        "mov %%d4, %%d3 \n\t" \
        "st.w [%%a3+], %%d2 \n\t"

#define MULADDC_X1_STOP \
        "st.w %0, %%d4 \n\t" \
        "st.a %1, %%a3 \n\t" \
        "st.a %2, %%a2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "d0", "d1", "e2", "d4", "a2", "a3" \
    );

#endif /* TriCore */

#if defined(__arm__)

#if defined(__thumb__) && !defined(__thumb2__)
#if defined(MBEDTLS_COMPILER_IS_GCC)
/*
 * Thumb 1 ISA. This code path has only been tested successfully on gcc;
 * it does not compile on clang or armclang.
 */

#if !defined(__OPTIMIZE__) && defined(__GNUC__)
/*
 * Note, gcc -O0 by default uses r7 for the frame pointer, so it complains about
 * our use of r7 below, unless -fomit-frame-pointer is passed.
 *
 * On the other hand, -fomit-frame-pointer is implied by any -Ox options with
 * x !=0, which we can detect using __OPTIMIZE__ (which is also defined by
 * clang and armcc5 under the same conditions).
 *
 * If gcc needs to use r7, we use r1 as a scratch register and have a few extra
 * instructions to preserve/restore it; otherwise, we can use r7 and avoid
 * the preserve/restore overhead.
 */
#define MULADDC_SCRATCH "RS .req r1 \n\t"
#define MULADDC_PRESERVE_SCRATCH "mov r10, r1 \n\t"
#define MULADDC_RESTORE_SCRATCH "mov r1, r10 \n\t"
#define MULADDC_SCRATCH_CLOBBER "r10"
#else /* !defined(__OPTIMIZE__) && defined(__GNUC__) */
#define MULADDC_SCRATCH "RS .req r7 \n\t"
#define MULADDC_PRESERVE_SCRATCH ""
#define MULADDC_RESTORE_SCRATCH ""
#define MULADDC_SCRATCH_CLOBBER "r7"
#endif /* !defined(__OPTIMIZE__) && defined(__GNUC__) */

#define MULADDC_X1_INIT \
    asm( \
        MULADDC_SCRATCH \
        "ldr r0, %3 \n\t" \
        "ldr r1, %4 \n\t" \
        "ldr r2, %5 \n\t" \
        "ldr r3, %6 \n\t" \
        "lsr r4, r3, #16 \n\t" \
        "mov r9, r4 \n\t" \
        "lsl r4, r3, #16 \n\t" \
        "lsr r4, r4, #16 \n\t" \
        "mov r8, r4 \n\t" \


#define MULADDC_X1_CORE \
        MULADDC_PRESERVE_SCRATCH \
        "ldmia r0!, {r6} \n\t" \
        "lsr RS, r6, #16 \n\t" \
        "lsl r6, r6, #16 \n\t" \
        "lsr r6, r6, #16 \n\t" \
        "mov r4, r8 \n\t" \
        "mul r4, r6 \n\t" \
        "mov r3, r9 \n\t" \
        "mul r6, r3 \n\t" \
        "mov r5, r9 \n\t" \
        "mul r5, RS \n\t" \
        "mov r3, r8 \n\t" \
        "mul RS, r3 \n\t" \
        "lsr r3, r6, #16 \n\t" \
        "add r5, r5, r3 \n\t" \
        "lsr r3, RS, #16 \n\t" \
        "add r5, r5, r3 \n\t" \
        "add r4, r4, r2 \n\t" \
        "mov r2, #0 \n\t" \
        "adc r5, r2 \n\t" \
        "lsl r3, r6, #16 \n\t" \
        "add r4, r4, r3 \n\t" \
        "adc r5, r2 \n\t" \
        "lsl r3, RS, #16 \n\t" \
        "add r4, r4, r3 \n\t" \
        "adc r5, r2 \n\t" \
        MULADDC_RESTORE_SCRATCH \
        "ldr r3, [r1] \n\t" \
        "add r4, r4, r3 \n\t" \
        "adc r2, r5 \n\t" \
        "stmia r1!, {r4} \n\t"

#define MULADDC_X1_STOP \
        "str r2, %0 \n\t" \
        "str r1, %1 \n\t" \
        "str r0, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r0", "r1", "r2", "r3", "r4", "r5", \
          "r6", MULADDC_SCRATCH_CLOBBER, "r8", "r9", "cc" \
    );
#endif /* defined(MBEDTLS_COMPILER_IS_GCC) */

#elif (__ARM_ARCH >= 6) && \
    defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)
/* Armv6-M (or later) with DSP Instruction Set Extensions.
 * Requires support for either Thumb 2 or Arm ISA.
 */
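/*
 * Background (informal): umaal computes RdHi:RdLo = Rn * Rm + RdLo + RdHi,
 * a 32x32->64 multiply plus two 32-bit additions. The result cannot
 * overflow 64 bits, so a single umaal per limb folds the product, the
 * accumulator word and the running carry together.
 */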

#define MULADDC_X1_INIT \
    { \
        mbedtls_mpi_uint tmp_a, tmp_b; \
        asm volatile (

#define MULADDC_X1_CORE \
        ".p2align 2 \n\t" \
        "ldr %[a], [%[in]], #4 \n\t" \
        "ldr %[b], [%[acc]] \n\t" \
        "umaal %[b], %[carry], %[scalar], %[a] \n\t" \
        "str %[b], [%[acc]], #4 \n\t"

#define MULADDC_X1_STOP \
        : [a] "=&r" (tmp_a), \
          [b] "=&r" (tmp_b), \
          [in] "+r" (s), \
          [acc] "+r" (d), \
          [carry] "+l" (c) \
        : [scalar] "r" (b) \
        : "memory" \
        ); \
    }

#define MULADDC_X2_INIT \
    { \
        mbedtls_mpi_uint tmp_a0, tmp_b0; \
        mbedtls_mpi_uint tmp_a1, tmp_b1; \
        asm volatile (

/* - Make sure loop is 4-byte aligned to avoid stalls
 *   upon repeated non-word aligned instructions in
 *   some microarchitectures.
 * - Don't use ldm with post-increment or back-to-back
 *   loads with post-increment and same address register
 *   to avoid stalls on some microarchitectures.
 * - Bunch loads and stores to reduce latency on some
 *   microarchitectures. E.g., on Cortex-M4, the first
 *   in a series of load/store operations has latency
 *   2 cycles, while subsequent loads/stores are single-cycle. */
#define MULADDC_X2_CORE \
        ".p2align 2 \n\t" \
        "ldr %[a0], [%[in]], #+8 \n\t" \
        "ldr %[b0], [%[acc]], #+8 \n\t" \
        "ldr %[a1], [%[in], #-4] \n\t" \
        "ldr %[b1], [%[acc], #-4] \n\t" \
        "umaal %[b0], %[carry], %[scalar], %[a0] \n\t" \
        "umaal %[b1], %[carry], %[scalar], %[a1] \n\t" \
        "str %[b0], [%[acc], #-8] \n\t" \
        "str %[b1], [%[acc], #-4] \n\t"

#define MULADDC_X2_STOP \
        : [a0] "=&r" (tmp_a0), \
          [b0] "=&r" (tmp_b0), \
          [a1] "=&r" (tmp_a1), \
          [b1] "=&r" (tmp_b1), \
          [in] "+r" (s), \
          [acc] "+r" (d), \
          [carry] "+l" (c) \
        : [scalar] "r" (b) \
        : "memory" \
        ); \
    }

#else /* Thumb 2 or Arm ISA, without DSP extensions */

#define MULADDC_X1_INIT \
    asm( \
        "ldr r0, %3 \n\t" \
        "ldr r1, %4 \n\t" \
        "ldr r2, %5 \n\t" \
        "ldr r3, %6 \n\t"

#define MULADDC_X1_CORE \
        "ldr r4, [r0], #4 \n\t" \
        "mov r5, #0 \n\t" \
        "ldr r6, [r1] \n\t" \
        "umlal r2, r5, r3, r4 \n\t" \
        "adds r4, r6, r2 \n\t" \
        "adc r2, r5, #0 \n\t" \
        "str r4, [r1], #4 \n\t"

#define MULADDC_X1_STOP \
        "str r2, %0 \n\t" \
        "str r1, %1 \n\t" \
        "str r0, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "r0", "r1", "r2", "r3", "r4", "r5", \
          "r6", "cc" \
    );

#endif /* ISA codepath selection */

#endif /* defined(__arm__) */

#if defined(__alpha__)

#define MULADDC_X1_INIT \
    asm( \
        "ldq $1, %3 \n\t" \
        "ldq $2, %4 \n\t" \
        "ldq $3, %5 \n\t" \
        "ldq $4, %6 \n\t"

#define MULADDC_X1_CORE \
        "ldq $6, 0($1) \n\t" \
        "addq $1, 8, $1 \n\t" \
        "mulq $6, $4, $7 \n\t" \
        "umulh $6, $4, $6 \n\t" \
        "addq $7, $3, $7 \n\t" \
        "cmpult $7, $3, $3 \n\t" \
        "ldq $5, 0($2) \n\t" \
        "addq $7, $5, $7 \n\t" \
        "cmpult $7, $5, $5 \n\t" \
        "stq $7, 0($2) \n\t" \
        "addq $2, 8, $2 \n\t" \
        "addq $6, $3, $3 \n\t" \
        "addq $5, $3, $3 \n\t"

#define MULADDC_X1_STOP \
        "stq $3, %0 \n\t" \
        "stq $2, %1 \n\t" \
        "stq $1, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "$1", "$2", "$3", "$4", "$5", "$6", "$7" \
    );
#endif /* Alpha */

#if defined(__mips__) && !defined(__mips64)

#define MULADDC_X1_INIT \
    asm( \
        "lw $10, %3 \n\t" \
        "lw $11, %4 \n\t" \
        "lw $12, %5 \n\t" \
        "lw $13, %6 \n\t"

#define MULADDC_X1_CORE \
        "lw $14, 0($10) \n\t" \
        "multu $13, $14 \n\t" \
        "addi $10, $10, 4 \n\t" \
        "mflo $14 \n\t" \
        "mfhi $9 \n\t" \
        "addu $14, $12, $14 \n\t" \
        "lw $15, 0($11) \n\t" \
        "sltu $12, $14, $12 \n\t" \
        "addu $15, $14, $15 \n\t" \
        "sltu $14, $15, $14 \n\t" \
        "addu $12, $12, $9 \n\t" \
        "sw $15, 0($11) \n\t" \
        "addu $12, $12, $14 \n\t" \
        "addi $11, $11, 4 \n\t"

#define MULADDC_X1_STOP \
        "sw $12, %0 \n\t" \
        "sw $11, %1 \n\t" \
        "sw $10, %2 \n\t" \
        : "=m" (c), "=m" (d), "=m" (s) \
        : "m" (s), "m" (d), "m" (c), "m" (b) \
        : "$9", "$10", "$11", "$12", "$13", "$14", "$15", "lo", "hi" \
    );

#endif /* MIPS */
#endif /* GNUC */

#if (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)

#define MULADDC_X1_INIT \
    __asm mov esi, s \
    __asm mov edi, d \
    __asm mov ecx, c \
    __asm mov ebx, b

#define MULADDC_X1_CORE \
    __asm lodsd \
    __asm mul ebx \
    __asm add eax, ecx \
    __asm adc edx, 0 \
    __asm add eax, [edi] \
    __asm adc edx, 0 \
    __asm mov ecx, edx \
    __asm stosd

#define MULADDC_X1_STOP \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi

#if defined(MBEDTLS_HAVE_SSE2)

#define EMIT __asm _emit

#define MULADDC_X8_INIT MULADDC_X1_INIT

#define MULADDC_X8_CORE \
    EMIT 0x0F EMIT 0x6E EMIT 0xC9 \
    EMIT 0x0F EMIT 0x6E EMIT 0xC3 \
    EMIT 0x0F EMIT 0x6E EMIT 0x1F \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x16 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x04 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x08 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x6E EMIT 0x7E EMIT 0x0C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x5F EMIT 0x04 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x08 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xEE \
    EMIT 0x0F EMIT 0x6E EMIT 0x67 EMIT 0x0C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xFC \
    EMIT 0x0F EMIT 0x7E EMIT 0x0F \
    EMIT 0x0F EMIT 0x6E EMIT 0x56 EMIT 0x10 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD0 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x66 EMIT 0x14 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xE0 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x6E EMIT 0x76 EMIT 0x18 \
    EMIT 0x0F EMIT 0xF4 EMIT 0xF0 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x04 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x6E EMIT 0x5E EMIT 0x1C \
    EMIT 0x0F EMIT 0xF4 EMIT 0xD8 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCD \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x10 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xD5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x08 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCF \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x14 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xE5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x0C \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCA \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x18 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xF5 \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x10 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCC \
    EMIT 0x0F EMIT 0x6E EMIT 0x6F EMIT 0x1C \
    EMIT 0x0F EMIT 0xD4 EMIT 0xDD \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x14 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCE \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x18 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0xD4 EMIT 0xCB \
    EMIT 0x0F EMIT 0x7E EMIT 0x4F EMIT 0x1C \
    EMIT 0x83 EMIT 0xC7 EMIT 0x20 \
    EMIT 0x83 EMIT 0xC6 EMIT 0x20 \
    EMIT 0x0F EMIT 0x73 EMIT 0xD1 EMIT 0x20 \
    EMIT 0x0F EMIT 0x7E EMIT 0xC9

#define MULADDC_X8_STOP \
    EMIT 0x0F EMIT 0x77 \
    __asm mov c, ecx \
    __asm mov d, edi \
    __asm mov s, esi

#endif /* SSE2 */
#endif /* MSVC */

#endif /* MBEDTLS_HAVE_ASM */

#if !defined(MULADDC_X1_CORE)
#if defined(MBEDTLS_HAVE_UDBL)

#define MULADDC_X1_INIT \
    { \
        mbedtls_t_udbl r; \
        mbedtls_mpi_uint r0, r1;

#define MULADDC_X1_CORE \
        r = *(s++) * (mbedtls_t_udbl) b; \
        r0 = (mbedtls_mpi_uint) r; \
        r1 = (mbedtls_mpi_uint)( r >> biL ); \
        r0 += c; r1 += (r0 < c); \
        r0 += *d; r1 += (r0 < *d); \
        c = r1; *(d++) = r0;

#define MULADDC_X1_STOP \
    }

#else /* MBEDTLS_HAVE_UDBL */

#define MULADDC_X1_INIT \
    { \
        mbedtls_mpi_uint s0, s1, b0, b1; \
        mbedtls_mpi_uint r0, r1, rx, ry; \
        b0 = ( b << biH ) >> biH; \
        b1 = ( b >> biH );

#define MULADDC_X1_CORE \
        s0 = ( *s << biH ) >> biH; \
        s1 = ( *s >> biH ); s++; \
        rx = s0 * b1; r0 = s0 * b0; \
        ry = s1 * b0; r1 = s1 * b1; \
        r1 += ( rx >> biH ); \
        r1 += ( ry >> biH ); \
        rx <<= biH; ry <<= biH; \
        r0 += rx; r1 += (r0 < rx); \
        r0 += ry; r1 += (r0 < ry); \
        r0 += c; r1 += (r0 < c); \
        r0 += *d; r1 += (r0 < *d); \
        c = r1; *(d++) = r0;

#define MULADDC_X1_STOP \
    }

#endif /* C (longlong) */
#endif /* C (generic) */
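/*
 * Note (informal): the fallback above without MBEDTLS_HAVE_UDBL splits each
 * limb into two biH-bit halves and performs schoolbook multiplication,
 *
 *     (s1*2^biH + s0) * (b1*2^biH + b0)
 *         = s1*b1*2^biL + (s1*b0 + s0*b1)*2^biH + s0*b0,
 *
 * detecting carries with the post-addition comparison idiom (r0 < rx).
 */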

#if !defined(MULADDC_X2_CORE)
#define MULADDC_X2_INIT MULADDC_X1_INIT
#define MULADDC_X2_STOP MULADDC_X1_STOP
#define MULADDC_X2_CORE MULADDC_X1_CORE MULADDC_X1_CORE
#endif /* MULADDC_X2_CORE */

#if !defined(MULADDC_X4_CORE)
#define MULADDC_X4_INIT MULADDC_X2_INIT
#define MULADDC_X4_STOP MULADDC_X2_STOP
#define MULADDC_X4_CORE MULADDC_X2_CORE MULADDC_X2_CORE
#endif /* MULADDC_X4_CORE */

#if !defined(MULADDC_X8_CORE)
#define MULADDC_X8_INIT MULADDC_X4_INIT
#define MULADDC_X8_STOP MULADDC_X4_STOP
#define MULADDC_X8_CORE MULADDC_X4_CORE MULADDC_X4_CORE
#endif /* MULADDC_X8_CORE */

/* *INDENT-ON* */
#endif /* bn_mul.h */