/*
 * Origin: GitHub repository freebsd/freebsd-src,
 * path contrib/bearssl/src/inner.h (branch "main").
 */
/*
 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INNER_H__
#define INNER_H__

#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "config.h"
#include "bearssl.h"

/*
 * On MSVC, disable the warning about applying unary minus on an
 * unsigned type: it is standard, we do it all the time, and for
 * good reasons.
 */
#if _MSC_VER
#pragma warning( disable : 4146 )
#endif

/*
 * Maximum size for a RSA modulus (in bits). Allocated stack buffers
 * depend on that size, so this value should be kept small. Currently,
 * 2048-bit RSA keys offer adequate security, and should still do so for
 * the next few decades; however, a number of widespread PKI have
 * already set their root keys to RSA-4096, so we should be able to
 * process such keys.
 *
 * This value MUST be a multiple of 64. This value MUST NOT exceed 47666
 * (some computations in RSA key generation rely on the factor size being
 * no more than 23833 bits). RSA key sizes beyond 3072 bits don't make a
 * lot of sense anyway.
 */
#define BR_MAX_RSA_SIZE   4096

/*
 * Minimum size for a RSA modulus (in bits); this value is used only to
 * filter out invalid parameters for key pair generation. Normally,
 * applications should not use RSA keys smaller than 2048 bits; but some
 * specific cases might need shorter keys, for legacy or research
 * purposes.
 */
#define BR_MIN_RSA_SIZE   512

/*
 * Maximum size for a RSA factor (in bits). This is for RSA private-key
 * operations. Default is to support factors up to a bit more than half
 * the maximum modulus size.
 *
 * This value MUST be a multiple of 32.
 */
#define BR_MAX_RSA_FACTOR   ((BR_MAX_RSA_SIZE + 64) >> 1)

/*
 * Maximum size for an EC curve (modulus or order), in bits. Size of
 * stack buffers depends on that parameter. This size MUST be a multiple
 * of 8 (so that decoding an integer with that many bytes does not
 * overflow).
 */
#define BR_MAX_EC_SIZE   528

/*
 * Some macros to recognize the current architecture. Right now, we are
 * interested into automatically recognizing architecture with efficient
 * 64-bit types so that we may automatically use implementations that
 * use 64-bit registers in that case. Future versions may detect, e.g.,
 * availability of SSE2 intrinsics.
 *
 * If 'unsigned long' is a 64-bit type, then we assume that 64-bit types
 * are efficient. Otherwise, we rely on macros that depend on compiler,
 * OS and architecture. In any case, failure to detect the architecture
 * as 64-bit means that the 32-bit code will be used, and that code
 * works also on 64-bit architectures (the 64-bit code may simply be
 * more efficient).
 *
 * The test on 'unsigned long' should already catch most cases, the one
 * notable exception being Windows code where 'unsigned long' is kept to
 * 32-bit for compatibility with all the legacy code that liberally uses
 * the 'DWORD' type for 32-bit values.
 *
 * Macro names are taken from: http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros
 */
#ifndef BR_64
#if ((ULONG_MAX >> 31) >> 31) == 3
#define BR_64   1
#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64)
#define BR_64   1
#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
	|| defined(__64BIT__) || defined(_LP64) || defined(__LP64__)
#define BR_64   1
#elif defined(__sparc64__)
#define BR_64   1
#elif defined(__x86_64__) || defined(_M_X64)
#define BR_64   1
#elif defined(__aarch64__) || defined(_M_ARM64)
#define BR_64   1
#elif defined(__mips64)
#define BR_64   1
#endif
#endif

/*
 * Set BR_LOMUL on platforms where it makes sense.
 * (Currently: only when BR_ARMEL_CORTEXM_GCC is set, unless the caller
 * already defined BR_LOMUL.)
 */
#ifndef BR_LOMUL
#if BR_ARMEL_CORTEXM_GCC
#define BR_LOMUL   1
#endif
#endif

/*
 * Architecture detection: BR_i386 is set on 32-bit x86, BR_amd64 on
 * 64-bit x86 (GCC/Clang and MSVC predefined macros are recognized).
 */
#ifndef BR_i386
#if __i386__ || _M_IX86
#define BR_i386   1
#endif
#endif

#ifndef BR_amd64
#if __x86_64__ || _M_X64
#define BR_amd64   1
#endif
#endif

/*
 * Compiler brand and version.
 *
 * Implementations that use intrinsics need to detect the compiler type
 * and version because some specific actions may be needed to activate
 * the corresponding opcodes, both for header inclusion, and when using
 * them in a function.
 *
 * BR_GCC, BR_CLANG and BR_MSC will be set to 1 for, respectively, GCC,
 * Clang and MS Visual C. For each of them, sub-macros will be defined
 * for versions; each sub-macro is set whenever the compiler version is
 * at least as recent as the one corresponding to the macro.
 */

/*
 * GCC thresholds are on versions 4.4 to 4.9 and 5.0.
 *
 * The first chain defines the macro for the highest matching threshold
 * only; the cascade that follows then defines every lower threshold.
 */
#ifndef BR_GCC
#if __GNUC__ && !__clang__
#define BR_GCC   1

#if __GNUC__ > 4
#define BR_GCC_5_0   1
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
#define BR_GCC_4_9   1
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8
#define BR_GCC_4_8   1
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7
#define BR_GCC_4_7   1
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6
#define BR_GCC_4_6   1
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 5
#define BR_GCC_4_5   1
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 4
#define BR_GCC_4_4   1
#endif

#if BR_GCC_5_0
#define BR_GCC_4_9   1
#endif
#if BR_GCC_4_9
#define BR_GCC_4_8   1
#endif
#if BR_GCC_4_8
#define BR_GCC_4_7   1
#endif
#if BR_GCC_4_7
#define BR_GCC_4_6   1
#endif
#if BR_GCC_4_6
#define BR_GCC_4_5   1
#endif
#if BR_GCC_4_5
#define BR_GCC_4_4   1
#endif

#endif
#endif

/*
 * Clang thresholds are on versions 3.7.0 and 3.8.0.
 */
#ifndef BR_CLANG
#if __clang__
#define BR_CLANG   1

#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
#define BR_CLANG_3_8   1
#elif __clang_major__ == 3 && __clang_minor__ >= 7
#define BR_CLANG_3_7   1
#endif

#if BR_CLANG_3_8
#define BR_CLANG_3_7   1
#endif

#endif
#endif

/*
 * MS Visual C thresholds are on Visual Studio 2005 to 2015.
 *
 * As for GCC, the highest matching threshold is defined first, then
 * the cascade defines all lower thresholds.
 */
#ifndef BR_MSC
#if _MSC_VER
#define BR_MSC   1

#if _MSC_VER >= 1900
#define BR_MSC_2015   1
#elif _MSC_VER >= 1800
#define BR_MSC_2013   1
#elif _MSC_VER >= 1700
#define BR_MSC_2012   1
#elif _MSC_VER >= 1600
#define BR_MSC_2010   1
#elif _MSC_VER >= 1500
#define BR_MSC_2008   1
#elif _MSC_VER >= 1400
#define BR_MSC_2005   1
#endif

#if BR_MSC_2015
#define BR_MSC_2013   1
#endif
#if BR_MSC_2013
#define BR_MSC_2012   1
#endif
#if BR_MSC_2012
#define BR_MSC_2010   1
#endif
#if BR_MSC_2010
#define BR_MSC_2008   1
#endif
#if BR_MSC_2008
#define BR_MSC_2005   1
#endif

#endif
#endif

/*
 * GCC 4.4+ and Clang 3.7+ allow tagging specific functions with a
 * 'target' attribute that activates support for specific opcodes.
 * On other compilers, BR_TARGET() expands to nothing.
 */
#if BR_GCC_4_4 || BR_CLANG_3_7
#define BR_TARGET(x)   __attribute__((target(x)))
#else
#define BR_TARGET(x)
#endif

/*
 * AES-NI intrinsics are available on x86 (32-bit and 64-bit) with
 * GCC 4.8+, Clang 3.7+ and MSC 2012+.
 */
#ifndef BR_AES_X86NI
#if (BR_i386 || BR_amd64) && (BR_GCC_4_8 || BR_CLANG_3_7 || BR_MSC_2012)
#define BR_AES_X86NI   1
#endif
#endif

/*
 * SSE2 intrinsics are available on x86 (32-bit and 64-bit) with
 * GCC 4.4+, Clang 3.7+ and MSC 2005+.
 */
#ifndef BR_SSE2
#if (BR_i386 || BR_amd64) && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)
#define BR_SSE2   1
#endif
#endif

/*
 * RDRAND intrinsics are available on x86 (32-bit and 64-bit) with
 * GCC 4.6+, Clang 3.7+ and MSC 2012+.
 */
#ifndef BR_RDRAND
#if (BR_i386 || BR_amd64) && (BR_GCC_4_6 || BR_CLANG_3_7 || BR_MSC_2012)
#define BR_RDRAND   1
#endif
#endif

/*
 * Determine type of OS for random number generation. Macro names and
 * values are documented on:
 *    https://sourceforge.net/p/predef/wiki/OperatingSystems/
 *
 * Win32's CryptGenRandom() should be available on Windows systems.
 *
 * /dev/urandom should work on all Unix-like systems (including macOS X).
 *
 * getentropy() is present on Linux (Glibc 2.25+), FreeBSD (12.0+) and
 * OpenBSD (5.6+). For OpenBSD, there does not seem to be easy to use
 * macros to test the minimum version, so we just assume that it is
 * recent enough (last version without getentropy() has gone out of
 * support in May 2015).
 *
 * Ideally we should use getentropy() on macOS (10.12+) too, but I don't
 * know how to test the exact OS version with preprocessor macros.
 *
 * TODO: enrich the list of detected system.
 */

#ifndef BR_USE_URANDOM
#if defined _AIX \
	|| defined __ANDROID__ \
	|| defined __FreeBSD__ \
	|| defined __NetBSD__ \
	|| defined __OpenBSD__ \
	|| defined __DragonFly__ \
	|| defined __linux__ \
	|| (defined __sun && (defined __SVR4 || defined __svr4__)) \
	|| (defined __APPLE__ && defined __MACH__)
#define BR_USE_URANDOM   1
#endif
#endif

#ifndef BR_USE_GETENTROPY
#if (defined __linux__ \
	&& (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))) \
	|| (defined __FreeBSD__ && __FreeBSD__ >= 12) \
	|| defined __OpenBSD__
#define BR_USE_GETENTROPY   1
#endif
#endif

#ifndef BR_USE_WIN32_RAND
#if defined _WIN32 || defined _WIN64
#define BR_USE_WIN32_RAND   1
#endif
#endif

/*
 * POWER8 crypto support. We rely on compiler macros for the
 * architecture, since we do not have a reliable, simple way to detect
 * the required support at runtime (we could try running an opcode, and
 * trapping the exception or signal on illegal instruction, but this
 * induces some non-trivial OS dependencies that we would prefer to
 * avoid if possible).
 */
#ifndef BR_POWER8
#if __GNUC__ && ((_ARCH_PWR8 || _ARCH_PPC) && __CRYPTO__)
#define BR_POWER8   1
#endif
#endif

/*
 * Detect endianness on POWER8.
 *
 * If the caller predefined one of BR_POWER8_LE / BR_POWER8_BE, the other
 * one is forced to the opposite value; otherwise both are derived from
 * __LITTLE_ENDIAN__. After this block (when BR_POWER8 is set), exactly
 * one of the two macros is 1 and the other is 0.
 */
#if BR_POWER8
#if defined BR_POWER8_LE
#undef BR_POWER8_BE
#if BR_POWER8_LE
#define BR_POWER8_BE   0
#else
#define BR_POWER8_BE   1
#endif
#elif defined BR_POWER8_BE
#undef BR_POWER8_LE
#if BR_POWER8_BE
#define BR_POWER8_LE   0
#else
#define BR_POWER8_LE   1
#endif
#else
#if __LITTLE_ENDIAN__
#define BR_POWER8_LE   1
#define BR_POWER8_BE   0
#else
#define BR_POWER8_LE   0
#define BR_POWER8_BE   1
#endif
#endif
#endif

/*
 * Detect support for 128-bit integers: BR_INT128 when the compiler
 * advertises __SIZEOF_INT128__, BR_UMUL128 on MSVC for x64 (_M_X64).
 * Nothing is done if the caller already defined either macro.
 */
#if !defined BR_INT128 && !defined BR_UMUL128
#ifdef __SIZEOF_INT128__
#define BR_INT128    1
#elif _M_X64
#define BR_UMUL128   1
#endif
#endif

411
/*
412
* Detect support for unaligned accesses with known endianness.
413
*
414
* x86 (both 32-bit and 64-bit) is little-endian and allows unaligned
415
* accesses.
416
*
417
* POWER/PowerPC allows unaligned accesses when big-endian. POWER8 and
418
* later also allow unaligned accesses when little-endian.
419
*/
420
#if !defined BR_LE_UNALIGNED && !defined BR_BE_UNALIGNED
421
422
#if __i386 || __i386__ || __x86_64__ || _M_IX86 || _M_X64
423
#define BR_LE_UNALIGNED 1
424
#elif BR_POWER8_BE
425
#define BR_BE_UNALIGNED 1
426
#elif BR_POWER8_LE
427
#define BR_LE_UNALIGNED 1
428
#elif (__powerpc__ || __powerpc64__ || _M_PPC || _ARCH_PPC || _ARCH_PPC64) \
429
&& __BIG_ENDIAN__
430
#define BR_BE_UNALIGNED 1
431
#endif
432
433
#endif
434
435
/*
 * Detect support for an OS-provided time source.
 */

#ifndef BR_USE_UNIX_TIME
#if defined __unix__ || defined __linux__ \
	|| defined _POSIX_SOURCE || defined _POSIX_C_SOURCE \
	|| (defined __APPLE__ && defined __MACH__)
#define BR_USE_UNIX_TIME   1
#endif
#endif

#ifndef BR_USE_WIN32_TIME
#if defined _WIN32 || defined _WIN64
#define BR_USE_WIN32_TIME   1
#endif
#endif

/* ==================================================================== */
/*
 * Encoding/decoding functions.
 *
 * 32-bit and 64-bit decoding, both little-endian and big-endian, is
 * implemented with the inline functions below.
 *
 * When allowed by some compile-time options (autodetected or provided),
 * optimised code is used, to perform direct memory access when the
 * underlying architecture supports it, both for endianness and
 * alignment. This, however, may trigger strict aliasing issues; the
 * code below uses unions to perform (supposedly) safe type punning.
 * Since the C aliasing rules are relatively complex and were amended,
 * or at least re-explained with different phrasing, in all successive
 * versions of the C standard, it is always a bit risky to bet that any
 * specific version of a C compiler got it right, for some notion of
 * "right".
 */

/* 16-bit word overlaid with its in-memory bytes. */
typedef union {
	uint16_t u;
	unsigned char b[sizeof(uint16_t)];
} br_union_u16;

/* 32-bit word overlaid with its in-memory bytes. */
typedef union {
	uint32_t u;
	unsigned char b[sizeof(uint32_t)];
} br_union_u32;

/* 64-bit word overlaid with its in-memory bytes. */
typedef union {
	uint64_t u;
	unsigned char b[sizeof(uint64_t)];
} br_union_u64;

/*
 * Write the low 16 bits of x at dst, in little-endian order (2 bytes).
 * With BR_LE_UNALIGNED, a direct 16-bit store is used.
 */
static inline void
br_enc16le(void *dst, unsigned x)
{
#if BR_LE_UNALIGNED
	((br_union_u16 *)dst)->u = x;
#else
	unsigned char *buf;

	buf = dst;
	buf[0] = (unsigned char)x;
	buf[1] = (unsigned char)(x >> 8);
#endif
}

/*
 * Write the low 16 bits of x at dst, in big-endian order (2 bytes).
 * With BR_BE_UNALIGNED, a direct 16-bit store is used.
 */
static inline void
br_enc16be(void *dst, unsigned x)
{
#if BR_BE_UNALIGNED
	((br_union_u16 *)dst)->u = x;
#else
	unsigned char *buf;

	buf = dst;
	buf[0] = (unsigned char)(x >> 8);
	buf[1] = (unsigned char)x;
#endif
}

/*
 * Read a 16-bit little-endian value from the two bytes at src.
 * With BR_LE_UNALIGNED, a direct 16-bit load is used.
 */
static inline unsigned
br_dec16le(const void *src)
{
#if BR_LE_UNALIGNED
	return ((const br_union_u16 *)src)->u;
#else
	const unsigned char *buf;

	buf = src;
	return (unsigned)buf[0] | ((unsigned)buf[1] << 8);
#endif
}

/*
 * Read a 16-bit big-endian value from the two bytes at src.
 * With BR_BE_UNALIGNED, a direct 16-bit load is used.
 */
static inline unsigned
br_dec16be(const void *src)
{
#if BR_BE_UNALIGNED
	return ((const br_union_u16 *)src)->u;
#else
	const unsigned char *buf;

	buf = src;
	return ((unsigned)buf[0] << 8) | (unsigned)buf[1];
#endif
}

/*
 * Write the 32-bit value x at dst, in little-endian order (4 bytes).
 * With BR_LE_UNALIGNED, a direct 32-bit store is used.
 */
static inline void
br_enc32le(void *dst, uint32_t x)
{
#if BR_LE_UNALIGNED
	((br_union_u32 *)dst)->u = x;
#else
	unsigned char *buf;

	buf = dst;
	buf[0] = (unsigned char)x;
	buf[1] = (unsigned char)(x >> 8);
	buf[2] = (unsigned char)(x >> 16);
	buf[3] = (unsigned char)(x >> 24);
#endif
}

/*
 * Write the 32-bit value x at dst, in big-endian order (4 bytes).
 * With BR_BE_UNALIGNED, a direct 32-bit store is used.
 */
static inline void
br_enc32be(void *dst, uint32_t x)
{
#if BR_BE_UNALIGNED
	((br_union_u32 *)dst)->u = x;
#else
	unsigned char *buf;

	buf = dst;
	buf[0] = (unsigned char)(x >> 24);
	buf[1] = (unsigned char)(x >> 16);
	buf[2] = (unsigned char)(x >> 8);
	buf[3] = (unsigned char)x;
#endif
}

/*
 * Read a 32-bit little-endian value from the four bytes at src.
 * With BR_LE_UNALIGNED, a direct 32-bit load is used.
 */
static inline uint32_t
br_dec32le(const void *src)
{
#if BR_LE_UNALIGNED
	return ((const br_union_u32 *)src)->u;
#else
	const unsigned char *buf;

	buf = src;
	return (uint32_t)buf[0]
		| ((uint32_t)buf[1] << 8)
		| ((uint32_t)buf[2] << 16)
		| ((uint32_t)buf[3] << 24);
#endif
}

/*
 * Read a 32-bit big-endian value from the four bytes at src.
 * With BR_BE_UNALIGNED, a direct 32-bit load is used.
 */
static inline uint32_t
br_dec32be(const void *src)
{
#if BR_BE_UNALIGNED
	return ((const br_union_u32 *)src)->u;
#else
	const unsigned char *buf;

	buf = src;
	return ((uint32_t)buf[0] << 24)
		| ((uint32_t)buf[1] << 16)
		| ((uint32_t)buf[2] << 8)
		| (uint32_t)buf[3];
#endif
}

/*
 * Write the 64-bit value x at dst, in little-endian order (8 bytes).
 * With BR_LE_UNALIGNED, a direct 64-bit store is used; otherwise the
 * bytes are stored one at a time, least significant first.
 */
static inline void
br_enc64le(void *dst, uint64_t x)
{
#if BR_LE_UNALIGNED
	((br_union_u64 *)dst)->u = x;
#else
	unsigned char *buf;

	buf = dst;
	buf[0] = (unsigned char)x;
	buf[1] = (unsigned char)(x >> 8);
	buf[2] = (unsigned char)(x >> 16);
	buf[3] = (unsigned char)(x >> 24);
	buf[4] = (unsigned char)(x >> 32);
	buf[5] = (unsigned char)(x >> 40);
	buf[6] = (unsigned char)(x >> 48);
	buf[7] = (unsigned char)(x >> 56);
#endif
}

/*
 * Write the 64-bit value x at dst, in big-endian order (8 bytes).
 * With BR_BE_UNALIGNED, a direct 64-bit store is used; otherwise the
 * bytes are stored one at a time, most significant first.
 */
static inline void
br_enc64be(void *dst, uint64_t x)
{
#if BR_BE_UNALIGNED
	((br_union_u64 *)dst)->u = x;
#else
	unsigned char *buf;

	buf = dst;
	buf[0] = (unsigned char)(x >> 56);
	buf[1] = (unsigned char)(x >> 48);
	buf[2] = (unsigned char)(x >> 40);
	buf[3] = (unsigned char)(x >> 32);
	buf[4] = (unsigned char)(x >> 24);
	buf[5] = (unsigned char)(x >> 16);
	buf[6] = (unsigned char)(x >> 8);
	buf[7] = (unsigned char)x;
#endif
}

/*
 * Read a 64-bit little-endian value from the eight bytes at src.
 * With BR_LE_UNALIGNED, a direct 64-bit load is used; otherwise the
 * value is assembled byte by byte.
 */
static inline uint64_t
br_dec64le(const void *src)
{
#if BR_LE_UNALIGNED
	return ((const br_union_u64 *)src)->u;
#else
	const unsigned char *buf;

	buf = src;
	return (uint64_t)buf[0]
		| ((uint64_t)buf[1] << 8)
		| ((uint64_t)buf[2] << 16)
		| ((uint64_t)buf[3] << 24)
		| ((uint64_t)buf[4] << 32)
		| ((uint64_t)buf[5] << 40)
		| ((uint64_t)buf[6] << 48)
		| ((uint64_t)buf[7] << 56);
#endif
}

/*
 * Read a 64-bit big-endian value from the eight bytes at src.
 * With BR_BE_UNALIGNED, a direct 64-bit load is used; otherwise the
 * value is assembled byte by byte.
 */
static inline uint64_t
br_dec64be(const void *src)
{
#if BR_BE_UNALIGNED
	return ((const br_union_u64 *)src)->u;
#else
	const unsigned char *buf;

	buf = src;
	return ((uint64_t)buf[0] << 56)
		| ((uint64_t)buf[1] << 48)
		| ((uint64_t)buf[2] << 40)
		| ((uint64_t)buf[3] << 32)
		| ((uint64_t)buf[4] << 24)
		| ((uint64_t)buf[5] << 16)
		| ((uint64_t)buf[6] << 8)
		| (uint64_t)buf[7];
#endif
}

/*
 * Range decoding and encoding (for several successive values).
 * 'num' is the number of values (not bytes) to process.
 */
void br_range_dec16le(uint16_t *v, size_t num, const void *src);
void br_range_dec16be(uint16_t *v, size_t num, const void *src);
void br_range_enc16le(void *dst, const uint16_t *v, size_t num);
void br_range_enc16be(void *dst, const uint16_t *v, size_t num);

void br_range_dec32le(uint32_t *v, size_t num, const void *src);
void br_range_dec32be(uint32_t *v, size_t num, const void *src);
void br_range_enc32le(void *dst, const uint32_t *v, size_t num);
void br_range_enc32be(void *dst, const uint32_t *v, size_t num);

void br_range_dec64le(uint64_t *v, size_t num, const void *src);
void br_range_dec64be(uint64_t *v, size_t num, const void *src);
void br_range_enc64le(void *dst, const uint64_t *v, size_t num);
void br_range_enc64be(void *dst, const uint64_t *v, size_t num);

/*
 * Byte-swap a 32-bit integer.
 */
static inline uint32_t
br_swap32(uint32_t x)
{
	/* Swap the two bytes inside each 16-bit half... */
	x = ((x & (uint32_t)0x00FF00FF) << 8)
		| ((x >> 8) & (uint32_t)0x00FF00FF);
	/* ...then swap the two halves. */
	return (x << 16) | (x >> 16);
}

/* ==================================================================== */
/*
 * Support code for hash functions.
 */

/*
 * IV for MD5, SHA-1, SHA-224 and SHA-256.
 */
extern const uint32_t br_md5_IV[];
extern const uint32_t br_sha1_IV[];
extern const uint32_t br_sha224_IV[];
extern const uint32_t br_sha256_IV[];

/*
 * Round functions for MD5, SHA-1, SHA-224 and SHA-256 (SHA-224 and
 * SHA-256 use the same round function).
 */
void br_md5_round(const unsigned char *buf, uint32_t *val);
void br_sha1_round(const unsigned char *buf, uint32_t *val);
void br_sha2small_round(const unsigned char *buf, uint32_t *val);

/*
712
* The core function for the TLS PRF. It computes
713
* P_hash(secret, label + seed), and XORs the result into the dst buffer.
714
*/
715
void br_tls_phash(void *dst, size_t len,
716
const br_hash_class *dig,
717
const void *secret, size_t secret_len, const char *label,
718
size_t seed_num, const br_tls_prf_seed_chunk *seed);
719
720
/*
721
* Copy all configured hash implementations from a multihash context
722
* to another.
723
*/
724
static inline void
725
br_multihash_copyimpl(br_multihash_context *dst,
726
const br_multihash_context *src)
727
{
728
memcpy((void *)dst->impl, src->impl, sizeof src->impl);
729
}
730
731
/* ==================================================================== */
/*
 * Constant-time primitives. These functions manipulate 32-bit values in
 * order to provide constant-time comparisons and multiplexers.
 *
 * Boolean values (the "ctl" bits) MUST have value 0 or 1.
 *
 * Implementation notes:
 * =====================
 *
 * The uintN_t types are unsigned and with width exactly N bits; the C
 * standard guarantees that computations are performed modulo 2^N, and
 * there can be no overflow. Negation (unary '-') works on unsigned types
 * as well.
 *
 * The intN_t types are guaranteed to have width exactly N bits, with no
 * padding bit, and using two's complement representation. Casting
 * intN_t to uintN_t really is conversion modulo 2^N. Beware that intN_t
 * types, being signed, trigger implementation-defined behaviour on
 * overflow (including raising some signal): with GCC, while modular
 * arithmetics are usually applied, the optimizer may assume that
 * overflows don't occur (unless the -fwrapv command-line option is
 * added); Clang has the additional -ftrapv option to explicitly trap on
 * integer overflow or underflow.
 */

/*
 * Negate a boolean (ctl MUST be 0 or 1).
 */
static inline uint32_t
NOT(uint32_t ctl)
{
	return ctl ^ 1;
}

/*
 * Multiplexer: returns x if ctl == 1, y if ctl == 0.
 */
static inline uint32_t
MUX(uint32_t ctl, uint32_t x, uint32_t y)
{
	/* -ctl is an all-ones mask when ctl == 1, all-zeros when ctl == 0. */
	return y ^ (-ctl & (x ^ y));
}

/*
 * Equality check: returns 1 if x == y, 0 otherwise.
 */
static inline uint32_t
EQ(uint32_t x, uint32_t y)
{
	uint32_t q;

	q = x ^ y;
	/*
	 * (q | -q) has its top bit set if and only if q != 0; the final
	 * XOR with 1 negates that boolean.
	 */
	return ((q | -q) >> 31) ^ 1;
}

/*
 * Inequality check: returns 1 if x != y, 0 otherwise.
 */
static inline uint32_t
NEQ(uint32_t x, uint32_t y)
{
	uint32_t q;

	q = x ^ y;
	/* (q | -q) has its top bit set if and only if q != 0. */
	return (q | -q) >> 31;
}

/*
 * Comparison: returns 1 if x > y, 0 otherwise.
 */
static inline uint32_t
GT(uint32_t x, uint32_t y)
{
	/*
	 * If both x < 2^31 and y < 2^31, then y-x will have its high
	 * bit set if x > y, cleared otherwise.
	 *
	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
	 * result is the high bit of x.
	 *
	 * If both x >= 2^31 and y >= 2^31, then we can virtually
	 * subtract 2^31 from both, and we are back to the first case.
	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
	 * fine.
	 */
	uint32_t z;

	z = y - x;
	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
}

/*
 * Other comparisons (greater-or-equal, lower-than, lower-or-equal).
 * All are expressed with GT() and NOT(), on 32-bit unsigned operands.
 */
#define GE(x, y)   NOT(GT(y, x))
#define LT(x, y)   GT(y, x)
#define LE(x, y)   NOT(GT(x, y))

/*
 * General comparison: returned value is -1, 0 or 1, depending on
 * whether x is lower than, equal to, or greater than y.
 */
static inline int32_t
CMP(uint32_t x, uint32_t y)
{
	return (int32_t)GT(x, y) | -(int32_t)GT(y, x);
}

/*
 * Returns 1 if x == 0, 0 otherwise. Take care that the operand is signed.
 */
static inline uint32_t
EQ0(int32_t x)
{
	uint32_t q;

	q = (uint32_t)x;
	/* The top bit of (q | -q) is clear if and only if q == 0. */
	return ~(q | -q) >> 31;
}

/*
 * Returns 1 if x > 0, 0 otherwise. Take care that the operand is signed.
 */
static inline uint32_t
GT0(int32_t x)
{
	/*
	 * High bit of -x is 0 if x == 0, but 1 if x > 0.
	 * The mask with ~q rejects the values whose own high bit is set
	 * (negative x).
	 */
	uint32_t q;

	q = (uint32_t)x;
	return (~q & -q) >> 31;
}

/*
 * Returns 1 if x >= 0, 0 otherwise. Take care that the operand is signed.
 */
static inline uint32_t
GE0(int32_t x)
{
	/* The result is the complement of the sign bit. */
	return ~(uint32_t)x >> 31;
}

/*
 * Returns 1 if x < 0, 0 otherwise. Take care that the operand is signed.
 */
static inline uint32_t
LT0(int32_t x)
{
	/* The result is the sign bit. */
	return (uint32_t)x >> 31;
}

/*
 * Returns 1 if x <= 0, 0 otherwise. Take care that the operand is signed.
 */
static inline uint32_t
LE0(int32_t x)
{
	uint32_t q;

	/*
	 * ~-x has its high bit set if and only if -x is nonnegative (as
	 * a signed int), i.e. x is in the -(2^31-1) to 0 range. We must
	 * do an OR with x itself to account for x = -2^31.
	 */
	q = (uint32_t)x;
	return (q | ~-q) >> 31;
}

/*
 * Conditional copy: src[] is copied into dst[] if and only if ctl is 1.
 * dst[] and src[] may overlap completely (but not partially).
 */
void br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len);

/* Short alias, in the same upper-case style as the other primitives. */
#define CCOPY   br_ccopy

/*
 * Compute the bit length of a 32-bit integer. Returned value is between 0
 * and 32 (inclusive).
 */
static inline uint32_t
BIT_LENGTH(uint32_t x)
{
	uint32_t k, c;

	/* k starts at 1 for any non-zero x, 0 otherwise. */
	k = NEQ(x, 0);

	/*
	 * Binary search on the position of the top set bit: at each step,
	 * if x does not fit in the lower half, shift it down and account
	 * for the skipped bits in k.
	 */
	c = GT(x, 0xFFFF);
	x = MUX(c, x >> 16, x);
	k += c << 4;

	c = GT(x, 0x00FF);
	x = MUX(c, x >>  8, x);
	k += c << 3;

	c = GT(x, 0x000F);
	x = MUX(c, x >>  4, x);
	k += c << 2;

	c = GT(x, 0x0003);
	x = MUX(c, x >>  2, x);
	k += c << 1;

	k += GT(x, 0x0001);
	return k;
}

/*
 * Compute the minimum of x and y.
 */
static inline uint32_t
MIN(uint32_t x, uint32_t y)
{
	return MUX(GT(x, y), y, x);
}

/*
 * Compute the maximum of x and y.
 */
static inline uint32_t
MAX(uint32_t x, uint32_t y)
{
	return MUX(GT(x, y), x, y);
}

/*
 * Multiply two 32-bit integers, with a 64-bit result. This default
 * implementation assumes that the basic multiplication operator
 * yields constant-time code.
 */
#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))

#if BR_CT_MUL31

/*
 * Alternate implementation of MUL31, that will be constant-time on some
 * (old) platforms where the default MUL31 is not. Unfortunately, it is
 * also substantially slower, and yields larger code, on more modern
 * platforms, which is why it is deactivated by default.
 *
 * MUL31_lo() must do some extra work because on some platforms, the
 * _signed_ multiplication may return early if the top bits are 1.
 * Simply truncating (casting) the output of MUL31() would not be
 * sufficient, because the compiler may notice that we keep only the low
 * word, and then replace automatically the unsigned multiplication with
 * a signed multiplication opcode.
 */
#define MUL31(x, y)   ((uint64_t)((x) | (uint32_t)0x80000000) \
                       * (uint64_t)((y) | (uint32_t)0x80000000) \
                       - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \
                       - ((uint64_t)1 << 62))
static inline uint32_t
MUL31_lo(uint32_t x, uint32_t y)
{
	uint32_t xl, xh;
	uint32_t yl, yh;

	xl = (x & 0xFFFF) | (uint32_t)0x80000000;
	xh = (x >> 16) | (uint32_t)0x80000000;
	yl = (y & 0xFFFF) | (uint32_t)0x80000000;
	yh = (y >> 16) | (uint32_t)0x80000000;
	return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF;
}

#else

/*
 * Multiply two 31-bit integers, with a 62-bit result. This default
 * implementation assumes that the basic multiplication operator
 * yields constant-time code.
 * The MUL31_lo() macro returns only the low 31 bits of the product.
 */
#define MUL31(x, y)     ((uint64_t)(x) * (uint64_t)(y))
#define MUL31_lo(x, y)  (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF)

#endif

/*
 * Multiply two words together; the sum of the lengths of the two
 * operands must not exceed 31 (for instance, one operand may use 16
 * bits if the other fits on 15). If BR_CT_MUL15 is non-zero, then the
 * macro will contain some extra operations that help in making the
 * operation constant-time on some platforms, where the basic 32-bit
 * multiplication is not constant-time.
 */
#if BR_CT_MUL15
#define MUL15(x, y)   (((uint32_t)(x) | (uint32_t)0x80000000) \
                       * ((uint32_t)(y) | (uint32_t)0x80000000) \
                       & (uint32_t)0x7FFFFFFF)
#else
#define MUL15(x, y)   ((uint32_t)(x) * (uint32_t)(y))
#endif

/*
 * Arithmetic right shift (sign bit is copied). What happens when
 * right-shifting a negative value is _implementation-defined_, so it
 * does not trigger undefined behaviour, but it is still up to each
 * compiler to define (and document) what it does. Most/all compilers
 * will do an arithmetic shift, the sign bit being used to fill the
 * holes; this is a native operation on the underlying CPU, and it would
 * make little sense for the compiler to do otherwise. GCC explicitly
 * documents that it follows that convention.
 *
 * Still, if BR_NO_ARITH_SHIFT is defined (and non-zero), then an
 * alternate version will be used, that does not rely on such
 * implementation-defined behaviour. Unfortunately, it is also slower
 * and yields bigger code, which is why it is deactivated by default.
 *
 * Usage notes:
 *  - In the default version, x must be an lvalue (its address is taken).
 *  - In the BR_NO_ARITH_SHIFT version, n must be in the 1..31 range,
 *    since the expression shifts by (32 - n).
 */
#if BR_NO_ARITH_SHIFT
#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
                      | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
#else
#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
#endif

/*
 * Constant-time division. The dividend hi:lo is divided by the
 * divisor d; the quotient is returned and the remainder is written
 * in *r. If hi == d, then the quotient does not fit on 32 bits;
 * returned value is thus truncated. If hi > d, returned values are
 * indeterminate.
 */
uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r);

/*
 * Wrapper for br_divrem(); the remainder is returned, and the quotient
 * is discarded.
 */
static inline uint32_t
br_rem(uint32_t hi, uint32_t lo, uint32_t d)
{
	uint32_t r;

	br_divrem(hi, lo, d, &r);
	return r;
}

/*
 * Wrapper for br_divrem(); the quotient is returned, and the remainder
 * is discarded.
 */
static inline uint32_t
br_div(uint32_t hi, uint32_t lo, uint32_t d)
{
	uint32_t r;

	return br_divrem(hi, lo, d, &r);
}

/* ==================================================================== */

/*
 * Integers 'i32'
 * --------------
 *
 * The 'i32' functions implement computations on big integers using
 * an internal representation as an array of 32-bit integers. For
 * an array x[]:
 *  -- x[0] contains the "announced bit length" of the integer
 *  -- x[1], x[2]... contain the value in little-endian order (x[1]
 *     contains the least significant 32 bits)
 *
 * Multiplications rely on the elementary 32x32->64 multiplication.
 *
 * The announced bit length specifies the number of bits that are
 * significant in the subsequent 32-bit words. Unused bits in the
 * last (most significant) word are set to 0; subsequent words are
 * uninitialized and need not exist at all.
 *
 * The execution time and memory access patterns of all computations
 * depend on the announced bit length, but not on the actual word
 * values. For modular integers, the announced bit length of any integer
 * modulo n is equal to the actual bit length of n; thus, computations
 * on modular integers are "constant-time" (only the modulus length may
 * leak).
 */

/*
1099
* Compute the actual bit length of an integer. The argument x should
1100
* point to the first (least significant) value word of the integer.
1101
* The len 'xlen' contains the number of 32-bit words to access.
1102
*
1103
* CT: value or length of x does not leak.
1104
*/
1105
uint32_t br_i32_bit_length(uint32_t *x, size_t xlen);
1106
1107
/*
1108
* Decode an integer from its big-endian unsigned representation. The
1109
* "true" bit length of the integer is computed, but all words of x[]
1110
* corresponding to the full 'len' bytes of the source are set.
1111
*
1112
* CT: value or length of x does not leak.
1113
*/
1114
void br_i32_decode(uint32_t *x, const void *src, size_t len);
1115
1116
/*
1117
* Decode an integer from its big-endian unsigned representation. The
1118
* integer MUST be lower than m[]; the announced bit length written in
1119
* x[] will be equal to that of m[]. All 'len' bytes from the source are
1120
* read.
1121
*
1122
* Returned value is 1 if the decoded value fits within the modulus, 0
1123
* otherwise. In the latter case, the x[] buffer will be set to 0 (but
1124
* still with the announced bit length of m[]).
1125
*
1126
* CT: value or length of x does not leak. Memory access pattern depends
1127
* only on 'len' and the announced bit length of m. Whether x fits or
1128
* not does not leak either.
1129
*/
1130
uint32_t br_i32_decode_mod(uint32_t *x,
1131
const void *src, size_t len, const uint32_t *m);
1132
1133
/*
1134
* Reduce an integer (a[]) modulo another (m[]). The result is written
1135
* in x[] and its announced bit length is set to be equal to that of m[].
1136
*
1137
* x[] MUST be distinct from a[] and m[].
1138
*
1139
* CT: only announced bit lengths leak, not values of x, a or m.
1140
*/
1141
void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);
1142
1143
/*
1144
* Decode an integer from its big-endian unsigned representation, and
1145
* reduce it modulo the provided modulus m[]. The announced bit length
1146
* of the result is set to be equal to that of the modulus.
1147
*
1148
* x[] MUST be distinct from m[].
1149
*/
1150
void br_i32_decode_reduce(uint32_t *x,
1151
const void *src, size_t len, const uint32_t *m);
1152
1153
/*
1154
* Encode an integer into its big-endian unsigned representation. The
1155
* output length in bytes is provided (parameter 'len'); if the length
1156
* is too short then the integer is appropriately truncated; if it is
1157
* too long then the extra bytes are set to 0.
1158
*/
1159
void br_i32_encode(void *dst, size_t len, const uint32_t *x);
1160
1161
/*
1162
* Multiply x[] by 2^32 and then add integer z, modulo m[]. This
1163
* function assumes that x[] and m[] have the same announced bit
1164
* length, and the announced bit length of m[] matches its true
1165
* bit length.
1166
*
1167
* x[] and m[] MUST be distinct arrays.
1168
*
1169
* CT: only the common announced bit length of x and m leaks, not
1170
* the values of x, z or m.
1171
*/
1172
void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);
1173
1174
/*
 * Extract one 32-bit word from an integer. The offset 'off' is counted
 * in bits, starting from the least significant bit of the value (held
 * in a[1]; a[0] is the announced bit length and is skipped).
 *
 * The word MUST entirely fit within the word elements corresponding
 * to the announced bit length of a[]: when 'off' is not a multiple of
 * 32, two consecutive value words are read.
 */
static inline uint32_t
br_i32_word(const uint32_t *a, uint32_t off)
{
	size_t u;
	unsigned j;

	/* Index of the value word holding bit 'off' (+1 skips a[0]). */
	u = (size_t)(off >> 5) + 1;
	/* Bit position inside that word. */
	j = (unsigned)off & 31;
	if (j == 0) {
		/*
		 * Aligned case, handled separately: the general formula
		 * would shift by 32 - j == 32 (undefined on a 32-bit type)
		 * and would read a[u + 1], which need not exist.
		 */
		return a[u];
	} else {
		return (a[u] >> j) | (a[u + 1] << (32 - j));
	}
}
1193
1194
/*
1195
* Test whether an integer is zero.
1196
*/
1197
uint32_t br_i32_iszero(const uint32_t *x);
1198
1199
/*
1200
* Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
1201
* is unmodified, but the carry is still computed and returned. The
1202
* arrays a[] and b[] MUST have the same announced bit length.
1203
*
1204
* a[] and b[] MAY be the same array, but partial overlap is not allowed.
1205
*/
1206
uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl);
1207
1208
/*
1209
* Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
1210
* then a[] is unmodified, but the carry is still computed and returned.
1211
* The arrays a[] and b[] MUST have the same announced bit length.
1212
*
1213
* a[] and b[] MAY be the same array, but partial overlap is not allowed.
1214
*/
1215
uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);
1216
1217
/*
1218
* Compute d+a*b, result in d. The initial announced bit length of d[]
1219
* MUST match that of a[]. The d[] array MUST be large enough to
1220
* accommodate the full result, plus (possibly) an extra word. The
1221
* resulting announced bit length of d[] will be the sum of the announced
1222
* bit lengths of a[] and b[] (therefore, it may be larger than the actual
1223
* bit length of the numerical result).
1224
*
1225
* a[] and b[] may be the same array. d[] must be disjoint from both a[]
1226
* and b[].
1227
*/
1228
void br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
1229
1230
/*
 * Zeroize an integer. The announced bit length (x[0]) is set to the
 * provided value, and the value words that follow it -- one per
 * started group of 32 bits, i.e. (bit_len + 31) >> 5 words -- are
 * set to 0. Words beyond that count are not touched.
 */
static inline void
br_i32_zero(uint32_t *x, uint32_t bit_len)
{
	/* Header word first; x then points to the first value word. */
	*x ++ = bit_len;
	memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x);
}
1240
1241
/*
1242
* Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
1243
*/
1244
uint32_t br_i32_ninv32(uint32_t x);
1245
1246
/*
1247
* Convert a modular integer to Montgomery representation. The integer x[]
1248
* MUST be lower than m[], but with the same announced bit length.
1249
*/
1250
void br_i32_to_monty(uint32_t *x, const uint32_t *m);
1251
1252
/*
1253
* Convert a modular integer back from Montgomery representation. The
1254
* integer x[] MUST be lower than m[], but with the same announced bit
1255
* length. The "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is
1256
* the least significant value word of m[] (this works only if m[] is
1257
* an odd integer).
1258
*/
1259
void br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);
1260
1261
/*
1262
* Compute a modular Montgomery multiplication. d[] is filled with the
1263
* value of x*y/R modulo m[] (where R is the Montgomery factor). The
1264
* array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be
1265
* numerically lower than m[]. x[] and y[] MAY be the same array. The
1266
* "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least
1267
* significant value word of m[] (this works only if m[] is an odd
1268
* integer).
1269
*/
1270
void br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
1271
const uint32_t *m, uint32_t m0i);
1272
1273
/*
1274
* Compute a modular exponentiation. x[] MUST be an integer modulo m[]
1275
* (same announced bit length, lower value). m[] MUST be odd. The
1276
* exponent is in big-endian unsigned notation, over 'elen' bytes. The
1277
* "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least
1278
* significant value word of m[] (this works only if m[] is an odd
1279
* integer). The t1[] and t2[] parameters must be temporary arrays,
1280
* each large enough to accommodate an integer with the same size as m[].
1281
*/
1282
void br_i32_modpow(uint32_t *x, const unsigned char *e, size_t elen,
1283
const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);
1284
1285
/* ==================================================================== */
1286
1287
/*
1288
* Integers 'i31'
1289
* --------------
1290
*
1291
* The 'i31' functions implement computations on big integers using
1292
* an internal representation as an array of 32-bit integers. For
1293
* an array x[]:
1294
* -- x[0] encodes the array length and the "announced bit length"
1295
* of the integer: namely, if the announced bit length is k,
1296
* then x[0] = ((k / 31) << 5) + (k % 31).
1297
* -- x[1], x[2]... contain the value in little-endian order, 31
1298
* bits per word (x[1] contains the least significant 31 bits).
1299
* The upper bit of each word is 0.
1300
*
1301
* Multiplications rely on the elementary 32x32->64 multiplication.
1302
*
1303
* The announced bit length specifies the number of bits that are
1304
* significant in the subsequent 32-bit words. Unused bits in the
1305
* last (most significant) word are set to 0; subsequent words are
1306
* uninitialized and need not exist at all.
1307
*
1308
* The execution time and memory access patterns of all computations
1309
* depend on the announced bit length, but not on the actual word
1310
* values. For modular integers, the announced bit length of any integer
1311
* modulo n is equal to the actual bit length of n; thus, computations
1312
* on modular integers are "constant-time" (only the modulus length may
1313
* leak).
1314
*/
1315
1316
/*
1317
* Test whether an integer is zero.
1318
*/
1319
uint32_t br_i31_iszero(const uint32_t *x);
1320
1321
/*
1322
* Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
1323
* is unmodified, but the carry is still computed and returned. The
1324
* arrays a[] and b[] MUST have the same announced bit length.
1325
*
1326
* a[] and b[] MAY be the same array, but partial overlap is not allowed.
1327
*/
1328
uint32_t br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl);
1329
1330
/*
1331
* Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
1332
* then a[] is unmodified, but the carry is still computed and returned.
1333
* The arrays a[] and b[] MUST have the same announced bit length.
1334
*
1335
* a[] and b[] MAY be the same array, but partial overlap is not allowed.
1336
*/
1337
uint32_t br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);
1338
1339
/*
1340
* Compute the ENCODED actual bit length of an integer. The argument x
1341
* should point to the first (least significant) value word of the
1342
* integer. The len 'xlen' contains the number of 32-bit words to
1343
* access. The upper bit of each value word MUST be 0.
1344
* Returned value is ((k / 31) << 5) + (k % 31) if the bit length is k.
1345
*
1346
* CT: value or length of x does not leak.
1347
*/
1348
uint32_t br_i31_bit_length(uint32_t *x, size_t xlen);
1349
1350
/*
1351
* Decode an integer from its big-endian unsigned representation. The
1352
* "true" bit length of the integer is computed and set in the encoded
1353
* announced bit length (x[0]), but all words of x[] corresponding to
1354
* the full 'len' bytes of the source are set.
1355
*
1356
* CT: value or length of x does not leak.
1357
*/
1358
void br_i31_decode(uint32_t *x, const void *src, size_t len);
1359
1360
/*
1361
* Decode an integer from its big-endian unsigned representation. The
1362
* integer MUST be lower than m[]; the (encoded) announced bit length
1363
* written in x[] will be equal to that of m[]. All 'len' bytes from the
1364
* source are read.
1365
*
1366
* Returned value is 1 if the decoded value fits within the modulus, 0
1367
* otherwise. In the latter case, the x[] buffer will be set to 0 (but
1368
* still with the announced bit length of m[]).
1369
*
1370
* CT: value or length of x does not leak. Memory access pattern depends
1371
* only on 'len' and the announced bit length of m. Whether x fits or
1372
* not does not leak either.
1373
*/
1374
uint32_t br_i31_decode_mod(uint32_t *x,
1375
const void *src, size_t len, const uint32_t *m);
1376
1377
/*
 * Zeroize an integer. The header word x[0] is set to the provided
 * value, and the corresponding value words are set to 0. The ENCODED
 * bit length is expected here, i.e. ((k / 31) << 5) + (k % 31) for an
 * announced bit length of k; with that encoding, (bit_len + 31) >> 5
 * is the number of 31-bit value words, which is how many words are
 * cleared. Words beyond that count are not touched.
 */
static inline void
br_i31_zero(uint32_t *x, uint32_t bit_len)
{
	/* Header word first; x then points to the first value word. */
	*x ++ = bit_len;
	memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x);
}
1388
1389
/*
1390
* Right-shift an integer. The shift amount must be lower than 31
1391
* bits.
1392
*/
1393
void br_i31_rshift(uint32_t *x, int count);
1394
1395
/*
1396
* Reduce an integer (a[]) modulo another (m[]). The result is written
1397
* in x[] and its announced bit length is set to be equal to that of m[].
1398
*
1399
* x[] MUST be distinct from a[] and m[].
1400
*
1401
* CT: only announced bit lengths leak, not values of x, a or m.
1402
*/
1403
void br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);
1404
1405
/*
1406
* Decode an integer from its big-endian unsigned representation, and
1407
* reduce it modulo the provided modulus m[]. The announced bit length
1408
* of the result is set to be equal to that of the modulus.
1409
*
1410
* x[] MUST be distinct from m[].
1411
*/
1412
void br_i31_decode_reduce(uint32_t *x,
1413
const void *src, size_t len, const uint32_t *m);
1414
1415
/*
1416
* Multiply x[] by 2^31 and then add integer z, modulo m[]. This
1417
* function assumes that x[] and m[] have the same announced bit
1418
* length, and the announced bit length of m[] matches its true
1419
* bit length.
1420
*
1421
* x[] and m[] MUST be distinct arrays. z MUST fit in 31 bits (upper
1422
* bit set to 0).
1423
*
1424
* CT: only the common announced bit length of x and m leaks, not
1425
* the values of x, z or m.
1426
*/
1427
void br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);
1428
1429
/*
1430
* Encode an integer into its big-endian unsigned representation. The
1431
* output length in bytes is provided (parameter 'len'); if the length
1432
* is too short then the integer is appropriately truncated; if it is
1433
* too long then the extra bytes are set to 0.
1434
*/
1435
void br_i31_encode(void *dst, size_t len, const uint32_t *x);
1436
1437
/*
1438
* Compute -(1/x) mod 2^31. If x is even, then this function returns 0.
1439
*/
1440
uint32_t br_i31_ninv31(uint32_t x);
1441
1442
/*
1443
* Compute a modular Montgomery multiplication. d[] is filled with the
1444
* value of x*y/R modulo m[] (where R is the Montgomery factor). The
1445
* array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be
1446
* numerically lower than m[]. x[] and y[] MAY be the same array. The
1447
* "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
1448
* significant value word of m[] (this works only if m[] is an odd
1449
* integer).
1450
*/
1451
void br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
1452
const uint32_t *m, uint32_t m0i);
1453
1454
/*
1455
* Convert a modular integer to Montgomery representation. The integer x[]
1456
* MUST be lower than m[], but with the same announced bit length.
1457
*/
1458
void br_i31_to_monty(uint32_t *x, const uint32_t *m);
1459
1460
/*
1461
* Convert a modular integer back from Montgomery representation. The
1462
* integer x[] MUST be lower than m[], but with the same announced bit
1463
* length. The "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is
1464
* the least significant value word of m[] (this works only if m[] is
1465
* an odd integer).
1466
*/
1467
void br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);
1468
1469
/*
1470
* Compute a modular exponentiation. x[] MUST be an integer modulo m[]
1471
* (same announced bit length, lower value). m[] MUST be odd. The
1472
* exponent is in big-endian unsigned notation, over 'elen' bytes. The
1473
* "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
1474
* significant value word of m[] (this works only if m[] is an odd
1475
* integer). The t1[] and t2[] parameters must be temporary arrays,
1476
* each large enough to accommodate an integer with the same size as m[].
1477
*/
1478
void br_i31_modpow(uint32_t *x, const unsigned char *e, size_t elen,
1479
const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);
1480
1481
/*
1482
* Compute a modular exponentiation. x[] MUST be an integer modulo m[]
1483
* (same announced bit length, lower value). m[] MUST be odd. The
1484
* exponent is in big-endian unsigned notation, over 'elen' bytes. The
1485
* "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
1486
* significant value word of m[] (this works only if m[] is an odd
1487
* integer). The tmp[] array is used for temporaries, and has size
1488
* 'twlen' words; it must be large enough to accommodate at least two
1489
* temporary values with the same size as m[] (including the leading
1490
* "bit length" word). If there is room for more temporaries, then this
1491
* function may use the extra room for window-based optimisation,
1492
* resulting in faster computations.
1493
*
1494
* Returned value is 1 on success, 0 on error. An error is reported if
1495
* the provided tmp[] array is too short.
1496
*/
1497
uint32_t br_i31_modpow_opt(uint32_t *x, const unsigned char *e, size_t elen,
1498
const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
1499
1500
/*
1501
* Compute d+a*b, result in d. The initial announced bit length of d[]
1502
* MUST match that of a[]. The d[] array MUST be large enough to
1503
* accommodate the full result, plus (possibly) an extra word. The
1504
* resulting announced bit length of d[] will be the sum of the announced
1505
* bit lengths of a[] and b[] (therefore, it may be larger than the actual
1506
* bit length of the numerical result).
1507
*
1508
* a[] and b[] may be the same array. d[] must be disjoint from both a[]
1509
* and b[].
1510
*/
1511
void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
1512
1513
/*
1514
* Compute x/y mod m, result in x. Values x and y must be between 0 and
1515
* m-1, and have the same announced bit length as m. Modulus m must be
1516
* odd. The "m0i" parameter is equal to -1/m mod 2^31. The array 't'
1517
* must point to a temporary area that can hold at least three integers
1518
* of the size of m.
1519
*
1520
* m may not overlap x and y. x and y may overlap each other (this can
1521
* be useful to test whether a value is invertible modulo m). t must be
1522
* disjoint from all other arrays.
1523
*
1524
* Returned value is 1 on success, 0 otherwise. Success is attained if
1525
* y is invertible modulo m.
1526
*/
1527
uint32_t br_i31_moddiv(uint32_t *x, const uint32_t *y,
1528
const uint32_t *m, uint32_t m0i, uint32_t *t);
1529
1530
/* ==================================================================== */
1531
1532
/*
1533
* FIXME: document "i15" functions.
1534
*/
1535
1536
/*
 * Zeroize an integer stored as an array of 16-bit words. The header
 * word x[0] is set to the provided value, and the (bit_len + 15) >> 4
 * words that follow it are set to 0. Words beyond that count are not
 * touched.
 *
 * NOTE(review): by analogy with br_i31_zero(), 'bit_len' is presumably
 * the ENCODED announced bit length for the i15 representation -- confirm.
 */
static inline void
br_i15_zero(uint16_t *x, uint16_t bit_len)
{
	/* Header word first; x then points to the first value word. */
	*x ++ = bit_len;
	memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x);
}
1542
1543
uint32_t br_i15_iszero(const uint16_t *x);
1544
1545
uint16_t br_i15_ninv15(uint16_t x);
1546
1547
uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl);
1548
1549
uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl);
1550
1551
void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m);
1552
1553
void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
1554
const uint16_t *m, uint16_t m0i);
1555
1556
void br_i15_to_monty(uint16_t *x, const uint16_t *m);
1557
1558
void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen,
1559
const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2);
1560
1561
uint32_t br_i15_modpow_opt(uint16_t *x, const unsigned char *e, size_t elen,
1562
const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen);
1563
1564
void br_i15_encode(void *dst, size_t len, const uint16_t *x);
1565
1566
uint32_t br_i15_decode_mod(uint16_t *x,
1567
const void *src, size_t len, const uint16_t *m);
1568
1569
void br_i15_rshift(uint16_t *x, int count);
1570
1571
uint32_t br_i15_bit_length(uint16_t *x, size_t xlen);
1572
1573
void br_i15_decode(uint16_t *x, const void *src, size_t len);
1574
1575
void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i);
1576
1577
void br_i15_decode_reduce(uint16_t *x,
1578
const void *src, size_t len, const uint16_t *m);
1579
1580
void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m);
1581
1582
void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b);
1583
1584
uint32_t br_i15_moddiv(uint16_t *x, const uint16_t *y,
1585
const uint16_t *m, uint16_t m0i, uint16_t *t);
1586
1587
/*
1588
* Variant of br_i31_modpow_opt() that internally uses 64x64->128
1589
* multiplications. It expects the same parameters as br_i31_modpow_opt(),
1590
* except that the temporaries should be 64-bit integers, not 32-bit
1591
* integers.
1592
*/
1593
uint32_t br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
1594
const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen);
1595
1596
/*
1597
* Type for a function with the same API as br_i31_modpow_opt() (some
1598
* implementations of this type may have stricter alignment requirements
1599
* on the temporaries).
1600
*/
1601
typedef uint32_t (*br_i31_modpow_opt_type)(uint32_t *x,
1602
const unsigned char *e, size_t elen,
1603
const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
1604
1605
/*
1606
* Wrapper for br_i62_modpow_opt() that uses the same type as
1607
* br_i31_modpow_opt(); however, it requires its 'tmp' argument to be
1608
* 64-bit aligned.
1609
*/
1610
uint32_t br_i62_modpow_opt_as_i31(uint32_t *x,
1611
const unsigned char *e, size_t elen,
1612
const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
1613
1614
/* ==================================================================== */
1615
1616
static inline size_t
1617
br_digest_size(const br_hash_class *digest_class)
1618
{
1619
return (size_t)(digest_class->desc >> BR_HASHDESC_OUT_OFF)
1620
& BR_HASHDESC_OUT_MASK;
1621
}
1622
1623
/*
1624
* Get the output size (in bytes) of a hash function.
1625
*/
1626
size_t br_digest_size_by_ID(int digest_id);
1627
1628
/*
1629
* Get the OID (encoded OBJECT IDENTIFIER value, without tag and length)
1630
* for a hash function. If digest_id is not a supported digest identifier
1631
* (in particular if it is equal to 0, i.e. br_md5sha1_ID), then NULL is
1632
* returned and *len is set to 0.
1633
*/
1634
const unsigned char *br_digest_OID(int digest_id, size_t *len);
1635
1636
/* ==================================================================== */
1637
/*
1638
* DES support functions.
1639
*/
1640
1641
/*
1642
* Apply DES Initial Permutation.
1643
*/
1644
void br_des_do_IP(uint32_t *xl, uint32_t *xr);
1645
1646
/*
1647
* Apply DES Final Permutation (inverse of IP).
1648
*/
1649
void br_des_do_invIP(uint32_t *xl, uint32_t *xr);
1650
1651
/*
1652
* Key schedule unit: for a DES key (8 bytes), compute 16 subkeys. Each
1653
* subkey is two 28-bit words represented as two 32-bit words; the PC-2
1654
* bit extraction is NOT applied.
1655
*/
1656
void br_des_keysched_unit(uint32_t *skey, const void *key);
1657
1658
/*
1659
* Reversal of 16 DES sub-keys (for decryption).
1660
*/
1661
void br_des_rev_skey(uint32_t *skey);
1662
1663
/*
1664
* DES/3DES key schedule for 'des_tab' (encryption direction). Returned
1665
* value is the number of rounds.
1666
*/
1667
unsigned br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len);
1668
1669
/*
1670
* DES/3DES key schedule for 'des_ct' (encryption direction). Returned
1671
* value is the number of rounds.
1672
*/
1673
unsigned br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len);
1674
1675
/*
1676
* DES/3DES subkey decompression (from the compressed bitsliced subkeys).
1677
*/
1678
void br_des_ct_skey_expand(uint32_t *sk_exp,
1679
unsigned num_rounds, const uint32_t *skey);
1680
1681
/*
1682
* DES/3DES block encryption/decryption ('des_tab').
1683
*/
1684
void br_des_tab_process_block(unsigned num_rounds,
1685
const uint32_t *skey, void *block);
1686
1687
/*
1688
* DES/3DES block encryption/decryption ('des_ct').
1689
*/
1690
void br_des_ct_process_block(unsigned num_rounds,
1691
const uint32_t *skey, void *block);
1692
1693
/* ==================================================================== */
1694
/*
1695
* AES support functions.
1696
*/
1697
1698
/*
1699
* The AES S-box (256-byte table).
1700
*/
1701
extern const unsigned char br_aes_S[];
1702
1703
/*
1704
* AES key schedule. skey[] is filled with n+1 128-bit subkeys, where n
1705
* is the number of rounds (10 to 14, depending on key size). The number
1706
* of rounds is returned. If the key size is invalid (not 16, 24 or 32),
1707
* then 0 is returned.
1708
*
1709
* This implementation uses a 256-byte table and is NOT constant-time.
1710
*/
1711
unsigned br_aes_keysched(uint32_t *skey, const void *key, size_t key_len);
1712
1713
/*
1714
* AES key schedule for decryption ('aes_big' implementation).
1715
*/
1716
unsigned br_aes_big_keysched_inv(uint32_t *skey,
1717
const void *key, size_t key_len);
1718
1719
/*
1720
* AES block encryption with the 'aes_big' implementation (fast, but
1721
* not constant-time). This function encrypts a single block "in place".
1722
*/
1723
void br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data);
1724
1725
/*
1726
* AES block decryption with the 'aes_big' implementation (fast, but
1727
* not constant-time). This function decrypts a single block "in place".
1728
*/
1729
void br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data);
1730
1731
/*
1732
* AES block encryption with the 'aes_small' implementation (small, but
1733
* slow and not constant-time). This function encrypts a single block
1734
* "in place".
1735
*/
1736
void br_aes_small_encrypt(unsigned num_rounds,
1737
const uint32_t *skey, void *data);
1738
1739
/*
1740
* AES block decryption with the 'aes_small' implementation (small, but
1741
* slow and not constant-time). This function decrypts a single block
1742
* "in place".
1743
*/
1744
void br_aes_small_decrypt(unsigned num_rounds,
1745
const uint32_t *skey, void *data);
1746
1747
/*
1748
* The constant-time implementation is "bitsliced": the 128-bit state is
1749
* split over eight 32-bit words q* in the following way:
1750
*
1751
* -- Input block consists in 16 bytes:
1752
* a00 a10 a20 a30 a01 a11 a21 a31 a02 a12 a22 a32 a03 a13 a23 a33
1753
* In the terminology of FIPS 197, this is a 4x4 matrix which is read
1754
* column by column.
1755
*
1756
* -- Each byte is split into eight bits which are distributed over the
1757
* eight words, at the same rank. Thus, for a byte x at rank k, bit 0
1758
* (least significant) of x will be at rank k in q0 (if that bit is b,
1759
* then it contributes "b << k" to the value of q0), bit 1 of x will be
1760
* at rank k in q1, and so on.
1761
*
1762
* -- Ranks given to bits are in "row order" and are either all even, or
1763
* all odd. Two independent AES states are thus interleaved, one using
1764
* the even ranks, the other the odd ranks. Row order means:
1765
* a00 a01 a02 a03 a10 a11 a12 a13 a20 a21 a22 a23 a30 a31 a32 a33
1766
*
1767
* Converting input bytes from two AES blocks to bitslice representation
1768
* is done in the following way:
1769
* -- Decode first block into the four words q0 q2 q4 q6, in that order,
1770
* using little-endian convention.
1771
* -- Decode second block into the four words q1 q3 q5 q7, in that order,
1772
* using little-endian convention.
1773
* -- Call br_aes_ct_ortho().
1774
*
1775
* Converting back to bytes is done by using the reverse operations. Note
1776
* that br_aes_ct_ortho() is its own inverse.
1777
*/
1778
1779
/*
1780
* Perform bytewise orthogonalization of eight 32-bit words. Bytes
1781
* of q0..q7 are spread over all words: for a byte x that occurs
1782
* at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
1783
* of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
1784
*
1785
* This operation is an involution.
1786
*/
1787
void br_aes_ct_ortho(uint32_t *q);
1788
1789
/*
1790
* The AES S-box, as a bitsliced constant-time version. The input array
1791
* consists in eight 32-bit words; 32 S-box instances are computed in
1792
* parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)
1793
* are spread over the words 0 to 7, at the same rank.
1794
*/
1795
void br_aes_ct_bitslice_Sbox(uint32_t *q);
1796
1797
/*
1798
* Like br_aes_ct_bitslice_Sbox(), but for the inverse S-box.
1799
*/
1800
void br_aes_ct_bitslice_invSbox(uint32_t *q);
1801
1802
/*
1803
* Compute AES encryption on bitsliced data. Since input is stored on
1804
* eight 32-bit words, two block encryptions are actually performed
1805
* in parallel.
1806
*/
1807
void br_aes_ct_bitslice_encrypt(unsigned num_rounds,
1808
const uint32_t *skey, uint32_t *q);
1809
1810
/*
1811
* Compute AES decryption on bitsliced data. Since input is stored on
1812
* eight 32-bit words, two block decryptions are actually performed
1813
* in parallel.
1814
*/
1815
void br_aes_ct_bitslice_decrypt(unsigned num_rounds,
1816
const uint32_t *skey, uint32_t *q);
1817
1818
/*
1819
* AES key schedule, constant-time version. skey[] is filled with n+1
1820
* 128-bit subkeys, where n is the number of rounds (10 to 14, depending
1821
* on key size). The number of rounds is returned. If the key size is
1822
* invalid (not 16, 24 or 32), then 0 is returned.
1823
*/
1824
unsigned br_aes_ct_keysched(uint32_t *comp_skey,
1825
const void *key, size_t key_len);
1826
1827
/*
1828
* Expand AES subkeys as produced by br_aes_ct_keysched(), into
1829
* a larger array suitable for br_aes_ct_bitslice_encrypt() and
1830
* br_aes_ct_bitslice_decrypt().
1831
*/
1832
void br_aes_ct_skey_expand(uint32_t *skey,
1833
unsigned num_rounds, const uint32_t *comp_skey);
1834
1835
/*
1836
* For the ct64 implementation, the same bitslicing technique is used,
1837
* but four instances are interleaved. First instance uses bits 0, 4,
1838
* 8, 12,... of each word; second instance uses bits 1, 5, 9, 13,...
1839
* and so on.
1840
*/
1841
1842
/*
1843
* Perform bytewise orthogonalization of eight 64-bit words. Bytes
1844
* of q0..q7 are spread over all words: for a byte x that occurs
1845
* at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
1846
* of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
1847
*
1848
* This operation is an involution.
1849
*/
1850
void br_aes_ct64_ortho(uint64_t *q);
1851
1852
/*
1853
* Interleave bytes for an AES input block. If input bytes are
1854
* denoted 0123456789ABCDEF, and have been decoded with little-endian
1855
* convention (w[0] contains 0123, with '3' being most significant;
1856
* w[1] contains 4567, and so on), then output word q0 will be
1857
* set to 08192A3B (again little-endian convention) and q1 will
1858
* be set to 4C5D6E7F.
1859
*/
1860
void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w);
1861
1862
/*
1863
* Perform the opposite of br_aes_ct64_interleave_in().
1864
*/
1865
void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1);
1866
1867
/*
1868
* The AES S-box, as a bitsliced constant-time version. The input array
1869
* consists in eight 64-bit words; 64 S-box instances are computed in
1870
* parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)
1871
* are spread over the words 0 to 7, at the same rank.
1872
*/
1873
void br_aes_ct64_bitslice_Sbox(uint64_t *q);
1874
1875
/*
1876
* Like br_aes_ct64_bitslice_Sbox(), but for the inverse S-box.
1877
*/
1878
void br_aes_ct64_bitslice_invSbox(uint64_t *q);
1879
1880
/*
1881
* Compute AES encryption on bitsliced data. Since input is stored on
1882
* eight 64-bit words, four block encryptions are actually performed
1883
* in parallel.
1884
*/
1885
void br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
1886
const uint64_t *skey, uint64_t *q);
1887
1888
/*
1889
* Compute AES decryption on bitsliced data. Since input is stored on
1890
* eight 64-bit words, four block decryptions are actually performed
1891
* in parallel.
1892
*/
1893
void br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
1894
const uint64_t *skey, uint64_t *q);
1895
1896
/*
1897
* AES key schedule, constant-time version. skey[] is filled with n+1
1898
* 128-bit subkeys, where n is the number of rounds (10 to 14, depending
1899
* on key size). The number of rounds is returned. If the key size is
1900
* invalid (not 16, 24 or 32), then 0 is returned.
1901
*/
1902
unsigned br_aes_ct64_keysched(uint64_t *comp_skey,
1903
const void *key, size_t key_len);
1904
1905
/*
1906
* Expand AES subkeys as produced by br_aes_ct64_keysched(), into
1907
* a larger array suitable for br_aes_ct64_bitslice_encrypt() and
1908
* br_aes_ct64_bitslice_decrypt().
1909
*/
1910
void br_aes_ct64_skey_expand(uint64_t *skey,
1911
unsigned num_rounds, const uint64_t *comp_skey);
1912
1913
/*
1914
* Test support for AES-NI opcodes.
1915
*/
1916
int br_aes_x86ni_supported(void);
1917
1918
/*
1919
* AES key schedule, using x86 AES-NI instructions. This yields the
1920
* subkeys in the encryption direction. Number of rounds is returned.
1921
* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
1922
*/
1923
unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,
1924
const void *key, size_t len);
1925
1926
/*
1927
* AES key schedule, using x86 AES-NI instructions. This yields the
1928
* subkeys in the decryption direction. Number of rounds is returned.
1929
* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
1930
*/
1931
unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,
1932
const void *key, size_t len);
1933
1934
/*
1935
* Test support for AES POWER8 opcodes.
1936
*/
1937
int br_aes_pwr8_supported(void);
1938
1939
/*
1940
* AES key schedule, using POWER8 instructions. This yields the
1941
* subkeys in the encryption direction. Number of rounds is returned.
1942
* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
1943
*/
1944
unsigned br_aes_pwr8_keysched(unsigned char *skni,
1945
const void *key, size_t len);
1946
1947
/* ==================================================================== */
1948
/*
1949
* RSA.
1950
*/
1951
1952
/*
1953
* Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is
1954
* the encoded hash function OID, or NULL.
1955
*/
1956
uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
1957
const unsigned char *hash, size_t hash_len,
1958
uint32_t n_bitlen, unsigned char *x);
1959
1960
/*
1961
* Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded
1962
* hash function OID, or NULL. The provided 'sig' value is _after_ the
1963
* modular exponentiation, i.e. it should be the padded hash. On
1964
* success, the hashed message is extracted.
1965
*/
1966
uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
1967
const unsigned char *hash_oid, size_t hash_len,
1968
unsigned char *hash_out);
1969
1970
/*
1971
* Apply proper PSS padding. The 'x' buffer is output only: it
1972
* receives the value that is to be exponentiated.
1973
*/
1974
uint32_t br_rsa_pss_sig_pad(const br_prng_class **rng,
1975
const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
1976
const unsigned char *hash, size_t salt_len,
1977
uint32_t n_bitlen, unsigned char *x);
1978
1979
/*
1980
* Check PSS padding. The provided value is the one _after_
1981
* the modular exponentiation; it is modified by this function.
1982
* This function infers the signature length from the public key
1983
* size, i.e. it assumes that this has already been verified (as
1984
* part of the exponentiation).
1985
*/
1986
uint32_t br_rsa_pss_sig_unpad(
1987
const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
1988
const unsigned char *hash, size_t salt_len,
1989
const br_rsa_public_key *pk, unsigned char *x);
1990
1991
/*
1992
* Apply OAEP padding. Returned value is the actual padded string length,
1993
* or zero on error.
1994
*/
1995
size_t br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig,
1996
const void *label, size_t label_len, const br_rsa_public_key *pk,
1997
void *dst, size_t dst_nax_len, const void *src, size_t src_len);
1998
1999
/*
2000
* Unravel and check OAEP padding. If the padding is correct, then 1 is
2001
* returned, '*len' is adjusted to the length of the message, and the
2002
* data is moved to the start of the 'data' buffer. If the padding is
2003
* incorrect, then 0 is returned and '*len' is untouched. Either way,
2004
* the complete buffer contents are altered.
2005
*/
2006
uint32_t br_rsa_oaep_unpad(const br_hash_class *dig,
2007
const void *label, size_t label_len, void *data, size_t *len);
2008
2009
/*
2010
* Compute MGF1 for a given seed, and XOR the output into the provided
2011
* buffer.
2012
*/
2013
void br_mgf1_xor(void *data, size_t len,
2014
const br_hash_class *dig, const void *seed, size_t seed_len);
2015
2016
/*
2017
* Inner function for RSA key generation; used by the "i31" and "i62"
2018
* implementations.
2019
*/
2020
uint32_t br_rsa_i31_keygen_inner(const br_prng_class **rng,
2021
br_rsa_private_key *sk, void *kbuf_priv,
2022
br_rsa_public_key *pk, void *kbuf_pub,
2023
unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31);
2024
2025
/* ==================================================================== */
2026
/*
2027
* Elliptic curves.
2028
*/
2029
2030
/*
2031
* Type for generic EC parameters: curve order (unsigned big-endian
2032
* encoding) and encoded conventional generator.
2033
*/
2034
typedef struct {
	/* Curve identifier. */
	int curve;
	/* Curve order, unsigned big-endian encoding. */
	const unsigned char *order;
	/* Length of the 'order' array (presumably in bytes -- confirm). */
	size_t order_len;
	/* Encoded conventional generator. */
	const unsigned char *generator;
	/* Length of the 'generator' array (presumably in bytes -- confirm). */
	size_t generator_len;
} br_ec_curve_def;
2041
2042
extern const br_ec_curve_def br_secp256r1;
2043
extern const br_ec_curve_def br_secp384r1;
2044
extern const br_ec_curve_def br_secp521r1;
2045
2046
/*
2047
* For Curve25519, the advertised "order" really is 2^255-1, since the
2048
* point multiplication function really works over arbitrary 255-bit
2049
* scalars. This value is only meant as a hint for ECDH key generation;
2050
* only ECDSA uses the exact curve order, and ECDSA is not used with
2051
* that specific curve.
2052
*/
2053
extern const br_ec_curve_def br_curve25519;
2054
2055
/*
2056
* Decode some bytes as an i31 integer, with truncation (corresponding
2057
* to the 'bits2int' operation in RFC 6979). The target ENCODED bit
2058
* length is provided as last parameter. The resulting value will have
2059
* this declared bit length, and consists of the big-endian unsigned decoding
2060
* of exactly that many bits in the source (capped at the source length).
2061
*/
2062
void br_ecdsa_i31_bits2int(uint32_t *x,
2063
const void *src, size_t len, uint32_t ebitlen);
2064
2065
/*
2066
* Decode some bytes as an i15 integer, with truncation (corresponding
2067
* to the 'bits2int' operation in RFC 6979). The target ENCODED bit
2068
* length is provided as last parameter. The resulting value will have
2069
* this declared bit length, and consists of the big-endian unsigned decoding
2070
* of exactly that many bits in the source (capped at the source length).
2071
*/
2072
void br_ecdsa_i15_bits2int(uint16_t *x,
2073
const void *src, size_t len, uint32_t ebitlen);
2074
2075
/* ==================================================================== */
2076
/*
2077
* ASN.1 support functions.
2078
*/
2079
2080
/*
2081
* A br_asn1_uint structure contains encoding information about an
2082
* INTEGER nonnegative value: pointer to the integer contents (unsigned
2083
* big-endian representation), length of the integer contents,
2084
* and length of the encoded value. The data shall have minimal length:
2085
* - If the integer value is zero, then 'len' must be zero.
2086
* - If the integer value is not zero, then data[0] must be non-zero.
2087
*
2088
* Under these conditions, 'asn1len' is necessarily equal to either len
2089
* or len+1.
2090
*/
2091
typedef struct {
	/*
	 * Integer contents, unsigned big-endian representation. When the
	 * value is not zero, data[0] must be non-zero (minimal length).
	 */
	const unsigned char *data;
	/* Length of the integer contents; zero when the value is zero. */
	size_t len;
	/* Length of the encoded value; either len or len+1. */
	size_t asn1len;
} br_asn1_uint;
2096
2097
/*
2098
* Given an encoded integer (unsigned big-endian, with possible leading
2099
* bytes of value 0), return the "prepared INTEGER" structure.
2100
*/
2101
br_asn1_uint br_asn1_uint_prepare(const void *xdata, size_t xlen);
2102
2103
/*
2104
* Encode an ASN.1 length. The length of the encoded length is returned.
2105
* If 'dest' is NULL, then no encoding is performed, but the length of
2106
* the encoded length is still computed and returned.
2107
*/
2108
size_t br_asn1_encode_length(void *dest, size_t len);
2109
2110
/*
2111
* Convenient macro for computing lengths of lengths.
2112
*/
2113
#define len_of_len(len) br_asn1_encode_length(NULL, len)
2114
2115
/*
2116
* Encode a (prepared) ASN.1 INTEGER. The encoded length is returned.
2117
* If 'dest' is NULL, then no encoding is performed, but the length of
2118
* the encoded integer is still computed and returned.
2119
*/
2120
size_t br_asn1_encode_uint(void *dest, br_asn1_uint pp);
2121
2122
/*
2123
* Get the OID that identifies an elliptic curve. Returned value is
2124
* the DER-encoded OID, with the length (always one byte) but without
2125
* the tag. Thus, the first byte of the returned buffer contains the
2126
* number of subsequent bytes in the value. If the curve is not
2127
* recognised, NULL is returned.
2128
*/
2129
const unsigned char *br_get_curve_OID(int curve);
2130
2131
/*
2132
* Inner function for EC private key encoding. This is equivalent to
2133
* the API function br_encode_ec_raw_der(), except for an extra
2134
* parameter: if 'include_curve_oid' is zero, then the curve OID is
2135
* _not_ included in the output blob (this is for PKCS#8 support).
2136
*/
2137
size_t br_encode_ec_raw_der_inner(void *dest,
2138
const br_ec_private_key *sk, const br_ec_public_key *pk,
2139
int include_curve_oid);
2140
2141
/* ==================================================================== */
2142
/*
2143
* SSL/TLS support functions.
2144
*/
2145
2146
/*
2147
* Record types.
2148
*/
2149
#define BR_SSL_CHANGE_CIPHER_SPEC 20
2150
#define BR_SSL_ALERT 21
2151
#define BR_SSL_HANDSHAKE 22
2152
#define BR_SSL_APPLICATION_DATA 23
2153
2154
/*
2155
* Handshake message types.
2156
*/
2157
#define BR_SSL_HELLO_REQUEST 0
2158
#define BR_SSL_CLIENT_HELLO 1
2159
#define BR_SSL_SERVER_HELLO 2
2160
#define BR_SSL_CERTIFICATE 11
2161
#define BR_SSL_SERVER_KEY_EXCHANGE 12
2162
#define BR_SSL_CERTIFICATE_REQUEST 13
2163
#define BR_SSL_SERVER_HELLO_DONE 14
2164
#define BR_SSL_CERTIFICATE_VERIFY 15
2165
#define BR_SSL_CLIENT_KEY_EXCHANGE 16
2166
#define BR_SSL_FINISHED 20
2167
2168
/*
2169
* Alert levels.
2170
*/
2171
#define BR_LEVEL_WARNING 1
2172
#define BR_LEVEL_FATAL 2
2173
2174
/*
2175
* Low-level I/O state.
2176
*/
2177
#define BR_IO_FAILED 0
2178
#define BR_IO_IN 1
2179
#define BR_IO_OUT 2
2180
#define BR_IO_INOUT 3
2181
2182
/*
2183
* Mark a SSL engine as failed. The provided error code is recorded if
2184
* the engine was not already marked as failed. If 'err' is 0, then the
2185
* engine is marked as closed (without error).
2186
*/
2187
void br_ssl_engine_fail(br_ssl_engine_context *cc, int err);
2188
2189
/*
2190
* Test whether the engine is closed (normally or as a failure).
2191
*/
2192
static inline int
2193
br_ssl_engine_closed(const br_ssl_engine_context *cc)
2194
{
2195
return cc->iomode == BR_IO_FAILED;
2196
}
2197
2198
/*
2199
* Configure a new maximum fragment length. If possible, the maximum
2200
* length for outgoing records is immediately adjusted (if there are
2201
* not already too many buffered bytes for that).
2202
*/
2203
void br_ssl_engine_new_max_frag_len(
2204
br_ssl_engine_context *rc, unsigned max_frag_len);
2205
2206
/*
2207
* Test whether the current incoming record has been fully received
2208
* or not. This function returns 0 only if a complete record header
2209
* has been received, but some of the (possibly encrypted) payload
2210
* has not yet been obtained.
2211
*/
2212
int br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc);
2213
2214
/*
2215
* Flush the current record (if not empty). This is meant to be called
2216
* from the handshake processor only.
2217
*/
2218
void br_ssl_engine_flush_record(br_ssl_engine_context *cc);
2219
2220
/*
2221
* Test whether there is some accumulated payload to send.
2222
*/
2223
static inline int
2224
br_ssl_engine_has_pld_to_send(const br_ssl_engine_context *rc)
2225
{
2226
return rc->oxa != rc->oxb && rc->oxa != rc->oxc;
2227
}
2228
2229
/*
2230
* Initialize RNG in engine. Returned value is 1 on success, 0 on error.
2231
* This function will try to use the OS-provided RNG, if available. If
2232
* there is no OS-provided RNG, or if it failed, and no entropy was
2233
* injected by the caller, then a failure will be reported. On error,
2234
* the context error code is set.
2235
*/
2236
int br_ssl_engine_init_rand(br_ssl_engine_context *cc);
2237
2238
/*
2239
* Reset the handshake-related parts of the engine.
2240
*/
2241
void br_ssl_engine_hs_reset(br_ssl_engine_context *cc,
2242
void (*hsinit)(void *), void (*hsrun)(void *));
2243
2244
/*
2245
* Get the PRF to use for this context, for the provided PRF hash
2246
* function ID.
2247
*/
2248
br_tls_prf_impl br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id);
2249
2250
/*
2251
* Consume the provided pre-master secret and compute the corresponding
2252
* master secret. The 'prf_id' is the ID of the hash function to use
2253
* with the TLS 1.2 PRF (ignored if the version is TLS 1.0 or 1.1).
2254
*/
2255
void br_ssl_engine_compute_master(br_ssl_engine_context *cc,
2256
int prf_id, const void *pms, size_t len);
2257
2258
/*
2259
* Switch to CBC decryption for incoming records.
2260
* cc the engine context
2261
* is_client non-zero for a client, zero for a server
2262
* prf_id id of hash function for PRF (ignored if not TLS 1.2+)
2263
* mac_id id of hash function for HMAC
2264
* bc_impl block cipher implementation (CBC decryption)
2265
* cipher_key_len block cipher key length (in bytes)
2266
*/
2267
void br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc,
2268
int is_client, int prf_id, int mac_id,
2269
const br_block_cbcdec_class *bc_impl, size_t cipher_key_len);
2270
2271
/*
2272
* Switch to CBC encryption for outgoing records.
2273
* cc the engine context
2274
* is_client non-zero for a client, zero for a server
2275
* prf_id id of hash function for PRF (ignored if not TLS 1.2+)
2276
* mac_id id of hash function for HMAC
2277
* bc_impl block cipher implementation (CBC encryption)
2278
* cipher_key_len block cipher key length (in bytes)
2279
*/
2280
void br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc,
2281
int is_client, int prf_id, int mac_id,
2282
const br_block_cbcenc_class *bc_impl, size_t cipher_key_len);
2283
2284
/*
2285
* Switch to GCM decryption for incoming records.
2286
* cc the engine context
2287
* is_client non-zero for a client, zero for a server
2288
* prf_id id of hash function for PRF
2289
* bc_impl block cipher implementation (CTR)
2290
* cipher_key_len block cipher key length (in bytes)
2291
*/
2292
void br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc,
2293
int is_client, int prf_id,
2294
const br_block_ctr_class *bc_impl, size_t cipher_key_len);
2295
2296
/*
2297
* Switch to GCM encryption for outgoing records.
2298
* cc the engine context
2299
* is_client non-zero for a client, zero for a server
2300
* prf_id id of hash function for PRF
2301
* bc_impl block cipher implementation (CTR)
2302
* cipher_key_len block cipher key length (in bytes)
2303
*/
2304
void br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc,
2305
int is_client, int prf_id,
2306
const br_block_ctr_class *bc_impl, size_t cipher_key_len);
2307
2308
/*
2309
* Switch to ChaCha20+Poly1305 decryption for incoming records.
2310
* cc the engine context
2311
* is_client non-zero for a client, zero for a server
2312
* prf_id id of hash function for PRF
2313
*/
2314
void br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc,
2315
int is_client, int prf_id);
2316
2317
/*
2318
* Switch to ChaCha20+Poly1305 encryption for outgoing records.
2319
* cc the engine context
2320
* is_client non-zero for a client, zero for a server
2321
* prf_id id of hash function for PRF
2322
*/
2323
void br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc,
2324
int is_client, int prf_id);
2325
2326
/*
2327
* Switch to CCM decryption for incoming records.
2328
* cc the engine context
2329
* is_client non-zero for a client, zero for a server
2330
* prf_id id of hash function for PRF
2331
* bc_impl block cipher implementation (CTR+CBC)
2332
* cipher_key_len block cipher key length (in bytes)
2333
* tag_len tag length (in bytes)
2334
*/
2335
void br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc,
2336
int is_client, int prf_id,
2337
const br_block_ctrcbc_class *bc_impl,
2338
size_t cipher_key_len, size_t tag_len);
2339
2340
/*
2341
* Switch to CCM encryption for outgoing records.
2342
* cc the engine context
2343
* is_client non-zero for a client, zero for a server
2344
* prf_id id of hash function for PRF
2345
* bc_impl block cipher implementation (CTR+CBC)
2346
* cipher_key_len block cipher key length (in bytes)
2347
* tag_len tag length (in bytes)
2348
*/
2349
void br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,
2350
int is_client, int prf_id,
2351
const br_block_ctrcbc_class *bc_impl,
2352
size_t cipher_key_len, size_t tag_len);
2353
2354
/*
2355
* Calls to T0-generated code.
2356
*/
2357
void br_ssl_hs_client_init_main(void *ctx);
2358
void br_ssl_hs_client_run(void *ctx);
2359
void br_ssl_hs_server_init_main(void *ctx);
2360
void br_ssl_hs_server_run(void *ctx);
2361
2362
/*
2363
* Get the hash function to use for signatures, given a bit mask of
2364
* supported hash functions. This implements a strict choice order
2365
* (namely SHA-256, SHA-384, SHA-512, SHA-224, SHA-1). If the mask
2366
* does not document support of any of these hash functions, then this
2367
* function returns 0.
2368
*/
2369
int br_ssl_choose_hash(unsigned bf);
2370
2371
/* ==================================================================== */
2372
2373
/*
2374
* PowerPC / POWER assembly stuff. The special BR_POWER_ASM_MACROS macro
2375
* must be defined before including this file; this is done by source
2376
* files that use some inline assembly for PowerPC / POWER machines.
2377
*/
2378
2379
#if BR_POWER_ASM_MACROS
2380
2381
/*
 * Each macro in this list forwards to an underscore-suffixed twin
 * (defined further down) that builds the instruction text with the
 * '#' operator. Since '#' stringifies its operand without
 * macro-expanding it, going through this extra level means that
 * arguments which are themselves macros get expanded first.
 */
#define lxvw4x(xt, ra, rb) lxvw4x_(xt, ra, rb)
2382
#define stxvw4x(xt, ra, rb) stxvw4x_(xt, ra, rb)
2383
2384
#define bdnz(foo) bdnz_(foo)
2385
#define bdz(foo) bdz_(foo)
2386
#define beq(foo) beq_(foo)
2387
2388
#define li(rx, value) li_(rx, value)
2389
#define addi(rx, ra, imm) addi_(rx, ra, imm)
2390
#define cmpldi(rx, imm) cmpldi_(rx, imm)
2391
#define mtctr(rx) mtctr_(rx)
2392
#define vspltb(vrt, vrb, uim) vspltb_(vrt, vrb, uim)
2393
#define vspltw(vrt, vrb, uim) vspltw_(vrt, vrb, uim)
2394
#define vspltisb(vrt, imm) vspltisb_(vrt, imm)
2395
#define vspltisw(vrt, imm) vspltisw_(vrt, imm)
2396
#define vrlw(vrt, vra, vrb) vrlw_(vrt, vra, vrb)
2397
#define vsbox(vrt, vra) vsbox_(vrt, vra)
2398
#define vxor(vrt, vra, vrb) vxor_(vrt, vra, vrb)
2399
#define vand(vrt, vra, vrb) vand_(vrt, vra, vrb)
2400
#define vsro(vrt, vra, vrb) vsro_(vrt, vra, vrb)
2401
#define vsl(vrt, vra, vrb) vsl_(vrt, vra, vrb)
2402
#define vsldoi(vt, va, vb, sh) vsldoi_(vt, va, vb, sh)
2403
#define vsr(vrt, vra, vrb) vsr_(vrt, vra, vrb)
2404
#define vaddcuw(vrt, vra, vrb) vaddcuw_(vrt, vra, vrb)
2405
#define vadduwm(vrt, vra, vrb) vadduwm_(vrt, vra, vrb)
2406
#define vsububm(vrt, vra, vrb) vsububm_(vrt, vra, vrb)
2407
#define vsubuwm(vrt, vra, vrb) vsubuwm_(vrt, vra, vrb)
2408
#define vsrw(vrt, vra, vrb) vsrw_(vrt, vra, vrb)
2409
#define vcipher(vt, va, vb) vcipher_(vt, va, vb)
2410
#define vcipherlast(vt, va, vb) vcipherlast_(vt, va, vb)
2411
#define vncipher(vt, va, vb) vncipher_(vt, va, vb)
2412
#define vncipherlast(vt, va, vb) vncipherlast_(vt, va, vb)
2413
#define vperm(vt, va, vb, vc) vperm_(vt, va, vb, vc)
2414
#define vpmsumd(vt, va, vb) vpmsumd_(vt, va, vb)
2415
#define xxpermdi(vt, va, vb, d) xxpermdi_(vt, va, vb, d)
2416
2417
#define lxvw4x_(xt, ra, rb) "\tlxvw4x\t" #xt "," #ra "," #rb "\n"
2418
#define stxvw4x_(xt, ra, rb) "\tstxvw4x\t" #xt "," #ra "," #rb "\n"
2419
2420
/*
 * Label definition and branches to labels. The label name is suffixed
 * with "%=".
 * NOTE(review): this assumes the resulting strings are used as GCC-style
 * extended asm templates, where "%=" is replaced with a number unique
 * to each asm statement instance, so that the same label name can be
 * reused in several asm blocks -- confirm against the callers.
 */
#define label(foo) #foo "%=:\n"
2421
#define bdnz_(foo) "\tbdnz\t" #foo "%=\n"
2422
#define bdz_(foo) "\tbdz\t" #foo "%=\n"
2423
#define beq_(foo) "\tbeq\t" #foo "%=\n"
2424
2425
#define li_(rx, value) "\tli\t" #rx "," #value "\n"
2426
#define addi_(rx, ra, imm) "\taddi\t" #rx "," #ra "," #imm "\n"
2427
#define cmpldi_(rx, imm) "\tcmpldi\t" #rx "," #imm "\n"
2428
#define mtctr_(rx) "\tmtctr\t" #rx "\n"
2429
#define vspltb_(vrt, vrb, uim) "\tvspltb\t" #vrt "," #vrb "," #uim "\n"
2430
#define vspltw_(vrt, vrb, uim) "\tvspltw\t" #vrt "," #vrb "," #uim "\n"
2431
#define vspltisb_(vrt, imm) "\tvspltisb\t" #vrt "," #imm "\n"
2432
#define vspltisw_(vrt, imm) "\tvspltisw\t" #vrt "," #imm "\n"
2433
#define vrlw_(vrt, vra, vrb) "\tvrlw\t" #vrt "," #vra "," #vrb "\n"
2434
#define vsbox_(vrt, vra) "\tvsbox\t" #vrt "," #vra "\n"
2435
#define vxor_(vrt, vra, vrb) "\tvxor\t" #vrt "," #vra "," #vrb "\n"
2436
#define vand_(vrt, vra, vrb) "\tvand\t" #vrt "," #vra "," #vrb "\n"
2437
#define vsro_(vrt, vra, vrb) "\tvsro\t" #vrt "," #vra "," #vrb "\n"
2438
#define vsl_(vrt, vra, vrb) "\tvsl\t" #vrt "," #vra "," #vrb "\n"
2439
#define vsldoi_(vt, va, vb, sh) "\tvsldoi\t" #vt "," #va "," #vb "," #sh "\n"
2440
#define vsr_(vrt, vra, vrb) "\tvsr\t" #vrt "," #vra "," #vrb "\n"
2441
#define vaddcuw_(vrt, vra, vrb) "\tvaddcuw\t" #vrt "," #vra "," #vrb "\n"
2442
#define vadduwm_(vrt, vra, vrb) "\tvadduwm\t" #vrt "," #vra "," #vrb "\n"
2443
#define vsububm_(vrt, vra, vrb) "\tvsububm\t" #vrt "," #vra "," #vrb "\n"
2444
#define vsubuwm_(vrt, vra, vrb) "\tvsubuwm\t" #vrt "," #vra "," #vrb "\n"
2445
#define vsrw_(vrt, vra, vrb) "\tvsrw\t" #vrt "," #vra "," #vrb "\n"
2446
#define vcipher_(vt, va, vb) "\tvcipher\t" #vt "," #va "," #vb "\n"
2447
#define vcipherlast_(vt, va, vb) "\tvcipherlast\t" #vt "," #va "," #vb "\n"
2448
#define vncipher_(vt, va, vb) "\tvncipher\t" #vt "," #va "," #vb "\n"
2449
#define vncipherlast_(vt, va, vb) "\tvncipherlast\t" #vt "," #va "," #vb "\n"
2450
#define vperm_(vt, va, vb, vc) "\tvperm\t" #vt "," #va "," #vb "," #vc "\n"
2451
#define vpmsumd_(vt, va, vb) "\tvpmsumd\t" #vt "," #va "," #vb "\n"
2452
#define xxpermdi_(vt, va, vb, d) "\txxpermdi\t" #vt "," #va "," #vb "," #d "\n"
2453
2454
#endif
2455
2456
/* ==================================================================== */
2457
/*
2458
* Special "activate intrinsics" code, needed for some compiler versions.
2459
* This is defined at the end of this file, so that it won't impact any
2460
* of the inline functions defined previously; and it is controlled by
2461
* a specific macro defined in the caller code.
2462
*
2463
* Calling code conventions:
2464
*
2465
* - Caller must define BR_ENABLE_INTRINSICS before including "inner.h".
2466
* - Functions that use intrinsics must be enclosed in an "enabled"
2467
* region (between BR_TARGETS_X86_UP and BR_TARGETS_X86_DOWN).
2468
* - Functions that use intrinsics must be tagged with the appropriate
2469
* BR_TARGET().
2470
*/
2471
2472
#if BR_ENABLE_INTRINSICS && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)
2473
2474
/*
2475
* x86 intrinsics (both 32-bit and 64-bit).
2476
*/
2477
#if BR_i386 || BR_amd64
2478
2479
/*
2480
* On GCC before version 5.0, we need to use the pragma to enable the
2481
* target options globally, because the 'target' function attribute
2482
* appears to be unreliable. Before 4.6 we must also avoid the
2483
* push_options / pop_options mechanism, because it tends to trigger
2484
* some internal compiler errors.
2485
*/
2486
#if BR_GCC && !BR_GCC_5_0
2487
#if BR_GCC_4_6
2488
#define BR_TARGETS_X86_UP \
2489
_Pragma("GCC push_options") \
2490
_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul,rdrnd\")")
2491
#define BR_TARGETS_X86_DOWN \
2492
_Pragma("GCC pop_options")
2493
#else
2494
#define BR_TARGETS_X86_UP \
2495
_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul\")")
2496
#define BR_TARGETS_X86_DOWN
2497
#endif
2498
#pragma GCC diagnostic ignored "-Wpsabi"
2499
#endif
2500
2501
#if BR_CLANG && !BR_CLANG_3_8
2502
#undef __SSE2__
2503
#undef __SSE3__
2504
#undef __SSSE3__
2505
#undef __SSE4_1__
2506
#undef __AES__
2507
#undef __PCLMUL__
2508
#undef __RDRND__
2509
#define __SSE2__ 1
2510
#define __SSE3__ 1
2511
#define __SSSE3__ 1
2512
#define __SSE4_1__ 1
2513
#define __AES__ 1
2514
#define __PCLMUL__ 1
2515
#define __RDRND__ 1
2516
#endif
2517
2518
#ifndef BR_TARGETS_X86_UP
2519
#define BR_TARGETS_X86_UP
2520
#endif
2521
#ifndef BR_TARGETS_X86_DOWN
2522
#define BR_TARGETS_X86_DOWN
2523
#endif
2524
2525
#if BR_GCC || BR_CLANG
2526
BR_TARGETS_X86_UP
2527
#include <x86intrin.h>
2528
#include <cpuid.h>
2529
#define br_bswap32 __builtin_bswap32
2530
BR_TARGETS_X86_DOWN
2531
#endif
2532
2533
#if BR_MSC
2534
#include <stdlib.h>
2535
#include <intrin.h>
2536
#include <immintrin.h>
2537
#define br_bswap32 _byteswap_ulong
2538
#endif
2539
2540
/*
 * Test CPU feature bits as reported by CPUID leaf 1.
 *
 * Each mask_* argument selects bits in the corresponding output
 * register (EAX, EBX, ECX, EDX). Returned value is 1 if every selected
 * bit is set in all four registers, 0 otherwise. 0 is also returned
 * when __get_cpuid() returns zero (GCC / Clang branch), or when this
 * code is compiled with none of BR_GCC, BR_CLANG or BR_MSC set.
 */
static inline int
br_cpuid(uint32_t mask_eax, uint32_t mask_ebx,
	uint32_t mask_ecx, uint32_t mask_edx)
{
#if BR_GCC || BR_CLANG
	unsigned eax, ebx, ecx, edx;

	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
		if ((eax & mask_eax) == mask_eax
			&& (ebx & mask_ebx) == mask_ebx
			&& (ecx & mask_ecx) == mask_ecx
			&& (edx & mask_edx) == mask_edx)
		{
			return 1;
		}
	}
#elif BR_MSC
	int info[4];

	__cpuid(info, 1);
	/* __cpuid() yields signed ints; compare as unsigned 32-bit words. */
	if (((uint32_t)info[0] & mask_eax) == mask_eax
		&& ((uint32_t)info[1] & mask_ebx) == mask_ebx
		&& ((uint32_t)info[2] & mask_ecx) == mask_ecx
		&& ((uint32_t)info[3] & mask_edx) == mask_edx)
	{
		return 1;
	}
#endif
	return 0;
}
2570
2571
#endif
2572
2573
#endif
2574
2575
/* ==================================================================== */
2576
2577
#endif
2578
2579