/*1* Copyright (c) 2016 Thomas Pornin <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining4* a copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sublicense, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#ifndef INNER_H__25#define INNER_H__2627#include <string.h>28#include <limits.h>2930#include "config.h"31#include "bearssl.h"3233/*34* On MSVC, disable the warning about applying unary minus on an35* unsigned type: it is standard, we do it all the time, and for36* good reasons.37*/38#if _MSC_VER39#pragma warning( disable : 4146 )40#endif4142/*43* Maximum size for a RSA modulus (in bits). Allocated stack buffers44* depend on that size, so this value should be kept small. Currently,45* 2048-bit RSA keys offer adequate security, and should still do so for46* the next few decades; however, a number of widespread PKI have47* already set their root keys to RSA-4096, so we should be able to48* process such keys.49*50* This value MUST be a multiple of 64. This value MUST NOT exceed 4766651* (some computations in RSA key generation rely on the factor size being52* no more than 23833 bits). RSA key sizes beyond 3072 bits don't make a53* lot of sense anyway.54*/55#define BR_MAX_RSA_SIZE 40965657/*58* Minimum size for a RSA modulus (in bits); this value is used only to59* filter out invalid parameters for key pair generation. Normally,60* applications should not use RSA keys smaller than 2048 bits; but some61* specific cases might need shorter keys, for legacy or research62* purposes.63*/64#define BR_MIN_RSA_SIZE 5126566/*67* Maximum size for a RSA factor (in bits). This is for RSA private-key68* operations. Default is to support factors up to a bit more than half69* the maximum modulus size.70*71* This value MUST be a multiple of 32.72*/73#define BR_MAX_RSA_FACTOR ((BR_MAX_RSA_SIZE + 64) >> 1)7475/*76* Maximum size for an EC curve (modulus or order), in bits. Size of77* stack buffers depends on that parameter. This size MUST be a multiple78* of 8 (so that decoding an integer with that many bytes does not79* overflow).80*/81#define BR_MAX_EC_SIZE 5288283/*84* Some macros to recognize the current architecture. Right now, we are85* interested into automatically recognizing architecture with efficient86* 64-bit types so that we may automatically use implementations that87* use 64-bit registers in that case. Future versions may detect, e.g.,88* availability of SSE2 intrinsics.89*90* If 'unsigned long' is a 64-bit type, then we assume that 64-bit types91* are efficient. Otherwise, we rely on macros that depend on compiler,92* OS and architecture. In any case, failure to detect the architecture93* as 64-bit means that the 32-bit code will be used, and that code94* works also on 64-bit architectures (the 64-bit code may simply be95* more efficient).96*97* The test on 'unsigned long' should already catch most cases, the one98* notable exception being Windows code where 'unsigned long' is kept to99* 32-bit for compatibility with all the legacy code that liberally uses100* the 'DWORD' type for 32-bit values.101*102* Macro names are taken from: http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros103*/104#ifndef BR_64105#if ((ULONG_MAX >> 31) >> 31) == 3106#define BR_64 1107#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64)108#define BR_64 1109#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \110|| defined(__64BIT__) || defined(_LP64) || defined(__LP64__)111#define BR_64 1112#elif defined(__sparc64__)113#define BR_64 1114#elif defined(__x86_64__) || defined(_M_X64)115#define BR_64 1116#elif defined(__aarch64__) || defined(_M_ARM64)117#define BR_64 1118#elif defined(__mips64)119#define BR_64 1120#endif121#endif122123/*124* Set BR_LOMUL on platforms where it makes sense.125*/126#ifndef BR_LOMUL127#if BR_ARMEL_CORTEXM_GCC128#define BR_LOMUL 1129#endif130#endif131132/*133* Architecture detection.134*/135#ifndef BR_i386136#if __i386__ || _M_IX86137#define BR_i386 1138#endif139#endif140141#ifndef BR_amd64142#if __x86_64__ || _M_X64143#define BR_amd64 1144#endif145#endif146147/*148* Compiler brand and version.149*150* Implementations that use intrinsics need to detect the compiler type151* and version because some specific actions may be needed to activate152* the corresponding opcodes, both for header inclusion, and when using153* them in a function.154*155* BR_GCC, BR_CLANG and BR_MSC will be set to 1 for, respectively, GCC,156* Clang and MS Visual C. For each of them, sub-macros will be defined157* for versions; each sub-macro is set whenever the compiler version is158* at least as recent as the one corresponding to the macro.159*/160161/*162* GCC thresholds are on versions 4.4 to 4.9 and 5.0.163*/164#ifndef BR_GCC165#if __GNUC__ && !__clang__166#define BR_GCC 1167168#if __GNUC__ > 4169#define BR_GCC_5_0 1170#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9171#define BR_GCC_4_9 1172#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8173#define BR_GCC_4_8 1174#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7175#define BR_GCC_4_7 1176#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6177#define BR_GCC_4_6 1178#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 5179#define BR_GCC_4_5 1180#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 4181#define BR_GCC_4_4 1182#endif183184#if BR_GCC_5_0185#define BR_GCC_4_9 1186#endif187#if BR_GCC_4_9188#define BR_GCC_4_8 1189#endif190#if BR_GCC_4_8191#define BR_GCC_4_7 1192#endif193#if BR_GCC_4_7194#define BR_GCC_4_6 1195#endif196#if BR_GCC_4_6197#define BR_GCC_4_5 1198#endif199#if BR_GCC_4_5200#define BR_GCC_4_4 1201#endif202203#endif204#endif205206/*207* Clang thresholds are on versions 3.7.0 and 3.8.0.208*/209#ifndef BR_CLANG210#if __clang__211#define BR_CLANG 1212213#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)214#define BR_CLANG_3_8 1215#elif __clang_major__ == 3 && __clang_minor__ >= 7216#define BR_CLANG_3_7 1217#endif218219#if BR_CLANG_3_8220#define BR_CLANG_3_7 1221#endif222223#endif224#endif225226/*227* MS Visual C thresholds are on Visual Studio 2005 to 2015.228*/229#ifndef BR_MSC230#if _MSC_VER231#define BR_MSC 1232233#if _MSC_VER >= 1900234#define BR_MSC_2015 1235#elif _MSC_VER >= 1800236#define BR_MSC_2013 1237#elif _MSC_VER >= 1700238#define BR_MSC_2012 1239#elif _MSC_VER >= 1600240#define BR_MSC_2010 1241#elif _MSC_VER >= 1500242#define BR_MSC_2008 1243#elif _MSC_VER >= 1400244#define BR_MSC_2005 1245#endif246247#if BR_MSC_2015248#define BR_MSC_2013 1249#endif250#if BR_MSC_2013251#define BR_MSC_2012 1252#endif253#if BR_MSC_2012254#define BR_MSC_2010 1255#endif256#if BR_MSC_2010257#define BR_MSC_2008 1258#endif259#if BR_MSC_2008260#define BR_MSC_2005 1261#endif262263#endif264#endif265266/*267* GCC 4.4+ and Clang 3.7+ allow tagging specific functions with a268* 'target' attribute that activates support for specific opcodes.269*/270#if BR_GCC_4_4 || BR_CLANG_3_7271#define BR_TARGET(x) __attribute__((target(x)))272#else273#define BR_TARGET(x)274#endif275276/*277* AES-NI intrinsics are available on x86 (32-bit and 64-bit) with278* GCC 4.8+, Clang 3.7+ and MSC 2012+.279*/280#ifndef BR_AES_X86NI281#if (BR_i386 || BR_amd64) && (BR_GCC_4_8 || BR_CLANG_3_7 || BR_MSC_2012)282#define BR_AES_X86NI 1283#endif284#endif285286/*287* SSE2 intrinsics are available on x86 (32-bit and 64-bit) with288* GCC 4.4+, Clang 3.7+ and MSC 2005+.289*/290#ifndef BR_SSE2291#if (BR_i386 || BR_amd64) && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)292#define BR_SSE2 1293#endif294#endif295296/*297* RDRAND intrinsics are available on x86 (32-bit and 64-bit) with298* GCC 4.6+, Clang 3.7+ and MSC 2012+.299*/300#ifndef BR_RDRAND301#if (BR_i386 || BR_amd64) && (BR_GCC_4_6 || BR_CLANG_3_7 || BR_MSC_2012)302#define BR_RDRAND 1303#endif304#endif305306/*307* Determine type of OS for random number generation. Macro names and308* values are documented on:309* https://sourceforge.net/p/predef/wiki/OperatingSystems/310*311* Win32's CryptGenRandom() should be available on Windows systems.312*313* /dev/urandom should work on all Unix-like systems (including macOS X).314*315* getentropy() is present on Linux (Glibc 2.25+), FreeBSD (12.0+) and316* OpenBSD (5.6+). For OpenBSD, there does not seem to be easy to use317* macros to test the minimum version, so we just assume that it is318* recent enough (last version without getentropy() has gone out of319* support in May 2015).320*321* Ideally we should use getentropy() on macOS (10.12+) too, but I don't322* know how to test the exact OS version with preprocessor macros.323*324* TODO: enrich the list of detected system.325*/326327#ifndef BR_USE_URANDOM328#if defined _AIX \329|| defined __ANDROID__ \330|| defined __FreeBSD__ \331|| defined __NetBSD__ \332|| defined __OpenBSD__ \333|| defined __DragonFly__ \334|| defined __linux__ \335|| (defined __sun && (defined __SVR4 || defined __svr4__)) \336|| (defined __APPLE__ && defined __MACH__)337#define BR_USE_URANDOM 1338#endif339#endif340341#ifndef BR_USE_GETENTROPY342#if (defined __linux__ \343&& (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))) \344|| (defined __FreeBSD__ && __FreeBSD__ >= 12) \345|| defined __OpenBSD__346#define BR_USE_GETENTROPY 1347#endif348#endif349350#ifndef BR_USE_WIN32_RAND351#if defined _WIN32 || defined _WIN64352#define BR_USE_WIN32_RAND 1353#endif354#endif355356/*357* POWER8 crypto support. We rely on compiler macros for the358* architecture, since we do not have a reliable, simple way to detect359* the required support at runtime (we could try running an opcode, and360* trapping the exception or signal on illegal instruction, but this361* induces some non-trivial OS dependencies that we would prefer to362* avoid if possible).363*/364#ifndef BR_POWER8365#if __GNUC__ && ((_ARCH_PWR8 || _ARCH_PPC) && __CRYPTO__)366#define BR_POWER8 1367#endif368#endif369370/*371* Detect endinanness on POWER8.372*/373#if BR_POWER8374#if defined BR_POWER8_LE375#undef BR_POWER8_BE376#if BR_POWER8_LE377#define BR_POWER8_BE 0378#else379#define BR_POWER8_BE 1380#endif381#elif defined BR_POWER8_BE382#undef BR_POWER8_LE383#if BR_POWER8_BE384#define BR_POWER8_LE 0385#else386#define BR_POWER8_LE 1387#endif388#else389#if __LITTLE_ENDIAN__390#define BR_POWER8_LE 1391#define BR_POWER8_BE 0392#else393#define BR_POWER8_LE 0394#define BR_POWER8_BE 1395#endif396#endif397#endif398399/*400* Detect support for 128-bit integers.401*/402#if !defined BR_INT128 && !defined BR_UMUL128403#ifdef __SIZEOF_INT128__404#define BR_INT128 1405#elif _M_X64406#define BR_UMUL128 1407#endif408#endif409410/*411* Detect support for unaligned accesses with known endianness.412*413* x86 (both 32-bit and 64-bit) is little-endian and allows unaligned414* accesses.415*416* POWER/PowerPC allows unaligned accesses when big-endian. POWER8 and417* later also allow unaligned accesses when little-endian.418*/419#if !defined BR_LE_UNALIGNED && !defined BR_BE_UNALIGNED420421#if __i386 || __i386__ || __x86_64__ || _M_IX86 || _M_X64422#define BR_LE_UNALIGNED 1423#elif BR_POWER8_BE424#define BR_BE_UNALIGNED 1425#elif BR_POWER8_LE426#define BR_LE_UNALIGNED 1427#elif (__powerpc__ || __powerpc64__ || _M_PPC || _ARCH_PPC || _ARCH_PPC64) \428&& __BIG_ENDIAN__429#define BR_BE_UNALIGNED 1430#endif431432#endif433434/*435* Detect support for an OS-provided time source.436*/437438#ifndef BR_USE_UNIX_TIME439#if defined __unix__ || defined __linux__ \440|| defined _POSIX_SOURCE || defined _POSIX_C_SOURCE \441|| (defined __APPLE__ && defined __MACH__)442#define BR_USE_UNIX_TIME 1443#endif444#endif445446#ifndef BR_USE_WIN32_TIME447#if defined _WIN32 || defined _WIN64448#define BR_USE_WIN32_TIME 1449#endif450#endif451452/* ==================================================================== */453/*454* Encoding/decoding functions.455*456* 32-bit and 64-bit decoding, both little-endian and big-endian, is457* implemented with the inline functions below.458*459* When allowed by some compile-time options (autodetected or provided),460* optimised code is used, to perform direct memory access when the461* underlying architecture supports it, both for endianness and462* alignment. This, however, may trigger strict aliasing issues; the463* code below uses unions to perform (supposedly) safe type punning.464* Since the C aliasing rules are relatively complex and were amended,465* or at least re-explained with different phrasing, in all successive466* versions of the C standard, it is always a bit risky to bet that any467* specific version of a C compiler got it right, for some notion of468* "right".469*/470471typedef union {472uint16_t u;473unsigned char b[sizeof(uint16_t)];474} br_union_u16;475476typedef union {477uint32_t u;478unsigned char b[sizeof(uint32_t)];479} br_union_u32;480481typedef union {482uint64_t u;483unsigned char b[sizeof(uint64_t)];484} br_union_u64;485486static inline void487br_enc16le(void *dst, unsigned x)488{489#if BR_LE_UNALIGNED490((br_union_u16 *)dst)->u = x;491#else492unsigned char *buf;493494buf = dst;495buf[0] = (unsigned char)x;496buf[1] = (unsigned char)(x >> 8);497#endif498}499500static inline void501br_enc16be(void *dst, unsigned x)502{503#if BR_BE_UNALIGNED504((br_union_u16 *)dst)->u = x;505#else506unsigned char *buf;507508buf = dst;509buf[0] = (unsigned char)(x >> 8);510buf[1] = (unsigned char)x;511#endif512}513514static inline unsigned515br_dec16le(const void *src)516{517#if BR_LE_UNALIGNED518return ((const br_union_u16 *)src)->u;519#else520const unsigned char *buf;521522buf = src;523return (unsigned)buf[0] | ((unsigned)buf[1] << 8);524#endif525}526527static inline unsigned528br_dec16be(const void *src)529{530#if BR_BE_UNALIGNED531return ((const br_union_u16 *)src)->u;532#else533const unsigned char *buf;534535buf = src;536return ((unsigned)buf[0] << 8) | (unsigned)buf[1];537#endif538}539540static inline void541br_enc32le(void *dst, uint32_t x)542{543#if BR_LE_UNALIGNED544((br_union_u32 *)dst)->u = x;545#else546unsigned char *buf;547548buf = dst;549buf[0] = (unsigned char)x;550buf[1] = (unsigned char)(x >> 8);551buf[2] = (unsigned char)(x >> 16);552buf[3] = (unsigned char)(x >> 24);553#endif554}555556static inline void557br_enc32be(void *dst, uint32_t x)558{559#if BR_BE_UNALIGNED560((br_union_u32 *)dst)->u = x;561#else562unsigned char *buf;563564buf = dst;565buf[0] = (unsigned char)(x >> 24);566buf[1] = (unsigned char)(x >> 16);567buf[2] = (unsigned char)(x >> 8);568buf[3] = (unsigned char)x;569#endif570}571572static inline uint32_t573br_dec32le(const void *src)574{575#if BR_LE_UNALIGNED576return ((const br_union_u32 *)src)->u;577#else578const unsigned char *buf;579580buf = src;581return (uint32_t)buf[0]582| ((uint32_t)buf[1] << 8)583| ((uint32_t)buf[2] << 16)584| ((uint32_t)buf[3] << 24);585#endif586}587588static inline uint32_t589br_dec32be(const void *src)590{591#if BR_BE_UNALIGNED592return ((const br_union_u32 *)src)->u;593#else594const unsigned char *buf;595596buf = src;597return ((uint32_t)buf[0] << 24)598| ((uint32_t)buf[1] << 16)599| ((uint32_t)buf[2] << 8)600| (uint32_t)buf[3];601#endif602}603604static inline void605br_enc64le(void *dst, uint64_t x)606{607#if BR_LE_UNALIGNED608((br_union_u64 *)dst)->u = x;609#else610unsigned char *buf;611612buf = dst;613br_enc32le(buf, (uint32_t)x);614br_enc32le(buf + 4, (uint32_t)(x >> 32));615#endif616}617618static inline void619br_enc64be(void *dst, uint64_t x)620{621#if BR_BE_UNALIGNED622((br_union_u64 *)dst)->u = x;623#else624unsigned char *buf;625626buf = dst;627br_enc32be(buf, (uint32_t)(x >> 32));628br_enc32be(buf + 4, (uint32_t)x);629#endif630}631632static inline uint64_t633br_dec64le(const void *src)634{635#if BR_LE_UNALIGNED636return ((const br_union_u64 *)src)->u;637#else638const unsigned char *buf;639640buf = src;641return (uint64_t)br_dec32le(buf)642| ((uint64_t)br_dec32le(buf + 4) << 32);643#endif644}645646static inline uint64_t647br_dec64be(const void *src)648{649#if BR_BE_UNALIGNED650return ((const br_union_u64 *)src)->u;651#else652const unsigned char *buf;653654buf = src;655return ((uint64_t)br_dec32be(buf) << 32)656| (uint64_t)br_dec32be(buf + 4);657#endif658}659660/*661* Range decoding and encoding (for several successive values).662*/663void br_range_dec16le(uint16_t *v, size_t num, const void *src);664void br_range_dec16be(uint16_t *v, size_t num, const void *src);665void br_range_enc16le(void *dst, const uint16_t *v, size_t num);666void br_range_enc16be(void *dst, const uint16_t *v, size_t num);667668void br_range_dec32le(uint32_t *v, size_t num, const void *src);669void br_range_dec32be(uint32_t *v, size_t num, const void *src);670void br_range_enc32le(void *dst, const uint32_t *v, size_t num);671void br_range_enc32be(void *dst, const uint32_t *v, size_t num);672673void br_range_dec64le(uint64_t *v, size_t num, const void *src);674void br_range_dec64be(uint64_t *v, size_t num, const void *src);675void br_range_enc64le(void *dst, const uint64_t *v, size_t num);676void br_range_enc64be(void *dst, const uint64_t *v, size_t num);677678/*679* Byte-swap a 32-bit integer.680*/681static inline uint32_t682br_swap32(uint32_t x)683{684x = ((x & (uint32_t)0x00FF00FF) << 8)685| ((x >> 8) & (uint32_t)0x00FF00FF);686return (x << 16) | (x >> 16);687}688689/* ==================================================================== */690/*691* Support code for hash functions.692*/693694/*695* IV for MD5, SHA-1, SHA-224 and SHA-256.696*/697extern const uint32_t br_md5_IV[];698extern const uint32_t br_sha1_IV[];699extern const uint32_t br_sha224_IV[];700extern const uint32_t br_sha256_IV[];701702/*703* Round functions for MD5, SHA-1, SHA-224 and SHA-256 (SHA-224 and704* SHA-256 use the same round function).705*/706void br_md5_round(const unsigned char *buf, uint32_t *val);707void br_sha1_round(const unsigned char *buf, uint32_t *val);708void br_sha2small_round(const unsigned char *buf, uint32_t *val);709710/*711* The core function for the TLS PRF. It computes712* P_hash(secret, label + seed), and XORs the result into the dst buffer.713*/714void br_tls_phash(void *dst, size_t len,715const br_hash_class *dig,716const void *secret, size_t secret_len, const char *label,717size_t seed_num, const br_tls_prf_seed_chunk *seed);718719/*720* Copy all configured hash implementations from a multihash context721* to another.722*/723static inline void724br_multihash_copyimpl(br_multihash_context *dst,725const br_multihash_context *src)726{727memcpy((void *)dst->impl, src->impl, sizeof src->impl);728}729730/* ==================================================================== */731/*732* Constant-time primitives. These functions manipulate 32-bit values in733* order to provide constant-time comparisons and multiplexers.734*735* Boolean values (the "ctl" bits) MUST have value 0 or 1.736*737* Implementation notes:738* =====================739*740* The uintN_t types are unsigned and with width exactly N bits; the C741* standard guarantees that computations are performed modulo 2^N, and742* there can be no overflow. Negation (unary '-') works on unsigned types743* as well.744*745* The intN_t types are guaranteed to have width exactly N bits, with no746* padding bit, and using two's complement representation. Casting747* intN_t to uintN_t really is conversion modulo 2^N. Beware that intN_t748* types, being signed, trigger implementation-defined behaviour on749* overflow (including raising some signal): with GCC, while modular750* arithmetics are usually applied, the optimizer may assume that751* overflows don't occur (unless the -fwrapv command-line option is752* added); Clang has the additional -ftrapv option to explicitly trap on753* integer overflow or underflow.754*/755756/*757* Negate a boolean.758*/759static inline uint32_t760NOT(uint32_t ctl)761{762return ctl ^ 1;763}764765/*766* Multiplexer: returns x if ctl == 1, y if ctl == 0.767*/768static inline uint32_t769MUX(uint32_t ctl, uint32_t x, uint32_t y)770{771return y ^ (-ctl & (x ^ y));772}773774/*775* Equality check: returns 1 if x == y, 0 otherwise.776*/777static inline uint32_t778EQ(uint32_t x, uint32_t y)779{780uint32_t q;781782q = x ^ y;783return NOT((q | -q) >> 31);784}785786/*787* Inequality check: returns 1 if x != y, 0 otherwise.788*/789static inline uint32_t790NEQ(uint32_t x, uint32_t y)791{792uint32_t q;793794q = x ^ y;795return (q | -q) >> 31;796}797798/*799* Comparison: returns 1 if x > y, 0 otherwise.800*/801static inline uint32_t802GT(uint32_t x, uint32_t y)803{804/*805* If both x < 2^31 and x < 2^31, then y-x will have its high806* bit set if x > y, cleared otherwise.807*808* If either x >= 2^31 or y >= 2^31 (but not both), then the809* result is the high bit of x.810*811* If both x >= 2^31 and y >= 2^31, then we can virtually812* subtract 2^31 from both, and we are back to the first case.813* Since (y-2^31)-(x-2^31) = y-x, the subtraction is already814* fine.815*/816uint32_t z;817818z = y - x;819return (z ^ ((x ^ y) & (x ^ z))) >> 31;820}821822/*823* Other comparisons (greater-or-equal, lower-than, lower-or-equal).824*/825#define GE(x, y) NOT(GT(y, x))826#define LT(x, y) GT(y, x)827#define LE(x, y) NOT(GT(x, y))828829/*830* General comparison: returned value is -1, 0 or 1, depending on831* whether x is lower than, equal to, or greater than y.832*/833static inline int32_t834CMP(uint32_t x, uint32_t y)835{836return (int32_t)GT(x, y) | -(int32_t)GT(y, x);837}838839/*840* Returns 1 if x == 0, 0 otherwise. Take care that the operand is signed.841*/842static inline uint32_t843EQ0(int32_t x)844{845uint32_t q;846847q = (uint32_t)x;848return ~(q | -q) >> 31;849}850851/*852* Returns 1 if x > 0, 0 otherwise. Take care that the operand is signed.853*/854static inline uint32_t855GT0(int32_t x)856{857/*858* High bit of -x is 0 if x == 0, but 1 if x > 0.859*/860uint32_t q;861862q = (uint32_t)x;863return (~q & -q) >> 31;864}865866/*867* Returns 1 if x >= 0, 0 otherwise. Take care that the operand is signed.868*/869static inline uint32_t870GE0(int32_t x)871{872return ~(uint32_t)x >> 31;873}874875/*876* Returns 1 if x < 0, 0 otherwise. Take care that the operand is signed.877*/878static inline uint32_t879LT0(int32_t x)880{881return (uint32_t)x >> 31;882}883884/*885* Returns 1 if x <= 0, 0 otherwise. Take care that the operand is signed.886*/887static inline uint32_t888LE0(int32_t x)889{890uint32_t q;891892/*893* ~-x has its high bit set if and only if -x is nonnegative (as894* a signed int), i.e. x is in the -(2^31-1) to 0 range. We must895* do an OR with x itself to account for x = -2^31.896*/897q = (uint32_t)x;898return (q | ~-q) >> 31;899}900901/*902* Conditional copy: src[] is copied into dst[] if and only if ctl is 1.903* dst[] and src[] may overlap completely (but not partially).904*/905void br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len);906907#define CCOPY br_ccopy908909/*910* Compute the bit length of a 32-bit integer. Returned value is between 0911* and 32 (inclusive).912*/913static inline uint32_t914BIT_LENGTH(uint32_t x)915{916uint32_t k, c;917918k = NEQ(x, 0);919c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;920c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3;921c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2;922c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1;923k += GT(x, 0x0001);924return k;925}926927/*928* Compute the minimum of x and y.929*/930static inline uint32_t931MIN(uint32_t x, uint32_t y)932{933return MUX(GT(x, y), y, x);934}935936/*937* Compute the maximum of x and y.938*/939static inline uint32_t940MAX(uint32_t x, uint32_t y)941{942return MUX(GT(x, y), x, y);943}944945/*946* Multiply two 32-bit integers, with a 64-bit result. This default947* implementation assumes that the basic multiplication operator948* yields constant-time code.949*/950#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y))951952#if BR_CT_MUL31953954/*955* Alternate implementation of MUL31, that will be constant-time on some956* (old) platforms where the default MUL31 is not. Unfortunately, it is957* also substantially slower, and yields larger code, on more modern958* platforms, which is why it is deactivated by default.959*960* MUL31_lo() must do some extra work because on some platforms, the961* _signed_ multiplication may return early if the top bits are 1.962* Simply truncating (casting) the output of MUL31() would not be963* sufficient, because the compiler may notice that we keep only the low964* word, and then replace automatically the unsigned multiplication with965* a signed multiplication opcode.966*/967#define MUL31(x, y) ((uint64_t)((x) | (uint32_t)0x80000000) \968* (uint64_t)((y) | (uint32_t)0x80000000) \969- ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \970- ((uint64_t)1 << 62))971static inline uint32_t972MUL31_lo(uint32_t x, uint32_t y)973{974uint32_t xl, xh;975uint32_t yl, yh;976977xl = (x & 0xFFFF) | (uint32_t)0x80000000;978xh = (x >> 16) | (uint32_t)0x80000000;979yl = (y & 0xFFFF) | (uint32_t)0x80000000;980yh = (y >> 16) | (uint32_t)0x80000000;981return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF;982}983984#else985986/*987* Multiply two 31-bit integers, with a 62-bit result. This default988* implementation assumes that the basic multiplication operator989* yields constant-time code.990* The MUL31_lo() macro returns only the low 31 bits of the product.991*/992#define MUL31(x, y) ((uint64_t)(x) * (uint64_t)(y))993#define MUL31_lo(x, y) (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF)994995#endif996997/*998* Multiply two words together; the sum of the lengths of the two999* operands must not exceed 31 (for instance, one operand may use 161000* bits if the other fits on 15). If BR_CT_MUL15 is non-zero, then the1001* macro will contain some extra operations that help in making the1002* operation constant-time on some platforms, where the basic 32-bit1003* multiplication is not constant-time.1004*/1005#if BR_CT_MUL151006#define MUL15(x, y) (((uint32_t)(x) | (uint32_t)0x80000000) \1007* ((uint32_t)(y) | (uint32_t)0x80000000) \1008& (uint32_t)0x7FFFFFFF)1009#else1010#define MUL15(x, y) ((uint32_t)(x) * (uint32_t)(y))1011#endif10121013/*1014* Arithmetic right shift (sign bit is copied). What happens when1015* right-shifting a negative value is _implementation-defined_, so it1016* does not trigger undefined behaviour, but it is still up to each1017* compiler to define (and document) what it does. Most/all compilers1018* will do an arithmetic shift, the sign bit being used to fill the1019* holes; this is a native operation on the underlying CPU, and it would1020* make little sense for the compiler to do otherwise. GCC explicitly1021* documents that it follows that convention.1022*1023* Still, if BR_NO_ARITH_SHIFT is defined (and non-zero), then an1024* alternate version will be used, that does not rely on such1025* implementation-defined behaviour. Unfortunately, it is also slower1026* and yields bigger code, which is why it is deactivated by default.1027*/1028#if BR_NO_ARITH_SHIFT1029#define ARSH(x, n) (((uint32_t)(x) >> (n)) \1030| ((-((uint32_t)(x) >> 31)) << (32 - (n))))1031#else1032#define ARSH(x, n) ((*(int32_t *)&(x)) >> (n))1033#endif10341035/*1036* Constant-time division. The dividend hi:lo is divided by the1037* divisor d; the quotient is returned and the remainder is written1038* in *r. If hi == d, then the quotient does not fit on 32 bits;1039* returned value is thus truncated. If hi > d, returned values are1040* indeterminate.1041*/1042uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r);10431044/*1045* Wrapper for br_divrem(); the remainder is returned, and the quotient1046* is discarded.1047*/1048static inline uint32_t1049br_rem(uint32_t hi, uint32_t lo, uint32_t d)1050{1051uint32_t r;10521053br_divrem(hi, lo, d, &r);1054return r;1055}10561057/*1058* Wrapper for br_divrem(); the quotient is returned, and the remainder1059* is discarded.1060*/1061static inline uint32_t1062br_div(uint32_t hi, uint32_t lo, uint32_t d)1063{1064uint32_t r;10651066return br_divrem(hi, lo, d, &r);1067}10681069/* ==================================================================== */10701071/*1072* Integers 'i32'1073* --------------1074*1075* The 'i32' functions implement computations on big integers using1076* an internal representation as an array of 32-bit integers. For1077* an array x[]:1078* -- x[0] contains the "announced bit length" of the integer1079* -- x[1], x[2]... contain the value in little-endian order (x[1]1080* contains the least significant 32 bits)1081*1082* Multiplications rely on the elementary 32x32->64 multiplication.1083*1084* The announced bit length specifies the number of bits that are1085* significant in the subsequent 32-bit words. Unused bits in the1086* last (most significant) word are set to 0; subsequent words are1087* uninitialized and need not exist at all.1088*1089* The execution time and memory access patterns of all computations1090* depend on the announced bit length, but not on the actual word1091* values. For modular integers, the announced bit length of any integer1092* modulo n is equal to the actual bit length of n; thus, computations1093* on modular integers are "constant-time" (only the modulus length may1094* leak).1095*/10961097/*1098* Compute the actual bit length of an integer. The argument x should1099* point to the first (least significant) value word of the integer.1100* The len 'xlen' contains the number of 32-bit words to access.1101*1102* CT: value or length of x does not leak.1103*/1104uint32_t br_i32_bit_length(uint32_t *x, size_t xlen);11051106/*1107* Decode an integer from its big-endian unsigned representation. The1108* "true" bit length of the integer is computed, but all words of x[]1109* corresponding to the full 'len' bytes of the source are set.1110*1111* CT: value or length of x does not leak.1112*/1113void br_i32_decode(uint32_t *x, const void *src, size_t len);11141115/*1116* Decode an integer from its big-endian unsigned representation. The1117* integer MUST be lower than m[]; the announced bit length written in1118* x[] will be equal to that of m[]. All 'len' bytes from the source are1119* read.1120*1121* Returned value is 1 if the decode value fits within the modulus, 01122* otherwise. In the latter case, the x[] buffer will be set to 0 (but1123* still with the announced bit length of m[]).1124*1125* CT: value or length of x does not leak. Memory access pattern depends1126* only of 'len' and the announced bit length of m. Whether x fits or1127* not does not leak either.1128*/1129uint32_t br_i32_decode_mod(uint32_t *x,1130const void *src, size_t len, const uint32_t *m);11311132/*1133* Reduce an integer (a[]) modulo another (m[]). The result is written1134* in x[] and its announced bit length is set to be equal to that of m[].1135*1136* x[] MUST be distinct from a[] and m[].1137*1138* CT: only announced bit lengths leak, not values of x, a or m.1139*/1140void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);11411142/*1143* Decode an integer from its big-endian unsigned representation, and1144* reduce it modulo the provided modulus m[]. The announced bit length1145* of the result is set to be equal to that of the modulus.1146*1147* x[] MUST be distinct from m[].1148*/1149void br_i32_decode_reduce(uint32_t *x,1150const void *src, size_t len, const uint32_t *m);11511152/*1153* Encode an integer into its big-endian unsigned representation. The1154* output length in bytes is provided (parameter 'len'); if the length1155* is too short then the integer is appropriately truncated; if it is1156* too long then the extra bytes are set to 0.1157*/1158void br_i32_encode(void *dst, size_t len, const uint32_t *x);11591160/*1161* Multiply x[] by 2^32 and then add integer z, modulo m[]. This1162* function assumes that x[] and m[] have the same announced bit1163* length, and the announced bit length of m[] matches its true1164* bit length.1165*1166* x[] and m[] MUST be distinct arrays.1167*1168* CT: only the common announced bit length of x and m leaks, not1169* the values of x, z or m.1170*/1171void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);11721173/*1174* Extract one word from an integer. The offset is counted in bits.1175* The word MUST entirely fit within the word elements corresponding1176* to the announced bit length of a[].1177*/1178static inline uint32_t1179br_i32_word(const uint32_t *a, uint32_t off)1180{1181size_t u;1182unsigned j;11831184u = (size_t)(off >> 5) + 1;1185j = (unsigned)off & 31;1186if (j == 0) {1187return a[u];1188} else {1189return (a[u] >> j) | (a[u + 1] << (32 - j));1190}1191}11921193/*1194* Test whether an integer is zero.1195*/1196uint32_t br_i32_iszero(const uint32_t *x);11971198/*1199* Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]1200* is unmodified, but the carry is still computed and returned. The1201* arrays a[] and b[] MUST have the same announced bit length.1202*1203* a[] and b[] MAY be the same array, but partial overlap is not allowed.1204*/1205uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl);12061207/*1208* Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,1209* then a[] is unmodified, but the carry is still computed and returned.1210* The arrays a[] and b[] MUST have the same announced bit length.1211*1212* a[] and b[] MAY be the same array, but partial overlap is not allowed.1213*/1214uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);12151216/*1217* Compute d+a*b, result in d. The initial announced bit length of d[]1218* MUST match that of a[]. The d[] array MUST be large enough to1219* accommodate the full result, plus (possibly) an extra word. The1220* resulting announced bit length of d[] will be the sum of the announced1221* bit lengths of a[] and b[] (therefore, it may be larger than the actual1222* bit length of the numerical result).1223*1224* a[] and b[] may be the same array. d[] must be disjoint from both a[]1225* and b[].1226*/1227void br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);12281229/*1230* Zeroize an integer. The announced bit length is set to the provided1231* value, and the corresponding words are set to 0.1232*/1233static inline void1234br_i32_zero(uint32_t *x, uint32_t bit_len)1235{1236*x ++ = bit_len;1237memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x);1238}12391240/*1241* Compute -(1/x) mod 2^32. If x is even, then this function returns 0.1242*/1243uint32_t br_i32_ninv32(uint32_t x);12441245/*1246* Convert a modular integer to Montgomery representation. The integer x[]1247* MUST be lower than m[], but with the same announced bit length.1248*/1249void br_i32_to_monty(uint32_t *x, const uint32_t *m);12501251/*1252* Convert a modular integer back from Montgomery representation. The1253* integer x[] MUST be lower than m[], but with the same announced bit1254* length. The "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is1255* the least significant value word of m[] (this works only if m[] is1256* an odd integer).1257*/1258void br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);12591260/*1261* Compute a modular Montgomery multiplication. d[] is filled with the1262* value of x*y/R modulo m[] (where R is the Montgomery factor). The1263* array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be1264* numerically lower than m[]. x[] and y[] MAY be the same array. The1265* "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least1266* significant value word of m[] (this works only if m[] is an odd1267* integer).1268*/1269void br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,1270const uint32_t *m, uint32_t m0i);12711272/*1273* Compute a modular exponentiation. x[] MUST be an integer modulo m[]1274* (same announced bit length, lower value). m[] MUST be odd. The1275* exponent is in big-endian unsigned notation, over 'elen' bytes. The1276* "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least1277* significant value word of m[] (this works only if m[] is an odd1278* integer). The t1[] and t2[] parameters must be temporary arrays,1279* each large enough to accommodate an integer with the same size as m[].1280*/1281void br_i32_modpow(uint32_t *x, const unsigned char *e, size_t elen,1282const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);12831284/* ==================================================================== */12851286/*1287* Integers 'i31'1288* --------------1289*1290* The 'i31' functions implement computations on big integers using1291* an internal representation as an array of 32-bit integers. For1292* an array x[]:1293* -- x[0] encodes the array length and the "announced bit length"1294* of the integer: namely, if the announced bit length is k,1295* then x[0] = ((k / 31) << 5) + (k % 31).1296* -- x[1], x[2]... contain the value in little-endian order, 311297* bits per word (x[1] contains the least significant 31 bits).1298* The upper bit of each word is 0.1299*1300* Multiplications rely on the elementary 32x32->64 multiplication.1301*1302* The announced bit length specifies the number of bits that are1303* significant in the subsequent 32-bit words. Unused bits in the1304* last (most significant) word are set to 0; subsequent words are1305* uninitialized and need not exist at all.1306*1307* The execution time and memory access patterns of all computations1308* depend on the announced bit length, but not on the actual word1309* values. For modular integers, the announced bit length of any integer1310* modulo n is equal to the actual bit length of n; thus, computations1311* on modular integers are "constant-time" (only the modulus length may1312* leak).1313*/13141315/*1316* Test whether an integer is zero.1317*/1318uint32_t br_i31_iszero(const uint32_t *x);13191320/*1321* Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]1322* is unmodified, but the carry is still computed and returned. The1323* arrays a[] and b[] MUST have the same announced bit length.1324*1325* a[] and b[] MAY be the same array, but partial overlap is not allowed.1326*/1327uint32_t br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl);13281329/*1330* Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,1331* then a[] is unmodified, but the carry is still computed and returned.1332* The arrays a[] and b[] MUST have the same announced bit length.1333*1334* a[] and b[] MAY be the same array, but partial overlap is not allowed.1335*/1336uint32_t br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);13371338/*1339* Compute the ENCODED actual bit length of an integer. The argument x1340* should point to the first (least significant) value word of the1341* integer. The len 'xlen' contains the number of 32-bit words to1342* access. The upper bit of each value word MUST be 0.1343* Returned value is ((k / 31) << 5) + (k % 31) if the bit length is k.1344*1345* CT: value or length of x does not leak.1346*/1347uint32_t br_i31_bit_length(uint32_t *x, size_t xlen);13481349/*1350* Decode an integer from its big-endian unsigned representation. The1351* "true" bit length of the integer is computed and set in the encoded1352* announced bit length (x[0]), but all words of x[] corresponding to1353* the full 'len' bytes of the source are set.1354*1355* CT: value or length of x does not leak.1356*/1357void br_i31_decode(uint32_t *x, const void *src, size_t len);13581359/*1360* Decode an integer from its big-endian unsigned representation. The1361* integer MUST be lower than m[]; the (encoded) announced bit length1362* written in x[] will be equal to that of m[]. All 'len' bytes from the1363* source are read.1364*1365* Returned value is 1 if the decode value fits within the modulus, 01366* otherwise. In the latter case, the x[] buffer will be set to 0 (but1367* still with the announced bit length of m[]).1368*1369* CT: value or length of x does not leak. Memory access pattern depends1370* only of 'len' and the announced bit length of m. Whether x fits or1371* not does not leak either.1372*/1373uint32_t br_i31_decode_mod(uint32_t *x,1374const void *src, size_t len, const uint32_t *m);13751376/*1377* Zeroize an integer. The announced bit length is set to the provided1378* value, and the corresponding words are set to 0. The ENCODED bit length1379* is expected here.1380*/1381static inline void1382br_i31_zero(uint32_t *x, uint32_t bit_len)1383{1384*x ++ = bit_len;1385memset(x, 0, ((bit_len + 31) >> 5) * sizeof *x);1386}13871388/*1389* Right-shift an integer. The shift amount must be lower than 311390* bits.1391*/1392void br_i31_rshift(uint32_t *x, int count);13931394/*1395* Reduce an integer (a[]) modulo another (m[]). The result is written1396* in x[] and its announced bit length is set to be equal to that of m[].1397*1398* x[] MUST be distinct from a[] and m[].1399*1400* CT: only announced bit lengths leak, not values of x, a or m.1401*/1402void br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);14031404/*1405* Decode an integer from its big-endian unsigned representation, and1406* reduce it modulo the provided modulus m[]. The announced bit length1407* of the result is set to be equal to that of the modulus.1408*1409* x[] MUST be distinct from m[].1410*/1411void br_i31_decode_reduce(uint32_t *x,1412const void *src, size_t len, const uint32_t *m);14131414/*1415* Multiply x[] by 2^31 and then add integer z, modulo m[]. This1416* function assumes that x[] and m[] have the same announced bit1417* length, the announced bit length of m[] matches its true1418* bit length.1419*1420* x[] and m[] MUST be distinct arrays. z MUST fit in 31 bits (upper1421* bit set to 0).1422*1423* CT: only the common announced bit length of x and m leaks, not1424* the values of x, z or m.1425*/1426void br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);14271428/*1429* Encode an integer into its big-endian unsigned representation. The1430* output length in bytes is provided (parameter 'len'); if the length1431* is too short then the integer is appropriately truncated; if it is1432* too long then the extra bytes are set to 0.1433*/1434void br_i31_encode(void *dst, size_t len, const uint32_t *x);14351436/*1437* Compute -(1/x) mod 2^31. If x is even, then this function returns 0.1438*/1439uint32_t br_i31_ninv31(uint32_t x);14401441/*1442* Compute a modular Montgomery multiplication. d[] is filled with the1443* value of x*y/R modulo m[] (where R is the Montgomery factor). The1444* array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be1445* numerically lower than m[]. x[] and y[] MAY be the same array. The1446* "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least1447* significant value word of m[] (this works only if m[] is an odd1448* integer).1449*/1450void br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,1451const uint32_t *m, uint32_t m0i);14521453/*1454* Convert a modular integer to Montgomery representation. The integer x[]1455* MUST be lower than m[], but with the same announced bit length.1456*/1457void br_i31_to_monty(uint32_t *x, const uint32_t *m);14581459/*1460* Convert a modular integer back from Montgomery representation. The1461* integer x[] MUST be lower than m[], but with the same announced bit1462* length. The "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is1463* the least significant value word of m[] (this works only if m[] is1464* an odd integer).1465*/1466void br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);14671468/*1469* Compute a modular exponentiation. x[] MUST be an integer modulo m[]1470* (same announced bit length, lower value). m[] MUST be odd. The1471* exponent is in big-endian unsigned notation, over 'elen' bytes. The1472* "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least1473* significant value word of m[] (this works only if m[] is an odd1474* integer). The t1[] and t2[] parameters must be temporary arrays,1475* each large enough to accommodate an integer with the same size as m[].1476*/1477void br_i31_modpow(uint32_t *x, const unsigned char *e, size_t elen,1478const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);14791480/*1481* Compute a modular exponentiation. x[] MUST be an integer modulo m[]1482* (same announced bit length, lower value). m[] MUST be odd. The1483* exponent is in big-endian unsigned notation, over 'elen' bytes. The1484* "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least1485* significant value word of m[] (this works only if m[] is an odd1486* integer). The tmp[] array is used for temporaries, and has size1487* 'twlen' words; it must be large enough to accommodate at least two1488* temporary values with the same size as m[] (including the leading1489* "bit length" word). If there is room for more temporaries, then this1490* function may use the extra room for window-based optimisation,1491* resulting in faster computations.1492*1493* Returned value is 1 on success, 0 on error. An error is reported if1494* the provided tmp[] array is too short.1495*/1496uint32_t br_i31_modpow_opt(uint32_t *x, const unsigned char *e, size_t elen,1497const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);14981499/*1500* Compute d+a*b, result in d. The initial announced bit length of d[]1501* MUST match that of a[]. The d[] array MUST be large enough to1502* accommodate the full result, plus (possibly) an extra word. The1503* resulting announced bit length of d[] will be the sum of the announced1504* bit lengths of a[] and b[] (therefore, it may be larger than the actual1505* bit length of the numerical result).1506*1507* a[] and b[] may be the same array. d[] must be disjoint from both a[]1508* and b[].1509*/1510void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);15111512/*1513* Compute x/y mod m, result in x. Values x and y must be between 0 and1514* m-1, and have the same announced bit length as m. Modulus m must be1515* odd. The "m0i" parameter is equal to -1/m mod 2^31. The array 't'1516* must point to a temporary area that can hold at least three integers1517* of the size of m.1518*1519* m may not overlap x and y. x and y may overlap each other (this can1520* be useful to test whether a value is invertible modulo m). t must be1521* disjoint from all other arrays.1522*1523* Returned value is 1 on success, 0 otherwise. Success is attained if1524* y is invertible modulo m.1525*/1526uint32_t br_i31_moddiv(uint32_t *x, const uint32_t *y,1527const uint32_t *m, uint32_t m0i, uint32_t *t);15281529/* ==================================================================== */15301531/*1532* FIXME: document "i15" functions.1533*/15341535static inline void1536br_i15_zero(uint16_t *x, uint16_t bit_len)1537{1538*x ++ = bit_len;1539memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x);1540}15411542uint32_t br_i15_iszero(const uint16_t *x);15431544uint16_t br_i15_ninv15(uint16_t x);15451546uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl);15471548uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl);15491550void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m);15511552void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,1553const uint16_t *m, uint16_t m0i);15541555void br_i15_to_monty(uint16_t *x, const uint16_t *m);15561557void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen,1558const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2);15591560uint32_t br_i15_modpow_opt(uint16_t *x, const unsigned char *e, size_t elen,1561const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen);15621563void br_i15_encode(void *dst, size_t len, const uint16_t *x);15641565uint32_t br_i15_decode_mod(uint16_t *x,1566const void *src, size_t len, const uint16_t *m);15671568void br_i15_rshift(uint16_t *x, int count);15691570uint32_t br_i15_bit_length(uint16_t *x, size_t xlen);15711572void br_i15_decode(uint16_t *x, const void *src, size_t len);15731574void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i);15751576void br_i15_decode_reduce(uint16_t *x,1577const void *src, size_t len, const uint16_t *m);15781579void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m);15801581void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b);15821583uint32_t br_i15_moddiv(uint16_t *x, const uint16_t *y,1584const uint16_t *m, uint16_t m0i, uint16_t *t);15851586/*1587* Variant of br_i31_modpow_opt() that internally uses 64x64->1281588* multiplications. It expects the same parameters as br_i31_modpow_opt(),1589* except that the temporaries should be 64-bit integers, not 32-bit1590* integers.1591*/1592uint32_t br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,1593const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen);15941595/*1596* Type for a function with the same API as br_i31_modpow_opt() (some1597* implementations of this type may have stricter alignment requirements1598* on the temporaries).1599*/1600typedef uint32_t (*br_i31_modpow_opt_type)(uint32_t *x,1601const unsigned char *e, size_t elen,1602const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);16031604/*1605* Wrapper for br_i62_modpow_opt() that uses the same type as1606* br_i31_modpow_opt(); however, it requires its 'tmp' argument to the1607* 64-bit aligned.1608*/1609uint32_t br_i62_modpow_opt_as_i31(uint32_t *x,1610const unsigned char *e, size_t elen,1611const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);16121613/* ==================================================================== */16141615static inline size_t1616br_digest_size(const br_hash_class *digest_class)1617{1618return (size_t)(digest_class->desc >> BR_HASHDESC_OUT_OFF)1619& BR_HASHDESC_OUT_MASK;1620}16211622/*1623* Get the output size (in bytes) of a hash function.1624*/1625size_t br_digest_size_by_ID(int digest_id);16261627/*1628* Get the OID (encoded OBJECT IDENTIFIER value, without tag and length)1629* for a hash function. If digest_id is not a supported digest identifier1630* (in particular if it is equal to 0, i.e. br_md5sha1_ID), then NULL is1631* returned and *len is set to 0.1632*/1633const unsigned char *br_digest_OID(int digest_id, size_t *len);16341635/* ==================================================================== */1636/*1637* DES support functions.1638*/16391640/*1641* Apply DES Initial Permutation.1642*/1643void br_des_do_IP(uint32_t *xl, uint32_t *xr);16441645/*1646* Apply DES Final Permutation (inverse of IP).1647*/1648void br_des_do_invIP(uint32_t *xl, uint32_t *xr);16491650/*1651* Key schedule unit: for a DES key (8 bytes), compute 16 subkeys. Each1652* subkey is two 28-bit words represented as two 32-bit words; the PC-21653* bit extration is NOT applied.1654*/1655void br_des_keysched_unit(uint32_t *skey, const void *key);16561657/*1658* Reversal of 16 DES sub-keys (for decryption).1659*/1660void br_des_rev_skey(uint32_t *skey);16611662/*1663* DES/3DES key schedule for 'des_tab' (encryption direction). Returned1664* value is the number of rounds.1665*/1666unsigned br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len);16671668/*1669* DES/3DES key schedule for 'des_ct' (encryption direction). Returned1670* value is the number of rounds.1671*/1672unsigned br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len);16731674/*1675* DES/3DES subkey decompression (from the compressed bitsliced subkeys).1676*/1677void br_des_ct_skey_expand(uint32_t *sk_exp,1678unsigned num_rounds, const uint32_t *skey);16791680/*1681* DES/3DES block encryption/decryption ('des_tab').1682*/1683void br_des_tab_process_block(unsigned num_rounds,1684const uint32_t *skey, void *block);16851686/*1687* DES/3DES block encryption/decryption ('des_ct').1688*/1689void br_des_ct_process_block(unsigned num_rounds,1690const uint32_t *skey, void *block);16911692/* ==================================================================== */1693/*1694* AES support functions.1695*/16961697/*1698* The AES S-box (256-byte table).1699*/1700extern const unsigned char br_aes_S[];17011702/*1703* AES key schedule. skey[] is filled with n+1 128-bit subkeys, where n1704* is the number of rounds (10 to 14, depending on key size). The number1705* of rounds is returned. If the key size is invalid (not 16, 24 or 32),1706* then 0 is returned.1707*1708* This implementation uses a 256-byte table and is NOT constant-time.1709*/1710unsigned br_aes_keysched(uint32_t *skey, const void *key, size_t key_len);17111712/*1713* AES key schedule for decryption ('aes_big' implementation).1714*/1715unsigned br_aes_big_keysched_inv(uint32_t *skey,1716const void *key, size_t key_len);17171718/*1719* AES block encryption with the 'aes_big' implementation (fast, but1720* not constant-time). This function encrypts a single block "in place".1721*/1722void br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data);17231724/*1725* AES block decryption with the 'aes_big' implementation (fast, but1726* not constant-time). This function decrypts a single block "in place".1727*/1728void br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data);17291730/*1731* AES block encryption with the 'aes_small' implementation (small, but1732* slow and not constant-time). This function encrypts a single block1733* "in place".1734*/1735void br_aes_small_encrypt(unsigned num_rounds,1736const uint32_t *skey, void *data);17371738/*1739* AES block decryption with the 'aes_small' implementation (small, but1740* slow and not constant-time). This function decrypts a single block1741* "in place".1742*/1743void br_aes_small_decrypt(unsigned num_rounds,1744const uint32_t *skey, void *data);17451746/*1747* The constant-time implementation is "bitsliced": the 128-bit state is1748* split over eight 32-bit words q* in the following way:1749*1750* -- Input block consists in 16 bytes:1751* a00 a10 a20 a30 a01 a11 a21 a31 a02 a12 a22 a32 a03 a13 a23 a331752* In the terminology of FIPS 197, this is a 4x4 matrix which is read1753* column by column.1754*1755* -- Each byte is split into eight bits which are distributed over the1756* eight words, at the same rank. Thus, for a byte x at rank k, bit 01757* (least significant) of x will be at rank k in q0 (if that bit is b,1758* then it contributes "b << k" to the value of q0), bit 1 of x will be1759* at rank k in q1, and so on.1760*1761* -- Ranks given to bits are in "row order" and are either all even, or1762* all odd. Two independent AES states are thus interleaved, one using1763* the even ranks, the other the odd ranks. Row order means:1764* a00 a01 a02 a03 a10 a11 a12 a13 a20 a21 a22 a23 a30 a31 a32 a331765*1766* Converting input bytes from two AES blocks to bitslice representation1767* is done in the following way:1768* -- Decode first block into the four words q0 q2 q4 q6, in that order,1769* using little-endian convention.1770* -- Decode second block into the four words q1 q3 q5 q7, in that order,1771* using little-endian convention.1772* -- Call br_aes_ct_ortho().1773*1774* Converting back to bytes is done by using the reverse operations. Note1775* that br_aes_ct_ortho() is its own inverse.1776*/17771778/*1779* Perform bytewise orthogonalization of eight 32-bit words. Bytes1780* of q0..q7 are spread over all words: for a byte x that occurs1781* at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit1782* of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.1783*1784* This operation is an involution.1785*/1786void br_aes_ct_ortho(uint32_t *q);17871788/*1789* The AES S-box, as a bitsliced constant-time version. The input array1790* consists in eight 32-bit words; 32 S-box instances are computed in1791* parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)1792* are spread over the words 0 to 7, at the same rank.1793*/1794void br_aes_ct_bitslice_Sbox(uint32_t *q);17951796/*1797* Like br_aes_bitslice_Sbox(), but for the inverse S-box.1798*/1799void br_aes_ct_bitslice_invSbox(uint32_t *q);18001801/*1802* Compute AES encryption on bitsliced data. Since input is stored on1803* eight 32-bit words, two block encryptions are actually performed1804* in parallel.1805*/1806void br_aes_ct_bitslice_encrypt(unsigned num_rounds,1807const uint32_t *skey, uint32_t *q);18081809/*1810* Compute AES decryption on bitsliced data. Since input is stored on1811* eight 32-bit words, two block decryptions are actually performed1812* in parallel.1813*/1814void br_aes_ct_bitslice_decrypt(unsigned num_rounds,1815const uint32_t *skey, uint32_t *q);18161817/*1818* AES key schedule, constant-time version. skey[] is filled with n+11819* 128-bit subkeys, where n is the number of rounds (10 to 14, depending1820* on key size). The number of rounds is returned. If the key size is1821* invalid (not 16, 24 or 32), then 0 is returned.1822*/1823unsigned br_aes_ct_keysched(uint32_t *comp_skey,1824const void *key, size_t key_len);18251826/*1827* Expand AES subkeys as produced by br_aes_ct_keysched(), into1828* a larger array suitable for br_aes_ct_bitslice_encrypt() and1829* br_aes_ct_bitslice_decrypt().1830*/1831void br_aes_ct_skey_expand(uint32_t *skey,1832unsigned num_rounds, const uint32_t *comp_skey);18331834/*1835* For the ct64 implementation, the same bitslicing technique is used,1836* but four instances are interleaved. First instance uses bits 0, 4,1837* 8, 12,... of each word; second instance uses bits 1, 5, 9, 13,...1838* and so on.1839*/18401841/*1842* Perform bytewise orthogonalization of eight 64-bit words. Bytes1843* of q0..q7 are spread over all words: for a byte x that occurs1844* at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit1845* of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.1846*1847* This operation is an involution.1848*/1849void br_aes_ct64_ortho(uint64_t *q);18501851/*1852* Interleave bytes for an AES input block. If input bytes are1853* denoted 0123456789ABCDEF, and have been decoded with little-endian1854* convention (w[0] contains 0123, with '3' being most significant;1855* w[1] contains 4567, and so on), then output word q0 will be1856* set to 08192A3B (again little-endian convention) and q1 will1857* be set to 4C5D6E7F.1858*/1859void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w);18601861/*1862* Perform the opposite of br_aes_ct64_interleave_in().1863*/1864void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1);18651866/*1867* The AES S-box, as a bitsliced constant-time version. The input array1868* consists in eight 64-bit words; 64 S-box instances are computed in1869* parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)1870* are spread over the words 0 to 7, at the same rank.1871*/1872void br_aes_ct64_bitslice_Sbox(uint64_t *q);18731874/*1875* Like br_aes_bitslice_Sbox(), but for the inverse S-box.1876*/1877void br_aes_ct64_bitslice_invSbox(uint64_t *q);18781879/*1880* Compute AES encryption on bitsliced data. Since input is stored on1881* eight 64-bit words, four block encryptions are actually performed1882* in parallel.1883*/1884void br_aes_ct64_bitslice_encrypt(unsigned num_rounds,1885const uint64_t *skey, uint64_t *q);18861887/*1888* Compute AES decryption on bitsliced data. Since input is stored on1889* eight 64-bit words, four block decryptions are actually performed1890* in parallel.1891*/1892void br_aes_ct64_bitslice_decrypt(unsigned num_rounds,1893const uint64_t *skey, uint64_t *q);18941895/*1896* AES key schedule, constant-time version. skey[] is filled with n+11897* 128-bit subkeys, where n is the number of rounds (10 to 14, depending1898* on key size). The number of rounds is returned. If the key size is1899* invalid (not 16, 24 or 32), then 0 is returned.1900*/1901unsigned br_aes_ct64_keysched(uint64_t *comp_skey,1902const void *key, size_t key_len);19031904/*1905* Expand AES subkeys as produced by br_aes_ct64_keysched(), into1906* a larger array suitable for br_aes_ct64_bitslice_encrypt() and1907* br_aes_ct64_bitslice_decrypt().1908*/1909void br_aes_ct64_skey_expand(uint64_t *skey,1910unsigned num_rounds, const uint64_t *comp_skey);19111912/*1913* Test support for AES-NI opcodes.1914*/1915int br_aes_x86ni_supported(void);19161917/*1918* AES key schedule, using x86 AES-NI instructions. This yields the1919* subkeys in the encryption direction. Number of rounds is returned.1920* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.1921*/1922unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,1923const void *key, size_t len);19241925/*1926* AES key schedule, using x86 AES-NI instructions. This yields the1927* subkeys in the decryption direction. Number of rounds is returned.1928* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.1929*/1930unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,1931const void *key, size_t len);19321933/*1934* Test support for AES POWER8 opcodes.1935*/1936int br_aes_pwr8_supported(void);19371938/*1939* AES key schedule, using POWER8 instructions. This yields the1940* subkeys in the encryption direction. Number of rounds is returned.1941* Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.1942*/1943unsigned br_aes_pwr8_keysched(unsigned char *skni,1944const void *key, size_t len);19451946/* ==================================================================== */1947/*1948* RSA.1949*/19501951/*1952* Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is1953* the encoded hash function OID, or NULL.1954*/1955uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,1956const unsigned char *hash, size_t hash_len,1957uint32_t n_bitlen, unsigned char *x);19581959/*1960* Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded1961* hash function OID, or NULL. The provided 'sig' value is _after_ the1962* modular exponentiation, i.e. it should be the padded hash. On1963* success, the hashed message is extracted.1964*/1965uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,1966const unsigned char *hash_oid, size_t hash_len,1967unsigned char *hash_out);19681969/*1970* Apply proper PSS padding. The 'x' buffer is output only: it1971* receives the value that is to be exponentiated.1972*/1973uint32_t br_rsa_pss_sig_pad(const br_prng_class **rng,1974const br_hash_class *hf_data, const br_hash_class *hf_mgf1,1975const unsigned char *hash, size_t salt_len,1976uint32_t n_bitlen, unsigned char *x);19771978/*1979* Check PSS padding. The provided value is the one _after_1980* the modular exponentiation; it is modified by this function.1981* This function infers the signature length from the public key1982* size, i.e. it assumes that this has already been verified (as1983* part of the exponentiation).1984*/1985uint32_t br_rsa_pss_sig_unpad(1986const br_hash_class *hf_data, const br_hash_class *hf_mgf1,1987const unsigned char *hash, size_t salt_len,1988const br_rsa_public_key *pk, unsigned char *x);19891990/*1991* Apply OAEP padding. Returned value is the actual padded string length,1992* or zero on error.1993*/1994size_t br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig,1995const void *label, size_t label_len, const br_rsa_public_key *pk,1996void *dst, size_t dst_nax_len, const void *src, size_t src_len);19971998/*1999* Unravel and check OAEP padding. If the padding is correct, then 1 is2000* returned, '*len' is adjusted to the length of the message, and the2001* data is moved to the start of the 'data' buffer. If the padding is2002* incorrect, then 0 is returned and '*len' is untouched. Either way,2003* the complete buffer contents are altered.2004*/2005uint32_t br_rsa_oaep_unpad(const br_hash_class *dig,2006const void *label, size_t label_len, void *data, size_t *len);20072008/*2009* Compute MGF1 for a given seed, and XOR the output into the provided2010* buffer.2011*/2012void br_mgf1_xor(void *data, size_t len,2013const br_hash_class *dig, const void *seed, size_t seed_len);20142015/*2016* Inner function for RSA key generation; used by the "i31" and "i62"2017* implementations.2018*/2019uint32_t br_rsa_i31_keygen_inner(const br_prng_class **rng,2020br_rsa_private_key *sk, void *kbuf_priv,2021br_rsa_public_key *pk, void *kbuf_pub,2022unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31);20232024/* ==================================================================== */2025/*2026* Elliptic curves.2027*/20282029/*2030* Type for generic EC parameters: curve order (unsigned big-endian2031* encoding) and encoded conventional generator.2032*/2033typedef struct {2034int curve;2035const unsigned char *order;2036size_t order_len;2037const unsigned char *generator;2038size_t generator_len;2039} br_ec_curve_def;20402041extern const br_ec_curve_def br_secp256r1;2042extern const br_ec_curve_def br_secp384r1;2043extern const br_ec_curve_def br_secp521r1;20442045/*2046* For Curve25519, the advertised "order" really is 2^255-1, since the2047* point multipliction function really works over arbitrary 255-bit2048* scalars. This value is only meant as a hint for ECDH key generation;2049* only ECDSA uses the exact curve order, and ECDSA is not used with2050* that specific curve.2051*/2052extern const br_ec_curve_def br_curve25519;20532054/*2055* Decode some bytes as an i31 integer, with truncation (corresponding2056* to the 'bits2int' operation in RFC 6979). The target ENCODED bit2057* length is provided as last parameter. The resulting value will have2058* this declared bit length, and consists the big-endian unsigned decoding2059* of exactly that many bits in the source (capped at the source length).2060*/2061void br_ecdsa_i31_bits2int(uint32_t *x,2062const void *src, size_t len, uint32_t ebitlen);20632064/*2065* Decode some bytes as an i15 integer, with truncation (corresponding2066* to the 'bits2int' operation in RFC 6979). The target ENCODED bit2067* length is provided as last parameter. The resulting value will have2068* this declared bit length, and consists the big-endian unsigned decoding2069* of exactly that many bits in the source (capped at the source length).2070*/2071void br_ecdsa_i15_bits2int(uint16_t *x,2072const void *src, size_t len, uint32_t ebitlen);20732074/* ==================================================================== */2075/*2076* ASN.1 support functions.2077*/20782079/*2080* A br_asn1_uint structure contains encoding information about an2081* INTEGER nonnegative value: pointer to the integer contents (unsigned2082* big-endian representation), length of the integer contents,2083* and length of the encoded value. The data shall have minimal length:2084* - If the integer value is zero, then 'len' must be zero.2085* - If the integer value is not zero, then data[0] must be non-zero.2086*2087* Under these conditions, 'asn1len' is necessarily equal to either len2088* or len+1.2089*/2090typedef struct {2091const unsigned char *data;2092size_t len;2093size_t asn1len;2094} br_asn1_uint;20952096/*2097* Given an encoded integer (unsigned big-endian, with possible leading2098* bytes of value 0), returned the "prepared INTEGER" structure.2099*/2100br_asn1_uint br_asn1_uint_prepare(const void *xdata, size_t xlen);21012102/*2103* Encode an ASN.1 length. The length of the encoded length is returned.2104* If 'dest' is NULL, then no encoding is performed, but the length of2105* the encoded length is still computed and returned.2106*/2107size_t br_asn1_encode_length(void *dest, size_t len);21082109/*2110* Convenient macro for computing lengths of lengths.2111*/2112#define len_of_len(len) br_asn1_encode_length(NULL, len)21132114/*2115* Encode a (prepared) ASN.1 INTEGER. The encoded length is returned.2116* If 'dest' is NULL, then no encoding is performed, but the length of2117* the encoded integer is still computed and returned.2118*/2119size_t br_asn1_encode_uint(void *dest, br_asn1_uint pp);21202121/*2122* Get the OID that identifies an elliptic curve. Returned value is2123* the DER-encoded OID, with the length (always one byte) but without2124* the tag. Thus, the first byte of the returned buffer contains the2125* number of subsequent bytes in the value. If the curve is not2126* recognised, NULL is returned.2127*/2128const unsigned char *br_get_curve_OID(int curve);21292130/*2131* Inner function for EC private key encoding. This is equivalent to2132* the API function br_encode_ec_raw_der(), except for an extra2133* parameter: if 'include_curve_oid' is zero, then the curve OID is2134* _not_ included in the output blob (this is for PKCS#8 support).2135*/2136size_t br_encode_ec_raw_der_inner(void *dest,2137const br_ec_private_key *sk, const br_ec_public_key *pk,2138int include_curve_oid);21392140/* ==================================================================== */2141/*2142* SSL/TLS support functions.2143*/21442145/*2146* Record types.2147*/2148#define BR_SSL_CHANGE_CIPHER_SPEC 202149#define BR_SSL_ALERT 212150#define BR_SSL_HANDSHAKE 222151#define BR_SSL_APPLICATION_DATA 2321522153/*2154* Handshake message types.2155*/2156#define BR_SSL_HELLO_REQUEST 02157#define BR_SSL_CLIENT_HELLO 12158#define BR_SSL_SERVER_HELLO 22159#define BR_SSL_CERTIFICATE 112160#define BR_SSL_SERVER_KEY_EXCHANGE 122161#define BR_SSL_CERTIFICATE_REQUEST 132162#define BR_SSL_SERVER_HELLO_DONE 142163#define BR_SSL_CERTIFICATE_VERIFY 152164#define BR_SSL_CLIENT_KEY_EXCHANGE 162165#define BR_SSL_FINISHED 2021662167/*2168* Alert levels.2169*/2170#define BR_LEVEL_WARNING 12171#define BR_LEVEL_FATAL 221722173/*2174* Low-level I/O state.2175*/2176#define BR_IO_FAILED 02177#define BR_IO_IN 12178#define BR_IO_OUT 22179#define BR_IO_INOUT 321802181/*2182* Mark a SSL engine as failed. The provided error code is recorded if2183* the engine was not already marked as failed. If 'err' is 0, then the2184* engine is marked as closed (without error).2185*/2186void br_ssl_engine_fail(br_ssl_engine_context *cc, int err);21872188/*2189* Test whether the engine is closed (normally or as a failure).2190*/2191static inline int2192br_ssl_engine_closed(const br_ssl_engine_context *cc)2193{2194return cc->iomode == BR_IO_FAILED;2195}21962197/*2198* Configure a new maximum fragment length. If possible, the maximum2199* length for outgoing records is immediately adjusted (if there are2200* not already too many buffered bytes for that).2201*/2202void br_ssl_engine_new_max_frag_len(2203br_ssl_engine_context *rc, unsigned max_frag_len);22042205/*2206* Test whether the current incoming record has been fully received2207* or not. This functions returns 0 only if a complete record header2208* has been received, but some of the (possibly encrypted) payload2209* has not yet been obtained.2210*/2211int br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc);22122213/*2214* Flush the current record (if not empty). This is meant to be called2215* from the handshake processor only.2216*/2217void br_ssl_engine_flush_record(br_ssl_engine_context *cc);22182219/*2220* Test whether there is some accumulated payload to send.2221*/2222static inline int2223br_ssl_engine_has_pld_to_send(const br_ssl_engine_context *rc)2224{2225return rc->oxa != rc->oxb && rc->oxa != rc->oxc;2226}22272228/*2229* Initialize RNG in engine. Returned value is 1 on success, 0 on error.2230* This function will try to use the OS-provided RNG, if available. If2231* there is no OS-provided RNG, or if it failed, and no entropy was2232* injected by the caller, then a failure will be reported. On error,2233* the context error code is set.2234*/2235int br_ssl_engine_init_rand(br_ssl_engine_context *cc);22362237/*2238* Reset the handshake-related parts of the engine.2239*/2240void br_ssl_engine_hs_reset(br_ssl_engine_context *cc,2241void (*hsinit)(void *), void (*hsrun)(void *));22422243/*2244* Get the PRF to use for this context, for the provided PRF hash2245* function ID.2246*/2247br_tls_prf_impl br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id);22482249/*2250* Consume the provided pre-master secret and compute the corresponding2251* master secret. The 'prf_id' is the ID of the hash function to use2252* with the TLS 1.2 PRF (ignored if the version is TLS 1.0 or 1.1).2253*/2254void br_ssl_engine_compute_master(br_ssl_engine_context *cc,2255int prf_id, const void *pms, size_t len);22562257/*2258* Switch to CBC decryption for incoming records.2259* cc the engine context2260* is_client non-zero for a client, zero for a server2261* prf_id id of hash function for PRF (ignored if not TLS 1.2+)2262* mac_id id of hash function for HMAC2263* bc_impl block cipher implementation (CBC decryption)2264* cipher_key_len block cipher key length (in bytes)2265*/2266void br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc,2267int is_client, int prf_id, int mac_id,2268const br_block_cbcdec_class *bc_impl, size_t cipher_key_len);22692270/*2271* Switch to CBC encryption for outgoing records.2272* cc the engine context2273* is_client non-zero for a client, zero for a server2274* prf_id id of hash function for PRF (ignored if not TLS 1.2+)2275* mac_id id of hash function for HMAC2276* bc_impl block cipher implementation (CBC encryption)2277* cipher_key_len block cipher key length (in bytes)2278*/2279void br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc,2280int is_client, int prf_id, int mac_id,2281const br_block_cbcenc_class *bc_impl, size_t cipher_key_len);22822283/*2284* Switch to GCM decryption for incoming records.2285* cc the engine context2286* is_client non-zero for a client, zero for a server2287* prf_id id of hash function for PRF2288* bc_impl block cipher implementation (CTR)2289* cipher_key_len block cipher key length (in bytes)2290*/2291void br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc,2292int is_client, int prf_id,2293const br_block_ctr_class *bc_impl, size_t cipher_key_len);22942295/*2296* Switch to GCM encryption for outgoing records.2297* cc the engine context2298* is_client non-zero for a client, zero for a server2299* prf_id id of hash function for PRF2300* bc_impl block cipher implementation (CTR)2301* cipher_key_len block cipher key length (in bytes)2302*/2303void br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc,2304int is_client, int prf_id,2305const br_block_ctr_class *bc_impl, size_t cipher_key_len);23062307/*2308* Switch to ChaCha20+Poly1305 decryption for incoming records.2309* cc the engine context2310* is_client non-zero for a client, zero for a server2311* prf_id id of hash function for PRF2312*/2313void br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc,2314int is_client, int prf_id);23152316/*2317* Switch to ChaCha20+Poly1305 encryption for outgoing records.2318* cc the engine context2319* is_client non-zero for a client, zero for a server2320* prf_id id of hash function for PRF2321*/2322void br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc,2323int is_client, int prf_id);23242325/*2326* Switch to CCM decryption for incoming records.2327* cc the engine context2328* is_client non-zero for a client, zero for a server2329* prf_id id of hash function for PRF2330* bc_impl block cipher implementation (CTR+CBC)2331* cipher_key_len block cipher key length (in bytes)2332* tag_len tag length (in bytes)2333*/2334void br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc,2335int is_client, int prf_id,2336const br_block_ctrcbc_class *bc_impl,2337size_t cipher_key_len, size_t tag_len);23382339/*2340* Switch to GCM encryption for outgoing records.2341* cc the engine context2342* is_client non-zero for a client, zero for a server2343* prf_id id of hash function for PRF2344* bc_impl block cipher implementation (CTR+CBC)2345* cipher_key_len block cipher key length (in bytes)2346* tag_len tag length (in bytes)2347*/2348void br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,2349int is_client, int prf_id,2350const br_block_ctrcbc_class *bc_impl,2351size_t cipher_key_len, size_t tag_len);23522353/*2354* Calls to T0-generated code.2355*/2356void br_ssl_hs_client_init_main(void *ctx);2357void br_ssl_hs_client_run(void *ctx);2358void br_ssl_hs_server_init_main(void *ctx);2359void br_ssl_hs_server_run(void *ctx);23602361/*2362* Get the hash function to use for signatures, given a bit mask of2363* supported hash functions. This implements a strict choice order2364* (namely SHA-256, SHA-384, SHA-512, SHA-224, SHA-1). If the mask2365* does not document support of any of these hash functions, then this2366* functions returns 0.2367*/2368int br_ssl_choose_hash(unsigned bf);23692370/* ==================================================================== */23712372/*2373* PowerPC / POWER assembly stuff. The special BR_POWER_ASM_MACROS macro2374* must be defined before including this file; this is done by source2375* files that use some inline assembly for PowerPC / POWER machines.2376*/23772378#if BR_POWER_ASM_MACROS23792380#define lxvw4x(xt, ra, rb) lxvw4x_(xt, ra, rb)2381#define stxvw4x(xt, ra, rb) stxvw4x_(xt, ra, rb)23822383#define bdnz(foo) bdnz_(foo)2384#define bdz(foo) bdz_(foo)2385#define beq(foo) beq_(foo)23862387#define li(rx, value) li_(rx, value)2388#define addi(rx, ra, imm) addi_(rx, ra, imm)2389#define cmpldi(rx, imm) cmpldi_(rx, imm)2390#define mtctr(rx) mtctr_(rx)2391#define vspltb(vrt, vrb, uim) vspltb_(vrt, vrb, uim)2392#define vspltw(vrt, vrb, uim) vspltw_(vrt, vrb, uim)2393#define vspltisb(vrt, imm) vspltisb_(vrt, imm)2394#define vspltisw(vrt, imm) vspltisw_(vrt, imm)2395#define vrlw(vrt, vra, vrb) vrlw_(vrt, vra, vrb)2396#define vsbox(vrt, vra) vsbox_(vrt, vra)2397#define vxor(vrt, vra, vrb) vxor_(vrt, vra, vrb)2398#define vand(vrt, vra, vrb) vand_(vrt, vra, vrb)2399#define vsro(vrt, vra, vrb) vsro_(vrt, vra, vrb)2400#define vsl(vrt, vra, vrb) vsl_(vrt, vra, vrb)2401#define vsldoi(vt, va, vb, sh) vsldoi_(vt, va, vb, sh)2402#define vsr(vrt, vra, vrb) vsr_(vrt, vra, vrb)2403#define vaddcuw(vrt, vra, vrb) vaddcuw_(vrt, vra, vrb)2404#define vadduwm(vrt, vra, vrb) vadduwm_(vrt, vra, vrb)2405#define vsububm(vrt, vra, vrb) vsububm_(vrt, vra, vrb)2406#define vsubuwm(vrt, vra, vrb) vsubuwm_(vrt, vra, vrb)2407#define vsrw(vrt, vra, vrb) vsrw_(vrt, vra, vrb)2408#define vcipher(vt, va, vb) vcipher_(vt, va, vb)2409#define vcipherlast(vt, va, vb) vcipherlast_(vt, va, vb)2410#define vncipher(vt, va, vb) vncipher_(vt, va, vb)2411#define vncipherlast(vt, va, vb) vncipherlast_(vt, va, vb)2412#define vperm(vt, va, vb, vc) vperm_(vt, va, vb, vc)2413#define vpmsumd(vt, va, vb) vpmsumd_(vt, va, vb)2414#define xxpermdi(vt, va, vb, d) xxpermdi_(vt, va, vb, d)24152416#define lxvw4x_(xt, ra, rb) "\tlxvw4x\t" #xt "," #ra "," #rb "\n"2417#define stxvw4x_(xt, ra, rb) "\tstxvw4x\t" #xt "," #ra "," #rb "\n"24182419#define label(foo) #foo "%=:\n"2420#define bdnz_(foo) "\tbdnz\t" #foo "%=\n"2421#define bdz_(foo) "\tbdz\t" #foo "%=\n"2422#define beq_(foo) "\tbeq\t" #foo "%=\n"24232424#define li_(rx, value) "\tli\t" #rx "," #value "\n"2425#define addi_(rx, ra, imm) "\taddi\t" #rx "," #ra "," #imm "\n"2426#define cmpldi_(rx, imm) "\tcmpldi\t" #rx "," #imm "\n"2427#define mtctr_(rx) "\tmtctr\t" #rx "\n"2428#define vspltb_(vrt, vrb, uim) "\tvspltb\t" #vrt "," #vrb "," #uim "\n"2429#define vspltw_(vrt, vrb, uim) "\tvspltw\t" #vrt "," #vrb "," #uim "\n"2430#define vspltisb_(vrt, imm) "\tvspltisb\t" #vrt "," #imm "\n"2431#define vspltisw_(vrt, imm) "\tvspltisw\t" #vrt "," #imm "\n"2432#define vrlw_(vrt, vra, vrb) "\tvrlw\t" #vrt "," #vra "," #vrb "\n"2433#define vsbox_(vrt, vra) "\tvsbox\t" #vrt "," #vra "\n"2434#define vxor_(vrt, vra, vrb) "\tvxor\t" #vrt "," #vra "," #vrb "\n"2435#define vand_(vrt, vra, vrb) "\tvand\t" #vrt "," #vra "," #vrb "\n"2436#define vsro_(vrt, vra, vrb) "\tvsro\t" #vrt "," #vra "," #vrb "\n"2437#define vsl_(vrt, vra, vrb) "\tvsl\t" #vrt "," #vra "," #vrb "\n"2438#define vsldoi_(vt, va, vb, sh) "\tvsldoi\t" #vt "," #va "," #vb "," #sh "\n"2439#define vsr_(vrt, vra, vrb) "\tvsr\t" #vrt "," #vra "," #vrb "\n"2440#define vaddcuw_(vrt, vra, vrb) "\tvaddcuw\t" #vrt "," #vra "," #vrb "\n"2441#define vadduwm_(vrt, vra, vrb) "\tvadduwm\t" #vrt "," #vra "," #vrb "\n"2442#define vsububm_(vrt, vra, vrb) "\tvsububm\t" #vrt "," #vra "," #vrb "\n"2443#define vsubuwm_(vrt, vra, vrb) "\tvsubuwm\t" #vrt "," #vra "," #vrb "\n"2444#define vsrw_(vrt, vra, vrb) "\tvsrw\t" #vrt "," #vra "," #vrb "\n"2445#define vcipher_(vt, va, vb) "\tvcipher\t" #vt "," #va "," #vb "\n"2446#define vcipherlast_(vt, va, vb) "\tvcipherlast\t" #vt "," #va "," #vb "\n"2447#define vncipher_(vt, va, vb) "\tvncipher\t" #vt "," #va "," #vb "\n"2448#define vncipherlast_(vt, va, vb) "\tvncipherlast\t" #vt "," #va "," #vb "\n"2449#define vperm_(vt, va, vb, vc) "\tvperm\t" #vt "," #va "," #vb "," #vc "\n"2450#define vpmsumd_(vt, va, vb) "\tvpmsumd\t" #vt "," #va "," #vb "\n"2451#define xxpermdi_(vt, va, vb, d) "\txxpermdi\t" #vt "," #va "," #vb "," #d "\n"24522453#endif24542455/* ==================================================================== */2456/*2457* Special "activate intrinsics" code, needed for some compiler versions.2458* This is defined at the end of this file, so that it won't impact any2459* of the inline functions defined previously; and it is controlled by2460* a specific macro defined in the caller code.2461*2462* Calling code conventions:2463*2464* - Caller must define BR_ENABLE_INTRINSICS before including "inner.h".2465* - Functions that use intrinsics must be enclosed in an "enabled"2466* region (between BR_TARGETS_X86_UP and BR_TARGETS_X86_DOWN).2467* - Functions that use intrinsics must be tagged with the appropriate2468* BR_TARGET().2469*/24702471#if BR_ENABLE_INTRINSICS && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)24722473/*2474* x86 intrinsics (both 32-bit and 64-bit).2475*/2476#if BR_i386 || BR_amd6424772478/*2479* On GCC before version 5.0, we need to use the pragma to enable the2480* target options globally, because the 'target' function attribute2481* appears to be unreliable. Before 4.6 we must also avoid the2482* push_options / pop_options mechanism, because it tends to trigger2483* some internal compiler errors.2484*/2485#if BR_GCC && !BR_GCC_5_02486#if BR_GCC_4_62487#define BR_TARGETS_X86_UP \2488_Pragma("GCC push_options") \2489_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul,rdrnd\")")2490#define BR_TARGETS_X86_DOWN \2491_Pragma("GCC pop_options")2492#else2493#define BR_TARGETS_X86_UP \2494_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul\")")2495#define BR_TARGETS_X86_DOWN2496#endif2497#pragma GCC diagnostic ignored "-Wpsabi"2498#endif24992500#if BR_CLANG && !BR_CLANG_3_82501#undef __SSE2__2502#undef __SSE3__2503#undef __SSSE3__2504#undef __SSE4_1__2505#undef __AES__2506#undef __PCLMUL__2507#undef __RDRND__2508#define __SSE2__ 12509#define __SSE3__ 12510#define __SSSE3__ 12511#define __SSE4_1__ 12512#define __AES__ 12513#define __PCLMUL__ 12514#define __RDRND__ 12515#endif25162517#ifndef BR_TARGETS_X86_UP2518#define BR_TARGETS_X86_UP2519#endif2520#ifndef BR_TARGETS_X86_DOWN2521#define BR_TARGETS_X86_DOWN2522#endif25232524#if BR_GCC || BR_CLANG2525BR_TARGETS_X86_UP2526#include <x86intrin.h>2527#include <cpuid.h>2528#define br_bswap32 __builtin_bswap322529BR_TARGETS_X86_DOWN2530#endif25312532#if BR_MSC2533#include <stdlib.h>2534#include <intrin.h>2535#include <immintrin.h>2536#define br_bswap32 _byteswap_ulong2537#endif25382539static inline int2540br_cpuid(uint32_t mask_eax, uint32_t mask_ebx,2541uint32_t mask_ecx, uint32_t mask_edx)2542{2543#if BR_GCC || BR_CLANG2544unsigned eax, ebx, ecx, edx;25452546if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {2547if ((eax & mask_eax) == mask_eax2548&& (ebx & mask_ebx) == mask_ebx2549&& (ecx & mask_ecx) == mask_ecx2550&& (edx & mask_edx) == mask_edx)2551{2552return 1;2553}2554}2555#elif BR_MSC2556int info[4];25572558__cpuid(info, 1);2559if (((uint32_t)info[0] & mask_eax) == mask_eax2560&& ((uint32_t)info[1] & mask_ebx) == mask_ebx2561&& ((uint32_t)info[2] & mask_ecx) == mask_ecx2562&& ((uint32_t)info[3] & mask_edx) == mask_edx)2563{2564return 1;2565}2566#endif2567return 0;2568}25692570#endif25712572#endif25732574/* ==================================================================== */25752576#endif257725782579