Path: blob/main/contrib/bearssl/src/rsa/rsa_i15_keygen.c
39483 views
/*1* Copyright (c) 2018 Thomas Pornin <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining4* a copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sublicense, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "inner.h"2526/*27* Make a random integer of the provided size. The size is encoded.28* The header word is untouched.29*/30static void31mkrand(const br_prng_class **rng, uint16_t *x, uint32_t esize)32{33size_t u, len;34unsigned m;3536len = (esize + 15) >> 4;37(*rng)->generate(rng, x + 1, len * sizeof(uint16_t));38for (u = 1; u < len; u ++) {39x[u] &= 0x7FFF;40}41m = esize & 15;42if (m == 0) {43x[len] &= 0x7FFF;44} else {45x[len] &= 0x7FFF >> (15 - m);46}47}4849/*50* This is the big-endian unsigned representation of the product of51* all small primes from 13 to 1481.52*/53static const unsigned char SMALL_PRIMES[] = {540x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,550x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,560x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,570xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,580x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,590x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,600x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,610xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,620xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,630xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,640x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,650x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,660x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,670x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,680xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,690x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,700xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,710xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,720x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,730xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,740x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,750x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,760x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,770x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,780x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,790xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD80};8182/*83* We need temporary values for at least 7 integers of the same size84* as a factor (including header word); more space helps with performance85* (in modular exponentiations), but we much prefer to remain under86* 2 kilobytes in total, to save stack space. The macro TEMPS below87* exceeds 1024 (which is a count in 16-bit words) when BR_MAX_RSA_SIZE88* is greater than 4350 (default value is 4096, so the 2-kB limit is89* maintained unless BR_MAX_RSA_SIZE was modified).90*/91#define MAX(x, y) ((x) > (y) ? (x) : (y))92#define TEMPS MAX(1024, 7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 29) / 15))9394/*95* Perform trial division on a candidate prime. This computes96* y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The97* br_i15_moddiv() function will report an error if y is not invertible98* modulo x. Returned value is 1 on success (none of the small primes99* divides x), 0 on error (a non-trivial GCD is obtained).100*101* This function assumes that x is odd.102*/103static uint32_t104trial_divisions(const uint16_t *x, uint16_t *t)105{106uint16_t *y;107uint16_t x0i;108109y = t;110t += 1 + ((x[0] + 15) >> 4);111x0i = br_i15_ninv15(x[1]);112br_i15_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x);113return br_i15_moddiv(y, y, x, x0i, t);114}115116/*117* Perform n rounds of Miller-Rabin on the candidate prime x. This118* function assumes that x = 3 mod 4.119*120* Returned value is 1 on success (all rounds completed successfully),121* 0 otherwise.122*/123static uint32_t124miller_rabin(const br_prng_class **rng, const uint16_t *x, int n,125uint16_t *t, size_t tlen)126{127/*128* Since x = 3 mod 4, the Miller-Rabin test is simple:129* - get a random base a (such that 1 < a < x-1)130* - compute z = a^((x-1)/2) mod x131* - if z != 1 and z != x-1, the number x is composite132*133* We generate bases 'a' randomly with a size which is134* one bit less than x, which ensures that a < x-1. It135* is not useful to verify that a > 1 because the probability136* that we get a value a equal to 0 or 1 is much smaller137* than the probability of our Miller-Rabin tests not to138* detect a composite, which is already quite smaller than the139* probability of the hardware misbehaving and return a140* composite integer because of some glitch (e.g. bad RAM141* or ill-timed cosmic ray).142*/143unsigned char *xm1d2;144size_t xlen, xm1d2_len, xm1d2_len_u16, u;145uint32_t asize;146unsigned cc;147uint16_t x0i;148149/*150* Compute (x-1)/2 (encoded).151*/152xm1d2 = (unsigned char *)t;153xm1d2_len = ((x[0] - (x[0] >> 4)) + 7) >> 3;154br_i15_encode(xm1d2, xm1d2_len, x);155cc = 0;156for (u = 0; u < xm1d2_len; u ++) {157unsigned w;158159w = xm1d2[u];160xm1d2[u] = (unsigned char)((w >> 1) | cc);161cc = w << 7;162}163164/*165* We used some words of the provided buffer for (x-1)/2.166*/167xm1d2_len_u16 = (xm1d2_len + 1) >> 1;168t += xm1d2_len_u16;169tlen -= xm1d2_len_u16;170171xlen = (x[0] + 15) >> 4;172asize = x[0] - 1 - EQ0(x[0] & 15);173x0i = br_i15_ninv15(x[1]);174while (n -- > 0) {175uint16_t *a;176uint32_t eq1, eqm1;177178/*179* Generate a random base. We don't need the base to be180* really uniform modulo x, so we just get a random181* number which is one bit shorter than x.182*/183a = t;184a[0] = x[0];185a[xlen] = 0;186mkrand(rng, a, asize);187188/*189* Compute a^((x-1)/2) mod x. We assume here that the190* function will not fail (the temporary array is large191* enough).192*/193br_i15_modpow_opt(a, xm1d2, xm1d2_len,194x, x0i, t + 1 + xlen, tlen - 1 - xlen);195196/*197* We must obtain either 1 or x-1. Note that x is odd,198* hence x-1 differs from x only in its low word (no199* carry).200*/201eq1 = a[1] ^ 1;202eqm1 = a[1] ^ (x[1] - 1);203for (u = 2; u <= xlen; u ++) {204eq1 |= a[u];205eqm1 |= a[u] ^ x[u];206}207208if ((EQ0(eq1) | EQ0(eqm1)) == 0) {209return 0;210}211}212return 1;213}214215/*216* Create a random prime of the provided size. 'size' is the _encoded_217* bit length. The two top bits and the two bottom bits are set to 1.218*/219static void220mkprime(const br_prng_class **rng, uint16_t *x, uint32_t esize,221uint32_t pubexp, uint16_t *t, size_t tlen)222{223size_t len;224225x[0] = esize;226len = (esize + 15) >> 4;227for (;;) {228size_t u;229uint32_t m3, m5, m7, m11;230int rounds;231232/*233* Generate random bits. We force the two top bits and the234* two bottom bits to 1.235*/236mkrand(rng, x, esize);237if ((esize & 15) == 0) {238x[len] |= 0x6000;239} else if ((esize & 15) == 1) {240x[len] |= 0x0001;241x[len - 1] |= 0x4000;242} else {243x[len] |= 0x0003 << ((esize & 15) - 2);244}245x[1] |= 0x0003;246247/*248* Trial division with low primes (3, 5, 7 and 11). We249* use the following properties:250*251* 2^2 = 1 mod 3252* 2^4 = 1 mod 5253* 2^3 = 1 mod 7254* 2^10 = 1 mod 11255*/256m3 = 0;257m5 = 0;258m7 = 0;259m11 = 0;260for (u = 0; u < len; u ++) {261uint32_t w;262263w = x[1 + u];264m3 += w << (u & 1);265m3 = (m3 & 0xFF) + (m3 >> 8);266m5 += w << ((4 - u) & 3);267m5 = (m5 & 0xFF) + (m5 >> 8);268m7 += w;269m7 = (m7 & 0x1FF) + (m7 >> 9);270m11 += w << (5 & -(u & 1));271m11 = (m11 & 0x3FF) + (m11 >> 10);272}273274/*275* Maximum values of m* at this point:276* m3: 511277* m5: 2310278* m7: 510279* m11: 2047280* We use the same properties to make further reductions.281*/282283m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 46 */284m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 16 */285m3 = ((m3 * 43) >> 5) & 3;286287m5 = (m5 & 0xFF) + (m5 >> 8); /* max: 263 */288m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 30 */289m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 15 */290m5 -= 10 & -GT(m5, 9);291m5 -= 5 & -GT(m5, 4);292293m7 = (m7 & 0x3F) + (m7 >> 6); /* max: 69 */294m7 = (m7 & 7) + (m7 >> 3); /* max: 14 */295m7 = ((m7 * 147) >> 7) & 7;296297/*298* 2^5 = 32 = -1 mod 11.299*/300m11 = (m11 & 0x1F) + 66 - (m11 >> 5); /* max: 97 */301m11 -= 88 & -GT(m11, 87);302m11 -= 44 & -GT(m11, 43);303m11 -= 22 & -GT(m11, 21);304m11 -= 11 & -GT(m11, 10);305306/*307* If any of these modulo is 0, then the candidate is308* not prime. Also, if pubexp is 3, 5, 7 or 11, and the309* corresponding modulus is 1, then the candidate must310* be rejected, because we need e to be invertible311* modulo p-1. We can use simple comparisons here312* because they won't leak information on a candidate313* that we keep, only on one that we reject (and is thus314* not secret).315*/316if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {317continue;318}319if ((pubexp == 3 && m3 == 1)320|| (pubexp == 5 && m5 == 1)321|| (pubexp == 7 && m7 == 1)322|| (pubexp == 11 && m11 == 1))323{324continue;325}326327/*328* More trial divisions.329*/330if (!trial_divisions(x, t)) {331continue;332}333334/*335* Miller-Rabin algorithm. Since we selected a random336* integer, not a maliciously crafted integer, we can use337* relatively few rounds to lower the risk of a false338* positive (i.e. declaring prime a non-prime) under339* 2^(-80). It is not useful to lower the probability much340* below that, since that would be substantially below341* the probability of the hardware misbehaving. Sufficient342* numbers of rounds are extracted from the Handbook of343* Applied Cryptography, note 4.49 (page 149).344*345* Since we work on the encoded size (esize), we need to346* compare with encoded thresholds.347*/348if (esize < 320) {349rounds = 12;350} else if (esize < 480) {351rounds = 9;352} else if (esize < 693) {353rounds = 6;354} else if (esize < 906) {355rounds = 4;356} else if (esize < 1386) {357rounds = 3;358} else {359rounds = 2;360}361362if (miller_rabin(rng, x, rounds, t, tlen)) {363return;364}365}366}367368/*369* Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided370* as parameter (with announced bit length equal to that of p). This371* function computes d = 1/e mod p-1 (for an odd integer e). Returned372* value is 1 on success, 0 on error (an error is reported if e is not373* invertible modulo p-1).374*375* The temporary buffer (t) must have room for at least 4 integers of376* the size of p.377*/378static uint32_t379invert_pubexp(uint16_t *d, const uint16_t *m, uint32_t e, uint16_t *t)380{381uint16_t *f;382uint32_t r;383384f = t;385t += 1 + ((m[0] + 15) >> 4);386387/*388* Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.389*/390br_i15_zero(d, m[0]);391d[1] = 1;392br_i15_zero(f, m[0]);393f[1] = e & 0x7FFF;394f[2] = (e >> 15) & 0x7FFF;395f[3] = e >> 30;396r = br_i15_moddiv(d, f, m, br_i15_ninv15(m[1]), t);397398/*399* We really want d = 1/e mod p-1, with p = 2m. By the CRT,400* the result is either the d we got, or d + m.401*402* Let's write e*d = 1 + k*m, for some integer k. Integers e403* and m are odd. If d is odd, then e*d is odd, which implies404* that k must be even; in that case, e*d = 1 + (k/2)*2m, and405* thus d is already fine. Conversely, if d is even, then k406* is odd, and we must add m to d in order to get the correct407* result.408*/409br_i15_add(d, m, (uint32_t)(1 - (d[1] & 1)));410411return r;412}413414/*415* Swap two buffers in RAM. They must be disjoint.416*/417static void418bufswap(void *b1, void *b2, size_t len)419{420size_t u;421unsigned char *buf1, *buf2;422423buf1 = b1;424buf2 = b2;425for (u = 0; u < len; u ++) {426unsigned w;427428w = buf1[u];429buf1[u] = buf2[u];430buf2[u] = w;431}432}433434/* see bearssl_rsa.h */435uint32_t436br_rsa_i15_keygen(const br_prng_class **rng,437br_rsa_private_key *sk, void *kbuf_priv,438br_rsa_public_key *pk, void *kbuf_pub,439unsigned size, uint32_t pubexp)440{441uint32_t esize_p, esize_q;442size_t plen, qlen, tlen;443uint16_t *p, *q, *t;444uint16_t tmp[TEMPS];445uint32_t r;446447if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {448return 0;449}450if (pubexp == 0) {451pubexp = 3;452} else if (pubexp == 1 || (pubexp & 1) == 0) {453return 0;454}455456esize_p = (size + 1) >> 1;457esize_q = size - esize_p;458sk->n_bitlen = size;459sk->p = kbuf_priv;460sk->plen = (esize_p + 7) >> 3;461sk->q = sk->p + sk->plen;462sk->qlen = (esize_q + 7) >> 3;463sk->dp = sk->q + sk->qlen;464sk->dplen = sk->plen;465sk->dq = sk->dp + sk->dplen;466sk->dqlen = sk->qlen;467sk->iq = sk->dq + sk->dqlen;468sk->iqlen = sk->plen;469470if (pk != NULL) {471pk->n = kbuf_pub;472pk->nlen = (size + 7) >> 3;473pk->e = pk->n + pk->nlen;474pk->elen = 4;475br_enc32be(pk->e, pubexp);476while (*pk->e == 0) {477pk->e ++;478pk->elen --;479}480}481482/*483* We now switch to encoded sizes.484*485* floor((x * 17477) / (2^18)) is equal to floor(x/15) for all486* integers x from 0 to 23833.487*/488esize_p += MUL15(esize_p, 17477) >> 18;489esize_q += MUL15(esize_q, 17477) >> 18;490plen = (esize_p + 15) >> 4;491qlen = (esize_q + 15) >> 4;492p = tmp;493q = p + 1 + plen;494t = q + 1 + qlen;495tlen = ((sizeof tmp) / sizeof(uint16_t)) - (2 + plen + qlen);496497/*498* When looking for primes p and q, we temporarily divide499* candidates by 2, in order to compute the inverse of the500* public exponent.501*/502503for (;;) {504mkprime(rng, p, esize_p, pubexp, t, tlen);505br_i15_rshift(p, 1);506if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {507br_i15_add(p, p, 1);508p[1] |= 1;509br_i15_encode(sk->p, sk->plen, p);510br_i15_encode(sk->dp, sk->dplen, t);511break;512}513}514515for (;;) {516mkprime(rng, q, esize_q, pubexp, t, tlen);517br_i15_rshift(q, 1);518if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {519br_i15_add(q, q, 1);520q[1] |= 1;521br_i15_encode(sk->q, sk->qlen, q);522br_i15_encode(sk->dq, sk->dqlen, t);523break;524}525}526527/*528* If p and q have the same size, then it is possible that q > p529* (when the target modulus size is odd, we generate p with a530* greater bit length than q). If q > p, we want to swap p and q531* (and also dp and dq) for two reasons:532* - The final step below (inversion of q modulo p) is easier if533* p > q.534* - While BearSSL's RSA code is perfectly happy with RSA keys such535* that p < q, some other implementations have restrictions and536* require p > q.537*538* Note that we can do a simple non-constant-time swap here,539* because the only information we leak here is that we insist on540* returning p and q such that p > q, which is not a secret.541*/542if (esize_p == esize_q && br_i15_sub(p, q, 0) == 1) {543bufswap(p, q, (1 + plen) * sizeof *p);544bufswap(sk->p, sk->q, sk->plen);545bufswap(sk->dp, sk->dq, sk->dplen);546}547548/*549* We have produced p, q, dp and dq. We can now compute iq = 1/d mod p.550*551* We ensured that p >= q, so this is just a matter of updating the552* header word for q (and possibly adding an extra word).553*554* Theoretically, the call below may fail, in case we were555* extraordinarily unlucky, and p = q. Another failure case is if556* Miller-Rabin failed us _twice_, and p and q are non-prime and557* have a factor is common. We report the error mostly because it558* is cheap and we can, but in practice this never happens (or, at559* least, it happens way less often than hardware glitches).560*/561q[0] = p[0];562if (plen > qlen) {563q[plen] = 0;564t ++;565tlen --;566}567br_i15_zero(t, p[0]);568t[1] = 1;569r = br_i15_moddiv(t, q, p, br_i15_ninv15(p[1]), t + 1 + plen);570br_i15_encode(sk->iq, sk->iqlen, t);571572/*573* Compute the public modulus too, if required.574*/575if (pk != NULL) {576br_i15_zero(t, p[0]);577br_i15_mulacc(t, p, q);578br_i15_encode(pk->n, pk->nlen, t);579}580581return r;582}583584585