/*
 * Path: blob/main/contrib/bearssl/src/rsa/rsa_i31_keygen_inner.c
 * 39482 views
 */
/*1* Copyright (c) 2018 Thomas Pornin <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining4* a copy of this software and associated documentation files (the5* "Software"), to deal in the Software without restriction, including6* without limitation the rights to use, copy, modify, merge, publish,7* distribute, sublicense, and/or sell copies of the Software, and to8* permit persons to whom the Software is furnished to do so, subject to9* the following conditions:10*11* The above copyright notice and this permission notice shall be12* included in all copies or substantial portions of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,15* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF16* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND17* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS18* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN19* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN20* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "inner.h"2526/*27* Make a random integer of the provided size. 
The size is encoded.28* The header word is untouched.29*/30static void31mkrand(const br_prng_class **rng, uint32_t *x, uint32_t esize)32{33size_t u, len;34unsigned m;3536len = (esize + 31) >> 5;37(*rng)->generate(rng, x + 1, len * sizeof(uint32_t));38for (u = 1; u < len; u ++) {39x[u] &= 0x7FFFFFFF;40}41m = esize & 31;42if (m == 0) {43x[len] &= 0x7FFFFFFF;44} else {45x[len] &= 0x7FFFFFFF >> (31 - m);46}47}4849/*50* This is the big-endian unsigned representation of the product of51* all small primes from 13 to 1481.52*/53static const unsigned char SMALL_PRIMES[] = {540x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,550x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,560x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,570xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,580x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,590x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,600x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,610xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,620xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,630xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,640x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,650x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,660x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,670x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,680xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,690x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,700xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,710xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,720x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,730xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,740x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,750x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,760x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,770x50, 0x8B, 0x9E, 0x9C, 
0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,780x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,790xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD80};8182/*83* We need temporary values for at least 7 integers of the same size84* as a factor (including header word); more space helps with performance85* (in modular exponentiations), but we much prefer to remain under86* 2 kilobytes in total, to save stack space. The macro TEMPS below87* exceeds 512 (which is a count in 32-bit words) when BR_MAX_RSA_SIZE88* is greater than 4464 (default value is 4096, so the 2-kB limit is89* maintained unless BR_MAX_RSA_SIZE was modified).90*/91#define MAX(x, y) ((x) > (y) ? (x) : (y))92#define ROUND2(x) ((((x) + 1) >> 1) << 1)9394#define TEMPS MAX(512, ROUND2(7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 61) / 31)))9596/*97* Perform trial division on a candidate prime. This computes98* y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The99* br_i31_moddiv() function will report an error if y is not invertible100* modulo x. Returned value is 1 on success (none of the small primes101* divides x), 0 on error (a non-trivial GCD is obtained).102*103* This function assumes that x is odd.104*/105static uint32_t106trial_divisions(const uint32_t *x, uint32_t *t)107{108uint32_t *y;109uint32_t x0i;110111y = t;112t += 1 + ((x[0] + 31) >> 5);113x0i = br_i31_ninv31(x[1]);114br_i31_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x);115return br_i31_moddiv(y, y, x, x0i, t);116}117118/*119* Perform n rounds of Miller-Rabin on the candidate prime x. 
This120* function assumes that x = 3 mod 4.121*122* Returned value is 1 on success (all rounds completed successfully),123* 0 otherwise.124*/125static uint32_t126miller_rabin(const br_prng_class **rng, const uint32_t *x, int n,127uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31)128{129/*130* Since x = 3 mod 4, the Miller-Rabin test is simple:131* - get a random base a (such that 1 < a < x-1)132* - compute z = a^((x-1)/2) mod x133* - if z != 1 and z != x-1, the number x is composite134*135* We generate bases 'a' randomly with a size which is136* one bit less than x, which ensures that a < x-1. It137* is not useful to verify that a > 1 because the probability138* that we get a value a equal to 0 or 1 is much smaller139* than the probability of our Miller-Rabin tests not to140* detect a composite, which is already quite smaller than the141* probability of the hardware misbehaving and return a142* composite integer because of some glitch (e.g. bad RAM143* or ill-timed cosmic ray).144*/145unsigned char *xm1d2;146size_t xlen, xm1d2_len, xm1d2_len_u32, u;147uint32_t asize;148unsigned cc;149uint32_t x0i;150151/*152* Compute (x-1)/2 (encoded).153*/154xm1d2 = (unsigned char *)t;155xm1d2_len = ((x[0] - (x[0] >> 5)) + 7) >> 3;156br_i31_encode(xm1d2, xm1d2_len, x);157cc = 0;158for (u = 0; u < xm1d2_len; u ++) {159unsigned w;160161w = xm1d2[u];162xm1d2[u] = (unsigned char)((w >> 1) | cc);163cc = w << 7;164}165166/*167* We used some words of the provided buffer for (x-1)/2.168*/169xm1d2_len_u32 = (xm1d2_len + 3) >> 2;170t += xm1d2_len_u32;171tlen -= xm1d2_len_u32;172173xlen = (x[0] + 31) >> 5;174asize = x[0] - 1 - EQ0(x[0] & 31);175x0i = br_i31_ninv31(x[1]);176while (n -- > 0) {177uint32_t *a, *t2;178uint32_t eq1, eqm1;179size_t t2len;180181/*182* Generate a random base. 
We don't need the base to be183* really uniform modulo x, so we just get a random184* number which is one bit shorter than x.185*/186a = t;187a[0] = x[0];188a[xlen] = 0;189mkrand(rng, a, asize);190191/*192* Compute a^((x-1)/2) mod x. We assume here that the193* function will not fail (the temporary array is large194* enough).195*/196t2 = t + 1 + xlen;197t2len = tlen - 1 - xlen;198if ((t2len & 1) != 0) {199/*200* Since the source array is 64-bit aligned and201* has an even number of elements (TEMPS), we202* can use the parity of the remaining length to203* detect and adjust alignment.204*/205t2 ++;206t2len --;207}208mp31(a, xm1d2, xm1d2_len, x, x0i, t2, t2len);209210/*211* We must obtain either 1 or x-1. Note that x is odd,212* hence x-1 differs from x only in its low word (no213* carry).214*/215eq1 = a[1] ^ 1;216eqm1 = a[1] ^ (x[1] - 1);217for (u = 2; u <= xlen; u ++) {218eq1 |= a[u];219eqm1 |= a[u] ^ x[u];220}221222if ((EQ0(eq1) | EQ0(eqm1)) == 0) {223return 0;224}225}226return 1;227}228229/*230* Create a random prime of the provided size. 'size' is the _encoded_231* bit length. The two top bits and the two bottom bits are set to 1.232*/233static void234mkprime(const br_prng_class **rng, uint32_t *x, uint32_t esize,235uint32_t pubexp, uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31)236{237size_t len;238239x[0] = esize;240len = (esize + 31) >> 5;241for (;;) {242size_t u;243uint32_t m3, m5, m7, m11;244int rounds, s7, s11;245246/*247* Generate random bits. We force the two top bits and the248* two bottom bits to 1.249*/250mkrand(rng, x, esize);251if ((esize & 31) == 0) {252x[len] |= 0x60000000;253} else if ((esize & 31) == 1) {254x[len] |= 0x00000001;255x[len - 1] |= 0x40000000;256} else {257x[len] |= 0x00000003 << ((esize & 31) - 2);258}259x[1] |= 0x00000003;260261/*262* Trial division with low primes (3, 5, 7 and 11). 
We263* use the following properties:264*265* 2^2 = 1 mod 3266* 2^4 = 1 mod 5267* 2^3 = 1 mod 7268* 2^10 = 1 mod 11269*/270m3 = 0;271m5 = 0;272m7 = 0;273m11 = 0;274s7 = 0;275s11 = 0;276for (u = 0; u < len; u ++) {277uint32_t w, w3, w5, w7, w11;278279w = x[1 + u];280w3 = (w & 0xFFFF) + (w >> 16); /* max: 98302 */281w5 = (w & 0xFFFF) + (w >> 16); /* max: 98302 */282w7 = (w & 0x7FFF) + (w >> 15); /* max: 98302 */283w11 = (w & 0xFFFFF) + (w >> 20); /* max: 1050622 */284285m3 += w3 << (u & 1);286m3 = (m3 & 0xFF) + (m3 >> 8); /* max: 1025 */287288m5 += w5 << ((4 - u) & 3);289m5 = (m5 & 0xFFF) + (m5 >> 12); /* max: 4479 */290291m7 += w7 << s7;292m7 = (m7 & 0x1FF) + (m7 >> 9); /* max: 1280 */293if (++ s7 == 3) {294s7 = 0;295}296297m11 += w11 << s11;298if (++ s11 == 10) {299s11 = 0;300}301m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 526847 */302}303304m3 = (m3 & 0x3F) + (m3 >> 6); /* max: 78 */305m3 = (m3 & 0x0F) + (m3 >> 4); /* max: 18 */306m3 = ((m3 * 43) >> 5) & 3;307308m5 = (m5 & 0xFF) + (m5 >> 8); /* max: 271 */309m5 = (m5 & 0x0F) + (m5 >> 4); /* max: 31 */310m5 -= 20 & -GT(m5, 19);311m5 -= 10 & -GT(m5, 9);312m5 -= 5 & -GT(m5, 4);313314m7 = (m7 & 0x3F) + (m7 >> 6); /* max: 82 */315m7 = (m7 & 0x07) + (m7 >> 3); /* max: 16 */316m7 = ((m7 * 147) >> 7) & 7;317318/*319* 2^5 = 32 = -1 mod 11.320*/321m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 1536 */322m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 1023 */323m11 = (m11 & 0x1F) + 33 - (m11 >> 5); /* max: 64 */324m11 -= 44 & -GT(m11, 43);325m11 -= 22 & -GT(m11, 21);326m11 -= 11 & -GT(m11, 10);327328/*329* If any of these modulo is 0, then the candidate is330* not prime. Also, if pubexp is 3, 5, 7 or 11, and the331* corresponding modulus is 1, then the candidate must332* be rejected, because we need e to be invertible333* modulo p-1. 
We can use simple comparisons here334* because they won't leak information on a candidate335* that we keep, only on one that we reject (and is thus336* not secret).337*/338if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {339continue;340}341if ((pubexp == 3 && m3 == 1)342|| (pubexp == 5 && m5 == 1)343|| (pubexp == 7 && m7 == 1)344|| (pubexp == 11 && m11 == 1))345{346continue;347}348349/*350* More trial divisions.351*/352if (!trial_divisions(x, t)) {353continue;354}355356/*357* Miller-Rabin algorithm. Since we selected a random358* integer, not a maliciously crafted integer, we can use359* relatively few rounds to lower the risk of a false360* positive (i.e. declaring prime a non-prime) under361* 2^(-80). It is not useful to lower the probability much362* below that, since that would be substantially below363* the probability of the hardware misbehaving. Sufficient364* numbers of rounds are extracted from the Handbook of365* Applied Cryptography, note 4.49 (page 149).366*367* Since we work on the encoded size (esize), we need to368* compare with encoded thresholds.369*/370if (esize < 309) {371rounds = 12;372} else if (esize < 464) {373rounds = 9;374} else if (esize < 670) {375rounds = 6;376} else if (esize < 877) {377rounds = 4;378} else if (esize < 1341) {379rounds = 3;380} else {381rounds = 2;382}383384if (miller_rabin(rng, x, rounds, t, tlen, mp31)) {385return;386}387}388}389390/*391* Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided392* as parameter (with announced bit length equal to that of p). This393* function computes d = 1/e mod p-1 (for an odd integer e). 
Returned394* value is 1 on success, 0 on error (an error is reported if e is not395* invertible modulo p-1).396*397* The temporary buffer (t) must have room for at least 4 integers of398* the size of p.399*/400static uint32_t401invert_pubexp(uint32_t *d, const uint32_t *m, uint32_t e, uint32_t *t)402{403uint32_t *f;404uint32_t r;405406f = t;407t += 1 + ((m[0] + 31) >> 5);408409/*410* Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.411*/412br_i31_zero(d, m[0]);413d[1] = 1;414br_i31_zero(f, m[0]);415f[1] = e & 0x7FFFFFFF;416f[2] = e >> 31;417r = br_i31_moddiv(d, f, m, br_i31_ninv31(m[1]), t);418419/*420* We really want d = 1/e mod p-1, with p = 2m. By the CRT,421* the result is either the d we got, or d + m.422*423* Let's write e*d = 1 + k*m, for some integer k. Integers e424* and m are odd. If d is odd, then e*d is odd, which implies425* that k must be even; in that case, e*d = 1 + (k/2)*2m, and426* thus d is already fine. Conversely, if d is even, then k427* is odd, and we must add m to d in order to get the correct428* result.429*/430br_i31_add(d, m, (uint32_t)(1 - (d[1] & 1)));431432return r;433}434435/*436* Swap two buffers in RAM. 
They must be disjoint.437*/438static void439bufswap(void *b1, void *b2, size_t len)440{441size_t u;442unsigned char *buf1, *buf2;443444buf1 = b1;445buf2 = b2;446for (u = 0; u < len; u ++) {447unsigned w;448449w = buf1[u];450buf1[u] = buf2[u];451buf2[u] = w;452}453}454455/* see inner.h */456uint32_t457br_rsa_i31_keygen_inner(const br_prng_class **rng,458br_rsa_private_key *sk, void *kbuf_priv,459br_rsa_public_key *pk, void *kbuf_pub,460unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31)461{462uint32_t esize_p, esize_q;463size_t plen, qlen, tlen;464uint32_t *p, *q, *t;465union {466uint32_t t32[TEMPS];467uint64_t t64[TEMPS >> 1]; /* for 64-bit alignment */468} tmp;469uint32_t r;470471if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {472return 0;473}474if (pubexp == 0) {475pubexp = 3;476} else if (pubexp == 1 || (pubexp & 1) == 0) {477return 0;478}479480esize_p = (size + 1) >> 1;481esize_q = size - esize_p;482sk->n_bitlen = size;483sk->p = kbuf_priv;484sk->plen = (esize_p + 7) >> 3;485sk->q = sk->p + sk->plen;486sk->qlen = (esize_q + 7) >> 3;487sk->dp = sk->q + sk->qlen;488sk->dplen = sk->plen;489sk->dq = sk->dp + sk->dplen;490sk->dqlen = sk->qlen;491sk->iq = sk->dq + sk->dqlen;492sk->iqlen = sk->plen;493494if (pk != NULL) {495pk->n = kbuf_pub;496pk->nlen = (size + 7) >> 3;497pk->e = pk->n + pk->nlen;498pk->elen = 4;499br_enc32be(pk->e, pubexp);500while (*pk->e == 0) {501pk->e ++;502pk->elen --;503}504}505506/*507* We now switch to encoded sizes.508*509* floor((x * 16913) / (2^19)) is equal to floor(x/31) for all510* integers x from 0 to 34966; the intermediate product fits on511* 30 bits, thus we can use MUL31().512*/513esize_p += MUL31(esize_p, 16913) >> 19;514esize_q += MUL31(esize_q, 16913) >> 19;515plen = (esize_p + 31) >> 5;516qlen = (esize_q + 31) >> 5;517p = tmp.t32;518q = p + 1 + plen;519t = q + 1 + qlen;520tlen = ((sizeof tmp.t32) / sizeof(uint32_t)) - (2 + plen + qlen);521522/*523* When looking for primes p and q, we temporarily divide524* 
candidates by 2, in order to compute the inverse of the525* public exponent.526*/527528for (;;) {529mkprime(rng, p, esize_p, pubexp, t, tlen, mp31);530br_i31_rshift(p, 1);531if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {532br_i31_add(p, p, 1);533p[1] |= 1;534br_i31_encode(sk->p, sk->plen, p);535br_i31_encode(sk->dp, sk->dplen, t);536break;537}538}539540for (;;) {541mkprime(rng, q, esize_q, pubexp, t, tlen, mp31);542br_i31_rshift(q, 1);543if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {544br_i31_add(q, q, 1);545q[1] |= 1;546br_i31_encode(sk->q, sk->qlen, q);547br_i31_encode(sk->dq, sk->dqlen, t);548break;549}550}551552/*553* If p and q have the same size, then it is possible that q > p554* (when the target modulus size is odd, we generate p with a555* greater bit length than q). If q > p, we want to swap p and q556* (and also dp and dq) for two reasons:557* - The final step below (inversion of q modulo p) is easier if558* p > q.559* - While BearSSL's RSA code is perfectly happy with RSA keys such560* that p < q, some other implementations have restrictions and561* require p > q.562*563* Note that we can do a simple non-constant-time swap here,564* because the only information we leak here is that we insist on565* returning p and q such that p > q, which is not a secret.566*/567if (esize_p == esize_q && br_i31_sub(p, q, 0) == 1) {568bufswap(p, q, (1 + plen) * sizeof *p);569bufswap(sk->p, sk->q, sk->plen);570bufswap(sk->dp, sk->dq, sk->dplen);571}572573/*574* We have produced p, q, dp and dq. We can now compute iq = 1/d mod p.575*576* We ensured that p >= q, so this is just a matter of updating the577* header word for q (and possibly adding an extra word).578*579* Theoretically, the call below may fail, in case we were580* extraordinarily unlucky, and p = q. Another failure case is if581* Miller-Rabin failed us _twice_, and p and q are non-prime and582* have a factor is common. 
We report the error mostly because it583* is cheap and we can, but in practice this never happens (or, at584* least, it happens way less often than hardware glitches).585*/586q[0] = p[0];587if (plen > qlen) {588q[plen] = 0;589t ++;590tlen --;591}592br_i31_zero(t, p[0]);593t[1] = 1;594r = br_i31_moddiv(t, q, p, br_i31_ninv31(p[1]), t + 1 + plen);595br_i31_encode(sk->iq, sk->iqlen, t);596597/*598* Compute the public modulus too, if required.599*/600if (pk != NULL) {601br_i31_zero(t, p[0]);602br_i31_mulacc(t, p, q);603br_i31_encode(pk->n, pk->nlen, t);604}605606return r;607}608609610