// Path: blob/a-new-beginning/SharedDependencies/Sources/cryptopp/donna_64.cpp
// 2 views
// donna_64.cpp - written and placed in public domain by Jeffrey Walton1// Crypto++ specific implementation wrapped around Andrew2// Moon's public domain curve25519-donna and ed25519-donna,3// https://github.com/floodyberry/curve25519-donna and4// https://github.com/floodyberry/ed25519-donna.56// The curve25519 and ed25519 source files multiplex different repos and7// architectures using namespaces. The repos are Andrew Moon's8// curve25519-donna and ed25519-donna. The architectures are 32-bit, 64-bit9// and SSE. For example, 32-bit x25519 uses symbols from Donna::X25519 and10// Donna::Arch32.1112// A fair amount of duplication happens below, but we could not directly13// use curve25519 for both x25519 and ed25519. A close examination reveals14// slight differences in the implementation. For example, look at the15// two curve25519_sub functions.1617// If needed, see Moon's commit "Go back to ignoring 256th bit [sic]",18// https://github.com/floodyberry/curve25519-donna/commit/57a683d18721a6581920#include "pch.h"2122#include "config.h"23#include "donna.h"24#include "secblock.h"25#include "sha.h"26#include "misc.h"27#include "cpu.h"2829#include <istream>30#include <sstream>3132#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE33# pragma GCC diagnostic ignored "-Wunused-function"34#endif3536#if CRYPTOPP_MSC_VERSION37# pragma warning(disable: 4244)38#endif3940// Squash MS LNK4221 and libtool warnings41extern const char DONNA64_FNAME[] = __FILE__;4243ANONYMOUS_NAMESPACE_BEGIN4445// Can't use GetAlignmentOf<word64>() because of C++11 and constexpr46// Can use 'const unsigned int' because of MSVC 201347#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)48# define ALIGN_SPEC 1649#else50# define ALIGN_SPEC 851#endif5253ANONYMOUS_NAMESPACE_END5455#if defined(CRYPTOPP_CURVE25519_64BIT)5657#include "donna_64.h"5859ANONYMOUS_NAMESPACE_BEGIN6061using CryptoPP::byte;62using CryptoPP::word64;63using CryptoPP::GetWord;64using CryptoPP::PutWord;65using 
CryptoPP::LITTLE_ENDIAN_ORDER;6667inline word64 U8TO64_LE(const byte* p)68{69return GetWord<word64>(false, LITTLE_ENDIAN_ORDER, p);70}7172inline void U64TO8_LE(byte* p, word64 w)73{74PutWord(false, LITTLE_ENDIAN_ORDER, p, w);75}7677ANONYMOUS_NAMESPACE_END7879NAMESPACE_BEGIN(CryptoPP)80NAMESPACE_BEGIN(Donna)81NAMESPACE_BEGIN(X25519)82ANONYMOUS_NAMESPACE_BEGIN8384using CryptoPP::byte;85using CryptoPP::word32;86using CryptoPP::sword32;87using CryptoPP::word64;88using CryptoPP::sword64;8990using CryptoPP::GetBlock;91using CryptoPP::LittleEndian;9293// Bring in all the symbols from the 64-bit header94using namespace CryptoPP::Donna::Arch64;9596/* out = in */97inline void98curve25519_copy(bignum25519 out, const bignum25519 in) {99out[0] = in[0]; out[1] = in[1];100out[2] = in[2]; out[3] = in[3];101out[4] = in[4];102}103104/* out = a + b */105inline void106curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {107out[0] = a[0] + b[0];108out[1] = a[1] + b[1];109out[2] = a[2] + b[2];110out[3] = a[3] + b[3];111out[4] = a[4] + b[4];112}113114/* out = a - b */115inline void116curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {117out[0] = a[0] + two54m152 - b[0];118out[1] = a[1] + two54m8 - b[1];119out[2] = a[2] + two54m8 - b[2];120out[3] = a[3] + two54m8 - b[3];121out[4] = a[4] + two54m8 - b[4];122}123124/* out = (in * scalar) */125inline void126curve25519_scalar_product(bignum25519 out, const bignum25519 in, const word64 scalar) {127word128 a;128word64 c;129130#if defined(CRYPTOPP_WORD128_AVAILABLE)131a = ((word128) in[0]) * scalar; out[0] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);132a = ((word128) in[1]) * scalar + c; out[1] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);133a = ((word128) in[2]) * scalar + c; out[2] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);134a = ((word128) in[3]) * scalar + c; out[3] = (word64)a & reduce_mask_51; c = (word64)(a >> 51);135a = ((word128) in[4]) * scalar + c; out[4] = 
(word64)a & reduce_mask_51; c = (word64)(a >> 51);136out[0] += c * 19;137#else138mul64x64_128(a, in[0], scalar) out[0] = lo128(a) & reduce_mask_51; shr128(c, a, 51);139mul64x64_128(a, in[1], scalar) add128_64(a, c) out[1] = lo128(a) & reduce_mask_51; shr128(c, a, 51);140mul64x64_128(a, in[2], scalar) add128_64(a, c) out[2] = lo128(a) & reduce_mask_51; shr128(c, a, 51);141mul64x64_128(a, in[3], scalar) add128_64(a, c) out[3] = lo128(a) & reduce_mask_51; shr128(c, a, 51);142mul64x64_128(a, in[4], scalar) add128_64(a, c) out[4] = lo128(a) & reduce_mask_51; shr128(c, a, 51);143out[0] += c * 19;144#endif145}146147/* out = a * b */148inline void149curve25519_mul(bignum25519 out, const bignum25519 a, const bignum25519 b) {150#if !defined(CRYPTOPP_WORD128_AVAILABLE)151word128 mul;152#endif153word128 t[5];154word64 r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;155156r0 = b[0]; r1 = b[1]; r2 = b[2]; r3 = b[3]; r4 = b[4];157s0 = a[0]; s1 = a[1]; s2 = a[2]; s3 = a[3]; s4 = a[4];158159#if defined(CRYPTOPP_WORD128_AVAILABLE)160t[0] = ((word128) r0) * s0;161t[1] = ((word128) r0) * s1 + ((word128) r1) * s0;162t[2] = ((word128) r0) * s2 + ((word128) r2) * s0 + ((word128) r1) * s1;163t[3] = ((word128) r0) * s3 + ((word128) r3) * s0 + ((word128) r1) * s2 + ((word128) r2) * s1;164t[4] = ((word128) r0) * s4 + ((word128) r4) * s0 + ((word128) r3) * s1 + ((word128) r1) * s3 + ((word128) r2) * s2;165#else166mul64x64_128(t[0], r0, s0)167mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)168mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)169mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)170mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)171#endif172173r1 *= 19; 
r2 *= 19; r3 *= 19; r4 *= 19;174175#if defined(CRYPTOPP_WORD128_AVAILABLE)176t[0] += ((word128) r4) * s1 + ((word128) r1) * s4 + ((word128) r2) * s3 + ((word128) r3) * s2;177t[1] += ((word128) r4) * s2 + ((word128) r2) * s4 + ((word128) r3) * s3;178t[2] += ((word128) r4) * s3 + ((word128) r3) * s4;179t[3] += ((word128) r4) * s4;180#else181mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], mul)182mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)183mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)184mul64x64_128(mul, r4, s4) add128(t[3], mul)185#endif186187r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);188add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);189add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);190add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);191add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);192r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;193r1 += c;194195out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;196}197198/* out = in^(2 * count) */199inline void200curve25519_square_times(bignum25519 out, const bignum25519 in, word64 count) {201#if !defined(CRYPTOPP_WORD128_AVAILABLE)202word128 mul;203#endif204word128 t[5];205word64 r0,r1,r2,r3,r4,c;206word64 d0,d1,d2,d4,d419;207208r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];209210do {211d0 = r0 * 2; d1 = r1 * 2;212d2 = r2 * 2 * 19;213d419 = r4 * 19; d4 = d419 * 2;214215#if defined(CRYPTOPP_WORD128_AVAILABLE)216t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));217t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));218t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) 
d4) * (r3 ));219t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));220t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));221#else222mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)223mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)224mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)225mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)226mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)227#endif228229r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);230add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);231add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);232add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);233add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);234r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;235r1 += c;236} while(--count);237238out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;239}240241inline void242curve25519_square(bignum25519 out, const bignum25519 in) {243#if !defined(CRYPTOPP_WORD128_AVAILABLE)244word128 mul;245#endif246word128 t[5];247word64 r0,r1,r2,r3,r4,c;248word64 d0,d1,d2,d4,d419;249250r0 = in[0]; r1 = in[1]; r2 = in[2]; r3 = in[3]; r4 = in[4];251252d0 = r0 * 2; d1 = r1 * 2;253d2 = r2 * 2 * 19;254d419 = r4 * 19; d4 = d419 * 2;255256#if defined(CRYPTOPP_WORD128_AVAILABLE)257t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));258t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));259t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));260t[3] = ((word128) d0) * r3 + 
((word128) d1) * r2 + (((word128) r4) * (d419 ));261t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));262#else263mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)264mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)265mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)266mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)267mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)268#endif269270r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);271add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);272add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);273add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);274add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);275r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;276r1 += c;277278out[0] = r0; out[1] = r1; out[2] = r2; out[3] = r3; out[4] = r4;279}280281/* Take a little-endian, 32-byte number and expand it into polynomial form */282inline void283curve25519_expand(bignum25519 out, const byte *in) {284word64 x0,x1,x2,x3;285GetBlock<word64, LittleEndian> block(in);286block(x0)(x1)(x2)(x3);287288out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);289out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);290out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);291out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);292out[4] = x3 & reduce_mask_51; /* ignore the top bit */293}294295/* Take a fully reduced polynomial form number and contract it into a296* little-endian, 32-byte array297*/298inline void299curve25519_contract(byte *out, const bignum25519 input) {300word64 
t[5];301word64 f, i;302303t[0] = input[0];304t[1] = input[1];305t[2] = input[2];306t[3] = input[3];307t[4] = input[4];308309#define curve25519_contract_carry() \310t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \311t[2] += t[1] >> 51; t[1] &= reduce_mask_51; \312t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \313t[4] += t[3] >> 51; t[3] &= reduce_mask_51;314315#define curve25519_contract_carry_full() curve25519_contract_carry() \316t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;317318#define curve25519_contract_carry_final() curve25519_contract_carry() \319t[4] &= reduce_mask_51;320321curve25519_contract_carry_full()322curve25519_contract_carry_full()323324/* now t is between 0 and 2^255-1, properly carried. */325/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */326t[0] += 19;327curve25519_contract_carry_full()328329/* now between 19 and 2^255-1 in both cases, and offset by 19. */330t[0] += 0x8000000000000 - 19;331t[1] += 0x8000000000000 - 1;332t[2] += 0x8000000000000 - 1;333t[3] += 0x8000000000000 - 1;334t[4] += 0x8000000000000 - 1;335336/* now between 2^255 and 2^256-20, and offset by 2^255. 
*/337curve25519_contract_carry_final()338339#define write51full(n,shift) \340f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \341for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f;342#define write51(n) write51full(n,13*n)343344write51(0)345write51(1)346write51(2)347write51(3)348349#undef curve25519_contract_carry350#undef curve25519_contract_carry_full351#undef curve25519_contract_carry_final352#undef write51full353#undef write51354}355356/*357* Swap the contents of [qx] and [qpx] iff @swap is non-zero358*/359inline void360curve25519_swap_conditional(bignum25519 x, bignum25519 qpx, word64 iswap) {361const word64 swap = (word64)(-(sword64)iswap);362word64 x0,x1,x2,x3,x4;363364x0 = swap & (x[0] ^ qpx[0]); x[0] ^= x0; qpx[0] ^= x0;365x1 = swap & (x[1] ^ qpx[1]); x[1] ^= x1; qpx[1] ^= x1;366x2 = swap & (x[2] ^ qpx[2]); x[2] ^= x2; qpx[2] ^= x2;367x3 = swap & (x[3] ^ qpx[3]); x[3] ^= x3; qpx[3] ^= x3;368x4 = swap & (x[4] ^ qpx[4]); x[4] ^= x4; qpx[4] ^= x4;369}370371/*372* In: b = 2^5 - 2^0373* Out: b = 2^250 - 2^0374*/375void376curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {377ALIGN(ALIGN_SPEC) bignum25519 t0,c;378379/* 2^5 - 2^0 */ /* b */380/* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);381/* 2^10 - 2^0 */ curve25519_mul(b, t0, b);382/* 2^20 - 2^10 */ curve25519_square_times(t0, b, 10);383/* 2^20 - 2^0 */ curve25519_mul(c, t0, b);384/* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);385/* 2^40 - 2^0 */ curve25519_mul(t0, t0, c);386/* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);387/* 2^50 - 2^0 */ curve25519_mul(b, t0, b);388/* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);389/* 2^100 - 2^0 */ curve25519_mul(c, t0, b);390/* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);391/* 2^200 - 2^0 */ curve25519_mul(t0, t0, c);392/* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);393/* 2^250 - 2^0 */ curve25519_mul(b, t0, b);394}395396/*397* z^(p - 2) = z(2^255 - 21)398*/399void400curve25519_recip(bignum25519 out, const bignum25519 z) 
{401ALIGN(ALIGN_SPEC) bignum25519 a, t0, b;402403/* 2 */ curve25519_square(a, z); /* a = 2 */404/* 8 */ curve25519_square_times(t0, a, 2);405/* 9 */ curve25519_mul(b, t0, z); /* b = 9 */406/* 11 */ curve25519_mul(a, b, a); /* a = 11 */407/* 22 */ curve25519_square(t0, a);408/* 2^5 - 2^0 = 31 */ curve25519_mul(b, t0, b);409/* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);410/* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);411/* 2^255 - 21 */ curve25519_mul(out, b, a);412}413414ANONYMOUS_NAMESPACE_END415NAMESPACE_END // X25519416NAMESPACE_END // Donna417NAMESPACE_END // CryptoPP418419//******************************* ed25519 *******************************//420421NAMESPACE_BEGIN(CryptoPP)422NAMESPACE_BEGIN(Donna)423NAMESPACE_BEGIN(Ed25519)424ANONYMOUS_NAMESPACE_BEGIN425426using CryptoPP::byte;427using CryptoPP::word32;428using CryptoPP::sword32;429using CryptoPP::word64;430using CryptoPP::sword64;431432using CryptoPP::GetBlock;433using CryptoPP::LittleEndian;434435using CryptoPP::SHA512;436437// Bring in all the symbols from the 64-bit header438using namespace CryptoPP::Donna::Arch64;439440/* out = in */441inline void442curve25519_copy(bignum25519 out, const bignum25519 in) {443out[0] = in[0]; out[1] = in[1];444out[2] = in[2]; out[3] = in[3];445out[4] = in[4];446}447448/* out = a + b */449inline void450curve25519_add(bignum25519 out, const bignum25519 a, const bignum25519 b) {451out[0] = a[0] + b[0]; out[1] = a[1] + b[1];452out[2] = a[2] + b[2]; out[3] = a[3] + b[3];453out[4] = a[4] + b[4];454}455456/* out = a + b, where a and/or b are the result of a basic op (add,sub) */457inline void458curve25519_add_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {459out[0] = a[0] + b[0]; out[1] = a[1] + b[1];460out[2] = a[2] + b[2]; out[3] = a[3] + b[3];461out[4] = a[4] + b[4];462}463464inline void465curve25519_add_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {466word64 c;467out[0] = a[0] + b[0] ; c = (out[0] >> 51); out[0] 
&= reduce_mask_51;468out[1] = a[1] + b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;469out[2] = a[2] + b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;470out[3] = a[3] + b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;471out[4] = a[4] + b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;472out[0] += c * 19;473}474475/* out = a - b */476inline void477curve25519_sub(bignum25519 out, const bignum25519 a, const bignum25519 b) {478out[0] = a[0] + twoP0 - b[0];479out[1] = a[1] + twoP1234 - b[1];480out[2] = a[2] + twoP1234 - b[2];481out[3] = a[3] + twoP1234 - b[3];482out[4] = a[4] + twoP1234 - b[4];483}484485/* out = a - b, where a and/or b are the result of a basic op (add,sub) */486inline void487curve25519_sub_after_basic(bignum25519 out, const bignum25519 a, const bignum25519 b) {488out[0] = a[0] + fourP0 - b[0];489out[1] = a[1] + fourP1234 - b[1];490out[2] = a[2] + fourP1234 - b[2];491out[3] = a[3] + fourP1234 - b[3];492out[4] = a[4] + fourP1234 - b[4];493}494495inline void496curve25519_sub_reduce(bignum25519 out, const bignum25519 a, const bignum25519 b) {497word64 c;498out[0] = a[0] + fourP0 - b[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;499out[1] = a[1] + fourP1234 - b[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;500out[2] = a[2] + fourP1234 - b[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;501out[3] = a[3] + fourP1234 - b[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;502out[4] = a[4] + fourP1234 - b[4] + c; c = (out[4] >> 51); out[4] &= reduce_mask_51;503out[0] += c * 19;504}505506/* out = -a */507inline void508curve25519_neg(bignum25519 out, const bignum25519 a) {509word64 c;510out[0] = twoP0 - a[0] ; c = (out[0] >> 51); out[0] &= reduce_mask_51;511out[1] = twoP1234 - a[1] + c; c = (out[1] >> 51); out[1] &= reduce_mask_51;512out[2] = twoP1234 - a[2] + c; c = (out[2] >> 51); out[2] &= reduce_mask_51;513out[3] = twoP1234 - a[3] + c; c = (out[3] >> 51); out[3] &= reduce_mask_51;514out[4] = twoP1234 - a[4] + c; c 
= (out[4] >> 51); out[4] &= reduce_mask_51;515out[0] += c * 19;516}517518/* out = a * b */519inline void520curve25519_mul(bignum25519 out, const bignum25519 in2, const bignum25519 in) {521#if !defined(CRYPTOPP_WORD128_AVAILABLE)522word128 mul;523#endif524word128 t[5];525word64 r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;526527r0 = in[0]; r1 = in[1];528r2 = in[2]; r3 = in[3];529r4 = in[4];530531s0 = in2[0]; s1 = in2[1];532s2 = in2[2]; s3 = in2[3];533s4 = in2[4];534535#if defined(CRYPTOPP_WORD128_AVAILABLE)536t[0] = ((word128) r0) * s0;537t[1] = ((word128) r0) * s1 + ((word128) r1) * s0;538t[2] = ((word128) r0) * s2 + ((word128) r2) * s0 + ((word128) r1) * s1;539t[3] = ((word128) r0) * s3 + ((word128) r3) * s0 + ((word128) r1) * s2 + ((word128) r2) * s1;540t[4] = ((word128) r0) * s4 + ((word128) r4) * s0 + ((word128) r3) * s1 + ((word128) r1) * s3 + ((word128) r2) * s2;541#else542mul64x64_128(t[0], r0, s0)543mul64x64_128(t[1], r0, s1) mul64x64_128(mul, r1, s0) add128(t[1], mul)544mul64x64_128(t[2], r0, s2) mul64x64_128(mul, r2, s0) add128(t[2], mul) mul64x64_128(mul, r1, s1) add128(t[2], mul)545mul64x64_128(t[3], r0, s3) mul64x64_128(mul, r3, s0) add128(t[3], mul) mul64x64_128(mul, r1, s2) add128(t[3], mul) mul64x64_128(mul, r2, s1) add128(t[3], mul)546mul64x64_128(t[4], r0, s4) mul64x64_128(mul, r4, s0) add128(t[4], mul) mul64x64_128(mul, r3, s1) add128(t[4], mul) mul64x64_128(mul, r1, s3) add128(t[4], mul) mul64x64_128(mul, r2, s2) add128(t[4], mul)547#endif548549r1 *= 19; r2 *= 19;550r3 *= 19; r4 *= 19;551552#if defined(CRYPTOPP_WORD128_AVAILABLE)553t[0] += ((word128) r4) * s1 + ((word128) r1) * s4 + ((word128) r2) * s3 + ((word128) r3) * s2;554t[1] += ((word128) r4) * s2 + ((word128) r2) * s4 + ((word128) r3) * s3;555t[2] += ((word128) r4) * s3 + ((word128) r3) * s4;556t[3] += ((word128) r4) * s4;557#else558mul64x64_128(mul, r4, s1) add128(t[0], mul) mul64x64_128(mul, r1, s4) add128(t[0], mul) mul64x64_128(mul, r2, s3) add128(t[0], mul) mul64x64_128(mul, r3, s2) add128(t[0], 
mul)559mul64x64_128(mul, r4, s2) add128(t[1], mul) mul64x64_128(mul, r2, s4) add128(t[1], mul) mul64x64_128(mul, r3, s3) add128(t[1], mul)560mul64x64_128(mul, r4, s3) add128(t[2], mul) mul64x64_128(mul, r3, s4) add128(t[2], mul)561mul64x64_128(mul, r4, s4) add128(t[3], mul)562#endif563564r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);565add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);566add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);567add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);568add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);569r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;570r1 += c;571572out[0] = r0; out[1] = r1;573out[2] = r2; out[3] = r3;574out[4] = r4;575}576577void578curve25519_mul_noinline(bignum25519 out, const bignum25519 in2, const bignum25519 in) {579curve25519_mul(out, in2, in);580}581582/* out = in^(2 * count) */583void584curve25519_square_times(bignum25519 out, const bignum25519 in, word64 count) {585#if !defined(CRYPTOPP_WORD128_AVAILABLE)586word128 mul;587#endif588word128 t[5];589word64 r0,r1,r2,r3,r4,c;590word64 d0,d1,d2,d4,d419;591592r0 = in[0]; r1 = in[1];593r2 = in[2]; r3 = in[3];594r4 = in[4];595596do {597d0 = r0 * 2;598d1 = r1 * 2;599d2 = r2 * 2 * 19;600d419 = r4 * 19;601d4 = d419 * 2;602603#if defined(CRYPTOPP_WORD128_AVAILABLE)604t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));605t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));606t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));607t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));608t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));609#else610mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) mul64x64_128(mul, d2, r3) add128(t[0], mul)611mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) 
mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)612mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)613mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)614mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)615#endif616617r0 = lo128(t[0]) & reduce_mask_51;618r1 = lo128(t[1]) & reduce_mask_51; shl128(c, t[0], 13); r1 += c;619r2 = lo128(t[2]) & reduce_mask_51; shl128(c, t[1], 13); r2 += c;620r3 = lo128(t[3]) & reduce_mask_51; shl128(c, t[2], 13); r3 += c;621r4 = lo128(t[4]) & reduce_mask_51; shl128(c, t[3], 13); r4 += c;622shl128(c, t[4], 13); r0 += c * 19;623c = r0 >> 51; r0 &= reduce_mask_51;624r1 += c ; c = r1 >> 51; r1 &= reduce_mask_51;625r2 += c ; c = r2 >> 51; r2 &= reduce_mask_51;626r3 += c ; c = r3 >> 51; r3 &= reduce_mask_51;627r4 += c ; c = r4 >> 51; r4 &= reduce_mask_51;628r0 += c * 19;629} while(--count);630631out[0] = r0; out[1] = r1;632out[2] = r2; out[3] = r3;633out[4] = r4;634}635636inline void637curve25519_square(bignum25519 out, const bignum25519 in) {638#if !defined(CRYPTOPP_WORD128_AVAILABLE)639word128 mul;640#endif641word128 t[5];642word64 r0,r1,r2,r3,r4,c;643word64 d0,d1,d2,d4,d419;644645r0 = in[0]; r1 = in[1];646r2 = in[2]; r3 = in[3];647r4 = in[4];648649d0 = r0 * 2; d1 = r1 * 2;650d2 = r2 * 2 * 19;651d419 = r4 * 19;652d4 = d419 * 2;653654#if defined(CRYPTOPP_WORD128_AVAILABLE)655t[0] = ((word128) r0) * r0 + ((word128) d4) * r1 + (((word128) d2) * (r3 ));656t[1] = ((word128) d0) * r1 + ((word128) d4) * r2 + (((word128) r3) * (r3 * 19));657t[2] = ((word128) d0) * r2 + ((word128) r1) * r1 + (((word128) d4) * (r3 ));658t[3] = ((word128) d0) * r3 + ((word128) d1) * r2 + (((word128) r4) * (d419 ));659t[4] = ((word128) d0) * r4 + ((word128) d1) * r3 + (((word128) r2) * (r2 ));660#else661mul64x64_128(t[0], r0, r0) mul64x64_128(mul, d4, r1) add128(t[0], mul) 
mul64x64_128(mul, d2, r3) add128(t[0], mul)662mul64x64_128(t[1], d0, r1) mul64x64_128(mul, d4, r2) add128(t[1], mul) mul64x64_128(mul, r3, r3 * 19) add128(t[1], mul)663mul64x64_128(t[2], d0, r2) mul64x64_128(mul, r1, r1) add128(t[2], mul) mul64x64_128(mul, d4, r3) add128(t[2], mul)664mul64x64_128(t[3], d0, r3) mul64x64_128(mul, d1, r2) add128(t[3], mul) mul64x64_128(mul, r4, d419) add128(t[3], mul)665mul64x64_128(t[4], d0, r4) mul64x64_128(mul, d1, r3) add128(t[4], mul) mul64x64_128(mul, r2, r2) add128(t[4], mul)666#endif667668r0 = lo128(t[0]) & reduce_mask_51; shr128(c, t[0], 51);669add128_64(t[1], c) r1 = lo128(t[1]) & reduce_mask_51; shr128(c, t[1], 51);670add128_64(t[2], c) r2 = lo128(t[2]) & reduce_mask_51; shr128(c, t[2], 51);671add128_64(t[3], c) r3 = lo128(t[3]) & reduce_mask_51; shr128(c, t[3], 51);672add128_64(t[4], c) r4 = lo128(t[4]) & reduce_mask_51; shr128(c, t[4], 51);673r0 += c * 19; c = r0 >> 51; r0 = r0 & reduce_mask_51;674r1 += c;675676out[0] = r0; out[1] = r1;677out[2] = r2; out[3] = r3;678out[4] = r4;679}680681/* Take a little-endian, 32-byte number and expand it into polynomial form */682inline void683curve25519_expand(bignum25519 out, const byte *in) {684word64 x0,x1,x2,x3;685GetBlock<word64, LittleEndian> block(in);686block(x0)(x1)(x2)(x3);687688out[0] = x0 & reduce_mask_51; x0 = (x0 >> 51) | (x1 << 13);689out[1] = x0 & reduce_mask_51; x1 = (x1 >> 38) | (x2 << 26);690out[2] = x1 & reduce_mask_51; x2 = (x2 >> 25) | (x3 << 39);691out[3] = x2 & reduce_mask_51; x3 = (x3 >> 12);692out[4] = x3 & reduce_mask_51;693}694695/* Take a fully reduced polynomial form number and contract it into a696* little-endian, 32-byte array697*/698inline void699curve25519_contract(byte *out, const bignum25519 input) {700word64 t[5];701word64 f, i;702703t[0] = input[0];704t[1] = input[1];705t[2] = input[2];706t[3] = input[3];707t[4] = input[4];708709#define curve25519_contract_carry() \710t[1] += t[0] >> 51; t[0] &= reduce_mask_51; \711t[2] += t[1] >> 51; t[1] &= 
reduce_mask_51; \712t[3] += t[2] >> 51; t[2] &= reduce_mask_51; \713t[4] += t[3] >> 51; t[3] &= reduce_mask_51;714715#define curve25519_contract_carry_full() curve25519_contract_carry() \716t[0] += 19 * (t[4] >> 51); t[4] &= reduce_mask_51;717718#define curve25519_contract_carry_final() curve25519_contract_carry() \719t[4] &= reduce_mask_51;720721curve25519_contract_carry_full()722curve25519_contract_carry_full()723724/* now t is between 0 and 2^255-1, properly carried. */725/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */726t[0] += 19;727curve25519_contract_carry_full()728729/* now between 19 and 2^255-1 in both cases, and offset by 19. */730t[0] += (reduce_mask_51 + 1) - 19;731t[1] += (reduce_mask_51 + 1) - 1;732t[2] += (reduce_mask_51 + 1) - 1;733t[3] += (reduce_mask_51 + 1) - 1;734t[4] += (reduce_mask_51 + 1) - 1;735736/* now between 2^255 and 2^256-20, and offset by 2^255. */737curve25519_contract_carry_final()738739#define write51full(n,shift) \740f = ((t[n] >> shift) | (t[n+1] << (51 - shift))); \741for (i = 0; i < 8; i++, f >>= 8) *out++ = (byte)f;742#define write51(n) write51full(n,13*n)743write51(0)744write51(1)745write51(2)746write51(3)747}748749#if !defined(ED25519_GCC_64BIT_CHOOSE)750751/* out = (flag) ? 
in : out */752inline void753curve25519_move_conditional_bytes(byte out[96], const byte in[96], word64 flag)754{755// TODO: enable this code path once we can test and benchmark it.756// It is about 24 insns shorter, it avoids punning which may be UB,757// and it is guaranteed constant time.758#if defined(__GNUC__) && defined(__x86_64__) && 0759const word32 iter = 96/sizeof(word64);760word64* outq = reinterpret_cast<word64*>(out);761const word64* inq = reinterpret_cast<const word64*>(in);762word64 idx=0, val;763764__asm__ __volatile__ (765".att_syntax ;\n"766"cmpq $0, %[flag] ;\n" // compare, set ZERO flag767"movq %[iter], %%rcx ;\n" // load iteration count768"1: ;\n"769" movq (%[idx],%[out]), %[val] ;\n" // val = out[idx]770" cmovnzq (%[idx],%[in]), %[val] ;\n" // copy in[idx] to val if NZ771" movq %[val], (%[idx],%[out]) ;\n" // out[idx] = val772" leaq 8(%[idx]), %[idx] ;\n" // increment index773" loopnz 1b ;\n" // does not affect flags774: [out] "+S" (outq), [in] "+D" (inq),775[idx] "+b" (idx), [val] "=r" (val)776: [flag] "g" (flag), [iter] "I" (iter)777: "rcx", "memory", "cc"778);779#else780const word64 nb = flag - 1, b = ~nb;781const word64 *inq = (const word64 *)(const void*)in;782word64 *outq = (word64 *)(void *)out;783outq[0] = (outq[0] & nb) | (inq[0] & b);784outq[1] = (outq[1] & nb) | (inq[1] & b);785outq[2] = (outq[2] & nb) | (inq[2] & b);786outq[3] = (outq[3] & nb) | (inq[3] & b);787outq[4] = (outq[4] & nb) | (inq[4] & b);788outq[5] = (outq[5] & nb) | (inq[5] & b);789outq[6] = (outq[6] & nb) | (inq[6] & b);790outq[7] = (outq[7] & nb) | (inq[7] & b);791outq[8] = (outq[8] & nb) | (inq[8] & b);792outq[9] = (outq[9] & nb) | (inq[9] & b);793outq[10] = (outq[10] & nb) | (inq[10] & b);794outq[11] = (outq[11] & nb) | (inq[11] & b);795#endif796}797798/* if (iswap) swap(a, b) */799inline void800curve25519_swap_conditional(bignum25519 a, bignum25519 b, word64 iswap) {801const word64 swap = (word64)(-(sword64)iswap);802word64 x0,x1,x2,x3,x4;803804x0 = swap & (a[0] ^ 
b[0]); a[0] ^= x0; b[0] ^= x0;805x1 = swap & (a[1] ^ b[1]); a[1] ^= x1; b[1] ^= x1;806x2 = swap & (a[2] ^ b[2]); a[2] ^= x2; b[2] ^= x2;807x3 = swap & (a[3] ^ b[3]); a[3] ^= x3; b[3] ^= x3;808x4 = swap & (a[4] ^ b[4]); a[4] ^= x4; b[4] ^= x4;809}810811#endif /* ED25519_GCC_64BIT_CHOOSE */812813// ************************************************************************************814815inline void816ed25519_hash(byte *hash, const byte *in, size_t inlen) {817SHA512().CalculateDigest(hash, in, inlen);818}819820inline void821ed25519_extsk(hash_512bits extsk, const byte sk[32]) {822ed25519_hash(extsk, sk, 32);823extsk[0] &= 248;824extsk[31] &= 127;825extsk[31] |= 64;826}827828void829UpdateFromStream(HashTransformation& hash, std::istream& stream)830{831SecByteBlock block(4096);832while (stream.read((char*)block.begin(), block.size()))833hash.Update(block, block.size());834835std::streamsize rem = stream.gcount();836if (rem)837hash.Update(block, rem);838839block.SetMark(0);840}841842void843ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], const byte *m, size_t mlen) {844SHA512 hash;845hash.Update(RS, 32);846hash.Update(pk, 32);847hash.Update(m, mlen);848hash.Final(hram);849}850851void852ed25519_hram(hash_512bits hram, const byte RS[64], const byte pk[32], std::istream& stream) {853SHA512 hash;854hash.Update(RS, 32);855hash.Update(pk, 32);856UpdateFromStream(hash, stream);857hash.Final(hram);858}859860bignum256modm_element_t861lt_modm(bignum256modm_element_t a, bignum256modm_element_t b) {862return (a - b) >> 63;863}864865void866reduce256_modm(bignum256modm r) {867bignum256modm t;868bignum256modm_element_t b = 0, pb, mask;869870/* t = r - m */871pb = 0;872pb += modm_m[0]; b = lt_modm(r[0], pb); t[0] = (r[0] - pb + (b << 56)); pb = b;873pb += modm_m[1]; b = lt_modm(r[1], pb); t[1] = (r[1] - pb + (b << 56)); pb = b;874pb += modm_m[2]; b = lt_modm(r[2], pb); t[2] = (r[2] - pb + (b << 56)); pb = b;875pb += modm_m[3]; b = lt_modm(r[3], pb); t[3] = (r[3] - 
pb + (b << 56)); pb = b;876pb += modm_m[4]; b = lt_modm(r[4], pb); t[4] = (r[4] - pb + (b << 32));877878/* keep r if r was smaller than m */879mask = b - 1;880881r[0] ^= mask & (r[0] ^ t[0]);882r[1] ^= mask & (r[1] ^ t[1]);883r[2] ^= mask & (r[2] ^ t[2]);884r[3] ^= mask & (r[3] ^ t[3]);885r[4] ^= mask & (r[4] ^ t[4]);886}887888void889barrett_reduce256_modm(bignum256modm r, const bignum256modm q1, const bignum256modm r1) {890bignum256modm q3, r2;891word128 c, mul;892bignum256modm_element_t f, b, pb;893894/* q1 = x >> 248 = 264 bits = 5 56 bit elements895q2 = mu * q1896q3 = (q2 / 256(32+1)) = q2 / (2^8)^(32+1) = q2 >> 264 */897mul64x64_128(c, modm_mu[0], q1[3]) mul64x64_128(mul, modm_mu[3], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[2]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[1]) add128(c, mul) shr128(f, c, 56);898mul64x64_128(c, modm_mu[0], q1[4]) add128_64(c, f) mul64x64_128(mul, modm_mu[4], q1[0]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[1]) add128(c, mul) mul64x64_128(mul, modm_mu[1], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[2]) add128(c, mul)899f = lo128(c); q3[0] = (f >> 40) & 0xffff; shr128(f, c, 56);900mul64x64_128(c, modm_mu[4], q1[1]) add128_64(c, f) mul64x64_128(mul, modm_mu[1], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[2], q1[3]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[2]) add128(c, mul)901f = lo128(c); q3[0] |= (f << 16) & 0xffffffffffffff; q3[1] = (f >> 40) & 0xffff; shr128(f, c, 56);902mul64x64_128(c, modm_mu[4], q1[2]) add128_64(c, f) mul64x64_128(mul, modm_mu[2], q1[4]) add128(c, mul) mul64x64_128(mul, modm_mu[3], q1[3]) add128(c, mul)903f = lo128(c); q3[1] |= (f << 16) & 0xffffffffffffff; q3[2] = (f >> 40) & 0xffff; shr128(f, c, 56);904mul64x64_128(c, modm_mu[4], q1[3]) add128_64(c, f) mul64x64_128(mul, modm_mu[3], q1[4]) add128(c, mul)905f = lo128(c); q3[2] |= (f << 16) & 0xffffffffffffff; q3[3] = (f >> 40) & 0xffff; shr128(f, c, 56);906mul64x64_128(c, modm_mu[4], q1[4]) add128_64(c, f)907f = 
lo128(c); q3[3] |= (f << 16) & 0xffffffffffffff; q3[4] = (f >> 40) & 0xffff; shr128(f, c, 56);908q3[4] |= (f << 16);909910mul64x64_128(c, modm_m[0], q3[0])911r2[0] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);912mul64x64_128(c, modm_m[0], q3[1]) add128_64(c, f) mul64x64_128(mul, modm_m[1], q3[0]) add128(c, mul)913r2[1] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);914mul64x64_128(c, modm_m[0], q3[2]) add128_64(c, f) mul64x64_128(mul, modm_m[2], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[1]) add128(c, mul)915r2[2] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);916mul64x64_128(c, modm_m[0], q3[3]) add128_64(c, f) mul64x64_128(mul, modm_m[3], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[2]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[1]) add128(c, mul)917r2[3] = lo128(c) & 0xffffffffffffff; shr128(f, c, 56);918mul64x64_128(c, modm_m[0], q3[4]) add128_64(c, f) mul64x64_128(mul, modm_m[4], q3[0]) add128(c, mul) mul64x64_128(mul, modm_m[3], q3[1]) add128(c, mul) mul64x64_128(mul, modm_m[1], q3[3]) add128(c, mul) mul64x64_128(mul, modm_m[2], q3[2]) add128(c, mul)919r2[4] = lo128(c) & 0x0000ffffffffff;920921pb = 0;922pb += r2[0]; b = lt_modm(r1[0], pb); r[0] = (r1[0] - pb + (b << 56)); pb = b;923pb += r2[1]; b = lt_modm(r1[1], pb); r[1] = (r1[1] - pb + (b << 56)); pb = b;924pb += r2[2]; b = lt_modm(r1[2], pb); r[2] = (r1[2] - pb + (b << 56)); pb = b;925pb += r2[3]; b = lt_modm(r1[3], pb); r[3] = (r1[3] - pb + (b << 56)); pb = b;926pb += r2[4]; b = lt_modm(r1[4], pb); r[4] = (r1[4] - pb + (b << 40));927928reduce256_modm(r);929reduce256_modm(r);930}931932void933add256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {934bignum256modm_element_t c;935936c = x[0] + y[0]; r[0] = c & 0xffffffffffffff; c >>= 56;937c += x[1] + y[1]; r[1] = c & 0xffffffffffffff; c >>= 56;938c += x[2] + y[2]; r[2] = c & 0xffffffffffffff; c >>= 56;939c += x[3] + y[3]; r[3] = c & 0xffffffffffffff; c >>= 56;940c += x[4] + y[4]; r[4] = 
c;941942reduce256_modm(r);943}944945void946mul256_modm(bignum256modm r, const bignum256modm x, const bignum256modm y) {947bignum256modm q1, r1;948word128 c, mul;949bignum256modm_element_t f;950951mul64x64_128(c, x[0], y[0])952f = lo128(c); r1[0] = f & 0xffffffffffffff; shr128(f, c, 56);953mul64x64_128(c, x[0], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[0]) add128(c, mul)954f = lo128(c); r1[1] = f & 0xffffffffffffff; shr128(f, c, 56);955mul64x64_128(c, x[0], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[1]) add128(c, mul)956f = lo128(c); r1[2] = f & 0xffffffffffffff; shr128(f, c, 56);957mul64x64_128(c, x[0], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[0]) add128(c, mul) mul64x64_128(mul, x[1], y[2]) add128(c, mul) mul64x64_128(mul, x[2], y[1]) add128(c, mul)958f = lo128(c); r1[3] = f & 0xffffffffffffff; shr128(f, c, 56);959mul64x64_128(c, x[0], y[4]) add128_64(c, f) mul64x64_128(mul, x[4], y[0]) add128(c, mul) mul64x64_128(mul, x[3], y[1]) add128(c, mul) mul64x64_128(mul, x[1], y[3]) add128(c, mul) mul64x64_128(mul, x[2], y[2]) add128(c, mul)960f = lo128(c); r1[4] = f & 0x0000ffffffffff; q1[0] = (f >> 24) & 0xffffffff; shr128(f, c, 56);961mul64x64_128(c, x[4], y[1]) add128_64(c, f) mul64x64_128(mul, x[1], y[4]) add128(c, mul) mul64x64_128(mul, x[2], y[3]) add128(c, mul) mul64x64_128(mul, x[3], y[2]) add128(c, mul)962f = lo128(c); q1[0] |= (f << 32) & 0xffffffffffffff; q1[1] = (f >> 24) & 0xffffffff; shr128(f, c, 56);963mul64x64_128(c, x[4], y[2]) add128_64(c, f) mul64x64_128(mul, x[2], y[4]) add128(c, mul) mul64x64_128(mul, x[3], y[3]) add128(c, mul)964f = lo128(c); q1[1] |= (f << 32) & 0xffffffffffffff; q1[2] = (f >> 24) & 0xffffffff; shr128(f, c, 56);965mul64x64_128(c, x[4], y[3]) add128_64(c, f) mul64x64_128(mul, x[3], y[4]) add128(c, mul)966f = lo128(c); q1[2] |= (f << 32) & 0xffffffffffffff; q1[3] = (f >> 24) & 0xffffffff; shr128(f, c, 56);967mul64x64_128(c, x[4], y[4]) add128_64(c, f)968f = lo128(c); 
q1[3] |= (f << 32) & 0xffffffffffffff; q1[4] = (f >> 24) & 0xffffffff; shr128(f, c, 56);969q1[4] |= (f << 32);970971barrett_reduce256_modm(r, q1, r1);972}973974void975expand256_modm(bignum256modm out, const byte *in, size_t len) {976byte work[64] = {0};977bignum256modm_element_t x[16];978bignum256modm q1;979980std::memcpy(work, in, len);981x[0] = U8TO64_LE(work + 0);982x[1] = U8TO64_LE(work + 8);983x[2] = U8TO64_LE(work + 16);984x[3] = U8TO64_LE(work + 24);985x[4] = U8TO64_LE(work + 32);986x[5] = U8TO64_LE(work + 40);987x[6] = U8TO64_LE(work + 48);988x[7] = U8TO64_LE(work + 56);989990/* r1 = (x mod 256^(32+1)) = x mod (2^8)(31+1) = x & ((1 << 264) - 1) */991out[0] = ( x[0]) & 0xffffffffffffff;992out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;993out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;994out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;995out[4] = ((x[ 3] >> 32) | (x[ 4] << 32)) & 0x0000ffffffffff;996997/* under 252 bits, no need to reduce */998if (len < 32)999return;10001001/* q1 = x >> 248 = 264 bits */1002q1[0] = ((x[ 3] >> 56) | (x[ 4] << 8)) & 0xffffffffffffff;1003q1[1] = ((x[ 4] >> 48) | (x[ 5] << 16)) & 0xffffffffffffff;1004q1[2] = ((x[ 5] >> 40) | (x[ 6] << 24)) & 0xffffffffffffff;1005q1[3] = ((x[ 6] >> 32) | (x[ 7] << 32)) & 0xffffffffffffff;1006q1[4] = ((x[ 7] >> 24) );10071008barrett_reduce256_modm(out, q1, out);1009}10101011void1012expand_raw256_modm(bignum256modm out, const byte in[32]) {1013bignum256modm_element_t x[4];10141015x[0] = U8TO64_LE(in + 0);1016x[1] = U8TO64_LE(in + 8);1017x[2] = U8TO64_LE(in + 16);1018x[3] = U8TO64_LE(in + 24);10191020out[0] = ( x[0]) & 0xffffffffffffff;1021out[1] = ((x[ 0] >> 56) | (x[ 1] << 8)) & 0xffffffffffffff;1022out[2] = ((x[ 1] >> 48) | (x[ 2] << 16)) & 0xffffffffffffff;1023out[3] = ((x[ 2] >> 40) | (x[ 3] << 24)) & 0xffffffffffffff;1024out[4] = ((x[ 3] >> 32) ) & 0x000000ffffffff;1025}10261027void1028contract256_modm(byte out[32], const bignum256modm in) {1029U64TO8_LE(out 
+ 0, (in[0] ) | (in[1] << 56));1030U64TO8_LE(out + 8, (in[1] >> 8) | (in[2] << 48));1031U64TO8_LE(out + 16, (in[2] >> 16) | (in[3] << 40));1032U64TO8_LE(out + 24, (in[3] >> 24) | (in[4] << 32));1033}10341035void1036contract256_window4_modm(signed char r[64], const bignum256modm in) {1037char carry;1038signed char *quads = r;1039bignum256modm_element_t i, j, v, m;10401041for (i = 0; i < 5; i++) {1042v = in[i];1043m = (i == 4) ? 8 : 14;1044for (j = 0; j < m; j++) {1045*quads++ = (v & 15);1046v >>= 4;1047}1048}10491050/* making it signed */1051carry = 0;1052for(i = 0; i < 63; i++) {1053r[i] += carry;1054r[i+1] += (r[i] >> 4);1055r[i] &= 15;1056carry = (r[i] >> 3);1057r[i] -= (carry << 4);1058}1059r[63] += carry;1060}10611062void1063contract256_slidingwindow_modm(signed char r[256], const bignum256modm s, int windowsize) {1064int i,j,k,b;1065int m = (1 << (windowsize - 1)) - 1, soplen = 256;1066signed char *bits = r;1067bignum256modm_element_t v;10681069/* first put the binary expansion into r */1070for (i = 0; i < 4; i++) {1071v = s[i];1072for (j = 0; j < 56; j++, v >>= 1)1073*bits++ = (v & 1);1074}1075v = s[4];1076for (j = 0; j < 32; j++, v >>= 1)1077*bits++ = (v & 1);10781079/* Making it sliding window */1080for (j = 0; j < soplen; j++) {1081if (!r[j])1082continue;10831084for (b = 1; (b < (soplen - j)) && (b <= 6); b++) {1085if ((r[j] + (r[j + b] << b)) <= m) {1086r[j] += r[j + b] << b;1087r[j + b] = 0;1088} else if ((r[j] - (r[j + b] << b)) >= -m) {1089r[j] -= r[j + b] << b;1090for (k = j + b; k < soplen; k++) {1091if (!r[k]) {1092r[k] = 1;1093break;1094}1095r[k] = 0;1096}1097} else if (r[j + b]) {1098break;1099}1100}1101}1102}11031104/*1105* In: b = 2^5 - 2^01106* Out: b = 2^250 - 2^01107*/1108void1109curve25519_pow_two5mtwo0_two250mtwo0(bignum25519 b) {1110ALIGN(ALIGN_SPEC) bignum25519 t0,c;11111112/* 2^5 - 2^0 */ /* b */1113/* 2^10 - 2^5 */ curve25519_square_times(t0, b, 5);1114/* 2^10 - 2^0 */ curve25519_mul_noinline(b, t0, b);1115/* 2^20 - 2^10 */ 
curve25519_square_times(t0, b, 10);1116/* 2^20 - 2^0 */ curve25519_mul_noinline(c, t0, b);1117/* 2^40 - 2^20 */ curve25519_square_times(t0, c, 20);1118/* 2^40 - 2^0 */ curve25519_mul_noinline(t0, t0, c);1119/* 2^50 - 2^10 */ curve25519_square_times(t0, t0, 10);1120/* 2^50 - 2^0 */ curve25519_mul_noinline(b, t0, b);1121/* 2^100 - 2^50 */ curve25519_square_times(t0, b, 50);1122/* 2^100 - 2^0 */ curve25519_mul_noinline(c, t0, b);1123/* 2^200 - 2^100 */ curve25519_square_times(t0, c, 100);1124/* 2^200 - 2^0 */ curve25519_mul_noinline(t0, t0, c);1125/* 2^250 - 2^50 */ curve25519_square_times(t0, t0, 50);1126/* 2^250 - 2^0 */ curve25519_mul_noinline(b, t0, b);1127}11281129/*1130* z^(p - 2) = z(2^255 - 21)1131*/1132void1133curve25519_recip(bignum25519 out, const bignum25519 z) {1134ALIGN(ALIGN_SPEC) bignum25519 a,t0,b;11351136/* 2 */ curve25519_square_times(a, z, 1); /* a = 2 */1137/* 8 */ curve25519_square_times(t0, a, 2);1138/* 9 */ curve25519_mul_noinline(b, t0, z); /* b = 9 */1139/* 11 */ curve25519_mul_noinline(a, b, a); /* a = 11 */1140/* 22 */ curve25519_square_times(t0, a, 1);1141/* 2^5 - 2^0 = 31 */ curve25519_mul_noinline(b, t0, b);1142/* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);1143/* 2^255 - 2^5 */ curve25519_square_times(b, b, 5);1144/* 2^255 - 21 */ curve25519_mul_noinline(out, b, a);1145}11461147/*1148* z^((p-5)/8) = z^(2^252 - 3)1149*/1150void1151curve25519_pow_two252m3(bignum25519 two252m3, const bignum25519 z) {1152ALIGN(ALIGN_SPEC) bignum25519 b,c,t0;11531154/* 2 */ curve25519_square_times(c, z, 1); /* c = 2 */1155/* 8 */ curve25519_square_times(t0, c, 2); /* t0 = 8 */1156/* 9 */ curve25519_mul_noinline(b, t0, z); /* b = 9 */1157/* 11 */ curve25519_mul_noinline(c, b, c); /* c = 11 */1158/* 22 */ curve25519_square_times(t0, c, 1);1159/* 2^5 - 2^0 = 31 */ curve25519_mul_noinline(b, t0, b);1160/* 2^250 - 2^0 */ curve25519_pow_two5mtwo0_two250mtwo0(b);1161/* 2^252 - 2^2 */ curve25519_square_times(b, b, 2);1162/* 2^252 - 3 */ 
curve25519_mul_noinline(two252m3, b, z);1163}11641165inline void1166ge25519_p1p1_to_partial(ge25519 *r, const ge25519_p1p1 *p) {1167curve25519_mul(r->x, p->x, p->t);1168curve25519_mul(r->y, p->y, p->z);1169curve25519_mul(r->z, p->z, p->t);1170}11711172inline void1173ge25519_p1p1_to_full(ge25519 *r, const ge25519_p1p1 *p) {1174curve25519_mul(r->x, p->x, p->t);1175curve25519_mul(r->y, p->y, p->z);1176curve25519_mul(r->z, p->z, p->t);1177curve25519_mul(r->t, p->x, p->y);1178}11791180void1181ge25519_full_to_pniels(ge25519_pniels *p, const ge25519 *r) {1182curve25519_sub(p->ysubx, r->y, r->x);1183curve25519_add(p->xaddy, r->y, r->x);1184curve25519_copy(p->z, r->z);1185curve25519_mul(p->t2d, r->t, ge25519_ec2d);1186}11871188void1189ge25519_add_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519 *q) {1190bignum25519 a,b,c,d,t,u;11911192curve25519_sub(a, p->y, p->x);1193curve25519_add(b, p->y, p->x);1194curve25519_sub(t, q->y, q->x);1195curve25519_add(u, q->y, q->x);1196curve25519_mul(a, a, t);1197curve25519_mul(b, b, u);1198curve25519_mul(c, p->t, q->t);1199curve25519_mul(c, c, ge25519_ec2d);1200curve25519_mul(d, p->z, q->z);1201curve25519_add(d, d, d);1202curve25519_sub(r->x, b, a);1203curve25519_add(r->y, b, a);1204curve25519_add_after_basic(r->z, d, c);1205curve25519_sub_after_basic(r->t, d, c);1206}12071208void1209ge25519_double_p1p1(ge25519_p1p1 *r, const ge25519 *p) {1210bignum25519 a,b,c;12111212curve25519_square(a, p->x);1213curve25519_square(b, p->y);1214curve25519_square(c, p->z);1215curve25519_add_reduce(c, c, c);1216curve25519_add(r->x, p->x, p->y);1217curve25519_square(r->x, r->x);1218curve25519_add(r->y, b, a);1219curve25519_sub(r->z, b, a);1220curve25519_sub_after_basic(r->x, r->x, r->y);1221curve25519_sub_after_basic(r->t, c, r->z);1222}12231224void1225ge25519_nielsadd2_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_niels *q, byte signbit) {1226const bignum25519 *qb = (const bignum25519 *)q;1227bignum25519 *rb = (bignum25519 *)r;1228bignum25519 
a,b,c;12291230curve25519_sub(a, p->y, p->x);1231curve25519_add(b, p->y, p->x);1232curve25519_mul(a, a, qb[signbit]); /* x for +, y for - */1233curve25519_mul(r->x, b, qb[signbit^1]); /* y for +, x for - */1234curve25519_add(r->y, r->x, a);1235curve25519_sub(r->x, r->x, a);1236curve25519_mul(c, p->t, q->t2d);1237curve25519_add_reduce(r->t, p->z, p->z);1238curve25519_copy(r->z, r->t);1239curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */1240curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */1241}12421243void1244ge25519_pnielsadd_p1p1(ge25519_p1p1 *r, const ge25519 *p, const ge25519_pniels *q, byte signbit) {1245const bignum25519 *qb = (const bignum25519 *)q;1246bignum25519 *rb = (bignum25519 *)r;1247bignum25519 a,b,c;12481249curve25519_sub(a, p->y, p->x);1250curve25519_add(b, p->y, p->x);1251curve25519_mul(a, a, qb[signbit]); /* ysubx for +, xaddy for - */1252curve25519_mul(r->x, b, qb[signbit^1]); /* xaddy for +, ysubx for - */1253curve25519_add(r->y, r->x, a);1254curve25519_sub(r->x, r->x, a);1255curve25519_mul(c, p->t, q->t2d);1256curve25519_mul(r->t, p->z, q->z);1257curve25519_add_reduce(r->t, r->t, r->t);1258curve25519_copy(r->z, r->t);1259curve25519_add(rb[2+signbit], rb[2+signbit], c); /* z for +, t for - */1260curve25519_sub(rb[2+(signbit^1)], rb[2+(signbit^1)], c); /* t for +, z for - */1261}12621263void1264ge25519_double_partial(ge25519 *r, const ge25519 *p) {1265ge25519_p1p1 t;1266ge25519_double_p1p1(&t, p);1267ge25519_p1p1_to_partial(r, &t);1268}12691270void1271ge25519_double(ge25519 *r, const ge25519 *p) {1272ge25519_p1p1 t;1273ge25519_double_p1p1(&t, p);1274ge25519_p1p1_to_full(r, &t);1275}12761277void1278ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q) {1279ge25519_p1p1 t;1280ge25519_add_p1p1(&t, p, q);1281ge25519_p1p1_to_full(r, &t);1282}12831284void1285ge25519_nielsadd2(ge25519 *r, const ge25519_niels *q) {1286bignum25519 a,b,c,e,f,g,h;12871288curve25519_sub(a, r->y, 
r->x);1289curve25519_add(b, r->y, r->x);1290curve25519_mul(a, a, q->ysubx);1291curve25519_mul(e, b, q->xaddy);1292curve25519_add(h, e, a);1293curve25519_sub(e, e, a);1294curve25519_mul(c, r->t, q->t2d);1295curve25519_add(f, r->z, r->z);1296curve25519_add_after_basic(g, f, c);1297curve25519_sub_after_basic(f, f, c);1298curve25519_mul(r->x, e, f);1299curve25519_mul(r->y, h, g);1300curve25519_mul(r->z, g, f);1301curve25519_mul(r->t, e, h);1302}13031304void1305ge25519_pnielsadd(ge25519_pniels *r, const ge25519 *p, const ge25519_pniels *q) {1306bignum25519 a,b,c,x,y,z,t;13071308curve25519_sub(a, p->y, p->x);1309curve25519_add(b, p->y, p->x);1310curve25519_mul(a, a, q->ysubx);1311curve25519_mul(x, b, q->xaddy);1312curve25519_add(y, x, a);1313curve25519_sub(x, x, a);1314curve25519_mul(c, p->t, q->t2d);1315curve25519_mul(t, p->z, q->z);1316curve25519_add(t, t, t);1317curve25519_add_after_basic(z, t, c);1318curve25519_sub_after_basic(t, t, c);1319curve25519_mul(r->xaddy, x, t);1320curve25519_mul(r->ysubx, y, z);1321curve25519_mul(r->z, z, t);1322curve25519_mul(r->t2d, x, y);1323curve25519_copy(y, r->ysubx);1324curve25519_sub(r->ysubx, r->ysubx, r->xaddy);1325curve25519_add(r->xaddy, r->xaddy, y);1326curve25519_mul(r->t2d, r->t2d, ge25519_ec2d);1327}13281329void1330ge25519_pack(byte r[32], const ge25519 *p) {1331bignum25519 tx, ty, zi;1332byte parity[32];1333curve25519_recip(zi, p->z);1334curve25519_mul(tx, p->x, zi);1335curve25519_mul(ty, p->y, zi);1336curve25519_contract(r, ty);1337curve25519_contract(parity, tx);1338r[31] ^= ((parity[0] & 1) << 7);1339}13401341int1342ed25519_verify(const byte *x, const byte *y, size_t len) {1343size_t differentbits = 0;1344while (len--)1345differentbits |= (*x++ ^ *y++);1346return (int) (1 & ((differentbits - 1) >> 8));1347}13481349int1350ge25519_unpack_negative_vartime(ge25519 *r, const byte p[32]) {1351const byte zero[32] = {0};1352const bignum25519 one = {1};1353byte parity = p[31] >> 7;1354byte check[32];1355bignum25519 t, root, num, 
den, d3;13561357curve25519_expand(r->y, p);1358curve25519_copy(r->z, one);1359curve25519_square(num, r->y); /* x = y^2 */1360curve25519_mul(den, num, ge25519_ecd); /* den = dy^2 */1361curve25519_sub_reduce(num, num, r->z); /* x = y^1 - 1 */1362curve25519_add(den, den, r->z); /* den = dy^2 + 1 */13631364/* Computation of sqrt(num/den) */1365/* 1.: computation of num^((p-5)/8)*den^((7p-35)/8) = (num*den^7)^((p-5)/8) */1366curve25519_square(t, den);1367curve25519_mul(d3, t, den);1368curve25519_square(r->x, d3);1369curve25519_mul(r->x, r->x, den);1370curve25519_mul(r->x, r->x, num);1371curve25519_pow_two252m3(r->x, r->x);13721373/* 2. computation of r->x = num * den^3 * (num*den^7)^((p-5)/8) */1374curve25519_mul(r->x, r->x, d3);1375curve25519_mul(r->x, r->x, num);13761377/* 3. Check if either of the roots works: */1378curve25519_square(t, r->x);1379curve25519_mul(t, t, den);1380curve25519_sub_reduce(root, t, num);1381curve25519_contract(check, root);1382if (!ed25519_verify(check, zero, 32)) {1383curve25519_add_reduce(t, t, num);1384curve25519_contract(check, t);1385if (!ed25519_verify(check, zero, 32))1386return 0;1387curve25519_mul(r->x, r->x, ge25519_sqrtneg1);1388}13891390curve25519_contract(check, r->x);1391if ((check[0] & 1) == parity) {1392curve25519_copy(t, r->x);1393curve25519_neg(r->x, t);1394}1395curve25519_mul(r->t, r->x, r->y);1396return 1;1397}13981399/* computes [s1]p1 + [s2]basepoint */1400void1401ge25519_double_scalarmult_vartime(ge25519 *r, const ge25519 *p1, const bignum256modm s1, const bignum256modm s2) {1402signed char slide1[256], slide2[256];1403ge25519_pniels pre1[S1_TABLE_SIZE];1404ge25519 d1;1405ge25519_p1p1 t;1406sword32 i;14071408contract256_slidingwindow_modm(slide1, s1, S1_SWINDOWSIZE);1409contract256_slidingwindow_modm(slide2, s2, S2_SWINDOWSIZE);14101411ge25519_double(&d1, p1);1412ge25519_full_to_pniels(pre1, p1);1413for (i = 0; i < S1_TABLE_SIZE - 1; i++)1414ge25519_pnielsadd(&pre1[i+1], &d1, &pre1[i]);14151416/* set neutral 
*/1417std::memset(r, 0, sizeof(ge25519));1418r->y[0] = 1;1419r->z[0] = 1;14201421i = 255;1422while ((i >= 0) && !(slide1[i] | slide2[i]))1423i--;14241425for (; i >= 0; i--) {1426ge25519_double_p1p1(&t, r);14271428if (slide1[i]) {1429ge25519_p1p1_to_full(r, &t);1430ge25519_pnielsadd_p1p1(&t, r, &pre1[abs(slide1[i]) / 2], (byte)slide1[i] >> 7);1431}14321433if (slide2[i]) {1434ge25519_p1p1_to_full(r, &t);1435ge25519_nielsadd2_p1p1(&t, r, &ge25519_niels_sliding_multiples[abs(slide2[i]) / 2], (byte)slide2[i] >> 7);1436}14371438ge25519_p1p1_to_partial(r, &t);1439}1440}14411442#if !defined(HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS)14431444word321445ge25519_windowb_equal(word32 b, word32 c) {1446return ((b ^ c) - 1) >> 31;1447}14481449void1450ge25519_scalarmult_base_choose_niels(ge25519_niels *t, const byte table[256][96], word32 pos, signed char b) {1451bignum25519 neg;1452word32 sign = (word32)((byte)b >> 7);1453word32 mask = ~(sign - 1);1454word32 u = (b + mask) ^ mask;1455word32 i;14561457/* ysubx, xaddy, t2d in packed form. 
initialize to ysubx = 1, xaddy = 1, t2d = 0 */1458byte packed[96] = {0};1459packed[0] = 1;1460packed[32] = 1;14611462for (i = 0; i < 8; i++)1463curve25519_move_conditional_bytes(packed, table[(pos * 8) + i], ge25519_windowb_equal(u, i + 1));14641465/* expand in to t */1466curve25519_expand(t->ysubx, packed + 0);1467curve25519_expand(t->xaddy, packed + 32);1468curve25519_expand(t->t2d , packed + 64);14691470/* adjust for sign */1471curve25519_swap_conditional(t->ysubx, t->xaddy, sign);1472curve25519_neg(neg, t->t2d);1473curve25519_swap_conditional(t->t2d, neg, sign);1474}14751476#endif /* HAVE_GE25519_SCALARMULT_BASE_CHOOSE_NIELS */14771478/* computes [s]basepoint */1479void1480ge25519_scalarmult_base_niels(ge25519 *r, const byte basepoint_table[256][96], const bignum256modm s) {1481signed char b[64];1482word32 i;1483ge25519_niels t;14841485contract256_window4_modm(b, s);14861487ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[1]);1488curve25519_sub_reduce(r->x, t.xaddy, t.ysubx);1489curve25519_add_reduce(r->y, t.xaddy, t.ysubx);1490std::memset(r->z, 0, sizeof(bignum25519));1491curve25519_copy(r->t, t.t2d);1492r->z[0] = 2;1493for (i = 3; i < 64; i += 2) {1494ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);1495ge25519_nielsadd2(r, &t);1496}1497ge25519_double_partial(r, r);1498ge25519_double_partial(r, r);1499ge25519_double_partial(r, r);1500ge25519_double(r, r);1501ge25519_scalarmult_base_choose_niels(&t, basepoint_table, 0, b[0]);1502curve25519_mul(t.t2d, t.t2d, ge25519_ecd);1503ge25519_nielsadd2(r, &t);1504for(i = 2; i < 64; i += 2) {1505ge25519_scalarmult_base_choose_niels(&t, basepoint_table, i / 2, b[i]);1506ge25519_nielsadd2(r, &t);1507}1508}15091510ANONYMOUS_NAMESPACE_END1511NAMESPACE_END // Ed255191512NAMESPACE_END // Donna1513NAMESPACE_END // CryptoPP15141515//***************************** curve25519 *****************************//15161517NAMESPACE_BEGIN(CryptoPP)1518NAMESPACE_BEGIN(Donna)15191520int 
curve25519_mult_CXX(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])1521{1522using namespace CryptoPP::Donna::X25519;15231524FixedSizeSecBlock<byte, 32> e;1525for (size_t i = 0;i < 32;++i)1526e[i] = secretKey[i];1527e[0] &= 0xf8; e[31] &= 0x7f; e[31] |= 0x40;15281529bignum25519 nqpqx = {1}, nqpqz = {0}, nqz = {1}, nqx;1530bignum25519 q, qx, qpqx, qqx, zzz, zmone;1531size_t bit, lastbit;15321533curve25519_expand(q, othersKey);1534curve25519_copy(nqx, q);15351536/* bit 255 is always 0, and bit 254 is always 1, so skip bit 255 and1537start pre-swapped on bit 254 */1538lastbit = 1;15391540/* we are doing bits 254..3 in the loop, but are swapping in bits 253..2 */1541for (int i = 253; i >= 2; i--) {1542curve25519_add(qx, nqx, nqz);1543curve25519_sub(nqz, nqx, nqz);1544curve25519_add(qpqx, nqpqx, nqpqz);1545curve25519_sub(nqpqz, nqpqx, nqpqz);1546curve25519_mul(nqpqx, qpqx, nqz);1547curve25519_mul(nqpqz, qx, nqpqz);1548curve25519_add(qqx, nqpqx, nqpqz);1549curve25519_sub(nqpqz, nqpqx, nqpqz);1550curve25519_square(nqpqz, nqpqz);1551curve25519_square(nqpqx, qqx);1552curve25519_mul(nqpqz, nqpqz, q);1553curve25519_square(qx, qx);1554curve25519_square(nqz, nqz);1555curve25519_mul(nqx, qx, nqz);1556curve25519_sub(nqz, qx, nqz);1557curve25519_scalar_product(zzz, nqz, 121665);1558curve25519_add(zzz, zzz, qx);1559curve25519_mul(nqz, nqz, zzz);15601561bit = (e[i/8] >> (i & 7)) & 1;1562curve25519_swap_conditional(nqx, nqpqx, bit ^ lastbit);1563curve25519_swap_conditional(nqz, nqpqz, bit ^ lastbit);1564lastbit = bit;1565}15661567/* the final 3 bits are always zero, so we only need to double */1568for (int i = 0; i < 3; i++) {1569curve25519_add(qx, nqx, nqz);1570curve25519_sub(nqz, nqx, nqz);1571curve25519_square(qx, qx);1572curve25519_square(nqz, nqz);1573curve25519_mul(nqx, qx, nqz);1574curve25519_sub(nqz, qx, nqz);1575curve25519_scalar_product(zzz, nqz, 121665);1576curve25519_add(zzz, zzz, qx);1577curve25519_mul(nqz, nqz, 
zzz);1578}15791580curve25519_recip(zmone, nqz);1581curve25519_mul(nqz, nqx, zmone);1582curve25519_contract(sharedKey, nqz);15831584return 0;1585}15861587int curve25519_mult(byte publicKey[32], const byte secretKey[32])1588{1589using namespace CryptoPP::Donna::X25519;15901591#if (CRYPTOPP_CURVE25519_SSE2)1592if (HasSSE2())1593return curve25519_mult_SSE2(publicKey, secretKey, basePoint);1594else1595#endif15961597return curve25519_mult_CXX(publicKey, secretKey, basePoint);1598}15991600int curve25519_mult(byte sharedKey[32], const byte secretKey[32], const byte othersKey[32])1601{1602#if (CRYPTOPP_CURVE25519_SSE2)1603if (HasSSE2())1604return curve25519_mult_SSE2(sharedKey, secretKey, othersKey);1605else1606#endif16071608return curve25519_mult_CXX(sharedKey, secretKey, othersKey);1609}16101611NAMESPACE_END // Donna1612NAMESPACE_END // CryptoPP16131614//******************************* ed25519 *******************************//16151616NAMESPACE_BEGIN(CryptoPP)1617NAMESPACE_BEGIN(Donna)16181619int1620ed25519_publickey_CXX(byte publicKey[32], const byte secretKey[32])1621{1622using namespace CryptoPP::Donna::Ed25519;16231624bignum256modm a;1625ALIGN(ALIGN_SPEC) ge25519 A;1626hash_512bits extsk;16271628/* A = aB */1629ed25519_extsk(extsk, secretKey);1630expand256_modm(a, extsk, 32);1631ge25519_scalarmult_base_niels(&A, ge25519_niels_base_multiples, a);1632ge25519_pack(publicKey, &A);16331634return 0;1635}16361637int1638ed25519_publickey(byte publicKey[32], const byte secretKey[32])1639{1640return ed25519_publickey_CXX(publicKey, secretKey);1641}16421643int1644ed25519_sign_CXX(std::istream& stream, const byte sk[32], const byte pk[32], byte RS[64])1645{1646using namespace CryptoPP::Donna::Ed25519;16471648bignum256modm r, S, a;1649ALIGN(ALIGN_SPEC) ge25519 R;1650hash_512bits extsk, hashr, hram;16511652// Unfortunately we need to read the stream twice. The first time calculates1653// 'r = H(aExt[32..64], m)'. The second time calculates 'S = H(R,A,m)'. 
There1654// is a data dependency due to hashing 'RS' with 'R = [r]B' that does not1655// allow us to read the stream once.1656std::streampos where = stream.tellg();16571658ed25519_extsk(extsk, sk);16591660/* r = H(aExt[32..64], m) */1661SHA512 hash;1662hash.Update(extsk + 32, 32);1663UpdateFromStream(hash, stream);1664hash.Final(hashr);1665expand256_modm(r, hashr, 64);16661667/* R = rB */1668ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);1669ge25519_pack(RS, &R);16701671// Reset stream for the second digest1672stream.clear();1673stream.seekg(where);16741675/* S = H(R,A,m).. */1676ed25519_hram(hram, RS, pk, stream);1677expand256_modm(S, hram, 64);16781679/* S = H(R,A,m)a */1680expand256_modm(a, extsk, 32);1681mul256_modm(S, S, a);16821683/* S = (r + H(R,A,m)a) */1684add256_modm(S, S, r);16851686/* S = (r + H(R,A,m)a) mod L */1687contract256_modm(RS + 32, S);1688return 0;1689}16901691int1692ed25519_sign_CXX(const byte *m, size_t mlen, const byte sk[32], const byte pk[32], byte RS[64])1693{1694using namespace CryptoPP::Donna::Ed25519;16951696bignum256modm r, S, a;1697ALIGN(ALIGN_SPEC) ge25519 R;1698hash_512bits extsk, hashr, hram;16991700ed25519_extsk(extsk, sk);17011702/* r = H(aExt[32..64], m) */1703SHA512 hash;1704hash.Update(extsk + 32, 32);1705hash.Update(m, mlen);1706hash.Final(hashr);1707expand256_modm(r, hashr, 64);17081709/* R = rB */1710ge25519_scalarmult_base_niels(&R, ge25519_niels_base_multiples, r);1711ge25519_pack(RS, &R);17121713/* S = H(R,A,m).. 
*/1714ed25519_hram(hram, RS, pk, m, mlen);1715expand256_modm(S, hram, 64);17161717/* S = H(R,A,m)a */1718expand256_modm(a, extsk, 32);1719mul256_modm(S, S, a);17201721/* S = (r + H(R,A,m)a) */1722add256_modm(S, S, r);17231724/* S = (r + H(R,A,m)a) mod L */1725contract256_modm(RS + 32, S);1726return 0;1727}17281729int1730ed25519_sign(std::istream& stream, const byte secretKey[32], const byte publicKey[32],1731byte signature[64])1732{1733return ed25519_sign_CXX(stream, secretKey, publicKey, signature);1734}17351736int1737ed25519_sign(const byte* message, size_t messageLength, const byte secretKey[32],1738const byte publicKey[32], byte signature[64])1739{1740return ed25519_sign_CXX(message, messageLength, secretKey, publicKey, signature);1741}17421743int1744ed25519_sign_open_CXX(const byte *m, size_t mlen, const byte pk[32], const byte RS[64]) {17451746using namespace CryptoPP::Donna::Ed25519;17471748ALIGN(ALIGN_SPEC) ge25519 R, A;1749hash_512bits hash;1750bignum256modm hram, S;1751byte checkR[32];17521753if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))1754return -1;17551756/* hram = H(R,A,m) */1757ed25519_hram(hash, RS, pk, m, mlen);1758expand256_modm(hram, hash, 64);17591760/* S */1761expand256_modm(S, RS + 32, 32);17621763/* SB - H(R,A,m)A */1764ge25519_double_scalarmult_vartime(&R, &A, hram, S);1765ge25519_pack(checkR, &R);17661767/* check that R = SB - H(R,A,m)A */1768return ed25519_verify(RS, checkR, 32) ? 
0 : -1;1769}17701771int1772ed25519_sign_open_CXX(std::istream& stream, const byte pk[32], const byte RS[64]) {17731774using namespace CryptoPP::Donna::Ed25519;17751776ALIGN(ALIGN_SPEC) ge25519 R, A;1777hash_512bits hash;1778bignum256modm hram, S;1779byte checkR[32];17801781if ((RS[63] & 224) || !ge25519_unpack_negative_vartime(&A, pk))1782return -1;17831784/* hram = H(R,A,m) */1785ed25519_hram(hash, RS, pk, stream);1786expand256_modm(hram, hash, 64);17871788/* S */1789expand256_modm(S, RS + 32, 32);17901791/* SB - H(R,A,m)A */1792ge25519_double_scalarmult_vartime(&R, &A, hram, S);1793ge25519_pack(checkR, &R);17941795/* check that R = SB - H(R,A,m)A */1796return ed25519_verify(RS, checkR, 32) ? 0 : -1;1797}17981799int1800ed25519_sign_open(std::istream& stream, const byte publicKey[32], const byte signature[64])1801{1802return ed25519_sign_open_CXX(stream, publicKey, signature);1803}18041805int1806ed25519_sign_open(const byte *message, size_t messageLength, const byte publicKey[32], const byte signature[64])1807{1808return ed25519_sign_open_CXX(message, messageLength, publicKey, signature);1809}18101811NAMESPACE_END // Donna1812NAMESPACE_END // CryptoPP18131814#endif // CRYPTOPP_CURVE25519_64BIT181518161817