#include <stdlib.h>1#include <stdio.h>2#include <string.h>3#include <math.h>4#include <time.h>5#include "gmp.h"6#include "mpzutil.h"7#include "ffwrapper.h"8#include "ffpoly.h"9#include "hecurve.h"10#include "cstd.h"111213/*14Copyright 2007 Andrew V. Sutherland1516This file is part of smalljac.1718smalljac is free software: you can redistribute it and/or modify19it under the terms of the GNU General Public License as published by20the Free Software Foundation, either version 2 of the License, or21(at your option) any later version.2223smalljac is distributed in the hope that it will be useful,24but WITHOUT ANY WARRANTY; without even the implied warranty of25MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the26GNU General Public License for more details.2728You should have received a copy of the GNU General Public License29along with smalljac. If not, see <http://www.gnu.org/licenses/>.30*/3132#if HECURVE_GENUS == 23334static char buf[4096];3536#define _deg_u(u) (_ff_nonzero(u[2])?2:(_ff_nonzero(u[1])?1:0))37#define _deg_v(v) (_ff_nonzero(v[1])?1:(_ff_nonzero(v[0])?0:-1))38#define _dbg_print_uv(s,u,v) if ( dbg_level >= DEBUG_LEVEL ) { printf (s); hecurve_print(u,v); }3940void hecurve_g2_square_1 (ff_t u[3], ff_t v[2], ff_t u0, ff_t v0, ff_t f[6]);41void hecurve_g2_square_2_r0 (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t f[6]);42void hecurve_g2_compose_1_2 (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t u2[3], ff_t v2[2], ff_t f[6]);43int hecurve_g2_compose_special (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t u2[3], ff_t v2[3], ff_t f[6]);4445/*46TODO: Clean up code to not use quite so many variables. Useful for debugging at the moment.47*/48// note that (u,v) may be equal to (u1,v1) or (u2,v2) but we assume overlap is aligned.49int hecurve_g2_compose (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t u2[3], ff_t v2[2], ff_t f[6], hecurve_ctx_t *ctx)50{51_ff_t_declare_reg r, inv0, inv1, w0, w1, w2, w3, w4, w5, s0, s1, t1, L0, L1, L2;5253if ( ctx && ctx->state == 1 ) {54// get invert result and restore saved variables55_ff_set (w1, ctx->invert); // inverted value of r*s156_ff_set (r, ctx->r);57_ff_set (s0, ctx->s0);58_ff_set (s1, ctx->s1);59_ff_set (inv1, ctx->inv1);60/// dbg_printf ("Restored r = %Zd, s1 = %Zd, s0 = %Zd, inv1 = %Zd, w1 = %Zd\n",_ff_wrap_mpz(r,0), _ff_wrap_mpz(s1,0), _ff_wrap_mpz(s0,0), _ff_wrap_mpz(inv1,0), _ff_wrap_mpz(w1,0));61goto hecurve_g2_compose_2_2_inverted;62}63if ( _ff_nonzero(f[4]) ) { hecurve_compose_cantor (u, v, u1, v1, u2, v2, f); return 1; } // revert to Cantor if f4 is non-zero - should only happen over F_564if ( _ff_zero(u1[2]) || _ff_zero(u2[2]) ) { hecurve_g2_compose_special (u, v, u1, v1, u2, v2, f); return 1; }65#if ! HECURVE_FAST66dbg_printf ("Compose_2_2: (%Zdx^2+%Zdx+%Zd, %Zdx+%Zd) * (%Zdx^2+%Zdx+%Zd, %Zdx+%Zd)\n",67_ff_wrap_mpz(u1[2],0), _ff_wrap_mpz(u1[1],1), _ff_wrap_mpz(u1[0],2), _ff_wrap_mpz(v1[1],3), _ff_wrap_mpz(v1[0],4),68_ff_wrap_mpz(u2[2],5), _ff_wrap_mpz(u2[1],6), _ff_wrap_mpz(u2[0],7), _ff_wrap_mpz(v2[1],8), _ff_wrap_mpz(v2[0],9));69if ( !_ff_one(u1[2]) || !_ff_one(u2[2]) ) { err_printf ("u1 or u2 not monic deg 2 in hecurve_compose_2_2!\n"); exit (0); }70#endif71#if HECURVE_VERIFY72if ( ! hecurve_verify (u1, v1, f) ) { err_printf ("hecurve_compose: invalid input\n"); exit (0); }73if ( ! hecurve_verify (u2, v2, f) ) { err_printf ("hecurve_compose: invalid input\n"); exit (0); }74#endif7576// 1. compute r = Resultant(u1,u2) and save z1=inv1 and z3=inv077_ff_sub(inv1,u1[1],u2[1]); // reduce inv178_ff_qsub(w0,u2[0],u1[0]); // w0 = z2 (unreduced)79_ff_multadd(inv0,u1[1],inv1,w0); // z3 = inv0 = u1[1]*inv1+w0 = u1[1]*z1+z280_ff_mult(w1,inv1,inv1);81ff_mult(w1,w1,u1[0]);82_ff_multadd(r,w0,inv0,w1);83/// dbg_printf ("r = %Zd\n",_ff_wrap_mpz(r,0));84// if ( _ff_zero(r) ) { hecurve_compose_special (u, v, u1, v1, u2, v2, f); return 1; } this is checked below8586// 2. compute almost inverse of u2 mod u1, i.e. inv = (r/u2) mod u187// nothing to do, simply note that inv = inv1x + inv0, i.e. inv1 = z1 and inv0 = z388/// dbg_printf ("inv = %Zdx + %Zd\n",_ff_wrap_mpz(inv1,0), _ff_wrap_mpz(inv0,1));8990// 3. compute s' = rs = ((v1-v2)inv) mod u191_ff_qsub(w0,v1[0],v2[0]);92_ff_mult(w2,inv0,w0);93_ff_qsub(w1,v1[1],v2[1]);94_ff_mult(w3,inv1,w1);95_ff_incmult(t1,u1[1],w3);96_ff_qadd (w4,inv0,inv1);97_ff_qadd (w5,w0,w1);98_ff_qnegsum(w0,w2,t1);99_ff_multadd(s1,w4,w5,w0);100_ff_mult(t1,u1[0],w3);101_ff_qsub(s0,w2,t1);102/// dbg_printf ("w0 = %Zd, w1 = %Zd, w2 = %Zd, w3 = %Zd, w4 = %Zd, w5 = %Zd, s\' = rs = %Zdx + %Zd\n",103/// _ff_wrap_mpz(w0,0), _ff_wrap_mpz(w1,1), _ff_wrap_mpz(w2,2), _ff_wrap_mpz(w3,3), _ff_wrap_mpz(w4,4), _ff_wrap_mpz(w5,5), _ff_wrap_mpz(s1,6), _ff_wrap_mpz(s0,7));104105// Note that we need to save w3, w4, and w5, but w0, w1, and w2 are free106// if ( _ff_zero(s1) ) goto hecurve_compose_s1_zero; this is checked below107108// 4. compute s'' = x+s0/s1 = x + s'0/s'1 and s1109_ff_mult(w2,r,s1); // assume r and s1 both non-zero - this is the usual case110if ( _ff_zero(w2) ) {111if ( _ff_nonzero(r) ) goto hecurve_g2_compose_s1_zero;112hecurve_g2_compose_special (u, v, u1, v1, u2, v2, f); return 1;113}114if ( ctx && ! ctx->state ) {115_ff_set (ctx->s0, s0);116_ff_set (ctx->s1, s1);117_ff_set (ctx->r, r);118_ff_set(ctx->inv1, inv1);119_ff_set (ctx->invert, w2); // return to let caller invert120ctx->state = 1;121return 0;122}123_ff_invert(w1,w2);124125hecurve_g2_compose_2_2_inverted: // caller returns here after inverting126// we assume w1, r, s0, s1, and inv1 are set, all others free, s0 is unreduced, the rest are reduced127_ff_mult(w2,w1,r);128_ff_square(w3,s1);129ff_mult(w3,w3,w1);130_ff_mult(w4,r,w2);131_ff_square(w5,w4);132_ff_mult(s0,s0,w2);133/// dbg_printf ("s\'\' = x + %Zd, s1 = %Zd\n",_ff_wrap_mpz(s0,0), _ff_wrap_mpz(w3,1));134135// 5. compute L = s''0*u2 = x^3 + L2*x^2 + L1*x + L0136_ff_qadd(L2,u2[1],s0); // ok to leave unreduced137_ff_multadd(L1,u2[1],s0,u2[0]);138_ff_mult(L0,u2[0],s0);139/// dbg_printf ("L = x^3 + %Zdx^2 + %Zdx + %Zd\n",_ff_wrap_mpz(L2,0), _ff_wrap_mpz(L1,1), _ff_wrap_mpz(L0,2));140141// 6. compute u = (s(L+2v2)-t)/u1 = x^2 + u[1]*x + u[0] note h = 0142_ff_qsub(w0,s0,u1[1]);143_ff_qsub(w1,s0,inv1);144_ff_mult(w2,w0,w1); // w2 = (s0-u1[1])(s0-inv1)145_ff_qadd(w1,u2[1],u2[1]);146_ff_qaddto(w1,inv1);147ff_mult(w1,w1,w5);148_ff_qaddto (w2,w1);149_ff_qneg(w1,u1[0]);150_ff_qaddto(w2,w1);151_ff_addto(w2,L1);152_ff_qadd(w0,v2[1],v2[1]);153_ff_multadd(u[0],w0,w4,w2); // potentially destroys u1[0] and u2[0]154// no mults to work with so we are better off reducing as we go to compute u[1]155_ff_add(w0,s0,s0);156_ff_subfrom(w0,inv1);157_ff_sub(u[1],w0,w5);158_ff_set_one(u[2]);159/// dbg_printf ("u = %Zdx^2 + %Zdx + %Zd\n",_ff_wrap_mpz(u[2],0), _ff_wrap_mpz(u[1],1), _ff_wrap_mpz(u[0],2));160161// 7. compute v = (-(L+v2)mod u = v[1]*x + v[0] note h = 0162_ff_qsub(w1,L2,u[1]);163_ff_qsub(w0,u[0],L1);164_ff_multadd(w2,w1,u[1],w0);165_ff_qneg(w0,v2[1]);166_ff_multadd (v[1],w2,w3,w0);167_ff_qneg(w0,L0);168_ff_multadd(w2,w1,u[0],w0);169_ff_qneg(w0,v2[0]);170_ff_multadd(v[0],w2,w3,w0);171/// dbg_printf ("v = %Zdx + %Zd\n",_ff_wrap_mpz(v[1],0), _ff_wrap_mpz(v[0],1));172#if ! HECURVE_FAST173_dbg_print_uv ("Compose_2 Result: ", u, v);174#endif175#if HECURVE_VERIFY176if ( ! hecurve_verify (u, v, f) ) {177err_printf ("hecurve_compose output verification failed for inputs:\n");178err_printf (" "); hecurve_print(u1,v1);179err_printf (" "); hecurve_print(u2,v2);180err_printf ("note that inputs have been modified if output overlapped.\n");181exit (0);182}183#endif184return 1;185186// Special case is not optimized187hecurve_g2_compose_s1_zero:188/// dbg_printf ("Special case, s\'\'1 == 0\n");189190// 4'. compute s191_ff_invert(inv0,r);192ff_mult(s0,s0,inv0);193/// dbg_printf ("s = %Zd\n",_ff_wrap_mpz(s0,0));194195// Part of 6' moved here incase u2[0] gets overwritten below196_ff_mult(w2,u2[0],s0);197_ff_addto(w2,v2[0]);198199// 5'. compute u = (t-s(L+2v2))/u1 = x + u[0] note h = 0200_ff_square(w1,s0);201_ff_addto(w1,u2[1]);202_ff_addto(w1,u1[1]);203_ff_neg(u[0],w1); // this potentially destroys u1[0] and u2[0], we assume u2[1] is ok204/// dbg_printf ("u = x + %Zd\n",_ff_wrap_mpz(u[0],0));205206// 6. compute v = -(L+v2) mod u = v[0] note h = 0207_ff_sub (w1,u2[1],u[0]); // BUG FIX - handbook has w1 = s0(u21+u[0]) (+ SHOULD BE -)208ff_mult(w1,w1,s0);209_ff_addto(w1,v2[1]);210ff_mult(w1,w1,u[0]);211_ff_sub(v[0],w1,w2);212_ff_set_zero(v[1]);213214_ff_set_zero(u[2]); _ff_set_one(u[1]); // set these last to avoid overlap issues215216/// dbg_printf ("v = %Zd\n",_ff_wrap_mpz(v[0],0));217#if ! HECURVE_FAST218_dbg_print_uv ("Compose_2 Result: ", u, v);219#endif220#if HECURVE_VERIFY221if ( ! hecurve_verify (u, v, f) ) {222err_printf ("hecurve_compose output verification failed for inputs:\n");223err_printf (" "); hecurve_print(u1,v1);224err_printf (" "); hecurve_print(u2,v2);225err_printf ("note that inputs have been modified if output overlapped.\n");226exit (0);227}228#endif229return 1;230}231232233int hecurve_g2_square (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t f[6], hecurve_ctx_t *ctx)234{235_ff_t_declare_reg w0, w1, w2, w3, w4, w5, inv0, inv1, L0, L1, L2, r, s0, s1, t0, t1;236237#if ! HECURVE_FAST238_dbg_print_uv ("Square_2: ", u, v);239#endif240#if HECURVE_VERIFY241if ( ! hecurve_verify (u1, v1, f) ) { err_printf ("hecurve_square: invalid input\n"); exit (0); }242#endif243244if ( ctx && ctx->state == 1 ) {245// get invert result and restore saved variables246_ff_set (w1, ctx->invert); // inverted value of r*s1247_ff_set (r, ctx->r);248_ff_set (s0, ctx->s0);249_ff_set (s1, ctx->s1);250/// dbg_printf ("Restored r = %Zd, s1 = %Zd, s0 = %Zd, w1 = %Zd\n",_ff_wrap_mpz(r,0), _ff_wrap_mpz(s1,1), _ff_wrap_mpz(s0,2), _ff_wrap_mpz(w1,3));251goto hecurve_g2_square_2_2_inverted;252}253if ( _ff_nonzero(f[4]) ) { hecurve_compose_cantor (u, v, u1, v1, u1, v1, f); return 1; } // revert to Cantor if f4 is non-zero - should only happen over F_5254if ( _ff_zero(u1[2]) ) { hecurve_g2_square_1 (u, v, u1[0], v1[0], f); return 1; }255256// 1. compute v' = 2v mod u = v1*x + v0 note h = 0, we use w5=~v1, inv0=~v0257_ff_add(w5,v1[1],v1[1]); // w5 gets negated below, must be reduced258_ff_qadd(inv0,v1[0],v1[0]);259/// dbg_printf ("2v = %Zdx + %Zd\n",_ff_wrap_mpz(w5,0), _ff_wrap_mpz(inv0,1));260261// 2. compute resultant r = Resultant(2v,u)262_ff_square(w0,v1[1]);263_ff_square(w1,u1[1]);264_ff_qadd(w2,w0,w0);265_ff_qadd(w2,w2,w2);266_ff_mult(w3,u1[1],w5);267_ff_qsub(w4,inv0,w3);268ff_mult(w4,w4,inv0);269_ff_multadd(r,u1[0],w2,w4);270// _ff_addto(r,w4);271/// dbg_printf ("w0 = %Zd, w1 = %Zd, w2 = %Zd, w3 = %Zd, w4 = %Zd r = res(2v,u) = %Zd\n",272/// _ff_wrap_mpz(w0,0), _ff_wrap_mpz(w1,1), _ff_wrap_mpz(w2,2), _ff_wrap_mpz(w3,3), _ff_wrap_mpz(w4,4), _ff_wrap_mpz(r,5));273// if ( _ff_zero(r) ) { hecurve_square_2_r0 (u, v, u1, v1, f); return 1; } handled below274275// 3. compute almost inverse invv = r*inv276_ff_neg(inv1,w5);277_ff_subfrom(inv0,w3);278/// dbg_printf ("inv = %Zdx + %Zd\n", _ff_wrap_mpz(inv1,0), _ff_wrap_mpz(inv0,1));279280// 4. compute t = ((f-v^2)/u) mod u = tt1*x+tt0 note h = 0281// this code could be improved further when f[3] == 0282#if HECURVE_SPARSE283_ff_set (w3,w1); // this could be further optimized284#else285_ff_add(w3,f[3],w1);286#endif287_ff_add(w4,u1[0],u1[0]);288_ff_qadd(t1,w1,w1);289_ff_qaddto(t1,w3);290_ff_qsubfrom(t1,w4);291_ff_qadd(t0,w4,w4);292_ff_qsubfrom(t0,w3);293_ff_qneg(w5,w0);294_ff_multadd(t0,t0,u1[1],w5);295#if ! HECURVE_SPARSE296_ff_addto(t0,f[2]);297#endif298/// dbg_printf ("t = %Zdx + %Zd\n", _ff_wrap_mpz(t1,0), _ff_wrap_mpz(t0,1));299300// 5. compute ss = (tt*invv) mod u301_ff_mult(w0,t0,inv0);302_ff_mult(w1,t1,inv1);303_ff_add(s1,inv0,inv1); // need s1 reduced to keep mult from getting too big304_ff_add(w2,t0,t1);305_ff_qneg(w5,w0);306_ff_multadd(s1,s1,w2,w5);307_ff_incmult(w2,u1[1],w1);308_ff_qsubfrom(s1,w2);309_ff_mult(w5,w1,u1[0]);310_ff_qsub(s0,w0,w5);311/// dbg_printf ("s' = %Zdx + %Zd\n",_ff_wrap_mpz(s1,0), _ff_wrap_mpz(s0,1));312// if ( _ff_zero(s1) ) goto hecurve_square_s1_zero; handled below313314// 6. compute s'' = x + s0/s1 and s1315_ff_mult(w0,r,s1); // assume non-zero r and s1 is usual case316if ( _ff_zero(w0) ) {317if ( _ff_nonzero(r) ) goto hecurve_g2_square_s1_zero;318hecurve_g2_square_2_r0 (u, v, u1, v1, f);319return 1;320}321if ( ctx && ! ctx->state ) {322_ff_set (ctx->s0, s0);323_ff_set (ctx->s1, s1);324_ff_set (ctx->r, r);325_ff_set (ctx->invert, w0);326ctx->state = 1; // return to let caller invert327return 0;328}329_ff_invert(w1,w0);330331hecurve_g2_square_2_2_inverted: // caller returns here after inverting332// we assume w0, s0, s1, and r are set and everything else is free333_ff_mult(w2,w1,r);334_ff_square(w3,s1);335ff_mult(w3,w3,w1);336_ff_mult(w4,w2,r);337_ff_square(w5,w4);338ff_mult(s0,s0,w2);339/// dbg_printf ("s\'\'0 = %Zd, w0 = %Zd, w1 = %Zd, w2 = %Zd, w3 = %Zd, w4 = %Zd, w5 = %Zd\n",340/// _ff_wrap_mpz(s0,0), _ff_wrap_mpz(w0,1), _ff_wrap_mpz(w1,2), _ff_wrap_mpz(w2,3), _ff_wrap_mpz(w3,4), _ff_wrap_mpz(w4,5), _ff_wrap_mpz(w5,6));341// note that w3, w4, and w5, need to be saved, but w0, w1 and w2 are free342343// 7. compute LL = sss*u = x^3 + LL2*x^2 + LL1*x + LL0344_ff_qadd(L2,s0,u1[1]);345_ff_multadd(L1,s0,u1[1],u1[0]);346_ff_mult(L0,s0,u1[0]);347/// dbg_printf ("L' = s''u = x^3 + %Zdx^2 + %Zdx + %Zd\n",_ff_wrap_mpz(L2,0), _ff_wrap_mpz(L1,1), _ff_wrap_mpz(L0,2));348349// 8. compute u = s^2 + 2vs/u + (v^2-f)/u1^2 note h = 0350_ff_square(w0,s0);351_ff_qadd(w1,v1[1],v1[1]);352ff_mult(w1,w1,w4);353_ff_qaddto(w0,w1);354_ff_qadd(w1,u1[1],u1[1]);355_ff_multadd(u[0],w1,w5,w0); // potentially destroys u1[0] and u2[0]356_ff_add(w0,s0,s0); // no mult to absorb so stay reduced357_ff_sub(u[1],w0,w5); // potentially destroys u1[1] and u2[1]358_ff_set_one(u[2]);359/// dbg_printf ("u = x^2 + %Zdx + %Zd\n",_ff_wrap_mpz(u[1],0), _ff_wrap_mpz(u[0],1));360361// 9. compute v = -(L+v1) mod u = v[1]x + v[0] note h = 0362_ff_qsub(w1,L2,u[1]);363_ff_qsub (w0,u[0],L1);364_ff_multadd(w2,w1,u[1], w0);365_ff_qneg(w0,v1[1]); // v1[1] could overlap v[1]366_ff_multadd(v[1],w2,w3,w0);367_ff_qneg(w0,L0);368_ff_multadd(w2,w1,u[0],w0);369_ff_qneg(w0,v1[0]); // v1[0] could overlap v[0]370_ff_multadd(v[0],w2,w3,w0);371/// dbg_printf ("v = %Zdx + %Zd\n",_ff_wrap_mpz(v[1],0), _ff_wrap_mpz(v[0],1));372#if HECURVE_VERIFY373if ( ! hecurve_verify (u, v, f) ) {374err_printf ("hecurve_square: output verification failed for inputs:\n");375err_printf (" "); hecurve_print(u1,v1);376err_printf ("note that input has been modified if output overlapped.\n");377exit (0);378}379#endif380return 1;381382hecurve_g2_square_s1_zero:383/// dbg_printf ("Special case, s'1 == 0\n");384// 6'. compute s and precomputations385_ff_invert(w1,r);386ff_mult(s0,s0,w1);387_ff_mult(w2,s0,u1[0]);388_ff_addto(w2,v1[0]);389/// dbg_printf ("w1 = %Zd, w2 = %Zd, s0 = %Zd\n",_ff_wrap_mpz(w1,0), _ff_wrap_mpz(w2,1), _ff_wrap_mpz(s0,2));390391// 7'. compute u' = (f-v^2)/u^2 - 2vs/u - s^2 note h =0392_ff_square(w3,s0);393_ff_add(w4,u1[1],u1[1]);394_ff_addto(w3,w4);395_ff_neg(u[0],w3); // potentially destroys u1[0] and u2[0]396/// dbg_printf ("u = x + %Zd\n",_ff_wrap_mpz(u[0],0));397398// 8'. compute v' = -(su+v) mod u399_ff_sub(w1,u1[1],u[0]);400ff_mult(w1,w1,s0);401_ff_addto(w1,v1[1]);402ff_mult(w1,w1,u[0]);403_ff_sub(v[0],w1,w2);404_ff_set_zero(v[1]);405/// dbg_printf ("v = %Zd\n",_ff_wrap_mpz(v[0],0));406407_ff_set_one(u[1]); // set these last in case of overlap408_ff_set_zero(u[2]);409#if HECURVE_VERIFY410if ( ! hecurve_verify (u, v,f) ) {411err_printf ("hecurve_square output: verification failed for inputs:\n");412err_printf (" "); hecurve_print(u1,v1);413err_printf ("note that input has been modified if output overlapped.\n");414exit (0);415}416#endif417return 1;418}419420// Note that (u,v) == (u1,v1) or (u2,v2) (or both) are possible. We do assume that any overlap is aligned.421// Thus modifying u[1] shouldn't effect u1[0] or u2[0] but may destory u1[1] and/or u2[1]422//423// This algorithm handles all the unusual cases - it assumes that either u1 or u2 has degree < 2424// or that Resultant(u1,u2) = 0, otherwise hecurve_compose would have handled it425int hecurve_g2_compose_special (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t u2[3], ff_t v2[3], ff_t f[6])426{427_ff_t_declare_reg t1, t2, t3, r;428_ff_t_declare *swap;429int d1, d2, swapd, sts;430431#if ! HECURVE_FAST432dbg_printf ("Compose Special: (%Zdx^2+%Zdx+%Zd, %Zdx+%Zd) * (%Zdx^2+%Zdx+%Zd, %Zdx+%Zd)\n",433_ff_wrap_mpz(u1[2],0), _ff_wrap_mpz(u1[1],1), _ff_wrap_mpz(u1[0],2), _ff_wrap_mpz(v1[1],3), _ff_wrap_mpz(v1[0],4),434_ff_wrap_mpz(u2[2],5), _ff_wrap_mpz(u2[1],6), _ff_wrap_mpz(u2[0],7), _ff_wrap_mpz(v2[1],8), _ff_wrap_mpz(v2[0],9));435#endif436#if HECURVE_VERIFY437if ( ! hecurve_verify (u1, v1, f) ) { err_printf ("hecurve_compose_special: invalid input\n"); exit (0); }438if ( ! hecurve_verify (u2, v2, f) ) { err_printf ("hecurve_compose_special: invalid input\n"); exit (0); }439#endif440d1 = _deg_u(u1);441d2 = _deg_u(u2);442if ( d1 > d2 ) {443swap = u2; u2 = u1; u1 = swap;444swap = v2; v2 = v1; v1 = swap;445swapd = d2; d2 = d1; d1 = swapd;446/// dbg_printf ("swapped u1 = x + %Zd, v1 = %Zd.\n",_ff_wrap_mpz(u1[0],0), _ff_wrap_mpz(v1[0],1));447/// dbg_printf ("swapped u2 = x^2 + %Zdx + %Zd, v2 = %Zdx + %Zd.\n",448/// _ff_wrap_mpz(u2[1],0), _ff_wrap_mpz(u2[0],1), _ff_wrap_mpz(v2[1],2), _ff_wrap_mpz(v2[0],3));449}450switch(d1){451case 0: // deg u1 == 0, i.e. u1 is the identity452/// dbg_printf ("Case 1, deg(u1) == 0\n");453_hecurve_set (u, v, u2, v2);454break;455case 1: // deg u1 == 1456// case 2457/// dbg_printf ("Case 2, deg(u1) == 1\n");458if ( d2 == 1 ) { // deg u1 == dege u2 == 1459/// dbg_printf ("Case 2.A, deg(u1) == deg(u2) == 1\n");460// case 2.A461if ( _ff_equal(u1[0],u2[0]) && _ff_equal(u1[1],u2[1]) ) {462if ( _ff_zero(v1[0]) && _ff_zero(v2[0]) ) {463/// dbg_printf ("Case 2.A.i.a D1 == -D2 == D1\n");464_hecurve_set_identity(u,v);465break;466}467_ff_neg(t1,v2[0]);468if ( _ff_equal(v1[0],t1) ) {469/// dbg_printf ("Case 2.A.i.b D1 == -D2\n");470_hecurve_set_identity(u,v);471break;472}473if ( _ff_equal(v1[0],v2[0]) ) {474/// dbg_printf ("Case 2.A.ii D1 == D2\n");475hecurve_g2_square_1 (u, v, u1[0], v1[0], f);476break;477}478}479/// dbg_printf ("Case 2.A.iii D1 != +- D2\n");480if ( _ff_equal(u1[0],u2[0]) ) { err_printf ("u1[0] == u2[0] in case 2.A.iii of hecurve_compose!\n"); exit(0); }481482// u = u1*u2 and v = ((v2-v1)x+v2u1[0]-v1u2[0])/(u1[0]-u2[0]) note that v1 and v2 are constants (deg 0)483_ff_sub(t1,u1[0],u2[0]);484ff_invert(t1,t1);485_ff_sub(t2,v2[0],v1[0]);486_ff_mult(v[1],t2,t1); // we assume writing [1] doesn't hurt [0]487_ff_mult(t2,v2[0],u1[0]);488_ff_mult(t3,v1[0],u2[0]);489_ff_subfrom(t2,t3);490_ff_mult(v[0],t2,t1);491_ff_set_one(u[2]); // update u last to handle overlap492_ff_add(u[1],u1[0],u2[0]); // we assume writing [1] doesn't hurt [0]493ff_mult(u[0],u1[0],u2[0]); // mult should be safe494break;495} else { // deg u1 == 1, deg u2 == 2496/// dbg_printf ("Case 2.B, deg(u1) == 1, deg(u2) == 2\n");497// compute u2(-u1[0]) = (-u1[0])(-u1[0] + u2[1]) + u2[0]498_ff_neg(t1,u1[0]); // save t1 = -u1[0] for later499_ff_add(t2,t1,u2[1]);500ff_mult(t2,t2,t1);501_ff_addto(t2,u2[0]);502if ( _ff_nonzero(t2) ) {503/// dbg_printf ("Case 2.B.i, u2(-u1[0]) != 0\n");504hecurve_g2_compose_1_2 (u, v, u1, v1, u2, v2, f);505break;506}507/// dbg_printf ("Case 2.B.ii\n");508_ff_add(t2,u1[0],u1[0]);509if ( _ff_equal(u2[1],t2) ) {510_ff_square(t2,u1[0]);511if ( _ff_equal(u2[0],t2) ) {512/// dbg_printf ("Case 2.B.ii.a, u2[1] == 2u1[0] and u2[0] == u1[0]^2\n");513// compute v2(-u1[0])514_ff_mult(t2,v2[1],t1);515_ff_addto(t2,v2[0]);516if ( _ff_equal(t2,v1[0]) ) {517_ff_t_declare_dynamic u3[3], v3[2], v4[2];518519// this is expensive, but this is a special case - may optimize later to handle recursion more elegantly520_ff_init(u3[0]); _ff_init(u3[1]); _ff_init(u3[2]);521_ff_init(v3[0]); _ff_init(v3[1]); _ff_init(v4[0]); _ff_init(v4[1]);522523/// dbg_printf ("Case 2.B.ii.a.I D2 = 2D1\n");524// Double D2 then subtract D1525// this is slightly inefficient but easier and is a rare case anyway526hecurve_square (u3, v3, u2, v2, f, 0); // double D2527// Bug fix - need to avoid infinite recursion on order 3 elements, this occurs when 2D2 == -D2528if ( _ff_equal(u3[0],u2[0]) && _ff_equal(u3[1],u2[1]) && _ff_equal(u3[2],u2[2]) ) {529_ff_neg(t2,v2[0]);530_ff_neg(t3,v2[1]);531if ( _ff_equal(v3[0],t2) && _ff_equal(v3[1],t3) ) {532hecurve_compose_cantor (u, v, u1, v1, u2, v2, f);533break;534// err_printf ("Attempt to compute 2D+D for D with order 6 - unable to handle this case\n");535// exit (0);536}537}538/// dbg_printf ("2D2 != -2D1, computing 2D2-D1\n");539_ff_neg(v4[0],v1[0]);540_ff_neg(v4[1],v1[1]); // D4 = invert D1, note u4 = u1541hecurve_compose (u, v, u1, v4, u3, v3, f, 0); // compute D2+D2-D1542_ff_clear(u3[0]); _ff_clear(u3[1]); _ff_clear(u3[2]);543_ff_clear(v3[0]); _ff_clear(v3[1]); _ff_clear(v4[0]); _ff_clear(v4[1]);544} else {545/// dbg_printf ("Case 2.B.ii.a.II, D2 = -2P1\n");546_hecurve_set (u, v, u1, v1);547hecurve_invert (u, v); // compute inverse of D1548}549break;550}551} else {552/// dbg_printf ("Case 2.B.ii.b, u2[1] != 2u1[0] or u2[0] != u1[0]^2\n");553// compute -v2(-u1[0]) - this is a bug fix - p. 314/315 of handbook uses v2(-u1[0]) (or not)554_ff_mult(t3,v2[1],t1); // t1 = -u1[0]555_ff_addto(t3,v2[0]);556_ff_neg(t2,t3);557// we need to compute v2(u1[0]-u2[1]) either way558_ff_sub(t3,u1[0],u2[1]);559ff_mult(t3,t3,v2[1]);560_ff_addto(t3,v2[0]);561/// dbg_printf ("t3 = v2[1]*(u1[0]-u2[1]) + v2[0] = %Zd\n",_ff_wrap_mpz(t3,0));562if ( _ff_equal(t2,v1[0]) ) {563/// dbg_printf ("Case 2.B.ii.b.I, -P1 occurs in D2\n");564_ff_add(u[0],u2[1],t1); // t1 = -u1[0]565_ff_set_zero(u[2]); _ff_set_one(u[1]); // u[1] must get set here, could destory u2[1]566_ff_set_zero(v[1]);567_ff_set(v[0],t3);568} else {569_ff_t_declare_dynamic u3[3], v3[2], u4[3], v4[2];570571// this is expensive, but this is a special case - may optimize later to handle recursion more elegantly572_ff_init(u3[0]); _ff_init(u3[1]); _ff_init(u3[2]); _ff_init(v3[0]); _ff_init(v3[1]);573_ff_init(u4[0]); _ff_init(u4[1]); _ff_init(u4[2]); _ff_init(v4[0]); _ff_init(v4[1]);574575/// dbg_printf ("Case 2.B.ii.b.II, Doubling D1 and composing with (x+u21-u10, v2(u10-u21)),\n");576hecurve_g2_square_1 (u3, v3, u1[0], v1[0], f);577_ff_set_zero(u4[2]); _ff_set_one(u4[1]);578_ff_sub(u4[0],u2[1],u1[0]);579_ff_set_zero(v4[1]);580_ff_set(v4[0],t3);581hecurve_compose (u, v, u4, v4, u3, v3, f, 0);582_ff_clear(u3[0]); _ff_clear(u3[1]); _ff_clear(u3[2]); _ff_clear(v3[0]); _ff_clear(v3[1]);583_ff_clear(u4[0]); _ff_clear(u4[1]); _ff_clear(u4[2]); _ff_clear(v4[0]); _ff_clear(v4[1]);584}585}586}587break;588case 2: // deg u1 == deg u2 == 2 (case 3)589/// dbg_printf ("Case 3, deg(u1) == deg(u2) == 2\n");590if ( _ff_equal(u1[0],u2[0]) && _ff_equal(u1[1],u2[1]) ) {591/// dbg_printf ("Case 3.A, u1 == u2\n");592if ( _ff_equal(v1[1],v2[1]) && _ff_equal(v1[0],v2[0]) ) {593// catch inverse case for order 2 elements594if ( _ff_zero(v1[1]) && _ff_zero(v1[0]) ) {595/// dbg_printf ("Case 3.A.i*, v1 == -v2 == 0\n");596_hecurve_set_identity (u, v);597break;598}599/// dbg_printf ("Case 3.A.ii, v1 == v2\n");600hecurve_square (u, v, u1, v1, f, 0); // handles case 3.A.ii.b (zero resultant) also601break;602}603_ff_neg(t1,v2[0]);604_ff_neg(t2,v2[1]);605if ( _ff_equal(v1[0],t1) && _ff_equal(v1[1],t2) ) {606/// dbg_printf ("Case 3.A.i, v1 == -v2\n");607_hecurve_set_identity (u, v);608break;609}610/// dbg_printf ("Case 3.A.iii v1 != +- v2\n");611// case 3.A.iii612_ff_sub(t1,v2[1],v1[1]);613#if ! HECURVE_FAST614if ( _ff_zero(t1) ) { err_printf ("v2[1] == v1[1] in case 3.A.iii of hecurve_compose_special\n"); exit (0); }615#endif616_ff_invert(t2,t1);617_ff_sub(t1,v1[0],v2[0]);618ff_mult(t1,t1,t2);619_ff_neg(t2,t1);620_ff_mult(t3,v1[1],t1);621_ff_addto(t3,v1[0]);622hecurve_g2_square_1 (u, v, t2, t3, f);623} else { // u1 != u2624/// dbg_printf ("Case 3.B, u1 != u2\n");625// case 3.B626// We can assume Resultant(u1,u2) == 0, since otherwise we wouldn't have been called627_ff_t_declare_dynamic x2, y2, x3, y3;628629// this is expensive, but this is a special case - may optimize later to handle recursion more elegantly630_ff_init(x2); _ff_init(x3);631_ff_init(y2); _ff_init(y3);632633/// dbg_printf ("Case 3.B.ii, res(u1,u2) == 0\n");634// need to compute gcd(u1,u2) = x-x1 where x1 = (u1[1]-u2[1])/(u2[0]-u1[0]))635_ff_sub(t1,u1[1],u2[1]);636#if ! HECURVE_FAST637if ( _ff_zero(t1) ) { err_printf ("u1[1] == u2[1] in case 3.B.ii of hecurve_compose_special\n"); exit (0); }638#endif639_ff_invert(t2,t1);640_ff_sub(t1,u2[0],u1[0]);641_ff_mult(t3,t1,t2);642/// dbg_printf ("gcd(u1,u2) = x-%Zd\n", _ff_wrap_mpz(t3,0));643// compute v1(x1) and v2(x1) and compare644_ff_mult(t1,v1[1],t3);645_ff_addto(t1,v1[0]);646_ff_mult(t2,v2[1],t3);647_ff_addto(t2,v2[0]);648// Need to extract coords of P2 and P3 in either case, so compute them here649// This code could be cleaned up a bit - there is some double negation, but it is a special case...650_ff_add(y2,u1[1],t3);651_ff_neg(x2,y2);652_ff_mult(y2,v1[1],x2);653_ff_addto(y2,v1[0]);654_ff_add(y3,u2[1],t3);655_ff_neg(x3,y3);656_ff_mult(y3,v2[1],x3);657_ff_addto(y3,v2[0]);658/// dbg_printf ("P2 = (%Zd,%Zd), P3 = (%Zd,%Zd)\n", _ff_wrap_mpz(x2,0), _ff_wrap_mpz(y2,1), _ff_wrap_mpz(x3,2), _ff_wrap_mpz(y3,3));659if ( _ff_equal(t1,t2) ) {660_ff_t_declare_dynamic u3[3], v3[2], u4[3], v4[2], u5[3], v5[2];661662// this is expensive, but this is a special case - may optimize later to handle recursion more elegantly663_ff_init(u3[0]); _ff_init(u3[1]); _ff_init(u3[2]); _ff_init(v3[0]); _ff_init(v3[1]);664_ff_init(u4[0]); _ff_init(u4[1]); _ff_init(u4[2]); _ff_init(v4[0]); _ff_init(v4[1]);665_ff_init(u5[0]); _ff_init(u5[1]); _ff_init(u5[2]); _ff_init(v5[0]); _ff_init(v5[1]);666667/// dbg_printf ("Case 3.B.ii.a, v1(x1) == v2(x1)\n");668_ff_neg(t1,t3);669_ff_mult(t2,v1[1],t3);670_ff_add(t2,t2,v1[0]);671hecurve_g2_square_1 (u4, v4, t1, t2, f); // D' = 2(P1)672/// dbg_printf ("D' = (%Zdx^2 + %Zdx + %Zd, %Zdx + %Zd)\n",_ff_wrap_mpz(u4[2],0), _ff_wrap_mpz(u4[1],1), _ff_wrap_mpz(u4[0],2), _ff_wrap_mpz(v4[1],3), _ff_wrap_mpz(v4[0],4));673// could we use hecurve_compose_1_2 here?674_ff_set_zero(u3[2]); _ff_set_one(u3[1]);675_ff_neg(u3[0],x2);676_ff_set_zero(v3[1]);677_ff_set(v3[0],y2);678hecurve_compose (u5, v5, u3, v3, u4, v4, f, 0); // D'' = D' + (P2)679/// dbg_printf ("D'' = (%Zdx^2 + %Zdx + %Zd, %Zdx + %Zd)\n",_ff_wrap_mpz(u5[2],0), _ff_wrap_mpz(u5[1],1), _ff_wrap_mpz(u5[0],2), _ff_wrap_mpz(v5[1],3), _ff_wrap_mpz(v5[0],4));680_ff_neg(u3[0],x3);681_ff_set(v3[0],y3);682hecurve_compose (u, v, u3, v3, u5, v5, f, 0); // D = D'' + (P3)683_ff_clear(u3[0]); _ff_clear(u3[1]); _ff_clear(u3[2]); _ff_clear(v3[0]); _ff_clear(v3[1]);684_ff_clear(u4[0]); _ff_clear(u4[1]); _ff_clear(u4[2]); _ff_clear(v4[0]); _ff_clear(v4[1]);685_ff_clear(u5[0]); _ff_clear(u5[1]); _ff_clear(u5[2]); _ff_clear(v5[0]); _ff_clear(v5[1]);686} else {687/// dbg_printf ("Case 3.B.ii.a, v1(x1) != v2(x1)\n");688hecurve_make_2 (u, v, x2, y2, x3, y3);689}690_ff_clear(x2); _ff_clear(x3);691_ff_clear(y2); _ff_clear(y3);692}693break;694default:695err_printf ("Invalid degree in hecurve_compose!\n"); exit (0);696}697#if ! HECURVE_FAST698_dbg_print_uv ("Compose Special Result: ", u, v);699#endif700#if HECURVE_VERIFY701if ( ! hecurve_verify (u, v, f) ) {702err_printf ("hecurve_compose_special output verification failed for inputs:\n");703err_printf (" "); hecurve_print(u1,v1);704err_printf (" "); hecurve_print(u2,v2);705err_printf ("note that inputs have been modified if output overlapped.\n");706exit (0);707}708#endif709return 1;710}711712713// Algorithm 14.20 p.318714// As above, handle aligned overlap of (u,v) with (u1,v1) and/or (u2,v2)715void hecurve_g2_compose_1_2 (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t u2[3], ff_t v2[2], ff_t f[6])716{717_ff_t_declare_reg r, inv, t1, t2, w1, L0, L1, s0;718719/// dbg_printf ("Compose_1_2\n");720721// 1. compute r = u2 mod u1722_ff_sub(w1,u2[1],u1[0]);723ff_mult(w1,w1,u1[0]);724_ff_sub(r,u2[0],w1);725/// dbg_printf ("r = %Zd\n",_ff_wrap_mpz(r,0));726727// 2. compute inverse of u2 mod u1728_ff_invert(inv, r);729/// dbg_printf ("inv = %Zd\n",_ff_wrap_mpz(inv,0));730731// 3. compute s = ((v1-v2)inv) mod u1732_ff_mult(w1,v2[1],u1[0]); // BUG FIX - handbook incorrectly uses -v2[1]u1[0] - should be positive733_ff_addto(w1,v1[0]);734_ff_subfrom(w1,v2[0]);735_ff_mult(s0,w1,inv);736/// dbg_printf ("s = %Zd\n", _ff_wrap_mpz(s0,0));737738// 4. compute L = su2 = s0*x^2 + L1*x + L0739_ff_mult(L1,s0,u2[1]);740_ff_mult(L0,s0,u2[0]);741// no reduction required for L coeffs742/// dbg_printf ("L = su2 = %Zdx^2 + %Zdx + %Zd\n",_ff_wrap_mpz(s0,0), _ff_wrap_mpz(L1,1), _ff_wrap_mpz(L0,2));743744// 5. compute t = (f - v2^2)/u2 = x^3 + t2*x2 + t1*x + t0 note h = 0745_ff_neg(t2,u2[1]);746_ff_square(t1,u2[1]);747_ff_subfrom(t1,u2[0]);748if ( _ff_nonzero(f[3]) ) _ff_addto(t1,f[3]);749/// dbg_printf ("t = x^3 + %Zdx^2 + %Zdx + t0\n",_ff_wrap_mpz(t2,0), _ff_wrap_mpz(t1,1));750751// 6. compute u = (t-s(L+2v2))/u1 = x^2 + u[1]*x + u[0] note h = 0752_ff_set_one(u[2]);753_ff_square(r,s0);754_ff_sub(w1,t2,r);755_ff_sub(u[1],w1,u1[0]); // ther should be no overlap - potentially destroys u1[1] and u2[1]756_ff_add(w1,v2[1],v2[1]);757_ff_addto(w1,L1);758ff_mult(w1,w1,s0);759_ff_sub(r,t1,w1);760_ff_mult(t2,u1[0],u[1]);761_ff_sub(u[0],r,t2); // potentially destroys u1[0] and u2[0]762/// dbg_printf ("u = x^2 + %Zdx + %Zd\n",_ff_wrap_mpz(u[1],0), _ff_wrap_mpz(u[0],1));763764// 7. compute v = (-(L+v2)) mod u = v[1]x + v[0] note h = 0765_ff_mult(w1,s0,u[1]);766_ff_subfrom(w1,L1);767_ff_subfrom(w1,v2[1]); // v2[1] could overlap v[1]768_ff_set (v[1],w1);769_ff_mult(w1,s0,u[0]);770_ff_subfrom(w1,L0);771_ff_subfrom(w1,v2[0]); // v2[0] could overlap v[0]772_ff_set(v[0],w1);773/// dbg_printf ("v = %Zdx + %Zd\n",_ff_wrap_mpz(v[1],0), _ff_wrap_mpz(v[0],1));774}775776777// This code would benefit from precomputing f' - todo later778// Note that overlap of u[0] and u0 (and v[0] and v0) is possible!779void hecurve_g2_square_1 (ff_t u[3], ff_t v[2], ff_t u0, ff_t v0, ff_t f[6])780{781_ff_t_declare_reg x, y, t1, t2, t3;782783if ( _ff_zero(v0) ) { _hecurve_set_identity (u, v); return; }784// The code below corresponds to (14.9) on p. 314 of Handbook of E+HE Crypto785/// dbg_printf ("Square_1 (x + %Zd,%Zd)\n",_ff_wrap_mpz(u0,0),_ff_wrap_mpz(v0,0));786// u = u1^2787_ff_set (t3, u0); // save t3=u0 since we need it later and may overwrite it here.788_ff_set_one(u[2]);789_ff_add(u[1],t3,t3);790_ff_square(u[0],t3);791/// dbg_printf ("u = x^2 + %Zdx + %Zd\n",_ff_wrap_mpz(u[1],0), _ff_wrap_mpz(u[0],1));792// v = (f'(-u1[0])x + f'(-u1[0])u1[0])/(2v1) + v1793// Compute y = f'(-u1[0]) - note that this code assumes that 5* max coeff of f fits in an ff_t794_ff_neg(x,t3);795_ff_set(y,f[1]);796if ( _ff_nonzero(f[2]) ) {797_ff_add(t2,f[2],f[2]);798ff_mult(t2,t2,x);799_ff_addto(y,t2);800}801_ff_square(t1,x);802if ( _ff_nonzero(f[3]) ) {803_ff_add(t2,f[3],f[3]);804_ff_addto(t2,f[3]);805ff_mult(t2,t2,t1);806_ff_addto(y,t2);807}808#if FF_WORDS == 1809if ( _ff_p != 5 ) {810#endif811ff_square(t1,t1);812_ff_add(t2,t1,t1);813_ff_x2(t2);814_ff_addto(t2,t1);815_ff_addto(y,t2);816#if FF_WORDS == 1817}818#endif819/// dbg_printf ("f'(-u0) = %Zd\n",_ff_wrap_mpz(y,0));820_ff_add(t2,v0,v0);821_ff_invert(t1,t2);822_ff_mult(v[1],t1,y);823_ff_mult(t1,v[1],t3);824_ff_add(v[0],t1,v0);825/// dbg_printf ("v = %Zdx + %Zd\n",_ff_wrap_mpz(v[1],0), _ff_wrap_mpz(v[0],0));826}827828/*829This algorithm handles squaring a degree 2 element where Resultant(2v,u) = 0.830In this case the divisor contains a point with order 2 and we simply want to square the other point.831This corresponds to case 3.A.ii.b in the general algorithm, but may also be detected in hecurve_square832*/833void hecurve_g2_square_2_r0 (ff_t u[3], ff_t v[2], ff_t u1[3], ff_t v1[2], ff_t f[6])834{835_ff_t_declare_reg t1, t2;836837// Since h = 0, we simply need gcd(2v1, u1) which must be (x+v1[0]/v1[1]) hence x1 = -v1[0]/v1[1] is the x-coord of P1838// We want to then double P2 = (-u1[1]-x1,v1(-u1[1]-x1)) see p. 315 of the handbook839if ( _ff_zero(v1[1]) ) {840if ( _ff_zero(v1[0]) ) { _hecurve_set_identity (u, v); return; }841// this should be impossible if Results(u,2v) = 0842err_printf ("v1[1] = 0 and v1[0] != 0 in hecurve_square_2_r0!");843exit (0);844}845_ff_invert(t1,v1[1]);846_ff_neg(t2,v1[0]);847ff_mult(t1,t1,t2);848_ff_addto(t1,u1[1]);849_ff_neg(t2,t1);850ff_mult(t2,t2,v1[1]);851_ff_addto(t2,v1[0]);852hecurve_g2_square_1 (u, v, t1, t2, f);853}854855856#endif857858859