#include <stdlib.h>
#include <stdio.h>
#include "ff.h"
ff_t _ff_t1;
ff_t _ff_p;
ff_t _ff_2g;
ff_t _ff_2gi;
ff_t _ff_3g;
ff_t _ff_negone;
ff_t _ff_half;
ff_t _ff_third;
static unsigned long mod256itab[256];
static ff_t __ff_mont_itab[FF_ITAB_SIZE];
static ff_t _ff_2exp;
unsigned long _ff_p2_m;
int _ff_p2_e;
unsigned long _ff_p3_m;
int _ff_p3_m1mod3;
int _ff_p3_e;
ff_t _ff_2Sylow_tab[64][2];
static ff_t _ff_3Sylow_tab[42][2];
ff_t _ff_cbrt_unity;
int _ff_sqrt_setup;
int _ff_cbrt_setup;
int _ff_p1mod3;
ff_t *_ff_mont_itab = __ff_mont_itab;
ff_t _ff_mont_R;
ff_t _ff_mont_R2;
unsigned long _ff_mont_pni;
void ff_montgomery_setup (int ring);
int ff_ui_legendre (unsigned long a, unsigned long b);
void ff_ext_setup(void);
int _ff_nr35_tab[35] = { 0, 0, 5, 5, 0, 7, 7, 5, 5, 0, 7, 0, 5, 5, 0, 0, 0, 5, 5, 7, 7, 0, 5, 5, 7, 0, 7, 5, 5, 0, 0, 7, 5, 5, 7};
int _ff_nric35_tab[35] = { 0, 0, 2, 3, 0, 4, 1, 2, 3, 0, 2, 0, 2, 3, 0, 0, 0, 2, 3, 4, 1, 0, 2, 3, 2, 0, 4, 2, 3, 0, 0, 2, 2, 3, 1};
void ff_setup_ui (unsigned long p)
{
static int init = 0;
unsigned long n;
ff_t t;
#if FF_WORDS != 1
err_printf ("ff_setup_ui only supports single-word Montgomery or native representation\n");
exit (0);
#endif
#if FF_MONTGOMERY
if ( ! init ) { for ( n = 1 ; n < 256 ; n+= 2 ) mod256itab[n] = ff_ui_inverse(n,256); init = 1; }
#endif
if ( p && _ff_p == p ) return;
if ( p > (1UL<<FF_BITS) ) { printf ("ff_setup: p=%lu is too large, FF_BITS = %d\n", p, FF_BITS); exit (0); }
if ( p < 3 ) { printf ("ff_setup: p=%lu must be an odd prime\n", p); exit (0); }
_ff_p = p;
#if FF_MONTGOMERY
ff_montgomery_setup (0);
#endif
_ff_p2_m = (_ff_p-1)/2;
_ff_set_ui (_ff_half, _ff_p2_m+1);
_ff_set_zero(_ff_negone); _ff_dec(_ff_negone);
for ( _ff_p2_e = 1 ; !(_ff_p2_m&0x1) ; _ff_p2_m>>=1, _ff_p2_e++ );
_ff_sqrt_setup = _ff_cbrt_setup = 0;
_ff_p1mod3 = ((p%3)==1);
n = (_ff_p1mod3? (2*p+1UL)/3UL : (p==3?0:(p+1UL)/3UL));
_ff_set_ui(_ff_third,n);
if ( _ff_p1mod3 ) {
_ff_p3_m = (_ff_p-1)/3;
for ( _ff_p3_e = 1 ; !(_ff_p3_m%3) ; _ff_p3_m /= 3, _ff_p3_e++ );
_ff_p3_m1mod3 = ((_ff_p3_m%3)==1);
} else {
_ff_p3_m = (_ff_p+1)/3;
for ( _ff_p3_e = 1 ; !(_ff_p3_m%3) ; _ff_p3_m /= 3, _ff_p3_e++ );
_ff_p3_m1mod3 = ((_ff_p3_m%3)==1);
}
_ff_2exp = _ff_2g = _ff_3g = 0;
ff_ext_setup();
return;
}
#if FF_WORDS == 1
#define _ff_raw_set_mpz(z,X) ((z) = mpz_get_ui(X))
#define _ff_raw_get_mpz(Z,x) (mpz_set_ui(Z,x),Z)
#endif
#if FF_WORDS == 1 && FF_MONTGOMERY
void ff_montgomery_setup (int ring)
{
register unsigned long p, t;
register int i;
p = _ff_p;
t = mod256itab[p&0xFF];
t = (2*t + (-(p*t*t)))&0xFFFF;
t = (2*t + (-(p*t*t)))&0xFFFFFFFF;
#if FF_HALF_WORDS == 1
_ff_mont_pni = (-t)&0xFFFFFFFF;
t = (1UL<<FF_MONTGOMERY_RBITS)%p;
_ff_mont_R = (ff_t)t;
t = (t*t)%p;
_ff_mont_R2 = (ff_t)t;
#else
t = (2*t + (-(p*t*t)))&0xFFFFFFFFFFFFFFFF;
_ff_mont_pni = -t;
_ff_mont_R = (1UL<<(FF_MONTGOMERY_RBITS-1))%p;
_ff_mont_R += _ff_mont_R;
if ( _ff_mont_R > p ) _ff_mont_R -= p;
_ff_mont_R2 = _ff_mont_R;
for ( i = 0 ; i < FF_MONTGOMERY_RBITS ; i++ ) {
_ff_mont_R2 += _ff_mont_R2;
if ( _ff_mont_R2 > p ) _ff_mont_R2 -= p;
}
#endif
if ( ring ) return;
t = 1;
for ( i = 0 ; i <= 3*FF_MONTGOMERY_RBITS ; i++ ) {
_ff_mont_itab[i] = t;
t += t;
if ( t >= _ff_p ) t -= _ff_p;
}
}
unsigned long ff_montgomery1_invert (unsigned long x)
{
register unsigned long r, s, t, v;
register int k;
if ( _ff_zero(x) ) { printf ("ff_invert: attempt to invert 0,\n");exit(0); return 0; }
r = x; s = 1; t = _ff_p; v = 0; k = 0;
while ( r > 0 ) {
if ( !(t&0x1UL) ) {
t >>= 1; s <<= 1;
} else if ( !(r&0x1UL) ) {
r >>= 1; v <<= 1;
} else if ( t > r ) {
t = (t-r)>>1; v += s; s <<= 1;
} else {
r = (r-t)>>1; s += v; v <<= 1;
}
k++;
}
if ( v >= _ff_p ) v -= _ff_p;
v = _ff_p - v;
v = ff_montgomery1_mult (v, _ff_mont_itab[3*FF_MONTGOMERY_RBITS-k]);
#if ! FF_FAST
if ( ff_montgomery1_mult (v,x) != _ff_mont_R ) {printf ("ff_montgomery1_invert failed, %lu*%lu != 1, raw input: %lu raw output %lu\n", _ff_get_ui(x), _ff_get_ui(v), x, v); exit (0); }
#endif
return v;
}
#endif
void ff_parallel_invert (ff_t z[], ff_t x[], unsigned n)
{
ff_t c[FF_MAX_PARALLEL_INVERTS];
register ff_t u, v;
register unsigned i;
if ( ! n ) return;
if ( n > FF_MAX_PARALLEL_INVERTS ) { printf ("Exceeded FF_MAX_INVERTS, %d > %d\n", n, FF_MAX_PARALLEL_INVERTS); exit (0); }
_ff_set (c[0], x[0]);
for ( i = 1 ; i < n ; i++ ) _ff_mult (c[i], c[i-1], x[i]);
_ff_invert (u, c[n-1]);
for ( i = n-1 ; i > 0 ; i-- ) {
_ff_mult (v, c[i-1], u);
_ff_mult (u, u, x[i]);
_ff_set (z[i], v);
}
_ff_set (z[0], u);
}
static inline void ff_setup_2exp(void) { ff_t t; if ( _ff_2exp ) return; _ff_set_one(t); _ff_x2(t); ff_exp_ui (&_ff_2exp,&t,(_ff_p+3)/4-1); }
void ff_setup_2g (void)
{
ff_t t;
register int i,n;
if ( _ff_2g ) return;
if ( _ff_p2_e == 1 ) { _ff_set(_ff_2g,_ff_negone); _ff_set(_ff_2gi,_ff_negone); _ff_set(_ff_2Sylow_tab[0][0],_ff_negone); return; }
_ff_set_zero(t);
if ( ! _ff_p1mod3 ) { _ff_set_i (t,-3); goto SylowSetup; }
if ( (_ff_p&0x7UL) == 5 ) { _ff_set_ui (t, 2); goto SylowSetup; }
n = _ff_nr35_tab[_ff_p%35];
if ( n ) { _ff_set_ui (t, n); goto SylowSetup; }
for ( n = 11 ; ; n++ ) if ( ff_ui_legendre (n, _ff_p) < 0 ) break;
_ff_set_ui(t,n);
SylowSetup:
ff_exp_ui (&_ff_2g, &t, _ff_p2_m);
_ff_set(_ff_2Sylow_tab[0][0], _ff_2g);
_ff_set(_ff_2gi,_ff_2g);
for ( i = 1 ; i < _ff_p2_e-1 ; i++ ) { _ff_square(_ff_2Sylow_tab[i][0],_ff_2Sylow_tab[i-1][0]); ff_mult(_ff_2gi,_ff_2gi,_ff_2Sylow_tab[i][0]); }
_ff_set(_ff_2Sylow_tab[i][0],_ff_negone);
ff_negate(_ff_2gi);
for ( i = 0 ; i < _ff_p2_e-1 ; i++ ) _ff_mult(_ff_2Sylow_tab[i][1], _ff_2Sylow_tab[i][0], _ff_2Sylow_tab[i+1][0]);
_ff_set(_ff_2Sylow_tab[i][1],_ff_negone);
}
int ff_invsqrt (ff_t o[1], ff_t a[1], int ext)
{
ff_t b, x, y;
int sts;
if ( _ff_zero(a[0]) ) { ff_setup_2g(); _ff_set_zero (o[0]); return 1; }
ff_exp_ui (&x, a, (_ff_p2_m-1)/2);
ff_mult (b, a[0], x);
ff_mult (b, b, x);
sts = ff_2Sylow_invsqrt(&y,&b, ext);
if ( ! ext && ! sts ) return 0;
ff_mult (x, x, y);
#if ! FF_FAST
_ff_square (y, x);
if ( ! sts ) ff_mult(y,y,_ff_2g);
ff_mult(y,y,a[0]);
if ( ! _ff_one(y) ) {
printf ("p=%lu, a=%lu, b=%lu, 2g=%lu, 2gi=%lu\n", _ff_p, _ff_get_ui(a[0]), _ff_get_ui(b), _ff_get_ui(_ff_2g), _ff_get_ui(_ff_2gi));
if ( sts ) {
printf ("ff_sqrt failed, %lu^2 != 1/%lu\n", _ff_get_ui(x), _ff_get_ui(a[0])); exit (0);
} else {
printf ("ff_sqrt failed, %lu^2*%lu != 1/%lu\n", _ff_get_ui(x), _ff_get_ui(_ff_2g), _ff_get_ui(a[0])); exit (0);
}
}
#endif
_ff_set (o[0], x);
return sts;
}
int _ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext)
{
register ff_t b, q, q1, q2, t, w1, w2;
register int i, j, k, s;
int sts;
_ff_set (w1, _ff_2Sylow_tab[_ff_p2_e-2][0]);
_ff_set (w2, _ff_2Sylow_tab[_ff_p2_e-2][1]);
_ff_set_one (t);
_ff_set (b,a[0]);
sts = 1;
for(;;) {
_ff_set (q, b);
for ( s = 2 ; s<_ff_p2_e+1 ; s++ ) {
j=1;
if ( _ff_equal(q,w1) ) break;
j=0;
if ( _ff_equal(q,w2) ) break;
ff_square (q,q);
}
k = _ff_p2_e-s;
#if ! FF_FAST
if ( k < 0 ) { printf ("Unexpected result: k<0 in ff_2Sylow_invsqrt?! a = %lu, p = %lu\n", _ff_get_ui(a[0]), _ff_p); exit (1); }
#endif
if ( !k ) {
if ( ! ext ) return 0;
sts=0;
if ( j ) {
ff_mult(b,b,_ff_2gi);
ff_mult(t,t,_ff_2gi);
} else {
ff_mult(b,b,_ff_2g);
}
} else {
ff_mult (b, b, _ff_2Sylow_tab[k][j]);
ff_mult (t, t, _ff_2Sylow_tab[k-1][j]);
}
if ( _ff_equal(b,_ff_negone) ) { ff_mult(t, t, w1); break; }
if ( _ff_one(b) ) break;
ff_mult(q2,q2,_ff_2Sylow_tab[_ff_p2_e-4][j]);
if ( _ff_one(q2) ) continue;
if ( _ff_equal(q2,_ff_negone) ) {
_ff_mult(b,b,_ff_2Sylow_tab[k+3][0]);
_ff_mult(t,t,_ff_2Sylow_tab[k+2][0]);
} else {
if ( _ff_equal(q2,w1) ) j = 1; else j = 0;
_ff_mult(b,b,_ff_2Sylow_tab[k+2][j]);
_ff_mult(t,t,_ff_2Sylow_tab[k+1][j]);
}
if ( _ff_equal(b,_ff_negone) ) { ff_mult(t, t, w1); break; }
if ( _ff_one(b) ) break;
}
check:
#if ! FF_FAST
_ff_square (b, t);
if ( ! sts ) ff_mult(b,b,_ff_2g);
ff_mult(b,b,a[0]);
if ( ! _ff_one(b) ) {
printf ("a=%lu, p=%lu\n", _ff_get_ui(a[0]), _ff_p);
if ( sts ) {
printf ("ff_2Sylow_invsqrt failed, %lu^2 * %lu != 1\n", _ff_get_ui(t), _ff_get_ui(a[0]));
} else {
printf ("ff_2Sylow_invsqrt failed, %lu^2 * %lu * %lu != 1\n", _ff_get_ui(t), _ff_get_ui(_ff_2g), _ff_get_ui(a[0]));
}
exit (0);
}
#endif
_ff_set(o[0],t);
return sts;
}
int ff_2Sylow_dlog (ff_t a, ff_t b, int e)
{
register ff_t x0, x1,y;
register int d, i, k0, k1;
ff_t t;
if ( _ff_one(b) ) return 0;
if ( _ff_equal(b,_ff_negone) ) return (1<<(e-1));
if ( e < 2 ) return -1;
if ( e == 2 ) {
if ( _ff_equal (b,a) ) return 1;
_ff_mult(y,a,b);
if ( _ff_one(y) ) return 3;
return -1;
}
d = e/2;
_ff_set(x0,a); _ff_set(y,b);
for ( i = 0 ; i < e-d ; i++ ) {
if ( i == d ) _ff_set(x1,x0);
ff_square(x0,x0);
ff_square(y,y);
}
if ( i == d ) _ff_set(x1,x0);
k0 = ff_2Sylow_dlog (x0,y,d);
if ( k0 < 0 ) return -1;
k1 = (1<<e)-k0;
ff_exp_ui(&t, &a, k1);
_ff_mult(y,b,t);
k1 = ff_2Sylow_dlog(x1,y,e-d);
if ( k1 < 0 ) return -1;
return (1<<d)*k1+k0;
}
void _ff_setup_3g (void)
{
register int i;
register unsigned long n;
ff_t r,s,t;
if ( _ff_3g || ! _ff_p1mod3 ) return;
_ff_cbrt_setup = 1;
n = (_ff_p-1)/(3*_ff_p3_m);
_ff_set_one(t);
_ff_x2(t);
for(;;) {
ff_exp_ui (&r,&t, _ff_p3_m);
if ( _ff_one(r) ) { _ff_inc(t); continue; }
if ( _ff_p3_e == 1 ) break;
ff_exp_ui (&s, &r, n);
if ( ! _ff_one(s) ) break;
_ff_inc(t);
}
_ff_set (_ff_3g, r);
_ff_set (_ff_3Sylow_tab[0][0], r);
_ff_square (_ff_3Sylow_tab[0][1],_ff_3Sylow_tab[0][0]);
for ( i = 1 ; i < _ff_p3_e ; i++ ) {
_ff_mult(_ff_3Sylow_tab[i][0],_ff_3Sylow_tab[i-1][0],_ff_3Sylow_tab[i-1][1]);
_ff_square(_ff_3Sylow_tab[i][1], _ff_3Sylow_tab[i][0]);
}
_ff_set(_ff_cbrt_unity,_ff_3Sylow_tab[_ff_p3_e-1][0]);
}
int ff_cbrt (ff_t o[1], ff_t a[1])
{
ff_t b, x, y;
ff_setup_3g();
if ( _ff_zero(a[0]) ) { _ff_set_zero (o[0]); return 1; }
if ( ! _ff_p1mod3 ) {
if ( _ff_p == 3 ) {_ff_set(o[0],a[0]); return 1; }
ff_exp_ui (o, a, (2*_ff_p-1)/3);
return 1;
}
if ( _ff_p3_m1mod3 ) {
ff_exp_ui (&x, a, (_ff_p3_m-1)/3);
_ff_square (y, x); _ff_mult (b, x, y);
ff_mult (b, b, a[0]);
} else {
ff_exp_ui (&x, a, (_ff_p3_m-2)/3);
_ff_square (y, x); _ff_mult (b, x, y);
_ff_square (y,a[0]); ff_mult (b, b, y);
}
if ( _ff_equal(b,_ff_3Sylow_tab[0][0]) || _ff_equal(b,_ff_3Sylow_tab[0][1]) ) return 0;
if ( ! ff_3Sylow_invcbrt(&y,&b) ) return 0;
if ( _ff_p3_m1mod3 ) { ff_square(x,x); ff_square(y,y); }
ff_mult (x, x, a[0]); ff_mult(x,x,y);
#if ! FF_FAST
_ff_square (y, x); ff_mult(y,y,x);
if ( ! _ff_equal(y,a[0]) ) { printf ("ff_cbrt failed, %lu^3 = %lu != %lu\n", _ff_get_ui(x), _ff_get_ui(y), _ff_get_ui(a[0])); exit (0); }
#endif
_ff_set (o[0], x);
return 1;
}
int ff_3Sylow_invcbrt (ff_t o[1], ff_t a[1])
{
register ff_t b, q, q1, t, w1, w2;
register int i, j, k, s;
ff_setup_3g();
if ( _ff_one(a[0]) ) { _ff_set_one(o[0]); return 1; }
if ( _ff_p3_e == 1 ) return 0;
_ff_set (w1, _ff_3Sylow_tab[_ff_p3_e-1][0]);
_ff_set (w2, _ff_3Sylow_tab[_ff_p3_e-1][1]);
_ff_set_one (t);
_ff_set (b,a[0]);
do {
_ff_set (q, b);
for ( s = 1 ; s < _ff_p3_e+1 ; s++ ) {
j=1;
if ( _ff_equal(q,w1) ) break;
j=0;
if ( _ff_equal(q,w2) ) break;
_ff_set(q1,q);
ff_square (q,q); ff_mult(q,q,q1);
}
k = _ff_p3_e-s;
#if ! FF_FAST
if ( k < 0 ) { printf ("Unexpected result: k<0 in ff_3Sylow_invsqrt?! a = %lu, p = %lu\n", _ff_get_ui(a[0]), _ff_p); exit (1); }
#endif
if ( k <= 0 ) return 0;
ff_mult (b, b, _ff_3Sylow_tab[k][j]);
ff_mult (t, t, _ff_3Sylow_tab[k-1][j]);
} while ( !_ff_one(b) );
#if ! FF_FAST
_ff_square (b, t); ff_mult(b,b,t);
ff_mult(b,b,a[0]);
if ( ! _ff_one(b) ) { printf ("ff_3Sylow_invcbrt failed, %lu^3 * %lu != 1\n", _ff_get_ui(t), _ff_get_ui(a[0])); exit (0); }
#endif
_ff_set(o[0],t);
return 1;
}
void ff_exp_ui (ff_t o[1], ff_t a[1], unsigned long e)
{
register int i, j;
register ff_t c;
register unsigned long m;
ff_t b[4];
if ( ! e ) { _ff_set_one (o[0]); return; }
i = _asm_highbit(e);
if ( i&1 ) i--;
m = 3UL<<i;
_ff_set (b[1], a[0]);
_ff_square (b[2],b[1]);
_ff_mult(b[3],b[2],b[1]);
_ff_set (c, b[(m&e)>>i]);
for ( m>>=2,i-=2 ; m ; m>>=2, i-=2 ) {
ff_square(c,c); ff_square(c,c);
j = (m&e)>>i;
if ( j ) ff_mult(c,c,b[j]);
}
_ff_set (o[0], c);
}
unsigned long ff_ui_inverse (unsigned long a, unsigned long m)
{
register unsigned long q, r, r0, r1;
register long t, t0, t1;
if ( a >= m ) a = a%m;
if ( a == 0 ) return 0;
t0 = 0; t1 = 1;
r0 = m; r1 = a;
while ( r1 > 0 ) {
q = r0 / r1;
r = r0 - q*r1;
r0 = r1; r1 = r;
t = t0 - q*t1;
t0 = t1; t1 = t;
}
if ( r0 > 1 ) return 0;
if ( t0 < 0 ) return m - ((unsigned long)(-t0));
return (unsigned long)t0;
}
int ff_ui_legendre (unsigned long a, unsigned long b)
{
register unsigned long r;
register int k, v;
#if ! FF_FAST
if ( ! (b&0x1) ) { printf ("ff_ui_Legendre, b = %lu must be odd!\n", b); exit (0); }
#endif
k = 1;
while ( a ) {
for ( v = 0 ; ! (a&0x1) ; v++ ) a >>= 1;
if ( v&0x1 ) if ( (b&0x7) ==3 || (b&0x7) ==5 ) k = -k;
if ( (a&0x3) == 3 && (b&0x3) == 3 ) k = -k;
r = a; a = b % r; b = r;
}
return ( b == 1 ? k : 0 );
}
int ff_fast_sqrt (ff_t o[1], ff_t a[1])
{
register ff_t i;
ff_t temp;
register int k;
#if FF_WORDS > 1
return 0;
#endif
if ( _ff_zero(a[0]) ) { _ff_set_zero(o[0]); return 1; }
if ( (_ff_p&3)==3 ) {
ff_exp_ui (&temp, a, (_ff_p+1)>>2);
} else if ((_ff_p&7)==5 ) {
ff_exp_ui (&temp, a, (_ff_p+3)>>3);
_ff_square (i,temp);
if ( _ff_equal (i, a[0]) ) {
_ff_set(o[0], temp);
return 1;
}
ff_setup_2exp();
ff_mult(temp, temp, _ff_2exp);
} else {
return 0;
}
_ff_square(i,temp);
if ( ! _ff_equal(i,a[0]) ) return 0;
_ff_set(o[0],temp);
return 1;
}