#ifndef _FF_INCLUDE_
#define _FF_INCLUDE_
#include "asm.h"
#include "smalljac.h"
/* Number of bits in an unsigned long, computed as 8 bits per byte. */
#define ULONG_BITS (8*sizeof(unsigned long))
/* NOTE(review): not referenced in this header; presumably a build switch read elsewhere -- confirm. */
#define FF_FAST 1
/* NOTE(review): presumably the largest batch ff_parallel_invert() handles -- confirm against its definition. */
#define FF_MAX_PARALLEL_INVERTS 2048
/* Element size in words; the Montgomery multipliers in this header are only compiled when this is 1. */
#define FF_WORDS 1
/* 2 when SMALLJAC_FF_64BITS is nonzero, otherwise 1 (SMALLJAC_FF_64BITS is not defined in this header). */
#define FF_HALF_WORDS (SMALLJAC_FF_64BITS?2:1)
/* Nonzero selects the Montgomery variants of the conversion and arithmetic macros in this header. */
#define FF_MONTGOMERY 1
/* Number of bits subtracted from the element width when computing FF_BITS. */
#define FF_NAIL_BITS 1
/* Element width in bits: FF_HALF_WORDS half-words of an unsigned long, minus the nail bits. */
#define FF_BITS ((FF_HALF_WORDS*ULONG_BITS/2) - FF_NAIL_BITS)
/* FF_HALF_WORDS half-words of an unsigned long, in bits (no nail bits removed). */
#define FF_MONTGOMERY_RBITS (FF_HALF_WORDS*ULONG_BITS/2)
/* NOTE(review): presumably the entry count of the table behind _ff_mont_itab -- confirm. */
#define FF_ITAB_SIZE (3*FF_MONTGOMERY_RBITS+1)
/*
 * ff_t is the storage type of one field element.  The narrower plain
 * `unsigned` is chosen only for the half-word Montgomery configuration;
 * every other configuration stores elements in an unsigned long.
 */
#if !(FF_MONTGOMERY && FF_HALF_WORDS == 1)
typedef unsigned long ff_t;
#else
typedef unsigned ff_t;
#endif
/* Shared scratch element written by the ff_negate, ff_add and ff_sub macros. */
extern ff_t _ff_t1;
/* Modulus used by the reduction macros (_ff_core_red, _ff_core_dom) and by _ff_set_ui. */
extern ff_t _ff_p;
/* Compared against the input in ff_2Sylow_invsqrt(); presumably a generator of the 2-Sylow subgroup -- confirm. */
extern ff_t _ff_2g;
/* Result stored by ff_2Sylow_invsqrt() when the input equals _ff_2g; presumably the inverse of _ff_2g -- confirm. */
extern ff_t _ff_2gi;
/* Lookup table; ff_2Sylow_invsqrt() reads entry [_ff_p2_e-2][1].  NOTE(review): layout is defined elsewhere. */
extern ff_t _ff_2Sylow_tab[64][2];
/* ff_setup_3g() runs _ff_setup_3g() only while this is zero. */
extern ff_t _ff_3g;
/* Multiplier used by _ff_div2; presumably the field element 1/2 -- confirm. */
extern ff_t _ff_half;
/* NOTE(review): not used in this header; presumably the field element 1/3 -- confirm. */
extern ff_t _ff_third;
/* Compared against the input in ff_2Sylow_invsqrt(); presumably the field element -1 -- confirm. */
extern ff_t _ff_negone;
/* Used to index _ff_2Sylow_tab.  NOTE(review): presumably p-1 = 2^_ff_p2_e * _ff_p2_m -- confirm. */
extern int _ff_p2_e;
extern unsigned long _ff_p2_m;
/* NOTE(review): not used in this header; presumably the analogous 3-power decomposition of p-1 -- confirm. */
extern int _ff_p3_e;
extern unsigned long _ff_p3_m;
extern int _ff_p3_m1mod3;
/* NOTE(review): not used in this header; presumably a table of FF_ITAB_SIZE entries used by Montgomery inversion -- confirm. */
extern ff_t *_ff_mont_itab;
/* Representation of the field element one when FF_MONTGOMERY is set (see _ff_set_one / _ff_one). */
extern ff_t _ff_mont_R;
/* Factor multiplied in by _ff_set_ui / _ff_rset_ui to bring an integer into Montgomery form. */
extern ff_t _ff_mont_R2;
/* Multiplier applied to the low product word inside ff_montgomery1_mult(). */
extern unsigned long _ff_mont_pni;
/* NOTE(review): not used in this header; presumably flags p == 1 mod 3 -- confirm. */
extern int _ff_p1mod3;
/* NOTE(review): not used in this header; presumably state for the cube-root routines -- confirm. */
extern int _ff_cbrt_setup;
extern ff_t _ff_cbrt_unity;
/* NOTE(review): defined elsewhere; presumably initialises the globals above for the modulus p -- confirm. */
void ff_setup_ui (unsigned long p);
/*
 * _ff_sprint(s,x): format the integer value of element x (as produced by
 * _ff_get_ui) in decimal into the character buffer s; evaluates to the
 * sprintf() return value.  The buffer is not bounds-checked, so the caller
 * must provide room for the digits of an unsigned long plus the terminator.
 *
 * The explicit cast matters: when ff_t is a plain `unsigned`, _ff_get_ui()
 * yields an `unsigned`, which does not match the "%lu" conversion.
 */
#define _ff_sprint(s,x) sprintf(s,"%lu", (unsigned long)_ff_get_ui(x))
/* _ff_set_raw(z,x): store x in z verbatim, with no reduction or conversion. */
#define _ff_set_raw(z,x) ((z) = (x))
/*
 * Conversions between unsigned integers and field elements.
 *
 *   _ff_set_ui(z,x)   z = element for the integer x; x is reduced mod _ff_p first.
 *   _ff_rset_ui(z,x)  same, but without the reduction step
 *                     (NOTE(review): presumably requires x < _ff_p -- confirm).
 *   _ff_get_ui(x)     integer value of element x.
 *
 * With FF_MONTGOMERY set, conversion in multiplies by _ff_mont_R2 and
 * conversion out multiplies by 1, both through ff_montgomery1_mult().
 * All three macros are plain expressions in both configurations (no
 * trailing semicolon), so they can be used wherever an expression can,
 * including as the body of an if that has an else.
 */
#if FF_MONTGOMERY
#define _ff_set_ui(z,x) ((z) = ff_montgomery1_mult(((x)%_ff_p), _ff_mont_R2))
#define _ff_rset_ui(z,x) ((z) = ff_montgomery1_mult((x), _ff_mont_R2))
#define _ff_get_ui(x) (ff_montgomery1_mult((x),1UL))
#else
#define _ff_set_ui(z,x) ((z) = ((x)%_ff_p))
#define _ff_rset_ui(z,x) ((z) = (x))
#define _ff_get_ui(x) (x)
#endif
/* _ff_get_i(x): integer value of element x (via _ff_get_ui), converted to long. */
#define _ff_get_i(x) ((long)_ff_get_ui(x))
/*
 * _ff_set_i(x,z): store the signed integer z into element x.  Note the
 * parameter order: x is the destination and z the source.
 *
 * A negative z is handled by converting |z| and negating the result.  The
 * magnitude is formed in unsigned arithmetic (0UL - (unsigned long)z)
 * because -(z) overflows for the most negative value of a signed type.
 * z is evaluated more than once, so it must not have side effects.
 */
#define _ff_set_i(x,z) if ( (z) < 0 ) { _ff_set_ui(x,(0UL-(unsigned long)(z))); ff_negate(x); } else { _ff_set_ui(x,z); }
/*
 * Elementary element macros.  Elements are single unsigned words, so these
 * are thin wrappers around ordinary C operators.
 */
/* z = 0. */
#define _ff_set_zero(z) ((z) = 0)
/* 1 when x is zero, 0 otherwise. */
#define _ff_zero(x) ((x) == 0)
/* Yields x itself (not normalised to 0/1); nonzero exactly when x is nonzero. */
#define _ff_nonzero(x) (x)
/* Lowest bit of the stored word. */
#define _ff_parity(x) ((x) & 1)
/* z = x. */
#define _ff_set(z,x) ((z) = (x))
/* 1 when the stored words are equal, 0 otherwise. */
#define _ff_equal(x,y) ((x) == (y))
/* The stored word itself. */
#define _ff_low_word(x) (x)
/*
 * The element one is the word 1 in the plain representation and
 * _ff_mont_R when FF_MONTGOMERY is set.
 */
#if !FF_MONTGOMERY
#define _ff_set_one(z) ((z) = 1UL)
#define _ff_one(z) ((z) == 1UL)
#else
#define _ff_set_one(z) _ff_set(z,_ff_mont_R)
#define _ff_one(z) _ff_equal(z,_ff_mont_R)
#endif
/* Raw word operations: none of these reduce modulo _ff_p. */
#define _ff_core_addto(z,x)   ((z) += (x))
#define _ff_core_subfrom(z,x) ((z) -= (x))
#define _ff_core_shiftl(z)    ((z) <<= 1)
#define _ff_core_shiftr(z)    ((z) >>= 1)
#define _ff_core_ge(x,y)      ((x) >= (y))
/*
 * _ff_addto(z,x): z += x, followed by one conditional subtraction of
 * _ff_p (_ff_core_red).  The work is done in a local temporary and z is
 * written once at the end.
 */
#define _ff_addto(z,x) { \
	register ff_t _ff_t; \
	_ff_set(_ff_t,z); \
	_ff_core_addto(_ff_t,x); \
	_ff_core_red(_ff_t); \
	_ff_set(z,_ff_t); \
}
/* _ff_add(z,x,y): z = x + y.  z is overwritten with x before y is read. */
#define _ff_add(z,x,y) { \
	_ff_set(z,x); \
	_ff_addto(z,y); \
}
/*
 * _ff_subfrom(z,x): z -= x.  _ff_p is added first when z < x
 * (_ff_core_dom) so the subtraction does not wrap.  x is evaluated twice.
 */
#define _ff_subfrom(z,x) { \
	register ff_t _ff_t; \
	_ff_set(_ff_t,z); \
	_ff_core_dom(_ff_t,x); \
	_ff_core_subfrom(_ff_t,x); \
	_ff_set(z,_ff_t); \
}
/* _ff_sub(z,x,y): z = x - y.  z is overwritten with x before y is read. */
#define _ff_sub(z,x,y) { \
	_ff_set(z,x); \
	_ff_subfrom(z,y); \
}
/* _ff_core_inc(z): add the element one to z without reducing. */
#if FF_MONTGOMERY
#define _ff_core_inc(z) _ff_core_addto(z,_ff_mont_R)
#else
#define _ff_core_inc(z) ((z)++)
#endif
/*
 * _ff_core_red(z): subtract _ff_p from z once if z >= _ff_p.
 * _ff_core_dom(z,x): add _ff_p to z once if z < x, so that a following
 *                    z -= x does not wrap.
 *
 * Both are wrapped in do { } while (0) so that each behaves as a single
 * statement: as a bare `if`, an `else` written after the macro call would
 * silently attach to the macro's own `if`.  Callers terminate the call
 * with a semicolon, exactly as before.
 */
#define _ff_core_red(z) do { if ( _ff_core_ge(z,_ff_p) ) _ff_core_subfrom (z,_ff_p); } while (0)
#define _ff_core_dom(z,x) do { if ( !_ff_core_ge(z,x) ) _ff_core_addto (z,_ff_p); } while (0)
/*
 * _ff_neg(z,x): z = -x, i.e. _ff_p - x for nonzero x and 0 for x == 0.
 * z is overwritten with _ff_p before x is read, so z and x must be
 * different objects; ff_negate() is the in-place variant.
 */
#define _ff_neg(z,x) if (_ff_nonzero(x) ) {_ff_set(z,_ff_p); _ff_core_subfrom(z,x); } else { _ff_set_zero(z); }
/* _ff_x2(z): z = 2*z.  Expands to a brace block, like the other compound macros (no trailing semicolon). */
#define _ff_x2(z) _ff_addto(z,z)
/* _ff_inc(z): z = z + 1, with one conditional subtraction of _ff_p. */
#define _ff_inc(z) {_ff_core_inc(z);_ff_core_red(z);}
/* _ff_dec(z): z = z - 1; zero wraps around to the representation of -1. */
#if FF_MONTGOMERY
#define _ff_dec(z) _ff_subfrom((z),_ff_mont_R)
#else
#define _ff_dec(z) if (z) { (z)--; } else { (z) = _ff_p-1; }
#endif
/*
 * Alias-safe variants of negate/add/subtract.  Each computes its result in
 * the shared scratch variable _ff_t1 and stores it into z only at the end,
 * so z may be the same object as x or y.
 */
/* ff_negate(z): z = -z in place (0 stays 0). */
#define ff_negate(z) if (_ff_nonzero(z) ) {_ff_set(_ff_t1,_ff_p); _ff_core_subfrom(_ff_t1,z); _ff_set(z,_ff_t1); } else { _ff_set_zero(z); }
/* ff_add(z,x,y): z = x + y. */
#define ff_add(z,x,y) {_ff_set(_ff_t1,x);_ff_addto(_ff_t1,y);_ff_set(z,_ff_t1);}
/* ff_sub(z,x,y): z = x - y.  The subtraction is applied to the scratch copy of x, not to z. */
#define ff_sub(z,x,y) {_ff_set(_ff_t1,x);_ff_subfrom(_ff_t1,y);_ff_set(z,_ff_t1);}
/*
 * Multiplication and inversion.
 *
 *   _ff_mult(z,x,y)  z = x*y
 *   _ff_invert(z,x)  z = inverse of x
 *   _ff_square(z,x)  z = x*x, expressed through _ff_mult
 *
 * With FF_MONTGOMERY set these go through ff_montgomery1_mult() and
 * ff_montgomery1_invert(); otherwise through the % operator and
 * ff_ui_inverse().
 */
#if FF_MONTGOMERY
#define _ff_mult(z,x,y) ((z) = ff_montgomery1_mult(x,y))
#define _ff_invert(z,x) ((z)=ff_montgomery1_invert(x))
#else
#define _ff_mult(z,x,y) ((z) = ((x)*(y)) % _ff_p)
#define _ff_invert(z,x) ((z) = ff_ui_inverse(x,_ff_p))
#endif
#define _ff_square(z,x) _ff_mult(z,x,x)
/*
 * _ff_div2(z,x): z = x * _ff_half.  An expression macro like _ff_mult;
 * it carries no trailing semicolon so it can be the body of an if/else.
 */
#define _ff_div2(z,x) _ff_mult(z,x,_ff_half)
/* _ff_incmult(z,x,w): z = (x + 1) * w.  z is overwritten before w is read. */
#define _ff_incmult(z,x,w) { _ff_set(z,x);_ff_inc(z);_ff_mult(z,z,w); }
/* _ff_multadd(z,x,y,a): z = x*y + a.  z is overwritten before a is read. */
#define _ff_multadd(z,x,y,a) {_ff_mult(z,x,y); _ff_addto(z,a); }
/* Public spelling of _ff_multadd. */
#define ff_multadd(z,x,y,a) _ff_multadd(z,x,y,a)
/*
 * Out-of-line routines; their definitions are not in this header.
 * (ff_setup_2g was previously declared twice; one declaration suffices.)
 */
/* Used by _ff_invert when FF_MONTGOMERY is set. */
unsigned long ff_montgomery1_invert (unsigned long x);
/* Used by _ff_invert otherwise, called as ff_ui_inverse(x,_ff_p). */
unsigned long ff_ui_inverse (unsigned long a, unsigned long m);
/* ff_residue() calls this with e = (_ff_p-1)/2 and tests o[0] against one; presumably o[0] = a[0]^e -- confirm. */
void ff_exp_ui (ff_t o[1], ff_t a[1], unsigned long e);
/* NOTE(review): presumably the Legendre symbol (a/b) -- confirm. */
int ff_ui_legendre (unsigned long a, unsigned long b);
/*
 * ff_sqrt()/ff_sqrt_ext() multiply o[0] by a[0] to obtain their result and
 * pass ext = 0 / 1 respectively; ff_sqrt() treats a zero return as failure.
 * NOTE(review): exact meaning of ext and of the return value is defined elsewhere.
 */
int ff_invsqrt (ff_t o[1], ff_t a[1], int ext);
/* Called at the top of ff_2Sylow_invsqrt() before _ff_2g and related globals are read. */
void ff_setup_2g (void);
/* NOTE(review): cube-root helpers; contracts are defined elsewhere. */
int ff_cbrt (ff_t o[1], ff_t a[1]);
int ff_3Sylow_invcbrt (ff_t o[1], ff_t a[1]);
/* NOTE(review): contract is defined elsewhere. */
int ff_fast_sqrt (ff_t o[1], ff_t a[1]);
/* Worker behind ff_setup_3g(), which calls it only while _ff_3g is zero. */
void _ff_setup_3g(void);
/* Runs _ff_setup_3g() unless _ff_3g is already nonzero. */
static inline void ff_setup_3g(void)
{
	if ( _ff_3g ) return;
	_ff_setup_3g();
}
/* NOTE(review): defined elsewhere; presumably stores the inverses of x[0..n-1] in z[0..n-1] -- confirm, including whether n is limited by FF_MAX_PARALLEL_INVERTS. */
void ff_parallel_invert (ff_t z[], ff_t x[], unsigned n);
/* Public spellings of the multiply and square macros. */
#define ff_mult(z,x,y)  _ff_mult(z,x,y)
#define ff_square(z,x)  _ff_square(z,x)
/*
 * ff_invert(z,x): z = inverse of x.  The element one is its own inverse,
 * so that case is answered by a plain copy and _ff_invert is skipped.
 */
#define ff_invert(z,x) { if ( _ff_one(x) ) _ff_set(z,x); else _ff_invert(z,x); }
/*
 * Returns 1 when the raw word sum x + y is either 0 or exactly _ff_p,
 * and 0 otherwise.  The sum is formed without reduction.
 */
static inline int ff_is_negation (ff_t x, ff_t y)
{
	register ff_t sum;

	_ff_set(sum,x);
	_ff_core_addto(sum,y);
	if ( _ff_zero(sum) ) return 1;
	return _ff_equal(sum,_ff_p);
}
#if FF_MONTGOMERY && FF_WORDS == 1 && FF_HALF_WORDS == 1
/*
 * Montgomery multiplication for the half-word configuration.
 *
 * Forms the full product x*y in an unsigned long, adds m*_ff_p where m is
 * the product times _ff_mont_pni truncated to an `unsigned`, drops the low
 * 32 bits, and finally subtracts _ff_p once if the result is not below it.
 *
 * NOTE(review): the hard-coded shift of 32 assumes a 32-bit unsigned and a
 * 64-bit unsigned long; presumably _ff_mont_pni is chosen so that the low
 * 32 bits of the sum are zero -- confirm against the setup code.
 */
static inline unsigned ff_montgomery1_mult (unsigned x, unsigned y)
{
	register unsigned long acc;
	register unsigned m;

	acc = (unsigned long)x*y;
	m = acc*_ff_mont_pni;
	acc += (unsigned long)m*_ff_p;
	acc >>= 32;
	return ( acc >= _ff_p ) ? acc - _ff_p : acc;
}
#endif
#if FF_MONTGOMERY && FF_WORDS == 1 && FF_HALF_WORDS == 2
/*
 * Montgomery multiplication for the full-word configuration.  Follows the
 * same steps as the half-word version but works on two-word quantities
 * through the _asm_* macros, which are not defined in this header
 * (presumably in asm.h).  Returns a1 after one conditional subtraction
 * of _ff_p.
 */
static inline unsigned long ff_montgomery1_mult (unsigned long x, unsigned long y)
{
register unsigned long x0, x1,a0, a1;
/* NOTE(review): presumably (x1:x0) = x*y as a two-word product, x1 being the high word -- confirm in asm.h. */
_asm_mult_1_1 (x1,x0,x,y);
/* Low product word times _ff_mont_pni; only the low word of this product is kept. */
a0 = x0*_ff_mont_pni;
/* NOTE(review): presumably (a1:a0) = a0*_ff_p -- confirm in asm.h. */
_asm_mult_1_1 (a1,a0,a0,_ff_p);
/* NOTE(review): presumably (a1:a0) += (x1:x0); only the high word a1 is used afterwards -- confirm in asm.h. */
_asm_addto_2_2 (a1,a0,x1,x0);
if ( a1 >= _ff_p ) a1 -= _ff_p;
return a1;
}
#endif
/*
 * Returns 1 when z is zero, or when ff_exp_ui() with exponent
 * (_ff_p-1)/2 yields the element one for z; returns 0 otherwise.
 */
static inline int ff_residue (ff_t z)
{
	ff_t pw;

	if ( _ff_zero(z) ) return 1;
	ff_exp_ui(&pw,&z,(_ff_p-1)/2);
	return _ff_one(pw);
}
/*
 * Calls ff_invsqrt() with ext = 0.  On a nonzero return, stores the
 * product of its output and a[0] in o[0] and returns 1; on a zero return,
 * leaves o[0] untouched and returns 0.
 */
static inline int ff_sqrt(ff_t o[1], ff_t a[1])
{
	ff_t inv_root;

	if ( ff_invsqrt(&inv_root,a,0) ) {
		_ff_mult(o[0],inv_root,a[0]);
		return 1;
	}
	return 0;
}
/*
 * Calls ff_invsqrt() with ext = 1, then always stores the product of its
 * output and a[0] in o[0], whatever the status.  Returns the status from
 * ff_invsqrt() unchanged.
 */
static inline int ff_sqrt_ext(ff_t o[1], ff_t a[1])
{
	ff_t inv_root;
	int sts;

	sts = ff_invsqrt(&inv_root,a,1);
	_ff_mult(o[0],inv_root,a[0]);
	return sts;
}
/* General-case worker, defined elsewhere. */
int _ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext);
/*
 * Front end for _ff_2Sylow_invsqrt(): after ff_setup_2g(), four special
 * inputs are answered directly, tested in this order (the first match wins):
 *
 *   a[0] == one         o[0] = one                              returns 1
 *   a[0] == _ff_2g      o[0] = _ff_2gi                          returns 0
 *   a[0] == _ff_negone  o[0] = _ff_2Sylow_tab[_ff_p2_e-2][1]    returns 1
 *   a[0] == _ff_2gi     o[0] = one                              returns 0
 *
 * Any other input is passed, together with ext, to _ff_2Sylow_invsqrt().
 * NOTE(review): presumably the return value says whether a[0] is a square -- confirm.
 */
static inline int ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext)
{
	ff_setup_2g();
	if ( _ff_one(a[0]) ) {
		_ff_set_one(o[0]);
		return 1;
	} else if ( _ff_equal(a[0],_ff_2g) ) {
		_ff_set(o[0],_ff_2gi);
		return 0;
	} else if ( _ff_equal(a[0],_ff_negone) ) {
		_ff_set(o[0], _ff_2Sylow_tab[_ff_p2_e-2][1]);
		return 1;
	} else if ( _ff_equal(a[0],_ff_2gi) ) {
		_ff_set_one (o[0]);
		return 0;
	}
	return _ff_2Sylow_invsqrt (o, a,ext);
}
#endif