CoCalc -- ff.h

Harvard Workshop: Distribution of modular symbols and L-values
code / alex / psage / psage / libs / smalljac / ff.h
²⁴¹⁸¹⁸ views
1
#ifndef _FF_INCLUDE_
2
#define _FF_INCLUDE_
3

4
/*
5
    Copyright 2007 Andrew V. Sutherland
6

7
    This file is part of smalljac.
8

9
    smalljac is free software: you can redistribute it and/or modify
10
    it under the terms of the GNU General Public License as published by
11
    the Free Software Foundation, either version 2 of the License, or
12
    (at your option) any later version.
13

14
    smalljac is distributed in the hope that it will be useful,
15
    but WITHOUT ANY WARRANTY; without even the implied warranty of
16
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
    GNU General Public License for more details.
18

19
    You should have received a copy of the GNU General Public License
20
    along with smalljac.  If not, see <http://www.gnu.org/licenses/>.
21
*/
22

23
// Stripped-down, self-contained implementation if 32 or 64 bit single precision
24
// finite fields using a Montgomery representation.  All you need is ff.h, asm.h, and ff.c
25

26
// THIS CODE IS NOT PORTABLE, but it is fast.  It assumes an unsigned is 32 bits and that an
27
// unsigned long is 64 bits and (most critically) uses  inline assembly directives.  These
28
// should work on any AMD/Intel compatible instruction set.
29

30
// The FF_MONTGOMERY flag should generally be set, the only reason to unset it is for 
31
// testing or benchmarking.  It can be helpful to find bugs in code that breaks when
32
// the field representation changes (e.g. when the bit string "000...01" is not the identity element).
33

34
#include "asm.h"
35
#include "smalljac.h"								// only needed to pick word size
36

37
#define ULONG_BITS	(8*sizeof(unsigned long))		// must be 32 or 64
38

39
#define FF_FAST					1				// define to turn off certain error checking
40

41
#define FF_MAX_PARALLEL_INVERTS		2048			// make big enough to not force chunking in hecurve1
42

43
#define FF_WORDS					1				// must be 1
44
#define FF_HALF_WORDS				(SMALLJAC_FF_64BITS?2:1)
45
#define FF_MONTGOMERY			1				// if 0 FF_HALF_WORDS must be 1
46
#define FF_NAIL_BITS				1				// must be at least 1 (need to be able to add without overflow)
47
#define FF_BITS					((FF_HALF_WORDS*ULONG_BITS/2) - FF_NAIL_BITS)
48
#define FF_MONTGOMERY_RBITS		(FF_HALF_WORDS*ULONG_BITS/2)	// note that R=B are identical - only one word is ever used
49
#define FF_ITAB_SIZE				(3*FF_MONTGOMERY_RBITS+1)
50

51

52
#if FF_MONTGOMERY && FF_HALF_WORDS == 1
53
typedef unsigned ff_t;
54
#else
55
typedef unsigned long ff_t;
56
#endif
57

58
// temporary value used by macros - could remove by replacing macros with inlines, probably should
59
extern ff_t _ff_t1;
60

61
// The globals below all assume a single modulus environment.  Multiple moduli can be managed by
62
// copying and resetting these (provided this is reasonably infrequent).  We don't put them into a
63
// dynamically allocated context to save the cost of dereferencing a pointer every time we want to
64
// perform a field operation.
65

66
extern ff_t _ff_p;
67
extern ff_t _ff_2g;					// generator of the Sylow 2-subgroup, necessarily a quadratic non-residue
68
extern ff_t _ff_2gi;					// inverse of _ff_2g
69
extern ff_t _ff_2Sylow_tab[64][2];
70
extern ff_t _ff_3g;					// generator of the Sylow 3-subgroup, necessarily a cubic non-residue
71
extern ff_t _ff_half;					// 1/2 = (p+1)/2
72
extern ff_t _ff_third;				// 1/3 = (p+1)/3 or (2p+1)/3 for p=2mod3 or 1mod3 (resp.)
73
extern ff_t _ff_negone;
74
extern int _ff_p2_e;
75
extern unsigned long _ff_p2_m;		// p = 2^e*m+1
76
extern int _ff_p3_e;
77
extern unsigned long _ff_p3_m;		// p = 3^e*m+1 when p=1mod3
78
extern int _ff_p3_m1mod3;
79
extern ff_t *_ff_mont_itab;
80
extern ff_t _ff_mont_R;
81
extern ff_t _ff_mont_R2;
82
extern unsigned long _ff_mont_pni;
83
extern int _ff_p1mod3;
84

85
extern int _ff_cbrt_setup;
86
extern ff_t _ff_cbrt_unity;			// MUST CALL ff_cbrt_setup() or ff_cbrt() to initialize.  Set to 1 if p=2mod3
87

88
void ff_setup_ui (unsigned long p);	// note - DOES NOT CHECK PRIMALITY
89

90

91
// WARNING - several of the macros below are decidedly unsafe.  In general, if it starts with an underscore,
92
// don't use side effects, and be aware that many can't be used in expressions.
93
// These could be converted to inline functions, but many involve assignments and would require passing
94
// by reference and then dereferencing a pointer (this is handled by reference types in C++). 
95
// A smart compiler could produce inline function code that is just as fast as these macros, but we won't rely on this.
96

97
// Input/Output
98
#define _ff_sprint(s,x)				sprintf(s,"%lu", _ff_get_ui(x))
99
#define _ff_set_raw(z,x)				((z) = (x))								// useful for setting random field elements.  assumes 0<=x<p
100
#if FF_MONTGOMERY
101
#define _ff_set_ui(z,x)				((z) = ff_montgomery1_mult(((x)%_ff_p), _ff_mont_R2));
102
#define _ff_rset_ui(z,x)				((z) = ff_montgomery1_mult((x), _ff_mont_R2))	// assumes 0<=x<p
103
#define _ff_get_ui(x)				(ff_montgomery1_mult((x),1UL))
104
#else
105
#define _ff_set_ui(z,x)				((z) = ((x)%_ff_p))
106
#define _ff_rset_ui(z,x)				((z) = (x))			 					// assumes 0<=x<p
107
#define _ff_get_ui(x)				(x)
108
#endif
109
// signed conversions - you must use these for negative values
110
#define _ff_get_i(x)						((long)_ff_get_ui(x))
111
#define _ff_set_i(x,z)					if ( (z) < 0 ) { _ff_set_ui(x,-(z));  ff_negate(x); } else { _ff_set_ui(x,z); }
112

113
// Basic operations and 0/1 - IMPORTANT: the binary value 1 is not the field identity in a Montgomery representation (but 0 is zero)
114
#define _ff_set_zero(z)				((z)=0)
115
#define _ff_zero(x)					(!(x))
116
#define _ff_nonzero(x)				(x)
117
#define _ff_parity(x)					((x)&0x1)
118
#define _ff_set(z,x)					((z)=(x))
119
#define _ff_equal(x,y)				((x) == (y))
120
#define _ff_low_word(x)				(x)
121
#if FF_MONTGOMERY
122
#define _ff_set_one(z)				_ff_set(z,_ff_mont_R)
123
#define _ff_one(z)					_ff_equal(z,_ff_mont_R)
124
#else
125
#define _ff_set_one(z)				((z)=1UL)
126
#define _ff_one(z)					((z)==1UL)
127
#endif
128
// end basic operations and 0/1
129

130
// Core arithmetic operations - these may be applied to unreduced values that fit in the word limit
131
#define _ff_core_addto(z,x)			((z) += (x))
132
#define _ff_core_subfrom(z,x)			((z) -= (x))											// assumes z dominates x
133
#define _ff_core_shiftl(z)				((z) <<= 1)
134
#define _ff_core_shiftr(z)				((z) >>= 1)
135
#define _ff_core_ge(x,y)				((x) >= (y))
136
#define _ff_addto(z,x)				{register ff_t _ff_t; _ff_set(_ff_t,z); _ff_core_addto(_ff_t,x);_ff_core_red(_ff_t); _ff_set(z,_ff_t);}
137
#define _ff_add(z,x,y)				{_ff_set(z,x);_ff_addto(z,y);}
138
#define _ff_subfrom(z,x)				{register ff_t _ff_t; _ff_set(_ff_t,z); _ff_core_dom(_ff_t,x); _ff_core_subfrom(_ff_t,x); _ff_set(z,_ff_t);}
139
#define _ff_sub(z,x,y)				{_ff_set(z,x);_ff_subfrom(z,y);}
140
#if FF_MONTGOMERY
141
#define _ff_core_inc(z)				_ff_core_addto(z,_ff_mont_R)
142
#else
143
#define _ff_core_inc(z)				((z)++)
144
#endif
145

146
// derived arithmetic operations - note that inputs cannot overlap outputs! use higher level versions if needed
147
// Note that core_red requires z < 2p
148
#define _ff_core_red(z)				if (_ff_core_ge(z,_ff_p) ) _ff_core_subfrom (z,_ff_p)
149
#define _ff_core_dom(z,x)			if ( !_ff_core_ge(z,x) ) _ff_core_addto (z,_ff_p)
150
#define _ff_neg(z,x)					if (_ff_nonzero(x) ) {_ff_set(z,_ff_p); _ff_core_subfrom(z,x); } else { _ff_set_zero(z); }
151
#define _ff_x2(z)					_ff_addto(z,z);		// this is much faster than shifting
152
#define _ff_inc(z)					{_ff_core_inc(z);_ff_core_red(z);}
153
#if FF_MONTGOMERY
154
#define _ff_dec(z)					_ff_subfrom((z),_ff_mont_R)
155
#else
156
#define _ff_dec(z)					if (z) { (z)--; } else { (z) = _ff_p-1; }
157
#endif
158
// end core arithmetic operations
159

160
// Safer versions - overlap ok, but still need to watch side effects and can't use in expressions
161
#define ff_negate(z)				if (_ff_nonzero(z) ) {_ff_set(_ff_t1,_ff_p); _ff_core_subfrom(_ff_t1,z);  _ff_set(z,_ff_t1); } else { _ff_set_zero(z); }
162
#define ff_add(z,x,y)				{_ff_set(_ff_t1,x);_ff_addto(_ff_t1,y);_ff_set(z,_ff_t1);}
163
#define ff_sub(z,x,y)					{_ff_set(_ff_t1,x);_ff_subfrom(z,y);_ff_set(z,_ff_t1);}
164

165
// higher arithmetic operations
166
#if FF_MONTGOMERY
167
#define _ff_mult(z,x,y)				((z) = ff_montgomery1_mult(x,y))
168
#define _ff_square(z,x)				_ff_mult(z,x,x)
169
#define _ff_invert(z,x)				((z)=ff_montgomery1_invert(x))
170
#else
171
#define _ff_mult(z,x,y)				((z) = ((x)*(y)) % _ff_p)
172
#define _ff_square(z,x)				_ff_mult(z,x,x)
173
#define _ff_invert(z,x)				((z) = ff_ui_inverse(x,_ff_p))
174
#endif
175
#define _ff_div2(z,x)					_ff_mult(z,x,_ff_half);
176

177
#define _ff_incmult(z,x,w)				{ _ff_set(z,x);_ff_inc(z);_ff_mult(z,z,w); }	// could be optimized
178
#define _ff_multadd(z,x,y,a)			{_ff_mult(z,x,y); _ff_addto(z,a); }		// no optimization here
179
#define ff_multadd(z,x,y,a)			_ff_multadd(z,x,y,a)					// overlap of x and z ok but a can't overlap
180

181
unsigned long ff_montgomery1_invert (unsigned long x);
182
unsigned long ff_ui_inverse (unsigned long a, unsigned long m);
183
void ff_exp_ui (ff_t o[1], ff_t a[1], unsigned long e);
184
int ff_ui_legendre (unsigned long a, unsigned long b);
185
int ff_invsqrt (ff_t o[1], ff_t a[1], int ext);							// returns 1 if sqrt is rational, 0 if sqrt is in F_p^2.  In the later case, if ext is true, sqrt(a)=o*z where z^2=nr
186
void ff_setup_2g (void);	
187
int ff_cbrt (ff_t o[1], ff_t a[1]);										// returns 1 for success, 0 if a is a non-residue
188
int ff_3Sylow_invcbrt (ff_t o[1], ff_t a[1]);								// computes a^{-1/3} for a in the 3-Sylow subgroup, returns 0 if a is not a residue in the 3-Sylow
189
int ff_fast_sqrt (ff_t o[1], ff_t a[1]);									// fast sqrt via single exponentiation, returns 1 for success, 0 if no power of a is sqrt(a) (obsolete, not used)
190

191
// called automatically by ff_sqrt, but also used in ffext.c - note that for p=3mod4, e=1 and -1 generates the 2-Sylow subgroup
192
void ff_setup_2g(void);
193

194
// ditto for cbrt, only relevant when p=1mod3
195
void _ff_setup_3g(void);
196
static inline void ff_setup_3g(void) { if ( ! _ff_3g ) _ff_setup_3g(); }
197

198
void ff_parallel_invert (ff_t z[], ff_t x[], unsigned n);					// z and x may overlap
199

200
#define ff_mult(z,x,y)				_ff_mult(z,x,y)					// overlap is ok for these macros (but not side effects)
201
#define ff_square(z,x)				_ff_square(z,x)
202
#define ff_invert(z,x)				{ if ( ! _ff_one(x) ) _ff_invert(z,x); else _ff_set(z,x); }
203

204
// end higher arithmetic operations
205

206
// Inline Functions
207
static inline int ff_is_negation (ff_t x, ff_t y)
208
{
209
	register ff_t t;
210
	
211
	_ff_set(t,x);
212
	_ff_core_addto(t,y);
213
	return ( _ff_equal(t,_ff_p) || _ff_zero(t) );
214
}
215

216
#if FF_MONTGOMERY && FF_WORDS == 1 && FF_HALF_WORDS == 1
217
static  inline unsigned  ff_montgomery1_mult (unsigned  x, unsigned  y)
218
{
219
	register unsigned long z;
220
	register unsigned a;
221

222
	z = (unsigned long)x*y;
223
	a = z*_ff_mont_pni;
224
	z  += ((unsigned long)a * _ff_p);
225
	z >>= 32;
226
	if ( z >= _ff_p ) z -= _ff_p;
227
	return z;
228
}
229
#endif
230

231
#if FF_MONTGOMERY && FF_WORDS == 1 && FF_HALF_WORDS == 2
232
static  inline unsigned long ff_montgomery1_mult (unsigned long x, unsigned long y)
233
{
234
	register unsigned long x0, x1,a0, a1;
235

236
	_asm_mult_1_1 (x1,x0,x,y);
237
	a0 = x0*_ff_mont_pni;
238
	_asm_mult_1_1 (a1,a0,a0,_ff_p);
239
	_asm_addto_2_2 (a1,a0,x1,x0);
240
	if ( a1 >= _ff_p ) a1 -= _ff_p;
241
	return a1;
242
}
243
#endif
244

245
static inline int ff_residue (ff_t z) { ff_t t;  if ( _ff_zero(z) ) return 1;  ff_exp_ui(&t,&z,(_ff_p-1)/2);  return _ff_one(t); } // this is faster than using Legendre
246
static inline int ff_sqrt(ff_t o[1], ff_t a[1]) { ff_t t;  if ( ! ff_invsqrt(&t,a,0) ) return 0;  _ff_mult(o[0],t,a[0]);  return 1; }
247
static inline int ff_sqrt_ext(ff_t o[1], ff_t a[1]) { register int sts; ff_t t;  sts=ff_invsqrt(&t,a,1);  _ff_mult(o[0],t,a[0]);  return sts; }
248
int _ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext);
249
static inline int ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext)			// computes a^{-1/2} for a in the 2-Sylow subgroup, returns 0 if a^{-1/2} is not in F_p and returns a^{-1/2}/z
250
{
251
	// handle easy cases inline, otherwise call _ff_2Sylow_invsqrt (if e <= 2 this will never happen)
252
	ff_setup_2g();
253
	if ( _ff_one(a[0]) ) { _ff_set_one(o[0]);  return 1; }				// use 1 as the inverse square root of 1
254
	if ( _ff_equal(a[0],_ff_2g) ) { _ff_set(o[0],_ff_2gi);  return 0; }		// if e=1, this must happen
255
	if ( _ff_equal(a[0],_ff_negone) ) { _ff_set(o[0], _ff_2Sylow_tab[_ff_p2_e-2][1]); return 1;}
256
	if ( _ff_equal(a[0],_ff_2gi) ) { _ff_set_one (o[0]);  return 0; }			// if e=2, this must happen
257
	return _ff_2Sylow_invsqrt (o, a,ext);
258
}		
259

260
#endif
261

262
Product

Resources

Company