Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
241818 views
1
#ifndef _FF_INCLUDE_
2
#define _FF_INCLUDE_
3
4
/*
5
Copyright 2007 Andrew V. Sutherland
6
7
This file is part of smalljac.
8
9
smalljac is free software: you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation, either version 2 of the License, or
12
(at your option) any later version.
13
14
smalljac is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
GNU General Public License for more details.
18
19
You should have received a copy of the GNU General Public License
20
along with smalljac. If not, see <http://www.gnu.org/licenses/>.
21
*/
22
23
// Stripped-down, self-contained implementation if 32 or 64 bit single precision
24
// finite fields using a Montgomery representation. All you need is ff.h, asm.h, and ff.c
25
26
// THIS CODE IS NOT PORTABLE, but it is fast. It assumes an unsigned is 32 bits and that an
27
// unsigned long is 64 bits and (most critically) uses inline assembly directives. These
28
// should work on any AMD/Intel compatible instruction set.
29
30
// The FF_MONTGOMERY flag should generally be set, the only reason to unset it is for
31
// testing or benchmarking. It can be helpful to find bugs in code that breaks when
32
// the field representation changes (e.g. when the bit string "000...01" is not the identity element).
33
34
#include "asm.h"
35
#include "smalljac.h" // only needed to pick word size
36
37
#define ULONG_BITS (8*sizeof(unsigned long)) // must be 32 or 64
38
39
#define FF_FAST 1 // define to turn off certain error checking
40
41
#define FF_MAX_PARALLEL_INVERTS 2048 // make big enough to not force chunking in hecurve1
42
43
#define FF_WORDS 1 // must be 1
44
#define FF_HALF_WORDS (SMALLJAC_FF_64BITS?2:1)
45
#define FF_MONTGOMERY 1 // if 0 FF_HALF_WORDS must be 1
46
#define FF_NAIL_BITS 1 // must be at least 1 (need to be able to add without overflow)
47
#define FF_BITS ((FF_HALF_WORDS*ULONG_BITS/2) - FF_NAIL_BITS)
48
#define FF_MONTGOMERY_RBITS (FF_HALF_WORDS*ULONG_BITS/2) // note that R=B are identical - only one word is ever used
49
#define FF_ITAB_SIZE (3*FF_MONTGOMERY_RBITS+1)
50
51
52
#if FF_MONTGOMERY && FF_HALF_WORDS == 1
53
typedef unsigned ff_t;
54
#else
55
typedef unsigned long ff_t;
56
#endif
57
58
// temporary value used by macros - could remove by replacing macros with inlines, probably should
59
extern ff_t _ff_t1;
60
61
// The globals below all assume a single modulus environment. Multiple moduli can be managed by
62
// copying and resetting these (provided this is reasonably infrequent). We don't put them into a
63
// dynamically allocated context to save the cost of dereferencing a pointer every time we want to
64
// perform a field operation.
65
66
extern ff_t _ff_p;
67
extern ff_t _ff_2g; // generator of the Sylow 2-subgroup, necessarily a quadratic non-residue
68
extern ff_t _ff_2gi; // inverse of _ff_2g
69
extern ff_t _ff_2Sylow_tab[64][2];
70
extern ff_t _ff_3g; // generator of the Sylow 3-subgroup, necessarily a cubic non-residue
71
extern ff_t _ff_half; // 1/2 = (p+1)/2
72
extern ff_t _ff_third; // 1/3 = (p+1)/3 or (2p+1)/3 for p=2mod3 or 1mod3 (resp.)
73
extern ff_t _ff_negone;
74
extern int _ff_p2_e;
75
extern unsigned long _ff_p2_m; // p = 2^e*m+1
76
extern int _ff_p3_e;
77
extern unsigned long _ff_p3_m; // p = 3^e*m+1 when p=1mod3
78
extern int _ff_p3_m1mod3;
79
extern ff_t *_ff_mont_itab;
80
extern ff_t _ff_mont_R;
81
extern ff_t _ff_mont_R2;
82
extern unsigned long _ff_mont_pni;
83
extern int _ff_p1mod3;
84
85
extern int _ff_cbrt_setup;
86
extern ff_t _ff_cbrt_unity; // MUST CALL ff_cbrt_setup() or ff_cbrt() to initialize. Set to 1 if p=2mod3
87
88
void ff_setup_ui (unsigned long p); // note - DOES NOT CHECK PRIMALITY
89
90
91
// WARNING - several of the macros below are decidedly unsafe. In general, if it starts with an underscore,
92
// don't use side effects, and be aware that many can't be used in expressions.
93
// These could be converted to inline functions, but many involve assignments and would require passing
94
// by reference and then dereferencing a pointer (this is handled by reference types in C++).
95
// A smart compiler could produce inline function code that is just as fast as these macros, but we won't rely on this.
96
97
// Input/Output
98
#define _ff_sprint(s,x) sprintf(s,"%lu", _ff_get_ui(x))
99
#define _ff_set_raw(z,x) ((z) = (x)) // useful for setting random field elements. assumes 0<=x<p
100
#if FF_MONTGOMERY
101
#define _ff_set_ui(z,x) ((z) = ff_montgomery1_mult(((x)%_ff_p), _ff_mont_R2));
102
#define _ff_rset_ui(z,x) ((z) = ff_montgomery1_mult((x), _ff_mont_R2)) // assumes 0<=x<p
103
#define _ff_get_ui(x) (ff_montgomery1_mult((x),1UL))
104
#else
105
#define _ff_set_ui(z,x) ((z) = ((x)%_ff_p))
106
#define _ff_rset_ui(z,x) ((z) = (x)) // assumes 0<=x<p
107
#define _ff_get_ui(x) (x)
108
#endif
109
// signed conversions - you must use these for negative values
110
#define _ff_get_i(x) ((long)_ff_get_ui(x))
111
#define _ff_set_i(x,z) if ( (z) < 0 ) { _ff_set_ui(x,-(z)); ff_negate(x); } else { _ff_set_ui(x,z); }
112
113
// Basic operations and 0/1 - IMPORTANT: the binary value 1 is not the field identity in a Montgomery representation (but 0 is zero)
114
#define _ff_set_zero(z) ((z)=0)
115
#define _ff_zero(x) (!(x))
116
#define _ff_nonzero(x) (x)
117
#define _ff_parity(x) ((x)&0x1)
118
#define _ff_set(z,x) ((z)=(x))
119
#define _ff_equal(x,y) ((x) == (y))
120
#define _ff_low_word(x) (x)
121
#if FF_MONTGOMERY
122
#define _ff_set_one(z) _ff_set(z,_ff_mont_R)
123
#define _ff_one(z) _ff_equal(z,_ff_mont_R)
124
#else
125
#define _ff_set_one(z) ((z)=1UL)
126
#define _ff_one(z) ((z)==1UL)
127
#endif
128
// end basic operations and 0/1
129
130
// Core arithmetic operations - these may be applied to unreduced values that fit in the word limit
131
#define _ff_core_addto(z,x) ((z) += (x))
132
#define _ff_core_subfrom(z,x) ((z) -= (x)) // assumes z dominates x
133
#define _ff_core_shiftl(z) ((z) <<= 1)
134
#define _ff_core_shiftr(z) ((z) >>= 1)
135
#define _ff_core_ge(x,y) ((x) >= (y))
136
#define _ff_addto(z,x) {register ff_t _ff_t; _ff_set(_ff_t,z); _ff_core_addto(_ff_t,x);_ff_core_red(_ff_t); _ff_set(z,_ff_t);}
137
#define _ff_add(z,x,y) {_ff_set(z,x);_ff_addto(z,y);}
138
#define _ff_subfrom(z,x) {register ff_t _ff_t; _ff_set(_ff_t,z); _ff_core_dom(_ff_t,x); _ff_core_subfrom(_ff_t,x); _ff_set(z,_ff_t);}
139
#define _ff_sub(z,x,y) {_ff_set(z,x);_ff_subfrom(z,y);}
140
#if FF_MONTGOMERY
141
#define _ff_core_inc(z) _ff_core_addto(z,_ff_mont_R)
142
#else
143
#define _ff_core_inc(z) ((z)++)
144
#endif
145
146
// derived arithmetic operations - note that inputs cannot overlap outputs! use higher level versions if needed
147
// Note that core_red requires z < 2p
148
#define _ff_core_red(z) if (_ff_core_ge(z,_ff_p) ) _ff_core_subfrom (z,_ff_p)
149
#define _ff_core_dom(z,x) if ( !_ff_core_ge(z,x) ) _ff_core_addto (z,_ff_p)
150
#define _ff_neg(z,x) if (_ff_nonzero(x) ) {_ff_set(z,_ff_p); _ff_core_subfrom(z,x); } else { _ff_set_zero(z); }
151
#define _ff_x2(z) _ff_addto(z,z); // this is much faster than shifting
152
#define _ff_inc(z) {_ff_core_inc(z);_ff_core_red(z);}
153
#if FF_MONTGOMERY
154
#define _ff_dec(z) _ff_subfrom((z),_ff_mont_R)
155
#else
156
#define _ff_dec(z) if (z) { (z)--; } else { (z) = _ff_p-1; }
157
#endif
158
// end core arithmetic operations
159
160
// Safer versions - overlap ok, but still need to watch side effects and can't use in expressions
161
#define ff_negate(z) if (_ff_nonzero(z) ) {_ff_set(_ff_t1,_ff_p); _ff_core_subfrom(_ff_t1,z); _ff_set(z,_ff_t1); } else { _ff_set_zero(z); }
162
#define ff_add(z,x,y) {_ff_set(_ff_t1,x);_ff_addto(_ff_t1,y);_ff_set(z,_ff_t1);}
163
#define ff_sub(z,x,y) {_ff_set(_ff_t1,x);_ff_subfrom(z,y);_ff_set(z,_ff_t1);}
164
165
// higher arithmetic operations
166
#if FF_MONTGOMERY
167
#define _ff_mult(z,x,y) ((z) = ff_montgomery1_mult(x,y))
168
#define _ff_square(z,x) _ff_mult(z,x,x)
169
#define _ff_invert(z,x) ((z)=ff_montgomery1_invert(x))
170
#else
171
#define _ff_mult(z,x,y) ((z) = ((x)*(y)) % _ff_p)
172
#define _ff_square(z,x) _ff_mult(z,x,x)
173
#define _ff_invert(z,x) ((z) = ff_ui_inverse(x,_ff_p))
174
#endif
175
#define _ff_div2(z,x) _ff_mult(z,x,_ff_half);
176
177
#define _ff_incmult(z,x,w) { _ff_set(z,x);_ff_inc(z);_ff_mult(z,z,w); } // could be optimized
178
#define _ff_multadd(z,x,y,a) {_ff_mult(z,x,y); _ff_addto(z,a); } // no optimization here
179
#define ff_multadd(z,x,y,a) _ff_multadd(z,x,y,a) // overlap of x and z ok but a can't overlap
180
181
unsigned long ff_montgomery1_invert (unsigned long x);
182
unsigned long ff_ui_inverse (unsigned long a, unsigned long m);
183
void ff_exp_ui (ff_t o[1], ff_t a[1], unsigned long e);
184
int ff_ui_legendre (unsigned long a, unsigned long b);
185
int ff_invsqrt (ff_t o[1], ff_t a[1], int ext); // returns 1 if sqrt is rational, 0 if sqrt is in F_p^2. In the later case, if ext is true, sqrt(a)=o*z where z^2=nr
186
void ff_setup_2g (void);
187
int ff_cbrt (ff_t o[1], ff_t a[1]); // returns 1 for success, 0 if a is a non-residue
188
int ff_3Sylow_invcbrt (ff_t o[1], ff_t a[1]); // computes a^{-1/3} for a in the 3-Sylow subgroup, returns 0 if a is not a residue in the 3-Sylow
189
int ff_fast_sqrt (ff_t o[1], ff_t a[1]); // fast sqrt via single exponentiation, returns 1 for success, 0 if no power of a is sqrt(a) (obsolete, not used)
190
191
// called automatically by ff_sqrt, but also used in ffext.c - note that for p=3mod4, e=1 and -1 generates the 2-Sylow subgroup
192
void ff_setup_2g(void);
193
194
// ditto for cbrt, only relevant when p=1mod3
195
void _ff_setup_3g(void);
196
static inline void ff_setup_3g(void) { if ( ! _ff_3g ) _ff_setup_3g(); }
197
198
void ff_parallel_invert (ff_t z[], ff_t x[], unsigned n); // z and x may overlap
199
200
#define ff_mult(z,x,y) _ff_mult(z,x,y) // overlap is ok for these macros (but not side effects)
201
#define ff_square(z,x) _ff_square(z,x)
202
#define ff_invert(z,x) { if ( ! _ff_one(x) ) _ff_invert(z,x); else _ff_set(z,x); }
203
204
// end higher arithmetic operations
205
206
// Inline Functions
207
static inline int ff_is_negation (ff_t x, ff_t y)
208
{
209
register ff_t t;
210
211
_ff_set(t,x);
212
_ff_core_addto(t,y);
213
return ( _ff_equal(t,_ff_p) || _ff_zero(t) );
214
}
215
216
#if FF_MONTGOMERY && FF_WORDS == 1 && FF_HALF_WORDS == 1
217
static inline unsigned ff_montgomery1_mult (unsigned x, unsigned y)
218
{
219
register unsigned long z;
220
register unsigned a;
221
222
z = (unsigned long)x*y;
223
a = z*_ff_mont_pni;
224
z += ((unsigned long)a * _ff_p);
225
z >>= 32;
226
if ( z >= _ff_p ) z -= _ff_p;
227
return z;
228
}
229
#endif
230
231
#if FF_MONTGOMERY && FF_WORDS == 1 && FF_HALF_WORDS == 2
232
static inline unsigned long ff_montgomery1_mult (unsigned long x, unsigned long y)
233
{
234
register unsigned long x0, x1,a0, a1;
235
236
_asm_mult_1_1 (x1,x0,x,y);
237
a0 = x0*_ff_mont_pni;
238
_asm_mult_1_1 (a1,a0,a0,_ff_p);
239
_asm_addto_2_2 (a1,a0,x1,x0);
240
if ( a1 >= _ff_p ) a1 -= _ff_p;
241
return a1;
242
}
243
#endif
244
245
static inline int ff_residue (ff_t z) { ff_t t; if ( _ff_zero(z) ) return 1; ff_exp_ui(&t,&z,(_ff_p-1)/2); return _ff_one(t); } // this is faster than using Legendre
246
static inline int ff_sqrt(ff_t o[1], ff_t a[1]) { ff_t t; if ( ! ff_invsqrt(&t,a,0) ) return 0; _ff_mult(o[0],t,a[0]); return 1; }
247
static inline int ff_sqrt_ext(ff_t o[1], ff_t a[1]) { register int sts; ff_t t; sts=ff_invsqrt(&t,a,1); _ff_mult(o[0],t,a[0]); return sts; }
248
int _ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext);
249
static inline int ff_2Sylow_invsqrt (ff_t o[1], ff_t a[1], int ext) // computes a^{-1/2} for a in the 2-Sylow subgroup, returns 0 if a^{-1/2} is not in F_p and returns a^{-1/2}/z
250
{
251
// handle easy cases inline, otherwise call _ff_2Sylow_invsqrt (if e <= 2 this will never happen)
252
ff_setup_2g();
253
if ( _ff_one(a[0]) ) { _ff_set_one(o[0]); return 1; } // use 1 as the inverse square root of 1
254
if ( _ff_equal(a[0],_ff_2g) ) { _ff_set(o[0],_ff_2gi); return 0; } // if e=1, this must happen
255
if ( _ff_equal(a[0],_ff_negone) ) { _ff_set(o[0], _ff_2Sylow_tab[_ff_p2_e-2][1]); return 1;}
256
if ( _ff_equal(a[0],_ff_2gi) ) { _ff_set_one (o[0]); return 0; } // if e=2, this must happen
257
return _ff_2Sylow_invsqrt (o, a,ext);
258
}
259
260
#endif
261
262