Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
241818 views
1
#include <stdlib.h>
2
#include <stdio.h>
3
#include "ffext.h"
4
#include "ffpoly.h"
5
#include "cstd.h"
6
7
/*
8
Copyright 2007-2008 Andrew V. Sutherland
9
10
This file is part of smalljac.
11
12
smalljac is free software: you can redistribute it and/or modify
13
it under the terms of the GNU General Public License as published by
14
the Free Software Foundation, either version 2 of the License, or
15
(at your option) any later version.
16
17
smalljac is distributed in the hope that it will be useful,
18
but WITHOUT ANY WARRANTY; without even the implied warranty of
19
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20
GNU General Public License for more details.
21
22
You should have received a copy of the GNU General Public License
23
along with smalljac. If not, see <http://www.gnu.org/licenses/>.
24
*/
25
26
/*
27
This module contains performance-oriented code for operations on polynomials of low degree,
28
and support functions for computations on genus 1 and genus 2 curves.
29
30
It is assumed here that the ff_t type does not require initialization (no support for GMP's mpz type),
31
and in a number of cases it is further assumed that the ff_t type fits in an unsigned long (this assumption is verified when made).
32
*/
33
34
35
void ff_poly_xpan_mod_d2 (ff_t g[2], ff_t a, unsigned long n, ff_t f[1]);
36
void ff_poly_xn_mod_d3 (ff_t g[3], unsigned long n, ff_t f[4]);
37
void ff_poly_xpan_mod_d3 (ff_t g[2], ff_t a, unsigned long n, ff_t f[4]);
38
void ff_poly_mult_mod_d3 (ff_t w[3], ff_t u[3], ff_t v[3], ff_t f[4]);
39
void ff_poly_square_mod_d3 (ff_t w[3], ff_t u[3], ff_t f[4]);
40
void ff_poly_exp4_mod_d3 (ff_t g[3], ff_t p[12], unsigned long n, ff_t f[4]);
41
void ff_poly_exp8_mod_d3 (ff_t g[3], ff_t p[24], unsigned long n, ff_t f[4]);
42
43
void ff_poly_xn_mod_d4 (ff_t g[4], unsigned long n, ff_t f[5]);
44
void ff_poly_xpan_mod_d4 (ff_t g[3], ff_t a, unsigned long n, ff_t f[5]);
45
void ff_poly_mult_mod_d4 (ff_t w[4], ff_t u[4], ff_t v[4], ff_t f[5]);
46
void ff_poly_square_mod_d4 (ff_t w[4], ff_t u[4], ff_t f[5]);
47
void ff_poly_exp4_mod_d4 (ff_t g[4], ff_t p[16], unsigned long n, ff_t f[5]);
48
void ff_poly_exp8_mod_d4 (ff_t g[4], ff_t p[32], unsigned long n, ff_t f[5]);
49
50
51
// assumes f monic, char > 3, sets t to translation value: depressed cubic is f(x-t)
52
void ff_depress_cubic (ff_t t[1], ff_t f[4])
53
{
54
_ff_t_declare_reg t1, t2, f1;
55
56
_ff_mult(t1,_ff_third,f[2]); // t1 = f2/3
57
_ff_set(t[0],t1);
58
_ff_mult(f1,t1,f[2]); // f1 = f2^2/3
59
_ff_square(t2,t1); ff_mult(t2,t2,t1); _ff_x2(t2); ff_mult(t1,t1,f[1]); // t2 = f2^3/27, t1 = f1f2/3
60
_ff_addto(f[0],t2); _ff_subfrom(f[0],t1);
61
_ff_subfrom(f[1],f1);
62
_ff_set_zero(f[2]);
63
// 5M+3A
64
}
65
66
// computes cubic resolvent of f=x^4+f2x^2+f1x+f0 and depresses it
67
void ff_depressed_cubic_resolvent (ff_t t[1], ff_t g[4], ff_t f[5])
68
{
69
_ff_t_declare_reg t0,t1,t2;
70
71
_ff_set_one(g[3]);
72
_ff_add(t1,f[2],f[2]);
73
_ff_neg(g[2],t1); // g2 = -2f2
74
_ff_add(t1,f[0],f[0]); _ff_x2(t1);
75
_ff_square(t2,f[2]);
76
_ff_sub(g[1],t2,t1); // g1 = f2^2-4f0
77
_ff_square(g[0],f[1]); // g0 = f1^2
78
ff_depress_cubic (t,g);
79
// total 7M+5A
80
}
81
82
83
// assumes f monic, char > 3, sets t to translation value: depressed cubic is f(x-t)
84
void ff_depress_quartic (ff_t t[1], ff_t f[5])
85
{
86
_ff_t_declare_reg t0,t1, t2, t3, t4, w1, w2;
87
88
_ff_mult(t1,f[3],_ff_half); // t1 = f3/2
89
ff_mult(t0,t1,_ff_half); // t0 = f3/4
90
_ff_set(t[0],t0);
91
_ff_square(t2,t0); _ff_square(t4,t3); // t2 = f3^2/16, t4=f3^4/256
92
_ff_add(t3,t4,t4); _ff_addto(t3,t4); // t3 = 3f3^4/256
93
_ff_mult(w1,f[2],t2); // w1 = f2f3^2/16
94
_ff_mult(w2,f[1],t0); // w2 = f1f3/4
95
_ff_subfrom(w1,w2);
96
_ff_subfrom(w1,t3);
97
_ff_addto(f[0],w1); // new f0 = f0 + f2f3^2/16 - f1f3/4 - 3f^4/256
98
_ff_mult(t3,t0,t2); // t3 = f3^2/8
99
_ff_mult(w1,t3,f[3]);
100
_ff_mult(w2,f[2],t1);
101
_ff_subfrom(w1,w2);
102
_ff_addto(f[1],w1); // new f1 = f1 + f3^3/8 - f2f3/2
103
_ff_add(w1,t3,t3); _ff_addto(w1,t3); // w1 = 3f3^2/8
104
_ff_subfrom(f[2],w1); // new f2 = f2-3f3^2/8
105
_ff_set_zero(f[3]);
106
// 9M+10A
107
}
108
109
// computes the gcd of f and g where f=x^3+ax+b
110
void ff_poly_gcd_g1 (ff_t h[4], int *d_h, ff_t f[4], ff_t g[4], int d_g)
111
{
112
_ff_t_declare_reg t1,t2;
113
ff_t r[4], s[4];
114
int d_r, d_s;
115
116
if ( d_g == 3 ) {
117
_ff_neg(r[2],g[2]);
118
_ff_mult(t1,f[1],g[3]);
119
_ff_sub(r[1],t1,g[1]);
120
_ff_mult(t1,f[0],g[3]);
121
_ff_sub(f[0],t1,g[0]);
122
for ( d_r = 2 ; d_r >= 0 && _ff_zero(r[d_r]) ; d_r-- );
123
} else {
124
ff_poly_copy (r,&d_r,g,d_g);
125
}
126
if ( d_r < 0 ) { ff_poly_copy (h, d_h, f, 3); return; }
127
if ( ! d_r ) { _ff_set_one(h[0]); *d_h = 0; return; }
128
_ff_neg(s[2],r[d_r-1]);
129
_ff_mult(t1,f[1],r[d_r]);
130
if ( d_r > 1 ) { _ff_sub(s[1],t1,r[d_r-2]); } else { _ff_set(s[1],t1); }
131
_ff_mult(s[0],f[0],r[d_r]);
132
for ( d_s = 2 ; d_s >= 0 && _ff_zero(s[d_s]) ; d_s-- );
133
if ( d_s == 2 && d_r == 2) {
134
_ff_mult(t1,s[1],r[2]);
135
_ff_mult(t2,r[1],s[2]);
136
_ff_sub(s[1],t1,t2);
137
_ff_mult(t1,s[0],r[2]);
138
_ff_mult(t2,r[0],s[2]);
139
_ff_sub(s[0],t1,t2);
140
for ( d_s = 1 ; d_s >= 0 && _ff_zero(s[d_s]) ; d_s-- );
141
}
142
if ( d_s < 0 ) { ff_poly_copy(h,d_h,r,d_r); return; }
143
if ( ! d_s ) { _ff_set_one(h[0]); *d_h = 0; return; }
144
if ( d_r == 2 && d_s == 1 ) {
145
_ff_mult(t1,r[1],s[1]);
146
_ff_mult(t2,s[0],r[2]);
147
_ff_sub(r[1],t1,t2);
148
ff_mult(r[0],r[0],s[1]);
149
for ( d_r = 1 ; d_r >= 0 && _ff_zero(r[d_r]) ; d_r-- );
150
if ( d_r < 0 ) { ff_poly_copy (h, d_h, s, d_s); return; }
151
if ( ! d_r ) { _ff_set_one(h[0]); *d_h = 0; return; }
152
}
153
if ( d_s == 2 && d_r == 1 ) {
154
_ff_mult(t1,s[1],r[1]);
155
_ff_mult(t2,r[0],s[2]);
156
_ff_sub(s[1],t1,t2);
157
ff_mult(s[0],s[0],r[1]);
158
for ( d_s = 1 ; d_s >= 0 && _ff_zero(s[d_s]) ; d_s-- );
159
if ( d_s < 0 ) { ff_poly_copy (h, d_h, r, d_r); return; }
160
if ( ! d_s ) { _ff_set_one(h[0]); *d_h = 0; return; }
161
}
162
// we must have d_r==d_s==1 at this point
163
_ff_mult(t1,r[0],s[1]);
164
_ff_mult(t2,s[0],r[1]);
165
_ff_sub(s[0],t2,t1);
166
if ( _ff_zero(s[0]) ) { ff_poly_copy(h,d_h,r,d_r); return; }
167
_ff_set_one(h[0]); *d_h = 0;
168
// worst case 13M+7A and some copies
169
return;
170
}
171
172
/*
173
Standard root finding algorithm specialized to degree 3, note that roots may be null if only the count is required
174
Returns the number of roots found. This function is superseded by ff_poly_roots_d3 below which solves the cubic with radicals.
175
*/
176
int ff_old_poly_roots_d3 (ff_t roots[3], ff_t f[4])
177
{
178
_ff_t_declare g[3], h[3], hh[3], a;
179
_ff_t_declare_reg t0, t1, t2;
180
int i, j, d_g, d_h, d_hh;
181
182
#if FF_WORDS > 1
183
err_printf ("ff_poly_factors_g1 only implemented for single word fields\n"); exit (0);
184
#endif
185
186
// this is wasteful if the caller already knows the discriminant!
187
_ff_square(t0,f[0]); _ff_set_ui(t1,27); _ff_mult(t2,t0,t1); // t2 = 27f1^2
188
_ff_square(t0,f[1]); _ff_mult(t1,t0,f[1]); _ff_x2(t1); _ff_x2(t1); // t1 = 4f0^3
189
_ff_addto(t1,t2);
190
if ( _ff_zero(t1) ) { // D=0 => curve has a repeated root
191
if ( roots ) {
192
if ( _ff_zero(f[1]) ) { // if f[1]=0 then f[0]=0 and we have a triple root at 0
193
_ff_set_zero(roots[0]); _ff_set_zero(roots[1]); _ff_set_zero(roots[2]);
194
} else { // otherwise return the distinct root is 3b/a and the double root is (-3b/(2a)
195
_ff_invert(t0,f[1]); // 1/a
196
_ff_add(t1,f[0],f[0]); _ff_addto(t1,f[0]); // 3b
197
_ff_mult(roots[0],t0,t1); // 3b/a
198
_ff_neg(t0,roots[0]);
199
_ff_mult(roots[1],t0,_ff_half); // -3b/(2a)
200
_ff_set(roots[2],roots[1]);
201
}
202
}
203
return 3;
204
}
205
206
// compute x^p mod f
207
ff3_zn_mod (g, _ff_p, f); // g = x^p mod f
208
_ff_dec(g[1]); // g = x^p-x mod f
209
for ( d_g = 2 ; d_g >= 0 && _ff_zero(g[d_g]) ; d_g-- );
210
ff_poly_gcd_red (h, &d_h, f, 3, g, d_g);
211
switch (d_h) {
212
case 0:
213
return 0;
214
case 1:
215
if ( roots ) {
216
_ff_invert(t1,h[1]);
217
_ff_mult(t2,t1,h[0]);
218
_ff_neg(roots[0], t2);
219
}
220
return 1;
221
case 2:
222
err_printf ("Impossible, non-singular cubic with 2 roots in ff_old_poly_roots_d3: "); ff_poly_print (f,3); ff_poly_print(h,d_h); exit (0);
223
case 3:
224
if ( ! roots ) return 3;
225
// We know there are three roots, and separate the roots using the standard probabilistic algorithm
226
// Pick a random a and compute g=(x+a)^((p-1)/2). Half of the elements in F_p* will be roots of g,
227
// and with probability 3/4 gcd(f,g) will have degree 1 or 2 allowing us to pick out a root
228
// As a minor optimization, we pick a=0 as our first "random" choice, since we can compute g faster in this case.
229
for ( i = 0 ; ; i++ ) {
230
if ( ! i ) {
231
ff3_zn_mod (g, (_ff_p-1)/2, f);
232
} else {
233
ff_random(&a);
234
ff_poly_xpan_mod_d3 (g, a, (_ff_p-1)/2, f);
235
}
236
_ff_dec (g[0]); // g = (x+a)^((p-1)/2) mod f
237
for ( d_g = 2 ; d_g >= 0 && _ff_zero(g[d_g]) ; d_g-- );
238
ff_poly_gcd_red (h, &d_h, f, 3, g, d_g);
239
if ( d_h > 0 && d_h < 3 ) break;
240
}
241
i = ff_poly_roots_d2(roots,h,d_h);
242
if ( i != d_h ) { err_printf ("poly_roots_d2 failed in ff_old_poly_roots_d3!"); exit (0); }
243
ff_poly_div (h, &d_h, g, &d_g, f, 3, h, d_h);
244
if ( d_g != -1 ) { err_printf ("inexact poly division in ff_old_poly_factors_g1!\n"); exit(0); }
245
j = ff_poly_roots_d2(roots+i,h,d_h);
246
if ( j != d_h ) { err_printf ("poly_roots_d2 failed in ff_old_poly_roots_d3!"); exit (0); }
247
if ( i+j != 3 ) { err_printf ("missing root in ff_old_poly_roots_d3!"); exit (0); }
248
return 3;
249
}
250
err_printf ("Unreachable code in ff_old_poly_factors_g1!\n");
251
exit (0);
252
}
253
254
255
256
257
/*
258
Finds the roots of a monic depreseed cubic x^3+ax+b over F_p.
259
If roots is NULL only the # of roots is determined, which can be significantly quicker (e.g. when D is not a QR)
260
The value d is optional. If non-null it must contain sqrt(-D/3) (this is used when computing 3-torsion)
261
262
Assumes _ff_p fits in an unsigned long
263
*/
264
int _ff_poly_roots_d3 (ff_t roots[3], ff_t f[4], ff_t *pd)
265
{
266
_ff_t_declare g[3], h[2], w[2], v[2], vk[2], d, t, r, s;
267
_ff_t_declare_reg a, t0, t1, t2;
268
int i, k, sts;
269
270
#if FF_WORDS > 1
271
err_printf ("ff_poly_factors_g1 only implemented for single word fields\n"); exit (0);
272
#endif
273
274
// handle p=3 separately
275
if ( _ff_p == 3 ) {
276
if ( _ff_zero(f[0]) ) { if ( roots ) _ff_set_zero(roots[0]); if ( _ff_one(f[1]) ) return 1; if ( roots ) { _ff_set_one(roots[1]); _ff_set(roots[2],_ff_negone); } return 3; }
277
if ( _ff_zero(f[1]) ) { if ( roots ) { _ff_neg(roots[0],f[0]); _ff_neg(roots[0],f[0]); _ff_neg(roots[0],f[0]); } return 3; }
278
if ( _ff_one(f[1]) ) { if ( roots ) _ff_set(roots[0],f[0]); return 1; } else return 0;
279
}
280
281
// f=x^3+ax
282
if ( _ff_zero(f[0]) ) {
283
_ff_neg(t,f[1]);
284
if ( ! roots ) { return (ff_residue(t)?3:1); }
285
_ff_set_zero(roots[0]);
286
if ( ! ff_sqrt(roots+1,&t) ) return 1;
287
_ff_neg(roots[2],roots[1]);
288
return 3;
289
}
290
291
// f = x^3+b
292
if ( _ff_zero(f[1]) ) {
293
_ff_neg(t,f[0]);
294
if ( _ff_p1mod3 ) {
295
if ( ! ff_cbrt(&d,&t) ) return 0;
296
if ( roots ) { _ff_set(roots[0],d); _ff_mult(roots[1], roots[0], _ff_cbrt_unity); _ff_mult(roots[2], roots[1], _ff_cbrt_unity); }
297
return 3;
298
} else {
299
if ( roots ) if ( ! ff_cbrt(roots,&t) ) { printf ("Impossible, ff_cbrt failed on input %ld when p=%ld is 2 mod 3\n", _ff_get_ui(t), _ff_p); exit(0); }
300
return 1;
301
}
302
}
303
304
if ( ! pd ) {
305
_ff_square(t0,f[0]); _ff_set_ui(t1,27); _ff_mult(t2,t0,t1); // t2 = 27f1^2
306
_ff_square(t0,f[1]); _ff_mult(t1,t0,f[1]); _ff_x2(t1); _ff_x2(t1); // t1 = 4f0^3
307
_ff_add(t0,t1,t2); _ff_mult(t,t0,_ff_third); // t = -D/3 = (27f1^2+4f0^3)/3
308
if ( _ff_zero(t) ) { // t=0 => D=0 => curve is singular
309
if ( roots ) { // distinct root is 3b/a (we know a!=0), and (-3b/(2a) is a double root).
310
_ff_invert(t0,f[1]); // 1/a
311
_ff_add(t1,f[0],f[0]); _ff_addto(t1,f[0]); // 3b
312
_ff_mult(roots[0],t0,t1); // 3b/a
313
_ff_neg(t0,roots[0]);
314
_ff_mult(roots[1],t0,_ff_half); // -3b/(2a)
315
_ff_set(roots[2],roots[1]);
316
}
317
return 3;
318
}
319
sts = ff_sqrt_ext(&d,&t); // use extended sqrt so we get a solution in F_p^2 if necessary
320
} else {
321
_ff_set(d,*pd);
322
}
323
_ff_mult(t2,_ff_half,_ff_third); // t2=1/6 (used later)
324
_ff_mult(t0, _ff_half, f[0]);
325
_ff_neg(a,t0); // a = -f[0]/2 (used later)
326
if ( _ff_p1mod3 ) {
327
if ( ! sts ) { // p=1mod3 => -1/3 is a QR => (t not a QR => D not a QR => f has an even # of factors (by Stickleberger), so 1 root
328
if ( ! roots ) return 1; // if all the caller wants is the number of roots, we are done
329
_ff_square(t0,t2); // 1/36
330
_ff_mult(s,t,t0); // s = -D/108 is a not a QR, we will now work in the field F_p^2 = F_p[z]/(z^2-s) to compute (z+a)^1/3
331
// compute (z+t1)^((p+1)*m) in F_p^2, this will be in the 3-Sylow subgroup of F_p^2, necessarily in F_p (here p=3^e*m+1, m not divisible by 3)
332
// in the process, we compute g=(z+1)^((m-i)/3) where i is congruent to m mod 3 and h=(z+a)^m
333
if ( _ff_p3_m1mod3 ) {
334
ff_poly_xpan_mod_d2(g,a,(_ff_p3_m-1)/3,&s); // g = (z+a)^((m-1)/3)
335
ff2_square_s(h,g,s); ff2_mult_s (h,h,g,s); // h = g^3 = (z+a)^(m-1)
336
ff2_mult_zpa_s(h,h,a,s); // h = g^3*(z+a) = (z+a)^m
337
} else {
338
ff_poly_xpan_mod_d2(g,a,(_ff_p3_m-2)/3,&s); // g = (z+a)^((m-2)/3)
339
ff2_square_s (h, g, s); ff2_mult_s (h, h, g, s); // h = g^3 = (z+a)^(m-2)
340
_ff_square(t0,a); _ff_add(w[1],a,a); _ff_add(w[0],t0,s); // w = (z+a)^2
341
ff2_mult_s (h,h,w,s); // h = g^3*(z+a)^2 = (z+a)^m
342
}
343
ff2_norm_s (&t,h,s); // norm(h)=((z+a)^(m*(p+1)) is in the 3-Sylow of F_p
344
if ( ! ff_3Sylow_invcbrt(&r,&t) ) { // (z+a) is not a cubic residue - this should be impossible
345
printf ("(z+%ld)^%ld = ", _ff_get_ui(a), _ff_p3_m); ff_poly_print(h,1);
346
printf ("norm(h)=%ld is not a cube in F_%ld\n", _ff_get_ui(t), _ff_p);
347
printf ("z+%ld is not a CR in F_p[z]/(z^2-%ld)\n", _ff_get_ui(a), _ff_get_ui(s));
348
ff_poly_print(f,3);
349
exit (0);
350
}
351
// we now know (z+a)^-((p+1)m)/3
352
ff2_norm_s(&t,g,s); // t = norm(g) = g^(p+1)
353
ff2_exp_ui_s(h,h,(_ff_p+1)/(3*_ff_p3_m),s); // compute h^(3^(e-1))
354
if ( !_ff_p3_m1mod3 ) {
355
// We need to construct (z+a)^n where n = (2(p+1)m+1)/3 = 2(p+1)(m-2)/3 + 2(2p+1)/3 + 1 = 2(p+1)(m-2)/3 + 2(2*3^(e-1)*m+1) + 1
356
// We then have (z+a)^n = norm(g)^2*((h^(3^(e-1))^2*(z+a))^2*(z+a) (note that we also need to square r to get (z+a)^(-2(p+1)m/3) here)
357
ff_square(t,t); ff_square(r,r);
358
ff2_square_s(h,h,s); ff2_mult_zpa_s (h,h,a,s); ff2_square_s(h,h,s);
359
}// when m is 1 mod 3:
360
// We need to construct (z+a)^n where n = ((p+1)m+1)/3 = (p+1)(m-1)/3 + (p-1)/3 + 1 = (p+1)(m-1)/3 + 3^(e-1)*m + 1
361
// We then have (z+a)^n = norm(g)*h^(3^(e-1))*(z+a)
362
ff2_scalar_mult(g,t,h);
363
ff2_mult_zpa_s(g,g,a,s); // g = (z+a)^n
364
ff2_scalar_mult(g,r,g); // g = (z+a)^(1/3)
365
// We know that (g-f1/(3g)) is a root of f. To get others we multiply by cube roots of unity (which are conveniently in F_p).
366
// We use this to find the root in F_p (there must be exactly one, since we know f has 2 factors by Stickleberger)
367
// The multiple h of g yielding a root in F_p must have the property that -f1/3=norm(h), which we can test without inverting, and we then have tr(h) as our root
368
_ff_mult(t0,f[1],_ff_third);
369
ff_negate(t0);
370
for ( i = 0 ; i < 3 ; i++ ) { ff2_norm_s(&t,g,s); if ( _ff_equal(t,t0) ) break; ff2_scalar_mult(g,_ff_cbrt_unity,g); }
371
if ( i == 3 ) {
372
printf ("g=%ldz+%ld is a cbrt of (z+%ld) in F_p[z]/(z^2-%ld), but couldn't find an F_%ld root of 2-factor f = ", _ff_get_ui(g[1]), _ff_get_ui(g[0]),_ff_get_ui(a), _ff_get_ui(s), _ff_p);
373
ff_poly_print(f,3);
374
printf ("t0=%ld, cube root of unity is %ld\n", _ff_get_ui(t0), _ff_get_ui(_ff_cbrt_unity));
375
exit (0);
376
}
377
ff2_trace(roots,g);
378
return 1;
379
} else { // in this case have a sqrt of t and we know that f has 1 or 3 factors
380
ff_mult(d,d,t2); // d = sqrt(t)/6 = sqrt(f1^3/27+f0^2/4) (this is the square root in Cardano's formula for depressed cubics)
381
_ff_add(t,a,d); // t = -f0/2 + d (first cube to check)
382
if ( ! ff_cbrt(&r,&t) ) return 0;
383
_ff_sub(t,a,d); // t = -f0/2 - d (second cube to check - we really ought to be able to compute this from the first one)
384
if ( ! ff_cbrt(&s,&t) ) { printf("Impossible cube root failure in ff_poly_roots_d3 for p=%ld, f = ", _ff_p); ff_poly_print(f,3); exit(0); }
385
if ( ! roots ) return 3; // we know the roots exist but we don't need to find them
386
// We have to cycle through the choices of one of our cube roots to verify that we have the correct s and t, unforutnately this means we may need to evaluate f twice
387
_ff_add(t,r,s);
388
_ff_square(t0,t); _ff_addto(t0,f[1]); _ff_multadd(t1,t0,t,f[0]);
389
if ( ! _ff_zero(t1) ) {
390
ff_mult(s,s,_ff_cbrt_unity); // try second s
391
_ff_add(t,r,s);
392
_ff_square(t0,t); _ff_addto(t0,f[1]); _ff_multadd(t1,t0,t,f[0]);
393
if ( ! _ff_zero(t1) ) ff_mult(s,s,_ff_cbrt_unity); // must be the third s
394
}
395
_ff_add(roots[0],r,s); // third choice must work since first two didn't
396
_ff_square(t0,_ff_cbrt_unity); // could cache this in ff.c
397
_ff_mult(t1,r,_ff_cbrt_unity);
398
_ff_mult(t2,s,t0);
399
_ff_add(roots[1],t1,t2); // second root is wr+w^2s where w is a primitive cbrt of unity
400
_ff_mult(t1,r,t0);
401
_ff_mult(t2,s,_ff_cbrt_unity);
402
_ff_add(roots[2],t1,t2); // third root is wr^2+ws
403
return 3;
404
}
405
} else { // p=2mod3
406
if ( sts ) { // p=2mod3 => -1/3 not a QR => (-D/3 a QR => D not a QR => f has an even # of factors (by Stickleberger))
407
if ( ! roots ) return 1; // if we only need the # of factors, we are done.
408
ff_mult(d,d,t2); // d = sqrt(t)/6 = sqrt(f1^3/27+f0^2/4) (this is the square root in Cardano's formula for depressed cubics)
409
_ff_add(t,a,d); // t = -f0/2 + d (first cube)
410
ff_cbrt(&r,&t); // p is 2 mod 3 so we know there is exactly 1 cube root
411
_ff_sub(t,a,d); // t = -f0/2 - d (second cube)
412
ff_cbrt(&s,&t);
413
_ff_add(t,r,s); // there was no choice of cuberoots (and changing sign of d doesn't change r+s=t), so this has got to be the root
414
_ff_set(roots[0],t);
415
return 1;
416
} else { // in this case t = -D/3 is not a QR, D=d^2 is a QR, and we know that f has 1 or 3 factors
417
// for p=2mod3, handle roots=NULL separately, since we can be much more efficient
418
if ( ! roots ) {
419
_ff_square(t0,t2); // 1/36
420
_ff_mult(s,t,t0); // s = -D/108 is a not a QR, we will now work in the field F_p^2 = F_p[z]/(z^2-s)
421
ff_poly_xpan_mod_d2(g,a,(_ff_p+1)/3,&s); // (z+a)^((p+1)/3) is in F_p iff (z+a)^((p+1)(p-1)/3)=1 iff (z+a) is a cubic residue
422
return ( _ff_zero(g[1]) ? 3 : 0 );
423
}
424
425
// in this case we will be slightly less efficient and work in the standard basis for F_p^2=F_p[z](z^2-s) because we
426
// want don't want to have to compute a new generator of the Sylow 3-subgroup in F_p^2 every time
427
_ff_set(v[0],a); _ff_mult(v[1],d,t2); // v is the element of F_p^2 (in the standard basis) whose cuberoot we need
428
// this code is slightly inefficient, but we are trying to compute h=v^((p+1)/3) as quickly as possible (if h is not in F_p, f is irreducible and we are done)
429
// while also saving values we will need to compute a cube root of v when f splits
430
if ( _ff_p3_m1mod3 ){ k=1; ff2_set(vk,v); }
431
else { k=2; ff2_square(vk,v); } // vk = v^k
432
ff2_exp_ui(g,v,(_ff_p3_m-k)/3); // g = v^((m-k)/3)
433
ff2_square(h,g); ff2_mult(h,h,g); // h = v^(m-k)
434
ff2_mult(h,h,vk); // h = v^m
435
if ( _ff_p3_e > 1 ) {
436
ff2_exp_ui(w,h,(_ff_p+1)/(3*_ff_p3_m)-1); // w = v^((3^(e-1)-1)m) = v^(((p+1)-3m)/3)
437
_ff_mult(t1,w[1],h[0]); _ff_mult(t2,w[0],h[1]); // compute z coefficient of w*h
438
_ff_add(t0,t1,t2);
439
if ( ! _ff_zero(t0) ) return 0; // if w*h=v^((p+1)/3) is not in F_p, then v^((p^2-1)/3)=h^(p-1) is not 1 and v is not a cube, and o.w. it is
440
} else {
441
if ( ! _ff_zero(h[1]) ) return 0; // here h=v^m=v^((p+1)/3) since e=1
442
}
443
// We now know v is a cube and there are three roots
444
if ( ! roots ) return 3;
445
ff2_norm(&t,g); // t = N(g)=g^(p+1)=v^((p+1)(m-k)/3) where k=m mod 3
446
ff2_scalar_mult(g,t,g); // g = v^((p+2)(m-k)/3)
447
// When e==1, life is pretty simple: we know v is a cube, and there is only one cube in the Sylow 3-subgroup, name 1, and its inverse cube root is 1
448
// It follows that we can assume v^((p-1)m/3)=v^((p-2)m/3)=1.
449
if ( _ff_p3_e == 1 ) {
450
if ( k==2 ) {
451
ff2_mult(g,g,h); // g = v^((p+2)(m-2)/3+m)=v^((pm+2m-2p-4+3m)/3)=v^((pm+2m-2(3m-1)-4+3m)/3)=v^(((p-1)m-2)/3)
452
} else { // note that e=1 and k=1 implies g=v^((p+1)(m-1)/3) = v^(((p-1)m-1)/3)
453
ff2_square(g,g); // g=v^((2(p-1)m-2)/3)
454
}
455
ff2_mult(g,g,v); // g = v^(((3-k)(p-1)m+1)/3) = v^(1/3}
456
ff2_trace(roots,g); // Every choice of cube root of v corresponds to a root of f, so we can just take the trace
457
ff2_setup_cbrt(); // we still need to get the cube root of unity (this is annoying)
458
ff2_mult(g,g,_ff2_cbrt_unity);
459
ff2_trace(roots+1,g);
460
ff2_mult(g,g,_ff2_cbrt_unity);
461
ff2_trace(roots+2,g);
462
return 3;
463
}
464
// now we have e>1
465
if ( k==2 ) { ff2_square(w,w); ff2_mult(w,w,h); } // w = v^((k*3^(e-1)-1)m)
466
ff2_mult(g,g,w); // g = v^(((p-1)m-k)/3) (note that (p+2)(m-k)/3 + (k*3^(e-1)-1)m = (pm+2m-kp-2k+k3^em-3m)/3
467
// = (pm-m-kp-2k+k(p+1))/3=((p-1)m-k)/3
468
ff2_square(h,g); ff2_mult(h,h,g); // h = v^((p-1)m-k)
469
ff2_mult(h,h,vk); // h = v^((p-1)m) is in the 3-Sylow subgroup, and it must be a cubic residue
470
if ( ! ff2_3Sylow_invcbrt(h,h) ) { // h = v^(-(p-1)m)/3) if e=1
471
printf ("Impossible situation in ff_poly_factors_g1 with p=%ld,m=%ld: v^((p-1)m)=(%ldz+%ld) is not a cube in 3-Sylow of F_p^2=F_p[z]/(z^2-%ld) ",
472
_ff_p, _ff_p3_m, _ff_get_ui(a), _ff_get_ui(h[1]), _ff_get_ui(h[0]), _ff_get_ui(s));
473
ff_poly_print(f,3);
474
exit (0);
475
}
476
if ( k==1 ) {
477
ff2_square(h,h); // h = v^(-2(p-1)m/3)
478
ff2_square(g,g); // g = v^(2((p-1)m-1)/3)
479
ff2_mult(g,g,v); // g = v^(2((p-1)m-1)/3+1) = v^((2(p-1)m+1)/3
480
} else {
481
ff2_mult(g,g,v); // g = v^((p-1)m-2)/3+1 = v^((p-1)m+1/3
482
}
483
ff2_mult(g,g,h); // g = v^(1/3)
484
ff2_trace(roots,g); // Every choice of cube root of v corresponds to a root of f, so we can just take the trace of each
485
ff2_mult(g,g,_ff2_cbrt_unity);
486
ff2_trace(roots+1,g);
487
ff2_mult(g,g,_ff2_cbrt_unity);
488
ff2_trace(roots+2,g);
489
return 3;
490
}
491
}
492
puts ("Unreachable code in ff_poly_factors_g1!");
493
exit (0);
494
}
495
496
// Compute roots of poly of degree 2, or less
497
int ff_poly_roots_d2 (ff_t r[2], ff_t f[3], int d_f)
498
{
499
ff_t t1,t2,D;
500
501
switch (d_f) {
502
case 0: return 0;
503
case 1:
504
if ( _ff_one(f[1]) ) { _ff_set_one(t1); } else { _ff_invert(t1,f[1]); }
505
ff_negate(t1); _ff_mult(r[0],t1,f[0]); return 1;
506
case 2:
507
_ff_square(t1,f[1]);
508
_ff_mult(t2,f[2],f[0]);
509
_ff_x2(t2); _ff_x2(t2);
510
_ff_subfrom(t1,t2);
511
if ( ! ff_sqrt(&D,&t1) ) return 0;
512
if ( _ff_one(f[2]) ) {
513
_ff_set(t1,_ff_half);
514
} else {
515
_ff_add(t2,f[2],f[2]);
516
_ff_invert(t1,t2);
517
}
518
_ff_sub(t2,D,f[1]);
519
_ff_mult(r[0],t1,t2);
520
ff_negate(D);
521
_ff_sub(t2,D,f[1]);
522
_ff_mult(r[1],t1,t2);
523
return 2;
524
}
525
}
526
527
528
/*
529
ff_poly_g1_3tor computes the size of the 3-torsion subgroup of y^2=f(x) = x^3+ax+b.
530
If the 3-torsion is trivial, it will instead attempt to determine the 3-torsion subgroup of the twist, and if it is non-trivial
531
will return a negative value whose absolute value is the size of the 3-torsion subgroup in the twist.
532
533
This means that a return value of 1 indicates that neither the curve nor its twist have non-trivial 3-torsion.
534
In this situation, if p=1mod3, then a_p=0mod3 and the group order must be congruent to 2 mod 3.
535
536
If the curve is singular (discriminant zero), 0 is returned.
537
*/
538
int ff_poly_g1_3tor (ff_t f[4])
539
{
540
ff_t div[5],r[4],g[4],t0,t1, t2, d;
541
int j, k, tor;
542
543
#if FF_WORDS > 1
544
err_printf ("ff_poly_g1_3tor only supports single word finite fields\n"); exit (0);
545
#endif
546
547
if ( _ff_p == 3 ) { if ( _ff_zero(f[1]) ) return 0; else return 1; } // no curve y^2=x^3+ax+b over F_3 has 3 torsion
548
549
_ff_square(t1,f[0]); _ff_set_ui(t2,27); _ff_mult(t0,t1,t2); // t0 = 27f0^2
550
_ff_square(t2,f[1]); _ff_mult(t1,t2,f[1]); _ff_x2(t1); _ff_x2(t1); // t1 = 4f1^3, t2 = f1^2
551
_ff_add(d, t0, t1); // d = 4f1^3+27f0^2 = -discriminant of f
552
if ( _ff_zero(d) ) return 0;
553
_ff_add(t0,_ff_third, _ff_third); _ff_x2(t0); _ff_square(t1,t0); // t0 = (4/3)^2 = 16/9
554
ff_mult(d,d,t0); // d^2 = 256/81*D_f^2 = -D/3 where D is discriminant of the 3-div poly
555
556
ff_negate(t2);
557
ff_mult(div[0],_ff_third,t2);
558
_ff_add(div[1],f[0],f[0]); _ff_x2(div[1]);
559
_ff_add(div[2],f[1],f[1]);
560
_ff_set_zero(div[3]);
561
_ff_set_one(div[4]);
562
// 3-division poly is x^4 + 2f1x^2 +4f0x - f1^2/3
563
564
// handle p=2mod3 ourselves since we can easily save some time. note that in this case, either both the curve and its twist have non-trivial 3-torsion, or neither do.
565
// we also know that the 3-rank is 1 and there can be at most one root of the 3-division poly (2 pts of order 3).
566
if ( ! _ff_p1mod3 ) {
567
if ( _ff_zero(div[1]) ) { // biquadratic case is handled quickly by ff_poly_roots
568
k = _ff_poly_roots_d4(r,div,&d);
569
return ( k ? 3 : 1);
570
} else {
571
ff_depressed_cubic_resolvent(&t0,g,div);
572
k = _ff_poly_roots_d3(r,g,&d);
573
if ( k != 1 ) { printf ("p=%ld is 2mod3 but cubic resolvent of 3div poly has k=%d!=1 roots!\n f=", _ff_p, k); ff_poly_print(f,3); exit(0); }
574
}
575
_ff_sub(t1,t0,r[0]);
576
return ( ff_residue(t1) ? 3 : 1);
577
}
578
579
// Now handle p=1mod3 by computing the roots of the 3-division polynomial (we actually only need to know the number of roots and the value of one of them)
580
k = _ff_poly_roots_d4(r,div,&d);
581
if ( ! k ) return 1;
582
_ff_square(t1,r[0]); _ff_addto(t1,f[1]); _ff_mult(t2,t1,r[0]); _ff_addto(t2,f[0]); // t2 = f(r[0])
583
if ( _ff_zero(t2) ) { printf ("impossible - non-id point (%lu,0) with 2-tor and 3-tor!\n", _ff_get_ui(r[j])); ff_poly_print(f,3); ff_poly_print(div,4); exit(0); }
584
585
// We now rely on the fact that the 3-division poly of the curve and its twist are intimately related. They have the same factorization pattern,
586
// and either all the roots of the 3-division poly correspond to points on the curve, or none of them do, so we only need to check one.
587
// In the latter case, it must be that all the roots of the 3-division poly of the twist correspond to points on the twist.
588
if ( ff_residue(t2) ) return ( k==1?3:9); else return (k==1?-3:-9);
589
}
590
591
#if SMALLJAC_GENUS==2
592
int ff_poly_g2_3tor(ff_t f[6])
593
{
594
ff_t g[POLY_G2TOR3_DEGREE+1], h[POLY_G2TOR3_DEGREE], r[POLY_G2TOR3_DEGREE];
595
int k, d_h, d_r;
596
597
ff_poly_g2tor3_modpoly(g,f);
598
599
// first make sure modular poly is square free
600
ff_poly_derivative (h,&d_h,g,POLY_G2TOR3_DEGREE);
601
ff_poly_gcd (r,&d_r,g,POLY_G2TOR3_DEGREE,h,d_h);
602
if ( d_r > 0 ) return 0;
603
604
k = ff_poly_roots(g,40);
605
if ( ! k ) return 1;
606
if ( k ==2 ||k==5 ||k==8 ) return 3;
607
return 0;
608
}
609
#endif
610
611
/*
612
Given a point (x,y) on y^2=f(x)=x^3+f1x+f0, replaces (x,y) with a point (u,v) s.t. (u,v) composed with itself yields (x,y) or its inverse (we don't distinguish these cases!)
613
Returns 1 for success, 0 if no such point exists.
614
*/
615
int ff_poly_g1_halve (ff_t *x, ff_t *y, ff_t f[4])
616
{
617
ff_t g[5], r[4];
618
register ff_t t0, t1, t2;
619
int i, k;
620
621
// construct g(z) = z^4 - 2(f1+3x^2)z^2 - 8y^2z + f1^2 - 3x(y^2+f1x + 3f0). If (u,v)*(u,v)=(x,y) then g(u-x)=0 (note translation by x to kill degree 3 coeff of g).
622
_ff_set_one(g[4]); _ff_set_zero(g[3]);
623
_ff_square(t0,*x); _ff_add(t1,t0,t0); _ff_addto(t1,t0); _ff_addto(t1,f[1]); _ff_x2(t1); _ff_neg(g[2],t1); // g[2] = -2(f1+3x^2)
624
_ff_add(t0,*y,*y); _ff_square(t1,t0); _ff_x2(t1); _ff_neg(g[1],t1); // g[1] = -8y^2
625
_ff_square(t0,*y); _ff_mult(t1,f[1],*x); _ff_addto(t0,t1); _ff_add(t1,f[0],f[0]); _ff_addto(t1,f[0]); _ff_addto(t0,t1); // t0 = y^2 + f1x + 3f0
626
_ff_add(t1,*x,*x); _ff_addto(t1,*x); _ff_mult(t2,t0,t1); _ff_square(t1,f[1]); _ff_sub(g[0],t1,t2); // g[0] = f1^2 - 3x(y^2+f1x+3f0)
627
//printf ("(%d,%d) havling poly: ", _ff_get_ui(*x), _ff_get_ui(*y)); ff_poly_print(g,4);
628
629
k = ff_poly_roots_d4 (r, g);
630
//printf ("%d roots\n", k);
631
for ( i = 0 ; i < k ; i++ ) {
632
_ff_add(t2,r[i],*x);
633
_ff_square(t0,t2); _ff_addto(t0,f[1]); _ff_mult(t1,t0,t2); _ff_add(g[1],t1,f[0]); // g[1] = f(r[i]+x)
634
if ( ff_sqrt(g,g+1) ) break;
635
}
636
if ( i == k ) return 0;
637
//printf ("found (%d,%d)^2 = (%d,%d)\n", _ff_get_ui(t2), _ff_get_ui(g[0]), _ff_get_ui(*x), _ff_get_ui(*y));
638
_ff_set(*x,t2); _ff_set(*y,*g);
639
return 1;
640
}
641
642
643
/*
644
Returns the size of the 2-Sylow subgroup of the elliptic curve y^2=f(x)=x^3+f1x+f0, provided it is cyclic.
645
Otherwise the return value is -1, which indicates that Z/2Z x Z/2Z is a subgroup (and the group order is divisible by 4)
646
(we could compute the entire 2-Sylow in this case, but it would be much more time consuming)
647
*/
648
int ff_poly_g1_2Sylow (ff_t f[4])
649
{
650
ff_t r[3];
651
ff_t x, y;
652
int n;
653
654
n = ff_poly_roots_d3(r,f);
655
if ( ! n ) return 1;
656
if ( n > 1) return 0;
657
658
_ff_set(x,r[0]); _ff_set_zero(y);
659
for ( n = 2 ; ff_poly_g1_halve (&x, &y, f) ; n<<= 1 );
660
return n;
661
}
662
663
664
/*
665
Computes the order and rank of the 4-torsion subgroup of the elliptic curve y^2=f(x)=x^3+f1x+f0.
666
Set o to the order and returns d to indicate the group Z/dZ x Z/eZ where d*e = o, d divides e (and may be 1)
667
668
If flag8 is set, also check for Z/8Z (for a random curve with full Galois image in GL(2,Z/8Z), these occurs with probability 1/8).
669
We could also check for Z/2ZxZ/8Z, or even Z/4ZxZ/8Z or Z/8ZxZ/8Z (these occur with prob. 3/64, 3/512 and 1/1536 resp);
670
671
The latter two are rare enough not to be worth the trouble, but Z/2xZ/8Z might be worth doing. Note, however, that Z/2xZ/4Z
672
contains 4 points of order 4 (two pairs of inverses) and we would need to compute two of these.
673
*/
674
int ff_poly_g1_4tor (int *o, ff_t f[4], int flag8)
675
{
676
ff_t r[3];
677
ff_t u, v, v2;
678
register ff_t t0,t1,t2;
679
register int i, n;
680
681
n = ff_poly_roots_d3(r,f);
682
for ( i = 0 ; i < n ; i++ ) {
683
_ff_square(t0,r[i]); _ff_add(t1,t0,t0); _ff_addto(t1,t0); _ff_add(u,t1,f[1]); // u = 3x^2+a where (x,0) is a 2-torsion point (x=r[i] was a root of f)
684
if ( ff_sqrt(&v,&u) ) { // v is a root of the translated halving poly
685
_ff_add(t2,r[i],v); // t2=x+v is a root of the halving poly
686
_ff_square(t0,t2); _ff_addto(t0,f[1]); _ff_mult(t1,t0,t2); _ff_add(u,t1,f[0]); // u = f(t2)
687
if ( ff_sqrt(&v,&u) ) break; // if successful, (t2,v) is a point of order 4
688
_ff_sub(t2,r[i],v); // t2=x-u is also a root of the halving poly
689
_ff_square(t0,t2); _ff_addto(t0,f[1]); _ff_mult(t1,t0,t2); _ff_add(u,t1,f[0]); // u = f(t2)
690
if ( ff_sqrt(&v,&u) ) break; // if successful, (t2,v) is a point of order 4
691
}
692
}
693
if ( i == n ) { *o = n+1; return (n==3?2:1); } // no pts of order 4, so 4-torsion subgroup = 2-torsion subgroup
694
//printf ("%ld: found point(%ld,%ld) with order 4 (i=%d,n=%d) on curve y^2 = ", _ff_p, _ff_get_ui(t2), _ff_get_ui(v), i, n); ff_poly_print(f,3);
695
if ( n==1 ) { // rank 1 case, with a point of order 4
696
if ( ! flag8 ) { *o = 4; return 1; }
697
//printf("%ld: flag8 set (Z/4Z), attempting to halve order 4 point (%ld,%ld) on curve y^2 = ", _ff_p, _ff_get_ui(t2), _ff_get_ui(v)); ff_poly_print(f,3);
698
_ff_set(u,t2);
699
*o = ( ff_poly_g1_halve(&u,&v,f) ? 8 : 4 ); // check for a point of order 8 (this takes us outside the 4-torsion subgroup)
700
//if ( *o == 8 ) puts ("Succeeded"); else puts ("Failed");
701
return 1;
702
}
703
if ( i > 0 ) { *o = 8; return 2; }
704
705
// We are here if the 2-rank is 2 and we succesfully halved the first point of order 2 that we tried. We need to check one more to distinguish Z/2Z x Z/4Z from Z/4Z x Z/4Z.
706
// Don't change v or t2 from above, we may need them below
707
i = 1;
708
_ff_square(t0,r[i]); _ff_add(t1,t0,t0); _ff_addto(t1,t0); _ff_add(u,t1,f[1]); // u = 3x^2+a where (x,0) is a 2-torsion point (x=r[0] was a root of f)
709
if ( ff_sqrt(&v2,&u) ) { // v2 is a root of the translated halving poly
710
_ff_add(t1,r[i],v2); // t1is a root of the halving poly
711
_ff_square(t0,t1); _ff_addto(t0,f[1]); ff_mult(t1,t1,t0); _ff_addto(t1,f[0]); // t1 = f(t1)
712
if ( ff_residue(t1) ) { *o = 16; return 4; }
713
}
714
*o = 8;
715
return 2;
716
}
717
718
/*
719
Computes the roots of f(x)=x^4+ax^2+bx+c, solving by radicals.
720
Handles singular f, will return repeated roots correctly.
721
722
Currently r is required. We could easily extend to make it optional and only return a count.
723
The parameter pd is optional. If specified it points to sqrt(-D/3) (used for 3-torsion).
724
*/
725
int _ff_poly_roots_d4 (ff_t r[4], ff_t f[5], ff_t *pd)
726
{
727
ff_t g[4], h[3], s[3], x, y, t;
728
register ff_t t0,t1,t2,w1,w2;
729
register int i,k,res;
730
731
if ( _ff_zero(f[1]) ) {
732
// f = x^4+f2x^2+f0 = g(x^2) where g is monic quadratic
733
_ff_set_one(g[2]); _ff_set(g[1],f[2]); _ff_set(g[0],f[0]);
734
k = ff_poly_roots_d2(s,g,2);
735
if ( ! k ) return 0;
736
if ( ff_sqrt(r,s) ) {
737
_ff_neg(r[1],r[0]);
738
// could optimize for double root here, but why bother
739
if ( ff_sqrt(r+2,s+1) ) { _ff_neg(r[3],r[2]); return 4; }
740
return 2;
741
}
742
if ( ff_sqrt(r,s+1) ) { _ff_neg(r[1],r[0]); return 2; }
743
return 0;
744
}
745
ff_depressed_cubic_resolvent(&t,g,f);
746
k = _ff_poly_roots_d3(r,g,pd); // put the roots of g into r for now, they will get replaced by roots of f below.
747
if ( k ) {
748
res = 0;
749
for ( i = 0 ; i < k ; i++ ) {
750
// ff_poly_eval (&y, g, 3, r+i);
751
// if ( ! _ff_zero(y) ) { printf ("(%d roots) %ld is not a root of ", k, _ff_get_ui(r[i])); ff_poly_print(g,3); exit(0); }
752
_ff_sub(x,r[i],t);
753
ff_negate(x);
754
if ( ! ff_sqrt(s+i,&x) ) break;
755
if ( ++res == 2 ) break;
756
}
757
if ( res==2 ) {
758
_ff_mult(t1,s[0],s[1]);
759
ff_negate(t1);
760
_ff_invert(t0,t1);
761
_ff_mult(s[2],t0,f[1]);
762
_ff_square(t1,s[2]);
763
_ff_sub(x,r[2],t);
764
ff_negate(x);
765
if ( ! _ff_equal(t1,x) ) { printf("%ld: division failed s[0]=%d, s[1]=%d, d=%d\n", _ff_p, _ff_get_ui(s[0]), _ff_get_ui(s[1]), _ff_get_ui(t0)); ff_poly_print(f,4); ff_poly_print(g,3); exit(0); }
766
_ff_add(t1,s[0],s[1]); _ff_addto(t1,s[2]); ff_mult(t1,t1,_ff_half);
767
_ff_set(r[0],t1);
768
ff_negate(s[0]); ff_negate(s[2]);
769
_ff_add(t1,s[0],s[1]); _ff_addto(t1,s[2]); ff_mult(t1,t1,_ff_half);
770
_ff_set(r[1],t1);
771
ff_negate(s[0]); ff_negate(s[1]);
772
_ff_add(t1,s[0],s[1]); _ff_addto(t1,s[2]); ff_mult(t1,t1,_ff_half);
773
_ff_set(r[2],t1);
774
ff_negate(s[0]); ff_negate(s[2]);
775
_ff_add(t1,s[0],s[1]); _ff_addto(t1,s[2]); ff_mult(t1,t1,_ff_half);
776
_ff_set(r[3],t1);
777
return 4;
778
} else if ( k==1 && res==1 ) {
779
_ff_set_one(h[2]);
780
_ff_set(h[1],s[0]);
781
_ff_square(t1,s[0]); // t1 = s^2
782
_ff_add(w1,t1,f[2]);
783
ff_mult(w1,w1,_ff_half); // w1 = (s^2+f2)/2
784
_ff_square(t0,f[2]);
785
_ff_add(t2,f[0], f[0]); _ff_x2(t2); // t2 = 4f0
786
_ff_subfrom(t0,t2); // t0 = f2^2-4f0
787
_ff_add(t2,f[2],f[2]); // t2 = 2f2
788
_ff_add(w2,t1,t2); // w2 = s^2+2f2
789
ff_mult(w2,w2,t1); // w2 = s^4+2f2s^2
790
_ff_addto(w2,t0);
791
ff_mult(w2,w2,s[0]); // w2 = s(s^4+2f2s^2+f2^2-4f0)
792
_ff_invert(t0,f[1]);
793
ff_mult(t1,_ff_half,t0);
794
ff_mult(w2,w2,t1);
795
_ff_sub(h[0],w1,w2);
796
if ( ff_poly_roots_d2(r,h,2) != 2 ) {
797
ff_negate(h[1]);
798
_ff_add(h[0],w1,w2);
799
if ( ff_poly_roots_d2(r,h,2) != 2 ) { printf("%ld: fail 2-1-1 split with s=%ld\n", _ff_p, _ff_get_ui(s[0])); ff_poly_print(f,4); ff_poly_print(g,3); ff_poly_print(h,2); exit(0); }
800
}
801
return 2;
802
} else {
803
return 0;
804
}
805
} else {
806
// this is the annoying case, we know we have a 1,3 split, but getting the value of the root is expensive
807
// we need sqrt(-z) in F_p[z]/h(z) where h(z) is the undepressed resolvent
808
// so we want sqrt(t-z) in F_p[z]/g(z) which is sqrt(z+t) in F_p[z]/k(z) where k(z)=-g(-z)=z^3+g1z-g0z. In terms of z^3-rz-s, r=-g1 and s=g0
809
_ff_neg(t1,g[1]);
810
if ( ff3_trsqrt_zpa_mod_rs(&x,t,t1,g[0]) ) res++; else { printf ("%ld: impossible failure of ff3_trsqrt_zpa_mod_rs in ff_poly_roots_d4!", _ff_p); ff_poly_print(f,4); ff_poly_print(g,3); exit(0); }
811
ff_mult(x,x,_ff_half);
812
ff_poly_eval(&y,f,4,&x);
813
if ( ! _ff_zero(y) ) ff_negate(x);
814
_ff_set(r[0],x);
815
return 1;
816
}
817
puts ("Unreachable code in ff_poly_factors_g1!");
818
exit (0);
819
}
820
821
/*
822
Hard-wired code for computing the roots of f(x)=x^4+ax^2+bx+c. This is used
823
primarily for testing for 3-torsion by computing roots of the 3-division poly (made monic).
824
If f is singular, it only returns distinct roots.
825
826
It is generally very fast, although when f splits completely more work is required.
827
*/
828
int ff_old_poly_roots_d4 (ff_t r[4], ff_t f[5])
829
{
830
_ff_t_declare g[5], h[5], A[5], t, a;
831
int i,j,k,d_g, d_h, d_A;
832
833
#if FF_WORDS > 1
834
err_printf ("ff_poly_roots_d4 only implemented for single word fields\n"); exit (0);
835
#endif
836
ff_poly_xn_mod_d4 (g, _ff_p, f); // compute x^p mod f
837
_ff_set_one(t);
838
_ff_subfrom (g[1],t); // x^p-x mod f
839
for ( d_g = 3 ; d_g >= 0 && _ff_zero(g[d_g]) ; d_g-- );
840
ff_poly_gcd (h, &d_h, g, d_g, f, 4);
841
switch (d_h) {
842
case 0: return 0;
843
case 1:
844
case 2: return ff_poly_roots_d2 (r, h, d_h);
845
case 3:
846
case 4:
847
ff_poly_monic (A, &d_A, h, d_h);
848
i = j = k = 0;
849
_ff_set_zero(t);
850
do {
851
// if A was just made cubic, it may need to be translated to make x^2 coeff zero
852
// if this is done, we remember the translation so we can shift roots back at the end
853
if ( d_A == 3 && ! _ff_zero(A[2]) ) { j = i; ff_depress_cubic(&t, A); }
854
if ( k ) ff_random(&a); // use a=0 first time through
855
if ( d_A == 4 ) {
856
if ( !k ) {
857
ff_poly_xn_mod_d4 (g, (_ff_p-1)/2, A);
858
} else {
859
ff_poly_xpan_mod_d4 (g, a, (_ff_p-1)/2, A);
860
}
861
_ff_dec (g[0]);
862
for ( d_g = 3 ; d_g >= 0 && _ff_zero(g[d_g]) ; d_g-- );
863
} else {
864
if ( !k ) {
865
ff3_zn_mod (g, (_ff_p-1)/2, A);
866
} else {
867
ff_poly_xpan_mod_d3 (g, a, (_ff_p-1)/2, A);
868
}
869
_ff_dec (g[0]);
870
for ( d_g = 2 ; d_g >= 0 && _ff_zero(g[d_g]) ; d_g-- );
871
}
872
ff_poly_gcd (h, &d_h, g, d_g, A, d_A);
873
if ( d_h > 0 && d_h < d_A ) {
874
ff_poly_div (A, &d_A, g, &d_g, A, d_A, h, d_h);
875
if ( d_g != -1 ) { err_printf ("inexact poly division in ff_poly_roots_d4!\n"); exit(0); }
876
if ( d_h < 3 ) {
877
i += ff_poly_roots_d2 (r+i,h,d_h);
878
ff_poly_copy (h,&d_h,A,d_A);
879
ff_poly_monic (A,&d_A,h,d_h);
880
} else {
881
i += ff_poly_roots_d2 (r+i,A,d_A);
882
ff_poly_monic (A,&d_A,h,d_h);
883
}
884
}
885
k++;
886
} while ( d_A > 2 );
887
i += ff_poly_roots_d2 (r+i,A,d_A);
888
// shift any roots found after translation occurred (if it did)
889
if ( ! _ff_zero(t) ) while ( j < i ) { _ff_subfrom(r[j],t); j++; }
890
return i;
891
}
892
return d_h;
893
}
894
895
896
void ff_poly_xn_mod_d4 (ff_t g[4], unsigned long n, ff_t f[5])
897
{
898
_ff_t_declare_reg t1,t2,t3,nf0,f02,f12,f22,w1,w2;
899
register unsigned long j, m;
900
register int i;
901
902
for ( i = 0 ; i < 4 ; i++ ) _ff_set_zero(g[i]);
903
if ( !n ) { _ff_set_one(g[0]); return; }
904
i = _asm_highbit(n);
905
if ( i&1 ) i--;
906
m = 3UL<<i;
907
j = (n&m)>>i;
908
_ff_set_one(g[j]);
909
m >>= 2; i -= 2;
910
if ( m > 1 ) {
911
_ff_neg(nf0,f[0]);
912
// these won't get used if no digits of n are 3, we could check for this...
913
_ff_mult(f02,f[0],f[2]);
914
_ff_mult(f12,f[1],f[2]);
915
_ff_square(f22,f[2]);
916
}
917
while (m) {
918
ff_poly_square_mod_d4 (g,g,f);
919
ff_poly_square_mod_d4 (g,g,f);
920
j = ((n&m)>>i);
921
switch (j) {
922
case 1: // 3M+2A
923
_ff_set(t3,g[3]);
924
_ff_set(g[3],g[2]);
925
_ff_mult(w2,f[2],t3);
926
_ff_sub(g[2],g[1],w2);
927
_ff_mult(w2,f[1],t3);
928
_ff_sub(g[1],g[0],w2);
929
_ff_mult(g[0],t3,nf0);
930
break;
931
case 2: // 6M+3A
932
_ff_set(t3,g[3]);
933
_ff_set(t2,g[2]);
934
_ff_mult(w1,f[2],t3);
935
_ff_sub(g[3],g[1],w1);
936
_ff_mult(w1,f[2],t2);
937
_ff_mult(w2,f[1],t3);
938
_ff_addto(w2,w1);
939
_ff_sub(g[2],g[0],w2);
940
_ff_mult(w1,t3,nf0);
941
_ff_mult(w2,f[1],t2);
942
_ff_sub(g[1],w1,w2);
943
_ff_mult(g[0],t2,nf0);
944
break;
945
case 3: // 9M+8A
946
_ff_set(t3,g[3]);
947
_ff_set(t2,g[2]);
948
_ff_set(t1,g[1]);
949
_ff_mult(w1,t2,f[2]);
950
_ff_mult(w2,t3,f[1]);
951
_ff_addto(w1,w2);
952
_ff_sub(g[3],g[0],w1);
953
_ff_sub(w2,f22,f[0]);
954
_ff_mult(w1,t3,w2);
955
_ff_mult(w2,t1,f[2]);
956
_ff_subfrom(w1,w2);
957
_ff_mult(w2,t2,f[1]);
958
_ff_sub(g[2],w1,w2);
959
_ff_mult(w1,t1,f[1]);
960
_ff_mult(w2,t2,f[0]);
961
_ff_addto(w1,w2);
962
_ff_mult(w2,t3,f12);
963
_ff_sub(g[1],w2,w1);
964
_ff_mult(w1,t3,f02);
965
_ff_mult(w2,t1,f[0]);
966
_ff_sub(g[0],w1,w2);
967
}
968
m >>= 2; i -= 2;
969
}
970
}
971
972
// Computes (x+a)^n modulo x^2-f0 (note the sign). Assumes n < 2^63
973
void ff_poly_xpan_mod_d2 (ff_t g[2], ff_t a, unsigned long n, ff_t f[1])
974
{
975
_ff_t_declare_reg t0,t1,t2,t3,s0,s1;
976
register unsigned long m;
977
int i, j;
978
979
if ( ! n ) { _ff_set_zero(g[1]); _ff_set_one(g[0]); return;}
980
_ff_set_one(g[1]); _ff_set(g[0],a);
981
if ( n==1 ) return;
982
i = _asm_highbit(n)-1;
983
m = 1UL<<i;
984
_ff_add(s0,a,a);
985
_ff_add(s1,f[0],f[0]);
986
while (m) {
987
j = ((n&m)>>i);
988
if ( j ) {
989
// square g and multiply by (x+a) to get (g0^2+g1^2f0+a*2g0g1)*x + a(g0^2+g1^2f0) + 2g0g1f0
990
_ff_square(t0,g[0]); // t0=g0^2
991
_ff_square(t1,g[1]); // t1=g1^2
992
_ff_mult(t2,f[0],t1); // t2 = g1^2f0
993
_ff_add(t1,t0,t2); // t1 = g0^2+g1^2f0
994
_ff_mult(t0,g[0],g[1]); // t0 = g0g1
995
_ff_mult(t2,a,t1); // t2 = a(g0^2+g1^2f0)
996
_ff_mult(t3,s1,t0); // t3 = 2f0g0g1
997
_ff_add(g[0],t2,t3); // g0 = a(g0^2+g1^2f0) + 2g0g1f0
998
_ff_mult(t2,t0,s0); // t2 = 2ag0g1
999
_ff_add(g[1],t1,t2); // g1 = g0^2+g1^2f0+a*2g0g1
1000
// 7M + 3A
1001
} else {
1002
// square g mod f to get 2g0g1*x + (g0^2+g1^2f0)
1003
_ff_square(t0, g[0]); _ff_square(t1, g[1]);
1004
_ff_mult(t2,g[0],g[1]);
1005
_ff_add(g[1],t2,t2);
1006
_ff_mult(t2,t1,f[0]);
1007
_ff_add(g[0],t0,t2);
1008
// 4M + 2A
1009
}
1010
m >>= 1; i -= 1;
1011
}
1012
}
1013
1014
// Computes (x+a)^n modulo a cubic of the form x^3+f1x+f0. Assumes n < 2^63
1015
void ff_poly_xpan_mod_d3 (ff_t g[3], ff_t a, unsigned long n, ff_t f[4])
1016
{
1017
_ff_t_declare_reg w1,w2,t2;
1018
register unsigned long m;
1019
register int i, j;
1020
1021
if ( ! n ) { _ff_set_zero(g[1]); _ff_set_one(g[0]); return;}
1022
_ff_set_zero(g[2]); _ff_set_one(g[1]); _ff_set(g[0],a);
1023
if ( n==1 ) return;
1024
i = _asm_highbit(n)-1;
1025
m = 1UL<<i;
1026
while (m) {
1027
ff_poly_square_mod_d3 (g,g,f);
1028
j = ((n&m)>>i);
1029
if ( j ) { // 5M+4A
1030
_ff_set(t2,g[2]);
1031
_ff_mult(w1,t2,a);
1032
_ff_add(g[2],g[1],w1);
1033
_ff_mult(w1,g[1],a);
1034
_ff_mult(w2,t2,f[1]);
1035
_ff_subfrom(w1,w2);
1036
_ff_add(g[1],g[0],w1);
1037
_ff_mult(w1,g[0],a);
1038
_ff_mult(w2,t2,f[0]);
1039
_ff_sub(g[0],w1,w2);
1040
}
1041
m >>= 1; i -= 1;
1042
}
1043
}
1044
1045
// squares a quadratic (arbitrary coeff) modulo a cubic of the form x^3+f1x+f0
1046
// no initialization or validation is done, overlap is ok.
1047
void ff_poly_square_mod_d3 (ff_t w[3], ff_t u[3], ff_t f[4])
1048
{
1049
_ff_t_declare_reg t0,t1,s1,s2, w1, w2;
1050
1051
_ff_mult(t0,u[2],f[0]);
1052
_ff_mult(t1,u[2],f[1]);
1053
_ff_mult(s1,t0,u[2]);
1054
_ff_add(s2,u[1],u[1]);
1055
1056
_ff_add(w1,u[0],u[0]);
1057
_ff_subfrom(w1,t1);
1058
_ff_mult(w2,w1,u[2]);
1059
_ff_square(w1,u[1]);
1060
_ff_add(w[2],w1,w2); // w2 = u1^2+(2u0-f1u2)u2 = 2u0u2+u1^2 - f1u2^2
1061
1062
_ff_sub(w1,u[0],t1);
1063
_ff_mult(w2,w1,s2);
1064
_ff_sub(u[1],w2,s1); // w1 = 2u1(u0-f1u2)-(f0u2)u2) = 2u0u2+u1^2 - f1u2^2
1065
1066
_ff_square(w1,u[0]);
1067
_ff_mult(w2,s2,t0);
1068
_ff_sub(u[0],w1,w2); // w0 = u0^2-(2u1)(f0u2)
1069
// 8M+7A
1070
}
1071
1072
// Computes (x+a)^n modulo a quartic of the form x^4+f2x^2+f1x+f0. Assumes n < 2^63
1073
void ff_poly_xpan_mod_d4 (ff_t g[4], ff_t a, unsigned long n, ff_t f[5])
1074
{
1075
_ff_t_declare_reg w1,w2,t3;
1076
ff_t h[4];
1077
register unsigned long m;
1078
int i, j;
1079
1080
if ( ! n ) { _ff_set_zero(g[1]); _ff_set_one(g[0]); return; }
1081
_ff_set_zero(g[3]); _ff_set_zero(g[2]); _ff_set_one(g[1]); _ff_set(g[0],a);
1082
if ( n==1 ) return;
1083
i = _asm_highbit(n)-1;
1084
m = 1UL<<i;
1085
while (m) {
1086
ff_poly_square_mod_d4 (g,g,f);
1087
j = ((n&m)>>i);
1088
if ( j ) {
1089
_ff_set(t3,g[3]);
1090
_ff_mult(w1,t3,a);
1091
_ff_add(g[3],g[2],w1);
1092
_ff_mult(w1,g[2],a);
1093
_ff_mult(w2,t3,f[2]);
1094
_ff_subfrom(w1,w2);
1095
_ff_add(g[2],g[1],w1);
1096
_ff_mult(w1,g[1],a);
1097
_ff_mult(w2,t3,f[1]);
1098
_ff_subfrom(w1,w2);
1099
_ff_add(g[1],g[0],w1);
1100
_ff_mult(w1,g[0],a);
1101
_ff_mult(w2,t3,f[0]);
1102
_ff_sub(g[0],w1,w2);
1103
// 7M+6A
1104
}
1105
m >>= 1; i -= 1;
1106
}
1107
}
1108
1109
// squares a cubic (arbitrary coeff) modulo a quartic of the form x^4+ax^2+bx+c
1110
// no initialization or validation is done, overlap of u,w is OK. Uses 18M+21A
1111
void ff_poly_square_mod_d4 (ff_t w[4], ff_t u[4], ff_t f[5])
1112
{
1113
_ff_t_declare_reg t0,t1,t2,s1,s2,s3,s4,s5,w1,w2,w3;
1114
1115
// compute t_i = u_3*f_i
1116
_ff_mult(t0,u[3],f[0]); _ff_mult(t1,u[3],f[1]); _ff_mult(t2,u[3],f[2]);
1117
_ff_mult(s1,u[2],f[2]); // s1=u2f2
1118
_ff_square(s2,u[2]); // s2=u2^2
1119
_ff_mult(s3,u[3],t0); // s3=u3^2f0 (not reused, but need to save since u[3] may get clobbered)
1120
_ff_mult(s4,u[2],t0);
1121
_ff_x2(s4); // s4 = 2u2u3f0 (not reused, but need to save since u[2] may get clobbered)
1122
_ff_mult(s5,u[1],u[3]);
1123
_ff_x2(s5); // s5 = 2u1u3 (not reused, but need to save since u[1],u[3] may get clobbered)
1124
1125
_ff_sub(w1,u[0],s1);
1126
_ff_x2(w1);
1127
_ff_subfrom(w1,t1);
1128
_ff_mult(w2,w1,u[3]);
1129
_ff_mult(w1,u[1],u[2]);
1130
_ff_x2(w1);
1131
_ff_add(w[3],w1,w2); // w3 = (2(u0-u2f2)-u3f2)u3+2u1u2 = 2*u0*u3 + 2*u1*u2 - 2*u2*u3*f2 - u3^2*f1
1132
// note u[3] is now (potentially) clobbered
1133
1134
_ff_sub(w2,u[1],t2);
1135
_ff_square(w1,w2);
1136
_ff_sub(w2,u[0],t1);
1137
_ff_x2(w2);
1138
_ff_subfrom(w2,s1);
1139
_ff_mult(w3,u[2],w2);
1140
_ff_addto(w1,w3);
1141
_ff_sub(w[2],w1,s3); // w2 = (u1-u3f2)^2+u2(2(u0-u3f1)-u2f2)-u3(u3f0) = 2*u0*u2 + u1^2 - 2*u1*u3*f2 - u2^2*f2 - 2*u2*u3*f1 - u3^2*f0 + u3^2*f2^2
1142
// note u[2] is now (potentially) clobbered
1143
1144
_ff_sub(w2,u[0],t1);
1145
_ff_mult(w1,u[1],w2);
1146
_ff_x2(w1);
1147
_ff_mult(w2,s2,f[1]);
1148
_ff_subfrom(w1,w2);
1149
_ff_subfrom(w1,s4);
1150
_ff_mult(w2,t1,t2);
1151
_ff_add(w[1],w1,w2); // w1 = 2u1(u0-u3f1)-u2^2f1-2u2(u3f0)+(u3f1)(u3f2) = 2*u0*u1 - 2*u1*u3*f1 - u2^2*f1 - 2*u2*u3*f0 + u3^2*f1*f2
1152
// note u[1] is now (potentially) clobbered
1153
1154
_ff_square(w1,u[0]);
1155
_ff_add(w2,s5,s2);
1156
_ff_mult(w3,w2,f[0]);
1157
_ff_subfrom(w1,w3);
1158
_ff_mult(w2,t0,t2);
1159
_ff_add(w[0],w1,w2); // w0 = u0^2-f0(2u1u3+u2^2)+(u3f0)(u3f2) = u0^2 - 2*u1*u3*f0 - u2^2*f0 + u3^2*f0*f2
1160
// 18M+21A
1161
}
1162
1163
1164
/*
1165
1166
The code below has all been superseded by more efficient routines above or in ffext.c
1167
Keep it around in case we need it for debugging.
1168
1169
// squares a quadratic (arbitrary coeff) modulo a cubic of the form x^3+f1x+f0
1170
// no initialization or validation is done, overlap is ok. Uses 10M+9A
1171
void old_ff_poly_square_mod_d3 (ff_t w[3], ff_t u[3], ff_t f[4])
1172
{
1173
_ff_t_declare_reg t1,t3, t4, w0, w1, w2;
1174
1175
_ff_mult(t1,u[0],u[2]);
1176
_ff_add(w2,t1,t1);
1177
_ff_square(t1,u[1]);
1178
_ff_addto(w2,t1);
1179
_ff_square(t4,u[2]); // t4 = u2^2
1180
_ff_mult(t1,f[1],t4);
1181
_ff_subfrom(w2,t1); // w2 = 2u0u2+u1^2 - f1u2^2
1182
_ff_mult(t1,u[0],u[1]);
1183
_ff_add(w1,t1,t1);
1184
_ff_mult(t1,f[0],t4);
1185
_ff_subfrom(w1,t1);
1186
_ff_mult(t1,u[1],u[2]);
1187
_ff_add(t3,t1,t1); // t3 = 2u1u2
1188
_ff_mult(t1,f[1],t3);
1189
_ff_subfrom(w1,t1); // w1 = 2u0u1 - f0u2^2 - 2f1u1u2
1190
_ff_square(w0,u[0]);
1191
_ff_mult(t1,f[0],t3);
1192
_ff_sub(w[0],w0,t1); // w0 = u0^2 - 2u1u2f0
1193
_ff_set(w[1],w1);
1194
_ff_set(w[2],w2);
1195
}
1196
1197
// Computes (x+a)^n modulo a cubic of the form x^3+f1x+f0. Assumes n < 2^63
1198
void ff_poly_xpan_mod_d3 (ff_t g[3], ff_t a, unsigned long n, ff_t f[5])
1199
{
1200
_ff_t_declare p[12], *q;
1201
_ff_t_declare_reg t1,a2, ca;
1202
register unsigned long m;
1203
int i, d;
1204
1205
memset (p, 0, sizeof(p));
1206
q = p;
1207
_ff_set_one(q[0]); // (x+a)^0 = 1
1208
q += 3;
1209
_ff_set_one(q[1]);
1210
_ff_set(q[0],a); // (x+a)^1 = x + a
1211
q += 3;
1212
_ff_set_one(q[2]);
1213
_ff_add(ca,a,a);
1214
_ff_set(q[1],ca);
1215
_ff_square(a2,a);
1216
_ff_set(q[0],a2); // (x+a)^2 = x^2 + 2ax + a^2
1217
q += 3;
1218
_ff_addto(ca,a);
1219
_ff_set(q[2],ca);
1220
_ff_mult(t1,ca,a);
1221
_ff_sub(q[1],t1,f[1]);
1222
_ff_mult(t1,a2,a);
1223
_ff_sub(q[0],t1,f[0]); // (x+a)^3 = 3ax^2+(3a^2-f1)x + (a^3-f0)
1224
1225
// could move up to exp8 but we'll settle for a window size of 2 bits for now
1226
ff_poly_exp4_mod_d3 (g, p, n, f);
1227
}
1228
1229
1230
// Computes x^n modulo a cubic of the form x^3+ax+b. Assumes n < 2^63
1231
void ff_poly_xn_mod_d3 (ff_t g[3], unsigned long n, ff_t f[4])
1232
{
1233
_ff_t_declare p[24], *q;
1234
_ff_t_declare_reg t0, t1, t3;
1235
register unsigned long m;
1236
int i, d;
1237
1238
memset (p, 0, sizeof(p));
1239
q = p;
1240
_ff_set_one(q[0]); // x^0 = 1
1241
q += 3;
1242
_ff_set_one(q[1]); // x^1 = x
1243
q += 3;
1244
_ff_set_one(q[2]); // x^2 = x^2
1245
q += 3;
1246
_ff_neg(t1,f[1]);
1247
_ff_neg(t0,f[0]);
1248
_ff_set(q[1],t1);
1249
_ff_set(q[0],t0); // x^3 = -f1x-f0
1250
q += 3;
1251
_ff_set(q[2],t1);
1252
_ff_set(q[1],t0); // x^4 = -f1x^2-f0x
1253
q += 3;
1254
_ff_set(q[2],t0);
1255
_ff_square(t3,f[1]);
1256
_ff_set(q[1],t3); // t3 = f1^2
1257
_ff_mult(t1,f[0],f[1]);
1258
_ff_set(q[0],t1); // x^5 = -f0x^2+f1x^2+f0f1
1259
q += 3;
1260
_ff_set(q[2],t3);
1261
_ff_add(t0,t1,t1); // t0 = 2f0f1
1262
_ff_set(q[1],t0);
1263
_ff_square(t1,f[0]); // t1 = f0^2
1264
_ff_set(q[0],t1); // x^6 = f1^2x^2+2f0f1x+f0^2
1265
q += 3;
1266
_ff_set(q[2],t0);
1267
_ff_mult(t0,f[1],t3);
1268
_ff_sub(q[1],t1,t0);
1269
_ff_mult(t0,f[0],t3);
1270
_ff_neg(q[0],t0); // x^7 = 2f0f1x^2 + (f0^2-f1^3)x - f0f1^2
1271
1272
ff_poly_exp8_mod_d3 (g, p, n, f);
1273
}
1274
1275
1276
void ff_poly_exp4_mod_d3 (ff_t g[3], ff_t p[12], unsigned long n, ff_t f[4])
1277
{
1278
register ff_t *q;
1279
register unsigned long m;
1280
int i, d;
1281
1282
for ( m = (0x3UL<<60), i = 60 ; ! (n&m) ; m >>= 2, i-=2 );
1283
q = p + 3*((n&m)>>i);
1284
_ff_set (g[2],q[2]); _ff_set(g[1],q[1]); _ff_set(g[0],q[0]);
1285
m >>= 2; i -= 2;
1286
while (m) {
1287
ff_poly_square_mod_d3 (g,g,f);
1288
ff_poly_square_mod_d3 (g,g,f);
1289
q = p + 3*((n&m)>>i);
1290
ff_poly_mult_mod_d3 (g, g, q, f);
1291
m >>= 2; i -= 2;
1292
}
1293
}
1294
1295
void ff_poly_exp8_mod_d3 (ff_t g[3], ff_t p[24], unsigned long n, ff_t f[4])
1296
{
1297
register ff_t *q;
1298
register unsigned long m;
1299
int i, d;
1300
1301
for ( m = (0x7UL<<60), i = 60 ; ! (n&m) ; m >>= 3, i-=3 );
1302
q = p + 3*((n&m)>>i);
1303
_ff_set (g[2],q[2]); _ff_set(g[1],q[1]); _ff_set(g[0],q[0]);
1304
1305
m >>= 3; i -= 3;
1306
while (m) {
1307
ff_poly_square_mod_d3 (g,g,f);
1308
ff_poly_square_mod_d3 (g,g,f);
1309
ff_poly_square_mod_d3 (g,g,f);
1310
q = p + 3*((n&m)>>i);
1311
ff_poly_mult_mod_d3 (g, g, q, f);
1312
m >>= 3; i -= 3;
1313
}
1314
}
1315
1316
1317
// multiplies two quadratics (arbitrary coeff) modulo a cubic of the form x^3+ax+b
1318
// no initialization or validation is done, overlap of u,v,w is ok. Uses 10M+11A
1319
void ff_poly_mult_mod_d3 (ff_t w[3], ff_t u[3], ff_t v[3], ff_t f[4])
1320
{
1321
// _ff_t_declare_reg t1, t3, t4, w0, w1, w2;
1322
_ff_t_declare_reg t0, t1, t2, s01, s02, s12, w0, w1;
1323
1324
_ff_mult(t0,u[0],v[0]); _ff_mult(t1,u[1],v[1]); _ff_mult(t2,u[2],v[2]);
1325
_ff_add(w1,u[0],u[1]); _ff_add(w0,v[0],v[1]); _ff_mult(s01,w1,w0); _ff_subfrom(s01,t0); _ff_subfrom(s01,t1);
1326
_ff_add(w1,u[0],u[2]); _ff_add(w0,v[0],v[2]); _ff_mult(s02,w1,w0); _ff_subfrom(s02,t0); _ff_subfrom(s02,t2);
1327
_ff_add(w1,u[1],u[2]); _ff_add(w0,v[1],v[2]); _ff_mult(s12,w1,w0); _ff_subfrom(s12,t1); _ff_subfrom(s12,t2);
1328
1329
_ff_add(w1,s02,t1);
1330
_ff_mult(w0,f[1],t2);
1331
_ff_sub(w[2],w1,w0); // w2 = s02 + t1 - t2f1 = u2v0+u1v1+ u0v2 - f1u2v2
1332
1333
_ff_mult(w1,f[0],t2);
1334
_ff_mult(w0,f[1],s12);
1335
_ff_addto(w0,w1);
1336
_ff_sub(w[1],s01,w0); // w1= s01 - t2f0 - s12f1= u1v0+u0v1-f0u2v2-f1(u1v2+u2v1)
1337
1338
_ff_mult (w0, f[0],s12);
1339
_ff_sub(w[0],t0,w0); // w0 = t0 - s12f0 = u0v0 - (u1v2+u2v1)f0
1340
return;
1341
}
1342
1343
1344
*/
1345
1346
/*
1347
// multiplies two cubics (arbitrary coeff) modulo a quartic of the form x^4+ax^2+bx+c
1348
// no initialization or validation is done, overlap of u,v,w is ok. Uses 23M+38A
1349
void ff_poly_mult_mod_d4 (ff_t w[4], ff_t u[4], ff_t v[4], ff_t f[5])
1350
{
1351
_ff_t_declare_reg t0, t1, t2, t3, s01,s02,s03,s12,s13,s23,w0, w1; // don't need so many variables
1352
1353
// compute t_i = u_i*v_i
1354
_ff_mult(t0,u[0],v[0]); _ff_mult(t1,u[1],v[1]); _ff_mult(t2,u[2],v[2]); _ff_mult(t3,u[3],v[3]);
1355
1356
// compute s_ij = u_i*v_j + u_j*v_i
1357
_ff_add(w0,u[0],u[3]); _ff_add(w1,v[0],v[3]); _ff_mult(s03,w0,w1); _ff_subfrom(s03,t0); _ff_subfrom(s03,t3);
1358
_ff_add(w0,u[0],u[2]); _ff_add(w1,v[0],v[2]); _ff_mult(s02,w0,w1); _ff_subfrom(s02,t0); _ff_subfrom(s02,t2);
1359
_ff_add(w0,u[0],u[1]); _ff_add(w1,v[0],v[1]); _ff_mult(s01,w0,w1); _ff_subfrom(s01,t0); _ff_subfrom(s01,t1);
1360
_ff_add(w0,u[1],u[3]); _ff_add(w1,v[1],v[3]); _ff_mult(s13,w0,w1); _ff_subfrom(s13,t1); _ff_subfrom(s13,t3);
1361
_ff_add(w0,u[1],u[2]); _ff_add(w1,v[1],v[2]); _ff_mult(s12,w0,w1); _ff_subfrom(s12,t1); _ff_subfrom(s12,t2);
1362
_ff_add(w0,u[2],u[3]); _ff_add(w1,v[2],v[3]); _ff_mult(s23,w0,w1); _ff_subfrom(s23,t2); _ff_subfrom(s23,t3);
1363
1364
// s13 is special, add t2 to it
1365
_ff_addto(s13,t2);
1366
1367
_ff_mult(w0,s23,f[2]);
1368
_ff_mult(w1,t3,f[1]);
1369
_ff_addto(w0,w1);
1370
_ff_add(w1,s03,s12);
1371
_ff_sub(w[3],w1,w0); // w3 = s03 + s12 - s23f2 - t3f1 = u0v3 + u1v2 + u2v1 - u2v3f2 + u3v0 - u3v2f2 - u3v3f1
1372
1373
_ff_mult(w1,s13,f[2]);
1374
_ff_mult(w0,s23,f[1]);
1375
_ff_addto(w0,w1);
1376
_ff_square(w1,f[2]);
1377
_ff_subfrom(w1,f[0]);
1378
ff_mult(w1,w1,t3);
1379
_ff_addto(w1,s02);
1380
_ff_addto(w1,t1);
1381
_ff_sub(w[2],w1,w0); // w2 = s02 + t1 - s13f2 - s23f1 + t3(f2^2-f0) = u0v2 + u1v1 - u1v3f2 + u2v0 - u2v2f2 - u2v3f1 - u3v1f2 - u3v2f1 - u3v3f0 + u3v3f2^2
1382
1383
_ff_mult(w1,s13,f[1]);
1384
_ff_mult(w0,s23,f[0]);
1385
_ff_addto(w0,w1);
1386
_ff_mult(w1,f[1],f[2]);
1387
ff_mult(w1,w1,t3);
1388
_ff_addto(w1,s01);
1389
_ff_sub(w[1],w1,w0); // w1 = s01 - s13f1 - s23f0 + t3f1f2 = u0v1 + u1v0 - u1v3f1 - u2v2f1 - u2v3f0 - u3v1f1 - u3v2f0 + u3v3f1f2
1390
1391
_ff_mult(w0,s13,f[0]);
1392
_ff_mult(w1,f[0],f[2]);
1393
ff_mult(w1,w1,t3);
1394
_ff_addto(w1,t0);
1395
_ff_sub(w[0],w1,w0); // w0 = t0 - s13f0 + t3f0f2 = u0v0 - u1v3f0 - u2v2f0 - u3v1f0 + u3v3f0f2
1396
}
1397
1398
1399
// squares a cubic (arbitrary coeff) modulo a quartic of the form x^4+ax^2+bx+c
1400
// no initialization or validation is done, overlap of u,w is OK. Uses 23M+21A
1401
void old_ff_poly_square_mod_d4 (ff_t w[4], ff_t u[4], ff_t f[5])
1402
{
1403
_ff_t_declare_reg t0,t1,t2,t3,s03,s02,s01,s13,s12,s23,w0,w1;
1404
1405
// compute t_i = u_i^2
1406
_ff_square(t0,u[0]); _ff_square(t1,u[1]); _ff_square(t2,u[2]); _ff_square(t3,u[3]);
1407
1408
// compute s_ij = u_i*u_j
1409
_ff_mult(s13,u[1],u[3]); _ff_mult(s23,u[2],u[3]);
1410
1411
_ff_mult(w1,u[0],u[3]);
1412
_ff_mult(w0,u[1],u[2]);
1413
_ff_addto(w1,w0);
1414
_ff_mult(w0,s23,f[2]);
1415
_ff_subfrom(w1,w0);
1416
_ff_x2(w1);
1417
_ff_mult(w0,t3,f[1]);
1418
_ff_sub(w[3],w1,w0); // w3 = 2(s03 + s12 - s23f2) - t3f1 = 2*u0*u3 + 2*u1*u2 - 2*u2*u3*f2 - u3^2*f1
1419
1420
_ff_mult(w0,s13,f[2]);
1421
_ff_mult(w1,s23,f[1]);
1422
_ff_addto(w0,w1);
1423
_ff_mult(w1,u[0],u[2]);
1424
_ff_subfrom(w1,w0);
1425
_ff_x2(w1);
1426
_ff_addto(w1,t1);
1427
_ff_square(w0,f[2]);
1428
_ff_subfrom(w0,f[0]);
1429
ff_mult(w0,w0,t3);
1430
_ff_addto(w1,w0);
1431
_ff_mult(w0,t2,f[2]);
1432
_ff_sub(w[2],w1,w0); // w2 = 2(s02 - s12f2 - s23f1) + t1 - t2f2 + t3(f2^2-f0) = 2*u0*u2 + u1^2 - 2*u1*u3*f2 - u2^2*f2 - 2*u2*u3*f1 - u3^2*f0 + u3^2*f2^2
1433
1434
_ff_mult(w0,s23,f[0]);
1435
_ff_mult(w1,u[0],u[1]);
1436
_ff_subfrom(w1,w0);
1437
_ff_x2(w1);
1438
_ff_mult(w0,f[1],f[2]);
1439
ff_mult(w0,w0,t3);
1440
_ff_addto(w1,w0);
1441
_ff_add(w0,s13,s13);
1442
_ff_addto(w0,t2);
1443
ff_mult(w0,w0,f[1]);
1444
_ff_sub(w[1],w1,w0); // w1 = 2(s01 - s23f0) - (t2 + 2s13)f1 + t3f1f2 = 2*u0*u1 - 2*u1*u3*f1 - u2^2*f1 - 2*u2*u3*f0 + u3^2*f1*f2
1445
1446
_ff_mult(w0,t3,f[2]);
1447
_ff_add(w1,s13,s13);
1448
_ff_addto(w1,t2);
1449
_ff_subfrom(w0,w1);
1450
_ff_mult(w1,w0,f[0]);
1451
_ff_add(w[0],w1,t0); // w0 = t0 + (t3f2 - 2s13 - t2)f0 = u0^2 - 2*u1*u3*f0 - u2^2*f0 + u3^2*f0*f2
1452
}
1453
*/
1454
/*
1455
void old_ff_poly_xpan_mod_d4 (ff_t g[4], ff_t a, unsigned long n, ff_t f[5])
1456
{
1457
_ff_t_declare p[16], *q;
1458
_ff_t_declare_reg a2, ca;
1459
register unsigned long m;
1460
int i, d;
1461
1462
memset (p, 0, sizeof(p));
1463
q = p;
1464
_ff_set_one(q[0]); // (x+a)^0 = 1
1465
q += 4;
1466
_ff_set_one(q[1]);
1467
_ff_set(q[0],a); // (x+a)^1 = x + a
1468
q += 4;
1469
_ff_set_one(q[2]);
1470
_ff_add(ca,a,a);
1471
_ff_set(q[1],ca);
1472
_ff_square(a2,a);
1473
_ff_set(q[0],a2); // (x+a)^2 = x^2 + 2ax + a^2
1474
q += 4;
1475
_ff_set_one(q[3]);
1476
_ff_addto(ca,a);
1477
_ff_set(q[2],ca);
1478
_ff_mult(q[1],ca,a);
1479
_ff_mult(q[0],a2,a); // (x+a)^3 = x^3 + 3ax^2+3a^2x+a^3
1480
1481
// could move up to exp8 but we'll settle for a window size of 2 bits for now
1482
ff_poly_exp4_mod_d4 (g, p, n, f);
1483
}
1484
1485
1486
void ff_poly_exp4_mod_d4 (ff_t g[4], ff_t p[16], unsigned long n, ff_t f[5])
1487
{
1488
register ff_t *q;
1489
register unsigned long m;
1490
int i, d;
1491
1492
for ( m = (0x3UL<<60), i = 60 ; ! (n&m) ; m >>= 2, i-=2 );
1493
q = p + 4*((n&m)>>i);
1494
_ff_set(g[3],q[3]); _ff_set (g[2],q[2]); _ff_set(g[1],q[1]); _ff_set(g[0],q[0]);
1495
m >>= 2; i -= 2;
1496
while (m) {
1497
ff_poly_square_mod_d4 (g,g,f);
1498
ff_poly_square_mod_d4 (g,g,f);
1499
q = p + 4*((n&m)>>i);
1500
ff_poly_mult_mod_d4 (g, g, q, f);
1501
m >>= 2; i -= 2;
1502
}
1503
}
1504
1505
void ff_poly_exp8_mod_d4 (ff_t g[4], ff_t p[32], unsigned long n, ff_t f[5])
1506
{
1507
register ff_t *q;
1508
register unsigned long m;
1509
int i, d;
1510
1511
for ( m = (0x7UL<<60), i = 60 ; ! (n&m) ; m >>= 3, i-=3 );
1512
q = p + 4*((n&m)>>i);
1513
_ff_set(g[3],q[3]); _ff_set (g[2],q[2]); _ff_set(g[1],q[1]); _ff_set(g[0],q[0]);
1514
m >>= 3; i -= 3;
1515
while (m) {
1516
ff_poly_square_mod_d4 (g,g,f);
1517
ff_poly_square_mod_d4 (g,g,f);
1518
ff_poly_square_mod_d4 (g,g,f);
1519
q = p + 4*((n&m)>>i);
1520
ff_poly_mult_mod_d4 (g, g, q, f);
1521
m >>= 3; i -= 3;
1522
}
1523
}
1524
*/
1525
1526
/*
1527
// Computes x^n modulo a quartic of the form x^4+f2x^2+f1x+f0. Assumes n < 2^63
1528
void old_ff_poly_xn_mod_d4 (ff_t g[4], unsigned long n, ff_t f[5])
1529
{
1530
_ff_t_declare p[32], *q;
1531
_ff_t_declare_reg t0, t1, t2, t3;
1532
register unsigned long m;
1533
int i, d;
1534
1535
memset (p, 0, sizeof(p));
1536
q = p;
1537
_ff_set_one(q[0]); // x^0 = 1
1538
q += 4;
1539
_ff_set_one(q[1]); // x^1 = x
1540
q += 4;
1541
_ff_set_one(q[2]); // x^2 = x^2
1542
q += 4;
1543
_ff_set_one(q[3]); // x^3 = x^3
1544
q += 4;
1545
_ff_neg(t2,f[2]);
1546
_ff_neg(t1,f[1]);
1547
_ff_neg(t0,f[0]);
1548
_ff_set(q[2],t2);
1549
_ff_set(q[1],t1);
1550
_ff_set(q[0],t0); // x^4 = -f2x^2 - f1x-f0
1551
q += 4;
1552
_ff_set(q[3],t2);
1553
_ff_set(q[2],t1);
1554
_ff_set(q[1],t0); // x^5 = -f2x^3 - f1x^2 - f0x
1555
q += 4;
1556
_ff_set(q[3],t1);
1557
_ff_square(t3,f[2]);
1558
_ff_subfrom(t3,f[0]);
1559
_ff_set(q[2],t3);
1560
_ff_mult(t1,f[1],f[2]);
1561
_ff_set(q[1],t1);
1562
_ff_mult(t0,f[0],f[2]);
1563
_ff_set(q[0],t0); // x^6 = -f1x^3 + (f2^2-f0)x^2 + f1f2x^2 + f0f2
1564
q += 4;
1565
_ff_set(q[3],t3);
1566
_ff_add(q[2],t1,t1);
1567
_ff_square(t2,f[1]);
1568
_ff_add(q[1],t0,t2);
1569
_ff_mult(q[0],f[0],f[1]); // x^7 = (f2^2-f0)x^3 + 2f1f2x^2 + (f0f2+f1^2)x + f0f1
1570
1571
ff_poly_exp8_mod_d4 (g, p, n, f);
1572
}
1573
*/
1574
1575