Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
241818 views
1
#include <stdlib.h>
2
#include <stdio.h>
3
#include <errno.h>
4
#include <stdint.h>
5
#include <memory.h>
6
#include <math.h>
7
#include "gmp.h"
8
#include "mpzutil.h"
9
#include "ffwrapper.h"
10
#include "ffpoly.h"
11
#include "hecurve.h"
12
#include "jac.h"
13
#include "smalljac.h"
14
#include "smalljactab.h"
15
#include "jacorder.h"
16
#include "cstd.h"
17
18
/*
19
Copyright 2007-2008 Andrew V. Sutherland
20
21
This file is part of smalljac.
22
23
smalljac is free software: you can redistribute it and/or modify
24
it under the terms of the GNU General Public License as published by
25
the Free Software Foundation, either version 2 of the License, or
26
(at your option) any later version.
27
28
smalljac is distributed in the hope that it will be useful,
29
but WITHOUT ANY WARRANTY; without even the implied warranty of
30
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31
GNU General Public License for more details.
32
33
You should have received a copy of the GNU General Public License
34
along with smalljac. If not, see <http://www.gnu.org/licenses/>.
35
*/
36
37
/*
38
This module contains generic order computations for Jacobians using BSGS.
39
The detail are described in [SutherlandThesis] and [KedlayaSutherland2007].
40
41
This code is still subject to occasional tweaking, so some debugging code has been
42
left in the source (but commented out).
43
44
6/1/2008: As of version 3.0, genus 1 order and structure computations are now handled in hecurve1x.c,
45
so none of the code here gets used in genus 1 anymore, but for the moment we won't change things.
46
*/
47
48
// SMALLJAC_M determines the number of group operations to perform simultaneously
49
// when doing a parallel search
50
#if HECURVE_GENUS == 1
51
unsigned long SMALLJAC_M = 4; // increase to 8 for large p
52
#else
53
unsigned long SMALLJAC_M = 32;
54
#endif
55
56
#define SMALLJAC_BABY_STASHSIZE 4096
57
jac_t baby_stash[SMALLJAC_BABY_STASHSIZE];
58
59
/*
60
The tables below are used to select parameters for BSGS searches in genus 2 and 3
61
when a1 is known. They contain the median and expected mean absolute error
62
for a2 conditioned on a1, precomputed using the Haar distribution on USp(2g).
63
See [KedlayaSutherland2007] for details.
64
*/
65
66
struct a2tab_entry g2a2tab[] = {
67
{ -4.00, 5.36, 0.09 },
68
{ -3.60, 4.72, 0.15 },
69
{ -3.20, 4.08, 0.17 },
70
{ -2.80, 3.44, 0.19 },
71
{ -2.40, 2.80, 0.22 },
72
{ -2.00, 2.24, 0.29 },
73
{ -1.60, 1.68, 0.38 },
74
{ -1.20, 1.20, 0.49 },
75
{ -0.80, 0.80, 0.62 },
76
{ -0.40, 0.48, 0.74 },
77
{ 0.00, 0.48, 0.74 },
78
{ 0.40, 0.80, 0.62 },
79
{ 0.80, 1.20, 0.49 },
80
{ 1.20, 1.68, 0.38 },
81
{ 1.60, 2.24, 0.29 },
82
{ 2.00, 2.80, 0.22 },
83
{ 2.40, 3.44, 0.19 },
84
{ 2.80, 4.08, 0.17 },
85
{ 3.20, 4.72, 0.15 },
86
{ 3.60, 5.36, 0.09 },
87
{ 4.00, 0.00 , 0.00 } // can't happen, used to terminate
88
};
89
90
struct a2tab_entry g3a2tab[] = {
91
{ -6.00, 12.92, 0.15 },
92
{ -5.40, 10.84, 0.26 },
93
{ -4.80, 8.92, 0.34 },
94
{ -4.20, 7.16, 0.40 },
95
{ -3.60, 5.56, 0.45 },
96
{ -3.00, 4.12, 0.52 },
97
{ -2.40, 2.84, 0.61 },
98
{ -1.80, 1.72, 0.66 },
99
{ -1.20, 0.92, 0.61 },
100
{ -0.60, 0.60, 0.55 },
101
{ 0.00, 0.60, 0.55 },
102
{ 0.60, 0.92, 0.61 },
103
{ 1.20, 1.72, 0.66 },
104
{ 1.80, 2.84, 0.61 },
105
{ 2.40, 4.12, 0.52 },
106
{ 3.00, 5.56, 0.45 },
107
{ 3.60, 7.16, 0.40 },
108
{ 4.20, 8.92, 0.34 },
109
{ 4.80, 10.84, 0.26 },
110
{ 5.40, 12.92, 0.15 },
111
{ 6.00, 0.00 , 0.00 } // can't happen, used to terminate
112
};
113
114
// various counters used for tuning and testing - none of these are functionally necessary
115
unsigned long jac_curve_count, jac_prebaby_gops, jac_baby_gops, jac_pregiant_gops, jac_giant_gops, jac_fastorder_gops, jac_ambexp_gops, jac_exp_gops, jac_charpoly_gops, jac_order_count;
116
117
/*
118
The function jac_order computes #J(C/F_p) = P(1) given that Min <= #J(C/F_p) <= Max and (optionally in genus < 3)
119
given the coefficient a1 of P(T) = L_p(T) (the numerator of Z(C/F_p;T) and (optionally) a list of constraints on the
120
possible value of P(1) modulo 2(p+1).
121
122
The return value is 0 for an error, otherwise the number of multiples of the order of largest subgroup found
123
that lie in the interval [Min,Max]. If there is only one multiple in the interval, *pP1 is set to the unique
124
value which is then (provably) equal to P(1) = #J(C/F_p). Otherwise *pP1 is set to the size of the largest,
125
which will typically be less than Min. In this case it is up to the caller to resolve the ambiguity.
126
127
See [KedlayaSutherland2007] for more details.
128
*/
129
130
int jac_order (unsigned long *pP1, unsigned long Min, unsigned long Max, long a1, int *constraints, curve_t c[1], int fExponentOnly)
131
{
132
static mpz_t Z;
133
static int init;
134
jac_t g, gen[JAC_MAX_GENERATORS];
135
unsigned long ords[JAC_MAX_GENERATORS];
136
unsigned long q[MPZ_MAX_UI_PP_FACTORS], h[MPZ_MAX_UI_PP_FACTORS];
137
int i, j, k, two_rank, q_rank, cnt;
138
unsigned long m, n, e, e2, o, p, order, M, W, M2, W2;
139
unsigned long SylowLimit, abandoned_q;
140
double x, z;
141
long t;
142
int sts, full, ambiguous_exponent,tor3;
143
144
if ( ! init ) { mpz_init (Z); init = 1; }
145
full = 0; // default is to do a fastorder computation, not a full search of the interval
146
p = (unsigned long)_ff_p;
147
x = sqrt((double)p);
148
if ( a1 != JAC_INVALID_A1 ) {
149
#if HECURVE_GENUS == 2
150
t = (long)(p*p+1) + (long)(p+1)*a1 + (a1*a1)/2 - (long)2*p; // lower bound on a2 given a1 is (a1^2)/2 - g*p
151
if ( t > (long)Min ) Min = (unsigned long) t;
152
t = (long)(p*p+1) + (long)(p+1)*a1 + (a1*a1)/4 + (long)2*p + 1; // upper bound on a2 given a1 is (a1^2)/2 + g*p - (a1^2)/(2g)
153
if ( t < (long)Max ) Max = (unsigned long)t;
154
z = (double)a1/x;
155
for ( i = 0 ; g2a2tab[i].a1 < z ; i++ );
156
i--;
157
M = (p*p+1) + (p+1)*a1 + g2a2tab[i].a2median*p ; // Median value of a2 conditioned on a1 from table
158
W = (unsigned long) ceil(g2a2tab[i].a2mae*p); // Conditional expected distance of a2 from conditional median from table
159
#endif
160
#if HECURVE_GENUS == 3
161
t = (long)(p*p*p +1) + (long)(p*p+1)*a1 + ((a1*a1)/2 - (long)3*p)*(p+1) - 20*p*(long)ceil(x); // lower bound on a2 given a1 is (a1^2)/2 - g*p
162
if ( t > (long)Min ) Min = (unsigned long) t;
163
t = (long)(p*p*p+1) + (long)(p*p+1)*a1 + ((a1*a1)/3 + (long)3*p + 1)*(p+1) + 20*p*(long)ceil(x); // upper bound on a2 given a1 is (a1^2)/2 + g*p - (a1^2)/(2g)
164
if ( t < (long)Max ) Max = (unsigned long)t;
165
z = (double)a1/x;
166
for ( i = 0 ; g3a2tab[i].a1 < z ; i++ );
167
i--;
168
M =(p*p*p +1) +(p*p+1)*a1 + g3a2tab[i].a2median*p*(p+1); // Median value of a2 conditioned on a1 from table
169
W = (unsigned long) ceil(g3a2tab[i].a2mae*p*p); // Conditional expected distance of a2 from conditional median from table
170
#endif
171
} else {
172
M = Min + (Max-Min)/2; // median value of a1- symmetrically distributed
173
#if HECURVE_GENUS == 1
174
// In genus 1 we may want to do a "full" search of the interval to avoid a fastorder computation - see [KedlayaSutherland2007] section 4.2
175
if ( p < SMALLJAC_FULL_P ) {
176
W = (unsigned long) ceil(x); // (1+4/(3pi))sqrt(p) ~ 1.4244 but with divisor optimization, 1 is about right
177
full = 1;
178
} else {
179
W = (unsigned long) ceil(0.8488*x); // expected distance of a1 from the median is 8pi/3~0.8488...
180
}
181
if ( p > (1UL<<28) ) SMALLJAC_M = 8; // tweak parallelism
182
#endif
183
#if HECURVE_GENUS == 2
184
W = (unsigned long) ceil(0.7905*p*x); // expected distance of a1 from the median is (exactly) 4096/525pi^2 = 0.790498...
185
#endif
186
#if HECURVE_GENUS == 3
187
err_printf ("%lu: Invalid a1 value in jac_order in genus 3!\n", _ff_p); exit (0);
188
#endif
189
}
190
//printf ("%u: M = %ld, W = %ld, a1 = %ld, Min = %ld, Max = %ld\n", p, M, W, a1, Min, Max);
191
ambiguous_exponent = 0;
192
193
tor3 = 0;
194
#if HECURVE_GENUS == 1
195
two_rank = (ff_poly_roots_d3(0,c[0].f)+1)/2; // this takes < 1 microsecond (AMD Athlon 2.5GHz), equivalent to less than 10 gops
196
if ( ! _ff_p1mod3 ) { // for now only use 3tor when p=2mod3 and it is fast and easy
197
tor3 = ff_poly_g1_3tor(c[0].f);
198
if ( tor3 !=3 && tor3 != 9 ) tor3=1; // don't deal with special cases
199
}
200
#else
201
two_rank = ff_poly_factors (c[0].f, c[0].d)-1; // this takes 10 to 20 microseconds (AMD Athlon 2.5GHz)
202
if ( a1 == JAC_INVALID_A1 )
203
tor3 = ff_poly_g2_3tor(c[0].f); // this takes 300-500 microseconds, don't use unless we are past pointcounting range
204
#endif
205
e = ( two_rank >0 ? 2 : 1);
206
if ( tor3 ) e*=tor3;
207
for(;;) {
208
M2 = M/e;
209
W2 = _ui_ceil_ratio (W,e);
210
for ( i = 0 ; i < SMALLJAC_RETRIES ; i++ ) {
211
_jac_random (g, c[0]);
212
if ( e > 1 ) jac_exp_ui (&g, &g, e, c);
213
if ( ! _jac_is_identity (g) ) break;
214
}
215
if ( i == SMALLJAC_RETRIES ) { ambiguous_exponent = 1; break; }
216
o = jac_parallel_search (&g, M2, W2, Min/e, _ui_ceil_ratio(Max,e), full, (two_rank?0:1), c);
217
if ( ! o ) { err_printf ("%lu: search failed with e=%lu, M=%lu, W=%lu, m=%u for element: ", p, e, M, W, SMALLJAC_M); _jac_print(g); return 0; }
218
e *= o;
219
if ( fExponentOnly ) continue;
220
e2 = e;
221
if ( two_rank > 1&& ! full ) e2 <<= (two_rank-1); // use knowledge of 2-rank but only when doing an order computation rather than a full search
222
order = _ui_ceil_ratio(Min,e2) * e2;
223
if ( ! two_rank && ! (order&0x1) ) order += e2; // if 2-rank is 0, order must be odd
224
if ( order > Max ) { err_printf ("%7u: No multiple of exponent %lu in interval (%lu,%lu) with 2-rank %d after computing order %lu with full = %d for element: ", p, e, Min, Max, two_rank, o, full); _jac_print(g); return 0; }
225
if ( order+e2 > Max ) break;
226
if ( ! two_rank && !((order+e2)&0x1) && order + 2*e2 > Max ) break;
227
}
228
if ( ! two_rank && !(e&0x1) ) { err_printf ("%lu: Even exponent %ld found for group with trivial 2-rank\n", p, e); return 0; }
229
if ( fExponentOnly ) {
230
*pP1 = e;
231
return 1;
232
}
233
if ( ambiguous_exponent ) {
234
n = jac_gops;
235
e2 = e;
236
if ( two_rank > 1 && ! full ) e2 <<= (two_rank-1); // use knowledge of 2-rank, but only when not doing a full search
237
dbg_printf ("%9lu: Ambiguous exponent %d, %d possible orders in [%ld,%ld]\n", p, e2, ui_multiples_in_range (e2, Min, Max), Min, Max);
238
SylowLimit = ceil(sqrt(Max));
239
k = ui_factor (q, h, e2);
240
abandoned_q = 0;
241
for ( i = 0 ; i < k ; i++ ) {
242
if ( constraints ) {
243
order = 0;
244
cnt = 0;
245
for ( o = _ui_ceil_ratio(Min,e2)*e2 ; o <= Max ; o += e2 ) {
246
register int *pcon;
247
for ( pcon = constraints ; *pcon >= 0 ; pcon++ ) {
248
if ( (o % (2*(p+1))) == *pcon ) {
249
cnt++;
250
if ( cnt == 1 ) { order = o; }
251
}
252
}
253
}
254
if ( cnt == 1 ) { printf ("%lu: Ambiguity resolved via modular constraint.\n", p); break; }
255
if ( ! cnt ) { err_printf ("%lu: No orders satisfy modularity constraints!\n", p); return 0; }
256
}
257
if ( q[i]*e2 <= Max ) {
258
for ( j = 0, o = e2 ; j < h[i] ; j++, o/= q[i] );
259
m = Max/o;
260
// set M to the largest power of q[i] <= m, but use mpz to avoid overflow
261
mpz_set_ui (Z, q[i]);
262
while ( mpz_cmp_ui(Z,m) <= 0 ) { M = mpz_get_ui(Z); mpz_mul_ui (Z, Z, q[i]); }
263
for ( m = M ; m > SylowLimit ; m /= q[i] );
264
q_rank = jac_sylow (gen, ords, q[i], e2, M, m, c);
265
if ( q_rank < 0 ) {
266
info_printf ("%lu: Abandoned large %d-Sylow subgroup computation, m= %lu, SylowLimit = %lu.\n", p, q[i], m, SylowLimit);
267
if ( abandoned_q ) { err_printf ("%lu: Abandoned second %d-Sylow subgroup computation - this should be impossible Max = %lu, m = %lu, SylowLimit = %lu\n", p, q[i], Max, m, SylowLimit); return 0; }
268
abandoned_q = q[i];
269
}
270
for ( j = 0 ; j < q_rank ; j++ ) o *= ords[j];
271
if ( o > e2 ) {
272
dbg_printf ("%7u: %d-Sylow computation enlarged subgroup to %lu\n", p, q[i], o);
273
e2 = o;
274
} else {
275
dbg_printf ("%7u: %d-Sylow computation failed to enlarge subgroup\n", p, q[i]);
276
}
277
order = _ui_ceil_ratio(Min,e2)*e2;
278
if ( ! two_rank && ! (order&0x1) ) order += e2; // if 2-rank is 0, order must be odd
279
if ( order > Max ) { err_printf ("%lu: No multiple of subgroup order %lu in interval (%lu,%lu) after %d-Sylow computation, 2-rank = %d\n", p, e2, Min, Max, q[i], two_rank); return 0; }
280
if ( order + e2 > Max ) break;
281
if ( ! two_rank && !((order+e2)&0x1) && order + 2*e2 > Max ) break;
282
order = 0;
283
}
284
}
285
jac_ambexp_gops += jac_gops-n;
286
if ( i == k ) {
287
// in genus 3 don't rely on Sylow subgroup computations - we can't be sure we generated the whole group due to poor random element generation
288
#if HECURVE_GENUS == 3
289
info_printf ("%lu: Maximal subgroup order %lu is ambiguous in (%lu,%lu)\n", p, e2, Min, Max); *pP1 = e2; return ui_multiples_in_range (e2, Min, Max);
290
#else
291
if ( ! abandoned_q ) { err_printf ("%lu: Scanned all Sylow subgroups and maximal order %lu is still ambiguous in (%lu,%lu)\n", p, e2, Min, Max); *pP1 = e2; return ui_multiples_in_range (e2, Min, Max); }
292
for ( order = e2 ; order < Min ; order *= abandoned_q );
293
if ( order > Max ) { err_printf ("%lu: No %lu-multitple of subgroup order %lu in interval (%lu,%lu) after all but one Sylow subgroups computed, 2-rank = %d\n",
294
p, abandoned_q, e2, Min, Max, two_rank); return 0; }
295
info_printf ("%lu: Ambiguity resolved by computing all but 1 Sylow subgroup using %d group operations\n", p, jac_gops-n);
296
#endif
297
}
298
info_printf ("%lu: Ambiguity resolved using %d group operations\n", p, jac_gops-n);
299
}
300
*pP1 = order;
301
return 1;
302
}
303
304
305
/*
306
Parallel baby/giant search on [Min,Max] from starting point M with expected width W to find an exponent of the element a.
307
The parity flag, if set, indicates that an odd exponent of a is known to lie in the interval.
308
309
The returned value is either the order of a (the default), or if "full" is non-zero it may be either the order of a or the unique
310
exponent of a in the interval [Min,Max]. Note that in either case, if [Min,Max] is known to contain a divisor of the
311
group order, then the returned value divides the group order (and, in fact, the group exponent).
312
313
The parameter full is usually 0 or 1, but can be set to 2 to force a true full search of the interval. By default, the algorithm
314
attempts to use divisors of the first exponent found to terminate the search early, and discontinues the search in the
315
direction of the first divisor found - see section 4.2 of [KedlayaSutherland2007] for a discussion of these optimizations.
316
317
6/1/2008: Many of the optimizations below are primarily relevant in genus 1, but these are now handled by the new code in hecurve1x.c.
318
We may eventually want to take these out to simplify things, but for the moment we won't change anything.
319
*/
320
unsigned long jac_parallel_search (jac_t a[1], unsigned long M, unsigned long W, unsigned long Min, unsigned long Max, int full, int parity, curve_t c[1])
321
{
322
jac_t babys[SMALLJAC_M], giants[SMALLJAC_M], gsteps[SMALLJAC_M];
323
jac_t b[64]; // needs to be at least as big as 2*SMALLJAC_M and SMALLJAC_VBITS
324
jac_t s1, s2;
325
register unsigned long GS, dvalue, uvalue;
326
long e, n, o, o1;
327
register unsigned i, j, k, cnt;
328
unsigned baby_steps, baby_span;
329
register uint32_t value, vstep;
330
uint32_t matches[SMALLJAC_MAX_MATCHES];
331
register int sign, stash, fast;
332
unsigned long tabbits;
333
int w;
334
unsigned long p[MPZ_MAX_FACTORS], h[MPZ_MAX_FACTORS];
335
336
337
n = jac_gops;
338
//printf ("Begin search p = %lu, M = %lu, W = %lu, Min = %lu, Max = %lu, parity = %d, full = %d\n", _ff_p, M, W, Min, Max, parity, full); _jac_print(a[0]);
339
if ( W == 0 ) { err_printf ("%9u: W == 0 in smalljac_parallel_search, increasing to 1\n", _ff_p); W = 1; }
340
if ( _jac_is_identity (a[0]) ) { return 1; }
341
342
// We can check inverses for free, so we effectively double the giant step size and in s+s steps we cover 2*s^2 = 2W, hence we want s=sqrt(W)
343
// If we know the exponent is odd, we double the size of the baby steps and in s+s steps we cover 4*s^2 = 2W, hence we want s=sqrt(W/2)
344
if ( parity ) { // odd exponent
345
parity = 1;
346
baby_steps = (unsigned)ceil(sqrt((double)W/2.0));
347
if ( M&0x1 ) M--; // M must be even
348
} else {
349
if ( full < 2 ) {
350
baby_steps = (unsigned)ceil(sqrt((double)W));
351
} else {
352
baby_steps = (unsigned)ceil(sqrt((double)2.0*W)); // don't use inverse optimization - only for comparison
353
}
354
}
355
baby_span = (parity ? 2*SMALLJAC_M*_ui_ceil_ratio(baby_steps,SMALLJAC_M) : SMALLJAC_M*_ui_ceil_ratio(baby_steps,SMALLJAC_M));
356
if ( baby_span >= (1<<SMALLJAC_VBITS) ) { err_printf ("SMALLJAC_VBITS = %d is too small for baby span %d, increase SMALLJAC_VBITS\n", SMALLJAC_VBITS, baby_span); exit (0); }
357
stash = baby_span;
358
if ( stash >= SMALLJAC_BABY_STASHSIZE ) stash = 0;
359
360
_jac_set (babys[0], a[0]);
361
for ( i = 1 ; i < SMALLJAC_M ; i += i ) jac_parallel_mult_1 (babys+i, babys, babys+i-1, c, i);
362
if ( ! stash ) { // if stash isn't big enough, precompute baby powers to use for reconstruction
363
for ( i = 0 ; (1<<i) <= SMALLJAC_M ; i++ ) _jac_set (b[i], babys[(1<<i)-1]);
364
for ( i-- ; i < SMALLJAC_VBITS ; i++ ) _jac_square (b[i+1], b[i], c[0]);
365
}
366
if ( parity ) {
367
for ( i = 2 ; i < SMALLJAC_M ; i += 2 ) _jac_set (babys[(i+1)/2], babys[i]);
368
_jac_set (s1, babys[SMALLJAC_M-1]); // overlap is probably ok, but no need to push it
369
jac_parallel_mult_1(babys+SMALLJAC_M/2, babys, &s1, c, SMALLJAC_M/2);
370
_jac_mult (s1, babys[0], babys[SMALLJAC_M-1], c[0]); // m-th baby has odd index 2m-1, we add one to get s1 = 2m: (1,3,...,2m-1) -> (2m+1,2m+3,...,4m-1)
371
vstep = 2;
372
} else {
373
_jac_set (s1, babys[SMALLJAC_M-1]); // s1 = m-th baby: (1,2,...,m) -> (m+1,m+2,...,2m)
374
vstep = 1;
375
}
376
377
// put identity in table
378
_jac_set_identity (baby_stash[0]);
379
for ( tabbits = 8 ; (1<<tabbits) < 2*baby_steps ; tabbits++ );
380
smalljac_table_init (tabbits);
381
smalljac_table_insert (baby_stash, 0);
382
// take baby steps
383
o = 0;
384
value = 1;
385
n = jac_gops;
386
if ( stash ) {
387
for (;; ) {
388
for ( i = 0 ; i < SMALLJAC_M ; i++ ) {
389
if ( _jac_is_identity (babys[i]) ) { o = value; goto found; }
390
//printf ("%lu: inserting baby value %d: ", _ff_p, value); _jac_print(babys[i]);
391
smalljac_table_insert (babys+i, value);
392
_jac_set (baby_stash[value], babys[i]); // we could avoid copying here, but parity makes it awkward
393
value += vstep;
394
}
395
if ( value > baby_span ) break; // value is now the index of the next baby we will compute - one (or two) greater than the last
396
jac_parallel_mult_1 (babys, babys, &s1, c, SMALLJAC_M);
397
}
398
} else {
399
for (;; ) {
400
for ( i = 0 ; i < SMALLJAC_M ; i++ ) {
401
if ( _jac_is_identity (babys[i]) ) { o = value; goto found; }
402
smalljac_table_insert (babys+i, value);
403
value += vstep;
404
}
405
if ( value > baby_span ) break; // value is now the index of the next baby we will compute - one (or two) greater than the last
406
jac_parallel_mult_1 (babys, babys, &s1, c, SMALLJAC_M);
407
}
408
}
409
value -= vstep; // set value to index of last baby computed
410
jac_baby_gops += jac_gops-n;
411
//printf ("babys done, used %lu gops\n", jac_gops-n);
412
413
e = M/value; // the cost of the exponentiation can be quite significant in genus 1 - > 1/4 of the total time for p~2^24
414
if ( parity && (e&1) ) e++; // we use the largest baby to get the first giant by tweaking M to be a multiple of value (with the right parity)
415
M = e * value; // this adds at most one giant step.
416
n = jac_gops;
417
jac_exp_ui (giants, babys+SMALLJAC_M-1, e, c); // exponentiate biggest baby to get first giant
418
jac_exp_gops += jac_gops-n;
419
420
_jac_square (s1, babys[SMALLJAC_M-1], c[0]); // s1 is double last baby
421
if ( parity ) {
422
if ( full < 2 ) GS = 2*value;
423
else GS = ( (value&1) ? value+1 : value ); // giant spacing must be even if babys are odd
424
} else {
425
_jac_mult (s1, s1, a[0], c[0]); // we can add one otherwise
426
if ( full < 2 ) GS = 2*value+1; else GS = value;
427
}
428
// compute m/2 giants - currently done serially
429
for ( i = 1 ; i < SMALLJAC_M/2 ; i++ ) _jac_mult (giants[i], giants[i-1], s1, c[0]);
430
for ( i = 1 ; i < SMALLJAC_M/2 ; i += i ) _jac_square (s1, s1, c[0]); // power up s1 so that each giant will step m/2 giant spacings
431
_jac_set (s2, s1);
432
if ( _jac_is_identity (s2) ) { o = GS*(SMALLJAC_M/2); goto found; } // unlikely but possible and the giants won't get very far if this happens
433
_jac_invert (s2); // s2 = -s1 is used for downward steps
434
jac_parallel_mult_1 (giants+SMALLJAC_M/2, giants, &s2, c, SMALLJAC_M/2); // have m/2 up giants step down once to form m/2 down giants - this means down giants are in reverse order but we can cope
435
for ( i = 0 ; i < SMALLJAC_M/2 ; i++ ) { // gsteps array is just m/2 copies of s1 and s2, used to perform up and down steps in parallel
436
_jac_set (gsteps[i],s1);
437
_jac_set (gsteps[i+SMALLJAC_M/2],s2);
438
}
439
440
uvalue = M; // index of first up giant
441
dvalue = M-GS; // index of "last" down giant giants[SMALLJAC_M-1] - this is the one with the largest index
442
443
//printf ("%lu: Beginning giant steps GS = %ld, uvalue = %ld, dvalue = %ld, used %lu gops\n", _ff_p, GS, uvalue, dvalue, jac_gops-n);
444
445
//giant steps
446
// the code duplication below is intentional. it could be avoided, but at the cost of more conditional code inside the loops
447
o = o1 = 0;
448
for ( j = 0 ; ; j++ ) { // search until we find it - could add Min/Max check
449
if ( uvalue ) {
450
for ( i = 0 ; i < SMALLJAC_M/2 ; i++ ) { // up giants
451
//printf ("%lu: checking up giant = %lu ", _ff_p, uvalue); _jac_print(giants[i]);
452
if ( (cnt = smalljac_table_lookup (matches, giants+i)) > 0 ) {
453
//printf ("%9lu: matched %d up\n", _ff_p, cnt);
454
for ( k = 0 ; k < cnt ; k++ ) {
455
//printf ("%d\n", matches[k]);
456
if ( stash ) {
457
sign = _jac_cmp (baby_stash[matches[k]], giants[i]);
458
} else {
459
jac_exp_ui32_powers (&s1, b, matches[k], c); // reconstruct baby for comparison
460
sign = _jac_cmp (s1, giants[i]);
461
}
462
if ( sign ) {
463
//printf ("%9lu: Found a match at uvalue = %lu, k = %d, matches[k] = %u, sign = %d, %d gops\n", _ff_p, uvalue, k, matches[k], sign, jac_gops-n);
464
if ( o ) o1 = o;
465
o = ( sign > 0 ? uvalue - matches[k] : uvalue + matches[k] );
466
if ( ! o ) { o = o1; continue; } // can happen for small groups
467
if ( o < 0 ) o = -o; // can happen for small groups
468
if ( ! full ) goto found;
469
if ( o == o1 ) continue; // yes this can happen - up and down steps can overlap in odd parity case
470
if ( o1 ) { o = ( o > o1 ? o-o1 : o1-o); goto found; }
471
if ( full < 2 && j && 2*o > Max-Min ) { // make sure we are actually in the top half of the interval - we might not have started at the center
472
if ( ! dvalue ) return o; // if we already hit the other end, we're done
473
w = ui_factor (p, h, o);
474
if ( w == 1 && h[0] == 1 ) { jac_order_count++; jac_giant_gops += jac_gops-n; return o; }
475
if ( p[w-1] > o-Min && (vstep*o/p[w-1])+dvalue+baby_span < o ) {
476
//printf ("%lu: [%lu,%lu], exp %lu, q=%lu, o-dvalue-baby_span=%lu, dvalue=%lu, baby_span = %d, parity =%d\n", _ff_p, Min, Max, o, p[w-1], o-dvalue-baby_span, dvalue, baby_span, parity);
477
jac_order_count++; jac_giant_gops += jac_gops-n; return o;
478
}
479
uvalue = 0;
480
goto udone;
481
}
482
}
483
}
484
}
485
if ( uvalue ) uvalue += GS;
486
}
487
}
488
udone:
489
if ( dvalue ) {
490
for ( i = SMALLJAC_M-1 ; dvalue && i >= SMALLJAC_M/2 ; i-- ) { // down giants - process largest dvalue first
491
//printf ("%lu: checking down giant = %lu ", _ff_p, dvalue); _jac_print(giants[i]);
492
if ( (cnt = smalljac_table_lookup (matches, giants+i)) > 0 ) {
493
//printf ("%9lu: matched %d down\n", _ff_p, cnt);
494
for ( k = 0 ; k < cnt ; k++ ) {
495
if ( stash ) {
496
sign = _jac_cmp (baby_stash[matches[k]], giants[i]);
497
} else {
498
jac_exp_ui32_powers (&s1, b, matches[k], c); // reconstruct baby for comparison
499
sign = _jac_cmp (s1, giants[i]);
500
}
501
if ( sign ) {
502
//printf ("%9lu: Found a match at dvalue = %lu, k = %d, matches[k] = %u, sign = %d, %d gops\n", _ff_p, dvalue, k, matches[k], sign, jac_gops-n);
503
if ( o ) o1 = o;
504
o = ( sign > 0 ? dvalue - matches[k] : dvalue + matches[k] );
505
if ( ! o ) { o = o1; continue; } // can happen for small groups
506
if ( o < 0 ) o = -o; // can happen for small groups
507
if ( ! full ) goto found;
508
if ( o == o1 ) continue; // yes this can happen - up and down steps can overlap in odd parity case
509
if ( o1 ) { o = ( o > o1 ? o-o1 : o1-o); goto found; }
510
if ( full < 2 && 2*o < Max-Min ) {
511
if ( ! uvalue ) return o; // if we already hit the other end, we're done
512
w = ui_factor (p, h, o);
513
if ( w == 1 && h[0] == 1 ) { jac_order_count++; jac_giant_gops += jac_gops-n; return o; }
514
if ( p[w-1] > Max-o && (vstep*o/p[w-1])+o+baby_span < uvalue ) {
515
//printf ("%lu: [%lu,%lu], exp %lu, q=%lu, uvalue-o-baby_span=%lu, parity =%d\n", _ff_p, Min, Max, o, p[w-1], uvalue-o-baby_span, parity);
516
jac_order_count++; jac_giant_gops += jac_gops-n; return o;
517
}
518
dvalue = 0;
519
goto ddone;
520
}
521
}
522
}
523
}
524
if ( dvalue <= GS ) dvalue = 0; else dvalue -= GS;
525
}
526
}
527
ddone:
528
if ( uvalue > Max+baby_span ) uvalue = 0;
529
if ( dvalue+baby_span < Min ) dvalue = 0;
530
if ( ! dvalue && ! uvalue ) break;
531
if ( ! dvalue ) {
532
jac_parallel_mult_c (giants, giants, gsteps, c, SMALLJAC_M/2);
533
} else if ( ! uvalue ) {
534
jac_parallel_mult_c (giants+SMALLJAC_M/2, giants+SMALLJAC_M/2, gsteps+SMALLJAC_M/2, c, SMALLJAC_M/2);
535
} else {
536
jac_parallel_mult_c (giants, giants, gsteps, c, SMALLJAC_M);
537
}
538
}
539
jac_giant_gops += jac_gops-n;
540
//printf ("giant steps in full computation, used %lu gops\n", jac_gops-n);
541
if ( ! o ) {
542
err_printf ("%lu: No exponent found in smalljac_parallel_search, full=%d, parity=%d, baby_span = %d, GS = %ld, dvalue = %d, uvalue = %ld, Min = %lu, Max = %lu!\n",
543
_ff_p, full, parity, baby_span, GS, dvalue, uvalue, Min, Max); _curve_print(c[0]);
544
_jac_print(a[0]);
545
return 0;
546
}
547
//if ( full ) jac_full_not_found_count++;
548
//printf ("%lu: found one exponent %lu\n", _ff_p, o);
549
if ( o < 0 ) { err_printf("%lu: Negative exponent %ld found with full=%d, o1=%ld for element ", _ff_p, o, full, o1); _jac_print(a[0]); return 0; }
550
return o;
551
found:
552
if ( ! o ) { err_printf("%lu: Zero exponent found with full=%d, o1=%ld for element ", _ff_p, full, o1); _jac_print(a[0]); exit (0); }
553
if ( o < 0 ) { err_printf("%lu: Negative exponent %ld found with full=%d, o1=%ld for element ", _ff_p, o, full, o1); _jac_print(a[0]); return 0; }
554
555
//printf ("found, used %lu gops\n", jac_gops-n);
556
//if ( j > 3*baby_s ) printf ("%9u: Long search - %d baby steps, %d giant steps, W = %ld\n", _ff_p, baby_s*SMALLJAC_M, j*SMALLJAC_M, W);
557
//printf ("giant steps in order computation, used %lu gops\n", jac_gops-n);
558
jac_giant_gops += jac_gops-n;
559
n = jac_gops;
560
w = ui_factor (p, h, o);
561
/*
562
There is no reason not to do this, but we need to be sure the caller handles it - currently it is assumed that the order is returned unless the full flag is set.
563
564
if ( j && p[w-1] > o-Min && (vstep*o/p[w-1])+dvalue+baby_span < o ) { jac_order_count++; return o; }
565
if ( p[w-1] > Max-o && (vstep*o/p[w-1])+o+baby_span < uvalue ) { jac_order_count++; return o; }
566
*/
567
jac_factored_order_ui (&o, a, p, h, w, c);
568
jac_fastorder_gops += jac_gops-n;
569
//jac_exp_ui (&s1, a, o, c);
570
//if ( ! _jac_is_identity(s1) ) { err_printf ("%d: %d is not an exponent of ", _ff_p, o); _jac_print(s1); exit (0); }
571
//printf ("%u: %lu is the order of ", _ff_p, o); _jac_print(a[0]);
572
if ( o < 0 ) { err_printf("%lu: Negative exponent %ld after fastorder with full=%d, o1=%ld for element ", _ff_p, o, full, o1); _jac_print(a[0]); return 0; }
573
return o;
574
}
575
576
577
// Straight baby/giant search over values Min + km up to Max as k ranges from 0 to ceil((Max-Min)/m)
578
// Finds least two matches in interval, returns number of matches found
579
int jac_search (mpz_t e[2], jac_t a[1], unsigned m, mpz_t Min, mpz_t Max, curve_t c[1])
580
{
581
static int init;
582
static mpz_t o, o1, o2, G;
583
jac_t b[32], baby, giant;
584
jac_t s1, step, babystep;
585
unsigned long S, tabbits;
586
unsigned i, j, k, s, cnt;
587
uint32_t matches[SMALLJAC_MAX_MATCHES];
588
int sign;
589
590
if ( ! init ) { mpz_init (o); mpz_init(o1); mpz_init (o2); mpz_init(G); init = 1; }
591
mpz_sub (G, Max, Min);
592
S = mpz_get_ui (G);
593
s = (unsigned) ceil(sqrt(_ui_ceil_ratio(S, m)));
594
//out_printf ("Small search, search p = %d, m = %d, Min = %Zd, Max = %Zd, s = %d\n", _ff_p, m, Min, Max, s); _jac_print(a[0]);
595
for ( tabbits = 8 ; (1<<tabbits) < 2*s ; tabbits++ );
596
smalljac_table_init (tabbits);
597
// put identity in table
598
_jac_set_identity (step);
599
smalljac_table_insert (&step, 0);
600
// Compute babystep = first baby
601
jac_exp_ui (&babystep, a, m, c);
602
_jac_set (baby, babystep);
603
// Compute baby powers used to recompute baby matches
604
_jac_set (b[0], babystep);
605
for ( i = 1 ; i < 32 ; i++ ) _jac_square(b[i], b[i-1], c[0]);
606
// Compute first giant
607
// G = Min + (unsigned long)s*(unsigned long)m; // First giant starts one babyspan into the interval
608
mpz_set(G, Min);
609
mpz_add_ui (G, G, (unsigned long)s*(unsigned long)m);
610
jac_exp_mpz (&giant, a, G, c);
611
// Compute giant step - twice the span of the babys
612
S = 2*(unsigned long)s*(unsigned long)m; // giant step size - should fit in 64 bits
613
jac_exp_ui (&step, a, S, c); // could be clever and use last baby, but only saves a few gops - not needed here
614
615
// baby steps
616
for ( i = 1 ; ; i++ ) {
617
smalljac_table_insert (&baby, i);
618
if ( i > s ) break;
619
_jac_mult (baby, baby, babystep, c[0]);
620
}
621
622
// trim s to avoid taking extra giant steps - it may be larger than necessary
623
for (;;) {
624
mpz_set (o, G);
625
mpz_add_ui (o, o, (unsigned long)s*(S-(unsigned long)m));
626
if ( mpz_cmp (o, Max) <= 0 ) break;
627
s--;
628
}
629
630
//giant steps
631
mpz_set_ui (o, 0);
632
mpz_set_ui (o1, 0); mpz_set_ui (o2, 0);
633
for ( j = 0 ; j < s ; j++ ) {
634
if ( (cnt = smalljac_table_lookup (matches, &giant)) > 0 ) {
635
for ( k = 0 ; k < cnt ; k++ ) {
636
if ( matches[k] ) {
637
// reconstruct baby for comparison
638
jac_exp_ui32_powers (&s1, b, matches[k], c);
639
if ( (sign = _jac_cmp(s1,giant)) ) {
640
mpz_set(o, G);
641
mpz_add_ui (o, o, j*S);
642
if ( sign > 0 ) { mpz_sub_ui (o, o, (unsigned long)matches[k]*(unsigned long)m); } else { mpz_add_ui (o, o, (unsigned long)matches[k]*(unsigned long)m); }
643
} else {
644
mpz_set_ui (o, 0);
645
}
646
} else {
647
mpz_set (o, G);
648
mpz_add_ui (o, o, j*S);
649
}
650
//jac_exp_mpz (&s1, b, o, c);
651
//if ( ! _jac_is_identity (s1) ) { err_printf ("%Zd is not an exponent of ", o); _jac_print(b[0]); exit (0); }
652
// track the smallest two matches - note that we don't necessarily find them in order.
653
if ( mpz_sgn(o) && mpz_cmp(o,Min) >= 0 && mpz_cmp (o,Max) <= 0 ) { // ignore matches outside the interval - these can happen and can cause confusion when they do
654
if ( ! mpz_sgn(o1) ) {
655
mpz_set (o1, o);;
656
} else {
657
if ( mpz_cmp(o,o1) < 0 ) {
658
mpz_set (o2, o1); mpz_set (o1, o);
659
} else if ( mpz_cmp(o,o1) > 0 && (! mpz_sgn(o2) || mpz_cmp(o,o2) < 0) ) {
660
mpz_set (o2, o);
661
}
662
}
663
}
664
}
665
}
666
if ( j == s ) break;
667
_jac_mult (giant, giant, step, c[0]);
668
}
669
//out_printf ("o1=%Zd, o2=%Zd\n", o1, o2);
670
mpz_set (e[0], o1);
671
mpz_set (e[1], o2);
672
return ( mpz_sgn(o2) ? 2 : ( mpz_sgn(o1) ? 1 : 0 ));
673
}
674
675