CoCalc -- jac.c

Harvard Workshop: Distribution of modular symbols and L-values
code / alex / psage / psage / libs / smalljac / jac.c
²⁴¹⁸¹⁸ views
1
#include <stdlib.h>
2
#include <stdio.h>
3
#include <ctype.h>
4
#include <string.h>
5
#include <stdint.h>
6
#include "gmp.h"
7
#include "mpzutil.h"
8
#include "ff.h"
9
#include "ffpoly.h"
10
#include "hecurve.h"
11
#include "jac.h"
12
#include "cstd.h"
13

14
/*
15
    Copyright 2007 Andrew V. Sutherland
16

17
    This file is part of smalljac.
18

19
    smalljac is free software: you can redistribute it and/or modify
20
    it under the terms of the GNU General Public License as published by
21
    the Free Software Foundation, either version 2 of the License, or
22
    (at your option) any later version.
23

24
    smalljac is distributed in the hope that it will be useful,
25
    but WITHOUT ANY WARRANTY; without even the implied warranty of
26
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27
    GNU General Public License for more details.
28

29
    You should have received a copy of the GNU General Public License
30
    along with smalljac.  If not, see <http://www.gnu.org/licenses/>.
31
*/
32

33
/*
34
	Basic group operations for Jacobians are implemented here, plus some generic
35
	fastorder computation functions (computations of element orders from
36
	a known exponent).
37
*/	
38

39

40
#define JAC_MAX_FASTORDER_UI_W		20
41

42

43
// o1 should not overlap a2 or b2
44
void jac_mult2 (jac_t o1[1], jac_t a1[1], jac_t b1[1], jac_t o2[1], jac_t a2[1], jac_t b2[1], curve_t c[1])
45
{
46
	hecurve_ctx_t ctx[2];
47
	ff_t inverts[2];
48
	
49
	ctx[0].state = ctx[1].state = 0;
50
	if ( ! __jac_mult (o1[0], a1[0], b1[0], c[0], ctx) ) {
51
		if ( ! __jac_mult (o2[0], a2[0], b2[0], c[0], ctx+1) ) {
52
			_ff_set (inverts[0], ctx[0].invert);
53
			_ff_set (inverts[1], ctx[1].invert);
54
			ff_parallel_invert (inverts, inverts, 2);
55
			_ff_set (ctx[0].invert, inverts[0]);
56
			_ff_set (ctx[1].invert, inverts[1]);
57
			__jac_mult (o1[0], a1[0], b1[0], c[0], ctx) ;
58
			__jac_mult (o2[0], a2[0], b2[0], c[0], ctx+1) ;
59
		} else {
60
			_ff_invert (ctx[0].invert, ctx[0].invert);
61
			__jac_mult (o1[0], a1[0], b1[0], c[0], ctx);
62
		}
63
	} else {
64
		__jac_mult (o2[0], a2[0], b2[0], c[0], 0);
65
	}
66
	jac_gops += 2;
67
}
68

69

70
// o1 should not overlap a2 or b2
71
void jac_square2 (jac_t o1[1], jac_t a1[1], jac_t o2[1], jac_t a2[1], curve_t c[1])
72
{
73
	hecurve_ctx_t ctx[2];
74
	ff_t inverts[2];
75
	
76
	ctx[0].state = ctx[1].state = 0;
77
	if ( ! __jac_square (o1[0], a1[0], c[0], ctx) ) {
78
		if ( ! __jac_square (o2[0], a2[0], c[0], ctx+1) ) {
79
			_ff_set (inverts[0], ctx[0].invert);
80
			_ff_set (inverts[1], ctx[1].invert);
81
			ff_parallel_invert (inverts, inverts, 2);
82
			_ff_set (ctx[0].invert, inverts[0]);
83
			_ff_set (ctx[1].invert, inverts[1]);
84
			__jac_square (o1[0], a1[0], c[0], ctx) ;
85
			__jac_square (o2[0], a2[0], c[0], ctx+1) ;
86
		} else {
87
			_ff_invert (ctx[0].invert, ctx[0].invert);
88
			__jac_square (o1[0], a1[0], c[0], ctx);
89
		}
90
	} else {
91
		__jac_square (o2[0], a2[0], c[0], 0);
92
	}
93
	jac_gops += 2;
94
}
95

96

97
// replaces a by a^2 and b by ab - a and b should not overlap 
98
void jac_square_mult (jac_t a[1], jac_t b[1], curve_t c[1])
99
{
100
	hecurve_ctx_t ctx[2];
101
	jac_t t;
102
	ff_t inverts[2];
103
	
104
	ctx[0].state = ctx[1].state = 0;
105
	if ( ! __jac_mult (b[0], a[0], b[0], c[0], ctx) ) {
106
		if ( ! __jac_square (t, a[0], c[0], ctx+1) ) {			// can't overwrite a - needed to finish ab and square might not return for inversion
107
			_ff_set (inverts[0], ctx[0].invert);
108
			_ff_set (inverts[1], ctx[1].invert);
109
			ff_parallel_invert (inverts, inverts, 2);
110
			_ff_set (ctx[0].invert, inverts[0]);
111
			_ff_set (ctx[1].invert, inverts[1]);
112
			__jac_mult (b[0], a[0], b[0], c[0], ctx) ;
113
			__jac_square (a[0], a[0], c[0], ctx+1) ;			// but we can overwrite a here, in which case we didn't actually use t - nice
114
		} else {
115
			_ff_invert (ctx[0].invert, ctx[0].invert);
116
			__jac_mult (b[0], a[0], b[0], c[0], ctx);
117
			_jac_set (a[0], t);							// here's where we need to use t
118
		}
119
	} else {
120
		__jac_square (a[0], a[0], c[0], 0);
121
	}
122
	jac_gops += 2;
123
}
124

125

126
void jac_parallel_mult (jac_t o[], jac_t a[], jac_t b[], curve_t c[], int n)
127
{
128
	int inv[JAC_MAX_CURVES];
129
	ff_t inverts[JAC_MAX_CURVES];
130
	hecurve_ctx_t ctx[JAC_MAX_CURVES];
131
	register int i, j;
132
	
133
	j = 0;
134
	for ( i = 0 ; i < n ; i++ ) {
135
		ctx[i].state = 0;
136
		if ( ! __jac_mult (o[i], a[i], b[i], c[i], ctx+i) ) {
137
			_ff_set (inverts[j], ctx[i].invert);
138
			inv[j++] =i;
139
		}
140
	}
141
	ff_parallel_invert (inverts, inverts, j);	// we could modify this to avoid overlap if needed
142
	for ( i = 0 ; i < j ; i++ ) {
143
		_ff_set (ctx[inv[i]].invert, inverts[i]);
144
		__jac_mult (o[inv[i]], a[inv[i]], b[inv[i]], c[inv[i]], ctx+inv[i]);
145
	}
146
	jac_gops += n;
147
}
148

149

150
void jac_parallel_mult_c (jac_t o[], jac_t a[], jac_t b[], curve_t c[1], int n)
151
{
152
	int inv[JAC_MAX_CURVES];
153
	ff_t inverts[JAC_MAX_CURVES];
154
	hecurve_ctx_t ctx[JAC_MAX_CURVES];
155
	register int i, j;
156
	
157
	j = 0;
158
	for ( i = 0 ; i < n ; i++ ) {
159
		ctx[i].state = 0;
160
		if ( ! __jac_mult (o[i], a[i], b[i], c[0], ctx+i) ) {
161
			_ff_set (inverts[j], ctx[i].invert);
162
			inv[j++] =i;
163
		}
164
	}
165
	ff_parallel_invert (inverts, inverts, j);	// we could modify this to avoid overlap if needed
166
	for ( i = 0 ; i < j ; i++ ) {
167
		_ff_set (ctx[inv[i]].invert, inverts[i]);
168
		__jac_mult (o[inv[i]], a[inv[i]], b[inv[i]], c[0], ctx+inv[i]);
169
	}
170
	jac_gops += n;
171
}
172

173

174
void jac_parallel_mult_1 (jac_t o[], jac_t a[], jac_t b[1], curve_t c[1], int n)
175
{
176
	int inv[JAC_MAX_CURVES];
177
	ff_t inverts[JAC_MAX_CURVES];
178
	hecurve_ctx_t ctx[JAC_MAX_CURVES];
179
	register int i, j;
180
	
181
	j = 0;
182
	for ( i = 0 ; i < n ; i++ ) {
183
		ctx[i].state = 0;
184
		if ( ! __jac_mult (o[i], a[i], b[0], c[0], ctx+i) ) {
185
			_ff_set (inverts[j], ctx[i].invert);
186
			inv[j++] =i;
187
		}
188
	}
189
	ff_parallel_invert (inverts, inverts, j);	// we could modify this to avoid overlap if needed
190
	for ( i = 0 ; i < j ; i++ ) {
191
		_ff_set (ctx[inv[i]].invert, inverts[i]);
192
		__jac_mult (o[inv[i]], a[inv[i]], b[0], c[0], ctx+inv[i]);
193
	}
194
	jac_gops += n;
195
}
196

197

198
void jac_parallel_square (jac_t o[], jac_t a[], curve_t c[], int n)
199
{
200
	int inv[JAC_MAX_CURVES];
201
	ff_t inverts[JAC_MAX_CURVES];
202
	hecurve_ctx_t ctx[JAC_MAX_CURVES];
203
	register int i, j;
204

205
	j = 0;
206
	for ( i = 0 ; i < n ; i++ ) {
207
		ctx[i].state = 0;
208
		if ( ! __jac_square (o[i], a[i], c[i], ctx+i) ) {
209
			_ff_set (inverts[j], ctx[i].invert);
210
			inv[j++] =i;
211
		}
212
	}
213
	ff_parallel_invert (inverts, inverts, j);	// we could modify this to avoid overlap if needed
214
	for ( i = 0 ; i < j ; i++ ) {
215
		_ff_set ( ctx[inv[i]].invert, inverts[i]);
216
		__jac_square (o[inv[i]], a[inv[i]], c[inv[i]], ctx+inv[i]);
217
	}
218
	jac_gops += n;
219
}
220

221
// o cannot overlap a[]
222
void jac_exp_ui32_powers (jac_t o[1], jac_t a[], uint32_t e, curve_t c[1])
223
{
224
	register int i;
225
	
226
	if ( ! e ) { _jac_set_identity (o[0]);  return; }
227
	// could use parallel mult here but don't bother for now
228
	for ( i = 0 ; ! (e & 0x1) ; e >>= 1, i++ );
229
	_jac_set (o[0], a[i]);
230
	for ( e >>= 1, i++ ; e ; e >>= 1, i++ ) {
231
		if ( (e & 0x1) ) _jac_mult (o[0], o[0], a[i], c[0]);
232
	}
233
}
234

235
/*
236
	Generic exponentiation optimized to take advantage of a fast parallel group operation.
237
	Splits exponent and does two smaller exponentiations in parallel, then shifts (squares) and combines.
238
*/
239
void jac_exp_ui (jac_t o[1], jac_t a[1], unsigned long e, curve_t c[1])
240
{
241
	register int i, j;
242
	unsigned long e1, e2;
243
	jac_t b[2];
244

245
#if SMALLJAC_GENUS == 1
246
	hecurve_g1_exp_ui(o[0].u,o[0].v,a[0].u,a[0].v,e,c[0].f);
247
	jac_gops += (3*ui_len(e))/2;
248
	return;
249
#endif
250
		
251
	// hardwire common cases
252
	switch (e) {
253
	case 0: _jac_set_identity (o[0]);  return;
254
	case 1: _jac_set (o[0], a[0]);  return;
255
	case 2: _jac_square (o[0], a[0], c[0]);  return;
256
	case 3: _jac_square (b[0], a[0], c[0]); _jac_mult(o[0], a[0], b[0], c[0]);  return;
257
	case 4: _jac_square (b[0], a[0], c[0]); _jac_square (o[0], b[0], c[0]);  return;
258
	case 5: _jac_square (b[0], a[0], c[0]); _jac_square (b[0], b[0], c[0]);  _jac_mult(o[0], a[0], b[0], c[0]);  return;
259
	case 6: _jac_square (b[0], a[0], c[0]); _jac_square (b[1], b[0], c[0]);  _jac_mult(o[0], b[0], b[1], c[0]);  return;
260
	case 7: _jac_square (b[0], a[0], c[0]); _jac_square (b[1], b[0], c[0]);  _jac_mult(b[1], b[1], b[0], c[0]);  _jac_mult(o[0], b[1], a[0], c[0]);  return;
261
	case 8: _jac_square (b[0], a[0], c[0]); _jac_square (b[0], b[0], c[0]); _jac_square (o[0], b[0], c[0]);  return;
262
	case 9: _jac_square (b[0], a[0], c[0]); _jac_square (b[0], b[0], c[0]); _jac_square (b[0], b[0], c[0]);  _jac_mult(o[0], b[0], a[0], c[0]);  return;
263
	}
264

265
	// In theory this uses about the same number of gops as binary exponentiation (about 3n/2), but performs 2/3 of them in parallel at a 3/2 speedup, giving an effective 7n/9 serial gops
266
	j = ui_len(e)/2;
267
	e2 = e >> j;
268
	e1 = e & ((1UL<<j)-1);
269
	jac_exp2_ui (b, a, e1, e2, c);
270
	for ( i = 0 ; i < j ; i++ ) _jac_square (b[1], b[1], c[0]);
271
	_jac_mult (o[0], b[0], b[1], c[0]);
272
}
273

274
// simple simultaneous exponentiation of a common base to two exponents using a 1-bit window size
275
void jac_exp2_ui (jac_t o[2], jac_t a[1], unsigned long e1, unsigned long e2, curve_t c[1])
276
{
277
	register unsigned long m, max;
278
	jac_t b[2], b01, b10, b11;
279

280
	if ( e1 < 2 ) { if ( e1 ) { _jac_set(o[0],a[0]); } else { _jac_set_identity(o[0]); } jac_exp_ui (o+1, a, e2, c);  return; }
281
	if ( e2 < 2 ) { if ( e2 ) { _jac_set(o[1],a[0]); } else { _jac_set_identity(o[1]); }  jac_exp_ui (o, a, e1, c);  return; }
282
	max = _ui_max (e1,e2);
283
	m = 1;
284
	_jac_set (b[0], a[0]);
285
	if ( m&e2 ) {
286
		if ( m&e1) { _jac_set (b11, b[0]);  _jac_set_identity (b10);  _jac_set_identity (b01);  } else {  _jac_set (b10, b[0]);  _jac_set_identity (b11);  _jac_set_identity (b01);  }
287
	} else {
288
		if ( m&e1) { _jac_set (b01, b[0]);  _jac_set_identity (b10);  _jac_set_identity (b11);  } else {  _jac_set_identity (b10);  _jac_set_identity (b11);  _jac_set_identity (b01); }
289
	}
290
	_jac_square (b[0], b[0], c[0]);
291
	m <<= 1;
292
	max >>= 1;
293
	while ( m <= max ) {
294
		if ( m&e2 ) {
295
			if ( m&e1 ) { jac_square_mult (b, &b11, c);  } else {  jac_square_mult (b, &b10, c); }
296
		} else {
297
			if ( m&e1 ) { jac_square_mult (b, &b01, c); } else { _jac_square (b[0], b[0], c[0]); }
298
		}
299
		m <<= 1;
300
	}
301
	if ( m&e2 ) {
302
		if ( m&e1 ) { _jac_mult (b11, b11, b[0], c[0]); } else { _jac_mult (b10, b10, b[0], c[0]); }
303
	} else {
304
		if ( m&e1 ) { _jac_mult (b01, b01, b[0], c[0]); } // else should be impossible
305
	}
306
	jac_mult2 (o, &b01, &b11, o+1, &b10, &b11, c);
307
}
308

309
/*
310
	This function isn't currently used but it could be.  It assumes that b[] contains at least 3 powers of a, indexed by exponent.
311
	This is useful if you have already computed the powers for some other reason, e.g. baby-steps.
312
*/
313
void jac_exp2_powers_ui (jac_t o[2], jac_t a[1], unsigned long e1, unsigned long e2, jac_t b[], unsigned n, int parity, curve_t c[1])
314
{
315
	register unsigned long mask, j1, j2;
316
	register int i, j, k;
317
	
318
	// we assume b[0] holds the identity and that powers > 2.
319
	// we make no attempt to combine operations other than squarings, which should be most of them
320
	
321
	k = ui_lg_floor(n);
322
	mask = (1UL<<k)-1;	
323
	i = _ui_max (ui_len(e1), ui_len(e2));
324
	if ( i > k ) {
325
		j1 = (e1&(mask<<(i-k)))>>(i-k);  j2 = (e2&(mask<<(i-k)))>>(i-k);
326
	} else {
327
		j1 = e1;  j2 = e2;
328
	}
329
	if ( parity && ! (j1&0x1) ) {											// only use odd values if parity is set
330
		_jac_set (o[0], b[j1-1]);
331
		_jac_mult (o[0], o[0], a[0], c[0]);
332
	} else {
333
		_jac_set (o[0], b[j1]);
334
	}
335
	if ( parity && ! (j2&0x1) ) {											// only use odd values if parity is set
336
		_jac_set (o[1], b[j2-1]);
337
		_jac_mult (o[1], o[1], a[0], c[0]);
338
	} else {
339
		_jac_set (o[1], b[j2]);
340
	}
341
	i -= k;
342
	while ( i > k ) {
343
		for ( j = 0 ; j < k ; j++ ) jac_square2 (o, o, o+1, o+1, c);				// this is where the time is spent
344
		j1 = (e1&(mask<<(i-k)))>>(i-k);
345
		if ( j1 ) {
346
			if ( parity && ! (j1&0x1) ) {									// only use odd values if parity is set
347
				_jac_mult (o[0], o[0], b[j1-1], c[0]);
348
				_jac_mult (o[0], o[0], a[0], c[0]);
349
			} else {
350
				_jac_mult (o[0], o[0], b[j1], c[0]);
351
			}
352
		}
353
		j2 = (e2&(mask<<(i-k)))>>(i-k);
354
		if ( j2 ) {
355
			if ( parity && ! (j2&0x1) ) {									// only use odd values if parity is set
356
				_jac_mult (o[1], o[1], b[j2-1], c[0]);
357
				_jac_mult (o[1], o[1], a[0], c[0]);
358
			} else {
359
				_jac_mult (o[1], o[1], b[j2], c[0]);
360
			}
361
		}
362
		i -= k;
363
	}
364
	for ( j = 0 ; j < i ; j++ ) jac_square2 (o, o, o+1, o+1, c);					// and here
365
	j1 = e1&(mask>>(k-i));
366
	if ( j1 ) {
367
		if ( parity && ! (j1&0x1) ) {										// only use odd values if parity is set
368
			_jac_mult (o[0], o[0], b[j1-1], c[0]);
369
			_jac_mult (o[0], o[0], a[0], c[0]);
370
		} else {
371
			_jac_mult (o[0], o[0], b[j1], c[0]);
372
		}
373
	}
374
	j2 = e2&(mask>>(k-i));
375
	if ( j2 ) {
376
		if ( parity && ! (j2&0x1) ) {										// only use odd values if parity is set
377
			_jac_mult (o[1], o[1], b[j1-1], c[0]);
378
			_jac_mult (o[1], o[1], a[0], c[0]);
379
		} else {
380
			_jac_mult (o[1], o[1], b[j2], c[0]);
381
		}
382
	}
383
}
384

385

386
// quick and dirty binary exponentiation - don't worry about efficiency, this is only used for validation
387
void jac_exp_mpz (jac_t o[1], jac_t a[1], mpz_t e, curve_t c[1])
388
{
389
	register int i, t;
390
	jac_t b;
391
	
392
	if ( ! mpz_sgn(e) ) { _jac_set_identity (o[0]);  return; }
393

394
	_jac_set (b, a[0]);			// copy a to handle overlap
395
	_jac_set (o[0], a[0]) ;
396
	i = mpz_sizeinbase(e,2) - 1;
397
	while ( i > 0 ) {
398
		_jac_square (o[0], o[0], c[0]);
399
		i--;
400
		if ( mpz_tstbit(e,i) ) _jac_mult (o[0], o[0], b, c[0]);
401
	}
402
}
403

404

405
int jac_verify_group_exponent (mpz_t e, curve_t c[1])
406
{
407
	jac_t a, b;
408
	int i;
409

410
	for ( i = 0 ; i < JAC_CONFIDENCE_LEVEL ; i++ ) {
411
		_jac_random (a, c[0]);
412
		jac_exp_mpz (&b, &a, e, c);
413
		if ( ! _jac_is_identity (b) ) return 0;
414
	}
415
	return 1;
416
}
417

418

419
/*
420
	Computess the order of a given that a^{p^h} = 1
421
*/
422
unsigned long jac_pp_order_ui (jac_t a[1], unsigned long p, unsigned long h, curve_t c[1])
423
{
424
	jac_t x;
425
	unsigned long o;
426
	int i;
427
	
428
	o = 1;
429
	if ( _jac_is_identity (a[0]) ) return o;
430
	_jac_set (x, a[0]);
431
	for ( i = 1 ; i < h ; i++ ) {
432
		o *= p;
433
		jac_exp_ui (&x, &x, p, c);
434
		if ( _jac_is_identity (x) ) break;
435
	}
436
	if ( ! _jac_is_identity (x) ) o *= p;
437
	return o;
438
}
439

440

441
/*
442
	Recursive fastorder algorithm optimized for small exponents (takes advantage of fast dual exponentiation).
443
	This algorithm is, to my knowledge, new and not written up anywhere - maybe it should be, it's quite fast (AVS).
444
*/
445
int jac_fastorder_ui (unsigned long *po, jac_t a[1], unsigned long e, curve_t c[1])
446
{
447
	jac_t t[JAC_MAX_FASTORDER_UI_W];
448
	unsigned long q[JAC_MAX_FASTORDER_UI_W];
449
	unsigned long h[JAC_MAX_FASTORDER_UI_W];
450
	unsigned long o, qp, m, n;
451
	register int i, j, k, w;
452

453
	if ( _jac_is_identity (a[0]) ) { *po = 1;  return 1; }
454
	if ( ! e ) { err_printf ("%7u: zero exponent for non-identity element in fastorder!\n", _ff_p);  exit (0); }
455
	w = ui_factor (q, h, e);
456
	if ( w > JAC_MAX_FASTORDER_UI_W ) { err_printf ("w=%d too large for jac_fastorder_ui, exceeded %d\n", w, JAC_MAX_FASTORDER_UI_W);  exit (0); }
457
	return jac_factored_order_ui (po, a, q, h, w, c);
458

459
}
460

461
int jac_factored_order_ui (unsigned long *po, jac_t a[1], unsigned long p[], unsigned long h[], int w, curve_t c[1])
462
{
463
	jac_t t[JAC_MAX_FASTORDER_UI_W];
464
	register unsigned long m, n;
465
	register int i, j, k;
466
	
467
	if ( w == 1 ) {
468
		n = p[0];
469
		_jac_set (t[0], a[0]);
470
		for ( i = 1 ; i < h[0] ; i++ ) {
471
			jac_exp_ui (t, t, p[0], c);
472
			if ( _jac_is_identity (t[0]) ) break;
473
			n *= p[0];
474
		}
475
		*po = n;
476
		return 1;
477
	}
478
	k = w-1;	// index of biggest prime factor
479
	for ( i = 1, n = p[k] ; i < h[k] ; i++ ) n *= p[k];
480
	m = 1;
481
	for ( i = 0 ; i < k ; i++ )
482
		for ( j = 1, m*=p[i] ; j < h[i] ; j++ ) m *= p[i];
483
		
484
	jac_exp2_ui (t, a, m, n, c); 
485
	
486
	if ( _jac_is_identity(t[0]) ) return jac_factored_order_ui (po, a, p, h, k, c);	// don't need biggest prime power at all
487
	for ( i = 1 ; i < h[k] ; i++ ) {
488
		jac_exp_ui (t, t, p[k], c);
489
		if ( _jac_is_identity(t[0]) ) break;
490
	}
491
	if ( i < h[k] ) {												// don't need all the powers of biggest prime factor (rare)
492
		_jac_set (t[1], a[0]);									// adjust t1 to include only the powers required
493
		for ( j = 0, n=1 ; j < i ; j++ ) { jac_exp_ui (t+1, t+1, p[k], c);  n*= p[k]; }
494
	}
495
	if ( _jac_is_identity (t[1]) ) { *po = n;  return 1; }
496
	if ( ! jac_factored_order_ui (po, t+1, p, h, k, c) ) return 0;
497
	*po *= n;
498
	return 1;
499
}
500

501

502
/*
503
    This function is not used by smalljac.
504

505
    An implementation of Algorithm 9.1 in [SutherlandThesis].  It is slightly out-of-date as it doesn't include
506
    the refinements of Algorithm 3 in [Sutherland2007].  These don't impact performance significantly,
507
    but make the algorithm simpler - should update eventually.
508

509
    For now don't try to save space - the space required is roughly L/log(L) which isn't bad compared to the baby/giant tables
510
*/
511
unsigned long jac_fastorder_powersmooth (mpz_t o, jac_t a[1], unsigned long L, curve_t c[1])
512
{
513
	static mpz_t e;
514
	static int init;
515
	prime_enum_ctx_t *ctx;
516
	jac_t *b, temp;
517
	unsigned *pp;
518
	unsigned q[JAC_MAX_FACTORS];
519
	unsigned p[JAC_MAX_FACTORS];
520
	unsigned h[JAC_MAX_FACTORS];
521
	unsigned i, j, k, t, w, low, mid, high;
522
	unsigned long size;
523
	int sts;
524
	
525
	if ( ! init ) { mpz_init(e);  init = 1; }
526
	
527
	mpz_set_ui(o,1);
528
	if ( _jac_is_identity (a[0]) ) return 1;
529
		
530
	ctx = fast_prime_enum_start (0, L, 1);
531
	w = (unsigned) ui_pi_bound(L) + 1;
532
	b = mem_alloc (w*sizeof(*b));
533
	pp = mem_alloc (w*sizeof(*pp));
534
	size = (unsigned long)w*(sizeof(*b)+sizeof(*pp));
535
	_jac_set (b[1], a[0]);
536
	pp[0] = 1;
537
	for ( i = 1 ; i < w-1 ; i++ ) {			// w is larger than needed, but we should never reach it (assuming |a| is really L-powersmooth)
538
		if ( _jac_is_identity (b[i]) ) break;
539
		pp[i] = fast_prime_enum (ctx);
540
		if ( pp[i] > L ) break;
541
		jac_exp_ui (b+i+1, b+i, pp[i], c);
542
	}
543
	fast_prime_enum_end (ctx);
544
	if ( ! _jac_is_identity (b[i]) ) { sts = 0; goto cleanup; }
545
	sts = 1;
546
	t = i-1;
547
	w = 1;
548
	q[0] = pp[t];
549
	mpz_mul_ui (o, o, pp[t]);
550
	for (;;) {
551
		if ( t == 1 ) break;
552
		low = 1;  high = t-1;
553
		while ( high > low ) {
554
			mid = (high + low + 1)/2;
555
			jac_exp_mpz (&temp, b+mid, o, c);
556
			if ( _jac_is_identity (temp) ) {
557
				high = mid-1;
558
			} else {
559
				low = mid;
560
			}
561
		}
562
		t = low;
563
		jac_exp_mpz (&temp, b+t, o, c);
564
		if ( _jac_is_identity(temp) ) { if ( t != 1 ) printf ("breaking with t = %d\n", t);  break; }
565
		if ( w >= JAC_MAX_FACTORS ) { err_printf ("Excceded JAC_MAX_FACTORS in jac_fastorder_powersmooth!\n");  exit (0); }
566
		mpz_mul_ui (o, o, pp[t]);
567
		q[w++] = pp[t];
568
	}
569
	
570
	// We now have a reasonably small factored exponent in q[0]...q[w] - do a standard classical order algorithm to finish
571
	for ( i = 0 ; i < w ; i++ ) {
572
		p[i] = ui_pp_base (q[i]);
573
		for ( h[i] = 1, t = p[i] ; t < q[i] ; h[i]++, t *= p[i] );
574
	}
575
	jac_exp_mpz (b, a, o, c);
576
	if ( ! _jac_is_identity (b[0]) ) { err_printf ("Exponent validation failed in jac_fastorder_powersmooth\n");  exit (0); }
577
	
578
	for ( i = 0 ; i < w ; i++ ) {
579
		mpz_divexact_ui (e, o, q[i]);
580
		jac_exp_mpz (b+i, a, e, c);
581
	}
582
	mpz_set_ui (o, 1);
583
	for ( i = 0 ; i < w ; i++ ) {
584
		for ( j = 0 ; j < h[i]-1 ; j++ ) {
585
			if ( _jac_is_identity (b[i]) ) break;
586
			jac_exp_ui (b+i, b+i, p[i], c);
587
		}
588
		if ( ! _jac_is_identity (b[i]) ) j++;
589
		for ( k = 0 ; k < j ; k++ ) mpz_mul_ui (o, o, p[i]);
590
	}
591
cleanup:
592
	mem_free (b);
593
	mem_free (pp);
594
	return ( sts ? size : 0 );
595
}
596

597
Product

Resources

Company