Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Download
241818 views
1
#include <stdio.h>
2
#include <stdlib.h>
3
#include <memory.h>
4
#include "ff.h" // only used by slowcount
5
#include "cstd.h"
6
#include "pointcount.h"
7
8
/*
9
Copyright 2007-2008 Andrew V. Sutherland
10
11
This file is part of smalljac.
12
13
smalljac is free software: you can redistribute it and/or modify
14
it under the terms of the GNU General Public License as published by
15
the Free Software Foundation, either version 2 of the License, or
16
(at your option) any later version.
17
18
smalljac is distributed in the hope that it will be useful,
19
but WITHOUT ANY WARRANTY; without even the implied warranty of
20
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
GNU General Public License for more details.
22
23
You should have received a copy of the GNU General Public License
24
along with smalljac. If not, see <http://www.gnu.org/licenses/>.
25
*/
26
27
/*
28
Fast pointcounting using finite differences as described in [KedlayaSutherland2007].
29
Hardwired versions for each genus are provided for optimal performance.
30
Characteristic 2 is not supported (for genus 1 p=2 is handled in smalljac.c).
31
*/
32
33
34
/* We use the triangle T(j,k) are from OEIS A019538 - and add row 0 and col 0 which are just 1 0 0 0 ...
35
Note that the square tables displayed by OEIS transpose the entries
36
37
T(j,k) = k!S(j,k) where S(j,k) is a Stirling number of the second kind
38
*/
39
40
static unsigned long T[10][10] = { {1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
41
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
42
{0, 1, 2, 0, 0, 0, 0, 0, 0, 0},
43
{0, 1, 6, 6, 0, 0, 0, 0, 0, 0},
44
{0, 1, 14, 36, 24, 0, 0, 0, 0, 0},
45
{0, 1, 30, 150, 240, 120, 0, 0, 0, 0},
46
{0, 1, 62, 540, 1560, 1800, 720, 0, 0, 0},
47
{0, 1, 126, 1806, 8400, 16800, 15120, 5040, 0, 0},
48
{0, 1, 254, 5796, 40824, 126000, 191520, 141120, 40320, 0},
49
{0, 1, 510, 18150, 186480, 834120, 1905120, 2328480, 1451520, 362880}};
50
51
static unsigned long *map;
52
53
static unsigned long tab[64] = { 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
54
0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000,
55
0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, 0x400000, 0x800000,
56
0x1000000, 0x2000000, 0x4000000, 0x8000000, 0x10000000, 0x20000000, 0x40000000, 0x80000000,
57
0x100000000, 0x200000000, 0x400000000, 0x800000000, 0x1000000000, 0x2000000000, 0x4000000000, 0x8000000000,
58
0x10000000000, 0x20000000000, 0x40000000000, 0x80000000000, 0x100000000000, 0x200000000000, 0x400000000000, 0x800000000000,
59
0x1000000000000, 0x2000000000000, 0x4000000000000, 0x8000000000000, 0x10000000000000, 0x20000000000000, 0x40000000000000, 0x80000000000000,
60
0x100000000000000, 0x200000000000000, 0x400000000000000, 0x800000000000000, 0x1000000000000000, 0x2000000000000000, 0x4000000000000000, 0x8000000000000000 };
61
62
#define _advance_3t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p;
63
#define _advance_4t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p; t3 -= t4; if ( t3 < 0 ) t3 += p;
64
#define _advance_5t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p; t3 -= t4; if ( t3 < 0 ) t3 += p; t4 -= t5; if ( t4 < 0 ) t4 += p;
65
#define _advance_6t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p; t3 -= t4; if ( t3 < 0 ) t3 += p; t4 -= t5; if ( t4 < 0 ) t4 += p; t5 -= t6; if ( t5 < 0 ) t5 += p;
66
#define _advance_7t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p; t3 -= t4; if ( t3 < 0 ) t3 += p; t4 -= t5; if ( t4 < 0 ) t4 += p; \
67
t5 -= t6; if ( t5 < 0 ) t5 += p; t6 -= t7; if ( t6 < 0 ) t6 += p;
68
#define _advance_8t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p; t3 -= t4; if ( t3 < 0 ) t3 += p; t4 -= t5; if ( t4 < 0 ) t4 += p; \
69
t5 -= t6; if ( t5 < 0 ) t5 += p; t6 -= t7; if ( t6 < 0 ) t6 += p; t7 -= t8; if ( t7 < 0 ) t7 += p;
70
#define _advance_9t_seq() t0 -= t1; if ( t0 < 0 ) t0 += p; t1 -= t2; if ( t1 < 0 ) t1 += p; t2 -= t3; if ( t2 < 0 ) t2 += p; t3 -= t4; if ( t3 < 0 ) t3 += p; t4 -= t5; if ( t4 < 0 ) t4 += p; \
71
t5 -= t6; if ( t5 < 0 ) t5 += p; t6 -= t7; if ( t6 < 0 ) t6 += p; t7 -= t8; if ( t7 < 0 ) t7 += p; t8 -= t9; if ( t8 < 0 ) t8 += p;
72
73
void pointcount_init (unsigned maxp)
74
{
75
map = mem_alloc (maxp/8+64); // budget extra space for wrapping
76
}
77
78
/*
79
As described in KedlayaSutherland2007, we compute
80
D[k] = (-1)^k\(Delta^k f)(0)
81
where Delta is the difference operator: (Delta f)(x) = f(x+1) - f(x)
82
*/
83
void pointcount_precompute (mpz_t D[], mpz_t f[], int degree)
84
{
85
static mpz_t x;
86
static int init;
87
int j, k;
88
89
if ( ! init ) { mpz_init (x); init = 1; }
90
for ( k= 0 ; k <= degree ; k++ ) {
91
mpz_set_ui (D[k], 0);
92
for ( j = 0 ; j <= degree ; j++ ) {
93
if ( mpz_sgn (f[j]) ) {
94
mpz_mul_ui (x, f[j], T[j][k]);
95
mpz_add (D[k], D[k], x);
96
}
97
}
98
if ( k&1 ) mpz_neg (D[k], D[k]);
99
}
100
}
101
102
103
void pointcount_precompute_long (long D[], long f[], int degree)
104
{
105
int j, k;
106
107
for ( k= 0 ; k <= degree ; k++ ) {
108
D[k] = 0;
109
for ( j = 0 ; j <= degree ; j++ ) D[k] += f[j] * T[j][k];
110
if ( k&1 ) D[k] = -D[k];
111
}
112
}
113
114
115
// Creates a bitmap of non-zero quadratic residues in F_p
116
void pointcount_map_residues (unsigned p)
117
{
118
register signed t0, t1;
119
unsigned long x;
120
121
memset (map, 0, p/8+9); // be sure to clear out 64 bits past the end
122
t1 = p;
123
x = (unsigned long)((p-1)/2);
124
x *= x;
125
t0 = (signed)(x%(unsigned long)p); // the first square we check is [(p-1)/2]^2 - need to compute in long to handle overflow
126
// work down toward zero but don't include zero since its handled explicitly
127
while ( t0 ) {
128
map[t0>>6] |= tab[t0&0x3F];
129
t1 -= 2;
130
t0 -= t1; if ( t0 < 0 ) t0 += p;
131
};
132
}
133
134
135
// Creates a bitmap of non-zero quadratic residues in F_p which are <= (p-1)/2 (as integers mod p)
136
void pointcount_map_small_residues (unsigned p)
137
{
138
register signed c, t0, t1;
139
unsigned long x;
140
141
memset (map, 0, p/16+9); // be sure to clear out 64 bits past the end
142
143
t1 = p;
144
x = (unsigned long)((p-1)/2);
145
x *= x;
146
t0 = (signed)(x%(unsigned long)p); // the first square we check is [(p-1)/2]^2 - need to compute in long to handle overflow
147
// work down toward zero but don't include zero since its handled explicitly
148
c = p>>1;
149
while ( t0 ) {
150
if ( t0 <= c ) map[t0>>6] |= tab[t0&0x3F];
151
t1 -= 2;
152
t0 -= t1; if ( t0 < 0 ) t0 += p;
153
};
154
}
155
156
157
// Create a bitmap of non-zero quadratic residues in F_p^2 represented as az+b F[z]/(z^2-s) where s is the least
158
// non-residue mod p. Note that z is not necessarily a primitive element of F_p^2 and we don't need it to be.
159
// The bit p*a+b is set to 1 if az+b is a residue. The value of s is returned.
160
161
unsigned pointcount_map_residues_d2 (unsigned p)
162
{
163
register signed i, k, s, t00, t01, t10, t11;
164
unsigned long x;
165
166
memset (map, 0, (p*p)/8+1);
167
168
// This program is not as efficient as it could be, it maps every residue twice, but
169
// efficiency is not so critical in extension fields and simplicity is a virtue
170
// Map residues mod p to compute least non-residue s - this is overkill of course.
171
pointcount_map_residues (p);
172
for ( s = 2 ; (map[s>>6] & tab[s&0x3f]) ; s++ );
173
174
// For simplicity, we use addition rather than subtraction
175
for ( k = 1 ; k <= (p-1)/2 ; k++ ) {
176
x = (unsigned long)k; x *= k; x *= s; x %= p; // x = (kz)^2 = k^2s
177
t00 = (signed)x; t01 = 0; // t0 = 0z + k^2s
178
t10 = 1; t11 = k+k; if ( t11 >= p ) t11 -= p; // t1 = 2kz + 1
179
do {
180
i = p*t01+t00;
181
map[i>>6] |= tab[i&0x3F];
182
t00 += t10; if ( t00 >= p ) t00 -= p;
183
t01 += t11; if ( t01 >= p ) t01 -= p;
184
t10 += 2; if ( t10 >= p ) t10 -= p;
185
} while ( t01 );
186
}
187
map[0] &= ~(1UL); // clear zero-bit, we only want non-zero residues
188
return s;
189
}
190
191
#define POINTCOUNT_MAX_D3_PRIME 47
192
static int d3stab[POINTCOUNT_MAX_D3_PRIME+1] = { 0,0,0,1,0,2,0,2,0,0,0,3,0,1,0,0,0,2,0,4,0,0,0,4,0,0,0,0,0,1,0,1,0,0,0,0,0,2,0,0,0,1,0,2,0,0,0,1};
193
194
// Create a bitmap of non-zero quadratic residues in F_p^3 represented as az^2+bz+c in F[z]/(z^3-z-s) where s the
195
// the least s for which z^3-z-s is irreducible mod p. Note that z is not necessarily a primitive element of F_p^3
196
// and we don't need it to be. The bit p^2*a+pb+c is set to 1 if az^2+bz+c is a residue.
197
// The value of s is returned - currently hardwired via tablelookup for odd primes up to 23
198
unsigned pointcount_map_residues_d3 (unsigned p)
199
{
200
register signed i, j, a, b, s, t00, t01, t02, t10, t11, t12;
201
202
if ( p > POINTCOUNT_MAX_D3_PRIME || ! (s=d3stab[p]) ) { err_printf ("invalid prime %u specified in pointcount_map_residues_d3\n", p); exit (0); }
203
memset (map, 0, (p*p)/8+1);
204
205
// For simplicity, we use addition rather than subtraction, assume p is small enough so that overflow is not a worry
206
for ( a = 0 ; a <= (p-1)/2 ; a++ ) { // only need to square half the elements to get all the residues
207
for ( b = 0 ; b < p ; b++ ) {
208
i=2*a*b; j=a*a;
209
t00 = (i*s)%p; t01 = (j*s+i)%p; t02 = (j+b*b)%p; // t0 = (az^2+bz)^2 = (a^2+b^2)z^2 + (a^2s+2ab)z + 2abs (applying z^3=z+s)
210
t10 = 1; t11 = (b+b)%p; t12 = (a+a)%p; // t1 = 2(az^2+bz)+1 = 2az^2 + 2bz + 1
211
for ( j = 0 ; j < p ; j++ ) {
212
i = p*p*t02+p*t01+t00;
213
map[i>>6] |= tab[i&0x3F];
214
t00 += t10; if ( t00 >= p ) t00 -= p;
215
t01 += t11; if ( t01 >= p ) t01 -= p;
216
t02 += t12; if ( t02 >= p ) t02 -= p;
217
t10 += 2; if ( t10 >= p ) t10 -= p;
218
}
219
}
220
}
221
map[0] &= ~(1UL); // clear zero-bit, we only want non-zero residues
222
return s;
223
}
224
225
226
// Creates a bitmap of non-zero cubic residues in F_p
227
void pointcount_map_cubic_residues (unsigned p)
228
{
229
register signed t0, t1, t2, t3;
230
231
memset (map, 0, p/8+1);
232
// enumerate non-zero values of f(x) = x^3 via finite differences starting at f(1)
233
t0 = 1; // f(1)
234
t1 = 7%p; if ( t1) t1 = p-t1; // (-1)^1 (Delta^1 f)(1)
235
t2 = 12%p; // (-1)^2 (Delta^2 f)(1)
236
t3 = 6%p; if ( t3 ) t3 = p-t3; // (-1)^3 (Delta^3 f)(1)
237
238
while ( t0 ) {
239
map[t0>>6] |= tab[t0&0x3F];
240
_advance_3t_seq();
241
};
242
}
243
244
// counts points on Picard curves y^3 = f(x) with f(x) of degree 4
245
unsigned pointcount_pd4 (unsigned long D[5], unsigned p)
246
{
247
register signed i, t0, t1, t2, t3, t4, m, n;
248
249
pointcount_map_cubic_residues (p);
250
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4];
251
m = n = 0;
252
for ( i = 0 ; i < p ; i++ ) {
253
if ( ! t0 ) m++;
254
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
255
_advance_4t_seq();
256
}
257
return m+((p%3)==1?3:1)*n+1; // add point at infinity
258
}
259
260
261
262
unsigned pointcount_g1 (unsigned long D[4], unsigned p)
263
{
264
register signed i, t0, t1, t2, t3, t, s0, m, n;
265
266
pointcount_map_residues (p);
267
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3];
268
m = n = 0;
269
for ( i = 0 ; i < p ; i++ ) {
270
if ( ! t0 ) m++;
271
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
272
_advance_3t_seq();
273
}
274
return m+2*n+1; // add point at infinity
275
}
276
277
278
unsigned pointcount_g2 (unsigned long D[6], unsigned p)
279
{
280
register signed i, t0, t1, t2, t3, t4, t5, m, n;
281
282
pointcount_map_residues (p);
283
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5];
284
m = n = 0;
285
for ( i = 0 ; i < p ; i++ ) {
286
if ( ! t0 ) m++;
287
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
288
_advance_5t_seq();
289
}
290
return m+2*n+1; // add point at infinity
291
}
292
293
unsigned pointcount_g2d6 (unsigned long D[7], unsigned p, unsigned long f6)
294
{
295
register signed i, t0, t1, t2, t3, t4, t5, t6, m, n;
296
297
pointcount_map_residues (p);
298
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6];
299
m = 1 + ((map[f6>>6] & tab[f6&0x3f])?1:-1); // 2 or 0 points at infinity depending on whether leading coeff is square or not.
300
n = 0;
301
for ( i = 0 ; i < p ; i++ ) {
302
if ( ! t0 ) m++;
303
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
304
_advance_6t_seq();
305
}
306
return m+2*n;
307
}
308
309
unsigned pointcount_g3 (unsigned long D[8], unsigned p)
310
{
311
register signed i, t0, t1, t2, t3, t4, t5, t6, t7, m, n;
312
unsigned long x;
313
314
pointcount_map_residues (p);
315
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6]; t7 = (signed) D[7];
316
m = n = 0;
317
for ( i = 0 ; i < p ; i++ ) {
318
if ( ! t0 ) m++;
319
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
320
_advance_7t_seq();
321
}
322
return m+2*n+1; // add point at infinity
323
}
324
325
unsigned pointcount_g3d8 (unsigned long D[9], unsigned p, unsigned long f8)
326
{
327
register signed i, t0, t1, t2, t3, t4, t5, t6, t7, t8, m, n;
328
unsigned long x;
329
330
pointcount_map_residues (p);
331
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6]; t7 = (signed) D[7]; t8 = (signed) D[8];
332
m = 1 + ((map[f8>>6] & tab[f8&0x3F])?1:-1); // 2 or 0 points at infinity depending on whether leading coeff is square or not.
333
n = 0;
334
for ( i = 0 ; i < p ; i++ ) {
335
if ( ! t0 ) m++;
336
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
337
_advance_8t_seq();
338
}
339
return m+2*n;
340
}
341
342
unsigned pointcount_g4 (unsigned long D[10], unsigned p)
343
{
344
register signed i, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, m, n;
345
unsigned long x;
346
347
pointcount_map_residues(p);
348
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5];
349
t6 = (signed) D[6]; t7 = (signed) D[7]; t8 = (signed) D[8]; t9 = (signed) D[9];
350
m = n = 0;
351
for ( i = 0 ; i < p ; i++ ) {
352
if ( ! t0 ) m++;
353
if ( (map[t0>>6] & tab[t0&0x3F]) ) n++;
354
_advance_9t_seq();
355
}
356
return m+2*n+1; // add point at infinity
357
}
358
359
/*
360
The "big" versions below are intended for use with "big" primes. They use a "small" residue
361
map (half the size) that may better fit in cache.
362
*/
363
364
unsigned pointcount_big_g1 (unsigned long D[4], unsigned p)
365
{
366
register signed i, j, k, t0, t1, t2, t3, t, c, m, n;
367
368
pointcount_map_small_residues (p);
369
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3];
370
371
m = n = 0;
372
c = p>>1;
373
if ( (p&0x3) == 1 ) { // -1 a quadratic residue mod p?
374
for ( i = 0 ; i < p ; i++ ) {
375
if ( ! t0 ) m++;
376
j = ( t0>c ? p-t0 : t0 );
377
if ( (map[j>>6] & tab[j&0x3F]) ) n++;
378
_advance_3t_seq();
379
}
380
} else { // this case is unfortunately nearly twice as slow - the conditional code kills it
381
for ( i = 0 ; i < p ; i++ ) {
382
if ( ! t0 ) m++;
383
k = (t0>c);
384
j = ( k ? p-t0 : t0 );
385
n += ( (map[j>>6] & tab[j&0x3F]) ? !k : k );
386
_advance_3t_seq();
387
}
388
}
389
return m+2*n+1; // add point at infinity
390
}
391
392
unsigned pointcount_big_g2 (unsigned long D[6], unsigned p)
393
{
394
register signed i, j, k, t0, t1, t2, t3, t4, t5, t, m, n, c;
395
396
pointcount_map_small_residues (p);
397
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5];
398
399
m = n = 0;
400
c = p>>1;
401
402
if ( (p&0x3) == 1 ) { // -1 a quadratic residue mod p?
403
for ( i = 0 ; i < p ; i++ ) {
404
if ( ! t0 ) m++;
405
j = ( t0>c ? p-t0 : t0 );
406
if ( (map[j>>6] & tab[j&0x3F]) ) n++;
407
_advance_5t_seq();
408
}
409
} else { // this case is unfortunately nearly twice as slow - the conditional code kills it
410
for ( i = 0 ; i < p ; i++ ) {
411
if ( ! t0 ) m++;
412
k = (t0>c);
413
j = ( k ? p-t0 : t0 );
414
n += ( (map[j>>6] & tab[j&0x3F]) ? !k : k );
415
_advance_5t_seq();
416
}
417
}
418
return m+2*n+1; // add point at infinity
419
}
420
421
unsigned pointcount_big_g2d6 (unsigned long D[7], unsigned p, unsigned long f6)
422
{
423
register signed i, j, k, t0, t1, t2, t3, t4, t5, t6, t, m, n, c;
424
425
pointcount_map_small_residues (p);
426
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6];
427
428
m = 1 + ((map[f6>>6] & tab[f6&0x3f])?1:-1); // 2 or 0 points at infinity depending on whether leading coeff is square or not.
429
n = 0;
430
c = p>>1;
431
432
if ( (p&0x3) == 1 ) { // -1 a quadratic residue mod p?
433
for ( i = 0 ; i < p ; i++ ) {
434
if ( ! t0 ) m++;
435
j = ( t0>c ? p-t0 : t0 );
436
if ( (map[j>>6] & tab[j&0x3F]) ) n++;
437
_advance_6t_seq();
438
}
439
} else { // this case is unfortunately nearly twice as slow - the conditional code kills it
440
for ( i = 0 ; i < p ; i++ ) {
441
if ( ! t0 ) m++;
442
k = (t0>c);
443
j = ( k ? p-t0 : t0 );
444
n += ( (map[j>>6] & tab[j&0x3F]) ? !k : k );
445
_advance_6t_seq();
446
}
447
}
448
return m+2*n;
449
}
450
451
unsigned pointcount_big_g3 (unsigned long D[8], unsigned p)
452
{
453
register signed i, j, k, t0, t1, t2, t3, t4, t5, t6, t7, m, n, c;
454
unsigned long x;
455
456
pointcount_map_small_residues (p);
457
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6]; t7 = (signed) D[7];
458
459
c = p>>1;
460
m = n = 0;
461
if ( (p&0x3) == 1 ) { // -1 a quadratic residue mod p?
462
for ( i = 0 ; i < p ; i++ ) {
463
if ( ! t0 ) m++;
464
j = ( t0>c ? p-t0 : t0 );
465
if ( (map[j>>6] & tab[j&0x3F]) ) n++;
466
_advance_7t_seq();
467
}
468
} else { // this case is unfortunately nearly twice as slow - the conditional code kills it
469
for ( i = 0 ; i < p ; i++ ) {
470
if ( ! t0 ) m++;
471
k = (t0>c);
472
j = ( k ? p-t0 : t0 );
473
n += ( (map[j>>6] & tab[j&0x3F]) ? !k : k );
474
_advance_7t_seq();
475
}
476
}
477
return m+2*n+1; // add point at infinity
478
}
479
480
unsigned pointcount_big_g4 (unsigned long D[10], unsigned p)
481
{
482
register signed i, j, k, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, m, n, c;
483
unsigned long x;
484
485
pointcount_map_small_residues (p);
486
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5];
487
t6 = (signed) D[6]; t7 = (signed) D[7]; t8 = (signed) D[8]; t9 = (signed) D[9];
488
489
c = p>>1;
490
m = n = 0;
491
if ( (p&0x3) == 1 ) { // -1 a quadratic residue mod p?
492
for ( i = 0 ; i < p ; i++ ) {
493
if ( ! t0 ) m++;
494
j = ( t0>c ? p-t0 : t0 );
495
if ( (map[j>>6] & tab[j&0x3F]) ) n++;
496
_advance_9t_seq();
497
}
498
} else { // this case is unfortunately nearly twice as slow - the conditional code kills it
499
for ( i = 0 ; i < p ; i++ ) {
500
if ( ! t0 ) m++;
501
k = (t0>c);
502
j = ( k ? p-t0 : t0 );
503
n += ( (map[j>>6] & tab[j&0x3F]) ? !k : k );
504
_advance_9t_seq();
505
}
506
}
507
return m+2*n+1; // add point at infinity
508
}
509
510
511
#define _count32(z) if ( (z & 1) ) p0 += 2; z >>= 1; if ( (z & 1) ) p1 += 2; z >>= 1; if ( (z & 1) ) p2 += 2; z >>= 1; if ( (z & 1) ) p3 += 2; z >>= 1; \
512
if ( (z & 1) ) p4 += 2; z >>= 1; if ( (z & 1) ) p5 += 2; z >>= 1; if ( (z & 1) ) p6 += 2; z >>= 1; if ( (z & 1) ) p7 += 2; z >>= 1; \
513
if ( (z & 1) ) p8 += 2; z >>= 1; if ( (z & 1) ) p9 += 2; z >>= 1; if ( (z & 1) ) p10 += 2; z >>= 1; if ( (z & 1) ) p11 += 2; z >>= 1; \
514
if ( (z & 1) ) p12 += 2; z >>= 1; if ( (z & 1) ) p13 += 2; z >>= 1; if ( (z & 1) ) p14 += 2; z >>= 1; if ( (z & 1) ) p15 += 2; z >>= 1; \
515
if ( (z & 1) ) q0 += 2; z >>= 1; if ( (z & 1) ) q1 += 2; z >>= 1; if ( (z & 1) ) q2 += 2; z >>= 1; if ( (z & 1) ) q3 += 2; z >>= 1; \
516
if ( (z & 1) ) q4 += 2; z >>= 1; if ( (z & 1) ) q5 += 2; z >>= 1; if ( (z & 1) ) q6 += 2; z >>= 1; if ( (z & 1) ) q7 += 2; z >>= 1; \
517
if ( (z & 1) ) q8 += 2; z >>= 1; if ( (z & 1) ) q9 += 2; z >>= 1; if ( (z & 1) ) q10 += 2; z >>= 1; if ( (z & 1) ) q11 += 2; z >>= 1; \
518
if ( (z & 1) ) q12 += 2; z >>= 1; if ( (z & 1) ) q13 += 2; z >>= 1; if ( (z & 1) ) q14 += 2; z >>= 1; if ( (z & 1) ) q15 += 2;
519
520
// hardwired for 32x
521
int pointcount_multi_g2 (unsigned pts[], unsigned long D[6], unsigned p)
522
{
523
register signed i, t0, t1, t2, t3, t4, t5;
524
unsigned p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15;
525
unsigned q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13, q14, q15;
526
register unsigned long z;
527
528
if ( p < 32 ) { err_printf ("cannot multicount with p < 32\n"); exit (0); }
529
530
pointcount_map_residues (p);
531
// wrap table around top
532
for ( t2 = 1 ; t2 < 32 ; t2++ )
533
if ( map[t2>>6] & tab[t2&0x3f] ) map[(p+t2)>>6] |= tab[(p+t2)&0x3f];
534
535
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5];
536
537
for ( i = 0 ; i < 32 ; i++ ) pts[i] = 1; // point at infinity
538
p0 = p1 = p2 = p3 = p4 = p5 = p6 = p7 = p8 = p9 = p10 = p11 = p12 = p13 = p14 = p15 = 0;
539
q0 = q1 = q2 = q3 = q4 = q5 = q6 = q7 = q8 = q9 = q10 = q11 = q12 = q13 = q14 = q15 = 0;
540
for ( i = 0 ; i < p ; i++ ) {
541
if ( ! t0 ) p0++;
542
if ( t0+32 > p ) pts[p-t0]++; // assumes p > 32
543
z = map[t0>>6];
544
if ( (t0&0x3f) > 31 ) {
545
z >>= 32;
546
z |= map[(t0>>6)+1]<<32;
547
z >>= (t0&0x3f) - 32;
548
} else {
549
z >>= (t0&0x3f);
550
}
551
_count32(z);
552
_advance_5t_seq();
553
}
554
pts[0] += p0; pts[1] += p1; pts[2] += p2; pts[3] += p3; pts[4] += p4; pts[5] += p5; pts[6] += p6; pts[7] += p7;
555
pts[8] += p8; pts[9] += p9; pts[10] += p10; pts[11] += p11; pts[12] += p12; pts[13] += p13; pts[14] += p14; pts[15] += p15;
556
pts[16] += q0; pts[17] += q1; pts[18] += q2; pts[19] += q3; pts[20] += q4; pts[21] += q5; pts[22] += q6; pts[23] += q7;
557
pts[24] += q8; pts[25] += q9; pts[26] += q10; pts[27] += q11; pts[28] += q12; pts[29] += q13; pts[30] += q14; pts[31] += q15;
558
return 1;
559
}
560
561
// hardwired for 32x
562
int pointcount_multi_g2d6 (unsigned pts[], unsigned long D[7], unsigned p, unsigned long f6)
563
{
564
register signed i, t0, t1, t2, t3, t4, t5, t6;
565
unsigned p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15;
566
unsigned q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13, q14, q15;
567
register unsigned long z;
568
569
if ( p < 32 ) { err_printf ("cannot multicount with p < 32\n"); exit (0); }
570
571
pointcount_map_residues (p);
572
// wrap table around top
573
for ( t2 = 1 ; t2 < 32 ; t2++ )
574
if ( map[t2>>6] & tab[t2&0x3f] ) map[(p+t2)>>6] |= tab[(p+t2)&0x3f];
575
576
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6];
577
578
for ( i = 0 ; i < 32 ; i++ ) pts[i] = 1 + ((map[f6>>6] & tab[f6&0x3F])?1:-1); // 2 or 0 points at infinity depending on whether leading coeff is square or not.
579
580
p0 = p1 = p2 = p3 = p4 = p5 = p6 = p7 = p8 = p9 = p10 = p11 = p12 = p13 = p14 = p15 = 0;
581
q0 = q1 = q2 = q3 = q4 = q5 = q6 = q7 = q8 = q9 = q10 = q11 = q12 = q13 = q14 = q15 = 0;
582
for ( i = 0 ; i < p ; i++ ) {
583
if ( ! t0 ) p0++;
584
if ( t0+32 > p ) pts[p-t0]++; // assumes p > 32
585
z = map[t0>>6];
586
if ( (t0&0x3f) > 31 ) {
587
z >>= 32;
588
z |= map[(t0>>6)+1]<<32;
589
z >>= (t0&0x3f) - 32;
590
} else {
591
z >>= (t0&0x3f);
592
}
593
_count32(z);
594
_advance_6t_seq();
595
}
596
pts[0] += p0; pts[1] += p1; pts[2] += p2; pts[3] += p3; pts[4] += p4; pts[5] += p5; pts[6] += p6; pts[7] += p7;
597
pts[8] += p8; pts[9] += p9; pts[10] += p10; pts[11] += p11; pts[12] += p12; pts[13] += p13; pts[14] += p14; pts[15] += p15;
598
pts[16] += q0; pts[17] += q1; pts[18] += q2; pts[19] += q3; pts[20] += q4; pts[21] += q5; pts[22] += q6; pts[23] += q7;
599
pts[24] += q8; pts[25] += q9; pts[26] += q10; pts[27] += q11; pts[28] += q12; pts[29] += q13; pts[30] += q14; pts[31] += q15;
600
return 1;
601
}
602
603
// hardwired for 32x
604
int pointcount_multi_g3 (unsigned pts[], unsigned long D[8], unsigned p)
605
{
606
register signed i, t0, t1, t2, t3, t4, t5, t6, t7;
607
unsigned p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15;
608
unsigned q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13, q14, q15;
609
register unsigned long z;
610
unsigned long x;
611
612
if ( p < 32 ) return 0;
613
614
pointcount_map_residues (p);
615
616
// wrap table around top
617
for ( t2 = 0 ; t2 < 32 ; t2++ )
618
if ( map[t2>>6] & tab[t2&0x3f] ) map[(p+t2)>>6] |= tab[(p+t2)&0x3f];
619
620
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6]; t7 = (signed) D[7];
621
622
for ( i = 0 ; i < 32 ; i++ ) pts[i] = 1; // point at infinity
623
624
p0 = p1 = p2 = p3 = p4 = p5 = p6 = p7 = p8 = p9 = p10 = p11 = p12 = p13 = p14 = p15 = 0;
625
q0 = q1 = q2 = q3 = q4 = q5 = q6 = q7 = q8 = q9 = q10 = q11 = q12 = q13 = q14 = q15 = 0;
626
for ( i = 0 ; i < p ; i++ ) {
627
if ( ! t0 ) p0++;
628
if ( t0+32 > p ) pts[p-t0]++; // assumes p > 32
629
z = map[t0>>6];
630
if ( (t0&0x3f) > 31 ) {
631
z >>= 32;
632
z |= map[(t0>>6)+1]<<32;
633
z >>= (t0&0x3f) - 32;
634
} else {
635
z >>= (t0&0x3f);
636
}
637
_count32(z);
638
_advance_7t_seq();
639
}
640
pts[0] += p0; pts[1] += p1; pts[2] += p2; pts[3] += p3; pts[4] += p4; pts[5] += p5; pts[6] += p6; pts[7] += p7;
641
pts[8] += p8; pts[9] += p9; pts[10] += p10; pts[11] += p11; pts[12] += p12; pts[13] += p13; pts[14] += p14; pts[15] += p15;
642
pts[16] += q0; pts[17] += q1; pts[18] += q2; pts[19] += q3; pts[20] += q4; pts[21] += q5; pts[22] += q6; pts[23] += q7;
643
pts[24] += q8; pts[25] += q9; pts[26] += q10; pts[27] += q11; pts[28] += q12; pts[29] += q13; pts[30] += q14; pts[31] += q15;
644
return 1;
645
}
646
647
648
// hardwired for 32x
649
int pointcount_multi_g3d8 (unsigned pts[], unsigned long D[9], unsigned p, unsigned long f8)
650
{
651
register signed i, t0, t1, t2, t3, t4, t5, t6, t7, t8;
652
unsigned p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13, p14, p15;
653
unsigned q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11, q12, q13, q14, q15;
654
register unsigned long z;
655
unsigned long x;
656
657
if ( p < 32 ) return 0;
658
659
pointcount_map_residues (p);
660
661
// wrap table around top
662
for ( t2 = 0 ; t2 < 32 ; t2++ )
663
if ( map[t2>>6] & tab[t2&0x3f] ) map[(p+t2)>>6] |= tab[(p+t2)&0x3f];
664
665
t0 = (signed) D[0]; t1 = (signed) D[1]; t2 = (signed) D[2]; t3 = (signed) D[3]; t4 = (signed) D[4]; t5 = (signed) D[5]; t6 = (signed) D[6]; t7 = (signed) D[7]; t8 = (signed) D[8];
666
667
for ( i = 0 ; i < 32 ; i++ ) 1 + ((map[f8>>6] & tab[f8&0x3F])?1:-1); // 2 or 0 points at infinity depending on whether leading coeff is square or not.
668
669
p0 = p1 = p2 = p3 = p4 = p5 = p6 = p7 = p8 = p9 = p10 = p11 = p12 = p13 = p14 = p15 = 0;
670
q0 = q1 = q2 = q3 = q4 = q5 = q6 = q7 = q8 = q9 = q10 = q11 = q12 = q13 = q14 = q15 = 0;
671
for ( i = 0 ; i < p ; i++ ) {
672
if ( ! t0 ) p0++;
673
if ( t0+32 > p ) pts[p-t0]++; // assumes p > 32
674
z = map[t0>>6];
675
if ( (t0&0x3f) > 31 ) {
676
z >>= 32;
677
z |= map[(t0>>6)+1]<<32;
678
z >>= (t0&0x3f) - 32;
679
} else {
680
z >>= (t0&0x3f);
681
}
682
_count32(z);
683
_advance_8t_seq();
684
}
685
pts[0] += p0; pts[1] += p1; pts[2] += p2; pts[3] += p3; pts[4] += p4; pts[5] += p5; pts[6] += p6; pts[7] += p7;
686
pts[8] += p8; pts[9] += p9; pts[10] += p10; pts[11] += p11; pts[12] += p12; pts[13] += p13; pts[14] += p14; pts[15] += p15;
687
pts[16] += q0; pts[17] += q1; pts[18] += q2; pts[19] += q3; pts[20] += q4; pts[21] += q5; pts[22] += q6; pts[23] += q7;
688
pts[24] += q8; pts[25] += q9; pts[26] += q10; pts[27] += q11; pts[28] += q12; pts[29] += q13; pts[30] += q14; pts[31] += q15;
689
return 1;
690
}
691
692
693
unsigned pointcount_slow (ff_t f[], int d, unsigned p)
694
{
695
register unsigned m, n;
696
register ff_t x, y, z;
697
register int i;
698
699
memset (map, 0, p/8+1);
700
_ff_set_ui (z, (p/2)+1);
701
_ff_set_one (x);
702
do { // note that we start checking squares at x=1, the square 0 is handled explicitly by the counter m
703
_ff_square(y,x);
704
map[y>>6] |= tab[y&0x3F];
705
_ff_inc (x);
706
} while ( ! _ff_equal(x,z) );
707
708
m = 1; // one point at infinity
709
_ff_set(y,f[d]);
710
if ( !(d&1) ) m += ((map[y>>6] & tab[y&0x3F])?1:-1); // add/sub point at infinity when f has even degree
711
n = 0;
712
713
_ff_set_zero(x);
714
do {
715
_ff_set (y, f[d]);
716
for ( i = d-1 ; i >= 0 ; i-- ) {
717
ff_mult (y, y, x);
718
_ff_addto (y, f[i]);
719
}
720
if ( _ff_zero(y) ) m++;
721
if ( (map[y>>6] & tab[y&0x3F]) ) n++;
722
_ff_inc(x);
723
} while ( _ff_nonzero(x) );
724
return m+2*n;
725
}
726
727
728
// basically the same as pointcount_slow, only it doesn't require ff initialization and will handle the prime 3
729
unsigned pointcount_tiny (unsigned long f[], int d, unsigned p)
730
{
731
register unsigned m, n;
732
register unsigned long x, y, z;
733
register int i;
734
735
if ( p == 2 ) { err_printf ("p==2 not supported in pointcount_tiny_d2\n"); exit (0); }
736
737
// avoid worrying about overflow
738
if ( p > (1<<20) ) { err_printf ("p=%d not tiny enough in pointcount_tiny_d2\n", p); exit (0); }
739
740
pointcount_map_residues (p);
741
742
m = 1; // one point at infinity
743
if ( !(d&1) ) m += ((map[f[d]>>6] & tab[f[d]&0x3F])?1:-1); // add/sub point at infinity when f has even degree
744
n = 0;
745
746
x = 0;
747
do {
748
y = f[d];
749
for ( i = d-1 ; i >= 0 ; i-- ) y = (x*y + f[i]) % p;
750
if ( !y ) m++;
751
if ( (map[y>>6] & tab[y&0x3F]) ) n++;
752
x++;
753
} while ( x < p );
754
return m+2*n; // add point at infinity
755
}
756
757
758
unsigned pointcount_g2_d2 (unsigned long f[6], unsigned p)
759
{
760
register signed i, j, k, t00, t01, t10, t11, t20, t21, t30, t31, t40, t41, t50, t51;
761
register unsigned m, n;
762
register unsigned long s, x0, x1, y0, y1, t0, t1;
763
764
// handle p <= d seperately, otherwise it interferes with our \Delta computations
765
if ( p <= 5 ) return pointcount_tiny_d2 (f, 5, p);
766
767
s = pointcount_map_residues_d2 (p);
768
769
m = 1; // one point at infinity for odd degree
770
n = 0;
771
772
// t_5 = \Delta^5 f(x) is constant
773
t51 = 0; t50 = (120*f[5]) % p;
774
775
for ( k = 0 ; k < p ; k++ ) {
776
// Compute t_j = \Delta^j f(kz) where z = sqrt(s) is our extension element in F_p^2 = F[x]/(z^2-s)
777
// We do this the old fashioned way, computing f(kz), f(kz+1), f(kz+2), ..., f(kz+d-1) and taking differences
778
// Clumsy but effective - speed is not really an issue here
779
x1 = k; x0 = 0; y1 = 0; y0 = f[5];
780
for ( i = 4 ; i >= 0 ; i-- ) { t0 = (x0*y0+s*x1*y1+ f[i]) % p; t1 = (x0*y1+x1*y0) % p; y0 = t0; y1 = t1; }
781
t00 = (unsigned) t0; t01 = (unsigned) t1;
782
x1 = k; x0 = 1; y1 = 0; y0 = f[5];
783
for ( i = 4 ; i >= 0 ; i-- ) { t0 = (x0*y0+s*x1*y1+ f[i]) % p; t1 = (x0*y1+x1*y0) % p; y0 = t0; y1 = t1; }
784
t10 = (unsigned) t0; t11 = (unsigned) t1;
785
x1 = k; x0 = 2; y1 = 0; y0 = f[5];
786
for ( i = 4 ; i >= 0 ; i-- ) { t0 = (x0*y0+s*x1*y1+ f[i]) % p; t1 = (x0*y1+x1*y0) % p; y0 = t0; y1 = t1; }
787
t20 = (unsigned) t0; t21 = (unsigned) t1;
788
x1 = k; x0 = 3; y1 = 0; y0 = f[5];
789
for ( i = 4 ; i >= 0 ; i-- ) { t0 = (x0*y0+s*x1*y1+ f[i]) % p; t1 = (x0*y1+x1*y0) % p; y0 = t0; y1 = t1; }
790
t30 = (unsigned) t0; t31 = (unsigned) t1;
791
x1 = k; x0 = 4; y1 = 0; y0 = f[5];
792
for ( i = 4 ; i >= 0 ; i-- ) { t0 = (x0*y0+s*x1*y1+ f[i]) % p; t1 = (x0*y1+x1*y0) % p; y0 = t0; y1 = t1; }
793
t40 = (unsigned) t0; t41 = (unsigned) t1;
794
t40 -= t30; if ( t40 < 0 ) t40 += p; t41 -= t31; if ( t41 < 0 ) t41 += p;
795
t30 -= t20; if ( t30 < 0 ) t30 += p; t31 -= t21; if ( t31 < 0 ) t31 += p;
796
t20 -= t10; if ( t20 < 0 ) t20 += p; t21 -= t11; if ( t21 < 0 ) t21 += p;
797
t10 -= t00; if ( t10 < 0 ) t10 += p; t11 -= t01; if ( t11 < 0 ) t11 += p;
798
t40 -= t30; if ( t40 < 0 ) t40 += p; t41 -= t31; if ( t41 < 0 ) t41 += p;
799
t30 -= t20; if ( t30 < 0 ) t30 += p; t31 -= t21; if ( t31 < 0 ) t31 += p;
800
t20 -= t10; if ( t20 < 0 ) t20 += p; t21 -= t11; if ( t21 < 0 ) t21 += p;
801
t40 -= t30; if ( t40 < 0 ) t40 += p; t41 -= t31; if ( t41 < 0 ) t41 += p;
802
t30 -= t20; if ( t30 < 0 ) t30 += p; t31 -= t21; if ( t31 < 0 ) t31 += p;
803
t40 -= t30; if ( t40 < 0 ) t40 += p; t41 -= t31; if ( t41 < 0 ) t41 += p;
804
for ( j = 0 ; j < p ; j++ ) {
805
i = p*t01+t00;
806
if ( ! i ) m++;
807
if ( (map[i>>6] & tab[i&0x3F]) ) n++;
808
t00 += t10; if ( t00 >= p ) t00 -= p; t10 += t20; if ( t10 >= p ) t10 -= p; t20 += t30; if ( t20 >= p ) t20 -= p; t30 += t40; if ( t30 >= p ) t30 -= p; t40 += t50; if ( t40 >= p ) t40 -= p;
809
t01 += t11; if ( t01 >= p ) t01 -= p; t11 += t21; if ( t11 >= p ) t11 -= p; t21 += t31; if ( t21 >= p ) t21 -= p; t31 += t41; if ( t31 >= p ) t31 -= p; t41 += t51; if ( t41 >= p ) t41 -= p;
810
}
811
}
812
return m+2*n;
813
}
814
815
816
// This code is brutally slow and should only be used for very small p
817
unsigned pointcount_slow_d2 (ff_t f[], int d, unsigned p)
818
{
819
register unsigned m, n;
820
register ff_t x0, x1, y0, y1, t0, t1, t, s;
821
register int i;
822
823
824
if ( ! (d&1) ) { err_printf ("degree not odd in pointcount_slow_d2\n"); exit (0); }
825
m = pointcount_map_residues_d2 (p);
826
_ff_set_ui (s, m);
827
m = 1; // one point at infinity
828
n = 0;
829
830
_ff_set_zero(x1);
831
do {
832
_ff_set_zero(x0);
833
do {
834
// compute f(x) = f(x1*z+x0) where z = sqrt(s) is our extension element, F_p^2 = F[x]/(z^2-s)
835
_ff_set_zero (y1); _ff_set (y0, f[d]);
836
for ( i = d-1 ; i >= 0 ; i-- ) {
837
_ff_mult (t0, x0, y0); _ff_mult (t, x1, y1); ff_mult (t, t, s); _ff_addto (t0, t);
838
_ff_mult (t1, x0, y1); _ff_mult (t, x1, y0); _ff_addto (t1, t);
839
_ff_set (y1, t1); _ff_add (y0, t0, f[i]);
840
}
841
if ( _ff_zero(y1) && _ff_zero(y0) ) m++;
842
// pull out of field representation to compute table index
843
i = (int) (_ff_get_ui (y1) * p + _ff_get_ui (y0));
844
if ( ( map[i>>6] & tab[i&0x3f]) ) n++;
845
_ff_inc(x0);
846
} while ( _ff_nonzero(x0) );
847
_ff_inc(x1);
848
} while ( _ff_nonzero(x1) );
849
return m+2*n;
850
}
851
852
853
/*
854
This code is not very efficient and should be changed to use routines in ffext.c.
855
*/
856
unsigned pointcount_tiny_d2 (unsigned long f[], int d, unsigned p)
857
{
858
register unsigned m, n;
859
register unsigned x0, x1, y0, y1, t0, t1, s;
860
register int i;
861
862
if ( p == 2 ) { err_printf ("p==2 not supported in pointcount_tiny_d2\n"); exit (0); }
863
864
// avoid worrying about overflow
865
if ( p > (1<<14) ) { err_printf ("p=%d not tiny enough in pointcount_tiny_d2\n", p); exit (0); }
866
867
s = pointcount_map_residues_d2 (p);
868
m = 1; // one point at infinity
869
if ( !(d&1) ) m += ((map[f[d]>>6] & tab[f[d]&0x3F])?1:-1); // add/sub point at infinity when f has even degree
870
n = 0;
871
872
x1 = 0;
873
do {
874
x0 = 0;
875
do {
876
// compute f(x) = f(x1*z+x0) where z = sqrt(s) is our extension element, F_p^2 = F[z]/(z^2-s)
877
y1 = 0; y0 = f[d];
878
for ( i = d-1 ; i >= 0 ; i-- ) {
879
t0 = (x0*y0+s*x1*y1+ f[i]) % p;
880
t1 = (x0*y1+x1*y0) % p;
881
y0 = t0; y1 = t1;
882
}
883
if ( ! y0 && ! y1 ) m++;
884
i = p*y1+y0;
885
if ( ( map[i>>6] & tab[i&0x3f]) ) n++;
886
x0++;
887
} while ( x0 < p );
888
x1++;
889
} while ( x1 < p );
890
return m+2*n;
891
}
892
893
/*
894
This code is not very efficient and should be changed to use routines in ffext.c.
895
*/
896
unsigned pointcount_tiny_d3 (unsigned long f[], int d, unsigned p)
897
{
898
register unsigned m, n;
899
register unsigned x0, x1, x2, y0, y1, y2, t0, t1, t2, r1, r2, s;
900
register int i;
901
902
if ( p == 2 ) { err_printf ("p==2 not supported in pointcount_tiny_d2\n"); exit (0); }
903
904
// avoid worrying about overflow
905
if ( p > (1<<10) ) { err_printf ("p=%d not tiny enough in pointcount_tiny_d2\n", p); exit (0); }
906
907
s = pointcount_map_residues_d3 (p);
908
m = 1;
909
if ( !(d&1) ) m += ((map[f[d]>>6] & tab[f[d]&0x3F])?1:-1); // add/sub point at infinity when f has even degree
910
n = 0;
911
912
x2 = 0;
913
do {
914
x1 = 0;
915
do {
916
x0 = 0;
917
do {
918
// compute f(x) = f(x2*z^2+x1*z+x0) where z is our extension element in F_p^3 = F[z]/(z^3-z-s)
919
y2 = 0; y1 = 0; y0 = f[d];
920
for ( i = d-1 ; i >= 0 ; i-- ) {
921
r1 = x1*y2+x2*y1; r2 = x2*y2;
922
t0 = (x0*y0 + s*r1 + f[i]) % p;
923
t1 = (x0*y1 + x1*y0 + r1 + s*r2) % p;
924
t2 = (x0*y2 + x1*y1 + x2*y0 + r2) %p;
925
y0 = t0; y1 = t1; y2 = t2;
926
}
927
i = p*p*y2 + p*y1+y0;
928
if ( ! i ) m++;
929
if ( ( map[i>>6] & tab[i&0x3f]) ) n++;
930
x0++;
931
} while ( x0 < p );
932
x1++;
933
} while ( x1 < p );
934
x2++;
935
} while (x2 < p );
936
return m+2*n;
937
}
938
939