Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/schemes/hyperelliptic_curves/hypellfrob/recurrences_zn_poly.cpp
4108 views
1
/* ============================================================================
2
3
recurrences_zn_poly.cpp: recurrences solved via zn_poly arithmetic
4
5
This file is part of hypellfrob (version 2.1.1).
6
7
Copyright (C) 2007, 2008, David Harvey
8
9
This program is free software; you can redistribute it and/or modify
10
it under the terms of the GNU General Public License as published by
11
the Free Software Foundation; either version 2 of the License, or
12
(at your option) any later version.
13
14
This program is distributed in the hope that it will be useful,
15
but WITHOUT ANY WARRANTY; without even the implied warranty of
16
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
GNU General Public License for more details.
18
19
You should have received a copy of the GNU General Public License along
20
with this program; if not, write to the Free Software Foundation, Inc.,
21
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22
23
============================================================================ */
24
25
26
#include "recurrences_zn_poly.h"
27
28
29
NTL_CLIENT
30
31
32
namespace hypellfrob {
33
34
35
Shifter::~Shifter()
36
{
37
zn_array_mulmid_precomp1_clear(kernel_precomp);
38
free(input_twist);
39
}
40
41
42
Shifter::Shifter(ulong d, ulong a, ulong b, const zn_mod_t mod)
43
{
44
this->d = d;
45
this->mod = mod;
46
47
input_twist = (ulong*) malloc(sizeof(ulong) * (3*d + 3));
48
output_twist = input_twist + d + 1;
49
scratch = output_twist + d + 1;
50
51
ZZ modulus;
52
modulus = zn_mod_get (mod);
53
54
// ------------------------ compute input_twist -------------------------
55
56
// prod = (d!)^(-1)
57
ulong prod = 1;
58
for (ulong i = 2; i <= d; i++)
59
prod = zn_mod_mul(prod, i, mod);
60
prod = to_ulong(InvMod(to_ZZ(prod), modulus));
61
62
// input_twist[i] = ((d-i)!)^(-1)
63
input_twist[0] = prod;
64
for (ulong i = 1; i <= d; i++)
65
input_twist[i] = zn_mod_mul(input_twist[i-1], d - (i-1), mod);
66
67
// input_twist[i] = ((d-i)!*i!)^(-1)
68
for (ulong i = 0; i <= d/2; i++)
69
{
70
input_twist[i] = zn_mod_mul(input_twist[i], input_twist[d-i], mod);
71
input_twist[d-i] = input_twist[i];
72
}
73
74
// input_twist[i] = \prod_{0 <= j <= d, j != i} (i-j)^(-1)
75
// = (-1)^(d-i) ((d-i)!*i!)^(-1)
76
for (long i = d - 1; i >= 0; i -= 2)
77
input_twist[i] = zn_mod_neg(input_twist[i], mod);
78
79
// ----------------- compute output_twist and kernel --------------------
80
81
// need some temp space:
82
// c, accum, accum_inv each of length 2d+1
83
ulong* c = (ulong*) malloc(sizeof(ulong) * (6*d+3));
84
ulong* accum = c + 2*d + 1;
85
ulong* accum_inv = accum + 2*d + 1;
86
ulong* kernel = c; // overwrites c
87
88
// c[i] = c_i = a + (i-d)*b for 0 <= i <= 2d
89
c[0] = zn_mod_sub(a, zn_mod_mul(zn_mod_reduce(d, mod), b, mod), mod);
90
for (ulong i = 1; i <= 2*d; i++)
91
c[i] = zn_mod_add(c[i-1], b, mod);
92
93
// accum[i] = c_0 * c_1 * ... * c_i for 0 <= i <= 2d
94
accum[0] = c[0];
95
for (ulong i = 1; i <= 2*d; i++)
96
accum[i] = zn_mod_mul(accum[i-1], c[i], mod);
97
98
// accum_inv[i] = (c_0 * c_1 * ... * c_i)^(-1) for 0 <= i <= 2d
99
accum_inv[2*d] = to_ulong(InvMod(to_ZZ(accum[2*d]), modulus));
100
101
for (long i = 2*d - 1; i >= 0; i--)
102
accum_inv[i] = zn_mod_mul(accum_inv[i+1], c[i+1], mod);
103
104
// output_twist[i] = b^{-d} * c_i * c_{i+1} * ... * c_{i+d}
105
// for 0 <= i <= d
106
ulong factor = to_long(PowerMod(to_ZZ(b), -((long)d), modulus));
107
output_twist[0] = zn_mod_mul(factor, accum[d], mod);
108
for (ulong i = 1; i <= d; i++)
109
output_twist[i] = zn_mod_mul(zn_mod_mul(factor, accum[i+d], mod),
110
accum_inv[i-1], mod);
111
112
// kernel[i] = (c_i)^(-1) for 0 <= i <= 2d
113
kernel[0] = accum_inv[0];
114
for (ulong i = 1; i <= 2*d; i++)
115
kernel[i] = zn_mod_mul(accum_inv[i], accum[i-1], mod);
116
117
// precompute FFT of kernel
118
zn_array_mulmid_precomp1_init(kernel_precomp, kernel, 2*d+1, d+1, mod);
119
120
free(c);
121
}
122
123
124
void Shifter::shift(ulong* output, const ulong* input)
125
{
126
// multiply inputs pointwise by input_twist
127
for (ulong i = 0; i <= d; i++)
128
scratch[i] = zn_mod_mul(input[i], input_twist[i], mod);
129
130
// do middle product
131
zn_array_mulmid_precomp1_execute(output, scratch, kernel_precomp);
132
133
// multiply outputs pointwise by output_twist
134
for (ulong i = 0; i <= d; i++)
135
output[i] = zn_mod_mul(output[i], output_twist[i], mod);
136
}
137
138
139
/*
140
Checks whether large_evaluate() will succeed for these choices of k and u.
141
Returns 1 if okay, otherwise 0.
142
*/
143
int check_params(ulong k, ulong u, const zn_mod_t mod)
144
{
145
ulong n = zn_mod_get (mod);
146
147
if (k >= n || u >= n)
148
return 0;
149
150
if (k <= 1)
151
return 1;
152
153
if (k == n - 1)
154
return 0;
155
156
ulong k2 = k / 2;
157
158
// need the following elements to be invertible:
159
// u
160
// 1, 2, ..., k + 1
161
// k2 + i*u for -k2 <= i <= k2
162
ulong prod = u;
163
for (ulong i = 2; i <= k; i++)
164
prod = zn_mod_mul(prod, i, mod);
165
ulong temp = zn_mod_mul(k2, zn_mod_sub(1, u, mod), mod);
166
for (ulong i = 0; i <= 2*k2; i++)
167
{
168
prod = zn_mod_mul(prod, temp, mod);
169
temp = zn_mod_add(temp, u, mod);
170
}
171
172
ZZ x, y;
173
x = prod;
174
y = n;
175
if (GCD(x, y) != 1)
176
return 0;
177
178
// check recursively below
179
return check_params(k2, u, mod);
180
}
181
182
183
184
/*
185
Let M0 and M1 be square matrices of size r*r. Let M(x) = M0 + x*M1; this
186
is a matrix of linear polys in x. The matrices M0 and M1 are passed in
187
row-major order.
188
189
Let P(x) = M(x+1) M(x+2) ... M(x+k); this is a matrix of polynomials of
190
degree k.
191
192
This class attempts to compute the matrices
193
P(0), P(u), P(2u), ..., P(ku).
194
195
Usage:
196
197
* Call the constructor.
198
199
* Call evaluate() with half == 0. This computes the first half of the
200
outputs, specifically P(mu) for 0 <= m <= k2, where k2 = floor(k/2).
201
The (i, j) entry of P(mu) is stored in output[i*r+j][m+offset]. Each array
202
output[i*r+j] must be preallocated to length k2 + 3. (The extra two matrices
203
at the end are used for scratch space.)
204
205
* Call evaluate() with half == 1. This computes the second half, namely
206
P(mu) for k2 + 1 <= m <= k. The (i, j) entry of P(mu) is stored in
207
output[i*r+j][m-(k2+1)+offset]. Each array output[i*r+j] must be
208
preallocated to length k2 + 3. It's okay for this second half to overwrite
209
the first half generated earlier (in fact this property is used by
210
zn_poly_interval_products to conserve memory).
211
212
The computation may fail for certain bad choices of k and u. Let p = residue
213
characteristic (i.e. assume mod is p^m for some m). Typically this function
214
gets called for k ~ u and k*u ~ M*p, for some M much smaller than p. In this
215
situation, I expect most choices of (k, u) are not bad. Failure is very
216
bad: the program will crash. Luckily, you can call check_params() to test
217
whether (k, u) is bad. If it's bad, you probably should just increment u
218
until you find one that's not bad. (Sorry, I haven't done a proper analysis
219
of the situation yet.) The main routines fall back on the zz_pX version if
220
they can't find a good parameter choice.
221
222
Must have 0 <= k < n and 0 < u < n.
223
224
*/
225
LargeEvaluator::LargeEvaluator(int r, ulong k, ulong u,
226
const vector<vector<ulong> >& M0,
227
const vector<vector<ulong> >& M1,
228
const zn_mod_t& mod) : M0(M0), M1(M1), mod(mod)
229
{
230
assert(k < zn_mod_get(mod));
231
assert(u < zn_mod_get(mod));
232
assert(r >= 1);
233
assert(k >= 1);
234
235
this->r = r;
236
this->k = k;
237
this->k2 = k / 2;
238
this->odd = k & 1;
239
this->u = u;
240
this->shifter = NULL;
241
}
242
243
244
LargeEvaluator::~LargeEvaluator()
245
{
246
if (this->shifter)
247
delete shifter;
248
}
249
250
251
void LargeEvaluator::evaluate(int half, vector<ulong_array>& output,
252
ulong offset)
253
{
254
// base cases
255
256
if (k == 0)
257
{
258
// identity matrix
259
for (int x = 0; x < r; x++)
260
for (int y = 0; y < r; y++)
261
output[y*r + x].data[offset] = (x == y);
262
return;
263
}
264
265
if (k == 1)
266
{
267
// evaluate M(1) (for half == 0) or M(u+1) (for half == 1)
268
for (int x = 0; x < r; x++)
269
for (int y = 0; y < r; y++)
270
{
271
ulong temp = zn_mod_add(M0[y][x], M1[y][x], mod);
272
output[y*r + x].data[offset] = half ?
273
zn_mod_add(temp, zn_mod_mul(u, M1[y][x], mod), mod) : temp;
274
}
275
return;
276
}
277
278
// recursive case
279
280
// Let Q(x) = M(x+1) ... M(x+k2), where k2 = floor(k/2).
281
// Then we have either
282
// P(x) = Q(x) Q(x+k2) if k is even
283
// P(x) = Q(x) Q(x+k2) M(x+k) if k is odd
284
285
if (half == 0)
286
{
287
// This is the first time through this function.
288
// Allocate scratch space.
289
scratch.resize(r*r);
290
for (int i = 0; i < r*r; i++)
291
scratch[i].resize(k2 + 3);
292
293
{
294
// Recursively compute Q(0), Q(u), ..., Q(k2*u).
295
// These are stored in scratch.
296
LargeEvaluator recurse(r, k2, u, M0, M1, mod);
297
recurse.evaluate_all(scratch);
298
}
299
300
// Precomputations for value-shifting
301
shifter = new Shifter(k2, k2, u, mod);
302
}
303
else // half == 1
304
{
305
// Shift original sequence by (k2+1)*u to obtain
306
// Q((k2+1)*u), Q((k2+2)*u), ..., Q((2*k2+1)*u)
307
Shifter big_shifter(k2, zn_mod_mul(k2+1, u, mod), u, mod);
308
for (int i = 0; i < r*r; i++)
309
big_shifter.shift(scratch[i].data, scratch[i].data);
310
}
311
312
// Let H = (k2+1)*u*half, so now scratch contains
313
// Q(H), Q(H + u), ..., Q(H + k2*u).
314
315
// Shift by k2 to obtain Q(H + k2), Q(H + u + k2), ..., Q(H + k2*u + k2).
316
// Results are stored directly in output array. We put them one slot
317
// to the right, to make room for inplace matrix multiplies later on.
318
// (If k is odd, they're shifted right by yet another slot, to make room
319
// for another round of multiplies.)
320
for (int i = 0; i < r*r; i++)
321
shifter->shift(output[i].data + offset + odd + 1, scratch[i].data);
322
323
// If k is odd, right-multiply each Q(H + i*u + k2) by M(H + i*u + k)
324
// (results are stored in output array, shifted one entry to the left)
325
if (odd)
326
{
327
ulong_array cruft(r*r); // for storing M(H + i*u + k)
328
ulong point = k; // evaluation point
329
if (half)
330
point = zn_mod_add(point, zn_mod_mul(k2 + 1, u, mod), mod);
331
332
for (ulong i = 0; i <= k2; i++, point = zn_mod_add(point, u, mod))
333
{
334
// compute M(H + i*u + k) = M0 + M1*point
335
for (int x = 0; x < r; x++)
336
for (int y = 0; y < r; y++)
337
cruft.data[y*r + x] = zn_mod_add(M0[y][x],
338
zn_mod_mul(M1[y][x], point, mod), mod);
339
340
// multiply
341
for (int x = 0; x < r; x++)
342
for (int y = 0; y < r; y++)
343
{
344
ulong accum = 0;
345
for (int z = 0; z < r; z++)
346
accum = zn_mod_add(accum,
347
zn_mod_mul(output[y*r + z].data[offset + i + 2],
348
cruft.data[z*r + x], mod), mod);
349
output[y*r + x].data[offset + i + 1] = accum;
350
}
351
}
352
}
353
354
ulong n = zn_mod_get(mod);
355
356
// Multiply to obtain P(H), P(H + u), ..., P(H + k2*u)
357
// (except for the last one, in the second half, if k is even)
358
// Store results directly in output array.
359
for (ulong i = 0; i + (half && !odd) <= k2; i++)
360
for (int x = 0; x < r; x++)
361
for (int y = 0; y < r; y++)
362
{
363
ulong sum_hi = 0;
364
ulong sum_lo = 0;
365
for (int z = 0; z < r; z++)
366
{
367
ulong hi, lo;
368
ZNP_MUL_WIDE(hi, lo, scratch[y*r + z].data[i],
369
output[z*r + x].data[offset + i + 1]);
370
ZNP_ADD_WIDE(sum_hi, sum_lo, sum_hi, sum_lo, hi, lo);
371
if (sum_hi >= n)
372
sum_hi -= n;
373
}
374
output[y*r + x].data[offset + i] =
375
zn_mod_reduce_wide(sum_hi, sum_lo, mod);
376
}
377
}
378
379
380
/*
381
Evaluates both halves, storing results in output array.
382
*/
383
void LargeEvaluator::evaluate_all(vector<ulong_array>& output)
384
{
385
evaluate(0, output, 0);
386
evaluate(1, output, k/2 + 1);
387
}
388
389
390
391
/*
392
See interval_products_wrapper().
393
394
NOTE 1:
395
I haven't proved that the algorithm here always succeeds, although I expect
396
that it almost always will, especially when p is very large. If it
397
succeeds, it returns 1. If it fails, it returns 0 (practically instantly).
398
In the latter case, at least the caller can fall back on
399
ntl_interval_products().
400
401
NOTE 2:
402
The algorithm here is similar to ntl_interval_products(). However, it
403
doesn't do the "refining step" -- it just handles the smaller intervals
404
in the naive fashion. Also, instead of breaking intervals into power-of-four
405
lengths, it just does the whole thing in one chunk. The performance ends up
406
being smoother, but it's harder to prove anything about avoiding
407
non-invertible elements. Hence the caveat in Note 1.
408
409
*/
410
int zn_poly_interval_products(vector<vector<vector<ulong> > >& output,
411
const vector<vector<ulong> >& M0,
412
const vector<vector<ulong> >& M1,
413
const vector<ZZ>& target, const zn_mod_t& mod)
414
{
415
output.resize(target.size() / 2);
416
417
// select k such that k*(k+1) >= total length of intervals
418
ulong k;
419
{
420
ZZ len = target.back() - target.front();
421
ZZ kk = SqrRoot(len);
422
k = to_ulong(kk);
423
if (kk * (kk + 1) < len)
424
k++;
425
}
426
427
int r = M0.size();
428
429
// try to find good parameters u and k
430
ulong u = k;
431
for (int trial = 0; ; trial++, u++)
432
{
433
if (check_params(k, u, mod))
434
break; // found some good parameters
435
if (trial == 5)
436
return 0; // too many failures, give up
437
}
438
439
ulong n = zn_mod_get(mod);
440
441
// shift M0 over to account for starting index
442
vector<vector<ulong> > M0_shifted = M0;
443
ulong shift = target.front() % n;
444
for (int x = 0; x < r; x++)
445
for (int y = 0; y < r; y++)
446
M0_shifted[y][x] = zn_mod_add(M0[y][x],
447
zn_mod_mul(shift, M1[y][x], mod), mod);
448
449
// prepare for evaluating products over the big intervals
450
// [0, k), [u, u+k), ..., [ku, ku+k)
451
LargeEvaluator evaluator(r, k, u, M0_shifted, M1, mod);
452
453
vector<ulong_array> big(r*r);
454
for (int i = 0; i < r*r; i++)
455
big[i].resize(k/2 + 3); // space for half the products, plus two more
456
457
// evaluate the first half of the products
458
evaluator.evaluate(0, big, 0);
459
// flag indicating which half we currently have stored in the "big" array
460
int half = 0;
461
462
vector<vector<ulong> > accum(r, vector<ulong>(r));
463
vector<vector<ulong> > temp1(r, vector<ulong>(r));
464
vector<vector<ulong> > temp2(r, vector<ulong>(r));
465
466
// for each target interval....
467
for (int i = 0; i < target.size()/2; i++)
468
{
469
// doing interval [s0, s1)
470
ZZ s0 = target[2*i];
471
ZZ s1 = target[2*i+1];
472
473
// product accumulated so far is [t0, t1).
474
ZZ t0 = s0;
475
ZZ t1 = s0;
476
for (int x = 0; x < r; x++)
477
for (int y = 0; y < r; y++)
478
accum[x][y] = (x == y); // identity matrix
479
480
while (t1 < s1)
481
{
482
// if we are exactly on the left end of a big interval, and the
483
// big interval fits inside the target interval, then roll it in
484
if ((t1 - target[0]) % u == 0 && t1 + k <= s1)
485
{
486
// compute which "big" interval we are rolling in
487
int index = to_ulong((t1 - target[0]) / u);
488
489
if (index >= k/2 + 1)
490
{
491
// if the "big" interval is in the second half, and we haven't
492
// computed the second half of the intervals yet, then go and
493
// compute them (overwriting the first half)
494
if (half == 0)
495
{
496
evaluator.evaluate(1, big, 0);
497
half = 1;
498
}
499
index -= (k/2 + 1);
500
}
501
502
for (int x = 0; x < r; x++)
503
for (int y = 0; y < r; y++)
504
temp1[y][x] = big[y*r + x].data[index];
505
506
t1 += k;
507
}
508
else
509
{
510
// otherwise just multiply by the single matrix M(t1 + 1)
511
ulong e = (t1 + 1) % n;
512
513
for (int x = 0; x < r; x++)
514
for (int y = 0; y < r; y++)
515
temp1[y][x] = zn_mod_add(M0[y][x],
516
zn_mod_mul(M1[y][x], e, mod), mod);
517
518
t1++;
519
}
520
521
// multiply by whichever matrix we picked up above
522
for (int x = 0; x < r; x++)
523
for (int y = 0; y < r; y++)
524
{
525
ulong sum_hi = 0;
526
ulong sum_lo = 0;
527
for (int z = 0; z < r; z++)
528
{
529
ulong hi, lo;
530
ZNP_MUL_WIDE(hi, lo, accum[y][z], temp1[z][x]);
531
ZNP_ADD_WIDE(sum_hi, sum_lo, sum_hi, sum_lo, hi, lo);
532
if (sum_hi >= n)
533
sum_hi -= n;
534
}
535
temp2[y][x] = zn_mod_reduce_wide(sum_hi, sum_lo, mod);
536
}
537
538
accum.swap(temp2);
539
}
540
541
// store result in output array
542
output[i] = accum;
543
}
544
545
return 1;
546
}
547
548
549
}; // namespace hypellfrob
550
551
552
// ----------------------- end of file
553
554