/* ============================================================================

   recurrences_ntl.cpp: recurrences solved via NTL polynomial arithmetic

   This file is part of hypellfrob (version 2.1.1).

   Copyright (C) 2007, 2008, David Harvey

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

============================================================================ */

#include <NTL/ZZ_pX.h>
#include <NTL/mat_ZZ_p.h>
#include <NTL/lzz_pX.h>
#include <NTL/mat_lzz_p.h>
#include <cassert>
#include "recurrences_ntl.h"


NTL_CLIENT


namespace hypellfrob {


/* ============================================================================

   Some template stuff

   The matrix evaluation code is templated so that it can work over either
   ZZ_p or zz_p. There are several template parameters, which can have two
   settings:

      SCALAR:        ZZ_p           zz_p
      POLY:          ZZ_pX          zz_pX
      VECTOR:        vec_ZZ_p       vec_zz_p
      MATRIX:        mat_ZZ_p       mat_zz_p
      POLYMODULUS:   ZZ_pXModulus   zz_pXModulus
      FFTREP:        FFTRep         fftRep

   For the most part NTL uses the same function names for both columns, which
   makes life easy, but there are a few I needed to define explicitly:

      to_scalar()     converts a ZZ or int into a SCALAR
      forward_fft()   runs a forward FFT (either ToFFTRep() or TofftRep())
      inverse_fft()   runs an inverse FFT (either FromFFTRep() or FromfftRep())

============================================================================ */


// to_scalar(ZZ)

template <typename SCALAR> SCALAR to_scalar(const ZZ& input);

template<> inline ZZ_p to_scalar<ZZ_p>(const ZZ& input)
{
   return to_ZZ_p(input);
}

template<> inline zz_p to_scalar<zz_p>(const ZZ& input)
{
   return to_zz_p(input);
}


// to_scalar(int)

template <typename SCALAR> SCALAR to_scalar(int input);

template<> inline ZZ_p to_scalar<ZZ_p>(int input)
{
   return to_ZZ_p(input);
}

template<> inline zz_p to_scalar<zz_p>(int input)
{
   return to_zz_p(input);
}


// forward_fft

template <typename POLY, typename FFTREP>
void forward_fft(FFTREP& y, const POLY& x, long k, long lo, long hi);

template<> inline void
forward_fft<ZZ_pX, FFTRep>(FFTRep& y, const ZZ_pX& x, long k, long lo, long hi)
{
   ToFFTRep(y, x, k, lo, hi);
}

template<> inline void
forward_fft<zz_pX, fftRep>(fftRep& y, const zz_pX& x, long k, long lo, long hi)
{
   TofftRep(y, x, k, lo, hi);
}


// inverse_fft

template <typename POLY, typename FFTREP>
void inverse_fft(POLY& x, FFTREP& y, long lo, long hi);

template<> inline void
inverse_fft<ZZ_pX, FFTRep>(ZZ_pX& x, FFTRep& y, long lo, long hi)
{
   FromFFTRep(x, y, lo, hi);
}

template<> inline void
inverse_fft<zz_pX, fftRep>(zz_pX& x, fftRep& y, long lo, long hi)
{
   FromfftRep(x, y, lo, hi);
}
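
// ----------------------------------------------------------------------------
// Illustrative sketch (hypothetical, not part of hypellfrob): the three
// helpers above are what let a single template compile for both coefficient
// rings. For example, a generic FFT-based squaring routine could be written
// as follows and instantiated with either <ZZ_pX, FFTRep> or <zz_pX, fftRep>.
// It assumes deg(f) < 2^(k-1), so the length-2^k cyclic convolution has no
// wraparound.
//
//    template <typename POLY, typename FFTREP>
//    void square_via_fft(POLY& result, const POLY& f, long k)
//    {
//       FFTREP f_fft(INIT_SIZE, k);
//       forward_fft<POLY, FFTREP>(f_fft, f, k, 0, (1L << k) - 1);
//       mul(f_fft, f_fft, f_fft);              // pointwise multiplication
//       inverse_fft<POLY, FFTREP>(result, f_fft, 0, (1L << k) - 1);
//    }
// ----------------------------------------------------------------------------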


/* ============================================================================

   Dyadic evaluation stuff

   This section essentially implements Theorem 8 of [BGS], for the particular
   case of the parameters that are used in Theorem 15.

============================================================================ */


/*
Assume that f has degree d = 2^n and that g has degree 2d.

This function computes a polynomial h whose x^d through x^{2d} coefficients
(inclusive) are the same as those of f*g. The bottom d coefficients of h
will be junk.

The parameter g_fft should be the precomputed length 2d FFT of g.

The algorithm in this function is based on the paper "The middle product
algorithm" by Hanrot, Quercia and Zimmermann. (Many thanks to Victor Shoup
for writing such wonderfully modular FFT code. It really made my day.)

*/
template <typename SCALAR, typename POLY, typename FFTREP>
void middle_product(POLY& h, const POLY& f,
                    const POLY& g, const FFTREP& g_fft, int n)
{
   int d = 1 << n;
   h.rep.SetLength(2*d + 1);

   FFTREP f_fft(INIT_SIZE, n+1);

   // Compute length 2d cyclic convolutions of f and g, letting the top
   // third of f*g "wrap around" to the bottom half of the output.
   forward_fft<POLY, FFTREP>(f_fft, f, n+1, 0, 2*d);
   mul(f_fft, f_fft, g_fft);
   inverse_fft<POLY, FFTREP>(h, f_fft, 0, 2*d);

   // Need to correct for the x^{2d} term of g which got wrapped around to the
   // constant term.
   h.rep[d] -= g.rep[2*d] * f.rep[d];

   // Now h contains terms x^d through x^{2d-1} of f*g.
   // To finish off, we just need the x^{2d} term.
   SCALAR temp;
   SCALAR& sum = h.rep[2*d];
   sum = 0;
   for (int i = 0; i <= d; i++)
   {
      mul(temp, f.rep[i], g.rep[2*d-i]);
      add(sum, sum, temp);
   }
}
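
/*
   Worked example (illustrative, not part of the original source): take n = 1,
   so d = 2. With f = 1 + x + x^2 and g = 1 + x + x^2 + x^3 + x^4, the full
   product is f*g = 1 + 2x + 3x^2 + 3x^3 + 3x^4 + 2x^5 + x^6, so after
   middle_product(h, f, g, g_fft, 1) the meaningful coefficients are
   h[2] = 3, h[3] = 3, h[4] = 3, while h[0] and h[1] are junk. The caller is
   expected to have built g_fft beforehand via
   forward_fft<POLY, FFTREP>(g_fft, g, n+1, 0, 2*d), which is exactly what
   DyadicShifter below does for its "kernel" polynomial.
*/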


/*
This struct stores precomputed information that can then be used to shift
evaluation values of a polynomial F(x) of degree d = 2^n.

Specifically, given the values
   F(0), F(b), F(2*b), ..., F(d*b),
the shift() method computes
   F(a), F(a + b), F(a + 2*b), ..., F(a + d*b).

PRECONDITIONS:
   n >= 1
   1, 2, ..., d + 1 are invertible
   a + i*b are invertible for -d <= i <= d

*/
template <typename SCALAR, typename POLY, typename VECTOR, typename FFTREP>
struct DyadicShifter
{
   int d, n;

   // input_twist is a vector of length d/2 + 1.
   // The i-th entry is \prod_{0 <= j <= d, j != i} (i-j)^(-1).
   VECTOR input_twist;

   // output_twist is a vector of length d+1.
   // The i-th entry is b^(-d) \prod_{0 <= j <= d} (a + (i-j)*b).
   VECTOR output_twist;

   // kernel is a polynomial of degree 2d.
   // The coefficients are (a + k*b)^(-1) for -d <= k <= d.
   // We also store its length 2d FFT.
   POLY kernel;
   FFTREP kernel_fft;

   // Polynomials for scratch space in shift()
   POLY scratch, scratch2;

   // Constructor (performs various precomputations)
   DyadicShifter(int n, const SCALAR& a, const SCALAR& b)
   {
      assert(n >= 1);
      this->n = n;
      d = 1 << n;

      // ------------------------ compute input_twist -------------------------

      input_twist.SetLength(d/2 + 1);

      // prod = (d!)^(-1)
      SCALAR prod;
      prod = 1;
      for (int i = 2; i <= d; i++)
         mul(prod, prod, i);
      prod = 1 / prod;

      // input_twist[i] = ((d-i)!)^(-1)
      input_twist[0] = prod;
      for (int i = 1; i <= d/2; i++)
         mul(input_twist[i], input_twist[i-1], d-(i-1));

      // input_twist[i] = ((d-i)!*i!)^(-1)
      prod = input_twist[d/2];
      for (int i = d/2; i >= 0; i--)
      {
         mul(input_twist[i], input_twist[i], prod);
         mul(prod, prod, i);
      }

      // input_twist[i] = \prod_{0 <= j <= d, j != i} (i-j)^(-1) :-)
      for (int i = 1; i <= d/2; i += 2)
         NTL::negate(input_twist[i], input_twist[i]);

      // ----------------- compute output_twist and kernel --------------------

      output_twist.SetLength(d+1);

      // c[i] = c_i = a + (i-d)*b for 0 <= i <= 2d
      VECTOR c;
      c.SetLength(2*d+1);
      c[0] = a - d*b;
      for (int i = 1; i <= 2*d; i++)
         add(c[i], c[i-1], b);

      // accum[i] = c_0 * c_1 * ... * c_i for 0 <= i <= 2d
      VECTOR accum;
      accum.SetLength(2*d+1);
      accum[0] = c[0];
      for (int i = 1; i <= 2*d; i++)
         mul(accum[i], accum[i-1], c[i]);

      // accum_inv[i] = (c_0 * c_1 * ... * c_i)^(-1) for 0 <= i <= 2d
      VECTOR accum_inv;
      accum_inv.SetLength(2*d+1);
      accum_inv[2*d] = 1 / accum[2*d];
      for (int i = 2*d-1; i >= 0; i--)
         mul(accum_inv[i], accum_inv[i+1], c[i+1]);

      // kernel[i] = (c_i)^(-1) for 0 <= i <= 2d
      kernel.rep.SetLength(2*d+1);
      kernel.rep[0] = accum_inv[0];
      for (int i = 1; i <= 2*d; i++)
         mul(kernel.rep[i], accum_inv[i], accum[i-1]);

      // precompute transform of kernel
      forward_fft<POLY, FFTREP>(kernel_fft, kernel, n+1, 0, 2*d);

      // output_twist[i] = b^{-d} * c_i * c_{i+1} * ... * c_{i+d}
      // for 0 <= i <= d
      SCALAR factor = power(b, -d);
      SCALAR temp;
      output_twist.SetLength(d+1);
      output_twist[0] = factor * accum[d];
      for (int i = 1; i <= d; i++)
      {
         mul(temp, factor, accum[i+d]);
         mul(output_twist[i], temp, accum_inv[i-1]);
      }
   }


   // Shifts evaluation values as described above.
   // Assumes both output and input have length d + 1.
   void shift(VECTOR& output, const VECTOR& input)
   {
      assert(input.length() == d+1);
      assert(output.length() == d+1);

      // multiply inputs pointwise by input_twist
      scratch.rep.SetLength(d+1);
      for (int i = 0; i <= d/2; i++)
         mul(scratch.rep[i], input[i], input_twist[i]);
      for (int i = 1; i <= d/2; i++)
         mul(scratch.rep[i+d/2], input[i+d/2], input_twist[d/2-i]);

      middle_product<SCALAR, POLY, FFTREP>(scratch2, scratch, kernel,
                                           kernel_fft, n);

      // multiply outputs pointwise by output_twist
      for (int i = 0; i <= d; i++)
         mul(output[i], scratch2.rep[i+d], output_twist[i]);
   }
};
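
/*
   Usage sketch (illustrative, hypothetical values; not part of the original
   source). With n = 2 (so d = 4), step b = 1 and shift a = 5, and a degree-4
   polynomial F whose values at 0, 1, 2, 3, 4 are known:

      zz_p a = to_zz_p(5), b = to_zz_p(1);
      DyadicShifter<zz_p, zz_pX, vec_zz_p, fftRep> shifter(2, a, b);
      vec_zz_p in, out;
      in.SetLength(5);                 // F(0), F(1), ..., F(4)
      out.SetLength(5);
      // ... fill "in" ...
      shifter.shift(out, in);          // now out[i] = F(5 + i)

   The preconditions require 1, ..., d+1 and a + i*b for -d <= i <= d, i.e.
   the values 1 through 9 here, to be invertible, so any prime modulus >= 11
   works.
*/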


/*
Let M0 and M1 be square matrices of size n*n. Let M(x) = M0 + x*M1; this is a
matrix of linear polys in x. Let P(x) = M(x+1) M(x+2) ... M(x+2^s); this is a
matrix of polynomials of degree 2^s. This function computes the values
   P(a), P(a + 2^t), P(a + 2*2^t), ..., P(a + 2^s*2^t).

The output array should have length n^2. Each entry should be a vector of
length 2^s+1, pre-initialised to all zeroes. The (y*n + x)-th vector will be
the values of the (y, x) entries of the above list of matrices. (This data
format is optimised for the case that 2^s+1 is much larger than n.)

PRECONDITIONS:
   0 <= s <= t
   2, 3, ..., 2^t + 1 must be invertible

*/
template <typename SCALAR, typename POLY, typename VECTOR,
          typename MATRIX, typename FFTREP>
void dyadic_evaluation(vector<VECTOR>& output,
                       const MATRIX& M0, const MATRIX& M1,
                       int s, int t, const SCALAR& a)
{
   int n = M0.NumRows();

   // base cases; just evaluate naively
   if (s <= 1)
   {
      MATRIX X[3];

      if (s == 0)
      {
         X[0] = M0 + (a+1) * M1;
         X[1] = M0 + (a+1 + (1 << t)) * M1;
      }
      else
      {
         for (int i = 0; i <= 2; i++)
            X[i] = (M0 + (a+1 + (i << t)) * M1) * (M0 + (a+2 + (i << t)) * M1);
      }

      for (int x = 0; x < n; x++)
         for (int y = 0; y < n; y++)
            for (int i = 0; i < output[0].length(); i++)
               output[y*n + x][i] = X[i][y][x];

      return;
   }

   // General case.
   // Let Q(x) = M(x+1) M(x+2) ... M(x+2^(s-1)).

   // Recursively compute Q(a), Q(a + 2^t), ..., Q(a + 2^(s-1)*2^t).
   vector<VECTOR> X(n*n);
   for (int i = 0; i < n*n; i++)
      X[i].SetLength((1 << (s-1)) + 1);
   dyadic_evaluation<SCALAR, POLY, VECTOR, MATRIX, FFTREP>
                                         (X, M0, M1, s-1, t, a);

   // Do precomputations for shifting by 2^(s-1) and by (2^(s-1)+1)*2^t
   SCALAR c, b;
   c = 1 << (s-1);
   b = 1 << t;
   DyadicShifter<SCALAR, POLY, VECTOR, FFTREP> shifter1(s-1, c, b);
   DyadicShifter<SCALAR, POLY, VECTOR, FFTREP> shifter2(s-1, (c + 1) * b, b);

   // Shift by 2^(s-1) to obtain
   // Q(a + 2^(s-1)), Q(a + 2^t + 2^(s-1)), ..., Q(a + 2^(s-1)*2^t + 2^(s-1))
   vector<VECTOR> Y(n*n);
   for (int i = 0; i < n*n; i++)
   {
      Y[i].SetLength((1 << (s-1)) + 1);
      shifter1.shift(Y[i], X[i]);
   }

   // Multiply matrices to obtain
   // P(a), P(a + 2^t), ..., P(a + 2^(s-1)*2^t).
   SCALAR temp;
   for (int i = 0; i <= (1 << (s-1)); i++)
      for (int x = 0; x < n; x++)
         for (int y = 0; y < n; y++)
            for (int z = 0; z < n; z++)
            {
               mul(temp, X[y*n + z][i], Y[z*n + x][i]);
               output[y*n + x][i] += temp;
            }

   // Shift original sequence by (2^(s-1)+1)*2^t to obtain
   // Q(a + (2^(s-1)+1)*2^t), Q(a + (2^(s-1)+2)*2^t), ..., Q(a + (2^s+1)*2^t).
   for (int i = 0; i < n*n; i++)
      shifter2.shift(Y[i], X[i]);

   // Shift again by 2^(s-1) to obtain
   // Q(a + (2^(s-1)+1)*2^t + 2^(s-1)), Q(a + (2^(s-1)+2)*2^t + 2^(s-1)), ...,
   // Q(a + (2^s+1)*2^t + 2^(s-1)).
   for (int i = 0; i < n*n; i++)
      shifter1.shift(X[i], Y[i]);

   // Multiply matrices to obtain
   // P(a + (2^(s-1)+1)*2^t), P(a + (2^(s-1)+2)*2^t), ..., P(a + (2^s+1)*2^t).
   // (we throw out the last one since it's surplus to requirements)
   for (int i = 0; i < (1 << (s-1)); i++)
      for (int x = 0; x < n; x++)
         for (int y = 0; y < n; y++)
            for (int z = 0; z < n; z++)
            {
               mul(temp, Y[y*n + z][i], X[z*n + x][i]);
               output[y*n + x][i + (1 << (s-1)) + 1] += temp;
            }
}
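
/*
   Illustrative call (hypothetical, not part of the original source). For
   dim-by-dim matrices M0, M1 over zz_p (with a modulus making 2, ..., 9
   invertible, e.g. any prime >= 11), the following fills "output" with the
   values P(0), P(8), P(16), ..., P(64) for s = t = 3, where
   P(x) = M(x+1) M(x+2) ... M(x+8):

      int s = 3, t = 3, dim = 2;
      vector<vec_zz_p> output(dim*dim);
      for (int i = 0; i < dim*dim; i++)
         output[i].SetLength((1 << s) + 1);     // fresh entries are zero
      dyadic_evaluation<zz_p, zz_pX, vec_zz_p, mat_zz_p, fftRep>
                             (output, M0, M1, s, t, to_zz_p(0));

   Entry (y, x) of the i-th matrix value then sits in output[y*dim + x][i],
   exactly the layout ntl_interval_products() relies on below.
*/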


/* ============================================================================

   General evaluation stuff

   This section essentially implements Corollary 10 of [BGS].

============================================================================ */


/*
This struct stores the product tree associated to a vector a[0], ..., a[n-1].

The top node stores the polynomial product
   (x - a[0]) ... (x - a[n-1]).
The two children nodes store
   (x - a[0]) ... (x - a[m-1])
and
   (x - a[m]) ... (x - a[n-1])
where m = floor(n/2). This continues recursively until we reach n = 1,
in which case just the polynomial x - a[0] is stored, with no children.

*/
template <typename SCALAR, typename POLY, typename VECTOR>
struct ProductTree
{
   // polynomial product stored at this node
   POLY poly;

   // children for left and right halves, if deg(poly) > 1
   ProductTree* child1;
   ProductTree* child2;

   // These are temp polys used by the Evaluator and Interpolator classes.
   // It's not very hygienic to keep them here... but it makes things more
   // efficient, because we need two temps for each node, and this prevents
   // unnecessary reallocations. (The lengths will be the same on repeated
   // calls to evaluate() and interpolate().)
   POLY scratch1, scratch2;

   // Constructs product tree for the supplied vector.
   ProductTree(const VECTOR& points)
   {
      build(points, 0, points.length());
   }

   ProductTree(const VECTOR& points, int start, int end)
   {
      build(points, start, end);
   }

   // Constructs product tree recursively for the subset [start, end) of
   // the supplied vector.
   void build(const VECTOR& points, int start, int end)
   {
      assert(end - start >= 1);
      assert(start >= 0);
      assert(end <= points.length());

      if (end - start == 1)
      {
         SetCoeff(poly, 1, 1);
         SetCoeff(poly, 0, -points[start]);
      }
      else
      {
         int m = (end - start) / 2;
         child1 = new ProductTree(points, start, start + m);
         child2 = new ProductTree(points, start + m, end);
         mul(poly, child1->poly, child2->poly);
      }
   }

   ~ProductTree()
   {
      if (deg(poly) > 1)
      {
         delete child1;
         delete child2;
      }
   }
};
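
/*
   Example (illustrative, not part of the original source): for four points
   a0, a1, a2, a3 the tree built above looks like

                (x-a0)(x-a1)(x-a2)(x-a3)
                  /                  \
         (x-a0)(x-a1)            (x-a2)(x-a3)
           /        \              /        \
        (x-a0)    (x-a1)        (x-a2)    (x-a3)

   Note that the destructor tests deg(poly) > 1 to decide whether children
   were allocated; this matches build(), which allocates children exactly
   when the node covers more than one point.
*/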


/*
Given a list of evaluation points a[0], ..., a[n-1], this struct stores some
precomputed information to permit evaluating an arbitrary polynomial at those
points.
*/
template <typename SCALAR, typename POLY,
          typename POLYMODULUS, typename VECTOR>
struct Evaluator
{
   // The product tree for the evaluation points
   ProductTree<SCALAR, POLY, VECTOR>* tree;

   // A list of NTL ZZ_pXModulus/zz_pXModulus objects corresponding to the
   // polynomials in the product tree, in the order that they get used as the
   // tree is traversed in recursive_evaluate().
   vector<POLYMODULUS> moduli;

   // Constructs evaluator object for the given list of evaluation points
   Evaluator(const VECTOR& points)
   {
      assert(points.length() >= 1);
      tree = new ProductTree<SCALAR, POLY, VECTOR>(points);
      moduli.reserve(2*points.length());
      build(tree);
      assert(moduli.size() <= 2*points.length());
   }

   // Computes modulus objects for each polynomial under the supplied node of
   // the product tree; appends them in traversal order to "moduli".
   void build(const ProductTree<SCALAR, POLY, VECTOR>* node)
   {
      if (deg(node->poly) > 1)
      {
         moduli.push_back(POLYMODULUS(node->poly));
         build(node->child1);
         build(node->child2);
      }
   }

   ~Evaluator()
   {
      delete tree;
   }

   // Evaluates the input polynomial at the evaluation points, writes the
   // results to output. The output array must have the correct length.
   void evaluate(VECTOR& output, const POLY& input)
   {
      recursive_evaluate(output, input, tree, 0, 0);
   }

   // Evaluates the input polynomial at the subset [start, end) of the
   // evaluation points, which should correspond to the supplied product tree
   // node. (The length of the interval is implied by the degree of the poly
   // at that node.) Writes the output to the subset [start, end) of the
   // output array. The index parameter indicates which modulus in "moduli"
   // to use for this node of the tree. The return value is the index for
   // the modulus that should be used immediately after this call.
   int recursive_evaluate(VECTOR& output, const POLY& input,
                          ProductTree<SCALAR, POLY, VECTOR>* node,
                          int start, int index)
   {
      if (deg(node->poly) == 1)
      {
         eval(output[start], input, -coeff(node->poly, 0));
      }
      else
      {
         rem(node->scratch1, input, moduli[index++]);
         index = recursive_evaluate(output, node->scratch1, node->child1,
                                    start, index);
         index = recursive_evaluate(output, node->scratch1, node->child2,
                                    start + deg(node->child1->poly), index);
      }
      return index;
   }
};
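
/*
   Usage sketch (illustrative, not part of the original source): to evaluate
   some zz_pX polynomial F at the points 3, 7, 11:

      vec_zz_p points, values;
      points.SetLength(3);
      points[0] = 3;  points[1] = 7;  points[2] = 11;
      values.SetLength(3);
      Evaluator<zz_p, zz_pX, zz_pXModulus, vec_zz_p> evaluator(points);
      evaluator.evaluate(values, F);      // values[i] = F(points[i])

   The same Evaluator can be reused for many polynomials; that is exactly how
   ntl_short_interval_products() uses it below, building one Evaluator per
   block of evaluation points and reusing it for all dim*dim matrix entries.
*/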


/*
Given an integer L >= 1, this struct does some precomputations to permit
interpolating a polynomial whose values at 0, 1, ..., L are known.

PRECONDITIONS:
   1, 2, ..., L must be invertible.

*/
template <typename SCALAR, typename POLY, typename VECTOR>
struct Interpolator
{
   ProductTree<SCALAR, POLY, VECTOR>* tree;
   int L;

   // input_twist is a vector of length L+1.
   // The i-th entry is \prod_{0 <= j <= L, j != i} (i-j)^(-1).
   VECTOR input_twist;

   // vector of length L+1, used in interpolate()
   VECTOR temp;

   // Performs various precomputations for the given L.
   Interpolator(int L)
   {
      this->L = L;
      temp.SetLength(L+1);

      // Build a product tree for the evaluation points
      for (int i = 0; i <= L; i++)
         temp[i] = i;
      tree = new ProductTree<SCALAR, POLY, VECTOR>(temp);

      // prod = (L!)^(-1)
      SCALAR prod;
      prod = 1;
      for (int i = 2; i <= L; i++)
         mul(prod, prod, i);
      prod = 1 / prod;

      // input_twist[i] = (i!)^(-1), 0 <= i <= L
      input_twist.SetLength(L+1);
      input_twist[L] = prod;
      for (int i = L; i >= 1; i--)
         mul(input_twist[i-1], input_twist[i], i);

      // input_twist[i] = \prod_{0 <= j <= L, j != i} (i-j)^(-1).
      for (int i = 0; i <= L/2; i++)
      {
         mul(input_twist[i], input_twist[i], input_twist[L-i]);
         input_twist[L-i] = input_twist[i];
      }
      for (int i = L-1; i >= 0; i -= 2)
         NTL::negate(input_twist[i], input_twist[i]);
   }

   ~Interpolator()
   {
      delete tree;
   }


   // Returns the polynomial
   //    \sum_{i=start}^{end-1} values[i] * (x-start)(x-(start+1)) ... (x-(end-1))
   // where [start, end) is the interval associated to the supplied product
   // tree node, and where the (x - i) factor is omitted in each product.
   void combine(POLY& output, const VECTOR& values,
                ProductTree<SCALAR, POLY, VECTOR>* node, int start)
   {
      if (deg(node->poly) == 1)
      {
         // base case
         clear(output);
         SetCoeff(output, 0, values[start]);
      }
      else
      {
         // recursively build up from two halves
         // i.e. if f1, f2 are the results of "combine" for the two halves,
         // and if p1, p2 are the associated product tree polys, we compute
         // f1*p2 + f2*p1

         combine(node->scratch1, values, node->child1, start);
         mul(output, node->scratch1, node->child2->poly);

         combine(node->scratch1, values, node->child2,
                 start + deg(node->child1->poly));
         mul(node->scratch2, node->scratch1, node->child1->poly);

         add(output, output, node->scratch2);
      }
   }

   // Returns a polynomial F(x) of degree at most L such that F(i) = values[i]
   // for each 0 <= i <= L.
   void interpolate(POLY& output, const VECTOR& values)
   {
      assert(values.length() == L+1);

      // multiply input values pointwise by input_twist; this corrects for
      // the factor (i-0) (i-1) ... (i-L) (where the i-i factor is omitted).
      for (int i = 0; i <= L; i++)
         mul(temp[i], values[i], input_twist[i]);

      // do the interpolation
      combine(output, temp, tree, 0);
   }
};
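
/*
   Worked example (illustrative, not part of the original source): with L = 2
   and the values F(0) = 1, F(1) = 2, F(2) = 5, interpolate() recovers the
   unique polynomial of degree <= 2 through those points, namely x^2 + 1:

      Interpolator<zz_p, zz_pX, vec_zz_p> interp(2);
      vec_zz_p values;
      values.SetLength(3);
      values[0] = 1;  values[1] = 2;  values[2] = 5;
      zz_pX F;
      interp.interpolate(F, values);      // F == 1 + x^2 (mod p)

   The precondition (1, ..., L invertible) only asks that 2 be invertible
   here, so any odd prime modulus works.
*/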


/* ============================================================================

   Matrix products over arbitrary, relatively short intervals

   This section implements something similar to steps 1, 2, ... and the final
   refining step of Theorem 15 of [BGS].

============================================================================ */


/*
Let M0 and M1 be matrices of constants. This function evaluates
   M(x) = M0 + x*M1
at x = a.

The output matrix must already have the correct dimensions.

*/
template <typename SCALAR, typename MATRIX>
void eval_matrix(MATRIX& output, const MATRIX& M0, const MATRIX& M1,
                 const SCALAR& a)
{
   int n = M0.NumRows();
   for (int x = 0; x < n; x++)
      for (int y = 0; y < n; y++)
      {
         mul(output[x][y], a, M1[x][y]);
         add(output[x][y], output[x][y], M0[x][y]);
      }
}
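
/*
   Notational remark (added for clarity; the convention itself is implicit in
   the original code): the comments in ntl_short_interval_products() below
   write M(u, v) for the product M(u+1) M(u+2) ... M(v), taken left to right.
   This can be read off from how left_accum and right_accum are built, e.g.
   left_accum[0] = M(L-1, L) is the single factor M(L), and it matches the
   convention P(x) = M(x+1) ... M(x+2^s) used by dyadic_evaluation() above.
*/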


/*
Similar to ntl_interval_products. This is used as a subroutine of
ntl_interval_products() to handle the smaller "refining" subintervals.
Its asymptotic complexity theoretically has an extra logarithmic factor
over that of ntl_interval_products().

PRECONDITIONS:
   Let d = sum of lengths of intervals. Then 2, 3, ..., 1 + floor(sqrt(d))
   must all be invertible.

*/
template <typename SCALAR, typename POLY, typename POLYMODULUS,
          typename VECTOR, typename MATRIX>
void ntl_short_interval_products(vector<MATRIX>& output,
                                 const MATRIX& M0, const MATRIX& M1,
                                 const vector<ZZ>& target)
{
   output.clear();

   if (target.size() == 0)
      return;

   int dim = M0.NumRows();
   int num_intervals = target.size() / 2;

   // Determine maximum target interval length
   int max_length = -1;
   for (int i = 0; i < target.size(); i += 2)
   {
      int temp = to_ulong(target[i+1] - target[i]);
      if (temp > max_length)
         max_length = temp;
   }

   // Select an appropriate length for the matrix products we'll use
   int L, max_eval_points;
   if (max_length > 2*num_intervals)
   {
      // The intervals are still pretty long relative to the number of
      // intervals, so we're only going to do a single multipoint
      // evaluation.
      L = 1 + to_ulong(SqrRoot(num_intervals * to_ZZ(max_length)));
      max_eval_points = L;
   }
   else
   {
      // The intervals are getting pretty short, so we probably will need
      // to do several shorter multipoint evaluations.
      L = 1 + max_length/2;
      max_eval_points = num_intervals;
   }

   // =========================================================================
   // Step 1: compute entries of M(X, X+L) as polynomials in X.

   vector<POLY> polys(dim*dim);
   {
      // left_accum[i] = M(L-i-1, L) for 0 <= i <= L-1
      // right_accum[i] = M(L, L+i+1) for 0 <= i <= L-1
      vector<MATRIX> left_accum(L), right_accum(L);

      MATRIX temp;
      temp.SetDims(dim, dim);

      left_accum[0].SetDims(dim, dim);
      eval_matrix<SCALAR, MATRIX>(left_accum[0], M0, M1, to_scalar<SCALAR>(L));
      for (int i = L-1; i >= 1; i--)
      {
         eval_matrix<SCALAR, MATRIX>(temp, M0, M1, to_scalar<SCALAR>(i));
         mul(left_accum[L-i], temp, left_accum[L-i-1]);
      }

      right_accum[0].SetDims(dim, dim);
      eval_matrix<SCALAR, MATRIX>(right_accum[0], M0, M1,
                                  to_scalar<SCALAR>(L+1));
      for (int i = 1; i <= L-1; i++)
      {
         eval_matrix<SCALAR, MATRIX>(temp, M0, M1, to_scalar<SCALAR>(L+1+i));
         mul(right_accum[i], right_accum[i-1], temp);
      }

      // Use left_accum and right_accum to compute:
      //    initial[i] = M(i, L+i) for 0 <= i <= L
      // i.e. initial[i] are the values of M(X, X+L) at X = 0, 1, ..., L.
      vector<MATRIX> initial(L+1);
      initial[0] = left_accum.back();
      initial[L] = right_accum.back();
      for (int i = 1; i <= L-1; i++)
         mul(initial[i], left_accum[L-1-i], right_accum[i-1]);

      // Now interpolate entries of initial[i] to get entries of M(X, X+L)
      // as polynomials of degree L.
      Interpolator<SCALAR, POLY, VECTOR> interpolator(L);
      VECTOR values;
      values.SetLength(L+1);
      for (int x = 0; x < dim; x++)
         for (int y = 0; y < dim; y++)
         {
            for (int j = 0; j <= L; j++)
               values[j] = initial[j][y][x];
            interpolator.interpolate(polys[y*dim + x], values);
         }
   }

   // =========================================================================
   // Step 2: decompose intervals into subintervals of length L which we'll
   // attack by direct multipoint evaluation, plus leftover pieces that we'll
   // handle with a recursive call to ntl_short_interval_products().

   // eval_points holds all the values of X for which we want to
   // evaluate M(X, X+L)
   VECTOR eval_points;
   eval_points.SetMaxLength(max_eval_points);

   // leftover_target is the list of leftover intervals that we're going to
   // handle recursively later
   vector<ZZ> leftover_target;
   leftover_target.reserve(target.size());

   ZZ current, next;
   for (int i = 0; i < target.size(); i += 2)
   {
      current = target[i];
      next = current + L;
      while (next <= target[i+1])
      {
         // [current, next) fits inside this interval, so peel it off into
         // eval_points
         append(eval_points, to_scalar<SCALAR>(current));
         swap(current, next);
         next = current + L;
      }
      if (current < target[i+1])
      {
         // the rest of this interval is too short to handle with M(X, X+L),
         // so put it in the leftover bin
         leftover_target.push_back(current);
         leftover_target.push_back(target[i+1]);
      }
   }

   // =========================================================================
   // Step 3: recursively handle leftover pieces

   // leftover_matrices[i] holds the matrix for leftover interval #i
   vector<MATRIX> leftover_matrices;
   ntl_short_interval_products<SCALAR, POLY, POLYMODULUS, VECTOR, MATRIX>
                              (leftover_matrices, M0, M1, leftover_target);

   // =========================================================================
   // Step 4: evaluate M(X, X+L) at each of the evaluation points. We do this
   // by breaking up the list of evaluation points into blocks of length at
   // most L+1, and using multipoint evaluation on each block.

   // main_matrices[i] will hold M(X, X+L) for the i-th evaluation point X.
   vector<MATRIX> main_matrices(eval_points.length());
   for (int i = 0; i < main_matrices.size(); i++)
      main_matrices[i].SetDims(dim, dim);

   VECTOR block, values;
   block.SetMaxLength(L+1);
   values.SetMaxLength(L+1);

   // for each block...
   for (int i = 0; i < eval_points.length(); i += (L+1))
   {
      // determine length of this block, which is at most L+1
      int length = eval_points.length() - i;
      if (length >= (L+1))
         length = (L+1);
      block.SetLength(length);
      values.SetLength(length);   // evaluate() requires output of correct length

      // construct Evaluator object for evaluating at these points
      for (int j = 0; j < length; j++)
         block[j] = eval_points[i+j];
      Evaluator<SCALAR, POLY, POLYMODULUS, VECTOR> evaluator(block);

      // evaluate each entry of M(X, X+L) at those points
      for (int x = 0; x < dim; x++)
         for (int y = 0; y < dim; y++)
         {
            evaluator.evaluate(values, polys[y*dim + x]);
            for (int k = 0; k < length; k++)
               main_matrices[i+k][y][x] = values[k];
         }
   }

   // =========================================================================
   // Step 5: merge together the matrices obtained from the multipoint
   // evaluation step and the recursive leftover interval step.

   output.clear();
   output.resize(target.size() / 2);
   for (int i = 0; i < target.size()/2; i++)
      output[i].SetDims(dim, dim);

   int main_index = 0;       // index into main_matrices
   int leftover_index = 0;   // index into leftover_matrices

   MATRIX temp;
   temp.SetDims(dim, dim);

   for (int i = 0; i < target.size(); i += 2)
   {
      current = target[i];
      next = current + L;
      ident(output[i/2], dim);

      while (next <= target[i+1])
      {
         // merge in a matrix from multipoint evaluation step
         mul(temp, output[i/2], main_matrices[main_index++]);
         swap(temp, output[i/2]);
         swap(current, next);
         next = current + L;
      }
      if (current < target[i+1])
      {
         // merge in a matrix from leftover interval step
         mul(temp, output[i/2], leftover_matrices[leftover_index++]);
         swap(temp, output[i/2]);
      }
   }
}
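
/*
   Illustrative call (hypothetical, not part of the original source; this is
   a file-local helper normally reached via ntl_interval_products()). Using
   the M(u, v) = M(u+1) ... M(v) convention noted above, the following
   computes M(1) M(2) ... M(5) and M(8) M(9) M(10) over zz_p, for a modulus
   in which 2 and 3 are invertible:

      vector<ZZ> target;
      target.push_back(to_ZZ(0));   target.push_back(to_ZZ(5));
      target.push_back(to_ZZ(7));   target.push_back(to_ZZ(10));
      vector<mat_zz_p> result;
      ntl_short_interval_products<zz_p, zz_pX, zz_pXModulus, vec_zz_p, mat_zz_p>
                                                    (result, M0, M1, target);
      // result[0] = M(1) ... M(5),  result[1] = M(8) M(9) M(10)
*/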


/* ============================================================================

   Matrix products over arbitrary, long intervals

   This section implements an algorithm similar to Theorem 15 of [BGS].

============================================================================ */


/*
See interval_products_wrapper().

NOTE:
   This algorithm works best if the intervals are very long and don't have
   much space between them. The case where the gaps are relatively large is
   best handled by ntl_short_interval_products().

*/
template <typename SCALAR, typename POLY, typename POLYMODULUS,
          typename VECTOR, typename MATRIX, typename FFTREP>
void ntl_interval_products(vector<MATRIX>& output,
                           const MATRIX& M0, const MATRIX& M1,
                           const vector<ZZ>& target)
{
   assert(target.size() % 2 == 0);
   output.resize(target.size() / 2);

   int dim = M0.NumRows();
   assert(dim == M0.NumCols());
   assert(dim == M1.NumRows());
   assert(dim == M1.NumCols());

   // =========================================================================
   // Step 0: get as many intervals as possible using dyadic_evaluation().

   // step0_matrix[i] is the transition matrix between step0_index[2*i]
   // and step0_index[2*i+1].
   vector<MATRIX> step0_matrix;
   vector<ZZ> step0_index;
   // preallocate the maximum number of matrices that could arise (plus safety)
   int reserve_size = target.size() +
                      4*NumBits(target.back() - target.front());
   step0_matrix.reserve(reserve_size);
   step0_index.reserve(2 * reserve_size);

   ZZ current_index = target.front();
   int next_target = 0;   // index into "target" array

   // This flag indicates whether the last entry of step0_matrix is
   // still accumulating matrices (in which case the right endpoint of the
   // corresponding interval hasn't been written to step0_index yet).
   int active = 0;

   MATRIX temp_mat;
   temp_mat.SetDims(dim, dim);

   while (current_index < target.back() - 3)
   {
      // find largest t such that 2^t*(2^t + 1) <= remaining distance to go
      ZZ remaining = target.back() - current_index;
      int t = 0;
      while ((to_ZZ(1) << (2*t)) + (1 << t) <= remaining)
         t++;
      t--;

      // evaluate matrices for 2^t+1 intervals of length 2^t
      vector<VECTOR> dyadic_output(dim*dim);
      for (int i = 0; i < dim*dim; i++)
         dyadic_output[i].SetLength((1 << t) + 1);
      dyadic_evaluation<SCALAR, POLY, VECTOR, MATRIX, FFTREP>
              (dyadic_output, M0, M1, t, t, to_scalar<SCALAR>(current_index));

      // Walk through the intervals we just computed. Find maximal subsequences
      // of intervals none of which contain any target endpoints. Merge them
      // together (by multiplying the appropriate matrices) and store results
      // in step0_matrix, step0_index.

      SCALAR scratch;

      for (int i = 0; i <= (1 << t); i++, current_index += (1 << t))
      {
         assert(next_target == target.size() ||
                target[next_target] >= current_index);

         // Skip over target endpoints which are exactly at the beginning
         // of this interval
         while ((next_target < target.size()) &&
                (target[next_target] == current_index))
         {
            // if there's an active matrix, don't forget to close it off
            if (active)
            {
               step0_index.push_back(current_index);
               active = 0;
            }
            next_target++;
         }

         // Test if any target endpoints are strictly within this interval.
         if ((next_target == target.size()) ||
             (target[next_target] >= current_index + (1 << t)))
         {
            // There are no target endpoints in this interval.
            if (active)
            {
               // Merge this matrix with the active one
               MATRIX& active_mat = step0_matrix.back();
               for (int y = 0; y < dim; y++)
                  for (int x = 0; x < dim; x++)
                  {
                     SCALAR& accum = temp_mat[y][x];
                     accum = 0;
                     for (int z = 0; z < dim; z++)
                     {
                        mul(scratch, active_mat[y][z],
                            dyadic_output[z*dim + x][i]);
                        add(accum, accum, scratch);
                     }
                  }

               swap(temp_mat, active_mat);
            }
            else
            {
               // Make this matrix into a new active one
               step0_index.push_back(current_index);
               step0_matrix.resize(step0_matrix.size() + 1);
               MATRIX& X = step0_matrix.back();
               X.SetDims(dim, dim);
               for (int y = 0; y < dim; y++)
                  for (int x = 0; x < dim; x++)
                     X[y][x] = dyadic_output[y*dim + x][i];
               active = 1;
            }
         }
         else
         {
            // There are target endpoints in this interval.
            if (active)
            {
               // If there is still an active matrix, close it off.
               step0_index.push_back(current_index);
               active = 0;
            }

            // skip over any other endpoints in this interval
            while ((next_target < target.size()) &&
                   (target[next_target] < current_index + (1 << t)))
            {
               next_target++;
            }
         }
      }
   }

   // If there is still an active matrix, close it off.
   if (active)
      step0_index.push_back(current_index);

   assert(step0_index.size() == 2*step0_matrix.size());

   // =========================================================================
   // Step 1: Make a list of all subintervals that we are going to need in
   // the refining steps.

   int next_step0 = 0;       // index into step0_index
   vector<ZZ> step1_index;   // list of pairs of endpoints of needed intervals
   step1_index.reserve(2*target.size());

   // add sentinel endpoints to make the next loop simpler:
   step0_index.push_back(target.back() + 10);
   step0_index.push_back(target.back() + 20);

   for (next_target = 0; next_target < target.size(); next_target += 2)
   {
      // skip dyadic intervals that come before this target interval
      while (step0_index[next_step0+1] <= target[next_target])
         next_step0 += 2;

      if (step0_index[next_step0] < target[next_target+1])
      {
         // The next dyadic interval starts before the end of this target
         // interval.
         if (step0_index[next_step0] > target[next_target])
         {
            // The next dyadic interval starts strictly within this target
            // interval, so we need to create a refining subinterval for the
            // initial segment of this target interval.
            step1_index.push_back(target[next_target]);
            step1_index.push_back(step0_index[next_step0]);
         }

         // Skip over dyadic intervals to find the last one still contained
         // within this target interval.
         while (step0_index[next_step0+3] <= target[next_target+1])
            next_step0 += 2;

         if (step0_index[next_step0+1] < target[next_target+1])
         {
            // The next dyadic interval finishes strictly within this target
            // interval, so we need to create a refining subinterval for the
            // final segment of this target interval.
            step1_index.push_back(step0_index[next_step0+1]);
            step1_index.push_back(target[next_target+1]);
         }

         // Move on to next dyadic interval
         next_step0 += 2;
      }
      else
      {
         // The next dyadic interval starts beyond (or just at the end of)
         // this target interval, so we need to create a refining subinterval
         // for this *whole* target interval.
         step1_index.push_back(target[next_target]);
         step1_index.push_back(target[next_target+1]);
      }
   }

   // remove sentinels for my sanity
   step0_index.pop_back();
   step0_index.pop_back();

   // Step 1b: Compute matrix products over those refining subintervals.
   vector<MATRIX> step1_matrix;
   ntl_short_interval_products<SCALAR, POLY, POLYMODULUS, VECTOR, MATRIX>
                              (step1_matrix, M0, M1, step1_index);

   assert(step1_index.size() == 2 * step1_matrix.size());

   // =========================================================================
   // Step 2: Merge together the dyadic intervals and refining intervals into
   // a single list, in sorted order.

   vector<MATRIX> step2_matrix(step0_matrix.size() + step1_matrix.size());
   vector<ZZ> step2_index(step0_index.size() + step1_index.size());

   // add sentinels to make the next loop simpler
   step0_index.push_back(target.back() + 10);
   step0_index.push_back(target.back() + 20);
   step1_index.push_back(target.back() + 10);
   step1_index.push_back(target.back() + 20);

   next_step0 = 0;        // index into step0_matrix
   int next_step1 = 0;    // index into step1_matrix

   for (int next_step2 = 0; next_step2 < step2_matrix.size(); next_step2++)
   {
      if (step0_index[2*next_step0] < step1_index[2*next_step1])
      {
         // grab a matrix and pair of indices from step0
         swap(step2_matrix[next_step2], step0_matrix[next_step0]);
         step2_index[2*next_step2] = step0_index[2*next_step0];
         step2_index[2*next_step2+1] = step0_index[2*next_step0+1];
         next_step0++;
      }
      else
      {
         // grab a matrix and pair of indices from step1
         swap(step2_matrix[next_step2], step1_matrix[next_step1]);
         step2_index[2*next_step2] = step1_index[2*next_step1];
         step2_index[2*next_step2+1] = step1_index[2*next_step1+1];
         next_step1++;
      }
   }

   // remove sentinels for my sanity
   step0_index.pop_back();
   step0_index.pop_back();
   step1_index.pop_back();
   step1_index.pop_back();

   assert(step2_index.size() == 2*step2_matrix.size());

   // =========================================================================
   // Step 3: Walk through target intervals, and merge together appropriate
   // intervals from step2 to get those target intervals.

   int next_step2 = 0;    // index into step2_matrix

   // add sentinels to make the next loop simpler
   step2_index.push_back(target.back() + 1);
   step2_index.push_back(target.back() + 2);

   for (int next_target = 0; next_target < target.size(); next_target += 2)
   {
      // search for step2 interval matching the start of this target interval
      while (step2_index[2*next_step2] < target[next_target])
         next_step2++;

      assert(step2_index[2*next_step2] == target[next_target]);

      // merge together matrices for step2 intervals contained in this target
      // interval
      swap(step2_matrix[next_step2++], output[next_target/2]);
      while (step2_index[2*next_step2+1] <= target[next_target+1])
      {
         mul(temp_mat, output[next_target/2], step2_matrix[next_step2]);
         swap(temp_mat, output[next_target/2]);
         next_step2++;
      }
   }
}
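
/*
   Usage sketch (illustrative, not part of the original source), matching the
   zz_p instantiation declared below. To compute M(1) M(2) ... M(100) and
   M(201) M(202) ... M(300) for M(x) = M0 + x*M1 over zz_p (assuming the
   modulus satisfies the invertibility preconditions of the helper routines
   above, and noting that the code walks the endpoints in increasing order,
   so "target" should be sorted):

      vector<ZZ> target;
      target.push_back(to_ZZ(0));     target.push_back(to_ZZ(100));
      target.push_back(to_ZZ(200));   target.push_back(to_ZZ(300));
      vector<mat_zz_p> output;
      ntl_interval_products<zz_p, zz_pX, zz_pXModulus, vec_zz_p, mat_zz_p, fftRep>
                                                    (output, M0, M1, target);
      // output[0] = M(1) ... M(100),  output[1] = M(201) ... M(300)

   As the NOTE above says, this routine is at its best when the intervals are
   long and the gaps between them are small.
*/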


// explicit instantiations for zz_p and ZZ_p versions:


template void ntl_interval_products
      <ZZ_p, ZZ_pX, ZZ_pXModulus, vec_ZZ_p, mat_ZZ_p, FFTRep>
      (vector<mat_ZZ_p>& output, const mat_ZZ_p& M0, const mat_ZZ_p& M1,
       const vector<ZZ>& target);


template void ntl_interval_products
      <zz_p, zz_pX, zz_pXModulus, vec_zz_p, mat_zz_p, fftRep>
      (vector<mat_zz_p>& output, const mat_zz_p& M0, const mat_zz_p& M1,
       const vector<ZZ>& target);


}; // namespace hypellfrob


// ----------------------- end of file