Path: blob/master/sage/schemes/hyperelliptic_curves/hypellfrob/recurrences_ntl.cpp
/* ============================================================================

   recurrences_ntl.cpp: recurrences solved via NTL polynomial arithmetic

   This file is part of hypellfrob (version 2.1.1).

   Copyright (C) 2007, 2008, David Harvey

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, write to the Free Software Foundation, Inc.,
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

============================================================================ */


#include <NTL/ZZ_pX.h>
#include <NTL/mat_ZZ_p.h>
#include <NTL/lzz_pX.h>
#include <NTL/mat_lzz_p.h>
#include <cassert>
#include "recurrences_ntl.h"


NTL_CLIENT


namespace hypellfrob {


/* ============================================================================

   Some template stuff

   The matrix evaluation code is templated so that it can work over either
   ZZ_p or zz_p. There are several template parameters, which can have two
   settings:

      SCALAR:       ZZ_p           zz_p
      POLY:         ZZ_pX          zz_pX
      VECTOR:       vec_ZZ_p       vec_zz_p
      MATRIX:       mat_ZZ_p       mat_zz_p
      POLYMODULUS:  ZZ_pXModulus   zz_pXModulus
      FFTREP:       FFTRep         fftRep

   For the most part NTL uses the same function names for both columns, which
   makes life easy, but there are a few I needed to define explicitly:

      to_scalar()     converts a ZZ or int into a SCALAR
      forward_fft()   runs a forward FFT (either ToFFTRep() or TofftRep())
      inverse_fft()   runs an inverse FFT (either FromFFTRep() or FromfftRep())

============================================================================ */


// to_scalar(ZZ)

template <typename SCALAR> SCALAR to_scalar(const ZZ& input);

template<> inline ZZ_p to_scalar<ZZ_p>(const ZZ& input)
{
   return to_ZZ_p(input);
}

template<> inline zz_p to_scalar<zz_p>(const ZZ& input)
{
   return to_zz_p(input);
}


// to_scalar(int)

template <typename SCALAR> SCALAR to_scalar(int input);

template<> inline ZZ_p to_scalar<ZZ_p>(int input)
{
   return to_ZZ_p(input);
}

template<> inline zz_p to_scalar<zz_p>(int input)
{
   return to_zz_p(input);
}


// forward_fft

template <typename POLY, typename FFTREP>
void forward_fft(FFTREP& y, const POLY& x, long k, long lo, long hi);

template<> inline void
forward_fft<ZZ_pX, FFTRep>(FFTRep& y, const ZZ_pX& x, long k, long lo, long hi)
{
   ToFFTRep(y, x, k, lo, hi);
}

template<> inline void
forward_fft<zz_pX, fftRep>(fftRep& y, const zz_pX& x, long k, long lo, long hi)
{
   TofftRep(y, x, k, lo, hi);
}


// inverse_fft

template <typename POLY, typename FFTREP>
void inverse_fft(POLY& x, FFTREP& y, long lo, long hi);

template<> inline void
inverse_fft<ZZ_pX, FFTRep>(ZZ_pX& x, FFTRep& y, long lo, long hi)
{
   FromFFTRep(x, y, lo, hi);
}

template<> inline void
inverse_fft<zz_pX, fftRep>(zz_pX& x, fftRep& y, long lo, long hi)
{
   FromfftRep(x, y, lo, hi);
}



/* ============================================================================

   Dyadic evaluation stuff

   This section essentially implements Theorem 8 of [BGS], for the particular
   case of the parameters that are used in Theorem 15.

============================================================================ */


/*
   Assume that f has degree d = 2^n and that g has degree 2d.

   This function computes a polynomial h whose x^d through x^{2d} coefficients
   (inclusive) are the same as those of f*g. The bottom d coefficients of h
   will be junk.

   The parameter g_fft should be the precomputed length 2d FFT of g.

   The algorithm in this function is based on the paper "The middle product
   algorithm" by Hanrot, Quercia and Zimmermann. (Many thanks to Victor Shoup
   for writing such wonderfully modular FFT code. It really made my day.)
*/
template <typename SCALAR, typename POLY, typename FFTREP>
void middle_product(POLY& h, const POLY& f,
                    const POLY& g, const FFTREP& g_fft, int n)
{
   int d = 1 << n;
   h.rep.SetLength(2*d + 1);

   FFTREP f_fft(INIT_SIZE, n+1);

   // Compute length 2d cyclic convolutions of f and g, letting the top
   // third of f*g "wrap around" to the bottom half of the output.
   forward_fft<POLY, FFTREP>(f_fft, f, n+1, 0, 2*d);
   mul(f_fft, f_fft, g_fft);
   inverse_fft<POLY, FFTREP>(h, f_fft, 0, 2*d);

   // Need to correct for the x^{2d} term of g which got wrapped around to the
   // constant term.
   h.rep[d] -= g.rep[2*d] * f.rep[d];

   // Now h contains terms x^d through x^{2d-1} of f*g.
   // To finish off, we just need the x^{2d} term.
   SCALAR temp;
   SCALAR& sum = h.rep[2*d];
   sum = 0;
   for (int i = 0; i <= d; i++)
   {
      mul(temp, f.rep[i], g.rep[2*d-i]);
      add(sum, sum, temp);
   }
}



/*
   This struct stores precomputed information that can then be used to shift
   evaluation values of a polynomial F(x) of degree d = 2^n.

   Specifically, given the values
      F(0), F(b), F(2*b), ..., F(d*b),
   the shift() method computes
      F(a), F(a + b), F(a + 2*b), ..., F(a + d*b).

   PRECONDITIONS:
      n >= 1
      1, 2, ..., d + 1 are invertible
      a + i*b are invertible for -d <= i <= d
*/
template <typename SCALAR, typename POLY, typename VECTOR, typename FFTREP>
struct DyadicShifter
{
   int d, n;

   // input_twist is a vector of length d/2 + 1.
   // The i-th entry is \prod_{0 <= j <= d, j != i} (i-j)^(-1).
   VECTOR input_twist;

   // output_twist is a vector of length d+1.
   // The i-th entry is b^(-d) \prod_{0 <= j <= d} (a + (i-j)*b).
   VECTOR output_twist;

   // kernel is a polynomial of degree 2d.
   // The coefficients are (a + k*b)^(-1) for -d <= k <= d.
   // We also store its length 2d FFT.
   POLY kernel;
   FFTREP kernel_fft;

   // Polynomials for scratch space in shift()
   POLY scratch, scratch2;

   // Constructor (performs various precomputations)
   DyadicShifter(int n, const SCALAR& a, const SCALAR& b)
   {
      assert(n >= 1);
      this->n = n;
      d = 1 << n;

      // ------------------------ compute input_twist -------------------------

      input_twist.SetLength(d/2 + 1);

      // prod = (d!)^(-1)
      SCALAR prod;
      prod = 1;
      for (int i = 2; i <= d; i++)
         mul(prod, prod, i);
      prod = 1 / prod;

      // input_twist[i] = ((d-i)!)^(-1)
      input_twist[0] = prod;
      for (int i = 1; i <= d/2; i++)
         mul(input_twist[i], input_twist[i-1], d-(i-1));

      // input_twist[i] = ((d-i)!*i!)^(-1)
      prod = input_twist[d/2];
      for (int i = d/2; i >= 0; i--)
      {
         mul(input_twist[i], input_twist[i], prod);
         mul(prod, prod, i);
      }

      // input_twist[i] = \prod_{0 <= j <= d, j != i} (i-j)^(-1) :-)
      for (int i = 1; i <= d/2; i += 2)
         NTL::negate(input_twist[i], input_twist[i]);

      // ----------------- compute output_twist and kernel --------------------

      output_twist.SetLength(d+1);

      // c[i] = c_i = a + (i-d)*b for 0 <= i <= 2d
      VECTOR c;
      c.SetLength(2*d+1);
      c[0] = a - d*b;
      for (int i = 1; i <= 2*d; i++)
         add(c[i], c[i-1], b);

      // accum[i] = c_0 * c_1 * ... * c_i for 0 <= i <= 2d
      VECTOR accum;
      accum.SetLength(2*d+1);
      accum[0] = c[0];
      for (int i = 1; i <= 2*d; i++)
         mul(accum[i], accum[i-1], c[i]);

      // accum_inv[i] = (c_0 * c_1 * ... * c_i)^(-1) for 0 <= i <= 2d
      VECTOR accum_inv;
      accum_inv.SetLength(2*d+1);
      accum_inv[2*d] = 1 / accum[2*d];
      for (int i = 2*d-1; i >= 0; i--)
         mul(accum_inv[i], accum_inv[i+1], c[i+1]);

      // kernel[i] = (c_i)^(-1) for 0 <= i <= 2d
      kernel.rep.SetLength(2*d+1);
      kernel.rep[0] = accum_inv[0];
      for (int i = 1; i <= 2*d; i++)
         mul(kernel.rep[i], accum_inv[i], accum[i-1]);

      // precompute transform of kernel
      forward_fft<POLY, FFTREP>(kernel_fft, kernel, n+1, 0, 2*d);

      // output_twist[i] = b^{-d} * c_i * c_{i+1} * ... * c_{i+d}
      // for 0 <= i <= d
      SCALAR factor = power(b, -d);
      SCALAR temp;
      output_twist.SetLength(d+1);
      output_twist[0] = factor * accum[d];
      for (int i = 1; i <= d; i++)
      {
         mul(temp, factor, accum[i+d]);
         mul(output_twist[i], temp, accum_inv[i-1]);
      }
   }


   // Shifts evaluation values as described above.
   // Assumes both output and input have length d + 1.
   void shift(VECTOR& output, const VECTOR& input)
   {
      assert(input.length() == d+1);
      assert(output.length() == d+1);

      // multiply inputs pointwise by input_twist
      scratch.rep.SetLength(d+1);
      for (int i = 0; i <= d/2; i++)
         mul(scratch.rep[i], input[i], input_twist[i]);
      for (int i = 1; i <= d/2; i++)
         mul(scratch.rep[i+d/2], input[i+d/2], input_twist[d/2-i]);

      middle_product<SCALAR, POLY, FFTREP>(scratch2, scratch, kernel,
                                           kernel_fft, n);

      // multiply outputs pointwise by output_twist
      for (int i = 0; i <= d; i++)
         mul(output[i], scratch2.rep[i+d], output_twist[i]);
   }
};



/*
   Let M0 and M1 be square matrices of size n*n. Let M(x) = M0 + x*M1; this is
   a matrix of linear polys in x. Let P(x) = M(x+1) M(x+2) ... M(x+2^s); this
   is a matrix of polynomials of degree 2^s. This function computes the values
      P(a), P(a + 2^t), P(a + 2*2^t), ..., P(a + 2^s*2^t).

   The output array should have length n^2. Each entry should be a vector of
   length 2^s+1, pre-initialised to all zeroes. The (y*n + x)-th vector will be
   the values of the (y, x) entries of the above list of matrices. (This data
   format is optimised for the case that 2^s+1 is much larger than n.)

   PRECONDITIONS:
      0 <= s <= t
      2, 3, ..., 2^t + 1 must be invertible
*/
template <typename SCALAR, typename POLY, typename VECTOR,
          typename MATRIX, typename FFTREP>
void dyadic_evaluation(vector<VECTOR>& output,
                       const MATRIX& M0, const MATRIX& M1,
                       int s, int t, const SCALAR& a)
{
   int n = M0.NumRows();

   // base cases; just evaluate naively
   if (s <= 1)
   {
      MATRIX X[3];

      if (s == 0)
      {
         X[0] = M0 + (a+1) * M1;
         X[1] = M0 + (a+1 + (1 << t)) * M1;
      }
      else
      {
         for (int i = 0; i <= 2; i++)
            X[i] = (M0 + (a+1 + (i << t)) * M1) * (M0 + (a+2 + (i << t)) * M1);
      }

      for (int x = 0; x < n; x++)
         for (int y = 0; y < n; y++)
            for (int i = 0; i < output[0].length(); i++)
               output[y*n + x][i] = X[i][y][x];

      return;
   }

   // General case.
   // Let Q(x) = M(x+1) M(x+2) ... M(x+2^(s-1)).

   // Recursively compute Q(a), Q(a + 2^t), ..., Q(a + 2^(s-1)*2^t).
   vector<VECTOR> X(n*n);
   for (int i = 0; i < n*n; i++)
      X[i].SetLength((1 << (s-1)) + 1);
   dyadic_evaluation<SCALAR, POLY, VECTOR, MATRIX, FFTREP>
      (X, M0, M1, s-1, t, a);

   // Do precomputations for shifting by 2^(s-1) and by (2^(s-1)+1)*2^t
   SCALAR c, b;
   c = 1 << (s-1);
   b = 1 << t;
   DyadicShifter<SCALAR, POLY, VECTOR, FFTREP> shifter1(s-1, c, b);
   DyadicShifter<SCALAR, POLY, VECTOR, FFTREP> shifter2(s-1, (c + 1) * b, b);

   // Shift by 2^(s-1) to obtain
   // Q(a + 2^(s-1)), Q(a + 2^t + 2^(s-1)), ..., Q(a + 2^(s-1)*2^t + 2^(s-1))
   vector<VECTOR> Y(n*n);
   for (int i = 0; i < n*n; i++)
   {
      Y[i].SetLength((1 << (s-1)) + 1);
      shifter1.shift(Y[i], X[i]);
   }

   // Multiply matrices to obtain
   // P(a), P(a + 2^t), ..., P(a + 2^(s-1)*2^t).
   SCALAR temp;
   for (int i = 0; i <= (1 << (s-1)); i++)
      for (int x = 0; x < n; x++)
         for (int y = 0; y < n; y++)
            for (int z = 0; z < n; z++)
            {
               mul(temp, X[y*n + z][i], Y[z*n + x][i]);
               output[y*n + x][i] += temp;
            }

   // Shift original sequence by (2^(s-1)+1)*2^t to obtain
   // Q(a + (2^(s-1)+1)*2^t), Q(a + (2^(s-1)+2)*2^t), ..., Q(a + (2^s+1)*2^t).
   for (int i = 0; i < n*n; i++)
      shifter2.shift(Y[i], X[i]);

   // Shift again by 2^(s-1) to obtain
   // Q(a + (2^(s-1)+1)*2^t + 2^(s-1)), Q(a + (2^(s-1)+2)*2^t + 2^(s-1)), ...,
   // Q(a + (2^s+1)*2^t + 2^(s-1)).
   for (int i = 0; i < n*n; i++)
      shifter1.shift(X[i], Y[i]);

   // Multiply matrices to obtain
   // P(a + (2^(s-1)+1)*2^t), P(a + (2^(s-1)+2)*2^t), ..., P(a + (2^s+1)*2^t).
   // (we throw out the last one since it's surplus to requirements)
   for (int i = 0; i < (1 << (s-1)); i++)
      for (int x = 0; x < n; x++)
         for (int y = 0; y < n; y++)
            for (int z = 0; z < n; z++)
            {
               mul(temp, Y[y*n + z][i], X[z*n + x][i]);
               output[y*n + x][i + (1 << (s-1)) + 1] += temp;
            }
}



/* ============================================================================

   General evaluation stuff

   This section essentially implements Corollary 10 of [BGS].

============================================================================ */


/*
   This struct stores the product tree associated to a vector a[0], ..., a[n-1].

   The top node stores the polynomial product
      (x - a[0]) ... (x - a[n-1]).
   The two children nodes store
      (x - a[0]) ... (x - a[m-1])
   and
      (x - a[m]) ... (x - a[n-1]),
   where m = floor(n/2). This continues recursively until we reach n = 1, in
   which case just the polynomial x - a[0] is stored and there are no children.
*/
template <typename SCALAR, typename POLY, typename VECTOR>
struct ProductTree
{
   // polynomial product stored at this node
   POLY poly;

   // children for left and right halves, if deg(poly) > 1
   ProductTree* child1;
   ProductTree* child2;

   // These are temp polys used by the Evaluator and Interpolator classes.
   // It's not very hygienic to keep them here... but it makes things more
   // efficient, because we need two temps for each node, and this prevents
   // unnecessary reallocations.
   // (The lengths will be the same on repeated calls to evaluate() and
   // interpolate().)
   POLY scratch1, scratch2;

   // Constructs product tree for the supplied vector.
   ProductTree(const VECTOR& points)
   {
      build(points, 0, points.length());
   }

   ProductTree(const VECTOR& points, int start, int end)
   {
      build(points, start, end);
   }

   // Constructs product tree recursively for the subset [start, end) of
   // the supplied vector.
   void build(const VECTOR& points, int start, int end)
   {
      assert(end - start >= 1);
      assert(start >= 0);
      assert(end <= points.length());

      if (end - start == 1)
      {
         SetCoeff(poly, 1, 1);
         SetCoeff(poly, 0, -points[start]);
      }
      else
      {
         int m = (end - start) / 2;
         child1 = new ProductTree(points, start, start + m);
         child2 = new ProductTree(points, start + m, end);
         mul(poly, child1->poly, child2->poly);
      }
   }

   ~ProductTree()
   {
      if (deg(poly) > 1)
      {
         delete child1;
         delete child2;
      }
   }
};



/*
   Given a list of evaluation points a[0], ..., a[n-1], this struct stores some
   precomputed information to permit evaluating an arbitrary polynomial at
   those points.
*/
template <typename SCALAR, typename POLY,
          typename POLYMODULUS, typename VECTOR>
struct Evaluator
{
   // The product tree for the evaluation points
   ProductTree<SCALAR, POLY, VECTOR>* tree;

   // A list of NTL ZZ_pXModulus/zz_pXModulus objects corresponding to the
   // polynomials in the product tree, in the order that they get used as the
   // tree is traversed in recursive_evaluate().
   vector<POLYMODULUS> moduli;

   // Constructs evaluator object for the given list of evaluation points
   Evaluator(const VECTOR& points)
   {
      assert(points.length() >= 1);
      tree = new ProductTree<SCALAR, POLY, VECTOR>(points);
      moduli.reserve(2*points.length());
      build(tree);
      assert(moduli.size() <= 2*points.length());
   }

   // Computes modulus objects for each polynomial under the supplied node of
   // the product tree, and appends them in traversal order to "moduli".
   void build(const ProductTree<SCALAR, POLY, VECTOR>* node)
   {
      if (deg(node->poly) > 1)
      {
         moduli.push_back(POLYMODULUS(node->poly));
         build(node->child1);
         build(node->child2);
      }
   }

   ~Evaluator()
   {
      delete tree;
   }

   // Evaluates the input polynomial at the evaluation points, and writes the
   // results to output. The output array must have the correct length.
   void evaluate(VECTOR& output, const POLY& input)
   {
      recursive_evaluate(output, input, tree, 0, 0);
   }

   // Evaluates the input polynomial at the subset [start, end) of the
   // evaluation points, which should correspond to the supplied product tree
   // node. (The length of the interval is implied by the degree of the poly
   // at that node.) Writes the output to the subset [start, end) of the
   // output array. The index parameter indicates which modulus in "moduli"
   // to use for this node of the tree.
   // The return value is the index for the modulus that should be used
   // immediately after this call.
   int recursive_evaluate(VECTOR& output, const POLY& input,
                          ProductTree<SCALAR, POLY, VECTOR>* node,
                          int start, int index)
   {
      if (deg(node->poly) == 1)
      {
         eval(output[start], input, -coeff(node->poly, 0));
      }
      else
      {
         rem(node->scratch1, input, moduli[index++]);
         index = recursive_evaluate(output, node->scratch1, node->child1,
                                    start, index);
         index = recursive_evaluate(output, node->scratch1, node->child2,
                                    start + deg(node->child1->poly), index);
      }
      return index;
   }
};


/*
   Given an integer L >= 1, this struct does some precomputations to permit
   interpolating a polynomial whose values at 0, 1, ..., L are known.

   PRECONDITIONS:
      1, 2, ..., L must be invertible.
*/
template <typename SCALAR, typename POLY, typename VECTOR>
struct Interpolator
{
   ProductTree<SCALAR, POLY, VECTOR>* tree;
   int L;

   // input_twist is a vector of length L+1.
   // The i-th entry is \prod_{0 <= j <= L, j != i} (i-j)^(-1).
   VECTOR input_twist;

   // vector of length L+1, used in interpolate()
   VECTOR temp;

   // Performs various precomputations for the given L.
   Interpolator(int L)
   {
      this->L = L;
      temp.SetLength(L+1);

      // Build a product tree for the evaluation points
      for (int i = 0; i <= L; i++)
         temp[i] = i;
      tree = new ProductTree<SCALAR, POLY, VECTOR>(temp);

      // prod = (L!)^(-1)
      SCALAR prod;
      prod = 1;
      for (int i = 2; i <= L; i++)
         mul(prod, prod, i);
      prod = 1 / prod;

      // input_twist[i] = (i!)^(-1), 0 <= i <= L
      input_twist.SetLength(L+1);
      input_twist[L] = prod;
      for (int i = L; i >= 1; i--)
         mul(input_twist[i-1], input_twist[i], i);

      // input_twist[i] = \prod_{0 <= j <= L, j != i} (i-j)^(-1).
      for (int i = 0; i <= L/2; i++)
      {
         mul(input_twist[i], input_twist[i], input_twist[L-i]);
         input_twist[L-i] = input_twist[i];
      }
      for (int i = L-1; i >= 0; i -= 2)
         NTL::negate(input_twist[i], input_twist[i]);
   }

   ~Interpolator()
   {
      delete tree;
   }


   // Returns the polynomial
   //    \sum_{i=start}^{end-1} values[i] * (x - start) (x - (start+1)) ... (x - (end-1)),
   // where [start, end) is the interval associated to the supplied product
   // tree node, and where the (x-i) term is omitted in each product.
   void combine(POLY& output, const VECTOR& values,
                ProductTree<SCALAR, POLY, VECTOR>* node, int start)
   {
      if (deg(node->poly) == 1)
      {
         // base case
         clear(output);
         SetCoeff(output, 0, values[start]);
      }
      else
      {
         // recursively build up from two halves:
         // i.e. if f1, f2 are the results of "combine" for the two halves,
         // and if p1, p2 are the associated product tree polys, we compute
         // f1*p2 + f2*p1

         combine(node->scratch1, values, node->child1, start);
         mul(output, node->scratch1, node->child2->poly);

         combine(node->scratch1, values, node->child2,
                 start + deg(node->child1->poly));
         mul(node->scratch2, node->scratch1, node->child1->poly);

         add(output, output, node->scratch2);
      }
   }

   // Returns a polynomial F(x) of degree at most L such that F(i) = values[i]
   // for each 0 <= i <= L.
   void interpolate(POLY& output, const VECTOR& values)
   {
      assert(values.length() == L+1);

      // multiply input values pointwise by input_twist; this corrects for
      // the factor (i-0) (i-1) ... (i-L) (where the i-i factor is omitted).
      for (int i = 0; i <= L; i++)
         mul(temp[i], values[i], input_twist[i]);

      // do the interpolation
      combine(output, temp, tree, 0);
   }
};



/* ============================================================================

   Matrix products over arbitrary, relatively short intervals

   This section implements something similar to steps 1, 2, ... and the final
   refining step of Theorem 15 of [BGS].

============================================================================ */


/*
   Let M0 and M1 be matrices of constants. This function evaluates
      M(x) = M0 + x*M1
   at x = a.

   The output matrix must already have the correct dimensions.
*/
template <typename SCALAR, typename MATRIX>
void eval_matrix(MATRIX& output, const MATRIX& M0, const MATRIX& M1,
                 const SCALAR& a)
{
   int n = M0.NumRows();
   for (int x = 0; x < n; x++)
      for (int y = 0; y < n; y++)
      {
         mul(output[x][y], a, M1[x][y]);
         add(output[x][y], output[x][y], M0[x][y]);
      }
}



/*
   Similar to ntl_interval_products. This is used as a subroutine of
   ntl_interval_products() to handle the smaller "refining" subintervals.
   Its asymptotic complexity theoretically has an extra logarithmic factor
   over that of ntl_interval_products().

   PRECONDITIONS:
      Let d = sum of lengths of intervals. Then 2, 3, ..., 1 + floor(sqrt(d))
      must all be invertible.
*/
template <typename SCALAR, typename POLY, typename POLYMODULUS,
          typename VECTOR, typename MATRIX>
void ntl_short_interval_products(vector<MATRIX>& output,
                                 const MATRIX& M0, const MATRIX& M1,
                                 const vector<ZZ>& target)
{
   output.clear();

   if (target.size() == 0)
      return;

   int dim = M0.NumRows();
   int num_intervals = target.size() / 2;

   // Determine maximum target interval length
   int max_length = -1;
   for (int i = 0; i < target.size(); i += 2)
   {
      int temp = to_ulong(target[i+1] - target[i]);
      if (temp > max_length)
         max_length = temp;
   }

   // Select an appropriate length for the matrix products we'll use
   int L, max_eval_points;
   if (max_length > 2*num_intervals)
   {
      // The intervals are still pretty long relative to the number of
      // intervals, so we're only going to do a single multipoint
      // evaluation.
      L = 1 + to_ulong(SqrRoot(num_intervals * to_ZZ(max_length)));
      max_eval_points = L;
   }
   else
   {
      // The intervals are getting pretty short, so we probably will need
      // to do several shorter multipoint evaluations.
      L = 1 + max_length/2;
      max_eval_points = num_intervals;
   }

   // =========================================================================
   // Step 1: compute entries of M(X, X+L) as polynomials in X.

   vector<POLY> polys(dim*dim);
   {
      // left_accum[i] = M(L-i-1, L) for 0 <= i <= L-1
      // right_accum[i] = M(L, L+i+1) for 0 <= i <= L-1
      vector<MATRIX> left_accum(L), right_accum(L);

      MATRIX temp;
      temp.SetDims(dim, dim);

      left_accum[0].SetDims(dim, dim);
      eval_matrix<SCALAR, MATRIX>(left_accum[0], M0, M1, to_scalar<SCALAR>(L));
      for (int i = L-1; i >= 1; i--)
      {
         eval_matrix<SCALAR, MATRIX>(temp, M0, M1, to_scalar<SCALAR>(i));
         mul(left_accum[L-i], temp, left_accum[L-i-1]);
      }

      right_accum[0].SetDims(dim, dim);
      eval_matrix<SCALAR, MATRIX>(right_accum[0], M0, M1,
                                  to_scalar<SCALAR>(L+1));
      for (int i = 1; i <= L-1; i++)
      {
         eval_matrix<SCALAR, MATRIX>(temp, M0, M1, to_scalar<SCALAR>(L+1+i));
         mul(right_accum[i], right_accum[i-1], temp);
      }

      // Use left_accum and right_accum to compute:
      //    initial[i] = M(i, L+i) for 0 <= i <= L
      // i.e. initial[i] are the values of M(X, X+L) at X = 0, 1, ..., L.
      vector<MATRIX> initial(L+1);
      initial[0] = left_accum.back();
      initial[L] = right_accum.back();
      for (int i = 1; i <= L-1; i++)
         mul(initial[i], left_accum[L-1-i], right_accum[i-1]);

      // Now interpolate entries of initial[i] to get entries of M(X, X+L)
      // as polynomials of degree L.
      Interpolator<SCALAR, POLY, VECTOR> interpolator(L);
      VECTOR values;
      values.SetLength(L+1);
      for (int x = 0; x < dim; x++)
         for (int y = 0; y < dim; y++)
         {
            for (int j = 0; j <= L; j++)
               values[j] = initial[j][y][x];
            interpolator.interpolate(polys[y*dim + x], values);
         }
   }

   // =========================================================================
   // Step 2: decompose intervals into subintervals of length L which we'll
   // attack by direct multipoint evaluation, plus leftover pieces that we'll
   // handle with a recursive call to ntl_short_interval_products().

   // eval_points holds all the values of X for which we want to
   // evaluate M(X, X+L)
   VECTOR eval_points;
   eval_points.SetMaxLength(max_eval_points);

   // leftover_target is the list of leftover intervals that we're going to
   // handle recursively later
   vector<ZZ> leftover_target;
   leftover_target.reserve(target.size());

   ZZ current, next;
   for (int i = 0; i < target.size(); i += 2)
   {
      current = target[i];
      next = current + L;
      while (next <= target[i+1])
      {
         // [current, next) fits inside this interval, so peel it off into
         // eval_points
         append(eval_points, to_scalar<SCALAR>(current));
         swap(current, next);
         next = current + L;
      }
      if (current < target[i+1])
      {
         // the rest of this interval is too short to handle with M(X, X+L),
         // so put it in the leftover bin
         leftover_target.push_back(current);
         leftover_target.push_back(target[i+1]);
      }
   }

   // =========================================================================
   // Step 3: recursively handle leftover pieces

   // leftover_matrices[i] holds the matrix for leftover interval #i
   vector<MATRIX> leftover_matrices;
   ntl_short_interval_products<SCALAR, POLY, POLYMODULUS, VECTOR, MATRIX>
      (leftover_matrices, M0, M1, leftover_target);

   // =========================================================================
   // Step 4: evaluate M(X, X+L) at each of the evaluation points.
   // We do this by breaking up the list of evaluation points into blocks of
   // length at most L+1, and using multipoint evaluation on each block.

   // main_matrices[i] will hold M(X, X+L) for the i-th evaluation point X.
   vector<MATRIX> main_matrices(eval_points.length());
   for (int i = 0; i < main_matrices.size(); i++)
      main_matrices[i].SetDims(dim, dim);

   VECTOR block, values;
   block.SetMaxLength(L+1);
   values.SetMaxLength(L+1);

   // for each block...
   for (int i = 0; i < eval_points.length(); i += (L+1))
   {
      // determine length of this block, which is at most L+1
      int length = eval_points.length() - i;
      if (length >= (L+1))
         length = (L+1);
      block.SetLength(length);

      // construct Evaluator object for evaluating at these points
      for (int j = 0; j < length; j++)
         block[j] = eval_points[i+j];
      Evaluator<SCALAR, POLY, POLYMODULUS, VECTOR> evaluator(block);

      // evaluate each entry of M(X, X+L) at those points
      for (int x = 0; x < dim; x++)
         for (int y = 0; y < dim; y++)
         {
            evaluator.evaluate(values, polys[y*dim + x]);
            for (int k = 0; k < length; k++)
               main_matrices[i+k][y][x] = values[k];
         }
   }

   // =========================================================================
   // Step 5: merge together the matrices obtained from the multipoint
   // evaluation step and the recursive leftover interval step.

   output.clear();
   output.resize(target.size() / 2);
   for (int i = 0; i < target.size()/2; i++)
      output[i].SetDims(dim, dim);

   int main_index = 0;       // index into main_matrices
   int leftover_index = 0;   // index into leftover_matrices

   MATRIX temp;
   temp.SetDims(dim, dim);

   for (int i = 0; i < target.size(); i += 2)
   {
      current = target[i];
      next = current + L;
      ident(output[i/2], dim);

      while (next <= target[i+1])
      {
         // merge in a matrix from multipoint evaluation step
         mul(temp, output[i/2], main_matrices[main_index++]);
         swap(temp, output[i/2]);
         swap(current, next);
         next = current + L;
      }
      if (current < target[i+1])
      {
         // merge in a matrix from leftover interval step
         mul(temp, output[i/2], leftover_matrices[leftover_index++]);
         swap(temp, output[i/2]);
      }
   }
}


/* ============================================================================

   Matrix products over arbitrary, long intervals

   This section implements an algorithm similar to Theorem 15 of [BGS].

============================================================================ */


/*
   See interval_products_wrapper().

   NOTE:
   This algorithm works best if the intervals are very long and don't have
   much space between them.
   The case where the gaps are relatively large is best handled by
   ntl_short_interval_products().
*/
template <typename SCALAR, typename POLY, typename POLYMODULUS,
          typename VECTOR, typename MATRIX, typename FFTREP>
void ntl_interval_products(vector<MATRIX>& output,
                           const MATRIX& M0, const MATRIX& M1,
                           const vector<ZZ>& target)
{
   assert(target.size() % 2 == 0);
   output.resize(target.size() / 2);

   int dim = M0.NumRows();
   assert(dim == M0.NumCols());
   assert(dim == M1.NumRows());
   assert(dim == M1.NumCols());

   // =========================================================================
   // Step 0: get as many intervals as possible using dyadic_evaluation().

   // step0_matrix[i] is the transition matrix between step0_index[2*i]
   // and step0_index[2*i+1].
   vector<MATRIX> step0_matrix;
   vector<ZZ> step0_index;
   // preallocate the maximum number of matrices that could arise (plus safety)
   int reserve_size = target.size() +
                      4*NumBits(target.back() - target.front());
   step0_matrix.reserve(reserve_size);
   step0_index.reserve(2 * reserve_size);

   ZZ current_index = target.front();
   int next_target = 0;   // index into "target" array

   // This flag indicates whether the last entry of step0_matrix is
   // still accumulating matrices (in which case the right endpoint of the
   // corresponding interval hasn't been written to step0_index yet).
   int active = 0;

   MATRIX temp_mat;
   temp_mat.SetDims(dim, dim);

   while (current_index < target.back() - 3)
   {
      // find largest t such that 2^t*(2^t + 1) <= remaining distance to go
      ZZ remaining = target.back() - current_index;
      int t = 0;
      while ((to_ZZ(1) << (2*t)) + (1 << t) <= remaining)
         t++;
      t--;

      // evaluate matrices for 2^t+1 intervals of length 2^t
      vector<VECTOR> dyadic_output(dim*dim);
      for (int i = 0; i < dim*dim; i++)
         dyadic_output[i].SetLength((1 << t) + 1);
      dyadic_evaluation<SCALAR, POLY, VECTOR, MATRIX, FFTREP>
         (dyadic_output, M0, M1, t, t, to_scalar<SCALAR>(current_index));

      // Walk through the intervals we just computed. Find maximal subsequences
      // of intervals none of which contain any target endpoints.
      // Merge them together (by multiplying the appropriate matrices) and
      // store results in step0_matrix, step0_index.

      SCALAR scratch;

      for (int i = 0; i <= (1 << t); i++, current_index += (1 << t))
      {
         assert(next_target == target.size() ||
                target[next_target] >= current_index);

         // Skip over target endpoints which are exactly at the beginning
         // of this interval
         while ((next_target < target.size()) &&
                (target[next_target] == current_index))
         {
            // if there's an active matrix, don't forget to close it off
            if (active)
            {
               step0_index.push_back(current_index);
               active = 0;
            }
            next_target++;
         }

         // Test if any target endpoints are strictly within this interval.
         if ((next_target == target.size()) ||
             (target[next_target] >= current_index + (1 << t)))
         {
            // There are no target endpoints in this interval.
            if (active)
            {
               // Merge this matrix with the active one
               MATRIX& active_mat = step0_matrix.back();
               for (int y = 0; y < dim; y++)
                  for (int x = 0; x < dim; x++)
                  {
                     SCALAR& accum = temp_mat[y][x];
                     accum = 0;
                     for (int z = 0; z < dim; z++)
                     {
                        mul(scratch, active_mat[y][z],
                            dyadic_output[z*dim + x][i]);
                        add(accum, accum, scratch);
                     }
                  }

               swap(temp_mat, active_mat);
            }
            else
            {
               // Make this matrix into a new active one
               step0_index.push_back(current_index);
               step0_matrix.resize(step0_matrix.size() + 1);
               MATRIX& X = step0_matrix.back();
               X.SetDims(dim, dim);
               for (int y = 0; y < dim; y++)
                  for (int x = 0; x < dim; x++)
                     X[y][x] = dyadic_output[y*dim + x][i];
               active = 1;
            }
         }
         else
         {
            // There are target endpoints in this interval.
            if (active)
            {
               // If there is still an active matrix, close it off.
               step0_index.push_back(current_index);
               active = 0;
            }

            // skip over any other endpoints in this interval
            while ((next_target < target.size()) &&
                   (target[next_target] < current_index + (1 << t)))
            {
               next_target++;
            }
         }
      }
   }

   // If there is still an active matrix, close it off.
   if (active)
      step0_index.push_back(current_index);

   assert(step0_index.size() == 2*step0_matrix.size());

   // =========================================================================
   // Step 1: Make a list of all subintervals that we are going to need in
   // the refining steps.

   int next_step0 = 0;       // index into step0_index
   vector<ZZ> step1_index;   // list of pairs of endpoints of needed intervals
   step1_index.reserve(2*target.size());

   // add sentinel endpoints to make the next loop simpler:
   step0_index.push_back(target.back() + 10);
   step0_index.push_back(target.back() + 20);

   for (next_target = 0; next_target < target.size(); next_target += 2)
   {
      // skip dyadic intervals that come before this target interval
      while (step0_index[next_step0+1] <= target[next_target])
         next_step0 += 2;

      if (step0_index[next_step0] < target[next_target+1])
      {
         // The next dyadic interval starts before the end of this target
         // interval.
         if (step0_index[next_step0] > target[next_target])
         {
            // The next dyadic interval starts strictly within this target
            // interval, so we need to create a refining subinterval for the
            // initial segment of this target interval.
            step1_index.push_back(target[next_target]);
            step1_index.push_back(step0_index[next_step0]);
         }

         // Skip over dyadic intervals to find the last one still contained
         // within this target interval.
         while (step0_index[next_step0+3] <= target[next_target+1])
            next_step0 += 2;

         if (step0_index[next_step0+1] < target[next_target+1])
         {
            // The next dyadic interval finishes strictly within this target
            // interval, so we need to create a refining subinterval for the
            // final segment of this target interval.
            step1_index.push_back(step0_index[next_step0+1]);
            step1_index.push_back(target[next_target+1]);
         }

         // Move on to next dyadic interval
         next_step0 += 2;
      }
      else
      {
         // The next dyadic interval starts beyond (or just at the end of)
         // this target interval, so we need to create a refining subinterval
         // for this *whole* target interval.
         step1_index.push_back(target[next_target]);
         step1_index.push_back(target[next_target+1]);
      }
   }

   // remove sentinels for my sanity
   step0_index.pop_back();
   step0_index.pop_back();

   // Step 1b: Compute matrix products over those refining subintervals.
   vector<MATRIX> step1_matrix;
   ntl_short_interval_products<SCALAR, POLY, POLYMODULUS, VECTOR, MATRIX>
      (step1_matrix, M0, M1, step1_index);

   assert(step1_index.size() == 2 * step1_matrix.size());

   // =========================================================================
   // Step 2: Merge together the dyadic intervals and refining intervals into
   // a single list, in sorted order.

   vector<MATRIX> step2_matrix(step0_matrix.size() + step1_matrix.size());
   vector<ZZ> step2_index(step0_index.size() + step1_index.size());

   // add sentinels to make the next loop simpler
   step0_index.push_back(target.back() + 10);
   step0_index.push_back(target.back() + 20);
   step1_index.push_back(target.back() + 10);
   step1_index.push_back(target.back() + 20);

   next_step0 = 0;       // index into step0_matrix
   int next_step1 = 0;   // index into step1_matrix

   for (int next_step2 = 0; next_step2 < step2_matrix.size(); next_step2++)
   {
      if (step0_index[2*next_step0] < step1_index[2*next_step1])
      {
         // grab a matrix and pair of indices from step0
         swap(step2_matrix[next_step2], step0_matrix[next_step0]);
         step2_index[2*next_step2] = step0_index[2*next_step0];
         step2_index[2*next_step2+1] = step0_index[2*next_step0+1];
         next_step0++;
      }
      else
      {
         // grab a matrix and pair of indices from step1
         swap(step2_matrix[next_step2], step1_matrix[next_step1]);
         step2_index[2*next_step2] = step1_index[2*next_step1];
         step2_index[2*next_step2+1] = step1_index[2*next_step1+1];
         next_step1++;
      }
   }

   // remove sentinels for my sanity
   step0_index.pop_back();
   step0_index.pop_back();
   step1_index.pop_back();
   step1_index.pop_back();

   assert(step2_index.size() == 2*step2_matrix.size());

   // =========================================================================
   // Step 3: Walk through target intervals, and merge together appropriate
   // intervals from step2 to get those target intervals.

   int next_step2 = 0;   // index into step2_matrix

   // add sentinels to make the next loop simpler
   step2_index.push_back(target.back() + 1);
   step2_index.push_back(target.back() + 2);

   for (int next_target = 0; next_target < target.size(); next_target += 2)
   {
      // search for step2 interval matching the start of this target interval
      while (step2_index[2*next_step2] < target[next_target])
         next_step2++;

      assert(step2_index[2*next_step2] == target[next_target]);

      // merge together matrices for step2 intervals contained in this target
      // interval
      swap(step2_matrix[next_step2++], output[next_target/2]);
      while (step2_index[2*next_step2+1] <= target[next_target+1])
      {
         mul(temp_mat, output[next_target/2], step2_matrix[next_step2]);
         swap(temp_mat, output[next_target/2]);
         next_step2++;
      }
   }
}


// explicit instantiations for zz_p and ZZ_p versions:

template void ntl_interval_products
   <ZZ_p, ZZ_pX, ZZ_pXModulus, vec_ZZ_p, mat_ZZ_p, FFTRep>
   (vector<mat_ZZ_p>& output, const mat_ZZ_p& M0, const mat_ZZ_p& M1,
    const vector<ZZ>& target);

template void ntl_interval_products
   <zz_p, zz_pX, zz_pXModulus, vec_zz_p, mat_zz_p, fftRep>
   (vector<mat_zz_p>& output, const mat_zz_p& M0, const mat_zz_p& M1,
    const vector<ZZ>& target);


}; // namespace hypellfrob


// ----------------------- end of file
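

A minimal usage sketch (not part of the original source) may help readers coming to this file from the Sage tree. It assumes that "recurrences_ntl.h" declares ntl_interval_products as instantiated above, and it assumes, from a reading of the code rather than from any stated documentation, that the matrix returned for a target interval [u, v] is M(u+1) M(u+2) ... M(v). Under those assumptions, driving the 1x1 matrix M(x) = x over the single interval [0, 20] should print 20! modulo the arbitrarily chosen prime 1000003. The higher-level entry point is presumably interval_products_wrapper(), mentioned in the NOTE above.

#include <iostream>
#include <vector>
#include <NTL/lzz_p.h>
#include <NTL/lzz_pX.h>
#include <NTL/mat_lzz_p.h>
#include "recurrences_ntl.h"   // assumed to declare ntl_interval_products

NTL_CLIENT

int main()
{
   zz_p::init(1000003);   // arbitrary prime modulus, large enough for the
                          // invertibility preconditions stated above

   // M(x) = M0 + x*M1 with M0 = [0] and M1 = [1], i.e. the 1x1 matrix [x].
   mat_zz_p M0, M1;
   M0.SetDims(1, 1);
   M1.SetDims(1, 1);
   M1[0][0] = 1;

   // A single target interval [0, 20], encoded as a flat list of endpoints.
   std::vector<ZZ> target;
   target.push_back(to_ZZ(0));
   target.push_back(to_ZZ(20));

   // One output matrix per target interval.
   std::vector<mat_zz_p> out;
   hypellfrob::ntl_interval_products<zz_p, zz_pX, zz_pXModulus,
                                     vec_zz_p, mat_zz_p, fftRep>
      (out, M0, M1, target);

   // Expected (under the assumptions above): 20! mod 1000003.
   std::cout << out[0][0][0] << "\n";
   return 0;
}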