CoCalc -- recurrences_zn

GitHub Repository: sagemath/sagelib
Path: blob/master/sage/schemes/hyperelliptic_curves/hypellfrob/recurrences_zn_poly.cpp
⁴¹⁰⁸ views
1
/* ============================================================================
2

3
   recurrences_zn_poly.cpp:  recurrences solved via zn_poly arithmetic
4
   
5
   This file is part of hypellfrob (version 2.1.1).
6

7
   Copyright (C) 2007, 2008, David Harvey
8

9
   This program is free software; you can redistribute it and/or modify
10
   it under the terms of the GNU General Public License as published by
11
   the Free Software Foundation; either version 2 of the License, or
12
   (at your option) any later version.
13

14
   This program is distributed in the hope that it will be useful,
15
   but WITHOUT ANY WARRANTY; without even the implied warranty of
16
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
   GNU General Public License for more details.
18

19
   You should have received a copy of the GNU General Public License along
20
   with this program; if not, write to the Free Software Foundation, Inc.,
21
   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22

23
============================================================================ */
24

25

26
#include "recurrences_zn_poly.h"
27

28

29
NTL_CLIENT
30

31

32
namespace hypellfrob {
33

34

35
Shifter::~Shifter()
36
{
37
   zn_array_mulmid_precomp1_clear(kernel_precomp);
38
   free(input_twist);
39
}
40

41

42
Shifter::Shifter(ulong d, ulong a, ulong b, const zn_mod_t mod)
43
{
44
   this->d = d;
45
   this->mod = mod;
46

47
   input_twist = (ulong*) malloc(sizeof(ulong) * (3*d + 3));
48
   output_twist = input_twist + d + 1;
49
   scratch = output_twist + d + 1;
50

51
   ZZ modulus;
52
   modulus = zn_mod_get (mod);
53
   
54
   // ------------------------ compute input_twist -------------------------
55

56
   // prod = (d!)^(-1)
57
   ulong prod = 1;
58
   for (ulong i = 2; i <= d; i++)
59
      prod = zn_mod_mul(prod, i, mod);
60
   prod = to_ulong(InvMod(to_ZZ(prod), modulus));
61
   
62
   // input_twist[i] = ((d-i)!)^(-1)
63
   input_twist[0] = prod;
64
   for (ulong i = 1; i <= d; i++)
65
      input_twist[i] = zn_mod_mul(input_twist[i-1], d - (i-1), mod);
66

67
   // input_twist[i] = ((d-i)!*i!)^(-1)
68
   for (ulong i = 0; i <= d/2; i++)
69
   {
70
      input_twist[i] = zn_mod_mul(input_twist[i], input_twist[d-i], mod);
71
      input_twist[d-i] = input_twist[i];
72
   }
73

74
   // input_twist[i] = \prod_{0 <= j <= d, j != i} (i-j)^(-1)
75
   //                = (-1)^(d-i) ((d-i)!*i!)^(-1)
76
   for (long i = d - 1; i >= 0; i -= 2)
77
      input_twist[i] = zn_mod_neg(input_twist[i], mod);
78

79
   // ----------------- compute output_twist and kernel --------------------
80
   
81
   // need some temp space:
82
   // c, accum, accum_inv each of length 2d+1
83
   ulong* c = (ulong*) malloc(sizeof(ulong) * (6*d+3));
84
   ulong* accum = c + 2*d + 1;
85
   ulong* accum_inv = accum + 2*d + 1;
86
   ulong* kernel = c;   // overwrites c
87

88
   // c[i] = c_i = a + (i-d)*b     for 0 <= i <= 2d
89
   c[0] = zn_mod_sub(a, zn_mod_mul(zn_mod_reduce(d, mod), b, mod), mod);
90
   for (ulong i = 1; i <= 2*d; i++)
91
      c[i] = zn_mod_add(c[i-1], b, mod);
92

93
   // accum[i] = c_0 * c_1 * ... * c_i    for 0 <= i <= 2d
94
   accum[0] = c[0];
95
   for (ulong i = 1; i <= 2*d; i++)
96
      accum[i] = zn_mod_mul(accum[i-1], c[i], mod);
97

98
   // accum_inv[i] = (c_0 * c_1 * ... * c_i)^(-1)    for 0 <= i <= 2d
99
   accum_inv[2*d] = to_ulong(InvMod(to_ZZ(accum[2*d]), modulus));
100

101
   for (long i = 2*d - 1; i >= 0; i--)
102
      accum_inv[i] = zn_mod_mul(accum_inv[i+1], c[i+1], mod);
103

104
   // output_twist[i] = b^{-d} * c_i * c_{i+1} * ... * c_{i+d}
105
   // for 0 <= i <= d
106
   ulong factor = to_long(PowerMod(to_ZZ(b), -((long)d), modulus));
107
   output_twist[0] = zn_mod_mul(factor, accum[d], mod);
108
   for (ulong i = 1; i <= d; i++)
109
      output_twist[i] = zn_mod_mul(zn_mod_mul(factor, accum[i+d], mod),
110
                                   accum_inv[i-1], mod);
111

112
   // kernel[i] = (c_i)^(-1)    for 0 <= i <= 2d
113
   kernel[0] = accum_inv[0];
114
   for (ulong i = 1; i <= 2*d; i++)
115
      kernel[i] = zn_mod_mul(accum_inv[i], accum[i-1], mod);
116

117
   // precompute FFT of kernel
118
   zn_array_mulmid_precomp1_init(kernel_precomp, kernel, 2*d+1, d+1, mod);
119

120
   free(c);
121
}
122

123

124
void Shifter::shift(ulong* output, const ulong* input)
125
{
126
   // multiply inputs pointwise by input_twist
127
   for (ulong i = 0; i <= d; i++)
128
      scratch[i] = zn_mod_mul(input[i], input_twist[i], mod);
129

130
   // do middle product
131
   zn_array_mulmid_precomp1_execute(output, scratch, kernel_precomp);
132

133
   // multiply outputs pointwise by output_twist
134
   for (ulong i = 0; i <= d; i++)
135
      output[i] = zn_mod_mul(output[i], output_twist[i], mod);
136
}
137

138

139
/*
140
   Checks whether large_evaluate() will succeed for these choices of k and u.
141
   Returns 1 if okay, otherwise 0.
142
*/
143
int check_params(ulong k, ulong u, const zn_mod_t mod)
144
{
145
   ulong n = zn_mod_get (mod);
146

147
   if (k >= n || u >= n)
148
      return 0;
149

150
   if (k <= 1)
151
      return 1;
152
      
153
   if (k == n - 1)
154
      return 0;
155

156
   ulong k2 = k / 2;
157

158
   // need the following elements to be invertible:
159
   // u
160
   // 1, 2, ..., k + 1
161
   // k2 + i*u for -k2 <= i <= k2
162
   ulong prod = u;
163
   for (ulong i = 2; i <= k; i++)
164
      prod = zn_mod_mul(prod, i, mod);
165
   ulong temp = zn_mod_mul(k2, zn_mod_sub(1, u, mod), mod);
166
   for (ulong i = 0; i <= 2*k2; i++)
167
   {
168
      prod = zn_mod_mul(prod, temp, mod);
169
      temp = zn_mod_add(temp, u, mod);
170
   }
171
   
172
   ZZ x, y;
173
   x = prod;
174
   y = n;
175
   if (GCD(x, y) != 1)
176
      return 0;
177

178
   // check recursively below
179
   return check_params(k2, u, mod);
180
}
181

182

183

184
/*
185
   Let M0 and M1 be square matrices of size r*r. Let M(x) = M0 + x*M1; this
186
   is a matrix of linear polys in x. The matrices M0 and M1 are passed in
187
   row-major order.
188
   
189
   Let P(x) = M(x+1) M(x+2) ... M(x+k); this is a matrix of polynomials of
190
   degree k.
191
   
192
   This class attempts to compute the matrices
193
       P(0), P(u), P(2u), ..., P(ku).
194
       
195
   Usage:
196

197
   * Call the constructor.
198

199
   * Call evaluate() with half == 0. This computes the first half of the
200
   outputs, specifically P(mu) for 0 <= m <= k2, where k2 = floor(k/2).
201
   The (i, j) entry of P(mu) is stored in output[i*r+j][m+offset]. Each array
202
   output[i*r+j] must be preallocated to length k2 + 3. (The extra two matrices
203
   at the end are used for scratch space.)
204

205
   * Call evaluate() with half == 1. This computes the second half, namely
206
   P(mu) for k2 + 1 <= m <= k. The (i, j) entry of P(mu) is stored in
207
   output[i*r+j][m-(k2+1)+offset]. Each array output[i*r+j] must be
208
   preallocated to length k2 + 3. It's okay for this second half to overwrite
209
   the first half generated earlier (in fact this property is used by
210
   zn_poly_interval_products to conserve memory).
211

212
   The computation may fail for certain bad choices of k and u. Let p = residue
213
   characteristic (i.e. assume mod is p^m for some m). Typically this function
214
   gets called for k ~ u and k*u ~ M*p, for some M much smaller than p. In this
215
   situation, I expect most choices of (k, u) are not bad. Failure is very
216
   bad: the program will crash. Luckily, you can call check_params() to test
217
   whether (k, u) is bad. If it's bad, you probably should just increment u
218
   until you find one that's not bad. (Sorry, I haven't done a proper analysis
219
   of the situation yet.) The main routines fall back on the zz_pX version if
220
   they can't find a good parameter choice.
221
   
222
   Must have 0 <= k < n and 0 < u < n.
223

224
*/
225
LargeEvaluator::LargeEvaluator(int r, ulong k, ulong u,
226
                               const vector<vector<ulong> >& M0,
227
                               const vector<vector<ulong> >& M1,
228
                               const zn_mod_t& mod) : M0(M0), M1(M1), mod(mod)
229
{
230
   assert(k < zn_mod_get(mod));
231
   assert(u < zn_mod_get(mod));
232
   assert(r >= 1);
233
   assert(k >= 1);
234

235
   this->r = r;
236
   this->k = k;
237
   this->k2 = k / 2;
238
   this->odd = k & 1;
239
   this->u = u;
240
   this->shifter = NULL;
241
}
242

243
   
244
LargeEvaluator::~LargeEvaluator()
245
{
246
   if (this->shifter)
247
      delete shifter;
248
}
249

250
   
251
void LargeEvaluator::evaluate(int half, vector<ulong_array>& output,
252
                              ulong offset)
253
{
254
   // base cases
255

256
   if (k == 0)
257
   {
258
      // identity matrix
259
      for (int x = 0; x < r; x++)
260
      for (int y = 0; y < r; y++)
261
         output[y*r + x].data[offset] = (x == y);
262
      return;
263
   }
264

265
   if (k == 1)
266
   {
267
      // evaluate M(1) (for half == 0) or M(u+1) (for half == 1)
268
      for (int x = 0; x < r; x++)
269
      for (int y = 0; y < r; y++)
270
      {
271
         ulong temp = zn_mod_add(M0[y][x], M1[y][x], mod);
272
         output[y*r + x].data[offset] = half ?
273
               zn_mod_add(temp, zn_mod_mul(u, M1[y][x], mod), mod) : temp;
274
      }
275
      return;
276
   }
277

278
   // recursive case
279
   
280
   // Let Q(x) = M(x+1) ... M(x+k2), where k2 = floor(k/2).
281
   // Then we have either
282
   //    P(x) = Q(x) Q(x+k2)           if k is even
283
   //    P(x) = Q(x) Q(x+k2) M(x+k)    if k is odd
284

285
   if (half == 0)
286
   {
287
      // This is the first time through this function.
288
      // Allocate scratch space.
289
      scratch.resize(r*r);
290
      for (int i = 0; i < r*r; i++)
291
         scratch[i].resize(k2 + 3);
292

293
      {
294
         // Recursively compute Q(0), Q(u), ..., Q(k2*u).
295
         // These are stored in scratch.
296
         LargeEvaluator recurse(r, k2, u, M0, M1, mod);
297
         recurse.evaluate_all(scratch);
298
      }
299

300
      // Precomputations for value-shifting
301
      shifter = new Shifter(k2, k2, u, mod);
302
   }
303
   else   // half == 1
304
   {
305
      // Shift original sequence by (k2+1)*u to obtain
306
      // Q((k2+1)*u), Q((k2+2)*u), ..., Q((2*k2+1)*u)
307
      Shifter big_shifter(k2, zn_mod_mul(k2+1, u, mod), u, mod);
308
      for (int i = 0; i < r*r; i++)
309
         big_shifter.shift(scratch[i].data, scratch[i].data);
310
   }
311

312
   // Let H = (k2+1)*u*half, so now scratch contains
313
   // Q(H), Q(H + u), ..., Q(H + k2*u).
314
   
315
   // Shift by k2 to obtain Q(H + k2), Q(H + u + k2), ..., Q(H + k2*u + k2).
316
   // Results are stored directly in output array. We put them one slot
317
   // to the right, to make room for inplace matrix multiplies later on.
318
   // (If k is odd, they're shifted right by yet another slot, to make room
319
   // for another round of multiplies.)
320
   for (int i = 0; i < r*r; i++)
321
      shifter->shift(output[i].data + offset + odd + 1, scratch[i].data);
322
   
323
   // If k is odd, right-multiply each Q(H + i*u + k2) by M(H + i*u + k)
324
   // (results are stored in output array, shifted one entry to the left)
325
   if (odd)
326
   {
327
      ulong_array cruft(r*r);    // for storing M(H + i*u + k)
328
      ulong point = k;           // evaluation point
329
      if (half)
330
         point = zn_mod_add(point, zn_mod_mul(k2 + 1, u, mod), mod);
331
      
332
      for (ulong i = 0; i <= k2; i++, point = zn_mod_add(point, u, mod))
333
      {
334
         // compute M(H + i*u + k) = M0 + M1*point
335
         for (int x = 0; x < r; x++)
336
         for (int y = 0; y < r; y++)
337
            cruft.data[y*r + x] = zn_mod_add(M0[y][x],
338
                                  zn_mod_mul(M1[y][x], point, mod), mod);
339

340
         // multiply
341
         for (int x = 0; x < r; x++)
342
         for (int y = 0; y < r; y++)
343
         {
344
            ulong accum = 0;
345
            for (int z = 0; z < r; z++)
346
               accum = zn_mod_add(accum,
347
                       zn_mod_mul(output[y*r + z].data[offset + i + 2],
348
                                  cruft.data[z*r + x], mod), mod);
349
            output[y*r + x].data[offset + i + 1] = accum;
350
         }
351
      }
352
   }
353
   
354
   ulong n = zn_mod_get(mod);
355

356
   // Multiply to obtain P(H), P(H + u), ..., P(H + k2*u)
357
   // (except for the last one, in the second half, if k is even)
358
   // Store results directly in output array.
359
   for (ulong i = 0; i + (half && !odd) <= k2; i++)
360
   for (int x = 0; x < r; x++)
361
   for (int y = 0; y < r; y++)
362
   {
363
      ulong sum_hi = 0;
364
      ulong sum_lo = 0;
365
      for (int z = 0; z < r; z++)
366
      {
367
         ulong hi, lo;
368
         ZNP_MUL_WIDE(hi, lo, scratch[y*r + z].data[i],
369
                              output[z*r + x].data[offset + i + 1]);
370
         ZNP_ADD_WIDE(sum_hi, sum_lo, sum_hi, sum_lo, hi, lo);
371
         if (sum_hi >= n)
372
            sum_hi -= n;
373
      }
374
      output[y*r + x].data[offset + i] =
375
                           zn_mod_reduce_wide(sum_hi, sum_lo, mod);
376
   }
377
}
378

379

380
/*
381
   Evaluates both halves, storing results in output array.
382
*/
383
void LargeEvaluator::evaluate_all(vector<ulong_array>& output)
384
{
385
   evaluate(0, output, 0);
386
   evaluate(1, output, k/2 + 1);
387
}
388

389

390

391
/*
392
See interval_products_wrapper().
393

394
NOTE 1:
395
   I haven't proved that the algorithm here always succeeds, although I expect
396
   that it almost always will, especially when p is very large. If it
397
   succeeds, it returns 1. If it fails, it returns 0 (practically instantly).
398
   In the latter case, at least the caller can fall back on
399
   ntl_interval_products().
400
   
401
NOTE 2:
402
   The algorithm here is similar to ntl_interval_products(). However, it
403
   doesn't do the "refining step" -- it just handles the smaller intervals
404
   in the naive fashion. Also, instead of breaking intervals into power-of-four
405
   lengths, it just does the whole thing in one chunk. The performance ends up
406
   being smoother, but it's harder to prove anything about avoiding
407
   non-invertible elements. Hence the caveat in Note 1.
408
  
409
*/
410
int zn_poly_interval_products(vector<vector<vector<ulong> > >& output,
411
                              const vector<vector<ulong> >& M0,
412
                              const vector<vector<ulong> >& M1,
413
                              const vector<ZZ>& target, const zn_mod_t& mod)
414
{
415
   output.resize(target.size() / 2);
416

417
   // select k such that k*(k+1) >= total length of intervals
418
   ulong k;
419
   {
420
      ZZ len = target.back() - target.front();
421
      ZZ kk = SqrRoot(len);
422
      k = to_ulong(kk);
423
      if (kk * (kk + 1) < len)
424
         k++;
425
   }
426
 
427
   int r = M0.size();
428

429
   // try to find good parameters u and k
430
   ulong u = k;
431
   for (int trial = 0; ; trial++, u++)
432
   {
433
      if (check_params(k, u, mod))
434
         break;    // found some good parameters
435
      if (trial == 5)
436
         return 0;    // too many failures, give up
437
   }
438
   
439
   ulong n = zn_mod_get(mod);
440

441
   // shift M0 over to account for starting index
442
   vector<vector<ulong> > M0_shifted = M0;
443
   ulong shift = target.front() % n;
444
   for (int x = 0; x < r; x++)
445
   for (int y = 0; y < r; y++)
446
      M0_shifted[y][x] = zn_mod_add(M0[y][x],
447
                            zn_mod_mul(shift, M1[y][x], mod), mod);
448

449
   // prepare for evaluating products over the big intervals
450
   // [0, k), [u, u+k), ..., [ku, ku+k)
451
   LargeEvaluator evaluator(r, k, u, M0_shifted, M1, mod);
452

453
   vector<ulong_array> big(r*r);
454
   for (int i = 0; i < r*r; i++)
455
      big[i].resize(k/2 + 3);    // space for half the products, plus two more
456

457
   // evaluate the first half of the products
458
   evaluator.evaluate(0, big, 0);
459
   // flag indicating which half we currently have stored in the "big" array
460
   int half = 0;
461
   
462
   vector<vector<ulong> > accum(r, vector<ulong>(r));
463
   vector<vector<ulong> > temp1(r, vector<ulong>(r));
464
   vector<vector<ulong> > temp2(r, vector<ulong>(r));
465

466
   // for each target interval....
467
   for (int i = 0; i < target.size()/2; i++)
468
   {
469
      // doing interval [s0, s1)
470
      ZZ s0 = target[2*i];
471
      ZZ s1 = target[2*i+1];
472

473
      // product accumulated so far is [t0, t1).
474
      ZZ t0 = s0;
475
      ZZ t1 = s0;
476
      for (int x = 0; x < r; x++)
477
      for (int y = 0; y < r; y++)
478
         accum[x][y] = (x == y);     // identity matrix
479
         
480
      while (t1 < s1)
481
      {
482
         // if we are exactly on the left end of a big interval, and the
483
         // big interval fits inside the target interval, then roll it in
484
         if ((t1 - target[0]) % u == 0  &&  t1 + k <= s1)
485
         {
486
            // compute which "big" interval we are rolling in
487
            int index = to_ulong((t1 - target[0]) / u);
488
            
489
            if (index >= k/2 + 1)
490
            {
491
               // if the "big" interval is in the second half, and we haven't
492
               // computed the second half of the intervals yet, then go and
493
               // compute them (overwriting the first half)
494
               if (half == 0)
495
               {
496
                  evaluator.evaluate(1, big, 0);
497
                  half = 1;
498
               }
499
               index -= (k/2 + 1);
500
            }
501
            
502
            for (int x = 0; x < r; x++)
503
            for (int y = 0; y < r; y++)
504
               temp1[y][x] = big[y*r + x].data[index];
505
         
506
            t1 += k;
507
         }
508
         else
509
         {
510
            // otherwise just multiply by the single matrix M(t1 + 1)
511
            ulong e = (t1 + 1) % n;
512

513
            for (int x = 0; x < r; x++)
514
            for (int y = 0; y < r; y++)
515
               temp1[y][x] = zn_mod_add(M0[y][x],
516
                             zn_mod_mul(M1[y][x], e, mod), mod);
517
               
518
            t1++;
519
         }
520
         
521
         // multiply by whichever matrix we picked up above
522
         for (int x = 0; x < r; x++)
523
         for (int y = 0; y < r; y++)
524
         {
525
            ulong sum_hi = 0;
526
            ulong sum_lo = 0;
527
            for (int z = 0; z < r; z++)
528
            {
529
               ulong hi, lo;
530
               ZNP_MUL_WIDE(hi, lo, accum[y][z], temp1[z][x]);
531
               ZNP_ADD_WIDE(sum_hi, sum_lo, sum_hi, sum_lo, hi, lo);
532
               if (sum_hi >= n)
533
                  sum_hi -= n;
534
            }
535
            temp2[y][x] = zn_mod_reduce_wide(sum_hi, sum_lo, mod);
536
         }
537
         
538
         accum.swap(temp2);
539
      }
540

541
      // store result in output array
542
      output[i] = accum;
543
   }
544

545
   return 1;
546
}
547

548

549
};  // namespace hypellfrob
550

551

552
// ----------------------- end of file
553

554
Product

Resources

Company