CoCalc -- bern

GitHub Repository: sagemath/sagelib
Path: blob/master/sage/rings/bernmm/bern_modp.cpp
⁴⁰⁵⁷ views
1
/*
2
   bern_modp.cpp:  computing isolated Bernoulli numbers modulo p
3
   
4
   Copyright (C) 2008, 2009, David Harvey
5
   
6
   This file is part of the bernmm package (version 1.1).
7

8
   bernmm is released under a BSD-style license. See the README file in
9
   the source distribution for details.
10
*/
11

12

13
#include <limits.h>
14
#include <signal.h>
15
#include <cstring>
16
#include <gmp.h>
17
#include <NTL/ZZ.h>
18
#include "bern_modp_util.h"
19
#include "bern_modp.h"
20

21

22
NTL_CLIENT;
23

24

25
using namespace std;
26

27

28
namespace bernmm {
29

30

31
/******************************************************************************
32

33
   Computing the main sum (general case)
34

35
******************************************************************************/
36

37
/*
38
   Returns (1 - g^k) B_k / 2k mod p.
39
   
40
   PRECONDITIONS:
41
      5 <= p < NTL_SP_BOUND, p prime
42
      2 <= k <= p-3, k even
43
      pinv = 1 / ((double) p)
44
      g = a multiplicative generator of GF(p), in [0, p)
45
*/
46
long bernsum_powg(long p, double pinv, long k, long g)
47
{
48
   long half_gm1 = (g + ((g & 1) ? 0 : p) - 1) / 2;    // (g-1)/2 mod p
49
   long g_to_jm1 = 1;
50
   long g_to_km1 = PowerMod(g, k-1, p, pinv);
51
   long g_to_km1_to_j = g_to_km1;
52
   long sum = 0;
53
   double g_pinv = ((double) g) / ((double) p);
54
   mulmod_precon_t g_to_km1_pinv = PrepMulModPrecon(g_to_km1, p, pinv);
55
   
56
   for (long j = 1; j <= (p-1)/2; j++)
57
   {
58
      // at this point,
59
      //    g_to_jm1 holds g^(j-1) mod p
60
      //    g_to_km1_to_j holds (g^(k-1))^j mod p
61
   
62
      // update g_to_jm1 and compute q = (g*(g^(j-1) mod p) - (g^j mod p)) / p
63
      long q;
64
      g_to_jm1 = MulDivRem(q, g_to_jm1, g, p, g_pinv);
65
      
66
      // compute h = -h_g(g^j) = q - (g-1)/2
67
      long h = SubMod(q, half_gm1, p);
68
      
69
      // add h_g(g^j) * (g^(k-1))^j to running total
70
      sum = SubMod(sum, MulMod(h, g_to_km1_to_j, p, pinv), p);
71
      
72
      // update g_to_km1_to_j
73
      g_to_km1_to_j = MulModPrecon(g_to_km1_to_j, g_to_km1, p, g_to_km1_pinv);
74
   }
75

76
   return sum;
77
}
78

79

80

81
/******************************************************************************
82

83
   Computing the main sum (c = 1/2 case)
84

85
******************************************************************************/
86

87

88
/*
89
   The Expander class stores precomputed information for a fixed integer p,
90
   that subsequently permits fast computation of the binary expansion of s/p
91
   for any 0 < s < p.
92
   
93
   The constructor takes p and max_words as input. Must have 1 <= max_words <=
94
   MAX_INV. It computes an approximation to 1/p.
95
   
96
   The function expand(word_t* res, long s, long n) computes n words of s/p.
97
   Must have 0 < s < p and 1 <= n <= max_words. The output is written to res.
98
   The first word of output is junk. The next n words are the digits of s/p,
99
   from least to most significant. The buffer must be at least n+2 words long
100
   (even though the first and last words are never used for output).
101

102
   A "word" is a word_t, and contains WORD_BITS bits. On most systems this
103
   will be an mp_limb_t.
104
*/
105

106
#define MAX_INV 256
107

108
#if (GMP_NAIL_BITS == 0) && (GMP_LIMB_BITS >= ULONG_BITS)
109
// fast mpn-based version
110

111
typedef mp_limb_t word_t;
112
#define WORD_BITS GMP_LIMB_BITS
113

114
class Expander
115
{
116
private:
117
   // Approximation to 1/p. We store (max_words + 1) limbs.
118
   mp_limb_t pinv[MAX_INV + 2];
119
   mp_limb_t p;
120
   int max_words;
121

122
public:
123
   Expander(long p, int max_words)
124
   {
125
      assert(max_words >= 1);
126
      assert(max_words <= MAX_INV);
127
      
128
      this->max_words = max_words;
129
      this->p = p;
130
      mp_limb_t one = 1;
131
      mpn_divrem_1(pinv, max_words + 1, &one, 1, p);
132
   }
133

134
   void expand(word_t* res, long s, int n)
135
   {
136
      assert(s > 0 && s < p);
137
      assert(n >= 1);
138
      assert(n <= max_words);
139
   
140
      if (s == 1)
141
      {
142
         // already have 1/p; just copy it
143
         for (int i = 1; i <= n; i++)
144
            res[i] = pinv[max_words - n + i];
145
      }
146
      else
147
      {
148
         mpn_mul_1(res, pinv + max_words - n, n + 1, (mp_limb_t) s);
149

150
         // If the first output limb is really close to 0xFFFF..., then there's
151
         // a possibility of overflow, so fall back on doing division directly.
152
         // This should happen extremely rarely --- essentially never on a
153
         // 64-bit system, and very occasionally on a 32-bit system.
154
         if (res[0] > -((mp_limb_t) s))
155
         {
156
            mp_limb_t ss = s;
157
            mpn_divrem_1(res, n + 1, &ss, 1, p);
158
         }
159
      }
160
   }
161
};
162

163

164
#else
165
// slow mpz-based version, since GMP is using nails, or mp_limb_t is
166
// absurdly narrow
167

168
typedef unsigned long word_t;
169
#define WORD_BITS ULONG_BITS
170

171
class Expander
172
{
173
private:
174
   mp_limb_t p;
175
   mpz_t temp;
176

177
public:
178
   Expander(long p, int max_words)
179
   {
180
      this->p = p;
181
      mpz_init(temp);
182
   }
183
   
184
   ~Expander()
185
   {
186
      mpz_clear(temp);
187
   }
188

189
   void expand(word_t* res, long s, int n)
190
   {
191
      assert(s > 0 && s < p);
192
      assert(n >= 1);
193
      
194
      mpz_set_ui(temp, s);
195
      mpz_mul_2exp(temp, temp, WORD_BITS * n);
196
      mpz_fdiv_q_ui(temp, temp, p);
197
      mpz_export(res + 1, NULL, -1, sizeof(word_t), 0, 0, temp);
198
   }
199
};
200

201
#endif
202

203

204

205
/*
206
   Returns (2^(-k) - 1) 2 B_k / k  mod p.
207
   
208
   (Note: this is useless if 2^k = 1 mod p.)
209
      
210
   PRECONDITIONS:
211
      5 <= p < NTL_SP_BOUND, p prime
212
      2 <= k <= p-3, k even
213
      pinv = 1 / ((double) p)
214
      g = a multiplicative generator of GF(p), in [0, p)
215
      n = multiplicative order of 2 in GF(p)
216
*/
217

218
#define TABLE_LG_SIZE 8
219
#define TABLE_SIZE (((word_t) 1) << TABLE_LG_SIZE)
220
#define TABLE_MASK (TABLE_SIZE - 1)
221
#define NUM_TABLES (WORD_BITS / TABLE_LG_SIZE)
222

223
#if WORD_BITS % TABLE_LG_SIZE != 0
224
#error Number of bits in a long must be divisible by TABLE_LG_SIZE
225
#endif
226

227
long bernsum_pow2(long p, double pinv, long k, long g, long n)
228
{
229
   // In the main summation loop we accumulate data into the _tables_ array;
230
   // tables[y][z] contributes to the final answer with a weight of
231
   // 
232
   // sum(-(-1)^z[t] * (2^(k-1))^(WORD_BITS - 1 - y * TABLE_LG_SIZE - t) :
233
   //                                                  0 <= t < TABLE_LG_SIZE),
234
   //
235
   // where z[t] denotes the t-th binary digit of z (LSB is t = 0).
236
   // The memory footprint for _tables_ is 4KB on a 32-bit machine, or 16KB
237
   // on a 64-bit machine, so should fit easily into L1 cache.
238
   long tables[NUM_TABLES][TABLE_SIZE];
239
   memset(tables, 0, sizeof(long) * NUM_TABLES * TABLE_SIZE);
240
   
241
   long m = (p-1) / n;
242

243
   // take advantage of symmetry (n' and m' from the paper)
244
   if (n & 1)
245
      m >>= 1;
246
   else
247
      n >>= 1;
248

249
   // g^(k-1)
250
   long g_to_km1 = PowerMod(g, k-1, p, pinv);
251
   // 2^(k-1)
252
   long two_to_km1 = PowerMod(2, k-1, p, pinv);
253
   // B^(k-1), where B = 2^WORD_BITS
254
   long B_to_km1 = PowerMod(two_to_km1, WORD_BITS, p, pinv);
255
   // B^(MAX_INV)
256
   long s_jump = PowerMod(2, MAX_INV * WORD_BITS, p, pinv);
257

258
   // help speed up modmuls
259
   mulmod_precon_t g_pinv = PrepMulModPrecon(g, p, pinv);
260
   mulmod_precon_t g_to_km1_pinv = PrepMulModPrecon(g_to_km1, p, pinv);
261
   mulmod_precon_t two_to_km1_pinv = PrepMulModPrecon(two_to_km1, p, pinv);
262
   mulmod_precon_t B_to_km1_pinv = PrepMulModPrecon(B_to_km1, p, pinv);
263
   mulmod_precon_t s_jump_pinv = PrepMulModPrecon(s_jump, p, pinv);
264

265
   long g_to_km1_to_i = 1;
266
   long g_to_i = 1;
267
   long sum = 0;
268

269
   // Precompute some of the binary expansion of 1/p; at most MAX_INV words,
270
   // or possibly less if n is sufficiently small
271
   Expander expander(p, (n >= MAX_INV * WORD_BITS)
272
                                       ? MAX_INV : ((n - 1) / WORD_BITS + 1));
273

274
   // =========== phase 1: main summation loop
275
   
276
   // loop over outer sum
277
   for (long i = 0; i < m; i++)
278
   {
279
      // s keeps track of g^i*2^j mod p
280
      long s = g_to_i;
281
      // x keeps track of (g^i*2^j)^(k-1) mod p
282
      long x = g_to_km1_to_i;
283
      
284
      // loop over inner sum; break it up into chunks of length at most
285
      // MAX_INV * WORD_BITS. If n is large, this allows us to do most of
286
      // the work with mpn_mul_1 instead of mpn_divrem_1, and also improves
287
      // memory locality.
288
      for (long nn = n; nn > 0; nn -= MAX_INV * WORD_BITS)
289
      {
290
         word_t s_over_p[MAX_INV + 2];
291
         long bits, words;
292

293
         if (nn >= MAX_INV * WORD_BITS)
294
         {
295
            // do one chunk of length exactly MAX_INV * WORD_BITS
296
            bits = MAX_INV * WORD_BITS;
297
            words = MAX_INV;
298
         }
299
         else
300
         {
301
            // last chunk of length less than MAX_INV * WORD_BITS
302
            bits = nn;
303
            words = (nn - 1) / WORD_BITS + 1;
304
         }
305
         
306
         // compute some bits of the binary expansion of s/p
307
         expander.expand(s_over_p, s, words);
308
         word_t* next = s_over_p + words;
309

310
         // loop over whole words
311
         for (; bits >= WORD_BITS; bits -= WORD_BITS, next--)
312
         {
313
            word_t y = *next;
314

315
#if NUM_TABLES != 8 && NUM_TABLES != 4
316
            // generic version
317
            for (long h = 0; h < NUM_TABLES; h++)
318
            {
319
               long& target = tables[h][y & TABLE_MASK];
320
               target = SubMod(target, x, p);
321
               y >>= TABLE_LG_SIZE;
322
            }
323
#else
324
            // unrolled versions for 32-bit/64-bit machines
325
            long& target0 = tables[0][y & TABLE_MASK];
326
            target0 = SubMod(target0, x, p);
327

328
            long& target1 = tables[1][(y >> TABLE_LG_SIZE) & TABLE_MASK];
329
            target1 = SubMod(target1, x, p);
330

331
            long& target2 = tables[2][(y >> (2*TABLE_LG_SIZE)) & TABLE_MASK];
332
            target2 = SubMod(target2, x, p);
333

334
            long& target3 = tables[3][(y >> (3*TABLE_LG_SIZE)) & TABLE_MASK];
335
            target3 = SubMod(target3, x, p);
336
#if NUM_TABLES == 8
337
            long& target4 = tables[4][(y >> (4*TABLE_LG_SIZE)) & TABLE_MASK];
338
            target4 = SubMod(target4, x, p);
339

340
            long& target5 = tables[5][(y >> (5*TABLE_LG_SIZE)) & TABLE_MASK];
341
            target5 = SubMod(target5, x, p);
342

343
            long& target6 = tables[6][(y >> (6*TABLE_LG_SIZE)) & TABLE_MASK];
344
            target6 = SubMod(target6, x, p);
345

346
            long& target7 = tables[7][(y >> (7*TABLE_LG_SIZE)) & TABLE_MASK];
347
            target7 = SubMod(target7, x, p);
348
#endif
349
#endif
350

351
            x = MulModPrecon(x, B_to_km1, p, B_to_km1_pinv);
352
         }
353
         
354
         // loop over remaining bits in the last word
355
         word_t y = *next;
356
         for (; bits > 0; bits--)
357
         {
358
            if (y & (((word_t) 1) << (WORD_BITS - 1)))
359
               sum = SubMod(sum, x, p);
360
            else
361
               sum = AddMod(sum, x, p);
362

363
            x = MulModPrecon(x, two_to_km1, p, two_to_km1_pinv);
364
            y <<= 1;
365
         }
366
         
367
         // update s
368
         s = MulModPrecon(s, s_jump, p, s_jump_pinv);
369
      }
370

371
      // update g^i and (g^(k-1))^i
372
      g_to_i = MulModPrecon(g_to_i, g, p, g_pinv);
373
      g_to_km1_to_i = MulModPrecon(g_to_km1_to_i, g_to_km1, p, g_to_km1_pinv);
374
   }
375

376
   // =========== phase 2: consolidate table data
377

378
   // compute weights[z] = sum((-1)^z[t] * (2^(k-1))^(TABLE_LG_SIZE - 1 - t) :
379
   //                                                  0 <= t < TABLE_LG_SIZE).
380

381
   long weights[TABLE_SIZE];
382
   weights[0] = 0;
383
   for (long h = 0, x = 1; h < TABLE_LG_SIZE;
384
        h++, x = MulModPrecon(x, two_to_km1, p, two_to_km1_pinv))
385
   {
386
      for (long i = (1L << h) - 1; i >= 0; i--)
387
      {
388
         weights[2*i+1] = SubMod(weights[i], x, p);
389
         weights[2*i]   = AddMod(weights[i], x, p);
390
      }
391
   }
392

393
   // combine table data with weights
394

395
   long x_jump = PowerMod(two_to_km1, TABLE_LG_SIZE, p, pinv);
396

397
   for (long h = NUM_TABLES - 1, x = 1; h >= 0; h--)
398
   {
399
      mulmod_precon_t x_pinv = PrepMulModPrecon(x, p, pinv);
400

401
      for (long i = 0; i < TABLE_SIZE; i++)
402
      {
403
         long y = MulMod(tables[h][i], weights[i], p, pinv);
404
         y = MulModPrecon(y, x, p, x_pinv);
405
         sum = SubMod(sum, y, p);
406
      }
407

408
      x = MulModPrecon(x_jump, x, p, x_pinv);
409
   }
410

411
   return sum;
412
}
413

414

415
/******************************************************************************
416

417
   Computing the main sum (c = 1/2 case, with REDC arithmetic)
418
   
419
   Throughout this section F denotes 2^(ULONG_BITS / 2).
420

421
******************************************************************************/
422

423

424
/*
425
   Returns x/F mod n. Output is in [0, 2n), i.e. *not* reduced completely
426
   into [0, n).
427

428
   PRECONDITIONS:
429
      3 <= n < F, n odd
430
      0 <= x < nF    (if n < F/2)
431
      0 <= x < nF/2  (if n > F/2)
432
      ninv2 = -1/n mod F
433
*/
434
#define LOW_MASK ((1L << (ULONG_BITS / 2)) - 1)
435
static inline long RedcFast(long x, long n, long ninv2)
436
{
437
   unsigned long y = (x * ninv2) & LOW_MASK;
438
   unsigned long z = x + (n * y);
439
   return z >> (ULONG_BITS / 2);
440
}
441

442

443
/*
444
   Same as RedcFast(), but reduces output into [0, n).
445
*/
446
static inline long Redc(long x, long n, long ninv2)
447
{
448
   long y = RedcFast(x, n, ninv2);
449
   if (y >= n)
450
      y -= n;
451
   return y;
452
}
453

454

455
/*
456
   Computes -1/n mod F, in [0, F).
457
   
458
   PRECONDITIONS:
459
      3 <= n < F, n odd
460
*/
461
long PrepRedc(long n)
462
{
463
   long ninv2 = -n;   // already correct mod 8
464
   
465
   // newton's method for 2-adic inversion
466
   for (long bits = 3; bits < ULONG_BITS/2; bits *= 2)
467
      ninv2 = 2*ninv2 + n * ninv2 * ninv2;
468
      
469
   return ninv2 & LOW_MASK;
470
}
471

472

473
/*
474
   Same as bernsum_pow2(), but uses REDC arithmetic, and various delayed
475
   reduction strategies.
476

477
   PRECONDITIONS:
478
      Same as bernsum_pow2(), and in addition:
479
      p < 2^(ULONG_BITS/2 - 1)
480
      
481
   (See bernsum_pow2() for code comments; we only add comments here where
482
   something is different from bernsum_pow2())
483
*/
484
long bernsum_pow2_redc(long p, double pinv, long k, long g, long n)
485
{
486
   long pinv2 = PrepRedc(p);
487
   long F = (1L << (ULONG_BITS/2)) % p;
488
   
489
   long tables[NUM_TABLES][TABLE_SIZE];
490
   memset(tables, 0, sizeof(long) * NUM_TABLES * TABLE_SIZE);
491

492
   long m = (p-1) / n;
493

494
   if (n & 1)
495
      m >>= 1;
496
   else
497
      n >>= 1;
498
   
499
   long g_to_km1 = PowerMod(g, k-1, p, pinv);
500
   long two_to_km1 = PowerMod(2, k-1, p, pinv);
501
   long B_to_km1 = PowerMod(two_to_km1, WORD_BITS, p, pinv);
502
   long s_jump = PowerMod(2, MAX_INV * WORD_BITS, p, pinv);
503
   
504
   long g_redc = MulMod(g, F, p, pinv);
505
   long g_to_km1_redc = MulMod(g_to_km1, F, p, pinv);
506
   long two_to_km1_redc = MulMod(two_to_km1, F, p, pinv);
507
   long B_to_km1_redc = MulMod(B_to_km1, F, p, pinv);
508
   long s_jump_redc = MulMod(s_jump, F, p, pinv);
509

510
   long g_to_km1_to_i = 1;    // always in [0, 2p)
511
   long g_to_i = 1;           // always in [0, 2p)
512
   long sum = 0;
513

514
   Expander expander(p, (n >= MAX_INV * WORD_BITS)
515
                                       ? MAX_INV : ((n - 1) / WORD_BITS + 1));
516

517
   // =========== phase 1: main summation loop
518
   
519
   for (long i = 0; i < m; i++)
520
   {
521
      long s = g_to_i;           // always in [0, p)
522
      if (s >= p)
523
         s -= p;
524
      
525
      long x = g_to_km1_to_i;    // always in [0, 2p)
526
      
527
      for (long nn = n; nn > 0; nn -= MAX_INV * WORD_BITS)
528
      {
529
         word_t s_over_p[MAX_INV + 2];
530
         long bits, words;
531
         
532
         if (nn >= MAX_INV * WORD_BITS)
533
         {
534
            bits = MAX_INV * WORD_BITS;
535
            words = MAX_INV;
536
         }
537
         else
538
         {
539
            bits = nn;
540
            words = (nn - 1) / WORD_BITS + 1;
541
         }
542

543
         expander.expand(s_over_p, s, words);
544
         word_t* next = s_over_p + words;
545
         
546
         for (; bits >= WORD_BITS; bits -= WORD_BITS, next--)
547
         {
548
            word_t y = *next;
549
            
550
            // note: we add the values into tables *without* reduction mod p
551

552
#if NUM_TABLES != 8 && NUM_TABLES != 4
553
            // generic version
554
            for (long h = 0; h < NUM_TABLES; h++)
555
            {
556
               tables[h][y & TABLE_MASK] += x;
557
               y >>= TABLE_LG_SIZE;
558
            }
559
#else
560
            // unrolled versions for 32-bit/64-bit machines
561
            tables[0][ y                       & TABLE_MASK] += x;
562
            tables[1][(y >>    TABLE_LG_SIZE ) & TABLE_MASK] += x;
563
            tables[2][(y >> (2*TABLE_LG_SIZE)) & TABLE_MASK] += x;
564
            tables[3][(y >> (3*TABLE_LG_SIZE)) & TABLE_MASK] += x;
565
#if NUM_TABLES == 8
566
            tables[4][(y >> (4*TABLE_LG_SIZE)) & TABLE_MASK] += x;
567
            tables[5][(y >> (5*TABLE_LG_SIZE)) & TABLE_MASK] += x;
568
            tables[6][(y >> (6*TABLE_LG_SIZE)) & TABLE_MASK] += x;
569
            tables[7][(y >> (7*TABLE_LG_SIZE)) & TABLE_MASK] += x;
570
#endif            
571
#endif
572

573
            x = RedcFast(x * B_to_km1_redc, p, pinv2);
574
         }
575
         
576
         // bring x into [0, p) for next loop
577
         if (x >= p)
578
            x -= p;
579

580
         word_t y = *next;
581
         for (; bits > 0; bits--)
582
         {
583
            if (y & (((word_t) 1) << (WORD_BITS - 1)))
584
               sum = SubMod(sum, x, p);
585
            else
586
               sum = AddMod(sum, x, p);
587

588
            x = Redc(x * two_to_km1_redc, p, pinv2);
589
            y <<= 1;
590
         }
591
         
592
         s = Redc(s * s_jump_redc, p, pinv2);
593
      }
594

595
      g_to_i = RedcFast(g_to_i * g_redc, p, pinv2);
596
      g_to_km1_to_i = RedcFast(g_to_km1_to_i * g_to_km1_redc, p, pinv2);
597
   }
598
   
599
   // At this point, each table entry is at most p^2 (since x was always
600
   // in [0, 2p), and the inner loop was called at most (p/2) / WORD_BITS
601
   // times, and 2p * p/2 / WORD_BITS * TABLE_LG_SIZE <= p^2).
602
   
603
   // =========== phase 2: consolidate table data
604
   
605
   long weights[TABLE_SIZE];
606
   weights[0] = 0;
607
   // we store the weights multiplied by a factor of 2^(3*ULONG_BITS/2) to
608
   // compensate for the three rounds of REDC reduction in the loop below
609
   for (long h = 0, x = PowerMod(2, 3*ULONG_BITS/2, p, pinv);
610
        h < TABLE_LG_SIZE; h++, x = Redc(x * two_to_km1_redc, p, pinv2))
611
   {
612
      for (long i = (1L << h) - 1; i >= 0; i--)
613
      {
614
         weights[2*i+1] = SubMod(weights[i], x, p);
615
         weights[2*i]   = AddMod(weights[i], x, p);
616
      }
617
   }
618

619
   long x_jump = PowerMod(two_to_km1, TABLE_LG_SIZE, p, pinv);
620
   long x_jump_redc = MulMod(x_jump, F, p, pinv);
621

622
   for (long h = NUM_TABLES - 1, x = 1; h >= 0; h--)
623
   {
624
      for (long i = 0; i < TABLE_SIZE; i++)
625
      {
626
         long y;
627
         y = RedcFast(tables[h][i], p, pinv2);
628
         y = RedcFast(y * weights[i], p, pinv2);
629
         y = RedcFast(y * x, p, pinv2);
630
         sum += y;
631
      }
632

633
      x = Redc(x * x_jump_redc, p, pinv2);
634
   }
635

636
   return sum % p;
637
}
638

639

640

641
/******************************************************************************
642

643
   Wrappers for bernsum_*
644

645
******************************************************************************/
646

647

648
/*
649
   Returns B_k/k mod p, in the range [0, p).
650
   
651
   PRECONDITIONS:
652
      5 <= p < NTL_SP_BOUND, p prime
653
      2 <= k <= p-3, k even
654
      pinv = 1 / ((double) p)
655
   
656
   Algorithm: uses bernsum_powg() to compute the main sum.
657
*/
658
long _bern_modp_powg(long p, double pinv, long k)
659
{
660
   Factorisation F(p-1);
661
   long g = primitive_root(p, pinv, F);
662

663
   // compute main sum
664
   long x = bernsum_powg(p, pinv, k, g);
665

666
   // divide by (1 - g^k) and multiply by 2
667
   long g_to_k = PowerMod(g, k, p, pinv);
668
   long t = InvMod(p + 1 - g_to_k, p);
669
   x = MulMod(x, t, p, pinv);
670
   x = AddMod(x, x, p);
671
   
672
   return x;
673
}
674

675

676
/*
677
   Returns B_k/k mod p, in the range [0, p).
678
   
679
   PRECONDITIONS:
680
      5 <= p < NTL_SP_BOUND, p prime
681
      2 <= k <= p-3, k even
682
      pinv = 1 / ((double) p)
683
      2^k != 1 mod p
684

685
   Algorithm: uses bernsum_pow2() (or bernsum_pow2_redc() if p is small
686
   enough) to compute the main sum.
687
*/
688
long _bern_modp_pow2(long p, double pinv, long k)
689
{
690
   Factorisation F(p-1);
691
   long g = primitive_root(p, pinv, F);
692
   long n = order(2, p, pinv, F);
693

694
   // compute main sum
695
   long x;
696
   if (p < (1L << (ULONG_BITS/2 - 1)))
697
      x = bernsum_pow2_redc(p, pinv, k, g, n);
698
   else
699
      x = bernsum_pow2(p, pinv, k, g, n);
700

701
   // divide by 2*(2^(-k) - 1)
702
   long t = PowerMod(2, -k, p, pinv) - 1;
703
   t = AddMod(t, t, p);
704
   t = InvMod(t, p);
705
   x = MulMod(x, t, p, pinv);
706
   
707
   return x;
708
}
709

710

711

712
/*
713
   Returns B_k/k mod p, in the range [0, p).
714
   
715
   PRECONDITIONS:
716
      5 <= p < NTL_SP_BOUND, p prime
717
      2 <= k <= p-3, k even
718
      pinv = 1 / ((double) p)
719
*/
720
long _bern_modp(long p, double pinv, long k)
721
{
722
   if (PowerMod(2, k, p, pinv) != 1)
723
      // 2^k != 1 mod p, so we use the faster version
724
      return _bern_modp_pow2(p, pinv, k);
725
   else
726
      // forced to use slower version
727
      return _bern_modp_powg(p, pinv, k);
728
}
729

730

731

732
/******************************************************************************
733

734
   Main bern_modp() routine
735

736
******************************************************************************/
737

738
long bern_modp(long p, long k)
739
{
740
   assert(k >= 0);
741
   assert(2 <= p && p < NTL_SP_BOUND);
742

743
   // B_0 = 1
744
   if (k == 0)
745
      return 1;
746

747
   // B_1 = -1/2 mod p
748
   if (k == 1)
749
   {
750
      if (p == 2)
751
         return -1;
752
      return (p-1)/2;
753
   }
754

755
   // B_k = 0 for odd k >= 3
756
   if (k & 1)
757
      return 0;
758

759
   // denominator of B_k is always divisible by 6 for k >= 2
760
   if (p <= 3)
761
      return -1;
762
      
763
   // use Kummer's congruence (k = m mod p-1  =>  B_k/k = B_m/m mod p)
764
   long m = k % (p-1);
765
   if (m == 0)
766
      return -1;
767

768
   double pinv = 1 / ((double) p);
769
   long x = _bern_modp(p, pinv, m);    // = B_m/m mod p
770
   return MulMod(x, k, p, pinv);
771
}
772

773

774
};    // end namespace
775

776

777

778
// end of file ================================================================
779

780
Product

Resources

Company