Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sagelib
Path: blob/master/sage/rings/bernmm/bern_modp.cpp
4057 views
1
/*
2
bern_modp.cpp: computing isolated Bernoulli numbers modulo p
3
4
Copyright (C) 2008, 2009, David Harvey
5
6
This file is part of the bernmm package (version 1.1).
7
8
bernmm is released under a BSD-style license. See the README file in
9
the source distribution for details.
10
*/
11
12
13
#include <limits.h>
14
#include <signal.h>
15
#include <cstring>
16
#include <gmp.h>
17
#include <NTL/ZZ.h>
18
#include "bern_modp_util.h"
19
#include "bern_modp.h"
20
21
22
NTL_CLIENT;
23
24
25
using namespace std;
26
27
28
namespace bernmm {
29
30
31
/******************************************************************************
32
33
Computing the main sum (general case)
34
35
******************************************************************************/
36
37
/*
38
Returns (1 - g^k) B_k / 2k mod p.
39
40
PRECONDITIONS:
41
5 <= p < NTL_SP_BOUND, p prime
42
2 <= k <= p-3, k even
43
pinv = 1 / ((double) p)
44
g = a multiplicative generator of GF(p), in [0, p)
45
*/
46
long bernsum_powg(long p, double pinv, long k, long g)
47
{
48
long half_gm1 = (g + ((g & 1) ? 0 : p) - 1) / 2; // (g-1)/2 mod p
49
long g_to_jm1 = 1;
50
long g_to_km1 = PowerMod(g, k-1, p, pinv);
51
long g_to_km1_to_j = g_to_km1;
52
long sum = 0;
53
double g_pinv = ((double) g) / ((double) p);
54
mulmod_precon_t g_to_km1_pinv = PrepMulModPrecon(g_to_km1, p, pinv);
55
56
for (long j = 1; j <= (p-1)/2; j++)
57
{
58
// at this point,
59
// g_to_jm1 holds g^(j-1) mod p
60
// g_to_km1_to_j holds (g^(k-1))^j mod p
61
62
// update g_to_jm1 and compute q = (g*(g^(j-1) mod p) - (g^j mod p)) / p
63
long q;
64
g_to_jm1 = MulDivRem(q, g_to_jm1, g, p, g_pinv);
65
66
// compute h = -h_g(g^j) = q - (g-1)/2
67
long h = SubMod(q, half_gm1, p);
68
69
// add h_g(g^j) * (g^(k-1))^j to running total
70
sum = SubMod(sum, MulMod(h, g_to_km1_to_j, p, pinv), p);
71
72
// update g_to_km1_to_j
73
g_to_km1_to_j = MulModPrecon(g_to_km1_to_j, g_to_km1, p, g_to_km1_pinv);
74
}
75
76
return sum;
77
}
78
79
80
81
/******************************************************************************
82
83
Computing the main sum (c = 1/2 case)
84
85
******************************************************************************/
86
87
88
/*
89
The Expander class stores precomputed information for a fixed integer p,
90
that subsequently permits fast computation of the binary expansion of s/p
91
for any 0 < s < p.
92
93
The constructor takes p and max_words as input. Must have 1 <= max_words <=
94
MAX_INV. It computes an approximation to 1/p.
95
96
The function expand(word_t* res, long s, long n) computes n words of s/p.
97
Must have 0 < s < p and 1 <= n <= max_words. The output is written to res.
98
The first word of output is junk. The next n words are the digits of s/p,
99
from least to most significant. The buffer must be at least n+2 words long
100
(even though the first and last words are never used for output).
101
102
A "word" is a word_t, and contains WORD_BITS bits. On most systems this
103
will be an mp_limb_t.
104
*/
105
106
#define MAX_INV 256
107
108
#if (GMP_NAIL_BITS == 0) && (GMP_LIMB_BITS >= ULONG_BITS)
109
// fast mpn-based version
110
111
typedef mp_limb_t word_t;
112
#define WORD_BITS GMP_LIMB_BITS
113
114
class Expander
115
{
116
private:
117
// Approximation to 1/p. We store (max_words + 1) limbs.
118
mp_limb_t pinv[MAX_INV + 2];
119
mp_limb_t p;
120
int max_words;
121
122
public:
123
Expander(long p, int max_words)
124
{
125
assert(max_words >= 1);
126
assert(max_words <= MAX_INV);
127
128
this->max_words = max_words;
129
this->p = p;
130
mp_limb_t one = 1;
131
mpn_divrem_1(pinv, max_words + 1, &one, 1, p);
132
}
133
134
void expand(word_t* res, long s, int n)
135
{
136
assert(s > 0 && s < p);
137
assert(n >= 1);
138
assert(n <= max_words);
139
140
if (s == 1)
141
{
142
// already have 1/p; just copy it
143
for (int i = 1; i <= n; i++)
144
res[i] = pinv[max_words - n + i];
145
}
146
else
147
{
148
mpn_mul_1(res, pinv + max_words - n, n + 1, (mp_limb_t) s);
149
150
// If the first output limb is really close to 0xFFFF..., then there's
151
// a possibility of overflow, so fall back on doing division directly.
152
// This should happen extremely rarely --- essentially never on a
153
// 64-bit system, and very occasionally on a 32-bit system.
154
if (res[0] > -((mp_limb_t) s))
155
{
156
mp_limb_t ss = s;
157
mpn_divrem_1(res, n + 1, &ss, 1, p);
158
}
159
}
160
}
161
};
162
163
164
#else
165
// slow mpz-based version, since GMP is using nails, or mp_limb_t is
166
// absurdly narrow
167
168
typedef unsigned long word_t;
169
#define WORD_BITS ULONG_BITS
170
171
class Expander
172
{
173
private:
174
mp_limb_t p;
175
mpz_t temp;
176
177
public:
178
Expander(long p, int max_words)
179
{
180
this->p = p;
181
mpz_init(temp);
182
}
183
184
~Expander()
185
{
186
mpz_clear(temp);
187
}
188
189
void expand(word_t* res, long s, int n)
190
{
191
assert(s > 0 && s < p);
192
assert(n >= 1);
193
194
mpz_set_ui(temp, s);
195
mpz_mul_2exp(temp, temp, WORD_BITS * n);
196
mpz_fdiv_q_ui(temp, temp, p);
197
mpz_export(res + 1, NULL, -1, sizeof(word_t), 0, 0, temp);
198
}
199
};
200
201
#endif
202
203
204
205
/*
206
Returns (2^(-k) - 1) 2 B_k / k mod p.
207
208
(Note: this is useless if 2^k = 1 mod p.)
209
210
PRECONDITIONS:
211
5 <= p < NTL_SP_BOUND, p prime
212
2 <= k <= p-3, k even
213
pinv = 1 / ((double) p)
214
g = a multiplicative generator of GF(p), in [0, p)
215
n = multiplicative order of 2 in GF(p)
216
*/
217
218
#define TABLE_LG_SIZE 8
219
#define TABLE_SIZE (((word_t) 1) << TABLE_LG_SIZE)
220
#define TABLE_MASK (TABLE_SIZE - 1)
221
#define NUM_TABLES (WORD_BITS / TABLE_LG_SIZE)
222
223
#if WORD_BITS % TABLE_LG_SIZE != 0
224
#error Number of bits in a long must be divisible by TABLE_LG_SIZE
225
#endif
226
227
long bernsum_pow2(long p, double pinv, long k, long g, long n)
228
{
229
// In the main summation loop we accumulate data into the _tables_ array;
230
// tables[y][z] contributes to the final answer with a weight of
231
//
232
// sum(-(-1)^z[t] * (2^(k-1))^(WORD_BITS - 1 - y * TABLE_LG_SIZE - t) :
233
// 0 <= t < TABLE_LG_SIZE),
234
//
235
// where z[t] denotes the t-th binary digit of z (LSB is t = 0).
236
// The memory footprint for _tables_ is 4KB on a 32-bit machine, or 16KB
237
// on a 64-bit machine, so should fit easily into L1 cache.
238
long tables[NUM_TABLES][TABLE_SIZE];
239
memset(tables, 0, sizeof(long) * NUM_TABLES * TABLE_SIZE);
240
241
long m = (p-1) / n;
242
243
// take advantage of symmetry (n' and m' from the paper)
244
if (n & 1)
245
m >>= 1;
246
else
247
n >>= 1;
248
249
// g^(k-1)
250
long g_to_km1 = PowerMod(g, k-1, p, pinv);
251
// 2^(k-1)
252
long two_to_km1 = PowerMod(2, k-1, p, pinv);
253
// B^(k-1), where B = 2^WORD_BITS
254
long B_to_km1 = PowerMod(two_to_km1, WORD_BITS, p, pinv);
255
// B^(MAX_INV)
256
long s_jump = PowerMod(2, MAX_INV * WORD_BITS, p, pinv);
257
258
// help speed up modmuls
259
mulmod_precon_t g_pinv = PrepMulModPrecon(g, p, pinv);
260
mulmod_precon_t g_to_km1_pinv = PrepMulModPrecon(g_to_km1, p, pinv);
261
mulmod_precon_t two_to_km1_pinv = PrepMulModPrecon(two_to_km1, p, pinv);
262
mulmod_precon_t B_to_km1_pinv = PrepMulModPrecon(B_to_km1, p, pinv);
263
mulmod_precon_t s_jump_pinv = PrepMulModPrecon(s_jump, p, pinv);
264
265
long g_to_km1_to_i = 1;
266
long g_to_i = 1;
267
long sum = 0;
268
269
// Precompute some of the binary expansion of 1/p; at most MAX_INV words,
270
// or possibly less if n is sufficiently small
271
Expander expander(p, (n >= MAX_INV * WORD_BITS)
272
? MAX_INV : ((n - 1) / WORD_BITS + 1));
273
274
// =========== phase 1: main summation loop
275
276
// loop over outer sum
277
for (long i = 0; i < m; i++)
278
{
279
// s keeps track of g^i*2^j mod p
280
long s = g_to_i;
281
// x keeps track of (g^i*2^j)^(k-1) mod p
282
long x = g_to_km1_to_i;
283
284
// loop over inner sum; break it up into chunks of length at most
285
// MAX_INV * WORD_BITS. If n is large, this allows us to do most of
286
// the work with mpn_mul_1 instead of mpn_divrem_1, and also improves
287
// memory locality.
288
for (long nn = n; nn > 0; nn -= MAX_INV * WORD_BITS)
289
{
290
word_t s_over_p[MAX_INV + 2];
291
long bits, words;
292
293
if (nn >= MAX_INV * WORD_BITS)
294
{
295
// do one chunk of length exactly MAX_INV * WORD_BITS
296
bits = MAX_INV * WORD_BITS;
297
words = MAX_INV;
298
}
299
else
300
{
301
// last chunk of length less than MAX_INV * WORD_BITS
302
bits = nn;
303
words = (nn - 1) / WORD_BITS + 1;
304
}
305
306
// compute some bits of the binary expansion of s/p
307
expander.expand(s_over_p, s, words);
308
word_t* next = s_over_p + words;
309
310
// loop over whole words
311
for (; bits >= WORD_BITS; bits -= WORD_BITS, next--)
312
{
313
word_t y = *next;
314
315
#if NUM_TABLES != 8 && NUM_TABLES != 4
316
// generic version
317
for (long h = 0; h < NUM_TABLES; h++)
318
{
319
long& target = tables[h][y & TABLE_MASK];
320
target = SubMod(target, x, p);
321
y >>= TABLE_LG_SIZE;
322
}
323
#else
324
// unrolled versions for 32-bit/64-bit machines
325
long& target0 = tables[0][y & TABLE_MASK];
326
target0 = SubMod(target0, x, p);
327
328
long& target1 = tables[1][(y >> TABLE_LG_SIZE) & TABLE_MASK];
329
target1 = SubMod(target1, x, p);
330
331
long& target2 = tables[2][(y >> (2*TABLE_LG_SIZE)) & TABLE_MASK];
332
target2 = SubMod(target2, x, p);
333
334
long& target3 = tables[3][(y >> (3*TABLE_LG_SIZE)) & TABLE_MASK];
335
target3 = SubMod(target3, x, p);
336
#if NUM_TABLES == 8
337
long& target4 = tables[4][(y >> (4*TABLE_LG_SIZE)) & TABLE_MASK];
338
target4 = SubMod(target4, x, p);
339
340
long& target5 = tables[5][(y >> (5*TABLE_LG_SIZE)) & TABLE_MASK];
341
target5 = SubMod(target5, x, p);
342
343
long& target6 = tables[6][(y >> (6*TABLE_LG_SIZE)) & TABLE_MASK];
344
target6 = SubMod(target6, x, p);
345
346
long& target7 = tables[7][(y >> (7*TABLE_LG_SIZE)) & TABLE_MASK];
347
target7 = SubMod(target7, x, p);
348
#endif
349
#endif
350
351
x = MulModPrecon(x, B_to_km1, p, B_to_km1_pinv);
352
}
353
354
// loop over remaining bits in the last word
355
word_t y = *next;
356
for (; bits > 0; bits--)
357
{
358
if (y & (((word_t) 1) << (WORD_BITS - 1)))
359
sum = SubMod(sum, x, p);
360
else
361
sum = AddMod(sum, x, p);
362
363
x = MulModPrecon(x, two_to_km1, p, two_to_km1_pinv);
364
y <<= 1;
365
}
366
367
// update s
368
s = MulModPrecon(s, s_jump, p, s_jump_pinv);
369
}
370
371
// update g^i and (g^(k-1))^i
372
g_to_i = MulModPrecon(g_to_i, g, p, g_pinv);
373
g_to_km1_to_i = MulModPrecon(g_to_km1_to_i, g_to_km1, p, g_to_km1_pinv);
374
}
375
376
// =========== phase 2: consolidate table data
377
378
// compute weights[z] = sum((-1)^z[t] * (2^(k-1))^(TABLE_LG_SIZE - 1 - t) :
379
// 0 <= t < TABLE_LG_SIZE).
380
381
long weights[TABLE_SIZE];
382
weights[0] = 0;
383
for (long h = 0, x = 1; h < TABLE_LG_SIZE;
384
h++, x = MulModPrecon(x, two_to_km1, p, two_to_km1_pinv))
385
{
386
for (long i = (1L << h) - 1; i >= 0; i--)
387
{
388
weights[2*i+1] = SubMod(weights[i], x, p);
389
weights[2*i] = AddMod(weights[i], x, p);
390
}
391
}
392
393
// combine table data with weights
394
395
long x_jump = PowerMod(two_to_km1, TABLE_LG_SIZE, p, pinv);
396
397
for (long h = NUM_TABLES - 1, x = 1; h >= 0; h--)
398
{
399
mulmod_precon_t x_pinv = PrepMulModPrecon(x, p, pinv);
400
401
for (long i = 0; i < TABLE_SIZE; i++)
402
{
403
long y = MulMod(tables[h][i], weights[i], p, pinv);
404
y = MulModPrecon(y, x, p, x_pinv);
405
sum = SubMod(sum, y, p);
406
}
407
408
x = MulModPrecon(x_jump, x, p, x_pinv);
409
}
410
411
return sum;
412
}
413
414
415
/******************************************************************************
416
417
Computing the main sum (c = 1/2 case, with REDC arithmetic)
418
419
Throughout this section F denotes 2^(ULONG_BITS / 2).
420
421
******************************************************************************/
422
423
424
/*
425
Returns x/F mod n. Output is in [0, 2n), i.e. *not* reduced completely
426
into [0, n).
427
428
PRECONDITIONS:
429
3 <= n < F, n odd
430
0 <= x < nF (if n < F/2)
431
0 <= x < nF/2 (if n > F/2)
432
ninv2 = -1/n mod F
433
*/
434
#define LOW_MASK ((1L << (ULONG_BITS / 2)) - 1)
435
static inline long RedcFast(long x, long n, long ninv2)
436
{
437
unsigned long y = (x * ninv2) & LOW_MASK;
438
unsigned long z = x + (n * y);
439
return z >> (ULONG_BITS / 2);
440
}
441
442
443
/*
444
Same as RedcFast(), but reduces output into [0, n).
445
*/
446
static inline long Redc(long x, long n, long ninv2)
447
{
448
long y = RedcFast(x, n, ninv2);
449
if (y >= n)
450
y -= n;
451
return y;
452
}
453
454
455
/*
456
Computes -1/n mod F, in [0, F).
457
458
PRECONDITIONS:
459
3 <= n < F, n odd
460
*/
461
long PrepRedc(long n)
462
{
463
long ninv2 = -n; // already correct mod 8
464
465
// newton's method for 2-adic inversion
466
for (long bits = 3; bits < ULONG_BITS/2; bits *= 2)
467
ninv2 = 2*ninv2 + n * ninv2 * ninv2;
468
469
return ninv2 & LOW_MASK;
470
}
471
472
473
/*
474
Same as bernsum_pow2(), but uses REDC arithmetic, and various delayed
475
reduction strategies.
476
477
PRECONDITIONS:
478
Same as bernsum_pow2(), and in addition:
479
p < 2^(ULONG_BITS/2 - 1)
480
481
(See bernsum_pow2() for code comments; we only add comments here where
482
something is different from bernsum_pow2())
483
*/
484
long bernsum_pow2_redc(long p, double pinv, long k, long g, long n)
485
{
486
long pinv2 = PrepRedc(p);
487
long F = (1L << (ULONG_BITS/2)) % p;
488
489
long tables[NUM_TABLES][TABLE_SIZE];
490
memset(tables, 0, sizeof(long) * NUM_TABLES * TABLE_SIZE);
491
492
long m = (p-1) / n;
493
494
if (n & 1)
495
m >>= 1;
496
else
497
n >>= 1;
498
499
long g_to_km1 = PowerMod(g, k-1, p, pinv);
500
long two_to_km1 = PowerMod(2, k-1, p, pinv);
501
long B_to_km1 = PowerMod(two_to_km1, WORD_BITS, p, pinv);
502
long s_jump = PowerMod(2, MAX_INV * WORD_BITS, p, pinv);
503
504
long g_redc = MulMod(g, F, p, pinv);
505
long g_to_km1_redc = MulMod(g_to_km1, F, p, pinv);
506
long two_to_km1_redc = MulMod(two_to_km1, F, p, pinv);
507
long B_to_km1_redc = MulMod(B_to_km1, F, p, pinv);
508
long s_jump_redc = MulMod(s_jump, F, p, pinv);
509
510
long g_to_km1_to_i = 1; // always in [0, 2p)
511
long g_to_i = 1; // always in [0, 2p)
512
long sum = 0;
513
514
Expander expander(p, (n >= MAX_INV * WORD_BITS)
515
? MAX_INV : ((n - 1) / WORD_BITS + 1));
516
517
// =========== phase 1: main summation loop
518
519
for (long i = 0; i < m; i++)
520
{
521
long s = g_to_i; // always in [0, p)
522
if (s >= p)
523
s -= p;
524
525
long x = g_to_km1_to_i; // always in [0, 2p)
526
527
for (long nn = n; nn > 0; nn -= MAX_INV * WORD_BITS)
528
{
529
word_t s_over_p[MAX_INV + 2];
530
long bits, words;
531
532
if (nn >= MAX_INV * WORD_BITS)
533
{
534
bits = MAX_INV * WORD_BITS;
535
words = MAX_INV;
536
}
537
else
538
{
539
bits = nn;
540
words = (nn - 1) / WORD_BITS + 1;
541
}
542
543
expander.expand(s_over_p, s, words);
544
word_t* next = s_over_p + words;
545
546
for (; bits >= WORD_BITS; bits -= WORD_BITS, next--)
547
{
548
word_t y = *next;
549
550
// note: we add the values into tables *without* reduction mod p
551
552
#if NUM_TABLES != 8 && NUM_TABLES != 4
553
// generic version
554
for (long h = 0; h < NUM_TABLES; h++)
555
{
556
tables[h][y & TABLE_MASK] += x;
557
y >>= TABLE_LG_SIZE;
558
}
559
#else
560
// unrolled versions for 32-bit/64-bit machines
561
tables[0][ y & TABLE_MASK] += x;
562
tables[1][(y >> TABLE_LG_SIZE ) & TABLE_MASK] += x;
563
tables[2][(y >> (2*TABLE_LG_SIZE)) & TABLE_MASK] += x;
564
tables[3][(y >> (3*TABLE_LG_SIZE)) & TABLE_MASK] += x;
565
#if NUM_TABLES == 8
566
tables[4][(y >> (4*TABLE_LG_SIZE)) & TABLE_MASK] += x;
567
tables[5][(y >> (5*TABLE_LG_SIZE)) & TABLE_MASK] += x;
568
tables[6][(y >> (6*TABLE_LG_SIZE)) & TABLE_MASK] += x;
569
tables[7][(y >> (7*TABLE_LG_SIZE)) & TABLE_MASK] += x;
570
#endif
571
#endif
572
573
x = RedcFast(x * B_to_km1_redc, p, pinv2);
574
}
575
576
// bring x into [0, p) for next loop
577
if (x >= p)
578
x -= p;
579
580
word_t y = *next;
581
for (; bits > 0; bits--)
582
{
583
if (y & (((word_t) 1) << (WORD_BITS - 1)))
584
sum = SubMod(sum, x, p);
585
else
586
sum = AddMod(sum, x, p);
587
588
x = Redc(x * two_to_km1_redc, p, pinv2);
589
y <<= 1;
590
}
591
592
s = Redc(s * s_jump_redc, p, pinv2);
593
}
594
595
g_to_i = RedcFast(g_to_i * g_redc, p, pinv2);
596
g_to_km1_to_i = RedcFast(g_to_km1_to_i * g_to_km1_redc, p, pinv2);
597
}
598
599
// At this point, each table entry is at most p^2 (since x was always
600
// in [0, 2p), and the inner loop was called at most (p/2) / WORD_BITS
601
// times, and 2p * p/2 / WORD_BITS * TABLE_LG_SIZE <= p^2).
602
603
// =========== phase 2: consolidate table data
604
605
long weights[TABLE_SIZE];
606
weights[0] = 0;
607
// we store the weights multiplied by a factor of 2^(3*ULONG_BITS/2) to
608
// compensate for the three rounds of REDC reduction in the loop below
609
for (long h = 0, x = PowerMod(2, 3*ULONG_BITS/2, p, pinv);
610
h < TABLE_LG_SIZE; h++, x = Redc(x * two_to_km1_redc, p, pinv2))
611
{
612
for (long i = (1L << h) - 1; i >= 0; i--)
613
{
614
weights[2*i+1] = SubMod(weights[i], x, p);
615
weights[2*i] = AddMod(weights[i], x, p);
616
}
617
}
618
619
long x_jump = PowerMod(two_to_km1, TABLE_LG_SIZE, p, pinv);
620
long x_jump_redc = MulMod(x_jump, F, p, pinv);
621
622
for (long h = NUM_TABLES - 1, x = 1; h >= 0; h--)
623
{
624
for (long i = 0; i < TABLE_SIZE; i++)
625
{
626
long y;
627
y = RedcFast(tables[h][i], p, pinv2);
628
y = RedcFast(y * weights[i], p, pinv2);
629
y = RedcFast(y * x, p, pinv2);
630
sum += y;
631
}
632
633
x = Redc(x * x_jump_redc, p, pinv2);
634
}
635
636
return sum % p;
637
}
638
639
640
641
/******************************************************************************
642
643
Wrappers for bernsum_*
644
645
******************************************************************************/
646
647
648
/*
649
Returns B_k/k mod p, in the range [0, p).
650
651
PRECONDITIONS:
652
5 <= p < NTL_SP_BOUND, p prime
653
2 <= k <= p-3, k even
654
pinv = 1 / ((double) p)
655
656
Algorithm: uses bernsum_powg() to compute the main sum.
657
*/
658
long _bern_modp_powg(long p, double pinv, long k)
659
{
660
Factorisation F(p-1);
661
long g = primitive_root(p, pinv, F);
662
663
// compute main sum
664
long x = bernsum_powg(p, pinv, k, g);
665
666
// divide by (1 - g^k) and multiply by 2
667
long g_to_k = PowerMod(g, k, p, pinv);
668
long t = InvMod(p + 1 - g_to_k, p);
669
x = MulMod(x, t, p, pinv);
670
x = AddMod(x, x, p);
671
672
return x;
673
}
674
675
676
/*
677
Returns B_k/k mod p, in the range [0, p).
678
679
PRECONDITIONS:
680
5 <= p < NTL_SP_BOUND, p prime
681
2 <= k <= p-3, k even
682
pinv = 1 / ((double) p)
683
2^k != 1 mod p
684
685
Algorithm: uses bernsum_pow2() (or bernsum_pow2_redc() if p is small
686
enough) to compute the main sum.
687
*/
688
long _bern_modp_pow2(long p, double pinv, long k)
689
{
690
Factorisation F(p-1);
691
long g = primitive_root(p, pinv, F);
692
long n = order(2, p, pinv, F);
693
694
// compute main sum
695
long x;
696
if (p < (1L << (ULONG_BITS/2 - 1)))
697
x = bernsum_pow2_redc(p, pinv, k, g, n);
698
else
699
x = bernsum_pow2(p, pinv, k, g, n);
700
701
// divide by 2*(2^(-k) - 1)
702
long t = PowerMod(2, -k, p, pinv) - 1;
703
t = AddMod(t, t, p);
704
t = InvMod(t, p);
705
x = MulMod(x, t, p, pinv);
706
707
return x;
708
}
709
710
711
712
/*
713
Returns B_k/k mod p, in the range [0, p).
714
715
PRECONDITIONS:
716
5 <= p < NTL_SP_BOUND, p prime
717
2 <= k <= p-3, k even
718
pinv = 1 / ((double) p)
719
*/
720
long _bern_modp(long p, double pinv, long k)
721
{
722
if (PowerMod(2, k, p, pinv) != 1)
723
// 2^k != 1 mod p, so we use the faster version
724
return _bern_modp_pow2(p, pinv, k);
725
else
726
// forced to use slower version
727
return _bern_modp_powg(p, pinv, k);
728
}
729
730
731
732
/******************************************************************************
733
734
Main bern_modp() routine
735
736
******************************************************************************/
737
738
long bern_modp(long p, long k)
739
{
740
assert(k >= 0);
741
assert(2 <= p && p < NTL_SP_BOUND);
742
743
// B_0 = 1
744
if (k == 0)
745
return 1;
746
747
// B_1 = -1/2 mod p
748
if (k == 1)
749
{
750
if (p == 2)
751
return -1;
752
return (p-1)/2;
753
}
754
755
// B_k = 0 for odd k >= 3
756
if (k & 1)
757
return 0;
758
759
// denominator of B_k is always divisible by 6 for k >= 2
760
if (p <= 3)
761
return -1;
762
763
// use Kummer's congruence (k = m mod p-1 => B_k/k = B_m/m mod p)
764
long m = k % (p-1);
765
if (m == 0)
766
return -1;
767
768
double pinv = 1 / ((double) p);
769
long x = _bern_modp(p, pinv, m); // = B_m/m mod p
770
return MulMod(x, k, p, pinv);
771
}
772
773
774
}; // end namespace
775
776
777
778
// end of file ================================================================
779
780