CoCalc -- partitions

GitHub Repository: sagemath/sagelib
Path: blob/master/sage/combinat/partitions_c.cc
⁴⁰⁶⁹ views
1
/*
2
 *      Author:     Jonathan Bober
3
 *      Version:    .6
4
 *
5
 *      This program computes p(n), the number of integer partitions of n, using Rademacher's
6
 *      formula. (See Hans Rademacher, On the Partition Function p(n),
7
 *      Proceedings of the London Mathematical Society 1938 s2-43(4):241-254; doi:10.1112/plms/s2-43.4.241,
8
 *      currently at
9
 *
10
 *      http://plms.oxfordjournals.org/cgi/content/citation/s2-43/4/241
11
 *
12
 *      if you have access.)
13
 *
14
 *      We use the following notation:
15
 *
16
 *      p(n) = lim_{n --> oo} t(n,N)
17
 *
18
 *      where
19
 *
20
 *      t(n,N) = sum_{k=1}^N a(n,k) f_n(k),
21
 *
22
 *      where
23
 *
24
 *      a(n,k) = sum_{h=1, (h,k) = 1}^k exp(\pi i s(h,k) - 2 \pi i h  n / k)
25
 *
26
 *      and
27
 *      
28
 *      f_n(k) = \pi sqrt{2} cosh(A_n/(sqrt{3}*k))/(B_n*k) - sinh(C_n/k)/D_n;
29
 *
30
 *      where
31
 *
32
 *      s(h,k) = \sum_{j=1}^{k-1}(j/k)((hj/k))
33
 *
34
 *      A_n = sqrt{2} \pi * sqrt{n - 1/24}
35
 *      B_n = 2 * sqrt{3} * (n - 1/24)
36
 *      C_n = sqrt{2} * \pi * sqrt{n - 1.0/24.0} / sqrt{3}
37
 *      D_n = 2 * (n - 1/24) * sqrt{n - 1.0/24.0}
38
 *
39
 *      and, finally, where ((x)) is the sawtooth function ((x)) = {x} - 1/2 if x is not an integer, 0 otherwise.
40
 *
41
 *      Some clever tricks are used in the computation of s(h,k), and perhaps at least
42
 *      some of these are due to Apostol. (I don't know a reference for this.)
43
 *
44
 *      TODO:
45
 *
46
 *          -- Search source code for other TODO comments.
47
 *
48
 *      OTHER CREDITS:
49
 *      
50
 *      I looked source code written by Ralf Stephan, currently available at
51
 *
52
 *              http://www.ark.in-berlin.de/part.c
53
 *
54
 *      while writing this code, but didn't really make use of it, except for the
55
 *      function cospi(), currently included in the source (slightly modified), but not
56
 *      used.
57
 *
58
 *      More useful were notes currently available at
59
 *
60
 *              http://www.ark.in-berlin.de/part.pdf
61
 *
62
 *      and others at
63
 *
64
 *              http://www.math.uwaterloo.ca/~dmjackso/CO630/ptnform1.pdf
65
 *
66
 *      Also, it is worth noting that the code for GCD(), while trivial,
67
 *      was directly copied from the NTL source code.
68
 *
69
 *      Also, Bill Hart made some comments about ways to speed up this computation on the SAGE
70
 *      mailing list.
71
 *
72
 *      This program is free software; you can redistribute it and/or modify
73
 *      it under the terms of the GNU General Public License as published by
74
 *      the Free Software Foundation; either version 2 of the License, or
75
 *      (at your option) any later version.
76
 *
77
 *      This program is distributed in the hope that it will be useful,
78
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
79
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
80
 *      GNU General Public License for more details.
81
 *
82
 *      You should have received a copy of the GNU General Public License
83
 *      along with this program.  If not, see <http://www.gnu.org/licenses/>. 
84
 */
85

86

87
#if defined(__sun) || defined(__CYGWIN__)
88
extern "C" long double fabsl (long double);
89
extern "C" long double sinl (long double);
90
extern "C" long double cosl (long double);
91
extern "C" long double sqrtl (long double);
92
extern "C" long double coshl (long double);
93
extern "C" long double sinhl (long double);
94
#endif
95

96
#include <stdio.h>
97
#include <cfloat>
98
#include <time.h>
99

100
#include <cmath>
101
#include <cstdlib>
102
#include <cstring>
103

104
#include <iostream>
105
#include <iomanip>
106
#include <limits>
107

108
#include <mpfr.h>
109
#include <gmp.h>
110

111
using namespace std;
112

113
using std::cout;
114
using std::endl;
115

116
/*****************************************************************************
117
 *
118
 *      We begin be declaring all the constant global variables.
119
 *
120
 ****************************************************************************/
121

122
// First, some variables that can be set to have the program
123
// output some information that might be useful for debugging.
124

125
const bool debug = false;                                       // If true, output some random stuff
126

127
const bool debug_precision = false;                             // If true, output information that might be useful for
128
                                                                // debugging the precision setting code.
129
                                        
130
const bool debugs = false;                                      // If true, output informaiton that might be useful for
131
                                                                // debugging the various s() functions.
132
const bool debugf = false;                                      // Same for the f() functions.
133
const bool debuga = false;                                      // Same for the a() functions.
134
const bool debugt = false;                                      // Same for the t() functions.
135

136
#if FLT_RADIX != 2
137
#error I don't know what to do when the float radix is not 2.
138
#endif
139
// Note - it might be unreasonable to not support a float radix other than
140
// 2, but apparently gcc doesn't either. See http://gcc.gnu.org/ml/fortran/2006-12/msg00032.html.
141

142

143
const unsigned int min_precision = DBL_MANT_DIG;                            // The minimum precision that we will ever use.
144
const unsigned int double_precision = DBL_MANT_DIG;                         // The assumed precision of a double.
145

146

147
#if defined(__sparc) || defined(__CYGWIN__)  || defined(__FreeBSD__)
148
// On sparc solaris long double is bad/broken/different, etc.  E.g.,
149
// LDBL_MANT_DIG is 113 rather than 106, which causes all kinds of trouble.
150
// So we only use double_precision.
151
const unsigned int long_double_precision = double_precision;
152
#else
153
const unsigned int long_double_precision = (LDBL_MANT_DIG == 106) ? double_precision : LDBL_MANT_DIG;
154
#endif
155
                                                                            // The assumed precision of a long double.
156
                                                                            // Note: On many systems double_precision = long_double_precision. This is OK, as
157
                                                                            // the long double stage of the computation will just be skipped.
158
                                                                            // 
159
                                                                            // NOTE: If long_double_precision > dd_precision, then, again, long doubles
160
                                                                            //      will not ever be used. It would be nice if this were fixed.
161

162

163
// Second, some constants that control the precision at which we compute.
164

165
// When we compute the terms of the Rademacher series, we start by computing with
166
// mpfr_t variables. But for efficiency we switch to special purpose code when
167
// we don't need as much precision. These constants control the various
168
// precision levels at which we switch to different special
169
// purpose functions.
170

171
// const unsigned int level_one_precision = infinity        // We don't actually use this, but if we did it would be infinite.
172
const unsigned int level_two_precision = long_double_precision;
173
const unsigned int level_three_precision = long_double_precision;
174
const unsigned int level_four_precision = long_double_precision;
175
const unsigned int level_five_precision = double_precision;
176

177

178
const long double ld_pi = 3.141592653589793238462643L;
179
const double d_pi = ld_pi;
180

181
// Third, the rounding mode for mpfr.
182
const mp_rnd_t round_mode = GMP_RNDN;
183

184
/*****************************************************************************
185
 *
186
 *  We are finished declaring constants, and next declare semi-constant
187
 *  variables. These are all set just once per call to part(n).
188
 *
189
 *  All of these are set in initialize_constants(), and the mpfr_t variables
190
 *  are cleared in clear_constants().
191
 *
192
 ****************************************************************************/
193

194
mpfr_t mp_one_over_12, mp_one_over_24, mp_sqrt2, mp_sqrt3, mp_pi, half, fourth;         // These need to be set at run time
195
                                                                                        // because we don't know how much precision we will need
196
                                                                                        // until we know for what n we are computing p(n).
197

198
mpfr_t mp_A, mp_B, mp_C, mp_D;                                                          // These "constants" all depend on n, and 
199
double d_A, d_B, d_C, d_D;                                                              //
200
long double ld_A, ld_B, ld_C, ld_D;                                                     //
201

202

203
/*****************************************************************************
204
 *
205
 * We next declare the variables that actually vary. It is cumbersome to have
206
 * so many global variables, but it is somewhat necessary since we want to call
207
 * the functions mpz_init(), mpq_init(), mpfr_init(), and the corresponding
208
 * clear() functions as little as possible.
209
 *
210
 ****************************************************************************/
211

212
mpz_t ztemp1;                                                                   // These are initialized in the function initialize_mpz_and_mpq_variables(), and
213
mpq_t qtemps, qtempa, qtempa2;                                                  // cleared in the function clear_mpz_and_mpq_variables().
214
                                                                                // qtemps is used by q_s() and qtempa and qtempa2() are used by mp_a().
215
                                                                                //
216
                                                                                // ztemp1 is only used in one function, and the code where is it used is actually unlikely
217
                                                                                // to be used for any input, so it should be erased.
218

219
mpfr_t tempa1, tempa2, tempf1, tempf2, temps1, temps2;                          // These are all initialized by initialize_mpfr_variables(), and cleared by
220
                                                                                // clear_mpfr_variables().
221
                                                                                // NAMING CONVENTIONS:
222
                                                                                //
223
                                                                                // -tempa1 and tempa2 are two variables available for use
224
                                                                                //      in the function mp_a()
225
                                                                                //
226
                                                                                // -tempf1 and tempf2 are two variables available for use
227
                                                                                //      in the function mp_f()
228
                                                                                //
229
                                                                                // -etc...
230

231
mpfr_t tempc1, tempc2;                                                          // temp variables used by cospi()
232

233

234
/*****************************************************************************
235
 *
236
 * End of Global Variables, beginning of function declarations
237
 *
238
 ****************************************************************************/
239

240
// 
241
// A listing of the main functions, in "conceptual" order.
242
//
243

244
int part(mpz_t answer, unsigned int n);
245

246
static void mp_t(mpfr_t result, unsigned int n);                                // Compute t(n,N) for an N large enough, and with a high enough precision, so that
247
                                                                                // |p(n) - t(n,N)| < .5
248

249
static unsigned int compute_initial_precision(unsigned int n);                  // computes the precision required to accurately compute p(n)
250

251
static void initialize_constants(unsigned int prec, unsigned int n);            // Once we know the precision that we will need, we precompute
252
                                                                                // some commonly used constants.
253

254
static unsigned int compute_current_precision(unsigned int n, unsigned int N, unsigned int extra);  // Computed the precision required to
255
                                                                                // accurately compute the tail of the rademacher series
256
                                                                                // assuming that N terms have already been computed.
257
                                                                                // This is called after computing each summand, unless
258
                                                                                // we have already reached the minimum precision.
259
                                                                                //
260
                                                                                // Reducing the precision as fast as possible is key to
261
                                                                                // fast performance.
262

263
static double compute_remainder(unsigned int n, unsigned int N);                // Gives an upper bound on the error that occurs
264
                                                                                // when only N terms of the Rademacher series have been
265
                                                                                // computed.
266
                                                                                //
267
                                                                                // This should only be called when we know that compute_current_precision
268
                                                                                // will return min_precision. Otherwise the error will be too large for
269
                                                                                // it to compute.
270

271

272
//
273
// The following functions are all called in exactly one place, so
274
// we could ask the compiler to inline everything. Actually making this
275
// work properly requires changing some compiler options, and doesn't
276
// provide much improvement, however.
277
//
278

279
static void mp_f(mpfr_t result, unsigned int k);                                // See introduction for an explanation of these functions.
280
static void q_s(mpq_t result, unsigned int h, unsigned int k);                  //
281
static void mp_a(mpfr_t result, unsigned int n, unsigned int k);                //
282

283
template <class T> static inline T a(unsigned int n, unsigned int k);           // Template versions of the above functions for computing with
284
template <class T> static inline T f(unsigned int k);                           // low precision. Currently used for computing with qd_real, dd_real,
285
template <class T> static inline T s(unsigned int h, unsigned int k);           // long double, and double.
286

287

288
//
289
// The following are a bunch of "fancy macros" designed so that in the templated code
290
// for a, for example, when we need to use pi, we just call pi<T>() to get pi to
291
// the proper precision. The compiler should inline these, so using one of these
292
// functions is just as good as using a constant, and since these functions are static
293
// they shouldn't even appear in the object code generated.
294

295
template <class T> static inline T pi(){ return ld_pi; }
296

297
template <class T> static inline T sqrt2() {return sqrt(2.0L);}
298

299
template <class T> static inline T sqrt3() {return sqrt(3.0L);}
300

301
template <class T> static inline T A() {return 1;}
302
template <> inline double A() {return d_A;}
303
template <> inline long double A() {return ld_A;}
304

305
template <class T> static inline T B() {return 1;}
306
template <> inline double B() {return d_B;}
307
template <> inline long double B() {return ld_B;}
308

309
template <class T> static inline T C() {return 1;}
310
template <> inline double C() {return d_C;}
311
template <> inline long double C() {return ld_C;}
312

313
template <class T> static inline T D() {return 1;}
314
template <> inline double D() {return d_D;}
315
template <> inline long double D() {return ld_D;}
316

317
template <class T> static inline T pi_sqrt2() {return 1;}
318
template <> inline double pi_sqrt2() {return d_pi * sqrt(2.0);}
319
template <> inline long double pi_sqrt2() {return ld_pi * sqrt(2.0L);}
320

321
template <class T> static inline T one_over_12() {return 1;}
322
template <> inline double one_over_12() {return 1.0/12.0;}
323
template <> inline long double one_over_12() {return 1.0L/12.0L;}
324

325
// A few utility functions...
326

327
static inline long GCD(long a, long b);
328
int test(bool longtest = false, bool forever = false);                          // Runs a bunch of tests to make sure
329
                                                                                // that we are getting the right answers.
330
                                                                                // Tests are based on a few "known" values that
331
                                                                                // have been verified by other programs, and
332
                                                                                // also on known congruences for p(n)
333

334
static void cospi (mpfr_t res, mpfr_t x);                                       // Puts cos(pi x) in result. This is not currently    
335
                                                                                // used, but should be faster than using mpfr_cos,
336
                                                                                // according to comments in the file part.c
337
                                                                                // mentioned in the introduction.   
338
                                                                                // test
339

340
static int grab_last_digits(char * output, int n, mpfr_t x);                    // Might be useful for debugging, but
341
                                                                                // don't use it for anything else.
342
                                                                                // (See function definition for more information.)
343

344

345
int main(int argc, char *argv[]);                                               // This program is mainly meant for inclusion
346
                                                                                // in SAGE (or any other program, if anyone
347
                                                                                // feels like it). We include a main() function
348
                                                                                // anyway, because it is useful to compile
349
                                                                                // this as a standalone program
350
                                                                                // for testing purposes.
351

352
/***********************************************************************
353
 *
354
 * That should be the end of both function and variable definitions.
355
 *
356
 **********************************************************************/
357

358
// The following function can be useful for debugging in come circumstances, but should not be used for anything else 
359
// unless it is rewritten.
360
int grab_last_digits(char * output, int n, mpfr_t x) {
361
    // fill output with the n digits of x that occur
362
    // just before the decimal point
363
    // Note: this assumes that x has enough digits and enough
364
    // precision -- otherwise bad things can happen
365
    //
366
    // returns: the number of digits to the right of the decimal point
367

368
    char * temp;
369
    mp_exp_t e;
370

371
    temp = mpfr_get_str(NULL, &e, 10, 0, x, GMP_RNDN);
372

373
    int retval;
374

375
    if(e > 0) {
376
        strncpy(output, temp + e - n, n);    
377
        retval =  strlen(temp + e);
378
    }
379
    else {
380
        for(int i = 0; i < n; i++)
381
            output[i] = '0';
382
        retval = strlen(temp);
383
    }
384
    output[n] = '\0';
385

386
    mpfr_free_str(temp);
387

388
    return retval;
389
}
390

391

392

393
unsigned int compute_initial_precision(unsigned int n) {
394
    // We just want to know how many bits we will need to
395
    // compute to get an accurate answer.
396
    
397
    // We know that
398
    //
399
    //          p(n) ~ exp(pi * sqrt(2n/3))/(4n sqrt(3)),
400
    //
401
    // so for now we are assuming that p(n) < exp(pi * sqrt(2n/3))/n,
402
    // so we need pi*sqrt(2n/3)/log(2) - log(n)/log(2) + EXTRA bits.
403
    //
404
    // EXTRA should depend on n, and should be something that ensures
405
    // that the TOTAL ERROR in all computations is < (something small).
406
    // This needs to be worked out carefully. EXTRA = log(n)/log(2) + 3
407
    // is probably good enough, and is convenient...
408
    // 
409
    // but we really need:
410
    //
411
    //                  p(n) < something 
412
    //
413
    // to be sure that we compute the correct answer
414

415
    unsigned int result = (unsigned int)(ceil(3.1415926535897931 * sqrt(2.0 * double(n)/ 3.0) / log(2))) + 3;
416
    if(debug) cout << "Using initial precision of " << result << " bits." << endl;
417
    
418
    if(result > min_precision) {
419
        return result;
420
    }
421
    
422
    else return min_precision;
423

424
}
425

426
unsigned int compute_current_precision(unsigned int n, unsigned int N, unsigned int extra = 0) {
427
    // Roughly, we compute
428
    // 
429
    //      log(A/sqrt(N) + B*sqrt(N/(n-1))*sinh(C * sqrt(n) / N) / log(2)
430
    //
431
    // where A, B, and C are the constants listed below. These error bounds
432
    // are given in the paper by Rademacher listed at the top of this file.
433
    // We then return this + extra, if extra != 0. If extra == 0, return with
434
    // what is probably way more extra precision than is needed.
435
    // 
436
    // extra should probably have been set by a call to compute_extra_precision()
437
    // before this function was called.
438

439

440
    // n = the number for which we are computing p(n)
441
    // N = the number of terms that have been computed so far
442

443
    // if N is 0, then we can't use the above formula (because we would be
444
    // dividing by 0).
445
    if(N == 0) return compute_initial_precision(n) + extra;
446

447
    mpfr_t A, B, C;
448
    mpfr_init2(A, 32);
449
    mpfr_init2(B, 32);
450
    mpfr_init2(C, 32);
451
    
452
    mpfr_set_d(A,1.11431833485164,round_mode);
453
    mpfr_set_d(B,0.059238439175445,round_mode);
454
    mpfr_set_d(C,2.5650996603238,round_mode);
455

456
    mpfr_t error, t1, t2;
457
    mpfr_init2(error, 32);                                // we shouldn't need much precision here since we just need the most significant bit
458
    mpfr_init2(t1, 32);
459
    mpfr_init2(t2, 32);
460

461
    mpfr_set(error, A, round_mode);                       // error = A
462
    mpfr_sqrt_ui(t1, N, round_mode);                      // t1 = sqrt(N)
463
    mpfr_div(error, error, t1, round_mode);               // error = A/sqrt(N)
464

465

466
    mpfr_sqrt_ui(t1, n, round_mode);                      // t1 = sqrt(n)
467
    mpfr_mul(t1, t1, C, round_mode);                      // t1 = C * sqrt(n)
468
    mpfr_div_ui(t1, t1, N, round_mode);                   // t1 = C * sqrt(n) / N
469
    mpfr_sinh(t1, t1, round_mode);                        // t1 = sinh( ditto )
470
    mpfr_mul(t1, t1, B, round_mode);                      // t1 = B * sinh( ditto )
471

472
    mpfr_set_ui(t2, N, round_mode);                       // t2 = N
473
    mpfr_div_ui(t2, t2, n-1, round_mode);                 // t2 = N/(n-1)
474
    mpfr_sqrt(t2, t2, round_mode);                        // t2 = sqrt( ditto )
475
    
476
    mpfr_mul(t1, t1, t2, round_mode);                     // t1 = B * sqrt(N/(n-1)) * sinh(C * sqrt(n)/N)
477

478
    mpfr_add(error, error, t1, round_mode);               // error = (ERROR ESTIMATE)
479
                                                         
480
    unsigned int p = mpfr_get_exp(error);                 // I am not 100% certain that this always does the right thing.
481
                                                          // (It should be the case that p is now the number of bits
482
                                                          // required to hold the integer part of the error.)
483
    
484

485
    if(extra == 0) {
486
        p = p + (unsigned int)ceil(log(n)/log(2));        // This is a stupid case to fall back on,
487
                                                          // left over from earlier versions of this code.
488
                                                          // It really should never be used.
489
    }
490
    else {
491
        p = p + extra;                                    // Recall that the extra precision should be
492
                                                          // large enough so that the accumulated errors
493
                                                          // in all of the computations that we make
494
                                                          // are not big enough to matter.
495
    }
496

497
    if(debug) {
498
        cout << "Error seems to be: ";
499
        mpfr_out_str(stdout, 10, 0, error, round_mode);
500
        cout << endl;
501
        cout.flush();
502
        cout << "Switching to precision of " << p << " bits. " << endl;
503
    }
504

505
    mpfr_clear(error);
506
    mpfr_clear(t1);
507
    mpfr_clear(t2);
508
    mpfr_clear(A);
509
    mpfr_clear(B);
510
    mpfr_clear(C);
511

512

513
    if(p > min_precision) {
514
        return p;                                         // We don't want to return < min_precision.
515
                                                          // Note that when the code that calls this
516
                                                          // function finds that the returned result
517
                                                          // is min_precision, it should stop calling
518
                                                          // this function, since the result won't change
519
                                                          // after that. Also, it should probably switch
520
                                                          // to computations with doubles, and should
521
                                                          // start calling compute_remainder().
522
    }
523
    return min_precision;
524
}
525

526
int compute_extra_precision(unsigned int n, double error = .25) {
527
    // Return the number of terms of the Rachemacher series that
528
    // we will need to compute to get a remainder of less than error
529
    // in absolute value, and then return the extra precision
530
    // that will guarantee that the accumulated error after computing
531
    // that number of steps will be less than .5 - error.
532
    // 
533
    // How this works:
534
    // We first need to figure out how many terms of the series we are going to
535
    // need to compute. That is, we need to know how large k needs to be
536
    // for compute_remainder(n,k) to return something smaller than error. There
537
    // might be a clever way to do this, but instead we just keep calling
538
    // compute_current_precision() until we know that the error will be
539
    // small enough to call compute_remainder(). Then we just call compute_remainder()
540
    // until the error is small enough.
541
    // 
542
    // Now that we know how many terms we will need to compute, k, we compute
543
    // the number of bits required to accurately store (.5 - error)/k. This ensures
544
    // that the total error introduced when we add up all k terms of the sum
545
    // will be less than (.5 - error). This way, if everything else works correctly,
546
    // then the sum will be within .5 of the correct (integer) answer, and we
547
    // can correctly find it by rounding.
548
    unsigned int k = 1;
549
    for( ; compute_current_precision(n,k,0) > double_precision; k += 100) {
550
    }
551

552
    for( ; compute_remainder(n, k) > error ; k += 100)  {
553
    }
554
    if(debug_precision) {
555
        cout << "To compute p(" << n << ") we will add up approximately " << k << " terms from the Rachemacher series." << endl;
556
    }
557
    int bits = (int)((log(k/(.5 - error)))/log(2)) + 5;   // NOTE: reducing the number of bits by 3 here is known to cause errors
558
                                                          // Why the extra 5 bits? Anytime we call a function, eg mp_a(),
559
                                                          // we end up doing a bunch of arithmetic operations, and if
560
                                                          // we want the result of those operations to be accurate
561
                                                          // within (.5 - error)/k, then we need that function to use
562
                                                          // a slightly higher working precision, which should be 
563
                                                          // independent of n.
564
                                                          // TODO:
565
                                                          // Extensive trial and error has found 3 to be the smallest value
566
                                                          // that doesn't seem to produce any wrong answers. Thus, to
567
                                                          // be safe, we use 5 extra bits.
568
                                                          // (Extensive trial and error means compiling this file to get
569
                                                          // a.out and then running './a.out testforever' for a few hours.)
570
                                                          
571
                                                          
572
                                                       
573

574
    return bits;
575
}
576

577
double compute_remainder(unsigned int n, unsigned int N) {
578
    // This computes the remainer left after N terms have been computed.
579
    // The formula is exactly the same as the one used to compute the required
580
    // precision, but once we know the necessary precision is small, we can
581
    // call this function to determine the actual error (rather than the precision).
582
    //
583
    // Generally, this is only called once we know that the necessary
584
    // precision is <= min_precision, because then the error is small
585
    // enough to fit into a double, and also, we know that we are
586
    // getting close to the correct answer.
587

588
    double A = 1.11431833485164;
589
    double B = 0.059238439175445;
590
    double C = 2.5650996603238;
591
    double result;
592
    result = A/sqrt(N) + B * sqrt(double(N)/double(n-1))*sinh(C * sqrt(double(n))/double(N));
593
    return result;
594
}
595

596

597
void initialize_mpfr_variables(unsigned int prec) {
598
    //
599
    // Clear and initialize some "temp" variables that are used in the computation of various functions.
600
    //
601
    // We do this in this auxilliary function (and endure the pain of
602
    // the extra global variables) so that we only have to initialize/clear
603
    // these variables once every time the precision changes.
604
    //
605
    // NAMING CONVENTIONS:
606
    //
607
    // -tempa1 and tempa2 are two variables available for use
608
    //      in the function mp_a()
609
    //
610
    // -tempf1 and tempf2 are two variables available for use
611
    //      in the function mp_f()
612
    //
613
    // -etc...
614
    //
615
    // NOTE: Calls to this function must be paired with calls to clear_mpfr_variables()
616

617
    mpfr_init2(tempa1, prec);
618
    mpfr_init2(tempa2, prec);
619
    mpfr_init2(tempf1, prec);
620
    mpfr_init2(tempf2, prec);
621
    mpfr_init2(temps1, prec);
622
    mpfr_init2(temps2, prec);
623
    mpfr_init2(tempc1, prec);
624
    mpfr_init2(tempc2, prec);
625
}
626

627
void clear_mpfr_variables() {
628
    mpfr_clear(tempa1);
629
    mpfr_clear(tempa2);
630
    mpfr_clear(tempf1);
631
    mpfr_clear(tempf2);
632
    mpfr_clear(temps1);
633
    mpfr_clear(temps2);
634

635
    mpfr_clear(tempc1);
636
    mpfr_clear(tempc2);
637
}
638

639

640
void initialize_constants(unsigned int prec, unsigned int n) {
641
    // The variables mp_A, mp_B, mp_C, and mp_D are used for
642
    // A_n, B_n, C_n, and D_n listed at the top of this file.
643
    //
644
    // They depend only on n, so we compute them just once in this function,
645
    // and then use them many times elsewhere.
646
    //
647
    // Also, we precompute some extra constants that we use a lot, such as
648
    // sqrt2, sqrt3, pi, 1/24, 1/12, etc.
649
    //
650
    // NOTE: Calls to this function must be paired with calls to clear_constants()
651
    static bool init = false;
652
    mp_prec_t p = prec;
653
    
654
    mpfr_init2(mp_one_over_12,p); mpfr_init2(mp_one_over_24,p); mpfr_init2(mp_sqrt2,p); mpfr_init2(mp_sqrt3,p); mpfr_init2(mp_pi,p);
655
    mpfr_init2(mp_A,p); mpfr_init2(mp_B,p); mpfr_init2(mp_C,p); mpfr_init2(mp_D,p); mpfr_init2(fourth, p); mpfr_init2(half, p);
656
    
657
    init = true;
658
    
659
    mpfr_set_ui(mp_one_over_12, 1, round_mode);                             // mp_one_over_12 = 1/12
660
    mpfr_div_ui(mp_one_over_12, mp_one_over_12, 12, round_mode);            //
661

662
    mpfr_set_ui(mp_one_over_24, 1, round_mode);                             // mp_one_over_24 = 1/24
663
    mpfr_div_ui(mp_one_over_24, mp_one_over_24, 24, round_mode);            //
664

665
    mpfr_set_ui(half, 1, round_mode);                                       //
666
    mpfr_div_ui(half, half, 2, round_mode);                                 // half = 1/2
667
    mpfr_div_ui(fourth, half, 2, round_mode);                               // fourth = 1/4
668

669
    mpfr_t n_minus;                                                         //
670
    mpfr_init2(n_minus, p);                                                 //
671
    mpfr_set_ui(n_minus, n, round_mode);                                    // n_minus = n
672
    mpfr_sub(n_minus, n_minus, mp_one_over_24,round_mode);                  // n_minus = n - 1/24
673

674
    mpfr_t sqrt_n_minus;                                                    //
675
    mpfr_init2(sqrt_n_minus, p);                                            //
676
    mpfr_sqrt(sqrt_n_minus, n_minus, round_mode);                           // n_minus = sqrt(n-1/24)
677

678

679
    mpfr_sqrt_ui(mp_sqrt2, 2, round_mode);                                  // mp_sqrt2 = sqrt(2)
680
    mpfr_sqrt_ui(mp_sqrt3, 3, round_mode);                                  // mp_sqrt3 = sqrt(3)
681
    mpfr_const_pi(mp_pi, round_mode);                                       // mp_pi = pi
682

683
    //mp_A = sqrt(2) * 3.1415926535897931 * sqrt(n - 1.0/24.0);---------------
684
    mpfr_set(mp_A, mp_sqrt2, round_mode);                                   // mp_A = sqrt(2)
685
    mpfr_mul(mp_A, mp_A, mp_pi, round_mode);                                // mp_A = sqrt(2) * pi
686
    mpfr_mul(mp_A, mp_A, sqrt_n_minus, round_mode);                         // mp_A = sqrt(2) * pi * sqrt(n - 1/24)
687
    //------------------------------------------------------------------------
688

689

690
    //mp_B = 2.0 * sqrt(3) * (n - 1.0/24.0);----------------------------------
691
    mpfr_set_ui(mp_B, 2, round_mode);                                       // mp_A = 2
692
    mpfr_mul(mp_B, mp_B, mp_sqrt3, round_mode);                             // mp_A = 2*sqrt(3)
693
    mpfr_mul(mp_B, mp_B, n_minus, round_mode);                              // mp_A = 2*sqrt(3)*(n-1/24)
694
    //------------------------------------------------------------------------
695

696

697
    //mp_C = sqrt(2) * pi * sqrt(n - 1.0/24.0) / sqrt(3);---------------------
698
    mpfr_set(mp_C, mp_sqrt2, round_mode);                                   // mp_C = sqrt(2)
699
    mpfr_mul(mp_C, mp_C, mp_pi, round_mode);                                // mp_C = sqrt(2) * pi
700
    mpfr_mul(mp_C, mp_C, sqrt_n_minus, round_mode);                         // mp_C = sqrt(2) * pi * sqrt(n - 1/24)
701
    mpfr_div(mp_C, mp_C, mp_sqrt3, round_mode);                             // mp_C = sqrt(2) * pi * sqrt(n - 1/24) / sqrt3
702
    //------------------------------------------------------------------------
703

704
    
705
    //mp_D = 2.0 * (n - 1.0/24.0) * sqrt(n - 1.0/24.0);-----------------------
706
    mpfr_set_ui(mp_D, 2, round_mode);                                       // mp_D = 2
707
    mpfr_mul(mp_D, mp_D, n_minus, round_mode);                              // mp_D = 2 * (n - 1/24)
708
    mpfr_mul(mp_D, mp_D, sqrt_n_minus, round_mode);                         // mp_D = 2 * (n - 1/24) * sqrt(n - 1/24)
709
    //------------------------------------------------------------------------
710

711
    mpfr_clear(n_minus);
712
    mpfr_clear(sqrt_n_minus);
713

714

715
    // some double precision versions of the above values
716

717
    d_A = sqrt(2) * d_pi * sqrt(n - 1.0/24.0);
718
    d_B = 2.0 * sqrt(3) * (n - 1.0/24.0);
719
    d_C = sqrt(2) * d_pi * sqrt(n - 1.0/24.0) / sqrt(3);
720
    d_D = 2.0 * (n - 1.0/24.0) * sqrt(n - 1.0/24.0);
721

722
    ld_A = sqrtl(2) * ld_pi * sqrtl(n - 1.0L/24.0L);
723
    ld_B = 2.0L * sqrtl(3) * (n - 1.0/24.0);
724
    ld_C = sqrtl(2) * ld_pi * sqrtl(n - 1.0L/24.0L) / sqrtl(3);
725
    ld_D = 2.0L * (n - 1.0L/24.0L) * sqrtl(n - 1.0L/24.0L);
726

727
}
728

729
void clear_constants() {
730
        mpfr_clear(mp_one_over_12); mpfr_clear(mp_one_over_24); mpfr_clear(mp_sqrt2); mpfr_clear(mp_sqrt3); mpfr_clear(mp_pi);
731
        mpfr_clear(mp_A); mpfr_clear(mp_B); mpfr_clear(mp_C); mpfr_clear(mp_D); mpfr_clear(half); mpfr_clear(fourth);
732
}
733

734
void initialize_mpz_and_mpq_variables() {
735
    /* 
736
     * We use a few mpz_t and mpq_t variables which need to be initialized
737
     * before they can be used. Initialization and clearing take some
738
     * time, so we initialize just once in this function, and clear in another.
739
     */
740
    mpz_init(ztemp1);
741

742
    mpq_init(qtemps);
743
    mpq_init(qtempa);
744
    mpq_init(qtempa2);
745
}
746

747
void clear_mpz_and_mpq_variables() {
748
    mpz_clear(ztemp1);
749

750
    mpq_clear(qtemps);
751
    mpq_clear(qtempa);
752
    mpq_clear(qtempa2);
753
}
754

755
void mp_f(mpfr_t result, unsigned int k) {
756
    // compute f_n(k) as described in the introduction
757
    //
758
    // notice that this doesn't use n - the "constants"
759
    // A, B, C, and D depend on n, but they are precomputed
760
    // once before this function is called.
761
    
762
    //result =  pi * sqrt(2) * cosh(A/(sqrt(3)*k))/(B*k) - sinh(C/k)/D;
763

764
                                                                    //
765
    mpfr_set(result, mp_pi, round_mode);                            // result = pi
766
    
767
    mpfr_mul(result, result, mp_sqrt2, round_mode);                 // result = sqrt(2) * pi
768
                                                                    //
769
    mpfr_div(tempf1, mp_A, mp_sqrt3, round_mode);                   // temp1 = mp_A/sqrt(3)
770
    mpfr_div_ui(tempf1, tempf1, k, round_mode);                     // temp1 = mp_A/(sqrt(3) * k)
771
    mpfr_cosh(tempf1, tempf1, round_mode);                          // temp1 = cosh(mp_A/(sqrt(3) * k))
772
    mpfr_div(tempf1, tempf1, mp_B, round_mode);                     // temp1 = cosh(mp_A/(sqrt(3) * k))/mp_B
773
    mpfr_div_ui(tempf1, tempf1, k, round_mode);                     // temp1 = cosh(mp_A/(sqrt(3) * k))/(mp_B*k)
774
                                                                    //
775
    mpfr_mul(result, result, tempf1, round_mode);                   // result = sqrt(2) * pi * cosh(mp_A/(sqrt(3) * k))/(mp_B*k)
776
                                                                    //
777
    mpfr_div_ui(tempf1, mp_C, k, round_mode);                       // temp1 = mp_C/k
778
    mpfr_sinh(tempf1, tempf1, round_mode);                          // temp1 = sinh(mp_C/k)
779
    mpfr_div(tempf1, tempf1, mp_D, round_mode);                     // temp1 = sinh(mp_C/k)/D
780
                                                                    //
781
    mpfr_sub(result, result, tempf1, round_mode);                   // result = RESULT!
782
                                                                    //
783
    //return pi * sqrt(2) * cosh(A/(sqrt(3)*k))/(B*k) - sinh(C/k)/D;
784
}
785

786
// call the following when 4k < sqrt(UINT_MAX)
787
//
788
// TODO: maybe a faster version of this can be written without using mpq_t,
789
//       or maybe this version can be smarter.
790
//
791
//       The actual value of the cosine that we compute using s(h,k)
792
//       only depends on {s(h,k)/2}, that is, the fractional
793
//       part of s(h,k)/2. It may be possible to make use of this somehow.
794
//
795
void q_s(mpq_t result, unsigned int h, unsigned int k) {
796
    
797
    if(k < 3) {
798
        mpq_set_ui(result, 0, 1);
799
        return;
800
    }
801

802
    if (h == 1) {
803
        unsigned int d = GCD( (k-1)*(k-2), 12*k);
804
        if(d > 1) {
805
            mpq_set_ui(result, ((k-1)*(k-2))/d, (12*k)/d);
806
        }
807
        else {
808
            mpq_set_ui(result, (k-1)*(k-2), 12*k);
809
        }
810
        return;
811
    }
812
    // TODO:
813
    // It may be advantageous to implement some of the special forms in the comments below,
814
    // and also some more listed in some of the links mentioned in the introduction, but
815
    // it seems like there might not be much speed benefit to this, and putting in too
816
    // many seems to slow things down a little.
817
    //
818

819
    // if h = 2 and k is odd, we have
820
    // s(h,k) = (k-1)*(k-5)/24k
821
    //if(h == 2 && k > 5 && k % 2 == 1) {
822
    //    unsigned int d = GCD( (k-1)*(k-5), 24*k);
823
    //    if(d > 1) {
824
    //        mpq_set_ui(result, ((k-1)*(k-5))/d, (24*k)/d);
825
    //    }
826
    //    else {
827
    //        mpq_set_ui(result, (k-1)*(k-5), 24*k);
828
    //    }
829
    //    return;
830
    //}
831

832
/*    
833

834
    // if k % h == 1, then
835
    //
836
    //      s(h,k) = (k-1)(k - h^2 - 1)/(12hk)
837
    //
838

839
    // We need to be a little careful here because k - h^2 - 1 can be negative.
840
    if(k % h == 1) {
841
        int num = (k-1)*(k - h*h - 1);
842
        int den = 12*k*h;
843
        int d = GCD(num, den);
844
        if(d > 1) {
845
            mpq_set_si(result, num/d, den/d);
846
        }
847
        else {
848
            mpq_set_si(result, num, den);
849
        }
850
        return;
851
    }
852

853
    // if k % h == 2, then
854
    //
855
    //      s(h,k) = (k-2)[k - .5(h^2 + 1)]/(12hk)
856
    //
857

858
    //if(k % h == 2) {
859
    //}
860
*/
861
    
862

863

864
    
865
    mpq_set_ui(result, 0, 1);                             // result = 0
866

867
    int r1 = k;
868
    int r2 = h;
869

870
    int n = 0;
871
    int temp3;
872
    while(r1 > 0 && r2 > 0) {
873
        unsigned int d = GCD(r1 * r1 + r2 * r2 + 1, r1 * r2);
874
        if(d > 1) {
875
            mpq_set_ui(qtemps, (r1 * r1 + r2 * r2 + 1)/d, (r1 * r2)/d);
876
        }
877
        else{
878
            mpq_set_ui(qtemps, r1 * r1 + r2 * r2 + 1, r1 * r2);
879
        }
880
        //mpq_canonicalize(qtemps);
881
        
882
        if(n % 2 == 0){                                             //
883
            mpq_add(result, result, qtemps);                        // result += temp1;
884
        }                                                           //
885
        else {                                                      //
886
            mpq_sub(result, result, qtemps);                        // result -= temp1;
887
        }                                                           //
888
        temp3 = r1 % r2;                                            //
889
        r1 = r2;                                                    //
890
        r2 = temp3;                                                 //
891
        n++;                                                        //
892
    }
893

894
    mpq_set_ui(qtemps, 1, 12);
895
    mpq_mul(result, result, qtemps);                                // result = result * 1.0/12.0;
896
    
897
    
898
    if(n % 2 == 1) {
899
        mpq_set_ui(qtemps, 1, 4);
900
        mpq_sub(result, result, qtemps);                            // result = result - .25;
901
    }
902

903
}
904

905

906
void mp_a(mpfr_t result, unsigned int n, unsigned int k) {
907
    // compute a(n,k)
908

909
    if (k == 1) {
910
        mpfr_set_ui(result, 1, round_mode);                         //result = 1
911
        return;
912
    }
913
    
914
    mpfr_set_ui(result, 0, round_mode);
915

916
    unsigned int h = 0;
917
    for(h = 1; h < k+1; h++) {
918
        if(GCD(h,k) == 1) {
919
            
920
            // Note that we compute each term of the summand as
921
            //      result += cos(pi * ( s(h,k) - (2.0 * h * n)/k) );
922
            //
923
            // This is the real part of the exponential that was written
924
            // down in the introduction, and we don't need to compute
925
            // the imaginary part because we know that, in the end, the
926
            // imaginary part will be 0, as we are computing an integer.
927
            
928
            q_s(qtempa, h, k);
929
              
930
            //mpfr_mul_q(tempa1, mp_pi, qtempa, round_mode);
931
            //mpfr_mul_ui(tempa1, tempa1, k * k, round_mode);
932

933
            //mpfr_set_q(tempa1, qtempa, round_mode);
934
            unsigned int r = n % k;                                     // here we make use of the fact that the 
935
            unsigned int d = GCD(r,k);                                  // cos() term written above only depends
936
            unsigned int K;                                             // on {hn/k}.
937
            if(d > 1) {
938
                r = r/d;
939
                K = k/d;
940
            }
941
            else {
942
                K = k;
943
            }
944
            if(K % 2 == 0) {
945
                K = K/2;
946
            }
947
            else {
948
                r = r * 2;
949
            }
950
            mpq_set_ui(qtempa2, h*r, K);
951
            mpq_sub(qtempa, qtempa, qtempa2);
952
            
953
            //mpfr_set_q(tempa2, qtempa, round_mode);                   // This might be faster, according to
954
            //cospi(tempa1, tempa2);                                    // the comments in Ralf Stephan's part.c, but
955
                                                                        // I haven't noticed a significant speed up.
956
                                                                        // (Perhaps a different version that takes an mpq_t
957
                                                                        // as an input might be faster.)
958
            
959
            mpfr_mul_q(tempa1, mp_pi, qtempa, round_mode);
960
            mpfr_cos(tempa1, tempa1, round_mode);
961
            mpfr_add(result, result, tempa1, round_mode);
962
  
963
        }
964
        
965
    }
966

967
}
968

969
template <class T> 
970
inline T partial_sum_of_t(unsigned int n, unsigned int &k, unsigned int exit_precision, unsigned int extra_precision, double error = 0) {
971
    unsigned int current_precision = compute_current_precision(n, k - 1, extra_precision);
972
    T result = 0;
973
    if(error == 0) {
974
        for(; current_precision > exit_precision; k++) {                        // (don't change k -- it is already the right value)
975
            result += sqrt(T(int(k))) * a<T>(n,k) * f<T>(k);                    //
976
            current_precision = compute_current_precision(n,k,extra_precision); // The only reason that we compute the new precision
977
                                                                                // now is so that we know when we can change to using just doubles.
978
                                                                                // (There should be a 'long double' version of the compute_current_precision function.
979
        }
980
    }
981
    else {
982
        double remainder = 1;
983
        for(; remainder > error; k++) {                                     // (don't change k -- it is already the right value)
984
            result += sqrt(T(int(k))) * a<T>(n,k) * f<T>(k);                //
985
            remainder = compute_remainder(n,k);
986
        }
987
    }
988
    return result;
989
}
990

991

992
void mp_t(mpfr_t result, unsigned int n) {
993
    // This is the function that actually computes p(n).
994
    //
995
    // More specifically, it computes t(n,N) to within 1/2 of p(n), and then
996
    // we can find p(n) by rounding.
997
    //
998
    //
999
    // NOTE: result should NOT have been initialized when this is called, 
1000
    // as we initialize it to the proper precision in this function.
1001
    
1002
    double error = .25;
1003
    int extra = compute_extra_precision(n, error);
1004

1005
    unsigned int initial_precision = compute_initial_precision(n);  // We begin by computing the precision necessary to hold the final answer.
1006
                                                                    // and then initialize both the result and some temporary variables to that
1007
                                                                    // precision.
1008
    mpfr_t t1, t2;                                                  //
1009
    mpfr_init2(t1, initial_precision);                              //
1010
    mpfr_init2(t2, initial_precision);                              //
1011
    mpfr_init2(result, initial_precision);                          //
1012
    mpfr_set_ui(result, 0, round_mode);                             //
1013
                                                                    
1014
    initialize_mpz_and_mpq_variables();
1015
    initialize_constants(initial_precision, n);                     // Now that we have the precision information, we initialize some constants
1016
                                                                    // that will be used throughout, and also
1017
                                                                    //
1018
    initialize_mpfr_variables(initial_precision);                   // set the precision of the "temp" variables that are used in individual functions.
1019

1020
    unsigned int current_precision = initial_precision;
1021
    unsigned int new_precision;
1022
    
1023
    // We start by computing with high precision arithmetic, until
1024
    // we are sure enough that we don't need that much precision
1025
    // anymore. Once current_precision == min_precision, we drop
1026
    // out of this loop and switch to a computation
1027
    // that only involves doubles.
1028

1029

1030

1031
    unsigned int k = 1;                                             // (k holds the index of the summand that we are computing.)
1032
    for(k = 1; current_precision > level_two_precision; k++) {      //
1033
        
1034
        mpfr_sqrt_ui(t1, k, round_mode);                            // t1 = sqrt(k)
1035
                                                                    //
1036
        mp_a(t2, n, k);                                             // t2 = A_k(n)
1037
        
1038
        if(debuga) {
1039
            cout << "a(" << k <<  ") = ";
1040
            mpfr_out_str(stdout, 10, 10, t2, round_mode);
1041
            cout << endl;
1042
        }
1043
        
1044
        mpfr_mul(t1, t1, t2, round_mode);                           // t1 = sqrt(k)*A_k(n)
1045
                                                                    //
1046
        mp_f(t2, k);                                                // t2 = f_k(n)
1047

1048
        if(debugf) {
1049
            cout << "f(" << n << "," << k <<  ") = ";
1050
            mpfr_out_str(stdout, 10, 0, t2, round_mode);
1051
            cout << endl;
1052
        }
1053

1054
        mpfr_mul(t1, t1, t2, round_mode);                           // t1 = sqrt(k)*A_k(n)*f_k(n)
1055
                                                                    //
1056
        mpfr_add(result, result, t1, round_mode);                   // result += summand
1057

1058
        if(debugt) {
1059
            int num_digits = 20;
1060
            int num_extra_digits;
1061
            char digits[num_digits + 1];
1062
            num_extra_digits = grab_last_digits(digits, 5, t1);
1063
            grab_last_digits(digits, num_digits, result);
1064

1065
            mpfr_out_str(stdout, 10, 10, t1, round_mode);
1066
            cout << endl;
1067

1068
            cout << k << ": current precision:"  << current_precision << ". 20 last digits of partial result: " << digits << ". Number of extra digits: " << num_extra_digits << endl;
1069
            cout.flush();
1070

1071
        }
1072

1073
        new_precision = compute_current_precision(n,k,extra);       // After computing one summand, check what the new precision should be.
1074
        if(new_precision != current_precision) {                    // If the precision changes, we need to clear
1075
            current_precision = new_precision;                      // and reinitialize all "temp" variables to
1076
            clear_mpfr_variables();                                 // use lower precision.
1077
            initialize_mpfr_variables(current_precision);           //
1078
            mpfr_clear(t1); mpfr_clear(t2);                         //
1079
            mpfr_init2(t1, current_precision);                      //
1080
            mpfr_init2(t2, current_precision);                      //
1081
        }
1082
    }
1083

1084
    mpfr_clear(t1); mpfr_clear(t2);
1085

1086
    mpfr_init2(t1, 200);
1087
    mpfr_init2(t2, 200);
1088

1089
    long double ld_partial_sum = partial_sum_of_t<long double>(n,k,level_five_precision, extra, 0);
1090
    mpfr_set_ld(t1, ld_partial_sum, round_mode);
1091
    mpfr_add(result, result, t1, round_mode);
1092
    
1093
    double d_partial_sum = partial_sum_of_t<double>(n,k,0,extra,error);
1094

1095

1096
    mpfr_set_d(t1, d_partial_sum, round_mode);                      //
1097
    mpfr_add(result, result, t1, round_mode);                       // We add together the main result and the tail ends'
1098

1099

1100
    mpfr_div(result, result, mp_pi, round_mode);                    // The actual result is the sum that we have computed
1101
    mpfr_div(result, result, mp_sqrt2, round_mode);                 // divided by pi*sqrt(2).
1102

1103
    clear_constants();
1104
    clear_mpz_and_mpq_variables();
1105
    clear_mpfr_variables();
1106
    mpfr_clear(t1);
1107
    mpfr_clear(t2);                        
1108
}
1109

1110
template <class T>
1111
T f(unsigned int k) {
1112
    return pi_sqrt2<T>() * cosh(A<T>()/(sqrt3<T>()*k))/(B<T>() * k) - sinh(C<T>()/k)/D<T>();
1113
}
1114

1115
template <class T>
1116
T a(unsigned int n, unsigned int k) {
1117
    if(k == 1) {
1118
        return 1;
1119
    }
1120
    T result = 0;
1121
 
1122
    unsigned int h = 0;
1123
    for(h = 1; h < k+1; h++) {
1124
        if(GCD(h,k) == 1) {
1125
            result += cos( pi<T>() * ( s<T>(h,k) - T(2.0 * double(h) * n)/T(int(k))) ); // be careful to promote 2 and h to Ts
1126
                                                                                        // because the result 2 * h * n could
1127
                                                                                        // be too large.
1128
        }
1129
    }
1130
    return result;
1131
}
1132

1133
template <class T>
1134
T s(unsigned int h, unsigned int k) {
1135
    //mpq_set_ui(result, 1, 1);
1136
    //return;
1137
    
1138
    //return T(0);  // Uncommenting this line saves 25% or more time in the computation of p(10^9) in the current version (.51)
1139
                    // but, of course, gives wrong results. (This is just to give an idea of how much time could
1140
                    // possibly be saved by optimizing this function.
1141
    
1142
    if(k < 3) {
1143
        return T(0);
1144
    }
1145

1146
    if (h == 1) {
1147
        return T( int((k-1)*(k-2)) )/T(int(12 * k));
1148
    }
1149
    // TODO: In the function mp_s() there are a few special cases for special forms of h and k.
1150
    // (And there are more special cases listed in one of the references listed in the introduction.)
1151
    //
1152
    // It may be advantageous to implement some here, but I'm not sure
1153
    // if there is any real speed benefit to this.
1154
    //
1155
    // In the mpfr_t version of this function, the speedups didn't seem to help too much, but
1156
    // they might make more of a difference here.
1157
    //
1158
    // Update to the above comments:
1159
    // Actually, a few tests seem to indicate that putting in too many special
1160
    // cases slows things down a little bit.
1161

1162
    // if h = 2 and k is odd, we have
1163
    // s(h,k) = (k-1)*(k-5)/24k
1164
    if(h == 2 && k > 5 && k % 2 == 1) {
1165
        return T( int((k-1)*(k-5)) )/ T(int(24*k));
1166
    }
1167

1168

1169
    // if k % h == 1, then
1170
    //
1171
    //      s(h,k) = (k-1)(k - h^2 - 1)/(12hk)
1172
    //
1173

1174
    // We might need to be a little careful here because k - h^2 - 1 can be negative.
1175
    // THIS CODE DOESN'T WORK.
1176
    //if(k % h == 1) {
1177
    //    int num = (k-1)*(k - h*h - 1);
1178
    //    int den = 12*k*h;
1179
    //    return T(num)/T(den);
1180
    //}
1181

1182
    // if k % h == 2, then
1183
    //
1184
    //      s(h,k) = (k-2)[k - .5(h^2 + 1)]/(12hk)
1185
    //
1186

1187
    //if(k % h == 2) {
1188
    //}
1189
    
1190
    int r1 = k;
1191
    int r2 = h;
1192

1193
    int n = 1;
1194
    int temp3;
1195

1196
    T result = T(0);
1197
    T temp;
1198
    while(r2 > 0)   // Note that we maintain the invariant r1 >= r2, so
1199
                    // we only need to make sure that r2 > 0
1200
    {
1201
        temp = T(int(r1 * r1 + r2 * r2 + 1))/(n * r1 * r2);
1202
        temp3 = r1 % r2;                                            
1203
        r1 = r2;                                                    
1204
        r2 = temp3;                                                 
1205
        
1206
        result += temp;                        // result += temp1;
1207

1208
        n = -n;
1209
    }
1210

1211
    result *= one_over_12<T>();
1212
    
1213
    if(n < 0) {
1214
        result -= T(.25);
1215
    }
1216
    return result;
1217
}
1218

1219

1220
long GCD(long a, long b)
1221
{
1222
   long u, v, t, x;
1223

1224
   if (a < 0) {
1225
      a = -a;
1226
   }
1227

1228
   if (b < 0) {
1229
      b = -b;
1230
   }
1231

1232

1233
   if (b==0)
1234
      x = a;
1235
   else {
1236
      u = a;
1237
      v = b;
1238
      do {
1239
         t = u % v;
1240
         u = v; 
1241
         v = t;
1242
      } while (v != 0);
1243

1244
      x = u;
1245
   }
1246

1247
   return x;
1248
}
1249

1250

1251

1252
// The following function was copied from Ralf Stephan's code,
1253
// mentioned in the introduction, and then some variable names
1254
// were changed. The function is not currently used, however.
1255
void cospi (mpfr_t res, 
1256
		mpfr_t x)
1257
{
1258
//	mpfr_t t, tt, half, fourth;
1259
	
1260
//	mpfr_init2 (t, prec);
1261
//	mpfr_init2 (tt, prec);
1262
//	mpfr_init2 (half, prec);
1263
//	mpfr_init2 (fourth, prec);
1264

1265
//	mpfr_set_ui (half, 1, r);
1266
//	mpfr_div_2ui (half, half, 1, r);
1267
//	mpfr_div_2ui (fourth, half, 1, r);
1268

1269
        // NOTE: switched t to tempc2
1270
        //       and tt to tempc1
1271

1272

1273
        mp_rnd_t r = round_mode;
1274
        
1275

1276
	mpfr_div_2ui (tempc1, x, 1, r);
1277
	mpfr_floor (tempc1, tempc1);
1278
	mpfr_mul_2ui (tempc1, tempc1, 1, r);
1279
	mpfr_sub (tempc2, x, tempc1, r);
1280
	if (mpfr_cmp_ui (tempc2, 1) > 0)
1281
		mpfr_sub_ui (tempc2, tempc2, 2, r);
1282
	mpfr_abs (tempc1, tempc2, r);
1283
	if (mpfr_cmp (tempc1, half) > 0)
1284
	{
1285
		mpfr_ui_sub (tempc2, 1, tempc1, r);
1286
		mpfr_abs (tempc1, tempc2, r);
1287
		if (mpfr_cmp (tempc1, fourth) > 0)
1288
		{
1289
			if (mpfr_sgn (tempc2) > 0)
1290
				mpfr_sub (tempc2, half, tempc2, r);
1291
			else
1292
				mpfr_add (tempc2, tempc2, half, r);
1293
			mpfr_mul (tempc2, tempc2, mp_pi, r);
1294
			mpfr_sin (tempc2, tempc2, r);
1295
		}
1296
		else
1297
		{
1298
			mpfr_mul (tempc2, tempc2, mp_pi, r);
1299
			mpfr_cos (tempc2, tempc2, r);	
1300
		}
1301
		mpfr_neg (res, tempc2, r);
1302
	}
1303
	else
1304
	{
1305
		mpfr_abs (tempc1, tempc2, r);
1306
		if (mpfr_cmp (tempc1, fourth) > 0)
1307
		{
1308
			if (mpfr_sgn (tempc2) > 0)
1309
				mpfr_sub (tempc2, half, tempc2, r);
1310
			else
1311
				mpfr_add (tempc2, tempc2, half, r);
1312
			mpfr_mul (tempc2, tempc2, mp_pi, r);
1313
			mpfr_sin (res, tempc2, r);
1314
		}
1315
		else
1316
		{
1317
			mpfr_mul (tempc2, tempc2, mp_pi, r);
1318
			mpfr_cos (res, tempc2, r);
1319
		}
1320
	}
1321

1322
//	mpfr_clear (half);
1323
//	mpfr_clear (fourth);
1324
//	mpfr_clear (t);
1325
//	mpfr_clear (tt);
1326
}
1327

1328
/* answer must have already been mpz_init'd. */
1329
int part(mpz_t answer, unsigned int n){
1330
    if(n == 1) {
1331
        mpz_set_str(answer, "1", 10);
1332
        return 0;
1333
    }
1334
    mpfr_t result;
1335
    
1336
    mp_t(result, n);
1337
    
1338
    mpfr_get_z(answer, result, round_mode);
1339

1340
    mpfr_clear(result);
1341
    return 0;
1342
}
1343

1344

1345
int main(int argc, char *argv[]){
1346
    //init();
1347
    
1348
    unsigned int n = 10;
1349

1350
    if(argc > 1)
1351
        if(strcmp(argv[1], "test") == 0) {
1352
            n = test(true);
1353
            if(n == 0) {
1354
                cout << "All Tests Passed" << endl;
1355
            }
1356
            else {
1357
                cout << "Error computing p(" << n << ")" << endl;
1358
            }
1359
            return 0;
1360
        }
1361
        else if(strcmp(argv[1], "testforever") == 0) {
1362
            n = test(false, true);
1363
            if(n == 0) {
1364
                cout << "All Tests Passed" << endl;
1365
            }
1366
            else {
1367
                cout << "Error computing p(" << n << ")" << endl;
1368
            }
1369
            return 0;
1370
        }
1371
        else {
1372
            n = atoi(argv[1]);
1373
        }
1374
    else {
1375
        n = test(false);
1376
        if(n == 0) {
1377
            cout << "All short tests passed. Run '" << argv[0] << " test' to run all tests. (This may take some time, but it gives updates as it progresses, and can be interrupted.)" << endl;
1378
            cout << "Run with the argument 'testforever' to run tests until a failure is found (or, hopefully, to run tests forever.)" << endl;
1379
        }
1380
        else {
1381
            cout << "Error computing p(" << n << ")" << endl;
1382
        }
1383
        return 0;
1384
    }
1385
    //mpfr_t result;
1386
    
1387
    //mp_t(result, n);
1388
    
1389
    mpz_t answer;
1390
    mpz_init(answer);
1391
    part(answer, n);
1392

1393
    //mpfr_get_z(answer, result, round_mode);
1394

1395
    mpz_out_str (stdout, 10, answer);
1396
    
1397
    cout << endl;
1398

1399
    return 0;
1400
}
1401

1402

1403
int test(bool longtest, bool forever) {
1404
    // The exact values given below are confirmed by multiple sources, so are probably correct.
1405
    // Other tests rely on known congruences.
1406
    // If longtest is true, then we run some tests that probably take on the order
1407
    // of 10 minutes.
1408
    // If forever is true, then we just do randomized tests until a failure
1409
    // is found. Ideally, this should mean that we test forever, of course.
1410

1411
    int n;
1412

1413
    mpz_t expected_value;
1414
    mpz_t actual_value;
1415
    
1416
    mpz_init(expected_value);
1417
    mpz_init(actual_value);
1418

1419
    n= 1;
1420
    cout << "Computing p(" << n << ")...";
1421
    cout.flush();
1422
    mpz_set_str(expected_value, "1", 10);
1423
    part(actual_value, n);
1424

1425
    if(mpz_cmp(expected_value, actual_value) != 0)
1426
        return n;
1427

1428
    cout << " OK." << endl;
1429

1430
    n = 10;
1431
    cout << "Computing p(" << n << ")...";
1432
    cout.flush();
1433
    mpz_set_str(expected_value, "42", 10);
1434
    part(actual_value, n);
1435

1436
    if(mpz_cmp(expected_value, actual_value) != 0)
1437
        return n;
1438
    
1439
    cout << " OK." << endl;
1440

1441
    n = 100;
1442
    cout << "Computing p(" << n << ")...";
1443
    cout.flush();
1444
    mpz_set_str(expected_value, "190569292", 10);
1445
    part(actual_value, n);
1446

1447
    if(mpz_cmp(expected_value, actual_value) != 0)
1448
        return n;
1449

1450
    cout << " OK." << endl;
1451

1452
    n = 1000;
1453
    cout << "Computing p(" << n << ")...";
1454
    cout.flush();
1455
    mpz_set_str(expected_value, "24061467864032622473692149727991", 10);
1456
    part(actual_value, n);
1457

1458
    if(mpz_cmp(expected_value, actual_value) != 0)
1459
        return n;
1460

1461
    cout << " OK." << endl;
1462

1463
    n = 10000;
1464
    cout << "Computing p(" << n << ")...";
1465
    cout.flush();
1466
    mpz_set_str(expected_value, "36167251325636293988820471890953695495016030339315650422081868605887952568754066420592310556052906916435144", 10);
1467
    part(actual_value, n);
1468

1469
    if(mpz_cmp(expected_value, actual_value) != 0)
1470
        return n;
1471

1472
    cout << " OK." << endl;
1473

1474
    n = 100000;
1475
    cout << "Computing p(" << n << ")...";
1476
    cout.flush();
1477
    mpz_set_str(expected_value, "27493510569775696512677516320986352688173429315980054758203125984302147328114964173055050741660736621590157844774296248940493063070200461792764493033510116079342457190155718943509725312466108452006369558934464248716828789832182345009262853831404597021307130674510624419227311238999702284408609370935531629697851569569892196108480158600569421098519", 10);
1478
    part(actual_value, n);
1479

1480
    if(mpz_cmp(expected_value, actual_value) != 0)
1481
        return n;
1482
    
1483
    cout << " OK." << endl;
1484

1485
    n = 1000000;
1486
    cout << "Computing p(" << n << ")...";
1487
    cout.flush();
1488
    mpz_set_str(expected_value, "1471684986358223398631004760609895943484030484439142125334612747351666117418918618276330148873983597555842015374130600288095929387347128232270327849578001932784396072064228659048713020170971840761025676479860846908142829356706929785991290519899445490672219997823452874982974022288229850136767566294781887494687879003824699988197729200632068668735996662273816798266213482417208446631027428001918132198177180646511234542595026728424452592296781193448139994664730105742564359154794989181485285351370551399476719981691459022015599101959601417474075715430750022184895815209339012481734469448319323280150665384042994054179587751761294916248142479998802936507195257074485047571662771763903391442495113823298195263008336489826045837712202455304996382144601028531832004519046591968302787537418118486000612016852593542741980215046267245473237321845833427512524227465399130174076941280847400831542217999286071108336303316298289102444649696805395416791875480010852636774022023128467646919775022348562520747741843343657801534130704761975530375169707999287040285677841619347472368171772154046664303121315630003467104673818", 10);
1489
    part(actual_value, n);
1490

1491
    if(mpz_cmp(expected_value, actual_value) != 0)
1492
        return n;
1493
    
1494
    cout << " OK." << endl;
1495

1496

1497
    // We now run some tests based on the fact that if n = 369 (mod 385) then p(n) = 0 (mod 385).
1498
    
1499
    n = 369 + 10*385;
1500
    cout << "Computing p(" << n << ")...";
1501
    cout.flush();
1502
    part(actual_value, n);
1503
    if(mpz_divisible_ui_p(actual_value, 385) == 0)
1504
        return n;
1505

1506
    cout << " OK." << endl;
1507

1508
    n = 369 + 10*385;
1509
    cout << "Computing p(" << n << ")...";
1510
    cout.flush();
1511
    part(actual_value, n);
1512
    if(mpz_divisible_ui_p(actual_value, 385) == 0)
1513
        return n;
1514

1515
    cout << " OK." << endl;
1516

1517
    n = 369 + 100*385;
1518
    cout << "Computing p(" << n << ")...";
1519
    cout.flush();
1520
    part(actual_value, n);
1521
    if(mpz_divisible_ui_p(actual_value, 385) == 0)
1522
        return n;
1523

1524
    cout << " OK." << endl;
1525

1526
    n = 369 + 110*385;
1527
    cout << "Computing p(" << n << ")...";
1528
    cout.flush();
1529
    part(actual_value, n);
1530
    if(mpz_divisible_ui_p(actual_value, 385) == 0)
1531
        return n;
1532

1533
    cout << " OK." << endl;
1534

1535
    n = 369 + 120*385;
1536
    cout << "Computing p(" << n << ")...";
1537
    cout.flush();
1538
    part(actual_value, n);
1539
    if(mpz_divisible_ui_p(actual_value, 385) == 0)
1540
        return n;
1541

1542
    cout << " OK." << endl;
1543

1544
    n = 369 + 130*385;
1545
    cout << "Computing p(" << n << ")...";
1546
    cout.flush();
1547
    part(actual_value, n);
1548
    if(mpz_divisible_ui_p(actual_value, 385) == 0)
1549
        return n;
1550

1551
    cout << " OK." << endl;
1552

1553
    // Randomized testing
1554

1555
    srand( time(NULL) );
1556
    
1557
    for(int i = 0; i < 100; i++) {
1558
        n = int(100000 * double(rand())/double(RAND_MAX) + 1);
1559
        n = n - (n % 385) + 369;
1560
        cout << "Computing p(" << n << ")...";
1561
        cout.flush();
1562
        part(actual_value, n);
1563
        if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1564
            return n;
1565
        }
1566
        cout << " OK." << endl;
1567

1568
    }
1569

1570
    if(longtest) {
1571
        n = 369 + 1000*385;
1572
        cout << "Computing p(" << n << ")...";
1573
        cout.flush();
1574
        part(actual_value, n);
1575
        if(mpz_divisible_ui_p(actual_value, 385) == 0)
1576
            return n;
1577

1578
        cout << " OK." << endl;
1579

1580
        n = 369 + 10000*385;
1581
        cout << "Computing p(" << n << ")...";
1582
        cout.flush();
1583
        part(actual_value, n);
1584
        if(mpz_divisible_ui_p(actual_value, 385) == 0)
1585
            return n;
1586

1587
        cout << " OK." << endl;
1588

1589
        n = 369 + 100000*385;
1590
        cout << "Computing p(" << n << ")...";
1591
        cout.flush();
1592
        part(actual_value, n);
1593
        if(mpz_divisible_ui_p(actual_value, 385) == 0)
1594
            return n;
1595

1596
        cout << " OK." << endl;
1597

1598
        for(int i = 0; i < 20; i++) {
1599
            n = int(100000 * double(rand())/double(RAND_MAX) + 1) + 100000;
1600
            n = n - (n % 385) + 369;
1601
            cout << "Computing p(" << n << ")...";
1602
            cout.flush();
1603
            part(actual_value, n);
1604
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1605
                return n;
1606
            }
1607
            cout << " OK." << endl;
1608
        }
1609

1610
        for(int i = 0; i < 20; i++) {
1611
            n = int(100000 * double(rand())/double(RAND_MAX) + 1) + 500000;
1612
            n = n - (n % 385) + 369;
1613
            cout << "Computing p(" << n << ")...";
1614
            cout.flush();
1615
            part(actual_value, n);
1616
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1617
                return n;
1618
            }
1619
            cout << " OK." << endl;
1620
        }
1621

1622
        for(int i = 0; i < 20; i++) {
1623
            n = int(100000 * double(rand())/double(RAND_MAX) + 1) + 1000000;
1624
            n = n - (n % 385) + 369;
1625
            cout << "Computing p(" << n << ")...";
1626
            cout.flush();
1627
            part(actual_value, n);
1628
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1629
                return n;
1630
            }
1631
            cout << " OK." << endl;
1632
        }
1633

1634
        for(int i = 0; i < 10; i++) {
1635
            n = int(100000 * double(rand())/double(RAND_MAX) + 1) + 10000000;
1636
            n = n - (n % 385) + 369;
1637
            cout << "Computing p(" << n << ")...";
1638
            cout.flush();
1639
            part(actual_value, n);
1640
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1641
                return n;
1642
            }
1643
            cout << " OK." << endl;
1644
        }
1645

1646
        n = 369 + 1000000*385;
1647
        cout << "Computing p(" << n << ")...";
1648
        cout.flush();
1649
        part(actual_value, n);
1650
        if(mpz_divisible_ui_p(actual_value, 385) == 0)
1651
            return n;
1652
        
1653
        cout << " OK." << endl;
1654

1655
        for(int i = 0; i < 10; i++) {
1656
            n = int(100000000 * double(rand())/double(RAND_MAX) + 1) + 100000000;
1657
            n = n - (n % 385) + 369;
1658
            cout << "Computing p(" << n << ")...";
1659
            cout.flush();
1660
            part(actual_value, n);
1661
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1662
                return n;
1663
            }
1664
            cout << " OK." << endl;
1665
        }
1666
        
1667
        n = 1000000000 + 139;
1668
        cout << "Computing p(" << n << ")...";
1669
        cout.flush();
1670
        part(actual_value, n);
1671
        if(mpz_divisible_ui_p(actual_value, 385) == 0)
1672
            return n;
1673
        
1674
        cout << " OK." << endl;
1675

1676

1677
        for(int i = 0; i < 10; i++) {
1678
            n = int(100000000 * double(rand())/double(RAND_MAX) + 1) + 1000000000;
1679
            n = n - (n % 385) + 369;
1680
            cout << "Computing p(" << n << ")...";
1681
            cout.flush();
1682
            part(actual_value, n);
1683
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1684
                return n;
1685
            }
1686
            cout << " OK." << endl;
1687
        }
1688

1689
    }
1690

1691
    if(forever) {
1692
        while(1) {
1693

1694
        for(int i = 0; i < 100; i++) {
1695
            n = int(900000 * double(rand())/double(RAND_MAX) + 1) + 100000;
1696
            n = n - (n % 385) + 369;
1697
            cout << "Computing p(" << n << ")...";
1698
            cout.flush();
1699
            part(actual_value, n);
1700
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1701
                return n;
1702
            }
1703
            cout << " OK." << endl;
1704
        }
1705

1706
        for(int i = 0; i < 50; i++) {
1707
            n = int(9000000 * double(rand())/double(RAND_MAX) + 1) + 1000000;
1708
            n = n - (n % 385) + 369;
1709
            cout << "Computing p(" << n << ")...";
1710
            cout.flush();
1711
            part(actual_value, n);
1712
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1713
                return n;
1714
            }
1715
            cout << " OK." << endl;
1716
        }
1717

1718
        for(int i = 0; i < 50; i++) {
1719
            n = int(90000000 * double(rand())/double(RAND_MAX) + 1) + 10000000;
1720
            n = n - (n % 385) + 369;
1721
            cout << "Computing p(" << n << ")...";
1722
            cout.flush();
1723
            part(actual_value, n);
1724
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1725
                return n;
1726
            }
1727
            cout << " OK." << endl;
1728
        }
1729

1730
        for(int i = 0; i < 10; i++) {
1731
            n = int(900000000 * double(rand())/double(RAND_MAX) + 1) + 100000000;
1732
            n = n - (n % 385) + 369;
1733
            cout << "Computing p(" << n << ")...";
1734
            cout.flush();
1735
            part(actual_value, n);
1736
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1737
                return n;
1738
            }
1739
            cout << " OK." << endl;
1740
        }
1741
        for(int i = 0; i < 5; i++) {
1742
            n = int(100000000 * double(rand())/double(RAND_MAX) + 1) + 1000000000;
1743
            n = n - (n % 385) + 369;
1744
            cout << "Computing p(" << n << ")...";
1745
            cout.flush();
1746
            part(actual_value, n);
1747
            if(mpz_divisible_ui_p(actual_value, 385) == 0) {
1748
                return n;
1749
            }
1750
            cout << " OK." << endl;
1751
        }
1752
        }
1753

1754

1755
    }
1756

1757
    mpz_clear(expected_value);
1758
    mpz_clear(actual_value);
1759
    return 0;
1760
}
1761

1762

1763

1764
Product

Resources

Company