Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
uvahotspot
GitHub Repository: uvahotspot/HotSpot
Path: blob/master/RCutil.c
612 views
1
/*
2
* Thanks to Greg Link from Penn State University
3
* for his math acceleration engine. Where available,
4
* the modified version of the engine found here, uses
5
* the fast, vendor-provided linear algebra routines
6
* from the BLAS and LAPACK packages in lieu of
7
* the vanilla C code present in the matrix functions
8
* of the previous versions of HotSpot.
9
*/
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <assert.h>
13
#include <math.h>
14
15
#include "temperature.h"
16
#include "flp.h"
17
#include "util.h"
18
19
/* thermal resistance calculation */
20
double getr(double conductivity, double thickness, double area)
21
{
22
return thickness / (conductivity * area);
23
}
24
25
/* thermal capacitance calculation */
26
double getcap(double sp_heat, double thickness, double area)
27
{
28
/* include lumped vs. distributed correction */
29
return C_FACTOR * sp_heat * thickness * area;
30
//return sp_heat * thickness * area;
31
}
32
33
/*
34
* LUP decomposition from the pseudocode given in the CLR
35
* 'Introduction to Algorithms' textbook. The matrix 'a' is
36
* transformed into an in-place lower/upper triangular matrix
37
* and the vector'p' carries the permutation vector such that
38
* Pa = lu, where 'P' is the matrix form of 'p'. The 'spd' flag
39
* indicates that 'a' is symmetric and positive definite
40
*/
41
42
void lupdcmp(double**a, int n, int *p, int spd)
43
{
44
#if(MATHACCEL == MA_INTEL)
45
int info = 0;
46
if (!spd)
47
dgetrf(&n, &n, a[0], &n, p, &info);
48
else
49
dpotrf("U", &n, a[0], &n, &info);
50
assert(info == 0);
51
#elif(MATHACCEL == MA_AMD)
52
int info = 0;
53
if (!spd)
54
dgetrf_(&n, &n, a[0], &n, p, &info);
55
else
56
dpotrf_("U", &n, a[0], &n, &info, 1);
57
assert(info == 0);
58
#elif(MATHACCEL == MA_APPLE)
59
int info = 0;
60
if (!spd)
61
dgetrf_((__CLPK_integer *)&n, (__CLPK_integer *)&n, a[0],
62
(__CLPK_integer *)&n, (__CLPK_integer *)p,
63
(__CLPK_integer *)&info);
64
else
65
dpotrf_("U", (__CLPK_integer *)&n, a[0], (__CLPK_integer *)&n,
66
(__CLPK_integer *)&info);
67
assert(info == 0);
68
#elif(MATHACCEL == MA_SUN)
69
int info = 0;
70
if (!spd)
71
dgetrf_(&n, &n, a[0], &n, p, &info);
72
else
73
dpotrf_("U", &n, a[0], &n, &info);
74
assert(info == 0);
75
#else
76
int i, j, k, pivot=0;
77
double max = 0;
78
79
/* start with identity permutation */
80
for (i=0; i < n; i++)
81
p[i] = i;
82
83
for (k=0; k < n-1; k++) {
84
max = 0;
85
for (i = k; i < n; i++) {
86
if (fabs(a[i][k]) > max) {
87
max = fabs(a[i][k]);
88
pivot = i;
89
}
90
}
91
if (eq (max, 0))
92
fatal ("singular matrix in lupdcmp\n");
93
94
/* bring pivot element to position */
95
swap_ival (&p[k], &p[pivot]);
96
for (i=0; i < n; i++)
97
swap_dval (&a[k][i], &a[pivot][i]);
98
99
for (i=k+1; i < n; i++) {
100
a[i][k] /= a[k][k];
101
for (j=k+1; j < n; j++)
102
a[i][j] -= a[i][k] * a[k][j];
103
}
104
}
105
#endif
106
}
107
108
/*
109
* the matrix a is an in-place lower/upper triangular matrix
110
* the following macros split them into their constituents
111
*/
112
113
#define LOWER(a, i, j) ((i > j) ? a[i][j] : 0)
114
#define UPPER(a, i, j) ((i <= j) ? a[i][j] : 0)
115
116
/*
117
* LU forward and backward substitution from the pseudocode given
118
* in the CLR 'Introduction to Algorithms' textbook. It solves ax = b
119
* where, 'a' is an in-place lower/upper triangular matrix. The vector
120
* 'x' carries the solution vector. 'p' is the permutation vector. The
121
* 'spd' flag indicates that 'a' is symmetric and positive definite
122
*/
123
124
void lusolve(double **a, int n, int *p, double *b, double *x, int spd)
125
{
126
#if(MATHACCEL == MA_INTEL)
127
int one = 1, info = 0;
128
cblas_dcopy(n, b, 1, x, 1);
129
if (!spd)
130
dgetrs("T", &n, &one, a[0], &n, p, x, &n, &info);
131
else
132
dpotrs("U", &n, &one, a[0], &n, x, &n, &info);
133
assert(info == 0);
134
#elif(MATHACCEL == MA_AMD)
135
int one = 1, info = 0;
136
dcopy(n, b, 1, x, 1);
137
if (!spd)
138
dgetrs_("T", &n, &one, a[0], &n, p, x, &n, &info, 1);
139
else
140
dpotrs_("U", &n, &one, a[0], &n, x, &n, &info, 1);
141
assert(info == 0);
142
#elif(MATHACCEL == MA_APPLE)
143
int one = 1, info = 0;
144
cblas_dcopy(n, b, 1, x, 1);
145
if (!spd)
146
dgetrs_("T", (__CLPK_integer *)&n, (__CLPK_integer *)&one, a[0],
147
(__CLPK_integer *)&n, (__CLPK_integer *)p, x,
148
(__CLPK_integer *)&n, (__CLPK_integer *)&info);
149
else
150
dpotrs_("U", (__CLPK_integer *)&n, (__CLPK_integer *)&one, a[0],
151
(__CLPK_integer *)&n, x, (__CLPK_integer *)&n,
152
(__CLPK_integer *)&info);
153
assert(info == 0);
154
#elif(MATHACCEL == MA_SUN)
155
int one = 1, info = 0;
156
dcopy(n, b, 1, x, 1);
157
if (!spd)
158
dgetrs_("T", &n, &one, a[0], &n, p, x, &n, &info);
159
else
160
dpotrs_("U", &n, &one, a[0], &n, x, &n, &info);
161
assert(info == 0);
162
#else
163
int i, j;
164
double *y = dvector (n);
165
double sum;
166
167
/* forward substitution - solves ly = pb */
168
for (i=0; i < n; i++) {
169
for (j=0, sum=0; j < i; j++)
170
sum += y[j] * LOWER(a, i, j);
171
y[i] = b[p[i]] - sum;
172
}
173
174
/* backward substitution - solves ux = y */
175
for (i=n-1; i >= 0; i--) {
176
for (j=i+1, sum=0; j < n; j++)
177
sum += x[j] * UPPER(a, i, j);
178
x[i] = (y[i] - sum) / UPPER(a, i, i);
179
}
180
181
free_dvector(y);
182
#endif
183
}
184
185
#if SUPERLU > 0
186
// Builds the matrix A = (1/h)C + G for the Backward Euler Method
187
int build_A_matrix(SuperMatrix *G, diagonal_matrix_t *C, double h, SuperMatrix *A)
188
{
189
NCformat *Astore;
190
int i, j;
191
int n = C->n;
192
double *a;
193
int *asub, *xa;
194
int flag = 1;
195
196
NCformat *Gstore = G->Store;
197
int nrow = G->nrow;
198
int ncol = G->ncol;
199
int nnz = Gstore->nnz;
200
201
// Copy G into A
202
double *nzval;
203
int *rowind;
204
int *colptr;
205
if ( !(nzval = doubleMalloc(nnz)) ) fatal("Malloc fails for a[].\n");
206
if ( !(rowind = intMalloc(nnz)) ) fatal("Malloc fails for asub[].\n");
207
if ( !(colptr = intMalloc(n+1)) ) fatal("Malloc fails for xa[].\n");
208
209
210
dCreate_CompCol_Matrix(A, nrow, ncol, nnz, nzval, rowind,
211
colptr, SLU_NC, SLU_D, SLU_GE);
212
213
214
dCopy_CompCol_Matrix(G, A);
215
216
Astore = A->Store;
217
n = A->ncol;
218
a = Astore->nzval;
219
asub = Astore->rowind;
220
xa = Astore->colptr;
221
222
double *C_vals = C->vals;
223
224
for(i=0; i<n; i++){
225
flag = 1;
226
j = xa[i];
227
while(j<xa[i+1]){
228
if(asub[j] == i){
229
//fprintf(stderr, "i = %d, j = %d, a[%d] = %e + (1.0 / %e) * %e = ", i, j, j, a[j], h, C_vals[i]);
230
a[j] += (1.0 / h)*C_vals[i];
231
//fprintf(stderr, "%e\n", a[j]);
232
flag = 0;
233
}
234
j++;
235
}
236
if(flag)
237
return 0;
238
}
239
return 1;
240
}
241
242
// Builds the matrix B = (1/h)CT + P for the Backward Euler method
243
int build_B_matrix(diagonal_matrix_t *C, double *T, double *P, double h, SuperMatrix *B)
244
{
245
int i, n = C->n;
246
double *B_temp = dvector(n);
247
double *C_vals = C->vals;
248
249
for(i = 0; i < n; i++) {
250
B_temp[i] = (1.0 / h) * C_vals[i] * T[i] + P[i];
251
}
252
253
dCreate_Dense_Matrix(B, n, 1, B_temp, n, SLU_DN, SLU_D, SLU_GE);
254
255
return 1;
256
}
257
258
/*
259
* Backward Euler solver with adaptive step sizing.
260
* It takes as input the conductance matrix G, capacitances C,
261
* temperatures T and power P at time t, and solves the ODE GT + CdT = P
262
* between t and t+h. It returns the correct step size to be used next time.
263
*/
264
#define BACKWARD_EULER_SAFETY 0.95
265
#define BACKWARD_EULER_MAXUP 5.0
266
#define BACKWARD_EULER_MAXDOWN 10.0
267
#define BACKWARD_EULER_PRECISION 0.01
268
double backward_euler(SuperMatrix *G, diagonal_matrix_t *C, double *T, double *P, double *h, double *Tout)
269
{
270
////////////////// Set Options and Statistics //////////////////////////
271
SuperMatrix L, U;
272
int *perm_r; // row permutations from partial pivoting
273
int *perm_c; // column permutation vector
274
int info, n = C->n;
275
double max, new_h = (*h);
276
superlu_options_t options;
277
SuperLUStat_t stat;
278
DNformat *Bstore;
279
double *dp;
280
281
// TODO: Implement re-use of perm_c
282
if ( !(perm_r = intMalloc(n)) ) fatal("Malloc fails for perm_r[].\n");
283
if ( !(perm_c = intMalloc(n)) ) fatal("Malloc fails for perm_c[].\n");
284
285
set_default_options(&options);
286
287
// Initialize the statistics variables.
288
StatInit(&stat);
289
290
SuperMatrix *A, *B;
291
int i;
292
double *Ttemp = dvector(n);
293
double *T1 = dvector(n);
294
double *T2 = dvector(n);
295
296
297
A = calloc(1, sizeof(SuperMatrix));
298
B = calloc(1, sizeof(SuperMatrix));
299
300
int flag = 1;
301
302
if(build_A_matrix(G, C, *h, A) != 1)
303
fatal("error\n");
304
305
if(build_B_matrix(C, T, P, *h, B) != 1)
306
fatal("error\n");
307
308
dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
309
310
// Copy results into Ttemp
311
Bstore = (DNformat *) B->Store;
312
dp = (double *) Bstore->nzval;
313
for(i = 0; i < n; i++) {
314
Ttemp[i] = dp[i];
315
//fprintf(stderr, " Ttemp[%d] = %e\n", i, dp[i]);
316
}
317
318
// TEST
319
/*
320
do {
321
(*h) = new_h;
322
323
////////////////////// Evaluate once with step size h ////////////////////
324
// Build the matrix A = (1/h)C + G
325
if(build_A_matrix(G, C, *h, A) != 1)
326
fatal("In backward_euler(): Failed to build A matrix\n");
327
328
// Build the matrix B = (1/h)CT + P
329
if(build_B_matrix(C, T, P, *h, B) != 1)
330
fatal("In backward_euler(): Failed to build B matrix\n");
331
332
// Solve the linear system Ax = B
333
dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
334
335
// Copy results into Ttemp
336
Bstore = (DNformat *) B->Store;
337
dp = (double *) Bstore->nzval;
338
for(i = 0; i < n; i++) {
339
Ttemp[i] = dp[i];
340
//fprintf(stderr, " Ttemp[%d] = %e\n", i, dp[i]);
341
}
342
343
////////////////////// Repeat as two steps of size h/2 ////////////////////
344
// Build the matrix A = (2/h)C + G
345
if(build_A_matrix(G, C, (*h)/2, A) != 1)
346
fatal("In backward_euler(): Failed to build A matrix\n");
347
348
// Build the matrix B = (2/h)CT + P
349
if(build_B_matrix(C, T, P, (*h)/2, B) != 1)
350
fatal("In backward_euler(): Failed to build B matrix\n");
351
352
// Solve the linear system Ax = B
353
dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
354
355
// Copy results into T1
356
Bstore = (DNformat *) B->Store;
357
dp = (double *) Bstore->nzval;
358
for(i = 0; i < n; i++) {
359
T1[i] = dp[i];
360
//fprintf(stderr, " T1[%d] = %e\n", i, T1[i]);
361
}
362
363
// Rebuild B = (2/h)CT + P using T1
364
if(build_B_matrix(C, T1, P, (*h)/2, B) != 1)
365
fatal("In backward_euler(): Failed to build B matrix\n");
366
367
// Solve the linear system Ax = B
368
dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
369
370
// Copy results into T2
371
Bstore = (DNformat *) B->Store;
372
dp = (double *) Bstore->nzval;
373
for(i = 0; i < n; i++) {
374
T2[i] = dp[i];
375
//fprintf(stderr, " T2[%d] = %e\n", i, T2[i]);
376
}
377
378
// Find maximum difference between Ttemp and T2
379
for(i = 0; i < n; i++) {
380
T1[i] = fabs(Ttemp[i] - T2[i]);
381
//fprintf(stderr, " delta[%d] = |%e - %e| = %e\n", i, Ttemp[i], T2[i], T1[i]);
382
}
383
max = T1[0];
384
for(i = 1; i < n; i++) {
385
if(max < T1[i]) {
386
max = T1[i];
387
}
388
}
389
//fprintf(stderr, " max delta = %e\n", max);
390
391
// Accuracy OK. Increase step size
392
if(max <= BACKWARD_EULER_PRECISION) {
393
new_h = BACKWARD_EULER_SAFETY * (*h) * pow(fabs(BACKWARD_EULER_PRECISION/max), 0.2);
394
if (new_h > BACKWARD_EULER_MAXUP * (*h))
395
new_h = BACKWARD_EULER_MAXUP * (*h);
396
// Inaccuracy error. Decrease step size and compute again
397
//fprintf(stderr, " max delta OK; increasing step size to h = %e\n", new_h);
398
flag = 0;
399
}
400
else {
401
new_h = BACKWARD_EULER_SAFETY * (*h) * pow(fabs(BACKWARD_EULER_PRECISION/max), 0.25);
402
if (new_h < (*h) / BACKWARD_EULER_MAXDOWN)
403
new_h = (*h) / BACKWARD_EULER_MAXDOWN;
404
405
//fprintf(stderr, " max delta NOT OK; decreasing step size to h = %e\n", new_h);
406
}
407
408
} while(new_h < (*h));
409
*/
410
// Copy temperatures over to output
411
for(i = 0; i < n; i++) {
412
Tout[i] = Ttemp[i];
413
//fprintf(stderr, " Returning T[%d] = %e\n", i, Tout[i]);
414
}
415
416
free_dvector(Ttemp);
417
free_dvector(T1);
418
free_dvector(T2);
419
SUPERLU_FREE (perm_r);
420
SUPERLU_FREE (perm_c);
421
Destroy_CompCol_Matrix(A);
422
Destroy_SuperMatrix_Store(B);
423
Destroy_SuperNode_Matrix(&L);
424
Destroy_CompCol_Matrix(&U);
425
StatFree(&stat);
426
427
return new_h;
428
}
429
#endif
430
431
/* core of the 4th order Runge-Kutta method, where the Euler step
432
* (y(n+1) = y(n) + h * k1 where k1 = dydx(n)) is provided as an input.
433
* to evaluate dydx at different points, a call back function f (slope
434
* function) is also passed as a parameter. Given values for y, and k1,
435
* this function advances the solution over an interval h, and returns
436
* the solution in yout. For details, see the discussion in "Numerical
437
* Recipes in C", Chapter 16, from
438
* http://www.nrbook.com/a/bookcpdf/c16-1.pdf
439
*/
440
void rk4_core(void *model, double *y, double *k1, void *p, int n, double h, double *yout, slope_fn_ptr f)
441
{
442
int i;
443
double *t, *k2, *k3, *k4;
444
k2 = dvector(n);
445
k3 = dvector(n);
446
k4 = dvector(n);
447
t = dvector(n);
448
449
/* k2 is the slope at the trial midpoint (t) found using
450
* slope k1 (which is at the starting point).
451
*/
452
/* t = y + h/2 * k1 (t = y; t += h/2 * k1) */
453
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
454
cblas_dcopy(n, y, 1, t, 1);
455
cblas_daxpy(n, h/2.0, k1, 1, t, 1);
456
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
457
dcopy(n, y, 1, t, 1);
458
daxpy(n, h/2.0, k1, 1, t, 1);
459
#else
460
for(i=0; i < n; i++)
461
t[i] = y[i] + h/2.0 * k1[i];
462
#endif
463
/* k2 = slope at t */
464
(*f)(model, t, p, k2);
465
466
/* k3 is the slope at the trial midpoint (t) found using
467
* slope k2 found above.
468
*/
469
/* t = y + h/2 * k2 (t = y; t += h/2 * k2) */
470
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
471
cblas_dcopy(n, y, 1, t, 1);
472
cblas_daxpy(n, h/2.0, k2, 1, t, 1);
473
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
474
dcopy(n, y, 1, t, 1);
475
daxpy(n, h/2.0, k2, 1, t, 1);
476
#else
477
for(i=0; i < n; i++)
478
t[i] = y[i] + h/2.0 * k2[i];
479
#endif
480
/* k3 = slope at t */
481
(*f)(model, t, p, k3);
482
483
/* k4 is the slope at trial endpoint (t) found using
484
* slope k3 found above.
485
*/
486
/* t = y + h * k3 (t = y; t += h * k3) */
487
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
488
cblas_dcopy(n, y, 1, t, 1);
489
cblas_daxpy(n, h, k3, 1, t, 1);
490
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
491
dcopy(n, y, 1, t, 1);
492
daxpy(n, h, k3, 1, t, 1);
493
#else
494
for(i=0; i < n; i++)
495
t[i] = y[i] + h * k3[i];
496
#endif
497
/* k4 = slope at t */
498
(*f)(model, t, p, k4);
499
500
/* yout = y + h*(k1/6 + k2/3 + k3/3 + k4/6) */
501
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
502
/* yout = y */
503
cblas_dcopy(n, y, 1, yout, 1);
504
/* yout += h*k1/6 */
505
cblas_daxpy(n, h/6.0, k1, 1, yout, 1);
506
/* yout += h*k2/3 */
507
cblas_daxpy(n, h/3.0, k2, 1, yout, 1);
508
/* yout += h*k3/3 */
509
cblas_daxpy(n, h/3.0, k3, 1, yout, 1);
510
/* yout += h*k4/6 */
511
cblas_daxpy(n, h/6.0, k4, 1, yout, 1);
512
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
513
dcopy(n, y, 1, yout, 1);
514
/* yout += h*k1/6 */
515
daxpy(n, h/6.0, k1, 1, yout, 1);
516
/* yout += h*k2/3 */
517
daxpy(n, h/3.0, k2, 1, yout, 1);
518
/* yout += h*k3/3 */
519
daxpy(n, h/3.0, k3, 1, yout, 1);
520
/* yout += h*k4/6 */
521
daxpy(n, h/6.0, k4, 1, yout, 1);
522
#else
523
for (i =0; i < n; i++)
524
yout[i] = y[i] + h * (k1[i] + 2*k2[i] + 2*k3[i] + k4[i])/6.0;
525
#endif
526
527
free_dvector(k2);
528
free_dvector(k3);
529
free_dvector(k4);
530
free_dvector(t);
531
}
532
533
/*
534
* 4th order Runge Kutta solver with adaptive step sizing.
535
* It integrates and solves the ODE dy + cy = p between
536
* t and t+h. It returns the correct step size to be used
537
* next time. slope function f is the call back used to
538
* evaluate the derivative at each point
539
*/
540
#define RK4_SAFETY 0.95
541
#define RK4_MAXUP 5.0
542
#define RK4_MAXDOWN 10.0
543
#define RK4_PRECISION 0.01
544
double rk4(void *model, double *y, void *p, int n, double *h, double *yout, slope_fn_ptr f)
545
{
546
int i;
547
double *k1, *t1, *t2, *ytemp, max, new_h = (*h);
548
549
k1 = dvector(n);
550
t1 = dvector(n);
551
t2 = dvector(n);
552
ytemp = dvector(n);
553
554
/* evaluate the slope k1 at the beginning */
555
(*f)(model, y, p, k1);
556
557
/* try until accuracy is achieved */
558
do {
559
(*h) = new_h;
560
561
/* try RK4 once with normal step size */
562
rk4_core(model, y, k1, p, n, (*h), ytemp, f);
563
564
/* repeat it with two half-steps */
565
rk4_core(model, y, k1, p, n, (*h)/2.0, t1, f);
566
567
/* y after 1st half-step is in t1. re-evaluate k1 for this */
568
(*f)(model, t1, p, k1);
569
570
/* get output of the second half-step in t2 */
571
rk4_core(model, t1, k1, p, n, (*h)/2.0, t2, f);
572
573
/* find the max diff between these two results:
574
* use t1 to store the diff
575
*/
576
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
577
/* t1 = ytemp */
578
cblas_dcopy(n, ytemp, 1, t1, 1);
579
/* t1 = -1 * t2 + t1 = ytemp - t2 */
580
cblas_daxpy(n, -1.0, t2, 1, t1, 1);
581
/* max = |t1[max_abs_index]| */
582
max = fabs(t1[cblas_idamax(n, t1, 1)]);
583
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
584
/* t1 = ytemp */
585
dcopy(n, ytemp, 1, t1, 1);
586
/* t1 = -1 * t2 + t1 = ytemp - t2 */
587
daxpy(n, -1.0, t2, 1, t1, 1);
588
/*
589
* max = |t1[max_abs_index]| note: FORTRAN BLAS
590
* indices start from 1 as opposed to CBLAS where
591
* indices start from 0
592
*/
593
max = fabs(t1[idamax(n, t1, 1)-1]);
594
#else
595
for(i=0; i < n; i++) {
596
t1[i] = fabs(ytemp[i] - t2[i]);
597
}
598
max = t1[0];
599
for(i=1; i < n; i++)
600
if (max < t1[i])
601
max = t1[i];
602
#endif
603
604
/*
605
* compute the correct step size: see equation
606
* 16.2.10 in chapter 16 of "Numerical Recipes
607
* in C"
608
*/
609
/* accuracy OK. increase step size */
610
if (max <= RK4_PRECISION) {
611
new_h = RK4_SAFETY * (*h) * pow(fabs(RK4_PRECISION/max), 0.2);
612
if (new_h > RK4_MAXUP * (*h))
613
new_h = RK4_MAXUP * (*h);
614
/* inaccuracy error. decrease step size and compute again */
615
} else {
616
new_h = RK4_SAFETY * (*h) * pow(fabs(RK4_PRECISION/max), 0.25);
617
if (new_h < (*h) / RK4_MAXDOWN)
618
new_h = (*h) / RK4_MAXDOWN;
619
}
620
621
} while (new_h < (*h));
622
623
/* commit ytemp to yout */
624
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
625
cblas_dcopy(n, ytemp, 1, yout, 1);
626
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
627
dcopy(n, ytemp, 1, yout, 1);
628
#else
629
copy_dvector(yout, ytemp, n);
630
#endif
631
632
/* clean up */
633
free_dvector(k1);
634
free_dvector(t1);
635
free_dvector(t2);
636
free_dvector(ytemp);
637
638
/* return the step-size */
639
return new_h;
640
}
641
642
/* matmult: C = AB, A, B are n x n square matrices */
643
void matmult(double **c, double **a, double **b, int n)
644
{
645
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
646
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
647
n, n, n, 1.0, a[0], n, b[0], n, 0.0, c[0], n);
648
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
649
/* B^T * A^T = (A * B)^T */
650
dgemm('N', 'N', n, n, n, 1.0, b[0], n, a[0], n, 0.0, c[0], n);
651
#else
652
int i, j, k;
653
654
for (i = 0; i < n; i++)
655
for (j = 0; j < n; j++) {
656
c[i][j] = 0;
657
for (k = 0; k < n; k++)
658
c[i][j] += a[i][k] * b[k][j];
659
}
660
#endif
661
}
662
663
/* same as above but 'a' is a diagonal matrix stored as a 1-d array */
664
void diagmatmult(double **c, double *a, double **b, int n)
665
{
666
int i;
667
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
668
zero_dmatrix(c, n, n);
669
for(i=0; i < n; i++)
670
cblas_daxpy(n, a[i], b[i], 1, c[i], 1);
671
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
672
zero_dmatrix(c, n, n);
673
for(i=0; i < n; i++)
674
daxpy(n, a[i], b[i], 1, c[i], 1);
675
#else
676
int j;
677
678
for (i = 0; i < n; i++)
679
for (j = 0; j < n; j++)
680
c[i][j] = a[i] * b[i][j];
681
#endif
682
}
683
684
/* mult of an n x n matrix and an n x 1 column vector */
685
void matvectmult(double *vout, double **m, double *vin, int n)
686
{
687
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
688
cblas_dgemv(CblasRowMajor, CblasNoTrans, n, n, 1.0, m[0],
689
n, vin, 1, 0.0, vout, 1);
690
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
691
dgemv('T', n, n, 1.0, m[0], n, vin, 1, 0.0, vout, 1);
692
#else
693
int i, j;
694
695
for (i = 0; i < n; i++) {
696
vout[i] = 0;
697
for (j = 0; j < n; j++)
698
vout[i] += m[i][j] * vin[j];
699
}
700
#endif
701
}
702
703
/* same as above but 'm' is a diagonal matrix stored as a 1-d array */
704
void diagmatvectmult(double *vout, double *m, double *vin, int n)
705
{
706
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
707
cblas_dsbmv(CblasRowMajor, CblasUpper, n, 0, 1.0, m, 1, vin,
708
1, 0.0, vout, 1);
709
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
710
dsbmv('U', n, 0, 1.0, m, 1, vin, 1, 0.0, vout, 1);
711
#else
712
int i;
713
714
for (i = 0; i < n; i++)
715
vout[i] = m[i] * vin[i];
716
#endif
717
}
718
719
/*
720
* inv = m^-1, inv, m are n by n matrices.
721
* the spd flag indicates that m is symmetric
722
* and positive definite
723
*/
724
#define BLOCK_SIZE 256
725
void matinv(double **inv, double **m, int n, int spd)
726
{
727
int *p, lwork;
728
double *work;
729
int i, j;
730
#if (MATHACCEL != MA_NONE)
731
int info;
732
#endif
733
double *col;
734
735
p = ivector(n);
736
lwork = n * BLOCK_SIZE;
737
work = dvector(lwork);
738
739
#if (MATHACCEL == MA_INTEL)
740
info = 0;
741
cblas_dcopy(n*n, m[0], 1, inv[0], 1);
742
if (!spd) {
743
dgetrf(&n, &n, inv[0], &n, p, &info);
744
assert(info == 0);
745
dgetri(&n, inv[0], &n, p, work, &lwork, &info);
746
assert(info == 0);
747
} else {
748
dpotrf("U", &n, inv[0], &n, &info);
749
assert(info == 0);
750
dpotri("U", &n, inv[0], &n, &info);
751
assert(info == 0);
752
mirror_dmatrix(inv, n);
753
}
754
#elif(MATHACCEL == MA_AMD)
755
info = 0;
756
dcopy(n*n, m[0], 1, inv[0], 1);
757
if (!spd) {
758
dgetrf_(&n, &n, inv[0], &n, p, &info);
759
assert(info == 0);
760
dgetri_(&n, inv[0], &n, p, work, &lwork, &info);
761
assert(info == 0);
762
} else {
763
dpotrf_("U", &n, inv[0], &n, &info, 1);
764
assert(info == 0);
765
dpotri_("U", &n, inv[0], &n, &info, 1);
766
assert(info == 0);
767
mirror_dmatrix(inv, n);
768
}
769
#elif (MATHACCEL == MA_APPLE)
770
info = 0;
771
cblas_dcopy(n*n, m[0], 1, inv[0], 1);
772
if (!spd) {
773
dgetrf_((__CLPK_integer *)&n, (__CLPK_integer *)&n,
774
inv[0], (__CLPK_integer *)&n, (__CLPK_integer *)p,
775
(__CLPK_integer *)&info);
776
assert(info == 0);
777
dgetri_((__CLPK_integer *)&n, inv[0], (__CLPK_integer *)&n,
778
(__CLPK_integer *)p, work, (__CLPK_integer *)&lwork,
779
(__CLPK_integer *)&info);
780
assert(info == 0);
781
} else {
782
dpotrf_("U", (__CLPK_integer *)&n, inv[0], (__CLPK_integer *)&n,
783
(__CLPK_integer *)&info);
784
assert(info == 0);
785
dpotri_("U", (__CLPK_integer *)&n, inv[0], (__CLPK_integer *)&n,
786
(__CLPK_integer *)&info);
787
assert(info == 0);
788
mirror_dmatrix(inv, n);
789
}
790
#elif(MATHACCEL == MA_SUN)
791
info = 0;
792
dcopy(n*n, m[0], 1, inv[0], 1);
793
if (!spd) {
794
dgetrf_(&n, &n, inv[0], &n, p, &info);
795
assert(info == 0);
796
dgetri_(&n, inv[0], &n, p, work, &lwork, &info);
797
assert(info == 0);
798
} else {
799
dpotrf_("U", &n, inv[0], &n, &info);
800
assert(info == 0);
801
dpotri_("U", &n, inv[0], &n, &info);
802
assert(info == 0);
803
mirror_dmatrix(inv, n);
804
}
805
#else
806
col = dvector(n);
807
808
lupdcmp(m, n, p, spd);
809
810
for (j = 0; j < n; j++) {
811
for (i = 0; i < n; i++) col[i]=0.0;
812
col[j] = 1.0;
813
lusolve(m, n, p, col, work, spd);
814
for (i = 0; i < n; i++) inv[i][j]=work[i];
815
}
816
817
free_dvector(col);
818
#endif
819
820
free_ivector(p);
821
free_dvector(work);
822
}
823
824
/* dst = src1 + scale * src2 */
825
void scaleadd_dvector (double *dst, double *src1, double *src2, int n, double scale)
826
{
827
#if (MATHACCEL == MA_NONE)
828
int i;
829
for(i=0; i < n; i++)
830
dst[i] = src1[i] + scale * src2[i];
831
#else
832
/* dst == src2. so, dst *= scale, dst += src1 */
833
if (dst == src2 && dst != src1) {
834
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
835
cblas_dscal(n, scale, dst, 1);
836
cblas_daxpy(n, 1.0, src1, 1, dst, 1);
837
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
838
dscal(n, scale, dst, 1);
839
daxpy(n, 1.0, src1, 1, dst, 1);
840
#else
841
fatal("unknown math acceleration\n");
842
#endif
843
/* dst = src1; dst += scale * src2 */
844
} else {
845
#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
846
cblas_dcopy(n, src1, 1, dst, 1);
847
cblas_daxpy(n, scale, src2, 1, dst, 1);
848
#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
849
dcopy(n, src1, 1, dst, 1);
850
daxpy(n, scale, src2, 1, dst, 1);
851
#else
852
fatal("unknown math acceleration\n");
853
#endif
854
}
855
#endif
856
}
857
858