CoCalc -- RCutil.c

GitHub Repository: uvahotspot/HotSpot
Path: blob/master/RCutil.c
⁶¹² views
1
/*
2
 * Thanks to Greg Link from Penn State University
3
 * for his math acceleration engine. Where available,
4
 * the modified version of the engine found here, uses
5
 * the fast, vendor-provided linear algebra routines
6
 * from the BLAS and LAPACK packages in  lieu of
7
 * the vanilla C code present in the matrix functions
8
 * of the previous versions of HotSpot.
9
 */
10
#include <stdio.h>
11
#include <stdlib.h>
12
#include <assert.h>
13
#include <math.h>
14

15
#include "temperature.h"
16
#include "flp.h"
17
#include "util.h"
18

19
/* thermal resistance calculation	*/
20
double getr(double conductivity, double thickness, double area)
21
{
22
	return thickness / (conductivity * area);
23
}
24

25
/* thermal capacitance calculation	*/
26
double getcap(double sp_heat, double thickness, double area)
27
{
28
	/* include lumped vs. distributed correction	*/
29
	return C_FACTOR * sp_heat * thickness * area;
30
  //return sp_heat * thickness * area;
31
}
32

33
/*
34
 * LUP decomposition from the pseudocode given in the CLR
35
 * 'Introduction to Algorithms' textbook. The matrix 'a' is
36
 * transformed into an in-place lower/upper triangular matrix
37
 * and the vector'p' carries the permutation vector such that
38
 * Pa = lu, where 'P' is the matrix form of 'p'. The 'spd' flag
39
 * indicates that 'a' is symmetric and positive definite
40
 */
41

42
void lupdcmp(double**a, int n, int *p, int spd)
43
{
44
	#if(MATHACCEL == MA_INTEL)
45
	int info = 0;
46
	if (!spd)
47
		dgetrf(&n, &n, a[0], &n, p, &info);
48
	else
49
		dpotrf("U", &n, a[0], &n, &info);
50
	assert(info == 0);
51
	#elif(MATHACCEL == MA_AMD)
52
	int info = 0;
53
	if (!spd)
54
		dgetrf_(&n, &n, a[0], &n, p, &info);
55
	else
56
		dpotrf_("U", &n, a[0], &n, &info, 1);
57
	assert(info == 0);
58
	#elif(MATHACCEL == MA_APPLE)
59
	int info = 0;
60
	if (!spd)
61
		dgetrf_((__CLPK_integer *)&n, (__CLPK_integer *)&n, a[0],
62
				(__CLPK_integer *)&n, (__CLPK_integer *)p,
63
				(__CLPK_integer *)&info);
64
	else
65
		dpotrf_("U", (__CLPK_integer *)&n, a[0], (__CLPK_integer *)&n,
66
				(__CLPK_integer *)&info);
67
	assert(info == 0);
68
	#elif(MATHACCEL == MA_SUN)
69
	int info = 0;
70
	if (!spd)
71
		dgetrf_(&n, &n, a[0], &n, p, &info);
72
	else
73
		dpotrf_("U", &n, a[0], &n, &info);
74
	assert(info == 0);
75
	#else
76
	int i, j, k, pivot=0;
77
	double max = 0;
78

79
	/* start with identity permutation	*/
80
	for (i=0; i < n; i++)
81
		p[i] = i;
82

83
	for (k=0; k < n-1; k++)	 {
84
		max = 0;
85
		for (i = k; i < n; i++)	{
86
			if (fabs(a[i][k]) > max) {
87
				max = fabs(a[i][k]);
88
				pivot = i;
89
			}
90
		}
91
		if (eq (max, 0))
92
			fatal ("singular matrix in lupdcmp\n");
93

94
		/* bring pivot element to position	*/
95
		swap_ival (&p[k], &p[pivot]);
96
		for (i=0; i < n; i++)
97
			swap_dval (&a[k][i], &a[pivot][i]);
98

99
		for (i=k+1; i < n; i++) {
100
			a[i][k] /= a[k][k];
101
			for (j=k+1; j < n; j++)
102
				a[i][j] -= a[i][k] * a[k][j];
103
		}
104
	}
105
	#endif
106
}
107

108
/*
109
 * the matrix a is an in-place lower/upper triangular matrix
110
 * the following macros split them into their constituents
111
 */
112

113
#define LOWER(a, i, j)		((i > j) ? a[i][j] : 0)
114
#define UPPER(a, i, j)		((i <= j) ? a[i][j] : 0)
115

116
/*
117
 * LU forward and backward substitution from the pseudocode given
118
 * in the CLR 'Introduction to Algorithms' textbook. It solves ax = b
119
 * where, 'a' is an in-place lower/upper triangular matrix. The vector
120
 * 'x' carries the solution vector. 'p' is the permutation vector. The
121
 * 'spd' flag indicates that 'a' is symmetric and positive definite
122
 */
123

124
void lusolve(double **a, int n, int *p, double *b, double *x, int spd)
125
{
126
	#if(MATHACCEL == MA_INTEL)
127
	int one = 1, info = 0;
128
	cblas_dcopy(n, b, 1, x, 1);
129
	if (!spd)
130
		dgetrs("T", &n, &one, a[0], &n, p, x, &n, &info);
131
	else
132
		dpotrs("U", &n, &one, a[0], &n, x, &n, &info);
133
	assert(info == 0);
134
	#elif(MATHACCEL == MA_AMD)
135
	int one = 1, info = 0;
136
	dcopy(n, b, 1, x, 1);
137
	if (!spd)
138
		dgetrs_("T", &n, &one, a[0], &n, p, x, &n, &info, 1);
139
	else
140
		dpotrs_("U", &n, &one, a[0], &n, x, &n, &info, 1);
141
	assert(info == 0);
142
	#elif(MATHACCEL == MA_APPLE)
143
	int one = 1, info = 0;
144
	cblas_dcopy(n, b, 1, x, 1);
145
	if (!spd)
146
		dgetrs_("T", (__CLPK_integer *)&n, (__CLPK_integer *)&one, a[0],
147
				(__CLPK_integer *)&n, (__CLPK_integer *)p, x,
148
				(__CLPK_integer *)&n, (__CLPK_integer *)&info);
149
	else
150
		dpotrs_("U", (__CLPK_integer *)&n, (__CLPK_integer *)&one, a[0],
151
				(__CLPK_integer *)&n, x, (__CLPK_integer *)&n,
152
				(__CLPK_integer *)&info);
153
	assert(info == 0);
154
	#elif(MATHACCEL == MA_SUN)
155
	int one = 1, info = 0;
156
	dcopy(n, b, 1, x, 1);
157
	if (!spd)
158
		dgetrs_("T", &n, &one, a[0], &n, p, x, &n, &info);
159
	else
160
		dpotrs_("U", &n, &one, a[0], &n, x, &n, &info);
161
	assert(info == 0);
162
	#else
163
	int i, j;
164
	double *y = dvector (n);
165
	double sum;
166

167
	/* forward substitution	- solves ly = pb	*/
168
	for (i=0; i < n; i++) {
169
		for (j=0, sum=0; j < i; j++)
170
			sum += y[j] * LOWER(a, i, j);
171
		y[i] = b[p[i]] - sum;
172
	}
173

174
	/* backward substitution - solves ux = y	*/
175
	for (i=n-1; i >= 0; i--) {
176
		for (j=i+1, sum=0; j < n; j++)
177
			sum += x[j] * UPPER(a, i, j);
178
		x[i] = (y[i] - sum) / UPPER(a, i, i);
179
	}
180

181
	free_dvector(y);
182
	#endif
183
}
184

185
#if SUPERLU > 0
186
// Builds the matrix A = (1/h)C + G for the Backward Euler Method
187
int build_A_matrix(SuperMatrix *G, diagonal_matrix_t *C, double h, SuperMatrix *A)
188
{
189
  NCformat *Astore;
190
  int i, j;
191
  int n = C->n;
192
  double *a;
193
  int *asub, *xa;
194
  int flag = 1;
195

196
  NCformat *Gstore = G->Store;
197
  int nrow = G->nrow;
198
  int ncol = G->ncol;
199
  int nnz = Gstore->nnz;
200

201
  // Copy G into A
202
  double *nzval;
203
  int *rowind;
204
  int *colptr;
205
  if ( !(nzval = doubleMalloc(nnz)) ) fatal("Malloc fails for a[].\n");
206
  if ( !(rowind = intMalloc(nnz)) ) fatal("Malloc fails for asub[].\n");
207
  if ( !(colptr = intMalloc(n+1)) ) fatal("Malloc fails for xa[].\n");
208

209

210
  dCreate_CompCol_Matrix(A, nrow, ncol, nnz, nzval, rowind,
211
                         colptr, SLU_NC, SLU_D, SLU_GE);
212

213

214
  dCopy_CompCol_Matrix(G, A);
215

216
  Astore = A->Store;
217
  n = A->ncol;
218
  a = Astore->nzval;
219
  asub = Astore->rowind;
220
  xa = Astore->colptr;
221

222
  double *C_vals = C->vals;
223

224
  for(i=0; i<n; i++){
225
      flag = 1;
226
      j = xa[i];
227
      while(j<xa[i+1]){
228
          if(asub[j] == i){
229
              //fprintf(stderr, "i = %d, j = %d, a[%d]  = %e + (1.0 / %e) * %e = ", i, j, j, a[j], h, C_vals[i]);
230
              a[j] += (1.0 / h)*C_vals[i];
231
              //fprintf(stderr, "%e\n", a[j]);
232
              flag = 0;
233
          }
234
          j++;
235
      }
236
      if(flag)
237
        return 0;
238
  }
239
  return 1;
240
}
241

242
// Builds the matrix B = (1/h)CT + P for the Backward Euler method
243
int build_B_matrix(diagonal_matrix_t *C, double *T, double *P, double h, SuperMatrix *B)
244
{
245
  int i, n = C->n;
246
  double *B_temp = dvector(n);
247
  double *C_vals = C->vals;
248

249
  for(i = 0; i < n; i++) {
250
    B_temp[i] = (1.0 / h) * C_vals[i] * T[i] + P[i];
251
  }
252

253
  dCreate_Dense_Matrix(B, n, 1, B_temp, n, SLU_DN, SLU_D, SLU_GE);
254

255
  return 1;
256
}
257

258
/*
259
 * Backward Euler solver with adaptive step sizing.
260
 * It takes as input the conductance matrix G, capacitances C,
261
 * temperatures T and power P at time t, and solves the ODE GT + CdT = P
262
 * between t and t+h. It returns the correct step size to be used next time.
263
 */
264
#define BACKWARD_EULER_SAFETY    0.95
265
#define BACKWARD_EULER_MAXUP     5.0
266
#define BACKWARD_EULER_MAXDOWN   10.0
267
#define BACKWARD_EULER_PRECISION 0.01
268
double backward_euler(SuperMatrix *G, diagonal_matrix_t *C, double *T, double *P, double *h, double *Tout)
269
{
270
  ////////////////// Set Options and Statistics //////////////////////////
271
  SuperMatrix L, U;
272
  int *perm_r; // row permutations from partial pivoting
273
  int *perm_c; // column permutation vector
274
  int info, n = C->n;
275
  double max, new_h = (*h);
276
  superlu_options_t options;
277
  SuperLUStat_t stat;
278
  DNformat     *Bstore;
279
  double       *dp;
280

281
  // TODO: Implement re-use of perm_c
282
  if ( !(perm_r = intMalloc(n)) ) fatal("Malloc fails for perm_r[].\n");
283
  if ( !(perm_c = intMalloc(n)) ) fatal("Malloc fails for perm_c[].\n");
284

285
  set_default_options(&options);
286

287
  // Initialize the statistics variables.
288
  StatInit(&stat);
289

290
  SuperMatrix *A, *B;
291
  int i;
292
  double *Ttemp = dvector(n);
293
  double *T1    = dvector(n);
294
  double *T2    = dvector(n);
295

296

297
  A = calloc(1, sizeof(SuperMatrix));
298
  B = calloc(1, sizeof(SuperMatrix));
299

300
  int flag = 1;
301

302
  if(build_A_matrix(G, C, *h, A) != 1)
303
    fatal("error\n");
304

305
  if(build_B_matrix(C, T, P, *h, B) != 1)
306
    fatal("error\n");
307

308
  dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
309

310
  // Copy results into Ttemp
311
  Bstore = (DNformat *) B->Store;
312
  dp = (double *) Bstore->nzval;
313
  for(i = 0; i < n; i++) {
314
    Ttemp[i] = dp[i];
315
    //fprintf(stderr, "  Ttemp[%d] = %e\n", i, dp[i]);
316
  }
317

318
  // TEST
319
  /*
320
  do {
321
    (*h) = new_h;
322

323
    ////////////////////// Evaluate once with step size h ////////////////////
324
    // Build the matrix A = (1/h)C + G
325
    if(build_A_matrix(G, C, *h, A) != 1)
326
      fatal("In backward_euler(): Failed to build A matrix\n");
327

328
    // Build the matrix B = (1/h)CT + P
329
    if(build_B_matrix(C, T, P, *h, B) != 1)
330
      fatal("In backward_euler(): Failed to build B matrix\n");
331

332
    // Solve the linear system Ax = B
333
    dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
334

335
    // Copy results into Ttemp
336
    Bstore = (DNformat *) B->Store;
337
    dp = (double *) Bstore->nzval;
338
    for(i = 0; i < n; i++) {
339
      Ttemp[i] = dp[i];
340
      //fprintf(stderr, "  Ttemp[%d] = %e\n", i, dp[i]);
341
    }
342

343
    ////////////////////// Repeat as two steps of size h/2 ////////////////////
344
    // Build the matrix A = (2/h)C + G
345
    if(build_A_matrix(G, C, (*h)/2, A) != 1)
346
      fatal("In backward_euler(): Failed to build A matrix\n");
347

348
    // Build the matrix B = (2/h)CT + P
349
    if(build_B_matrix(C, T, P, (*h)/2, B) != 1)
350
      fatal("In backward_euler(): Failed to build B matrix\n");
351

352
    // Solve the linear system Ax = B
353
    dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
354

355
    // Copy results into T1
356
    Bstore = (DNformat *) B->Store;
357
    dp = (double *) Bstore->nzval;
358
    for(i = 0; i < n; i++) {
359
      T1[i] = dp[i];
360
      //fprintf(stderr, "  T1[%d] = %e\n", i, T1[i]);
361
    }
362

363
    // Rebuild B = (2/h)CT + P using T1
364
    if(build_B_matrix(C, T1, P, (*h)/2, B) != 1)
365
      fatal("In backward_euler(): Failed to build B matrix\n");
366

367
    // Solve the linear system Ax = B
368
    dgssv(&options, A, perm_c, perm_r, &L, &U, B, &stat, &info);
369

370
    // Copy results into T2
371
    Bstore = (DNformat *) B->Store;
372
    dp = (double *) Bstore->nzval;
373
    for(i = 0; i < n; i++) {
374
      T2[i] = dp[i];
375
      //fprintf(stderr, "  T2[%d] = %e\n", i, T2[i]);
376
    }
377

378
    // Find maximum difference between Ttemp and T2
379
    for(i = 0; i < n; i++) {
380
      T1[i] = fabs(Ttemp[i] - T2[i]);
381
      //fprintf(stderr, "    delta[%d] = |%e - %e| = %e\n", i, Ttemp[i], T2[i], T1[i]);
382
    }
383
    max = T1[0];
384
    for(i = 1; i < n; i++) {
385
      if(max < T1[i]) {
386
        max = T1[i];
387
      }
388
    }
389
    //fprintf(stderr, "    max delta = %e\n", max);
390

391
    // Accuracy OK. Increase step size
392
    if(max <= BACKWARD_EULER_PRECISION) {
393
      new_h = BACKWARD_EULER_SAFETY * (*h) * pow(fabs(BACKWARD_EULER_PRECISION/max), 0.2);
394
      if (new_h > BACKWARD_EULER_MAXUP * (*h))
395
        new_h = BACKWARD_EULER_MAXUP * (*h);
396
    // Inaccuracy error. Decrease step size	and compute again
397
      //fprintf(stderr, "  max delta OK; increasing step size to h = %e\n", new_h);
398
      flag = 0;
399
    }
400
    else {
401
      new_h = BACKWARD_EULER_SAFETY * (*h) * pow(fabs(BACKWARD_EULER_PRECISION/max), 0.25);
402
        if (new_h < (*h) / BACKWARD_EULER_MAXDOWN)
403
          new_h = (*h) / BACKWARD_EULER_MAXDOWN;
404

405
      //fprintf(stderr, "  max delta NOT OK; decreasing step size to h = %e\n", new_h);
406
    }
407

408
  } while(new_h < (*h));
409
*/
410
  // Copy temperatures over to output
411
  for(i = 0; i < n; i++) {
412
    Tout[i] = Ttemp[i];
413
    //fprintf(stderr, "  Returning T[%d] = %e\n", i, Tout[i]);
414
  }
415

416
  free_dvector(Ttemp);
417
  free_dvector(T1);
418
  free_dvector(T2);
419
  SUPERLU_FREE (perm_r);
420
  SUPERLU_FREE (perm_c);
421
  Destroy_CompCol_Matrix(A);
422
  Destroy_SuperMatrix_Store(B);
423
  Destroy_SuperNode_Matrix(&L);
424
  Destroy_CompCol_Matrix(&U);
425
  StatFree(&stat);
426

427
  return new_h;
428
}
429
#endif
430

431
/* core of the 4th order Runge-Kutta method, where the Euler step
432
 * (y(n+1) = y(n) + h * k1 where k1 = dydx(n)) is provided as an input.
433
 * to evaluate dydx at different points, a call back function f (slope
434
 * function) is also passed as a parameter. Given values for y, and k1,
435
 * this function advances the solution over an interval h, and returns
436
 * the solution in yout. For details, see the discussion in "Numerical
437
 * Recipes in C", Chapter 16, from
438
 * http://www.nrbook.com/a/bookcpdf/c16-1.pdf
439
 */
440
void rk4_core(void *model, double *y, double *k1, void *p, int n, double h, double *yout, slope_fn_ptr f)
441
{
442
	int i;
443
	double *t, *k2, *k3, *k4;
444
	k2 = dvector(n);
445
	k3 = dvector(n);
446
	k4 = dvector(n);
447
	t = dvector(n);
448

449
	/* k2 is the slope at the trial midpoint (t) found using
450
	 * slope k1 (which is at the starting point).
451
	 */
452
	/* t = y + h/2 * k1 (t = y; t += h/2 * k1) */
453
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
454
	cblas_dcopy(n, y, 1, t, 1);
455
	cblas_daxpy(n, h/2.0, k1, 1, t, 1);
456
	#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
457
	dcopy(n, y, 1, t, 1);
458
	daxpy(n, h/2.0, k1, 1, t, 1);
459
	#else
460
	for(i=0; i < n; i++)
461
		t[i] = y[i] + h/2.0 * k1[i];
462
	#endif
463
	/* k2 = slope at t */
464
	(*f)(model, t, p, k2);
465

466
	/* k3 is the slope at the trial midpoint (t) found using
467
	 * slope k2 found above.
468
	 */
469
	/* t =  y + h/2 * k2 (t = y; t += h/2 * k2) */
470
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
471
	cblas_dcopy(n, y, 1, t, 1);
472
	cblas_daxpy(n, h/2.0, k2, 1, t, 1);
473
	#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
474
	dcopy(n, y, 1, t, 1);
475
	daxpy(n, h/2.0, k2, 1, t, 1);
476
	#else
477
	for(i=0; i < n; i++)
478
		t[i] = y[i] + h/2.0 * k2[i];
479
	#endif
480
	/* k3 = slope at t */
481
	(*f)(model, t, p, k3);
482

483
	/* k4 is the slope at trial endpoint (t) found using
484
	 * slope k3 found above.
485
	 */
486
	/* t =  y + h * k3 (t = y; t += h * k3) */
487
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
488
	cblas_dcopy(n, y, 1, t, 1);
489
	cblas_daxpy(n, h, k3, 1, t, 1);
490
	#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
491
	dcopy(n, y, 1, t, 1);
492
	daxpy(n, h, k3, 1, t, 1);
493
	#else
494
	for(i=0; i < n; i++)
495
		t[i] = y[i] + h * k3[i];
496
	#endif
497
	/* k4 = slope at t */
498
	(*f)(model, t, p, k4);
499

500
	/* yout = y + h*(k1/6 + k2/3 + k3/3 + k4/6)	*/
501
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
502
	/* yout = y	*/
503
	cblas_dcopy(n, y, 1, yout, 1);
504
	/* yout += h*k1/6	*/
505
	cblas_daxpy(n, h/6.0, k1, 1, yout, 1);
506
	/* yout += h*k2/3	*/
507
	cblas_daxpy(n, h/3.0, k2, 1, yout, 1);
508
	/* yout += h*k3/3	*/
509
	cblas_daxpy(n, h/3.0, k3, 1, yout, 1);
510
	/* yout += h*k4/6	*/
511
	cblas_daxpy(n, h/6.0, k4, 1, yout, 1);
512
	#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
513
	dcopy(n, y, 1, yout, 1);
514
	/* yout += h*k1/6	*/
515
	daxpy(n, h/6.0, k1, 1, yout, 1);
516
	/* yout += h*k2/3	*/
517
	daxpy(n, h/3.0, k2, 1, yout, 1);
518
	/* yout += h*k3/3	*/
519
	daxpy(n, h/3.0, k3, 1, yout, 1);
520
	/* yout += h*k4/6	*/
521
	daxpy(n, h/6.0, k4, 1, yout, 1);
522
	#else
523
	for (i =0; i < n; i++)
524
		yout[i] = y[i] + h * (k1[i] + 2*k2[i] + 2*k3[i] + k4[i])/6.0;
525
	#endif
526

527
	free_dvector(k2);
528
	free_dvector(k3);
529
	free_dvector(k4);
530
	free_dvector(t);
531
}
532

533
/*
534
 * 4th order Runge Kutta solver	with adaptive step sizing.
535
 * It integrates and solves the ODE dy + cy = p between
536
 * t and t+h. It returns the correct step size to be used
537
 * next time. slope function f is the call back used to
538
 * evaluate the derivative at each point
539
 */
540
#define RK4_SAFETY		0.95
541
#define RK4_MAXUP		5.0
542
#define RK4_MAXDOWN		10.0
543
#define RK4_PRECISION	0.01
544
double rk4(void *model, double *y, void *p, int n, double *h, double *yout, slope_fn_ptr f)
545
{
546
	int i;
547
	double *k1, *t1, *t2, *ytemp, max, new_h = (*h);
548

549
	k1 = dvector(n);
550
	t1 = dvector(n);
551
	t2 = dvector(n);
552
	ytemp = dvector(n);
553

554
	/* evaluate the slope k1 at the beginning */
555
	(*f)(model, y, p, k1);
556

557
	/* try until accuracy is achieved	*/
558
  do {
559
		(*h) = new_h;
560

561
		/* try RK4 once with normal step size	*/
562
		rk4_core(model, y, k1, p, n, (*h), ytemp, f);
563

564
		/* repeat it with two half-steps	*/
565
		rk4_core(model, y, k1, p, n, (*h)/2.0, t1, f);
566

567
		/* y after 1st half-step is in t1. re-evaluate k1 for this	*/
568
		(*f)(model, t1, p, k1);
569

570
		/* get output of the second half-step in t2	*/
571
		rk4_core(model, t1, k1, p, n, (*h)/2.0, t2, f);
572

573
		/* find the max diff between these two results:
574
		 * use t1 to store the diff
575
		 */
576
		#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
577
		/* t1 = ytemp	*/
578
		cblas_dcopy(n, ytemp, 1, t1, 1);
579
		/* t1 = -1 * t2 + t1 = ytemp - t2	*/
580
		cblas_daxpy(n, -1.0, t2, 1, t1, 1);
581
		/* max = |t1[max_abs_index]|	*/
582
		max = fabs(t1[cblas_idamax(n, t1, 1)]);
583
		#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
584
		/* t1 = ytemp	*/
585
		dcopy(n, ytemp, 1, t1, 1);
586
		/* t1 = -1 * t2 + t1 = ytemp - t2	*/
587
		daxpy(n, -1.0, t2, 1, t1, 1);
588
		/*
589
		 * max = |t1[max_abs_index]| note: FORTRAN BLAS
590
		 * indices start from 1 as opposed to CBLAS where
591
		 * indices start from 0
592
		 */
593
		max = fabs(t1[idamax(n, t1, 1)-1]);
594
		#else
595
		for(i=0; i < n; i++) {
596
			t1[i] = fabs(ytemp[i] - t2[i]);
597
    }
598
		max = t1[0];
599
		for(i=1; i < n; i++)
600
			if (max < t1[i])
601
				max = t1[i];
602
		#endif
603

604
		/*
605
		 * compute the correct step size: see equation
606
		 * 16.2.10  in chapter 16 of "Numerical Recipes
607
		 * in C"
608
		 */
609
		/* accuracy OK. increase step size	*/
610
		if (max <= RK4_PRECISION) {
611
			new_h = RK4_SAFETY * (*h) * pow(fabs(RK4_PRECISION/max), 0.2);
612
			if (new_h > RK4_MAXUP * (*h))
613
				new_h = RK4_MAXUP * (*h);
614
		/* inaccuracy error. decrease step size	and compute again */
615
		} else {
616
			new_h = RK4_SAFETY * (*h) * pow(fabs(RK4_PRECISION/max), 0.25);
617
			if (new_h < (*h) / RK4_MAXDOWN)
618
				new_h = (*h) / RK4_MAXDOWN;
619
		}
620

621
	} while (new_h < (*h));
622

623
	/* commit ytemp to yout	*/
624
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
625
	cblas_dcopy(n, ytemp, 1, yout, 1);
626
	#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
627
	dcopy(n, ytemp, 1, yout, 1);
628
	#else
629
	copy_dvector(yout, ytemp, n);
630
	#endif
631

632
	/* clean up */
633
	free_dvector(k1);
634
	free_dvector(t1);
635
	free_dvector(t2);
636
	free_dvector(ytemp);
637

638
	/* return the step-size	*/
639
	return new_h;
640
}
641

642
/* matmult: C = AB, A, B are n x n square matrices	*/
643
void matmult(double **c, double **a, double **b, int n)
644
{
645
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
646
	cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
647
				n, n, n, 1.0, a[0], n, b[0], n, 0.0, c[0], n);
648
	#elif (MATHACCEL == MA_AMD  || MATHACCEL == MA_SUN)
649
	/* B^T * A^T = (A * B)^T	*/
650
	dgemm('N', 'N', n, n, n, 1.0, b[0], n, a[0], n, 0.0, c[0], n);
651
	#else
652
	int i, j, k;
653

654
	for (i = 0; i < n; i++)
655
		for (j = 0; j < n; j++) {
656
			c[i][j] = 0;
657
			for (k = 0; k < n; k++)
658
				c[i][j] += a[i][k] * b[k][j];
659
		}
660
	#endif
661
}
662

663
/* same as above but 'a' is a diagonal matrix stored as a 1-d array	*/
664
void diagmatmult(double **c, double *a, double **b, int n)
665
{
666
	int i;
667
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
668
	zero_dmatrix(c, n, n);
669
	for(i=0; i < n; i++)
670
		cblas_daxpy(n, a[i], b[i], 1, c[i], 1);
671
	#elif (MATHACCEL == MA_AMD  || MATHACCEL == MA_SUN)
672
	zero_dmatrix(c, n, n);
673
	for(i=0; i < n; i++)
674
		daxpy(n, a[i], b[i], 1, c[i], 1);
675
	#else
676
	int j;
677

678
	for (i = 0; i < n; i++)
679
		for (j = 0; j < n; j++)
680
			c[i][j] = a[i] * b[i][j];
681
	#endif
682
}
683

684
/* mult of an n x n matrix and an n x 1 column vector	*/
685
void matvectmult(double *vout, double **m, double *vin, int n)
686
{
687
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
688
	cblas_dgemv(CblasRowMajor, CblasNoTrans, n, n, 1.0, m[0],
689
				n, vin, 1, 0.0, vout, 1);
690
	#elif (MATHACCEL == MA_AMD  || MATHACCEL == MA_SUN)
691
	dgemv('T', n, n, 1.0, m[0], n, vin, 1, 0.0, vout, 1);
692
	#else
693
	int i, j;
694

695
	for (i = 0; i < n; i++) {
696
		vout[i] = 0;
697
		for (j = 0; j < n; j++)
698
			vout[i] += m[i][j] * vin[j];
699
	}
700
	#endif
701
}
702

703
/* same as above but 'm' is a diagonal matrix stored as a 1-d array	*/
704
void diagmatvectmult(double *vout, double *m, double *vin, int n)
705
{
706
	#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
707
	cblas_dsbmv(CblasRowMajor, CblasUpper, n, 0, 1.0, m, 1, vin,
708
				1, 0.0, vout, 1);
709
	#elif (MATHACCEL == MA_AMD  || MATHACCEL == MA_SUN)
710
	dsbmv('U', n, 0, 1.0, m, 1, vin, 1, 0.0, vout, 1);
711
	#else
712
	int i;
713

714
	for (i = 0; i < n; i++)
715
		vout[i] = m[i] * vin[i];
716
	#endif
717
}
718

719
/*
720
 * inv = m^-1, inv, m are n by n matrices.
721
 * the spd flag indicates that m is symmetric
722
 * and positive definite
723
 */
724
#define BLOCK_SIZE		256
725
void matinv(double **inv, double **m, int n, int spd)
726
{
727
	int *p, lwork;
728
	double *work;
729
	int i, j;
730
	#if (MATHACCEL != MA_NONE)
731
	int info;
732
	#endif
733
	double *col;
734

735
	p = ivector(n);
736
	lwork = n * BLOCK_SIZE;
737
	work = dvector(lwork);
738

739
	#if (MATHACCEL == MA_INTEL)
740
	info = 0;
741
	cblas_dcopy(n*n, m[0], 1, inv[0], 1);
742
	if (!spd) {
743
		dgetrf(&n, &n, inv[0], &n, p, &info);
744
		assert(info == 0);
745
		dgetri(&n, inv[0], &n, p, work, &lwork, &info);
746
		assert(info == 0);
747
	} else {
748
		dpotrf("U", &n, inv[0], &n, &info);
749
		assert(info == 0);
750
		dpotri("U", &n, inv[0], &n, &info);
751
		assert(info == 0);
752
		mirror_dmatrix(inv, n);
753
	}
754
	#elif(MATHACCEL == MA_AMD)
755
	info = 0;
756
	dcopy(n*n, m[0], 1, inv[0], 1);
757
	if (!spd) {
758
		dgetrf_(&n, &n, inv[0], &n, p, &info);
759
		assert(info == 0);
760
		dgetri_(&n, inv[0], &n, p, work, &lwork, &info);
761
		assert(info == 0);
762
	} else {
763
		dpotrf_("U", &n, inv[0], &n, &info, 1);
764
		assert(info == 0);
765
		dpotri_("U", &n, inv[0], &n, &info, 1);
766
		assert(info == 0);
767
		mirror_dmatrix(inv, n);
768
	}
769
	#elif (MATHACCEL == MA_APPLE)
770
	info = 0;
771
	cblas_dcopy(n*n, m[0], 1, inv[0], 1);
772
	if (!spd) {
773
		dgetrf_((__CLPK_integer *)&n, (__CLPK_integer *)&n,
774
				inv[0], (__CLPK_integer *)&n, (__CLPK_integer *)p,
775
				(__CLPK_integer *)&info);
776
		assert(info == 0);
777
		dgetri_((__CLPK_integer *)&n, inv[0], (__CLPK_integer *)&n,
778
				(__CLPK_integer *)p, work, (__CLPK_integer *)&lwork,
779
				(__CLPK_integer *)&info);
780
		assert(info == 0);
781
	} else {
782
		dpotrf_("U", (__CLPK_integer *)&n, inv[0], (__CLPK_integer *)&n,
783
				(__CLPK_integer *)&info);
784
		assert(info == 0);
785
		dpotri_("U", (__CLPK_integer *)&n, inv[0], (__CLPK_integer *)&n,
786
				(__CLPK_integer *)&info);
787
		assert(info == 0);
788
		mirror_dmatrix(inv, n);
789
	}
790
	#elif(MATHACCEL == MA_SUN)
791
	info = 0;
792
	dcopy(n*n, m[0], 1, inv[0], 1);
793
	if (!spd) {
794
		dgetrf_(&n, &n, inv[0], &n, p, &info);
795
		assert(info == 0);
796
		dgetri_(&n, inv[0], &n, p, work, &lwork, &info);
797
		assert(info == 0);
798
	} else {
799
		dpotrf_("U", &n, inv[0], &n, &info);
800
		assert(info == 0);
801
		dpotri_("U", &n, inv[0], &n, &info);
802
		assert(info == 0);
803
		mirror_dmatrix(inv, n);
804
	}
805
	#else
806
	col = dvector(n);
807

808
	lupdcmp(m, n, p, spd);
809

810
	for (j = 0; j < n; j++) {
811
		for (i = 0; i < n; i++) col[i]=0.0;
812
		col[j] = 1.0;
813
		lusolve(m, n, p, col, work, spd);
814
		for (i = 0; i < n; i++) inv[i][j]=work[i];
815
	}
816

817
	free_dvector(col);
818
	#endif
819

820
	free_ivector(p);
821
	free_dvector(work);
822
}
823

824
/* dst = src1 + scale * src2	*/
825
void scaleadd_dvector (double *dst, double *src1, double *src2, int n, double scale)
826
{
827
	#if (MATHACCEL == MA_NONE)
828
	int i;
829
	for(i=0; i < n; i++)
830
		dst[i] = src1[i] + scale * src2[i];
831
	#else
832
	/* dst == src2. so, dst *= scale, dst += src1	*/
833
	if (dst == src2 && dst != src1) {
834
		#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
835
		cblas_dscal(n, scale, dst, 1);
836
		cblas_daxpy(n, 1.0, src1, 1, dst, 1);
837
		#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
838
		dscal(n, scale, dst, 1);
839
		daxpy(n, 1.0, src1, 1, dst, 1);
840
		#else
841
		fatal("unknown math acceleration\n");
842
		#endif
843
	/* dst = src1; dst += scale * src2	*/
844
	} else {
845
		#if (MATHACCEL == MA_INTEL || MATHACCEL == MA_APPLE)
846
		cblas_dcopy(n, src1, 1, dst, 1);
847
		cblas_daxpy(n, scale, src2, 1, dst, 1);
848
		#elif (MATHACCEL == MA_AMD || MATHACCEL == MA_SUN)
849
		dcopy(n, src1, 1, dst, 1);
850
		daxpy(n, scale, src2, 1, dst, 1);
851
		#else
852
		fatal("unknown math acceleration\n");
853
		#endif
854
	}
855
	#endif
856
}
857

858
Product

Resources

Company