Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdnapps.f
5194 views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdnapps
5
c
6
c Message Passing Layer: MPI
7
c
8
c\Description:
9
c Given the Arnoldi factorization
10
c
11
c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
12
c
13
c apply NP implicit shifts resulting in
14
c
15
c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
16
c
17
c where Q is an orthogonal matrix which is the product of rotations
18
c and reflections resulting from the NP bulge chage sweeps.
19
c The updated Arnoldi factorization becomes:
20
c
21
c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
22
c
23
c\Usage:
24
c call pdnapps
25
c ( COMM, N, KEV, NP, SHIFTR, SHIFTI, V, LDV, H, LDH, RESID, Q, LDQ,
26
c WORKL, WORKD )
27
c
28
c\Arguments
29
c COMM MPI Communicator for the processor grid. (INPUT)
30
c
31
c N Integer. (INPUT)
32
c Problem size, i.e. size of matrix A.
33
c
34
c KEV Integer. (INPUT/OUTPUT)
35
c KEV+NP is the size of the input matrix H.
36
c KEV is the size of the updated matrix HNEW. KEV is only
37
c updated on ouput when fewer than NP shifts are applied in
38
c order to keep the conjugate pair together.
39
c
40
c NP Integer. (INPUT)
41
c Number of implicit shifts to be applied.
42
c
43
c SHIFTR, Double precision array of length NP. (INPUT)
44
c SHIFTI Real and imaginary part of the shifts to be applied.
45
c Upon, entry to pdnapps, the shifts must be sorted so that the
46
c conjugate pairs are in consecutive locations.
47
c
48
c V Double precision N by (KEV+NP) array. (INPUT/OUTPUT)
49
c On INPUT, V contains the current KEV+NP Arnoldi vectors.
50
c On OUTPUT, V contains the updated KEV Arnoldi vectors
51
c in the first KEV columns of V.
52
c
53
c LDV Integer. (INPUT)
54
c Leading dimension of V exactly as declared in the calling
55
c program.
56
c
57
c H Double precision (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT)
58
c On INPUT, H contains the current KEV+NP by KEV+NP upper
59
c Hessenber matrix of the Arnoldi factorization.
60
c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
61
c matrix in the KEV leading submatrix.
62
c
63
c LDH Integer. (INPUT)
64
c Leading dimension of H exactly as declared in the calling
65
c program.
66
c
67
c RESID Double precision array of length N. (INPUT/OUTPUT)
68
c On INPUT, RESID contains the the residual vector r_{k+p}.
69
c On OUTPUT, RESID is the update residual vector rnew_{k}
70
c in the first KEV locations.
71
c
72
c Q Double precision KEV+NP by KEV+NP work array. (WORKSPACE)
73
c Work array used to accumulate the rotations and reflections
74
c during the bulge chase sweep.
75
c
76
c LDQ Integer. (INPUT)
77
c Leading dimension of Q exactly as declared in the calling
78
c program.
79
c
80
c WORKL Double precision work array of length (KEV+NP). (WORKSPACE)
81
c Private (replicated) array on each PE or array allocated on
82
c the front end.
83
c
84
c WORKD Double precision work array of length 2*N. (WORKSPACE)
85
c Distributed array used in the application of the accumulated
86
c orthogonal matrix Q.
87
c
88
c\EndDoc
89
c
90
c-----------------------------------------------------------------------
91
c
92
c\BeginLib
93
c
94
c\Local variables:
95
c xxxxxx real
96
c
97
c\References:
98
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
99
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
100
c pp 357-385.
101
c
102
c\Routines called:
103
c dlabad LAPACK routine that computes machine constants.
104
c dlacpy LAPACK matrix copy routine.
105
c pdlamch ScaLAPACK routine that determines machine constants.
106
c dlanhs LAPACK routine that computes various norms of a matrix.
107
c dlapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
108
c dlarf LAPACK routine that applies Householder reflection to
109
c a matrix.
110
c dlarfg LAPACK Householder reflection construction routine.
111
c dlartg LAPACK Givens rotation construction routine.
112
c dlaset LAPACK matrix initialization routine.
113
c dgemv Level 2 BLAS routine for matrix vector multiplication.
114
c daxpy Level 1 BLAS that computes a vector triad.
115
c dcopy Level 1 BLAS that copies one vector to another .
116
c dscal Level 1 BLAS that scales a vector.
117
c
118
c\Author
119
c Danny Sorensen Phuong Vu
120
c Richard Lehoucq CRPC / Rice University
121
c Dept. of Computational & Houston, Texas
122
c Applied Mathematics
123
c Rice University
124
c Houston, Texas
125
c
126
c\Parallel Modifications
127
c Kristi Maschhoff
128
c
129
c\Revision history:
130
c Starting Point: Serial Code FILE: napps.F SID: 2.2
131
c
132
c\SCCS Information:
133
c FILE: napps.F SID: 1.5 DATE OF SID: 03/19/97
134
c
135
c\Remarks
136
c 1. In this version, each shift is applied to all the sublocks of
137
c the Hessenberg matrix H and not just to the submatrix that it
138
c comes from. Deflation as in LAPACK routine dlahqr (QR algorithm
139
c for upper Hessenberg matrices ) is used.
140
c The subdiagonals of H are enforced to be non-negative.
141
c
142
c\EndLib
143
c
144
c-----------------------------------------------------------------------
145
c
146
subroutine pdnapps
147
& ( comm, n, kev, np, shiftr, shifti, v, ldv, h, ldh, resid,
148
& q, ldq, workl, workd )
149
c
150
c %--------------------%
151
c | MPI Communicator |
152
c %--------------------%
153
c
154
integer comm
155
c
156
c %----------------------------------------------------%
157
c | Include files for debugging and timing information |
158
c %----------------------------------------------------%
159
c
160
include 'debug.h'
161
include 'stat.h'
162
c
163
c %------------------%
164
c | Scalar Arguments |
165
c %------------------%
166
c
167
integer kev, ldh, ldq, ldv, n, np
168
c
169
c %-----------------%
170
c | Array Arguments |
171
c %-----------------%
172
c
173
Double precision
174
& h(ldh,kev+np), resid(n), shifti(np), shiftr(np),
175
& v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
176
c
177
c %------------%
178
c | Parameters |
179
c %------------%
180
c
181
Double precision
182
& one, zero
183
parameter (one = 1.0, zero = 0.0)
184
c
185
c %------------------------%
186
c | Local Scalars & Arrays |
187
c %------------------------%
188
c
189
integer i, iend, ir, istart, j, jj, kplusp, msglvl, nr
190
logical cconj, first
191
Double precision
192
& c, f, g, h11, h12, h21, h22, h32, ovfl, r, s, sigmai,
193
& sigmar, smlnum, ulp, unfl, u(3), t, tau, tst1
194
save first, ovfl, smlnum, ulp, unfl
195
c
196
c %----------------------%
197
c | External Subroutines |
198
c %----------------------%
199
c
200
external daxpy, dcopy, dscal, dlacpy, dlarf, dlarfg, dlartg,
201
& dlaset, dlabad, second, pivout, pdvout, pdmout
202
c
203
c %--------------------%
204
c | External Functions |
205
c %--------------------%
206
c
207
Double precision
208
& pdlamch10, dlanhs, dlapy2
209
external pdlamch10, dlanhs, dlapy2
210
c
211
c %----------------------%
212
c | Intrinsics Functions |
213
c %----------------------%
214
c
215
intrinsic abs, max, min
216
c
217
c %----------------%
218
c | Data statments |
219
c %----------------%
220
c
221
data first / .true. /
222
c
223
c %-----------------------%
224
c | Executable Statements |
225
c %-----------------------%
226
c
227
if (first) then
228
c
229
c %-----------------------------------------------%
230
c | Set machine-dependent constants for the |
231
c | stopping criterion. If norm(H) <= sqrt(OVFL), |
232
c | overflow should not occur. |
233
c | REFERENCE: LAPACK subroutine dlahqr |
234
c %-----------------------------------------------%
235
c
236
unfl = pdlamch10( comm, 'safe minimum' )
237
ovfl = one / unfl
238
call dlabad( unfl, ovfl )
239
ulp = pdlamch10( comm, 'precision' )
240
smlnum = unfl*( n / ulp )
241
first = .false.
242
end if
243
c
244
c %-------------------------------%
245
c | Initialize timing statistics |
246
c | & message level for debugging |
247
c %-------------------------------%
248
c
249
call second (t0)
250
msglvl = mnapps
251
c
252
kplusp = kev + np
253
c
254
c %--------------------------------------------%
255
c | Initialize Q to the identity to accumulate |
256
c | the rotations and reflections |
257
c %--------------------------------------------%
258
c
259
call dlaset ('All', kplusp, kplusp, zero, one, q, ldq)
260
c
261
c %----------------------------------------------%
262
c | Quick return if there are no shifts to apply |
263
c %----------------------------------------------%
264
c
265
if (np .eq. 0) go to 9000
266
c
267
c %----------------------------------------------%
268
c | Chase the bulge with the application of each |
269
c | implicit shift. Each shift is applied to the |
270
c | whole matrix including each block. |
271
c %----------------------------------------------%
272
c
273
cconj = .false.
274
do 110 jj = 1, np
275
sigmar = shiftr(jj)
276
sigmai = shifti(jj)
277
c
278
if (msglvl .gt. 2 ) then
279
call pivout (comm, logfil, 1, [jj], ndigit,
280
& '_napps: shift number.')
281
call pdvout (comm, logfil, 1, [sigmar], ndigit,
282
& '_napps: The real part of the shift ')
283
call pdvout (comm, logfil, 1, [sigmai], ndigit,
284
& '_napps: The imaginary part of the shift ')
285
end if
286
c
287
c %-------------------------------------------------%
288
c | The following set of conditionals is necessary |
289
c | in order that complex conjugate pairs of shifts |
290
c | are applied together or not at all. |
291
c %-------------------------------------------------%
292
c
293
if ( cconj ) then
294
c
295
c %-----------------------------------------%
296
c | cconj = .true. means the previous shift |
297
c | had non-zero imaginary part. |
298
c %-----------------------------------------%
299
c
300
cconj = .false.
301
go to 110
302
else if ( jj .lt. np .and. abs( sigmai ) .gt. zero ) then
303
c
304
c %------------------------------------%
305
c | Start of a complex conjugate pair. |
306
c %------------------------------------%
307
c
308
cconj = .true.
309
else if ( jj .eq. np .and. abs( sigmai ) .gt. zero ) then
310
c
311
c %----------------------------------------------%
312
c | The last shift has a nonzero imaginary part. |
313
c | Don't apply it; thus the order of the |
314
c | compressed H is order KEV+1 since only np-1 |
315
c | were applied. |
316
c %----------------------------------------------%
317
c
318
kev = kev + 1
319
go to 110
320
end if
321
istart = 1
322
20 continue
323
c
324
c %--------------------------------------------------%
325
c | if sigmai = 0 then |
326
c | Apply the jj-th shift ... |
327
c | else |
328
c | Apply the jj-th and (jj+1)-th together ... |
329
c | (Note that jj < np at this point in the code) |
330
c | end |
331
c | to the current block of H. The next do loop |
332
c | determines the current block ; |
333
c %--------------------------------------------------%
334
c
335
do 30 i = istart, kplusp-1
336
c
337
c %----------------------------------------%
338
c | Check for splitting and deflation. Use |
339
c | a standard test as in the QR algorithm |
340
c | REFERENCE: LAPACK subroutine dlahqr |
341
c %----------------------------------------%
342
c
343
tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
344
if( tst1.eq.zero )
345
& tst1 = dlanhs( '1', kplusp-jj+1, h, ldh, workl )
346
if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) ) then
347
if (msglvl .gt. 0) then
348
call pivout (comm, logfil, 1, [i], ndigit,
349
& '_napps: matrix splitting at row/column no.')
350
call pivout (comm, logfil, 1, [jj], ndigit,
351
& '_napps: matrix splitting with shift number.')
352
call pdvout (comm, logfil, 1, h(i+1,i), ndigit,
353
& '_napps: off diagonal element.')
354
end if
355
iend = i
356
h(i+1,i) = zero
357
go to 40
358
end if
359
30 continue
360
iend = kplusp
361
40 continue
362
c
363
if (msglvl .gt. 2) then
364
call pivout (comm, logfil, 1, [istart], ndigit,
365
& '_napps: Start of current block ')
366
call pivout (comm, logfil, 1, [iend], ndigit,
367
& '_napps: End of current block ')
368
end if
369
c
370
c %------------------------------------------------%
371
c | No reason to apply a shift to block of order 1 |
372
c %------------------------------------------------%
373
c
374
if ( istart .eq. iend ) go to 100
375
c
376
c %------------------------------------------------------%
377
c | If istart + 1 = iend then no reason to apply a |
378
c | complex conjugate pair of shifts on a 2 by 2 matrix. |
379
c %------------------------------------------------------%
380
c
381
if ( istart + 1 .eq. iend .and. abs( sigmai ) .gt. zero )
382
& go to 100
383
c
384
h11 = h(istart,istart)
385
h21 = h(istart+1,istart)
386
if ( abs( sigmai ) .le. zero ) then
387
c
388
c %---------------------------------------------%
389
c | Real-valued shift ==> apply single shift QR |
390
c %---------------------------------------------%
391
c
392
f = h11 - sigmar
393
g = h21
394
c
395
do 80 i = istart, iend-1
396
c
397
c %-----------------------------------------------------%
398
c | Contruct the plane rotation G to zero out the bulge |
399
c %-----------------------------------------------------%
400
c
401
call dlartg (f, g, c, s, r)
402
if (i .gt. istart) then
403
c
404
c %-------------------------------------------%
405
c | The following ensures that h(1:iend-1,1), |
406
c | the first iend-2 off diagonal of elements |
407
c | H, remain non negative. |
408
c %-------------------------------------------%
409
c
410
if (r .lt. zero) then
411
r = -r
412
c = -c
413
s = -s
414
end if
415
h(i,i-1) = r
416
h(i+1,i-1) = zero
417
end if
418
c
419
c %---------------------------------------------%
420
c | Apply rotation to the left of H; H <- G'*H |
421
c %---------------------------------------------%
422
c
423
do 50 j = i, kplusp
424
t = c*h(i,j) + s*h(i+1,j)
425
h(i+1,j) = -s*h(i,j) + c*h(i+1,j)
426
h(i,j) = t
427
50 continue
428
c
429
c %---------------------------------------------%
430
c | Apply rotation to the right of H; H <- H*G |
431
c %---------------------------------------------%
432
c
433
do 60 j = 1, min(i+2,iend)
434
t = c*h(j,i) + s*h(j,i+1)
435
h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
436
h(j,i) = t
437
60 continue
438
c
439
c %----------------------------------------------------%
440
c | Accumulate the rotation in the matrix Q; Q <- Q*G |
441
c %----------------------------------------------------%
442
c
443
do 70 j = 1, min( i+jj, kplusp )
444
t = c*q(j,i) + s*q(j,i+1)
445
q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
446
q(j,i) = t
447
70 continue
448
c
449
c %---------------------------%
450
c | Prepare for next rotation |
451
c %---------------------------%
452
c
453
if (i .lt. iend-1) then
454
f = h(i+1,i)
455
g = h(i+2,i)
456
end if
457
80 continue
458
c
459
c %-----------------------------------%
460
c | Finished applying the real shift. |
461
c %-----------------------------------%
462
c
463
else
464
c
465
c %----------------------------------------------------%
466
c | Complex conjugate shifts ==> apply double shift QR |
467
c %----------------------------------------------------%
468
c
469
h12 = h(istart,istart+1)
470
h22 = h(istart+1,istart+1)
471
h32 = h(istart+2,istart+1)
472
c
473
c %---------------------------------------------------------%
474
c | Compute 1st column of (H - shift*I)*(H - conj(shift)*I) |
475
c %---------------------------------------------------------%
476
c
477
s = 2.0*sigmar
478
t = dlapy2 ( sigmar, sigmai )
479
u(1) = ( h11 * (h11 - s) + t * t ) / h21 + h12
480
u(2) = h11 + h22 - s
481
u(3) = h32
482
c
483
do 90 i = istart, iend-1
484
c
485
nr = min ( 3, iend-i+1 )
486
c
487
c %-----------------------------------------------------%
488
c | Construct Householder reflector G to zero out u(1). |
489
c | G is of the form I - tau*( 1 u )' * ( 1 u' ). |
490
c %-----------------------------------------------------%
491
c
492
call dlarfg ( nr, u(1), u(2), 1, tau )
493
c
494
if (i .gt. istart) then
495
h(i,i-1) = u(1)
496
h(i+1,i-1) = zero
497
if (i .lt. iend-1) h(i+2,i-1) = zero
498
end if
499
u(1) = one
500
c
501
c %--------------------------------------%
502
c | Apply the reflector to the left of H |
503
c %--------------------------------------%
504
c
505
call dlarf ('Left', nr, kplusp-i+1, u, 1, tau,
506
& h(i,i), ldh, workl)
507
c
508
c %---------------------------------------%
509
c | Apply the reflector to the right of H |
510
c %---------------------------------------%
511
c
512
ir = min ( i+3, iend )
513
call dlarf ('Right', ir, nr, u, 1, tau,
514
& h(1,i), ldh, workl)
515
c
516
c %-----------------------------------------------------%
517
c | Accumulate the reflector in the matrix Q; Q <- Q*G |
518
c %-----------------------------------------------------%
519
c
520
call dlarf ('Right', kplusp, nr, u, 1, tau,
521
& q(1,i), ldq, workl)
522
c
523
c %----------------------------%
524
c | Prepare for next reflector |
525
c %----------------------------%
526
c
527
if (i .lt. iend-1) then
528
u(1) = h(i+1,i)
529
u(2) = h(i+2,i)
530
if (i .lt. iend-2) u(3) = h(i+3,i)
531
end if
532
c
533
90 continue
534
c
535
c %--------------------------------------------%
536
c | Finished applying a complex pair of shifts |
537
c | to the current block |
538
c %--------------------------------------------%
539
c
540
end if
541
c
542
100 continue
543
c
544
c %---------------------------------------------------------%
545
c | Apply the same shift to the next block if there is any. |
546
c %---------------------------------------------------------%
547
c
548
istart = iend + 1
549
if (iend .lt. kplusp) go to 20
550
c
551
c %---------------------------------------------%
552
c | Loop back to the top to get the next shift. |
553
c %---------------------------------------------%
554
c
555
110 continue
556
c
557
c %--------------------------------------------------%
558
c | Perform a similarity transformation that makes |
559
c | sure that H will have non negative sub diagonals |
560
c %--------------------------------------------------%
561
c
562
do 120 j=1,kev
563
if ( h(j+1,j) .lt. zero ) then
564
call dscal( kplusp-j+1, -one, h(j+1,j), ldh )
565
call dscal( min(j+2, kplusp), -one, h(1,j+1), 1 )
566
call dscal( min(j+np+1,kplusp), -one, q(1,j+1), 1 )
567
end if
568
120 continue
569
c
570
do 130 i = 1, kev
571
c
572
c %--------------------------------------------%
573
c | Final check for splitting and deflation. |
574
c | Use a standard test as in the QR algorithm |
575
c | REFERENCE: LAPACK subroutine dlahqr |
576
c %--------------------------------------------%
577
c
578
tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
579
if( tst1.eq.zero )
580
& tst1 = dlanhs( '1', kev, h, ldh, workl )
581
if( h( i+1,i ) .le. max( ulp*tst1, smlnum ) )
582
& h(i+1,i) = zero
583
130 continue
584
c
585
c %-------------------------------------------------%
586
c | Compute the (kev+1)-st column of (V*Q) and |
587
c | temporarily store the result in WORKD(N+1:2*N). |
588
c | This is needed in the residual update since we |
589
c | cannot GUARANTEE that the corresponding entry |
590
c | of H would be zero as in exact arithmetic. |
591
c %-------------------------------------------------%
592
c
593
if (h(kev+1,kev) .gt. zero)
594
& call dgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
595
& workd(n+1), 1)
596
c
597
c %----------------------------------------------------------%
598
c | Compute column 1 to kev of (V*Q) in backward order |
599
c | taking advantage of the upper Hessenberg structure of Q. |
600
c %----------------------------------------------------------%
601
c
602
do 140 i = 1, kev
603
call dgemv ('N', n, kplusp-i+1, one, v, ldv,
604
& q(1,kev-i+1), 1, zero, workd, 1)
605
call dcopy (n, workd, 1, v(1,kplusp-i+1), 1)
606
140 continue
607
c
608
c %-------------------------------------------------%
609
c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
610
c %-------------------------------------------------%
611
c
612
call dlacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
613
c
614
c %--------------------------------------------------------------%
615
c | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
616
c %--------------------------------------------------------------%
617
c
618
if (h(kev+1,kev) .gt. zero)
619
& call dcopy (n, workd(n+1), 1, v(1,kev+1), 1)
620
c
621
c %---------------------------------------%
622
c | Update the residual vector: |
623
c | r <- sigmak*r + betak*v(:,kev+1) |
624
c | where |
625
c | sigmak = (e_{kplusp}'*Q)*e_{kev} |
626
c | betak = e_{kev+1}'*H*e_{kev} |
627
c %---------------------------------------%
628
c
629
call dscal (n, q(kplusp,kev), resid, 1)
630
if (h(kev+1,kev) .gt. zero)
631
& call daxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
632
c
633
if (msglvl .gt. 1) then
634
call pdvout (comm, logfil, 1, q(kplusp,kev), ndigit,
635
& '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
636
call pdvout (comm, logfil, 1, h(kev+1,kev), ndigit,
637
& '_napps: betak = e_{kev+1}^T*H*e_{kev}')
638
call pivout (comm, logfil, 1, [kev], ndigit,
639
& '_napps: Order of the final Hessenberg matrix ')
640
if (msglvl .gt. 2) then
641
call pdmout (comm, logfil, kev, kev, h, ldh, ndigit,
642
& '_napps: updated Hessenberg matrix H for next iteration')
643
end if
644
c
645
end if
646
c
647
9000 continue
648
call second (t1)
649
tnapps = tnapps + (t1 - t0)
650
c
651
return
652
c
653
c %----------------%
654
c | End of pdnapps |
655
c %----------------%
656
c
657
end
658
659