Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pcnapps.f
5177 views
1
c\BeginDoc
2
c
3
c\Name: pcnapps
4
c
5
c Message Passing Layer: MPI
6
c
7
c\Description:
8
c Given the Arnoldi factorization
9
c
10
c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
11
c
12
c apply NP implicit shifts resulting in
13
c
14
c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
15
c
16
c where Q is an orthogonal matrix which is the product of rotations
17
c and reflections resulting from the NP bulge change sweeps.
18
c The updated Arnoldi factorization becomes:
19
c
20
c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
21
c
22
c\Usage:
23
c call pcnapps
24
c ( COMM, N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ,
25
c WORKL, WORKD )
26
c
27
c\Arguments
28
c COMM MPI Communicator for the processor grid. (INPUT)
29
c
30
c N Integer. (INPUT)
31
c Problem size, i.e. size of matrix A.
32
c
33
c KEV Integer. (INPUT/OUTPUT)
34
c KEV+NP is the size of the input matrix H.
35
c KEV is the size of the updated matrix HNEW.
36
c
37
c NP Integer. (INPUT)
38
c Number of implicit shifts to be applied.
39
c
40
c SHIFT Complex array of length NP. (INPUT)
41
c The shifts to be applied.
42
c
43
c V Complex N by (KEV+NP) array. (INPUT/OUTPUT)
44
c On INPUT, V contains the current KEV+NP Arnoldi vectors.
45
c On OUTPUT, V contains the updated KEV Arnoldi vectors
46
c in the first KEV columns of V.
47
c
48
c LDV Integer. (INPUT)
49
c Leading dimension of V exactly as declared in the calling
50
c program.
51
c
52
c H Complex (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT)
53
c On INPUT, H contains the current KEV+NP by KEV+NP upper
54
c Hessenberg matrix of the Arnoldi factorization.
55
c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
56
c matrix in the KEV leading submatrix.
57
c
58
c LDH Integer. (INPUT)
59
c Leading dimension of H exactly as declared in the calling
60
c program.
61
c
62
c RESID Complex array of length N. (INPUT/OUTPUT)
63
c On INPUT, RESID contains the the residual vector r_{k+p}.
64
c On OUTPUT, RESID is the update residual vector rnew_{k}
65
c in the first KEV locations.
66
c
67
c Q Complex KEV+NP by KEV+NP work array. (WORKSPACE)
68
c Work array used to accumulate the rotations and reflections
69
c during the bulge chase sweep.
70
c
71
c LDQ Integer. (INPUT)
72
c Leading dimension of Q exactly as declared in the calling
73
c program.
74
c
75
c WORKL Complex work array of length (KEV+NP). (WORKSPACE)
76
c Private (replicated) array on each PE or array allocated on
77
c the front end.
78
c
79
c WORKD Complex work array of length 2*N. (WORKSPACE)
80
c Distributed array used in the application of the accumulated
81
c orthogonal matrix Q.
82
c
83
c\EndDoc
84
c
85
c-----------------------------------------------------------------------
86
c
87
c\BeginLib
88
c
89
c\Local variables:
90
c xxxxxx Complex
91
c
92
c\References:
93
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
94
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
95
c pp 357-385.
96
c
97
c\Routines called:
98
c pivout Parallel ARPACK utility routine that prints integers.
99
c second ARPACK utility routine for timing.
100
c pcmout Parallel ARPACK utility routine that prints matrices
101
c pcvout Parallel ARPACK utility routine that prints vectors.
102
c clacpy LAPACK matrix copy routine.
103
c clanhs LAPACK routine that computes various norms of a matrix.
104
c clartg LAPACK Givens rotation construction routine.
105
c claset LAPACK matrix initialization routine.
106
c slabad LAPACK routine for defining the underflow and overflow
107
c limits.
108
c pslamch ScaLAPACK routine that determines machine constants.
109
c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
110
c cgemv Level 2 BLAS routine for matrix vector multiplication.
111
c caxpy Level 1 BLAS that computes a vector triad.
112
c ccopy Level 1 BLAS that copies one vector to another.
113
c cscal Level 1 BLAS that scales a vector.
114
c
115
c\Author
116
c Danny Sorensen Phuong Vu
117
c Richard Lehoucq CRPC / Rice University
118
c Dept. of Computational & Houston, Texas
119
c Applied Mathematics
120
c Rice University
121
c Houston, Texas
122
c
123
c\Parallel Modifications
124
c Kristi Maschhoff
125
c
126
c\Revision history:
127
c Starting Point: Serial Complex Code FILE: napps.F SID: 2.1
128
c
129
c\SCCS Information:
130
c FILE: napps.F SID: 1.3 DATE OF SID: 06/04/98
131
c
132
c\Remarks
133
c 1. In this version, each shift is applied to all the sublocks of
134
c the Hessenberg matrix H and not just to the submatrix that it
135
c comes from. Deflation as in LAPACK routine clahqr (QR algorithm
136
c for upper Hessenberg matrices ) is used.
137
c Upon output, the subdiagonals of H are enforced to be non-negative
138
c real numbers.
139
c
140
c\EndLib
141
c
142
c-----------------------------------------------------------------------
143
c
144
subroutine pcnapps
145
& ( comm, n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq,
146
& workl, workd )
147
c
148
c %--------------------%
149
c | MPI Communicator |
150
c %--------------------%
151
c
152
integer comm
153
c
154
c %----------------------------------------------------%
155
c | Include files for debugging and timing information |
156
c %----------------------------------------------------%
157
c
158
include 'debug.h'
159
include 'stat.h'
160
c
161
c %------------------%
162
c | Scalar Arguments |
163
c %------------------%
164
c
165
integer kev, ldh, ldq, ldv, n, np
166
c
167
c %-----------------%
168
c | Array Arguments |
169
c %-----------------%
170
c
171
Complex
172
& h(ldh,kev+np), resid(n), shift(np),
173
& v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
174
c
175
c %------------%
176
c | Parameters |
177
c %------------%
178
c
179
Complex
180
& one, zero
181
Real
182
& rzero
183
parameter (one = (1.0, 0.0), zero = (0.0, 0.0),
184
& rzero = 0.0)
185
c
186
c %------------------------%
187
c | Local Scalars & Arrays |
188
c %------------------------%
189
c
190
integer i, iend, istart, j, jj, kplusp, msglvl
191
logical first
192
Complex
193
& cdum, f, g, h11, h21, r, s, sigma, t
194
Real
195
& c, ovfl, smlnum, ulp, unfl, tst1
196
save first, ovfl, smlnum, ulp, unfl
197
c
198
c %----------------------%
199
c | External Subroutines |
200
c %----------------------%
201
c
202
external caxpy, ccopy, cgemv, cscal, clacpy, clartg,
203
& pcvout, claset, slabad, pcmout, second, pivout
204
c
205
c %--------------------%
206
c | External Functions |
207
c %--------------------%
208
c
209
Real
210
& clanhs, pslamch10, slapy2
211
external clanhs, pslamch10, slapy2
212
c
213
c %----------------------%
214
c | Intrinsics Functions |
215
c %----------------------%
216
c
217
intrinsic abs, aimag, conjg, cmplx, max, min, real
218
c
219
c %---------------------%
220
c | Statement Functions |
221
c %---------------------%
222
c
223
Real
224
& cabs1
225
cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
226
c
227
c %----------------%
228
c | Data statments |
229
c %----------------%
230
c
231
data first / .true. /
232
c
233
c %-----------------------%
234
c | Executable Statements |
235
c %-----------------------%
236
c
237
if (first) then
238
c
239
c %-----------------------------------------------%
240
c | Set machine-dependent constants for the |
241
c | stopping criterion. If norm(H) <= sqrt(OVFL), |
242
c | overflow should not occur. |
243
c | REFERENCE: LAPACK subroutine clahqr |
244
c %-----------------------------------------------%
245
c
246
unfl = slamch( 'safe minimum' )
247
ovfl = real(one / unfl)
248
call slabad( unfl, ovfl )
249
ulp = slamch( 'precision' )
250
smlnum = unfl*( n / ulp )
251
first = .false.
252
end if
253
c
254
c %-------------------------------%
255
c | Initialize timing statistics |
256
c | & message level for debugging |
257
c %-------------------------------%
258
c
259
call second (t0)
260
msglvl = mcapps
261
c
262
kplusp = kev + np
263
c
264
c %--------------------------------------------%
265
c | Initialize Q to the identity to accumulate |
266
c | the rotations and reflections |
267
c %--------------------------------------------%
268
c
269
call claset ('All', kplusp, kplusp, zero, one, q, ldq)
270
c
271
c %----------------------------------------------%
272
c | Quick return if there are no shifts to apply |
273
c %----------------------------------------------%
274
c
275
if (np .eq. 0) go to 9000
276
c
277
c %----------------------------------------------%
278
c | Chase the bulge with the application of each |
279
c | implicit shift. Each shift is applied to the |
280
c | whole matrix including each block. |
281
c %----------------------------------------------%
282
c
283
do 110 jj = 1, np
284
sigma = shift(jj)
285
c
286
if (msglvl .gt. 2 ) then
287
call pivout (comm, logfil, 1, [jj], ndigit,
288
& '_napps: shift number.')
289
call pcvout (comm, logfil, 1, [sigma], ndigit,
290
& '_napps: Value of the shift ')
291
end if
292
c
293
istart = 1
294
20 continue
295
c
296
do 30 i = istart, kplusp-1
297
c
298
c %----------------------------------------%
299
c | Check for splitting and deflation. Use |
300
c | a standard test as in the QR algorithm |
301
c | REFERENCE: LAPACK subroutine clahqr |
302
c %----------------------------------------%
303
c
304
tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
305
if( tst1.eq.rzero )
306
& tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl )
307
if ( abs(real(h(i+1,i)))
308
& .le. max(ulp*tst1, smlnum) ) then
309
if (msglvl .gt. 0) then
310
call pivout (comm, logfil, 1, [i], ndigit,
311
& '_napps: matrix splitting at row/column no.')
312
call pivout (comm, logfil, 1, [jj], ndigit,
313
& '_napps: matrix splitting with shift number.')
314
call pcvout (comm, logfil, 1, h(i+1,i), ndigit,
315
& '_napps: off diagonal element.')
316
end if
317
iend = i
318
h(i+1,i) = zero
319
go to 40
320
end if
321
30 continue
322
iend = kplusp
323
40 continue
324
c
325
if (msglvl .gt. 2) then
326
call pivout (comm, logfil, 1, [istart], ndigit,
327
& '_napps: Start of current block ')
328
call pivout (comm, logfil, 1, [iend], ndigit,
329
& '_napps: End of current block ')
330
end if
331
c
332
c %------------------------------------------------%
333
c | No reason to apply a shift to block of order 1 |
334
c | or if the current block starts after the point |
335
c | of compression since we'll discard this stuff |
336
c %------------------------------------------------%
337
c
338
if ( istart .eq. iend .or. istart .gt. kev) go to 100
339
c
340
h11 = h(istart,istart)
341
h21 = h(istart+1,istart)
342
f = h11 - sigma
343
g = h21
344
c
345
do 80 i = istart, iend-1
346
c
347
c %------------------------------------------------------%
348
c | Construct the plane rotation G to zero out the bulge |
349
c %------------------------------------------------------%
350
c
351
call clartg (f, g, c, s, r)
352
if (i .gt. istart) then
353
h(i,i-1) = r
354
h(i+1,i-1) = zero
355
end if
356
c
357
c %---------------------------------------------%
358
c | Apply rotation to the left of H; H <- G'*H |
359
c %---------------------------------------------%
360
c
361
do 50 j = i, kplusp
362
t = c*h(i,j) + s*h(i+1,j)
363
h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j)
364
h(i,j) = t
365
50 continue
366
c
367
c %---------------------------------------------%
368
c | Apply rotation to the right of H; H <- H*G |
369
c %---------------------------------------------%
370
c
371
do 60 j = 1, min(i+2,iend)
372
t = c*h(j,i) + conjg(s)*h(j,i+1)
373
h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
374
h(j,i) = t
375
60 continue
376
c
377
c %-----------------------------------------------------%
378
c | Accumulate the rotation in the matrix Q; Q <- Q*G' |
379
c %-----------------------------------------------------%
380
c
381
do 70 j = 1, min(i+jj, kplusp)
382
t = c*q(j,i) + conjg(s)*q(j,i+1)
383
q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
384
q(j,i) = t
385
70 continue
386
c
387
c %---------------------------%
388
c | Prepare for next rotation |
389
c %---------------------------%
390
c
391
if (i .lt. iend-1) then
392
f = h(i+1,i)
393
g = h(i+2,i)
394
end if
395
80 continue
396
c
397
c %-------------------------------%
398
c | Finished applying the shift. |
399
c %-------------------------------%
400
c
401
100 continue
402
c
403
c %---------------------------------------------------------%
404
c | Apply the same shift to the next block if there is any. |
405
c %---------------------------------------------------------%
406
c
407
istart = iend + 1
408
if (iend .lt. kplusp) go to 20
409
c
410
c %---------------------------------------------%
411
c | Loop back to the top to get the next shift. |
412
c %---------------------------------------------%
413
c
414
110 continue
415
c
416
c %---------------------------------------------------%
417
c | Perform a similarity transformation that makes |
418
c | sure that the compressed H will have non-negative |
419
c | real subdiagonal elements. |
420
c %---------------------------------------------------%
421
c
422
do 120 j=1,kev
423
if ( real( h(j+1,j) ) .lt. rzero .or.
424
& aimag( h(j+1,j) ) .ne. rzero ) then
425
t = h(j+1,j) / slapy2(real(h(j+1,j)),aimag(h(j+1,j)))
426
call cscal( kplusp-j+1, conjg(t), h(j+1,j), ldh )
427
call cscal( min(j+2, kplusp), t, h(1,j+1), 1 )
428
call cscal( min(j+np+1,kplusp), t, q(1,j+1), 1 )
429
h(j+1,j) = cmplx( real( h(j+1,j) ), rzero )
430
end if
431
120 continue
432
c
433
do 130 i = 1, kev
434
c
435
c %--------------------------------------------%
436
c | Final check for splitting and deflation. |
437
c | Use a standard test as in the QR algorithm |
438
c | REFERENCE: LAPACK subroutine clahqr. |
439
c | Note: Since the subdiagonals of the |
440
c | compressed H are nonnegative real numbers, |
441
c | we take advantage of this. |
442
c %--------------------------------------------%
443
c
444
tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
445
if( tst1 .eq. rzero )
446
& tst1 = clanhs( '1', kev, h, ldh, workl )
447
if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) )
448
& h(i+1,i) = zero
449
130 continue
450
c
451
c %-------------------------------------------------%
452
c | Compute the (kev+1)-st column of (V*Q) and |
453
c | temporarily store the result in WORKD(N+1:2*N). |
454
c | This is needed in the residual update since we |
455
c | cannot GUARANTEE that the corresponding entry |
456
c | of H would be zero as in exact arithmetic. |
457
c %-------------------------------------------------%
458
c
459
if ( real( h(kev+1,kev) ) .gt. rzero )
460
& call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
461
& workd(n+1), 1)
462
c
463
c %----------------------------------------------------------%
464
c | Compute column 1 to kev of (V*Q) in backward order |
465
c | taking advantage of the upper Hessenberg structure of Q. |
466
c %----------------------------------------------------------%
467
c
468
do 140 i = 1, kev
469
call cgemv ('N', n, kplusp-i+1, one, v, ldv,
470
& q(1,kev-i+1), 1, zero, workd, 1)
471
call ccopy (n, workd, 1, v(1,kplusp-i+1), 1)
472
140 continue
473
c
474
c %-------------------------------------------------%
475
c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
476
c %-------------------------------------------------%
477
c
478
call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
479
c
480
c %--------------------------------------------------------------%
481
c | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
482
c %--------------------------------------------------------------%
483
c
484
if ( real( h(kev+1,kev) ) .gt. rzero )
485
& call ccopy (n, workd(n+1), 1, v(1,kev+1), 1)
486
c
487
c %-------------------------------------%
488
c | Update the residual vector: |
489
c | r <- sigmak*r + betak*v(:,kev+1) |
490
c | where |
491
c | sigmak = (e_{kev+p}'*Q)*e_{kev} |
492
c | betak = e_{kev+1}'*H*e_{kev} |
493
c %-------------------------------------%
494
c
495
call cscal (n, q(kplusp,kev), resid, 1)
496
if ( real( h(kev+1,kev) ) .gt. rzero )
497
& call caxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
498
c
499
if (msglvl .gt. 1) then
500
call pcvout (comm, logfil, 1, q(kplusp,kev), ndigit,
501
& '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
502
call pcvout (comm, logfil, 1, h(kev+1,kev), ndigit,
503
& '_napps: betak = e_{kev+1}^T*H*e_{kev}')
504
call pivout (comm, logfil, 1, [kev], ndigit,
505
& '_napps: Order of the final Hessenberg matrix ')
506
if (msglvl .gt. 2) then
507
call pcmout (comm, logfil, kev, kev, h, ldh, ndigit,
508
& '_napps: updated Hessenberg matrix H for next iteration')
509
end if
510
c
511
end if
512
c
513
9000 continue
514
call second (t1)
515
tcapps = tcapps + (t1 - t0)
516
c
517
return
518
c
519
c %----------------%
520
c | End of pcnapps |
521
c %----------------%
522
c
523
end
524
525