Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/cnapps.f
5194 views
1
c\BeginDoc
2
c
3
c\Name: cnapps
4
c
5
c\Description:
6
c Given the Arnoldi factorization
7
c
8
c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
9
c
10
c apply NP implicit shifts resulting in
11
c
12
c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
13
c
14
c where Q is an orthogonal matrix which is the product of rotations
15
c and reflections resulting from the NP bulge change sweeps.
16
c The updated Arnoldi factorization becomes:
17
c
18
c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
19
c
20
c\Usage:
21
c call cnapps
22
c ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ,
23
c WORKL, WORKD )
24
c
25
c\Arguments
26
c N Integer. (INPUT)
27
c Problem size, i.e. size of matrix A.
28
c
29
c KEV Integer. (INPUT/OUTPUT)
30
c KEV+NP is the size of the input matrix H.
31
c KEV is the size of the updated matrix HNEW.
32
c
33
c NP Integer. (INPUT)
34
c Number of implicit shifts to be applied.
35
c
36
c SHIFT Complex array of length NP. (INPUT)
37
c The shifts to be applied.
38
c
39
c V Complex N by (KEV+NP) array. (INPUT/OUTPUT)
40
c On INPUT, V contains the current KEV+NP Arnoldi vectors.
41
c On OUTPUT, V contains the updated KEV Arnoldi vectors
42
c in the first KEV columns of V.
43
c
44
c LDV Integer. (INPUT)
45
c Leading dimension of V exactly as declared in the calling
46
c program.
47
c
48
c H Complex (KEV+NP) by (KEV+NP) array. (INPUT/OUTPUT)
49
c On INPUT, H contains the current KEV+NP by KEV+NP upper
50
c Hessenberg matrix of the Arnoldi factorization.
51
c On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
52
c matrix in the KEV leading submatrix.
53
c
54
c LDH Integer. (INPUT)
55
c Leading dimension of H exactly as declared in the calling
56
c program.
57
c
58
c RESID Complex array of length N. (INPUT/OUTPUT)
59
c On INPUT, RESID contains the the residual vector r_{k+p}.
60
c On OUTPUT, RESID is the update residual vector rnew_{k}
61
c in the first KEV locations.
62
c
63
c Q Complex KEV+NP by KEV+NP work array. (WORKSPACE)
64
c Work array used to accumulate the rotations and reflections
65
c during the bulge chase sweep.
66
c
67
c LDQ Integer. (INPUT)
68
c Leading dimension of Q exactly as declared in the calling
69
c program.
70
c
71
c WORKL Complex work array of length (KEV+NP). (WORKSPACE)
72
c Private (replicated) array on each PE or array allocated on
73
c the front end.
74
c
75
c WORKD Complex work array of length 2*N. (WORKSPACE)
76
c Distributed array used in the application of the accumulated
77
c orthogonal matrix Q.
78
c
79
c\EndDoc
80
c
81
c-----------------------------------------------------------------------
82
c
83
c\BeginLib
84
c
85
c\Local variables:
86
c xxxxxx Complex
87
c
88
c\References:
89
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
90
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
91
c pp 357-385.
92
c
93
c\Routines called:
94
c ivout ARPACK utility routine that prints integers.
95
c second ARPACK utility routine for timing.
96
c cmout ARPACK utility routine that prints matrices
97
c cvout ARPACK utility routine that prints vectors.
98
c clacpy LAPACK matrix copy routine.
99
c clanhs LAPACK routine that computes various norms of a matrix.
100
c clartg LAPACK Givens rotation construction routine.
101
c claset LAPACK matrix initialization routine.
102
c slabad LAPACK routine for defining the underflow and overflow
103
c limits.
104
c slamch LAPACK routine that determines machine constants.
105
c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
106
c cgemv Level 2 BLAS routine for matrix vector multiplication.
107
c caxpy Level 1 BLAS that computes a vector triad.
108
c ccopy Level 1 BLAS that copies one vector to another.
109
c cscal Level 1 BLAS that scales a vector.
110
c
111
c\Author
112
c Danny Sorensen Phuong Vu
113
c Richard Lehoucq CRPC / Rice University
114
c Dept. of Computational & Houston, Texas
115
c Applied Mathematics
116
c Rice University
117
c Houston, Texas
118
c
119
c\SCCS Information: @(#)
120
c FILE: napps.F SID: 2.2 DATE OF SID: 4/20/96 RELEASE: 2
121
c
122
c\Remarks
123
c 1. In this version, each shift is applied to all the sublocks of
124
c the Hessenberg matrix H and not just to the submatrix that it
125
c comes from. Deflation as in LAPACK routine clahqr (QR algorithm
126
c for upper Hessenberg matrices ) is used.
127
c Upon output, the subdiagonals of H are enforced to be non-negative
128
c real numbers.
129
c
130
c\EndLib
131
c
132
c-----------------------------------------------------------------------
133
c
134
subroutine cnapps
135
& ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq,
136
& workl, workd )
137
c
138
c %----------------------------------------------------%
139
c | Include files for debugging and timing information |
140
c %----------------------------------------------------%
141
c
142
include 'debug.h'
143
include 'stat.h'
144
c
145
c %------------------%
146
c | Scalar Arguments |
147
c %------------------%
148
c
149
integer kev, ldh, ldq, ldv, n, np
150
c
151
c %-----------------%
152
c | Array Arguments |
153
c %-----------------%
154
c
155
Complex
156
& h(ldh,kev+np), resid(n), shift(np),
157
& v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
158
c
159
c %------------%
160
c | Parameters |
161
c %------------%
162
c
163
Complex
164
& one, zero
165
Real
166
& rzero
167
parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0),
168
& rzero = 0.0E+0)
169
c
170
c %------------------------%
171
c | Local Scalars & Arrays |
172
c %------------------------%
173
c
174
integer i, iend, istart, j, jj, kplusp, msglvl
175
logical first
176
Complex
177
& cdum, f, g, h11, h21, r, s, sigma, t
178
Real
179
& c, ovfl, smlnum, ulp, unfl, tst1
180
save first, ovfl, smlnum, ulp, unfl
181
c
182
c %----------------------%
183
c | External Subroutines |
184
c %----------------------%
185
c
186
external caxpy, ccopy, cgemv, cscal, clacpy, clartg,
187
& cvout, claset, slabad, cmout, second, ivout
188
c
189
c %--------------------%
190
c | External Functions |
191
c %--------------------%
192
c
193
Real
194
& clanhs, slamch, slapy2
195
external clanhs, slamch, slapy2
196
c
197
c %----------------------%
198
c | Intrinsics Functions |
199
c %----------------------%
200
c
201
intrinsic abs, aimag, conjg, cmplx, max, min, real
202
c
203
c %---------------------%
204
c | Statement Functions |
205
c %---------------------%
206
c
207
Real
208
& cabs1
209
cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
210
c
211
c %----------------%
212
c | Data statments |
213
c %----------------%
214
c
215
data first / .true. /
216
c
217
c %-----------------------%
218
c | Executable Statements |
219
c %-----------------------%
220
c
221
if (first) then
222
c
223
c %-----------------------------------------------%
224
c | Set machine-dependent constants for the |
225
c | stopping criterion. If norm(H) <= sqrt(OVFL), |
226
c | overflow should not occur. |
227
c | REFERENCE: LAPACK subroutine clahqr |
228
c %-----------------------------------------------%
229
c
230
unfl = slamch( 'safe minimum' )
231
ovfl = real(one / unfl)
232
call slabad( unfl, ovfl )
233
ulp = slamch( 'precision' )
234
smlnum = unfl*( n / ulp )
235
first = .false.
236
end if
237
c
238
c %-------------------------------%
239
c | Initialize timing statistics |
240
c | & message level for debugging |
241
c %-------------------------------%
242
c
243
call second (t0)
244
msglvl = mcapps
245
c
246
kplusp = kev + np
247
c
248
c %--------------------------------------------%
249
c | Initialize Q to the identity to accumulate |
250
c | the rotations and reflections |
251
c %--------------------------------------------%
252
c
253
call claset ('All', kplusp, kplusp, zero, one, q, ldq)
254
c
255
c %----------------------------------------------%
256
c | Quick return if there are no shifts to apply |
257
c %----------------------------------------------%
258
c
259
if (np .eq. 0) go to 9000
260
c
261
c %----------------------------------------------%
262
c | Chase the bulge with the application of each |
263
c | implicit shift. Each shift is applied to the |
264
c | whole matrix including each block. |
265
c %----------------------------------------------%
266
c
267
do 110 jj = 1, np
268
sigma = shift(jj)
269
c
270
if (msglvl .gt. 2 ) then
271
call ivout (logfil, 1, [jj], ndigit,
272
& '_napps: shift number.')
273
call cvout (logfil, 1, [sigma], ndigit,
274
& '_napps: Value of the shift ')
275
end if
276
c
277
istart = 1
278
20 continue
279
c
280
do 30 i = istart, kplusp-1
281
c
282
c %----------------------------------------%
283
c | Check for splitting and deflation. Use |
284
c | a standard test as in the QR algorithm |
285
c | REFERENCE: LAPACK subroutine clahqr |
286
c %----------------------------------------%
287
c
288
tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
289
if( tst1.eq.rzero )
290
& tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl )
291
if ( abs(real(h(i+1,i)))
292
& .le. max(ulp*tst1, smlnum) ) then
293
if (msglvl .gt. 0) then
294
call ivout (logfil, 1, [i], ndigit,
295
& '_napps: matrix splitting at row/column no.')
296
call ivout (logfil, 1, [jj], ndigit,
297
& '_napps: matrix splitting with shift number.')
298
call cvout (logfil, 1, h(i+1,i), ndigit,
299
& '_napps: off diagonal element.')
300
end if
301
iend = i
302
h(i+1,i) = zero
303
go to 40
304
end if
305
30 continue
306
iend = kplusp
307
40 continue
308
c
309
if (msglvl .gt. 2) then
310
call ivout (logfil, 1, [istart], ndigit,
311
& '_napps: Start of current block ')
312
call ivout (logfil, 1, [iend], ndigit,
313
& '_napps: End of current block ')
314
end if
315
c
316
c %------------------------------------------------%
317
c | No reason to apply a shift to block of order 1 |
318
c | or if the current block starts after the point |
319
c | of compression since we'll discard this stuff |
320
c %------------------------------------------------%
321
c
322
if ( istart .eq. iend .or. istart .gt. kev) go to 100
323
c
324
h11 = h(istart,istart)
325
h21 = h(istart+1,istart)
326
f = h11 - sigma
327
g = h21
328
c
329
do 80 i = istart, iend-1
330
c
331
c %------------------------------------------------------%
332
c | Construct the plane rotation G to zero out the bulge |
333
c %------------------------------------------------------%
334
c
335
call clartg (f, g, c, s, r)
336
if (i .gt. istart) then
337
h(i,i-1) = r
338
h(i+1,i-1) = zero
339
end if
340
c
341
c %---------------------------------------------%
342
c | Apply rotation to the left of H; H <- G'*H |
343
c %---------------------------------------------%
344
c
345
do 50 j = i, kplusp
346
t = c*h(i,j) + s*h(i+1,j)
347
h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j)
348
h(i,j) = t
349
50 continue
350
c
351
c %---------------------------------------------%
352
c | Apply rotation to the right of H; H <- H*G |
353
c %---------------------------------------------%
354
c
355
do 60 j = 1, min(i+2,iend)
356
t = c*h(j,i) + conjg(s)*h(j,i+1)
357
h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
358
h(j,i) = t
359
60 continue
360
c
361
c %-----------------------------------------------------%
362
c | Accumulate the rotation in the matrix Q; Q <- Q*G' |
363
c %-----------------------------------------------------%
364
c
365
do 70 j = 1, min(j+jj, kplusp)
366
t = c*q(j,i) + conjg(s)*q(j,i+1)
367
q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
368
q(j,i) = t
369
70 continue
370
c
371
c %---------------------------%
372
c | Prepare for next rotation |
373
c %---------------------------%
374
c
375
if (i .lt. iend-1) then
376
f = h(i+1,i)
377
g = h(i+2,i)
378
end if
379
80 continue
380
c
381
c %-------------------------------%
382
c | Finished applying the shift. |
383
c %-------------------------------%
384
c
385
100 continue
386
c
387
c %---------------------------------------------------------%
388
c | Apply the same shift to the next block if there is any. |
389
c %---------------------------------------------------------%
390
c
391
istart = iend + 1
392
if (iend .lt. kplusp) go to 20
393
c
394
c %---------------------------------------------%
395
c | Loop back to the top to get the next shift. |
396
c %---------------------------------------------%
397
c
398
110 continue
399
c
400
c %---------------------------------------------------%
401
c | Perform a similarity transformation that makes |
402
c | sure that the compressed H will have non-negative |
403
c | real subdiagonal elements. |
404
c %---------------------------------------------------%
405
c
406
do 120 j=1,kev
407
if ( real( h(j+1,j) ) .lt. rzero .or.
408
& aimag( h(j+1,j) ) .ne. rzero ) then
409
t = h(j+1,j) / slapy2(real(h(j+1,j)),aimag(h(j+1,j)))
410
call cscal( kplusp-j+1, conjg(t), h(j+1,j), ldh )
411
call cscal( min(j+2, kplusp), t, h(1,j+1), 1 )
412
call cscal( min(j+np+1,kplusp), t, q(1,j+1), 1 )
413
h(j+1,j) = cmplx( real( h(j+1,j) ), rzero )
414
end if
415
120 continue
416
c
417
do 130 i = 1, kev
418
c
419
c %--------------------------------------------%
420
c | Final check for splitting and deflation. |
421
c | Use a standard test as in the QR algorithm |
422
c | REFERENCE: LAPACK subroutine clahqr. |
423
c | Note: Since the subdiagonals of the |
424
c | compressed H are nonnegative real numbers, |
425
c | we take advantage of this. |
426
c %--------------------------------------------%
427
c
428
tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
429
if( tst1 .eq. rzero )
430
& tst1 = clanhs( '1', kev, h, ldh, workl )
431
if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) )
432
& h(i+1,i) = zero
433
130 continue
434
c
435
c %-------------------------------------------------%
436
c | Compute the (kev+1)-st column of (V*Q) and |
437
c | temporarily store the result in WORKD(N+1:2*N). |
438
c | This is needed in the residual update since we |
439
c | cannot GUARANTEE that the corresponding entry |
440
c | of H would be zero as in exact arithmetic. |
441
c %-------------------------------------------------%
442
c
443
if ( real( h(kev+1,kev) ) .gt. rzero )
444
& call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
445
& workd(n+1), 1)
446
c
447
c %----------------------------------------------------------%
448
c | Compute column 1 to kev of (V*Q) in backward order |
449
c | taking advantage of the upper Hessenberg structure of Q. |
450
c %----------------------------------------------------------%
451
c
452
do 140 i = 1, kev
453
call cgemv ('N', n, kplusp-i+1, one, v, ldv,
454
& q(1,kev-i+1), 1, zero, workd, 1)
455
call ccopy (n, workd, 1, v(1,kplusp-i+1), 1)
456
140 continue
457
c
458
c %-------------------------------------------------%
459
c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
460
c %-------------------------------------------------%
461
c
462
call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
463
c
464
c %--------------------------------------------------------------%
465
c | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
466
c %--------------------------------------------------------------%
467
c
468
if ( real( h(kev+1,kev) ) .gt. rzero )
469
& call ccopy (n, workd(n+1), 1, v(1,kev+1), 1)
470
c
471
c %-------------------------------------%
472
c | Update the residual vector: |
473
c | r <- sigmak*r + betak*v(:,kev+1) |
474
c | where |
475
c | sigmak = (e_{kev+p}'*Q)*e_{kev} |
476
c | betak = e_{kev+1}'*H*e_{kev} |
477
c %-------------------------------------%
478
c
479
call cscal (n, q(kplusp,kev), resid, 1)
480
if ( real( h(kev+1,kev) ) .gt. rzero )
481
& call caxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
482
c
483
if (msglvl .gt. 1) then
484
call cvout (logfil, 1, q(kplusp,kev), ndigit,
485
& '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
486
call cvout (logfil, 1, h(kev+1,kev), ndigit,
487
& '_napps: betak = e_{kev+1}^T*H*e_{kev}')
488
call ivout (logfil, 1, [kev], ndigit,
489
& '_napps: Order of the final Hessenberg matrix ')
490
if (msglvl .gt. 2) then
491
call cmout (logfil, kev, kev, h, ldh, ndigit,
492
& '_napps: updated Hessenberg matrix H for next iteration')
493
end if
494
c
495
end if
496
c
497
9000 continue
498
call second (t1)
499
tcapps = tcapps + (t1 - t0)
500
c
501
return
502
c
503
c %---------------%
504
c | End of cnapps |
505
c %---------------%
506
c
507
end
508
509