Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdsapps.f
5194 views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdsapps
5
c
6
c\Description:
7
c Given the Arnoldi factorization
8
c
9
c A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
10
c
11
c apply NP shifts implicitly resulting in
12
c
13
c A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
14
c
15
c where Q is an orthogonal matrix of order KEV+NP. Q is the product of
16
c rotations resulting from the NP bulge chasing sweeps. The updated Arnoldi
17
c factorization becomes:
18
c
19
c A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
20
c
21
c\Usage:
22
c call pdsapps
23
c ( COMM, N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, WORKD )
24
c
25
c\Arguments
26
c COMM MPI Communicator for the processor grid. (INPUT)
27
c
28
c N Integer. (INPUT)
29
c Problem size, i.e. dimension of matrix A.
30
c
31
c KEV Integer. (INPUT)
32
c INPUT: KEV+NP is the size of the input matrix H.
33
c OUTPUT: KEV is the size of the updated matrix HNEW.
34
c
35
c NP Integer. (INPUT)
36
c Number of implicit shifts to be applied.
37
c
38
c SHIFT Double precision array of length NP. (INPUT)
39
c The shifts to be applied.
40
c
41
c V Double precision N by (KEV+NP) array. (INPUT/OUTPUT)
42
c INPUT: V contains the current KEV+NP Arnoldi vectors.
43
c OUTPUT: VNEW = V(1:n,1:KEV); the updated Arnoldi vectors
44
c are in the first KEV columns of V.
45
c
46
c LDV Integer. (INPUT)
47
c Leading dimension of V exactly as declared in the calling
48
c program.
49
c
50
c H Double precision (KEV+NP) by 2 array. (INPUT/OUTPUT)
51
c INPUT: H contains the symmetric tridiagonal matrix of the
52
c Arnoldi factorization with the subdiagonal in the 1st column
53
c starting at H(2,1) and the main diagonal in the 2nd column.
54
c OUTPUT: H contains the updated tridiagonal matrix in the
55
c KEV leading submatrix.
56
c
57
c LDH Integer. (INPUT)
58
c Leading dimension of H exactly as declared in the calling
59
c program.
60
c
61
c RESID Double precision array of length (N). (INPUT/OUTPUT)
62
c INPUT: RESID contains the the residual vector r_{k+p}.
63
c OUTPUT: RESID is the updated residual vector rnew_{k}.
64
c
65
c Q Double precision KEV+NP by KEV+NP work array. (WORKSPACE)
66
c Work array used to accumulate the rotations during the bulge
67
c chase sweep.
68
c
69
c LDQ Integer. (INPUT)
70
c Leading dimension of Q exactly as declared in the calling
71
c program.
72
c
73
c WORKD Double precision work array of length 2*N. (WORKSPACE)
74
c Distributed array used in the application of the accumulated
75
c orthogonal matrix Q.
76
c
77
c\EndDoc
78
c
79
c-----------------------------------------------------------------------
80
c
81
c\BeginLib
82
c
83
c\Local variables:
84
c xxxxxx real
85
c
86
c\References:
87
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
88
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
89
c pp 357-385.
90
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
91
c Restarted Arnoldi Iteration", Rice University Technical Report
92
c TR95-13, Department of Computational and Applied Mathematics.
93
c
94
c\Routines called:
95
c pivout Parallel ARPACK utility routine that prints integers.
96
c second ARPACK utility routine for timing.
97
c pdvout Parallel ARPACK utility routine that prints vectors.
98
c pdlamch ScaLAPACK routine that determines machine constants.
99
c dlartg LAPACK Givens rotation construction routine.
100
c dlacpy LAPACK matrix copy routine.
101
c dlaset LAPACK matrix initialization routine.
102
c dgemv Level 2 BLAS routine for matrix vector multiplication.
103
c daxpy Level 1 BLAS that computes a vector triad.
104
c dcopy Level 1 BLAS that copies one vector to another.
105
c dscal Level 1 BLAS that scales a vector.
106
c
107
c\Author
108
c Danny Sorensen Phuong Vu
109
c Richard Lehoucq CRPC / Rice University
110
c Dept. of Computational & Houston, Texas
111
c Applied Mathematics
112
c Rice University
113
c Houston, Texas
114
c
115
c\Parallel Modifications
116
c Kristi Maschhoff
117
c
118
c\Revision history:
119
c Starting Point: Serial Code FILE: sapps.F SID: 2.4
120
c
121
c\SCCS Information:
122
c FILE: sapps.F SID: 1.3 DATE OF SID: 3/19/97
123
c
124
c\Remarks
125
c 1. In this version, each shift is applied to all the subblocks of
126
c the tridiagonal matrix H and not just to the submatrix that it
127
c comes from. This routine assumes that the subdiagonal elements
128
c of H that are stored in h(1:kev+np,1) are nonegative upon input
129
c and enforce this condition upon output. This version incorporates
130
c deflation. See code for documentation.
131
c
132
c\EndLib
133
c
134
c-----------------------------------------------------------------------
135
c
136
subroutine pdsapps
137
& ( comm, n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, workd)
138
c
139
c %--------------------%
140
c | MPI Communicator |
141
c %--------------------%
142
c
143
integer comm
144
c
145
c %----------------------------------------------------%
146
c | Include files for debugging and timing information |
147
c %----------------------------------------------------%
148
c
149
include 'debug.h'
150
include 'stat.h'
151
c
152
c %------------------%
153
c | Scalar Arguments |
154
c %------------------%
155
c
156
integer kev, ldh, ldq, ldv, n, np
157
c
158
c %-----------------%
159
c | Array Arguments |
160
c %-----------------%
161
c
162
Double precision
163
& h(ldh,2), q(ldq,kev+np), resid(n), shift(np),
164
& v(ldv,kev+np), workd(2*n)
165
c
166
c %------------%
167
c | Parameters |
168
c %------------%
169
c
170
Double precision
171
& one, zero
172
parameter (one = 1.0, zero = 0.0)
173
c
174
c %---------------%
175
c | Local Scalars |
176
c %---------------%
177
c
178
integer i, iend, istart, itop, j, jj, kplusp, msglvl
179
logical first
180
Double precision
181
& a1, a2, a3, a4, big, c, epsmch, f, g, r, s
182
save epsmch, first
183
c
184
c
185
c %----------------------%
186
c | External Subroutines |
187
c %----------------------%
188
c
189
external daxpy, dcopy, dscal, dlacpy, dlartg, dlaset, pdvout,
190
& pivout, second, dgemv
191
c
192
c %--------------------%
193
c | External Functions |
194
c %--------------------%
195
c
196
Double precision
197
& pdlamch10
198
external pdlamch10
199
c
200
c %----------------------%
201
c | Intrinsics Functions |
202
c %----------------------%
203
c
204
intrinsic abs
205
c
206
c %----------------%
207
c | Data statments |
208
c %----------------%
209
c
210
data first / .true. /
211
c
212
c %-----------------------%
213
c | Executable Statements |
214
c %-----------------------%
215
c
216
if (first) then
217
epsmch = pdlamch10(comm, 'Epsilon-Machine')
218
first = .false.
219
end if
220
itop = 1
221
c
222
c %-------------------------------%
223
c | Initialize timing statistics |
224
c | & message level for debugging |
225
c %-------------------------------%
226
c
227
call second (t0)
228
msglvl = msapps
229
c
230
kplusp = kev + np
231
c
232
c %----------------------------------------------%
233
c | Initialize Q to the identity matrix of order |
234
c | kplusp used to accumulate the rotations. |
235
c %----------------------------------------------%
236
c
237
call dlaset ('All', kplusp, kplusp, zero, one, q, ldq)
238
c
239
c %----------------------------------------------%
240
c | Quick return if there are no shifts to apply |
241
c %----------------------------------------------%
242
c
243
if (np .eq. 0) go to 9000
244
c
245
c %----------------------------------------------------------%
246
c | Apply the np shifts implicitly. Apply each shift to the |
247
c | whole matrix and not just to the submatrix from which it |
248
c | comes. |
249
c %----------------------------------------------------------%
250
c
251
do 90 jj = 1, np
252
c
253
istart = itop
254
c
255
c %----------------------------------------------------------%
256
c | Check for splitting and deflation. Currently we consider |
257
c | an off-diagonal element h(i+1,1) negligible if |
258
c | h(i+1,1) .le. epsmch*( |h(i,2)| + |h(i+1,2)| ) |
259
c | for i=1:KEV+NP-1. |
260
c | If above condition tests true then we set h(i+1,1) = 0. |
261
c | Note that h(1:KEV+NP,1) are assumed to be non negative. |
262
c %----------------------------------------------------------%
263
c
264
20 continue
265
c
266
c %------------------------------------------------%
267
c | The following loop exits early if we encounter |
268
c | a negligible off diagonal element. |
269
c %------------------------------------------------%
270
c
271
do 30 i = istart, kplusp-1
272
big = abs(h(i,2)) + abs(h(i+1,2))
273
if (h(i+1,1) .le. epsmch*big) then
274
if (msglvl .gt. 0) then
275
call pivout (comm, logfil, 1, [i], ndigit,
276
& '_sapps: deflation at row/column no.')
277
call pivout (comm, logfil, 1, [jj], ndigit,
278
& '_sapps: occured before shift number.')
279
call pdvout (comm, logfil, 1, h(i+1,1), ndigit,
280
& '_sapps: the corresponding off diagonal element')
281
end if
282
h(i+1,1) = zero
283
iend = i
284
go to 40
285
end if
286
30 continue
287
iend = kplusp
288
40 continue
289
c
290
if (istart .lt. iend) then
291
c
292
c %--------------------------------------------------------%
293
c | Construct the plane rotation G'(istart,istart+1,theta) |
294
c | that attempts to drive h(istart+1,1) to zero. |
295
c %--------------------------------------------------------%
296
c
297
f = h(istart,2) - shift(jj)
298
g = h(istart+1,1)
299
call dlartg (f, g, c, s, r)
300
c
301
c %-------------------------------------------------------%
302
c | Apply rotation to the left and right of H; |
303
c | H <- G' * H * G, where G = G(istart,istart+1,theta). |
304
c | This will create a "bulge". |
305
c %-------------------------------------------------------%
306
c
307
a1 = c*h(istart,2) + s*h(istart+1,1)
308
a2 = c*h(istart+1,1) + s*h(istart+1,2)
309
a4 = c*h(istart+1,2) - s*h(istart+1,1)
310
a3 = c*h(istart+1,1) - s*h(istart,2)
311
h(istart,2) = c*a1 + s*a2
312
h(istart+1,2) = c*a4 - s*a3
313
h(istart+1,1) = c*a3 + s*a4
314
c
315
c %----------------------------------------------------%
316
c | Accumulate the rotation in the matrix Q; Q <- Q*G |
317
c %----------------------------------------------------%
318
c
319
do 60 j = 1, min(istart+jj,kplusp)
320
a1 = c*q(j,istart) + s*q(j,istart+1)
321
q(j,istart+1) = - s*q(j,istart) + c*q(j,istart+1)
322
q(j,istart) = a1
323
60 continue
324
c
325
c
326
c %----------------------------------------------%
327
c | The following loop chases the bulge created. |
328
c | Note that the previous rotation may also be |
329
c | done within the following loop. But it is |
330
c | kept separate to make the distinction among |
331
c | the bulge chasing sweeps and the first plane |
332
c | rotation designed to drive h(istart+1,1) to |
333
c | zero. |
334
c %----------------------------------------------%
335
c
336
do 70 i = istart+1, iend-1
337
c
338
c %----------------------------------------------%
339
c | Construct the plane rotation G'(i,i+1,theta) |
340
c | that zeros the i-th bulge that was created |
341
c | by G(i-1,i,theta). g represents the bulge. |
342
c %----------------------------------------------%
343
c
344
f = h(i,1)
345
g = s*h(i+1,1)
346
c
347
c %----------------------------------%
348
c | Final update with G(i-1,i,theta) |
349
c %----------------------------------%
350
c
351
h(i+1,1) = c*h(i+1,1)
352
call dlartg (f, g, c, s, r)
353
c
354
c %-------------------------------------------%
355
c | The following ensures that h(1:iend-1,1), |
356
c | the first iend-2 off diagonal of elements |
357
c | H, remain non negative. |
358
c %-------------------------------------------%
359
c
360
if (r .lt. zero) then
361
r = -r
362
c = -c
363
s = -s
364
end if
365
c
366
c %--------------------------------------------%
367
c | Apply rotation to the left and right of H; |
368
c | H <- G * H * G', where G = G(i,i+1,theta) |
369
c %--------------------------------------------%
370
c
371
h(i,1) = r
372
c
373
a1 = c*h(i,2) + s*h(i+1,1)
374
a2 = c*h(i+1,1) + s*h(i+1,2)
375
a3 = c*h(i+1,1) - s*h(i,2)
376
a4 = c*h(i+1,2) - s*h(i+1,1)
377
c
378
h(i,2) = c*a1 + s*a2
379
h(i+1,2) = c*a4 - s*a3
380
h(i+1,1) = c*a3 + s*a4
381
c
382
c %----------------------------------------------------%
383
c | Accumulate the rotation in the matrix Q; Q <- Q*G |
384
c %----------------------------------------------------%
385
c
386
do 50 j = 1, min( i+jj, kplusp )
387
a1 = c*q(j,i) + s*q(j,i+1)
388
q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
389
q(j,i) = a1
390
50 continue
391
c
392
70 continue
393
c
394
end if
395
c
396
c %--------------------------%
397
c | Update the block pointer |
398
c %--------------------------%
399
c
400
istart = iend + 1
401
c
402
c %------------------------------------------%
403
c | Make sure that h(iend,1) is non-negative |
404
c | If not then set h(iend,1) <-- -h(iend,1) |
405
c | and negate the last column of Q. |
406
c | We have effectively carried out a |
407
c | similarity on transformation H |
408
c %------------------------------------------%
409
c
410
if (h(iend,1) .lt. zero) then
411
h(iend,1) = -h(iend,1)
412
call dscal(kplusp, -one, q(1,iend), 1)
413
end if
414
c
415
c %--------------------------------------------------------%
416
c | Apply the same shift to the next block if there is any |
417
c %--------------------------------------------------------%
418
c
419
if (iend .lt. kplusp) go to 20
420
c
421
c %-----------------------------------------------------%
422
c | Check if we can increase the the start of the block |
423
c %-----------------------------------------------------%
424
c
425
do 80 i = itop, kplusp-1
426
if (h(i+1,1) .gt. zero) go to 90
427
itop = itop + 1
428
80 continue
429
c
430
c %-----------------------------------%
431
c | Finished applying the jj-th shift |
432
c %-----------------------------------%
433
c
434
90 continue
435
c
436
c %------------------------------------------%
437
c | All shifts have been applied. Check for |
438
c | more possible deflation that might occur |
439
c | after the last shift is applied. |
440
c %------------------------------------------%
441
c
442
do 100 i = itop, kplusp-1
443
big = abs(h(i,2)) + abs(h(i+1,2))
444
if (h(i+1,1) .le. epsmch*big) then
445
if (msglvl .gt. 0) then
446
call pivout (comm, logfil, 1, [i], ndigit,
447
& '_sapps: deflation at row/column no.')
448
call pdvout (comm, logfil, 1, h(i+1,1), ndigit,
449
& '_sapps: the corresponding off diagonal element')
450
end if
451
h(i+1,1) = zero
452
end if
453
100 continue
454
c
455
c %-------------------------------------------------%
456
c | Compute the (kev+1)-st column of (V*Q) and |
457
c | temporarily store the result in WORKD(N+1:2*N). |
458
c | This is not necessary if h(kev+1,1) = 0. |
459
c %-------------------------------------------------%
460
c
461
if ( h(kev+1,1) .gt. zero )
462
& call dgemv ('N', n, kplusp, one, v, ldv,
463
& q(1,kev+1), 1, zero, workd(n+1), 1)
464
c
465
c %-------------------------------------------------------%
466
c | Compute column 1 to kev of (V*Q) in backward order |
467
c | taking advantage that Q is an upper triangular matrix |
468
c | with lower bandwidth np. |
469
c | Place results in v(:,kplusp-kev:kplusp) temporarily. |
470
c %-------------------------------------------------------%
471
c
472
do 130 i = 1, kev
473
call dgemv ('N', n, kplusp-i+1, one, v, ldv,
474
& q(1,kev-i+1), 1, zero, workd, 1)
475
call dcopy (n, workd, 1, v(1,kplusp-i+1), 1)
476
130 continue
477
c
478
c %-------------------------------------------------%
479
c | Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
480
c %-------------------------------------------------%
481
c
482
call dlacpy ('All', n, kev, v(1,np+1), ldv, v, ldv)
483
c
484
c %--------------------------------------------%
485
c | Copy the (kev+1)-st column of (V*Q) in the |
486
c | appropriate place if h(kev+1,1) .ne. zero. |
487
c %--------------------------------------------%
488
c
489
if ( h(kev+1,1) .gt. zero )
490
& call dcopy (n, workd(n+1), 1, v(1,kev+1), 1)
491
c
492
c %-------------------------------------%
493
c | Update the residual vector: |
494
c | r <- sigmak*r + betak*v(:,kev+1) |
495
c | where |
496
c | sigmak = (e_{kev+p}'*Q)*e_{kev} |
497
c | betak = e_{kev+1}'*H*e_{kev} |
498
c %-------------------------------------%
499
c
500
call dscal (n, q(kplusp,kev), resid, 1)
501
if (h(kev+1,1) .gt. zero)
502
& call daxpy (n, h(kev+1,1), v(1,kev+1), 1, resid, 1)
503
c
504
if (msglvl .gt. 1) then
505
call pdvout (comm, logfil, 1, q(kplusp,kev), ndigit,
506
& '_sapps: sigmak of the updated residual vector')
507
call pdvout (comm, logfil, 1, h(kev+1,1), ndigit,
508
& '_sapps: betak of the updated residual vector')
509
call pdvout (comm, logfil, kev, h(1,2), ndigit,
510
& '_sapps: updated main diagonal of H for next iteration')
511
if (kev .gt. 1) then
512
call pdvout (comm, logfil, kev-1, h(2,1), ndigit,
513
& '_sapps: updated sub diagonal of H for next iteration')
514
end if
515
end if
516
c
517
call second (t1)
518
tsapps = tsapps + (t1 - t0)
519
c
520
9000 continue
521
return
522
c
523
c %----------------%
524
c | End of pdsapps |
525
c %----------------%
526
c
527
end
528
529