Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdsaup2.f
5218 views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdsaup2
5
c
6
c Message Passing Layer: MPI
7
c
8
c\Description:
9
c Intermediate level interface called by pdsaupd.
10
c
11
c\Usage:
12
c call pdsaup2
13
c ( COMM, IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD,
14
c ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL,
15
c IPNTR, WORKD, INFO )
16
c
17
c\Arguments
18
c
19
c COMM, IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in pdsaupd.
20
c MODE, ISHIFT, MXITER: see the definition of IPARAM in pdsaupd.
21
c
22
c NP Integer. (INPUT/OUTPUT)
23
c Contains the number of implicit shifts to apply during
24
c each Arnoldi/Lanczos iteration.
25
c If ISHIFT=1, NP is adjusted dynamically at each iteration
26
c to accelerate convergence and prevent stagnation.
27
c This is also roughly equal to the number of matrix-vector
28
c products (involving the operator OP) per Arnoldi iteration.
29
c The logic for adjusting is contained within the current
30
c subroutine.
31
c If ISHIFT=0, NP is the number of shifts the user needs
32
c to provide via reverse comunication. 0 < NP < NCV-NEV.
33
c NP may be less than NCV-NEV since a leading block of the current
34
c upper Tridiagonal matrix has split off and contains "unwanted"
35
c Ritz values.
36
c Upon termination of the IRA iteration, NP contains the number
37
c of "converged" wanted Ritz values.
38
c
39
c IUPD Integer. (INPUT)
40
c IUPD .EQ. 0: use explicit restart instead implicit update.
41
c IUPD .NE. 0: use implicit update.
42
c
43
c V Double precision N by (NEV+NP) array. (INPUT/OUTPUT)
44
c The Lanczos basis vectors.
45
c
46
c LDV Integer. (INPUT)
47
c Leading dimension of V exactly as declared in the calling
48
c program.
49
c
50
c H Double precision (NEV+NP) by 2 array. (OUTPUT)
51
c H is used to store the generated symmetric tridiagonal matrix
52
c The subdiagonal is stored in the first column of H starting
53
c at H(2,1). The main diagonal is stored in the second column
54
c of H starting at H(1,2). If pdsaup2 converges store the
55
c B-norm of the final residual vector in H(1,1).
56
c
57
c LDH Integer. (INPUT)
58
c Leading dimension of H exactly as declared in the calling
59
c program.
60
c
61
c RITZ Double precision array of length NEV+NP. (OUTPUT)
62
c RITZ(1:NEV) contains the computed Ritz values of OP.
63
c
64
c BOUNDS Double precision array of length NEV+NP. (OUTPUT)
65
c BOUNDS(1:NEV) contain the error bounds corresponding to RITZ.
66
c
67
c Q Double precision (NEV+NP) by (NEV+NP) array. (WORKSPACE)
68
c Private (replicated) work array used to accumulate the
69
c rotation in the shift application step.
70
c
71
c LDQ Integer. (INPUT)
72
c Leading dimension of Q exactly as declared in the calling
73
c program.
74
c
75
c WORKL Double precision array of length at least 3*(NEV+NP). (INPUT/WORKSPACE)
76
c Private (replicated) array on each PE or array allocated on
77
c the front end. It is used in the computation of the
78
c tridiagonal eigenvalue problem, the calculation and
79
c application of the shifts and convergence checking.
80
c If ISHIFT .EQ. O and IDO .EQ. 3, the first NP locations
81
c of WORKL are used in reverse communication to hold the user
82
c supplied shifts.
83
c
84
c IPNTR Integer array of length 3. (OUTPUT)
85
c Pointer to mark the starting locations in the WORKD for
86
c vectors used by the Lanczos iteration.
87
c -------------------------------------------------------------
88
c IPNTR(1): pointer to the current operand vector X.
89
c IPNTR(2): pointer to the current result vector Y.
90
c IPNTR(3): pointer to the vector B * X when used in one of
91
c the spectral transformation modes. X is the current
92
c operand.
93
c -------------------------------------------------------------
94
c
95
c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION)
96
c Distributed array to be used in the basic Lanczos iteration
97
c for reverse communication. The user should not use WORKD
98
c as temporary workspace during the iteration !!!!!!!!!!
99
c See Data Distribution Note in pdsaupd.
100
c
101
c INFO Integer. (INPUT/OUTPUT)
102
c If INFO .EQ. 0, a randomly initial residual vector is used.
103
c If INFO .NE. 0, RESID contains the initial residual vector,
104
c possibly from a previous run.
105
c Error flag on output.
106
c = 0: Normal return.
107
c = 1: All possible eigenvalues of OP has been found.
108
c NP returns the size of the invariant subspace
109
c spanning the operator OP.
110
c = 2: No shifts could be applied.
111
c = -8: Error return from trid. eigenvalue calculation;
112
c This should never happen.
113
c = -9: Starting vector is zero.
114
c = -9999: Could not build an Lanczos factorization.
115
c Size that was built in returned in NP.
116
c
117
c\EndDoc
118
c
119
c-----------------------------------------------------------------------
120
c
121
c\BeginLib
122
c
123
c\References:
124
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
125
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
126
c pp 357-385.
127
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
128
c Restarted Arnoldi Iteration", Rice University Technical Report
129
c TR95-13, Department of Computational and Applied Mathematics.
130
c 3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall,
131
c 1980.
132
c 4. B.N. Parlett, B. Nour-Omid, "Towards a Black Box Lanczos Program",
133
c Computer Physics Communications, 53 (1989), pp 169-179.
134
c 5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to
135
c Implement the Spectral Transformation", Math. Comp., 48 (1987),
136
c pp 663-673.
137
c 6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos
138
c Algorithm for Solving Sparse Symmetric Generalized Eigenproblems",
139
c SIAM J. Matr. Anal. Apps., January (1993).
140
c 7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines
141
c for Updating the QR decomposition", ACM TOMS, December 1990,
142
c Volume 16 Number 4, pp 369-377.
143
c
144
c\Routines called:
145
c pdgetv0 Parallel ARPACK initial vector generation routine.
146
c pdsaitr Parallel ARPACK Lanczos factorization routine.
147
c pdsapps Parallel ARPACK application of implicit shifts routine.
148
c dsconv ARPACK convergence of Ritz values routine.
149
c pdseigt Parallel ARPACK compute Ritz values and error bounds routine.
150
c pdsgets Parallel ARPACK reorder Ritz values and error bounds routine.
151
c dsortr ARPACK sorting routine.
152
c sstrqb ARPACK routine that computes all eigenvalues and the
153
c last component of the eigenvectors of a symmetric
154
c tridiagonal matrix using the implicit QL or QR method.
155
c pivout Parallel ARPACK utility routine that prints integers.
156
c second ARPACK utility routine for timing.
157
c pdvout Parallel ARPACK utility routine that prints vectors.
158
c pdlamch ScaLAPACK routine that determines machine constants.
159
c dcopy Level 1 BLAS that copies one vector to another.
160
c ddot Level 1 BLAS that computes the scalar product of two vectors.
161
c pdnorm2 Parallel version of Level 1 BLAS that computes the norm of a vector.
162
c dscal Level 1 BLAS that scales a vector.
163
c dswap Level 1 BLAS that swaps two vectors.
164
c
165
c\Author
166
c Danny Sorensen Phuong Vu
167
c Richard Lehoucq CRPC / Rice University
168
c Dept. of Computational & Houston, Texas
169
c Applied Mathematics
170
c Rice University
171
c Houston, Texas
172
c
173
c\Parallel Modifications
174
c Kristi Maschhoff
175
c
176
c\Revision history:
177
c Starting Point: Serial Code FILE: saup2.F SID: 2.4
178
c
179
c\SCCS Information:
180
c FILE: saup2.F SID: 1.5 DATE OF SID: 05/20/98
181
c
182
c\EndLib
183
c
184
c-----------------------------------------------------------------------
185
c
186
subroutine pdsaup2
187
& ( comm, ido, bmat, n, which, nev, np, tol, resid, mode, iupd,
188
& ishift, mxiter, v, ldv, h, ldh, ritz, bounds,
189
& q, ldq, workl, ipntr, workd, info )
190
c
191
include 'mpif.h'
192
c
193
c %---------------%
194
c | MPI Variables |
195
c %---------------%
196
c
197
integer comm
198
c
199
c
200
c %----------------------------------------------------%
201
c | Include files for debugging and timing information |
202
c %----------------------------------------------------%
203
c
204
include 'debug.h'
205
include 'stat.h'
206
c
207
c %------------------%
208
c | Scalar Arguments |
209
c %------------------%
210
c
211
character bmat*1, which*2
212
integer ido, info, ishift, iupd, ldh, ldq, ldv, mxiter,
213
& n, mode, nev, np
214
Double precision
215
& tol
216
c
217
c %-----------------%
218
c | Array Arguments |
219
c %-----------------%
220
c
221
integer ipntr(3)
222
Double precision
223
& bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n),
224
& ritz(nev+np), v(ldv,nev+np), workd(3*n),
225
& workl(3*(nev+np))
226
c
227
c %------------%
228
c | Parameters |
229
c %------------%
230
c
231
Double precision
232
& one, zero
233
parameter (one = 1.0, zero = 0.0)
234
c
235
c %---------------%
236
c | Local Scalars |
237
c %---------------%
238
c
239
character wprime*2
240
logical cnorm, getv0, initv, update, ushift
241
integer ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0,
242
& np0, nptemp, nevd2, nevm2, kp(3)
243
Double precision
244
& rnorm, temp, eps23
245
save cnorm, getv0, initv, update, ushift,
246
& iter, kplusp, msglvl, nconv, nev0, np0,
247
& rnorm, eps23
248
c
249
Double precision
250
& rnorm_buf
251
c
252
c
253
c %----------------------%
254
c | External Subroutines |
255
c %----------------------%
256
c
257
external dcopy, pdgetv0, pdsaitr, dscal, dsconv,
258
& pdseigt, pdsgets, pdsapps,
259
& dsortr, pdvout, pivout, second
260
c
261
c %--------------------%
262
c | External Functions |
263
c %--------------------%
264
c
265
Double precision
266
& ddot, pdnorm2, pdlamch10
267
external ddot, pdnorm2, pdlamch10
268
c
269
c %---------------------%
270
c | Intrinsic Functions |
271
c %---------------------%
272
c
273
intrinsic min, abs, sqrt
274
c
275
c %-----------------------%
276
c | Executable Statements |
277
c %-----------------------%
278
c
279
if (ido .eq. 0) then
280
c
281
c %-------------------------------%
282
c | Initialize timing statistics |
283
c | & message level for debugging |
284
c %-------------------------------%
285
c
286
call second (t0)
287
msglvl = msaup2
288
c
289
c %---------------------------------%
290
c | Set machine dependent constant. |
291
c %---------------------------------%
292
c
293
eps23 = pdlamch10(comm, 'Epsilon-Machine')
294
eps23 = eps23**(2.0/3.0)
295
c
296
c %-------------------------------------%
297
c | nev0 and np0 are integer variables |
298
c | hold the initial values of NEV & NP |
299
c %-------------------------------------%
300
c
301
nev0 = nev
302
np0 = np
303
c
304
c %-------------------------------------%
305
c | kplusp is the bound on the largest |
306
c | Lanczos factorization built. |
307
c | nconv is the current number of |
308
c | "converged" eigenvlues. |
309
c | iter is the counter on the current |
310
c | iteration step. |
311
c %-------------------------------------%
312
c
313
kplusp = nev0 + np0
314
nconv = 0
315
iter = 0
316
c
317
c %---------------------------------------------%
318
c | Set flags for computing the first NEV steps |
319
c | of the Lanczos factorization. |
320
c %---------------------------------------------%
321
c
322
getv0 = .true.
323
update = .false.
324
ushift = .false.
325
cnorm = .false.
326
c
327
if (info .ne. 0) then
328
c
329
c %--------------------------------------------%
330
c | User provides the initial residual vector. |
331
c %--------------------------------------------%
332
c
333
initv = .true.
334
info = 0
335
else
336
initv = .false.
337
end if
338
end if
339
c
340
c %---------------------------------------------%
341
c | Get a possibly random starting vector and |
342
c | force it into the range of the operator OP. |
343
c %---------------------------------------------%
344
c
345
10 continue
346
c
347
if (getv0) then
348
call pdgetv0 ( comm, ido, bmat, 1, initv, n, 1, v, ldv,
349
& resid, rnorm, ipntr, workd, workl, info)
350
c
351
if (ido .ne. 99) go to 9000
352
c
353
if (rnorm .eq. zero) then
354
c
355
c %-----------------------------------------%
356
c | The initial vector is zero. Error exit. |
357
c %-----------------------------------------%
358
c
359
info = -9
360
go to 1200
361
end if
362
getv0 = .false.
363
ido = 0
364
end if
365
c
366
c %------------------------------------------------------------%
367
c | Back from reverse communication: continue with update step |
368
c %------------------------------------------------------------%
369
c
370
if (update) go to 20
371
c
372
c %-------------------------------------------%
373
c | Back from computing user specified shifts |
374
c %-------------------------------------------%
375
c
376
if (ushift) go to 50
377
c
378
c %-------------------------------------%
379
c | Back from computing residual norm |
380
c | at the end of the current iteration |
381
c %-------------------------------------%
382
c
383
if (cnorm) go to 100
384
c
385
c %----------------------------------------------------------%
386
c | Compute the first NEV steps of the Lanczos factorization |
387
c %----------------------------------------------------------%
388
c
389
call pdsaitr (comm, ido, bmat, n, 0, nev0, mode,
390
& resid, rnorm, v, ldv, h, ldh, ipntr,
391
& workd, workl, info)
392
c
393
c %---------------------------------------------------%
394
c | ido .ne. 99 implies use of reverse communication |
395
c | to compute operations involving OP and possibly B |
396
c %---------------------------------------------------%
397
c
398
if (ido .ne. 99) go to 9000
399
c
400
if (info .gt. 0) then
401
c
402
c %-----------------------------------------------------%
403
c | pdsaitr was unable to build an Lanczos factorization|
404
c | of length NEV0. INFO is returned with the size of |
405
c | the factorization built. Exit main loop. |
406
c %-----------------------------------------------------%
407
c
408
np = info
409
mxiter = iter
410
info = -9999
411
go to 1200
412
end if
413
c
414
c %--------------------------------------------------------------%
415
c | |
416
c | M A I N LANCZOS I T E R A T I O N L O O P |
417
c | Each iteration implicitly restarts the Lanczos |
418
c | factorization in place. |
419
c | |
420
c %--------------------------------------------------------------%
421
c
422
1000 continue
423
c
424
iter = iter + 1
425
c
426
if (msglvl .gt. 0) then
427
call pivout (comm, logfil, 1, [iter], ndigit,
428
& '_saup2: **** Start of major iteration number ****')
429
end if
430
if (msglvl .gt. 1) then
431
call pivout (comm, logfil, 1, [nev], ndigit,
432
& '_saup2: The length of the current Lanczos factorization')
433
call pivout (comm, logfil, 1, [np], ndigit,
434
& '_saup2: Extend the Lanczos factorization by')
435
end if
436
c
437
c %------------------------------------------------------------%
438
c | Compute NP additional steps of the Lanczos factorization. |
439
c %------------------------------------------------------------%
440
c
441
ido = 0
442
20 continue
443
update = .true.
444
c
445
call pdsaitr (comm, ido, bmat, n, nev, np, mode,
446
& resid, rnorm, v, ldv, h, ldh, ipntr,
447
& workd, workl, info)
448
c
449
c %---------------------------------------------------%
450
c | ido .ne. 99 implies use of reverse communication |
451
c | to compute operations involving OP and possibly B |
452
c %---------------------------------------------------%
453
c
454
if (ido .ne. 99) go to 9000
455
c
456
if (info .gt. 0) then
457
c
458
c %-----------------------------------------------------%
459
c | pdsaitr was unable to build an Lanczos factorization|
460
c | of length NEV0+NP0. INFO is returned with the size |
461
c | of the factorization built. Exit main loop. |
462
c %-----------------------------------------------------%
463
c
464
np = info
465
mxiter = iter
466
info = -9999
467
go to 1200
468
end if
469
update = .false.
470
c
471
if (msglvl .gt. 1) then
472
call pdvout (comm, logfil, 1, [rnorm], ndigit,
473
& '_saup2: Current B-norm of residual for factorization')
474
end if
475
c
476
c %--------------------------------------------------------%
477
c | Compute the eigenvalues and corresponding error bounds |
478
c | of the current symmetric tridiagonal matrix. |
479
c %--------------------------------------------------------%
480
c
481
call pdseigt ( comm, rnorm, kplusp, h, ldh, ritz, bounds,
482
& workl, ierr)
483
c
484
if (ierr .ne. 0) then
485
info = -8
486
go to 1200
487
end if
488
c
489
c %----------------------------------------------------%
490
c | Make a copy of eigenvalues and corresponding error |
491
c | bounds obtained from _seigt. |
492
c %----------------------------------------------------%
493
c
494
call dcopy(kplusp, ritz, 1, workl(kplusp+1), 1)
495
call dcopy(kplusp, bounds, 1, workl(2*kplusp+1), 1)
496
c
497
c %---------------------------------------------------%
498
c | Select the wanted Ritz values and their bounds |
499
c | to be used in the convergence test. |
500
c | The selection is based on the requested number of |
501
c | eigenvalues instead of the current NEV and NP to |
502
c | prevent possible misconvergence. |
503
c | * Wanted Ritz values := RITZ(NP+1:NEV+NP) |
504
c | * Shifts := RITZ(1:NP) := WORKL(1:NP) |
505
c %---------------------------------------------------%
506
c
507
nev = nev0
508
np = np0
509
call pdsgets ( comm, ishift, which, nev, np, ritz,
510
& bounds, workl)
511
c
512
c %-------------------%
513
c | Convergence test. |
514
c %-------------------%
515
c
516
call dcopy (nev, bounds(np+1), 1, workl(np+1), 1)
517
call dsconv (nev, ritz(np+1), workl(np+1), tol, nconv)
518
c
519
if (msglvl .gt. 2) then
520
kp(1) = nev
521
kp(2) = np
522
kp(3) = nconv
523
call pivout (comm, logfil, 3, kp, ndigit,
524
& '_saup2: NEV, NP, NCONV are')
525
call pdvout (comm, logfil, kplusp, ritz, ndigit,
526
& '_saup2: The eigenvalues of H')
527
call pdvout (comm, logfil, kplusp, bounds, ndigit,
528
& '_saup2: Ritz estimates of the current NCV Ritz values')
529
end if
530
c
531
c %---------------------------------------------------------%
532
c | Count the number of unwanted Ritz values that have zero |
533
c | Ritz estimates. If any Ritz estimates are equal to zero |
534
c | then a leading block of H of order equal to at least |
535
c | the number of Ritz values with zero Ritz estimates has |
536
c | split off. None of these Ritz values may be removed by |
537
c | shifting. Decrease NP the number of shifts to apply. If |
538
c | no shifts may be applied, then prepare to exit |
539
c %---------------------------------------------------------%
540
c
541
nptemp = np
542
do 30 j=1, nptemp
543
if (bounds(j) .eq. zero) then
544
np = np - 1
545
nev = nev + 1
546
end if
547
30 continue
548
c
549
if ( (nconv .ge. nev0) .or.
550
& (iter .gt. mxiter) .or.
551
& (np .eq. 0) ) then
552
c
553
c %------------------------------------------------%
554
c | Prepare to exit. Put the converged Ritz values |
555
c | and corresponding bounds in RITZ(1:NCONV) and |
556
c | BOUNDS(1:NCONV) respectively. Then sort. Be |
557
c | careful when NCONV > NP since we don't want to |
558
c | swap overlapping locations. |
559
c %------------------------------------------------%
560
c
561
if (which .eq. 'BE') then
562
c
563
c %-----------------------------------------------------%
564
c | Both ends of the spectrum are requested. |
565
c | Sort the eigenvalues into algebraically decreasing |
566
c | order first then swap low end of the spectrum next |
567
c | to high end in appropriate locations. |
568
c | NOTE: when np < floor(nev/2) be careful not to swap |
569
c | overlapping locations. |
570
c %-----------------------------------------------------%
571
c
572
wprime = 'SA'
573
call dsortr (wprime, .true., kplusp, ritz, bounds)
574
nevd2 = nev0 / 2
575
nevm2 = nev0 - nevd2
576
if ( nev .gt. 1 ) then
577
call dswap ( min(nevd2,np), ritz(nevm2+1), 1,
578
& ritz( max(kplusp-nevd2+1,kplusp-np+1) ), 1)
579
call dswap ( min(nevd2,np), bounds(nevm2+1), 1,
580
& bounds( max(kplusp-nevd2+1,kplusp-np+1)), 1)
581
end if
582
c
583
else
584
c
585
c %--------------------------------------------------%
586
c | LM, SM, LA, SA case. |
587
c | Sort the eigenvalues of H into the an order that |
588
c | is opposite to WHICH, and apply the resulting |
589
c | order to BOUNDS. The eigenvalues are sorted so |
590
c | that the wanted part are always within the first |
591
c | NEV locations. |
592
c %--------------------------------------------------%
593
c
594
if (which .eq. 'LM') wprime = 'SM'
595
if (which .eq. 'SM') wprime = 'LM'
596
if (which .eq. 'LA') wprime = 'SA'
597
if (which .eq. 'SA') wprime = 'LA'
598
c
599
call dsortr (wprime, .true., kplusp, ritz, bounds)
600
c
601
end if
602
c
603
c %--------------------------------------------------%
604
c | Scale the Ritz estimate of each Ritz value |
605
c | by 1 / max(eps23,magnitude of the Ritz value). |
606
c %--------------------------------------------------%
607
c
608
do 35 j = 1, nev0
609
temp = max( eps23, abs(ritz(j)) )
610
bounds(j) = bounds(j)/temp
611
35 continue
612
c
613
c %----------------------------------------------------%
614
c | Sort the Ritz values according to the scaled Ritz |
615
c | esitmates. This will push all the converged ones |
616
c | towards the front of ritzr, ritzi, bounds |
617
c | (in the case when NCONV < NEV.) |
618
c %----------------------------------------------------%
619
c
620
wprime = 'LA'
621
call dsortr(wprime, .true., nev0, bounds, ritz)
622
c
623
c %----------------------------------------------%
624
c | Scale the Ritz estimate back to its original |
625
c | value. |
626
c %----------------------------------------------%
627
c
628
do 40 j = 1, nev0
629
temp = max( eps23, abs(ritz(j)) )
630
bounds(j) = bounds(j)*temp
631
40 continue
632
c
633
c %--------------------------------------------------%
634
c | Sort the "converged" Ritz values again so that |
635
c | the "threshold" values and their associated Ritz |
636
c | estimates appear at the appropriate position in |
637
c | ritz and bound. |
638
c %--------------------------------------------------%
639
c
640
if (which .eq. 'BE') then
641
c
642
c %------------------------------------------------%
643
c | Sort the "converged" Ritz values in increasing |
644
c | order. The "threshold" values are in the |
645
c | middle. |
646
c %------------------------------------------------%
647
c
648
wprime = 'LA'
649
call dsortr(wprime, .true., nconv, ritz, bounds)
650
c
651
else
652
c
653
c %----------------------------------------------%
654
c | In LM, SM, LA, SA case, sort the "converged" |
655
c | Ritz values according to WHICH so that the |
656
c | "threshold" value appears at the front of |
657
c | ritz. |
658
c %----------------------------------------------%
659
660
call dsortr(which, .true., nconv, ritz, bounds)
661
c
662
end if
663
c
664
c %------------------------------------------%
665
c | Use h( 1,1 ) as storage to communicate |
666
c | rnorm to _seupd if needed |
667
c %------------------------------------------%
668
c
669
h(1,1) = rnorm
670
c
671
if (msglvl .gt. 1) then
672
call pdvout (comm, logfil, kplusp, ritz, ndigit,
673
& '_saup2: Sorted Ritz values.')
674
call pdvout (comm, logfil, kplusp, bounds, ndigit,
675
& '_saup2: Sorted ritz estimates.')
676
end if
677
c
678
c %------------------------------------%
679
c | Max iterations have been exceeded. |
680
c %------------------------------------%
681
c
682
if (iter .gt. mxiter .and. nconv .lt. nev) info = 1
683
c
684
c %---------------------%
685
c | No shifts to apply. |
686
c %---------------------%
687
c
688
if (np .eq. 0 .and. nconv .lt. nev0) info = 2
689
c
690
np = nconv
691
go to 1100
692
c
693
else if (nconv .lt. nev .and. ishift .eq. 1) then
694
c
695
c %---------------------------------------------------%
696
c | Do not have all the requested eigenvalues yet. |
697
c | To prevent possible stagnation, adjust the number |
698
c | of Ritz values and the shifts. |
699
c %---------------------------------------------------%
700
c
701
nevbef = nev
702
nev = nev + min (nconv, np/2)
703
if (nev .eq. 1 .and. kplusp .ge. 6) then
704
nev = kplusp / 2
705
else if (nev .eq. 1 .and. kplusp .gt. 2) then
706
nev = 2
707
end if
708
np = kplusp - nev
709
c
710
c %---------------------------------------%
711
c | If the size of NEV was just increased |
712
c | resort the eigenvalues. |
713
c %---------------------------------------%
714
c
715
if (nevbef .lt. nev)
716
& call pdsgets ( comm, ishift, which, nev, np,
717
& ritz, bounds, workl)
718
c
719
end if
720
c
721
if (msglvl .gt. 0) then
722
call pivout (comm, logfil, 1, [nconv], ndigit,
723
& '_saup2: no. of "converged" Ritz values at this iter.')
724
if (msglvl .gt. 1) then
725
kp(1) = nev
726
kp(2) = np
727
call pivout (comm, logfil, 2, kp, ndigit,
728
& '_saup2: NEV and NP .')
729
call pdvout (comm, logfil, nev, ritz(np+1), ndigit,
730
& '_saup2: "wanted" Ritz values.')
731
call pdvout (comm, logfil, nev, bounds(np+1), ndigit,
732
& '_saup2: Ritz estimates of the "wanted" values ')
733
end if
734
end if
735
c
736
if (ishift .eq. 0) then
737
c
738
c %-----------------------------------------------------%
739
c | User specified shifts: reverse communication to |
740
c | compute the shifts. They are returned in the first |
741
c | NP locations of WORKL. |
742
c %-----------------------------------------------------%
743
c
744
ushift = .true.
745
ido = 3
746
go to 9000
747
end if
748
c
749
50 continue
750
c
751
c %------------------------------------%
752
c | Back from reverse communication; |
753
c | User specified shifts are returned |
754
c | in WORKL(1:NP) |
755
c %------------------------------------%
756
c
757
ushift = .false.
758
c
759
c
760
c %---------------------------------------------------------%
761
c | Move the NP shifts to the first NP locations of RITZ to |
762
c | free up WORKL. This is for the non-exact shift case; |
763
c | in the exact shift case, pdsgets already handles this. |
764
c %---------------------------------------------------------%
765
c
766
if (ishift .eq. 0) call dcopy (np, workl, 1, ritz, 1)
767
c
768
if (msglvl .gt. 2) then
769
call pivout (comm, logfil, 1, [np], ndigit,
770
& '_saup2: The number of shifts to apply ')
771
call pdvout (comm, logfil, np, workl, ndigit,
772
& '_saup2: shifts selected')
773
if (ishift .eq. 1) then
774
call pdvout (comm, logfil, np, bounds, ndigit,
775
& '_saup2: corresponding Ritz estimates')
776
end if
777
end if
778
c
779
c %---------------------------------------------------------%
780
c | Apply the NP0 implicit shifts by QR bulge chasing. |
781
c | Each shift is applied to the entire tridiagonal matrix. |
782
c | The first 2*N locations of WORKD are used as workspace. |
783
c | After pdsapps is done, we have a Lanczos |
784
c | factorization of length NEV. |
785
c %---------------------------------------------------------%
786
c
787
call pdsapps ( comm, n, nev, np, ritz, v, ldv, h, ldh, resid,
788
& q, ldq, workd)
789
c
790
c %---------------------------------------------%
791
c | Compute the B-norm of the updated residual. |
792
c | Keep B*RESID in WORKD(1:N) to be used in |
793
c | the first step of the next call to pdsaitr. |
794
c %---------------------------------------------%
795
c
796
cnorm = .true.
797
call second (t2)
798
if (bmat .eq. 'G') then
799
nbx = nbx + 1
800
call dcopy (n, resid, 1, workd(n+1), 1)
801
ipntr(1) = n + 1
802
ipntr(2) = 1
803
ido = 2
804
c
805
c %----------------------------------%
806
c | Exit in order to compute B*RESID |
807
c %----------------------------------%
808
c
809
go to 9000
810
else if (bmat .eq. 'I') then
811
call dcopy (n, resid, 1, workd, 1)
812
end if
813
c
814
100 continue
815
c
816
c %----------------------------------%
817
c | Back from reverse communication; |
818
c | WORKD(1:N) := B*RESID |
819
c %----------------------------------%
820
c
821
if (bmat .eq. 'G') then
822
call second (t3)
823
tmvbx = tmvbx + (t3 - t2)
824
end if
825
c
826
if (bmat .eq. 'G') then
827
rnorm_buf = ddot (n, resid, 1, workd, 1)
828
call MPI_ALLREDUCE( rnorm_buf, rnorm, 1,
829
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
830
rnorm = sqrt(abs(rnorm))
831
else if (bmat .eq. 'I') then
832
rnorm = pdnorm2( comm, n, resid, 1 )
833
end if
834
cnorm = .false.
835
130 continue
836
c
837
if (msglvl .gt. 2) then
838
call pdvout (comm, logfil, 1, [rnorm], ndigit,
839
& '_saup2: B-norm of residual for NEV factorization')
840
call pdvout (comm, logfil, nev, h(1,2), ndigit,
841
& '_saup2: main diagonal of compressed H matrix')
842
call pdvout (comm, logfil, nev-1, h(2,1), ndigit,
843
& '_saup2: subdiagonal of compressed H matrix')
844
end if
845
c
846
go to 1000
847
c
848
c %---------------------------------------------------------------%
849
c | |
850
c | E N D O F M A I N I T E R A T I O N L O O P |
851
c | |
852
c %---------------------------------------------------------------%
853
c
854
1100 continue
855
c
856
mxiter = iter
857
nev = nconv
858
c
859
1200 continue
860
ido = 99
861
c
862
c %------------%
863
c | Error exit |
864
c %------------%
865
c
866
call second (t1)
867
tsaup2 = t1 - t0
868
c
869
9000 continue
870
return
871
c
872
c %----------------%
873
c | End of pdsaup2 |
874
c %----------------%
875
c
876
end
877
878