CoCalc -- pdsaup2.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdsaup2.f
⁵²¹⁸ views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdsaup2
5
c
6
c Message Passing Layer: MPI
7
c
8
c\Description:
9
c  Intermediate level interface called by pdsaupd.
10
c
11
c\Usage:
12
c  call pdsaup2
13
c     ( COMM, IDO, BMAT, N, WHICH, NEV, NP, TOL, RESID, MODE, IUPD,
14
c       ISHIFT, MXITER, V, LDV, H, LDH, RITZ, BOUNDS, Q, LDQ, WORKL,
15
c       IPNTR, WORKD, INFO )
16
c
17
c\Arguments
18
c
19
c  COMM, IDO, BMAT, N, WHICH, NEV, TOL, RESID: same as defined in pdsaupd.
20
c  MODE, ISHIFT, MXITER: see the definition of IPARAM in pdsaupd.
21
c
22
c  NP      Integer.  (INPUT/OUTPUT)
23
c          Contains the number of implicit shifts to apply during
24
c          each Arnoldi/Lanczos iteration.
25
c          If ISHIFT=1, NP is adjusted dynamically at each iteration
26
c          to accelerate convergence and prevent stagnation.
27
c          This is also roughly equal to the number of matrix-vector
28
c          products (involving the operator OP) per Arnoldi iteration.
29
c          The logic for adjusting is contained within the current
30
c          subroutine.
31
c          If ISHIFT=0, NP is the number of shifts the user needs
32
c          to provide via reverse comunication. 0 < NP < NCV-NEV.
33
c          NP may be less than NCV-NEV since a leading block of the current
34
c          upper Tridiagonal matrix has split off and contains "unwanted"
35
c          Ritz values.
36
c          Upon termination of the IRA iteration, NP contains the number
37
c          of "converged" wanted Ritz values.
38
c
39
c  IUPD    Integer.  (INPUT)
40
c          IUPD .EQ. 0: use explicit restart instead implicit update.
41
c          IUPD .NE. 0: use implicit update.
42
c
43
c  V       Double precision N by (NEV+NP) array.  (INPUT/OUTPUT)
44
c          The Lanczos basis vectors.
45
c
46
c  LDV     Integer.  (INPUT)
47
c          Leading dimension of V exactly as declared in the calling
48
c          program.
49
c
50
c  H       Double precision (NEV+NP) by 2 array.  (OUTPUT)
51
c          H is used to store the generated symmetric tridiagonal matrix
52
c          The subdiagonal is stored in the first column of H starting
53
c          at H(2,1).  The main diagonal is stored in the second column
54
c          of H starting at H(1,2). If pdsaup2 converges store the
55
c          B-norm of the final residual vector in H(1,1).
56
c
57
c  LDH     Integer.  (INPUT)
58
c          Leading dimension of H exactly as declared in the calling
59
c          program.
60
c
61
c  RITZ    Double precision array of length NEV+NP.  (OUTPUT)
62
c          RITZ(1:NEV) contains the computed Ritz values of OP.
63
c
64
c  BOUNDS  Double precision array of length NEV+NP.  (OUTPUT)
65
c          BOUNDS(1:NEV) contain the error bounds corresponding to RITZ.
66
c
67
c  Q       Double precision (NEV+NP) by (NEV+NP) array.  (WORKSPACE)
68
c          Private (replicated) work array used to accumulate the
69
c          rotation in the shift application step.
70
c
71
c  LDQ     Integer.  (INPUT)
72
c          Leading dimension of Q exactly as declared in the calling
73
c          program.
74
c
75
c  WORKL   Double precision array of length at least 3*(NEV+NP).  (INPUT/WORKSPACE)
76
c          Private (replicated) array on each PE or array allocated on
77
c          the front end.  It is used in the computation of the
78
c          tridiagonal eigenvalue problem, the calculation and
79
c          application of the shifts and convergence checking.
80
c          If ISHIFT .EQ. O and IDO .EQ. 3, the first NP locations
81
c          of WORKL are used in reverse communication to hold the user
82
c          supplied shifts.
83
c
84
c  IPNTR   Integer array of length 3.  (OUTPUT)
85
c          Pointer to mark the starting locations in the WORKD for
86
c          vectors used by the Lanczos iteration.
87
c          -------------------------------------------------------------
88
c          IPNTR(1): pointer to the current operand vector X.
89
c          IPNTR(2): pointer to the current result vector Y.
90
c          IPNTR(3): pointer to the vector B * X when used in one of
91
c                    the spectral transformation modes.  X is the current
92
c                    operand.
93
c          -------------------------------------------------------------
94
c
95
c  WORKD   Double precision work array of length 3*N.  (REVERSE COMMUNICATION)
96
c          Distributed array to be used in the basic Lanczos iteration
97
c          for reverse communication.  The user should not use WORKD
98
c          as temporary workspace during the iteration !!!!!!!!!!
99
c          See Data Distribution Note in pdsaupd.
100
c
101
c  INFO    Integer.  (INPUT/OUTPUT)
102
c          If INFO .EQ. 0, a randomly initial residual vector is used.
103
c          If INFO .NE. 0, RESID contains the initial residual vector,
104
c                          possibly from a previous run.
105
c          Error flag on output.
106
c          =     0: Normal return.
107
c          =     1: All possible eigenvalues of OP has been found.
108
c                   NP returns the size of the invariant subspace
109
c                   spanning the operator OP.
110
c          =     2: No shifts could be applied.
111
c          =    -8: Error return from trid. eigenvalue calculation;
112
c                   This should never happen.
113
c          =    -9: Starting vector is zero.
114
c          = -9999: Could not build an Lanczos factorization.
115
c                   Size that was built in returned in NP.
116
c
117
c\EndDoc
118
c
119
c-----------------------------------------------------------------------
120
c
121
c\BeginLib
122
c
123
c\References:
124
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
125
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
126
c     pp 357-385.
127
c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
128
c     Restarted Arnoldi Iteration", Rice University Technical Report
129
c     TR95-13, Department of Computational and Applied Mathematics.
130
c  3. B.N. Parlett, "The Symmetric Eigenvalue Problem". Prentice-Hall,
131
c     1980.
132
c  4. B.N. Parlett, B. Nour-Omid, "Towards a Black Box Lanczos Program",
133
c     Computer Physics Communications, 53 (1989), pp 169-179.
134
c  5. B. Nour-Omid, B.N. Parlett, T. Ericson, P.S. Jensen, "How to
135
c     Implement the Spectral Transformation", Math. Comp., 48 (1987),
136
c     pp 663-673.
137
c  6. R.G. Grimes, J.G. Lewis and H.D. Simon, "A Shifted Block Lanczos
138
c     Algorithm for Solving Sparse Symmetric Generalized Eigenproblems",
139
c     SIAM J. Matr. Anal. Apps.,  January (1993).
140
c  7. L. Reichel, W.B. Gragg, "Algorithm 686: FORTRAN Subroutines
141
c     for Updating the QR decomposition", ACM TOMS, December 1990,
142
c     Volume 16 Number 4, pp 369-377.
143
c
144
c\Routines called:
145
c     pdgetv0  Parallel ARPACK initial vector generation routine.
146
c     pdsaitr  Parallel ARPACK Lanczos factorization routine.
147
c     pdsapps  Parallel ARPACK application of implicit shifts routine.
148
c     dsconv   ARPACK convergence of Ritz values routine.
149
c     pdseigt  Parallel ARPACK compute Ritz values and error bounds routine.
150
c     pdsgets  Parallel ARPACK reorder Ritz values and error bounds routine.
151
c     dsortr   ARPACK sorting routine.
152
c     sstrqb   ARPACK routine that computes all eigenvalues and the
153
c              last component of the eigenvectors of a symmetric
154
c              tridiagonal matrix using the implicit QL or QR method.
155
c     pivout   Parallel ARPACK utility routine that prints integers.
156
c     second   ARPACK utility routine for timing.
157
c     pdvout   Parallel ARPACK utility routine that prints vectors.
158
c     pdlamch  ScaLAPACK routine that determines machine constants.
159
c     dcopy    Level 1 BLAS that copies one vector to another.
160
c     ddot     Level 1 BLAS that computes the scalar product of two vectors.
161
c     pdnorm2  Parallel version of Level 1 BLAS that computes the norm of a vector.
162
c     dscal    Level 1 BLAS that scales a vector.
163
c     dswap    Level 1 BLAS that swaps two vectors.
164
c
165
c\Author
166
c     Danny Sorensen               Phuong Vu
167
c     Richard Lehoucq              CRPC / Rice University
168
c     Dept. of Computational &     Houston, Texas
169
c     Applied Mathematics
170
c     Rice University
171
c     Houston, Texas
172
c
173
c\Parallel Modifications
174
c     Kristi Maschhoff
175
c
176
c\Revision history:
177
c     Starting Point: Serial Code FILE: saup2.F   SID: 2.4
178
c
179
c\SCCS Information:
180
c FILE: saup2.F   SID: 1.5   DATE OF SID: 05/20/98
181
c
182
c\EndLib
183
c
184
c-----------------------------------------------------------------------
185
c
186
      subroutine pdsaup2
187
     &   ( comm, ido, bmat, n, which, nev, np, tol, resid, mode, iupd,
188
     &     ishift, mxiter, v, ldv, h, ldh, ritz, bounds,
189
     &     q, ldq, workl, ipntr, workd, info )
190
c
191
      include   'mpif.h'
192
c
193
c     %---------------%
194
c     | MPI Variables |
195
c     %---------------%
196
c
197
      integer    comm
198
c
199
c
200
c     %----------------------------------------------------%
201
c     | Include files for debugging and timing information |
202
c     %----------------------------------------------------%
203
c
204
      include   'debug.h'
205
      include   'stat.h'
206
c
207
c     %------------------%
208
c     | Scalar Arguments |
209
c     %------------------%
210
c
211
      character  bmat*1, which*2
212
      integer    ido, info, ishift, iupd, ldh, ldq, ldv, mxiter,
213
     &           n, mode, nev, np
214
      Double precision
215
     &           tol
216
c
217
c     %-----------------%
218
c     | Array Arguments |
219
c     %-----------------%
220
c
221
      integer    ipntr(3)
222
      Double precision
223
     &           bounds(nev+np), h(ldh,2), q(ldq,nev+np), resid(n),
224
     &           ritz(nev+np), v(ldv,nev+np), workd(3*n),
225
     &           workl(3*(nev+np))
226
c
227
c     %------------%
228
c     | Parameters |
229
c     %------------%
230
c
231
      Double precision
232
     &           one, zero
233
      parameter (one = 1.0, zero = 0.0)
234
c
235
c     %---------------%
236
c     | Local Scalars |
237
c     %---------------%
238
c
239
      character  wprime*2
240
      logical    cnorm, getv0, initv, update, ushift
241
      integer    ierr, iter, j, kplusp, msglvl, nconv, nevbef, nev0,
242
     &           np0, nptemp, nevd2, nevm2, kp(3)
243
      Double precision
244
     &           rnorm, temp, eps23
245
      save       cnorm, getv0, initv, update, ushift,
246
     &           iter, kplusp, msglvl, nconv, nev0, np0,
247
     &           rnorm, eps23
248
c
249
      Double precision
250
     &           rnorm_buf
251
c
252
c
253
c     %----------------------%
254
c     | External Subroutines |
255
c     %----------------------%
256
c
257
      external   dcopy, pdgetv0, pdsaitr, dscal, dsconv,
258
     &           pdseigt, pdsgets, pdsapps,
259
     &           dsortr, pdvout, pivout, second
260
c
261
c     %--------------------%
262
c     | External Functions |
263
c     %--------------------%
264
c
265
      Double precision
266
     &           ddot, pdnorm2, pdlamch10
267
      external   ddot, pdnorm2, pdlamch10
268
c
269
c     %---------------------%
270
c     | Intrinsic Functions |
271
c     %---------------------%
272
c
273
      intrinsic    min, abs, sqrt
274
c
275
c     %-----------------------%
276
c     | Executable Statements |
277
c     %-----------------------%
278
c
279
      if (ido .eq. 0) then
280
c
281
c        %-------------------------------%
282
c        | Initialize timing statistics  |
283
c        | & message level for debugging |
284
c        %-------------------------------%
285
c
286
         call second (t0)
287
         msglvl = msaup2
288
c
289
c        %---------------------------------%
290
c        | Set machine dependent constant. |
291
c        %---------------------------------%
292
c
293
         eps23 = pdlamch10(comm, 'Epsilon-Machine')
294
         eps23 = eps23**(2.0/3.0)
295
c
296
c        %-------------------------------------%
297
c        | nev0 and np0 are integer variables  |
298
c        | hold the initial values of NEV & NP |
299
c        %-------------------------------------%
300
c
301
         nev0   = nev
302
         np0    = np
303
c
304
c        %-------------------------------------%
305
c        | kplusp is the bound on the largest  |
306
c        |        Lanczos factorization built. |
307
c        | nconv is the current number of      |
308
c        |        "converged" eigenvlues.      |
309
c        | iter is the counter on the current  |
310
c        |      iteration step.                |
311
c        %-------------------------------------%
312
c
313
         kplusp = nev0 + np0
314
         nconv  = 0
315
         iter   = 0
316
c
317
c        %---------------------------------------------%
318
c        | Set flags for computing the first NEV steps |
319
c        | of the Lanczos factorization.               |
320
c        %---------------------------------------------%
321
c
322
         getv0    = .true.
323
         update   = .false.
324
         ushift   = .false.
325
         cnorm    = .false.
326
c
327
         if (info .ne. 0) then
328
c
329
c        %--------------------------------------------%
330
c        | User provides the initial residual vector. |
331
c        %--------------------------------------------%
332
c
333
            initv = .true.
334
            info  = 0
335
         else
336
            initv = .false.
337
         end if
338
      end if
339
c
340
c     %---------------------------------------------%
341
c     | Get a possibly random starting vector and   |
342
c     | force it into the range of the operator OP. |
343
c     %---------------------------------------------%
344
c
345
   10 continue
346
c
347
      if (getv0) then
348
         call pdgetv0 ( comm, ido, bmat, 1, initv, n, 1, v, ldv,
349
     &                  resid, rnorm, ipntr, workd, workl, info)
350
c
351
         if (ido .ne. 99) go to 9000
352
c
353
         if (rnorm .eq. zero) then
354
c
355
c           %-----------------------------------------%
356
c           | The initial vector is zero. Error exit. |
357
c           %-----------------------------------------%
358
c
359
            info = -9
360
            go to 1200
361
         end if
362
         getv0 = .false.
363
         ido  = 0
364
      end if
365
c
366
c     %------------------------------------------------------------%
367
c     | Back from reverse communication: continue with update step |
368
c     %------------------------------------------------------------%
369
c
370
      if (update) go to 20
371
c
372
c     %-------------------------------------------%
373
c     | Back from computing user specified shifts |
374
c     %-------------------------------------------%
375
c
376
      if (ushift) go to 50
377
c
378
c     %-------------------------------------%
379
c     | Back from computing residual norm   |
380
c     | at the end of the current iteration |
381
c     %-------------------------------------%
382
c
383
      if (cnorm)  go to 100
384
c
385
c     %----------------------------------------------------------%
386
c     | Compute the first NEV steps of the Lanczos factorization |
387
c     %----------------------------------------------------------%
388
c
389
      call pdsaitr (comm, ido, bmat, n, 0, nev0, mode,
390
     &              resid, rnorm, v, ldv, h, ldh, ipntr,
391
     &              workd, workl, info)
392
c
393
c     %---------------------------------------------------%
394
c     | ido .ne. 99 implies use of reverse communication  |
395
c     | to compute operations involving OP and possibly B |
396
c     %---------------------------------------------------%
397
c
398
      if (ido .ne. 99) go to 9000
399
c
400
      if (info .gt. 0) then
401
c
402
c        %-----------------------------------------------------%
403
c        | pdsaitr was unable to build an Lanczos factorization|
404
c        | of length NEV0. INFO is returned with the size of   |
405
c        | the factorization built. Exit main loop.            |
406
c        %-----------------------------------------------------%
407
c
408
         np   = info
409
         mxiter = iter
410
         info = -9999
411
         go to 1200
412
      end if
413
c
414
c     %--------------------------------------------------------------%
415
c     |                                                              |
416
c     |           M A I N  LANCZOS  I T E R A T I O N  L O O P       |
417
c     |           Each iteration implicitly restarts the Lanczos     |
418
c     |           factorization in place.                            |
419
c     |                                                              |
420
c     %--------------------------------------------------------------%
421
c
422
 1000 continue
423
c
424
         iter = iter + 1
425
c
426
         if (msglvl .gt. 0) then
427
            call pivout (comm, logfil, 1, [iter], ndigit,
428
     &           '_saup2: **** Start of major iteration number ****')
429
         end if
430
         if (msglvl .gt. 1) then
431
            call pivout (comm, logfil, 1, [nev], ndigit,
432
     &     '_saup2: The length of the current Lanczos factorization')
433
            call pivout (comm, logfil, 1, [np], ndigit,
434
     &           '_saup2: Extend the Lanczos factorization by')
435
         end if
436
c
437
c        %------------------------------------------------------------%
438
c        | Compute NP additional steps of the Lanczos factorization.  |
439
c        %------------------------------------------------------------%
440
c
441
         ido = 0
442
   20    continue
443
         update = .true.
444
c
445
         call pdsaitr (comm, ido, bmat, n, nev, np, mode,
446
     &                 resid, rnorm, v, ldv, h, ldh, ipntr,
447
     &                 workd, workl, info)
448
c
449
c        %---------------------------------------------------%
450
c        | ido .ne. 99 implies use of reverse communication  |
451
c        | to compute operations involving OP and possibly B |
452
c        %---------------------------------------------------%
453
c
454
         if (ido .ne. 99) go to 9000
455
c
456
         if (info .gt. 0) then
457
c
458
c           %-----------------------------------------------------%
459
c           | pdsaitr was unable to build an Lanczos factorization|
460
c           | of length NEV0+NP0. INFO is returned with the size  |
461
c           | of the factorization built. Exit main loop.         |
462
c           %-----------------------------------------------------%
463
c
464
            np = info
465
            mxiter = iter
466
            info = -9999
467
            go to 1200
468
         end if
469
         update = .false.
470
c
471
         if (msglvl .gt. 1) then
472
            call pdvout (comm, logfil, 1, [rnorm], ndigit,
473
     &           '_saup2: Current B-norm of residual for factorization')
474
         end if
475
c
476
c        %--------------------------------------------------------%
477
c        | Compute the eigenvalues and corresponding error bounds |
478
c        | of the current symmetric tridiagonal matrix.           |
479
c        %--------------------------------------------------------%
480
c
481
         call pdseigt ( comm, rnorm, kplusp, h, ldh, ritz, bounds,
482
     &                  workl, ierr)
483
c
484
         if (ierr .ne. 0) then
485
            info = -8
486
            go to 1200
487
         end if
488
c
489
c        %----------------------------------------------------%
490
c        | Make a copy of eigenvalues and corresponding error |
491
c        | bounds obtained from _seigt.                       |
492
c        %----------------------------------------------------%
493
c
494
         call dcopy(kplusp, ritz, 1, workl(kplusp+1), 1)
495
         call dcopy(kplusp, bounds, 1, workl(2*kplusp+1), 1)
496
c
497
c        %---------------------------------------------------%
498
c        | Select the wanted Ritz values and their bounds    |
499
c        | to be used in the convergence test.               |
500
c        | The selection is based on the requested number of |
501
c        | eigenvalues instead of the current NEV and NP to  |
502
c        | prevent possible misconvergence.                  |
503
c        | * Wanted Ritz values := RITZ(NP+1:NEV+NP)         |
504
c        | * Shifts := RITZ(1:NP) := WORKL(1:NP)             |
505
c        %---------------------------------------------------%
506
c
507
         nev = nev0
508
         np = np0
509
         call pdsgets ( comm, ishift, which, nev, np, ritz,
510
     &                  bounds, workl)
511
c
512
c        %-------------------%
513
c        | Convergence test. |
514
c        %-------------------%
515
c
516
         call dcopy (nev, bounds(np+1), 1, workl(np+1), 1)
517
         call dsconv (nev, ritz(np+1), workl(np+1), tol, nconv)
518
c
519
         if (msglvl .gt. 2) then
520
            kp(1) = nev
521
            kp(2) = np
522
            kp(3) = nconv
523
            call pivout (comm, logfil, 3, kp, ndigit,
524
     &                  '_saup2: NEV, NP, NCONV are')
525
            call pdvout (comm, logfil, kplusp, ritz, ndigit,
526
     &           '_saup2: The eigenvalues of H')
527
            call pdvout (comm, logfil, kplusp, bounds, ndigit,
528
     &          '_saup2: Ritz estimates of the current NCV Ritz values')
529
         end if
530
c
531
c        %---------------------------------------------------------%
532
c        | Count the number of unwanted Ritz values that have zero |
533
c        | Ritz estimates. If any Ritz estimates are equal to zero |
534
c        | then a leading block of H of order equal to at least    |
535
c        | the number of Ritz values with zero Ritz estimates has  |
536
c        | split off. None of these Ritz values may be removed by  |
537
c        | shifting. Decrease NP the number of shifts to apply. If |
538
c        | no shifts may be applied, then prepare to exit          |
539
c        %---------------------------------------------------------%
540
c
541
         nptemp = np
542
         do 30 j=1, nptemp
543
            if (bounds(j) .eq. zero) then
544
               np = np - 1
545
               nev = nev + 1
546
            end if
547
 30      continue
548
c
549
         if ( (nconv .ge. nev0) .or.
550
     &        (iter .gt. mxiter) .or.
551
     &        (np .eq. 0) ) then
552
c
553
c           %------------------------------------------------%
554
c           | Prepare to exit. Put the converged Ritz values |
555
c           | and corresponding bounds in RITZ(1:NCONV) and  |
556
c           | BOUNDS(1:NCONV) respectively. Then sort. Be    |
557
c           | careful when NCONV > NP since we don't want to |
558
c           | swap overlapping locations.                    |
559
c           %------------------------------------------------%
560
c
561
            if (which .eq. 'BE') then
562
c
563
c              %-----------------------------------------------------%
564
c              | Both ends of the spectrum are requested.            |
565
c              | Sort the eigenvalues into algebraically decreasing  |
566
c              | order first then swap low end of the spectrum next  |
567
c              | to high end in appropriate locations.               |
568
c              | NOTE: when np < floor(nev/2) be careful not to swap |
569
c              | overlapping locations.                              |
570
c              %-----------------------------------------------------%
571
c
572
               wprime = 'SA'
573
               call dsortr (wprime, .true., kplusp, ritz, bounds)
574
               nevd2 = nev0 / 2
575
               nevm2 = nev0 - nevd2
576
               if ( nev .gt. 1 ) then
577
                  call dswap ( min(nevd2,np), ritz(nevm2+1), 1,
578
     &                 ritz( max(kplusp-nevd2+1,kplusp-np+1) ), 1)
579
                  call dswap ( min(nevd2,np), bounds(nevm2+1), 1,
580
     &                 bounds( max(kplusp-nevd2+1,kplusp-np+1)), 1)
581
               end if
582
c
583
            else
584
c
585
c              %--------------------------------------------------%
586
c              | LM, SM, LA, SA case.                             |
587
c              | Sort the eigenvalues of H into the an order that |
588
c              | is opposite to WHICH, and apply the resulting    |
589
c              | order to BOUNDS.  The eigenvalues are sorted so  |
590
c              | that the wanted part are always within the first |
591
c              | NEV locations.                                   |
592
c              %--------------------------------------------------%
593
c
594
               if (which .eq. 'LM') wprime = 'SM'
595
               if (which .eq. 'SM') wprime = 'LM'
596
               if (which .eq. 'LA') wprime = 'SA'
597
               if (which .eq. 'SA') wprime = 'LA'
598
c
599
               call dsortr (wprime, .true., kplusp, ritz, bounds)
600
c
601
            end if
602
c
603
c           %--------------------------------------------------%
604
c           | Scale the Ritz estimate of each Ritz value       |
605
c           | by 1 / max(eps23,magnitude of the Ritz value).   |
606
c           %--------------------------------------------------%
607
c
608
            do 35 j = 1, nev0
609
               temp = max( eps23, abs(ritz(j)) )
610
               bounds(j) = bounds(j)/temp
611
 35         continue
612
c
613
c           %----------------------------------------------------%
614
c           | Sort the Ritz values according to the scaled Ritz  |
615
c           | esitmates.  This will push all the converged ones  |
616
c           | towards the front of ritzr, ritzi, bounds          |
617
c           | (in the case when NCONV < NEV.)                    |
618
c           %----------------------------------------------------%
619
c
620
            wprime = 'LA'
621
            call dsortr(wprime, .true., nev0, bounds, ritz)
622
c
623
c           %----------------------------------------------%
624
c           | Scale the Ritz estimate back to its original |
625
c           | value.                                       |
626
c           %----------------------------------------------%
627
c
628
            do 40 j = 1, nev0
629
                temp = max( eps23, abs(ritz(j)) )
630
                bounds(j) = bounds(j)*temp
631
 40         continue
632
c
633
c           %--------------------------------------------------%
634
c           | Sort the "converged" Ritz values again so that   |
635
c           | the "threshold" values and their associated Ritz |
636
c           | estimates appear at the appropriate position in  |
637
c           | ritz and bound.                                  |
638
c           %--------------------------------------------------%
639
c
640
            if (which .eq. 'BE') then
641
c
642
c              %------------------------------------------------%
643
c              | Sort the "converged" Ritz values in increasing |
644
c              | order.  The "threshold" values are in the      |
645
c              | middle.                                        |
646
c              %------------------------------------------------%
647
c
648
               wprime = 'LA'
649
               call dsortr(wprime, .true., nconv, ritz, bounds)
650
c
651
            else
652
c
653
c              %----------------------------------------------%
654
c              | In LM, SM, LA, SA case, sort the "converged" |
655
c              | Ritz values according to WHICH so that the   |
656
c              | "threshold" value appears at the front of    |
657
c              | ritz.                                        |
658
c              %----------------------------------------------%
659

660
               call dsortr(which, .true., nconv, ritz, bounds)
661
c
662
            end if
663
c
664
c           %------------------------------------------%
665
c           |  Use h( 1,1 ) as storage to communicate  |
666
c           |  rnorm to _seupd if needed               |
667
c           %------------------------------------------%
668
c
669
            h(1,1) = rnorm
670
c
671
            if (msglvl .gt. 1) then
672
               call pdvout (comm, logfil, kplusp, ritz, ndigit,
673
     &            '_saup2: Sorted Ritz values.')
674
               call pdvout (comm, logfil, kplusp, bounds, ndigit,
675
     &            '_saup2: Sorted ritz estimates.')
676
            end if
677
c
678
c           %------------------------------------%
679
c           | Max iterations have been exceeded. |
680
c           %------------------------------------%
681
c
682
            if (iter .gt. mxiter .and. nconv .lt. nev) info = 1
683
c
684
c           %---------------------%
685
c           | No shifts to apply. |
686
c           %---------------------%
687
c
688
            if (np .eq. 0 .and. nconv .lt. nev0) info = 2
689
c
690
            np = nconv
691
            go to 1100
692
c
693
         else if (nconv .lt. nev .and. ishift .eq. 1) then
694
c
695
c           %---------------------------------------------------%
696
c           | Do not have all the requested eigenvalues yet.    |
697
c           | To prevent possible stagnation, adjust the number |
698
c           | of Ritz values and the shifts.                    |
699
c           %---------------------------------------------------%
700
c
701
            nevbef = nev
702
            nev = nev + min (nconv, np/2)
703
            if (nev .eq. 1 .and. kplusp .ge. 6) then
704
               nev = kplusp / 2
705
            else if (nev .eq. 1 .and. kplusp .gt. 2) then
706
               nev = 2
707
            end if
708
            np  = kplusp - nev
709
c
710
c           %---------------------------------------%
711
c           | If the size of NEV was just increased |
712
c           | resort the eigenvalues.               |
713
c           %---------------------------------------%
714
c
715
            if (nevbef .lt. nev)
716
     &         call pdsgets ( comm, ishift, which, nev, np,
717
     &                        ritz, bounds, workl)
718
c
719
         end if
720
c
721
         if (msglvl .gt. 0) then
722
            call pivout (comm, logfil, 1, [nconv], ndigit,
723
     &           '_saup2: no. of "converged" Ritz values at this iter.')
724
            if (msglvl .gt. 1) then
725
               kp(1) = nev
726
               kp(2) = np
727
               call pivout (comm, logfil, 2, kp, ndigit,
728
     &              '_saup2: NEV and NP .')
729
               call pdvout (comm, logfil, nev, ritz(np+1), ndigit,
730
     &              '_saup2: "wanted" Ritz values.')
731
               call pdvout (comm, logfil, nev, bounds(np+1), ndigit,
732
     &              '_saup2: Ritz estimates of the "wanted" values ')
733
            end if
734
         end if
735
c
736
         if (ishift .eq. 0) then
737
c
738
c           %-----------------------------------------------------%
739
c           | User specified shifts: reverse communication to     |
740
c           | compute the shifts. They are returned in the first  |
741
c           | NP locations of WORKL.                              |
742
c           %-----------------------------------------------------%
743
c
744
            ushift = .true.
745
            ido = 3
746
            go to 9000
747
         end if
748
c
749
   50    continue
750
c
751
c        %------------------------------------%
752
c        | Back from reverse communication;   |
753
c        | User specified shifts are returned |
754
c        | in WORKL(1:NP)                     |
755
c        %------------------------------------%
756
c
757
         ushift = .false.
758
c
759
c
760
c        %---------------------------------------------------------%
761
c        | Move the NP shifts to the first NP locations of RITZ to |
762
c        | free up WORKL.  This is for the non-exact shift case;   |
763
c        | in the exact shift case, pdsgets already handles this.  |
764
c        %---------------------------------------------------------%
765
c
766
         if (ishift .eq. 0) call dcopy (np, workl, 1, ritz, 1)
767
c
768
         if (msglvl .gt. 2) then
769
            call pivout (comm, logfil, 1, [np], ndigit,
770
     &                  '_saup2: The number of shifts to apply ')
771
            call pdvout (comm, logfil, np, workl, ndigit,
772
     &                  '_saup2: shifts selected')
773
            if (ishift .eq. 1) then
774
               call pdvout (comm, logfil, np, bounds, ndigit,
775
     &                  '_saup2: corresponding Ritz estimates')
776
             end if
777
         end if
778
c
779
c        %---------------------------------------------------------%
780
c        | Apply the NP0 implicit shifts by QR bulge chasing.      |
781
c        | Each shift is applied to the entire tridiagonal matrix. |
782
c        | The first 2*N locations of WORKD are used as workspace. |
783
c        | After pdsapps is done, we have a Lanczos                |
784
c        | factorization of length NEV.                            |
785
c        %---------------------------------------------------------%
786
c
787
         call pdsapps ( comm, n, nev, np, ritz, v, ldv, h, ldh, resid,
788
     &                  q, ldq, workd)
789
c
790
c        %---------------------------------------------%
791
c        | Compute the B-norm of the updated residual. |
792
c        | Keep B*RESID in WORKD(1:N) to be used in    |
793
c        | the first step of the next call to pdsaitr. |
794
c        %---------------------------------------------%
795
c
796
         cnorm = .true.
797
         call second (t2)
798
         if (bmat .eq. 'G') then
799
            nbx = nbx + 1
800
            call dcopy (n, resid, 1, workd(n+1), 1)
801
            ipntr(1) = n + 1
802
            ipntr(2) = 1
803
            ido = 2
804
c
805
c           %----------------------------------%
806
c           | Exit in order to compute B*RESID |
807
c           %----------------------------------%
808
c
809
            go to 9000
810
         else if (bmat .eq. 'I') then
811
            call dcopy (n, resid, 1, workd, 1)
812
         end if
813
c
814
  100    continue
815
c
816
c        %----------------------------------%
817
c        | Back from reverse communication; |
818
c        | WORKD(1:N) := B*RESID            |
819
c        %----------------------------------%
820
c
821
         if (bmat .eq. 'G') then
822
            call second (t3)
823
            tmvbx = tmvbx + (t3 - t2)
824
         end if
825
c
826
         if (bmat .eq. 'G') then
827
            rnorm_buf = ddot (n, resid, 1, workd, 1)
828
            call MPI_ALLREDUCE( rnorm_buf, rnorm, 1,
829
     &                MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
830
            rnorm = sqrt(abs(rnorm))
831
         else if (bmat .eq. 'I') then
832
            rnorm = pdnorm2( comm, n, resid, 1 )
833
         end if
834
         cnorm = .false.
835
  130    continue
836
c
837
         if (msglvl .gt. 2) then
838
            call pdvout (comm, logfil, 1, [rnorm], ndigit,
839
     &      '_saup2: B-norm of residual for NEV factorization')
840
            call pdvout (comm, logfil, nev, h(1,2), ndigit,
841
     &           '_saup2: main diagonal of compressed H matrix')
842
            call pdvout (comm, logfil, nev-1, h(2,1), ndigit,
843
     &           '_saup2: subdiagonal of compressed H matrix')
844
         end if
845
c
846
      go to 1000
847
c
848
c     %---------------------------------------------------------------%
849
c     |                                                               |
850
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
851
c     |                                                               |
852
c     %---------------------------------------------------------------%
853
c
854
 1100 continue
855
c
856
      mxiter = iter
857
      nev = nconv
858
c
859
 1200 continue
860
      ido = 99
861
c
862
c     %------------%
863
c     | Error exit |
864
c     %------------%
865
c
866
      call second (t1)
867
      tsaup2 = t1 - t0
868
c
869
 9000 continue
870
      return
871
c
872
c     %----------------%
873
c     | End of pdsaup2 |
874
c     %----------------%
875
c
876
      end
877

878
Product

Resources

Company