CoCalc -- psnaitr.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/psnaitr.f
⁵¹⁹⁸ views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: psnaitr
5
c
6
c Message Passing Layer: MPI
7
c
8
c\Description:
9
c  Reverse communication interface for applying NP additional steps to
10
c  a K step nonsymmetric Arnoldi factorization.
11
c
12
c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
13
c
14
c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
15
c
16
c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
17
c
18
c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
19
c
20
c  where OP and B are as in psnaupd.  The B-norm of r_{k+p} is also
21
c  computed and returned.
22
c
23
c\Usage:
24
c  call psnaitr
25
c     ( COMM, IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
26
c       IPNTR, WORKD, WORKL, INFO )
27
c
28
c\Arguments
29
c  COMM    MPI Communicator for the processor grid.  (INPUT)
30
c
31
c  IDO     Integer.  (INPUT/OUTPUT)
32
c          Reverse communication flag.
33
c          -------------------------------------------------------------
34
c          IDO =  0: first call to the reverse communication interface
35
c          IDO = -1: compute  Y = OP * X  where
36
c                    IPNTR(1) is the pointer into WORK for X,
37
c                    IPNTR(2) is the pointer into WORK for Y.
38
c                    This is for the restart phase to force the new
39
c                    starting vector into the range of OP.
40
c          IDO =  1: compute  Y = OP * X  where
41
c                    IPNTR(1) is the pointer into WORK for X,
42
c                    IPNTR(2) is the pointer into WORK for Y,
43
c                    IPNTR(3) is the pointer into WORK for B * X.
44
c          IDO =  2: compute  Y = B * X  where
45
c                    IPNTR(1) is the pointer into WORK for X,
46
c                    IPNTR(2) is the pointer into WORK for Y.
47
c          IDO = 99: done
48
c          -------------------------------------------------------------
49
c          When the routine is used in the "shift-and-invert" mode, the
50
c          vector B * Q is already available and do not need to be
51
c          recompute in forming OP * Q.
52
c
53
c  BMAT    Character*1.  (INPUT)
54
c          BMAT specifies the type of the matrix B that defines the
55
c          semi-inner product for the operator OP.  See psnaupd.
56
c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
57
c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
58
c
59
c  N       Integer.  (INPUT)
60
c          Dimension of the eigenproblem.
61
c
62
c  K       Integer.  (INPUT)
63
c          Current size of V and H.
64
c
65
c  NP      Integer.  (INPUT)
66
c          Number of additional Arnoldi steps to take.
67
c
68
c  NB      Integer.  (INPUT)
69
c          Blocksize to be used in the recurrence.
70
c          Only work for NB = 1 right now.  The goal is to have a
71
c          program that implement both the block and non-block method.
72
c
73
c  RESID   Real array of length N.  (INPUT/OUTPUT)
74
c          On INPUT:  RESID contains the residual vector r_{k}.
75
c          On OUTPUT: RESID contains the residual vector r_{k+p}.
76
c
77
c  RNORM   Real scalar.  (INPUT/OUTPUT)
78
c          B-norm of the starting residual on input.
79
c          B-norm of the updated residual r_{k+p} on output.
80
c
81
c  V       Real N by K+NP array.  (INPUT/OUTPUT)
82
c          On INPUT:  V contains the Arnoldi vectors in the first K
83
c          columns.
84
c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
85
c          NP columns.  The first K columns are unchanged.
86
c
87
c  LDV     Integer.  (INPUT)
88
c          Leading dimension of V exactly as declared in the calling
89
c          program.
90
c
91
c  H       Real (K+NP) by (K+NP) array.  (INPUT/OUTPUT)
92
c          H is used to store the generated upper Hessenberg matrix.
93
c
94
c  LDH     Integer.  (INPUT)
95
c          Leading dimension of H exactly as declared in the calling
96
c          program.
97
c
98
c  IPNTR   Integer array of length 3.  (OUTPUT)
99
c          Pointer to mark the starting locations in the WORK for
100
c          vectors used by the Arnoldi iteration.
101
c          -------------------------------------------------------------
102
c          IPNTR(1): pointer to the current operand vector X.
103
c          IPNTR(2): pointer to the current result vector Y.
104
c          IPNTR(3): pointer to the vector B * X when used in the
105
c                    shift-and-invert mode.  X is the current operand.
106
c          -------------------------------------------------------------
107
c
108
c  WORKD   Real work array of length 3*N.  (REVERSE COMMUNICATION)
109
c          Distributed array to be used in the basic Arnoldi iteration
110
c          for reverse communication.  The calling program should not
111
c          use WORKD as temporary workspace during the iteration !!!!!!
112
c          On input, WORKD(1:N) = B*RESID and is used to save some
113
c          computation at the first step.
114
c
115
c  WORKL   Real work space used for Gram Schmidt orthogonalization
116
c
117
c  INFO    Integer.  (OUTPUT)
118
c          = 0: Normal exit.
119
c          > 0: Size of the spanning invariant subspace of OP found.
120
c
121
c\EndDoc
122
c
123
c-----------------------------------------------------------------------
124
c
125
c\BeginLib
126
c
127
c\Local variables:
128
c     xxxxxx  real
129
c
130
c\References:
131
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
132
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
133
c     pp 357-385.
134
c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
135
c     Restarted Arnoldi Iteration", Rice University Technical Report
136
c     TR95-13, Department of Computational and Applied Mathematics.
137
c
138
c\Routines called:
139
c     psgetv0  Parallel ARPACK routine to generate the initial vector.
140
c     pivout   Parallel ARPACK utility routine that prints integers.
141
c     second   ARPACK utility routine for timing.
142
c     psmout   Parallel ARPACK utility routine that prints matrices
143
c     psvout   Parallel ARPACK utility routine that prints vectors.
144
c     slabad   LAPACK routine that computes machine constants.
145
c     pslamch  ScaLAPACK routine that determines machine constants.
146
c     slascl   LAPACK routine for careful scaling of a matrix.
147
c     slanhs   LAPACK routine that computes various norms of a matrix.
148
c     sgemv    Level 2 BLAS routine for matrix vector multiplication.
149
c     saxpy    Level 1 BLAS that computes a vector triad.
150
c     sscal    Level 1 BLAS that scales a vector.
151
c     scopy    Level 1 BLAS that copies one vector to another .
152
c     sdot     Level 1 BLAS that computes the scalar product of two vectors.
153
c     psnorm2  Parallel version of Level 1 BLAS that computes the norm of a vector.
154
c
155
c\Author
156
c     Danny Sorensen               Phuong Vu
157
c     Richard Lehoucq              CRPC / Rice University
158
c     Dept. of Computational &     Houston, Texas
159
c     Applied Mathematics
160
c     Rice University
161
c     Houston, Texas
162
c
163
c\Parallel Modifications
164
c     Kristi Maschhoff
165
c
166
c\Revision history:
167
c     Starting Point: Serial Code FILE: naitr.F   SID: 2.2
168
c
169
c\SCCS Information:
170
c FILE: naitr.F   SID: 1.3   DATE OF SID: 3/19/97
171
c
172
c\Remarks
173
c  The algorithm implemented is:
174
c
175
c  restart = .false.
176
c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
177
c  r_{k} contains the initial residual vector even for k = 0;
178
c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
179
c  computed by the calling program.
180
c
181
c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
182
c  For  j = k+1, ..., k+np  Do
183
c     1) if ( betaj < tol ) stop or restart depending on j.
184
c        ( At present tol is zero )
185
c        if ( restart ) generate a new starting vector.
186
c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
187
c        p_{j} = p_{j}/betaj
188
c     3) r_{j} = OP*v_{j} where OP is defined as in psnaupd
189
c        For shift-invert mode p_{j} = B*v_{j} is already available.
190
c        wnorm = || OP*v_{j} ||
191
c     4) Compute the j-th step residual vector.
192
c        w_{j} =  V_{j}^T * B * OP * v_{j}
193
c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
194
c        H(:,j) = w_{j};
195
c        H(j,j-1) = rnorm
196
c        rnorm = || r_(j) ||
197
c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
198
c     5) Re-orthogonalization step:
199
c        s = V_{j}'*B*r_{j}
200
c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
201
c        alphaj = alphaj + s_{j};
202
c     6) Iterative refinement step:
203
c        If (rnorm1 > 0.717*rnorm) then
204
c           rnorm = rnorm1
205
c           accept step and go back to 1)
206
c        Else
207
c           rnorm = rnorm1
208
c           If this is the first time in step 6), go to 5)
209
c           Else r_{j} lies in the span of V_{j} numerically.
210
c              Set r_{j} = 0 and rnorm = 0; go to 1)
211
c        EndIf
212
c  End Do
213
c
214
c\EndLib
215
c
216
c-----------------------------------------------------------------------
217
c
218
      subroutine psnaitr
219
     &   (comm, ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
220
     &    ipntr, workd, workl, info)
221
c
222
      include   'mpif.h'
223
c
224
c     %---------------%
225
c     | MPI Variables |
226
c     %---------------%
227
c
228
      integer    comm
229
c
230
c     %----------------------------------------------------%
231
c     | Include files for debugging and timing information |
232
c     %----------------------------------------------------%
233
c
234
      include   'debug.h'
235
      include   'stat.h'
236
c
237
c     %------------------%
238
c     | Scalar Arguments |
239
c     %------------------%
240
c
241
      character  bmat*1
242
      integer    ido, info, k, ldh, ldv, n, nb, np
243
      Real
244
     &           rnorm
245
c
246
c     %-----------------%
247
c     | Array Arguments |
248
c     %-----------------%
249
c
250
      integer    ipntr(3)
251
      Real
252
     &           h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n),
253
     &           workl(2*ldh)
254
c
255
c     %------------%
256
c     | Parameters |
257
c     %------------%
258
c
259
      Real
260
     &           one, zero
261
      parameter (one = 1.0, zero = 0.0)
262
c
263
c     %---------------%
264
c     | Local Scalars |
265
c     %---------------%
266
c
267
      logical    first, orth1, orth2, rstart, step3, step4
268
      integer    ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
269
     &           jj
270
      Real
271
     &           betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl,
272
     &           wnorm
273
      save       first, orth1, orth2, rstart, step3, step4,
274
     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
275
     &           betaj, rnorm1, smlnum, ulp, unfl, wnorm
276
c
277
      Real
278
     &           rnorm_buf
279
c
280
c
281
c     %-----------------------%
282
c     | Local Array Arguments |
283
c     %-----------------------%
284
c
285
      Real
286
     &           xtemp(2)
287
c
288
c     %----------------------%
289
c     | External Subroutines |
290
c     %----------------------%
291
c
292
      external   saxpy, scopy, sscal, sgemv, psgetv0, slabad,
293
     &           psvout, psmout, pivout, second
294
c
295
c     %--------------------%
296
c     | External Functions |
297
c     %--------------------%
298
c
299
      Real
300
     &           sdot, psnorm2, slanhs, pslamch10
301
      external   sdot, psnorm2, slanhs, pslamch10
302
c
303
c     %---------------------%
304
c     | Intrinsic Functions |
305
c     %---------------------%
306
c
307
      intrinsic    abs, sqrt
308
c
309
c     %-----------------%
310
c     | Data statements |
311
c     %-----------------%
312
c
313
      data      first / .true. /
314
c
315
c     %-----------------------%
316
c     | Executable Statements |
317
c     %-----------------------%
318
c
319
      if (first) then
320
c
321
c        %-----------------------------------------%
322
c        | Set machine-dependent constants for the |
323
c        | the splitting and deflation criterion.  |
324
c        | If norm(H) <= sqrt(OVFL),               |
325
c        | overflow should not occur.              |
326
c        | REFERENCE: LAPACK subroutine slahqr     |
327
c        %-----------------------------------------%
328
c
329
         unfl = pslamch10(comm, 'safe minimum' )
330
         ovfl = one / unfl
331
         call slabad( unfl, ovfl )
332
         ulp = pslamch10( comm, 'precision' )
333
         smlnum = unfl*( n / ulp )
334
         first = .false.
335
      end if
336
c
337
      if (ido .eq. 0) then
338
c
339
c        %-------------------------------%
340
c        | Initialize timing statistics  |
341
c        | & message level for debugging |
342
c        %-------------------------------%
343
c
344
         call second (t0)
345
         msglvl = mnaitr
346
c
347
c        %------------------------------%
348
c        | Initial call to this routine |
349
c        %------------------------------%
350
c
351
         info   = 0
352
         step3  = .false.
353
         step4  = .false.
354
         rstart = .false.
355
         orth1  = .false.
356
         orth2  = .false.
357
         j      = k + 1
358
         ipj    = 1
359
         irj    = ipj   + n
360
         ivj    = irj   + n
361
      end if
362
c
363
c     %-------------------------------------------------%
364
c     | When in reverse communication mode one of:      |
365
c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
366
c     | will be .true. when ....                        |
367
c     | STEP3: return from computing OP*v_{j}.          |
368
c     | STEP4: return from computing B-norm of OP*v_{j} |
369
c     | ORTH1: return from computing B-norm of r_{j+1}  |
370
c     | ORTH2: return from computing B-norm of          |
371
c     |        correction to the residual vector.       |
372
c     | RSTART: return from OP computations needed by   |
373
c     |         psgetv0.                                |
374
c     %-------------------------------------------------%
375
c
376
      if (step3)  go to 50
377
      if (step4)  go to 60
378
      if (orth1)  go to 70
379
      if (orth2)  go to 90
380
      if (rstart) go to 30
381
c
382
c     %-----------------------------%
383
c     | Else this is the first step |
384
c     %-----------------------------%
385
c
386
c     %--------------------------------------------------------------%
387
c     |                                                              |
388
c     |        A R N O L D I     I T E R A T I O N     L O O P       |
389
c     |                                                              |
390
c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
391
c     %--------------------------------------------------------------%
392

393
 1000 continue
394
c
395
         if (msglvl .gt. 1) then
396
            call pivout (comm, logfil, 1, [j], ndigit,
397
     &                  '_naitr: generating Arnoldi vector number')
398
            call psvout (comm, logfil, 1, [rnorm], ndigit,
399
     &                  '_naitr: B-norm of the current residual is')
400
         end if
401
c
402
c        %---------------------------------------------------%
403
c        | STEP 1: Check if the B norm of j-th residual      |
404
c        | vector is zero. Equivalent to determing whether   |
405
c        | an exact j-step Arnoldi factorization is present. |
406
c        %---------------------------------------------------%
407
c
408
         betaj = rnorm
409
         if (rnorm .gt. zero) go to 40
410
c
411
c           %---------------------------------------------------%
412
c           | Invariant subspace found, generate a new starting |
413
c           | vector which is orthogonal to the current Arnoldi |
414
c           | basis and continue the iteration.                 |
415
c           %---------------------------------------------------%
416
c
417
            if (msglvl .gt. 0) then
418
               call pivout (comm, logfil, 1, [j], ndigit,
419
     &                     '_naitr: ****** RESTART AT STEP ******')
420
            end if
421
c
422
c           %---------------------------------------------%
423
c           | ITRY is the loop variable that controls the |
424
c           | maximum amount of times that a restart is   |
425
c           | attempted. NRSTRT is used by stat.h         |
426
c           %---------------------------------------------%
427
c
428
            betaj  = zero
429
            nrstrt = nrstrt + 1
430
            itry   = 1
431
   20       continue
432
            rstart = .true.
433
            ido    = 0
434
   30       continue
435
c
436
c           %--------------------------------------%
437
c           | If in reverse communication mode and |
438
c           | RSTART = .true. flow returns here.   |
439
c           %--------------------------------------%
440
c
441
            call psgetv0 ( comm, ido, bmat, itry, .false., n, j, v, ldv,
442
     &                     resid, rnorm, ipntr, workd, workl, ierr)
443
            if (ido .ne. 99) go to 9000
444
            if (ierr .lt. 0) then
445
               itry = itry + 1
446
               if (itry .le. 3) go to 20
447
c
448
c              %------------------------------------------------%
449
c              | Give up after several restart attempts.        |
450
c              | Set INFO to the size of the invariant subspace |
451
c              | which spans OP and exit.                       |
452
c              %------------------------------------------------%
453
c
454
               info = j - 1
455
               call second (t1)
456
               tnaitr = tnaitr + (t1 - t0)
457
               ido = 99
458
               go to 9000
459
            end if
460
c
461
   40    continue
462
c
463
c        %---------------------------------------------------------%
464
c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
465
c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
466
c        | when reciprocating a small RNORM, test against lower    |
467
c        | machine bound.                                          |
468
c        %---------------------------------------------------------%
469
c
470
         call scopy (n, resid, 1, v(1,j), 1)
471
         if (rnorm .ge. unfl) then
472
             temp1 = one / rnorm
473
             call sscal (n, temp1, v(1,j), 1)
474
             call sscal (n, temp1, workd(ipj), 1)
475
         else
476
c
477
c            %-----------------------------------------%
478
c            | To scale both v_{j} and p_{j} carefully |
479
c            | use LAPACK routine SLASCL               |
480
c            %-----------------------------------------%
481
c
482
             call slascl ('General', i, i, rnorm, one, n, 1,
483
     &                    v(1,j), n, infol)
484
             call slascl ('General', i, i, rnorm, one, n, 1,
485
     &                    workd(ipj), n, infol)
486
         end if
487
c
488
c        %------------------------------------------------------%
489
c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
490
c        | Note that this is not quite yet r_{j}. See STEP 4    |
491
c        %------------------------------------------------------%
492
c
493
         step3 = .true.
494
         nopx  = nopx + 1
495
         call second (t2)
496
         call scopy (n, v(1,j), 1, workd(ivj), 1)
497
         ipntr(1) = ivj
498
         ipntr(2) = irj
499
         ipntr(3) = ipj
500
         ido = 1
501
c
502
c        %-----------------------------------%
503
c        | Exit in order to compute OP*v_{j} |
504
c        %-----------------------------------%
505
c
506
         go to 9000
507
   50    continue
508
c
509
c        %----------------------------------%
510
c        | Back from reverse communication; |
511
c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   |
512
c        | if step3 = .true.                |
513
c        %----------------------------------%
514
c
515
         call second (t3)
516
         tmvopx = tmvopx + (t3 - t2)
517

518
         step3 = .false.
519
c
520
c        %------------------------------------------%
521
c        | Put another copy of OP*v_{j} into RESID. |
522
c        %------------------------------------------%
523
c
524
         call scopy (n, workd(irj), 1, resid, 1)
525
c
526
c        %---------------------------------------%
527
c        | STEP 4:  Finish extending the Arnoldi |
528
c        |          factorization to length j.   |
529
c        %---------------------------------------%
530
c
531
         call second (t2)
532
         if (bmat .eq. 'G') then
533
            nbx = nbx + 1
534
            step4 = .true.
535
            ipntr(1) = irj
536
            ipntr(2) = ipj
537
            ido = 2
538
c
539
c           %-------------------------------------%
540
c           | Exit in order to compute B*OP*v_{j} |
541
c           %-------------------------------------%
542
c
543
            go to 9000
544
         else if (bmat .eq. 'I') then
545
            call scopy (n, resid, 1, workd(ipj), 1)
546
         end if
547
   60    continue
548
c
549
c        %----------------------------------%
550
c        | Back from reverse communication; |
551
c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
552
c        | if step4 = .true.                |
553
c        %----------------------------------%
554
c
555
         if (bmat .eq. 'G') then
556
            call second (t3)
557
            tmvbx = tmvbx + (t3 - t2)
558
         end if
559
c
560
         step4 = .false.
561
c
562
c        %-------------------------------------%
563
c        | The following is needed for STEP 5. |
564
c        | Compute the B-norm of OP*v_{j}.     |
565
c        %-------------------------------------%
566
c
567
         if (bmat .eq. 'G') then
568
            rnorm_buf = sdot (n, resid, 1, workd(ipj), 1)
569
            call MPI_ALLREDUCE( rnorm_buf, wnorm, 1,
570
     &           MPI_REAL, MPI_SUM, comm, ierr )
571
            wnorm = sqrt(abs(wnorm))
572
         else if (bmat .eq. 'I') then
573
            wnorm = psnorm2( comm, n, resid, 1 )
574
         end if
575
c
576
c        %-----------------------------------------%
577
c        | Compute the j-th residual corresponding |
578
c        | to the j step factorization.            |
579
c        | Use Classical Gram Schmidt and compute: |
580
c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
581
c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
582
c        %-----------------------------------------%
583
c
584
c
585
c        %------------------------------------------%
586
c        | Compute the j Fourier coefficients w_{j} |
587
c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
588
c        %------------------------------------------%
589
c
590
         call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
591
     &               zero, workl, 1)
592
         call MPI_ALLREDUCE( workl, h(1,j), j,
593
     &               MPI_REAL, MPI_SUM, comm, ierr)
594
c
595
c        %--------------------------------------%
596
c        | Orthogonalize r_{j} against V_{j}.   |
597
c        | RESID contains OP*v_{j}. See STEP 3. |
598
c        %--------------------------------------%
599
c
600
         call sgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
601
     &               one, resid, 1)
602
c
603
         if (j .gt. 1) h(j,j-1) = betaj
604
c
605
         call second (t4)
606
c
607
         orth1 = .true.
608
c
609
         call second (t2)
610
         if (bmat .eq. 'G') then
611
            nbx = nbx + 1
612
            call scopy (n, resid, 1, workd(irj), 1)
613
            ipntr(1) = irj
614
            ipntr(2) = ipj
615
            ido = 2
616
c
617
c           %----------------------------------%
618
c           | Exit in order to compute B*r_{j} |
619
c           %----------------------------------%
620
c
621
            go to 9000
622
         else if (bmat .eq. 'I') then
623
            call scopy (n, resid, 1, workd(ipj), 1)
624
         end if
625
   70    continue
626
c
627
c        %---------------------------------------------------%
628
c        | Back from reverse communication if ORTH1 = .true. |
629
c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
630
c        %---------------------------------------------------%
631
c
632
         if (bmat .eq. 'G') then
633
            call second (t3)
634
            tmvbx = tmvbx + (t3 - t2)
635
         end if
636
c
637
         orth1 = .false.
638
c
639
c        %------------------------------%
640
c        | Compute the B-norm of r_{j}. |
641
c        %------------------------------%
642
c
643
         if (bmat .eq. 'G') then
644
            rnorm_buf = sdot (n, resid, 1, workd(ipj), 1)
645
            call MPI_ALLREDUCE( rnorm_buf, rnorm, 1,
646
     &           MPI_REAL, MPI_SUM, comm, ierr )
647
            rnorm = sqrt(abs(rnorm))
648
         else if (bmat .eq. 'I') then
649
            rnorm = psnorm2( comm, n, resid, 1 )
650
         end if
651
c
652
c        %-----------------------------------------------------------%
653
c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
654
c        | Maximum NITER_ITREF tries.                                |
655
c        |                                                           |
656
c        |          s      = V_{j}^T * B * r_{j}                     |
657
c        |          r_{j}  = r_{j} - V_{j}*s                         |
658
c        |          alphaj = alphaj + s_{j}                          |
659
c        |                                                           |
660
c        | The stopping criteria used for iterative refinement is    |
661
c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
662
c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
663
c        | Determine if we need to correct the residual. The goal is |
664
c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
665
c        | The following test determines whether the sine of the     |
666
c        | angle between  OP*x and the computed residual is less     |
667
c        | than or equal to 0.717.                                   |
668
c        %-----------------------------------------------------------%
669
c
670
         if (rnorm .gt. 0.717*wnorm) go to 100
671
         iter  = 0
672
         nrorth = nrorth + 1
673
c
674
c        %---------------------------------------------------%
675
c        | Enter the Iterative refinement phase. If further  |
676
c        | refinement is necessary, loop back here. The loop |
677
c        | variable is ITER. Perform a step of Classical     |
678
c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
679
c        %---------------------------------------------------%
680
c
681
   80    continue
682
c
683
         if (msglvl .gt. 2) then
684
            xtemp(1) = wnorm
685
            xtemp(2) = rnorm
686
            call psvout (comm, logfil, 2, xtemp, ndigit,
687
     &           '_naitr: re-orthonalization; wnorm and rnorm are')
688
            call psvout (comm, logfil, j, h(1,j), ndigit,
689
     &                  '_naitr: j-th column of H')
690
         end if
691
c
692
c        %----------------------------------------------------%
693
c        | Compute V_{j}^T * B * r_{j}.                       |
694
c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
695
c        %----------------------------------------------------%
696
c
697
         call sgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
698
     &               zero, workl(j+1), 1)
699
         call MPI_ALLREDUCE( workl(j+1), workl(1), j,
700
     &               MPI_REAL, MPI_SUM, comm, ierr)
701
c
702
c        %---------------------------------------------%
703
c        | Compute the correction to the residual:     |
704
c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
705
c        | The correction to H is v(:,1:J)*H(1:J,1:J)  |
706
c        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         |
707
c        %---------------------------------------------%
708
c
709
         call sgemv ('N', n, j, -one, v, ldv, workl(1), 1,
710
     &               one, resid, 1)
711
         call saxpy (j, one, workl(1), 1, h(1,j), 1)
712
c
713
         orth2 = .true.
714
         call second (t2)
715
         if (bmat .eq. 'G') then
716
            nbx = nbx + 1
717
            call scopy (n, resid, 1, workd(irj), 1)
718
            ipntr(1) = irj
719
            ipntr(2) = ipj
720
            ido = 2
721
c
722
c           %-----------------------------------%
723
c           | Exit in order to compute B*r_{j}. |
724
c           | r_{j} is the corrected residual.  |
725
c           %-----------------------------------%
726
c
727
            go to 9000
728
         else if (bmat .eq. 'I') then
729
            call scopy (n, resid, 1, workd(ipj), 1)
730
         end if
731
   90    continue
732
c
733
c        %---------------------------------------------------%
734
c        | Back from reverse communication if ORTH2 = .true. |
735
c        %---------------------------------------------------%
736
c
737
         if (bmat .eq. 'G') then
738
            call second (t3)
739
            tmvbx = tmvbx + (t3 - t2)
740
         end if
741
c
742
c        %-----------------------------------------------------%
743
c        | Compute the B-norm of the corrected residual r_{j}. |
744
c        %-----------------------------------------------------%
745
c
746
         if (bmat .eq. 'G') then
747
           rnorm_buf = sdot (n, resid, 1, workd(ipj), 1)
748
           call MPI_ALLREDUCE( rnorm_buf, rnorm1, 1,
749
     &          MPI_REAL, MPI_SUM, comm, ierr )
750
           rnorm1 = sqrt(abs(rnorm1))
751
         else if (bmat .eq. 'I') then
752
           rnorm1 = psnorm2( comm, n, resid, 1 )
753
         end if
754
c
755
         if (msglvl .gt. 0 .and. iter .gt. 0) then
756
            call pivout (comm, logfil, 1, [j], ndigit,
757
     &           '_naitr: Iterative refinement for Arnoldi residual')
758
            if (msglvl .gt. 2) then
759
                xtemp(1) = rnorm
760
                xtemp(2) = rnorm1
761
                call psvout (comm, logfil, 2, xtemp, ndigit,
762
     &           '_naitr: iterative refinement ; rnorm and rnorm1 are')
763
            end if
764
         end if
765
c
766
c        %-----------------------------------------%
767
c        | Determine if we need to perform another |
768
c        | step of re-orthogonalization.           |
769
c        %-----------------------------------------%
770
c
771
         if (rnorm1 .gt. 0.717*rnorm) then
772
c
773
c           %---------------------------------------%
774
c           | No need for further refinement.       |
775
c           | The cosine of the angle between the   |
776
c           | corrected residual vector and the old |
777
c           | residual vector is greater than 0.717 |
778
c           | In other words the corrected residual |
779
c           | and the old residual vector share an  |
780
c           | angle of less than arcCOS(0.717)      |
781
c           %---------------------------------------%
782
c
783
            rnorm = rnorm1
784
c
785
         else
786
c
787
c           %-------------------------------------------%
788
c           | Another step of iterative refinement step |
789
c           | is required. NITREF is used by stat.h     |
790
c           %-------------------------------------------%
791
c
792
            nitref = nitref + 1
793
            rnorm  = rnorm1
794
            iter   = iter + 1
795
            if (iter .le. 1) go to 80
796
c
797
c           %-------------------------------------------------%
798
c           | Otherwise RESID is numerically in the span of V |
799
c           %-------------------------------------------------%
800
c
801
            do 95 jj = 1, n
802
               resid(jj) = zero
803
  95        continue
804
            rnorm = zero
805
         end if
806
c
807
c        %----------------------------------------------%
808
c        | Branch here directly if iterative refinement |
809
c        | wasn't necessary or after at most NITER_REF  |
810
c        | steps of iterative refinement.               |
811
c        %----------------------------------------------%
812
c
813
  100    continue
814
c
815
         rstart = .false.
816
         orth2  = .false.
817
c
818
         call second (t5)
819
         titref = titref + (t5 - t4)
820
c
821
c        %------------------------------------%
822
c        | STEP 6: Update  j = j+1;  Continue |
823
c        %------------------------------------%
824
c
825
         j = j + 1
826
         if (j .gt. k+np) then
827
            call second (t1)
828
            tnaitr = tnaitr + (t1 - t0)
829
            ido = 99
830
            do 110 i = max(1,k), k+np-1
831
c
832
c              %--------------------------------------------%
833
c              | Check for splitting and deflation.         |
834
c              | Use a standard test as in the QR algorithm |
835
c              | REFERENCE: LAPACK subroutine slahqr        |
836
c              %--------------------------------------------%
837
c
838
               tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
839
               if( tst1.eq.zero )
840
     &              tst1 = slanhs( '1', k+np, h, ldh, workd(n+1) )
841
               if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) )
842
     &              h(i+1,i) = zero
843
 110        continue
844
c
845
            if (msglvl .gt. 2) then
846
               call psmout (comm, logfil, k+np, k+np, h, ldh, ndigit,
847
     &          '_naitr: Final upper Hessenberg matrix H of order K+NP')
848
            end if
849
c
850
            go to 9000
851
         end if
852
c
853
c        %--------------------------------------------------------%
854
c        | Loop back to extend the factorization by another step. |
855
c        %--------------------------------------------------------%
856
c
857
      go to 1000
858
c
859
c     %---------------------------------------------------------------%
860
c     |                                                               |
861
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
862
c     |                                                               |
863
c     %---------------------------------------------------------------%
864
c
865
 9000 continue
866
      return
867
c
868
c     %----------------%
869
c     | End of psnaitr |
870
c     %----------------%
871
c
872
      end
873

874
Product

Resources

Company