CoCalc -- pcnaitr.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pcnaitr.f
⁵²¹³ views
1
c\BeginDoc
2
c
3
c\Name: pcnaitr
4
c
5
c Message Passing Layer: MPI
6
c
7
c\Description:
8
c  Reverse communication interface for applying NP additional steps to
9
c  a K step nonsymmetric Arnoldi factorization.
10
c
11
c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
12
c
13
c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
14
c
15
c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
16
c
17
c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
18
c
19
c  where OP and B are as in pcnaupd.  The B-norm of r_{k+p} is also
20
c  computed and returned.
21
c
22
c\Usage:
23
c  call pcnaitr
24
c     ( COMM, IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
25
c       IPNTR, WORKD, WORKL, INFO )
26
c
27
c\Arguments
28
c  COMM    MPI Communicator for the processor grid.  (INPUT)
29
c
30
c  IDO     Integer.  (INPUT/OUTPUT)
31
c          Reverse communication flag.
32
c          -------------------------------------------------------------
33
c          IDO =  0: first call to the reverse communication interface
34
c          IDO = -1: compute  Y = OP * X  where
35
c                    IPNTR(1) is the pointer into WORK for X,
36
c                    IPNTR(2) is the pointer into WORK for Y.
37
c                    This is for the restart phase to force the new
38
c                    starting vector into the range of OP.
39
c          IDO =  1: compute  Y = OP * X  where
40
c                    IPNTR(1) is the pointer into WORK for X,
41
c                    IPNTR(2) is the pointer into WORK for Y,
42
c                    IPNTR(3) is the pointer into WORK for B * X.
43
c          IDO =  2: compute  Y = B * X  where
44
c                    IPNTR(1) is the pointer into WORK for X,
45
c                    IPNTR(2) is the pointer into WORK for Y.
46
c          IDO = 99: done
47
c          -------------------------------------------------------------
48
c          When the routine is used in the "shift-and-invert" mode, the
49
c          vector B * Q is already available and do not need to be
50
c          recomputed in forming OP * Q.
51
c
52
c  BMAT    Character*1.  (INPUT)
53
c          BMAT specifies the type of the matrix B that defines the
54
c          semi-inner product for the operator OP.  See pcnaupd.
55
c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
56
c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
57
c
58
c  N       Integer.  (INPUT)
59
c          Dimension of the eigenproblem.
60
c
61
c  K       Integer.  (INPUT)
62
c          Current size of V and H.
63
c
64
c  NP      Integer.  (INPUT)
65
c          Number of additional Arnoldi steps to take.
66
c
67
c  NB      Integer.  (INPUT)
68
c          Blocksize to be used in the recurrence.
69
c          Only work for NB = 1 right now.  The goal is to have a
70
c          program that implement both the block and non-block method.
71
c
72
c  RESID   Complex array of length N.  (INPUT/OUTPUT)
73
c          On INPUT:  RESID contains the residual vector r_{k}.
74
c          On OUTPUT: RESID contains the residual vector r_{k+p}.
75
c
76
c  RNORM   Real scalar.  (INPUT/OUTPUT)
77
c          B-norm of the starting residual on input.
78
c          B-norm of the updated residual r_{k+p} on output.
79
c
80
c  V       Complex N by K+NP array.  (INPUT/OUTPUT)
81
c          On INPUT:  V contains the Arnoldi vectors in the first K
82
c          columns.
83
c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
84
c          NP columns.  The first K columns are unchanged.
85
c
86
c  LDV     Integer.  (INPUT)
87
c          Leading dimension of V exactly as declared in the calling
88
c          program.
89
c
90
c  H       Complex (K+NP) by (K+NP) array.  (INPUT/OUTPUT)
91
c          H is used to store the generated upper Hessenberg matrix.
92
c
93
c  LDH     Integer.  (INPUT)
94
c          Leading dimension of H exactly as declared in the calling
95
c          program.
96
c
97
c  IPNTR   Integer array of length 3.  (OUTPUT)
98
c          Pointer to mark the starting locations in the WORK for
99
c          vectors used by the Arnoldi iteration.
100
c          -------------------------------------------------------------
101
c          IPNTR(1): pointer to the current operand vector X.
102
c          IPNTR(2): pointer to the current result vector Y.
103
c          IPNTR(3): pointer to the vector B * X when used in the
104
c                    shift-and-invert mode.  X is the current operand.
105
c          -------------------------------------------------------------
106
c
107
c  WORKD   Complex work array of length 3*N.  (REVERSE COMMUNICATION)
108
c          Distributed array to be used in the basic Arnoldi iteration
109
c          for reverse communication.  The calling program should not
110
c          use WORKD as temporary workspace during the iteration !!!!!!
111
c          On input, WORKD(1:N) = B*RESID and is used to save some
112
c          computation at the first step.
113
c
114
c  WORKL   Complex work space used for Gram Schmidt orthogonalization
115
c
116
c  INFO    Integer.  (OUTPUT)
117
c          = 0: Normal exit.
118
c          > 0: Size of the spanning invariant subspace of OP found.
119
c
120
c\EndDoc
121
c
122
c-----------------------------------------------------------------------
123
c
124
c\BeginLib
125
c
126
c\Local variables:
127
c     xxxxxx  Complex
128
c
129
c\References:
130
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
131
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
132
c     pp 357-385.
133
c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
134
c     Restarted Arnoldi Iteration", Rice University Technical Report
135
c     TR95-13, Department of Computational and Applied Mathematics.
136
c
137
c\Routines called:
138
c     pcgetv0   Parallel ARPACK routine to generate the initial vector.
139
c     pivout    Parallel ARPACK utility routine that prints integers.
140
c     second    ARPACK utility routine for timing.
141
c     pcmout    Parallel ARPACK utility routine that prints matrices
142
c     pcvout    Parallel ARPACK utility routine that prints vectors.
143
c     clanhs    LAPACK routine that computes various norms of a matrix.
144
c     clascl    LAPACK routine for careful scaling of a matrix.
145
c     slabad    LAPACK routine for defining the underflow and overflow
146
c               limits
147
c     pslamch   ScaLAPACK routine that determines machine constants.
148
c     slapy2    LAPACK routine to compute sqrt(x**2+y**2) carefully.
149
c     cgemv     Level 2 BLAS routine for matrix vector multiplication.
150
c     caxpy     Level 1 BLAS that computes a vector triad.
151
c     ccopy     Level 1 BLAS that copies one vector to another .
152
c     cdotc     Level 1 BLAS that computes the scalar product of
153
c               two vectors.
154
c     cscal     Level 1 BLAS that scales a vector.
155
c     csscal    Level 1 BLAS that scales a complex vector by a real number.
156
c     pscnorm2  Parallel version of Level 1 BLAS that computes the
157
c               norm of a vector.
158
c
159
c\Author
160
c     Danny Sorensen               Phuong Vu
161
c     Richard Lehoucq              CRPC / Rice University
162
c     Dept. of Computational &     Houston, Texas
163
c     Applied Mathematics
164
c     Rice University
165
c     Houston, Texas
166
c
167
c\Parallel Modifications
168
c     Kristi Maschhoff
169
c
170
c\Revision history:
171
c     Starting Point: Complex Code FILE: naitr.F   SID: 2.1
172
c
173
c\SCCS Information:
174
c FILE: naitr.F   SID: 1.3   DATE OF SID: 3/19/97
175
c
176
c\Remarks
177
c  The algorithm implemented is:
178
c
179
c  restart = .false.
180
c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
181
c  r_{k} contains the initial residual vector even for k = 0;
182
c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
183
c  computed by the calling program.
184
c
185
c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
186
c  For  j = k+1, ..., k+np  Do
187
c     1) if ( betaj < tol ) stop or restart depending on j.
188
c        ( At present tol is zero )
189
c        if ( restart ) generate a new starting vector.
190
c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
191
c        p_{j} = p_{j}/betaj
192
c     3) r_{j} = OP*v_{j} where OP is defined as in pcnaupd
193
c        For shift-invert mode p_{j} = B*v_{j} is already available.
194
c        wnorm = || OP*v_{j} ||
195
c     4) Compute the j-th step residual vector.
196
c        w_{j} =  V_{j}^T * B * OP * v_{j}
197
c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
198
c        H(:,j) = w_{j};
199
c        H(j,j-1) = rnorm
200
c        rnorm = || r_(j) ||
201
c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
202
c     5) Re-orthogonalization step:
203
c        s = V_{j}'*B*r_{j}
204
c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
205
c        alphaj = alphaj + s_{j};
206
c     6) Iterative refinement step:
207
c        If (rnorm1 > 0.717*rnorm) then
208
c           rnorm = rnorm1
209
c           accept step and go back to 1)
210
c        Else
211
c           rnorm = rnorm1
212
c           If this is the first time in step 6), go to 5)
213
c           Else r_{j} lies in the span of V_{j} numerically.
214
c              Set r_{j} = 0 and rnorm = 0; go to 1)
215
c        EndIf
216
c  End Do
217
c
218
c\EndLib
219
c
220
c-----------------------------------------------------------------------
221
c
222
      subroutine pcnaitr
223
     &   (comm, ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
224
     &    ipntr, workd, workl, info)
225
c
226
      include   'mpif.h'
227
c
228
c     %---------------%
229
c     | MPI Variables |
230
c     %---------------%
231
c
232
      integer    comm
233
c
234
c     %----------------------------------------------------%
235
c     | Include files for debugging and timing information |
236
c     %----------------------------------------------------%
237
c
238
      include   'debug.h'
239
      include   'stat.h'
240
c
241
c     %------------------%
242
c     | Scalar Arguments |
243
c     %------------------%
244
c
245
      character  bmat*1
246
      integer    ido, info, k, ldh, ldv, n, nb, np
247
      Real
248
     &           rnorm
249
c
250
c     %-----------------%
251
c     | Array Arguments |
252
c     %-----------------%
253
c
254
      integer    ipntr(3)
255
      Complex
256
     &           h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n),
257
     &           workl(2*ldh)
258
c
259
c     %------------%
260
c     | Parameters |
261
c     %------------%
262
c
263
      Complex
264
     &           one, zero
265
      Real
266
     &           rone, rzero
267
      parameter (one = (1.0, 0.0), zero = (0.0, 0.0),
268
     &           rone = 1.0, rzero = 0.0)
269
c
270
c     %--------------%
271
c     | Local Arrays |
272
c     %--------------%
273
c
274
      Real
275
     &           rtemp(2)
276
c
277
c     %---------------%
278
c     | Local Scalars |
279
c     %---------------%
280
c
281
      logical    first, orth1, orth2, rstart, step3, step4
282
      integer    ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
283
     &           jj
284
      Real
285
     &           ovfl, smlnum, tst1, ulp, unfl, betaj,
286
     &           temp1, rnorm1, wnorm
287
      Complex
288
     &           cnorm
289
c
290
      save       first, orth1, orth2, rstart, step3, step4,
291
     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
292
     &           betaj, rnorm1, smlnum, ulp, unfl, wnorm
293
c
294
      Complex
295
     &           cnorm_buf
296
c
297
c     %----------------------%
298
c     | External Subroutines |
299
c     %----------------------%
300
c
301
      external   caxpy, ccopy, cscal, cgemv, pcgetv0, slabad,
302
     &           csscal, pcvout, pcmout, pivout, second
303
c
304
c     %--------------------%
305
c     | External Functions |
306
c     %--------------------%
307
c
308
      Complex
309
     &           cdotc
310
      Real
311
     &           pslamch10, pscnorm2, clanhs, slapy2
312
      external   cdotc, pscnorm2, clanhs, pslamch10, slapy2
313
c
314
c     %---------------------%
315
c     | Intrinsic Functions |
316
c     %---------------------%
317
c
318
      intrinsic  aimag, real, max, sqrt
319
c
320
c     %-----------------%
321
c     | Data statements |
322
c     %-----------------%
323
c
324
      data       first / .true. /
325
c
326
c     %-----------------------%
327
c     | Executable Statements |
328
c     %-----------------------%
329
c
330
      if (first) then
331
c
332
c        %-----------------------------------------%
333
c        | Set machine-dependent constants for the |
334
c        | the splitting and deflation criterion.  |
335
c        | If norm(H) <= sqrt(OVFL),               |
336
c        | overflow should not occur.              |
337
c        | REFERENCE: LAPACK subroutine clahqr     |
338
c        %-----------------------------------------%
339
c
340
         unfl = pslamch10(comm,  'safe minimum' )
341
         ovfl = real(one / unfl)
342
         call slabad( unfl, ovfl )
343
         ulp = pslamch10( comm, 'precision' )
344
         smlnum = unfl*( n / ulp )
345
         first = .false.
346
      end if
347
c
348
      if (ido .eq. 0) then
349
c
350
c        %-------------------------------%
351
c        | Initialize timing statistics  |
352
c        | & message level for debugging |
353
c        %-------------------------------%
354
c
355
         call second (t0)
356
         msglvl = mcaitr
357
c
358
c        %------------------------------%
359
c        | Initial call to this routine |
360
c        %------------------------------%
361
c
362
         info   = 0
363
         step3  = .false.
364
         step4  = .false.
365
         rstart = .false.
366
         orth1  = .false.
367
         orth2  = .false.
368
         j      = k + 1
369
         ipj    = 1
370
         irj    = ipj   + n
371
         ivj    = irj   + n
372
      end if
373
c
374
c     %-------------------------------------------------%
375
c     | When in reverse communication mode one of:      |
376
c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
377
c     | will be .true. when ....                        |
378
c     | STEP3: return from computing OP*v_{j}.          |
379
c     | STEP4: return from computing B-norm of OP*v_{j} |
380
c     | ORTH1: return from computing B-norm of r_{j+1}  |
381
c     | ORTH2: return from computing B-norm of          |
382
c     |        correction to the residual vector.       |
383
c     | RSTART: return from OP computations needed by   |
384
c     |         pcgetv0.                                |
385
c     %-------------------------------------------------%
386
c
387
      if (step3)  go to 50
388
      if (step4)  go to 60
389
      if (orth1)  go to 70
390
      if (orth2)  go to 90
391
      if (rstart) go to 30
392
c
393
c     %-----------------------------%
394
c     | Else this is the first step |
395
c     %-----------------------------%
396
c
397
c     %--------------------------------------------------------------%
398
c     |                                                              |
399
c     |        A R N O L D I     I T E R A T I O N     L O O P       |
400
c     |                                                              |
401
c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
402
c     %--------------------------------------------------------------%
403

404
 1000 continue
405
c
406
         if (msglvl .gt. 1) then
407
            call pivout (comm, logfil, 1, [j], ndigit,
408
     &                  '_naitr: generating Arnoldi vector number')
409
            call pdvout (comm, logfil, 1, [rnorm], ndigit,
410
     &                  '_naitr: B-norm of the current residual is')
411
         end if
412
c
413
c        %---------------------------------------------------%
414
c        | STEP 1: Check if the B norm of j-th residual      |
415
c        | vector is zero. Equivalent to determine whether   |
416
c        | an exact j-step Arnoldi factorization is present. |
417
c        %---------------------------------------------------%
418
c
419
         betaj = rnorm
420
         if (rnorm .gt. rzero) go to 40
421
c
422
c           %---------------------------------------------------%
423
c           | Invariant subspace found, generate a new starting |
424
c           | vector which is orthogonal to the current Arnoldi |
425
c           | basis and continue the iteration.                 |
426
c           %---------------------------------------------------%
427
c
428
            if (msglvl .gt. 0) then
429
               call pivout (comm, logfil, 1, [j], ndigit,
430
     &                     '_naitr: ****** RESTART AT STEP ******')
431
            end if
432
c
433
c           %---------------------------------------------%
434
c           | ITRY is the loop variable that controls the |
435
c           | maximum amount of times that a restart is   |
436
c           | attempted. NRSTRT is used by stat.h         |
437
c           %---------------------------------------------%
438
c
439
            betaj  = rzero
440
            nrstrt = nrstrt + 1
441
            itry   = 1
442
   20       continue
443
            rstart = .true.
444
            ido    = 0
445
   30       continue
446
c
447
c           %--------------------------------------%
448
c           | If in reverse communication mode and |
449
c           | RSTART = .true. flow returns here.   |
450
c           %--------------------------------------%
451
c
452
            call pcgetv0 (comm, ido, bmat, itry, .false., n, j, v, ldv,
453
     &                   resid, rnorm, ipntr, workd, workl, ierr)
454
            if (ido .ne. 99) go to 9000
455
            if (ierr .lt. 0) then
456
               itry = itry + 1
457
               if (itry .le. 3) go to 20
458
c
459
c              %------------------------------------------------%
460
c              | Give up after several restart attempts.        |
461
c              | Set INFO to the size of the invariant subspace |
462
c              | which spans OP and exit.                       |
463
c              %------------------------------------------------%
464
c
465
               info = j - 1
466
               call second (t1)
467
               tcaitr = tcaitr + (t1 - t0)
468
               ido = 99
469
               go to 9000
470
            end if
471
c
472
   40    continue
473
c
474
c        %---------------------------------------------------------%
475
c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
476
c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
477
c        | when reciprocating a small RNORM, test against lower    |
478
c        | machine bound.                                          |
479
c        %---------------------------------------------------------%
480
c
481
         call ccopy (n, resid, 1, v(1,j), 1)
482
         if ( rnorm .ge. unfl) then
483
             temp1 = rone / rnorm
484
             call csscal (n, temp1, v(1,j), 1)
485
             call csscal (n, temp1, workd(ipj), 1)
486
         else
487
c
488
c            %-----------------------------------------%
489
c            | To scale both v_{j} and p_{j} carefully |
490
c            | use LAPACK routine clascl               |
491
c            %-----------------------------------------%
492
c
493
             call clascl ('General', i, i, rnorm, rone,
494
     &                    n, 1, v(1,j), n, infol)
495
             call clascl ('General', i, i, rnorm, rone,
496
     &                    n, 1, workd(ipj), n, infol)
497
         end if
498
c
499
c        %------------------------------------------------------%
500
c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
501
c        | Note that this is not quite yet r_{j}. See STEP 4    |
502
c        %------------------------------------------------------%
503
c
504
         step3 = .true.
505
         nopx  = nopx + 1
506
         call second (t2)
507
         call ccopy (n, v(1,j), 1, workd(ivj), 1)
508
         ipntr(1) = ivj
509
         ipntr(2) = irj
510
         ipntr(3) = ipj
511
         ido = 1
512
c
513
c        %-----------------------------------%
514
c        | Exit in order to compute OP*v_{j} |
515
c        %-----------------------------------%
516
c
517
         go to 9000
518
   50    continue
519
c
520
c        %----------------------------------%
521
c        | Back from reverse communication; |
522
c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   |
523
c        | if step3 = .true.                |
524
c        %----------------------------------%
525
c
526
         call second (t3)
527
         tmvopx = tmvopx + (t3 - t2)
528

529
         step3 = .false.
530
c
531
c        %------------------------------------------%
532
c        | Put another copy of OP*v_{j} into RESID. |
533
c        %------------------------------------------%
534
c
535
         call ccopy (n, workd(irj), 1, resid, 1)
536
c
537
c        %---------------------------------------%
538
c        | STEP 4:  Finish extending the Arnoldi |
539
c        |          factorization to length j.   |
540
c        %---------------------------------------%
541
c
542
         call second (t2)
543
         if (bmat .eq. 'G') then
544
            nbx = nbx + 1
545
            step4 = .true.
546
            ipntr(1) = irj
547
            ipntr(2) = ipj
548
            ido = 2
549
c
550
c           %-------------------------------------%
551
c           | Exit in order to compute B*OP*v_{j} |
552
c           %-------------------------------------%
553
c
554
            go to 9000
555
         else if (bmat .eq. 'I') then
556
            call ccopy (n, resid, 1, workd(ipj), 1)
557
         end if
558
   60    continue
559
c
560
c        %----------------------------------%
561
c        | Back from reverse communication; |
562
c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
563
c        | if step4 = .true.                |
564
c        %----------------------------------%
565
c
566
         if (bmat .eq. 'G') then
567
            call second (t3)
568
            tmvbx = tmvbx + (t3 - t2)
569
         end if
570
c
571
         step4 = .false.
572
c
573
c        %-------------------------------------%
574
c        | The following is needed for STEP 5. |
575
c        | Compute the B-norm of OP*v_{j}.     |
576
c        %-------------------------------------%
577
c
578
         if (bmat .eq. 'G') then
579
             cnorm_buf = cdotc (n, resid, 1, workd(ipj), 1)
580
            call MPI_ALLREDUCE( cnorm_buf, cnorm, 1,
581
     &           MPI_COMPLEX, MPI_SUM, comm, ierr )
582
             wnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
583
         else if (bmat .eq. 'I') then
584
             wnorm = pscnorm2(comm, n, resid, 1)
585
         end if
586
c
587
c        %-----------------------------------------%
588
c        | Compute the j-th residual corresponding |
589
c        | to the j step factorization.            |
590
c        | Use Classical Gram Schmidt and compute: |
591
c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
592
c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
593
c        %-----------------------------------------%
594
c
595
c
596
c        %------------------------------------------%
597
c        | Compute the j Fourier coefficients w_{j} |
598
c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
599
c        %------------------------------------------%
600
c
601
         call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
602
     &               zero, workl, 1)
603
         call MPI_ALLREDUCE( workl, h(1,j), j,
604
     &               MPI_COMPLEX, MPI_SUM, comm, ierr)
605
c
606
c        %--------------------------------------%
607
c        | Orthogonalize r_{j} against V_{j}.   |
608
c        | RESID contains OP*v_{j}. See STEP 3. |
609
c        %--------------------------------------%
610
c
611
         call cgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
612
     &               one, resid, 1)
613
c
614
         if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero)
615
c
616
         call second (t4)
617
c
618
         orth1 = .true.
619
c
620
         call second (t2)
621
         if (bmat .eq. 'G') then
622
            nbx = nbx + 1
623
            call ccopy (n, resid, 1, workd(irj), 1)
624
            ipntr(1) = irj
625
            ipntr(2) = ipj
626
            ido = 2
627
c
628
c           %----------------------------------%
629
c           | Exit in order to compute B*r_{j} |
630
c           %----------------------------------%
631
c
632
            go to 9000
633
         else if (bmat .eq. 'I') then
634
            call ccopy (n, resid, 1, workd(ipj), 1)
635
         end if
636
   70    continue
637
c
638
c        %---------------------------------------------------%
639
c        | Back from reverse communication if ORTH1 = .true. |
640
c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
641
c        %---------------------------------------------------%
642
c
643
         if (bmat .eq. 'G') then
644
            call second (t3)
645
            tmvbx = tmvbx + (t3 - t2)
646
         end if
647
c
648
         orth1 = .false.
649
c
650
c        %------------------------------%
651
c        | Compute the B-norm of r_{j}. |
652
c        %------------------------------%
653
c
654
         if (bmat .eq. 'G') then
655
            cnorm_buf = cdotc (n, resid, 1, workd(ipj), 1)
656
            call MPI_ALLREDUCE( cnorm_buf, cnorm, 1,
657
     &           MPI_COMPLEX, MPI_SUM, comm, ierr )
658
            rnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
659
         else if (bmat .eq. 'I') then
660
            rnorm = pscnorm2(comm, n, resid, 1)
661
         end if
662
c
663
c        %-----------------------------------------------------------%
664
c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
665
c        | Maximum NITER_ITREF tries.                                |
666
c        |                                                           |
667
c        |          s      = V_{j}^T * B * r_{j}                     |
668
c        |          r_{j}  = r_{j} - V_{j}*s                         |
669
c        |          alphaj = alphaj + s_{j}                          |
670
c        |                                                           |
671
c        | The stopping criteria used for iterative refinement is    |
672
c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
673
c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
674
c        | Determine if we need to correct the residual. The goal is |
675
c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
676
c        | The following test determines whether the sine of the     |
677
c        | angle between  OP*x and the computed residual is less     |
678
c        | than or equal to 0.717.                                   |
679
c        %-----------------------------------------------------------%
680
c
681
         if ( rnorm .gt. 0.717*wnorm ) go to 100
682
c
683
         iter  = 0
684
         nrorth = nrorth + 1
685
c
686
c        %---------------------------------------------------%
687
c        | Enter the Iterative refinement phase. If further  |
688
c        | refinement is necessary, loop back here. The loop |
689
c        | variable is ITER. Perform a step of Classical     |
690
c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
691
c        %---------------------------------------------------%
692
c
693
   80    continue
694
c
695
         if (msglvl .gt. 2) then
696
            rtemp(1) = wnorm
697
            rtemp(2) = rnorm
698
            call psvout (comm, logfil, 2, rtemp, ndigit,
699
     &      '_naitr: re-orthogonalization; wnorm and rnorm are')
700
            call pcvout (comm, logfil, j, h(1,j), ndigit,
701
     &                  '_naitr: j-th column of H')
702
         end if
703
c
704
c        %----------------------------------------------------%
705
c        | Compute V_{j}^T * B * r_{j}.                       |
706
c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
707
c        %----------------------------------------------------%
708
c
709
         call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
710
     &               zero, workl(j+1), 1)
711
         call MPI_ALLREDUCE( workl(j+1), workl(1), j,
712
     &               MPI_COMPLEX, MPI_SUM, comm, ierr)
713
c
714
c        %---------------------------------------------%
715
c        | Compute the correction to the residual:     |
716
c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
717
c        | The correction to H is v(:,1:J)*H(1:J,1:J)  |
718
c        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         |
719
c        %---------------------------------------------%
720
c
721
         call cgemv ('N', n, j, -one, v, ldv, workl(1), 1,
722
     &               one, resid, 1)
723
         call caxpy (j, one, workl(1), 1, h(1,j), 1)
724
c
725
         orth2 = .true.
726
         call second (t2)
727
         if (bmat .eq. 'G') then
728
            nbx = nbx + 1
729
            call ccopy (n, resid, 1, workd(irj), 1)
730
            ipntr(1) = irj
731
            ipntr(2) = ipj
732
            ido = 2
733
c
734
c           %-----------------------------------%
735
c           | Exit in order to compute B*r_{j}. |
736
c           | r_{j} is the corrected residual.  |
737
c           %-----------------------------------%
738
c
739
            go to 9000
740
         else if (bmat .eq. 'I') then
741
            call ccopy (n, resid, 1, workd(ipj), 1)
742
         end if
743
   90    continue
744
c
745
c        %---------------------------------------------------%
746
c        | Back from reverse communication if ORTH2 = .true. |
747
c        %---------------------------------------------------%
748
c
749
         if (bmat .eq. 'G') then
750
            call second (t3)
751
            tmvbx = tmvbx + (t3 - t2)
752
         end if
753
c
754
c        %-----------------------------------------------------%
755
c        | Compute the B-norm of the corrected residual r_{j}. |
756
c        %-----------------------------------------------------%
757
c
758
         if (bmat .eq. 'G') then
759
             cnorm_buf = cdotc (n, resid, 1, workd(ipj), 1)
760
            call MPI_ALLREDUCE( cnorm_buf, cnorm, 1,
761
     &           MPI_COMPLEX, MPI_SUM, comm, ierr )
762
             rnorm1 = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
763
         else if (bmat .eq. 'I') then
764
             rnorm1 = pscnorm2(comm, n, resid, 1)
765
         end if
766
c
767
         if (msglvl .gt. 0 .and. iter .gt. 0 ) then
768
            call pivout (comm, logfil, 1, [j], ndigit,
769
     &           '_naitr: Iterative refinement for Arnoldi residual')
770
            if (msglvl .gt. 2) then
771
                rtemp(1) = rnorm
772
                rtemp(2) = rnorm1
773
                call psvout (comm, logfil, 2, rtemp, ndigit,
774
     &           '_naitr: iterative refinement ; rnorm and rnorm1 are')
775
            end if
776
         end if
777
c
778
c        %-----------------------------------------%
779
c        | Determine if we need to perform another |
780
c        | step of re-orthogonalization.           |
781
c        %-----------------------------------------%
782
c
783
         if ( rnorm1 .gt. 0.717*rnorm ) then
784
c
785
c           %---------------------------------------%
786
c           | No need for further refinement.       |
787
c           | The cosine of the angle between the   |
788
c           | corrected residual vector and the old |
789
c           | residual vector is greater than 0.717 |
790
c           | In other words the corrected residual |
791
c           | and the old residual vector share an  |
792
c           | angle of less than arcCOS(0.717)      |
793
c           %---------------------------------------%
794
c
795
            rnorm = rnorm1
796
c
797
         else
798
c
799
c           %-------------------------------------------%
800
c           | Another step of iterative refinement step |
801
c           | is required. NITREF is used by stat.h     |
802
c           %-------------------------------------------%
803
c
804
            nitref = nitref + 1
805
            rnorm  = rnorm1
806
            iter   = iter + 1
807
            if (iter .le. 1) go to 80
808
c
809
c           %-------------------------------------------------%
810
c           | Otherwise RESID is numerically in the span of V |
811
c           %-------------------------------------------------%
812
c
813
            do 95 jj = 1, n
814
               resid(jj) = zero
815
  95        continue
816
            rnorm = rzero
817
         end if
818
c
819
c        %----------------------------------------------%
820
c        | Branch here directly if iterative refinement |
821
c        | wasn't necessary or after at most NITER_REF  |
822
c        | steps of iterative refinement.               |
823
c        %----------------------------------------------%
824
c
825
  100    continue
826
c
827
         rstart = .false.
828
         orth2  = .false.
829
c
830
         call second (t5)
831
         titref = titref + (t5 - t4)
832
c
833
c        %------------------------------------%
834
c        | STEP 6: Update  j = j+1;  Continue |
835
c        %------------------------------------%
836
c
837
         j = j + 1
838
         if (j .gt. k+np) then
839
            call second (t1)
840
            tcaitr = tcaitr + (t1 - t0)
841
            ido = 99
842
            do 110 i = max(1,k), k+np-1
843
c
844
c              %--------------------------------------------%
845
c              | Check for splitting and deflation.         |
846
c              | Use a standard test as in the QR algorithm |
847
c              | REFERENCE: LAPACK subroutine clahqr        |
848
c              %--------------------------------------------%
849
c
850
               tst1 = slapy2(real(h(i,i)),aimag(h(i,i)))
851
     &              + slapy2(real(h(i+1,i+1)), aimag(h(i+1,i+1)))
852
               if( tst1.eq.real(zero) )
853
     &              tst1 = clanhs( '1', k+np, h, ldh, workd(n+1) )
854
               if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le.
855
     &                    max( ulp*tst1, smlnum ) )
856
     &             h(i+1,i) = zero
857
 110        continue
858
c
859
            if (msglvl .gt. 2) then
860
               call pcmout (comm, logfil, k+np, k+np, h, ldh, ndigit,
861
     &          '_naitr: Final upper Hessenberg matrix H of order K+NP')
862
            end if
863
c
864
            go to 9000
865
         end if
866
c
867
c        %--------------------------------------------------------%
868
c        | Loop back to extend the factorization by another step. |
869
c        %--------------------------------------------------------%
870
c
871
      go to 1000
872
c
873
c     %---------------------------------------------------------------%
874
c     |                                                               |
875
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
876
c     |                                                               |
877
c     %---------------------------------------------------------------%
878
c
879
 9000 continue
880
      return
881
c
882
c     %----------------%
883
c     | End of pcnaitr |
884
c     %----------------%
885
c
886
      end
887

888
Product

Resources

Company