CoCalc -- pdsaitr.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdsaitr.f
⁵²¹⁸ views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdsaitr
5
c
6
c Message Passing Layer: MPI
7
c
8
c\Description:
9
c  Reverse communication interface for applying NP additional steps to
10
c  a K step symmetric Arnoldi factorization.
11
c
12
c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
13
c
14
c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
15
c
16
c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
17
c
18
c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
19
c
20
c  where OP and B are as in pdsaupd.  The B-norm of r_{k+p} is also
21
c  computed and returned.
22
c
23
c\Usage:
24
c  call pdsaitr
25
c     ( COMM, IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH,
26
c       IPNTR, WORKD, WORKL, INFO )
27
c
28
c\Arguments
29
c  COMM    MPI Communicator for the processor grid.  (INPUT)
30
c
31
c  IDO     Integer.  (INPUT/OUTPUT)
32
c          Reverse communication flag.
33
c          -------------------------------------------------------------
34
c          IDO =  0: first call to the reverse communication interface
35
c          IDO = -1: compute  Y = OP * X  where
36
c                    IPNTR(1) is the pointer into WORK for X,
37
c                    IPNTR(2) is the pointer into WORK for Y.
38
c                    This is for the restart phase to force the new
39
c                    starting vector into the range of OP.
40
c          IDO =  1: compute  Y = OP * X  where
41
c                    IPNTR(1) is the pointer into WORK for X,
42
c                    IPNTR(2) is the pointer into WORK for Y,
43
c                    IPNTR(3) is the pointer into WORK for B * X.
44
c          IDO =  2: compute  Y = B * X  where
45
c                    IPNTR(1) is the pointer into WORK for X,
46
c                    IPNTR(2) is the pointer into WORK for Y.
47
c          IDO = 99: done
48
c          -------------------------------------------------------------
49
c          When the routine is used in the "shift-and-invert" mode, the
50
c          vector B * Q is already available and does not need to be
51
c          recomputed in forming OP * Q.
52
c
53
c  BMAT    Character*1.  (INPUT)
54
c          BMAT specifies the type of matrix B that defines the
55
c          semi-inner product for the operator OP.  See pdsaupd.
56
c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
57
c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M*x
58
c
59
c  N       Integer.  (INPUT)
60
c          Dimension of the eigenproblem.
61
c
62
c  K       Integer.  (INPUT)
63
c          Current order of H and the number of columns of V.
64
c
65
c  NP      Integer.  (INPUT)
66
c          Number of additional Arnoldi steps to take.
67
c
68
c  MODE    Integer.  (INPUT)
69
c          Signifies which form for "OP". If MODE=2 then
70
c          a reduction in the number of B matrix vector multiplies
71
c          is possible since the B-norm of OP*x is equivalent to
72
c          the inv(B)-norm of A*x.
73
c
74
c  RESID   Double precision array of length N.  (INPUT/OUTPUT)
75
c          On INPUT:  RESID contains the residual vector r_{k}.
76
c          On OUTPUT: RESID contains the residual vector r_{k+p}.
77
c
78
c  RNORM   Double precision scalar.  (INPUT/OUTPUT)
79
c          On INPUT the B-norm of r_{k}.
80
c          On OUTPUT the B-norm of the updated residual r_{k+p}.
81
c
82
c  V       Double precision N by K+NP array.  (INPUT/OUTPUT)
83
c          On INPUT:  V contains the Arnoldi vectors in the first K
84
c          columns.
85
c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
86
c          NP columns.  The first K columns are unchanged.
87
c
88
c  LDV     Integer.  (INPUT)
89
c          Leading dimension of V exactly as declared in the calling
90
c          program.
91
c
92
c  H       Double precision (K+NP) by 2 array.  (INPUT/OUTPUT)
93
c          H is used to store the generated symmetric tridiagonal matrix
94
c          with the subdiagonal in the first column starting at H(2,1)
95
c          and the main diagonal in the second column.
96
c
97
c  LDH     Integer.  (INPUT)
98
c          Leading dimension of H exactly as declared in the calling
99
c          program.
100
c
101
c  IPNTR   Integer array of length 3.  (OUTPUT)
102
c          Pointer to mark the starting locations in the WORK for
103
c          vectors used by the Arnoldi iteration.
104
c          -------------------------------------------------------------
105
c          IPNTR(1): pointer to the current operand vector X.
106
c          IPNTR(2): pointer to the current result vector Y.
107
c          IPNTR(3): pointer to the vector B * X when used in the
108
c                    shift-and-invert mode.  X is the current operand.
109
c          -------------------------------------------------------------
110
c
111
c  WORKD   Double precision work array of length 3*N.  (REVERSE COMMUNICATION)
112
c          Distributed array to be used in the basic Arnoldi iteration
113
c          for reverse communication.  The calling program should not
114
c          use WORKD as temporary workspace during the iteration !!!!!!
115
c          On INPUT, WORKD(1:N) = B*RESID where RESID is associated
116
c          with the K step Arnoldi factorization. Used to save some
117
c          computation at the first step.
118
c          On OUTPUT, WORKD(1:N) = B*RESID where RESID is associated
119
c          with the K+NP step Arnoldi factorization.
120
c
121
c  WORKL   Double precision work space used for Gram Schmidt orthogonalization
122
c
123
c  INFO    Integer.  (OUTPUT)
124
c          = 0: Normal exit.
125
c          > 0: Size of an invariant subspace of OP is found that is
126
c               less than K + NP.
127
c
128
c\EndDoc
129
c
130
c-----------------------------------------------------------------------
131
c
132
c\BeginLib
133
c
134
c\Local variables:
135
c     xxxxxx  real
136
c
137
c\Routines called:
138
c     pdgetv0  Parallel ARPACK routine to generate the initial vector.
139
c     pivout   Parallel ARPACK utility routine that prints integers.
140
c     pdmout   Parallel ARPACK utility routine that prints matrices.
141
c     pdvout   Parallel ARPACK utility routine that prints vectors.
142
c     pdlamch  ScaLAPACK routine that determines machine constants.
143
c     dlascl   LAPACK routine for careful scaling of a matrix.
144
c     dgemv    Level 2 BLAS routine for matrix vector multiplication.
145
c     daxpy    Level 1 BLAS that computes a vector triad.
146
c     dscal    Level 1 BLAS that scales a vector.
147
c     dcopy    Level 1 BLAS that copies one vector to another .
148
c     ddot     Level 1 BLAS that computes the scalar product of two vectors.
149
c     pdnorm2  Parallel version of Level 1 BLAS that computes the norm of a vector.
150
c
151
c\Author
152
c     Danny Sorensen               Phuong Vu
153
c     Richard Lehoucq              CRPC / Rice University
154
c     Dept. of Computational &     Houston, Texas
155
c     Applied Mathematics
156
c     Rice University
157
c     Houston, Texas
158
c
159
c\Parallel Modifications
160
c     Kristi Maschhoff
161
c
162
c\Revision history:
163
c     Starting Point: Serial Code FILE: saitr.F   SID: 2.3
164
c
165
c\SCCS Information:
166
c FILE: saitr.F   SID: 1.3   DATE OF SID: 3/19/97
167
c
168
c\Remarks
169
c  The algorithm implemented is:
170
c
171
c  restart = .false.
172
c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
173
c  r_{k} contains the initial residual vector even for k = 0;
174
c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
175
c  computed by the calling program.
176
c
177
c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
178
c  For  j = k+1, ..., k+np  Do
179
c     1) if ( betaj < tol ) stop or restart depending on j.
180
c        if ( restart ) generate a new starting vector.
181
c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
182
c        p_{j} = p_{j}/betaj
183
c     3) r_{j} = OP*v_{j} where OP is defined as in pdsaupd
184
c        For shift-invert mode p_{j} = B*v_{j} is already available.
185
c        wnorm = || OP*v_{j} ||
186
c     4) Compute the j-th step residual vector.
187
c        w_{j} =  V_{j}^T * B * OP * v_{j}
188
c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
189
c        alphaj <- j-th component of w_{j}
190
c        rnorm = || r_{j} ||
191
c        betaj+1 = rnorm
192
c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
193
c     5) Re-orthogonalization step:
194
c        s = V_{j}'*B*r_{j}
195
c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
196
c        alphaj = alphaj + s_{j};
197
c     6) Iterative refinement step:
198
c        If (rnorm1 > 0.717*rnorm) then
199
c           rnorm = rnorm1
200
c           accept step and go back to 1)
201
c        Else
202
c           rnorm = rnorm1
203
c           If this is the first time in step 6), go to 5)
204
c           Else r_{j} lies in the span of V_{j} numerically.
205
c              Set r_{j} = 0 and rnorm = 0; go to 1)
206
c        EndIf
207
c  End Do
208
c
209
c\EndLib
210
c
211
c-----------------------------------------------------------------------
212
c
213
      subroutine pdsaitr
214
     &   (comm, ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh,
215
     &    ipntr, workd, workl, info)
216
c
217
      include   'mpif.h'
218
c
219
c     %---------------%
220
c     | MPI Variables |
221
c     %---------------%
222
c
223
      integer    comm
224
c
225
c     %----------------------------------------------------%
226
c     | Include files for debugging and timing information |
227
c     %----------------------------------------------------%
228
c
229
      include   'debug.h'
230
      include   'stat.h'
231
c
232
c     %------------------%
233
c     | Scalar Arguments |
234
c     %------------------%
235
c
236
      character  bmat*1
237
      integer    ido, info, k, ldh, ldv, n, mode, np
238
      Double precision
239
     &           rnorm
240
c
241
c     %-----------------%
242
c     | Array Arguments |
243
c     %-----------------%
244
c
245
      integer    ipntr(3)
246
      Double precision
247
     &           h(ldh,2), resid(n), v(ldv,k+np), workd(3*n),
248
     &           workl(2*ldh)
249
c
250
c     %------------%
251
c     | Parameters |
252
c     %------------%
253
c
254
      Double precision
255
     &           one, zero
256
      parameter (one = 1.0, zero = 0.0)
257
c
258
c     %---------------%
259
c     | Local Scalars |
260
c     %---------------%
261
c
262
      logical    first, orth1, orth2, rstart, step3, step4
263
      integer    i, ierr, ipj, irj, ivj, iter, itry, j, msglvl, infol,
264
     &           jj
265
      Double precision
266
     &           rnorm1, wnorm, safmin, temp1
267
      save       orth1, orth2, rstart, step3, step4,
268
     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl,
269
     &           rnorm1, safmin, wnorm
270
c
271
      Double precision
272
     &           rnorm_buf
273
c
274
c     %-----------------------%
275
c     | Local Array Arguments |
276
c     %-----------------------%
277
c
278
      Double precision
279
     &           xtemp(2)
280
c
281
c     %----------------------%
282
c     | External Subroutines |
283
c     %----------------------%
284
c
285
      external   daxpy, dcopy, dscal, dgemv, pdgetv0, pdvout, pdmout,
286
     &           dlascl, pivout, second
287
c
288
c     %--------------------%
289
c     | External Functions |
290
c     %--------------------%
291
c
292
      Double precision
293
     &           ddot, pdnorm2, pdlamch10
294
      external   ddot, pdnorm2, pdlamch10
295
c
296
c     %-----------------%
297
c     | Data statements |
298
c     %-----------------%
299
c
300
      data      first / .true. /
301
c
302
c
303
c     %---------------------%
304
c     | Intrinsic Functions |
305
c     %---------------------%
306
c
307
      intrinsic    abs, sqrt
308
c
309
c     %-----------------------%
310
c     | Executable Statements |
311
c     %-----------------------%
312
c
313
      if (first) then
314
         first = .false.
315
c
316
c        %--------------------------------%
317
c        | safmin = safe minimum is such  |
318
c        | that 1/sfmin does not overflow |
319
c        %--------------------------------%
320
c
321
         safmin = pdlamch10(comm,'safmin')
322
      end if
323
c
324
      if (ido .eq. 0) then
325
c
326
c        %-------------------------------%
327
c        | Initialize timing statistics  |
328
c        | & message level for debugging |
329
c        %-------------------------------%
330
c
331
         call second (t0)
332
         msglvl = msaitr
333
c
334
c        %------------------------------%
335
c        | Initial call to this routine |
336
c        %------------------------------%
337
c
338
         info   = 0
339
         step3  = .false.
340
         step4  = .false.
341
         rstart = .false.
342
         orth1  = .false.
343
         orth2  = .false.
344
c
345
c        %--------------------------------%
346
c        | Pointer to the current step of |
347
c        | the factorization to build     |
348
c        %--------------------------------%
349
c
350
         j      = k + 1
351
c
352
c        %------------------------------------------%
353
c        | Pointers used for reverse communication  |
354
c        | when using WORKD.                        |
355
c        %------------------------------------------%
356
c
357
         ipj    = 1
358
         irj    = ipj   + n
359
         ivj    = irj   + n
360
      end if
361
c
362
c     %-------------------------------------------------%
363
c     | When in reverse communication mode one of:      |
364
c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
365
c     | will be .true.                                  |
366
c     | STEP3: return from computing OP*v_{j}.          |
367
c     | STEP4: return from computing B-norm of OP*v_{j} |
368
c     | ORTH1: return from computing B-norm of r_{j+1}  |
369
c     | ORTH2: return from computing B-norm of          |
370
c     |        correction to the residual vector.       |
371
c     | RSTART: return from OP computations needed by   |
372
c     |         pdgetv0.                                |
373
c     %-------------------------------------------------%
374
c
375
      if (step3)  go to 50
376
      if (step4)  go to 60
377
      if (orth1)  go to 70
378
      if (orth2)  go to 90
379
      if (rstart) go to 30
380
c
381
c     %------------------------------%
382
c     | Else this is the first step. |
383
c     %------------------------------%
384
c
385
c     %--------------------------------------------------------------%
386
c     |                                                              |
387
c     |        A R N O L D I     I T E R A T I O N     L O O P       |
388
c     |                                                              |
389
c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
390
c     %--------------------------------------------------------------%
391
c
392
 1000 continue
393
c
394
         if (msglvl .gt. 2) then
395
            call pivout (comm, logfil, 1, [j], ndigit,
396
     &                  '_saitr: generating Arnoldi vector no.')
397
            call pdvout (comm, logfil, 1, [rnorm], ndigit,
398
     &                  '_saitr: B-norm of the current residual =')
399
         end if
400
c
401
c        %---------------------------------------------------------%
402
c        | Check for exact zero. Equivalent to determing whether a |
403
c        | j-step Arnoldi factorization is present.                |
404
c        %---------------------------------------------------------%
405
c
406
         if (rnorm .gt. zero) go to 40
407
c
408
c           %---------------------------------------------------%
409
c           | Invariant subspace found, generate a new starting |
410
c           | vector which is orthogonal to the current Arnoldi |
411
c           | basis and continue the iteration.                 |
412
c           %---------------------------------------------------%
413
c
414
            if (msglvl .gt. 0) then
415
               call pivout (comm, logfil, 1, [j], ndigit,
416
     &                     '_saitr: ****** restart at step ******')
417
            end if
418
c
419
c           %---------------------------------------------%
420
c           | ITRY is the loop variable that controls the |
421
c           | maximum amount of times that a restart is   |
422
c           | attempted. NRSTRT is used by stat.h         |
423
c           %---------------------------------------------%
424
c
425
            nrstrt = nrstrt + 1
426
            itry   = 1
427
   20       continue
428
            rstart = .true.
429
            ido    = 0
430
   30       continue
431
c
432
c           %--------------------------------------%
433
c           | If in reverse communication mode and |
434
c           | RSTART = .true. flow returns here.   |
435
c           %--------------------------------------%
436
c
437
            call pdgetv0 (comm, ido, bmat, itry, .false., n, j, v, ldv,
438
     &                   resid, rnorm, ipntr, workd, workl, ierr)
439
            if (ido .ne. 99) go to 9000
440
            if (ierr .lt. 0) then
441
               itry = itry + 1
442
               if (itry .le. 3) go to 20
443
c
444
c              %------------------------------------------------%
445
c              | Give up after several restart attempts.        |
446
c              | Set INFO to the size of the invariant subspace |
447
c              | which spans OP and exit.                       |
448
c              %------------------------------------------------%
449
c
450
               info = j - 1
451
               call second (t1)
452
               tsaitr = tsaitr + (t1 - t0)
453
               ido = 99
454
               go to 9000
455
            end if
456
c
457
   40    continue
458
c
459
c        %---------------------------------------------------------%
460
c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
461
c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
462
c        | when reciprocating a small RNORM, test against lower    |
463
c        | machine bound.                                          |
464
c        %---------------------------------------------------------%
465
c
466
         call dcopy (n, resid, 1, v(1,j), 1)
467
         if (rnorm .ge. safmin) then
468
             temp1 = one / rnorm
469
             call dscal (n, temp1, v(1,j), 1)
470
             call dscal (n, temp1, workd(ipj), 1)
471
         else
472
c
473
c            %-----------------------------------------%
474
c            | To scale both v_{j} and p_{j} carefully |
475
c            | use LAPACK routine SLASCL               |
476
c            %-----------------------------------------%
477
c
478
             call dlascl ('General', i, i, rnorm, one, n, 1,
479
     &                    v(1,j), n, infol)
480
             call dlascl ('General', i, i, rnorm, one, n, 1,
481
     &                    workd(ipj), n, infol)
482
         end if
483
c
484
c        %------------------------------------------------------%
485
c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
486
c        | Note that this is not quite yet r_{j}. See STEP 4    |
487
c        %------------------------------------------------------%
488
c
489
         step3 = .true.
490
         nopx  = nopx + 1
491
         call second (t2)
492
         call dcopy (n, v(1,j), 1, workd(ivj), 1)
493
         ipntr(1) = ivj
494
         ipntr(2) = irj
495
         ipntr(3) = ipj
496
         ido = 1
497
c
498
c        %-----------------------------------%
499
c        | Exit in order to compute OP*v_{j} |
500
c        %-----------------------------------%
501
c
502
         go to 9000
503
   50    continue
504
c
505
c        %-----------------------------------%
506
c        | Back from reverse communication;  |
507
c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}.   |
508
c        %-----------------------------------%
509
c
510
         call second (t3)
511
         tmvopx = tmvopx + (t3 - t2)
512
c
513
         step3 = .false.
514
c
515
c        %------------------------------------------%
516
c        | Put another copy of OP*v_{j} into RESID. |
517
c        %------------------------------------------%
518
c
519
         call dcopy (n, workd(irj), 1, resid, 1)
520
c
521
c        %-------------------------------------------%
522
c        | STEP 4:  Finish extending the symmetric   |
523
c        |          Arnoldi to length j. If MODE = 2 |
524
c        |          then B*OP = B*inv(B)*A = A and   |
525
c        |          we don't need to compute B*OP.   |
526
c        | NOTE: If MODE = 2 WORKD(IVJ:IVJ+N-1) is   |
527
c        | assumed to have A*v_{j}.                  |
528
c        %-------------------------------------------%
529
c
530
         if (mode .eq. 2) go to 65
531
         call second (t2)
532
         if (bmat .eq. 'G') then
533
            nbx = nbx + 1
534
            step4 = .true.
535
            ipntr(1) = irj
536
            ipntr(2) = ipj
537
            ido = 2
538
c
539
c           %-------------------------------------%
540
c           | Exit in order to compute B*OP*v_{j} |
541
c           %-------------------------------------%
542
c
543
            go to 9000
544
         else if (bmat .eq. 'I') then
545
              call dcopy(n, resid, 1 , workd(ipj), 1)
546
         end if
547
   60    continue
548
c
549
c        %-----------------------------------%
550
c        | Back from reverse communication;  |
551
c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j}. |
552
c        %-----------------------------------%
553
c
554
         if (bmat .eq. 'G') then
555
            call second (t3)
556
            tmvbx = tmvbx + (t3 - t2)
557
         end if
558
c
559
         step4 = .false.
560
c
561
c        %-------------------------------------%
562
c        | The following is needed for STEP 5. |
563
c        | Compute the B-norm of OP*v_{j}.     |
564
c        %-------------------------------------%
565
c
566
   65    continue
567
         if (mode .eq. 2) then
568
c
569
c           %----------------------------------%
570
c           | Note that the B-norm of OP*v_{j} |
571
c           | is the inv(B)-norm of A*v_{j}.   |
572
c           %----------------------------------%
573
c
574
            rnorm_buf = ddot (n, resid, 1, workd(ivj), 1)
575
            call MPI_ALLREDUCE( rnorm_buf, wnorm, 1,
576
     &           MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
577
            wnorm = sqrt(abs(wnorm))
578
         else if (bmat .eq. 'G') then
579
            rnorm_buf = ddot (n, resid, 1, workd(ipj), 1)
580
            call MPI_ALLREDUCE( rnorm_buf, wnorm, 1,
581
     &           MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
582
            wnorm = sqrt(abs(wnorm))
583
         else if (bmat .eq. 'I') then
584
            wnorm = pdnorm2( comm, n, resid, 1 )
585
         end if
586
c
587
c        %-----------------------------------------%
588
c        | Compute the j-th residual corresponding |
589
c        | to the j step factorization.            |
590
c        | Use Classical Gram Schmidt and compute: |
591
c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
592
c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
593
c        %-----------------------------------------%
594
c
595
c
596
c        %------------------------------------------%
597
c        | Compute the j Fourier coefficients w_{j} |
598
c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
599
c        %------------------------------------------%
600
c
601
         if (mode .ne. 2 ) then
602
            call dgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero,
603
     &                  workl(j+1), 1)
604
            call MPI_ALLREDUCE( workl(j+1), workl(1), j,
605
     &                  MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
606
         else if (mode .eq. 2) then
607
            call dgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero,
608
     &                  workl(j+1), 1)
609
            call MPI_ALLREDUCE( workl(j+1), workl(1), j,
610
     &                  MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
611
         end if
612
c
613
c        %--------------------------------------%
614
c        | Orthgonalize r_{j} against V_{j}.    |
615
c        | RESID contains OP*v_{j}. See STEP 3. |
616
c        %--------------------------------------%
617
c
618
         call dgemv('N', n, j, -one, v, ldv, workl(1), 1, one,
619
     &               resid, 1)
620
c
621
c        %--------------------------------------%
622
c        | Extend H to have j rows and columns. |
623
c        %--------------------------------------%
624
c
625
         h(j,2) = workl(j)
626
         if (j .eq. 1  .or.  rstart) then
627
            h(j,1) = zero
628
         else
629
            h(j,1) = rnorm
630
         end if
631
         call second (t4)
632
c
633
         orth1 = .true.
634
         iter  = 0
635
c
636
         call second (t2)
637
         if (bmat .eq. 'G') then
638
            nbx = nbx + 1
639
            call dcopy (n, resid, 1, workd(irj), 1)
640
            ipntr(1) = irj
641
            ipntr(2) = ipj
642
            ido = 2
643
c
644
c           %----------------------------------%
645
c           | Exit in order to compute B*r_{j} |
646
c           %----------------------------------%
647
c
648
            go to 9000
649
         else if (bmat .eq. 'I') then
650
            call dcopy (n, resid, 1, workd(ipj), 1)
651
         end if
652
   70    continue
653
c
654
c        %---------------------------------------------------%
655
c        | Back from reverse communication if ORTH1 = .true. |
656
c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
657
c        %---------------------------------------------------%
658
c
659
         if (bmat .eq. 'G') then
660
            call second (t3)
661
            tmvbx = tmvbx + (t3 - t2)
662
         end if
663
c
664
         orth1 = .false.
665
c
666
c        %------------------------------%
667
c        | Compute the B-norm of r_{j}. |
668
c        %------------------------------%
669
c
670
         if (bmat .eq. 'G') then
671
            rnorm_buf = ddot (n, resid, 1, workd(ipj), 1)
672
            call MPI_ALLREDUCE( rnorm_buf, rnorm, 1,
673
     &           MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
674
            rnorm = sqrt(abs(rnorm))
675
         else if (bmat .eq. 'I') then
676
            rnorm = pdnorm2( comm, n, resid, 1 )
677
         end if
678
c
679
c        %-----------------------------------------------------------%
680
c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
681
c        | Maximum NITER_ITREF tries.                                |
682
c        |                                                           |
683
c        |          s      = V_{j}^T * B * r_{j}                     |
684
c        |          r_{j}  = r_{j} - V_{j}*s                         |
685
c        |          alphaj = alphaj + s_{j}                          |
686
c        |                                                           |
687
c        | The stopping criteria used for iterative refinement is    |
688
c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
689
c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
690
c        | Determine if we need to correct the residual. The goal is |
691
c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
692
c        %-----------------------------------------------------------%
693
c
694
         if (rnorm .gt. 0.717*wnorm) go to 100
695
         nrorth = nrorth + 1
696
c
697
c        %---------------------------------------------------%
698
c        | Enter the Iterative refinement phase. If further  |
699
c        | refinement is necessary, loop back here. The loop |
700
c        | variable is ITER. Perform a step of Classical     |
701
c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
702
c        %---------------------------------------------------%
703
c
704
   80    continue
705
c
706
         if (msglvl .gt. 2) then
707
            xtemp(1) = wnorm
708
            xtemp(2) = rnorm
709
            call pdvout (comm, logfil, 2, xtemp, ndigit,
710
     &           '_naitr: re-orthonalization ; wnorm and rnorm are')
711
         end if
712
c
713
c        %----------------------------------------------------%
714
c        | Compute V_{j}^T * B * r_{j}.                       |
715
c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
716
c        %----------------------------------------------------%
717
c
718
         call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
719
     &               zero, workl(j+1), 1)
720
         call MPI_ALLREDUCE( workl(j+1), workl(1), j,
721
     &               MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
722
c
723
c        %----------------------------------------------%
724
c        | Compute the correction to the residual:      |
725
c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1).  |
726
c        | The correction to H is v(:,1:J)*H(1:J,1:J) + |
727
c        | v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j, but only   |
728
c        | H(j,j) is updated.                           |
729
c        %----------------------------------------------%
730
c
731
         call dgemv ('N', n, j, -one, v, ldv, workl(1), 1,
732
     &               one, resid, 1)
733
c
734
         if (j .eq. 1  .or.  rstart) h(j,1) = zero
735
         h(j,2) = h(j,2) + workl(j)
736
c
737
         orth2 = .true.
738
         call second (t2)
739
         if (bmat .eq. 'G') then
740
            nbx = nbx + 1
741
            call dcopy (n, resid, 1, workd(irj), 1)
742
            ipntr(1) = irj
743
            ipntr(2) = ipj
744
            ido = 2
745
c
746
c           %-----------------------------------%
747
c           | Exit in order to compute B*r_{j}. |
748
c           | r_{j} is the corrected residual.  |
749
c           %-----------------------------------%
750
c
751
            go to 9000
752
         else if (bmat .eq. 'I') then
753
            call dcopy (n, resid, 1, workd(ipj), 1)
754
         end if
755
   90    continue
756
c
757
c        %---------------------------------------------------%
758
c        | Back from reverse communication if ORTH2 = .true. |
759
c        %---------------------------------------------------%
760
c
761
         if (bmat .eq. 'G') then
762
            call second (t3)
763
            tmvbx = tmvbx + (t3 - t2)
764
         end if
765
c
766
c        %-----------------------------------------------------%
767
c        | Compute the B-norm of the corrected residual r_{j}. |
768
c        %-----------------------------------------------------%
769
c
770
         if (bmat .eq. 'G') then
771
           rnorm_buf = ddot (n, resid, 1, workd(ipj), 1)
772
           call MPI_ALLREDUCE( rnorm_buf, rnorm1, 1,
773
     &          MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
774
           rnorm1 = sqrt(abs(rnorm1))
775
         else if (bmat .eq. 'I') then
776
           rnorm1 = pdnorm2( comm, n, resid, 1 )
777
         end if
778
c
779
         if (msglvl .gt. 0 .and. iter .gt. 0) then
780
            call pivout (comm, logfil, 1, [j], ndigit,
781
     &           '_naitr: Iterative refinement for Arnoldi residual')
782
            if (msglvl .gt. 2) then
783
                xtemp(1) = rnorm
784
                xtemp(2) = rnorm1
785
                call pdvout (comm, logfil, 2, xtemp, ndigit,
786
     &           '_naitr: iterative refinement ; rnorm and rnorm1 are')
787
            end if
788
         end if
789
c
790
c        %-----------------------------------------%
791
c        | Determine if we need to perform another |
792
c        | step of re-orthogonalization.           |
793
c        %-----------------------------------------%
794
c
795
         if (rnorm1 .gt. 0.717*rnorm) then
796
c
797
c           %--------------------------------%
798
c           | No need for further refinement |
799
c           %--------------------------------%
800
c
801
            rnorm = rnorm1
802
c
803
         else
804
c
805
c           %-------------------------------------------%
806
c           | Another step of iterative refinement step |
807
c           | is required. NITREF is used by stat.h     |
808
c           %-------------------------------------------%
809
c
810
            nitref = nitref + 1
811
            rnorm  = rnorm1
812
            iter   = iter + 1
813
            if (iter .le. 1) go to 80
814
c
815
c           %-------------------------------------------------%
816
c           | Otherwise RESID is numerically in the span of V |
817
c           %-------------------------------------------------%
818
c
819
            do 95 jj = 1, n
820
               resid(jj) = zero
821
  95        continue
822
            rnorm = zero
823
         end if
824
c
825
c        %----------------------------------------------%
826
c        | Branch here directly if iterative refinement |
827
c        | wasn't necessary or after at most NITER_REF  |
828
c        | steps of iterative refinement.               |
829
c        %----------------------------------------------%
830
c
831
  100    continue
832
c
833
         rstart = .false.
834
         orth2  = .false.
835
c
836
         call second (t5)
837
         titref = titref + (t5 - t4)
838
c
839
c        %----------------------------------------------------------%
840
c        | Make sure the last off-diagonal element is non negative  |
841
c        | If not perform a similarity transformation on H(1:j,1:j) |
842
c        | and scale v(:,j) by -1.                                  |
843
c        %----------------------------------------------------------%
844
c
845
         if (h(j,1) .lt. zero) then
846
            h(j,1) = -h(j,1)
847
            if ( j .lt. k+np) then
848
               call dscal(n, -one, v(1,j+1), 1)
849
            else
850
               call dscal(n, -one, resid, 1)
851
            end if
852
         end if
853
c
854
c        %------------------------------------%
855
c        | STEP 6: Update  j = j+1;  Continue |
856
c        %------------------------------------%
857
c
858
         j = j + 1
859
         if (j .gt. k+np) then
860
            call second (t1)
861
            tsaitr = tsaitr + (t1 - t0)
862
            ido = 99
863
c
864
            if (msglvl .gt. 1) then
865
               call pdvout (comm, logfil, k+np, h(1,2), ndigit,
866
     &         '_saitr: main diagonal of matrix H of step K+NP.')
867
               if (k+np .gt. 1) then
868
               call pdvout (comm, logfil, k+np-1, h(2,1), ndigit,
869
     &         '_saitr: sub diagonal of matrix H of step K+NP.')
870
               end if
871
            end if
872
c
873
            go to 9000
874
         end if
875
c
876
c        %--------------------------------------------------------%
877
c        | Loop back to extend the factorization by another step. |
878
c        %--------------------------------------------------------%
879
c
880
      go to 1000
881
c
882
c     %---------------------------------------------------------------%
883
c     |                                                               |
884
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
885
c     |                                                               |
886
c     %---------------------------------------------------------------%
887
c
888
 9000 continue
889
      return
890
c
891
c     %----------------%
892
c     | End of pdsaitr |
893
c     %----------------%
894
c
895
      end
896

897
Product

Resources

Company