CoCalc -- dsaitr.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/dsaitr.f
⁵²¹³ views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: dsaitr
5
c
6
c\Description:
7
c  Reverse communication interface for applying NP additional steps to
8
c  a K step symmetric Arnoldi factorization.
9
c
10
c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
11
c
12
c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
13
c
14
c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
15
c
16
c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
17
c
18
c  where OP and B are as in dsaupd.  The B-norm of r_{k+p} is also
19
c  computed and returned.
20
c
21
c\Usage:
22
c  call dsaitr
23
c     ( IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH,
24
c       IPNTR, WORKD, INFO )
25
c
26
c\Arguments
27
c  IDO     Integer.  (INPUT/OUTPUT)
28
c          Reverse communication flag.
29
c          -------------------------------------------------------------
30
c          IDO =  0: first call to the reverse communication interface
31
c          IDO = -1: compute  Y = OP * X  where
32
c                    IPNTR(1) is the pointer into WORK for X,
33
c                    IPNTR(2) is the pointer into WORK for Y.
34
c                    This is for the restart phase to force the new
35
c                    starting vector into the range of OP.
36
c          IDO =  1: compute  Y = OP * X  where
37
c                    IPNTR(1) is the pointer into WORK for X,
38
c                    IPNTR(2) is the pointer into WORK for Y,
39
c                    IPNTR(3) is the pointer into WORK for B * X.
40
c          IDO =  2: compute  Y = B * X  where
41
c                    IPNTR(1) is the pointer into WORK for X,
42
c                    IPNTR(2) is the pointer into WORK for Y.
43
c          IDO = 99: done
44
c          -------------------------------------------------------------
45
c          When the routine is used in the "shift-and-invert" mode, the
46
c          vector B * Q is already available and does not need to be
47
c          recomputed in forming OP * Q.
48
c
49
c  BMAT    Character*1.  (INPUT)
50
c          BMAT specifies the type of matrix B that defines the
51
c          semi-inner product for the operator OP.  See dsaupd.
52
c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
53
c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M*x
54
c
55
c  N       Integer.  (INPUT)
56
c          Dimension of the eigenproblem.
57
c
58
c  K       Integer.  (INPUT)
59
c          Current order of H and the number of columns of V.
60
c
61
c  NP      Integer.  (INPUT)
62
c          Number of additional Arnoldi steps to take.
63
c
64
c  MODE    Integer.  (INPUT)
65
c          Signifies which form for "OP". If MODE=2 then
66
c          a reduction in the number of B matrix vector multiplies
67
c          is possible since the B-norm of OP*x is equivalent to
68
c          the inv(B)-norm of A*x.
69
c
70
c  RESID   Double precision array of length N.  (INPUT/OUTPUT)
71
c          On INPUT:  RESID contains the residual vector r_{k}.
72
c          On OUTPUT: RESID contains the residual vector r_{k+p}.
73
c
74
c  RNORM   Double precision scalar.  (INPUT/OUTPUT)
75
c          On INPUT the B-norm of r_{k}.
76
c          On OUTPUT the B-norm of the updated residual r_{k+p}.
77
c
78
c  V       Double precision N by K+NP array.  (INPUT/OUTPUT)
79
c          On INPUT:  V contains the Arnoldi vectors in the first K
80
c          columns.
81
c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
82
c          NP columns.  The first K columns are unchanged.
83
c
84
c  LDV     Integer.  (INPUT)
85
c          Leading dimension of V exactly as declared in the calling
86
c          program.
87
c
88
c  H       Double precision (K+NP) by 2 array.  (INPUT/OUTPUT)
89
c          H is used to store the generated symmetric tridiagonal matrix
90
c          with the subdiagonal in the first column starting at H(2,1)
91
c          and the main diagonal in the second column.
92
c
93
c  LDH     Integer.  (INPUT)
94
c          Leading dimension of H exactly as declared in the calling
95
c          program.
96
c
97
c  IPNTR   Integer array of length 3.  (OUTPUT)
98
c          Pointer to mark the starting locations in the WORK for
99
c          vectors used by the Arnoldi iteration.
100
c          -------------------------------------------------------------
101
c          IPNTR(1): pointer to the current operand vector X.
102
c          IPNTR(2): pointer to the current result vector Y.
103
c          IPNTR(3): pointer to the vector B * X when used in the
104
c                    shift-and-invert mode.  X is the current operand.
105
c          -------------------------------------------------------------
106
c
107
c  WORKD   Double precision work array of length 3*N.  (REVERSE COMMUNICATION)
108
c          Distributed array to be used in the basic Arnoldi iteration
109
c          for reverse communication.  The calling program should not
110
c          use WORKD as temporary workspace during the iteration !!!!!!
111
c          On INPUT, WORKD(1:N) = B*RESID where RESID is associated
112
c          with the K step Arnoldi factorization. Used to save some
113
c          computation at the first step.
114
c          On OUTPUT, WORKD(1:N) = B*RESID where RESID is associated
115
c          with the K+NP step Arnoldi factorization.
116
c
117
c  INFO    Integer.  (OUTPUT)
118
c          = 0: Normal exit.
119
c          > 0: Size of an invariant subspace of OP is found that is
120
c               less than K + NP.
121
c
122
c\EndDoc
123
c
124
c-----------------------------------------------------------------------
125
c
126
c\BeginLib
127
c
128
c\Local variables:
129
c     xxxxxx  real
130
c
131
c\Routines called:
132
c     dgetv0  ARPACK routine to generate the initial vector.
133
c     ivout   ARPACK utility routine that prints integers.
134
c     dmout   ARPACK utility routine that prints matrices.
135
c     dvout   ARPACK utility routine that prints vectors.
136
c     dlamch  LAPACK routine that determines machine constants.
137
c     dlascl  LAPACK routine for careful scaling of a matrix.
138
c     dgemv   Level 2 BLAS routine for matrix vector multiplication.
139
c     daxpy   Level 1 BLAS that computes a vector triad.
140
c     dscal   Level 1 BLAS that scales a vector.
141
c     dcopy   Level 1 BLAS that copies one vector to another .
142
c     ddot    Level 1 BLAS that computes the scalar product of two vectors.
143
c     dnrm2   Level 1 BLAS that computes the norm of a vector.
144
c
145
c\Author
146
c     Danny Sorensen               Phuong Vu
147
c     Richard Lehoucq              CRPC / Rice University
148
c     Dept. of Computational &     Houston, Texas
149
c     Applied Mathematics
150
c     Rice University
151
c     Houston, Texas
152
c
153
c\Revision history:
154
c     xx/xx/93: Version ' 2.4'
155
c
156
c\SCCS Information: @(#)
157
c FILE: saitr.F   SID: 2.6   DATE OF SID: 8/28/96   RELEASE: 2
158
c
159
c\Remarks
160
c  The algorithm implemented is:
161
c
162
c  restart = .false.
163
c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
164
c  r_{k} contains the initial residual vector even for k = 0;
165
c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
166
c  computed by the calling program.
167
c
168
c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
169
c  For  j = k+1, ..., k+np  Do
170
c     1) if ( betaj < tol ) stop or restart depending on j.
171
c        if ( restart ) generate a new starting vector.
172
c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
173
c        p_{j} = p_{j}/betaj
174
c     3) r_{j} = OP*v_{j} where OP is defined as in dsaupd
175
c        For shift-invert mode p_{j} = B*v_{j} is already available.
176
c        wnorm = || OP*v_{j} ||
177
c     4) Compute the j-th step residual vector.
178
c        w_{j} =  V_{j}^T * B * OP * v_{j}
179
c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
180
c        alphaj <- j-th component of w_{j}
181
c        rnorm = || r_{j} ||
182
c        betaj+1 = rnorm
183
c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
184
c     5) Re-orthogonalization step:
185
c        s = V_{j}'*B*r_{j}
186
c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
187
c        alphaj = alphaj + s_{j};
188
c     6) Iterative refinement step:
189
c        If (rnorm1 > 0.717*rnorm) then
190
c           rnorm = rnorm1
191
c           accept step and go back to 1)
192
c        Else
193
c           rnorm = rnorm1
194
c           If this is the first time in step 6), go to 5)
195
c           Else r_{j} lies in the span of V_{j} numerically.
196
c              Set r_{j} = 0 and rnorm = 0; go to 1)
197
c        EndIf
198
c  End Do
199
c
200
c\EndLib
201
c
202
c-----------------------------------------------------------------------
203
c
204
      subroutine dsaitr
205
     &   (ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh,
206
     &    ipntr, workd, info)
207
c
208
c     %----------------------------------------------------%
209
c     | Include files for debugging and timing information |
210
c     %----------------------------------------------------%
211
c
212
      include   'debug.h'
213
      include   'stat.h'
214
c
215
c     %------------------%
216
c     | Scalar Arguments |
217
c     %------------------%
218
c
219
      character  bmat*1
220
      integer    ido, info, k, ldh, ldv, n, mode, np
221
      Double precision
222
     &           rnorm
223
c
224
c     %-----------------%
225
c     | Array Arguments |
226
c     %-----------------%
227
c
228
      integer    ipntr(3)
229
      Double precision
230
     &           h(ldh,2), resid(n), v(ldv,k+np), workd(3*n)
231
c
232
c     %------------%
233
c     | Parameters |
234
c     %------------%
235
c
236
      Double precision
237
     &           one, zero
238
      parameter (one = 1.0D+0, zero = 0.0D+0)
239
c
240
c     %---------------%
241
c     | Local Scalars |
242
c     %---------------%
243
c
244
      logical    first, orth1, orth2, rstart, step3, step4
245
      integer    i, ierr, ipj, irj, ivj, iter, itry, j, msglvl,
246
     &           infol, jj
247
      Double precision
248
     &           rnorm1, wnorm, safmin, temp1
249
      save       orth1, orth2, rstart, step3, step4,
250
     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl,
251
     &           rnorm1, safmin, wnorm
252
c
253
c     %-----------------------%
254
c     | Local Array Arguments |
255
c     %-----------------------%
256
c
257
      Double precision
258
     &           xtemp(2)
259
c
260
c     %----------------------%
261
c     | External Subroutines |
262
c     %----------------------%
263
c
264
      external   daxpy, dcopy, dscal, dgemv, dgetv0, dvout, dmout,
265
     &           dlascl, ivout, second
266
c
267
c     %--------------------%
268
c     | External Functions |
269
c     %--------------------%
270
c
271
      Double precision
272
     &           ddot, dnrm2, dlamch
273
      external   ddot, dnrm2, dlamch
274
c
275
c     %-----------------%
276
c     | Data statements |
277
c     %-----------------%
278
c
279
      data      first / .true. /
280
c
281
c     %-----------------------%
282
c     | Executable Statements |
283
c     %-----------------------%
284
c
285
      if (first) then
286
         first = .false.
287
c
288
c        %--------------------------------%
289
c        | safmin = safe minimum is such  |
290
c        | that 1/sfmin does not overflow |
291
c        %--------------------------------%
292
c
293
         safmin = dlamch('safmin')
294
      end if
295
c
296
      if (ido .eq. 0) then
297
c
298
c        %-------------------------------%
299
c        | Initialize timing statistics  |
300
c        | & message level for debugging |
301
c        %-------------------------------%
302
c
303
         call second (t0)
304
         msglvl = msaitr
305
c
306
c        %------------------------------%
307
c        | Initial call to this routine |
308
c        %------------------------------%
309
c
310
         info   = 0
311
         step3  = .false.
312
         step4  = .false.
313
         rstart = .false.
314
         orth1  = .false.
315
         orth2  = .false.
316
c
317
c        %--------------------------------%
318
c        | Pointer to the current step of |
319
c        | the factorization to build     |
320
c        %--------------------------------%
321
c
322
         j      = k + 1
323
c
324
c        %------------------------------------------%
325
c        | Pointers used for reverse communication  |
326
c        | when using WORKD.                        |
327
c        %------------------------------------------%
328
c
329
         ipj    = 1
330
         irj    = ipj   + n
331
         ivj    = irj   + n
332
      end if
333
c
334
c     %-------------------------------------------------%
335
c     | When in reverse communication mode one of:      |
336
c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
337
c     | will be .true.                                  |
338
c     | STEP3: return from computing OP*v_{j}.          |
339
c     | STEP4: return from computing B-norm of OP*v_{j} |
340
c     | ORTH1: return from computing B-norm of r_{j+1}  |
341
c     | ORTH2: return from computing B-norm of          |
342
c     |        correction to the residual vector.       |
343
c     | RSTART: return from OP computations needed by   |
344
c     |         dgetv0.                                 |
345
c     %-------------------------------------------------%
346
c
347
      if (step3)  go to 50
348
      if (step4)  go to 60
349
      if (orth1)  go to 70
350
      if (orth2)  go to 90
351
      if (rstart) go to 30
352
c
353
c     %------------------------------%
354
c     | Else this is the first step. |
355
c     %------------------------------%
356
c
357
c     %--------------------------------------------------------------%
358
c     |                                                              |
359
c     |        A R N O L D I     I T E R A T I O N     L O O P       |
360
c     |                                                              |
361
c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
362
c     %--------------------------------------------------------------%
363
c
364
 1000 continue
365
c
366
         if (msglvl .gt. 2) then
367
            call ivout (logfil, 1, [j], ndigit,
368
     &                  '_saitr: generating Arnoldi vector no.')
369
            call dvout (logfil, 1, [rnorm], ndigit,
370
     &                  '_saitr: B-norm of the current residual =')
371
         end if
372
c
373
c        %---------------------------------------------------------%
374
c        | Check for exact zero. Equivalent to determing whether a |
375
c        | j-step Arnoldi factorization is present.                |
376
c        %---------------------------------------------------------%
377
c
378
         if (rnorm .gt. zero) go to 40
379
c
380
c           %---------------------------------------------------%
381
c           | Invariant subspace found, generate a new starting |
382
c           | vector which is orthogonal to the current Arnoldi |
383
c           | basis and continue the iteration.                 |
384
c           %---------------------------------------------------%
385
c
386
            if (msglvl .gt. 0) then
387
               call ivout (logfil, 1, [j], ndigit,
388
     &                     '_saitr: ****** restart at step ******')
389
            end if
390
c
391
c           %---------------------------------------------%
392
c           | ITRY is the loop variable that controls the |
393
c           | maximum amount of times that a restart is   |
394
c           | attempted. NRSTRT is used by stat.h         |
395
c           %---------------------------------------------%
396
c
397
            nrstrt = nrstrt + 1
398
            itry   = 1
399
   20       continue
400
            rstart = .true.
401
            ido    = 0
402
   30       continue
403
c
404
c           %--------------------------------------%
405
c           | If in reverse communication mode and |
406
c           | RSTART = .true. flow returns here.   |
407
c           %--------------------------------------%
408
c
409
            call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
410
     &                   resid, rnorm, ipntr, workd, ierr)
411
            if (ido .ne. 99) go to 9000
412
            if (ierr .lt. 0) then
413
               itry = itry + 1
414
               if (itry .le. 3) go to 20
415
c
416
c              %------------------------------------------------%
417
c              | Give up after several restart attempts.        |
418
c              | Set INFO to the size of the invariant subspace |
419
c              | which spans OP and exit.                       |
420
c              %------------------------------------------------%
421
c
422
               info = j - 1
423
               call second (t1)
424
               tsaitr = tsaitr + (t1 - t0)
425
               ido = 99
426
               go to 9000
427
            end if
428
c
429
   40    continue
430
c
431
c        %---------------------------------------------------------%
432
c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
433
c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
434
c        | when reciprocating a small RNORM, test against lower    |
435
c        | machine bound.                                          |
436
c        %---------------------------------------------------------%
437
c
438
         call dcopy (n, resid, 1, v(1,j), 1)
439
         if (rnorm .ge. safmin) then
440
             temp1 = one / rnorm
441
             call dscal (n, temp1, v(1,j), 1)
442
             call dscal (n, temp1, workd(ipj), 1)
443
         else
444
c
445
c            %-----------------------------------------%
446
c            | To scale both v_{j} and p_{j} carefully |
447
c            | use LAPACK routine SLASCL               |
448
c            %-----------------------------------------%
449
c
450
             call dlascl ('General', i, i, rnorm, one, n, 1,
451
     &                    v(1,j), n, infol)
452
             call dlascl ('General', i, i, rnorm, one, n, 1,
453
     &                    workd(ipj), n, infol)
454
         end if
455
c
456
c        %------------------------------------------------------%
457
c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
458
c        | Note that this is not quite yet r_{j}. See STEP 4    |
459
c        %------------------------------------------------------%
460
c
461
         step3 = .true.
462
         nopx  = nopx + 1
463
         call second (t2)
464
         call dcopy (n, v(1,j), 1, workd(ivj), 1)
465
         ipntr(1) = ivj
466
         ipntr(2) = irj
467
         ipntr(3) = ipj
468
         ido = 1
469
c
470
c        %-----------------------------------%
471
c        | Exit in order to compute OP*v_{j} |
472
c        %-----------------------------------%
473
c
474
         go to 9000
475
   50    continue
476
c
477
c        %-----------------------------------%
478
c        | Back from reverse communication;  |
479
c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}.   |
480
c        %-----------------------------------%
481
c
482
         call second (t3)
483
         tmvopx = tmvopx + (t3 - t2)
484
c
485
         step3 = .false.
486
c
487
c        %------------------------------------------%
488
c        | Put another copy of OP*v_{j} into RESID. |
489
c        %------------------------------------------%
490
c
491
         call dcopy (n, workd(irj), 1, resid, 1)
492
c
493
c        %-------------------------------------------%
494
c        | STEP 4:  Finish extending the symmetric   |
495
c        |          Arnoldi to length j. If MODE = 2 |
496
c        |          then B*OP = B*inv(B)*A = A and   |
497
c        |          we don't need to compute B*OP.   |
498
c        | NOTE: If MODE = 2 WORKD(IVJ:IVJ+N-1) is   |
499
c        | assumed to have A*v_{j}.                  |
500
c        %-------------------------------------------%
501
c
502
         if (mode .eq. 2) go to 65
503
         call second (t2)
504
         if (bmat .eq. 'G') then
505
            nbx = nbx + 1
506
            step4 = .true.
507
            ipntr(1) = irj
508
            ipntr(2) = ipj
509
            ido = 2
510
c
511
c           %-------------------------------------%
512
c           | Exit in order to compute B*OP*v_{j} |
513
c           %-------------------------------------%
514
c
515
            go to 9000
516
         else if (bmat .eq. 'I') then
517
              call dcopy(n, resid, 1 , workd(ipj), 1)
518
         end if
519
   60    continue
520
c
521
c        %-----------------------------------%
522
c        | Back from reverse communication;  |
523
c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j}. |
524
c        %-----------------------------------%
525
c
526
         if (bmat .eq. 'G') then
527
            call second (t3)
528
            tmvbx = tmvbx + (t3 - t2)
529
         end if
530
c
531
         step4 = .false.
532
c
533
c        %-------------------------------------%
534
c        | The following is needed for STEP 5. |
535
c        | Compute the B-norm of OP*v_{j}.     |
536
c        %-------------------------------------%
537
c
538
   65    continue
539
         if (mode .eq. 2) then
540
c
541
c           %----------------------------------%
542
c           | Note that the B-norm of OP*v_{j} |
543
c           | is the inv(B)-norm of A*v_{j}.   |
544
c           %----------------------------------%
545
c
546
            wnorm = ddot (n, resid, 1, workd(ivj), 1)
547
            wnorm = sqrt(abs(wnorm))
548
         else if (bmat .eq. 'G') then
549
            wnorm = ddot (n, resid, 1, workd(ipj), 1)
550
            wnorm = sqrt(abs(wnorm))
551
         else if (bmat .eq. 'I') then
552
            wnorm = dnrm2(n, resid, 1)
553
         end if
554
c
555
c        %-----------------------------------------%
556
c        | Compute the j-th residual corresponding |
557
c        | to the j step factorization.            |
558
c        | Use Classical Gram Schmidt and compute: |
559
c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
560
c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
561
c        %-----------------------------------------%
562
c
563
c
564
c        %------------------------------------------%
565
c        | Compute the j Fourier coefficients w_{j} |
566
c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
567
c        %------------------------------------------%
568
c
569
         if (mode .ne. 2 ) then
570
            call dgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero,
571
     &                  workd(irj), 1)
572
         else if (mode .eq. 2) then
573
            call dgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero,
574
     &                  workd(irj), 1)
575
         end if
576
c
577
c        %--------------------------------------%
578
c        | Orthgonalize r_{j} against V_{j}.    |
579
c        | RESID contains OP*v_{j}. See STEP 3. |
580
c        %--------------------------------------%
581
c
582
         call dgemv('N', n, j, -one, v, ldv, workd(irj), 1, one,
583
     &               resid, 1)
584
c
585
c        %--------------------------------------%
586
c        | Extend H to have j rows and columns. |
587
c        %--------------------------------------%
588
c
589
         h(j,2) = workd(irj + j - 1)
590
         if (j .eq. 1  .or.  rstart) then
591
            h(j,1) = zero
592
         else
593
            h(j,1) = rnorm
594
         end if
595
         call second (t4)
596
c
597
         orth1 = .true.
598
         iter  = 0
599
c
600
         call second (t2)
601
         if (bmat .eq. 'G') then
602
            nbx = nbx + 1
603
            call dcopy (n, resid, 1, workd(irj), 1)
604
            ipntr(1) = irj
605
            ipntr(2) = ipj
606
            ido = 2
607
c
608
c           %----------------------------------%
609
c           | Exit in order to compute B*r_{j} |
610
c           %----------------------------------%
611
c
612
            go to 9000
613
         else if (bmat .eq. 'I') then
614
            call dcopy (n, resid, 1, workd(ipj), 1)
615
         end if
616
   70    continue
617
c
618
c        %---------------------------------------------------%
619
c        | Back from reverse communication if ORTH1 = .true. |
620
c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
621
c        %---------------------------------------------------%
622
c
623
         if (bmat .eq. 'G') then
624
            call second (t3)
625
            tmvbx = tmvbx + (t3 - t2)
626
         end if
627
c
628
         orth1 = .false.
629
c
630
c        %------------------------------%
631
c        | Compute the B-norm of r_{j}. |
632
c        %------------------------------%
633
c
634
         if (bmat .eq. 'G') then
635
            rnorm = ddot (n, resid, 1, workd(ipj), 1)
636
            rnorm = sqrt(abs(rnorm))
637
         else if (bmat .eq. 'I') then
638
            rnorm = dnrm2(n, resid, 1)
639
         end if
640
c
641
c        %-----------------------------------------------------------%
642
c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
643
c        | Maximum NITER_ITREF tries.                                |
644
c        |                                                           |
645
c        |          s      = V_{j}^T * B * r_{j}                     |
646
c        |          r_{j}  = r_{j} - V_{j}*s                         |
647
c        |          alphaj = alphaj + s_{j}                          |
648
c        |                                                           |
649
c        | The stopping criteria used for iterative refinement is    |
650
c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
651
c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
652
c        | Determine if we need to correct the residual. The goal is |
653
c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
654
c        %-----------------------------------------------------------%
655
c
656
         if (rnorm .gt. 0.717*wnorm) go to 100
657
         nrorth = nrorth + 1
658
c
659
c        %---------------------------------------------------%
660
c        | Enter the Iterative refinement phase. If further  |
661
c        | refinement is necessary, loop back here. The loop |
662
c        | variable is ITER. Perform a step of Classical     |
663
c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
664
c        %---------------------------------------------------%
665
c
666
   80    continue
667
c
668
         if (msglvl .gt. 2) then
669
            xtemp(1) = wnorm
670
            xtemp(2) = rnorm
671
            call dvout (logfil, 2, xtemp, ndigit,
672
     &           '_saitr: re-orthonalization ; wnorm and rnorm are')
673
         end if
674
c
675
c        %----------------------------------------------------%
676
c        | Compute V_{j}^T * B * r_{j}.                       |
677
c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
678
c        %----------------------------------------------------%
679
c
680
         call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
681
     &               zero, workd(irj), 1)
682
c
683
c        %----------------------------------------------%
684
c        | Compute the correction to the residual:      |
685
c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1).  |
686
c        | The correction to H is v(:,1:J)*H(1:J,1:J) + |
687
c        | v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j, but only   |
688
c        | H(j,j) is updated.                           |
689
c        %----------------------------------------------%
690
c
691
         call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
692
     &               one, resid, 1)
693
c
694
         if (j .eq. 1  .or.  rstart) h(j,1) = zero
695
         h(j,2) = h(j,2) + workd(irj + j - 1)
696
c
697
         orth2 = .true.
698
         call second (t2)
699
         if (bmat .eq. 'G') then
700
            nbx = nbx + 1
701
            call dcopy (n, resid, 1, workd(irj), 1)
702
            ipntr(1) = irj
703
            ipntr(2) = ipj
704
            ido = 2
705
c
706
c           %-----------------------------------%
707
c           | Exit in order to compute B*r_{j}. |
708
c           | r_{j} is the corrected residual.  |
709
c           %-----------------------------------%
710
c
711
            go to 9000
712
         else if (bmat .eq. 'I') then
713
            call dcopy (n, resid, 1, workd(ipj), 1)
714
         end if
715
   90    continue
716
c
717
c        %---------------------------------------------------%
718
c        | Back from reverse communication if ORTH2 = .true. |
719
c        %---------------------------------------------------%
720
c
721
         if (bmat .eq. 'G') then
722
            call second (t3)
723
            tmvbx = tmvbx + (t3 - t2)
724
         end if
725
c
726
c        %-----------------------------------------------------%
727
c        | Compute the B-norm of the corrected residual r_{j}. |
728
c        %-----------------------------------------------------%
729
c
730
         if (bmat .eq. 'G') then
731
             rnorm1 = ddot (n, resid, 1, workd(ipj), 1)
732
             rnorm1 = sqrt(abs(rnorm1))
733
         else if (bmat .eq. 'I') then
734
             rnorm1 = dnrm2(n, resid, 1)
735
         end if
736
c
737
         if (msglvl .gt. 0 .and. iter .gt. 0) then
738
            call ivout (logfil, 1, [j], ndigit,
739
     &           '_saitr: Iterative refinement for Arnoldi residual')
740
            if (msglvl .gt. 2) then
741
                xtemp(1) = rnorm
742
                xtemp(2) = rnorm1
743
                call dvout (logfil, 2, xtemp, ndigit,
744
     &           '_saitr: iterative refinement ; rnorm and rnorm1 are')
745
            end if
746
         end if
747
c
748
c        %-----------------------------------------%
749
c        | Determine if we need to perform another |
750
c        | step of re-orthogonalization.           |
751
c        %-----------------------------------------%
752
c
753
         if (rnorm1 .gt. 0.717*rnorm) then
754
c
755
c           %--------------------------------%
756
c           | No need for further refinement |
757
c           %--------------------------------%
758
c
759
            rnorm = rnorm1
760
c
761
         else
762
c
763
c           %-------------------------------------------%
764
c           | Another step of iterative refinement step |
765
c           | is required. NITREF is used by stat.h     |
766
c           %-------------------------------------------%
767
c
768
            nitref = nitref + 1
769
            rnorm  = rnorm1
770
            iter   = iter + 1
771
            if (iter .le. 1) go to 80
772
c
773
c           %-------------------------------------------------%
774
c           | Otherwise RESID is numerically in the span of V |
775
c           %-------------------------------------------------%
776
c
777
            do 95 jj = 1, n
778
               resid(jj) = zero
779
  95        continue
780
            rnorm = zero
781
         end if
782
c
783
c        %----------------------------------------------%
784
c        | Branch here directly if iterative refinement |
785
c        | wasn't necessary or after at most NITER_REF  |
786
c        | steps of iterative refinement.               |
787
c        %----------------------------------------------%
788
c
789
  100    continue
790
c
791
         rstart = .false.
792
         orth2  = .false.
793
c
794
         call second (t5)
795
         titref = titref + (t5 - t4)
796
c
797
c        %----------------------------------------------------------%
798
c        | Make sure the last off-diagonal element is non negative  |
799
c        | If not perform a similarity transformation on H(1:j,1:j) |
800
c        | and scale v(:,j) by -1.                                  |
801
c        %----------------------------------------------------------%
802
c
803
         if (h(j,1) .lt. zero) then
804
            h(j,1) = -h(j,1)
805
            if ( j .lt. k+np) then
806
               call dscal(n, -one, v(1,j+1), 1)
807
            else
808
               call dscal(n, -one, resid, 1)
809
            end if
810
         end if
811
c
812
c        %------------------------------------%
813
c        | STEP 6: Update  j = j+1;  Continue |
814
c        %------------------------------------%
815
c
816
         j = j + 1
817
         if (j .gt. k+np) then
818
            call second (t1)
819
            tsaitr = tsaitr + (t1 - t0)
820
            ido = 99
821
c
822
            if (msglvl .gt. 1) then
823
               call dvout (logfil, k+np, h(1,2), ndigit,
824
     &         '_saitr: main diagonal of matrix H of step K+NP.')
825
               if (k+np .gt. 1) then
826
               call dvout (logfil, k+np-1, h(2,1), ndigit,
827
     &         '_saitr: sub diagonal of matrix H of step K+NP.')
828
               end if
829
            end if
830
c
831
            go to 9000
832
         end if
833
c
834
c        %--------------------------------------------------------%
835
c        | Loop back to extend the factorization by another step. |
836
c        %--------------------------------------------------------%
837
c
838
      go to 1000
839
c
840
c     %---------------------------------------------------------------%
841
c     |                                                               |
842
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
843
c     |                                                               |
844
c     %---------------------------------------------------------------%
845
c
846
 9000 continue
847
      return
848
c
849
c     %---------------%
850
c     | End of dsaitr |
851
c     %---------------%
852
c
853
      end
854

855
Product

Resources

Company