CoCalc -- dnaitr.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/dnaitr.f
⁵²²⁴ views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: dnaitr
5
c
6
c\Description:
7
c  Reverse communication interface for applying NP additional steps to
8
c  a K step nonsymmetric Arnoldi factorization.
9
c
10
c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
11
c
12
c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
13
c
14
c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
15
c
16
c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
17
c
18
c  where OP and B are as in dnaupd.  The B-norm of r_{k+p} is also
19
c  computed and returned.
20
c
21
c\Usage:
22
c  call dnaitr
23
c     ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
24
c       IPNTR, WORKD, INFO )
25
c
26
c\Arguments
27
c  IDO     Integer.  (INPUT/OUTPUT)
28
c          Reverse communication flag.
29
c          -------------------------------------------------------------
30
c          IDO =  0: first call to the reverse communication interface
31
c          IDO = -1: compute  Y = OP * X  where
32
c                    IPNTR(1) is the pointer into WORK for X,
33
c                    IPNTR(2) is the pointer into WORK for Y.
34
c                    This is for the restart phase to force the new
35
c                    starting vector into the range of OP.
36
c          IDO =  1: compute  Y = OP * X  where
37
c                    IPNTR(1) is the pointer into WORK for X,
38
c                    IPNTR(2) is the pointer into WORK for Y,
39
c                    IPNTR(3) is the pointer into WORK for B * X.
40
c          IDO =  2: compute  Y = B * X  where
41
c                    IPNTR(1) is the pointer into WORK for X,
42
c                    IPNTR(2) is the pointer into WORK for Y.
43
c          IDO = 99: done
44
c          -------------------------------------------------------------
45
c          When the routine is used in the "shift-and-invert" mode, the
46
c          vector B * Q is already available and do not need to be
47
c          recompute in forming OP * Q.
48
c
49
c  BMAT    Character*1.  (INPUT)
50
c          BMAT specifies the type of the matrix B that defines the
51
c          semi-inner product for the operator OP.  See dnaupd.
52
c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
53
c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
54
c
55
c  N       Integer.  (INPUT)
56
c          Dimension of the eigenproblem.
57
c
58
c  K       Integer.  (INPUT)
59
c          Current size of V and H.
60
c
61
c  NP      Integer.  (INPUT)
62
c          Number of additional Arnoldi steps to take.
63
c
64
c  NB      Integer.  (INPUT)
65
c          Blocksize to be used in the recurrence.
66
c          Only work for NB = 1 right now.  The goal is to have a
67
c          program that implement both the block and non-block method.
68
c
69
c  RESID   Double precision array of length N.  (INPUT/OUTPUT)
70
c          On INPUT:  RESID contains the residual vector r_{k}.
71
c          On OUTPUT: RESID contains the residual vector r_{k+p}.
72
c
73
c  RNORM   Double precision scalar.  (INPUT/OUTPUT)
74
c          B-norm of the starting residual on input.
75
c          B-norm of the updated residual r_{k+p} on output.
76
c
77
c  V       Double precision N by K+NP array.  (INPUT/OUTPUT)
78
c          On INPUT:  V contains the Arnoldi vectors in the first K
79
c          columns.
80
c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
81
c          NP columns.  The first K columns are unchanged.
82
c
83
c  LDV     Integer.  (INPUT)
84
c          Leading dimension of V exactly as declared in the calling
85
c          program.
86
c
87
c  H       Double precision (K+NP) by (K+NP) array.  (INPUT/OUTPUT)
88
c          H is used to store the generated upper Hessenberg matrix.
89
c
90
c  LDH     Integer.  (INPUT)
91
c          Leading dimension of H exactly as declared in the calling
92
c          program.
93
c
94
c  IPNTR   Integer array of length 3.  (OUTPUT)
95
c          Pointer to mark the starting locations in the WORK for
96
c          vectors used by the Arnoldi iteration.
97
c          -------------------------------------------------------------
98
c          IPNTR(1): pointer to the current operand vector X.
99
c          IPNTR(2): pointer to the current result vector Y.
100
c          IPNTR(3): pointer to the vector B * X when used in the
101
c                    shift-and-invert mode.  X is the current operand.
102
c          -------------------------------------------------------------
103
c
104
c  WORKD   Double precision work array of length 3*N.  (REVERSE COMMUNICATION)
105
c          Distributed array to be used in the basic Arnoldi iteration
106
c          for reverse communication.  The calling program should not
107
c          use WORKD as temporary workspace during the iteration !!!!!!
108
c          On input, WORKD(1:N) = B*RESID and is used to save some
109
c          computation at the first step.
110
c
111
c  INFO    Integer.  (OUTPUT)
112
c          = 0: Normal exit.
113
c          > 0: Size of the spanning invariant subspace of OP found.
114
c
115
c\EndDoc
116
c
117
c-----------------------------------------------------------------------
118
c
119
c\BeginLib
120
c
121
c\Local variables:
122
c     xxxxxx  real
123
c
124
c\References:
125
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
126
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
127
c     pp 357-385.
128
c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
129
c     Restarted Arnoldi Iteration", Rice University Technical Report
130
c     TR95-13, Department of Computational and Applied Mathematics.
131
c
132
c\Routines called:
133
c     dgetv0  ARPACK routine to generate the initial vector.
134
c     ivout   ARPACK utility routine that prints integers.
135
c     second  ARPACK utility routine for timing.
136
c     dmout   ARPACK utility routine that prints matrices
137
c     dvout   ARPACK utility routine that prints vectors.
138
c     dlabad  LAPACK routine that computes machine constants.
139
c     dlamch  LAPACK routine that determines machine constants.
140
c     dlascl  LAPACK routine for careful scaling of a matrix.
141
c     dlanhs  LAPACK routine that computes various norms of a matrix.
142
c     dgemv   Level 2 BLAS routine for matrix vector multiplication.
143
c     daxpy   Level 1 BLAS that computes a vector triad.
144
c     dscal   Level 1 BLAS that scales a vector.
145
c     dcopy   Level 1 BLAS that copies one vector to another .
146
c     ddot    Level 1 BLAS that computes the scalar product of two vectors.
147
c     dnrm2   Level 1 BLAS that computes the norm of a vector.
148
c
149
c\Author
150
c     Danny Sorensen               Phuong Vu
151
c     Richard Lehoucq              CRPC / Rice University
152
c     Dept. of Computational &     Houston, Texas
153
c     Applied Mathematics
154
c     Rice University
155
c     Houston, Texas
156
c
157
c\Revision history:
158
c     xx/xx/92: Version ' 2.4'
159
c
160
c\SCCS Information: @(#)
161
c FILE: naitr.F   SID: 2.4   DATE OF SID: 8/27/96   RELEASE: 2
162
c
163
c\Remarks
164
c  The algorithm implemented is:
165
c
166
c  restart = .false.
167
c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
168
c  r_{k} contains the initial residual vector even for k = 0;
169
c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
170
c  computed by the calling program.
171
c
172
c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
173
c  For  j = k+1, ..., k+np  Do
174
c     1) if ( betaj < tol ) stop or restart depending on j.
175
c        ( At present tol is zero )
176
c        if ( restart ) generate a new starting vector.
177
c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
178
c        p_{j} = p_{j}/betaj
179
c     3) r_{j} = OP*v_{j} where OP is defined as in dnaupd
180
c        For shift-invert mode p_{j} = B*v_{j} is already available.
181
c        wnorm = || OP*v_{j} ||
182
c     4) Compute the j-th step residual vector.
183
c        w_{j} =  V_{j}^T * B * OP * v_{j}
184
c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
185
c        H(:,j) = w_{j};
186
c        H(j,j-1) = rnorm
187
c        rnorm = || r_(j) ||
188
c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
189
c     5) Re-orthogonalization step:
190
c        s = V_{j}'*B*r_{j}
191
c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
192
c        alphaj = alphaj + s_{j};
193
c     6) Iterative refinement step:
194
c        If (rnorm1 > 0.717*rnorm) then
195
c           rnorm = rnorm1
196
c           accept step and go back to 1)
197
c        Else
198
c           rnorm = rnorm1
199
c           If this is the first time in step 6), go to 5)
200
c           Else r_{j} lies in the span of V_{j} numerically.
201
c              Set r_{j} = 0 and rnorm = 0; go to 1)
202
c        EndIf
203
c  End Do
204
c
205
c\EndLib
206
c
207
c-----------------------------------------------------------------------
208
c
209
      subroutine dnaitr
210
     &   (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
211
     &    ipntr, workd, info)
212
c
213
c     %----------------------------------------------------%
214
c     | Include files for debugging and timing information |
215
c     %----------------------------------------------------%
216
c
217
      include   'debug.h'
218
      include   'stat.h'
219
c
220
c     %------------------%
221
c     | Scalar Arguments |
222
c     %------------------%
223
c
224
      character  bmat*1
225
      integer    ido, info, k, ldh, ldv, n, nb, np
226
      Double precision
227
     &           rnorm
228
c
229
c     %-----------------%
230
c     | Array Arguments |
231
c     %-----------------%
232
c
233
      integer    ipntr(3)
234
      Double precision
235
     &           h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n)
236
c
237
c     %------------%
238
c     | Parameters |
239
c     %------------%
240
c
241
      Double precision
242
     &           one, zero
243
      parameter (one = 1.0D+0, zero = 0.0D+0)
244
c
245
c     %---------------%
246
c     | Local Scalars |
247
c     %---------------%
248
c
249
      logical    first, orth1, orth2, rstart, step3, step4
250
      integer    ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
251
     &           jj
252
      Double precision
253
     &           betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl,
254
     &           wnorm
255
      save       first, orth1, orth2, rstart, step3, step4,
256
     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
257
     &           betaj, rnorm1, smlnum, ulp, unfl, wnorm
258
c
259
c     %-----------------------%
260
c     | Local Array Arguments |
261
c     %-----------------------%
262
c
263
      Double precision
264
     &           xtemp(2)
265
c
266
c     %----------------------%
267
c     | External Subroutines |
268
c     %----------------------%
269
c
270
      external   daxpy, dcopy, dscal, dgemv, dgetv0, dlabad,
271
     &           dvout, dmout, ivout, second
272
c
273
c     %--------------------%
274
c     | External Functions |
275
c     %--------------------%
276
c
277
      Double precision
278
     &           ddot, dnrm2, dlanhs, dlamch
279
      external   ddot, dnrm2, dlanhs, dlamch
280
c
281
c     %---------------------%
282
c     | Intrinsic Functions |
283
c     %---------------------%
284
c
285
      intrinsic    abs, sqrt
286
c
287
c     %-----------------%
288
c     | Data statements |
289
c     %-----------------%
290
c
291
      data      first / .true. /
292
c
293
c     %-----------------------%
294
c     | Executable Statements |
295
c     %-----------------------%
296
c
297
      if (first) then
298
c
299
c        %-----------------------------------------%
300
c        | Set machine-dependent constants for the |
301
c        | the splitting and deflation criterion.  |
302
c        | If norm(H) <= sqrt(OVFL),               |
303
c        | overflow should not occur.              |
304
c        | REFERENCE: LAPACK subroutine dlahqr     |
305
c        %-----------------------------------------%
306
c
307
         unfl = dlamch( 'safe minimum' )
308
         ovfl = one / unfl
309
         call dlabad( unfl, ovfl )
310
         ulp = dlamch( 'precision' )
311
         smlnum = unfl*( n / ulp )
312
         first = .false.
313
      end if
314
c
315
      if (ido .eq. 0) then
316
c
317
c        %-------------------------------%
318
c        | Initialize timing statistics  |
319
c        | & message level for debugging |
320
c        %-------------------------------%
321
c
322
         call second (t0)
323
         msglvl = mnaitr
324
c
325
c        %------------------------------%
326
c        | Initial call to this routine |
327
c        %------------------------------%
328
c
329
         info   = 0
330
         step3  = .false.
331
         step4  = .false.
332
         rstart = .false.
333
         orth1  = .false.
334
         orth2  = .false.
335
         j      = k + 1
336
         ipj    = 1
337
         irj    = ipj   + n
338
         ivj    = irj   + n
339
      end if
340
c
341
c     %-------------------------------------------------%
342
c     | When in reverse communication mode one of:      |
343
c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
344
c     | will be .true. when ....                        |
345
c     | STEP3: return from computing OP*v_{j}.          |
346
c     | STEP4: return from computing B-norm of OP*v_{j} |
347
c     | ORTH1: return from computing B-norm of r_{j+1}  |
348
c     | ORTH2: return from computing B-norm of          |
349
c     |        correction to the residual vector.       |
350
c     | RSTART: return from OP computations needed by   |
351
c     |         dgetv0.                                 |
352
c     %-------------------------------------------------%
353
c
354
      if (step3)  go to 50
355
      if (step4)  go to 60
356
      if (orth1)  go to 70
357
      if (orth2)  go to 90
358
      if (rstart) go to 30
359
c
360
c     %-----------------------------%
361
c     | Else this is the first step |
362
c     %-----------------------------%
363
c
364
c     %--------------------------------------------------------------%
365
c     |                                                              |
366
c     |        A R N O L D I     I T E R A T I O N     L O O P       |
367
c     |                                                              |
368
c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
369
c     %--------------------------------------------------------------%
370

371
 1000 continue
372
c
373
         if (msglvl .gt. 1) then
374
            call ivout (logfil, 1, [j], ndigit,
375
     &                  '_naitr: generating Arnoldi vector number')
376
            call dvout (logfil, 1, [rnorm], ndigit,
377
     &                  '_naitr: B-norm of the current residual is')
378
         end if
379
c
380
c        %---------------------------------------------------%
381
c        | STEP 1: Check if the B norm of j-th residual      |
382
c        | vector is zero. Equivalent to determing whether   |
383
c        | an exact j-step Arnoldi factorization is present. |
384
c        %---------------------------------------------------%
385
c
386
         betaj = rnorm
387
         if (rnorm .gt. zero) go to 40
388
c
389
c           %---------------------------------------------------%
390
c           | Invariant subspace found, generate a new starting |
391
c           | vector which is orthogonal to the current Arnoldi |
392
c           | basis and continue the iteration.                 |
393
c           %---------------------------------------------------%
394
c
395
            if (msglvl .gt. 0) then
396
               call ivout (logfil, 1, [j], ndigit,
397
     &                     '_naitr: ****** RESTART AT STEP ******')
398
            end if
399
c
400
c           %---------------------------------------------%
401
c           | ITRY is the loop variable that controls the |
402
c           | maximum amount of times that a restart is   |
403
c           | attempted. NRSTRT is used by stat.h         |
404
c           %---------------------------------------------%
405
c
406
            betaj  = zero
407
            nrstrt = nrstrt + 1
408
            itry   = 1
409
   20       continue
410
            rstart = .true.
411
            ido    = 0
412
   30       continue
413
c
414
c           %--------------------------------------%
415
c           | If in reverse communication mode and |
416
c           | RSTART = .true. flow returns here.   |
417
c           %--------------------------------------%
418
c
419
            call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
420
     &                   resid, rnorm, ipntr, workd, ierr)
421
            if (ido .ne. 99) go to 9000
422
            if (ierr .lt. 0) then
423
               itry = itry + 1
424
               if (itry .le. 3) go to 20
425
c
426
c              %------------------------------------------------%
427
c              | Give up after several restart attempts.        |
428
c              | Set INFO to the size of the invariant subspace |
429
c              | which spans OP and exit.                       |
430
c              %------------------------------------------------%
431
c
432
               info = j - 1
433
               call second (t1)
434
               tnaitr = tnaitr + (t1 - t0)
435
               ido = 99
436
               go to 9000
437
            end if
438
c
439
   40    continue
440
c
441
c        %---------------------------------------------------------%
442
c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
443
c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
444
c        | when reciprocating a small RNORM, test against lower    |
445
c        | machine bound.                                          |
446
c        %---------------------------------------------------------%
447
c
448
         call dcopy (n, resid, 1, v(1,j), 1)
449
         if (rnorm .ge. unfl) then
450
             temp1 = one / rnorm
451
             call dscal (n, temp1, v(1,j), 1)
452
             call dscal (n, temp1, workd(ipj), 1)
453
         else
454
c
455
c            %-----------------------------------------%
456
c            | To scale both v_{j} and p_{j} carefully |
457
c            | use LAPACK routine SLASCL               |
458
c            %-----------------------------------------%
459
c
460
             call dlascl ('General', i, i, rnorm, one, n, 1,
461
     &                    v(1,j), n, infol)
462
             call dlascl ('General', i, i, rnorm, one, n, 1,
463
     &                    workd(ipj), n, infol)
464
         end if
465
c
466
c        %------------------------------------------------------%
467
c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
468
c        | Note that this is not quite yet r_{j}. See STEP 4    |
469
c        %------------------------------------------------------%
470
c
471
         step3 = .true.
472
         nopx  = nopx + 1
473
         call second (t2)
474
         call dcopy (n, v(1,j), 1, workd(ivj), 1)
475
         ipntr(1) = ivj
476
         ipntr(2) = irj
477
         ipntr(3) = ipj
478
         ido = 1
479
c
480
c        %-----------------------------------%
481
c        | Exit in order to compute OP*v_{j} |
482
c        %-----------------------------------%
483
c
484
         go to 9000
485
   50    continue
486
c
487
c        %----------------------------------%
488
c        | Back from reverse communication; |
489
c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   |
490
c        | if step3 = .true.                |
491
c        %----------------------------------%
492
c
493
         call second (t3)
494
         tmvopx = tmvopx + (t3 - t2)
495

496
         step3 = .false.
497
c
498
c        %------------------------------------------%
499
c        | Put another copy of OP*v_{j} into RESID. |
500
c        %------------------------------------------%
501
c
502
         call dcopy (n, workd(irj), 1, resid, 1)
503
c
504
c        %---------------------------------------%
505
c        | STEP 4:  Finish extending the Arnoldi |
506
c        |          factorization to length j.   |
507
c        %---------------------------------------%
508
c
509
         call second (t2)
510
         if (bmat .eq. 'G') then
511
            nbx = nbx + 1
512
            step4 = .true.
513
            ipntr(1) = irj
514
            ipntr(2) = ipj
515
            ido = 2
516
c
517
c           %-------------------------------------%
518
c           | Exit in order to compute B*OP*v_{j} |
519
c           %-------------------------------------%
520
c
521
            go to 9000
522
         else if (bmat .eq. 'I') then
523
            call dcopy (n, resid, 1, workd(ipj), 1)
524
         end if
525
   60    continue
526
c
527
c        %----------------------------------%
528
c        | Back from reverse communication; |
529
c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
530
c        | if step4 = .true.                |
531
c        %----------------------------------%
532
c
533
         if (bmat .eq. 'G') then
534
            call second (t3)
535
            tmvbx = tmvbx + (t3 - t2)
536
         end if
537
c
538
         step4 = .false.
539
c
540
c        %-------------------------------------%
541
c        | The following is needed for STEP 5. |
542
c        | Compute the B-norm of OP*v_{j}.     |
543
c        %-------------------------------------%
544
c
545
         if (bmat .eq. 'G') then
546
             wnorm = ddot (n, resid, 1, workd(ipj), 1)
547
             wnorm = sqrt(abs(wnorm))
548
         else if (bmat .eq. 'I') then
549
            wnorm = dnrm2(n, resid, 1)
550
         end if
551
c
552
c        %-----------------------------------------%
553
c        | Compute the j-th residual corresponding |
554
c        | to the j step factorization.            |
555
c        | Use Classical Gram Schmidt and compute: |
556
c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
557
c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
558
c        %-----------------------------------------%
559
c
560
c
561
c        %------------------------------------------%
562
c        | Compute the j Fourier coefficients w_{j} |
563
c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
564
c        %------------------------------------------%
565
c
566
         call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
567
     &               zero, h(1,j), 1)
568
c
569
c        %--------------------------------------%
570
c        | Orthogonalize r_{j} against V_{j}.   |
571
c        | RESID contains OP*v_{j}. See STEP 3. |
572
c        %--------------------------------------%
573
c
574
         call dgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
575
     &               one, resid, 1)
576
c
577
         if (j .gt. 1) h(j,j-1) = betaj
578
c
579
         call second (t4)
580
c
581
         orth1 = .true.
582
c
583
         call second (t2)
584
         if (bmat .eq. 'G') then
585
            nbx = nbx + 1
586
            call dcopy (n, resid, 1, workd(irj), 1)
587
            ipntr(1) = irj
588
            ipntr(2) = ipj
589
            ido = 2
590
c
591
c           %----------------------------------%
592
c           | Exit in order to compute B*r_{j} |
593
c           %----------------------------------%
594
c
595
            go to 9000
596
         else if (bmat .eq. 'I') then
597
            call dcopy (n, resid, 1, workd(ipj), 1)
598
         end if
599
   70    continue
600
c
601
c        %---------------------------------------------------%
602
c        | Back from reverse communication if ORTH1 = .true. |
603
c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
604
c        %---------------------------------------------------%
605
c
606
         if (bmat .eq. 'G') then
607
            call second (t3)
608
            tmvbx = tmvbx + (t3 - t2)
609
         end if
610
c
611
         orth1 = .false.
612
c
613
c        %------------------------------%
614
c        | Compute the B-norm of r_{j}. |
615
c        %------------------------------%
616
c
617
         if (bmat .eq. 'G') then
618
            rnorm = ddot (n, resid, 1, workd(ipj), 1)
619
            rnorm = sqrt(abs(rnorm))
620
         else if (bmat .eq. 'I') then
621
            rnorm = dnrm2(n, resid, 1)
622
         end if
623
c
624
c        %-----------------------------------------------------------%
625
c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
626
c        | Maximum NITER_ITREF tries.                                |
627
c        |                                                           |
628
c        |          s      = V_{j}^T * B * r_{j}                     |
629
c        |          r_{j}  = r_{j} - V_{j}*s                         |
630
c        |          alphaj = alphaj + s_{j}                          |
631
c        |                                                           |
632
c        | The stopping criteria used for iterative refinement is    |
633
c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
634
c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
635
c        | Determine if we need to correct the residual. The goal is |
636
c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
637
c        | The following test determines whether the sine of the     |
638
c        | angle between  OP*x and the computed residual is less     |
639
c        | than or equal to 0.717.                                   |
640
c        %-----------------------------------------------------------%
641
c
642
         if (rnorm .gt. 0.717*wnorm) go to 100
643
         iter  = 0
644
         nrorth = nrorth + 1
645
c
646
c        %---------------------------------------------------%
647
c        | Enter the Iterative refinement phase. If further  |
648
c        | refinement is necessary, loop back here. The loop |
649
c        | variable is ITER. Perform a step of Classical     |
650
c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
651
c        %---------------------------------------------------%
652
c
653
   80    continue
654
c
655
         if (msglvl .gt. 2) then
656
            xtemp(1) = wnorm
657
            xtemp(2) = rnorm
658
            call dvout (logfil, 2, xtemp, ndigit,
659
     &           '_naitr: re-orthonalization; wnorm and rnorm are')
660
            call dvout (logfil, j, h(1,j), ndigit,
661
     &                  '_naitr: j-th column of H')
662
         end if
663
c
664
c        %----------------------------------------------------%
665
c        | Compute V_{j}^T * B * r_{j}.                       |
666
c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
667
c        %----------------------------------------------------%
668
c
669
         call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
670
     &               zero, workd(irj), 1)
671
c
672
c        %---------------------------------------------%
673
c        | Compute the correction to the residual:     |
674
c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
675
c        | The correction to H is v(:,1:J)*H(1:J,1:J)  |
676
c        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         |
677
c        %---------------------------------------------%
678
c
679
         call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
680
     &               one, resid, 1)
681
         call daxpy (j, one, workd(irj), 1, h(1,j), 1)
682
c
683
         orth2 = .true.
684
         call second (t2)
685
         if (bmat .eq. 'G') then
686
            nbx = nbx + 1
687
            call dcopy (n, resid, 1, workd(irj), 1)
688
            ipntr(1) = irj
689
            ipntr(2) = ipj
690
            ido = 2
691
c
692
c           %-----------------------------------%
693
c           | Exit in order to compute B*r_{j}. |
694
c           | r_{j} is the corrected residual.  |
695
c           %-----------------------------------%
696
c
697
            go to 9000
698
         else if (bmat .eq. 'I') then
699
            call dcopy (n, resid, 1, workd(ipj), 1)
700
         end if
701
   90    continue
702
c
703
c        %---------------------------------------------------%
704
c        | Back from reverse communication if ORTH2 = .true. |
705
c        %---------------------------------------------------%
706
c
707
         if (bmat .eq. 'G') then
708
            call second (t3)
709
            tmvbx = tmvbx + (t3 - t2)
710
         end if
711
c
712
c        %-----------------------------------------------------%
713
c        | Compute the B-norm of the corrected residual r_{j}. |
714
c        %-----------------------------------------------------%
715
c
716
         if (bmat .eq. 'G') then
717
             rnorm1 = ddot (n, resid, 1, workd(ipj), 1)
718
             rnorm1 = sqrt(abs(rnorm1))
719
         else if (bmat .eq. 'I') then
720
             rnorm1 = dnrm2(n, resid, 1)
721
         end if
722
c
723
         if (msglvl .gt. 0 .and. iter .gt. 0) then
724
            call ivout (logfil, 1, [j], ndigit,
725
     &           '_naitr: Iterative refinement for Arnoldi residual')
726
            if (msglvl .gt. 2) then
727
                xtemp(1) = rnorm
728
                xtemp(2) = rnorm1
729
                call dvout (logfil, 2, xtemp, ndigit,
730
     &           '_naitr: iterative refinement ; rnorm and rnorm1 are')
731
            end if
732
         end if
733
c
734
c        %-----------------------------------------%
735
c        | Determine if we need to perform another |
736
c        | step of re-orthogonalization.           |
737
c        %-----------------------------------------%
738
c
739
         if (rnorm1 .gt. 0.717*rnorm) then
740
c
741
c           %---------------------------------------%
742
c           | No need for further refinement.       |
743
c           | The cosine of the angle between the   |
744
c           | corrected residual vector and the old |
745
c           | residual vector is greater than 0.717 |
746
c           | In other words the corrected residual |
747
c           | and the old residual vector share an  |
748
c           | angle of less than arcCOS(0.717)      |
749
c           %---------------------------------------%
750
c
751
            rnorm = rnorm1
752
c
753
         else
754
c
755
c           %-------------------------------------------%
756
c           | Another step of iterative refinement step |
757
c           | is required. NITREF is used by stat.h     |
758
c           %-------------------------------------------%
759
c
760
            nitref = nitref + 1
761
            rnorm  = rnorm1
762
            iter   = iter + 1
763
            if (iter .le. 1) go to 80
764
c
765
c           %-------------------------------------------------%
766
c           | Otherwise RESID is numerically in the span of V |
767
c           %-------------------------------------------------%
768
c
769
            do 95 jj = 1, n
770
               resid(jj) = zero
771
  95        continue
772
            rnorm = zero
773
         end if
774
c
775
c        %----------------------------------------------%
776
c        | Branch here directly if iterative refinement |
777
c        | wasn't necessary or after at most NITER_REF  |
778
c        | steps of iterative refinement.               |
779
c        %----------------------------------------------%
780
c
781
  100    continue
782
c
783
         rstart = .false.
784
         orth2  = .false.
785
c
786
         call second (t5)
787
         titref = titref + (t5 - t4)
788
c
789
c        %------------------------------------%
790
c        | STEP 6: Update  j = j+1;  Continue |
791
c        %------------------------------------%
792
c
793
         j = j + 1
794
         if (j .gt. k+np) then
795
            call second (t1)
796
            tnaitr = tnaitr + (t1 - t0)
797
            ido = 99
798
            do 110 i = max(1,k), k+np-1
799
c
800
c              %--------------------------------------------%
801
c              | Check for splitting and deflation.         |
802
c              | Use a standard test as in the QR algorithm |
803
c              | REFERENCE: LAPACK subroutine dlahqr        |
804
c              %--------------------------------------------%
805
c
806
               tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
807
               if( tst1.eq.zero )
808
     &              tst1 = dlanhs( '1', k+np, h, ldh, workd(n+1) )
809
               if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) )
810
     &              h(i+1,i) = zero
811
 110        continue
812
c
813
            if (msglvl .gt. 2) then
814
               call dmout (logfil, k+np, k+np, h, ldh, ndigit,
815
     &          '_naitr: Final upper Hessenberg matrix H of order K+NP')
816
            end if
817
c
818
            go to 9000
819
         end if
820
c
821
c        %--------------------------------------------------------%
822
c        | Loop back to extend the factorization by another step. |
823
c        %--------------------------------------------------------%
824
c
825
      go to 1000
826
c
827
c     %---------------------------------------------------------------%
828
c     |                                                               |
829
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
830
c     |                                                               |
831
c     %---------------------------------------------------------------%
832
c
833
 9000 continue
834
      return
835
c
836
c     %---------------%
837
c     | End of dnaitr |
838
c     %---------------%
839
c
840
      end
841

842
Product

Resources

Company