CoCalc -- cnaitr.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/cnaitr.f
⁵²¹⁵ views
1
c\BeginDoc
2
c
3
c\Name: cnaitr
4
c
5
c\Description:
6
c  Reverse communication interface for applying NP additional steps to
7
c  a K step nonsymmetric Arnoldi factorization.
8
c
9
c  Input:  OP*V_{k}  -  V_{k}*H = r_{k}*e_{k}^T
10
c
11
c          with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
12
c
13
c  Output: OP*V_{k+p}  -  V_{k+p}*H = r_{k+p}*e_{k+p}^T
14
c
15
c          with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
16
c
17
c  where OP and B are as in cnaupd.  The B-norm of r_{k+p} is also
18
c  computed and returned.
19
c
20
c\Usage:
21
c  call cnaitr
22
c     ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
23
c       IPNTR, WORKD, INFO )
24
c
25
c\Arguments
26
c  IDO     Integer.  (INPUT/OUTPUT)
27
c          Reverse communication flag.
28
c          -------------------------------------------------------------
29
c          IDO =  0: first call to the reverse communication interface
30
c          IDO = -1: compute  Y = OP * X  where
31
c                    IPNTR(1) is the pointer into WORK for X,
32
c                    IPNTR(2) is the pointer into WORK for Y.
33
c                    This is for the restart phase to force the new
34
c                    starting vector into the range of OP.
35
c          IDO =  1: compute  Y = OP * X  where
36
c                    IPNTR(1) is the pointer into WORK for X,
37
c                    IPNTR(2) is the pointer into WORK for Y,
38
c                    IPNTR(3) is the pointer into WORK for B * X.
39
c          IDO =  2: compute  Y = B * X  where
40
c                    IPNTR(1) is the pointer into WORK for X,
41
c                    IPNTR(2) is the pointer into WORK for Y.
42
c          IDO = 99: done
43
c          -------------------------------------------------------------
44
c          When the routine is used in the "shift-and-invert" mode, the
45
c          vector B * Q is already available and do not need to be
46
c          recomputed in forming OP * Q.
47
c
48
c  BMAT    Character*1.  (INPUT)
49
c          BMAT specifies the type of the matrix B that defines the
50
c          semi-inner product for the operator OP.  See cnaupd.
51
c          B = 'I' -> standard eigenvalue problem A*x = lambda*x
52
c          B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
53
c
54
c  N       Integer.  (INPUT)
55
c          Dimension of the eigenproblem.
56
c
57
c  K       Integer.  (INPUT)
58
c          Current size of V and H.
59
c
60
c  NP      Integer.  (INPUT)
61
c          Number of additional Arnoldi steps to take.
62
c
63
c  NB      Integer.  (INPUT)
64
c          Blocksize to be used in the recurrence.
65
c          Only work for NB = 1 right now.  The goal is to have a
66
c          program that implement both the block and non-block method.
67
c
68
c  RESID   Complex array of length N.  (INPUT/OUTPUT)
69
c          On INPUT:  RESID contains the residual vector r_{k}.
70
c          On OUTPUT: RESID contains the residual vector r_{k+p}.
71
c
72
c  RNORM   Real scalar.  (INPUT/OUTPUT)
73
c          B-norm of the starting residual on input.
74
c          B-norm of the updated residual r_{k+p} on output.
75
c
76
c  V       Complex N by K+NP array.  (INPUT/OUTPUT)
77
c          On INPUT:  V contains the Arnoldi vectors in the first K
78
c          columns.
79
c          On OUTPUT: V contains the new NP Arnoldi vectors in the next
80
c          NP columns.  The first K columns are unchanged.
81
c
82
c  LDV     Integer.  (INPUT)
83
c          Leading dimension of V exactly as declared in the calling
84
c          program.
85
c
86
c  H       Complex (K+NP) by (K+NP) array.  (INPUT/OUTPUT)
87
c          H is used to store the generated upper Hessenberg matrix.
88
c
89
c  LDH     Integer.  (INPUT)
90
c          Leading dimension of H exactly as declared in the calling
91
c          program.
92
c
93
c  IPNTR   Integer array of length 3.  (OUTPUT)
94
c          Pointer to mark the starting locations in the WORK for
95
c          vectors used by the Arnoldi iteration.
96
c          -------------------------------------------------------------
97
c          IPNTR(1): pointer to the current operand vector X.
98
c          IPNTR(2): pointer to the current result vector Y.
99
c          IPNTR(3): pointer to the vector B * X when used in the
100
c                    shift-and-invert mode.  X is the current operand.
101
c          -------------------------------------------------------------
102
c
103
c  WORKD   Complex work array of length 3*N.  (REVERSE COMMUNICATION)
104
c          Distributed array to be used in the basic Arnoldi iteration
105
c          for reverse communication.  The calling program should not
106
c          use WORKD as temporary workspace during the iteration !!!!!!
107
c          On input, WORKD(1:N) = B*RESID and is used to save some
108
c          computation at the first step.
109
c
110
c  INFO    Integer.  (OUTPUT)
111
c          = 0: Normal exit.
112
c          > 0: Size of the spanning invariant subspace of OP found.
113
c
114
c\EndDoc
115
c
116
c-----------------------------------------------------------------------
117
c
118
c\BeginLib
119
c
120
c\Local variables:
121
c     xxxxxx  Complex
122
c
123
c\References:
124
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
125
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
126
c     pp 357-385.
127
c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
128
c     Restarted Arnoldi Iteration", Rice University Technical Report
129
c     TR95-13, Department of Computational and Applied Mathematics.
130
c
131
c\Routines called:
132
c     cgetv0  ARPACK routine to generate the initial vector.
133
c     ivout   ARPACK utility routine that prints integers.
134
c     second  ARPACK utility routine for timing.
135
c     cmout   ARPACK utility routine that prints matrices
136
c     cvout   ARPACK utility routine that prints vectors.
137
c     clanhs  LAPACK routine that computes various norms of a matrix.
138
c     clascl  LAPACK routine for careful scaling of a matrix.
139
c     slabad  LAPACK routine for defining the underflow and overflow
140
c             limits.
141
c     slamch  LAPACK routine that determines machine constants.
142
c     slapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
143
c     cgemv   Level 2 BLAS routine for matrix vector multiplication.
144
c     caxpy   Level 1 BLAS that computes a vector triad.
145
c     ccopy   Level 1 BLAS that copies one vector to another .
146
c     cdotc   Level 1 BLAS that computes the scalar product of two vectors.
147
c     cscal   Level 1 BLAS that scales a vector.
148
c     csscal  Level 1 BLAS that scales a complex vector by a real number.
149
c     scnrm2  Level 1 BLAS that computes the norm of a vector.
150
c
151
c\Author
152
c     Danny Sorensen               Phuong Vu
153
c     Richard Lehoucq              CRPC / Rice University
154
c     Dept. of Computational &     Houston, Texas
155
c     Applied Mathematics
156
c     Rice University
157
c     Houston, Texas
158
c
159
c\SCCS Information: @(#)
160
c FILE: naitr.F   SID: 2.3   DATE OF SID: 8/27/96   RELEASE: 2
161
c
162
c\Remarks
163
c  The algorithm implemented is:
164
c
165
c  restart = .false.
166
c  Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
167
c  r_{k} contains the initial residual vector even for k = 0;
168
c  Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
169
c  computed by the calling program.
170
c
171
c  betaj = rnorm ; p_{k+1} = B*r_{k} ;
172
c  For  j = k+1, ..., k+np  Do
173
c     1) if ( betaj < tol ) stop or restart depending on j.
174
c        ( At present tol is zero )
175
c        if ( restart ) generate a new starting vector.
176
c     2) v_{j} = r(j-1)/betaj;  V_{j} = [V_{j-1}, v_{j}];
177
c        p_{j} = p_{j}/betaj
178
c     3) r_{j} = OP*v_{j} where OP is defined as in cnaupd
179
c        For shift-invert mode p_{j} = B*v_{j} is already available.
180
c        wnorm = || OP*v_{j} ||
181
c     4) Compute the j-th step residual vector.
182
c        w_{j} =  V_{j}^T * B * OP * v_{j}
183
c        r_{j} =  OP*v_{j} - V_{j} * w_{j}
184
c        H(:,j) = w_{j};
185
c        H(j,j-1) = rnorm
186
c        rnorm = || r_(j) ||
187
c        If (rnorm > 0.717*wnorm) accept step and go back to 1)
188
c     5) Re-orthogonalization step:
189
c        s = V_{j}'*B*r_{j}
190
c        r_{j} = r_{j} - V_{j}*s;  rnorm1 = || r_{j} ||
191
c        alphaj = alphaj + s_{j};
192
c     6) Iterative refinement step:
193
c        If (rnorm1 > 0.717*rnorm) then
194
c           rnorm = rnorm1
195
c           accept step and go back to 1)
196
c        Else
197
c           rnorm = rnorm1
198
c           If this is the first time in step 6), go to 5)
199
c           Else r_{j} lies in the span of V_{j} numerically.
200
c              Set r_{j} = 0 and rnorm = 0; go to 1)
201
c        EndIf
202
c  End Do
203
c
204
c\EndLib
205
c
206
c-----------------------------------------------------------------------
207
c
208
      subroutine cnaitr
209
     &   (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
210
     &    ipntr, workd, info)
211
c
212
c     %----------------------------------------------------%
213
c     | Include files for debugging and timing information |
214
c     %----------------------------------------------------%
215
c
216
      include   'debug.h'
217
      include   'stat.h'
218
c
219
c     %------------------%
220
c     | Scalar Arguments |
221
c     %------------------%
222
c
223
      character  bmat*1
224
      integer    ido, info, k, ldh, ldv, n, nb, np
225
      Real
226
     &           rnorm
227
c
228
c     %-----------------%
229
c     | Array Arguments |
230
c     %-----------------%
231
c
232
      integer    ipntr(3)
233
      Complex
234
     &           h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n)
235
c
236
c     %------------%
237
c     | Parameters |
238
c     %------------%
239
c
240
      Complex
241
     &           one, zero
242
      Real
243
     &           rone, rzero
244
      parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0),
245
     &           rone = 1.0E+0, rzero = 0.0E+0)
246
c
247
c     %--------------%
248
c     | Local Arrays |
249
c     %--------------%
250
c
251
      Real
252
     &           rtemp(2)
253
c
254
c     %---------------%
255
c     | Local Scalars |
256
c     %---------------%
257
c
258
      logical    first, orth1, orth2, rstart, step3, step4
259
      integer    ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
260
     &           jj
261
      Real
262
     &           ovfl, smlnum, tst1, ulp, unfl, betaj,
263
     &           temp1, rnorm1, wnorm
264
      Complex
265
     &           cnorm
266
c
267
      save       first, orth1, orth2, rstart, step3, step4,
268
     &           ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
269
     &           betaj, rnorm1, smlnum, ulp, unfl, wnorm
270
c
271
c     %----------------------%
272
c     | External Subroutines |
273
c     %----------------------%
274
c
275
      external   caxpy, ccopy, cscal, csscal, cgemv, cgetv0,
276
     &           slabad, cvout, cmout, ivout, second
277
c
278
c     %--------------------%
279
c     | External Functions |
280
c     %--------------------%
281
c
282
      Complex
283
     &           cdotc
284
      Real
285
     &           slamch,  scnrm2, clanhs, slapy2
286
      external   cdotc, scnrm2, clanhs, slamch, slapy2
287
c
288
c     %---------------------%
289
c     | Intrinsic Functions |
290
c     %---------------------%
291
c
292
      intrinsic  aimag, real, max, sqrt
293
c
294
c     %-----------------%
295
c     | Data statements |
296
c     %-----------------%
297
c
298
      data       first / .true. /
299
c
300
c     %-----------------------%
301
c     | Executable Statements |
302
c     %-----------------------%
303
c
304
      if (first) then
305
c
306
c        %-----------------------------------------%
307
c        | Set machine-dependent constants for the |
308
c        | the splitting and deflation criterion.  |
309
c        | If norm(H) <= sqrt(OVFL),               |
310
c        | overflow should not occur.              |
311
c        | REFERENCE: LAPACK subroutine clahqr     |
312
c        %-----------------------------------------%
313
c
314
         unfl = slamch( 'safe minimum' )
315
         ovfl = real(one / unfl)
316
         call slabad( unfl, ovfl )
317
         ulp = slamch( 'precision' )
318
         smlnum = unfl*( n / ulp )
319
         first = .false.
320
      end if
321
c
322
      if (ido .eq. 0) then
323
c
324
c        %-------------------------------%
325
c        | Initialize timing statistics  |
326
c        | & message level for debugging |
327
c        %-------------------------------%
328
c
329
         call second (t0)
330
         msglvl = mcaitr
331
c
332
c        %------------------------------%
333
c        | Initial call to this routine |
334
c        %------------------------------%
335
c
336
         info   = 0
337
         step3  = .false.
338
         step4  = .false.
339
         rstart = .false.
340
         orth1  = .false.
341
         orth2  = .false.
342
         j      = k + 1
343
         ipj    = 1
344
         irj    = ipj   + n
345
         ivj    = irj   + n
346
      end if
347
c
348
c     %-------------------------------------------------%
349
c     | When in reverse communication mode one of:      |
350
c     | STEP3, STEP4, ORTH1, ORTH2, RSTART              |
351
c     | will be .true. when ....                        |
352
c     | STEP3: return from computing OP*v_{j}.          |
353
c     | STEP4: return from computing B-norm of OP*v_{j} |
354
c     | ORTH1: return from computing B-norm of r_{j+1}  |
355
c     | ORTH2: return from computing B-norm of          |
356
c     |        correction to the residual vector.       |
357
c     | RSTART: return from OP computations needed by   |
358
c     |         cgetv0.                                 |
359
c     %-------------------------------------------------%
360
c
361
      if (step3)  go to 50
362
      if (step4)  go to 60
363
      if (orth1)  go to 70
364
      if (orth2)  go to 90
365
      if (rstart) go to 30
366
c
367
c     %-----------------------------%
368
c     | Else this is the first step |
369
c     %-----------------------------%
370
c
371
c     %--------------------------------------------------------------%
372
c     |                                                              |
373
c     |        A R N O L D I     I T E R A T I O N     L O O P       |
374
c     |                                                              |
375
c     | Note:  B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
376
c     %--------------------------------------------------------------%
377

378
 1000 continue
379
c
380
         if (msglvl .gt. 1) then
381
            call ivout (logfil, 1, [j], ndigit,
382
     &                  '_naitr: generating Arnoldi vector number')
383
            call svout (logfil, 1, [rnorm], ndigit,
384
     &                  '_naitr: B-norm of the current residual is')
385
         end if
386
c
387
c        %---------------------------------------------------%
388
c        | STEP 1: Check if the B norm of j-th residual      |
389
c        | vector is zero. Equivalent to determine whether   |
390
c        | an exact j-step Arnoldi factorization is present. |
391
c        %---------------------------------------------------%
392
c
393
         betaj = rnorm
394
         if (rnorm .gt. rzero) go to 40
395
c
396
c           %---------------------------------------------------%
397
c           | Invariant subspace found, generate a new starting |
398
c           | vector which is orthogonal to the current Arnoldi |
399
c           | basis and continue the iteration.                 |
400
c           %---------------------------------------------------%
401
c
402
            if (msglvl .gt. 0) then
403
               call ivout (logfil, 1, [j], ndigit,
404
     &                     '_naitr: ****** RESTART AT STEP ******')
405
            end if
406
c
407
c           %---------------------------------------------%
408
c           | ITRY is the loop variable that controls the |
409
c           | maximum amount of times that a restart is   |
410
c           | attempted. NRSTRT is used by stat.h         |
411
c           %---------------------------------------------%
412
c
413
            betaj  = rzero
414
            nrstrt = nrstrt + 1
415
            itry   = 1
416
   20       continue
417
            rstart = .true.
418
            ido    = 0
419
   30       continue
420
c
421
c           %--------------------------------------%
422
c           | If in reverse communication mode and |
423
c           | RSTART = .true. flow returns here.   |
424
c           %--------------------------------------%
425
c
426
            call cgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
427
     &                   resid, rnorm, ipntr, workd, ierr)
428
            if (ido .ne. 99) go to 9000
429
            if (ierr .lt. 0) then
430
               itry = itry + 1
431
               if (itry .le. 3) go to 20
432
c
433
c              %------------------------------------------------%
434
c              | Give up after several restart attempts.        |
435
c              | Set INFO to the size of the invariant subspace |
436
c              | which spans OP and exit.                       |
437
c              %------------------------------------------------%
438
c
439
               info = j - 1
440
               call second (t1)
441
               tcaitr = tcaitr + (t1 - t0)
442
               ido = 99
443
               go to 9000
444
            end if
445
c
446
   40    continue
447
c
448
c        %---------------------------------------------------------%
449
c        | STEP 2:  v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm  |
450
c        | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
451
c        | when reciprocating a small RNORM, test against lower    |
452
c        | machine bound.                                          |
453
c        %---------------------------------------------------------%
454
c
455
         call ccopy (n, resid, 1, v(1,j), 1)
456
         if ( rnorm .ge. unfl) then
457
             temp1 = rone / rnorm
458
             call csscal (n, temp1, v(1,j), 1)
459
             call csscal (n, temp1, workd(ipj), 1)
460
         else
461
c
462
c            %-----------------------------------------%
463
c            | To scale both v_{j} and p_{j} carefully |
464
c            | use LAPACK routine clascl               |
465
c            %-----------------------------------------%
466
c
467
             call clascl ('General', i, i, rnorm, rone,
468
     &                    n, 1, v(1,j), n, infol)
469
             call clascl ('General', i, i, rnorm, rone,
470
     &                    n, 1, workd(ipj), n, infol)
471
         end if
472
c
473
c        %------------------------------------------------------%
474
c        | STEP 3:  r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
475
c        | Note that this is not quite yet r_{j}. See STEP 4    |
476
c        %------------------------------------------------------%
477
c
478
         step3 = .true.
479
         nopx  = nopx + 1
480
         call second (t2)
481
         call ccopy (n, v(1,j), 1, workd(ivj), 1)
482
         ipntr(1) = ivj
483
         ipntr(2) = irj
484
         ipntr(3) = ipj
485
         ido = 1
486
c
487
c        %-----------------------------------%
488
c        | Exit in order to compute OP*v_{j} |
489
c        %-----------------------------------%
490
c
491
         go to 9000
492
   50    continue
493
c
494
c        %----------------------------------%
495
c        | Back from reverse communication; |
496
c        | WORKD(IRJ:IRJ+N-1) := OP*v_{j}   |
497
c        | if step3 = .true.                |
498
c        %----------------------------------%
499
c
500
         call second (t3)
501
         tmvopx = tmvopx + (t3 - t2)
502

503
         step3 = .false.
504
c
505
c        %------------------------------------------%
506
c        | Put another copy of OP*v_{j} into RESID. |
507
c        %------------------------------------------%
508
c
509
         call ccopy (n, workd(irj), 1, resid, 1)
510
c
511
c        %---------------------------------------%
512
c        | STEP 4:  Finish extending the Arnoldi |
513
c        |          factorization to length j.   |
514
c        %---------------------------------------%
515
c
516
         call second (t2)
517
         if (bmat .eq. 'G') then
518
            nbx = nbx + 1
519
            step4 = .true.
520
            ipntr(1) = irj
521
            ipntr(2) = ipj
522
            ido = 2
523
c
524
c           %-------------------------------------%
525
c           | Exit in order to compute B*OP*v_{j} |
526
c           %-------------------------------------%
527
c
528
            go to 9000
529
         else if (bmat .eq. 'I') then
530
            call ccopy (n, resid, 1, workd(ipj), 1)
531
         end if
532
   60    continue
533
c
534
c        %----------------------------------%
535
c        | Back from reverse communication; |
536
c        | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
537
c        | if step4 = .true.                |
538
c        %----------------------------------%
539
c
540
         if (bmat .eq. 'G') then
541
            call second (t3)
542
            tmvbx = tmvbx + (t3 - t2)
543
         end if
544
c
545
         step4 = .false.
546
c
547
c        %-------------------------------------%
548
c        | The following is needed for STEP 5. |
549
c        | Compute the B-norm of OP*v_{j}.     |
550
c        %-------------------------------------%
551
c
552
         if (bmat .eq. 'G') then
553
             cnorm = cdotc (n, resid, 1, workd(ipj), 1)
554
             wnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
555
         else if (bmat .eq. 'I') then
556
             wnorm = scnrm2(n, resid, 1)
557
         end if
558
c
559
c        %-----------------------------------------%
560
c        | Compute the j-th residual corresponding |
561
c        | to the j step factorization.            |
562
c        | Use Classical Gram Schmidt and compute: |
563
c        | w_{j} <-  V_{j}^T * B * OP * v_{j}      |
564
c        | r_{j} <-  OP*v_{j} - V_{j} * w_{j}      |
565
c        %-----------------------------------------%
566
c
567
c
568
c        %------------------------------------------%
569
c        | Compute the j Fourier coefficients w_{j} |
570
c        | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}.  |
571
c        %------------------------------------------%
572
c
573
         call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
574
     &               zero, h(1,j), 1)
575
c
576
c        %--------------------------------------%
577
c        | Orthogonalize r_{j} against V_{j}.   |
578
c        | RESID contains OP*v_{j}. See STEP 3. |
579
c        %--------------------------------------%
580
c
581
         call cgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
582
     &               one, resid, 1)
583
c
584
         if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero)
585
c
586
         call second (t4)
587
c
588
         orth1 = .true.
589
c
590
         call second (t2)
591
         if (bmat .eq. 'G') then
592
            nbx = nbx + 1
593
            call ccopy (n, resid, 1, workd(irj), 1)
594
            ipntr(1) = irj
595
            ipntr(2) = ipj
596
            ido = 2
597
c
598
c           %----------------------------------%
599
c           | Exit in order to compute B*r_{j} |
600
c           %----------------------------------%
601
c
602
            go to 9000
603
         else if (bmat .eq. 'I') then
604
            call ccopy (n, resid, 1, workd(ipj), 1)
605
         end if
606
   70    continue
607
c
608
c        %---------------------------------------------------%
609
c        | Back from reverse communication if ORTH1 = .true. |
610
c        | WORKD(IPJ:IPJ+N-1) := B*r_{j}.                    |
611
c        %---------------------------------------------------%
612
c
613
         if (bmat .eq. 'G') then
614
            call second (t3)
615
            tmvbx = tmvbx + (t3 - t2)
616
         end if
617
c
618
         orth1 = .false.
619
c
620
c        %------------------------------%
621
c        | Compute the B-norm of r_{j}. |
622
c        %------------------------------%
623
c
624
         if (bmat .eq. 'G') then
625
            cnorm = cdotc (n, resid, 1, workd(ipj), 1)
626
            rnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
627
         else if (bmat .eq. 'I') then
628
            rnorm = scnrm2(n, resid, 1)
629
         end if
630
c
631
c        %-----------------------------------------------------------%
632
c        | STEP 5: Re-orthogonalization / Iterative refinement phase |
633
c        | Maximum NITER_ITREF tries.                                |
634
c        |                                                           |
635
c        |          s      = V_{j}^T * B * r_{j}                     |
636
c        |          r_{j}  = r_{j} - V_{j}*s                         |
637
c        |          alphaj = alphaj + s_{j}                          |
638
c        |                                                           |
639
c        | The stopping criteria used for iterative refinement is    |
640
c        | discussed in Parlett's book SEP, page 107 and in Gragg &  |
641
c        | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990.         |
642
c        | Determine if we need to correct the residual. The goal is |
643
c        | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} ||  |
644
c        | The following test determines whether the sine of the     |
645
c        | angle between  OP*x and the computed residual is less     |
646
c        | than or equal to 0.717.                                   |
647
c        %-----------------------------------------------------------%
648
c
649
         if ( rnorm .gt. 0.717*wnorm ) go to 100
650
c
651
         iter  = 0
652
         nrorth = nrorth + 1
653
c
654
c        %---------------------------------------------------%
655
c        | Enter the Iterative refinement phase. If further  |
656
c        | refinement is necessary, loop back here. The loop |
657
c        | variable is ITER. Perform a step of Classical     |
658
c        | Gram-Schmidt using all the Arnoldi vectors V_{j}  |
659
c        %---------------------------------------------------%
660
c
661
   80    continue
662
c
663
         if (msglvl .gt. 2) then
664
            rtemp(1) = wnorm
665
            rtemp(2) = rnorm
666
            call svout (logfil, 2, rtemp, ndigit,
667
     &      '_naitr: re-orthogonalization; wnorm and rnorm are')
668
            call cvout (logfil, j, h(1,j), ndigit,
669
     &                  '_naitr: j-th column of H')
670
         end if
671
c
672
c        %----------------------------------------------------%
673
c        | Compute V_{j}^T * B * r_{j}.                       |
674
c        | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
675
c        %----------------------------------------------------%
676
c
677
         call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
678
     &               zero, workd(irj), 1)
679
c
680
c        %---------------------------------------------%
681
c        | Compute the correction to the residual:     |
682
c        | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
683
c        | The correction to H is v(:,1:J)*H(1:J,1:J)  |
684
c        | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j.         |
685
c        %---------------------------------------------%
686
c
687
         call cgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
688
     &               one, resid, 1)
689
         call caxpy (j, one, workd(irj), 1, h(1,j), 1)
690
c
691
         orth2 = .true.
692
         call second (t2)
693
         if (bmat .eq. 'G') then
694
            nbx = nbx + 1
695
            call ccopy (n, resid, 1, workd(irj), 1)
696
            ipntr(1) = irj
697
            ipntr(2) = ipj
698
            ido = 2
699
c
700
c           %-----------------------------------%
701
c           | Exit in order to compute B*r_{j}. |
702
c           | r_{j} is the corrected residual.  |
703
c           %-----------------------------------%
704
c
705
            go to 9000
706
         else if (bmat .eq. 'I') then
707
            call ccopy (n, resid, 1, workd(ipj), 1)
708
         end if
709
   90    continue
710
c
711
c        %---------------------------------------------------%
712
c        | Back from reverse communication if ORTH2 = .true. |
713
c        %---------------------------------------------------%
714
c
715
         if (bmat .eq. 'G') then
716
            call second (t3)
717
            tmvbx = tmvbx + (t3 - t2)
718
         end if
719
c
720
c        %-----------------------------------------------------%
721
c        | Compute the B-norm of the corrected residual r_{j}. |
722
c        %-----------------------------------------------------%
723
c
724
         if (bmat .eq. 'G') then
725
             cnorm  = cdotc (n, resid, 1, workd(ipj), 1)
726
             rnorm1 = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
727
         else if (bmat .eq. 'I') then
728
             rnorm1 = scnrm2(n, resid, 1)
729
         end if
730
c
731
         if (msglvl .gt. 0 .and. iter .gt. 0 ) then
732
            call ivout (logfil, 1, [j], ndigit,
733
     &           '_naitr: Iterative refinement for Arnoldi residual')
734
            if (msglvl .gt. 2) then
735
                rtemp(1) = rnorm
736
                rtemp(2) = rnorm1
737
                call svout (logfil, 2, rtemp, ndigit,
738
     &           '_naitr: iterative refinement ; rnorm and rnorm1 are')
739
            end if
740
         end if
741
c
742
c        %-----------------------------------------%
743
c        | Determine if we need to perform another |
744
c        | step of re-orthogonalization.           |
745
c        %-----------------------------------------%
746
c
747
         if ( rnorm1 .gt. 0.717*rnorm ) then
748
c
749
c           %---------------------------------------%
750
c           | No need for further refinement.       |
751
c           | The cosine of the angle between the   |
752
c           | corrected residual vector and the old |
753
c           | residual vector is greater than 0.717 |
754
c           | In other words the corrected residual |
755
c           | and the old residual vector share an  |
756
c           | angle of less than arcCOS(0.717)      |
757
c           %---------------------------------------%
758
c
759
            rnorm = rnorm1
760
c
761
         else
762
c
763
c           %-------------------------------------------%
764
c           | Another step of iterative refinement step |
765
c           | is required. NITREF is used by stat.h     |
766
c           %-------------------------------------------%
767
c
768
            nitref = nitref + 1
769
            rnorm  = rnorm1
770
            iter   = iter + 1
771
            if (iter .le. 1) go to 80
772
c
773
c           %-------------------------------------------------%
774
c           | Otherwise RESID is numerically in the span of V |
775
c           %-------------------------------------------------%
776
c
777
            do 95 jj = 1, n
778
               resid(jj) = zero
779
  95        continue
780
            rnorm = rzero
781
         end if
782
c
783
c        %----------------------------------------------%
784
c        | Branch here directly if iterative refinement |
785
c        | wasn't necessary or after at most NITER_REF  |
786
c        | steps of iterative refinement.               |
787
c        %----------------------------------------------%
788
c
789
  100    continue
790
c
791
         rstart = .false.
792
         orth2  = .false.
793
c
794
         call second (t5)
795
         titref = titref + (t5 - t4)
796
c
797
c        %------------------------------------%
798
c        | STEP 6: Update  j = j+1;  Continue |
799
c        %------------------------------------%
800
c
801
         j = j + 1
802
         if (j .gt. k+np) then
803
            call second (t1)
804
            tcaitr = tcaitr + (t1 - t0)
805
            ido = 99
806
            do 110 i = max(1,k), k+np-1
807
c
808
c              %--------------------------------------------%
809
c              | Check for splitting and deflation.         |
810
c              | Use a standard test as in the QR algorithm |
811
c              | REFERENCE: LAPACK subroutine clahqr        |
812
c              %--------------------------------------------%
813
c
814
               tst1 = slapy2(real(h(i,i)),aimag(h(i,i)))
815
     &              + slapy2(real(h(i+1,i+1)), aimag(h(i+1,i+1)))
816
               if( tst1.eq.real(zero) )
817
     &              tst1 = clanhs( '1', k+np, h, ldh, workd(n+1) )
818
               if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le.
819
     &                    max( ulp*tst1, smlnum ) )
820
     &             h(i+1,i) = zero
821
 110        continue
822
c
823
            if (msglvl .gt. 2) then
824
               call cmout (logfil, k+np, k+np, h, ldh, ndigit,
825
     &          '_naitr: Final upper Hessenberg matrix H of order K+NP')
826
            end if
827
c
828
            go to 9000
829
         end if
830
c
831
c        %--------------------------------------------------------%
832
c        | Loop back to extend the factorization by another step. |
833
c        %--------------------------------------------------------%
834
c
835
      go to 1000
836
c
837
c     %---------------------------------------------------------------%
838
c     |                                                               |
839
c     |  E N D     O F     M A I N     I T E R A T I O N     L O O P  |
840
c     |                                                               |
841
c     %---------------------------------------------------------------%
842
c
843
 9000 continue
844
      return
845
c
846
c     %---------------%
847
c     | End of cnaitr |
848
c     %---------------%
849
c
850
      end
851

852
Product

Resources

Company