Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/cnaitr.f
5215 views
1
c\BeginDoc
2
c
3
c\Name: cnaitr
4
c
5
c\Description:
6
c Reverse communication interface for applying NP additional steps to
7
c a K step nonsymmetric Arnoldi factorization.
8
c
9
c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T
10
c
11
c with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
12
c
13
c Output: OP*V_{k+p} - V_{k+p}*H = r_{k+p}*e_{k+p}^T
14
c
15
c with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
16
c
17
c where OP and B are as in cnaupd. The B-norm of r_{k+p} is also
18
c computed and returned.
19
c
20
c\Usage:
21
c call cnaitr
22
c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
23
c IPNTR, WORKD, INFO )
24
c
25
c\Arguments
26
c IDO Integer. (INPUT/OUTPUT)
27
c Reverse communication flag.
28
c -------------------------------------------------------------
29
c IDO = 0: first call to the reverse communication interface
30
c IDO = -1: compute Y = OP * X where
31
c IPNTR(1) is the pointer into WORK for X,
32
c IPNTR(2) is the pointer into WORK for Y.
33
c This is for the restart phase to force the new
34
c starting vector into the range of OP.
35
c IDO = 1: compute Y = OP * X where
36
c IPNTR(1) is the pointer into WORK for X,
37
c IPNTR(2) is the pointer into WORK for Y,
38
c IPNTR(3) is the pointer into WORK for B * X.
39
c IDO = 2: compute Y = B * X where
40
c IPNTR(1) is the pointer into WORK for X,
41
c IPNTR(2) is the pointer into WORK for Y.
42
c IDO = 99: done
43
c -------------------------------------------------------------
44
c When the routine is used in the "shift-and-invert" mode, the
45
c vector B * Q is already available and do not need to be
46
c recomputed in forming OP * Q.
47
c
48
c BMAT Character*1. (INPUT)
49
c BMAT specifies the type of the matrix B that defines the
50
c semi-inner product for the operator OP. See cnaupd.
51
c B = 'I' -> standard eigenvalue problem A*x = lambda*x
52
c B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
53
c
54
c N Integer. (INPUT)
55
c Dimension of the eigenproblem.
56
c
57
c K Integer. (INPUT)
58
c Current size of V and H.
59
c
60
c NP Integer. (INPUT)
61
c Number of additional Arnoldi steps to take.
62
c
63
c NB Integer. (INPUT)
64
c Blocksize to be used in the recurrence.
65
c Only work for NB = 1 right now. The goal is to have a
66
c program that implement both the block and non-block method.
67
c
68
c RESID Complex array of length N. (INPUT/OUTPUT)
69
c On INPUT: RESID contains the residual vector r_{k}.
70
c On OUTPUT: RESID contains the residual vector r_{k+p}.
71
c
72
c RNORM Real scalar. (INPUT/OUTPUT)
73
c B-norm of the starting residual on input.
74
c B-norm of the updated residual r_{k+p} on output.
75
c
76
c V Complex N by K+NP array. (INPUT/OUTPUT)
77
c On INPUT: V contains the Arnoldi vectors in the first K
78
c columns.
79
c On OUTPUT: V contains the new NP Arnoldi vectors in the next
80
c NP columns. The first K columns are unchanged.
81
c
82
c LDV Integer. (INPUT)
83
c Leading dimension of V exactly as declared in the calling
84
c program.
85
c
86
c H Complex (K+NP) by (K+NP) array. (INPUT/OUTPUT)
87
c H is used to store the generated upper Hessenberg matrix.
88
c
89
c LDH Integer. (INPUT)
90
c Leading dimension of H exactly as declared in the calling
91
c program.
92
c
93
c IPNTR Integer array of length 3. (OUTPUT)
94
c Pointer to mark the starting locations in the WORK for
95
c vectors used by the Arnoldi iteration.
96
c -------------------------------------------------------------
97
c IPNTR(1): pointer to the current operand vector X.
98
c IPNTR(2): pointer to the current result vector Y.
99
c IPNTR(3): pointer to the vector B * X when used in the
100
c shift-and-invert mode. X is the current operand.
101
c -------------------------------------------------------------
102
c
103
c WORKD Complex work array of length 3*N. (REVERSE COMMUNICATION)
104
c Distributed array to be used in the basic Arnoldi iteration
105
c for reverse communication. The calling program should not
106
c use WORKD as temporary workspace during the iteration !!!!!!
107
c On input, WORKD(1:N) = B*RESID and is used to save some
108
c computation at the first step.
109
c
110
c INFO Integer. (OUTPUT)
111
c = 0: Normal exit.
112
c > 0: Size of the spanning invariant subspace of OP found.
113
c
114
c\EndDoc
115
c
116
c-----------------------------------------------------------------------
117
c
118
c\BeginLib
119
c
120
c\Local variables:
121
c xxxxxx Complex
122
c
123
c\References:
124
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
125
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
126
c pp 357-385.
127
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
128
c Restarted Arnoldi Iteration", Rice University Technical Report
129
c TR95-13, Department of Computational and Applied Mathematics.
130
c
131
c\Routines called:
132
c cgetv0 ARPACK routine to generate the initial vector.
133
c ivout ARPACK utility routine that prints integers.
134
c second ARPACK utility routine for timing.
135
c cmout ARPACK utility routine that prints matrices
136
c cvout ARPACK utility routine that prints vectors.
137
c clanhs LAPACK routine that computes various norms of a matrix.
138
c clascl LAPACK routine for careful scaling of a matrix.
139
c slabad LAPACK routine for defining the underflow and overflow
140
c limits.
141
c slamch LAPACK routine that determines machine constants.
142
c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
143
c cgemv Level 2 BLAS routine for matrix vector multiplication.
144
c caxpy Level 1 BLAS that computes a vector triad.
145
c ccopy Level 1 BLAS that copies one vector to another .
146
c cdotc Level 1 BLAS that computes the scalar product of two vectors.
147
c cscal Level 1 BLAS that scales a vector.
148
c csscal Level 1 BLAS that scales a complex vector by a real number.
149
c scnrm2 Level 1 BLAS that computes the norm of a vector.
150
c
151
c\Author
152
c Danny Sorensen Phuong Vu
153
c Richard Lehoucq CRPC / Rice University
154
c Dept. of Computational & Houston, Texas
155
c Applied Mathematics
156
c Rice University
157
c Houston, Texas
158
c
159
c\SCCS Information: @(#)
160
c FILE: naitr.F SID: 2.3 DATE OF SID: 8/27/96 RELEASE: 2
161
c
162
c\Remarks
163
c The algorithm implemented is:
164
c
165
c restart = .false.
166
c Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
167
c r_{k} contains the initial residual vector even for k = 0;
168
c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
169
c computed by the calling program.
170
c
171
c betaj = rnorm ; p_{k+1} = B*r_{k} ;
172
c For j = k+1, ..., k+np Do
173
c 1) if ( betaj < tol ) stop or restart depending on j.
174
c ( At present tol is zero )
175
c if ( restart ) generate a new starting vector.
176
c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}];
177
c p_{j} = p_{j}/betaj
178
c 3) r_{j} = OP*v_{j} where OP is defined as in cnaupd
179
c For shift-invert mode p_{j} = B*v_{j} is already available.
180
c wnorm = || OP*v_{j} ||
181
c 4) Compute the j-th step residual vector.
182
c w_{j} = V_{j}^T * B * OP * v_{j}
183
c r_{j} = OP*v_{j} - V_{j} * w_{j}
184
c H(:,j) = w_{j};
185
c H(j,j-1) = rnorm
186
c rnorm = || r_(j) ||
187
c If (rnorm > 0.717*wnorm) accept step and go back to 1)
188
c 5) Re-orthogonalization step:
189
c s = V_{j}'*B*r_{j}
190
c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} ||
191
c alphaj = alphaj + s_{j};
192
c 6) Iterative refinement step:
193
c If (rnorm1 > 0.717*rnorm) then
194
c rnorm = rnorm1
195
c accept step and go back to 1)
196
c Else
197
c rnorm = rnorm1
198
c If this is the first time in step 6), go to 5)
199
c Else r_{j} lies in the span of V_{j} numerically.
200
c Set r_{j} = 0 and rnorm = 0; go to 1)
201
c EndIf
202
c End Do
203
c
204
c\EndLib
205
c
206
c-----------------------------------------------------------------------
207
c
208
subroutine cnaitr
209
& (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
210
& ipntr, workd, info)
211
c
212
c %----------------------------------------------------%
213
c | Include files for debugging and timing information |
214
c %----------------------------------------------------%
215
c
216
include 'debug.h'
217
include 'stat.h'
218
c
219
c %------------------%
220
c | Scalar Arguments |
221
c %------------------%
222
c
223
character bmat*1
224
integer ido, info, k, ldh, ldv, n, nb, np
225
Real
226
& rnorm
227
c
228
c %-----------------%
229
c | Array Arguments |
230
c %-----------------%
231
c
232
integer ipntr(3)
233
Complex
234
& h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n)
235
c
236
c %------------%
237
c | Parameters |
238
c %------------%
239
c
240
Complex
241
& one, zero
242
Real
243
& rone, rzero
244
parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0),
245
& rone = 1.0E+0, rzero = 0.0E+0)
246
c
247
c %--------------%
248
c | Local Arrays |
249
c %--------------%
250
c
251
Real
252
& rtemp(2)
253
c
254
c %---------------%
255
c | Local Scalars |
256
c %---------------%
257
c
258
logical first, orth1, orth2, rstart, step3, step4
259
integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
260
& jj
261
Real
262
& ovfl, smlnum, tst1, ulp, unfl, betaj,
263
& temp1, rnorm1, wnorm
264
Complex
265
& cnorm
266
c
267
save first, orth1, orth2, rstart, step3, step4,
268
& ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
269
& betaj, rnorm1, smlnum, ulp, unfl, wnorm
270
c
271
c %----------------------%
272
c | External Subroutines |
273
c %----------------------%
274
c
275
external caxpy, ccopy, cscal, csscal, cgemv, cgetv0,
276
& slabad, cvout, cmout, ivout, second
277
c
278
c %--------------------%
279
c | External Functions |
280
c %--------------------%
281
c
282
Complex
283
& cdotc
284
Real
285
& slamch, scnrm2, clanhs, slapy2
286
external cdotc, scnrm2, clanhs, slamch, slapy2
287
c
288
c %---------------------%
289
c | Intrinsic Functions |
290
c %---------------------%
291
c
292
intrinsic aimag, real, max, sqrt
293
c
294
c %-----------------%
295
c | Data statements |
296
c %-----------------%
297
c
298
data first / .true. /
299
c
300
c %-----------------------%
301
c | Executable Statements |
302
c %-----------------------%
303
c
304
if (first) then
305
c
306
c %-----------------------------------------%
307
c | Set machine-dependent constants for the |
308
c | the splitting and deflation criterion. |
309
c | If norm(H) <= sqrt(OVFL), |
310
c | overflow should not occur. |
311
c | REFERENCE: LAPACK subroutine clahqr |
312
c %-----------------------------------------%
313
c
314
unfl = slamch( 'safe minimum' )
315
ovfl = real(one / unfl)
316
call slabad( unfl, ovfl )
317
ulp = slamch( 'precision' )
318
smlnum = unfl*( n / ulp )
319
first = .false.
320
end if
321
c
322
if (ido .eq. 0) then
323
c
324
c %-------------------------------%
325
c | Initialize timing statistics |
326
c | & message level for debugging |
327
c %-------------------------------%
328
c
329
call second (t0)
330
msglvl = mcaitr
331
c
332
c %------------------------------%
333
c | Initial call to this routine |
334
c %------------------------------%
335
c
336
info = 0
337
step3 = .false.
338
step4 = .false.
339
rstart = .false.
340
orth1 = .false.
341
orth2 = .false.
342
j = k + 1
343
ipj = 1
344
irj = ipj + n
345
ivj = irj + n
346
end if
347
c
348
c %-------------------------------------------------%
349
c | When in reverse communication mode one of: |
350
c | STEP3, STEP4, ORTH1, ORTH2, RSTART |
351
c | will be .true. when .... |
352
c | STEP3: return from computing OP*v_{j}. |
353
c | STEP4: return from computing B-norm of OP*v_{j} |
354
c | ORTH1: return from computing B-norm of r_{j+1} |
355
c | ORTH2: return from computing B-norm of |
356
c | correction to the residual vector. |
357
c | RSTART: return from OP computations needed by |
358
c | cgetv0. |
359
c %-------------------------------------------------%
360
c
361
if (step3) go to 50
362
if (step4) go to 60
363
if (orth1) go to 70
364
if (orth2) go to 90
365
if (rstart) go to 30
366
c
367
c %-----------------------------%
368
c | Else this is the first step |
369
c %-----------------------------%
370
c
371
c %--------------------------------------------------------------%
372
c | |
373
c | A R N O L D I I T E R A T I O N L O O P |
374
c | |
375
c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
376
c %--------------------------------------------------------------%
377
378
1000 continue
379
c
380
if (msglvl .gt. 1) then
381
call ivout (logfil, 1, [j], ndigit,
382
& '_naitr: generating Arnoldi vector number')
383
call svout (logfil, 1, [rnorm], ndigit,
384
& '_naitr: B-norm of the current residual is')
385
end if
386
c
387
c %---------------------------------------------------%
388
c | STEP 1: Check if the B norm of j-th residual |
389
c | vector is zero. Equivalent to determine whether |
390
c | an exact j-step Arnoldi factorization is present. |
391
c %---------------------------------------------------%
392
c
393
betaj = rnorm
394
if (rnorm .gt. rzero) go to 40
395
c
396
c %---------------------------------------------------%
397
c | Invariant subspace found, generate a new starting |
398
c | vector which is orthogonal to the current Arnoldi |
399
c | basis and continue the iteration. |
400
c %---------------------------------------------------%
401
c
402
if (msglvl .gt. 0) then
403
call ivout (logfil, 1, [j], ndigit,
404
& '_naitr: ****** RESTART AT STEP ******')
405
end if
406
c
407
c %---------------------------------------------%
408
c | ITRY is the loop variable that controls the |
409
c | maximum amount of times that a restart is |
410
c | attempted. NRSTRT is used by stat.h |
411
c %---------------------------------------------%
412
c
413
betaj = rzero
414
nrstrt = nrstrt + 1
415
itry = 1
416
20 continue
417
rstart = .true.
418
ido = 0
419
30 continue
420
c
421
c %--------------------------------------%
422
c | If in reverse communication mode and |
423
c | RSTART = .true. flow returns here. |
424
c %--------------------------------------%
425
c
426
call cgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
427
& resid, rnorm, ipntr, workd, ierr)
428
if (ido .ne. 99) go to 9000
429
if (ierr .lt. 0) then
430
itry = itry + 1
431
if (itry .le. 3) go to 20
432
c
433
c %------------------------------------------------%
434
c | Give up after several restart attempts. |
435
c | Set INFO to the size of the invariant subspace |
436
c | which spans OP and exit. |
437
c %------------------------------------------------%
438
c
439
info = j - 1
440
call second (t1)
441
tcaitr = tcaitr + (t1 - t0)
442
ido = 99
443
go to 9000
444
end if
445
c
446
40 continue
447
c
448
c %---------------------------------------------------------%
449
c | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm |
450
c | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
451
c | when reciprocating a small RNORM, test against lower |
452
c | machine bound. |
453
c %---------------------------------------------------------%
454
c
455
call ccopy (n, resid, 1, v(1,j), 1)
456
if ( rnorm .ge. unfl) then
457
temp1 = rone / rnorm
458
call csscal (n, temp1, v(1,j), 1)
459
call csscal (n, temp1, workd(ipj), 1)
460
else
461
c
462
c %-----------------------------------------%
463
c | To scale both v_{j} and p_{j} carefully |
464
c | use LAPACK routine clascl |
465
c %-----------------------------------------%
466
c
467
call clascl ('General', i, i, rnorm, rone,
468
& n, 1, v(1,j), n, infol)
469
call clascl ('General', i, i, rnorm, rone,
470
& n, 1, workd(ipj), n, infol)
471
end if
472
c
473
c %------------------------------------------------------%
474
c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
475
c | Note that this is not quite yet r_{j}. See STEP 4 |
476
c %------------------------------------------------------%
477
c
478
step3 = .true.
479
nopx = nopx + 1
480
call second (t2)
481
call ccopy (n, v(1,j), 1, workd(ivj), 1)
482
ipntr(1) = ivj
483
ipntr(2) = irj
484
ipntr(3) = ipj
485
ido = 1
486
c
487
c %-----------------------------------%
488
c | Exit in order to compute OP*v_{j} |
489
c %-----------------------------------%
490
c
491
go to 9000
492
50 continue
493
c
494
c %----------------------------------%
495
c | Back from reverse communication; |
496
c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} |
497
c | if step3 = .true. |
498
c %----------------------------------%
499
c
500
call second (t3)
501
tmvopx = tmvopx + (t3 - t2)
502
503
step3 = .false.
504
c
505
c %------------------------------------------%
506
c | Put another copy of OP*v_{j} into RESID. |
507
c %------------------------------------------%
508
c
509
call ccopy (n, workd(irj), 1, resid, 1)
510
c
511
c %---------------------------------------%
512
c | STEP 4: Finish extending the Arnoldi |
513
c | factorization to length j. |
514
c %---------------------------------------%
515
c
516
call second (t2)
517
if (bmat .eq. 'G') then
518
nbx = nbx + 1
519
step4 = .true.
520
ipntr(1) = irj
521
ipntr(2) = ipj
522
ido = 2
523
c
524
c %-------------------------------------%
525
c | Exit in order to compute B*OP*v_{j} |
526
c %-------------------------------------%
527
c
528
go to 9000
529
else if (bmat .eq. 'I') then
530
call ccopy (n, resid, 1, workd(ipj), 1)
531
end if
532
60 continue
533
c
534
c %----------------------------------%
535
c | Back from reverse communication; |
536
c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
537
c | if step4 = .true. |
538
c %----------------------------------%
539
c
540
if (bmat .eq. 'G') then
541
call second (t3)
542
tmvbx = tmvbx + (t3 - t2)
543
end if
544
c
545
step4 = .false.
546
c
547
c %-------------------------------------%
548
c | The following is needed for STEP 5. |
549
c | Compute the B-norm of OP*v_{j}. |
550
c %-------------------------------------%
551
c
552
if (bmat .eq. 'G') then
553
cnorm = cdotc (n, resid, 1, workd(ipj), 1)
554
wnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
555
else if (bmat .eq. 'I') then
556
wnorm = scnrm2(n, resid, 1)
557
end if
558
c
559
c %-----------------------------------------%
560
c | Compute the j-th residual corresponding |
561
c | to the j step factorization. |
562
c | Use Classical Gram Schmidt and compute: |
563
c | w_{j} <- V_{j}^T * B * OP * v_{j} |
564
c | r_{j} <- OP*v_{j} - V_{j} * w_{j} |
565
c %-----------------------------------------%
566
c
567
c
568
c %------------------------------------------%
569
c | Compute the j Fourier coefficients w_{j} |
570
c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. |
571
c %------------------------------------------%
572
c
573
call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
574
& zero, h(1,j), 1)
575
c
576
c %--------------------------------------%
577
c | Orthogonalize r_{j} against V_{j}. |
578
c | RESID contains OP*v_{j}. See STEP 3. |
579
c %--------------------------------------%
580
c
581
call cgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
582
& one, resid, 1)
583
c
584
if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero)
585
c
586
call second (t4)
587
c
588
orth1 = .true.
589
c
590
call second (t2)
591
if (bmat .eq. 'G') then
592
nbx = nbx + 1
593
call ccopy (n, resid, 1, workd(irj), 1)
594
ipntr(1) = irj
595
ipntr(2) = ipj
596
ido = 2
597
c
598
c %----------------------------------%
599
c | Exit in order to compute B*r_{j} |
600
c %----------------------------------%
601
c
602
go to 9000
603
else if (bmat .eq. 'I') then
604
call ccopy (n, resid, 1, workd(ipj), 1)
605
end if
606
70 continue
607
c
608
c %---------------------------------------------------%
609
c | Back from reverse communication if ORTH1 = .true. |
610
c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. |
611
c %---------------------------------------------------%
612
c
613
if (bmat .eq. 'G') then
614
call second (t3)
615
tmvbx = tmvbx + (t3 - t2)
616
end if
617
c
618
orth1 = .false.
619
c
620
c %------------------------------%
621
c | Compute the B-norm of r_{j}. |
622
c %------------------------------%
623
c
624
if (bmat .eq. 'G') then
625
cnorm = cdotc (n, resid, 1, workd(ipj), 1)
626
rnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
627
else if (bmat .eq. 'I') then
628
rnorm = scnrm2(n, resid, 1)
629
end if
630
c
631
c %-----------------------------------------------------------%
632
c | STEP 5: Re-orthogonalization / Iterative refinement phase |
633
c | Maximum NITER_ITREF tries. |
634
c | |
635
c | s = V_{j}^T * B * r_{j} |
636
c | r_{j} = r_{j} - V_{j}*s |
637
c | alphaj = alphaj + s_{j} |
638
c | |
639
c | The stopping criteria used for iterative refinement is |
640
c | discussed in Parlett's book SEP, page 107 and in Gragg & |
641
c | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. |
642
c | Determine if we need to correct the residual. The goal is |
643
c | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || |
644
c | The following test determines whether the sine of the |
645
c | angle between OP*x and the computed residual is less |
646
c | than or equal to 0.717. |
647
c %-----------------------------------------------------------%
648
c
649
if ( rnorm .gt. 0.717*wnorm ) go to 100
650
c
651
iter = 0
652
nrorth = nrorth + 1
653
c
654
c %---------------------------------------------------%
655
c | Enter the Iterative refinement phase. If further |
656
c | refinement is necessary, loop back here. The loop |
657
c | variable is ITER. Perform a step of Classical |
658
c | Gram-Schmidt using all the Arnoldi vectors V_{j} |
659
c %---------------------------------------------------%
660
c
661
80 continue
662
c
663
if (msglvl .gt. 2) then
664
rtemp(1) = wnorm
665
rtemp(2) = rnorm
666
call svout (logfil, 2, rtemp, ndigit,
667
& '_naitr: re-orthogonalization; wnorm and rnorm are')
668
call cvout (logfil, j, h(1,j), ndigit,
669
& '_naitr: j-th column of H')
670
end if
671
c
672
c %----------------------------------------------------%
673
c | Compute V_{j}^T * B * r_{j}. |
674
c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
675
c %----------------------------------------------------%
676
c
677
call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
678
& zero, workd(irj), 1)
679
c
680
c %---------------------------------------------%
681
c | Compute the correction to the residual: |
682
c | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
683
c | The correction to H is v(:,1:J)*H(1:J,1:J) |
684
c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. |
685
c %---------------------------------------------%
686
c
687
call cgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
688
& one, resid, 1)
689
call caxpy (j, one, workd(irj), 1, h(1,j), 1)
690
c
691
orth2 = .true.
692
call second (t2)
693
if (bmat .eq. 'G') then
694
nbx = nbx + 1
695
call ccopy (n, resid, 1, workd(irj), 1)
696
ipntr(1) = irj
697
ipntr(2) = ipj
698
ido = 2
699
c
700
c %-----------------------------------%
701
c | Exit in order to compute B*r_{j}. |
702
c | r_{j} is the corrected residual. |
703
c %-----------------------------------%
704
c
705
go to 9000
706
else if (bmat .eq. 'I') then
707
call ccopy (n, resid, 1, workd(ipj), 1)
708
end if
709
90 continue
710
c
711
c %---------------------------------------------------%
712
c | Back from reverse communication if ORTH2 = .true. |
713
c %---------------------------------------------------%
714
c
715
if (bmat .eq. 'G') then
716
call second (t3)
717
tmvbx = tmvbx + (t3 - t2)
718
end if
719
c
720
c %-----------------------------------------------------%
721
c | Compute the B-norm of the corrected residual r_{j}. |
722
c %-----------------------------------------------------%
723
c
724
if (bmat .eq. 'G') then
725
cnorm = cdotc (n, resid, 1, workd(ipj), 1)
726
rnorm1 = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
727
else if (bmat .eq. 'I') then
728
rnorm1 = scnrm2(n, resid, 1)
729
end if
730
c
731
if (msglvl .gt. 0 .and. iter .gt. 0 ) then
732
call ivout (logfil, 1, [j], ndigit,
733
& '_naitr: Iterative refinement for Arnoldi residual')
734
if (msglvl .gt. 2) then
735
rtemp(1) = rnorm
736
rtemp(2) = rnorm1
737
call svout (logfil, 2, rtemp, ndigit,
738
& '_naitr: iterative refinement ; rnorm and rnorm1 are')
739
end if
740
end if
741
c
742
c %-----------------------------------------%
743
c | Determine if we need to perform another |
744
c | step of re-orthogonalization. |
745
c %-----------------------------------------%
746
c
747
if ( rnorm1 .gt. 0.717*rnorm ) then
748
c
749
c %---------------------------------------%
750
c | No need for further refinement. |
751
c | The cosine of the angle between the |
752
c | corrected residual vector and the old |
753
c | residual vector is greater than 0.717 |
754
c | In other words the corrected residual |
755
c | and the old residual vector share an |
756
c | angle of less than arcCOS(0.717) |
757
c %---------------------------------------%
758
c
759
rnorm = rnorm1
760
c
761
else
762
c
763
c %-------------------------------------------%
764
c | Another step of iterative refinement step |
765
c | is required. NITREF is used by stat.h |
766
c %-------------------------------------------%
767
c
768
nitref = nitref + 1
769
rnorm = rnorm1
770
iter = iter + 1
771
if (iter .le. 1) go to 80
772
c
773
c %-------------------------------------------------%
774
c | Otherwise RESID is numerically in the span of V |
775
c %-------------------------------------------------%
776
c
777
do 95 jj = 1, n
778
resid(jj) = zero
779
95 continue
780
rnorm = rzero
781
end if
782
c
783
c %----------------------------------------------%
784
c | Branch here directly if iterative refinement |
785
c | wasn't necessary or after at most NITER_REF |
786
c | steps of iterative refinement. |
787
c %----------------------------------------------%
788
c
789
100 continue
790
c
791
rstart = .false.
792
orth2 = .false.
793
c
794
call second (t5)
795
titref = titref + (t5 - t4)
796
c
797
c %------------------------------------%
798
c | STEP 6: Update j = j+1; Continue |
799
c %------------------------------------%
800
c
801
j = j + 1
802
if (j .gt. k+np) then
803
call second (t1)
804
tcaitr = tcaitr + (t1 - t0)
805
ido = 99
806
do 110 i = max(1,k), k+np-1
807
c
808
c %--------------------------------------------%
809
c | Check for splitting and deflation. |
810
c | Use a standard test as in the QR algorithm |
811
c | REFERENCE: LAPACK subroutine clahqr |
812
c %--------------------------------------------%
813
c
814
tst1 = slapy2(real(h(i,i)),aimag(h(i,i)))
815
& + slapy2(real(h(i+1,i+1)), aimag(h(i+1,i+1)))
816
if( tst1.eq.real(zero) )
817
& tst1 = clanhs( '1', k+np, h, ldh, workd(n+1) )
818
if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le.
819
& max( ulp*tst1, smlnum ) )
820
& h(i+1,i) = zero
821
110 continue
822
c
823
if (msglvl .gt. 2) then
824
call cmout (logfil, k+np, k+np, h, ldh, ndigit,
825
& '_naitr: Final upper Hessenberg matrix H of order K+NP')
826
end if
827
c
828
go to 9000
829
end if
830
c
831
c %--------------------------------------------------------%
832
c | Loop back to extend the factorization by another step. |
833
c %--------------------------------------------------------%
834
c
835
go to 1000
836
c
837
c %---------------------------------------------------------------%
838
c | |
839
c | E N D O F M A I N I T E R A T I O N L O O P |
840
c | |
841
c %---------------------------------------------------------------%
842
c
843
9000 continue
844
return
845
c
846
c %---------------%
847
c | End of cnaitr |
848
c %---------------%
849
c
850
end
851
852