Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/dnaitr.f
5224 views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: dnaitr
5
c
6
c\Description:
7
c Reverse communication interface for applying NP additional steps to
8
c a K step nonsymmetric Arnoldi factorization.
9
c
10
c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T
11
c
12
c with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
13
c
14
c Output: OP*V_{k+p} - V_{k+p}*H = r_{k+p}*e_{k+p}^T
15
c
16
c with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
17
c
18
c where OP and B are as in dnaupd. The B-norm of r_{k+p} is also
19
c computed and returned.
20
c
21
c\Usage:
22
c call dnaitr
23
c ( IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
24
c IPNTR, WORKD, INFO )
25
c
26
c\Arguments
27
c IDO Integer. (INPUT/OUTPUT)
28
c Reverse communication flag.
29
c -------------------------------------------------------------
30
c IDO = 0: first call to the reverse communication interface
31
c IDO = -1: compute Y = OP * X where
32
c IPNTR(1) is the pointer into WORK for X,
33
c IPNTR(2) is the pointer into WORK for Y.
34
c This is for the restart phase to force the new
35
c starting vector into the range of OP.
36
c IDO = 1: compute Y = OP * X where
37
c IPNTR(1) is the pointer into WORK for X,
38
c IPNTR(2) is the pointer into WORK for Y,
39
c IPNTR(3) is the pointer into WORK for B * X.
40
c IDO = 2: compute Y = B * X where
41
c IPNTR(1) is the pointer into WORK for X,
42
c IPNTR(2) is the pointer into WORK for Y.
43
c IDO = 99: done
44
c -------------------------------------------------------------
45
c When the routine is used in the "shift-and-invert" mode, the
46
c vector B * Q is already available and do not need to be
47
c recompute in forming OP * Q.
48
c
49
c BMAT Character*1. (INPUT)
50
c BMAT specifies the type of the matrix B that defines the
51
c semi-inner product for the operator OP. See dnaupd.
52
c B = 'I' -> standard eigenvalue problem A*x = lambda*x
53
c B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
54
c
55
c N Integer. (INPUT)
56
c Dimension of the eigenproblem.
57
c
58
c K Integer. (INPUT)
59
c Current size of V and H.
60
c
61
c NP Integer. (INPUT)
62
c Number of additional Arnoldi steps to take.
63
c
64
c NB Integer. (INPUT)
65
c Blocksize to be used in the recurrence.
66
c Only work for NB = 1 right now. The goal is to have a
67
c program that implement both the block and non-block method.
68
c
69
c RESID Double precision array of length N. (INPUT/OUTPUT)
70
c On INPUT: RESID contains the residual vector r_{k}.
71
c On OUTPUT: RESID contains the residual vector r_{k+p}.
72
c
73
c RNORM Double precision scalar. (INPUT/OUTPUT)
74
c B-norm of the starting residual on input.
75
c B-norm of the updated residual r_{k+p} on output.
76
c
77
c V Double precision N by K+NP array. (INPUT/OUTPUT)
78
c On INPUT: V contains the Arnoldi vectors in the first K
79
c columns.
80
c On OUTPUT: V contains the new NP Arnoldi vectors in the next
81
c NP columns. The first K columns are unchanged.
82
c
83
c LDV Integer. (INPUT)
84
c Leading dimension of V exactly as declared in the calling
85
c program.
86
c
87
c H Double precision (K+NP) by (K+NP) array. (INPUT/OUTPUT)
88
c H is used to store the generated upper Hessenberg matrix.
89
c
90
c LDH Integer. (INPUT)
91
c Leading dimension of H exactly as declared in the calling
92
c program.
93
c
94
c IPNTR Integer array of length 3. (OUTPUT)
95
c Pointer to mark the starting locations in the WORK for
96
c vectors used by the Arnoldi iteration.
97
c -------------------------------------------------------------
98
c IPNTR(1): pointer to the current operand vector X.
99
c IPNTR(2): pointer to the current result vector Y.
100
c IPNTR(3): pointer to the vector B * X when used in the
101
c shift-and-invert mode. X is the current operand.
102
c -------------------------------------------------------------
103
c
104
c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION)
105
c Distributed array to be used in the basic Arnoldi iteration
106
c for reverse communication. The calling program should not
107
c use WORKD as temporary workspace during the iteration !!!!!!
108
c On input, WORKD(1:N) = B*RESID and is used to save some
109
c computation at the first step.
110
c
111
c INFO Integer. (OUTPUT)
112
c = 0: Normal exit.
113
c > 0: Size of the spanning invariant subspace of OP found.
114
c
115
c\EndDoc
116
c
117
c-----------------------------------------------------------------------
118
c
119
c\BeginLib
120
c
121
c\Local variables:
122
c xxxxxx real
123
c
124
c\References:
125
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
126
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
127
c pp 357-385.
128
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
129
c Restarted Arnoldi Iteration", Rice University Technical Report
130
c TR95-13, Department of Computational and Applied Mathematics.
131
c
132
c\Routines called:
133
c dgetv0 ARPACK routine to generate the initial vector.
134
c ivout ARPACK utility routine that prints integers.
135
c second ARPACK utility routine for timing.
136
c dmout ARPACK utility routine that prints matrices
137
c dvout ARPACK utility routine that prints vectors.
138
c dlabad LAPACK routine that computes machine constants.
139
c dlamch LAPACK routine that determines machine constants.
140
c dlascl LAPACK routine for careful scaling of a matrix.
141
c dlanhs LAPACK routine that computes various norms of a matrix.
142
c dgemv Level 2 BLAS routine for matrix vector multiplication.
143
c daxpy Level 1 BLAS that computes a vector triad.
144
c dscal Level 1 BLAS that scales a vector.
145
c dcopy Level 1 BLAS that copies one vector to another .
146
c ddot Level 1 BLAS that computes the scalar product of two vectors.
147
c dnrm2 Level 1 BLAS that computes the norm of a vector.
148
c
149
c\Author
150
c Danny Sorensen Phuong Vu
151
c Richard Lehoucq CRPC / Rice University
152
c Dept. of Computational & Houston, Texas
153
c Applied Mathematics
154
c Rice University
155
c Houston, Texas
156
c
157
c\Revision history:
158
c xx/xx/92: Version ' 2.4'
159
c
160
c\SCCS Information: @(#)
161
c FILE: naitr.F SID: 2.4 DATE OF SID: 8/27/96 RELEASE: 2
162
c
163
c\Remarks
164
c The algorithm implemented is:
165
c
166
c restart = .false.
167
c Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
168
c r_{k} contains the initial residual vector even for k = 0;
169
c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
170
c computed by the calling program.
171
c
172
c betaj = rnorm ; p_{k+1} = B*r_{k} ;
173
c For j = k+1, ..., k+np Do
174
c 1) if ( betaj < tol ) stop or restart depending on j.
175
c ( At present tol is zero )
176
c if ( restart ) generate a new starting vector.
177
c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}];
178
c p_{j} = p_{j}/betaj
179
c 3) r_{j} = OP*v_{j} where OP is defined as in dnaupd
180
c For shift-invert mode p_{j} = B*v_{j} is already available.
181
c wnorm = || OP*v_{j} ||
182
c 4) Compute the j-th step residual vector.
183
c w_{j} = V_{j}^T * B * OP * v_{j}
184
c r_{j} = OP*v_{j} - V_{j} * w_{j}
185
c H(:,j) = w_{j};
186
c H(j,j-1) = rnorm
187
c rnorm = || r_(j) ||
188
c If (rnorm > 0.717*wnorm) accept step and go back to 1)
189
c 5) Re-orthogonalization step:
190
c s = V_{j}'*B*r_{j}
191
c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} ||
192
c alphaj = alphaj + s_{j};
193
c 6) Iterative refinement step:
194
c If (rnorm1 > 0.717*rnorm) then
195
c rnorm = rnorm1
196
c accept step and go back to 1)
197
c Else
198
c rnorm = rnorm1
199
c If this is the first time in step 6), go to 5)
200
c Else r_{j} lies in the span of V_{j} numerically.
201
c Set r_{j} = 0 and rnorm = 0; go to 1)
202
c EndIf
203
c End Do
204
c
205
c\EndLib
206
c
207
c-----------------------------------------------------------------------
208
c
209
subroutine dnaitr
210
& (ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
211
& ipntr, workd, info)
212
c
213
c %----------------------------------------------------%
214
c | Include files for debugging and timing information |
215
c %----------------------------------------------------%
216
c
217
include 'debug.h'
218
include 'stat.h'
219
c
220
c %------------------%
221
c | Scalar Arguments |
222
c %------------------%
223
c
224
character bmat*1
225
integer ido, info, k, ldh, ldv, n, nb, np
226
Double precision
227
& rnorm
228
c
229
c %-----------------%
230
c | Array Arguments |
231
c %-----------------%
232
c
233
integer ipntr(3)
234
Double precision
235
& h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n)
236
c
237
c %------------%
238
c | Parameters |
239
c %------------%
240
c
241
Double precision
242
& one, zero
243
parameter (one = 1.0D+0, zero = 0.0D+0)
244
c
245
c %---------------%
246
c | Local Scalars |
247
c %---------------%
248
c
249
logical first, orth1, orth2, rstart, step3, step4
250
integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
251
& jj
252
Double precision
253
& betaj, ovfl, temp1, rnorm1, smlnum, tst1, ulp, unfl,
254
& wnorm
255
save first, orth1, orth2, rstart, step3, step4,
256
& ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
257
& betaj, rnorm1, smlnum, ulp, unfl, wnorm
258
c
259
c %-----------------------%
260
c | Local Array Arguments |
261
c %-----------------------%
262
c
263
Double precision
264
& xtemp(2)
265
c
266
c %----------------------%
267
c | External Subroutines |
268
c %----------------------%
269
c
270
external daxpy, dcopy, dscal, dgemv, dgetv0, dlabad,
271
& dvout, dmout, ivout, second
272
c
273
c %--------------------%
274
c | External Functions |
275
c %--------------------%
276
c
277
Double precision
278
& ddot, dnrm2, dlanhs, dlamch
279
external ddot, dnrm2, dlanhs, dlamch
280
c
281
c %---------------------%
282
c | Intrinsic Functions |
283
c %---------------------%
284
c
285
intrinsic abs, sqrt
286
c
287
c %-----------------%
288
c | Data statements |
289
c %-----------------%
290
c
291
data first / .true. /
292
c
293
c %-----------------------%
294
c | Executable Statements |
295
c %-----------------------%
296
c
297
if (first) then
298
c
299
c %-----------------------------------------%
300
c | Set machine-dependent constants for the |
301
c | the splitting and deflation criterion. |
302
c | If norm(H) <= sqrt(OVFL), |
303
c | overflow should not occur. |
304
c | REFERENCE: LAPACK subroutine dlahqr |
305
c %-----------------------------------------%
306
c
307
unfl = dlamch( 'safe minimum' )
308
ovfl = one / unfl
309
call dlabad( unfl, ovfl )
310
ulp = dlamch( 'precision' )
311
smlnum = unfl*( n / ulp )
312
first = .false.
313
end if
314
c
315
if (ido .eq. 0) then
316
c
317
c %-------------------------------%
318
c | Initialize timing statistics |
319
c | & message level for debugging |
320
c %-------------------------------%
321
c
322
call second (t0)
323
msglvl = mnaitr
324
c
325
c %------------------------------%
326
c | Initial call to this routine |
327
c %------------------------------%
328
c
329
info = 0
330
step3 = .false.
331
step4 = .false.
332
rstart = .false.
333
orth1 = .false.
334
orth2 = .false.
335
j = k + 1
336
ipj = 1
337
irj = ipj + n
338
ivj = irj + n
339
end if
340
c
341
c %-------------------------------------------------%
342
c | When in reverse communication mode one of: |
343
c | STEP3, STEP4, ORTH1, ORTH2, RSTART |
344
c | will be .true. when .... |
345
c | STEP3: return from computing OP*v_{j}. |
346
c | STEP4: return from computing B-norm of OP*v_{j} |
347
c | ORTH1: return from computing B-norm of r_{j+1} |
348
c | ORTH2: return from computing B-norm of |
349
c | correction to the residual vector. |
350
c | RSTART: return from OP computations needed by |
351
c | dgetv0. |
352
c %-------------------------------------------------%
353
c
354
if (step3) go to 50
355
if (step4) go to 60
356
if (orth1) go to 70
357
if (orth2) go to 90
358
if (rstart) go to 30
359
c
360
c %-----------------------------%
361
c | Else this is the first step |
362
c %-----------------------------%
363
c
364
c %--------------------------------------------------------------%
365
c | |
366
c | A R N O L D I I T E R A T I O N L O O P |
367
c | |
368
c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
369
c %--------------------------------------------------------------%
370
371
1000 continue
372
c
373
if (msglvl .gt. 1) then
374
call ivout (logfil, 1, [j], ndigit,
375
& '_naitr: generating Arnoldi vector number')
376
call dvout (logfil, 1, [rnorm], ndigit,
377
& '_naitr: B-norm of the current residual is')
378
end if
379
c
380
c %---------------------------------------------------%
381
c | STEP 1: Check if the B norm of j-th residual |
382
c | vector is zero. Equivalent to determing whether |
383
c | an exact j-step Arnoldi factorization is present. |
384
c %---------------------------------------------------%
385
c
386
betaj = rnorm
387
if (rnorm .gt. zero) go to 40
388
c
389
c %---------------------------------------------------%
390
c | Invariant subspace found, generate a new starting |
391
c | vector which is orthogonal to the current Arnoldi |
392
c | basis and continue the iteration. |
393
c %---------------------------------------------------%
394
c
395
if (msglvl .gt. 0) then
396
call ivout (logfil, 1, [j], ndigit,
397
& '_naitr: ****** RESTART AT STEP ******')
398
end if
399
c
400
c %---------------------------------------------%
401
c | ITRY is the loop variable that controls the |
402
c | maximum amount of times that a restart is |
403
c | attempted. NRSTRT is used by stat.h |
404
c %---------------------------------------------%
405
c
406
betaj = zero
407
nrstrt = nrstrt + 1
408
itry = 1
409
20 continue
410
rstart = .true.
411
ido = 0
412
30 continue
413
c
414
c %--------------------------------------%
415
c | If in reverse communication mode and |
416
c | RSTART = .true. flow returns here. |
417
c %--------------------------------------%
418
c
419
call dgetv0 (ido, bmat, itry, .false., n, j, v, ldv,
420
& resid, rnorm, ipntr, workd, ierr)
421
if (ido .ne. 99) go to 9000
422
if (ierr .lt. 0) then
423
itry = itry + 1
424
if (itry .le. 3) go to 20
425
c
426
c %------------------------------------------------%
427
c | Give up after several restart attempts. |
428
c | Set INFO to the size of the invariant subspace |
429
c | which spans OP and exit. |
430
c %------------------------------------------------%
431
c
432
info = j - 1
433
call second (t1)
434
tnaitr = tnaitr + (t1 - t0)
435
ido = 99
436
go to 9000
437
end if
438
c
439
40 continue
440
c
441
c %---------------------------------------------------------%
442
c | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm |
443
c | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
444
c | when reciprocating a small RNORM, test against lower |
445
c | machine bound. |
446
c %---------------------------------------------------------%
447
c
448
call dcopy (n, resid, 1, v(1,j), 1)
449
if (rnorm .ge. unfl) then
450
temp1 = one / rnorm
451
call dscal (n, temp1, v(1,j), 1)
452
call dscal (n, temp1, workd(ipj), 1)
453
else
454
c
455
c %-----------------------------------------%
456
c | To scale both v_{j} and p_{j} carefully |
457
c | use LAPACK routine SLASCL |
458
c %-----------------------------------------%
459
c
460
call dlascl ('General', i, i, rnorm, one, n, 1,
461
& v(1,j), n, infol)
462
call dlascl ('General', i, i, rnorm, one, n, 1,
463
& workd(ipj), n, infol)
464
end if
465
c
466
c %------------------------------------------------------%
467
c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
468
c | Note that this is not quite yet r_{j}. See STEP 4 |
469
c %------------------------------------------------------%
470
c
471
step3 = .true.
472
nopx = nopx + 1
473
call second (t2)
474
call dcopy (n, v(1,j), 1, workd(ivj), 1)
475
ipntr(1) = ivj
476
ipntr(2) = irj
477
ipntr(3) = ipj
478
ido = 1
479
c
480
c %-----------------------------------%
481
c | Exit in order to compute OP*v_{j} |
482
c %-----------------------------------%
483
c
484
go to 9000
485
50 continue
486
c
487
c %----------------------------------%
488
c | Back from reverse communication; |
489
c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} |
490
c | if step3 = .true. |
491
c %----------------------------------%
492
c
493
call second (t3)
494
tmvopx = tmvopx + (t3 - t2)
495
496
step3 = .false.
497
c
498
c %------------------------------------------%
499
c | Put another copy of OP*v_{j} into RESID. |
500
c %------------------------------------------%
501
c
502
call dcopy (n, workd(irj), 1, resid, 1)
503
c
504
c %---------------------------------------%
505
c | STEP 4: Finish extending the Arnoldi |
506
c | factorization to length j. |
507
c %---------------------------------------%
508
c
509
call second (t2)
510
if (bmat .eq. 'G') then
511
nbx = nbx + 1
512
step4 = .true.
513
ipntr(1) = irj
514
ipntr(2) = ipj
515
ido = 2
516
c
517
c %-------------------------------------%
518
c | Exit in order to compute B*OP*v_{j} |
519
c %-------------------------------------%
520
c
521
go to 9000
522
else if (bmat .eq. 'I') then
523
call dcopy (n, resid, 1, workd(ipj), 1)
524
end if
525
60 continue
526
c
527
c %----------------------------------%
528
c | Back from reverse communication; |
529
c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
530
c | if step4 = .true. |
531
c %----------------------------------%
532
c
533
if (bmat .eq. 'G') then
534
call second (t3)
535
tmvbx = tmvbx + (t3 - t2)
536
end if
537
c
538
step4 = .false.
539
c
540
c %-------------------------------------%
541
c | The following is needed for STEP 5. |
542
c | Compute the B-norm of OP*v_{j}. |
543
c %-------------------------------------%
544
c
545
if (bmat .eq. 'G') then
546
wnorm = ddot (n, resid, 1, workd(ipj), 1)
547
wnorm = sqrt(abs(wnorm))
548
else if (bmat .eq. 'I') then
549
wnorm = dnrm2(n, resid, 1)
550
end if
551
c
552
c %-----------------------------------------%
553
c | Compute the j-th residual corresponding |
554
c | to the j step factorization. |
555
c | Use Classical Gram Schmidt and compute: |
556
c | w_{j} <- V_{j}^T * B * OP * v_{j} |
557
c | r_{j} <- OP*v_{j} - V_{j} * w_{j} |
558
c %-----------------------------------------%
559
c
560
c
561
c %------------------------------------------%
562
c | Compute the j Fourier coefficients w_{j} |
563
c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. |
564
c %------------------------------------------%
565
c
566
call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
567
& zero, h(1,j), 1)
568
c
569
c %--------------------------------------%
570
c | Orthogonalize r_{j} against V_{j}. |
571
c | RESID contains OP*v_{j}. See STEP 3. |
572
c %--------------------------------------%
573
c
574
call dgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
575
& one, resid, 1)
576
c
577
if (j .gt. 1) h(j,j-1) = betaj
578
c
579
call second (t4)
580
c
581
orth1 = .true.
582
c
583
call second (t2)
584
if (bmat .eq. 'G') then
585
nbx = nbx + 1
586
call dcopy (n, resid, 1, workd(irj), 1)
587
ipntr(1) = irj
588
ipntr(2) = ipj
589
ido = 2
590
c
591
c %----------------------------------%
592
c | Exit in order to compute B*r_{j} |
593
c %----------------------------------%
594
c
595
go to 9000
596
else if (bmat .eq. 'I') then
597
call dcopy (n, resid, 1, workd(ipj), 1)
598
end if
599
70 continue
600
c
601
c %---------------------------------------------------%
602
c | Back from reverse communication if ORTH1 = .true. |
603
c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. |
604
c %---------------------------------------------------%
605
c
606
if (bmat .eq. 'G') then
607
call second (t3)
608
tmvbx = tmvbx + (t3 - t2)
609
end if
610
c
611
orth1 = .false.
612
c
613
c %------------------------------%
614
c | Compute the B-norm of r_{j}. |
615
c %------------------------------%
616
c
617
if (bmat .eq. 'G') then
618
rnorm = ddot (n, resid, 1, workd(ipj), 1)
619
rnorm = sqrt(abs(rnorm))
620
else if (bmat .eq. 'I') then
621
rnorm = dnrm2(n, resid, 1)
622
end if
623
c
624
c %-----------------------------------------------------------%
625
c | STEP 5: Re-orthogonalization / Iterative refinement phase |
626
c | Maximum NITER_ITREF tries. |
627
c | |
628
c | s = V_{j}^T * B * r_{j} |
629
c | r_{j} = r_{j} - V_{j}*s |
630
c | alphaj = alphaj + s_{j} |
631
c | |
632
c | The stopping criteria used for iterative refinement is |
633
c | discussed in Parlett's book SEP, page 107 and in Gragg & |
634
c | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. |
635
c | Determine if we need to correct the residual. The goal is |
636
c | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || |
637
c | The following test determines whether the sine of the |
638
c | angle between OP*x and the computed residual is less |
639
c | than or equal to 0.717. |
640
c %-----------------------------------------------------------%
641
c
642
if (rnorm .gt. 0.717*wnorm) go to 100
643
iter = 0
644
nrorth = nrorth + 1
645
c
646
c %---------------------------------------------------%
647
c | Enter the Iterative refinement phase. If further |
648
c | refinement is necessary, loop back here. The loop |
649
c | variable is ITER. Perform a step of Classical |
650
c | Gram-Schmidt using all the Arnoldi vectors V_{j} |
651
c %---------------------------------------------------%
652
c
653
80 continue
654
c
655
if (msglvl .gt. 2) then
656
xtemp(1) = wnorm
657
xtemp(2) = rnorm
658
call dvout (logfil, 2, xtemp, ndigit,
659
& '_naitr: re-orthonalization; wnorm and rnorm are')
660
call dvout (logfil, j, h(1,j), ndigit,
661
& '_naitr: j-th column of H')
662
end if
663
c
664
c %----------------------------------------------------%
665
c | Compute V_{j}^T * B * r_{j}. |
666
c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
667
c %----------------------------------------------------%
668
c
669
call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
670
& zero, workd(irj), 1)
671
c
672
c %---------------------------------------------%
673
c | Compute the correction to the residual: |
674
c | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
675
c | The correction to H is v(:,1:J)*H(1:J,1:J) |
676
c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. |
677
c %---------------------------------------------%
678
c
679
call dgemv ('N', n, j, -one, v, ldv, workd(irj), 1,
680
& one, resid, 1)
681
call daxpy (j, one, workd(irj), 1, h(1,j), 1)
682
c
683
orth2 = .true.
684
call second (t2)
685
if (bmat .eq. 'G') then
686
nbx = nbx + 1
687
call dcopy (n, resid, 1, workd(irj), 1)
688
ipntr(1) = irj
689
ipntr(2) = ipj
690
ido = 2
691
c
692
c %-----------------------------------%
693
c | Exit in order to compute B*r_{j}. |
694
c | r_{j} is the corrected residual. |
695
c %-----------------------------------%
696
c
697
go to 9000
698
else if (bmat .eq. 'I') then
699
call dcopy (n, resid, 1, workd(ipj), 1)
700
end if
701
90 continue
702
c
703
c %---------------------------------------------------%
704
c | Back from reverse communication if ORTH2 = .true. |
705
c %---------------------------------------------------%
706
c
707
if (bmat .eq. 'G') then
708
call second (t3)
709
tmvbx = tmvbx + (t3 - t2)
710
end if
711
c
712
c %-----------------------------------------------------%
713
c | Compute the B-norm of the corrected residual r_{j}. |
714
c %-----------------------------------------------------%
715
c
716
if (bmat .eq. 'G') then
717
rnorm1 = ddot (n, resid, 1, workd(ipj), 1)
718
rnorm1 = sqrt(abs(rnorm1))
719
else if (bmat .eq. 'I') then
720
rnorm1 = dnrm2(n, resid, 1)
721
end if
722
c
723
if (msglvl .gt. 0 .and. iter .gt. 0) then
724
call ivout (logfil, 1, [j], ndigit,
725
& '_naitr: Iterative refinement for Arnoldi residual')
726
if (msglvl .gt. 2) then
727
xtemp(1) = rnorm
728
xtemp(2) = rnorm1
729
call dvout (logfil, 2, xtemp, ndigit,
730
& '_naitr: iterative refinement ; rnorm and rnorm1 are')
731
end if
732
end if
733
c
734
c %-----------------------------------------%
735
c | Determine if we need to perform another |
736
c | step of re-orthogonalization. |
737
c %-----------------------------------------%
738
c
739
if (rnorm1 .gt. 0.717*rnorm) then
740
c
741
c %---------------------------------------%
742
c | No need for further refinement. |
743
c | The cosine of the angle between the |
744
c | corrected residual vector and the old |
745
c | residual vector is greater than 0.717 |
746
c | In other words the corrected residual |
747
c | and the old residual vector share an |
748
c | angle of less than arcCOS(0.717) |
749
c %---------------------------------------%
750
c
751
rnorm = rnorm1
752
c
753
else
754
c
755
c %-------------------------------------------%
756
c | Another step of iterative refinement step |
757
c | is required. NITREF is used by stat.h |
758
c %-------------------------------------------%
759
c
760
nitref = nitref + 1
761
rnorm = rnorm1
762
iter = iter + 1
763
if (iter .le. 1) go to 80
764
c
765
c %-------------------------------------------------%
766
c | Otherwise RESID is numerically in the span of V |
767
c %-------------------------------------------------%
768
c
769
do 95 jj = 1, n
770
resid(jj) = zero
771
95 continue
772
rnorm = zero
773
end if
774
c
775
c %----------------------------------------------%
776
c | Branch here directly if iterative refinement |
777
c | wasn't necessary or after at most NITER_REF |
778
c | steps of iterative refinement. |
779
c %----------------------------------------------%
780
c
781
100 continue
782
c
783
rstart = .false.
784
orth2 = .false.
785
c
786
call second (t5)
787
titref = titref + (t5 - t4)
788
c
789
c %------------------------------------%
790
c | STEP 6: Update j = j+1; Continue |
791
c %------------------------------------%
792
c
793
j = j + 1
794
if (j .gt. k+np) then
795
call second (t1)
796
tnaitr = tnaitr + (t1 - t0)
797
ido = 99
798
do 110 i = max(1,k), k+np-1
799
c
800
c %--------------------------------------------%
801
c | Check for splitting and deflation. |
802
c | Use a standard test as in the QR algorithm |
803
c | REFERENCE: LAPACK subroutine dlahqr |
804
c %--------------------------------------------%
805
c
806
tst1 = abs( h( i, i ) ) + abs( h( i+1, i+1 ) )
807
if( tst1.eq.zero )
808
& tst1 = dlanhs( '1', k+np, h, ldh, workd(n+1) )
809
if( abs( h( i+1,i ) ).le.max( ulp*tst1, smlnum ) )
810
& h(i+1,i) = zero
811
110 continue
812
c
813
if (msglvl .gt. 2) then
814
call dmout (logfil, k+np, k+np, h, ldh, ndigit,
815
& '_naitr: Final upper Hessenberg matrix H of order K+NP')
816
end if
817
c
818
go to 9000
819
end if
820
c
821
c %--------------------------------------------------------%
822
c | Loop back to extend the factorization by another step. |
823
c %--------------------------------------------------------%
824
c
825
go to 1000
826
c
827
c %---------------------------------------------------------------%
828
c | |
829
c | E N D O F M A I N I T E R A T I O N L O O P |
830
c | |
831
c %---------------------------------------------------------------%
832
c
833
9000 continue
834
return
835
c
836
c %---------------%
837
c | End of dnaitr |
838
c %---------------%
839
c
840
end
841
842