Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pcnaitr.f
5213 views
1
c\BeginDoc
2
c
3
c\Name: pcnaitr
4
c
5
c Message Passing Layer: MPI
6
c
7
c\Description:
8
c Reverse communication interface for applying NP additional steps to
9
c a K step nonsymmetric Arnoldi factorization.
10
c
11
c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T
12
c
13
c with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
14
c
15
c Output: OP*V_{k+p} - V_{k+p}*H = r_{k+p}*e_{k+p}^T
16
c
17
c with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
18
c
19
c where OP and B are as in pcnaupd. The B-norm of r_{k+p} is also
20
c computed and returned.
21
c
22
c\Usage:
23
c call pcnaitr
24
c ( COMM, IDO, BMAT, N, K, NP, NB, RESID, RNORM, V, LDV, H, LDH,
25
c IPNTR, WORKD, WORKL, INFO )
26
c
27
c\Arguments
28
c COMM MPI Communicator for the processor grid. (INPUT)
29
c
30
c IDO Integer. (INPUT/OUTPUT)
31
c Reverse communication flag.
32
c -------------------------------------------------------------
33
c IDO = 0: first call to the reverse communication interface
34
c IDO = -1: compute Y = OP * X where
35
c IPNTR(1) is the pointer into WORK for X,
36
c IPNTR(2) is the pointer into WORK for Y.
37
c This is for the restart phase to force the new
38
c starting vector into the range of OP.
39
c IDO = 1: compute Y = OP * X where
40
c IPNTR(1) is the pointer into WORK for X,
41
c IPNTR(2) is the pointer into WORK for Y,
42
c IPNTR(3) is the pointer into WORK for B * X.
43
c IDO = 2: compute Y = B * X where
44
c IPNTR(1) is the pointer into WORK for X,
45
c IPNTR(2) is the pointer into WORK for Y.
46
c IDO = 99: done
47
c -------------------------------------------------------------
48
c When the routine is used in the "shift-and-invert" mode, the
49
c vector B * Q is already available and do not need to be
50
c recomputed in forming OP * Q.
51
c
52
c BMAT Character*1. (INPUT)
53
c BMAT specifies the type of the matrix B that defines the
54
c semi-inner product for the operator OP. See pcnaupd.
55
c B = 'I' -> standard eigenvalue problem A*x = lambda*x
56
c B = 'G' -> generalized eigenvalue problem A*x = lambda*M**x
57
c
58
c N Integer. (INPUT)
59
c Dimension of the eigenproblem.
60
c
61
c K Integer. (INPUT)
62
c Current size of V and H.
63
c
64
c NP Integer. (INPUT)
65
c Number of additional Arnoldi steps to take.
66
c
67
c NB Integer. (INPUT)
68
c Blocksize to be used in the recurrence.
69
c Only work for NB = 1 right now. The goal is to have a
70
c program that implement both the block and non-block method.
71
c
72
c RESID Complex array of length N. (INPUT/OUTPUT)
73
c On INPUT: RESID contains the residual vector r_{k}.
74
c On OUTPUT: RESID contains the residual vector r_{k+p}.
75
c
76
c RNORM Real scalar. (INPUT/OUTPUT)
77
c B-norm of the starting residual on input.
78
c B-norm of the updated residual r_{k+p} on output.
79
c
80
c V Complex N by K+NP array. (INPUT/OUTPUT)
81
c On INPUT: V contains the Arnoldi vectors in the first K
82
c columns.
83
c On OUTPUT: V contains the new NP Arnoldi vectors in the next
84
c NP columns. The first K columns are unchanged.
85
c
86
c LDV Integer. (INPUT)
87
c Leading dimension of V exactly as declared in the calling
88
c program.
89
c
90
c H Complex (K+NP) by (K+NP) array. (INPUT/OUTPUT)
91
c H is used to store the generated upper Hessenberg matrix.
92
c
93
c LDH Integer. (INPUT)
94
c Leading dimension of H exactly as declared in the calling
95
c program.
96
c
97
c IPNTR Integer array of length 3. (OUTPUT)
98
c Pointer to mark the starting locations in the WORK for
99
c vectors used by the Arnoldi iteration.
100
c -------------------------------------------------------------
101
c IPNTR(1): pointer to the current operand vector X.
102
c IPNTR(2): pointer to the current result vector Y.
103
c IPNTR(3): pointer to the vector B * X when used in the
104
c shift-and-invert mode. X is the current operand.
105
c -------------------------------------------------------------
106
c
107
c WORKD Complex work array of length 3*N. (REVERSE COMMUNICATION)
108
c Distributed array to be used in the basic Arnoldi iteration
109
c for reverse communication. The calling program should not
110
c use WORKD as temporary workspace during the iteration !!!!!!
111
c On input, WORKD(1:N) = B*RESID and is used to save some
112
c computation at the first step.
113
c
114
c WORKL Complex work space used for Gram Schmidt orthogonalization
115
c
116
c INFO Integer. (OUTPUT)
117
c = 0: Normal exit.
118
c > 0: Size of the spanning invariant subspace of OP found.
119
c
120
c\EndDoc
121
c
122
c-----------------------------------------------------------------------
123
c
124
c\BeginLib
125
c
126
c\Local variables:
127
c xxxxxx Complex
128
c
129
c\References:
130
c 1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
131
c a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
132
c pp 357-385.
133
c 2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
134
c Restarted Arnoldi Iteration", Rice University Technical Report
135
c TR95-13, Department of Computational and Applied Mathematics.
136
c
137
c\Routines called:
138
c pcgetv0 Parallel ARPACK routine to generate the initial vector.
139
c pivout Parallel ARPACK utility routine that prints integers.
140
c second ARPACK utility routine for timing.
141
c pcmout Parallel ARPACK utility routine that prints matrices
142
c pcvout Parallel ARPACK utility routine that prints vectors.
143
c clanhs LAPACK routine that computes various norms of a matrix.
144
c clascl LAPACK routine for careful scaling of a matrix.
145
c slabad LAPACK routine for defining the underflow and overflow
146
c limits
147
c pslamch ScaLAPACK routine that determines machine constants.
148
c slapy2 LAPACK routine to compute sqrt(x**2+y**2) carefully.
149
c cgemv Level 2 BLAS routine for matrix vector multiplication.
150
c caxpy Level 1 BLAS that computes a vector triad.
151
c ccopy Level 1 BLAS that copies one vector to another .
152
c cdotc Level 1 BLAS that computes the scalar product of
153
c two vectors.
154
c cscal Level 1 BLAS that scales a vector.
155
c csscal Level 1 BLAS that scales a complex vector by a real number.
156
c pscnorm2 Parallel version of Level 1 BLAS that computes the
157
c norm of a vector.
158
c
159
c\Author
160
c Danny Sorensen Phuong Vu
161
c Richard Lehoucq CRPC / Rice University
162
c Dept. of Computational & Houston, Texas
163
c Applied Mathematics
164
c Rice University
165
c Houston, Texas
166
c
167
c\Parallel Modifications
168
c Kristi Maschhoff
169
c
170
c\Revision history:
171
c Starting Point: Complex Code FILE: naitr.F SID: 2.1
172
c
173
c\SCCS Information:
174
c FILE: naitr.F SID: 1.3 DATE OF SID: 3/19/97
175
c
176
c\Remarks
177
c The algorithm implemented is:
178
c
179
c restart = .false.
180
c Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
181
c r_{k} contains the initial residual vector even for k = 0;
182
c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
183
c computed by the calling program.
184
c
185
c betaj = rnorm ; p_{k+1} = B*r_{k} ;
186
c For j = k+1, ..., k+np Do
187
c 1) if ( betaj < tol ) stop or restart depending on j.
188
c ( At present tol is zero )
189
c if ( restart ) generate a new starting vector.
190
c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}];
191
c p_{j} = p_{j}/betaj
192
c 3) r_{j} = OP*v_{j} where OP is defined as in pcnaupd
193
c For shift-invert mode p_{j} = B*v_{j} is already available.
194
c wnorm = || OP*v_{j} ||
195
c 4) Compute the j-th step residual vector.
196
c w_{j} = V_{j}^T * B * OP * v_{j}
197
c r_{j} = OP*v_{j} - V_{j} * w_{j}
198
c H(:,j) = w_{j};
199
c H(j,j-1) = rnorm
200
c rnorm = || r_(j) ||
201
c If (rnorm > 0.717*wnorm) accept step and go back to 1)
202
c 5) Re-orthogonalization step:
203
c s = V_{j}'*B*r_{j}
204
c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} ||
205
c alphaj = alphaj + s_{j};
206
c 6) Iterative refinement step:
207
c If (rnorm1 > 0.717*rnorm) then
208
c rnorm = rnorm1
209
c accept step and go back to 1)
210
c Else
211
c rnorm = rnorm1
212
c If this is the first time in step 6), go to 5)
213
c Else r_{j} lies in the span of V_{j} numerically.
214
c Set r_{j} = 0 and rnorm = 0; go to 1)
215
c EndIf
216
c End Do
217
c
218
c\EndLib
219
c
220
c-----------------------------------------------------------------------
221
c
222
subroutine pcnaitr
223
& (comm, ido, bmat, n, k, np, nb, resid, rnorm, v, ldv, h, ldh,
224
& ipntr, workd, workl, info)
225
c
226
include 'mpif.h'
227
c
228
c %---------------%
229
c | MPI Variables |
230
c %---------------%
231
c
232
integer comm
233
c
234
c %----------------------------------------------------%
235
c | Include files for debugging and timing information |
236
c %----------------------------------------------------%
237
c
238
include 'debug.h'
239
include 'stat.h'
240
c
241
c %------------------%
242
c | Scalar Arguments |
243
c %------------------%
244
c
245
character bmat*1
246
integer ido, info, k, ldh, ldv, n, nb, np
247
Real
248
& rnorm
249
c
250
c %-----------------%
251
c | Array Arguments |
252
c %-----------------%
253
c
254
integer ipntr(3)
255
Complex
256
& h(ldh,k+np), resid(n), v(ldv,k+np), workd(3*n),
257
& workl(2*ldh)
258
c
259
c %------------%
260
c | Parameters |
261
c %------------%
262
c
263
Complex
264
& one, zero
265
Real
266
& rone, rzero
267
parameter (one = (1.0, 0.0), zero = (0.0, 0.0),
268
& rone = 1.0, rzero = 0.0)
269
c
270
c %--------------%
271
c | Local Arrays |
272
c %--------------%
273
c
274
Real
275
& rtemp(2)
276
c
277
c %---------------%
278
c | Local Scalars |
279
c %---------------%
280
c
281
logical first, orth1, orth2, rstart, step3, step4
282
integer ierr, i, infol, ipj, irj, ivj, iter, itry, j, msglvl,
283
& jj
284
Real
285
& ovfl, smlnum, tst1, ulp, unfl, betaj,
286
& temp1, rnorm1, wnorm
287
Complex
288
& cnorm
289
c
290
save first, orth1, orth2, rstart, step3, step4,
291
& ierr, ipj, irj, ivj, iter, itry, j, msglvl, ovfl,
292
& betaj, rnorm1, smlnum, ulp, unfl, wnorm
293
c
294
Complex
295
& cnorm_buf
296
c
297
c %----------------------%
298
c | External Subroutines |
299
c %----------------------%
300
c
301
external caxpy, ccopy, cscal, cgemv, pcgetv0, slabad,
302
& csscal, pcvout, pcmout, pivout, second
303
c
304
c %--------------------%
305
c | External Functions |
306
c %--------------------%
307
c
308
Complex
309
& cdotc
310
Real
311
& pslamch10, pscnorm2, clanhs, slapy2
312
external cdotc, pscnorm2, clanhs, pslamch10, slapy2
313
c
314
c %---------------------%
315
c | Intrinsic Functions |
316
c %---------------------%
317
c
318
intrinsic aimag, real, max, sqrt
319
c
320
c %-----------------%
321
c | Data statements |
322
c %-----------------%
323
c
324
data first / .true. /
325
c
326
c %-----------------------%
327
c | Executable Statements |
328
c %-----------------------%
329
c
330
if (first) then
331
c
332
c %-----------------------------------------%
333
c | Set machine-dependent constants for the |
334
c | the splitting and deflation criterion. |
335
c | If norm(H) <= sqrt(OVFL), |
336
c | overflow should not occur. |
337
c | REFERENCE: LAPACK subroutine clahqr |
338
c %-----------------------------------------%
339
c
340
unfl = pslamch10(comm, 'safe minimum' )
341
ovfl = real(one / unfl)
342
call slabad( unfl, ovfl )
343
ulp = pslamch10( comm, 'precision' )
344
smlnum = unfl*( n / ulp )
345
first = .false.
346
end if
347
c
348
if (ido .eq. 0) then
349
c
350
c %-------------------------------%
351
c | Initialize timing statistics |
352
c | & message level for debugging |
353
c %-------------------------------%
354
c
355
call second (t0)
356
msglvl = mcaitr
357
c
358
c %------------------------------%
359
c | Initial call to this routine |
360
c %------------------------------%
361
c
362
info = 0
363
step3 = .false.
364
step4 = .false.
365
rstart = .false.
366
orth1 = .false.
367
orth2 = .false.
368
j = k + 1
369
ipj = 1
370
irj = ipj + n
371
ivj = irj + n
372
end if
373
c
374
c %-------------------------------------------------%
375
c | When in reverse communication mode one of: |
376
c | STEP3, STEP4, ORTH1, ORTH2, RSTART |
377
c | will be .true. when .... |
378
c | STEP3: return from computing OP*v_{j}. |
379
c | STEP4: return from computing B-norm of OP*v_{j} |
380
c | ORTH1: return from computing B-norm of r_{j+1} |
381
c | ORTH2: return from computing B-norm of |
382
c | correction to the residual vector. |
383
c | RSTART: return from OP computations needed by |
384
c | pcgetv0. |
385
c %-------------------------------------------------%
386
c
387
if (step3) go to 50
388
if (step4) go to 60
389
if (orth1) go to 70
390
if (orth2) go to 90
391
if (rstart) go to 30
392
c
393
c %-----------------------------%
394
c | Else this is the first step |
395
c %-----------------------------%
396
c
397
c %--------------------------------------------------------------%
398
c | |
399
c | A R N O L D I I T E R A T I O N L O O P |
400
c | |
401
c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
402
c %--------------------------------------------------------------%
403
404
1000 continue
405
c
406
if (msglvl .gt. 1) then
407
call pivout (comm, logfil, 1, [j], ndigit,
408
& '_naitr: generating Arnoldi vector number')
409
call pdvout (comm, logfil, 1, [rnorm], ndigit,
410
& '_naitr: B-norm of the current residual is')
411
end if
412
c
413
c %---------------------------------------------------%
414
c | STEP 1: Check if the B norm of j-th residual |
415
c | vector is zero. Equivalent to determine whether |
416
c | an exact j-step Arnoldi factorization is present. |
417
c %---------------------------------------------------%
418
c
419
betaj = rnorm
420
if (rnorm .gt. rzero) go to 40
421
c
422
c %---------------------------------------------------%
423
c | Invariant subspace found, generate a new starting |
424
c | vector which is orthogonal to the current Arnoldi |
425
c | basis and continue the iteration. |
426
c %---------------------------------------------------%
427
c
428
if (msglvl .gt. 0) then
429
call pivout (comm, logfil, 1, [j], ndigit,
430
& '_naitr: ****** RESTART AT STEP ******')
431
end if
432
c
433
c %---------------------------------------------%
434
c | ITRY is the loop variable that controls the |
435
c | maximum amount of times that a restart is |
436
c | attempted. NRSTRT is used by stat.h |
437
c %---------------------------------------------%
438
c
439
betaj = rzero
440
nrstrt = nrstrt + 1
441
itry = 1
442
20 continue
443
rstart = .true.
444
ido = 0
445
30 continue
446
c
447
c %--------------------------------------%
448
c | If in reverse communication mode and |
449
c | RSTART = .true. flow returns here. |
450
c %--------------------------------------%
451
c
452
call pcgetv0 (comm, ido, bmat, itry, .false., n, j, v, ldv,
453
& resid, rnorm, ipntr, workd, workl, ierr)
454
if (ido .ne. 99) go to 9000
455
if (ierr .lt. 0) then
456
itry = itry + 1
457
if (itry .le. 3) go to 20
458
c
459
c %------------------------------------------------%
460
c | Give up after several restart attempts. |
461
c | Set INFO to the size of the invariant subspace |
462
c | which spans OP and exit. |
463
c %------------------------------------------------%
464
c
465
info = j - 1
466
call second (t1)
467
tcaitr = tcaitr + (t1 - t0)
468
ido = 99
469
go to 9000
470
end if
471
c
472
40 continue
473
c
474
c %---------------------------------------------------------%
475
c | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm |
476
c | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
477
c | when reciprocating a small RNORM, test against lower |
478
c | machine bound. |
479
c %---------------------------------------------------------%
480
c
481
call ccopy (n, resid, 1, v(1,j), 1)
482
if ( rnorm .ge. unfl) then
483
temp1 = rone / rnorm
484
call csscal (n, temp1, v(1,j), 1)
485
call csscal (n, temp1, workd(ipj), 1)
486
else
487
c
488
c %-----------------------------------------%
489
c | To scale both v_{j} and p_{j} carefully |
490
c | use LAPACK routine clascl |
491
c %-----------------------------------------%
492
c
493
call clascl ('General', i, i, rnorm, rone,
494
& n, 1, v(1,j), n, infol)
495
call clascl ('General', i, i, rnorm, rone,
496
& n, 1, workd(ipj), n, infol)
497
end if
498
c
499
c %------------------------------------------------------%
500
c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
501
c | Note that this is not quite yet r_{j}. See STEP 4 |
502
c %------------------------------------------------------%
503
c
504
step3 = .true.
505
nopx = nopx + 1
506
call second (t2)
507
call ccopy (n, v(1,j), 1, workd(ivj), 1)
508
ipntr(1) = ivj
509
ipntr(2) = irj
510
ipntr(3) = ipj
511
ido = 1
512
c
513
c %-----------------------------------%
514
c | Exit in order to compute OP*v_{j} |
515
c %-----------------------------------%
516
c
517
go to 9000
518
50 continue
519
c
520
c %----------------------------------%
521
c | Back from reverse communication; |
522
c | WORKD(IRJ:IRJ+N-1) := OP*v_{j} |
523
c | if step3 = .true. |
524
c %----------------------------------%
525
c
526
call second (t3)
527
tmvopx = tmvopx + (t3 - t2)
528
529
step3 = .false.
530
c
531
c %------------------------------------------%
532
c | Put another copy of OP*v_{j} into RESID. |
533
c %------------------------------------------%
534
c
535
call ccopy (n, workd(irj), 1, resid, 1)
536
c
537
c %---------------------------------------%
538
c | STEP 4: Finish extending the Arnoldi |
539
c | factorization to length j. |
540
c %---------------------------------------%
541
c
542
call second (t2)
543
if (bmat .eq. 'G') then
544
nbx = nbx + 1
545
step4 = .true.
546
ipntr(1) = irj
547
ipntr(2) = ipj
548
ido = 2
549
c
550
c %-------------------------------------%
551
c | Exit in order to compute B*OP*v_{j} |
552
c %-------------------------------------%
553
c
554
go to 9000
555
else if (bmat .eq. 'I') then
556
call ccopy (n, resid, 1, workd(ipj), 1)
557
end if
558
60 continue
559
c
560
c %----------------------------------%
561
c | Back from reverse communication; |
562
c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j} |
563
c | if step4 = .true. |
564
c %----------------------------------%
565
c
566
if (bmat .eq. 'G') then
567
call second (t3)
568
tmvbx = tmvbx + (t3 - t2)
569
end if
570
c
571
step4 = .false.
572
c
573
c %-------------------------------------%
574
c | The following is needed for STEP 5. |
575
c | Compute the B-norm of OP*v_{j}. |
576
c %-------------------------------------%
577
c
578
if (bmat .eq. 'G') then
579
cnorm_buf = cdotc (n, resid, 1, workd(ipj), 1)
580
call MPI_ALLREDUCE( cnorm_buf, cnorm, 1,
581
& MPI_COMPLEX, MPI_SUM, comm, ierr )
582
wnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
583
else if (bmat .eq. 'I') then
584
wnorm = pscnorm2(comm, n, resid, 1)
585
end if
586
c
587
c %-----------------------------------------%
588
c | Compute the j-th residual corresponding |
589
c | to the j step factorization. |
590
c | Use Classical Gram Schmidt and compute: |
591
c | w_{j} <- V_{j}^T * B * OP * v_{j} |
592
c | r_{j} <- OP*v_{j} - V_{j} * w_{j} |
593
c %-----------------------------------------%
594
c
595
c
596
c %------------------------------------------%
597
c | Compute the j Fourier coefficients w_{j} |
598
c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. |
599
c %------------------------------------------%
600
c
601
call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
602
& zero, workl, 1)
603
call MPI_ALLREDUCE( workl, h(1,j), j,
604
& MPI_COMPLEX, MPI_SUM, comm, ierr)
605
c
606
c %--------------------------------------%
607
c | Orthogonalize r_{j} against V_{j}. |
608
c | RESID contains OP*v_{j}. See STEP 3. |
609
c %--------------------------------------%
610
c
611
call cgemv ('N', n, j, -one, v, ldv, h(1,j), 1,
612
& one, resid, 1)
613
c
614
if (j .gt. 1) h(j,j-1) = cmplx(betaj, rzero)
615
c
616
call second (t4)
617
c
618
orth1 = .true.
619
c
620
call second (t2)
621
if (bmat .eq. 'G') then
622
nbx = nbx + 1
623
call ccopy (n, resid, 1, workd(irj), 1)
624
ipntr(1) = irj
625
ipntr(2) = ipj
626
ido = 2
627
c
628
c %----------------------------------%
629
c | Exit in order to compute B*r_{j} |
630
c %----------------------------------%
631
c
632
go to 9000
633
else if (bmat .eq. 'I') then
634
call ccopy (n, resid, 1, workd(ipj), 1)
635
end if
636
70 continue
637
c
638
c %---------------------------------------------------%
639
c | Back from reverse communication if ORTH1 = .true. |
640
c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. |
641
c %---------------------------------------------------%
642
c
643
if (bmat .eq. 'G') then
644
call second (t3)
645
tmvbx = tmvbx + (t3 - t2)
646
end if
647
c
648
orth1 = .false.
649
c
650
c %------------------------------%
651
c | Compute the B-norm of r_{j}. |
652
c %------------------------------%
653
c
654
if (bmat .eq. 'G') then
655
cnorm_buf = cdotc (n, resid, 1, workd(ipj), 1)
656
call MPI_ALLREDUCE( cnorm_buf, cnorm, 1,
657
& MPI_COMPLEX, MPI_SUM, comm, ierr )
658
rnorm = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
659
else if (bmat .eq. 'I') then
660
rnorm = pscnorm2(comm, n, resid, 1)
661
end if
662
c
663
c %-----------------------------------------------------------%
664
c | STEP 5: Re-orthogonalization / Iterative refinement phase |
665
c | Maximum NITER_ITREF tries. |
666
c | |
667
c | s = V_{j}^T * B * r_{j} |
668
c | r_{j} = r_{j} - V_{j}*s |
669
c | alphaj = alphaj + s_{j} |
670
c | |
671
c | The stopping criteria used for iterative refinement is |
672
c | discussed in Parlett's book SEP, page 107 and in Gragg & |
673
c | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. |
674
c | Determine if we need to correct the residual. The goal is |
675
c | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || |
676
c | The following test determines whether the sine of the |
677
c | angle between OP*x and the computed residual is less |
678
c | than or equal to 0.717. |
679
c %-----------------------------------------------------------%
680
c
681
if ( rnorm .gt. 0.717*wnorm ) go to 100
682
c
683
iter = 0
684
nrorth = nrorth + 1
685
c
686
c %---------------------------------------------------%
687
c | Enter the Iterative refinement phase. If further |
688
c | refinement is necessary, loop back here. The loop |
689
c | variable is ITER. Perform a step of Classical |
690
c | Gram-Schmidt using all the Arnoldi vectors V_{j} |
691
c %---------------------------------------------------%
692
c
693
80 continue
694
c
695
if (msglvl .gt. 2) then
696
rtemp(1) = wnorm
697
rtemp(2) = rnorm
698
call psvout (comm, logfil, 2, rtemp, ndigit,
699
& '_naitr: re-orthogonalization; wnorm and rnorm are')
700
call pcvout (comm, logfil, j, h(1,j), ndigit,
701
& '_naitr: j-th column of H')
702
end if
703
c
704
c %----------------------------------------------------%
705
c | Compute V_{j}^T * B * r_{j}. |
706
c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
707
c %----------------------------------------------------%
708
c
709
call cgemv ('C', n, j, one, v, ldv, workd(ipj), 1,
710
& zero, workl(j+1), 1)
711
call MPI_ALLREDUCE( workl(j+1), workl(1), j,
712
& MPI_COMPLEX, MPI_SUM, comm, ierr)
713
c
714
c %---------------------------------------------%
715
c | Compute the correction to the residual: |
716
c | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
717
c | The correction to H is v(:,1:J)*H(1:J,1:J) |
718
c | + v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j. |
719
c %---------------------------------------------%
720
c
721
call cgemv ('N', n, j, -one, v, ldv, workl(1), 1,
722
& one, resid, 1)
723
call caxpy (j, one, workl(1), 1, h(1,j), 1)
724
c
725
orth2 = .true.
726
call second (t2)
727
if (bmat .eq. 'G') then
728
nbx = nbx + 1
729
call ccopy (n, resid, 1, workd(irj), 1)
730
ipntr(1) = irj
731
ipntr(2) = ipj
732
ido = 2
733
c
734
c %-----------------------------------%
735
c | Exit in order to compute B*r_{j}. |
736
c | r_{j} is the corrected residual. |
737
c %-----------------------------------%
738
c
739
go to 9000
740
else if (bmat .eq. 'I') then
741
call ccopy (n, resid, 1, workd(ipj), 1)
742
end if
743
90 continue
744
c
745
c %---------------------------------------------------%
746
c | Back from reverse communication if ORTH2 = .true. |
747
c %---------------------------------------------------%
748
c
749
if (bmat .eq. 'G') then
750
call second (t3)
751
tmvbx = tmvbx + (t3 - t2)
752
end if
753
c
754
c %-----------------------------------------------------%
755
c | Compute the B-norm of the corrected residual r_{j}. |
756
c %-----------------------------------------------------%
757
c
758
if (bmat .eq. 'G') then
759
cnorm_buf = cdotc (n, resid, 1, workd(ipj), 1)
760
call MPI_ALLREDUCE( cnorm_buf, cnorm, 1,
761
& MPI_COMPLEX, MPI_SUM, comm, ierr )
762
rnorm1 = sqrt( slapy2(real(cnorm),aimag(cnorm)) )
763
else if (bmat .eq. 'I') then
764
rnorm1 = pscnorm2(comm, n, resid, 1)
765
end if
766
c
767
if (msglvl .gt. 0 .and. iter .gt. 0 ) then
768
call pivout (comm, logfil, 1, [j], ndigit,
769
& '_naitr: Iterative refinement for Arnoldi residual')
770
if (msglvl .gt. 2) then
771
rtemp(1) = rnorm
772
rtemp(2) = rnorm1
773
call psvout (comm, logfil, 2, rtemp, ndigit,
774
& '_naitr: iterative refinement ; rnorm and rnorm1 are')
775
end if
776
end if
777
c
778
c %-----------------------------------------%
779
c | Determine if we need to perform another |
780
c | step of re-orthogonalization. |
781
c %-----------------------------------------%
782
c
783
if ( rnorm1 .gt. 0.717*rnorm ) then
784
c
785
c %---------------------------------------%
786
c | No need for further refinement. |
787
c | The cosine of the angle between the |
788
c | corrected residual vector and the old |
789
c | residual vector is greater than 0.717 |
790
c | In other words the corrected residual |
791
c | and the old residual vector share an |
792
c | angle of less than arcCOS(0.717) |
793
c %---------------------------------------%
794
c
795
rnorm = rnorm1
796
c
797
else
798
c
799
c %-------------------------------------------%
800
c | Another step of iterative refinement step |
801
c | is required. NITREF is used by stat.h |
802
c %-------------------------------------------%
803
c
804
nitref = nitref + 1
805
rnorm = rnorm1
806
iter = iter + 1
807
if (iter .le. 1) go to 80
808
c
809
c %-------------------------------------------------%
810
c | Otherwise RESID is numerically in the span of V |
811
c %-------------------------------------------------%
812
c
813
do 95 jj = 1, n
814
resid(jj) = zero
815
95 continue
816
rnorm = rzero
817
end if
818
c
819
c %----------------------------------------------%
820
c | Branch here directly if iterative refinement |
821
c | wasn't necessary or after at most NITER_REF |
822
c | steps of iterative refinement. |
823
c %----------------------------------------------%
824
c
825
100 continue
826
c
827
rstart = .false.
828
orth2 = .false.
829
c
830
call second (t5)
831
titref = titref + (t5 - t4)
832
c
833
c %------------------------------------%
834
c | STEP 6: Update j = j+1; Continue |
835
c %------------------------------------%
836
c
837
j = j + 1
838
if (j .gt. k+np) then
839
call second (t1)
840
tcaitr = tcaitr + (t1 - t0)
841
ido = 99
842
do 110 i = max(1,k), k+np-1
843
c
844
c %--------------------------------------------%
845
c | Check for splitting and deflation. |
846
c | Use a standard test as in the QR algorithm |
847
c | REFERENCE: LAPACK subroutine clahqr |
848
c %--------------------------------------------%
849
c
850
tst1 = slapy2(real(h(i,i)),aimag(h(i,i)))
851
& + slapy2(real(h(i+1,i+1)), aimag(h(i+1,i+1)))
852
if( tst1.eq.real(zero) )
853
& tst1 = clanhs( '1', k+np, h, ldh, workd(n+1) )
854
if( slapy2(real(h(i+1,i)),aimag(h(i+1,i))) .le.
855
& max( ulp*tst1, smlnum ) )
856
& h(i+1,i) = zero
857
110 continue
858
c
859
if (msglvl .gt. 2) then
860
call pcmout (comm, logfil, k+np, k+np, h, ldh, ndigit,
861
& '_naitr: Final upper Hessenberg matrix H of order K+NP')
862
end if
863
c
864
go to 9000
865
end if
866
c
867
c %--------------------------------------------------------%
868
c | Loop back to extend the factorization by another step. |
869
c %--------------------------------------------------------%
870
c
871
go to 1000
872
c
873
c %---------------------------------------------------------------%
874
c | |
875
c | E N D O F M A I N I T E R A T I O N L O O P |
876
c | |
877
c %---------------------------------------------------------------%
878
c
879
9000 continue
880
return
881
c
882
c %----------------%
883
c | End of pcnaitr |
884
c %----------------%
885
c
886
end
887
888