Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdsaitr.f
5218 views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdsaitr
5
c
6
c Message Passing Layer: MPI
7
c
8
c\Description:
9
c Reverse communication interface for applying NP additional steps to
10
c a K step symmetric Arnoldi factorization.
11
c
12
c Input: OP*V_{k} - V_{k}*H = r_{k}*e_{k}^T
13
c
14
c with (V_{k}^T)*B*V_{k} = I, (V_{k}^T)*B*r_{k} = 0.
15
c
16
c Output: OP*V_{k+p} - V_{k+p}*H = r_{k+p}*e_{k+p}^T
17
c
18
c with (V_{k+p}^T)*B*V_{k+p} = I, (V_{k+p}^T)*B*r_{k+p} = 0.
19
c
20
c where OP and B are as in pdsaupd. The B-norm of r_{k+p} is also
21
c computed and returned.
22
c
23
c\Usage:
24
c call pdsaitr
25
c ( COMM, IDO, BMAT, N, K, NP, MODE, RESID, RNORM, V, LDV, H, LDH,
26
c IPNTR, WORKD, WORKL, INFO )
27
c
28
c\Arguments
29
c COMM MPI Communicator for the processor grid. (INPUT)
30
c
31
c IDO Integer. (INPUT/OUTPUT)
32
c Reverse communication flag.
33
c -------------------------------------------------------------
34
c IDO = 0: first call to the reverse communication interface
35
c IDO = -1: compute Y = OP * X where
36
c IPNTR(1) is the pointer into WORK for X,
37
c IPNTR(2) is the pointer into WORK for Y.
38
c This is for the restart phase to force the new
39
c starting vector into the range of OP.
40
c IDO = 1: compute Y = OP * X where
41
c IPNTR(1) is the pointer into WORK for X,
42
c IPNTR(2) is the pointer into WORK for Y,
43
c IPNTR(3) is the pointer into WORK for B * X.
44
c IDO = 2: compute Y = B * X where
45
c IPNTR(1) is the pointer into WORK for X,
46
c IPNTR(2) is the pointer into WORK for Y.
47
c IDO = 99: done
48
c -------------------------------------------------------------
49
c When the routine is used in the "shift-and-invert" mode, the
50
c vector B * Q is already available and does not need to be
51
c recomputed in forming OP * Q.
52
c
53
c BMAT Character*1. (INPUT)
54
c BMAT specifies the type of matrix B that defines the
55
c semi-inner product for the operator OP. See pdsaupd.
56
c B = 'I' -> standard eigenvalue problem A*x = lambda*x
57
c B = 'G' -> generalized eigenvalue problem A*x = lambda*M*x
58
c
59
c N Integer. (INPUT)
60
c Dimension of the eigenproblem.
61
c
62
c K Integer. (INPUT)
63
c Current order of H and the number of columns of V.
64
c
65
c NP Integer. (INPUT)
66
c Number of additional Arnoldi steps to take.
67
c
68
c MODE Integer. (INPUT)
69
c Signifies which form for "OP". If MODE=2 then
70
c a reduction in the number of B matrix vector multiplies
71
c is possible since the B-norm of OP*x is equivalent to
72
c the inv(B)-norm of A*x.
73
c
74
c RESID Double precision array of length N. (INPUT/OUTPUT)
75
c On INPUT: RESID contains the residual vector r_{k}.
76
c On OUTPUT: RESID contains the residual vector r_{k+p}.
77
c
78
c RNORM Double precision scalar. (INPUT/OUTPUT)
79
c On INPUT the B-norm of r_{k}.
80
c On OUTPUT the B-norm of the updated residual r_{k+p}.
81
c
82
c V Double precision N by K+NP array. (INPUT/OUTPUT)
83
c On INPUT: V contains the Arnoldi vectors in the first K
84
c columns.
85
c On OUTPUT: V contains the new NP Arnoldi vectors in the next
86
c NP columns. The first K columns are unchanged.
87
c
88
c LDV Integer. (INPUT)
89
c Leading dimension of V exactly as declared in the calling
90
c program.
91
c
92
c H Double precision (K+NP) by 2 array. (INPUT/OUTPUT)
93
c H is used to store the generated symmetric tridiagonal matrix
94
c with the subdiagonal in the first column starting at H(2,1)
95
c and the main diagonal in the second column.
96
c
97
c LDH Integer. (INPUT)
98
c Leading dimension of H exactly as declared in the calling
99
c program.
100
c
101
c IPNTR Integer array of length 3. (OUTPUT)
102
c Pointer to mark the starting locations in the WORK for
103
c vectors used by the Arnoldi iteration.
104
c -------------------------------------------------------------
105
c IPNTR(1): pointer to the current operand vector X.
106
c IPNTR(2): pointer to the current result vector Y.
107
c IPNTR(3): pointer to the vector B * X when used in the
108
c shift-and-invert mode. X is the current operand.
109
c -------------------------------------------------------------
110
c
111
c WORKD Double precision work array of length 3*N. (REVERSE COMMUNICATION)
112
c Distributed array to be used in the basic Arnoldi iteration
113
c for reverse communication. The calling program should not
114
c use WORKD as temporary workspace during the iteration !!!!!!
115
c On INPUT, WORKD(1:N) = B*RESID where RESID is associated
116
c with the K step Arnoldi factorization. Used to save some
117
c computation at the first step.
118
c On OUTPUT, WORKD(1:N) = B*RESID where RESID is associated
119
c with the K+NP step Arnoldi factorization.
120
c
121
c WORKL Double precision work space used for Gram Schmidt orthogonalization
122
c
123
c INFO Integer. (OUTPUT)
124
c = 0: Normal exit.
125
c > 0: Size of an invariant subspace of OP is found that is
126
c less than K + NP.
127
c
128
c\EndDoc
129
c
130
c-----------------------------------------------------------------------
131
c
132
c\BeginLib
133
c
134
c\Local variables:
135
c xxxxxx real
136
c
137
c\Routines called:
138
c pdgetv0 Parallel ARPACK routine to generate the initial vector.
139
c pivout Parallel ARPACK utility routine that prints integers.
140
c pdmout Parallel ARPACK utility routine that prints matrices.
141
c pdvout Parallel ARPACK utility routine that prints vectors.
142
c pdlamch ScaLAPACK routine that determines machine constants.
143
c dlascl LAPACK routine for careful scaling of a matrix.
144
c dgemv Level 2 BLAS routine for matrix vector multiplication.
145
c daxpy Level 1 BLAS that computes a vector triad.
146
c dscal Level 1 BLAS that scales a vector.
147
c dcopy Level 1 BLAS that copies one vector to another .
148
c ddot Level 1 BLAS that computes the scalar product of two vectors.
149
c pdnorm2 Parallel version of Level 1 BLAS that computes the norm of a vector.
150
c
151
c\Author
152
c Danny Sorensen Phuong Vu
153
c Richard Lehoucq CRPC / Rice University
154
c Dept. of Computational & Houston, Texas
155
c Applied Mathematics
156
c Rice University
157
c Houston, Texas
158
c
159
c\Parallel Modifications
160
c Kristi Maschhoff
161
c
162
c\Revision history:
163
c Starting Point: Serial Code FILE: saitr.F SID: 2.3
164
c
165
c\SCCS Information:
166
c FILE: saitr.F SID: 1.3 DATE OF SID: 3/19/97
167
c
168
c\Remarks
169
c The algorithm implemented is:
170
c
171
c restart = .false.
172
c Given V_{k} = [v_{1}, ..., v_{k}], r_{k};
173
c r_{k} contains the initial residual vector even for k = 0;
174
c Also assume that rnorm = || B*r_{k} || and B*r_{k} are already
175
c computed by the calling program.
176
c
177
c betaj = rnorm ; p_{k+1} = B*r_{k} ;
178
c For j = k+1, ..., k+np Do
179
c 1) if ( betaj < tol ) stop or restart depending on j.
180
c if ( restart ) generate a new starting vector.
181
c 2) v_{j} = r(j-1)/betaj; V_{j} = [V_{j-1}, v_{j}];
182
c p_{j} = p_{j}/betaj
183
c 3) r_{j} = OP*v_{j} where OP is defined as in pdsaupd
184
c For shift-invert mode p_{j} = B*v_{j} is already available.
185
c wnorm = || OP*v_{j} ||
186
c 4) Compute the j-th step residual vector.
187
c w_{j} = V_{j}^T * B * OP * v_{j}
188
c r_{j} = OP*v_{j} - V_{j} * w_{j}
189
c alphaj <- j-th component of w_{j}
190
c rnorm = || r_{j} ||
191
c betaj+1 = rnorm
192
c If (rnorm > 0.717*wnorm) accept step and go back to 1)
193
c 5) Re-orthogonalization step:
194
c s = V_{j}'*B*r_{j}
195
c r_{j} = r_{j} - V_{j}*s; rnorm1 = || r_{j} ||
196
c alphaj = alphaj + s_{j};
197
c 6) Iterative refinement step:
198
c If (rnorm1 > 0.717*rnorm) then
199
c rnorm = rnorm1
200
c accept step and go back to 1)
201
c Else
202
c rnorm = rnorm1
203
c If this is the first time in step 6), go to 5)
204
c Else r_{j} lies in the span of V_{j} numerically.
205
c Set r_{j} = 0 and rnorm = 0; go to 1)
206
c EndIf
207
c End Do
208
c
209
c\EndLib
210
c
211
c-----------------------------------------------------------------------
212
c
213
subroutine pdsaitr
214
& (comm, ido, bmat, n, k, np, mode, resid, rnorm, v, ldv, h, ldh,
215
& ipntr, workd, workl, info)
216
c
217
include 'mpif.h'
218
c
219
c %---------------%
220
c | MPI Variables |
221
c %---------------%
222
c
223
integer comm
224
c
225
c %----------------------------------------------------%
226
c | Include files for debugging and timing information |
227
c %----------------------------------------------------%
228
c
229
include 'debug.h'
230
include 'stat.h'
231
c
232
c %------------------%
233
c | Scalar Arguments |
234
c %------------------%
235
c
236
character bmat*1
237
integer ido, info, k, ldh, ldv, n, mode, np
238
Double precision
239
& rnorm
240
c
241
c %-----------------%
242
c | Array Arguments |
243
c %-----------------%
244
c
245
integer ipntr(3)
246
Double precision
247
& h(ldh,2), resid(n), v(ldv,k+np), workd(3*n),
248
& workl(2*ldh)
249
c
250
c %------------%
251
c | Parameters |
252
c %------------%
253
c
254
Double precision
255
& one, zero
256
parameter (one = 1.0, zero = 0.0)
257
c
258
c %---------------%
259
c | Local Scalars |
260
c %---------------%
261
c
262
logical first, orth1, orth2, rstart, step3, step4
263
integer i, ierr, ipj, irj, ivj, iter, itry, j, msglvl, infol,
264
& jj
265
Double precision
266
& rnorm1, wnorm, safmin, temp1
267
save orth1, orth2, rstart, step3, step4,
268
& ierr, ipj, irj, ivj, iter, itry, j, msglvl,
269
& rnorm1, safmin, wnorm
270
c
271
Double precision
272
& rnorm_buf
273
c
274
c %-----------------------%
275
c | Local Array Arguments |
276
c %-----------------------%
277
c
278
Double precision
279
& xtemp(2)
280
c
281
c %----------------------%
282
c | External Subroutines |
283
c %----------------------%
284
c
285
external daxpy, dcopy, dscal, dgemv, pdgetv0, pdvout, pdmout,
286
& dlascl, pivout, second
287
c
288
c %--------------------%
289
c | External Functions |
290
c %--------------------%
291
c
292
Double precision
293
& ddot, pdnorm2, pdlamch10
294
external ddot, pdnorm2, pdlamch10
295
c
296
c %-----------------%
297
c | Data statements |
298
c %-----------------%
299
c
300
data first / .true. /
301
c
302
c
303
c %---------------------%
304
c | Intrinsic Functions |
305
c %---------------------%
306
c
307
intrinsic abs, sqrt
308
c
309
c %-----------------------%
310
c | Executable Statements |
311
c %-----------------------%
312
c
313
if (first) then
314
first = .false.
315
c
316
c %--------------------------------%
317
c | safmin = safe minimum is such |
318
c | that 1/sfmin does not overflow |
319
c %--------------------------------%
320
c
321
safmin = pdlamch10(comm,'safmin')
322
end if
323
c
324
if (ido .eq. 0) then
325
c
326
c %-------------------------------%
327
c | Initialize timing statistics |
328
c | & message level for debugging |
329
c %-------------------------------%
330
c
331
call second (t0)
332
msglvl = msaitr
333
c
334
c %------------------------------%
335
c | Initial call to this routine |
336
c %------------------------------%
337
c
338
info = 0
339
step3 = .false.
340
step4 = .false.
341
rstart = .false.
342
orth1 = .false.
343
orth2 = .false.
344
c
345
c %--------------------------------%
346
c | Pointer to the current step of |
347
c | the factorization to build |
348
c %--------------------------------%
349
c
350
j = k + 1
351
c
352
c %------------------------------------------%
353
c | Pointers used for reverse communication |
354
c | when using WORKD. |
355
c %------------------------------------------%
356
c
357
ipj = 1
358
irj = ipj + n
359
ivj = irj + n
360
end if
361
c
362
c %-------------------------------------------------%
363
c | When in reverse communication mode one of: |
364
c | STEP3, STEP4, ORTH1, ORTH2, RSTART |
365
c | will be .true. |
366
c | STEP3: return from computing OP*v_{j}. |
367
c | STEP4: return from computing B-norm of OP*v_{j} |
368
c | ORTH1: return from computing B-norm of r_{j+1} |
369
c | ORTH2: return from computing B-norm of |
370
c | correction to the residual vector. |
371
c | RSTART: return from OP computations needed by |
372
c | pdgetv0. |
373
c %-------------------------------------------------%
374
c
375
if (step3) go to 50
376
if (step4) go to 60
377
if (orth1) go to 70
378
if (orth2) go to 90
379
if (rstart) go to 30
380
c
381
c %------------------------------%
382
c | Else this is the first step. |
383
c %------------------------------%
384
c
385
c %--------------------------------------------------------------%
386
c | |
387
c | A R N O L D I I T E R A T I O N L O O P |
388
c | |
389
c | Note: B*r_{j-1} is already in WORKD(1:N)=WORKD(IPJ:IPJ+N-1) |
390
c %--------------------------------------------------------------%
391
c
392
1000 continue
393
c
394
if (msglvl .gt. 2) then
395
call pivout (comm, logfil, 1, [j], ndigit,
396
& '_saitr: generating Arnoldi vector no.')
397
call pdvout (comm, logfil, 1, [rnorm], ndigit,
398
& '_saitr: B-norm of the current residual =')
399
end if
400
c
401
c %---------------------------------------------------------%
402
c | Check for exact zero. Equivalent to determing whether a |
403
c | j-step Arnoldi factorization is present. |
404
c %---------------------------------------------------------%
405
c
406
if (rnorm .gt. zero) go to 40
407
c
408
c %---------------------------------------------------%
409
c | Invariant subspace found, generate a new starting |
410
c | vector which is orthogonal to the current Arnoldi |
411
c | basis and continue the iteration. |
412
c %---------------------------------------------------%
413
c
414
if (msglvl .gt. 0) then
415
call pivout (comm, logfil, 1, [j], ndigit,
416
& '_saitr: ****** restart at step ******')
417
end if
418
c
419
c %---------------------------------------------%
420
c | ITRY is the loop variable that controls the |
421
c | maximum amount of times that a restart is |
422
c | attempted. NRSTRT is used by stat.h |
423
c %---------------------------------------------%
424
c
425
nrstrt = nrstrt + 1
426
itry = 1
427
20 continue
428
rstart = .true.
429
ido = 0
430
30 continue
431
c
432
c %--------------------------------------%
433
c | If in reverse communication mode and |
434
c | RSTART = .true. flow returns here. |
435
c %--------------------------------------%
436
c
437
call pdgetv0 (comm, ido, bmat, itry, .false., n, j, v, ldv,
438
& resid, rnorm, ipntr, workd, workl, ierr)
439
if (ido .ne. 99) go to 9000
440
if (ierr .lt. 0) then
441
itry = itry + 1
442
if (itry .le. 3) go to 20
443
c
444
c %------------------------------------------------%
445
c | Give up after several restart attempts. |
446
c | Set INFO to the size of the invariant subspace |
447
c | which spans OP and exit. |
448
c %------------------------------------------------%
449
c
450
info = j - 1
451
call second (t1)
452
tsaitr = tsaitr + (t1 - t0)
453
ido = 99
454
go to 9000
455
end if
456
c
457
40 continue
458
c
459
c %---------------------------------------------------------%
460
c | STEP 2: v_{j} = r_{j-1}/rnorm and p_{j} = p_{j}/rnorm |
461
c | Note that p_{j} = B*r_{j-1}. In order to avoid overflow |
462
c | when reciprocating a small RNORM, test against lower |
463
c | machine bound. |
464
c %---------------------------------------------------------%
465
c
466
call dcopy (n, resid, 1, v(1,j), 1)
467
if (rnorm .ge. safmin) then
468
temp1 = one / rnorm
469
call dscal (n, temp1, v(1,j), 1)
470
call dscal (n, temp1, workd(ipj), 1)
471
else
472
c
473
c %-----------------------------------------%
474
c | To scale both v_{j} and p_{j} carefully |
475
c | use LAPACK routine SLASCL |
476
c %-----------------------------------------%
477
c
478
call dlascl ('General', i, i, rnorm, one, n, 1,
479
& v(1,j), n, infol)
480
call dlascl ('General', i, i, rnorm, one, n, 1,
481
& workd(ipj), n, infol)
482
end if
483
c
484
c %------------------------------------------------------%
485
c | STEP 3: r_{j} = OP*v_{j}; Note that p_{j} = B*v_{j} |
486
c | Note that this is not quite yet r_{j}. See STEP 4 |
487
c %------------------------------------------------------%
488
c
489
step3 = .true.
490
nopx = nopx + 1
491
call second (t2)
492
call dcopy (n, v(1,j), 1, workd(ivj), 1)
493
ipntr(1) = ivj
494
ipntr(2) = irj
495
ipntr(3) = ipj
496
ido = 1
497
c
498
c %-----------------------------------%
499
c | Exit in order to compute OP*v_{j} |
500
c %-----------------------------------%
501
c
502
go to 9000
503
50 continue
504
c
505
c %-----------------------------------%
506
c | Back from reverse communication; |
507
c | WORKD(IRJ:IRJ+N-1) := OP*v_{j}. |
508
c %-----------------------------------%
509
c
510
call second (t3)
511
tmvopx = tmvopx + (t3 - t2)
512
c
513
step3 = .false.
514
c
515
c %------------------------------------------%
516
c | Put another copy of OP*v_{j} into RESID. |
517
c %------------------------------------------%
518
c
519
call dcopy (n, workd(irj), 1, resid, 1)
520
c
521
c %-------------------------------------------%
522
c | STEP 4: Finish extending the symmetric |
523
c | Arnoldi to length j. If MODE = 2 |
524
c | then B*OP = B*inv(B)*A = A and |
525
c | we don't need to compute B*OP. |
526
c | NOTE: If MODE = 2 WORKD(IVJ:IVJ+N-1) is |
527
c | assumed to have A*v_{j}. |
528
c %-------------------------------------------%
529
c
530
if (mode .eq. 2) go to 65
531
call second (t2)
532
if (bmat .eq. 'G') then
533
nbx = nbx + 1
534
step4 = .true.
535
ipntr(1) = irj
536
ipntr(2) = ipj
537
ido = 2
538
c
539
c %-------------------------------------%
540
c | Exit in order to compute B*OP*v_{j} |
541
c %-------------------------------------%
542
c
543
go to 9000
544
else if (bmat .eq. 'I') then
545
call dcopy(n, resid, 1 , workd(ipj), 1)
546
end if
547
60 continue
548
c
549
c %-----------------------------------%
550
c | Back from reverse communication; |
551
c | WORKD(IPJ:IPJ+N-1) := B*OP*v_{j}. |
552
c %-----------------------------------%
553
c
554
if (bmat .eq. 'G') then
555
call second (t3)
556
tmvbx = tmvbx + (t3 - t2)
557
end if
558
c
559
step4 = .false.
560
c
561
c %-------------------------------------%
562
c | The following is needed for STEP 5. |
563
c | Compute the B-norm of OP*v_{j}. |
564
c %-------------------------------------%
565
c
566
65 continue
567
if (mode .eq. 2) then
568
c
569
c %----------------------------------%
570
c | Note that the B-norm of OP*v_{j} |
571
c | is the inv(B)-norm of A*v_{j}. |
572
c %----------------------------------%
573
c
574
rnorm_buf = ddot (n, resid, 1, workd(ivj), 1)
575
call MPI_ALLREDUCE( rnorm_buf, wnorm, 1,
576
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
577
wnorm = sqrt(abs(wnorm))
578
else if (bmat .eq. 'G') then
579
rnorm_buf = ddot (n, resid, 1, workd(ipj), 1)
580
call MPI_ALLREDUCE( rnorm_buf, wnorm, 1,
581
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
582
wnorm = sqrt(abs(wnorm))
583
else if (bmat .eq. 'I') then
584
wnorm = pdnorm2( comm, n, resid, 1 )
585
end if
586
c
587
c %-----------------------------------------%
588
c | Compute the j-th residual corresponding |
589
c | to the j step factorization. |
590
c | Use Classical Gram Schmidt and compute: |
591
c | w_{j} <- V_{j}^T * B * OP * v_{j} |
592
c | r_{j} <- OP*v_{j} - V_{j} * w_{j} |
593
c %-----------------------------------------%
594
c
595
c
596
c %------------------------------------------%
597
c | Compute the j Fourier coefficients w_{j} |
598
c | WORKD(IPJ:IPJ+N-1) contains B*OP*v_{j}. |
599
c %------------------------------------------%
600
c
601
if (mode .ne. 2 ) then
602
call dgemv('T', n, j, one, v, ldv, workd(ipj), 1, zero,
603
& workl(j+1), 1)
604
call MPI_ALLREDUCE( workl(j+1), workl(1), j,
605
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
606
else if (mode .eq. 2) then
607
call dgemv('T', n, j, one, v, ldv, workd(ivj), 1, zero,
608
& workl(j+1), 1)
609
call MPI_ALLREDUCE( workl(j+1), workl(1), j,
610
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
611
end if
612
c
613
c %--------------------------------------%
614
c | Orthgonalize r_{j} against V_{j}. |
615
c | RESID contains OP*v_{j}. See STEP 3. |
616
c %--------------------------------------%
617
c
618
call dgemv('N', n, j, -one, v, ldv, workl(1), 1, one,
619
& resid, 1)
620
c
621
c %--------------------------------------%
622
c | Extend H to have j rows and columns. |
623
c %--------------------------------------%
624
c
625
h(j,2) = workl(j)
626
if (j .eq. 1 .or. rstart) then
627
h(j,1) = zero
628
else
629
h(j,1) = rnorm
630
end if
631
call second (t4)
632
c
633
orth1 = .true.
634
iter = 0
635
c
636
call second (t2)
637
if (bmat .eq. 'G') then
638
nbx = nbx + 1
639
call dcopy (n, resid, 1, workd(irj), 1)
640
ipntr(1) = irj
641
ipntr(2) = ipj
642
ido = 2
643
c
644
c %----------------------------------%
645
c | Exit in order to compute B*r_{j} |
646
c %----------------------------------%
647
c
648
go to 9000
649
else if (bmat .eq. 'I') then
650
call dcopy (n, resid, 1, workd(ipj), 1)
651
end if
652
70 continue
653
c
654
c %---------------------------------------------------%
655
c | Back from reverse communication if ORTH1 = .true. |
656
c | WORKD(IPJ:IPJ+N-1) := B*r_{j}. |
657
c %---------------------------------------------------%
658
c
659
if (bmat .eq. 'G') then
660
call second (t3)
661
tmvbx = tmvbx + (t3 - t2)
662
end if
663
c
664
orth1 = .false.
665
c
666
c %------------------------------%
667
c | Compute the B-norm of r_{j}. |
668
c %------------------------------%
669
c
670
if (bmat .eq. 'G') then
671
rnorm_buf = ddot (n, resid, 1, workd(ipj), 1)
672
call MPI_ALLREDUCE( rnorm_buf, rnorm, 1,
673
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
674
rnorm = sqrt(abs(rnorm))
675
else if (bmat .eq. 'I') then
676
rnorm = pdnorm2( comm, n, resid, 1 )
677
end if
678
c
679
c %-----------------------------------------------------------%
680
c | STEP 5: Re-orthogonalization / Iterative refinement phase |
681
c | Maximum NITER_ITREF tries. |
682
c | |
683
c | s = V_{j}^T * B * r_{j} |
684
c | r_{j} = r_{j} - V_{j}*s |
685
c | alphaj = alphaj + s_{j} |
686
c | |
687
c | The stopping criteria used for iterative refinement is |
688
c | discussed in Parlett's book SEP, page 107 and in Gragg & |
689
c | Reichel ACM TOMS paper; Algorithm 686, Dec. 1990. |
690
c | Determine if we need to correct the residual. The goal is |
691
c | to enforce ||v(:,1:j)^T * r_{j}|| .le. eps * || r_{j} || |
692
c %-----------------------------------------------------------%
693
c
694
if (rnorm .gt. 0.717*wnorm) go to 100
695
nrorth = nrorth + 1
696
c
697
c %---------------------------------------------------%
698
c | Enter the Iterative refinement phase. If further |
699
c | refinement is necessary, loop back here. The loop |
700
c | variable is ITER. Perform a step of Classical |
701
c | Gram-Schmidt using all the Arnoldi vectors V_{j} |
702
c %---------------------------------------------------%
703
c
704
80 continue
705
c
706
if (msglvl .gt. 2) then
707
xtemp(1) = wnorm
708
xtemp(2) = rnorm
709
call pdvout (comm, logfil, 2, xtemp, ndigit,
710
& '_naitr: re-orthonalization ; wnorm and rnorm are')
711
end if
712
c
713
c %----------------------------------------------------%
714
c | Compute V_{j}^T * B * r_{j}. |
715
c | WORKD(IRJ:IRJ+J-1) = v(:,1:J)'*WORKD(IPJ:IPJ+N-1). |
716
c %----------------------------------------------------%
717
c
718
call dgemv ('T', n, j, one, v, ldv, workd(ipj), 1,
719
& zero, workl(j+1), 1)
720
call MPI_ALLREDUCE( workl(j+1), workl(1), j,
721
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr)
722
c
723
c %----------------------------------------------%
724
c | Compute the correction to the residual: |
725
c | r_{j} = r_{j} - V_{j} * WORKD(IRJ:IRJ+J-1). |
726
c | The correction to H is v(:,1:J)*H(1:J,1:J) + |
727
c | v(:,1:J)*WORKD(IRJ:IRJ+J-1)*e'_j, but only |
728
c | H(j,j) is updated. |
729
c %----------------------------------------------%
730
c
731
call dgemv ('N', n, j, -one, v, ldv, workl(1), 1,
732
& one, resid, 1)
733
c
734
if (j .eq. 1 .or. rstart) h(j,1) = zero
735
h(j,2) = h(j,2) + workl(j)
736
c
737
orth2 = .true.
738
call second (t2)
739
if (bmat .eq. 'G') then
740
nbx = nbx + 1
741
call dcopy (n, resid, 1, workd(irj), 1)
742
ipntr(1) = irj
743
ipntr(2) = ipj
744
ido = 2
745
c
746
c %-----------------------------------%
747
c | Exit in order to compute B*r_{j}. |
748
c | r_{j} is the corrected residual. |
749
c %-----------------------------------%
750
c
751
go to 9000
752
else if (bmat .eq. 'I') then
753
call dcopy (n, resid, 1, workd(ipj), 1)
754
end if
755
90 continue
756
c
757
c %---------------------------------------------------%
758
c | Back from reverse communication if ORTH2 = .true. |
759
c %---------------------------------------------------%
760
c
761
if (bmat .eq. 'G') then
762
call second (t3)
763
tmvbx = tmvbx + (t3 - t2)
764
end if
765
c
766
c %-----------------------------------------------------%
767
c | Compute the B-norm of the corrected residual r_{j}. |
768
c %-----------------------------------------------------%
769
c
770
if (bmat .eq. 'G') then
771
rnorm_buf = ddot (n, resid, 1, workd(ipj), 1)
772
call MPI_ALLREDUCE( rnorm_buf, rnorm1, 1,
773
& MPI_DOUBLE_PRECISION, MPI_SUM, comm, ierr )
774
rnorm1 = sqrt(abs(rnorm1))
775
else if (bmat .eq. 'I') then
776
rnorm1 = pdnorm2( comm, n, resid, 1 )
777
end if
778
c
779
if (msglvl .gt. 0 .and. iter .gt. 0) then
780
call pivout (comm, logfil, 1, [j], ndigit,
781
& '_naitr: Iterative refinement for Arnoldi residual')
782
if (msglvl .gt. 2) then
783
xtemp(1) = rnorm
784
xtemp(2) = rnorm1
785
call pdvout (comm, logfil, 2, xtemp, ndigit,
786
& '_naitr: iterative refinement ; rnorm and rnorm1 are')
787
end if
788
end if
789
c
790
c %-----------------------------------------%
791
c | Determine if we need to perform another |
792
c | step of re-orthogonalization. |
793
c %-----------------------------------------%
794
c
795
if (rnorm1 .gt. 0.717*rnorm) then
796
c
797
c %--------------------------------%
798
c | No need for further refinement |
799
c %--------------------------------%
800
c
801
rnorm = rnorm1
802
c
803
else
804
c
805
c %-------------------------------------------%
806
c | Another step of iterative refinement step |
807
c | is required. NITREF is used by stat.h |
808
c %-------------------------------------------%
809
c
810
nitref = nitref + 1
811
rnorm = rnorm1
812
iter = iter + 1
813
if (iter .le. 1) go to 80
814
c
815
c %-------------------------------------------------%
816
c | Otherwise RESID is numerically in the span of V |
817
c %-------------------------------------------------%
818
c
819
do 95 jj = 1, n
820
resid(jj) = zero
821
95 continue
822
rnorm = zero
823
end if
824
c
825
c %----------------------------------------------%
826
c | Branch here directly if iterative refinement |
827
c | wasn't necessary or after at most NITER_REF |
828
c | steps of iterative refinement. |
829
c %----------------------------------------------%
830
c
831
100 continue
832
c
833
rstart = .false.
834
orth2 = .false.
835
c
836
call second (t5)
837
titref = titref + (t5 - t4)
838
c
839
c %----------------------------------------------------------%
840
c | Make sure the last off-diagonal element is non negative |
841
c | If not perform a similarity transformation on H(1:j,1:j) |
842
c | and scale v(:,j) by -1. |
843
c %----------------------------------------------------------%
844
c
845
if (h(j,1) .lt. zero) then
846
h(j,1) = -h(j,1)
847
if ( j .lt. k+np) then
848
call dscal(n, -one, v(1,j+1), 1)
849
else
850
call dscal(n, -one, resid, 1)
851
end if
852
end if
853
c
854
c %------------------------------------%
855
c | STEP 6: Update j = j+1; Continue |
856
c %------------------------------------%
857
c
858
j = j + 1
859
if (j .gt. k+np) then
860
call second (t1)
861
tsaitr = tsaitr + (t1 - t0)
862
ido = 99
863
c
864
if (msglvl .gt. 1) then
865
call pdvout (comm, logfil, k+np, h(1,2), ndigit,
866
& '_saitr: main diagonal of matrix H of step K+NP.')
867
if (k+np .gt. 1) then
868
call pdvout (comm, logfil, k+np-1, h(2,1), ndigit,
869
& '_saitr: sub diagonal of matrix H of step K+NP.')
870
end if
871
end if
872
c
873
go to 9000
874
end if
875
c
876
c %--------------------------------------------------------%
877
c | Loop back to extend the factorization by another step. |
878
c %--------------------------------------------------------%
879
c
880
go to 1000
881
c
882
c %---------------------------------------------------------------%
883
c | |
884
c | E N D O F M A I N I T E R A T I O N L O O P |
885
c | |
886
c %---------------------------------------------------------------%
887
c
888
9000 continue
889
return
890
c
891
c %----------------%
892
c | End of pdsaitr |
893
c %----------------%
894
c
895
end
896
897