CoCalc -- pcnapps.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pcnapps.f
⁵¹⁷⁷ views
1
c\BeginDoc
2
c
3
c\Name: pcnapps
4
c
5
c Message Passing Layer: MPI
6
c
7
c\Description:
8
c  Given the Arnoldi factorization
9
c
10
c     A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
11
c
12
c  apply NP implicit shifts resulting in
13
c
14
c     A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
15
c
16
c  where Q is an orthogonal matrix which is the product of rotations
17
c  and reflections resulting from the NP bulge change sweeps.
18
c  The updated Arnoldi factorization becomes:
19
c
20
c     A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
21
c
22
c\Usage:
23
c  call pcnapps
24
c     ( COMM, N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ,
25
c       WORKL, WORKD )
26
c
27
c\Arguments
28
c  COMM    MPI Communicator for the processor grid.  (INPUT)
29
c
30
c  N       Integer.  (INPUT)
31
c          Problem size, i.e. size of matrix A.
32
c
33
c  KEV     Integer.  (INPUT/OUTPUT)
34
c          KEV+NP is the size of the input matrix H.
35
c          KEV is the size of the updated matrix HNEW.
36
c
37
c  NP      Integer.  (INPUT)
38
c          Number of implicit shifts to be applied.
39
c
40
c  SHIFT   Complex array of length NP.  (INPUT)
41
c          The shifts to be applied.
42
c
43
c  V       Complex N by (KEV+NP) array.  (INPUT/OUTPUT)
44
c          On INPUT, V contains the current KEV+NP Arnoldi vectors.
45
c          On OUTPUT, V contains the updated KEV Arnoldi vectors
46
c          in the first KEV columns of V.
47
c
48
c  LDV     Integer.  (INPUT)
49
c          Leading dimension of V exactly as declared in the calling
50
c          program.
51
c
52
c  H       Complex (KEV+NP) by (KEV+NP) array.  (INPUT/OUTPUT)
53
c          On INPUT, H contains the current KEV+NP by KEV+NP upper
54
c          Hessenberg matrix of the Arnoldi factorization.
55
c          On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
56
c          matrix in the KEV leading submatrix.
57
c
58
c  LDH     Integer.  (INPUT)
59
c          Leading dimension of H exactly as declared in the calling
60
c          program.
61
c
62
c  RESID   Complex array of length N.  (INPUT/OUTPUT)
63
c          On INPUT, RESID contains the the residual vector r_{k+p}.
64
c          On OUTPUT, RESID is the update residual vector rnew_{k}
65
c          in the first KEV locations.
66
c
67
c  Q       Complex KEV+NP by KEV+NP work array.  (WORKSPACE)
68
c          Work array used to accumulate the rotations and reflections
69
c          during the bulge chase sweep.
70
c
71
c  LDQ     Integer.  (INPUT)
72
c          Leading dimension of Q exactly as declared in the calling
73
c          program.
74
c
75
c  WORKL   Complex work array of length (KEV+NP).  (WORKSPACE)
76
c          Private (replicated) array on each PE or array allocated on
77
c          the front end.
78
c
79
c  WORKD   Complex work array of length 2*N.  (WORKSPACE)
80
c          Distributed array used in the application of the accumulated
81
c          orthogonal matrix Q.
82
c
83
c\EndDoc
84
c
85
c-----------------------------------------------------------------------
86
c
87
c\BeginLib
88
c
89
c\Local variables:
90
c     xxxxxx  Complex
91
c
92
c\References:
93
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
94
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
95
c     pp 357-385.
96
c
97
c\Routines called:
98
c     pivout  Parallel ARPACK utility routine that prints integers.
99
c     second  ARPACK utility routine for timing.
100
c     pcmout  Parallel ARPACK utility routine that prints matrices
101
c     pcvout  Parallel ARPACK utility routine that prints vectors.
102
c     clacpy  LAPACK matrix copy routine.
103
c     clanhs  LAPACK routine that computes various norms of a matrix.
104
c     clartg  LAPACK Givens rotation construction routine.
105
c     claset  LAPACK matrix initialization routine.
106
c     slabad  LAPACK routine for defining the underflow and overflow
107
c             limits.
108
c     pslamch ScaLAPACK routine that determines machine constants.
109
c     slapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
110
c     cgemv   Level 2 BLAS routine for matrix vector multiplication.
111
c     caxpy   Level 1 BLAS that computes a vector triad.
112
c     ccopy   Level 1 BLAS that copies one vector to another.
113
c     cscal   Level 1 BLAS that scales a vector.
114
c
115
c\Author
116
c     Danny Sorensen               Phuong Vu
117
c     Richard Lehoucq              CRPC / Rice University
118
c     Dept. of Computational &     Houston, Texas
119
c     Applied Mathematics
120
c     Rice University
121
c     Houston, Texas
122
c
123
c\Parallel Modifications
124
c     Kristi Maschhoff
125
c
126
c\Revision history:
127
c     Starting Point: Serial Complex Code FILE: napps.F   SID: 2.1
128
c
129
c\SCCS Information:
130
c FILE: napps.F   SID: 1.3   DATE OF SID: 06/04/98
131
c
132
c\Remarks
133
c  1. In this version, each shift is applied to all the sublocks of
134
c     the Hessenberg matrix H and not just to the submatrix that it
135
c     comes from. Deflation as in LAPACK routine clahqr (QR algorithm
136
c     for upper Hessenberg matrices ) is used.
137
c     Upon output, the subdiagonals of H are enforced to be non-negative
138
c     real numbers.
139
c
140
c\EndLib
141
c
142
c-----------------------------------------------------------------------
143
c
144
      subroutine pcnapps
145
     &   ( comm, n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq,
146
     &     workl, workd )
147
c
148
c     %--------------------%
149
c     | MPI Communicator |
150
c     %--------------------%
151
c
152
      integer   comm
153
c
154
c     %----------------------------------------------------%
155
c     | Include files for debugging and timing information |
156
c     %----------------------------------------------------%
157
c
158
      include   'debug.h'
159
      include   'stat.h'
160
c
161
c     %------------------%
162
c     | Scalar Arguments |
163
c     %------------------%
164
c
165
      integer    kev, ldh, ldq, ldv, n, np
166
c
167
c     %-----------------%
168
c     | Array Arguments |
169
c     %-----------------%
170
c
171
      Complex
172
     &           h(ldh,kev+np), resid(n), shift(np),
173
     &           v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
174
c
175
c     %------------%
176
c     | Parameters |
177
c     %------------%
178
c
179
      Complex
180
     &           one, zero
181
      Real
182
     &           rzero
183
      parameter (one = (1.0, 0.0), zero = (0.0, 0.0),
184
     &           rzero = 0.0)
185
c
186
c     %------------------------%
187
c     | Local Scalars & Arrays |
188
c     %------------------------%
189
c
190
      integer    i, iend, istart, j, jj, kplusp, msglvl
191
      logical    first
192
      Complex
193
     &           cdum, f, g, h11, h21, r, s, sigma, t
194
      Real
195
     &           c,  ovfl, smlnum, ulp, unfl, tst1
196
      save       first, ovfl, smlnum, ulp, unfl
197
c
198
c     %----------------------%
199
c     | External Subroutines |
200
c     %----------------------%
201
c
202
      external   caxpy, ccopy, cgemv, cscal, clacpy, clartg,
203
     &           pcvout, claset, slabad, pcmout, second, pivout
204
c
205
c     %--------------------%
206
c     | External Functions |
207
c     %--------------------%
208
c
209
      Real
210
     &           clanhs, pslamch10, slapy2
211
      external   clanhs, pslamch10, slapy2
212
c
213
c     %----------------------%
214
c     | Intrinsics Functions |
215
c     %----------------------%
216
c
217
      intrinsic  abs, aimag, conjg, cmplx, max, min, real
218
c
219
c     %---------------------%
220
c     | Statement Functions |
221
c     %---------------------%
222
c
223
      Real
224
     &           cabs1
225
      cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
226
c
227
c     %----------------%
228
c     | Data statments |
229
c     %----------------%
230
c
231
      data       first / .true. /
232
c
233
c     %-----------------------%
234
c     | Executable Statements |
235
c     %-----------------------%
236
c
237
      if (first) then
238
c
239
c        %-----------------------------------------------%
240
c        | Set machine-dependent constants for the       |
241
c        | stopping criterion. If norm(H) <= sqrt(OVFL), |
242
c        | overflow should not occur.                    |
243
c        | REFERENCE: LAPACK subroutine clahqr           |
244
c        %-----------------------------------------------%
245
c
246
         unfl = slamch( 'safe minimum' )
247
         ovfl = real(one / unfl)
248
         call slabad( unfl, ovfl )
249
         ulp = slamch( 'precision' )
250
         smlnum = unfl*( n / ulp )
251
         first = .false.
252
      end if
253
c
254
c     %-------------------------------%
255
c     | Initialize timing statistics  |
256
c     | & message level for debugging |
257
c     %-------------------------------%
258
c
259
      call second (t0)
260
      msglvl = mcapps
261
c
262
      kplusp = kev + np
263
c
264
c     %--------------------------------------------%
265
c     | Initialize Q to the identity to accumulate |
266
c     | the rotations and reflections              |
267
c     %--------------------------------------------%
268
c
269
      call claset ('All', kplusp, kplusp, zero, one, q, ldq)
270
c
271
c     %----------------------------------------------%
272
c     | Quick return if there are no shifts to apply |
273
c     %----------------------------------------------%
274
c
275
      if (np .eq. 0) go to 9000
276
c
277
c     %----------------------------------------------%
278
c     | Chase the bulge with the application of each |
279
c     | implicit shift. Each shift is applied to the |
280
c     | whole matrix including each block.           |
281
c     %----------------------------------------------%
282
c
283
      do 110 jj = 1, np
284
         sigma = shift(jj)
285
c
286
         if (msglvl .gt. 2 ) then
287
            call pivout (comm, logfil, 1, [jj], ndigit,
288
     &               '_napps: shift number.')
289
            call pcvout (comm, logfil, 1, [sigma], ndigit,
290
     &               '_napps: Value of the shift ')
291
         end if
292
c
293
         istart = 1
294
   20    continue
295
c
296
         do 30 i = istart, kplusp-1
297
c
298
c           %----------------------------------------%
299
c           | Check for splitting and deflation. Use |
300
c           | a standard test as in the QR algorithm |
301
c           | REFERENCE: LAPACK subroutine clahqr    |
302
c           %----------------------------------------%
303
c
304
            tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
305
            if( tst1.eq.rzero )
306
     &         tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl )
307
            if ( abs(real(h(i+1,i)))
308
     &           .le. max(ulp*tst1, smlnum) )  then
309
               if (msglvl .gt. 0) then
310
                  call pivout (comm, logfil, 1, [i], ndigit,
311
     &                 '_napps: matrix splitting at row/column no.')
312
                  call pivout (comm, logfil, 1, [jj], ndigit,
313
     &                 '_napps: matrix splitting with shift number.')
314
                  call pcvout (comm, logfil, 1, h(i+1,i), ndigit,
315
     &                 '_napps: off diagonal element.')
316
               end if
317
               iend = i
318
               h(i+1,i) = zero
319
               go to 40
320
            end if
321
   30    continue
322
         iend = kplusp
323
   40    continue
324
c
325
         if (msglvl .gt. 2) then
326
             call pivout (comm, logfil, 1, [istart], ndigit,
327
     &                   '_napps: Start of current block ')
328
             call pivout (comm, logfil, 1, [iend], ndigit,
329
     &                   '_napps: End of current block ')
330
         end if
331
c
332
c        %------------------------------------------------%
333
c        | No reason to apply a shift to block of order 1 |
334
c        | or if the current block starts after the point |
335
c        | of compression since we'll discard this stuff  |
336
c        %------------------------------------------------%
337
c
338
         if ( istart .eq. iend .or. istart .gt. kev) go to 100
339
c
340
         h11 = h(istart,istart)
341
         h21 = h(istart+1,istart)
342
         f = h11 - sigma
343
         g = h21
344
c
345
         do 80 i = istart, iend-1
346
c
347
c           %------------------------------------------------------%
348
c           | Construct the plane rotation G to zero out the bulge |
349
c           %------------------------------------------------------%
350
c
351
            call clartg (f, g, c, s, r)
352
            if (i .gt. istart) then
353
               h(i,i-1) = r
354
               h(i+1,i-1) = zero
355
            end if
356
c
357
c           %---------------------------------------------%
358
c           | Apply rotation to the left of H;  H <- G'*H |
359
c           %---------------------------------------------%
360
c
361
            do 50 j = i, kplusp
362
               t        =  c*h(i,j) + s*h(i+1,j)
363
               h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j)
364
               h(i,j)   = t
365
   50       continue
366
c
367
c           %---------------------------------------------%
368
c           | Apply rotation to the right of H;  H <- H*G |
369
c           %---------------------------------------------%
370
c
371
            do 60 j = 1, min(i+2,iend)
372
               t        =  c*h(j,i) + conjg(s)*h(j,i+1)
373
               h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
374
               h(j,i)   = t
375
   60       continue
376
c
377
c           %-----------------------------------------------------%
378
c           | Accumulate the rotation in the matrix Q;  Q <- Q*G' |
379
c           %-----------------------------------------------------%
380
c
381
            do 70 j = 1, min(i+jj, kplusp)
382
               t        =   c*q(j,i) + conjg(s)*q(j,i+1)
383
               q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
384
               q(j,i)   = t
385
   70       continue
386
c
387
c           %---------------------------%
388
c           | Prepare for next rotation |
389
c           %---------------------------%
390
c
391
            if (i .lt. iend-1) then
392
               f = h(i+1,i)
393
               g = h(i+2,i)
394
            end if
395
   80    continue
396
c
397
c        %-------------------------------%
398
c        | Finished applying the shift.  |
399
c        %-------------------------------%
400
c
401
  100    continue
402
c
403
c        %---------------------------------------------------------%
404
c        | Apply the same shift to the next block if there is any. |
405
c        %---------------------------------------------------------%
406
c
407
         istart = iend + 1
408
         if (iend .lt. kplusp) go to 20
409
c
410
c        %---------------------------------------------%
411
c        | Loop back to the top to get the next shift. |
412
c        %---------------------------------------------%
413
c
414
  110 continue
415
c
416
c     %---------------------------------------------------%
417
c     | Perform a similarity transformation that makes    |
418
c     | sure that the compressed H will have non-negative |
419
c     | real subdiagonal elements.                        |
420
c     %---------------------------------------------------%
421
c
422
      do 120 j=1,kev
423
         if ( real( h(j+1,j) ) .lt. rzero .or.
424
     &        aimag( h(j+1,j) ) .ne. rzero ) then
425
            t = h(j+1,j) / slapy2(real(h(j+1,j)),aimag(h(j+1,j)))
426
            call cscal( kplusp-j+1, conjg(t), h(j+1,j), ldh )
427
            call cscal( min(j+2, kplusp), t, h(1,j+1), 1 )
428
            call cscal( min(j+np+1,kplusp), t, q(1,j+1), 1 )
429
            h(j+1,j) = cmplx( real( h(j+1,j) ), rzero )
430
         end if
431
  120 continue
432
c
433
      do 130 i = 1, kev
434
c
435
c        %--------------------------------------------%
436
c        | Final check for splitting and deflation.   |
437
c        | Use a standard test as in the QR algorithm |
438
c        | REFERENCE: LAPACK subroutine clahqr.       |
439
c        | Note: Since the subdiagonals of the        |
440
c        | compressed H are nonnegative real numbers, |
441
c        | we take advantage of this.                 |
442
c        %--------------------------------------------%
443
c
444
         tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
445
         if( tst1 .eq. rzero )
446
     &       tst1 = clanhs( '1', kev, h, ldh, workl )
447
         if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) )
448
     &       h(i+1,i) = zero
449
 130  continue
450
c
451
c     %-------------------------------------------------%
452
c     | Compute the (kev+1)-st column of (V*Q) and      |
453
c     | temporarily store the result in WORKD(N+1:2*N). |
454
c     | This is needed in the residual update since we  |
455
c     | cannot GUARANTEE that the corresponding entry   |
456
c     | of H would be zero as in exact arithmetic.      |
457
c     %-------------------------------------------------%
458
c
459
      if ( real( h(kev+1,kev) ) .gt. rzero )
460
     &   call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
461
     &                workd(n+1), 1)
462
c
463
c     %----------------------------------------------------------%
464
c     | Compute column 1 to kev of (V*Q) in backward order       |
465
c     | taking advantage of the upper Hessenberg structure of Q. |
466
c     %----------------------------------------------------------%
467
c
468
      do 140 i = 1, kev
469
         call cgemv ('N', n, kplusp-i+1, one, v, ldv,
470
     &               q(1,kev-i+1), 1, zero, workd, 1)
471
         call ccopy (n, workd, 1, v(1,kplusp-i+1), 1)
472
  140 continue
473
c
474
c     %-------------------------------------------------%
475
c     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
476
c     %-------------------------------------------------%
477
c
478
      call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
479
c
480
c     %--------------------------------------------------------------%
481
c     | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
482
c     %--------------------------------------------------------------%
483
c
484
      if ( real( h(kev+1,kev) ) .gt. rzero )
485
     &   call ccopy (n, workd(n+1), 1, v(1,kev+1), 1)
486
c
487
c     %-------------------------------------%
488
c     | Update the residual vector:         |
489
c     |    r <- sigmak*r + betak*v(:,kev+1) |
490
c     | where                               |
491
c     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  |
492
c     |    betak = e_{kev+1}'*H*e_{kev}     |
493
c     %-------------------------------------%
494
c
495
      call cscal (n, q(kplusp,kev), resid, 1)
496
      if ( real( h(kev+1,kev) ) .gt. rzero )
497
     &   call caxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
498
c
499
      if (msglvl .gt. 1) then
500
         call pcvout (comm, logfil, 1, q(kplusp,kev), ndigit,
501
     &        '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
502
         call pcvout (comm, logfil, 1, h(kev+1,kev), ndigit,
503
     &        '_napps: betak = e_{kev+1}^T*H*e_{kev}')
504
         call pivout (comm, logfil, 1, [kev], ndigit,
505
     &               '_napps: Order of the final Hessenberg matrix ')
506
         if (msglvl .gt. 2) then
507
            call pcmout (comm, logfil, kev, kev, h, ldh, ndigit,
508
     &      '_napps: updated Hessenberg matrix H for next iteration')
509
         end if
510
c
511
      end if
512
c
513
 9000 continue
514
      call second (t1)
515
      tcapps = tcapps + (t1 - t0)
516
c
517
      return
518
c
519
c     %----------------%
520
c     | End of pcnapps |
521
c     %----------------%
522
c
523
      end
524

525
Product

Resources

Company