CoCalc -- cnapps.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/arpack/cnapps.f
⁵¹⁹⁴ views
1
c\BeginDoc
2
c
3
c\Name: cnapps
4
c
5
c\Description:
6
c  Given the Arnoldi factorization
7
c
8
c     A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
9
c
10
c  apply NP implicit shifts resulting in
11
c
12
c     A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
13
c
14
c  where Q is an orthogonal matrix which is the product of rotations
15
c  and reflections resulting from the NP bulge change sweeps.
16
c  The updated Arnoldi factorization becomes:
17
c
18
c     A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
19
c
20
c\Usage:
21
c  call cnapps
22
c     ( N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ,
23
c       WORKL, WORKD )
24
c
25
c\Arguments
26
c  N       Integer.  (INPUT)
27
c          Problem size, i.e. size of matrix A.
28
c
29
c  KEV     Integer.  (INPUT/OUTPUT)
30
c          KEV+NP is the size of the input matrix H.
31
c          KEV is the size of the updated matrix HNEW.
32
c
33
c  NP      Integer.  (INPUT)
34
c          Number of implicit shifts to be applied.
35
c
36
c  SHIFT   Complex array of length NP.  (INPUT)
37
c          The shifts to be applied.
38
c
39
c  V       Complex N by (KEV+NP) array.  (INPUT/OUTPUT)
40
c          On INPUT, V contains the current KEV+NP Arnoldi vectors.
41
c          On OUTPUT, V contains the updated KEV Arnoldi vectors
42
c          in the first KEV columns of V.
43
c
44
c  LDV     Integer.  (INPUT)
45
c          Leading dimension of V exactly as declared in the calling
46
c          program.
47
c
48
c  H       Complex (KEV+NP) by (KEV+NP) array.  (INPUT/OUTPUT)
49
c          On INPUT, H contains the current KEV+NP by KEV+NP upper
50
c          Hessenberg matrix of the Arnoldi factorization.
51
c          On OUTPUT, H contains the updated KEV by KEV upper Hessenberg
52
c          matrix in the KEV leading submatrix.
53
c
54
c  LDH     Integer.  (INPUT)
55
c          Leading dimension of H exactly as declared in the calling
56
c          program.
57
c
58
c  RESID   Complex array of length N.  (INPUT/OUTPUT)
59
c          On INPUT, RESID contains the the residual vector r_{k+p}.
60
c          On OUTPUT, RESID is the update residual vector rnew_{k}
61
c          in the first KEV locations.
62
c
63
c  Q       Complex KEV+NP by KEV+NP work array.  (WORKSPACE)
64
c          Work array used to accumulate the rotations and reflections
65
c          during the bulge chase sweep.
66
c
67
c  LDQ     Integer.  (INPUT)
68
c          Leading dimension of Q exactly as declared in the calling
69
c          program.
70
c
71
c  WORKL   Complex work array of length (KEV+NP).  (WORKSPACE)
72
c          Private (replicated) array on each PE or array allocated on
73
c          the front end.
74
c
75
c  WORKD   Complex work array of length 2*N.  (WORKSPACE)
76
c          Distributed array used in the application of the accumulated
77
c          orthogonal matrix Q.
78
c
79
c\EndDoc
80
c
81
c-----------------------------------------------------------------------
82
c
83
c\BeginLib
84
c
85
c\Local variables:
86
c     xxxxxx  Complex
87
c
88
c\References:
89
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
90
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
91
c     pp 357-385.
92
c
93
c\Routines called:
94
c     ivout   ARPACK utility routine that prints integers.
95
c     second  ARPACK utility routine for timing.
96
c     cmout   ARPACK utility routine that prints matrices
97
c     cvout   ARPACK utility routine that prints vectors.
98
c     clacpy  LAPACK matrix copy routine.
99
c     clanhs  LAPACK routine that computes various norms of a matrix.
100
c     clartg  LAPACK Givens rotation construction routine.
101
c     claset  LAPACK matrix initialization routine.
102
c     slabad  LAPACK routine for defining the underflow and overflow
103
c             limits.
104
c     slamch  LAPACK routine that determines machine constants.
105
c     slapy2  LAPACK routine to compute sqrt(x**2+y**2) carefully.
106
c     cgemv   Level 2 BLAS routine for matrix vector multiplication.
107
c     caxpy   Level 1 BLAS that computes a vector triad.
108
c     ccopy   Level 1 BLAS that copies one vector to another.
109
c     cscal   Level 1 BLAS that scales a vector.
110
c
111
c\Author
112
c     Danny Sorensen               Phuong Vu
113
c     Richard Lehoucq              CRPC / Rice University
114
c     Dept. of Computational &     Houston, Texas
115
c     Applied Mathematics
116
c     Rice University
117
c     Houston, Texas
118
c
119
c\SCCS Information: @(#)
120
c FILE: napps.F   SID: 2.2   DATE OF SID: 4/20/96   RELEASE: 2
121
c
122
c\Remarks
123
c  1. In this version, each shift is applied to all the sublocks of
124
c     the Hessenberg matrix H and not just to the submatrix that it
125
c     comes from. Deflation as in LAPACK routine clahqr (QR algorithm
126
c     for upper Hessenberg matrices ) is used.
127
c     Upon output, the subdiagonals of H are enforced to be non-negative
128
c     real numbers.
129
c
130
c\EndLib
131
c
132
c-----------------------------------------------------------------------
133
c
134
      subroutine cnapps
135
     &   ( n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq,
136
     &     workl, workd )
137
c
138
c     %----------------------------------------------------%
139
c     | Include files for debugging and timing information |
140
c     %----------------------------------------------------%
141
c
142
      include   'debug.h'
143
      include   'stat.h'
144
c
145
c     %------------------%
146
c     | Scalar Arguments |
147
c     %------------------%
148
c
149
      integer    kev, ldh, ldq, ldv, n, np
150
c
151
c     %-----------------%
152
c     | Array Arguments |
153
c     %-----------------%
154
c
155
      Complex
156
     &           h(ldh,kev+np), resid(n), shift(np),
157
     &           v(ldv,kev+np), q(ldq,kev+np), workd(2*n), workl(kev+np)
158
c
159
c     %------------%
160
c     | Parameters |
161
c     %------------%
162
c
163
      Complex
164
     &           one, zero
165
      Real
166
     &           rzero
167
      parameter (one = (1.0E+0, 0.0E+0), zero = (0.0E+0, 0.0E+0),
168
     &           rzero = 0.0E+0)
169
c
170
c     %------------------------%
171
c     | Local Scalars & Arrays |
172
c     %------------------------%
173
c
174
      integer    i, iend, istart, j, jj, kplusp, msglvl
175
      logical    first
176
      Complex
177
     &           cdum, f, g, h11, h21, r, s, sigma, t
178
      Real
179
     &           c,  ovfl, smlnum, ulp, unfl, tst1
180
      save       first, ovfl, smlnum, ulp, unfl
181
c
182
c     %----------------------%
183
c     | External Subroutines |
184
c     %----------------------%
185
c
186
      external   caxpy, ccopy, cgemv, cscal, clacpy, clartg,
187
     &           cvout, claset, slabad, cmout, second, ivout
188
c
189
c     %--------------------%
190
c     | External Functions |
191
c     %--------------------%
192
c
193
      Real
194
     &           clanhs, slamch, slapy2
195
      external   clanhs, slamch, slapy2
196
c
197
c     %----------------------%
198
c     | Intrinsics Functions |
199
c     %----------------------%
200
c
201
      intrinsic  abs, aimag, conjg, cmplx, max, min, real
202
c
203
c     %---------------------%
204
c     | Statement Functions |
205
c     %---------------------%
206
c
207
      Real
208
     &           cabs1
209
      cabs1( cdum ) = abs( real( cdum ) ) + abs( aimag( cdum ) )
210
c
211
c     %----------------%
212
c     | Data statments |
213
c     %----------------%
214
c
215
      data       first / .true. /
216
c
217
c     %-----------------------%
218
c     | Executable Statements |
219
c     %-----------------------%
220
c
221
      if (first) then
222
c
223
c        %-----------------------------------------------%
224
c        | Set machine-dependent constants for the       |
225
c        | stopping criterion. If norm(H) <= sqrt(OVFL), |
226
c        | overflow should not occur.                    |
227
c        | REFERENCE: LAPACK subroutine clahqr           |
228
c        %-----------------------------------------------%
229
c
230
         unfl = slamch( 'safe minimum' )
231
         ovfl = real(one / unfl)
232
         call slabad( unfl, ovfl )
233
         ulp = slamch( 'precision' )
234
         smlnum = unfl*( n / ulp )
235
         first = .false.
236
      end if
237
c
238
c     %-------------------------------%
239
c     | Initialize timing statistics  |
240
c     | & message level for debugging |
241
c     %-------------------------------%
242
c
243
      call second (t0)
244
      msglvl = mcapps
245
c
246
      kplusp = kev + np
247
c
248
c     %--------------------------------------------%
249
c     | Initialize Q to the identity to accumulate |
250
c     | the rotations and reflections              |
251
c     %--------------------------------------------%
252
c
253
      call claset ('All', kplusp, kplusp, zero, one, q, ldq)
254
c
255
c     %----------------------------------------------%
256
c     | Quick return if there are no shifts to apply |
257
c     %----------------------------------------------%
258
c
259
      if (np .eq. 0) go to 9000
260
c
261
c     %----------------------------------------------%
262
c     | Chase the bulge with the application of each |
263
c     | implicit shift. Each shift is applied to the |
264
c     | whole matrix including each block.           |
265
c     %----------------------------------------------%
266
c
267
      do 110 jj = 1, np
268
         sigma = shift(jj)
269
c
270
         if (msglvl .gt. 2 ) then
271
            call ivout (logfil, 1, [jj], ndigit,
272
     &               '_napps: shift number.')
273
            call cvout (logfil, 1, [sigma], ndigit,
274
     &               '_napps: Value of the shift ')
275
         end if
276
c
277
         istart = 1
278
   20    continue
279
c
280
         do 30 i = istart, kplusp-1
281
c
282
c           %----------------------------------------%
283
c           | Check for splitting and deflation. Use |
284
c           | a standard test as in the QR algorithm |
285
c           | REFERENCE: LAPACK subroutine clahqr    |
286
c           %----------------------------------------%
287
c
288
            tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
289
            if( tst1.eq.rzero )
290
     &         tst1 = clanhs( '1', kplusp-jj+1, h, ldh, workl )
291
            if ( abs(real(h(i+1,i)))
292
     &           .le. max(ulp*tst1, smlnum) )  then
293
               if (msglvl .gt. 0) then
294
                  call ivout (logfil, 1, [i], ndigit,
295
     &                 '_napps: matrix splitting at row/column no.')
296
                  call ivout (logfil, 1, [jj], ndigit,
297
     &                 '_napps: matrix splitting with shift number.')
298
                  call cvout (logfil, 1, h(i+1,i), ndigit,
299
     &                 '_napps: off diagonal element.')
300
               end if
301
               iend = i
302
               h(i+1,i) = zero
303
               go to 40
304
            end if
305
   30    continue
306
         iend = kplusp
307
   40    continue
308
c
309
         if (msglvl .gt. 2) then
310
             call ivout (logfil, 1, [istart], ndigit,
311
     &                   '_napps: Start of current block ')
312
             call ivout (logfil, 1, [iend], ndigit,
313
     &                   '_napps: End of current block ')
314
         end if
315
c
316
c        %------------------------------------------------%
317
c        | No reason to apply a shift to block of order 1 |
318
c        | or if the current block starts after the point |
319
c        | of compression since we'll discard this stuff  |
320
c        %------------------------------------------------%
321
c
322
         if ( istart .eq. iend .or. istart .gt. kev) go to 100
323
c
324
         h11 = h(istart,istart)
325
         h21 = h(istart+1,istart)
326
         f = h11 - sigma
327
         g = h21
328
c
329
         do 80 i = istart, iend-1
330
c
331
c           %------------------------------------------------------%
332
c           | Construct the plane rotation G to zero out the bulge |
333
c           %------------------------------------------------------%
334
c
335
            call clartg (f, g, c, s, r)
336
            if (i .gt. istart) then
337
               h(i,i-1) = r
338
               h(i+1,i-1) = zero
339
            end if
340
c
341
c           %---------------------------------------------%
342
c           | Apply rotation to the left of H;  H <- G'*H |
343
c           %---------------------------------------------%
344
c
345
            do 50 j = i, kplusp
346
               t        =  c*h(i,j) + s*h(i+1,j)
347
               h(i+1,j) = -conjg(s)*h(i,j) + c*h(i+1,j)
348
               h(i,j)   = t
349
   50       continue
350
c
351
c           %---------------------------------------------%
352
c           | Apply rotation to the right of H;  H <- H*G |
353
c           %---------------------------------------------%
354
c
355
            do 60 j = 1, min(i+2,iend)
356
               t        =  c*h(j,i) + conjg(s)*h(j,i+1)
357
               h(j,i+1) = -s*h(j,i) + c*h(j,i+1)
358
               h(j,i)   = t
359
   60       continue
360
c
361
c           %-----------------------------------------------------%
362
c           | Accumulate the rotation in the matrix Q;  Q <- Q*G' |
363
c           %-----------------------------------------------------%
364
c
365
            do 70 j = 1, min(j+jj, kplusp)
366
               t        =   c*q(j,i) + conjg(s)*q(j,i+1)
367
               q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
368
               q(j,i)   = t
369
   70       continue
370
c
371
c           %---------------------------%
372
c           | Prepare for next rotation |
373
c           %---------------------------%
374
c
375
            if (i .lt. iend-1) then
376
               f = h(i+1,i)
377
               g = h(i+2,i)
378
            end if
379
   80    continue
380
c
381
c        %-------------------------------%
382
c        | Finished applying the shift.  |
383
c        %-------------------------------%
384
c
385
  100    continue
386
c
387
c        %---------------------------------------------------------%
388
c        | Apply the same shift to the next block if there is any. |
389
c        %---------------------------------------------------------%
390
c
391
         istart = iend + 1
392
         if (iend .lt. kplusp) go to 20
393
c
394
c        %---------------------------------------------%
395
c        | Loop back to the top to get the next shift. |
396
c        %---------------------------------------------%
397
c
398
  110 continue
399
c
400
c     %---------------------------------------------------%
401
c     | Perform a similarity transformation that makes    |
402
c     | sure that the compressed H will have non-negative |
403
c     | real subdiagonal elements.                        |
404
c     %---------------------------------------------------%
405
c
406
      do 120 j=1,kev
407
         if ( real( h(j+1,j) ) .lt. rzero .or.
408
     &        aimag( h(j+1,j) ) .ne. rzero ) then
409
            t = h(j+1,j) / slapy2(real(h(j+1,j)),aimag(h(j+1,j)))
410
            call cscal( kplusp-j+1, conjg(t), h(j+1,j), ldh )
411
            call cscal( min(j+2, kplusp), t, h(1,j+1), 1 )
412
            call cscal( min(j+np+1,kplusp), t, q(1,j+1), 1 )
413
            h(j+1,j) = cmplx( real( h(j+1,j) ), rzero )
414
         end if
415
  120 continue
416
c
417
      do 130 i = 1, kev
418
c
419
c        %--------------------------------------------%
420
c        | Final check for splitting and deflation.   |
421
c        | Use a standard test as in the QR algorithm |
422
c        | REFERENCE: LAPACK subroutine clahqr.       |
423
c        | Note: Since the subdiagonals of the        |
424
c        | compressed H are nonnegative real numbers, |
425
c        | we take advantage of this.                 |
426
c        %--------------------------------------------%
427
c
428
         tst1 = cabs1( h( i, i ) ) + cabs1( h( i+1, i+1 ) )
429
         if( tst1 .eq. rzero )
430
     &       tst1 = clanhs( '1', kev, h, ldh, workl )
431
         if( real( h( i+1,i ) ) .le. max( ulp*tst1, smlnum ) )
432
     &       h(i+1,i) = zero
433
 130  continue
434
c
435
c     %-------------------------------------------------%
436
c     | Compute the (kev+1)-st column of (V*Q) and      |
437
c     | temporarily store the result in WORKD(N+1:2*N). |
438
c     | This is needed in the residual update since we  |
439
c     | cannot GUARANTEE that the corresponding entry   |
440
c     | of H would be zero as in exact arithmetic.      |
441
c     %-------------------------------------------------%
442
c
443
      if ( real( h(kev+1,kev) ) .gt. rzero )
444
     &   call cgemv ('N', n, kplusp, one, v, ldv, q(1,kev+1), 1, zero,
445
     &                workd(n+1), 1)
446
c
447
c     %----------------------------------------------------------%
448
c     | Compute column 1 to kev of (V*Q) in backward order       |
449
c     | taking advantage of the upper Hessenberg structure of Q. |
450
c     %----------------------------------------------------------%
451
c
452
      do 140 i = 1, kev
453
         call cgemv ('N', n, kplusp-i+1, one, v, ldv,
454
     &               q(1,kev-i+1), 1, zero, workd, 1)
455
         call ccopy (n, workd, 1, v(1,kplusp-i+1), 1)
456
  140 continue
457
c
458
c     %-------------------------------------------------%
459
c     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
460
c     %-------------------------------------------------%
461
c
462
      call clacpy ('A', n, kev, v(1,kplusp-kev+1), ldv, v, ldv)
463
c
464
c     %--------------------------------------------------------------%
465
c     | Copy the (kev+1)-st column of (V*Q) in the appropriate place |
466
c     %--------------------------------------------------------------%
467
c
468
      if ( real( h(kev+1,kev) ) .gt. rzero )
469
     &   call ccopy (n, workd(n+1), 1, v(1,kev+1), 1)
470
c
471
c     %-------------------------------------%
472
c     | Update the residual vector:         |
473
c     |    r <- sigmak*r + betak*v(:,kev+1) |
474
c     | where                               |
475
c     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  |
476
c     |    betak = e_{kev+1}'*H*e_{kev}     |
477
c     %-------------------------------------%
478
c
479
      call cscal (n, q(kplusp,kev), resid, 1)
480
      if ( real( h(kev+1,kev) ) .gt. rzero )
481
     &   call caxpy (n, h(kev+1,kev), v(1,kev+1), 1, resid, 1)
482
c
483
      if (msglvl .gt. 1) then
484
         call cvout (logfil, 1, q(kplusp,kev), ndigit,
485
     &        '_napps: sigmak = (e_{kev+p}^T*Q)*e_{kev}')
486
         call cvout (logfil, 1, h(kev+1,kev), ndigit,
487
     &        '_napps: betak = e_{kev+1}^T*H*e_{kev}')
488
         call ivout (logfil, 1, [kev], ndigit,
489
     &               '_napps: Order of the final Hessenberg matrix ')
490
         if (msglvl .gt. 2) then
491
            call cmout (logfil, kev, kev, h, ldh, ndigit,
492
     &      '_napps: updated Hessenberg matrix H for next iteration')
493
         end if
494
c
495
      end if
496
c
497
 9000 continue
498
      call second (t1)
499
      tcapps = tcapps + (t1 - t0)
500
c
501
      return
502
c
503
c     %---------------%
504
c     | End of cnapps |
505
c     %---------------%
506
c
507
      end
508

509
Product

Resources

Company