CoCalc -- pdsapps.f

GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/parpack/pdsapps.f
⁵¹⁹⁴ views
1
c-----------------------------------------------------------------------
2
c\BeginDoc
3
c
4
c\Name: pdsapps
5
c
6
c\Description:
7
c  Given the Arnoldi factorization
8
c
9
c     A*V_{k} - V_{k}*H_{k} = r_{k+p}*e_{k+p}^T,
10
c
11
c  apply NP shifts implicitly resulting in
12
c
13
c     A*(V_{k}*Q) - (V_{k}*Q)*(Q^T* H_{k}*Q) = r_{k+p}*e_{k+p}^T * Q
14
c
15
c  where Q is an orthogonal matrix of order KEV+NP. Q is the product of
16
c  rotations resulting from the NP bulge chasing sweeps.  The updated Arnoldi
17
c  factorization becomes:
18
c
19
c     A*VNEW_{k} - VNEW_{k}*HNEW_{k} = rnew_{k}*e_{k}^T.
20
c
21
c\Usage:
22
c  call pdsapps
23
c     ( COMM, N, KEV, NP, SHIFT, V, LDV, H, LDH, RESID, Q, LDQ, WORKD )
24
c
25
c\Arguments
26
c  COMM    MPI Communicator for the processor grid.  (INPUT)
27
c
28
c  N       Integer.  (INPUT)
29
c          Problem size, i.e. dimension of matrix A.
30
c
31
c  KEV     Integer.  (INPUT)
32
c          INPUT: KEV+NP is the size of the input matrix H.
33
c          OUTPUT: KEV is the size of the updated matrix HNEW.
34
c
35
c  NP      Integer.  (INPUT)
36
c          Number of implicit shifts to be applied.
37
c
38
c  SHIFT   Double precision array of length NP.  (INPUT)
39
c          The shifts to be applied.
40
c
41
c  V       Double precision N by (KEV+NP) array.  (INPUT/OUTPUT)
42
c          INPUT: V contains the current KEV+NP Arnoldi vectors.
43
c          OUTPUT: VNEW = V(1:n,1:KEV); the updated Arnoldi vectors
44
c          are in the first KEV columns of V.
45
c
46
c  LDV     Integer.  (INPUT)
47
c          Leading dimension of V exactly as declared in the calling
48
c          program.
49
c
50
c  H       Double precision (KEV+NP) by 2 array.  (INPUT/OUTPUT)
51
c          INPUT: H contains the symmetric tridiagonal matrix of the
52
c          Arnoldi factorization with the subdiagonal in the 1st column
53
c          starting at H(2,1) and the main diagonal in the 2nd column.
54
c          OUTPUT: H contains the updated tridiagonal matrix in the
55
c          KEV leading submatrix.
56
c
57
c  LDH     Integer.  (INPUT)
58
c          Leading dimension of H exactly as declared in the calling
59
c          program.
60
c
61
c  RESID   Double precision array of length (N).  (INPUT/OUTPUT)
62
c          INPUT: RESID contains the the residual vector r_{k+p}.
63
c          OUTPUT: RESID is the updated residual vector rnew_{k}.
64
c
65
c  Q       Double precision KEV+NP by KEV+NP work array.  (WORKSPACE)
66
c          Work array used to accumulate the rotations during the bulge
67
c          chase sweep.
68
c
69
c  LDQ     Integer.  (INPUT)
70
c          Leading dimension of Q exactly as declared in the calling
71
c          program.
72
c
73
c  WORKD   Double precision work array of length 2*N.  (WORKSPACE)
74
c          Distributed array used in the application of the accumulated
75
c          orthogonal matrix Q.
76
c
77
c\EndDoc
78
c
79
c-----------------------------------------------------------------------
80
c
81
c\BeginLib
82
c
83
c\Local variables:
84
c     xxxxxx  real
85
c
86
c\References:
87
c  1. D.C. Sorensen, "Implicit Application of Polynomial Filters in
88
c     a k-Step Arnoldi Method", SIAM J. Matr. Anal. Apps., 13 (1992),
89
c     pp 357-385.
90
c  2. R.B. Lehoucq, "Analysis and Implementation of an Implicitly
91
c     Restarted Arnoldi Iteration", Rice University Technical Report
92
c     TR95-13, Department of Computational and Applied Mathematics.
93
c
94
c\Routines called:
95
c     pivout  Parallel ARPACK utility routine that prints integers.
96
c     second  ARPACK utility routine for timing.
97
c     pdvout  Parallel ARPACK utility routine that prints vectors.
98
c     pdlamch ScaLAPACK routine that determines machine constants.
99
c     dlartg  LAPACK Givens rotation construction routine.
100
c     dlacpy  LAPACK matrix copy routine.
101
c     dlaset  LAPACK matrix initialization routine.
102
c     dgemv   Level 2 BLAS routine for matrix vector multiplication.
103
c     daxpy   Level 1 BLAS that computes a vector triad.
104
c     dcopy   Level 1 BLAS that copies one vector to another.
105
c     dscal   Level 1 BLAS that scales a vector.
106
c
107
c\Author
108
c     Danny Sorensen               Phuong Vu
109
c     Richard Lehoucq              CRPC / Rice University
110
c     Dept. of Computational &     Houston, Texas
111
c     Applied Mathematics
112
c     Rice University
113
c     Houston, Texas
114
c
115
c\Parallel Modifications
116
c     Kristi Maschhoff
117
c
118
c\Revision history:
119
c     Starting Point: Serial Code FILE: sapps.F   SID: 2.4
120
c
121
c\SCCS Information:
122
c FILE: sapps.F   SID: 1.3   DATE OF SID: 3/19/97
123
c
124
c\Remarks
125
c  1. In this version, each shift is applied to all the subblocks of
126
c     the tridiagonal matrix H and not just to the submatrix that it
127
c     comes from. This routine assumes that the subdiagonal elements
128
c     of H that are stored in h(1:kev+np,1) are nonegative upon input
129
c     and enforce this condition upon output. This version incorporates
130
c     deflation. See code for documentation.
131
c
132
c\EndLib
133
c
134
c-----------------------------------------------------------------------
135
c
136
      subroutine pdsapps
137
     &  ( comm, n, kev, np, shift, v, ldv, h, ldh, resid, q, ldq, workd)
138
c
139
c     %--------------------%
140
c     | MPI Communicator |
141
c     %--------------------%
142
c
143
      integer   comm
144
c
145
c     %----------------------------------------------------%
146
c     | Include files for debugging and timing information |
147
c     %----------------------------------------------------%
148
c
149
      include   'debug.h'
150
      include   'stat.h'
151
c
152
c     %------------------%
153
c     | Scalar Arguments |
154
c     %------------------%
155
c
156
      integer    kev, ldh, ldq, ldv, n, np
157
c
158
c     %-----------------%
159
c     | Array Arguments |
160
c     %-----------------%
161
c
162
      Double precision
163
     &           h(ldh,2), q(ldq,kev+np), resid(n), shift(np),
164
     &           v(ldv,kev+np), workd(2*n)
165
c
166
c     %------------%
167
c     | Parameters |
168
c     %------------%
169
c
170
      Double precision
171
     &           one, zero
172
      parameter (one = 1.0, zero = 0.0)
173
c
174
c     %---------------%
175
c     | Local Scalars |
176
c     %---------------%
177
c
178
      integer    i, iend, istart, itop, j, jj, kplusp, msglvl
179
      logical    first
180
      Double precision
181
     &           a1, a2, a3, a4, big, c, epsmch, f, g, r, s
182
      save       epsmch, first
183
c
184
c
185
c     %----------------------%
186
c     | External Subroutines |
187
c     %----------------------%
188
c
189
      external   daxpy, dcopy, dscal, dlacpy, dlartg, dlaset, pdvout,
190
     &           pivout, second, dgemv
191
c
192
c     %--------------------%
193
c     | External Functions |
194
c     %--------------------%
195
c
196
      Double precision
197
     &           pdlamch10
198
      external   pdlamch10
199
c
200
c     %----------------------%
201
c     | Intrinsics Functions |
202
c     %----------------------%
203
c
204
      intrinsic  abs
205
c
206
c     %----------------%
207
c     | Data statments |
208
c     %----------------%
209
c
210
      data       first / .true. /
211
c
212
c     %-----------------------%
213
c     | Executable Statements |
214
c     %-----------------------%
215
c
216
      if (first) then
217
         epsmch = pdlamch10(comm, 'Epsilon-Machine')
218
         first = .false.
219
      end if
220
      itop = 1
221
c
222
c     %-------------------------------%
223
c     | Initialize timing statistics  |
224
c     | & message level for debugging |
225
c     %-------------------------------%
226
c
227
      call second (t0)
228
      msglvl = msapps
229
c
230
      kplusp = kev + np
231
c
232
c     %----------------------------------------------%
233
c     | Initialize Q to the identity matrix of order |
234
c     | kplusp used to accumulate the rotations.     |
235
c     %----------------------------------------------%
236
c
237
      call dlaset ('All', kplusp, kplusp, zero, one, q, ldq)
238
c
239
c     %----------------------------------------------%
240
c     | Quick return if there are no shifts to apply |
241
c     %----------------------------------------------%
242
c
243
      if (np .eq. 0) go to 9000
244
c
245
c     %----------------------------------------------------------%
246
c     | Apply the np shifts implicitly. Apply each shift to the  |
247
c     | whole matrix and not just to the submatrix from which it |
248
c     | comes.                                                   |
249
c     %----------------------------------------------------------%
250
c
251
      do 90 jj = 1, np
252
c
253
         istart = itop
254
c
255
c        %----------------------------------------------------------%
256
c        | Check for splitting and deflation. Currently we consider |
257
c        | an off-diagonal element h(i+1,1) negligible if           |
258
c        |         h(i+1,1) .le. epsmch*( |h(i,2)| + |h(i+1,2)| )   |
259
c        | for i=1:KEV+NP-1.                                        |
260
c        | If above condition tests true then we set h(i+1,1) = 0.  |
261
c        | Note that h(1:KEV+NP,1) are assumed to be non negative.  |
262
c        %----------------------------------------------------------%
263
c
264
   20    continue
265
c
266
c        %------------------------------------------------%
267
c        | The following loop exits early if we encounter |
268
c        | a negligible off diagonal element.             |
269
c        %------------------------------------------------%
270
c
271
         do 30 i = istart, kplusp-1
272
            big   = abs(h(i,2)) + abs(h(i+1,2))
273
            if (h(i+1,1) .le. epsmch*big) then
274
               if (msglvl .gt. 0) then
275
                  call pivout (comm, logfil, 1, [i], ndigit,
276
     &                 '_sapps: deflation at row/column no.')
277
                  call pivout (comm, logfil, 1, [jj], ndigit,
278
     &                 '_sapps: occured before shift number.')
279
                  call pdvout (comm, logfil, 1, h(i+1,1), ndigit,
280
     &                 '_sapps: the corresponding off diagonal element')
281
               end if
282
               h(i+1,1) = zero
283
               iend = i
284
               go to 40
285
            end if
286
   30    continue
287
         iend = kplusp
288
   40    continue
289
c
290
         if (istart .lt. iend) then
291
c
292
c           %--------------------------------------------------------%
293
c           | Construct the plane rotation G'(istart,istart+1,theta) |
294
c           | that attempts to drive h(istart+1,1) to zero.          |
295
c           %--------------------------------------------------------%
296
c
297
             f = h(istart,2) - shift(jj)
298
             g = h(istart+1,1)
299
             call dlartg (f, g, c, s, r)
300
c
301
c            %-------------------------------------------------------%
302
c            | Apply rotation to the left and right of H;            |
303
c            | H <- G' * H * G,  where G = G(istart,istart+1,theta). |
304
c            | This will create a "bulge".                           |
305
c            %-------------------------------------------------------%
306
c
307
             a1 = c*h(istart,2)   + s*h(istart+1,1)
308
             a2 = c*h(istart+1,1) + s*h(istart+1,2)
309
             a4 = c*h(istart+1,2) - s*h(istart+1,1)
310
             a3 = c*h(istart+1,1) - s*h(istart,2)
311
             h(istart,2)   = c*a1 + s*a2
312
             h(istart+1,2) = c*a4 - s*a3
313
             h(istart+1,1) = c*a3 + s*a4
314
c
315
c            %----------------------------------------------------%
316
c            | Accumulate the rotation in the matrix Q;  Q <- Q*G |
317
c            %----------------------------------------------------%
318
c
319
             do 60 j = 1, min(istart+jj,kplusp)
320
                a1            =   c*q(j,istart) + s*q(j,istart+1)
321
                q(j,istart+1) = - s*q(j,istart) + c*q(j,istart+1)
322
                q(j,istart)   = a1
323
   60        continue
324
c
325
c
326
c            %----------------------------------------------%
327
c            | The following loop chases the bulge created. |
328
c            | Note that the previous rotation may also be  |
329
c            | done within the following loop. But it is    |
330
c            | kept separate to make the distinction among  |
331
c            | the bulge chasing sweeps and the first plane |
332
c            | rotation designed to drive h(istart+1,1) to  |
333
c            | zero.                                        |
334
c            %----------------------------------------------%
335
c
336
             do 70 i = istart+1, iend-1
337
c
338
c               %----------------------------------------------%
339
c               | Construct the plane rotation G'(i,i+1,theta) |
340
c               | that zeros the i-th bulge that was created   |
341
c               | by G(i-1,i,theta). g represents the bulge.   |
342
c               %----------------------------------------------%
343
c
344
                f = h(i,1)
345
                g = s*h(i+1,1)
346
c
347
c               %----------------------------------%
348
c               | Final update with G(i-1,i,theta) |
349
c               %----------------------------------%
350
c
351
                h(i+1,1) = c*h(i+1,1)
352
                call dlartg (f, g, c, s, r)
353
c
354
c               %-------------------------------------------%
355
c               | The following ensures that h(1:iend-1,1), |
356
c               | the first iend-2 off diagonal of elements |
357
c               | H, remain non negative.                   |
358
c               %-------------------------------------------%
359
c
360
                if (r .lt. zero) then
361
                   r = -r
362
                   c = -c
363
                   s = -s
364
                end if
365
c
366
c               %--------------------------------------------%
367
c               | Apply rotation to the left and right of H; |
368
c               | H <- G * H * G',  where G = G(i,i+1,theta) |
369
c               %--------------------------------------------%
370
c
371
                h(i,1) = r
372
c
373
                a1 = c*h(i,2)   + s*h(i+1,1)
374
                a2 = c*h(i+1,1) + s*h(i+1,2)
375
                a3 = c*h(i+1,1) - s*h(i,2)
376
                a4 = c*h(i+1,2) - s*h(i+1,1)
377
c
378
                h(i,2)   = c*a1 + s*a2
379
                h(i+1,2) = c*a4 - s*a3
380
                h(i+1,1) = c*a3 + s*a4
381
c
382
c               %----------------------------------------------------%
383
c               | Accumulate the rotation in the matrix Q;  Q <- Q*G |
384
c               %----------------------------------------------------%
385
c
386
                do 50 j = 1, min( i+jj, kplusp )
387
                   a1       =   c*q(j,i) + s*q(j,i+1)
388
                   q(j,i+1) = - s*q(j,i) + c*q(j,i+1)
389
                   q(j,i)   = a1
390
   50           continue
391
c
392
   70        continue
393
c
394
         end if
395
c
396
c        %--------------------------%
397
c        | Update the block pointer |
398
c        %--------------------------%
399
c
400
         istart = iend + 1
401
c
402
c        %------------------------------------------%
403
c        | Make sure that h(iend,1) is non-negative |
404
c        | If not then set h(iend,1) <-- -h(iend,1) |
405
c        | and negate the last column of Q.         |
406
c        | We have effectively carried out a        |
407
c        | similarity on transformation H           |
408
c        %------------------------------------------%
409
c
410
         if (h(iend,1) .lt. zero) then
411
             h(iend,1) = -h(iend,1)
412
             call dscal(kplusp, -one, q(1,iend), 1)
413
         end if
414
c
415
c        %--------------------------------------------------------%
416
c        | Apply the same shift to the next block if there is any |
417
c        %--------------------------------------------------------%
418
c
419
         if (iend .lt. kplusp) go to 20
420
c
421
c        %-----------------------------------------------------%
422
c        | Check if we can increase the the start of the block |
423
c        %-----------------------------------------------------%
424
c
425
         do 80 i = itop, kplusp-1
426
            if (h(i+1,1) .gt. zero) go to 90
427
            itop  = itop + 1
428
   80    continue
429
c
430
c        %-----------------------------------%
431
c        | Finished applying the jj-th shift |
432
c        %-----------------------------------%
433
c
434
   90 continue
435
c
436
c     %------------------------------------------%
437
c     | All shifts have been applied. Check for  |
438
c     | more possible deflation that might occur |
439
c     | after the last shift is applied.         |
440
c     %------------------------------------------%
441
c
442
      do 100 i = itop, kplusp-1
443
         big   = abs(h(i,2)) + abs(h(i+1,2))
444
         if (h(i+1,1) .le. epsmch*big) then
445
            if (msglvl .gt. 0) then
446
               call pivout (comm, logfil, 1, [i], ndigit,
447
     &              '_sapps: deflation at row/column no.')
448
               call pdvout (comm, logfil, 1, h(i+1,1), ndigit,
449
     &              '_sapps: the corresponding off diagonal element')
450
            end if
451
            h(i+1,1) = zero
452
         end if
453
 100  continue
454
c
455
c     %-------------------------------------------------%
456
c     | Compute the (kev+1)-st column of (V*Q) and      |
457
c     | temporarily store the result in WORKD(N+1:2*N). |
458
c     | This is not necessary if h(kev+1,1) = 0.         |
459
c     %-------------------------------------------------%
460
c
461
      if ( h(kev+1,1) .gt. zero )
462
     &   call dgemv ('N', n, kplusp, one, v, ldv,
463
     &                q(1,kev+1), 1, zero, workd(n+1), 1)
464
c
465
c     %-------------------------------------------------------%
466
c     | Compute column 1 to kev of (V*Q) in backward order    |
467
c     | taking advantage that Q is an upper triangular matrix |
468
c     | with lower bandwidth np.                              |
469
c     | Place results in v(:,kplusp-kev:kplusp) temporarily.  |
470
c     %-------------------------------------------------------%
471
c
472
      do 130 i = 1, kev
473
         call dgemv ('N', n, kplusp-i+1, one, v, ldv,
474
     &               q(1,kev-i+1), 1, zero, workd, 1)
475
         call dcopy (n, workd, 1, v(1,kplusp-i+1), 1)
476
  130 continue
477
c
478
c     %-------------------------------------------------%
479
c     |  Move v(:,kplusp-kev+1:kplusp) into v(:,1:kev). |
480
c     %-------------------------------------------------%
481
c
482
      call dlacpy ('All', n, kev, v(1,np+1), ldv, v, ldv)
483
c
484
c     %--------------------------------------------%
485
c     | Copy the (kev+1)-st column of (V*Q) in the |
486
c     | appropriate place if h(kev+1,1) .ne. zero. |
487
c     %--------------------------------------------%
488
c
489
      if ( h(kev+1,1) .gt. zero )
490
     &     call dcopy (n, workd(n+1), 1, v(1,kev+1), 1)
491
c
492
c     %-------------------------------------%
493
c     | Update the residual vector:         |
494
c     |    r <- sigmak*r + betak*v(:,kev+1) |
495
c     | where                               |
496
c     |    sigmak = (e_{kev+p}'*Q)*e_{kev}  |
497
c     |    betak = e_{kev+1}'*H*e_{kev}     |
498
c     %-------------------------------------%
499
c
500
      call dscal (n, q(kplusp,kev), resid, 1)
501
      if (h(kev+1,1) .gt. zero)
502
     &   call daxpy (n, h(kev+1,1), v(1,kev+1), 1, resid, 1)
503
c
504
      if (msglvl .gt. 1) then
505
         call pdvout (comm, logfil, 1, q(kplusp,kev), ndigit,
506
     &      '_sapps: sigmak of the updated residual vector')
507
         call pdvout (comm, logfil, 1, h(kev+1,1), ndigit,
508
     &      '_sapps: betak of the updated residual vector')
509
         call pdvout (comm, logfil, kev, h(1,2), ndigit,
510
     &      '_sapps: updated main diagonal of H for next iteration')
511
         if (kev .gt. 1) then
512
         call pdvout (comm, logfil, kev-1, h(2,1), ndigit,
513
     &      '_sapps: updated sub diagonal of H for next iteration')
514
         end if
515
      end if
516
c
517
      call second (t1)
518
      tsapps = tsapps + (t1 - t0)
519
c
520
 9000 continue
521
      return
522
c
523
c     %----------------%
524
c     | End of pdsapps |
525
c     %----------------%
526
c
527
      end
528

529
Product

Resources

Company