Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/lapack/cgbtrf.f
5209 views
1
SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
2
*
3
* -- LAPACK routine (version 3.0) --
4
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
5
* Courant Institute, Argonne National Lab, and Rice University
6
* September 30, 1994
7
*
8
* .. Scalar Arguments ..
9
INTEGER INFO, KL, KU, LDAB, M, N
10
* ..
11
* .. Array Arguments ..
12
INTEGER IPIV( * )
13
COMPLEX AB( LDAB, * )
14
* ..
15
*
16
* Purpose
17
* =======
18
*
19
* CGBTRF computes an LU factorization of a complex m-by-n band matrix A
20
* using partial pivoting with row interchanges.
21
*
22
* This is the blocked version of the algorithm, calling Level 3 BLAS.
23
*
24
* Arguments
25
* =========
26
*
27
* M (input) INTEGER
28
* The number of rows of the matrix A. M >= 0.
29
*
30
* N (input) INTEGER
31
* The number of columns of the matrix A. N >= 0.
32
*
33
* KL (input) INTEGER
34
* The number of subdiagonals within the band of A. KL >= 0.
35
*
36
* KU (input) INTEGER
37
* The number of superdiagonals within the band of A. KU >= 0.
38
*
39
* AB (input/output) COMPLEX array, dimension (LDAB,N)
40
* On entry, the matrix A in band storage, in rows KL+1 to
41
* 2*KL+KU+1; rows 1 to KL of the array need not be set.
42
* The j-th column of A is stored in the j-th column of the
43
* array AB as follows:
44
* AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
45
*
46
* On exit, details of the factorization: U is stored as an
47
* upper triangular band matrix with KL+KU superdiagonals in
48
* rows 1 to KL+KU+1, and the multipliers used during the
49
* factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
50
* See below for further details.
51
*
52
* LDAB (input) INTEGER
53
* The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
54
*
55
* IPIV (output) INTEGER array, dimension (min(M,N))
56
* The pivot indices; for 1 <= i <= min(M,N), row i of the
57
* matrix was interchanged with row IPIV(i).
58
*
59
* INFO (output) INTEGER
60
* = 0: successful exit
61
* < 0: if INFO = -i, the i-th argument had an illegal value
62
* > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
63
* has been completed, but the factor U is exactly
64
* singular, and division by zero will occur if it is used
65
* to solve a system of equations.
66
*
67
* Further Details
68
* ===============
69
*
70
* The band storage scheme is illustrated by the following example, when
71
* M = N = 6, KL = 2, KU = 1:
72
*
73
* On entry: On exit:
74
*
75
* * * * + + + * * * u14 u25 u36
76
* * * + + + + * * u13 u24 u35 u46
77
* * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
78
* a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
79
* a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
80
* a31 a42 a53 a64 * * m31 m42 m53 m64 * *
81
*
82
* Array elements marked * are not used by the routine; elements marked
83
* + need not be set on entry, but are required by the routine to store
84
* elements of U because of fill-in resulting from the row interchanges.
85
*
86
* =====================================================================
87
*
88
* .. Parameters ..
89
COMPLEX ONE, ZERO
90
PARAMETER ( ONE = ( 1.0E+0, 0.0E+0 ),
91
$ ZERO = ( 0.0E+0, 0.0E+0 ) )
92
INTEGER NBMAX, LDWORK
93
PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
94
* ..
95
* .. Local Scalars ..
96
INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
97
$ JU, K2, KM, KV, NB, NW
98
COMPLEX TEMP
99
* ..
100
* .. Local Arrays ..
101
COMPLEX WORK13( LDWORK, NBMAX ),
102
$ WORK31( LDWORK, NBMAX )
103
* ..
104
* .. External Functions ..
105
INTEGER ICAMAX, ILAENV
106
EXTERNAL ICAMAX, ILAENV
107
* ..
108
* .. External Subroutines ..
109
EXTERNAL CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
110
$ CSWAP, CTRSM, XERBLA
111
* ..
112
* .. Intrinsic Functions ..
113
INTRINSIC MAX, MIN
114
* ..
115
* .. Executable Statements ..
116
*
117
* KV is the number of superdiagonals in the factor U, allowing for
118
* fill-in
119
*
120
KV = KU + KL
121
*
122
* Test the input parameters.
123
*
124
INFO = 0
125
IF( M.LT.0 ) THEN
126
INFO = -1
127
ELSE IF( N.LT.0 ) THEN
128
INFO = -2
129
ELSE IF( KL.LT.0 ) THEN
130
INFO = -3
131
ELSE IF( KU.LT.0 ) THEN
132
INFO = -4
133
ELSE IF( LDAB.LT.KL+KV+1 ) THEN
134
INFO = -6
135
END IF
136
IF( INFO.NE.0 ) THEN
137
CALL XERBLA( 'CGBTRF', -INFO )
138
RETURN
139
END IF
140
*
141
* Quick return if possible
142
*
143
IF( M.EQ.0 .OR. N.EQ.0 )
144
$ RETURN
145
*
146
* Determine the block size for this environment
147
*
148
NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
149
*
150
* The block size must not exceed the limit set by the size of the
151
* local arrays WORK13 and WORK31.
152
*
153
NB = MIN( NB, NBMAX )
154
*
155
IF( NB.LE.1 .OR. NB.GT.KL ) THEN
156
*
157
* Use unblocked code
158
*
159
CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
160
ELSE
161
*
162
* Use blocked code
163
*
164
* Zero the superdiagonal elements of the work array WORK13
165
*
166
DO 20 J = 1, NB
167
DO 10 I = 1, J - 1
168
WORK13( I, J ) = ZERO
169
10 CONTINUE
170
20 CONTINUE
171
*
172
* Zero the subdiagonal elements of the work array WORK31
173
*
174
DO 40 J = 1, NB
175
DO 30 I = J + 1, NB
176
WORK31( I, J ) = ZERO
177
30 CONTINUE
178
40 CONTINUE
179
*
180
* Gaussian elimination with partial pivoting
181
*
182
* Set fill-in elements in columns KU+2 to KV to zero
183
*
184
DO 60 J = KU + 2, MIN( KV, N )
185
DO 50 I = KV - J + 2, KL
186
AB( I, J ) = ZERO
187
50 CONTINUE
188
60 CONTINUE
189
*
190
* JU is the index of the last column affected by the current
191
* stage of the factorization
192
*
193
JU = 1
194
*
195
DO 180 J = 1, MIN( M, N ), NB
196
JB = MIN( NB, MIN( M, N )-J+1 )
197
*
198
* The active part of the matrix is partitioned
199
*
200
* A11 A12 A13
201
* A21 A22 A23
202
* A31 A32 A33
203
*
204
* Here A11, A21 and A31 denote the current block of JB columns
205
* which is about to be factorized. The number of rows in the
206
* partitioning are JB, I2, I3 respectively, and the numbers
207
* of columns are JB, J2, J3. The superdiagonal elements of A13
208
* and the subdiagonal elements of A31 lie outside the band.
209
*
210
I2 = MIN( KL-JB, M-J-JB+1 )
211
I3 = MIN( JB, M-J-KL+1 )
212
*
213
* J2 and J3 are computed after JU has been updated.
214
*
215
* Factorize the current block of JB columns
216
*
217
DO 80 JJ = J, J + JB - 1
218
*
219
* Set fill-in elements in column JJ+KV to zero
220
*
221
IF( JJ+KV.LE.N ) THEN
222
DO 70 I = 1, KL
223
AB( I, JJ+KV ) = ZERO
224
70 CONTINUE
225
END IF
226
*
227
* Find pivot and test for singularity. KM is the number of
228
* subdiagonal elements in the current column.
229
*
230
KM = MIN( KL, M-JJ )
231
JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
232
IPIV( JJ ) = JP + JJ - J
233
IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
234
JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
235
IF( JP.NE.1 ) THEN
236
*
237
* Apply interchange to columns J to J+JB-1
238
*
239
IF( JP+JJ-1.LT.J+KL ) THEN
240
*
241
CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
242
$ AB( KV+JP+JJ-J, J ), LDAB-1 )
243
ELSE
244
*
245
* The interchange affects columns J to JJ-1 of A31
246
* which are stored in the work array WORK31
247
*
248
CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
249
$ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
250
CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
251
$ AB( KV+JP, JJ ), LDAB-1 )
252
END IF
253
END IF
254
*
255
* Compute multipliers
256
*
257
CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
258
$ 1 )
259
*
260
* Update trailing submatrix within the band and within
261
* the current block. JM is the index of the last column
262
* which needs to be updated.
263
*
264
JM = MIN( JU, J+JB-1 )
265
IF( JM.GT.JJ )
266
$ CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
267
$ AB( KV, JJ+1 ), LDAB-1,
268
$ AB( KV+1, JJ+1 ), LDAB-1 )
269
ELSE
270
*
271
* If pivot is zero, set INFO to the index of the pivot
272
* unless a zero pivot has already been found.
273
*
274
IF( INFO.EQ.0 )
275
$ INFO = JJ
276
END IF
277
*
278
* Copy current column of A31 into the work array WORK31
279
*
280
NW = MIN( JJ-J+1, I3 )
281
IF( NW.GT.0 )
282
$ CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
283
$ WORK31( 1, JJ-J+1 ), 1 )
284
80 CONTINUE
285
IF( J+JB.LE.N ) THEN
286
*
287
* Apply the row interchanges to the other blocks.
288
*
289
J2 = MIN( JU-J+1, KV ) - JB
290
J3 = MAX( 0, JU-J-KV+1 )
291
*
292
* Use CLASWP to apply the row interchanges to A12, A22, and
293
* A32.
294
*
295
CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
296
$ IPIV( J ), 1 )
297
*
298
* Adjust the pivot indices.
299
*
300
DO 90 I = J, J + JB - 1
301
IPIV( I ) = IPIV( I ) + J - 1
302
90 CONTINUE
303
*
304
* Apply the row interchanges to A13, A23, and A33
305
* columnwise.
306
*
307
K2 = J - 1 + JB + J2
308
DO 110 I = 1, J3
309
JJ = K2 + I
310
DO 100 II = J + I - 1, J + JB - 1
311
IP = IPIV( II )
312
IF( IP.NE.II ) THEN
313
TEMP = AB( KV+1+II-JJ, JJ )
314
AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
315
AB( KV+1+IP-JJ, JJ ) = TEMP
316
END IF
317
100 CONTINUE
318
110 CONTINUE
319
*
320
* Update the relevant part of the trailing submatrix
321
*
322
IF( J2.GT.0 ) THEN
323
*
324
* Update A12
325
*
326
CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
327
$ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
328
$ AB( KV+1-JB, J+JB ), LDAB-1 )
329
*
330
IF( I2.GT.0 ) THEN
331
*
332
* Update A22
333
*
334
CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
335
$ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
336
$ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
337
$ AB( KV+1, J+JB ), LDAB-1 )
338
END IF
339
*
340
IF( I3.GT.0 ) THEN
341
*
342
* Update A32
343
*
344
CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
345
$ JB, -ONE, WORK31, LDWORK,
346
$ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
347
$ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
348
END IF
349
END IF
350
*
351
IF( J3.GT.0 ) THEN
352
*
353
* Copy the lower triangle of A13 into the work array
354
* WORK13
355
*
356
DO 130 JJ = 1, J3
357
DO 120 II = JJ, JB
358
WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
359
120 CONTINUE
360
130 CONTINUE
361
*
362
* Update A13 in the work array
363
*
364
CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
365
$ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
366
$ WORK13, LDWORK )
367
*
368
IF( I2.GT.0 ) THEN
369
*
370
* Update A23
371
*
372
CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
373
$ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
374
$ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
375
$ LDAB-1 )
376
END IF
377
*
378
IF( I3.GT.0 ) THEN
379
*
380
* Update A33
381
*
382
CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
383
$ JB, -ONE, WORK31, LDWORK, WORK13,
384
$ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
385
END IF
386
*
387
* Copy the lower triangle of A13 back into place
388
*
389
DO 150 JJ = 1, J3
390
DO 140 II = JJ, JB
391
AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
392
140 CONTINUE
393
150 CONTINUE
394
END IF
395
ELSE
396
*
397
* Adjust the pivot indices.
398
*
399
DO 160 I = J, J + JB - 1
400
IPIV( I ) = IPIV( I ) + J - 1
401
160 CONTINUE
402
END IF
403
*
404
* Partially undo the interchanges in the current block to
405
* restore the upper triangular form of A31 and copy the upper
406
* triangle of A31 back into place
407
*
408
DO 170 JJ = J + JB - 1, J, -1
409
JP = IPIV( JJ ) - JJ + 1
410
IF( JP.NE.1 ) THEN
411
*
412
* Apply interchange to columns J to JJ-1
413
*
414
IF( JP+JJ-1.LT.J+KL ) THEN
415
*
416
* The interchange does not affect A31
417
*
418
CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
419
$ AB( KV+JP+JJ-J, J ), LDAB-1 )
420
ELSE
421
*
422
* The interchange does affect A31
423
*
424
CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
425
$ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
426
END IF
427
END IF
428
*
429
* Copy the current column of A31 back into place
430
*
431
NW = MIN( I3, JJ-J+1 )
432
IF( NW.GT.0 )
433
$ CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
434
$ AB( KV+KL+1-JJ+J, JJ ), 1 )
435
170 CONTINUE
436
180 CONTINUE
437
END IF
438
*
439
RETURN
440
*
441
* End of CGBTRF
442
*
443
END
444
445