Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
ElmerCSC
GitHub Repository: ElmerCSC/elmerfem
Path: blob/devel/mathlibs/src/lapack/cgesvd.f
5191 views
1
SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
2
$ WORK, LWORK, RWORK, INFO )
3
*
4
* -- LAPACK driver routine (version 3.0) --
5
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6
* Courant Institute, Argonne National Lab, and Rice University
7
* October 31, 1999
8
*
9
* .. Scalar Arguments ..
10
CHARACTER JOBU, JOBVT
11
INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
12
* ..
13
* .. Array Arguments ..
14
REAL RWORK( * ), S( * )
15
COMPLEX A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
16
$ WORK( * )
17
* ..
18
*
19
* Purpose
20
* =======
21
*
22
* CGESVD computes the singular value decomposition (SVD) of a complex
23
* M-by-N matrix A, optionally computing the left and/or right singular
24
* vectors. The SVD is written
25
*
26
* A = U * SIGMA * conjugate-transpose(V)
27
*
28
* where SIGMA is an M-by-N matrix which is zero except for its
29
* min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
30
* V is an N-by-N unitary matrix. The diagonal elements of SIGMA
31
* are the singular values of A; they are real and non-negative, and
32
* are returned in descending order. The first min(m,n) columns of
33
* U and V are the left and right singular vectors of A.
34
*
35
* Note that the routine returns V**H, not V.
36
*
37
* Arguments
38
* =========
39
*
40
* JOBU (input) CHARACTER*1
41
* Specifies options for computing all or part of the matrix U:
42
* = 'A': all M columns of U are returned in array U:
43
* = 'S': the first min(m,n) columns of U (the left singular
44
* vectors) are returned in the array U;
45
* = 'O': the first min(m,n) columns of U (the left singular
46
* vectors) are overwritten on the array A;
47
* = 'N': no columns of U (no left singular vectors) are
48
* computed.
49
*
50
* JOBVT (input) CHARACTER*1
51
* Specifies options for computing all or part of the matrix
52
* V**H:
53
* = 'A': all N rows of V**H are returned in the array VT;
54
* = 'S': the first min(m,n) rows of V**H (the right singular
55
* vectors) are returned in the array VT;
56
* = 'O': the first min(m,n) rows of V**H (the right singular
57
* vectors) are overwritten on the array A;
58
* = 'N': no rows of V**H (no right singular vectors) are
59
* computed.
60
*
61
* JOBVT and JOBU cannot both be 'O'.
62
*
63
* M (input) INTEGER
64
* The number of rows of the input matrix A. M >= 0.
65
*
66
* N (input) INTEGER
67
* The number of columns of the input matrix A. N >= 0.
68
*
69
* A (input/output) COMPLEX array, dimension (LDA,N)
70
* On entry, the M-by-N matrix A.
71
* On exit,
72
* if JOBU = 'O', A is overwritten with the first min(m,n)
73
* columns of U (the left singular vectors,
74
* stored columnwise);
75
* if JOBVT = 'O', A is overwritten with the first min(m,n)
76
* rows of V**H (the right singular vectors,
77
* stored rowwise);
78
* if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
79
* are destroyed.
80
*
81
* LDA (input) INTEGER
82
* The leading dimension of the array A. LDA >= max(1,M).
83
*
84
* S (output) REAL array, dimension (min(M,N))
85
* The singular values of A, sorted so that S(i) >= S(i+1).
86
*
87
* U (output) COMPLEX array, dimension (LDU,UCOL)
88
* (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
89
* If JOBU = 'A', U contains the M-by-M unitary matrix U;
90
* if JOBU = 'S', U contains the first min(m,n) columns of U
91
* (the left singular vectors, stored columnwise);
92
* if JOBU = 'N' or 'O', U is not referenced.
93
*
94
* LDU (input) INTEGER
95
* The leading dimension of the array U. LDU >= 1; if
96
* JOBU = 'S' or 'A', LDU >= M.
97
*
98
* VT (output) COMPLEX array, dimension (LDVT,N)
99
* If JOBVT = 'A', VT contains the N-by-N unitary matrix
100
* V**H;
101
* if JOBVT = 'S', VT contains the first min(m,n) rows of
102
* V**H (the right singular vectors, stored rowwise);
103
* if JOBVT = 'N' or 'O', VT is not referenced.
104
*
105
* LDVT (input) INTEGER
106
* The leading dimension of the array VT. LDVT >= 1; if
107
* JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
108
*
109
* WORK (workspace/output) COMPLEX array, dimension (LWORK)
110
* On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
111
*
112
* LWORK (input) INTEGER
113
* The dimension of the array WORK. LWORK >= 1.
114
* LWORK >= 2*MIN(M,N)+MAX(M,N).
115
* For good performance, LWORK should generally be larger.
116
*
117
* If LWORK = -1, then a workspace query is assumed; the routine
118
* only calculates the optimal size of the WORK array, returns
119
* this value as the first entry of the WORK array, and no error
120
* message related to LWORK is issued by XERBLA.
121
*
122
* RWORK (workspace) REAL array, dimension (5*min(M,N))
123
* On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
124
* unconverged superdiagonal elements of an upper bidiagonal
125
* matrix B whose diagonal is in S (not necessarily sorted).
126
* B satisfies A = U * B * VT, so it has the same singular
127
* values as A, and singular vectors related by U and VT.
128
*
129
* INFO (output) INTEGER
130
* = 0: successful exit.
131
* < 0: if INFO = -i, the i-th argument had an illegal value.
132
* > 0: if CBDSQR did not converge, INFO specifies how many
133
* superdiagonals of an intermediate bidiagonal form B
134
* did not converge to zero. See the description of RWORK
135
* above for details.
136
*
137
* =====================================================================
138
*
139
* .. Parameters ..
140
COMPLEX CZERO, CONE
141
PARAMETER ( CZERO = ( 0.0E0, 0.0E0 ),
142
$ CONE = ( 1.0E0, 0.0E0 ) )
143
REAL ZERO, ONE
144
PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 )
145
* ..
146
* .. Local Scalars ..
147
LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
148
$ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
149
INTEGER BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
150
$ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
151
$ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
152
$ NRVT, WRKBL
153
REAL ANRM, BIGNUM, EPS, SMLNUM
154
* ..
155
* .. Local Arrays ..
156
REAL DUM( 1 )
157
COMPLEX CDUM( 1 )
158
* ..
159
* .. External Subroutines ..
160
EXTERNAL CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY,
161
$ CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR,
162
$ SLASCL, XERBLA
163
* ..
164
* .. External Functions ..
165
LOGICAL LSAME
166
INTEGER ILAENV
167
REAL CLANGE, SLAMCH
168
EXTERNAL LSAME, ILAENV, CLANGE, SLAMCH
169
* ..
170
* .. Intrinsic Functions ..
171
INTRINSIC MAX, MIN, SQRT
172
* ..
173
* .. Executable Statements ..
174
*
175
* Test the input arguments
176
*
177
INFO = 0
178
MINMN = MIN( M, N )
179
MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
180
WNTUA = LSAME( JOBU, 'A' )
181
WNTUS = LSAME( JOBU, 'S' )
182
WNTUAS = WNTUA .OR. WNTUS
183
WNTUO = LSAME( JOBU, 'O' )
184
WNTUN = LSAME( JOBU, 'N' )
185
WNTVA = LSAME( JOBVT, 'A' )
186
WNTVS = LSAME( JOBVT, 'S' )
187
WNTVAS = WNTVA .OR. WNTVS
188
WNTVO = LSAME( JOBVT, 'O' )
189
WNTVN = LSAME( JOBVT, 'N' )
190
MINWRK = 1
191
LQUERY = ( LWORK.EQ.-1 )
192
*
193
IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
194
INFO = -1
195
ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
196
$ ( WNTVO .AND. WNTUO ) ) THEN
197
INFO = -2
198
ELSE IF( M.LT.0 ) THEN
199
INFO = -3
200
ELSE IF( N.LT.0 ) THEN
201
INFO = -4
202
ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
203
INFO = -6
204
ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
205
INFO = -9
206
ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
207
$ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
208
INFO = -11
209
END IF
210
*
211
* Compute workspace
212
* (Note: Comments in the code beginning "Workspace:" describe the
213
* minimal amount of workspace needed at that point in the code,
214
* as well as the preferred amount for good performance.
215
* CWorkspace refers to complex workspace, and RWorkspace to
216
* real workspace. NB refers to the optimal block size for the
217
* immediately following subroutine, as returned by ILAENV.)
218
*
219
IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
220
$ N.GT.0 ) THEN
221
IF( M.GE.N ) THEN
222
*
223
* Space needed for CBDSQR is BDSPAC = 5*N
224
*
225
IF( M.GE.MNTHR ) THEN
226
IF( WNTUN ) THEN
227
*
228
* Path 1 (M much larger than N, JOBU='N')
229
*
230
MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1,
231
$ -1 )
232
MAXWRK = MAX( MAXWRK, 2*N+2*N*
233
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
234
IF( WNTVO .OR. WNTVAS )
235
$ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
236
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
237
MINWRK = 3*N
238
MAXWRK = MAX( MINWRK, MAXWRK )
239
ELSE IF( WNTUO .AND. WNTVN ) THEN
240
*
241
* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
242
*
243
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
244
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
245
$ N, N, -1 ) )
246
WRKBL = MAX( WRKBL, 2*N+2*N*
247
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
248
WRKBL = MAX( WRKBL, 2*N+N*
249
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
250
MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
251
MINWRK = 2*N + M
252
MAXWRK = MAX( MINWRK, MAXWRK )
253
ELSE IF( WNTUO .AND. WNTVAS ) THEN
254
*
255
* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
256
* 'A')
257
*
258
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
259
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
260
$ N, N, -1 ) )
261
WRKBL = MAX( WRKBL, 2*N+2*N*
262
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
263
WRKBL = MAX( WRKBL, 2*N+N*
264
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
265
WRKBL = MAX( WRKBL, 2*N+( N-1 )*
266
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
267
MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
268
MINWRK = 2*N + M
269
MAXWRK = MAX( MINWRK, MAXWRK )
270
ELSE IF( WNTUS .AND. WNTVN ) THEN
271
*
272
* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
273
*
274
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
275
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
276
$ N, N, -1 ) )
277
WRKBL = MAX( WRKBL, 2*N+2*N*
278
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
279
WRKBL = MAX( WRKBL, 2*N+N*
280
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
281
MAXWRK = N*N + WRKBL
282
MINWRK = 2*N + M
283
MAXWRK = MAX( MINWRK, MAXWRK )
284
ELSE IF( WNTUS .AND. WNTVO ) THEN
285
*
286
* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
287
*
288
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
289
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
290
$ N, N, -1 ) )
291
WRKBL = MAX( WRKBL, 2*N+2*N*
292
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
293
WRKBL = MAX( WRKBL, 2*N+N*
294
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
295
WRKBL = MAX( WRKBL, 2*N+( N-1 )*
296
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
297
MAXWRK = 2*N*N + WRKBL
298
MINWRK = 2*N + M
299
MAXWRK = MAX( MINWRK, MAXWRK )
300
ELSE IF( WNTUS .AND. WNTVAS ) THEN
301
*
302
* Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
303
* 'A')
304
*
305
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
306
WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
307
$ N, N, -1 ) )
308
WRKBL = MAX( WRKBL, 2*N+2*N*
309
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
310
WRKBL = MAX( WRKBL, 2*N+N*
311
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
312
WRKBL = MAX( WRKBL, 2*N+( N-1 )*
313
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
314
MAXWRK = N*N + WRKBL
315
MINWRK = 2*N + M
316
MAXWRK = MAX( MINWRK, MAXWRK )
317
ELSE IF( WNTUA .AND. WNTVN ) THEN
318
*
319
* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
320
*
321
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
322
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
323
$ M, N, -1 ) )
324
WRKBL = MAX( WRKBL, 2*N+2*N*
325
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
326
WRKBL = MAX( WRKBL, 2*N+N*
327
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
328
MAXWRK = N*N + WRKBL
329
MINWRK = 2*N + M
330
MAXWRK = MAX( MINWRK, MAXWRK )
331
ELSE IF( WNTUA .AND. WNTVO ) THEN
332
*
333
* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
334
*
335
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
336
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
337
$ M, N, -1 ) )
338
WRKBL = MAX( WRKBL, 2*N+2*N*
339
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
340
WRKBL = MAX( WRKBL, 2*N+N*
341
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
342
WRKBL = MAX( WRKBL, 2*N+( N-1 )*
343
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
344
MAXWRK = 2*N*N + WRKBL
345
MINWRK = 2*N + M
346
MAXWRK = MAX( MINWRK, MAXWRK )
347
ELSE IF( WNTUA .AND. WNTVAS ) THEN
348
*
349
* Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
350
* 'A')
351
*
352
WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
353
WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
354
$ M, N, -1 ) )
355
WRKBL = MAX( WRKBL, 2*N+2*N*
356
$ ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
357
WRKBL = MAX( WRKBL, 2*N+N*
358
$ ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
359
WRKBL = MAX( WRKBL, 2*N+( N-1 )*
360
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
361
MAXWRK = N*N + WRKBL
362
MINWRK = 2*N + M
363
MAXWRK = MAX( MINWRK, MAXWRK )
364
END IF
365
ELSE
366
*
367
* Path 10 (M at least N, but not much larger)
368
*
369
MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
370
$ -1, -1 )
371
IF( WNTUS .OR. WNTUO )
372
$ MAXWRK = MAX( MAXWRK, 2*N+N*
373
$ ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) )
374
IF( WNTUA )
375
$ MAXWRK = MAX( MAXWRK, 2*N+M*
376
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
377
IF( .NOT.WNTVN )
378
$ MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
379
$ ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
380
MINWRK = 2*N + M
381
MAXWRK = MAX( MINWRK, MAXWRK )
382
END IF
383
ELSE
384
*
385
* Space needed for CBDSQR is BDSPAC = 5*M
386
*
387
IF( N.GE.MNTHR ) THEN
388
IF( WNTVN ) THEN
389
*
390
* Path 1t(N much larger than M, JOBVT='N')
391
*
392
MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1,
393
$ -1 )
394
MAXWRK = MAX( MAXWRK, 2*M+2*M*
395
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
396
IF( WNTUO .OR. WNTUAS )
397
$ MAXWRK = MAX( MAXWRK, 2*M+M*
398
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
399
MINWRK = 3*M
400
MAXWRK = MAX( MINWRK, MAXWRK )
401
ELSE IF( WNTVO .AND. WNTUN ) THEN
402
*
403
* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
404
*
405
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
406
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
407
$ N, M, -1 ) )
408
WRKBL = MAX( WRKBL, 2*M+2*M*
409
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
410
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
411
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
412
MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
413
MINWRK = 2*M + N
414
MAXWRK = MAX( MINWRK, MAXWRK )
415
ELSE IF( WNTVO .AND. WNTUAS ) THEN
416
*
417
* Path 3t(N much larger than M, JOBU='S' or 'A',
418
* JOBVT='O')
419
*
420
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
421
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
422
$ N, M, -1 ) )
423
WRKBL = MAX( WRKBL, 2*M+2*M*
424
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
425
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
426
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
427
WRKBL = MAX( WRKBL, 2*M+M*
428
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
429
MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
430
MINWRK = 2*M + N
431
MAXWRK = MAX( MINWRK, MAXWRK )
432
ELSE IF( WNTVS .AND. WNTUN ) THEN
433
*
434
* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
435
*
436
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
437
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
438
$ N, M, -1 ) )
439
WRKBL = MAX( WRKBL, 2*M+2*M*
440
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
441
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
442
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
443
MAXWRK = M*M + WRKBL
444
MINWRK = 2*M + N
445
MAXWRK = MAX( MINWRK, MAXWRK )
446
ELSE IF( WNTVS .AND. WNTUO ) THEN
447
*
448
* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
449
*
450
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
451
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
452
$ N, M, -1 ) )
453
WRKBL = MAX( WRKBL, 2*M+2*M*
454
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
455
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
456
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
457
WRKBL = MAX( WRKBL, 2*M+M*
458
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
459
MAXWRK = 2*M*M + WRKBL
460
MINWRK = 2*M + N
461
MAXWRK = MAX( MINWRK, MAXWRK )
462
ELSE IF( WNTVS .AND. WNTUAS ) THEN
463
*
464
* Path 6t(N much larger than M, JOBU='S' or 'A',
465
* JOBVT='S')
466
*
467
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
468
WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
469
$ N, M, -1 ) )
470
WRKBL = MAX( WRKBL, 2*M+2*M*
471
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
472
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
473
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
474
WRKBL = MAX( WRKBL, 2*M+M*
475
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
476
MAXWRK = M*M + WRKBL
477
MINWRK = 2*M + N
478
MAXWRK = MAX( MINWRK, MAXWRK )
479
ELSE IF( WNTVA .AND. WNTUN ) THEN
480
*
481
* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
482
*
483
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
484
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
485
$ N, M, -1 ) )
486
WRKBL = MAX( WRKBL, 2*M+2*M*
487
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
488
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
489
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
490
MAXWRK = M*M + WRKBL
491
MINWRK = 2*M + N
492
MAXWRK = MAX( MINWRK, MAXWRK )
493
ELSE IF( WNTVA .AND. WNTUO ) THEN
494
*
495
* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
496
*
497
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
498
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
499
$ N, M, -1 ) )
500
WRKBL = MAX( WRKBL, 2*M+2*M*
501
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
502
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
503
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
504
WRKBL = MAX( WRKBL, 2*M+M*
505
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
506
MAXWRK = 2*M*M + WRKBL
507
MINWRK = 2*M + N
508
MAXWRK = MAX( MINWRK, MAXWRK )
509
ELSE IF( WNTVA .AND. WNTUAS ) THEN
510
*
511
* Path 9t(N much larger than M, JOBU='S' or 'A',
512
* JOBVT='A')
513
*
514
WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
515
WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
516
$ N, M, -1 ) )
517
WRKBL = MAX( WRKBL, 2*M+2*M*
518
$ ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
519
WRKBL = MAX( WRKBL, 2*M+( M-1 )*
520
$ ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
521
WRKBL = MAX( WRKBL, 2*M+M*
522
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
523
MAXWRK = M*M + WRKBL
524
MINWRK = 2*M + N
525
MAXWRK = MAX( MINWRK, MAXWRK )
526
END IF
527
ELSE
528
*
529
* Path 10t(N greater than M, but not much larger)
530
*
531
MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
532
$ -1, -1 )
533
IF( WNTVS .OR. WNTVO )
534
$ MAXWRK = MAX( MAXWRK, 2*M+M*
535
$ ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) )
536
IF( WNTVA )
537
$ MAXWRK = MAX( MAXWRK, 2*M+N*
538
$ ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) )
539
IF( .NOT.WNTUN )
540
$ MAXWRK = MAX( MAXWRK, 2*M+( M-1 )*
541
$ ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
542
MINWRK = 2*M + N
543
MAXWRK = MAX( MINWRK, MAXWRK )
544
END IF
545
END IF
546
WORK( 1 ) = MAXWRK
547
END IF
548
*
549
IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
550
INFO = -13
551
END IF
552
IF( INFO.NE.0 ) THEN
553
CALL XERBLA( 'CGESVD', -INFO )
554
RETURN
555
ELSE IF( LQUERY ) THEN
556
RETURN
557
END IF
558
*
559
* Quick return if possible
560
*
561
IF( M.EQ.0 .OR. N.EQ.0 ) THEN
562
IF( LWORK.GE.1 )
563
$ WORK( 1 ) = ONE
564
RETURN
565
END IF
566
*
567
* Get machine constants
568
*
569
EPS = SLAMCH( 'P' )
570
SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
571
BIGNUM = ONE / SMLNUM
572
*
573
* Scale A if max element outside range [SMLNUM,BIGNUM]
574
*
575
ANRM = CLANGE( 'M', M, N, A, LDA, DUM )
576
ISCL = 0
577
IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
578
ISCL = 1
579
CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
580
ELSE IF( ANRM.GT.BIGNUM ) THEN
581
ISCL = 1
582
CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
583
END IF
584
*
585
IF( M.GE.N ) THEN
586
*
587
* A has at least as many rows as columns. If A has sufficiently
588
* more rows than columns, first reduce using the QR
589
* decomposition (if sufficient workspace available)
590
*
591
IF( M.GE.MNTHR ) THEN
592
*
593
IF( WNTUN ) THEN
594
*
595
* Path 1 (M much larger than N, JOBU='N')
596
* No left singular vectors to be computed
597
*
598
ITAU = 1
599
IWORK = ITAU + N
600
*
601
* Compute A=Q*R
602
* (CWorkspace: need 2*N, prefer N+N*NB)
603
* (RWorkspace: need 0)
604
*
605
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
606
$ LWORK-IWORK+1, IERR )
607
*
608
* Zero out below R
609
*
610
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
611
$ LDA )
612
IE = 1
613
ITAUQ = 1
614
ITAUP = ITAUQ + N
615
IWORK = ITAUP + N
616
*
617
* Bidiagonalize R in A
618
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
619
* (RWorkspace: need N)
620
*
621
CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
622
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
623
$ IERR )
624
NCVT = 0
625
IF( WNTVO .OR. WNTVAS ) THEN
626
*
627
* If right singular vectors desired, generate P'.
628
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
629
* (RWorkspace: 0)
630
*
631
CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
632
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
633
NCVT = N
634
END IF
635
IRWORK = IE + N
636
*
637
* Perform bidiagonal QR iteration, computing right
638
* singular vectors of A in A if desired
639
* (CWorkspace: 0)
640
* (RWorkspace: need BDSPAC)
641
*
642
CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
643
$ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
644
*
645
* If right singular vectors desired in VT, copy them there
646
*
647
IF( WNTVAS )
648
$ CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT )
649
*
650
ELSE IF( WNTUO .AND. WNTVN ) THEN
651
*
652
* Path 2 (M much larger than N, JOBU='O', JOBVT='N')
653
* N left singular vectors to be overwritten on A and
654
* no right singular vectors to be computed
655
*
656
IF( LWORK.GE.N*N+3*N ) THEN
657
*
658
* Sufficient workspace for a fast algorithm
659
*
660
IR = 1
661
IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
662
*
663
* WORK(IU) is LDA by N, WORK(IR) is LDA by N
664
*
665
LDWRKU = LDA
666
LDWRKR = LDA
667
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
668
*
669
* WORK(IU) is LDA by N, WORK(IR) is N by N
670
*
671
LDWRKU = LDA
672
LDWRKR = N
673
ELSE
674
*
675
* WORK(IU) is LDWRKU by N, WORK(IR) is N by N
676
*
677
LDWRKU = ( LWORK-N*N ) / N
678
LDWRKR = N
679
END IF
680
ITAU = IR + LDWRKR*N
681
IWORK = ITAU + N
682
*
683
* Compute A=Q*R
684
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
685
* (RWorkspace: 0)
686
*
687
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
688
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
689
*
690
* Copy R to WORK(IR) and zero out below it
691
*
692
CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
693
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
694
$ WORK( IR+1 ), LDWRKR )
695
*
696
* Generate Q in A
697
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
698
* (RWorkspace: 0)
699
*
700
CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
701
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
702
IE = 1
703
ITAUQ = ITAU
704
ITAUP = ITAUQ + N
705
IWORK = ITAUP + N
706
*
707
* Bidiagonalize R in WORK(IR)
708
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
709
* (RWorkspace: need N)
710
*
711
CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
712
$ WORK( ITAUQ ), WORK( ITAUP ),
713
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
714
*
715
* Generate left vectors bidiagonalizing R
716
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
717
* (RWorkspace: need 0)
718
*
719
CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
720
$ WORK( ITAUQ ), WORK( IWORK ),
721
$ LWORK-IWORK+1, IERR )
722
IRWORK = IE + N
723
*
724
* Perform bidiagonal QR iteration, computing left
725
* singular vectors of R in WORK(IR)
726
* (CWorkspace: need N*N)
727
* (RWorkspace: need BDSPAC)
728
*
729
CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
730
$ WORK( IR ), LDWRKR, CDUM, 1,
731
$ RWORK( IRWORK ), INFO )
732
IU = ITAUQ
733
*
734
* Multiply Q in A by left singular vectors of R in
735
* WORK(IR), storing result in WORK(IU) and copying to A
736
* (CWorkspace: need N*N+N, prefer N*N+M*N)
737
* (RWorkspace: 0)
738
*
739
DO 10 I = 1, M, LDWRKU
740
CHUNK = MIN( M-I+1, LDWRKU )
741
CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
742
$ LDA, WORK( IR ), LDWRKR, CZERO,
743
$ WORK( IU ), LDWRKU )
744
CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
745
$ A( I, 1 ), LDA )
746
10 CONTINUE
747
*
748
ELSE
749
*
750
* Insufficient workspace for a fast algorithm
751
*
752
IE = 1
753
ITAUQ = 1
754
ITAUP = ITAUQ + N
755
IWORK = ITAUP + N
756
*
757
* Bidiagonalize A
758
* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
759
* (RWorkspace: N)
760
*
761
CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
762
$ WORK( ITAUQ ), WORK( ITAUP ),
763
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
764
*
765
* Generate left vectors bidiagonalizing A
766
* (CWorkspace: need 3*N, prefer 2*N+N*NB)
767
* (RWorkspace: 0)
768
*
769
CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
770
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
771
IRWORK = IE + N
772
*
773
* Perform bidiagonal QR iteration, computing left
774
* singular vectors of A in A
775
* (CWorkspace: need 0)
776
* (RWorkspace: need BDSPAC)
777
*
778
CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
779
$ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
780
*
781
END IF
782
*
783
ELSE IF( WNTUO .AND. WNTVAS ) THEN
784
*
785
* Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
786
* N left singular vectors to be overwritten on A and
787
* N right singular vectors to be computed in VT
788
*
789
IF( LWORK.GE.N*N+3*N ) THEN
790
*
791
* Sufficient workspace for a fast algorithm
792
*
793
IR = 1
794
IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
795
*
796
* WORK(IU) is LDA by N and WORK(IR) is LDA by N
797
*
798
LDWRKU = LDA
799
LDWRKR = LDA
800
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
801
*
802
* WORK(IU) is LDA by N and WORK(IR) is N by N
803
*
804
LDWRKU = LDA
805
LDWRKR = N
806
ELSE
807
*
808
* WORK(IU) is LDWRKU by N and WORK(IR) is N by N
809
*
810
LDWRKU = ( LWORK-N*N ) / N
811
LDWRKR = N
812
END IF
813
ITAU = IR + LDWRKR*N
814
IWORK = ITAU + N
815
*
816
* Compute A=Q*R
817
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
818
* (RWorkspace: 0)
819
*
820
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
821
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
822
*
823
* Copy R to VT, zeroing out below it
824
*
825
CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
826
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
827
$ LDVT )
828
*
829
* Generate Q in A
830
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
831
* (RWorkspace: 0)
832
*
833
CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
834
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
835
IE = 1
836
ITAUQ = ITAU
837
ITAUP = ITAUQ + N
838
IWORK = ITAUP + N
839
*
840
* Bidiagonalize R in VT, copying result to WORK(IR)
841
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
842
* (RWorkspace: need N)
843
*
844
CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
845
$ WORK( ITAUQ ), WORK( ITAUP ),
846
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
847
CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
848
*
849
* Generate left vectors bidiagonalizing R in WORK(IR)
850
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
851
* (RWorkspace: 0)
852
*
853
CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
854
$ WORK( ITAUQ ), WORK( IWORK ),
855
$ LWORK-IWORK+1, IERR )
856
*
857
* Generate right vectors bidiagonalizing R in VT
858
* (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
859
* (RWorkspace: 0)
860
*
861
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
862
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
863
IRWORK = IE + N
864
*
865
* Perform bidiagonal QR iteration, computing left
866
* singular vectors of R in WORK(IR) and computing right
867
* singular vectors of R in VT
868
* (CWorkspace: need N*N)
869
* (RWorkspace: need BDSPAC)
870
*
871
CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
872
$ LDVT, WORK( IR ), LDWRKR, CDUM, 1,
873
$ RWORK( IRWORK ), INFO )
874
IU = ITAUQ
875
*
876
* Multiply Q in A by left singular vectors of R in
877
* WORK(IR), storing result in WORK(IU) and copying to A
878
* (CWorkspace: need N*N+N, prefer N*N+M*N)
879
* (RWorkspace: 0)
880
*
881
DO 20 I = 1, M, LDWRKU
882
CHUNK = MIN( M-I+1, LDWRKU )
883
CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
884
$ LDA, WORK( IR ), LDWRKR, CZERO,
885
$ WORK( IU ), LDWRKU )
886
CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
887
$ A( I, 1 ), LDA )
888
20 CONTINUE
889
*
890
ELSE
891
*
892
* Insufficient workspace for a fast algorithm
893
*
894
ITAU = 1
895
IWORK = ITAU + N
896
*
897
* Compute A=Q*R
898
* (CWorkspace: need 2*N, prefer N+N*NB)
899
* (RWorkspace: 0)
900
*
901
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
902
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
903
*
904
* Copy R to VT, zeroing out below it
905
*
906
CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
907
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
908
$ LDVT )
909
*
910
* Generate Q in A
911
* (CWorkspace: need 2*N, prefer N+N*NB)
912
* (RWorkspace: 0)
913
*
914
CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
915
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
916
IE = 1
917
ITAUQ = ITAU
918
ITAUP = ITAUQ + N
919
IWORK = ITAUP + N
920
*
921
* Bidiagonalize R in VT
922
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
923
* (RWorkspace: N)
924
*
925
CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
926
$ WORK( ITAUQ ), WORK( ITAUP ),
927
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
928
*
929
* Multiply Q in A by left vectors bidiagonalizing R
930
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
931
* (RWorkspace: 0)
932
*
933
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
934
$ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
935
$ LWORK-IWORK+1, IERR )
936
*
937
* Generate right vectors bidiagonalizing R in VT
938
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
939
* (RWorkspace: 0)
940
*
941
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
942
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
943
IRWORK = IE + N
944
*
945
* Perform bidiagonal QR iteration, computing left
946
* singular vectors of A in A and computing right
947
* singular vectors of A in VT
948
* (CWorkspace: 0)
949
* (RWorkspace: need BDSPAC)
950
*
951
CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
952
$ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
953
$ INFO )
954
*
955
END IF
956
*
957
ELSE IF( WNTUS ) THEN
958
*
959
IF( WNTVN ) THEN
960
*
961
* Path 4 (M much larger than N, JOBU='S', JOBVT='N')
962
* N left singular vectors to be computed in U and
963
* no right singular vectors to be computed
964
*
965
IF( LWORK.GE.N*N+3*N ) THEN
966
*
967
* Sufficient workspace for a fast algorithm
968
*
969
IR = 1
970
IF( LWORK.GE.WRKBL+LDA*N ) THEN
971
*
972
* WORK(IR) is LDA by N
973
*
974
LDWRKR = LDA
975
ELSE
976
*
977
* WORK(IR) is N by N
978
*
979
LDWRKR = N
980
END IF
981
ITAU = IR + LDWRKR*N
982
IWORK = ITAU + N
983
*
984
* Compute A=Q*R
985
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
986
* (RWorkspace: 0)
987
*
988
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
989
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
990
*
991
* Copy R to WORK(IR), zeroing out below it
992
*
993
CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
994
$ LDWRKR )
995
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
996
$ WORK( IR+1 ), LDWRKR )
997
*
998
* Generate Q in A
999
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1000
* (RWorkspace: 0)
1001
*
1002
CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1003
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1004
IE = 1
1005
ITAUQ = ITAU
1006
ITAUP = ITAUQ + N
1007
IWORK = ITAUP + N
1008
*
1009
* Bidiagonalize R in WORK(IR)
1010
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1011
* (RWorkspace: need N)
1012
*
1013
CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
1014
$ RWORK( IE ), WORK( ITAUQ ),
1015
$ WORK( ITAUP ), WORK( IWORK ),
1016
$ LWORK-IWORK+1, IERR )
1017
*
1018
* Generate left vectors bidiagonalizing R in WORK(IR)
1019
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1020
* (RWorkspace: 0)
1021
*
1022
CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1023
$ WORK( ITAUQ ), WORK( IWORK ),
1024
$ LWORK-IWORK+1, IERR )
1025
IRWORK = IE + N
1026
*
1027
* Perform bidiagonal QR iteration, computing left
1028
* singular vectors of R in WORK(IR)
1029
* (CWorkspace: need N*N)
1030
* (RWorkspace: need BDSPAC)
1031
*
1032
CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1033
$ 1, WORK( IR ), LDWRKR, CDUM, 1,
1034
$ RWORK( IRWORK ), INFO )
1035
*
1036
* Multiply Q in A by left singular vectors of R in
1037
* WORK(IR), storing result in U
1038
* (CWorkspace: need N*N)
1039
* (RWorkspace: 0)
1040
*
1041
CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1042
$ WORK( IR ), LDWRKR, CZERO, U, LDU )
1043
*
1044
ELSE
1045
*
1046
* Insufficient workspace for a fast algorithm
1047
*
1048
ITAU = 1
1049
IWORK = ITAU + N
1050
*
1051
* Compute A=Q*R, copying result to U
1052
* (CWorkspace: need 2*N, prefer N+N*NB)
1053
* (RWorkspace: 0)
1054
*
1055
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1056
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1057
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1058
*
1059
* Generate Q in U
1060
* (CWorkspace: need 2*N, prefer N+N*NB)
1061
* (RWorkspace: 0)
1062
*
1063
CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1064
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1065
IE = 1
1066
ITAUQ = ITAU
1067
ITAUP = ITAUQ + N
1068
IWORK = ITAUP + N
1069
*
1070
* Zero out below R in A
1071
*
1072
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1073
$ A( 2, 1 ), LDA )
1074
*
1075
* Bidiagonalize R in A
1076
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1077
* (RWorkspace: need N)
1078
*
1079
CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1080
$ WORK( ITAUQ ), WORK( ITAUP ),
1081
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1082
*
1083
* Multiply Q in U by left vectors bidiagonalizing R
1084
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1085
* (RWorkspace: 0)
1086
*
1087
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1088
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1089
$ LWORK-IWORK+1, IERR )
1090
IRWORK = IE + N
1091
*
1092
* Perform bidiagonal QR iteration, computing left
1093
* singular vectors of A in U
1094
* (CWorkspace: 0)
1095
* (RWorkspace: need BDSPAC)
1096
*
1097
CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1098
$ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1099
$ INFO )
1100
*
1101
END IF
1102
*
1103
ELSE IF( WNTVO ) THEN
1104
*
1105
* Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1106
* N left singular vectors to be computed in U and
1107
* N right singular vectors to be overwritten on A
1108
*
1109
IF( LWORK.GE.2*N*N+3*N ) THEN
1110
*
1111
* Sufficient workspace for a fast algorithm
1112
*
1113
IU = 1
1114
IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1115
*
1116
* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1117
*
1118
LDWRKU = LDA
1119
IR = IU + LDWRKU*N
1120
LDWRKR = LDA
1121
ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1122
*
1123
* WORK(IU) is LDA by N and WORK(IR) is N by N
1124
*
1125
LDWRKU = LDA
1126
IR = IU + LDWRKU*N
1127
LDWRKR = N
1128
ELSE
1129
*
1130
* WORK(IU) is N by N and WORK(IR) is N by N
1131
*
1132
LDWRKU = N
1133
IR = IU + LDWRKU*N
1134
LDWRKR = N
1135
END IF
1136
ITAU = IR + LDWRKR*N
1137
IWORK = ITAU + N
1138
*
1139
* Compute A=Q*R
1140
* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1141
* (RWorkspace: 0)
1142
*
1143
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1144
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1145
*
1146
* Copy R to WORK(IU), zeroing out below it
1147
*
1148
CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1149
$ LDWRKU )
1150
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1151
$ WORK( IU+1 ), LDWRKU )
1152
*
1153
* Generate Q in A
1154
* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1155
* (RWorkspace: 0)
1156
*
1157
CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1158
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1159
IE = 1
1160
ITAUQ = ITAU
1161
ITAUP = ITAUQ + N
1162
IWORK = ITAUP + N
1163
*
1164
* Bidiagonalize R in WORK(IU), copying result to
1165
* WORK(IR)
1166
* (CWorkspace: need 2*N*N+3*N,
1167
* prefer 2*N*N+2*N+2*N*NB)
1168
* (RWorkspace: need N)
1169
*
1170
CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1171
$ RWORK( IE ), WORK( ITAUQ ),
1172
$ WORK( ITAUP ), WORK( IWORK ),
1173
$ LWORK-IWORK+1, IERR )
1174
CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1175
$ WORK( IR ), LDWRKR )
1176
*
1177
* Generate left bidiagonalizing vectors in WORK(IU)
1178
* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1179
* (RWorkspace: 0)
1180
*
1181
CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1182
$ WORK( ITAUQ ), WORK( IWORK ),
1183
$ LWORK-IWORK+1, IERR )
1184
*
1185
* Generate right bidiagonalizing vectors in WORK(IR)
1186
* (CWorkspace: need 2*N*N+3*N-1,
1187
* prefer 2*N*N+2*N+(N-1)*NB)
1188
* (RWorkspace: 0)
1189
*
1190
CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1191
$ WORK( ITAUP ), WORK( IWORK ),
1192
$ LWORK-IWORK+1, IERR )
1193
IRWORK = IE + N
1194
*
1195
* Perform bidiagonal QR iteration, computing left
1196
* singular vectors of R in WORK(IU) and computing
1197
* right singular vectors of R in WORK(IR)
1198
* (CWorkspace: need 2*N*N)
1199
* (RWorkspace: need BDSPAC)
1200
*
1201
CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1202
$ WORK( IR ), LDWRKR, WORK( IU ),
1203
$ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1204
$ INFO )
1205
*
1206
* Multiply Q in A by left singular vectors of R in
1207
* WORK(IU), storing result in U
1208
* (CWorkspace: need N*N)
1209
* (RWorkspace: 0)
1210
*
1211
CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1212
$ WORK( IU ), LDWRKU, CZERO, U, LDU )
1213
*
1214
* Copy right singular vectors of R to A
1215
* (CWorkspace: need N*N)
1216
* (RWorkspace: 0)
1217
*
1218
CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1219
$ LDA )
1220
*
1221
ELSE
1222
*
1223
* Insufficient workspace for a fast algorithm
1224
*
1225
ITAU = 1
1226
IWORK = ITAU + N
1227
*
1228
* Compute A=Q*R, copying result to U
1229
* (CWorkspace: need 2*N, prefer N+N*NB)
1230
* (RWorkspace: 0)
1231
*
1232
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1233
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1234
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1235
*
1236
* Generate Q in U
1237
* (CWorkspace: need 2*N, prefer N+N*NB)
1238
* (RWorkspace: 0)
1239
*
1240
CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1241
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1242
IE = 1
1243
ITAUQ = ITAU
1244
ITAUP = ITAUQ + N
1245
IWORK = ITAUP + N
1246
*
1247
* Zero out below R in A
1248
*
1249
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1250
$ A( 2, 1 ), LDA )
1251
*
1252
* Bidiagonalize R in A
1253
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1254
* (RWorkspace: need N)
1255
*
1256
CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1257
$ WORK( ITAUQ ), WORK( ITAUP ),
1258
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1259
*
1260
* Multiply Q in U by left vectors bidiagonalizing R
1261
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1262
* (RWorkspace: 0)
1263
*
1264
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1265
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1266
$ LWORK-IWORK+1, IERR )
1267
*
1268
* Generate right vectors bidiagonalizing R in A
1269
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1270
* (RWorkspace: 0)
1271
*
1272
CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1273
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1274
IRWORK = IE + N
1275
*
1276
* Perform bidiagonal QR iteration, computing left
1277
* singular vectors of A in U and computing right
1278
* singular vectors of A in A
1279
* (CWorkspace: 0)
1280
* (RWorkspace: need BDSPAC)
1281
*
1282
CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1283
$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1284
$ INFO )
1285
*
1286
END IF
1287
*
1288
ELSE IF( WNTVAS ) THEN
1289
*
1290
* Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1291
* or 'A')
1292
* N left singular vectors to be computed in U and
1293
* N right singular vectors to be computed in VT
1294
*
1295
IF( LWORK.GE.N*N+3*N ) THEN
1296
*
1297
* Sufficient workspace for a fast algorithm
1298
*
1299
IU = 1
1300
IF( LWORK.GE.WRKBL+LDA*N ) THEN
1301
*
1302
* WORK(IU) is LDA by N
1303
*
1304
LDWRKU = LDA
1305
ELSE
1306
*
1307
* WORK(IU) is N by N
1308
*
1309
LDWRKU = N
1310
END IF
1311
ITAU = IU + LDWRKU*N
1312
IWORK = ITAU + N
1313
*
1314
* Compute A=Q*R
1315
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1316
* (RWorkspace: 0)
1317
*
1318
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1319
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1320
*
1321
* Copy R to WORK(IU), zeroing out below it
1322
*
1323
CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1324
$ LDWRKU )
1325
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1326
$ WORK( IU+1 ), LDWRKU )
1327
*
1328
* Generate Q in A
1329
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1330
* (RWorkspace: 0)
1331
*
1332
CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
1333
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1334
IE = 1
1335
ITAUQ = ITAU
1336
ITAUP = ITAUQ + N
1337
IWORK = ITAUP + N
1338
*
1339
* Bidiagonalize R in WORK(IU), copying result to VT
1340
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1341
* (RWorkspace: need N)
1342
*
1343
CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1344
$ RWORK( IE ), WORK( ITAUQ ),
1345
$ WORK( ITAUP ), WORK( IWORK ),
1346
$ LWORK-IWORK+1, IERR )
1347
CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1348
$ LDVT )
1349
*
1350
* Generate left bidiagonalizing vectors in WORK(IU)
1351
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1352
* (RWorkspace: 0)
1353
*
1354
CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1355
$ WORK( ITAUQ ), WORK( IWORK ),
1356
$ LWORK-IWORK+1, IERR )
1357
*
1358
* Generate right bidiagonalizing vectors in VT
1359
* (CWorkspace: need N*N+3*N-1,
1360
* prefer N*N+2*N+(N-1)*NB)
1361
* (RWorkspace: 0)
1362
*
1363
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1364
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1365
IRWORK = IE + N
1366
*
1367
* Perform bidiagonal QR iteration, computing left
1368
* singular vectors of R in WORK(IU) and computing
1369
* right singular vectors of R in VT
1370
* (CWorkspace: need N*N)
1371
* (RWorkspace: need BDSPAC)
1372
*
1373
CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1374
$ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1375
$ RWORK( IRWORK ), INFO )
1376
*
1377
* Multiply Q in A by left singular vectors of R in
1378
* WORK(IU), storing result in U
1379
* (CWorkspace: need N*N)
1380
* (RWorkspace: 0)
1381
*
1382
CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
1383
$ WORK( IU ), LDWRKU, CZERO, U, LDU )
1384
*
1385
ELSE
1386
*
1387
* Insufficient workspace for a fast algorithm
1388
*
1389
ITAU = 1
1390
IWORK = ITAU + N
1391
*
1392
* Compute A=Q*R, copying result to U
1393
* (CWorkspace: need 2*N, prefer N+N*NB)
1394
* (RWorkspace: 0)
1395
*
1396
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1397
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1398
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1399
*
1400
* Generate Q in U
1401
* (CWorkspace: need 2*N, prefer N+N*NB)
1402
* (RWorkspace: 0)
1403
*
1404
CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
1405
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1406
*
1407
* Copy R to VT, zeroing out below it
1408
*
1409
CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
1410
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1411
$ VT( 2, 1 ), LDVT )
1412
IE = 1
1413
ITAUQ = ITAU
1414
ITAUP = ITAUQ + N
1415
IWORK = ITAUP + N
1416
*
1417
* Bidiagonalize R in VT
1418
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1419
* (RWorkspace: need N)
1420
*
1421
CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1422
$ WORK( ITAUQ ), WORK( ITAUP ),
1423
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1424
*
1425
* Multiply Q in U by left bidiagonalizing vectors
1426
* in VT
1427
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1428
* (RWorkspace: 0)
1429
*
1430
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1431
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1432
$ LWORK-IWORK+1, IERR )
1433
*
1434
* Generate right bidiagonalizing vectors in VT
1435
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1436
* (RWorkspace: 0)
1437
*
1438
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1439
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1440
IRWORK = IE + N
1441
*
1442
* Perform bidiagonal QR iteration, computing left
1443
* singular vectors of A in U and computing right
1444
* singular vectors of A in VT
1445
* (CWorkspace: 0)
1446
* (RWorkspace: need BDSPAC)
1447
*
1448
CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1449
$ LDVT, U, LDU, CDUM, 1,
1450
$ RWORK( IRWORK ), INFO )
1451
*
1452
END IF
1453
*
1454
END IF
1455
*
1456
ELSE IF( WNTUA ) THEN
1457
*
1458
IF( WNTVN ) THEN
1459
*
1460
* Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1461
* M left singular vectors to be computed in U and
1462
* no right singular vectors to be computed
1463
*
1464
IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1465
*
1466
* Sufficient workspace for a fast algorithm
1467
*
1468
IR = 1
1469
IF( LWORK.GE.WRKBL+LDA*N ) THEN
1470
*
1471
* WORK(IR) is LDA by N
1472
*
1473
LDWRKR = LDA
1474
ELSE
1475
*
1476
* WORK(IR) is N by N
1477
*
1478
LDWRKR = N
1479
END IF
1480
ITAU = IR + LDWRKR*N
1481
IWORK = ITAU + N
1482
*
1483
* Compute A=Q*R, copying result to U
1484
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1485
* (RWorkspace: 0)
1486
*
1487
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1488
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1489
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1490
*
1491
* Copy R to WORK(IR), zeroing out below it
1492
*
1493
CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
1494
$ LDWRKR )
1495
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1496
$ WORK( IR+1 ), LDWRKR )
1497
*
1498
* Generate Q in U
1499
* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1500
* (RWorkspace: 0)
1501
*
1502
CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1503
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1504
IE = 1
1505
ITAUQ = ITAU
1506
ITAUP = ITAUQ + N
1507
IWORK = ITAUP + N
1508
*
1509
* Bidiagonalize R in WORK(IR)
1510
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1511
* (RWorkspace: need N)
1512
*
1513
CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
1514
$ RWORK( IE ), WORK( ITAUQ ),
1515
$ WORK( ITAUP ), WORK( IWORK ),
1516
$ LWORK-IWORK+1, IERR )
1517
*
1518
* Generate left bidiagonalizing vectors in WORK(IR)
1519
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1520
* (RWorkspace: 0)
1521
*
1522
CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1523
$ WORK( ITAUQ ), WORK( IWORK ),
1524
$ LWORK-IWORK+1, IERR )
1525
IRWORK = IE + N
1526
*
1527
* Perform bidiagonal QR iteration, computing left
1528
* singular vectors of R in WORK(IR)
1529
* (CWorkspace: need N*N)
1530
* (RWorkspace: need BDSPAC)
1531
*
1532
CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
1533
$ 1, WORK( IR ), LDWRKR, CDUM, 1,
1534
$ RWORK( IRWORK ), INFO )
1535
*
1536
* Multiply Q in U by left singular vectors of R in
1537
* WORK(IR), storing result in A
1538
* (CWorkspace: need N*N)
1539
* (RWorkspace: 0)
1540
*
1541
CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1542
$ WORK( IR ), LDWRKR, CZERO, A, LDA )
1543
*
1544
* Copy left singular vectors of A from A to U
1545
*
1546
CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1547
*
1548
ELSE
1549
*
1550
* Insufficient workspace for a fast algorithm
1551
*
1552
ITAU = 1
1553
IWORK = ITAU + N
1554
*
1555
* Compute A=Q*R, copying result to U
1556
* (CWorkspace: need 2*N, prefer N+N*NB)
1557
* (RWorkspace: 0)
1558
*
1559
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1560
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1561
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1562
*
1563
* Generate Q in U
1564
* (CWorkspace: need N+M, prefer N+M*NB)
1565
* (RWorkspace: 0)
1566
*
1567
CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1568
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1569
IE = 1
1570
ITAUQ = ITAU
1571
ITAUP = ITAUQ + N
1572
IWORK = ITAUP + N
1573
*
1574
* Zero out below R in A
1575
*
1576
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1577
$ A( 2, 1 ), LDA )
1578
*
1579
* Bidiagonalize R in A
1580
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1581
* (RWorkspace: need N)
1582
*
1583
CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1584
$ WORK( ITAUQ ), WORK( ITAUP ),
1585
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1586
*
1587
* Multiply Q in U by left bidiagonalizing vectors
1588
* in A
1589
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1590
* (RWorkspace: 0)
1591
*
1592
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1593
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1594
$ LWORK-IWORK+1, IERR )
1595
IRWORK = IE + N
1596
*
1597
* Perform bidiagonal QR iteration, computing left
1598
* singular vectors of A in U
1599
* (CWorkspace: 0)
1600
* (RWorkspace: need BDSPAC)
1601
*
1602
CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
1603
$ 1, U, LDU, CDUM, 1, RWORK( IRWORK ),
1604
$ INFO )
1605
*
1606
END IF
1607
*
1608
ELSE IF( WNTVO ) THEN
1609
*
1610
* Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1611
* M left singular vectors to be computed in U and
1612
* N right singular vectors to be overwritten on A
1613
*
1614
IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
1615
*
1616
* Sufficient workspace for a fast algorithm
1617
*
1618
IU = 1
1619
IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1620
*
1621
* WORK(IU) is LDA by N and WORK(IR) is LDA by N
1622
*
1623
LDWRKU = LDA
1624
IR = IU + LDWRKU*N
1625
LDWRKR = LDA
1626
ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1627
*
1628
* WORK(IU) is LDA by N and WORK(IR) is N by N
1629
*
1630
LDWRKU = LDA
1631
IR = IU + LDWRKU*N
1632
LDWRKR = N
1633
ELSE
1634
*
1635
* WORK(IU) is N by N and WORK(IR) is N by N
1636
*
1637
LDWRKU = N
1638
IR = IU + LDWRKU*N
1639
LDWRKR = N
1640
END IF
1641
ITAU = IR + LDWRKR*N
1642
IWORK = ITAU + N
1643
*
1644
* Compute A=Q*R, copying result to U
1645
* (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1646
* (RWorkspace: 0)
1647
*
1648
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1649
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1650
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1651
*
1652
* Generate Q in U
1653
* (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1654
* (RWorkspace: 0)
1655
*
1656
CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1657
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1658
*
1659
* Copy R to WORK(IU), zeroing out below it
1660
*
1661
CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1662
$ LDWRKU )
1663
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1664
$ WORK( IU+1 ), LDWRKU )
1665
IE = 1
1666
ITAUQ = ITAU
1667
ITAUP = ITAUQ + N
1668
IWORK = ITAUP + N
1669
*
1670
* Bidiagonalize R in WORK(IU), copying result to
1671
* WORK(IR)
1672
* (CWorkspace: need 2*N*N+3*N,
1673
* prefer 2*N*N+2*N+2*N*NB)
1674
* (RWorkspace: need N)
1675
*
1676
CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1677
$ RWORK( IE ), WORK( ITAUQ ),
1678
$ WORK( ITAUP ), WORK( IWORK ),
1679
$ LWORK-IWORK+1, IERR )
1680
CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1681
$ WORK( IR ), LDWRKR )
1682
*
1683
* Generate left bidiagonalizing vectors in WORK(IU)
1684
* (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
1685
* (RWorkspace: 0)
1686
*
1687
CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1688
$ WORK( ITAUQ ), WORK( IWORK ),
1689
$ LWORK-IWORK+1, IERR )
1690
*
1691
* Generate right bidiagonalizing vectors in WORK(IR)
1692
* (CWorkspace: need 2*N*N+3*N-1,
1693
* prefer 2*N*N+2*N+(N-1)*NB)
1694
* (RWorkspace: 0)
1695
*
1696
CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1697
$ WORK( ITAUP ), WORK( IWORK ),
1698
$ LWORK-IWORK+1, IERR )
1699
IRWORK = IE + N
1700
*
1701
* Perform bidiagonal QR iteration, computing left
1702
* singular vectors of R in WORK(IU) and computing
1703
* right singular vectors of R in WORK(IR)
1704
* (CWorkspace: need 2*N*N)
1705
* (RWorkspace: need BDSPAC)
1706
*
1707
CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
1708
$ WORK( IR ), LDWRKR, WORK( IU ),
1709
$ LDWRKU, CDUM, 1, RWORK( IRWORK ),
1710
$ INFO )
1711
*
1712
* Multiply Q in U by left singular vectors of R in
1713
* WORK(IU), storing result in A
1714
* (CWorkspace: need N*N)
1715
* (RWorkspace: 0)
1716
*
1717
CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1718
$ WORK( IU ), LDWRKU, CZERO, A, LDA )
1719
*
1720
* Copy left singular vectors of A from A to U
1721
*
1722
CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1723
*
1724
* Copy right singular vectors of R from WORK(IR) to A
1725
*
1726
CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1727
$ LDA )
1728
*
1729
ELSE
1730
*
1731
* Insufficient workspace for a fast algorithm
1732
*
1733
ITAU = 1
1734
IWORK = ITAU + N
1735
*
1736
* Compute A=Q*R, copying result to U
1737
* (CWorkspace: need 2*N, prefer N+N*NB)
1738
* (RWorkspace: 0)
1739
*
1740
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1741
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1742
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1743
*
1744
* Generate Q in U
1745
* (CWorkspace: need N+M, prefer N+M*NB)
1746
* (RWorkspace: 0)
1747
*
1748
CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1749
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1750
IE = 1
1751
ITAUQ = ITAU
1752
ITAUP = ITAUQ + N
1753
IWORK = ITAUP + N
1754
*
1755
* Zero out below R in A
1756
*
1757
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1758
$ A( 2, 1 ), LDA )
1759
*
1760
* Bidiagonalize R in A
1761
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1762
* (RWorkspace: need N)
1763
*
1764
CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
1765
$ WORK( ITAUQ ), WORK( ITAUP ),
1766
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1767
*
1768
* Multiply Q in U by left bidiagonalizing vectors
1769
* in A
1770
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1771
* (RWorkspace: 0)
1772
*
1773
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
1774
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1775
$ LWORK-IWORK+1, IERR )
1776
*
1777
* Generate right bidiagonalizing vectors in A
1778
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1779
* (RWorkspace: 0)
1780
*
1781
CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1782
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1783
IRWORK = IE + N
1784
*
1785
* Perform bidiagonal QR iteration, computing left
1786
* singular vectors of A in U and computing right
1787
* singular vectors of A in A
1788
* (CWorkspace: 0)
1789
* (RWorkspace: need BDSPAC)
1790
*
1791
CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
1792
$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
1793
$ INFO )
1794
*
1795
END IF
1796
*
1797
ELSE IF( WNTVAS ) THEN
1798
*
1799
* Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1800
* or 'A')
1801
* M left singular vectors to be computed in U and
1802
* N right singular vectors to be computed in VT
1803
*
1804
IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
1805
*
1806
* Sufficient workspace for a fast algorithm
1807
*
1808
IU = 1
1809
IF( LWORK.GE.WRKBL+LDA*N ) THEN
1810
*
1811
* WORK(IU) is LDA by N
1812
*
1813
LDWRKU = LDA
1814
ELSE
1815
*
1816
* WORK(IU) is N by N
1817
*
1818
LDWRKU = N
1819
END IF
1820
ITAU = IU + LDWRKU*N
1821
IWORK = ITAU + N
1822
*
1823
* Compute A=Q*R, copying result to U
1824
* (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
1825
* (RWorkspace: 0)
1826
*
1827
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1828
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1829
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1830
*
1831
* Generate Q in U
1832
* (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
1833
* (RWorkspace: 0)
1834
*
1835
CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1836
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1837
*
1838
* Copy R to WORK(IU), zeroing out below it
1839
*
1840
CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
1841
$ LDWRKU )
1842
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1843
$ WORK( IU+1 ), LDWRKU )
1844
IE = 1
1845
ITAUQ = ITAU
1846
ITAUP = ITAUQ + N
1847
IWORK = ITAUP + N
1848
*
1849
* Bidiagonalize R in WORK(IU), copying result to VT
1850
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
1851
* (RWorkspace: need N)
1852
*
1853
CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
1854
$ RWORK( IE ), WORK( ITAUQ ),
1855
$ WORK( ITAUP ), WORK( IWORK ),
1856
$ LWORK-IWORK+1, IERR )
1857
CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1858
$ LDVT )
1859
*
1860
* Generate left bidiagonalizing vectors in WORK(IU)
1861
* (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
1862
* (RWorkspace: 0)
1863
*
1864
CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1865
$ WORK( ITAUQ ), WORK( IWORK ),
1866
$ LWORK-IWORK+1, IERR )
1867
*
1868
* Generate right bidiagonalizing vectors in VT
1869
* (CWorkspace: need N*N+3*N-1,
1870
* prefer N*N+2*N+(N-1)*NB)
1871
* (RWorkspace: need 0)
1872
*
1873
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1874
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1875
IRWORK = IE + N
1876
*
1877
* Perform bidiagonal QR iteration, computing left
1878
* singular vectors of R in WORK(IU) and computing
1879
* right singular vectors of R in VT
1880
* (CWorkspace: need N*N)
1881
* (RWorkspace: need BDSPAC)
1882
*
1883
CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
1884
$ LDVT, WORK( IU ), LDWRKU, CDUM, 1,
1885
$ RWORK( IRWORK ), INFO )
1886
*
1887
* Multiply Q in U by left singular vectors of R in
1888
* WORK(IU), storing result in A
1889
* (CWorkspace: need N*N)
1890
* (RWorkspace: 0)
1891
*
1892
CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
1893
$ WORK( IU ), LDWRKU, CZERO, A, LDA )
1894
*
1895
* Copy left singular vectors of A from A to U
1896
*
1897
CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
1898
*
1899
ELSE
1900
*
1901
* Insufficient workspace for a fast algorithm
1902
*
1903
ITAU = 1
1904
IWORK = ITAU + N
1905
*
1906
* Compute A=Q*R, copying result to U
1907
* (CWorkspace: need 2*N, prefer N+N*NB)
1908
* (RWorkspace: 0)
1909
*
1910
CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
1911
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1912
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1913
*
1914
* Generate Q in U
1915
* (CWorkspace: need N+M, prefer N+M*NB)
1916
* (RWorkspace: 0)
1917
*
1918
CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
1919
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1920
*
1921
* Copy R from A to VT, zeroing out below it
1922
*
1923
CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
1924
CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
1925
$ VT( 2, 1 ), LDVT )
1926
IE = 1
1927
ITAUQ = ITAU
1928
ITAUP = ITAUQ + N
1929
IWORK = ITAUP + N
1930
*
1931
* Bidiagonalize R in VT
1932
* (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
1933
* (RWorkspace: need N)
1934
*
1935
CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
1936
$ WORK( ITAUQ ), WORK( ITAUP ),
1937
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1938
*
1939
* Multiply Q in U by left bidiagonalizing vectors
1940
* in VT
1941
* (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
1942
* (RWorkspace: 0)
1943
*
1944
CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
1945
$ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1946
$ LWORK-IWORK+1, IERR )
1947
*
1948
* Generate right bidiagonalizing vectors in VT
1949
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
1950
* (RWorkspace: 0)
1951
*
1952
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1953
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
1954
IRWORK = IE + N
1955
*
1956
* Perform bidiagonal QR iteration, computing left
1957
* singular vectors of A in U and computing right
1958
* singular vectors of A in VT
1959
* (CWorkspace: 0)
1960
* (RWorkspace: need BDSPAC)
1961
*
1962
CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
1963
$ LDVT, U, LDU, CDUM, 1,
1964
$ RWORK( IRWORK ), INFO )
1965
*
1966
END IF
1967
*
1968
END IF
1969
*
1970
END IF
1971
*
1972
ELSE
1973
*
1974
* M .LT. MNTHR
1975
*
1976
* Path 10 (M at least N, but not much larger)
1977
* Reduce to bidiagonal form without QR decomposition
1978
*
1979
IE = 1
1980
ITAUQ = 1
1981
ITAUP = ITAUQ + N
1982
IWORK = ITAUP + N
1983
*
1984
* Bidiagonalize A
1985
* (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
1986
* (RWorkspace: need N)
1987
*
1988
CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
1989
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1990
$ IERR )
1991
IF( WNTUAS ) THEN
1992
*
1993
* If left singular vectors desired in U, copy result to U
1994
* and generate left bidiagonalizing vectors in U
1995
* (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
1996
* (RWorkspace: 0)
1997
*
1998
CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
1999
IF( WNTUS )
2000
$ NCU = N
2001
IF( WNTUA )
2002
$ NCU = M
2003
CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
2004
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2005
END IF
2006
IF( WNTVAS ) THEN
2007
*
2008
* If right singular vectors desired in VT, copy result to
2009
* VT and generate right bidiagonalizing vectors in VT
2010
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2011
* (RWorkspace: 0)
2012
*
2013
CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
2014
CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
2015
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2016
END IF
2017
IF( WNTUO ) THEN
2018
*
2019
* If left singular vectors desired in A, generate left
2020
* bidiagonalizing vectors in A
2021
* (CWorkspace: need 3*N, prefer 2*N+N*NB)
2022
* (RWorkspace: 0)
2023
*
2024
CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
2025
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2026
END IF
2027
IF( WNTVO ) THEN
2028
*
2029
* If right singular vectors desired in A, generate right
2030
* bidiagonalizing vectors in A
2031
* (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
2032
* (RWorkspace: 0)
2033
*
2034
CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
2035
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2036
END IF
2037
IRWORK = IE + N
2038
IF( WNTUAS .OR. WNTUO )
2039
$ NRU = M
2040
IF( WNTUN )
2041
$ NRU = 0
2042
IF( WNTVAS .OR. WNTVO )
2043
$ NCVT = N
2044
IF( WNTVN )
2045
$ NCVT = 0
2046
IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
2047
*
2048
* Perform bidiagonal QR iteration, if desired, computing
2049
* left singular vectors in U and computing right singular
2050
* vectors in VT
2051
* (CWorkspace: 0)
2052
* (RWorkspace: need BDSPAC)
2053
*
2054
CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2055
$ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
2056
$ INFO )
2057
ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
2058
*
2059
* Perform bidiagonal QR iteration, if desired, computing
2060
* left singular vectors in U and computing right singular
2061
* vectors in A
2062
* (CWorkspace: 0)
2063
* (RWorkspace: need BDSPAC)
2064
*
2065
CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
2066
$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
2067
$ INFO )
2068
ELSE
2069
*
2070
* Perform bidiagonal QR iteration, if desired, computing
2071
* left singular vectors in A and computing right singular
2072
* vectors in VT
2073
* (CWorkspace: 0)
2074
* (RWorkspace: need BDSPAC)
2075
*
2076
CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
2077
$ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
2078
$ INFO )
2079
END IF
2080
*
2081
END IF
2082
*
2083
ELSE
2084
*
2085
* A has more columns than rows. If A has sufficiently more
2086
* columns than rows, first reduce using the LQ decomposition (if
2087
* sufficient workspace available)
2088
*
2089
IF( N.GE.MNTHR ) THEN
2090
*
2091
IF( WNTVN ) THEN
2092
*
2093
* Path 1t(N much larger than M, JOBVT='N')
2094
* No right singular vectors to be computed
2095
*
2096
ITAU = 1
2097
IWORK = ITAU + M
2098
*
2099
* Compute A=L*Q
2100
* (CWorkspace: need 2*M, prefer M+M*NB)
2101
* (RWorkspace: 0)
2102
*
2103
CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
2104
$ LWORK-IWORK+1, IERR )
2105
*
2106
* Zero out above L
2107
*
2108
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
2109
$ LDA )
2110
IE = 1
2111
ITAUQ = 1
2112
ITAUP = ITAUQ + M
2113
IWORK = ITAUP + M
2114
*
2115
* Bidiagonalize L in A
2116
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2117
* (RWorkspace: need M)
2118
*
2119
CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
2120
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2121
$ IERR )
2122
IF( WNTUO .OR. WNTUAS ) THEN
2123
*
2124
* If left singular vectors desired, generate Q
2125
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2126
* (RWorkspace: 0)
2127
*
2128
CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2129
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2130
END IF
2131
IRWORK = IE + M
2132
NRU = 0
2133
IF( WNTUO .OR. WNTUAS )
2134
$ NRU = M
2135
*
2136
* Perform bidiagonal QR iteration, computing left singular
2137
* vectors of A in A if desired
2138
* (CWorkspace: 0)
2139
* (RWorkspace: need BDSPAC)
2140
*
2141
CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
2142
$ A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
2143
*
2144
* If left singular vectors desired in U, copy them there
2145
*
2146
IF( WNTUAS )
2147
$ CALL CLACPY( 'F', M, M, A, LDA, U, LDU )
2148
*
2149
ELSE IF( WNTVO .AND. WNTUN ) THEN
2150
*
2151
* Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2152
* M right singular vectors to be overwritten on A and
2153
* no left singular vectors to be computed
2154
*
2155
IF( LWORK.GE.M*M+3*M ) THEN
2156
*
2157
* Sufficient workspace for a fast algorithm
2158
*
2159
IR = 1
2160
IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2161
*
2162
* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2163
*
2164
LDWRKU = LDA
2165
CHUNK = N
2166
LDWRKR = LDA
2167
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2168
*
2169
* WORK(IU) is LDA by N and WORK(IR) is M by M
2170
*
2171
LDWRKU = LDA
2172
CHUNK = N
2173
LDWRKR = M
2174
ELSE
2175
*
2176
* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2177
*
2178
LDWRKU = M
2179
CHUNK = ( LWORK-M*M ) / M
2180
LDWRKR = M
2181
END IF
2182
ITAU = IR + LDWRKR*M
2183
IWORK = ITAU + M
2184
*
2185
* Compute A=L*Q
2186
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2187
* (RWorkspace: 0)
2188
*
2189
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2190
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2191
*
2192
* Copy L to WORK(IR) and zero out above it
2193
*
2194
CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2195
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2196
$ WORK( IR+LDWRKR ), LDWRKR )
2197
*
2198
* Generate Q in A
2199
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2200
* (RWorkspace: 0)
2201
*
2202
CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2203
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2204
IE = 1
2205
ITAUQ = ITAU
2206
ITAUP = ITAUQ + M
2207
IWORK = ITAUP + M
2208
*
2209
* Bidiagonalize L in WORK(IR)
2210
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2211
* (RWorkspace: need M)
2212
*
2213
CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
2214
$ WORK( ITAUQ ), WORK( ITAUP ),
2215
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2216
*
2217
* Generate right vectors bidiagonalizing L
2218
* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2219
* (RWorkspace: 0)
2220
*
2221
CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2222
$ WORK( ITAUP ), WORK( IWORK ),
2223
$ LWORK-IWORK+1, IERR )
2224
IRWORK = IE + M
2225
*
2226
* Perform bidiagonal QR iteration, computing right
2227
* singular vectors of L in WORK(IR)
2228
* (CWorkspace: need M*M)
2229
* (RWorkspace: need BDSPAC)
2230
*
2231
CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2232
$ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2233
$ RWORK( IRWORK ), INFO )
2234
IU = ITAUQ
2235
*
2236
* Multiply right singular vectors of L in WORK(IR) by Q
2237
* in A, storing result in WORK(IU) and copying to A
2238
* (CWorkspace: need M*M+M, prefer M*M+M*N)
2239
* (RWorkspace: 0)
2240
*
2241
DO 30 I = 1, N, CHUNK
2242
BLK = MIN( N-I+1, CHUNK )
2243
CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2244
$ LDWRKR, A( 1, I ), LDA, CZERO,
2245
$ WORK( IU ), LDWRKU )
2246
CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2247
$ A( 1, I ), LDA )
2248
30 CONTINUE
2249
*
2250
ELSE
2251
*
2252
* Insufficient workspace for a fast algorithm
2253
*
2254
IE = 1
2255
ITAUQ = 1
2256
ITAUP = ITAUQ + M
2257
IWORK = ITAUP + M
2258
*
2259
* Bidiagonalize A
2260
* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
2261
* (RWorkspace: need M)
2262
*
2263
CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
2264
$ WORK( ITAUQ ), WORK( ITAUP ),
2265
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2266
*
2267
* Generate right vectors bidiagonalizing A
2268
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2269
* (RWorkspace: 0)
2270
*
2271
CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2272
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2273
IRWORK = IE + M
2274
*
2275
* Perform bidiagonal QR iteration, computing right
2276
* singular vectors of A in A
2277
* (CWorkspace: 0)
2278
* (RWorkspace: need BDSPAC)
2279
*
2280
CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
2281
$ CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
2282
*
2283
END IF
2284
*
2285
ELSE IF( WNTVO .AND. WNTUAS ) THEN
2286
*
2287
* Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2288
* M right singular vectors to be overwritten on A and
2289
* M left singular vectors to be computed in U
2290
*
2291
IF( LWORK.GE.M*M+3*M ) THEN
2292
*
2293
* Sufficient workspace for a fast algorithm
2294
*
2295
IR = 1
2296
IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
2297
*
2298
* WORK(IU) is LDA by N and WORK(IR) is LDA by M
2299
*
2300
LDWRKU = LDA
2301
CHUNK = N
2302
LDWRKR = LDA
2303
ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
2304
*
2305
* WORK(IU) is LDA by N and WORK(IR) is M by M
2306
*
2307
LDWRKU = LDA
2308
CHUNK = N
2309
LDWRKR = M
2310
ELSE
2311
*
2312
* WORK(IU) is M by CHUNK and WORK(IR) is M by M
2313
*
2314
LDWRKU = M
2315
CHUNK = ( LWORK-M*M ) / M
2316
LDWRKR = M
2317
END IF
2318
ITAU = IR + LDWRKR*M
2319
IWORK = ITAU + M
2320
*
2321
* Compute A=L*Q
2322
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2323
* (RWorkspace: 0)
2324
*
2325
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2326
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2327
*
2328
* Copy L to U, zeroing about above it
2329
*
2330
CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2331
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2332
$ LDU )
2333
*
2334
* Generate Q in A
2335
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2336
* (RWorkspace: 0)
2337
*
2338
CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2339
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2340
IE = 1
2341
ITAUQ = ITAU
2342
ITAUP = ITAUQ + M
2343
IWORK = ITAUP + M
2344
*
2345
* Bidiagonalize L in U, copying result to WORK(IR)
2346
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2347
* (RWorkspace: need M)
2348
*
2349
CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2350
$ WORK( ITAUQ ), WORK( ITAUP ),
2351
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2352
CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2353
*
2354
* Generate right vectors bidiagonalizing L in WORK(IR)
2355
* (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
2356
* (RWorkspace: 0)
2357
*
2358
CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2359
$ WORK( ITAUP ), WORK( IWORK ),
2360
$ LWORK-IWORK+1, IERR )
2361
*
2362
* Generate left vectors bidiagonalizing L in U
2363
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2364
* (RWorkspace: 0)
2365
*
2366
CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2367
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2368
IRWORK = IE + M
2369
*
2370
* Perform bidiagonal QR iteration, computing left
2371
* singular vectors of L in U, and computing right
2372
* singular vectors of L in WORK(IR)
2373
* (CWorkspace: need M*M)
2374
* (RWorkspace: need BDSPAC)
2375
*
2376
CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2377
$ WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
2378
$ RWORK( IRWORK ), INFO )
2379
IU = ITAUQ
2380
*
2381
* Multiply right singular vectors of L in WORK(IR) by Q
2382
* in A, storing result in WORK(IU) and copying to A
2383
* (CWorkspace: need M*M+M, prefer M*M+M*N))
2384
* (RWorkspace: 0)
2385
*
2386
DO 40 I = 1, N, CHUNK
2387
BLK = MIN( N-I+1, CHUNK )
2388
CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
2389
$ LDWRKR, A( 1, I ), LDA, CZERO,
2390
$ WORK( IU ), LDWRKU )
2391
CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2392
$ A( 1, I ), LDA )
2393
40 CONTINUE
2394
*
2395
ELSE
2396
*
2397
* Insufficient workspace for a fast algorithm
2398
*
2399
ITAU = 1
2400
IWORK = ITAU + M
2401
*
2402
* Compute A=L*Q
2403
* (CWorkspace: need 2*M, prefer M+M*NB)
2404
* (RWorkspace: 0)
2405
*
2406
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2407
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2408
*
2409
* Copy L to U, zeroing out above it
2410
*
2411
CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2412
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
2413
$ LDU )
2414
*
2415
* Generate Q in A
2416
* (CWorkspace: need 2*M, prefer M+M*NB)
2417
* (RWorkspace: 0)
2418
*
2419
CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2420
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2421
IE = 1
2422
ITAUQ = ITAU
2423
ITAUP = ITAUQ + M
2424
IWORK = ITAUP + M
2425
*
2426
* Bidiagonalize L in U
2427
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2428
* (RWorkspace: need M)
2429
*
2430
CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2431
$ WORK( ITAUQ ), WORK( ITAUP ),
2432
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2433
*
2434
* Multiply right vectors bidiagonalizing L by Q in A
2435
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2436
* (RWorkspace: 0)
2437
*
2438
CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2439
$ WORK( ITAUP ), A, LDA, WORK( IWORK ),
2440
$ LWORK-IWORK+1, IERR )
2441
*
2442
* Generate left vectors bidiagonalizing L in U
2443
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2444
* (RWorkspace: 0)
2445
*
2446
CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2447
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2448
IRWORK = IE + M
2449
*
2450
* Perform bidiagonal QR iteration, computing left
2451
* singular vectors of A in U and computing right
2452
* singular vectors of A in A
2453
* (CWorkspace: 0)
2454
* (RWorkspace: need BDSPAC)
2455
*
2456
CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
2457
$ U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
2458
*
2459
END IF
2460
*
2461
ELSE IF( WNTVS ) THEN
2462
*
2463
IF( WNTUN ) THEN
2464
*
2465
* Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2466
* M right singular vectors to be computed in VT and
2467
* no left singular vectors to be computed
2468
*
2469
IF( LWORK.GE.M*M+3*M ) THEN
2470
*
2471
* Sufficient workspace for a fast algorithm
2472
*
2473
IR = 1
2474
IF( LWORK.GE.WRKBL+LDA*M ) THEN
2475
*
2476
* WORK(IR) is LDA by M
2477
*
2478
LDWRKR = LDA
2479
ELSE
2480
*
2481
* WORK(IR) is M by M
2482
*
2483
LDWRKR = M
2484
END IF
2485
ITAU = IR + LDWRKR*M
2486
IWORK = ITAU + M
2487
*
2488
* Compute A=L*Q
2489
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2490
* (RWorkspace: 0)
2491
*
2492
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2493
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2494
*
2495
* Copy L to WORK(IR), zeroing out above it
2496
*
2497
CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
2498
$ LDWRKR )
2499
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2500
$ WORK( IR+LDWRKR ), LDWRKR )
2501
*
2502
* Generate Q in A
2503
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2504
* (RWorkspace: 0)
2505
*
2506
CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2507
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2508
IE = 1
2509
ITAUQ = ITAU
2510
ITAUP = ITAUQ + M
2511
IWORK = ITAUP + M
2512
*
2513
* Bidiagonalize L in WORK(IR)
2514
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2515
* (RWorkspace: need M)
2516
*
2517
CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
2518
$ RWORK( IE ), WORK( ITAUQ ),
2519
$ WORK( ITAUP ), WORK( IWORK ),
2520
$ LWORK-IWORK+1, IERR )
2521
*
2522
* Generate right vectors bidiagonalizing L in
2523
* WORK(IR)
2524
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
2525
* (RWorkspace: 0)
2526
*
2527
CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2528
$ WORK( ITAUP ), WORK( IWORK ),
2529
$ LWORK-IWORK+1, IERR )
2530
IRWORK = IE + M
2531
*
2532
* Perform bidiagonal QR iteration, computing right
2533
* singular vectors of L in WORK(IR)
2534
* (CWorkspace: need M*M)
2535
* (RWorkspace: need BDSPAC)
2536
*
2537
CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
2538
$ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
2539
$ RWORK( IRWORK ), INFO )
2540
*
2541
* Multiply right singular vectors of L in WORK(IR) by
2542
* Q in A, storing result in VT
2543
* (CWorkspace: need M*M)
2544
* (RWorkspace: 0)
2545
*
2546
CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
2547
$ LDWRKR, A, LDA, CZERO, VT, LDVT )
2548
*
2549
ELSE
2550
*
2551
* Insufficient workspace for a fast algorithm
2552
*
2553
ITAU = 1
2554
IWORK = ITAU + M
2555
*
2556
* Compute A=L*Q
2557
* (CWorkspace: need 2*M, prefer M+M*NB)
2558
* (RWorkspace: 0)
2559
*
2560
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2561
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2562
*
2563
* Copy result to VT
2564
*
2565
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2566
*
2567
* Generate Q in VT
2568
* (CWorkspace: need 2*M, prefer M+M*NB)
2569
* (RWorkspace: 0)
2570
*
2571
CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2572
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2573
IE = 1
2574
ITAUQ = ITAU
2575
ITAUP = ITAUQ + M
2576
IWORK = ITAUP + M
2577
*
2578
* Zero out above L in A
2579
*
2580
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2581
$ A( 1, 2 ), LDA )
2582
*
2583
* Bidiagonalize L in A
2584
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2585
* (RWorkspace: need M)
2586
*
2587
CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
2588
$ WORK( ITAUQ ), WORK( ITAUP ),
2589
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2590
*
2591
* Multiply right vectors bidiagonalizing L by Q in VT
2592
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2593
* (RWorkspace: 0)
2594
*
2595
CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2596
$ WORK( ITAUP ), VT, LDVT,
2597
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2598
IRWORK = IE + M
2599
*
2600
* Perform bidiagonal QR iteration, computing right
2601
* singular vectors of A in VT
2602
* (CWorkspace: 0)
2603
* (RWorkspace: need BDSPAC)
2604
*
2605
CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
2606
$ LDVT, CDUM, 1, CDUM, 1,
2607
$ RWORK( IRWORK ), INFO )
2608
*
2609
END IF
2610
*
2611
ELSE IF( WNTUO ) THEN
2612
*
2613
* Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2614
* M right singular vectors to be computed in VT and
2615
* M left singular vectors to be overwritten on A
2616
*
2617
IF( LWORK.GE.2*M*M+3*M ) THEN
2618
*
2619
* Sufficient workspace for a fast algorithm
2620
*
2621
IU = 1
2622
IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2623
*
2624
* WORK(IU) is LDA by M and WORK(IR) is LDA by M
2625
*
2626
LDWRKU = LDA
2627
IR = IU + LDWRKU*M
2628
LDWRKR = LDA
2629
ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2630
*
2631
* WORK(IU) is LDA by M and WORK(IR) is M by M
2632
*
2633
LDWRKU = LDA
2634
IR = IU + LDWRKU*M
2635
LDWRKR = M
2636
ELSE
2637
*
2638
* WORK(IU) is M by M and WORK(IR) is M by M
2639
*
2640
LDWRKU = M
2641
IR = IU + LDWRKU*M
2642
LDWRKR = M
2643
END IF
2644
ITAU = IR + LDWRKR*M
2645
IWORK = ITAU + M
2646
*
2647
* Compute A=L*Q
2648
* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2649
* (RWorkspace: 0)
2650
*
2651
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2652
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2653
*
2654
* Copy L to WORK(IU), zeroing out below it
2655
*
2656
CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
2657
$ LDWRKU )
2658
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2659
$ WORK( IU+LDWRKU ), LDWRKU )
2660
*
2661
* Generate Q in A
2662
* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2663
* (RWorkspace: 0)
2664
*
2665
CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2666
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2667
IE = 1
2668
ITAUQ = ITAU
2669
ITAUP = ITAUQ + M
2670
IWORK = ITAUP + M
2671
*
2672
* Bidiagonalize L in WORK(IU), copying result to
2673
* WORK(IR)
2674
* (CWorkspace: need 2*M*M+3*M,
2675
* prefer 2*M*M+2*M+2*M*NB)
2676
* (RWorkspace: need M)
2677
*
2678
CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
2679
$ RWORK( IE ), WORK( ITAUQ ),
2680
$ WORK( ITAUP ), WORK( IWORK ),
2681
$ LWORK-IWORK+1, IERR )
2682
CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2683
$ WORK( IR ), LDWRKR )
2684
*
2685
* Generate right bidiagonalizing vectors in WORK(IU)
2686
* (CWorkspace: need 2*M*M+3*M-1,
2687
* prefer 2*M*M+2*M+(M-1)*NB)
2688
* (RWorkspace: 0)
2689
*
2690
CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2691
$ WORK( ITAUP ), WORK( IWORK ),
2692
$ LWORK-IWORK+1, IERR )
2693
*
2694
* Generate left bidiagonalizing vectors in WORK(IR)
2695
* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
2696
* (RWorkspace: 0)
2697
*
2698
CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2699
$ WORK( ITAUQ ), WORK( IWORK ),
2700
$ LWORK-IWORK+1, IERR )
2701
IRWORK = IE + M
2702
*
2703
* Perform bidiagonal QR iteration, computing left
2704
* singular vectors of L in WORK(IR) and computing
2705
* right singular vectors of L in WORK(IU)
2706
* (CWorkspace: need 2*M*M)
2707
* (RWorkspace: need BDSPAC)
2708
*
2709
CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2710
$ WORK( IU ), LDWRKU, WORK( IR ),
2711
$ LDWRKR, CDUM, 1, RWORK( IRWORK ),
2712
$ INFO )
2713
*
2714
* Multiply right singular vectors of L in WORK(IU) by
2715
* Q in A, storing result in VT
2716
* (CWorkspace: need M*M)
2717
* (RWorkspace: 0)
2718
*
2719
CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2720
$ LDWRKU, A, LDA, CZERO, VT, LDVT )
2721
*
2722
* Copy left singular vectors of L to A
2723
* (CWorkspace: need M*M)
2724
* (RWorkspace: 0)
2725
*
2726
CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2727
$ LDA )
2728
*
2729
ELSE
2730
*
2731
* Insufficient workspace for a fast algorithm
2732
*
2733
ITAU = 1
2734
IWORK = ITAU + M
2735
*
2736
* Compute A=L*Q, copying result to VT
2737
* (CWorkspace: need 2*M, prefer M+M*NB)
2738
* (RWorkspace: 0)
2739
*
2740
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2741
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2742
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2743
*
2744
* Generate Q in VT
2745
* (CWorkspace: need 2*M, prefer M+M*NB)
2746
* (RWorkspace: 0)
2747
*
2748
CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2749
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2750
IE = 1
2751
ITAUQ = ITAU
2752
ITAUP = ITAUQ + M
2753
IWORK = ITAUP + M
2754
*
2755
* Zero out above L in A
2756
*
2757
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2758
$ A( 1, 2 ), LDA )
2759
*
2760
* Bidiagonalize L in A
2761
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2762
* (RWorkspace: need M)
2763
*
2764
CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
2765
$ WORK( ITAUQ ), WORK( ITAUP ),
2766
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2767
*
2768
* Multiply right vectors bidiagonalizing L by Q in VT
2769
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2770
* (RWorkspace: 0)
2771
*
2772
CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
2773
$ WORK( ITAUP ), VT, LDVT,
2774
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2775
*
2776
* Generate left bidiagonalizing vectors of L in A
2777
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2778
* (RWorkspace: 0)
2779
*
2780
CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2781
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2782
IRWORK = IE + M
2783
*
2784
* Perform bidiagonal QR iteration, computing left
2785
* singular vectors of A in A and computing right
2786
* singular vectors of A in VT
2787
* (CWorkspace: 0)
2788
* (RWorkspace: need BDSPAC)
2789
*
2790
CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2791
$ LDVT, A, LDA, CDUM, 1,
2792
$ RWORK( IRWORK ), INFO )
2793
*
2794
END IF
2795
*
2796
ELSE IF( WNTUAS ) THEN
2797
*
2798
* Path 6t(N much larger than M, JOBU='S' or 'A',
2799
* JOBVT='S')
2800
* M right singular vectors to be computed in VT and
2801
* M left singular vectors to be computed in U
2802
*
2803
IF( LWORK.GE.M*M+3*M ) THEN
2804
*
2805
* Sufficient workspace for a fast algorithm
2806
*
2807
IU = 1
2808
IF( LWORK.GE.WRKBL+LDA*M ) THEN
2809
*
2810
* WORK(IU) is LDA by N
2811
*
2812
LDWRKU = LDA
2813
ELSE
2814
*
2815
* WORK(IU) is LDA by M
2816
*
2817
LDWRKU = M
2818
END IF
2819
ITAU = IU + LDWRKU*M
2820
IWORK = ITAU + M
2821
*
2822
* Compute A=L*Q
2823
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2824
* (RWorkspace: 0)
2825
*
2826
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2827
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2828
*
2829
* Copy L to WORK(IU), zeroing out above it
2830
*
2831
CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
2832
$ LDWRKU )
2833
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2834
$ WORK( IU+LDWRKU ), LDWRKU )
2835
*
2836
* Generate Q in A
2837
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2838
* (RWorkspace: 0)
2839
*
2840
CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
2841
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2842
IE = 1
2843
ITAUQ = ITAU
2844
ITAUP = ITAUQ + M
2845
IWORK = ITAUP + M
2846
*
2847
* Bidiagonalize L in WORK(IU), copying result to U
2848
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
2849
* (RWorkspace: need M)
2850
*
2851
CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
2852
$ RWORK( IE ), WORK( ITAUQ ),
2853
$ WORK( ITAUP ), WORK( IWORK ),
2854
$ LWORK-IWORK+1, IERR )
2855
CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2856
$ LDU )
2857
*
2858
* Generate right bidiagonalizing vectors in WORK(IU)
2859
* (CWorkspace: need M*M+3*M-1,
2860
* prefer M*M+2*M+(M-1)*NB)
2861
* (RWorkspace: 0)
2862
*
2863
CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2864
$ WORK( ITAUP ), WORK( IWORK ),
2865
$ LWORK-IWORK+1, IERR )
2866
*
2867
* Generate left bidiagonalizing vectors in U
2868
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
2869
* (RWorkspace: 0)
2870
*
2871
CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2872
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2873
IRWORK = IE + M
2874
*
2875
* Perform bidiagonal QR iteration, computing left
2876
* singular vectors of L in U and computing right
2877
* singular vectors of L in WORK(IU)
2878
* (CWorkspace: need M*M)
2879
* (RWorkspace: need BDSPAC)
2880
*
2881
CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
2882
$ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
2883
$ RWORK( IRWORK ), INFO )
2884
*
2885
* Multiply right singular vectors of L in WORK(IU) by
2886
* Q in A, storing result in VT
2887
* (CWorkspace: need M*M)
2888
* (RWorkspace: 0)
2889
*
2890
CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
2891
$ LDWRKU, A, LDA, CZERO, VT, LDVT )
2892
*
2893
ELSE
2894
*
2895
* Insufficient workspace for a fast algorithm
2896
*
2897
ITAU = 1
2898
IWORK = ITAU + M
2899
*
2900
* Compute A=L*Q, copying result to VT
2901
* (CWorkspace: need 2*M, prefer M+M*NB)
2902
* (RWorkspace: 0)
2903
*
2904
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2905
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2906
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2907
*
2908
* Generate Q in VT
2909
* (CWorkspace: need 2*M, prefer M+M*NB)
2910
* (RWorkspace: 0)
2911
*
2912
CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2913
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2914
*
2915
* Copy L to U, zeroing out above it
2916
*
2917
CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
2918
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
2919
$ U( 1, 2 ), LDU )
2920
IE = 1
2921
ITAUQ = ITAU
2922
ITAUP = ITAUQ + M
2923
IWORK = ITAUP + M
2924
*
2925
* Bidiagonalize L in U
2926
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
2927
* (RWorkspace: need M)
2928
*
2929
CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
2930
$ WORK( ITAUQ ), WORK( ITAUP ),
2931
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2932
*
2933
* Multiply right bidiagonalizing vectors in U by Q
2934
* in VT
2935
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
2936
* (RWorkspace: 0)
2937
*
2938
CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
2939
$ WORK( ITAUP ), VT, LDVT,
2940
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2941
*
2942
* Generate left bidiagonalizing vectors in U
2943
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
2944
* (RWorkspace: 0)
2945
*
2946
CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2947
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2948
IRWORK = IE + M
2949
*
2950
* Perform bidiagonal QR iteration, computing left
2951
* singular vectors of A in U and computing right
2952
* singular vectors of A in VT
2953
* (CWorkspace: 0)
2954
* (RWorkspace: need BDSPAC)
2955
*
2956
CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
2957
$ LDVT, U, LDU, CDUM, 1,
2958
$ RWORK( IRWORK ), INFO )
2959
*
2960
END IF
2961
*
2962
END IF
2963
*
2964
ELSE IF( WNTVA ) THEN
2965
*
2966
IF( WNTUN ) THEN
2967
*
2968
* Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2969
* N right singular vectors to be computed in VT and
2970
* no left singular vectors to be computed
2971
*
2972
IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
2973
*
2974
* Sufficient workspace for a fast algorithm
2975
*
2976
IR = 1
2977
IF( LWORK.GE.WRKBL+LDA*M ) THEN
2978
*
2979
* WORK(IR) is LDA by M
2980
*
2981
LDWRKR = LDA
2982
ELSE
2983
*
2984
* WORK(IR) is M by M
2985
*
2986
LDWRKR = M
2987
END IF
2988
ITAU = IR + LDWRKR*M
2989
IWORK = ITAU + M
2990
*
2991
* Compute A=L*Q, copying result to VT
2992
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
2993
* (RWorkspace: 0)
2994
*
2995
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
2996
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
2997
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
2998
*
2999
* Copy L to WORK(IR), zeroing out above it
3000
*
3001
CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
3002
$ LDWRKR )
3003
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3004
$ WORK( IR+LDWRKR ), LDWRKR )
3005
*
3006
* Generate Q in VT
3007
* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3008
* (RWorkspace: 0)
3009
*
3010
CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3011
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3012
IE = 1
3013
ITAUQ = ITAU
3014
ITAUP = ITAUQ + M
3015
IWORK = ITAUP + M
3016
*
3017
* Bidiagonalize L in WORK(IR)
3018
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3019
* (RWorkspace: need M)
3020
*
3021
CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
3022
$ RWORK( IE ), WORK( ITAUQ ),
3023
$ WORK( ITAUP ), WORK( IWORK ),
3024
$ LWORK-IWORK+1, IERR )
3025
*
3026
* Generate right bidiagonalizing vectors in WORK(IR)
3027
* (CWorkspace: need M*M+3*M-1,
3028
* prefer M*M+2*M+(M-1)*NB)
3029
* (RWorkspace: 0)
3030
*
3031
CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
3032
$ WORK( ITAUP ), WORK( IWORK ),
3033
$ LWORK-IWORK+1, IERR )
3034
IRWORK = IE + M
3035
*
3036
* Perform bidiagonal QR iteration, computing right
3037
* singular vectors of L in WORK(IR)
3038
* (CWorkspace: need M*M)
3039
* (RWorkspace: need BDSPAC)
3040
*
3041
CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
3042
$ WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
3043
$ RWORK( IRWORK ), INFO )
3044
*
3045
* Multiply right singular vectors of L in WORK(IR) by
3046
* Q in VT, storing result in A
3047
* (CWorkspace: need M*M)
3048
* (RWorkspace: 0)
3049
*
3050
CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
3051
$ LDWRKR, VT, LDVT, CZERO, A, LDA )
3052
*
3053
* Copy right singular vectors of A from A to VT
3054
*
3055
CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3056
*
3057
ELSE
3058
*
3059
* Insufficient workspace for a fast algorithm
3060
*
3061
ITAU = 1
3062
IWORK = ITAU + M
3063
*
3064
* Compute A=L*Q, copying result to VT
3065
* (CWorkspace: need 2*M, prefer M+M*NB)
3066
* (RWorkspace: 0)
3067
*
3068
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3069
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3070
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3071
*
3072
* Generate Q in VT
3073
* (CWorkspace: need M+N, prefer M+N*NB)
3074
* (RWorkspace: 0)
3075
*
3076
CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3077
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3078
IE = 1
3079
ITAUQ = ITAU
3080
ITAUP = ITAUQ + M
3081
IWORK = ITAUP + M
3082
*
3083
* Zero out above L in A
3084
*
3085
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3086
$ A( 1, 2 ), LDA )
3087
*
3088
* Bidiagonalize L in A
3089
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3090
* (RWorkspace: need M)
3091
*
3092
CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
3093
$ WORK( ITAUQ ), WORK( ITAUP ),
3094
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3095
*
3096
* Multiply right bidiagonalizing vectors in A by Q
3097
* in VT
3098
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3099
* (RWorkspace: 0)
3100
*
3101
CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3102
$ WORK( ITAUP ), VT, LDVT,
3103
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3104
IRWORK = IE + M
3105
*
3106
* Perform bidiagonal QR iteration, computing right
3107
* singular vectors of A in VT
3108
* (CWorkspace: 0)
3109
* (RWorkspace: need BDSPAC)
3110
*
3111
CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
3112
$ LDVT, CDUM, 1, CDUM, 1,
3113
$ RWORK( IRWORK ), INFO )
3114
*
3115
END IF
3116
*
3117
ELSE IF( WNTUO ) THEN
3118
*
3119
* Path 8t(N much larger than M, JOBU='O', JOBVT='A')
3120
* N right singular vectors to be computed in VT and
3121
* M left singular vectors to be overwritten on A
3122
*
3123
IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
3124
*
3125
* Sufficient workspace for a fast algorithm
3126
*
3127
IU = 1
3128
IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
3129
*
3130
* WORK(IU) is LDA by M and WORK(IR) is LDA by M
3131
*
3132
LDWRKU = LDA
3133
IR = IU + LDWRKU*M
3134
LDWRKR = LDA
3135
ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
3136
*
3137
* WORK(IU) is LDA by M and WORK(IR) is M by M
3138
*
3139
LDWRKU = LDA
3140
IR = IU + LDWRKU*M
3141
LDWRKR = M
3142
ELSE
3143
*
3144
* WORK(IU) is M by M and WORK(IR) is M by M
3145
*
3146
LDWRKU = M
3147
IR = IU + LDWRKU*M
3148
LDWRKR = M
3149
END IF
3150
ITAU = IR + LDWRKR*M
3151
IWORK = ITAU + M
3152
*
3153
* Compute A=L*Q, copying result to VT
3154
* (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
3155
* (RWorkspace: 0)
3156
*
3157
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3158
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3159
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3160
*
3161
* Generate Q in VT
3162
* (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
3163
* (RWorkspace: 0)
3164
*
3165
CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3166
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3167
*
3168
* Copy L to WORK(IU), zeroing out above it
3169
*
3170
CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
3171
$ LDWRKU )
3172
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3173
$ WORK( IU+LDWRKU ), LDWRKU )
3174
IE = 1
3175
ITAUQ = ITAU
3176
ITAUP = ITAUQ + M
3177
IWORK = ITAUP + M
3178
*
3179
* Bidiagonalize L in WORK(IU), copying result to
3180
* WORK(IR)
3181
* (CWorkspace: need 2*M*M+3*M,
3182
* prefer 2*M*M+2*M+2*M*NB)
3183
* (RWorkspace: need M)
3184
*
3185
CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
3186
$ RWORK( IE ), WORK( ITAUQ ),
3187
$ WORK( ITAUP ), WORK( IWORK ),
3188
$ LWORK-IWORK+1, IERR )
3189
CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
3190
$ WORK( IR ), LDWRKR )
3191
*
3192
* Generate right bidiagonalizing vectors in WORK(IU)
3193
* (CWorkspace: need 2*M*M+3*M-1,
3194
* prefer 2*M*M+2*M+(M-1)*NB)
3195
* (RWorkspace: 0)
3196
*
3197
CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3198
$ WORK( ITAUP ), WORK( IWORK ),
3199
$ LWORK-IWORK+1, IERR )
3200
*
3201
* Generate left bidiagonalizing vectors in WORK(IR)
3202
* (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
3203
* (RWorkspace: 0)
3204
*
3205
CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3206
$ WORK( ITAUQ ), WORK( IWORK ),
3207
$ LWORK-IWORK+1, IERR )
3208
IRWORK = IE + M
3209
*
3210
* Perform bidiagonal QR iteration, computing left
3211
* singular vectors of L in WORK(IR) and computing
3212
* right singular vectors of L in WORK(IU)
3213
* (CWorkspace: need 2*M*M)
3214
* (RWorkspace: need BDSPAC)
3215
*
3216
CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3217
$ WORK( IU ), LDWRKU, WORK( IR ),
3218
$ LDWRKR, CDUM, 1, RWORK( IRWORK ),
3219
$ INFO )
3220
*
3221
* Multiply right singular vectors of L in WORK(IU) by
3222
* Q in VT, storing result in A
3223
* (CWorkspace: need M*M)
3224
* (RWorkspace: 0)
3225
*
3226
CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3227
$ LDWRKU, VT, LDVT, CZERO, A, LDA )
3228
*
3229
* Copy right singular vectors of A from A to VT
3230
*
3231
CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3232
*
3233
* Copy left singular vectors of A from WORK(IR) to A
3234
*
3235
CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3236
$ LDA )
3237
*
3238
ELSE
3239
*
3240
* Insufficient workspace for a fast algorithm
3241
*
3242
ITAU = 1
3243
IWORK = ITAU + M
3244
*
3245
* Compute A=L*Q, copying result to VT
3246
* (CWorkspace: need 2*M, prefer M+M*NB)
3247
* (RWorkspace: 0)
3248
*
3249
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3250
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3251
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3252
*
3253
* Generate Q in VT
3254
* (CWorkspace: need M+N, prefer M+N*NB)
3255
* (RWorkspace: 0)
3256
*
3257
CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3258
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3259
IE = 1
3260
ITAUQ = ITAU
3261
ITAUP = ITAUQ + M
3262
IWORK = ITAUP + M
3263
*
3264
* Zero out above L in A
3265
*
3266
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3267
$ A( 1, 2 ), LDA )
3268
*
3269
* Bidiagonalize L in A
3270
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3271
* (RWorkspace: need M)
3272
*
3273
CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
3274
$ WORK( ITAUQ ), WORK( ITAUP ),
3275
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3276
*
3277
* Multiply right bidiagonalizing vectors in A by Q
3278
* in VT
3279
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3280
* (RWorkspace: 0)
3281
*
3282
CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
3283
$ WORK( ITAUP ), VT, LDVT,
3284
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3285
*
3286
* Generate left bidiagonalizing vectors in A
3287
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3288
* (RWorkspace: 0)
3289
*
3290
CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3291
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3292
IRWORK = IE + M
3293
*
3294
* Perform bidiagonal QR iteration, computing left
3295
* singular vectors of A in A and computing right
3296
* singular vectors of A in VT
3297
* (CWorkspace: 0)
3298
* (RWorkspace: need BDSPAC)
3299
*
3300
CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3301
$ LDVT, A, LDA, CDUM, 1,
3302
$ RWORK( IRWORK ), INFO )
3303
*
3304
END IF
3305
*
3306
ELSE IF( WNTUAS ) THEN
3307
*
3308
* Path 9t(N much larger than M, JOBU='S' or 'A',
3309
* JOBVT='A')
3310
* N right singular vectors to be computed in VT and
3311
* M left singular vectors to be computed in U
3312
*
3313
IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
3314
*
3315
* Sufficient workspace for a fast algorithm
3316
*
3317
IU = 1
3318
IF( LWORK.GE.WRKBL+LDA*M ) THEN
3319
*
3320
* WORK(IU) is LDA by M
3321
*
3322
LDWRKU = LDA
3323
ELSE
3324
*
3325
* WORK(IU) is M by M
3326
*
3327
LDWRKU = M
3328
END IF
3329
ITAU = IU + LDWRKU*M
3330
IWORK = ITAU + M
3331
*
3332
* Compute A=L*Q, copying result to VT
3333
* (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
3334
* (RWorkspace: 0)
3335
*
3336
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3337
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3338
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3339
*
3340
* Generate Q in VT
3341
* (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
3342
* (RWorkspace: 0)
3343
*
3344
CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3345
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3346
*
3347
* Copy L to WORK(IU), zeroing out above it
3348
*
3349
CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
3350
$ LDWRKU )
3351
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3352
$ WORK( IU+LDWRKU ), LDWRKU )
3353
IE = 1
3354
ITAUQ = ITAU
3355
ITAUP = ITAUQ + M
3356
IWORK = ITAUP + M
3357
*
3358
* Bidiagonalize L in WORK(IU), copying result to U
3359
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
3360
* (RWorkspace: need M)
3361
*
3362
CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
3363
$ RWORK( IE ), WORK( ITAUQ ),
3364
$ WORK( ITAUP ), WORK( IWORK ),
3365
$ LWORK-IWORK+1, IERR )
3366
CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3367
$ LDU )
3368
*
3369
* Generate right bidiagonalizing vectors in WORK(IU)
3370
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
3371
* (RWorkspace: 0)
3372
*
3373
CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3374
$ WORK( ITAUP ), WORK( IWORK ),
3375
$ LWORK-IWORK+1, IERR )
3376
*
3377
* Generate left bidiagonalizing vectors in U
3378
* (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
3379
* (RWorkspace: 0)
3380
*
3381
CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3382
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3383
IRWORK = IE + M
3384
*
3385
* Perform bidiagonal QR iteration, computing left
3386
* singular vectors of L in U and computing right
3387
* singular vectors of L in WORK(IU)
3388
* (CWorkspace: need M*M)
3389
* (RWorkspace: need BDSPAC)
3390
*
3391
CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
3392
$ WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
3393
$ RWORK( IRWORK ), INFO )
3394
*
3395
* Multiply right singular vectors of L in WORK(IU) by
3396
* Q in VT, storing result in A
3397
* (CWorkspace: need M*M)
3398
* (RWorkspace: 0)
3399
*
3400
CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
3401
$ LDWRKU, VT, LDVT, CZERO, A, LDA )
3402
*
3403
* Copy right singular vectors of A from A to VT
3404
*
3405
CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
3406
*
3407
ELSE
3408
*
3409
* Insufficient workspace for a fast algorithm
3410
*
3411
ITAU = 1
3412
IWORK = ITAU + M
3413
*
3414
* Compute A=L*Q, copying result to VT
3415
* (CWorkspace: need 2*M, prefer M+M*NB)
3416
* (RWorkspace: 0)
3417
*
3418
CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
3419
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3420
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3421
*
3422
* Generate Q in VT
3423
* (CWorkspace: need M+N, prefer M+N*NB)
3424
* (RWorkspace: 0)
3425
*
3426
CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3427
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3428
*
3429
* Copy L to U, zeroing out above it
3430
*
3431
CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3432
CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
3433
$ U( 1, 2 ), LDU )
3434
IE = 1
3435
ITAUQ = ITAU
3436
ITAUP = ITAUQ + M
3437
IWORK = ITAUP + M
3438
*
3439
* Bidiagonalize L in U
3440
* (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
3441
* (RWorkspace: need M)
3442
*
3443
CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
3444
$ WORK( ITAUQ ), WORK( ITAUP ),
3445
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3446
*
3447
* Multiply right bidiagonalizing vectors in U by Q
3448
* in VT
3449
* (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
3450
* (RWorkspace: 0)
3451
*
3452
CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
3453
$ WORK( ITAUP ), VT, LDVT,
3454
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3455
*
3456
* Generate left bidiagonalizing vectors in U
3457
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3458
* (RWorkspace: 0)
3459
*
3460
CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3461
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3462
IRWORK = IE + M
3463
*
3464
* Perform bidiagonal QR iteration, computing left
3465
* singular vectors of A in U and computing right
3466
* singular vectors of A in VT
3467
* (CWorkspace: 0)
3468
* (RWorkspace: need BDSPAC)
3469
*
3470
CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
3471
$ LDVT, U, LDU, CDUM, 1,
3472
$ RWORK( IRWORK ), INFO )
3473
*
3474
END IF
3475
*
3476
END IF
3477
*
3478
END IF
3479
*
3480
ELSE
3481
*
3482
* N .LT. MNTHR
3483
*
3484
* Path 10t(N greater than M, but not much larger)
3485
* Reduce to bidiagonal form without LQ decomposition
3486
*
3487
IE = 1
3488
ITAUQ = 1
3489
ITAUP = ITAUQ + M
3490
IWORK = ITAUP + M
3491
*
3492
* Bidiagonalize A
3493
* (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
3494
* (RWorkspace: M)
3495
*
3496
CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
3497
$ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3498
$ IERR )
3499
IF( WNTUAS ) THEN
3500
*
3501
* If left singular vectors desired in U, copy result to U
3502
* and generate left bidiagonalizing vectors in U
3503
* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3504
* (RWorkspace: 0)
3505
*
3506
CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
3507
CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3508
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3509
END IF
3510
IF( WNTVAS ) THEN
3511
*
3512
* If right singular vectors desired in VT, copy result to
3513
* VT and generate right bidiagonalizing vectors in VT
3514
* (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
3515
* (RWorkspace: 0)
3516
*
3517
CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
3518
IF( WNTVA )
3519
$ NRVT = N
3520
IF( WNTVS )
3521
$ NRVT = M
3522
CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3523
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3524
END IF
3525
IF( WNTUO ) THEN
3526
*
3527
* If left singular vectors desired in A, generate left
3528
* bidiagonalizing vectors in A
3529
* (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
3530
* (RWorkspace: 0)
3531
*
3532
CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3533
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3534
END IF
3535
IF( WNTVO ) THEN
3536
*
3537
* If right singular vectors desired in A, generate right
3538
* bidiagonalizing vectors in A
3539
* (CWorkspace: need 3*M, prefer 2*M+M*NB)
3540
* (RWorkspace: 0)
3541
*
3542
CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3543
$ WORK( IWORK ), LWORK-IWORK+1, IERR )
3544
END IF
3545
IRWORK = IE + M
3546
IF( WNTUAS .OR. WNTUO )
3547
$ NRU = M
3548
IF( WNTUN )
3549
$ NRU = 0
3550
IF( WNTVAS .OR. WNTVO )
3551
$ NCVT = N
3552
IF( WNTVN )
3553
$ NCVT = 0
3554
IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3555
*
3556
* Perform bidiagonal QR iteration, if desired, computing
3557
* left singular vectors in U and computing right singular
3558
* vectors in VT
3559
* (CWorkspace: 0)
3560
* (RWorkspace: need BDSPAC)
3561
*
3562
CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3563
$ LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
3564
$ INFO )
3565
ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3566
*
3567
* Perform bidiagonal QR iteration, if desired, computing
3568
* left singular vectors in U and computing right singular
3569
* vectors in A
3570
* (CWorkspace: 0)
3571
* (RWorkspace: need BDSPAC)
3572
*
3573
CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
3574
$ LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
3575
$ INFO )
3576
ELSE
3577
*
3578
* Perform bidiagonal QR iteration, if desired, computing
3579
* left singular vectors in A and computing right singular
3580
* vectors in VT
3581
* (CWorkspace: 0)
3582
* (RWorkspace: need BDSPAC)
3583
*
3584
CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
3585
$ LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
3586
$ INFO )
3587
END IF
3588
*
3589
END IF
3590
*
3591
END IF
3592
*
3593
* Undo scaling if necessary
3594
*
3595
IF( ISCL.EQ.1 ) THEN
3596
IF( ANRM.GT.BIGNUM )
3597
$ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3598
$ IERR )
3599
IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3600
$ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
3601
$ RWORK( IE ), MINMN, IERR )
3602
IF( ANRM.LT.SMLNUM )
3603
$ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3604
$ IERR )
3605
IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3606
$ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
3607
$ RWORK( IE ), MINMN, IERR )
3608
END IF
3609
*
3610
* Return optimal workspace in WORK(1)
3611
*
3612
WORK( 1 ) = MAXWRK
3613
*
3614
RETURN
3615
*
3616
* End of CGESVD
3617
*
3618
END
3619
3620