Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/matrix_decomp.cpp
16337 views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html
4
5
6
#include "precomp.hpp"
7
8
namespace cv { namespace hal {
9
10
/****************************************************************************************\
11
* LU & Cholesky implementation for small matrices *
12
\****************************************************************************************/
13
14
template<typename _Tp> static inline int
15
LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
16
{
17
int i, j, k, p = 1;
18
astep /= sizeof(A[0]);
19
bstep /= sizeof(b[0]);
20
21
for( i = 0; i < m; i++ )
22
{
23
k = i;
24
25
for( j = i+1; j < m; j++ )
26
if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )
27
k = j;
28
29
if( std::abs(A[k*astep + i]) < eps )
30
return 0;
31
32
if( k != i )
33
{
34
for( j = i; j < m; j++ )
35
std::swap(A[i*astep + j], A[k*astep + j]);
36
if( b )
37
for( j = 0; j < n; j++ )
38
std::swap(b[i*bstep + j], b[k*bstep + j]);
39
p = -p;
40
}
41
42
_Tp d = -1/A[i*astep + i];
43
44
for( j = i+1; j < m; j++ )
45
{
46
_Tp alpha = A[j*astep + i]*d;
47
48
for( k = i+1; k < m; k++ )
49
A[j*astep + k] += alpha*A[i*astep + k];
50
51
if( b )
52
for( k = 0; k < n; k++ )
53
b[j*bstep + k] += alpha*b[i*bstep + k];
54
}
55
}
56
57
if( b )
58
{
59
for( i = m-1; i >= 0; i-- )
60
for( j = 0; j < n; j++ )
61
{
62
_Tp s = b[i*bstep + j];
63
for( k = i+1; k < m; k++ )
64
s -= A[i*astep + k]*b[k*bstep + j];
65
b[i*bstep + j] = s/A[i*astep + i];
66
}
67
}
68
69
return p;
70
}
71
72
73
int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
74
{
75
CV_INSTRUMENT_REGION();
76
77
int output;
78
CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)
79
output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
80
return output;
81
}
82
83
84
int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
85
{
86
CV_INSTRUMENT_REGION();
87
88
int output;
89
CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)
90
output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
91
return output;
92
}
93
94
template<typename _Tp> static inline bool
95
CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
96
{
97
_Tp* L = A;
98
int i, j, k;
99
double s;
100
astep /= sizeof(A[0]);
101
bstep /= sizeof(b[0]);
102
103
for( i = 0; i < m; i++ )
104
{
105
for( j = 0; j < i; j++ )
106
{
107
s = A[i*astep + j];
108
for( k = 0; k < j; k++ )
109
s -= L[i*astep + k]*L[j*astep + k];
110
L[i*astep + j] = (_Tp)(s*L[j*astep + j]);
111
}
112
s = A[i*astep + i];
113
for( k = 0; k < j; k++ )
114
{
115
double t = L[i*astep + k];
116
s -= t*t;
117
}
118
if( s < std::numeric_limits<_Tp>::epsilon() )
119
return false;
120
L[i*astep + i] = (_Tp)(1./std::sqrt(s));
121
}
122
123
if (!b)
124
{
125
for( i = 0; i < m; i++ )
126
L[i*astep + i]=1/L[i*astep + i];
127
return true;
128
}
129
130
// LLt x = b
131
// 1: L y = b
132
// 2. Lt x = y
133
134
/*
135
[ L00 ] y0 b0
136
[ L10 L11 ] y1 = b1
137
[ L20 L21 L22 ] y2 b2
138
[ L30 L31 L32 L33 ] y3 b3
139
140
[ L00 L10 L20 L30 ] x0 y0
141
[ L11 L21 L31 ] x1 = y1
142
[ L22 L32 ] x2 y2
143
[ L33 ] x3 y3
144
*/
145
146
for( i = 0; i < m; i++ )
147
{
148
for( j = 0; j < n; j++ )
149
{
150
s = b[i*bstep + j];
151
for( k = 0; k < i; k++ )
152
s -= L[i*astep + k]*b[k*bstep + j];
153
b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
154
}
155
}
156
157
for( i = m-1; i >= 0; i-- )
158
{
159
for( j = 0; j < n; j++ )
160
{
161
s = b[i*bstep + j];
162
for( k = m-1; k > i; k-- )
163
s -= L[k*astep + i]*b[k*bstep + j];
164
b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
165
}
166
}
167
for( i = 0; i < m; i++ )
168
L[i*astep + i]=1/L[i*astep + i];
169
170
return true;
171
}
172
173
bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
174
{
175
CV_INSTRUMENT_REGION();
176
177
bool output;
178
CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)
179
return CholImpl(A, astep, m, b, bstep, n);
180
}
181
182
bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
183
{
184
CV_INSTRUMENT_REGION();
185
186
bool output;
187
CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)
188
return CholImpl(A, astep, m, b, bstep, n);
189
}
190
191
template<typename _Tp> inline static int
192
sign(_Tp x)
193
{
194
if (x >= (_Tp)0)
195
return 1;
196
else
197
return -1;
198
}
199
200
template<typename _Tp> static inline int
201
QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFactors, _Tp eps)
202
{
203
astep /= sizeof(_Tp);
204
bstep /= sizeof(_Tp);
205
206
cv::AutoBuffer<_Tp> buffer;
207
size_t buf_size = m ? m + n : hFactors != NULL;
208
buffer.allocate(buf_size);
209
_Tp* vl = buffer.data();
210
if (hFactors == NULL)
211
hFactors = vl + m;
212
213
for (int l = 0; l < n; l++)
214
{
215
//generate vl
216
int vlSize = m - l;
217
_Tp vlNorm = (_Tp)0;
218
for (int i = 0; i < vlSize; i++)
219
{
220
vl[i] = A[(l + i)*astep + l];
221
vlNorm += vl[i] * vl[i];
222
}
223
_Tp tmpV = vl[0];
224
vl[0] = vl[0] + sign(vl[0])*std::sqrt(vlNorm);
225
vlNorm = std::sqrt(vlNorm + vl[0] * vl[0] - tmpV*tmpV);
226
for (int i = 0; i < vlSize; i++)
227
{
228
vl[i] /= vlNorm;
229
}
230
//multiply A_l*vl
231
for (int j = l; j < n; j++)
232
{
233
_Tp v_lA = (_Tp)0;
234
for (int i = l; i < m; i++)
235
{
236
v_lA += vl[i - l] * A[i*astep + j];
237
}
238
239
for (int i = l; i < m; i++)
240
{
241
A[i*astep + j] -= 2 * vl[i - l] * v_lA;
242
}
243
}
244
245
//save vl and factors
246
hFactors[l] = vl[0] * vl[0];
247
for (int i = 1; i < vlSize; i++)
248
{
249
A[(l + i)*astep + l] = vl[i] / vl[0];
250
}
251
}
252
253
if (b)
254
{
255
//generate new rhs
256
for (int l = 0; l < n; l++)
257
{
258
//unpack vl
259
vl[0] = (_Tp)1;
260
for (int j = 1; j < m - l; j++)
261
{
262
vl[j] = A[(j + l)*astep + l];
263
}
264
265
//h_l*x
266
for (int j = 0; j < k; j++)
267
{
268
_Tp v_lB = (_Tp)0;
269
for (int i = l; i < m; i++)
270
v_lB += vl[i - l] * b[i*bstep + j];
271
272
for (int i = l; i < m; i++)
273
b[i*bstep + j] -= 2 * vl[i - l] * v_lB * hFactors[l];
274
}
275
}
276
//do back substitution
277
for (int i = n - 1; i >= 0; i--)
278
{
279
for (int j = n - 1; j > i; j--)
280
{
281
for (int p = 0; p < k; p++)
282
b[i*bstep + p] -= b[j*bstep + p] * A[i*astep + j];
283
}
284
if (std::abs(A[i*astep + i]) < eps)
285
return 0;
286
for (int p = 0; p < k; p++)
287
b[i*bstep + p] /= A[i*astep + i];
288
}
289
}
290
291
return 1;
292
}
293
294
int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors)
295
{
296
CV_INSTRUMENT_REGION();
297
298
int output;
299
CALL_HAL_RET(QR32f, cv_hal_QR32f, output, A, astep, m, n, k, b, bstep, hFactors);
300
output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, FLT_EPSILON * 10);
301
return output;
302
}
303
304
int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors)
305
{
306
CV_INSTRUMENT_REGION();
307
308
int output;
309
CALL_HAL_RET(QR64f, cv_hal_QR64f, output, A, astep, m, n, k, b, bstep, hFactors)
310
output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, DBL_EPSILON * 100);
311
return output;
312
}
313
314
//=============================================================================
315
// for compatibility with 3.0
316
317
int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
318
{
319
return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
320
}
321
322
int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
323
{
324
return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
325
}
326
327
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
328
{
329
return CholImpl(A, astep, m, b, bstep, n);
330
}
331
332
bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
333
{
334
return CholImpl(A, astep, m, b, bstep, n);
335
}
336
337
}} // cv::hal::
338
339