CoCalc -- matrix_decomp.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/matrix_decomp.cpp
¹⁶³³⁷ views
1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html
4

5

6
#include "precomp.hpp"
7

8
namespace cv { namespace hal {
9

10
/****************************************************************************************\
11
*                     LU & Cholesky implementation for small matrices                    *
12
\****************************************************************************************/
13

14
template<typename _Tp> static inline int
15
LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
16
{
17
    int i, j, k, p = 1;
18
    astep /= sizeof(A[0]);
19
    bstep /= sizeof(b[0]);
20

21
    for( i = 0; i < m; i++ )
22
    {
23
        k = i;
24

25
        for( j = i+1; j < m; j++ )
26
            if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )
27
                k = j;
28

29
        if( std::abs(A[k*astep + i]) < eps )
30
            return 0;
31

32
        if( k != i )
33
        {
34
            for( j = i; j < m; j++ )
35
                std::swap(A[i*astep + j], A[k*astep + j]);
36
            if( b )
37
                for( j = 0; j < n; j++ )
38
                    std::swap(b[i*bstep + j], b[k*bstep + j]);
39
            p = -p;
40
        }
41

42
        _Tp d = -1/A[i*astep + i];
43

44
        for( j = i+1; j < m; j++ )
45
        {
46
            _Tp alpha = A[j*astep + i]*d;
47

48
            for( k = i+1; k < m; k++ )
49
                A[j*astep + k] += alpha*A[i*astep + k];
50

51
            if( b )
52
                for( k = 0; k < n; k++ )
53
                    b[j*bstep + k] += alpha*b[i*bstep + k];
54
        }
55
    }
56

57
    if( b )
58
    {
59
        for( i = m-1; i >= 0; i-- )
60
            for( j = 0; j < n; j++ )
61
            {
62
                _Tp s = b[i*bstep + j];
63
                for( k = i+1; k < m; k++ )
64
                    s -= A[i*astep + k]*b[k*bstep + j];
65
                b[i*bstep + j] = s/A[i*astep + i];
66
            }
67
    }
68

69
    return p;
70
}
71

72

73
int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
74
{
75
    CV_INSTRUMENT_REGION();
76

77
    int output;
78
    CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)
79
    output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
80
    return output;
81
}
82

83

84
int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
85
{
86
    CV_INSTRUMENT_REGION();
87

88
    int output;
89
    CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)
90
    output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
91
    return output;
92
}
93

94
template<typename _Tp> static inline bool
95
CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
96
{
97
    _Tp* L = A;
98
    int i, j, k;
99
    double s;
100
    astep /= sizeof(A[0]);
101
    bstep /= sizeof(b[0]);
102

103
    for( i = 0; i < m; i++ )
104
    {
105
        for( j = 0; j < i; j++ )
106
        {
107
            s = A[i*astep + j];
108
            for( k = 0; k < j; k++ )
109
                s -= L[i*astep + k]*L[j*astep + k];
110
            L[i*astep + j] = (_Tp)(s*L[j*astep + j]);
111
        }
112
        s = A[i*astep + i];
113
        for( k = 0; k < j; k++ )
114
        {
115
            double t = L[i*astep + k];
116
            s -= t*t;
117
        }
118
        if( s < std::numeric_limits<_Tp>::epsilon() )
119
            return false;
120
        L[i*astep + i] = (_Tp)(1./std::sqrt(s));
121
    }
122

123
    if (!b)
124
    {
125
        for( i = 0; i < m; i++ )
126
             L[i*astep + i]=1/L[i*astep + i];
127
        return true;
128
   }
129

130
    // LLt x = b
131
    // 1: L y = b
132
    // 2. Lt x = y
133

134
    /*
135
     [ L00             ]  y0   b0
136
     [ L10 L11         ]  y1 = b1
137
     [ L20 L21 L22     ]  y2   b2
138
     [ L30 L31 L32 L33 ]  y3   b3
139

140
     [ L00 L10 L20 L30 ]  x0   y0
141
     [     L11 L21 L31 ]  x1 = y1
142
     [         L22 L32 ]  x2   y2
143
     [             L33 ]  x3   y3
144
     */
145

146
    for( i = 0; i < m; i++ )
147
    {
148
        for( j = 0; j < n; j++ )
149
        {
150
            s = b[i*bstep + j];
151
            for( k = 0; k < i; k++ )
152
                s -= L[i*astep + k]*b[k*bstep + j];
153
            b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
154
        }
155
    }
156

157
    for( i = m-1; i >= 0; i-- )
158
    {
159
        for( j = 0; j < n; j++ )
160
        {
161
            s = b[i*bstep + j];
162
            for( k = m-1; k > i; k-- )
163
                s -= L[k*astep + i]*b[k*bstep + j];
164
            b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);
165
        }
166
    }
167
    for( i = 0; i < m; i++ )
168
            L[i*astep + i]=1/L[i*astep + i];
169

170
    return true;
171
}
172

173
bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
174
{
175
    CV_INSTRUMENT_REGION();
176

177
    bool output;
178
    CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)
179
    return CholImpl(A, astep, m, b, bstep, n);
180
}
181

182
bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
183
{
184
    CV_INSTRUMENT_REGION();
185

186
    bool output;
187
    CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)
188
    return CholImpl(A, astep, m, b, bstep, n);
189
}
190

191
template<typename _Tp> inline static int
192
sign(_Tp x)
193
{
194
    if (x >= (_Tp)0)
195
        return 1;
196
    else
197
        return -1;
198
}
199

200
template<typename _Tp> static inline int
201
QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFactors, _Tp eps)
202
{
203
    astep /= sizeof(_Tp);
204
    bstep /= sizeof(_Tp);
205

206
    cv::AutoBuffer<_Tp> buffer;
207
    size_t buf_size = m ? m + n : hFactors != NULL;
208
    buffer.allocate(buf_size);
209
    _Tp* vl = buffer.data();
210
    if (hFactors == NULL)
211
        hFactors = vl + m;
212

213
    for (int l = 0; l < n; l++)
214
    {
215
        //generate vl
216
        int vlSize = m - l;
217
        _Tp vlNorm = (_Tp)0;
218
        for (int i = 0; i < vlSize; i++)
219
        {
220
            vl[i] = A[(l + i)*astep + l];
221
            vlNorm += vl[i] * vl[i];
222
        }
223
        _Tp tmpV = vl[0];
224
        vl[0] = vl[0] + sign(vl[0])*std::sqrt(vlNorm);
225
        vlNorm = std::sqrt(vlNorm + vl[0] * vl[0] - tmpV*tmpV);
226
        for (int i = 0; i < vlSize; i++)
227
        {
228
            vl[i] /= vlNorm;
229
        }
230
        //multiply A_l*vl
231
        for (int j = l; j < n; j++)
232
        {
233
            _Tp v_lA = (_Tp)0;
234
            for (int i = l; i < m; i++)
235
            {
236
                v_lA += vl[i - l] * A[i*astep + j];
237
            }
238

239
            for (int i = l; i < m; i++)
240
            {
241
                A[i*astep + j] -= 2 * vl[i - l] * v_lA;
242
            }
243
        }
244

245
        //save vl and factors
246
        hFactors[l] = vl[0] * vl[0];
247
        for (int i = 1; i < vlSize; i++)
248
        {
249
            A[(l + i)*astep + l] = vl[i] / vl[0];
250
        }
251
    }
252

253
    if (b)
254
    {
255
        //generate new rhs
256
        for (int l = 0; l < n; l++)
257
        {
258
            //unpack vl
259
            vl[0] = (_Tp)1;
260
            for (int j = 1; j < m - l; j++)
261
            {
262
              vl[j] = A[(j + l)*astep + l];
263
            }
264

265
            //h_l*x
266
            for (int j = 0; j < k; j++)
267
            {
268
                _Tp v_lB = (_Tp)0;
269
                for (int i = l; i < m; i++)
270
                  v_lB += vl[i - l] * b[i*bstep + j];
271

272
                for (int i = l; i < m; i++)
273
                    b[i*bstep + j] -= 2 * vl[i - l] * v_lB * hFactors[l];
274
            }
275
        }
276
        //do back substitution
277
        for (int i = n - 1; i >= 0; i--)
278
        {
279
            for (int j = n - 1; j > i; j--)
280
            {
281
                for (int p = 0; p < k; p++)
282
                    b[i*bstep + p] -= b[j*bstep + p] * A[i*astep + j];
283
            }
284
            if (std::abs(A[i*astep + i]) < eps)
285
                return 0;
286
            for (int p = 0; p < k; p++)
287
                b[i*bstep + p] /= A[i*astep + i];
288
        }
289
    }
290

291
    return 1;
292
}
293

294
int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors)
295
{
296
    CV_INSTRUMENT_REGION();
297

298
    int output;
299
    CALL_HAL_RET(QR32f, cv_hal_QR32f, output, A, astep, m, n, k, b, bstep, hFactors);
300
    output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, FLT_EPSILON * 10);
301
    return output;
302
}
303

304
int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors)
305
{
306
    CV_INSTRUMENT_REGION();
307

308
    int output;
309
    CALL_HAL_RET(QR64f, cv_hal_QR64f, output, A, astep, m, n, k, b, bstep, hFactors)
310
    output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, DBL_EPSILON * 100);
311
    return output;
312
}
313

314
//=============================================================================
315
// for compatibility with 3.0
316

317
int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)
318
{
319
    return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
320
}
321

322
int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)
323
{
324
    return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
325
}
326

327
bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)
328
{
329
    return CholImpl(A, astep, m, b, bstep, n);
330
}
331

332
bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)
333
{
334
    return CholImpl(A, astep, m, b, bstep, n);
335
}
336

337
}} // cv::hal::
338

339
Product

Resources

Company