CoCalc -- hal_internal.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/hal_internal.cpp
¹⁶³³⁷ views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                          License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
16
// Copyright (C) 2015, Itseez Inc., all rights reserved.
17
// Third party copyrights are property of their respective owners.
18
//
19
// Redistribution and use in source and binary forms, with or without modification,
20
// are permitted provided that the following conditions are met:
21
//
22
//   * Redistribution's of source code must retain the above copyright notice,
23
//     this list of conditions and the following disclaimer.
24
//
25
//   * Redistribution's in binary form must reproduce the above copyright notice,
26
//     this list of conditions and the following disclaimer in the documentation
27
//     and/or other materials provided with the distribution.
28
//
29
//   * The name of the copyright holders may not be used to endorse or promote products
30
//     derived from this software without specific prior written permission.
31
//
32
// This software is provided by the copyright holders and contributors "as is" and
33
// any express or implied warranties, including, but not limited to, the implied
34
// warranties of merchantability and fitness for a particular purpose are disclaimed.
35
// In no event shall the Intel Corporation or contributors be liable for any direct,
36
// indirect, incidental, special, exemplary, or consequential damages
37
// (including, but not limited to, procurement of substitute goods or services;
38
// loss of use, data, or profits; or business interruption) however caused
39
// and on any theory of liability, whether in contract, strict liability,
40
// or tort (including negligence or otherwise) arising in any way out of
41
// the use of this software, even if advised of the possibility of such damage.
42
//
43
//M*/
44

45
#include "hal_internal.hpp"
46

47
#ifdef HAVE_LAPACK
48

49
#include <complex.h>
50
#include "opencv_lapack.h"
51

52
#include <cmath>
53
#include <algorithm>
54
#include <typeinfo>
55
#include <limits>
56
#include <complex>
57
#include <vector>
58

59
// Size thresholds used by the public lapack_* entry points in this file:
// each entry point compares its `m` argument against the matching threshold
// and returns CV_HAL_ERROR_NOT_IMPLEMENTED, without calling LAPACK/BLAS,
// when `m` is smaller.
#define HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH 100
#define HAL_GEMM_SMALL_MATRIX_THRESH 100
#define HAL_SVD_SMALL_MATRIX_THRESH 25
#define HAL_QR_SMALL_MATRIX_THRESH 30
#define HAL_LU_SMALL_MATRIX_THRESH 100
#define HAL_CHOLESKY_SMALL_MATRIX_THRESH 100
65

66
//LAPACK stores matrices in column-major order, so transposing is needed everywhere
67
// Transposes the leading m x m square of a row-major matrix in place by
// swapping every element above the diagonal with its mirror below it.
//   src    - pointer to the first element of the matrix
//   src_ld - distance, in elements, between the starts of consecutive rows
//   m      - side length of the square to transpose; 0 and 1 leave src untouched
template <typename fptype> static inline void
transpose_square_inplace(fptype *src, size_t src_ld, size_t m)
{
    // "i + 1 < m" rather than "i < m - 1": m is unsigned, so m - 1 wraps
    // around for m == 0 and the outer loop would spin ~SIZE_MAX times.
    for(size_t i = 0; i + 1 < m; i++)
        for(size_t j = i + 1; j < m; j++)
            std::swap(src[j*src_ld + i], src[i*src_ld + j]);
}
74

75
// Writes the transpose of an m x n row-major matrix into a separate buffer:
// dst[j][i] = src[i][j] for 0 <= i < m and 0 <= j < n, i.e. dst receives
// n rows of m elements each.
//   src, src_ld - source matrix and its row stride in elements
//   dst, dst_ld - destination matrix and its row stride in elements
//   m, n        - number of rows and columns of the source
template <typename fptype> static inline void
transpose(const fptype *src, size_t src_ld, fptype* dst, size_t dst_ld, size_t m, size_t n)
{
    for(size_t i = 0; i < m; i++)
        for(size_t j = 0; j < n; j++)
            dst[j*dst_ld + i] = src[i*src_ld + j];
}
82

83
// Copies an m x n row-major matrix element by element into another buffer
// that may use a different row stride.
//   src, src_ld - source matrix and its row stride in elements
//   dst, dst_ld - destination matrix and its row stride in elements
//   m, n        - number of rows and columns to copy
template <typename fptype> static inline void
copy_matrix(const fptype *src, size_t src_ld, fptype* dst, size_t dst_ld, size_t m, size_t n)
{
    for(size_t i = 0; i < m; i++)
        for(size_t j = 0; j < n; j++)
            dst[i*dst_ld + j] = src[i*src_ld + j];
}
90

91
// Fills an m x n row-major matrix with a single value; elements between the
// end of a row (n) and the row stride (dst_ld) are left untouched.
//   dst, dst_ld - destination matrix and its row stride in elements
//   value       - value written to every element
//   m, n        - number of rows and columns to fill
template <typename fptype> static inline void
set_value(fptype *dst, size_t dst_ld, fptype value, size_t m, size_t n)
{
    for(size_t i = 0; i < m; i++)
        std::fill_n(dst + i*dst_ld, n, value);
}
98

99
template <typename fptype> static inline int
100
lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int* info)
101
{
102
    int lda = (int)(a_step / sizeof(fptype)), sign = 0;
103
    int* piv = new int[m];
104

105
    transpose_square_inplace(a, lda, m);
106

107
    if(b)
108
    {
109
        if(n == 1 && b_step == sizeof(fptype))
110
        {
111
            if(typeid(fptype) == typeid(float))
112
                sgesv_(&m, &n, (float*)a, &lda, piv, (float*)b, &m, info);
113
            else if(typeid(fptype) == typeid(double))
114
                dgesv_(&m, &n, (double*)a, &lda, piv, (double*)b, &m, info);
115
        }
116
        else
117
        {
118
            int ldb = (int)(b_step / sizeof(fptype));
119
            fptype* tmpB = new fptype[m*n];
120

121
            transpose(b, ldb, tmpB, m, m, n);
122

123
            if(typeid(fptype) == typeid(float))
124
                sgesv_(&m, &n, (float*)a, &lda, piv, (float*)tmpB, &m, info);
125
            else if(typeid(fptype) == typeid(double))
126
                dgesv_(&m, &n, (double*)a, &lda, piv, (double*)tmpB, &m, info);
127

128
            transpose(tmpB, m, b, ldb, n, m);
129
            delete[] tmpB;
130
        }
131
    }
132
    else
133
    {
134
        if(typeid(fptype) == typeid(float))
135
            sgetrf_(&m, &m, (float*)a, &lda, piv, info);
136
        else if(typeid(fptype) == typeid(double))
137
            dgetrf_(&m, &m, (double*)a, &lda, piv, info);
138
    }
139

140
    if(*info == 0)
141
    {
142
        for(int i = 0; i < m; i++)
143
            sign ^= piv[i] != i + 1;
144
        *info = sign ? -1 : 1;
145
    }
146
    else
147
        *info = 0; //in opencv LU function zero means error
148

149
    delete[] piv;
150
    return CV_HAL_ERROR_OK;
151
}
152

153
template <typename fptype> static inline int
154
lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, bool* info)
155
{
156
    int lapackStatus = 0;
157
    int lda = (int)(a_step / sizeof(fptype));
158
    char L[] = {'L', '\0'};
159

160
    if(b)
161
    {
162
        if(n == 1 && b_step == sizeof(fptype))
163
        {
164
            if(typeid(fptype) == typeid(float))
165
                sposv_(L, &m, &n, (float*)a, &lda, (float*)b, &m, &lapackStatus);
166
            else if(typeid(fptype) == typeid(double))
167
                dposv_(L, &m, &n, (double*)a, &lda, (double*)b, &m, &lapackStatus);
168
        }
169
        else
170
        {
171
            int ldb = (int)(b_step / sizeof(fptype));
172
            fptype* tmpB = new fptype[m*n];
173
            transpose(b, ldb, tmpB, m, m, n);
174

175
            if(typeid(fptype) == typeid(float))
176
                sposv_(L, &m, &n, (float*)a, &lda, (float*)tmpB, &m, &lapackStatus);
177
            else if(typeid(fptype) == typeid(double))
178
                dposv_(L, &m, &n, (double*)a, &lda, (double*)tmpB, &m, &lapackStatus);
179

180
            transpose(tmpB, m, b, ldb, n, m);
181
            delete[] tmpB;
182
        }
183
    }
184
    else
185
    {
186
        if(typeid(fptype) == typeid(float))
187
            spotrf_(L, &m, (float*)a, &lda, &lapackStatus);
188
        else if(typeid(fptype) == typeid(double))
189
            dpotrf_(L, &m, (double*)a, &lda, &lapackStatus);
190
    }
191

192
    if(lapackStatus == 0) *info = true;
193
    else *info = false; //in opencv Cholesky function false means error
194

195
    return CV_HAL_ERROR_OK;
196
}
197

198
template <typename fptype> static inline int
199
lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype* vt, size_t v_step, int m, int n, int flags, int* info)
200
{
201
    int lda = (int)(a_step / sizeof(fptype));
202
    int ldv = (int)(v_step / sizeof(fptype));
203
    int ldu = (int)(u_step / sizeof(fptype));
204
    int lwork = -1;
205
    int* iworkBuf = new int[8*std::min(m, n)];
206
    fptype work1 = 0;
207

208
    //A already transposed and m>=n
209
    char mode[] = { ' ', '\0'};
210
    if(flags & CV_HAL_SVD_NO_UV)
211
    {
212
        ldv = 1;
213
        mode[0] = 'N';
214
    }
215
    else if((flags & CV_HAL_SVD_SHORT_UV) && (flags & CV_HAL_SVD_MODIFY_A)) //short SVD, U stored in a
216
        mode[0] = 'O';
217
    else if((flags & CV_HAL_SVD_SHORT_UV) && !(flags & CV_HAL_SVD_MODIFY_A)) //short SVD, U stored in u if m>=n
218
        mode[0] = 'S';
219
    else if(flags & CV_HAL_SVD_FULL_UV) //full SVD, U stored in u or in a
220
        mode[0] = 'A';
221

222
    if((flags & CV_HAL_SVD_MODIFY_A) && (flags & CV_HAL_SVD_FULL_UV)) //U stored in a
223
    {
224
        u = new fptype[m*m];
225
        ldu = m;
226
    }
227

228
    if(typeid(fptype) == typeid(float))
229
        sgesdd_(mode, &m, &n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)&work1, &lwork, iworkBuf, info);
230
    else if(typeid(fptype) == typeid(double))
231
        dgesdd_(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)&work1, &lwork, iworkBuf, info);
232

233
    lwork = (int)round(work1); //optimal buffer size
234
    fptype* buffer = new fptype[lwork + 1];
235

236
    if(typeid(fptype) == typeid(float))
237
        sgesdd_(mode, &m, &n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)buffer, &lwork, iworkBuf, info);
238
    else if(typeid(fptype) == typeid(double))
239
        dgesdd_(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)buffer, &lwork, iworkBuf, info);
240

241
    if(!(flags & CV_HAL_SVD_NO_UV))
242
        transpose_square_inplace(vt, ldv, n);
243

244
    if((flags & CV_HAL_SVD_MODIFY_A) && (flags & CV_HAL_SVD_FULL_UV))
245
    {
246
        for(int i = 0; i < m; i++)
247
            for(int j = 0; j < m; j++)
248
                a[i*lda + j] = u[i*m + j];
249
        delete[] u;
250
    }
251

252
    delete[] iworkBuf;
253
    delete[] buffer;
254
    return CV_HAL_ERROR_OK;
255
}
256

257
template <typename fptype> static inline int
258
lapack_QR(fptype* a, size_t a_step, int m, int n, int k, fptype* b, size_t b_step, fptype* dst, int* info)
259
{
260
    int lda = (int)(a_step / sizeof(fptype));
261
    char mode[] = { 'N', '\0' };
262
    if(m < n)
263
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
264

265
    std::vector<fptype> tmpAMemHolder;
266
    fptype* tmpA;
267
    int ldtmpA;
268
    if (m == n)
269
    {
270
        transpose_square_inplace(a, lda, m);
271
        tmpA = a;
272
        ldtmpA = lda;
273
    }
274
    else
275
    {
276
        tmpAMemHolder.resize(m*n);
277
        tmpA = &tmpAMemHolder.front();
278
        ldtmpA = m;
279
        transpose(a, lda, tmpA, m, m, n);
280
    }
281

282
    int lwork = -1;
283
    fptype work1 = 0.;
284

285
    if (b)
286
    {
287
        if (k == 1 && b_step == sizeof(fptype))
288
        {
289
            if (typeid(fptype) == typeid(float))
290
                sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)b, &m, (float*)&work1, &lwork, info);
291
            else if (typeid(fptype) == typeid(double))
292
                dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)b, &m, (double*)&work1, &lwork, info);
293

294
            lwork = cvRound(work1); //optimal buffer size
295
            std::vector<fptype> workBufMemHolder(lwork + 1);
296
            fptype* buffer = &workBufMemHolder.front();
297

298
            if (typeid(fptype) == typeid(float))
299
                sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)b, &m, (float*)buffer, &lwork, info);
300
            else if (typeid(fptype) == typeid(double))
301
                dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)b, &m, (double*)buffer, &lwork, info);
302
        }
303
        else
304
        {
305
            std::vector<fptype> tmpBMemHolder(m*k);
306
            fptype* tmpB = &tmpBMemHolder.front();
307
            int ldb = (int)(b_step / sizeof(fptype));
308
            transpose(b, ldb, tmpB, m, m, k);
309

310
            if (typeid(fptype) == typeid(float))
311
                sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)tmpB, &m, (float*)&work1, &lwork, info);
312
            else if (typeid(fptype) == typeid(double))
313
                dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)tmpB, &m, (double*)&work1, &lwork, info);
314

315
            lwork = cvRound(work1); //optimal buffer size
316
            std::vector<fptype> workBufMemHolder(lwork + 1);
317
            fptype* buffer = &workBufMemHolder.front();
318

319
            if (typeid(fptype) == typeid(float))
320
                sgels_(mode, &m, &n, &k, (float*)tmpA, &ldtmpA, (float*)tmpB, &m, (float*)buffer, &lwork, info);
321
            else if (typeid(fptype) == typeid(double))
322
                dgels_(mode, &m, &n, &k, (double*)tmpA, &ldtmpA, (double*)tmpB, &m, (double*)buffer, &lwork, info);
323

324
            transpose(tmpB, m, b, ldb, k, m);
325
        }
326
    }
327
    else
328
    {
329
        if (typeid(fptype) == typeid(float))
330
            sgeqrf_(&m, &n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)&work1, &lwork, info);
331
        else if (typeid(fptype) == typeid(double))
332
            dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)&work1, &lwork, info);
333

334
        lwork = cvRound(work1); //optimal buffer size
335
        std::vector<fptype> workBufMemHolder(lwork + 1);
336
        fptype* buffer = &workBufMemHolder.front();
337

338
        if (typeid(fptype) == typeid(float))
339
            sgeqrf_(&m, &n, (float*)tmpA, &ldtmpA, (float*)dst, (float*)buffer, &lwork, info);
340
        else if (typeid(fptype) == typeid(double))
341
            dgeqrf_(&m, &n, (double*)tmpA, &ldtmpA, (double*)dst, (double*)buffer, &lwork, info);
342
    }
343

344
    if (m == n)
345
        transpose_square_inplace(a, lda, m);
346
    else
347
        transpose(tmpA, m, a, lda, n, m);
348

349
    if (*info != 0)
350
        *info = 0;
351
    else
352
        *info = 1;
353

354
    return CV_HAL_ERROR_OK;
355
}
356

357
template <typename fptype> static inline int
358
lapack_gemm(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
359
            const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
360
{
361
    int ldsrc1 = (int)(src1_step / sizeof(fptype));
362
    int ldsrc2 = (int)(src2_step / sizeof(fptype));
363
    int ldsrc3 = (int)(src3_step / sizeof(fptype));
364
    int lddst = (int)(dst_step / sizeof(fptype));
365
    int c_m, c_n, d_m;
366
    CBLAS_TRANSPOSE transA, transB;
367

368
    if(flags & CV_HAL_GEMM_2_T)
369
    {
370
        transB = CblasTrans;
371
        if(flags & CV_HAL_GEMM_1_T )
372
        {
373
            d_m = a_n;
374
        }
375
        else
376
        {
377
            d_m = a_m;
378
        }
379
    }
380
    else
381
    {
382
        transB = CblasNoTrans;
383
        if(flags & CV_HAL_GEMM_1_T )
384
        {
385
            d_m = a_n;
386
        }
387
        else
388
        {
389
            d_m = a_m;
390
        }
391
    }
392

393
    if(flags & CV_HAL_GEMM_3_T)
394
    {
395
        c_m = d_n;
396
        c_n = d_m;
397
    }
398
    else
399
    {
400
        c_m = d_m;
401
        c_n = d_n;
402
    }
403

404
    if(flags & CV_HAL_GEMM_1_T )
405
    {
406
        transA = CblasTrans;
407
        std::swap(a_n, a_m);
408
    }
409
    else
410
    {
411
        transA = CblasNoTrans;
412
    }
413

414
    if(src3 != dst && beta != 0.0 && src3_step != 0) {
415
        if(flags & CV_HAL_GEMM_3_T)
416
            transpose(src3, ldsrc3, dst, lddst, c_m, c_n);
417
        else
418
            copy_matrix(src3, ldsrc3, dst, lddst, c_m, c_n);
419
    }
420
    else if (src3 == dst && (flags & CV_HAL_GEMM_3_T)) //actually transposing C in this case done by openCV
421
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
422
    else if(src3_step == 0 && beta != 0.0)
423
        set_value(dst, lddst, (fptype)0.0, d_m, d_n);
424

425
    if(typeid(fptype) == typeid(float))
426
        cblas_sgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (float)alpha, (float*)src1, ldsrc1, (float*)src2, ldsrc2, (float)beta, (float*)dst, lddst);
427
    else if(typeid(fptype) == typeid(double))
428
        cblas_dgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (double)alpha, (double*)src1, ldsrc1, (double*)src2, ldsrc2, (double)beta, (double*)dst, lddst);
429

430
    return CV_HAL_ERROR_OK;
431
}
432

433

434
template <typename fptype> static inline int
435
lapack_gemm_c(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
436
            const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
437
{
438
    int ldsrc1 = (int)(src1_step / sizeof(std::complex<fptype>));
439
    int ldsrc2 = (int)(src2_step / sizeof(std::complex<fptype>));
440
    int ldsrc3 = (int)(src3_step / sizeof(std::complex<fptype>));
441
    int lddst = (int)(dst_step / sizeof(std::complex<fptype>));
442
    int c_m, c_n, d_m;
443
    CBLAS_TRANSPOSE transA, transB;
444
    std::complex<fptype> cAlpha(alpha, 0.0);
445
    std::complex<fptype> cBeta(beta, 0.0);
446

447
    if(flags & CV_HAL_GEMM_2_T)
448
    {
449
        transB = CblasTrans;
450
        if(flags & CV_HAL_GEMM_1_T )
451
        {
452
            d_m = a_n;
453
        }
454
        else
455
        {
456
            d_m = a_m;
457
        }
458
    }
459
    else
460
    {
461
        transB = CblasNoTrans;
462
        if(flags & CV_HAL_GEMM_1_T )
463
        {
464
            d_m = a_n;
465
        }
466
        else
467
        {
468
            d_m = a_m;
469
        }
470
    }
471

472
    if(flags & CV_HAL_GEMM_3_T)
473
    {
474
        c_m = d_n;
475
        c_n = d_m;
476
    }
477
    else
478
    {
479
        c_m = d_m;
480
        c_n = d_n;
481
    }
482

483
    if(flags & CV_HAL_GEMM_1_T )
484
    {
485
        transA = CblasTrans;
486
        std::swap(a_n, a_m);
487
    }
488
    else
489
    {
490
        transA = CblasNoTrans;
491
    }
492

493
    if(src3 != dst && beta != 0.0 && src3_step != 0) {
494
        if(flags & CV_HAL_GEMM_3_T)
495
            transpose((std::complex<fptype>*)src3, ldsrc3, (std::complex<fptype>*)dst, lddst, c_m, c_n);
496
        else
497
            copy_matrix((std::complex<fptype>*)src3, ldsrc3, (std::complex<fptype>*)dst, lddst, c_m, c_n);
498
    }
499
    else if (src3 == dst && (flags & CV_HAL_GEMM_3_T)) //actually transposing C in this case done by openCV
500
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
501
    else if(src3_step == 0 && beta != 0.0)
502
        set_value((std::complex<fptype>*)dst, lddst, std::complex<fptype>(0.0, 0.0), d_m, d_n);
503

504
    if(typeid(fptype) == typeid(float))
505
        cblas_cgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (float*)reinterpret_cast<fptype(&)[2]>(cAlpha), (float*)src1, ldsrc1, (float*)src2, ldsrc2, (float*)reinterpret_cast<fptype(&)[2]>(cBeta), (float*)dst, lddst);
506
    else if(typeid(fptype) == typeid(double))
507
        cblas_zgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (double*)reinterpret_cast<fptype(&)[2]>(cAlpha), (double*)src1, ldsrc1, (double*)src2, ldsrc2, (double*)reinterpret_cast<fptype(&)[2]>(cBeta), (double*)dst, lddst);
508

509
    return CV_HAL_ERROR_OK;
510
}
511
int lapack_LU32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, int* info)
512
{
513
    if(m < HAL_LU_SMALL_MATRIX_THRESH)
514
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
515
    return lapack_LU(a, a_step, m, b, b_step, n, info);
516
}
517

518
int lapack_LU64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, int* info)
519
{
520
    if(m < HAL_LU_SMALL_MATRIX_THRESH)
521
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
522
    return lapack_LU(a, a_step, m, b, b_step, n, info);
523
}
524

525
int lapack_Cholesky32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, bool *info)
526
{
527
    if(m < HAL_CHOLESKY_SMALL_MATRIX_THRESH)
528
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
529
    return lapack_Cholesky(a, a_step, m, b, b_step, n, info);
530
}
531

532
int lapack_Cholesky64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, bool *info)
533
{
534
    if(m < HAL_CHOLESKY_SMALL_MATRIX_THRESH)
535
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
536
    return lapack_Cholesky(a, a_step, m, b, b_step, n, info);
537
}
538

539
int lapack_SVD32f(float* a, size_t a_step, float *w, float* u, size_t u_step, float* vt, size_t v_step, int m, int n, int flags)
540
{
541

542
    if(m < HAL_SVD_SMALL_MATRIX_THRESH)
543
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
544
    int info;
545
    return lapack_SVD(a, a_step, w, u, u_step, vt, v_step, m, n, flags, &info);
546
}
547

548
int lapack_SVD64f(double* a, size_t a_step, double *w, double* u, size_t u_step, double* vt, size_t v_step, int m, int n, int flags)
549
{
550

551
    if(m < HAL_SVD_SMALL_MATRIX_THRESH)
552
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
553
    int info;
554
    return lapack_SVD(a, a_step, w, u, u_step, vt, v_step, m, n, flags, &info);
555
}
556

557
int lapack_QR32f(float* src1, size_t src1_step, int m, int n, int k, float* src2, size_t src2_step, float* dst, int* info)
558
{
559
    if (m < HAL_QR_SMALL_MATRIX_THRESH)
560
      return CV_HAL_ERROR_NOT_IMPLEMENTED;
561
    return lapack_QR(src1, src1_step, m, n, k, src2, src2_step, dst, info);
562
}
563

564
int lapack_QR64f(double* src1, size_t src1_step, int m, int n, int k, double* src2, size_t src2_step, double* dst, int* info)
565
{
566
    if (m < HAL_QR_SMALL_MATRIX_THRESH)
567
      return CV_HAL_ERROR_NOT_IMPLEMENTED;
568
    return lapack_QR(src1, src1_step, m, n, k, src2, src2_step, dst, info);
569
}
570

571
int lapack_gemm32f(const float *src1, size_t src1_step, const float *src2, size_t src2_step, float alpha,
572
                   const float *src3, size_t src3_step, float beta, float *dst, size_t dst_step, int m, int n, int k, int flags)
573
{
574
    if(m < HAL_GEMM_SMALL_MATRIX_THRESH)
575
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
576
    return lapack_gemm(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
577
}
578

579
int lapack_gemm64f(const double *src1, size_t src1_step, const double *src2, size_t src2_step, double alpha,
580
                   const double *src3, size_t src3_step, double beta, double *dst, size_t dst_step, int m, int n, int k, int flags)
581
{
582
    if(m < HAL_GEMM_SMALL_MATRIX_THRESH)
583
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
584
    return lapack_gemm(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
585
}
586

587
int lapack_gemm32fc(const float *src1, size_t src1_step, const float *src2, size_t src2_step, float alpha,
588
                   const float *src3, size_t src3_step, float beta, float *dst, size_t dst_step, int m, int n, int k, int flags)
589
{
590
    if(m < HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH)
591
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
592
    return lapack_gemm_c(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
593
}
594
int lapack_gemm64fc(const double *src1, size_t src1_step, const double *src2, size_t src2_step, double alpha,
595
                   const double *src3, size_t src3_step, double beta, double *dst, size_t dst_step, int m, int n, int k, int flags)
596
{
597
    if(m < HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH)
598
        return CV_HAL_ERROR_NOT_IMPLEMENTED;
599
    return lapack_gemm_c(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
600
}
601

602
#endif //HAVE_LAPACK
603

604
Product

Resources

Company