Path: blob/master/modules/core/src/matrix_decomp.cpp
16337 views
// This file is part of OpenCV project.1// It is subject to the license terms in the LICENSE file found in the top-level directory2// of this distribution and at http://opencv.org/license.html345#include "precomp.hpp"67namespace cv { namespace hal {89/****************************************************************************************\10* LU & Cholesky implementation for small matrices *11\****************************************************************************************/1213template<typename _Tp> static inline int14LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)15{16int i, j, k, p = 1;17astep /= sizeof(A[0]);18bstep /= sizeof(b[0]);1920for( i = 0; i < m; i++ )21{22k = i;2324for( j = i+1; j < m; j++ )25if( std::abs(A[j*astep + i]) > std::abs(A[k*astep + i]) )26k = j;2728if( std::abs(A[k*astep + i]) < eps )29return 0;3031if( k != i )32{33for( j = i; j < m; j++ )34std::swap(A[i*astep + j], A[k*astep + j]);35if( b )36for( j = 0; j < n; j++ )37std::swap(b[i*bstep + j], b[k*bstep + j]);38p = -p;39}4041_Tp d = -1/A[i*astep + i];4243for( j = i+1; j < m; j++ )44{45_Tp alpha = A[j*astep + i]*d;4647for( k = i+1; k < m; k++ )48A[j*astep + k] += alpha*A[i*astep + k];4950if( b )51for( k = 0; k < n; k++ )52b[j*bstep + k] += alpha*b[i*bstep + k];53}54}5556if( b )57{58for( i = m-1; i >= 0; i-- )59for( j = 0; j < n; j++ )60{61_Tp s = b[i*bstep + j];62for( k = i+1; k < m; k++ )63s -= A[i*astep + k]*b[k*bstep + j];64b[i*bstep + j] = s/A[i*astep + i];65}66}6768return p;69}707172int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)73{74CV_INSTRUMENT_REGION();7576int output;77CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)78output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);79return output;80}818283int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)84{85CV_INSTRUMENT_REGION();8687int output;88CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)89output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);90return output;91}9293template<typename _Tp> static inline bool94CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)95{96_Tp* L = A;97int i, j, k;98double s;99astep /= sizeof(A[0]);100bstep /= sizeof(b[0]);101102for( i = 0; i < m; i++ )103{104for( j = 0; j < i; j++ )105{106s = A[i*astep + j];107for( k = 0; k < j; k++ )108s -= L[i*astep + k]*L[j*astep + k];109L[i*astep + j] = (_Tp)(s*L[j*astep + j]);110}111s = A[i*astep + i];112for( k = 0; k < j; k++ )113{114double t = L[i*astep + k];115s -= t*t;116}117if( s < std::numeric_limits<_Tp>::epsilon() )118return false;119L[i*astep + i] = (_Tp)(1./std::sqrt(s));120}121122if (!b)123{124for( i = 0; i < m; i++ )125L[i*astep + i]=1/L[i*astep + i];126return true;127}128129// LLt x = b130// 1: L y = b131// 2. Lt x = y132133/*134[ L00 ] y0 b0135[ L10 L11 ] y1 = b1136[ L20 L21 L22 ] y2 b2137[ L30 L31 L32 L33 ] y3 b3138139[ L00 L10 L20 L30 ] x0 y0140[ L11 L21 L31 ] x1 = y1141[ L22 L32 ] x2 y2142[ L33 ] x3 y3143*/144145for( i = 0; i < m; i++ )146{147for( j = 0; j < n; j++ )148{149s = b[i*bstep + j];150for( k = 0; k < i; k++ )151s -= L[i*astep + k]*b[k*bstep + j];152b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);153}154}155156for( i = m-1; i >= 0; i-- )157{158for( j = 0; j < n; j++ )159{160s = b[i*bstep + j];161for( k = m-1; k > i; k-- )162s -= L[k*astep + i]*b[k*bstep + j];163b[i*bstep + j] = (_Tp)(s*L[i*astep + i]);164}165}166for( i = 0; i < m; i++ )167L[i*astep + i]=1/L[i*astep + i];168169return true;170}171172bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)173{174CV_INSTRUMENT_REGION();175176bool output;177CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)178return CholImpl(A, astep, m, b, bstep, n);179}180181bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)182{183CV_INSTRUMENT_REGION();184185bool output;186CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)187return CholImpl(A, astep, m, b, bstep, n);188}189190template<typename _Tp> inline static int191sign(_Tp x)192{193if (x >= (_Tp)0)194return 1;195else196return -1;197}198199template<typename _Tp> static inline int200QRImpl(_Tp* A, size_t astep, int m, int n, int k, _Tp* b, size_t bstep, _Tp* hFactors, _Tp eps)201{202astep /= sizeof(_Tp);203bstep /= sizeof(_Tp);204205cv::AutoBuffer<_Tp> buffer;206size_t buf_size = m ? m + n : hFactors != NULL;207buffer.allocate(buf_size);208_Tp* vl = buffer.data();209if (hFactors == NULL)210hFactors = vl + m;211212for (int l = 0; l < n; l++)213{214//generate vl215int vlSize = m - l;216_Tp vlNorm = (_Tp)0;217for (int i = 0; i < vlSize; i++)218{219vl[i] = A[(l + i)*astep + l];220vlNorm += vl[i] * vl[i];221}222_Tp tmpV = vl[0];223vl[0] = vl[0] + sign(vl[0])*std::sqrt(vlNorm);224vlNorm = std::sqrt(vlNorm + vl[0] * vl[0] - tmpV*tmpV);225for (int i = 0; i < vlSize; i++)226{227vl[i] /= vlNorm;228}229//multiply A_l*vl230for (int j = l; j < n; j++)231{232_Tp v_lA = (_Tp)0;233for (int i = l; i < m; i++)234{235v_lA += vl[i - l] * A[i*astep + j];236}237238for (int i = l; i < m; i++)239{240A[i*astep + j] -= 2 * vl[i - l] * v_lA;241}242}243244//save vl and factors245hFactors[l] = vl[0] * vl[0];246for (int i = 1; i < vlSize; i++)247{248A[(l + i)*astep + l] = vl[i] / vl[0];249}250}251252if (b)253{254//generate new rhs255for (int l = 0; l < n; l++)256{257//unpack vl258vl[0] = (_Tp)1;259for (int j = 1; j < m - l; j++)260{261vl[j] = A[(j + l)*astep + l];262}263264//h_l*x265for (int j = 0; j < k; j++)266{267_Tp v_lB = (_Tp)0;268for (int i = l; i < m; i++)269v_lB += vl[i - l] * b[i*bstep + j];270271for (int i = l; i < m; i++)272b[i*bstep + j] -= 2 * vl[i - l] * v_lB * hFactors[l];273}274}275//do back substitution276for (int i = n - 1; i >= 0; i--)277{278for (int j = n - 1; j > i; j--)279{280for (int p = 0; p < k; p++)281b[i*bstep + p] -= b[j*bstep + p] * A[i*astep + j];282}283if (std::abs(A[i*astep + i]) < eps)284return 0;285for (int p = 0; p < k; p++)286b[i*bstep + p] /= A[i*astep + i];287}288}289290return 1;291}292293int QR32f(float* A, size_t astep, int m, int n, int k, float* b, size_t bstep, float* hFactors)294{295CV_INSTRUMENT_REGION();296297int output;298CALL_HAL_RET(QR32f, cv_hal_QR32f, output, A, astep, m, n, k, b, bstep, hFactors);299output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, FLT_EPSILON * 10);300return output;301}302303int QR64f(double* A, size_t astep, int m, int n, int k, double* b, size_t bstep, double* hFactors)304{305CV_INSTRUMENT_REGION();306307int output;308CALL_HAL_RET(QR64f, cv_hal_QR64f, output, A, astep, m, n, k, b, bstep, hFactors)309output = QRImpl(A, astep, m, n, k, b, bstep, hFactors, DBL_EPSILON * 100);310return output;311}312313//=============================================================================314// for compatibility with 3.0315316int LU(float* A, size_t astep, int m, float* b, size_t bstep, int n)317{318return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);319}320321int LU(double* A, size_t astep, int m, double* b, size_t bstep, int n)322{323return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);324}325326bool Cholesky(float* A, size_t astep, int m, float* b, size_t bstep, int n)327{328return CholImpl(A, astep, m, b, bstep, n);329}330331bool Cholesky(double* A, size_t astep, int m, double* b, size_t bstep, int n)332{333return CholImpl(A, astep, m, b, bstep, n);334}335336}} // cv::hal::337338339