Path: blob/master/3rdparty/openexr/Imath/ImathMatrixAlgo.cpp
16337 views
///////////////////////////////////////////////////////////////////////////1//2// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas3// Digital Ltd. LLC4//5// All rights reserved.6//7// Redistribution and use in source and binary forms, with or without8// modification, are permitted provided that the following conditions are9// met:10// * Redistributions of source code must retain the above copyright11// notice, this list of conditions and the following disclaimer.12// * Redistributions in binary form must reproduce the above13// copyright notice, this list of conditions and the following disclaimer14// in the documentation and/or other materials provided with the15// distribution.16// * Neither the name of Industrial Light & Magic nor the names of17// its contributors may be used to endorse or promote products derived18// from this software without specific prior written permission.19//20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.31//32///////////////////////////////////////////////////////////////////////////333435363738//----------------------------------------------------------------------------39//40// Implementation of non-template items declared in ImathMatrixAlgo.h41//42//----------------------------------------------------------------------------4344#include "ImathMatrixAlgo.h"45#include <cmath>46#include <algorithm> // for std::max()4748#if defined(OPENEXR_DLL)49#define EXPORT_CONST __declspec(dllexport)50#else51#define EXPORT_CONST const52#endif5354namespace Imath {5556EXPORT_CONST M33f identity33f ( 1, 0, 0,570, 1, 0,580, 0, 1);5960EXPORT_CONST M33d identity33d ( 1, 0, 0,610, 1, 0,620, 0, 1);6364EXPORT_CONST M44f identity44f ( 1, 0, 0, 0,650, 1, 0, 0,660, 0, 1, 0,670, 0, 0, 1);6869EXPORT_CONST M44d identity44d ( 1, 0, 0, 0,700, 1, 0, 0,710, 0, 1, 0,720, 0, 0, 1);7374namespace75{7677class KahanSum78{79public:80KahanSum() : _total(0), _correction(0) {}8182void83operator+= (const double val)84{85const double y = val - _correction;86const double t = _total + y;87_correction = (t - _total) - y;88_total = t;89}9091double get() const92{93return _total;94}9596private:97double _total;98double _correction;99};100101}102103template <typename T>104M44d105procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const T* weights, const size_t numPoints, const bool doScale)106{107if (numPoints == 0)108return M44d();109110// Always do the accumulation in double precision:111V3d Acenter (0.0);112V3d Bcenter (0.0);113double weightsSum = 0.0;114115if (weights == 0)116{117for (int i = 0; i < numPoints; ++i)118{119Acenter += (V3d) A[i];120Bcenter += (V3d) B[i];121}122weightsSum = (double) numPoints;123}124else125{126for (int i = 0; i < numPoints; ++i)127{128const double w = weights[i];129weightsSum += w;130131Acenter += w * (V3d) A[i];132Bcenter += w * (V3d) B[i];133}134}135136if (weightsSum == 0)137return M44d();138139Acenter /= weightsSum;140Bcenter /= weightsSum;141142//143// Find Q such that |Q*A - B| (actually A-Acenter and B-Bcenter, weighted)144// is minimized in the least squares sense.145// From Golub/Van Loan, p.601146//147// A,B are 3xn148// Let C = B A^T (where A is 3xn and B^T is nx3, so C is 3x3)149// Compute the SVD: C = U D V^T (U,V rotations, D diagonal).150// Throw away the D part, and return Q = U V^T151M33d C (0.0);152if (weights == 0)153{154for (int i = 0; i < numPoints; ++i)155C += outerProduct ((V3d) B[i] - Bcenter, (V3d) A[i] - Acenter);156}157else158{159for (int i = 0; i < numPoints; ++i)160{161const double w = weights[i];162C += outerProduct (w * ((V3d) B[i] - Bcenter), (V3d) A[i] - Acenter);163}164}165166M33d U, V;167V3d S;168jacobiSVD (C, U, S, V, Imath::limits<double>::epsilon(), true);169170// We want Q.transposed() here since we are going to be using it in the171// Imath style (multiplying vectors on the right, v' = v*A^T):172const M33d Qt = V * U.transposed();173174double s = 1.0;175if (doScale && numPoints > 1)176{177// Finding a uniform scale: let us assume the Q is completely fixed178// at this point (solving for both simultaneously seems much harder).179// We are trying to compute (again, per Golub and van Loan)180// min || s*A*Q - B ||_F181// Notice that we've jammed a uniform scale in front of the Q.182// Now, the Frobenius norm (the least squares norm over matrices)183// has the neat property that it is equivalent to minimizing the trace184// of M^T*M (see your friendly neighborhood linear algebra text for a185// derivation). Thus, we can expand this out as186// min tr (s*A*Q - B)^T*(s*A*Q - B)187// = min tr(Q^T*A^T*s*s*A*Q) + tr(B^T*B) - 2*tr(Q^T*A^T*s*B) by linearity of the trace188// = min s^2 tr(A^T*A) + tr(B^T*B) - 2*s*tr(Q^T*A^T*B) using the fact that the trace is invariant189// under similarity transforms Q*M*Q^T190// If we differentiate w.r.t. s and set this to 0, we get191// 0 = 2*s*tr(A^T*A) - 2*tr(Q^T*A^T*B)192// so193// 2*s*tr(A^T*A) = 2*s*tr(Q^T*A^T*B)194// s = tr(Q^T*A^T*B) / tr(A^T*A)195196KahanSum traceATA;197if (weights == 0)198{199for (int i = 0; i < numPoints; ++i)200traceATA += ((V3d) A[i] - Acenter).length2();201}202else203{204for (int i = 0; i < numPoints; ++i)205traceATA += ((double) weights[i]) * ((V3d) A[i] - Acenter).length2();206}207208KahanSum traceBATQ;209for (int i = 0; i < 3; ++i)210for (int j = 0; j < 3; ++j)211traceBATQ += Qt[j][i] * C[i][j];212213s = traceBATQ.get() / traceATA.get();214}215216// Q is the rotation part of what we want to return.217// The entire transform is:218// (translate origin to Bcenter) * Q * (translate Acenter to origin)219// last first220// The effect of this on a point is:221// (translate origin to Bcenter) * Q * (translate Acenter to origin) * point222// = (translate origin to Bcenter) * Q * (-Acenter + point)223// = (translate origin to Bcenter) * (-Q*Acenter + Q*point)224// = (translate origin to Bcenter) * (translate Q*Acenter to origin) * Q*point225// = (translate Q*Acenter to Bcenter) * Q*point226// So what we want to return is:227// (translate Q*Acenter to Bcenter) * Q228//229// In block form, this is:230// [ 1 0 0 | ] [ 0 ] [ 1 0 0 | ] [ 1 0 0 | ] [ | ] [ ]231// [ 0 1 0 tb ] [ s*Q 0 ] [ 0 1 0 -ta ] = [ 0 1 0 tb ] [ s*Q -s*Q*ta ] = [ Q tb-s*Q*ta ]232// [ 0 0 1 | ] [ 0 ] [ 0 0 1 | ] [ 0 0 1 | ] [ | ] [ ]233// [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ] [ 0 0 0 1 ]234// (ofc the whole thing is transposed for Imath).235const V3d translate = Bcenter - s*Acenter*Qt;236237return M44d (s*Qt.x[0][0], s*Qt.x[0][1], s*Qt.x[0][2], T(0),238s*Qt.x[1][0], s*Qt.x[1][1], s*Qt.x[1][2], T(0),239s*Qt.x[2][0], s*Qt.x[2][1], s*Qt.x[2][2], T(0),240translate.x, translate.y, translate.z, T(1));241} // procrustesRotationAndTranslation242243template <typename T>244M44d245procrustesRotationAndTranslation (const Vec3<T>* A, const Vec3<T>* B, const size_t numPoints, const bool doScale)246{247return procrustesRotationAndTranslation (A, B, (const T*) 0, numPoints, doScale);248} // procrustesRotationAndTranslation249250251template M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const size_t numPoints, const bool doScale);252template M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const size_t numPoints, const bool doScale);253template M44d procrustesRotationAndTranslation (const V3d* from, const V3d* to, const double* weights, const size_t numPoints, const bool doScale);254template M44d procrustesRotationAndTranslation (const V3f* from, const V3f* to, const float* weights, const size_t numPoints, const bool doScale);255256257namespace258{259260// Applies the 2x2 Jacobi rotation261// [ c s 0 ] [ 1 0 0 ] [ c 0 s ]262// [ -s c 0 ] or [ 0 c s ] or [ 0 1 0 ]263// [ 0 0 1 ] [ 0 -s c ] [ -s 0 c ]264// from the right; that is, computes265// J * A266// for the Jacobi rotation J and the matrix A. This is efficient because we267// only need to touch exactly the 2 columns that are affected, so we never268// need to explicitly construct the J matrix.269template <typename T, int j, int k>270void271jacobiRotateRight (Imath::Matrix33<T>& A,272const T c,273const T s)274{275for (int i = 0; i < 3; ++i)276{277const T tau1 = A[i][j];278const T tau2 = A[i][k];279A[i][j] = c * tau1 - s * tau2;280A[i][k] = s * tau1 + c * tau2;281}282}283284template <typename T>285void286jacobiRotateRight (Imath::Matrix44<T>& A,287const int j,288const int k,289const T c,290const T s)291{292for (int i = 0; i < 4; ++i)293{294const T tau1 = A[i][j];295const T tau2 = A[i][k];296A[i][j] = c * tau1 - s * tau2;297A[i][k] = s * tau1 + c * tau2;298}299}300301// This routine solves the 2x2 SVD:302// [ c1 s1 ] [ w x ] [ c2 s2 ] [ d1 0 ]303// [ ] [ ] [ ] = [ ]304// [ -s1 c1 ] [ y z ] [ -s2 c2 ] [ 0 d2 ]305// where306// [ w x ]307// A = [ ]308// [ y z ]309// is the subset of A consisting of the [j,k] entries, A([j k], [j k]) in310// Matlab parlance. The method is the 'USVD' algorithm described in the311// following paper:312// 'Computation of the Singular Value Decomposition using Mesh-Connected Processors'313// by Richard P. Brent, Franklin T. Luk, and Charles Van Loan314// It breaks the computation into two steps: the first symmetrizes the matrix,315// and the second diagonalizes the symmetric matrix.316template <typename T, int j, int k, int l>317bool318twoSidedJacobiRotation (Imath::Matrix33<T>& A,319Imath::Matrix33<T>& U,320Imath::Matrix33<T>& V,321const T tol)322{323// Load everything into local variables to make things easier on the324// optimizer:325const T w = A[j][j];326const T x = A[j][k];327const T y = A[k][j];328const T z = A[k][k];329330// We will keep track of whether we're actually performing any rotations,331// since if the matrix is already diagonal we'll end up with the identity332// as our Jacobi rotation and we can short-circuit.333bool changed = false;334335// The first step is to symmetrize the 2x2 matrix,336// [ c s ]^T [ w x ] = [ p q ]337// [ -s c ] [ y z ] [ q r ]338T mu_1 = w + z;339T mu_2 = x - y;340341T c, s;342if (std::abs(mu_2) <= tol*std::abs(mu_1)) // Already symmetric (to tolerance)343{ // Note that the <= is important here344c = T(1); // because we want to bypass the computation345s = T(0); // of rho if mu_1 = mu_2 = 0.346347const T p = w;348const T r = z;349mu_1 = r - p;350mu_2 = x + y;351}352else353{354const T rho = mu_1 / mu_2;355s = T(1) / std::sqrt (T(1) + rho*rho); // TODO is there a native inverse square root function?356if (rho < 0)357s = -s;358c = s * rho;359360mu_1 = s * (x + y) + c * (z - w); // = r - p361mu_2 = T(2) * (c * x - s * z); // = 2*q362363changed = true;364}365366// The second stage diagonalizes,367// [ c2 s2 ]^T [ p q ] [ c2 s2 ] = [ d1 0 ]368// [ -s2 c2 ] [ q r ] [ -s2 c2 ] [ 0 d2 ]369T c_2, s_2;370if (std::abs(mu_2) <= tol*std::abs(mu_1))371{372c_2 = T(1);373s_2 = T(0);374}375else376{377const T rho_2 = mu_1 / mu_2;378T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));379if (rho_2 < 0)380t_2 = -t_2;381c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);382s_2 = c_2 * t_2;383384changed = true;385}386387const T c_1 = c_2 * c - s_2 * s;388const T s_1 = s_2 * c + c_2 * s;389390if (!changed)391{392// We've decided that the off-diagonal entries are already small393// enough, so we'll set them to zero. This actually appears to result394// in smaller errors than leaving them be, possibly because it prevents395// us from trying to do extra rotations later that we don't need.396A[k][j] = 0;397A[j][k] = 0;398return false;399}400401const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);402const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);403404// For the entries we just zeroed out, we'll just set them to 0, since405// they should be 0 up to machine precision.406A[j][j] = d_1;407A[k][k] = d_2;408A[k][j] = 0;409A[j][k] = 0;410411// Rotate the entries that _weren't_ involved in the 2x2 SVD:412{413// Rotate on the left by414// [ c1 s1 0 ]^T [ c1 0 s1 ]^T [ 1 0 0 ]^T415// [ -s1 c1 0 ] or [ 0 1 0 ] or [ 0 c1 s1 ]416// [ 0 0 1 ] [ -s1 0 c1 ] [ 0 -s1 c1 ]417// This has the effect of adding the (weighted) ith and jth _rows_ to418// each other.419const T tau1 = A[j][l];420const T tau2 = A[k][l];421A[j][l] = c_1 * tau1 - s_1 * tau2;422A[k][l] = s_1 * tau1 + c_1 * tau2;423}424425{426// Rotate on the right by427// [ c2 s2 0 ] [ c2 0 s2 ] [ 1 0 0 ]428// [ -s2 c2 0 ] or [ 0 1 0 ] or [ 0 c2 s2 ]429// [ 0 0 1 ] [ -s2 0 c2 ] [ 0 -s2 c2 ]430// This has the effect of adding the (weighted) ith and jth _columns_ to431// each other.432const T tau1 = A[l][j];433const T tau2 = A[l][k];434A[l][j] = c_2 * tau1 - s_2 * tau2;435A[l][k] = s_2 * tau1 + c_2 * tau2;436}437438// Now apply the rotations to U and V:439// Remember that we have440// R1^T * A * R2 = D441// This is in the 2x2 case, but after doing a bunch of these442// we will get something like this for the 3x3 case:443// ... R1b^T * R1a^T * A * R2a * R2b * ... = D444// ----------------- ---------------445// = U^T = V446// So,447// U = R1a * R1b * ...448// V = R2a * R2b * ...449jacobiRotateRight<T, j, k> (U, c_1, s_1);450jacobiRotateRight<T, j, k> (V, c_2, s_2);451452return true;453}454455template <typename T>456bool457twoSidedJacobiRotation (Imath::Matrix44<T>& A,458int j,459int k,460Imath::Matrix44<T>& U,461Imath::Matrix44<T>& V,462const T tol)463{464// Load everything into local variables to make things easier on the465// optimizer:466const T w = A[j][j];467const T x = A[j][k];468const T y = A[k][j];469const T z = A[k][k];470471// We will keep track of whether we're actually performing any rotations,472// since if the matrix is already diagonal we'll end up with the identity473// as our Jacobi rotation and we can short-circuit.474bool changed = false;475476// The first step is to symmetrize the 2x2 matrix,477// [ c s ]^T [ w x ] = [ p q ]478// [ -s c ] [ y z ] [ q r ]479T mu_1 = w + z;480T mu_2 = x - y;481482T c, s;483if (std::abs(mu_2) <= tol*std::abs(mu_1)) // Already symmetric (to tolerance)484{ // Note that the <= is important here485c = T(1); // because we want to bypass the computation486s = T(0); // of rho if mu_1 = mu_2 = 0.487488const T p = w;489const T r = z;490mu_1 = r - p;491mu_2 = x + y;492}493else494{495const T rho = mu_1 / mu_2;496s = T(1) / std::sqrt (T(1) + rho*rho); // TODO is there a native inverse square root function?497if (rho < 0)498s = -s;499c = s * rho;500501mu_1 = s * (x + y) + c * (z - w); // = r - p502mu_2 = T(2) * (c * x - s * z); // = 2*q503504changed = true;505}506507// The second stage diagonalizes,508// [ c2 s2 ]^T [ p q ] [ c2 s2 ] = [ d1 0 ]509// [ -s2 c2 ] [ q r ] [ -s2 c2 ] [ 0 d2 ]510T c_2, s_2;511if (std::abs(mu_2) <= tol*std::abs(mu_1))512{513c_2 = T(1);514s_2 = T(0);515}516else517{518const T rho_2 = mu_1 / mu_2;519T t_2 = T(1) / (std::abs(rho_2) + std::sqrt(1 + rho_2*rho_2));520if (rho_2 < 0)521t_2 = -t_2;522c_2 = T(1) / std::sqrt (T(1) + t_2*t_2);523s_2 = c_2 * t_2;524525changed = true;526}527528const T c_1 = c_2 * c - s_2 * s;529const T s_1 = s_2 * c + c_2 * s;530531if (!changed)532{533// We've decided that the off-diagonal entries are already small534// enough, so we'll set them to zero. This actually appears to result535// in smaller errors than leaving them be, possibly because it prevents536// us from trying to do extra rotations later that we don't need.537A[k][j] = 0;538A[j][k] = 0;539return false;540}541542const T d_1 = c_1*(w*c_2 - x*s_2) - s_1*(y*c_2 - z*s_2);543const T d_2 = s_1*(w*s_2 + x*c_2) + c_1*(y*s_2 + z*c_2);544545// For the entries we just zeroed out, we'll just set them to 0, since546// they should be 0 up to machine precision.547A[j][j] = d_1;548A[k][k] = d_2;549A[k][j] = 0;550A[j][k] = 0;551552// Rotate the entries that _weren't_ involved in the 2x2 SVD:553for (int l = 0; l < 4; ++l)554{555if (l == j || l == k)556continue;557558// Rotate on the left by559// [ 1 ]560// [ . ]561// [ c2 s2 ] j562// [ 1 ]563// [ -s2 c2 ] k564// [ . ]565// [ 1 ]566// j k567//568// This has the effect of adding the (weighted) ith and jth _rows_ to569// each other.570const T tau1 = A[j][l];571const T tau2 = A[k][l];572A[j][l] = c_1 * tau1 - s_1 * tau2;573A[k][l] = s_1 * tau1 + c_1 * tau2;574}575576for (int l = 0; l < 4; ++l)577{578// We set the A[j/k][j/k] entries already579if (l == j || l == k)580continue;581582// Rotate on the right by583// [ 1 ]584// [ . ]585// [ c2 s2 ] j586// [ 1 ]587// [ -s2 c2 ] k588// [ . ]589// [ 1 ]590// j k591//592// This has the effect of adding the (weighted) ith and jth _columns_ to593// each other.594const T tau1 = A[l][j];595const T tau2 = A[l][k];596A[l][j] = c_2 * tau1 - s_2 * tau2;597A[l][k] = s_2 * tau1 + c_2 * tau2;598}599600// Now apply the rotations to U and V:601// Remember that we have602// R1^T * A * R2 = D603// This is in the 2x2 case, but after doing a bunch of these604// we will get something like this for the 3x3 case:605// ... R1b^T * R1a^T * A * R2a * R2b * ... = D606// ----------------- ---------------607// = U^T = V608// So,609// U = R1a * R1b * ...610// V = R2a * R2b * ...611jacobiRotateRight (U, j, k, c_1, s_1);612jacobiRotateRight (V, j, k, c_2, s_2);613614return true;615}616617template <typename T>618void619swapColumns (Imath::Matrix33<T>& A, int j, int k)620{621for (int i = 0; i < 3; ++i)622std::swap (A[i][j], A[i][k]);623}624625template <typename T>626T627maxOffDiag (const Imath::Matrix33<T>& A)628{629T result = 0;630result = std::max (result, std::abs (A[0][1]));631result = std::max (result, std::abs (A[0][2]));632result = std::max (result, std::abs (A[1][0]));633result = std::max (result, std::abs (A[1][2]));634result = std::max (result, std::abs (A[2][0]));635result = std::max (result, std::abs (A[2][1]));636return result;637}638639template <typename T>640T641maxOffDiag (const Imath::Matrix44<T>& A)642{643T result = 0;644for (int i = 0; i < 4; ++i)645{646for (int j = 0; j < 4; ++j)647{648if (i != j)649result = std::max (result, std::abs (A[i][j]));650}651}652653return result;654}655656template <typename T>657void658twoSidedJacobiSVD (Imath::Matrix33<T> A,659Imath::Matrix33<T>& U,660Imath::Vec3<T>& S,661Imath::Matrix33<T>& V,662const T tol,663const bool forcePositiveDeterminant)664{665// The two-sided Jacobi SVD works by repeatedly zeroing out666// off-diagonal entries of the matrix, 2 at a time. Basically,667// we can take our 3x3 matrix,668// [* * *]669// [* * *]670// [* * *]671// and use a pair of orthogonal transforms to zero out, say, the672// pair of entries (0, 1) and (1, 0):673// [ c1 s1 ] [* * *] [ c2 s2 ] [* *]674// [-s1 c1 ] [* * *] [-s2 c2 ] = [ * *]675// [ 1] [* * *] [ 1] [* * *]676// When we go to zero out the next pair of entries (say, (0, 2) and (2, 0))677// then we don't expect those entries to stay 0:678// [ c1 s1 ] [* *] [ c2 s2 ] [* * ]679// [-s1 c1 ] [ * *] [-s2 c2 ] = [* * *]680// [ 1] [* * *] [ 1] [ * *]681// However, if we keep doing this, we'll find that the off-diagonal entries682// converge to 0 fairly quickly (convergence should be roughly cubic). The683// result is a diagonal A matrix and a bunch of orthogonal transforms:684// [* * *] [* ]685// L1 L2 ... Ln [* * *] Rn ... R2 R1 = [ * ]686// [* * *] [ *]687// ------------ ------- ------------ -------688// U^T A V S689// This turns out to be highly accurate because (1) orthogonal transforms690// are extremely stable to compute and apply (this is why QR factorization691// works so well, FWIW) and because (2) by applying everything to the original692// matrix A instead of computing (A^T * A) we avoid any precision loss that693// would result from that.694U.makeIdentity();695V.makeIdentity();696697const int maxIter = 20; // In case we get really unlucky, prevents infinite loops698const T absTol = tol * maxOffDiag (A); // Tolerance is in terms of the maximum699if (absTol != 0) // _off-diagonal_ entry.700{701int numIter = 0;702do703{704++numIter;705bool changed = twoSidedJacobiRotation<T, 0, 1, 2> (A, U, V, tol);706changed = twoSidedJacobiRotation<T, 0, 2, 1> (A, U, V, tol) || changed;707changed = twoSidedJacobiRotation<T, 1, 2, 0> (A, U, V, tol) || changed;708if (!changed)709break;710} while (maxOffDiag(A) > absTol && numIter < maxIter);711}712713// The off-diagonal entries are (effectively) 0, so whatever's left on the714// diagonal are the singular values:715S.x = A[0][0];716S.y = A[1][1];717S.z = A[2][2];718719// Nothing thus far has guaranteed that the singular values are positive,720// so let's go back through and flip them if not (since by contract we are721// supposed to return all positive SVs):722for (int i = 0; i < 3; ++i)723{724if (S[i] < 0)725{726// If we flip S[i], we need to flip the corresponding column of U727// (we could also pick V if we wanted; it doesn't really matter):728S[i] = -S[i];729for (int j = 0; j < 3; ++j)730U[j][i] = -U[j][i];731}732}733734// Order the singular values from largest to smallest; this requires735// exactly two passes through the data using bubble sort:736for (int i = 0; i < 2; ++i)737{738for (int j = 0; j < (2 - i); ++j)739{740// No absolute values necessary since we already ensured that741// they're positive:742if (S[j] < S[j+1])743{744// If we swap singular values we also have to swap745// corresponding columns in U and V:746std::swap (S[j], S[j+1]);747swapColumns (U, j, j+1);748swapColumns (V, j, j+1);749}750}751}752753if (forcePositiveDeterminant)754{755// We want to guarantee that the returned matrices always have positive756// determinant. We can do this by adding the appropriate number of757// matrices of the form:758// [ 1 ]759// L = [ 1 ]760// [ -1 ]761// Note that L' = L and L*L = Identity. Thus we can add:762// U*L*L*S*V = (U*L)*(L*S)*V763// if U has a negative determinant, and764// U*S*L*L*V = U*(S*L)*(L*V)765// if V has a neg. determinant.766if (U.determinant() < 0)767{768for (int i = 0; i < 3; ++i)769U[i][2] = -U[i][2];770S.z = -S.z;771}772773if (V.determinant() < 0)774{775for (int i = 0; i < 3; ++i)776V[i][2] = -V[i][2];777S.z = -S.z;778}779}780}781782template <typename T>783void784twoSidedJacobiSVD (Imath::Matrix44<T> A,785Imath::Matrix44<T>& U,786Imath::Vec4<T>& S,787Imath::Matrix44<T>& V,788const T tol,789const bool forcePositiveDeterminant)790{791// Please see the Matrix33 version for a detailed description of the algorithm.792U.makeIdentity();793V.makeIdentity();794795const int maxIter = 20; // In case we get really unlucky, prevents infinite loops796const T absTol = tol * maxOffDiag (A); // Tolerance is in terms of the maximum797if (absTol != 0) // _off-diagonal_ entry.798{799int numIter = 0;800do801{802++numIter;803bool changed = twoSidedJacobiRotation (A, 0, 1, U, V, tol);804changed = twoSidedJacobiRotation (A, 0, 2, U, V, tol) || changed;805changed = twoSidedJacobiRotation (A, 0, 3, U, V, tol) || changed;806changed = twoSidedJacobiRotation (A, 1, 2, U, V, tol) || changed;807changed = twoSidedJacobiRotation (A, 1, 3, U, V, tol) || changed;808changed = twoSidedJacobiRotation (A, 2, 3, U, V, tol) || changed;809if (!changed)810break;811} while (maxOffDiag(A) > absTol && numIter < maxIter);812}813814// The off-diagonal entries are (effectively) 0, so whatever's left on the815// diagonal are the singular values:816S[0] = A[0][0];817S[1] = A[1][1];818S[2] = A[2][2];819S[3] = A[3][3];820821// Nothing thus far has guaranteed that the singular values are positive,822// so let's go back through and flip them if not (since by contract we are823// supposed to return all positive SVs):824for (int i = 0; i < 4; ++i)825{826if (S[i] < 0)827{828// If we flip S[i], we need to flip the corresponding column of U829// (we could also pick V if we wanted; it doesn't really matter):830S[i] = -S[i];831for (int j = 0; j < 4; ++j)832U[j][i] = -U[j][i];833}834}835836// Order the singular values from largest to smallest using insertion sort:837for (int i = 1; i < 4; ++i)838{839const Imath::Vec4<T> uCol (U[0][i], U[1][i], U[2][i], U[3][i]);840const Imath::Vec4<T> vCol (V[0][i], V[1][i], V[2][i], V[3][i]);841const T sVal = S[i];842843int j = i - 1;844while (std::abs (S[j]) < std::abs (sVal))845{846for (int k = 0; k < 4; ++k)847U[k][j+1] = U[k][j];848for (int k = 0; k < 4; ++k)849V[k][j+1] = V[k][j];850S[j+1] = S[j];851852--j;853if (j < 0)854break;855}856857for (int k = 0; k < 4; ++k)858U[k][j+1] = uCol[k];859for (int k = 0; k < 4; ++k)860V[k][j+1] = vCol[k];861S[j+1] = sVal;862}863864if (forcePositiveDeterminant)865{866// We want to guarantee that the returned matrices always have positive867// determinant. We can do this by adding the appropriate number of868// matrices of the form:869// [ 1 ]870// L = [ 1 ]871// [ 1 ]872// [ -1 ]873// Note that L' = L and L*L = Identity. Thus we can add:874// U*L*L*S*V = (U*L)*(L*S)*V875// if U has a negative determinant, and876// U*S*L*L*V = U*(S*L)*(L*V)877// if V has a neg. determinant.878if (U.determinant() < 0)879{880for (int i = 0; i < 4; ++i)881U[i][3] = -U[i][3];882S[3] = -S[3];883}884885if (V.determinant() < 0)886{887for (int i = 0; i < 4; ++i)888V[i][3] = -V[i][3];889S[3] = -S[3];890}891}892}893894}895896template <typename T>897void898jacobiSVD (const Imath::Matrix33<T>& A,899Imath::Matrix33<T>& U,900Imath::Vec3<T>& S,901Imath::Matrix33<T>& V,902const T tol,903const bool forcePositiveDeterminant)904{905twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);906}907908template <typename T>909void910jacobiSVD (const Imath::Matrix44<T>& A,911Imath::Matrix44<T>& U,912Imath::Vec4<T>& S,913Imath::Matrix44<T>& V,914const T tol,915const bool forcePositiveDeterminant)916{917twoSidedJacobiSVD (A, U, S, V, tol, forcePositiveDeterminant);918}919920template void jacobiSVD (const Imath::Matrix33<float>& A,921Imath::Matrix33<float>& U,922Imath::Vec3<float>& S,923Imath::Matrix33<float>& V,924const float tol,925const bool forcePositiveDeterminant);926template void jacobiSVD (const Imath::Matrix33<double>& A,927Imath::Matrix33<double>& U,928Imath::Vec3<double>& S,929Imath::Matrix33<double>& V,930const double tol,931const bool forcePositiveDeterminant);932template void jacobiSVD (const Imath::Matrix44<float>& A,933Imath::Matrix44<float>& U,934Imath::Vec4<float>& S,935Imath::Matrix44<float>& V,936const float tol,937const bool forcePositiveDeterminant);938template void jacobiSVD (const Imath::Matrix44<double>& A,939Imath::Matrix44<double>& U,940Imath::Vec4<double>& S,941Imath::Matrix44<double>& V,942const double tol,943const bool forcePositiveDeterminant);944945namespace946{947948template <int j, int k, typename TM>949inline950void951jacobiRotateRight (TM& A,952const typename TM::BaseType s,953const typename TM::BaseType tau)954{955typedef typename TM::BaseType T;956957for (unsigned int i = 0; i < TM::dimensions(); ++i)958{959const T nu1 = A[i][j];960const T nu2 = A[i][k];961A[i][j] -= s * (nu2 + tau * nu1);962A[i][k] += s * (nu1 - tau * nu2);963}964}965966template <int j, int k, int l, typename T>967bool968jacobiRotation (Matrix33<T>& A,969Matrix33<T>& V,970Vec3<T>& Z,971const T tol)972{973// Load everything into local variables to make things easier on the974// optimizer:975const T x = A[j][j];976const T y = A[j][k];977const T z = A[k][k];978979// The first stage diagonalizes,980// [ c s ]^T [ x y ] [ c -s ] = [ d1 0 ]981// [ -s c ] [ y z ] [ s c ] [ 0 d2 ]982const T mu1 = z - x;983const T mu2 = 2 * y;984985if (std::abs(mu2) <= tol*std::abs(mu1))986{987// We've decided that the off-diagonal entries are already small988// enough, so we'll set them to zero. This actually appears to result989// in smaller errors than leaving them be, possibly because it prevents990// us from trying to do extra rotations later that we don't need.991A[j][k] = 0;992return false;993}994const T rho = mu1 / mu2;995const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));996const T c = T(1) / std::sqrt (T(1) + t*t);997const T s = t * c;998const T tau = s / (T(1) + c);999const T h = t * y;10001001// Update diagonal elements.1002Z[j] -= h;1003Z[k] += h;1004A[j][j] -= h;1005A[k][k] += h;10061007// For the entries we just zeroed out, we'll just set them to 0, since1008// they should be 0 up to machine precision.1009A[j][k] = 0;10101011// We only update upper triagnular elements of A, since1012// A is supposed to be symmetric.1013T& offd1 = l < j ? A[l][j] : A[j][l];1014T& offd2 = l < k ? A[l][k] : A[k][l];1015const T nu1 = offd1;1016const T nu2 = offd2;1017offd1 = nu1 - s * (nu2 + tau * nu1);1018offd2 = nu2 + s * (nu1 - tau * nu2);10191020// Apply rotation to V1021jacobiRotateRight<j, k> (V, s, tau);10221023return true;1024}10251026template <int j, int k, int l1, int l2, typename T>1027bool1028jacobiRotation (Matrix44<T>& A,1029Matrix44<T>& V,1030Vec4<T>& Z,1031const T tol)1032{1033const T x = A[j][j];1034const T y = A[j][k];1035const T z = A[k][k];10361037const T mu1 = z - x;1038const T mu2 = T(2) * y;10391040// Let's see if rho^(-1) = mu2 / mu1 is less than tol1041// This test also checks if rho^2 will overflow1042// when tol^(-1) < sqrt(limits<T>::max()).1043if (std::abs(mu2) <= tol*std::abs(mu1))1044{1045A[j][k] = 0;1046return true;1047}10481049const T rho = mu1 / mu2;1050const T t = (rho < 0 ? T(-1) : T(1)) / (std::abs(rho) + std::sqrt(1 + rho*rho));1051const T c = T(1) / std::sqrt (T(1) + t*t);1052const T s = c * t;1053const T tau = s / (T(1) + c);1054const T h = t * y;10551056Z[j] -= h;1057Z[k] += h;1058A[j][j] -= h;1059A[k][k] += h;1060A[j][k] = 0;10611062{1063T& offd1 = l1 < j ? A[l1][j] : A[j][l1];1064T& offd2 = l1 < k ? A[l1][k] : A[k][l1];1065const T nu1 = offd1;1066const T nu2 = offd2;1067offd1 -= s * (nu2 + tau * nu1);1068offd2 += s * (nu1 - tau * nu2);1069}10701071{1072T& offd1 = l2 < j ? A[l2][j] : A[j][l2];1073T& offd2 = l2 < k ? A[l2][k] : A[k][l2];1074const T nu1 = offd1;1075const T nu2 = offd2;1076offd1 -= s * (nu2 + tau * nu1);1077offd2 += s * (nu1 - tau * nu2);1078}10791080jacobiRotateRight<j, k> (V, s, tau);10811082return true;1083}10841085template <typename TM>1086inline1087typename TM::BaseType1088maxOffDiagSymm (const TM& A)1089{1090typedef typename TM::BaseType T;1091T result = 0;1092for (unsigned int i = 0; i < TM::dimensions(); ++i)1093for (unsigned int j = i+1; j < TM::dimensions(); ++j)1094result = std::max (result, std::abs (A[i][j]));10951096return result;1097}10981099} // namespace11001101template <typename T>1102void1103jacobiEigenSolver (Matrix33<T>& A,1104Vec3<T>& S,1105Matrix33<T>& V,1106const T tol)1107{1108V.makeIdentity();1109for(int i = 0; i < 3; ++i) {1110S[i] = A[i][i];1111}11121113const int maxIter = 20; // In case we get really unlucky, prevents infinite loops1114const T absTol = tol * maxOffDiagSymm (A); // Tolerance is in terms of the maximum1115if (absTol != 0) // _off-diagonal_ entry.1116{1117int numIter = 0;1118do1119{1120// Z is for accumulating small changes (h) to diagonal entries1121// of A for one sweep. Adding h's directly to A might cause1122// a cancellation effect when h is relatively very small to1123// the corresponding diagonal entry of A and1124// this will increase numerical errors1125Vec3<T> Z(0, 0, 0);1126++numIter;1127bool changed = jacobiRotation<0, 1, 2> (A, V, Z, tol);1128changed = jacobiRotation<0, 2, 1> (A, V, Z, tol) || changed;1129changed = jacobiRotation<1, 2, 0> (A, V, Z, tol) || changed;1130// One sweep passed. Add accumulated changes (Z) to singular values (S)1131// Update diagonal elements of A for better accuracy as well.1132for(int i = 0; i < 3; ++i) {1133A[i][i] = S[i] += Z[i];1134}1135if (!changed)1136break;1137} while (maxOffDiagSymm(A) > absTol && numIter < maxIter);1138}1139}11401141template <typename T>1142void1143jacobiEigenSolver (Matrix44<T>& A,1144Vec4<T>& S,1145Matrix44<T>& V,1146const T tol)1147{1148V.makeIdentity();11491150for(int i = 0; i < 4; ++i) {1151S[i] = A[i][i];1152}11531154const int maxIter = 20; // In case we get really unlucky, prevents infinite loops1155const T absTol = tol * maxOffDiagSymm (A); // Tolerance is in terms of the maximum1156if (absTol != 0) // _off-diagonal_ entry.1157{1158int numIter = 0;1159do1160{1161++numIter;1162Vec4<T> Z(0, 0, 0, 0);1163bool changed = jacobiRotation<0, 1, 2, 3> (A, V, Z, tol);1164changed = jacobiRotation<0, 2, 1, 3> (A, V, Z, tol) || changed;1165changed = jacobiRotation<0, 3, 1, 2> (A, V, Z, tol) || changed;1166changed = jacobiRotation<1, 2, 0, 3> (A, V, Z, tol) || changed;1167changed = jacobiRotation<1, 3, 0, 2> (A, V, Z, tol) || changed;1168changed = jacobiRotation<2, 3, 0, 1> (A, V, Z, tol) || changed;1169for(int i = 0; i < 4; ++i) {1170A[i][i] = S[i] += Z[i];1171}1172if (!changed)1173break;1174} while (maxOffDiagSymm(A) > absTol && numIter < maxIter);1175}1176}11771178template <typename TM, typename TV>1179void1180maxEigenVector (TM& A, TV& V)1181{1182TV S;1183TM MV;1184jacobiEigenSolver(A, S, MV);11851186int maxIdx(0);1187for(unsigned int i = 1; i < TV::dimensions(); ++i)1188{1189if(std::abs(S[i]) > std::abs(S[maxIdx]))1190maxIdx = i;1191}11921193for(unsigned int i = 0; i < TV::dimensions(); ++i)1194V[i] = MV[i][maxIdx];1195}11961197template <typename TM, typename TV>1198void1199minEigenVector (TM& A, TV& V)1200{1201TV S;1202TM MV;1203jacobiEigenSolver(A, S, MV);12041205int minIdx(0);1206for(unsigned int i = 1; i < TV::dimensions(); ++i)1207{1208if(std::abs(S[i]) < std::abs(S[minIdx]))1209minIdx = i;1210}12111212for(unsigned int i = 0; i < TV::dimensions(); ++i)1213V[i] = MV[i][minIdx];1214}12151216template void jacobiEigenSolver (Matrix33<float>& A,1217Vec3<float>& S,1218Matrix33<float>& V,1219const float tol);1220template void jacobiEigenSolver (Matrix33<double>& A,1221Vec3<double>& S,1222Matrix33<double>& V,1223const double tol);1224template void jacobiEigenSolver (Matrix44<float>& A,1225Vec4<float>& S,1226Matrix44<float>& V,1227const float tol);1228template void jacobiEigenSolver (Matrix44<double>& A,1229Vec4<double>& S,1230Matrix44<double>& V,1231const double tol);12321233template void maxEigenVector (Matrix33<float>& A,1234Vec3<float>& S);1235template void maxEigenVector (Matrix44<float>& A,1236Vec4<float>& S);1237template void maxEigenVector (Matrix33<double>& A,1238Vec3<double>& S);1239template void maxEigenVector (Matrix44<double>& A,1240Vec4<double>& S);12411242template void minEigenVector (Matrix33<float>& A,1243Vec3<float>& S);1244template void minEigenVector (Matrix44<float>& A,1245Vec4<float>& S);1246template void minEigenVector (Matrix33<double>& A,1247Vec3<double>& S);1248template void minEigenVector (Matrix44<double>& A,1249Vec4<double>& S);12501251} // namespace Imath125212531254