///////////////////////////////////////////////////////////////////////////1//2// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas3// Digital Ltd. LLC4//5// All rights reserved.6//7// Redistribution and use in source and binary forms, with or without8// modification, are permitted provided that the following conditions are9// met:10// * Redistributions of source code must retain the above copyright11// notice, this list of conditions and the following disclaimer.12// * Redistributions in binary form must reproduce the above13// copyright notice, this list of conditions and the following disclaimer14// in the documentation and/or other materials provided with the15// distribution.16// * Neither the name of Industrial Light & Magic nor the names of17// its contributors may be used to endorse or promote products derived18// from this software without specific prior written permission.19//20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.31//32///////////////////////////////////////////////////////////////////////////3334// Primary authors:35// Florian Kainz <[email protected]>36// Rod Bogart <[email protected]>3738//---------------------------------------------------------------------------39//40// half -- a 16-bit floating point number class:41//42// Type half can represent positive and negative numbers whose43// magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative44// error of 9.8e-4; numbers smaller than 6.1e-5 can be represented45// with an absolute error of 6.0e-8. All integers from -2048 to46// +2048 can be represented exactly.47//48// Type half behaves (almost) like the built-in C++ floating point49// types. In arithmetic expressions, half, float and double can be50// mixed freely. Here are a few examples:51//52// half a (3.5);53// float b (a + sqrt (a));54// a += b;55// b += a;56// b = a + 7;57//58// Conversions from half to float are lossless; all half numbers59// are exactly representable as floats.60//61// Conversions from float to half may not preserve a float's value62// exactly. If a float is not representable as a half, then the63// float value is rounded to the nearest representable half. If a64// float value is exactly in the middle between the two closest65// representable half values, then the float value is rounded to66// the closest half whose least significant bit is zero.67//68// Overflows during float-to-half conversions cause arithmetic69// exceptions. An overflow occurs when the float value to be70// converted is too large to be represented as a half, or if the71// float value is an infinity or a NAN.72//73// The implementation of type half makes the following assumptions74// about the implementation of the built-in C++ types:75//76// float is an IEEE 754 single-precision number77// sizeof (float) == 478// sizeof (unsigned int) == sizeof (float)79// alignof (unsigned int) == alignof (float)80// sizeof (unsigned short) == 281//82//---------------------------------------------------------------------------8384#ifndef _HALF_H_85#define _HALF_H_8687#include <iostream>8889#if defined(OPENEXR_DLL)90#if defined(HALF_EXPORTS)91#define HALF_EXPORT __declspec(dllexport)92#else93#define HALF_EXPORT __declspec(dllimport)94#endif95#define HALF_EXPORT_CONST96#else97#define HALF_EXPORT98#define HALF_EXPORT_CONST const99#endif100101class HALF_EXPORT half102{103public:104105//-------------106// Constructors107//-------------108109half (); // no initialization110half (float f);111112113//--------------------114// Conversion to float115//--------------------116117operator float () const;118119120//------------121// Unary minus122//------------123124half operator - () const;125126127//-----------128// Assignment129//-----------130131half & operator = (half h);132half & operator = (float f);133134half & operator += (half h);135half & operator += (float f);136137half & operator -= (half h);138half & operator -= (float f);139140half & operator *= (half h);141half & operator *= (float f);142143half & operator /= (half h);144half & operator /= (float f);145146147//---------------------------------------------------------148// Round to n-bit precision (n should be between 0 and 10).149// After rounding, the significand's 10-n least significant150// bits will be zero.151//---------------------------------------------------------152153half round (unsigned int n) const;154155156//--------------------------------------------------------------------157// Classification:158//159// h.isFinite() returns true if h is a normalized number,160// a denormalized number or zero161//162// h.isNormalized() returns true if h is a normalized number163//164// h.isDenormalized() returns true if h is a denormalized number165//166// h.isZero() returns true if h is zero167//168// h.isNan() returns true if h is a NAN169//170// h.isInfinity() returns true if h is a positive171// or a negative infinity172//173// h.isNegative() returns true if the sign bit of h174// is set (negative)175//--------------------------------------------------------------------176177bool isFinite () const;178bool isNormalized () const;179bool isDenormalized () const;180bool isZero () const;181bool isNan () const;182bool isInfinity () const;183bool isNegative () const;184185186//--------------------------------------------187// Special values188//189// posInf() returns +infinity190//191// negInf() returns -infinity192//193// qNan() returns a NAN with the bit194// pattern 0111111111111111195//196// sNan() returns a NAN with the bit197// pattern 0111110111111111198//--------------------------------------------199200static half posInf ();201static half negInf ();202static half qNan ();203static half sNan ();204205206//--------------------------------------207// Access to the internal representation208//--------------------------------------209210unsigned short bits () const;211void setBits (unsigned short bits);212213214public:215216union uif217{218unsigned int i;219float f;220};221222private:223224static short convert (int i);225static float overflow ();226227unsigned short _h;228229static HALF_EXPORT_CONST uif _toFloat[1 << 16];230static HALF_EXPORT_CONST unsigned short _eLut[1 << 9];231};232233//-----------234// Stream I/O235//-----------236237HALF_EXPORT std::ostream & operator << (std::ostream &os, half h);238HALF_EXPORT std::istream & operator >> (std::istream &is, half &h);239240241//----------242// Debugging243//----------244245HALF_EXPORT void printBits (std::ostream &os, half h);246HALF_EXPORT void printBits (std::ostream &os, float f);247HALF_EXPORT void printBits (char c[19], half h);248HALF_EXPORT void printBits (char c[35], float f);249250251//-------------------------------------------------------------------------252// Limits253//254// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float255// constants, but at least one other compiler (gcc 2.96) produces incorrect256// results if they are.257//-------------------------------------------------------------------------258259#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER260261#define HALF_MIN 5.96046448e-08f // Smallest positive half262263#define HALF_NRM_MIN 6.10351562e-05f // Smallest positive normalized half264265#define HALF_MAX 65504.0f // Largest positive half266267#define HALF_EPSILON 0.00097656f // Smallest positive e for which268// half (1.0 + e) != half (1.0)269#else270271#define HALF_MIN 5.96046448e-08 // Smallest positive half272273#define HALF_NRM_MIN 6.10351562e-05 // Smallest positive normalized half274275#define HALF_MAX 65504.0 // Largest positive half276277#define HALF_EPSILON 0.00097656 // Smallest positive e for which278// half (1.0 + e) != half (1.0)279#endif280281282#define HALF_MANT_DIG 11 // Number of digits in mantissa283// (significand + hidden leading 1)284285#define HALF_DIG 2 // Number of base 10 digits that286// can be represented without change287288#define HALF_RADIX 2 // Base of the exponent289290#define HALF_MIN_EXP -13 // Minimum negative integer such that291// HALF_RADIX raised to the power of292// one less than that integer is a293// normalized half294295#define HALF_MAX_EXP 16 // Maximum positive integer such that296// HALF_RADIX raised to the power of297// one less than that integer is a298// normalized half299300#define HALF_MIN_10_EXP -4 // Minimum positive integer such301// that 10 raised to that power is302// a normalized half303304#define HALF_MAX_10_EXP 4 // Maximum positive integer such305// that 10 raised to that power is306// a normalized half307308309//---------------------------------------------------------------------------310//311// Implementation --312//313// Representation of a float:314//315// We assume that a float, f, is an IEEE 754 single-precision316// floating point number, whose bits are arranged as follows:317//318// 31 (msb)319// |320// | 30 23321// | | |322// | | | 22 0 (lsb)323// | | | | |324// X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX325//326// s e m327//328// S is the sign-bit, e is the exponent and m is the significand.329//330// If e is between 1 and 254, f is a normalized number:331//332// s e-127333// f = (-1) * 2 * 1.m334//335// If e is 0, and m is not zero, f is a denormalized number:336//337// s -126338// f = (-1) * 2 * 0.m339//340// If e and m are both zero, f is zero:341//342// f = 0.0343//344// If e is 255, f is an "infinity" or "not a number" (NAN),345// depending on whether m is zero or not.346//347// Examples:348//349// 0 00000000 00000000000000000000000 = 0.0350// 0 01111110 00000000000000000000000 = 0.5351// 0 01111111 00000000000000000000000 = 1.0352// 0 10000000 00000000000000000000000 = 2.0353// 0 10000000 10000000000000000000000 = 3.0354// 1 10000101 11110000010000000000000 = -124.0625355// 0 11111111 00000000000000000000000 = +infinity356// 1 11111111 00000000000000000000000 = -infinity357// 0 11111111 10000000000000000000000 = NAN358// 1 11111111 11111111111111111111111 = NAN359//360// Representation of a half:361//362// Here is the bit-layout for a half number, h:363//364// 15 (msb)365// |366// | 14 10367// | | |368// | | | 9 0 (lsb)369// | | | | |370// X XXXXX XXXXXXXXXX371//372// s e m373//374// S is the sign-bit, e is the exponent and m is the significand.375//376// If e is between 1 and 30, h is a normalized number:377//378// s e-15379// h = (-1) * 2 * 1.m380//381// If e is 0, and m is not zero, h is a denormalized number:382//383// S -14384// h = (-1) * 2 * 0.m385//386// If e and m are both zero, h is zero:387//388// h = 0.0389//390// If e is 31, h is an "infinity" or "not a number" (NAN),391// depending on whether m is zero or not.392//393// Examples:394//395// 0 00000 0000000000 = 0.0396// 0 01110 0000000000 = 0.5397// 0 01111 0000000000 = 1.0398// 0 10000 0000000000 = 2.0399// 0 10000 1000000000 = 3.0400// 1 10101 1111000001 = -124.0625401// 0 11111 0000000000 = +infinity402// 1 11111 0000000000 = -infinity403// 0 11111 1000000000 = NAN404// 1 11111 1111111111 = NAN405//406// Conversion:407//408// Converting from a float to a half requires some non-trivial bit409// manipulations. In some cases, this makes conversion relatively410// slow, but the most common case is accelerated via table lookups.411//412// Converting back from a half to a float is easier because we don't413// have to do any rounding. In addition, there are only 65536414// different half numbers; we can convert each of those numbers once415// and store the results in a table. Later, all conversions can be416// done using only simple table lookups.417//418//---------------------------------------------------------------------------419420421//--------------------422// Simple constructors423//--------------------424425inline426half::half ()427{428// no initialization429}430431432//----------------------------433// Half-from-float constructor434//----------------------------435436inline437half::half (float f)438{439uif x;440441x.f = f;442443if (f == 0)444{445//446// Common special case - zero.447// Preserve the zero's sign bit.448//449450_h = (x.i >> 16);451}452else453{454//455// We extract the combined sign and exponent, e, from our456// floating-point number, f. Then we convert e to the sign457// and exponent of the half number via a table lookup.458//459// For the most common case, where a normalized half is produced,460// the table lookup returns a non-zero value; in this case, all461// we have to do is round f's significand to 10 bits and combine462// the result with e.463//464// For all other cases (overflow, zeroes, denormalized numbers465// resulting from underflow, infinities and NANs), the table466// lookup returns zero, and we call a longer, non-inline function467// to do the float-to-half conversion.468//469470register int e = (x.i >> 23) & 0x000001ff;471472e = _eLut[e];473474if (e)475{476//477// Simple case - round the significand, m, to 10478// bits and combine it with the sign and exponent.479//480481register int m = x.i & 0x007fffff;482_h = e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13);483}484else485{486//487// Difficult case - call a function.488//489490_h = convert (x.i);491}492}493}494495496//------------------------------------------497// Half-to-float conversion via table lookup498//------------------------------------------499500inline501half::operator float () const502{503return _toFloat[_h].f;504}505506507//-------------------------508// Round to n-bit precision509//-------------------------510511inline half512half::round (unsigned int n) const513{514//515// Parameter check.516//517518if (n >= 10)519return *this;520521//522// Disassemble h into the sign, s,523// and the combined exponent and significand, e.524//525526unsigned short s = _h & 0x8000;527unsigned short e = _h & 0x7fff;528529//530// Round the exponent and significand to the nearest value531// where ones occur only in the (10-n) most significant bits.532// Note that the exponent adjusts automatically if rounding533// up causes the significand to overflow.534//535536e >>= 9 - n;537e += e & 1;538e <<= 9 - n;539540//541// Check for exponent overflow.542//543544if (e >= 0x7c00)545{546//547// Overflow occurred -- truncate instead of rounding.548//549550e = _h;551e >>= 10 - n;552e <<= 10 - n;553}554555//556// Put the original sign bit back.557//558559half h;560h._h = s | e;561562return h;563}564565566//-----------------------567// Other inline functions568//-----------------------569570inline half571half::operator - () const572{573half h;574h._h = _h ^ 0x8000;575return h;576}577578579inline half &580half::operator = (half h)581{582_h = h._h;583return *this;584}585586587inline half &588half::operator = (float f)589{590*this = half (f);591return *this;592}593594595inline half &596half::operator += (half h)597{598*this = half (float (*this) + float (h));599return *this;600}601602603inline half &604half::operator += (float f)605{606*this = half (float (*this) + f);607return *this;608}609610611inline half &612half::operator -= (half h)613{614*this = half (float (*this) - float (h));615return *this;616}617618619inline half &620half::operator -= (float f)621{622*this = half (float (*this) - f);623return *this;624}625626627inline half &628half::operator *= (half h)629{630*this = half (float (*this) * float (h));631return *this;632}633634635inline half &636half::operator *= (float f)637{638*this = half (float (*this) * f);639return *this;640}641642643inline half &644half::operator /= (half h)645{646*this = half (float (*this) / float (h));647return *this;648}649650651inline half &652half::operator /= (float f)653{654*this = half (float (*this) / f);655return *this;656}657658659inline bool660half::isFinite () const661{662unsigned short e = (_h >> 10) & 0x001f;663return e < 31;664}665666667inline bool668half::isNormalized () const669{670unsigned short e = (_h >> 10) & 0x001f;671return e > 0 && e < 31;672}673674675inline bool676half::isDenormalized () const677{678unsigned short e = (_h >> 10) & 0x001f;679unsigned short m = _h & 0x3ff;680return e == 0 && m != 0;681}682683684inline bool685half::isZero () const686{687return (_h & 0x7fff) == 0;688}689690691inline bool692half::isNan () const693{694unsigned short e = (_h >> 10) & 0x001f;695unsigned short m = _h & 0x3ff;696return e == 31 && m != 0;697}698699700inline bool701half::isInfinity () const702{703unsigned short e = (_h >> 10) & 0x001f;704unsigned short m = _h & 0x3ff;705return e == 31 && m == 0;706}707708709inline bool710half::isNegative () const711{712return (_h & 0x8000) != 0;713}714715716inline half717half::posInf ()718{719half h;720h._h = 0x7c00;721return h;722}723724725inline half726half::negInf ()727{728half h;729h._h = 0xfc00;730return h;731}732733734inline half735half::qNan ()736{737half h;738h._h = 0x7fff;739return h;740}741742743inline half744half::sNan ()745{746half h;747h._h = 0x7dff;748return h;749}750751752inline unsigned short753half::bits () const754{755return _h;756}757758759inline void760half::setBits (unsigned short bits)761{762_h = bits;763}764765#endif766767768