///////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.31//32///////////////////////////////////////////////////////////////////////////3334// Primary authors:35// Florian Kainz <[email protected]>36// Rod Bogart <[email protected]>373839//---------------------------------------------------------------------------40//41// class half --42// implementation of non-inline members43//44//---------------------------------------------------------------------------4546#include <assert.h>47#include "half.h"4849using namespace std;5051//-------------------------------------------------------------52// Lookup tables for half-to-float and float-to-half conversion53//-------------------------------------------------------------5455HALF_EXPORT_CONST half::uif half::_toFloat[1 << 16] =56#include "toFloat.h"57HALF_EXPORT_CONST unsigned short half::_eLut[1 << 9] =58#include "eLut.h"596061//-----------------------------------------------62// Overflow handler for float-to-half conversion;63// generates a hardware floating-point overflow,64// which may be trapped by the operating system.65//-----------------------------------------------6667float68half::overflow ()69{70volatile float f = 1e10;7172for (int i = 0; i < 10; i++)73f *= f; // this will overflow before74// the for�loop terminates75return f;76}777879//-----------------------------------------------------80// Float-to-half conversion -- general case, including81// zeroes, denormalized numbers and exponent 
overflows.82//-----------------------------------------------------8384short85half::convert (int i)86{87//88// Our floating point number, f, is represented by the bit89// pattern in integer i. Disassemble that bit pattern into90// the sign, s, the exponent, e, and the significand, m.91// Shift s into the position where it will go in in the92// resulting half number.93// Adjust e, accounting for the different exponent bias94// of float and half (127 versus 15).95//9697register int s = (i >> 16) & 0x00008000;98register int e = ((i >> 23) & 0x000000ff) - (127 - 15);99register int m = i & 0x007fffff;100101//102// Now reassemble s, e and m into a half:103//104105if (e <= 0)106{107if (e < -10)108{109//110// E is less than -10. The absolute value of f is111// less than HALF_MIN (f may be a small normalized112// float, a denormalized float or a zero).113//114// We convert f to a half zero with the same sign as f.115//116117return s;118}119120//121// E is between -10 and 0. F is a normalized float122// whose magnitude is less than HALF_NRM_MIN.123//124// We convert f to a denormalized half.125//126127//128// Add an explicit leading 1 to the significand.129//130131m = m | 0x00800000;132133//134// Round to m to the nearest (10+e)-bit value (with e between135// -10 and 0); in case of a tie, round to the nearest even value.136//137// Rounding may cause the significand to overflow and make138// our number normalized. 
Because of the way a half's bits139// are laid out, we don't have to treat this case separately;140// the code below will handle it correctly.141//142143int t = 14 - e;144int a = (1 << (t - 1)) - 1;145int b = (m >> t) & 1;146147m = (m + a + b) >> t;148149//150// Assemble the half from s, e (zero) and m.151//152153return s | m;154}155else if (e == 0xff - (127 - 15))156{157if (m == 0)158{159//160// F is an infinity; convert f to a half161// infinity with the same sign as f.162//163164return s | 0x7c00;165}166else167{168//169// F is a NAN; we produce a half NAN that preserves170// the sign bit and the 10 leftmost bits of the171// significand of f, with one exception: If the 10172// leftmost bits are all zero, the NAN would turn173// into an infinity, so we have to set at least one174// bit in the significand.175//176177m >>= 13;178return s | 0x7c00 | m | (m == 0);179}180}181else182{183//184// E is greater than zero. F is a normalized float.185// We try to convert f to a normalized half.186//187188//189// Round to m to the nearest 10-bit value. 
In case of190// a tie, round to the nearest even value.191//192193m = m + 0x00000fff + ((m >> 13) & 1);194195if (m & 0x00800000)196{197m = 0; // overflow in significand,198e += 1; // adjust exponent199}200201//202// Handle exponent overflow203//204205if (e > 30)206{207overflow (); // Cause a hardware floating point overflow;208return s | 0x7c00; // if this returns, the half becomes an209} // infinity with the same sign as f.210211//212// Assemble the half from s, e and m.213//214215return s | (e << 10) | (m >> 13);216}217}218219220//---------------------221// Stream I/O operators222//---------------------223224ostream &225operator << (ostream &os, half h)226{227os << float (h);228return os;229}230231232istream &233operator >> (istream &is, half &h)234{235float f;236is >> f;237h = half (f);238return is;239}240241242//---------------------------------------243// Functions to print the bit-layout of244// floats and halfs, mostly for debugging245//---------------------------------------246247void248printBits (ostream &os, half h)249{250unsigned short b = h.bits();251252for (int i = 15; i >= 0; i--)253{254os << (((b >> i) & 1)? '1': '0');255256if (i == 15 || i == 10)257os << ' ';258}259}260261262void263printBits (ostream &os, float f)264{265half::uif x;266x.f = f;267268for (int i = 31; i >= 0; i--)269{270os << (((x.i >> i) & 1)? '1': '0');271272if (i == 31 || i == 23)273os << ' ';274}275}276277278void279printBits (char c[19], half h)280{281unsigned short b = h.bits();282283for (int i = 15, j = 0; i >= 0; i--, j++)284{285c[j] = (((b >> i) & 1)? '1': '0');286287if (i == 15 || i == 10)288c[++j] = ' ';289}290291c[18] = 0;292}293294295void296printBits (char c[35], float f)297{298half::uif x;299x.f = f;300301for (int i = 31, j = 0; i >= 0; i--, j++)302{303c[j] = (((x.i >> i) & 1)? '1': '0');304305if (i == 31 || i == 23)306c[++j] = ' ';307}308309c[34] = 0;310}311312313