/*******************************************************************************1* Copyright (c) 2019-2020 The Khronos Group Inc.2*3* Licensed under the Apache License, Version 2.0 (the "License");4* you may not use this file except in compliance with the License.5* You may obtain a copy of the License at6*7* http://www.apache.org/licenses/LICENSE-2.08*9* Unless required by applicable law or agreed to in writing, software10* distributed under the License is distributed on an "AS IS" BASIS,11* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.12* See the License for the specific language governing permissions and13* limitations under the License.14******************************************************************************/1516/**17* This is a header-only utility library that provides OpenCL host code with18* routines for converting to/from cl_half values.19*20* Example usage:21*22* #include <CL/cl_half.h>23* ...24* cl_half h = cl_half_from_float(0.5f, CL_HALF_RTE);25* cl_float f = cl_half_to_float(h);26*/2728#ifndef OPENCL_CL_HALF_H29#define OPENCL_CL_HALF_H3031#include <CL/cl_platform.h>3233#include <stdint.h>3435#ifdef __cplusplus36extern "C" {37#endif383940/**41* Rounding mode used when converting to cl_half.42*/43typedef enum44{45CL_HALF_RTE, // round to nearest even46CL_HALF_RTZ, // round towards zero47CL_HALF_RTP, // round towards positive infinity48CL_HALF_RTN, // round towards negative infinity49} cl_half_rounding_mode;505152/* Private utility macros. */53#define CL_HALF_EXP_MASK 0x7C0054#define CL_HALF_MAX_FINITE_MAG 0x7BFF555657/*58* Utility to deal with values that overflow when converting to half precision.59*/60static inline cl_half cl_half_handle_overflow(cl_half_rounding_mode rounding_mode,61uint16_t sign)62{63if (rounding_mode == CL_HALF_RTZ)64{65// Round overflow towards zero -> largest finite number (preserving sign)66return (sign << 15) | CL_HALF_MAX_FINITE_MAG;67}68else if (rounding_mode == CL_HALF_RTP && sign)69{70// Round negative overflow towards positive infinity -> most negative finite number71return (1 << 15) | CL_HALF_MAX_FINITE_MAG;72}73else if (rounding_mode == CL_HALF_RTN && !sign)74{75// Round positive overflow towards negative infinity -> largest finite number76return CL_HALF_MAX_FINITE_MAG;77}7879// Overflow to infinity80return (sign << 15) | CL_HALF_EXP_MASK;81}8283/*84* Utility to deal with values that underflow when converting to half precision.85*/86static inline cl_half cl_half_handle_underflow(cl_half_rounding_mode rounding_mode,87uint16_t sign)88{89if (rounding_mode == CL_HALF_RTP && !sign)90{91// Round underflow towards positive infinity -> smallest positive value92return (sign << 15) | 1;93}94else if (rounding_mode == CL_HALF_RTN && sign)95{96// Round underflow towards negative infinity -> largest negative value97return (sign << 15) | 1;98}99100// Flush to zero101return (sign << 15);102}103104105/**106* Convert a cl_float to a cl_half.107*/108static inline cl_half cl_half_from_float(cl_float f, cl_half_rounding_mode rounding_mode)109{110// Type-punning to get direct access to underlying bits111union112{113cl_float f;114uint32_t i;115} f32;116f32.f = f;117118// Extract sign bit119uint16_t sign = f32.i >> 31;120121// Extract FP32 exponent and mantissa122uint32_t f_exp = (f32.i >> (CL_FLT_MANT_DIG - 1)) & 0xFF;123uint32_t f_mant = f32.i & ((1 << (CL_FLT_MANT_DIG - 1)) - 1);124125// Remove FP32 exponent bias126int32_t exp = f_exp - CL_FLT_MAX_EXP + 1;127128// Add FP16 exponent bias129uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);130131// Position of the bit that will become the FP16 mantissa LSB132uint32_t lsb_pos = CL_FLT_MANT_DIG - CL_HALF_MANT_DIG;133134// Check for NaN / infinity135if (f_exp == 0xFF)136{137if (f_mant)138{139// NaN -> propagate mantissa and silence it140uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);141h_mant |= 0x200;142return (sign << 15) | CL_HALF_EXP_MASK | h_mant;143}144else145{146// Infinity -> zero mantissa147return (sign << 15) | CL_HALF_EXP_MASK;148}149}150151// Check for zero152if (!f_exp && !f_mant)153{154return (sign << 15);155}156157// Check for overflow158if (exp >= CL_HALF_MAX_EXP)159{160return cl_half_handle_overflow(rounding_mode, sign);161}162163// Check for underflow164if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))165{166return cl_half_handle_underflow(rounding_mode, sign);167}168169// Check for value that will become denormal170if (exp < -14)171{172// Denormal -> include the implicit 1 from the FP32 mantissa173h_exp = 0;174f_mant |= 1 << (CL_FLT_MANT_DIG - 1);175176// Mantissa shift amount depends on exponent177lsb_pos = -exp + (CL_FLT_MANT_DIG - 25);178}179180// Generate FP16 mantissa by shifting FP32 mantissa181uint16_t h_mant = (uint16_t)(f_mant >> lsb_pos);182183// Check whether we need to round184uint32_t halfway = 1 << (lsb_pos - 1);185uint32_t mask = (halfway << 1) - 1;186switch (rounding_mode)187{188case CL_HALF_RTE:189if ((f_mant & mask) > halfway)190{191// More than halfway -> round up192h_mant += 1;193}194else if ((f_mant & mask) == halfway)195{196// Exactly halfway -> round to nearest even197if (h_mant & 0x1)198h_mant += 1;199}200break;201case CL_HALF_RTZ:202// Mantissa has already been truncated -> do nothing203break;204case CL_HALF_RTP:205if ((f_mant & mask) && !sign)206{207// Round positive numbers up208h_mant += 1;209}210break;211case CL_HALF_RTN:212if ((f_mant & mask) && sign)213{214// Round negative numbers down215h_mant += 1;216}217break;218}219220// Check for mantissa overflow221if (h_mant & 0x400)222{223h_exp += 1;224h_mant = 0;225}226227return (sign << 15) | (h_exp << 10) | h_mant;228}229230231/**232* Convert a cl_double to a cl_half.233*/234static inline cl_half cl_half_from_double(cl_double d, cl_half_rounding_mode rounding_mode)235{236// Type-punning to get direct access to underlying bits237union238{239cl_double d;240uint64_t i;241} f64;242f64.d = d;243244// Extract sign bit245uint16_t sign = f64.i >> 63;246247// Extract FP64 exponent and mantissa248uint64_t d_exp = (f64.i >> (CL_DBL_MANT_DIG - 1)) & 0x7FF;249uint64_t d_mant = f64.i & (((uint64_t)1 << (CL_DBL_MANT_DIG - 1)) - 1);250251// Remove FP64 exponent bias252int64_t exp = d_exp - CL_DBL_MAX_EXP + 1;253254// Add FP16 exponent bias255uint16_t h_exp = (uint16_t)(exp + CL_HALF_MAX_EXP - 1);256257// Position of the bit that will become the FP16 mantissa LSB258uint32_t lsb_pos = CL_DBL_MANT_DIG - CL_HALF_MANT_DIG;259260// Check for NaN / infinity261if (d_exp == 0x7FF)262{263if (d_mant)264{265// NaN -> propagate mantissa and silence it266uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);267h_mant |= 0x200;268return (sign << 15) | CL_HALF_EXP_MASK | h_mant;269}270else271{272// Infinity -> zero mantissa273return (sign << 15) | CL_HALF_EXP_MASK;274}275}276277// Check for zero278if (!d_exp && !d_mant)279{280return (sign << 15);281}282283// Check for overflow284if (exp >= CL_HALF_MAX_EXP)285{286return cl_half_handle_overflow(rounding_mode, sign);287}288289// Check for underflow290if (exp < (CL_HALF_MIN_EXP - CL_HALF_MANT_DIG - 1))291{292return cl_half_handle_underflow(rounding_mode, sign);293}294295// Check for value that will become denormal296if (exp < -14)297{298// Include the implicit 1 from the FP64 mantissa299h_exp = 0;300d_mant |= (uint64_t)1 << (CL_DBL_MANT_DIG - 1);301302// Mantissa shift amount depends on exponent303lsb_pos = (uint32_t)(-exp + (CL_DBL_MANT_DIG - 25));304}305306// Generate FP16 mantissa by shifting FP64 mantissa307uint16_t h_mant = (uint16_t)(d_mant >> lsb_pos);308309// Check whether we need to round310uint64_t halfway = (uint64_t)1 << (lsb_pos - 1);311uint64_t mask = (halfway << 1) - 1;312switch (rounding_mode)313{314case CL_HALF_RTE:315if ((d_mant & mask) > halfway)316{317// More than halfway -> round up318h_mant += 1;319}320else if ((d_mant & mask) == halfway)321{322// Exactly halfway -> round to nearest even323if (h_mant & 0x1)324h_mant += 1;325}326break;327case CL_HALF_RTZ:328// Mantissa has already been truncated -> do nothing329break;330case CL_HALF_RTP:331if ((d_mant & mask) && !sign)332{333// Round positive numbers up334h_mant += 1;335}336break;337case CL_HALF_RTN:338if ((d_mant & mask) && sign)339{340// Round negative numbers down341h_mant += 1;342}343break;344}345346// Check for mantissa overflow347if (h_mant & 0x400)348{349h_exp += 1;350h_mant = 0;351}352353return (sign << 15) | (h_exp << 10) | h_mant;354}355356357/**358* Convert a cl_half to a cl_float.359*/360static inline cl_float cl_half_to_float(cl_half h)361{362// Type-punning to get direct access to underlying bits363union364{365cl_float f;366uint32_t i;367} f32;368369// Extract sign bit370uint16_t sign = h >> 15;371372// Extract FP16 exponent and mantissa373uint16_t h_exp = (h >> (CL_HALF_MANT_DIG - 1)) & 0x1F;374uint16_t h_mant = h & 0x3FF;375376// Remove FP16 exponent bias377int32_t exp = h_exp - CL_HALF_MAX_EXP + 1;378379// Add FP32 exponent bias380uint32_t f_exp = exp + CL_FLT_MAX_EXP - 1;381382// Check for NaN / infinity383if (h_exp == 0x1F)384{385if (h_mant)386{387// NaN -> propagate mantissa and silence it388uint32_t f_mant = h_mant << (CL_FLT_MANT_DIG - CL_HALF_MANT_DIG);389f_mant |= 0x400000;390f32.i = (sign << 31) | 0x7F800000 | f_mant;391return f32.f;392}393else394{395// Infinity -> zero mantissa396f32.i = (sign << 31) | 0x7F800000;397return f32.f;398}399}400401// Check for zero / denormal402if (h_exp == 0)403{404if (h_mant == 0)405{406// Zero -> zero exponent407f_exp = 0;408}409else410{411// Denormal -> normalize it412// - Shift mantissa to make most-significant 1 implicit413// - Adjust exponent accordingly414uint32_t shift = 0;415while ((h_mant & 0x400) == 0)416{417h_mant <<= 1;418shift++;419}420h_mant &= 0x3FF;421f_exp -= shift - 1;422}423}424425f32.i = (sign << 31) | (f_exp << 23) | (h_mant << 13);426return f32.f;427}428429430#undef CL_HALF_EXP_MASK431#undef CL_HALF_MAX_FINITE_MAG432433434#ifdef __cplusplus435}436#endif437438439#endif /* OPENCL_CL_HALF_H */440441442