Path: blob/main/contrib/llvm-project/compiler-rt/lib/builtins/arm/addsf3.S
35291 views
//===-- addsf3.S - Adds two single precision floating point numbers -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the __addsf3 (single precision floating point number
// addition with the IEEE-754 default rounding (to nearest, ties to even))
// function for the ARM Thumb1 ISA.
//
//===----------------------------------------------------------------------===//

#include "../assembly.h"
#define significandBits 23
#define typeWidth 32

  .syntax unified
  .text
  .thumb
  .p2align 2

DEFINE_AEABI_FUNCTION_ALIAS(__aeabi_fadd, __addsf3)

//-----------------------------------------------------------------------------
// __addsf3 / __aeabi_fadd
//
// In:   r0 = aRep, r1 = bRep (raw binary32 bit patterns of the two addends)
// Out:  r0 = bit pattern of the sum
// Saved on entry and restored on every return path: r4-r7 (lr is popped
// straight into pc).
//
// Register roles on the main (finite, non-zero) path, after the optional swap
// that makes |a| >= |b|:
//   r0 = aRep            r1 = bRep
//   r2 = aAbs, later aExponent
//   r3 = bAbs, later bExponent, later scratch
//   r4 = aSignificand << 3   (low three bits: round, guard, sticky)
//   r5 = bSignificand << 3
//   r6, r7 = scratch
//
// Subnormal operands are not pre-normalized: they keep exponent 0 and have
// no implicit bit ORed into their significand.
//-----------------------------------------------------------------------------
DEFINE_COMPILERRT_THUMB_FUNCTION(__addsf3)
  push {r4, r5, r6, r7, lr}
  // Get the absolute value of a and b.
  lsls r2, r0, #1
  lsls r3, r1, #1
  lsrs r2, r2, #1  // aAbs
  beq  LOCAL_LABEL(a_zero_nan_inf)
  lsrs r3, r3, #1  // bAbs
  beq  LOCAL_LABEL(zero_nan_inf)

  // Detect if a or b is infinity or NaN.
  lsrs r6, r2, #(significandBits)
  lsrs r7, r3, #(significandBits)
  cmp  r6, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)
  cmp  r7, #0xFF
  beq  LOCAL_LABEL(zero_nan_inf)

  // Swap Rep and Abs so that a and aAbs have the larger absolute value.
  cmp  r2, r3
  bhs  LOCAL_LABEL(no_swap)
  movs r4, r0
  movs r5, r2
  movs r0, r1
  movs r2, r3
  movs r1, r4
  movs r3, r5
LOCAL_LABEL(no_swap):

  // Get the significands and shift them to give us round, guard and sticky.
  lsls r4, r0, #(typeWidth - significandBits)
  lsrs r4, r4, #(typeWidth - significandBits - 3) // aSignificand << 3
  lsls r5, r1, #(typeWidth - significandBits)
  lsrs r5, r5, #(typeWidth - significandBits - 3) // bSignificand << 3

  // Get the implicitBit.
  movs r6, #1
  lsls r6, r6, #(significandBits + 3)

  // Get aExponent and set implicit bit if necessary.
  lsrs r2, r2, #(significandBits)
  beq  LOCAL_LABEL(a_done_implicit_bit)
  orrs r4, r6
LOCAL_LABEL(a_done_implicit_bit):

  // Get bExponent and set implicit bit if necessary.
  lsrs r3, r3, #(significandBits)
  beq  LOCAL_LABEL(b_done_implicit_bit)
  orrs r5, r6
LOCAL_LABEL(b_done_implicit_bit):

  // Get the difference in exponents.
  subs r6, r2, r3
  beq  LOCAL_LABEL(done_align)

  // If b is denormal, then a must be normal as align > 0, and we only need to
  // right shift bSignificand by (align - 1) bits.
  cmp  r3, #0
  bne  1f
  subs r6, r6, #1
1:

  // No longer needs bExponent. r3 is dead here.
  // Set sticky bits of b: sticky = bSignificand << (typeWidth - align).
  movs r3, #(typeWidth)
  subs r3, r3, r6
  movs r7, r5
  lsls r7, r3
  beq  1f
  movs r7, #1
1:

  // bSignificand = bSignificand >> align | sticky;
  lsrs r5, r6
  orrs r5, r7
  bne  LOCAL_LABEL(done_align)
  movs r5, #1 // sticky; b is known to be non-zero.

LOCAL_LABEL(done_align):
  // isSubtraction = (aRep ^ bRep) >> 31;
  movs r7, r0
  eors r7, r1
  lsrs r7, #31
  bne  LOCAL_LABEL(do_substraction)

  // Same sign, do Addition.

  // aSignificand += bSignificand;
  adds r4, r4, r5

  // Check carry bit.
  movs r6, #1
  lsls r6, r6, #(significandBits + 3 + 1)
  movs r7, r4
  ands r7, r6
  bne  LOCAL_LABEL(add_carry)

  // No carry out of the implicit-bit position. A non-zero aExponent means a
  // was normal and the sum is already in its final binade.
  cmp  r2, #0
  bne  LOCAL_LABEL(form_result)
  // aExponent == 0: a is denormal, and so is b because |b| <= |a|. The sum
  // of two denormals can reach the implicit-bit position, which form_result
  // masks off, so the exponent field has to become 1 in that case. With the
  // carry bit known to be clear, aSignificand >> (significandBits + 3) is
  // exactly that 0-or-1 exponent.
  lsrs r2, r4, #(significandBits + 3)
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(add_carry):
  // If the addition carried up, we need to right-shift the result and
  // adjust the exponent.
  movs r7, r4
  movs r6, #1
  ands r7, r6 // sticky = aSignificand & 1;
  lsrs r4, #1
  orrs r4, r7  // result Significand
  adds r2, #1  // result Exponent
  // If we have overflowed the type, return +/- infinity.
  cmp  r2, #0xFF
  beq  LOCAL_LABEL(ret_inf)

LOCAL_LABEL(form_result):
  // Shift the sign, exponent and significand into place.
  lsrs r0, #(typeWidth - 1)
  lsls r0, #(typeWidth - 1) // Get Sign.
  lsls r2, #(significandBits)
  orrs r0, r2
  movs r1, r4 // Keep a copy for the round/guard/sticky bits.
  // Drop the implicit bit (shifted out the top) and the three rounding bits
  // (shifted out the bottom).
  lsls r4, #(typeWidth - significandBits - 3)
  lsrs r4, #(typeWidth - significandBits)
  orrs r0, r4

  // Final rounding.  The result may overflow to infinity, but that is the
  // correct result in that case.
  // roundGuardSticky = aSignificand & 0x7;
  movs r2, #0x7
  ands r1, r2
  // if (roundGuardSticky > 0x4) result++;

  cmp  r1, #0x4
  blt  LOCAL_LABEL(done_round)
  beq  1f
  adds r0, #1
  pop  {r4, r5, r6, r7, pc}
1:

  // if (roundGuardSticky == 0x4) result += result & 1;
  movs r1, r0
  lsrs r1, #1 // Carry flag = result & 1.
  bcc  LOCAL_LABEL(done_round)
  adds r0, r0, #1
LOCAL_LABEL(done_round):
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(do_substraction):
  subs r4, r4, r5 // aSignificand -= bSignificand;
  beq  LOCAL_LABEL(ret_zero)
  movs r6, r4
  cmp  r2, #0
  beq  LOCAL_LABEL(form_result) // if a's exp is 0, no need to normalize.
  // If partial cancellation occurred, we need to left-shift the result
  // and adjust the exponent:
  lsrs r6, r6, #(significandBits + 3)
  bne  LOCAL_LABEL(form_result)

  // r0-r3 are still live, so save them around the call to __clzsi2.
  push {r0, r1, r2, r3}
  movs r0, r4
  bl   SYMBOL_NAME(__clzsi2)
  movs r5, r0
  pop  {r0, r1, r2, r3}
  // shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
  subs r5, r5, #(typeWidth - significandBits - 3 - 1)
  // aSignificand <<= shift; aExponent -= shift;
  lsls r4, r5
  subs r2, r2, r5
  bgt  LOCAL_LABEL(form_result)

  // Do normalization if aExponent <= 0.
  movs r6, #1
  subs r6, r6, r2 // 1 - aExponent;
  movs r2, #0 // aExponent = 0;
  movs r3, #(typeWidth) // bExponent is dead.
  subs r3, r3, r6
  movs r7, r4
  lsls r7, r3  // stickyBit = (bool)(aSignificand << (typeWidth - align))
  beq  1f
  movs r7, #1
1:
  lsrs r4, r6 // aSignificand >> shift
  orrs r4, r7
  b    LOCAL_LABEL(form_result)

LOCAL_LABEL(ret_zero):
  movs r0, #0
  pop  {r4, r5, r6, r7, pc}


LOCAL_LABEL(a_zero_nan_inf):
  // Reached before bAbs was computed: r3 still holds bRep << 1.
  lsrs r3, r3, #1

LOCAL_LABEL(zero_nan_inf):
  // Here r2 has aAbs, r3 has bAbs
  movs r4, #0xFF
  lsls r4, r4, #(significandBits) // Make +inf.

  cmp  r2, r4
  bhi  LOCAL_LABEL(a_is_nan)
  cmp  r3, r4
  bhi  LOCAL_LABEL(b_is_nan)

  cmp  r2, r4
  bne  LOCAL_LABEL(a_is_rational)
  // aAbs is INF.
  eors r1, r0 // aRep ^ bRep.
  movs r6, #1
  lsls r6, r6, #(typeWidth - 1) // get sign mask.
  cmp  r1, r6 // if they only differ on sign bit, it's -INF + INF
  beq  LOCAL_LABEL(a_is_nan)
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(a_is_rational):
  cmp  r3, r4
  bne  LOCAL_LABEL(b_is_rational)
  // bAbs is INF and a is finite: the result is b.
  movs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_rational):
  // either a or b or both are zero.
  adds r4, r2, r3
  beq  LOCAL_LABEL(both_zero)
  cmp  r2, #0 // is absA 0 ?
  beq  LOCAL_LABEL(ret_b)
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(both_zero):
  ands r0, r1 // +0 + -0 = +0
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_b):
  movs r0, r1

LOCAL_LABEL(ret):
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(b_is_nan):
  movs r0, r1
LOCAL_LABEL(a_is_nan):
  movs r1, #1
  lsls r1, r1, #(significandBits - 1) // r1 is quiet bit.
  orrs r0, r1
  pop  {r4, r5, r6, r7, pc}

LOCAL_LABEL(ret_inf):
  // Keep a's sign, force the exponent to all ones and clear the significand.
  movs r4, #0xFF
  lsls r4, r4, #(significandBits)
  orrs r0, r4
  lsrs r0, r0, #(significandBits)
  lsls r0, r0, #(significandBits)
  pop  {r4, r5, r6, r7, pc}


END_COMPILERRT_FUNCTION(__addsf3)

NO_EXEC_STACK_DIRECTIVE
