Path: blob/main/sys/contrib/openzfs/module/os/linux/spl/spl-math-compat.c
178701 views
// SPDX-License-Identifier: GPL-2.0-or-later1/*2* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.3* Copyright (C) 2007 The Regents of the University of California.4* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).5* Written by Brian Behlendorf <[email protected]>.6* UCRL-CODE-2351977*8* This file is part of the SPL, Solaris Porting Layer.9*10* The SPL is free software; you can redistribute it and/or modify it11* under the terms of the GNU General Public License as published by the12* Free Software Foundation; either version 2 of the License, or (at your13* option) any later version.14*15* The SPL is distributed in the hope that it will be useful, but WITHOUT16* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or17* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License18* for more details.19*20* You should have received a copy of the GNU General Public License along21* with the SPL. If not, see <http://www.gnu.org/licenses/>.22*23* Solaris Porting Layer (SPL) Generic Implementation.24*/2526#include <sys/isa_defs.h>27#include <sys/sysmacros.h>2829/*30* 64-bit math support for 32-bit platforms. Compilers will generatee31* references to the functions here if required.32*/3334#if BITS_PER_LONG == 323536/*37* Support 64/64 => 64 division on a 32-bit platform. While the kernel38* provides a div64_u64() function for this we do not use it because the39* implementation is flawed. There are cases which return incorrect40* results as late as linux-2.6.35. Until this is fixed upstream the41* spl must provide its own implementation.42*43* This implementation is a slightly modified version of the algorithm44* proposed by the book 'Hacker's Delight'. The original source can be45* found here and is available for use without restriction.46*47* http://www.hackersdelight.org/HDcode/newCode/divDouble.c48*/4950/*51* Calculate number of leading of zeros for a 64-bit value.52*/53static int54nlz64(uint64_t x)55{56register int n = 0;5758if (x == 0)59return (64);6061if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; }62if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; }63if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; }64if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; }65if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; }66if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; }6768return (n);69}7071/*72* Newer kernels have a div_u64() function but we define our own73* to simplify portability between kernel versions.74*/75static inline uint64_t76__div_u64(uint64_t u, uint32_t v)77{78(void) do_div(u, v);79return (u);80}8182/*83* Implementation of 64-bit unsigned division for 32-bit machines.84*85* First the procedure takes care of the case in which the divisor is a86* 32-bit quantity. There are two subcases: (1) If the left half of the87* dividend is less than the divisor, one execution of do_div() is all that88* is required (overflow is not possible). (2) Otherwise it does two89* divisions, using the grade school method.90*/91uint64_t92__udivdi3(uint64_t u, uint64_t v)93{94uint64_t u0, u1, v1, q0, q1, k;95int n;9697if (v >> 32 == 0) { // If v < 2**32:98if (u >> 32 < v) { // If u/v cannot overflow,99return (__div_u64(u, v)); // just do one division.100} else { // If u/v would overflow:101u1 = u >> 32; // Break u into two halves.102u0 = u & 0xFFFFFFFF;103q1 = __div_u64(u1, v); // First quotient digit.104k = u1 - q1 * v; // First remainder, < v.105u0 += (k << 32);106q0 = __div_u64(u0, v); // Seconds quotient digit.107return ((q1 << 32) + q0);108}109} else { // If v >= 2**32:110n = nlz64(v); // 0 <= n <= 31.111v1 = (v << n) >> 32; // Normalize divisor, MSB is 1.112u1 = u >> 1; // To ensure no overflow.113q1 = __div_u64(u1, v1); // Get quotient from114q0 = (q1 << n) >> 31; // Undo normalization and115// division of u by 2.116if (q0 != 0) // Make q0 correct or117q0 = q0 - 1; // too small by 1.118if ((u - q0 * v) >= v)119q0 = q0 + 1; // Now q0 is correct.120121return (q0);122}123}124EXPORT_SYMBOL(__udivdi3);125126#ifndef abs64127/* CSTYLED */128#define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; })129#endif130131/*132* Implementation of 64-bit signed division for 32-bit machines.133*/134int64_t135__divdi3(int64_t u, int64_t v)136{137int64_t q, t;138q = __udivdi3(abs64(u), abs64(v));139t = (u ^ v) >> 63; // If u, v have different140return ((q ^ t) - t); // signs, negate q.141}142EXPORT_SYMBOL(__divdi3);143144/*145* Implementation of 64-bit unsigned modulo for 32-bit machines.146*/147uint64_t148__umoddi3(uint64_t dividend, uint64_t divisor)149{150return (dividend - (divisor * __udivdi3(dividend, divisor)));151}152EXPORT_SYMBOL(__umoddi3);153154/* 64-bit signed modulo for 32-bit machines. */155int64_t156__moddi3(int64_t n, int64_t d)157{158int64_t q;159boolean_t nn = B_FALSE;160161if (n < 0) {162nn = B_TRUE;163n = -n;164}165if (d < 0)166d = -d;167168q = __umoddi3(n, d);169170return (nn ? -q : q);171}172EXPORT_SYMBOL(__moddi3);173174/*175* Implementation of 64-bit unsigned division/modulo for 32-bit machines.176*/177uint64_t178__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)179{180uint64_t q = __udivdi3(n, d);181if (r)182*r = n - d * q;183return (q);184}185EXPORT_SYMBOL(__udivmoddi4);186187/*188* Implementation of 64-bit signed division/modulo for 32-bit machines.189*/190int64_t191__divmoddi4(int64_t n, int64_t d, int64_t *r)192{193int64_t q, rr;194boolean_t nn = B_FALSE;195boolean_t nd = B_FALSE;196if (n < 0) {197nn = B_TRUE;198n = -n;199}200if (d < 0) {201nd = B_TRUE;202d = -d;203}204205q = __udivmoddi4(n, d, (uint64_t *)&rr);206207if (nn != nd)208q = -q;209if (nn)210rr = -rr;211if (r)212*r = rr;213return (q);214}215EXPORT_SYMBOL(__divmoddi4);216217#if defined(__arm) || defined(__arm__)218/*219* Implementation of 64-bit (un)signed division for 32-bit arm machines.220*221* Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned)222* long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1},223* and the remainder in {r2, r3}. The return type is specifically left224* set to 'void' to ensure the compiler does not overwrite these registers225* during the return. All results are in registers as per ABI226*/227void228__aeabi_uldivmod(uint64_t u, uint64_t v)229{230uint64_t res;231uint64_t mod;232233res = __udivdi3(u, v);234mod = __umoddi3(u, v);235{236register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);237register uint32_t r1 asm("r1") = (res >> 32);238register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);239register uint32_t r3 asm("r3") = (mod >> 32);240241asm volatile(""242: "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */243: "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */244245return; /* r0; */246}247}248EXPORT_SYMBOL(__aeabi_uldivmod);249250void251__aeabi_ldivmod(int64_t u, int64_t v)252{253int64_t res;254uint64_t mod;255256res = __divdi3(u, v);257mod = __umoddi3(u, v);258{259register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF);260register uint32_t r1 asm("r1") = (res >> 32);261register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF);262register uint32_t r3 asm("r3") = (mod >> 32);263264asm volatile(""265: "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */266: "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */267268return; /* r0; */269}270}271EXPORT_SYMBOL(__aeabi_ldivmod);272#endif /* __arm || __arm__ */273274#endif /* BITS_PER_LONG */275276277