Path: blob/master/libs/compiler-rt/lib/builtins/udivmoddi4.c
4395 views
/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===1*2* The LLVM Compiler Infrastructure3*4* This file is dual licensed under the MIT and the University of Illinois Open5* Source Licenses. See LICENSE.TXT for details.6*7* ===----------------------------------------------------------------------===8*9* This file implements __udivmoddi4 for the compiler_rt library.10*11* ===----------------------------------------------------------------------===12*/1314#include "int_lib.h"1516/* Effects: if rem != 0, *rem = a % b17* Returns: a / b18*/1920/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */2122COMPILER_RT_ABI du_int23__udivmoddi4(du_int a, du_int b, du_int* rem)24{25const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;26const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;27udwords n;28n.all = a;29udwords d;30d.all = b;31udwords q;32udwords r;33unsigned sr;34/* special cases, X is unknown, K != 0 */35if (n.s.high == 0)36{37if (d.s.high == 0)38{39/* 0 X40* ---41* 0 X42*/43if (rem)44*rem = n.s.low % d.s.low;45return n.s.low / d.s.low;46}47/* 0 X48* ---49* K X50*/51if (rem)52*rem = n.s.low;53return 0;54}55/* n.s.high != 0 */56if (d.s.low == 0)57{58if (d.s.high == 0)59{60/* K X61* ---62* 0 063*/64if (rem)65*rem = n.s.high % d.s.low;66return n.s.high / d.s.low;67}68/* d.s.high != 0 */69if (n.s.low == 0)70{71/* K 072* ---73* K 074*/75if (rem)76{77r.s.high = n.s.high % d.s.high;78r.s.low = 0;79*rem = r.all;80}81return n.s.high / d.s.high;82}83/* K K84* ---85* K 086*/87if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */88{89if (rem)90{91r.s.low = n.s.low;92r.s.high = n.s.high & (d.s.high - 1);93*rem = r.all;94}95return n.s.high >> __builtin_ctz(d.s.high);96}97/* K K98* ---99* K 0100*/101sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);102/* 0 <= sr <= n_uword_bits - 2 or sr large */103if (sr > n_uword_bits - 2)104{105if (rem)106*rem = n.all;107return 0;108}109++sr;110/* 1 <= sr <= n_uword_bits - 1 */111/* q.all = n.all << (n_udword_bits - sr); */112q.s.low = 0;113q.s.high = n.s.low << (n_uword_bits - sr);114/* r.all = n.all >> sr; */115r.s.high = n.s.high >> sr;116r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);117}118else /* d.s.low != 0 */119{120if (d.s.high == 0)121{122/* K X123* ---124* 0 K125*/126if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */127{128if (rem)129*rem = n.s.low & (d.s.low - 1);130if (d.s.low == 1)131return n.all;132sr = __builtin_ctz(d.s.low);133q.s.high = n.s.high >> sr;134q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);135return q.all;136}137/* K X138* ---139* 0 K140*/141sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);142/* 2 <= sr <= n_udword_bits - 1143* q.all = n.all << (n_udword_bits - sr);144* r.all = n.all >> sr;145*/146if (sr == n_uword_bits)147{148q.s.low = 0;149q.s.high = n.s.low;150r.s.high = 0;151r.s.low = n.s.high;152}153else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1154{155q.s.low = 0;156q.s.high = n.s.low << (n_uword_bits - sr);157r.s.high = n.s.high >> sr;158r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);159}160else // n_uword_bits + 1 <= sr <= n_udword_bits - 1161{162q.s.low = n.s.low << (n_udword_bits - sr);163q.s.high = (n.s.high << (n_udword_bits - sr)) |164(n.s.low >> (sr - n_uword_bits));165r.s.high = 0;166r.s.low = n.s.high >> (sr - n_uword_bits);167}168}169else170{171/* K X172* ---173* K K174*/175sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);176/* 0 <= sr <= n_uword_bits - 1 or sr large */177if (sr > n_uword_bits - 1)178{179if (rem)180*rem = n.all;181return 0;182}183++sr;184/* 1 <= sr <= n_uword_bits */185/* q.all = n.all << (n_udword_bits - sr); */186q.s.low = 0;187if (sr == n_uword_bits)188{189q.s.high = n.s.low;190r.s.high = 0;191r.s.low = n.s.high;192}193else194{195q.s.high = n.s.low << (n_uword_bits - sr);196r.s.high = n.s.high >> sr;197r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);198}199}200}201/* Not a special case202* q and r are initialized with:203* q.all = n.all << (n_udword_bits - sr);204* r.all = n.all >> sr;205* 1 <= sr <= n_udword_bits - 1206*/207su_int carry = 0;208for (; sr > 0; --sr)209{210/* r:q = ((r:q) << 1) | carry */211r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));212r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));213q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));214q.s.low = (q.s.low << 1) | carry;215/* carry = 0;216* if (r.all >= d.all)217* {218* r.all -= d.all;219* carry = 1;220* }221*/222const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);223carry = s & 1;224r.all -= d.all & s;225}226q.all = (q.all << 1) | carry;227if (rem)228*rem = r.all;229return q.all;230}231232233