/*
 * arch/alpha/lib/divide.S
 *
 * (C) 1995 Linus Torvalds
 *
 * Alpha division..
 */

/*
 * Software integer division for Alpha.
 *
 * The alpha chip doesn't provide hardware division, so it has to be
 * done by hand.  This one source file is preprocessed in several
 * configurations; the entry-point names are produced by the func()
 * macro below:
 *
 *	DIV defined,   INTSIZE undefined:  __divqu (unsigned), __divq (signed)
 *	DIV undefined, INTSIZE undefined:  __remqu (unsigned), __remq (signed)
 *	DIV defined,   INTSIZE defined:    __divlu (unsigned), __divl (signed)
 *	DIV undefined, INTSIZE defined:    __remlu (unsigned), __reml (signed)
 *
 * The "q" variants work on 64-bit quantities, the "l" variants on
 * 32-bit ones.
 *
 * These are not normal C functions: instead of the normal calling
 * sequence they take
 *
 *	$24 - dividend
 *	$25 - divisor
 *	$23 - return address
 *
 * and hand the result back in $27.  Register $28 may be clobbered
 * (assembly temporary); anything else must be saved.
 *
 * In short: painful.
 *
 * The algorithm is a rather simple bit-at-a-time one: it is very good
 * at dividing random 64-bit numbers, but the more usual case where
 * the divisor is small is handled better by the DEC algorithm using
 * lookup tables.  This one uses much less memory, though, and is
 * nicer on the cache..  Besides, the copyright status of the DEC code
 * was not known to the original author.
 */

/*
 * Register usage inside the routines:
 *
 *	$0  - current bit (mask)
 *	$1  - shifted divisor
 *	$2  - modulus when dividing, quotient when taking the remainder
 *	$3  - tmp1
 *	$4  - tmp2 (DIV builds only)
 *
 *	$23 - return address
 *	$24 - dividend
 *	$25 - divisor
 *
 *	$27 - result: quotient when dividing, modulus otherwise
 *	$28 - compare status / scratch
 */

#define halt .long 0

/*
 * Register aliases shared by every build variant.
 */
#define mask	$0
#define divisor	$1
#define compare	$28
#define tmp1	$3
#define tmp2	$4

/*
 * Select function type and registers.
 *
 * Whichever of quotient/modulus is the wanted result lives in $27;
 * the other one lives in $2.  DIV_ONLY()/MOD_ONLY() emit their
 * argument only in the matching build.  GETSIGN(x) leaves in x a
 * value whose sign is the sign the result must have: the XOR of both
 * operands for a quotient, the dividend itself for a remainder.
 */
#ifdef DIV
#define DIV_ONLY(x,y...)	x,##y
#define MOD_ONLY(x,y...)
#define func(x)			__div##x
#define modulus			$2
#define quotient		$27
#define GETSIGN(x)		xor $24,$25,x
#define STACK			48
#else
#define DIV_ONLY(x,y...)
#define MOD_ONLY(x,y...)	x,##y
#define func(x)			__rem##x
#define modulus			$27
#define quotient		$2
#define GETSIGN(x)		mov $24,x
#define STACK			32
#endif

/*
 * For 32-bit operations, we need to extend to 64-bit:
 * LONGIFY() keeps only the low four bytes (zero extension),
 * SLONGIFY() sign-extends the low 32 bits.  Both expand to nothing
 * in the 64-bit build.
 */
#ifdef INTSIZE
#define ufunction	func(lu)
#define sfunction	func(l)
#define LONGIFY(x)	zapnot x,15,x
#define SLONGIFY(x)	addl x,0,x
#else
#define ufunction	func(qu)
#define sfunction	func(q)
#define LONGIFY(x)
#define SLONGIFY(x)
#endif

/*
 * ufunction - unsigned divide / remainder.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = quotient (DIV) or remainder (otherwise)
 * Uses: $28 as scratch; $0-$3 (and $4 when DIV) are saved on the
 *       stack and restored before returning.
 *
 * A zero divisor skips both loops: the quotient register is still 0
 * and the modulus register still holds the dividend at that point.
 *
 * Local label 7 is a second entry point used by sfunction after it
 * has already allocated the STACK-byte frame itself.
 */
	.set noat
	.align	3
	.globl	ufunction
	.ent	ufunction
ufunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/* Save the temporaries, interleaved with operand setup. */
7:	stq	$1, 0($30)
	mov	$25,divisor
	stq	$2, 8($30)
	mov	$24,modulus
	stq	$0,16($30)
	clr	quotient
	LONGIFY(divisor)
	stq	tmp1,24($30)
	LONGIFY(modulus)
	bis	$31,1,mask
	DIV_ONLY(stq tmp2,32($30))
	beq	divisor, 9f			/* div by zero */

#ifdef INTSIZE
	/*
	 * Shift the divisor left, using 3-bit shifts for
	 * 32-bit divides as we can't overflow.  Three-bit
	 * shifts will result in looping three times less
	 * here, but can result in two loops more later.
	 * Thus using a large shift isn't worth it (and
	 * s8add pairs better than a sll..)
	 */
1:	cmpult	divisor,modulus,compare
	s8addq	divisor,$31,divisor
	s8addq	mask,$31,mask
	bne	compare,1b
#else
	/*
	 * Shift divisor and mask left one bit at a time while the
	 * divisor was still below the modulus.  Leave early once the
	 * top bit of the divisor is set: one more doubling would
	 * lose it.
	 */
1:	cmpult	divisor,modulus,compare
	blt	divisor, 2f
	addq	divisor,divisor,divisor
	addq	mask,mask,mask
	bne	compare,1b
	unop
#endif

	/*
	 * Ok, start to go right again..  For each mask bit, if the
	 * shifted divisor fits into the modulus, subtract it (and, in
	 * DIV builds, add the bit to the quotient).  The loop ends
	 * when the mask has been shifted out to zero.
	 */
2:	DIV_ONLY(addq quotient,mask,tmp2)
	srl	mask,1,mask
	cmpule	divisor,modulus,compare
	subq	modulus,divisor,tmp1
	DIV_ONLY(cmovne compare,tmp2,quotient)
	srl	divisor,1,divisor
	cmovne	compare,tmp1,modulus
	bne	mask,2b

	/* Restore the temporaries, release the frame and return. */
9:	ldq	$1, 0($30)
	ldq	$2, 8($30)
	ldq	$0,16($30)
	ldq	tmp1,24($30)
	DIV_ONLY(ldq tmp2,32($30))
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	ufunction

/*
 * sfunction - signed divide / remainder.
 *
 * Uhh.. Ugly signed division.  It would be nicer not to have it at
 * all, but it's needed in some circumstances.  There are different
 * ways to handle this, really.  This does:
 *	-a / b = a / -b = -(a / b)
 *	-a % b = -(a % b)
 *	a % -b = a % b
 * which is probably not the best solution, but at least should
 * have the property that (x/y)*y + (x%y) = x.
 *
 * In:   $24 = dividend, $25 = divisor, $23 = return address
 * Out:  $27 = result
 * Uses: $28 as scratch; $24, $25, $23 and tmp1 are saved in this
 *       frame and reloaded after the call to ufunction.
 *
 * When neither operand is negative the work is handed straight to
 * ufunction through its label 7, reusing the frame allocated here.
 */
	.align	3
	.globl	sfunction
	.ent	sfunction
sfunction:
	subq	$30,STACK,$30
	.frame	$30,STACK,$23
	.prologue 0

	/* The sign bit of (a | b) is set iff either operand is negative. */
	bis	$24,$25,$28
	SLONGIFY($28)
	bge	$28,7b

	/* Save the caller's values and replace both operands by their magnitudes. */
	stq	$24,0($30)
	negq	$24,$28
	stq	$25,8($30)
	cmovlt	$24,$28,$24	/* abs($24) */
	stq	$23,16($30)
	negq	$25,$28
	stq	tmp1,24($30)
	cmovlt	$25,$28,$25	/* abs($25) */
	unop
	bsr	$23,ufunction

	/* Reload the original operands and negate the result if its sign says so. */
	ldq	$24,0($30)
	ldq	$25,8($30)
	GETSIGN($28)
	negq	$27,tmp1
	SLONGIFY($28)
	ldq	$23,16($30)
	cmovlt	$28,tmp1,$27
	ldq	tmp1,24($30)
	addq	$30,STACK,$30
	ret	$31,($23),1
	.end	sfunction