Path: blob/master/libs/compiler-rt/lib/builtins/i386/moddi3.S
12349 views
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.

#include "../assembly.h"

// di_int __moddi3(di_int a, di_int b);

// result = remainder of a / b.
// Both inputs and the output are 64-bit signed integers.
// This will do whatever the underlying hardware is set to do on division by zero.
// No other exceptions are generated, as the divide cannot overflow.
//
// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
// currently possible via simulation of integer divides on the x87 unit.
//
// Syntax: AT&T (GAS), run through the C preprocessor.
//
// In:    both arguments are read from the stack.  At function entry:
//            4(%esp) = low word of a     8(%esp) = high word of a
//           12(%esp) = low word of b    16(%esp) = high word of b
//        The argument slots are overwritten in place with abs(a) and abs(b).
// Out:   EDX:EAX = remainder, carrying the sign of a.
// Saved: ESI, EBX and EDI are pushed before use and popped before returning.
//
// Every push moves the argument slots 4 bytes further from %esp:
//
//                       low a   high a   low b   high b
//   after pushl %esi      8       12      16       20
//   after pushl %ebx     12       16      20       24
//   after pushl %edi     16       20      24       28
//

// Stephen Canon, December 2008

#ifdef __i386__

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__moddi3)

/* This is currently implemented by wrapping the unsigned modulus up in an absolute
   value.  This could certainly be improved upon. */

    pushl   %esi
    movl    20(%esp),       %edx        // high word of b
    movl    16(%esp),       %eax        // low word of b
    movl    %edx,           %ecx
    sarl    $31,            %ecx        // (b < 0) ? -1 : 0
    xorl    %ecx,           %eax
    xorl    %ecx,           %edx        // EDX:EAX = (b < 0) ? not(b) : b
    subl    %ecx,           %eax
    sbbl    %ecx,           %edx        // EDX:EAX = abs(b)
    movl    %edx,           20(%esp)
    movl    %eax,           16(%esp)    // store abs(b) back to stack

    movl    12(%esp),       %edx        // high word of a
    movl    8(%esp),        %eax        // low word of a
    movl    %edx,           %ecx
    sarl    $31,            %ecx        // (a < 0) ? -1 : 0
    xorl    %ecx,           %eax
    xorl    %ecx,           %edx        // EDX:EAX = (a < 0) ? not(a) : a
    subl    %ecx,           %eax
    sbbl    %ecx,           %edx        // EDX:EAX = abs(a)
    movl    %edx,           12(%esp)
    movl    %eax,           8(%esp)     // store abs(a) back to stack
    movl    %ecx,           %esi        // set aside sign of a (-1 or 0)

    /* From here on a and b denote abs(a) and abs(b), treated as unsigned. */

    pushl   %ebx
    movl    24(%esp),       %ebx        // Find the index i of the leading bit in b.
    bsrl    %ebx,           %ecx        // If the high word of b is zero, jump to
    jz      9f                          // the code to handle that special case [9].

    /* High word of b is known to be non-zero on this branch */

    movl    20(%esp),       %eax        // Construct bhi, containing bits [1+i:32+i] of b

    shrl    %cl,            %eax        // Practically, this means that bhi is given by:
    shrl    %eax                        //
    notl    %ecx                        //      bhi = (high word of b) << (31 - i) |
    shll    %cl,            %ebx        //            (low word of b) >> (1 + i)
    orl     %eax,           %ebx        //
    movl    16(%esp),       %edx        // Load the high and low words of a, and jump
    movl    12(%esp),       %eax        // to [2] if the high word is not smaller than
    cmpl    %ebx,           %edx        // bhi, to avoid overflowing the upcoming divide.
    jae     2f

    /* High word of a is less than bhi on this branch */

    divl    %ebx                        // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r

    pushl   %edi
    notl    %ecx                        // undo the earlier notl: ECX = i again
    shrl    %eax
    shrl    %cl,            %eax        // q = qs >> (1 + i)
    movl    %eax,           %edi
    mull    24(%esp)                    // q*blo
    movl    16(%esp),       %ebx
    movl    20(%esp),       %ecx        // ECX:EBX = a
    subl    %eax,           %ebx
    sbbl    %edx,           %ecx        // ECX:EBX = a - q*blo
    movl    28(%esp),       %eax
    imull   %edi,           %eax        // q*bhi
    subl    %eax,           %ecx        // ECX:EBX = a - q*b

    jnc     1f                          // if positive, this is the result.
    addl    24(%esp),       %ebx        // otherwise
    adcl    28(%esp),       %ecx        // ECX:EBX = a - (q-1)*b = result
1:  movl    %ebx,           %eax
    movl    %ecx,           %edx

    addl    %esi,           %eax        // Restore correct sign to result:
    adcl    %esi,           %edx        // with ESI = -1 this computes not(r - 1) = -r,
    xorl    %esi,           %eax        // with ESI = 0 it leaves r unchanged.
    xorl    %esi,           %edx
    popl    %edi                        // Restore callee-save registers
    popl    %ebx
    popl    %esi
    retl                                // Return

2:  /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */

    subl    %ebx,           %edx        // subtract bhi from ahi so that divide will not
    divl    %ebx                        // overflow, and find q and r such that
                                        //
                                        //      ahi:alo = (1:q)*bhi + r
                                        //
                                        // Note that q is a number in (31-i).(1+i)
                                        // fix point.

    pushl   %edi
    notl    %ecx                        // undo the earlier notl: ECX = i again
    shrl    %eax
    orl     $0x80000000,    %eax        // re-insert the implicit leading 1 of (1:qs)
    shrl    %cl,            %eax        // q = (1:qs) >> (1 + i)
    movl    %eax,           %edi
    mull    24(%esp)                    // q*blo
    movl    16(%esp),       %ebx
    movl    20(%esp),       %ecx        // ECX:EBX = a
    subl    %eax,           %ebx
    sbbl    %edx,           %ecx        // ECX:EBX = a - q*blo
    movl    28(%esp),       %eax
    imull   %edi,           %eax        // q*bhi
    subl    %eax,           %ecx        // ECX:EBX = a - q*b

    jnc     3f                          // if positive, this is the result.
    addl    24(%esp),       %ebx        // otherwise
    adcl    28(%esp),       %ecx        // ECX:EBX = a - (q-1)*b = result
3:  movl    %ebx,           %eax
    movl    %ecx,           %edx

    addl    %esi,           %eax        // Restore correct sign to result
    adcl    %esi,           %edx
    xorl    %esi,           %eax
    xorl    %esi,           %edx
    popl    %edi                        // Restore callee-save registers
    popl    %ebx
    popl    %esi
    retl                                // Return

9:  /* High word of b is zero on this branch */

    movl    16(%esp),       %eax        // Find qhi and rhi such that
    movl    20(%esp),       %ecx        //
    xorl    %edx,           %edx        //      ahi = qhi*b + rhi   with 0 <= rhi < b
    divl    %ecx                        //
                                        // Only the remainder is wanted, so qhi (in EAX)
                                        // is discarded rather than saved.
    movl    12(%esp),       %eax        // Find rlo such that
    divl    %ecx                        //
    movl    %edx,           %eax        //      rhi:alo = qlo*b + rlo  with 0 <= rlo < b
    popl    %ebx                        //
    xorl    %edx,           %edx        //      and return 0:rlo

    addl    %esi,           %eax        // Restore correct sign to result
    adcl    %esi,           %edx
    xorl    %esi,           %eax
    xorl    %esi,           %edx
    popl    %esi
    retl                                // Return
END_COMPILERRT_FUNCTION(__moddi3)

#endif // __i386__

NO_EXEC_STACK_DIRECTIVE