Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/math-emu/polynom_Xsig.S
10818 views
1
/*---------------------------------------------------------------------------+
2
| polynomial_Xsig.S |
3
| |
4
| Fixed point arithmetic polynomial evaluation. |
5
| |
6
| Copyright (C) 1992,1993,1994,1995 |
7
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
8
| Australia. E-mail [email protected] |
9
| |
10
| Call from C as: |
11
| void polynomial_Xsig(Xsig *accum, unsigned long long x, |
12
| unsigned long long terms[], int n) |
13
| |
14
| Computes: |
15
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
16
| and adds the result to the 12 byte Xsig. |
17
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
18
| precision. |
19
| |
20
| This function must be used carefully: most overflow of intermediate |
21
| results is controlled, but overflow of the result is not. |
22
| |
23
+---------------------------------------------------------------------------*/
24
.file "polynomial_Xsig.S"
25
26
#include "fpu_emu.h"
27
28
29
#define TERM_SIZE $8
30
#define SUM_MS -20(%ebp) /* sum ms long */
31
#define SUM_MIDDLE -24(%ebp) /* sum middle long */
32
#define SUM_LS -28(%ebp) /* sum ls long */
33
#define ACCUM_MS -4(%ebp) /* accum ms long */
34
#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
35
#define ACCUM_LS -12(%ebp) /* accum ls long */
36
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
37
38
.text
39
ENTRY(polynomial_Xsig)
40
pushl %ebp
41
movl %esp,%ebp
42
subl $32,%esp
43
pushl %esi
44
pushl %edi
45
pushl %ebx
46
47
movl PARAM2,%esi /* x */
48
movl PARAM3,%edi /* terms */
49
50
movl TERM_SIZE,%eax
51
mull PARAM4 /* n */
52
addl %eax,%edi
53
54
movl 4(%edi),%edx /* terms[n] */
55
movl %edx,SUM_MS
56
movl (%edi),%edx /* terms[n] */
57
movl %edx,SUM_MIDDLE
58
xor %eax,%eax
59
movl %eax,SUM_LS
60
movb %al,OVERFLOWED
61
62
subl TERM_SIZE,%edi
63
decl PARAM4
64
js L_accum_done
65
66
L_accum_loop:
67
xor %eax,%eax
68
movl %eax,ACCUM_MS
69
movl %eax,ACCUM_MIDDLE
70
71
movl SUM_MIDDLE,%eax
72
mull (%esi) /* x ls long */
73
movl %edx,ACCUM_LS
74
75
movl SUM_MIDDLE,%eax
76
mull 4(%esi) /* x ms long */
77
addl %eax,ACCUM_LS
78
adcl %edx,ACCUM_MIDDLE
79
adcl $0,ACCUM_MS
80
81
movl SUM_MS,%eax
82
mull (%esi) /* x ls long */
83
addl %eax,ACCUM_LS
84
adcl %edx,ACCUM_MIDDLE
85
adcl $0,ACCUM_MS
86
87
movl SUM_MS,%eax
88
mull 4(%esi) /* x ms long */
89
addl %eax,ACCUM_MIDDLE
90
adcl %edx,ACCUM_MS
91
92
testb $0xff,OVERFLOWED
93
jz L_no_overflow
94
95
movl (%esi),%eax
96
addl %eax,ACCUM_MIDDLE
97
movl 4(%esi),%eax
98
adcl %eax,ACCUM_MS /* This could overflow too */
99
100
L_no_overflow:
101
102
/*
103
* Now put the sum of next term and the accumulator
104
* into the sum register
105
*/
106
movl ACCUM_LS,%eax
107
addl (%edi),%eax /* term ls long */
108
movl %eax,SUM_LS
109
movl ACCUM_MIDDLE,%eax
110
adcl (%edi),%eax /* term ls long */
111
movl %eax,SUM_MIDDLE
112
movl ACCUM_MS,%eax
113
adcl 4(%edi),%eax /* term ms long */
114
movl %eax,SUM_MS
115
sbbb %al,%al
116
movb %al,OVERFLOWED /* Used in the next iteration */
117
118
subl TERM_SIZE,%edi
119
decl PARAM4
120
jns L_accum_loop
121
122
L_accum_done:
123
movl PARAM1,%edi /* accum */
124
movl SUM_LS,%eax
125
addl %eax,(%edi)
126
movl SUM_MIDDLE,%eax
127
adcl %eax,4(%edi)
128
movl SUM_MS,%eax
129
adcl %eax,8(%edi)
130
131
popl %ebx
132
popl %edi
133
popl %esi
134
leave
135
ret
136
137