Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/math-emu/polynom_Xsig.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0 */
2
/*---------------------------------------------------------------------------+
3
| polynomial_Xsig.S |
4
| |
5
| Fixed point arithmetic polynomial evaluation. |
6
| |
7
| Copyright (C) 1992,1993,1994,1995 |
8
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, |
9
| Australia. E-mail [email protected] |
10
| |
11
| Call from C as: |
12
| void polynomial_Xsig(Xsig *accum, unsigned long long x, |
13
| unsigned long long terms[], int n) |
14
| |
15
| Computes: |
16
| terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x |
17
| and adds the result to the 12 byte Xsig. |
18
| The terms[] are each 8 bytes, but all computation is performed to 12 byte |
19
| precision. |
20
| |
21
| This function must be used carefully: most overflow of intermediate |
22
| results is controlled, but overflow of the result is not. |
23
| |
24
+---------------------------------------------------------------------------*/
25
.file "polynomial_Xsig.S"
26
27
#include "fpu_emu.h"
28
29
30
#define TERM_SIZE $8
31
#define SUM_MS -20(%ebp) /* sum ms long */
32
#define SUM_MIDDLE -24(%ebp) /* sum middle long */
33
#define SUM_LS -28(%ebp) /* sum ls long */
34
#define ACCUM_MS -4(%ebp) /* accum ms long */
35
#define ACCUM_MIDDLE -8(%ebp) /* accum middle long */
36
#define ACCUM_LS -12(%ebp) /* accum ls long */
37
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
38
39
.text
40
SYM_FUNC_START(polynomial_Xsig)
41
pushl %ebp
42
movl %esp,%ebp
43
subl $32,%esp
44
pushl %esi
45
pushl %edi
46
pushl %ebx
47
48
movl PARAM2,%esi /* x */
49
movl PARAM3,%edi /* terms */
50
51
movl TERM_SIZE,%eax
52
mull PARAM4 /* n */
53
addl %eax,%edi
54
55
movl 4(%edi),%edx /* terms[n] */
56
movl %edx,SUM_MS
57
movl (%edi),%edx /* terms[n] */
58
movl %edx,SUM_MIDDLE
59
xor %eax,%eax
60
movl %eax,SUM_LS
61
movb %al,OVERFLOWED
62
63
subl TERM_SIZE,%edi
64
decl PARAM4
65
js L_accum_done
66
67
L_accum_loop:
68
xor %eax,%eax
69
movl %eax,ACCUM_MS
70
movl %eax,ACCUM_MIDDLE
71
72
movl SUM_MIDDLE,%eax
73
mull (%esi) /* x ls long */
74
movl %edx,ACCUM_LS
75
76
movl SUM_MIDDLE,%eax
77
mull 4(%esi) /* x ms long */
78
addl %eax,ACCUM_LS
79
adcl %edx,ACCUM_MIDDLE
80
adcl $0,ACCUM_MS
81
82
movl SUM_MS,%eax
83
mull (%esi) /* x ls long */
84
addl %eax,ACCUM_LS
85
adcl %edx,ACCUM_MIDDLE
86
adcl $0,ACCUM_MS
87
88
movl SUM_MS,%eax
89
mull 4(%esi) /* x ms long */
90
addl %eax,ACCUM_MIDDLE
91
adcl %edx,ACCUM_MS
92
93
testb $0xff,OVERFLOWED
94
jz L_no_overflow
95
96
movl (%esi),%eax
97
addl %eax,ACCUM_MIDDLE
98
movl 4(%esi),%eax
99
adcl %eax,ACCUM_MS /* This could overflow too */
100
101
L_no_overflow:
102
103
/*
104
* Now put the sum of next term and the accumulator
105
* into the sum register
106
*/
107
movl ACCUM_LS,%eax
108
addl (%edi),%eax /* term ls long */
109
movl %eax,SUM_LS
110
movl ACCUM_MIDDLE,%eax
111
adcl (%edi),%eax /* term ls long */
112
movl %eax,SUM_MIDDLE
113
movl ACCUM_MS,%eax
114
adcl 4(%edi),%eax /* term ms long */
115
movl %eax,SUM_MS
116
sbbb %al,%al
117
movb %al,OVERFLOWED /* Used in the next iteration */
118
119
subl TERM_SIZE,%edi
120
decl PARAM4
121
jns L_accum_loop
122
123
L_accum_done:
124
movl PARAM1,%edi /* accum */
125
movl SUM_LS,%eax
126
addl %eax,(%edi)
127
movl SUM_MIDDLE,%eax
128
adcl %eax,4(%edi)
129
movl SUM_MS,%eax
130
adcl %eax,8(%edi)
131
132
popl %ebx
133
popl %edi
134
popl %esi
135
leave
136
RET
137
SYM_FUNC_END(polynomial_Xsig)
138
139