/* SPDX-License-Identifier: GPL-2.0 */
/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.S                                                        |
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail   [email protected]              |
 |                                                                           |
 | Call from C as:                                                           |
 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
 |                        unsigned long long terms[], int n)                 |
 |                                                                           |
 | Computes:                                                                 |
 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
 | and adds the result to the 12 byte Xsig.                                  |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*
 * Reviewer notes, derived from the code below:
 *
 *  - x is accessed through a pointer: PARAM2 is loaded into %esi and the
 *    two 32 bit halves of x are read as (%esi) and 4(%esi).  The by-value
 *    prototype shown in the header above does not match this.
 *  - The first coefficient loaded is terms[n]; the loop then runs once for
 *    each of terms[n-1] ... terms[0].  terms[] must therefore hold n+1
 *    entries, and the value formed is
 *        terms[0] + x*(terms[1] + x*( ... + x*terms[n]))
 *  - Each multiply by x keeps bits 32..127 of the 128 bit product of
 *    (SUM_MS:SUM_MIDDLE) and x.  SUM_LS does not take part in the multiply.
 *  - PARAM1..PARAM4, SYM_FUNC_START, SYM_FUNC_END and RET are not defined
 *    in this file; presumably they come from fpu_emu.h or a header it
 *    includes -- confirm there.
 */
	.file	"polynomial_Xsig.S"

#include "fpu_emu.h"


/* Size in bytes of one terms[] entry, as an immediate operand. */
#define	TERM_SIZE	$8

/*
 * Frame slots, all relative to %ebp.
 * SUM_* hold the 12 byte running polynomial value; ACCUM_* hold the
 * 12 byte product (sum * x) formed in each loop iteration.
 */
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define OVERFLOWED	-16(%ebp)	/* addition overflow flag */

.text
/*
 * polynomial_Xsig
 *
 * Evaluates the polynomial by repeated multiply-and-add, starting from the
 * highest indexed term, then adds the 12 byte result into *accum.
 *
 * Register use:
 *   %esi  address of x (ls long at 0, ms long at 4)
 *   %edi  address of the terms[] entry currently being consumed; reloaded
 *         with the accum pointer (PARAM1) at the end
 *   %eax, %edx  scratch / mull operands and results
 *   %ebx  saved and restored, but not referenced by name in this routine
 *
 * PARAM4 (n) is decremented in place and used as the loop counter.
 */
SYM_FUNC_START(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* space for the SUM_*, ACCUM_* and OVERFLOWED slots */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	/* %edi = terms + n * TERM_SIZE, i.e. the address of terms[n] */
	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	/* Seed the sum with terms[n] in the upper two longs; ls long = 0 */
	movl	4(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED		/* no overflow pending yet */

	/* Step down to terms[n-1]; nothing more to do if the count goes negative */
	subl	TERM_SIZE,%edi
	decl	PARAM4
	js	L_accum_done

L_accum_loop:
	/* accum = (SUM_MS:SUM_MIDDLE) * x, keeping the upper 96 bits */
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	/* sum middle * x ls: only the high half of this product is kept */
	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS

	/* sum middle * x ms: lands in accum ls/middle, carry into ms */
	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/* sum ms * x ls: same position as the previous partial product */
	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	/* sum ms * x ms: lands in accum middle/ms */
	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	/*
	 * If the previous addition carried out of SUM_MS, the sum was really
	 * one unit larger just above its ms long; compensate by adding x
	 * itself into the middle and ms longs of the product.
	 */
	testb	$0xff,OVERFLOWED
	jz	L_no_overflow

	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 */
	/*
	 * NOTE(review): the term ls long at (%edi) is added to both the LS and
	 * the MIDDLE longs below, whereas the initial load above places it only
	 * in SUM_MIDDLE with SUM_LS = 0.  Possibly unintended; left untouched
	 * because it affects the numerical result -- confirm before changing.
	 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS
	sbbb	%al,%al			/* %al = 0xff if the add carried out, else 0 */
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	/* Move to the next lower term; loop while the count is still >= 0 */
	subl	TERM_SIZE,%edi
	decl	PARAM4
	jns	L_accum_loop

L_accum_done:
	/*
	 * Add the 12 byte sum into *accum (ls long at offset 0, middle at 4,
	 * ms at 8).  The carry out of the final adcl is not examined.
	 */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	RET
SYM_FUNC_END(polynomial_Xsig)