/* Path: blob/master/arch/x86/math-emu/polynom_Xsig.S (10818 views) */
/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.S                                                        |
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail [email protected]                |
 |                                                                           |
 | Call from C as:                                                           |
 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
 |                        unsigned long long terms[], int n)                 |
 |                                                                           |
 | Computes:                                                                 |
 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
 | and adds the result to the 12 byte Xsig.                                  |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*
 * Implementation notes (read off the code below):
 *
 *  - Syntax is AT&T (GNU as), 32-bit x86, run through the C preprocessor.
 *    ENTRY() and PARAM1..PARAM4 are not defined in this file; presumably
 *    they come from "fpu_emu.h" and PARAMn names the n-th C argument --
 *    TODO confirm against that header.
 *  - PARAM2 is loaded into %esi and then dereferenced as (%esi) / 4(%esi),
 *    so x is read through a pointer (ls long first, ms long at offset 4),
 *    not taken by value as the prototype above suggests.
 *  - The first term loaded is the one at terms + n * 8, i.e. terms[n]; the
 *    loop then walks down to terms[0].  n + 1 entries of terms[] are read.
 *  - All working storage lives in the 32 bytes reserved below %ebp; the
 *    SUM_* and ACCUM_* names below are 32-bit slots in that area, OVERFLOWED
 *    is a single byte.
 *  - %esi, %edi and %ebx are saved and restored.  %ebx is not otherwise
 *    referenced in this routine.
 */
	.file	"polynomial_Xsig.S"

#include "fpu_emu.h"


#define	TERM_SIZE	$8
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define	SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define	OVERFLOWED	-16(%ebp)	/* addition overflow flag */

.text
ENTRY(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp		/* room for SUM_*, ACCUM_*, OVERFLOWED */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* x (dereferenced below) */
	movl	PARAM3,%edi		/* terms */

	/* %edi = terms + n * TERM_SIZE; mull also overwrites %edx */
	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	/* sum = terms[n] placed in the upper two longs, ls long cleared */
	movl	4(%edi),%edx		/* terms[n], ms long */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n], ls long */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED

	subl	TERM_SIZE,%edi		/* step down to the next lower term */
	decl	PARAM4
	js	L_accum_done		/* n was 0: nothing to multiply */

L_accum_loop:
	/*
	 * accum = (SUM_MS:SUM_MIDDLE * x) >> 32, kept to 96 bits.
	 * SUM_LS takes no part in the multiplication.
	 */
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	/* sum middle * x ls: only the high half of the product is kept */
	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS

	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	testb	$0xff,OVERFLOWED
	jz	L_no_overflow

	/*
	 * The previous addition carried out of SUM_MS.  That lost bit sits
	 * one position above SUM_MS, so its product with x is x itself,
	 * lined up with ACCUM_MS:ACCUM_MIDDLE.
	 */
	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 *
 * NOTE(review): (%edi), the ls long of the term, is added twice below --
 * once to the ls long and once to the middle long of the accumulator.
 * The header describes the terms as 8-byte values, which would line them
 * up with the upper two longs only.  The behaviour is left exactly as
 * found; confirm whether the first addition is intended.
 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS		/* movl leaves the carry flag intact */
	sbbb	%al,%al			/* %al = 0xff if the add carried, else 0 */
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	subl	TERM_SIZE,%edi
	decl	PARAM4
	jns	L_accum_loop		/* runs once more for terms[0] */

L_accum_done:
	/* Add the 96-bit sum into *accum; a carry out of the ms long is dropped */
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret