Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/m68k/ifpsp060/src/fplsp.S
10820 views
1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6
7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
#
30
# lfptop.s:
31
# This file is appended to the top of the 060FPLSP package
32
# and contains the entry points into the package. The user, in
33
# effect, branches to one of the branch table entries located here.
34
#
35
36
#
# Entry-point branch table.
#
# One slot per exported routine, grouped per operation in sgl/dbl/ext order.
# Every slot is a long branch to the real routine followed by a zero pad
# word, so all slots have the same size. Users branch to a slot
# (presumably by fixed offset -- confirm against the package docs), so
# entries must never be reordered, inserted, or removed.
#
	bra.l		_facoss_
	short		0x0000
	bra.l		_facosd_
	short		0x0000
	bra.l		_facosx_
	short		0x0000

	bra.l		_fasins_
	short		0x0000
	bra.l		_fasind_
	short		0x0000
	bra.l		_fasinx_
	short		0x0000

	bra.l		_fatans_
	short		0x0000
	bra.l		_fatand_
	short		0x0000
	bra.l		_fatanx_
	short		0x0000

	bra.l		_fatanhs_
	short		0x0000
	bra.l		_fatanhd_
	short		0x0000
	bra.l		_fatanhx_
	short		0x0000

	bra.l		_fcoss_
	short		0x0000
	bra.l		_fcosd_
	short		0x0000
	bra.l		_fcosx_
	short		0x0000

	bra.l		_fcoshs_
	short		0x0000
	bra.l		_fcoshd_
	short		0x0000
	bra.l		_fcoshx_
	short		0x0000

	bra.l		_fetoxs_
	short		0x0000
	bra.l		_fetoxd_
	short		0x0000
	bra.l		_fetoxx_
	short		0x0000

	bra.l		_fetoxm1s_
	short		0x0000
	bra.l		_fetoxm1d_
	short		0x0000
	bra.l		_fetoxm1x_
	short		0x0000

	bra.l		_fgetexps_
	short		0x0000
	bra.l		_fgetexpd_
	short		0x0000
	bra.l		_fgetexpx_
	short		0x0000

	bra.l		_fgetmans_
	short		0x0000
	bra.l		_fgetmand_
	short		0x0000
	bra.l		_fgetmanx_
	short		0x0000

	bra.l		_flog10s_
	short		0x0000
	bra.l		_flog10d_
	short		0x0000
	bra.l		_flog10x_
	short		0x0000

	bra.l		_flog2s_
	short		0x0000
	bra.l		_flog2d_
	short		0x0000
	bra.l		_flog2x_
	short		0x0000

	bra.l		_flogns_
	short		0x0000
	bra.l		_flognd_
	short		0x0000
	bra.l		_flognx_
	short		0x0000

	bra.l		_flognp1s_
	short		0x0000
	bra.l		_flognp1d_
	short		0x0000
	bra.l		_flognp1x_
	short		0x0000

	bra.l		_fmods_
	short		0x0000
	bra.l		_fmodd_
	short		0x0000
	bra.l		_fmodx_
	short		0x0000

	bra.l		_frems_
	short		0x0000
	bra.l		_fremd_
	short		0x0000
	bra.l		_fremx_
	short		0x0000

	bra.l		_fscales_
	short		0x0000
	bra.l		_fscaled_
	short		0x0000
	bra.l		_fscalex_
	short		0x0000

	bra.l		_fsins_
	short		0x0000
	bra.l		_fsind_
	short		0x0000
	bra.l		_fsinx_
	short		0x0000

	bra.l		_fsincoss_
	short		0x0000
	bra.l		_fsincosd_
	short		0x0000
	bra.l		_fsincosx_
	short		0x0000

	bra.l		_fsinhs_
	short		0x0000
	bra.l		_fsinhd_
	short		0x0000
	bra.l		_fsinhx_
	short		0x0000

	bra.l		_ftans_
	short		0x0000
	bra.l		_ftand_
	short		0x0000
	bra.l		_ftanx_
	short		0x0000

	bra.l		_ftanhs_
	short		0x0000
	bra.l		_ftanhd_
	short		0x0000
	bra.l		_ftanhx_
	short		0x0000

	bra.l		_ftentoxs_
	short		0x0000
	bra.l		_ftentoxd_
	short		0x0000
	bra.l		_ftentoxx_
	short		0x0000

	bra.l		_ftwotoxs_
	short		0x0000
	bra.l		_ftwotoxd_
	short		0x0000
	bra.l		_ftwotoxx_
	short		0x0000

	bra.l		_fabss_
	short		0x0000
	bra.l		_fabsd_
	short		0x0000
	bra.l		_fabsx_
	short		0x0000

	bra.l		_fadds_
	short		0x0000
	bra.l		_faddd_
	short		0x0000
	bra.l		_faddx_
	short		0x0000

	bra.l		_fdivs_
	short		0x0000
	bra.l		_fdivd_
	short		0x0000
	bra.l		_fdivx_
	short		0x0000

	bra.l		_fints_
	short		0x0000
	bra.l		_fintd_
	short		0x0000
	bra.l		_fintx_
	short		0x0000

	bra.l		_fintrzs_
	short		0x0000
	bra.l		_fintrzd_
	short		0x0000
	bra.l		_fintrzx_
	short		0x0000

	bra.l		_fmuls_
	short		0x0000
	bra.l		_fmuld_
	short		0x0000
	bra.l		_fmulx_
	short		0x0000

	bra.l		_fnegs_
	short		0x0000
	bra.l		_fnegd_
	short		0x0000
	bra.l		_fnegx_
	short		0x0000

	bra.l		_fsqrts_
	short		0x0000
	bra.l		_fsqrtd_
	short		0x0000
	bra.l		_fsqrtx_
	short		0x0000

	bra.l		_fsubs_
	short		0x0000
	bra.l		_fsubd_
	short		0x0000
	bra.l		_fsubx_
	short		0x0000

# leave room for future possible additions
	align		0x400
269
270
#
271
# This file contains a set of define statements for constants
272
# in order to promote readability within the corecode itself.
273
#
274
275
#
# Stack frame layout. Every offset below is applied to a6 after
# "link %a6,&-LOCAL_SIZE"; LV is the base of the local area, so all
# LV+n symbols are negative a6 displacements.
#
	set	LOCAL_SIZE,	192		# stack frame size(bytes)
	set	LV,		-LOCAL_SIZE	# stack offset

	set	EXC_SR,		0x4		# stack status register
	set	EXC_PC,		0x6		# stack pc
	set	EXC_VOFF,	0xa		# stacked vector offset
	set	EXC_EA,		0xc		# stacked <ea>

	set	EXC_FP,		0x0		# frame pointer

	set	EXC_AREGS,	-68		# offset of all address regs
	set	EXC_DREGS,	-100		# offset of all data regs
	set	EXC_FPREGS,	-36		# offset of all fp regs

# NOTE(review): OLD_A7 and EXC_A6 name the same slot (EXC_AREGS+24) --
# confirm this aliasing is intended before using both.
	set	EXC_A7,		EXC_AREGS+(7*4)	# offset of saved a7
	set	OLD_A7,		EXC_AREGS+(6*4)	# extra copy of saved a7
	set	EXC_A6,		EXC_AREGS+(6*4)	# offset of saved a6
	set	EXC_A5,		EXC_AREGS+(5*4)
	set	EXC_A4,		EXC_AREGS+(4*4)
	set	EXC_A3,		EXC_AREGS+(3*4)
	set	EXC_A2,		EXC_AREGS+(2*4)
	set	EXC_A1,		EXC_AREGS+(1*4)
	set	EXC_A0,		EXC_AREGS+(0*4)
	set	EXC_D7,		EXC_DREGS+(7*4)
	set	EXC_D6,		EXC_DREGS+(6*4)
	set	EXC_D5,		EXC_DREGS+(5*4)
	set	EXC_D4,		EXC_DREGS+(4*4)
	set	EXC_D3,		EXC_DREGS+(3*4)
	set	EXC_D2,		EXC_DREGS+(2*4)
	set	EXC_D1,		EXC_DREGS+(1*4)
	set	EXC_D0,		EXC_DREGS+(0*4)

	set	EXC_FP0,	EXC_FPREGS+(0*12) # offset of saved fp0
	set	EXC_FP1,	EXC_FPREGS+(1*12) # offset of saved fp1
	set	EXC_FP2,	EXC_FPREGS+(2*12) # offset of saved fp2 (not used)

# 12-byte extended-precision work areas: exponent word, sign byte at +2,
# high mantissa longword at +4, low mantissa longword at +8.
	set	FP_SCR1,	LV+80		# fp scratch 1
	set	FP_SCR1_EX,	FP_SCR1+0
	set	FP_SCR1_SGN,	FP_SCR1+2
	set	FP_SCR1_HI,	FP_SCR1+4
	set	FP_SCR1_LO,	FP_SCR1+8

	set	FP_SCR0,	LV+68		# fp scratch 0
	set	FP_SCR0_EX,	FP_SCR0+0
	set	FP_SCR0_SGN,	FP_SCR0+2
	set	FP_SCR0_HI,	FP_SCR0+4
	set	FP_SCR0_LO,	FP_SCR0+8

	set	FP_DST,		LV+56		# fp destination operand
	set	FP_DST_EX,	FP_DST+0
	set	FP_DST_SGN,	FP_DST+2
	set	FP_DST_HI,	FP_DST+4
	set	FP_DST_LO,	FP_DST+8

	set	FP_SRC,		LV+44		# fp source operand
	set	FP_SRC_EX,	FP_SRC+0
	set	FP_SRC_SGN,	FP_SRC+2
	set	FP_SRC_HI,	FP_SRC+4
	set	FP_SRC_LO,	FP_SRC+8

	set	USER_FPIAR,	LV+40		# FP instr address register

	set	USER_FPSR,	LV+36		# FP status register
	set	FPSR_CC,	USER_FPSR+0	# FPSR condition codes
	set	FPSR_QBYTE,	USER_FPSR+1	# FPSR quotient byte
	set	FPSR_EXCEPT,	USER_FPSR+2	# FPSR exception status byte
	set	FPSR_AEXCEPT,	USER_FPSR+3	# FPSR accrued exception byte

	set	USER_FPCR,	LV+32		# FP control register
	set	FPCR_ENABLE,	USER_FPCR+2	# FPCR exception enable
	set	FPCR_MODE,	USER_FPCR+3	# FPCR rounding mode control

	set	L_SCR3,		LV+28		# integer scratch 3
	set	L_SCR2,		LV+24		# integer scratch 2
	set	L_SCR1,		LV+20		# integer scratch 1

	set	STORE_FLG,	LV+19		# flag: operand store (ie. not fcmp/ftst)

# EXC_TEMP2 shares its offset (LV+24) with L_SCR2.
	set	EXC_TEMP2,	LV+24		# temporary space
	set	EXC_TEMP,	LV+16		# temporary space

	set	DTAG,		LV+15		# destination operand type
	set	STAG,		LV+14		# source operand type

	set	SPCOND_FLG,	LV+10		# flag: special case (see below)

# EXC_EXTWORD and EXC_CMDREG are two names for the same word (LV+2).
	set	EXC_CC,		LV+8		# saved condition codes
	set	EXC_EXTWPTR,	LV+4		# saved current PC (active)
	set	EXC_EXTWORD,	LV+2		# saved extension word
	set	EXC_CMDREG,	LV+2		# saved extension word
	set	EXC_OPWORD,	LV+0		# saved operation word
366
367
################################
368
369
# Helpful macros
370
371
#
# Field offsets inside an extended-precision value held in memory, the
# exponent limits/biases per precision, and the operand type codes that
# are stored in STAG/DTAG.
#
	set	FTEMP,		0		# offsets within an
	set	FTEMP_EX,	0		# extended precision
	set	FTEMP_SGN,	2		# value saved in memory.
	set	FTEMP_HI,	4
	set	FTEMP_LO,	8
	set	FTEMP_GRS,	12

	set	LOCAL,		0		# offsets within an
	set	LOCAL_EX,	0		# extended precision
	set	LOCAL_SGN,	2		# value saved in memory.
	set	LOCAL_HI,	4
	set	LOCAL_LO,	8
	set	LOCAL_GRS,	12

	set	DST,		0		# offsets within an
	set	DST_EX,		0		# extended precision
	set	DST_HI,		4		# value saved in memory.
	set	DST_LO,		8

	set	SRC,		0		# offsets within an
	set	SRC_EX,		0		# extended precision
	set	SRC_HI,		4		# value saved in memory.
	set	SRC_LO,		8

	set	SGL_LO,		0x3f81		# min sgl prec exponent
	set	SGL_HI,		0x407e		# max sgl prec exponent
	set	DBL_LO,		0x3c01		# min dbl prec exponent
	set	DBL_HI,		0x43fe		# max dbl prec exponent
	set	EXT_LO,		0x0		# min ext prec exponent
	set	EXT_HI,		0x7ffe		# max ext prec exponent

	set	EXT_BIAS,	0x3fff		# extended precision bias
	set	SGL_BIAS,	0x007f		# single precision bias
	set	DBL_BIAS,	0x03ff		# double precision bias

# NORM is zero, so entry points can test for it with a plain tst.b.
	set	NORM,		0x00		# operand type for STAG/DTAG
	set	ZERO,		0x01		# operand type for STAG/DTAG
	set	INF,		0x02		# operand type for STAG/DTAG
	set	QNAN,		0x03		# operand type for STAG/DTAG
	set	DENORM,		0x04		# operand type for STAG/DTAG
	set	SNAN,		0x05		# operand type for STAG/DTAG
	set	UNNORM,		0x06		# operand type for STAG/DTAG
413
414
##################
415
# FPSR/FPCR bits #
416
##################
417
#
# Bit numbers are positions within the individual FPSR byte they belong to
# (condition code, quotient, exception status, accrued exception); the
# *_mask values further down are the same bits expressed as longword masks.
#
	set	neg_bit,	0x3		# negative result
	set	z_bit,		0x2		# zero result
	set	inf_bit,	0x1		# infinite result
	set	nan_bit,	0x0		# NAN result

	set	q_sn_bit,	0x7		# sign bit of quotient byte

	set	bsun_bit,	7		# branch on unordered
	set	snan_bit,	6		# signalling NAN
	set	operr_bit,	5		# operand error
	set	ovfl_bit,	4		# overflow
	set	unfl_bit,	3		# underflow
	set	dz_bit,		2		# divide by zero
	set	inex2_bit,	1		# inexact result 2
	set	inex1_bit,	0		# inexact result 1

	set	aiop_bit,	7		# accrued invalid operation bit
	set	aovfl_bit,	6		# accrued overflow bit
	set	aunfl_bit,	5		# accrued underflow bit
	set	adz_bit,	4		# accrued dz bit
	set	ainex_bit,	3		# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
	set	neg_mask,	0x08000000	# negative bit mask (lw)
	set	inf_mask,	0x02000000	# infinity bit mask (lw)
	set	z_mask,		0x04000000	# zero bit mask (lw)
	set	nan_mask,	0x01000000	# nan bit mask (lw)

	set	neg_bmask,	0x08		# negative bit mask (byte)
	set	inf_bmask,	0x02		# infinity bit mask (byte)
	set	z_bmask,	0x04		# zero bit mask (byte)
	set	nan_bmask,	0x01		# nan bit mask (byte)

	set	bsun_mask,	0x00008000	# bsun exception mask
	set	snan_mask,	0x00004000	# snan exception mask
	set	operr_mask,	0x00002000	# operr exception mask
	set	ovfl_mask,	0x00001000	# overflow exception mask
	set	unfl_mask,	0x00000800	# underflow exception mask
	set	dz_mask,	0x00000400	# dz exception mask
	set	inex2_mask,	0x00000200	# inex2 exception mask
	set	inex1_mask,	0x00000100	# inex1 exception mask

	set	aiop_mask,	0x00000080	# accrued invalid operation
	set	aovfl_mask,	0x00000040	# accrued overflow
	set	aunfl_mask,	0x00000020	# accrued underflow
	set	adz_mask,	0x00000010	# accrued divide by zero
	set	ainex_mask,	0x00000008	# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
# All combinations are sums of disjoint single-bit masks, except
# nzi_mask, which is an AND mask.
	set	dzinf_mask,	inf_mask+dz_mask+adz_mask
	set	opnan_mask,	nan_mask+operr_mask+aiop_mask
	set	nzi_mask,	0x01ffffff	#clears N, Z, and I
	set	unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
	set	unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
	set	ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
	set	inx1a_mask,	inex1_mask+ainex_mask
	set	inx2a_mask,	inex2_mask+ainex_mask
	set	snaniop_mask,	nan_mask+snan_mask+aiop_mask
	set	snaniop2_mask,	snan_mask+aiop_mask
	set	naniop_mask,	nan_mask+aiop_mask
	set	neginf_mask,	neg_mask+inf_mask
	set	infaiop_mask,	inf_mask+aiop_mask
	set	negz_mask,	neg_mask+z_mask
	set	opaop_mask,	operr_mask+aiop_mask
	set	unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
	set	ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
487
488
#########
489
# misc. #
490
#########
491
#
# Miscellaneous constants: rounding/precision mode codes, operand sizes,
# FP exception vector offsets, special-condition flags and the
# transcendental "last-op" codes.
#
	set	rnd_stky_bit,	29		# stky bit pos in longword

	set	sign_bit,	0x7		# sign bit
	set	signan_bit,	0x6		# signalling nan bit

	set	sgl_thresh,	0x3f81		# minimum sgl exponent
	set	dbl_thresh,	0x3c01		# minimum dbl exponent

	set	x_mode,		0x0		# extended precision
	set	s_mode,		0x4		# single precision
	set	d_mode,		0x8		# double precision

	set	rn_mode,	0x0		# round-to-nearest
	set	rz_mode,	0x1		# round-to-zero
	set	rm_mode,	0x2		# round-to-minus-infinity
	set	rp_mode,	0x3		# round-to-plus-infinity

	set	mantissalen,	64		# length of mantissa in bits

	set	BYTE,		1		# len(byte) == 1 byte
	set	WORD,		2		# len(word) == 2 bytes
	set	LONG,		4		# len(longword) == 4 bytes

	set	BSUN_VEC,	0xc0		# bsun vector offset
	set	INEX_VEC,	0xc4		# inexact vector offset
	set	DZ_VEC,		0xc8		# dz vector offset
	set	UNFL_VEC,	0xcc		# unfl vector offset
	set	OPERR_VEC,	0xd0		# operr vector offset
	set	OVFL_VEC,	0xd4		# ovfl vector offset
	set	SNAN_VEC,	0xd8		# snan vector offset

###########################
# SPecial CONDition FLaGs #
###########################
# Each *_flg value is the byte mask (1 << n) for the matching *_bit number;
# fmovm_flg has no *_bit counterpart in this list.
	set	ftrapcc_flg,	0x01		# flag bit: ftrapcc exception
	set	fbsun_flg,	0x02		# flag bit: bsun exception
	set	mia7_flg,	0x04		# flag bit: (a7)+ <ea>
	set	mda7_flg,	0x08		# flag bit: -(a7) <ea>
	set	fmovm_flg,	0x40		# flag bit: fmovm instruction
	set	immed_flg,	0x80		# flag bit: &<data> <ea>

	set	ftrapcc_bit,	0x0
	set	fbsun_bit,	0x1
	set	mia7_bit,	0x2
	set	mda7_bit,	0x3
	set	immed_bit,	0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
	set	FMUL_OP,	0x0		# fmul instr performed last
	set	FDIV_OP,	0x1		# fdiv performed last
	set	FADD_OP,	0x2		# fadd performed last
	set	FMOV_OP,	0x3		# fmov performed last
545
546
#############
547
# CONSTANTS #
548
#############
549
#
# Shared numeric constants.
#   T1/T2   - two-longword values (double-precision bit patterns):
#             lead and trail parts of 16381*LOG2.
#   PI      - extended precision (exponent word 0x4000), padded to 16 bytes.
#   PIBY2   - same mantissa as PI with exponent 0x3FFF, i.e. PI/2.
#   TWOBYPI - two-longword value; NOTE(review): looks like 2/PI as a
#             double-precision pattern -- confirm against its users.
#
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

TWOBYPI:
	long		0x3FE45F30,0x6DC9C883
557
558
#########################################################################
559
# MONADIC TEMPLATE #
560
#########################################################################
561
#
# _fsins_(): fsin entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler. The result is left in fp0. d0-d1/a0-a1, fp1 and
# the FP control registers are reloaded from the frame before returning.
#
	global		_fsins_
_fsins_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L0_2s
	bsr.l		ssin			# operand is a NORM
	bra.b		_L0_6s
_L0_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L0_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L0_6s
_L0_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L0_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L0_6s
_L0_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L0_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L0_6s
_L0_5s:
	bsr.l		ssind			# operand is a DENORM
_L0_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
617
618
#
# _fsind_(): fsin entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler. The result is left in fp0. d0-d1/a0-a1, fp1 and
# the FP control registers are reloaded from the frame before returning.
#
	global		_fsind_
_fsind_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L0_2d
	bsr.l		ssin			# operand is a NORM
	bra.b		_L0_6d
_L0_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L0_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L0_6d
_L0_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L0_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L0_6d
_L0_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L0_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L0_6d
_L0_5d:
	bsr.l		ssind			# operand is a DENORM
_L0_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
675
676
#
# _fsinx_(): fsin entry point, extended-precision operand.
#
# The 12-byte operand at 0x8(%a6) (after the link) is copied into FP_SRC
# and classified by tag(); the type byte picks the handler. The result
# is left in fp0. d0-d1/a0-a1, fp1 and the FP control registers are
# reloaded from the frame before returning.
#
	global		_fsinx_
_fsinx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy for tag()
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L0_2x
	bsr.l		ssin			# operand is a NORM
	bra.b		_L0_6x
_L0_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L0_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L0_6x
_L0_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L0_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L0_6x
_L0_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L0_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L0_6x
_L0_5x:
	bsr.l		ssind			# operand is a DENORM
_L0_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
733
734
735
#########################################################################
736
# MONADIC TEMPLATE #
737
#########################################################################
738
#
# _fcoss_(): fcos entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler (a ZERO operand goes to ld_pone). The result is left
# in fp0. d0-d1/a0-a1, fp1 and the FP control registers are reloaded
# from the frame before returning.
#
	global		_fcoss_
_fcoss_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L1_2s
	bsr.l		scos			# operand is a NORM
	bra.b		_L1_6s
_L1_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L1_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L1_6s
_L1_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L1_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L1_6s
_L1_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L1_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L1_6s
_L1_5s:
	bsr.l		scosd			# operand is a DENORM
_L1_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
794
795
#
# _fcosd_(): fcos entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler (a ZERO operand goes to ld_pone). The result is left
# in fp0. d0-d1/a0-a1, fp1 and the FP control registers are reloaded
# from the frame before returning.
#
	global		_fcosd_
_fcosd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L1_2d
	bsr.l		scos			# operand is a NORM
	bra.b		_L1_6d
_L1_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L1_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L1_6d
_L1_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L1_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L1_6d
_L1_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L1_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L1_6d
_L1_5d:
	bsr.l		scosd			# operand is a DENORM
_L1_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
852
853
#
# _fcosx_(): fcos entry point, extended-precision operand.
#
# The 12-byte operand at 0x8(%a6) (after the link) is copied into FP_SRC
# and classified by tag(); the type byte picks the handler (a ZERO
# operand goes to ld_pone). The result is left in fp0. d0-d1/a0-a1, fp1
# and the FP control registers are reloaded from the frame before
# returning.
#
	global		_fcosx_
_fcosx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy for tag()
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L1_2x
	bsr.l		scos			# operand is a NORM
	bra.b		_L1_6x
_L1_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L1_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L1_6x
_L1_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L1_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L1_6x
_L1_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L1_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L1_6x
_L1_5x:
	bsr.l		scosd			# operand is a DENORM
_L1_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
910
911
912
#########################################################################
913
# MONADIC TEMPLATE #
914
#########################################################################
915
#
# _fsinhs_(): fsinh entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler (an INF operand goes to src_inf). The result is left
# in fp0. d0-d1/a0-a1, fp1 and the FP control registers are reloaded
# from the frame before returning.
#
	global		_fsinhs_
_fsinhs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L2_2s
	bsr.l		ssinh			# operand is a NORM
	bra.b		_L2_6s
_L2_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L2_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L2_6s
_L2_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L2_4s			# no
	bsr.l		src_inf			# yes
	bra.b		_L2_6s
_L2_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L2_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L2_6s
_L2_5s:
	bsr.l		ssinhd			# operand is a DENORM
_L2_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
971
972
#
# _fsinhd_(): fsinh entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler (an INF operand goes to src_inf). The result is left
# in fp0. d0-d1/a0-a1, fp1 and the FP control registers are reloaded
# from the frame before returning.
#
	global		_fsinhd_
_fsinhd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L2_2d
	bsr.l		ssinh			# operand is a NORM
	bra.b		_L2_6d
_L2_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L2_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L2_6d
_L2_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L2_4d			# no
	bsr.l		src_inf			# yes
	bra.b		_L2_6d
_L2_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L2_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L2_6d
_L2_5d:
	bsr.l		ssinhd			# operand is a DENORM
_L2_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
1029
1030
#
# _fsinhx_(): fsinh entry point, extended-precision operand.
#
# The 12-byte operand at 0x8(%a6) (after the link) is copied into FP_SRC
# and classified by tag(); the type byte picks the handler (an INF
# operand goes to src_inf). The result is left in fp0. d0-d1/a0-a1, fp1
# and the FP control registers are reloaded from the frame before
# returning.
#
	global		_fsinhx_
_fsinhx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy for tag()
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L2_2x
	bsr.l		ssinh			# operand is a NORM
	bra.b		_L2_6x
_L2_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L2_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L2_6x
_L2_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L2_4x			# no
	bsr.l		src_inf			# yes
	bra.b		_L2_6x
_L2_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L2_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L2_6x
_L2_5x:
	bsr.l		ssinhd			# operand is a DENORM
_L2_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
1087
1088
1089
#########################################################################
1090
# MONADIC TEMPLATE #
1091
#########################################################################
1092
#
# _flognp1s_(): flognp1 entry point, single-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler (an INF operand goes to sopr_inf). The result is
# left in fp0. d0-d1/a0-a1, fp1 and the FP control registers are
# reloaded from the frame before returning.
#
	global		_flognp1s_
_flognp1s_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L3_2s
	bsr.l		slognp1			# operand is a NORM
	bra.b		_L3_6s
_L3_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L3_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L3_6s
_L3_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L3_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L3_6s
_L3_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L3_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L3_6s
_L3_5s:
	bsr.l		slognp1d		# operand is a DENORM
_L3_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
1148
1149
#
# _flognp1d_(): flognp1 entry point, double-precision operand.
#
# The operand is read from 0x8(%a6) (after the link), converted to
# extended precision in FP_SRC and classified by tag(); the type byte
# picks the handler (an INF operand goes to sopr_inf). The result is
# left in fp0. d0-d1/a0-a1, fp1 and the FP control registers are
# reloaded from the frame before returning.
#
	global		_flognp1d_
_flognp1d_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as ext precision
	lea		FP_SRC(%a6),%a0		# a0 -> operand for tag()
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L3_2d
	bsr.l		slognp1			# operand is a NORM
	bra.b		_L3_6d
_L3_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L3_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L3_6d
_L3_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L3_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L3_6d
_L3_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L3_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L3_6d
_L3_5d:
	bsr.l		slognp1d		# operand is a DENORM
_L3_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
1206
1207
#
# _flognp1x_(): flognp1 entry point, extended-precision operand.
#
# The 12-byte operand at 0x8(%a6) (after the link) is copied into FP_SRC
# and classified by tag(); the type byte picks the handler (an INF
# operand goes to sopr_inf). The result is left in fp0. d0-d1/a0-a1, fp1
# and the FP control registers are reloaded from the frame before
# returning.
#
	global		_flognp1x_
_flognp1x_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> operand copy for tag()
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = type; d0 is reused below

# clear the cc and exception status bytes of the saved FPSR image,
# keeping the quotient and accrued exception bytes
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# NORM == 0
	bne.b		_L3_2x
	bsr.l		slognp1			# operand is a NORM
	bra.b		_L3_6x
_L3_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L3_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L3_6x
_L3_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L3_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L3_6x
_L3_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L3_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L3_6x
_L3_5x:
	bsr.l		slognp1d		# operand is a DENORM
_L3_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1 (fp0 = result)
	unlk		%a6
	rts
1264
1265
1266
#########################################################################
1267
# MONADIC TEMPLATE #
1268
#########################################################################
1269
# _fetoxm1s_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> setoxm1, ZERO -> src_zero, INF -> setoxm1i, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> setoxm1d.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes e^x - 1 -- inferred from the name only.
global _fetoxm1s_
1270
_fetoxm1s_:
1271
link %a6,&-LOCAL_SIZE
1272
1273
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1274
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1275
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1276
1277
fmov.l &0x0,%fpcr # zero FPCR
1278
1279
#
1280
# copy, convert, and tag input argument
1281
#
1282
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
1283
fmov.x %fp0,FP_SRC(%a6)
1284
lea FP_SRC(%a6),%a0
1285
bsr.l tag # fetch operand type
1286
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1287
mov.b %d0,%d1
1288
1289
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1290
1291
clr.l %d0
1292
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1293
1294
# a zero tag selects the NORM handler
tst.b %d1
1295
bne.b _L4_2s
1296
bsr.l setoxm1 # operand is a NORM
1297
bra.b _L4_6s
1298
_L4_2s:
1299
cmpi.b %d1,&ZERO # is operand a ZERO?
1300
bne.b _L4_3s # no
1301
bsr.l src_zero # yes
1302
bra.b _L4_6s
1303
_L4_3s:
1304
cmpi.b %d1,&INF # is operand an INF?
1305
bne.b _L4_4s # no
1306
bsr.l setoxm1i # yes
1307
bra.b _L4_6s
1308
_L4_4s:
1309
cmpi.b %d1,&QNAN # is operand a QNAN?
1310
bne.b _L4_5s # no
1311
bsr.l src_qnan # yes
1312
bra.b _L4_6s
1313
_L4_5s:
1314
# no explicit tag test here: every remaining tag value lands on this call
bsr.l setoxm1d # operand is a DENORM
1315
_L4_6s:
1316
1317
#
1318
# Result is now in FP0
1319
#
1320
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1321
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1322
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1323
unlk %a6
1324
rts
1325
1326
# _fetoxm1d_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> setoxm1, ZERO -> src_zero, INF -> setoxm1i, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> setoxm1d.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes e^x - 1 -- inferred from the name only.
global _fetoxm1d_
1327
_fetoxm1d_:
1328
link %a6,&-LOCAL_SIZE
1329
1330
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1331
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1332
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1333
1334
fmov.l &0x0,%fpcr # zero FPCR
1335
1336
#
1337
# copy, convert, and tag input argument
1338
#
1339
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
1340
fmov.x %fp0,FP_SRC(%a6)
1341
lea FP_SRC(%a6),%a0
1342
bsr.l tag # fetch operand type
1343
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1344
mov.b %d0,%d1
1345
1346
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1347
1348
clr.l %d0
1349
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1350
1351
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
1352
# a zero tag selects the NORM handler
tst.b %d1
1353
bne.b _L4_2d
1354
bsr.l setoxm1 # operand is a NORM
1355
bra.b _L4_6d
1356
_L4_2d:
1357
cmpi.b %d1,&ZERO # is operand a ZERO?
1358
bne.b _L4_3d # no
1359
bsr.l src_zero # yes
1360
bra.b _L4_6d
1361
_L4_3d:
1362
cmpi.b %d1,&INF # is operand an INF?
1363
bne.b _L4_4d # no
1364
bsr.l setoxm1i # yes
1365
bra.b _L4_6d
1366
_L4_4d:
1367
cmpi.b %d1,&QNAN # is operand a QNAN?
1368
bne.b _L4_5d # no
1369
bsr.l src_qnan # yes
1370
bra.b _L4_6d
1371
_L4_5d:
1372
# no explicit tag test here: every remaining tag value lands on this call
bsr.l setoxm1d # operand is a DENORM
1373
_L4_6d:
1374
1375
#
1376
# Result is now in FP0
1377
#
1378
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1379
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1380
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1381
unlk %a6
1382
rts
1383
1384
# _fetoxm1x_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> setoxm1, ZERO -> src_zero, INF -> setoxm1i, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> setoxm1d.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes e^x - 1 -- inferred from the name only.
global _fetoxm1x_
1385
_fetoxm1x_:
1386
link %a6,&-LOCAL_SIZE
1387
1388
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1389
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1390
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1391
1392
fmov.l &0x0,%fpcr # zero FPCR
1393
1394
#
1395
# copy, convert, and tag input argument
1396
#
1397
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
1398
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1399
mov.l 0x8+0x4(%a6),0x4(%a0)
1400
mov.l 0x8+0x8(%a6),0x8(%a0)
1401
bsr.l tag # fetch operand type
1402
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1403
mov.b %d0,%d1
1404
1405
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1406
1407
clr.l %d0
1408
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1409
1410
# a zero tag selects the NORM handler
tst.b %d1
1411
bne.b _L4_2x
1412
bsr.l setoxm1 # operand is a NORM
1413
bra.b _L4_6x
1414
_L4_2x:
1415
cmpi.b %d1,&ZERO # is operand a ZERO?
1416
bne.b _L4_3x # no
1417
bsr.l src_zero # yes
1418
bra.b _L4_6x
1419
_L4_3x:
1420
cmpi.b %d1,&INF # is operand an INF?
1421
bne.b _L4_4x # no
1422
bsr.l setoxm1i # yes
1423
bra.b _L4_6x
1424
_L4_4x:
1425
cmpi.b %d1,&QNAN # is operand a QNAN?
1426
bne.b _L4_5x # no
1427
bsr.l src_qnan # yes
1428
bra.b _L4_6x
1429
_L4_5x:
1430
# no explicit tag test here: every remaining tag value lands on this call
bsr.l setoxm1d # operand is a DENORM
1431
_L4_6x:
1432
1433
#
1434
# Result is now in FP0
1435
#
1436
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1437
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1438
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1439
unlk %a6
1440
rts
1441
1442
1443
#########################################################################
1444
# MONADIC TEMPLATE #
1445
#########################################################################
1446
# _ftanhs_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> stanh, ZERO -> src_zero, INF -> src_one, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> stanhd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes tanh(x) -- inferred from the name only.
global _ftanhs_
1447
_ftanhs_:
1448
link %a6,&-LOCAL_SIZE
1449
1450
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1451
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1452
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1453
1454
fmov.l &0x0,%fpcr # zero FPCR
1455
1456
#
1457
# copy, convert, and tag input argument
1458
#
1459
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
1460
fmov.x %fp0,FP_SRC(%a6)
1461
lea FP_SRC(%a6),%a0
1462
bsr.l tag # fetch operand type
1463
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1464
mov.b %d0,%d1
1465
1466
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1467
1468
clr.l %d0
1469
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1470
1471
# a zero tag selects the NORM handler
tst.b %d1
1472
bne.b _L5_2s
1473
bsr.l stanh # operand is a NORM
1474
bra.b _L5_6s
1475
_L5_2s:
1476
cmpi.b %d1,&ZERO # is operand a ZERO?
1477
bne.b _L5_3s # no
1478
bsr.l src_zero # yes
1479
bra.b _L5_6s
1480
_L5_3s:
1481
cmpi.b %d1,&INF # is operand an INF?
1482
bne.b _L5_4s # no
1483
bsr.l src_one # yes
1484
bra.b _L5_6s
1485
_L5_4s:
1486
cmpi.b %d1,&QNAN # is operand a QNAN?
1487
bne.b _L5_5s # no
1488
bsr.l src_qnan # yes
1489
bra.b _L5_6s
1490
_L5_5s:
1491
# no explicit tag test here: every remaining tag value lands on this call
bsr.l stanhd # operand is a DENORM
1492
_L5_6s:
1493
1494
#
1495
# Result is now in FP0
1496
#
1497
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1498
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1499
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1500
unlk %a6
1501
rts
1502
1503
# _ftanhd_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> stanh, ZERO -> src_zero, INF -> src_one, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> stanhd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes tanh(x) -- inferred from the name only.
global _ftanhd_
1504
_ftanhd_:
1505
link %a6,&-LOCAL_SIZE
1506
1507
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1508
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1509
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1510
1511
fmov.l &0x0,%fpcr # zero FPCR
1512
1513
#
1514
# copy, convert, and tag input argument
1515
#
1516
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
1517
fmov.x %fp0,FP_SRC(%a6)
1518
lea FP_SRC(%a6),%a0
1519
bsr.l tag # fetch operand type
1520
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1521
mov.b %d0,%d1
1522
1523
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1524
1525
clr.l %d0
1526
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1527
1528
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
1529
# a zero tag selects the NORM handler
tst.b %d1
1530
bne.b _L5_2d
1531
bsr.l stanh # operand is a NORM
1532
bra.b _L5_6d
1533
_L5_2d:
1534
cmpi.b %d1,&ZERO # is operand a ZERO?
1535
bne.b _L5_3d # no
1536
bsr.l src_zero # yes
1537
bra.b _L5_6d
1538
_L5_3d:
1539
cmpi.b %d1,&INF # is operand an INF?
1540
bne.b _L5_4d # no
1541
bsr.l src_one # yes
1542
bra.b _L5_6d
1543
_L5_4d:
1544
cmpi.b %d1,&QNAN # is operand a QNAN?
1545
bne.b _L5_5d # no
1546
bsr.l src_qnan # yes
1547
bra.b _L5_6d
1548
_L5_5d:
1549
# no explicit tag test here: every remaining tag value lands on this call
bsr.l stanhd # operand is a DENORM
1550
_L5_6d:
1551
1552
#
1553
# Result is now in FP0
1554
#
1555
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1556
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1557
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1558
unlk %a6
1559
rts
1560
1561
# _ftanhx_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> stanh, ZERO -> src_zero, INF -> src_one, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> stanhd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes tanh(x) -- inferred from the name only.
global _ftanhx_
1562
_ftanhx_:
1563
link %a6,&-LOCAL_SIZE
1564
1565
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1566
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1567
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1568
1569
fmov.l &0x0,%fpcr # zero FPCR
1570
1571
#
1572
# copy, convert, and tag input argument
1573
#
1574
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
1575
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1576
mov.l 0x8+0x4(%a6),0x4(%a0)
1577
mov.l 0x8+0x8(%a6),0x8(%a0)
1578
bsr.l tag # fetch operand type
1579
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1580
mov.b %d0,%d1
1581
1582
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1583
1584
clr.l %d0
1585
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1586
1587
# a zero tag selects the NORM handler
tst.b %d1
1588
bne.b _L5_2x
1589
bsr.l stanh # operand is a NORM
1590
bra.b _L5_6x
1591
_L5_2x:
1592
cmpi.b %d1,&ZERO # is operand a ZERO?
1593
bne.b _L5_3x # no
1594
bsr.l src_zero # yes
1595
bra.b _L5_6x
1596
_L5_3x:
1597
cmpi.b %d1,&INF # is operand an INF?
1598
bne.b _L5_4x # no
1599
bsr.l src_one # yes
1600
bra.b _L5_6x
1601
_L5_4x:
1602
cmpi.b %d1,&QNAN # is operand a QNAN?
1603
bne.b _L5_5x # no
1604
bsr.l src_qnan # yes
1605
bra.b _L5_6x
1606
_L5_5x:
1607
# no explicit tag test here: every remaining tag value lands on this call
bsr.l stanhd # operand is a DENORM
1608
_L5_6x:
1609
1610
#
1611
# Result is now in FP0
1612
#
1613
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1614
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1615
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1616
unlk %a6
1617
rts
1618
1619
1620
#########################################################################
1621
# MONADIC TEMPLATE #
1622
#########################################################################
1623
# _fatans_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> satan, ZERO -> src_zero, INF -> spi_2, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> satand.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arctan(x) -- inferred from the name only.
global _fatans_
1624
_fatans_:
1625
link %a6,&-LOCAL_SIZE
1626
1627
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1628
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1629
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1630
1631
fmov.l &0x0,%fpcr # zero FPCR
1632
1633
#
1634
# copy, convert, and tag input argument
1635
#
1636
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
1637
fmov.x %fp0,FP_SRC(%a6)
1638
lea FP_SRC(%a6),%a0
1639
bsr.l tag # fetch operand type
1640
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1641
mov.b %d0,%d1
1642
1643
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1644
1645
clr.l %d0
1646
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1647
1648
# a zero tag selects the NORM handler
tst.b %d1
1649
bne.b _L6_2s
1650
bsr.l satan # operand is a NORM
1651
bra.b _L6_6s
1652
_L6_2s:
1653
cmpi.b %d1,&ZERO # is operand a ZERO?
1654
bne.b _L6_3s # no
1655
bsr.l src_zero # yes
1656
bra.b _L6_6s
1657
_L6_3s:
1658
cmpi.b %d1,&INF # is operand an INF?
1659
bne.b _L6_4s # no
1660
bsr.l spi_2 # yes
1661
bra.b _L6_6s
1662
_L6_4s:
1663
cmpi.b %d1,&QNAN # is operand a QNAN?
1664
bne.b _L6_5s # no
1665
bsr.l src_qnan # yes
1666
bra.b _L6_6s
1667
_L6_5s:
1668
# no explicit tag test here: every remaining tag value lands on this call
bsr.l satand # operand is a DENORM
1669
_L6_6s:
1670
1671
#
1672
# Result is now in FP0
1673
#
1674
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1675
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1676
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1677
unlk %a6
1678
rts
1679
1680
# _fatand_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> satan, ZERO -> src_zero, INF -> spi_2, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> satand.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arctan(x) -- inferred from the name only.
global _fatand_
1681
_fatand_:
1682
link %a6,&-LOCAL_SIZE
1683
1684
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1685
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1686
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1687
1688
fmov.l &0x0,%fpcr # zero FPCR
1689
1690
#
1691
# copy, convert, and tag input argument
1692
#
1693
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
1694
fmov.x %fp0,FP_SRC(%a6)
1695
lea FP_SRC(%a6),%a0
1696
bsr.l tag # fetch operand type
1697
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1698
mov.b %d0,%d1
1699
1700
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1701
1702
clr.l %d0
1703
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1704
1705
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
1706
# a zero tag selects the NORM handler
tst.b %d1
1707
bne.b _L6_2d
1708
bsr.l satan # operand is a NORM
1709
bra.b _L6_6d
1710
_L6_2d:
1711
cmpi.b %d1,&ZERO # is operand a ZERO?
1712
bne.b _L6_3d # no
1713
bsr.l src_zero # yes
1714
bra.b _L6_6d
1715
_L6_3d:
1716
cmpi.b %d1,&INF # is operand an INF?
1717
bne.b _L6_4d # no
1718
bsr.l spi_2 # yes
1719
bra.b _L6_6d
1720
_L6_4d:
1721
cmpi.b %d1,&QNAN # is operand a QNAN?
1722
bne.b _L6_5d # no
1723
bsr.l src_qnan # yes
1724
bra.b _L6_6d
1725
_L6_5d:
1726
# no explicit tag test here: every remaining tag value lands on this call
bsr.l satand # operand is a DENORM
1727
_L6_6d:
1728
1729
#
1730
# Result is now in FP0
1731
#
1732
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1733
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1734
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1735
unlk %a6
1736
rts
1737
1738
# _fatanx_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> satan, ZERO -> src_zero, INF -> spi_2, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> satand.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arctan(x) -- inferred from the name only.
global _fatanx_
1739
_fatanx_:
1740
link %a6,&-LOCAL_SIZE
1741
1742
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1743
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1744
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1745
1746
fmov.l &0x0,%fpcr # zero FPCR
1747
1748
#
1749
# copy, convert, and tag input argument
1750
#
1751
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
1752
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1753
mov.l 0x8+0x4(%a6),0x4(%a0)
1754
mov.l 0x8+0x8(%a6),0x8(%a0)
1755
bsr.l tag # fetch operand type
1756
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1757
mov.b %d0,%d1
1758
1759
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1760
1761
clr.l %d0
1762
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1763
1764
# a zero tag selects the NORM handler
tst.b %d1
1765
bne.b _L6_2x
1766
bsr.l satan # operand is a NORM
1767
bra.b _L6_6x
1768
_L6_2x:
1769
cmpi.b %d1,&ZERO # is operand a ZERO?
1770
bne.b _L6_3x # no
1771
bsr.l src_zero # yes
1772
bra.b _L6_6x
1773
_L6_3x:
1774
cmpi.b %d1,&INF # is operand an INF?
1775
bne.b _L6_4x # no
1776
bsr.l spi_2 # yes
1777
bra.b _L6_6x
1778
_L6_4x:
1779
cmpi.b %d1,&QNAN # is operand a QNAN?
1780
bne.b _L6_5x # no
1781
bsr.l src_qnan # yes
1782
bra.b _L6_6x
1783
_L6_5x:
1784
# no explicit tag test here: every remaining tag value lands on this call
bsr.l satand # operand is a DENORM
1785
_L6_6x:
1786
1787
#
1788
# Result is now in FP0
1789
#
1790
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1791
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1792
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1793
unlk %a6
1794
rts
1795
1796
1797
#########################################################################
1798
# MONADIC TEMPLATE #
1799
#########################################################################
1800
# _fasins_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> sasin, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> sasind.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arcsin(x) -- inferred from the name only.
global _fasins_
1801
_fasins_:
1802
link %a6,&-LOCAL_SIZE
1803
1804
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1805
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1806
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1807
1808
fmov.l &0x0,%fpcr # zero FPCR
1809
1810
#
1811
# copy, convert, and tag input argument
1812
#
1813
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
1814
fmov.x %fp0,FP_SRC(%a6)
1815
lea FP_SRC(%a6),%a0
1816
bsr.l tag # fetch operand type
1817
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1818
mov.b %d0,%d1
1819
1820
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1821
1822
clr.l %d0
1823
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1824
1825
# a zero tag selects the NORM handler
tst.b %d1
1826
bne.b _L7_2s
1827
bsr.l sasin # operand is a NORM
1828
bra.b _L7_6s
1829
_L7_2s:
1830
cmpi.b %d1,&ZERO # is operand a ZERO?
1831
bne.b _L7_3s # no
1832
bsr.l src_zero # yes
1833
bra.b _L7_6s
1834
_L7_3s:
1835
cmpi.b %d1,&INF # is operand an INF?
1836
bne.b _L7_4s # no
1837
bsr.l t_operr # yes
1838
bra.b _L7_6s
1839
_L7_4s:
1840
cmpi.b %d1,&QNAN # is operand a QNAN?
1841
bne.b _L7_5s # no
1842
bsr.l src_qnan # yes
1843
bra.b _L7_6s
1844
_L7_5s:
1845
# no explicit tag test here: every remaining tag value lands on this call
bsr.l sasind # operand is a DENORM
1846
_L7_6s:
1847
1848
#
1849
# Result is now in FP0
1850
#
1851
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1852
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1853
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1854
unlk %a6
1855
rts
1856
1857
# _fasind_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> sasin, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> sasind.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arcsin(x) -- inferred from the name only.
global _fasind_
1858
_fasind_:
1859
link %a6,&-LOCAL_SIZE
1860
1861
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1862
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1863
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1864
1865
fmov.l &0x0,%fpcr # zero FPCR
1866
1867
#
1868
# copy, convert, and tag input argument
1869
#
1870
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
1871
fmov.x %fp0,FP_SRC(%a6)
1872
lea FP_SRC(%a6),%a0
1873
bsr.l tag # fetch operand type
1874
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1875
mov.b %d0,%d1
1876
1877
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1878
1879
clr.l %d0
1880
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1881
1882
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
1883
# a zero tag selects the NORM handler
tst.b %d1
1884
bne.b _L7_2d
1885
bsr.l sasin # operand is a NORM
1886
bra.b _L7_6d
1887
_L7_2d:
1888
cmpi.b %d1,&ZERO # is operand a ZERO?
1889
bne.b _L7_3d # no
1890
bsr.l src_zero # yes
1891
bra.b _L7_6d
1892
_L7_3d:
1893
cmpi.b %d1,&INF # is operand an INF?
1894
bne.b _L7_4d # no
1895
bsr.l t_operr # yes
1896
bra.b _L7_6d
1897
_L7_4d:
1898
cmpi.b %d1,&QNAN # is operand a QNAN?
1899
bne.b _L7_5d # no
1900
bsr.l src_qnan # yes
1901
bra.b _L7_6d
1902
_L7_5d:
1903
# no explicit tag test here: every remaining tag value lands on this call
bsr.l sasind # operand is a DENORM
1904
_L7_6d:
1905
1906
#
1907
# Result is now in FP0
1908
#
1909
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1910
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1911
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1912
unlk %a6
1913
rts
1914
1915
# _fasinx_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> sasin, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> sasind.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arcsin(x) -- inferred from the name only.
global _fasinx_
1916
_fasinx_:
1917
link %a6,&-LOCAL_SIZE
1918
1919
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1920
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1921
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1922
1923
fmov.l &0x0,%fpcr # zero FPCR
1924
1925
#
1926
# copy, convert, and tag input argument
1927
#
1928
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
1929
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
1930
mov.l 0x8+0x4(%a6),0x4(%a0)
1931
mov.l 0x8+0x8(%a6),0x8(%a0)
1932
bsr.l tag # fetch operand type
1933
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1934
mov.b %d0,%d1
1935
1936
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1937
1938
clr.l %d0
1939
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
1940
1941
# a zero tag selects the NORM handler
tst.b %d1
1942
bne.b _L7_2x
1943
bsr.l sasin # operand is a NORM
1944
bra.b _L7_6x
1945
_L7_2x:
1946
cmpi.b %d1,&ZERO # is operand a ZERO?
1947
bne.b _L7_3x # no
1948
bsr.l src_zero # yes
1949
bra.b _L7_6x
1950
_L7_3x:
1951
cmpi.b %d1,&INF # is operand an INF?
1952
bne.b _L7_4x # no
1953
bsr.l t_operr # yes
1954
bra.b _L7_6x
1955
_L7_4x:
1956
cmpi.b %d1,&QNAN # is operand a QNAN?
1957
bne.b _L7_5x # no
1958
bsr.l src_qnan # yes
1959
bra.b _L7_6x
1960
_L7_5x:
1961
# no explicit tag test here: every remaining tag value lands on this call
bsr.l sasind # operand is a DENORM
1962
_L7_6x:
1963
1964
#
1965
# Result is now in FP0
1966
#
1967
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1968
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
1969
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
1970
unlk %a6
1971
rts
1972
1973
1974
#########################################################################
1975
# MONADIC TEMPLATE #
1976
#########################################################################
1977
# _fatanhs_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> satanh, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> satanhd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arctanh(x) -- inferred from the name only.
global _fatanhs_
1978
_fatanhs_:
1979
link %a6,&-LOCAL_SIZE
1980
1981
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
1982
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
1983
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
1984
1985
fmov.l &0x0,%fpcr # zero FPCR
1986
1987
#
1988
# copy, convert, and tag input argument
1989
#
1990
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
1991
fmov.x %fp0,FP_SRC(%a6)
1992
lea FP_SRC(%a6),%a0
1993
bsr.l tag # fetch operand type
1994
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
1995
mov.b %d0,%d1
1996
1997
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
1998
1999
clr.l %d0
2000
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2001
2002
# a zero tag selects the NORM handler
tst.b %d1
2003
bne.b _L8_2s
2004
bsr.l satanh # operand is a NORM
2005
bra.b _L8_6s
2006
_L8_2s:
2007
cmpi.b %d1,&ZERO # is operand a ZERO?
2008
bne.b _L8_3s # no
2009
bsr.l src_zero # yes
2010
bra.b _L8_6s
2011
_L8_3s:
2012
cmpi.b %d1,&INF # is operand an INF?
2013
bne.b _L8_4s # no
2014
bsr.l t_operr # yes
2015
bra.b _L8_6s
2016
_L8_4s:
2017
cmpi.b %d1,&QNAN # is operand a QNAN?
2018
bne.b _L8_5s # no
2019
bsr.l src_qnan # yes
2020
bra.b _L8_6s
2021
_L8_5s:
2022
# no explicit tag test here: every remaining tag value lands on this call
bsr.l satanhd # operand is a DENORM
2023
_L8_6s:
2024
2025
#
2026
# Result is now in FP0
2027
#
2028
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2029
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2030
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2031
unlk %a6
2032
rts
2033
2034
# _fatanhd_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> satanh, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> satanhd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arctanh(x) -- inferred from the name only.
global _fatanhd_
2035
_fatanhd_:
2036
link %a6,&-LOCAL_SIZE
2037
2038
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2039
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2040
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2041
2042
fmov.l &0x0,%fpcr # zero FPCR
2043
2044
#
2045
# copy, convert, and tag input argument
2046
#
2047
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
2048
fmov.x %fp0,FP_SRC(%a6)
2049
lea FP_SRC(%a6),%a0
2050
bsr.l tag # fetch operand type
2051
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2052
mov.b %d0,%d1
2053
2054
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2055
2056
clr.l %d0
2057
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2058
2059
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
2060
# a zero tag selects the NORM handler
tst.b %d1
2061
bne.b _L8_2d
2062
bsr.l satanh # operand is a NORM
2063
bra.b _L8_6d
2064
_L8_2d:
2065
cmpi.b %d1,&ZERO # is operand a ZERO?
2066
bne.b _L8_3d # no
2067
bsr.l src_zero # yes
2068
bra.b _L8_6d
2069
_L8_3d:
2070
cmpi.b %d1,&INF # is operand an INF?
2071
bne.b _L8_4d # no
2072
bsr.l t_operr # yes
2073
bra.b _L8_6d
2074
_L8_4d:
2075
cmpi.b %d1,&QNAN # is operand a QNAN?
2076
bne.b _L8_5d # no
2077
bsr.l src_qnan # yes
2078
bra.b _L8_6d
2079
_L8_5d:
2080
# no explicit tag test here: every remaining tag value lands on this call
bsr.l satanhd # operand is a DENORM
2081
_L8_6d:
2082
2083
#
2084
# Result is now in FP0
2085
#
2086
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2087
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2088
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2089
unlk %a6
2090
rts
2091
2092
# _fatanhx_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> satanh, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> satanhd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes arctanh(x) -- inferred from the name only.
global _fatanhx_
2093
_fatanhx_:
2094
link %a6,&-LOCAL_SIZE
2095
2096
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2097
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2098
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2099
2100
fmov.l &0x0,%fpcr # zero FPCR
2101
2102
#
2103
# copy, convert, and tag input argument
2104
#
2105
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
2106
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2107
mov.l 0x8+0x4(%a6),0x4(%a0)
2108
mov.l 0x8+0x8(%a6),0x8(%a0)
2109
bsr.l tag # fetch operand type
2110
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2111
mov.b %d0,%d1
2112
2113
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2114
2115
clr.l %d0
2116
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2117
2118
# a zero tag selects the NORM handler
tst.b %d1
2119
bne.b _L8_2x
2120
bsr.l satanh # operand is a NORM
2121
bra.b _L8_6x
2122
_L8_2x:
2123
cmpi.b %d1,&ZERO # is operand a ZERO?
2124
bne.b _L8_3x # no
2125
bsr.l src_zero # yes
2126
bra.b _L8_6x
2127
_L8_3x:
2128
cmpi.b %d1,&INF # is operand an INF?
2129
bne.b _L8_4x # no
2130
bsr.l t_operr # yes
2131
bra.b _L8_6x
2132
_L8_4x:
2133
cmpi.b %d1,&QNAN # is operand a QNAN?
2134
bne.b _L8_5x # no
2135
bsr.l src_qnan # yes
2136
bra.b _L8_6x
2137
_L8_5x:
2138
# no explicit tag test here: every remaining tag value lands on this call
bsr.l satanhd # operand is a DENORM
2139
_L8_6x:
2140
2141
#
2142
# Result is now in FP0
2143
#
2144
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2145
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2146
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2147
unlk %a6
2148
rts
2149
2150
2151
#########################################################################
2152
# MONADIC TEMPLATE #
2153
#########################################################################
2154
# _ftans_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> stan, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> stand.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes tan(x) -- inferred from the name only.
global _ftans_
2155
_ftans_:
2156
link %a6,&-LOCAL_SIZE
2157
2158
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2159
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2160
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2161
2162
fmov.l &0x0,%fpcr # zero FPCR
2163
2164
#
2165
# copy, convert, and tag input argument
2166
#
2167
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
2168
fmov.x %fp0,FP_SRC(%a6)
2169
lea FP_SRC(%a6),%a0
2170
bsr.l tag # fetch operand type
2171
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2172
mov.b %d0,%d1
2173
2174
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2175
2176
clr.l %d0
2177
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2178
2179
# a zero tag selects the NORM handler
tst.b %d1
2180
bne.b _L9_2s
2181
bsr.l stan # operand is a NORM
2182
bra.b _L9_6s
2183
_L9_2s:
2184
cmpi.b %d1,&ZERO # is operand a ZERO?
2185
bne.b _L9_3s # no
2186
bsr.l src_zero # yes
2187
bra.b _L9_6s
2188
_L9_3s:
2189
cmpi.b %d1,&INF # is operand an INF?
2190
bne.b _L9_4s # no
2191
bsr.l t_operr # yes
2192
bra.b _L9_6s
2193
_L9_4s:
2194
cmpi.b %d1,&QNAN # is operand a QNAN?
2195
bne.b _L9_5s # no
2196
bsr.l src_qnan # yes
2197
bra.b _L9_6s
2198
_L9_5s:
2199
# no explicit tag test here: every remaining tag value lands on this call
bsr.l stand # operand is a DENORM
2200
_L9_6s:
2201
2202
#
2203
# Result is now in FP0
2204
#
2205
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2206
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2207
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2208
unlk %a6
2209
rts
2210
2211
# _ftand_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> stan, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> stand.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes tan(x) -- inferred from the name only.
global _ftand_
2212
_ftand_:
2213
link %a6,&-LOCAL_SIZE
2214
2215
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2216
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2217
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2218
2219
fmov.l &0x0,%fpcr # zero FPCR
2220
2221
#
2222
# copy, convert, and tag input argument
2223
#
2224
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
2225
fmov.x %fp0,FP_SRC(%a6)
2226
lea FP_SRC(%a6),%a0
2227
bsr.l tag # fetch operand type
2228
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2229
mov.b %d0,%d1
2230
2231
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2232
2233
clr.l %d0
2234
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2235
2236
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
2237
# a zero tag selects the NORM handler
tst.b %d1
2238
bne.b _L9_2d
2239
bsr.l stan # operand is a NORM
2240
bra.b _L9_6d
2241
_L9_2d:
2242
cmpi.b %d1,&ZERO # is operand a ZERO?
2243
bne.b _L9_3d # no
2244
bsr.l src_zero # yes
2245
bra.b _L9_6d
2246
_L9_3d:
2247
cmpi.b %d1,&INF # is operand an INF?
2248
bne.b _L9_4d # no
2249
bsr.l t_operr # yes
2250
bra.b _L9_6d
2251
_L9_4d:
2252
cmpi.b %d1,&QNAN # is operand a QNAN?
2253
bne.b _L9_5d # no
2254
bsr.l src_qnan # yes
2255
bra.b _L9_6d
2256
_L9_5d:
2257
# no explicit tag test here: every remaining tag value lands on this call
bsr.l stand # operand is a DENORM
2258
_L9_6d:
2259
2260
#
2261
# Result is now in FP0
2262
#
2263
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2264
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2265
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2266
unlk %a6
2267
rts
2268
2269
# _ftanx_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> stan, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> stand.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes tan(x) -- inferred from the name only.
global _ftanx_
2270
_ftanx_:
2271
link %a6,&-LOCAL_SIZE
2272
2273
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2274
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2275
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2276
2277
fmov.l &0x0,%fpcr # zero FPCR
2278
2279
#
2280
# copy, convert, and tag input argument
2281
#
2282
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
2283
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2284
mov.l 0x8+0x4(%a6),0x4(%a0)
2285
mov.l 0x8+0x8(%a6),0x8(%a0)
2286
bsr.l tag # fetch operand type
2287
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2288
mov.b %d0,%d1
2289
2290
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2291
2292
clr.l %d0
2293
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2294
2295
# a zero tag selects the NORM handler
tst.b %d1
2296
bne.b _L9_2x
2297
bsr.l stan # operand is a NORM
2298
bra.b _L9_6x
2299
_L9_2x:
2300
cmpi.b %d1,&ZERO # is operand a ZERO?
2301
bne.b _L9_3x # no
2302
bsr.l src_zero # yes
2303
bra.b _L9_6x
2304
_L9_3x:
2305
cmpi.b %d1,&INF # is operand an INF?
2306
bne.b _L9_4x # no
2307
bsr.l t_operr # yes
2308
bra.b _L9_6x
2309
_L9_4x:
2310
cmpi.b %d1,&QNAN # is operand a QNAN?
2311
bne.b _L9_5x # no
2312
bsr.l src_qnan # yes
2313
bra.b _L9_6x
2314
_L9_5x:
2315
# no explicit tag test here: every remaining tag value lands on this call
bsr.l stand # operand is a DENORM
2316
_L9_6x:
2317
2318
#
2319
# Result is now in FP0
2320
#
2321
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2322
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2323
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2324
unlk %a6
2325
rts
2326
2327
2328
#########################################################################
2329
# MONADIC TEMPLATE #
2330
#########################################################################
2331
# _fetoxs_: library entry for a single-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> setox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> setoxd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes e^x -- inferred from the name only.
global _fetoxs_
2332
_fetoxs_:
2333
link %a6,&-LOCAL_SIZE
2334
2335
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2336
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2337
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2338
2339
fmov.l &0x0,%fpcr # zero FPCR
2340
2341
#
2342
# copy, convert, and tag input argument
2343
#
2344
# round-trip through fp0 to widen the operand to extended format
fmov.s 0x8(%a6),%fp0 # load sgl input
2345
fmov.x %fp0,FP_SRC(%a6)
2346
lea FP_SRC(%a6),%a0
2347
bsr.l tag # fetch operand type
2348
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2349
mov.b %d0,%d1
2350
2351
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2352
2353
clr.l %d0
2354
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2355
2356
# a zero tag selects the NORM handler
tst.b %d1
2357
bne.b _L10_2s
2358
bsr.l setox # operand is a NORM
2359
bra.b _L10_6s
2360
_L10_2s:
2361
cmpi.b %d1,&ZERO # is operand a ZERO?
2362
bne.b _L10_3s # no
2363
bsr.l ld_pone # yes
2364
bra.b _L10_6s
2365
_L10_3s:
2366
cmpi.b %d1,&INF # is operand an INF?
2367
bne.b _L10_4s # no
2368
bsr.l szr_inf # yes
2369
bra.b _L10_6s
2370
_L10_4s:
2371
cmpi.b %d1,&QNAN # is operand a QNAN?
2372
bne.b _L10_5s # no
2373
bsr.l src_qnan # yes
2374
bra.b _L10_6s
2375
_L10_5s:
2376
# no explicit tag test here: every remaining tag value lands on this call
bsr.l setoxd # operand is a DENORM
2377
_L10_6s:
2378
2379
#
2380
# Result is now in FP0
2381
#
2382
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2383
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2384
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2385
unlk %a6
2386
rts
2387
2388
# _fetoxd_: library entry for a double-precision operand at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, widens the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> setox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> setoxd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes e^x -- inferred from the name only.
global _fetoxd_
2389
_fetoxd_:
2390
link %a6,&-LOCAL_SIZE
2391
2392
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2393
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2394
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2395
2396
fmov.l &0x0,%fpcr # zero FPCR
2397
2398
#
2399
# copy, convert, and tag input argument
2400
#
2401
# round-trip through fp0 to widen the operand to extended format
fmov.d 0x8(%a6),%fp0 # load dbl input
2402
fmov.x %fp0,FP_SRC(%a6)
2403
lea FP_SRC(%a6),%a0
2404
bsr.l tag # fetch operand type
2405
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2406
mov.b %d0,%d1
2407
2408
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2409
2410
clr.l %d0
2411
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2412
2413
# d1 is unchanged since the store to STAG above, so this rewrites the same tag
mov.b %d1,STAG(%a6)
2414
# a zero tag selects the NORM handler
tst.b %d1
2415
bne.b _L10_2d
2416
bsr.l setox # operand is a NORM
2417
bra.b _L10_6d
2418
_L10_2d:
2419
cmpi.b %d1,&ZERO # is operand a ZERO?
2420
bne.b _L10_3d # no
2421
bsr.l ld_pone # yes
2422
bra.b _L10_6d
2423
_L10_3d:
2424
cmpi.b %d1,&INF # is operand an INF?
2425
bne.b _L10_4d # no
2426
bsr.l szr_inf # yes
2427
bra.b _L10_6d
2428
_L10_4d:
2429
cmpi.b %d1,&QNAN # is operand a QNAN?
2430
bne.b _L10_5d # no
2431
bsr.l src_qnan # yes
2432
bra.b _L10_6d
2433
_L10_5d:
2434
# no explicit tag test here: every remaining tag value lands on this call
bsr.l setoxd # operand is a DENORM
2435
_L10_6d:
2436
2437
#
2438
# Result is now in FP0
2439
#
2440
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2441
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2442
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2443
unlk %a6
2444
rts
2445
2446
# _fetoxx_: library entry for an extended-precision operand (three
# longwords) at 0x8(%a6).
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# zeroes FPCR, copies the operand into FP_SRC, classifies it with tag
# (a0 -> FP_SRC) and calls exactly one handler chosen by the tag:
#   0 (NORM) -> setox, ZERO -> ld_pone, INF -> szr_inf, QNAN -> src_qnan,
#   any other tag (DENORM per the original comment) -> setoxd.
# d0 carries the zero-extended FPCR_MODE byte into the handler.
# On exit d0-d1/a0-a1, FPCR/FPSR and fp1 are reloaded from the frame;
# fp0 is left holding the result.
# NOTE(review): presumably computes e^x -- inferred from the name only.
global _fetoxx_
2447
_fetoxx_:
2448
link %a6,&-LOCAL_SIZE
2449
2450
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2451
fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
2452
fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1
2453
2454
fmov.l &0x0,%fpcr # zero FPCR
2455
2456
#
2457
# copy, convert, and tag input argument
2458
#
2459
# copy the 12-byte operand from the caller's stack into FP_SRC, one long at a time
lea FP_SRC(%a6),%a0
2460
mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input
2461
mov.l 0x8+0x4(%a6),0x4(%a0)
2462
mov.l 0x8+0x8(%a6),0x8(%a0)
2463
bsr.l tag # fetch operand type
2464
# keep the tag in the frame and in d1; d0 is reused below
mov.b %d0,STAG(%a6)
2465
mov.b %d0,%d1
2466
2467
# clear bits 31-24 and 15-8 of the USER_FPSR frame slot, keep the rest
# NOTE(review): presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the FPSR layout
andi.l &0x00ff00ff,USER_FPSR(%a6)
2468
2469
clr.l %d0
2470
mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec
2471
2472
# a zero tag selects the NORM handler
tst.b %d1
2473
bne.b _L10_2x
2474
bsr.l setox # operand is a NORM
2475
bra.b _L10_6x
2476
_L10_2x:
2477
cmpi.b %d1,&ZERO # is operand a ZERO?
2478
bne.b _L10_3x # no
2479
bsr.l ld_pone # yes
2480
bra.b _L10_6x
2481
_L10_3x:
2482
cmpi.b %d1,&INF # is operand an INF?
2483
bne.b _L10_4x # no
2484
bsr.l szr_inf # yes
2485
bra.b _L10_6x
2486
_L10_4x:
2487
cmpi.b %d1,&QNAN # is operand a QNAN?
2488
bne.b _L10_5x # no
2489
bsr.l src_qnan # yes
2490
bra.b _L10_6x
2491
_L10_5x:
2492
# no explicit tag test here: every remaining tag value lands on this call
bsr.l setoxd # operand is a DENORM
2493
_L10_6x:
2494
2495
#
2496
# Result is now in FP0
2497
#
2498
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2499
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
2500
# only fp1 is reloaded; fp0 keeps the result
fmovm.x EXC_FP1(%a6),&0x40 # restore fp1
2501
unlk %a6
2502
rts
2503
2504
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftwotoxs_: single-precision ("s") entry point of the _L11 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> stwotoxd
#
	global		_ftwotoxs_
_ftwotoxs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L11_2s
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6s
_L11_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6s
_L11_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6s
_L11_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L11_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6s
_L11_5s:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2564
2565
#
# _ftwotoxd_: double-precision ("d") entry point of the _L11 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> stwotoxd
#
	global		_ftwotoxd_
_ftwotoxd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L11_2d
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6d
_L11_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6d
_L11_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6d
_L11_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L11_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6d
_L11_5d:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2622
2623
#
# _ftwotoxx_: extended-precision ("x") entry point of the _L11 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> stwotox, ZERO -> ld_pone, INF -> szr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> stwotoxd
#
	global		_ftwotoxx_
_ftwotoxx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L11_2x
	bsr.l		stwotox			# operand is a NORM
	bra.b		_L11_6x
_L11_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L11_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L11_6x
_L11_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L11_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L11_6x
_L11_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L11_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L11_6x
_L11_5x:
	bsr.l		stwotoxd		# operand is a DENORM
_L11_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2680
2681
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _ftentoxs_: single-precision ("s") entry point of the _L12 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> stentoxd
#
	global		_ftentoxs_
_ftentoxs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L12_2s
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6s
_L12_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6s
_L12_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4s			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6s
_L12_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L12_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6s
_L12_5s:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2741
2742
#
# _ftentoxd_: double-precision ("d") entry point of the _L12 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> stentoxd
#
	global		_ftentoxd_
_ftentoxd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L12_2d
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6d
_L12_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6d
_L12_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4d			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6d
_L12_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L12_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6d
_L12_5d:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2799
2800
#
# _ftentoxx_: extended-precision ("x") entry point of the _L12 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> stentox, ZERO -> ld_pone, INF -> szr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> stentoxd
#
	global		_ftentoxx_
_ftentoxx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L12_2x
	bsr.l		stentox			# operand is a NORM
	bra.b		_L12_6x
_L12_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L12_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L12_6x
_L12_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L12_4x			# no
	bsr.l		szr_inf			# yes
	bra.b		_L12_6x
_L12_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L12_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L12_6x
_L12_5x:
	bsr.l		stentoxd		# operand is a DENORM
_L12_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2857
2858
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flogns_: single-precision ("s") entry point of the _L13 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slognd
#
	global		_flogns_
_flogns_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L13_2s
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6s
_L13_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6s
_L13_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6s
_L13_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L13_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6s
_L13_5s:
	bsr.l		slognd			# operand is a DENORM
_L13_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2918
2919
#
# _flognd_: double-precision ("d") entry point of the _L13 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slognd
#
	global		_flognd_
_flognd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L13_2d
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6d
_L13_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6d
_L13_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6d
_L13_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L13_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6d
_L13_5d:
	bsr.l		slognd			# operand is a DENORM
_L13_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
2976
2977
#
# _flognx_: extended-precision ("x") entry point of the _L13 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slogn, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slognd
#
	global		_flognx_
_flognx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L13_2x
	bsr.l		slogn			# operand is a NORM
	bra.b		_L13_6x
_L13_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L13_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L13_6x
_L13_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L13_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L13_6x
_L13_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L13_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L13_6x
_L13_5x:
	bsr.l		slognd			# operand is a DENORM
_L13_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3034
3035
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flog10s_: single-precision ("s") entry point of the _L14 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slog10d
#
	global		_flog10s_
_flog10s_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L14_2s
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6s
_L14_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6s
_L14_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6s
_L14_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L14_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6s
_L14_5s:
	bsr.l		slog10d			# operand is a DENORM
_L14_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3095
3096
#
# _flog10d_: double-precision ("d") entry point of the _L14 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slog10d
#
	global		_flog10d_
_flog10d_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L14_2d
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6d
_L14_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6d
_L14_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6d
_L14_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L14_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6d
_L14_5d:
	bsr.l		slog10d			# operand is a DENORM
_L14_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3153
3154
#
# _flog10x_: extended-precision ("x") entry point of the _L14 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slog10d
#
	global		_flog10x_
_flog10x_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L14_2x
	bsr.l		slog10			# operand is a NORM
	bra.b		_L14_6x
_L14_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L14_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L14_6x
_L14_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L14_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L14_6x
_L14_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L14_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L14_6x
_L14_5x:
	bsr.l		slog10d			# operand is a DENORM
_L14_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3211
3212
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _flog2s_: single-precision ("s") entry point of the _L15 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slog2d
#
	global		_flog2s_
_flog2s_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L15_2s
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6s
_L15_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3s			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6s
_L15_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4s			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6s
_L15_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L15_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6s
_L15_5s:
	bsr.l		slog2d			# operand is a DENORM
_L15_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3272
3273
#
# _flog2d_: double-precision ("d") entry point of the _L15 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slog2d
#
	global		_flog2d_
_flog2d_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L15_2d
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6d
_L15_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3d			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6d
_L15_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4d			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6d
_L15_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L15_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6d
_L15_5d:
	bsr.l		slog2d			# operand is a DENORM
_L15_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3330
3331
#
# _flog2x_: extended-precision ("x") entry point of the _L15 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> slog2d
#
	global		_flog2x_
_flog2x_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L15_2x
	bsr.l		slog2			# operand is a NORM
	bra.b		_L15_6x
_L15_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L15_3x			# no
	bsr.l		t_dz2			# yes
	bra.b		_L15_6x
_L15_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L15_4x			# no
	bsr.l		sopr_inf		# yes
	bra.b		_L15_6x
_L15_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L15_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L15_6x
_L15_5x:
	bsr.l		slog2d			# operand is a DENORM
_L15_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3388
3389
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fcoshs_: single-precision ("s") entry point of the _L16 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> scoshd
#
	global		_fcoshs_
_fcoshs_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L16_2s
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6s
_L16_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3s			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6s
_L16_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4s			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6s
_L16_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L16_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6s
_L16_5s:
	bsr.l		scoshd			# operand is a DENORM
_L16_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3449
3450
#
# _fcoshd_: double-precision ("d") entry point of the _L16 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> scoshd
#
	global		_fcoshd_
_fcoshd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L16_2d
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6d
_L16_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3d			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6d
_L16_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4d			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6d
_L16_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L16_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6d
_L16_5d:
	bsr.l		scoshd			# operand is a DENORM
_L16_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3507
3508
#
# _fcoshx_: extended-precision ("x") entry point of the _L16 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf,
#                 QNAN -> src_qnan, any other tag (DENORM) -> scoshd
#
	global		_fcoshx_
_fcoshx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L16_2x
	bsr.l		scosh			# operand is a NORM
	bra.b		_L16_6x
_L16_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L16_3x			# no
	bsr.l		ld_pone			# yes
	bra.b		_L16_6x
_L16_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L16_4x			# no
	bsr.l		ld_pinf			# yes
	bra.b		_L16_6x
_L16_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L16_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L16_6x
_L16_5x:
	bsr.l		scoshd			# operand is a DENORM
_L16_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3565
3566
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _facoss_: single-precision ("s") entry point of the _L17 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr,
#                 QNAN -> src_qnan, any other tag (DENORM) -> sacosd
#
	global		_facoss_
_facoss_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L17_2s
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6s
_L17_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3s			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6s
_L17_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6s
_L17_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L17_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6s
_L17_5s:
	bsr.l		sacosd			# operand is a DENORM
_L17_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3626
3627
#
# _facosd_: double-precision ("d") entry point of the _L17 group.
#   in:   double-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr,
#                 QNAN -> src_qnan, any other tag (DENORM) -> sacosd
#
	global		_facosd_
_facosd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

# NOTE(review): the store below repeats the STAG store made right after tag;
# the s/x entry points omit it. Looks redundant - confirm before removing.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1
	bne.b		_L17_2d
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6d
_L17_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3d			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6d
_L17_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6d
_L17_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L17_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6d
_L17_5d:
	bsr.l		sacosd			# operand is a DENORM
_L17_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3684
3685
#
# _facosx_: extended-precision ("x") entry point of the _L17 group.
#   in:   12-byte extended operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr,
#                 QNAN -> src_qnan, any other tag (DENORM) -> sacosd
#
	global		_facosx_
_facosx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC copy of the operand
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L17_2x
	bsr.l		sacos			# operand is a NORM
	bra.b		_L17_6x
_L17_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L17_3x			# no
	bsr.l		ld_ppi2			# yes
	bra.b		_L17_6x
_L17_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L17_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L17_6x
_L17_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L17_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L17_6x
_L17_5x:
	bsr.l		sacosd			# operand is a DENORM
_L17_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3742
3743
#########################################################################
# MONADIC TEMPLATE							#
#########################################################################
#
# _fgetexps_: single-precision ("s") entry point of the _L18 group.
#   in:   single-precision operand on the stack at 0x8(%a6) after link
#   out:  result in fp0 (fp0 is saved on entry but only fp1 is restored)
#   d0-d1/a0-a1 and fpcr/fpsr are reloaded from the LOCAL_SIZE frame on exit.
#   tag dispatch: 0/NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr,
#                 QNAN -> src_qnan, any other tag (DENORM) -> sgetexpd
#
	global		_fgetexps_
_fgetexps_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# store it to FP_SRC in extended format
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)
	mov.b		%d0,%d1			# d1 = tag; d0 is reloaded below

	andi.l		&0x00ff00ff,USER_FPSR(%a6) # keep only bits 23-16 and 7-0 of saved FPSR

	clr.l		%d0			# d0 = zero-extended FPCR_MODE byte
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1
	bne.b		_L18_2s
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6s
_L18_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6s
_L18_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6s
_L18_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L18_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6s
_L18_5s:
	bsr.l		sgetexpd		# operand is a DENORM
_L18_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3803
3804
#
# _fgetexpd_(): double-precision entry point of the getexp template.
#
# Same flow as the single-precision entry, except that the argument at
# 0x8(%a6) is loaded as a double before being stored to FP_SRC as
# extended precision.  The handler leaves the result in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fgetexpd_
_fgetexpd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): writes the same tag byte to STAG a second time; the
#	sgl/ext entry points have no such store -- looks redundant, confirm.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# zero tag means NORM
	bne.b		_L18_2d
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6d
_L18_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6d
_L18_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6d
_L18_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L18_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6d
_L18_5d:
	bsr.l		sgetexpd		# any other tag: DENORM
_L18_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3861
3862
#
# _fgetexpx_(): extended-precision entry point of the getexp template.
#
# The three longwords of the extended argument at 0x8(%a6) are copied
# unchanged into FP_SRC (no FPU conversion), then the operand is tagged
# and dispatched on the tag.  The handler leaves the result in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fgetexpx_
_fgetexpx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# zero tag means NORM
	bne.b		_L18_2x
	bsr.l		sgetexp			# operand is a NORM
	bra.b		_L18_6x
_L18_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L18_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L18_6x
_L18_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L18_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L18_6x
_L18_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L18_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L18_6x
_L18_5x:
	bsr.l		sgetexpd		# any other tag: DENORM
_L18_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3919
3920
3921
#########################################################################
3922
# MONADIC TEMPLATE #
3923
#########################################################################
3924
#
# _fgetmans_(): single-precision entry point of the getman template.
#
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# converts the single-precision argument at 0x8(%a6) to extended
# precision in FP_SRC, tags it and dispatches on the tag.  The handler
# leaves the result in fp0; d0-d1/a0-a1, the control registers and fp1
# are reloaded from the frame before returning.
#
	global		_fgetmans_
_fgetmans_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# zero tag means NORM
	bne.b		_L19_2s
	bsr.l		sgetman			# operand is a NORM
	bra.b		_L19_6s
_L19_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L19_3s			# no
	bsr.l		src_zero		# yes
	bra.b		_L19_6s
_L19_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L19_4s			# no
	bsr.l		t_operr			# yes
	bra.b		_L19_6s
_L19_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L19_5s			# no
	bsr.l		src_qnan		# yes
	bra.b		_L19_6s
_L19_5s:
	bsr.l		sgetmand		# any other tag: DENORM
_L19_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
3980
3981
#
# _fgetmand_(): double-precision entry point of the getman template.
#
# Same flow as the single-precision entry, except that the argument at
# 0x8(%a6) is loaded as a double before being stored to FP_SRC as
# extended precision.  The handler leaves the result in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fgetmand_
_fgetmand_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): writes the same tag byte to STAG a second time; the
#	sgl/ext entry points have no such store -- looks redundant, confirm.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# zero tag means NORM
	bne.b		_L19_2d
	bsr.l		sgetman			# operand is a NORM
	bra.b		_L19_6d
_L19_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L19_3d			# no
	bsr.l		src_zero		# yes
	bra.b		_L19_6d
_L19_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L19_4d			# no
	bsr.l		t_operr			# yes
	bra.b		_L19_6d
_L19_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L19_5d			# no
	bsr.l		src_qnan		# yes
	bra.b		_L19_6d
_L19_5d:
	bsr.l		sgetmand		# any other tag: DENORM
_L19_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4038
4039
#
# _fgetmanx_(): extended-precision entry point of the getman template.
#
# The three longwords of the extended argument at 0x8(%a6) are copied
# unchanged into FP_SRC (no FPU conversion), then the operand is tagged
# and dispatched on the tag.  The handler leaves the result in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fgetmanx_
_fgetmanx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# zero tag means NORM
	bne.b		_L19_2x
	bsr.l		sgetman			# operand is a NORM
	bra.b		_L19_6x
_L19_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L19_3x			# no
	bsr.l		src_zero		# yes
	bra.b		_L19_6x
_L19_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L19_4x			# no
	bsr.l		t_operr			# yes
	bra.b		_L19_6x
_L19_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L19_5x			# no
	bsr.l		src_qnan		# yes
	bra.b		_L19_6x
_L19_5x:
	bsr.l		sgetmand		# any other tag: DENORM
_L19_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4096
4097
4098
#########################################################################
4099
# MONADIC TEMPLATE #
4100
#########################################################################
4101
#
# _fsincoss_(): single-precision entry point of the sincos template.
#
# Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
# converts the single-precision argument at 0x8(%a6) to extended
# precision in FP_SRC, tags it and dispatches on the tag.
#
# Unlike the single-result entry points, fp1 is NOT reloaded from the
# frame on exit: the handler's fp0 and fp1 are both returned, exchanged
# through the stack, so the caller sees the handler's fp1 in fp0 and
# the handler's fp0 in fp1.
#
	global		_fsincoss_
_fsincoss_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.s		0x8(%a6),%fp0		# load sgl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# zero tag means NORM
	bne.b		_L20_2s
	bsr.l		ssincos			# operand is a NORM
	bra.b		_L20_6s
_L20_2s:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L20_3s			# no
	bsr.l		ssincosz		# yes
	bra.b		_L20_6s
_L20_3s:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L20_4s			# no
	bsr.l		ssincosi		# yes
	bra.b		_L20_6s
_L20_4s:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L20_5s			# no
	bsr.l		ssincosqnan		# yes
	bra.b		_L20_6s
_L20_5s:
	bsr.l		ssincosd		# any other tag: DENORM
_L20_6s:

#
#	Results are now in FP0 and FP1; exchange them before returning
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x		(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x		(%sp)+,&0x80		# fp1 now in fp0
	unlk		%a6
	rts
4159
4160
#
# _fsincosd_(): double-precision entry point of the sincos template.
#
# Same flow as the single-precision entry, except that the argument at
# 0x8(%a6) is loaded as a double before being stored to FP_SRC as
# extended precision.
#
# fp1 is NOT reloaded from the frame on exit: the handler's fp0 and fp1
# are both returned, exchanged through the stack, so the caller sees
# the handler's fp1 in fp0 and the handler's fp0 in fp1.
#
	global		_fsincosd_
_fsincosd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	fmov.d		0x8(%a6),%fp0		# load dbl input
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

#	NOTE(review): writes the same tag byte to STAG a second time; the
#	sgl/ext entry points have no such store -- looks redundant, confirm.
	mov.b		%d1,STAG(%a6)
	tst.b		%d1			# zero tag means NORM
	bne.b		_L20_2d
	bsr.l		ssincos			# operand is a NORM
	bra.b		_L20_6d
_L20_2d:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L20_3d			# no
	bsr.l		ssincosz		# yes
	bra.b		_L20_6d
_L20_3d:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L20_4d			# no
	bsr.l		ssincosi		# yes
	bra.b		_L20_6d
_L20_4d:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L20_5d			# no
	bsr.l		ssincosqnan		# yes
	bra.b		_L20_6d
_L20_5d:
	bsr.l		ssincosd		# any other tag: DENORM
_L20_6d:

#
#	Results are now in FP0 and FP1; exchange them before returning
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x		(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x		(%sp)+,&0x80		# fp1 now in fp0
	unlk		%a6
	rts
4219
4220
#
# _fsincosx_(): extended-precision entry point of the sincos template.
#
# The three longwords of the extended argument at 0x8(%a6) are copied
# unchanged into FP_SRC (no FPU conversion), then the operand is tagged
# and dispatched on the tag.
#
# fp1 is NOT reloaded from the frame on exit: the handler's fp0 and fp1
# are both returned, exchanged through the stack, so the caller sees
# the handler's fp1 in fp0 and the handler's fp0 in fp1.
#
	global		_fsincosx_
_fsincosx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input argument
#
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext input
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.b		%d0,%d1			# d1 = tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	tst.b		%d1			# zero tag means NORM
	bne.b		_L20_2x
	bsr.l		ssincos			# operand is a NORM
	bra.b		_L20_6x
_L20_2x:
	cmpi.b		%d1,&ZERO		# is operand a ZERO?
	bne.b		_L20_3x			# no
	bsr.l		ssincosz		# yes
	bra.b		_L20_6x
_L20_3x:
	cmpi.b		%d1,&INF		# is operand an INF?
	bne.b		_L20_4x			# no
	bsr.l		ssincosi		# yes
	bra.b		_L20_6x
_L20_4x:
	cmpi.b		%d1,&QNAN		# is operand a QNAN?
	bne.b		_L20_5x			# no
	bsr.l		ssincosqnan		# yes
	bra.b		_L20_6x
_L20_5x:
	bsr.l		ssincosd		# any other tag: DENORM
_L20_6x:

#
#	Results are now in FP0 and FP1; exchange them before returning
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		&0x03,-(%sp)		# store off fp0/fp1
	fmovm.x		(%sp)+,&0x40		# fp0 now in fp1
	fmovm.x		(%sp)+,&0x80		# fp1 now in fp0
	unlk		%a6
	rts
4279
4280
4281
#########################################################################
4282
# DYADIC TEMPLATE #
4283
#########################################################################
4284
#
# _frems_(): single-precision entry point of the dyadic rem template.
#
# Stack arguments: dst at 0x8(%a6), src at 0xc(%a6), both single
# precision.  Each is converted to extended precision (FP_DST, FP_SRC)
# and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag only; the
# handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0 holding
# the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_frems_
_frems_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	fmov.s		0x8(%a6),%fp0		# load sgl dst
	fmov.x		%fp0,FP_DST(%a6)	# keep it as extended
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	fmov.s		0xc(%a6),%fp0		# load sgl src
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L21_2s
	bsr.l		srem_snorm		# src is a NORM
	bra.b		_L21_6s
_L21_2s:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L21_3s			# no
	bsr.l		srem_szero		# yes
	bra.b		_L21_6s
_L21_3s:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L21_4s			# no
	bsr.l		srem_sinf		# yes
	bra.b		_L21_6s
_L21_4s:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L21_5s			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L21_6s
_L21_5s:
	bsr.l		srem_sdnrm		# any other tag: DENORM
_L21_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4349
4350
#
# _fremd_(): double-precision entry point of the dyadic rem template.
#
# Stack arguments: dst at 0x8(%a6), src at 0x10(%a6), both double
# precision.  Each is converted to extended precision (FP_DST, FP_SRC)
# and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag only; the
# handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0 holding
# the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fremd_
_fremd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	fmov.d		0x8(%a6),%fp0		# load dbl dst
	fmov.x		%fp0,FP_DST(%a6)	# keep it as extended
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	fmov.d		0x10(%a6),%fp0		# load dbl src
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L21_2d
	bsr.l		srem_snorm		# src is a NORM
	bra.b		_L21_6d
_L21_2d:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L21_3d			# no
	bsr.l		srem_szero		# yes
	bra.b		_L21_6d
_L21_3d:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L21_4d			# no
	bsr.l		srem_sinf		# yes
	bra.b		_L21_6d
_L21_4d:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L21_5d			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L21_6d
_L21_5d:
	bsr.l		srem_sdnrm		# any other tag: DENORM
_L21_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4415
4416
#
# _fremx_(): extended-precision entry point of the dyadic rem template.
#
# Stack arguments: dst in the three longwords at 0x8(%a6), src in the
# three longwords at 0x14(%a6).  Both are copied unchanged into FP_DST
# and FP_SRC and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag
# only; the handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0
# holding the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fremx_
_fremx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	mov.l		0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l		0x14+0x4(%a6),0x4(%a0)
	mov.l		0x14+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L21_2x
	bsr.l		srem_snorm		# src is a NORM
	bra.b		_L21_6x
_L21_2x:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L21_3x			# no
	bsr.l		srem_szero		# yes
	bra.b		_L21_6x
_L21_3x:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L21_4x			# no
	bsr.l		srem_sinf		# yes
	bra.b		_L21_6x
_L21_4x:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L21_5x			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L21_6x
_L21_5x:
	bsr.l		srem_sdnrm		# any other tag: DENORM
_L21_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4483
4484
4485
#########################################################################
4486
# DYADIC TEMPLATE #
4487
#########################################################################
4488
#
# _fmods_(): single-precision entry point of the dyadic mod template.
#
# Stack arguments: dst at 0x8(%a6), src at 0xc(%a6), both single
# precision.  Each is converted to extended precision (FP_DST, FP_SRC)
# and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag only; the
# handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0 holding
# the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fmods_
_fmods_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	fmov.s		0x8(%a6),%fp0		# load sgl dst
	fmov.x		%fp0,FP_DST(%a6)	# keep it as extended
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	fmov.s		0xc(%a6),%fp0		# load sgl src
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L22_2s
	bsr.l		smod_snorm		# src is a NORM
	bra.b		_L22_6s
_L22_2s:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L22_3s			# no
	bsr.l		smod_szero		# yes
	bra.b		_L22_6s
_L22_3s:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L22_4s			# no
	bsr.l		smod_sinf		# yes
	bra.b		_L22_6s
_L22_4s:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L22_5s			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L22_6s
_L22_5s:
	bsr.l		smod_sdnrm		# any other tag: DENORM
_L22_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4553
4554
#
# _fmodd_(): double-precision entry point of the dyadic mod template.
#
# Stack arguments: dst at 0x8(%a6), src at 0x10(%a6), both double
# precision.  Each is converted to extended precision (FP_DST, FP_SRC)
# and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag only; the
# handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0 holding
# the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fmodd_
_fmodd_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	fmov.d		0x8(%a6),%fp0		# load dbl dst
	fmov.x		%fp0,FP_DST(%a6)	# keep it as extended
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	fmov.d		0x10(%a6),%fp0		# load dbl src
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L22_2d
	bsr.l		smod_snorm		# src is a NORM
	bra.b		_L22_6d
_L22_2d:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L22_3d			# no
	bsr.l		smod_szero		# yes
	bra.b		_L22_6d
_L22_3d:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L22_4d			# no
	bsr.l		smod_sinf		# yes
	bra.b		_L22_6d
_L22_4d:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L22_5d			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L22_6d
_L22_5d:
	bsr.l		smod_sdnrm		# any other tag: DENORM
_L22_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4619
4620
#
# _fmodx_(): extended-precision entry point of the dyadic mod template.
#
# Stack arguments: dst in the three longwords at 0x8(%a6), src in the
# three longwords at 0x14(%a6).  Both are copied unchanged into FP_DST
# and FP_SRC and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag
# only; the handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0
# holding the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fmodx_
_fmodx_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	mov.l		0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l		0x14+0x4(%a6),0x4(%a0)
	mov.l		0x14+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L22_2x
	bsr.l		smod_snorm		# src is a NORM
	bra.b		_L22_6x
_L22_2x:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L22_3x			# no
	bsr.l		smod_szero		# yes
	bra.b		_L22_6x
_L22_3x:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L22_4x			# no
	bsr.l		smod_sinf		# yes
	bra.b		_L22_6x
_L22_4x:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L22_5x			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L22_6x
_L22_5x:
	bsr.l		smod_sdnrm		# any other tag: DENORM
_L22_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4687
4688
4689
#########################################################################
4690
# DYADIC TEMPLATE #
4691
#########################################################################
4692
#
# _fscales_(): single-precision entry point of the dyadic scale template.
#
# Stack arguments: dst at 0x8(%a6), src at 0xc(%a6), both single
# precision.  Each is converted to extended precision (FP_DST, FP_SRC)
# and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag only; the
# handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0 holding
# the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fscales_
_fscales_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	fmov.s		0x8(%a6),%fp0		# load sgl dst
	fmov.x		%fp0,FP_DST(%a6)	# keep it as extended
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	fmov.s		0xc(%a6),%fp0		# load sgl src
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L23_2s
	bsr.l		sscale_snorm		# src is a NORM
	bra.b		_L23_6s
_L23_2s:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L23_3s			# no
	bsr.l		sscale_szero		# yes
	bra.b		_L23_6s
_L23_3s:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L23_4s			# no
	bsr.l		sscale_sinf		# yes
	bra.b		_L23_6s
_L23_4s:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L23_5s			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L23_6s
_L23_5s:
	bsr.l		sscale_sdnrm		# any other tag: DENORM
_L23_6s:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4757
4758
#
# _fscaled_(): double-precision entry point of the dyadic scale template.
#
# Stack arguments: dst at 0x8(%a6), src at 0x10(%a6), both double
# precision.  Each is converted to extended precision (FP_DST, FP_SRC)
# and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag only; the
# handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0 holding
# the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fscaled_
_fscaled_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	fmov.d		0x8(%a6),%fp0		# load dbl dst
	fmov.x		%fp0,FP_DST(%a6)	# keep it as extended
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	fmov.d		0x10(%a6),%fp0		# load dbl src
	fmov.x		%fp0,FP_SRC(%a6)	# keep it as extended
	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L23_2d
	bsr.l		sscale_snorm		# src is a NORM
	bra.b		_L23_6d
_L23_2d:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L23_3d			# no
	bsr.l		sscale_szero		# yes
	bra.b		_L23_6d
_L23_3d:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L23_4d			# no
	bsr.l		sscale_sinf		# yes
	bra.b		_L23_6d
_L23_4d:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L23_5d			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L23_6d
_L23_5d:
	bsr.l		sscale_sdnrm		# any other tag: DENORM
_L23_6d:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4823
4824
#
# _fscalex_(): extended-precision entry point of the dyadic scale template.
#
# Stack arguments: dst in the three longwords at 0x8(%a6), src in the
# three longwords at 0x14(%a6).  Both are copied unchanged into FP_DST
# and FP_SRC and tagged (DTAG, STAG).  Dispatch is on the SOURCE tag
# only; the handlers are entered with a0 -> FP_SRC, a1 -> FP_DST and d0
# holding the rounding mode/precision byte.  The result is left in fp0;
# d0-d1/a0-a1, the control registers and fp1 are reloaded from the
# frame before returning.
#
	global		_fscalex_
_fscalex_:
	link		%a6,&-LOCAL_SIZE

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FP0(%a6)	# save fp0/fp1

	fmov.l		&0x0,%fpcr		# zero FPCR

#
#	copy, convert, and tag input arguments
#
	lea		FP_DST(%a6),%a0		# a0 -> FP_DST
	mov.l		0x8+0x0(%a6),0x0(%a0)	# load ext dst
	mov.l		0x8+0x4(%a6),0x4(%a0)
	mov.l		0x8+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,DTAG(%a6)		# record destination tag

	lea		FP_SRC(%a6),%a0		# a0 -> FP_SRC
	mov.l		0x14+0x0(%a6),0x0(%a0)	# load ext src
	mov.l		0x14+0x4(%a6),0x4(%a0)
	mov.l		0x14+0x8(%a6),0x8(%a0)
	bsr.l		tag			# fetch operand type
	mov.b		%d0,STAG(%a6)		# record source tag
	mov.l		%d0,%d1			# d1 = src tag used for dispatch

#	clear bits 31-24 and 15-8 of the saved FPSR image
	andi.l		&0x00ff00ff,USER_FPSR(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd mode,prec

	lea		FP_SRC(%a6),%a0		# pass ptr to src
	lea		FP_DST(%a6),%a1		# pass ptr to dst

	tst.b		%d1			# zero tag means NORM
	bne.b		_L23_2x
	bsr.l		sscale_snorm		# src is a NORM
	bra.b		_L23_6x
_L23_2x:
	cmpi.b		%d1,&ZERO		# is src a ZERO?
	bne.b		_L23_3x			# no
	bsr.l		sscale_szero		# yes
	bra.b		_L23_6x
_L23_3x:
	cmpi.b		%d1,&INF		# is src an INF?
	bne.b		_L23_4x			# no
	bsr.l		sscale_sinf		# yes
	bra.b		_L23_6x
_L23_4x:
	cmpi.b		%d1,&QNAN		# is src a QNAN?
	bne.b		_L23_5x			# no
	bsr.l		sop_sqnan		# yes
	bra.b		_L23_6x
_L23_5x:
	bsr.l		sscale_sdnrm		# any other tag: DENORM
_L23_6x:

#
#	Result is now in FP0
#
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs
	fmovm.x		EXC_FP1(%a6),&0x40	# restore fp1
	unlk		%a6
	rts
4891
4892
4893
#########################################################################
4894
# ssin(): computes the sine of a normalized input #
4895
# ssind(): computes the sine of a denormalized input #
4896
# scos(): computes the cosine of a normalized input #
4897
# scosd(): computes the cosine of a denormalized input #
4898
# ssincos(): computes the sine and cosine of a normalized input #
4899
# ssincosd(): computes the sine and cosine of a denormalized input #
4900
# #
4901
# INPUT *************************************************************** #
4902
# a0 = pointer to extended precision input #
4903
# d0 = round precision,mode #
4904
# #
4905
# OUTPUT ************************************************************** #
4906
# fp0 = sin(X) or cos(X) #
4907
# #
4908
# For ssincos(X): #
4909
# fp0 = sin(X) #
4910
# fp1 = cos(X) #
4911
# #
4912
# ACCURACY and MONOTONICITY ******************************************* #
4913
# The returned result is within 1 ulp in 64 significant bits, i.e. #
4914
# within 0.5001 ulp to 53 bits if the result is subsequently #
4915
# rounded to double precision. The result is provably monotonic #
4916
# in double precision. #
4917
# #
4918
# ALGORITHM *********************************************************** #
4919
# #
4920
# SIN and COS: #
4921
# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
4922
# #
4923
# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
4924
# #
4925
# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4926
# k = N mod 4, so in particular, k = 0,1,2,or 3. #
4927
# Overwrite k by k := k + AdjN. #
4928
# #
4929
# 4. If k is even, go to 6. #
4930
# #
4931
# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
4932
# Return sgn*cos(r) where cos(r) is approximated by an #
4933
# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
4934
# s = r*r. #
4935
# Exit. #
4936
# #
4937
# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
4938
# where sin(r) is approximated by an odd polynomial in r #
4939
# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
4940
# Exit. #
4941
# #
4942
# 7. If |X| > 1, go to 9. #
4943
# #
4944
# 8. (|X|<2**(-40)) If SIN is invoked, return X; #
4945
# otherwise return 1. #
4946
# #
4947
# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4948
# go back to 3. #
4949
# #
4950
# SINCOS: #
4951
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
4952
# #
4953
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
4954
# k = N mod 4, so in particular, k = 0,1,2,or 3. #
4955
# #
4956
# 3. If k is even, go to 5. #
4957
# #
4958
# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
4959
# j1 exclusive or with the l.s.b. of k. #
4960
# sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
4961
# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
4962
# sin(r) and cos(r) are computed as odd and even #
4963
# polynomials in r, respectively. Exit #
4964
# #
4965
# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
4966
# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
4967
# sin(r) and cos(r) are computed as odd and even #
4968
# polynomials in r, respectively. Exit #
4969
# #
4970
# 6. If |X| > 1, go to 8. #
4971
# #
4972
# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
4973
# #
4974
# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
4975
# go back to 2. #
4976
# #
4977
#########################################################################
4978
4979
#--Polynomial coefficients for the sin (A1..A7) and cos (B1..B8) kernels.
#--Storage format follows the way the code below reads each entry:
#--	SINA3..SINA7, COSB4..COSB8	double   (fmov.d / fadd.d)
#--	SINA1, SINA2, COSB2, COSB3	extended (fadd.x)
#--	COSB1				single   (fadd.s), value -1/2
SINA7:	long		0xBD6AAA77,0xCCC994F5
SINA6:	long		0x3DE61209,0x7AAE8DA1
SINA5:	long		0xBE5AE645,0x2A118AE4
SINA4:	long		0x3EC71DE3,0xA5341531
SINA3:	long		0xBF2A01A0,0x1A018B59,0x00000000,0x00000000
SINA2:	long		0x3FF80000,0x88888888,0x888859AF,0x00000000
SINA1:	long		0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000

COSB8:	long		0x3D2AC4D0,0xD6011EE3
COSB7:	long		0xBDA9396F,0x9F45AC19
COSB6:	long		0x3E21EED9,0x0612C972
COSB5:	long		0xBE927E4F,0xB79D9FCF
COSB4:	long		0x3EFA01A0,0x1A01D423,0x00000000,0x00000000
COSB3:	long		0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000
COSB2:	long		0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E
COSB1:	long		0xBF000000
4995
4996
#--Scratch aliases used by ssin/scos/ssincos (all are offsets off %a6).
#--Several names deliberately share one slot:
#--	INARG, X, RPRIME	-> FP_SCR0
#--	POSNEG1, TWOTO63	-> L_SCR1
#--	ENDFLAG, INT		-> L_SCR2
	set		INARG,FP_SCR0

	set		X,FP_SCR0
#	set		XDCARE,X+2
	set		XFRAC,X+4

	set		RPRIME,FP_SCR0
	set		SPRIME,FP_SCR1

	set		POSNEG1,L_SCR1
	set		TWOTO63,L_SCR1

	set		ENDFLAG,L_SCR2
	set		INT,L_SCR2

	set		ADJN,L_SCR3
5012
5013
############################################
5014
#--ssin(): sine entry point. ADJN = 0 tells the shared code that sin(X)
#--is wanted, then control joins the common path at SINBGN.
	global		ssin
ssin:
	mov.l		&0,ADJN(%a6)		# SET ADJN TO 0 (sine)
	bra.b		SINBGN
5018
5019
############################################
5020
#--scos(): cosine entry point. ADJN = 1 tells the shared code that cos(X)
#--is wanted. There is no branch here: execution falls through into the
#--SINBGN label that follows.
	global		scos
scos:
	mov.l		&1,ADJN(%a6)		# SET ADJN TO 1 (cosine)
5023
5024
############################################
5025
SINBGN:
#--LOAD X AND CHECK IF |X| IS TOO SMALL OR TOO LARGE FOR THE TABLE PATH

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

# "COMPACTIFY" X: d1 = (sign/exponent word):(upper 16 bits of mantissa)
	mov.l		(%a0),%d1		# put exp in hi word
	mov.w		4(%a0),%d1		# fetch hi(man)
	and.l		&0x7FFFFFFF,%d1		# strip sign

	cmpi.l		%d1,&0x3FD78000		# is |X| >= 2**(-40)?
	bge.b		SOK1			# yes; go check the upper bound
	bra.w		SINSM			# no; input is very small

SOK1:
	cmp.l		%d1,&0x4004BC7E		# is |X| < 15 PI?
	blt.b		SINMAIN			# yes; usual case
	bra.w		SREDUCEX		# no; input is very large
5044
5045
#--THIS IS THE USUAL CASE, |X| < 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SINMAIN:
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32
						# (+0x200 = 32 entries * 16 bytes)

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1		# make a copy of N
	asl.l		&4,%d1			# N *= 16
	add.l		%d1,%a1			# tbl_addr = a1 + (N*16)

# A1 IS THE ADDRESS OF N*PIBY2
# ...WHICH IS IN TWO PIECES Y1 (EXTENDED) & Y2 (SINGLE)
	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# fp0 = R = (X-Y1)-Y2
5063
5064
SINCONT:
#--continuation from SREDUCEX (entered with fp0 = R and N in INT)

#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
	mov.l		INT(%a6),%d1
	add.l		ADJN(%a6),%d1		# SEE IF D1 IS ODD OR EVEN
	ror.l		&1,%d1			# D1 WAS ODD IFF D1 IS NOW NEGATIVE
	cmp.l		%d1,&0
	blt.w		COSPOLY			# odd: cos(R) is needed
						# even: fall into the sine path
5073
5074
#--LET J BE THE LEAST SIG. BIT OF D1 (AFTER THE ROTATE IN SINCONT),
#--LET SGN := (-1)**J.
#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
#--WHERE T=S*S.
#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
#--ON ENTRY: fp0 = R, d1 = rotated N+ADJN, d0 = user rounding mode/prec.
SINPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.x		%fp0,X(%a6)		# X IS R
	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		SINA7(%pc),%fp3
	fmov.d		SINA6(%pc),%fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	ror.l		&1,%d1
	and.l		&0x80000000,%d1
# ...LEAST SIG. BIT OF D1 IN SIGN POSITION
	eor.l		%d1,X(%a6)		# X IS NOW R'= SGN*R

	fmul.x		%fp1,%fp3		# TA7
	fmul.x		%fp1,%fp2		# TA6

	fadd.d		SINA5(%pc),%fp3		# A5+TA7
	fadd.d		SINA4(%pc),%fp2		# A4+TA6

	fmul.x		%fp1,%fp3		# T(A5+TA7)
	fmul.x		%fp1,%fp2		# T(A4+TA6)

	fadd.d		SINA3(%pc),%fp3		# A3+T(A5+TA7)
	fadd.x		SINA2(%pc),%fp2		# A2+T(A4+TA6)

	fmul.x		%fp3,%fp1		# T(A3+T(A5+TA7))

	fmul.x		%fp0,%fp2		# S(A2+T(A4+TA6))
	fadd.x		SINA1(%pc),%fp1		# A1+T(A3+T(A5+TA7))
	fmul.x		X(%a6),%fp0		# R'*S

	fadd.x		%fp2,%fp1		# [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]

	fmul.x		%fp1,%fp0		# SIN(R')-R'

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_inx2
5126
5127
#--LET J BE THE LEAST SIG. BIT OF D1 (AFTER THE ROTATE IN SINCONT),
#--LET SGN := (-1)**J.
#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
#--WHERE T=S*S.
#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
#--AND IS THEREFORE STORED AS SINGLE PRECISION.
#--ON ENTRY: fp0 = R, d1 = rotated N+ADJN, d0 = user rounding mode/prec.
COSPOLY:
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS S

	fmov.d		COSB8(%pc),%fp2
	fmov.d		COSB7(%pc),%fp3

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS T

	fmov.x		%fp0,X(%a6)		# X IS S
	ror.l		&1,%d1
	and.l		&0x80000000,%d1
# ...LEAST SIG. BIT OF D1 IN SIGN POSITION

	fmul.x		%fp1,%fp2		# TB8

	eor.l		%d1,X(%a6)		# X IS NOW S'= SGN*S
	and.l		&0x80000000,%d1

	fmul.x		%fp1,%fp3		# TB7

	or.l		&0x3F800000,%d1		# D1 IS SGN IN SINGLE (+-1.0)
	mov.l		%d1,POSNEG1(%a6)

	fadd.d		COSB6(%pc),%fp2		# B6+TB8
	fadd.d		COSB5(%pc),%fp3		# B5+TB7

	fmul.x		%fp1,%fp2		# T(B6+TB8)
	fmul.x		%fp1,%fp3		# T(B5+TB7)

	fadd.d		COSB4(%pc),%fp2		# B4+T(B6+TB8)
	fadd.x		COSB3(%pc),%fp3		# B3+T(B5+TB7)

	fmul.x		%fp1,%fp2		# T(B4+T(B6+TB8))
	fmul.x		%fp3,%fp1		# T(B3+T(B5+TB7))

	fadd.x		COSB2(%pc),%fp2		# B2+T(B4+T(B6+TB8))
	fadd.s		COSB1(%pc),%fp1		# B1+T(B3+T(B5+TB7))

	fmul.x		%fp2,%fp0		# S(B2+T(B4+T(B6+TB8)))

	fadd.x		%fp1,%fp0		# sum of the two bracketed halves

	fmul.x		X(%a6),%fp0		# times S'

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp0	# last inst - possible exception set
	bra		t_inx2
5188
5189
##############################################
5190
5191
# SINe: Big OR Small?
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X OR 1.
#--NOTE(review): no branch to SINBORS appears in the sin/cos code of this
#--section (SINBGN goes to SINSM/SREDUCEX directly) - confirm whether the
#--label is still referenced elsewhere.
SINBORS:
	cmp.l		%d1,&0x3FFF8000		# compact |X| vs 1.0
	bgt.l		SREDUCEX		# |X| > 1: general reduction
						# otherwise fall into SINSM
5197
5198
SINSM:
#--tiny argument: pick the sine or cosine result according to ADJN
	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&0
	bgt.b		COSTINY			# ADJN > 0: cosine was requested
						# ADJN = 0: fall into SINTINY
5202
5203
# here, the operation may underflow iff the precision is sgl or dbl.
# extended denorms are handled through another entry point.
# the result is X itself, re-moved under the user's rounding mode/prec.
SINTINY:
#	mov.w		&0x0000,XDCARE(%a6)	# JUST IN CASE

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set
	bra		t_catch
5212
5213
COSTINY:
#--cos of a tiny argument: 1.0 plus the single 0x80800000 (-2**(-126)),
#--added under the user's rounding mode and precision
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		&0x80800000,%fp0	# last inst - possible exception set
	bra		t_pinx2
5218
5219
################################################
5220
#--ssind(): sine entry point for a denormalized input.
	global		ssind
#--SIN(X) = X FOR DENORMALIZED X
ssind:
	bra		t_extdnrm
5224
5225
############################################
5226
#--scosd(): cosine entry point for a denormalized input.
	global		scosd
#--COS(X) = 1 FOR DENORMALIZED X
scosd:
	fmov.s		&0x3F800000,%fp0	# fp0 = 1.0
	bra		t_pinx2
5231
5232
##################################################
5233
5234
#--ssincos(): combined sine/cosine entry point. ADJN = 4 marks the request
#--so that SRESTORE returns to SCCONT rather than SINCONT.
	global		ssincos
ssincos:
#--SET ADJN TO 4
	mov.l		&4,ADJN(%a6)

	fmov.x		(%a0),%fp0		# LOAD INPUT
	fmov.x		%fp0,X(%a6)		# save input at X

	mov.l		(%a0),%d1		# sign/exponent in hi word
	mov.w		4(%a0),%d1		# upper mantissa in lo word
	and.l		&0x7FFFFFFF,%d1		# COMPACTIFY X

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		SCOK1			# yes; go check the upper bound
	bra.w		SCSM			# no; input is very small

SCOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		SCMAIN			# yes; usual case
	bra.w		SREDUCEX		# no; input is very large
5254
5255
5256
#--THIS IS THE USUAL CASE, |X| < 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
SCMAIN:
	fmov.x		%fp0,%fp1

	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,INT(%a6)		# CONVERT TO INTEGER

	mov.l		INT(%a6),%d1		# copy of N
	asl.l		&4,%d1			# N *= 16 (bytes per table entry)
	add.l		%d1,%a1			# ADDRESS OF N*PIBY2, IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1
	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2
5273
5274
SCCONT:
#--continuation point from SREDUCEX (entered with fp0 = R and N in INT)

	mov.l		INT(%a6),%d1
	ror.l		&1,%d1
	cmp.l		%d1,&0			# D1 < 0 IFF N IS ODD
	bge.w		NEVEN			# N even; N odd falls into SNODD
5281
5282
SNODD:
#--N IS ODD: THE SIN POLYNOMIAL (fp1) YIELDS COS(X) AND THE COS POLYNOMIAL
#--(fp0) YIELDS SIN(X). fp2 IS SAVED HERE; d2 IS SAVED/RESTORED AROUND ITS
#--USE AS A SCRATCH REGISTER FOR THE SIGN COMPUTATION.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R
	fmov.d		SINA7(%pc),%fp1		# A7
	fmov.d		COSB8(%pc),%fp2		# B8
	fmul.x		%fp0,%fp1		# SA7
	fmul.x		%fp0,%fp2		# SB8

	mov.l		%d2,-(%sp)		# save d2 (scratch)
	mov.l		%d1,%d2
	ror.l		&1,%d2
	and.l		&0x80000000,%d2
	eor.l		%d1,%d2
	and.l		&0x80000000,%d2		# d2 = sign bit to apply to R

	fadd.d		SINA6(%pc),%fp1		# A6+SA7
	fadd.d		COSB7(%pc),%fp2		# B7+SB8

	fmul.x		%fp0,%fp1		# S(A6+SA7)
	eor.l		%d2,RPRIME(%a6)		# RPRIME = R with sign adjusted
	mov.l		(%sp)+,%d2		# restore d2
	fmul.x		%fp0,%fp2		# S(B7+SB8)
	ror.l		&1,%d1
	and.l		&0x80000000,%d1		# d1 = sign bit for the cos-poly side
	mov.l		&0x3F800000,POSNEG1(%a6)
	eor.l		%d1,POSNEG1(%a6)	# POSNEG1 = +-1.0 in single

	fadd.d		SINA5(%pc),%fp1		# A5+S(A6+SA7)
	fadd.d		COSB6(%pc),%fp2		# B6+S(B7+SB8)

	fmul.x		%fp0,%fp1		# S(A5+S(A6+SA7))
	fmul.x		%fp0,%fp2		# S(B6+S(B7+SB8))
	fmov.x		%fp0,SPRIME(%a6)

	fadd.d		SINA4(%pc),%fp1		# A4+S(A5+S(A6+SA7))
	eor.l		%d1,SPRIME(%a6)		# SPRIME = S with sign adjusted
	fadd.d		COSB5(%pc),%fp2		# B5+S(B6+S(B7+SB8))

	fmul.x		%fp0,%fp1		# S(A4+...)
	fmul.x		%fp0,%fp2		# S(B5+...)

	fadd.d		SINA3(%pc),%fp1		# A3+S(A4+...)
	fadd.d		COSB4(%pc),%fp2		# B4+S(B5+...)

	fmul.x		%fp0,%fp1		# S(A3+...)
	fmul.x		%fp0,%fp2		# S(B4+...)

	fadd.x		SINA2(%pc),%fp1		# A2+S(A3+...)
	fadd.x		COSB3(%pc),%fp2		# B3+S(B4+...)

	fmul.x		%fp0,%fp1		# S(A2+...)
	fmul.x		%fp0,%fp2		# S(B3+...)

	fadd.x		SINA1(%pc),%fp1		# A1+S(A2+...)
	fadd.x		COSB2(%pc),%fp2		# B2+S(B3+...)

	fmul.x		%fp0,%fp1		# S(A1+...)
	fmul.x		%fp2,%fp0		# S(B2+...)

	fmul.x		RPRIME(%a6),%fp1	# R'S(A1+...)
	fadd.s		COSB1(%pc),%fp0		# B1+S(B2...)
	fmul.x		SPRIME(%a6),%fp0	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.x		RPRIME(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.s		POSNEG1(%a6),%fp0	# SIN(X)
	bra		t_inx2
5355
5356
NEVEN:
#--N IS EVEN: THE COS POLYNOMIAL (fp1) YIELDS COS(X) AND THE SIN POLYNOMIAL
#--(fp0) YIELDS SIN(X); BOTH TAKE THE SAME SIGN BIT FROM d1.
#--REGISTERS SAVED HERE: FP2.
	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,RPRIME(%a6)
	fmul.x		%fp0,%fp0		# FP0 IS S = R*R

	fmov.d		COSB8(%pc),%fp1		# B8
	fmov.d		SINA7(%pc),%fp2		# A7

	fmul.x		%fp0,%fp1		# SB8
	fmov.x		%fp0,SPRIME(%a6)
	fmul.x		%fp0,%fp2		# SA7

	ror.l		&1,%d1
	and.l		&0x80000000,%d1		# d1 = sign bit shared by both results

	fadd.d		COSB7(%pc),%fp1		# B7+SB8
	fadd.d		SINA6(%pc),%fp2		# A6+SA7

	eor.l		%d1,RPRIME(%a6)		# RPRIME = SGN*R
	eor.l		%d1,SPRIME(%a6)		# SPRIME = SGN*S

	fmul.x		%fp0,%fp1		# S(B7+SB8)

	or.l		&0x3F800000,%d1		# d1 = SGN as single +-1.0
	mov.l		%d1,POSNEG1(%a6)

	fmul.x		%fp0,%fp2		# S(A6+SA7)

	fadd.d		COSB6(%pc),%fp1		# B6+S(B7+SB8)
	fadd.d		SINA5(%pc),%fp2		# A5+S(A6+SA7)

	fmul.x		%fp0,%fp1		# S(B6+S(B7+SB8))
	fmul.x		%fp0,%fp2		# S(A5+S(A6+SA7))

	fadd.d		COSB5(%pc),%fp1		# B5+S(B6+S(B7+SB8))
	fadd.d		SINA4(%pc),%fp2		# A4+S(A5+S(A6+SA7))

	fmul.x		%fp0,%fp1		# S(B5+...)
	fmul.x		%fp0,%fp2		# S(A4+...)

	fadd.d		COSB4(%pc),%fp1		# B4+S(B5+...)
	fadd.d		SINA3(%pc),%fp2		# A3+S(A4+...)

	fmul.x		%fp0,%fp1		# S(B4+...)
	fmul.x		%fp0,%fp2		# S(A3+...)

	fadd.x		COSB3(%pc),%fp1		# B3+S(B4+...)
	fadd.x		SINA2(%pc),%fp2		# A2+S(A3+...)

	fmul.x		%fp0,%fp1		# S(B3+...)
	fmul.x		%fp0,%fp2		# S(A2+...)

	fadd.x		COSB2(%pc),%fp1		# B2+S(B3+...)
	fadd.x		SINA1(%pc),%fp2		# A1+S(A2+...)

	fmul.x		%fp0,%fp1		# S(B2+...)
	fmul.x		%fp2,%fp0		# S(A1+...)

	fadd.s		COSB1(%pc),%fp1		# B1+S(B2...)
	fmul.x		RPRIME(%a6),%fp0	# R'S(A1+...)
	fmul.x		SPRIME(%a6),%fp1	# S'(B1+S(B2+...))

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fadd.s		POSNEG1(%a6),%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fadd.x		RPRIME(%a6),%fp0	# SIN(X)
	bra		t_inx2
5428
5429
################################################
5430
5431
SCBORS:
#--SINCOS: Big OR Small? |X| > 1 goes to the general reduction;
#--otherwise execution continues to SCSM below.
	cmp.l		%d1,&0x3FFF8000		# compact |X| vs 1.0
	bgt.w		SREDUCEX
5434
5435
################################################
5436
5437
SCSM:
#--tiny argument: cosine is 1.0 minus the single 0x00800000 (2**(-126)),
#--subtracted under the user's rounding mode; sine is X itself
#	mov.w		&0x0000,XDCARE(%a6)
	fmov.s		&0x3F800000,%fp1	# fp1 = 1.0

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fsub.s		&0x00800000,%fp1	# COS(X)
	bsr		sto_cos			# store cosine result
	fmov.l		%fpcr,%d0		# d0 must have fpcr,too
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# SIN(X) = X
	bra		t_catch
5448
5449
##############################################
5450
5451
#--ssincosd(): combined sine/cosine entry point for a denormalized input.
	global		ssincosd
#--SIN AND COS OF X FOR DENORMALIZED X
ssincosd:
	mov.l		%d0,-(%sp)		# save d0
	fmov.s		&0x3F800000,%fp1	# cos(X) = 1.0
	bsr		sto_cos			# store cosine result
	mov.l		(%sp)+,%d0		# restore d0
	bra		t_extdnrm		# sine side handled as in ssind
5459
5460
############################################
5461
5462
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ON ENTRY: fp0 = X, d1 = compact form of |X|.
SREDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration. In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		SLOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		sred_neg

# positive arg: set the sign bit of both constants so the adds below subtract
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
sred_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
5498
5499
#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
SLOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of R
	mov.l		%d1,%a1			# save a copy of D1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		SLASTLOOP
SCONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		SWORK
SLASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

SWORK:
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# saved sign/exponent word
	swap		%d2			# move it to the upper word
	and.l		&0x80000000,%d2		# keep only the sign bit
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fint.x		%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1.b = 1 on the last pass, else 0

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w but |p| <= half ulp of P
#--Then, we need to compute A := R-P and a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		SRESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		SLOOP
5594
5595
SRESTORE:
#--reduction finished: store N, restore the registers saved by SREDUCEX and
#--return to the caller's continuation (ADJN = 4 identifies ssincos)
	fmov.l		%fp2,INT(%a6)		# INT = N
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		(%sp)+,&0x3c		# restore {fp2-fp5}

	mov.l		ADJN(%a6),%d1
	cmp.l		%d1,&4

	blt.w		SINCONT			# ADJN < 4: ssin/scos
	bra.w		SCCONT			# ADJN = 4: ssincos
5605
5606
#########################################################################
5607
# stan(): computes the tangent of a normalized input #
5608
# stand(): computes the tangent of a denormalized input #
5609
# #
5610
# INPUT *************************************************************** #
5611
# a0 = pointer to extended precision input #
5612
# d0 = round precision,mode #
5613
# #
5614
# OUTPUT ************************************************************** #
5615
# fp0 = tan(X) #
5616
# #
5617
# ACCURACY and MONOTONICITY ******************************************* #
5618
# The returned result is within 3 ulp in 64 significant bit, i.e. #
5619
# within 0.5001 ulp to 53 bits if the result is subsequently #
5620
# rounded to double precision. The result is provably monotonic #
5621
# in double precision. #
5622
# #
5623
# ALGORITHM *********************************************************** #
5624
# #
5625
# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
5626
# #
5627
# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
5628
# k = N mod 2, so in particular, k = 0 or 1. #
5629
# #
5630
# 3. If k is odd, go to 5. #
5631
# #
5632
# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
5633
# rational function U/V where #
5634
# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5635
# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
5636
# Exit. #
5637
# #
5638
# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
5639
# a rational function U/V where #
5640
# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
5641
# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
5642
# -Cot(r) = -V/U. Exit. #
5643
# #
5644
# 6. If |X| > 1, go to 8. #
5645
# #
5646
# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
5647
# #
5648
# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
5649
# to 2. #
5650
# #
5651
#########################################################################
5652
5653
#--Coefficients of the rational approximation U/V used by stan.
#--TANQ4, TANP3 and TANQ3 are read as doubles (fmov.d/fadd.d); TANP2,
#--TANQ2, TANP1 and TANQ1 are read as extended (fadd.x).
TANQ4:
	long		0x3EA0B759,0xF50F8688
TANP3:
	long		0xBEF2BAA5,0xA8924F04

TANQ3:
	long		0xBF346F59,0xB39BA65F,0x00000000,0x00000000

TANP2:
	long		0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000

TANQ2:
	long		0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000

TANP1:
	long		0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000

TANQ1:
	long		0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000

#--NOTE(review): INVTWOPI, TWOPI1 and TWOPI2 are not referenced by the
#--sin/cos/tan code in this section - confirm their users elsewhere.
INVTWOPI:
	long		0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000

TWOPI1:
	long		0x40010000,0xC90FDAA2,0x00000000,0x00000000
TWOPI2:
	long		0x3FDF0000,0x85A308D4,0x00000000,0x00000000
5680
5681
#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
#--MOST 69 BITS LONG.
#--EACH ENTRY IS 16 BYTES: THREE LONGS OF EXTENDED Y1, ONE LONG OF SINGLE Y2.
#--THE N = 0 ENTRY SITS AT PITBL+0x200, WHICH IS THE BASE THE CODE INDEXES.
#	global		PITBL
PITBL:
	long		0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
	long		0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
	long		0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
	long		0xC0040000,0xB6365E22,0xEE46F000,0x21480000
	long		0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
	long		0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
	long		0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
	long		0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
	long		0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
	long		0xC0040000,0x90836524,0x88034B96,0x20B00000
	long		0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
	long		0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
	long		0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
	long		0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
	long		0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
	long		0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
	long		0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
	long		0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
	long		0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
	long		0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
	long		0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
	long		0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
	long		0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
	long		0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
	long		0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
	long		0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
	long		0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
	long		0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
	long		0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
	long		0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
	long		0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
	long		0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
	long		0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
	long		0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
	long		0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
	long		0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
	long		0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
	long		0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
	long		0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
	long		0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
	long		0x40030000,0x8A3AE64F,0x76F80584,0x21080000
	long		0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
	long		0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
	long		0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
	long		0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
	long		0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
	long		0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
	long		0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
	long		0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
	long		0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
	long		0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
	long		0x40040000,0x8A3AE64F,0x76F80584,0x21880000
	long		0x40040000,0x90836524,0x88034B96,0xA0B00000
	long		0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
	long		0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
	long		0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
	long		0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
	long		0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
	long		0x40040000,0xB6365E22,0xEE46F000,0xA1480000
	long		0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
	long		0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
	long		0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
5751
5752
#--Scratch aliases for the tangent code. INT is re-assigned here: the tan
#--routines keep N in L_SCR1 (shared with TWOTO63), whereas the sin/cos
#--code above kept it in L_SCR2.
	set		INARG,FP_SCR0

	set		TWOTO63,L_SCR1
	set		INT,L_SCR1
	set		ENDFLAG,L_SCR2
5757
5758
#--stan(): tangent entry point. a0 -> extended input, d0 = round prec/mode.
	global		stan
stan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# sign/exponent in hi word
	mov.w		4(%a0),%d1		# upper mantissa in lo word
	and.l		&0x7FFFFFFF,%d1		# compact form of |X|

	cmp.l		%d1,&0x3FD78000		# |X| >= 2**(-40)?
	bge.b		TANOK1			# yes; go check the upper bound
	bra.w		TANSM			# no; input is very small
TANOK1:
	cmp.l		%d1,&0x4004BC7E		# |X| < 15 PI?
	blt.b		TANMAIN			# yes; usual case
	bra.w		REDUCEX			# no; input is very large
5773
5774
TANMAIN:
#--THIS IS THE USUAL CASE, |X| < 15 PI.
#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
	fmov.x		%fp0,%fp1
	fmul.d		TWOBYPI(%pc),%fp1	# X*2/PI

	lea.l		PITBL+0x200(%pc),%a1	# TABLE OF N*PI/2, N = -32,...,32

	fmov.l		%fp1,%d1		# CONVERT TO INTEGER

	asl.l		&4,%d1			# N *= 16 (bytes per table entry)
	add.l		%d1,%a1			# ADDRESS N*PIBY2 IN Y1, Y2

	fsub.x		(%a1)+,%fp0		# X-Y1

	fsub.s		(%a1),%fp0		# FP0 IS R = (X-Y1)-Y2

	ror.l		&5,%d1			# undo the *16 and rotate N's lsb into bit 31
	and.l		&0x80000000,%d1		# N WAS ODD IFF D1 < 0
5793
5794
TANCONT:
#--ON ENTRY: fp0 = R, d1 < 0 IFF N IS ODD, d0 = user rounding mode/prec.
#--N EVEN: RETURN U/V, WITH U IN fp0 AND V IN fp1.
	fmovm.x		&0x0c,-(%sp)		# save fp2,fp3

	cmp.l		%d1,&0
	blt.w		NODD

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp1,%fp3		# SQ4
	fmul.x		%fp1,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp1,%fp3		# S(Q3+SQ4)
	fmul.x		%fp1,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp1,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp1		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp0		# R+RS(P1+S(P2+SP3))

	fadd.s		&0x3F800000,%fp1	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		%fp1,%fp0		# last inst - possible exception set
	bra		t_inx2
5835
5836
NODD:
#--N ODD: RETURN -V/U. HERE fp1 HOLDS R (THEN U) AND fp0 HOLDS S (THEN V).
#--fp2/fp3 WERE ALREADY SAVED AT TANCONT.
	fmov.x		%fp0,%fp1
	fmul.x		%fp0,%fp0		# S = R*R

	fmov.d		TANQ4(%pc),%fp3
	fmov.d		TANP3(%pc),%fp2

	fmul.x		%fp0,%fp3		# SQ4
	fmul.x		%fp0,%fp2		# SP3

	fadd.d		TANQ3(%pc),%fp3		# Q3+SQ4
	fadd.x		TANP2(%pc),%fp2		# P2+SP3

	fmul.x		%fp0,%fp3		# S(Q3+SQ4)
	fmul.x		%fp0,%fp2		# S(P2+SP3)

	fadd.x		TANQ2(%pc),%fp3		# Q2+S(Q3+SQ4)
	fadd.x		TANP1(%pc),%fp2		# P1+S(P2+SP3)

	fmul.x		%fp0,%fp3		# S(Q2+S(Q3+SQ4))
	fmul.x		%fp0,%fp2		# S(P1+S(P2+SP3))

	fadd.x		TANQ1(%pc),%fp3		# Q1+S(Q2+S(Q3+SQ4))
	fmul.x		%fp1,%fp2		# RS(P1+S(P2+SP3))

	fmul.x		%fp3,%fp0		# S(Q1+S(Q2+S(Q3+SQ4)))

	fadd.x		%fp2,%fp1		# R+RS(P1+S(P2+SP3))
	fadd.s		&0x3F800000,%fp0	# 1+S(Q1+...)

	fmovm.x		(%sp)+,&0x30		# restore fp2,fp3

	fmov.x		%fp1,-(%sp)		# push U ...
	eor.l		&0x80000000,(%sp)	# ... and flip its sign in memory: -U

	fmov.l		%d0,%fpcr		# restore users round mode,prec
	fdiv.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_inx2
5874
5875
TANBORS:
#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
#--IF |X| < 2**(-40), RETURN X.
#--NOTE(review): no branch to TANBORS appears in the tan code of this
#--section (stan goes to TANSM/REDUCEX directly) - confirm whether the
#--label is still referenced elsewhere.
	cmp.l		%d1,&0x3FFF8000		# compact |X| vs 1.0
	bgt.b		REDUCEX			# |X| > 1: general reduction
						# otherwise fall into TANSM
5880
5881
TANSM:
#--tiny argument: tan(X) = X, re-moved under the user's rounding mode/prec
	fmov.x		%fp0,-(%sp)		# bounce X through the stack
	fmov.l		%d0,%fpcr		# restore users round mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%sp)+,%fp0		# last inst - possible exception set
	bra		t_catch
5887
5888
#--stand(): tangent entry point for a denormalized input.
	global		stand
#--TAN(X) = X FOR DENORMALIZED X
stand:
	bra		t_extdnrm
5892
5893
#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
#--ON ENTRY: fp0 = X, d1 = compact form of |X|.
REDUCEX:
	fmovm.x		&0x3c,-(%sp)		# save {fp2-fp5}
	mov.l		%d2,-(%sp)		# save d2
	fmov.s		&0x00000000,%fp1	# fp1 = 0

#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
#--there is a danger of unwanted overflow in first LOOP iteration. In this
#--case, reduce argument by one remainder step to make subsequent reduction
#--safe.
	cmp.l		%d1,&0x7ffeffff		# is arg dangerously large?
	bne.b		LOOP			# no

# yes; create 2**16383*PI/2
	mov.w		&0x7ffe,FP_SCR0_EX(%a6)
	mov.l		&0xc90fdaa2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)

# create low half of 2**16383*PI/2 at FP_SCR1
	mov.w		&0x7fdc,FP_SCR1_EX(%a6)
	mov.l		&0x85a308d3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)

	ftest.x		%fp0			# test sign of argument
	fblt.w		red_neg

# positive arg: set the sign bit of both constants so the adds below subtract
	or.b		&0x80,FP_SCR0_EX(%a6)	# positive arg
	or.b		&0x80,FP_SCR1_EX(%a6)
red_neg:
	fadd.x		FP_SCR0(%a6),%fp0	# high part of reduction is exact
	fmov.x		%fp0,%fp1		# save high result in fp1
	fadd.x		FP_SCR1(%a6),%fp0	# low part of reduction
	fsub.x		%fp0,%fp1		# determine low component of result
	fadd.x		FP_SCR1(%a6),%fp1	# fp0/fp1 are reduced argument.
5929
5930
#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
#--integer quotient will be stored in N
#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
LOOP:
	fmov.x		%fp0,INARG(%a6)		# +-2**K * F, 1 <= F < 2
	mov.w		INARG(%a6),%d1		# sign/exponent word of R
	mov.l		%d1,%a1			# save a copy of D1
	and.l		&0x00007FFF,%d1
	sub.l		&0x00003FFF,%d1		# d1 = K
	cmp.l		%d1,&28
	ble.b		LASTLOOP
CONTLOOP:
	sub.l		&27,%d1			# d1 = L := K-27
	mov.b		&0,ENDFLAG(%a6)
	bra.b		WORK
LASTLOOP:
	clr.l		%d1			# d1 = L := 0
	mov.b		&1,ENDFLAG(%a6)

WORK:
#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.

#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
#--2**L * (PIby2_1), 2**L * (PIby2_2)

	mov.l		&0x00003FFE,%d2		# BIASED EXP OF 2/PI
	sub.l		%d1,%d2			# BIASED EXP OF 2**(-L)*(2/PI)

	mov.l		&0xA2F9836E,FP_SCR0_HI(%a6)
	mov.l		&0x4E44152A,FP_SCR0_LO(%a6)
	mov.w		%d2,FP_SCR0_EX(%a6)	# FP_SCR0 = 2**(-L)*(2/PI)

	fmov.x		%fp0,%fp2
	fmul.x		FP_SCR0(%a6),%fp2	# fp2 = X * 2**(-L)*(2/PI)

#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
#--US THE DESIRED VALUE IN FLOATING POINT.
	mov.l		%a1,%d2			# saved sign/exponent word
	swap		%d2			# move it to the upper word
	and.l		&0x80000000,%d2		# keep only the sign bit
	or.l		&0x5F000000,%d2		# d2 = SIGN(INARG)*2**63 IN SGL
	mov.l		%d2,TWOTO63(%a6)
	fadd.s		TWOTO63(%a6),%fp2	# THE FRACTIONAL PART OF FP2 IS ROUNDED
	fsub.s		TWOTO63(%a6),%fp2	# fp2 = N
#	fintrz.x	%fp2,%fp2

#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
	mov.l		%d1,%d2			# d2 = L

	add.l		&0x00003FFF,%d2		# BIASED EXP OF 2**L * (PI/2)
	mov.w		%d2,FP_SCR0_EX(%a6)
	mov.l		&0xC90FDAA2,FP_SCR0_HI(%a6)
	clr.l		FP_SCR0_LO(%a6)		# FP_SCR0 = 2**(L) * Piby2_1

	add.l		&0x00003FDD,%d1
	mov.w		%d1,FP_SCR1_EX(%a6)
	mov.l		&0x85A308D3,FP_SCR1_HI(%a6)
	clr.l		FP_SCR1_LO(%a6)		# FP_SCR1 = 2**(L) * Piby2_2

	mov.b		ENDFLAG(%a6),%d1	# d1.b = 1 on the last pass, else 0

#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
#--P2 = 2**(L) * Piby2_2
	fmov.x		%fp2,%fp4		# fp4 = N
	fmul.x		FP_SCR0(%a6),%fp4	# fp4 = W = N*P1
	fmov.x		%fp2,%fp5		# fp5 = N
	fmul.x		FP_SCR1(%a6),%fp5	# fp5 = w = N*P2
	fmov.x		%fp4,%fp3		# fp3 = W = N*P1

#--we want P+p = W+w but |p| <= half ulp of P
#--Then, we need to compute A := R-P and a := r-p
	fadd.x		%fp5,%fp3		# fp3 = P
	fsub.x		%fp3,%fp4		# fp4 = W-P

	fsub.x		%fp3,%fp0		# fp0 = A := R - P
	fadd.x		%fp5,%fp4		# fp4 = p = (W-P)+w

	fmov.x		%fp0,%fp3		# fp3 = A
	fsub.x		%fp4,%fp1		# fp1 = a := r - p

#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
#--|r| <= half ulp of R.
	fadd.x		%fp1,%fp0		# fp0 = R := A+a
#--No need to calculate r if this is the last loop
	cmp.b		%d1,&0
	bgt.w		RESTORE

#--Need to calculate r
	fsub.x		%fp0,%fp3		# fp3 = A-R
	fadd.x		%fp3,%fp1		# fp1 = r := (A-R)+a
	bra.w		LOOP
6025
6026
RESTORE:
6027
fmov.l %fp2,INT(%a6)
6028
mov.l (%sp)+,%d2 # restore d2
6029
fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
6030
6031
mov.l INT(%a6),%d1
6032
ror.l &1,%d1
6033
6034
bra.w TANCONT
6035
6036
#########################################################################
6037
# satan(): computes the arctangent of a normalized number #
6038
# satand(): computes the arctangent of a denormalized number #
6039
# #
6040
# INPUT *************************************************************** #
6041
# a0 = pointer to extended precision input #
6042
# d0 = round precision,mode #
6043
# #
6044
# OUTPUT ************************************************************** #
6045
# fp0 = arctan(X) #
6046
# #
6047
# ACCURACY and MONOTONICITY ******************************************* #
6048
# The returned result is within 2 ulps in 64 significant bits, #
6049
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6050
# rounded to double precision. The result is provably monotonic #
6051
# in double precision. #
6052
# #
6053
# ALGORITHM *********************************************************** #
6054
# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
6055
# #
6056
# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
6057
# Note that k = -4, -3,..., or 3. #
6058
# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
6059
# significant bits of X with a bit-1 attached at the 6-th #
6060
# bit position. Define u to be u = (X-F) / (1 + X*F). #
6061
# #
6062
# Step 3. Approximate arctan(u) by a polynomial poly. #
6063
# #
6064
# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
6065
# table of values calculated beforehand. Exit. #
6066
# #
6067
# Step 5. If |X| >= 16, go to Step 7. #
6068
# #
6069
# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
6070
# #
6071
# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
6072
# polynomial in X'. #
6073
# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
6074
# #
6075
#########################################################################
6076
6077
#--COEFFICIENTS FOR THE THREE ATAN POLYNOMIALS USED BY satan.
#--EACH COEFFICIENT IS TWO LONGS AND IS FETCHED WITH A .d (DOUBLE
#--PRECISION) OPERAND, E.G. "fmov.d ATANA3(%pc),%fp2".

#--A1..A3: TABLE PATH, ATAN(U) = U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
ATANA3:	long		0xBFF6687E,0x314987D8
ATANA2:	long		0x4002AC69,0x34A26DB3
ATANA1:	long		0xBFC2476F,0x4E1DA28E

#--B1..B6: SMALL-ARGUMENT PATH (ATANSM), POLYNOMIAL IN Y = X*X
ATANB6:	long		0x3FB34444,0x7F876989
ATANB5:	long		0xBFB744EE,0x7FAF45DB
ATANB4:	long		0x3FBC71C6,0x46940220
ATANB3:	long		0xBFC24924,0x921872F9
ATANB2:	long		0x3FC99999,0x99998FA9
ATANB1:	long		0xBFD55555,0x55555555

#--C1..C5: LARGE-ARGUMENT PATH (ATANBIG), POLYNOMIAL IN Y = X'*X', X' = -1/X
ATANC5:	long		0xBFB70BF3,0x98539E6A
ATANC4:	long		0x3FBC7187,0x962D1D7D
ATANC3:	long		0xBFC24924,0x827107B8
ATANC2:	long		0x3FC99999,0x9996263E
ATANC1:	long		0xBFD55555,0x55555536

#--+PI/2 AND -PI/2, EXTENDED PRECISION (ACCESSED WITH .x OPERANDS)
PPIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
NPIBY2:	long		0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000

#--+TINY AND -TINY, EXTENDED PRECISION: BIASED EXPONENT 0x0001 WITH ONLY
#--THE LEADING MANTISSA BIT SET. ADDED TO +-PI/2 ON THE ATANHUGE PATH.
PTINY:	long		0x00010000,0x80000000,0x00000000,0x00000000
NTINY:	long		0x80010000,0x80000000,0x00000000,0x00000000
6099
6100
#--ATANTBL: TABLE OF ATAN(|F|) FOR THE 128 VALUES |F| = 2^K * 1.BBBB1,
#--K = -4,...,3. EACH ENTRY IS 16 BYTES (FOUR LONGS): SIGN/EXPONENT WORD
#--IN THE UPPER HALF OF THE FIRST LONG, 64-BIT MANTISSA IN THE NEXT TWO,
#--AND A ZERO PAD LONG. satan COMPUTES THE BYTE OFFSET AS
#--16 * (16*(K+4) + BBBB) AND COPIES THE FIRST THREE LONGS INTO ATANF.
ATANTBL:
	long		0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
	long		0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
	long		0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
	long		0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
	long		0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
	long		0x3FFB0000,0xAB98E943,0x62765619,0x00000000
	long		0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
	long		0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
	long		0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
	long		0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
	long		0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
	long		0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
	long		0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
	long		0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
	long		0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
	long		0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
	long		0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
	long		0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
	long		0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
	long		0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
	long		0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
	long		0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
	long		0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
	long		0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
	long		0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
	long		0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
	long		0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
	long		0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
	long		0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
	long		0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
	long		0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
	long		0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
	long		0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
	long		0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
	long		0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
	long		0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
	long		0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
	long		0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
	long		0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
	long		0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
	long		0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
	long		0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
	long		0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
	long		0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
	long		0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
	long		0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
	long		0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
	long		0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
	long		0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
	long		0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
	long		0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
	long		0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
	long		0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
	long		0x3FFE0000,0x97731420,0x365E538C,0x00000000
	long		0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
	long		0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
	long		0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
	long		0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
	long		0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
	long		0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
	long		0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
	long		0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
	long		0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
	long		0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
	long		0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
	long		0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
	long		0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
	long		0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
	long		0x3FFE0000,0xE8771129,0xC4353259,0x00000000
	long		0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
	long		0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
	long		0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
	long		0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
	long		0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
	long		0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
	long		0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
	long		0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
	long		0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
	long		0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
	long		0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
	long		0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
	long		0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
	long		0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
	long		0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
	long		0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
	long		0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
	long		0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
	long		0x3FFF0000,0x9F100575,0x006CC571,0x00000000
	long		0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
	long		0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
	long		0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
	long		0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
	long		0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
	long		0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
	long		0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
	long		0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
	long		0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
	long		0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
	long		0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
	long		0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
	long		0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
	long		0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
	long		0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
	long		0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
	long		0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
	long		0x3FFF0000,0xB525529D,0x562246BD,0x00000000
	long		0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
	long		0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
	long		0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
	long		0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
	long		0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
	long		0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
	long		0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
	long		0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
	long		0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
	long		0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
	long		0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
	long		0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
	long		0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
	long		0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
	long		0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
	long		0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
	long		0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
	long		0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
	long		0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
	long		0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
	long		0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
	long		0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
6230
#--SCRATCH-SLOT ALIASES USED BY satan. X AND ATANF EACH NAME A 12-BYTE
#--EXTENDED-PRECISION SLOT; THE REMAINING NAMES ARE BYTE OFFSETS INTO IT.
	set		X,FP_SCR0		# copy of X, later overwritten by F or X'
	set		XDCARE,X+2		# word after the sign/exponent word
	set		XFRAC,X+4		# upper 32 bits of the mantissa
	set		XFRACLO,X+8		# lower 32 bits of the mantissa

	set		ATANF,FP_SCR1		# sign(F)*atan(|F|) fetched from ATANTBL
	set		ATANFHI,ATANF+4		# upper 32 bits of its mantissa
	set		ATANFLO,ATANF+8		# lower 32 bits of its mantissa
6238
6239
#--satan: ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT A NAN.
#--	IN:	a0 = POINTER TO EXTENDED PRECISION X
#--		d0 = USER ROUND PRECISION,MODE (LOADED INTO FPCR JUST
#--		     BEFORE THE LAST ARITHMETIC INSTRUCTION OF EACH PATH)
#--	OUT:	fp0 = ATAN(X); EVERY PATH LEAVES THROUGH ONE OF
#--		t_inx2, t_minx2, t_pinx2 OR t_catch.
#--	USES:	d1, a1 AND fp1 AS SCRATCH; d2, fp2 AND fp3 ARE SAVED ON
#--		THE STACK AND RESTORED WHEREVER THEY ARE USED.
	global		satan
satan:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent
	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
	fmov.x		%fp0,X(%a6)		# keep a copy of X in the scratch slot
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = |X| in compact form

	cmp.l		%d1,&0x3FFB8000		# |X| >= 1/16?
	blt.w		ATANSM			# no, use the small-argument polynomial

ATANOK1:
	cmp.l		%d1,&0x4002FFFF		# |X| < 16 ?
	bgt.w		ATANBIG			# no, use the large-argument path

#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
#--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE
#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
#--WILL INVOLVE A VERY LONG POLYNOMIAL.

#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
#--WE CHOSE F TO BE +-2^K * 1.BBBB1
#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).

ATANMAIN:

	and.l		&0xF8000000,XFRAC(%a6)	# FIRST 5 BITS
	or.l		&0x04000000,XFRAC(%a6)	# SET 6-TH BIT TO 1
	clr.l		XFRACLO(%a6)		# LOCATION OF X IS NOW F

	fmov.x		%fp0,%fp1		# FP1 IS X
	fmul.x		X(%a6),%fp1		# FP1 IS X*F, NOTE THAT X*F > 0
	fsub.x		X(%a6),%fp0		# FP0 IS X-F
	fadd.s		&0x3F800000,%fp1	# FP1 IS 1 + X*F
	fdiv.x		%fp1,%fp0		# FP0 IS U = (X-F)/(1+X*F)

#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
#--SAVE REGISTERS FP2.

	mov.l		%d2,-(%sp)		# SAVE d2 TEMPORARILY
	mov.l		%d1,%d2			# THE EXP AND 16 BITS OF X
	and.l		&0x00007800,%d1		# 4 VARYING BITS OF F'S FRACTION
	and.l		&0x7FFF0000,%d2		# EXPONENT OF F
	sub.l		&0x3FFB0000,%d2		# K+4
	asr.l		&1,%d2			# line K+4 up just above the 4 fraction bits
	add.l		%d2,%d1			# THE 7 BITS IDENTIFYING F
	asr.l		&7,%d1			# INDEX INTO TBL OF ATAN(|F|), 16 bytes/entry
	lea		ATANTBL(%pc),%a1
	add.l		%d1,%a1			# ADDRESS OF ATAN(|F|)
	mov.l		(%a1)+,ATANF(%a6)
	mov.l		(%a1)+,ATANFHI(%a6)
	mov.l		(%a1)+,ATANFLO(%a6)	# ATANF IS NOW ATAN(|F|)
	mov.l		X(%a6),%d1		# LOAD SIGN AND EXPO. AGAIN
	and.l		&0x80000000,%d1		# SIGN(F)
	or.l		%d1,ATANF(%a6)		# ATANF IS NOW SIGN(F)*ATAN(|F|)
	mov.l		(%sp)+,%d2		# RESTORE d2

#--THAT'S ALL I HAVE TO DO FOR NOW,
#--BUT ALAS, THE DIVIDE IS STILL CRANKING!

#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
#--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
#--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
#--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED

	fmovm.x		&0x04,-(%sp)		# save fp2

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS V = U*U
	fmov.d		ATANA3(%pc),%fp2
	fadd.x		%fp1,%fp2		# A3+V
	fmul.x		%fp1,%fp2		# V*(A3+V)
	fmul.x		%fp0,%fp1		# U*V
	fadd.d		ATANA2(%pc),%fp2	# A2+V*(A3+V)
	fmul.d		ATANA1(%pc),%fp1	# A1*U*V
	fmul.x		%fp2,%fp1		# A1*U*V*(A2+V*(A3+V))
	fadd.x		%fp1,%fp0		# ATAN(U), FP1 RELEASED

	fmovm.x		(%sp)+,&0x20		# restore fp2

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		ATANF(%a6),%fp0		# ATAN(X)
	bra		t_inx2

ATANBORS:
#--|X| IS IN d1 IN COMPACT FORM.
#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
#--(NO BRANCH TO THIS LABEL IS VISIBLE IN THIS ROUTINE.)
	cmp.l		%d1,&0x3FFF8000
	bgt.w		ATANBIG			# I.E. |X| >= 16

ATANSM:
#--|X| <= 1/16
#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
#--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
#--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
#--WHERE Y = X*X, AND Z = Y*Y.

	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
	blt.w		ATANTINY

#--COMPUTE POLYNOMIAL
	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmul.x		%fp0,%fp0		# FP0 IS Y = X*X

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANB6(%pc),%fp2
	fmov.d		ATANB5(%pc),%fp3

	fmul.x		%fp1,%fp2		# Z*B6
	fmul.x		%fp1,%fp3		# Z*B5

	fadd.d		ATANB4(%pc),%fp2	# B4+Z*B6
	fadd.d		ATANB3(%pc),%fp3	# B3+Z*B5

	fmul.x		%fp1,%fp2		# Z*(B4+Z*B6)
	fmul.x		%fp3,%fp1		# Z*(B3+Z*B5)

	fadd.d		ATANB2(%pc),%fp2	# B2+Z*(B4+Z*B6)
	fadd.d		ATANB1(%pc),%fp1	# B1+Z*(B3+Z*B5)

	fmul.x		%fp0,%fp2		# Y*(B2+Z*(B4+Z*B6))
	fmul.x		X(%a6),%fp0		# X*Y

	fadd.x		%fp2,%fp1		# [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]

	fmul.x		%fp1,%fp0		# X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	fadd.x		X(%a6),%fp0
	bra		t_inx2

ATANTINY:
#--|X| < 2^(-40), ATAN(X) = X

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		X(%a6),%fp0		# last inst - possible exception set

	bra		t_catch

ATANBIG:
#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
	cmp.l		%d1,&0x40638000		# |X| > 2^(100)?
	bgt.w		ATANHUGE

#--APPROXIMATE ATAN(-1/X) BY
#--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
#--THIS CAN BE RE-WRITTEN AS
#--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.

	fmovm.x		&0x0c,-(%sp)		# save fp2/fp3

	fmov.s		&0xBF800000,%fp1	# LOAD -1
	fdiv.x		%fp0,%fp1		# FP1 IS -1/X

#--DIVIDE IS STILL CRANKING

	fmov.x		%fp1,%fp0		# FP0 IS X'
	fmul.x		%fp0,%fp0		# FP0 IS Y = X'*X'
	fmov.x		%fp1,X(%a6)		# X IS REALLY X'

	fmov.x		%fp0,%fp1
	fmul.x		%fp1,%fp1		# FP1 IS Z = Y*Y

	fmov.d		ATANC5(%pc),%fp3
	fmov.d		ATANC4(%pc),%fp2

	fmul.x		%fp1,%fp3		# Z*C5
	fmul.x		%fp1,%fp2		# Z*C4

	fadd.d		ATANC3(%pc),%fp3	# C3+Z*C5
	fadd.d		ATANC2(%pc),%fp2	# C2+Z*C4

	fmul.x		%fp3,%fp1		# Z*(C3+Z*C5), FP3 RELEASED
	fmul.x		%fp0,%fp2		# Y*(C2+Z*C4)

	fadd.d		ATANC1(%pc),%fp1	# C1+Z*(C3+Z*C5)
	fmul.x		X(%a6),%fp0		# X'*Y

	fadd.x		%fp2,%fp1		# [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]

	fmul.x		%fp1,%fp0		# X'*Y*([C1+Z*(C3+Z*C5)]
#					...	+[Y*(C2+Z*C4)])
	fadd.x		X(%a6),%fp0		# ATAN(X') = X' + (the product above)

	fmovm.x		(%sp)+,&0x30		# restore fp2/fp3

	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	tst.b		(%a0)			# sign of the original X (X(%a6) now holds X')
	bpl.b		pos_big

neg_big:
	fadd.x		NPIBY2(%pc),%fp0	# -PI/2 + ATAN(X')
	bra		t_minx2

pos_big:
	fadd.x		PPIBY2(%pc),%fp0	# +PI/2 + ATAN(X')
	bra		t_pinx2

ATANHUGE:
#--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
	tst.b		(%a0)			# sign of X
	bpl.b		pos_huge

neg_huge:
	fmov.x		NPIBY2(%pc),%fp0
	fmov.l		%d0,%fpcr		# user rnd mode,prec applies to the final add
	fadd.x		PTINY(%pc),%fp0		# -PI/2 + TINY
	bra		t_minx2

pos_huge:
	fmov.x		PPIBY2(%pc),%fp0
	fmov.l		%d0,%fpcr		# user rnd mode,prec applies to the final add
	fadd.x		NTINY(%pc),%fp0		# +PI/2 - TINY
	bra		t_pinx2
6480
6481
#--satand: ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT.
#--THE WHOLE CASE IS HANDED TO t_extdnrm; NOTHING IS COMPUTED HERE.
	global		satand
satand:
	bra		t_extdnrm
6485
6486
#########################################################################
6487
# sasin(): computes the inverse sine of a normalized input #
6488
# sasind(): computes the inverse sine of a denormalized input #
6489
# #
6490
# INPUT *************************************************************** #
6491
# a0 = pointer to extended precision input #
6492
# d0 = round precision,mode #
6493
# #
6494
# OUTPUT ************************************************************** #
6495
# fp0 = arcsin(X) #
6496
# #
6497
# ACCURACY and MONOTONICITY ******************************************* #
6498
# The returned result is within 3 ulps in 64 significant bits, #
6499
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6500
# rounded to double precision. The result is provably monotonic #
6501
# in double precision. #
6502
# #
6503
# ALGORITHM *********************************************************** #
6504
# #
6505
# ASIN #
6506
# 1. If |X| >= 1, go to 3. #
6507
# #
6508
# 2. (|X| < 1) Calculate asin(X) by #
6509
# z := sqrt( [1-X][1+X] ) #
6510
# asin(X) = atan( X / z ). #
6511
# Exit. #
6512
# #
6513
# 3. If |X| > 1, go to 5. #
6514
# #
6515
# 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
6516
# #
6517
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6518
# Exit. #
6519
# #
6520
#########################################################################
6521
6522
#--sasin: ENTRY POINT FOR ASIN(X), X NORMALIZED.
#--	IN:	a0 = POINTER TO EXTENDED PRECISION X
#--		d0 = USER ROUND PRECISION,MODE
#--	OUT:	fp0 = ASIN(X); |X| > 1 LEAVES THROUGH t_operr.
#--	USES:	d1 AND fp1 AS SCRATCH; fp2 IS SAVED AND RESTORED.
	global		sasin
sasin:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# upper word of d1 = sign/exponent
	mov.w		4(%a0),%d1		# lower word of d1 = top 16 mantissa bits
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = |X| in compact form
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
	bge.b		ASINBIG

# This catch is added here for the '060 QSP. Originally, the call to
# satan() would handle this case by causing the exception which would
# not be caught until gen_except(). Now, with the exceptions being
# detected inside of satan(), the exception would have been handled there
# instead of inside sasin() as expected.
	cmp.l		%d1,&0x3FD78000		# |X| < 2^(-40)?
	blt.w		ASINTINY

#--THIS IS THE USUAL CASE, |X| < 1
#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )

ASINMAIN:
	fmov.s		&0x3F800000,%fp1
	fsub.x		%fp0,%fp1		# 1-X
	fmovm.x		&0x4,-(%sp)		# {fp2}
	fmov.s		&0x3F800000,%fp2
	fadd.x		%fp0,%fp2		# 1+X
	fmul.x		%fp2,%fp1		# (1+X)(1-X)
	fmovm.x		(%sp)+,&0x20		# {fp2}
	fsqrt.x		%fp1			# SQRT([1-X][1+X])
	fdiv.x		%fp1,%fp0		# X/SQRT([1-X][1+X])
	fmovm.x		&0x01,-(%sp)		# save X/SQRT(...)
	lea		(%sp),%a0		# pass ptr to X/SQRT(...)
	bsr		satan			# d0 still holds the user's rnd mode,prec
	add.l		&0xc,%sp		# clear X/SQRT(...) from stack
	bra		t_inx2

ASINBIG:
	fabs.x		%fp0			# |X|
	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ASIN(X) = +- PI/2.
ASINONE:
	fmov.x		PIBY2(%pc),%fp0
	mov.l		(%a0),%d1
	and.l		&0x80000000,%d1		# SIGN BIT OF X
	or.l		&0x3F800000,%d1		# +-1 IN SGL FORMAT
	mov.l		%d1,-(%sp)		# push SIGN(X) IN SGL-FMT
	fmov.l		%d0,%fpcr		# user rnd mode,prec applies to the multiply
	fmul.s		(%sp)+,%fp0		# SIGN(X) * PI/2
	bra		t_inx2

#--|X| < 2^(-40), ASIN(X) = X
ASINTINY:
	fmov.l		%d0,%fpcr		# restore users rnd mode,prec
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		(%a0),%fp0		# last inst - possible exception
	bra		t_catch
6581
6582
#--sasind: ASIN(X) = X FOR DENORMALIZED X.
#--THE WHOLE CASE IS HANDED TO t_extdnrm; NOTHING IS COMPUTED HERE.
	global		sasind
sasind:
	bra		t_extdnrm
6586
6587
#########################################################################
6588
# sacos(): computes the inverse cosine of a normalized input #
6589
# sacosd(): computes the inverse cosine of a denormalized input #
6590
# #
6591
# INPUT *************************************************************** #
6592
# a0 = pointer to extended precision input #
6593
# d0 = round precision,mode #
6594
# #
6595
# OUTPUT ************************************************************** #
6596
# fp0 = arccos(X) #
6597
# #
6598
# ACCURACY and MONOTONICITY ******************************************* #
6599
# The returned result is within 3 ulps in 64 significant bits, #
6600
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6601
# rounded to double precision. The result is provably monotonic #
6602
# in double precision. #
6603
# #
6604
# ALGORITHM *********************************************************** #
6605
# #
6606
# ACOS #
6607
# 1. If |X| >= 1, go to 3. #
6608
# #
6609
# 2. (|X| < 1) Calculate acos(X) by #
6610
# z := (1-X) / (1+X) #
6611
# acos(X) = 2 * atan( sqrt(z) ). #
6612
# Exit. #
6613
# #
6614
# 3. If |X| > 1, go to 5. #
6615
# #
6616
# 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
6617
# #
6618
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
6619
# Exit. #
6620
# #
6621
#########################################################################
6622
6623
#--sacos: ENTRY POINT FOR ACOS(X), X NORMALIZED.
#--	IN:	a0 = POINTER TO EXTENDED PRECISION X
#--		d0 = USER ROUND PRECISION,MODE
#--	OUT:	fp0 = ACOS(X); |X| > 1 LEAVES THROUGH t_operr.
#--	USES:	d1 AND fp1 AS SCRATCH.
	global		sacos
sacos:
	fmov.x		(%a0),%fp0		# LOAD INPUT

	mov.l		(%a0),%d1		# pack exp w/ upper 16 fraction
	mov.w		4(%a0),%d1
	and.l		&0x7FFFFFFF,%d1		# drop the sign: d1 = |X| in compact form
	cmp.l		%d1,&0x3FFF8000		# |X| >= 1?
	bge.b		ACOSBIG

#--THIS IS THE USUAL CASE, |X| < 1
#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )

ACOSMAIN:
	fmov.s		&0x3F800000,%fp1
	fadd.x		%fp0,%fp1		# 1+X
	fneg.x		%fp0			# -X
	fadd.s		&0x3F800000,%fp0	# 1-X
	fdiv.x		%fp1,%fp0		# (1-X)/(1+X)
	fsqrt.x		%fp0			# SQRT((1-X)/(1+X))
	mov.l		%d0,-(%sp)		# save original users fpcr
	clr.l		%d0			# satan is handed rnd mode,prec = 0 instead
	fmovm.x		&0x01,-(%sp)		# save SQRT(...) to stack
	lea		(%sp),%a0		# pass ptr to sqrt
	bsr		satan			# ATAN(SQRT([1-X]/[1+X]))
	add.l		&0xc,%sp		# clear SQRT(...) from stack

	fmov.l		(%sp)+,%fpcr		# restore users round prec,mode
	fadd.x		%fp0,%fp0		# 2 * ATAN( STUFF )
	bra		t_pinx2

ACOSBIG:
	fabs.x		%fp0			# |X|
	fcmp.s		%fp0,&0x3F800000	# compare |X| with 1.0
	fbgt		t_operr			# cause an operr exception

#--|X| = 1, ACOS(X) = 0 OR PI
	tst.b		(%a0)			# is X positive or negative?
	bpl.b		ACOSP1

#--X = -1
#Returns PI and inexact exception
ACOSM1:
	fmov.x		PI(%pc),%fp0		# load PI
	fmov.l		%d0,%fpcr		# load round mode,prec
	fadd.s		&0x00800000,%fp0	# add a small value
	bra		t_pinx2

ACOSP1:
	bra		ld_pzero		# answer is positive zero
6673
6674
# sacosd: acos entry point for a denormalized operand.
# The operand at (a0) is never read: the result is the PIBY2 constant,
# loaded after FPCR has been set from d0 so the load itself is performed
# under the caller's rounding precision/mode.
global sacosd
6675
#--ACOS(X) = PI/2 FOR DENORMALIZED X
6676
sacosd:
6677
fmov.l %d0,%fpcr # load user's rnd mode/prec
6678
fmov.x PIBY2(%pc),%fp0 # fp0 = PIBY2 constant (defined elsewhere)
6679
bra t_pinx2
6680
6681
#########################################################################
6682
# setox(): computes the exponential for a normalized input #
6683
# setoxd(): computes the exponential for a denormalized input #
6684
# setoxm1(): computes the exponential minus 1 for a normalized input #
6685
# setoxm1d(): computes the exponential minus 1 for a denormalized input #
6686
# #
6687
# INPUT *************************************************************** #
6688
# a0 = pointer to extended precision input #
6689
# d0 = round precision,mode #
6690
# #
6691
# OUTPUT ************************************************************** #
6692
# fp0 = exp(X) or exp(X)-1 #
6693
# #
6694
# ACCURACY and MONOTONICITY ******************************************* #
6695
# The returned result is within 0.85 ulps in 64 significant bits, #
6696
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
6697
# rounded to double precision. The result is provably monotonic #
6698
# in double precision. #
6699
# #
6700
# ALGORITHM and IMPLEMENTATION **************************************** #
6701
# #
6702
# setoxd #
6703
# ------ #
6704
# Step 1. Set ans := 1.0 #
6705
# #
6706
# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
6707
# Notes: This will always generate one exception -- inexact. #
6708
# #
6709
# #
6710
# setox #
6711
# ----- #
6712
# #
6713
# Step 1. Filter out extreme cases of input argument. #
6714
# 1.1 If |X| >= 2^(-65), go to Step 1.3. #
6715
# 1.2 Go to Step 7. #
6716
# 1.3 If |X| < 16380 log(2), go to Step 2. #
6717
# 1.4 Go to Step 8. #
6718
# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6719
# To avoid the use of floating-point comparisons, a #
6720
# compact representation of |X| is used. This format is a #
6721
# 32-bit integer, the upper (more significant) 16 bits #
6722
# are the sign and biased exponent field of |X|; the #
6723
# lower 16 bits are the 16 most significant fraction #
6724
# (including the explicit bit) bits of |X|. Consequently, #
6725
# the comparisons in Steps 1.1 and 1.3 can be performed #
6726
# by integer comparison. Note also that the constant #
6727
# 16380 log(2) used in Step 1.3 is also in the compact #
6728
# form. Thus taking the branch to Step 2 guarantees #
6729
# |X| < 16380 log(2). There is no harm to have a small #
6730
# number of cases where |X| is less than, but close to, #
6731
# 16380 log(2) and the branch to Step 8 is taken. #
6732
# #
6733
# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6734
# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
6735
# was taken) #
6736
# 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
6737
# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
6738
# or 63. #
6739
# 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
6740
# 2.5 Calculate the address of the stored value of #
6741
# 2^(J/64). #
6742
# 2.6 Create the value Scale = 2^M. #
6743
# Notes: The calculation in 2.2 is really performed by #
6744
# Z := X * constant #
6745
# N := round-to-nearest-integer(Z) #
6746
# where #
6747
# constant := single-precision( 64/log 2 ). #
6748
# #
6749
# Using a single-precision constant avoids memory #
6750
# access. Another effect of using a single-precision #
6751
# "constant" is that the calculated value Z is #
6752
# #
6753
# Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). #
6754
# #
6755
# This error has to be considered later in Steps 3 and 4. #
6756
# #
6757
# Step 3. Calculate X - N*log2/64. #
6758
# 3.1 R := X + N*L1, #
6759
# where L1 := single-precision(-log2/64). #
6760
# 3.2 R := R + N*L2, #
6761
# L2 := extended-precision(-log2/64 - L1).#
6762
# Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
6763
# approximate the value -log2/64 to 88 bits of accuracy. #
6764
# b) N*L1 is exact because N is no longer than 22 bits #
6765
# and L1 is no longer than 24 bits. #
6766
# c) The calculation X+N*L1 is also exact due to #
6767
# cancellation. Thus, R is practically X+N(L1+L2) to full #
6768
# 64 bits. #
6769
# d) It is important to estimate how large can |R| be #
6770
# after Step 3.2. #
6771
# #
6772
# N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) #
6773
# X*64/log2 (1+eps) = N + f, |f| <= 0.5 #
6774
# X*64/log2 - N = f - eps*X 64/log2 #
6775
# X - N*log2/64 = f*log2/64 - eps*X #
6776
# #
6777
# #
6778
# Now |X| <= 16446 log2, thus #
6779
# #
6780
# |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 #
6781
# <= 0.57 log2/64. #
6782
# This bound will be used in Step 4. #
6783
# #
6784
# Step 4. Approximate exp(R)-1 by a polynomial #
6785
# p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) #
6786
# Notes: a) In order to reduce memory access, the coefficients #
6787
# are made as "short" as possible: A1 (which is 1/2), A4 #
6788
# and A5 are single precision; A2 and A3 are double #
6789
# precision. #
6790
# b) Even with the restrictions above, #
6791
# |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. #
6792
# Note that 0.0062 is slightly bigger than 0.57 log2/64. #
6793
# c) To fully utilize the pipeline, p is separated into #
6794
# two independent pieces of roughly equal complexities #
6795
# p = [ R + R*S*(A2 + S*A4) ] + #
6796
# [ S*(A1 + S*(A3 + S*A5)) ] #
6797
# where S = R*R. #
6798
# #
6799
# Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by #
6800
# ans := T + ( T*p + t) #
6801
# where T and t are the stored values for 2^(J/64). #
6802
# Notes: 2^(J/64) is stored as T and t where T+t approximates #
6803
# 2^(J/64) to roughly 85 bits; T is in extended precision #
6804
# and t is in single precision. Note also that T is #
6805
# rounded to 62 bits so that the last two bits of T are #
6806
# zero. The reason for such a special form is that T-1, #
6807
# T-2, and T-8 will all be exact --- a property that will #
6808
# give much more accurate computation of the function #
6809
# EXPM1. #
6810
# #
6811
# Step 6. Reconstruction of exp(X) #
6812
# exp(X) = 2^M * 2^(J/64) * exp(R). #
6813
# 6.1 If AdjFlag = 0, go to 6.3 #
6814
# 6.2 ans := ans * AdjScale #
6815
# 6.3 Restore the user FPCR #
6816
# 6.4 Return ans := ans * Scale. Exit. #
6817
# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
6818
# |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
6819
# neither overflow nor underflow. If AdjFlag = 1, that #
6820
# means that #
6821
# X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. #
6822
# Hence, exp(X) may overflow or underflow or neither. #
6823
# When that is the case, AdjScale = 2^(M1) where M1 is #
6824
# approximately M. Thus 6.2 will never cause #
6825
# over/underflow. Possible exception in 6.4 is overflow #
6826
# or underflow. The inexact exception is not generated in #
6827
# 6.4. Although one can argue that the inexact flag #
6828
# should always be raised, to simulate that exception #
6829
# costs much more than the flag is worth in practical uses. #
6830
# #
6831
# Step 7. Return 1 + X. #
6832
# 7.1 ans := X #
6833
# 7.2 Restore user FPCR. #
6834
# 7.3 Return ans := 1 + ans. Exit #
6835
# Notes: For non-zero X, the inexact exception will always be #
6836
# raised by 7.3. That is the only exception raised by 7.3.#
6837
# Note also that we use the FMOVEM instruction to move X #
6838
# in Step 7.1 to avoid unnecessary trapping. (Although #
6839
# the FMOVEM may not seem relevant since X is normalized, #
6840
# the precaution will be useful in the library version of #
6841
# this code where the separate entry for denormalized #
6842
# inputs will be done away with.) #
6843
# #
6844
# Step 8. Handle exp(X) where |X| >= 16380log2. #
6845
# 8.1 If |X| > 16480 log2, go to Step 9. #
6846
# (mimic 2.2 - 2.6) #
6847
# 8.2 N := round-to-integer( X * 64/log2 ) #
6848
# 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
6849
# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
6850
# AdjFlag := 1. #
6851
# 8.5 Calculate the address of the stored value #
6852
# 2^(J/64). #
6853
# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
6854
# 8.7 Go to Step 3. #
6855
# Notes: Refer to notes for 2.2 - 2.6. #
6856
# #
6857
# Step 9. Handle exp(X), |X| > 16480 log2. #
6858
# 9.1 If X < 0, go to 9.3 #
6859
# 9.2 ans := Huge, go to 9.4 #
6860
# 9.3 ans := Tiny. #
6861
# 9.4 Restore user FPCR. #
6862
# 9.5 Return ans := ans * ans. Exit. #
6863
# Notes: Exp(X) will surely overflow or underflow, depending on #
6864
# X's sign. "Huge" and "Tiny" are respectively large/tiny #
6865
# extended-precision numbers whose square over/underflow #
6866
# with an inexact result. Thus, 9.5 always raises the #
6867
# inexact together with either overflow or underflow. #
6868
# #
6869
# setoxm1d #
6870
# -------- #
6871
# #
6872
# Step 1. Set ans := 0 #
6873
# #
6874
# Step 2. Return ans := X + ans. Exit. #
6875
# Notes: This will return X with the appropriate rounding #
6876
# precision prescribed by the user FPCR. #
6877
# #
6878
# setoxm1 #
6879
# ------- #
6880
# #
6881
# Step 1. Check |X| #
6882
# 1.1 If |X| >= 1/4, go to Step 1.3. #
6883
# 1.2 Go to Step 7. #
6884
# 1.3 If |X| < 70 log(2), go to Step 2. #
6885
# 1.4 Go to Step 10. #
6886
# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
6887
# However, it is conceivable |X| can be small very often #
6888
# because EXPM1 is intended to evaluate exp(X)-1 #
6889
# accurately when |X| is small. For further details on #
6890
# the comparisons, see the notes on Step 1 of setox. #
6891
# #
6892
# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
6893
# 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
6894
# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
6895
# or 63. #
6896
# 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
6897
# 2.4 Calculate the address of the stored value of #
6898
# 2^(J/64). #
6899
# 2.5 Create the values Sc = 2^M and #
6900
# OnebySc := -2^(-M). #
6901
# Notes: See the notes on Step 2 of setox. #
6902
# #
6903
# Step 3. Calculate X - N*log2/64. #
6904
# 3.1 R := X + N*L1, #
6905
# where L1 := single-precision(-log2/64). #
6906
# 3.2 R := R + N*L2, #
6907
# L2 := extended-precision(-log2/64 - L1).#
6908
# Notes: Applying the analysis of Step 3 of setox in this case #
6909
# shows that |R| <= 0.0055 (note that |X| <= 70 log2 in #
6910
# this case). #
6911
# #
6912
# Step 4. Approximate exp(R)-1 by a polynomial #
6913
# p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) #
6914
# Notes: a) In order to reduce memory access, the coefficients #
6915
# are made as "short" as possible: A1 (which is 1/2), A5 #
6916
# and A6 are single precision; A2, A3 and A4 are double #
6917
# precision. #
6918
# b) Even with the restriction above, #
6919
# |p - (exp(R)-1)| < |R| * 2^(-72.7) #
6920
# for all |R| <= 0.0055. #
6921
# c) To fully utilize the pipeline, p is separated into #
6922
# two independent pieces of roughly equal complexity #
6923
# p = [ R*S*(A2 + S*(A4 + S*A6)) ] + #
6924
# [ R + S*(A1 + S*(A3 + S*A5)) ] #
6925
# where S = R*R. #
6926
# #
6927
# Step 5. Compute 2^(J/64)*p by #
6928
# p := T*p #
6929
# where T and t are the stored values for 2^(J/64). #
6930
# Notes: 2^(J/64) is stored as T and t where T+t approximates #
6931
# 2^(J/64) to roughly 85 bits; T is in extended precision #
6932
# and t is in single precision. Note also that T is #
6933
# rounded to 62 bits so that the last two bits of T are #
6934
# zero. The reason for such a special form is that T-1, #
6935
# T-2, and T-8 will all be exact --- a property that will #
6936
# be exploited in Step 6 below. The total relative error #
6937
# in p is no bigger than 2^(-67.7) compared to the final #
6938
# result. #
6939
# #
6940
# Step 6. Reconstruction of exp(X)-1 #
6941
# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
6942
# 6.1 If M <= 63, go to Step 6.3. #
6943
# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
6944
# 6.3 If M >= -3, go to 6.5. #
6945
# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
6946
# 6.5 ans := (T + OnebySc) + (p + t). #
6947
# 6.6 Restore user FPCR. #
6948
# 6.7 Return ans := Sc * ans. Exit. #
6949
# Notes: The various arrangements of the expressions give #
6950
# accurate evaluations. #
6951
# #
6952
# Step 7. exp(X)-1 for |X| < 1/4. #
6953
# 7.1 If |X| >= 2^(-65), go to Step 9. #
6954
# 7.2 Go to Step 8. #
6955
# #
6956
# Step 8. Calculate exp(X)-1, |X| < 2^(-65). #
6957
# 8.1 If |X| < 2^(-16312), goto 8.3 #
6958
# 8.2 Restore FPCR; return ans := X - 2^(-16382). #
6959
# Exit. #
6960
# 8.3 X := X * 2^(140). #
6961
# 8.4 Restore FPCR; ans := ans - 2^(-16382). #
6962
# Return ans := ans*2^(-140). Exit #
6963
# Notes: The idea is to return "X - tiny" under the user #
6964
# precision and rounding modes. To avoid unnecessary #
6965
# inefficiency, we stay away from denormalized numbers #
6966
# the best we can. For |X| >= 2^(-16312), the #
6967
# straightforward 8.2 generates the inexact exception as #
6968
# the case warrants. #
6969
# #
6970
# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
6971
# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
6972
# Notes: a) In order to reduce memory access, the coefficients #
6973
# are made as "short" as possible: B1 (which is 1/2), B9 #
6974
# to B12 are single precision; B3 to B8 are double #
6975
# precision; and B2 is double extended. #
6976
# b) Even with the restriction above, #
6977
# |p - (exp(X)-1)| < |X| 2^(-70.6) #
6978
# for all |X| <= 0.251. #
6979
# Note that 0.251 is slightly bigger than 1/4. #
6980
# c) To fully preserve accuracy, the polynomial is #
6981
# computed as #
6982
# X + ( S*B1 + Q ) where S = X*X and #
6983
# Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
6984
# d) To fully utilize the pipeline, Q is separated into #
6985
# two independent pieces of roughly equal complexity #
6986
# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
6987
# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
6988
# #
6989
# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
6990
# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
6991
# practical purposes. Therefore, go to Step 1 of setox. #
6992
# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
6993
# purposes. #
6994
# ans := -1 #
6995
# Restore user FPCR #
6996
# Return ans := ans + 2^(-126). Exit. #
6997
# Notes: 10.2 will always create an inexact and return -1 + tiny #
6998
# in the user rounding precision and mode. #
6999
# #
7000
#########################################################################
7001
7002
# Constants shared by setox and setoxm1.
# The operand size is fixed by the instruction that reads each constant:
#   fmul.x / fadd.x -> 12-byte extended, fadd.d / fmul.d -> 8-byte double.
# A1 (0x3F000000), the A4/A5/A6 and B9..B12 coefficients, 64/log2 and L1
# are not stored here; they appear as single-precision immediates in the code.
L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # extended; read by "fmul.x L2(%pc)" as the tail of -log2/64 (16 bytes reserved)
7003
7004
EEXPA3: long 0x3FA55555,0x55554CC1 # double; setox coefficient A3
7005
EEXPA2: long 0x3FC55555,0x55554A54 # double; setox coefficient A2
7006
7007
EM1A4: long 0x3F811111,0x11174385 # double; setoxm1 coefficient A4
7008
EM1A3: long 0x3FA55555,0x55554F5A # double; setoxm1 coefficient A3
7009
7010
EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # setoxm1 A2; read with fadd.d (first 8 of the 16 bytes)
7011
7012
EM1B8: long 0x3EC71DE3,0xA5774682 # double; setoxm1 small-|X| polynomial, B8
7013
EM1B7: long 0x3EFA01A0,0x19D7CB68 # double; B7
7014
7015
EM1B6: long 0x3F2A01A0,0x1A019DF3 # double; B6
7016
EM1B5: long 0x3F56C16C,0x16C170E2 # double; B5
7017
7018
EM1B4: long 0x3F811111,0x11111111 # double; B4
7019
EM1B3: long 0x3FA55555,0x55555555 # double; B3
7020
7021
EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # extended; B2, read with fadd.x
7022
long 0x00000000 # pad word following EM1B2
7023
7024
TWO140: long 0x48B00000,0x00000000 # double 2^(140): exponent field 0x48B = 1023+140
7025
TWON140:
7026
long 0x37300000,0x00000000 # double 2^(-140): exponent field 0x373 = 1023-140
7027
7028
# EEXPTBL: table of 2^(J/64) for J = 0..63, one 16-byte entry per J
# (callers index it with J << 4).
# Entry layout, as read by setox/setoxm1:
#   bytes 0..11  T, extended precision ("fmov.x (%a1)+" / "fadd.x (%a1)")
#   bytes 12..15 t, single precision   ("fadd.s (%a1)" after the post-increment,
#                                        or "fmov.s 12(%a1)")
# T + t together approximate 2^(J/64); see the Step 5 notes in the header.
EEXPTBL:
7029
long 0x3FFF0000,0x80000000,0x00000000,0x00000000 # J = 0: T = 1.0, t = 0
7030
long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
7031
long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
7032
long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
7033
long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
7034
long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
7035
long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
7036
long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
7037
long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
7038
long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
7039
long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
7040
long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
7041
long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
7042
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
7043
long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
7044
long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
7045
long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
7046
long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
7047
long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
7048
long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
7049
long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
7050
long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
7051
long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
7052
long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
7053
long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
7054
long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
7055
long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
7056
long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
7057
long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
7058
long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
7059
long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
7060
long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
7061
long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 # J = 32
7062
long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
7063
long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
7064
long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
7065
long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
7066
long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
7067
long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
7068
long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
7069
long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
7070
long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
7071
long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
7072
long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
7073
long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
7074
long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
7075
long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
7076
long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
7077
long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
7078
long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
7079
long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
7080
long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
7081
long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
7082
long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
7083
long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
7084
long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
7085
long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
7086
long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
7087
long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
7088
long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
7089
long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
7090
long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
7091
long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
7092
long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A # J = 63 (last entry)
7093
7094
# Scratch-slot aliases used by setox and setoxm1 (all addressed off %a6).
# SCALE/SC and ADJSCALE/ONEBYSC name the same two slots: setox uses the
# first name of each pair, setoxm1 the second.
set ADJFLAG,L_SCR2 # setox: 0 on the normal path, 1 on the EEXPBIG path
7095
set SCALE,FP_SCR0 # setox: 2^M, built as an extended-precision value
7096
set ADJSCALE,FP_SCR1 # setox: 2^M1, only built on the EEXPBIG path
7097
set SC,FP_SCR0 # setoxm1: 2^M, or -2^(-16382) on the tiny-|X| paths
7098
set ONEBYSC,FP_SCR1 # setoxm1: -2^(-M)
7099
7100
# setox: fp0 = exp(X) for the extended-precision operand at (a0).
# d0 = caller's rounding precision/mode; it is loaded into FPCR only
# immediately before the final arithmetic operation of each path.
# Register use in this routine (note: some legacy comments said "D0"/"a0";
# the code actually uses d1/a1):
#   d1 = integer scratch (compact |X|, N, J, M, biased exponents)
#   a1 = pointer to the EEXPTBL entry for 2^(J/64)
#   fp2/fp3 are saved on the stack across Steps 2-5 and restored in Step 5.
# Scratch memory: L_SCR1, ADJFLAG, SCALE, ADJSCALE (all off %a6).
# Exits via t_catch / t_pinx2 / t_unfl2 / t_ovfl2, defined elsewhere.
global setox
7101
setox:
7102
#--entry point for EXP(X), here X is finite, non-zero, and not NaN's
7103
7104
#--Step 1.
7105
mov.l (%a0),%d1 # load part of input X
7106
and.l &0x7FFF0000,%d1 # biased expo. of X
7107
cmp.l %d1,&0x3FBE0000 # 2^(-65)
7108
bge.b EXPC1 # normal case
7109
bra EXPSM
7110
7111
# EXPC1 is also entered from EM1BIG (setoxm1, Step 10.1) with d1 holding
# the first longword of a positive X, so the upper word is again the
# biased exponent with a clear sign bit.
EXPC1:
7112
#--The case |X| >= 2^(-65)
7113
mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7114
cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
7115
blt.b EXPMAIN # normal case
7116
bra EEXPBIG
7117
7118
EXPMAIN:
7119
#--Step 2.
7120
#--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
7121
fmov.x (%a0),%fp0 # load input from (a0)
7122
7123
fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
7124
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7125
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7126
mov.l &0,ADJFLAG(%a6) # AdjFlag := 0 (Step 2.1)
7127
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7128
lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7129
fmov.l %d1,%fp0 # convert to floating-format
7130
7131
mov.l %d1,L_SCR1(%a6) # save N temporarily
7132
and.l &0x3F,%d1 # d1 is J = N mod 64
7133
lsl.l &4,%d1 # J * 16: each table entry is 16 bytes
7134
add.l %d1,%a1 # address of 2^(J/64)
7135
mov.l L_SCR1(%a6),%d1 # reload N
7136
asr.l &6,%d1 # d1 is M
7137
add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7138
mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
7139
7140
EXPCONT1:
7141
#--Step 3.
7142
#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7143
#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
7144
fmov.x %fp0,%fp2
7145
fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7146
fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7147
fadd.x %fp1,%fp0 # X + N*L1
7148
fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7149
7150
#--Step 4.
7151
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7152
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
7153
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7154
#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
7155
7156
fmov.x %fp0,%fp1
7157
fmul.x %fp1,%fp1 # fp1 IS S = R*R
7158
7159
fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
7160
7161
fmul.x %fp1,%fp2 # fp2 IS S*A5
7162
fmov.x %fp1,%fp3
7163
fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
7164
7165
fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
7166
fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
7167
7168
fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
7169
mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
7170
mov.l &0x80000000,SCALE+4(%a6) # mantissa high longword = 1.0
7171
clr.l SCALE+8(%a6) # mantissa low longword = 0
7172
7173
fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
7174
7175
fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
7176
fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
7177
7178
fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
7179
fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
7180
7181
fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
7182
fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
7183
7184
#--Step 5
7185
#--final reconstruction process
7186
#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
7187
7188
fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
7189
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7190
fadd.s (%a1),%fp0 # accurate 2^(J/64)
7191
7192
fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
7193
mov.l ADJFLAG(%a6),%d1
7194
7195
#--Step 6
7196
tst.l %d1 # AdjFlag = 0 -> skip the extra scaling
7197
beq.b NORMAL
7198
ADJUST:
7199
fmul.x ADJSCALE(%a6),%fp0 # ans := ans * 2^(M1) (Step 6.2)
7200
NORMAL:
7201
fmov.l %d0,%fpcr # restore user FPCR
7202
mov.b &FMUL_OP,%d1 # last inst is MUL
7203
fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
7204
bra t_catch
7205
7206
EXPSM:
7207
#--Step 7
7208
fmovm.x (%a0),&0x80 # load X
7209
fmov.l %d0,%fpcr # user rounding applies to the add below only
7210
fadd.s &0x3F800000,%fp0 # 1+X in user mode
7211
bra t_pinx2
7212
7213
EEXPBIG:
7214
#--Step 8
7215
cmp.l %d1,&0x400CB27C # 16480 log2
7216
bgt.b EXP2BIG
7217
#--Steps 8.2 -- 8.6
7218
fmov.x (%a0),%fp0 # load input from (a0)
7219
7220
fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
7221
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7222
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7223
mov.l &1,ADJFLAG(%a6) # AdjFlag := 1 (Step 8.4)
7224
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7225
lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7226
fmov.l %d1,%fp0 # convert to floating-format
7227
mov.l %d1,L_SCR1(%a6) # save N temporarily
7228
and.l &0x3F,%d1 # d1 is J = N mod 64
7229
lsl.l &4,%d1 # J * 16: each table entry is 16 bytes
7230
add.l %d1,%a1 # address of 2^(J/64)
7231
mov.l L_SCR1(%a6),%d1 # reload N
7232
asr.l &6,%d1 # d1 is K
7233
mov.l %d1,L_SCR1(%a6) # save K temporarily
7234
asr.l &1,%d1 # d1 is M1
7235
sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
7236
add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
7237
mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
7238
mov.l &0x80000000,ADJSCALE+4(%a6) # mantissa high longword = 1.0
7239
clr.l ADJSCALE+8(%a6) # mantissa low longword = 0
7240
mov.l L_SCR1(%a6),%d1 # d1 is M
7241
add.w &0x3FFF,%d1 # biased expo. of 2^(M)
7242
bra.w EXPCONT1 # go back to Step 3
7243
7244
EXP2BIG:
7245
#--Step 9
7246
tst.b (%a0) # is X positive or negative?
7247
bmi t_unfl2 # X < 0: result underflows
7248
bra t_ovfl2 # X > 0: result overflows
7249
7250
# setoxd: exp(X) for a denormalized operand at (a0).
# Returns 1 + sign(X)*2^(-126), with the addition performed under the
# caller's rounding precision/mode (d0). Only the sign of X is used.
global setoxd
7251
setoxd:
7252
#--entry point for EXP(X), X is denormalized
7253
mov.l (%a0),-(%sp) # push the first longword of X (holds the sign bit)
7254
andi.l &0x80000000,(%sp) # keep only the sign bit
7255
ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
7256
7257
fmov.s &0x3F800000,%fp0 # fp0 = 1.0
7258
7259
fmov.l %d0,%fpcr # user rounding applies to the add below
7260
fadd.s (%sp)+,%fp0 # 1 + sign(X)*2^(-126); also pops the temporary
7261
bra t_pinx2
7262
7263
# setoxm1: fp0 = exp(X)-1 for the extended-precision operand at (a0).
# d0 = caller's rounding precision/mode; it is loaded into FPCR only
# immediately before the final arithmetic operation of each path.
# Register use in this routine (note: some legacy comments said "D0"/"a0";
# the code actually uses d1/a1):
#   d1 = integer scratch (compact |X|, N, J, M, biased exponents)
#   a1 = pointer to the EEXPTBL entry for 2^(J/64)
#   fp2/fp3 are saved on the stack while in use and restored afterwards.
# Scratch memory: L_SCR1 (holds M from Step 2 on), SC, ONEBYSC (off %a6).
# Exits via t_inx2 / t_catch / t_minx2 (defined elsewhere), or joins
# setox at EXPC1 for large positive X.
global setoxm1
7264
setoxm1:
7265
#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
7266
7267
#--Step 1.
7268
#--Step 1.1
7269
mov.l (%a0),%d1 # load part of input X
7270
and.l &0x7FFF0000,%d1 # biased expo. of X
7271
cmp.l %d1,&0x3FFD0000 # 1/4
7272
bge.b EM1CON1 # |X| >= 1/4
7273
bra EM1SM
7274
7275
EM1CON1:
7276
#--Step 1.3
7277
#--The case |X| >= 1/4
7278
mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
7279
cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
7280
ble.b EM1MAIN # 1/4 <= |X| <= 70log2
7281
bra EM1BIG
7282
7283
EM1MAIN:
7284
#--Step 2.
7285
#--This is the case: 1/4 <= |X| <= 70 log2.
7286
fmov.x (%a0),%fp0 # load input from (a0)
7287
7288
fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
7289
fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
7290
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7291
fmov.l %fp0,%d1 # N = int( X * 64/log2 )
7292
lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
7293
fmov.l %d1,%fp0 # convert to floating-format
7294
7295
mov.l %d1,L_SCR1(%a6) # save N temporarily
7296
and.l &0x3F,%d1 # d1 is J = N mod 64
7297
lsl.l &4,%d1 # J * 16: each table entry is 16 bytes
7298
add.l %d1,%a1 # address of 2^(J/64)
7299
mov.l L_SCR1(%a6),%d1 # reload N
7300
asr.l &6,%d1 # d1 is M
7301
mov.l %d1,L_SCR1(%a6) # save a copy of M
7302
7303
#--Step 3.
7304
#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
7305
#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
7306
fmov.x %fp0,%fp2
7307
fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
7308
fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
7309
fadd.x %fp1,%fp0 # X + N*L1
7310
fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
7311
add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M
7312
7313
#--Step 4.
7314
#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
7315
#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
7316
#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
7317
#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
7318
7319
fmov.x %fp0,%fp1
7320
fmul.x %fp1,%fp1 # fp1 IS S = R*R
7321
7322
fmov.s &0x3950097B,%fp2 # fp2 IS a6
7323
7324
fmul.x %fp1,%fp2 # fp2 IS S*A6
7325
fmov.x %fp1,%fp3
7326
fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
7327
7328
fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
7329
fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
7330
mov.w %d1,SC(%a6) # SC is 2^(M) in extended
7331
mov.l &0x80000000,SC+4(%a6) # mantissa high longword = 1.0
7332
clr.l SC+8(%a6) # mantissa low longword = 0
7333
7334
fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
7335
mov.l L_SCR1(%a6),%d1 # d1 is M
7336
neg.w %d1 # d1 is -M
7337
fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
7338
add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
7339
fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
7340
fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
7341
7342
fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
7343
or.w &0x8000,%d1 # signed/expo. of -2^(-M)
7344
mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
7345
mov.l &0x80000000,ONEBYSC+4(%a6) # mantissa high longword = 1.0
7346
clr.l ONEBYSC+8(%a6) # mantissa low longword = 0
7347
fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
7348
7349
fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
7350
fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
7351
7352
fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
7353
7354
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7355
7356
#--Step 5
7357
#--Compute 2^(J/64)*p
7358
7359
fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
7360
7361
#--Step 6
7362
#--Step 6.1
7363
mov.l L_SCR1(%a6),%d1 # retrieve M
7364
cmp.l %d1,&63
7365
ble.b MLE63
7366
#--Step 6.2 M >= 64
7367
fmov.s 12(%a1),%fp1 # fp1 is t
7368
fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
7369
fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
7370
fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
7371
bra EM1SCALE
7372
MLE63:
7373
#--Step 6.3 M <= 63
7374
cmp.l %d1,&-3
7375
bge.b MGEN3
7376
MLTN3:
7377
#--Step 6.4 M <= -4
7378
fadd.s 12(%a1),%fp0 # p+t
7379
fadd.x (%a1),%fp0 # T+(p+t)
7380
fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
7381
bra EM1SCALE
7382
MGEN3:
7383
#--Step 6.5 -3 <= M <= 63
7384
fmov.x (%a1)+,%fp1 # fp1 is T
7385
fadd.s (%a1),%fp0 # fp0 is p+t
7386
fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
7387
fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
7388
7389
EM1SCALE:
7390
#--Step 6.6
7391
fmov.l %d0,%fpcr # user rounding applies to the multiply below
7392
fmul.x SC(%a6),%fp0 # ans := Sc * ans (Step 6.7)
7393
bra t_inx2
7394
7395
# On entry to EM1SM, d1 still holds the masked biased exponent from Step 1.1.
EM1SM:
7396
#--Step 7 |X| < 1/4.
7397
cmp.l %d1,&0x3FBE0000 # 2^(-65)
7398
bge.b EM1POLY
7399
7400
EM1TINY:
7401
#--Step 8 |X| < 2^(-65)
# NOTE(review): 0x0033 is biased exponent 51, i.e. 2^(51-16383) = 2^(-16332);
# the 2^(-16312) quoted below and in the header does not match the
# constant -- confirm which one is intended.
7402
cmp.l %d1,&0x00330000 # 2^(-16312)
7403
blt.b EM12TINY
7404
#--Step 8.2
7405
mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
7406
mov.l &0x80000000,SC+4(%a6)
7407
clr.l SC+8(%a6)
7408
fmov.x (%a0),%fp0
7409
fmov.l %d0,%fpcr # user rounding applies to the add below
7410
mov.b &FADD_OP,%d1 # last inst is ADD
7411
fadd.x SC(%a6),%fp0 # ans := X - 2^(-16382)
7412
bra t_catch
7413
7414
EM12TINY:
7415
#--Step 8.3
7416
fmov.x (%a0),%fp0
7417
fmul.d TWO140(%pc),%fp0 # scale X up by 2^(140)
7418
mov.l &0x80010000,SC(%a6) # SC is -2^(-16382), built as above
7419
mov.l &0x80000000,SC+4(%a6)
7420
clr.l SC+8(%a6)
7421
fadd.x SC(%a6),%fp0 # scaled X - 2^(-16382)
7422
fmov.l %d0,%fpcr # user rounding applies to the multiply below
7423
mov.b &FMUL_OP,%d1 # last inst is MUL
7424
fmul.d TWON140(%pc),%fp0 # scale back down by 2^(-140)
7425
bra t_catch
7426
7427
EM1POLY:
7428
#--Step 9 exp(X)-1 by a simple polynomial
7429
fmov.x (%a0),%fp0 # fp0 is X
7430
fmul.x %fp0,%fp0 # fp0 is S := X*X
7431
fmovm.x &0xc,-(%sp) # save fp2 {%fp2/%fp3}
7432
fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
7433
fmul.x %fp0,%fp1 # fp1 is S*B12
7434
fmov.s &0x310F8290,%fp2 # fp2 is B11
7435
fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
7436
7437
fmul.x %fp0,%fp2 # fp2 is S*B11
7438
fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
7439
7440
fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
7441
fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
7442
7443
fmul.x %fp0,%fp2 # fp2 is S*(B9+...
7444
fmul.x %fp0,%fp1 # fp1 is S*(B8+...
7445
7446
fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
7447
fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
7448
7449
fmul.x %fp0,%fp2 # fp2 is S*(B7+...
7450
fmul.x %fp0,%fp1 # fp1 is S*(B6+...
7451
7452
fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
7453
fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
7454
7455
fmul.x %fp0,%fp2 # fp2 is S*(B5+...
7456
fmul.x %fp0,%fp1 # fp1 is S*(B4+...
7457
7458
fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
7459
fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
7460
7461
fmul.x %fp0,%fp2 # fp2 is S*(B3+...
7462
fmul.x %fp0,%fp1 # fp1 is S*(B2+...
7463
7464
fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
7465
fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
7466
7467
fmul.s &0x3F000000,%fp0 # fp0 is S*B1
7468
fadd.x %fp2,%fp1 # fp1 is Q
7469
7470
fmovm.x (%sp)+,&0x30 # fp2 restored {%fp2/%fp3}
7471
7472
fadd.x %fp1,%fp0 # fp0 is S*B1+Q
7473
7474
fmov.l %d0,%fpcr # user rounding applies to the add below
7475
fadd.x (%a0),%fp0 # ans := X + (S*B1 + Q)
7476
bra t_inx2
7477
7478
EM1BIG:
7479
#--Step 10 |X| > 70 log2
7480
mov.l (%a0),%d1 # first longword of X: sign in the top bit
7481
cmp.l %d1,&0
7482
bgt.w EXPC1 # X positive: continue in setox at EXPC1 (Step 10.1)
7483
#--Step 10.2
7484
fmov.s &0xBF800000,%fp0 # fp0 is -1
7485
fmov.l %d0,%fpcr # user rounding applies to the add below
7486
fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
7487
bra t_minx2
7488
7489
# setoxm1d: exp(X)-1 for a denormalized operand.
# The routine does no work of its own; it branches straight to
# t_extdnrm, which is defined elsewhere in the file.
global setoxm1d
7490
setoxm1d:
7491
#--entry point for EXPM1(X), here X is denormalized
7492
#--Step 0.
7493
bra t_extdnrm
7494
7495
#########################################################################
7496
# sgetexp(): returns the exponent portion of the input argument. #
7497
# The exponent bias is removed and the exponent value is #
7498
# returned as an extended precision number in fp0. #
7499
# sgetexpd(): handles denormalized numbers. #
7500
# #
7501
# sgetman(): extracts the mantissa of the input argument. The #
7502
# mantissa is converted to an extended precision number w/ #
7503
# an exponent of $3fff and is returned in fp0. The range of #
7504
# the result is [1.0 - 2.0). #
7505
# sgetmand(): handles denormalized numbers. #
7506
# #
7507
# INPUT *************************************************************** #
7508
# a0 = pointer to extended precision input #
7509
# #
7510
# OUTPUT ************************************************************** #
7511
# fp0 = exponent(X) or mantissa(X) #
7512
# #
7513
#########################################################################
7514
7515
# sgetexp: fp0 = unbiased exponent of the operand at (a0), as a number.
# Clobbers d0. When the unbiased exponent is negative, the 'N' condition
# code is additionally recorded in FPSR_CC(%a6).
global sgetexp
7516
sgetexp:
7517
mov.w SRC_EX(%a0),%d0 # get the exponent
7518
bclr &0xf,%d0 # clear the sign bit
7519
subi.w &0x3fff,%d0 # subtract off the bias
7520
fmov.w %d0,%fp0 # return exp in fp0
# the branch below consumes the condition codes left by the subi.w above
7521
blt.b sgetexpn # it's negative
7522
rts
7523
7524
sgetexpn:
7525
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7526
rts
7527
7528
# sgetexpd: sgetexp for a denormalized operand at (a0).
# norm is defined elsewhere; per the comment on neg.w it leaves the
# normalization shift count in d0 -- confirm against norm.
# The result is always flagged negative ('N' set in FPSR_CC).
global sgetexpd
7529
sgetexpd:
7530
bsr.l norm # normalize
7531
neg.w %d0 # new exp = -(shft amt)
7532
subi.w &0x3fff,%d0 # subtract off the bias
7533
fmov.w %d0,%fp0 # return exp in fp0
7534
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
7535
rts
7536
7537
# sgetman(): GETMAN for a normalized operand (a0 = ptr to operand).
# Builds sign(X) * 1.mantissa by forcing the exponent field to $3fff while
# keeping the sign bit, then loads the result into fp0.
        global          sgetman
sgetman:
        mov.w           SRC_EX(%a0),%d0         # get sign/exponent word
        ori.w           &0x7fff,%d0             # force all 15 exponent bits on (sign kept)
        bclr            &0xe,%d0                # exponent field is now $3fff: +-3fff

# here, we build the result in a tmp location so as not to disturb the input
        mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)     # copy to tmp loc
        mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)     # copy to tmp loc
        mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
        fmov.x          FP_SCR0(%a6),%fp0       # put new value back in fp0
# The branch below tests the N flag set by the mov.w of the sign/exponent
# word; the fmov.x does not modify the integer CCR.
        bmi.b           sgetmann                # it's negative
        rts

# Operand was negative: record 'N' in the saved FPSR cc byte.
sgetmann:
        mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
        rts

#
# For denormalized numbers, shift the mantissa until the j-bit = 1,
# then load the exponent with +/- $3fff.
#
# sgetmand(): normalizes the operand via norm (defined elsewhere) and then
# continues in sgetman, which installs the $3fff exponent.
#
        global          sgetmand
sgetmand:
        bsr.l           norm                    # normalize exponent
        bra.b           sgetman

#########################################################################
# scosh():  computes the hyperbolic cosine of a normalized input
# scoshd(): computes the hyperbolic cosine of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = cosh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       COSH
#       1. If |X| > 16380 log2, go to 3.
#
#       2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
#               y = |X|, z = exp(y), and
#               cosh(X) = (1/2)*( z + 1/z ).
#               Exit.
#
#       3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
#
#       4. (16380 log2 < |X| <= 16480 log2)
#               cosh(X) = exp(|X|)/2.
#               However, invoking exp(|X|) may cause premature
#               overflow. Thus, we calculate cosh(X) as follows:
#               Y       := |X|
#               Fact    := 2**(16380)
#               Y'      := Y - 16381 log2
#               cosh(X) := Fact * exp(Y').
#               Exit.
#
#       5. (|X| > 16480 log2) cosh(X) must overflow. Return
#               Huge*Huge to generate overflow and an infinity with
#               the appropriate sign. Huge is the largest finite number
#               in extended format. Exit.
#
#########################################################################

# 2**16380 in extended precision (biased exponent $7FFB = $3FFF + 16380).
TWO16380:
        long            0x7FFB0000,0x80000000,0x00000000,0x00000000

        global          scosh
scosh:
        fmov.x          (%a0),%fp0              # LOAD INPUT

# d1 = compacted |X|: sign/exponent word in the upper half, the top 16
# mantissa bits in the lower half, sign bit masked off.
        mov.l           (%a0),%d1
        mov.w           4(%a0),%d1
        and.l           &0x7FFFFFFF,%d1
        cmp.l           %d1,&0x400CB167
        bgt.b           COSHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )

        fabs.x          %fp0                    # |X|

# setox is called with d0 cleared; the user's rounding info is kept on the
# stack across the call and restored afterwards.
        mov.l           %d0,-(%sp)
        clr.l           %d0
        fmovm.x         &0x01,-(%sp)            # save |X| to stack
        lea             (%sp),%a0               # pass ptr to |X|
        bsr             setox                   # FP0 IS EXP(|X|)
        add.l           &0xc,%sp                # erase |X| from stack
        fmul.s          &0x3F000000,%fp0        # (1/2)EXP(|X|)
        mov.l           (%sp)+,%d0

        fmov.s          &0x3E800000,%fp1        # (1/4)
        fdiv.x          %fp0,%fp1               # 1/(2 EXP(|X|))

# The user's rounding precision/mode is installed only for the final add.
        fmov.l          %d0,%fpcr
        mov.b           &FADD_OP,%d1            # last inst is ADD
        fadd.x          %fp1,%fp0
        bra             t_catch

# 16380 log2 < |X|: either scale (step 4) or overflow (step 5).
COSHBIG:
        cmp.l           %d1,&0x400CB2B3
        bgt.b           COSHHUGE

        fabs.x          %fp0
        fsub.d          T1(%pc),%fp0            # (|X|-16381LOG2_LEAD)
        fsub.d          T2(%pc),%fp0            # |X| - 16381 LOG2, ACCURATE

        mov.l           %d0,-(%sp)
        clr.l           %d0
        fmovm.x         &0x01,-(%sp)            # save fp0 to stack
        lea             (%sp),%a0               # pass ptr to fp0
        bsr             setox
        add.l           &0xc,%sp                # clear fp0 from stack
        mov.l           (%sp)+,%d0

        fmov.l          %d0,%fpcr
        mov.b           &FMUL_OP,%d1            # last inst is MUL
        fmul.x          TWO16380(%pc),%fp0
        bra             t_catch

# |X| > 16480 log2: leave through the overflow exit t_ovfl2.
COSHHUGE:
        bra             t_ovfl2

# scoshd(): COSH of a denormalized input (d0 = round precision,mode).
# The result is 1 + 2^(-126), added under the user's rounding
# precision/mode, and the routine leaves through t_pinx2.
        global          scoshd
#--COSH(X) = 1 FOR DENORMALIZED X
scoshd:
        fmov.s          &0x3F800000,%fp0        # fp0 = 1.0

        fmov.l          %d0,%fpcr               # user's round prec,mode
        fadd.s          &0x00800000,%fp0        # 1 + 2^(-126)
        bra             t_pinx2

#########################################################################
# ssinh():  computes the hyperbolic sine of a normalized input
# ssinhd(): computes the hyperbolic sine of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = sinh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       SINH
#       1. If |X| > 16380 log2, go to 3.
#
#       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula
#               y = |X|, sgn = sign(X), and z = expm1(y),
#               sinh(X) = sgn*(1/2)*( z + z/(1+z) ).
#               Exit.
#
#       3. If |X| > 16480 log2, go to 5.
#
#       4. (16380 log2 < |X| <= 16480 log2)
#               sinh(X) = sign(X) * exp(|X|)/2.
#               However, invoking exp(|X|) may cause premature overflow.
#               Thus, we calculate sinh(X) as follows:
#               Y       := |X|
#               sgn     := sign(X)
#               sgnFact := sgn * 2**(16380)
#               Y'      := Y - 16381 log2
#               sinh(X) := sgnFact * exp(Y').
#               Exit.
#
#       5. (|X| > 16480 log2) sinh(X) must overflow. Return
#               sign(X)*Huge*Huge to generate overflow and an infinity with
#               the appropriate sign. Huge is the largest finite number in
#               extended format. Exit.
#
#########################################################################

        global          ssinh
ssinh:
        fmov.x          (%a0),%fp0              # LOAD INPUT

# d1 = compacted X (sign/exponent word : top 16 mantissa bits). A copy with
# the sign still present is parked in a1 for later sign extraction.
        mov.l           (%a0),%d1
        mov.w           4(%a0),%d1
        mov.l           %d1,%a1                 # save (compacted) operand
        and.l           &0x7FFFFFFF,%d1
        cmp.l           %d1,&0x400CB167
        bgt.b           SINHBIG

#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )

        fabs.x          %fp0                    # Y = |X|

        movm.l          &0x8040,-(%sp)          # {a1/d0}
        fmovm.x         &0x01,-(%sp)            # save Y on stack
        lea             (%sp),%a0               # pass ptr to Y
        clr.l           %d0
        bsr             setoxm1                 # FP0 IS Z = EXPM1(Y)
        add.l           &0xc,%sp                # clear Y from stack
        fmov.l          &0,%fpcr
        movm.l          (%sp)+,&0x0201          # {a1/d0}

        fmov.x          %fp0,%fp1
        fadd.s          &0x3F800000,%fp1        # 1+Z
        fmov.x          %fp0,-(%sp)             # park Z on the stack
        fdiv.x          %fp1,%fp0               # Z/(1+Z)
# Build sign(X)*(1/2) as a single-precision constant in d1.
        mov.l           %a1,%d1
        and.l           &0x80000000,%d1
        or.l            &0x3F000000,%d1
        fadd.x          (%sp)+,%fp0             # Z + Z/(1+Z)
        mov.l           %d1,-(%sp)

        fmov.l          %d0,%fpcr
        mov.b           &FMUL_OP,%d1            # last inst is MUL
        fmul.s          (%sp)+,%fp0             # last fp inst - possible exceptions set
        bra             t_catch

# 16380 log2 < |X|: overflow exit if |X| > 16480 log2, else scaled exp.
SINHBIG:
        cmp.l           %d1,&0x400CB2B3
        bgt             t_ovfl
        fabs.x          %fp0
        fsub.d          T1(%pc),%fp0            # (|X|-16381LOG2_LEAD)
# Push sgnFact = sign(X) * 2**16380 as a 12-byte extended value
# (low mantissa, high mantissa, then sign/exponent longword).
        mov.l           &0,-(%sp)
        mov.l           &0x80000000,-(%sp)
        mov.l           %a1,%d1
        and.l           &0x80000000,%d1
        or.l            &0x7FFB0000,%d1
        mov.l           %d1,-(%sp)              # EXTENDED FMT
        fsub.d          T2(%pc),%fp0            # |X| - 16381 LOG2, ACCURATE

        mov.l           %d0,-(%sp)
        clr.l           %d0
        fmovm.x         &0x01,-(%sp)            # save fp0 on stack
        lea             (%sp),%a0               # pass ptr to fp0
        bsr             setox
        add.l           &0xc,%sp                # clear fp0 from stack

        mov.l           (%sp)+,%d0
        fmov.l          %d0,%fpcr
        mov.b           &FMUL_OP,%d1            # last inst is MUL
        fmul.x          (%sp)+,%fp0             # possible exception
        bra             t_catch

# ssinhd(): SINH of a denormalized input; no arithmetic is done here,
# control goes straight to t_extdnrm (defined elsewhere in this file).
        global          ssinhd
#--SINH(X) = X FOR DENORMALIZED X
ssinhd:
        bra             t_extdnrm

#########################################################################
# stanh():  computes the hyperbolic tangent of a normalized input
# stanhd(): computes the hyperbolic tangent of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = tanh(X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 3 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#
#       TANH
#       1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
#
#       2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
#               sgn := sign(X), y := 2|X|, z := expm1(y), and
#               tanh(X) = sgn*( z/(2+z) ).
#               Exit.
#
#       3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
#               go to 7.
#
#       4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
#
#       5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
#               sgn := sign(X), y := 2|X|, z := exp(y),
#               tanh(X) = sgn - [ sgn*2/(1+z) ].
#               Exit.
#
#       6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
#               calculate Tanh(X) by
#               sgn := sign(X), Tiny := 2**(-126),
#               tanh(X) := sgn - sgn*Tiny.
#               Exit.
#
#       7. (|X| < 2**(-40)). Tanh(X) = X.  Exit.
#
#########################################################################

# a6-relative scratch locations used below. X and V share FP_SCR0.
        set             X,FP_SCR0
        set             XFRAC,X+4

        set             SGN,L_SCR3

        set             V,FP_SCR0

        global          stanh
stanh:
        fmov.x          (%a0),%fp0              # LOAD INPUT

# Copy X to scratch, then overwrite its first longword with the compacted
# form (sign/exponent word : top 16 mantissa bits) that is also kept in d1.
        fmov.x          %fp0,X(%a6)
        mov.l           (%a0),%d1
        mov.w           4(%a0),%d1
        mov.l           %d1,X(%a6)
        and.l           &0x7FFFFFFF,%d1
        cmp.l           %d1, &0x3fd78000        # is |X| < 2^(-40)?
        blt.w           TANHBORS                # yes
        cmp.l           %d1, &0x3fffddce        # is |X| > (5/2)LOG2?
        bgt.w           TANHBORS                # yes

#--THIS IS THE USUAL CASE
#--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).

        mov.l           X(%a6),%d1
        mov.l           %d1,SGN(%a6)
        and.l           &0x7FFF0000,%d1
        add.l           &0x00010000,%d1         # EXPONENT OF 2|X|
        mov.l           %d1,X(%a6)
        and.l           &0x80000000,SGN(%a6)    # SGN keeps only the sign bit
        fmov.x          X(%a6),%fp0             # FP0 IS Y = 2|X|

        mov.l           %d0,-(%sp)
        clr.l           %d0
        fmovm.x         &0x1,-(%sp)             # save Y on stack
        lea             (%sp),%a0               # pass ptr to Y
        bsr             setoxm1                 # FP0 IS Z = EXPM1(Y)
        add.l           &0xc,%sp                # clear Y from stack
        mov.l           (%sp)+,%d0

        fmov.x          %fp0,%fp1
        fadd.s          &0x40000000,%fp1        # Z+2
        mov.l           SGN(%a6),%d1
        fmov.x          %fp1,V(%a6)
        eor.l           %d1,V(%a6)              # V = SIGN(X)*(Z+2)

        fmov.l          %d0,%fpcr               # restore users round prec,mode
        fdiv.x          V(%a6),%fp0
        bra             t_inx2

# |X| < 2^(-40) or |X| > (5/2) log2 arrives here with d1 = compacted |X|.
TANHBORS:
        cmp.l           %d1,&0x3FFF8000
        blt.w           TANHSM

        cmp.l           %d1,&0x40048AA1
        bgt.w           TANHHUGE

#-- (5/2) LOG2 < |X| < 50 LOG2,
#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].

        mov.l           X(%a6),%d1
        mov.l           %d1,SGN(%a6)
        and.l           &0x7FFF0000,%d1
        add.l           &0x00010000,%d1         # EXPO OF 2|X|
        mov.l           %d1,X(%a6)              # Y = 2|X|
        and.l           &0x80000000,SGN(%a6)
        mov.l           SGN(%a6),%d1
        fmov.x          X(%a6),%fp0             # Y = 2|X|

        mov.l           %d0,-(%sp)
        clr.l           %d0
        fmovm.x         &0x01,-(%sp)            # save Y on stack
        lea             (%sp),%a0               # pass ptr to Y
        bsr             setox                   # FP0 IS EXP(Y)
        add.l           &0xc,%sp                # clear Y from stack
        mov.l           (%sp)+,%d0
        mov.l           SGN(%a6),%d1
        fadd.s          &0x3F800000,%fp0        # EXP(Y)+1

        eor.l           &0xC0000000,%d1         # -SIGN(X)*2
        fmov.s          %d1,%fp1                # -SIGN(X)*2 IN SGL FMT
        fdiv.x          %fp0,%fp1               # -SIGN(X)2 / [EXP(Y)+1 ]

        mov.l           SGN(%a6),%d1
        or.l            &0x3F800000,%d1         # SGN
        fmov.s          %d1,%fp0                # SGN IN SGL FMT

        fmov.l          %d0,%fpcr               # restore users round prec,mode
        mov.b           &FADD_OP,%d1            # last inst is ADD
        fadd.x          %fp1,%fp0
        bra             t_inx2

# |X| < 2^(-40): return X itself, moved under the user's rounding mode.
TANHSM:
        fmov.l          %d0,%fpcr               # restore users round prec,mode
        mov.b           &FMOV_OP,%d1            # last inst is MOVE
        fmov.x          X(%a6),%fp0             # last inst - possible exception set
        bra             t_catch

#---RETURN SGN(X) - SGN(X)EPS
TANHHUGE:
        mov.l           X(%a6),%d1
        and.l           &0x80000000,%d1
        or.l            &0x3F800000,%d1
        fmov.s          %d1,%fp0                # fp0 = SIGN(X)*1.0
        and.l           &0x80000000,%d1
        eor.l           &0x80800000,%d1         # -SIGN(X)*EPS

        fmov.l          %d0,%fpcr               # restore users round prec,mode
        fadd.s          %d1,%fp0
        bra             t_inx2

# stanhd(): TANH of a denormalized input; no arithmetic is done here,
# control goes straight to t_extdnrm (defined elsewhere in this file).
        global          stanhd
#--TANH(X) = X FOR DENORMALIZED X
stanhd:
        bra             t_extdnrm

#########################################################################
# slogn():    computes the natural logarithm of a normalized input
# slognd():   computes the natural logarithm of a denormalized input
# slognp1():  computes the log(1+X) of a normalized input
# slognp1d(): computes the log(1+X) of a denormalized input
#
# INPUT ***************************************************************
#       a0 = pointer to extended precision input
#       d0 = round precision,mode
#
# OUTPUT **************************************************************
#       fp0 = log(X) or log(1+X)
#
# ACCURACY and MONOTONICITY *******************************************
#       The returned result is within 2 ulps in 64 significant bit,
#       i.e. within 0.5001 ulp to 53 bits if the result is subsequently
#       rounded to double precision. The result is provably monotonic
#       in double precision.
#
# ALGORITHM ***********************************************************
#       LOGN:
#       Step 1. If |X-1| < 1/16, approximate log(X) by an odd
#               polynomial in u, where u = 2(X-1)/(X+1). Otherwise,
#               move on to Step 2.
#
#       Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first
#               seven significant bits of Y plus 2**(-7), i.e.
#               F = 1.xxxxxx1 in base 2 where the six "x" match those
#               of Y. Note that |Y-F| <= 2**(-7).
#
#       Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a
#               polynomial in u, log(1+u) = poly.
#
#       Step 4. Reconstruct
#               log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
#               by k*log(2) + (log(F) + poly). The values of log(F) are
#               calculated beforehand and stored in the program.
#
#       lognp1:
#       Step 1: If |X| < 1/16, approximate log(1+X) by an odd
#               polynomial in u where u = 2X/(2+X). Otherwise, move on
#               to Step 2.
#
#       Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done
#               in Step 2 of the algorithm for LOGN and compute
#               log(1+X) as k*log(2) + log(F) + poly where poly
#               approximates log(1+u), u = (Y-F)/F.
#
#       Implementation Notes:
#       Note 1. There are 64 different possible values for F, thus 64
#               log(F)'s need to be tabulated. Moreover, the values of
#               1/F are also tabulated so that the division in (Y-F)/F
#               can be performed by a multiplication.
#
#       Note 2. In Step 2 of lognp1, in order to preserve accuracy,
#               the value Y-F has to be calculated carefully when
#               1/2 <= X < 3/2.
#
#       Note 3. To fully exploit the pipeline, polynomials are usually
#               separated into two parts evaluated independently before
#               being added up.
#
#########################################################################

# log(2) in extended precision (read with fmul.x by the log code).
LOGOF2:
        long            0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000

# Single-precision constants: 1.0, 0.0, +infinity, -1.0.
one:
        long            0x3F800000
zero:
        long            0x00000000
infty:
        long            0x7F800000
negone:
        long            0xBF800000

# Double-precision coefficients A1..A6 of the log(1+U) polynomial used on
# the table-driven path (read with fmul.d / fadd.d).
LOGA6:
        long            0x3FC2499A,0xB5E4040B
LOGA5:
        long            0xBFC555B5,0x848CB7DB

LOGA4:
        long            0x3FC99999,0x987D8730
LOGA3:
        long            0xBFCFFFFF,0xFF6F7E97

LOGA2:
        long            0x3FD55555,0x555555A4
LOGA1:
        long            0xBFE00000,0x00000008

# Double-precision coefficients B1..B5 of the odd polynomial used when the
# argument is near 1 (read with fmov.d / fadd.d).
LOGB5:
        long            0x3F175496,0xADD7DAD6
LOGB4:
        long            0x3F3C71C2,0xFE80C7E0

LOGB3:
        long            0x3F624924,0x928BCCFF
LOGB2:
        long            0x3F899999,0x999995EC

LOGB1:
        long            0x3FB55555,0x55555555
# 2.0; the log(1+X) code reads the first longword as a single (fmov.s).
TWO:
        long            0x40000000,0x00000000

# Extended-precision 2**(-102) (biased exponent $3F99): the minimum
# magnitude threshold tested at the top of slognp1.
LTHOLD:
        long            0x3f990000,0x80000000,0x00000000,0x00000000

# LOGTBL: 64 entries of 32 bytes each, one per possible F = 1.xxxxxx1b.
# Each entry is two 16-byte slots holding extended-precision values:
# 1/F first, then log(F) at offset 16. The log code forms the entry
# offset from the six "x" bits of F and later adds 16 to reach log(F).
LOGTBL:
        long            0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
        long            0x3FF70000,0xFF015358,0x833C47E2,0x00000000
        long            0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
        long            0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
        long            0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
        long            0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
        long            0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
        long            0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
        long            0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
        long            0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
        long            0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
        long            0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
        long            0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
        long            0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
        long            0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
        long            0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
        long            0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
        long            0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
        long            0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
        long            0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
        long            0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
        long            0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
        long            0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
        long            0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
        long            0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
        long            0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
        long            0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
        long            0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
        long            0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
        long            0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
        long            0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
        long            0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
        long            0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
        long            0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
        long            0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
        long            0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
        long            0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
        long            0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
        long            0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
        long            0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
        long            0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
        long            0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
        long            0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
        long            0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
        long            0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
        long            0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
        long            0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
        long            0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
        long            0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
        long            0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
        long            0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
        long            0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
        long            0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
        long            0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
        long            0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
        long            0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
        long            0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
        long            0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
        long            0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
        long            0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
        long            0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
        long            0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
        long            0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
        long            0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
        long            0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
        long            0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
        long            0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
        long            0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
        long            0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
        long            0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
        long            0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
        long            0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
        long            0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
        long            0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
        long            0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
        long            0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
        long            0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
        long            0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
        long            0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
        long            0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
        long            0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
        long            0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
        long            0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
        long            0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
        long            0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
        long            0x3FFE0000,0x825EFCED,0x49369330,0x00000000
        long            0x3FFE0000,0x9868C809,0x868C8098,0x00000000
        long            0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
        long            0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
        long            0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
        long            0x3FFE0000,0x95A02568,0x095A0257,0x00000000
        long            0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
        long            0x3FFE0000,0x94458094,0x45809446,0x00000000
        long            0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
        long            0x3FFE0000,0x92F11384,0x0497889C,0x00000000
        long            0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
        long            0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
        long            0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
        long            0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
        long            0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
        long            0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
        long            0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
        long            0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
        long            0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
        long            0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
        long            0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
        long            0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
        long            0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
        long            0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
        long            0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
        long            0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
        long            0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
        long            0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
        long            0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
        long            0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
        long            0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
        long            0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
        long            0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
        long            0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
        long            0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
        long            0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
        long            0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
        long            0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
        long            0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
        long            0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
        long            0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
        long            0x3FFE0000,0x80808080,0x80808081,0x00000000
        long            0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000

# a6-relative scratch locations used by the log routines.
# X, KLOG2 and SAVEU all alias FP_SCR0; F lives in FP_SCR1.
        set             ADJK,L_SCR1

        set             X,FP_SCR0
        set             XDCARE,X+2
        set             XFRAC,X+4

        set             F,FP_SCR1
        set             FFRAC,F+4

        set             KLOG2,FP_SCR0

        set             SAVEU,FP_SCR0

        global          slogn
#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
slogn:
        fmov.x          (%a0),%fp0              # LOAD INPUT
        mov.l           &0x00000000,ADJK(%a6)

LOGBGN:
#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
#--(slognd re-enters here with a0 -> normalized copy and ADJK = -shift.)

# d1 = compacted X: sign/exponent word : top 16 mantissa bits.
        mov.l           (%a0),%d1
        mov.w           4(%a0),%d1

        mov.l           (%a0),X(%a6)
        mov.l           4(%a0),X+4(%a6)
        mov.l           8(%a0),X+8(%a6)

        cmp.l           %d1,&0                  # CHECK IF X IS NEGATIVE
        blt.w           LOGNEG                  # LOG OF NEGATIVE ARGUMENT IS INVALID
# X IS POSITIVE, CHECK IF X IS NEAR 1
# (the two bounds are the compacted forms of exp(-1/16) and exp(1/16),
#  i.e. approximately 15/16 and 17/16)
        cmp.l           %d1,&0x3ffef07d         # IS X < 15/16?
        blt.b           LOGMAIN                 # YES
        cmp.l           %d1,&0x3fff8841         # IS X > 17/16?
        ble.w           LOGNEAR1                # NO

LOGMAIN:
#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1

#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
#--                        = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
#--LOG(1+U) CAN BE VERY EFFICIENT.
#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.

#--GET K, Y, F, AND ADDRESS OF 1/F.
        asr.l           &8,%d1
        asr.l           &8,%d1                  # SHIFTED 16 BITS, BIASED EXPO. OF X
        sub.l           &0x3FFF,%d1             # THIS IS K
        add.l           ADJK(%a6),%d1           # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
        lea             LOGTBL(%pc),%a0         # BASE ADDRESS OF 1/F AND LOG(F)
        fmov.l          %d1,%fp1                # CONVERT K TO FLOATING-POINT FORMAT

#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
        mov.l           &0x3FFF0000,X(%a6)      # X IS NOW Y, I.E. 2^(-K)*X
        mov.l           XFRAC(%a6),FFRAC(%a6)
        and.l           &0xFE000000,FFRAC(%a6)  # FIRST 7 BITS OF Y
        or.l            &0x01000000,FFRAC(%a6)  # GET F: ATTACH A 1 AT THE EIGHTH BIT
        mov.l           FFRAC(%a6),%d1          # READY TO GET ADDRESS OF 1/F
        and.l           &0x7E000000,%d1
        asr.l           &8,%d1
        asr.l           &8,%d1
        asr.l           &4,%d1                  # SHIFTED 20, D1 IS THE DISPLACEMENT
        add.l           %d1,%a0                 # A0 IS THE ADDRESS FOR 1/F

        fmov.x          X(%a6),%fp0
        mov.l           &0x3fff0000,F(%a6)
        clr.l           F+8(%a6)
        fsub.x          F(%a6),%fp0             # Y-F
        fmovm.x         &0xc,-(%sp)             # SAVE FP2-3 WHILE FP0 IS NOT READY
#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
#--FP2/FP3 ARE NOW SAVED ON THE STACK

LP1CONT1:
#--A RE-ENTRY POINT FOR LOGNP1
        fmul.x          (%a0),%fp0              # FP0 IS U = (Y-F)/F
        fmul.x          LOGOF2(%pc),%fp1        # GET K*LOG2 WHILE FP0 IS NOT READY
        fmov.x          %fp0,%fp2
        fmul.x          %fp2,%fp2               # FP2 IS V=U*U
        fmov.x          %fp1,KLOG2(%a6)         # PUT K*LOG2 IN MEMORY, FREE FP1

#--LOG(1+U) IS APPROXIMATED BY
#--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
#--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]

        fmov.x          %fp2,%fp3
        fmov.x          %fp2,%fp1

        fmul.d          LOGA6(%pc),%fp1         # V*A6
        fmul.d          LOGA5(%pc),%fp2         # V*A5

        fadd.d          LOGA4(%pc),%fp1         # A4+V*A6
        fadd.d          LOGA3(%pc),%fp2         # A3+V*A5

        fmul.x          %fp3,%fp1               # V*(A4+V*A6)
        fmul.x          %fp3,%fp2               # V*(A3+V*A5)

        fadd.d          LOGA2(%pc),%fp1         # A2+V*(A4+V*A6)
        fadd.d          LOGA1(%pc),%fp2         # A1+V*(A3+V*A5)

        fmul.x          %fp3,%fp1               # V*(A2+V*(A4+V*A6))
        add.l           &16,%a0                 # ADDRESS OF LOG(F)
        fmul.x          %fp3,%fp2               # V*(A1+V*(A3+V*A5))

        fmul.x          %fp0,%fp1               # U*V*(A2+V*(A4+V*A6))
        fadd.x          %fp2,%fp0               # U+V*(A1+V*(A3+V*A5))

        fadd.x          (%a0),%fp1              # LOG(F)+U*V*(A2+V*(A4+V*A6))
        fmovm.x         (%sp)+,&0x30            # RESTORE FP2-3
        fadd.x          %fp1,%fp0               # FP0 IS LOG(F) + LOG(1+U)

# The user's rounding precision/mode is installed only for the final add.
        fmov.l          %d0,%fpcr
        fadd.x          KLOG2(%a6),%fp0         # FINAL ADD
        bra             t_inx2


LOGNEAR1:

# if the input is exactly equal to one, then exit through ld_pzero.
# if these 2 lines weren't here, the correct answer would be returned
# but the INEX2 bit would be set.
        fcmp.b          %fp0,&0x1               # is it equal to one?
        fbeq.l          ld_pzero                # yes

#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
        fmov.x          %fp0,%fp1
        fsub.s          one(%pc),%fp1           # FP1 IS X-1
        fadd.s          one(%pc),%fp0           # FP0 IS X+1
        fadd.x          %fp1,%fp1               # FP1 IS 2(X-1)
#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
#--IN U, U = 2(X-1)/(X+1) = FP1/FP0

LP1CONT2:
#--THIS IS A RE-ENTRY POINT FOR LOGNP1
        fdiv.x          %fp0,%fp1               # FP1 IS U
        fmovm.x         &0xc,-(%sp)             # SAVE FP2-3
#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
#--LET V=U*U, W=V*V, CALCULATE
#--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
#--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
        fmov.x          %fp1,%fp0
        fmul.x          %fp0,%fp0               # FP0 IS V
        fmov.x          %fp1,SAVEU(%a6)         # STORE U IN MEMORY, FREE FP1
        fmov.x          %fp0,%fp1
        fmul.x          %fp1,%fp1               # FP1 IS W

        fmov.d          LOGB5(%pc),%fp3
        fmov.d          LOGB4(%pc),%fp2

        fmul.x          %fp1,%fp3               # W*B5
        fmul.x          %fp1,%fp2               # W*B4

        fadd.d          LOGB3(%pc),%fp3         # B3+W*B5
        fadd.d          LOGB2(%pc),%fp2         # B2+W*B4

        fmul.x          %fp3,%fp1               # W*(B3+W*B5), FP3 RELEASED

        fmul.x          %fp0,%fp2               # V*(B2+W*B4)

        fadd.d          LOGB1(%pc),%fp1         # B1+W*(B3+W*B5)
        fmul.x          SAVEU(%a6),%fp0         # FP0 IS U*V

        fadd.x          %fp2,%fp1               # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
        fmovm.x         (%sp)+,&0x30            # FP2-3 RESTORED

        fmul.x          %fp1,%fp0               # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )

        fmov.l          %d0,%fpcr
        fadd.x          SAVEU(%a6),%fp0
        bra             t_inx2

#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
LOGNEG:
        bra             t_operr

# slognd(): LOG(X) for a denormalized, non-zero input (a0 = ptr to X).
# The 64-bit mantissa is shifted left until its top bit is set, the
# normalized copy is stored in X(%a6), ADJK receives -(shift count), and
# control joins the normalized path at LOGBGN with a0 -> X(%a6).
# d2-d7 are saved on entry and restored before the branch to LOGBGN.
        global          slognd
slognd:
#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT

# This initial value is always overwritten with -k on both paths below.
        mov.l           &-100,ADJK(%a6)         # INPUT = 2^(ADJK) * FP0

#----normalize the input value by left shifting k bits (k to be determined
#----below), adjusting exponent and storing -k to ADJK
#----the value TWOTO100 is no longer needed.
#----Note that this code assumes the denormalized input is NON-ZERO.

        movm.l          &0x3f00,-(%sp)          # save some registers {d2-d7}
        mov.l           (%a0),%d3               # D3 is exponent of smallest norm. #
        mov.l           4(%a0),%d4
        mov.l           8(%a0),%d5              # (D4,D5) is (Hi_X,Lo_X)
        clr.l           %d2                     # D2 used for holding K

        tst.l           %d4
        bne.b           Hi_not0

# Upper mantissa longword is zero: move the lower longword up (a shift of
# 32) and then shift out its remaining leading zeros.
Hi_0:
        mov.l           %d5,%d4
        clr.l           %d5
        mov.l           &32,%d2
        clr.l           %d6
        bfffo           %d4{&0:&32},%d6         # d6 = leading-zero count of d4
        lsl.l           %d6,%d4
        add.l           %d6,%d2                 # (D3,D4,D5) is normalized

        mov.l           %d3,X(%a6)
        mov.l           %d4,XFRAC(%a6)
        mov.l           %d5,XFRAC+4(%a6)
        neg.l           %d2
        mov.l           %d2,ADJK(%a6)           # ADJK = -k
        fmov.x          X(%a6),%fp0
        movm.l          (%sp)+,&0xfc            # restore registers {d2-d7}
        lea             X(%a6),%a0
        bra.w           LOGBGN                  # begin regular log(X)

# Upper mantissa longword is non-zero: shift the 64-bit pair (D4,D5) left
# by k, carrying the top k bits of D5 into the low end of D4.
Hi_not0:
        clr.l           %d6
        bfffo           %d4{&0:&32},%d6         # find first 1
        mov.l           %d6,%d2                 # get k
        lsl.l           %d6,%d4
        mov.l           %d5,%d7                 # a copy of D5
        lsl.l           %d6,%d5
        neg.l           %d6
        add.l           &32,%d6                 # d6 = 32 - k
        lsr.l           %d6,%d7                 # bits of D5 that move into D4
        or.l            %d7,%d4                 # (D3,D4,D5) normalized

        mov.l           %d3,X(%a6)
        mov.l           %d4,XFRAC(%a6)
        mov.l           %d5,XFRAC+4(%a6)
        neg.l           %d2
        mov.l           %d2,ADJK(%a6)           # ADJK = -k
        fmov.x          X(%a6),%fp0
        movm.l          (%sp)+,&0xfc            # restore registers {d2-d7}
        lea             X(%a6),%a0
        bra.w           LOGBGN                  # begin regular log(X)

global slognp1
8440
#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
8441
slognp1:
8442
fmov.x (%a0),%fp0 # LOAD INPUT
8443
fabs.x %fp0 # test magnitude
8444
fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
8445
fbgt.w LP1REAL # if greater, continue
8446
fmov.l %d0,%fpcr
8447
mov.b &FMOV_OP,%d1 # last inst is MOVE
8448
fmov.x (%a0),%fp0 # return signed argument
8449
bra t_catch
8450
8451
LP1REAL:
8452
#--FORM X := ROUND(1+Z) IN FP0 AND IN X(a6), KEEP THE INPUT Z IN FP1,
#--THEN CLASSIFY X USING THE FIRST LONGWORD OF X(a6) HELD IN D1.
fmov.x (%a0),%fp0 # LOAD INPUT
8453
mov.l &0x00000000,ADJK(%a6) # ADJK := 0
8454
fmov.x %fp0,%fp1 # FP1 IS INPUT Z
8455
fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
8456
fmov.x %fp0,X(%a6)
8457
mov.w XFRAC(%a6),XDCARE(%a6) # copy the leading fraction word into XDCARE
8458
mov.l X(%a6),%d1 # D1 = first longword of X(a6)
8459
cmp.l %d1,&0
8460
ble.w LP1NEG0 # LOG OF ZERO OR -VE
8461
cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
8462
blt.w LOGMAIN # below the lower bound: plain LOG(X)
8463
cmp.l %d1,&0x3fffc000
8464
bgt.w LOGMAIN # above the upper bound: plain LOG(X)
8465
#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
8466
#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
8467
#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
8468
8469
LP1NEAR1:
8470
#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
#--D1 STILL HOLDS THE FIRST LONGWORD OF X(a6); OUTSIDE THE TWO BOUNDS
#--BELOW THE TABLE-DRIVEN PATH AT LP1CARE IS TAKEN.
8471
cmp.l %d1,&0x3ffef07d
8472
blt.w LP1CARE
8473
cmp.l %d1,&0x3fff8841
8474
bgt.w LP1CARE
8475
8476
LP1ONE16:
8477
#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
8478
#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
#--ONLY THE NUMERATOR (FP1) AND DENOMINATOR (FP0) ARE PREPARED HERE;
#--CONTROL THEN TRANSFERS TO LP1CONT2.
8479
fadd.x %fp1,%fp1 # FP1 IS 2Z
8480
fadd.s one(%pc),%fp0 # FP0 IS 1+X
8481
#--U = FP1/FP0
8482
bra.w LP1CONT2
8483
8484
LP1CARE:
8485
#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
8486
#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
8487
#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
8488
#--THERE ARE ONLY TWO CASES.
8489
#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
8490
#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
8491
#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
8492
#--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
8493
8494
mov.l XFRAC(%a6),FFRAC(%a6) # start from the high fraction longword of X
8495
and.l &0xFE000000,FFRAC(%a6) # keep only its top 7 bits
8496
or.l &0x01000000,FFRAC(%a6) # F OBTAINED
8497
cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
8498
bge.b KISZERO # X >= 1: K = 0; otherwise fall into KISNEG1
8499
8500
KISNEG1:
8501
#--CASE 1+Z < 1: K = -1. BUILD F IN F(a6), FORM Y-F = (2-F)+2Z IN FP0,
#--POINT A0 AT THE LOGTBL ENTRY SELECTED BY F, AND CONTINUE AT LP1CONT1.
#--NOTE(review): FFRAC is presumably the high fraction longword of F
#--(defined outside this view) -- confirm.
fmov.s TWO(%pc),%fp0
8502
mov.l &0x3fff0000,F(%a6) # first longword of F: exponent 0x3fff
8503
clr.l F+8(%a6) # last longword of F := 0
8504
fsub.x F(%a6),%fp0 # 2-F
8505
mov.l FFRAC(%a6),%d1
8506
and.l &0x7E000000,%d1 # isolate the 6 bits below the top bit
8507
asr.l &8,%d1
8508
asr.l &8,%d1
8509
asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F (6-bit index * 32)
8510
fadd.x %fp1,%fp1 # GET 2Z
8511
fmovm.x &0xc,-(%sp) # SAVE FP2 {%fp2/%fp3}
8512
fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
8513
lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
8514
add.l %d1,%a0
8515
fmov.s negone(%pc),%fp1 # FP1 IS K = -1
8516
bra.w LP1CONT1
8517
8518
KISZERO:
8519
#--CASE 1+Z >= 1: K = 0. BUILD F IN F(a6), FORM Y-F = (1-F)+Z IN FP0,
#--POINT A0 AT THE LOGTBL ENTRY SELECTED BY F, AND CONTINUE AT LP1CONT1.
fmov.s one(%pc),%fp0
8520
mov.l &0x3fff0000,F(%a6) # first longword of F: exponent 0x3fff
8521
clr.l F+8(%a6) # last longword of F := 0
8522
fsub.x F(%a6),%fp0 # 1-F
8523
mov.l FFRAC(%a6),%d1
8524
and.l &0x7E000000,%d1 # isolate the 6 bits below the top bit
8525
asr.l &8,%d1
8526
asr.l &8,%d1
8527
asr.l &4,%d1 # D1 = table displacement (6-bit index * 32)
8528
fadd.x %fp1,%fp0 # FP0 IS Y-F
8529
fmovm.x &0xc,-(%sp) # FP2 SAVED {%fp2/%fp3}
8530
lea LOGTBL(%pc),%a0
8531
add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
8532
fmov.s zero(%pc),%fp1 # FP1 IS K = 0
8533
bra.w LP1CONT1
8534
8535
LP1NEG0:
8536
#--D1 IS THE FIRST LONGWORD OF X = ROUND(1+Z) AND IS <= 0 HERE.
#--D0 HOLDS THE VALUE THAT IS WRITTEN TO FPCR BEFORE LEAVING.
8537
cmp.l %d1,&0
8538
blt.b LP1NEG # negative: operand error
8539
LP1ZERO:
8540
#--1+Z IS ZERO: LOAD -1 AND LEAVE THROUGH THE DIVIDE-BY-ZERO HANDLER
fmov.s negone(%pc),%fp0
8541
8542
fmov.l %d0,%fpcr
8543
bra t_dz
8544
8545
LP1NEG:
8546
#--1+Z IS NEGATIVE: LOAD 0 AND LEAVE THROUGH THE OPERAND-ERROR HANDLER
fmov.s zero(%pc),%fp0
8547
8548
fmov.l %d0,%fpcr
8549
bra t_operr
8550
8551
global slognp1d
8552
#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
8553
# Simply return the denorm: all work is delegated to t_extdnrm.
8554
slognp1d:
8555
bra t_extdnrm
8556
8557
#########################################################################
8558
# satanh(): computes the inverse hyperbolic tangent of a norm input #
8559
# satanhd(): computes the inverse hyperbolic tangent of a denorm input #
8560
# #
8561
# INPUT *************************************************************** #
8562
# a0 = pointer to extended precision input #
8563
# d0 = round precision,mode #
8564
# #
8565
# OUTPUT ************************************************************** #
8566
# fp0 = arctanh(X) #
8567
# #
8568
# ACCURACY and MONOTONICITY ******************************************* #
8569
# The returned result is within 3 ulps in 64 significant bit, #
8570
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8571
# rounded to double precision. The result is provably monotonic #
8572
# in double precision. #
8573
# #
8574
# ALGORITHM *********************************************************** #
8575
# #
8576
# ATANH #
8577
# 1. If |X| >= 1, go to 3. #
8578
# #
8579
# 2. (|X| < 1) Calculate atanh(X) by #
8580
# sgn := sign(X) #
8581
# y := |X| #
8582
# z := 2y/(1-y) #
8583
# atanh(X) := sgn * (1/2) * logp1(z) #
8584
# Exit. #
8585
# #
8586
# 3. If |X| > 1, go to 5. #
8587
# #
8588
# 4. (|X| = 1) Generate infinity with an appropriate sign and #
8589
# divide-by-zero by #
8590
# sgn := sign(X) #
8591
# atanh(X) := sgn / (+0). #
8592
# Exit. #
8593
# #
8594
# 5. (|X| > 1) Generate an invalid operation by 0 * infinity. #
8595
# Exit. #
8596
# #
8597
#########################################################################
8598
8599
global satanh
8600
#--ENTRY POINT FOR ATANH(X), X NORMALIZED
#--IN:  a0 = pointer to extended precision input, d0 = round precision,mode
#--OUT: fp0 = arctanh(X)
#--|X| < 1 IS COMPUTED AS SIGN(X) * (1/2) * LOG1P(2|X|/(1-|X|)) BY CALLING
#--slognp1; |X| = 1 LEAVES THROUGH t_dz AND |X| > 1 THROUGH t_operr.
satanh:
8601
mov.l (%a0),%d1 # high word of d1 = sign/exponent word
8602
mov.w 4(%a0),%d1 # low word of d1 = top 16 mantissa bits
8603
and.l &0x7FFFFFFF,%d1 # drop the sign: compact form of |X|
8604
cmp.l %d1,&0x3FFF8000 # compare with compact form of 1.0
8605
bge.b ATANHBIG # |X| >= 1
8606
8607
#--THIS IS THE USUAL CASE, |X| < 1
8608
#--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
8609
8610
fabs.x (%a0),%fp0 # Y = |X|
8611
fmov.x %fp0,%fp1
8612
fneg.x %fp1 # -Y
8613
fadd.x %fp0,%fp0 # 2Y
8614
fadd.s &0x3F800000,%fp1 # 1-Y
8615
fdiv.x %fp1,%fp0 # 2Y/(1-Y)
8616
mov.l (%a0),%d1
8617
and.l &0x80000000,%d1 # keep only the sign bit of X
8618
or.l &0x3F000000,%d1 # SIGN(X)*HALF
8619
mov.l %d1,-(%sp) # single-precision multiplier, popped by the fmul.s below
8620
8621
mov.l %d0,-(%sp) # save rnd prec,mode
8622
clr.l %d0 # pass ext prec,RN
8623
fmovm.x &0x01,-(%sp) # save Z on stack
8624
lea (%sp),%a0 # pass ptr to Z
8625
bsr slognp1 # LOG1P(Z)
8626
add.l &0xc,%sp # clear Z from stack
8627
8628
mov.l (%sp)+,%d0 # fetch old prec,mode
8629
fmov.l %d0,%fpcr # load it
8630
mov.b &FMUL_OP,%d1 # last inst is MUL
8631
fmul.s (%sp)+,%fp0 # fp0 = SIGN(X)*HALF * LOG1P(Z)
8632
bra t_catch
8633
8634
ATANHBIG:
8635
#--|X| >= 1: OPERAND ERROR IF |X| > 1, DIVIDE-BY-ZERO IF |X| = 1
fabs.x (%a0),%fp0 # |X|
8636
fcmp.s %fp0,&0x3F800000 # compare |X| with 1.0
8637
fbgt t_operr
8638
bra t_dz
8639
8640
global satanhd
8641
#--ATANH(X) = X FOR DENORMALIZED X
#--The denormalized input is handed straight to t_extdnrm.
8642
satanhd:
8643
bra t_extdnrm
8644
8645
#########################################################################
8646
# slog10(): computes the base-10 logarithm of a normalized input #
8647
# slog10d(): computes the base-10 logarithm of a denormalized input #
8648
# slog2(): computes the base-2 logarithm of a normalized input #
8649
# slog2d(): computes the base-2 logarithm of a denormalized input #
8650
# #
8651
# INPUT *************************************************************** #
8652
# a0 = pointer to extended precision input #
8653
# d0 = round precision,mode #
8654
# #
8655
# OUTPUT ************************************************************** #
8656
# fp0 = log_10(X) or log_2(X) #
8657
# #
8658
# ACCURACY and MONOTONICITY ******************************************* #
8659
# The returned result is within 1.7 ulps in 64 significant bit, #
8660
# i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
8661
# rounded to double precision. The result is provably monotonic #
8662
# in double precision. #
8663
# #
8664
# ALGORITHM *********************************************************** #
8665
# #
8666
# slog10d: #
8667
# #
8668
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8669
# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8670
# Notes: Default means round-to-nearest mode, no floating-point #
8671
# traps, and precision control = double extended. #
8672
# #
8673
# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8674
# Notes: Even if X is denormalized, log(X) is always normalized. #
8675
# #
8676
# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8677
# 2.1 Restore the user FPCR #
8678
# 2.2 Return ans := Y * INV_L10. #
8679
# #
8680
# slog10: #
8681
# #
8682
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8683
# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8684
# Notes: Default means round-to-nearest mode, no floating-point #
8685
# traps, and precision control = double extended. #
8686
# #
8687
# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
8688
# #
8689
# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
8690
# 2.1 Restore the user FPCR #
8691
# 2.2 Return ans := Y * INV_L10. #
8692
# #
8693
# sLog2d: #
8694
# #
8695
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8696
# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8697
# Notes: Default means round-to-nearest mode, no floating-point #
8698
# traps, and precision control = double extended. #
8699
# #
8700
# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
8701
# Notes: Even if X is denormalized, log(X) is always normalized. #
8702
# #
8703
# Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
8704
# 2.1 Restore the user FPCR #
8705
# 2.2 Return ans := Y * INV_L2. #
8706
# #
8707
# sLog2: #
8708
# #
8709
# Step 0. If X < 0, create a NaN and raise the invalid operation #
8710
# flag. Otherwise, save FPCR in D1; set FpCR to default. #
8711
# Notes: Default means round-to-nearest mode, no floating-point #
8712
# traps, and precision control = double extended. #
8713
# #
8714
# Step 1. If X is not an integer power of two, i.e., X != 2^k, #
8715
# go to Step 3. #
8716
# #
8717
# Step 2. Return k. #
8718
# 2.1 Get integer k, X = 2^k. #
8719
# 2.2 Restore the user FPCR. #
8720
# 2.3 Return ans := convert-to-double-extended(k). #
8721
# #
8722
# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
8723
# #
8724
# Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
8725
# 4.1 Restore the user FPCR #
8726
# 4.2 Return ans := Y * INV_L2. #
8727
# #
8728
#########################################################################
8729
8730
#--EXTENDED-PRECISION MULTIPLIERS USED BY slog10/slog10d AND slog2/slog2d
#--TO CONVERT A NATURAL LOG INTO A BASE-10 OR BASE-2 LOG.
INV_L10:
8731
long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 # 1/log(10)
8732
8733
INV_L2:
8734
long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 # 1/log(2)
8735
8736
global slog10
8737
#--entry point for Log10(X), X is normalized
#--IN:  a0 = pointer to extended precision input, d0 = round precision,mode
#--OUT: fp0 = log_10(X)
#--X == 1 leaves through ld_pzero; an input with the sign bit set leaves
#--through invalid; otherwise slogn is called with d0 cleared and its
#--result is multiplied by INV_L10 under the caller's FPCR.
8738
slog10:
8739
fmov.b &0x1,%fp0
8740
fcmp.x %fp0,(%a0) # if operand == 1,
8741
fbeq.l ld_pzero # return an EXACT zero
8742
8743
mov.l (%a0),%d1 # sets N if the sign bit of X is set
8744
blt.w invalid
8745
mov.l %d0,-(%sp) # save caller's round prec,mode
8746
clr.l %d0 # call slogn with d0 = 0
8747
bsr slogn # log(X), X normal.
8748
fmov.l (%sp)+,%fpcr # FPCR := saved round prec,mode
8749
fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8750
bra t_inx2
8751
8752
global slog10d
8753
#--entry point for Log10(X), X is denormalized
#--Same flow as slog10 without the X == 1 test: a set sign bit leaves
#--through invalid; otherwise slognd is called with d0 cleared and its
#--result is multiplied by INV_L10 under the caller's FPCR.
8754
slog10d:
8755
mov.l (%a0),%d1 # sets N if the sign bit of X is set
8756
blt.w invalid
8757
mov.l %d0,-(%sp) # save caller's round prec,mode
8758
clr.l %d0 # call slognd with d0 = 0
8759
bsr slognd # log(X), X denorm.
8760
fmov.l (%sp)+,%fpcr # FPCR := saved round prec,mode
8761
fmul.x INV_L10(%pc),%fp0 # log(X) * (1/log(10))
8762
bra t_minx2
8763
8764
global slog2
8765
#--entry point for Log2(X), X is normalized
#--IN:  a0 = pointer to extended precision input, d0 = round precision,mode
#--OUT: fp0 = log_2(X)
#--A set sign bit leaves through invalid. If the mantissa is exactly
#--0x80000000_00000000 (X = 2^k) the unbiased exponent k is returned
#--directly; otherwise log(X) from slogn is multiplied by INV_L2.
8766
slog2:
8767
mov.l (%a0),%d1 # sets N if the sign bit of X is set
8768
blt.w invalid
8769
8770
mov.l 8(%a0),%d1 # low mantissa longword
8771
bne.b continue # X is not 2^k
8772
8773
mov.l 4(%a0),%d1 # high mantissa longword
8774
and.l &0x7FFFFFFF,%d1 # ignore the top (integer) bit
8775
bne.b continue # other mantissa bits set: X is not 2^k
8776
8777
#--X = 2^k.
8778
mov.w (%a0),%d1
8779
and.l &0x00007FFF,%d1 # biased exponent of X
8780
sub.l &0x3FFF,%d1 # k = exponent - bias
8781
beq.l ld_pzero # k == 0: return zero via ld_pzero
8782
fmov.l %d0,%fpcr # FPCR := caller's round prec,mode
8783
fmov.l %d1,%fp0 # result = k converted from integer
8784
bra t_inx2
8785
8786
continue:
8787
mov.l %d0,-(%sp) # save caller's round prec,mode
8788
clr.l %d0 # call slogn with d0 = 0
8789
bsr slogn # log(X), X normal.
8790
fmov.l (%sp)+,%fpcr # FPCR := saved round prec,mode
8791
fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8792
bra t_inx2
8793
8794
invalid:
8795
#--shared exit for inputs with the sign bit set (slog10, slog10d, slog2, slog2d)
bra t_operr
8796
8797
global slog2d
8798
#--entry point for Log2(X), X is denormalized
#--A set sign bit leaves through invalid; otherwise slognd is called with
#--d0 cleared and its result is multiplied by INV_L2 under the caller's
#--FPCR.
8799
slog2d:
8800
mov.l (%a0),%d1 # sets N if the sign bit of X is set
8801
blt.w invalid
8802
mov.l %d0,-(%sp) # save caller's round prec,mode
8803
clr.l %d0 # call slognd with d0 = 0
8804
bsr slognd # log(X), X denorm.
8805
fmov.l (%sp)+,%fpcr # FPCR := saved round prec,mode
8806
fmul.x INV_L2(%pc),%fp0 # log(X) * (1/log(2))
8807
bra t_minx2
8808
8809
#########################################################################
8810
# stwotox(): computes 2**X for a normalized input #
8811
# stwotoxd(): computes 2**X for a denormalized input #
8812
# stentox(): computes 10**X for a normalized input #
8813
# stentoxd(): computes 10**X for a denormalized input #
8814
# #
8815
# INPUT *************************************************************** #
8816
# a0 = pointer to extended precision input #
8817
# d0 = round precision,mode #
8818
# #
8819
# OUTPUT ************************************************************** #
8820
# fp0 = 2**X or 10**X #
8821
# #
8822
# ACCURACY and MONOTONICITY ******************************************* #
8823
# The returned result is within 2 ulps in 64 significant bit, #
8824
# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
8825
# rounded to double precision. The result is provably monotonic #
8826
# in double precision. #
8827
# #
8828
# ALGORITHM *********************************************************** #
8829
# #
8830
# twotox #
8831
# 1. If |X| > 16480, go to ExpBig. #
8832
# #
8833
# 2. If |X| < 2**(-70), go to ExpSm. #
8834
# #
8835
# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
8836
# decompose N as #
8837
# N = 64(M + M') + j, j = 0,1,2,...,63. #
8838
# #
8839
# 4. Overwrite r := r * log2. Then #
8840
# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8841
# Go to expr to compute that expression. #
8842
# #
8843
# tentox #
8844
# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
8845
# #
8846
# 2. If |X| < 2**(-70), go to ExpSm. #
8847
# #
8848
# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
8849
# N := round-to-int(y). Decompose N as #
8850
# N = 64(M + M') + j, j = 0,1,2,...,63. #
8851
# #
8852
# 4. Define r as #
8853
# r := ((X - N*L1)-N*L2) * L10 #
8854
# where L1, L2 are the leading and trailing parts of #
8855
# log_10(2)/64 and L10 is the natural log of 10. Then #
8856
# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
8857
# Go to expr to compute that expression. #
8858
# #
8859
# expr #
8860
# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
8861
# #
8862
# 2. Overwrite Fact1 and Fact2 by #
8863
# Fact1 := 2**(M) * Fact1 #
8864
# Fact2 := 2**(M) * Fact2 #
8865
# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
8866
# #
8867
# 3. Calculate P where 1 + P approximates exp(r): #
8868
# P = r + r*r*(A1+r*(A2+...+r*A5)). #
8869
# #
8870
# 4. Let AdjFact := 2**(M'). Return #
8871
# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
8872
# Exit. #
8873
# #
8874
# ExpBig #
8875
# 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
8876
# generate underflow by Tiny * Tiny. #
8877
# #
8878
# ExpSm #
8879
# 1. Return 1 + X. #
8880
# #
8881
#########################################################################
8882
8883
#--CONSTANTS FOR stwotox/stentox/expr.
#--L2TEN64, L10TWO1 AND EXPA1..EXPA5 ARE READ AS DOUBLE PRECISION
#--(fmul.d/fmov.d/fadd.d); L10TWO2, LOG10 AND LOG2 ARE READ AS EXTENDED.
L2TEN64:
8884
long 0x406A934F,0x0979A371 # 64LOG10/LOG2
8885
L10TWO1:
8886
long 0x3F734413,0x509F8000 # LOG2/64LOG10
8887
8888
L10TWO2:
8889
long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 # trailing part of LOG2/64LOG10
8890
8891
LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 # natural log of 10
8892
8893
LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # natural log of 2
8894
8895
#--POLYNOMIAL COEFFICIENTS A5..A1 USED BY expr
EXPA5: long 0x3F56C16D,0x6F7BD0B2
8896
EXPA4: long 0x3F811112,0x302C712C
8897
EXPA3: long 0x3FA55555,0x55554CC1
8898
EXPA2: long 0x3FC55555,0x55554A54
8899
EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 # 0.5; only the first two longwords are read (fadd.d)
8900
8901
#--TABLE OF 2^(J/64), ONE 16-BYTE ENTRY PER J (stwotox/stentox INDEX IT
#--WITH (N AND 0x3F)*16). FOR THE SELECTED ENTRY THE FIRST THREE LONGWORDS
#--ARE COPIED INTO FACT1 AS AN EXTENDED-PRECISION VALUE, AND THE TWO
#--16-BIT HALVES OF THE FOURTH LONGWORD ARE COPIED INTO THE FIRST WORD OF
#--FACT2 AND THE FIRST WORD OF FACT2HI.
TEXPTBL:
8902
long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
8903
long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
8904
long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
8905
long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
8906
long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
8907
long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
8908
long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
8909
long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
8910
long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
8911
long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
8912
long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
8913
long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
8914
long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
8915
long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
8916
long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
8917
long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
8918
long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
8919
long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
8920
long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
8921
long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
8922
long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
8923
long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
8924
long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
8925
long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
8926
long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
8927
long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
8928
long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
8929
long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
8930
long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
8931
long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
8932
long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
8933
long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
8934
long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
8935
long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
8936
long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
8937
long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
8938
long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
8939
long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
8940
long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
8941
long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
8942
long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
8943
long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
8944
long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
8945
long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
8946
long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
8947
long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
8948
long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
8949
long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
8950
long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
8951
long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
8952
long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
8953
long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
8954
long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
8955
long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
8956
long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
8957
long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
8958
long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
8959
long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
8960
long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
8961
long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
8962
long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
8963
long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
8964
long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
8965
long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
8966
8967
#--SCRATCH-AREA ALIASES FOR stwotox/stentox/expr.
#--NOTE: X, ADJFACT AND FACT1 ALL NAME FP_SCR0, SO EACH IS ONLY VALID
#--UNTIL THE NEXT ONE IS WRITTEN.
set INT,L_SCR1
8968
8969
set X,FP_SCR0
8970
set XDCARE,X+2
8971
set XFRAC,X+4
8972
8973
set ADJFACT,FP_SCR0
8974
8975
set FACT1,FP_SCR0
8976
set FACT1HI,FACT1+4
8977
set FACT1LOW,FACT1+8
8978
8979
set FACT2,FP_SCR1
8980
set FACT2HI,FACT2+4
8981
set FACT2LOW,FACT2+8
8982
8983
global stwotox
8984
#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--IN:  a0 = pointer to extended precision input, d0 = round precision,mode
#--OUT: fp0 = 2**X
#--D1 IS BUILT AS THE COMPACT FORM OF |X| (EXPONENT WORD : TOP 16 MANTISSA
#--BITS) AND RANGE-CHECKED; OUT-OF-RANGE INPUTS GO TO EXPBORS.
8985
stwotox:
8986
fmovm.x (%a0),&0x80 # LOAD INPUT
8987
8988
mov.l (%a0),%d1 # high word of d1 = sign/exponent word
8989
mov.w 4(%a0),%d1 # low word of d1 = top 16 mantissa bits
8990
fmov.x %fp0,X(%a6) # keep a copy of X for the sign test in TEXPBIG
8991
and.l &0x7FFFFFFF,%d1 # drop the sign
8992
8993
cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
8994
bge.b TWOOK1
8995
bra.w EXPBORS # too small
8996
8997
TWOOK1:
8998
cmp.l %d1,&0x400D80C0 # |X| > 16480?
8999
ble.b TWOMAIN # no; usual case
9000
bra.w EXPBORS # too big
9001
9002
TWOMAIN:
9003
#--USUAL CASE, 2^(-70) <= |X| <= 16480
#--SPLIT N = ROUND-TO-INT(64 X) INTO J, M AND M', LOAD FACT1/FACT2 FROM
#--TEXPTBL, FORM R = (X - N/64) * LOG2 IN FP0, THEN CONTINUE AT expr.
9004
9005
fmov.x %fp0,%fp1
9006
fmul.s &0x42800000,%fp1 # 64 * X
9007
fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
9008
mov.l %d2,-(%sp) # save d2; restored in expr
9009
lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9010
fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9011
mov.l INT(%a6),%d1
9012
mov.l %d1,%d2
9013
and.l &0x3F,%d1 # D1 IS J
9014
asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9015
add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9016
asr.l &6,%d2 # d2 IS L, N = 64L + J
9017
mov.l %d2,%d1
9018
asr.l &1,%d1 # D1 IS M
9019
sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9020
add.l &0x3FFF,%d2 # d2 = biased exponent of 2^(M')
9021
9022
#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9023
#--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9024
#--D2 IS THE BIASED EXPONENT OF 2^(M'); expr STORES IT INTO ADJFACT.
9025
#--PUSHED ON THE STACK BY THIS ROUTINE: D2, THEN (BELOW) FP2/FP3.
9026
9027
fmovm.x &0x0c,-(%sp) # save fp2/fp3
9028
9029
fmul.s &0x3C800000,%fp1 # (1/64)*N
9030
mov.l (%a1)+,FACT1(%a6)
9031
mov.l (%a1)+,FACT1HI(%a6)
9032
mov.l (%a1)+,FACT1LOW(%a6)
9033
mov.w (%a1)+,FACT2(%a6)
9034
9035
fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
9036
9037
mov.w (%a1)+,FACT2HI(%a6)
9038
clr.w FACT2HI+2(%a6)
9039
clr.l FACT2LOW(%a6)
9040
add.w %d1,FACT1(%a6) # add M to the exponent word of FACT1
9041
fmul.x LOG2(%pc),%fp0 # FP0 IS R
9042
add.w %d1,FACT2(%a6) # add M to the exponent word of FACT2
9043
9044
bra.w expr
9045
9046
EXPBORS:
9047
#--REACHED FROM stwotox/stentox WHEN |X| IS OUTSIDE THE USUAL RANGE.
#--D1 IS |X| IN COMPACT FORM, FP0 IS X, D0 IS THE CALLER'S ROUND PREC,MODE.
9048
cmp.l %d1,&0x3FFF8000 # compare |X| with compact form of 1.0
9049
bgt.b TEXPBIG
9050
9051
#--|X| IS SMALL, RETURN 1 + X
9052
9053
fmov.l %d0,%fpcr # restore users round prec,mode
9054
fadd.s &0x3F800000,%fp0 # RETURN 1 + X
9055
bra t_pinx2
9056
9057
TEXPBIG:
9058
#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
9059
#--NOTHING HAS BEEN PUSHED ON THE STACK ON THE PATHS FROM stwotox/stentox.
9060
mov.l X(%a6),%d1 # sign/exponent longword of the saved X
9061
cmp.l %d1,&0
9062
blt.b EXPNEG # X negative
9063
9064
bra t_ovfl2 # t_ovfl expects positive value
9065
9066
EXPNEG:
9067
bra t_unfl2 # t_unfl expects positive value
9068
9069
global stwotoxd
9070
stwotoxd:
9071
#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
#--IN:  a0 = pointer to the input, d0 = round precision,mode
#--OUT: fp0 = 1 + (single-precision value built in d1), via t_pinx2
9072
9073
fmov.l %d0,%fpcr # set user's rounding mode/precision
9074
fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9075
mov.l (%a0),%d1 # first longword of X (sign/exponent word first)
9076
or.l &0x00800001,%d1 # NOTE(review): presumably yields a tiny single with X's sign (exponent field of a denorm is zero) -- confirm
9077
fadd.s %d1,%fp0 # d1 is interpreted as a single-precision operand
9078
bra t_pinx2
9079
9080
global stentox
9081
#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
#--IN:  a0 = pointer to extended precision input, d0 = round precision,mode
#--OUT: fp0 = 10**X
#--D1 IS BUILT AS THE COMPACT FORM OF |X| (EXPONENT WORD : TOP 16 MANTISSA
#--BITS) AND RANGE-CHECKED; OUT-OF-RANGE INPUTS GO TO EXPBORS.
9082
stentox:
9083
fmovm.x (%a0),&0x80 # LOAD INPUT
9084
9085
mov.l (%a0),%d1 # high word of d1 = sign/exponent word
9086
mov.w 4(%a0),%d1 # low word of d1 = top 16 mantissa bits
9087
fmov.x %fp0,X(%a6) # keep a copy of X for the sign test in TEXPBIG
9088
and.l &0x7FFFFFFF,%d1 # drop the sign
9089
9090
cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
9091
bge.b TENOK1
9092
bra.w EXPBORS # too small
9093
9094
TENOK1:
9095
cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
9096
ble.b TENMAIN # yes; usual case
9097
bra.w EXPBORS # too big
9098
9099
TENMAIN:
9100
#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
#--SPLIT N = INT(X*64*LOG10/LOG2) INTO J, M AND M', LOAD FACT1/FACT2 FROM
#--TEXPTBL, FORM R = ((X - N*L_LEAD) - N*L_TRAIL) * LOG10 IN FP0, THEN
#--FALL THROUGH INTO expr.
9101
9102
fmov.x %fp0,%fp1
9103
fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
9104
fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
9105
mov.l %d2,-(%sp) # save d2; restored in expr
9106
lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
9107
fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
9108
mov.l INT(%a6),%d1
9109
mov.l %d1,%d2
9110
and.l &0x3F,%d1 # D1 IS J
9111
asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64)
9112
add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
9113
asr.l &6,%d2 # d2 IS L, N = 64L + J
9114
mov.l %d2,%d1
9115
asr.l &1,%d1 # D1 IS M
9116
sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
9117
add.l &0x3FFF,%d2 # d2 = biased exponent of 2^(M')
9118
9119
#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
9120
#--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
9121
#--D2 IS THE BIASED EXPONENT OF 2^(M'); expr STORES IT INTO ADJFACT.
9122
#--PUSHED ON THE STACK BY THIS ROUTINE: D2, THEN (BELOW) FP2/FP3.
9123
fmovm.x &0x0c,-(%sp) # save fp2/fp3
9124
9125
fmov.x %fp1,%fp2 # second copy of N for the trailing product
9126
9127
fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
9128
mov.l (%a1)+,FACT1(%a6)
9129
9130
fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
9131
9132
mov.l (%a1)+,FACT1HI(%a6)
9133
mov.l (%a1)+,FACT1LOW(%a6)
9134
fsub.x %fp1,%fp0 # X - N L_LEAD
9135
mov.w (%a1)+,FACT2(%a6)
9136
9137
fsub.x %fp2,%fp0 # (X - N L_LEAD) - N L_TRAIL
9138
9139
mov.w (%a1)+,FACT2HI(%a6)
9140
clr.w FACT2HI+2(%a6)
9141
clr.l FACT2LOW(%a6)
9142
9143
fmul.x LOG10(%pc),%fp0 # FP0 IS R
9144
add.w %d1,FACT1(%a6) # add M to the exponent word of FACT1
9145
add.w %d1,FACT2(%a6) # add M to the exponent word of FACT2
9147
expr:
9148
#--SHARED TAIL OF stwotox AND stentox.
#--ON ENTRY: FP0 IS R, FACT1 + FACT2 = 2**(M) * 2**(J/64), D2 IS THE BIASED
#--EXPONENT OF 2**(M'), D0 IS THE CALLER'S ROUND PREC,MODE, AND THE STACK
#--HOLDS FP2/FP3 ON TOP OF THE CALLER'S D2.
9149
#--ADJFACT SHARES STORAGE WITH FACT1, SO IT IS ONLY BUILT AFTER FACT1
9150
#--HAS BEEN CONSUMED. THE FOLLOWING CODE COMPUTES
9151
#-- 2**(M'+M) * 2**(J/64) * EXP(R)
9152
9153
fmov.x %fp0,%fp1
9154
fmul.x %fp1,%fp1 # FP1 IS S = R*R
9155
9156
fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
9157
fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
9158
9159
fmul.x %fp1,%fp2 # FP2 IS S*A5
9160
fmul.x %fp1,%fp3 # FP3 IS S*A4
9161
9162
fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
9163
fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
9164
9165
fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
9166
fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
9167
9168
fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
9169
fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
9170
9171
fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
9172
fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
9173
fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
9174
9175
fmovm.x (%sp)+,&0x30 # restore fp2/fp3
9176
9177
#--FINAL RECONSTRUCTION PROCESS
9178
#--RESULT = 2^(M') * ( FACT1 + ( FACT1*(EXP(R)-1) + FACT2 ) )
9179
9180
fmul.x FACT1(%a6),%fp0 # FACT1*(EXP(R)-1)
9181
fadd.x FACT2(%a6),%fp0 # ... + FACT2
9182
fadd.x FACT1(%a6),%fp0 # ... + FACT1
9183
9184
fmov.l %d0,%fpcr # restore users round prec,mode
9185
mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT
9186
mov.l (%sp)+,%d2 # restore caller's d2
9187
mov.l &0x80000000,ADJFACT+4(%a6) # mantissa of ADJFACT = 1.0
9188
clr.l ADJFACT+8(%a6)
9189
mov.b &FMUL_OP,%d1 # last inst is MUL
9190
fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
9191
bra t_catch
9192
9193
global stentoxd
9194
stentoxd:
9195
#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
#--IN:  a0 = pointer to the input, d0 = round precision,mode
#--OUT: fp0 = 1 + (single-precision value built in d1), via t_pinx2
9196
9197
fmov.l %d0,%fpcr # set user's rounding mode/precision
9198
fmov.s &0x3F800000,%fp0 # RETURN 1 + X
9199
mov.l (%a0),%d1 # first longword of X (sign/exponent word first)
9200
or.l &0x00800001,%d1 # NOTE(review): presumably yields a tiny single with X's sign (exponent field of a denorm is zero) -- confirm
9201
fadd.s %d1,%fp0 # d1 is interpreted as a single-precision operand
9202
bra t_pinx2
9203
9204
#########################################################################
9205
# sscale(): computes the destination operand scaled by the source #
9206
# operand. If the absolute value of the source operand is #
9207
# >= 2^14, an overflow or underflow is returned. #
9208
# #
9209
# INPUT *************************************************************** #
9210
# a0 = pointer to double-extended source operand X #
9211
# a1 = pointer to double-extended destination operand Y #
9212
# #
9213
# OUTPUT ************************************************************** #
9214
# fp0 = scale(X,Y) #
9215
# #
9216
#########################################################################
9217
9218
set SIGN, L_SCR1
9219
9220
global sscale
9221
# entry: a0 = ptr to source operand, a1 = ptr to destination operand,
# d0 = control bits (pushed here and popped by every exit path).
# the source exponent selects one of three paths: below 0x3fff goes to
# src_small, above 0x400c goes to src_out, anything else falls into src_ok.
sscale:
9222
mov.l %d0,-(%sp) # store off ctrl bits for now
9223
9224
mov.w DST_EX(%a1),%d1 # get dst exponent
9225
smi.b SIGN(%a6) # use SIGN to hold dst sign
9226
andi.l &0x00007fff,%d1 # strip sign from dst exp
9227
9228
mov.w SRC_EX(%a0),%d0 # check src bounds
9229
andi.w &0x7fff,%d0 # clr src sign bit
9230
cmpi.w %d0,&0x3fff # is src ~ ZERO?
9231
blt.w src_small # yes
9232
cmpi.w %d0,&0x400c # no; is src too big?
9233
bgt.w src_out # yes
9234
9235
#
9236
# Source is within 2^14 range.
9237
#
9238
src_ok:
9239
fintrz.x SRC(%a0),%fp0 # calc int of src
9240
fmov.l %fp0,%d0 # int src to d0
9241
# don't want any accrued bits from the fintrz showing up later since
9242
# we may need to read the fpsr for the last fp op in t_catch2().
9243
fmov.l &0x0,%fpsr
9244
9245
tst.b DST_HI(%a1) # is dst denormalized?
9246
bmi.b sok_norm # no; top mantissa bit is set
9247
9248
# the dst is a DENORM. normalize the DENORM and add the adjustment to
9249
# the src value. then, jump to the norm part of the routine.
9250
sok_dnrm:
9251
mov.l %d0,-(%sp) # save src for now
9252
9253
mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
9254
mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
9255
mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
9256
9257
lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
9258
bsr.l norm # normalize the DENORM
9259
neg.l %d0 # negate the value norm returned in d0
9260
add.l (%sp)+,%d0 # add adjustment to src
9261
9262
fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
9263
9264
cmpi.w %d0,&-0x3fff # is the shft amt really low?
9265
bge.b sok_norm2 # thank goodness no
9266
9267
# the multiply factor that we're trying to create should be a denorm
9268
# for the multiply to work. therefore, we're going to actually do a
9269
# multiply with a denorm which will cause an unimplemented data type
9270
# exception to be put into the machine which will be caught and corrected
9271
# later. we don't do this with the DENORMs above because this method
9272
# is slower. but, don't fret, I don't see it being used much either.
# the factor is pushed low mantissa first, exponent last, so that it is
# laid out in memory order at (%sp) for the fmul.x in sok_norm_cont.
9273
fmov.l (%sp)+,%fpcr # restore user fpcr
9274
mov.l &0x80000000,%d1 # load normalized mantissa
9275
subi.l &-0x3fff,%d0 # how many should we shift?
9276
neg.l %d0 # make it positive
9277
cmpi.b %d0,&0x20 # is it >= 32?
9278
bge.b sok_dnrm_32 # yes
9279
lsr.l %d0,%d1 # no; bit stays in upper lw
9280
clr.l -(%sp) # insert zero low mantissa
9281
mov.l %d1,-(%sp) # insert new high mantissa
9282
clr.l -(%sp) # make zero exponent
9283
bra.b sok_norm_cont
9284
sok_dnrm_32:
9285
subi.b &0x20,%d0 # get shift count
9286
lsr.l %d0,%d1 # make low mantissa longword
9287
mov.l %d1,-(%sp) # insert new low mantissa
9288
clr.l -(%sp) # insert zero high mantissa
9289
clr.l -(%sp) # make zero exponent
9290
bra.b sok_norm_cont
9291
9292
# the src will force the dst to a DENORM value or worse. so, let's
9293
# create an fp multiply that will create the result.
# the factor built below has mantissa 0x80000000_00000000 and exponent
# (int src + 0x3fff), i.e. a power of two; it is pushed low mantissa
# first, exponent last.
9294
sok_norm:
9295
fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst
9296
sok_norm2:
9297
fmov.l (%sp)+,%fpcr # restore user fpcr
9298
9299
addi.w &0x3fff,%d0 # turn src amt into exp value
9300
swap %d0 # put exponent in high word
9301
clr.l -(%sp) # insert zero lo mantissa
9302
mov.l &0x80000000,-(%sp) # insert new high mantissa
9303
mov.l %d0,-(%sp) # insert new exponent
9304
9305
sok_norm_cont:
9306
fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
9307
mov.b &FMUL_OP,%d1 # last inst is MUL
9308
fmul.x (%sp)+,%fp0 # do the multiply
9309
bra t_catch2 # catch any exceptions
9310
9311
#
9312
# Source is outside of 2^14 range. Test the sign and branch
9313
# to the appropriate exception handler.
# After the exg, a1 points to the source and a0 to the destination.
9314
#
9315
src_out:
9316
mov.l (%sp)+,%d0 # restore ctrl bits
9317
exg %a0,%a1 # swap src,dst ptrs
9318
tst.b SRC_EX(%a1) # is src negative?
9319
bmi t_unfl # yes; underflow
9320
bra t_ovfl_sc # no; overflow
9321
9322
#
9323
# The source input is below 1, so we check for denormalized numbers
9324
# and set unfl.
# A normalized dst is returned unchanged; a denormalized dst is handed
# to t_resdnrm with a0 pointing at it.
9325
#
9326
src_small:
9327
tst.b DST_HI(%a1) # is dst denormalized?
9328
bpl.b ssmall_done # yes
9329
9330
mov.l (%sp)+,%d0 # pop saved control bits
9331
fmov.l %d0,%fpcr # no; load control bits
9332
mov.b &FMOV_OP,%d1 # last inst is MOVE
9333
fmov.x DST(%a1),%fp0 # simply return dest
9334
bra t_catch2
9335
ssmall_done:
9336
mov.l (%sp)+,%d0 # load control bits into d0
9337
mov.l %a1,%a0 # pass ptr to dst
9338
bra t_resdnrm
9339
9340
#########################################################################
9341
# smod(): computes the fp MOD of the input values X,Y. #
9342
# srem(): computes the fp (IEEE) REM of the input values X,Y. #
9343
# #
9344
# INPUT *************************************************************** #
9345
# a0 = pointer to extended precision input X #
9346
# a1 = pointer to extended precision input Y #
9347
# d0 = round precision,mode #
9348
# #
9349
# The input operands X and Y can be either normalized or #
9350
# denormalized. #
9351
# #
9352
# OUTPUT ************************************************************** #
9353
# fp0 = FREM(X,Y) or FMOD(X,Y) #
9354
# #
9355
# ALGORITHM *********************************************************** #
9356
# #
9357
# Step 1. Save and strip signs of X and Y: signX := sign(X), #
9358
# signY := sign(Y), X := |X|, Y := |Y|, #
9359
# signQ := signX EOR signY. Record whether MOD or REM #
9360
# is requested. #
9361
# #
9362
# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
9363
# If (L < 0) then #
9364
# R := X, go to Step 4. #
9365
# else #
9366
# R := 2^(-L)X, j := L. #
9367
# endif #
9368
# #
9369
# Step 3. Perform MOD(X,Y) #
9370
# 3.1 If R = Y, go to Step 9. #
9371
# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
9372
# 3.3 If j = 0, go to Step 4. #
9373
# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
9374
# Step 3.1. #
9375
# #
9376
# Step 4. At this point, R = X - QY = MOD(X,Y). Set #
9377
# Last_Subtract := false (used in Step 7 below). If #
9378
# MOD is requested, go to Step 6. #
9379
# #
9380
# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
9381
# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
9382
# Step 6. #
9383
# 5.2 If R > Y/2, then { set Last_Subtract := true, #
9384
# Q := Q + 1, Y := signY*Y }. Go to Step 6. #
9385
# 5.3 This is the tricky case of R = Y/2. If Q is odd, #
9386
# then { Q := Q + 1, signX := -signX }. #
9387
# #
9388
# Step 6. R := signX*R. #
9389
# #
9390
# Step 7. If Last_Subtract = true, R := R - Y. #
9391
# #
9392
# Step 8. Return signQ, last 7 bits of Q, and R as required. #
9393
# #
9394
# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
9395
# X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), #
9396
# R := 0. Return signQ, last 7 bits of Q, and R. #
9397
# #
9398
#########################################################################
9399
9400
set Mod_Flag,L_SCR3
9401
set Sc_Flag,L_SCR3+1
9402
9403
set SignY,L_SCR2
9404
set SignX,L_SCR2+2
9405
set SignQ,L_SCR3+2
9406
9407
set Y,FP_SCR0
9408
set Y_Hi,Y+4
9409
set Y_Lo,Y+8
9410
9411
set R,FP_SCR1
9412
set R_Hi,R+4
9413
set R_Lo,R+8
9414
9415
Scale:
9416
long 0x00010000,0x80000000,0x00000000,0x00000000
9417
9418
global smod
9419
smod:
9420
clr.b FPSR_QBYTE(%a6)
9421
mov.l %d0,-(%sp) # save ctrl bits
9422
clr.b Mod_Flag(%a6)
9423
bra.b Mod_Rem
9424
9425
global srem
9426
srem:
9427
clr.b FPSR_QBYTE(%a6)
9428
mov.l %d0,-(%sp) # save ctrl bits
9429
mov.b &0x1,Mod_Flag(%a6)
9430
9431
Mod_Rem:
# Shared body of smod/srem. Stacks %d2-%d7, then unpacks the source
# operand Y into (D3,D4,D5) = (exponent, hi mantissa, lo mantissa),
# sign removed and mantissa normalized, exponent re-biased to $7FFD.
	movm.l		&0x3f00,-(%sp)		# push %d2-%d7; popped at Restore
	mov.w		SRC_EX(%a0),%d3
	mov.w		%d3,SignY(%a6)		# remember Y's signed exponent word
	and.l		&0x00007FFF,%d3		# strip the sign: Y := |Y|

	mov.l		SRC_HI(%a0),%d4
	mov.l		SRC_LO(%a0),%d5		# (D3,D4,D5) now holds |Y|

	tst.l		%d3			# exponent field zero?
	bne.b		Y_Normal		# no: only the bias needs adjusting

# Exponent field is zero: shift the mantissa left until its msb is set,
# charging each shifted position against the exponent.
	mov.l		&0x00003FFE,%d3		# $3FFD + 1
	tst.l		%d4
	bne.b		HiY_not0

HiY_0:
# Upper longword is empty: promote the lower longword (worth 32 shifts).
	mov.l		%d5,%d4
	clr.l		%d5
	sub.l		&32,%d3
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# D6 := offset of first set bit
	lsl.l		%d6,%d4
	sub.l		%d6,%d3			# (D3,D4,D5) normalized, bias $7FFD
	bra.b		Chk_X

HiY_not0:
# Upper longword is non-zero: 64-bit left shift by the leading-zero count.
	clr.l		%d6
	bfffo		%d4{&0:&32},%d6		# D6 := offset of first set bit
	sub.l		%d6,%d3
	lsl.l		%d6,%d4
	mov.l		%d5,%d7			# scratch copy of the low longword
	lsl.l		%d6,%d5
	neg.l		%d6
	add.l		&32,%d6			# D6 := 32 - shift count
	lsr.l		%d6,%d7			# bits crossing from lo into hi
	or.l		%d7,%d4			# (D3,D4,D5) normalized, bias $7FFD
	bra.b		Chk_X

Y_Normal:
	add.l		&0x00003FFE,%d3		# (D3,D4,D5) normalized, bias $7FFD
9477
9478
Chk_X:
# Unpack the destination operand X into (D0,D1,D2) the same way Y was
# unpacked, and record the sign of the quotient along the way.
	mov.w		DST_EX(%a1),%d0
	mov.w		%d0,SignX(%a6)		# remember X's signed exponent word
	mov.w		SignY(%a6),%d1
	eor.l		%d0,%d1
	and.l		&0x00008000,%d1		# bit 15 survives iff the signs differ
	mov.w		%d1,SignQ(%a6)		# sign(Q) recorded
	and.l		&0x00007FFF,%d0		# strip the sign: X := |X|
	mov.l		DST_HI(%a1),%d1
	mov.l		DST_LO(%a1),%d2		# (D0,D1,D2) now holds |X|
	tst.l		%d0			# exponent field zero?
	bne.b		X_Normal		# no: only the bias needs adjusting
	mov.l		&0x00003FFE,%d0
	tst.l		%d1
	bne.b		HiX_not0

HiX_0:
# Upper longword is empty: promote the lower longword (worth 32 shifts).
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# D6 := offset of first set bit
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) normalized, bias $7FFD
	bra.b		Init

HiX_not0:
# Upper longword is non-zero: 64-bit left shift by the leading-zero count.
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# D6 := offset of first set bit
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# scratch copy of the low longword
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6			# D6 := 32 - shift count
	lsr.l		%d6,%d7			# bits crossing from lo into hi
	or.l		%d7,%d1			# (D0,D1,D2) normalized, bias $7FFD
	bra.b		Init

X_Normal:
	add.l		&0x00003FFE,%d0		# (D0,D1,D2) normalized, bias $7FFD
9522
9523
Init:
# Step 2 of the algorithm: L := expo(X) - expo(Y), Q := 0, k := 0.
# From here on D0 carries j (initially L), D3 carries Q, A1 carries k,
# and (D6,D1,D2) is R with D6 acting as the carry above bit 63.
	mov.l		%d3,L_SCR1(%a6)		# park the biased exponent of Y
	mov.l		%d0,-(%sp)		# stack the biased exponent of X
	sub.l		%d3,%d0			# D0 := L

	clr.l		%d6			# carry := 0
	clr.l		%d3			# Q := 0
	mov.l		&0,%a1			# k := 0, so j + k = L

	tst.l		%d0
	bge.b		Mod_Loop_pre		# L >= 0: run the subtract/shift loop

# L < 0, i.e. expo(X) < expo(Y): X is already mod(X,Y).
	mov.l		(%sp)+,%d0		# recover the biased exponent of X
	bra.w		Get_Mod
9541
9542
Mod_Loop_pre:
	addq.l		&0x4,%sp		# drop the stacked exponent of X
# On entry to the loop: R = 2^(-L)X, Q = 0, k = 0 and k + j = L.
Mod_Loop:
	tst.l		%d6			# did the last doubling carry out?
	bgt.b		R_GT_Y			# yes: R exceeds Y, no compare needed

# No carry pending: compare R = (D1,D2) with Y = (D4,D5).
	cmp.l		%d1,%d4			# hi(R) against hi(Y)
	bne.b		R_NE_Y
	cmp.l		%d2,%d5			# lo(R) against lo(Y)
	bne.b		R_NE_Y

# R and Y are identical.
	bra.w		Rem_is_0

R_NE_Y:
# The flags still come from whichever compare found the difference.
	bcs.b		R_LT_Y			# borrow set <=> R < Y

R_GT_Y:
# Either the carry is set and Y < (Carry,D1,D2) < 2Y, or the carry is
# clear and Y < (D1,D2) < 2Y. In both cases subtract Y once.
	sub.l		%d5,%d2			# lo(R) - lo(Y)
	subx.l		%d4,%d1			# hi(R) - hi(Y) - borrow
	clr.l		%d6			# carry consumed
	addq.l		&1,%d3			# Q := Q + 1

R_LT_Y:
# Here carry = 0 and R < Y, with R = 2^(k-L)X - QY, k + j = L, j >= 0.
	tst.l		%d0			# j = 0?
	beq.b		PostLoop		# yes: the loop is finished

	add.l		%d3,%d3			# Q := 2Q
	add.l		%d2,%d2			# lo(R) := 2 lo(R)
	roxl.l		&1,%d1			# hi(R) := 2 hi(R) + bit shifted out of lo
	scs		%d6			# latch the bit shifted out of hi(R)
	addq.l		&1,%a1			# k := k + 1
	subq.l		&1,%d0			# j := j - 1
# Now R = (Carry,D1,D2) = 2^(k-L)X - QY, j + k = L, j >= 0, R < 2Y.

	bra.b		Mod_Loop
9584
9585
PostLoop:
# Loop exit: k = L, j = 0, carry = 0, R = (D1,D2) = X - QY and R < Y.
# Normalize R; it starts out with Y's (re-biased) exponent.
	mov.l		L_SCR1(%a6),%d0		# D0 := biased exponent of Y
	tst.l		%d1
	bne.b		HiR_not0

HiR_0:
# Upper longword is empty: promote the lower longword (worth 32 shifts).
	mov.l		%d2,%d1
	clr.l		%d2
	sub.l		&32,%d0
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# D6 := offset of first set bit
	lsl.l		%d6,%d1
	sub.l		%d6,%d0			# (D0,D1,D2) normalized, bias $7FFD
	bra.b		Get_Mod

HiR_not0:
	clr.l		%d6
	bfffo		%d1{&0:&32},%d6		# D6 := offset of first set bit
	bmi.b		Get_Mod			# msb already set: nothing to shift
	sub.l		%d6,%d0
	lsl.l		%d6,%d1
	mov.l		%d2,%d7			# scratch copy of the low longword
	lsl.l		%d6,%d2
	neg.l		%d6
	add.l		&32,%d6			# D6 := 32 - shift count
	lsr.l		%d6,%d7			# bits crossing from lo into hi
	or.l		%d7,%d1			# (D0,D1,D2) normalized
9616
9617
#
9618
Get_Mod:
# Materialize R in %fp0 and Y in the Y scratch slot. If R's exponent
# (bias $7FFD) is at least $41FE, both exponents are brought back by
# $3FFE (No_Scale). Otherwise both are stored as they are and Sc_Flag
# asks the exit code to multiply the result by Scale (Do_Scale).
	cmp.l		%d0,&0x000041FE
	bge.b		No_Scale
Do_Scale:
	mov.w		%d0,R(%a6)		# exponent word of R, still bias $7FFD
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	mov.l		L_SCR1(%a6),%d6
	mov.w		%d6,Y(%a6)		# exponent word of Y, still bias $7FFD
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	fmov.x		R(%a6),%fp0		# no exception
	mov.b		&1,Sc_Flag(%a6)		# rescale on the way out
	bra.b		ModOrRem
No_Scale:
	mov.l		%d1,R_Hi(%a6)
	mov.l		%d2,R_Lo(%a6)
	sub.l		&0x3FFE,%d0		# undo the extra bias on R
	mov.w		%d0,R(%a6)
	mov.l		L_SCR1(%a6),%d6
	sub.l		&0x3FFE,%d6		# undo the extra bias on Y
	mov.l		%d6,L_SCR1(%a6)		# ModOrRem re-reads this value
	fmov.x		R(%a6),%fp0
	mov.w		%d6,Y(%a6)
	mov.l		%d4,Y_Hi(%a6)
	mov.l		%d5,Y_Lo(%a6)
	clr.b		Sc_Flag(%a6)		# no rescaling needed
9645
9646
#
9647
ModOrRem:
# Steps 4/5: MOD is done at this point. For REM, compare R with Y/2 and
# take one more subtraction when R is the larger of the two.
	tst.b		Mod_Flag(%a6)
	beq.b		Fix_Sign		# MOD requested: skip the REM adjustment

	mov.l		L_SCR1(%a6),%d6		# biased exponent of Y (as stored above)
	subq.l		&1,%d6			# biased exponent of Y/2
	cmp.l		%d0,%d6			# exponent of R against exponent of Y/2
	blt.b		Fix_Sign		# smaller exponent: R < Y/2
	bgt.b		Last_Sub		# larger exponent: R > Y/2

# Exponents match, so the mantissas decide.
	cmp.l		%d1,%d4
	bne.b		Not_EQ
	cmp.l		%d2,%d5
	bne.b		Not_EQ
	bra.w		Tie_Case		# R = Y/2 exactly

Not_EQ:
	bcs.b		Fix_Sign		# borrow from the deciding compare: R < Y/2

Last_Sub:
# R > Y/2: subtract Y once more and bump the quotient.
	fsub.x		Y(%a6),%fp0		# no exceptions
	addq.l		&1,%d3			# Q := Q + 1
9670
9671
#
9672
Fix_Sign:
# Step 6: give the result the sign recorded for X.
	mov.w		SignX(%a6),%d6		# N flag mirrors bit 15 of the saved word
	bge.b		Get_Q			# bit 15 clear: leave %fp0 alone
	fneg.x		%fp0

Get_Q:
# Step 8: build the quotient byte -- sign(Q) in bit 7, low seven bits of
# Q below it -- and store it in the frame's quotient byte.
	clr.l		%d6
	mov.w		SignQ(%a6),%d6		# D6 := 0x8000 or 0
	mov.l		&8,%d7
	lsr.l		%d7,%d6			# sign moves from bit 15 down to bit 7
	and.l		&0x0000007F,%d3		# keep seven bits of Q
	or.l		%d6,%d3			# merge sign and magnitude
# Earlier sequence that wrote the live FPSR directly, kept for reference:
#	swap		%d3
#	fmov.l		%fpsr,%d6
#	and.l		&0xFF00FFFF,%d6
#	or.l		%d3,%d6
#	fmov.l		%d6,%fpsr		# put Q in fpsr
	mov.b		%d3,FPSR_QBYTE(%a6)	# quotient byte goes to the frame copy
9693
9694
#
9695
Restore:
# Common exit: unstack %d2-%d7 and the control bits stacked at entry,
# load those bits into FPCR, then issue one final FP instruction (fmul
# by Scale if rescaling is pending, otherwise fmov) before t_catch2.
	movm.l		(%sp)+,&0xfc		# {%d2-%d7}
	mov.l		(%sp)+,%d0		# control bits stacked by smod/srem
	fmov.l		%d0,%fpcr
	tst.b		Sc_Flag(%a6)
	beq.b		Finish
	mov.b		&FMUL_OP,%d1		# last inst is MUL
	fmul.x		Scale(%pc),%fp0		# may cause underflow
	bra		t_catch2
# Note carried over from the original: the '040 package branched to
# t_avoid_unsupp here, apparently to check whether the dst operand of
# the preceding fmul was a denorm. It had better not be, since the
# algorithm has just finished working on fp0 and expected no exceptions.
#	bra		t_avoid_unsupp		# check for denorm as a
#						# ;result of the scaling

Finish:
	mov.b		&FMOV_OP,%d1		# last inst is MOVE
	fmov.x		%fp0,%fp0		# capture exceptions & round
	bra		t_catch2
9715
9716
Rem_is_0:
# Step 9: R = 2^(-j)X - QY = Y, so the true remainder is zero and the
# quotient is 2^j (Q+1). Only seven quotient bits are reported, so a
# shift of eight or more positions leaves nothing to report.
	addq.l		&1,%d3			# Q := Q + 1
	cmp.l		%d0,&8			# D0 is j
	bge.b		Q_Big

	lsl.l		%d0,%d3			# Q := 2^j (Q+1)
	bra.b		Set_R_0

Q_Big:
	clr.l		%d3			# every reported bit is zero

Set_R_0:
	fmov.s		&0x00000000,%fp0	# R := +0 (Fix_Sign applies X's sign)
	clr.b		Sc_Flag(%a6)		# nothing to rescale
	bra.w		Fix_Sign
9732
9733
Tie_Case:
# Step 5.3: R = Y/2 exactly. An even Q is left as it is; an odd Q is
# bumped by one and the recorded sign of X is inverted.
	mov.l		%d3,%d6
	and.l		&0x00000001,%d6		# isolate the parity bit; Z set when Q is even
	beq.w		Fix_Sign		# Q is even

# Q is odd: Q := Q + 1, signX := -signX.
	addq.l		&1,%d3
	mov.w		SignX(%a6),%d6
	eor.l		&0x00008000,%d6		# toggle bit 15 of the saved word
	mov.w		%d6,SignX(%a6)
	bra.w		Fix_Sign
9746
9747
#########################################################################
9748
# XDEF **************************************************************** #
9749
# tag(): return the optype of the input ext fp number #
9750
# #
9751
# This routine is used by the 060FPLSP. #
9752
# #
9753
# XREF **************************************************************** #
9754
# None #
9755
# #
9756
# INPUT *************************************************************** #
9757
# a0 = pointer to extended precision operand #
9758
# #
9759
# OUTPUT ************************************************************** #
9760
# d0 = value of type tag #
9761
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
9762
# #
9763
# ALGORITHM *********************************************************** #
9764
# Simply test the exponent, j-bit, and mantissa values to #
9765
# determine the type of operand. #
9766
# If it's an unnormalized zero, alter the operand and force it #
9767
# to be a normal zero. #
9768
# #
9769
#########################################################################
9770
9771
# tag: classify the extended-precision operand at (%a0); type code in %d0.
# NOTE(review): the NaN path below always reports QNAN although the
# header lists SNAN as a possible result -- confirm this is intended.
	global		tag
tag:
	mov.w		FTEMP_EX(%a0), %d0	# fetch sign/exponent word
	andi.w		&0x7fff, %d0		# keep the exponent only
	cmpi.w		%d0, &0x7fff		# all-ones exponent?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# top mantissa bit (j-bit) set?
	beq.b		not_norm_x
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# exponent zero?
	bne.b		is_unnorm_x		# no: j-bit clear with exponent != 0
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# exponent zero: any mantissa bit set?
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
is_unnorm_x:
	bsr.l		unnorm_fix		# per the original note: convert to norm, denorm, or zero
	rts					# %d0 is returned exactly as unnorm_fix left it
is_unnorm_reg_x:
# No branch in this routine targets this label.
	mov.b		&UNNORM, %d0
	rts
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)		# any low mantissa bit => NaN
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# the msb is a don't-care
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	mov.b		&QNAN, %d0
	rts
9815
9816
#############################################################
9817
9818
qnan:
# Extended-precision NaN image loaded into %fp0 by t_operr:
# exponent all ones, every mantissa bit set.
	long		0x7fff0000, 0xffffffff, 0xffffffff
9819
9820
#########################################################################
9821
# XDEF **************************************************************** #
9822
# t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. #
9823
# t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. #
9824
# #
9825
# These routines are used by the 060FPLSP package. #
9826
# #
9827
# XREF **************************************************************** #
9828
# None #
9829
# #
9830
# INPUT *************************************************************** #
9831
# a0 = pointer to extended precision source operand. #
9832
# #
9833
# OUTPUT ************************************************************** #
9834
# fp0 = default DZ result. #
9835
# #
9836
# ALGORITHM *********************************************************** #
9837
# Transcendental emulation for the 060FPLSP has detected that #
9838
# a DZ exception should occur for the instruction. If DZ is disabled, #
9839
# return the default result. #
9840
# If DZ is enabled, the dst operand should be returned unscathed #
9841
# in fp0 while fp1 is used to create a DZ exception so that the #
9842
# operating system can log that such an event occurred. #
9843
# #
9844
#########################################################################
9845
9846
# t_dz: pick the -INF or +INF divide-by-zero path from the source sign.
	global		t_dz
t_dz:
	tst.b		SRC_EX(%a0)		# sign is the msb of the first byte
	bpl.b		dz_pinf			# non-negative source: +INF path

# t_dz2: negative path; entered directly or by falling out of t_dz.
	global		t_dz2
t_dz2:
	ori.l		&dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ

	btst		&dz_bit,FPCR_ENABLE(%a6)
	bne.b		dz_minf_ena

# DZ traps disabled: the default result is -INF.
	fmov.s		&0xff800000,%fp0	# %fp0 := -INF
	rts

# DZ traps enabled: give the saved fp0 back untouched and provoke the
# exception in %fp1 so that it can be recorded.
dz_minf_ena:
	fmovm.x		EXC_FP0(%a6),&0x80	# reload fp0 from the frame
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmov.s		&0xbf800000,%fp1	# %fp1 := -1
	fdiv.s		&0x00000000,%fp1	# -1 / 0
	rts

dz_pinf:
	ori.l		&dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ

	btst		&dz_bit,FPCR_ENABLE(%a6)
	bne.b		dz_pinf_ena

# DZ traps disabled: the default result is +INF.
	fmov.s		&0x7f800000,%fp0	# %fp0 := +INF
	rts

# DZ traps enabled: give the saved fp0 back untouched and provoke the
# exception in %fp1 so that it can be recorded.
dz_pinf_ena:
	fmovm.x		EXC_FP0(%a6),&0x80	# reload fp0 from the frame
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmov.s		&0x3f800000,%fp1	# %fp1 := +1
	fdiv.s		&0x00000000,%fp1	# +1 / 0
	rts
9889
9890
#########################################################################
9891
# XDEF **************************************************************** #
9892
# t_operr(): Handle 060FPLSP OPERR exception during emulation. #
9893
# #
9894
# This routine is used by the 060FPLSP package. #
9895
# #
9896
# XREF **************************************************************** #
9897
# None. #
9898
# #
9899
# INPUT *************************************************************** #
9900
# fp1 = source operand #
9901
# #
9902
# OUTPUT ************************************************************** #
9903
# fp0 = default result #
9904
# fp1 = unchanged #
9905
# #
9906
# ALGORITHM *********************************************************** #
9907
# An operand error should occur as the result of transcendental #
9908
# emulation in the 060FPLSP. If OPERR is disabled, just return a NAN #
9909
# in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 #
9910
# and the source operand in fp1. Use fp2 to create an OPERR exception #
9911
# so that the operating system can log the event. #
9912
# #
9913
#########################################################################
9914
9915
# t_operr: flag an operand error; return a NaN, or raise the exception
# for real when OPERR traps are enabled.
	global		t_operr
t_operr:
	ori.l		&opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP

	btst		&operr_bit,FPCR_ENABLE(%a6)
	bne.b		operr_ena

# OPERR traps disabled: the default result is the NaN image at qnan.
	fmovm.x		qnan(%pc),&0x80		# %fp0 := NaN
	rts

# OPERR traps enabled: give the saved fp0 back untouched and provoke the
# exception in %fp2, whose previous value is preserved around the multiply.
operr_ena:
	fmovm.x		EXC_FP0(%a6),&0x80	# reload fp0 from the frame
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmovm.x		&0x04,-(%sp)		# push fp2
	fmov.s		&0x7f800000,%fp2	# %fp2 := +INF
	fmul.s		&0x00000000,%fp2	# +INF x 0
	fmovm.x		(%sp)+,&0x20		# pop fp2
	rts
9936
9937
# Extended-precision operands multiplied together by the overflow,
# underflow and inexact helpers to provoke the matching exception.
pls_huge:
	long		0x7ffe0000,0xffffffff,0xffffffff	# exponent $7ffe, all mantissa bits set
mns_huge:
	long		0xfffe0000,0xffffffff,0xffffffff	# same magnitude, sign bit set
pls_tiny:
	long		0x00000000,0x80000000,0x00000000	# exponent 0, only the top mantissa bit set
mns_tiny:
	long		0x80000000,0x80000000,0x00000000	# same magnitude, sign bit set
9945
9946
#########################################################################
9947
# XDEF **************************************************************** #
9948
# t_unfl(): Handle 060FPLSP underflow exception during emulation. #
9949
# t_unfl2(): Handle 060FPLSP underflow exception during #
9950
# emulation. result always positive. #
9951
# #
9952
# This routine is used by the 060FPLSP package. #
9953
# #
9954
# XREF **************************************************************** #
9955
# None. #
9956
# #
9957
# INPUT *************************************************************** #
9958
# a0 = pointer to extended precision source operand #
9959
# #
9960
# OUTPUT ************************************************************** #
9961
# fp0 = default underflow result #
9962
# #
9963
# ALGORITHM *********************************************************** #
9964
# An underflow should occur as the result of transcendental #
9965
# emulation in the 060FPLSP. Create an underflow by using "fmul" #
9966
# and two very small numbers of appropriate sign so the operating #
9967
# system can log the event. #
9968
# #
9969
#########################################################################
9970
9971
# t_unfl: provoke an underflow whose sign follows the source operand.
	global		t_unfl
t_unfl:
	tst.b		SRC_EX(%a0)		# sign is the msb of the first byte
	bpl.b		unf_pos			# non-negative source: positive result

# t_unfl2: entered directly, or by falling out of t_unfl for a negative
# source. This path multiplies mns_tiny by pls_tiny and sets the N bit.
# NOTE(review): the routine header describes t_unfl2 as "result always
# positive", which does not match this code -- verify against the callers.
	global		t_unfl2
t_unfl2:
	ori.l		&unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX

	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmovm.x		mns_tiny(%pc),&0x80	# %fp0 := -tiny
	fmul.x		pls_tiny(%pc),%fp0	# -tiny x +tiny

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# top FPSR byte rotates into the low byte
	mov.b		%d0,FPSR_CC(%a6)	# store it as the frame's ccode byte
	rts
unf_pos:
	ori.w		&unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX

	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmovm.x		pls_tiny(%pc),&0x80	# %fp0 := +tiny
	fmul.x		%fp0,%fp0		# +tiny squared

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# top FPSR byte rotates into the low byte
	mov.b		%d0,FPSR_CC(%a6)	# store it as the frame's ccode byte
	rts
9999
10000
#########################################################################
10001
# XDEF **************************************************************** #
10002
# t_ovfl(): Handle 060FPLSP overflow exception during emulation. #
10003
# (monadic) #
10004
# t_ovfl2(): Handle 060FPLSP overflow exception during #
10005
# emulation. result always positive. (dyadic) #
10006
# t_ovfl_sc(): Handle 060FPLSP overflow exception during #
10007
# emulation for "fscale". #
10008
# #
10009
# This routine is used by the 060FPLSP package. #
10010
# #
10011
# XREF **************************************************************** #
10012
# None. #
10013
# #
10014
# INPUT *************************************************************** #
10015
# a0 = pointer to extended precision source operand #
10016
# #
10017
# OUTPUT ************************************************************** #
10018
# fp0 = default overflow result #
10019
# #
10020
# ALGORITHM *********************************************************** #
10021
# An overflow should occur as the result of transcendental #
10022
# emulation in the 060FPLSP. Create an overflow by using "fmul" #
10023
# and two very large numbers of appropriate sign so the operating #
10024
# system can log the event. #
10025
# For t_ovfl_sc() we take special care not to lose the INEX2 bit. #
10026
# #
10027
#########################################################################
10028
10029
# t_ovfl_sc: overflow entry for "fscale". OVFL/AOVFL/AINEX are set up
# front; INEX2 is added only if mantissa bits would be lost at the
# rounding precision found in %d0. Then joins ovfl_work.
	global		t_ovfl_sc
t_ovfl_sc:
	ori.l		&ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX

	mov.b		%d0,%d1			# rounding precision/mode byte
	andi.b		&0xc0,%d1		# keep the precision field only
	beq.w		ovfl_work		# field is zero: skip the inexact check

# Per the original note, the dst operand here is a DENORM: normalize a
# private copy of its mantissa so the caller's operand is left intact,
# then see whether the result would be inexact at the given precision.
	mov.w		LOCAL_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		LOCAL_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		LOCAL_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# %a0 now points at the copy
	movm.l		&0xc080,-(%sp)		# push d0-d1/a0
	bsr.l		norm			# normalize the mantissa of the copy
	movm.l		(%sp)+,&0x0103		# pop d0-d1/a0

	cmpi.b		%d1,&0x40		# single precision?
	bne.b		ovfl_sc_dbl		# no: double
ovfl_sc_sgl:
	tst.l		LOCAL_LO(%a0)		# any bit in the low longword?
	bne.b		ovfl_sc_inx		# yes
	tst.b		3+LOCAL_HI(%a0)		# any bit in the low byte of the high longword?
	bne.b		ovfl_sc_inx		# yes
	bra.w		ovfl_work		# exact: leave INEX2 alone
ovfl_sc_dbl:
	mov.l		LOCAL_LO(%a0),%d1	# any of the low 11 mantissa
	andi.l		&0x7ff,%d1		# bits set?
	beq.w		ovfl_work		# no: leave INEX2 alone
ovfl_sc_inx:
	ori.l		&inex2_mask,USER_FPSR(%a6) # set INEX2
	bra.b		ovfl_work

# t_ovfl: overflow entry for monadic operations; the result takes the
# sign of the operand at (%a0).
	global		t_ovfl
t_ovfl:
	ori.w		&ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
ovfl_work:
	tst.b		SRC_EX(%a0)		# sign is the msb of the first byte
	bpl.b		ovfl_p
ovfl_m:
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmovm.x		mns_huge(%pc),&0x80	# %fp0 := -huge
	fmul.x		pls_huge(%pc),%fp0	# -huge x +huge overflows

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# top FPSR byte rotates into the low byte
# Byte-wide operation, so use the byte-sized mask. This file pairs
# neg_mask with longword operations on USER_FPSR and neg_bmask with
# byte operations on FPSR_CC; the previous "ori.b &neg_mask" mixed them.
	ori.b		&neg_bmask,%d0		# force the N ccode bit
	mov.b		%d0,FPSR_CC(%a6)	# store it as the frame's ccode byte
	rts
ovfl_p:
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmovm.x		pls_huge(%pc),&0x80	# %fp0 := +huge
	fmul.x		pls_huge(%pc),%fp0	# +huge x +huge overflows

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# top FPSR byte rotates into the low byte
	mov.b		%d0,FPSR_CC(%a6)	# store it as the frame's ccode byte
	rts
10089
10090
# t_ovfl2: overflow entry that always produces a positive result; the
# operand at (%a0) is not consulted.
	global		t_ovfl2
t_ovfl2:
	ori.w		&ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmovm.x		pls_huge(%pc),&0x80	# %fp0 := +huge
	fmul.x		pls_huge(%pc),%fp0	# +huge x +huge overflows

	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# top FPSR byte rotates into the low byte
	mov.b		%d0,FPSR_CC(%a6)	# store it as the frame's ccode byte
	rts
10101
10102
#########################################################################
10103
# XDEF **************************************************************** #
10104
# t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10105
# emulation. #
10106
# t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during #
10107
# emulation. #
10108
# #
10109
# These routines are used by the 060FPLSP package. #
10110
# #
10111
# XREF **************************************************************** #
10112
# None. #
10113
# #
10114
# INPUT *************************************************************** #
10115
# fp0 = default underflow or overflow result #
10116
# #
10117
# OUTPUT ************************************************************** #
10118
# fp0 = default result #
10119
# #
10120
# ALGORITHM *********************************************************** #
10121
# If an overflow or underflow occurred during the last #
10122
# instruction of transcendental 060FPLSP emulation, then it has already #
10123
# occurred and has been logged. Now we need to see if an inexact #
10124
# exception should occur. #
10125
# #
10126
#########################################################################
10127
10128
# t_catch2: merge the live FPSR into the frame's copy, then go straight
# to the inexact-enable check at inx2_work.
	global		t_catch2
t_catch2:
	fmov.l		%fpsr,%d0
	or.l		%d0,USER_FPSR(%a6)	# accumulate status bits in the frame
	bra.b		inx2_work

# t_catch: same merge, but execution continues with whatever code
# follows this block in the file (there is no branch or return here).
	global		t_catch
t_catch:
	fmov.l		%fpsr,%d0
	or.l		%d0,USER_FPSR(%a6)	# accumulate status bits in the frame
10138
10139
#########################################################################
10140
# XDEF **************************************************************** #
10141
# t_inx2(): Handle inexact 060FPLSP exception during emulation. #
10142
# t_pinx2(): Handle inexact 060FPLSP exception for "+" results. #
10143
# t_minx2(): Handle inexact 060FPLSP exception for "-" results. #
10144
# #
10145
# XREF **************************************************************** #
10146
# None. #
10147
# #
10148
# INPUT *************************************************************** #
10149
# fp0 = default result #
10150
# #
10151
# OUTPUT ************************************************************** #
10152
# fp0 = default result #
10153
# #
10154
# ALGORITHM *********************************************************** #
10155
# The last instruction of transcendental emulation for the #
10156
# 060FPLSP should be inexact. So, if inexact is enabled, then we create #
10157
# the event here by adding a large and very small number together #
10158
# so that the operating system can log the event. #
10159
# Must check, too, if the result was zero, in which case we just #
10160
# set the FPSR bits and return. #
10161
# #
10162
#########################################################################
10163
10164
# t_inx2: dispatch on the current FPU condition codes -- negative goes
# to t_minx2, zero to inx2_zero, everything else falls into t_pinx2.
	global		t_inx2
t_inx2:
	fblt.w		t_minx2
	fbeq.w		inx2_zero

# t_pinx2: inexact, positive result.
	global		t_pinx2
t_pinx2:
	ori.w		&inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX
	bra.b		inx2_work

# t_minx2: inexact, negative result.
	global		t_minx2
t_minx2:
	ori.l		&inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX

inx2_work:
	btst		&inex2_bit,FPCR_ENABLE(%a6) # inexact traps enabled?
	bne.b		inx2_work_ena		# yes: provoke one
	rts
inx2_work_ena:
# Add a tiny value to 1.0 in %fp1 under the user's FPCR so that the
# inexact exception is actually raised.
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmov.s		&0x3f800000,%fp1	# %fp1 := +1
	fadd.x		pls_tiny(%pc),%fp1	# +1 plus +tiny is inexact
	rts

inx2_zero:
# Zero result: only the status bits are updated; nothing is provoked.
	mov.b		&z_bmask,FPSR_CC(%a6)	# ccode byte := Z
	ori.w		&inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX
	rts
10192
10193
#########################################################################
10194
# XDEF **************************************************************** #
10195
# t_extdnrm(): Handle DENORM inputs in 060FPLSP. #
10196
# t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". #
10197
# #
10198
# This routine is used by the 060FPLSP package. #
10199
# #
10200
# XREF **************************************************************** #
10201
# None. #
10202
# #
10203
# INPUT *************************************************************** #
10204
# a0 = pointer to extended precision input operand #
10205
# #
10206
# OUTPUT ************************************************************** #
10207
# fp0 = default result #
10208
# #
10209
# ALGORITHM *********************************************************** #
10210
# For all functions that have a denormalized input and that #
10211
# f(x)=x, this is the entry point. #
10212
# DENORM value is moved using "fmove" which triggers an exception #
10213
# if enabled so the operating system can log the event. #
10214
# #
10215
#########################################################################
10216
10217
# t_extdnrm: return the operand at (%a0) through an fmove performed
# under the user's FPCR, then fold the resulting FPSR plus the
# underflow/inexact bits into the frame's FPSR.
	global		t_extdnrm
t_extdnrm:
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmov.x		SRC_EX(%a0),%fp0	# %fp0 := operand
	fmov.l		%fpsr,%d0
	ori.l		&unfinx_mask,%d0	# add the bits named by unfinx_mask
	or.l		%d0,USER_FPSR(%a6)
	rts

# t_resdnrm: same move, but only the FPSR bits produced by the fmove
# itself are folded into the frame's FPSR.
	global		t_resdnrm
t_resdnrm:
	fmov.l		USER_FPCR(%a6),%fpcr	# run with the user's control bits
	fmov.x		SRC_EX(%a0),%fp0	# %fp0 := operand
	fmov.l		%fpsr,%d0
	or.l		%d0,USER_FPSR(%a6)
	rts
10233
10234
##########################################
10235
10236
#
# sto_cos:
#	Hook used by the fsincos library emulation. Both results are
#	already sitting in fp0 and fp1 when control arrives here, so
#	the routine returns immediately.
#
	global		sto_cos
sto_cos:
	rts
10244
10245
##########################################
10246
10247
#
# dst_qnan --- the destination operand is a NaN: hand it back in fp0
# and set the NAN ccode bit, plus N when its sign bit is set.
#
	global		dst_qnan
dst_qnan:
	fmov.x		DST(%a1),%fp0		# %fp0 := destination operand
	tst.b		DST_EX(%a1)		# sign is the msb of the first byte
	bmi.b		dst_qnan_m
dst_qnan_p:
	mov.b		&nan_bmask,FPSR_CC(%a6)
	rts
dst_qnan_m:
	mov.b		&nan_bmask+neg_bmask,FPSR_CC(%a6)
	rts
10261
10262
#
# src_qnan --- the source operand is a NaN: hand it back in fp0
# and set the NAN ccode bit, plus N when its sign bit is set.
#
	global		src_qnan
src_qnan:
	fmov.x		SRC(%a0),%fp0		# %fp0 := source operand
	tst.b		SRC_EX(%a0)		# sign is the msb of the first byte
	bmi.b		src_qnan_m
src_qnan_p:
	mov.b		&nan_bmask,FPSR_CC(%a6)
	rts
src_qnan_m:
	mov.b		&nan_bmask+neg_bmask,FPSR_CC(%a6)
	rts
10276
10277
##########################################
10278
10279
#
10280
# Native instruction support
10281
#
10282
# Some systems may need entry points even for 68060 native
10283
# instructions. These routines are provided for
10284
# convenience.
10285
#
10286
# _fadds_ / _faddd_ / _faddx_: add two stack-passed operands (dst first,
# then src) and leave the sum in %fp0. The sgl and dbl forms fetch the
# dst with FPCR zeroed and restore the caller's FPCR before adding.
	global		_fadds_
_fadds_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.s		0x8(%sp),%fp0		# sgl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fadd.s		0x8(%sp),%fp0		# %fp0 := dst + sgl src
	rts

	global		_faddd_
_faddd_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.d		0x8(%sp),%fp0		# dbl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fadd.d		0xc(%sp),%fp0		# %fp0 := dst + dbl src
	rts

	global		_faddx_
_faddx_:
	fmovm.x		0x4(%sp),&0x80		# ext dst
	fadd.x		0x10(%sp),%fp0		# %fp0 := dst + ext src
	rts
10309
10310
# _fsubs_ / _fsubd_ / _fsubx_: subtract the second stack-passed operand
# (src) from the first (dst); difference in %fp0. The sgl and dbl forms
# fetch the dst with FPCR zeroed and restore the caller's FPCR first.
	global		_fsubs_
_fsubs_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.s		0x8(%sp),%fp0		# sgl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fsub.s		0x8(%sp),%fp0		# %fp0 := dst - sgl src
	rts

	global		_fsubd_
_fsubd_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.d		0x8(%sp),%fp0		# dbl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fsub.d		0xc(%sp),%fp0		# %fp0 := dst - dbl src
	rts

	global		_fsubx_
_fsubx_:
	fmovm.x		0x4(%sp),&0x80		# ext dst
	fsub.x		0x10(%sp),%fp0		# %fp0 := dst - ext src
	rts
10333
10334
# _fmuls_ / _fmuld_ / _fmulx_: multiply two stack-passed operands (dst
# first, then src); product in %fp0. The sgl and dbl forms fetch the
# dst with FPCR zeroed and restore the caller's FPCR before multiplying.
	global		_fmuls_
_fmuls_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.s		0x8(%sp),%fp0		# sgl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fmul.s		0x8(%sp),%fp0		# %fp0 := dst x sgl src
	rts

	global		_fmuld_
_fmuld_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.d		0x8(%sp),%fp0		# dbl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fmul.d		0xc(%sp),%fp0		# %fp0 := dst x dbl src
	rts

	global		_fmulx_
_fmulx_:
	fmovm.x		0x4(%sp),&0x80		# ext dst
	fmul.x		0x10(%sp),%fp0		# %fp0 := dst x ext src
	rts
10357
10358
# _fdivs_ / _fdivd_ / _fdivx_: divide the first stack-passed operand
# (dst) by the second (src); quotient in %fp0. The sgl and dbl forms
# fetch the dst with FPCR zeroed and restore the caller's FPCR first.
	global		_fdivs_
_fdivs_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.s		0x8(%sp),%fp0		# sgl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fdiv.s		0x8(%sp),%fp0		# %fp0 := dst / sgl src
	rts

	global		_fdivd_
_fdivd_:
	fmov.l		%fpcr,-(%sp)		# park the caller's FPCR
	fmov.l		&0x00000000,%fpcr	# zero FPCR for the operand load
	fmov.d		0x8(%sp),%fp0		# dbl dst (offset includes the parked FPCR)
	fmov.l		(%sp)+,%fpcr		# caller's FPCR back in force
	fdiv.d		0xc(%sp),%fp0		# %fp0 := dst / dbl src
	rts

	global		_fdivx_
_fdivx_:
	fmovm.x		0x4(%sp),&0x80		# ext dst
	fdiv.x		0x10(%sp),%fp0		# %fp0 := dst / ext src
	rts
10381
10382
# _fabss_ / _fabsd_ / _fabsx_: absolute value of the single stack-passed
# operand (sgl, dbl or ext); result in %fp0.
	global		_fabss_
_fabss_:
	fabs.s		0x4(%sp),%fp0		# %fp0 := |sgl src|
	rts

	global		_fabsd_
_fabsd_:
	fabs.d		0x4(%sp),%fp0		# %fp0 := |dbl src|
	rts

	global		_fabsx_
_fabsx_:
	fabs.x		0x4(%sp),%fp0		# %fp0 := |ext src|
	rts
10396
10397
# _fnegs_ / _fnegd_ / _fnegx_: negate the single stack-passed operand
# (sgl, dbl or ext); result in %fp0.
	global		_fnegs_
_fnegs_:
	fneg.s		0x4(%sp),%fp0		# %fp0 := -(sgl src)
	rts

	global		_fnegd_
_fnegd_:
	fneg.d		0x4(%sp),%fp0		# %fp0 := -(dbl src)
	rts

	global		_fnegx_
_fnegx_:
	fneg.x		0x4(%sp),%fp0		# %fp0 := -(ext src)
	rts
10411
10412
# _fsqrts_ / _fsqrtd_ / _fsqrtx_: square root of the single stack-passed
# operand (sgl, dbl or ext); result in %fp0.
	global		_fsqrts_
_fsqrts_:
	fsqrt.s		0x4(%sp),%fp0		# %fp0 := sqrt(sgl src)
	rts

	global		_fsqrtd_
_fsqrtd_:
	fsqrt.d		0x4(%sp),%fp0		# %fp0 := sqrt(dbl src)
	rts

	global		_fsqrtx_
_fsqrtx_:
	fsqrt.x		0x4(%sp),%fp0		# %fp0 := sqrt(ext src)
	rts
10426
10427
# _fints_ / _fintd_ / _fintx_: fint applied to the single stack-passed
# operand (sgl, dbl or ext); result in %fp0.
	global		_fints_
_fints_:
	fint.s		0x4(%sp),%fp0		# fint on the sgl src
	rts

	global		_fintd_
_fintd_:
	fint.d		0x4(%sp),%fp0		# fint on the dbl src
	rts

	global		_fintx_
_fintx_:
	fint.x		0x4(%sp),%fp0		# fint on the ext src
	rts
10441
10442
# _fintrzs_ / _fintrzd_ / _fintrzx_: fintrz applied to the single
# stack-passed operand (sgl, dbl or ext); result in %fp0.
	global		_fintrzs_
_fintrzs_:
	fintrz.s	0x4(%sp),%fp0		# fintrz on the sgl src
	rts

	global		_fintrzd_
_fintrzd_:
	fintrz.d	0x4(%sp),%fp0		# fintrz on the dbl src
	rts

	global		_fintrzx_
_fintrzx_:
	fintrz.x	0x4(%sp),%fp0		# fintrz on the ext src
	rts
10456
10457
########################################################################
10458
10459
#########################################################################
10460
# src_zero(): Return signed zero according to sign of src operand. #
10461
#########################################################################
10462
# src_zero: select +0 or -0 from the sign of the operand at (%a0).
	global		src_zero
src_zero:
	tst.b		SRC_EX(%a0)		# sign is the msb of the first byte
	bmi.b		ld_mzero		# negative source: -0

#
# ld_pzero(): %fp0 := +0, ccode byte := Z.
#
	global		ld_pzero
ld_pzero:
	fmov.s		&0x00000000,%fp0	# +0
	mov.b		&z_bmask,FPSR_CC(%a6)	# 'Z' ccode bit
	rts

#
# ld_mzero(): %fp0 := -0, ccode byte := N+Z.
#
	global		ld_mzero
ld_mzero:
	fmov.s		&0x80000000,%fp0	# -0
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6)	# 'N','Z' ccode bits
	rts
10482
10483
#########################################################################
10484
# dst_zero(): Return signed zero according to sign of dst operand. #
10485
#########################################################################
10486
# dst_zero: select +0 or -0 from the sign of the operand at (%a1).
	global		dst_zero
dst_zero:
	tst.b		DST_EX(%a1)		# sign is the msb of the first byte
	bmi.b		ld_mzero		# negative dst: -0
	bra.b		ld_pzero		# otherwise: +0
10491
10492
#########################################################################
10493
# src_inf(): Return signed inf according to sign of src operand. #
10494
#########################################################################
10495
global src_inf
10496
src_inf:
10497
tst.b SRC_EX(%a0) # get sign of src operand
10498
bmi.b ld_minf # if negative branch
10499
10500
#
10501
# ld_pinf(): return a positive infinity.
# The result is placed in fp0 and the FPSR_CC(%a6) byte is overwritten
# with inf_bmask. Also entered by fall-through from src_inf().
10502
#
10503
global ld_pinf
10504
ld_pinf:
10505
fmov.s &0x7f800000,%fp0 # load +INF
10506
mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
10507
rts
10508
10509
#
10510
# ld_minf(): return a negative infinity.
# The result is placed in fp0 and the FPSR_CC(%a6) byte is overwritten
# with neg_bmask+inf_bmask.
10511
#
10512
global ld_minf
10513
ld_minf:
10514
fmov.s &0xff800000,%fp0 # load -INF
10515
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
10516
rts
10517
10518
#########################################################################
# dst_inf(): hand back an infinity carrying the sign of the dst operand.#
#	a1 = pointer to the dst operand (only DST_EX(%a1) is tested)	#
#	result is produced by ld_pinf() or ld_minf()			#
#########################################################################
	global		dst_inf
dst_inf:
	tst.b		DST_EX(%a1)		# N flag <- sign of dst operand
	bpl.b		ld_pinf			# sign clear: return +INF
	bra.b		ld_minf			# sign set: return -INF
10526
10527
#################################################################
# szr_inf(): result depends only on the sign of the src operand:#
#		src negative -> +ZERO  (via ld_pzero)		#
#		src positive -> +INF   (via ld_pinf)		#
#	     Routine used for fetox, ftwotox, and ftentox.	#
#################################################################
	global		szr_inf
szr_inf:
	tst.b		SRC_EX(%a0)		# N flag <- sign of src operand
	bpl.b		ld_pinf			# positive src: +INF
	bra.b		ld_pzero		# negative src: +0
10537
10538
#########################################################################
# sopr_inf(): result depends only on the sign of the src operand:	#
#		src positive -> +INF (via ld_pinf)			#
#		src negative -> operand error routine (t_operr)		#
#	      Routine used for flogn, flognp1, flog10, and flog2.	#
#########################################################################
	global		sopr_inf
sopr_inf:
	tst.b		SRC_EX(%a0)		# N flag <- sign of src operand
	bpl.b		ld_pinf			# positive src: +INF
	bra.w		t_operr			# negative src: operand error
10548
10549
#################################################################
# setoxm1i(): result depends only on the sign of the src operand:#
#		src negative -> -1    (via ld_mone)		#
#		src positive -> +INF  (via ld_pinf)		#
#	      Routine used for fetoxm1.				#
#################################################################
	global		setoxm1i
setoxm1i:
	tst.b		SRC_EX(%a0)		# N flag <- sign of src operand
	bpl.b		ld_pinf			# positive src: +INF
	bra.b		ld_mone			# negative src: -1
10559
10560
#########################################################################
10561
# src_one(): Return signed one according to sign of src operand. #
# a0 = pointer to the src operand; only the SRC_EX(%a0) byte is tested.
10562
#########################################################################
10563
global src_one
10564
src_one:
10565
tst.b SRC_EX(%a0) # check sign of source
10566
bmi.b ld_mone
# positive src: execution falls through into ld_pone() directly below,
# so this routine must stay immediately in front of it.
10567
10568
#
10569
# ld_pone(): return positive one.
# The result is placed in fp0 and the FPSR_CC(%a6) byte is cleared.
# Also entered by fall-through from src_one().
10570
#
10571
global ld_pone
10572
ld_pone:
10573
fmov.s &0x3f800000,%fp0 # load +1
10574
clr.b FPSR_CC(%a6) # no ccode bits set
10575
rts
10576
10577
#
10578
# ld_mone(): return negative one.
# The result is placed in fp0 and the FPSR_CC(%a6) byte is overwritten
# with neg_bmask.
10579
#
10580
global ld_mone
10581
ld_mone:
10582
fmov.s &0xbf800000,%fp0 # load -1
10583
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
10584
rts
10585
10586
# +PI/2 and -PI/2, three longwords each; they differ only in the first
# longword. Read pc-relative with fmov.x by ld_ppi2() and ld_mpi2().
ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235
10587
mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235
10588
10589
#################################################################
10590
# spi_2(): Return signed PI/2 according to sign of src operand. #
# a0 = pointer to the src operand; only the SRC_EX(%a0) byte is tested.
10591
#################################################################
10592
global spi_2
10593
spi_2:
10594
tst.b SRC_EX(%a0) # check sign of source
10595
bmi.b ld_mpi2
# positive src: execution falls through into ld_ppi2() directly below,
# so this routine must stay immediately in front of it.
10596
10597
#
10598
# ld_ppi2(): return positive PI/2.
# d0 is written to the FPCR before the constant is loaded into fp0, then
# control passes to t_pinx2. Also entered by fall-through from spi_2().
# NOTE(review): d0 presumably holds the caller's rounding mode/precision -
# confirm against the callers.
10599
#
10600
global ld_ppi2
10601
ld_ppi2:
10602
fmov.l %d0,%fpcr # FPCR <- d0 before the load
10603
fmov.x ppiby2(%pc),%fp0 # load +pi/2
10604
bra.w t_pinx2 # set INEX2
10605
10606
#
10607
# ld_mpi2(): return negative PI/2.
# d0 is written to the FPCR before the constant is loaded into fp0, then
# control passes to t_minx2.
# NOTE(review): d0 presumably holds the caller's rounding mode/precision -
# confirm against the callers.
10608
#
10609
global ld_mpi2
10610
ld_mpi2:
10611
fmov.l %d0,%fpcr # FPCR <- d0 before the load
10612
fmov.x mpiby2(%pc),%fp0 # load -pi/2
10613
bra.w t_minx2 # set INEX2
10614
10615
####################################################
10616
# The following routines give support for fsincos. #
10617
####################################################
10618
10619
#
10620
# ssincosz(): When the src operand is ZERO, store a one in the
10621
# cosine register and return a ZERO in fp0 w/ the same sign
10622
# as the src operand.
# The cosine result is written to fp1; FPSR_CC(%a6) is set for the
# zero returned in fp0.
10623
#
10624
global ssincosz
10625
ssincosz:
10626
fmov.s &0x3f800000,%fp1 # cosine result = +1
10627
tst.b SRC_EX(%a0) # test sign
10628
bpl.b sincoszp # positive src: return +0
10629
fmov.s &0x80000000,%fp0 # return sin result in fp0
10630
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
10631
rts
10632
sincoszp:
10633
fmov.s &0x00000000,%fp0 # return sin result in fp0
10634
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
10635
rts
10636
10637
#
10638
# ssincosi(): When the src operand is INF, store a QNAN in the cosine
10639
# register (fp1) and jump to the operand error routine.
10640
# NOTE(review): the jump is unconditional; the sign of the src operand
# is not tested here.
10641
#
10642
global ssincosi
10643
ssincosi:
10644
fmov.x qnan(%pc),%fp1 # load NAN
10645
bra.w t_operr
10646
10647
#
10648
# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
10649
# register and branch to the src QNAN routine.
# The value is read from LOCAL_EX(%a0) and written to fp1.
10650
#
10651
global ssincosqnan
10652
ssincosqnan:
10653
fmov.x LOCAL_EX(%a0),%fp1 # cosine register <- src operand
10654
bra.w src_qnan
10655
10656
########################################################################
10657
10658
# smod_sdnrm()/smod_snorm(): select a handler from the dst operand tag
# held in DTAG(%a6):
#	tag == 0	-> smod
#	tag == ZERO	-> smod_zro
#	tag == INF	-> t_operr
#	tag == DENORM	-> smod
#	anything else	-> dst_qnan
# d1 is left holding the tag byte.
	global		smod_snorm
	global		smod_sdnrm
smod_snorm:
smod_sdnrm:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		smod
	cmpi.b		%d1,&ZERO
	beq.w		smod_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		smod
10671
10672
# smod_szero(): select a handler from the dst operand tag held in
# DTAG(%a6). A tag of 0, ZERO, INF or DENORM goes to the operand error
# routine (t_operr); anything else goes to dst_qnan.
# d1 is left holding the tag byte.
	global		smod_szero
smod_szero:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		t_operr
	cmpi.b		%d1,&ZERO
	beq.l		t_operr
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		t_operr
10683
10684
# smod_sinf(): select a handler from the dst operand tag held in
# DTAG(%a6):
#	tag == 0	-> smod_fpn
#	tag == ZERO	-> smod_zro
#	tag == INF	-> t_operr
#	tag == DENORM	-> smod_fpn
#	anything else	-> dst_qnan
# d1 is left holding the tag byte.
	global		smod_sinf
smod_sinf:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		smod_fpn
	cmpi.b		%d1,&ZERO
	beq.l		smod_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		smod_fpn
10695
10696
# smod_zro()/srem_zro(): store the quotient-byte sign (src sign XOR dst
# sign, kept in bit 7) in FPSR_QBYTE(%a6), then return a zero with the
# sign of the dst operand via ld_pzero()/ld_mzero().
# d0 and d1 are used as scratch.
srem_zro:
smod_zro:
	mov.b		DST_EX(%a1),%d0		# d0 = dst sign byte
	mov.b		SRC_EX(%a0),%d1		# d1 = src sign byte
	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
	andi.b		&0x80,%d1		# keep only the sign bit
	mov.b		%d1,FPSR_QBYTE(%a6)	# record qbyte sign
	tst.b		%d0			# N flag <- dst sign
	bmi.w		ld_mzero		# negative dst: return -0
	bra.w		ld_pzero		# positive dst: return +0
10706
10707
# smod_fpn()/srem_fpn(): reached from smod_sinf()/srem_sinf() when the dst
# tag is zero-valued or DENORM. Stores the quotient-byte sign (src sign
# XOR dst sign, bit 7) in FPSR_QBYTE(%a6). A DENORM dst is handed to
# t_resdnrm with a0 pointing at it; otherwise the dst operand itself is
# loaded into fp0 and returned.
# On entry d0 holds a value that is later written to the FPCR (non-DENORM
# path) or restored for t_resdnrm (DENORM path). d1 is clobbered.
smod_fpn:
10708
srem_fpn:
10709
clr.b FPSR_QBYTE(%a6) # NOTE(review): overwritten below before any read in this routine
10710
mov.l %d0,-(%sp) # save d0; it is used as scratch below
10711
mov.b SRC_EX(%a0),%d1 # get src sign
10712
mov.b DST_EX(%a1),%d0 # get dst sign
10713
eor.b %d0,%d1 # get qbyte sign
10714
andi.b &0x80,%d1 # keep only the sign bit
10715
mov.b %d1,FPSR_QBYTE(%a6) # record qbyte sign
10716
cmpi.b DTAG(%a6),&DENORM # is dst a DENORM?
10717
bne.b smod_nrm # no
10718
lea DST(%a1),%a0 # a0 = address of dst operand
10719
mov.l (%sp)+,%d0 # restore the saved d0
10720
bra t_resdnrm
10721
smod_nrm:
10722
fmov.l (%sp)+,%fpcr # pop the saved d0 straight into the FPCR
10723
fmov.x DST(%a1),%fp0 # result = dst operand
10724
tst.b DST_EX(%a1) # is the result negative?
10725
bmi.b smod_nrm_neg # yes; flag it
10726
rts # positive: FPSR_CC(%a6) is not written on this path
10727
10728
smod_nrm_neg:
10729
mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code
10730
rts
10731
10732
#########################################################################
10733
# srem_sdnrm()/srem_snorm(): select a handler from the dst operand tag
# held in DTAG(%a6):
#	tag == 0	-> srem
#	tag == ZERO	-> srem_zro
#	tag == INF	-> t_operr
#	tag == DENORM	-> srem
#	anything else	-> dst_qnan
# d1 is left holding the tag byte.
	global		srem_sdnrm
	global		srem_snorm
srem_snorm:
srem_sdnrm:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		srem
	cmpi.b		%d1,&ZERO
	beq.w		srem_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		srem
10746
10747
# srem_szero(): select a handler from the dst operand tag held in
# DTAG(%a6). A tag of 0, ZERO, INF or DENORM goes to the operand error
# routine (t_operr); anything else goes to dst_qnan.
# d1 is left holding the tag byte.
	global		srem_szero
srem_szero:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		t_operr
	cmpi.b		%d1,&ZERO
	beq.l		t_operr
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		t_operr
10758
10759
# srem_sinf(): select a handler from the dst operand tag held in
# DTAG(%a6):
#	tag == 0	-> srem_fpn
#	tag == ZERO	-> srem_zro
#	tag == INF	-> t_operr
#	tag == DENORM	-> srem_fpn
#	anything else	-> dst_qnan
# d1 is left holding the tag byte.
	global		srem_sinf
srem_sinf:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.w		srem_fpn
	cmpi.b		%d1,&ZERO
	beq.w		srem_zro
	cmpi.b		%d1,&INF
	beq.l		t_operr
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		srem_fpn
10770
10771
#########################################################################
10772
10773
# sscale_snorm()/sscale_sdnrm(): select a handler from the dst operand
# tag held in DTAG(%a6):
#	tag == 0	-> sscale
#	tag == ZERO	-> dst_zero
#	tag == INF	-> dst_inf
#	tag == DENORM	-> sscale
#	anything else	-> dst_qnan
# d1 is left holding the tag byte.
	global		sscale_sdnrm
	global		sscale_snorm
sscale_sdnrm:
sscale_snorm:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		sscale
	cmpi.b		%d1,&ZERO
	beq.l		dst_zero
	cmpi.b		%d1,&INF
	beq.l		dst_inf
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		sscale
10786
10787
# sscale_szero(): select a handler from the dst operand tag held in
# DTAG(%a6):
#	tag == 0	-> sscale
#	tag == ZERO	-> dst_zero
#	tag == INF	-> dst_inf
#	tag == DENORM	-> sscale
#	anything else	-> dst_qnan
# d1 is left holding the tag byte.
	global		sscale_szero
sscale_szero:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		sscale
	cmpi.b		%d1,&ZERO
	beq.l		dst_zero
	cmpi.b		%d1,&INF
	beq.l		dst_inf
	cmpi.b		%d1,&DENORM
	bne.l		dst_qnan		# none of the tags above
	bra.l		sscale
10798
10799
# sscale_sinf(): select a handler from the dst operand tag held in
# DTAG(%a6). Only a QNAN tag goes to dst_qnan; a zero-valued tag and
# every other tag go to the operand error routine (t_operr).
# d1 is left holding the tag byte.
	global		sscale_sinf
sscale_sinf:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag; Z set if tag is 0
	beq.l		t_operr
	cmpi.b		%d1,&QNAN
	bne.l		t_operr			# not a QNAN: operand error
	bra.l		dst_qnan
10806
10807
########################################################################
10808
10809
# sop_sqnan(): if the dst operand tag in DTAG(%a6) is QNAN, go to
# dst_qnan; for any other dst tag go to src_qnan.
# d1 is left holding the tag byte.
	global		sop_sqnan
sop_sqnan:
	mov.b		DTAG(%a6),%d1		# d1 = dst tag
	cmpi.b		%d1,&QNAN
	bne.l		src_qnan		# dst is not a QNAN
	bra.l		dst_qnan
10815
10816
#########################################################################
# norm(): normalize the mantissa of an extended precision input. an	#
#	  operand whose mantissa msb is already set is left untouched	#
#	  and a shift count of zero is returned.			#
#									#
# XDEF ****************************************************************	#
#	norm()								#
#									#
# XREF ****************************************************************	#
#	none								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer fp extended precision operand to normalize		#
#									#
# OUTPUT **************************************************************	#
#	d0 = number of bit positions the mantissa was shifted		#
#	a0 = the input operand's mantissa is normalized; the exponent	#
#	     is unchanged.						#
#									#
# NOTES ***************************************************************	#
#	d1 is used as scratch and is not restored; d2 and d3 are	#
#	saved on the stack and restored before returning.		#
#									#
#########################################################################
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

# bfffo also set N when the msb of hi(man) is set, and the beq above left
# the ccodes alone. in that case d2 = 0 and the mantissa must not be
# touched: a register-specified bfextu width of zero means 32 bits, so the
# code below would OR all of lo(man) into hi(man).
	bmi.b		norm_hi_done		# already normalized; shift = 0

norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits (1 <= d2 <= 31)

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

norm_hi_done:
	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
10877
10878
#########################################################################
10879
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
10880
# - returns corresponding optype tag #
10881
# #
10882
# XDEF **************************************************************** #
10883
# unnorm_fix() #
10884
# #
10885
# XREF **************************************************************** #
10886
# norm() - normalize the mantissa #
10887
# #
10888
# INPUT *************************************************************** #
10889
# a0 = pointer to unnormalized extended precision number #
10890
# #
10891
# OUTPUT ************************************************************** #
10892
# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
10893
# a0 = input operand has been converted to a norm, denorm, or #
10894
# zero; both the exponent and mantissa are changed. #
10895
# #
# NOTES: d1 is used as scratch and is not restored. only the low byte
# of d0 is written with the returned tag.
# this source writes compare operands so that "cmp X, Y" followed
# by bgt branches when X > Y (see the existing comments below).
#########################################################################
10897
10898
global unnorm_fix
10899
unnorm_fix:
10900
bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
10901
bne.b unnorm_shift # hi(man) is not all zeroes
10902
10903
#
10904
# hi(man) is all zeroes so see if any bits in lo(man) are set
10905
#
10906
unnorm_chk_lo:
10907
bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
10908
beq.w unnorm_zero # yes
10909
10910
add.w &32, %d0 # no; fix shift distance
10911
10912
#
10913
# d0 = # shifts needed for complete normalization
# (0..31 when hi(man) is non-zero, 32..63 when only lo(man) has bits set)
10914
#
10915
unnorm_shift:
10916
clr.l %d1 # clear top word
10917
mov.w FTEMP_EX(%a0), %d1 # extract exponent
10918
and.w &0x7fff, %d1 # strip off sgn
10919
10920
cmp.w %d0, %d1 # will normalizing push exp < 0?
10921
bgt.b unnorm_nrm_zero # yes; denorm only until exp = 0
10922
10923
#
10924
# exponent would not go < 0. therefore, number stays normalized
10925
#
10926
sub.w %d0, %d1 # shift exponent value
10927
mov.w FTEMP_EX(%a0), %d0 # load old exponent
10928
and.w &0x8000, %d0 # save old sign
10929
or.w %d0, %d1 # {sgn,new exp}
10930
mov.w %d1, FTEMP_EX(%a0) # insert new exponent
10931
10932
bsr.l norm # normalize UNNORM
10933
10934
mov.b &NORM, %d0 # return new optype tag
10935
rts
10936
10937
#
10938
# exponent would go < 0, so only denormalize until exp = 0
# here d1 (exponent) is smaller than the shift count, which is at most 63,
# so the byte-sized compare below sees the whole value.
10939
#
10940
unnorm_nrm_zero:
10941
cmp.b %d1, &32 # is exp <= 32?
10942
bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
10943
10944
bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
10945
mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
10946
10947
mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
10948
lsl.l %d1, %d0 # extract new lo(man)
10949
mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
10950
10951
and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
10952
10953
mov.b &DENORM, %d0 # return new optype tag
10954
rts
10955
10956
#
10957
# only mantissa bits set are in lo(man)
# (exp > 32 and the shift count exceeds exp, so hi(man) must be all zeroes)
10958
#
10959
unnorm_nrm_zero_lrg:
10960
sub.w &32, %d1 # adjust shft amt by 32
10961
10962
mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
10963
lsl.l %d1, %d0 # left shift lo(man)
10964
10965
mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
10966
clr.l FTEMP_LO(%a0) # lo(man) = 0
10967
10968
and.w &0x8000, FTEMP_EX(%a0) # set exp = 0
10969
10970
mov.b &DENORM, %d0 # return new optype tag
10971
rts
10972
10973
#
10974
# whole mantissa is zero so this UNNORM is actually a zero
10975
#
10976
unnorm_zero:
10977
and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero
10978
10979
mov.b &ZERO, %d0 # fix optype tag
10980
rts
10981
10982