Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/m68k/ifpsp060/src/pfpsp.S
10820 views
1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6
7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
# freal.s:
30
# This file is appended to the top of the 060FPSP package
31
# and contains the entry points into the package. The user, in
32
# effect, branches to one of the branch table entries located
33
# after _060FPSP_TABLE.
34
# Also, subroutine stubs exist in this file (_fpsp_done for
35
# example) that are referenced by the FPSP package itself in order
36
# to call a given routine. The stub routine actually performs the
37
# callout. The FPSP code does a "bsr" to the stub routine. This
38
# extra layer of hierarchy adds a slight performance penalty but
39
# it makes the FPSP code easier to read and more maintainable.
40
#
41
42
# Byte offsets into the operating system's callout dispatch table that
# sits just below the package (addressed as _060FPSP_TABLE-0x80 by the
# stub routines later in this file).  Offsets 0x00-0x30 select the
# exception/exit callouts; each is paired with the stub named in its
# comment.
set _off_bsun, 0x00 # -> _real_bsun
43
set _off_snan, 0x04 # -> _real_snan
44
set _off_operr, 0x08 # -> _real_operr
45
set _off_ovfl, 0x0c # -> _real_ovfl
46
set _off_unfl, 0x10 # -> _real_unfl
47
set _off_dz, 0x14 # -> _real_dz
48
set _off_inex, 0x18 # -> _real_inex
49
set _off_fline, 0x1c # -> _real_fline
50
set _off_fpu_dis, 0x20 # -> _real_fpu_disabled
51
set _off_trap, 0x24 # -> _real_trap
52
set _off_trace, 0x28 # -> _real_trace
53
set _off_access, 0x2c # -> _real_access
54
set _off_done, 0x30 # -> _fpsp_done (normal exit)
55
56
# Offsets 0x40-0x68 select the memory access callouts used by the
# emulation code to read/write instruction and data memory.
set _off_imr, 0x40 # -> _imem_read
57
set _off_dmr, 0x44 # -> _dmem_read
58
set _off_dmw, 0x48 # -> _dmem_write
59
set _off_irw, 0x4c # -> _imem_read_word
60
set _off_irl, 0x50 # -> _imem_read_long
61
set _off_drb, 0x54 # -> _dmem_read_byte
62
set _off_drw, 0x58 # -> _dmem_read_word
63
set _off_drl, 0x5c # -> _dmem_read_long
64
set _off_dwb, 0x60 # -> _dmem_write_byte
65
set _off_dww, 0x64 # -> _dmem_write_word
66
set _off_dwl, 0x68 # -> _dmem_write_long
67
68
_060FPSP_TABLE:
69
70
###############################################################
71
72
# Here's the table of ENTRY POINTS for those linking the package.
73
bra.l _fpsp_snan
74
short 0x0000
75
bra.l _fpsp_operr
76
short 0x0000
77
bra.l _fpsp_ovfl
78
short 0x0000
79
bra.l _fpsp_unfl
80
short 0x0000
81
bra.l _fpsp_dz
82
short 0x0000
83
bra.l _fpsp_inex
84
short 0x0000
85
bra.l _fpsp_fline
86
short 0x0000
87
bra.l _fpsp_unsupp
88
short 0x0000
89
bra.l _fpsp_effadd
90
short 0x0000
91
92
space 56
93
94
###############################################################
95
# _fpsp_done(): callout stub for normal FPSP exit.  The FPSP code
# "bsr"s here; this routine transfers control through the _off_done
# entry of the callout table while preserving %d0:
#   1) save %d0 on the stack;
#   2) fetch the callout's table-relative offset into %d0;
#   3) push the computed target address (table base + offset);
#   4) reload the caller's %d0 from its saved slot;
#   5) rtd pops the target address into the PC and then discards the
#      4-byte saved-%d0 slot.
global _fpsp_done
96
_fpsp_done:
97
mov.l %d0,-(%sp) # save %d0
98
mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # %d0 = callout offset
99
pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push target address
100
mov.l 0x4(%sp),%d0 # restore caller's %d0
101
rtd &0x4 # jump to callout; pop saved slot
102
103
global _real_ovfl
104
_real_ovfl:
105
mov.l %d0,-(%sp)
106
mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0
107
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
108
mov.l 0x4(%sp),%d0
109
rtd &0x4
110
111
global _real_unfl
112
_real_unfl:
113
mov.l %d0,-(%sp)
114
mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0
115
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
116
mov.l 0x4(%sp),%d0
117
rtd &0x4
118
119
global _real_inex
120
_real_inex:
121
mov.l %d0,-(%sp)
122
mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0
123
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
124
mov.l 0x4(%sp),%d0
125
rtd &0x4
126
127
global _real_bsun
128
_real_bsun:
129
mov.l %d0,-(%sp)
130
mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0
131
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
132
mov.l 0x4(%sp),%d0
133
rtd &0x4
134
135
global _real_operr
136
_real_operr:
137
mov.l %d0,-(%sp)
138
mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0
139
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
140
mov.l 0x4(%sp),%d0
141
rtd &0x4
142
143
global _real_snan
144
_real_snan:
145
mov.l %d0,-(%sp)
146
mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0
147
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
148
mov.l 0x4(%sp),%d0
149
rtd &0x4
150
151
global _real_dz
152
_real_dz:
153
mov.l %d0,-(%sp)
154
mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0
155
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
156
mov.l 0x4(%sp),%d0
157
rtd &0x4
158
159
global _real_fline
160
_real_fline:
161
mov.l %d0,-(%sp)
162
mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0
163
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
164
mov.l 0x4(%sp),%d0
165
rtd &0x4
166
167
global _real_fpu_disabled
168
_real_fpu_disabled:
169
mov.l %d0,-(%sp)
170
mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0
171
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
172
mov.l 0x4(%sp),%d0
173
rtd &0x4
174
175
global _real_trap
176
_real_trap:
177
mov.l %d0,-(%sp)
178
mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0
179
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
180
mov.l 0x4(%sp),%d0
181
rtd &0x4
182
183
global _real_trace
184
_real_trace:
185
mov.l %d0,-(%sp)
186
mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0
187
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
188
mov.l 0x4(%sp),%d0
189
rtd &0x4
190
191
global _real_access
192
_real_access:
193
mov.l %d0,-(%sp)
194
mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0
195
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
196
mov.l 0x4(%sp),%d0
197
rtd &0x4
198
199
#######################################
200
201
global _imem_read
202
_imem_read:
203
mov.l %d0,-(%sp)
204
mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0
205
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
206
mov.l 0x4(%sp),%d0
207
rtd &0x4
208
209
global _dmem_read
210
_dmem_read:
211
mov.l %d0,-(%sp)
212
mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0
213
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
214
mov.l 0x4(%sp),%d0
215
rtd &0x4
216
217
global _dmem_write
218
_dmem_write:
219
mov.l %d0,-(%sp)
220
mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0
221
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
222
mov.l 0x4(%sp),%d0
223
rtd &0x4
224
225
global _imem_read_word
226
_imem_read_word:
227
mov.l %d0,-(%sp)
228
mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0
229
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
230
mov.l 0x4(%sp),%d0
231
rtd &0x4
232
233
global _imem_read_long
234
_imem_read_long:
235
mov.l %d0,-(%sp)
236
mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0
237
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
238
mov.l 0x4(%sp),%d0
239
rtd &0x4
240
241
global _dmem_read_byte
242
_dmem_read_byte:
243
mov.l %d0,-(%sp)
244
mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0
245
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
246
mov.l 0x4(%sp),%d0
247
rtd &0x4
248
249
global _dmem_read_word
250
_dmem_read_word:
251
mov.l %d0,-(%sp)
252
mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0
253
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
254
mov.l 0x4(%sp),%d0
255
rtd &0x4
256
257
global _dmem_read_long
258
_dmem_read_long:
259
mov.l %d0,-(%sp)
260
mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0
261
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
262
mov.l 0x4(%sp),%d0
263
rtd &0x4
264
265
global _dmem_write_byte
266
_dmem_write_byte:
267
mov.l %d0,-(%sp)
268
mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0
269
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
270
mov.l 0x4(%sp),%d0
271
rtd &0x4
272
273
global _dmem_write_word
274
_dmem_write_word:
275
mov.l %d0,-(%sp)
276
mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0
277
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
278
mov.l 0x4(%sp),%d0
279
rtd &0x4
280
281
global _dmem_write_long
282
_dmem_write_long:
283
mov.l %d0,-(%sp)
284
mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0
285
pea.l (_060FPSP_TABLE-0x80,%pc,%d0)
286
mov.l 0x4(%sp),%d0
287
rtd &0x4
288
289
#
290
# This file contains a set of define statements for constants
291
# in order to promote readability within the corecode itself.
292
#
293
294
set LOCAL_SIZE, 192 # stack frame size(bytes)
295
set LV, -LOCAL_SIZE # stack offset
296
297
set EXC_SR, 0x4 # stack status register
298
set EXC_PC, 0x6 # stack pc
299
set EXC_VOFF, 0xa # stacked vector offset
300
set EXC_EA, 0xc # stacked <ea>
301
302
set EXC_FP, 0x0 # frame pointer
303
304
set EXC_AREGS, -68 # offset of all address regs
305
set EXC_DREGS, -100 # offset of all data regs
306
set EXC_FPREGS, -36 # offset of all fp regs
307
308
set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
309
set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7
310
set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
311
set EXC_A5, EXC_AREGS+(5*4)
312
set EXC_A4, EXC_AREGS+(4*4)
313
set EXC_A3, EXC_AREGS+(3*4)
314
set EXC_A2, EXC_AREGS+(2*4)
315
set EXC_A1, EXC_AREGS+(1*4)
316
set EXC_A0, EXC_AREGS+(0*4)
317
set EXC_D7, EXC_DREGS+(7*4)
318
set EXC_D6, EXC_DREGS+(6*4)
319
set EXC_D5, EXC_DREGS+(5*4)
320
set EXC_D4, EXC_DREGS+(4*4)
321
set EXC_D3, EXC_DREGS+(3*4)
322
set EXC_D2, EXC_DREGS+(2*4)
323
set EXC_D1, EXC_DREGS+(1*4)
324
set EXC_D0, EXC_DREGS+(0*4)
325
326
set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
327
set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
328
set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
329
330
set FP_SCR1, LV+80 # fp scratch 1
331
set FP_SCR1_EX, FP_SCR1+0
332
set FP_SCR1_SGN, FP_SCR1+2
333
set FP_SCR1_HI, FP_SCR1+4
334
set FP_SCR1_LO, FP_SCR1+8
335
336
set FP_SCR0, LV+68 # fp scratch 0
337
set FP_SCR0_EX, FP_SCR0+0
338
set FP_SCR0_SGN, FP_SCR0+2
339
set FP_SCR0_HI, FP_SCR0+4
340
set FP_SCR0_LO, FP_SCR0+8
341
342
set FP_DST, LV+56 # fp destination operand
343
set FP_DST_EX, FP_DST+0
344
set FP_DST_SGN, FP_DST+2
345
set FP_DST_HI, FP_DST+4
346
set FP_DST_LO, FP_DST+8
347
348
set FP_SRC, LV+44 # fp source operand
349
set FP_SRC_EX, FP_SRC+0
350
set FP_SRC_SGN, FP_SRC+2
351
set FP_SRC_HI, FP_SRC+4
352
set FP_SRC_LO, FP_SRC+8
353
354
set USER_FPIAR, LV+40 # FP instr address register
355
356
set USER_FPSR, LV+36 # FP status register
357
set FPSR_CC, USER_FPSR+0 # FPSR condition codes
358
set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
359
set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
360
set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
361
362
set USER_FPCR, LV+32 # FP control register
363
set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
364
set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
365
366
set L_SCR3, LV+28 # integer scratch 3
367
set L_SCR2, LV+24 # integer scratch 2
368
set L_SCR1, LV+20 # integer scratch 1
369
370
set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
371
372
set EXC_TEMP2, LV+24 # temporary space
373
set EXC_TEMP, LV+16 # temporary space
374
375
set DTAG, LV+15 # destination operand type
376
set STAG, LV+14 # source operand type
377
378
set SPCOND_FLG, LV+10 # flag: special case (see below)
379
380
set EXC_CC, LV+8 # saved condition codes
381
set EXC_EXTWPTR, LV+4 # saved current PC (active)
382
set EXC_EXTWORD, LV+2 # saved extension word
383
set EXC_CMDREG, LV+2 # saved extension word
384
set EXC_OPWORD, LV+0 # saved operation word
385
386
################################
387
388
# Helpful macros
389
390
set FTEMP, 0 # offsets within an
391
set FTEMP_EX, 0 # extended precision
392
set FTEMP_SGN, 2 # value saved in memory.
393
set FTEMP_HI, 4
394
set FTEMP_LO, 8
395
set FTEMP_GRS, 12
396
397
set LOCAL, 0 # offsets within an
398
set LOCAL_EX, 0 # extended precision
399
set LOCAL_SGN, 2 # value saved in memory.
400
set LOCAL_HI, 4
401
set LOCAL_LO, 8
402
set LOCAL_GRS, 12
403
404
set DST, 0 # offsets within an
405
set DST_EX, 0 # extended precision
406
set DST_HI, 4 # value saved in memory.
407
set DST_LO, 8
408
409
set SRC, 0 # offsets within an
410
set SRC_EX, 0 # extended precision
411
set SRC_HI, 4 # value saved in memory.
412
set SRC_LO, 8
413
414
set SGL_LO, 0x3f81 # min sgl prec exponent
415
set SGL_HI, 0x407e # max sgl prec exponent
416
set DBL_LO, 0x3c01 # min dbl prec exponent
417
set DBL_HI, 0x43fe # max dbl prec exponent
418
set EXT_LO, 0x0 # min ext prec exponent
419
set EXT_HI, 0x7ffe # max ext prec exponent
420
421
set EXT_BIAS, 0x3fff # extended precision bias
422
set SGL_BIAS, 0x007f # single precision bias
423
set DBL_BIAS, 0x03ff # double precision bias
424
425
set NORM, 0x00 # operand type for STAG/DTAG
426
set ZERO, 0x01 # operand type for STAG/DTAG
427
set INF, 0x02 # operand type for STAG/DTAG
428
set QNAN, 0x03 # operand type for STAG/DTAG
429
set DENORM, 0x04 # operand type for STAG/DTAG
430
set SNAN, 0x05 # operand type for STAG/DTAG
431
set UNNORM, 0x06 # operand type for STAG/DTAG
432
433
##################
434
# FPSR/FPCR bits #
435
##################
436
set neg_bit, 0x3 # negative result
437
set z_bit, 0x2 # zero result
438
set inf_bit, 0x1 # infinite result
439
set nan_bit, 0x0 # NAN result
440
441
set q_sn_bit, 0x7 # sign bit of quotient byte
442
443
set bsun_bit, 7 # branch on unordered
444
set snan_bit, 6 # signalling NAN
445
set operr_bit, 5 # operand error
446
set ovfl_bit, 4 # overflow
447
set unfl_bit, 3 # underflow
448
set dz_bit, 2 # divide by zero
449
set inex2_bit, 1 # inexact result 2
450
set inex1_bit, 0 # inexact result 1
451
452
set aiop_bit, 7 # accrued inexact operation bit
453
set aovfl_bit, 6 # accrued overflow bit
454
set aunfl_bit, 5 # accrued underflow bit
455
set adz_bit, 4 # accrued dz bit
456
set ainex_bit, 3 # accrued inexact bit
457
458
#############################
459
# FPSR individual bit masks #
460
#############################
461
set neg_mask, 0x08000000 # negative bit mask (lw)
462
set inf_mask, 0x02000000 # infinity bit mask (lw)
463
set z_mask, 0x04000000 # zero bit mask (lw)
464
set nan_mask, 0x01000000 # nan bit mask (lw)
465
466
set neg_bmask, 0x08 # negative bit mask (byte)
467
set inf_bmask, 0x02 # infinity bit mask (byte)
468
set z_bmask, 0x04 # zero bit mask (byte)
469
set nan_bmask, 0x01 # nan bit mask (byte)
470
471
set bsun_mask, 0x00008000 # bsun exception mask
472
set snan_mask, 0x00004000 # snan exception mask
473
set operr_mask, 0x00002000 # operr exception mask
474
set ovfl_mask, 0x00001000 # overflow exception mask
475
set unfl_mask, 0x00000800 # underflow exception mask
476
set dz_mask, 0x00000400 # dz exception mask
477
set inex2_mask, 0x00000200 # inex2 exception mask
478
set inex1_mask, 0x00000100 # inex1 exception mask
479
480
set aiop_mask, 0x00000080 # accrued illegal operation
481
set aovfl_mask, 0x00000040 # accrued overflow
482
set aunfl_mask, 0x00000020 # accrued underflow
483
set adz_mask, 0x00000010 # accrued divide by zero
484
set ainex_mask, 0x00000008 # accrued inexact
485
486
######################################
487
# FPSR combinations used in the FPSP #
488
######################################
489
set dzinf_mask, inf_mask+dz_mask+adz_mask
490
set opnan_mask, nan_mask+operr_mask+aiop_mask
491
set nzi_mask, 0x01ffffff #clears N, Z, and I
492
set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
493
set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask
494
set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
495
set inx1a_mask, inex1_mask+ainex_mask
496
set inx2a_mask, inex2_mask+ainex_mask
497
set snaniop_mask, nan_mask+snan_mask+aiop_mask
498
set snaniop2_mask, snan_mask+aiop_mask
499
set naniop_mask, nan_mask+aiop_mask
500
set neginf_mask, neg_mask+inf_mask
501
set infaiop_mask, inf_mask+aiop_mask
502
set negz_mask, neg_mask+z_mask
503
set opaop_mask, operr_mask+aiop_mask
504
set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask
505
set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask
506
507
#########
508
# misc. #
509
#########
510
set rnd_stky_bit, 29 # stky bit pos in longword
511
512
set sign_bit, 0x7 # sign bit
513
set signan_bit, 0x6 # signalling nan bit
514
515
set sgl_thresh, 0x3f81 # minimum sgl exponent
516
set dbl_thresh, 0x3c01 # minimum dbl exponent
517
518
set x_mode, 0x0 # extended precision
519
set s_mode, 0x4 # single precision
520
set d_mode, 0x8 # double precision
521
522
set rn_mode, 0x0 # round-to-nearest
523
set rz_mode, 0x1 # round-to-zero
524
set rm_mode, 0x2 # round-to-minus-infinity
525
set rp_mode, 0x3 # round-to-plus-infinity
526
527
set mantissalen, 64 # length of mantissa in bits
528
529
set BYTE, 1 # len(byte) == 1 byte
530
set WORD, 2 # len(word) == 2 bytes
531
set LONG, 4 # len(longword) == 4 bytes
532
533
set BSUN_VEC, 0xc0 # bsun vector offset
534
set INEX_VEC, 0xc4 # inexact vector offset
535
set DZ_VEC, 0xc8 # dz vector offset
536
set UNFL_VEC, 0xcc # unfl vector offset
537
set OPERR_VEC, 0xd0 # operr vector offset
538
set OVFL_VEC, 0xd4 # ovfl vector offset
539
set SNAN_VEC, 0xd8 # snan vector offset
540
541
###########################
542
# SPecial CONDition FLaGs #
543
###########################
544
set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
545
set fbsun_flg, 0x02 # flag bit: bsun exception
546
set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
547
set mda7_flg, 0x08 # flag bit: -(a7) <ea>
548
set fmovm_flg, 0x40 # flag bit: fmovm instruction
549
set immed_flg, 0x80 # flag bit: &<data> <ea>
550
551
set ftrapcc_bit, 0x0
552
set fbsun_bit, 0x1
553
set mia7_bit, 0x2
554
set mda7_bit, 0x3
555
set immed_bit, 0x7
556
557
##################################
558
# TRANSCENDENTAL "LAST-OP" FLAGS #
559
##################################
560
set FMUL_OP, 0x0 # fmul instr performed last
561
set FDIV_OP, 0x1 # fdiv performed last
562
set FADD_OP, 0x2 # fadd performed last
563
set FMOV_OP, 0x3 # fmov performed last
564
565
#############
566
# CONSTANTS #
567
#############
568
T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD
569
T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL
570
571
PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
572
PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
573
574
TWOBYPI:
575
long 0x3FE45F30,0x6DC9C883
576
577
#########################################################################
578
# XDEF **************************************************************** #
579
# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
580
# #
581
# This handler should be the first code executed upon taking the #
582
# FP Overflow exception in an operating system. #
583
# #
584
# XREF **************************************************************** #
585
# _imem_read_long() - read instruction longword #
586
# fix_skewed_ops() - adjust src operand in fsave frame #
587
# set_tag_x() - determine optype of src/dst operands #
588
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
589
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
590
# load_fpn2() - load dst operand from FP regfile #
591
# fout() - emulate an opclass 3 instruction #
592
# tbl_unsupp - add of table of emulation routines for opclass 0,2 #
593
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
594
# _real_ovfl() - "callout" for Overflow exception enabled code #
595
# _real_inex() - "callout" for Inexact exception enabled code #
596
# _real_trace() - "callout" for Trace exception code #
597
# #
598
# INPUT *************************************************************** #
599
# - The system stack contains the FP Ovfl exception stack frame #
600
# - The fsave frame contains the source operand #
601
# #
602
# OUTPUT ************************************************************** #
603
# Overflow Exception enabled: #
604
# - The system stack is unchanged #
605
# - The fsave frame contains the adjusted src op for opclass 0,2 #
606
# Overflow Exception disabled: #
607
# - The system stack is unchanged #
608
# - The "exception present" flag in the fsave frame is cleared #
609
# #
610
# ALGORITHM *********************************************************** #
611
# On the 060, if an FP overflow is present as the result of any #
612
# instruction, the 060 will take an overflow exception whether the #
613
# exception is enabled or disabled in the FPCR. For the disabled case, #
614
# This handler emulates the instruction to determine what the correct #
615
# default result should be for the operation. This default result is #
616
# then stored in either the FP regfile, data regfile, or memory. #
617
# Finally, the handler exits through the "callout" _fpsp_done() #
618
# denoting that no exceptional conditions exist within the machine. #
619
# If the exception is enabled, then this handler must create the #
620
# exceptional operand and place it in the fsave state frame, and store #
621
# the default result (only if the instruction is opclass 3). For #
622
# exceptions enabled, this handler must exit through the "callout" #
623
# _real_ovfl() so that the operating system enabled overflow handler #
624
# can handle this case. #
625
# Two other conditions exist. First, if overflow was disabled #
626
# but the inexact exception was enabled, this handler must exit #
627
# through the "callout" _real_inex() regardless of whether the result #
628
# was inexact. #
629
# Also, in the case of an opclass three instruction where #
630
# overflow was disabled and the trace exception was enabled, this #
631
# handler must exit through the "callout" _real_trace(). #
632
# #
633
#########################################################################
634
635
global _fpsp_ovfl
636
_fpsp_ovfl:
637
638
#$# sub.l &24,%sp # make room for src/dst
639
640
link.w %a6,&-LOCAL_SIZE # init stack frame
641
642
fsave FP_SRC(%a6) # grab the "busy" frame
643
644
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
645
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
646
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
647
648
# the FPIAR holds the "current PC" of the faulting instruction
649
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
650
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
651
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
652
bsr.l _imem_read_long # fetch the instruction words
653
mov.l %d0,EXC_OPWORD(%a6)
654
655
##############################################################################
656
657
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
658
bne.w fovfl_out
659
660
661
lea FP_SRC(%a6),%a0 # pass: ptr to src op
662
bsr.l fix_skewed_ops # fix src op
663
664
# since, I believe, only NORMs and DENORMs can come through here,
665
# maybe we can avoid the subroutine call.
666
lea FP_SRC(%a6),%a0 # pass: ptr to src op
667
bsr.l set_tag_x # tag the operand type
668
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
669
670
# bit five of the fp extension word separates the monadic and dyadic operations
671
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
672
# will never take this exception.
673
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
674
beq.b fovfl_extract # monadic
675
676
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
677
bsr.l load_fpn2 # load dst into FP_DST
678
679
lea FP_DST(%a6),%a0 # pass: ptr to dst op
680
bsr.l set_tag_x # tag the operand type
681
cmpi.b %d0,&UNNORM # is operand an UNNORM?
682
bne.b fovfl_op2_done # no
683
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
684
fovfl_op2_done:
685
mov.b %d0,DTAG(%a6) # save dst optype tag
686
687
fovfl_extract:
688
689
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
690
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
691
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
692
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
693
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
694
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
695
696
clr.l %d0
697
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
698
699
mov.b 1+EXC_CMDREG(%a6),%d1
700
andi.w &0x007f,%d1 # extract extension
701
702
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
703
704
fmov.l &0x0,%fpcr # zero current control regs
705
fmov.l &0x0,%fpsr
706
707
lea FP_SRC(%a6),%a0
708
lea FP_DST(%a6),%a1
709
710
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
711
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
712
jsr (tbl_unsupp.l,%pc,%d1.l*1)
713
714
# the operation has been emulated. the result is in fp0.
715
# the EXOP, if an exception occurred, is in fp1.
716
# we must save the default result regardless of whether
717
# traps are enabled or disabled.
718
bfextu EXC_CMDREG(%a6){&6:&3},%d0
719
bsr.l store_fpreg
720
721
# the exceptional possibilities we have left ourselves with are ONLY overflow
722
# and inexact. and, the inexact is such that overflow occurred and was disabled
723
# but inexact was enabled.
724
btst &ovfl_bit,FPCR_ENABLE(%a6)
725
bne.b fovfl_ovfl_on
726
727
btst &inex2_bit,FPCR_ENABLE(%a6)
728
bne.b fovfl_inex_on
729
730
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
731
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
732
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
733
734
unlk %a6
735
#$# add.l &24,%sp
736
bra.l _fpsp_done
737
738
# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
739
# in fp1. now, simply jump to _real_ovfl()!
740
fovfl_ovfl_on:
741
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
742
743
mov.w &0xe005,2+FP_SRC(%a6) # save exc status
744
745
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
746
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
747
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
748
749
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
750
751
unlk %a6
752
753
bra.l _real_ovfl
754
755
# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
756
# we must jump to real_inex().
757
fovfl_inex_on:
758
759
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
760
761
mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
762
mov.w &0xe001,2+FP_SRC(%a6) # save exc status
763
764
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
765
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
766
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
767
768
frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
769
770
unlk %a6
771
772
bra.l _real_inex
773
774
########################################################################
775
fovfl_out:
776
777
778
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
779
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
780
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
781
782
# the src operand is definitely a NORM(!), so tag it as such
783
mov.b &NORM,STAG(%a6) # set src optype tag
784
785
clr.l %d0
786
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
787
788
and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field
789
790
fmov.l &0x0,%fpcr # zero current control regs
791
fmov.l &0x0,%fpsr
792
793
lea FP_SRC(%a6),%a0 # pass ptr to src operand
794
795
bsr.l fout
796
797
btst &ovfl_bit,FPCR_ENABLE(%a6)
798
bne.w fovfl_ovfl_on
799
800
btst &inex2_bit,FPCR_ENABLE(%a6)
801
bne.w fovfl_inex_on
802
803
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
804
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
805
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
806
807
unlk %a6
808
#$# add.l &24,%sp
809
810
btst &0x7,(%sp) # is trace on?
811
beq.l _fpsp_done # no
812
813
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
814
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
815
bra.l _real_trace
816
817
#########################################################################
818
# XDEF **************************************************************** #
819
# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
820
# #
821
# This handler should be the first code executed upon taking the #
822
# FP Underflow exception in an operating system. #
823
# #
824
# XREF **************************************************************** #
825
# _imem_read_long() - read instruction longword #
826
# fix_skewed_ops() - adjust src operand in fsave frame #
827
# set_tag_x() - determine optype of src/dst operands #
828
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
829
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
830
# load_fpn2() - load dst operand from FP regfile #
831
# fout() - emulate an opclass 3 instruction #
832
# tbl_unsupp - add of table of emulation routines for opclass 0,2 #
833
# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
834
# _real_ovfl() - "callout" for Overflow exception enabled code #
835
# _real_inex() - "callout" for Inexact exception enabled code #
836
# _real_trace() - "callout" for Trace exception code #
837
# #
838
# INPUT *************************************************************** #
839
# - The system stack contains the FP Unfl exception stack frame #
840
# - The fsave frame contains the source operand #
841
# #
842
# OUTPUT ************************************************************** #
843
# Underflow Exception enabled: #
844
# - The system stack is unchanged #
845
# - The fsave frame contains the adjusted src op for opclass 0,2 #
846
# Underflow Exception disabled: #
847
# - The system stack is unchanged #
848
# - The "exception present" flag in the fsave frame is cleared #
849
# #
850
# ALGORITHM *********************************************************** #
851
# On the 060, if an FP underflow is present as the result of any #
852
# instruction, the 060 will take an underflow exception whether the #
853
# exception is enabled or disabled in the FPCR. For the disabled case, #
854
# This handler emulates the instruction to determine what the correct #
855
# default result should be for the operation. This default result is #
856
# then stored in either the FP regfile, data regfile, or memory. #
857
# Finally, the handler exits through the "callout" _fpsp_done() #
858
# denoting that no exceptional conditions exist within the machine. #
859
# If the exception is enabled, then this handler must create the #
860
# exceptional operand and place it in the fsave state frame, and store #
861
# the default result (only if the instruction is opclass 3). For #
862
# exceptions enabled, this handler must exit through the "callout" #
863
# _real_unfl() so that the operating system enabled overflow handler #
864
# can handle this case. #
865
# Two other conditions exist. First, if underflow was disabled #
866
# but the inexact exception was enabled and the result was inexact, #
867
# this handler must exit through the "callout" _real_inex(). #
868
# was inexact. #
869
# Also, in the case of an opclass three instruction where #
870
# underflow was disabled and the trace exception was enabled, this #
871
# handler must exit through the "callout" _real_trace(). #
872
# #
873
#########################################################################
874
875
global _fpsp_unfl
876
_fpsp_unfl:
877
878
#$# sub.l &24,%sp # make room for src/dst
879
880
link.w %a6,&-LOCAL_SIZE # init stack frame
881
882
fsave FP_SRC(%a6) # grab the "busy" frame
883
884
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
885
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
886
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
887
888
# the FPIAR holds the "current PC" of the faulting instruction
889
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
890
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
891
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
892
bsr.l _imem_read_long # fetch the instruction words
893
mov.l %d0,EXC_OPWORD(%a6)
894
895
##############################################################################
896
897
btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
898
bne.w funfl_out
899
900
901
lea FP_SRC(%a6),%a0 # pass: ptr to src op
902
bsr.l fix_skewed_ops # fix src op
903
904
lea FP_SRC(%a6),%a0 # pass: ptr to src op
905
bsr.l set_tag_x # tag the operand type
906
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
907
908
# bit five of the fp ext word separates the monadic and dyadic operations
909
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
910
# will never take this exception.
911
btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
912
beq.b funfl_extract # monadic
913
914
# now, what's left that's not dyadic is fsincos. we can distinguish it
915
# from all dyadics by the '0110xxx pattern
916
btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
917
bne.b funfl_extract # yes
918
919
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
920
bsr.l load_fpn2 # load dst into FP_DST
921
922
lea FP_DST(%a6),%a0 # pass: ptr to dst op
923
bsr.l set_tag_x # tag the operand type
924
cmpi.b %d0,&UNNORM # is operand an UNNORM?
925
bne.b funfl_op2_done # no
926
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
927
funfl_op2_done:
928
mov.b %d0,DTAG(%a6) # save dst optype tag
929
930
funfl_extract:
931
932
#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
933
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
934
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
935
#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
936
#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
937
#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
938
939
clr.l %d0
940
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
941
942
mov.b 1+EXC_CMDREG(%a6),%d1
943
andi.w &0x007f,%d1 # extract extension
944
945
andi.l &0x00ff01ff,USER_FPSR(%a6)
946
947
fmov.l &0x0,%fpcr # zero current control regs
948
fmov.l &0x0,%fpsr
949
950
lea FP_SRC(%a6),%a0
951
lea FP_DST(%a6),%a1
952
953
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
954
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
955
jsr (tbl_unsupp.l,%pc,%d1.l*1)
956
957
bfextu EXC_CMDREG(%a6){&6:&3},%d0
958
bsr.l store_fpreg
959
960
# The `060 FPU multiplier hardware is such that if the result of a
961
# multiply operation is the smallest possible normalized number
962
# (0x00000000_80000000_00000000), then the machine will take an
963
# underflow exception. Since this is incorrect, we need to check
964
# if our emulation, after re-doing the operation, decided that
965
# no underflow was called for. We do these checks only in
966
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
967
# special case will simply exit gracefully with the correct result.
968
969
# the exceptional possibilities we have left ourselves with are ONLY overflow
970
# and inexact. and, the inexact is such that overflow occurred and was disabled
971
# but inexact was enabled.
972
btst &unfl_bit,FPCR_ENABLE(%a6)
973
bne.b funfl_unfl_on
974
975
funfl_chkinex:
976
btst &inex2_bit,FPCR_ENABLE(%a6)
977
bne.b funfl_inex_on
978
979
funfl_exit:
980
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
981
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
982
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
983
984
unlk %a6
985
#$# add.l &24,%sp
986
bra.l _fpsp_done
987
988
# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
# in fp1 (don't forget to save fp0). what to do now?
# well, we simply have to get to go to _real_unfl()!
funfl_unfl_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception. Since this is incorrect, we check here to see
# if our emulation, after re-doing the operation, decided that
# no underflow was called for.
btst &unfl_bit,FPSR_EXCEPT(%a6)
beq.w funfl_chkinex

# entered directly from funfl_out for opclass three underflows, too
funfl_unfl_on2:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack

mov.w &0xe003,2+FP_SRC(%a6) # save exc status (UNFL)

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!

unlk %a6

bra.l _real_unfl
1016
1017
# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
funfl_inex_on:

# The `060 FPU multiplier hardware is such that if the result of a
# multiply operation is the smallest possible normalized number
# (0x00000000_80000000_00000000), then the machine will take an
# underflow exception.
# But, whether bogus or not, if inexact is enabled AND it occurred,
# then we have to branch to real_inex.

btst &inex2_bit,FPSR_EXCEPT(%a6)
beq.w funfl_exit

# entered directly from funfl_out for opclass three inexacts, too
funfl_inex_on2:

fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack

mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
mov.w &0xe001,2+FP_SRC(%a6) # save exc status (INEX)

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!

unlk %a6

bra.l _real_inex
1047
1048
#######################################################################
# the underflowing instruction was an opclass three "fmove out".
funfl_out:


#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
mov.b &NORM,STAG(%a6) # set src optype tag

clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode

and.l &0xffff00ff,USER_FPSR(%a6) # zero all but accrued field

fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr

lea FP_SRC(%a6),%a0 # pass ptr to src operand

bsr.l fout # emulate the move out

btst &unfl_bit,FPCR_ENABLE(%a6)
bne.w funfl_unfl_on2

btst &inex2_bit,FPCR_ENABLE(%a6)
bne.w funfl_inex_on2

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

unlk %a6
#$# add.l &24,%sp

btst &0x7,(%sp) # is trace on?
beq.l _fpsp_done # no

# trace is enabled: convert this frame to a Trace exception frame
# and exit through the trace "callout".
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
bra.l _real_trace
1090
1091
#########################################################################
1092
# XDEF **************************************************************** #
1093
# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
1094
# Data Type" exception. #
1095
# #
1096
# This handler should be the first code executed upon taking the #
1097
# FP Unimplemented Data Type exception in an operating system. #
1098
# #
1099
# XREF **************************************************************** #
1100
# _imem_read_{word,long}() - read instruction word/longword #
1101
# fix_skewed_ops() - adjust src operand in fsave frame #
1102
# set_tag_x() - determine optype of src/dst operands #
1103
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
1104
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
1105
# load_fpn2() - load dst operand from FP regfile #
1106
# load_fpn1() - load src operand from FP regfile #
1107
# fout() - emulate an opclass 3 instruction #
1108
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
1109
# _real_inex() - "callout" to operating system inexact handler #
1110
# _fpsp_done() - "callout" for exit; work all done #
1111
# _real_trace() - "callout" for Trace enabled exception #
1112
# funimp_skew() - adjust fsave src ops to "incorrect" value #
1113
# _real_snan() - "callout" for SNAN exception #
1114
# _real_operr() - "callout" for OPERR exception #
1115
# _real_ovfl() - "callout" for OVFL exception #
1116
# _real_unfl() - "callout" for UNFL exception #
1117
# get_packed() - fetch packed operand from memory #
1118
# #
1119
# INPUT *************************************************************** #
1120
# - The system stack contains the "Unimp Data Type" stk frame #
1121
# - The fsave frame contains the ssrc op (for UNNORM/DENORM) #
1122
# #
1123
# OUTPUT ************************************************************** #
1124
# If Inexact exception (opclass 3): #
1125
# - The system stack is changed to an Inexact exception stk frame #
1126
# If SNAN exception (opclass 3): #
1127
# - The system stack is changed to an SNAN exception stk frame #
1128
# If OPERR exception (opclass 3): #
1129
# - The system stack is changed to an OPERR exception stk frame #
1130
# If OVFL exception (opclass 3): #
1131
# - The system stack is changed to an OVFL exception stk frame #
1132
# If UNFL exception (opclass 3): #
1133
# - The system stack is changed to an UNFL exception stack frame #
1134
# If Trace exception enabled: #
1135
# - The system stack is changed to a Trace exception stack frame #
1136
# Else: (normal case) #
1137
# - Correct result has been stored as appropriate #
1138
# #
1139
# ALGORITHM *********************************************************** #
1140
# Two main instruction types can enter here: (1) DENORM or UNNORM #
1141
# unimplemented data types. These can be either opclass 0,2 or 3 #
1142
# instructions, and (2) PACKED unimplemented data format instructions #
1143
# also of opclasses 0,2, or 3. #
1144
# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
1145
# operand from the fsave state frame and the dst operand (if dyadic) #
1146
# from the FP register file. The instruction is then emulated by #
1147
# choosing an emulation routine from a table of routines indexed by #
1148
# instruction type. Once the instruction has been emulated and result #
1149
# saved, then we check to see if any enabled exceptions resulted from #
1150
# instruction emulation. If none, then we exit through the "callout" #
1151
# _fpsp_done(). If there is an enabled FP exception, then we insert #
1152
# this exception into the FPU in the fsave state frame and then exit #
1153
# through _fpsp_done(). #
1154
# PACKED opclass 0 and 2 is similar in how the instruction is #
1155
# emulated and exceptions handled. The differences occur in how the #
1156
# handler loads the packed op (by calling get_packed() routine) and #
1157
# by the fact that a Trace exception could be pending for PACKED ops. #
1158
# If a Trace exception is pending, then the current exception stack #
1159
# frame is changed to a Trace exception stack frame and an exit is #
1160
# made through _real_trace(). #
1161
# For UNNORM/DENORM opclass 3, the actual move out to memory is #
1162
# performed by calling the routine fout(). If no exception should occur #
1163
# as the result of emulation, then an exit either occurs through #
1164
# _fpsp_done() or through _real_trace() if a Trace exception is pending #
1165
# (a Trace stack frame must be created here, too). If an FP exception #
1166
# should occur, then we must create an exception stack frame of that #
1167
# type and jump to either _real_snan(), _real_operr(), _real_inex(), #
1168
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
1169
# emulation is performed in a similar manner. #
1170
# #
1171
#########################################################################
1172
1173
#
1174
# (1) DENORM and UNNORM (unimplemented) data types:
1175
#
1176
# post-instruction
1177
# *****************
1178
# * EA *
1179
# pre-instruction * *
1180
# ***************** *****************
1181
# * 0x0 * 0x0dc * * 0x3 * 0x0dc *
1182
# ***************** *****************
1183
# * Next * * Next *
1184
# * PC * * PC *
1185
# ***************** *****************
1186
# * SR * * SR *
1187
# ***************** *****************
1188
#
1189
# (2) PACKED format (unsupported) opclasses two and three:
1190
# *****************
1191
# * EA *
1192
# * *
1193
# *****************
1194
# * 0x2 * 0x0dc *
1195
# *****************
1196
# * Next *
1197
# * PC *
1198
# *****************
1199
# * SR *
1200
# *****************
1201
#
1202
global _fpsp_unsupp
_fpsp_unsupp:

link.w %a6,&-LOCAL_SIZE # init stack frame

fsave FP_SRC(%a6) # save fp state

movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack

btst &0x5,EXC_SR(%a6) # user or supervisor mode?
bne.b fu_s
fu_u:
mov.l %usp,%a0 # fetch user stack pointer
mov.l %a0,EXC_A7(%a6) # save on stack
bra.b fu_cont
# if the exception is an opclass zero or two unimplemented data type
# exception, then the a7' calculated here is wrong since it doesn't
# stack an ea. however, we don't need an a7' for this case anyways.
fu_s:
lea 0x4+EXC_EA(%a6),%a0 # load old a7'
mov.l %a0,EXC_A7(%a6) # save on stack

fu_cont:

# the FPIAR holds the "current PC" of the faulting instruction
# the FPIAR should be set correctly for ALL exceptions passing through
# this point.
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
bsr.l _imem_read_long # fetch the instruction words
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD

############################

clr.b SPCOND_FLG(%a6) # clear special condition flag

# Separate opclass three (fpn-to-mem) ops since they have a different
# stack frame and protocol.
btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
bne.w fu_out # yes

# Separate packed opclass two instructions.
bfextu EXC_CMDREG(%a6){&0:&6},%d0
cmpi.b %d0,&0x13 # opclass 2, packed src format?
beq.w fu_in_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field
andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field

fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr

# Opclass two w/ memory-to-fpn operation will have an incorrect extended
# precision format if the src format was single or double and the
# source data type was an INF, NAN, DENORM, or UNNORM
lea FP_SRC(%a6),%a0 # pass ptr to input
bsr.l fix_skewed_ops

# we don't know whether the src operand or the dst operand (or both) is the
# UNNORM or DENORM. call the function that tags the operand type. if the
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
lea FP_SRC(%a6),%a0 # pass: ptr to src op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b fu_op2 # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO

fu_op2:
mov.b %d0,STAG(%a6) # save src optype tag

bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg

# bit five of the fp extension word separates the monadic and dyadic operations
# at this point
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
beq.b fu_extract # monadic
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
beq.b fu_extract # yes, so it's monadic, too

bsr.l load_fpn2 # load dst into FP_DST

lea FP_DST(%a6),%a0 # pass: ptr to dst op
bsr.l set_tag_x # tag the operand type
cmpi.b %d0,&UNNORM # is operand an UNNORM?
bne.b fu_op2_done # no
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
fu_op2_done:
mov.b %d0,DTAG(%a6) # save dst optype tag

fu_extract:
clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec

bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension

lea FP_SRC(%a6),%a0
lea FP_DST(%a6),%a1

mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
jsr (tbl_unsupp.l,%pc,%d1.l*1)

#
# Exceptions in order of precedence:
# BSUN : none
# SNAN : all dyadic ops
# OPERR : fsqrt(-NORM)
# OVFL : all except ftst,fcmp
# UNFL : all except ftst,fcmp
# DZ : fdiv
# INEX2 : all except ftst,fcmp
# INEX1 : none (packed doesn't go through here)
#

# we determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
bne.b fu_in_ena # some are enabled

fu_in_cont:
# fcmp and ftst do not store any result.
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
andi.b &0x38,%d0 # extract bits 3-5
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
beq.b fu_in_exit # yes

bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
bsr.l store_fpreg # store the result

fu_in_exit:

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

unlk %a6

bra.l _fpsp_done
1344
1345
fu_in_ena:
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
bfffo %d0{&24:&8},%d0 # find highest priority exception
bne.b fu_in_exc # there is at least one set

#
# No exceptions occurred that were also enabled. Now:
#
# if (OVFL && ovfl_disabled && inexact_enabled) {
# branch to _real_inex() (even if the result was exact!);
# } else {
# save the result in the proper fp reg (unless the op is fcmp or ftst);
# return;
# }
#
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
beq.b fu_in_cont # no

fu_in_ovflchk:
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
beq.b fu_in_cont # no
bra.w fu_in_exc_ovfl # go insert overflow frame

#
# An exception occurred and that exception was enabled:
#
# shift enabled exception field into lo byte of d0;
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
# /*
# * this is the case where we must call _real_inex() now or else
# * there will be no other way to pass it the exceptional operand
# */
# call _real_inex();
# } else {
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
# }
#
fu_in_exc:
subi.l &24,%d0 # fix offset to be 0-8
cmpi.b %d0,&0x6 # is exception INEX? (6)
bne.b fu_in_exc_exit # no

# the enabled exception was inexact
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
bne.w fu_in_exc_unfl # yes
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
bne.w fu_in_exc_ovfl # yes

# here, we insert the correct fsave status value into the fsave frame for the
# corresponding exception. the operand in the fsave frame should be the original
# src operand.
fu_in_exc_exit:
mov.l %d0,-(%sp) # save d0
bsr.l funimp_skew # skew sgl or dbl inputs
mov.l (%sp)+,%d0 # restore d0

mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

frestore FP_SRC(%a6) # restore src op

unlk %a6

bra.l _fpsp_done

# fsave status words indexed by exception priority (BSUN..INEX1)
tbl_except:
short 0xe000,0xe006,0xe004,0xe005
short 0xe003,0xe002,0xe001,0xe001

fu_in_exc_unfl:
mov.w &0x4,%d0 # index of UNFL status in tbl_except
bra.b fu_in_exc_exit
fu_in_exc_ovfl:
mov.w &0x03,%d0 # index of OVFL status in tbl_except
bra.b fu_in_exc_exit
1424
1425
# If the input operand to this operation was opclass two and a single
1426
# or double precision denorm, inf, or nan, the operand needs to be
1427
# "corrected" in order to have the proper equivalent extended precision
1428
# number.
1429
# In:    a0 = ptr to extended-precision operand (in fsave frame)
# Out:   operand at (a0) corrected in place
# Uses:  d0 (and whatever norm clobbers; norm preserves a0)
global fix_skewed_ops
fix_skewed_ops:
bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
beq.b fso_sgl # yes
cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
beq.b fso_dbl # yes
rts # no

fso_sgl:
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
andi.w &0x7fff,%d0 # strip sign
cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
beq.b fso_sgl_dnrm_zero # yes
cmpi.w %d0,&0x407f # no; is |exp| == $407f?
beq.b fso_infnan # yes
rts # no

fso_sgl_dnrm_zero:
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
beq.b fso_zero # it's a skewed zero
fso_sgl_dnrm:
# here, we count on norm not to alter a0...
bsr.l norm # normalize mantissa
neg.w %d0 # -shft amt
addi.w &0x3f81,%d0 # adjust new exponent
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
or.w %d0,LOCAL_EX(%a0) # insert new exponent
rts

fso_zero:
andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
rts

fso_infnan:
andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
rts

fso_dbl:
mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
andi.w &0x7fff,%d0 # strip sign
cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
beq.b fso_dbl_dnrm_zero # yes
cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
beq.b fso_infnan # yes
rts # no

fso_dbl_dnrm_zero:
andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
bne.b fso_dbl_dnrm # it's a skewed denorm
tst.l LOCAL_LO(%a0) # is it a zero?
beq.b fso_zero # yes
fso_dbl_dnrm:
# here, we count on norm not to alter a0...
bsr.l norm # normalize mantissa
neg.w %d0 # -shft amt
addi.w &0x3c01,%d0 # adjust new exponent
andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
or.w %d0,LOCAL_EX(%a0) # insert new exponent
rts
1490
1491
#################################################################
1492
1493
# fmove out took an unimplemented data type exception.
1494
# the src operand is in FP_SRC. Call _fout() to write out the result and
1495
# to determine which exceptions, if any, to take.
1496
fu_out:

# Separate packed move outs from the UNNORM and DENORM move outs.
bfextu EXC_CMDREG(%a6){&3:&3},%d0 # extract dst format field
cmpi.b %d0,&0x3 # packed (static k-factor)?
beq.w fu_out_pack
cmpi.b %d0,&0x7 # packed (dynamic k-factor)?
beq.w fu_out_pack


# I'm not sure at this point what FPSR bits are valid for this instruction.
# so, since the emulation routines re-create them anyways, zero exception field.
# fmove out doesn't affect ccodes.
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field

fmov.l &0x0,%fpcr # zero current control regs
fmov.l &0x0,%fpsr

# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
# call here. just figure out what it is...
mov.w FP_SRC_EX(%a6),%d0 # get exponent
andi.w &0x7fff,%d0 # strip sign
beq.b fu_out_denorm # it's a DENORM

lea FP_SRC(%a6),%a0
bsr.l unnorm_fix # yes; fix it

mov.b %d0,STAG(%a6)

bra.b fu_out_cont
fu_out_denorm:
mov.b &DENORM,STAG(%a6)
fu_out_cont:

clr.l %d0
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec

lea FP_SRC(%a6),%a0 # pass ptr to src operand

mov.l (%a6),EXC_A6(%a6) # in case a6 changes
bsr.l fout # call fmove out routine

# Exceptions in order of precedence:
# BSUN : none
# SNAN : none
# OPERR : fmove.{b,w,l} out of large UNNORM
# OVFL : fmove.{s,d}
# UNFL : fmove.{s,d,x}
# DZ : none
# INEX2 : all
# INEX1 : none (packed doesn't travel through here)

# determine the highest priority exception(if any) set by the
# emulation routine that has also been enabled by the user.
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
bne.w fu_out_ena # some are enabled

fu_out_done:

mov.l EXC_A6(%a6),(%a6) # in case a6 changed

# on extended precision opclass three instructions using pre-decrement or
# post-increment addressing mode, the address register is not updated. if the
# address register was the stack pointer used from user mode, then let's update
# it here. if it was used from supervisor mode, then we have to handle this
# as a special case.
btst &0x5,EXC_SR(%a6)
bne.b fu_out_done_s

mov.l EXC_A7(%a6),%a0 # restore a7
mov.l %a0,%usp

fu_out_done_cont:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

unlk %a6

btst &0x7,(%sp) # is trace on?
bne.b fu_out_trace # yes

bra.l _fpsp_done

# is the ea mode pre-decrement of the stack pointer from supervisor mode?
# ("fmov.x fpm,-(a7)") if so,
fu_out_done_s:
cmpi.b SPCOND_FLG(%a6),&mda7_flg
bne.b fu_out_done_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place.
# here, we're counting on the top of the stack to be the old place-holders
# for fp0/fp1 which have already been restored. that way, we can write
# over those destinations with the shifted stack frame.
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.l (%a6),%a6 # restore frame pointer

# shift the exception frame "down" 0xc bytes over the old fp save area
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)

# now, copy the result to the proper place on the stack
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

add.l &LOCAL_SIZE-0x8,%sp

btst &0x7,(%sp)
bne.b fu_out_trace

bra.l _fpsp_done

fu_out_ena:
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
bfffo %d0{&24:&8},%d0 # find highest priority exception
bne.b fu_out_exc # there is at least one set

# no exceptions were set.
# if a disabled overflow occurred and inexact was enabled but the result
# was exact, then a branch to _real_inex() is made.
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
beq.w fu_out_done # no

fu_out_ovflchk:
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
beq.w fu_out_done # no
bra.w fu_inex # yes
1629
1630
#
1631
# The fp move out that took the "Unimplemented Data Type" exception was
1632
# being traced. Since the stack frames are similar, get the "current" PC
1633
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634
#
1635
# UNSUPP FRAME TRACE FRAME
1636
# ***************** *****************
1637
# * EA * * Current *
1638
# * * * PC *
1639
# ***************** *****************
1640
# * 0x3 * 0x0dc * * 0x2 * 0x024 *
1641
# ***************** *****************
1642
# * Next * * Next *
1643
# * PC * * PC *
1644
# ***************** *****************
1645
# * SR * * SR *
1646
# ***************** *****************
1647
#
1648
fu_out_trace:
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024 (Trace)
fmov.l %fpiar,0x8(%sp) # "Current PC" comes from FPIAR
bra.l _real_trace
1652
1653
# an exception occurred and that exception was enabled.
fu_out_exc:
subi.l &24,%d0 # fix offset to be 0-8

# we don't mess with the existing fsave frame. just re-insert it and
# jump to the "_real_{}()" handler...
mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler displacement
jmp (tbl_fu_out.b,%pc,%d0.w*1) # dispatch to fu_{operr,ovfl,unfl,inex}

swbeg &0x8
tbl_fu_out:
short tbl_fu_out - tbl_fu_out # BSUN can't happen
short tbl_fu_out - tbl_fu_out # SNAN can't happen
short fu_operr - tbl_fu_out # OPERR
short fu_ovfl - tbl_fu_out # OVFL
short fu_unfl - tbl_fu_out # UNFL
short tbl_fu_out - tbl_fu_out # DZ can't happen
short fu_inex - tbl_fu_out # INEX2
short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
1672
1673
# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
# frestore it.
fu_snan:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
mov.w &0xe006,2+FP_SRC(%a6) # set fsave status = SNAN

frestore FP_SRC(%a6)

unlk %a6


bra.l _real_snan

fu_operr:
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
mov.w &0xe004,2+FP_SRC(%a6) # set fsave status = OPERR

frestore FP_SRC(%a6)

unlk %a6


bra.l _real_operr

fu_ovfl:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
mov.w &0xe005,2+FP_SRC(%a6) # set fsave status = OVFL

frestore FP_SRC(%a6) # restore EXOP

unlk %a6

bra.l _real_ovfl
1720
1721
# underflow can happen for extended precision. extended precision opclass
# three instruction exceptions don't update the stack pointer. so, if the
# exception occurred from user mode, then simply update a7 and exit normally.
# if the exception occurred from supervisor mode, check if the <ea> was the
# pre-decremented supervisor stack pointer; if so, the exception frame must
# be shifted "down" and the result copied into place (see fu_unfl_s).
fu_unfl:
mov.l EXC_A6(%a6),(%a6) # restore a6

btst &0x5,EXC_SR(%a6)
bne.w fu_unfl_s

mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
mov.l %a0,%usp # to or not...

fu_unfl_cont:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
mov.w &0xe003,2+FP_SRC(%a6) # set fsave status = UNFL

frestore FP_SRC(%a6) # restore EXOP

unlk %a6

bra.l _real_unfl

fu_unfl_s:
cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
bne.b fu_unfl_cont

# the extended precision result is still in fp0. but, we need to save it
# somewhere on the stack until we can copy it to its final resting place
# (where the exc frame is currently). make sure it's not at the top of the
# frame or it will get overwritten when the exc stack frame is shifted "down".
fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
mov.w &0xe003,2+FP_DST(%a6) # set fsave status = UNFL

frestore FP_DST(%a6) # restore EXOP

mov.l (%a6),%a6 # restore frame pointer

# shift the exception frame "down" 0xc bytes
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)

# now, copy the result to the proper place on the stack
mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)

add.l &LOCAL_SIZE-0x8,%sp

bra.l _real_unfl
1784
1785
# fmove in and out enter here.
fu_inex:
fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
mov.w &0xe001,2+FP_SRC(%a6) # set fsave status = INEX

frestore FP_SRC(%a6) # restore EXOP

unlk %a6


bra.l _real_inex
1802
1803
#########################################################################
1804
#########################################################################
1805
# fu_in_pack:
# "Unimplemented data type" handler for an opclass 0/2 instruction whose
# source operand is PACKED.  Fetch and convert the packed src, tag src (and
# dst for dyadic ops), dispatch through tbl_unsupp to the emulation routine,
# then store the result and exit unless an enabled exception was signalled.
fu_in_pack:
1806
1807
1808
# I'm not sure at this point what FPSR bits are valid for this instruction.
1809
# so, since the emulation routines re-create them anyways, zero exception field
1810
andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
1811
1812
fmov.l &0x0,%fpcr # zero current control regs
1813
fmov.l &0x0,%fpsr
1814
1815
bsr.l get_packed # fetch packed src operand
1816
1817
lea FP_SRC(%a6),%a0 # pass ptr to src
1818
bsr.l set_tag_x # set src optype tag
1819
1820
mov.b %d0,STAG(%a6) # save src optype tag
1821
1822
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823
1824
# bit five of the fp extension word separates the monadic and dyadic operations
1825
# at this point
1826
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
1827
beq.b fu_extract_p # monadic
1828
cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1829
beq.b fu_extract_p # yes, so it's monadic, too
1830
1831
bsr.l load_fpn2 # load dst into FP_DST
1832
1833
lea FP_DST(%a6),%a0 # pass: ptr to dst op
1834
bsr.l set_tag_x # tag the operand type
1835
cmpi.b %d0,&UNNORM # is operand an UNNORM?
1836
bne.b fu_op2_done_p # no
1837
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
1838
fu_op2_done_p:
1839
mov.b %d0,DTAG(%a6) # save dst optype tag
1840
1841
fu_extract_p:
clr.l %d0
1843
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
1844
1845
bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846
1847
lea FP_SRC(%a6),%a0
1848
lea FP_DST(%a6),%a1
1849
1850
mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851
jsr (tbl_unsupp.l,%pc,%d1.l*1)
1852
1853
#
1854
# Exceptions in order of precedence:
1855
# BSUN : none
1856
# SNAN : all dyadic ops
1857
# OPERR : fsqrt(-NORM)
1858
# OVFL : all except ftst,fcmp
1859
# UNFL : all except ftst,fcmp
1860
# DZ : fdiv
1861
# INEX2 : all except ftst,fcmp
1862
# INEX1 : all
1863
#
1864
1865
# we determine the highest priority exception(if any) set by the
1866
# emulation routine that has also been enabled by the user.
1867
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
1868
bne.w fu_in_ena_p # some are enabled
1869
1870
fu_in_cont_p:
1871
# fcmp and ftst do not store any result.
1872
mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
1873
andi.b &0x38,%d0 # extract bits 3-5
1874
cmpi.b %d0,&0x38 # is instr fcmp or ftst?
1875
beq.b fu_in_exit_p # yes
1876
1877
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878
bsr.l store_fpreg # store the result
1879
1880
fu_in_exit_p:
1881
1882
btst &0x5,EXC_SR(%a6) # user or supervisor?
1883
bne.w fu_in_exit_s_p # supervisor
1884
1885
mov.l EXC_A7(%a6),%a0 # update user a7
1886
mov.l %a0,%usp
1887
1888
fu_in_exit_cont_p:
1889
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1890
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1892
1893
unlk %a6 # unravel stack frame
1894
1895
btst &0x7,(%sp) # is trace on?
1896
bne.w fu_trace_p # yes
1897
1898
bra.l _fpsp_done # exit to os
1899
1900
# the exception occurred in supervisor mode. check to see if the
1901
# addressing mode was (a7)+. if so, we'll need to shift the
1902
# stack frame "up".
1903
# fu_in_exit_s_p: supervisor-mode exit; for (a7)+ the 12 bytes of packed
# immediate data were on the system stack, so discard them by moving the
# SR and PC longwords up 0xc bytes before returning.
fu_in_exit_s_p:
1904
btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905
beq.b fu_in_exit_cont_p # no
1906
1907
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1908
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1910
1911
unlk %a6 # unravel stack frame
1912
1913
# shift the stack frame "up". we don't really care about the <ea> field.
1914
mov.l 0x4(%sp),0x10(%sp)
1915
mov.l 0x0(%sp),0xc(%sp)
1916
add.l &0xc,%sp
1917
1918
btst &0x7,(%sp) # is trace on?
1919
bne.w fu_trace_p # yes
1920
1921
bra.l _fpsp_done # exit to os
1922
1923
# fu_in_ena_p:
# Some exceptions are enabled in the FPCR.  Intersect them with the ones the
# emulation routine actually set; if any remain, go insert an exceptional
# fsave frame (fu_in_exc_p).  Otherwise handle the special "disabled OVFL
# with INEX enabled" case, else continue with the normal store/exit path.
fu_in_ena_p:
1924
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
1925
bfffo %d0{&24:&8},%d0 # find highest priority exception
1926
bne.b fu_in_exc_p # at least one was set
1927
1928
#
1929
# No exceptions occurred that were also enabled. Now:
1930
#
1931
# if (OVFL && ovfl_disabled && inexact_enabled) {
1932
# branch to _real_inex() (even if the result was exact!);
1933
# } else {
1934
# save the result in the proper fp reg (unless the op is fcmp or ftst);
1935
# return;
1936
# }
1937
#
1938
btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939
beq.w fu_in_cont_p # no
1940
1941
fu_in_ovflchk_p:
1942
btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943
beq.w fu_in_cont_p # no
1944
bra.w fu_in_exc_ovfl_p # do _real_inex() now
1945
1946
#
1947
# An exception occurred and that exception was enabled:
1948
#
1949
# shift enabled exception field into lo byte of d0;
1950
# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951
# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952
# /*
1953
# * this is the case where we must call _real_inex() now or else
1954
# * there will be no other way to pass it the exceptional operand
1955
# */
1956
# call _real_inex();
1957
# } else {
1958
# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959
# }
1960
#
1961
# fu_in_exc_p: d0 = bfffo result (24-31); convert to a 0-7 index into
# tbl_except_p, stuff the matching status word into the fsave frame, and
# frestore it so the OS "callout" sees the exceptional state.
fu_in_exc_p:
1962
subi.l &24,%d0 # fix offset to be 0-8
1963
cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
1964
blt.b fu_in_exc_exit_p # no
1965
1966
# the enabled exception was inexact
1967
btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968
bne.w fu_in_exc_unfl_p # yes
1969
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970
bne.w fu_in_exc_ovfl_p # yes
1971
1972
# here, we insert the correct fsave status value into the fsave frame for the
1973
# corresponding exception. the operand in the fsave frame should be the original
1974
# src operand.
1975
# as a reminder for future predicted pain and agony, we are passing in fsave the
1976
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1978
fu_in_exc_exit_p:
1979
btst &0x5,EXC_SR(%a6) # user or supervisor?
1980
bne.w fu_in_exc_exit_s_p # supervisor
1981
1982
mov.l EXC_A7(%a6),%a0 # update user a7
1983
mov.l %a0,%usp
1984
1985
fu_in_exc_exit_cont_p:
1986
mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1987
1988
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
1989
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
1991
1992
frestore FP_SRC(%a6) # restore src op
1993
1994
unlk %a6
1995
1996
btst &0x7,(%sp) # is trace enabled?
1997
bne.w fu_trace_p # yes
1998
1999
bra.l _fpsp_done
2000
2001
# tbl_except_p: fsave status words indexed by exception priority 0-7:
# bsun,snan,operr,ovfl,unfl,dz,inex,inex.
tbl_except_p:
2002
short 0xe000,0xe006,0xe004,0xe005
2003
short 0xe003,0xe002,0xe001,0xe001
2004
2005
# force an overflow (index 3) fsave frame
fu_in_exc_ovfl_p:
2006
mov.w &0x3,%d0
2007
bra.w fu_in_exc_exit_p
2008
2009
# force an underflow (index 4) fsave frame
fu_in_exc_unfl_p:
2010
mov.w &0x4,%d0
2011
bra.w fu_in_exc_exit_p
2012
2013
# fu_in_exc_exit_s_p:
# Supervisor-mode variant of fu_in_exc_exit_p.  If the <ea> mode was (a7)+,
# the 12 bytes of immediate data sit on the system stack and must be popped
# by shifting the SR/PC longwords "up" 0xc bytes before exiting.
fu_in_exc_exit_s_p:
2014
btst &mia7_bit,SPCOND_FLG(%a6)
2015
beq.b fu_in_exc_exit_cont_p
2016
2017
mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2018
2019
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2020
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2022
2023
frestore FP_SRC(%a6) # restore src op
2024
2025
unlk %a6 # unravel stack frame
2026
2027
# shift stack frame "up". who cares about <ea> field.
2028
mov.l 0x4(%sp),0x10(%sp)
2029
mov.l 0x0(%sp),0xc(%sp)
2030
add.l &0xc,%sp
2031
2032
btst &0x7,(%sp) # is trace on?
2033
bne.b fu_trace_p # yes
2034
2035
bra.l _fpsp_done # exit to os
2036
2037
#
2038
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039
# exception was being traced. Make the "current" PC the FPIAR and put it in the
2040
# trace stack frame then jump to _real_trace().
2041
#
2042
# UNSUPP FRAME TRACE FRAME
2043
# ***************** *****************
2044
# * EA * * Current *
2045
# * * * PC *
2046
# ***************** *****************
2047
# * 0x2 * 0x0dc * * 0x2 * 0x024 *
2048
# ***************** *****************
2049
# * Next * * Next *
2050
# * PC * * PC *
2051
# ***************** *****************
2052
# * SR * * SR *
2053
# ***************** *****************
2054
fu_trace_p:
2055
mov.w &0x2024,0x6(%sp) # convert frame: fmt 0x2, voff 0x024 (trace)
fmov.l %fpiar,0x8(%sp) # "Current PC" for the trace frame
2057
2058
bra.l _real_trace
2059
2060
#########################################################
2061
#########################################################
2062
# fu_out_pack:
# "Unimplemented data type" handler for an fmove OUT with PACKED destination
# format.  Load the source fp register, tag it, and call fout to perform the
# packed store; then exit normally unless an enabled exception (SNAN, OPERR,
# or INEX) was signalled by the emulation.
fu_out_pack:
2064
2065
# I'm not sure at this point what FPSR bits are valid for this instruction.
2066
# so, since the emulation routines re-create them anyways, zero exception field.
2067
# fmove out doesn't affect ccodes.
2068
and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
2069
2070
fmov.l &0x0,%fpcr # zero current control regs
2071
fmov.l &0x0,%fpsr
2072
2073
bfextu EXC_CMDREG(%a6){&6:&3},%d0
2074
bsr.l load_fpn1
2075
2076
# unlike other opclass 3, unimplemented data type exceptions, packed must be
2077
# able to detect all operand types.
2078
lea FP_SRC(%a6),%a0
2079
bsr.l set_tag_x # tag the operand type
2080
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2081
bne.b fu_op2_p # no
2082
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
2083
2084
fu_op2_p:
2085
mov.b %d0,STAG(%a6) # save src optype tag
2086
2087
clr.l %d0
2088
mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
2089
2090
lea FP_SRC(%a6),%a0 # pass ptr to src operand
2091
2092
mov.l (%a6),EXC_A6(%a6) # in case a6 changes
2093
bsr.l fout # call fmove out routine
2094
2095
# Exceptions in order of precedence:
2096
# BSUN : no
2097
# SNAN : yes
2098
# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099
# OVFL : no
2100
# UNFL : no
2101
# DZ : no
2102
# INEX2 : yes
2103
# INEX1 : no
2104
2105
# determine the highest priority exception(if any) set by the
2106
# emulation routine that has also been enabled by the user.
2107
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2108
bne.w fu_out_ena_p # some are enabled
2109
2110
fu_out_exit_p:
2111
mov.l EXC_A6(%a6),(%a6) # restore a6
2112
2113
btst &0x5,EXC_SR(%a6) # user or supervisor?
2114
bne.b fu_out_exit_s_p # supervisor
2115
2116
mov.l EXC_A7(%a6),%a0 # update user a7
2117
mov.l %a0,%usp
2118
2119
fu_out_exit_cont_p:
2120
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2121
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2123
2124
unlk %a6 # unravel stack frame
2125
2126
btst &0x7,(%sp) # is trace on?
2127
bne.w fu_trace_p # yes
2128
2129
bra.l _fpsp_done # exit to os
2130
2131
# the exception occurred in supervisor mode. check to see if the
2132
# addressing mode was -(a7). if so, we'll need to shift the
2133
# stack frame "down".
2134
# fu_out_exit_s_p: for -(a7) the packed result must land where the exception
# frame currently sits, so move SR/PC down 0xc bytes and copy FP_DST there.
fu_out_exit_s_p:
2135
btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136
beq.b fu_out_exit_cont_p # no
2137
2138
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2139
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2141
2142
mov.l (%a6),%a6 # restore frame pointer
2143
2144
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146
2147
# now, copy the result to the proper place on the stack
2148
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151
2152
add.l &LOCAL_SIZE-0x8,%sp
2153
2154
btst &0x7,(%sp)
2155
bne.w fu_trace_p
2156
2157
bra.l _fpsp_done
2158
2159
# fu_out_ena_p:
# Intersect enabled exceptions with the ones set by fout; if none remain,
# take the normal exit.  Otherwise dispatch on the bfffo result (0x1a =
# OPERR's bit position; greater = INEX, equal = OPERR, less = SNAN).
fu_out_ena_p:
2160
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
2161
bfffo %d0{&24:&8},%d0 # find highest priority exception
2162
beq.w fu_out_exit_p
2163
2164
mov.l EXC_A6(%a6),(%a6) # restore a6
2165
2166
# an exception occurred and that exception was enabled.
2167
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
2168
fu_out_exc_p:
2169
cmpi.b %d0,&0x1a
2170
bgt.w fu_inex_p2
2171
beq.w fu_operr_p
2172
2173
# fu_snan_p:
# Enabled SNAN on a packed fmove out.  User mode: update USP and join the
# common fu_snan path.  Supervisor -(a7): shift the exception frame "down"
# 0xc bytes, store the default result in its place, stuff an SNAN (0xe006)
# fsave frame, and exit via _real_snan with vector offset 0xd8.
fu_snan_p:
2174
btst &0x5,EXC_SR(%a6)
2175
bne.b fu_snan_s_p
2176
2177
mov.l EXC_A7(%a6),%a0
2178
mov.l %a0,%usp
2179
bra.w fu_snan
2180
2181
fu_snan_s_p:
2182
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2183
bne.w fu_snan
2184
2185
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186
# the strategy is to move the exception frame "down" 12 bytes. then, we
2187
# can store the default result where the exception frame was.
2188
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2189
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2191
2192
mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
2193
mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
2194
2195
frestore FP_SRC(%a6) # restore src operand
2196
2197
mov.l (%a6),%a6 # restore frame pointer
2198
2199
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202
2203
# now, we copy the default result to its proper location
2204
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207
2208
add.l &LOCAL_SIZE-0x8,%sp
2209
2210
2211
bra.l _real_snan
2212
2213
# fu_operr_p:
# Enabled OPERR on a packed fmove out.  Same frame-shifting strategy as
# fu_snan_p but with an OPERR (0xe004) fsave frame and vector offset 0xd0;
# exits via _real_operr.
fu_operr_p:
2214
btst &0x5,EXC_SR(%a6)
2215
bne.w fu_operr_p_s
2216
2217
mov.l EXC_A7(%a6),%a0
2218
mov.l %a0,%usp
2219
bra.w fu_operr
2220
2221
fu_operr_p_s:
2222
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2223
bne.w fu_operr
2224
2225
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226
# the strategy is to move the exception frame "down" 12 bytes. then, we
2227
# can store the default result where the exception frame was.
2228
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2229
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2231
2232
mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
2233
mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
2234
2235
frestore FP_SRC(%a6) # restore src operand
2236
2237
mov.l (%a6),%a6 # restore frame pointer
2238
2239
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242
2243
# now, we copy the default result to its proper location
2244
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247
2248
add.l &LOCAL_SIZE-0x8,%sp
2249
2250
2251
bra.l _real_operr
2252
2253
# fu_inex_p2:
# Enabled INEX on a packed fmove out.  Same frame-shifting strategy as
# fu_snan_p but with an inexact (0xe001) fsave frame and vector offset 0xc4;
# exits via _real_inex.
fu_inex_p2:
2254
btst &0x5,EXC_SR(%a6)
2255
bne.w fu_inex_s_p2
2256
2257
mov.l EXC_A7(%a6),%a0
2258
mov.l %a0,%usp
2259
bra.w fu_inex
2260
2261
fu_inex_s_p2:
2262
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2263
bne.w fu_inex
2264
2265
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266
# the strategy is to move the exception frame "down" 12 bytes. then, we
2267
# can store the default result where the exception frame was.
2268
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
2269
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2271
2272
mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
2273
mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
2274
2275
frestore FP_SRC(%a6) # restore src operand
2276
2277
mov.l (%a6),%a6 # restore frame pointer
2278
2279
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280
mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282
2283
# now, we copy the default result to its proper location
2284
mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285
mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286
mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287
2288
add.l &LOCAL_SIZE-0x8,%sp
2289
2290
2291
bra.l _real_inex
2292
2293
#########################################################################
2294
2295
#
2296
# if we're stuffing a source operand back into an fsave frame then we
2297
# have to make sure that for single or double source operands that the
2298
# format stuffed is as weird as the hardware usually makes it.
2299
#
2300
# funimp_skew: dispatch on the src-format field of the extension word;
# only sgl (0x1) and dbl (0x5) denorms need re-skewing.
global funimp_skew
2301
funimp_skew:
2302
bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303
cmpi.b %d0,&0x1 # was src sgl?
2304
beq.b funimp_skew_sgl # yes
2305
cmpi.b %d0,&0x5 # was src dbl?
2306
beq.b funimp_skew_dbl # yes
2307
rts
2308
2309
# single-precision denorm: shift the mantissa right, set the j-bit, and
# force the "skewed" biased exponent 0x3f80 the hardware would have used.
funimp_skew_sgl:
2310
mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2311
andi.w &0x7fff,%d0 # strip sign
2312
beq.b funimp_skew_sgl_not
2313
cmpi.w %d0,&0x3f80
2314
bgt.b funimp_skew_sgl_not
2315
neg.w %d0 # make exponent negative
2316
addi.w &0x3f81,%d0 # find amt to shift
2317
mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
2318
lsr.l %d0,%d1 # shift it
2319
bset &31,%d1 # set j-bit
2320
mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
2321
andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
2322
ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
2323
funimp_skew_sgl_not:
2324
rts
2325
2326
# double-precision denorm: use dnrm_lp to denormalize to the dbl threshold
# (0x3c01), then rebuild the sign/exponent word and set the j-bit.
funimp_skew_dbl:
2327
mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
2328
andi.w &0x7fff,%d0 # strip sign
2329
beq.b funimp_skew_dbl_not
2330
cmpi.w %d0,&0x3c00
2331
bgt.b funimp_skew_dbl_not
2332
2333
tst.b FP_SRC_EX(%a6) # make "internal format"
2334
smi.b 0x2+FP_SRC(%a6) # save sign byte for later
mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
2336
clr.l %d0 # clear g,r,s
2337
lea FP_SRC(%a6),%a0 # pass ptr to src op
2338
mov.w &0x3c01,%d1 # pass denorm threshold
2339
bsr.l dnrm_lp # denorm it
2340
mov.w &0x3c00,%d0 # new exponent
2341
tst.b 0x2+FP_SRC(%a6) # is sign set?
2342
beq.b fss_dbl_denorm_done # no
2343
bset &15,%d0 # set sign
2344
fss_dbl_denorm_done:
2345
bset &0x7,FP_SRC_HI(%a6) # set j-bit
2346
mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
2347
funimp_skew_dbl_not:
2348
rts
2349
2350
#########################################################################
2351
# _mem_write2:
# Memory-write helper: in user mode, tail-branch to _dmem_write; in
# supervisor mode, capture the 12-byte operand at (%a0) into FP_DST
# instead and return success (d1 = 0).
global _mem_write2
2352
_mem_write2:
2353
btst &0x5,EXC_SR(%a6) # user or supervisor?
beq.l _dmem_write # user: do the real write
mov.l 0x0(%a0),FP_DST_EX(%a6)
2356
mov.l 0x4(%a0),FP_DST_HI(%a6)
2357
mov.l 0x8(%a0),FP_DST_LO(%a6)
2358
clr.l %d1 # return success
rts
2360
2361
#########################################################################
2362
# XDEF **************************************************************** #
2363
# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
2364
# effective address" exception. #
2365
# #
2366
# This handler should be the first code executed upon taking the #
2367
# FP Unimplemented Effective Address exception in an operating #
2368
# system. #
2369
# #
2370
# XREF **************************************************************** #
2371
# _imem_read_long() - read instruction longword #
2372
# fix_skewed_ops() - adjust src operand in fsave frame #
2373
# set_tag_x() - determine optype of src/dst operands #
2374
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
2375
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
2376
# load_fpn2() - load dst operand from FP regfile #
2377
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
2378
# decbin() - convert packed data to FP binary data #
2379
# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
2380
# _real_access() - "callout" for access error exception #
2381
# _mem_read() - read extended immediate operand from memory #
2382
# _fpsp_done() - "callout" for exit; work all done #
2383
# _real_trace() - "callout" for Trace enabled exception #
2384
# fmovm_dynamic() - emulate dynamic fmovm instruction #
2385
# fmovm_ctrl() - emulate fmovm control instruction #
2386
# #
2387
# INPUT *************************************************************** #
2388
# - The system stack contains the "Unimplemented <ea>" stk frame #
2389
# #
2390
# OUTPUT ************************************************************** #
2391
# If access error: #
2392
# - The system stack is changed to an access error stack frame #
2393
# If FPU disabled: #
2394
# - The system stack is changed to an FPU disabled stack frame #
2395
# If Trace exception enabled: #
2396
# - The system stack is changed to a Trace exception stack frame #
2397
# Else: (normal case) #
2398
# - None (correct result has been stored as appropriate) #
2399
# #
2400
# ALGORITHM *********************************************************** #
2401
# This exception handles 3 types of operations: #
2402
# (1) FP Instructions using extended precision or packed immediate #
2403
# addressing mode. #
2404
# (2) The "fmovm.x" instruction w/ dynamic register specification. #
2405
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
2406
# #
2407
# For immediate data operations, the data is read in w/ a #
2408
# _mem_read() "callout", converted to FP binary (if packed), and used #
2409
# as the source operand to the instruction specified by the instruction #
2410
# word. If no FP exception should be reported as a result of the #
2411
# emulation, then the result is stored to the destination register and #
2412
# the handler exits through _fpsp_done(). If an enabled exc has been #
2413
# signalled as a result of emulation, then an fsave state frame #
2414
# corresponding to the FP exception type must be entered into the 060 #
2415
# FPU before exiting. In either the enabled or disabled cases, we #
2416
# must also check if a Trace exception is pending, in which case, we #
2417
# must create a Trace exception stack frame from the current exception #
2418
# stack frame. If no Trace is pending, we simply exit through #
2419
# _fpsp_done(). #
2420
# For "fmovm.x", call the routine fmovm_dynamic() which will #
2421
# decode and emulate the instruction. No FP exceptions can be pending #
2422
# as a result of this operation emulation. A Trace exception can be #
2423
# pending, though, which means the current stack frame must be changed #
2424
# to a Trace stack frame and an exit made through _real_trace(). #
2425
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
2426
# was executed from supervisor mode, this handler must store the FP #
2427
# register file values to the system stack by itself since #
2428
# fmovm_dynamic() can't handle this. A normal exit is made through #
2429
# fpsp_done(). #
2430
# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2431
# Again, a Trace exception may be pending and an exit made through #
2432
# _real_trace(). Else, a normal exit is made through _fpsp_done(). #
2433
# #
2434
# Before any of the above is attempted, it must be checked to #
2435
# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2436
# before the "FPU disabled" exception, but the "FPU disabled" exception #
2437
# has higher priority, we check the disabled bit in the PCR. If set, #
2438
# then we must create an 8 word "FPU disabled" exception stack frame #
2439
# from the current 4 word exception stack frame. This includes #
2440
# reproducing the effective address of the instruction to put on the #
2441
# new stack frame. #
2442
# #
2443
# In the process of all emulation work, if a _mem_read() #
2444
# "callout" returns a failing result indicating an access error, then #
2445
# we must create an access error stack frame from the current stack #
2446
# frame. This information includes a faulting address and a fault- #
2447
# status-longword. These are created within this handler. #
2448
# #
2449
#########################################################################
2450
2451
global _fpsp_effadd
2452
_fpsp_effadd:
2453
2454
# This exception type takes priority over the "Line F Emulator"
2455
# exception. Therefore, the FPU could be disabled when entering here.
2456
# So, we must check to see if it's disabled and handle that case separately.
2457
mov.l %d0,-(%sp) # save d0
2458
movc %pcr,%d0 # load proc cr
2459
btst &0x1,%d0 # is FPU disabled?
2460
bne.w iea_disabled # yes
2461
mov.l (%sp)+,%d0 # restore d0
2462
2463
link %a6,&-LOCAL_SIZE # init stack frame
2464
2465
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2466
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
2468
2469
# PC of instruction that took the exception is the PC in the frame
2470
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471
2472
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2473
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2474
bsr.l _imem_read_long # fetch the instruction words
2475
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2476
2477
#########################################################################
2478
2479
# opword bit 15 set means this is an fmovem-type instruction
tst.w %d0 # is operation fmovem?
2480
bmi.w iea_fmovm # yes
2481
2482
#
2483
# here, we will have:
2484
# fabs fdabs fsabs facos fmod
2485
# fadd fdadd fsadd fasin frem
2486
# fcmp fatan fscale
2487
# fdiv fddiv fsdiv fatanh fsin
2488
# fint fcos fsincos
2489
# fintrz fcosh fsinh
2490
# fmove fdmove fsmove fetox ftan
2491
# fmul fdmul fsmul fetoxm1 ftanh
2492
# fneg fdneg fsneg fgetexp ftentox
2493
# fsgldiv fgetman ftwotox
2494
# fsglmul flog10
2495
# fsqrt flog2
2496
# fsub fdsub fssub flogn
2497
# ftst flognp1
2498
# which can all use f<op>.{x,p}
2499
# so, now it's immediate data extended precision AND PACKED FORMAT!
2500
#
2501
# iea_op: read the 12-byte immediate operand (extended or packed) from the
# instruction stream into FP_SRC; access errors route to iea_iacc.
iea_op:
2502
andi.l &0x00ff00ff,USER_FPSR(%a6)
2503
2504
btst &0xa,%d0 # is src fmt x or p?
2505
bne.b iea_op_pack # packed
2506
2507
2508
mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2509
lea FP_SRC(%a6),%a1 # pass: ptr to super addr
2510
mov.l &0xc,%d0 # pass: 12 bytes
2511
bsr.l _imem_read # read extended immediate
2512
2513
tst.l %d1 # did ifetch fail?
2514
bne.w iea_iacc # yes
2515
2516
bra.b iea_op_setsrc
2517
2518
# iea_op_pack:
# Packed immediate source: read the 12 bytes, special-case INF/NAN (exponent
# field all ones) and ZERO (all-zero mantissa) which need no conversion,
# otherwise convert to extended precision via decbin.
iea_op_pack:
2519
2520
mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
2521
lea FP_SRC(%a6),%a1 # pass: ptr to super dst
2522
mov.l &0xc,%d0 # pass: 12 bytes
2523
bsr.l _imem_read # read packed operand
2524
2525
tst.l %d1 # did ifetch fail?
2526
bne.w iea_iacc # yes
2527
2528
# The packed operand is an INF or a NAN if the exponent field is all ones.
2529
bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
2530
cmpi.w %d0,&0x7fff # INF or NAN?
2531
beq.b iea_op_setsrc # operand is an INF or NAN
2532
2533
# The packed operand is a zero if the mantissa is all zero, else it's
2534
# a normal packed op.
2535
mov.b 3+FP_SRC(%a6),%d0 # get byte 4
2536
andi.b &0x0f,%d0 # clear all but last nybble
2537
bne.b iea_op_gp_not_spec # not a zero
2538
tst.l FP_SRC_HI(%a6) # is lw 2 zero?
2539
bne.b iea_op_gp_not_spec # not a zero
2540
tst.l FP_SRC_LO(%a6) # is lw 3 zero?
2541
beq.b iea_op_setsrc # operand is a ZERO
2542
iea_op_gp_not_spec:
2543
lea FP_SRC(%a6),%a0 # pass: ptr to packed op
2544
bsr.l decbin # convert to extended
2545
fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
2546
2547
# iea_op_setsrc:
# FP_SRC holds the converted source.  Tag src (and dst for dyadic ops,
# except ftst/fcmp/fsincos handling below), then dispatch through
# tbl_unsupp to the emulation routine for this extension opcode.
iea_op_setsrc:
2548
addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer
2549
2550
# FP_SRC now holds the src operand.
2551
lea FP_SRC(%a6),%a0 # pass: ptr to src op
2552
bsr.l set_tag_x # tag the operand type
2553
mov.b %d0,STAG(%a6) # could be ANYTHING!!!
2554
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2555
bne.b iea_op_getdst # no
2556
bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2557
mov.b %d0,STAG(%a6) # set new optype tag
2558
iea_op_getdst:
2559
clr.b STORE_FLG(%a6) # clear "store result" boolean
2560
2561
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
2562
beq.b iea_op_extract # monadic
2563
btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
2564
bne.b iea_op_spec # yes
2565
2566
iea_op_loaddst:
2567
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568
bsr.l load_fpn2 # load dst operand
2569
2570
lea FP_DST(%a6),%a0 # pass: ptr to dst op
2571
bsr.l set_tag_x # tag the operand type
2572
mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
2573
cmpi.b %d0,&UNNORM # is operand an UNNORM?
2574
bne.b iea_op_extract # no
2575
bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
2576
mov.b %d0,DTAG(%a6) # set new optype tag
2577
bra.b iea_op_extract
2578
2579
# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580
iea_op_spec:
2581
btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
2582
beq.b iea_op_extract # yes
2583
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584
# store a result. then, only fcmp will branch back and pick up a dst operand.
2585
st STORE_FLG(%a6) # don't store a final result
2586
btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
2587
beq.b iea_op_loaddst # yes
2588
2589
iea_op_extract:
2590
clr.l %d0
2591
mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
2592
2593
mov.b 1+EXC_CMDREG(%a6),%d1
2594
andi.w &0x007f,%d1 # extract extension
2595
2596
fmov.l &0x0,%fpcr
2597
fmov.l &0x0,%fpsr
2598
2599
lea FP_SRC(%a6),%a0
2600
lea FP_DST(%a6),%a1
2601
2602
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603
jsr (tbl_unsupp.l,%pc,%d1.l*1)
2604
2605
#
2606
# Exceptions in order of precedence:
2607
# BSUN : none
2608
# SNAN : all operations
2609
# OPERR : all reg-reg or mem-reg operations that can normally operr
2610
# OVFL : same as OPERR
2611
# UNFL : same as OPERR
2612
# DZ : same as OPERR
2613
# INEX2 : same as OPERR
2614
# INEX1 : all packed immediate operations
2615
#
2616
2617
# we determine the highest priority exception(if any) set by the
2618
# emulation routine that has also been enabled by the user.
2619
mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
2620
bne.b iea_op_ena # some are enabled
2621
2622
# now, we save the result, unless, of course, the operation was ftst or fcmp.
2623
# these don't save results.
2624
iea_op_save:
2625
tst.b STORE_FLG(%a6) # does this op store a result?
2626
bne.b iea_op_exit1 # exit with no frestore
2627
2628
iea_op_store:
2629
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630
bsr.l store_fpreg # store the result
2631
2632
# normal (no enabled exception) exit: advance the stacked PC past the
# immediate data and return through the trace check.
iea_op_exit1:
2633
mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635
2636
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2637
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2639
2640
unlk %a6 # unravel the frame
2641
2642
btst &0x7,(%sp) # is trace on?
2643
bne.w iea_op_trace # yes
2644
2645
bra.l _fpsp_done # exit to os
2646
2647
# iea_op_ena:
# Some exceptions are enabled: intersect with the ones actually signalled,
# handle the disabled-OVFL/UNFL-with-INEX-enabled special cases, then stuff
# the matching status word from tbl_iea_except into the fsave frame.
iea_op_ena:
2648
and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enable and set
2649
bfffo %d0{&24:&8},%d0 # find highest priority exception
2650
bne.b iea_op_exc # at least one was set
2651
2652
# no exception occurred. now, did a disabled, exact overflow occur with inexact
2653
# enabled? if so, then we have to stuff an overflow frame into the FPU.
2654
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655
beq.b iea_op_save
2656
2657
iea_op_ovfl:
2658
btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659
beq.b iea_op_store # no
2660
bra.b iea_op_exc_ovfl # yes
2661
2662
# an enabled exception occurred. we have to insert the exception type back into
2663
# the machine.
2664
iea_op_exc:
2665
subi.l &24,%d0 # fix offset to be 0-8
2666
cmpi.b %d0,&0x6 # is exception INEX?
2667
bne.b iea_op_exc_force # no
2668
2669
# the enabled exception was inexact. so, if it occurs with an overflow
2670
# or underflow that was disabled, then we have to force an overflow or
2671
# underflow frame.
2672
btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673
bne.b iea_op_exc_ovfl # yes
2674
btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675
bne.b iea_op_exc_unfl # yes
2676
2677
iea_op_exc_force:
2678
mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679
bra.b iea_op_exit2 # exit with frestore
2680
2681
tbl_iea_except:
2682
short 0xe002, 0xe006, 0xe004, 0xe005
2683
short 0xe003, 0xe002, 0xe001, 0xe001
2684
2685
iea_op_exc_ovfl:
2686
mov.w &0xe005,2+FP_SRC(%a6)
2687
bra.b iea_op_exit2
2688
2689
iea_op_exc_unfl:
2690
mov.w &0xe003,2+FP_SRC(%a6)
2691
2692
iea_op_exit2:
2693
mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695
2696
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2697
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2699
2700
frestore FP_SRC(%a6) # restore exceptional state
2701
2702
unlk %a6 # unravel the frame
2703
2704
btst &0x7,(%sp) # is trace on?
2705
bne.b iea_op_trace # yes
2706
2707
bra.l _fpsp_done # exit to os
2708
2709
#
2710
# The opclass two instruction that took an "Unimplemented Effective Address"
2711
# exception was being traced. Make the "current" PC the FPIAR and put it in
2712
# the trace stack frame then jump to _real_trace().
2713
#
2714
# UNIMP EA FRAME TRACE FRAME
2715
# ***************** *****************
2716
# * 0x0 * 0x0f0 * * Current *
2717
# ***************** * PC *
2718
# * Current * *****************
2719
# * PC * * 0x2 * 0x024 *
2720
# ***************** *****************
2721
# * SR * * Next *
2722
# ***************** * PC *
2723
# *****************
2724
# * SR *
2725
# *****************
2726
iea_op_trace:
2727
# duplicate the top longword of the frame, growing it by 4 bytes so the
# extra "Current PC" field of the format-2 trace frame fits.
mov.l (%sp),-(%sp) # shift stack frame "down"
2728
mov.w 0x8(%sp),0x4(%sp)
2729
mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
2730
fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
2731
2732
bra.l _real_trace
2733
2734
#########################################################################
2735
# fmovm (of FP data or control registers) took an "Unimplemented <ea>"
# exception. bit 14 of the opword distinguishes the two forms.
iea_fmovm:
2736
btst &14,%d0 # ctrl or data reg
2737
beq.w iea_fmovm_ctrl
2738
2739
iea_fmovm_data:
2740
2741
btst &0x5,EXC_SR(%a6) # user or supervisor mode
2742
bne.b iea_fmovm_data_s
2743
2744
iea_fmovm_data_u:
2745
mov.l %usp,%a0
2746
mov.l %a0,EXC_A7(%a6) # store current a7
2747
bsr.l fmovm_dynamic # do dynamic fmovm
2748
mov.l EXC_A7(%a6),%a0 # load possibly new a7
2749
mov.l %a0,%usp # update usp
2750
bra.w iea_fmovm_exit
2751
2752
# supervisor mode: (a7) accesses hit the exception frame itself, so the
# -(a7)/(a7)+ special cases flagged by fmovm_dynamic need extra handling.
iea_fmovm_data_s:
2753
clr.b SPCOND_FLG(%a6)
2754
lea 0x2+EXC_VOFF(%a6),%a0
2755
mov.l %a0,EXC_A7(%a6)
2756
bsr.l fmovm_dynamic # do dynamic fmovm
2757
2758
cmpi.b SPCOND_FLG(%a6),&mda7_flg
2759
beq.w iea_fmovm_data_predec
2760
cmpi.b SPCOND_FLG(%a6),&mia7_flg
2761
bne.w iea_fmovm_exit
2762
2763
# right now, d0 = the size.
2764
# the data has been fetched from the supervisor stack, but we have not
2765
# incremented the stack pointer by the appropriate number of bytes.
2766
# do it here.
2767
iea_fmovm_data_postinc:
2768
btst &0x7,EXC_SR(%a6)
2769
bne.b iea_fmovm_data_pi_trace
2770
2771
# rebuild a 4-word format-0 frame "size" bytes higher on the stack.
mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2772
mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0)
2773
mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2774
2775
lea (EXC_SR,%a6,%d0),%a0
2776
mov.l %a0,EXC_SR(%a6)
2777
2778
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2779
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2781
2782
unlk %a6
2783
mov.l (%sp)+,%sp
2784
bra.l _fpsp_done
2785
2786
# trace was enabled: build a 6-word format-2 trace frame instead.
iea_fmovm_data_pi_trace:
2787
mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2788
mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)
2789
mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2790
mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)
2791
2792
lea (EXC_SR-0x4,%a6,%d0),%a0
2793
mov.l %a0,EXC_SR(%a6)
2794
2795
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2796
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2798
2799
unlk %a6
2800
mov.l (%sp)+,%sp
2801
bra.l _real_trace
2802
2803
# right now, d1 = size and d0 = the strg.
2804
iea_fmovm_data_predec:
2805
mov.b %d1,EXC_VOFF(%a6) # store strg
2806
mov.b %d0,0x1+EXC_VOFF(%a6) # store size
2807
2808
fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
2809
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2811
2812
mov.l (%a6),-(%sp) # make a copy of a6
2813
mov.l %d0,-(%sp) # save d0
2814
mov.l %d1,-(%sp) # save d1
2815
mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2816
2817
clr.l %d0
2818
mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
2819
neg.l %d0 # get negative of size
2820
2821
btst &0x7,EXC_SR(%a6) # is trace enabled?
2822
beq.b iea_fmovm_data_p2
2823
2824
# trace enabled: construct a format-2 trace frame below the current one.
mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825
mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)
2826
mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0)
2827
mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0)
2828
2829
pea (%a6,%d0) # create final sp
2830
bra.b iea_fmovm_data_p3
2831
2832
iea_fmovm_data_p2:
2833
mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834
mov.l (%sp)+,(EXC_PC,%a6,%d0)
2835
mov.w &0x00f0,(EXC_VOFF,%a6,%d0)
2836
2837
pea (0x4,%a6,%d0) # create final sp
2838
2839
# store each selected FP register onto the supervisor stack. the strg
# bitmask in d1 is scanned msb-first (bit 7 = fp0 ... bit 0 = fp7); each
# extended-precision store advances the destination by 12 (0xc) bytes.
iea_fmovm_data_p3:
2840
clr.l %d1
2841
mov.b EXC_VOFF(%a6),%d1 # fetch strg
2842
2843
tst.b %d1
2844
bpl.b fm_1
2845
fmovm.x &0x80,(0x4+0x8,%a6,%d0)
2846
addi.l &0xc,%d0
2847
fm_1:
2848
lsl.b &0x1,%d1
2849
bpl.b fm_2
2850
fmovm.x &0x40,(0x4+0x8,%a6,%d0)
2851
addi.l &0xc,%d0
2852
fm_2:
2853
lsl.b &0x1,%d1
2854
bpl.b fm_3
2855
fmovm.x &0x20,(0x4+0x8,%a6,%d0)
2856
addi.l &0xc,%d0
2857
fm_3:
2858
lsl.b &0x1,%d1
2859
bpl.b fm_4
2860
fmovm.x &0x10,(0x4+0x8,%a6,%d0)
2861
addi.l &0xc,%d0
2862
fm_4:
2863
lsl.b &0x1,%d1
2864
bpl.b fm_5
2865
fmovm.x &0x08,(0x4+0x8,%a6,%d0)
2866
addi.l &0xc,%d0
2867
fm_5:
2868
lsl.b &0x1,%d1
2869
bpl.b fm_6
2870
fmovm.x &0x04,(0x4+0x8,%a6,%d0)
2871
addi.l &0xc,%d0
2872
fm_6:
2873
lsl.b &0x1,%d1
2874
bpl.b fm_7
2875
fmovm.x &0x02,(0x4+0x8,%a6,%d0)
2876
addi.l &0xc,%d0
2877
fm_7:
2878
lsl.b &0x1,%d1
2879
bpl.b fm_end
2880
fmovm.x &0x01,(0x4+0x8,%a6,%d0)
2881
# restore the saved registers and the final stack pointer pushed above.
fm_end:
2882
mov.l 0x4(%sp),%d1
2883
mov.l 0x8(%sp),%d0
2884
mov.l 0xc(%sp),%a6
2885
mov.l (%sp)+,%sp
2886
2887
btst &0x7,(%sp) # is trace enabled?
2888
beq.l _fpsp_done
2889
bra.l _real_trace
2890
2891
#########################################################################
2892
# fmovm of control registers (fpcr/fpsr/fpiar): emulate the load, then exit.
iea_fmovm_ctrl:
2893
2894
bsr.l fmovm_ctrl # load ctrl regs
2895
2896
iea_fmovm_exit:
2897
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
2898
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2900
2901
btst &0x7,EXC_SR(%a6) # is trace on?
2902
bne.b iea_fmovm_trace # yes
2903
2904
mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905
2906
unlk %a6 # unravel the frame
2907
2908
bra.l _fpsp_done # exit to os
2909
2910
#
2911
# The control reg instruction that took an "Unimplemented Effective Address"
2912
# exception was being traced. The "Current PC" for the trace frame is the
2913
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914
# After fixing the stack frame, jump to _real_trace().
2915
#
2916
# UNIMP EA FRAME TRACE FRAME
2917
# ***************** *****************
2918
# * 0x0 * 0x0f0 * * Current *
2919
# ***************** * PC *
2920
# * Current * *****************
2921
# * PC * * 0x2 * 0x024 *
2922
# ***************** *****************
2923
# * SR * * Next *
2924
# ***************** * PC *
2925
# *****************
2926
# * SR *
2927
# *****************
2928
# this ain't a pretty solution, but it works:
2929
# -restore a6 (not with unlk)
2930
# -shift stack frame down over where old a6 used to be
2931
# -add LOCAL_SIZE to stack pointer
2932
iea_fmovm_trace:
2933
mov.l (%a6),%a6 # restore frame pointer
2934
mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)
2935
mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)
2936
mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)
2937
mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938
add.l &LOCAL_SIZE,%sp # clear stack frame
2939
2940
bra.l _real_trace
2941
2942
#########################################################################
2943
# The FPU is disabled and so we should really have taken the "Line
2944
# F Emulator" exception. So, here we create an 8-word stack frame
2945
# from our 4-word stack frame. This means we must calculate the length
2946
# the faulting instruction to get the "next PC". This is trivial for
2947
# immediate operands but requires some extra work for fmovm dynamic
2948
# which can use most addressing modes.
2949
iea_disabled:
2950
mov.l (%sp)+,%d0 # restore d0
2951
2952
link %a6,&-LOCAL_SIZE # init stack frame
2953
2954
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
2955
2956
# PC of instruction that took the exception is the PC in the frame
2957
mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
2959
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
2960
bsr.l _imem_read_long # fetch the instruction words
2961
mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
2962
2963
tst.w %d0 # is instr fmovm?
2964
bmi.b iea_dis_fmovm # yes
2965
# instruction is using an extended precision immediate operand. Therefore,
2966
# the total instruction length is 16 bytes.
2967
iea_dis_immed:
2968
mov.l &0x10,%d0 # 16 bytes of instruction
2969
bra.b iea_dis_cont
2970
iea_dis_fmovm:
2971
btst &0xe,%d0 # is instr fmovm ctrl
2972
bne.b iea_dis_fmovm_data # no
2973
# the instruction is a fmovm.l with 2 or 3 registers.
2974
bfextu %d0{&19:&3},%d1
2975
mov.l &0xc,%d0
2976
cmpi.b %d1,&0x7 # move all regs?
2977
bne.b iea_dis_cont
2978
addq.l &0x4,%d0
2979
bra.b iea_dis_cont
2980
# the instruction is an fmovm.x dynamic which can use many addressing
2981
# modes and thus can have several different total instruction lengths.
2982
# call fmovm_calc_ea which will go through the ea calc process and,
2983
# as a by-product, will tell us how long the instruction is.
2984
iea_dis_fmovm_data:
2985
clr.l %d0
2986
bsr.l fmovm_calc_ea
2987
# instruction length = (extension-word pointer after ea calc) - (frame PC)
mov.l EXC_EXTWPTR(%a6),%d0
2988
sub.l EXC_PC(%a6),%d0
2989
iea_dis_cont:
2990
mov.w %d0,EXC_VOFF(%a6) # store stack shift value
2991
2992
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
2993
2994
unlk %a6
2995
2996
# here, we actually create the 8-word frame from the 4-word frame,
2997
# with the "next PC" as additional info.
2998
# the <ea> field is left as undefined.
2999
subq.l &0x8,%sp # make room for new stack
3000
mov.l %d0,-(%sp) # save d0
3001
mov.w 0xc(%sp),0x4(%sp) # move SR
3002
mov.l 0xe(%sp),0x6(%sp) # move Current PC
3003
clr.l %d0
3004
mov.w 0x12(%sp),%d0
3005
mov.l 0x6(%sp),0x10(%sp) # move Current PC
3006
add.l %d0,0x6(%sp) # make Next PC
3007
mov.w &0x402c,0xa(%sp) # insert offset,frame format
3008
mov.l (%sp)+,%d0 # restore d0
3009
3010
bra.l _real_fpu_disabled
3011
3012
##########
3013
3014
# instruction-fetch access error while emulating: convert the current frame
# into a format-4 (voff 0x008) access error frame and exit to _real_access().
iea_iacc:
3015
movc %pcr,%d0
3016
# NOTE(review): PCR bit 1 appears to mean "FPU disabled" here — if set, the
# FP register/ctrl state was never saved, so skip restoring it. confirm
# against the 68060 PCR definition.
btst &0x1,%d0
3017
bne.b iea_iacc_cont
3018
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3020
iea_iacc_cont:
3021
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3022
3023
unlk %a6
3024
3025
subq.w &0x8,%sp # make stack frame bigger
3026
mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
3027
mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
3028
mov.w &0x4008,0x6(%sp) # store voff
3029
mov.l 0x2(%sp),0x8(%sp) # store ea
3030
mov.l &0x09428001,0xc(%sp) # store fslw
3031
3032
iea_acc_done:
3033
btst &0x5,(%sp) # user or supervisor mode?
3034
beq.b iea_acc_done2 # user
3035
bset &0x2,0xd(%sp) # set supervisor TM bit
3036
3037
iea_acc_done2:
3038
bra.l _real_access
3039
3040
# data access error: same idea, but the frame is rebuilt in place using
# LOCAL_SIZE-relative offsets; a0 = fault address, d0 = fslw high word.
iea_dacc:
3041
lea -LOCAL_SIZE(%a6),%sp
3042
3043
movc %pcr,%d1
3044
btst &0x1,%d1
3045
bne.b iea_dacc_cont
3046
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 on stack
3047
fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048
iea_dacc_cont:
3049
mov.l (%a6),%a6
3050
3051
mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)
3052
mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)
3053
mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp)
3054
mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp)
3055
mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp)
3056
mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp)
3057
3058
movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059
add.w &LOCAL_SIZE-0x4,%sp
3060
3061
bra.b iea_acc_done
3062
3063
#########################################################################
3064
# XDEF **************************************************************** #
3065
# _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
3066
# #
3067
# This handler should be the first code executed upon taking the #
3068
# FP Operand Error exception in an operating system. #
3069
# #
3070
# XREF **************************************************************** #
3071
# _imem_read_long() - read instruction longword #
3072
# fix_skewed_ops() - adjust src operand in fsave frame #
3073
# _real_operr() - "callout" to operating system operr handler #
3074
# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3075
# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3076
# facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3077
# #
3078
# INPUT *************************************************************** #
3079
# - The system stack contains the FP Operr exception frame #
3080
# - The fsave frame contains the source operand #
3081
# #
3082
# OUTPUT ************************************************************** #
3083
# No access error: #
3084
# - The system stack is unchanged #
3085
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3086
# #
3087
# ALGORITHM *********************************************************** #
3088
# In a system where the FP Operr exception is enabled, the goal #
3089
# is to get to the handler specified at _real_operr(). But, on the 060, #
3090
# for opclass zero and two instruction taking this exception, the #
3091
# input operand in the fsave frame may be incorrect for some cases #
3092
# and needs to be corrected. This handler calls fix_skewed_ops() to #
3093
# do just this and then exits through _real_operr(). #
3094
# For opclass 3 instructions, the 060 doesn't store the default #
3095
# operr result out to memory or data register file as it should. #
3096
# This code must emulate the move out before finally exiting through #
3097
# _real_operr(). The move out, if to memory, is performed using #
3098
# _mem_write() "callout" routines that may return a failing result. #
3099
# In this special case, the handler must exit through facc_out() #
3100
# which creates an access error stack frame from the current operr #
3101
# stack frame. #
3102
# #
3103
#########################################################################
3104
3105
global _fpsp_operr
3106
_fpsp_operr:
3107
3108
link.w %a6,&-LOCAL_SIZE # init stack frame
3109
3110
fsave FP_SRC(%a6) # grab the "busy" frame
3111
3112
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3113
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3115
3116
# the FPIAR holds the "current PC" of the faulting instruction
3117
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118
3119
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3120
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3121
bsr.l _imem_read_long # fetch the instruction words
3122
mov.l %d0,EXC_OPWORD(%a6) # save opword/extword for decoding below
3123
3124
##############################################################################
3125
3126
btst &13,%d0 # is instr an fmove out?
3127
bne.b foperr_out # fmove out
3128
3129
3130
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131
# this would be the case for opclass two operations with a source infinity or
3132
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133
# cause an operr so we don't need to check for them here.
3134
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3135
bsr.l fix_skewed_ops # fix src op
3136
3137
# common exit: restore user context, put the (possibly fixed) frame back
# into the FPU, and continue in the OS operr handler.
foperr_exit:
3138
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3139
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3141
3142
frestore FP_SRC(%a6)
3143
3144
unlk %a6
3145
bra.l _real_operr
3146
3146
3147
########################################################################
3148
3149
#
3150
# the hardware does not save the default result to memory on enabled
3151
# operand error exceptions. we do this here before passing control to
3152
# the user operand error handler.
3153
#
3154
# byte, word, and long destination format operations can pass
3155
# through here. we simply need to test the sign of the src
3156
# operand and save the appropriate minimum or maximum integer value
3157
# to the effective address as pointed to by the stacked effective address.
3158
#
3159
# although packed opclass three operations can take operand error
3160
# exceptions, they won't pass through here since they are caught
3161
# first by the unsupported data format exception handler. that handler
3162
# sends them directly to _real_operr() if necessary.
3163
#
3164
# compute the default operr integer result in L_SCR1: the stored QNAN
# mantissa if the source is a NAN, otherwise the format's most positive
# (0x7fffffff) or, for a negative source, most negative (0x80000000) value.
foperr_out:
3165
3166
mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
3167
andi.w &0x7fff,%d1
3168
cmpi.w %d1,&0x7fff
3169
bne.b foperr_out_not_qnan
3170
# the operand is either an infinity or a QNAN.
3171
tst.l FP_SRC_LO(%a6)
3172
bne.b foperr_out_qnan
3173
mov.l FP_SRC_HI(%a6),%d1
3174
andi.l &0x7fffffff,%d1
3175
beq.b foperr_out_not_qnan
3176
foperr_out_qnan:
3177
mov.l FP_SRC_HI(%a6),L_SCR1(%a6)
3178
bra.b foperr_out_jmp
3179
3180
foperr_out_not_qnan:
3181
mov.l &0x7fffffff,%d1
3182
tst.b FP_SRC_EX(%a6)
3183
bpl.b foperr_out_not_qnan2
3184
addq.l &0x1,%d1 # negative src: 0x7fffffff+1 = 0x80000000
3185
foperr_out_not_qnan2:
3186
mov.l %d1,L_SCR1(%a6)
3187
3188
# dispatch on the destination format through the pc-relative offset table.
foperr_out_jmp:
3189
bfextu %d0{&19:&3},%d0 # extract dst format field
3190
mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3191
mov.w (tbl_operr.b,%pc,%d0.w*2),%a0
3192
jmp (tbl_operr.b,%pc,%a0)
3193
3194
tbl_operr:
3195
short foperr_out_l - tbl_operr # long word integer
3196
short tbl_operr - tbl_operr # sgl prec shouldn't happen
3197
short tbl_operr - tbl_operr # ext prec shouldn't happen
3198
short foperr_exit - tbl_operr # packed won't enter here
3199
short foperr_out_w - tbl_operr # word integer
3200
short tbl_operr - tbl_operr # dbl prec shouldn't happen
3201
short foperr_out_b - tbl_operr # byte integer
3202
short tbl_operr - tbl_operr # packed won't enter here
3203
3204
foperr_out_b:
3205
mov.b L_SCR1(%a6),%d0 # load positive default result
3206
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3207
ble.b foperr_out_b_save_dn # yes
3208
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3209
bsr.l _dmem_write_byte # write the default result
3210
3211
tst.l %d1 # did dstore fail?
3212
bne.l facc_out_b # yes
3213
3214
bra.w foperr_exit
3215
foperr_out_b_save_dn:
3216
andi.w &0x0007,%d1
3217
bsr.l store_dreg_b # store result to regfile
3218
bra.w foperr_exit
3219
3220
foperr_out_w:
3221
mov.w L_SCR1(%a6),%d0 # load positive default result
3222
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3223
ble.b foperr_out_w_save_dn # yes
3224
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3225
bsr.l _dmem_write_word # write the default result
3226
3227
tst.l %d1 # did dstore fail?
3228
bne.l facc_out_w # yes
3229
3230
bra.w foperr_exit
3231
foperr_out_w_save_dn:
3232
andi.w &0x0007,%d1
3233
bsr.l store_dreg_w # store result to regfile
3234
bra.w foperr_exit
3235
3236
foperr_out_l:
3237
mov.l L_SCR1(%a6),%d0 # load positive default result
3238
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3239
ble.b foperr_out_l_save_dn # yes
3240
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3241
bsr.l _dmem_write_long # write the default result
3242
3243
tst.l %d1 # did dstore fail?
3244
bne.l facc_out_l # yes
3245
3246
bra.w foperr_exit
3247
foperr_out_l_save_dn:
3248
andi.w &0x0007,%d1
3249
bsr.l store_dreg_l # store result to regfile
3250
bra.w foperr_exit
3251
3252
#########################################################################
3253
# XDEF **************************************************************** #
3254
# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
3255
# #
3256
# This handler should be the first code executed upon taking the #
3257
# FP Signalling NAN exception in an operating system. #
3258
# #
3259
# XREF **************************************************************** #
3260
# _imem_read_long() - read instruction longword #
3261
# fix_skewed_ops() - adjust src operand in fsave frame #
3262
# _real_snan() - "callout" to operating system SNAN handler #
3263
# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
3264
# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
3265
# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
3266
# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
3267
# #
3268
# INPUT *************************************************************** #
3269
# - The system stack contains the FP SNAN exception frame #
3270
# - The fsave frame contains the source operand #
3271
# #
3272
# OUTPUT ************************************************************** #
3273
# No access error: #
3274
# - The system stack is unchanged #
3275
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3276
# #
3277
# ALGORITHM *********************************************************** #
3278
# In a system where the FP SNAN exception is enabled, the goal #
3279
# is to get to the handler specified at _real_snan(). But, on the 060, #
3280
# for opclass zero and two instructions taking this exception, the #
3281
# input operand in the fsave frame may be incorrect for some cases #
3282
# and needs to be corrected. This handler calls fix_skewed_ops() to #
3283
# do just this and then exits through _real_snan(). #
3284
# For opclass 3 instructions, the 060 doesn't store the default #
3285
# SNAN result out to memory or data register file as it should. #
3286
# This code must emulate the move out before finally exiting through #
3287
# _real_snan(). The move out, if to memory, is performed using #
3288
# _mem_write() "callout" routines that may return a failing result. #
3289
# In this special case, the handler must exit through facc_out() #
3290
# which creates an access error stack frame from the current SNAN #
3291
# stack frame. #
3292
# For the case of an extended precision opclass 3 instruction, #
3293
# if the effective addressing mode was -() or ()+, then the address #
3294
# register must get updated by calling _calc_ea_fout(). If the <ea> #
3295
# was -(a7) from supervisor mode, then the exception frame currently #
3296
# on the system stack must be carefully moved "down" to make room #
3297
# for the operand being moved. #
3298
# #
3299
#########################################################################
3300
3301
global _fpsp_snan
3302
_fpsp_snan:
3303
3304
link.w %a6,&-LOCAL_SIZE # init stack frame
3305
3306
fsave FP_SRC(%a6) # grab the "busy" frame
3307
3308
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3309
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3311
3312
# the FPIAR holds the "current PC" of the faulting instruction
3313
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3314
3315
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3316
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3317
bsr.l _imem_read_long # fetch the instruction words
3318
mov.l %d0,EXC_OPWORD(%a6) # save opword/extword for decoding below
3319
3320
##############################################################################
3321
3322
btst &13,%d0 # is instr an fmove out?
3323
bne.w fsnan_out # fmove out
3324
3325
3326
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327
# this would be the case for opclass two operations with a source infinity or
3328
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329
# fixed here.
3330
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3331
bsr.l fix_skewed_ops # fix src op
3332
3333
# common exit: restore user context, put the (possibly fixed) frame back
# into the FPU, and continue in the OS SNAN handler.
fsnan_exit:
3334
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3335
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3337
3338
frestore FP_SRC(%a6)
3339
3340
unlk %a6
3341
bra.l _real_snan
3342
3342
3343
########################################################################
3344
3345
#
3346
# the hardware does not save the default result to memory on enabled
3347
# snan exceptions. we do this here before passing control to
3348
# the user snan handler.
3349
#
3350
# byte, word, long, and packed destination format operations can pass
3351
# through here. since packed format operations already were handled by
3352
# fpsp_unsupp(), then we need to do nothing else for them here.
3353
# for byte, word, and long, we simply need to test the sign of the src
3354
# operand and save the appropriate minimum or maximum integer value
3355
# to the effective address as pointed to by the stacked effective address.
3356
#
3357
# dispatch on the destination format through the pc-relative offset table;
# the default result is the source SNAN converted to the dst format with
# its SNAN bit set (quieted).
fsnan_out:
3358
3359
bfextu %d0{&19:&3},%d0 # extract dst format field
3360
mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
3361
mov.w (tbl_snan.b,%pc,%d0.w*2),%a0
3362
jmp (tbl_snan.b,%pc,%a0)
3363
3364
tbl_snan:
3365
short fsnan_out_l - tbl_snan # long word integer
3366
short fsnan_out_s - tbl_snan # sgl prec
3367
short fsnan_out_x - tbl_snan # ext prec
3368
short tbl_snan - tbl_snan # packed needs no help
3369
short fsnan_out_w - tbl_snan # word integer
3370
short fsnan_out_d - tbl_snan # dbl prec
3371
short fsnan_out_b - tbl_snan # byte integer
3372
short tbl_snan - tbl_snan # packed needs no help
3373
3374
fsnan_out_b:
3375
mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
3376
bset &6,%d0 # set SNAN bit
3377
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3378
ble.b fsnan_out_b_dn # yes
3379
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3380
bsr.l _dmem_write_byte # write the default result
3381
3382
tst.l %d1 # did dstore fail?
3383
bne.l facc_out_b # yes
3384
3385
bra.w fsnan_exit
3386
fsnan_out_b_dn:
3387
andi.w &0x0007,%d1
3388
bsr.l store_dreg_b # store result to regfile
3389
bra.w fsnan_exit
3390
3391
fsnan_out_w:
3392
mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
3393
bset &14,%d0 # set SNAN bit
3394
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3395
ble.b fsnan_out_w_dn # yes
3396
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3397
bsr.l _dmem_write_word # write the default result
3398
3399
tst.l %d1 # did dstore fail?
3400
bne.l facc_out_w # yes
3401
3402
bra.w fsnan_exit
3403
fsnan_out_w_dn:
3404
andi.w &0x0007,%d1
3405
bsr.l store_dreg_w # store result to regfile
3406
bra.w fsnan_exit
3407
3408
fsnan_out_l:
3409
mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
3410
bset &30,%d0 # set SNAN bit
3411
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3412
ble.b fsnan_out_l_dn # yes
3413
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3414
bsr.l _dmem_write_long # write the default result
3415
3416
tst.l %d1 # did dstore fail?
3417
bne.l facc_out_l # yes
3418
3419
bra.w fsnan_exit
3420
fsnan_out_l_dn:
3421
andi.w &0x0007,%d1
3422
bsr.l store_dreg_l # store result to regfile
3423
bra.w fsnan_exit
3424
3425
# single-precision destination: build a sgl SNAN from the source's sign and
# the upper mantissa bits, with the quiet bit forced (0x7fc00000 pattern).
fsnan_out_s:
3426
cmpi.b %d1,&0x7 # is <ea> mode a data reg?
3427
ble.b fsnan_out_d_dn # yes
3428
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3429
andi.l &0x80000000,%d0 # keep sign
3430
ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3431
mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3432
lsr.l &0x8,%d1 # shift mantissa for sgl
3433
or.l %d1,%d0 # create sgl SNAN
3434
mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
3435
bsr.l _dmem_write_long # write the default result
3436
3437
tst.l %d1 # did dstore fail?
3438
bne.l facc_out_l # yes
3439
3440
bra.w fsnan_exit
3441
# sgl-precision result to a data register (label name notwithstanding):
# d1 must be preserved across the mantissa shift since it holds the regno.
fsnan_out_d_dn:
3442
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3443
andi.l &0x80000000,%d0 # keep sign
3444
ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
3445
mov.l %d1,-(%sp)
3446
mov.l FP_SRC_HI(%a6),%d1 # load mantissa
3447
lsr.l &0x8,%d1 # shift mantissa for sgl
3448
or.l %d1,%d0 # create sgl SNAN
3449
mov.l (%sp)+,%d1
3450
andi.w &0x0007,%d1
3451
bsr.l store_dreg_l # store result to regfile
3452
bra.w fsnan_exit
3453
3454
# double-precision destination: assemble the 8-byte dbl SNAN in FP_SCR0 by
# splitting the 64-bit extended mantissa across the 11-bit exponent shift.
fsnan_out_d:
3455
mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
3456
andi.l &0x80000000,%d0 # keep sign
3457
ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
3458
mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3459
mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
3460
mov.l &11,%d0 # load shift amt
3461
lsr.l %d0,%d1
3462
or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
3463
mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
3464
andi.l &0x000007ff,%d1
3465
ror.l %d0,%d1
3466
mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
3467
mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
3468
lsr.l %d0,%d1
3469
or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
3470
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3471
mov.l EXC_EA(%a6),%a1 # pass: dst addr
3472
movq.l &0x8,%d0 # pass: size of 8 bytes
3473
bsr.l _dmem_write # write the default result
3474
3475
tst.l %d1 # did dstore fail?
3476
bne.l facc_out_d # yes
3477
3478
bra.w fsnan_exit
3479
3480
# for extended precision, if the addressing mode is pre-decrement or
3481
# post-increment, then the address register did not get updated.
3482
# in addition, for pre-decrement, the stacked <ea> is incorrect.
3483
fsnan_out_x:
3484
clr.b SPCOND_FLG(%a6) # clear special case flag
3485
3486
# build the 12-byte extended SNAN (quiet bit set) in FP_SCR0.
mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3487
clr.w 2+FP_SCR0(%a6)
3488
mov.l FP_SRC_HI(%a6),%d0
3489
bset &30,%d0
3490
mov.l %d0,FP_SCR0_HI(%a6)
3491
mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3492
3493
btst &0x5,EXC_SR(%a6) # supervisor mode exception?
3494
bne.b fsnan_out_x_s # yes
3495
3496
mov.l %usp,%a0 # fetch user stack pointer
3497
mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
3498
mov.l (%a6),EXC_A6(%a6)
3499
3500
bsr.l _calc_ea_fout # find the correct ea,update An
3501
mov.l %a0,%a1
3502
mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3503
3504
mov.l EXC_A7(%a6),%a0
3505
mov.l %a0,%usp # restore user stack pointer
3506
mov.l EXC_A6(%a6),(%a6)
3507
3508
fsnan_out_x_save:
3509
lea FP_SCR0(%a6),%a0 # pass: ptr to operand
3510
movq.l &0xc,%d0 # pass: size of extended
3511
bsr.l _dmem_write # write the default result
3512
3513
tst.l %d1 # did dstore fail?
3514
bne.l facc_out_x # yes
3515
3516
bra.w fsnan_exit
3517
3518
fsnan_out_x_s:
3519
mov.l (%a6),EXC_A6(%a6)
3520
3521
bsr.l _calc_ea_fout # find the correct ea,update An
3522
mov.l %a0,%a1
3523
mov.l %a0,EXC_EA(%a6) # stack correct <ea>
3524
3525
mov.l EXC_A6(%a6),(%a6)
3526
3527
cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528
bne.b fsnan_out_x_save # no
3529
3530
# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531
# the 12-byte operand must land where the exception frame currently sits,
# so restore everything, shift the SR/PC/EA words down 12 (0xc) bytes, and
# copy the operand from FP_SCR0 into the vacated slots.
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3532
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3534
3535
frestore FP_SRC(%a6)
3536
3537
mov.l EXC_A6(%a6),%a6 # restore frame pointer
3538
3539
mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540
mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541
mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3542
3543
mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544
mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545
mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3546
3547
add.l &LOCAL_SIZE-0x8,%sp
3548
3549
bra.l _real_snan
3550
3551
#########################################################################
3552
# XDEF **************************************************************** #
3553
# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
3554
# #
3555
# This handler should be the first code executed upon taking the #
3556
# FP Inexact exception in an operating system. #
3557
# #
3558
# XREF **************************************************************** #
3559
# _imem_read_long() - read instruction longword #
3560
# fix_skewed_ops() - adjust src operand in fsave frame #
3561
# set_tag_x() - determine optype of src/dst operands #
3562
# store_fpreg() - store opclass 0 or 2 result to FP regfile #
3563
# unnorm_fix() - change UNNORM operands to NORM or ZERO #
3564
# load_fpn2() - load dst operand from FP regfile #
3565
# smovcr() - emulate an "fmovcr" instruction #
3566
# fout() - emulate an opclass 3 instruction #
3567
# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
3568
# _real_inex() - "callout" to operating system inexact handler #
3569
# #
3570
# INPUT *************************************************************** #
3571
# - The system stack contains the FP Inexact exception frame #
3572
# - The fsave frame contains the source operand #
3573
# #
3574
# OUTPUT ************************************************************** #
3575
# - The system stack is unchanged #
3576
# - The fsave frame contains the adjusted src op for opclass 0,2 #
3577
# #
3578
# ALGORITHM *********************************************************** #
3579
# In a system where the FP Inexact exception is enabled, the goal #
3580
# is to get to the handler specified at _real_inex(). But, on the 060, #
3581
# for opclass zero and two instruction taking this exception, the #
3582
# hardware doesn't store the correct result to the destination FP #
3583
# register as did the '040 and '881/2. This handler must emulate the #
3584
# instruction in order to get this value and then store it to the #
3585
# correct register before calling _real_inex(). #
3586
# For opclass 3 instructions, the 060 doesn't store the default #
3587
# inexact result out to memory or data register file as it should. #
3588
# This code must emulate the move out by calling fout() before finally #
3589
# exiting through _real_inex(). #
3590
# #
3591
#########################################################################
3592
3593
# _fpsp_inex:
# Entry point for the FP Inexact exception (see the XDEF header above).
# Saves the caller's register/FPU state in a link frame, fetches the
# faulting instruction via the FPIAR, then either emulates an opclass 0/2
# operation (dispatching through tbl_unsupp) so the correct result lands
# in the destination FP register, or (for fmove-out) branches to finex_out.
# Exits to the OS callout _real_inex.
global _fpsp_inex
3594
_fpsp_inex:
3595
3596
link.w %a6,&-LOCAL_SIZE # init stack frame
3597
3598
fsave FP_SRC(%a6) # grab the "busy" frame
3599
3600
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3601
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3603
3604
# the FPIAR holds the "current PC" of the faulting instruction
3605
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3606
3607
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3608
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3609
bsr.l _imem_read_long # fetch the instruction words
3610
mov.l %d0,EXC_OPWORD(%a6)
3611
3612
##############################################################################
3613
3614
btst &13,%d0 # is instr an fmove out?
3615
bne.w finex_out # fmove out
3616
3617
3618
# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619
# longword integer directly into the upper longword of the mantissa along
3620
# w/ an exponent value of 0x401e. we convert this to extended precision here.
3621
bfextu %d0{&19:&3},%d0 # fetch instr size
3622
bne.b finex_cont # instr size is not long
3623
cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
3624
bne.b finex_cont # no
3625
fmov.l &0x0,%fpcr
3626
fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
3627
fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
3628
mov.w &0xe001,0x2+FP_SRC(%a6)
3629
3630
finex_cont:
3631
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3632
bsr.l fix_skewed_ops # fix src op
3633
3634
# Here, we zero the ccode and exception byte field since we're going to
3635
# emulate the whole instruction. Notice, though, that we don't kill the
3636
# INEX1 bit. This is because a packed op has long since been converted
3637
# to extended before arriving here. Therefore, we need to retain the
3638
# INEX1 bit from when the operand was first converted.
3639
andi.l &0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3640
3641
fmov.l &0x0,%fpcr # zero current control regs
3642
fmov.l &0x0,%fpsr
3643
3644
bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645
cmpi.b %d1,&0x17 # is op an fmovecr?
3646
beq.w finex_fmovcr # yes
3647
3648
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3649
bsr.l set_tag_x # tag the operand type
3650
mov.b %d0,STAG(%a6) # maybe NORM,DENORM
3651
3652
# bits four and five of the fp extension word separate the monadic and dyadic
3653
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654
# will never take this exception, but fsincos will.
3655
btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
3656
beq.b finex_extract # monadic
3657
3658
btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
3659
bne.b finex_extract # yes
3660
3661
bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662
bsr.l load_fpn2 # load dst into FP_DST
3663
3664
lea FP_DST(%a6),%a0 # pass: ptr to dst op
3665
bsr.l set_tag_x # tag the operand type
3666
cmpi.b %d0,&UNNORM # is operand an UNNORM?
3667
bne.b finex_op2_done # no
3668
bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
3669
finex_op2_done:
3670
mov.b %d0,DTAG(%a6) # save dst optype tag
3671
3672
finex_extract:
3673
clr.l %d0
3674
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
3675
3676
mov.b 1+EXC_CMDREG(%a6),%d1
3677
andi.w &0x007f,%d1 # extract extension
3678
3679
lea FP_SRC(%a6),%a0
3680
lea FP_DST(%a6),%a1
3681
3682
# dispatch to the emulation routine: tbl_unsupp holds longword offsets
# relative to the table base, indexed by the 7-bit extension field
mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683
jsr (tbl_unsupp.l,%pc,%d1.l*1)
3684
3685
# the operation has been emulated. the result is in fp0.
3686
finex_save:
3687
bfextu EXC_CMDREG(%a6){&6:&3},%d0
3688
bsr.l store_fpreg
3689
3690
finex_exit:
3691
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3692
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3694
3695
frestore FP_SRC(%a6)
3696
3697
unlk %a6
3698
bra.l _real_inex
3699
3700
# fmovecr: emulate via smovcr using the ROM-constant offset from the
# extension word, then store the result through the common save path
finex_fmovcr:
3701
clr.l %d0
3702
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3703
mov.b 1+EXC_CMDREG(%a6),%d1
3704
andi.l &0x0000007f,%d1 # pass rom offset
3705
bsr.l smovcr
3706
bra.b finex_save
3707
3708
########################################################################
3709
3710
#
3711
# the hardware does not save the default result to memory on enabled
3712
# inexact exceptions. we do this here before passing control to
3713
# the user inexact handler.
3714
#
3715
# byte, word, and long destination format operations can pass
3716
# through here. so can double and single precision.
3717
# although packed opclass three operations can take inexact
3718
# exceptions, they won't pass through here since they are caught
3719
# first by the unsupported data format exception handler. that handler
3720
# sends them directly to _real_inex() if necessary.
3721
#
3722
# finex_out:
# Opclass 3 (fmove out) path of the inexact handler: the 060 does not
# store the default inexact result to memory/Dn itself, so emulate the
# move-out with fout() (src operand in FP_SRC, tagged NORM) and then take
# the common exit through finex_exit -> _real_inex.
finex_out:
3723
3724
mov.b &NORM,STAG(%a6) # src is a NORM
3725
3726
clr.l %d0
3727
mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
3728
3729
andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
3730
3731
lea FP_SRC(%a6),%a0 # pass ptr to src operand
3732
3733
bsr.l fout # store the default result
3734
3735
bra.b finex_exit
3736
3737
#########################################################################
3738
# XDEF **************************************************************** #
3739
# _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
3740
# #
3741
# This handler should be the first code executed upon taking #
3742
# the FP DZ exception in an operating system. #
3743
# #
3744
# XREF **************************************************************** #
3745
# _imem_read_long() - read instruction longword from memory #
3746
# fix_skewed_ops() - adjust fsave operand #
3747
# _real_dz() - "callout" exit point from FP DZ handler #
3748
# #
3749
# INPUT *************************************************************** #
3750
# - The system stack contains the FP DZ exception stack. #
3751
# - The fsave frame contains the source operand. #
3752
# #
3753
# OUTPUT ************************************************************** #
3754
# - The system stack contains the FP DZ exception stack. #
3755
# - The fsave frame contains the adjusted source operand. #
3756
# #
3757
# ALGORITHM *********************************************************** #
3758
# In a system where the DZ exception is enabled, the goal is to #
3759
# get to the handler specified at _real_dz(). But, on the 060, when the #
3760
# exception is taken, the input operand in the fsave state frame may #
3761
# be incorrect for some cases and need to be adjusted. So, this package #
3762
# adjusts the operand using fix_skewed_ops() and then branches to #
3763
# _real_dz(). #
3764
# #
3765
#########################################################################
3766
3767
# _fpsp_dz:
# Entry point for the FP Divide-by-Zero exception (see XDEF header above).
# Saves state, fetches the faulting instruction via the FPIAR, "unskews"
# the source operand in the fsave frame with fix_skewed_ops, restores
# state, and exits to the OS callout _real_dz.  The system stack frame is
# left unchanged.
global _fpsp_dz
3768
_fpsp_dz:
3769
3770
link.w %a6,&-LOCAL_SIZE # init stack frame
3771
3772
fsave FP_SRC(%a6) # grab the "busy" frame
3773
3774
movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
3775
fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776
fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
3777
3778
# the FPIAR holds the "current PC" of the faulting instruction
3779
mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3780
3781
mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
3782
addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
3783
bsr.l _imem_read_long # fetch the instruction words
3784
mov.l %d0,EXC_OPWORD(%a6)
3785
3786
##############################################################################
3787
3788
3789
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790
# this would be the case for opclass two operations with a source zero
3791
# in the sgl or dbl format.
3792
lea FP_SRC(%a6),%a0 # pass: ptr to src op
3793
bsr.l fix_skewed_ops # fix src op
3794
3795
fdz_exit:
3796
fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
3797
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
3799
3800
frestore FP_SRC(%a6)
3801
3802
unlk %a6
3803
bra.l _real_dz
3804
3805
#########################################################################
3806
# XDEF **************************************************************** #
3807
# _fpsp_fline(): 060FPSP entry point for "Line F emulator" #
3808
# exception when the "reduced" version of the #
3809
# FPSP is implemented that does not emulate #
3810
# FP unimplemented instructions. #
3811
# #
3812
# This handler should be the first code executed upon taking a #
3813
# "Line F Emulator" exception in an operating system integrating #
3814
# the reduced version of 060FPSP. #
3815
# #
3816
# XREF **************************************************************** #
3817
# _real_fpu_disabled() - Handle "FPU disabled" exceptions #
3818
# _real_fline() - Handle all other cases (treated equally) #
3819
# #
3820
# INPUT *************************************************************** #
3821
# - The system stack contains a "Line F Emulator" exception #
3822
# stack frame. #
3823
# #
3824
# OUTPUT ************************************************************** #
3825
# - The system stack is unchanged. #
3826
# #
3827
# ALGORITHM *********************************************************** #
3828
# When a "Line F Emulator" exception occurs in a system where #
3829
# "FPU Unimplemented" instructions will not be emulated, the exception #
3830
# can occur because the FPU is disabled or the instruction is to be #
3831
# classified as "Line F". This module determines which case exists and #
3832
# calls the appropriate "callout". #
3833
# #
3834
#########################################################################
3835
3836
# _fpsp_fline:
# "Line F Emulator" entry for the reduced FPSP (see XDEF header above).
# Dispatches on the exception frame's format/vector word: FPU-disabled
# frames go to _real_fpu_disabled, everything else to _real_fline.
global _fpsp_fline
3837
_fpsp_fline:
3838
3839
# check to see if the FPU is disabled. if so, jump to the OS entry
3840
# point for that condition.
3841
# 0x6(%sp) is the frame's format/vector word; 0x402c = format $4 frame
# with vector offset 0x2c (Line F) — NOTE(review): per 68060 frame
# layout, confirm against the M68060 User's Manual if this is changed.
cmpi.w 0x6(%sp),&0x402c
3842
beq.l _real_fpu_disabled
3843
3844
bra.l _real_fline
3845
3846
#########################################################################
3847
# XDEF **************************************************************** #
3848
# _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
3849
# #
3850
# XREF **************************************************************** #
3851
# inc_areg() - increment an address register #
3852
# dec_areg() - decrement an address register #
3853
# #
3854
# INPUT *************************************************************** #
3855
# d0 = number of bytes to adjust <ea> by #
3856
# #
3857
# OUTPUT ************************************************************** #
3858
# None #
3859
# #
3860
# ALGORITHM *********************************************************** #
3861
# "Dummy" CALCulate Effective Address: #
3862
# The stacked <ea> for FP unimplemented instructions and opclass #
3863
# two packed instructions is correct with the exception of... #
3864
# #
3865
# 1) -(An) : The register is not updated regardless of size. #
3866
# Also, for extended precision and packed, the #
3867
# stacked <ea> value is 8 bytes too big #
3868
# 2) (An)+ : The register is not updated. #
3869
# 3) #<data> : The upper longword of the immediate operand is #
3870
# stacked. b,w,l and s sizes are completely stacked. #
3871
# d,x, and p are not. #
3872
# #
3873
#########################################################################
3874
3875
# _dcalc_ea:
# "Dummy" CALCulate <ea> (see XDEF header above).
# In:  d0 = number of bytes to adjust the <ea> by
# Out: a0 = corrected effective address
# For (An)+/-(An) the address register is updated via inc_areg/dec_areg;
# for #<data> the immediate-data special-condition flag is set and the
# <ea> of the immediate (FPIAR + 4) is returned.
global _dcalc_ea
3876
_dcalc_ea:
3877
mov.l %d0, %a0 # move # bytes to %a0
3878
3879
mov.b 1+EXC_OPWORD(%a6), %d0 # fetch opcode word
3880
mov.l %d0, %d1 # make a copy
3881
3882
andi.w &0x38, %d0 # extract mode field
3883
andi.l &0x7, %d1 # extract reg field
3884
3885
cmpi.b %d0,&0x18 # is mode (An)+ ?
3886
beq.b dcea_pi # yes
3887
3888
cmpi.b %d0,&0x20 # is mode -(An) ?
3889
beq.b dcea_pd # yes
3890
3891
or.w %d1,%d0 # concat mode,reg
3892
cmpi.b %d0,&0x3c # is mode #<data>?
3893
3894
beq.b dcea_imm # yes
3895
3896
# all other modes: the stacked <ea> is already correct
mov.l EXC_EA(%a6),%a0 # return <ea>
3897
rts
3898
3899
# need to set immediate data flag here since we'll need to do
3900
# an imem_read to fetch this later.
3901
dcea_imm:
3902
mov.b &immed_flg,SPCOND_FLG(%a6)
3903
# immediate data follows the opcode word: <ea> = FPIAR + 4
lea ([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
3904
rts
3905
3906
# here, the <ea> is stacked correctly. however, we must update the
3907
# address register...
3908
dcea_pi:
3909
mov.l %a0,%d0 # pass amt to inc by
3910
bsr.l inc_areg # inc addr register
3911
3912
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3913
rts
3914
3915
# the <ea> is stacked correctly for all but extended and packed which
3916
# the <ea>s are 8 bytes too large.
3917
# it would make no sense to have a pre-decrement to a7 in supervisor
3918
# mode so we don't even worry about this tricky case here : )
3919
dcea_pd:
3920
mov.l %a0,%d0 # pass amt to dec by
3921
bsr.l dec_areg # dec addr register
3922
3923
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3924
3925
cmpi.b %d0,&0xc # is opsize ext or packed?
3926
beq.b dcea_pd2 # yes
3927
rts
3928
dcea_pd2:
3929
sub.l &0x8,%a0 # correct <ea>
3930
mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
3931
rts
3932
3933
#########################################################################
3934
# XDEF **************************************************************** #
3935
# _calc_ea_fout(): calculate correct stacked <ea> for extended #
3936
# and packed data opclass 3 operations. #
3937
# #
3938
# XREF **************************************************************** #
3939
# None #
3940
# #
3941
# INPUT *************************************************************** #
3942
# None #
3943
# #
3944
# OUTPUT ************************************************************** #
3945
# a0 = return correct effective address #
3946
# #
3947
# ALGORITHM *********************************************************** #
3948
# For opclass 3 extended and packed data operations, the <ea> #
3949
# stacked for the exception is incorrect for -(an) and (an)+ addressing #
3950
# modes. Also, while we're at it, the index register itself must get #
3951
# updated. #
3952
# So, for -(an), we must subtract 8 off of the stacked <ea> value #
3953
# and return that value as the correct <ea> and store that value in An. #
3954
# For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
3955
# #
3956
#########################################################################
3957
3958
# This calc_ea is currently used to retrieve the correct <ea>
3959
# for fmove outs of type extended and packed.
3960
global _calc_ea_fout
3961
_calc_ea_fout:
3962
mov.b 1+EXC_OPWORD(%a6),%d0 # fetch opcode word
3963
mov.l %d0,%d1 # make a copy
3964
3965
andi.w &0x38,%d0 # extract mode field
3966
andi.l &0x7,%d1 # extract reg field
3967
3968
cmpi.b %d0,&0x18 # is mode (An)+ ?
3969
beq.b ceaf_pi # yes
3970
3971
cmpi.b %d0,&0x20 # is mode -(An) ?
3972
beq.w ceaf_pd # yes
3973
3974
mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
3975
rts
3976
3977
# (An)+ : extended and packed fmove out
3978
# : stacked <ea> is correct
3979
# : "An" not updated
3980
ceaf_pi:
3981
mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
3982
mov.l EXC_EA(%a6),%a0
3983
jmp (tbl_ceaf_pi.b,%pc,%d1.w*1)
3984
3985
swbeg &0x8
3986
tbl_ceaf_pi:
3987
short ceaf_pi0 - tbl_ceaf_pi
3988
short ceaf_pi1 - tbl_ceaf_pi
3989
short ceaf_pi2 - tbl_ceaf_pi
3990
short ceaf_pi3 - tbl_ceaf_pi
3991
short ceaf_pi4 - tbl_ceaf_pi
3992
short ceaf_pi5 - tbl_ceaf_pi
3993
short ceaf_pi6 - tbl_ceaf_pi
3994
short ceaf_pi7 - tbl_ceaf_pi
3995
3996
ceaf_pi0:
3997
addi.l &0xc,EXC_DREGS+0x8(%a6)
3998
rts
3999
ceaf_pi1:
4000
addi.l &0xc,EXC_DREGS+0xc(%a6)
4001
rts
4002
ceaf_pi2:
4003
add.l &0xc,%a2
4004
rts
4005
ceaf_pi3:
4006
add.l &0xc,%a3
4007
rts
4008
ceaf_pi4:
4009
add.l &0xc,%a4
4010
rts
4011
ceaf_pi5:
4012
add.l &0xc,%a5
4013
rts
4014
ceaf_pi6:
4015
addi.l &0xc,EXC_A6(%a6)
4016
rts
4017
ceaf_pi7:
4018
mov.b &mia7_flg,SPCOND_FLG(%a6)
4019
addi.l &0xc,EXC_A7(%a6)
4020
rts
4021
4022
# -(An) : extended and packed fmove out
4023
# : stacked <ea> = actual <ea> + 8
4024
# : "An" not updated
4025
ceaf_pd:
4026
mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
4027
mov.l EXC_EA(%a6),%a0
4028
sub.l &0x8,%a0
4029
sub.l &0x8,EXC_EA(%a6)
4030
jmp (tbl_ceaf_pd.b,%pc,%d1.w*1)
4031
4032
swbeg &0x8
4033
tbl_ceaf_pd:
4034
short ceaf_pd0 - tbl_ceaf_pd
4035
short ceaf_pd1 - tbl_ceaf_pd
4036
short ceaf_pd2 - tbl_ceaf_pd
4037
short ceaf_pd3 - tbl_ceaf_pd
4038
short ceaf_pd4 - tbl_ceaf_pd
4039
short ceaf_pd5 - tbl_ceaf_pd
4040
short ceaf_pd6 - tbl_ceaf_pd
4041
short ceaf_pd7 - tbl_ceaf_pd
4042
4043
ceaf_pd0:
4044
mov.l %a0,EXC_DREGS+0x8(%a6)
4045
rts
4046
ceaf_pd1:
4047
mov.l %a0,EXC_DREGS+0xc(%a6)
4048
rts
4049
ceaf_pd2:
4050
mov.l %a0,%a2
4051
rts
4052
ceaf_pd3:
4053
mov.l %a0,%a3
4054
rts
4055
ceaf_pd4:
4056
mov.l %a0,%a4
4057
rts
4058
ceaf_pd5:
4059
mov.l %a0,%a5
4060
rts
4061
ceaf_pd6:
4062
mov.l %a0,EXC_A6(%a6)
4063
rts
4064
ceaf_pd7:
4065
mov.l %a0,EXC_A7(%a6)
4066
mov.b &mda7_flg,SPCOND_FLG(%a6)
4067
rts
4068
4069
#
4070
# This table holds the offsets of the emulation routines for each individual
4071
# math operation relative to the address of this table. Included are
4072
# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073
# this table is for the version of the 060FPSP without transcendentals.
4074
# The location within the table is determined by the extension bits of the
4075
# operation longword.
4076
#
4077
4078
# tbl_unsupp: longword offsets (relative to the table base) of the
# emulation routines, indexed by the 7-bit extension field of the FP
# command word.  A zero entry (tbl_unsupp - tbl_unsupp) marks an
# operation with no emulation routine in this non-transcendental build.
swbeg &109
4079
tbl_unsupp:
4080
long fin - tbl_unsupp # 00: fmove
4081
long fint - tbl_unsupp # 01: fint
4082
long tbl_unsupp - tbl_unsupp # 02: fsinh
4083
long fintrz - tbl_unsupp # 03: fintrz
4084
long fsqrt - tbl_unsupp # 04: fsqrt
4085
long tbl_unsupp - tbl_unsupp
4086
long tbl_unsupp - tbl_unsupp # 06: flognp1
4087
long tbl_unsupp - tbl_unsupp
4088
long tbl_unsupp - tbl_unsupp # 08: fetoxm1
4089
long tbl_unsupp - tbl_unsupp # 09: ftanh
4090
long tbl_unsupp - tbl_unsupp # 0a: fatan
4091
long tbl_unsupp - tbl_unsupp
4092
long tbl_unsupp - tbl_unsupp # 0c: fasin
4093
long tbl_unsupp - tbl_unsupp # 0d: fatanh
4094
long tbl_unsupp - tbl_unsupp # 0e: fsin
4095
long tbl_unsupp - tbl_unsupp # 0f: ftan
4096
long tbl_unsupp - tbl_unsupp # 10: fetox
4097
long tbl_unsupp - tbl_unsupp # 11: ftwotox
4098
long tbl_unsupp - tbl_unsupp # 12: ftentox
4099
long tbl_unsupp - tbl_unsupp
4100
long tbl_unsupp - tbl_unsupp # 14: flogn
4101
long tbl_unsupp - tbl_unsupp # 15: flog10
4102
long tbl_unsupp - tbl_unsupp # 16: flog2
4103
long tbl_unsupp - tbl_unsupp
4104
long fabs - tbl_unsupp # 18: fabs
4105
long tbl_unsupp - tbl_unsupp # 19: fcosh
4106
long fneg - tbl_unsupp # 1a: fneg
4107
long tbl_unsupp - tbl_unsupp
4108
long tbl_unsupp - tbl_unsupp # 1c: facos
4109
long tbl_unsupp - tbl_unsupp # 1d: fcos
4110
long tbl_unsupp - tbl_unsupp # 1e: fgetexp
4111
long tbl_unsupp - tbl_unsupp # 1f: fgetman
4112
long fdiv - tbl_unsupp # 20: fdiv
4113
long tbl_unsupp - tbl_unsupp # 21: fmod
4114
long fadd - tbl_unsupp # 22: fadd
4115
long fmul - tbl_unsupp # 23: fmul
4116
long fsgldiv - tbl_unsupp # 24: fsgldiv
4117
long tbl_unsupp - tbl_unsupp # 25: frem
4118
long tbl_unsupp - tbl_unsupp # 26: fscale
4119
long fsglmul - tbl_unsupp # 27: fsglmul
4120
long fsub - tbl_unsupp # 28: fsub
4121
long tbl_unsupp - tbl_unsupp
4122
long tbl_unsupp - tbl_unsupp
4123
long tbl_unsupp - tbl_unsupp
4124
long tbl_unsupp - tbl_unsupp
4125
long tbl_unsupp - tbl_unsupp
4126
long tbl_unsupp - tbl_unsupp
4127
long tbl_unsupp - tbl_unsupp
4128
long tbl_unsupp - tbl_unsupp # 30: fsincos
4129
long tbl_unsupp - tbl_unsupp # 31: fsincos
4130
long tbl_unsupp - tbl_unsupp # 32: fsincos
4131
long tbl_unsupp - tbl_unsupp # 33: fsincos
4132
long tbl_unsupp - tbl_unsupp # 34: fsincos
4133
long tbl_unsupp - tbl_unsupp # 35: fsincos
4134
long tbl_unsupp - tbl_unsupp # 36: fsincos
4135
long tbl_unsupp - tbl_unsupp # 37: fsincos
4136
long fcmp - tbl_unsupp # 38: fcmp
4137
long tbl_unsupp - tbl_unsupp
4138
long ftst - tbl_unsupp # 3a: ftst
4139
long tbl_unsupp - tbl_unsupp
4140
long tbl_unsupp - tbl_unsupp
4141
long tbl_unsupp - tbl_unsupp
4142
long tbl_unsupp - tbl_unsupp
4143
long tbl_unsupp - tbl_unsupp
4144
long fsin - tbl_unsupp # 40: fsmove
4145
long fssqrt - tbl_unsupp # 41: fssqrt
4146
long tbl_unsupp - tbl_unsupp
4147
long tbl_unsupp - tbl_unsupp
4148
long fdin - tbl_unsupp # 44: fdmove
4149
long fdsqrt - tbl_unsupp # 45: fdsqrt
4150
long tbl_unsupp - tbl_unsupp
4151
long tbl_unsupp - tbl_unsupp
4152
long tbl_unsupp - tbl_unsupp
4153
long tbl_unsupp - tbl_unsupp
4154
long tbl_unsupp - tbl_unsupp
4155
long tbl_unsupp - tbl_unsupp
4156
long tbl_unsupp - tbl_unsupp
4157
long tbl_unsupp - tbl_unsupp
4158
long tbl_unsupp - tbl_unsupp
4159
long tbl_unsupp - tbl_unsupp
4160
long tbl_unsupp - tbl_unsupp
4161
long tbl_unsupp - tbl_unsupp
4162
long tbl_unsupp - tbl_unsupp
4163
long tbl_unsupp - tbl_unsupp
4164
long tbl_unsupp - tbl_unsupp
4165
long tbl_unsupp - tbl_unsupp
4166
long tbl_unsupp - tbl_unsupp
4167
long tbl_unsupp - tbl_unsupp
4168
long fsabs - tbl_unsupp # 58: fsabs
4169
long tbl_unsupp - tbl_unsupp
4170
long fsneg - tbl_unsupp # 5a: fsneg
4171
long tbl_unsupp - tbl_unsupp
4172
long fdabs - tbl_unsupp # 5c: fdabs
4173
long tbl_unsupp - tbl_unsupp
4174
long fdneg - tbl_unsupp # 5e: fdneg
4175
long tbl_unsupp - tbl_unsupp
4176
long fsdiv - tbl_unsupp # 60: fsdiv
4177
long tbl_unsupp - tbl_unsupp
4178
long fsadd - tbl_unsupp # 62: fsadd
4179
long fsmul - tbl_unsupp # 63: fsmul
4180
long fddiv - tbl_unsupp # 64: fddiv
4181
long tbl_unsupp - tbl_unsupp
4182
long fdadd - tbl_unsupp # 66: fdadd
4183
long fdmul - tbl_unsupp # 67: fdmul
4184
long fssub - tbl_unsupp # 68: fssub
4185
long tbl_unsupp - tbl_unsupp
4186
long tbl_unsupp - tbl_unsupp
4187
long tbl_unsupp - tbl_unsupp
4188
long fdsub - tbl_unsupp # 6c: fdsub
4189
4190
#################################################
4191
# Add this here so non-fp modules can compile.
4192
# (smovcr is called from fpsp_inex.)
4193
# smovcr: link-time stub for the non-fp build (see comment above) —
# deliberately an infinite loop; it must never actually be reached.
global smovcr
4194
smovcr:
4195
bra.b smovcr
4196
4197
#########################################################################
4198
# XDEF **************************************************************** #
4199
# fmovm_dynamic(): emulate "fmovm" dynamic instruction #
4200
# #
4201
# XREF **************************************************************** #
4202
# fetch_dreg() - fetch data register #
4203
# {i,d,}mem_read() - fetch data from memory #
4204
# _mem_write() - write data to memory #
4205
# iea_iacc() - instruction memory access error occurred #
4206
# iea_dacc() - data memory access error occurred #
4207
# restore() - restore An index regs if access error occurred #
4208
# #
4209
# INPUT *************************************************************** #
4210
# None #
4211
# #
4212
# OUTPUT ************************************************************** #
4213
# If instr is "fmovm Dn,-(A7)" from supervisor mode, #
4214
# d0 = size of dump #
4215
# d1 = Dn #
4216
# Else if instruction access error, #
4217
# d0 = FSLW #
4218
# Else if data access error, #
4219
# d0 = FSLW #
4220
# a0 = address of fault #
4221
# Else #
4222
# none. #
4223
# #
4224
# ALGORITHM *********************************************************** #
4225
# The effective address must be calculated since this is entered #
4226
# from an "Unimplemented Effective Address" exception handler. So, we #
4227
# have our own fcalc_ea() routine here. If an access error is flagged #
4228
# by a _{i,d,}mem_read() call, we must exit through the special #
4229
# handler. #
4230
# The data register is determined and its value loaded to get the #
4231
# string of FP registers affected. This value is used as an index into #
4232
# a lookup table such that we can determine the number of bytes #
4233
# involved. #
4234
# If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
4235
# to read in all FP values. Again, _mem_read() may fail and require a #
4236
# special exit. #
4237
# If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4238
# to write all FP values. _mem_write() may also fail. #
4239
# If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
4240
# then we return the size of the dump and the string to the caller #
4241
# so that the move can occur outside of this routine. This special #
4242
# case is required so that moves to the system stack are handled #
4243
# correctly. #
4244
# #
4245
# DYNAMIC: #
4246
# fmovm.x dn, <ea> #
4247
# fmovm.x <ea>, dn #
4248
# #
4249
# <WORD 1> <WORD2> #
4250
# 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
4251
# #
4252
# & = (0): predecrement addressing mode #
4253
# (1): postincrement or control addressing mode #
4254
# @ = (0): move listed regs from memory to the FPU #
4255
# (1): move listed regs from the FPU to memory #
4256
# $$$ : index of data register holding reg select mask #
4257
# #
4258
# NOTES: #
4259
# If the data register holds a zero, then the #
4260
# instruction is a nop. #
4261
# #
4262
#########################################################################
4263
4264
# fmovm_dynamic:
# Emulate the dynamic "fmovm.x Dn,<ea>" / "fmovm.x <ea>,Dn" forms (see
# the XDEF header above for the full contract).  The register-select bit
# string is fetched from Dn, its byte count looked up in tbl_fmovm_size,
# the <ea> computed by fmovm_calc_ea, and the transfer staged through a
# temporary buffer on the supervisor stack via _dmem_write/_dmem_read.
global fmovm_dynamic
4265
fmovm_dynamic:
4266
4267
# extract the data register in which the bit string resides...
4268
mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
4269
andi.w &0x70,%d1 # extract reg bits
4270
lsr.b &0x4,%d1 # shift into lo bits
4271
4272
# fetch the bit string into d0...
4273
bsr.l fetch_dreg # fetch reg string
4274
4275
andi.l &0x000000ff,%d0 # keep only lo byte
4276
4277
mov.l %d0,-(%sp) # save strg
4278
mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
4279
mov.l %d0,-(%sp) # save size
4280
bsr.l fmovm_calc_ea # calculate <ea>
4281
mov.l (%sp)+,%d0 # restore size
4282
mov.l (%sp)+,%d1 # restore strg
4283
4284
# if the bit string is a zero, then the operation is a no-op
4285
# but, make sure that we've calculated ea and advanced the opword pointer
4286
beq.w fmovm_data_done
4287
4288
# separate move ins from move outs...
4289
btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
4290
beq.w fmovm_data_in # it's a move in (bit clear = memory to FPU)
4291
4292
#############
4293
# MOVE OUT: #
4294
#############
4295
fmovm_data_out:
4296
btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
4297
bne.w fmovm_out_ctrl # control
4298
4299
############################
4300
fmovm_out_predec:
4301
# for predecrement mode, the bit string is the opposite of both control
4302
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303
# here, we convert it to be just like the others...
4304
mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4305
4306
btst &0x5,EXC_SR(%a6) # user or supervisor mode?
4307
beq.b fmovm_out_ctrl # user
4308
4309
fmovm_out_predec_s:
4310
cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311
bne.b fmovm_out_ctrl
4312
4313
# the operation was unfortunately an: fmovm.x dn,-(sp)
4314
# called from supervisor mode.
4315
# we're also passing "size" and "strg" back to the calling routine
4316
rts
4317
4318
############################
4319
# common move-out path: dump the selected FP registers into a buffer on
# the supervisor stack, then copy the buffer to the destination <ea>.
# d1 is shifted left one bit per register; bit 7 (sign) selects the move.
fmovm_out_ctrl:
4320
mov.l %a0,%a1 # move <ea> to a1
4321
4322
sub.l %d0,%sp # subtract size of dump
4323
lea (%sp),%a0
4324
4325
tst.b %d1 # should FP0 be moved?
4326
bpl.b fmovm_out_ctrl_fp1 # no
4327
4328
# fp0/fp1 were saved to the exception frame at entry; copy their images
mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes
4329
mov.l 0x4+EXC_FP0(%a6),(%a0)+
4330
mov.l 0x8+EXC_FP0(%a6),(%a0)+
4331
4332
fmovm_out_ctrl_fp1:
4333
lsl.b &0x1,%d1 # should FP1 be moved?
4334
bpl.b fmovm_out_ctrl_fp2 # no
4335
4336
mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes
4337
mov.l 0x4+EXC_FP1(%a6),(%a0)+
4338
mov.l 0x8+EXC_FP1(%a6),(%a0)+
4339
4340
fmovm_out_ctrl_fp2:
4341
lsl.b &0x1,%d1 # should FP2 be moved?
4342
bpl.b fmovm_out_ctrl_fp3 # no
4343
4344
fmovm.x &0x20,(%a0) # yes
4345
add.l &0xc,%a0
4346
4347
fmovm_out_ctrl_fp3:
4348
lsl.b &0x1,%d1 # should FP3 be moved?
4349
bpl.b fmovm_out_ctrl_fp4 # no
4350
4351
fmovm.x &0x10,(%a0) # yes
4352
add.l &0xc,%a0
4353
4354
fmovm_out_ctrl_fp4:
4355
lsl.b &0x1,%d1 # should FP4 be moved?
4356
bpl.b fmovm_out_ctrl_fp5 # no
4357
4358
fmovm.x &0x08,(%a0) # yes
4359
add.l &0xc,%a0
4360
4361
fmovm_out_ctrl_fp5:
4362
lsl.b &0x1,%d1 # should FP5 be moved?
4363
bpl.b fmovm_out_ctrl_fp6 # no
4364
4365
fmovm.x &0x04,(%a0) # yes
4366
add.l &0xc,%a0
4367
4368
fmovm_out_ctrl_fp6:
4369
lsl.b &0x1,%d1 # should FP6 be moved?
4370
bpl.b fmovm_out_ctrl_fp7 # no
4371
4372
fmovm.x &0x02,(%a0) # yes
4373
add.l &0xc,%a0
4374
4375
fmovm_out_ctrl_fp7:
4376
lsl.b &0x1,%d1 # should FP7 be moved?
4377
bpl.b fmovm_out_ctrl_done # no
4378
4379
fmovm.x &0x01,(%a0) # yes
4380
add.l &0xc,%a0
4381
4382
fmovm_out_ctrl_done:
4383
mov.l %a1,L_SCR1(%a6)
4384
4385
lea (%sp),%a0 # pass: supervisor src
4386
mov.l %d0,-(%sp) # save size
4387
bsr.l _dmem_write # copy data to user mem
4388
4389
mov.l (%sp)+,%d0
4390
add.l %d0,%sp # clear fpreg data from stack
4391
4392
tst.l %d1 # did dstore err?
4393
bne.w fmovm_out_err # yes
4394
4395
rts
4396
4397
############
4398
# MOVE IN: #
4399
############
4400
# read the whole transfer into a stack buffer first, then scatter it to
# the selected FP registers (fp0/fp1 via their exception-frame images).
fmovm_data_in:
4401
mov.l %a0,L_SCR1(%a6)
4402
4403
sub.l %d0,%sp # make room for fpregs
4404
lea (%sp),%a1
4405
4406
mov.l %d1,-(%sp) # save bit string for later
4407
mov.l %d0,-(%sp) # save # of bytes
4408
4409
bsr.l _dmem_read # copy data from user mem
4410
4411
mov.l (%sp)+,%d0 # retrieve # of bytes
4412
4413
tst.l %d1 # did dfetch fail?
4414
bne.w fmovm_in_err # yes
4415
4416
mov.l (%sp)+,%d1 # load bit string
4417
4418
lea (%sp),%a0 # addr of stack
4419
4420
tst.b %d1 # should FP0 be moved?
4421
bpl.b fmovm_data_in_fp1 # no
4422
4423
mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes
4424
mov.l (%a0)+,0x4+EXC_FP0(%a6)
4425
mov.l (%a0)+,0x8+EXC_FP0(%a6)
4426
4427
fmovm_data_in_fp1:
4428
lsl.b &0x1,%d1 # should FP1 be moved?
4429
bpl.b fmovm_data_in_fp2 # no
4430
4431
mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes
4432
mov.l (%a0)+,0x4+EXC_FP1(%a6)
4433
mov.l (%a0)+,0x8+EXC_FP1(%a6)
4434
4435
fmovm_data_in_fp2:
4436
lsl.b &0x1,%d1 # should FP2 be moved?
4437
bpl.b fmovm_data_in_fp3 # no
4438
4439
fmovm.x (%a0)+,&0x20 # yes
4440
4441
fmovm_data_in_fp3:
4442
lsl.b &0x1,%d1 # should FP3 be moved?
4443
bpl.b fmovm_data_in_fp4 # no
4444
4445
fmovm.x (%a0)+,&0x10 # yes
4446
4447
fmovm_data_in_fp4:
4448
lsl.b &0x1,%d1 # should FP4 be moved?
4449
bpl.b fmovm_data_in_fp5 # no
4450
4451
fmovm.x (%a0)+,&0x08 # yes
4452
4453
fmovm_data_in_fp5:
4454
lsl.b &0x1,%d1 # should FP5 be moved?
4455
bpl.b fmovm_data_in_fp6 # no
4456
4457
fmovm.x (%a0)+,&0x04 # yes
4458
4459
fmovm_data_in_fp6:
4460
lsl.b &0x1,%d1 # should FP6 be moved?
4461
bpl.b fmovm_data_in_fp7 # no
4462
4463
fmovm.x (%a0)+,&0x02 # yes
4464
4465
fmovm_data_in_fp7:
4466
lsl.b &0x1,%d1 # should FP7 be moved?
4467
bpl.b fmovm_data_in_done # no
4468
4469
fmovm.x (%a0)+,&0x01 # yes
4470
4471
fmovm_data_in_done:
4472
add.l %d0,%sp # remove fpregs from stack
4473
rts
4474
4475
#####################################
4476
4477
fmovm_data_done:
4478
rts
4479
4480
##############################################################################

#
# table indexed by the operation's bit string that gives the number
# of bytes that will be moved.
#
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
#
# (i.e. entry[i] = popcount(i) * 0x0c, for i = 0x00..0xff)
tbl_fmovm_size:
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4522
#
# table to convert a pre-decrement bit string into a post-increment
# or control bit string (i.e. entry[i] = bit-reversal of the byte i).
# ex:	0x00	==>	0x00
#	0x01	==>	0x80
#	0x02	==>	0x40
#	.
#	.
#	0xfd	==>	0xbf
#	0xfe	==>	0x7f
#	0xff	==>	0xff
#
tbl_fmovm_convert:
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4568
global fmovm_calc_ea
4569
###############################################
4570
# _fmovm_calc_ea: calculate effective address #
4571
###############################################
4572
fmovm_calc_ea:
4573
mov.l %d0,%a0 # move # bytes to a0
4574
4575
# currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576
# easily changed if they were inputs passed in registers.
4577
mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
4578
mov.w %d0,%d1 # make a copy
4579
4580
andi.w &0x3f,%d0 # extract mode field
4581
andi.l &0x7,%d1 # extract reg field
4582
4583
# jump to the corresponding function for each {MODE,REG} pair.
4584
mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585
jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586
4587
swbeg &64
4588
tbl_fea_mode:
4589
short tbl_fea_mode - tbl_fea_mode
4590
short tbl_fea_mode - tbl_fea_mode
4591
short tbl_fea_mode - tbl_fea_mode
4592
short tbl_fea_mode - tbl_fea_mode
4593
short tbl_fea_mode - tbl_fea_mode
4594
short tbl_fea_mode - tbl_fea_mode
4595
short tbl_fea_mode - tbl_fea_mode
4596
short tbl_fea_mode - tbl_fea_mode
4597
4598
short tbl_fea_mode - tbl_fea_mode
4599
short tbl_fea_mode - tbl_fea_mode
4600
short tbl_fea_mode - tbl_fea_mode
4601
short tbl_fea_mode - tbl_fea_mode
4602
short tbl_fea_mode - tbl_fea_mode
4603
short tbl_fea_mode - tbl_fea_mode
4604
short tbl_fea_mode - tbl_fea_mode
4605
short tbl_fea_mode - tbl_fea_mode
4606
4607
short faddr_ind_a0 - tbl_fea_mode
4608
short faddr_ind_a1 - tbl_fea_mode
4609
short faddr_ind_a2 - tbl_fea_mode
4610
short faddr_ind_a3 - tbl_fea_mode
4611
short faddr_ind_a4 - tbl_fea_mode
4612
short faddr_ind_a5 - tbl_fea_mode
4613
short faddr_ind_a6 - tbl_fea_mode
4614
short faddr_ind_a7 - tbl_fea_mode
4615
4616
short faddr_ind_p_a0 - tbl_fea_mode
4617
short faddr_ind_p_a1 - tbl_fea_mode
4618
short faddr_ind_p_a2 - tbl_fea_mode
4619
short faddr_ind_p_a3 - tbl_fea_mode
4620
short faddr_ind_p_a4 - tbl_fea_mode
4621
short faddr_ind_p_a5 - tbl_fea_mode
4622
short faddr_ind_p_a6 - tbl_fea_mode
4623
short faddr_ind_p_a7 - tbl_fea_mode
4624
4625
short faddr_ind_m_a0 - tbl_fea_mode
4626
short faddr_ind_m_a1 - tbl_fea_mode
4627
short faddr_ind_m_a2 - tbl_fea_mode
4628
short faddr_ind_m_a3 - tbl_fea_mode
4629
short faddr_ind_m_a4 - tbl_fea_mode
4630
short faddr_ind_m_a5 - tbl_fea_mode
4631
short faddr_ind_m_a6 - tbl_fea_mode
4632
short faddr_ind_m_a7 - tbl_fea_mode
4633
4634
short faddr_ind_disp_a0 - tbl_fea_mode
4635
short faddr_ind_disp_a1 - tbl_fea_mode
4636
short faddr_ind_disp_a2 - tbl_fea_mode
4637
short faddr_ind_disp_a3 - tbl_fea_mode
4638
short faddr_ind_disp_a4 - tbl_fea_mode
4639
short faddr_ind_disp_a5 - tbl_fea_mode
4640
short faddr_ind_disp_a6 - tbl_fea_mode
4641
short faddr_ind_disp_a7 - tbl_fea_mode
4642
4643
short faddr_ind_ext - tbl_fea_mode
4644
short faddr_ind_ext - tbl_fea_mode
4645
short faddr_ind_ext - tbl_fea_mode
4646
short faddr_ind_ext - tbl_fea_mode
4647
short faddr_ind_ext - tbl_fea_mode
4648
short faddr_ind_ext - tbl_fea_mode
4649
short faddr_ind_ext - tbl_fea_mode
4650
short faddr_ind_ext - tbl_fea_mode
4651
4652
short fabs_short - tbl_fea_mode
4653
short fabs_long - tbl_fea_mode
4654
short fpc_ind - tbl_fea_mode
4655
short fpc_ind_ext - tbl_fea_mode
4656
short tbl_fea_mode - tbl_fea_mode
4657
short tbl_fea_mode - tbl_fea_mode
4658
short tbl_fea_mode - tbl_fea_mode
4659
short tbl_fea_mode - tbl_fea_mode
4660
4661
###################################
# Address register indirect: (An) #
###################################
# <ea> = An. a0/a1/a7 were saved to the exception frame on entry
# (EXC_DREGS+0x8/+0xc, EXC_A7); a6 is the frame pointer, so its saved
# value is at (%a6); a2-a5 are still live in the registers themselves.
faddr_ind_a0:
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
	rts

faddr_ind_a1:
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
	rts

faddr_ind_a2:
	mov.l		%a2,%a0			# Get current a2
	rts

faddr_ind_a3:
	mov.l		%a3,%a0			# Get current a3
	rts

faddr_ind_a4:
	mov.l		%a4,%a0			# Get current a4
	rts

faddr_ind_a5:
	mov.l		%a5,%a0			# Get current a5
	rts

faddr_ind_a6:
	mov.l		(%a6),%a0		# Get current a6
	rts

faddr_ind_a7:
	mov.l		EXC_A7(%a6),%a0		# Get current a7
	rts
4696
#####################################################
# Address register indirect w/ postincrement: (An)+ #
#####################################################
# <ea> = An, then An += (# of bytes, passed in a0). The pre-increment
# value is returned in a0; the incremented value is written back to
# wherever the register lives (frame slot or live register).
# The a7 case records a "special case" flag (mia7_flg) for the handlers.
faddr_ind_p_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a2:
	mov.l		%a2,%d0			# Get current a2
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a2			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a3:
	mov.l		%a3,%d0			# Get current a3
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a3			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a4:
	mov.l		%a4,%d0			# Get current a4
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a4			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a5:
	mov.l		%a5,%d0			# Get current a5
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,%a5			# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a6:
	mov.l		(%a6),%d0		# Get current a6
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts

faddr_ind_p_a7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	mov.l		%d0,%d1
	add.l		%a0,%d1			# Increment
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
	mov.l		%d0,%a0
	rts
4765
####################################################
# Address register indirect w/ predecrement: -(An) #
####################################################
# An -= (# of bytes, passed in a0); <ea> = decremented An, returned in a0
# and written back. The a7 case records the mda7_flg "special case" flag.
faddr_ind_m_a0:
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a1:
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a2:
	mov.l		%a2,%d0			# Get current a2
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a2			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a3:
	mov.l		%a3,%d0			# Get current a3
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a3			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a4:
	mov.l		%a4,%d0			# Get current a4
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a4			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a5:
	mov.l		%a5,%d0			# Get current a5
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,%a5			# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a6:
	mov.l		(%a6),%d0		# Get current a6
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts

faddr_ind_m_a7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag

	mov.l		EXC_A7(%a6),%d0		# Get current a7
	sub.l		%a0,%d0			# Decrement
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
	mov.l		%d0,%a0
	rts
4826
########################################################
# Address register indirect w/ displacement: (d16, An) #
########################################################
# Fetch the 16-bit displacement extension word from the instruction
# stream (via _imem_read_word; failure exits through iea_iacc),
# sign-extend it (mov.w into an address register sign-extends),
# and add the saved/live An value. <ea> returned in a0.
faddr_ind_disp_a0:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
	rts

faddr_ind_disp_a1:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
	rts

faddr_ind_disp_a2:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a2,%a0			# a2 + d16
	rts

faddr_ind_disp_a3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a3,%a0			# a3 + d16
	rts

faddr_ind_disp_a4:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a4,%a0			# a4 + d16
	rts

faddr_ind_disp_a5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		%a5,%a0			# a5 + d16
	rts

faddr_ind_disp_a6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		(%a6),%a0		# a6 + d16
	rts

faddr_ind_disp_a7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_A7(%a6),%a0		# a7 + d16
	rts
4933
########################################################################
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
# Memory indirect postindexed: ([bd, An], Xn, od)                      #
# Memory indirect preindexed: ([bd, An, Xn], od)                       #
########################################################################
# Handles all "An + extension word" modes. The brief-format case
# (bit 8 of the ext word clear) is computed inline:
#   <ea> = An + (Xn << scale) + d8
# Full-format ext words (bit 8 set) are handed off to fcalc_mem_ind.
faddr_ind_ext:
	addq.l		&0x8,%d1		# regno 0-7 -> areg index 8-15
	bsr.l		fetch_dreg		# fetch base areg
	mov.l		%d0,-(%sp)

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch extword in d0

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		(%sp)+,%a0

	btst		&0x8,%d0		# full-format ext word?
	bne.w		fcalc_mem_ind

	mov.l		%d0,L_SCR1(%a6)		# hold opword

	mov.l		%d0,%d1
	rol.w		&0x4,%d1
	andi.w		&0xf,%d1		# extract index regno

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is it word or long?
	bne.b		faii8_long
	ext.l		%d0			# sign extend word index
faii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# index + disp
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore old d2
	rts
4985
###########################
# Absolute short: (XXX).W #
###########################
# Fetch the 16-bit absolute address from the instruction stream and
# sign-extend it into a0 (mov.w into an address register sign-extends).
fabs_short:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch short address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# return <ea> in a0
	rts
4999
##########################
# Absolute long: (XXX).L #
##########################
# Fetch the 32-bit absolute address from the instruction stream into a0.
fabs_long:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch long address

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,%a0			# return <ea> in a0
	rts
5013
#######################################################
# Program counter indirect w/ displacement: (d16, PC) #
#######################################################
# <ea> = (address of the extension word) + sign-extended d16.
fpc_ind:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch word displacement

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.w		%d0,%a0			# sign extend displacement

	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16

# _imem_read_word() increased the extwptr by 2. need to adjust here.
	subq.l		&0x2,%a0		# adjust <ea>
	rts
5032
##########################################################
# PC indirect w/ index(8-bit displacement): (d8, PC, An) #
# "     "     w/   "   (base displacement): (bd, PC, An) #
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
##########################################################
# Same structure as faddr_ind_ext, but the base is the PC (address of
# the extension word). Brief-format computed inline; full-format ext
# words are handed off to fcalc_mem_ind.
fpc_ind_ext:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word		# fetch ext word

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
	subq.l		&0x2,%a0		# adjust base

	btst		&0x8,%d0		# is disp only 8 bits?
	bne.w		fcalc_mem_ind		# calc memory indirect

	mov.l		%d0,L_SCR1(%a6)		# store opword

	mov.l		%d0,%d1			# make extword copy
	rol.w		&0x4,%d1		# rotate reg num into place
	andi.w		&0xf,%d1		# extract register number

# count on fetch_dreg() not to alter a0...
	bsr.l		fetch_dreg		# fetch index

	mov.l		%d2,-(%sp)		# save d2
	mov.l		L_SCR1(%a6),%d2		# fetch opword

	btst		&0xb,%d2		# is index word or long?
	bne.b		fpii8_long		# long
	ext.l		%d0			# sign extend word index
fpii8_long:
	mov.l		%d2,%d1
	rol.w		&0x7,%d1		# rotate scale value into place
	andi.l		&0x3,%d1		# extract scale value

	lsl.l		%d1,%d0			# shift index by scale

	extb.l		%d2			# sign extend displacement
	add.l		%d2,%d0			# disp + index
	add.l		%d0,%a0			# An + (index + disp)

	mov.l		(%sp)+,%d2		# restore temp register
	rts
5081
# fcalc_mem_ind: full-format extension word / memory indirect <ea>
# calculation. Entered from faddr_ind_ext / fpc_ind_ext with the ext
# word in d0 and the base (An or PC) in a0. Register roles while here:
# d2 = index
# d3 = base
# d4 = od
# d5 = extword
fcalc_mem_ind:
	btst		&0x6,%d0		# is the index suppressed?
	beq.b		fcalc_index

	movm.l		&0x3c00,-(%sp)		# save d2-d5

	mov.l		%d0,%d5			# put extword in d5
	mov.l		%a0,%d3			# put base in d3

	clr.l		%d2			# yes, so index = 0
	bra.b		fbase_supp_ck

# index:
fcalc_index:
	mov.l		%d0,L_SCR1(%a6)		# save d0 (opword)
	bfextu		%d0{&16:&4},%d1		# fetch dreg index
	bsr.l		fetch_dreg

	movm.l		&0x3c00,-(%sp)		# save d2-d5
	mov.l		%d0,%d2			# put index in d2
	mov.l		L_SCR1(%a6),%d5
	mov.l		%a0,%d3

	btst		&0xb,%d5		# is index word or long?
	bne.b		fno_ext
	ext.l		%d2

fno_ext:
	bfextu		%d5{&21:&2},%d0		# extract scale value
	lsl.l		%d0,%d2			# shift index by scale

# base address (passed as parameter in d3):
# we clear the value here if it should actually be suppressed.
fbase_supp_ck:
	btst		&0x7,%d5		# is the bd suppressed?
	beq.b		fno_base_sup
	clr.l		%d3

# base displacement:
fno_base_sup:
	bfextu		%d5{&26:&2},%d0		# get bd size
#	beq.l		fmovm_error		# if (size == 0) it's reserved

	cmpi.b		%d0,&0x2		# bd size: <2 none, =2 word, >2 long
	blt.b		fno_bd
	beq.b		fget_word_bd

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fchk_ind

fget_word_bd:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend bd

fchk_ind:
	add.l		%d0,%d3			# base += bd

# outer displacement:
fno_bd:
	bfextu		%d5{&30:&2},%d0		# is od suppressed?
	beq.w		faii_bd

	cmpi.b		%d0,&0x2		# od size: <2 null, =2 word, >2 long
	blt.b		fnull_od
	beq.b		fword_od

	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	bra.b		fadd_them

fword_od:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_word

	tst.l		%d1			# did ifetch fail?
	bne.l		fcea_iacc		# yes

	ext.l		%d0			# sign extend od
	bra.b		fadd_them

fnull_od:
	clr.l		%d0

fadd_them:
	mov.l		%d0,%d4

	btst		&0x2,%d5		# pre or post indexing?
	beq.b		fpre_indexed

# postindexed: <ea> = mem[base + bd] + index + od
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d2,%d0			# <ea> += index
	add.l		%d4,%d0			# <ea> += od
	bra.b		fdone_ea

# preindexed: <ea> = mem[base + bd + index] + od
fpre_indexed:
	add.l		%d2,%d3			# preindexing
	mov.l		%d3,%a0
	bsr.l		_dmem_read_long

	tst.l		%d1			# did dfetch fail?
	bne.w		fcea_err		# yes

	add.l		%d4,%d0			# ea += od
	bra.b		fdone_ea

# no memory indirection: <ea> = base + bd + index
faii_bd:
	add.l		%d2,%d3			# ea = (base + bd) + index
	mov.l		%d3,%d0
fdone_ea:
	mov.l		%d0,%a0

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	rts
5222
#########################################################
# Error exits. d0 carries a function/fault code for the access-error
# handler; a0 carries the faulting address.
fcea_err:
	mov.l		%d3,%a0			# faulting (indirect) address

	movm.l		(%sp)+,&0x003c		# restore d2-d5
	mov.w		&0x0101,%d0
	bra.l		iea_dacc

fcea_iacc:
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	bra.l		iea_iacc

fmovm_out_err:
	bsr.l		restore
	mov.w		&0x00e1,%d0
	bra.b		fmovm_err

fmovm_in_err:
	bsr.l		restore
	mov.w		&0x0161,%d0
						# fall through

fmovm_err:
	mov.l		L_SCR1(%a6),%a0		# user mem addr saved earlier
	bra.l		iea_dacc
5247
#########################################################################
# XDEF ****************************************************************	#
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
#									#
# XREF ****************************************************************	#
#	_imem_read_long() - read longword from memory			#
#	iea_iacc() - _imem_read_long() failed; error recovery		#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If _imem_read_long() doesn't fail:				#
#		USER_FPCR(a6)  = new FPCR value				#
#		USER_FPSR(a6)  = new FPSR value				#
#		USER_FPIAR(a6) = new FPIAR value			#
#									#
# ALGORITHM ***********************************************************	#
#	Decode the instruction type by looking at the extension word	#
# in order to see how many control registers to fetch from memory.	#
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
# the special access error exit handler iea_iacc().			#
#									#
# Instruction word decoding:						#
#									#
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
#									#
#		WORD1			WORD2				#
#	1111 0010 00 111100	100$ $$00 0000 0000			#
#									#
#	$$$ (100): FPCR							#
#	    (010): FPSR							#
#	    (001): FPIAR						#
#	    (000): FPIAR						#
#									#
#########################################################################

	global		fmovm_ctrl
fmovm_ctrl:
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
	beq.w		fctrl_in_7		# yes
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
	beq.w		fctrl_in_6		# yes
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
	beq.b		fctrl_in_5		# yes
						# else fall through

# fmovem.l #<data>, fpsr/fpiar
fctrl_in_3:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpiar
fctrl_in_5:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
	rts

# fmovem.l #<data>, fpcr/fpsr
fctrl_in_6:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	rts

# fmovem.l #<data>, fpcr/fpsr/fpiar
fctrl_in_7:
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPCR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPSR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to mem
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch FPIAR from mem

	tst.l		%d1			# did ifetch fail?
	bne.l		iea_iacc		# yes

	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to mem
	rts
5382
##########################################################################
5383
5384
#########################################################################
# XDEF ****************************************************************	#
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
#			  OVFL/UNFL exceptions will result		#
#									#
# XREF ****************************************************************	#
#	norm() - normalize mantissa after adjusting exponent		#
#									#
# INPUT ***************************************************************	#
#	FP_SRC(a6) = fp op1(src)					#
#	FP_DST(a6) = fp op2(dst)					#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = fp op1 scaled(src)					#
#	FP_DST(a6) = fp op2 scaled(dst)					#
#	d0         = scale amount					#
#									#
# ALGORITHM ***********************************************************	#
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
# equal to 0x3fff and scale the SRC exponent by the value that the	#
# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
# do the opposite. Return this scale factor in d0.			#
#	If the two exponents differ by > the number of mantissa bits	#
# plus two, then set the smallest exponent to a very small value as a	#
# quick shortcut.							#
#									#
#########################################################################

	global		addsub_scaler2
addsub_scaler2:
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# FP_SCR0 = src copy
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)	# FP_SCR1 = dst copy
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	mov.w		DST_EX(%a1),%d1
	mov.w		%d0,FP_SCR0_EX(%a6)
	mov.w		%d1,FP_SCR1_EX(%a6)

	andi.w		&0x7fff,%d0
	andi.w		&0x7fff,%d1
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent

	cmp.w		%d0,%d1			# is src exp >= dst exp?
	bge.l		src_exp_ge2

# dst exp is >  src exp; scale dst to exp = 0x3fff
dst_exp_gt2:
	bsr.l		scale_to_zero_dst
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
	bne.b		cmpexp12

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,L_SCR1(%a6)		# insert new exp

cmpexp12:
	mov.w		2+L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
	bge.b		quick_scale12

	mov.w		L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
	mov.w		FP_SCR0_EX(%a6),%d1
	and.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale12:
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

# src exp is >= dst exp; scale src to exp = 0x3fff
src_exp_ge2:
	bsr.l		scale_to_zero_src
	mov.l		%d0,-(%sp)		# save scale factor

	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
	bne.b		cmpexp22
	lea		FP_SCR1(%a6),%a0
	bsr.l		norm			# normalize the denorm; result is new exp
	neg.w		%d0			# new exp = -(shft val)
	mov.w		%d0,2+L_SCR1(%a6)	# insert new exp

cmpexp22:
	mov.w		L_SCR1(%a6),%d0
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp

	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
	bge.b		quick_scale22

	mov.w		2+L_SCR1(%a6),%d0
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
	mov.w		FP_SCR1_EX(%a6),%d1
	andi.w		&0x8000,%d1
	or.w		%d1,%d0			# concat {sgn,new exp}
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts

quick_scale22:
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1

	mov.l		(%sp)+,%d0		# return SCALE factor
	rts
5504
##########################################################################
5505
5506
#########################################################################
5507
# XDEF **************************************************************** #
5508
# scale_to_zero_src(): scale the exponent of extended precision #
5509
# value at FP_SCR0(a6). #
5510
# #
5511
# XREF **************************************************************** #
5512
# norm() - normalize the mantissa if the operand was a DENORM #
5513
# #
5514
# INPUT *************************************************************** #
5515
# FP_SCR0(a6) = extended precision operand to be scaled #
5516
# #
5517
# OUTPUT ************************************************************** #
5518
# FP_SCR0(a6) = scaled extended precision operand #
5519
# d0 = scale value #
5520
# #
5521
# ALGORITHM *********************************************************** #
5522
# Set the exponent of the input operand to 0x3fff. Save the value #
5523
# of the difference between the original and new exponent. Then, #
5524
# normalize the operand if it was a DENORM. Add this normalization #
5525
# value to the previous value. Return the result. #
5526
# #
5527
#########################################################################
5528
5529
	global		scale_to_zero_src
# Force the FP_SCR0 exponent to the bias (0x3fff, i.e. true exp 0) and
# return in d0 the scale factor needed to undo that change later.
# A DENORM src is normalized first; the shift count folds into the factor.
scale_to_zero_src:
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent

	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzs_denorm		# yes; normalize the DENORM

stzs_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)

	rts

stzs_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; d0 = shift count
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# prepare for op_norm call
	bra.b		stzs_norm		# finish scaling
5556
5557
###
5558
5559
#########################################################################
5560
# XDEF **************************************************************** #
5561
# scale_sqrt(): scale the input operand exponent so a subsequent #
5562
# fsqrt operation won't take an exception. #
5563
# #
5564
# XREF **************************************************************** #
5565
# norm() - normalize the mantissa if the operand was a DENORM #
5566
# #
5567
# INPUT *************************************************************** #
5568
# FP_SCR0(a6) = extended precision operand to be scaled #
5569
# #
5570
# OUTPUT ************************************************************** #
5571
# FP_SCR0(a6) = scaled extended precision operand #
5572
# d0 = scale value #
5573
# #
5574
# ALGORITHM *********************************************************** #
5575
# If the input operand is a DENORM, normalize it. #
5576
# If the exponent of the input operand is even, set the exponent #
5577
# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the #
5578
# exponent of the input operand is odd, set the exponent to 0x3fff and #
5579
# return a scale factor of "(exp-0x3fff)/2". #
5580
# #
5581
#########################################################################
5582
5583
	global		scale_sqrt
# Scale FP_SCR0 so a subsequent fsqrt cannot trap: put the exponent at
# 0x3fff (odd exp) or 0x3ffe (even exp) so that scale/2 is an integer,
# and return that halved scale factor in d0.
scale_sqrt:
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		ss_denorm		# yes; normalize the DENORM first

	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn

	btst		&0x0,%d1		# is exp even or odd?
	beq.b		ss_norm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: insert new exponent(=0)

	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_norm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: insert new exponent(=-1)

	mov.l		&0x3ffe,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm:
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
	bsr.l		norm			# normalize denorm; d0 = shift count

	btst		&0x0,%d0		# is exp even or odd?
	beq.b		ss_denorm_even

	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# odd: insert new exponent(=0)

	add.l		&0x3fff,%d0		# scale = BIAS + shift count
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts

ss_denorm_even:
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# even: insert new exponent(=-1)

	add.l		&0x3ffe,%d0		# scale = (BIAS-1) + shift count
	asr.l		&0x1,%d0		# divide scale factor by 2
	rts
5630
5631
###
5632
5633
#########################################################################
5634
# XDEF **************************************************************** #
5635
# scale_to_zero_dst(): scale the exponent of extended precision #
5636
# value at FP_SCR1(a6). #
5637
# #
5638
# XREF **************************************************************** #
5639
# norm() - normalize the mantissa if the operand was a DENORM #
5640
# #
5641
# INPUT *************************************************************** #
5642
# FP_SCR1(a6) = extended precision operand to be scaled #
5643
# #
5644
# OUTPUT ************************************************************** #
5645
# FP_SCR1(a6) = scaled extended precision operand #
5646
# d0 = scale value #
5647
# #
5648
# ALGORITHM *********************************************************** #
5649
# Set the exponent of the input operand to 0x3fff. Save the value #
5650
# of the difference between the original and new exponent. Then, #
5651
# normalize the operand if it was a DENORM. Add this normalization #
5652
# value to the previous value. Return the result. #
5653
# #
5654
#########################################################################
5655
5656
	global		scale_to_zero_dst
# Same as scale_to_zero_src but operates on the dst operand in FP_SCR1
# (tagged by DTAG): force its exponent to the bias 0x3fff and return the
# undo scale factor in d0.
scale_to_zero_dst:
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
	mov.w		%d1,%d0			# make a copy

	andi.l		&0x7fff,%d1		# extract operand's exponent

	andi.w		&0x8000,%d0		# extract operand's sgn
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)

	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent

	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
	beq.b		stzd_denorm		# yes; normalize the DENORM

stzd_norm:
	mov.l		&0x3fff,%d0
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
	rts

stzd_denorm:
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
	bsr.l		norm			# normalize denorm; d0 = shift count
	neg.l		%d0			# new exponent = -(shft val)
	mov.l		%d0,%d1			# prepare for op_norm call
	bra.b		stzd_norm		# finish scaling
5682
5683
##########################################################################
5684
5685
#########################################################################
5686
# XDEF **************************************************************** #
5687
# res_qnan(): return default result w/ QNAN operand for dyadic #
5688
# res_snan(): return default result w/ SNAN operand for dyadic #
5689
# res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
5690
# res_snan_1op(): return dflt result w/ SNAN operand for monadic #
5691
# #
5692
# XREF **************************************************************** #
5693
# None #
5694
# #
5695
# INPUT *************************************************************** #
5696
# FP_SRC(a6) = pointer to extended precision src operand #
5697
# FP_DST(a6) = pointer to extended precision dst operand #
5698
# #
5699
# OUTPUT ************************************************************** #
5700
# fp0 = default result #
5701
# #
5702
# ALGORITHM *********************************************************** #
5703
# If either operand (but not both operands) of an operation is a #
5704
# nonsignalling NAN, then that NAN is returned as the result. If both #
5705
# operands are nonsignalling NANs, then the destination operand #
5706
# nonsignalling NAN is returned as the result. #
5707
# If either operand to an operation is a signalling NAN (SNAN), #
5708
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
5709
# enable bit is set in the FPCR, then the trap is taken and the #
5710
# destination is not modified. If the SNAN trap enable bit is not set, #
5711
# then the SNAN is converted to a nonsignalling NAN (by setting the #
5712
# SNAN bit in the operand to one), and the operation continues as #
5713
# described in the preceding paragraph, for nonsignalling NANs. #
5714
# Make sure the appropriate FPSR bits are set before exiting. #
5715
# #
5716
#########################################################################
5717
5718
	global		res_qnan
	global		res_snan
# Return the default NAN result in fp0 for dyadic (res_qnan/res_snan)
# and monadic (res_qnan_1op/res_snan_1op) operations, per the priority
# rules in the header above: dst NAN wins over src NAN; an SNAN is made
# nonsignalling (bit 6 of the hi mantissa set) and flagged in USER_FPSR.
res_qnan:
res_snan:
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
	beq.b		dst_snan2
	cmp.b		DTAG(%a6), &QNAN	# is the dst a QNAN?
	beq.b		dst_qnan2
src_nan:
	cmp.b		STAG(%a6), &QNAN	# dst is not a NAN; check src
	beq.b		src_qnan2
	global		res_snan_1op
res_snan_1op:
src_snan2:
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit (make nonsignalling)
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from the src NAN
	bra.b		nan_comp
	global		res_qnan_1op
res_qnan_1op:
src_qnan2:
	or.l		&nan_mask, USER_FPSR(%a6)
	lea		FP_SRC(%a6), %a0	# result comes from the src NAN
	bra.b		nan_comp
dst_snan2:
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit (make nonsignalling)
	lea		FP_DST(%a6), %a0	# result comes from the dst NAN
	bra.b		nan_comp
dst_qnan2:
	lea		FP_DST(%a6), %a0	# dst QNAN is the result...
	cmp.b		STAG(%a6), &SNAN	# ...but a src SNAN still sets flags
	bne		nan_done
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
nan_done:
	or.l		&nan_mask, USER_FPSR(%a6)
nan_comp:
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
	beq.b		nan_not_neg
	or.l		&neg_mask, USER_FPSR(%a6)	# reflect sign in FPSR
nan_not_neg:
	fmovm.x		(%a0), &0x80		# return chosen NAN in fp0
	rts
5761
5762
#########################################################################
5763
# XDEF **************************************************************** #
5764
# res_operr(): return default result during operand error #
5765
# #
5766
# XREF **************************************************************** #
5767
# None #
5768
# #
5769
# INPUT *************************************************************** #
5770
# None #
5771
# #
5772
# OUTPUT ************************************************************** #
5773
# fp0 = default operand error result #
5774
# #
5775
# ALGORITHM *********************************************************** #
5776
# A nonsignalling NAN is returned as the default result when #
5777
# an operand error occurs for the following cases: #
5778
# #
5779
# Multiply: (Infinity x Zero) #
5780
# Divide : (Zero / Zero) || (Infinity / Infinity) #
5781
# #
5782
#########################################################################
5783
5784
	global		res_operr
# Operand error (e.g. inf*0, 0/0, inf/inf): set OPERR/AIOP/NAN in the
# FPSR and return the canonical all-ones QNAN in fp0.
res_operr:
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
	fmovm.x		nan_return(%pc), &0x80	# fp0 = default QNAN
	rts

# canonical default QNAN: exp = 0x7fff, mantissa = all ones
nan_return:
	long		0x7fff0000, 0xffffffff, 0xffffffff
5792
5793
#########################################################################
5794
# XDEF **************************************************************** #
5795
# _denorm(): denormalize an intermediate result #
5796
# #
5797
# XREF **************************************************************** #
5798
# None #
5799
# #
5800
# INPUT *************************************************************** #
5801
# a0 = points to the operand to be denormalized #
5802
# (in the internal extended format) #
5803
# #
5804
# d0 = rounding precision #
5805
# #
5806
# OUTPUT ************************************************************** #
5807
# a0 = pointer to the denormalized result #
5808
# (in the internal extended format) #
5809
# #
5810
# d0 = guard,round,sticky #
5811
# #
5812
# ALGORITHM *********************************************************** #
5813
# According to the exponent underflow threshold for the given #
5814
# precision, shift the mantissa bits to the right in order to raise the #
5815
# exponent of the operand to the threshold value. While shifting the #
5816
# mantissa bits right, maintain the value of the guard, round, and #
5817
# sticky bits. #
5818
# other notes: #
5819
# (1) _denorm() is called by the underflow routines #
5820
# (2) _denorm() does NOT affect the status register #
5821
# #
5822
#########################################################################
5823
5824
#
5825
# table of exponent threshold values for each precision
5826
#
5827
# exponent threshold per rounding precision, indexed by (prec >> 2)
# (ext = 0, sgl = 1, dbl = 2 -- matches the d0 encoding in the header)
tbl_thresh:
	short		0x0			# ext: threshold 0
	short		sgl_thresh
	short		dbl_thresh

	global		_denorm
_denorm:
#
# Load the exponent threshold for the precision selected and check
# to see if (threshold - exponent) is > 65 in which case we can
# simply calculate the sticky bit and zero the mantissa. otherwise
# we have to call the denormalization routine.
#
	lsr.b		&0x2, %d0		# shift prec to lo bits
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1	# load prec threshold
	mov.w		%d1, %d0		# copy d1 into d0
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
	bpl.b		denorm_set_stky		# yes; just calc sticky

	clr.l		%d0			# clear g,r,s
	btst		&inex2_bit, FPSR_EXCEPT(%a6)	# was INEX2 set?
	beq.b		denorm_call		# no; don't change anything
	bset		&29, %d0		# yes; seed the sticky bit

denorm_call:
	bsr.l		dnrm_lp			# denormalize the number
	rts

#
# all bits would have been shifted off during the denorm so simply
# calculate if the sticky should be set and clear the entire mantissa.
#
denorm_set_stky:
	mov.l		&0x20000000, %d0	# set sticky bit in return value
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
	clr.l		FTEMP_HI(%a0)		# zero hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# zero lo(mantissa)
	rts
5866
5867
# #
5868
# dnrm_lp(): normalize exponent/mantissa to specified threshold #
5869
# #
5870
# INPUT: #
5871
# %a0 : points to the operand to be denormalized #
5872
# %d0{31:29} : initial guard,round,sticky #
5873
# %d1{15:0} : denormalization threshold #
5874
# OUTPUT: #
5875
# %a0 : points to the denormalized operand #
5876
# %d0{31:29} : final guard,round,sticky #
5877
# #
5878
5879
#	*** Local Equates *** #
	set		GRS, L_SCR2		# g,r,s temp storage
	set		FTEMP_LO2, L_SCR1	# FTEMP_LO copy
# NOTE: GRS must sit directly after FTEMP_LO2 in the stack frame so the
# bitfield extracts below can read across the FTEMP_LO/g,r,s boundary --
# assumes L_SCR2 == L_SCR1 + 4; confirm against the frame layout.

	global		dnrm_lp
dnrm_lp:

#
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
# in memory so as to make the bitfield extraction for denormalization easier.
#
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6)	# make FTEMP_LO copy
	mov.l		%d0, GRS(%a6)		# place g,r,s after it

#
# check to see how much less than the underflow threshold the operand
# exponent is.
#
	mov.l		%d1, %d0		# copy the denorm threshold
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
	ble.b		dnrm_no_lp		# d1 <= 0; already at/above threshold
	cmpi.w		%d1, &0x20		# is ( 0 <= d1 < 32) ?
	blt.b		case_1			# yes
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
	blt.b		case_2			# yes
	bra.w		case_3			# (d1 >= 64)

#
# No denormalization necessary
#
dnrm_no_lp:
	mov.l		GRS(%a6), %d0		# restore original g,r,s
	rts
5912
5913
#
# case (0<d1<32): shift the 64-bit mantissa right by n; the bits that
# fall off the bottom become the new guard/round/sticky.
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	            \      \           \            \
#	             \      \           \            \
#	              \      \           \            \
#	               \      \           \            \
#	<-(n)-><-(32 - n)-><------(32)------><------(32)------->
#	---------------------------------------------------------
#	|0.....0| NEW_HI  |  NEW_FTEMP_LO    |grs               |
#	---------------------------------------------------------
#
case_1:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	mov.l		&32, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

	cmpi.w		%d1, &29		# is shft amt >= 29
	blt.b		case1_extract		# no; no fix needed
	mov.b		GRS(%a6), %d2		# yes; fold old g,r,s into the LO copy
	or.b		%d2, 3+FTEMP_LO2(%a6)	# so they survive the extraction

case1_extract:
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2	# %d2 = new FTEMP_HI
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1	# %d1 = new FTEMP_LO
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0	# %d0 = new G,R,S

	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO

	bftst		%d0{&2:&30}		# were bits shifted off?
	beq.b		case1_sticky_clear	# no; go finish
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit

case1_sticky_clear:
	and.l		&0xe0000000, %d0	# clear all but G,R,S
	mov.l		(%sp)+, %d2		# restore temp register
	rts
5964
5965
#
# case (32<=d1<64): FTEMP_HI supplies both the new FTEMP_LO and the new
# g,r,s; all of the old FTEMP_LO and old g,r,s collapse into sticky.
#
# %d0 = denorm threshold
# %d1 = "n" = amt to shift
#
#	---------------------------------------------------------
#	|     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
#	---------------------------------------------------------
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
#	            \           \                  \
#	             \           \                  ------------
#	              \           -----------------             \
#	               ------------------          \             \
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
#	---------------------------------------------------------
#	|0...............0|0....0|   NEW_LO   |grs               |
#	---------------------------------------------------------
#
case_2:
	mov.l		%d2, -(%sp)		# create temp storage

	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
	subi.w		&0x20, %d1		# %d1 now between 0 and 32
	mov.l		&0x20, %d0
	sub.w		%d1, %d0		# %d0 = 32 - %d1

# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
# the number of bits to check for the sticky detect.
# it only plays a role in shift amounts of 61-63.
	mov.b		GRS(%a6), %d2
	or.b		%d2, 3+FTEMP_LO2(%a6)

	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2	# %d2 = new FTEMP_LO
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1	# %d1 = new G,R,S

	bftst		%d1{&2:&30}		# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
	bne.b		case2_set_sticky	# yes; set sticky bit

	mov.l		%d1, %d0		# move new G,R,S to %d0
	bra.b		case2_end

case2_set_sticky:
	mov.l		%d1, %d0		# move new G,R,S to %d0
	bset		&rnd_stky_bit, %d0	# set sticky bit

case2_end:
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
	and.l		&0xe0000000, %d0	# clear all but G,R,S

	mov.l		(%sp)+,%d2		# restore temp register
	rts
6024
6025
#
# case (d1>=64): at most the top bit or two of FTEMP_HI survive as
# guard/round; everything else collapses into the sticky bit and the
# mantissa becomes zero.
#
# %d0 = denorm threshold
# %d1 = amt to shift
#
case_3:
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold

	cmpi.w		%d1, &65		# is shift amt > 65?
	blt.b		case3_64		# no; it's == 64
	beq.b		case3_65		# no; it's == 65

#
# case (d1>65)
#
# Shift value is > 65 and out of range. All bits are shifted off.
# Return a zero mantissa with the sticky bit set
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	mov.l		&0x20000000, %d0	# set sticky bit
	rts

#
# case (d1 == 64): the top two bits of FTEMP_HI become the new G,R;
# the remaining 30 bits of FTEMP_HI (plus all of FTEMP_LO and the old
# g,r,s) determine sticky.
#
case3_64:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0xc0000000, %d0	# extract G,R
	and.l		&0x3fffffff, %d1	# extract other bits

	bra.b		case3_complete

#
# case (d1 == 65): only the top bit of FTEMP_HI survives, shifted down
# into the R position; G is zero.
#
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits
# NOTE(review): %d1 was never reloaded from FTEMP_HI on this path -- it
# still holds the shift amount (65), so this "and" leaves 65 (nonzero)
# and the bne below always sets sticky when d1 == 65. Looks like a
# missing "mov.l FTEMP_HI(%a0),%d1" before the and; confirm against the
# reference 060FPSP sources before changing anything.

case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# the g,r,s and the entire mantissa are zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
6130
6131
#########################################################################
6132
# XDEF **************************************************************** #
6133
# _round(): round result according to precision/mode #
6134
# #
6135
# XREF **************************************************************** #
6136
# None #
6137
# #
6138
# INPUT *************************************************************** #
6139
# a0 = ptr to input operand in internal extended format #
6140
# d1(hi) = contains rounding precision: #
6141
# ext = $0000xxxx #
6142
# sgl = $0004xxxx #
6143
# dbl = $0008xxxx #
6144
# d1(lo) = contains rounding mode: #
6145
# RN = $xxxx0000 #
6146
# RZ = $xxxx0001 #
6147
# RM = $xxxx0002 #
6148
# RP = $xxxx0003 #
6149
# d0{31:29} = contains the g,r,s bits (extended) #
6150
# #
6151
# OUTPUT ************************************************************** #
6152
# a0 = pointer to rounded result #
6153
# #
6154
# ALGORITHM *********************************************************** #
6155
# On return the value pointed to by a0 is correctly rounded, #
6156
# a0 is preserved and the g-r-s bits in d0 are cleared. #
6157
# The result is not typed - the tag field is invalid. The #
6158
# result is still in the internal extended format. #
6159
# #
6160
# The INEX bit of USER_FPSR will be set if the rounded result was #
6161
# inexact (i.e. if any of the g-r-s bits were set). #
6162
# #
6163
#########################################################################
6164
6165
	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
	bsr.l		ext_grs			# extract G,R,S for this precision

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; result exact, round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6)	# set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1	# load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

# jump table indexed by rounding mode: RN=0, RZ=1, RM=2, RP=3
tbl_mode:
	short		rnd_near - tbl_mode
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode
	short		rnd_plus - tbl_mode
6192
6193
#################################################################
#	ROUND PLUS INFINITY					#
#								#
#	If sign of fp number = 0 (positive), then add 1 to l.	#
#################################################################
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# negative: round toward zero

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
#	ROUND MINUS INFINITY					#
#								#
#	If sign of fp number = 1 (negative), then add 1 to l.	#
#################################################################
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# positive: round toward zero

	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext

#################################################################
#	ROUND NEAREST						#
#								#
#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
#	Note that this will round to even in case of a tie.	#
#################################################################
rnd_near:
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# g=0: round down, i.e. truncate

	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6243
6244
# *** LOCAL EQUATES ***
	set		ad_1_sgl, 0x00000100	# constant to add 1 to l-bit in sgl prec
	set		ad_1_dbl, 0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
# add 1 to the sgl l-bit; on mantissa overflow shift right and bump the
# exponent; clear l for round-to-even when r=s=0 (d0 holds r,s here).
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done
	and.w		&0xfe00, FTEMP_HI+2(%a0)	# clear the l-bit (tie: round to even)
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0)	# truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#########################
#	ADD EXTENDED	#
#########################
add_ext:
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done
	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit (tie: round to even)
add_ext_done:
	rts

#########################
#	ADD DOUBLE	#
#########################
add_dbl:
	add.l		&ad_1_dbl, FTEMP_LO(%a0)	# add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done
	and.w		&0xf000, FTEMP_LO+2(%a0)	# clear the l-bit (tie: round to even)

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0)	# truncate bits beyond dbl limit
	rts

###########################
# Truncate all other bits #
###########################
# shared exit: chop everything below the l-bit for the active precision
truncate:
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext
6319
6320
6321
#
6322
# ext_grs(): extract guard, round and sticky bits according to
6323
# rounding precision.
6324
#
6325
# INPUT
6326
# d0 = extended precision g,r,s (in d0{31:29})
6327
# d1 = {PREC,ROUND}
6328
# OUTPUT
6329
# d0{31:29} = guard, round, sticky
6330
#
6331
# The ext_grs extract the guard/round/sticky bits according to the
6332
# selected rounding precision. It is called by the round subroutine
6333
# only. All registers except d0 are kept intact. d0 becomes an
6334
# updated guard,round,sticky in d0{31:29}
6335
#
6336
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337
# prior to usage, and needs to restore d1 to original. this
6338
# routine is tightly tied to the round routine and not meant to
6339
# uphold standard subroutine calling practices.
6340
#
6341
6342
# extract guard/round/sticky for the selected rounding precision;
# called only by _round(). d1.hi = precision (swapped in/out here);
# d0 in = extended g,r,s, d0 out = g,r,s in d0{31:29}.
ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 actually already holds g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return d1 to correct positions
	rts

ext_grs_not_ext:
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96		64	 40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  |xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->nn\			   /
#				     ee ---------------------
#				     ww		|
#					 	v
#				   	gr	new sticky
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3	# sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit

#
# dbl:
#	96		64	 	32	11	0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|		|	 |xx	|grs|
#	-----------------------------------------------------
#					      nn\	   /
#						ee -------
#						ww	|
#							v
#						gr	new sticky
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3	# dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts
6418
6419
#########################################################################
6420
# norm(): normalize the mantissa of an extended precision input. the #
6421
# input operand should not be normalized already. #
6422
# #
6423
# XDEF **************************************************************** #
6424
# norm() #
6425
# #
6426
# XREF **************************************************************** #
6427
# none #
6428
# #
6429
# INPUT *************************************************************** #
6430
# a0 = pointer fp extended precision operand to normalize #
6431
# #
6432
# OUTPUT ************************************************************** #
6433
# d0 = number of bit positions the mantissa was shifted #
6434
# a0 = the input operand's mantissa is normalized; the exponent #
6435
# is unchanged. #
6436
# #
6437
#########################################################################
6438
	global		norm
#
# scratch: d1 is clobbered (holds lo(man)); d2/d3 are saved/restored.
# bfffo yields the offset of the first set bit in the 32-bit field and
# sets Z when the field is entirely clear.
#
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

# shift the 64-bit mantissa left by d2 places: the top d2 bits of
# lo(man) are merged into the bottom of the shifted hi(man).
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

# hi(man) was all zero: lo(man), left-justified, becomes the new
# hi(man); total shift = 32 + leading zeroes of lo(man).
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
6480
6481
#########################################################################
6482
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
6483
# - returns corresponding optype tag #
6484
# #
6485
# XDEF **************************************************************** #
6486
# unnorm_fix() #
6487
# #
6488
# XREF **************************************************************** #
6489
# norm() - normalize the mantissa #
6490
# #
6491
# INPUT *************************************************************** #
6492
# a0 = pointer to unnormalized extended precision number #
6493
# #
6494
# OUTPUT ************************************************************** #
6495
# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
6496
# a0 = input operand has been converted to a norm, denorm, or #
6497
# zero; both the exponent and mantissa are changed. #
6498
# #
6499
#########################################################################
6500
6501
	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. Therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

# exp <= 32: shift the 64-bit mantissa left by exp places; the new
# hi(man) takes bits from both words, lo(man) keeps its shifted residue.
	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts
6584
6585
#########################################################################
6586
# XDEF **************************************************************** #
6587
# set_tag_x(): return the optype of the input ext fp number #
6588
# #
6589
# XREF **************************************************************** #
6590
# None #
6591
# #
6592
# INPUT *************************************************************** #
6593
# a0 = pointer to extended precision operand #
6594
# #
6595
# OUTPUT ************************************************************** #
6596
# d0 = value of type tag #
6597
# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
6598
# #
6599
# ALGORITHM *********************************************************** #
6600
# Simply test the exponent, j-bit, and mantissa values to #
6601
# determine the type of operand. #
6602
# If it's an unnormalized zero, alter the operand and force it #
6603
# to be a normal zero. #
6604
# #
6605
#########################################################################
6606
6607
	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# test the j-bit (msb of mantissa)
	beq.b		not_norm_x
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x
not_unnorm_x:
	tst.l		FTEMP_HI(%a0)		# j-bit clear, exp 0: any mantissa
	bne.b		is_denorm_x		# bits set => DENORM, else ZERO
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
# EXP == MAX: zero mantissa => INF, non-zero mantissa => NAN
inf_or_nan_x:
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# quiet bit set => QNAN
	beq.b		is_snan_x
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts
6664
6665
#########################################################################
6666
# XDEF **************************************************************** #
6667
# set_tag_d(): return the optype of the input dbl fp number #
6668
# #
6669
# XREF **************************************************************** #
6670
# None #
6671
# #
6672
# INPUT *************************************************************** #
6673
# a0 = points to double precision operand #
6674
# #
6675
# OUTPUT ************************************************************** #
6676
# d0 = value of type tag #
6677
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6678
# #
6679
# ALGORITHM *********************************************************** #
6680
# Simply test the exponent, j-bit, and mantissa values to #
6681
# determine the type of operand. #
6682
# #
6683
#########################################################################
6684
6685
	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0		# hi long: sign/exp/hi(mantissa)
	mov.l		%d0, %d1

	andi.l		&0x7ff00000, %d0	# isolate biased exponent
	beq.b		zero_or_denorm_d

	cmpi.l		%d0, &0x7ff00000	# max exponent => INF or NaN
	beq.b		inf_or_nan_d

is_norm_d:
	mov.b		&NORM, %d0
	rts
# exponent is zero: any mantissa bit (hi or lo word) => DENORM
zero_or_denorm_d:
	and.l		&0x000fffff, %d1
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
# max exponent: zero mantissa => INF, non-zero => NaN
inf_or_nan_d:
	and.l		&0x000fffff, %d1
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# quiet bit distinguishes QNAN/SNAN
	bne		is_qnan_d
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts
6727
6728
#########################################################################
6729
# XDEF **************************************************************** #
6730
# set_tag_s(): return the optype of the input sgl fp number #
6731
# #
6732
# XREF **************************************************************** #
6733
# None #
6734
# #
6735
# INPUT *************************************************************** #
6736
# a0 = pointer to single precision operand #
6737
# #
6738
# OUTPUT ************************************************************** #
6739
# d0 = value of type tag #
6740
# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
6741
# #
6742
# ALGORITHM *********************************************************** #
6743
# Simply test the exponent, j-bit, and mantissa values to #
6744
# determine the type of operand. #
6745
# #
6746
#########################################################################
6747
6748
	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# whole single fits in one long
	mov.l		%d0, %d1

	andi.l		&0x7f800000, %d0	# isolate biased exponent
	beq.b		zero_or_denorm_s

	cmpi.l		%d0, &0x7f800000	# max exponent => INF or NaN
	beq.b		inf_or_nan_s

is_norm_s:
	mov.b		&NORM, %d0
	rts
# exponent is zero: non-zero mantissa => DENORM, else ZERO
zero_or_denorm_s:
	and.l		&0x007fffff, %d1
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
# max exponent: zero mantissa => INF, non-zero => NaN
inf_or_nan_s:
	and.l		&0x007fffff, %d1
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# quiet bit distinguishes QNAN/SNAN
	bne		is_qnan_s
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts
6786
6787
#########################################################################
6788
# XDEF **************************************************************** #
6789
# unf_res(): routine to produce default underflow result of a #
6790
# scaled extended precision number; this is used by #
6791
# fadd/fdiv/fmul/etc. emulation routines. #
6792
# unf_res4(): same as above but for fsglmul/fsgldiv which use #
6793
# single round prec and extended prec mode. #
6794
# #
6795
# XREF **************************************************************** #
6796
# _denorm() - denormalize according to scale factor #
6797
# _round() - round denormalized number according to rnd prec #
6798
# #
6799
# INPUT *************************************************************** #
6800
# a0 = pointer to extended precison operand #
6801
# d0 = scale factor #
6802
# d1 = rounding precision/mode #
6803
# #
6804
# OUTPUT ************************************************************** #
6805
# a0 = pointer to default underflow result in extended precision #
6806
# d0.b = result FPSR_cc which caller may or may not want to save #
6807
# #
6808
# ALGORITHM *********************************************************** #
6809
# Convert the input operand to "internal format" which means the #
6810
# exponent is extended to 16 bits and the sign is stored in the unused #
6811
# portion of the extended precison operand. Denormalize the number #
6812
# according to the scale factor passed in d0. Then, round the #
6813
# denormalized result. #
6814
# Set the FPSR_exc bits as appropriate but return the cc bits in #
6815
# d0 in case the caller doesn't want to save them (as is the case for #
6816
# fmove out). #
6817
# unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
6818
# precision and the rounding mode to single. #
6819
# #
6820
#########################################################################
6821
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1
	sub.w		%d0, %d1		# apply scale factor from d0
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0
	lsr.w		&0x4,%d0
	bsr.l		_denorm			# denorm result

# _round() wants prec in hi(d1) and mode in lo(d1); rebuild both
# from the saved prec:mode word on the stack.
	mov.l		(%sp),%a0
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack
	rts
6883
6884
# unf_res() for fsglmul() and fsgldiv().
6885
# unf_res() for fsglmul() and fsgldiv(): identical to unf_res() except
# that the denorm is done at extended precision and the rounding
# precision is forced to single (s_mode).
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1
	sub.w		%d0,%d1			# apply scale factor from d0
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

	mov.l		(%sp),%a0
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1			# prec goes in hi(d1)
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode bits
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts
6943
6944
#########################################################################
6945
# XDEF **************************************************************** #
6946
# ovf_res(): routine to produce the default overflow result of #
6947
# an overflowing number. #
6948
# ovf_res2(): same as above but the rnd mode/prec are passed #
6949
# differently. #
6950
# #
6951
# XREF **************************************************************** #
6952
# none #
6953
# #
6954
# INPUT *************************************************************** #
6955
# d1.b = '-1' => (-); '0' => (+) #
6956
# ovf_res(): #
6957
# d0 = rnd mode/prec #
6958
# ovf_res2(): #
6959
# hi(d0) = rnd prec #
6960
# lo(d0) = rnd mode #
6961
# #
6962
# OUTPUT ************************************************************** #
6963
# a0 = points to extended precision result #
6964
# d0.b = condition code bits #
6965
# #
6966
# ALGORITHM *********************************************************** #
6967
# The default overflow result can be determined by the sign of #
6968
# the result and the rounding mode/prec in effect. These bits are #
6969
# concatenated together to create an index into the default result #
6970
# table. A pointer to the correct result is returned in a0. The #
6971
# resulting condition codes are returned in d0 in case the caller #
6972
# doesn't want FPSR_cc altered (as is the case for fmove out). #
6973
# #
6974
#########################################################################
6975
6976
	global		ovf_res
ovf_res:
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as in index into the
# two tables below to fetch the default result and the result ccodes.
# d0 = {sign,prec,mode} index (one byte per entry in tbl_ovfl_cc);
# d1 = 2*index, scaled *8 by the lea => 16 bytes (one extended-precision
# entry of tbl_ovfl_result) per index.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

# result condition codes, one row per rounding precision (ext,sgl,dbl),
# columns RN/RZ/RM/RP; first three rows positive sign, last three negative.
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x2, 0x0, 0x0, 0x2
	byte		0x0, 0x0, 0x0, 0x0
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8

tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048
7049
#########################################################################
7050
# XDEF **************************************************************** #
7051
# fout(): move from fp register to memory or data register #
7052
# #
7053
# XREF **************************************************************** #
7054
# _round() - needed to create EXOP for sgl/dbl precision #
7055
# norm() - needed to create EXOP for extended precision #
7056
# ovf_res() - create default overflow result for sgl/dbl precision#
7057
# unf_res() - create default underflow result for sgl/dbl prec. #
7058
# dst_dbl() - create rounded dbl precision result. #
7059
# dst_sgl() - create rounded sgl precision result. #
7060
# fetch_dreg() - fetch dynamic k-factor reg for packed. #
7061
# bindec() - convert FP binary number to packed number. #
7062
# _mem_write() - write data to memory. #
7063
# _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064
# _dmem_write_{byte,word,long}() - write data to memory. #
7065
# store_dreg_{b,w,l}() - store data to data register file. #
7066
# facc_out_{b,w,l,d,x}() - data access error occurred. #
7067
# #
7068
# INPUT *************************************************************** #
7069
# a0 = pointer to extended precision source operand #
7070
# d0 = round prec,mode #
7071
# #
7072
# OUTPUT ************************************************************** #
7073
# fp0 : intermediate underflow or overflow result if #
7074
# OVFL/UNFL occurred for a sgl or dbl operand #
7075
# #
7076
# ALGORITHM *********************************************************** #
7077
# This routine is accessed by many handlers that need to do an #
7078
# opclass three move of an operand out to memory. #
7079
# Decode an fmove out (opclass 3) instruction to determine if #
7080
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
7081
# register or memory. The algorithm uses a standard "fmove" to create #
7082
# the rounded result. Also, since exceptions are disabled, this also #
7083
# create the correct OPERR default result if appropriate. #
7084
# For sgl or dbl precision, overflow or underflow can occur. If #
7085
# either occurs and is enabled, the EXOP. #
7086
# For extended precision, the stacked <ea> must be fixed along #
7087
# w/ the address index register as appropriate w/ _calc_ea_fout(). If #
7088
# the source is a denorm and if underflow is enabled, an EXOP must be #
7089
# created. #
7090
# For packed, the k-factor must be fetched from the instruction #
7091
# word or a data register. The <ea> must be fixed as w/ extended #
7092
# precision. Then, bindec() is called to create the appropriate #
7093
# packed result. #
7094
# If at any time an access error is flagged by one of the move- #
7095
# to-memory routines, then a special exit must be made so that the #
7096
# access error can be handled properly. #
7097
# #
7098
#########################################################################
7099
7100
	global		fout
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
	jmp		(tbl_fout.b,%pc,%a1)	# jump to routine

# dispatch table indexed by the 3-bit destination format field;
# entries are 16-bit offsets from tbl_fout.
	swbeg		&0x8
tbl_fout:
	short		fout_long - tbl_fout
	short		fout_sgl - tbl_fout
	short		fout_ext - tbl_fout
	short		fout_pack - tbl_fout
	short		fout_word - tbl_fout
	short		fout_dbl - tbl_fout
	short		fout_byte - tbl_fout
	short		fout_pack - tbl_fout
7116
7117
#################################################################
7118
# fmove.b out ###################################################
7119
#################################################################
7120
7121
# Only "Unimplemented Data Type" exceptions enter here. The operand
7122
# is either a DENORM or a NORM.
7123
fout_byte:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_byte_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_byte_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec,mode

	fmov.b		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_byte_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_byte	# write byte

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_b		# yes

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_b
	rts

# operand is a DENORM: substitute the smallest single of the same sign
# and let the fmov.b above perform the conversion/rounding.
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_byte_norm
7162
7163
#################################################################
7164
# fmove.w out ###################################################
7165
#################################################################
7166
7167
# Only "Unimplemented Data Type" exceptions enter here. The operand
7168
# is either a DENORM or a NORM.
7169
fout_word:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_word_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_word_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.w		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_word_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_word	# write word

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_w		# yes

	rts

fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_w
	rts

# operand is a DENORM: substitute the smallest single of the same sign
# and let the fmov.w above perform the conversion/rounding.
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_word_norm
7208
7209
#################################################################
7210
# fmove.l out ###################################################
7211
#################################################################
7212
7213
# Only "Unimplemented Data Type" exceptions enter here. The operand
7214
# is either a DENORM or a NORM.
7215
fout_long:
	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_long_denorm	# no

	fmovm.x		SRC(%a0),&0x80		# load value

fout_long_norm:
	fmov.l		%d0,%fpcr		# insert rnd prec:mode

	fmov.l		%fp0,%d0		# exec move out w/ correct rnd mode

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# fetch FPSR
	or.w		%d1,2+USER_FPSR(%a6)	# save new exc,accrued bits

fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_long_dn		# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	rts

fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts

# operand is a DENORM: substitute the smallest single of the same sign
# and let the fmov.l above perform the conversion/rounding.
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	andi.l		&0x80000000,%d1		# keep DENORM sign
	ori.l		&0x00800000,%d1		# make smallest sgl
	fmov.s		%d1,%fp0
	bra.b		fout_long_norm
7255
7256
#################################################################
7257
# fmove.x out ###################################################
7258
#################################################################
7259
7260
# Only "Unimplemented Data Type" exceptions enter here. The operand
7261
# is either a DENORM or a NORM.
7262
# The DENORM causes an Underflow exception.
7263
fout_ext:

# we copy the extended precision result to FP_SCR0 so that the reserved
# 16-bit field gets zeroed. we do this since we promise not to disturb
# what's at SRC(a0).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	clr.w		2+FP_SCR0_EX(%a6)	# clear reserved field
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	fmovm.x		SRC(%a0),&0x80		# return result

	bsr.l		_calc_ea_fout		# fix stacked <ea>

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

# we must not yet write the extended precision data to the stack
# in the pre-decrement case from supervisor mode or else we'll corrupt
# the stack frame. so, leave it in FP_SRC for now and deal with it later...
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# the number is a DENORM. must set the underflow exception bit
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit

	mov.b		FPCR_ENABLE(%a6),%d0
	andi.b		&0x0a,%d0		# is UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes
	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_ext_a7:
	bsr.l		_mem_write2		# write ext prec number to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	tst.b		STAG(%a6)		# is operand normalized?
	bne.b		fout_ext_denorm		# no
	rts

# UNFL or INEX enabled: build the EXOP by normalizing the denorm and
# installing the negated shift count (masked to 15 bits) as its exponent.
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the mantissa
	neg.w		%d0			# new exp = -(shft amt)
	andi.w		&0x7fff,%d0
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only old sign
	or.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts

fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x
7330
7331
#########################################################################
# fmove.s out ###########################################################
#########################################################################

# store the extended-precision source (a0) out in single precision.
# d0 holds rnd prec,mode on entry. destination is memory (<ea> stacked
# in EXC_EA) or an integer data register when the mode field is 0.
fout_sgl:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&s_mode*0x10,%d0 # insert sgl prec
	mov.l		%d0,L_SCR3(%a6)	# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0	# extract exponent
	andi.w		&0x7fff,%d0	# strip sign

	cmpi.w		%d0,&SGL_HI	# will operand overflow?
	bgt.w		fout_sgl_ovfl	# yes; go handle OVFL
	beq.w		fout_sgl_may_ovfl # maybe; go handle possible OVFL
	cmpi.w		%d0,&SGL_LO	# will operand underflow?
	blt.w		fout_sgl_unfl	# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.s"
# Unnormalized inputs can come through this point.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80	# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmov.s		%fp0,%d0	# store does convert and round

	fmov.l		&0x0,%fpcr	# clear FPCR
	fmov.l		%fpsr,%d1	# save FPSR

	or.w		%d1,2+USER_FPSR(%a6) # set possible inex2/ainex

fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1 # extract dst mode
	andi.b		&0x38,%d1	# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_exg_write_dn # must save to integer regfile

	mov.l		EXC_EA(%a6),%a0	# stacked <ea> is correct
	bsr.l		_dmem_write_long # write long

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_l	# yes

	rts

# destination is Dn: store the converted single there instead
fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1 # extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l
	rts
7389
7390
#
# here, we know that the operand would UNFL if moved out to single prec,
# so, denorm and round and then use generic store single routine to
# write the value to memory.
#
fout_sgl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

# copy the operand to the scratch area; src ptr stays on the stack until
# fout_sd_exc_unfl pops it (or it is discarded before the final rts).
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)

	clr.l		%d0		# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM # fetch src optype tag
	bne.b		fout_sgl_unfl_cont # let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm		# normalize the DENORM

fout_sgl_unfl_cont:
	lea		FP_SCR0(%a6),%a0 # pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1	# pass: rnd prec,mode
	bsr.l		unf_res		# calc default underflow result

	lea		FP_SCR0(%a6),%a0 # pass: ptr to fop
	bsr.l		dst_sgl		# convert to single prec

	mov.b		1+EXC_OPWORD(%a6),%d1 # extract dst mode
	andi.b		&0x38,%d1	# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_unfl_dn # must save to integer regfile

	mov.l		EXC_EA(%a6),%a0	# stacked <ea> is correct
	bsr.l		_dmem_write_long # write long

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_l	# yes

	bra.b		fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1 # extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

# if UNFL or INEX traps are enabled, go build the EXOP
fout_sgl_unfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1	# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl # yes
	addq.l		&0x4,%sp	# discard saved src ptr
	rts
7442
7443
#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_sgl_ovfl:
# result is inexact if any mantissa bits below the single-precision
# rounding point (low byte of hi(man) or any of lo(man)) are non-zero
	tst.b		3+SRC_HI(%a0)	# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	tst.l		SRC_LO(%a0)	# is result inexact?
	bne.b		fout_sgl_ovfl_inex2
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)	# save src ptr (popped in fout_sd_exc_ovfl)

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)	# is operand negative?
	smi		%d1		# set if so
	mov.l		L_SCR3(%a6),%d0	# pass: sgl prec,rnd mode
	bsr.l		ovf_res		# calc OVFL result
	fmovm.x		(%a0),&0x80	# load default overflow result
	fmov.s		%fp0,%d0	# store to single

	mov.b		1+EXC_OPWORD(%a6),%d1 # extract dst mode
	andi.b		&0x38,%d1	# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn # must save to integer regfile

	mov.l		EXC_EA(%a6),%a0	# stacked <ea> is correct
	bsr.l		_dmem_write_long # write long

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_l	# yes

	bra.b		fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1 # extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

# this is the OVERFLOW path, so test the OVFL and INEX enable bits
# (0x13 = OVFL|INEX2|INEX1), consistent with fmul_ovfl_tst; the previous
# mask (0x0a = UNFL|INEX2) tested the underflow enables here, so an
# enabled-OVFL trap never received its EXOP.
fout_sgl_ovfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1	# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl # yes
	addq.l		&0x4,%sp	# discard saved src ptr
	rts
7492
7493
#
# move out MAY overflow:
# (1) force the exp to 0x3fff (biased exponent for true exp 0)
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
# for the correct result.
# if exp now equals one, then it overflowed so call ovf_res.
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1	# fetch current sign
	andi.w		&0x8000,%d1	# keep it,clear exp
	ori.w		&0x3fff,%d1	# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6) # insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR

	fmov.x		FP_SCR0(%a6),%fp0 # force fop to be rounded

	fmov.l		&0x0,%fpcr	# clear FPCR

# with exp forced to 0, |rounded| >= 2.0 iff rounding carried out of the
# mantissa, i.e. the real store would have overflowed
	fabs.x		%fp0		# need absolute value
	fcmp.b		%fp0,&0x2	# did exponent increase?
	fblt.w		fout_sgl_exg	# no; go finish NORM
	bra.w		fout_sgl_ovfl	# yes; go handle overflow
7518
7519
################

# common EXOP construction for the single/double fmove-out exception
# paths. on entry the saved src ptr is on the stack (pushed by the
# fout_*_unfl / fout_*_ovfl routines).
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0	# restore src ptr

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM # was src a DENORM?
	bne.b		fout_sd_exc_cont # no

# src was a DENORM: normalize it and insert the negated shift count as
# the new (15-bit) exponent, leaving the sign bit untouched
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm
	neg.l		%d0
	andi.w		&0x7fff,%d0
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15}
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0	# restore a0

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6) # clear sign bit
	sne.b		2+FP_SCR0_EX(%a6) # set internal sign bit
	lea		FP_SCR0(%a6),%a0 # pass: ptr to DENORM

# unpack L_SCR3's prec/mode nibble into the d1 format _round() expects:
# rnd precision bits in the high word, rnd mode bits in the low word
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x0c,%d1
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	andi.w		&0x03,%d1
	clr.l		%d0		# pass: zero g,r,s
	bsr.l		_round		# round the DENORM

# restore the external sign bit from the internal sign byte
	tst.b		2+FP_SCR0_EX(%a6) # is EXOP negative?
	beq.b		fout_sd_exc_done # no
	bset		&0x7,FP_SCR0_EX(%a6) # yes

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40 # return EXOP in fp1
	rts
7568
7569
#################################################################
# fmove.d out ###################################################
#################################################################

# store the extended-precision source (a0) out in double precision.
# d0 holds rnd prec,mode on entry; destination <ea> is stacked in EXC_EA.
fout_dbl:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&d_mode*0x10,%d0 # insert dbl prec
	mov.l		%d0,L_SCR3(%a6)	# save rnd prec,mode on stack

#
# operand is a normalized number. first, we check to see if the move out
# would cause either an underflow or overflow. these cases are handled
# separately. otherwise, set the FPCR to the proper rounding mode and
# execute the move.
#
	mov.w		SRC_EX(%a0),%d0	# extract exponent
	andi.w		&0x7fff,%d0	# strip sign

	cmpi.w		%d0,&DBL_HI	# will operand overflow?
	bgt.w		fout_dbl_ovfl	# yes; go handle OVFL
	beq.w		fout_dbl_may_ovfl # maybe; go handle possible OVFL
	cmpi.w		%d0,&DBL_LO	# will operand underflow?
	blt.w		fout_dbl_unfl	# yes; go handle underflow

#
# NORMs(in range) can be stored out by a simple "fmov.d"
# Unnormalized inputs can come through this point.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80	# fetch fop from stack

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmov.d		%fp0,L_SCR1(%a6) # store does convert and round

	fmov.l		&0x0,%fpcr	# clear FPCR
	fmov.l		%fpsr,%d0	# save FPSR

	or.w		%d0,2+USER_FPSR(%a6) # set possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1	# pass: dst addr
	lea		L_SCR1(%a6),%a0	# pass: src addr
	movq.l		&0x8,%d0	# pass: opsize is 8 bytes
	bsr.l		_dmem_write	# store dbl fop to memory

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_d	# yes

	rts			# no; so we're finished
7618
7619
#
# here, we know that the operand would UNFL if moved out to double prec,
# so, denorm and round and then use generic store double routine to
# write the value to memory.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)	# save src ptr (popped in fout_sd_exc_unfl)

	clr.l		%d0		# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM # fetch src optype tag
	bne.b		fout_dbl_unfl_cont # let DENORMs fall through

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm		# normalize the DENORM

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0 # pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1	# pass: rnd prec,mode
	bsr.l		unf_res		# calc default underflow result

	lea		FP_SCR0(%a6),%a0 # pass: ptr to fop
	bsr.l		dst_dbl		# convert to double prec
	mov.l		%d0,L_SCR1(%a6)
	mov.l		%d1,L_SCR2(%a6)

	mov.l		EXC_EA(%a6),%a1	# pass: dst addr
	lea		L_SCR1(%a6),%a0	# pass: src addr
	movq.l		&0x8,%d0	# pass: opsize is 8 bytes
	bsr.l		_dmem_write	# store dbl fop to memory

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_d	# yes

# if UNFL or INEX traps are enabled, go build the EXOP
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0a,%d1	# is UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl # yes
	addq.l		&0x4,%sp	# discard saved src ptr
	rts
7663
7664
#
# it's definitely an overflow so call ovf_res to get the correct answer
#
fout_dbl_ovfl:
# result is inexact if any mantissa bits below the double-precision
# rounding point (low 11 bits of lo(man)) are non-zero
	mov.w		2+SRC_LO(%a0),%d0
	andi.w		&0x7ff,%d0
	bne.b		fout_dbl_ovfl_inex2

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)	# save src ptr (popped in fout_sd_exc_ovfl)

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)	# is operand negative?
	smi		%d1		# set if so
	mov.l		L_SCR3(%a6),%d0	# pass: dbl prec,rnd mode
	bsr.l		ovf_res		# calc OVFL result
	fmovm.x		(%a0),&0x80	# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6) # store to double

	mov.l		EXC_EA(%a6),%a1	# pass: dst addr
	lea		L_SCR1(%a6),%a0	# pass: src addr
	movq.l		&0x8,%d0	# pass: opsize is 8 bytes
	bsr.l		_dmem_write	# store dbl fop to memory

	tst.l		%d1		# did dstore fail?
	bne.l		facc_out_d	# yes

# this is the OVERFLOW path, so test the OVFL and INEX enable bits
# (0x13 = OVFL|INEX2|INEX1), consistent with fmul_ovfl_tst; the previous
# mask (0x0a = UNFL|INEX2) tested the underflow enables here, so an
# enabled-OVFL trap never received its EXOP.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1	# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl # yes
	addq.l		&0x4,%sp	# discard saved src ptr
	rts
7703
7704
#
# move out MAY overflow:
# (1) force the exp to 0x3fff (biased exponent for true exp 0)
# (2) do a move w/ appropriate rnd mode
# (3) if exp still equals zero, then insert original exponent
# for the correct result.
# if exp now equals one, then it overflowed so call ovf_res.
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1	# fetch current sign
	andi.w		&0x8000,%d1	# keep it,clear exp
	ori.w		&0x3fff,%d1	# insert exp = 0
	mov.w		%d1,FP_SCR0_EX(%a6) # insert scaled exp
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR

	fmov.x		FP_SCR0(%a6),%fp0 # force fop to be rounded

	fmov.l		&0x0,%fpcr	# clear FPCR

# with exp forced to 0, |rounded| >= 2.0 iff rounding carried out of the
# mantissa, i.e. the real store would have overflowed
	fabs.x		%fp0		# need absolute value
	fcmp.b		%fp0,&0x2	# did exponent increase?
	fblt.w		fout_dbl_exg	# no; go finish NORM
	bra.w		fout_dbl_ovfl	# yes; go handle overflow
7729
7730
#########################################################################
# XDEF ****************************************************************	#
#	dst_dbl(): create double precision value from extended prec.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = hi(double precision result)				#
#	d1 = lo(double precision result)				#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	 Changes extended precision to double precision.		#
#	 Note: no attempt is made to round the extended value to double. #
#		dbl_sign = ext_sign					#
#		dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)	#
#		get rid of ext integer bit				#
#		dbl_mant = ext_mant{62:12}				#
#									#
#		---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
#		---------------   ---------------    ---------------	#
#		95	    64    63 62       32      31     11   0	#
#				     |			 |		#
#				     |			 |		#
#				     |			 |		#
#			             v		         v		#
#			      ---------------   ---------------		#
#  double   ->		      |s|exp| mant  |   |  mant       |		#
#			      ---------------   ---------------		#
#			      63     51   32   31	    0		#
#									#
#########################################################################

dst_dbl:
	clr.l		%d0		# clear d0
	mov.w		FTEMP_EX(%a0),%d0 # get exponent
	subi.w		&EXT_BIAS,%d0	# subtract extended precision bias
	addi.w		&DBL_BIAS,%d0	# add double precision bias
	tst.b		FTEMP_HI(%a0)	# is number a denorm?
	bmi.b		dst_get_dupper	# no
	subq.w		&0x1,%d0	# yes; denorm bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0		# d0 now in upper word
	lsl.l		&0x4,%d0	# d0 in proper place for dbl prec exp
	tst.b		FTEMP_EX(%a0)	# test sign
	bpl.b		dst_get_dman	# if positive, go process mantissa
	bset		&0x1f,%d0	# if negative, set sign
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1 # get ms mantissa
	bfextu		%d1{&1:&20},%d1	# get upper 20 bits of ms
	or.l		%d1,%d0		# put these bits in ms word of double
	mov.l		%d0,L_SCR1(%a6)	# put the new exp back on the stack
	mov.l		FTEMP_HI(%a0),%d1 # get ms mantissa
	mov.l		&21,%d0		# load shift count
	lsl.l		%d0,%d1		# put lower 11 bits in upper bits
	mov.l		%d1,L_SCR2(%a6)	# build lower lword in memory
	mov.l		FTEMP_LO(%a0),%d1 # get ls mantissa
	bfextu		%d1{&0:&21},%d0	# get ls 21 bits of double
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1		# put them in double result
	mov.l		L_SCR1(%a6),%d0	# return hi lword in d0
	rts
7797
7798
#########################################################################
# XDEF ****************************************************************	#
#	dst_sgl(): create single precision value from extended prec	#
#									#
# XREF ****************************************************************	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to source operand in extended precision		#
#									#
# OUTPUT **************************************************************	#
#	d0 = single precision result					#
#									#
# ALGORITHM ***********************************************************	#
#									#
#	 Changes extended precision to single precision.		#
#		sgl_sign = ext_sign					#
#		sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)	#
#		get rid of ext integer bit				#
#		sgl_mant = ext_mant{62:12}				#
#									#
#		---------------   ---------------    ---------------	#
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
#		---------------   ---------------    ---------------	#
#		95	    64    63 62     40 32     31     12	  0	#
#				     |						#
#				     |						#
#				     |						#
#			             v					#
#			      ---------------				#
#  single   ->		      |s|exp| mant  |				#
#			      ---------------				#
#			      31     22    0				#
#									#
#########################################################################

dst_sgl:
	clr.l		%d0
	mov.w		FTEMP_EX(%a0),%d0 # get exponent
	subi.w		&EXT_BIAS,%d0	# subtract extended precision bias
	addi.w		&SGL_BIAS,%d0	# add single precision bias
	tst.b		FTEMP_HI(%a0)	# is number a denorm?
	bmi.b		dst_get_supper	# no
	subq.w		&0x1,%d0	# yes; denorm bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0		# put exp in upper word of d0
	lsl.l		&0x7,%d0	# shift it into single exp bits
	tst.b		FTEMP_EX(%a0)	# test sign
	bpl.b		dst_get_sman	# if positive, continue
	bset		&0x1f,%d0	# if negative, put in sign first
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1 # get ms mantissa
	andi.l		&0x7fffff00,%d1	# get upper 23 bits of ms
	lsr.l		&0x8,%d1	# and put them flush right
	or.l		%d1,%d0		# put these bits in ms word of single
	rts
7853
7854
##############################################################################
# fmove.p out: convert the source operand to a 12-byte packed-decimal
# string and store it to memory. the k-factor (number of significant
# digits) comes from the opword (static) or from Dn (dynamic).
fout_pack:
	bsr.l		_calc_ea_fout	# fetch the <ea>
	mov.l		%a0,-(%sp)	# save dst addr (popped in fout_pack_write)

	mov.b		STAG(%a6),%d0	# fetch input type
	bne.w		fout_pack_not_norm # input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6) # static or dynamic?
	beq.b		fout_pack_s	# static

fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1

	bsr.l		fetch_dreg	# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0 # fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0	# extract k-factor
	mov.l		%d0,-(%sp)	# save k-factor across bindec

	lea		FP_SRC(%a6),%a0	# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec		# convert xprec to packed

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l		(%sp)+,%d0	# restore k-factor

	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)

fout_pack_set:

	lea		FP_SCR0(%a6),%a0 # pass: src addr

fout_pack_write:
	mov.l		(%sp)+,%a1	# pass: dst addr
	mov.l		&0xc,%d0	# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write	# write ext prec number to memory

	tst.l		%d1		# did dstore fail?
	bne.w		fout_ext_err	# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2	# write ext prec number to memory

	tst.l		%d1		# did dstore fail?
	bne.w		fout_ext_err	# yes

	rts

# non-NORM inputs: DENORMs take the normal path; NaNs/INFs/zeroes are
# written out as-is (with the unused exponent word cleared), setting
# SNAN/AIOP and quieting the NaN for SNAN inputs
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM	# is it a DENORM?
	beq.w		fout_pack_norm	# yes
	lea		FP_SRC(%a6),%a0
	clr.w		2+FP_SRC_EX(%a6)
	cmpi.b		%d0,&SNAN	# is it an SNAN?
	beq.b		fout_pack_snan	# yes
	bra.b		fout_pack_write	# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6) # set snan bit
	bra.b		fout_pack_write
7951
7952
#########################################################################
# XDEF ****************************************************************	#
#	fmul(): emulates the fmul instruction				#
#	fsmul(): emulates the fsmul instruction				#
#	fdmul(): emulates the fdmul instruction				#
#									#
# XREF ****************************************************************	#
#	scale_to_zero_src() - scale src exponent to zero		#
#	scale_to_zero_dst() - scale dst exponent to zero		#
#	unf_res() - return default underflow result			#
#	ovf_res() - return default overflow result			#
#	res_qnan() - return QNAN result					#
#	res_snan() - return SNAN result					#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	a1 = pointer to extended precision destination operand		#
#	d0  rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms/denorms into ext/sgl/dbl precision.				#
#	For norms/denorms, scale the exponents such that a multiply	#
# instruction won't cause an exception. Use the regular fmul to		#
# compute a result. Check if the regular operands would have taken	#
# an exception. If so, return the default overflow/underflow result	#
# and return the EXOP if exceptions are enabled. Else, scale the	#
# result operand to the proper exponent.				#
#									#
#########################################################################

	align		0x10
# scale-factor thresholds, indexed by rounding precision (ext/sgl/dbl)
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe	# ext_max
	long		0x3fff - 0x407e	# sgl_max
	long		0x3fff - 0x43fe	# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001	# ext_unfl
	long		0x3fff - 0x3f80	# sgl_unfl
	long		0x3fff - 0x3c00	# dbl_unfl
7996
7997
# fsmul: fmul rounded to single precision
	global		fsmul
fsmul:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&s_mode*0x10,%d0 # insert sgl prec
	bra.b		fmul
8002
8003
# fdmul: fmul rounded to double precision; falls through into fmul
	global		fdmul
fdmul:
	andi.b		&0x30,%d0	# clear rnd prec
	ori.b		&d_mode*0x10,%d0 # insert dbl prec
8007
8008
	global		fmul
fmul:
	mov.l		%d0,L_SCR3(%a6)	# store rnd info

# d1 = (DTAG << 3) | STAG; non-zero means at least one non-NORM operand
	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1	# combine src tags
	bne.w		fmul_not_norm	# optimize on non-norm input

# both operands NORM (or DENORM): scale both exponents to zero so the
# hardware multiply cannot trap, then classify the combined scale factor
# against the precision-dependent ovfl/unfl thresholds
fmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src # scale src exponent
	mov.l		%d0,-(%sp)	# save scale factor 1

	bsr.l		scale_to_zero_dst # scale dst exponent

	add.l		%d0,(%sp)	# SCALE_FACTOR = scale1 + scale2

	mov.w		2+L_SCR3(%a6),%d1 # fetch precision
	lsr.b		&0x6,%d1	# shift to lo bits
	mov.l		(%sp)+,%d0	# load S.F.
	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w		fmul_may_ovfl	# result may rnd to overflow
	blt.w		fmul_ovfl	# result will overflow

	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w		fmul_may_unfl	# result may rnd to no unfl
	bgt.w		fmul_unfl	# result will underflow
8044
8045
#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# this will do.
#
fmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l		%fpsr,%d1	# save status
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

# undo the earlier scaling: subtract the scale factor (d0) from the
# result exponent, preserving the sign bit
fmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6) # store out result
	mov.l		%d2,-(%sp)	# save d2
	mov.w		FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
	mov.l		%d1,%d2		# make a copy
	andi.l		&0x7fff,%d1	# strip sign
	andi.w		&0x8000,%d2	# keep old sign
	sub.l		%d0,%d1		# add scale factor
	or.w		%d2,%d1		# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l		(%sp)+,%d2	# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80 # return default result in fp0
	rts
8079
8080
#
# OVERFLOW:
# - the result of the multiply operation is an overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l		%fpsr,%d1	# save status
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1	# is OVFL or INEX enabled?
	bne.b		fmul_ovfl_ena	# yes

# calculate the default result
fmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6) # is result negative?
	sne		%d1		# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0	# pass rnd prec,mode
	bsr.l		ovf_res		# calculate default result
	or.b		%d0,FPSR_CC(%a6) # set INF,N if applicable
	fmovm.x		(%a0),&0x80	# return default result in fp0
	rts
8123
8124
#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1	# test the rnd prec
	bne.b		fmul_ovfl_ena_sd # it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6) # move result to stack

# EXOP exponent = result exponent, unscaled, minus the 0x6000 bias
	mov.l		%d2,-(%sp)	# save d2
	mov.w		FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.w		%d1,%d2		# make a copy
	andi.l		&0x7fff,%d1	# strip sign
	sub.l		%d0,%d1		# add scale factor
	subi.l		&0x6000,%d1	# subtract bias
	andi.w		&0x7fff,%d1	# clear sign bit
	andi.w		&0x8000,%d2	# keep old sign
	or.w		%d2,%d1		# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l		(%sp)+,%d2	# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.b		fmul_ovfl_dis

# sgl/dbl precision: redo the multiply in extended precision with the
# user's rounding mode to obtain the EXOP mantissa
fmul_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80 # load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1	# keep rnd mode only
	fmov.l		%d1,%fpcr	# set FPCR

	fmul.x		FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l		&0x0,%fpcr	# clear FPCR
	bra.b		fmul_ovfl_ena_cont
8163
8164
#
# may OVERFLOW:
# - the result of the multiply operation MAY overflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
#
fmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80 # load dst op

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l		%fpsr,%d1	# save status
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

# operands were scaled to exp 0, so |result| >= 2.0 iff the unscaled
# operation would have overflowed
	fabs.x		%fp0,%fp1	# make a copy of result
	fcmp.b		%fp1,&0x2	# is |result| >= 2.b?
	fbge.w		fmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fmul_normal_exit
8190
8191
#
# UNDERFLOW:
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if overflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by -0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer.
	fmovm.x		FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l		&rz_mode*0x10,%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l		%fpsr,%d1	# save status
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1	# is UNFL or INEX enabled?
	bne.b		fmul_unfl_ena	# yes

fmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6) # store out result

	lea		FP_SCR0(%a6),%a0 # pass: result addr
	mov.l		L_SCR3(%a6),%d1	# pass: rnd prec,mode
	bsr.l		unf_res		# calculate default result
	or.b		%d0,FPSR_CC(%a6) # unf_res2 may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80 # return default result in fp0
	rts
8235
8236
#
# UNFL is enabled.
#
fmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40 # load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1	# is precision extended?
	bne.b		fmul_unfl_ena_sd # no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l		L_SCR3(%a6),%fpcr # set FPCR

fmul_unfl_ena_cont:
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1 # execute multiply

	fmov.l		&0x0,%fpcr	# clear FPCR

# EXOP exponent = result exponent, unscaled, plus the 0x6000 bias
	fmovm.x		&0x40,FP_SCR0(%a6) # save result to stack
	mov.l		%d2,-(%sp)	# save d2
	mov.w		FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
	mov.l		%d1,%d2		# make a copy
	andi.l		&0x7fff,%d1	# strip sign
	andi.w		&0x8000,%d2	# keep old sign
	sub.l		%d0,%d1		# add scale factor
	addi.l		&0x6000,%d1	# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1		# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6) # insert new exponent
	mov.l		(%sp)+,%d2	# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40 # return EXOP in fp1
	bra.w		fmul_unfl_dis

# sgl/dbl precision: redo with the user's rounding mode only
fmul_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1	# use only rnd mode
	fmov.l		%d1,%fpcr	# set FPCR

	bra.b		fmul_unfl_ena_cont
8278
8279
# MAY UNDERFLOW:
# -use the correct rounding mode and precision. this code favors operations
# that do not underflow.
fmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80 # load dst operand

	fmov.l		L_SCR3(%a6),%fpcr # set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0 # execute multiply

	fmov.l		%fpsr,%d1	# save status
	fmov.l		&0x0,%fpcr	# clear FPCR

	or.l		%d1,USER_FPSR(%a6) # save INEX2,N

# operands were scaled to exp 0: |result| > 2 means no underflow,
# |result| < 2 means underflow; exactly 2 is ambiguous (see below)
	fabs.x		%fp0,%fp1	# make a copy of result
	fcmp.b		%fp1,&0x2	# is |result| > 2.b?
	fbgt.w		fmul_normal_exit # no; no underflow occurred
	fblt.w		fmul_unfl	# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40 # load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1	# keep rnd prec
	ori.b		&rz_mode*0x10,%d1 # insert RZ

	fmov.l		%d1,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr	# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1 # execute multiply

	fmov.l		&0x0,%fpcr	# clear FPCR
	fabs.x		%fp1		# make absolute value
	fcmp.b		%fp1,&0x2	# is |result| < 2.b?
	fbge.w		fmul_normal_exit # no; no underflow occurred
	bra.w		fmul_unfl	# yes, underflow occurred
8323
8324
################################################################################

#
# Multiply: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry; each table row covers one dst optype
# with 8 entries indexed by the src optype (last two slots unused).
#
fmul_not_norm:
	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fmul_op.b,%pc,%d1.w)

	swbeg		&48
tbl_fmul_op:
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

# dst = DENORM: handled identically to a NORM destination.
	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

# trampolines to the shared special-result generators
fmul_res_operr:
	bra.l		res_operr
fmul_res_snan:
	bra.l		res_snan
fmul_res_qnan:
	bra.l		res_qnan
8396
#
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
# Result is a ZERO whose sign is the XOR of the two input signs.
#
	global		fmul_zero	# global for fsglmul
fmul_zero:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_zero_p		# result ZERO is pos.
fmul_zero_n:
	fmov.s		&0x80000000,%fp0	# load -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fmul_zero_p:
	fmov.s		&0x00000000,%fp0	# load +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts
8414
#
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
#
# Note: The j-bit for an infinity is a don't-care. However, to be
# strictly compatible w/ the 68881/882, we make sure to return an
# INF w/ the j-bit set if the input INF j-bit was set. Destination
# INFs take priority.
#
	global		fmul_inf_dst	# global for fsglmul
fmul_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p		# result INF is pos.
fmul_inf_dst_n:
	fabs.x		%fp0			# clear result sign
	fneg.x		%fp0			# set result sign
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fmul_inf_dst_p:
	fabs.x		%fp0			# clear result sign
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

# Source is the INF: return it, then share the sign-fixup tails above.
	global		fmul_inf_src	# global for fsglmul
fmul_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p		# result INF is pos.
	bra.b		fmul_inf_dst_n
8448
#########################################################################
8449
# XDEF **************************************************************** #
8450
# fin(): emulates the fmove instruction #
8451
# fsin(): emulates the fsmove instruction #
8452
# fdin(): emulates the fdmove instruction #
8453
# #
8454
# XREF **************************************************************** #
8455
# norm() - normalize mantissa for EXOP on denorm #
8456
# scale_to_zero_src() - scale src exponent to zero #
8457
# ovf_res() - return default overflow result #
8458
# unf_res() - return default underflow result #
8459
# res_qnan_1op() - return QNAN result #
8460
# res_snan_1op() - return SNAN result #
8461
# #
8462
# INPUT *************************************************************** #
8463
# a0 = pointer to extended precision source operand #
8464
# d0 = round prec/mode #
8465
# #
8466
# OUTPUT ************************************************************** #
8467
# fp0 = result #
8468
# fp1 = EXOP (if exception occurred) #
8469
# #
8470
# ALGORITHM *********************************************************** #
8471
# Handle NANs, infinities, and zeroes as special cases. Divide #
8472
# norms into extended, single, and double precision. #
8473
# Norms can be emulated w/ a regular fmove instruction. For #
8474
# sgl/dbl, must scale exponent and perform an "fmove". Check to see #
8475
# if the result would have overflowed/underflowed. If so, use unf_res() #
8476
# or ovf_res() to return the default result. Also return EXOP if #
8477
# exception is enabled. If no exception, return the default result. #
8478
# Unnorms don't pass through here. #
8479
# #
8480
#########################################################################
8481
8482
# fsmove: force single-precision rounding, then share the fmove core.
	global		fsin
fsin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fin

# fdmove: force double-precision rounding, then fall through to fin.
	global		fdin
fdin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

# fmove core: d0 = rnd prec,mode; a0 -> extended-precision source.
	global		fin
fin:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		STAG(%a6),%d1		# fetch src optype tag
	bne.w		fin_not_norm		# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_norm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	rts
8519
#
# for an extended precision DENORM, the UNFL exception bit is set
# the accrued bit is NOT set in this instance(no inexactness!)
#
fin_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_denorm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
8558
#
# operand is to be rounded to single or double precision
#
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

# compare the scale factor against the sgl exponent limits
	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# unscale the exponent of the rounded result before returning it
fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
8608
#
# operand is to be rounded to double precision
#
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

# compare the scale factor against the dbl exponent limits
	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op
8624
#
# operand WILL underflow when moved in to the fp register file
#
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

# exceptions disabled: produce the default (possibly denormalized) result.
fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias (EXOP convention)
	andi.w		&0x7fff,%d1
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis
8670
#
# operand WILL overflow.
#
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	sub.l		&0x6000,%d1		# subtract bias (EXOP convention)
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis

#
# the move in MAY overflow. so...
#
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit
8745
##########################################################################

#
# operand is not a NORM: check its optype and branch accordingly
# d1 = STAG value on entry.
#
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0		# do fmove in
	fmov.l		%fpsr,%d0		# no exceptions possible
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts
8769
#########################################################################
8770
# XDEF **************************************************************** #
8771
# fdiv(): emulates the fdiv instruction #
8772
# fsdiv(): emulates the fsdiv instruction #
8773
# fddiv(): emulates the fddiv instruction #
8774
# #
8775
# XREF **************************************************************** #
8776
# scale_to_zero_src() - scale src exponent to zero #
8777
# scale_to_zero_dst() - scale dst exponent to zero #
8778
# unf_res() - return default underflow result #
8779
# ovf_res() - return default overflow result #
8780
# res_qnan() - return QNAN result #
8781
# res_snan() - return SNAN result #
8782
# #
8783
# INPUT *************************************************************** #
8784
# a0 = pointer to extended precision source operand #
8785
# a1 = pointer to extended precision destination operand #
8786
# d0 rnd prec,mode #
8787
# #
8788
# OUTPUT ************************************************************** #
8789
# fp0 = result #
8790
# fp1 = EXOP (if exception occurred) #
8791
# #
8792
# ALGORITHM *********************************************************** #
8793
# Handle NANs, infinities, and zeroes as special cases. Divide #
8794
# norms/denorms into ext/sgl/dbl precision. #
8795
# For norms/denorms, scale the exponents such that a divide #
8796
# instruction won't cause an exception. Use the regular fdiv to #
8797
# compute a result. Check if the regular operands would have taken #
8798
# an exception. If so, return the default overflow/underflow result #
8799
# and return the EXOP if exceptions are enabled. Else, scale the #
8800
# result operand to the proper exponent. #
8801
# #
8802
#########################################################################
8803
8804
# scale-factor limit tables, indexed by rounding precision (ext/sgl/dbl)
	align		0x10
tbl_fdiv_unfl:
	long		0x3fff - 0x0000		# ext_unfl
	long		0x3fff - 0x3f81		# sgl_unfl
	long		0x3fff - 0x3c01		# dbl_unfl

tbl_fdiv_ovfl:
	long		0x3fff - 0x7ffe		# ext overflow exponent
	long		0x3fff - 0x407e		# sgl overflow exponent
	long		0x3fff - 0x43fe		# dbl overflow exponent

# fsdiv: force single-precision rounding, then share the fdiv core.
	global		fsdiv
fsdiv:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fdiv

# fddiv: force double-precision rounding, then fall through to fdiv.
	global		fddiv
fddiv:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

# fdiv core: a0 -> src, a1 -> dst, d0 = rnd prec,mode.
	global		fdiv
fdiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fdiv_not_norm		# optimize on non-norm input
#
# DIVIDE: NORMs and DENORMs ONLY!
#
fdiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	neg.l		(%sp)			# SCALE FACTOR = scale1 - scale2
	add.l		%d0,(%sp)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
	ble.w		fdiv_may_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
	beq.w		fdiv_may_unfl		# maybe
	bgt.w		fdiv_unfl		# yes; go handle underflow

fdiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# unscale the exponent of the rounded result before returning it
fdiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# store d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
8894
# per-precision overflow threshold exponents (ext/sgl/dbl)
tbl_fdiv_ovfl2:
	long		0x7fff
	long		0x407f
	long		0x43ff

fdiv_no_ovfl:
	mov.l		(%sp)+,%d0		# restore scale factor
	bra.b		fdiv_normal_exit

# the divide result MAY overflow: perform it, then compare the unscaled
# exponent against the overflow threshold for this precision.
fdiv_may_ovfl:
	mov.l		%d0,-(%sp)		# save scale factor

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d0
	fmov.l		&0x0,%fpcr

	or.l		%d0,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d0		# fetch new exponent
	add.l		&0xc,%sp		# clear result from stack
	andi.l		&0x7fff,%d0		# strip sign
	sub.l		(%sp),%d0		# subtract scale factor
	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4)
	blt.b		fdiv_no_ovfl
	mov.l		(%sp)+,%d0

fdiv_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fdiv_ovfl_ena		# yes

# exceptions disabled: return the default overflow result from ovf_res().
fdiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL or INEX enabled: also build the EXOP (bias-adjusted extended result)
# in fp1 before producing the default result.
fdiv_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl

fdiv_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias (EXOP convention)
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fdiv_ovfl_dis

# sgl/dbl precision: redo the divide in extended precision with only the
# round mode so the EXOP is not doubly rounded.
fdiv_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fdiv_ovfl_ena_cont
8977
# the divide result WILL underflow: compute it with RZ so unf_res() can
# round the pre-rounded value to the user's precision and mode.
fdiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fdiv_unfl_ena		# yes

# exceptions disabled: produce the default underflow result via unf_res().
fdiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
# Re-do the divide rounded to extended precision to build the EXOP in fp1,
# then rejoin the disabled path for the default result.
#
fdiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fdiv_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add bias (EXOP convention)
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fdiv_unfl_dis

# sgl/dbl precision: use only the round mode for the extended-precision redo.
fdiv_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fdiv_unfl_ena_cont
9047
#
# the divide operation MAY underflow:
#
fdiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
	fbgt.w		fdiv_normal_exit	# no; no underflow occurred
	fblt.w		fdiv_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fdiv_normal_exit	# no; no underflow occurred
	bra.w		fdiv_unfl		# yes; underflow occurred
9092
############################################################################

#
# Divide: inputs are not both normalized; what are they?
# d1 = (DTAG << 3) | STAG on entry; each table row covers one dst optype
# with 8 entries indexed by the src optype (last two slots unused).
#
fdiv_not_norm:
	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fdiv_op:
	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
	short		tbl_fdiv_op	- tbl_fdiv_op #
	short		tbl_fdiv_op	- tbl_fdiv_op #

# trampolines to the shared special-result generators
fdiv_res_qnan:
	bra.l		res_qnan
fdiv_res_snan:
	bra.l		res_snan
fdiv_res_operr:
	bra.l		res_operr
9164
# Result is a ZERO; its sign is the XOR of the two input signs.
	global		fdiv_zero_load	# global for fsgldiv
fdiv_zero_load:
	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
	mov.b		DST_EX(%a1),%d1		# or of input signs.
	eor.b		%d0,%d1
	bpl.b		fdiv_zero_load_p	# result is positive
	fmov.s		&0x80000000,%fp0	# load a -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fdiv_zero_load_p:
	fmov.s		&0x00000000,%fp0	# load a +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts

#
# The destination was In Range and the source was a ZERO. The result,
# therefore, is an INF w/ the proper sign.
# So, determine the sign and return a new INF (w/ the j-bit cleared).
#
	global		fdiv_inf_load	# global for fsgldiv
fdiv_inf_load:
	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # no; set DZ/ADZ
	mov.b		SRC_EX(%a0),%d0		# load both signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fdiv_inf_load_p		# result is positive
	fmov.s		&0xff800000,%fp0	# make result -INF
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fdiv_inf_load_p:
	fmov.s		&0x7f800000,%fp0	# make result +INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

#
# The destination was an INF w/ an In Range or ZERO source, the result is
# an INF w/ the proper sign.
# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
# dst INF is set, then the j-bit of the result INF is also set).
#
	global		fdiv_inf_dst	# global for fsgldiv
fdiv_inf_dst:
	mov.b		DST_EX(%a1),%d0		# load both signs
	mov.b		SRC_EX(%a0),%d1
	eor.b		%d0,%d1
	bpl.b		fdiv_inf_dst_p		# result is positive

	fmovm.x		DST(%a1),&0x80		# return result in fp0
	fabs.x		%fp0			# clear sign bit
	fneg.x		%fp0			# set sign bit
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
	rts

fdiv_inf_dst_p:
	fmovm.x		DST(%a1),&0x80		# return result in fp0
	fabs.x		%fp0			# return positive INF
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts
9223
#########################################################################
# XDEF ****************************************************************	#
#	fneg(): emulates the fneg instruction				#
#	fsneg(): emulates the fsneg instruction				#
#	fdneg(): emulates the fdneg instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize a denorm to provide EXOP			#
#	scale_to_zero_src() - scale sgl/dbl source exponent		#
#	ovf_res() - return default overflow result			#
#	unf_res() - return default underflow result			#
#	res_qnan_1op() - return QNAN result				#
#	res_snan_1op() - return SNAN result				#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd prec,mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, zeroes, and infinities as special cases. Separate	#
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
# emulated by simply setting the sign bit. Sgl/dbl operands must be	#
# scaled and an actual fneg performed to see if overflow/underflow	#
# would have occurred. If so, return default underflow/overflow result.	#
# Else, scale the result exponent and return result. FPSR gets set	#
# based on the result value.						#
#									#
#########################################################################

# fsneg: force single-precision rounding, then share the fneg path.
	global		fsneg
fsneg:
	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fneg
9261
9262
# fdneg: force double-precision rounding, then fall through into fneg.
	global		fdneg
fdneg:
	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
	ori.b		&d_mode*0x10,%d0	# insert dbl prec; fall through to fneg
9266
9267
	global		fneg
fneg:
	mov.l		%d0,L_SCR3(%a6)		# store rnd prec,mode for later
	mov.b		STAG(%a6),%d1		# fetch source operand type tag
	bne.w		fneg_not_norm		# optimize on non-norm input

#
# NEGATE SIGN : norms and denorms ONLY!
#
fneg_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fneg_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_norm_load		# result sign is positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
fneg_norm_load:
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new sign,exp
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set.
# the accrued bit is NOT set in this instance (no inexactness!)
#
fneg_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fneg_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	eori.w		&0x8000,%d0		# negate sign
	bpl.b		fneg_denorm_done	# no; result positive
	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
fneg_denorm_done:
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new sign,exp
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fneg_ext_unfl_ena	# yes; must also build EXOP
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand; return EXOP in fp1.
#
fneg_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
9337
9338
#
# operand is either single or double
#
fneg_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fneg_dbl

#
# operand is to be rounded to single precision
#
fneg_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fneg_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# unscale the result exponent by the scale factor and return.
fneg_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
9387
9388
#
# operand is to be rounded to double precision
#
fneg_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fneg_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fneg_sd_may_ovfl	# maybe; go check
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
	bra.w		fneg_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fneg_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
	bpl.b		fneg_sd_unfl_tst	# result positive
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, go calculate EXOP first.
fneg_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fneg_sd_unfl_ena	# yes

fneg_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fneg_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_unfl_dis
9449
9450
#
# operand WILL overflow.
#
fneg_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fneg_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fneg_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fneg_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended (and don't alter the FPSR); return EXOP in fp1.
#
fneg_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fneg_sd_ovfl_dis
9503
9504
#
# the move in MAY overflow. so...
#
fneg_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode

	fneg.x		FP_SCR0(%a6),%fp0	# perform negation

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.0?
	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fneg_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fneg_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fneg_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# do the fneg; at this point, only possible ops are ZERO and INF.
# use fneg to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
	fmov.l		%fpsr,%d0
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts
9548
9549
#########################################################################
# XDEF ****************************************************************	#
#	ftst(): emulates the ftest instruction				#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic instruction	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#									#
# OUTPUT **************************************************************	#
#	none								#
#									#
# ALGORITHM ***********************************************************	#
#	Check the source operand tag (STAG) and set the FPSR condition	#
# codes according to the operand type and sign.				#
#									#
#########################################################################

	global		ftst
ftst:
	mov.b		STAG(%a6),%d1		# fetch source operand type tag
	bne.b		ftst_not_norm		# optimize on non-norm input

#
# Norm:
#
ftst_norm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_norm_m		# yes
	rts
ftst_norm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# input is not normalized; what is it?
# (fall-through after the tag tests below handles DENORM.)
#
ftst_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		ftst_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		ftst_inf
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

#
# Denorm:
#
ftst_denorm:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_denorm_m		# yes
	rts
ftst_denorm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

#
# Infinity:
#
ftst_inf:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_inf_m		# yes
ftst_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
ftst_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
	rts

#
# Zero:
#
ftst_zero:
	tst.b		SRC_EX(%a0)		# is operand negative?
	bmi.b		ftst_zero_m		# yes
ftst_zero_p:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
ftst_zero_m:
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts
9633
9634
#########################################################################
# XDEF ****************************************************************	#
#	fint(): emulates the fint instruction				#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fint", then	#
# store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#									#
#########################################################################

	global		fint
fint:
	mov.b		STAG(%a6),%d1		# fetch source operand type tag
	bne.b		fint_not_norm		# optimize on non-norm input

#
# Norm:
#
fint_norm:
	andi.b		&0x30,%d0		# clear prec bits => ext prec

	fmov.l		%d0,%fpcr		# set FPCR (user rnd mode)
	fmov.l		&0x0,%fpsr		# clear FPSR

	fint.x		SRC(%a0),%fp0		# execute fint

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fint_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fint_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fint_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fint_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fint_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# a0 now points at the fake NORM
	bra.b		fint_norm

#
# Zero:
#
fint_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fint_zero_m		# yes
fint_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fint_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fint_inf:
	fmovm.x		SRC(%a0),&0x80		# return source INF in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fint_inf_m		# yes
fint_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fint_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts
9739
9740
#########################################################################
# XDEF ****************************************************************	#
#	fintrz(): emulates the fintrz instruction			#
#									#
# XREF ****************************************************************	#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = round precision/mode					#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#									#
# ALGORITHM ***********************************************************	#
#	Separate according to operand type. Unnorms don't pass through	#
# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
# then store the resulting FPSR bits.					#
#	For denorms, force the j-bit to a one and do the same as for	#
# norms. Denorms are so low that the answer will either be a zero or a	#
# one.									#
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
# as appropriate.							#
#									#
#########################################################################

	global		fintrz
fintrz:
	mov.b		STAG(%a6),%d1		# fetch source operand type tag
	bne.b		fintrz_not_norm		# optimize on non-norm input

#
# Norm:
# (fintrz rounds toward zero regardless of FPCR, so no FPCR setup needed.)
#
fintrz_norm:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fintrz.x	SRC(%a0),%fp0		# execute fintrz

	fmov.l		%fpsr,%d0		# save FPSR
	or.l		%d0,USER_FPSR(%a6)	# set exception bits

	rts

#
# input is not normalized; what is it?
#
fintrz_not_norm:
	cmpi.b		%d1,&ZERO		# weed out ZERO
	beq.b		fintrz_zero
	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fintrz_inf
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.b		fintrz_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	bra.l		res_qnan_1op		# weed out QNAN

#
# Denorm:
#
# for DENORMs, the result will be (+/-)ZERO.
# also, the INEX2 and AINEX exception bits will be set.
# so, we could either set these manually or force the DENORM
# to a very small NORM and ship it to the NORM routine.
# I do the latter.
#
fintrz_denorm:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
	lea		FP_SCR0(%a6),%a0	# a0 now points at the fake NORM
	bra.b		fintrz_norm

#
# Zero:
#
fintrz_zero:
	tst.b		SRC_EX(%a0)		# is ZERO negative?
	bmi.b		fintrz_zero_m		# yes
fintrz_zero_p:
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fintrz_zero_m:
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
	rts

#
# Infinity:
#
fintrz_inf:
	fmovm.x		SRC(%a0),&0x80		# return source INF in fp0
	tst.b		SRC_EX(%a0)		# is INF negative?
	bmi.b		fintrz_inf_m		# yes
fintrz_inf_p:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
fintrz_inf_m:
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
	rts
9841
9842
#########################################################################
# XDEF ****************************************************************	#
#	fabs(): emulates the fabs instruction				#
#	fsabs(): emulates the fsabs instruction				#
#	fdabs(): emulates the fdabs instruction				#
#									#
# XREF ****************************************************************	#
#	norm() - normalize denorm mantissa to provide EXOP		#
#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
#	unf_res() - calculate underflow result				#
#	ovf_res() - calculate overflow result				#
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
#									#
# INPUT ***************************************************************	#
#	a0 = pointer to extended precision source operand		#
#	d0 = rnd precision/mode						#
#									#
# OUTPUT **************************************************************	#
#	fp0 = result							#
#	fp1 = EXOP (if exception occurred)				#
#									#
# ALGORITHM ***********************************************************	#
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
# norms into extended, single, and double precision.			#
#	Simply clear sign for extended precision norm. Ext prec denorm	#
# gets an EXOP created for it since it's an underflow.			#
#	Double and single precision can overflow and underflow. First,	#
# scale the operand such that the exponent is zero. Perform an "fabs"	#
# using the correct rnd mode/prec. Check to see if the original		#
# exponent would take an exception. If so, use unf_res() or ovf_res()	#
# to calculate the default result. Also, create the EXOP for the	#
# exceptional case. If no exception should occur, insert the correct	#
# result exponent and return.						#
#	Unnorms don't pass through here.				#
#									#
#########################################################################

# fsabs: force single-precision rounding, then share the fabs path.
	global		fsabs
fsabs:
	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fabs
9884
9885
# fdabs: force double-precision rounding, then fall through into fabs.
	global		fdabs
fdabs:
	andi.b		&0x30,%d0		# clear rnd prec, keep rnd mode
	ori.b		&d_mode*0x10,%d0	# insert dbl precision; fall through to fabs
9889
9890
	global		fabs
fabs:
	mov.l		%d0,L_SCR3(%a6)		# store rnd prec,mode for later
	mov.b		STAG(%a6),%d1		# fetch source operand type tag
	bne.w		fabs_not_norm		# optimize on non-norm input

#
# ABSOLUTE VALUE: norms and denorms ONLY!
#
fabs_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

#
# precision selected is extended. so...we can not get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d1
	bclr		&15,%d1			# force absolute value
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts

#
# for an extended precision DENORM, the UNFL exception bit is set.
# the accrued bit is NOT set in this instance (no inexactness!)
#
fabs_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.b		fabs_not_ext		# no; go handle sgl or dbl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.w		SRC_EX(%a0),%d0
	bclr		&15,%d0			# clear sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent

	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0

	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fabs_ext_unfl_ena	# yes; must also build EXOP
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand; return EXOP in fp1.
#
fabs_ext_unfl_ena:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat old sign, new exponent
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
9955
9956
#
# operand is either single or double
#
fabs_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fabs_dbl

#
# operand is to be rounded to single precision
#
fabs_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved in to the fp reg file
#
fabs_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# unscale the result exponent by the scale factor and return.
fabs_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10005
10006
#
# operand is to be rounded to double precision
#
fabs_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.b		fabs_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fabs_sd_may_ovfl	# maybe; go check
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
	bra.w		fabs_sd_normal		# no; go handle normalized op

#
# operand WILL underflow when moved in to the fp register file
#
fabs_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value

# if underflow or inexact is enabled, go calculate EXOP first.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fabs_sd_unfl_ena	# yes

fabs_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow is enabled.
# Therefore, we must return the result rounded to extended precision.
#
fabs_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat new sign,new exp
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_unfl_dis
10064
10065
#
# operand WILL overflow.
#
fabs_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fabs_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fabs_sd_ovfl_ena	# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fabs_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended (and don't alter the FPSR); return EXOP in fp1.
#
fabs_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# subtract scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat sign,exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fabs_sd_ovfl_dis
10118
10119
#
# the move in MAY overflow. so...
#
fabs_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR to user rnd prec,mode

	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.0?
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fabs_sd_normal_exit

##########################################################################

#
# input is not normalized; what is it?
#
fabs_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fabs_denorm
	cmpi.b		%d1,&SNAN		# weed out SNAN
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNAN
	beq.l		res_qnan_1op

# only ZERO and INF remain; fabs gives the magnitude either way.
	fabs.x		SRC(%a0),%fp0		# force absolute value

	cmpi.b		%d1,&INF		# weed out INF
	beq.b		fabs_inf
fabs_zero:
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
	rts
fabs_inf:
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
	rts
10163
10164
#########################################################################
10165
# XDEF **************************************************************** #
10166
# fcmp(): fp compare op routine #
10167
# #
10168
# XREF **************************************************************** #
10169
# res_qnan() - return QNAN result #
10170
# res_snan() - return SNAN result #
10171
# #
10172
# INPUT *************************************************************** #
10173
# a0 = pointer to extended precision source operand #
10174
# a1 = pointer to extended precision destination operand #
10175
# d0 = round prec/mode #
10176
# #
10177
# OUTPUT ************************************************************** #
10178
# None #
10179
# #
10180
# ALGORITHM *********************************************************** #
10181
# Handle NANs and denorms as special cases. For everything else, #
10182
# just use the actual fcmp instruction to produce the correct condition #
10183
# codes. #
10184
# #
10185
#########################################################################
10186
10187
global fcmp
10188
fcmp:
	clr.w		%d1
	mov.b		DTAG(%a6),%d1		# build dispatch index:
	lsl.b		&0x3,%d1		#   d1 = {DTAG:STAG}
	or.b		STAG(%a6),%d1
	bne.b		fcmp_not_norm		# optimize on non-norm input

#
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
#
fcmp_norm:
	fmovm.x		DST(%a1),&0x80		# load dst op into fp0

	fcmp.x		%fp0,SRC(%a0)		# do compare

	fmov.l		%fpsr,%d0		# save FPSR
	rol.l		&0x8,%d0		# extract ccode bits
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)

	rts
10208
10209
#
# fcmp: inputs are not both normalized; what are they?
# Jump through the table indexed by {DTAG:STAG} (rows = dst tag,
# columns = src tag); each entry is a 16-bit offset from the table base.
#
fcmp_not_norm:
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fcmp_op:
	short		fcmp_norm - tbl_fcmp_op # NORM - NORM
	short		fcmp_norm - tbl_fcmp_op # NORM - ZERO
	short		fcmp_norm - tbl_fcmp_op # NORM - INF
	short		fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
	short		fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
	short		fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
	short		tbl_fcmp_op - tbl_fcmp_op #
	short		tbl_fcmp_op - tbl_fcmp_op #

	short		fcmp_norm - tbl_fcmp_op # ZERO - NORM
	short		fcmp_norm - tbl_fcmp_op # ZERO - ZERO
	short		fcmp_norm - tbl_fcmp_op # ZERO - INF
	short		fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
	short		fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
	short		fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
	short		tbl_fcmp_op - tbl_fcmp_op #
	short		tbl_fcmp_op - tbl_fcmp_op #

	short		fcmp_norm - tbl_fcmp_op # INF - NORM
	short		fcmp_norm - tbl_fcmp_op # INF - ZERO
	short		fcmp_norm - tbl_fcmp_op # INF - INF
	short		fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
	short		fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
	short		fcmp_res_snan - tbl_fcmp_op # INF - SNAN
	short		tbl_fcmp_op - tbl_fcmp_op #
	short		tbl_fcmp_op - tbl_fcmp_op #

	short		fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
	short		fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
	short		fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
	short		fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
	short		fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
	short		fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
	short		tbl_fcmp_op - tbl_fcmp_op #
	short		tbl_fcmp_op - tbl_fcmp_op #

	short		fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
	short		fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
	short		fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
	short		fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
	short		fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
	short		fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
	short		tbl_fcmp_op - tbl_fcmp_op #
	short		tbl_fcmp_op - tbl_fcmp_op #

	short		fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
	short		fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
	short		fcmp_res_snan - tbl_fcmp_op # SNAN - INF
	short		fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
	short		fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
	short		fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
	short		tbl_fcmp_op - tbl_fcmp_op #
	short		tbl_fcmp_op - tbl_fcmp_op #
10272
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
fcmp_res_qnan:
	bsr.l		res_qnan		# return QNAN result
	andi.b		&0xf7,FPSR_CC(%a6)	# clear the 'N' ccode bit
	rts
fcmp_res_snan:
	bsr.l		res_snan		# return SNAN result
	andi.b		&0xf7,FPSR_CC(%a6)	# clear the 'N' ccode bit
	rts
10282
10283
#
# DENORMs are a little more difficult.
# If you have 2 DENORMs, then you can just force the j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
# and use the fcmp_norm routine.
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
# (1) signs are (+) and the DENORM is the dst or
# (2) signs are (-) and the DENORM is the src
#

# convert the DENORM src operand into a small NORM in FP_SCR0, then compare.
fcmp_dnrm_s:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# a0 now points at corrected src
	bra.w		fcmp_norm

# convert the DENORM dst operand into a small NORM in FP_SCR0, then compare.
fcmp_dnrm_d:
	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a1	# a1 now points at corrected dst
	bra.w		fcmp_norm

# both operands are DENORMs: correct both into small NORMs
# (dst -> FP_SCR1, src -> FP_SCR0), then compare.
fcmp_dnrm_sd:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		DST_HI(%a1),%d0
	bset		&31,%d0			# DENORM dst; make into small norm
	mov.l		%d0,FP_SCR1_HI(%a6)
	mov.l		SRC_HI(%a0),%d0
	bset		&31,%d0			# DENORM src; make into small norm
	mov.l		%d0,FP_SCR0_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR1(%a6),%a1
	lea		FP_SCR0(%a6),%a0
	bra.w		fcmp_norm

# dst is a NORM, src is a DENORM.
fcmp_nrm_dnrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_s		# unlike signs; correct src & compare

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bmi.b		fcmp_nrm_dnrm_m		# yes
	rts
fcmp_nrm_dnrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts

# dst is a DENORM, src is a NORM.
fcmp_dnrm_nrm:
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bmi.w		fcmp_dnrm_d		# unlike signs; correct dst & compare

# signs are the same, so must determine the answer ourselves.
	tst.b		%d0			# is src op negative?
	bpl.b		fcmp_dnrm_nrm_m		# no
	rts
fcmp_dnrm_nrm_m:
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
	rts
10355
10356
#########################################################################
10357
# XDEF **************************************************************** #
10358
# fsglmul(): emulates the fsglmul instruction #
10359
# #
10360
# XREF **************************************************************** #
10361
# scale_to_zero_src() - scale src exponent to zero #
10362
# scale_to_zero_dst() - scale dst exponent to zero #
10363
# unf_res4() - return default underflow result for sglop #
10364
# ovf_res() - return default overflow result #
10365
# res_qnan() - return QNAN result #
10366
# res_snan() - return SNAN result #
10367
# #
10368
# INPUT *************************************************************** #
10369
# a0 = pointer to extended precision source operand #
10370
# a1 = pointer to extended precision destination operand #
10371
# d0 rnd prec,mode #
10372
# #
10373
# OUTPUT ************************************************************** #
10374
# fp0 = result #
10375
# fp1 = EXOP (if exception occurred) #
10376
# #
10377
# ALGORITHM *********************************************************** #
10378
# Handle NANs, infinities, and zeroes as special cases. Divide #
10379
# norms/denorms into ext/sgl/dbl precision. #
10380
# For norms/denorms, scale the exponents such that a multiply #
10381
# instruction won't cause an exception. Use the regular fsglmul to #
10382
# compute a result. Check if the regular operands would have taken #
10383
# an exception. If so, return the default overflow/underflow result #
10384
# and return the EXOP if exceptions are enabled. Else, scale the #
10385
# result operand to the proper exponent. #
10386
# #
10387
#########################################################################
10388
10389
global fsglmul
10390
fsglmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1		# build dispatch index:
	lsl.b		&0x3,%d1		#   d1 = {DTAG:STAG}
	or.b		STAG(%a6),%d1

	bne.w		fsglmul_not_norm	# optimize on non-norm input

# both ops are NORMs: copy them to the scratch area and scale the
# exponents to zero so the multiply itself cannot trap.
fsglmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2

	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
	blt.w		fsglmul_ovfl		# result will overflow

	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
	bgt.w		fsglmul_unfl		# result will underflow

# no overflow/underflow possible: just do the multiply.
fsglmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# re-insert the true exponent (undo the scaling) and return the result.
fsglmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10450
10451
# result will definitely overflow: do the multiply for its status bits,
# then build the default overflow result (and the EXOP if enabled).
fsglmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

fsglmul_ovfl_tst:

# save setting this until now because this is where fsglmul_may_ovfl may jump in
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsglmul_ovfl_ena	# yes

# exceptions disabled: return the default overflow result only.
fsglmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# force prec = ext
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL or INEX enabled: also build the exceptional operand (EXOP) in fp1
# by biasing the scaled exponent back down by 0x6000.
fsglmul_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsglmul_ovfl_dis
10499
10500
# result is on the edge of overflow: do the multiply, then test the
# magnitude of the rounded result to see whether it actually overflowed.
fsglmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fsglmul_normal_exit
10519
10520
# result will definitely underflow: redo the multiply in round-to-zero,
# then produce the default underflow result (and the EXOP if enabled).
fsglmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsglmul_unfl_ena	# yes

# exceptions disabled: return the default underflow result only.
fsglmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled: also build the exceptional operand (EXOP) in fp1
# by biasing the scaled exponent back up by 0x6000.
#
fsglmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fsglmul_unfl_dis
10576
10577
# result is on the edge of underflow: do the multiply, then inspect the
# magnitude of the rounded result to decide.
fsglmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# is |result| > 2.b?
	fbgt.w		fsglmul_normal_exit	# no; no underflow occurred
	fblt.w		fsglmul_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
	bra.w		fsglmul_unfl		# yes, underflow occurred
10618
10619
##############################################################################
10620
10621
#
# Single Precision Multiply: inputs are not both normalized; what are they?
# Jump through the table indexed by {DTAG:STAG} (rows = dst tag,
# columns = src tag).
#
fsglmul_not_norm:
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsglmul_op:
	short		fsglmul_norm - tbl_fsglmul_op # NORM x NORM
	short		fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
	short		fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
	short		fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
	short		fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
	short		fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
	short		tbl_fsglmul_op - tbl_fsglmul_op #
	short		tbl_fsglmul_op - tbl_fsglmul_op #

	short		fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
	short		fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
	short		fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
	short		fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
	short		fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
	short		fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
	short		tbl_fsglmul_op - tbl_fsglmul_op #
	short		tbl_fsglmul_op - tbl_fsglmul_op #

	short		fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
	short		fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
	short		fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
	short		fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
	short		fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
	short		fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
	short		tbl_fsglmul_op - tbl_fsglmul_op #
	short		tbl_fsglmul_op - tbl_fsglmul_op #

	short		fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
	short		fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
	short		fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
	short		fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
	short		fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
	short		fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
	short		tbl_fsglmul_op - tbl_fsglmul_op #
	short		tbl_fsglmul_op - tbl_fsglmul_op #

	short		fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
	short		fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
	short		fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
	short		fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
	short		fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
	short		fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
	short		tbl_fsglmul_op - tbl_fsglmul_op #
	short		tbl_fsglmul_op - tbl_fsglmul_op #

	short		fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
	short		fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
	short		fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
	short		fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
	short		fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
	short		fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
	short		tbl_fsglmul_op - tbl_fsglmul_op #
	short		tbl_fsglmul_op - tbl_fsglmul_op #

# special-case stubs: share the generic fmul handlers.
fsglmul_res_operr:
	bra.l		res_operr
fsglmul_res_snan:
	bra.l		res_snan
fsglmul_res_qnan:
	bra.l		res_qnan
fsglmul_zero:
	bra.l		fmul_zero
fsglmul_inf_src:
	bra.l		fmul_inf_src
fsglmul_inf_dst:
	bra.l		fmul_inf_dst
10696
10697
#########################################################################
10698
# XDEF **************************************************************** #
10699
# fsgldiv(): emulates the fsgldiv instruction #
10700
# #
10701
# XREF **************************************************************** #
10702
# scale_to_zero_src() - scale src exponent to zero #
10703
# scale_to_zero_dst() - scale dst exponent to zero #
10704
# unf_res4() - return default underflow result for sglop #
10705
# ovf_res() - return default overflow result #
10706
# res_qnan() - return QNAN result #
10707
# res_snan() - return SNAN result #
10708
# #
10709
# INPUT *************************************************************** #
10710
# a0 = pointer to extended precision source operand #
10711
# a1 = pointer to extended precision destination operand #
10712
# d0 rnd prec,mode #
10713
# #
10714
# OUTPUT ************************************************************** #
10715
# fp0 = result #
10716
# fp1 = EXOP (if exception occurred) #
10717
# #
10718
# ALGORITHM *********************************************************** #
10719
# Handle NANs, infinities, and zeroes as special cases. Divide #
10720
# norms/denorms into ext/sgl/dbl precision. #
10721
# For norms/denorms, scale the exponents such that a divide #
10722
# instruction won't cause an exception. Use the regular fsgldiv to #
10723
# compute a result. Check if the regular operands would have taken #
10724
# an exception. If so, return the default overflow/underflow result #
10725
# and return the EXOP if exceptions are enabled. Else, scale the #
10726
# result operand to the proper exponent. #
10727
# #
10728
#########################################################################
10729
10730
global fsgldiv
10731
fsgldiv:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1		# build dispatch index:
	lsl.b		&0x3,%d1		#   d1 = {DTAG:STAG}
	or.b		STAG(%a6),%d1		# combine src tags

	bne.w		fsgldiv_not_norm	# optimize on non-norm input

#
# DIVIDE: NORMs and DENORMs ONLY!
#
fsgldiv_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# calculate scale factor 1
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# calculate scale factor 2

	neg.l		(%sp)			# S.F. = scale1 - scale2
	add.l		%d0,(%sp)

	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
	lsr.b		&0x6,%d1
	mov.l		(%sp)+,%d0		# d0 = total scale factor
	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
	ble.w		fsgldiv_may_ovfl

	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
	beq.w		fsgldiv_may_unfl	# maybe
	bgt.w		fsgldiv_unfl		# yes; go handle underflow

# no overflow/underflow possible: just do the divide.
fsgldiv_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# re-insert the true exponent (undo the scaling) and return the result.
fsgldiv_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
10797
10798
# result may overflow: do the divide, then check the unscaled exponent
# of the result against the maximum to see if it really overflowed.
fsgldiv_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide

	fmov.l		%fpsr,%d1
	fmov.l		&0x0,%fpcr

	or.l		%d1,USER_FPSR(%a6)	# save INEX,N

	fmovm.x		&0x01,-(%sp)		# save result to stack
	mov.w		(%sp),%d1		# fetch new exponent
	add.l		&0xc,%sp		# clear result from stack
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# add scale factor
	cmp.l		%d1,&0x7fff		# did divide overflow?
	blt.b		fsgldiv_normal_exit

fsgldiv_ovfl_tst:
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fsgldiv_ovfl_ena	# yes

# exceptions disabled: return the default overflow result only.
fsgldiv_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
	andi.b		&0x30,%d0		# kill precision
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

# OVFL or INEX enabled: also build the exceptional operand (EXOP) in fp1
# by biasing the scaled exponent back down by 0x6000.
fsgldiv_ovfl_ena:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	subi.l		&0x6000,%d1		# subtract new bias
	andi.w		&0x7fff,%d1		# clear ms bit
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_ovfl_dis
10852
10853
# result will definitely underflow: redo the divide in round-to-zero,
# then produce the default underflow result (and the EXOP if enabled).
fsgldiv_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fsgldiv_unfl_ena	# yes

# exceptions disabled: return the default underflow result only.
fsgldiv_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res4		# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled: also build the exceptional operand (EXOP) in fp1
# by biasing the scaled exponent back up by 0x6000.
#
fsgldiv_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# add scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear top bit
	or.w		%d2,%d1			# concat old sign, new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fsgldiv_unfl_dis
10909
10910
#
# the divide operation MAY underflow: do it, then inspect the magnitude
# of the rounded result to decide.
#
fsgldiv_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x1		# is |result| > 1.b?
	fbgt.w		fsgldiv_normal_exit	# no; no underflow occurred
	fblt.w		fsgldiv_unfl		# yes; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 1. but,
# we don't know if the result was an underflow that rounded up to a 1
# or a normalized number that rounded down to a 1. so, redo the entire
# operation using RZ as the rounding mode to see what the pre-rounded
# result is. this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1

	clr.l		%d1			# clear scratch register
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
	bra.w		fsgldiv_unfl		# yes; underflow occurred
10953
10954
############################################################################
10955
10956
#
# Divide: inputs are not both normalized; what are they?
# Jump through the table indexed by {DTAG:STAG} (rows = dst tag,
# columns = src tag).
#
fsgldiv_not_norm:
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)

	swbeg		&48
tbl_fsgldiv_op:
	short		fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
	short		fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
	short		fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
	short		fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #

	short		fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
	short		fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
	short		fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
	short		fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #

	short		fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
	short		fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
	short		fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
	short		fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #

	short		fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #

	short		fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
	short		fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
	short		fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
	short		fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
	short		fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #

	short		fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
	short		fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
	short		fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
	short		fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
	short		fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #
	short		tbl_fsgldiv_op - tbl_fsgldiv_op #

# special-case stubs: share the generic fdiv handlers.
fsgldiv_res_qnan:
	bra.l		res_qnan
fsgldiv_res_snan:
	bra.l		res_snan
fsgldiv_res_operr:
	bra.l		res_operr
fsgldiv_inf_load:
	bra.l		fdiv_inf_load
fsgldiv_zero_load:
	bra.l		fdiv_zero_load
fsgldiv_inf_dst:
	bra.l		fdiv_inf_dst
11031
11032
#########################################################################
11033
# XDEF **************************************************************** #
11034
# fadd(): emulates the fadd instruction #
11035
# fsadd(): emulates the fadd instruction #
11036
# fdadd(): emulates the fdadd instruction #
11037
# #
11038
# XREF **************************************************************** #
11039
# addsub_scaler2() - scale the operands so they won't take exc #
11040
# ovf_res() - return default overflow result #
11041
# unf_res() - return default underflow result #
11042
# res_qnan() - set QNAN result #
11043
# res_snan() - set SNAN result #
11044
# res_operr() - set OPERR result #
11045
# scale_to_zero_src() - set src operand exponent equal to zero #
11046
# scale_to_zero_dst() - set dst operand exponent equal to zero #
11047
# #
11048
# INPUT *************************************************************** #
11049
# a0 = pointer to extended precision source operand #
11050
# a1 = pointer to extended precision destination operand #
11051
# #
11052
# OUTPUT ************************************************************** #
11053
# fp0 = result #
11054
# fp1 = EXOP (if exception occurred) #
11055
# #
11056
# ALGORITHM *********************************************************** #
11057
# Handle NANs, infinities, and zeroes as special cases. Divide #
11058
# norms into extended, single, and double precision. #
11059
# Do addition after scaling exponents such that exception won't #
11060
# occur. Then, check result exponent to see if exception would have #
11061
# occurred. If so, return default result and maybe EXOP. Else, insert #
11062
# the correct result exponent and return. Set FPSR bits as appropriate. #
11063
# #
11064
#########################################################################
11065
11066
# fsadd: single-precision-rounded add. Forces the rounding precision
# field of d0 to single, then joins the common fadd path.
global fsadd
11067
fsadd:
11068
andi.b &0x30,%d0 # clear rnd prec
11069
ori.b &s_mode*0x10,%d0 # insert sgl prec
11070
bra.b fadd
11071
11072
# fdadd: double-precision-rounded add. Forces the rounding precision
# field of d0 to double, then falls through into fadd below.
global fdadd
11073
fdadd:
11074
andi.b &0x30,%d0 # clear rnd prec
11075
ori.b &d_mode*0x10,%d0 # insert dbl prec
11076
11077
global fadd
11078
fadd:
11079
mov.l %d0,L_SCR3(%a6) # store rnd info
11080
11081
clr.w %d1
11082
mov.b DTAG(%a6),%d1
11083
lsl.b &0x3,%d1
11084
or.b STAG(%a6),%d1 # combine src tags
11085
11086
bne.w fadd_not_norm # optimize on non-norm input
11087
11088
#
11089
# ADD: norms and denorms
11090
#
11091
fadd_norm:
11092
bsr.l addsub_scaler2 # scale exponents
11093
11094
fadd_zero_entry:
11095
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11096
11097
fmov.l &0x0,%fpsr # clear FPSR
11098
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11099
11100
fadd.x FP_SCR0(%a6),%fp0 # execute add
11101
11102
fmov.l &0x0,%fpcr # clear FPCR
11103
fmov.l %fpsr,%d1 # fetch INEX2,N,Z
11104
11105
or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11106
11107
fbeq.w fadd_zero_exit # if result is zero, end now
11108
11109
mov.l %d2,-(%sp) # save d2
11110
11111
fmovm.x &0x01,-(%sp) # save result to stack
11112
11113
mov.w 2+L_SCR3(%a6),%d1
11114
lsr.b &0x6,%d1
11115
11116
mov.w (%sp),%d2 # fetch new sign, exp
11117
andi.l &0x7fff,%d2 # strip sign
11118
sub.l %d0,%d2 # add scale factor
11119
11120
cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121
bge.b fadd_ovfl # yes
11122
11123
cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124
blt.w fadd_unfl # yes
11125
beq.w fadd_may_unfl # maybe; go find out
11126
11127
fadd_normal:
11128
mov.w (%sp),%d1
11129
andi.w &0x8000,%d1 # keep sign
11130
or.w %d2,%d1 # concat sign,new exp
11131
mov.w %d1,(%sp) # insert new exponent
11132
11133
fmovm.x (%sp)+,&0x80 # return result in fp0
11134
11135
mov.l (%sp)+,%d2 # restore d2
11136
rts
11137
11138
fadd_zero_exit:
11139
# fmov.s &0x00000000,%fp0 # return zero in fp0
11140
rts
11141
11142
tbl_fadd_ovfl:
11143
long 0x7fff # ext ovfl
11144
long 0x407f # sgl ovfl
11145
long 0x43ff # dbl ovfl
11146
11147
tbl_fadd_unfl:
11148
long 0x0000 # ext unfl
11149
long 0x3f81 # sgl unfl
11150
long 0x3c01 # dbl unfl
11151
11152
fadd_ovfl:
11153
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154
11155
mov.b FPCR_ENABLE(%a6),%d1
11156
andi.b &0x13,%d1 # is OVFL or INEX enabled?
11157
bne.b fadd_ovfl_ena # yes
11158
11159
add.l &0xc,%sp
11160
fadd_ovfl_dis:
11161
btst &neg_bit,FPSR_CC(%a6) # is result negative?
11162
sne %d1 # set sign param accordingly
11163
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11164
bsr.l ovf_res # calculate default result
11165
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11166
fmovm.x (%a0),&0x80 # return default result in fp0
11167
mov.l (%sp)+,%d2 # restore d2
11168
rts
11169
11170
fadd_ovfl_ena:
11171
mov.b L_SCR3(%a6),%d1
11172
andi.b &0xc0,%d1 # is precision extended?
11173
bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
11174
11175
fadd_ovfl_ena_cont:
11176
mov.w (%sp),%d1
11177
andi.w &0x8000,%d1 # keep sign
11178
subi.l &0x6000,%d2 # add extra bias
11179
andi.w &0x7fff,%d2
11180
or.w %d2,%d1 # concat sign,new exp
11181
mov.w %d1,(%sp) # insert new exponent
11182
11183
fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11184
bra.b fadd_ovfl_dis
11185
11186
fadd_ovfl_ena_sd:
11187
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11188
11189
mov.l L_SCR3(%a6),%d1
11190
andi.b &0x30,%d1 # keep rnd mode
11191
fmov.l %d1,%fpcr # set FPCR
11192
11193
fadd.x FP_SCR0(%a6),%fp0 # execute add
11194
11195
fmov.l &0x0,%fpcr # clear FPCR
11196
11197
add.l &0xc,%sp
11198
fmovm.x &0x01,-(%sp)
11199
bra.b fadd_ovfl_ena_cont
11200
11201
fadd_unfl:
11202
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203
11204
add.l &0xc,%sp
11205
11206
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11207
11208
fmov.l &rz_mode*0x10,%fpcr # set FPCR
11209
fmov.l &0x0,%fpsr # clear FPSR
11210
11211
fadd.x FP_SCR0(%a6),%fp0 # execute add
11212
11213
fmov.l &0x0,%fpcr # clear FPCR
11214
fmov.l %fpsr,%d1 # save status
11215
11216
or.l %d1,USER_FPSR(%a6) # save INEX,N
11217
11218
mov.b FPCR_ENABLE(%a6),%d1
11219
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11220
bne.b fadd_unfl_ena # yes
11221
11222
fadd_unfl_dis:
11223
fmovm.x &0x80,FP_SCR0(%a6) # store out result
11224
11225
lea FP_SCR0(%a6),%a0 # pass: result addr
11226
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11227
bsr.l unf_res # calculate default result
11228
or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
11229
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11230
mov.l (%sp)+,%d2 # restore d2
11231
rts
11232
11233
# Underflow (or INEX) exception enabled: re-run the add to build the
# EXOP — the exceptional operand with its exponent bias-adjusted by
# 0x6000 — and return it in fp1, then finish via fadd_unfl_dis.
fadd_unfl_ena:
11234
fmovm.x FP_SCR1(%a6),&0x40 # load dst op
11235
11236
mov.l L_SCR3(%a6),%d1
11237
andi.b &0xc0,%d1 # is precision extended?
11238
bne.b fadd_unfl_ena_sd # no; sgl or dbl
11239
11240
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11241
11242
fadd_unfl_ena_cont:
11243
fmov.l &0x0,%fpsr # clear FPSR
11244
11245
fadd.x FP_SCR0(%a6),%fp1 # execute add
11246
11247
fmov.l &0x0,%fpcr # clear FPCR
11248
11249
fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
11250
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11251
mov.l %d1,%d2 # make a copy
11252
andi.l &0x7fff,%d1 # strip sign
11253
andi.w &0x8000,%d2 # keep old sign
11254
sub.l %d0,%d1 # add scale factor
11255
addi.l &0x6000,%d1 # add new bias
11256
andi.w &0x7fff,%d1 # clear top bit
11257
or.w %d2,%d1 # concat sign,new exp
11258
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11259
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11260
bra.w fadd_unfl_dis
11261
11262
# sgl/dbl precision: redo the add with extended precision but the
# user's rounding mode, so the EXOP mantissa is not doubly rounded.
fadd_unfl_ena_sd:
11263
mov.l L_SCR3(%a6),%d1
11264
andi.b &0x30,%d1 # use only rnd mode
11265
fmov.l %d1,%fpcr # set FPCR
11266
11267
bra.b fadd_unfl_ena_cont
11268
11269
#
11270
# result is equal to the smallest normalized number in the selected precision
11271
# if the precision is extended, this result could not have come from an
11272
# underflow that rounded up.
11273
#
11274
fadd_may_unfl:
11275
mov.l L_SCR3(%a6),%d1
11276
andi.b &0xc0,%d1
11277
beq.w fadd_normal # yes; no underflow occurred
11278
11279
mov.l 0x4(%sp),%d1 # extract hi(man)
11280
cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11281
bne.w fadd_normal # no; no underflow occurred
11282
11283
tst.l 0x8(%sp) # is lo(man) = 0x0?
11284
bne.w fadd_normal # no; no underflow occurred
11285
11286
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287
beq.w fadd_normal # no; no underflow occurred
11288
11289
#
11290
# ok, so now the result has a exponent equal to the smallest normalized
11291
# exponent for the selected precision. also, the mantissa is equal to
11292
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293
# g,r,s.
11294
# now, we must determine whether the pre-rounded result was an underflow
11295
# rounded "up" or a normalized number rounded "down".
11296
# so, we do this by re-executing the add using RZ as the rounding mode and
11297
# seeing if the new result is smaller or equal to the current result.
11298
#
11299
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11300
11301
mov.l L_SCR3(%a6),%d1
11302
andi.b &0xc0,%d1 # keep rnd prec
11303
ori.b &rz_mode*0x10,%d1 # insert rnd mode
11304
fmov.l %d1,%fpcr # set FPCR
11305
fmov.l &0x0,%fpsr # clear FPSR
11306
11307
fadd.x FP_SCR0(%a6),%fp1 # execute add
11308
11309
fmov.l &0x0,%fpcr # clear FPCR
11310
11311
fabs.x %fp0 # compare absolute values
11312
fabs.x %fp1
11313
fcmp.x %fp0,%fp1 # is first result > second?
11314
11315
fbgt.w fadd_unfl # yes; it's an underflow
11316
bra.w fadd_normal # no; it's not an underflow
11317
11318
##########################################################################
11319
11320
#
11321
# Add: inputs are not both normalized; what are they?
11322
#
11323
# Dispatch for non-norm inputs: d1 = (DTAG<<3)|STAG indexes the table of
# 16-bit offsets; rows are the dst operand class, columns the src class.
fadd_not_norm:
11324
mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
11325
jmp (tbl_fadd_op.b,%pc,%d1.w*1)
11326
11327
swbeg &48
11328
tbl_fadd_op:
11329
short fadd_norm - tbl_fadd_op # NORM + NORM
11330
short fadd_zero_src - tbl_fadd_op # NORM + ZERO
11331
short fadd_inf_src - tbl_fadd_op # NORM + INF
11332
short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
11333
short fadd_norm - tbl_fadd_op # NORM + DENORM
11334
short fadd_res_snan - tbl_fadd_op # NORM + SNAN
11335
short tbl_fadd_op - tbl_fadd_op #
11336
short tbl_fadd_op - tbl_fadd_op #
11337
11338
short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
11339
short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
11340
short fadd_inf_src - tbl_fadd_op # ZERO + INF
11341
short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
11342
short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
11343
short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
11344
short tbl_fadd_op - tbl_fadd_op #
11345
short tbl_fadd_op - tbl_fadd_op #
11346
11347
short fadd_inf_dst - tbl_fadd_op # INF + NORM
11348
short fadd_inf_dst - tbl_fadd_op # INF + ZERO
11349
short fadd_inf_2 - tbl_fadd_op # INF + INF
11350
short fadd_res_qnan - tbl_fadd_op # INF + QNAN
11351
short fadd_inf_dst - tbl_fadd_op # INF + DENORM
11352
short fadd_res_snan - tbl_fadd_op # INF + SNAN
11353
short tbl_fadd_op - tbl_fadd_op #
11354
short tbl_fadd_op - tbl_fadd_op #
11355
11356
short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
11357
short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
11358
short fadd_res_qnan - tbl_fadd_op # QNAN + INF
11359
short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
11360
short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
11361
short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
11362
short tbl_fadd_op - tbl_fadd_op #
11363
short tbl_fadd_op - tbl_fadd_op #
11364
11365
short fadd_norm - tbl_fadd_op # DENORM + NORM
11366
short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
11367
short fadd_inf_src - tbl_fadd_op # DENORM + INF
11368
short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
11369
short fadd_norm - tbl_fadd_op # DENORM + DENORM
11370
short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
11371
short tbl_fadd_op - tbl_fadd_op #
11372
short tbl_fadd_op - tbl_fadd_op #
11373
11374
short fadd_res_snan - tbl_fadd_op # SNAN + NORM
11375
short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
11376
short fadd_res_snan - tbl_fadd_op # SNAN + INF
11377
short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
11378
short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
11379
short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
11380
short tbl_fadd_op - tbl_fadd_op #
11381
short tbl_fadd_op - tbl_fadd_op #
11382
11383
# NAN operands: tail-branch to the shared default-NAN handlers.
fadd_res_qnan:
11384
bra.l res_qnan
11385
fadd_res_snan:
11386
bra.l res_snan
11387
11388
#
11389
# both operands are ZEROes
11390
#
11391
fadd_zero_2:
11392
mov.b SRC_EX(%a0),%d0 # are the signs opposite
11393
mov.b DST_EX(%a1),%d1
11394
eor.b %d0,%d1
11395
bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
11396
11397
# the signs are the same. so determine whether they are positive or negative
11398
# and return the appropriately signed zero.
11399
tst.b %d0 # are ZEROes positive or negative?
11400
bmi.b fadd_zero_rm # negative
11401
fmov.s &0x00000000,%fp0 # return +ZERO
11402
mov.b &z_bmask,FPSR_CC(%a6) # set Z
11403
rts
11404
11405
#
11406
# the ZEROes have opposite signs:
11407
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408
# - -ZERO is returned in the case of RM.
11409
#
11410
fadd_zero_2_chk_rm:
11411
mov.b 3+L_SCR3(%a6),%d1
11412
andi.b &0x30,%d1 # extract rnd mode
11413
cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
11414
beq.b fadd_zero_rm # yes
11415
fmov.s &0x00000000,%fp0 # return +ZERO
11416
mov.b &z_bmask,FPSR_CC(%a6) # set Z
11417
rts
11418
11419
fadd_zero_rm:
11420
fmov.s &0x80000000,%fp0 # return -ZERO
11421
mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422
rts
11423
11424
#
11425
# one operand is a ZERO and the other is a DENORM or NORM. scale
11426
# the DENORM or NORM and jump to the regular fadd routine.
11427
#
11428
fadd_zero_dst:
11429
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11430
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11431
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11432
bsr.l scale_to_zero_src # scale the operand
11433
clr.w FP_SCR1_EX(%a6)
11434
clr.l FP_SCR1_HI(%a6)
11435
clr.l FP_SCR1_LO(%a6)
11436
bra.w fadd_zero_entry # go execute fadd
11437
11438
fadd_zero_src:
11439
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11440
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11441
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11442
bsr.l scale_to_zero_dst # scale the operand
11443
clr.w FP_SCR0_EX(%a6)
11444
clr.l FP_SCR0_HI(%a6)
11445
clr.l FP_SCR0_LO(%a6)
11446
bra.w fadd_zero_entry # go execute fadd
11447
11448
#
11449
# both operands are INFs. an OPERR will result if the INFs have
11450
# different signs. else, an INF of the same sign is returned
11451
#
11452
fadd_inf_2:
11453
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11454
mov.b DST_EX(%a1),%d1
11455
eor.b %d1,%d0
11456
bmi.l res_operr # weed out (-INF)+(+INF)
11457
11458
# ok, so it's not an OPERR. but, we do have to remember to return the
11459
# src INF since that's where the 881/882 gets the j-bit from...
11460
11461
#
11462
# operands are INF and one of {ZERO, INF, DENORM, NORM}
11463
#
11464
fadd_inf_src:
11465
fmovm.x SRC(%a0),&0x80 # return src INF
11466
tst.b SRC_EX(%a0) # is INF positive?
11467
bpl.b fadd_inf_done # yes; we're done
11468
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469
rts
11470
11471
#
11472
# operands are INF and one of {ZERO, INF, DENORM, NORM}
11473
#
11474
fadd_inf_dst:
11475
fmovm.x DST(%a1),&0x80 # return dst INF
11476
tst.b DST_EX(%a1) # is INF positive?
11477
bpl.b fadd_inf_done # yes; we're done
11478
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479
rts
11480
11481
fadd_inf_done:
11482
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11483
rts
11484
11485
#########################################################################
11486
# XDEF **************************************************************** #
11487
# fsub(): emulates the fsub instruction #
11488
# fssub(): emulates the fssub instruction #
11489
# fdsub(): emulates the fdsub instruction #
11490
# #
11491
# XREF **************************************************************** #
11492
# addsub_scaler2() - scale the operands so they won't take exc #
11493
# ovf_res() - return default overflow result #
11494
# unf_res() - return default underflow result #
11495
# res_qnan() - set QNAN result #
11496
# res_snan() - set SNAN result #
11497
# res_operr() - set OPERR result #
11498
# scale_to_zero_src() - set src operand exponent equal to zero #
11499
# scale_to_zero_dst() - set dst operand exponent equal to zero #
11500
# #
11501
# INPUT *************************************************************** #
11502
# a0 = pointer to extended precision source operand #
11503
# a1 = pointer to extended precision destination operand #
11504
# #
11505
# OUTPUT ************************************************************** #
11506
# fp0 = result #
11507
# fp1 = EXOP (if exception occurred) #
11508
# #
11509
# ALGORITHM *********************************************************** #
11510
# Handle NANs, infinities, and zeroes as special cases. Divide #
11511
# norms into extended, single, and double precision. #
11512
# Do subtraction after scaling exponents such that exception won't#
11513
# occur. Then, check result exponent to see if exception would have #
11514
# occurred. If so, return default result and maybe EXOP. Else, insert #
11515
# the correct result exponent and return. Set FPSR bits as appropriate. #
11516
# #
11517
#########################################################################
11518
11519
# fssub: single-precision-rounded subtract. Forces the rounding
# precision field of d0 to single, then joins the common fsub path.
global fssub
11520
fssub:
11521
andi.b &0x30,%d0 # clear rnd prec
11522
ori.b &s_mode*0x10,%d0 # insert sgl prec
11523
bra.b fsub
11524
11525
# fdsub: double-precision-rounded subtract. Forces the rounding
# precision field of d0 to double, then falls through into fsub below.
global fdsub
11526
fdsub:
11527
andi.b &0x30,%d0 # clear rnd prec
11528
ori.b &d_mode*0x10,%d0 # insert dbl prec
11529
11530
global fsub
11531
fsub:
11532
mov.l %d0,L_SCR3(%a6) # store rnd info
11533
11534
clr.w %d1
11535
mov.b DTAG(%a6),%d1
11536
lsl.b &0x3,%d1
11537
or.b STAG(%a6),%d1 # combine src tags
11538
11539
bne.w fsub_not_norm # optimize on non-norm input
11540
11541
#
11542
# SUB: norms and denorms
11543
#
11544
fsub_norm:
11545
bsr.l addsub_scaler2 # scale exponents
11546
11547
fsub_zero_entry:
11548
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11549
11550
fmov.l &0x0,%fpsr # clear FPSR
11551
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11552
11553
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11554
11555
fmov.l &0x0,%fpcr # clear FPCR
11556
fmov.l %fpsr,%d1 # fetch INEX2, N, Z
11557
11558
or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
11559
11560
fbeq.w fsub_zero_exit # if result zero, end now
11561
11562
mov.l %d2,-(%sp) # save d2
11563
11564
fmovm.x &0x01,-(%sp) # save result to stack
11565
11566
mov.w 2+L_SCR3(%a6),%d1
11567
lsr.b &0x6,%d1
11568
11569
mov.w (%sp),%d2 # fetch new exponent
11570
andi.l &0x7fff,%d2 # strip sign
11571
sub.l %d0,%d2 # add scale factor
11572
11573
cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574
bge.b fsub_ovfl # yes
11575
11576
cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577
blt.w fsub_unfl # yes
11578
beq.w fsub_may_unfl # maybe; go find out
11579
11580
fsub_normal:
11581
mov.w (%sp),%d1
11582
andi.w &0x8000,%d1 # keep sign
11583
or.w %d2,%d1 # insert new exponent
11584
mov.w %d1,(%sp) # insert new exponent
11585
11586
fmovm.x (%sp)+,&0x80 # return result in fp0
11587
11588
mov.l (%sp)+,%d2 # restore d2
11589
rts
11590
11591
fsub_zero_exit:
11592
# fmov.s &0x00000000,%fp0 # return zero in fp0
11593
rts
11594
11595
tbl_fsub_ovfl:
11596
long 0x7fff # ext ovfl
11597
long 0x407f # sgl ovfl
11598
long 0x43ff # dbl ovfl
11599
11600
tbl_fsub_unfl:
11601
long 0x0000 # ext unfl
11602
long 0x3f81 # sgl unfl
11603
long 0x3c01 # dbl unfl
11604
11605
fsub_ovfl:
11606
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607
11608
mov.b FPCR_ENABLE(%a6),%d1
11609
andi.b &0x13,%d1 # is OVFL or INEX enabled?
11610
bne.b fsub_ovfl_ena # yes
11611
11612
add.l &0xc,%sp
11613
fsub_ovfl_dis:
11614
btst &neg_bit,FPSR_CC(%a6) # is result negative?
11615
sne %d1 # set sign param accordingly
11616
mov.l L_SCR3(%a6),%d0 # pass prec:rnd
11617
bsr.l ovf_res # calculate default result
11618
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
11619
fmovm.x (%a0),&0x80 # return default result in fp0
11620
mov.l (%sp)+,%d2 # restore d2
11621
rts
11622
11623
fsub_ovfl_ena:
11624
mov.b L_SCR3(%a6),%d1
11625
andi.b &0xc0,%d1 # is precision extended?
11626
bne.b fsub_ovfl_ena_sd # no
11627
11628
fsub_ovfl_ena_cont:
11629
mov.w (%sp),%d1 # fetch {sgn,exp}
11630
andi.w &0x8000,%d1 # keep sign
11631
subi.l &0x6000,%d2 # subtract new bias
11632
andi.w &0x7fff,%d2 # clear top bit
11633
or.w %d2,%d1 # concat sign,exp
11634
mov.w %d1,(%sp) # insert new exponent
11635
11636
fmovm.x (%sp)+,&0x40 # return EXOP in fp1
11637
bra.b fsub_ovfl_dis
11638
11639
fsub_ovfl_ena_sd:
11640
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11641
11642
mov.l L_SCR3(%a6),%d1
11643
andi.b &0x30,%d1 # clear rnd prec
11644
fmov.l %d1,%fpcr # set FPCR
11645
11646
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11647
11648
fmov.l &0x0,%fpcr # clear FPCR
11649
11650
add.l &0xc,%sp
11651
fmovm.x &0x01,-(%sp)
11652
bra.b fsub_ovfl_ena_cont
11653
11654
fsub_unfl:
11655
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656
11657
add.l &0xc,%sp
11658
11659
fmovm.x FP_SCR1(%a6),&0x80 # load dst op
11660
11661
fmov.l &rz_mode*0x10,%fpcr # set FPCR
11662
fmov.l &0x0,%fpsr # clear FPSR
11663
11664
fsub.x FP_SCR0(%a6),%fp0 # execute subtract
11665
11666
fmov.l &0x0,%fpcr # clear FPCR
11667
fmov.l %fpsr,%d1 # save status
11668
11669
or.l %d1,USER_FPSR(%a6)
11670
11671
mov.b FPCR_ENABLE(%a6),%d1
11672
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
11673
bne.b fsub_unfl_ena # yes
11674
11675
fsub_unfl_dis:
11676
fmovm.x &0x80,FP_SCR0(%a6) # store out result
11677
11678
lea FP_SCR0(%a6),%a0 # pass: result addr
11679
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
11680
bsr.l unf_res # calculate default result
11681
or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
11682
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
11683
mov.l (%sp)+,%d2 # restore d2
11684
rts
11685
11686
# Underflow (or INEX) exception enabled: re-run the subtract to build
# the EXOP — the exceptional operand with its exponent bias-adjusted by
# 0x6000 — and return it in fp1, then finish via fsub_unfl_dis.
fsub_unfl_ena:
11687
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11688
11689
mov.l L_SCR3(%a6),%d1
11690
andi.b &0xc0,%d1 # is precision extended?
11691
bne.b fsub_unfl_ena_sd # no
11692
11693
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11694
11695
fsub_unfl_ena_cont:
11696
fmov.l &0x0,%fpsr # clear FPSR
11697
11698
fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11699
11700
fmov.l &0x0,%fpcr # clear FPCR
11701
11702
fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
11703
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
11704
mov.l %d1,%d2 # make a copy
11705
andi.l &0x7fff,%d1 # strip sign
11706
andi.w &0x8000,%d2 # keep old sign
11707
sub.l %d0,%d1 # add scale factor
11708
addi.l &0x6000,%d1 # add new bias
11709
andi.w &0x7fff,%d1 # clear top bit
11710
or.w %d2,%d1 # concat sgn,exp
11711
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
11712
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
11713
bra.w fsub_unfl_dis
11714
11715
# sgl/dbl precision: redo the subtract with extended precision but the
# user's rounding mode, so the EXOP mantissa is not doubly rounded.
fsub_unfl_ena_sd:
11716
mov.l L_SCR3(%a6),%d1
11717
andi.b &0x30,%d1 # clear rnd prec
11718
fmov.l %d1,%fpcr # set FPCR
11719
11720
bra.b fsub_unfl_ena_cont
11721
11722
#
11723
# result is equal to the smallest normalized number in the selected precision
11724
# if the precision is extended, this result could not have come from an
11725
# underflow that rounded up.
11726
#
11727
fsub_may_unfl:
11728
mov.l L_SCR3(%a6),%d1
11729
andi.b &0xc0,%d1 # fetch rnd prec
11730
beq.w fsub_normal # yes; no underflow occurred
11731
11732
mov.l 0x4(%sp),%d1
11733
cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
11734
bne.w fsub_normal # no; no underflow occurred
11735
11736
tst.l 0x8(%sp) # is lo(man) = 0x0?
11737
bne.w fsub_normal # no; no underflow occurred
11738
11739
btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740
beq.w fsub_normal # no; no underflow occurred
11741
11742
#
11743
# ok, so now the result has a exponent equal to the smallest normalized
11744
# exponent for the selected precision. also, the mantissa is equal to
11745
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746
# g,r,s.
11747
# now, we must determine whether the pre-rounded result was an underflow
11748
# rounded "up" or a normalized number rounded "down".
11749
# so, we do this by re-executing the subtract using RZ as the rounding mode and
11750
# seeing if the new result is smaller or equal to the current result.
11751
#
11752
fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
11753
11754
mov.l L_SCR3(%a6),%d1
11755
andi.b &0xc0,%d1 # keep rnd prec
11756
ori.b &rz_mode*0x10,%d1 # insert rnd mode
11757
fmov.l %d1,%fpcr # set FPCR
11758
fmov.l &0x0,%fpsr # clear FPSR
11759
11760
fsub.x FP_SCR0(%a6),%fp1 # execute subtract
11761
11762
fmov.l &0x0,%fpcr # clear FPCR
11763
11764
fabs.x %fp0 # compare absolute values
11765
fabs.x %fp1
11766
fcmp.x %fp0,%fp1 # is first result > second?
11767
11768
fbgt.w fsub_unfl # yes; it's an underflow
11769
bra.w fsub_normal # no; it's not an underflow
11770
11771
##########################################################################
11772
11773
#
11774
# Sub: inputs are not both normalized; what are they?
11775
#
11776
# Dispatch for non-norm inputs: d1 = (DTAG<<3)|STAG indexes the table of
# 16-bit offsets; rows are the dst operand class, columns the src class.
fsub_not_norm:
11777
mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
11778
jmp (tbl_fsub_op.b,%pc,%d1.w*1)
11779
11780
swbeg &48
11781
tbl_fsub_op:
11782
short fsub_norm - tbl_fsub_op # NORM - NORM
11783
short fsub_zero_src - tbl_fsub_op # NORM - ZERO
11784
short fsub_inf_src - tbl_fsub_op # NORM - INF
11785
short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
11786
short fsub_norm - tbl_fsub_op # NORM - DENORM
11787
short fsub_res_snan - tbl_fsub_op # NORM - SNAN
11788
short tbl_fsub_op - tbl_fsub_op #
11789
short tbl_fsub_op - tbl_fsub_op #
11790
11791
short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
11792
short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
11793
short fsub_inf_src - tbl_fsub_op # ZERO - INF
11794
short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
11795
short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
11796
short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
11797
short tbl_fsub_op - tbl_fsub_op #
11798
short tbl_fsub_op - tbl_fsub_op #
11799
11800
short fsub_inf_dst - tbl_fsub_op # INF - NORM
11801
short fsub_inf_dst - tbl_fsub_op # INF - ZERO
11802
short fsub_inf_2 - tbl_fsub_op # INF - INF
11803
short fsub_res_qnan - tbl_fsub_op # INF - QNAN
11804
short fsub_inf_dst - tbl_fsub_op # INF - DENORM
11805
short fsub_res_snan - tbl_fsub_op # INF - SNAN
11806
short tbl_fsub_op - tbl_fsub_op #
11807
short tbl_fsub_op - tbl_fsub_op #
11808
11809
short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
11810
short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
11811
short fsub_res_qnan - tbl_fsub_op # QNAN - INF
11812
short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
11813
short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
11814
short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
11815
short tbl_fsub_op - tbl_fsub_op #
11816
short tbl_fsub_op - tbl_fsub_op #
11817
11818
short fsub_norm - tbl_fsub_op # DENORM - NORM
11819
short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
11820
short fsub_inf_src - tbl_fsub_op # DENORM - INF
11821
short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
11822
short fsub_norm - tbl_fsub_op # DENORM - DENORM
11823
short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
11824
short tbl_fsub_op - tbl_fsub_op #
11825
short tbl_fsub_op - tbl_fsub_op #
11826
11827
short fsub_res_snan - tbl_fsub_op # SNAN - NORM
11828
short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
11829
short fsub_res_snan - tbl_fsub_op # SNAN - INF
11830
short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
11831
short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
11832
short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
11833
short tbl_fsub_op - tbl_fsub_op #
11834
short tbl_fsub_op - tbl_fsub_op #
11835
11836
# NAN operands: tail-branch to the shared default-NAN handlers.
fsub_res_qnan:
11837
bra.l res_qnan
11838
fsub_res_snan:
11839
bra.l res_snan
11840
11841
#
11842
# both operands are ZEROes
11843
#
11844
fsub_zero_2:
11845
mov.b SRC_EX(%a0),%d0
11846
mov.b DST_EX(%a1),%d1
11847
eor.b %d1,%d0
11848
bpl.b fsub_zero_2_chk_rm
11849
11850
# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
11851
tst.b %d0 # is dst negative?
11852
bmi.b fsub_zero_2_rm # yes
11853
fmov.s &0x00000000,%fp0 # no; return +ZERO
11854
mov.b &z_bmask,FPSR_CC(%a6) # set Z
11855
rts
11856
11857
#
11858
# the ZEROes have the same signs:
11859
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860
# - -ZERO is returned in the case of RM.
11861
#
11862
fsub_zero_2_chk_rm:
11863
mov.b 3+L_SCR3(%a6),%d1
11864
andi.b &0x30,%d1 # extract rnd mode
11865
cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
11866
beq.b fsub_zero_2_rm # yes
11867
fmov.s &0x00000000,%fp0 # no; return +ZERO
11868
mov.b &z_bmask,FPSR_CC(%a6) # set Z
11869
rts
11870
11871
fsub_zero_2_rm:
11872
fmov.s &0x80000000,%fp0 # return -ZERO
11873
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11874
rts
11875
11876
#
11877
# one operand is a ZERO and the other is a DENORM or a NORM.
11878
# scale the DENORM or NORM and jump to the regular fsub routine.
11879
#
11880
fsub_zero_dst:
11881
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
11882
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
11883
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
11884
bsr.l scale_to_zero_src # scale the operand
11885
clr.w FP_SCR1_EX(%a6)
11886
clr.l FP_SCR1_HI(%a6)
11887
clr.l FP_SCR1_LO(%a6)
11888
bra.w fsub_zero_entry # go execute fsub
11889
11890
fsub_zero_src:
11891
mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
11892
mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
11893
mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
11894
bsr.l scale_to_zero_dst # scale the operand
11895
clr.w FP_SCR0_EX(%a6)
11896
clr.l FP_SCR0_HI(%a6)
11897
clr.l FP_SCR0_LO(%a6)
11898
bra.w fsub_zero_entry # go execute fsub
11899
11900
#
11901
# both operands are INFs. an OPERR will result if the INFs have the
11902
# same signs. else, an INF w/ the sign of the destination INF is returned.
11903
#
11904
fsub_inf_2:
11905
mov.b SRC_EX(%a0),%d0 # exclusive or the signs
11906
mov.b DST_EX(%a1),%d1
11907
eor.b %d1,%d0
11908
bpl.l res_operr # weed out (-INF)+(+INF)
11909
11910
# ok, so it's not an OPERR. but we do have to remember to return
11911
# the src INF since that's where the 881/882 gets the j-bit.
11912
11913
fsub_inf_src:
11914
fmovm.x SRC(%a0),&0x80 # return src INF
11915
fneg.x %fp0 # invert sign
11916
fbge.w fsub_inf_done # sign is now positive
11917
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918
rts
11919
11920
fsub_inf_dst:
11921
fmovm.x DST(%a1),&0x80 # return dst INF
11922
tst.b DST_EX(%a1) # is INF negative?
11923
bpl.b fsub_inf_done # no
11924
mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925
rts
11926
11927
fsub_inf_done:
11928
mov.b &inf_bmask,FPSR_CC(%a6) # set INF
11929
rts
11930
11931
#########################################################################
11932
# XDEF **************************************************************** #
11933
# fsqrt(): emulates the fsqrt instruction #
11934
# fssqrt(): emulates the fssqrt instruction #
11935
# fdsqrt(): emulates the fdsqrt instruction #
11936
# #
11937
# XREF **************************************************************** #
11938
# scale_sqrt() - scale the source operand #
11939
# unf_res() - return default underflow result #
11940
# ovf_res() - return default overflow result #
11941
# res_qnan_1op() - return QNAN result #
11942
# res_snan_1op() - return SNAN result #
11943
# #
11944
# INPUT *************************************************************** #
11945
# a0 = pointer to extended precision source operand #
11946
# d0 rnd prec,mode #
11947
# #
11948
# OUTPUT ************************************************************** #
11949
# fp0 = result #
11950
# fp1 = EXOP (if exception occurred) #
11951
# #
11952
# ALGORITHM *********************************************************** #
11953
# Handle NANs, infinities, and zeroes as special cases. Divide #
11954
# norms/denorms into ext/sgl/dbl precision. #
11955
# For norms/denorms, scale the exponents such that a sqrt #
11956
# instruction won't cause an exception. Use the regular fsqrt to #
11957
# compute a result. Check if the regular operands would have taken #
11958
# an exception. If so, return the default overflow/underflow result #
11959
# and return the EXOP if exceptions are enabled. Else, scale the #
11960
# result operand to the proper exponent. #
11961
# #
11962
#########################################################################
11963
11964
# fssqrt: single-precision-rounded square root. Forces the rounding
# precision field of d0 to single, then joins the common fsqrt path.
global fssqrt
11965
fssqrt:
11966
andi.b &0x30,%d0 # clear rnd prec
11967
ori.b &s_mode*0x10,%d0 # insert sgl precision
11968
bra.b fsqrt
11969
11970
# fdsqrt: double-precision-rounded square root. Forces the rounding
# precision field of d0 to double, then falls through into fsqrt below.
global fdsqrt
11971
fdsqrt:
11972
andi.b &0x30,%d0 # clear rnd prec
11973
ori.b &d_mode*0x10,%d0 # insert dbl precision
11974
11975
# fsqrt: common square-root emulation entry.
# In:  a0 = ptr to extended-precision source operand
#      d0 = rounding prec/mode control word
# Out: fp0 = result; fp1 = EXOP if an exception occurred
global fsqrt
11976
fsqrt:
11977
mov.l %d0,L_SCR3(%a6) # store rnd info
11978
clr.w %d1 # clear upper byte for STAG fetch below
11979
mov.b STAG(%a6),%d1 # d1 = source operand type tag
11980
bne.w fsqrt_not_norm # optimize on non-norm input
11981
11982
#
11983
# SQUARE ROOT: norms and denorms ONLY!
11984
#
11985
fsqrt_norm:
11986
tst.b SRC_EX(%a0) # is operand negative?
11987
bmi.l res_operr # yes
11988
11989
andi.b &0xc0,%d0 # is precision extended?
11990
bne.b fsqrt_not_ext # no; go handle sgl or dbl
11991
11992
# extended precision: sqrt of a positive norm can neither overflow nor
# underflow, so the hardware fsqrt can be used directly.
fmov.l L_SCR3(%a6),%fpcr # set FPCR
11993
fmov.l &0x0,%fpsr # clear FPSR
11994
11995
fsqrt.x (%a0),%fp0 # execute square root
11996
11997
fmov.l %fpsr,%d1
11998
or.l %d1,USER_FPSR(%a6) # set N,INEX
11999
12000
rts
12001
12002
# denorm source: scale it up into norm range first, then share the
# normal-operand path.
fsqrt_denorm:
12003
tst.b SRC_EX(%a0) # is operand negative?
12004
bmi.l res_operr # yes
12005
12006
andi.b &0xc0,%d0 # is precision extended?
12007
bne.b fsqrt_not_ext # no; go handle sgl or dbl
12008
12009
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12010
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12011
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12012
12013
bsr.l scale_sqrt # calculate scale factor
12014
12015
bra.w fsqrt_sd_normal
12016
12017
#
12018
# operand is either single or double
12019
#
12020
fsqrt_not_ext:
12021
cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
12022
bne.w fsqrt_dbl
12023
12024
#
12025
# operand is to be rounded to single precision
12026
#
12027
fsqrt_sgl:
12028
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12029
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12030
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12031
12032
bsr.l scale_sqrt # calculate scale factor
12033
12034
# compare the scale factor against the single-precision exponent limits
# (0x3f81 = sgl min norm exp; 0x407f = sgl max norm exp, both biased)
cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
12035
beq.w fsqrt_sd_may_unfl
12036
bgt.w fsqrt_sd_unfl # yes; go handle underflow
12037
cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
12038
beq.w fsqrt_sd_may_ovfl # maybe; go check
12039
blt.w fsqrt_sd_ovfl # yes; go handle overflow
12040
12041
#
12042
# operand will NOT overflow or underflow when moved in to the fp reg file
12043
#
12044
fsqrt_sd_normal:
12045
fmov.l &0x0,%fpsr # clear FPSR
12046
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12047
12048
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12049
12050
fmov.l %fpsr,%d1 # save FPSR
12051
fmov.l &0x0,%fpcr # clear FPCR
12052
12053
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12054
12055
# common exit: undo the scale factor (in d0) on the result exponent so
# the returned value carries the true, unscaled exponent.
fsqrt_sd_normal_exit:
12056
mov.l %d2,-(%sp) # save d2
12057
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12058
mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
12059
mov.l %d1,%d2 # make a copy
12060
andi.l &0x7fff,%d1 # strip sign
12061
sub.l %d0,%d1 # subtract scale factor
12062
andi.w &0x8000,%d2 # keep old sign
12063
or.w %d1,%d2 # concat old sign,new exp
12064
mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
12065
mov.l (%sp)+,%d2 # restore d2
12066
fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
12067
rts
12068
12069
#
12070
# operand is to be rounded to double precision
12071
#
12072
fsqrt_dbl:
12073
mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
12074
mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
12075
mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
12076
12077
bsr.l scale_sqrt # calculate scale factor
12078
12079
# compare the scale factor against the double-precision exponent limits
# (0x3c01 = dbl min norm exp; 0x43ff = dbl max norm exp, both biased)
cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
12080
beq.w fsqrt_sd_may_unfl
12081
bgt.b fsqrt_sd_unfl # yes; go handle underflow
12082
cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
12083
beq.w fsqrt_sd_may_ovfl # maybe; go check
12084
blt.w fsqrt_sd_ovfl # yes; go handle overflow
12085
bra.w fsqrt_sd_normal # no; go handle normalized op
12086
12087
# we're on the line here and the distinguishing characteristic is whether
12088
# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12089
# elsewise fall through to underflow.
12090
fsqrt_sd_may_unfl:
12091
btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12092
bne.w fsqrt_sd_normal # yes, so no underflow
12093
12094
#
12095
# operand WILL underflow when moved in to the fp register file
12096
#
12097
fsqrt_sd_unfl:
12098
bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099
12100
# compute with round-to-zero so the intermediate can't round up and
# spuriously leave the underflow range before unf_res() rounds it.
fmov.l &rz_mode*0x10,%fpcr # set FPCR
12101
fmov.l &0x0,%fpsr # clear FPSR
12102
12103
fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
12104
12105
fmov.l %fpsr,%d1 # save status
12106
fmov.l &0x0,%fpcr # clear FPCR
12107
12108
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12109
12110
# if underflow or inexact is enabled, go calculate EXOP first.
12111
mov.b FPCR_ENABLE(%a6),%d1
12112
andi.b &0x0b,%d1 # is UNFL or INEX enabled?
12113
bne.b fsqrt_sd_unfl_ena # yes
12114
12115
fsqrt_sd_unfl_dis:
12116
fmovm.x &0x80,FP_SCR0(%a6) # store out result
12117
12118
lea FP_SCR0(%a6),%a0 # pass: result addr
12119
mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
12120
bsr.l unf_res # calculate default result
12121
or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
12122
fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
12123
rts
12124
12125
#
12126
# operand will underflow AND underflow is enabled.
12127
# Therefore, we must return the result rounded to extended precision.
12128
#
12129
fsqrt_sd_unfl_ena:
12130
mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131
mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132
mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
12133
12134
# build the EXOP: unscale the exponent, then bias it by 0x6000 as the
# 060FPSP underflow-exception convention requires.
mov.l %d2,-(%sp) # save d2
12135
mov.l %d1,%d2 # make a copy
12136
andi.l &0x7fff,%d1 # strip sign
12137
andi.w &0x8000,%d2 # keep old sign
12138
sub.l %d0,%d1 # subtract scale factor
12139
addi.l &0x6000,%d1 # add new bias
12140
andi.w &0x7fff,%d1
12141
or.w %d2,%d1 # concat new sign,new exp
12142
mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
12143
fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
12144
mov.l (%sp)+,%d2 # restore d2
12145
bra.b fsqrt_sd_unfl_dis
12146
12147
#
12148
# operand WILL overflow.
12149
#
12150
fsqrt_sd_ovfl:
12151
fmov.l &0x0,%fpsr # clear FPSR
12152
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12153
12154
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12155
12156
fmov.l &0x0,%fpcr # clear FPCR
12157
fmov.l %fpsr,%d1 # save FPSR
12158
12159
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12160
12161
# also entered from fsqrt_sd_may_ovfl once overflow is confirmed
fsqrt_sd_ovfl_tst:
12162
or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163
12164
mov.b FPCR_ENABLE(%a6),%d1
12165
andi.b &0x13,%d1 # is OVFL or INEX enabled?
12166
bne.b fsqrt_sd_ovfl_ena # yes
12167
12168
#
12169
# OVFL is not enabled; therefore, we must create the default result by
12170
# calling ovf_res().
12171
#
12172
fsqrt_sd_ovfl_dis:
12173
btst &neg_bit,FPSR_CC(%a6) # is result negative?
12174
sne %d1 # set sign param accordingly
12175
mov.l L_SCR3(%a6),%d0 # pass: prec,mode
12176
bsr.l ovf_res # calculate default result
12177
or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
12178
fmovm.x (%a0),&0x80 # return default result in fp0
12179
rts
12180
12181
#
12182
# OVFL is enabled.
12183
# the INEX2 bit has already been updated by the round to the correct precision.
12184
# now, round to extended(and don't alter the FPSR).
12185
#
12186
fsqrt_sd_ovfl_ena:
12187
# build the EXOP: unscale the exponent, then remove the 0x6000 bias as
# the 060FPSP overflow-exception convention requires.
mov.l %d2,-(%sp) # save d2
12188
mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
12189
mov.l %d1,%d2 # make a copy
12190
andi.l &0x7fff,%d1 # strip sign
12191
andi.w &0x8000,%d2 # keep old sign
12192
sub.l %d0,%d1 # subtract scale factor
12193
subi.l &0x6000,%d1 # subtract bias
12194
andi.w &0x7fff,%d1
12195
or.w %d2,%d1 # concat sign,exp
12196
mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
12197
fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
12198
mov.l (%sp)+,%d2 # restore d2
12199
bra.b fsqrt_sd_ovfl_dis
12200
12201
#
12202
# the move in MAY overflow. so...
12203
#
12204
fsqrt_sd_may_ovfl:
12205
btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
12206
bne.w fsqrt_sd_ovfl # yes, so overflow
12207
12208
fmov.l &0x0,%fpsr # clear FPSR
12209
fmov.l L_SCR3(%a6),%fpcr # set FPCR
12210
12211
fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
12212
12213
fmov.l %fpsr,%d1 # save status
12214
fmov.l &0x0,%fpcr # clear FPCR
12215
12216
or.l %d1,USER_FPSR(%a6) # save INEX2,N
12217
12218
# the scaled result sits on the overflow boundary: it overflowed iff
# rounding pushed its magnitude up to >= 1.0 here.
fmov.x %fp0,%fp1 # make a copy of result
12219
fcmp.b %fp1,&0x1 # is |result| >= 1.b?
12220
fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
12221
12222
# no, it didn't overflow; we have correct result
12223
bra.w fsqrt_sd_normal_exit
12224
12225
##########################################################################
12226
12227
#
12228
# input is not normalized; what is it?
12229
#
12230
fsqrt_not_norm:
12231
cmpi.b %d1,&DENORM # weed out DENORM
12232
beq.w fsqrt_denorm
12233
cmpi.b %d1,&ZERO # weed out ZERO
12234
beq.b fsqrt_zero
12235
cmpi.b %d1,&INF # weed out INF
12236
beq.b fsqrt_inf
12237
cmpi.b %d1,&SNAN # weed out SNAN
12238
beq.l res_snan_1op
12239
bra.l res_qnan_1op
12240
12241
#
12242
# fsqrt(+0) = +0
12243
# fsqrt(-0) = -0
12244
# fsqrt(+INF) = +INF
12245
# fsqrt(-INF) = OPERR
12246
#
12247
fsqrt_zero:
12248
tst.b SRC_EX(%a0) # is ZERO positive or negative?
12249
bmi.b fsqrt_zero_m # negative
12250
fsqrt_zero_p:
12251
fmov.s &0x00000000,%fp0 # return +ZERO
12252
mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
12253
rts
12254
fsqrt_zero_m:
12255
fmov.s &0x80000000,%fp0 # return -ZERO
12256
mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
12257
rts
12258
12259
fsqrt_inf:
12260
tst.b SRC_EX(%a0) # is INF positive or negative?
12261
bmi.l res_operr # negative; sqrt(-INF) is an operand error
12262
fsqrt_inf_p:
12263
fmovm.x SRC(%a0),&0x80 # return +INF in fp0
12264
mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12265
rts
12266
12267
#########################################################################
12268
# XDEF **************************************************************** #
12269
# fetch_dreg(): fetch register according to index in d1 #
12270
# #
12271
# XREF **************************************************************** #
12272
# None #
12273
# #
12274
# INPUT *************************************************************** #
12275
# d1 = index of register to fetch from #
12276
# #
12277
# OUTPUT ************************************************************** #
12278
# d0 = value of register fetched #
12279
# #
12280
# ALGORITHM *********************************************************** #
12281
# According to the index value in d1 which can range from zero #
12282
# to fifteen, load the corresponding register file value (where #
12283
# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
12284
# stack. The rest should still be in their original places. #
12285
# #
12286
#########################################################################
12287
12288
# this routine leaves d1 intact for subsequent store_dreg calls.
12289
# Dispatch through a pc-relative word-offset jump table: index 0-7
# selects d0-d7, index 8-15 selects a0-a7. d0/d1/a0/a1/a6/a7 come from
# the exception stack frame; the rest are read live.
global fetch_dreg
12290
fetch_dreg:
12291
mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 # fetch table offset for index d1
12292
jmp (tbl_fdreg.b,%pc,%d0.w*1) # jump to the fetch stub
12293
12294
tbl_fdreg:
12295
short fdreg0 - tbl_fdreg
12296
short fdreg1 - tbl_fdreg
12297
short fdreg2 - tbl_fdreg
12298
short fdreg3 - tbl_fdreg
12299
short fdreg4 - tbl_fdreg
12300
short fdreg5 - tbl_fdreg
12301
short fdreg6 - tbl_fdreg
12302
short fdreg7 - tbl_fdreg
12303
short fdreg8 - tbl_fdreg
12304
short fdreg9 - tbl_fdreg
12305
short fdrega - tbl_fdreg
12306
short fdregb - tbl_fdreg
12307
short fdregc - tbl_fdreg
12308
short fdregd - tbl_fdreg
12309
short fdrege - tbl_fdreg
12310
short fdregf - tbl_fdreg
12311
12312
fdreg0:
12313
mov.l EXC_DREGS+0x0(%a6),%d0 # d0 (saved in frame)
12314
rts
12315
fdreg1:
12316
mov.l EXC_DREGS+0x4(%a6),%d0 # d1 (saved in frame)
12317
rts
12318
fdreg2:
12319
mov.l %d2,%d0
12320
rts
12321
fdreg3:
12322
mov.l %d3,%d0
12323
rts
12324
fdreg4:
12325
mov.l %d4,%d0
12326
rts
12327
fdreg5:
12328
mov.l %d5,%d0
12329
rts
12330
fdreg6:
12331
mov.l %d6,%d0
12332
rts
12333
fdreg7:
12334
mov.l %d7,%d0
12335
rts
12336
fdreg8:
12337
mov.l EXC_DREGS+0x8(%a6),%d0 # a0 (saved in frame)
12338
rts
12339
fdreg9:
12340
mov.l EXC_DREGS+0xc(%a6),%d0 # a1 (saved in frame)
12341
rts
12342
fdrega:
12343
mov.l %a2,%d0
12344
rts
12345
fdregb:
12346
mov.l %a3,%d0
12347
rts
12348
fdregc:
12349
mov.l %a4,%d0
12350
rts
12351
fdregd:
12352
mov.l %a5,%d0
12353
rts
12354
fdrege:
12355
mov.l (%a6),%d0 # a6 (saved at the frame base)
12356
rts
12357
fdregf:
12358
mov.l EXC_A7(%a6),%d0 # a7 (saved in frame)
12359
rts
12360
12361
#########################################################################
12362
# XDEF **************************************************************** #
12363
# store_dreg_l(): store longword to data register specified by d1 #
12364
# #
12365
# XREF **************************************************************** #
12366
# None #
12367
# #
12368
# INPUT *************************************************************** #
12369
# d0 = longword value to store #
12370
# d1 = index of data register to store to #
12371
# #
12372
# OUTPUT ************************************************************** #
12373
# (data register is updated) #
12374
# #
12375
# ALGORITHM *********************************************************** #
12376
# According to the index value in d1, store the longword value #
12377
# in d0 to the corresponding data register. D0/D1 are on the stack #
12378
# while the rest are in their initial places. #
12379
# #
12380
#########################################################################
12381
12382
# Dispatch through a pc-relative jump table on the register index in d1;
# d0/d1 are updated in the exception frame, d2-d7 are written live.
global store_dreg_l
12383
store_dreg_l:
12384
mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 # fetch table offset for index d1
12385
jmp (tbl_sdregl.b,%pc,%d1.w*1) # jump to the store stub
12386
12387
tbl_sdregl:
12388
short sdregl0 - tbl_sdregl
12389
short sdregl1 - tbl_sdregl
12390
short sdregl2 - tbl_sdregl
12391
short sdregl3 - tbl_sdregl
12392
short sdregl4 - tbl_sdregl
12393
short sdregl5 - tbl_sdregl
12394
short sdregl6 - tbl_sdregl
12395
short sdregl7 - tbl_sdregl
12396
12397
sdregl0:
12398
mov.l %d0,EXC_DREGS+0x0(%a6) # d0 (saved in frame)
12399
rts
12400
sdregl1:
12401
mov.l %d0,EXC_DREGS+0x4(%a6) # d1 (saved in frame)
12402
rts
12403
sdregl2:
12404
mov.l %d0,%d2
12405
rts
12406
sdregl3:
12407
mov.l %d0,%d3
12408
rts
12409
sdregl4:
12410
mov.l %d0,%d4
12411
rts
12412
sdregl5:
12413
mov.l %d0,%d5
12414
rts
12415
sdregl6:
12416
mov.l %d0,%d6
12417
rts
12418
sdregl7:
12419
mov.l %d0,%d7
12420
rts
12421
12422
#########################################################################
12423
# XDEF **************************************************************** #
12424
# store_dreg_w(): store word to data register specified by d1 #
12425
# #
12426
# XREF **************************************************************** #
12427
# None #
12428
# #
12429
# INPUT *************************************************************** #
12430
# d0 = word value to store #
12431
# d1 = index of data register to store to #
12432
# #
12433
# OUTPUT ************************************************************** #
12434
# (data register is updated) #
12435
# #
12436
# ALGORITHM *********************************************************** #
12437
# According to the index value in d1, store the word value #
12438
# in d0 to the corresponding data register. D0/D1 are on the stack #
12439
# while the rest are in their initial places. #
12440
# #
12441
#########################################################################
12442
12443
# Word-sized variant of store_dreg_l: only the low word of the target
# data register is written (frame slots at offset +2 of the longword).
global store_dreg_w
12444
store_dreg_w:
12445
mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 # fetch table offset for index d1
12446
jmp (tbl_sdregw.b,%pc,%d1.w*1) # jump to the store stub
12447
12448
tbl_sdregw:
12449
short sdregw0 - tbl_sdregw
12450
short sdregw1 - tbl_sdregw
12451
short sdregw2 - tbl_sdregw
12452
short sdregw3 - tbl_sdregw
12453
short sdregw4 - tbl_sdregw
12454
short sdregw5 - tbl_sdregw
12455
short sdregw6 - tbl_sdregw
12456
short sdregw7 - tbl_sdregw
12457
12458
sdregw0:
12459
mov.w %d0,2+EXC_DREGS+0x0(%a6) # low word of saved d0
12460
rts
12461
sdregw1:
12462
mov.w %d0,2+EXC_DREGS+0x4(%a6) # low word of saved d1
12463
rts
12464
sdregw2:
12465
mov.w %d0,%d2
12466
rts
12467
sdregw3:
12468
mov.w %d0,%d3
12469
rts
12470
sdregw4:
12471
mov.w %d0,%d4
12472
rts
12473
sdregw5:
12474
mov.w %d0,%d5
12475
rts
12476
sdregw6:
12477
mov.w %d0,%d6
12478
rts
12479
sdregw7:
12480
mov.w %d0,%d7
12481
rts
12482
12483
#########################################################################
12484
# XDEF **************************************************************** #
12485
# store_dreg_b(): store byte to data register specified by d1 #
12486
# #
12487
# XREF **************************************************************** #
12488
# None #
12489
# #
12490
# INPUT *************************************************************** #
12491
# d0 = byte value to store #
12492
# d1 = index of data register to store to #
12493
# #
12494
# OUTPUT ************************************************************** #
12495
# (data register is updated) #
12496
# #
12497
# ALGORITHM *********************************************************** #
12498
# According to the index value in d1, store the byte value #
12499
# in d0 to the corresponding data register. D0/D1 are on the stack #
12500
# while the rest are in their initial places. #
12501
# #
12502
#########################################################################
12503
12504
# Byte-sized variant of store_dreg_l: only the low byte of the target
# data register is written (frame slots at offset +3 of the longword).
global store_dreg_b
12505
store_dreg_b:
12506
mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 # fetch table offset for index d1
12507
jmp (tbl_sdregb.b,%pc,%d1.w*1) # jump to the store stub
12508
12509
tbl_sdregb:
12510
short sdregb0 - tbl_sdregb
12511
short sdregb1 - tbl_sdregb
12512
short sdregb2 - tbl_sdregb
12513
short sdregb3 - tbl_sdregb
12514
short sdregb4 - tbl_sdregb
12515
short sdregb5 - tbl_sdregb
12516
short sdregb6 - tbl_sdregb
12517
short sdregb7 - tbl_sdregb
12518
12519
sdregb0:
12520
mov.b %d0,3+EXC_DREGS+0x0(%a6) # low byte of saved d0
12521
rts
12522
sdregb1:
12523
mov.b %d0,3+EXC_DREGS+0x4(%a6) # low byte of saved d1
12524
rts
12525
sdregb2:
12526
mov.b %d0,%d2
12527
rts
12528
sdregb3:
12529
mov.b %d0,%d3
12530
rts
12531
sdregb4:
12532
mov.b %d0,%d4
12533
rts
12534
sdregb5:
12535
mov.b %d0,%d5
12536
rts
12537
sdregb6:
12538
mov.b %d0,%d6
12539
rts
12540
sdregb7:
12541
mov.b %d0,%d7
12542
rts
12543
12544
#########################################################################
12545
# XDEF **************************************************************** #
12546
# inc_areg(): increment an address register by the value in d0 #
12547
# #
12548
# XREF **************************************************************** #
12549
# None #
12550
# #
12551
# INPUT *************************************************************** #
12552
# d0 = amount to increment by #
12553
# d1 = index of address register to increment #
12554
# #
12555
# OUTPUT ************************************************************** #
12556
# (address register is updated) #
12557
# #
12558
# ALGORITHM *********************************************************** #
12559
# Typically used for an instruction w/ a post-increment <ea>, #
12560
# this routine adds the increment value in d0 to the address register #
12561
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12562
# in their original places. #
12563
# For a7, if the increment amount is one, then we have to #
12564
# increment by two. For any a7 update, set the mia7_flag so that if #
12565
# an access error exception occurs later in emulation, this address #
12566
# register update can be undone. #
12567
# #
12568
#########################################################################
12569
12570
# Dispatch through a pc-relative jump table on the address-register
# index in d1; a0/a1/a6/a7 are updated in the exception frame, a2-a5
# are updated live.
global inc_areg
12571
inc_areg:
12572
mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 # fetch table offset for index d1
12573
jmp (tbl_iareg.b,%pc,%d1.w*1) # jump to the increment stub
12574
12575
tbl_iareg:
12576
short iareg0 - tbl_iareg
12577
short iareg1 - tbl_iareg
12578
short iareg2 - tbl_iareg
12579
short iareg3 - tbl_iareg
12580
short iareg4 - tbl_iareg
12581
short iareg5 - tbl_iareg
12582
short iareg6 - tbl_iareg
12583
short iareg7 - tbl_iareg
12584
12585
iareg0: add.l %d0,EXC_DREGS+0x8(%a6) # a0 (saved in frame)
12586
rts
12587
iareg1: add.l %d0,EXC_DREGS+0xc(%a6) # a1 (saved in frame)
12588
rts
12589
iareg2: add.l %d0,%a2
12590
rts
12591
iareg3: add.l %d0,%a3
12592
rts
12593
iareg4: add.l %d0,%a4
12594
rts
12595
iareg5: add.l %d0,%a5
12596
rts
12597
iareg6: add.l %d0,(%a6) # a6 (saved at the frame base)
12598
rts
12599
# a7: mark the update undoable, and force byte increments to two so
# the stack pointer stays word-aligned.
iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6)
12600
cmpi.b %d0,&0x1
12601
beq.b iareg7b
12602
add.l %d0,EXC_A7(%a6)
12603
rts
12604
iareg7b:
12605
addq.l &0x2,EXC_A7(%a6)
12606
rts
12607
12608
#########################################################################
12609
# XDEF **************************************************************** #
12610
# dec_areg(): decrement an address register by the value in d0 #
12611
# #
12612
# XREF **************************************************************** #
12613
# None #
12614
# #
12615
# INPUT *************************************************************** #
12616
# d0 = amount to decrement by #
12617
# d1 = index of address register to decrement #
12618
# #
12619
# OUTPUT ************************************************************** #
12620
# (address register is updated) #
12621
# #
12622
# ALGORITHM *********************************************************** #
12623
# Typically used for an instruction w/ a pre-decrement <ea>, #
12624
# this routine subtracts the decrement value in d0 from the address register #
12625
# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
12626
# in their original places. #
12627
# For a7, if the decrement amount is one, then we have to #
12628
# decrement by two. For any a7 update, set the mda7_flag so that if #
12629
# an access error exception occurs later in emulation, this address #
12630
# register update can be undone. #
12631
# #
12632
#########################################################################
12633
12634
# Mirror of inc_areg for pre-decrement <ea>s: a0/a1/a6/a7 are updated
# in the exception frame, a2-a5 are updated live.
global dec_areg
12635
dec_areg:
12636
mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 # fetch table offset for index d1
12637
jmp (tbl_dareg.b,%pc,%d1.w*1) # jump to the decrement stub
12638
12639
tbl_dareg:
12640
short dareg0 - tbl_dareg
12641
short dareg1 - tbl_dareg
12642
short dareg2 - tbl_dareg
12643
short dareg3 - tbl_dareg
12644
short dareg4 - tbl_dareg
12645
short dareg5 - tbl_dareg
12646
short dareg6 - tbl_dareg
12647
short dareg7 - tbl_dareg
12648
12649
dareg0: sub.l %d0,EXC_DREGS+0x8(%a6) # a0 (saved in frame)
12650
rts
12651
dareg1: sub.l %d0,EXC_DREGS+0xc(%a6) # a1 (saved in frame)
12652
rts
12653
dareg2: sub.l %d0,%a2
12654
rts
12655
dareg3: sub.l %d0,%a3
12656
rts
12657
dareg4: sub.l %d0,%a4
12658
rts
12659
dareg5: sub.l %d0,%a5
12660
rts
12661
dareg6: sub.l %d0,(%a6) # a6 (saved at the frame base)
12662
rts
12663
# a7: mark the update undoable, and force byte decrements to two so
# the stack pointer stays word-aligned.
dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6)
12664
cmpi.b %d0,&0x1
12665
beq.b dareg7b
12666
sub.l %d0,EXC_A7(%a6)
12667
rts
12668
dareg7b:
12669
subq.l &0x2,EXC_A7(%a6)
12670
rts
12671
12672
##############################################################################
12673
12674
#########################################################################
12675
# XDEF **************************************************************** #
12676
# load_fpn1(): load FP register value into FP_SRC(a6). #
12677
# #
12678
# XREF **************************************************************** #
12679
# None #
12680
# #
12681
# INPUT *************************************************************** #
12682
# d0 = index of FP register to load #
12683
# #
12684
# OUTPUT ************************************************************** #
12685
# FP_SRC(a6) = value loaded from FP register file #
12686
# #
12687
# ALGORITHM *********************************************************** #
12688
# Using the index in d0, load FP_SRC(a6) with a number from the #
12689
# FP register file. #
12690
# #
12691
#########################################################################
12692
12693
# fp0/fp1 come from their frame save areas; fp2-fp7 are still live and
# are spilled with fmovm. On return a0 also points at FP_SRC(a6).
global load_fpn1
12694
load_fpn1:
12695
mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # fetch table offset for index d0
12696
jmp (tbl_load_fpn1.b,%pc,%d0.w*1) # jump to the load stub
12697
12698
tbl_load_fpn1:
12699
short load_fpn1_0 - tbl_load_fpn1
12700
short load_fpn1_1 - tbl_load_fpn1
12701
short load_fpn1_2 - tbl_load_fpn1
12702
short load_fpn1_3 - tbl_load_fpn1
12703
short load_fpn1_4 - tbl_load_fpn1
12704
short load_fpn1_5 - tbl_load_fpn1
12705
short load_fpn1_6 - tbl_load_fpn1
12706
short load_fpn1_7 - tbl_load_fpn1
12707
12708
load_fpn1_0:
12709
mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6) # copy saved fp0 image (12 bytes)
12710
mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
12711
mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
12712
lea FP_SRC(%a6), %a0
12713
rts
12714
load_fpn1_1:
12715
mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6) # copy saved fp1 image (12 bytes)
12716
mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
12717
mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
12718
lea FP_SRC(%a6), %a0
12719
rts
12720
load_fpn1_2:
12721
fmovm.x &0x20, FP_SRC(%a6) # spill live fp2
12722
lea FP_SRC(%a6), %a0
12723
rts
12724
load_fpn1_3:
12725
fmovm.x &0x10, FP_SRC(%a6) # spill live fp3
12726
lea FP_SRC(%a6), %a0
12727
rts
12728
load_fpn1_4:
12729
fmovm.x &0x08, FP_SRC(%a6) # spill live fp4
12730
lea FP_SRC(%a6), %a0
12731
rts
12732
load_fpn1_5:
12733
fmovm.x &0x04, FP_SRC(%a6) # spill live fp5
12734
lea FP_SRC(%a6), %a0
12735
rts
12736
load_fpn1_6:
12737
fmovm.x &0x02, FP_SRC(%a6) # spill live fp6
12738
lea FP_SRC(%a6), %a0
12739
rts
12740
load_fpn1_7:
12741
fmovm.x &0x01, FP_SRC(%a6) # spill live fp7
12742
lea FP_SRC(%a6), %a0
12743
rts
12744
12745
#############################################################################
12746
12747
#########################################################################
12748
# XDEF **************************************************************** #
12749
# load_fpn2(): load FP register value into FP_DST(a6). #
12750
# #
12751
# XREF **************************************************************** #
12752
# None #
12753
# #
12754
# INPUT *************************************************************** #
12755
# d0 = index of FP register to load #
12756
# #
12757
# OUTPUT ************************************************************** #
12758
# FP_DST(a6) = value loaded from FP register file #
12759
# #
12760
# ALGORITHM *********************************************************** #
12761
# Using the index in d0, load FP_DST(a6) with a number from the #
12762
# FP register file. #
12763
# #
12764
#########################################################################
12765
12766
# Same as load_fpn1 but targets FP_DST(a6). On return a0 also points
# at FP_DST(a6).
global load_fpn2
12767
load_fpn2:
12768
mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # fetch table offset for index d0
12769
jmp (tbl_load_fpn2.b,%pc,%d0.w*1) # jump to the load stub
12770
12771
tbl_load_fpn2:
12772
short load_fpn2_0 - tbl_load_fpn2
12773
short load_fpn2_1 - tbl_load_fpn2
12774
short load_fpn2_2 - tbl_load_fpn2
12775
short load_fpn2_3 - tbl_load_fpn2
12776
short load_fpn2_4 - tbl_load_fpn2
12777
short load_fpn2_5 - tbl_load_fpn2
12778
short load_fpn2_6 - tbl_load_fpn2
12779
short load_fpn2_7 - tbl_load_fpn2
12780
12781
load_fpn2_0:
12782
mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6) # copy saved fp0 image (12 bytes)
12783
mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
12784
mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
12785
lea FP_DST(%a6), %a0
12786
rts
12787
load_fpn2_1:
12788
mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6) # copy saved fp1 image (12 bytes)
12789
mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
12790
mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
12791
lea FP_DST(%a6), %a0
12792
rts
12793
load_fpn2_2:
12794
fmovm.x &0x20, FP_DST(%a6) # spill live fp2
12795
lea FP_DST(%a6), %a0
12796
rts
12797
load_fpn2_3:
12798
fmovm.x &0x10, FP_DST(%a6) # spill live fp3
12799
lea FP_DST(%a6), %a0
12800
rts
12801
load_fpn2_4:
12802
fmovm.x &0x08, FP_DST(%a6) # spill live fp4
12803
lea FP_DST(%a6), %a0
12804
rts
12805
load_fpn2_5:
12806
fmovm.x &0x04, FP_DST(%a6) # spill live fp5
12807
lea FP_DST(%a6), %a0
12808
rts
12809
load_fpn2_6:
12810
fmovm.x &0x02, FP_DST(%a6) # spill live fp6
12811
lea FP_DST(%a6), %a0
12812
rts
12813
load_fpn2_7:
12814
fmovm.x &0x01, FP_DST(%a6) # spill live fp7
12815
lea FP_DST(%a6), %a0
12816
rts
12817
12818
#############################################################################
12819
12820
#########################################################################
12821
# XDEF **************************************************************** #
12822
# store_fpreg(): store an fp value to the fpreg designated d0. #
12823
# #
12824
# XREF **************************************************************** #
12825
# None #
12826
# #
12827
# INPUT *************************************************************** #
12828
# fp0 = extended precision value to store #
12829
# d0 = index of floating-point register #
12830
# #
12831
# OUTPUT ************************************************************** #
12832
# None #
12833
# #
12834
# ALGORITHM *********************************************************** #
12835
# Store the value in fp0 to the FP register designated by the #
12836
# value in d0. The FP number can be DENORM or SNAN so we have to be #
12837
# careful that we don't take an exception here. #
12838
# #
12839
#########################################################################
12840
12841
# fp0/fp1 are stored to their frame save areas; fp2-fp7 are written via
# an fmovm bounce through the stack, which moves the value without
# arithmetic and therefore cannot raise an exception on DENORM/SNAN.
global store_fpreg
12842
store_fpreg:
12843
mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # fetch table offset for index d0
12844
jmp (tbl_store_fpreg.b,%pc,%d0.w*1) # jump to the store stub
12845
12846
tbl_store_fpreg:
12847
short store_fpreg_0 - tbl_store_fpreg
12848
short store_fpreg_1 - tbl_store_fpreg
12849
short store_fpreg_2 - tbl_store_fpreg
12850
short store_fpreg_3 - tbl_store_fpreg
12851
short store_fpreg_4 - tbl_store_fpreg
12852
short store_fpreg_5 - tbl_store_fpreg
12853
short store_fpreg_6 - tbl_store_fpreg
12854
short store_fpreg_7 - tbl_store_fpreg
12855
12856
store_fpreg_0:
12857
fmovm.x &0x80, EXC_FP0(%a6) # fp0 -> frame save area
12858
rts
12859
store_fpreg_1:
12860
fmovm.x &0x80, EXC_FP1(%a6) # fp1 -> frame save area
12861
rts
12862
store_fpreg_2:
12863
fmovm.x &0x01, -(%sp) # push fp0 image
12864
fmovm.x (%sp)+, &0x20 # pop it into fp2
12865
rts
12866
store_fpreg_3:
12867
fmovm.x &0x01, -(%sp) # push fp0 image
12868
fmovm.x (%sp)+, &0x10 # pop it into fp3
12869
rts
12870
store_fpreg_4:
12871
fmovm.x &0x01, -(%sp) # push fp0 image
12872
fmovm.x (%sp)+, &0x08 # pop it into fp4
12873
rts
12874
store_fpreg_5:
12875
fmovm.x &0x01, -(%sp) # push fp0 image
12876
fmovm.x (%sp)+, &0x04 # pop it into fp5
12877
rts
12878
store_fpreg_6:
12879
fmovm.x &0x01, -(%sp) # push fp0 image
12880
fmovm.x (%sp)+, &0x02 # pop it into fp6
12881
rts
12882
store_fpreg_7:
12883
fmovm.x &0x01, -(%sp) # push fp0 image
12884
fmovm.x (%sp)+, &0x01 # pop it into fp7
12885
rts
12886
12887
#########################################################################
12888
# XDEF **************************************************************** #
12889
# get_packed(): fetch a packed operand from memory and then #
12890
# convert it to a floating-point binary number. #
12891
# #
12892
# XREF **************************************************************** #
12893
# _dcalc_ea() - calculate the correct <ea> #
12894
# _mem_read() - fetch the packed operand from memory #
12895
# facc_in_x() - the fetch failed so jump to special exit code #
12896
# decbin() - convert packed to binary extended precision #
12897
# #
12898
# INPUT *************************************************************** #
12899
# None #
12900
# #
12901
# OUTPUT ************************************************************** #
12902
# If no failure on _mem_read(): #
12903
# FP_SRC(a6) = packed operand now as a binary FP number #
12904
# #
12905
# ALGORITHM *********************************************************** #
12906
# Get the correct <ea> which is the value on the exception stack #
12907
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
12908
# Then, fetch the operand from memory. If the fetch fails, exit #
12909
# through facc_in_x(). #
12910
# If the packed operand is a ZERO,NAN, or INF, convert it to #
12911
# its binary representation here. Else, call decbin() which will #
12912
# convert the packed value to an extended precision binary value. #
12913
# #
12914
#########################################################################
12915
12916
# the stacked <ea> for packed is correct except for -(An).
12917
# the base reg must be updated for both -(An) and (An)+.
12918
global get_packed
12919
get_packed:
12920
mov.l &0xc,%d0 # packed is 12 bytes
12921
bsr.l _dcalc_ea # fetch <ea>; correct An
12922
12923
lea FP_SRC(%a6),%a1 # pass: ptr to super dst
12924
mov.l &0xc,%d0 # pass: 12 bytes
12925
bsr.l _dmem_read # read packed operand
12926
12927
tst.l %d1 # did dfetch fail?
12928
bne.l facc_in_x # yes; take the access-error exit
12929
12930
# The packed operand is an INF or a NAN if the exponent field is all ones.
12931
bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
12932
cmpi.w %d0,&0x7fff # INF or NAN?
12933
bne.b gp_try_zero # no
12934
rts # operand is an INF or NAN
12935
12936
# The packed operand is a zero if the mantissa is all zero, else it's
12937
# a normal packed op.
12938
gp_try_zero:
12939
mov.b 3+FP_SRC(%a6),%d0 # get byte 4
12940
andi.b &0x0f,%d0 # clear all but last nybble
12941
bne.b gp_not_spec # not a zero
12942
tst.l FP_SRC_HI(%a6) # is lw 2 zero?
12943
bne.b gp_not_spec # not a zero
12944
tst.l FP_SRC_LO(%a6) # is lw 3 zero?
12945
bne.b gp_not_spec # not a zero
12946
rts # operand is a ZERO
12947
gp_not_spec:
12948
lea FP_SRC(%a6),%a0 # pass: ptr to packed op
12949
bsr.l decbin # convert to extended
12950
fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
12951
rts
12952
12953
#########################################################################
12954
# decbin(): Converts normalized packed bcd value pointed to by register #
12955
# a0 to extended-precision value in fp0. #
12956
# #
12957
# INPUT *************************************************************** #
12958
# a0 = pointer to normalized packed bcd value #
12959
# #
12960
# OUTPUT ************************************************************** #
12961
# fp0 = exact fp representation of the packed bcd value. #
12962
# #
12963
# ALGORITHM *********************************************************** #
12964
# Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
12965
# and NaN operands are dispatched without entering this routine) #
12966
# value in 68881/882 format at location (a0). #
12967
# #
12968
# A1. Convert the bcd exponent to binary by successive adds and #
12969
# muls. Set the sign according to SE. Subtract 16 to compensate #
12970
# for the mantissa which is to be interpreted as 17 integer #
12971
# digits, rather than 1 integer and 16 fraction digits. #
12972
# Note: this operation can never overflow. #
12973
# #
12974
# A2. Convert the bcd mantissa to binary by successive #
12975
# adds and muls in FP0. Set the sign according to SM. #
12976
# The mantissa digits will be converted with the decimal point #
12977
# assumed following the least-significant digit. #
12978
# Note: this operation can never overflow. #
12979
# #
12980
# A3. Count the number of leading/trailing zeros in the #
12981
# bcd string. If SE is positive, count the leading zeros; #
12982
# if negative, count the trailing zeros. Set the adjusted #
12983
# exponent equal to the exponent from A1 and the zero count #
12984
# added if SM = 1 and subtracted if SM = 0. Scale the #
12985
# mantissa the equivalent of forcing in the bcd value: #
12986
# #
12987
# SM = 0 a non-zero digit in the integer position #
12988
# SM = 1 a non-zero digit in Mant0, lsd of the fraction #
12989
# #
12990
# this will ensure that any value, regardless of its #
12991
# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
12992
# consistently. #
12993
# #
12994
# A4. Calculate the factor 10^exp in FP1 using a table of #
12995
# 10^(2^n) values. To reduce the error in forming factors #
12996
# greater than 10^27, a directed rounding scheme is used with #
12997
# tables rounded to RN, RM, and RP, according to the table #
12998
# in the comments of the pwrten section. #
12999
# #
13000
# A5. Form the final binary number by scaling the mantissa by #
13001
# the exponent factor. This is done by multiplying the #
13002
# mantissa in FP0 by the factor in FP1 if the adjusted #
13003
# exponent sign is positive, and dividing FP0 by FP1 if #
13004
# it is negative. #
13005
# #
13006
# Clean up and return. Check if the final mul or div was inexact. #
13007
# If so, set INEX1 in USER_FPSR. #
13008
# #
13009
#########################################################################
13010
13011
#
13012
# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013
# to nearest, minus, and plus, respectively. The tables include
13014
# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
13015
# is required until the power is greater than 27, however, all
13016
# tables include the first 5 for ease of indexing.
13017
#
13018
RTABLE:
13019
byte 0,0,0,0
13020
byte 2,3,2,3
13021
byte 2,3,3,2
13022
byte 3,2,2,3
13023
13024
set FNIBS,7
13025
set FSTRT,0
13026
13027
set ESTRT,4
13028
set EDIGITS,2
13029
13030
global decbin
13031
decbin:
13032
mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033
mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034
mov.l 0x8(%a0),FP_SCR0_LO(%a6)
13035
13036
lea FP_SCR0(%a6),%a0
13037
13038
movm.l &0x3c00,-(%sp) # save d2-d5
13039
fmovm.x &0x1,-(%sp) # save fp1
13040
#
13041
# Calculate exponent:
13042
# 1. Copy bcd value in memory for use as a working copy.
13043
# 2. Calculate absolute value of exponent in d1 by mul and add.
13044
# 3. Correct for exponent sign.
13045
# 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046
# (i.e., all digits assumed left of the decimal point.)
13047
#
13048
# Register usage:
13049
#
13050
# calc_e:
13051
# (*) d0: temp digit storage
13052
# (*) d1: accumulator for binary exponent
13053
# (*) d2: digit count
13054
# (*) d3: offset pointer
13055
# ( ) d4: first word of bcd
13056
# ( ) a0: pointer to working bcd value
13057
# ( ) a6: pointer to original bcd value
13058
# (*) FP_SCR1: working copy of original bcd value
13059
# (*) L_SCR1: copy of original exponent word
13060
#
13061
calc_e:
13062
mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
13063
mov.l &ESTRT,%d3 # counter to pick up digits
13064
mov.l (%a0),%d4 # get first word of bcd
13065
clr.l %d1 # zero d1 for accumulator
13066
e_gd:
13067
mulu.l &0xa,%d1 # mul partial product by one digit place
13068
bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
13069
add.l %d0,%d1 # d1 = d1 + d0
13070
addq.b &4,%d3 # advance d3 to the next digit
13071
dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
13072
btst &30,%d4 # get SE
13073
beq.b e_pos # don't negate if pos
13074
neg.l %d1 # negate before subtracting
13075
e_pos:
13076
sub.l &16,%d1 # sub to compensate for shift of mant
13077
bge.b e_save # if still pos, do not neg
13078
neg.l %d1 # now negative, make pos and set SE
13079
or.l &0x40000000,%d4 # set SE in d4,
13080
or.l &0x40000000,(%a0) # and in working bcd
13081
e_save:
13082
mov.l %d1,-(%sp) # save exp on stack
13083
#
13084
#
13085
# Calculate mantissa:
13086
# 1. Calculate absolute value of mantissa in fp0 by mul and add.
13087
# 2. Correct for mantissa sign.
13088
# (i.e., all digits assumed left of the decimal point.)
13089
#
13090
# Register usage:
13091
#
13092
# calc_m:
13093
# (*) d0: temp digit storage
13094
# (*) d1: lword counter
13095
# (*) d2: digit count
13096
# (*) d3: offset pointer
13097
# ( ) d4: words 2 and 3 of bcd
13098
# ( ) a0: pointer to working bcd value
13099
# ( ) a6: pointer to original bcd value
13100
# (*) fp0: mantissa accumulator
13101
# ( ) FP_SCR1: working copy of original bcd value
13102
# ( ) L_SCR1: copy of original exponent word
13103
#
13104
calc_m:
13105
mov.l &1,%d1 # word counter, init to 1
13106
fmov.s &0x00000000,%fp0 # accumulator
13107
#
13108
#
13109
# Since the packed number has a long word between the first & second parts,
13110
# get the integer digit then skip down & get the rest of the
13111
# mantissa. We will unroll the loop once.
13112
#
13113
bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
13114
fadd.b %d0,%fp0 # add digit to sum in fp0
13115
#
13116
#
13117
# Get the rest of the mantissa.
13118
#
13119
loadlw:
13120
mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4
13121
mov.l &FSTRT,%d3 # counter to pick up digits
13122
mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
13123
md2b:
13124
fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
13125
bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
13126
fadd.b %d0,%fp0 # fp0 = fp0 + digit
13127
#
13128
#
13129
# If all the digits (8) in that long word have been converted (d2=0),
13130
# then inc d1 (=2) to point to the next long word and reset d3 to 0
13131
# to initialize the digit offset, and set d2 to 7 for the digit count;
13132
# else continue with this long word.
13133
#
13134
addq.b &4,%d3 # advance d3 to the next digit
13135
dbf.w %d2,md2b # check for last digit in this lw
13136
nextlw:
13137
addq.l &1,%d1 # inc lw pointer in mantissa
13138
cmp.l %d1,&2 # test for last lw
13139
ble.b loadlw # if not, get last one
13140
#
13141
# Check the sign of the mant and make the value in fp0 the same sign.
13142
#
13143
m_sign:
13144
btst &31,(%a0) # test sign of the mantissa
13145
beq.b ap_st_z # if clear, go to append/strip zeros
13146
fneg.x %fp0 # if set, negate fp0
13147
#
13148
# Append/strip zeros:
13149
#
13150
# For adjusted exponents which have an absolute value greater than 27*,
13151
# this routine calculates the amount needed to normalize the mantissa
13152
# for the adjusted exponent. That number is subtracted from the exp
13153
# if the exp was positive, and added if it was negative. The purpose
13154
# of this is to reduce the value of the exponent and the possibility
13155
# of error in calculation of pwrten.
13156
#
13157
# 1. Branch on the sign of the adjusted exponent.
13158
# 2p.(positive exp)
13159
# 2. Check M16 and the digits in lwords 2 and 3 in decending order.
13160
# 3. Add one for each zero encountered until a non-zero digit.
13161
# 4. Subtract the count from the exp.
13162
# 5. Check if the exp has crossed zero in #3 above; make the exp abs
13163
# and set SE.
13164
# 6. Multiply the mantissa by 10**count.
13165
# 2n.(negative exp)
13166
# 2. Check the digits in lwords 3 and 2 in decending order.
13167
# 3. Add one for each zero encountered until a non-zero digit.
13168
# 4. Add the count to the exp.
13169
# 5. Check if the exp has crossed zero in #3 above; clear SE.
13170
# 6. Divide the mantissa by 10**count.
13171
#
13172
# *Why 27? If the adjusted exponent is within -28 < expA < 28, than
13173
# any adjustment due to append/strip zeros will drive the resultane
13174
# exponent towards zero. Since all pwrten constants with a power
13175
# of 27 or less are exact, there is no need to use this routine to
13176
# attempt to lessen the resultant exponent.
13177
#
13178
# Register usage:
13179
#
13180
# ap_st_z:
13181
# (*) d0: temp digit storage
13182
# (*) d1: zero count
13183
# (*) d2: digit count
13184
# (*) d3: offset pointer
13185
# ( ) d4: first word of bcd
13186
# (*) d5: lword counter
13187
# ( ) a0: pointer to working bcd value
13188
# ( ) FP_SCR1: working copy of original bcd value
13189
# ( ) L_SCR1: copy of original exponent word
13190
#
13191
#
13192
# First check the absolute value of the exponent to see if this
13193
# routine is necessary. If so, then check the sign of the exponent
13194
# and do append (+) or strip (-) zeros accordingly.
13195
# This section handles a positive adjusted exponent.
13196
#
13197
ap_st_z:
13198
mov.l (%sp),%d1 # load expA for range test
13199
cmp.l %d1,&27 # test is with 27
13200
ble.w pwrten # if abs(expA) <28, skip ap/st zeros
13201
btst &30,(%a0) # check sign of exp
13202
bne.b ap_st_n # if neg, go to neg side
13203
clr.l %d1 # zero count reg
13204
mov.l (%a0),%d4 # load lword 1 to d4
13205
bfextu %d4{&28:&4},%d0 # get M16 in d0
13206
bne.b ap_p_fx # if M16 is non-zero, go fix exp
13207
addq.l &1,%d1 # inc zero count
13208
mov.l &1,%d5 # init lword counter
13209
mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
13210
bne.b ap_p_cl # if lw 2 is zero, skip it
13211
addq.l &8,%d1 # and inc count by 8
13212
addq.l &1,%d5 # inc lword counter
13213
mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
13214
ap_p_cl:
13215
clr.l %d3 # init offset reg
13216
mov.l &7,%d2 # init digit counter
13217
ap_p_gd:
13218
bfextu %d4{%d3:&4},%d0 # get digit
13219
bne.b ap_p_fx # if non-zero, go to fix exp
13220
addq.l &4,%d3 # point to next digit
13221
addq.l &1,%d1 # inc digit counter
13222
dbf.w %d2,ap_p_gd # get next digit
13223
ap_p_fx:
13224
mov.l %d1,%d0 # copy counter to d2
13225
mov.l (%sp),%d1 # get adjusted exp from memory
13226
sub.l %d0,%d1 # subtract count from exp
13227
bge.b ap_p_fm # if still pos, go to pwrten
13228
neg.l %d1 # now its neg; get abs
13229
mov.l (%a0),%d4 # load lword 1 to d4
13230
or.l &0x40000000,%d4 # and set SE in d4
13231
or.l &0x40000000,(%a0) # and in memory
13232
#
13233
# Calculate the mantissa multiplier to compensate for the striping of
13234
# zeros from the mantissa.
13235
#
13236
ap_p_fm:
13237
lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13238
clr.l %d3 # init table index
13239
fmov.s &0x3f800000,%fp1 # init fp1 to 1
13240
mov.l &3,%d2 # init d2 to count bits in counter
13241
ap_p_el:
13242
asr.l &1,%d0 # shift lsb into carry
13243
bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
13244
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13245
ap_p_en:
13246
add.l &12,%d3 # inc d3 to next rtable entry
13247
tst.l %d0 # check if d0 is zero
13248
bne.b ap_p_el # if not, get next bit
13249
fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
13250
bra.b pwrten # go calc pwrten
13251
#
13252
# This section handles a negative adjusted exponent.
13253
#
13254
ap_st_n:
13255
clr.l %d1 # clr counter
13256
mov.l &2,%d5 # set up d5 to point to lword 3
13257
mov.l (%a0,%d5.L*4),%d4 # get lword 3
13258
bne.b ap_n_cl # if not zero, check digits
13259
sub.l &1,%d5 # dec d5 to point to lword 2
13260
addq.l &8,%d1 # inc counter by 8
13261
mov.l (%a0,%d5.L*4),%d4 # get lword 2
13262
ap_n_cl:
13263
mov.l &28,%d3 # point to last digit
13264
mov.l &7,%d2 # init digit counter
13265
ap_n_gd:
13266
bfextu %d4{%d3:&4},%d0 # get digit
13267
bne.b ap_n_fx # if non-zero, go to exp fix
13268
subq.l &4,%d3 # point to previous digit
13269
addq.l &1,%d1 # inc digit counter
13270
dbf.w %d2,ap_n_gd # get next digit
13271
ap_n_fx:
13272
mov.l %d1,%d0 # copy counter to d0
13273
mov.l (%sp),%d1 # get adjusted exp from memory
13274
sub.l %d0,%d1 # subtract count from exp
13275
bgt.b ap_n_fm # if still pos, go fix mantissa
13276
neg.l %d1 # take abs of exp and clr SE
13277
mov.l (%a0),%d4 # load lword 1 to d4
13278
and.l &0xbfffffff,%d4 # and clr SE in d4
13279
and.l &0xbfffffff,(%a0) # and in memory
13280
#
13281
# Calculate the mantissa multiplier to compensate for the appending of
13282
# zeros to the mantissa.
13283
#
13284
ap_n_fm:
13285
lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
13286
clr.l %d3 # init table index
13287
fmov.s &0x3f800000,%fp1 # init fp1 to 1
13288
mov.l &3,%d2 # init d2 to count bits in counter
13289
ap_n_el:
13290
asr.l &1,%d0 # shift lsb into carry
13291
bcc.b ap_n_en # if 1, mul fp1 by pwrten factor
13292
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13293
ap_n_en:
13294
add.l &12,%d3 # inc d3 to next rtable entry
13295
tst.l %d0 # check if d0 is zero
13296
bne.b ap_n_el # if not, get next bit
13297
fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
13298
#
13299
#
13300
# Calculate power-of-ten factor from adjusted and shifted exponent.
13301
#
13302
# Register usage:
13303
#
13304
# pwrten:
13305
# (*) d0: temp
13306
# ( ) d1: exponent
13307
# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308
# (*) d3: FPCR work copy
13309
# ( ) d4: first word of bcd
13310
# (*) a1: RTABLE pointer
13311
# calc_p:
13312
# (*) d0: temp
13313
# ( ) d1: exponent
13314
# (*) d3: PWRTxx table index
13315
# ( ) a0: pointer to working copy of bcd
13316
# (*) a1: PWRTxx pointer
13317
# (*) fp1: power-of-ten accumulator
13318
#
13319
# Pwrten calculates the exponent factor in the selected rounding mode
13320
# according to the following table:
13321
#
13322
# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
13323
#
13324
# ANY ANY RN RN
13325
#
13326
# + + RP RP
13327
# - + RP RM
13328
# + - RP RM
13329
# - - RP RP
13330
#
13331
# + + RM RM
13332
# - + RM RP
13333
# + - RM RP
13334
# - - RM RM
13335
#
13336
# + + RZ RM
13337
# - + RZ RM
13338
# + - RZ RP
13339
# - - RZ RP
13340
#
13341
#
13342
pwrten:
13343
mov.l USER_FPCR(%a6),%d3 # get user's FPCR
13344
bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
13345
mov.l (%a0),%d4 # reload 1st bcd word to d4
13346
asl.l &2,%d2 # format d2 to be
13347
bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
13348
add.l %d0,%d2 # in d2 as index into RTABLE
13349
lea.l RTABLE(%pc),%a1 # load rtable base
13350
mov.b (%a1,%d2),%d0 # load new rounding bits from table
13351
clr.l %d3 # clear d3 to force no exc and extended
13352
bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
13353
fmov.l %d3,%fpcr # write new FPCR
13354
asr.l &1,%d0 # write correct PTENxx table
13355
bcc.b not_rp # to a1
13356
lea.l PTENRP(%pc),%a1 # it is RP
13357
bra.b calc_p # go to init section
13358
not_rp:
13359
asr.l &1,%d0 # keep checking
13360
bcc.b not_rm
13361
lea.l PTENRM(%pc),%a1 # it is RM
13362
bra.b calc_p # go to init section
13363
not_rm:
13364
lea.l PTENRN(%pc),%a1 # it is RN
13365
calc_p:
13366
mov.l %d1,%d0 # copy exp to d0;use d0
13367
bpl.b no_neg # if exp is negative,
13368
neg.l %d0 # invert it
13369
or.l &0x40000000,(%a0) # and set SE bit
13370
no_neg:
13371
clr.l %d3 # table index
13372
fmov.s &0x3f800000,%fp1 # init fp1 to 1
13373
e_loop:
13374
asr.l &1,%d0 # shift next bit into carry
13375
bcc.b e_next # if zero, skip the mul
13376
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13377
e_next:
13378
add.l &12,%d3 # inc d3 to next rtable entry
13379
tst.l %d0 # check if d0 is zero
13380
bne.b e_loop # not zero, continue shifting
13381
#
13382
#
13383
# Check the sign of the adjusted exp and make the value in fp0 the
13384
# same sign. If the exp was pos then multiply fp1*fp0;
13385
# else divide fp0/fp1.
13386
#
13387
# Register Usage:
13388
# norm:
13389
# ( ) a0: pointer to working bcd value
13390
# (*) fp0: mantissa accumulator
13391
# ( ) fp1: scaling factor - 10**(abs(exp))
13392
#
13393
pnorm:
13394
btst &30,(%a0) # test the sign of the exponent
13395
beq.b mul # if clear, go to multiply
13396
div:
13397
fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
13398
bra.b end_dec
13399
mul:
13400
fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
13401
#
13402
#
13403
# Clean up and return with result in fp0.
13404
#
13405
# If the final mul/div in decbin incurred an inex exception,
13406
# it will be inex2, but will be reported as inex1 by get_op.
13407
#
13408
end_dec:
13409
fmov.l %fpsr,%d0 # get status register
13410
bclr &inex2_bit+8,%d0 # test for inex2 and clear it
13411
beq.b no_exc # skip this if no exc
13412
ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413
no_exc:
13414
add.l &0x4,%sp # clear 1 lw param
13415
fmovm.x (%sp)+,&0x40 # restore fp1
13416
movm.l (%sp)+,&0x3c # restore d2-d5
13417
fmov.l &0x0,%fpcr
13418
fmov.l &0x0,%fpsr
13419
rts
13420
13421
#########################################################################
13422
# bindec(): Converts an input in extended precision format to bcd format#
13423
# #
13424
# INPUT *************************************************************** #
13425
# a0 = pointer to the input extended precision value in memory. #
13426
# the input may be either normalized, unnormalized, or #
13427
# denormalized. #
13428
# d0 = contains the k-factor sign-extended to 32-bits. #
13429
# #
13430
# OUTPUT ************************************************************** #
13431
# FP_SCR0(a6) = bcd format result on the stack. #
13432
# #
13433
# ALGORITHM *********************************************************** #
13434
# #
13435
# A1. Set RM and size ext; Set SIGMA = sign of input. #
13436
# The k-factor is saved for use in d7. Clear the #
13437
# BINDEC_FLG for separating normalized/denormalized #
13438
# input. If input is unnormalized or denormalized, #
13439
# normalize it. #
13440
# #
13441
# A2. Set X = abs(input). #
13442
# #
13443
# A3. Compute ILOG. #
13444
# ILOG is the log base 10 of the input value. It is #
13445
# approximated by adding e + 0.f when the original #
13446
# value is viewed as 2^^e * 1.f in extended precision. #
13447
# This value is stored in d6. #
13448
# #
13449
# A4. Clr INEX bit. #
13450
# The operation in A3 above may have set INEX2. #
13451
# #
13452
# A5. Set ICTR = 0; #
13453
# ICTR is a flag used in A13. It must be set before the #
13454
# loop entry A6. #
13455
# #
13456
# A6. Calculate LEN. #
13457
# LEN is the number of digits to be displayed. The #
13458
# k-factor can dictate either the total number of digits, #
13459
# if it is a positive number, or the number of digits #
13460
# after the decimal point which are to be included as #
13461
# significant. See the 68882 manual for examples. #
13462
# If LEN is computed to be greater than 17, set OPERR in #
13463
# USER_FPSR. LEN is stored in d4. #
13464
# #
13465
# A7. Calculate SCALE. #
13466
# SCALE is equal to 10^ISCALE, where ISCALE is the number #
13467
# of decimal places needed to ensure LEN integer digits #
13468
# in the output before conversion to bcd. LAMBDA is the #
13469
# sign of ISCALE, used in A9. Fp1 contains #
13470
# 10^^(abs(ISCALE)) using a rounding mode which is a #
13471
# function of the original rounding mode and the signs #
13472
# of ISCALE and X. A table is given in the code. #
13473
# #
13474
# A8. Clr INEX; Force RZ. #
13475
# The operation in A3 above may have set INEX2. #
13476
# RZ mode is forced for the scaling operation to ensure #
13477
# only one rounding error. The grs bits are collected in #
13478
# the INEX flag for use in A10. #
13479
# #
13480
# A9. Scale X -> Y. #
13481
# The mantissa is scaled to the desired number of #
13482
# significant digits. The excess digits are collected #
13483
# in INEX2. #
13484
# #
13485
# A10. Or in INEX. #
13486
# If INEX is set, round error occurred. This is #
13487
# compensated for by 'or-ing' in the INEX2 flag to #
13488
# the lsb of Y. #
13489
# #
13490
# A11. Restore original FPCR; set size ext. #
13491
# Perform FINT operation in the user's rounding mode. #
13492
# Keep the size to extended. #
13493
# #
13494
# A12. Calculate YINT = FINT(Y) according to user's rounding #
13495
# mode. The FPSP routine sintd0 is used. The output #
13496
# is in fp0. #
13497
# #
13498
# A13. Check for LEN digits. #
13499
# If the int operation results in more than LEN digits, #
13500
# or less than LEN -1 digits, adjust ILOG and repeat from #
13501
# A6. This test occurs only on the first pass. If the #
13502
# result is exactly 10^LEN, decrement ILOG and divide #
13503
# the mantissa by 10. #
13504
# #
13505
# A14. Convert the mantissa to bcd. #
13506
# The binstr routine is used to convert the LEN digit #
13507
# mantissa to bcd in memory. The input to binstr is #
13508
# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
13509
# such that the decimal point is to the left of bit 63. #
13510
# The bcd digits are stored in the correct position in #
13511
# the final string area in memory. #
13512
# #
13513
# A15. Convert the exponent to bcd. #
13514
# As in A14 above, the exp is converted to bcd and the #
13515
# digits are stored in the final string. #
13516
# Test the length of the final exponent string. If the #
13517
# length is 4, set operr. #
13518
# #
13519
# A16. Write sign bits to final string. #
13520
# #
13521
#########################################################################
13522
13523
set BINDEC_FLG, EXC_TEMP # DENORM flag
13524
13525
# Constants in extended precision
13526
PLOG2:
13527
long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13528
PLOG2UP1:
13529
long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13530
13531
# Constants in single precision
13532
FONE:
13533
long 0x3F800000,0x00000000,0x00000000,0x00000000
13534
FTWO:
13535
long 0x40000000,0x00000000,0x00000000,0x00000000
13536
FTEN:
13537
long 0x41200000,0x00000000,0x00000000,0x00000000
13538
F4933:
13539
long 0x459A2800,0x00000000,0x00000000,0x00000000
13540
13541
RBDTBL:
13542
byte 0,0,0,0
13543
byte 3,3,2,2
13544
byte 3,2,2,3
13545
byte 2,3,3,2
13546
13547
# Implementation Notes:
13548
#
13549
# The registers are used as follows:
13550
#
13551
# d0: scratch; LEN input to binstr
13552
# d1: scratch
13553
# d2: upper 32-bits of mantissa for binstr
13554
# d3: scratch;lower 32-bits of mantissa for binstr
13555
# d4: LEN
13556
# d5: LAMBDA/ICTR
13557
# d6: ILOG
13558
# d7: k-factor
13559
# a0: ptr for original operand/final result
13560
# a1: scratch pointer
13561
# a2: pointer to FP_X; abs(original value) in ext
13562
# fp0: scratch
13563
# fp1: scratch
13564
# fp2: scratch
13565
# F_SCR1:
13566
# F_SCR2:
13567
# L_SCR1:
13568
# L_SCR2:
13569
13570
global bindec
13571
bindec:
13572
movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
13573
fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
13574
13575
# A1. Set RM and size ext. Set SIGMA = sign input;
13576
# The k-factor is saved for use in d7. Clear BINDEC_FLG for
13577
# separating normalized/denormalized input. If the input
13578
# is a denormalized number, set the BINDEC_FLG memory word
13579
# to signal denorm. If the input is unnormalized, normalize
13580
# the input and test for denormalized result.
13581
#
13582
fmov.l &rm_mode*0x10,%fpcr # set RM and ext
13583
mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
13584
mov.l %d0,%d7 # move k-factor to d7
13585
13586
clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
13587
cmpi.b STAG(%a6),&DENORM # is input a DENORM?
13588
bne.w A2_str # no; input is a NORM
13589
13590
#
13591
# Normalize the denorm
13592
#
13593
un_de_norm:
13594
mov.w (%a0),%d0
13595
and.w &0x7fff,%d0 # strip sign of normalized exp
13596
mov.l 4(%a0),%d1
13597
mov.l 8(%a0),%d2
13598
norm_loop:
13599
sub.w &1,%d0
13600
lsl.l &1,%d2
13601
roxl.l &1,%d1
13602
tst.l %d1
13603
bge.b norm_loop
13604
#
13605
# Test if the normalized input is denormalized
13606
#
13607
tst.w %d0
13608
bgt.b pos_exp # if greater than zero, it is a norm
13609
st BINDEC_FLG(%a6) # set flag for denorm
13610
pos_exp:
13611
and.w &0x7fff,%d0 # strip sign of normalized exp
13612
mov.w %d0,(%a0)
13613
mov.l %d1,4(%a0)
13614
mov.l %d2,8(%a0)
13615
13616
# A2. Set X = abs(input).
13617
#
13618
A2_str:
13619
mov.l (%a0),FP_SCR1(%a6) # move input to work space
13620
mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
13621
mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
13622
and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
13623
13624
# A3. Compute ILOG.
13625
# ILOG is the log base 10 of the input value. It is approx-
13626
# imated by adding e + 0.f when the original value is viewed
13627
# as 2^^e * 1.f in extended precision. This value is stored
13628
# in d6.
13629
#
13630
# Register usage:
13631
# Input/Output
13632
# d0: k-factor/exponent
13633
# d2: x/x
13634
# d3: x/x
13635
# d4: x/x
13636
# d5: x/x
13637
# d6: x/ILOG
13638
# d7: k-factor/Unchanged
13639
# a0: ptr for original operand/final result
13640
# a1: x/x
13641
# a2: x/x
13642
# fp0: x/float(ILOG)
13643
# fp1: x/x
13644
# fp2: x/x
13645
# F_SCR1:x/x
13646
# F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647
# L_SCR1:x/x
13648
# L_SCR2:first word of X packed/Unchanged
13649
13650
tst.b BINDEC_FLG(%a6) # check for denorm
13651
beq.b A3_cont # if clr, continue with norm
13652
mov.l &-4933,%d6 # force ILOG = -4933
13653
bra.b A4_str
13654
A3_cont:
13655
mov.w FP_SCR1(%a6),%d0 # move exp to d0
13656
mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
13657
fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
13658
sub.w &0x3fff,%d0 # strip off bias
13659
fadd.w %d0,%fp0 # add in exp
13660
fsub.s FONE(%pc),%fp0 # subtract off 1.0
13661
fbge.w pos_res # if pos, branch
13662
fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
13663
fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13664
bra.b A4_str # go move out ILOG
13665
pos_res:
13666
fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
13667
fmov.l %fp0,%d6 # put ILOG in d6 as a lword
13668
13669
13670
# A4. Clr INEX bit.
13671
# The operation in A3 above may have set INEX2.
13672
13673
A4_str:
13674
fmov.l &0,%fpsr # zero all of fpsr - nothing needed
13675
13676
13677
# A5. Set ICTR = 0;
13678
# ICTR is a flag used in A13. It must be set before the
13679
# loop entry A6. The lower word of d5 is used for ICTR.
13680
13681
clr.w %d5 # clear ICTR
13682
13683
# A6. Calculate LEN.
13684
# LEN is the number of digits to be displayed. The k-factor
13685
# can dictate either the total number of digits, if it is
13686
# a positive number, or the number of digits after the
13687
# original decimal point which are to be included as
13688
# significant. See the 68882 manual for examples.
13689
# If LEN is computed to be greater than 17, set OPERR in
13690
# USER_FPSR. LEN is stored in d4.
13691
#
13692
# Register usage:
13693
# Input/Output
13694
# d0: exponent/Unchanged
13695
# d2: x/x/scratch
13696
# d3: x/x
13697
# d4: exc picture/LEN
13698
# d5: ICTR/Unchanged
13699
# d6: ILOG/Unchanged
13700
# d7: k-factor/Unchanged
13701
# a0: ptr for original operand/final result
13702
# a1: x/x
13703
# a2: x/x
13704
# fp0: float(ILOG)/Unchanged
13705
# fp1: x/x
13706
# fp2: x/x
13707
# F_SCR1:x/x
13708
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709
# L_SCR1:x/x
13710
# L_SCR2:first word of X packed/Unchanged
13711
13712
A6_str:
13713
tst.l %d7 # branch on sign of k
13714
ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
13715
mov.l %d7,%d4 # if k > 0, LEN = k
13716
bra.b len_ck # skip to LEN check
13717
k_neg:
13718
mov.l %d6,%d4 # first load ILOG to d4
13719
sub.l %d7,%d4 # subtract off k
13720
addq.l &1,%d4 # add in the 1
13721
len_ck:
13722
tst.l %d4 # LEN check: branch on sign of LEN
13723
ble.b LEN_ng # if neg, set LEN = 1
13724
cmp.l %d4,&17 # test if LEN > 17
13725
ble.b A7_str # if not, forget it
13726
mov.l &17,%d4 # set max LEN = 17
13727
tst.l %d7 # if negative, never set OPERR
13728
ble.b A7_str # if positive, continue
13729
or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
13730
bra.b A7_str # finished here
13731
LEN_ng:
13732
mov.l &1,%d4 # min LEN is 1
13733
13734
13735
# A7. Calculate SCALE.
13736
# SCALE is equal to 10^ISCALE, where ISCALE is the number
13737
# of decimal places needed to insure LEN integer digits
13738
# in the output before conversion to bcd. LAMBDA is the sign
13739
# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
13740
# the rounding mode as given in the following table (see
13741
# Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742
# of opposite sign in bindec.sa from Coonen).
13743
#
13744
# Initial USE
13745
# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
13746
# ----------------------------------------------
13747
# RN 00 0 0 00/0 RN
13748
# RN 00 0 1 00/0 RN
13749
# RN 00 1 0 00/0 RN
13750
# RN 00 1 1 00/0 RN
13751
# RZ 01 0 0 11/3 RP
13752
# RZ 01 0 1 11/3 RP
13753
# RZ 01 1 0 10/2 RM
13754
# RZ 01 1 1 10/2 RM
13755
# RM 10 0 0 11/3 RP
13756
# RM 10 0 1 10/2 RM
13757
# RM 10 1 0 10/2 RM
13758
# RM 10 1 1 11/3 RP
13759
# RP 11 0 0 10/2 RM
13760
# RP 11 0 1 11/3 RP
13761
# RP 11 1 0 11/3 RP
13762
# RP 11 1 1 10/2 RM
13763
#
13764
# Register usage:
13765
# Input/Output
13766
# d0: exponent/scratch - final is 0
13767
# d2: x/0 or 24 for A9
13768
# d3: x/scratch - offset ptr into PTENRM array
13769
# d4: LEN/Unchanged
13770
# d5: 0/ICTR:LAMBDA
13771
# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772
# d7: k-factor/Unchanged
13773
# a0: ptr for original operand/final result
13774
# a1: x/ptr to PTENRM array
13775
# a2: x/x
13776
# fp0: float(ILOG)/Unchanged
13777
# fp1: x/10^ISCALE
13778
# fp2: x/x
13779
# F_SCR1:x/x
13780
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781
# L_SCR1:x/x
13782
# L_SCR2:first word of X packed/Unchanged
13783
13784
A7_str:
13785
tst.l %d7 # test sign of k
13786
bgt.b k_pos # if pos and > 0, skip this
13787
cmp.l %d7,%d6 # test k - ILOG
13788
blt.b k_pos # if ILOG >= k, skip this
13789
mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
13790
k_pos:
13791
mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
13792
addq.l &1,%d0 # add the 1
13793
sub.l %d4,%d0 # sub off LEN
13794
swap %d5 # use upper word of d5 for LAMBDA
13795
clr.w %d5 # set it zero initially
13796
clr.w %d2 # set up d2 for very small case
13797
tst.l %d0 # test sign of ISCALE
13798
bge.b iscale # if pos, skip next inst
13799
addq.w &1,%d5 # if neg, set LAMBDA true
13800
cmp.l %d0,&0xffffecd4 # test iscale <= -4908
13801
bgt.b no_inf # if false, skip rest
13802
add.l &24,%d0 # add in 24 to iscale
13803
mov.l &24,%d2 # put 24 in d2 for A9
13804
no_inf:
13805
neg.l %d0 # and take abs of ISCALE
13806
iscale:
13807
fmov.s FONE(%pc),%fp1 # init fp1 to 1
13808
bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
13809
lsl.w &1,%d1 # put them in bits 2:1
13810
add.w %d5,%d1 # add in LAMBDA
13811
lsl.w &1,%d1 # put them in bits 3:1
13812
tst.l L_SCR2(%a6) # test sign of original x
13813
bge.b x_pos # if pos, don't set bit 0
13814
addq.l &1,%d1 # if neg, set bit 0
13815
x_pos:
13816
lea.l RBDTBL(%pc),%a2 # load rbdtbl base
13817
mov.b (%a2,%d1),%d3 # load d3 with new rmode
13818
lsl.l &4,%d3 # put bits in proper position
13819
fmov.l %d3,%fpcr # load bits into fpu
13820
lsr.l &4,%d3 # put bits in proper position
13821
tst.b %d3 # decode new rmode for pten table
13822
bne.b not_rn # if zero, it is RN
13823
lea.l PTENRN(%pc),%a1 # load a1 with RN table base
13824
bra.b rmode # exit decode
13825
not_rn:
13826
lsr.b &1,%d3 # get lsb in carry
13827
bcc.b not_rp2 # if carry clear, it is RM
13828
lea.l PTENRP(%pc),%a1 # load a1 with RP table base
13829
bra.b rmode # exit decode
13830
not_rp2:
13831
lea.l PTENRM(%pc),%a1 # load a1 with RM table base
13832
rmode:
13833
clr.l %d3 # clr table index
13834
e_loop2:
13835
lsr.l &1,%d0 # shift next bit into carry
13836
bcc.b e_next2 # if zero, skip the mul
13837
fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
13838
e_next2:
13839
add.l &12,%d3 # inc d3 to next pwrten table entry
13840
tst.l %d0 # test if ISCALE is zero
13841
bne.b e_loop2 # if not, loop
13842
13843
# A8. Clr INEX; Force RZ.
13844
# The operation in A3 above may have set INEX2.
13845
# RZ mode is forced for the scaling operation to insure
13846
# only one rounding error. The grs bits are collected in
13847
# the INEX flag for use in A10.
13848
#
13849
# Register usage:
13850
# Input/Output
13851
13852
fmov.l &0,%fpsr # clr INEX
13853
fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
13854
13855
# A9. Scale X -> Y.
13856
# The mantissa is scaled to the desired number of significant
13857
# digits. The excess digits are collected in INEX2. If mul,
13858
# Check d2 for excess 10 exponential value. If not zero,
13859
# the iscale value would have caused the pwrten calculation
13860
# to overflow. Only a negative iscale can cause this, so
13861
# multiply by 10^(d2), which is now only allowed to be 24,
13862
# with a multiply by 10^8 and 10^16, which is exact since
13863
# 10^24 is exact. If the input was denormalized, we must
13864
# create a busy stack frame with the mul command and the
13865
# two operands, and allow the fpu to complete the multiply.
13866
#
13867
# Register usage:
13868
# Input/Output
13869
# d0: FPCR with RZ mode/Unchanged
13870
# d2: 0 or 24/unchanged
13871
# d3: x/x
13872
# d4: LEN/Unchanged
13873
# d5: ICTR:LAMBDA
13874
# d6: ILOG/Unchanged
13875
# d7: k-factor/Unchanged
13876
# a0: ptr for original operand/final result
13877
# a1: ptr to PTENRM array/Unchanged
13878
# a2: x/x
13879
# fp0: float(ILOG)/X adjusted for SCALE (Y)
13880
# fp1: 10^ISCALE/Unchanged
13881
# fp2: x/x
13882
# F_SCR1:x/x
13883
# F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884
# L_SCR1:x/x
13885
# L_SCR2:first word of X packed/Unchanged
13886
13887
A9_str:
13888
fmov.x (%a0),%fp0 # load X from memory
13889
fabs.x %fp0 # use abs(X)
13890
tst.w %d5 # LAMBDA is in lower word of d5
13891
bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
13892
fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
13893
bra.w A10_st # branch to A10
13894
13895
sc_mul:
13896
tst.b BINDEC_FLG(%a6) # check for denorm
13897
beq.w A9_norm # if norm, continue with mul
13898
13899
# for DENORM, we must calculate:
13900
# fp0 = input_op * 10^ISCALE * 10^24
13901
# since the input operand is a DENORM, we can't multiply it directly.
13902
# so, we do the multiplication of the exponents and mantissas separately.
13903
# in this way, we avoid underflow on intermediate stages of the
13904
# multiplication and guarantee a result without exception.
13905
fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
13906
13907
mov.w (%sp),%d3 # grab exponent
13908
andi.w &0x7fff,%d3 # clear sign
13909
ori.w &0x8000,(%a0) # make DENORM exp negative
13910
add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
13911
subi.w &0x3fff,%d3 # subtract BIAS
13912
add.w 36(%a1),%d3
13913
subi.w &0x3fff,%d3 # subtract BIAS
13914
add.w 48(%a1),%d3
13915
subi.w &0x3fff,%d3 # subtract BIAS
13916
13917
bmi.w sc_mul_err # is result is DENORM, punt!!!
13918
13919
andi.w &0x8000,(%sp) # keep sign
13920
or.w %d3,(%sp) # insert new exponent
13921
andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
13922
mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
13923
mov.l 0x4(%a0),-(%sp)
13924
mov.l &0x3fff0000,-(%sp) # force exp to zero
13925
fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
13926
fmul.x (%sp)+,%fp0
13927
13928
# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13929
# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13930
mov.l 36+8(%a1),-(%sp) # get 10^8 mantissa
13931
mov.l 36+4(%a1),-(%sp)
13932
mov.l &0x3fff0000,-(%sp) # force exp to zero
13933
mov.l 48+8(%a1),-(%sp) # get 10^16 mantissa
13934
mov.l 48+4(%a1),-(%sp)
13935
mov.l &0x3fff0000,-(%sp)# force exp to zero
13936
fmul.x (%sp)+,%fp0 # multiply fp0 by 10^8
13937
fmul.x (%sp)+,%fp0 # multiply fp0 by 10^16
13938
bra.b A10_st
13939
13940
sc_mul_err:
13941
bra.b sc_mul_err
13942
13943
A9_norm:
13944
tst.w %d2 # test for small exp case
13945
beq.b A9_con # if zero, continue as normal
13946
fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
13947
fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
13948
A9_con:
13949
fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
13950
13951
# A10. Or in INEX.
13952
# If INEX is set, round error occurred. This is compensated
13953
# for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954
#
13955
# Register usage:
13956
# Input/Output
13957
# d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958
# d2: x/x
13959
# d3: x/x
13960
# d4: LEN/Unchanged
13961
# d5: ICTR:LAMBDA
13962
# d6: ILOG/Unchanged
13963
# d7: k-factor/Unchanged
13964
# a0: ptr for original operand/final result
13965
# a1: ptr to PTENxx array/Unchanged
13966
# a2: x/ptr to FP_SCR1(a6)
13967
# fp0: Y/Y with lsb adjusted
13968
# fp1: 10^ISCALE/Unchanged
13969
# fp2: x/x
13970
13971
A10_st:
13972
fmov.l %fpsr,%d0 # get FPSR
13973
fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
13974
lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
13975
btst &9,%d0 # check if INEX2 set
13976
beq.b A11_st # if clear, skip rest
13977
or.l &1,8(%a2) # or in 1 to lsb of mantissa
13978
fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
13979
13980
13981
# A11. Restore original FPCR; set size ext.
13982
# Perform FINT operation in the user's rounding mode. Keep
13983
# the size to extended. The sintdo entry point in the sint
13984
# routine expects the FPCR value to be in USER_FPCR for
13985
# mode and precision. The original FPCR is saved in L_SCR1.
13986
13987
A11_st:
13988
mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
13989
and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
13990
# ;block exceptions
13991
13992
13993
# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994
# The FPSP routine sintd0 is used. The output is in fp0.
13995
#
13996
# Register usage:
13997
# Input/Output
13998
# d0: FPSR with AINEX cleared/FPCR with size set to ext
13999
# d2: x/x/scratch
14000
# d3: x/x
14001
# d4: LEN/Unchanged
14002
# d5: ICTR:LAMBDA/Unchanged
14003
# d6: ILOG/Unchanged
14004
# d7: k-factor/Unchanged
14005
# a0: ptr for original operand/src ptr for sintdo
14006
# a1: ptr to PTENxx array/Unchanged
14007
# a2: ptr to FP_SCR1(a6)/Unchanged
14008
# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009
# fp0: Y/YINT
14010
# fp1: 10^ISCALE/Unchanged
14011
# fp2: x/x
14012
# F_SCR1:x/x
14013
# F_SCR2:Y adjusted for inex/Y with original exponent
14014
# L_SCR1:x/original USER_FPCR
14015
# L_SCR2:first word of X packed/Unchanged
14016
14017
A12_st:
14018
movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
14019
mov.l L_SCR1(%a6),-(%sp)
14020
mov.l L_SCR2(%a6),-(%sp)
14021
14022
lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
14023
fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
14024
tst.l L_SCR2(%a6) # test sign of original operand
14025
bge.b do_fint12 # if pos, use Y
14026
or.l &0x80000000,(%a0) # if neg, use -Y
14027
do_fint12:
14028
mov.l USER_FPSR(%a6),-(%sp)
14029
# bsr sintdo # sint routine returns int in fp0
14030
14031
fmov.l USER_FPCR(%a6),%fpcr
14032
fmov.l &0x0,%fpsr # clear the AEXC bits!!!
14033
## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
14034
## andi.l &0x00000030,%d0
14035
## fmov.l %d0,%fpcr
14036
fint.x FP_SCR1(%a6),%fp0 # do fint()
14037
fmov.l %fpsr,%d0
14038
or.w %d0,FPSR_EXCEPT(%a6)
14039
## fmov.l &0x0,%fpcr
14040
## fmov.l %fpsr,%d0 # don't keep ccodes
14041
## or.w %d0,FPSR_EXCEPT(%a6)
14042
14043
mov.b (%sp),USER_FPSR(%a6)
14044
add.l &4,%sp
14045
14046
mov.l (%sp)+,L_SCR2(%a6)
14047
mov.l (%sp)+,L_SCR1(%a6)
14048
movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
14049
14050
mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
14051
mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
14052
14053
# A13. Check for LEN digits.
14054
# If the int operation results in more than LEN digits,
14055
# or less than LEN -1 digits, adjust ILOG and repeat from
14056
# A6. This test occurs only on the first pass. If the
14057
# result is exactly 10^LEN, decrement ILOG and divide
14058
# the mantissa by 10. The calculation of 10^LEN cannot
14059
# be inexact, since all powers of ten up to 10^27 are exact
14060
# in extended precision, so the use of a previous power-of-ten
14061
# table will introduce no error.
14062
#
14063
#
14064
# Register usage:
14065
# Input/Output
14066
# d0: FPCR with size set to ext/scratch final = 0
14067
# d2: x/x
14068
# d3: x/scratch final = x
14069
# d4: LEN/LEN adjusted
14070
# d5: ICTR:LAMBDA/LAMBDA:ICTR
14071
# d6: ILOG/ILOG adjusted
14072
# d7: k-factor/Unchanged
14073
# a0: pointer into memory for packed bcd string formation
14074
# a1: ptr to PTENxx array/Unchanged
14075
# a2: ptr to FP_SCR1(a6)/Unchanged
14076
# fp0: int portion of Y/abs(YINT) adjusted
14077
# fp1: 10^ISCALE/Unchanged
14078
# fp2: x/10^LEN
14079
# F_SCR1:x/x
14080
# F_SCR2:Y with original exponent/Unchanged
14081
# L_SCR1:original USER_FPCR/Unchanged
14082
# L_SCR2:first word of X packed/Unchanged
14083
14084
A13_st:
14085
swap %d5 # put ICTR in lower word of d5
14086
tst.w %d5 # check if ICTR = 0
14087
bne not_zr # if non-zero, go to second test
14088
#
14089
# Compute 10^(LEN-1)
14090
#
14091
fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14092
mov.l %d4,%d0 # put LEN in d0
14093
subq.l &1,%d0 # d0 = LEN -1
14094
clr.l %d3 # clr table index
14095
l_loop:
14096
lsr.l &1,%d0 # shift next bit into carry
14097
bcc.b l_next # if zero, skip the mul
14098
fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14099
l_next:
14100
add.l &12,%d3 # inc d3 to next pwrten table entry
14101
tst.l %d0 # test if LEN is zero
14102
bne.b l_loop # if not, loop
14103
#
14104
# 10^LEN-1 is computed for this test and A14. If the input was
14105
# denormalized, check only the case in which YINT > 10^LEN.
14106
#
14107
tst.b BINDEC_FLG(%a6) # check if input was norm
14108
beq.b A13_con # if norm, continue with checking
14109
fabs.x %fp0 # take abs of YINT
14110
bra test_2
14111
#
14112
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113
#
14114
A13_con:
14115
fabs.x %fp0 # take abs of YINT
14116
fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
14117
fbge.w test_2 # if greater, do next test
14118
subq.l &1,%d6 # subtract 1 from ILOG
14119
mov.w &1,%d5 # set ICTR
14120
fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14121
fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14122
bra.w A6_str # return to A6 and recompute YINT
14123
test_2:
14124
fmul.s FTEN(%pc),%fp2 # compute 10^LEN
14125
fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
14126
fblt.w A14_st # if less, all is ok, go to A14
14127
fbgt.w fix_ex # if greater, fix and redo
14128
fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
14129
addq.l &1,%d6 # and inc ILOG
14130
bra.b A14_st # and continue elsewhere
14131
fix_ex:
14132
addq.l &1,%d6 # increment ILOG by 1
14133
mov.w &1,%d5 # set ICTR
14134
fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
14135
bra.w A6_str # return to A6 and recompute YINT
14136
#
14137
# Since ICTR <> 0, we have already been through one adjustment,
14138
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139
# 10^LEN is again computed using whatever table is in a1 since the
14140
# value calculated cannot be inexact.
14141
#
14142
not_zr:
14143
fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
14144
mov.l %d4,%d0 # put LEN in d0
14145
clr.l %d3 # clr table index
14146
z_loop:
14147
lsr.l &1,%d0 # shift next bit into carry
14148
bcc.b z_next # if zero, skip the mul
14149
fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
14150
z_next:
14151
add.l &12,%d3 # inc d3 to next pwrten table entry
14152
tst.l %d0 # test if LEN is zero
14153
bne.b z_loop # if not, loop
14154
fabs.x %fp0 # get abs(YINT)
14155
fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
14156
fbneq.w A14_st # if not, skip this
14157
fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
14158
addq.l &1,%d6 # and inc ILOG by 1
14159
addq.l &1,%d4 # and inc LEN
14160
fmul.s FTEN(%pc),%fp2 # if LEN++, the get 10^^LEN
14161
14162
# A14. Convert the mantissa to bcd.
14163
# The binstr routine is used to convert the LEN digit
14164
# mantissa to bcd in memory. The input to binstr is
14165
# to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166
# such that the decimal point is to the left of bit 63.
14167
# The bcd digits are stored in the correct position in
14168
# the final string area in memory.
14169
#
14170
#
14171
# Register usage:
14172
# Input/Output
14173
# d0: x/LEN call to binstr - final is 0
14174
# d1: x/0
14175
# d2: x/ms 32-bits of mant of abs(YINT)
14176
# d3: x/ls 32-bits of mant of abs(YINT)
14177
# d4: LEN/Unchanged
14178
# d5: ICTR:LAMBDA/LAMBDA:ICTR
14179
# d6: ILOG
14180
# d7: k-factor/Unchanged
14181
# a0: pointer into memory for packed bcd string formation
14182
# /ptr to first mantissa byte in result string
14183
# a1: ptr to PTENxx array/Unchanged
14184
# a2: ptr to FP_SCR1(a6)/Unchanged
14185
# fp0: int portion of Y/abs(YINT) adjusted
14186
# fp1: 10^ISCALE/Unchanged
14187
# fp2: 10^LEN/Unchanged
14188
# F_SCR1:x/Work area for final result
14189
# F_SCR2:Y with original exponent/Unchanged
14190
# L_SCR1:original USER_FPCR/Unchanged
14191
# L_SCR2:first word of X packed/Unchanged
14192
14193
A14_st:
14194
fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
14195
fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
14196
lea.l FP_SCR0(%a6),%a0
14197
fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
14198
mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
14199
mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
14200
clr.l 4(%a0) # zero word 2 of FP_RES
14201
clr.l 8(%a0) # zero word 3 of FP_RES
14202
mov.l (%a0),%d0 # move exponent to d0
14203
swap %d0 # put exponent in lower word
14204
beq.b no_sft # if zero, don't shift
14205
sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
14206
tst.l %d0 # check if > 1
14207
bgt.b no_sft # if so, don't shift
14208
neg.l %d0 # make exp positive
14209
m_loop:
14210
lsr.l &1,%d2 # shift d2:d3 right, add 0s
14211
roxr.l &1,%d3 # the number of places
14212
dbf.w %d0,m_loop # given in d0
14213
no_sft:
14214
tst.l %d2 # check for mantissa of zero
14215
bne.b no_zr # if not, go on
14216
tst.l %d3 # continue zero check
14217
beq.b zer_m # if zero, go directly to binstr
14218
no_zr:
14219
clr.l %d1 # put zero in d1 for addx
14220
add.l &0x00000080,%d3 # inc at bit 7
14221
addx.l %d1,%d2 # continue inc
14222
and.l &0xffffff80,%d3 # strip off lsb not used by 882
14223
zer_m:
14224
mov.l %d4,%d0 # put LEN in d0 for binstr call
14225
addq.l &3,%a0 # a0 points to M16 byte in result
14226
bsr binstr # call binstr to convert mant
14227
14228
14229
# A15. Convert the exponent to bcd.
14230
# As in A14 above, the exp is converted to bcd and the
14231
# digits are stored in the final string.
14232
#
14233
# Digits are stored in L_SCR1(a6) on return from BINDEC as:
14234
#
14235
# 32 16 15 0
14236
# -----------------------------------------
14237
# | 0 | e3 | e2 | e1 | e4 | X | X | X |
14238
# -----------------------------------------
14239
#
14240
# And are moved into their proper places in FP_SCR0. If digit e4
14241
# is non-zero, OPERR is signaled. In all cases, all 4 digits are
14242
# written as specified in the 881/882 manual for packed decimal.
14243
#
14244
# Register usage:
14245
# Input/Output
14246
# d0: x/LEN call to binstr - final is 0
14247
# d1: x/scratch (0);shift count for final exponent packing
14248
# d2: x/ms 32-bits of exp fraction/scratch
14249
# d3: x/ls 32-bits of exp fraction
14250
# d4: LEN/Unchanged
14251
# d5: ICTR:LAMBDA/LAMBDA:ICTR
14252
# d6: ILOG
14253
# d7: k-factor/Unchanged
14254
# a0: ptr to result string/ptr to L_SCR1(a6)
14255
# a1: ptr to PTENxx array/Unchanged
14256
# a2: ptr to FP_SCR1(a6)/Unchanged
14257
# fp0: abs(YINT) adjusted/float(ILOG)
14258
# fp1: 10^ISCALE/Unchanged
14259
# fp2: 10^LEN/Unchanged
14260
# F_SCR1:Work area for final result/BCD result
14261
# F_SCR2:Y with original exponent/ILOG/10^4
14262
# L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263
# L_SCR2:first word of X packed/Unchanged
14264
14265
A15_st:
14266
tst.b BINDEC_FLG(%a6) # check for denorm
14267
beq.b not_denorm
14268
ftest.x %fp0 # test for zero
14269
fbeq.w den_zero # if zero, use k-factor or 4933
14270
fmov.l %d6,%fp0 # float ILOG
14271
fabs.x %fp0 # get abs of ILOG
14272
bra.b convrt
14273
den_zero:
14274
tst.l %d7 # check sign of the k-factor
14275
blt.b use_ilog # if negative, use ILOG
14276
fmov.s F4933(%pc),%fp0 # force exponent to 4933
14277
bra.b convrt # do it
14278
use_ilog:
14279
fmov.l %d6,%fp0 # float ILOG
14280
fabs.x %fp0 # get abs of ILOG
14281
bra.b convrt
14282
not_denorm:
14283
ftest.x %fp0 # test for zero
14284
fbneq.w not_zero # if zero, force exponent
14285
fmov.s FONE(%pc),%fp0 # force exponent to 1
14286
bra.b convrt # do it
14287
not_zero:
14288
fmov.l %d6,%fp0 # float ILOG
14289
fabs.x %fp0 # get abs of ILOG
14290
convrt:
14291
fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
14292
fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
14293
mov.l 4(%a2),%d2 # move word 2 to d2
14294
mov.l 8(%a2),%d3 # move word 3 to d3
14295
mov.w (%a2),%d0 # move exp to d0
14296
beq.b x_loop_fin # if zero, skip the shift
14297
sub.w &0x3ffd,%d0 # subtract off bias
14298
neg.w %d0 # make exp positive
14299
x_loop:
14300
lsr.l &1,%d2 # shift d2:d3 right
14301
roxr.l &1,%d3 # the number of places
14302
dbf.w %d0,x_loop # given in d0
14303
x_loop_fin:
14304
clr.l %d1 # put zero in d1 for addx
14305
add.l &0x00000080,%d3 # inc at bit 6
14306
addx.l %d1,%d2 # continue inc
14307
and.l &0xffffff80,%d3 # strip off lsb not used by 882
14308
mov.l &4,%d0 # put 4 in d0 for binstr call
14309
lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14310
bsr binstr # call binstr to convert exp
14311
mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14312
mov.l &12,%d1 # use d1 for shift count
14313
lsr.l %d1,%d0 # shift d0 right by 12
14314
bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
14315
lsr.l %d1,%d0 # shift d0 right by 12
14316
bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
14317
tst.b %d0 # check if e4 is zero
14318
beq.b A16_st # if zero, skip rest
14319
or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
14320
14321
14322
# A16. Write sign bits to final string.
14323
# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324
#
14325
# Register usage:
14326
# Input/Output
14327
# d0: x/scratch - final is x
14328
# d2: x/x
14329
# d3: x/x
14330
# d4: LEN/Unchanged
14331
# d5: ICTR:LAMBDA/LAMBDA:ICTR
14332
# d6: ILOG/ILOG adjusted
14333
# d7: k-factor/Unchanged
14334
# a0: ptr to L_SCR1(a6)/Unchanged
14335
# a1: ptr to PTENxx array/Unchanged
14336
# a2: ptr to FP_SCR1(a6)/Unchanged
14337
# fp0: float(ILOG)/Unchanged
14338
# fp1: 10^ISCALE/Unchanged
14339
# fp2: 10^LEN/Unchanged
14340
# F_SCR1:BCD result with correct signs
14341
# F_SCR2:ILOG/10^4
14342
# L_SCR1:Exponent digits on return from binstr
14343
# L_SCR2:first word of X packed/Unchanged
14344
14345
A16_st:
14346
clr.l %d0 # clr d0 for collection of signs
14347
and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
14348
tst.l L_SCR2(%a6) # check sign of original mantissa
14349
bge.b mant_p # if pos, don't set SM
14350
mov.l &2,%d0 # move 2 in to d0 for SM
14351
mant_p:
14352
tst.l %d6 # check sign of ILOG
14353
bge.b wr_sgn # if pos, don't set SE
14354
addq.l &1,%d0 # set bit 0 in d0 for SE
14355
wr_sgn:
14356
bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14357
14358
# Clean up and restore all registers used.
14359
14360
fmov.l &0,%fpsr # clear possible inex2/ainex bits
14361
fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
14362
movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
14363
rts
14364
14365
# Power-of-ten lookup table, round-to-nearest.
# Thirteen 12-byte extended-precision entries: entry i (offset 12*i)
# holds 10^(2^i) for i = 0..12 (10^1 ... 10^4096), each as one
# exponent lword followed by two mantissa lwords. The binary-exponent
# loops above walk this table in 12-byte steps to build 10^N by
# multiplying the entries selected by the set bits of N.
global PTENRN
PTENRN:
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
14380
14381
# Power-of-ten lookup table, round-to-plus-infinity.
# Same layout as PTENRN (12 bytes per entry, 10^(2^i) for i = 0..12);
# entries whose exact value is not representable are rounded up, so
# some differ from PTENRN only in the last mantissa lword
# (e.g. 10^64 ends ...A6D6 here vs ...A6D5 in PTENRN).
global PTENRP
PTENRP:
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
14396
14397
# Power-of-ten lookup table, round-to-minus-infinity.
# Same layout as PTENRN (12 bytes per entry, 10^(2^i) for i = 0..12);
# inexact entries are rounded down, so several last-mantissa lwords
# are one ulp below the PTENRN values (e.g. 10^32 ends ...B59D here
# vs ...B59E in PTENRN).
global PTENRM
PTENRM:
long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
14412
14413
#########################################################################
14414
# binstr(): Converts a 64-bit binary integer to bcd. #
14415
# #
14416
# INPUT *************************************************************** #
14417
# d2:d3 = 64-bit binary integer #
14418
# d0 = desired length (LEN) #
14419
# a0 = pointer to start in memory for bcd characters #
14420
# (This pointer must point to byte 4 of the first #
14421
# lword of the packed decimal memory string.) #
14422
# #
14423
# OUTPUT ************************************************************** #
14424
# a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14425
# #
14426
# ALGORITHM *********************************************************** #
14427
# The 64-bit binary is assumed to have a decimal point before #
14428
# bit 63. The fraction is multiplied by 10 using a mul by 2 #
14429
# shift and a mul by 8 shift. The bits shifted out of the #
14430
# msb form a decimal digit. This process is iterated until #
14431
# LEN digits are formed. #
14432
# #
14433
# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
14434
# digit formed will be assumed the least significant. This is #
14435
# to force the first byte formed to have a 0 in the upper 4 bits. #
14436
# #
14437
# A2. Beginning of the loop: #
14438
# Copy the fraction in d2:d3 to d4:d5. #
14439
# #
14440
# A3. Multiply the fraction in d2:d3 by 8 using bit-field #
14441
# extracts and shifts. The three msbs from d2 will go into d1. #
14442
# #
14443
# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
14444
# will be collected by the carry. #
14445
# #
14446
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
14447
# into d2:d3. D1 will contain the bcd digit formed. #
14448
# #
14449
# A6. Test d7. If zero, the digit formed is the ms digit. If non- #
14450
# zero, it is the ls digit. Put the digit in its place in the #
14451
# upper word of d0. If it is the ls digit, write the word #
14452
# from d0 to memory. #
14453
# #
14454
# A7. Decrement d0 (LEN counter) and repeat the loop until zero. #
14455
# #
14456
#########################################################################
14457
14458
# Implementation Notes:
14459
#
14460
# The registers are used as follows:
14461
#
14462
# d0: LEN counter
14463
# d1: temp used to form the digit
14464
# d2: upper 32-bits of fraction for mul by 8
14465
# d3: lower 32-bits of fraction for mul by 8
14466
# d4: upper 32-bits of fraction for mul by 2
14467
# d5: lower 32-bits of fraction for mul by 2
14468
# d6: temp for bit-field extracts
14469
# d7: byte digit formation word;digit count {0,1}
14470
# a0: pointer into memory for packed bcd string formation
14471
#
14472
14473
# binstr: convert the 64-bit binary fraction in d2:d3 (decimal point
# assumed left of bit 63) to d0 packed-bcd digits, written a byte
# (two digits) at a time through (a0)+. Each iteration computes
# frac*10 = frac*8 + frac*2 and the bits shifted out of the top form
# the next decimal digit. d0-d7 are saved/restored; a0 is advanced
# past the bytes written.
global binstr
binstr:
movm.l &0xff00,-(%sp) # {%d0-%d7}

#
# A1: Init d7
#
mov.l &1,%d7 # d7=1: treat first digit formed as ls nibble,
# forcing a 0 in the upper 4 bits of the first byte
subq.l &1,%d0 # for dbf d0 would have LEN+1 passes
#
# A2. Copy d2:d3 to d4:d5. Start loop.
#
loop:
mov.l %d2,%d4 # copy the fraction before muls
mov.l %d3,%d5 # to d4:d5
#
# A3. Multiply d2:d3 by 8; extract msbs into d1.
#
bfextu %d2{&0:&3},%d1 # copy 3 msbs of d2 into d1
asl.l &3,%d2 # shift d2 left by 3 places
bfextu %d3{&0:&3},%d6 # copy 3 msbs of d3 into d6
asl.l &3,%d3 # shift d3 left by 3 places
or.l %d6,%d2 # or in msbs from d3 into d2
#
# A4. Multiply d4:d5 by 2; add carry out to d1.
#
asl.l &1,%d5 # mul d5 by 2
roxl.l &1,%d4 # mul d4 by 2
swap %d6 # put 0 in d6 lower word
addx.w %d6,%d1 # add in extend from mul by 2
#
# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
#
add.l %d5,%d3 # add lower 32 bits
nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
addx.l %d4,%d2 # add with extend upper 32 bits
nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
addx.w %d6,%d1 # add in extend from add to d1
swap %d6 # with d6 = 0; put 0 in upper word
#
# A6. Test d7 and branch.
# d7a (active word) == 0: this digit is the ms nibble of a new byte;
# d7a != 0: this digit is the ls nibble — combine with the held ms
# nibble (in d7b) and write the byte out.
#
tst.w %d7 # ls-digit flag set?
beq.b first_d # no: just hold this digit as ms nibble
sec_d:
swap %d7 # bring first digit to word d7b
asl.w &4,%d7 # first digit in upper 4 bits d7b
add.w %d1,%d7 # add in ls digit to d7b
mov.b %d7,(%a0)+ # store d7b byte in memory
swap %d7 # put LEN counter in word d7a
clr.w %d7 # set d7a to signal no digits done
dbf.w %d0,loop # do loop some more!
bra.b end_bstr # finished, so exit
first_d:
swap %d7 # put digit word in d7b
mov.w %d1,%d7 # put new digit in d7b
swap %d7 # put LEN counter in word d7a
addq.w &1,%d7 # set d7a to signal first digit done
dbf.w %d0,loop # do loop some more!
swap %d7 # odd digit count: flush the held ms nibble
lsl.w &4,%d7 # move it to upper 4 bits
mov.b %d7,(%a0)+ # store it in memory string
#
# Clean up and return; the packed bcd digits have already been
# written to memory through (%a0)+ (fp0 is not touched here).
#
end_bstr:
movm.l (%sp)+,&0xff # {%d0-%d7}
rts
14541
14542
#########################################################################
14543
# XDEF **************************************************************** #
14544
# facc_in_b(): dmem_read_byte failed #
14545
# facc_in_w(): dmem_read_word failed #
14546
# facc_in_l(): dmem_read_long failed #
14547
# facc_in_d(): dmem_read of dbl prec failed #
14548
# facc_in_x(): dmem_read of ext prec failed #
14549
# #
14550
# facc_out_b(): dmem_write_byte failed #
14551
# facc_out_w(): dmem_write_word failed #
14552
# facc_out_l(): dmem_write_long failed #
14553
# facc_out_d(): dmem_write of dbl prec failed #
14554
# facc_out_x(): dmem_write of ext prec failed #
14555
# #
14556
# XREF **************************************************************** #
14557
# _real_access() - exit through access error handler #
14558
# #
14559
# INPUT *************************************************************** #
14560
# None #
14561
# #
14562
# OUTPUT ************************************************************** #
14563
# None #
14564
# #
14565
# ALGORITHM *********************************************************** #
14566
# Flow jumps here when an FP data fetch call gets an error #
14567
# result. This means the operating system wants an access error frame #
14568
# made out of the current exception stack frame. #
14569
# So, we first call restore() which makes sure that any updated #
14570
# -(an)+ register gets returned to its pre-exception value and then #
14571
# we change the stack to an access error stack frame. #
14572
# #
14573
#########################################################################
14574
14575
# Access-error exits for failed FP data memory reads/writes.
# Each stub loads d0 with the access size in bytes (restore() uses it
# to back out any (An)+ / -(An) register update the emulation already
# made), stores the Fault Status Long Word for that access in
# EXC_VOFF(%a6), and joins facc_finish.
facc_in_b:
movq.l &0x1,%d0 # one byte
bsr.w restore # fix An
mov.w &0x0121,EXC_VOFF(%a6) # set FSLW (byte read)
bra.w facc_finish

facc_in_w:
movq.l &0x2,%d0 # two bytes
bsr.w restore # fix An
mov.w &0x0141,EXC_VOFF(%a6) # set FSLW (word read)
bra.b facc_finish

facc_in_l:
movq.l &0x4,%d0 # four bytes
bsr.w restore # fix An
mov.w &0x0101,EXC_VOFF(%a6) # set FSLW (long read)
bra.b facc_finish

facc_in_d:
movq.l &0x8,%d0 # eight bytes
bsr.w restore # fix An
mov.w &0x0161,EXC_VOFF(%a6) # set FSLW (same size encoding as ext read)
bra.b facc_finish

facc_in_x:
movq.l &0xc,%d0 # twelve bytes
bsr.w restore # fix An
mov.w &0x0161,EXC_VOFF(%a6) # set FSLW
bra.b facc_finish

################################################################

facc_out_b:
movq.l &0x1,%d0 # one byte
bsr.w restore # restore An
mov.w &0x00a1,EXC_VOFF(%a6) # set FSLW (byte write)
bra.b facc_finish

facc_out_w:
movq.l &0x2,%d0 # two bytes
bsr.w restore # restore An
mov.w &0x00c1,EXC_VOFF(%a6) # set FSLW (word write)
bra.b facc_finish

facc_out_l:
movq.l &0x4,%d0 # four bytes
bsr.w restore # restore An
mov.w &0x0081,EXC_VOFF(%a6) # set FSLW (long write)
bra.b facc_finish

facc_out_d:
movq.l &0x8,%d0 # eight bytes
bsr.w restore # restore An
mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW (same size encoding as ext write)
bra.b facc_finish

facc_out_x:
mov.l &0xc,%d0 # twelve bytes (NOTE(review): movq.l elsewhere;
# mov.l here — same effect, just inconsistent)
bsr.w restore # restore An
mov.w &0x00e1,EXC_VOFF(%a6) # set FSLW

# here's where we actually create the access error frame from the
# current exception stack frame: restore the user-visible FP and
# integer state, drop our link frame, then rewrite the stacked words
# into the access-error layout (SR/PC, EA, FSLW, voff $4008) that
# _real_access expects.
facc_finish:
mov.l USER_FPIAR(%a6),EXC_PC(%a6) # store current PC

fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1

unlk %a6

mov.l (%sp),-(%sp) # store SR, hi(PC)
mov.l 0x8(%sp),0x4(%sp) # store lo(PC)
mov.l 0xc(%sp),0x8(%sp) # store EA
mov.l &0x00000001,0xc(%sp) # store FSLW (low word = 1)
mov.w 0x6(%sp),0xc(%sp) # fix FSLW (size) — handler's FSLW word,
# written to EXC_VOFF above, sits at 0x6(%sp) after unlk
mov.w &0x4008,0x6(%sp) # store voff

btst &0x5,(%sp) # supervisor or user mode?
beq.b facc_out2 # user
bset &0x2,0xd(%sp) # set supervisor TM bit

facc_out2:
bra.l _real_access
14670
14671
##################################################################
14672
14673
# if the effective addressing mode was predecrement or postincrement,
14674
# the emulation has already changed its value to the correct post-
14675
# instruction value. but since we're exiting to the access error
14676
# handler, then AN must be returned to its pre-instruction value.
14677
# we do that here.
14678
# restore: undo the post-instruction (An)+ / -(An) register update so
# the access error handler sees the pre-instruction value.
# In: d0 = access size in bytes. Other EA modes need no fixup.
restore:
mov.b EXC_OPWORD+0x1(%a6),%d1 # low byte of opword holds mode/reg
andi.b &0x38,%d1 # extract opmode
cmpi.b %d1,&0x18 # postinc?
beq.w rest_inc
cmpi.b %d1,&0x20 # predec?
beq.w rest_dec
rts # other modes: nothing to undo

# postinc: subtract d0 from the proper An via a pc-relative jump table
rest_inc:
mov.b EXC_OPWORD+0x1(%a6),%d1
andi.w &0x0007,%d1 # fetch An

mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1 # fetch handler offset
jmp (tbl_rest_inc.b,%pc,%d1.w*1) # jump to per-register fixup

tbl_rest_inc:
short ri_a0 - tbl_rest_inc
short ri_a1 - tbl_rest_inc
short ri_a2 - tbl_rest_inc
short ri_a3 - tbl_rest_inc
short ri_a4 - tbl_rest_inc
short ri_a5 - tbl_rest_inc
short ri_a6 - tbl_rest_inc
short ri_a7 - tbl_rest_inc

# a0/a1 were saved to the exception frame, so fix the stacked copies;
# a2-a5 are still live, so fix the registers directly.
ri_a0:
sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
rts
ri_a1:
sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
rts
ri_a2:
sub.l %d0,%a2 # fix a2
rts
ri_a3:
sub.l %d0,%a3 # fix a3
rts
ri_a4:
sub.l %d0,%a4 # fix a4
rts
ri_a5:
sub.l %d0,%a5 # fix a5
rts
ri_a6:
sub.l %d0,(%a6) # fix stacked a6 (a6 is the frame ptr, so the
# saved value lives at (%a6))
rts
# if it's a fmove out instruction, we don't have to fix a7
# because we hadn't changed it yet. if it's an opclass two
# instruction (data moved in) and the exception was in supervisor
# mode, then it also wasn't updated. if it was user mode, then
# restore the correct a7 which is in the USP currently.
ri_a7:
cmpi.b EXC_VOFF(%a6),&0x30 # move in or out?
bne.b ri_a7_done # out
btst &0x5,EXC_SR(%a6) # user or supervisor?
bne.b ri_a7_done # supervisor
movc %usp,%a0 # restore USP
sub.l %d0,%a0
movc %a0,%usp
ri_a7_done:
rts

# need to invert adjustment value if the <ea> was predec:
# the emulation decremented An, so add the size back
# (neg + the shared sub in rest_inc == add)
rest_dec:
neg.l %d0
bra.b rest_inc
14746
14747