CoCalc -- pfpsp.S

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/m68k/ifpsp060/src/pfpsp.S
¹⁰⁸²⁰ views
1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6

7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
8

9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15

16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22

23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
# freal.s:
30
#	This file is appended to the top of the 060FPSP package
31
# and contains the entry points into the package. The user, in
32
# effect, branches to one of the branch table entries located
33
# after _060FPSP_TABLE.
34
#	Also, subroutine stubs exist in this file (_fpsp_done for
35
# example) that are referenced by the FPSP package itself in order
36
# to call a given routine. The stub routine actually performs the
37
# callout. The FPSP code does a "bsr" to the stub routine. This
38
# extra layer of hierarchy adds a slight performance penalty but
39
# it makes the FPSP code easier to read and more maintainable.
40
#
41

42
# Byte offsets of the call-out slots. Each stub in this file reads the
# longword stored at (_060FPSP_TABLE-0x80+_off_xxx) and adds it to
# (_060FPSP_TABLE-0x80) to form the address of the routine it jumps to.
# NOTE(review): the slots are presumably filled in by the host system
# before the package is used -- confirm against the package documentation.

# exception and exit call-outs
set	_off_bsun,	0x00			# used by _real_bsun
set	_off_snan,	0x04			# used by _real_snan
set	_off_operr,	0x08			# used by _real_operr
set	_off_ovfl,	0x0c			# used by _real_ovfl
set	_off_unfl,	0x10			# used by _real_unfl
set	_off_dz,	0x14			# used by _real_dz
set	_off_inex,	0x18			# used by _real_inex
set	_off_fline,	0x1c			# used by _real_fline
set	_off_fpu_dis,	0x20			# used by _real_fpu_disabled
set	_off_trap,	0x24			# used by _real_trap
set	_off_trace,	0x28			# used by _real_trace
set	_off_access,	0x2c			# used by _real_access
set	_off_done,	0x30			# used by _fpsp_done

# memory-access call-outs
set	_off_imr,	0x40			# used by _imem_read
set	_off_dmr,	0x44			# used by _dmem_read
set	_off_dmw,	0x48			# used by _dmem_write
set	_off_irw,	0x4c			# used by _imem_read_word
set	_off_irl,	0x50			# used by _imem_read_long
set	_off_drb,	0x54			# used by _dmem_read_byte
set	_off_drw,	0x58			# used by _dmem_read_word
set	_off_drl,	0x5c			# used by _dmem_read_long
set	_off_dwb,	0x60			# used by _dmem_write_byte
set	_off_dww,	0x64			# used by _dmem_write_word
set	_off_dwl,	0x68			# used by _dmem_write_long
67

68
_060FPSP_TABLE:

###############################################################

# Here's the table of ENTRY POINTS for those linking the package.
# Every entry is a long branch to one handler followed by a zero word,
# so consecutive entries sit at a fixed stride. The order of the entries
# is part of the package interface and must not be changed.
	bra.l		_fpsp_snan		# entry 0: signalling NAN
	short		0x0000
	bra.l		_fpsp_operr		# entry 1: operand error
	short		0x0000
	bra.l		_fpsp_ovfl		# entry 2: overflow
	short		0x0000
	bra.l		_fpsp_unfl		# entry 3: underflow
	short		0x0000
	bra.l		_fpsp_dz		# entry 4: divide by zero
	short		0x0000
	bra.l		_fpsp_inex		# entry 5: inexact
	short		0x0000
	bra.l		_fpsp_fline		# entry 6: F-line
	short		0x0000
	bra.l		_fpsp_unsupp		# entry 7: unsupported
	short		0x0000
	bra.l		_fpsp_effadd		# entry 8: effective address
	short		0x0000

# reserved space following the last entry
	space		56
93

94
###############################################################
95
	global		_fpsp_done
# _fpsp_done(): call-out stub for the package exit routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_done
# slot. %d0 and %sp hold their entry values again when the target runs.
_fpsp_done:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
102

103
	global		_real_ovfl
# _real_ovfl(): call-out stub for the enabled-overflow handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_ovfl
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_ovfl:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
110

111
	global		_real_unfl
# _real_unfl(): call-out stub for the enabled-underflow handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_unfl
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_unfl:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
118

119
	global		_real_inex
# _real_inex(): call-out stub for the enabled-inexact handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_inex
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_inex:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
126

127
	global		_real_bsun
# _real_bsun(): call-out stub for the branch/set-on-unordered handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_bsun
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_bsun:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
134

135
	global		_real_operr
# _real_operr(): call-out stub for the operand-error handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_operr
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_operr:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
142

143
	global		_real_snan
# _real_snan(): call-out stub for the signalling-NAN handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_snan
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_snan:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
150

151
	global		_real_dz
# _real_dz(): call-out stub for the divide-by-zero handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_dz
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_dz:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
158

159
	global		_real_fline
# _real_fline(): call-out stub for the F-line handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_fline
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_fline:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
166

167
	global		_real_fpu_disabled
# _real_fpu_disabled(): call-out stub for the FPU-disabled handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_fpu_dis
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_fpu_disabled:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
174

175
	global		_real_trap
# _real_trap(): call-out stub for the trap handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_trap
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_trap:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
182

183
	global		_real_trace
# _real_trace(): call-out stub for the trace handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_trace
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_trace:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
190

191
	global		_real_access
# _real_access(): call-out stub for the access-error handler.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_access
# slot. %d0 and %sp hold their entry values again when the target runs.
_real_access:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
198

199
#######################################
200

201
	global		_imem_read
# _imem_read(): call-out stub for the instruction-memory read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_imr
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_imem_read:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
208

209
	global		_dmem_read
# _dmem_read(): call-out stub for the data-memory read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_dmr
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_read:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
216

217
	global		_dmem_write
# _dmem_write(): call-out stub for the data-memory write routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_dmw
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_write:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
224

225
	global		_imem_read_word
# _imem_read_word(): call-out stub for the instruction-word read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_irw
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_imem_read_word:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
232

233
	global		_imem_read_long
# _imem_read_long(): call-out stub for the instruction-longword read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_irl
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_imem_read_long:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
240

241
	global		_dmem_read_byte
# _dmem_read_byte(): call-out stub for the data-byte read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_drb
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_read_byte:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
248

249
	global		_dmem_read_word
# _dmem_read_word(): call-out stub for the data-word read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_drw
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_read_word:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
256

257
	global		_dmem_read_long
# _dmem_read_long(): call-out stub for the data-longword read routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_drl
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_read_long:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
264

265
	global		_dmem_write_byte
# _dmem_write_byte(): call-out stub for the data-byte write routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_dwb
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_write_byte:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
272

273
	global		_dmem_write_word
# _dmem_write_word(): call-out stub for the data-word write routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_dww
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_write_word:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
280

281
	global		_dmem_write_long
# _dmem_write_long(): call-out stub for the data-longword write routine.
# Jumps to (_060FPSP_TABLE-0x80) + the longword stored in the _off_dwl
# slot. %d0 and %sp hold their entry values again when the target runs,
# so the target returns directly to this stub's caller.
_dmem_write_long:
	mov.l		%d0,-(%sp)		# free d0 for use as scratch
	mov.l		(_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = stored displacement
	pea.l		(_060FPSP_TABLE-0x80,%pc,%d0)	# push target address
	mov.l		0x4(%sp),%d0		# restore caller's d0
	rtd		&0x4			# jump to target; discard d0 slot
288

289
#
290
# This file contains a set of define statements for constants
291
# in order to promote readability within the corecode itself.
292
#
293

294
# Frame layout. Every offset below is applied to %a6 after the handler
# has executed "link.w %a6,&-LOCAL_SIZE": positive offsets reach the
# exception stack frame, negative offsets reach the handler's locals.
set LOCAL_SIZE,		192			# stack frame size(bytes)
set LV,			-LOCAL_SIZE		# stack offset (lowest local address)

set EXC_SR,		0x4			# stack status register
set EXC_PC,		0x6			# stack pc
set EXC_VOFF,		0xa			# stacked vector offset
set EXC_EA,		0xc			# stacked <ea>

set EXC_FP,		0x0			# frame pointer

# Register save areas: 8 data regs, 8 address regs, 3 extended fp regs,
# laid out back to back from -100 up to 0.
set EXC_AREGS,		-68			# offset of all address regs
set EXC_DREGS,		-100			# offset of all data regs
set EXC_FPREGS,		-36			# offset of all fp regs

set EXC_A7,		EXC_AREGS+(7*4)		# offset of saved a7
# NOTE(review): OLD_A7 evaluates to the same slot as EXC_A6 -- confirm
# that the aliasing is intended before relying on both at once.
set OLD_A7,		EXC_AREGS+(6*4)		# extra copy of saved a7
set EXC_A6,		EXC_AREGS+(6*4)		# offset of saved a6
set EXC_A5,		EXC_AREGS+(5*4)		# offset of saved a5
set EXC_A4,		EXC_AREGS+(4*4)		# offset of saved a4
set EXC_A3,		EXC_AREGS+(3*4)		# offset of saved a3
set EXC_A2,		EXC_AREGS+(2*4)		# offset of saved a2
set EXC_A1,		EXC_AREGS+(1*4)		# offset of saved a1
set EXC_A0,		EXC_AREGS+(0*4)		# offset of saved a0
set EXC_D7,		EXC_DREGS+(7*4)		# offset of saved d7
set EXC_D6,		EXC_DREGS+(6*4)		# offset of saved d6
set EXC_D5,		EXC_DREGS+(5*4)		# offset of saved d5
set EXC_D4,		EXC_DREGS+(4*4)		# offset of saved d4
set EXC_D3,		EXC_DREGS+(3*4)		# offset of saved d3
set EXC_D2,		EXC_DREGS+(2*4)		# offset of saved d2
set EXC_D1,		EXC_DREGS+(1*4)		# offset of saved d1
set EXC_D0,		EXC_DREGS+(0*4)		# offset of saved d0

set EXC_FP0,		EXC_FPREGS+(0*12)	# offset of saved fp0
set EXC_FP1,		EXC_FPREGS+(1*12)	# offset of saved fp1
set EXC_FP2,		EXC_FPREGS+(2*12)	# offset of saved fp2 (not used)
329

330
# Local variables inside the LOCAL_SIZE-byte frame, addressed off %a6.
# Each 12-byte FP slot is split into exponent word, sign byte position,
# and high/low mantissa longwords.
set FP_SCR1,		LV+80			# fp scratch 1
set FP_SCR1_EX,		FP_SCR1+0
set FP_SCR1_SGN,	FP_SCR1+2
set FP_SCR1_HI,		FP_SCR1+4
set FP_SCR1_LO,		FP_SCR1+8

set FP_SCR0,		LV+68			# fp scratch 0
set FP_SCR0_EX,		FP_SCR0+0
set FP_SCR0_SGN,	FP_SCR0+2
set FP_SCR0_HI,		FP_SCR0+4
set FP_SCR0_LO,		FP_SCR0+8

set FP_DST,		LV+56			# fp destination operand
set FP_DST_EX,		FP_DST+0
set FP_DST_SGN,		FP_DST+2
set FP_DST_HI,		FP_DST+4
set FP_DST_LO,		FP_DST+8

set FP_SRC,		LV+44			# fp source operand
set FP_SRC_EX,		FP_SRC+0
set FP_SRC_SGN,		FP_SRC+2
set FP_SRC_HI,		FP_SRC+4
set FP_SRC_LO,		FP_SRC+8

set USER_FPIAR,		LV+40			# FP instr address register

set USER_FPSR,		LV+36			# FP status register
set FPSR_CC,		USER_FPSR+0		# FPSR condition codes
set FPSR_QBYTE,		USER_FPSR+1		# FPSR quotient byte
set FPSR_EXCEPT,	USER_FPSR+2		# FPSR exception status byte
set FPSR_AEXCEPT,	USER_FPSR+3		# FPSR accrued exception byte

set USER_FPCR,		LV+32			# FP control register
set FPCR_ENABLE,	USER_FPCR+2		# FPCR exception enable
set FPCR_MODE,		USER_FPCR+3		# FPCR rounding mode control

set L_SCR3,		LV+28			# integer scratch 3
set L_SCR2,		LV+24			# integer scratch 2
set L_SCR1,		LV+20			# integer scratch 1

set STORE_FLG,		LV+19			# flag: operand store (ie. not fcmp/ftst)

set EXC_TEMP2,		LV+24			# temporary space (same slot as L_SCR2)
set EXC_TEMP,		LV+16			# temporary space

set DTAG,		LV+15			# destination operand type
set STAG,		LV+14			# source operand type

set SPCOND_FLG,		LV+10			# flag: special case (see below)

set EXC_CC,		LV+8			# saved condition codes
set EXC_EXTWPTR,	LV+4			# saved current PC (active)
set EXC_EXTWORD,	LV+2			# saved extension word
set EXC_CMDREG,		LV+2			# saved extension word (alias of EXC_EXTWORD)
set EXC_OPWORD,		LV+0			# saved operation word
385

386
################################
387

388
# Helpful macros
389

390
# Field offsets within an extended precision value saved in memory.
# FTEMP_*, LOCAL_*, DST_* and SRC_* are four names for the same layout.
set FTEMP,		0			# offsets within an
set FTEMP_EX,		0			# extended precision
set FTEMP_SGN,		2			# value saved in memory.
set FTEMP_HI,		4
set FTEMP_LO,		8
set FTEMP_GRS,		12

set LOCAL,		0			# offsets within an
set LOCAL_EX,		0			# extended precision
set LOCAL_SGN,		2			# value saved in memory.
set LOCAL_HI,		4
set LOCAL_LO,		8
set LOCAL_GRS,		12

set DST,		0			# offsets within an
set DST_EX,		0			# extended precision
set DST_HI,		4			# value saved in memory.
set DST_LO,		8

set SRC,		0			# offsets within an
set SRC_EX,		0			# extended precision
set SRC_HI,		4			# value saved in memory.
set SRC_LO,		8

# Exponent limits of each precision, expressed as extended-format
# (EXT_BIAS-relative) exponent values.
set SGL_LO,		0x3f81			# min sgl prec exponent
set SGL_HI,		0x407e			# max sgl prec exponent
set DBL_LO,		0x3c01			# min dbl prec exponent
set DBL_HI,		0x43fe			# max dbl prec exponent
set EXT_LO,		0x0			# min ext prec exponent
set EXT_HI,		0x7ffe			# max ext prec exponent

set EXT_BIAS,		0x3fff			# extended precision bias
set SGL_BIAS,		0x007f			# single precision bias
set DBL_BIAS,		0x03ff			# double precision bias

# Operand type codes stored in the STAG/DTAG bytes.
set NORM,		0x00			# operand type for STAG/DTAG
set ZERO,		0x01			# operand type for STAG/DTAG
set INF,		0x02			# operand type for STAG/DTAG
set QNAN,		0x03			# operand type for STAG/DTAG
set DENORM,		0x04			# operand type for STAG/DTAG
set SNAN,		0x05			# operand type for STAG/DTAG
set UNNORM,		0x06			# operand type for STAG/DTAG
432

433
##################
# FPSR/FPCR bits #
##################
# Bit numbers within a single byte (for btst/bset/bclr on the
# FPSR_CC, FPSR_QBYTE, FPSR_EXCEPT/FPCR_ENABLE and FPSR_AEXCEPT bytes).
set neg_bit,		0x3			# negative result
set z_bit,		0x2			# zero result
set inf_bit,		0x1			# infinite result
set nan_bit,		0x0			# NAN result

set q_sn_bit,		0x7			# sign bit of quotient byte

set bsun_bit,		7			# branch on unordered
set snan_bit,		6			# signalling NAN
set operr_bit,		5			# operand error
set ovfl_bit,		4			# overflow
set unfl_bit,		3			# underflow
set dz_bit,		2			# divide by zero
set inex2_bit,		1			# inexact result 2
set inex1_bit,		0			# inexact result 1

set aiop_bit,		7			# accrued illegal operation bit
set aovfl_bit,		6			# accrued overflow bit
set aunfl_bit,		5			# accrued underflow bit
set adz_bit,		4			# accrued dz bit
set ainex_bit,		3			# accrued inexact bit

#############################
# FPSR individual bit masks #
#############################
# Masks positioned for use against the full 32-bit FPSR unless marked
# "(byte)".
set neg_mask,		0x08000000		# negative bit mask (lw)
set inf_mask,		0x02000000		# infinity bit mask (lw)
set z_mask,		0x04000000		# zero bit mask (lw)
set nan_mask,		0x01000000		# nan bit mask (lw)

set neg_bmask,		0x08			# negative bit mask (byte)
set inf_bmask,		0x02			# infinity bit mask (byte)
set z_bmask,		0x04			# zero bit mask (byte)
set nan_bmask,		0x01			# nan bit mask (byte)

set bsun_mask,		0x00008000		# bsun exception mask
set snan_mask,		0x00004000		# snan exception mask
set operr_mask,		0x00002000		# operr exception mask
set ovfl_mask,		0x00001000		# overflow exception mask
set unfl_mask,		0x00000800		# underflow exception mask
set dz_mask,		0x00000400		# dz exception mask
set inex2_mask,		0x00000200		# inex2 exception mask
set inex1_mask,		0x00000100		# inex1 exception mask

set aiop_mask,		0x00000080		# accrued illegal operation
set aovfl_mask,		0x00000040		# accrued overflow
set aunfl_mask,		0x00000020		# accrued underflow
set adz_mask,		0x00000010		# accrued divide by zero
set ainex_mask,		0x00000008		# accrued inexact

######################################
# FPSR combinations used in the FPSP #
######################################
# Each name is the sum of the single-bit masks listed on its line.
set dzinf_mask,		inf_mask+dz_mask+adz_mask
set opnan_mask,		nan_mask+operr_mask+aiop_mask
set nzi_mask,		0x01ffffff		#clears N, Z, and I
set unfinx_mask,	unfl_mask+inex2_mask+aunfl_mask+ainex_mask
set unf2inx_mask,	unfl_mask+inex2_mask+ainex_mask
set ovfinx_mask,	ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
set inx1a_mask,		inex1_mask+ainex_mask
set inx2a_mask,		inex2_mask+ainex_mask
set snaniop_mask,	nan_mask+snan_mask+aiop_mask
set snaniop2_mask,	snan_mask+aiop_mask
set naniop_mask,	nan_mask+aiop_mask
set neginf_mask,	neg_mask+inf_mask
set infaiop_mask,	inf_mask+aiop_mask
set negz_mask,		neg_mask+z_mask
set opaop_mask,		operr_mask+aiop_mask
set unfl_inx_mask,	unfl_mask+aunfl_mask+ainex_mask
set ovfl_inx_mask,	ovfl_mask+aovfl_mask+ainex_mask
506

507
#########
# misc. #
#########
set rnd_stky_bit,	29			# stky bit pos in longword

set sign_bit,		0x7			# sign bit
set signan_bit,		0x6			# signalling nan bit

set sgl_thresh,		0x3f81			# minimum sgl exponent (same value as SGL_LO)
set dbl_thresh,		0x3c01			# minimum dbl exponent (same value as DBL_LO)

set x_mode,		0x0			# extended precision
set s_mode,		0x4			# single precision
set d_mode,		0x8			# double precision

set rn_mode,		0x0			# round-to-nearest
set rz_mode,		0x1			# round-to-zero
set rm_mode,		0x2			# round-to-minus-infinity
set rp_mode,		0x3			# round-to-plus-infinity

set mantissalen,	64			# length of mantissa in bits

set BYTE,		1			# len(byte) == 1 byte
set WORD,		2			# len(word) == 2 bytes
set LONG,		4			# len(longword) == 4 bytes

# Exception vector offsets, as found in the stacked vector offset field.
set BSUN_VEC,		0xc0			# bsun    vector offset
set INEX_VEC,		0xc4			# inexact vector offset
set DZ_VEC,		0xc8			# dz      vector offset
set UNFL_VEC,		0xcc			# unfl    vector offset
set OPERR_VEC,		0xd0			# operr   vector offset
set OVFL_VEC,		0xd4			# ovfl    vector offset
set SNAN_VEC,		0xd8			# snan    vector offset

###########################
# SPecial CONDition FLaGs #
###########################
# Values for the SPCOND_FLG byte; the *_bit names give the matching
# bit numbers for the single-bit flags.
set ftrapcc_flg,	0x01			# flag bit: ftrapcc exception
set fbsun_flg,		0x02			# flag bit: bsun exception
set mia7_flg,		0x04			# flag bit: (a7)+ <ea>
set mda7_flg,		0x08			# flag bit: -(a7) <ea>
set fmovm_flg,		0x40			# flag bit: fmovm instruction
set immed_flg,		0x80			# flag bit: &<data> <ea>

set ftrapcc_bit,	0x0
set fbsun_bit,		0x1
set mia7_bit,		0x2
set mda7_bit,		0x3
set immed_bit,		0x7

##################################
# TRANSCENDENTAL "LAST-OP" FLAGS #
##################################
set FMUL_OP,		0x0			# fmul instr performed last
set FDIV_OP,		0x1			# fdiv performed last
set FADD_OP,		0x2			# fadd performed last
set FMOV_OP,		0x3			# fmov performed last
564

565
#############
# CONSTANTS #
#############
# Read-only numeric data emitted in line with the code.
# T1/T2 are two-longword values forming a lead/trail pair.
T1:	long		0x40C62D38,0xD3D64634	# 16381 LOG2 LEAD
T2:	long		0x3D6F90AE,0xB1E75CC7	# 16381 LOG2 TRAIL

# PI and PIBY2 are four longwords each: exponent word (0x4000 / 0x3FFF),
# then the 64-bit mantissa 0xC90FDAA2_2168C235, then a zero longword.
PI:	long		0x40000000,0xC90FDAA2,0x2168C235,0x00000000
PIBY2:	long		0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000

# NOTE(review): two longwords; the bit pattern looks like 2/pi in
# double precision -- confirm against the routines that load it.
TWOBYPI:
	long		0x3FE45F30,0x6DC9C883
576

577
#########################################################################
578
# XDEF ****************************************************************	#
579
#	_fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.	#
580
#									#
581
#	This handler should be the first code executed upon taking the	#
582
#	FP Overflow exception in an operating system.			#
583
#									#
584
# XREF ****************************************************************	#
585
#	_imem_read_long() - read instruction longword			#
586
#	fix_skewed_ops() - adjust src operand in fsave frame		#
587
#	set_tag_x() - determine optype of src/dst operands		#
588
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
589
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
590
#	load_fpn2() - load dst operand from FP regfile			#
591
#	fout() - emulate an opclass 3 instruction			#
592
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
593
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
594
#	_real_ovfl() - "callout" for Overflow exception enabled code	#
595
#	_real_inex() - "callout" for Inexact exception enabled code	#
596
#	_real_trace() - "callout" for Trace exception code		#
597
#									#
598
# INPUT ***************************************************************	#
599
#	- The system stack contains the FP Ovfl exception stack frame	#
600
#	- The fsave frame contains the source operand			#
601
#									#
602
# OUTPUT **************************************************************	#
603
#	Overflow Exception enabled:					#
604
#	- The system stack is unchanged					#
605
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
606
#	Overflow Exception disabled:					#
607
#	- The system stack is unchanged					#
608
#	- The "exception present" flag in the fsave frame is cleared	#
609
#									#
610
# ALGORITHM ***********************************************************	#
611
#	On the 060, if an FP overflow is present as the result of any	#
612
# instruction, the 060 will take an overflow exception whether the	#
613
# exception is enabled or disabled in the FPCR. For the disabled case,	#
614
# This handler emulates the instruction to determine what the correct	#
615
# default result should be for the operation. This default result is	#
616
# then stored in either the FP regfile, data regfile, or memory.	#
617
# Finally, the handler exits through the "callout" _fpsp_done()		#
618
# denoting that no exceptional conditions exist within the machine.	#
619
#	If the exception is enabled, then this handler must create the	#
620
# exceptional operand and place it in the fsave state frame, and store	#
621
# the default result (only if the instruction is opclass 3). For	#
622
# exceptions enabled, this handler must exit through the "callout"	#
623
# _real_ovfl() so that the operating system enabled overflow handler	#
624
# can handle this case.							#
625
#	Two other conditions exist. First, if overflow was disabled	#
626
# but the inexact exception was enabled, this handler must exit		#
627
# through the "callout" _real_inex() regardless of whether the result	#
628
# was inexact.								#
629
#	Also, in the case of an opclass three instruction where		#
630
# overflow was disabled and the trace exception was enabled, this	#
631
# handler must exit through the "callout" _real_trace().		#
632
#									#
633
#########################################################################
634

635
	global		_fpsp_ovfl
# _fpsp_ovfl(): FP Overflow exception entry point.
#   1. link a LOCAL_SIZE-byte frame, fsave the FPU state into FP_SRC and
#      save d0-d1/a0-a1, fpcr/fpsr/fpiar and fp0-fp1.
#   2. fetch the faulting instruction through the address held in FPIAR.
#   3. opclass 0/2: fix/tag the operands and emulate via tbl_unsupp, then
#      store the result with store_fpreg.
#      fmove out: emulate via fout (label fovfl_out).
#   4. leave through _real_ovfl (overflow enabled), _real_inex (inexact
#      enabled), _real_trace (fmove-out path with the trace bit set in
#      the stacked SR) or _fpsp_done.
_fpsp_ovfl:

#$#	sub.l		&24,%sp			# make room for src/dst

	link.w		%a6,&-LOCAL_SIZE	# init stack frame

	fsave		FP_SRC(%a6)		# grab the "busy" frame

	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack

# the FPIAR holds the "current PC" of the faulting instruction
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
	bsr.l		_imem_read_long		# fetch the instruction words
	mov.l		%d0,EXC_OPWORD(%a6)	# opword + extension word in one store

##############################################################################

	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
	bne.w		fovfl_out


	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		fix_skewed_ops		# fix src op

# since, I believe, only NORMs and DENORMs can come through here,
# maybe we can avoid the subroutine call.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	bsr.l		set_tag_x		# tag the operand type
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM

# bit five of the fp extension word separates the monadic and dyadic operations
# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
# will never take this exception.
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
	beq.b		fovfl_extract		# monadic

	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
	bsr.l		load_fpn2		# load dst into FP_DST

	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
	bsr.l		set_tag_x		# tag the operand type
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
	bne.b		fovfl_op2_done		# no
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
fovfl_op2_done:
	mov.b		%d0,DTAG(%a6)		# save dst optype tag

fovfl_extract:

#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

	mov.b		1+EXC_CMDREG(%a6),%d1
	andi.w		&0x007f,%d1		# extract extension

# clear the cc byte and every exception status bit except inex1; the
# quotient byte and the accrued byte are left untouched.
	andi.l		&0x00ff01ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
	lea		FP_DST(%a6),%a1		# pass: ptr to dst op

# maybe we can make these entry points ONLY the OVFL entry points of each routine.
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)	# table entries are tbl_unsupp-relative

# the operation has been emulated. the result is in fp0.
# the EXOP, if an exception occurred, is in fp1.
# we must save the default result regardless of whether
# traps are enabled or disabled.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # d0 = dst reg number
	bsr.l		store_fpreg

# the exceptional possibilities we have left ourselves with are ONLY overflow
# and inexact. and, the inexact is such that overflow occurred and was disabled
# but inexact was enabled.
	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.b		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp
	bra.l		_fpsp_done

# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
# in fp1. now, simply jump to _real_ovfl()!
fovfl_ovfl_on:
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.w		&0xe005,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_ovfl

# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
# we must jump to real_inex().
fovfl_inex_on:

	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack

	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!

	unlk		%a6

	bra.l		_real_inex

########################################################################
fovfl_out:


#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)

# the src operand is definitely a NORM(!), so tag it as such
	mov.b		&NORM,STAG(%a6)		# set src optype tag

	clr.l		%d0
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode

# clear only the exception status byte; cc, quotient and accrued bytes
# are left untouched.
	and.l		&0xffff00ff,USER_FPSR(%a6)

	fmov.l		&0x0,%fpcr		# zero current control regs
	fmov.l		&0x0,%fpsr

	lea		FP_SRC(%a6),%a0		# pass ptr to src operand

	bsr.l		fout

	btst		&ovfl_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_ovfl_on

	btst		&inex2_bit,FPCR_ENABLE(%a6)
	bne.w		fovfl_inex_on

	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1

	unlk		%a6
#$#	add.l		&24,%sp

# after unlk, (%sp) is the first byte of the exception stack frame
	btst		&0x7,(%sp)		# is trace on?
	beq.l		_fpsp_done		# no

	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
	bra.l		_real_trace
816

817
#########################################################################
818
# XDEF ****************************************************************	#
819
#	_fpsp_unfl(): 060FPSP entry point for FP Underflow exception.	#
820
#									#
821
#	This handler should be the first code executed upon taking the	#
822
#	FP Underflow exception in an operating system.			#
823
#									#
824
# XREF ****************************************************************	#
825
#	_imem_read_long() - read instruction longword			#
826
#	fix_skewed_ops() - adjust src operand in fsave frame		#
827
#	set_tag_x() - determine optype of src/dst operands		#
828
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
829
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
830
#	load_fpn2() - load dst operand from FP regfile			#
831
#	fout() - emulate an opclass 3 instruction			#
832
#	tbl_unsupp - add of table of emulation routines for opclass 0,2	#
833
#	_fpsp_done() - "callout" for 060FPSP exit (all work done!)	#
834
#	_real_unfl() - "callout" for Underflow exception enabled code	#
835
#	_real_inex() - "callout" for Inexact exception enabled code	#
836
#	_real_trace() - "callout" for Trace exception code		#
837
#									#
838
# INPUT ***************************************************************	#
839
#	- The system stack contains the FP Unfl exception stack frame	#
840
#	- The fsave frame contains the source operand			#
841
#									#
842
# OUTPUT **************************************************************	#
843
#	Underflow Exception enabled:					#
844
#	- The system stack is unchanged					#
845
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
846
#	Underflow Exception disabled:					#
847
#	- The system stack is unchanged					#
848
#	- The "exception present" flag in the fsave frame is cleared	#
849
#									#
850
# ALGORITHM ***********************************************************	#
851
#	On the 060, if an FP underflow is present as the result of any	#
852
# instruction, the 060 will take an underflow exception whether the	#
853
# exception is enabled or disabled in the FPCR. For the disabled case,	#
854
# This handler emulates the instruction to determine what the correct	#
855
# default result should be for the operation. This default result is	#
856
# then stored in either the FP regfile, data regfile, or memory.	#
857
# Finally, the handler exits through the "callout" _fpsp_done()		#
858
# denoting that no exceptional conditions exist within the machine.	#
859
#	If the exception is enabled, then this handler must create the	#
860
# exceptional operand and place it in the fsave state frame, and store	#
861
# the default result (only if the instruction is opclass 3). For	#
862
# exceptions enabled, this handler must exit through the "callout"	#
863
# _real_unfl() so that the operating system enabled underflow handler	#
864
# can handle this case.							#
865
#	Two other conditions exist. First, if underflow was disabled	#
866
# but the inexact exception was enabled and the result was inexact,	#
867
# this handler must exit through the "callout" _real_inex().		#
868
#									#
869
#	Also, in the case of an opclass three instruction where		#
870
# underflow was disabled and the trace exception was enabled, this	#
871
# handler must exit through the "callout" _real_trace().		#
872
#									#
873
#########################################################################
874

875
	global		_fpsp_unfl
876
_fpsp_unfl:
877
# Handler entry: build the local frame, capture the FPU state with fsave,
# save the scratch registers, fetch the faulting instruction, and tag the
# operand(s). fmove-out instructions are diverted to funfl_out; everything
# else falls into funfl_extract.

878
#$#	sub.l		&24,%sp			# make room for src/dst
879

880
	link.w		%a6,&-LOCAL_SIZE	# init stack frame
881

882
	fsave		FP_SRC(%a6)		# grab the "busy" frame
883

884
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
885
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
886
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
887

888
# the FPIAR holds the "current PC" of the faulting instruction
889
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
890
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
891
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
892
	bsr.l		_imem_read_long		# fetch the instruction words
893
	mov.l		%d0,EXC_OPWORD(%a6)	# save the fetched longword
894

895
##############################################################################
896

897
	btst		&0x5,EXC_CMDREG(%a6)	# is instr an fmove out?
898
	bne.w		funfl_out
899

900

901
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
902
	bsr.l		fix_skewed_ops		# fix src op
903

904
# the src tag is stored exactly as set_tag_x returns it; in this handler only
# the dst operand (below) is passed through unnorm_fix.
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
905
	bsr.l		set_tag_x		# tag the operand type
906
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
907

908
# bit five of the fp ext word separates the monadic and dyadic operations
909
# that can pass through fpsp_unfl(). remember that fcmp, and ftst
910
# will never take this exception.
911
	btst		&0x5,1+EXC_CMDREG(%a6)	# is op monadic or dyadic?
912
	beq.b		funfl_extract		# monadic
913

914
# now, what's left that's not dyadic is fsincos. we can distinguish it
915
# from all dyadics by the '0110xxx' pattern (bit four of the ext word set)
916
	btst		&0x4,1+EXC_CMDREG(%a6)	# is op an fsincos?
917
	bne.b		funfl_extract		# yes
918

919
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
920
	bsr.l		load_fpn2		# load dst into FP_DST
921

922
	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
923
	bsr.l		set_tag_x		# tag the operand type
924
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
925
	bne.b		funfl_op2_done		# no
926
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
927
funfl_op2_done:
928
	mov.b		%d0,DTAG(%a6)		# save dst optype tag
929

930
funfl_extract:
931
# Emulate the opclass 0/2 instruction: d0 = rnd prec/mode, d1 = extension
# field used to index tbl_unsupp, a0/a1 = ptrs to FP_SRC/FP_DST. The result is
# then stored to the destination FP register and the underflow/inexact
# enables are checked.

932
#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
933
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
934
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
935
#$#	mov.l		FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
936
#$#	mov.l		FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
937
#$#	mov.l		FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
938

939
	clr.l		%d0
940
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
941

942
	mov.b		1+EXC_CMDREG(%a6),%d1
943
	andi.w		&0x007f,%d1		# extract extension
944

945
# mask the saved FPSR: bits 31-24 and bits 15-9 are cleared; bits 23-16,
# bit 8, and bits 7-0 are kept.
	andi.l		&0x00ff01ff,USER_FPSR(%a6)
946

947
	fmov.l		&0x0,%fpcr		# zero current control regs
948
	fmov.l		&0x0,%fpsr
949

950
	lea		FP_SRC(%a6),%a0
951
	lea		FP_DST(%a6),%a1
952

953
# maybe we can make these entry points ONLY the OVFL entry points of each routine.
# NOTE(review): the remark above looks copied from the overflow handler; in
# this handler it would presumably refer to the UNFL entry points - confirm.
954
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
955
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
956

957
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg number
958
	bsr.l		store_fpreg
959

960
# The `060 FPU multiplier hardware is such that if the result of a
961
# multiply operation is the smallest possible normalized number
962
# (0x00000000_80000000_00000000), then the machine will take an
963
# underflow exception. Since this is incorrect, we need to check
964
# if our emulation, after re-doing the operation, decided that
965
# no underflow was called for. We do these checks only in
966
# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
967
# special case will simply exit gracefully with the correct result.
968

969
# the exceptional possibilities we have left ourselves with are ONLY underflow
970
# and inexact. and, the inexact is such that underflow occurred and was disabled
971
# but inexact was enabled.
972
	btst		&unfl_bit,FPCR_ENABLE(%a6)
973
	bne.b		funfl_unfl_on
974

975
funfl_chkinex:
976
	btst		&inex2_bit,FPCR_ENABLE(%a6)
977
	bne.b		funfl_inex_on
978

979
funfl_exit:
980
# Exit with no exception to report: restore the saved registers, drop the
# local frame, and leave through _fpsp_done. The fsave frame in FP_SRC is
# not frestore'd on this path.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
981
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
982
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
983

984
	unlk		%a6
985
#$#	add.l		&24,%sp
986
	bra.l		_fpsp_done
987

988
# underflow is enabled AND underflow, of course, occurred. so, we have the EXOP
989
# in fp1 (don't forget to save fp0). what to do now?
990
# well, we simply have to get to go to _real_unfl()!
991
funfl_unfl_on:
992

993
# The `060 FPU multiplier hardware is such that if the result of a
994
# multiply operation is the smallest possible normalized number
995
# (0x00000000_80000000_00000000), then the machine will take an
996
# underflow exception. Since this is incorrect, we check here to see
997
# if our emulation, after re-doing the operation, decided that
998
# no underflow was called for.
999
	btst		&unfl_bit,FPSR_EXCEPT(%a6)
1000
	beq.w		funfl_chkinex		# emulation saw no underflow
1001

1002
# funfl_unfl_on2 is also entered directly from funfl_out (fmove out with
# underflow enabled), bypassing the FPSR_EXCEPT check above.
funfl_unfl_on2:
1003
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP (fp1) to stack
1004

1005
	mov.w		&0xe003,2+FP_SRC(%a6)	# save exc status
1006

1007
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1008
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1010

1011
	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1012

1013
	unlk		%a6
1014

1015
	bra.l		_real_unfl
1016

1017
# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018
# we must jump to real_inex().
1019
funfl_inex_on:
1020
# Reached from funfl_chkinex when inexact is enabled (underflow is either
# disabled or was found not to have occurred in funfl_unfl_on).

1021
# The `060 FPU multiplier hardware is such that if the result of a
1022
# multiply operation is the smallest possible normalized number
1023
# (0x00000000_80000000_00000000), then the machine will take an
1024
# underflow exception.
1025
# But, whether bogus or not, if inexact is enabled AND it occurred,
1026
# then we have to branch to real_inex.
1027

1028
	btst		&inex2_bit,FPSR_EXCEPT(%a6)
1029
	beq.w		funfl_exit		# result was exact; plain exit
1030

1031
# funfl_inex_on2 is also entered directly from funfl_out (fmove out with
# inexact enabled), bypassing the FPSR_EXCEPT check above.
funfl_inex_on2:
1032

1033
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to stack
1034

1035
# only the low byte of the vector-offset word is rewritten here; the upper
# byte of EXC_VOFF is left as stacked.
	mov.b		&0xc4,1+EXC_VOFF(%a6)	# vector offset = 0xc4
1036
	mov.w		&0xe001,2+FP_SRC(%a6)	# save exc status
1037

1038
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1039
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1041

1042
	frestore	FP_SRC(%a6)		# do this after fmovm,other f<op>s!
1043

1044
	unlk		%a6
1045

1046
	bra.l		_real_inex
1047

1048
#######################################################################
1049
funfl_out:
1050
# Opclass three (fmove out) path of _fpsp_unfl: call fout() to emulate the
# move, then exit through _real_unfl, _real_inex, _real_trace, or _fpsp_done
# depending on the enabled exceptions and the stacked trace bit.

1051

1052
#$#	mov.l		FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053
#$#	mov.l		FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054
#$#	mov.l		FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1055

1056
# the src operand is definitely a NORM(!), so tag it as such
1057
	mov.b		&NORM,STAG(%a6)		# set src optype tag
1058

1059
	clr.l		%d0
1060
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
1061

1062
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field (bits 15-8) only
1063

1064
	fmov.l		&0x0,%fpcr		# zero current control regs
1065
	fmov.l		&0x0,%fpsr
1066

1067
	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1068

1069
	bsr.l		fout
1070

1071
# unlike the opclass 0/2 path, only the FPCR enables are consulted here; the
# "on2" entry points skip the FPSR_EXCEPT checks.
	btst		&unfl_bit,FPCR_ENABLE(%a6)
1072
	bne.w		funfl_unfl_on2
1073

1074
	btst		&inex2_bit,FPCR_ENABLE(%a6)
1075
	bne.w		funfl_inex_on2
1076

1077
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
1078
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1080

1081
	unlk		%a6
1082
#$#	add.l		&24,%sp
1083

1084
# after unlk, %sp points at the exception stack frame; bit 7 of its first
# byte is tested as the trace flag.
	btst		&0x7,(%sp)		# is trace on?
1085
	beq.l		_fpsp_done		# no
1086

1087
# convert the frame in place into a trace frame before calling out
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1088
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1089
	bra.l		_real_trace
1090

1091
#########################################################################
1092
# XDEF ****************************************************************	#
1093
#	_fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented	#
1094
#		        Data Type" exception.				#
1095
#									#
1096
#	This handler should be the first code executed upon taking the	#
1097
#	FP Unimplemented Data Type exception in an operating system.	#
1098
#									#
1099
# XREF ****************************************************************	#
1100
#	_imem_read_{word,long}() - read instruction word/longword	#
1101
#	fix_skewed_ops() - adjust src operand in fsave frame		#
1102
#	set_tag_x() - determine optype of src/dst operands		#
1103
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
1104
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
1105
#	load_fpn2() - load dst operand from FP regfile			#
1106
#	load_fpn1() - load src operand from FP regfile			#
1107
#	fout() - emulate an opclass 3 instruction			#
1108
#	tbl_unsupp - addr of table of emulation routines for opclass 0,2	#
1109
#	_real_inex() - "callout" to operating system inexact handler	#
1110
#	_fpsp_done() - "callout" for exit; work all done		#
1111
#	_real_trace() - "callout" for Trace enabled exception		#
1112
#	funimp_skew() - adjust fsave src ops to "incorrect" value	#
1113
#	_real_snan() - "callout" for SNAN exception			#
1114
#	_real_operr() - "callout" for OPERR exception			#
1115
#	_real_ovfl() - "callout" for OVFL exception			#
1116
#	_real_unfl() - "callout" for UNFL exception			#
1117
#	get_packed() - fetch packed operand from memory			#
1118
#									#
1119
# INPUT ***************************************************************	#
1120
#	- The system stack contains the "Unimp Data Type" stk frame	#
1121
#	- The fsave frame contains the src op (for UNNORM/DENORM)	#
1122
#									#
1123
# OUTPUT **************************************************************	#
1124
#	If Inexact exception (opclass 3):				#
1125
#	- The system stack is changed to an Inexact exception stk frame	#
1126
#	If SNAN exception (opclass 3):					#
1127
#	- The system stack is changed to an SNAN exception stk frame	#
1128
#	If OPERR exception (opclass 3):					#
1129
#	- The system stack is changed to an OPERR exception stk frame	#
1130
#	If OVFL exception (opclass 3):					#
1131
#	- The system stack is changed to an OVFL exception stk frame	#
1132
#	If UNFL exception (opclass 3):					#
1133
#	- The system stack is changed to an UNFL exception stack frame	#
1134
#	If Trace exception enabled:					#
1135
#	- The system stack is changed to a Trace exception stack frame	#
1136
#	Else: (normal case)						#
1137
#	- Correct result has been stored as appropriate			#
1138
#									#
1139
# ALGORITHM ***********************************************************	#
1140
#	Two main instruction types can enter here: (1) DENORM or UNNORM	#
1141
# unimplemented data types. These can be either opclass 0,2 or 3	#
1142
# instructions, and (2) PACKED unimplemented data format instructions	#
1143
# also of opclasses 0,2, or 3.						#
1144
#	For UNNORM/DENORM opclass 0 and 2, the handler fetches the src	#
1145
# operand from the fsave state frame and the dst operand (if dyadic)	#
1146
# from the FP register file. The instruction is then emulated by	#
1147
# choosing an emulation routine from a table of routines indexed by	#
1148
# instruction type. Once the instruction has been emulated and result	#
1149
# saved, then we check to see if any enabled exceptions resulted from	#
1150
# instruction emulation. If none, then we exit through the "callout"	#
1151
# _fpsp_done(). If there is an enabled FP exception, then we insert	#
1152
# this exception into the FPU in the fsave state frame and then exit	#
1153
# through _fpsp_done().							#
1154
#	PACKED opclass 0 and 2 is similar in how the instruction is	#
1155
# emulated and exceptions handled. The differences occur in how the	#
1156
# handler loads the packed op (by calling get_packed() routine) and	#
1157
# by the fact that a Trace exception could be pending for PACKED ops.	#
1158
# If a Trace exception is pending, then the current exception stack	#
1159
# frame is changed to a Trace exception stack frame and an exit is	#
1160
# made through _real_trace().						#
1161
#	For UNNORM/DENORM opclass 3, the actual move out to memory is	#
1162
# performed by calling the routine fout(). If no exception should occur	#
1163
# as the result of emulation, then an exit either occurs through	#
1164
# _fpsp_done() or through _real_trace() if a Trace exception is pending	#
1165
# (a Trace stack frame must be created here, too). If an FP exception	#
1166
# should occur, then we must create an exception stack frame of that	#
1167
# type and jump to either _real_snan(), _real_operr(), _real_inex(),	#
1168
# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3	#
1169
# emulation is performed in a similar manner.				#
1170
#									#
1171
#########################################################################
1172

1173
#
1174
# (1) DENORM and UNNORM (unimplemented) data types:
1175
#
1176
#				post-instruction
1177
#				*****************
1178
#				*      EA	*
1179
#	 pre-instruction	*		*
1180
#	*****************	*****************
1181
#	* 0x0 *  0x0dc  *	* 0x3 *  0x0dc  *
1182
#	*****************	*****************
1183
#	*     Next	*	*     Next	*
1184
#	*      PC	*	*      PC	*
1185
#	*****************	*****************
1186
#	*      SR	*	*      SR	*
1187
#	*****************	*****************
1188
#
1189
# (2) PACKED format (unsupported) opclasses two and three:
1190
#	*****************
1191
#	*      EA	*
1192
#	*		*
1193
#	*****************
1194
#	* 0x2 *  0x0dc	*
1195
#	*****************
1196
#	*     Next	*
1197
#	*      PC	*
1198
#	*****************
1199
#	*      SR	*
1200
#	*****************
1201
#
1202
	global		_fpsp_unsupp
1203
_fpsp_unsupp:
1204
# Handler entry: build the local frame, capture the FPU state, save the
# scratch registers, and record the pre-exception a7 in EXC_A7 (taken from
# the USP for user mode, computed from the exception frame for supervisor
# mode).

1205
	link.w		%a6,&-LOCAL_SIZE	# init stack frame
1206

1207
	fsave		FP_SRC(%a6)		# save fp state
1208

1209
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
1210
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1211
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
1212

1213
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1214
	bne.b		fu_s
1215
fu_u:
1216
	mov.l		%usp,%a0		# fetch user stack pointer
1217
	mov.l		%a0,EXC_A7(%a6)		# save on stack
1218
	bra.b		fu_cont
1219
# if the exception is an opclass zero or two unimplemented data type
1220
# exception, then the a7' calculated here is wrong since it doesn't
1221
# stack an ea. however, we don't need an a7' for this case anyways.
1222
fu_s:
1223
	lea		0x4+EXC_EA(%a6),%a0	# load old a7' (first byte past the EA field)
1224
	mov.l		%a0,EXC_A7(%a6)		# save on stack
1225

1226
fu_cont:
1227
# Fetch the faulting instruction, route opclass three and packed opclass two
# instructions to their own paths (fu_out, fu_in_pack), and otherwise tag the
# src operand and, for dyadic ops, the dst operand.

1228
# the FPIAR holds the "current PC" of the faulting instruction
1229
# the FPIAR should be set correctly for ALL exceptions passing through
1230
# this point.
1231
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
1232
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
1233
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
1234
	bsr.l		_imem_read_long		# fetch the instruction words
1235
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
1236

1237
############################
1238

1239
	clr.b		SPCOND_FLG(%a6)		# clear special condition flag
1240

1241
# Separate opclass three (fpn-to-mem) ops since they have a different
1242
# stack frame and protocol.
1243
	btst		&0x5,EXC_CMDREG(%a6)	# is it an fmove out?
1244
	bne.w		fu_out			# yes
1245

1246
# Separate packed opclass two instructions.
1247
# (the six-bit field is opclass followed by src format; 0x13 = class 2, fmt 3)
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0
1248
	cmpi.b		%d0,&0x13
1249
	beq.w		fu_in_pack
1250

1251

1252
# I'm not sure at this point what FPSR bits are valid for this instruction.
1253
# so, since the emulation routines re-create them anyways, zero exception field
# (the mask below also clears bits 31-24 of the saved FPSR).
1254
	andi.l		&0x00ff00ff,USER_FPSR(%a6) # zero exception field
1255

1256
	fmov.l		&0x0,%fpcr		# zero current control regs
1257
	fmov.l		&0x0,%fpsr
1258

1259
# Opclass two w/ memory-to-fpn operation will have an incorrect extended
1260
# precision format if the src format was single or double and the
1261
# source data type was an INF, NAN, DENORM, or UNNORM
1262
	lea		FP_SRC(%a6),%a0		# pass ptr to input
1263
	bsr.l		fix_skewed_ops
1264

1265
# we don't know whether the src operand or the dst operand (or both) is the
1266
# UNNORM or DENORM. call the function that tags the operand type. if the
1267
# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1268
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
1269
	bsr.l		set_tag_x		# tag the operand type
1270
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1271
	bne.b		fu_op2			# no
1272
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1273

1274
fu_op2:
1275
	mov.b		%d0,STAG(%a6)		# save src optype tag
1276

1277
# d0 is preloaded with the dst register number; it is only consumed by
# load_fpn2 below if the operation turns out to be dyadic.
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1278

1279
# bit five of the fp extension word separates the monadic and dyadic operations
1280
# at this point
1281
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1282
	beq.b		fu_extract		# monadic
1283
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1284
	beq.b		fu_extract		# yes, so it's monadic, too
1285

1286
	bsr.l		load_fpn2		# load dst into FP_DST
1287

1288
	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1289
	bsr.l		set_tag_x		# tag the operand type
1290
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1291
	bne.b		fu_op2_done		# no
1292
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1293
fu_op2_done:
1294
	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1295

1296
fu_extract:
1297
# Dispatch to the emulation routine: d0 = rnd mode/prec, d1 = seven-bit
# extension field used to index tbl_unsupp, a0/a1 = ptrs to FP_SRC/FP_DST.
	clr.l		%d0
1298
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1299

1300
	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1301

1302
	lea		FP_SRC(%a6),%a0
1303
	lea		FP_DST(%a6),%a1
1304

1305
	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1306
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1307

1308
#
1309
# Exceptions in order of precedence:
1310
#	BSUN	: none
1311
#	SNAN	: all dyadic ops
1312
#	OPERR	: fsqrt(-NORM)
1313
#	OVFL	: all except ftst,fcmp
1314
#	UNFL	: all except ftst,fcmp
1315
#	DZ	: fdiv
1316
#	INEX2	: all except ftst,fcmp
1317
#	INEX1	: none (packed doesn't go through here)
1318
#
1319

1320
# we determine the highest priority exception(if any) set by the
1321
# emulation routine that has also been enabled by the user.
1322
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1323
	bne.b		fu_in_ena		# some are enabled
1324

1325
fu_in_cont:
1326
# No enabled exception needs reporting: store the emulated result (unless the
# instruction stores none) and exit through _fpsp_done.
# fcmp and ftst do not store any result.
1327
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1328
	andi.b		&0x38,%d0		# extract bits 3-5
1329
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1330
	beq.b		fu_in_exit		# yes
1331

1332
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # extract dst reg number
1333
	bsr.l		store_fpreg		# store the result
1334

1335
fu_in_exit:
1336

1337
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1338
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1339
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1340

1341
	unlk		%a6
1342

1343
	bra.l		_fpsp_done
1344

1345
fu_in_ena:
1346
# Entered with d0 = FPCR enable byte (non-zero). Mask it with the exceptions
# the emulation actually raised and locate the highest-priority one.
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1347
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1348
	bne.b		fu_in_exc		# there is at least one set
1349

1350
#
1351
# No exceptions occurred that were also enabled. Now:
1352
#
1353
#	if (OVFL && ovfl_disabled && inexact_enabled) {
1354
#	    branch to _real_inex() (even if the result was exact!);
1355
#	} else {
1356
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1357
#	    return;
1358
#	}
1359
#
# NOTE(review): the code below does not branch to _real_inex() directly; it
# branches to fu_in_exc_ovfl, which selects the overflow entry of tbl_except
# and exits through fu_in_exc_exit.
1360
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1361
	beq.b		fu_in_cont		# no
1362

1363
fu_in_ovflchk:
1364
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1365
	beq.b		fu_in_cont		# no
1366
	bra.w		fu_in_exc_ovfl		# go insert overflow frame
1367

1368
#
1369
# An exception occurred and that exception was enabled:
1370
#
1371
#	shift enabled exception field into lo byte of d0;
1372
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1374
#		/*
1375
#		 * this is the case where we must call _real_inex() now or else
1376
#		 * there will be no other way to pass it the exceptional operand
1377
#		 */
1378
#		call _real_inex();
1379
#	} else {
1380
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1381
#	}
1382
#
1383
fu_in_exc:
1384
# Entered with d0 = bit offset returned by bfffo over bits 24-31, i.e. 24-31
# for the highest-priority enabled exception that occurred.
# NOTE(review): the pseudo-code above says "call _real_inex()", but the code
# below instead substitutes the UNFL or OVFL table index and falls into
# fu_in_exc_exit, which leaves through _fpsp_done.
	subi.l		&24,%d0			# fix offset to be 0-7
1385
	cmpi.b		%d0,&0x6		# is exception INEX? (6)
1386
	bne.b		fu_in_exc_exit		# no
1387

1388
# the enabled exception was inexact
1389
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390
	bne.w		fu_in_exc_unfl		# yes
1391
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392
	bne.w		fu_in_exc_ovfl		# yes
1393

1394
# here, we insert the correct fsave status value into the fsave frame for the
1395
# corresponding exception. the operand in the fsave frame should be the original
1396
# src operand.
1397
fu_in_exc_exit:
1398
	mov.l		%d0,-(%sp)		# save d0
1399
	bsr.l		funimp_skew		# skew sgl or dbl inputs
1400
	mov.l		(%sp)+,%d0		# restore d0
1401

1402
	mov.w		(tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1403

1404
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1405
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1407

1408
	frestore	FP_SRC(%a6)		# restore src op
1409

1410
	unlk		%a6
1411

1412
	bra.l		_fpsp_done
1413

1414
# fsave status words indexed by the exception number in d0 (0-7). The order
# matches the one used by tbl_fu_out: BSUN, SNAN, OPERR, OVFL, UNFL, DZ,
# INEX2, INEX1.
tbl_except:
1415
	short		0xe000,0xe006,0xe004,0xe005
1416
	short		0xe003,0xe002,0xe001,0xe001
1417

1418
# enabled inexact with a disabled underflow: use table index 4 (0xe003)
fu_in_exc_unfl:
1419
	mov.w		&0x4,%d0
1420
	bra.b		fu_in_exc_exit
1421
# enabled inexact with a disabled overflow: use table index 3 (0xe005)
fu_in_exc_ovfl:
1422
	mov.w		&0x03,%d0
1423
	bra.b		fu_in_exc_exit
1424

1425
# If the input operand to this operation was opclass two and a single
1426
# or double precision denorm, inf, or nan, the operand needs to be
1427
# "corrected" in order to have the proper equivalent extended precision
1428
# number.
#
# In:	a0 = ptr to the operand (accessed via LOCAL_EX/LOCAL_HI/LOCAL_LO)
#	EXC_CMDREG(%a6) = command word of the faulting instruction
# Out:	operand at (a0) rewritten in place when it matches one of the
#	skewed patterns below; otherwise untouched.
# Uses:	d0 as scratch. norm() is called on the denorm paths and is relied
#	upon not to alter a0 and to return the shift amount in d0.
1429
	global		fix_skewed_ops
1430
fix_skewed_ops:
1431
	bfextu		EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1432
	cmpi.b		%d0,&0x11		# is class = 2 & fmt = sgl?
1433
	beq.b		fso_sgl			# yes
1434
	cmpi.b		%d0,&0x15		# is class = 2 & fmt = dbl?
1435
	beq.b		fso_dbl			# yes
1436
	rts					# no
1437

1438
# single precision source: 0x3f80 = 0x3fff - 0x7f and 0x407f = 0x3f80 + 0xff,
# i.e. the minimum and maximum single exponent fields after rebiasing.
fso_sgl:
1439
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1440
	andi.w		&0x7fff,%d0		# strip sign
1441
	cmpi.w		%d0,&0x3f80		# is |exp| == $3f80?
1442
	beq.b		fso_sgl_dnrm_zero	# yes
1443
	cmpi.w		%d0,&0x407f		# no; is |exp| == $407f?
1444
	beq.b		fso_infnan		# yes
1445
	rts					# no
1446

1447
# only LOCAL_HI is examined for the single precision zero test; the andi.l
# result itself sets the condition codes used by the beq.
fso_sgl_dnrm_zero:
1448
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1449
	beq.b		fso_zero		# it's a skewed zero
1450
fso_sgl_dnrm:
1451
# here, we count on norm not to alter a0...
1452
	bsr.l		norm			# normalize mantissa
1453
	neg.w		%d0			# -shft amt
1454
	addi.w		&0x3f81,%d0		# adjust new exponent
1455
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1456
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1457
	rts
1458

1459
# shared by the single and double paths: keep the sign, zero the exponent
fso_zero:
1460
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear bogus exponent
1461
	rts
1462

1463
# shared by the single and double paths: force the extended INF/NAN exponent
fso_infnan:
1464
	andi.b		&0x7f,LOCAL_HI(%a0)	# clear j-bit
1465
	ori.w		&0x7fff,LOCAL_EX(%a0)	# make exponent = $7fff
1466
	rts
1467

1468
# double precision source: 0x3c00 = 0x3fff - 0x3ff and 0x43ff = 0x3c00 + 0x7ff
fso_dbl:
1469
	mov.w		LOCAL_EX(%a0),%d0	# fetch src exponent
1470
	andi.w		&0x7fff,%d0		# strip sign
1471
	cmpi.w		%d0,&0x3c00		# is |exp| == $3c00?
1472
	beq.b		fso_dbl_dnrm_zero	# yes
1473
	cmpi.w		%d0,&0x43ff		# no; is |exp| == $43ff?
1474
	beq.b		fso_infnan		# yes
1475
	rts					# no
1476

1477
# unlike the single precision case, the low mantissa longword must also be
# zero for the operand to be treated as a zero.
fso_dbl_dnrm_zero:
1478
	andi.l		&0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1479
	bne.b		fso_dbl_dnrm		# it's a skewed denorm
1480
	tst.l		LOCAL_LO(%a0)		# is it a zero?
1481
	beq.b		fso_zero		# yes
1482
fso_dbl_dnrm:
1483
# here, we count on norm not to alter a0...
1484
	bsr.l		norm			# normalize mantissa
1485
	neg.w		%d0			# -shft amt
1486
	addi.w		&0x3c01,%d0		# adjust new exponent
1487
	andi.w		&0x8000,LOCAL_EX(%a0)	# clear old exponent
1488
	or.w		%d0,LOCAL_EX(%a0)	# insert new exponent
1489
	rts
1490

1491
#################################################################
1492

1493
# fmove out took an unimplemented data type exception.
1494
# the src operand is in FP_SRC. Call _fout() to write out the result and
1495
# to determine which exceptions, if any, to take.
1496
fu_out:
1497
# Opclass three path of _fpsp_unsupp. Packed destination formats are routed
# to fu_out_pack; otherwise the DENORM/UNNORM source is tagged, fout() does
# the move, and the enabled exceptions are examined.

1498
# Separate packed move outs from the UNNORM and DENORM move outs.
1499
# (the three-bit field extracted here is compared against 0x3 and 0x7)
	bfextu		EXC_CMDREG(%a6){&3:&3},%d0
1500
	cmpi.b		%d0,&0x3
1501
	beq.w		fu_out_pack
1502
	cmpi.b		%d0,&0x7
1503
	beq.w		fu_out_pack
1504

1505

1506
# I'm not sure at this point what FPSR bits are valid for this instruction.
1507
# so, since the emulation routines re-create them anyways, zero exception field.
1508
# fmove out doesn't affect ccodes.
1509
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
1510

1511
	fmov.l		&0x0,%fpcr		# zero current control regs
1512
	fmov.l		&0x0,%fpsr
1513

1514
# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515
# call here. just figure out what it is...
1516
	mov.w		FP_SRC_EX(%a6),%d0	# get exponent
1517
	andi.w		&0x7fff,%d0		# strip sign
1518
	beq.b		fu_out_denorm		# it's a DENORM
1519

1520
	lea		FP_SRC(%a6),%a0
1521
	bsr.l		unnorm_fix		# non-zero exponent: it's an UNNORM; fix it
1522

1523
	mov.b		%d0,STAG(%a6)		# save tag returned by unnorm_fix
1524

1525
	bra.b		fu_out_cont
1526
fu_out_denorm:
1527
	mov.b		&DENORM,STAG(%a6)
1528
fu_out_cont:
1529

1530
	clr.l		%d0
1531
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1532

1533
	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
1534

1535
# the saved a6 at (%a6) is copied to EXC_A6 before the call; the exit paths
# copy EXC_A6 back to (%a6) afterwards.
	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
1536
	bsr.l		fout			# call fmove out routine
1537

1538
# Exceptions in order of precedence:
1539
#	BSUN	: none
1540
#	SNAN	: none
1541
#	OPERR	: fmove.{b,w,l} out of large UNNORM
1542
#	OVFL	: fmove.{s,d}
1543
#	UNFL	: fmove.{s,d,x}
1544
#	DZ	: none
1545
#	INEX2	: all
1546
#	INEX1	: none (packed doesn't travel through here)
1547

1548
# determine the highest priority exception(if any) set by the
1549
# emulation routine that has also been enabled by the user.
1550
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1551
	bne.w		fu_out_ena		# some are enabled
1552

1553
fu_out_done:
1554
# Normal completion of an fmove out: no enabled exception to report.

1555
	mov.l		EXC_A6(%a6),(%a6)	# in case a6 changed
1556

1557
# on extended precision opclass three instructions using pre-decrement or
1558
# post-increment addressing mode, the address register is not updated. if the
1559
# address register was the stack pointer used from user mode, then let's update
1560
# it here. if it was used from supervisor mode, then we have to handle this
1561
# as a special case.
1562
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
1563
	bne.b		fu_out_done_s		# supervisor
1564

1565
	mov.l		EXC_A7(%a6),%a0		# restore a7
1566
	mov.l		%a0,%usp
1567

1568
fu_out_done_cont:
1569
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1570
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1572

1573
	unlk		%a6
1574

1575
	btst		&0x7,(%sp)		# is trace on?
1576
	bne.b		fu_out_trace		# yes
1577

1578
	bra.l		_fpsp_done
1579

1580
# is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581
# ("fmov.x fpm,-(a7)") if so, the exception frame is moved down by 0xc bytes
# and the 12-byte extended result is written where the frame used to begin.
# otherwise continue with the normal exit at fu_out_done_cont.
1582
fu_out_done_s:
1583
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
1584
	bne.b		fu_out_done_cont
1585

1586
# the extended precision result is still in fp0. but, we need to save it
1587
# somewhere on the stack until we can copy it to its final resting place.
1588
# here, we're counting on the top of the stack to be the old place-holders
1589
# for fp0/fp1 which have already been restored. that way, we can write
1590
# over those destinations with the shifted stack frame.
1591
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1592

1593
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1594
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1596

1597
# no unlk here: a6 is reloaded but %sp still points at the bottom of the
# local frame, so the frame offsets below are biased by LOCAL_SIZE.
	mov.l		(%a6),%a6		# restore frame pointer
1598

1599
# shift the two longwords starting at EXC_SR and at 2+EXC_PC down by 0xc
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1600
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1601

1602
# now, copy the result to the proper place on the stack
1603
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1606

1607
# NOTE(review): this is expected to leave %sp at the shifted frame - confirm
# against the EXC_SR offset definition.
	add.l		&LOCAL_SIZE-0x8,%sp
1608

1609
	btst		&0x7,(%sp)		# is trace on?
1610
	bne.b		fu_out_trace		# yes
1611

1612
	bra.l		_fpsp_done
1613

1614
fu_out_ena:
1615
# Entered with d0 = FPCR enable byte (non-zero). Mask it with the exceptions
# fout() raised and locate the highest-priority one.
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
1616
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1617
	bne.b		fu_out_exc		# there is at least one set
1618

1619
# no exceptions were set.
1620
# if a disabled overflow occurred and inexact was enabled but the result
1621
# was exact, then a branch to _real_inex() is made.
# (the branch taken here is to fu_inex)
1622
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623
	beq.w		fu_out_done		# no
1624

1625
fu_out_ovflchk:
1626
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627
	beq.w		fu_out_done		# no
1628
	bra.w		fu_inex			# yes
1629

1630
#
1631
# The fp move out that took the "Unimplemented Data Type" exception was
1632
# being traced. Since the stack frames are similar, get the "current" PC
1633
# from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634
#
1635
#		  UNSUPP FRAME		   TRACE FRAME
1636
#		*****************	*****************
1637
#		*      EA	*	*    Current	*
1638
#		*		*	*      PC	*
1639
#		*****************	*****************
1640
#		* 0x3 *  0x0dc	*	* 0x2 *  0x024	*
1641
#		*****************	*****************
1642
#		*     Next	*	*     Next	*
1643
#		*      PC	*	*      PC	*
1644
#		*****************	*****************
1645
#		*      SR	*	*      SR	*
1646
#		*****************	*****************
1647
#
1648
fu_out_trace:
1649
# Entered after unlk with %sp pointing at the exception frame: rewrite the
# format/vector word and store FPIAR as the "Current PC" of the trace frame.
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
1650
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
1651
	bra.l		_real_trace
1652

1653
# an exception occurred and that exception was enabled.
1654
fu_out_exc:
1655
# Entered with d0 = bit offset returned by bfffo over bits 24-31, i.e. 24-31
# for the highest-priority enabled exception that occurred.
	subi.l		&24,%d0			# fix offset to be 0-7
1656

1657
# we don't mess with the existing fsave frame. just re-insert it and
1658
# jump to the "_real_{}()" handler...
1659
	mov.w		(tbl_fu_out.b,%pc,%d0.w*2),%d0
1660
	jmp		(tbl_fu_out.b,%pc,%d0.w*1)
1661

1662
	swbeg		&0x8
1663
# word offsets, relative to tbl_fu_out, of the per-exception exit code. The
# "can't happen" entries hold offset zero, i.e. they point back at the table.
tbl_fu_out:
1664
	short		tbl_fu_out	- tbl_fu_out	# BSUN can't happen
1665
	short		tbl_fu_out	- tbl_fu_out	# SNAN can't happen
1666
	short		fu_operr	- tbl_fu_out	# OPERR
1667
	short		fu_ovfl		- tbl_fu_out	# OVFL
1668
	short		fu_unfl		- tbl_fu_out	# UNFL
1669
	short		tbl_fu_out	- tbl_fu_out	# DZ can't happen
1670
	short		fu_inex		- tbl_fu_out	# INEX2
1671
	short		tbl_fu_out	- tbl_fu_out	# INEX1 won't make it here
1672

1673
# for snan and operr, the src op is still in FP_SRC so just frestore it.
1674
# (fu_ovfl and fu_unfl first overwrite FP_SRC with the EXOP held in fp1.)
# NOTE(review): tbl_fu_out has no entry that dispatches to fu_snan ("SNAN
# can't happen"); whether anything else branches here is not visible in this
# part of the file.
1675
fu_snan:
1676
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1677
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1679

1680
	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
1681
	mov.w		&0xe006,2+FP_SRC(%a6)	# fsave status for SNAN
1682

1683
	frestore	FP_SRC(%a6)
1684

1685
	unlk		%a6
1686

1687

1688
	bra.l		_real_snan
1689

1690
fu_operr:
1691
# Enabled OPERR on an fmove out: restore the saved registers, rewrite the
# frame's format/vector word, mark the fsave frame in FP_SRC with the OPERR
# status, reload it into the FPU, and call out to _real_operr.
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1692
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1694

1695
	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
1696
	mov.w		&0xe004,2+FP_SRC(%a6)	# fsave status for OPERR
1697

1698
	frestore	FP_SRC(%a6)
1699

1700
	unlk		%a6
1701

1702

1703
	bra.l		_real_operr
1704

1705
fu_ovfl:
1706
# Enabled OVFL on an fmove out: the EXOP in fp1 replaces the operand in the
# fsave frame before the registers are restored and the frame is reloaded.
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1707

1708
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1709
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1711

1712
	mov.w		&0x30d4,EXC_VOFF(%a6)	# vector offset = 0xd4
1713
	mov.w		&0xe005,2+FP_SRC(%a6)	# fsave status for OVFL
1714

1715
	frestore	FP_SRC(%a6)		# restore EXOP
1716

1717
	unlk		%a6
1718

1719
	bra.l		_real_ovfl
1720

1721
# underflow can happen for extended precision. extended precision opclass
1722
# three instruction exceptions don't update the stack pointer. so, if the
1723
# exception occurred from user mode, then simply update a7 and exit normally.
1724
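# if the exception occurred from supervisor mode, check if the <ea> mode was
# -(a7); if so, the exc frame is shifted "down" and the result is stored
# where the frame used to be.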
1725
fu_unfl:
1726
	mov.l		EXC_A6(%a6),(%a6)	# restore a6
1727

1728
	btst		&0x5,EXC_SR(%a6)
1729
	bne.w		fu_unfl_s
1730

1731
	mov.l		EXC_A7(%a6),%a0		# restore a7 whether we need
1732
	mov.l		%a0,%usp		# to or not...
1733

1734
fu_unfl_cont:
1735
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1736

1737
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1738
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1740

1741
	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1742
	mov.w		&0xe003,2+FP_SRC(%a6)
1743

1744
	frestore	FP_SRC(%a6)		# restore EXOP
1745

1746
	unlk		%a6
1747

1748
	bra.l		_real_unfl
1749

1750
fu_unfl_s:
1751
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752
	bne.b		fu_unfl_cont
1753

1754
# the extended precision result is still in fp0. but, we need to save it
1755
# somewhere on the stack until we can copy it to its final resting place
1756
# (where the exc frame is currently). make sure it's not at the top of the
1757
# frame or it will get overwritten when the exc stack frame is shifted "down".
1758
	fmovm.x		&0x80,FP_SRC(%a6)	# put answer on stack
1759
	fmovm.x		&0x40,FP_DST(%a6)	# put EXOP on stack
1760

1761
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1762
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1764

1765
	mov.w		&0x30cc,EXC_VOFF(%a6)	# vector offset = 0xcc
1766
	mov.w		&0xe003,2+FP_DST(%a6)
1767

1768
	frestore	FP_DST(%a6)		# restore EXOP
1769

1770
	mov.l		(%a6),%a6		# restore frame pointer
1771

1772
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
1773
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
1774
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
1775

1776
# now, copy the result to the proper place on the stack
1777
	mov.l		LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778
	mov.l		LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779
	mov.l		LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780

1781
	add.l		&LOCAL_SIZE-0x8,%sp
1782

1783
	bra.l		_real_unfl
1784

1785
# fmove in and out enter here.
1786
fu_inex:
1787
	fmovm.x		&0x40,FP_SRC(%a6)	# save EXOP to the stack
1788

1789
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1790
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1792

1793
	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
1794
	mov.w		&0xe001,2+FP_SRC(%a6)
1795

1796
	frestore	FP_SRC(%a6)		# restore EXOP
1797

1798
	unlk		%a6
1799

1800

1801
	bra.l		_real_inex
1802

1803
#########################################################################
1804
#########################################################################
1805
fu_in_pack:
1806
# opclass two, packed source operand: fetch the packed src with get_packed(),
# tag it, and, for dyadic operations only, load and tag the dst register.

1807

1808
# I'm not sure at this point what FPSR bits are valid for this instruction.
1809
# so, since the emulation routines re-create them anyways, zero the exception
# field. note that the mask below (0x00ff00ff) clears the top byte of
# USER_FPSR as well as the exception byte.
1810
	andi.l		&0x0ff00ff,USER_FPSR(%a6) # zero top byte + exception field
1811

1812
	fmov.l		&0x0,%fpcr		# zero current control regs
1813
	fmov.l		&0x0,%fpsr
1814

1815
	bsr.l		get_packed		# fetch packed src operand
1816

1817
	lea		FP_SRC(%a6),%a0		# pass ptr to src
1818
	bsr.l		set_tag_x		# set src optype tag
1819

1820
	mov.b		%d0,STAG(%a6)		# save src optype tag
1821

1822
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823

1824
# bit five of the fp extension word separates the monadic and dyadic operations
1825
# at this point
1826
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
1827
	beq.b		fu_extract_p		# monadic
1828
	cmpi.b		1+EXC_CMDREG(%a6),&0x3a	# is operation an ftst?
1829
	beq.b		fu_extract_p		# yes, so it's monadic, too
1830

1831
	bsr.l		load_fpn2		# load dst into FP_DST
1832

1833
	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
1834
	bsr.l		set_tag_x		# tag the operand type
1835
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
1836
	bne.b		fu_op2_done_p		# no
1837
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
1838
fu_op2_done_p:
1839
	mov.b		%d0,DTAG(%a6)		# save dst optype tag
1840

1841
fu_extract_p:
1842
	clr.l		%d0
1843
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
1844

1845
	bfextu		1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
1846

1847
	lea		FP_SRC(%a6),%a0
1848
	lea		FP_DST(%a6),%a1
1849

1850
	mov.l		(tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
1851
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
1852

1853
#
1854
# Exceptions in order of precedence:
1855
#	BSUN	: none
1856
#	SNAN	: all dyadic ops
1857
#	OPERR	: fsqrt(-NORM)
1858
#	OVFL	: all except ftst,fcmp
1859
#	UNFL	: all except ftst,fcmp
1860
#	DZ	: fdiv
1861
#	INEX2	: all except ftst,fcmp
1862
#	INEX1	: all
1863
#
1864

1865
# we determine the highest priority exception(if any) set by the
1866
# emulation routine that has also been enabled by the user.
1867
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
1868
	bne.w		fu_in_ena_p		# some are enabled
1869

1870
fu_in_cont_p:
1871
# fcmp and ftst do not store any result.
1872
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch extension
1873
	andi.b		&0x38,%d0		# extract bits 3-5
1874
	cmpi.b		%d0,&0x38		# is instr fcmp or ftst?
1875
	beq.b		fu_in_exit_p		# yes
1876

1877
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878
	bsr.l		store_fpreg		# store the result
1879

1880
fu_in_exit_p:
1881

1882
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1883
	bne.w		fu_in_exit_s_p		# supervisor
1884

1885
	mov.l		EXC_A7(%a6),%a0		# update user a7
1886
	mov.l		%a0,%usp
1887

1888
fu_in_exit_cont_p:
1889
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1890
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1892

1893
	unlk		%a6			# unravel stack frame
1894

1895
	btst		&0x7,(%sp)		# is trace on?
1896
	bne.w		fu_trace_p		# yes
1897

1898
	bra.l		_fpsp_done		# exit to os
1899

1900
# the exception occurred in supervisor mode. check to see if the
1901
# addressing mode was (a7)+. if so, we'll need to shift the
1902
# stack frame "up".
1903
fu_in_exit_s_p:
1904
	btst		&mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905
	beq.b		fu_in_exit_cont_p	# no
1906

1907
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1908
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1910

1911
	unlk		%a6			# unravel stack frame
1912

1913
# shift the stack frame "up". we don't really care about the <ea> field.
1914
	mov.l		0x4(%sp),0x10(%sp)
1915
	mov.l		0x0(%sp),0xc(%sp)
1916
	add.l		&0xc,%sp
1917

1918
	btst		&0x7,(%sp)		# is trace on?
1919
	bne.w		fu_trace_p		# yes
1920

1921
	bra.l		_fpsp_done		# exit to os
1922

1923
fu_in_ena_p:
1924
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled & set
1925
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
1926
	bne.b		fu_in_exc_p		# at least one was set
1927

1928
#
1929
# No exceptions occurred that were also enabled. Now:
1930
#
1931
#	if (OVFL && ovfl_disabled && inexact_enabled) {
1932
#	    branch to _real_inex() (even if the result was exact!);
1933
#	} else {
1934
#	    save the result in the proper fp reg (unless the op is fcmp or ftst);
1935
#	    return;
1936
#	}
1937
#
1938
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939
	beq.w		fu_in_cont_p		# no
1940

1941
fu_in_ovflchk_p:
1942
	btst		&inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943
	beq.w		fu_in_cont_p		# no
1944
	bra.w		fu_in_exc_ovfl_p	# do _real_inex() now
1945

1946
#
1947
# An exception occurred and that exception was enabled:
1948
#
1949
#	shift enabled exception field into lo byte of d0;
1950
#	if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951
#	    ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952
#		/*
1953
#		 * this is the case where we must call _real_inex() now or else
1954
#		 * there will be no other way to pass it the exceptional operand
1955
#		 */
1956
#		call _real_inex();
1957
#	} else {
1958
#		restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959
#	}
1960
#
1961
fu_in_exc_p:
1962
# on entry from fu_in_ena_p, d0 holds the bfffo bit offset (24-31) of the
# highest priority exception that is both enabled and set. rebase it so it
# can index the eight-entry tbl_except_p.
	subi.l		&24,%d0			# fix offset to be 0-7
1963
	cmpi.b		%d0,&0x6		# is exception INEX? (6 or 7)
1964
	blt.b		fu_in_exc_exit_p	# no
1965

1966
# the enabled exception was inexact. if unfl or ovfl is also set here, it
# cannot be enabled (it would have had higher priority above).
1967
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968
	bne.w		fu_in_exc_unfl_p	# yes
1969
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970
	bne.w		fu_in_exc_ovfl_p	# yes
1971

1972
# here, we insert the correct fsave status value into the fsave frame for the
1973
# corresponding exception. the operand in the fsave frame should be the original
1974
# src operand.
1975
# as a reminder for future predicted pain and agony, we are passing in fsave the
1976
# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977
# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1978
fu_in_exc_exit_p:
1979
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
1980
	bne.w		fu_in_exc_exit_s_p	# supervisor
1981

1982
	mov.l		EXC_A7(%a6),%a0		# update user a7
1983
	mov.l		%a0,%usp
1984

1985
fu_in_exc_exit_cont_p:
1986
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
1987

1988
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
1989
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
1991

1992
	frestore	FP_SRC(%a6)		# restore src op
1993

1994
	unlk		%a6
1995

1996
	btst		&0x7,(%sp)		# is trace enabled?
1997
	bne.w		fu_trace_p		# yes
1998

1999
	bra.l		_fpsp_done
2000

2001
tbl_except_p:
2002
	short		0xe000,0xe006,0xe004,0xe005
2003
	short		0xe003,0xe002,0xe001,0xe001
2004

2005
fu_in_exc_ovfl_p:
2006
	mov.w		&0x3,%d0
2007
	bra.w		fu_in_exc_exit_p
2008

2009
fu_in_exc_unfl_p:
2010
	mov.w		&0x4,%d0
2011
	bra.w		fu_in_exc_exit_p
2012

2013
fu_in_exc_exit_s_p:
2014
	btst		&mia7_bit,SPCOND_FLG(%a6)
2015
	beq.b		fu_in_exc_exit_cont_p
2016

2017
	mov.w		(tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2018

2019
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2020
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2022

2023
	frestore	FP_SRC(%a6)		# restore src op
2024

2025
	unlk		%a6			# unravel stack frame
2026

2027
# shift stack frame "up". who cares about <ea> field.
2028
	mov.l		0x4(%sp),0x10(%sp)
2029
	mov.l		0x0(%sp),0xc(%sp)
2030
	add.l		&0xc,%sp
2031

2032
	btst		&0x7,(%sp)		# is trace on?
2033
	bne.b		fu_trace_p		# yes
2034

2035
	bra.l		_fpsp_done		# exit to os
2036

2037
#
2038
# The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039
# exception was being traced. Make the "current" PC the FPIAR and put it in the
2040
# trace stack frame then jump to _real_trace().
2041
#
2042
#		  UNSUPP FRAME		   TRACE FRAME
2043
#		*****************	*****************
2044
#		*      EA	*	*    Current	*
2045
#		*		*	*      PC	*
2046
#		*****************	*****************
2047
#		* 0x2 *	0x0dc	*	* 0x2 *  0x024	*
2048
#		*****************	*****************
2049
#		*     Next	*	*     Next	*
2050
#		*      PC	*	*      PC	*
2051
#		*****************	*****************
2052
#		*      SR	*	*      SR	*
2053
#		*****************	*****************
2054
fu_trace_p:
2055
# rewrite the frame in place as shown in the diagram above: only the
# format/vector word and the longword above it change; SR and Next PC stay.
	mov.w		&0x2024,0x6(%sp)	# format 0x2, vector offset 0x024
2056
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" slot = FPIAR
2057

2058
	bra.l		_real_trace		# exit through the trace callout
2059

2060
#########################################################
2061
#########################################################
2062
fu_out_pack:
2063

2064

2065
# I'm not sure at this point what FPSR bits are valid for this instruction.
2066
# so, since the emulation routines re-create them anyways, zero exception field.
2067
# fmove out doesn't affect ccodes.
2068
	and.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
2069

2070
	fmov.l		&0x0,%fpcr		# zero current control regs
2071
	fmov.l		&0x0,%fpsr
2072

2073
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
2074
	bsr.l		load_fpn1
2075

2076
# unlike other opclass 3, unimplemented data type exceptions, packed must be
2077
# able to detect all operand types.
2078
	lea		FP_SRC(%a6),%a0
2079
	bsr.l		set_tag_x		# tag the operand type
2080
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2081
	bne.b		fu_op2_p		# no
2082
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
2083

2084
fu_op2_p:
2085
	mov.b		%d0,STAG(%a6)		# save src optype tag
2086

2087
	clr.l		%d0
2088
	mov.b		FPCR_MODE(%a6),%d0	# fetch rnd mode/prec
2089

2090
	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
2091

2092
	mov.l		(%a6),EXC_A6(%a6)	# in case a6 changes
2093
	bsr.l		fout			# call fmove out routine
2094

2095
# Exceptions in order of precedence:
2096
#	BSUN	: no
2097
#	SNAN	: yes
2098
#	OPERR	: if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099
#	OVFL	: no
2100
#	UNFL	: no
2101
#	DZ	: no
2102
#	INEX2	: yes
2103
#	INEX1	: no
2104

2105
# determine the highest priority exception(if any) set by the
2106
# emulation routine that has also been enabled by the user.
2107
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2108
	bne.w		fu_out_ena_p		# some are enabled
2109

2110
fu_out_exit_p:
2111
	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2112

2113
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2114
	bne.b		fu_out_exit_s_p		# supervisor
2115

2116
	mov.l		EXC_A7(%a6),%a0		# update user a7
2117
	mov.l		%a0,%usp
2118

2119
fu_out_exit_cont_p:
2120
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2121
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2123

2124
	unlk		%a6			# unravel stack frame
2125

2126
	btst		&0x7,(%sp)		# is trace on?
2127
	bne.w		fu_trace_p		# yes
2128

2129
	bra.l		_fpsp_done		# exit to os
2130

2131
# the exception occurred in supervisor mode. check to see if the
2132
# addressing mode was -(a7). if so, we'll need to shift the
2133
# stack frame "down".
2134
fu_out_exit_s_p:
2135
	btst		&mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136
	beq.b		fu_out_exit_cont_p	# no
2137

2138
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2139
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2141

2142
	mov.l		(%a6),%a6		# restore frame pointer
2143

2144
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2145
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2146

2147
# now, copy the result to the proper place on the stack
2148
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151

2152
	add.l		&LOCAL_SIZE-0x8,%sp
2153

2154
	btst		&0x7,(%sp)
2155
	bne.w		fu_trace_p
2156

2157
	bra.l		_fpsp_done
2158

2159
fu_out_ena_p:
2160
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only ones enabled
2161
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2162
	beq.w		fu_out_exit_p
2163

2164
	mov.l		EXC_A6(%a6),(%a6)	# restore a6
2165

2166
# an exception occurred and that exception was enabled.
2167
# the only exception possible on packed move out are INEX, OPERR, and SNAN.
2168
fu_out_exc_p:
2169
	cmpi.b		%d0,&0x1a
2170
	bgt.w		fu_inex_p2
2171
	beq.w		fu_operr_p
2172

2173
fu_snan_p:
2174
	btst		&0x5,EXC_SR(%a6)
2175
	bne.b		fu_snan_s_p
2176

2177
	mov.l		EXC_A7(%a6),%a0
2178
	mov.l		%a0,%usp
2179
	bra.w		fu_snan
2180

2181
fu_snan_s_p:
2182
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(a7)?
2183
	bne.w		fu_snan			# no; take the common snan exit
2184

2185
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186
# the strategy is to move the exception frame "down" 12 bytes. then, we
2187
# can store the default result where the exception frame was.
2188
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2189
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2191

2192
	mov.w		&0x30d8,EXC_VOFF(%a6)	# vector offset = 0xd8
2193
	mov.w		&0xe006,2+FP_SRC(%a6)	# set fsave status
2194

2195
	frestore	FP_SRC(%a6)		# restore src operand
2196

2197
	mov.l		(%a6),%a6		# restore frame pointer
2198

2199
# a6 is no longer the frame pointer, so everything below is addressed
# relative to sp. shift SR, PC, and EA down by 0xc bytes.
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2200
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2201
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2202

2203
# now, we copy the default result to its proper location
2204
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207

2208
	add.l		&LOCAL_SIZE-0x8,%sp	# drop locals; sp at the shifted exc frame
2209

2210

2211
	bra.l		_real_snan
2212

2213
fu_operr_p:
2214
	btst		&0x5,EXC_SR(%a6)
2215
	bne.w		fu_operr_p_s
2216

2217
	mov.l		EXC_A7(%a6),%a0
2218
	mov.l		%a0,%usp
2219
	bra.w		fu_operr
2220

2221
fu_operr_p_s:
2222
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2223
	bne.w		fu_operr
2224

2225
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226
# the strategy is to move the exception frame "down" 12 bytes. then, we
2227
# can store the default result where the exception frame was.
2228
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2229
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2231

2232
	mov.w		&0x30d0,EXC_VOFF(%a6)	# vector offset = 0xd0
2233
	mov.w		&0xe004,2+FP_SRC(%a6)	# set fsave status
2234

2235
	frestore	FP_SRC(%a6)		# restore src operand
2236

2237
	mov.l		(%a6),%a6		# restore frame pointer
2238

2239
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2240
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2241
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2242

2243
# now, we copy the default result to its proper location
2244
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247

2248
	add.l		&LOCAL_SIZE-0x8,%sp
2249

2250

2251
	bra.l		_real_operr
2252

2253
fu_inex_p2:
2254
	btst		&0x5,EXC_SR(%a6)
2255
	bne.w		fu_inex_s_p2
2256

2257
	mov.l		EXC_A7(%a6),%a0
2258
	mov.l		%a0,%usp
2259
	bra.w		fu_inex
2260

2261
fu_inex_s_p2:
2262
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2263
	bne.w		fu_inex
2264

2265
# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266
# the strategy is to move the exception frame "down" 12 bytes. then, we
2267
# can store the default result where the exception frame was.
2268
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0/fp1
2269
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2271

2272
	mov.w		&0x30c4,EXC_VOFF(%a6)	# vector offset = 0xc4
2273
	mov.w		&0xe001,2+FP_SRC(%a6)	# set fsave status
2274

2275
	frestore	FP_SRC(%a6)		# restore src operand
2276

2277
	mov.l		(%a6),%a6		# restore frame pointer
2278

2279
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
2280
	mov.l		LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
2281
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
2282

2283
# now, we copy the default result to its proper location
2284
	mov.l		LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285
	mov.l		LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286
	mov.l		LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287

2288
	add.l		&LOCAL_SIZE-0x8,%sp
2289

2290

2291
	bra.l		_real_inex
2292

2293
#########################################################################
2294

2295
#
2296
# if we're stuffing a source operand back into an fsave frame then we
2297
# have to make sure that for single or double source operands that the
2298
# format stuffed is as weird as the hardware usually makes it.
2299
#
2300
	global		funimp_skew
2301
funimp_skew:
2302
	bfextu		EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303
	cmpi.b		%d0,&0x1		# was src sgl?
2304
	beq.b		funimp_skew_sgl		# yes
2305
	cmpi.b		%d0,&0x5		# was src dbl?
2306
	beq.b		funimp_skew_dbl		# yes
2307
	rts
2308

2309
funimp_skew_sgl:
2310
# single precision source: if the biased exponent in FP_SRC is non-zero and
# no greater than 0x3f80, shift hi(man) right, force the j-bit on, and
# replace the exponent with 0x3f80 (sign preserved). otherwise leave FP_SRC
# untouched.
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2311
	andi.w		&0x7fff,%d0		# strip sign
2312
	beq.b		funimp_skew_sgl_not	# exponent is zero; nothing to do
2313
	cmpi.w		%d0,&0x3f80		# is exponent above 0x3f80?
2314
	bgt.b		funimp_skew_sgl_not	# yes; nothing to do
2315
	neg.w		%d0			# make exponent negative
2316
	addi.w		&0x3f81,%d0		# find amt to shift (0x3f81 - exp)
2317
	mov.l		FP_SRC_HI(%a6),%d1	# fetch DENORM hi(man)
2318
	lsr.l		%d0,%d1			# shift it
2319
	bset		&31,%d1			# set j-bit
2320
	mov.l		%d1,FP_SRC_HI(%a6)	# insert new hi(man)
2321
	andi.w		&0x8000,FP_SRC_EX(%a6)	# clear old exponent
2322
	ori.w		&0x3f80,FP_SRC_EX(%a6)	# insert new "skewed" exponent
2323
funimp_skew_sgl_not:
2324
	rts
2325

2326
funimp_skew_dbl:
2327
	mov.w		FP_SRC_EX(%a6),%d0	# fetch DENORM exponent
2328
	andi.w		&0x7fff,%d0		# strip sign
2329
	beq.b		funimp_skew_dbl_not
2330
	cmpi.w		%d0,&0x3c00
2331
	bgt.b		funimp_skew_dbl_not
2332

2333
	tst.b		FP_SRC_EX(%a6)		# make "internal format"
2334
	smi.b		0x2+FP_SRC(%a6)
2335
	mov.w		%d0,FP_SRC_EX(%a6)	# insert exponent with cleared sign
2336
	clr.l		%d0			# clear g,r,s
2337
	lea		FP_SRC(%a6),%a0		# pass ptr to src op
2338
	mov.w		&0x3c01,%d1		# pass denorm threshold
2339
	bsr.l		dnrm_lp			# denorm it
2340
	mov.w		&0x3c00,%d0		# new exponent
2341
	tst.b		0x2+FP_SRC(%a6)		# is sign set?
2342
	beq.b		fss_dbl_denorm_done	# no
2343
	bset		&15,%d0			# set sign
2344
fss_dbl_denorm_done:
2345
	bset		&0x7,FP_SRC_HI(%a6)	# set j-bit
2346
	mov.w		%d0,FP_SRC_EX(%a6)	# insert new exponent
2347
funimp_skew_dbl_not:
2348
	rts
2349

2350
#########################################################################
2351
	global		_mem_write2
2352
_mem_write2:
2353
# write the 12 bytes at (a0). for a user mode exception this simply hands
# off to _dmem_write(). for a supervisor mode exception the data is parked
# in FP_DST instead of being written out; the "-(a7)" exit paths later copy
# FP_DST_{EX,HI,LO} onto the system stack themselves.
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
2354
	beq.l		_dmem_write		# user; hand off to _dmem_write()
2355
	mov.l		0x0(%a0),FP_DST_EX(%a6)	# supervisor; stash the 3 longwords
2356
	mov.l		0x4(%a0),FP_DST_HI(%a6)
2357
	mov.l		0x8(%a0),FP_DST_LO(%a6)
2358
	clr.l		%d1			# d1 = 0; presumably "no error" -- confirm w/ callers
2359
	rts
2360

2361
#########################################################################
2362
# XDEF ****************************************************************	#
2363
#	_fpsp_effadd(): 060FPSP entry point for FP "Unimplemented	#
2364
#			effective address" exception.			#
2365
#									#
2366
#	This handler should be the first code executed upon taking the	#
2367
#	FP Unimplemented Effective Address exception in an operating	#
2368
#	system.								#
2369
#									#
2370
# XREF ****************************************************************	#
2371
#	_imem_read_long() - read instruction longword			#
2372
#	fix_skewed_ops() - adjust src operand in fsave frame		#
2373
#	set_tag_x() - determine optype of src/dst operands		#
2374
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
2375
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
2376
#	load_fpn2() - load dst operand from FP regfile			#
2377
#	tbl_unsupp - addr of emulation routine table for opclass 0,2	#
2378
#	decbin() - convert packed data to FP binary data		#
2379
#	_real_fpu_disabled() - "callout" for "FPU disabled" exception	#
2380
#	_real_access() - "callout" for access error exception		#
2381
#	_mem_read() - read extended immediate operand from memory	#
2382
#	_fpsp_done() - "callout" for exit; work all done		#
2383
#	_real_trace() - "callout" for Trace enabled exception		#
2384
#	fmovm_dynamic() - emulate dynamic fmovm instruction		#
2385
#	fmovm_ctrl() - emulate fmovm control instruction		#
2386
#									#
2387
# INPUT ***************************************************************	#
2388
#	- The system stack contains the "Unimplemented <ea>" stk frame	#
2389
#									#
2390
# OUTPUT **************************************************************	#
2391
#	If access error:						#
2392
#	- The system stack is changed to an access error stack frame	#
2393
#	If FPU disabled:						#
2394
#	- The system stack is changed to an FPU disabled stack frame	#
2395
#	If Trace exception enabled:					#
2396
#	- The system stack is changed to a Trace exception stack frame	#
2397
#	Else: (normal case)						#
2398
#	- None (correct result has been stored as appropriate)		#
2399
#									#
2400
# ALGORITHM ***********************************************************	#
2401
#	This exception handles 3 types of operations:			#
2402
# (1) FP Instructions using extended precision or packed immediate	#
2403
#     addressing mode.							#
2404
# (2) The "fmovm.x" instruction w/ dynamic register specification.	#
2405
# (3) The "fmovm.l" instruction w/ 2 or 3 control registers.		#
2406
#									#
2407
#	For immediate data operations, the data is read in w/ a		#
2408
# _mem_read() "callout", converted to FP binary (if packed), and used	#
2409
# as the source operand to the instruction specified by the instruction	#
2410
# word. If no FP exception should be reported as a result of the	#
2411
# emulation, then the result is stored to the destination register and	#
2412
# the handler exits through _fpsp_done(). If an enabled exc has been	#
2413
# signalled as a result of emulation, then an fsave state frame		#
2414
# corresponding to the FP exception type must be entered into the 060	#
2415
# FPU before exiting. In either the enabled or disabled cases, we	#
2416
# must also check if a Trace exception is pending, in which case, we	#
2417
# must create a Trace exception stack frame from the current exception	#
2418
# stack frame. If no Trace is pending, we simply exit through		#
2419
# _fpsp_done().								#
2420
#	For "fmovm.x", call the routine fmovm_dynamic() which will	#
2421
# decode and emulate the instruction. No FP exceptions can be pending	#
2422
# as a result of this operation emulation. A Trace exception can be	#
2423
# pending, though, which means the current stack frame must be changed	#
2424
# to a Trace stack frame and an exit made through _real_trace().	#
2425
# For the case of "fmovm.x Dn,-(a7)", where the offending instruction	#
2426
# was executed from supervisor mode, this handler must store the FP	#
2427
# register file values to the system stack by itself since		#
2428
# fmovm_dynamic() can't handle this. A normal exit is made through	#
2429
# _fpsp_done().								#
2430
#	For "fmovm.l", fmovm_ctrl() is used to emulate the instruction.	#
2431
# Again, a Trace exception may be pending and an exit made through	#
2432
# _real_trace(). Else, a normal exit is made through _fpsp_done().	#
2433
#									#
2434
#	Before any of the above is attempted, it must be checked to	#
2435
# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken	#
2436
# before the "FPU disabled" exception, but the "FPU disabled" exception	#
2437
# has higher priority, we check the disabled bit in the PCR. If set,	#
2438
# then we must create an 8 word "FPU disabled" exception stack frame	#
2439
# from the current 4 word exception stack frame. This includes		#
2440
# reproducing the effective address of the instruction to put on the	#
2441
# new stack frame.							#
2442
#									#
2443
#	In the process of all emulation work, if a _mem_read()		#
2444
# "callout" returns a failing result indicating an access error, then	#
2445
# we must create an access error stack frame from the current stack	#
2446
# frame. This information includes a faulting address and a fault-	#
2447
# status-longword. These are created within this handler.		#
2448
#									#
2449
#########################################################################
2450

2451
	global		_fpsp_effadd
2452
_fpsp_effadd:
2453

2454
# This exception type takes priority over the "Line F Emulator"
2455
# exception. Therefore, the FPU could be disabled when entering here.
2456
# So, we must check to see if it's disabled and handle that case separately.
2457
	mov.l		%d0,-(%sp)		# save d0
2458
	movc		%pcr,%d0		# load proc cr
2459
	btst		&0x1,%d0		# is FPU disabled?
2460
	bne.w		iea_disabled		# yes (saved d0 is still on the stack)
2461
	mov.l		(%sp)+,%d0		# restore d0
2462

2463
	link		%a6,&-LOCAL_SIZE	# init stack frame
2464

2465
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2466
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
2468

2469
# PC of instruction that took the exception is the PC in the frame
2470
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471

2472
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2473
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2474
	bsr.l		_imem_read_long		# fetch the instruction words
2475
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2476

2477
#########################################################################
2478

2479
# d0 still holds the longword just fetched; its low word is the extension
# word, and the sign of that word (bit 15) selects the fmovem path.
	tst.w		%d0			# is operation fmovem?
2480
	bmi.w		iea_fmovm		# yes
2481

2482
#
2483
# here, we will have:
2484
#	fabs	fdabs	fsabs		facos		fmod
2485
#	fadd	fdadd	fsadd		fasin		frem
2486
#	fcmp				fatan		fscale
2487
#	fdiv	fddiv	fsdiv		fatanh		fsin
2488
#	fint				fcos		fsincos
2489
#	fintrz				fcosh		fsinh
2490
#	fmove	fdmove	fsmove		fetox		ftan
2491
#	fmul	fdmul	fsmul		fetoxm1		ftanh
2492
#	fneg	fdneg	fsneg		fgetexp		ftentox
2493
#	fsgldiv				fgetman		ftwotox
2494
#	fsglmul				flog10
2495
#	fsqrt				flog2
2496
#	fsub	fdsub	fssub		flogn
2497
#	ftst				flognp1
2498
# which can all use f<op>.{x,p}
2499
# so, now it's immediate data extended precision AND PACKED FORMAT!
2500
#
2501
iea_op:
2502
	andi.l		&0x00ff00ff,USER_FPSR(%a6)
2503

2504
	btst		&0xa,%d0		# is src fmt x or p?
2505
	bne.b		iea_op_pack		# packed
2506

2507

2508
	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2509
	lea		FP_SRC(%a6),%a1		# pass: ptr to super addr
2510
	mov.l		&0xc,%d0		# pass: 12 bytes
2511
	bsr.l		_imem_read		# read extended immediate
2512

2513
	tst.l		%d1			# did ifetch fail?
2514
	bne.w		iea_iacc		# yes
2515

2516
	bra.b		iea_op_setsrc
2517

2518
iea_op_pack:
2519

2520
	mov.l		EXC_EXTWPTR(%a6),%a0	# pass: ptr to #<data>
2521
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
2522
	mov.l		&0xc,%d0		# pass: 12 bytes
2523
	bsr.l		_imem_read		# read packed operand
2524

2525
	tst.l		%d1			# did ifetch fail?
2526
	bne.w		iea_iacc		# yes
2527

2528
# The packed operand is an INF or a NAN if the exponent field is all ones.
2529
	bfextu		FP_SRC(%a6){&1:&15},%d0	# get exp
2530
	cmpi.w		%d0,&0x7fff		# INF or NAN?
2531
	beq.b		iea_op_setsrc		# operand is an INF or NAN
2532

2533
# The packed operand is a zero if the mantissa is all zero, else it's
2534
# a normal packed op.
2535
	mov.b		3+FP_SRC(%a6),%d0	# get byte 4
2536
	andi.b		&0x0f,%d0		# clear all but last nybble
2537
	bne.b		iea_op_gp_not_spec	# not a zero
2538
	tst.l		FP_SRC_HI(%a6)		# is lw 2 zero?
2539
	bne.b		iea_op_gp_not_spec	# not a zero
2540
	tst.l		FP_SRC_LO(%a6)		# is lw 3 zero?
2541
	beq.b		iea_op_setsrc		# operand is a ZERO
2542
iea_op_gp_not_spec:
2543
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
2544
	bsr.l		decbin			# convert to extended
2545
	fmovm.x		&0x80,FP_SRC(%a6)	# make this the srcop
2546

2547
iea_op_setsrc:
2548
	addi.l		&0xc,EXC_EXTWPTR(%a6)	# update extension word pointer
2549

2550
# FP_SRC now holds the src operand.
2551
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
2552
	bsr.l		set_tag_x		# tag the operand type
2553
	mov.b		%d0,STAG(%a6)		# could be ANYTHING!!!
2554
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2555
	bne.b		iea_op_getdst		# no
2556
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2557
	mov.b		%d0,STAG(%a6)		# set new optype tag
2558
# Decide from the command word whether a destination operand is needed and,
# if so, load it from the FP register file and tag it (DTAG).
# STORE_FLG has inverted sense: zero means "store the result", nonzero
# (set below for ftst/fcmp) means "do not store".
iea_op_getdst:
2559
	clr.b		STORE_FLG(%a6)		# default: the result will be stored
2560

2561
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
2562
	beq.b		iea_op_extract		# monadic
2563
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation fsincos,ftst,fcmp?
2564
	bne.b		iea_op_spec		# yes
2565

2566
iea_op_loaddst:
2567
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568
	bsr.l		load_fpn2		# load dst operand
2569

2570
	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
2571
	bsr.l		set_tag_x		# tag the operand type
2572
	mov.b		%d0,DTAG(%a6)		# could be ANYTHING!!!
2573
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
2574
	bne.b		iea_op_extract		# no
2575
	bsr.l		unnorm_fix		# yes; convert to NORM/DENORM/ZERO
2576
	mov.b		%d0,DTAG(%a6)		# set new optype tag
2577
	bra.b		iea_op_extract
2578

2579
# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580
iea_op_spec:
2581
	btst		&0x3,1+EXC_CMDREG(%a6)	# is operation fsincos?
2582
	beq.b		iea_op_extract		# yes
2583
# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584
# store a result. then, only fcmp will branch back and pick up a dst operand.
2585
	st		STORE_FLG(%a6)		# don't store a final result
2586
	btst		&0x1,1+EXC_CMDREG(%a6)	# is operation fcmp?
2587
	beq.b		iea_op_loaddst		# yes
2588

2589
# Call the emulation routine selected by the low 7 bits of the command word.
# Passed to the routine: d0 = FPCR_MODE byte (zero-extended), a0 = ptr to
# FP_SRC, a1 = ptr to FP_DST. FPCR and FPSR are zeroed before the call.
iea_op_extract:
2590
	clr.l		%d0
2591
	mov.b		FPCR_MODE(%a6),%d0	# pass: rnd mode,prec
2592

2593
	mov.b		1+EXC_CMDREG(%a6),%d1
2594
	andi.w		&0x007f,%d1		# extract extension
2595

2596
	fmov.l		&0x0,%fpcr
2597
	fmov.l		&0x0,%fpsr
2598

2599
	lea		FP_SRC(%a6),%a0
2600
	lea		FP_DST(%a6),%a1
2601

2602
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset (relative to tbl_unsupp)
2603
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
2604

2605
#
2606
# Exceptions in order of precedence:
2607
#	BSUN	: none
2608
#	SNAN	: all operations
2609
#	OPERR	: all reg-reg or mem-reg operations that can normally operr
2610
#	OVFL	: same as OPERR
2611
#	UNFL	: same as OPERR
2612
#	DZ	: same as OPERR
2613
#	INEX2	: same as OPERR
2614
#	INEX1	: all packed immediate operations
2615
#
2616

2617
# we determine the highest priority exception(if any) set by the
2618
# emulation routine that has also been enabled by the user.
2619
	mov.b		FPCR_ENABLE(%a6),%d0	# fetch exceptions enabled
2620
	bne.b		iea_op_ena		# some are enabled
2621

2622
# now, we save the result, unless, of course, the operation was ftst or fcmp.
2623
# these don't save results.
2624
# Exit path used when no exceptional state has to be put back into the FPU:
# optionally store the result, fix up FPIAR and the stacked PC, restore the
# saved registers and leave through _fpsp_done (or iea_op_trace if tracing).
iea_op_save:
2625
	tst.b		STORE_FLG(%a6)		# nonzero = ftst/fcmp: nothing to store
2626
	bne.b		iea_op_exit1		# exit with no frestore
2627

2628
iea_op_store:
2629
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630
	bsr.l		store_fpreg		# store the result
2631

2632
iea_op_exit1:
2633
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635

2636
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2637
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2639

2640
	unlk		%a6			# unravel the frame
2641

2642
	btst		&0x7,(%sp)		# is trace on?
2643
	bne.w		iea_op_trace		# yes
2644

2645
	bra.l		_fpsp_done		# exit to os
2646

2647
# At least one exception is enabled (d0 = FPCR_ENABLE byte). Mask it with
# the exceptions actually raised and pick the highest-priority one.
iea_op_ena:
2648
	and.b		FPSR_EXCEPT(%a6),%d0	# keep only the ones both enabled and set
2649
	bfffo		%d0{&24:&8},%d0		# find highest priority exception
2650
	bne.b		iea_op_exc		# at least one was set
2651

2652
# no exception occurred. now, did a disabled, exact overflow occur with inexact
2653
# enabled? if so, then we have to stuff an overflow frame into the FPU.
2654
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655
	beq.b		iea_op_save
2656

2657
iea_op_ovfl:
2658
	btst		&inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659
	beq.b		iea_op_store		# no; goes straight to the store, STORE_FLG is not tested
2660
	bra.b		iea_op_exc_ovfl		# yes
2661

2662
# an enabled exception occurred. we have to insert the exception type back into
2663
# the machine.
2664
# On entry d0 holds the bit offset returned by bfffo in iea_op_ena.
# A 16-bit value is written to 2+FP_SRC(%a6); iea_op_exit2 then does an
# frestore from FP_SRC so the FPU sees the exceptional state.
iea_op_exc:
2665
	subi.l		&24,%d0			# rebase bit offset to a table index 0-7
2666
	cmpi.b		%d0,&0x6		# is exception INEX?
2667
	bne.b		iea_op_exc_force	# no
2668

2669
# the enabled exception was inexact. so, if it occurs with an overflow
2670
# or underflow that was disabled, then we have to force an overflow or
2671
# underflow frame.
2672
	btst		&ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673
	bne.b		iea_op_exc_ovfl		# yes
2674
	btst		&unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675
	bne.b		iea_op_exc_unfl		# yes
2676

2677
iea_op_exc_force:
2678
	mov.w		(tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6)
2679
	bra.b		iea_op_exit2		# exit with frestore
2680

2681
# one 16-bit entry per index 0-7, selected by d0 in iea_op_exc_force
tbl_iea_except:
2682
	short		0xe002, 0xe006, 0xe004, 0xe005
2683
	short		0xe003, 0xe002, 0xe001, 0xe001
2684

2685
iea_op_exc_ovfl:
2686
	mov.w		&0xe005,2+FP_SRC(%a6)	# same value as table entry 3
2687
	bra.b		iea_op_exit2
2688

2689
iea_op_exc_unfl:
2690
	mov.w		&0xe003,2+FP_SRC(%a6)	# same value as table entry 4; falls into iea_op_exit2
2691

2692
# Exit path used when an exception has to be re-inserted into the FPU:
# identical to iea_op_exit1 except that the frame at FP_SRC is frestore'd
# before the stack frame is unlinked.
iea_op_exit2:
2693
	mov.l		EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695

2696
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2697
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2699

2700
	frestore	FP_SRC(%a6)		# restore exceptional state
2701

2702
	unlk		%a6			# unravel the frame
2703

2704
	btst		&0x7,(%sp)		# is trace on?
2705
	bne.b		iea_op_trace		# yes
2706

2707
	bra.l		_fpsp_done		# exit to os
2708

2709
#
2710
# The opclass two instruction that took an "Unimplemented Effective Address"
2711
# exception was being traced. Make the "current" PC the FPIAR and put it in
2712
# the trace stack frame then jump to _real_trace().
2713
#
2714
#		 UNIMP EA FRAME		   TRACE FRAME
2715
#		*****************	*****************
2716
#		* 0x0 *  0x0f0	*	*    Current	*
2717
#		*****************	*      PC	*
2718
#		*    Current	*	*****************
2719
#		*      PC	*	* 0x2 *  0x024	*
2720
#		*****************	*****************
2721
#		*      SR	*	*     Next	*
2722
#		*****************	*      PC	*
2723
#					*****************
2724
#					*      SR	*
2725
#					*****************
2726
# Grow the 4-word frame on the stack into the 6-word trace frame drawn above
# and leave through _real_trace. On entry (%sp) is the SR of the Unimp EA
# frame, whose PC slot already holds the "Next PC" (set by the exit paths).
iea_op_trace:
2727
	mov.l		(%sp),-(%sp)		# shift stack frame "down"
2728
	mov.w		0x8(%sp),0x4(%sp)	# move the low word of the PC down as well
2729
	mov.w		&0x2024,0x6(%sp)	# stk fmt = 0x2; voff = 0x024
2730
	fmov.l		%fpiar,0x8(%sp)		# "Current PC" is in FPIAR
2731

2732
	bra.l		_real_trace
2733

2734
#########################################################################
2735
# fmovm handling: bit 14 of d0 selects the data-register or control-register
# form (d0 is set up by code before this chunk; presumably it holds the
# opword/extension word pair - confirm against the caller).
iea_fmovm:
2736
	btst		&14,%d0			# ctrl or data reg
2737
	beq.w		iea_fmovm_ctrl
2738

2739
iea_fmovm_data:
2740

2741
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode
2742
	bne.b		iea_fmovm_data_s
2743

2744
# user mode: a7 is the USP. Hand it to fmovm_dynamic through EXC_A7 and
# write back whatever value EXC_A7 holds afterwards.
iea_fmovm_data_u:
2745
	mov.l		%usp,%a0
2746
	mov.l		%a0,EXC_A7(%a6)		# store current a7
2747
	bsr.l		fmovm_dynamic		# do dynamic fmovm
2748
	mov.l		EXC_A7(%a6),%a0		# load possibly new a7
2749
	mov.l		%a0,%usp		# update usp
2750
	bra.w		iea_fmovm_exit
2751

2752
# supervisor mode: EXC_A7 is set to the address 2 bytes past EXC_VOFF
# (presumably the supervisor a7 as it was before the exception frame was
# pushed - confirm against the frame equates). After fmovm_dynamic,
# SPCOND_FLG tells whether the <ea> was -(a7) or (a7)+, which need the
# exception frame to be moved.
iea_fmovm_data_s:
2753
	clr.b		SPCOND_FLG(%a6)
2754
	lea		0x2+EXC_VOFF(%a6),%a0
2755
	mov.l		%a0,EXC_A7(%a6)
2756
	bsr.l		fmovm_dynamic		# do dynamic fmovm
2757

2758
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
2759
	beq.w		iea_fmovm_data_predec	# flag == mda7_flg
2760
	cmpi.b		SPCOND_FLG(%a6),&mia7_flg
2761
	bne.w		iea_fmovm_exit		# neither flag: normal exit; else fall into postinc
2762

2763
# right now, d0 = the size.
2764
# the data has been fetched from the supervisor stack, but we have not
2765
# incremented the stack pointer by the appropriate number of bytes.
2766
# do it here.
2767
# (a7)+ from supervisor mode, d0 = size. Rebuild the exception frame d0
# bytes higher in memory, then switch the stack pointer to it.
iea_fmovm_data_postinc:
2768
	btst		&0x7,EXC_SR(%a6)	# is trace on?
2769
	bne.b		iea_fmovm_data_pi_trace	# yes: build a trace frame instead
2770

2771
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)	# copy SR to the new frame
2772
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # new frame PC = Next PC
2773
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)	# same format/voff word as the Unimp EA frame drawn above
2774

2775
	lea		(EXC_SR,%a6,%d0),%a0	# address of the new frame
2776
	mov.l		%a0,EXC_SR(%a6)		# park it in the old SR slot for the pop below
2777

2778
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2779
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2781

2782
	unlk		%a6
2783
	mov.l		(%sp)+,%sp		# load the parked new-frame address (assumes (%sp) is the old SR slot here - confirm)
2784
	bra.l		_fpsp_done
2785

2786
# Traced variant of iea_fmovm_data_postinc (d0 = size): the rebuilt frame is
# a trace frame (format word 0x2024), so it starts 4 bytes lower than the
# untraced one and carries both Next PC and Current PC.
iea_fmovm_data_pi_trace:
2787
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)	# copy SR
2788
	mov.l		EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0)	# Next PC
2789
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)		# stk fmt = 0x2; voff = 0x024
2790
	mov.l		EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0)	# Current PC
2791

2792
	lea		(EXC_SR-0x4,%a6,%d0),%a0	# address of the new frame
2793
	mov.l		%a0,EXC_SR(%a6)		# park it in the old SR slot for the pop below
2794

2795
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2796
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2798

2799
	unlk		%a6
2800
	mov.l		(%sp)+,%sp		# load the parked new-frame address
2801
	bra.l		_real_trace
2802

2803
# right now, d0 = size and d1 = the strg.
2804
# -(a7) from supervisor mode. On entry d0 = size and d1 = strg (the register
# select bits); both are parked in the two bytes of EXC_VOFF. The exception
# frame is rebuilt "size" bytes lower in memory, the selected FP registers
# are written into the gap with fmovm.x, and the stack pointer is switched
# to the rebuilt frame.
iea_fmovm_data_predec:
2805
	mov.b		%d1,EXC_VOFF(%a6)	# store strg
2806
	mov.b		%d0,0x1+EXC_VOFF(%a6)	# store size
2807

2808
	fmovm.x		EXC_FP0(%a6),&0xc0	# restore fp0-fp1
2809
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2811

# d0/d1 now hold the restored values again; save them (and the old a6) on
# the stack because both registers are reused as scratch below.
2812
	mov.l		(%a6),-(%sp)		# make a copy of a6
2813
	mov.l		%d0,-(%sp)		# save d0
2814
	mov.l		%d1,-(%sp)		# save d1
2815
	mov.l		EXC_EXTWPTR(%a6),-(%sp)	# make a copy of Next PC
2816

2817
	clr.l		%d0
2818
	mov.b		0x1+EXC_VOFF(%a6),%d0	# fetch size
2819
	neg.l		%d0			# get negative of size
2820

2821
	btst		&0x7,EXC_SR(%a6)	# is trace enabled?
2822
	beq.b		iea_fmovm_data_p2
2823

# traced: build a trace frame (format word 0x2024), 4 bytes lower than the
# untraced frame
2824
	mov.w		EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0)
2825
	mov.l		EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0)	# Current PC
2826
	mov.l		(%sp)+,(EXC_PC-0x4,%a6,%d0)		# pop the Next PC copy into the frame
2827
	mov.w		&0x2024,(EXC_VOFF-0x4,%a6,%d0)
2828

2829
	pea		(%a6,%d0)		# create final sp
2830
	bra.b		iea_fmovm_data_p3
2831

2832
# not traced: rebuild the 4-word frame with format word 0x00f0
iea_fmovm_data_p2:
2833
	mov.w		EXC_SR(%a6),(EXC_SR,%a6,%d0)
2834
	mov.l		(%sp)+,(EXC_PC,%a6,%d0)	# pop the Next PC copy into the frame
2835
	mov.w		&0x00f0,(EXC_VOFF,%a6,%d0)
2836

2837
	pea		(0x4,%a6,%d0)		# create final sp
2838

2839
# Walk the 8 bits of strg from the most significant bit down. For every set
# bit store one FP register (mask 0x80 first, 0x01 last) and advance d0 by
# 12 bytes.
iea_fmovm_data_p3:
2840
	clr.l		%d1
2841
	mov.b		EXC_VOFF(%a6),%d1	# fetch strg
2842

2843
	tst.b		%d1
2844
	bpl.b		fm_1
2845
	fmovm.x		&0x80,(0x4+0x8,%a6,%d0)
2846
	addi.l		&0xc,%d0
2847
fm_1:
2848
	lsl.b		&0x1,%d1
2849
	bpl.b		fm_2
2850
	fmovm.x		&0x40,(0x4+0x8,%a6,%d0)
2851
	addi.l		&0xc,%d0
2852
fm_2:
2853
	lsl.b		&0x1,%d1
2854
	bpl.b		fm_3
2855
	fmovm.x		&0x20,(0x4+0x8,%a6,%d0)
2856
	addi.l		&0xc,%d0
2857
fm_3:
2858
	lsl.b		&0x1,%d1
2859
	bpl.b		fm_4
2860
	fmovm.x		&0x10,(0x4+0x8,%a6,%d0)
2861
	addi.l		&0xc,%d0
2862
fm_4:
2863
	lsl.b		&0x1,%d1
2864
	bpl.b		fm_5
2865
	fmovm.x		&0x08,(0x4+0x8,%a6,%d0)
2866
	addi.l		&0xc,%d0
2867
fm_5:
2868
	lsl.b		&0x1,%d1
2869
	bpl.b		fm_6
2870
	fmovm.x		&0x04,(0x4+0x8,%a6,%d0)
2871
	addi.l		&0xc,%d0
2872
fm_6:
2873
	lsl.b		&0x1,%d1
2874
	bpl.b		fm_7
2875
	fmovm.x		&0x02,(0x4+0x8,%a6,%d0)
2876
	addi.l		&0xc,%d0
2877
fm_7:
2878
	lsl.b		&0x1,%d1
2879
	bpl.b		fm_end
2880
	fmovm.x		&0x01,(0x4+0x8,%a6,%d0)
2881
# stack here: (%sp) = final sp, 0x4 = saved d1, 0x8 = saved d0, 0xc = saved a6
fm_end:
2882
	mov.l		0x4(%sp),%d1
2883
	mov.l		0x8(%sp),%d0
2884
	mov.l		0xc(%sp),%a6
2885
	mov.l		(%sp)+,%sp		# switch to the rebuilt frame
2886

2887
	btst		&0x7,(%sp)		# is trace enabled?
2888
	beq.l		_fpsp_done
2889
	bra.l		_real_trace
2890

2891
#########################################################################
2892
# fmovm of control registers: fmovm_ctrl does the work, then fall into the
# common fmovm exit.
iea_fmovm_ctrl:
2893

2894
	bsr.l		fmovm_ctrl		# load ctrl regs
2895

2896
# Common fmovm exit: restore the saved registers, then either leave through
# iea_fmovm_trace (trace bit set in the stacked SR) or set the stacked PC to
# the Next PC and leave through _fpsp_done.
iea_fmovm_exit:
2897
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
2898
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2900

2901
	btst		&0x7,EXC_SR(%a6)	# is trace on?
2902
	bne.b		iea_fmovm_trace		# yes
2903

2904
	mov.l		EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905

2906
	unlk		%a6			# unravel the frame
2907

2908
	bra.l		_fpsp_done		# exit to os
2909

2910
#
2911
# The control reg instruction that took an "Unimplemented Effective Address"
2912
# exception was being traced. The "Current PC" for the trace frame is the
2913
# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914
# After fixing the stack frame, jump to _real_trace().
2915
#
2916
#		 UNIMP EA FRAME		   TRACE FRAME
2917
#		*****************	*****************
2918
#		* 0x0 *  0x0f0	*	*    Current	*
2919
#		*****************	*      PC	*
2920
#		*    Current	*	*****************
2921
#		*      PC	*	* 0x2 *  0x024	*
2922
#		*****************	*****************
2923
#		*      SR	*	*     Next	*
2924
#		*****************	*      PC	*
2925
#					*****************
2926
#					*      SR	*
2927
#					*****************
2928
# this ain't a pretty solution, but it works:
2929
# -restore a6 (not with unlk)
2930
# -shift stack frame down over where old a6 used to be
2931
# -add LOCAL_SIZE to stack pointer
2932
# Build the trace frame in place. a6 is reloaded with the saved frame
# pointer first, so every access below is sp-relative, with LOCAL_SIZE
# added to step over the local area.
iea_fmovm_trace:
2933
	mov.l		(%a6),%a6		# restore frame pointer
2934
	mov.w		EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp)	# SR moves down over the old a6 slot
2935
	mov.l		EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp)	# Current PC
2936
	mov.l		EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp)	# Next PC
2937
	mov.w		&0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938
	add.l		&LOCAL_SIZE,%sp		# clear stack frame
2939

2940
	bra.l		_real_trace
2941

2942
#########################################################################
2943
# The FPU is disabled and so we should really have taken the "Line
2944
# F Emulator" exception. So, here we create an 8-word stack frame
2945
# from our 4-word stack frame. This means we must calculate the length
2946
# of the faulting instruction to get the "next PC". This is trivial for
2947
# immediate operands but requires some extra work for fmovm dynamic
2948
# which can use most addressing modes.
2949
# Work out the length of the faulting instruction, then turn the 4-word
# frame into an 8-word frame (format word 0x402c) and leave through
# _real_fpu_disabled. The length is passed from the first half to the
# second half through the EXC_VOFF word of the original frame.
# NOTE(review): no status is tested after _imem_read_long here, unlike the
# tst.l %d1 that follows _imem_read earlier in this file - confirm intended.
iea_disabled:
2950
	mov.l		(%sp)+,%d0		# restore d0
2951

2952
	link		%a6,&-LOCAL_SIZE	# init stack frame
2953

2954
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
2955

2956
# PC of instruction that took the exception is the PC in the frame
2957
	mov.l		EXC_PC(%a6),EXC_EXTWPTR(%a6)
2958
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
2959
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
2960
	bsr.l		_imem_read_long		# fetch the instruction words
2961
	mov.l		%d0,EXC_OPWORD(%a6)	# store OPWORD and EXTWORD
2962

2963
	tst.w		%d0			# is instr fmovm?
2964
	bmi.b		iea_dis_fmovm		# yes
2965
# instruction is using an extended precision immediate operand. Therefore,
2966
# the total instruction length is 16 bytes.
2967
iea_dis_immed:
2968
	mov.l		&0x10,%d0		# 16 bytes of instruction
2969
	bra.b		iea_dis_cont
2970
iea_dis_fmovm:
2971
	btst		&0xe,%d0		# is instr fmovm ctrl
2972
	bne.b		iea_dis_fmovm_data	# no
2973
# the instruction is a fmovm.l with 2 or 3 registers.
2974
	bfextu		%d0{&19:&3},%d1		# d1 = 3-bit register select field
2975
	mov.l		&0xc,%d0		# 12 bytes when not all three are selected
2976
	cmpi.b		%d1,&0x7		# move all regs?
2977
	bne.b		iea_dis_cont
2978
	addq.l		&0x4,%d0		# all three: 16 bytes
2979
	bra.b		iea_dis_cont
2980
# the instruction is an fmovm.x dynamic which can use many addressing
2981
# modes and thus can have several different total instruction lengths.
2982
# call fmovm_calc_ea which will go through the ea calc process and,
2983
# as a by-product, will tell us how long the instruction is.
2984
iea_dis_fmovm_data:
2985
	clr.l		%d0
2986
	bsr.l		fmovm_calc_ea
2987
	mov.l		EXC_EXTWPTR(%a6),%d0
2988
	sub.l		EXC_PC(%a6),%d0		# length = extension word ptr - instruction start
2989
iea_dis_cont:
2990
	mov.w		%d0,EXC_VOFF(%a6)	# store stack shift value
2991

2992
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
2993

2994
	unlk		%a6
2995

2996
# here, we actually create the 8-word frame from the 4-word frame,
2997
# with the "next PC" as additional info.
2998
# the <ea> field is left as undefined.
2999
	subq.l		&0x8,%sp		# make room for new stack
3000
	mov.l		%d0,-(%sp)		# save d0
3001
	mov.w		0xc(%sp),0x4(%sp)	# move SR
3002
	mov.l		0xe(%sp),0x6(%sp)	# move Current PC
3003
	clr.l		%d0
3004
	mov.w		0x12(%sp),%d0		# fetch the length stored in the old voff word
3005
	mov.l		0x6(%sp),0x10(%sp)	# move Current PC
3006
	add.l		%d0,0x6(%sp)		# make Next PC
3007
	mov.w		&0x402c,0xa(%sp)	# insert offset,frame format
3008
	mov.l		(%sp)+,%d0		# restore d0
3009

3010
	bra.l		_real_fpu_disabled
3011

3012
##########
3013

3014
# Instruction fetch failed: turn the 4-word frame into an 8-word frame
# (format word 0x4008) whose <ea> is the stacked PC, and leave through
# _real_access.
iea_iacc:
3015
	movc		%pcr,%d0
3016
	btst		&0x1,%d0		# PCR bit 1 set (presumably "FPU disabled" - confirm)?
3017
	bne.b		iea_iacc_cont		# yes: skip the FP register restores
3018
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3020
iea_iacc_cont:
3021
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3022

3023
	unlk		%a6
3024

3025
	subq.w		&0x8,%sp		# make stack frame bigger
3026
	mov.l		0x8(%sp),(%sp)		# store SR,hi(PC)
3027
	mov.w		0xc(%sp),0x4(%sp)	# store lo(PC)
3028
	mov.w		&0x4008,0x6(%sp)	# store voff
3029
	mov.l		0x2(%sp),0x8(%sp)	# store ea
3030
	mov.l		&0x09428001,0xc(%sp)	# store fslw
3031

3032
# shared tail, also reached from iea_dacc: (%sp) is the new frame, with the
# fslw longword at 0xc(%sp)
iea_acc_done:
3033
	btst		&0x5,(%sp)		# user or supervisor mode?
3034
	beq.b		iea_acc_done2		# user
3035
	bset		&0x2,0xd(%sp)		# set supervisor TM bit
3036

3037
iea_acc_done2:
3038
	bra.l		_real_access
3039

3040
# Data access failed. Builds the same 8-word frame layout as iea_iacc, but
# without unlinking: sp is first reset to the bottom of the local area and
# everything is addressed sp-relative. a0 is stored in the <ea> slot and d0
# in the upper word of the fslw (both presumably supplied by the code that
# branches here - confirm against the callers).
iea_dacc:
3041
	lea		-LOCAL_SIZE(%a6),%sp
3042

3043
	movc		%pcr,%d1
3044
	btst		&0x1,%d1		# PCR bit 1 set (presumably "FPU disabled" - confirm)?
3045
	bne.b		iea_dacc_cont		# yes: skip the FP register restores
3046
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1 on stack
3047
	fmovm.l		LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048
iea_dacc_cont:
3049
	mov.l		(%a6),%a6		# restore frame pointer
3050

3051
	mov.l		0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp)	# SR,hi(PC)
3052
	mov.w		0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp)	# lo(PC)
3053
	mov.w		&0x4008,-0x8+0xa+LOCAL_SIZE(%sp)		# format/voff word
3054
	mov.l		%a0,-0x8+0xc+LOCAL_SIZE(%sp)			# ea
3055
	mov.w		%d0,-0x8+0x10+LOCAL_SIZE(%sp)			# fslw, upper word
3056
	mov.w		&0x0001,-0x8+0x12+LOCAL_SIZE(%sp)		# fslw, lower word
3057

3058
	movm.l		LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059
	add.w		&LOCAL_SIZE-0x4,%sp	# sp -> start of the new frame
3060

3061
	bra.b		iea_acc_done
3062

3063
#########################################################################
3064
# XDEF ****************************************************************	#
3065
#	_fpsp_operr(): 060FPSP entry point for FP Operr exception.	#
3066
#									#
3067
#	This handler should be the first code executed upon taking the	#
3068
#	FP Operand Error exception in an operating system.		#
3069
#									#
3070
# XREF ****************************************************************	#
3071
#	_imem_read_long() - read instruction longword			#
3072
#	fix_skewed_ops() - adjust src operand in fsave frame		#
3073
#	_real_operr() - "callout" to operating system operr handler	#
3074
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3075
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3076
#	facc_out_{b,w,l}() - store to memory took access error (opcl 3)	#
3077
#									#
3078
# INPUT ***************************************************************	#
3079
#	- The system stack contains the FP Operr exception frame	#
3080
#	- The fsave frame contains the source operand			#
3081
#									#
3082
# OUTPUT **************************************************************	#
3083
#	No access error:						#
3084
#	- The system stack is unchanged					#
3085
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3086
#									#
3087
# ALGORITHM ***********************************************************	#
3088
#	In a system where the FP Operr exception is enabled, the goal	#
3089
# is to get to the handler specified at _real_operr(). But, on the 060,	#
3090
# for opclass zero and two instruction taking this exception, the	#
3091
# input operand in the fsave frame may be incorrect for some cases	#
3092
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3093
# do just this and then exits through _real_operr().			#
3094
#	For opclass 3 instructions, the 060 doesn't store the default	#
3095
# operr result out to memory or data register file as it should.	#
3096
# This code must emulate the move out before finally exiting through	#
3097
# _real_operr(). The move out, if to memory, is performed using		#
3098
# _mem_write() "callout" routines that may return a failing result.	#
3099
# In this special case, the handler must exit through facc_out()	#
3100
# which creates an access error stack frame from the current operr	#
3101
# stack frame.								#
3102
#									#
3103
#########################################################################
3104

3105
	global		_fpsp_operr
3106
# Entry point: save state, fetch the faulting instruction words through
# FPIAR, then either emulate the move out (foperr_out) or fix the source
# operand in the fsave frame, and leave through _real_operr.
_fpsp_operr:
3107

3108
	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3109

3110
	fsave		FP_SRC(%a6)		# grab the "busy" frame
3111

3112
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3113
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3115

3116
# the FPIAR holds the "current PC" of the faulting instruction
3117
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3118

3119
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3120
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3121
	bsr.l		_imem_read_long		# fetch the instruction words
3122
	mov.l		%d0,EXC_OPWORD(%a6)
3123

3124
##############################################################################
3125

3126
	btst		&13,%d0			# is instr an fmove out?
3127
	bne.b		foperr_out		# fmove out
3128

3129

3130
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131
# this would be the case for opclass two operations with a source infinity or
3132
# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133
# cause an operr so we don't need to check for them here.
3134
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3135
	bsr.l		fix_skewed_ops		# fix src op
3136

3137
# common exit, also used by the foperr_out_* routines
foperr_exit:
3138
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3139
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3141

3142
	frestore	FP_SRC(%a6)		# give the (possibly fixed) frame back to the FPU
3143

3144
	unlk		%a6
3145
	bra.l		_real_operr
3146

3147
########################################################################
3148

3149
#
3150
# the hardware does not save the default result to memory on enabled
3151
# operand error exceptions. we do this here before passing control to
3152
# the user operand error handler.
3153
#
3154
# byte, word, and long destination format operations can pass
3155
# through here. we simply need to test the sign of the src
3156
# operand and save the appropriate minimum or maximum integer value
3157
# to the effective address as pointed to by the stacked effective address.
3158
#
3159
# although packed opclass three operations can take operand error
3160
# exceptions, they won't pass through here since they are caught
3161
# first by the unsupported data format exception handler. that handler
3162
# sends them directly to _real_operr() if necessary.
3163
#
3164
# Compute the default result into L_SCR1 and dispatch on the destination
# format. On entry d0 = the instruction longword (OPWORD:EXTWORD).
#   exponent all ones and a nonzero mantissa (ignoring the top bit of
#   FP_SRC_HI)			-> L_SCR1 = FP_SRC_HI
#   anything else, sign clear	-> L_SCR1 = 0x7fffffff
#   anything else, sign set	-> L_SCR1 = 0x80000000
foperr_out:
3165

3166
	mov.w		FP_SRC_EX(%a6),%d1	# fetch exponent
3167
	andi.w		&0x7fff,%d1
3168
	cmpi.w		%d1,&0x7fff
3169
	bne.b		foperr_out_not_qnan
3170
# the operand is either an infinity or a QNAN.
3171
	tst.l		FP_SRC_LO(%a6)
3172
	bne.b		foperr_out_qnan
3173
	mov.l		FP_SRC_HI(%a6),%d1
3174
	andi.l		&0x7fffffff,%d1
3175
	beq.b		foperr_out_not_qnan
3176
foperr_out_qnan:
3177
	mov.l		FP_SRC_HI(%a6),L_SCR1(%a6)
3178
	bra.b		foperr_out_jmp
3179

3180
foperr_out_not_qnan:
3181
	mov.l		&0x7fffffff,%d1
3182
	tst.b		FP_SRC_EX(%a6)
3183
	bpl.b		foperr_out_not_qnan2
3184
	addq.l		&0x1,%d1		# negative source: 0x7fffffff + 1 = 0x80000000
3185
foperr_out_not_qnan2:
3186
	mov.l		%d1,L_SCR1(%a6)
3187

3188
# d0 = 3-bit destination format (table index), d1 = low byte of the opword;
# table entries are 16-bit offsets relative to tbl_operr.
foperr_out_jmp:
3189
	bfextu		%d0{&19:&3},%d0		# extract dst format field
3190
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3191
	mov.w		(tbl_operr.b,%pc,%d0.w*2),%a0
3192
	jmp		(tbl_operr.b,%pc,%a0)
3193

3194
tbl_operr:
3195
	short		foperr_out_l - tbl_operr # long word integer
3196
	short		tbl_operr    - tbl_operr # sgl prec shouldn't happen
3197
	short		tbl_operr    - tbl_operr # ext prec shouldn't happen
3198
	short		foperr_exit  - tbl_operr # packed won't enter here
3199
	short		foperr_out_w - tbl_operr # word integer
3200
	short		tbl_operr    - tbl_operr # dbl prec shouldn't happen
3201
	short		foperr_out_b - tbl_operr # byte integer
3202
	short		tbl_operr    - tbl_operr # packed won't enter here
3203

3204
# Byte destination: write the most significant byte of L_SCR1 either to the
# data register named in d1 or to memory at EXC_EA.
# In: d1 = <ea> mode,reg byte of the opword.
foperr_out_b:
3205
	mov.b		L_SCR1(%a6),%d0		# load default result (first byte of L_SCR1)
3206
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3207
	ble.b		foperr_out_b_save_dn	# yes
3208
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3209
	bsr.l		_dmem_write_byte	# write the default result
3210

3211
	tst.l		%d1			# did dstore fail?
3212
	bne.l		facc_out_b		# yes
3213

3214
	bra.w		foperr_exit
3215
foperr_out_b_save_dn:
3216
	andi.w		&0x0007,%d1		# keep the register number
3217
	bsr.l		store_dreg_b		# store result to regfile
3218
	bra.w		foperr_exit
3219

3220
# Word destination: write the most significant word of L_SCR1 either to the
# data register named in d1 or to memory at EXC_EA.
# In: d1 = <ea> mode,reg byte of the opword.
foperr_out_w:
3221
	mov.w		L_SCR1(%a6),%d0		# load default result (first word of L_SCR1)
3222
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3223
	ble.b		foperr_out_w_save_dn	# yes
3224
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3225
	bsr.l		_dmem_write_word	# write the default result
3226

3227
	tst.l		%d1			# did dstore fail?
3228
	bne.l		facc_out_w		# yes
3229

3230
	bra.w		foperr_exit
3231
foperr_out_w_save_dn:
3232
	andi.w		&0x0007,%d1		# keep the register number
3233
	bsr.l		store_dreg_w		# store result to regfile
3234
	bra.w		foperr_exit
3235

3236
# Long destination: write all of L_SCR1 either to the data register named in
# d1 or to memory at EXC_EA.
# In: d1 = <ea> mode,reg byte of the opword.
foperr_out_l:
3237
	mov.l		L_SCR1(%a6),%d0		# load default result (all of L_SCR1)
3238
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3239
	ble.b		foperr_out_l_save_dn	# yes
3240
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3241
	bsr.l		_dmem_write_long	# write the default result
3242

3243
	tst.l		%d1			# did dstore fail?
3244
	bne.l		facc_out_l		# yes
3245

3246
	bra.w		foperr_exit
3247
foperr_out_l_save_dn:
3248
	andi.w		&0x0007,%d1		# keep the register number
3249
	bsr.l		store_dreg_l		# store result to regfile
3250
	bra.w		foperr_exit
3251

3252
#########################################################################
3253
# XDEF ****************************************************************	#
3254
#	_fpsp_snan(): 060FPSP entry point for FP SNAN exception.	#
3255
#									#
3256
#	This handler should be the first code executed upon taking the	#
3257
#	FP Signalling NAN exception in an operating system.		#
3258
#									#
3259
# XREF ****************************************************************	#
3260
#	_imem_read_long() - read instruction longword			#
3261
#	fix_skewed_ops() - adjust src operand in fsave frame		#
3262
#	_real_snan() - "callout" to operating system SNAN handler	#
3263
#	_dmem_write_{byte,word,long}() - store data to mem (opclass 3)	#
3264
#	store_dreg_{b,w,l}() - store data to data regfile (opclass 3)	#
3265
#	facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)	#
3266
#	_calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>	#
3267
#									#
3268
# INPUT ***************************************************************	#
3269
#	- The system stack contains the FP SNAN exception frame		#
3270
#	- The fsave frame contains the source operand			#
3271
#									#
3272
# OUTPUT **************************************************************	#
3273
#	No access error:						#
3274
#	- The system stack is unchanged					#
3275
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3276
#									#
3277
# ALGORITHM ***********************************************************	#
3278
#	In a system where the FP SNAN exception is enabled, the goal	#
3279
# is to get to the handler specified at _real_snan(). But, on the 060,	#
3280
# for opclass zero and two instructions taking this exception, the	#
3281
# input operand in the fsave frame may be incorrect for some cases	#
3282
# and needs to be corrected. This handler calls fix_skewed_ops() to	#
3283
# do just this and then exits through _real_snan().			#
3284
#	For opclass 3 instructions, the 060 doesn't store the default	#
3285
# SNAN result out to memory or data register file as it should.		#
3286
# This code must emulate the move out before finally exiting through	#
3287
# _real_snan(). The move out, if to memory, is performed using		#
3288
# _mem_write() "callout" routines that may return a failing result.	#
3289
# In this special case, the handler must exit through facc_out()	#
3290
# which creates an access error stack frame from the current SNAN	#
3291
# stack frame.								#
3292
#	For the case of an extended precision opclass 3 instruction,	#
3293
# if the effective addressing mode was -() or ()+, then the address	#
3294
# register must get updated by calling _calc_ea_fout(). If the <ea>	#
3295
# was -(a7) from supervisor mode, then the exception frame currently	#
3296
# on the system stack must be carefully moved "down" to make room	#
3297
# for the operand being moved.						#
3298
#									#
3299
#########################################################################
3300

3301
	global		_fpsp_snan
3302
# Entry point: save state, fetch the faulting instruction words through
# FPIAR, then either emulate the move out (fsnan_out) or fix the source
# operand in the fsave frame, and leave through _real_snan.
_fpsp_snan:
3303

3304
	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3305

3306
	fsave		FP_SRC(%a6)		# grab the "busy" frame
3307

3308
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3309
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3311

3312
# the FPIAR holds the "current PC" of the faulting instruction
3313
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3314

3315
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3316
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3317
	bsr.l		_imem_read_long		# fetch the instruction words
3318
	mov.l		%d0,EXC_OPWORD(%a6)
3319

3320
##############################################################################
3321

3322
	btst		&13,%d0			# is instr an fmove out?
3323
	bne.w		fsnan_out		# fmove out
3324

3325

3326
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327
# this would be the case for opclass two operations with a source infinity or
3328
# denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329
# fixed here.
3330
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3331
	bsr.l		fix_skewed_ops		# fix src op
3332

3333
# common exit, also used by the fsnan_out_* routines
fsnan_exit:
3334
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3335
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3337

3338
	frestore	FP_SRC(%a6)		# give the (possibly fixed) frame back to the FPU
3339

3340
	unlk		%a6
3341
	bra.l		_real_snan
3342

3343
########################################################################
3344

3345
#
3346
# the hardware does not save the default result to memory on enabled
3347
# snan exceptions. we do this here before passing control to
3348
# the user snan handler.
3349
#
3350
# every destination format is dispatched through tbl_snan below. packed
3351
# format operations were already handled by fpsp_unsupp(), so nothing
3352
# else is done for them here (their table entries carry offset zero).
3353
# for byte, word, and long, the upper byte/word/longword of the SNAN
3354
# mantissa (FP_SRC_HI) is written out with the SNAN bit set; sgl, dbl,
3355
# and ext have their own fsnan_out_{s,d,x} routines.
3356
#
3357
# On entry d0 = the instruction longword (OPWORD:EXTWORD). On dispatch
# d1 = low byte of the opword; table entries are 16-bit offsets relative
# to tbl_snan.
fsnan_out:
3358

3359
	bfextu		%d0{&19:&3},%d0		# extract dst format field
3360
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract <ea> mode,reg
3361
	mov.w		(tbl_snan.b,%pc,%d0.w*2),%a0
3362
	jmp		(tbl_snan.b,%pc,%a0)
3363

3364
# NOTE(review): the two packed entries have offset zero, i.e. they would
# jump to tbl_snan itself; presumably they are never taken - confirm.
tbl_snan:
3365
	short		fsnan_out_l - tbl_snan # long word integer
3366
	short		fsnan_out_s - tbl_snan # sgl prec
3367
	short		fsnan_out_x - tbl_snan # ext prec
3368
	short		tbl_snan    - tbl_snan # packed needs no help
3369
	short		fsnan_out_w - tbl_snan # word integer
3370
	short		fsnan_out_d - tbl_snan # dbl prec
3371
	short		fsnan_out_b - tbl_snan # byte integer
3372
	short		tbl_snan    - tbl_snan # packed needs no help
3373

3374
# Byte destination: write the first byte of FP_SRC_HI with bit 6 set,
# either to the data register named in d1 or to memory at EXC_EA.
# In: d1 = <ea> mode,reg byte of the opword.
fsnan_out_b:
3375
	mov.b		FP_SRC_HI(%a6),%d0	# load upper byte of SNAN
3376
	bset		&6,%d0			# set SNAN bit
3377
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3378
	ble.b		fsnan_out_b_dn		# yes
3379
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3380
	bsr.l		_dmem_write_byte	# write the default result
3381

3382
	tst.l		%d1			# did dstore fail?
3383
	bne.l		facc_out_b		# yes
3384

3385
	bra.w		fsnan_exit
3386
fsnan_out_b_dn:
3387
	andi.w		&0x0007,%d1		# keep the register number
3388
	bsr.l		store_dreg_b		# store result to regfile
3389
	bra.w		fsnan_exit
3390

3391
# Word destination: write the first word of FP_SRC_HI with bit 14 set,
# either to the data register named in d1 or to memory at EXC_EA.
# In: d1 = <ea> mode,reg byte of the opword.
fsnan_out_w:
3392
	mov.w		FP_SRC_HI(%a6),%d0	# load upper word of SNAN
3393
	bset		&14,%d0			# set SNAN bit
3394
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3395
	ble.b		fsnan_out_w_dn		# yes
3396
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3397
	bsr.l		_dmem_write_word	# write the default result
3398

3399
	tst.l		%d1			# did dstore fail?
3400
	bne.l		facc_out_w		# yes
3401

3402
	bra.w		fsnan_exit
3403
fsnan_out_w_dn:
3404
	andi.w		&0x0007,%d1		# keep the register number
3405
	bsr.l		store_dreg_w		# store result to regfile
3406
	bra.w		fsnan_exit
3407

3408
# Long destination: write FP_SRC_HI with bit 30 set, either to the data
# register named in d1 or to memory at EXC_EA.
# In: d1 = <ea> mode,reg byte of the opword.
fsnan_out_l:
3409
	mov.l		FP_SRC_HI(%a6),%d0	# load upper longword of SNAN
3410
	bset		&30,%d0			# set SNAN bit
3411
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3412
	ble.b		fsnan_out_l_dn		# yes
3413
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3414
	bsr.l		_dmem_write_long	# write the default result
3415

3416
	tst.l		%d1			# did dstore fail?
3417
	bne.l		facc_out_l		# yes
3418

3419
	bra.w		fsnan_exit
3420
fsnan_out_l_dn:
3421
	andi.w		&0x0007,%d1		# keep the register number
3422
	bsr.l		store_dreg_l		# store result to regfile
3423
	bra.w		fsnan_exit
3424

3425
# fsnan_out_s: single precision destination.
# The 32-bit result is assembled in d0 as:
#	sign bit of FP_SRC_EX | 0x7fc00000 | (FP_SRC_HI >> 8)
# On entry d1 holds the <ea> mode,reg byte of the opword. For a memory
# destination the longword is written to EXC_EA with _dmem_write_long and a
# non-zero d1 on return sends control to facc_out_l. For a data register
# destination (mode,reg <= 7) the value is stored with store_dreg_l.
# NOTE: despite its name, fsnan_out_d_dn is the data register path of this
# single precision routine; it is the only label branched to from here.
fsnan_out_s:
3426
	cmpi.b		%d1,&0x7		# is <ea> mode a data reg?
3427
	ble.b		fsnan_out_d_dn		# yes
3428
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3429
	andi.l		&0x80000000,%d0		# keep sign
3430
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3431
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3432
	lsr.l		&0x8,%d1		# shift mantissa for sgl
3433
	or.l		%d1,%d0			# create sgl SNAN
3434
	mov.l		EXC_EA(%a6),%a0		# pass: <ea> of default result
3435
	bsr.l		_dmem_write_long	# write the default result
3436

3437
	tst.l		%d1			# did dstore fail?
3438
	bne.l		facc_out_l		# yes
3439

3440
	bra.w		fsnan_exit
3441
# data register path: d1 (mode,reg) is still needed afterwards, so it is
# pushed on the stack while d1 is borrowed as scratch for the mantissa.
fsnan_out_d_dn:
3442
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3443
	andi.l		&0x80000000,%d0		# keep sign
3444
	ori.l		&0x7fc00000,%d0		# insert new exponent,SNAN bit
3445
	mov.l		%d1,-(%sp)
3446
	mov.l		FP_SRC_HI(%a6),%d1	# load mantissa
3447
	lsr.l		&0x8,%d1		# shift mantissa for sgl
3448
	or.l		%d1,%d0			# create sgl SNAN
3449
	mov.l		(%sp)+,%d1
3450
	andi.w		&0x0007,%d1
3451
	bsr.l		store_dreg_l		# store result to regfile
3452
	bra.w		fsnan_exit
3453

3454
# fsnan_out_d: double precision destination (memory only; there is no data
# register path here).
# The 64-bit result is assembled in the FP_SCR0 scratch area:
#	first longword  (FP_SCR0_EX) = sign | 0x7ff80000 | (FP_SRC_HI >> 11)
#	second longword (FP_SCR0_HI) = low 11 bits of FP_SRC_HI rotated into
#				       the top 11 bits | (FP_SRC_LO >> 11)
# d0 holds the shift count 11 while the value is built, then is reloaded
# with the byte count 8 for _dmem_write (a0 = source ptr, a1 = dst addr).
# A non-zero d1 on return from _dmem_write sends control to facc_out_d.
fsnan_out_d:
3455
	mov.l		FP_SRC_EX(%a6),%d0	# fetch SNAN sign
3456
	andi.l		&0x80000000,%d0		# keep sign
3457
	ori.l		&0x7ff80000,%d0		# insert new exponent,SNAN bit
3458
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3459
	mov.l		%d0,FP_SCR0_EX(%a6)	# store to temp space
3460
	mov.l		&11,%d0			# load shift amt
3461
	lsr.l		%d0,%d1
3462
	or.l		%d1,FP_SCR0_EX(%a6)	# create dbl hi
3463
	mov.l		FP_SRC_HI(%a6),%d1	# load hi mantissa
3464
	andi.l		&0x000007ff,%d1
3465
# rotating right by 11 moves the 11 bits kept above into bits 31-21.
	ror.l		%d0,%d1
3466
	mov.l		%d1,FP_SCR0_HI(%a6)	# store to temp space
3467
	mov.l		FP_SRC_LO(%a6),%d1	# load lo mantissa
3468
	lsr.l		%d0,%d1
3469
	or.l		%d1,FP_SCR0_HI(%a6)	# create dbl lo
3470
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3471
	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
3472
	movq.l		&0x8,%d0		# pass: size of 8 bytes
3473
	bsr.l		_dmem_write		# write the default result
3474

3475
	tst.l		%d1			# did dstore fail?
3476
	bne.l		facc_out_d		# yes
3477

3478
	bra.w		fsnan_exit
3479

3480
# for extended precision, if the addressing mode is pre-decrement or
3481
# post-increment, then the address register did not get updated.
3482
# in addition, for pre-decrement, the stacked <ea> is incorrect.
3483
# fsnan_out_x: extended precision destination.
# The 12-byte result is built in FP_SCR0: the exponent word is copied from
# the source, the following word is cleared, bit 30 of the upper mantissa
# longword is set and the lower mantissa longword is copied unchanged.
# _calc_ea_fout() is then called to obtain the corrected <ea> (returned in
# a0) and to update the address register; the result is written with
# _dmem_write (a0 = source ptr, a1 = dst addr, d0 = 12 bytes).
# User and supervisor mode are handled separately because of a7:
#   - user mode: the user stack pointer is copied into EXC_A7 around the
#     call and written back to usp afterwards.
#   - supervisor mode: if _calc_ea_fout() flags the -(a7) case in
#     SPCOND_FLG, the result is placed on the system stack by hand
#     (see the comments further down) and the routine exits directly
#     through _real_snan.
# In both modes the longword at (a6) is copied into EXC_A6 before the call
# and copied back afterwards, so an update made to EXC_A6 by
# _calc_ea_fout() ends up in the longword at (a6).
fsnan_out_x:
3484
	clr.b		SPCOND_FLG(%a6)		# clear special case flag
3485

3486
	mov.w		FP_SRC_EX(%a6),FP_SCR0_EX(%a6)
3487
	clr.w		2+FP_SCR0(%a6)
3488
	mov.l		FP_SRC_HI(%a6),%d0
3489
	bset		&30,%d0
3490
	mov.l		%d0,FP_SCR0_HI(%a6)
3491
	mov.l		FP_SRC_LO(%a6),FP_SCR0_LO(%a6)
3492

3493
	btst		&0x5,EXC_SR(%a6)	# supervisor mode exception?
3494
	bne.b		fsnan_out_x_s		# yes
3495

3496
	mov.l		%usp,%a0		# fetch user stack pointer
3497
	mov.l		%a0,EXC_A7(%a6)		# save on stack for calc_ea()
3498
	mov.l		(%a6),EXC_A6(%a6)
3499

3500
	bsr.l		_calc_ea_fout		# find the correct ea,update An
3501
	mov.l		%a0,%a1
3502
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3503

3504
	mov.l		EXC_A7(%a6),%a0
3505
	mov.l		%a0,%usp		# restore user stack pointer
3506
	mov.l		EXC_A6(%a6),(%a6)
3507

3508
# common memory write; expects a1 = destination address.
fsnan_out_x_save:
3509
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
3510
	movq.l		&0xc,%d0		# pass: size of extended
3511
	bsr.l		_dmem_write		# write the default result
3512

3513
	tst.l		%d1			# did dstore fail?
3514
	bne.l		facc_out_x		# yes
3515

3516
	bra.w		fsnan_exit
3517

3518
# supervisor mode entry: a7 is the system stack itself, so no usp handling.
fsnan_out_x_s:
3519
	mov.l		(%a6),EXC_A6(%a6)
3520

3521
	bsr.l		_calc_ea_fout		# find the correct ea,update An
3522
	mov.l		%a0,%a1
3523
	mov.l		%a0,EXC_EA(%a6)		# stack correct <ea>
3524

3525
	mov.l		EXC_A6(%a6),(%a6)
3526

3527
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528
	bne.b		fsnan_out_x_save	# no
3529

3530
# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3532
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3534

3535
	frestore	FP_SRC(%a6)
3536

3537
# a6 is reloaded directly instead of using unlk, so sp still points at the
# bottom of the local frame; everything below is addressed as
# LOCAL_SIZE+offset from sp.
	mov.l		EXC_A6(%a6),%a6		# restore frame pointer
3538

3539
# copy three longwords of the exception frame 0xc bytes lower on the stack...
	mov.l		LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
3540
	mov.l		LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp)
3541
	mov.l		LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
3542

3543
# ...and put the 12-byte extended result into the slots just vacated.
	mov.l		LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp)
3544
	mov.l		LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp)
3545
	mov.l		LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
3546

3547
# NOTE(review): presumably this leaves sp at the relocated exception frame
# (0xc below its original position) -- confirm against the EXC_* offsets.
	add.l		&LOCAL_SIZE-0x8,%sp
3548

3549
	bra.l		_real_snan
3550

3551
#########################################################################
3552
# XDEF ****************************************************************	#
3553
#	_fpsp_inex(): 060FPSP entry point for FP Inexact exception.	#
3554
#									#
3555
#	This handler should be the first code executed upon taking the	#
3556
#	FP Inexact exception in an operating system.			#
3557
#									#
3558
# XREF ****************************************************************	#
3559
#	_imem_read_long() - read instruction longword			#
3560
#	fix_skewed_ops() - adjust src operand in fsave frame		#
3561
#	set_tag_x() - determine optype of src/dst operands		#
3562
#	store_fpreg() - store opclass 0 or 2 result to FP regfile	#
3563
#	unnorm_fix() - change UNNORM operands to NORM or ZERO		#
3564
#	load_fpn2() - load dst operand from FP regfile			#
3565
#	smovcr() - emulate an "fmovcr" instruction			#
3566
#	fout() - emulate an opclass 3 instruction			#
3567
#	tbl_unsupp - addr of emulation routine table, opclass 0,2	#
3568
#	_real_inex() - "callout" to operating system inexact handler	#
3569
#									#
3570
# INPUT ***************************************************************	#
3571
#	- The system stack contains the FP Inexact exception frame	#
3572
#	- The fsave frame contains the source operand			#
3573
#									#
3574
# OUTPUT **************************************************************	#
3575
#	- The system stack is unchanged					#
3576
#	- The fsave frame contains the adjusted src op for opclass 0,2	#
3577
#									#
3578
# ALGORITHM ***********************************************************	#
3579
#	In a system where the FP Inexact exception is enabled, the goal	#
3580
# is to get to the handler specified at _real_inex(). But, on the 060,	#
3581
# for opclass zero and two instruction taking this exception, the	#
3582
# hardware doesn't store the correct result to the destination FP	#
3583
# register as did the '040 and '881/2. This handler must emulate the	#
3584
# instruction in order to get this value and then store it to the	#
3585
# correct register before calling _real_inex().				#
3586
#	For opclass 3 instructions, the 060 doesn't store the default	#
3587
# inexact result out to memory or data register file as it should.	#
3588
# This code must emulate the move out by calling fout() before finally	#
3589
# exiting through _real_inex().						#
3590
#									#
3591
#########################################################################
3592

3593
	global		_fpsp_inex
3594
# _fpsp_inex: entry point for the FP Inexact exception.
# Builds a local frame, saves the fsave frame, d0-d1/a0-a1, the FP control
# registers and fp0-fp1, fetches the faulting instruction through FPIAR,
# and then either
#   - branches to finex_out when bit 13 of the instruction longword is set
#     (fmove out), or
#   - emulates the instruction through tbl_unsupp (or smovcr for fmovecr),
#     stores the result held in fp0 to the destination FP register and
#     exits through _real_inex with the saved state restored.
_fpsp_inex:
3595

3596
	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3597

3598
	fsave		FP_SRC(%a6)		# grab the "busy" frame
3599

3600
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3601
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3603

3604
# the FPIAR holds the "current PC" of the faulting instruction
3605
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3606

3607
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3608
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3609
	bsr.l		_imem_read_long		# fetch the instruction words
3610
	mov.l		%d0,EXC_OPWORD(%a6)
3611

3612
##############################################################################
3613

3614
	btst		&13,%d0			# is instr an fmove out?
3615
	bne.w		finex_out		# fmove out
3616

3617

3618
# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619
# longword integer directly into the upper longword of the mantissa along
3620
# w/ an exponent value of 0x401e. we convert this to extended precision here.
3621
	bfextu		%d0{&19:&3},%d0		# fetch instr size
3622
	bne.b		finex_cont		# instr size is not long
3623
	cmpi.w		FP_SRC_EX(%a6),&0x401e	# is exponent 0x401e?
3624
	bne.b		finex_cont		# no
3625
	fmov.l		&0x0,%fpcr
3626
	fmov.l		FP_SRC_HI(%a6),%fp0	# load integer src
3627
	fmov.x		%fp0,FP_SRC(%a6)	# store integer as extended precision
3628
# NOTE(review): 0xe001 is written to the word that follows the exponent in
# the fsave frame; its meaning is not evident from this section -- confirm.
	mov.w		&0xe001,0x2+FP_SRC(%a6)
3629

3630
finex_cont:
3631
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3632
	bsr.l		fix_skewed_ops		# fix src op
3633

3634
# Here, we zero the ccode and exception byte field since we're going to
3635
# emulate the whole instruction. Notice, though, that we don't kill the
3636
# INEX1 bit. This is because a packed op has long since been converted
3637
# to extended before arriving here. Therefore, we need to retain the
3638
# INEX1 bit from when the operand was first converted.
3639
	andi.l		&0x00ff01ff,USER_FPSR(%a6) # zero all but accrued field
3640

3641
	fmov.l		&0x0,%fpcr		# zero current control regs
3642
	fmov.l		&0x0,%fpsr
3643

3644
	bfextu		EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645
	cmpi.b		%d1,&0x17		# is op an fmovecr?
3646
	beq.w		finex_fmovcr		# yes
3647

3648
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3649
	bsr.l		set_tag_x		# tag the operand type
3650
	mov.b		%d0,STAG(%a6)		# maybe NORM,DENORM
3651

3652
# bits four and five of the fp extension word separate the monadic and dyadic
3653
# operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654
# will never take this exception, but fsincos will.
3655
	btst		&0x5,1+EXC_CMDREG(%a6)	# is operation monadic or dyadic?
3656
	beq.b		finex_extract		# monadic
3657

3658
	btst		&0x4,1+EXC_CMDREG(%a6)	# is operation an fsincos?
3659
	bne.b		finex_extract		# yes
3660

3661
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662
	bsr.l		load_fpn2		# load dst into FP_DST
3663

3664
	lea		FP_DST(%a6),%a0		# pass: ptr to dst op
3665
	bsr.l		set_tag_x		# tag the operand type
3666
	cmpi.b		%d0,&UNNORM		# is operand an UNNORM?
3667
	bne.b		finex_op2_done		# no
3668
	bsr.l		unnorm_fix		# yes; convert to NORM,DENORM,or ZERO
3669
finex_op2_done:
3670
	mov.b		%d0,DTAG(%a6)		# save dst optype tag
3671

3672
# call the emulation routine: d0 = rnd prec/mode, a0 = ptr to src operand,
# a1 = ptr to dst operand. d1 = low 7 bits of the command word, used as the
# index into tbl_unsupp; the entry fetched is an offset from tbl_unsupp.
finex_extract:
3673
	clr.l		%d0
3674
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec/mode
3675

3676
	mov.b		1+EXC_CMDREG(%a6),%d1
3677
	andi.w		&0x007f,%d1		# extract extension
3678

3679
	lea		FP_SRC(%a6),%a0
3680
	lea		FP_DST(%a6),%a1
3681

3682
	mov.l		(tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683
	jsr		(tbl_unsupp.l,%pc,%d1.l*1)
3684

3685
# the operation has been emulated. the result is in fp0.
3686
finex_save:
3687
	bfextu		EXC_CMDREG(%a6){&6:&3},%d0
3688
	bsr.l		store_fpreg
3689

3690
# common exit: undo the saves made at entry, in reverse, and hand over to
# the operating system's inexact handler.
finex_exit:
3691
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3692
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3694

3695
	frestore	FP_SRC(%a6)
3696

3697
	unlk		%a6
3698
	bra.l		_real_inex
3699

3700
# finex_fmovcr: inexact exception raised by an fmovecr instruction.
# Calls smovcr with d0 = rounding precision/mode byte (zero-extended) and
# d1 = low 7 bits of the command word (the rom offset), then rejoins the
# common path at finex_save to store the result.
finex_fmovcr:
3701
	clr.l		%d0
3702
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3703
	mov.b		1+EXC_CMDREG(%a6),%d1
3704
	andi.l		&0x0000007f,%d1		# pass rom offset
3705
	bsr.l		smovcr
3706
	bra.b		finex_save
3707

3708
########################################################################
3709

3710
#
3711
# the hardware does not save the default result to memory on enabled
3712
# inexact exceptions. we do this here before passing control to
3713
# the user inexact handler.
3714
#
3715
# byte, word, and long destination format operations can pass
3716
# through here. so can double and single precision.
3717
# although packed opclass three operations can take inexact
3718
# exceptions, they won't pass through here since they are caught
3719
# first by the unsupported data format exception handler. that handler
3720
# sends them directly to _real_inex() if necessary.
3721
#
3722
# finex_out: inexact exception raised by an fmove out (opclass three).
# Tags the source as NORM, clears the exception byte of the saved FPSR
# (mask 0xffff00ff) and calls fout() with d0 = rounding precision/mode and
# a0 = ptr to the source operand, then leaves through finex_exit.
finex_out:
3723

3724
	mov.b		&NORM,STAG(%a6)		# src is a NORM
3725

3726
	clr.l		%d0
3727
	mov.b		FPCR_MODE(%a6),%d0	# pass rnd prec,mode
3728

3729
	andi.l		&0xffff00ff,USER_FPSR(%a6) # zero exception field
3730

3731
	lea		FP_SRC(%a6),%a0		# pass ptr to src operand
3732

3733
	bsr.l		fout			# store the default result
3734

3735
	bra.b		finex_exit
3736

3737
#########################################################################
3738
# XDEF ****************************************************************	#
3739
#	_fpsp_dz(): 060FPSP entry point for FP DZ exception.		#
3740
#									#
3741
#	This handler should be the first code executed upon taking	#
3742
#	the FP DZ exception in an operating system.			#
3743
#									#
3744
# XREF ****************************************************************	#
3745
#	_imem_read_long() - read instruction longword from memory	#
3746
#	fix_skewed_ops() - adjust fsave operand				#
3747
#	_real_dz() - "callout" exit point from FP DZ handler		#
3748
#									#
3749
# INPUT ***************************************************************	#
3750
#	- The system stack contains the FP DZ exception stack.		#
3751
#	- The fsave frame contains the source operand.			#
3752
#									#
3753
# OUTPUT **************************************************************	#
3754
#	- The system stack contains the FP DZ exception stack.		#
3755
#	- The fsave frame contains the adjusted source operand.		#
3756
#									#
3757
# ALGORITHM ***********************************************************	#
3758
#	In a system where the DZ exception is enabled, the goal is to	#
3759
# get to the handler specified at _real_dz(). But, on the 060, when the	#
3760
# exception is taken, the input operand in the fsave state frame may	#
3761
# be incorrect for some cases and need to be adjusted. So, this package	#
3762
# adjusts the operand using fix_skewed_ops() and then branches to	#
3763
# _real_dz().								#
3764
#									#
3765
#########################################################################
3766

3767
	global		_fpsp_dz
3768
# _fpsp_dz: entry point for the FP divide-by-zero exception.
# Builds a local frame, saves the fsave frame, d0-d1/a0-a1, the FP control
# registers and fp0-fp1, fetches the faulting instruction through FPIAR,
# passes the source operand in the fsave frame to fix_skewed_ops(), then
# restores everything and exits through _real_dz. No instruction is
# emulated and no result is stored here.
_fpsp_dz:
3769

3770
	link.w		%a6,&-LOCAL_SIZE	# init stack frame
3771

3772
	fsave		FP_SRC(%a6)		# grab the "busy" frame
3773

3774
	movm.l		&0x0303,EXC_DREGS(%a6)	# save d0-d1/a0-a1
3775
	fmovm.l		%fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776
	fmovm.x		&0xc0,EXC_FPREGS(%a6)	# save fp0-fp1 on stack
3777

3778
# the FPIAR holds the "current PC" of the faulting instruction
3779
	mov.l		USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
3780

3781
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
3782
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
3783
	bsr.l		_imem_read_long		# fetch the instruction words
3784
	mov.l		%d0,EXC_OPWORD(%a6)
3785

3786
##############################################################################
3787

3788

3789
# here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790
# this would be the case for opclass two operations with a source zero
3791
# in the sgl or dbl format.
3792
	lea		FP_SRC(%a6),%a0		# pass: ptr to src op
3793
	bsr.l		fix_skewed_ops		# fix src op
3794

3795
# undo the saves made at entry, in reverse, and hand over to the operating
# system's divide-by-zero handler.
fdz_exit:
3796
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
3797
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
3799

3800
	frestore	FP_SRC(%a6)
3801

3802
	unlk		%a6
3803
	bra.l		_real_dz
3804

3805
#########################################################################
3806
# XDEF ****************************************************************	#
3807
#	_fpsp_fline(): 060FPSP entry point for "Line F emulator"	#
3808
#		       exception when the "reduced" version of the	#
3809
#		       FPSP is implemented that does not emulate	#
3810
#		       FP unimplemented instructions.			#
3811
#									#
3812
#	This handler should be the first code executed upon taking a	#
3813
#	"Line F Emulator" exception in an operating system integrating	#
3814
#	the reduced version of 060FPSP.					#
3815
#									#
3816
# XREF ****************************************************************	#
3817
#	_real_fpu_disabled() - Handle "FPU disabled" exceptions		#
3818
#	_real_fline() - Handle all other cases (treated equally)	#
3819
#									#
3820
# INPUT ***************************************************************	#
3821
#	- The system stack contains a "Line F Emulator" exception	#
3822
#	  stack frame.							#
3823
#									#
3824
# OUTPUT **************************************************************	#
3825
#	- The system stack is unchanged.				#
3826
#									#
3827
# ALGORITHM ***********************************************************	#
3828
#	When a "Line F Emulator" exception occurs in a system where	#
3829
# "FPU Unimplemented" instructions will not be emulated, the exception	#
3830
# can occur because the FPU is disabled or the instruction is to be	#
3831
# classified as "Line F". This module determines which case exists and	#
3832
# calls the appropriate "callout".					#
3833
#									#
3834
#########################################################################
3835

3836
	global		_fpsp_fline
_fpsp_fline:

# Decide which operating system "callout" receives this Line F exception.
# The word 6 bytes above the stack pointer is compared against 0x402c:
#   - equal     : the FPU is disabled; leave through _real_fpu_disabled.
#   - different : every other case; leave through _real_fline.
# Nothing is saved or modified; the exception stack frame is passed on
# untouched.
# NOTE(review): 0x402c is presumably the format/vector-offset word of the
# exception frame -- confirm against the 68060 stack frame description.
	cmpi.w		0x6(%sp),&0x402c
	bne.l		_real_fline		# not "FPU disabled"

	bra.l		_real_fpu_disabled
3845

3846
#########################################################################
3847
# XDEF ****************************************************************	#
3848
#	_dcalc_ea(): calc correct <ea> from <ea> stacked on exception	#
3849
#									#
3850
# XREF ****************************************************************	#
3851
#	inc_areg() - increment an address register			#
3852
#	dec_areg() - decrement an address register			#
3853
#									#
3854
# INPUT ***************************************************************	#
3855
#	d0 = number of bytes to adjust <ea> by				#
3856
#									#
3857
# OUTPUT **************************************************************	#
3858
#	None								#
3859
#									#
3860
# ALGORITHM ***********************************************************	#
3861
# "Dummy" CALCulate Effective Address:					#
3862
#	The stacked <ea> for FP unimplemented instructions and opclass	#
3863
#	two packed instructions is correct with the exception of...	#
3864
#									#
3865
#	1) -(An)   : The register is not updated regardless of size.	#
3866
#		     Also, for extended precision and packed, the	#
3867
#		     stacked <ea> value is 8 bytes too big		#
3868
#	2) (An)+   : The register is not updated.			#
3869
#	3) #<data> : The upper longword of the immediate operand is	#
3870
#		     stacked. b,w,l and s sizes are completely stacked.	#
3871
#		     d,x, and p are not.				#
3872
#									#
3873
#########################################################################
3874

3875
	global		_dcalc_ea
3876
# _dcalc_ea: return the effective address for the stacked instruction and
# apply the address register side effect that was not carried out.
# In:  d0 = number of bytes to adjust the address register by.
# Out: a0 = effective address, on every path.
# The <ea> mode and register fields are taken from the low byte of the
# stacked opword:
#   (An)+   : inc_areg is called with d0 = byte count; a0 = EXC_EA.
#   -(An)   : dec_areg is called with d0 = byte count; a0 = EXC_EA, and
#	      when the byte count is 0xc both a0 and EXC_EA are lowered by 8.
#   #<data> : SPCOND_FLG is set to immed_flg and a0 = FPIAR + 4.
#   other   : a0 = EXC_EA unchanged.
# d0 and d1 are overwritten.
_dcalc_ea:
3877
	mov.l		%d0, %a0		# move # bytes to %a0
3878

3879
	mov.b		1+EXC_OPWORD(%a6), %d0	# fetch opcode word
3880
	mov.l		%d0, %d1		# make a copy
3881

3882
	andi.w		&0x38, %d0		# extract mode field
3883
	andi.l		&0x7, %d1		# extract reg  field
3884

3885
	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3886
	beq.b		dcea_pi			# yes
3887

3888
	cmpi.b		%d0,&0x20		# is mode -(An) ?
3889
	beq.b		dcea_pd			# yes
3890

3891
	or.w		%d1,%d0			# concat mode,reg
3892
	cmpi.b		%d0,&0x3c		# is mode #<data>?
3893

3894
	beq.b		dcea_imm		# yes
3895

3896
	mov.l		EXC_EA(%a6),%a0		# return <ea>
3897
	rts
3898

3899
# need to set immediate data flag here since we'll need to do
3900
# an imem_read to fetch this later.
3901
dcea_imm:
3902
	mov.b		&immed_flg,SPCOND_FLG(%a6)
3903
# memory indirect: a0 = (longword stored at USER_FPIAR(a6)) + 4.
	lea		([USER_FPIAR,%a6],0x4),%a0 # no; return <ea>
3904
	rts
3905

3906
# here, the <ea> is stacked correctly. however, we must update the
3907
# address register...
3908
dcea_pi:
3909
	mov.l		%a0,%d0			# pass amt to inc by
3910
	bsr.l		inc_areg		# inc addr register
3911

3912
	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3913
	rts
3914

3915
# the <ea> is stacked correctly for all but extended and packed which
3916
# the <ea>s are 8 bytes too large.
3917
# it would make no sense to have a pre-decrement to a7 in supervisor
3918
# mode so we don't even worry about this tricky case here : )
3919
dcea_pd:
3920
	mov.l		%a0,%d0			# pass amt to dec by
3921
	bsr.l		dec_areg		# dec addr register
3922

3923
	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3924

3925
# NOTE(review): this compare relies on dec_areg returning with d0 still
# holding the byte count -- confirm against dec_areg.
	cmpi.b		%d0,&0xc		# is opsize ext or packed?
3926
	beq.b		dcea_pd2		# yes
3927
	rts
3928
dcea_pd2:
3929
	sub.l		&0x8,%a0		# correct <ea>
3930
	mov.l		%a0,EXC_EA(%a6)		# put correct <ea> on stack
3931
	rts
3932

3933
#########################################################################
3934
# XDEF ****************************************************************	#
3935
#	_calc_ea_fout(): calculate correct stacked <ea> for extended	#
3936
#			 and packed data opclass 3 operations.		#
3937
#									#
3938
# XREF ****************************************************************	#
3939
#	None								#
3940
#									#
3941
# INPUT ***************************************************************	#
3942
#	None								#
3943
#									#
3944
# OUTPUT **************************************************************	#
3945
#	a0 = return correct effective address				#
3946
#									#
3947
# ALGORITHM ***********************************************************	#
3948
#	For opclass 3 extended and packed data operations, the <ea>	#
3949
# stacked for the exception is incorrect for -(an) and (an)+ addressing	#
3950
# modes. Also, while we're at it, the index register itself must get	#
3951
# updated.								#
3952
#	So, for -(an), we must subtract 8 off of the stacked <ea> value	#
3953
# and return that value as the correct <ea> and store that value in An.	#
3954
# For (an)+, the stacked <ea> is correct but we must adjust An by +12.	#
3955
#									#
3956
#########################################################################
3957

3958
# This calc_ea is currently used to retrieve the correct <ea>
3959
# for fmove outs of type extended and packed.
3960
	global		_calc_ea_fout
3961
# _calc_ea_fout: return the corrected effective address in a0.
# The <ea> mode and register fields are taken from the low byte of the
# stacked opword. (An)+ and -(An) are passed to ceaf_pi / ceaf_pd with
# d1 = register number; for every other mode the stacked EXC_EA is
# returned as it is. d0 and d1 are overwritten.
_calc_ea_fout:
3962
	mov.b		1+EXC_OPWORD(%a6),%d0	# fetch opcode word
3963
	mov.l		%d0,%d1			# make a copy
3964

3965
	andi.w		&0x38,%d0		# extract mode field
3966
	andi.l		&0x7,%d1		# extract reg  field
3967

3968
	cmpi.b		%d0,&0x18		# is mode (An)+ ?
3969
	beq.b		ceaf_pi			# yes
3970

3971
	cmpi.b		%d0,&0x20		# is mode -(An) ?
3972
	beq.w		ceaf_pd			# yes
3973

3974
	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
3975
	rts
3976

3977
# (An)+ : extended and packed fmove out
3978
#	: stacked <ea> is correct
3979
#	: "An" not updated
3980
# ceaf_pi: (An)+ case of _calc_ea_fout().
# In:  d1 = address register number (0-7).
# Out: a0 = EXC_EA (the stacked <ea>, returned unchanged).
# The selected address register is advanced by 0xc bytes through a jump
# table of 16-bit offsets measured from tbl_ceaf_pi:
#   a0,a1 : the copies saved at EXC_DREGS+0x8 / EXC_DREGS+0xc are updated
#   a2-a5 : the live registers are updated directly
#   a6    : the copy in EXC_A6 is updated
#   a7    : the copy in EXC_A7 is updated and SPCOND_FLG is set to mia7_flg
ceaf_pi:
3981
	mov.w		(tbl_ceaf_pi.b,%pc,%d1.w*2),%d1
3982
	mov.l		EXC_EA(%a6),%a0
3983
	jmp		(tbl_ceaf_pi.b,%pc,%d1.w*1)
3984

3985
	swbeg		&0x8
3986
tbl_ceaf_pi:
3987
	short		ceaf_pi0 - tbl_ceaf_pi
3988
	short		ceaf_pi1 - tbl_ceaf_pi
3989
	short		ceaf_pi2 - tbl_ceaf_pi
3990
	short		ceaf_pi3 - tbl_ceaf_pi
3991
	short		ceaf_pi4 - tbl_ceaf_pi
3992
	short		ceaf_pi5 - tbl_ceaf_pi
3993
	short		ceaf_pi6 - tbl_ceaf_pi
3994
	short		ceaf_pi7 - tbl_ceaf_pi
3995

3996
ceaf_pi0:
3997
	addi.l		&0xc,EXC_DREGS+0x8(%a6)
3998
	rts
3999
ceaf_pi1:
4000
	addi.l		&0xc,EXC_DREGS+0xc(%a6)
4001
	rts
4002
ceaf_pi2:
4003
	add.l		&0xc,%a2
4004
	rts
4005
ceaf_pi3:
4006
	add.l		&0xc,%a3
4007
	rts
4008
ceaf_pi4:
4009
	add.l		&0xc,%a4
4010
	rts
4011
ceaf_pi5:
4012
	add.l		&0xc,%a5
4013
	rts
4014
ceaf_pi6:
4015
	addi.l		&0xc,EXC_A6(%a6)
4016
	rts
4017
ceaf_pi7:
4018
	mov.b		&mia7_flg,SPCOND_FLG(%a6)
4019
	addi.l		&0xc,EXC_A7(%a6)
4020
	rts
4021

4022
# -(An) : extended and packed fmove out
4023
#	: stacked <ea> = actual <ea> + 8
4024
#	: "An" not updated
4025
# ceaf_pd: -(An) case of _calc_ea_fout().
# In:  d1 = address register number (0-7).
# Out: a0 = EXC_EA - 8; EXC_EA itself is lowered by 8 as well.
# The corrected address is then written to the selected address register
# through a jump table of 16-bit offsets measured from tbl_ceaf_pd:
#   a0,a1 : the copies saved at EXC_DREGS+0x8 / EXC_DREGS+0xc are written
#   a2-a5 : the live registers are written directly
#   a6    : the copy in EXC_A6 is written
#   a7    : the copy in EXC_A7 is written and SPCOND_FLG is set to mda7_flg
ceaf_pd:
4026
	mov.w		(tbl_ceaf_pd.b,%pc,%d1.w*2),%d1
4027
	mov.l		EXC_EA(%a6),%a0
4028
	sub.l		&0x8,%a0
4029
	sub.l		&0x8,EXC_EA(%a6)
4030
	jmp		(tbl_ceaf_pd.b,%pc,%d1.w*1)
4031

4032
	swbeg		&0x8
4033
tbl_ceaf_pd:
4034
	short		ceaf_pd0 - tbl_ceaf_pd
4035
	short		ceaf_pd1 - tbl_ceaf_pd
4036
	short		ceaf_pd2 - tbl_ceaf_pd
4037
	short		ceaf_pd3 - tbl_ceaf_pd
4038
	short		ceaf_pd4 - tbl_ceaf_pd
4039
	short		ceaf_pd5 - tbl_ceaf_pd
4040
	short		ceaf_pd6 - tbl_ceaf_pd
4041
	short		ceaf_pd7 - tbl_ceaf_pd
4042

4043
ceaf_pd0:
4044
	mov.l		%a0,EXC_DREGS+0x8(%a6)
4045
	rts
4046
ceaf_pd1:
4047
	mov.l		%a0,EXC_DREGS+0xc(%a6)
4048
	rts
4049
ceaf_pd2:
4050
	mov.l		%a0,%a2
4051
	rts
4052
ceaf_pd3:
4053
	mov.l		%a0,%a3
4054
	rts
4055
ceaf_pd4:
4056
	mov.l		%a0,%a4
4057
	rts
4058
ceaf_pd5:
4059
	mov.l		%a0,%a5
4060
	rts
4061
ceaf_pd6:
4062
	mov.l		%a0,EXC_A6(%a6)
4063
	rts
4064
ceaf_pd7:
4065
	mov.l		%a0,EXC_A7(%a6)
4066
	mov.b		&mda7_flg,SPCOND_FLG(%a6)
4067
	rts
4068

4069
#
4070
# This table holds the offsets of the emulation routines for each individual
4071
# math operation relative to the address of this table. Included are
4072
# routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073
# this table is for the version of the 060FPSP without transcendentals.
4074
# The location within the table is determined by the extension bits of the
4075
# operation longword.
4076
#
4077

4078
	swbeg		&109
4079
# tbl_unsupp: 109 longword entries, one per extension value 0x00-0x6c.
# Each entry is the offset of an emulation routine measured from
# tbl_unsupp; callers fetch entry [extension] and call tbl_unsupp + entry.
# An entry written as "tbl_unsupp - tbl_unsupp" is zero and marks an
# operation that has no emulation routine in this table.
# NOTE(review): _fpsp_inex masks the extension to 7 bits (0x00-0x7f) while
# the table stops at 0x6c; presumably values above 0x6c cannot reach the
# lookup -- confirm.
tbl_unsupp:
4080
	long		fin		- tbl_unsupp	# 00: fmove
4081
	long		fint		- tbl_unsupp	# 01: fint
4082
	long		tbl_unsupp	- tbl_unsupp	# 02: fsinh
4083
	long		fintrz		- tbl_unsupp	# 03: fintrz
4084
	long		fsqrt		- tbl_unsupp	# 04: fsqrt
4085
	long		tbl_unsupp	- tbl_unsupp
4086
	long		tbl_unsupp	- tbl_unsupp	# 06: flognp1
4087
	long		tbl_unsupp	- tbl_unsupp
4088
	long		tbl_unsupp	- tbl_unsupp	# 08: fetoxm1
4089
	long		tbl_unsupp	- tbl_unsupp	# 09: ftanh
4090
	long		tbl_unsupp	- tbl_unsupp	# 0a: fatan
4091
	long		tbl_unsupp	- tbl_unsupp
4092
	long		tbl_unsupp	- tbl_unsupp	# 0c: fasin
4093
	long		tbl_unsupp	- tbl_unsupp	# 0d: fatanh
4094
	long		tbl_unsupp	- tbl_unsupp	# 0e: fsin
4095
	long		tbl_unsupp	- tbl_unsupp	# 0f: ftan
4096
	long		tbl_unsupp	- tbl_unsupp	# 10: fetox
4097
	long		tbl_unsupp	- tbl_unsupp	# 11: ftwotox
4098
	long		tbl_unsupp	- tbl_unsupp	# 12: ftentox
4099
	long		tbl_unsupp	- tbl_unsupp
4100
	long		tbl_unsupp	- tbl_unsupp	# 14: flogn
4101
	long		tbl_unsupp	- tbl_unsupp	# 15: flog10
4102
	long		tbl_unsupp	- tbl_unsupp	# 16: flog2
4103
	long		tbl_unsupp	- tbl_unsupp
4104
	long		fabs		- tbl_unsupp	# 18: fabs
4105
	long		tbl_unsupp	- tbl_unsupp	# 19: fcosh
4106
	long		fneg		- tbl_unsupp	# 1a: fneg
4107
	long		tbl_unsupp	- tbl_unsupp
4108
	long		tbl_unsupp	- tbl_unsupp	# 1c: facos
4109
	long		tbl_unsupp	- tbl_unsupp	# 1d: fcos
4110
	long		tbl_unsupp	- tbl_unsupp	# 1e: fgetexp
4111
	long		tbl_unsupp	- tbl_unsupp	# 1f: fgetman
4112
	long		fdiv		- tbl_unsupp	# 20: fdiv
4113
	long		tbl_unsupp	- tbl_unsupp	# 21: fmod
4114
	long		fadd		- tbl_unsupp	# 22: fadd
4115
	long		fmul		- tbl_unsupp	# 23: fmul
4116
	long		fsgldiv		- tbl_unsupp	# 24: fsgldiv
4117
	long		tbl_unsupp	- tbl_unsupp	# 25: frem
4118
	long		tbl_unsupp	- tbl_unsupp	# 26: fscale
4119
	long		fsglmul		- tbl_unsupp	# 27: fsglmul
4120
	long		fsub		- tbl_unsupp	# 28: fsub
4121
	long		tbl_unsupp	- tbl_unsupp
4122
	long		tbl_unsupp	- tbl_unsupp
4123
	long		tbl_unsupp	- tbl_unsupp
4124
	long		tbl_unsupp	- tbl_unsupp
4125
	long		tbl_unsupp	- tbl_unsupp
4126
	long		tbl_unsupp	- tbl_unsupp
4127
	long		tbl_unsupp	- tbl_unsupp
4128
	long		tbl_unsupp	- tbl_unsupp	# 30: fsincos
4129
	long		tbl_unsupp	- tbl_unsupp	# 31: fsincos
4130
	long		tbl_unsupp	- tbl_unsupp	# 32: fsincos
4131
	long		tbl_unsupp	- tbl_unsupp	# 33: fsincos
4132
	long		tbl_unsupp	- tbl_unsupp	# 34: fsincos
4133
	long		tbl_unsupp	- tbl_unsupp	# 35: fsincos
4134
	long		tbl_unsupp	- tbl_unsupp	# 36: fsincos
4135
	long		tbl_unsupp	- tbl_unsupp	# 37: fsincos
4136
	long		fcmp		- tbl_unsupp	# 38: fcmp
4137
	long		tbl_unsupp	- tbl_unsupp
4138
	long		ftst		- tbl_unsupp	# 3a: ftst
4139
	long		tbl_unsupp	- tbl_unsupp
4140
	long		tbl_unsupp	- tbl_unsupp
4141
	long		tbl_unsupp	- tbl_unsupp
4142
	long		tbl_unsupp	- tbl_unsupp
4143
	long		tbl_unsupp	- tbl_unsupp
4144
	long		fsin		- tbl_unsupp	# 40: fsmove
4145
	long		fssqrt		- tbl_unsupp	# 41: fssqrt
4146
	long		tbl_unsupp	- tbl_unsupp
4147
	long		tbl_unsupp	- tbl_unsupp
4148
	long		fdin		- tbl_unsupp	# 44: fdmove
4149
	long		fdsqrt		- tbl_unsupp	# 45: fdsqrt
4150
	long		tbl_unsupp	- tbl_unsupp
4151
	long		tbl_unsupp	- tbl_unsupp
4152
	long		tbl_unsupp	- tbl_unsupp
4153
	long		tbl_unsupp	- tbl_unsupp
4154
	long		tbl_unsupp	- tbl_unsupp
4155
	long		tbl_unsupp	- tbl_unsupp
4156
	long		tbl_unsupp	- tbl_unsupp
4157
	long		tbl_unsupp	- tbl_unsupp
4158
	long		tbl_unsupp	- tbl_unsupp
4159
	long		tbl_unsupp	- tbl_unsupp
4160
	long		tbl_unsupp	- tbl_unsupp
4161
	long		tbl_unsupp	- tbl_unsupp
4162
	long		tbl_unsupp	- tbl_unsupp
4163
	long		tbl_unsupp	- tbl_unsupp
4164
	long		tbl_unsupp	- tbl_unsupp
4165
	long		tbl_unsupp	- tbl_unsupp
4166
	long		tbl_unsupp	- tbl_unsupp
4167
	long		tbl_unsupp	- tbl_unsupp
4168
	long		fsabs		- tbl_unsupp	# 58: fsabs
4169
	long		tbl_unsupp	- tbl_unsupp
4170
	long		fsneg		- tbl_unsupp	# 5a: fsneg
4171
	long		tbl_unsupp	- tbl_unsupp
4172
	long		fdabs		- tbl_unsupp	# 5c: fdabs
4173
	long		tbl_unsupp	- tbl_unsupp
4174
	long		fdneg		- tbl_unsupp	# 5e: fdneg
4175
	long		tbl_unsupp	- tbl_unsupp
4176
	long		fsdiv		- tbl_unsupp	# 60: fsdiv
4177
	long		tbl_unsupp	- tbl_unsupp
4178
	long		fsadd		- tbl_unsupp	# 62: fsadd
4179
	long		fsmul		- tbl_unsupp	# 63: fsmul
4180
	long		fddiv		- tbl_unsupp	# 64: fddiv
4181
	long		tbl_unsupp	- tbl_unsupp
4182
	long		fdadd		- tbl_unsupp	# 66: fdadd
4183
	long		fdmul		- tbl_unsupp	# 67: fdmul
4184
	long		fssub		- tbl_unsupp	# 68: fssub
4185
	long		tbl_unsupp	- tbl_unsupp
4186
	long		tbl_unsupp	- tbl_unsupp
4187
	long		tbl_unsupp	- tbl_unsupp
4188
	long		fdsub		- tbl_unsupp	# 6c: fdsub
4189

4190
#################################################
4191
# Add this here so non-fp modules can compile.
4192
# (smovcr is called from fpsp_inex.)
4193
	global		smovcr
4194
# smovcr: placeholder so that the reference from fpsp_inex resolves in
# builds without the real routine. It contains no emulation code: the
# single instruction branches to itself, so a call here never returns.
smovcr:
4195
	bra.b		smovcr
4196

4197
#########################################################################
4198
# XDEF ****************************************************************	#
4199
#	fmovm_dynamic(): emulate "fmovm" dynamic instruction		#
4200
#									#
4201
# XREF ****************************************************************	#
4202
#	fetch_dreg() - fetch data register				#
4203
#	{i,d,}mem_read() - fetch data from memory			#
4204
#	_mem_write() - write data to memory				#
4205
#	iea_iacc() - instruction memory access error occurred		#
4206
#	iea_dacc() - data memory access error occurred			#
4207
#	restore() - restore An index regs if access error occurred	#
4208
#									#
4209
# INPUT ***************************************************************	#
4210
#	None								#
4211
#									#
4212
# OUTPUT **************************************************************	#
4213
#	If instr is "fmovm Dn,-(A7)" from supervisor mode,		#
4214
#		d0 = size of dump					#
4215
#		d1 = Dn							#
4216
#	Else if instruction access error,				#
4217
#		d0 = FSLW						#
4218
#	Else if data access error,					#
4219
#		d0 = FSLW						#
4220
#		a0 = address of fault					#
4221
#	Else								#
4222
#		none.							#
4223
#									#
4224
# ALGORITHM ***********************************************************	#
4225
#	The effective address must be calculated since this is entered	#
4226
# from an "Unimplemented Effective Address" exception handler. So, we	#
4227
# have our own fcalc_ea() routine here. If an access error is flagged	#
4228
# by a _{i,d,}mem_read() call, we must exit through the special		#
4229
# handler.								#
4230
#	The data register is determined and its value loaded to get the	#
4231
# string of FP registers affected. This value is used as an index into	#
4232
# a lookup table such that we can determine the number of bytes		#
4233
# involved.								#
4234
#	If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used	#
4235
# to read in all FP values. Again, _mem_read() may fail and require a	#
4236
# special exit.								#
4237
#	If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used	#
4238
# to write all FP values. _mem_write() may also fail.			#
4239
#	If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,	#
4240
# then we return the size of the dump and the string to the caller	#
4241
# so that the move can occur outside of this routine. This special	#
4242
# case is required so that moves to the system stack are handled	#
4243
# correctly.								#
4244
#									#
4245
# DYNAMIC:								#
4246
#	fmovm.x	dn, <ea>						#
4247
#	fmovm.x	<ea>, dn						#
4248
#									#
4249
#	      <WORD 1>		      <WORD2>				#
4250
#	1111 0010 00 |<ea>|	11@& 1000 0$$$ 0000			#
4251
#									#
4252
#	& = (0): predecrement addressing mode				#
4253
#	    (1): postincrement or control addressing mode		#
4254
#	@ = (0): move listed regs from memory to the FPU		#
4255
#	    (1): move listed regs from the FPU to memory		#
4256
#	$$$    : index of data register holding reg select mask		#
4257
#									#
4258
# NOTES:								#
4259
#	If the data register holds a zero, then the			#
4260
#	instruction is a nop.						#
4261
#									#
4262
#########################################################################
4263

4264
# fmovm_dynamic: entry point for emulating "fmovm.x" whose register list
# is held in a data register.
# Sequence: decode the Dn number from the extension word, fetch Dn via
# fetch_dreg(), keep its low byte as the register mask ("strg"), look up
# the transfer size in tbl_fmovm_size, compute <ea> with fmovm_calc_ea()
# (result in a0), then dispatch on the direction bit of the ext word.
	global		fmovm_dynamic
4265
fmovm_dynamic:
4266

4267
# extract the data register in which the bit string resides...
4268
	mov.b		1+EXC_EXTWORD(%a6),%d1	# fetch extword
4269
	andi.w		&0x70,%d1		# extract reg bits
4270
	lsr.b		&0x4,%d1		# shift into lo bits
4271

4272
# fetch the bit string into d0...
4273
	bsr.l		fetch_dreg		# fetch reg string
4274

4275
	andi.l		&0x000000ff,%d0		# keep only lo byte
4276

4277
	mov.l		%d0,-(%sp)		# save strg
4278
	mov.b		(tbl_fmovm_size.w,%pc,%d0),%d0	# d0 = # of bytes for this strg
4279
	mov.l		%d0,-(%sp)		# save size
4280
	bsr.l		fmovm_calc_ea		# calculate <ea>
4281
	mov.l		(%sp)+,%d0		# restore size
4282
	mov.l		(%sp)+,%d1		# restore strg
4283

4284
# if the bit string is a zero, then the operation is a no-op
4285
# but, make sure that we've calculated ea and advanced the opword pointer
4286
# (the condition codes tested here are those left by the mov.l that
# reloaded strg into d1 just above.)
	beq.w		fmovm_data_done
4287

4288
# separate move ins from move outs...
4289
	btst		&0x5,EXC_EXTWORD(%a6)	# is it a move in or out?
4290
	beq.w		fmovm_data_in		# bit clear: it's a move in
4291

4292
#############
4293
# MOVE OUT: #
4294
#############
4295
# fmovm_data_out: dispatch for FPU -> memory moves.
# State at this point (set up by fmovm_dynamic): d0 = size in bytes,
# d1 = strg (register mask), a0 = <ea>.
# Control/postincrement forms go straight to fmovm_out_ctrl. Predecrement
# forms first have the mask bit-reversed through tbl_fmovm_convert; only
# the supervisor-mode -(a7) case returns to the caller without storing.
fmovm_data_out:
4296
	btst		&0x4,EXC_EXTWORD(%a6)	# control or predecrement?
4297
	bne.w		fmovm_out_ctrl		# control
4298

4299
############################
4300
fmovm_out_predec:
4301
# for predecrement mode, the bit string is the opposite of both control
4302
# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303
# here, we convert it to be just like the others...
4304
	mov.b		(tbl_fmovm_convert.w,%pc,%d1.w*1),%d1	# d1 = bit-reversed strg
4305

4306
	btst		&0x5,EXC_SR(%a6)	# user or supervisor mode?
4307
	beq.b		fmovm_out_ctrl		# user
4308

4309
fmovm_out_predec_s:
4310
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311
	bne.b		fmovm_out_ctrl
4312

4313
# the operation was unfortunately an: fmovm.x dn,-(sp)
4314
# called from supervisor mode.
4315
# we're also passing "size" and "strg" back to the calling routine
4316
# (d0 = size, d1 = converted strg; nothing has been written to memory.)
	rts
4317

4318
############################
4319
# fmovm_out_ctrl: perform the FPU -> memory move.
# Entry: a0 = destination <ea>, d0 = size in bytes,
#        d1.b = register mask with bit 7 = FP0 ... bit 0 = FP7.
# A buffer of d0 bytes is carved out of the current stack and filled with
# the selected registers in FP0..FP7 order. FP0 and FP1 are copied from
# the EXC_FP0/EXC_FP1 slots of the exception frame; FP2-FP7 are stored
# directly from the FPU with static fmovm.x instructions. The buffer is
# then handed to _dmem_write() and released. A non-zero d1 returned by
# _dmem_write() is treated as a store fault and exits via fmovm_out_err.
# The mask is consumed by shifting d1 left one bit per register and
# testing the sign of the low byte.
fmovm_out_ctrl:
4320
	mov.l		%a0,%a1			# move <ea> to a1
4321

4322
	sub.l		%d0,%sp			# subtract size of dump
4323
	lea		(%sp),%a0		# a0 = write cursor in stack buffer
4324

4325
	tst.b		%d1			# should FP0 be moved?
4326
	bpl.b		fmovm_out_ctrl_fp1	# no
4327

4328
	mov.l		0x0+EXC_FP0(%a6),(%a0)+	# yes
4329
	mov.l		0x4+EXC_FP0(%a6),(%a0)+
4330
	mov.l		0x8+EXC_FP0(%a6),(%a0)+
4331

4332
fmovm_out_ctrl_fp1:
4333
	lsl.b		&0x1,%d1		# should FP1 be moved?
4334
	bpl.b		fmovm_out_ctrl_fp2	# no
4335

4336
	mov.l		0x0+EXC_FP1(%a6),(%a0)+	# yes
4337
	mov.l		0x4+EXC_FP1(%a6),(%a0)+
4338
	mov.l		0x8+EXC_FP1(%a6),(%a0)+
4339

4340
fmovm_out_ctrl_fp2:
4341
	lsl.b		&0x1,%d1		# should FP2 be moved?
4342
	bpl.b		fmovm_out_ctrl_fp3	# no
4343

4344
	fmovm.x		&0x20,(%a0)		# yes
4345
	add.l		&0xc,%a0		# advance cursor by 12 bytes
4346

4347
fmovm_out_ctrl_fp3:
4348
	lsl.b		&0x1,%d1		# should FP3 be moved?
4349
	bpl.b		fmovm_out_ctrl_fp4	# no
4350

4351
	fmovm.x		&0x10,(%a0)		# yes
4352
	add.l		&0xc,%a0
4353

4354
fmovm_out_ctrl_fp4:
4355
	lsl.b		&0x1,%d1		# should FP4 be moved?
4356
	bpl.b		fmovm_out_ctrl_fp5	# no
4357

4358
	fmovm.x		&0x08,(%a0)		# yes
4359
	add.l		&0xc,%a0
4360

4361
fmovm_out_ctrl_fp5:
4362
	lsl.b		&0x1,%d1		# should FP5 be moved?
4363
	bpl.b		fmovm_out_ctrl_fp6	# no
4364

4365
	fmovm.x		&0x04,(%a0)		# yes
4366
	add.l		&0xc,%a0
4367

4368
fmovm_out_ctrl_fp6:
4369
	lsl.b		&0x1,%d1		# should FP6 be moved?
4370
	bpl.b		fmovm_out_ctrl_fp7	# no
4371

4372
	fmovm.x		&0x02,(%a0)		# yes
4373
	add.l		&0xc,%a0
4374

4375
fmovm_out_ctrl_fp7:
4376
	lsl.b		&0x1,%d1		# should FP7 be moved?
4377
	bpl.b		fmovm_out_ctrl_done	# no
4378

4379
	fmovm.x		&0x01,(%a0)		# yes
4380
	add.l		&0xc,%a0
4381

4382
fmovm_out_ctrl_done:
4383
	mov.l		%a1,L_SCR1(%a6)		# keep <ea>; fmovm_err reloads it on a fault
4384

4385
# a1 still holds the <ea> and d0 the size; presumably these are the
# destination and length arguments of _dmem_write() -- confirm there.
	lea		(%sp),%a0		# pass: supervisor src
4386
	mov.l		%d0,-(%sp)		# save size
4387
	bsr.l		_dmem_write		# copy data to user mem
4388

4389
	mov.l		(%sp)+,%d0		# restore size
4390
	add.l		%d0,%sp			# clear fpreg data from stack
4391

4392
	tst.l		%d1			# did dstore err?
4393
	bne.w		fmovm_out_err		# yes
4394

4395
	rts
4396

4397
############
4398
# MOVE IN: #
4399
############
4400
# fmovm_data_in: perform the memory -> FPU move.
# Entry: a0 = source <ea>, d0 = size in bytes,
#        d1.b = register mask with bit 7 = FP0 ... bit 0 = FP7.
# A buffer of d0 bytes is reserved on the stack, filled by _dmem_read(),
# and then unpacked in FP0..FP7 order. FP0 and FP1 are written to the
# EXC_FP0/EXC_FP1 slots of the exception frame; FP2-FP7 are loaded
# straight into the FPU with static fmovm.x instructions. A non-zero d1
# returned by _dmem_read() exits via fmovm_in_err.
fmovm_data_in:
4401
	mov.l		%a0,L_SCR1(%a6)		# keep <ea>; fmovm_err reloads it on a fault
4402

4403
	sub.l		%d0,%sp			# make room for fpregs
4404
	lea		(%sp),%a1		# a1 = start of stack buffer
4405

4406
	mov.l		%d1,-(%sp)		# save bit string for later
4407
	mov.l		%d0,-(%sp)		# save # of bytes
4408

4409
	bsr.l		_dmem_read		# copy data from user mem
4410

4411
	mov.l		(%sp)+,%d0		# retrieve # of bytes
4412

4413
# NOTE(review): on the error branch below the saved bit string and the
# fpreg buffer are still on the stack; presumably the fault exit does not
# return here -- confirm against iea_dacc.
	tst.l		%d1			# did dfetch fail?
4414
	bne.w		fmovm_in_err		# yes
4415

4416
	mov.l		(%sp)+,%d1		# load bit string
4417

4418
	lea		(%sp),%a0		# addr of stack
4419

4420
	tst.b		%d1			# should FP0 be moved?
4421
	bpl.b		fmovm_data_in_fp1	# no
4422

4423
	mov.l		(%a0)+,0x0+EXC_FP0(%a6)	# yes
4424
	mov.l		(%a0)+,0x4+EXC_FP0(%a6)
4425
	mov.l		(%a0)+,0x8+EXC_FP0(%a6)
4426

4427
fmovm_data_in_fp1:
4428
	lsl.b		&0x1,%d1		# should FP1 be moved?
4429
	bpl.b		fmovm_data_in_fp2	# no
4430

4431
	mov.l		(%a0)+,0x0+EXC_FP1(%a6)	# yes
4432
	mov.l		(%a0)+,0x4+EXC_FP1(%a6)
4433
	mov.l		(%a0)+,0x8+EXC_FP1(%a6)
4434

4435
fmovm_data_in_fp2:
4436
	lsl.b		&0x1,%d1		# should FP2 be moved?
4437
	bpl.b		fmovm_data_in_fp3	# no
4438

4439
	fmovm.x		(%a0)+,&0x20		# yes
4440

4441
fmovm_data_in_fp3:
4442
	lsl.b		&0x1,%d1		# should FP3 be moved?
4443
	bpl.b		fmovm_data_in_fp4	# no
4444

4445
	fmovm.x		(%a0)+,&0x10		# yes
4446

4447
fmovm_data_in_fp4:
4448
	lsl.b		&0x1,%d1		# should FP4 be moved?
4449
	bpl.b		fmovm_data_in_fp5	# no
4450

4451
	fmovm.x		(%a0)+,&0x08		# yes
4452

4453
fmovm_data_in_fp5:
4454
	lsl.b		&0x1,%d1		# should FP5 be moved?
4455
	bpl.b		fmovm_data_in_fp6	# no
4456

4457
	fmovm.x		(%a0)+,&0x04		# yes
4458

4459
fmovm_data_in_fp6:
4460
	lsl.b		&0x1,%d1		# should FP6 be moved?
4461
	bpl.b		fmovm_data_in_fp7	# no
4462

4463
	fmovm.x		(%a0)+,&0x02		# yes
4464

4465
fmovm_data_in_fp7:
4466
	lsl.b		&0x1,%d1		# should FP7 be moved?
4467
	bpl.b		fmovm_data_in_done	# no
4468

4469
	fmovm.x		(%a0)+,&0x01		# yes
4470

4471
fmovm_data_in_done:
4472
	add.l		%d0,%sp			# remove fpregs from stack
4473
	rts
4474

4475
#####################################
4476

4477
# fmovm_data_done: exit taken by fmovm_dynamic when the register mask is
# zero; no data is transferred.
fmovm_data_done:
4478
	rts
4479

4480
##############################################################################
4481

4482
#
4483
# table indexed by the operation's bit string that gives the number
4484
# of bytes that will be moved.
4485
#
4486
# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487
#
4488
# 256 one-byte entries, 8 per row; entry i = 12 * (number of 1 bits in i).
# fmovm_dynamic indexes this table with the low byte of the register mask.
tbl_fmovm_size:
4489
	byte	0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4490
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4491
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4492
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4493
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4494
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4495
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4496
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4497
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4498
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4499
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4500
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4501
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4502
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4503
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4504
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4505
	byte	0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4506
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4507
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4508
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4509
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4512
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4513
	byte	0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4514
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4515
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4516
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4517
	byte	0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4518
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4519
	byte	0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4520
	byte	0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4521

4522
#
4523
# table to convert a pre-decrement bit string into a post-increment
4524
# or control bit string.
4525
# ex:	0x00	==>	0x00
4526
#	0x01	==>	0x80
4527
#	0x02	==>	0x40
4528
#		.
4529
#		.
4530
#	0xfd	==>	0xbf
4531
#	0xfe	==>	0x7f
4532
#	0xff	==>	0xff
4533
#
4534
# 256 one-byte entries, 8 per row; entry i is i with its eight bits in
# reverse order (bit 0 <-> bit 7, bit 1 <-> bit 6, ...).
# fmovm_out_predec indexes this table with the predecrement-form mask.
tbl_fmovm_convert:
4535
	byte	0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4536
	byte	0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4537
	byte	0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4538
	byte	0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4539
	byte	0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4540
	byte	0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4541
	byte	0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4542
	byte	0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4543
	byte	0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4544
	byte	0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4545
	byte	0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4546
	byte	0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4547
	byte	0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4548
	byte	0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4549
	byte	0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4550
	byte	0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4551
	byte	0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4552
	byte	0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4553
	byte	0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4554
	byte	0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4555
	byte	0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4556
	byte	0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4557
	byte	0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4558
	byte	0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4559
	byte	0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4560
	byte	0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4561
	byte	0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4562
	byte	0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4563
	byte	0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4564
	byte	0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4565
	byte	0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4566
	byte	0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4567

4568
	global		fmovm_calc_ea
4569
###############################################
4570
# fmovm_calc_ea: calculate effective address  #
4571
###############################################
4572
# Entry: d0 = number of bytes the instruction will transfer.
# The byte count is parked in a0 (the (An)+ and -(An) handlers use it as
# the adjustment amount), then the low 6 bits of EXC_OPWORD select a
# handler through tbl_fea_mode. d1 holds the 3-bit register field when
# the handler is entered. Each handler returns the <ea> in a0.
fmovm_calc_ea:
4573
	mov.l		%d0,%a0			# move # bytes to a0
4574

4575
# currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576
# easily changed if they were inputs passed in registers.
4577
	mov.w		EXC_OPWORD(%a6),%d0	# fetch opcode word
4578
	mov.w		%d0,%d1			# make a copy
4579

4580
	andi.w		&0x3f,%d0		# extract 6-bit {mode,reg} table index
4581
	andi.l		&0x7,%d1		# extract reg  field
4582

4583
# jump to the corresponding function for each {MODE,REG} pair.
4584
	mov.w		(tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585
	jmp		(tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586

4587
# tbl_fea_mode: 64 word-sized offsets, each relative to tbl_fea_mode and
# indexed by the low 6 bits of the opword ({mode,reg}). Entries written as
# "tbl_fea_mode - tbl_fea_mode" are zero offsets, i.e. slots with no
# handler; presumably those addressing modes never reach fmovm_calc_ea.
	swbeg		&64
4588
tbl_fea_mode:
4589
# index 0x00-0x07: no handler
	short		tbl_fea_mode	-	tbl_fea_mode
4590
	short		tbl_fea_mode	-	tbl_fea_mode
4591
	short		tbl_fea_mode	-	tbl_fea_mode
4592
	short		tbl_fea_mode	-	tbl_fea_mode
4593
	short		tbl_fea_mode	-	tbl_fea_mode
4594
	short		tbl_fea_mode	-	tbl_fea_mode
4595
	short		tbl_fea_mode	-	tbl_fea_mode
4596
	short		tbl_fea_mode	-	tbl_fea_mode
4597

4598
# index 0x08-0x0f: no handler
	short		tbl_fea_mode	-	tbl_fea_mode
4599
	short		tbl_fea_mode	-	tbl_fea_mode
4600
	short		tbl_fea_mode	-	tbl_fea_mode
4601
	short		tbl_fea_mode	-	tbl_fea_mode
4602
	short		tbl_fea_mode	-	tbl_fea_mode
4603
	short		tbl_fea_mode	-	tbl_fea_mode
4604
	short		tbl_fea_mode	-	tbl_fea_mode
4605
	short		tbl_fea_mode	-	tbl_fea_mode
4606

4607
# index 0x10-0x17: (An)
	short		faddr_ind_a0	-	tbl_fea_mode
4608
	short		faddr_ind_a1	-	tbl_fea_mode
4609
	short		faddr_ind_a2	-	tbl_fea_mode
4610
	short		faddr_ind_a3	-	tbl_fea_mode
4611
	short		faddr_ind_a4	-	tbl_fea_mode
4612
	short		faddr_ind_a5	-	tbl_fea_mode
4613
	short		faddr_ind_a6	-	tbl_fea_mode
4614
	short		faddr_ind_a7	-	tbl_fea_mode
4615

4616
# index 0x18-0x1f: (An)+
	short		faddr_ind_p_a0	-	tbl_fea_mode
4617
	short		faddr_ind_p_a1	-	tbl_fea_mode
4618
	short		faddr_ind_p_a2	-	tbl_fea_mode
4619
	short		faddr_ind_p_a3	-	tbl_fea_mode
4620
	short		faddr_ind_p_a4	-	tbl_fea_mode
4621
	short		faddr_ind_p_a5	-	tbl_fea_mode
4622
	short		faddr_ind_p_a6	-	tbl_fea_mode
4623
	short		faddr_ind_p_a7	-	tbl_fea_mode
4624

4625
# index 0x20-0x27: -(An)
	short		faddr_ind_m_a0	-	tbl_fea_mode
4626
	short		faddr_ind_m_a1	-	tbl_fea_mode
4627
	short		faddr_ind_m_a2	-	tbl_fea_mode
4628
	short		faddr_ind_m_a3	-	tbl_fea_mode
4629
	short		faddr_ind_m_a4	-	tbl_fea_mode
4630
	short		faddr_ind_m_a5	-	tbl_fea_mode
4631
	short		faddr_ind_m_a6	-	tbl_fea_mode
4632
	short		faddr_ind_m_a7	-	tbl_fea_mode
4633

4634
# index 0x28-0x2f: (d16,An)
	short		faddr_ind_disp_a0	-	tbl_fea_mode
4635
	short		faddr_ind_disp_a1	-	tbl_fea_mode
4636
	short		faddr_ind_disp_a2	-	tbl_fea_mode
4637
	short		faddr_ind_disp_a3	-	tbl_fea_mode
4638
	short		faddr_ind_disp_a4	-	tbl_fea_mode
4639
	short		faddr_ind_disp_a5	-	tbl_fea_mode
4640
	short		faddr_ind_disp_a6	-	tbl_fea_mode
4641
	short		faddr_ind_disp_a7	-	tbl_fea_mode
4642

4643
# index 0x30-0x37: indexed / memory indirect; one handler for all An
	short		faddr_ind_ext	-	tbl_fea_mode
4644
	short		faddr_ind_ext	-	tbl_fea_mode
4645
	short		faddr_ind_ext	-	tbl_fea_mode
4646
	short		faddr_ind_ext	-	tbl_fea_mode
4647
	short		faddr_ind_ext	-	tbl_fea_mode
4648
	short		faddr_ind_ext	-	tbl_fea_mode
4649
	short		faddr_ind_ext	-	tbl_fea_mode
4650
	short		faddr_ind_ext	-	tbl_fea_mode
4651

4652
# index 0x38-0x3f: absolute and PC-relative modes; last four have no handler
	short		fabs_short	-	tbl_fea_mode
4653
	short		fabs_long	-	tbl_fea_mode
4654
	short		fpc_ind		-	tbl_fea_mode
4655
	short		fpc_ind_ext	-	tbl_fea_mode
4656
	short		tbl_fea_mode	-	tbl_fea_mode
4657
	short		tbl_fea_mode	-	tbl_fea_mode
4658
	short		tbl_fea_mode	-	tbl_fea_mode
4659
	short		tbl_fea_mode	-	tbl_fea_mode
4660

4661
###################################
4662
# Address register indirect: (An) #
4663
###################################
4664
# faddr_ind_a0 .. faddr_ind_a7: <ea> = An; result returned in a0.
# The value of a0/a1 is read from the EXC_DREGS area of the frame, a2-a5
# from the live registers, a6 from the longword at (%a6), and a7 from
# EXC_A7. No register other than a0 is written.
faddr_ind_a0:
4665
	mov.l		EXC_DREGS+0x8(%a6),%a0	# Get current a0
4666
	rts
4667

4668
faddr_ind_a1:
4669
	mov.l		EXC_DREGS+0xc(%a6),%a0	# Get current a1
4670
	rts
4671

4672
faddr_ind_a2:
4673
	mov.l		%a2,%a0			# Get current a2
4674
	rts
4675

4676
faddr_ind_a3:
4677
	mov.l		%a3,%a0			# Get current a3
4678
	rts
4679

4680
faddr_ind_a4:
4681
	mov.l		%a4,%a0			# Get current a4
4682
	rts
4683

4684
faddr_ind_a5:
4685
	mov.l		%a5,%a0			# Get current a5
4686
	rts
4687

4688
faddr_ind_a6:
4689
	mov.l		(%a6),%a0		# Get current a6
4690
	rts
4691

4692
faddr_ind_a7:
4693
	mov.l		EXC_A7(%a6),%a0		# Get current a7
4694
	rts
4695

4696
#####################################################
4697
# Address register indirect w/ postincrement: (An)+ #
4698
#####################################################
4699
# faddr_ind_p_a0 .. faddr_ind_p_a7: (An)+ handlers.
# Entry: a0 = number of bytes to step An by (set by fmovm_calc_ea).
# Exit:  a0 = An before the increment (the <ea>); An itself, in its frame
#        slot or live register, has been advanced by the byte count.
#        d0 = old An and d1 = new An are left behind as scratch.
# The a7 variant additionally stores mia7_flg into SPCOND_FLG.
faddr_ind_p_a0:
4700
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4701
	mov.l		%d0,%d1
4702
	add.l		%a0,%d1			# Increment
4703
	mov.l		%d1,EXC_DREGS+0x8(%a6)	# Save incr value
4704
	mov.l		%d0,%a0			# return pre-increment value as <ea>
4705
	rts
4706

4707
faddr_ind_p_a1:
4708
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4709
	mov.l		%d0,%d1
4710
	add.l		%a0,%d1			# Increment
4711
	mov.l		%d1,EXC_DREGS+0xc(%a6)	# Save incr value
4712
	mov.l		%d0,%a0
4713
	rts
4714

4715
faddr_ind_p_a2:
4716
	mov.l		%a2,%d0			# Get current a2
4717
	mov.l		%d0,%d1
4718
	add.l		%a0,%d1			# Increment
4719
	mov.l		%d1,%a2			# Save incr value
4720
	mov.l		%d0,%a0
4721
	rts
4722

4723
faddr_ind_p_a3:
4724
	mov.l		%a3,%d0			# Get current a3
4725
	mov.l		%d0,%d1
4726
	add.l		%a0,%d1			# Increment
4727
	mov.l		%d1,%a3			# Save incr value
4728
	mov.l		%d0,%a0
4729
	rts
4730

4731
faddr_ind_p_a4:
4732
	mov.l		%a4,%d0			# Get current a4
4733
	mov.l		%d0,%d1
4734
	add.l		%a0,%d1			# Increment
4735
	mov.l		%d1,%a4			# Save incr value
4736
	mov.l		%d0,%a0
4737
	rts
4738

4739
faddr_ind_p_a5:
4740
	mov.l		%a5,%d0			# Get current a5
4741
	mov.l		%d0,%d1
4742
	add.l		%a0,%d1			# Increment
4743
	mov.l		%d1,%a5			# Save incr value
4744
	mov.l		%d0,%a0
4745
	rts
4746

4747
faddr_ind_p_a6:
4748
	mov.l		(%a6),%d0		# Get current a6
4749
	mov.l		%d0,%d1
4750
	add.l		%a0,%d1			# Increment
4751
	mov.l		%d1,(%a6)		# Save incr value
4752
	mov.l		%d0,%a0
4753
	rts
4754

4755
faddr_ind_p_a7:
4756
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4757

4758
	mov.l		EXC_A7(%a6),%d0		# Get current a7
4759
	mov.l		%d0,%d1
4760
	add.l		%a0,%d1			# Increment
4761
	mov.l		%d1,EXC_A7(%a6)		# Save incr value
4762
	mov.l		%d0,%a0
4763
	rts
4764

4765
####################################################
4766
# Address register indirect w/ predecrement: -(An) #
4767
####################################################
4768
# faddr_ind_m_a0 .. faddr_ind_m_a7: -(An) handlers.
# Entry: a0 = number of bytes to step An by (set by fmovm_calc_ea).
# Exit:  a0 = An after the decrement (the <ea>); An itself, in its frame
#        slot or live register, has been lowered by the byte count.
#        d0 is left holding the same value as a0.
# The a7 variant additionally stores mda7_flg into SPCOND_FLG, which
# fmovm_out_predec_s tests to detect the supervisor -(a7) case.
faddr_ind_m_a0:
4769
	mov.l		EXC_DREGS+0x8(%a6),%d0	# Get current a0
4770
	sub.l		%a0,%d0			# Decrement
4771
	mov.l		%d0,EXC_DREGS+0x8(%a6)	# Save decr value
4772
	mov.l		%d0,%a0			# return decremented value as <ea>
4773
	rts
4774

4775
faddr_ind_m_a1:
4776
	mov.l		EXC_DREGS+0xc(%a6),%d0	# Get current a1
4777
	sub.l		%a0,%d0			# Decrement
4778
	mov.l		%d0,EXC_DREGS+0xc(%a6)	# Save decr value
4779
	mov.l		%d0,%a0
4780
	rts
4781

4782
faddr_ind_m_a2:
4783
	mov.l		%a2,%d0			# Get current a2
4784
	sub.l		%a0,%d0			# Decrement
4785
	mov.l		%d0,%a2			# Save decr value
4786
	mov.l		%d0,%a0
4787
	rts
4788

4789
faddr_ind_m_a3:
4790
	mov.l		%a3,%d0			# Get current a3
4791
	sub.l		%a0,%d0			# Decrement
4792
	mov.l		%d0,%a3			# Save decr value
4793
	mov.l		%d0,%a0
4794
	rts
4795

4796
faddr_ind_m_a4:
4797
	mov.l		%a4,%d0			# Get current a4
4798
	sub.l		%a0,%d0			# Decrement
4799
	mov.l		%d0,%a4			# Save decr value
4800
	mov.l		%d0,%a0
4801
	rts
4802

4803
faddr_ind_m_a5:
4804
	mov.l		%a5,%d0			# Get current a5
4805
	sub.l		%a0,%d0			# Decrement
4806
	mov.l		%d0,%a5			# Save decr value
4807
	mov.l		%d0,%a0
4808
	rts
4809

4810
faddr_ind_m_a6:
4811
	mov.l		(%a6),%d0		# Get current a6
4812
	sub.l		%a0,%d0			# Decrement
4813
	mov.l		%d0,(%a6)		# Save decr value
4814
	mov.l		%d0,%a0
4815
	rts
4816

4817
faddr_ind_m_a7:
4818
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4819

4820
	mov.l		EXC_A7(%a6),%d0		# Get current a7
4821
	sub.l		%a0,%d0			# Decrement
4822
	mov.l		%d0,EXC_A7(%a6)		# Save decr value
4823
	mov.l		%d0,%a0
4824
	rts
4825

4826
########################################################
4827
# Address register indirect w/ displacement: (d16, An) #
4828
########################################################
4829
# faddr_ind_disp_a0 .. faddr_ind_disp_a7: (d16,An) handlers.
# Each one reads the 16-bit displacement from the instruction stream at
# EXC_EXTWPTR with _imem_read_word(), advances EXC_EXTWPTR by 2, and
# returns a0 = An + sign-extended d16. A non-zero d1 after the fetch is
# treated as an instruction access fault and exits through iea_iacc.
# The eight routines differ only in where the value of An is taken from.
faddr_ind_disp_a0:
4830
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4831
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4832
	bsr.l		_imem_read_word
4833

4834
	tst.l		%d1			# did ifetch fail?
4835
	bne.l		iea_iacc		# yes
4836

4837
	mov.w		%d0,%a0			# sign extend displacement
4838

4839
	add.l		EXC_DREGS+0x8(%a6),%a0	# a0 + d16
4840
	rts
4841

4842
faddr_ind_disp_a1:
4843
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4844
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4845
	bsr.l		_imem_read_word
4846

4847
	tst.l		%d1			# did ifetch fail?
4848
	bne.l		iea_iacc		# yes
4849

4850
	mov.w		%d0,%a0			# sign extend displacement
4851

4852
	add.l		EXC_DREGS+0xc(%a6),%a0	# a1 + d16
4853
	rts
4854

4855
faddr_ind_disp_a2:
4856
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4857
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4858
	bsr.l		_imem_read_word
4859

4860
	tst.l		%d1			# did ifetch fail?
4861
	bne.l		iea_iacc		# yes
4862

4863
	mov.w		%d0,%a0			# sign extend displacement
4864

4865
	add.l		%a2,%a0			# a2 + d16
4866
	rts
4867

4868
faddr_ind_disp_a3:
4869
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4870
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4871
	bsr.l		_imem_read_word
4872

4873
	tst.l		%d1			# did ifetch fail?
4874
	bne.l		iea_iacc		# yes
4875

4876
	mov.w		%d0,%a0			# sign extend displacement
4877

4878
	add.l		%a3,%a0			# a3 + d16
4879
	rts
4880

4881
faddr_ind_disp_a4:
4882
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4883
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4884
	bsr.l		_imem_read_word
4885

4886
	tst.l		%d1			# did ifetch fail?
4887
	bne.l		iea_iacc		# yes
4888

4889
	mov.w		%d0,%a0			# sign extend displacement
4890

4891
	add.l		%a4,%a0			# a4 + d16
4892
	rts
4893

4894
faddr_ind_disp_a5:
4895
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4896
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4897
	bsr.l		_imem_read_word
4898

4899
	tst.l		%d1			# did ifetch fail?
4900
	bne.l		iea_iacc		# yes
4901

4902
	mov.w		%d0,%a0			# sign extend displacement
4903

4904
	add.l		%a5,%a0			# a5 + d16
4905
	rts
4906

4907
faddr_ind_disp_a6:
4908
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4909
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4910
	bsr.l		_imem_read_word
4911

4912
	tst.l		%d1			# did ifetch fail?
4913
	bne.l		iea_iacc		# yes
4914

4915
	mov.w		%d0,%a0			# sign extend displacement
4916

4917
	add.l		(%a6),%a0		# a6 + d16
4918
	rts
4919

4920
faddr_ind_disp_a7:
4921
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4922
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4923
	bsr.l		_imem_read_word
4924

4925
	tst.l		%d1			# did ifetch fail?
4926
	bne.l		iea_iacc		# yes
4927

4928
	mov.w		%d0,%a0			# sign extend displacement
4929

4930
	add.l		EXC_A7(%a6),%a0		# a7 + d16
4931
	rts
4932

4933
########################################################################
4934
# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935
#    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4936
# Memory indirect postindexed: ([bd, An], Xn, od)		       #
4937
# Memory indirect preindexed: ([bd, An, Xn], od)		       #
4938
########################################################################
4939
# faddr_ind_ext: handler for every An-based mode that carries an
# extension word.
# Entry: d1 = 3-bit register field of the opword (from fmovm_calc_ea).
# The base register value is fetched through fetch_dreg() using index
# d1+8, then the extension word is read from the instruction stream.
# If bit 8 of the extension word is set, decoding continues in
# fcalc_mem_ind with d0 = extension word and a0 = base. Otherwise the
# brief form is decoded here:
#	a0 = base + (index << scale) + sign-extended 8-bit displacement
# d2 is used as scratch and restored before returning.
faddr_ind_ext:
4940
	addq.l		&0x8,%d1		# reg field + 8 = index passed to fetch_dreg
4941
	bsr.l		fetch_dreg		# fetch base areg
4942
	mov.l		%d0,-(%sp)		# park base across the ifetch
4943

4944
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4945
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4946
	bsr.l		_imem_read_word		# fetch extword in d0
4947

4948
# NOTE(review): on the error branch below the parked base is still on
# the stack; presumably iea_iacc does not return here -- confirm.
	tst.l		%d1			# did ifetch fail?
4949
	bne.l		iea_iacc		# yes
4950

4951
	mov.l		(%sp)+,%a0		# a0 = base
4952

4953
	btst		&0x8,%d0		# bit 8 set: full-format extension word
4954
	bne.w		fcalc_mem_ind
4955

4956
	mov.l		%d0,L_SCR1(%a6)		# hold extword
4957

4958
	mov.l		%d0,%d1
4959
	rol.w		&0x4,%d1		# bring extword bits 15-12 down to bits 3-0
4960
	andi.w		&0xf,%d1		# extract index regno
4961

4962
# count on fetch_dreg() not to alter a0...
4963
	bsr.l		fetch_dreg		# fetch index
4964

4965
	mov.l		%d2,-(%sp)		# save d2
4966
	mov.l		L_SCR1(%a6),%d2		# fetch extword
4967

4968
	btst		&0xb,%d2		# is it word or long?
4969
	bne.b		faii8_long
4970
	ext.l		%d0			# sign extend word index
4971
faii8_long:
4972
	mov.l		%d2,%d1
4973
	rol.w		&0x7,%d1		# bring extword bits 10-9 down to bits 1-0
4974
	andi.l		&0x3,%d1		# extract scale value
4975

4976
	lsl.l		%d1,%d0			# shift index by scale
4977

4978
	extb.l		%d2			# sign extend displacement
4979
	add.l		%d2,%d0			# index + disp
4980
	add.l		%d0,%a0			# An + (index + disp)
4981

4982
	mov.l		(%sp)+,%d2		# restore old d2
4983
	rts
4984

4985
###########################
4986
# Absolute short: (XXX).W #
4987
###########################
4988
# fabs_short: (XXX).W handler.
# Reads one word from the instruction stream at EXC_EXTWPTR, advances
# EXC_EXTWPTR by 2, and returns the sign-extended word in a0.
# A non-zero d1 after the fetch exits through iea_iacc.
fabs_short:
4989
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
4990
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
4991
	bsr.l		_imem_read_word		# fetch short address
4992

4993
	tst.l		%d1			# did ifetch fail?
4994
	bne.l		iea_iacc		# yes
4995

4996
	mov.w		%d0,%a0			# return <ea> in a0
4997
	rts
4998

4999
##########################
5000
# Absolute long: (XXX).L #
5001
##########################
5002
# fabs_long: (XXX).L handler.
# Reads one longword from the instruction stream at EXC_EXTWPTR, advances
# EXC_EXTWPTR by 4, and returns that longword in a0.
# A non-zero d1 after the fetch exits through iea_iacc.
fabs_long:
5003
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5004
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5005
	bsr.l		_imem_read_long		# fetch long address
5006

5007
	tst.l		%d1			# did ifetch fail?
5008
	bne.l		iea_iacc		# yes
5009

5010
	mov.l		%d0,%a0			# return <ea> in a0
5011
	rts
5012

5013
#######################################################
5014
# Program counter indirect w/ displacement: (d16, PC) #
5015
#######################################################
5016
# fpc_ind: (d16,PC) handler.
# Reads the 16-bit displacement from the instruction stream, advances
# EXC_EXTWPTR by 2, and returns
#	a0 = (address of the displacement word) + sign-extended d16.
# A non-zero d1 after the fetch exits through iea_iacc.
fpc_ind:
5017
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5018
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5019
	bsr.l		_imem_read_word		# fetch word displacement
5020

5021
	tst.l		%d1			# did ifetch fail?
5022
	bne.l		iea_iacc		# yes
5023

5024
	mov.w		%d0,%a0			# sign extend displacement
5025

5026
	add.l		EXC_EXTWPTR(%a6),%a0	# pc + d16
5027

5028
# EXC_EXTWPTR was advanced by 2 by the addq above, so it now points past
# the displacement word. back that out of the result here.
5029
	subq.l		&0x2,%a0		# adjust <ea>
5030
	rts
5031

5032
##########################################################
5033
# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5034
# "     "     w/   "  (base displacement): (bd, PC, Xn)  #
5035
# PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5036
# PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5037
##########################################################
5038
# fpc_ind_ext: handler for every PC-based mode that carries an
# extension word.
# The extension word is read from the instruction stream and the base is
# set to the address of that extension word (EXC_EXTWPTR after the
# increment, minus 2). If bit 8 of the extension word is set, decoding
# continues in fcalc_mem_ind with d0 = extension word and a0 = base.
# Otherwise the brief form is decoded here:
#	a0 = base + (index << scale) + sign-extended 8-bit displacement
# d2 is used as scratch and restored before returning.
fpc_ind_ext:
5039
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5040
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5041
	bsr.l		_imem_read_word		# fetch ext word
5042

5043
	tst.l		%d1			# did ifetch fail?
5044
	bne.l		iea_iacc		# yes
5045

5046
	mov.l		EXC_EXTWPTR(%a6),%a0	# put base in a0
5047
	subq.l		&0x2,%a0		# adjust base
5048

5049
	btst		&0x8,%d0		# bit 8 set: full-format extension word
5050
	bne.w		fcalc_mem_ind		# calc memory indirect
5051

5052
	mov.l		%d0,L_SCR1(%a6)		# store extword
5053

5054
	mov.l		%d0,%d1			# make extword copy
5055
	rol.w		&0x4,%d1		# rotate reg num into place
5056
	andi.w		&0xf,%d1		# extract register number
5057

5058
# count on fetch_dreg() not to alter a0...
5059
	bsr.l		fetch_dreg		# fetch index
5060

5061
	mov.l		%d2,-(%sp)		# save d2
5062
	mov.l		L_SCR1(%a6),%d2		# fetch extword
5063

5064
	btst		&0xb,%d2		# is index word or long?
5065
	bne.b		fpii8_long		# long
5066
	ext.l		%d0			# sign extend word index
5067
fpii8_long:
5068
	mov.l		%d2,%d1
5069
	rol.w		&0x7,%d1		# rotate scale value into place
5070
	andi.l		&0x3,%d1		# extract scale value
5071

5072
	lsl.l		%d1,%d0			# shift index by scale
5073

5074
	extb.l		%d2			# sign extend displacement
5075
	add.l		%d2,%d0			# disp + index
5076
	add.l		%d0,%a0			# PC base + (index + disp)
5077

5078
	mov.l		(%sp)+,%d2		# restore temp register
5079
	rts
5080

5081
# d2 = index
5082
# d3 = base
5083
# d4 = od
5084
# d5 = extword
5085
# fcalc_mem_ind: decode a full-format extension word.
# Entry: d0 = extension word, a0 = base (An value or PC-derived base).
# Exit:  a0 = <ea>. d2-d5 are saved on entry and restored on every exit.
# Working registers: d2 = scaled index (0 if suppressed), d3 = base plus
# base displacement (base forced to 0 if bit 7 is set), d4 = outer
# displacement, d5 = extension word.
# Any further displacement words are read from the instruction stream at
# EXC_EXTWPTR, which is advanced past them. When the low two bits of the
# extension word are zero the result is d3 + d2 with no memory access;
# otherwise a longword is read with _dmem_read_long() either before
# (post-indexed) or after (pre-indexed) the index is added, and the outer
# displacement is added last.
# Fault exits: fcea_iacc for instruction fetches, fcea_err for the data
# read; both restore d2-d5 first.
fcalc_mem_ind:
5086
	btst		&0x6,%d0		# is the index suppressed?
5087
	beq.b		fcalc_index
5088

5089
	movm.l		&0x3c00,-(%sp)		# save d2-d5
5090

5091
	mov.l		%d0,%d5			# put extword in d5
5092
	mov.l		%a0,%d3			# put base in d3
5093

5094
	clr.l		%d2			# yes, so index = 0
5095
	bra.b		fbase_supp_ck
5096

5097
# index:
5098
fcalc_index:
5099
	mov.l		%d0,L_SCR1(%a6)		# save d0 (extword)
5100
	bfextu		%d0{&16:&4},%d1		# fetch dreg index
5101
	bsr.l		fetch_dreg
5102

5103
	movm.l		&0x3c00,-(%sp)		# save d2-d5
5104
	mov.l		%d0,%d2			# put index in d2
5105
	mov.l		L_SCR1(%a6),%d5		# put extword in d5
5106
	mov.l		%a0,%d3			# put base in d3
5107

5108
	btst		&0xb,%d5		# is index word or long?
5109
	bne.b		fno_ext
5110
	ext.l		%d2			# sign extend word index
5111

5112
fno_ext:
5113
	bfextu		%d5{&21:&2},%d0		# extract scale value
5114
	lsl.l		%d0,%d2			# shift index by scale
5115

5116
# base address (passed as parameter in d3):
5117
# we clear the value here if it should actually be suppressed.
5118
fbase_supp_ck:
5119
	btst		&0x7,%d5		# is the base register suppressed?
5120
	beq.b		fno_base_sup
5121
	clr.l		%d3			# yes, so base = 0
5122

5123
# base displacement:
5124
fno_base_sup:
5125
	bfextu		%d5{&26:&2},%d0		# get bd size
5126
#	beq.l		fmovm_error		# if (size == 0) it's reserved
5127

5128
# bd size: below 2 = no displacement word, 2 = word, 3 = long
	cmpi.b		%d0,&0x2
5129
	blt.b		fno_bd
5130
	beq.b		fget_word_bd
5131

5132
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5133
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5134
	bsr.l		_imem_read_long
5135

5136
	tst.l		%d1			# did ifetch fail?
5137
	bne.l		fcea_iacc		# yes
5138

5139
	bra.b		fchk_ind
5140

5141
fget_word_bd:
5142
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5143
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5144
	bsr.l		_imem_read_word
5145

5146
	tst.l		%d1			# did ifetch fail?
5147
	bne.l		fcea_iacc		# yes
5148

5149
	ext.l		%d0			# sign extend bd
5150

5151
fchk_ind:
5152
	add.l		%d0,%d3			# base += bd
5153

5154
# outer displacement:
5155
fno_bd:
5156
	bfextu		%d5{&30:&2},%d0		# od size field; 0 => no memory indirection
5157
	beq.w		faii_bd
5158

5159
# od size: 1 = null outer displacement, 2 = word, 3 = long
	cmpi.b		%d0,&0x2
5160
	blt.b		fnull_od
5161
	beq.b		fword_od
5162

5163
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5164
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5165
	bsr.l		_imem_read_long
5166

5167
	tst.l		%d1			# did ifetch fail?
5168
	bne.l		fcea_iacc		# yes
5169

5170
	bra.b		fadd_them
5171

5172
fword_od:
5173
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5174
	addq.l		&0x2,EXC_EXTWPTR(%a6)	# incr instruction ptr
5175
	bsr.l		_imem_read_word
5176

5177
	tst.l		%d1			# did ifetch fail?
5178
	bne.l		fcea_iacc		# yes
5179

5180
	ext.l		%d0			# sign extend od
5181
	bra.b		fadd_them
5182

5183
fnull_od:
5184
	clr.l		%d0			# od = 0
5185

5186
fadd_them:
5187
	mov.l		%d0,%d4			# put od in d4
5188

5189
	btst		&0x2,%d5		# pre or post indexing?
5190
	beq.b		fpre_indexed
5191

5192
# post-indexed: read the pointer at (base + bd), then add index and od.
	mov.l		%d3,%a0
5193
	bsr.l		_dmem_read_long
5194

5195
	tst.l		%d1			# did dfetch fail?
5196
	bne.w		fcea_err		# yes
5197

5198
	add.l		%d2,%d0			# <ea> += index
5199
	add.l		%d4,%d0			# <ea> += od
5200
	bra.b		fdone_ea
5201

5202
# pre-indexed: read the pointer at (base + bd + index), then add od.
fpre_indexed:
5203
	add.l		%d2,%d3			# preindexing
5204
	mov.l		%d3,%a0
5205
	bsr.l		_dmem_read_long
5206

5207
	tst.l		%d1			# did dfetch fail?
5208
	bne.w		fcea_err		# yes
5209

5210
	add.l		%d4,%d0			# ea += od
5211
	bra.b		fdone_ea
5212

5213
faii_bd:
5214
	add.l		%d2,%d3			# ea = (base + bd) + index
5215
	mov.l		%d3,%d0
5216
fdone_ea:
5217
	mov.l		%d0,%a0			# return <ea> in a0
5218

5219
	movm.l		(%sp)+,&0x003c		# restore d2-d5
5220
	rts
5221

5222
#########################################################
5223
# fcea_err: exit taken when _dmem_read_long() fails inside fcalc_mem_ind.
# Passes a0 = d3 (the address that was handed to _dmem_read_long) and
# d0 = 0x0101 to iea_dacc after restoring d2-d5. Per the fmovm_dynamic
# header, d0 is the FSLW and a0 the fault address on a data access error.
fcea_err:
5224
	mov.l		%d3,%a0			# a0 = address of the failed read
5225

5226
	movm.l		(%sp)+,&0x003c		# restore d2-d5
5227
	mov.w		&0x0101,%d0		# d0 = FSLW value passed to iea_dacc
5228
	bra.l		iea_dacc
5229

5230
# fcea_iacc: exit taken when an instruction-stream fetch fails inside
# fcalc_mem_ind. Restores d2-d5 (saved on entry there) and continues at
# iea_iacc.
fcea_iacc:
5231
	movm.l		(%sp)+,&0x003c		# restore d2-d5
5232
	bra.l		iea_iacc
5233

5234
# fmovm_out_err / fmovm_in_err: exits taken when _dmem_write() or
# _dmem_read() reports a failure during the fmovm data transfer.
# Both call restore(), load d0 with a direction-specific constant
# (0x00e1 for the store path, 0x0161 for the load path), reload the <ea>
# saved in L_SCR1 into a0, and continue at iea_dacc. Per the
# fmovm_dynamic header, d0 is the FSLW and a0 the fault address.
fmovm_out_err:
5235
	bsr.l		restore
5236
	mov.w		&0x00e1,%d0		# d0 = FSLW value for the failed store
5237
	bra.b		fmovm_err
5238

5239
fmovm_in_err:
5240
	bsr.l		restore
5241
	mov.w		&0x0161,%d0		# d0 = FSLW value for the failed load
5242

5243
fmovm_err:
5244
	mov.l		L_SCR1(%a6),%a0		# a0 = <ea> saved before the transfer
5245
	bra.l		iea_dacc
5246

5247
#########################################################################
5248
# XDEF ****************************************************************	#
5249
#	fmovm_ctrl(): emulate fmovm.l of control registers instr	#
5250
#									#
5251
# XREF ****************************************************************	#
5252
#	_imem_read_long() - read longword from memory			#
5253
#	iea_iacc() - _imem_read_long() failed; error recovery		#
5254
#									#
5255
# INPUT ***************************************************************	#
5256
#	None								#
5257
#									#
5258
# OUTPUT **************************************************************	#
5259
#	If _imem_read_long() doesn't fail:				#
5260
#		USER_FPCR(a6)  = new FPCR value				#
5261
#		USER_FPSR(a6)  = new FPSR value				#
5262
#		USER_FPIAR(a6) = new FPIAR value			#
5263
#									#
5264
# ALGORITHM ***********************************************************	#
5265
#	Decode the instruction type by looking at the extension word	#
5266
# in order to see how many control registers to fetch from memory.	#
5267
# Fetch them using _imem_read_long(). If this fetch fails, exit through	#
5268
# the special access error exit handler iea_iacc().			#
5269
#									#
5270
# Instruction word decoding:						#
5271
#									#
5272
#	fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}				#
5273
#									#
5274
#		WORD1			WORD2				#
5275
#	1111 0010 00 111100	100$ $$00 0000 0000			#
5276
#									#
5277
#	$$$ (100): FPCR							#
5278
#	    (010): FPSR							#
5279
#	    (001): FPIAR						#
5280
#	    (000): FPIAR						#
5281
#									#
5282
#########################################################################
5283

5284
	global		fmovm_ctrl
5285
# fmovm_ctrl: fetch the immediate longwords of "fmovem.l #<data>,<ctrl regs>"
# from the instruction stream and store them into USER_FPCR/USER_FPSR/
# USER_FPIAR.  The first byte of EXC_EXTWORD selects the register set.
# Every fetch reads the longword at EXC_EXTWPTR(a6) with _imem_read_long,
# advances EXC_EXTWPTR by 4, and exits through iea_iacc if d1 comes back
# non-zero.  Values are always consumed in the order FPCR, FPSR, FPIAR.
fmovm_ctrl:
5286
	mov.b		EXC_EXTWORD(%a6),%d0	# fetch reg select bits
5287
	cmpi.b		%d0,&0x9c		# fpcr & fpsr & fpiar ?
5288
	beq.w		fctrl_in_7		# yes
5289
	cmpi.b		%d0,&0x98		# fpcr & fpsr ?
5290
	beq.w		fctrl_in_6		# yes
5291
	cmpi.b		%d0,&0x94		# fpcr & fpiar ?
5292
	beq.b		fctrl_in_5		# yes
5293

5294
# fmovem.l #<data>, fpsr/fpiar
# (no explicit compare: any other select value falls through to here)
5295
fctrl_in_3:
5296
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5297
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5298
	bsr.l		_imem_read_long		# fetch FPSR from mem
5299

5300
	tst.l		%d1			# did ifetch fail?
5301
	bne.l		iea_iacc		# yes
5302

5303
	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
5304
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5305
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5306
	bsr.l		_imem_read_long		# fetch FPIAR from mem
5307

5308
	tst.l		%d1			# did ifetch fail?
5309
	bne.l		iea_iacc		# yes
5310

5311
	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5312
	rts
5313

5314
# fmovem.l #<data>, fpcr/fpiar
5315
fctrl_in_5:
5316
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5317
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5318
	bsr.l		_imem_read_long		# fetch FPCR from mem
5319

5320
	tst.l		%d1			# did ifetch fail?
5321
	bne.l		iea_iacc		# yes
5322

5323
	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
5324
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5325
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5326
	bsr.l		_imem_read_long		# fetch FPIAR from mem
5327

5328
	tst.l		%d1			# did ifetch fail?
5329
	bne.l		iea_iacc		# yes
5330

5331
	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5332
	rts
5333

5334
# fmovem.l #<data>, fpcr/fpsr
5335
fctrl_in_6:
5336
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5337
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5338
	bsr.l		_imem_read_long		# fetch FPCR from mem
5339

5340
	tst.l		%d1			# did ifetch fail?
5341
	bne.l		iea_iacc		# yes
5342

5343
	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
5344
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5345
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5346
	bsr.l		_imem_read_long		# fetch FPSR from mem
5347

5348
	tst.l		%d1			# did ifetch fail?
5349
	bne.l		iea_iacc		# yes
5350

5351
	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
5352
	rts
5353

5354
# fmovem.l #<data>, fpcr/fpsr/fpiar
5355
fctrl_in_7:
5356
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5357
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5358
	bsr.l		_imem_read_long		# fetch FPCR from mem
5359

5360
	tst.l		%d1			# did ifetch fail?
5361
	bne.l		iea_iacc		# yes
5362

5363
	mov.l		%d0,USER_FPCR(%a6)	# store new FPCR to stack
5364
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5365
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5366
	bsr.l		_imem_read_long		# fetch FPSR from mem
5367

5368
	tst.l		%d1			# did ifetch fail?
5369
	bne.l		iea_iacc		# yes
5370

5371
	mov.l		%d0,USER_FPSR(%a6)	# store new FPSR to stack
5372
	mov.l		EXC_EXTWPTR(%a6),%a0	# fetch instruction addr
5373
	addq.l		&0x4,EXC_EXTWPTR(%a6)	# incr instruction ptr
5374
	bsr.l		_imem_read_long		# fetch FPIAR from mem
5375

5376
	tst.l		%d1			# did ifetch fail?
5377
	bne.l		iea_iacc		# yes
5378

5379
	mov.l		%d0,USER_FPIAR(%a6)	# store new FPIAR to stack
5380
	rts
5381

5382
##########################################################################
5383

5384
#########################################################################
5385
# XDEF ****************************************************************	#
5386
#	addsub_scaler2(): scale inputs to fadd/fsub such that no	#
5387
#			  OVFL/UNFL exceptions will result		#
5388
#									#
5389
# XREF ****************************************************************	#
5390
#	norm() - normalize mantissa after adjusting exponent		#
5391
#									#
5392
# INPUT ***************************************************************	#
5393
#	FP_SRC(a6) = fp op1(src)					#
5394
#	FP_DST(a6) = fp op2(dst)					#
5395
#									#
5396
# OUTPUT **************************************************************	#
5397
#	FP_SRC(a6) = fp op1 scaled(src)					#
5398
#	FP_DST(a6) = fp op2 scaled(dst)					#
5399
#	d0         = scale amount					#
5400
#									#
5401
# ALGORITHM ***********************************************************	#
5402
#	If the DST exponent is > the SRC exponent, set the DST exponent	#
5403
# equal to 0x3fff and scale the SRC exponent by the value that the	#
5404
# DST exponent was scaled by. If the SRC exponent is greater or equal,	#
5405
# do the opposite. Return this scale factor in d0.			#
5406
#	If the two exponents differ by > the number of mantissa bits	#
5407
# plus two, then set the smallest exponent to a very small value as a	#
5408
# quick shortcut.							#
5409
#									#
5410
#########################################################################
5411

5412
	global		addsub_scaler2
5413
# addsub_scaler2: copy the operand at (a0) into FP_SCR0 and the operand at
# (a1) into FP_SCR1, then scale the pair.  The operand with the larger
# exponent is rescaled by scale_to_zero_dst/scale_to_zero_src; the other
# operand's exponent is adjusted by the same scale factor, or forced to 1
# when it is at least mantissalen+2 below the larger one.
# L_SCR1(a6) is used as two words: {src exponent, dst exponent}.
# The scale factor is kept on the stack meanwhile and returned in d0.
addsub_scaler2:
5414
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
5415
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
5416
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
5417
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
5418
	mov.w		SRC_EX(%a0),%d0
5419
	mov.w		DST_EX(%a1),%d1
5420
	mov.w		%d0,FP_SCR0_EX(%a6)
5421
	mov.w		%d1,FP_SCR1_EX(%a6)
5422

5423
	andi.w		&0x7fff,%d0		# strip sign from src {sgn,exp}
5424
	andi.w		&0x7fff,%d1		# strip sign from dst {sgn,exp}
5425
	mov.w		%d0,L_SCR1(%a6)		# store src exponent
5426
	mov.w		%d1,2+L_SCR1(%a6)	# store dst exponent
5427

5428
	cmp.w		%d0, %d1		# is src exp >= dst exp?
5429
	bge.l		src_exp_ge2
5430

5431
# dst exp is >  src exp; scale dst to exp = 0x3fff
5432
dst_exp_gt2:
5433
	bsr.l		scale_to_zero_dst
5434
	mov.l		%d0,-(%sp)		# save scale factor
5435

5436
	cmpi.b		STAG(%a6),&DENORM	# is src denormalized?
5437
	bne.b		cmpexp12
5438

5439
	lea		FP_SCR0(%a6),%a0
5440
	bsr.l		norm			# normalize the denorm; result is new exp
5441
	neg.w		%d0			# new exp = -(shft val)
5442
	mov.w		%d0,L_SCR1(%a6)		# insert new src exp
5443

5444
cmpexp12:
5445
	mov.w		2+L_SCR1(%a6),%d0
5446
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
5447

5448
	cmp.w		%d0,L_SCR1(%a6)		# is difference >= len(mantissa)+2?
5449
	bge.b		quick_scale12
5450

5451
	mov.w		L_SCR1(%a6),%d0
5452
	add.w		0x2(%sp),%d0		# scale src exponent by scale factor
5453
	mov.w		FP_SCR0_EX(%a6),%d1
5454
	and.w		&0x8000,%d1
5455
	or.w		%d1,%d0			# concat {sgn,new exp}
5456
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new src exponent
5457

5458
	mov.l		(%sp)+,%d0		# return SCALE factor
5459
	rts
5460

5461
quick_scale12:
5462
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# zero src exponent
5463
	bset		&0x0,1+FP_SCR0_EX(%a6)	# set exp = 1
5464

5465
	mov.l		(%sp)+,%d0		# return SCALE factor
5466
	rts
5467

5468
# src exp is >= dst exp; scale src to exp = 0x3fff
5469
src_exp_ge2:
5470
	bsr.l		scale_to_zero_src
5471
	mov.l		%d0,-(%sp)		# save scale factor
5472

5473
	cmpi.b		DTAG(%a6),&DENORM	# is dst denormalized?
5474
	bne.b		cmpexp22
5475
	lea		FP_SCR1(%a6),%a0
5476
	bsr.l		norm			# normalize the denorm; result is new exp
5477
	neg.w		%d0			# new exp = -(shft val)
5478
	mov.w		%d0,2+L_SCR1(%a6)	# insert new dst exp
5479

5480
cmpexp22:
5481
	mov.w		L_SCR1(%a6),%d0
5482
	subi.w		&mantissalen+2,%d0	# subtract mantissalen+2 from larger exp
5483

5484
	cmp.w		%d0,2+L_SCR1(%a6)	# is difference >= len(mantissa)+2?
5485
	bge.b		quick_scale22
5486

5487
	mov.w		2+L_SCR1(%a6),%d0
5488
	add.w		0x2(%sp),%d0		# scale dst exponent by scale factor
5489
	mov.w		FP_SCR1_EX(%a6),%d1
5490
	andi.w		&0x8000,%d1
5491
	or.w		%d1,%d0			# concat {sgn,new exp}
5492
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert new dst exponent
5493

5494
	mov.l		(%sp)+,%d0		# return SCALE factor
5495
	rts
5496

5497
quick_scale22:
5498
	andi.w		&0x8000,FP_SCR1_EX(%a6)	# zero dst exponent
5499
	bset		&0x0,1+FP_SCR1_EX(%a6)	# set exp = 1
5500

5501
	mov.l		(%sp)+,%d0		# return SCALE factor
5502
	rts
5503

5504
##########################################################################
5505

5506
#########################################################################
5507
# XDEF ****************************************************************	#
5508
#	scale_to_zero_src(): scale the exponent of extended precision	#
5509
#			     value at FP_SCR0(a6).			#
5510
#									#
5511
# XREF ****************************************************************	#
5512
#	norm() - normalize the mantissa if the operand was a DENORM	#
5513
#									#
5514
# INPUT ***************************************************************	#
5515
#	FP_SCR0(a6) = extended precision operand to be scaled		#
5516
#									#
5517
# OUTPUT **************************************************************	#
5518
#	FP_SCR0(a6) = scaled extended precision operand			#
5519
#	d0	    = scale value					#
5520
#									#
5521
# ALGORITHM ***********************************************************	#
5522
#	Set the exponent of the input operand to 0x3fff. Save the value	#
5523
# of the difference between the original and new exponent. Then,	#
5524
# normalize the operand if it was a DENORM. Add this normalization	#
5525
# value to the previous value. Return the result.			#
5526
#									#
5527
#########################################################################
5528

5529
	global		scale_to_zero_src
5530
# scale_to_zero_src: force the exponent field of FP_SCR0(a6) to 0x3fff
# (sign preserved) and return d0 = 0x3fff - old exponent.  When STAG is
# DENORM the mantissa is first normalized by norm() and the "old exponent"
# used for the subtraction is -(shift count returned by norm in d0).
scale_to_zero_src:
5531
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
5532
	mov.w		%d1,%d0			# make a copy
5533

5534
	andi.l		&0x7fff,%d1		# extract operand's exponent
5535

5536
	andi.w		&0x8000,%d0		# extract operand's sgn
5537
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
5538

5539
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert biased exponent
5540

5541
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
5542
	beq.b		stzs_denorm		# normalize the DENORM
5543

5544
# d1 = exponent to scale from (also reached from stzs_denorm)
stzs_norm:
5545
	mov.l		&0x3fff,%d0
5546
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5547

5548
	rts
5549

5550
stzs_denorm:
5551
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
5552
	bsr.l		norm			# normalize denorm
5553
	neg.l		%d0			# new exponent = -(shft val)
5554
	mov.l		%d0,%d1			# d1 = exponent expected by stzs_norm
5555
	bra.b		stzs_norm		# finish scaling
5556

5557
###
5558

5559
#########################################################################
5560
# XDEF ****************************************************************	#
5561
#	scale_sqrt(): scale the input operand exponent so a subsequent	#
5562
#		      fsqrt operation won't take an exception.		#
5563
#									#
5564
# XREF ****************************************************************	#
5565
#	norm() - normalize the mantissa if the operand was a DENORM	#
5566
#									#
5567
# INPUT ***************************************************************	#
5568
#	FP_SCR0(a6) = extended precision operand to be scaled		#
5569
#									#
5570
# OUTPUT **************************************************************	#
5571
#	FP_SCR0(a6) = scaled extended precision operand			#
5572
#	d0	    = scale value					#
5573
#									#
5574
# ALGORITHM ***********************************************************	#
5575
#	If the input operand is a DENORM, normalize it.			#
5576
#	If the exponent of the input operand is even, set the exponent	#
5577
# to 0x3ffe and return a scale factor of "(exp-0x3ffe)/2". If the	#
5578
# exponent of the input operand is odd, set the exponent to 0x3fff and	#
5579
# return a scale factor of "(exp-0x3fff)/2".				#
5580
#									#
5581
#########################################################################
5582

5583
	global		scale_sqrt
5584
# scale_sqrt: rewrite the exponent field of FP_SCR0(a6) to 0x3fff when the
# operand's exponent is odd or to 0x3ffe when it is even (sign preserved),
# and return in d0 half of (new exponent - old exponent).  For a DENORM the
# mantissa is normalized first and the parity test / scale computation use
# the shift count returned by norm() in d0.
scale_sqrt:
5585
	cmpi.b		STAG(%a6),&DENORM	# is operand a DENORM?
5586
	beq.b		ss_denorm		# normalize the DENORM
5587

5588
	mov.w		FP_SCR0_EX(%a6),%d1	# extract operand's {sgn,exp}
5589
	andi.l		&0x7fff,%d1		# extract operand's exponent
5590

5591
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# extract operand's sgn
5592

5593
	btst		&0x0,%d1		# is exp even or odd?
5594
	beq.b		ss_norm_even
5595

5596
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
5597

5598
	mov.l		&0x3fff,%d0
5599
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5600
	asr.l		&0x1,%d0		# divide scale factor by 2
5601
	rts
5602

5603
ss_norm_even:
5604
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent field 0x3ffe
5605

5606
	mov.l		&0x3ffe,%d0
5607
	sub.l		%d1,%d0			# scale = 0x3ffe + (-exp)
5608
	asr.l		&0x1,%d0		# divide scale factor by 2
5609
	rts
5610

5611
# NOTE(review): unlike the normalized path, the exponent field is not
# cleared before the ori below; this relies on the exponent bits of
# FP_SCR0_EX already being zero here -- confirm norm() leaves them untouched.
ss_denorm:
5612
	lea		FP_SCR0(%a6),%a0	# pass ptr to src op
5613
	bsr.l		norm			# normalize denorm
5614

5615
	btst		&0x0,%d0		# is shift count even or odd?
5616
	beq.b		ss_denorm_even
5617

5618
	ori.w		&0x3fff,FP_SCR0_EX(%a6)	# insert new operand's exponent(=0)
5619

5620
	add.l		&0x3fff,%d0		# scale = BIAS + shift count
5621
	asr.l		&0x1,%d0		# divide scale factor by 2
5622
	rts
5623

5624
ss_denorm_even:
5625
	ori.w		&0x3ffe,FP_SCR0_EX(%a6)	# insert new exponent field 0x3ffe
5626

5627
	add.l		&0x3ffe,%d0		# scale = 0x3ffe + shift count
5628
	asr.l		&0x1,%d0		# divide scale factor by 2
5629
	rts
5630

5631
###
5632

5633
#########################################################################
5634
# XDEF ****************************************************************	#
5635
#	scale_to_zero_dst(): scale the exponent of extended precision	#
5636
#			     value at FP_SCR1(a6).			#
5637
#									#
5638
# XREF ****************************************************************	#
5639
#	norm() - normalize the mantissa if the operand was a DENORM	#
5640
#									#
5641
# INPUT ***************************************************************	#
5642
#	FP_SCR1(a6) = extended precision operand to be scaled		#
5643
#									#
5644
# OUTPUT **************************************************************	#
5645
#	FP_SCR1(a6) = scaled extended precision operand			#
5646
#	d0	    = scale value					#
5647
#									#
5648
# ALGORITHM ***********************************************************	#
5649
#	Set the exponent of the input operand to 0x3fff. Save the value	#
5650
# of the difference between the original and new exponent. Then,	#
5651
# normalize the operand if it was a DENORM. Add this normalization	#
5652
# value to the previous value. Return the result.			#
5653
#									#
5654
#########################################################################
5655

5656
	global		scale_to_zero_dst
5657
# scale_to_zero_dst: force the exponent field of FP_SCR1(a6) to 0x3fff
# (sign preserved) and return d0 = 0x3fff - old exponent.  When DTAG is
# DENORM the mantissa is first normalized by norm() and the "old exponent"
# used for the subtraction is -(shift count returned by norm in d0).
scale_to_zero_dst:
5658
	mov.w		FP_SCR1_EX(%a6),%d1	# extract operand's {sgn,exp}
5659
	mov.w		%d1,%d0			# make a copy
5660

5661
	andi.l		&0x7fff,%d1		# extract operand's exponent
5662

5663
	andi.w		&0x8000,%d0		# extract operand's sgn
5664
	or.w		&0x3fff,%d0		# insert new operand's exponent(=0)
5665

5666
	mov.w		%d0,FP_SCR1_EX(%a6)	# insert biased exponent
5667

5668
	cmpi.b		DTAG(%a6),&DENORM	# is operand a DENORM?
5669
	beq.b		stzd_denorm		# normalize the DENORM
5670

5671
# d1 = exponent to scale from (also reached from stzd_denorm)
stzd_norm:
5672
	mov.l		&0x3fff,%d0
5673
	sub.l		%d1,%d0			# scale = BIAS + (-exp)
5674
	rts
5675

5676
stzd_denorm:
5677
	lea		FP_SCR1(%a6),%a0	# pass ptr to dst op
5678
	bsr.l		norm			# normalize denorm
5679
	neg.l		%d0			# new exponent = -(shft val)
5680
	mov.l		%d0,%d1			# d1 = exponent expected by stzd_norm
5681
	bra.b		stzd_norm		# finish scaling
5682

5683
##########################################################################
5684

5685
#########################################################################
5686
# XDEF ****************************************************************	#
5687
#	res_qnan(): return default result w/ QNAN operand for dyadic	#
5688
#	res_snan(): return default result w/ SNAN operand for dyadic	#
5689
#	res_qnan_1op(): return dflt result w/ QNAN operand for monadic	#
5690
#	res_snan_1op(): return dflt result w/ SNAN operand for monadic	#
5691
#									#
5692
# XREF ****************************************************************	#
5693
#	None								#
5694
#									#
5695
# INPUT ***************************************************************	#
5696
#	FP_SRC(a6) = pointer to extended precision src operand		#
5697
#	FP_DST(a6) = pointer to extended precision dst operand		#
5698
#									#
5699
# OUTPUT **************************************************************	#
5700
#	fp0 = default result						#
5701
#									#
5702
# ALGORITHM ***********************************************************	#
5703
#	If either operand (but not both operands) of an operation is a	#
5704
# nonsignalling NAN, then that NAN is returned as the result. If both	#
5705
# operands are nonsignalling NANs, then the destination operand		#
5706
# nonsignalling NAN is returned as the result.				#
5707
#	If either operand to an operation is a signalling NAN (SNAN),	#
5708
# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap	#
5709
# enable bit is set in the FPCR, then the trap is taken and the		#
5710
# destination is not modified. If the SNAN trap enable bit is not set,	#
5711
# then the SNAN is converted to a nonsignalling NAN (by setting the	#
5712
# SNAN bit in the operand to one), and the operation continues as	#
5713
# described in the preceding paragraph, for nonsignalling NANs.		#
5714
#	Make sure the appropriate FPSR bits are set before exiting.	#
5715
#									#
5716
#########################################################################
5717

5718
	global		res_qnan
5719
	global		res_snan
5720
# res_qnan / res_snan share one entry: pick the NAN operand to return, set
# the matching USER_FPSR bits, and load the chosen operand with fmovm.x.
# Selection order: dst SNAN, then dst QNAN, then src (QNAN if STAG says so,
# otherwise handled as an SNAN).  res_snan_1op / res_qnan_1op enter at the
# src-only handling.  a0 points at the chosen operand when nan_comp is
# reached.
res_qnan:
5721
res_snan:
5722
	cmp.b		DTAG(%a6), &SNAN	# is the dst an SNAN?
5723
	beq.b		dst_snan2
5724
	cmp.b		DTAG(%a6), &QNAN	# is the dst a  QNAN?
5725
	beq.b		dst_qnan2
5726
src_nan:
5727
	cmp.b		STAG(%a6), &QNAN	# is the src a  QNAN?
5728
	beq.b		src_qnan2
5729
	global		res_snan_1op
5730
# src is not a QNAN: falls through and is handled as an SNAN
res_snan_1op:
5731
src_snan2:
5732
	bset		&0x6, FP_SRC_HI(%a6)	# set SNAN bit
5733
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5734
	lea		FP_SRC(%a6), %a0
5735
	bra.b		nan_comp
5736
	global		res_qnan_1op
5737
res_qnan_1op:
5738
src_qnan2:
5739
	or.l		&nan_mask, USER_FPSR(%a6)
5740
	lea		FP_SRC(%a6), %a0
5741
	bra.b		nan_comp
5742
dst_snan2:
5743
	or.l		&nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5744
	bset		&0x6, FP_DST_HI(%a6)	# set SNAN bit
5745
	lea		FP_DST(%a6), %a0
5746
	bra.b		nan_comp
5747
# dst QNAN is returned; an SNAN src still raises aiop+snan in USER_FPSR
dst_qnan2:
5748
	lea		FP_DST(%a6), %a0
5749
	cmp.b		STAG(%a6), &SNAN	# is the src an SNAN?
5750
	bne		nan_done		# no; only the nan bit is set
5751
	or.l		&aiop_mask+snan_mask, USER_FPSR(%a6)
5752
nan_done:
5753
	or.l		&nan_mask, USER_FPSR(%a6)
5754
nan_comp:
5755
	btst		&0x7, FTEMP_EX(%a0)	# is NAN neg?
5756
	beq.b		nan_not_neg
5757
	or.l		&neg_mask, USER_FPSR(%a6)
5758
nan_not_neg:
5759
	fmovm.x		(%a0), &0x80		# load the chosen NAN as the result
5760
	rts
5761

5762
#########################################################################
5763
# XDEF ****************************************************************	#
5764
#	res_operr(): return default result during operand error		#
5765
#									#
5766
# XREF ****************************************************************	#
5767
#	None								#
5768
#									#
5769
# INPUT ***************************************************************	#
5770
#	None								#
5771
#									#
5772
# OUTPUT **************************************************************	#
5773
#	fp0 = default operand error result				#
5774
#									#
5775
# ALGORITHM ***********************************************************	#
5776
#	A nonsignalling NAN is returned as the default result when	#
5777
# an operand error occurs for the following cases:			#
5778
#									#
5779
#	Multiply: (Infinity x Zero)					#
5780
#	Divide  : (Zero / Zero) || (Infinity / Infinity)		#
5781
#									#
5782
#########################################################################
5783

5784
	global		res_operr
5785
# res_operr: set the nan, operr and aiop bits in USER_FPSR and load the
# constant at nan_return (PC-relative) as the result with fmovm.x.
res_operr:
5786
	or.l		&nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6)
5787
	fmovm.x		nan_return(%pc), &0x80
5788
	rts
5789

5790
# three longwords read by the fmovm.x above:
# {0x7fff0000, 0xffffffff, 0xffffffff}
nan_return:
5791
	long		0x7fff0000, 0xffffffff, 0xffffffff
5792

5793
#########################################################################
5794
# XDEF ****************************************************************	#
5795
#	_denorm(): denormalize an intermediate result			#
5796
#									#
5797
# XREF ****************************************************************	#
5798
#	None								#
5799
#									#
5800
# INPUT *************************************************************** #
5801
#	a0 = points to the operand to be denormalized			#
5802
#		(in the internal extended format)			#
5803
#									#
5804
#	d0 = rounding precision						#
5805
#									#
5806
# OUTPUT **************************************************************	#
5807
#	a0 = pointer to the denormalized result				#
5808
#		(in the internal extended format)			#
5809
#									#
5810
#	d0 = guard,round,sticky						#
5811
#									#
5812
# ALGORITHM ***********************************************************	#
5813
#	According to the exponent underflow threshold for the given	#
5814
# precision, shift the mantissa bits to the right in order to raise the	#
5815
# exponent of the operand to the threshold value. While shifting the	#
5816
# mantissa bits right, maintain the value of the guard, round, and	#
5817
# sticky bits.								#
5818
# other notes:								#
5819
#	(1) _denorm() is called by the underflow routines		#
5820
#	(2) _denorm() does NOT affect the status register		#
5821
#									#
5822
#########################################################################
5823

5824
#
5825
# table of exponent threshold values for each precision
5826
#
5827
# tbl_thresh: one 16-bit threshold per precision, indexed by (d0 >> 2)
# in _denorm below: entry 0 = 0x0, entry 1 = sgl_thresh, entry 2 = dbl_thresh.
tbl_thresh:
5828
	short		0x0
5829
	short		sgl_thresh
5830
	short		dbl_thresh
5831

5832
	global		_denorm
5833
_denorm:
5834
#
5835
# Load the exponent threshold for the precision selected and check
5836
# to see if (threshold - exponent) is > 65 in which case we can
5837
# simply calculate the sticky bit and zero the mantissa. otherwise
5838
# we have to call the denormalization routine.
5839
#
5840
	lsr.b		&0x2, %d0		# shift prec to lo bits
5841
	mov.w		(tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5842
	mov.w		%d1, %d0		# copy d1 into d0
5843
	sub.w		FTEMP_EX(%a0), %d0	# diff = threshold - exp
5844
	cmpi.w		%d0, &66		# is diff > 65? (mant + g,r bits)
5845
	bpl.b		denorm_set_stky		# yes; just calc sticky
5846

5847
	clr.l		%d0			# clear g,r,s
5848
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
5849
	beq.b		denorm_call		# no; don't change anything
5850
	bset		&29, %d0		# yes; set sticky bit
5851

5852
# d0 = initial g,r,s ; d1 = threshold ; a0 = operand
denorm_call:
5853
	bsr.l		dnrm_lp			# denormalize the number
5854
	rts
5855

5856
#
5857
# all bits would have been shifted off during the denorm so simply
5858
# set the sticky bit in the return value and clear the entire mantissa.
5859
#
5860
denorm_set_stky:
5861
	mov.l		&0x20000000, %d0	# set sticky bit in return value
5862
	mov.w		%d1, FTEMP_EX(%a0)	# load exp with threshold
5863
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
5864
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
5865
	rts
5866

5867
#									#
5868
# dnrm_lp(): normalize exponent/mantissa to specified threshold		#
5869
#									#
5870
# INPUT:								#
5871
#	%a0	   : points to the operand to be denormalized		#
5872
#	%d0{31:29} : initial guard,round,sticky				#
5873
#	%d1{15:0}  : denormalization threshold				#
5874
# OUTPUT:								#
5875
#	%a0	   : points to the denormalized operand			#
5876
#	%d0{31:29} : final guard,round,sticky				#
5877
#									#
5878

5879
# *** Local Equates *** #
5880
# Scratch slots used by dnrm_lp.  The bitfield extracts in case_1/case_2
# read from FTEMP_LO2 onward and expect the g,r,s longword right behind it.
# NOTE(review): that relies on L_SCR2 directly following L_SCR1 in the
# frame -- confirm against the frame layout.
set	GRS,		L_SCR2			# g,r,s temp storage
5881
set	FTEMP_LO2,	L_SCR1			# FTEMP_LO copy
5882

5883
	global		dnrm_lp
5884
# dnrm_lp: entry and dispatch.  Saves FTEMP_LO and the incoming g,r,s,
# computes the shift amount d1 = threshold - exponent, and branches to
# case_1 / case_2 / case_3 according to its size (or returns unchanged
# g,r,s when the amount is <= 0).  d0 carries the threshold to the cases.
dnrm_lp:
5885

5886
#
5887
# make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888
# in memory so as to make the bitfield extraction for denormalization easier.
5889
#
5890
	mov.l		FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891
	mov.l		%d0, GRS(%a6)		# place g,r,s after it
5892

5893
#
5894
# check to see how much less than the underflow threshold the operand
5895
# exponent is.
5896
#
5897
	mov.l		%d1, %d0		# copy the denorm threshold
5898
	sub.w		FTEMP_EX(%a0), %d1	# d1 = threshold - uns exponent
5899
	ble.b		dnrm_no_lp		# d1 <= 0
5900
	cmpi.w		%d1, &0x20		# is ( 0 <  d1 < 32) ?
5901
	blt.b		case_1			# yes
5902
	cmpi.w		%d1, &0x40		# is (32 <= d1 < 64) ?
5903
	blt.b		case_2			# yes
5904
	bra.w		case_3			# (d1 >= 64)
5905

5906
#
5907
# No denormalization necessary
5908
#
5909
dnrm_no_lp:
5910
	mov.l		GRS(%a6), %d0		# restore original g,r,s
5911
	rts
5912

5913
#
5914
# case (0<d1<32)
5915
#
5916
# %d0 = denorm threshold
5917
# %d1 = "n" = amt to shift
5918
#
5919
#	---------------------------------------------------------
5920
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5921
#	---------------------------------------------------------
5922
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923
#	\	   \		      \			 \
5924
#	 \	    \		       \		  \
5925
#	  \	     \			\		   \
5926
#	   \	      \			 \		    \
5927
#	    \	       \		  \		     \
5928
#	     \		\		   \		      \
5929
#	      \		 \		    \		       \
5930
#	       \	  \		     \			\
5931
#	<-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932
#	---------------------------------------------------------
5933
#	|0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs		|
5934
#	---------------------------------------------------------
5935
#
5936
# case_1: shift amount n = d1 with 0 < n < 32.  The mantissa and g,r,s are
# shifted right by n using three bitfield extracts with offset (32 - n);
# the exponent is replaced by the threshold held in d0.  d2 is used as a
# temporary and is saved/restored on the stack.
# Returns d0 = new g,r,s (bits 31:29 only).
case_1:
5937
	mov.l		%d2, -(%sp)		# create temp storage
5938

5939
	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5940
	mov.l		&32, %d0
5941
	sub.w		%d1, %d0		# %d0 = 32 - %d1
5942

5943
	cmpi.w		%d1, &29		# is shft amt >= 29
5944
	blt.b		case1_extract		# no; no fix needed
5945
	mov.b		GRS(%a6), %d2		# yes; fetch byte holding old g,r,s
5946
	or.b		%d2, 3+FTEMP_LO2(%a6)	# or it into low byte of FTEMP_LO copy
5947

5948
case1_extract:
5949
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951
	bfextu		FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952

5953
	mov.l		%d2, FTEMP_HI(%a0)	# store new FTEMP_HI
5954
	mov.l		%d1, FTEMP_LO(%a0)	# store new FTEMP_LO
5955

5956
	bftst		%d0{&2:&30}		# were bits shifted off?
5957
	beq.b		case1_sticky_clear	# no; go finish
5958
	bset		&rnd_stky_bit, %d0	# yes; set sticky bit
5959

5960
case1_sticky_clear:
5961
	and.l		&0xe0000000, %d0	# clear all but G,R,S
5962
	mov.l		(%sp)+, %d2		# restore temp register
5963
	rts
5964

5965
#
5966
# case (32<=d1<64)
5967
#
5968
# %d0 = denorm threshold
5969
# %d1 = "n" = amt to shift
5970
#
5971
#	---------------------------------------------------------
5972
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
5973
#	---------------------------------------------------------
5974
#	<-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975
#	\	   \		      \
5976
#	 \	    \		       \
5977
#	  \	     \			-------------------
5978
#	   \	      --------------------		   \
5979
#	    -------------------		  \		    \
5980
#			       \	   \		     \
5981
#				\	    \		      \
5982
#				 \	     \		       \
5983
#	<-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984
#	---------------------------------------------------------
5985
#	|0...............0|0....0| NEW_LO     |grs		|
5986
#	---------------------------------------------------------
5987
#
5988
# case_2: shift amount n = d1 with 32 <= n < 64.  FTEMP_HI becomes zero,
# the new FTEMP_LO and g,r,s are extracted from the old FTEMP_HI with
# offset (32 - (n - 32)), and sticky is set when any bit below g,r was
# shifted off.  The exponent is replaced by the threshold held in d0.
# d2 is used as a temporary and is saved/restored on the stack.
# Returns d0 = new g,r,s (bits 31:29 only).
case_2:
5989
	mov.l		%d2, -(%sp)		# create temp storage
5990

5991
	mov.w		%d0, FTEMP_EX(%a0)	# exponent = denorm threshold
5992
	subi.w		&0x20, %d1		# %d1 now between 0 and 32
5993
	mov.l		&0x20, %d0
5994
	sub.w		%d1, %d0		# %d0 = 32 - %d1
5995

5996
# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
5997
# the number of bits to check for the sticky detect.
5998
# it only plays a role in shift amounts of 61-63.
5999
	mov.b		GRS(%a6), %d2
6000
	or.b		%d2, 3+FTEMP_LO2(%a6)
6001

6002
	bfextu		FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003
	bfextu		FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004

6005
	bftst		%d1{&2:&30}		# were any bits shifted off?
6006
	bne.b		case2_set_sticky	# yes; set sticky bit
6007
	bftst		FTEMP_LO2(%a6){%d0:&31}	# were any bits shifted off?
6008
	bne.b		case2_set_sticky	# yes; set sticky bit
6009

6010
	mov.l		%d1, %d0		# move new G,R,S to %d0
6011
	bra.b		case2_end
6012

6013
case2_set_sticky:
6014
	mov.l		%d1, %d0		# move new G,R,S to %d0
6015
	bset		&rnd_stky_bit, %d0	# set sticky bit
6016

6017
case2_end:
6018
	clr.l		FTEMP_HI(%a0)		# store FTEMP_HI = 0
6019
	mov.l		%d2, FTEMP_LO(%a0)	# store FTEMP_LO
6020
	and.l		&0xe0000000, %d0	# clear all but G,R,S
6021

6022
	mov.l		(%sp)+,%d2		# restore temp register
6023
	rts
6024

6025
#
6026
# case (d1>=64)
6027
#
6028
# %d0 = denorm threshold
6029
# %d1 = amt to shift
6030
#
6031
# case_3: shift amount d1 >= 64.  Stores the threshold (d0) as the new
# exponent, then dispatches on the exact amount: 64 -> case3_64,
# 65 -> case3_65, anything larger is handled inline below.
case_3:
6032
	mov.w		%d0, FTEMP_EX(%a0)	# insert denorm threshold
6033

6034
	cmpi.w		%d1, &65		# is shift amt > 65?
6035
	blt.b		case3_64		# no; it's == 64
6036
	beq.b		case3_65		# no; it's == 65
6037

6038
#
6039
# case (d1>65)
6040
#
6041
# Shift value is > 65 and out of range. All bits are shifted off.
6042
# Return a zero mantissa with the sticky bit set
6043
#
6044
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
6045
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
6046
	mov.l		&0x20000000, %d0	# set sticky bit
6047
	rts
6048

6049
#
6050
# case (d1 == 64)
6051
#
6052
#	---------------------------------------------------------
6053
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6054
#	---------------------------------------------------------
6055
#	<-------(32)------>
6056
#	\		   \
6057
#	 \		    \
6058
#	  \		     \
6059
#	   \		      ------------------------------
6060
#	    -------------------------------		    \
6061
#					   \		     \
6062
#					    \		      \
6063
#					     \		       \
6064
#					      <-------(32)------>
6065
#	---------------------------------------------------------
6066
#	|0...............0|0................0|grs		|
6067
#	---------------------------------------------------------
6068
#
6069
# case3_64: shift amount is exactly 64.  The top two bits of FTEMP_HI
# become G,R in d0; the remaining 30 bits are kept in d1 so that the
# final "and" leaves the condition codes describing whether any of them
# were set when case3_complete is entered.
case3_64:
6070
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
6071
	mov.l		%d0, %d1		# make a copy
6072
	and.l		&0xc0000000, %d0	# extract G,R
6073
	and.l		&0x3fffffff, %d1	# extract other bits
6074

6075
	bra.b		case3_complete
6076

6077
#
6078
# case (d1 == 65)
6079
#
6080
#	---------------------------------------------------------
6081
#	|     FTEMP_HI	  |	FTEMP_LO     |grs000.........000|
6082
#	---------------------------------------------------------
6083
#	<-------(32)------>
6084
#	\		   \
6085
#	 \		    \
6086
#	  \		     \
6087
#	   \		      ------------------------------
6088
#	    --------------------------------		    \
6089
#					    \		     \
6090
#					     \		      \
6091
#					      \		       \
6092
#					       <-------(31)----->
6093
#	---------------------------------------------------------
6094
#	|0...............0|0................0|0rs		|
6095
#	---------------------------------------------------------
6096
#
6097
# case3_65: shift amount is exactly 65.  The top bit of FTEMP_HI becomes
# the R bit of the result (G is zero); the remaining 31 bits of FTEMP_HI
# only contribute to sticky.  %d1 must hold a copy of FTEMP_HI before it
# is masked: on entry it still contains the shift amount, so masking it
# directly always gave a non-zero result and forced sticky on.
case3_65:
	mov.l		FTEMP_HI(%a0), %d0	# fetch hi(mantissa)
	mov.l		%d0, %d1		# make a copy
	and.l		&0x80000000, %d0	# extract R bit
	lsr.l		&0x1, %d0		# shift high bit into R bit
	and.l		&0x7fffffff, %d1	# extract other bits

# case3_complete: shared tail for the 64 and 65 cases.
# On entry: %d0 = new G,R bits, condition codes = result of the "and"
# that isolated the FTEMP_HI bits which were shifted off.
case3_complete:
# last operation done was an "and" of the bits shifted off so the condition
# codes are already set so branch accordingly.
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.l		FTEMP_LO(%a0)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky
	tst.b		GRS(%a6)		# were any bits shifted off?
	bne.b		case3_set_sticky	# yes; go set new sticky

#
# no bits were shifted off so don't set the sticky bit.
# %d0 keeps the G,R bits extracted above and
# the entire mantissa is zero.
#
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts

#
# some bits were shifted off so set the sticky bit.
# the entire mantissa is zero.
#
case3_set_sticky:
	bset		&rnd_stky_bit,%d0	# set new sticky bit
	clr.l		FTEMP_HI(%a0)		# clear hi(mantissa)
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
6130

6131
#########################################################################
6132
# XDEF ****************************************************************	#
6133
#	_round(): round result according to precision/mode		#
6134
#									#
6135
# XREF ****************************************************************	#
6136
#	None								#
6137
#									#
6138
# INPUT ***************************************************************	#
6139
#	a0	  = ptr to input operand in internal extended format	#
6140
#	d1(hi)    = contains rounding precision:			#
6141
#			ext = $0000xxxx					#
6142
#			sgl = $0004xxxx					#
6143
#			dbl = $0008xxxx					#
6144
#	d1(lo)	  = contains rounding mode:				#
6145
#			RN  = $xxxx0000					#
6146
#			RZ  = $xxxx0001					#
6147
#			RM  = $xxxx0002					#
6148
#			RP  = $xxxx0003					#
6149
#	d0{31:29} = contains the g,r,s bits (extended)			#
6150
#									#
6151
# OUTPUT **************************************************************	#
6152
#	a0 = pointer to rounded result					#
6153
#									#
6154
# ALGORITHM ***********************************************************	#
6155
#	On return the value pointed to by a0 is correctly rounded,	#
6156
#	a0 is preserved and the g-r-s bits in d0 are cleared.		#
6157
#	The result is not typed - the tag field is invalid.  The	#
6158
#	result is still in the internal extended format.		#
6159
#									#
6160
#	The INEX bit of USER_FPSR will be set if the rounded result was	#
6161
#	inexact (i.e. if any of the g-r-s bits were set).		#
6162
#									#
6163
#########################################################################
6164

6165
	global		_round
_round:
#
# ext_grs() looks at the rounding precision and sets the appropriate
# G,R,S bits.
# If (G,R,S == 0) then result is exact and round is done, else set
# the inex flag in status reg and continue.
#
# Note that the exact case still goes through "truncate", which masks the
# mantissa down to the selected precision before returning.
#
	bsr.l		ext_grs			# extract G,R,S

	tst.l		%d0			# are G,R,S zero?
	beq.w		truncate		# yes; round is complete

	or.w		&inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex

#
# Use rounding mode as an index into a jump table for these modes.
# All of the following assumes grs != 0.
# d1.w holds the rounding mode (RN=0, RZ=1, RM=2, RP=3); each table entry
# is a 16-bit offset relative to tbl_mode.
#
	mov.w		(tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
	jmp		(tbl_mode.b,%pc,%a1)	# jmp to rnd mode handler

tbl_mode:
	short		rnd_near - tbl_mode	# RN
	short		truncate - tbl_mode	# RZ always truncates
	short		rnd_mnus - tbl_mode	# RM
	short		rnd_plus - tbl_mode	# RP
6192

6193
#################################################################
6194
#	ROUND PLUS INFINITY					#
6195
#								#
6196
#	If sign of fp number = 0 (positive), then add 1 to l.	#
6197
#################################################################
6198
rnd_plus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bmi.w		truncate		# negative: RP just truncates

# positive and inexact: always bump the l-bit. d0 is forced non-zero so the
# "rs = 0" test in the add_* routines never clears the l-bit afterwards.
	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6209

6210
#################################################################
6211
#	ROUND MINUS INFINITY					#
6212
#								#
6213
#	If sign of fp number = 1 (negative), then add 1 to l.	#
6214
#################################################################
6215
rnd_mnus:
	tst.b		FTEMP_SGN(%a0)		# check for sign
	bpl.w		truncate		# positive: RM just truncates

# negative and inexact: always bump the l-bit. d0 is forced non-zero so the
# "rs = 0" test in the add_* routines never clears the l-bit afterwards.
	mov.l		&0xffffffff, %d0	# force g,r,s to be all f's
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6226

6227
#################################################################
6228
#	ROUND NEAREST						#
6229
#								#
6230
#	If (g=1), then add 1 to l and if (r=s=0), then clear l	#
6231
#	Note that this will round to even in case of a tie.	#
6232
#################################################################
6233
rnd_near:
	asl.l		&0x1, %d0		# shift g-bit to c-bit
	bcc.w		truncate		# g=0: below half way; truncate

# g=1: add 1 to the l-bit. d0 now holds only r,s (in its top bits); the
# add_* routines test it and clear the l-bit when r=s=0 (exact tie).
	swap		%d1			# set up d1 for round prec.

	cmpi.b		%d1, &s_mode		# is prec = sgl?
	beq.w		add_sgl			# yes
	bgt.w		add_dbl			# no; it's dbl
	bra.w		add_ext			# no; it's ext
6243

6244
# *** LOCAL EQUATES ***
6245
set	ad_1_sgl,	0x00000100	# constant to add 1 to l-bit in sgl prec
set	ad_1_dbl,	0x00000800	# constant to add 1 to l-bit in dbl prec

#########################
#	ADD SINGLE	#
#########################
# Add one unit in the single-precision l-bit (bit 8 of hi(mantissa)).
# On carry out of the mantissa, rotate the carry back in as the new msb
# and bump the exponent. sgl_done is also the entry used by "truncate".
add_sgl:
	add.l		&ad_1_sgl, FTEMP_HI(%a0)
	bcc.b		scc_clr			# no mantissa overflow
	roxr.w		FTEMP_HI(%a0)		# shift v-bit back in
	roxr.w		FTEMP_HI+2(%a0)		# shift v-bit back in
	add.w		&0x1, FTEMP_EX(%a0)	# and incr exponent
scc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		sgl_done		# not a tie; keep the l-bit
	and.w		&0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
sgl_done:
	and.l		&0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
	clr.l		FTEMP_LO(%a0)		# clear lo(mantissa)
	rts
6265

6266
#########################
6267
#	ADD EXTENDED	#
6268
#########################
6269
add_ext:
# Add one unit in the extended-precision l-bit (bit 0 of lo(mantissa)),
# propagating the carry through hi(mantissa). If the carry leaves the
# 64-bit mantissa, rotate it back in as the new msb and bump the exponent.
	addq.l		&1,FTEMP_LO(%a0)	# add 1 to l-bit
	bcc.b		xcc_clr			# test for carry out
	addq.l		&1,FTEMP_HI(%a0)	# propagate carry
	bcc.b		xcc_clr
	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	add.w		&0x1,FTEMP_EX(%a0)	# and inc exp
xcc_clr:
	tst.l		%d0			# test rs = 0
	bne.b		add_ext_done		# not a tie; keep the l-bit
	and.b		&0xfe,FTEMP_LO+3(%a0)	# clear the l bit
add_ext_done:
	rts
6285

6286
#########################
6287
#	ADD DOUBLE	#
6288
#########################
6289
add_dbl:
# Add one unit in the double-precision l-bit (bit 11 of lo(mantissa)),
# propagating the carry through hi(mantissa). If the carry leaves the
# 64-bit mantissa, rotate it back in as the new msb and bump the exponent.
# dbl_done is also the entry used by "truncate".
	add.l		&ad_1_dbl, FTEMP_LO(%a0) # add 1 to lsb
	bcc.b		dcc_clr			# no carry
	addq.l		&0x1, FTEMP_HI(%a0)	# propagate carry
	bcc.b		dcc_clr			# no carry

	roxr.w		FTEMP_HI(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_HI+2(%a0)		# mant is 0 so restore v-bit
	roxr.w		FTEMP_LO(%a0)
	roxr.w		FTEMP_LO+2(%a0)
	addq.w		&0x1, FTEMP_EX(%a0)	# incr exponent
dcc_clr:
	tst.l		%d0			# test for rs = 0
	bne.b		dbl_done		# not a tie; keep the l-bit
	and.w		&0xf000, FTEMP_LO+2(%a0) # clear the l-bit

dbl_done:
	and.l		&0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
	rts
6308

6309
###########################
6310
# Truncate all other bits #
6311
###########################
6312
truncate:
# No increment is needed: just discard the mantissa bits that lie beyond
# the selected rounding precision. Extended precision keeps all 64 bits.
	swap		%d1			# select rnd prec

	cmpi.b		%d1, &s_mode		# is prec sgl?
	beq.w		sgl_done		# yes
	bgt.b		dbl_done		# no; it's dbl
	rts					# no; it's ext
6319

6320

6321
#
6322
# ext_grs(): extract guard, round and sticky bits according to
6323
#	     rounding precision.
6324
#
6325
# INPUT
6326
#	d0	   = extended precision g,r,s (in d0{31:29})
6327
#	d1	   = {PREC,ROUND}
6328
# OUTPUT
6329
#	d0{31:29}  = guard, round, sticky
6330
#
6331
# ext_grs extracts the guard/round/sticky bits according to the
6332
# selected rounding precision. It is called by the round subroutine
6333
# only.  All registers except d0 are kept intact. d0 becomes an
6334
# updated guard,round,sticky in d0{31:29}
6335
#
6336
# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337
#	 prior to usage, and needs to restore d1 to original. this
6338
#	 routine is tightly tied to the round routine and not meant to
6339
#	 uphold standard subroutine calling practices.
6340
#
6341

6342
ext_grs:
	swap		%d1			# have d1.w point to round precision
	tst.b		%d1			# is rnd prec = extended?
	bne.b		ext_grs_not_ext		# no; go handle sgl or dbl

#
# %d0 already holds g,r,s since _round() had it before calling
# this function. so, as long as we don't disturb it, we are "returning" it.
#
ext_grs_ext:
	swap		%d1			# yes; return to correct positions
	rts

ext_grs_not_ext:
# predecrement movm uses a reversed register mask: 0x3000 selects d2/d3.
	movm.l		&0x3000, -(%sp)		# make some temp registers {d2/d3}

	cmpi.b		%d1, &s_mode		# is rnd prec = sgl?
	bne.b		ext_grs_dbl		# no; go handle dbl

#
# sgl:
#	96		64	  40	32		0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|	  |xx	|		|grs|
#	-----------------------------------------------------
#			<--(24)--->nn\			   /
#				   ee ---------------------
#				   ww		|
#						v
#				   gr	   new sticky
#
# new g,r = the two hi(mantissa) bits just below the 24-bit sgl mantissa;
# new sticky = OR of everything below them, including the old g,r,s.
#
ext_grs_sgl:
	bfextu		FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the sgl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to MSB of d3
	mov.l		FTEMP_HI(%a0), %d2	# get word 2 for s-bit test
	and.l		&0x0000003f, %d2	# s bit is the or of all other
	bne.b		ext_grs_st_stky		# bits to the right of g-r
	tst.l		FTEMP_LO(%a0)		# test lower mantissa
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	tst.l		%d0			# test original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if words 3 and 4 are clr, exit

#
# dbl:
#	96		64		32	 11	0
#	-----------------------------------------------------
#	| EXP	|XXXXXXX|		|	 |xx	|grs|
#	-----------------------------------------------------
#						  nn\	    /
#						  ee -------
#						  ww	|
#							v
#						  gr	new sticky
#
# new g,r = the two lo(mantissa) bits just below the 53-bit dbl mantissa;
# new sticky = OR of everything below them, including the old g,r,s.
#
ext_grs_dbl:
	bfextu		FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are 2 bits right
	mov.l		&30, %d2		# of the dbl prec. limits
	lsl.l		%d2, %d3		# shift g-r bits to the MSB of d3
	mov.l		FTEMP_LO(%a0), %d2	# get lower mantissa  for s-bit test
	and.l		&0x000001ff, %d2	# s bit is the or-ing of all
	bne.b		ext_grs_st_stky		# other bits to the right of g-r
	tst.l		%d0			# test word original g,r,s
	bne.b		ext_grs_st_stky		# if any are set, set sticky
	bra.b		ext_grs_end_sd		# if clear, exit

ext_grs_st_stky:
	bset		&rnd_stky_bit, %d3	# set sticky bit
ext_grs_end_sd:
	mov.l		%d3, %d0		# return grs to d0

# postincrement movm uses the normal mask order: 0xc selects d2/d3.
	movm.l		(%sp)+, &0xc		# restore scratch registers {d2/d3}

	swap		%d1			# restore d1 to original
	rts
6418

6419
#########################################################################
6420
# norm(): normalize the mantissa of an extended precision input. the	#
6421
#	  input operand should not be normalized already.		#
6422
#									#
6423
# XDEF ****************************************************************	#
6424
#	norm()								#
6425
#									#
6426
# XREF **************************************************************** #
6427
#	none								#
6428
#									#
6429
# INPUT *************************************************************** #
6430
#	a0 = pointer fp extended precision operand to normalize		#
6431
#									#
6432
# OUTPUT ************************************************************** #
6433
#	d0 = number of bit positions the mantissa was shifted		#
6434
#	a0 = the input operand's mantissa is normalized; the exponent	#
6435
#	     is unchanged.						#
6436
#									#
6437
#########################################################################
6438
	global		norm
norm:
	mov.l		%d2, -(%sp)		# create some temp regs
	mov.l		%d3, -(%sp)

	mov.l		FTEMP_HI(%a0), %d0	# load hi(mantissa)
	mov.l		FTEMP_LO(%a0), %d1	# load lo(mantissa)

# d2 = bit offset of the first set bit in hi(man); Z is set if none found.
	bfffo		%d0{&0:&32}, %d2	# how many places to shift?
	beq.b		norm_lo			# hi(man) is all zeroes!

#
# hi(man) has a set bit: shift the 64-bit mantissa left by d2, moving the
# top d2 bits of lo(man) into the vacated low bits of hi(man).
#
norm_hi:
	lsl.l		%d2, %d0		# left shift hi(man)
	bfextu		%d1{&0:%d2}, %d3	# extract lo bits

	or.l		%d3, %d0		# create hi(man)
	lsl.l		%d2, %d1		# create lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	mov.l		%d1, FTEMP_LO(%a0)	# store new lo(man)

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts

#
# hi(man) is zero: the shifted lo(man) becomes the new hi(man) and the
# shift count is 32 larger.
#
norm_lo:
	bfffo		%d1{&0:&32}, %d2	# how many places to shift?
	lsl.l		%d2, %d1		# shift lo(man)
	add.l		&32, %d2		# add 32 to shft amount

	mov.l		%d1, FTEMP_HI(%a0)	# store hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) is now zero

	mov.l		%d2, %d0		# return shift amount

	mov.l		(%sp)+, %d3		# restore temp regs
	mov.l		(%sp)+, %d2

	rts
6480

6481
#########################################################################
6482
# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO	#
6483
#		- returns corresponding optype tag			#
6484
#									#
6485
# XDEF ****************************************************************	#
6486
#	unnorm_fix()							#
6487
#									#
6488
# XREF **************************************************************** #
6489
#	norm() - normalize the mantissa					#
6490
#									#
6491
# INPUT *************************************************************** #
6492
#	a0 = pointer to unnormalized extended precision number		#
6493
#									#
6494
# OUTPUT ************************************************************** #
6495
#	d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO	#
6496
#	a0 = input operand has been converted to a norm, denorm, or	#
6497
#	     zero; both the exponent and mantissa are changed.		#
6498
#									#
6499
#########################################################################
6500

6501
	global		unnorm_fix
unnorm_fix:
	bfffo		FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
	bne.b		unnorm_shift		# hi(man) is not all zeroes

#
# hi(man) is all zeroes so see if any bits in lo(man) are set
#
unnorm_chk_lo:
	bfffo		FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
	beq.w		unnorm_zero		# yes

	add.w		&32, %d0		# no; fix shift distance

#
# d0 = # shifts needed for complete normalization
#
unnorm_shift:
	clr.l		%d1			# clear top word
	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1		# strip off sgn

	cmp.w		%d0, %d1		# will denorm push exp < 0?
	bgt.b		unnorm_nrm_zero		# yes; denorm only until exp = 0

#
# exponent would not go < 0. Therefore, number stays normalized
#
	sub.w		%d0, %d1		# shift exponent value
	mov.w		FTEMP_EX(%a0), %d0	# load old exponent
	and.w		&0x8000, %d0		# save old sign
	or.w		%d0, %d1		# {sgn,new exp}
	mov.w		%d1, FTEMP_EX(%a0)	# insert new exponent

	bsr.l		norm			# normalize UNNORM

	mov.b		&NORM, %d0		# return new optype tag
	rts

#
# exponent would go < 0, so only denormalize until exp = 0
# (d1 = exponent = number of bit positions the mantissa may be shifted)
#
unnorm_nrm_zero:
	cmp.b		%d1, &32		# is exp <= 32?
	bgt.b		unnorm_nrm_zero_lrg	# no; go handle large exponent

# the 32-bit field starting d1 bits into hi(man) runs on into lo(man),
# which yields hi(man) of the mantissa shifted left by d1.
	bfextu		FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
	mov.l		%d0, FTEMP_HI(%a0)	# save new hi(man)

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# extract new lo(man)
	mov.l		%d0, FTEMP_LO(%a0)	# save new lo(man)

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# only mantissa bits set are in lo(man)
#
unnorm_nrm_zero_lrg:
	sub.w		&32, %d1		# adjust shft amt by 32

	mov.l		FTEMP_LO(%a0), %d0	# fetch old lo(man)
	lsl.l		%d1, %d0		# left shift lo(man)

	mov.l		%d0, FTEMP_HI(%a0)	# store new hi(man)
	clr.l		FTEMP_LO(%a0)		# lo(man) = 0

	and.w		&0x8000, FTEMP_EX(%a0)	# set exp = 0

	mov.b		&DENORM, %d0		# return new optype tag
	rts

#
# whole mantissa is zero so this UNNORM is actually a zero
#
unnorm_zero:
	and.w		&0x8000, FTEMP_EX(%a0)	# force exponent to zero

	mov.b		&ZERO, %d0		# fix optype tag
	rts
6584

6585
#########################################################################
6586
# XDEF ****************************************************************	#
6587
#	set_tag_x(): return the optype of the input ext fp number	#
6588
#									#
6589
# XREF ****************************************************************	#
6590
#	None								#
6591
#									#
6592
# INPUT ***************************************************************	#
6593
#	a0 = pointer to extended precision operand			#
6594
#									#
6595
# OUTPUT **************************************************************	#
6596
#	d0 = value of type tag						#
6597
#		one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO	#
6598
#									#
6599
# ALGORITHM ***********************************************************	#
6600
#	Simply test the exponent, j-bit, and mantissa values to		#
6601
# determine the type of operand.					#
6602
#	If it's an unnormalized zero, alter the operand and force it	#
6603
# to be a normal zero.							#
6604
#									#
6605
#########################################################################
6606

6607
	global		set_tag_x
set_tag_x:
	mov.w		FTEMP_EX(%a0), %d0	# extract exponent
	andi.w		&0x7fff, %d0		# strip off sign
	cmpi.w		%d0, &0x7fff		# is (EXP == MAX)?
	beq.b		inf_or_nan_x
not_inf_or_nan_x:
	btst		&0x7,FTEMP_HI(%a0)	# is the mantissa msb (j-bit) set?
	beq.b		not_norm_x		# no
is_norm_x:
	mov.b		&NORM, %d0
	rts
not_norm_x:
	tst.w		%d0			# is exponent = 0?
	bne.b		is_unnorm_x		# no; j-bit clear with exp != 0
not_unnorm_x:
# exponent = 0 and j-bit clear: zero if the whole mantissa is zero
	tst.l		FTEMP_HI(%a0)
	bne.b		is_denorm_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_denorm_x
is_zero_x:
	mov.b		&ZERO, %d0
	rts
is_denorm_x:
	mov.b		&DENORM, %d0
	rts
# must distinguish now "Unnormalized zeroes" which we
# must convert to zero.
is_unnorm_x:
	tst.l		FTEMP_HI(%a0)
	bne.b		is_unnorm_reg_x
	tst.l		FTEMP_LO(%a0)
	bne.b		is_unnorm_reg_x
# it's an "unnormalized zero". let's convert it to an actual zero...
	andi.w		&0x8000,FTEMP_EX(%a0)	# clear exponent
	mov.b		&ZERO, %d0
	rts
is_unnorm_reg_x:
	mov.b		&UNNORM, %d0
	rts
inf_or_nan_x:
# exponent is all ones: INF if the mantissa (ignoring its msb) is zero
	tst.l		FTEMP_LO(%a0)
	bne.b		is_nan_x
	mov.l		FTEMP_HI(%a0), %d0
	and.l		&0x7fffffff, %d0	# msb is a don't care!
	bne.b		is_nan_x
is_inf_x:
	mov.b		&INF, %d0
	rts
is_nan_x:
	btst		&0x6, FTEMP_HI(%a0)	# bit just below the mantissa msb
	beq.b		is_snan_x		# clear => signalling NAN
	mov.b		&QNAN, %d0
	rts
is_snan_x:
	mov.b		&SNAN, %d0
	rts
6664

6665
#########################################################################
6666
# XDEF ****************************************************************	#
6667
#	set_tag_d(): return the optype of the input dbl fp number	#
6668
#									#
6669
# XREF ****************************************************************	#
6670
#	None								#
6671
#									#
6672
# INPUT ***************************************************************	#
6673
#	a0 = points to double precision operand				#
6674
#									#
6675
# OUTPUT **************************************************************	#
6676
#	d0 = value of type tag						#
6677
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
6678
#									#
6679
# ALGORITHM ***********************************************************	#
6680
#	Simply test the exponent, j-bit, and mantissa values to		#
6681
# determine the type of operand.					#
6682
#									#
6683
#########################################################################
6684

6685
	global		set_tag_d
set_tag_d:
	mov.l		FTEMP(%a0), %d0		# upper longword of the operand
	mov.l		%d0, %d1		# keep a copy for the fraction test

	andi.l		&0x7ff00000, %d0	# isolate the 11-bit exponent
	beq.b		zero_or_denorm_d	# exponent = 0

	cmpi.l		%d0, &0x7ff00000	# is exponent all ones?
	beq.b		inf_or_nan_d		# yes

is_norm_d:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_d:
	and.l		&0x000fffff, %d1	# upper 20 fraction bits
	bne		is_denorm_d
	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits
	bne		is_denorm_d
is_zero_d:
	mov.b		&ZERO, %d0
	rts
is_denorm_d:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_d:
	and.l		&0x000fffff, %d1	# upper 20 fraction bits
	bne		is_nan_d
	tst.l		4+FTEMP(%a0)		# lower 32 fraction bits
	bne		is_nan_d
is_inf_d:
	mov.b		&INF, %d0
	rts
is_nan_d:
	btst		&19, %d1		# msb of the fraction set?
	bne		is_qnan_d		# yes; quiet NAN
is_snan_d:
	mov.b		&SNAN, %d0
	rts
is_qnan_d:
	mov.b		&QNAN, %d0
	rts
6727

6728
#########################################################################
6729
# XDEF ****************************************************************	#
6730
#	set_tag_s(): return the optype of the input sgl fp number	#
6731
#									#
6732
# XREF ****************************************************************	#
6733
#	None								#
6734
#									#
6735
# INPUT ***************************************************************	#
6736
#	a0 = pointer to single precision operand			#
6737
#									#
6738
# OUTPUT **************************************************************	#
6739
#	d0 = value of type tag						#
6740
#		one of: NORM, INF, QNAN, SNAN, DENORM, ZERO		#
6741
#									#
6742
# ALGORITHM ***********************************************************	#
6743
#	Simply test the exponent, j-bit, and mantissa values to		#
6744
# determine the type of operand.					#
6745
#									#
6746
#########################################################################
6747

6748
	global		set_tag_s
set_tag_s:
	mov.l		FTEMP(%a0), %d0		# the single precision operand
	mov.l		%d0, %d1		# keep a copy for the fraction test

	andi.l		&0x7f800000, %d0	# isolate the 8-bit exponent
	beq.b		zero_or_denorm_s	# exponent = 0

	cmpi.l		%d0, &0x7f800000	# is exponent all ones?
	beq.b		inf_or_nan_s		# yes

is_norm_s:
	mov.b		&NORM, %d0
	rts
zero_or_denorm_s:
	and.l		&0x007fffff, %d1	# 23-bit fraction
	bne		is_denorm_s
is_zero_s:
	mov.b		&ZERO, %d0
	rts
is_denorm_s:
	mov.b		&DENORM, %d0
	rts
inf_or_nan_s:
	and.l		&0x007fffff, %d1	# 23-bit fraction
	bne		is_nan_s
is_inf_s:
	mov.b		&INF, %d0
	rts
is_nan_s:
	btst		&22, %d1		# msb of the fraction set?
	bne		is_qnan_s		# yes; quiet NAN
is_snan_s:
	mov.b		&SNAN, %d0
	rts
is_qnan_s:
	mov.b		&QNAN, %d0
	rts
6786

6787
#########################################################################
6788
# XDEF ****************************************************************	#
6789
#	unf_res(): routine to produce default underflow result of a	#
6790
#		   scaled extended precision number; this is used by	#
6791
#		   fadd/fdiv/fmul/etc. emulation routines.		#
6792
#	unf_res4(): same as above but for fsglmul/fsgldiv which use	#
6793
#		    single round prec and extended prec mode.		#
6794
#									#
6795
# XREF ****************************************************************	#
6796
#	_denorm() - denormalize according to scale factor		#
6797
#	_round() - round denormalized number according to rnd prec	#
6798
#									#
6799
# INPUT ***************************************************************	#
6800
#	a0 = pointer to extended precision operand			#
6801
#	d0 = scale factor						#
6802
#	d1 = rounding precision/mode					#
6803
#									#
6804
# OUTPUT **************************************************************	#
6805
#	a0 = pointer to default underflow result in extended precision	#
6806
#	d0.b = result FPSR_cc which caller may or may not want to save	#
6807
#									#
6808
# ALGORITHM ***********************************************************	#
6809
#	Convert the input operand to "internal format" which means the	#
6810
# exponent is extended to 16 bits and the sign is stored in the unused	#
6811
# portion of the extended precision operand. Denormalize the number	#
6812
# according to the scale factor passed in d0. Then, round the		#
6813
# denormalized result.							#
6814
#	Set the FPSR_exc bits as appropriate but return the cc bits in	#
6815
# d0 in case the caller doesn't want to save them (as is the case for	#
6816
# fmove out).								#
6817
#	unf_res4() for fsglmul/fsgldiv forces the denorm to extended	#
6818
# precision and the rounding precision to single.			#
6819
#									#
6820
#########################################################################
6821
	global		unf_res
unf_res:
	mov.l		%d1, -(%sp)		# save rnd prec,mode on stack

	btst		&0x7, FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0), %d1	# extract exponent
	and.w		&0x7fff, %d1
	sub.w		%d0, %d1		# remove the scale factor
	mov.w		%d1, FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0, -(%sp)		# save operand ptr during calls

# stack now: (%sp) = operand ptr, 0x4(%sp) = saved d1 (low word at 0x6(%sp)).
# in that word the rnd prec is bits 7:6 and the rnd mode is bits 5:4.
	mov.l		0x4(%sp),%d0		# pass rnd prec.
	andi.w		&0x00c0,%d0
	lsr.w		&0x4,%d0
	bsr.l		_denorm			# denorm result

# build the _round() argument: hi(d1) = rnd prec, lo(d1) = rnd mode
	mov.l		(%sp),%a0
	mov.w		0x6(%sp),%d1		# load prec:mode into %d1
	andi.w		&0xc0,%d1		# extract rnd prec
	lsr.w		&0x4,%d1
	swap		%d1
	mov.w		0x6(%sp),%d1
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+, %a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7, FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res_chkifzero	# no; result is positive
	bset		&0x7, FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
# the ccodes are returned in d0 rather than written to FPSR_CC.
unf_res_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res_cont		# no
#	bset		&z_bit, FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit, %d0		# yes; set zero ccode bit

unf_res_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res_end		# no
	bset		&aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res_end:
	add.l		&0x4, %sp		# clear stack
	rts
6883

6884
# unf_res() for fsglmul() and fsgldiv().
6885
	global		unf_res4
unf_res4:
	mov.l		%d1,-(%sp)		# save rnd prec,mode on stack

	btst		&0x7,FTEMP_EX(%a0)	# make "internal" format
	sne		FTEMP_SGN(%a0)

	mov.w		FTEMP_EX(%a0),%d1	# extract exponent
	and.w		&0x7fff,%d1
	sub.w		%d0,%d1			# remove the scale factor
	mov.w		%d1,FTEMP_EX(%a0)	# insert 16 bit exponent

	mov.l		%a0,-(%sp)		# save operand ptr during calls

# stack now: (%sp) = operand ptr, 0x4(%sp) = saved d1 (low word at 0x6(%sp)).
	clr.l		%d0			# force rnd prec = ext
	bsr.l		_denorm			# denorm result

# build the _round() argument: hi(d1) = sgl prec, lo(d1) = caller's rnd mode
	mov.l		(%sp),%a0
	mov.w		&s_mode,%d1		# force rnd prec = sgl
	swap		%d1
	mov.w		0x6(%sp),%d1		# load rnd mode
	andi.w		&0x30,%d1		# extract rnd mode
	lsr.w		&0x4,%d1
	bsr.l		_round			# round the denorm

	mov.l		(%sp)+,%a0

# result is now rounded properly. convert back to normal format
	bclr		&0x7,FTEMP_EX(%a0)	# clear sgn first; may have residue
	tst.b		FTEMP_SGN(%a0)		# is "internal result" sign set?
	beq.b		unf_res4_chkifzero	# no; result is positive
	bset		&0x7,FTEMP_EX(%a0)	# set result sgn
	clr.b		FTEMP_SGN(%a0)		# clear temp sign

# the number may have become zero after rounding. set ccodes accordingly.
# the ccodes are returned in d0 rather than written to FPSR_CC.
unf_res4_chkifzero:
	clr.l		%d0
	tst.l		FTEMP_HI(%a0)		# is value now a zero?
	bne.b		unf_res4_cont		# no
	tst.l		FTEMP_LO(%a0)
	bne.b		unf_res4_cont		# no
#	bset		&z_bit,FPSR_CC(%a6)	# yes; set zero ccode bit
	bset		&z_bit,%d0		# yes; set zero ccode bit

unf_res4_cont:

#
# can inex1 also be set along with unfl and inex2???
#
# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
#
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
	beq.b		unf_res4_end		# no
	bset		&aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl

unf_res4_end:
	add.l		&0x4,%sp		# clear stack
	rts
6943

6944
#########################################################################
6945
# XDEF ****************************************************************	#
6946
#	ovf_res(): routine to produce the default overflow result of	#
6947
#		   an overflowing number.				#
6948
#	ovf_res2(): same as above but the rnd mode/prec are passed	#
6949
#		    differently.					#
6950
#									#
6951
# XREF ****************************************************************	#
6952
#	none								#
6953
#									#
6954
# INPUT ***************************************************************	#
6955
#	d1.b	= '-1' => (-); '0' => (+)				#
6956
#   ovf_res():								#
6957
#	d0	= rnd mode/prec						#
6958
#   ovf_res2():								#
6959
#	hi(d0)	= rnd prec						#
6960
#	lo(d0)	= rnd mode						#
6961
#									#
6962
# OUTPUT **************************************************************	#
6963
#	a0	= points to extended precision result			#
6964
#	d0.b	= condition code bits					#
6965
#									#
6966
# ALGORITHM ***********************************************************	#
6967
#	The default overflow result can be determined by the sign of	#
6968
# the result and the rounding mode/prec in effect. These bits are	#
6969
# concatenated together to create an index into the default result	#
6970
# table. A pointer to the correct result is returned in a0. The		#
6971
# resulting condition codes are returned in d0 in case the caller	#
6972
# doesn't want FPSR_cc altered (as is the case for fmove out).		#
6973
#									#
6974
#########################################################################
6975

6976
	global		ovf_res
ovf_res:
# build index = {sign (0x10), prec (bits 3:2), mode (bits 1:0)}
	andi.w		&0x10,%d1		# keep result sign
	lsr.b		&0x4,%d0		# shift prec/mode
	or.b		%d0,%d1			# concat the two
	mov.w		%d1,%d0			# make a copy
	lsl.b		&0x1,%d1		# multiply d1 by 2
	bra.b		ovf_res_load

	global		ovf_res2
ovf_res2:
# same index as ovf_res, but mode and prec arrive in lo(d0) and hi(d0).
# NOTE(review): assumes hi(d0) carries the prec already positioned in
# bits 3:2 (0/4/8) - confirm against callers.
	and.w		&0x10, %d1		# keep result sign
	or.b		%d0, %d1		# insert rnd mode
	swap		%d0
	or.b		%d0, %d1		# insert rnd prec
	mov.w		%d1, %d0		# make a copy
	lsl.b		&0x1, %d1		# shift left by 1

#
# use the rounding mode, precision, and result sign as an index into the
# two tables below to fetch the default result and the result ccodes.
# d0 = index (one byte per ccode entry); d1 = index*2, scaled by 8 to
# step over the 16-byte (four longword) result entries.
#
ovf_res_load:
	mov.b		(tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
	lea		(tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr

	rts

#
# one row per {sign,prec}, one column per rounding mode (RN,RZ,RM,RP).
# 0x2 accompanies the INF results and 0x8 the negative results below
# (presumably the FPSR inf and neg ccode bits - confirm).
#
tbl_ovfl_cc:
	byte		0x2, 0x0, 0x0, 0x2		# +; ext
	byte		0x2, 0x0, 0x0, 0x2		# +; sgl
	byte		0x2, 0x0, 0x0, 0x2		# +; dbl
	byte		0x0, 0x0, 0x0, 0x0		# +; unused prec encoding
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# -; ext
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# -; sgl
	byte		0x2+0x8, 0x8, 0x2+0x8, 0x8	# -; dbl

tbl_ovfl_result:
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
	long		0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
	long		0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
	long		0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
	long		0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP

# filler for the unused prec encoding so the negative rows line up
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000
	long		0x00000000,0x00000000,0x00000000,0x00000000

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP

	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
	long		0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
	long		0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048

7049
#########################################################################
7050
# XDEF ****************************************************************	#
7051
#	fout(): move from fp register to memory or data register	#
7052
#									#
7053
# XREF ****************************************************************	#
7054
#	_round() - needed to create EXOP for sgl/dbl precision		#
7055
#	norm() - needed to create EXOP for extended precision		#
7056
#	ovf_res() - create default overflow result for sgl/dbl precision#
7057
#	unf_res() - create default underflow result for sgl/dbl prec.	#
7058
#	dst_dbl() - create rounded dbl precision result.		#
7059
#	dst_sgl() - create rounded sgl precision result.		#
7060
#	fetch_dreg() - fetch dynamic k-factor reg for packed.		#
7061
#	bindec() - convert FP binary number to packed number.		#
7062
#	_mem_write() - write data to memory.				#
7063
#	_mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064
#	_dmem_write_{byte,word,long}() - write data to memory.		#
7065
#	store_dreg_{b,w,l}() - store data to data register file.	#
7066
#	facc_out_{b,w,l,d,x}() - data access error occurred.		#
7067
#									#
7068
# INPUT ***************************************************************	#
7069
#	a0 = pointer to extended precision source operand		#
7070
#	d0 = round prec,mode						#
7071
#									#
7072
# OUTPUT **************************************************************	#
7073
#	fp0 : intermediate underflow or overflow result if		#
7074
#	      OVFL/UNFL occurred for a sgl or dbl operand		#
7075
#									#
7076
# ALGORITHM ***********************************************************	#
7077
#	This routine is accessed by many handlers that need to do an	#
7078
# opclass three move of an operand out to memory.			#
7079
#	Decode an fmove out (opclass 3) instruction to determine if	#
7080
# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data	#
7081
# register or memory. The algorithm uses a standard "fmove" to create	#
7082
# the rounded result. Also, since exceptions are disabled, this also	#
7083
# creates the correct OPERR default result if appropriate.		#
7084
#	For sgl or dbl precision, overflow or underflow can occur. If	#
7085
# either occurs and is enabled, the EXOP is returned in fp1.		#
7086
#	For extended precision, the stacked <ea> must be fixed along	#
7087
# w/ the address index register as appropriate w/ _calc_ea_fout(). If	#
7088
# the source is a denorm and if underflow is enabled, an EXOP must be	#
7089
# created.								#
7090
#	For packed, the k-factor must be fetched from the instruction	#
7091
# word or a data register. The <ea> must be fixed as w/ extended	#
7092
# precision. Then, bindec() is called to create the appropriate		#
7093
# packed result.							#
7094
#	If at any time an access error is flagged by one of the move-	#
7095
# to-memory routines, then a special exit must be made so that the	#
7096
# access error can be handled properly.					#
7097
#									#
7098
#########################################################################
7099

7100
	global		fout
#
# fout: dispatch on the 3-bit destination format field of the command word.
# tbl_fout holds 16-bit offsets relative to tbl_fout itself.
#
fout:
	bfextu		EXC_CMDREG(%a6){&3:&3},%d1 # d1 = dst format code (0-7)
	mov.w		(tbl_fout.b,%pc,%d1.w*2),%a1 # a1 = handler offset
	jmp		(tbl_fout.b,%pc,%a1)	# enter the format handler

	swbeg		&0x8
# format codes 0-7 select, in order:
#	long, sgl, ext, pack, word, dbl, byte, pack
tbl_fout:
	short		fout_long	-	tbl_fout
	short		fout_sgl	-	tbl_fout
	short		fout_ext	-	tbl_fout
	short		fout_pack	-	tbl_fout
	short		fout_word	-	tbl_fout
	short		fout_dbl	-	tbl_fout
	short		fout_byte	-	tbl_fout
	short		fout_pack	-	tbl_fout
7116

7117
#################################################################
7118
# fmove.b out ###################################################
7119
#################################################################
7120

7121
# Only "Unimplemented Data Type" exceptions enter here. The operand
7122
# is either a DENORM or a NORM.
7123
fout_byte:
	tst.b		STAG(%a6)		# source tag zero (NORM)?
	bne.b		fout_byte_denorm	# no; use a stand-in value

	fmovm.x		SRC(%a0),&0x80		# fp0 = source operand

fout_byte_norm:
	fmov.l		%d0,%fpcr		# install caller's rnd prec,mode

	fmov.b		%fp0,%d0		# FPU converts and rounds to a byte

	fmov.l		&0x0,%fpcr		# FPCR back to zero
	fmov.l		%fpsr,%d1		# d1 = FPSR produced by the move
	or.w		%d1,2+USER_FPSR(%a6)	# fold exc/accrued bits into user FPSR

	mov.b		1+EXC_OPWORD(%a6),%d1	# low byte of opword (<ea> field)
	and.b		&0x38,%d1		# mode bits zero => Dn destination
	beq.b		fout_byte_dn		# result goes to the integer regfile

	mov.l		EXC_EA(%a6),%a0		# pass: stacked <ea> as dst addr
	bsr.l		_dmem_write_byte	# store the byte

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_b		# take the access-error exit

	rts

fout_byte_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# reload <ea> byte
	and.w		&0x7,%d1		# d1 = Dn number
	bsr.l		store_dreg_b		# save result to the integer regfile
	rts

# DENORM source: replace it by the smallest normalized single with the same
# sign and let the normal path round that.
fout_byte_denorm:
	mov.l		SRC_EX(%a0),%d1
	and.l		&0x80000000,%d1		# isolate the sign
	ori.l		&0x00800000,%d1		# add smallest sgl bit pattern
	fmov.s		%d1,%fp0		# fp0 = signed stand-in
	bra.b		fout_byte_norm
7162

7163
#################################################################
7164
# fmove.w out ###################################################
7165
#################################################################
7166

7167
# Only "Unimplemented Data Type" exceptions enter here. The operand
7168
# is either a DENORM or a NORM.
7169
fout_word:
	tst.b		STAG(%a6)		# source tag zero (NORM)?
	bne.b		fout_word_denorm	# no; use a stand-in value

	fmovm.x		SRC(%a0),&0x80		# fp0 = source operand

fout_word_norm:
	fmov.l		%d0,%fpcr		# install caller's rnd prec:mode

	fmov.w		%fp0,%d0		# FPU converts and rounds to a word

	fmov.l		&0x0,%fpcr		# FPCR back to zero
	fmov.l		%fpsr,%d1		# d1 = FPSR produced by the move
	or.w		%d1,2+USER_FPSR(%a6)	# fold exc/accrued bits into user FPSR

	mov.b		1+EXC_OPWORD(%a6),%d1	# low byte of opword (<ea> field)
	and.b		&0x38,%d1		# mode bits zero => Dn destination
	beq.b		fout_word_dn		# result goes to the integer regfile

	mov.l		EXC_EA(%a6),%a0		# pass: stacked <ea> as dst addr
	bsr.l		_dmem_write_word	# store the word

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_w		# take the access-error exit

	rts

fout_word_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# reload <ea> byte
	and.w		&0x7,%d1		# d1 = Dn number
	bsr.l		store_dreg_w		# save result to the integer regfile
	rts

# DENORM source: replace it by the smallest normalized single with the same
# sign and let the normal path round that.
fout_word_denorm:
	mov.l		SRC_EX(%a0),%d1
	and.l		&0x80000000,%d1		# isolate the sign
	ori.l		&0x00800000,%d1		# add smallest sgl bit pattern
	fmov.s		%d1,%fp0		# fp0 = signed stand-in
	bra.b		fout_word_norm
7208

7209
#################################################################
7210
# fmove.l out ###################################################
7211
#################################################################
7212

7213
# Only "Unimplemented Data Type" exceptions enter here. The operand
7214
# is either a DENORM or a NORM.
7215
fout_long:
	tst.b		STAG(%a6)		# source tag zero (NORM)?
	bne.b		fout_long_denorm	# no; use a stand-in value

	fmovm.x		SRC(%a0),&0x80		# fp0 = source operand

fout_long_norm:
	fmov.l		%d0,%fpcr		# install caller's rnd prec:mode

	fmov.l		%fp0,%d0		# FPU converts and rounds to a long

	fmov.l		&0x0,%fpcr		# FPCR back to zero
	fmov.l		%fpsr,%d1		# d1 = FPSR produced by the move
	or.w		%d1,2+USER_FPSR(%a6)	# fold exc/accrued bits into user FPSR

fout_long_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# low byte of opword (<ea> field)
	and.b		&0x38,%d1		# mode bits zero => Dn destination
	beq.b		fout_long_dn		# result goes to the integer regfile

	mov.l		EXC_EA(%a6),%a0		# pass: stacked <ea> as dst addr
	bsr.l		_dmem_write_long	# store the long

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_l		# take the access-error exit

	rts

fout_long_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# reload <ea> byte
	and.w		&0x7,%d1		# d1 = Dn number
	bsr.l		store_dreg_l		# save result to the integer regfile
	rts

# DENORM source: replace it by the smallest normalized single with the same
# sign and let the normal path round that.
fout_long_denorm:
	mov.l		SRC_EX(%a0),%d1
	and.l		&0x80000000,%d1		# isolate the sign
	ori.l		&0x00800000,%d1		# add smallest sgl bit pattern
	fmov.s		%d1,%fp0		# fp0 = signed stand-in
	bra.b		fout_long_norm
7255

7256
#################################################################
7257
# fmove.x out ###################################################
7258
#################################################################
7259

7260
# Only "Unimplemented Data Type" exceptions enter here. The operand
7261
# is either a DENORM or a NORM.
7262
# The DENORM causes an Underflow exception.
7263
fout_ext:

# build the value to be stored in FP_SCR0 rather than in place: the
# reserved 16-bit field has to go out as zero, and the operand at SRC(a0)
# must not be modified.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# sign/exponent word
	clr.w		2+FP_SCR0_EX(%a6)	# reserved word = 0
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)	# mantissa, high long
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)	# mantissa, low long

	fmovm.x		SRC(%a0),&0x80		# fp0 = source operand

	bsr.l		_calc_ea_fout		# fix stacked <ea>; a0 = dst addr

	mov.l		%a0,%a1			# pass: dst addr
	lea		FP_SCR0(%a6),%a0	# pass: src addr
	mov.l		&0xc,%d0		# pass: 12-byte operand

# a pre-decrement store through a7 from supervisor mode would land on the
# exception stack frame, so that case is handed to _mem_write2() instead of
# being written directly here.
	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_ext_a7

	bsr.l		_dmem_write		# store the 12 bytes

	tst.l		%d1			# non-zero => store failed
	bne.w		fout_ext_err

	tst.b		STAG(%a6)		# source tag zero (NORM)?
	bne.b		fout_ext_denorm		# no; DENORM bookkeeping
	rts

# DENORM source: flag underflow, and build an EXOP only if someone is
# going to look at it.
fout_ext_denorm:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # record UNFL

	mov.b		FPCR_ENABLE(%a6),%d0
	and.b		&0x0a,%d0		# UNFL or INEX enabled?
	bne.b		fout_ext_exc		# yes; create the EXOP
	rts

# supervisor-mode -(a7) destination: _mem_write2() decides whether the
# write may actually be performed.
fout_ext_a7:
	bsr.l		_mem_write2		# store the 12 bytes (or defer)

	tst.l		%d1			# non-zero => store failed
	bne.w		fout_ext_err

	tst.b		STAG(%a6)		# source tag zero (NORM)?
	bne.b		fout_ext_denorm		# no; DENORM bookkeeping
	rts

# EXOP = the operand normalized, with exponent = -(shift count), sign kept.
fout_ext_exc:
	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize; d0 = shift amount
	neg.w		%d0			# exponent = -(shift amount)
	and.w		&0x7fff,%d0		# confine to the 15-bit exp field
	andi.w		&0x8000,FP_SCR0_EX(%a6)	# keep only the original sign
	or.w		%d0,FP_SCR0_EX(%a6)	# merge in the new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# fp1 = EXOP
	rts

fout_ext_err:
	mov.l		EXC_A6(%a6),(%a6)	# fix stacked a6
	bra.l		facc_out_x		# access-error exit
7330

7331
#########################################################################
7332
# fmove.s out ###########################################################
7333
#########################################################################
7334
fout_sgl:
	and.b		&0x30,%d0		# keep rnd mode, drop rnd prec
	ori.b		&s_mode*0x10,%d0	# force single precision
	mov.l		%d0,L_SCR3(%a6)		# keep rnd prec,mode for later use

#
# classify the operand by its biased exponent before touching the FPU:
#	exp >  SGL_HI	certain overflow
#	exp == SGL_HI	overflow only if rounding carries out
#	exp <  SGL_LO	underflow
# anything else is stored with a plain single precision move.
#
	mov.w		SRC_EX(%a0),%d0		# d0 = sign/exponent word
	and.w		&0x7fff,%d0		# drop the sign

	cmpi.w		%d0,&SGL_HI
	bgt.w		fout_sgl_ovfl		# above range
	beq.w		fout_sgl_may_ovfl	# on the upper edge
	cmpi.w		%d0,&SGL_LO
	blt.w		fout_sgl_unfl		# below range

#
# in-range operand: a simple "fmov.s" does the conversion.
# unnormalized inputs can also arrive here.
#
fout_sgl_exg:
	fmovm.x		SRC(%a0),&0x80		# fp0 = source operand

	fmov.l		L_SCR3(%a6),%fpcr	# sgl prec + caller's rnd mode
	fmov.l		&0x0,%fpsr		# start from a clean FPSR

	fmov.s		%fp0,%d0		# d0 = rounded single

	fmov.l		&0x0,%fpcr		# FPCR back to zero
	fmov.l		%fpsr,%d1		# d1 = FPSR produced by the move

	or.w		%d1,2+USER_FPSR(%a6)	# pick up possible inex2/ainex

fout_sgl_exg_write:
	mov.b		1+EXC_OPWORD(%a6),%d1	# low byte of opword (<ea> field)
	and.b		&0x38,%d1		# mode bits zero => Dn destination
	beq.b		fout_sgl_exg_write_dn	# result goes to the integer regfile

	mov.l		EXC_EA(%a6),%a0		# pass: stacked <ea> as dst addr
	bsr.l		_dmem_write_long	# store the long

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_l		# take the access-error exit

	rts

fout_sgl_exg_write_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# reload <ea> byte
	and.w		&0x7,%d1		# d1 = Dn number
	bsr.l		store_dreg_l		# save result to the integer regfile
	rts
7389

7390
#
# the operand is too small for single precision: flag UNFL, produce the
# default underflow result via unf_res(), pack it with dst_sgl() and store
# it. the source pointer stays on the stack until the EXOP decision.
#
fout_sgl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # record UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# working copy in FP_SCR0
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for the EXOP path

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# was the source a DENORM?
	bne.b		fout_sgl_unfl_cont	# no; skip normalization

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM copy

fout_sgl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_sgl			# d0 = single precision image

	mov.b		1+EXC_OPWORD(%a6),%d1	# low byte of opword (<ea> field)
	and.b		&0x38,%d1		# mode bits zero => Dn destination
	beq.b		fout_sgl_unfl_dn	# result goes to the integer regfile

	mov.l		EXC_EA(%a6),%a0		# pass: stacked <ea> as dst addr
	bsr.l		_dmem_write_long	# store the long

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_l		# take the access-error exit

	bra.b		fout_sgl_unfl_chkexc

fout_sgl_unfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# reload <ea> byte
	and.w		&0x7,%d1		# d1 = Dn number
	bsr.l		store_dreg_l		# save result to the integer regfile

fout_sgl_unfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	and.b		&0x0a,%d1		# UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes; it pops a0 and builds the EXOP
	addq.l		&0x4,%sp		# no; discard the saved src ptr
	rts
7442

7443
#
# it's definitely an overflow so call ovf_res to get the correct answer.
# the source pointer is pushed here and is either popped by
# fout_sd_exc_ovfl (EXOP needed) or discarded before returning.
#
fout_sgl_ovfl:
	tst.b		3+SRC_HI(%a0)		# mantissa bits below sgl precision?
	bne.b		fout_sgl_ovfl_inex2	# yes; result is inexact
	tst.l		SRC_LO(%a0)		# any bits in the low long?
	bne.b		fout_sgl_ovfl_inex2	# yes; result is inexact
	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_sgl_ovfl_cont
fout_sgl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_sgl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for the EXOP path

# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: sgl prec,rnd mode
	bsr.l		ovf_res			# a0 -> default OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.s		%fp0,%d0		# d0 = single precision image

	mov.b		1+EXC_OPWORD(%a6),%d1	# extract dst mode
	andi.b		&0x38,%d1		# is mode == 0? (Dreg dst)
	beq.b		fout_sgl_ovfl_dn	# must save to integer regfile

	mov.l		EXC_EA(%a6),%a0		# stacked <ea> is correct
	bsr.l		_dmem_write_long	# write long

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_l		# yes

	bra.b		fout_sgl_ovfl_chkexc

fout_sgl_ovfl_dn:
	mov.b		1+EXC_OPWORD(%a6),%d1	# extract Dn
	andi.w		&0x7,%d1
	bsr.l		store_dreg_l

# the EXOP must be built whenever the overflow (or the accompanying
# inexact) can actually trap. the mask used to be 0x0a, which this file
# documents as "UNFL or INEX", so an enabled OVFL alone skipped the EXOP.
# 0x12 = OVFL (0x10) | INEX2 (0x02) in the FPCR exception enable byte.
fout_sgl_ovfl_chkexc:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes; it pops a0 and builds the EXOP
	addq.l		&0x4,%sp		# no; discard the saved src ptr
	rts
7492

7493
#
# the exponent sits exactly on the single precision limit, so the move
# overflows only if rounding carries into the next binade:
# (1) copy the operand with its exponent forced to 0x3fff
# (2) round it to single precision in the requested mode
# (3) |result| still below 2 => no carry; store normally (fout_sgl_exg)
#     |result| reached 2     => carry; treat as overflow (fout_sgl_ovfl)
#
fout_sgl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# d1 = sign/exponent word
	and.w		&0x8000,%d1		# keep the sign only
	ori.w		&0x3fff,%d1		# exponent = bias (scaled exp 0)
	mov.w		%d1,FP_SCR0_EX(%a6)	# scaled copy: sign/exponent
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # scaled copy: hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # scaled copy: lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# sgl prec + caller's rnd mode

	fmov.x		FP_SCR0(%a6),%fp0	# load => rounds to sgl prec
	fmov.l		&0x0,%fpcr		# FPCR back to zero

	fabs.x		%fp0			# magnitude only
	fcmp.b		%fp0,&0x2		# did rounding reach 2.0?
	fblt.w		fout_sgl_exg		# no; plain in-range store
	bra.w		fout_sgl_ovfl		# yes; overflow handling
7518

7519
################
7520

7521
#
# EXOP construction shared by the sgl and dbl move-out paths. both entries
# expect the source pointer on top of the stack and return the EXOP in fp1.
#
fout_sd_exc_unfl:
	mov.l		(%sp)+,%a0		# recover saved src ptr

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# fresh copy of the source
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	cmpi.b		STAG(%a6),&DENORM	# was src a DENORM?
	bne.b		fout_sd_exc_cont	# no; round as is

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize; d0 = shift amount
	neg.l		%d0			# exponent = -(shift amount)
	and.w		&0x7fff,%d0		# confine to 15 bits
	bfins		%d0,FP_SCR0_EX(%a6){&1:&15} # replace exponent, keep sign
	bra.b		fout_sd_exc_cont

fout_sd_exc:
fout_sd_exc_ovfl:
	mov.l		(%sp)+,%a0		# recover saved src ptr

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# fresh copy of the source
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

fout_sd_exc_cont:
	bclr		&0x7,FP_SCR0_EX(%a6)	# strip sign; Z reflects old bit
	sne.b		2+FP_SCR0_EX(%a6)	# remember it as the internal sign byte
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand

# d1 for _round(): high word = rnd prec field, low word = rnd mode field,
# both taken from the saved mode byte in L_SCR3.
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	and.w		&0x0c,%d1		# rnd prec bits
	swap		%d1
	mov.b		3+L_SCR3(%a6),%d1
	lsr.b		&0x4,%d1
	and.w		&0x03,%d1		# rnd mode bits
	clr.l		%d0			# pass: zero g,r,s
	bsr.l		_round			# round to the dst precision

	tst.b		2+FP_SCR0_EX(%a6)	# internal sign set?
	beq.b		fout_sd_exc_done	# no; EXOP is positive
	bset		&0x7,FP_SCR0_EX(%a6)	# yes; restore the sign bit

fout_sd_exc_done:
	fmovm.x		FP_SCR0(%a6),&0x40	# fp1 = EXOP
	rts
7568

7569
#################################################################
7570
# fmove.d out ###################################################
7571
#################################################################
7572
fout_dbl:
	and.b		&0x30,%d0		# keep rnd mode, drop rnd prec
	ori.b		&d_mode*0x10,%d0	# force double precision
	mov.l		%d0,L_SCR3(%a6)		# keep rnd prec,mode for later use

#
# classify the operand by its biased exponent before touching the FPU:
#	exp >  DBL_HI	certain overflow
#	exp == DBL_HI	overflow only if rounding carries out
#	exp <  DBL_LO	underflow
# anything else is stored with a plain double precision move.
#
	mov.w		SRC_EX(%a0),%d0		# d0 = sign/exponent word
	and.w		&0x7fff,%d0		# drop the sign

	cmpi.w		%d0,&DBL_HI
	bgt.w		fout_dbl_ovfl		# above range
	beq.w		fout_dbl_may_ovfl	# on the upper edge
	cmpi.w		%d0,&DBL_LO
	blt.w		fout_dbl_unfl		# below range

#
# in-range operand: a simple "fmov.d" does the conversion.
# unnormalized inputs can also arrive here.
#
fout_dbl_exg:
	fmovm.x		SRC(%a0),&0x80		# fp0 = source operand

	fmov.l		L_SCR3(%a6),%fpcr	# dbl prec + caller's rnd mode
	fmov.l		&0x0,%fpsr		# start from a clean FPSR

	fmov.d		%fp0,L_SCR1(%a6)	# rounded double -> scratch area

	fmov.l		&0x0,%fpcr		# FPCR back to zero
	fmov.l		%fpsr,%d0		# d0 = FPSR produced by the move

	or.w		%d0,2+USER_FPSR(%a6)	# pick up possible inex2/ainex

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: 8-byte operand
	bsr.l		_dmem_write		# copy the double out to memory

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_d		# take the access-error exit

	rts					# stored; nothing else to do
7618

7619
#
# the operand is too small for double precision: flag UNFL, produce the
# default underflow result via unf_res(), pack it with dst_dbl() and store
# it. the source pointer stays on the stack until the EXOP decision.
#
fout_dbl_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # record UNFL

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# working copy in FP_SCR0
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	mov.l		%a0,-(%sp)		# save src ptr for the EXOP path

	clr.l		%d0			# pass: S.F. = 0

	cmpi.b		STAG(%a6),&DENORM	# was the source a DENORM?
	bne.b		fout_dbl_unfl_cont	# no; skip normalization

	lea		FP_SCR0(%a6),%a0
	bsr.l		norm			# normalize the DENORM copy

fout_dbl_unfl_cont:
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# default underflow result

	lea		FP_SCR0(%a6),%a0	# pass: ptr to fop
	bsr.l		dst_dbl			# d0:d1 = double precision image
	mov.l		%d0,L_SCR1(%a6)		# high long -> scratch
	mov.l		%d1,L_SCR2(%a6)		# low long  -> scratch

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: 8-byte operand
	bsr.l		_dmem_write		# copy the double out to memory

	tst.l		%d1			# non-zero => store failed
	bne.l		facc_out_d		# take the access-error exit

	mov.b		FPCR_ENABLE(%a6),%d1
	and.b		&0x0a,%d1		# UNFL or INEX enabled?
	bne.w		fout_sd_exc_unfl	# yes; it pops a0 and builds the EXOP
	addq.l		&0x4,%sp		# no; discard the saved src ptr
	rts
7663

7664
#
# it's definitely an overflow so call ovf_res to get the correct answer.
# the source pointer is pushed here and is either popped by
# fout_sd_exc_ovfl (EXOP needed) or discarded before returning.
#
fout_dbl_ovfl:
	mov.w		2+SRC_LO(%a0),%d0	# low word of the mantissa
	andi.w		&0x7ff,%d0		# bits below dbl precision set?
	bne.b		fout_dbl_ovfl_inex2	# yes; result is inexact

	ori.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
	bra.b		fout_dbl_ovfl_cont
fout_dbl_ovfl_inex2:
	ori.w		&ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2

fout_dbl_ovfl_cont:
	mov.l		%a0,-(%sp)		# save src ptr for the EXOP path

# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
# overflow result. DON'T save the returned ccodes from ovf_res() since
# fmove out doesn't alter them.
	tst.b		SRC_EX(%a0)		# is operand negative?
	smi		%d1			# set if so
	mov.l		L_SCR3(%a6),%d0		# pass: dbl prec,rnd mode
	bsr.l		ovf_res			# a0 -> default OVFL result
	fmovm.x		(%a0),&0x80		# load default overflow result
	fmov.d		%fp0,L_SCR1(%a6)	# store to double

	mov.l		EXC_EA(%a6),%a1		# pass: dst addr
	lea		L_SCR1(%a6),%a0		# pass: src addr
	movq.l		&0x8,%d0		# pass: opsize is 8 bytes
	bsr.l		_dmem_write		# store dbl fop to memory

	tst.l		%d1			# did dstore fail?
	bne.l		facc_out_d		# yes

# the EXOP must be built whenever the overflow (or the accompanying
# inexact) can actually trap. the mask used to be 0x0a, which this file
# documents as "UNFL or INEX", so an enabled OVFL alone skipped the EXOP.
# 0x12 = OVFL (0x10) | INEX2 (0x02) in the FPCR exception enable byte.
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x12,%d1		# is OVFL or INEX enabled?
	bne.w		fout_sd_exc_ovfl	# yes; it pops a0 and builds the EXOP
	addq.l		&0x4,%sp		# no; discard the saved src ptr
	rts
7703

7704
#
# the exponent sits exactly on the double precision limit, so the move
# overflows only if rounding carries into the next binade:
# (1) copy the operand with its exponent forced to 0x3fff
# (2) round it to double precision in the requested mode
# (3) |result| still below 2 => no carry; store normally (fout_dbl_exg)
#     |result| reached 2     => carry; treat as overflow (fout_dbl_ovfl)
#
fout_dbl_may_ovfl:
	mov.w		SRC_EX(%a0),%d1		# d1 = sign/exponent word
	and.w		&0x8000,%d1		# keep the sign only
	ori.w		&0x3fff,%d1		# exponent = bias (scaled exp 0)
	mov.w		%d1,FP_SCR0_EX(%a6)	# scaled copy: sign/exponent
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6) # scaled copy: hi(man)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6) # scaled copy: lo(man)

	fmov.l		L_SCR3(%a6),%fpcr	# dbl prec + caller's rnd mode

	fmov.x		FP_SCR0(%a6),%fp0	# load => rounds to dbl prec
	fmov.l		&0x0,%fpcr		# FPCR back to zero

	fabs.x		%fp0			# magnitude only
	fcmp.b		%fp0,&0x2		# did rounding reach 2.0?
	fblt.w		fout_dbl_exg		# no; plain in-range store
	bra.w		fout_dbl_ovfl		# yes; overflow handling
7729

7730
#########################################################################
7731
# XDEF ****************************************************************	#
7732
#	dst_dbl(): create double precision value from extended prec.	#
7733
#									#
7734
# XREF ****************************************************************	#
7735
#	None								#
7736
#									#
7737
# INPUT ***************************************************************	#
7738
#	a0 = pointer to source operand in extended precision		#
7739
#									#
7740
# OUTPUT **************************************************************	#
7741
#	d0 = hi(double precision result)				#
7742
#	d1 = lo(double precision result)				#
7743
#									#
7744
# ALGORITHM ***********************************************************	#
7745
#									#
7746
#  Changes extended precision to double precision.			#
7747
#  Note: no attempt is made to round the extended value to double.	#
7748
#	dbl_sign = ext_sign						#
7749
#	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)		#
7750
#	get rid of ext integer bit					#
7751
#	dbl_mant = ext_mant{62:11}					#
7752
#									#
7753
#		---------------   ---------------    ---------------	#
7754
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7755
#		---------------   ---------------    ---------------	#
7756
#		 95	    64    63 62	      32      31     11	  0	#
7757
#				     |			     |		#
7758
#				     |			     |		#
7759
#				     |			     |		#
7760
#			             v			     v		#
7761
#			      ---------------   ---------------		#
7762
#  double   ->		      |s|exp| mant  |   |  mant       |		#
7763
#			      ---------------   ---------------		#
7764
#			      63     51   32   31	       0	#
7765
#									#
7766
#########################################################################
7767

7768
# dst_dbl: pack the extended value at (a0) into a double precision image.
# returns d0 = high long, d1 = low long. no rounding is performed here.
# uses L_SCR1/L_SCR2 in the a6 frame as scratch.
dst_dbl:
	clr.l		%d0			# upper word of d0 must start out zero
	mov.w		FTEMP_EX(%a0),%d0	# d0.w = sign/exponent word
	subi.w		&EXT_BIAS,%d0		# take off the extended bias
	addi.w		&DBL_BIAS,%d0		# put on the double bias
	tst.b		FTEMP_HI(%a0)		# top mantissa bit set?
	bmi.b		dst_get_dupper		# yes; exponent is final
	subq.w		&0x1,%d0		# no (denorm); bias = DBL_BIAS - 1
dst_get_dupper:
	swap		%d0			# exponent into the upper word
	lsl.l		&0x4,%d0		# line it up with the dbl exp field
	tst.b		FTEMP_EX(%a0)		# source negative?
	bpl.b		dst_get_dman		# no; sign bit stays clear
	bset		&0x1f,%d0		# yes; set the dbl sign bit
dst_get_dman:
	mov.l		FTEMP_HI(%a0),%d1	# high mantissa long
	bfextu		%d1{&1:&20},%d1		# 20 bits just below the top bit
	or.l		%d1,%d0			# d0 = sign | exp | top 20 fraction bits
	mov.l		%d0,L_SCR1(%a6)		# park the high long in scratch
	mov.l		FTEMP_HI(%a0),%d1	# high mantissa long again
	mov.l		&21,%d0			# shift count
	lsl.l		%d0,%d1			# its low 11 bits -> bits 31-21
	mov.l		%d1,L_SCR2(%a6)		# start of the low long, in scratch
	mov.l		FTEMP_LO(%a0),%d1	# low mantissa long
	bfextu		%d1{&0:&21},%d0		# its top 21 bits, right justified
	mov.l		L_SCR2(%a6),%d1
	or.l		%d0,%d1			# d1 = completed low long
	mov.l		L_SCR1(%a6),%d0		# d0 = completed high long
	rts
7797

7798
#########################################################################
7799
# XDEF ****************************************************************	#
7800
#	dst_sgl(): create single precision value from extended prec	#
7801
#									#
7802
# XREF ****************************************************************	#
7803
#									#
7804
# INPUT ***************************************************************	#
7805
#	a0 = pointer to source operand in extended precision		#
7806
#									#
7807
# OUTPUT **************************************************************	#
7808
#	d0 = single precision result					#
7809
#									#
7810
# ALGORITHM ***********************************************************	#
7811
#									#
7812
# Changes extended precision to single precision.			#
7813
#	sgl_sign = ext_sign						#
7814
#	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)		#
7815
#	get rid of ext integer bit					#
7816
#	sgl_mant = ext_mant{62:40}					#
7817
#									#
7818
#		---------------   ---------------    ---------------	#
7819
#  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |	#
7820
#		---------------   ---------------    ---------------	#
7821
#		 95	    64    63 62	   40 32      31     12	  0	#
7822
#				     |	   |				#
7823
#				     |	   |				#
7824
#				     |	   |				#
7825
#			             v     v				#
7826
#			      ---------------				#
7827
#  single   ->		      |s|exp| mant  |				#
7828
#			      ---------------				#
7829
#			      31     22     0				#
7830
#									#
7831
#########################################################################
7832

7833
# dst_sgl: pack the extended value at (a0) into a single precision image.
# returns d0 = the 32-bit result. mantissa bits below the top 24 are simply
# dropped; no rounding is performed here.
dst_sgl:
	clr.l		%d0			# upper word of d0 must start out zero
	mov.w		FTEMP_EX(%a0),%d0	# d0.w = sign/exponent word
	subi.w		&EXT_BIAS,%d0		# take off the extended bias
	addi.w		&SGL_BIAS,%d0		# put on the single bias
	tst.b		FTEMP_HI(%a0)		# top mantissa bit set?
	bmi.b		dst_get_supper		# yes; exponent is final
	subq.w		&0x1,%d0		# no (denorm); bias = SGL_BIAS - 1
dst_get_supper:
	swap		%d0			# exponent into the upper word
	lsl.l		&0x7,%d0		# line it up with the sgl exp field
	tst.b		FTEMP_EX(%a0)		# source negative?
	bpl.b		dst_get_sman		# no; sign bit stays clear
	bset		&0x1f,%d0		# yes; set the sgl sign bit
dst_get_sman:
	mov.l		FTEMP_HI(%a0),%d1	# high mantissa long
	and.l		&0x7fffff00,%d1		# 23 bits just below the top bit
	lsr.l		&0x8,%d1		# right justify them
	or.l		%d1,%d0			# d0 = sign | exp | fraction
	rts
7853

7854
##############################################################################
7855
#
# fout_pack: store the source operand to memory as a 12-byte packed value.
#	The <ea> returned in a0 by _calc_ea_fout() is parked on the stack
# until the write. NORM and DENORM inputs are converted by bindec() using
# either the static k-factor from the command word or the dynamic one
# fetched from a data register. Any other input type is written out
# directly from FP_SRC (SNANs first get SNAN/AIOP and the snan bit set).
#	A failed write (d1 != 0 after the write routine) exits through
# fout_ext_err.
#
fout_pack:
	bsr.l		_calc_ea_fout		# fetch the <ea>
	mov.l		%a0,-(%sp)		# save dst addr for the write

	mov.b		STAG(%a6),%d0		# fetch input type
	bne.w		fout_pack_not_norm	# input is not NORM

fout_pack_norm:
	btst		&0x4,EXC_CMDREG(%a6)	# static or dynamic?
	beq.b		fout_pack_s		# static

fout_pack_d:
	mov.b		1+EXC_CMDREG(%a6),%d1	# fetch dynamic reg
	lsr.b		&0x4,%d1
	andi.w		&0x7,%d1		# d1 = data register number

	bsr.l		fetch_dreg		# fetch Dn w/ k-factor

	bra.b		fout_pack_type
fout_pack_s:
	mov.b		1+EXC_CMDREG(%a6),%d0	# fetch static field

fout_pack_type:
	bfexts		%d0{&25:&7},%d0		# extract k-factor: low 7 bits,
						# sign extended to a longword
	mov.l	%d0,-(%sp)			# save k-factor across bindec

	lea		FP_SRC(%a6),%a0		# pass: ptr to input

# bindec is currently scrambling FP_SRC for denorm inputs.
# we'll have to change this, but for now, tough luck!!!
	bsr.l		bindec			# convert xprec to packed
						# (result is read from FP_SCR0)

#	andi.l		&0xcfff000f,FP_SCR0(%a6) # clear unused fields
	andi.l		&0xcffff00f,FP_SCR0(%a6) # clear unused fields

	mov.l	(%sp)+,%d0			# restore k-factor

# is every remaining digit of the packed result zero?
	tst.b		3+FP_SCR0_EX(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_HI(%a6)
	bne.b		fout_pack_set
	tst.l		FP_SCR0_LO(%a6)
	bne.b		fout_pack_set

# add the extra condition that only if the k-factor was zero, too, should
# we zero the exponent
	tst.l		%d0
	bne.b		fout_pack_set
# "mantissa" is all zero which means that the answer is zero. but, the '040
# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
# if the mantissa is zero, I will zero the exponent, too.
# the question now is whether the exponents sign bit is allowed to be non-zero
# for a zero, also...
	andi.w		&0xf000,FP_SCR0(%a6)	# keep top nibble; clear low 12 bits

fout_pack_set:

	lea		FP_SCR0(%a6),%a0	# pass: src addr

# entered with a0 = src addr and the dst addr on top of the stack
fout_pack_write:
	mov.l		(%sp)+,%a1		# pass: dst addr
	mov.l		&0xc,%d0		# pass: opsize is 12 bytes

	cmpi.b		SPCOND_FLG(%a6),&mda7_flg
	beq.b		fout_pack_a7

	bsr.l		_dmem_write		# write 12-byte operand to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# we don't want to do the write if the exception occurred in supervisor mode
# so _mem_write2() handles this for us.
fout_pack_a7:
	bsr.l		_mem_write2		# write 12-byte operand to memory

	tst.l		%d1			# did dstore fail?
	bne.w		fout_ext_err		# yes

	rts

# input is not a NORM; d0 still holds the source tag
fout_pack_not_norm:
	cmpi.b		%d0,&DENORM		# is it a DENORM?
	beq.w		fout_pack_norm		# yes; convert it like a NORM
	lea		FP_SRC(%a6),%a0		# no; write FP_SRC out as is
	clr.w		2+FP_SRC_EX(%a6)	# zero the word after the exponent
	cmpi.b		%d0,&SNAN		# is it an SNAN?
	beq.b		fout_pack_snan		# yes
	bra.b		fout_pack_write		# no

fout_pack_snan:
	ori.w		&snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
	bset		&0x6,FP_SRC_HI(%a6)	# set snan bit
	bra.b		fout_pack_write
7951

7952
#########################################################################
7953
# XDEF ****************************************************************	#
7954
#	fmul(): emulates the fmul instruction				#
7955
#	fsmul(): emulates the fsmul instruction				#
7956
#	fdmul(): emulates the fdmul instruction				#
7957
#									#
7958
# XREF ****************************************************************	#
7959
#	scale_to_zero_src() - scale src exponent to zero		#
7960
#	scale_to_zero_dst() - scale dst exponent to zero		#
7961
#	unf_res() - return default underflow result			#
7962
#	ovf_res() - return default overflow result			#
7963
#	res_qnan() - return QNAN result					#
7964
#	res_snan() - return SNAN result					#
7965
#									#
7966
# INPUT ***************************************************************	#
7967
#	a0 = pointer to extended precision source operand		#
7968
#	a1 = pointer to extended precision destination operand		#
7969
#	d0  rnd prec,mode						#
7970
#									#
7971
# OUTPUT **************************************************************	#
7972
#	fp0 = result							#
7973
#	fp1 = EXOP (if exception occurred)				#
7974
#									#
7975
# ALGORITHM ***********************************************************	#
7976
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
7977
# norms/denorms into ext/sgl/dbl precision.				#
7978
#	For norms/denorms, scale the exponents such that a multiply	#
7979
# instruction won't cause an exception. Use the regular fmul to		#
7980
# compute a result. Check if the regular operands would have taken	#
7981
# an exception. If so, return the default overflow/underflow result	#
7982
# and return the EXOP if exceptions are enabled. Else, scale the	#
7983
# result operand to the proper exponent.				#
7984
#									#
7985
#########################################################################
7986

7987
	align		0x10
7988
#
# scale factor thresholds used by fmul_norm. each table is indexed by the
# rounding precision (entries in the order ext, sgl, dbl) and compared
# against the combined scale factor of the two operands.
#
tbl_fmul_ovfl:
	long		0x3fff - 0x7ffe		# ext_max
	long		0x3fff - 0x407e		# sgl_max
	long		0x3fff - 0x43fe		# dbl_max
tbl_fmul_unfl:
	long		0x3fff + 0x0001		# ext_unfl
	long		0x3fff - 0x3f80		# sgl_unfl
	long		0x3fff - 0x3c00		# dbl_unfl
7996

7997
	global		fsmul
7998
#
# fsmul/fdmul/fmul entry points.
#	fsmul and fdmul force the rounding precision in d0 to single or
# double and then continue as fmul. fmul saves the rnd info in L_SCR3,
# combines the operand tags and, when both operands are NORMs, copies them
# to FP_SCR0 (src) and FP_SCR1 (dst), scales both exponents and uses the
# summed scale factor to pick the normal/overflow/underflow path.
#
fsmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
	bra.b		fmul

	global		fdmul
fdmul:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl prec

	global		fmul
fmul:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	clr.w		%d1
	mov.b		DTAG(%a6),%d1
	lsl.b		&0x3,%d1		# d1 = DTAG * 8
	or.b		STAG(%a6),%d1		# combine src tags
	bne.w		fmul_not_norm		# optimize on non-norm input

fmul_norm:
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)

	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)

	bsr.l		scale_to_zero_src	# scale src exponent
	mov.l		%d0,-(%sp)		# save scale factor 1

	bsr.l		scale_to_zero_dst	# scale dst exponent

	add.l		%d0,(%sp)		# SCALE_FACTOR = scale1 + scale2

# NOTE(review): the table index below is d1.w, but only the low byte is
# shifted; this relies on bits 15-8 of L_SCR3 being clear -- confirm.
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
	lsr.b		&0x6,%d1		# shift to lo bits
	mov.l		(%sp)+,%d0		# load S.F.
	cmp.l		%d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
	beq.w		fmul_may_ovfl		# result may rnd to overflow
	blt.w		fmul_ovfl		# result will overflow

	cmp.l		%d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
	beq.w		fmul_may_unfl		# result may rnd to no unfl
	bgt.w		fmul_unfl		# result will underflow

#
# NORMAL:
# - the result of the multiply operation will neither overflow nor underflow.
# - do the multiply to the proper precision and rounding mode.
# - scale the result exponent using the scale factor. if both operands were
# normalized then we really don't need to go through this scaling. but for now,
# this will do.
#
fmul_normal:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# entered with fp0 = scaled result and d0 = scale factor
fmul_normal_exit:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# exp = exp - scale factor
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts
8079

8080
#
8081
# OVERFLOW:
8082
# - the result of the multiply operation is an overflow.
8083
# - do the multiply to the proper precision and rounding mode in order to
8084
# set the inexact bits.
8085
# - calculate the default result and return it in fp0.
8086
# - if overflow or inexact is enabled, we need a multiply result rounded to
8087
# extended precision. if the original operation was extended, then we have this
8088
# result. if the original operation was single or double, we have to do another
8089
# multiply using extended precision and the correct rounding mode. the result
8090
# of this operation then has its exponent scaled by -0x6000 to create the
8091
# exceptional operand.
8092
#
8093
#
# fmul_ovfl: the multiply is known to overflow.
#	d0 holds the scale factor on entry. it is consumed while the EXOP
# is built (only when OVFL or INEX is enabled) and is then reloaded with
# the rnd prec,mode for ovf_res(). ovf_res() hands back the ccodes in d0
# and a pointer to the default result in a0.
#
fmul_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# save setting this until now because this is where fmul_may_ovfl may jump in
fmul_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fmul_ovfl_ena		# yes

# calculate the default result
fmul_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass rnd prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled; Create EXOP:
# - if precision is extended, then we have the EXOP. simply bias the exponent
# with an extra -0x6000. if the precision is single or double, we need to
# calculate a result rounded to extended precision.
#
fmul_ovfl_ena:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# test the rnd prec
	bne.b		fmul_ovfl_ena_sd	# it's sgl or dbl

fmul_ovfl_ena_cont:
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack

	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# exp = exp - scale factor
	subi.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign bit
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fmul_ovfl_dis

# sgl or dbl: redo the multiply with the rnd prec bits cleared (extended)
fmul_ovfl_ena_sd:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# keep rnd mode only
	fmov.l		%d1,%fpcr		# set FPCR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	bra.b		fmul_ovfl_ena_cont
8163

8164
#
8165
# may OVERFLOW:
8166
# - the result of the multiply operation MAY overflow.
8167
# - do the multiply to the proper precision and rounding mode in order to
8168
# set the inexact bits.
8169
# - calculate the default result and return it in fp0.
8170
#
8171
#
# fmul_may_ovfl: the scale factor sits exactly on the overflow threshold.
#	do the scaled multiply and look at the magnitude of the scaled
# result: |result| >= 2 means the real result overflows.
#
fmul_may_ovfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is kept
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fmul_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fmul_normal_exit
8190

8191
#
# UNDERFLOW:
# - the result of the multiply operation is an underflow.
# - do the multiply to the proper precision and rounding mode in order to
# set the inexact bits.
# - calculate the default result and return it in fp0.
# - if underflow or inexact is enabled, we need a multiply result rounded to
# extended precision. if the original operation was extended, then we have this
# result. if the original operation was single or double, we have to do another
# multiply using extended precision and the correct rounding mode. the result
# of this operation then has its exponent scaled by +0x6000 to create the
# exceptional operand.
#
fmul_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

# for fun, let's use only extended precision, round to zero. then, let
# the unf_res() routine figure out all the rest.
# will we get the correct answer?
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fmul_unfl_ena		# yes

fmul_unfl_dis:
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result

	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# UNFL is enabled.
# the EXOP is computed in fp1 so that the RZ result in fp0 survives for
# fmul_unfl_dis.
#
fmul_unfl_ena:
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fmul_unfl_ena_sd	# no, sgl or dbl

# if the rnd mode is anything but RZ, then we have to re-do the above
# multiplication because we used RZ for all.
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

fmul_unfl_ena_cont:
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR

	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# exp = exp - scale factor
	addi.l		&0x6000,%d1		# add bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.w		fmul_unfl_dis

# sgl or dbl: round the EXOP to extended using the user's rnd mode
fmul_unfl_ena_sd:
	mov.l		L_SCR3(%a6),%d1
	andi.b		&0x30,%d1		# use only rnd mode
	fmov.l		%d1,%fpcr		# set FPCR

	bra.b		fmul_unfl_ena_cont
8278

8279
# MAY UNDERFLOW:
8280
# -use the correct rounding mode and precision. this code favors operations
8281
# that do not underflow.
8282
#
# fmul_may_unfl: the scale factor sits exactly on the underflow threshold.
#	|scaled result| > 2 : no underflow, finish through fmul_normal_exit
#	|scaled result| < 2 : underflow, go to fmul_unfl
#	otherwise           : redo the multiply with RZ to see the
#			      pre-rounded magnitude and decide again
#
fmul_may_unfl:
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand

	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp0	# execute multiply

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# make a copy of result
	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
	fbgt.w		fmul_normal_exit	# greater; no underflow occurred
	fblt.w		fmul_unfl		# less; underflow occurred

#
# we still don't know if underflow occurred. result is ~ equal to 2. but,
# we don't know if the result was an underflow that rounded up to a 2 or
# a normalized number that rounded down to a 2. so, redo the entire operation
# using RZ as the rounding mode to see what the pre-rounded result is.
# this case should be relatively rare.
#
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst operand

	mov.l		L_SCR3(%a6),%d1
	andi.b		&0xc0,%d1		# keep rnd prec
	ori.b		&rz_mode*0x10,%d1	# insert RZ

	fmov.l		%d1,%fpcr		# set FPCR
	fmov.l		&0x0,%fpsr		# clear FPSR

	fmul.x		FP_SCR0(%a6),%fp1	# execute multiply

	fmov.l		&0x0,%fpcr		# clear FPCR
	fabs.x		%fp1			# make absolute value
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
	fbge.w		fmul_normal_exit	# no; no underflow occurred
	bra.w		fmul_unfl		# yes, underflow occurred
8323

8324
################################################################################
8325

8326
#
8327
# Multiply: inputs are not both normalized; what are they?
8328
#
8329
#
# fmul_not_norm: dispatch on the combined operand tags.
#	d1 = (DTAG << 3) | STAG as built in fmul. each group of eight table
# entries below is one destination type, labelled "dst x src". the two
# spare slots in every group hold a zero offset.
#
fmul_not_norm:
	mov.w		(tbl_fmul_op.b,%pc,%d1.w*2),%d1
	jmp		(tbl_fmul_op.b,%pc,%d1.w)

	swbeg		&48
tbl_fmul_op:
	short		fmul_norm	- tbl_fmul_op # NORM x NORM
	short		fmul_zero	- tbl_fmul_op # NORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # NORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # NORM x QNAN
	short		fmul_norm	- tbl_fmul_op # NORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # NORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_zero	- tbl_fmul_op # ZERO x NORM
	short		fmul_zero	- tbl_fmul_op # ZERO x ZERO
	short		fmul_res_operr	- tbl_fmul_op # ZERO x INF
	short		fmul_res_qnan	- tbl_fmul_op # ZERO x QNAN
	short		fmul_zero	- tbl_fmul_op # ZERO x DENORM
	short		fmul_res_snan	- tbl_fmul_op # ZERO x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_inf_dst	- tbl_fmul_op # INF x NORM
	short		fmul_res_operr	- tbl_fmul_op # INF x ZERO
	short		fmul_inf_dst	- tbl_fmul_op # INF x INF
	short		fmul_res_qnan	- tbl_fmul_op # INF x QNAN
	short		fmul_inf_dst	- tbl_fmul_op # INF x DENORM
	short		fmul_res_snan	- tbl_fmul_op # INF x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_qnan	- tbl_fmul_op # QNAN x NORM
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x ZERO
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x INF
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x QNAN
	short		fmul_res_qnan	- tbl_fmul_op # QNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # QNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_norm	- tbl_fmul_op # DENORM x NORM
	short		fmul_zero	- tbl_fmul_op # DENORM x ZERO
	short		fmul_inf_src	- tbl_fmul_op # DENORM x INF
	short		fmul_res_qnan	- tbl_fmul_op # DENORM x QNAN
	short		fmul_norm	- tbl_fmul_op # DENORM x DENORM
	short		fmul_res_snan	- tbl_fmul_op # DENORM x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

	short		fmul_res_snan	- tbl_fmul_op # SNAN x NORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x ZERO
	short		fmul_res_snan	- tbl_fmul_op # SNAN x INF
	short		fmul_res_snan	- tbl_fmul_op # SNAN x QNAN
	short		fmul_res_snan	- tbl_fmul_op # SNAN x DENORM
	short		fmul_res_snan	- tbl_fmul_op # SNAN x SNAN
	short		tbl_fmul_op	- tbl_fmul_op #
	short		tbl_fmul_op	- tbl_fmul_op #

# long-branch stubs so the 16-bit table offsets above can reach the handlers
fmul_res_operr:
	bra.l		res_operr
fmul_res_snan:
	bra.l		res_snan
fmul_res_qnan:
	bra.l		res_qnan
8395

8396
#
8397
# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398
#
8399
	global		fmul_zero		# global for fsglmul
8400
#
# fmul_zero: return a ZERO whose sign is the exclusive-or of the operand
# signs.
#	in:	a0 = ptr to src operand, a1 = ptr to dst operand
#	out:	fp0 = +/-ZERO; FPSR_CC byte overwritten with Z (and N)
#	scratch: d0, d1
#
fmul_zero:
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_zero_p		# result ZERO is pos.
fmul_zero_n:
	fmov.s		&0x80000000,%fp0	# load -ZERO
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
	rts
fmul_zero_p:
	fmov.s		&0x00000000,%fp0	# load +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts
8413

8414
#
8415
# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416
#
8417
# Note: The j-bit for an infinity is a don't-care. However, to be
8418
# strictly compatible w/ the 68881/882, we make sure to return an
8419
# INF w/ the j-bit set if the input INF j-bit was set. Destination
8420
# INFs take priority.
8421
#
8422
	global		fmul_inf_dst		# global for fsglmul
8423
#
# fmul_inf_dst / fmul_inf_src: return the dst (or src) INF in fp0 with its
# sign replaced by the exclusive-or of the operand signs.
#	in:	a0 = ptr to src operand, a1 = ptr to dst operand
#	out:	fp0 = +/-INF; FPSR_CC byte overwritten with INF (and N)
#	scratch: d0, d1
#
fmul_inf_dst:
	fmovm.x		DST(%a1),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p		# result INF is pos.
fmul_inf_dst_n:
	fabs.x		%fp0			# clear result sign
	fneg.x		%fp0			# set result sign
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
	rts
fmul_inf_dst_p:
	fabs.x		%fp0			# clear result sign
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
	rts

	global		fmul_inf_src		# global for fsglmul
# same as fmul_inf_dst but the INF comes from the src operand; the sign
# fix-up code of fmul_inf_dst is shared.
fmul_inf_src:
	fmovm.x		SRC(%a0),&0x80		# return INF result in fp0
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
	mov.b		DST_EX(%a1),%d1
	eor.b		%d0,%d1
	bpl.b		fmul_inf_dst_p		# result INF is pos.
	bra.b		fmul_inf_dst_n
8447

8448
#########################################################################
8449
# XDEF ****************************************************************	#
8450
#	fin(): emulates the fmove instruction				#
8451
#	fsin(): emulates the fsmove instruction				#
8452
#	fdin(): emulates the fdmove instruction				#
8453
#									#
8454
# XREF ****************************************************************	#
8455
#	norm() - normalize mantissa for EXOP on denorm			#
8456
#	scale_to_zero_src() - scale src exponent to zero		#
8457
#	ovf_res() - return default overflow result			#
8458
#	unf_res() - return default underflow result			#
8459
#	res_qnan_1op() - return QNAN result				#
8460
#	res_snan_1op() - return SNAN result				#
8461
#									#
8462
# INPUT ***************************************************************	#
8463
#	a0 = pointer to extended precision source operand		#
8464
#	d0 = round prec/mode						#
8465
#									#
8466
# OUTPUT **************************************************************	#
8467
#	fp0 = result							#
8468
#	fp1 = EXOP (if exception occurred)				#
8469
#									#
8470
# ALGORITHM ***********************************************************	#
8471
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8472
# norms into extended, single, and double precision.			#
8473
#	Norms can be emulated w/ a regular fmove instruction. For	#
8474
# sgl/dbl, must scale exponent and perform an "fmove". Check to see	#
8475
# if the result would have overflowed/underflowed. If so, use unf_res()	#
8476
# or ovf_res() to return the default result. Also return EXOP if	#
8477
# exception is enabled. If no exception, return the default result.	#
8478
#	Unnorms don't pass through here.				#
8479
#									#
8480
#########################################################################
8481

8482
	global		fsin
8483
#
# fsin/fdin/fin entry points.
#	fsin and fdin force the rounding precision in d0 to single or
# double and then continue as fin. fin saves the rnd info in L_SCR3 and
# branches away for any source tag other than NORM.
#
fsin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
	bra.b		fin

	global		fdin
fdin:
	andi.b		&0x30,%d0		# clear rnd prec
	ori.b		&d_mode*0x10,%d0	# insert dbl precision

	global		fin
fin:
	mov.l		%d0,L_SCR3(%a6)		# store rnd info

	mov.b		STAG(%a6),%d1		# fetch src optype tag
	bne.w		fin_not_norm		# optimize on non-norm input

#
# FP MOVE IN: NORMs and DENORMs ONLY!
#
fin_norm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

#
# precision selected is extended. so...we cannot get an underflow
# or overflow because of rounding to the correct precision. so...
# skip the scaling and unscaling...
#
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_norm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_norm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	rts
8518

8519
#
8520
# for an extended precision DENORM, the UNFL exception bit is set
8521
# the accrued bit is NOT set in this instance(no inexactness!)
8522
#
8523
# entered with d0 = rnd prec,mode and a0 = ptr to the DENORM source
fin_denorm:
	andi.b		&0xc0,%d0		# is precision extended?
	bne.w		fin_not_ext		# no, so go handle dbl or sgl

	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
	tst.b		SRC_EX(%a0)		# is the operand negative?
	bpl.b		fin_denorm_done		# no
	bset		&neg_bit,FPSR_CC(%a6)	# yes, so set 'N' ccode bit
fin_denorm_done:
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
	bne.b		fin_denorm_unfl_ena	# yes
	rts

#
# the input is an extended DENORM and underflow is enabled in the FPCR.
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
# exponent and insert back into the operand.
#
fin_denorm_unfl_ena:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
	bsr.l		norm			# normalize result
	neg.w		%d0			# new exponent = -(shft val)
	addi.w		&0x6000,%d0		# add new bias to exponent
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
	andi.w		&0x8000,%d1		# keep old sign
	andi.w		&0x7fff,%d0		# clear sign position
	or.w		%d1,%d0			# concat new exp,old sign
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	rts
8557

8558
#
8559
# operand is to be rounded to single or double precision
8560
#
8561
# entered with d0 = rnd prec bits only (masked with 0xc0 by the caller)
fin_not_ext:
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
	bne.b		fin_dbl

#
# operand is to be rounded to single precision
#
fin_sgl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow

#
# operand will NOT overflow or underflow when moved into the fp reg file
#
fin_sd_normal:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		%fpsr,%d1		# save FPSR
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# entered with fp0 = scaled result and d0 = scale factor
fin_sd_normal_exit:
	mov.l		%d2,-(%sp)		# save d2
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# exp = exp - scale factor
	andi.w		&0x8000,%d2		# keep old sign
	or.w		%d1,%d2			# concat old sign,new exponent
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
	rts
8607

8608
#
8609
# operand is to be rounded to double precision
8610
#
8611
# same flow as fin_sgl but with the double precision exponent limits
fin_dbl:
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
	bsr.l		scale_to_zero_src	# calculate scale factor

	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
	bge.w		fin_sd_unfl		# yes; go handle underflow
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
	beq.w		fin_sd_may_ovfl		# maybe; go check
	blt.w		fin_sd_ovfl		# yes; go handle overflow
	bra.w		fin_sd_normal		# no; go handle normalized op
8623

8624
#
8625
# operand WILL underflow when moved in to the fp register file
8626
#
8627
# entered with FP_SCR0 = scaled operand and d0 = scale factor
fin_sd_unfl:
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit

	tst.b		FP_SCR0_EX(%a6)		# is operand negative?
	bpl.b		fin_sd_unfl_tst
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit

# if underflow or inexact is enabled, then go calculate the EXOP first.
fin_sd_unfl_tst:
	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
	bne.b		fin_sd_unfl_ena		# yes

fin_sd_unfl_dis:
	lea		FP_SCR0(%a6),%a0	# pass: result addr
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
	bsr.l		unf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
	rts

#
# operand will underflow AND underflow or inexact is enabled.
# Therefore, we must return the result rounded to extended precision.
# the EXOP is built in FP_SCR1 so that FP_SCR0 stays intact for unf_res().
#
fin_sd_unfl_ena:
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent

	mov.l		%d2,-(%sp)		# save d2
	mov.w		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	sub.l		%d0,%d1			# subtract scale factor
	andi.w		&0x8000,%d2		# extract old sign
	addi.l		&0x6000,%d1		# add new bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d1,%d2			# concat old sign,new exp
	mov.w		%d2,FP_SCR1_EX(%a6)	# insert new exponent
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
	mov.l		(%sp)+,%d2		# restore d2
	bra.b		fin_sd_unfl_dis
8669

8670
#
8671
# operand WILL overflow.
8672
#
8673
# entered with FP_SCR0 = scaled operand and d0 = scale factor
fin_sd_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform move

	fmov.l		&0x0,%fpcr		# clear FPCR
	fmov.l		%fpsr,%d1		# save FPSR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

# fin_sd_may_ovfl joins here once it knows the move overflowed
fin_sd_ovfl_tst:
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex

	mov.b		FPCR_ENABLE(%a6),%d1
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
	bne.b		fin_sd_ovfl_ena		# yes

#
# OVFL is not enabled; therefore, we must create the default result by
# calling ovf_res().
#
fin_sd_ovfl_dis:
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
	sne		%d1			# set sign param accordingly
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
	bsr.l		ovf_res			# calculate default result
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
	fmovm.x		(%a0),&0x80		# return default result in fp0
	rts

#
# OVFL is enabled.
# the INEX2 bit has already been updated by the round to the correct precision.
# now, round to extended(and don't alter the FPSR).
#
fin_sd_ovfl_ena:
	mov.l		%d2,-(%sp)		# save d2
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
	mov.l		%d1,%d2			# make a copy
	andi.l		&0x7fff,%d1		# strip sign
	andi.w		&0x8000,%d2		# keep old sign
	sub.l		%d0,%d1			# exp = exp - scale factor
	sub.l		&0x6000,%d1		# subtract bias
	andi.w		&0x7fff,%d1		# clear sign position
	or.w		%d2,%d1			# concat old sign,new exp
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
	mov.l		(%sp)+,%d2		# restore d2
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
	bra.b		fin_sd_ovfl_dis
8723

8724
#
8725
# the move in MAY overflow. so...
8726
#
8727
# the scale factor sits exactly on the overflow threshold: do the scaled
# move and let the magnitude of the rounded result decide.
fin_sd_may_ovfl:
	fmov.l		&0x0,%fpsr		# clear FPSR
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR

	fmov.x		FP_SCR0(%a6),%fp0	# perform the move

	fmov.l		%fpsr,%d1		# save status
	fmov.l		&0x0,%fpcr		# clear FPCR

	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N

	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is kept
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
	fbge.w		fin_sd_ovfl_tst		# yes; overflow has occurred

# no, it didn't overflow; we have correct result
	bra.w		fin_sd_normal_exit
8744

8745
##########################################################################
8746

8747
#
8748
# operand is not a NORM: check its optype and branch accordingly
8749
#
8750
# entered with d1 = source operand tag (already known not to be NORM)
fin_not_norm:
	cmpi.b		%d1,&DENORM		# weed out DENORM
	beq.w		fin_denorm
	cmpi.b		%d1,&SNAN		# weed out SNANs
	beq.l		res_snan_1op
	cmpi.b		%d1,&QNAN		# weed out QNANs
	beq.l		res_qnan_1op

#
# do the fmove in; at this point, only possible ops are ZERO and INF.
# use fmov to determine ccodes.
# prec:mode should be zero at this point but it won't affect answer anyways.
#
	fmov.x		SRC(%a0),%fp0		# do fmove in
	fmov.l		%fpsr,%d0		# no exceptions possible
	rol.l		&0x8,%d0		# put ccodes in lo byte
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
	rts
8768

8769
#########################################################################
8770
# XDEF ****************************************************************	#
8771
#	fdiv(): emulates the fdiv instruction				#
8772
#	fsdiv(): emulates the fsdiv instruction				#
8773
#	fddiv(): emulates the fddiv instruction				#
8774
#									#
8775
# XREF ****************************************************************	#
8776
#	scale_to_zero_src() - scale src exponent to zero		#
8777
#	scale_to_zero_dst() - scale dst exponent to zero		#
8778
#	unf_res() - return default underflow result			#
8779
#	ovf_res() - return default overflow result			#
8780
#	res_qnan() - return QNAN result					#
8781
#	res_snan() - return SNAN result					#
8782
#									#
8783
# INPUT ***************************************************************	#
8784
#	a0 = pointer to extended precision source operand		#
8785
#	a1 = pointer to extended precision destination operand		#
8786
#	d0 = rnd prec,mode						#
8787
#									#
8788
# OUTPUT **************************************************************	#
8789
#	fp0 = result							#
8790
#	fp1 = EXOP (if exception occurred)				#
8791
#									#
8792
# ALGORITHM ***********************************************************	#
8793
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
8794
# norms/denorms into ext/sgl/dbl precision.				#
8795
#	For norms/denorms, scale the exponents such that a divide	#
8796
# instruction won't cause an exception. Use the regular fdiv to		#
8797
# compute a result. Check if the regular operands would have taken	#
8798
# an exception. If so, return the default overflow/underflow result	#
8799
# and return the EXOP if exceptions are enabled. Else, scale the	#
8800
# result operand to the proper exponent.				#
8801
#									#
8802
#########################################################################
8803

8804
	align		0x10
8805
# Scale-factor thresholds used by fdiv_norm. Each table holds one longword
# per rounding precision and is indexed by the precision number taken from
# L_SCR3 (entry order: ext, sgl, dbl). fdiv_norm compares the combined
# scale factor in d0 against the selected entry.
tbl_fdiv_unfl:
8806
	long		0x3fff - 0x0000		# ext_unfl
8807
	long		0x3fff - 0x3f81		# sgl_unfl
8808
	long		0x3fff - 0x3c01		# dbl_unfl
8809

8810
# Overflow counterpart of the table above: a scale factor <= the entry
# sends fdiv_norm to fdiv_may_ovfl.
tbl_fdiv_ovfl:
8811
	long		0x3fff - 0x7ffe		# ext overflow exponent
8812
	long		0x3fff - 0x407e		# sgl overflow exponent
8813
	long		0x3fff - 0x43fe		# dbl overflow exponent
8814

8815
	global		fsdiv
8816
# fsdiv: entry point for the single-precision divide.
# Keeps only bits 5:4 of d0 (the rounding mode), inserts the single
# precision code, and then joins the common fdiv path.
fsdiv:
8817
	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
8818
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
8819
	bra.b		fdiv
8820

8821
	global		fddiv
8822
# fddiv: entry point for the double-precision divide.
# Keeps only bits 5:4 of d0 (the rounding mode), inserts the double
# precision code, and falls through into fdiv.
fddiv:
8823
	andi.b		&0x30,%d0		# keep rnd mode; clear rnd prec
8824
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
8825

8826
	global		fdiv
8827
# fdiv: common entry point.
#   in:  a0 = src operand, a1 = dst operand, d0 = rnd prec,mode
# Saves the rounding info in L_SCR3 and builds a dispatch index in d1 as
# (DTAG << 3) | STAG. An index of zero falls through to fdiv_norm; any
# other value is dispatched through tbl_fdiv_op by fdiv_not_norm.
fdiv:
8828
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
8829

8830
	clr.w		%d1			# index is used as a word later
8831
	mov.b		DTAG(%a6),%d1		# dst tag
8832
	lsl.b		&0x3,%d1		# dst tag selects a group of 8 table entries
8833
	or.b		STAG(%a6),%d1		# combine src tags
8834

8835
	bne.w		fdiv_not_norm		# optimize on non-norm input
8836

8837
#
8838
# DIVIDE: NORMs and DENORMs ONLY!
8839
#
8840
# fdiv_norm: divide path for NORM/DENORM operands.
# Copies dst into FP_SCR1 and src into FP_SCR0, scales both, and combines
# the two scale factors into d0. d1 receives the precision index (taken
# from bits 7:6 of the low byte of L_SCR3). The combined scale factor is
# then compared against the per-precision thresholds to pick one of:
#   fdiv_may_ovfl - d0 <= overflow threshold
#   fdiv_may_unfl - d0 == underflow threshold
#   fdiv_unfl     - d0 >  underflow threshold
#   fdiv_normal   - otherwise (fall through)
# d0 (scale factor) and d1 (precision index) are live on exit.
fdiv_norm:
8841
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# FP_SCR1 = copy of dst
8842
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
8843
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
8844

8845
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# FP_SCR0 = copy of src
8846
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
8847
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
8848

8849
	bsr.l		scale_to_zero_src	# scale src exponent
8850
	mov.l		%d0,-(%sp)		# save src scale factor
8851

8852
	bsr.l		scale_to_zero_dst	# scale dst exponent
8853

8854
	neg.l		(%sp)			# (%sp) = -(src scale factor)
8855
	add.l		%d0,(%sp)		# (%sp) = dst scale - src scale
8856

8857
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision
8858
	lsr.b		&0x6,%d1		# shift to lo bits
8859
	mov.l		(%sp)+,%d0		# load S.F.
8860
	cmp.l		%d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861
	ble.w		fdiv_may_ovfl		# maybe; decide after the divide
8862

8863
	cmp.l		%d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864
	beq.w		fdiv_may_unfl		# maybe
8865
	bgt.w		fdiv_unfl		# yes; go handle underflow
8866

8867
# fdiv_normal: no overflow or underflow is expected.
# Divides the scaled dst (FP_SCR1) by the scaled src (FP_SCR0) under the
# user's rounding precision/mode, ORs the resulting FPSR into USER_FPSR,
# then falls into fdiv_normal_exit to undo the scaling.
#   in:  d0 = combined scale factor
fdiv_normal:
8868
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8869

8870
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8871
	fmov.l		&0x0,%fpsr		# clear FPSR
8872

8873
	fdiv.x		FP_SCR0(%a6),%fp0	# perform divide
8874

8875
	fmov.l		%fpsr,%d1		# save FPSR
8876
	fmov.l		&0x0,%fpcr		# clear FPCR
8877

8878
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8879

8880
# fdiv_normal_exit: unscale the result held in fp0.
# Writes fp0 to FP_SCR0, subtracts the scale factor in d0 from the 15-bit
# exponent, re-attaches the original sign bit, and reloads fp0.
# Also entered from fdiv_no_ovfl and fdiv_may_unfl with d0 = scale factor.
# d2 is preserved via the stack; d1 is clobbered.
fdiv_normal_exit:
8881
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
8882
	mov.l		%d2,-(%sp)		# store d2
8883
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
8884
	mov.l		%d1,%d2			# make a copy
8885
	andi.l		&0x7fff,%d1		# strip sign
8886
	andi.w		&0x8000,%d2		# keep old sign
8887
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
8888
	or.w		%d2,%d1			# concat old sign,new exp
8889
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8890
	mov.l		(%sp)+,%d2		# restore d2
8891
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
8892
	rts
8893

8894
tbl_fdiv_ovfl2:
8895
	long		0x7fff
8896
	long		0x407f
8897
	long		0x43ff
8898

8899
fdiv_no_ovfl:
8900
	mov.l		(%sp)+,%d0		# restore scale factor
8901
	bra.b		fdiv_normal_exit
8902

8903
# fdiv_may_ovfl: the scale factor says the result may overflow.
# Performs the divide on the scaled operands, then rebuilds the unscaled
# exponent of the result and compares it with tbl_fdiv_ovfl2[precision].
#   in:  d0 = scale factor, d1 = precision index (set by fdiv_norm)
# If the branch to fdiv_no_ovfl is taken, the scale factor is still on the
# stack and is popped there. Otherwise it is popped into d0 here and
# execution falls through into fdiv_ovfl_tst.
fdiv_may_ovfl:
8904
	mov.l		%d0,-(%sp)		# save scale factor
8905

8906
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8907

8908
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
8909
	fmov.l		&0x0,%fpsr		# clear FPSR
8910

8911
	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8912

8913
	fmov.l		%fpsr,%d0		# FPSR goes to d0; d1 still holds the precision index
8914
	fmov.l		&0x0,%fpcr		# clear FPCR
8915

8916
	or.l		%d0,USER_FPSR(%a6)	# save INEX,N
8917

8918
	fmovm.x		&0x01,-(%sp)		# save result to stack
8919
	mov.w		(%sp),%d0		# fetch new exponent
8920
	add.l		&0xc,%sp		# clear result from stack
8921
	andi.l		&0x7fff,%d0		# strip sign
8922
	sub.l		(%sp),%d0		# apply scale factor (exp - S.F.)
8923
	cmp.l		%d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # unscaled exp vs. overflow exp
8924
	blt.b		fdiv_no_ovfl		# below the limit; no overflow
8925
	mov.l		(%sp)+,%d0		# restore scale factor; fall into fdiv_ovfl_tst
8926

8927
fdiv_ovfl_tst:
8928
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8929

8930
	mov.b		FPCR_ENABLE(%a6),%d1
8931
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
8932
	bne.b		fdiv_ovfl_ena		# yes
8933

8934
fdiv_ovfl_dis:
8935
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
8936
	sne		%d1			# set sign param accordingly
8937
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
8938
	bsr.l		ovf_res			# calculate default result
8939
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
8940
	fmovm.x		(%a0),&0x80		# return default result in fp0
8941
	rts
8942

8943
fdiv_ovfl_ena:
8944
	mov.l		L_SCR3(%a6),%d1
8945
	andi.b		&0xc0,%d1		# is precision extended?
8946
	bne.b		fdiv_ovfl_ena_sd	# no, do sgl or dbl
8947

8948
fdiv_ovfl_ena_cont:
8949
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
8950

8951
	mov.l		%d2,-(%sp)		# save d2
8952
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
8953
	mov.w		%d1,%d2			# make a copy
8954
	andi.l		&0x7fff,%d1		# strip sign
8955
	sub.l		%d0,%d1			# add scale factor
8956
	subi.l		&0x6000,%d1		# subtract bias
8957
	andi.w		&0x7fff,%d1		# clear sign bit
8958
	andi.w		&0x8000,%d2		# keep old sign
8959
	or.w		%d2,%d1			# concat old sign,new exp
8960
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
8961
	mov.l		(%sp)+,%d2		# restore d2
8962
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
8963
	bra.b		fdiv_ovfl_dis
8964

8965
fdiv_ovfl_ena_sd:
8966
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst operand
8967

8968
	mov.l		L_SCR3(%a6),%d1
8969
	andi.b		&0x30,%d1		# keep rnd mode
8970
	fmov.l		%d1,%fpcr		# set FPCR
8971

8972
	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8973

8974
	fmov.l		&0x0,%fpcr		# clear FPCR
8975
	bra.b		fdiv_ovfl_ena_cont
8976

8977
fdiv_unfl:
8978
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8979

8980
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
8981

8982
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
8983
	fmov.l		&0x0,%fpsr		# clear FPSR
8984

8985
	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
8986

8987
	fmov.l		%fpsr,%d1		# save status
8988
	fmov.l		&0x0,%fpcr		# clear FPCR
8989

8990
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
8991

8992
	mov.b		FPCR_ENABLE(%a6),%d1
8993
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
8994
	bne.b		fdiv_unfl_ena		# yes
8995

8996
fdiv_unfl_dis:
8997
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
8998

8999
	lea		FP_SCR0(%a6),%a0	# pass: result addr
9000
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9001
	bsr.l		unf_res			# calculate default result
9002
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
9003
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9004
	rts
9005

9006
#
9007
# UNFL is enabled.
9008
#
9009
# fdiv_unfl_ena: underflow path taken when fdiv_unfl found the UNFL or
# INEX enable bits set. Builds the EXOP in fp1, then continues at
# fdiv_unfl_dis to produce the default result in fp0.
#   in:  d0 = scale factor, FP_SCR0 = scaled src, FP_SCR1 = scaled dst
# For extended precision the divide is redone with the full L_SCR3
# setting; for sgl/dbl, fdiv_unfl_ena_sd loads an FPCR that carries only
# the rounding mode before rejoining at fdiv_unfl_ena_cont.
# The exponent of the EXOP is (exp - scale factor + 0x6000) truncated to
# 15 bits, with the sign of the divide result.
fdiv_unfl_ena:
9010
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
9011

9012
	mov.l		L_SCR3(%a6),%d1
9013
	andi.b		&0xc0,%d1		# is precision extended?
9014
	bne.b		fdiv_unfl_ena_sd	# no, sgl or dbl
9015

9016
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9017

9018
fdiv_unfl_ena_cont:
9019
	fmov.l		&0x0,%fpsr		# clear FPSR
9020

9021
	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9022

9023
	fmov.l		&0x0,%fpcr		# clear FPCR
9024

9025
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
9026
	mov.l		%d2,-(%sp)		# save d2
9027
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9028
	mov.l		%d1,%d2			# make a copy
9029
	andi.l		&0x7fff,%d1		# strip sign
9030
	andi.w		&0x8000,%d2		# keep old sign
9031
	sub.l		%d0,%d1			# apply scale factor (exp - S.F.)
9032
	addi.l		&0x6000,%d1		# add bias
9033
	andi.w		&0x7fff,%d1		# keep the 15-bit exponent field
9034
	or.w		%d2,%d1			# concat old sign,new exp
9035
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exp
9036
	mov.l		(%sp)+,%d2		# restore d2
9037
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9038
	bra.w		fdiv_unfl_dis
9039

9040
fdiv_unfl_ena_sd:
9041
	mov.l		L_SCR3(%a6),%d1
9042
	andi.b		&0x30,%d1		# use only rnd mode
9043
	fmov.l		%d1,%fpcr		# set FPCR
9044

9045
	bra.b		fdiv_unfl_ena_cont
9046

9047
#
9048
# the divide operation MAY underflow:
9049
#
9050
# fdiv_may_unfl: the scale factor equals the underflow threshold, so the
# outcome depends on the magnitude of the scaled quotient.
#   in:  d0 = scale factor, FP_SCR0 = scaled src, FP_SCR1 = scaled dst
# The divide is done under the user's rounding settings and |result| is
# compared with 1:
#   |result| > 1  -> fdiv_normal_exit (no underflow)
#   |result| < 1  -> fdiv_unfl
#   otherwise     -> redo the divide with round-to-zero (see below)
fdiv_may_unfl:
9051
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
9052

9053
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9054
	fmov.l		&0x0,%fpsr		# clear FPSR
9055

9056
	fdiv.x		FP_SCR0(%a6),%fp0	# execute divide
9057

9058
	fmov.l		%fpsr,%d1		# save status
9059
	fmov.l		&0x0,%fpcr		# clear FPCR
9060

9061
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9062

9063
	fabs.x		%fp0,%fp1		# make a copy of result
9064
	fcmp.b		%fp1,&0x1		# compare |result| with 1
9065
	fbgt.w		fdiv_normal_exit	# |result| > 1; no underflow occurred
9066
	fblt.w		fdiv_unfl		# |result| < 1; underflow occurred
9067

9068
#
9069
# we still don't know if underflow occurred. result is ~ equal to 1. but,
9070
# we don't know if the result was an underflow that rounded up to a 1
9071
# or a normalized number that rounded down to a 1. so, redo the entire
9072
# operation using RZ as the rounding mode to see what the pre-rounded
9073
# result is. this case should be relatively rare.
9074
#
9075
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
9076

9077
	mov.l		L_SCR3(%a6),%d1
9078
	andi.b		&0xc0,%d1		# keep rnd prec
9079
	ori.b		&rz_mode*0x10,%d1	# insert RZ
9080

9081
	fmov.l		%d1,%fpcr		# set FPCR
9082
	fmov.l		&0x0,%fpsr		# clear FPSR
9083

9084
	fdiv.x		FP_SCR0(%a6),%fp1	# execute divide
9085

9086
	fmov.l		&0x0,%fpcr		# clear FPCR
9087
	fabs.x		%fp1			# make absolute value
9088
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
9089
	fbge.w		fdiv_normal_exit	# no; no underflow occurred
9090
	bra.w		fdiv_unfl		# yes; underflow occurred
9091

9092
############################################################################
9093

9094
#
9095
# Divide: inputs are not both normalized; what are they?
9096
#
9097
fdiv_not_norm:
9098
	mov.w		(tbl_fdiv_op.b,%pc,%d1.w*2),%d1
9099
	jmp		(tbl_fdiv_op.b,%pc,%d1.w*1)
9100

9101
	swbeg		&48
9102
tbl_fdiv_op:
9103
	short		fdiv_norm	- tbl_fdiv_op # NORM / NORM
9104
	short		fdiv_inf_load	- tbl_fdiv_op # NORM / ZERO
9105
	short		fdiv_zero_load	- tbl_fdiv_op # NORM / INF
9106
	short		fdiv_res_qnan	- tbl_fdiv_op # NORM / QNAN
9107
	short		fdiv_norm	- tbl_fdiv_op # NORM / DENORM
9108
	short		fdiv_res_snan	- tbl_fdiv_op # NORM / SNAN
9109
	short		tbl_fdiv_op	- tbl_fdiv_op #
9110
	short		tbl_fdiv_op	- tbl_fdiv_op #
9111

9112
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / NORM
9113
	short		fdiv_res_operr	- tbl_fdiv_op # ZERO / ZERO
9114
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / INF
9115
	short		fdiv_res_qnan	- tbl_fdiv_op # ZERO / QNAN
9116
	short		fdiv_zero_load	- tbl_fdiv_op # ZERO / DENORM
9117
	short		fdiv_res_snan	- tbl_fdiv_op # ZERO / SNAN
9118
	short		tbl_fdiv_op	- tbl_fdiv_op #
9119
	short		tbl_fdiv_op	- tbl_fdiv_op #
9120

9121
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / NORM
9122
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / ZERO
9123
	short		fdiv_res_operr	- tbl_fdiv_op # INF / INF
9124
	short		fdiv_res_qnan	- tbl_fdiv_op # INF / QNAN
9125
	short		fdiv_inf_dst	- tbl_fdiv_op # INF / DENORM
9126
	short		fdiv_res_snan	- tbl_fdiv_op # INF / SNAN
9127
	short		tbl_fdiv_op	- tbl_fdiv_op #
9128
	short		tbl_fdiv_op	- tbl_fdiv_op #
9129

9130
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / NORM
9131
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / ZERO
9132
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / INF
9133
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / QNAN
9134
	short		fdiv_res_qnan	- tbl_fdiv_op # QNAN / DENORM
9135
	short		fdiv_res_snan	- tbl_fdiv_op # QNAN / SNAN
9136
	short		tbl_fdiv_op	- tbl_fdiv_op #
9137
	short		tbl_fdiv_op	- tbl_fdiv_op #
9138

9139
	short		fdiv_norm	- tbl_fdiv_op # DENORM / NORM
9140
	short		fdiv_inf_load	- tbl_fdiv_op # DENORM / ZERO
9141
	short		fdiv_zero_load	- tbl_fdiv_op # DENORM / INF
9142
	short		fdiv_res_qnan	- tbl_fdiv_op # DENORM / QNAN
9143
	short		fdiv_norm	- tbl_fdiv_op # DENORM / DENORM
9144
	short		fdiv_res_snan	- tbl_fdiv_op # DENORM / SNAN
9145
	short		tbl_fdiv_op	- tbl_fdiv_op #
9146
	short		tbl_fdiv_op	- tbl_fdiv_op #
9147

9148
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / NORM
9149
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / ZERO
9150
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / INF
9151
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / QNAN
9152
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / DENORM
9153
	short		fdiv_res_snan	- tbl_fdiv_op # SNAN / SNAN
9154
	short		tbl_fdiv_op	- tbl_fdiv_op #
9155
	short		tbl_fdiv_op	- tbl_fdiv_op #
9156

9157
fdiv_res_qnan:
9158
	bra.l		res_qnan
9159
fdiv_res_snan:
9160
	bra.l		res_snan
9161
fdiv_res_operr:
9162
	bra.l		res_operr
9163

9164
	global		fdiv_zero_load		# global for fsgldiv
9165
# fdiv_zero_load: return a signed ZERO in fp0.
#   in:  a0 = src operand, a1 = dst operand
#   out: fp0 = -0 if the operand signs differ, +0 otherwise;
#        FPSR_CC(%a6) is overwritten with Z (plus N for -0)
# The sign is taken from bit 7 of the first byte of each operand's
# exponent word. d0 and d1 are clobbered.
fdiv_zero_load:
9166
	mov.b		SRC_EX(%a0),%d0		# result sign is exclusive
9167
	mov.b		DST_EX(%a1),%d1		# or of input signs.
9168
	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
9169
	bpl.b		fdiv_zero_load_p	# result is positive
9170
	fmov.s		&0x80000000,%fp0	# load a -ZERO
9171
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/N
9172
	rts
9173
fdiv_zero_load_p:
9174
	fmov.s		&0x00000000,%fp0	# load a +ZERO
9175
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
9176
	rts
9177

9178
#
9179
# The destination was In Range and the source was a ZERO. The result,
9180
# Therefore, is an INF w/ the proper sign.
9181
# So, determine the sign and return a new INF (w/ the j-bit cleared).
9182
#
9183
	global		fdiv_inf_load		# global for fsgldiv
9184
# fdiv_inf_load: divide by ZERO; return a signed INF in fp0.
#   in:  a0 = src operand, a1 = dst operand
#   out: fp0 = -INF if the operand signs differ, +INF otherwise;
#        DZ and ADZ are ORed into the low word of USER_FPSR;
#        FPSR_CC(%a6) is overwritten with INF (plus N for -INF)
# d0 and d1 are clobbered.
fdiv_inf_load:
9185
	ori.w		&dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
9186
	mov.b		SRC_EX(%a0),%d0		# load both signs
9187
	mov.b		DST_EX(%a1),%d1
9188
	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
9189
	bpl.b		fdiv_inf_load_p		# result is positive
9190
	fmov.s		&0xff800000,%fp0	# make result -INF
9191
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192
	rts
9193
fdiv_inf_load_p:
9194
	fmov.s		&0x7f800000,%fp0	# make result +INF
9195
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
9196
	rts
9197

9198
#
9199
# The destination was an INF w/ an In Range or ZERO source, the result is
9200
# an INF w/ the proper sign.
9201
# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
9202
# dst INF is set, then the j-bit of the result INF is also set).
9203
#
9204
	global		fdiv_inf_dst		# global for fsgldiv
9205
# fdiv_inf_dst: the destination is an INF; return it with the result sign.
#   in:  a0 = src operand, a1 = dst operand (the INF)
#   out: fp0 = dst with its sign forced negative if the operand signs
#        differ, positive otherwise;
#        FPSR_CC(%a6) is overwritten with INF (plus N when negative)
# The dst operand itself is loaded (rather than a constant) so that its
# mantissa bits carry over into the result. d0 and d1 are clobbered.
fdiv_inf_dst:
9206
	mov.b		DST_EX(%a1),%d0		# load both signs
9207
	mov.b		SRC_EX(%a0),%d1
9208
	eor.b		%d0,%d1			# bit 7 = src sign ^ dst sign
9209
	bpl.b		fdiv_inf_dst_p		# result is positive
9210

9211
	fmovm.x		DST(%a1),&0x80		# return result in fp0
9212
	fabs.x		%fp0			# clear sign bit
9213
	fneg.x		%fp0			# set sign bit
9214
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215
	rts
9216

9217
fdiv_inf_dst_p:
9218
	fmovm.x		DST(%a1),&0x80		# return result in fp0
9219
	fabs.x		%fp0			# return positive INF
9220
	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
9221
	rts
9222

9223
#########################################################################
9224
# XDEF ****************************************************************	#
9225
#	fneg(): emulates the fneg instruction				#
9226
#	fsneg(): emulates the fsneg instruction				#
9227
#	fdneg(): emulates the fdneg instruction				#
9228
#									#
9229
# XREF ****************************************************************	#
9230
#	norm() - normalize a denorm to provide EXOP			#
9231
#	scale_to_zero_src() - scale sgl/dbl source exponent		#
9232
#	ovf_res() - return default overflow result			#
9233
#	unf_res() - return default underflow result			#
9234
#	res_qnan_1op() - return QNAN result				#
9235
#	res_snan_1op() - return SNAN result				#
9236
#									#
9237
# INPUT ***************************************************************	#
9238
#	a0 = pointer to extended precision source operand		#
9239
#	d0 = rnd prec,mode						#
9240
#									#
9241
# OUTPUT **************************************************************	#
9242
#	fp0 = result							#
9243
#	fp1 = EXOP (if exception occurred)				#
9244
#									#
9245
# ALGORITHM ***********************************************************	#
9246
#	Handle NANs, zeroes, and infinities as special cases. Separate	#
9247
# norms/denorms into ext/sgl/dbl precisions. Extended precision can be	#
9248
# emulated by simply setting sign bit. Sgl/dbl operands must be scaled	#
9249
# and an actual fneg performed to see if overflow/underflow would have	#
9250
# occurred. If so, return default underflow/overflow result. Else,	#
9251
# scale the result exponent and return result. FPSR gets set based on	#
9252
# the result value.							#
9253
#									#
9254
#########################################################################
9255

9256
	global		fsneg
9257
fsneg:
9258
	andi.b		&0x30,%d0		# clear rnd prec
9259
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9260
	bra.b		fneg
9261

9262
	global		fdneg
9263
fdneg:
9264
	andi.b		&0x30,%d0		# clear rnd prec
9265
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
9266

9267
	global		fneg
9268
fneg:
9269
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9270
	mov.b		STAG(%a6),%d1
9271
	bne.w		fneg_not_norm		# optimize on non-norm input
9272

9273
#
9274
# NEGATE SIGN : norms and denorms ONLY!
9275
#
9276
fneg_norm:
9277
	andi.b		&0xc0,%d0		# is precision extended?
9278
	bne.w		fneg_not_ext		# no; go handle sgl or dbl
9279

9280
#
9281
# precision selected is extended. so...we can not get an underflow
9282
# or overflow because of rounding to the correct precision. so...
9283
# skip the scaling and unscaling...
9284
#
9285
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9286
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9287
	mov.w		SRC_EX(%a0),%d0
9288
	eori.w		&0x8000,%d0		# negate sign
9289
	bpl.b		fneg_norm_load		# sign is positive
9290
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9291
fneg_norm_load:
9292
	mov.w		%d0,FP_SCR0_EX(%a6)
9293
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9294
	rts
9295

9296
#
9297
# for an extended precision DENORM, the UNFL exception bit is set
9298
# the accrued bit is NOT set in this instance(no inexactness!)
9299
#
9300
fneg_denorm:
9301
	andi.b		&0xc0,%d0		# is precision extended?
9302
	bne.b		fneg_not_ext		# no; go handle sgl or dbl
9303

9304
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9305

9306
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9307
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9308
	mov.w		SRC_EX(%a0),%d0
9309
	eori.w		&0x8000,%d0		# negate sign
9310
	bpl.b		fneg_denorm_done	# no
9311
	mov.b		&neg_bmask,FPSR_CC(%a6)	# yes, set 'N' ccode bit
9312
fneg_denorm_done:
9313
	mov.w		%d0,FP_SCR0_EX(%a6)
9314
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9315

9316
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317
	bne.b		fneg_ext_unfl_ena	# yes
9318
	rts
9319

9320
#
9321
# the input is an extended DENORM and underflow is enabled in the FPCR.
9322
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323
# exponent and insert back into the operand.
9324
#
9325
fneg_ext_unfl_ena:
9326
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9327
	bsr.l		norm			# normalize result
9328
	neg.w		%d0			# new exponent = -(shft val)
9329
	addi.w		&0x6000,%d0		# add new bias to exponent
9330
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9331
	andi.w		&0x8000,%d1		# keep old sign
9332
	andi.w		&0x7fff,%d0		# clear sign position
9333
	or.w		%d1,%d0			# concat old sign, new exponent
9334
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9335
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9336
	rts
9337

9338
#
9339
# operand is either single or double
9340
#
9341
fneg_not_ext:
9342
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9343
	bne.b		fneg_dbl
9344

9345
#
9346
# operand is to be rounded to single precision
9347
#
9348
fneg_sgl:
9349
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9350
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9351
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9352
	bsr.l		scale_to_zero_src	# calculate scale factor
9353

9354
	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9355
	bge.w		fneg_sd_unfl		# yes; go handle underflow
9356
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9357
	beq.w		fneg_sd_may_ovfl	# maybe; go check
9358
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9359

9360
#
9361
# operand will NOT overflow or underflow when moved in to the fp reg file
9362
#
9363
fneg_sd_normal:
9364
	fmov.l		&0x0,%fpsr		# clear FPSR
9365
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9366

9367
	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9368

9369
	fmov.l		%fpsr,%d1		# save FPSR
9370
	fmov.l		&0x0,%fpcr		# clear FPCR
9371

9372
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9373

9374
fneg_sd_normal_exit:
9375
	mov.l		%d2,-(%sp)		# save d2
9376
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9377
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9378
	mov.w		%d1,%d2			# make a copy
9379
	andi.l		&0x7fff,%d1		# strip sign
9380
	sub.l		%d0,%d1			# add scale factor
9381
	andi.w		&0x8000,%d2		# keep old sign
9382
	or.w		%d1,%d2			# concat old sign,new exp
9383
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
9384
	mov.l		(%sp)+,%d2		# restore d2
9385
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9386
	rts
9387

9388
#
9389
# operand is to be rounded to double precision
9390
#
9391
# fneg_dbl: negate with the result rounded to double precision.
# Copies the source into FP_SCR0, obtains its scale factor in d0 from
# scale_to_zero_src, and dispatches on that factor:
#   d0 >= 0x3fff-0x3c00 -> fneg_sd_unfl     (will underflow)
#   d0 == 0x3fff-0x43fe -> fneg_sd_may_ovfl (may overflow)
#   d0 <  0x3fff-0x43fe -> fneg_sd_ovfl     (will overflow)
#   otherwise           -> fneg_sd_normal
fneg_dbl:
9392
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9393
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9394
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9395
	bsr.l		scale_to_zero_src	# calculate scale factor
9396

9397
	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
9398
	bge.b		fneg_sd_unfl		# yes; go handle underflow
9399
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
9400
	beq.w		fneg_sd_may_ovfl	# maybe; go check
9401
	blt.w		fneg_sd_ovfl		# yes; go handle overflow
9402
	bra.w		fneg_sd_normal		# no; go handle normalized op
9403

9404
#
9405
# operand WILL underflow when moved in to the fp register file
9406
#
9407
fneg_sd_unfl:
9408
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9409

9410
	eori.b		&0x80,FP_SCR0_EX(%a6)	# negate sign
9411
	bpl.b		fneg_sd_unfl_tst
9412
	bset		&neg_bit,FPSR_CC(%a6)	# set 'N' ccode bit
9413

9414
# if underflow or inexact is enabled, go calculate EXOP first.
9415
fneg_sd_unfl_tst:
9416
	mov.b		FPCR_ENABLE(%a6),%d1
9417
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
9418
	bne.b		fneg_sd_unfl_ena	# yes
9419

9420
fneg_sd_unfl_dis:
9421
	lea		FP_SCR0(%a6),%a0	# pass: result addr
9422
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
9423
	bsr.l		unf_res			# calculate default result
9424
	or.b		%d0,FPSR_CC(%a6)	# unf_res may have set 'Z'
9425
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9426
	rts
9427

9428
#
9429
# operand will underflow AND underflow is enabled.
9430
# Therefore, we must return the result rounded to extended precision.
9431
#
9432
fneg_sd_unfl_ena:
9433
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9434
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9435
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
9436

9437
	mov.l		%d2,-(%sp)		# save d2
9438
	mov.l		%d1,%d2			# make a copy
9439
	andi.l		&0x7fff,%d1		# strip sign
9440
	andi.w		&0x8000,%d2		# keep old sign
9441
	sub.l		%d0,%d1			# subtract scale factor
9442
	addi.l		&0x6000,%d1		# add new bias
9443
	andi.w		&0x7fff,%d1
9444
	or.w		%d2,%d1			# concat new sign,new exp
9445
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
9446
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
9447
	mov.l		(%sp)+,%d2		# restore d2
9448
	bra.b		fneg_sd_unfl_dis
9449

9450
#
9451
# operand WILL overflow.
9452
#
9453
fneg_sd_ovfl:
9454
	fmov.l		&0x0,%fpsr		# clear FPSR
9455
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9456

9457
	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9458

9459
	fmov.l		&0x0,%fpcr		# clear FPCR
9460
	fmov.l		%fpsr,%d1		# save FPSR
9461

9462
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9463

9464
fneg_sd_ovfl_tst:
9465
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9466

9467
	mov.b		FPCR_ENABLE(%a6),%d1
9468
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
9469
	bne.b		fneg_sd_ovfl_ena	# yes
9470

9471
#
9472
# OVFL is not enabled; therefore, we must create the default result by
9473
# calling ovf_res().
9474
#
9475
fneg_sd_ovfl_dis:
9476
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
9477
	sne		%d1			# set sign param accordingly
9478
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
9479
	bsr.l		ovf_res			# calculate default result
9480
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
9481
	fmovm.x		(%a0),&0x80		# return default result in fp0
9482
	rts
9483

9484
#
9485
# OVFL is enabled.
9486
# the INEX2 bit has already been updated by the round to the correct precision.
9487
# now, round to extended(and don't alter the FPSR).
9488
#
9489
fneg_sd_ovfl_ena:
9490
	mov.l		%d2,-(%sp)		# save d2
9491
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
9492
	mov.l		%d1,%d2			# make a copy
9493
	andi.l		&0x7fff,%d1		# strip sign
9494
	andi.w		&0x8000,%d2		# keep old sign
9495
	sub.l		%d0,%d1			# add scale factor
9496
	subi.l		&0x6000,%d1		# subtract bias
9497
	andi.w		&0x7fff,%d1
9498
	or.w		%d2,%d1			# concat sign,exp
9499
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
9500
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9501
	mov.l		(%sp)+,%d2		# restore d2
9502
	bra.b		fneg_sd_ovfl_dis
9503

9504
#
9505
# the negation MAY overflow. so...
9506
#
9507
fneg_sd_may_ovfl:
9508
	fmov.l		&0x0,%fpsr		# clear FPSR
9509
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9510

9511
	fneg.x		FP_SCR0(%a6),%fp0	# perform negation
9512

9513
	fmov.l		%fpsr,%d1		# save status
9514
	fmov.l		&0x0,%fpcr		# clear FPCR
9515

9516
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9517

9518
	fabs.x		%fp0,%fp1		# make a copy of result
9519
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
9520
	fbge.w		fneg_sd_ovfl_tst	# yes; overflow has occurred
9521

9522
# no, it didn't overflow; we have correct result
9523
	bra.w		fneg_sd_normal_exit
9524

9525
##########################################################################
9526

9527
#
9528
# input is not normalized; what is it?
9529
#
9530
fneg_not_norm:
9531
	cmpi.b		%d1,&DENORM		# weed out DENORM
9532
	beq.w		fneg_denorm
9533
	cmpi.b		%d1,&SNAN		# weed out SNAN
9534
	beq.l		res_snan_1op
9535
	cmpi.b		%d1,&QNAN		# weed out QNAN
9536
	beq.l		res_qnan_1op
9537

9538
#
9539
# do the fneg; at this point, only possible ops are ZERO and INF.
9540
# use fneg to determine ccodes.
9541
# prec:mode should be zero at this point but it won't affect answer anyways.
9542
#
9543
	fneg.x		SRC_EX(%a0),%fp0	# do fneg
9544
	fmov.l		%fpsr,%d0
9545
	rol.l		&0x8,%d0		# put ccodes in lo byte
9546
	mov.b		%d0,FPSR_CC(%a6)	# insert correct ccodes
9547
	rts
9548

9549
#########################################################################
9550
# XDEF ****************************************************************	#
9551
#	ftst(): emulates the ftst instruction				#
9552
#									#
9553
# XREF ****************************************************************	#
9554
#	res{s,q}nan_1op() - set NAN result for monadic instruction	#
9555
#									#
9556
# INPUT ***************************************************************	#
9557
#	a0 = pointer to extended precision source operand		#
9558
#									#
9559
# OUTPUT **************************************************************	#
9560
#	none								#
9561
#									#
9562
# ALGORITHM ***********************************************************	#
9563
#	Check the source operand tag (STAG) and set the FPSR according	#
9564
# to the operand type and sign.						#
9565
#									#
9566
#########################################################################
9567

9568
	global		ftst
9569
# ftst: entry point.
#   in:  a0 = src operand; STAG(%a6) = source operand tag
# A zero tag falls through to the NORM case; any other tag is sorted out
# by ftst_not_norm. d1 is clobbered.
ftst:
9570
	mov.b		STAG(%a6),%d1
9571
	bne.b		ftst_not_norm		# optimize on non-norm input
9572

9573
#
9574
# Norm:
9575
#
# A negative NORM overwrites FPSR_CC(%a6) with N. A positive NORM returns
# without touching FPSR_CC (presumably already cleared by the caller --
# TODO confirm).
9576
ftst_norm:
9577
	tst.b		SRC_EX(%a0)		# is operand negative?
9578
	bmi.b		ftst_norm_m		# yes
9579
	rts
9580
ftst_norm_m:
9581
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9582
	rts
9583

9584
#
9585
# input is not normalized; what is it?
9586
#
9587
ftst_not_norm:
9588
	cmpi.b		%d1,&ZERO		# weed out ZERO
9589
	beq.b		ftst_zero
9590
	cmpi.b		%d1,&INF		# weed out INF
9591
	beq.b		ftst_inf
9592
	cmpi.b		%d1,&SNAN		# weed out SNAN
9593
	beq.l		res_snan_1op
9594
	cmpi.b		%d1,&QNAN		# weed out QNAN
9595
	beq.l		res_qnan_1op
9596

9597
#
9598
# Denorm:
9599
#
9600
ftst_denorm:
9601
	tst.b		SRC_EX(%a0)		# is operand negative?
9602
	bmi.b		ftst_denorm_m		# yes
9603
	rts
9604
ftst_denorm_m:
9605
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
9606
	rts
9607

9608
#
9609
# Infinity:
9610
#
9611
ftst_inf:
9612
	tst.b		SRC_EX(%a0)		# is operand negative?
9613
	bmi.b		ftst_inf_m		# yes
9614
ftst_inf_p:
9615
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9616
	rts
9617
ftst_inf_m:
9618
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619
	rts
9620

9621
#
9622
# Zero:
9623
#
9624
# ftst_zero: the operand is a ZERO.
# Overwrites FPSR_CC(%a6) with Z for +0, or Z and N for -0.
ftst_zero:
9625
	tst.b		SRC_EX(%a0)		# is operand negative?
9626
	bmi.b		ftst_zero_m		# yes
9627
ftst_zero_p:
9628
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9629
	rts
9630
ftst_zero_m:
9631
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
9632
	rts
9633

9634
#########################################################################
9635
# XDEF ****************************************************************	#
9636
#	fint(): emulates the fint instruction				#
9637
#									#
9638
# XREF ****************************************************************	#
9639
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9640
#									#
9641
# INPUT ***************************************************************	#
9642
#	a0 = pointer to extended precision source operand		#
9643
#	d0 = round precision/mode					#
9644
#									#
9645
# OUTPUT **************************************************************	#
9646
#	fp0 = result							#
9647
#									#
9648
# ALGORITHM ***********************************************************	#
9649
#	Separate according to operand type. Unnorms don't pass through	#
9650
# here. For norms, load the rounding mode/prec, execute a "fint", then	#
9651
# store the resulting FPSR bits.					#
9652
#	For denorms, force the j-bit to a one and do the same as for	#
9653
# norms. Denorms are so low that the answer will either be a zero or a	#
9654
# one.									#
9655
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
9656
# as appropriate.							#
9657
#									#
9658
#########################################################################
9659

9660
	global		fint
9661
# fint: entry point.
#   in:  a0 = src operand, d0 = round precision/mode,
#        STAG(%a6) = source operand tag
#   out: fp0 = result
# A zero tag falls through to the NORM case; any other tag is sorted out
# by fint_not_norm. d1 is clobbered.
fint:
9662
	mov.b		STAG(%a6),%d1
9663
	bne.b		fint_not_norm		# optimize on non-norm input
9664

9665
#
9666
# Norm:
9667
#
# Runs a real fint with only the caller's rounding mode loaded in the
# FPCR, then ORs the resulting FPSR into USER_FPSR. Also entered from
# fint_denorm with a0 pointing at FP_SCR0. d0 is clobbered.
9668
fint_norm:
9669
	andi.b		&0x30,%d0		# keep rnd mode; set prec = ext
9670

9671
	fmov.l		%d0,%fpcr		# set FPCR
9672
	fmov.l		&0x0,%fpsr		# clear FPSR
9673

9674
	fint.x		SRC(%a0),%fp0		# execute fint
9675

9676
	fmov.l		&0x0,%fpcr		# clear FPCR
9677
	fmov.l		%fpsr,%d0		# save FPSR
9678
	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9679

9680
	rts
9681

9682
#
9683
# input is not normalized; what is it?
9684
#
9685
fint_not_norm:
9686
# d1 holds the source tag loaded from STAG(%a6) at fint. Anything that
# is not ZERO, INF, DENORM or SNAN is handed to the QNAN handler. The NAN
# handlers are reached by branch, not bsr, so this routine does not
# regain control afterwards.
	cmpi.b		%d1,&ZERO		# weed out ZERO
9687
	beq.b		fint_zero
9688
	cmpi.b		%d1,&INF		# weed out INF
9689
	beq.b		fint_inf
9690
	cmpi.b		%d1,&DENORM		# weed out DENORM
9691
	beq.b		fint_denorm
9692
	cmpi.b		%d1,&SNAN		# weed out SNAN
9693
	beq.l		res_snan_1op
9694
	bra.l		res_qnan_1op		# weed out QNAN
9695

9696
#
9697
# Denorm:
9698
#
9699
# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700
# also, the INEX2 and AINEX exception bits will be set.
9701
# so, we could either set these manually or force the DENORM
9702
# to a very small NORM and ship it to the NORM routine.
9703
# I do the latter.
9704
#
9705
fint_denorm:
9706
# Build a stand-in operand in FP_SCR0 and reuse the NORM path. Only the
# sign/exponent word and the first mantissa byte are written here; the
# remaining mantissa bytes of FP_SCR0 are left as they were.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9708
	lea		FP_SCR0(%a6),%a0	# pass: ptr to the stand-in operand
9709
	bra.b		fint_norm
9710

9711
#
9712
# Zero:
9713
#
9714
fint_zero:
9715
# fint(+/-0) = +/-0: load a zero of the same sign into fp0 and set the
# condition code byte directly; no exception bits are touched here.
	tst.b		SRC_EX(%a0)		# is ZERO negative?
9716
	bmi.b		fint_zero_m		# yes
9717
fint_zero_p:
9718
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9719
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9720
	rts
9721
fint_zero_m:
9722
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9723
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9724
	rts
9725

9726
#
9727
# Infinity:
9728
#
9729
fint_inf:
9730
# fint(+/-INF) = +/-INF: the source is loaded into fp0 unchanged (fmovm
# register mask 0x80 selects fp0) and only the ccode byte is written.
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9731
	tst.b		SRC_EX(%a0)		# is INF negative?
9732
	bmi.b		fint_inf_m		# yes
9733
fint_inf_p:
9734
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9735
	rts
9736
fint_inf_m:
9737
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9738
	rts
9739

9740
#########################################################################
9741
# XDEF ****************************************************************	#
9742
#	fintrz(): emulates the fintrz instruction			#
9743
#									#
9744
# XREF ****************************************************************	#
9745
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9746
#									#
9747
# INPUT ***************************************************************	#
9748
#	a0 = pointer to extended precision source operand		#
9749
#	d0 = round precision/mode					#
9750
#									#
9751
# OUTPUT **************************************************************	#
9752
#	fp0 = result							#
9753
#									#
9754
# ALGORITHM ***********************************************************	#
9755
#	Separate according to operand type. Unnorms don't pass through	#
9756
# here. For norms, load the rounding mode/prec, execute a "fintrz",	#
9757
# then store the resulting FPSR bits.					#
9758
#	For denorms, force the j-bit to a one and do the same as for	#
9759
# norms. Denorms are so low that the answer will either be a zero or a	#
9760
# one.									#
9761
#	For zeroes/infs/NANs, return the same while setting the FPSR	#
9762
# as appropriate.							#
9763
#									#
9764
#########################################################################
9765

9766
	global		fintrz
9767
fintrz:
9768
# Entry: a0 -> extended precision source. A source tag of zero means
# NORM and falls straight into fintrz_norm.
	mov.b		STAG(%a6),%d1
9769
	bne.b		fintrz_not_norm		# optimize on non-norm input
9770

9771
#
9772
# Norm:
9773
#
9774
fintrz_norm:
9775
# Also entered from fintrz_denorm with a0 -> FP_SCR0(%a6).
# Unlike fint_norm, the FPCR is not written on this path and the
# rounding info passed in d0 is not consulted; d0 is simply overwritten
# with the FPSR below.
	fmov.l		&0x0,%fpsr		# clear FPSR
9776

9777
	fintrz.x	SRC(%a0),%fp0		# execute fintrz
9778

9779
	fmov.l		%fpsr,%d0		# save FPSR
9780
	or.l		%d0,USER_FPSR(%a6)	# set exception bits
9781

9782
	rts
9783

9784
#
9785
# input is not normalized; what is it?
9786
#
9787
fintrz_not_norm:
9788
# d1 holds the source tag loaded from STAG(%a6) at fintrz. Anything that
# is not ZERO, INF, DENORM or SNAN is handed to the QNAN handler. The NAN
# handlers are reached by branch, not bsr, so this routine does not
# regain control afterwards.
	cmpi.b		%d1,&ZERO		# weed out ZERO
9789
	beq.b		fintrz_zero
9790
	cmpi.b		%d1,&INF		# weed out INF
9791
	beq.b		fintrz_inf
9792
	cmpi.b		%d1,&DENORM		# weed out DENORM
9793
	beq.b		fintrz_denorm
9794
	cmpi.b		%d1,&SNAN		# weed out SNAN
9795
	beq.l		res_snan_1op
9796
	bra.l		res_qnan_1op		# weed out QNAN
9797

9798
#
9799
# Denorm:
9800
#
9801
# for DENORMs, the result will be (+/-)ZERO.
9802
# also, the INEX2 and AINEX exception bits will be set.
9803
# so, we could either set these manually or force the DENORM
9804
# to a very small NORM and ship it to the NORM routine.
9805
# I do the latter.
9806
#
9807
fintrz_denorm:
9808
# Build a stand-in operand in FP_SCR0 and reuse the NORM path. Only the
# sign/exponent word and the first mantissa byte are written here; the
# remaining mantissa bytes of FP_SCR0 are left as they were.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809
	mov.b		&0x80,FP_SCR0_HI(%a6)	# force DENORM ==> small NORM
9810
	lea		FP_SCR0(%a6),%a0	# pass: ptr to the stand-in operand
9811
	bra.b		fintrz_norm
9812

9813
#
9814
# Zero:
9815
#
9816
fintrz_zero:
9817
# fintrz(+/-0) = +/-0: load a zero of the same sign into fp0 and set the
# condition code byte directly; no exception bits are touched here.
	tst.b		SRC_EX(%a0)		# is ZERO negative?
9818
	bmi.b		fintrz_zero_m		# yes
9819
fintrz_zero_p:
9820
	fmov.s		&0x00000000,%fp0	# return +ZERO in fp0
9821
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
9822
	rts
9823
fintrz_zero_m:
9824
	fmov.s		&0x80000000,%fp0	# return -ZERO in fp0
9825
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9826
	rts
9827

9828
#
9829
# Infinity:
9830
#
9831
fintrz_inf:
9832
# fintrz(+/-INF) = +/-INF: the source is loaded into fp0 unchanged (fmovm
# register mask 0x80 selects fp0) and only the ccode byte is written.
	fmovm.x		SRC(%a0),&0x80		# return result in fp0
9833
	tst.b		SRC_EX(%a0)		# is INF negative?
9834
	bmi.b		fintrz_inf_m		# yes
9835
fintrz_inf_p:
9836
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
9837
	rts
9838
fintrz_inf_m:
9839
	mov.b		&inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9840
	rts
9841

9842
#########################################################################
9843
# XDEF ****************************************************************	#
9844
#	fabs():  emulates the fabs instruction				#
9845
#	fsabs(): emulates the fsabs instruction				#
9846
#	fdabs(): emulates the fdabs instruction				#
9847
#									#
9848
# XREF **************************************************************** #
9849
#	norm() - normalize denorm mantissa to provide EXOP		#
9850
#	scale_to_zero_src() - make exponent. = 0; get scale factor	#
9851
#	unf_res() - calculate underflow result				#
9852
#	ovf_res() - calculate overflow result				#
9853
#	res_{s,q}nan_1op() - set NAN result for monadic operation	#
9854
#									#
9855
# INPUT *************************************************************** #
9856
#	a0 = pointer to extended precision source operand		#
9857
#	d0 = rnd precision/mode						#
9858
#									#
9859
# OUTPUT ************************************************************** #
9860
#	fp0 = result							#
9861
#	fp1 = EXOP (if exception occurred)				#
9862
#									#
9863
# ALGORITHM ***********************************************************	#
9864
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
9865
# norms into extended, single, and double precision.			#
9866
#	Simply clear sign for extended precision norm. Ext prec denorm	#
9867
# gets an EXOP created for it since it's an underflow.			#
9868
#	Double and single precision can overflow and underflow. First,	#
9869
# scale the operand such that the exponent is zero. Perform an "fabs"	#
9870
# using the correct rnd mode/prec. Check to see if the original		#
9871
# exponent would take an exception. If so, use unf_res() or ovf_res()	#
9872
# to calculate the default result. Also, create the EXOP for the	#
9873
# exceptional case. If no exception should occur, insert the correct	#
9874
# result exponent and return.						#
9875
#	Unnorms don't pass through here.				#
9876
#									#
9877
#########################################################################
9878

9879
	global		fsabs
9880
fsabs:
9881
# fsabs/fdabs override the precision field of d0 (mask 0x30 keeps only
# the rounding-mode bits) and then share the body of fabs.
	andi.b		&0x30,%d0		# clear rnd prec
9882
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
9883
	bra.b		fabs
9884

9885
	global		fdabs
9886
fdabs:
9887
	andi.b		&0x30,%d0		# clear rnd prec
9888
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
9889
# fdabs falls through into fabs.

9890
	global		fabs
9891
fabs:
9892
# Entry: a0 -> extended precision source, d0 = rnd prec/mode.
# The rounding info is parked in L_SCR3 because d0 is reused below.
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
9893
	mov.b		STAG(%a6),%d1
9894
	bne.w		fabs_not_norm		# optimize on non-norm input
9895

9896
#
9897
# ABSOLUTE VALUE: norms and denorms ONLY!
9898
#
9899
fabs_norm:
9900
# Mask 0xc0 isolates the precision bits; zero selects the extended path.
	andi.b		&0xc0,%d0		# is precision extended?
9901
	bne.b		fabs_not_ext		# no; go handle sgl or dbl
9902

9903
#
9904
# precision selected is extended. so...we can not get an underflow
9905
# or overflow because of rounding to the correct precision. so...
9906
# skip the scaling and unscaling...
9907
#
9908
# The operand is copied to FP_SCR0 with bit 15 of the sign/exponent
# word cleared, then loaded into fp0. No FPSR fields are written on
# this path.
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9909
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9910
	mov.w		SRC_EX(%a0),%d1
9911
	bclr		&15,%d1			# force absolute value
9912
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert exponent
9913
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
9914
	rts
9915

9916
#
9917
# for an extended precision DENORM, the UNFL exception bit is set
9918
# the accrued bit is NOT set in this instance(no inexactness!)
9919
#
9920
fabs_denorm:
9921
# Reached from fabs_not_norm; d0 still holds the rnd prec/mode passed to
# fabs. Single/double precision denorms share the scaled path used for
# norms.
	andi.b		&0xc0,%d0		# is precision extended?
9922
	bne.b		fabs_not_ext		# no
9923

9924
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9925

9926
# default result: the denorm itself with its sign cleared
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9927
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9928
	mov.w		SRC_EX(%a0),%d0
9929
	bclr		&15,%d0			# clear sign
9930
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert exponent
9931

9932
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
9933

9934
# an EXOP in fp1 is only produced when the UNFL exception is enabled
	btst		&unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935
	bne.b		fabs_ext_unfl_ena
9936
	rts
9937

9938
#
9939
# the input is an extended DENORM and underflow is enabled in the FPCR.
9940
# normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941
# exponent and insert back into the operand.
9942
#
9943
fabs_ext_unfl_ena:
9944
# Builds the EXOP in place in FP_SCR0 (fp0 was already loaded with the
# default result by fabs_denorm) and returns it in fp1 (fmovm mask 0x40).
# norm() is expected to leave its shift count in d0, as the neg.w below
# relies on -- confirm against norm().
	lea		FP_SCR0(%a6),%a0	# pass: ptr to operand
9945
	bsr.l		norm			# normalize result
9946
	neg.w		%d0			# new exponent = -(shft val)
9947
	addi.w		&0x6000,%d0		# add new bias to exponent
9948
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch old sign,exp
9949
	andi.w		&0x8000,%d1		# keep old sign
9950
	andi.w		&0x7fff,%d0		# clear sign position
9951
	or.w		%d1,%d0			# concat old sign, new exponent
9952
	mov.w		%d0,FP_SCR0_EX(%a6)	# insert new exponent
9953
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
9954
	rts
9955

9956
#
9957
# operand is either single or double
9958
#
9959
fabs_not_ext:
9960
# d0 holds only the precision bits here (masked with 0xc0 by the caller).
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
9961
	bne.b		fabs_dbl
9962

9963
#
9964
# operand is to be rounded to single precision
9965
#
9966
fabs_sgl:
9967
# Copy the operand to FP_SCR0 and scale it; scale_to_zero_src() leaves
# the scale factor in d0, which the checks below classify against the
# single precision exponent limits.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
9968
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
9969
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
9970
	bsr.l		scale_to_zero_src	# calculate scale factor
9971

9972
	cmpi.l		%d0,&0x3fff-0x3f80	# will move in underflow?
9973
	bge.w		fabs_sd_unfl		# yes; go handle underflow
9974
	cmpi.l		%d0,&0x3fff-0x407e	# will move in overflow?
9975
	beq.w		fabs_sd_may_ovfl	# maybe; go check
9976
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
# otherwise fall through into fabs_sd_normal
9977

9978
#
9979
# operand will NOT overflow or underflow when moved in to the fp reg file
9980
#
9981
fabs_sd_normal:
9982
# FP_SCR0 holds the scaled operand and d0 the scale factor. The fabs is
# executed with the caller's rounding precision/mode from L_SCR3.
	fmov.l		&0x0,%fpsr		# clear FPSR
9983
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
9984

9985
	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
9986

9987
	fmov.l		%fpsr,%d1		# save FPSR
9988
	fmov.l		&0x0,%fpcr		# clear FPCR
9989

9990
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
9991

9992
fabs_sd_normal_exit:
9993
# Also entered from fabs_sd_may_ovfl with the rounded result in fp0.
# Undo the scaling: the result exponent is the scaled exponent minus the
# scale factor in d0.
	mov.l		%d2,-(%sp)		# save d2
9994
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
9995
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
9996
	mov.l		%d1,%d2			# make a copy
9997
	andi.l		&0x7fff,%d1		# strip sign
9998
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
9999
	andi.w		&0x8000,%d2		# keep old sign
10000
	or.w		%d1,%d2			# concat old sign,new exp
10001
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
10002
	mov.l		(%sp)+,%d2		# restore d2
10003
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10004
	rts
10005

10006
#
10007
# operand is to be rounded to double precision
10008
#
10009
fabs_dbl:
10010
# Same flow as fabs_sgl, but the scale factor is classified against the
# double precision exponent limits.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10011
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10012
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10013
	bsr.l		scale_to_zero_src	# calculate scale factor
10014

10015
	cmpi.l		%d0,&0x3fff-0x3c00	# will move in underflow?
10016
	bge.b		fabs_sd_unfl		# yes; go handle underflow
10017
	cmpi.l		%d0,&0x3fff-0x43fe	# will move in overflow?
10018
	beq.w		fabs_sd_may_ovfl	# maybe; go check
10019
	blt.w		fabs_sd_ovfl		# yes; go handle overflow
10020
	bra.w		fabs_sd_normal		# no; go handle normalized op
10021

10022
#
10023
# operand WILL underflow when moved in to the fp register file
10024
#
10025
fabs_sd_unfl:
10026
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10027

10028
# bclr on a memory operand is byte sized, so bit 7 of the first byte of
# FP_SCR0_EX is the sign bit of the sign/exponent word.
	bclr		&0x7,FP_SCR0_EX(%a6)	# force absolute value
10029

10030
# if underflow or inexact is enabled, go calculate EXOP first.
10031
	mov.b		FPCR_ENABLE(%a6),%d1
10032
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10033
	bne.b		fabs_sd_unfl_ena	# yes
10034

10035
fabs_sd_unfl_dis:
10036
# Also reached from fabs_sd_unfl_ena after the EXOP has been placed in
# fp1. unf_res() works on the operand in FP_SCR0 and returns ccode bits
# in d0, which are merged into FPSR_CC.
	lea		FP_SCR0(%a6),%a0	# pass: result addr
10037
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10038
	bsr.l		unf_res			# calculate default result
10039
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
10040
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10041
	rts
10042

10043
#
10044
# operand will underflow AND underflow is enabled.
10045
# Therefore, we must return the result rounded to extended precision.
10046
#
10047
fabs_sd_unfl_ena:
10048
# The EXOP is assembled in FP_SCR1 so that FP_SCR0 stays intact for the
# default-result calculation in fabs_sd_unfl_dis.
# EXOP exponent = (scaled exponent - scale factor in d0) + 0x6000,
# truncated to 15 bits.
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
10049
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
10050
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
10051

10052
	mov.l		%d2,-(%sp)		# save d2
10053
	mov.l		%d1,%d2			# make a copy
10054
	andi.l		&0x7fff,%d1		# strip sign
10055
	andi.w		&0x8000,%d2		# keep old sign
10056
	sub.l		%d0,%d1			# subtract scale factor
10057
	addi.l		&0x6000,%d1		# add new bias
10058
	andi.w		&0x7fff,%d1		# keep the low 15 exponent bits
10059
	or.w		%d2,%d1			# concat new sign,new exp
10060
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
10061
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
10062
	mov.l		(%sp)+,%d2		# restore d2
10063
	bra.b		fabs_sd_unfl_dis	# now produce the default result
10064

10065
#
10066
# operand WILL overflow.
10067
#
10068
fabs_sd_ovfl:
10069
# Run the fabs on the scaled operand only to collect the status bits for
# rounding to the requested precision; the default result itself is
# produced later by ovf_res().
	fmov.l		&0x0,%fpsr		# clear FPSR
10070
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10071

10072
	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10073

10074
	fmov.l		&0x0,%fpcr		# clear FPCR
10075
	fmov.l		%fpsr,%d1		# save FPSR
10076

10077
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10078

10079
fabs_sd_ovfl_tst:
10080
# Also entered from fabs_sd_may_ovfl once overflow has been confirmed.
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10081

10082
	mov.b		FPCR_ENABLE(%a6),%d1
10083
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10084
	bne.b		fabs_sd_ovfl_ena	# yes
# otherwise fall through into fabs_sd_ovfl_dis
10085

10086
#
10087
# OVFL is not enabled; therefore, we must create the default result by
10088
# calling ovf_res().
10089
#
10090
fabs_sd_ovfl_dis:
10091
# Also reached from fabs_sd_ovfl_ena after the EXOP has been placed in
# fp1. d1 becomes all ones when the 'N' ccode is set, zero otherwise.
# The result is loaded through a0 after the call, so ovf_res() is
# expected to return a pointer to the default result there -- confirm
# against ovf_res().
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10092
	sne		%d1			# set sign param accordingly
10093
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
10094
	bsr.l		ovf_res			# calculate default result
10095
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10096
	fmovm.x		(%a0),&0x80		# return default result in fp0
10097
	rts
10098

10099
#
10100
# OVFL is enabled.
10101
# the INEX2 bit has already been updated by the round to the correct precision.
10102
# now, round to extended(and don't alter the FPSR).
10103
#
10104
fabs_sd_ovfl_ena:
10105
# EXOP exponent = (scaled exponent - scale factor in d0) - 0x6000,
# truncated to 15 bits. The EXOP is built in place in FP_SCR0 and
# returned in fp1.
# NOTE(review): unlike fabs_sd_unfl, nothing visible on this path clears
# the sign bit of FP_SCR0_EX before it is merged back in below -- confirm
# whether a negative source can produce a negative EXOP here.
	mov.l		%d2,-(%sp)		# save d2
10106
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10107
	mov.l		%d1,%d2			# make a copy
10108
	andi.l		&0x7fff,%d1		# strip sign
10109
	andi.w		&0x8000,%d2		# keep old sign
10110
	sub.l		%d0,%d1			# apply scale factor (exp - scale)
10111
	subi.l		&0x6000,%d1		# subtract bias
10112
	andi.w		&0x7fff,%d1		# keep the low 15 exponent bits
10113
	or.w		%d2,%d1			# concat sign,exp
10114
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10115
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10116
	mov.l		(%sp)+,%d2		# restore d2
10117
	bra.b		fabs_sd_ovfl_dis	# now produce the default result
10118

10119
#
10120
# the move in MAY overflow. so...
10121
#
10122
fabs_sd_may_ovfl:
10123
# The scale factor sits exactly on the overflow boundary, so whether the
# result overflows depends on rounding. Perform the operation on the
# scaled operand and look at the magnitude of the rounded result.
	fmov.l		&0x0,%fpsr		# clear FPSR
10124
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10125

10126
	fabs.x		FP_SCR0(%a6),%fp0	# perform absolute
10127

10128
	fmov.l		%fpsr,%d1		# save status
10129
	fmov.l		&0x0,%fpcr		# clear FPCR
10130

10131
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10132

10133
	fabs.x		%fp0,%fp1		# make a copy of result
10134
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10135
	fbge.w		fabs_sd_ovfl_tst	# yes; overflow has occurred
10136

10137
# no, it didn't overflow; we have correct result
10138
	bra.w		fabs_sd_normal_exit
10139

10140
##########################################################################
10141

10142
#
10143
# input is not normalized; what is it?
10144
#
10145
fabs_not_norm:
10146
# d1 holds the source tag loaded from STAG(%a6) at fabs. After DENORM and
# the two NAN types are dispatched, only INF and ZERO remain.
	cmpi.b		%d1,&DENORM		# weed out DENORM
10147
	beq.w		fabs_denorm
10148
	cmpi.b		%d1,&SNAN		# weed out SNAN
10149
	beq.l		res_snan_1op
10150
	cmpi.b		%d1,&QNAN		# weed out QNAN
10151
	beq.l		res_qnan_1op
10152

10153
# INF and ZERO share the same result instruction; only the ccode byte
# written afterwards differs. 'N' is never set since the result is
# an absolute value.
	fabs.x		SRC(%a0),%fp0		# force absolute value
10154

10155
	cmpi.b		%d1,&INF		# weed out INF
10156
	beq.b		fabs_inf
10157
fabs_zero:
10158
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
10159
	rts
10160
fabs_inf:
10161
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
10162
	rts
10163

10164
#########################################################################
10165
# XDEF ****************************************************************	#
10166
#	fcmp(): fp compare op routine					#
10167
#									#
10168
# XREF ****************************************************************	#
10169
#	res_qnan() - return QNAN result					#
10170
#	res_snan() - return SNAN result					#
10171
#									#
10172
# INPUT ***************************************************************	#
10173
#	a0 = pointer to extended precision source operand		#
10174
#	a1 = pointer to extended precision destination operand		#
10175
#	d0 = round prec/mode						#
10176
#									#
10177
# OUTPUT ************************************************************** #
10178
#	None								#
10179
#									#
10180
# ALGORITHM ***********************************************************	#
10181
#	Handle NANs and denorms as special cases. For everything else,	#
10182
# just use the actual fcmp instruction to produce the correct condition	#
10183
# codes.								#
10184
#									#
10185
#########################################################################
10186

10187
	global		fcmp
10188
fcmp:
10189
# Entry: a0 -> source operand, a1 -> destination operand.
# d1 = (DTAG << 3) | STAG. Zero means both operands are NORMs; any other
# value is used by fcmp_not_norm as an index into tbl_fcmp_op.
	clr.w		%d1
10190
	mov.b		DTAG(%a6),%d1
10191
	lsl.b		&0x3,%d1
10192
	or.b		STAG(%a6),%d1
10193
	bne.b		fcmp_not_norm		# optimize on non-norm input
10194

10195
#
10196
# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10197
#
10198
fcmp_norm:
10199
# Also the target of the fcmp_dnrm_* routines, which redirect a0/a1 to
# scratch copies first. The dst is loaded into fp0, the real fcmp is run
# against the src, and the top byte of the resulting FPSR (rotated down
# into the low byte of d0) is stored as the ccode byte.
	fmovm.x		DST(%a1),&0x80		# load dst op
10200

10201
	fcmp.x		%fp0,SRC(%a0)		# do compare
10202

10203
	fmov.l		%fpsr,%d0		# save FPSR
10204
	rol.l		&0x8,%d0		# extract ccode bits
10205
	mov.b		%d0,FPSR_CC(%a6)	# set ccode bits(no exc bits are set)
10206

10207
	rts
10208

10209
#
10210
# fcmp: inputs are not both normalized; what are they?
10211
#
10212
fcmp_not_norm:
10213
# d1 = (DTAG << 3) | STAG, built at fcmp. Each table entry is a 16-bit
# offset relative to tbl_fcmp_op: the first instruction fetches the
# offset for this tag pair, the second jumps to tbl_fcmp_op + offset.
	mov.w		(tbl_fcmp_op.b,%pc,%d1.w*2),%d1
10214
	jmp		(tbl_fcmp_op.b,%pc,%d1.w*1)
10215

10216
	swbeg		&48
10217
tbl_fcmp_op:
10218
# Six groups of eight entries, one group per dst tag; the position
# within a group is the src tag. Comments read "dst - src". The last two
# slots of every group are zero-offset placeholders for tag values that
# have no handler.
	short		fcmp_norm	- tbl_fcmp_op # NORM - NORM
10219
	short		fcmp_norm	- tbl_fcmp_op # NORM - ZERO
10220
	short		fcmp_norm	- tbl_fcmp_op # NORM - INF
10221
	short		fcmp_res_qnan	- tbl_fcmp_op # NORM - QNAN
10222
	short		fcmp_nrm_dnrm	- tbl_fcmp_op # NORM - DENORM
10223
	short		fcmp_res_snan	- tbl_fcmp_op # NORM - SNAN
10224
	short		tbl_fcmp_op	- tbl_fcmp_op #
10225
	short		tbl_fcmp_op	- tbl_fcmp_op #
10226

10227
	short		fcmp_norm	- tbl_fcmp_op # ZERO - NORM
10228
	short		fcmp_norm	- tbl_fcmp_op # ZERO - ZERO
10229
	short		fcmp_norm	- tbl_fcmp_op # ZERO - INF
10230
	short		fcmp_res_qnan	- tbl_fcmp_op # ZERO - QNAN
10231
	short		fcmp_dnrm_s	- tbl_fcmp_op # ZERO - DENORM
10232
	short		fcmp_res_snan	- tbl_fcmp_op # ZERO - SNAN
10233
	short		tbl_fcmp_op	- tbl_fcmp_op #
10234
	short		tbl_fcmp_op	- tbl_fcmp_op #
10235

10236
	short		fcmp_norm	- tbl_fcmp_op # INF - NORM
10237
	short		fcmp_norm	- tbl_fcmp_op # INF - ZERO
10238
	short		fcmp_norm	- tbl_fcmp_op # INF - INF
10239
	short		fcmp_res_qnan	- tbl_fcmp_op # INF - QNAN
10240
	short		fcmp_dnrm_s	- tbl_fcmp_op # INF - DENORM
10241
	short		fcmp_res_snan	- tbl_fcmp_op # INF - SNAN
10242
	short		tbl_fcmp_op	- tbl_fcmp_op #
10243
	short		tbl_fcmp_op	- tbl_fcmp_op #
10244

10245
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - NORM
10246
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - ZERO
10247
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - INF
10248
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - QNAN
10249
	short		fcmp_res_qnan	- tbl_fcmp_op # QNAN - DENORM
10250
	short		fcmp_res_snan	- tbl_fcmp_op # QNAN - SNAN
10251
	short		tbl_fcmp_op	- tbl_fcmp_op #
10252
	short		tbl_fcmp_op	- tbl_fcmp_op #
10253

10254
	short		fcmp_dnrm_nrm	- tbl_fcmp_op # DENORM - NORM
10255
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - ZERO
10256
	short		fcmp_dnrm_d	- tbl_fcmp_op # DENORM - INF
10257
	short		fcmp_res_qnan	- tbl_fcmp_op # DENORM - QNAN
10258
	short		fcmp_dnrm_sd	- tbl_fcmp_op # DENORM - DENORM
10259
	short		fcmp_res_snan	- tbl_fcmp_op # DENORM - SNAN
10260
	short		tbl_fcmp_op	- tbl_fcmp_op #
10261
	short		tbl_fcmp_op	- tbl_fcmp_op #
10262

10263
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - NORM
10264
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - ZERO
10265
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - INF
10266
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - QNAN
10267
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - DENORM
10268
	short		fcmp_res_snan	- tbl_fcmp_op # SNAN - SNAN
10269
	short		tbl_fcmp_op	- tbl_fcmp_op #
10270
	short		tbl_fcmp_op	- tbl_fcmp_op #
10271

10272
# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
10273
# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
10274
fcmp_res_qnan:
10275
# Call the shared QNAN handler, then clear bit 3 of the ccode byte
# (mask 0xf7) to squelch the 'N' bit it may have set.
	bsr.l		res_qnan
10276
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
10277
	rts
10278
fcmp_res_snan:
10279
# Same as fcmp_res_qnan, using the SNAN handler.
	bsr.l		res_snan
10280
	andi.b		&0xf7,FPSR_CC(%a6)	# clear 'N' ccode bit
10281
	rts
10282

10283
#
10284
# DENORMs are a little more difficult.
10285
# If you have a 2 DENORMs, then you can just force the j-bit to a one
10286
# and use the fcmp_norm routine.
10287
# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288
# and use the fcmp_norm routine.
10289
# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290
# But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291
# (1) signs are (+) and the DENORM is the dst or
10292
# (2) signs are (-) and the DENORM is the src
10293
#
10294

10295
fcmp_dnrm_s:
10296
# The src is a DENORM: copy it to FP_SCR0 with the top mantissa bit
# (j-bit) forced to one, point a0 at the copy, and let fcmp_norm do the
# compare. The dst pointer in a1 is left untouched.
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10297
	mov.l		SRC_HI(%a0),%d0
10298
	bset		&31,%d0			# DENORM src; make into small norm
10299
	mov.l		%d0,FP_SCR0_HI(%a6)
10300
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10301
	lea		FP_SCR0(%a6),%a0
10302
	bra.w		fcmp_norm
10303

10304
fcmp_dnrm_d:
10305
# The dst is a DENORM: copy it to FP_SCR0 with the top mantissa bit
# (j-bit) forced to one, point a1 at the copy, and let fcmp_norm do the
# compare. The src pointer in a0 is left untouched.
# NOTE(review): the first copy below is a longword, whereas fcmp_dnrm_s
# and fcmp_dnrm_sd copy the sign/exponent as a word. Presumably harmless
# if the extra 16 bits are padding -- confirm against the operand layout.
	mov.l		DST_EX(%a1),FP_SCR0_EX(%a6)
10306
	mov.l		DST_HI(%a1),%d0
10307
	bset		&31,%d0			# DENORM dst; make into small norm
10308
	mov.l		%d0,FP_SCR0_HI(%a6)
10309
	mov.l		DST_LO(%a1),FP_SCR0_LO(%a6)
10310
	lea		FP_SCR0(%a6),%a1
10311
	bra.w		fcmp_norm
10312

10313
fcmp_dnrm_sd:
10314
# Both operands are DENORMs: the dst is copied to FP_SCR1 and the src to
# FP_SCR0, each with the top mantissa bit (j-bit) forced to one, then
# a1/a0 are pointed at the copies and fcmp_norm does the compare.
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10315
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10316
	mov.l		DST_HI(%a1),%d0
10317
	bset		&31,%d0			# DENORM dst; make into small norm
10318
	mov.l		%d0,FP_SCR1_HI(%a6)
10319
	mov.l		SRC_HI(%a0),%d0
10320
	bset		&31,%d0			# DENORM src; make into small norm
10321
	mov.l		%d0,FP_SCR0_HI(%a6)
10322
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10323
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10324
	lea		FP_SCR1(%a6),%a1
10325
	lea		FP_SCR0(%a6),%a0
10326
	bra.w		fcmp_norm
10327

10328
fcmp_nrm_dnrm:
10329
# dst is a NORM, src is a DENORM. With opposite signs the generic
# compare (via fcmp_dnrm_s) gives the right answer. With like signs the
# answer is decided here: 'N' is set only when both are negative.
# In the positive case FPSR_CC is not written at all; presumably the
# caller cleared it beforehand -- confirm.
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10330
	mov.b		DST_EX(%a1),%d1
10331
	eor.b		%d0,%d1			# bit 7 set if signs differ
10332
	bmi.w		fcmp_dnrm_s
10333

10334
# signs are the same, so must determine the answer ourselves.
10335
	tst.b		%d0			# is src op negative?
10336
	bmi.b		fcmp_nrm_dnrm_m		# yes
10337
	rts
10338
fcmp_nrm_dnrm_m:
10339
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10340
	rts
10341

10342
fcmp_dnrm_nrm:
10343
# dst is a DENORM, src is a NORM. With opposite signs the generic
# compare (via fcmp_dnrm_d) gives the right answer. With like signs the
# answer is decided here: 'N' is set only when both are positive.
# In the negative case FPSR_CC is not written at all; presumably the
# caller cleared it beforehand -- confirm.
	mov.b		SRC_EX(%a0),%d0		# determine if like signs
10344
	mov.b		DST_EX(%a1),%d1
10345
	eor.b		%d0,%d1			# bit 7 set if signs differ
10346
	bmi.w		fcmp_dnrm_d
10347

10348
# signs are the same, so must determine the answer ourselves.
10349
	tst.b		%d0			# is src op negative?
10350
	bpl.b		fcmp_dnrm_nrm_m		# no
10351
	rts
10352
fcmp_dnrm_nrm_m:
10353
	mov.b		&neg_bmask,FPSR_CC(%a6)	# set 'N' ccode bit
10354
	rts
10355

10356
#########################################################################
10357
# XDEF ****************************************************************	#
10358
#	fsglmul(): emulates the fsglmul instruction			#
10359
#									#
10360
# XREF ****************************************************************	#
10361
#	scale_to_zero_src() - scale src exponent to zero		#
10362
#	scale_to_zero_dst() - scale dst exponent to zero		#
10363
#	unf_res4() - return default underflow result for sglop		#
10364
#	ovf_res() - return default overflow result			#
10365
#	res_qnan() - return QNAN result					#
10366
#	res_snan() - return SNAN result					#
10367
#									#
10368
# INPUT ***************************************************************	#
10369
#	a0 = pointer to extended precision source operand		#
10370
#	a1 = pointer to extended precision destination operand		#
10371
#	d0 = rnd prec,mode						#
10372
#									#
10373
# OUTPUT **************************************************************	#
10374
#	fp0 = result							#
10375
#	fp1 = EXOP (if exception occurred)				#
10376
#									#
10377
# ALGORITHM ***********************************************************	#
10378
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10379
# norms/denorms into ext/sgl/dbl precision.				#
10380
#	For norms/denorms, scale the exponents such that a multiply	#
10381
# instruction won't cause an exception. Use the regular fsglmul to	#
10382
# compute a result. Check if the regular operands would have taken	#
10383
# an exception. If so, return the default overflow/underflow result	#
10384
# and return the EXOP if exceptions are enabled. Else, scale the	#
10385
# result operand to the proper exponent.				#
10386
#									#
10387
#########################################################################
10388

10389
	global		fsglmul
10390
fsglmul:
10391
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
10392

10393
	clr.w		%d1
10394
	mov.b		DTAG(%a6),%d1
10395
	lsl.b		&0x3,%d1
10396
	or.b		STAG(%a6),%d1
10397

10398
	bne.w		fsglmul_not_norm	# optimize on non-norm input
10399

10400
fsglmul_norm:
10401
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
10402
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
10403
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10404

10405
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
10406
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10407
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10408

10409
	bsr.l		scale_to_zero_src	# scale exponent
10410
	mov.l		%d0,-(%sp)		# save scale factor 1
10411

10412
	bsr.l		scale_to_zero_dst	# scale dst exponent
10413

10414
	add.l		(%sp)+,%d0		# SCALE_FACTOR = scale1 + scale2
10415

10416
	cmpi.l		%d0,&0x3fff-0x7ffe	# would result ovfl?
10417
	beq.w		fsglmul_may_ovfl	# result may rnd to overflow
10418
	blt.w		fsglmul_ovfl		# result will overflow
10419

10420
	cmpi.l		%d0,&0x3fff+0x0001	# would result unfl?
10421
	beq.w		fsglmul_may_unfl	# result may rnd to no unfl
10422
	bgt.w		fsglmul_unfl		# result will underflow
10423

10424
fsglmul_normal:
10425
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10426

10427
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10428
	fmov.l		&0x0,%fpsr		# clear FPSR
10429

10430
	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10431

10432
	fmov.l		%fpsr,%d1		# save status
10433
	fmov.l		&0x0,%fpcr		# clear FPCR
10434

10435
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10436

10437
fsglmul_normal_exit:
10438
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10439
	mov.l		%d2,-(%sp)		# save d2
10440
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
10441
	mov.l		%d1,%d2			# make a copy
10442
	andi.l		&0x7fff,%d1		# strip sign
10443
	andi.w		&0x8000,%d2		# keep old sign
10444
	sub.l		%d0,%d1			# add scale factor
10445
	or.w		%d2,%d1			# concat old sign,new exp
10446
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10447
	mov.l		(%sp)+,%d2		# restore d2
10448
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10449
	rts
10450

10451
fsglmul_ovfl:
10452
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10453

10454
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10455
	fmov.l		&0x0,%fpsr		# clear FPSR
10456

10457
	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10458

10459
	fmov.l		%fpsr,%d1		# save status
10460
	fmov.l		&0x0,%fpcr		# clear FPCR
10461

10462
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10463

10464
fsglmul_ovfl_tst:
10465

10466
# save setting this until now because this is where fsglmul_may_ovfl may jump in
10467
	or.l		&ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10468

10469
	mov.b		FPCR_ENABLE(%a6),%d1
10470
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10471
	bne.b		fsglmul_ovfl_ena	# yes
10472

10473
fsglmul_ovfl_dis:
10474
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
10475
	sne		%d1			# set sign param accordingly
10476
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
10477
	andi.b		&0x30,%d0		# force prec = ext
10478
	bsr.l		ovf_res			# calculate default result
10479
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
10480
	fmovm.x		(%a0),&0x80		# return default result in fp0
10481
	rts
10482

10483
fsglmul_ovfl_ena:
10484
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
10485

10486
	mov.l		%d2,-(%sp)		# save d2
10487
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10488
	mov.l		%d1,%d2			# make a copy
10489
	andi.l		&0x7fff,%d1		# strip sign
10490
	sub.l		%d0,%d1			# add scale factor
10491
	subi.l		&0x6000,%d1		# subtract bias
10492
	andi.w		&0x7fff,%d1
10493
	andi.w		&0x8000,%d2		# keep old sign
10494
	or.w		%d2,%d1			# concat old sign,new exp
10495
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10496
	mov.l		(%sp)+,%d2		# restore d2
10497
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10498
	bra.b		fsglmul_ovfl_dis
10499

10500
fsglmul_may_ovfl:
10501
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10502

10503
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10504
	fmov.l		&0x0,%fpsr		# clear FPSR
10505

10506
	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10507

10508
	fmov.l		%fpsr,%d1		# save status
10509
	fmov.l		&0x0,%fpcr		# clear FPCR
10510

10511
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10512

10513
	fabs.x		%fp0,%fp1		# make a copy of result
10514
	fcmp.b		%fp1,&0x2		# is |result| >= 2.b?
10515
	fbge.w		fsglmul_ovfl_tst	# yes; overflow has occurred
10516

10517
# no, it didn't overflow; we have correct result
10518
	bra.w		fsglmul_normal_exit
10519

10520
#
# the multiply underflowed:
# set the UNFL exception bit, redo the multiply with the FPCR holding only
# the round-to-zero mode, and merge the resulting FPSR into USER_FPSR.
# the default result is then produced by unf_res4(). if UNFL or INEX is
# enabled, an EXOP is built in fp1 first.
#
fsglmul_unfl:
10521
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10522

10523
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10524

10525
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
10526
	fmov.l		&0x0,%fpsr		# clear FPSR
10527

10528
	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10529

10530
	fmov.l		%fpsr,%d1		# save status
10531
	fmov.l		&0x0,%fpcr		# clear FPCR
10532

10533
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10534

10535
	mov.b		FPCR_ENABLE(%a6),%d1
10536
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10537
	bne.b		fsglmul_unfl_ena	# yes
10538

10539
fsglmul_unfl_dis:
10540
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10541

10542
	lea		FP_SCR0(%a6),%a0	# pass: result addr
10543
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10544
	bsr.l		unf_res4		# calculate default result
10545
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10546
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10547
	rts
10548

10549
#
10550
# UNFL or INEX is enabled: build the EXOP in fp1, then fall back to
# fsglmul_unfl_dis for the default result.
10551
#
10552
fsglmul_unfl_ena:
10553
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
10554

10555
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10556
	fmov.l		&0x0,%fpsr		# clear FPSR
10557

10558
	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
10559

10560
	fmov.l		&0x0,%fpcr		# clear FPCR
10561

10562
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10563
	mov.l		%d2,-(%sp)		# save d2
10564
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10565
	mov.l		%d1,%d2			# make a copy
10566
	andi.l		&0x7fff,%d1		# strip sign
10567
	andi.w		&0x8000,%d2		# keep old sign
10568
	sub.l		%d0,%d1			# apply scale factor (exp - d0)
10569
	addi.l		&0x6000,%d1		# add bias
10570
	andi.w		&0x7fff,%d1		# clear top bit
10571
	or.w		%d2,%d1			# concat old sign,new exp
10572
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10573
	mov.l		(%sp)+,%d2		# restore d2
10574
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10575
	bra.w		fsglmul_unfl_dis
10576

10577
#
# the multiply operation MAY underflow:
# execute the multiply with the FPCR value saved in L_SCR3 and compare
# |result| against 2. only the "equal" case needs the RZ re-execution below.
#
fsglmul_may_unfl:
10578
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10579

10580
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10581
	fmov.l		&0x0,%fpsr		# clear FPSR
10582

10583
	fsglmul.x	FP_SCR0(%a6),%fp0	# execute sgl multiply
10584

10585
	fmov.l		%fpsr,%d1		# save status
10586
	fmov.l		&0x0,%fpcr		# clear FPCR
10587

10588
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10589

10590
	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
10591
	fcmp.b		%fp1,&0x2		# compare |result| with 2.b
10592
	fbgt.w		fsglmul_normal_exit	# |result| > 2; no underflow occurred
10593
	fblt.w		fsglmul_unfl		# |result| < 2; underflow occurred
10594

10595
#
10596
# we still don't know if underflow occurred. result is ~ equal to 2. but,
10597
# we don't know if the result was an underflow that rounded up to a 2 or
10598
# a normalized number that rounded down to a 2. so, redo the entire operation
10599
# using RZ as the rounding mode to see what the pre-rounded result is.
10600
# this case should be relatively rare.
10601
#
10602
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
10603

10604
	mov.l		L_SCR3(%a6),%d1
10605
	andi.b		&0xc0,%d1		# keep rnd prec
10606
	ori.b		&rz_mode*0x10,%d1	# insert RZ
10607

10608
	fmov.l		%d1,%fpcr		# set FPCR
10609
	fmov.l		&0x0,%fpsr		# clear FPSR
10610

10611
	fsglmul.x	FP_SCR0(%a6),%fp1	# execute sgl multiply
10612

10613
	fmov.l		&0x0,%fpcr		# clear FPCR
10614
	fabs.x		%fp1			# make absolute value
10615
	fcmp.b		%fp1,&0x2		# is |result| < 2.b?
10616
	fbge.w		fsglmul_normal_exit	# no; no underflow occurred
10617
	bra.w		fsglmul_unfl		# yes, underflow occurred
10618

10619
##############################################################################
10620

10621
#
10622
# Single Precision Multiply: inputs are not both normalized; what are they?
10623
#
10624
#
# dispatch on the combined operand tags in d1: fetch a 16-bit offset from
# tbl_fsglmul_op and jump to tbl_fsglmul_op + offset.
# the table has 6 rows of 8 entries (48 total, matching the swbeg count).
# the last two entries of every row have no operand pair listed and hold
# offset 0.
# NOTE(review): d1 is presumably (DTAG << 3) | STAG, as computed at the
# fsgldiv entry point - confirm against the fsglmul entry point.
#
fsglmul_not_norm:
10625
	mov.w		(tbl_fsglmul_op.b,%pc,%d1.w*2),%d1
10626
	jmp		(tbl_fsglmul_op.b,%pc,%d1.w*1)
10627

10628
	swbeg		&48
10629
tbl_fsglmul_op:
10630
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x NORM
10631
	short		fsglmul_zero		- tbl_fsglmul_op # NORM x ZERO
10632
	short		fsglmul_inf_src		- tbl_fsglmul_op # NORM x INF
10633
	short		fsglmul_res_qnan	- tbl_fsglmul_op # NORM x QNAN
10634
	short		fsglmul_norm		- tbl_fsglmul_op # NORM x DENORM
10635
	short		fsglmul_res_snan	- tbl_fsglmul_op # NORM x SNAN
10636
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10637
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10638

10639
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x NORM
10640
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x ZERO
10641
	short		fsglmul_res_operr	- tbl_fsglmul_op # ZERO x INF
10642
	short		fsglmul_res_qnan	- tbl_fsglmul_op # ZERO x QNAN
10643
	short		fsglmul_zero		- tbl_fsglmul_op # ZERO x DENORM
10644
	short		fsglmul_res_snan	- tbl_fsglmul_op # ZERO x SNAN
10645
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10646
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10647

10648
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x NORM
10649
	short		fsglmul_res_operr	- tbl_fsglmul_op # INF x ZERO
10650
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x INF
10651
	short		fsglmul_res_qnan	- tbl_fsglmul_op # INF x QNAN
10652
	short		fsglmul_inf_dst		- tbl_fsglmul_op # INF x DENORM
10653
	short		fsglmul_res_snan	- tbl_fsglmul_op # INF x SNAN
10654
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10655
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10656

10657
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x NORM
10658
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x ZERO
10659
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x INF
10660
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x QNAN
10661
	short		fsglmul_res_qnan	- tbl_fsglmul_op # QNAN x DENORM
10662
	short		fsglmul_res_snan	- tbl_fsglmul_op # QNAN x SNAN
10663
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10664
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10665

10666
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x NORM
10667
	short		fsglmul_zero		- tbl_fsglmul_op # DENORM x ZERO
10668
	short		fsglmul_inf_src		- tbl_fsglmul_op # DENORM x INF
10669
	short		fsglmul_res_qnan	- tbl_fsglmul_op # DENORM x QNAN
10670
	short		fsglmul_norm		- tbl_fsglmul_op # DENORM x DENORM
10671
	short		fsglmul_res_snan	- tbl_fsglmul_op # DENORM x SNAN
10672
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10673
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10674

10675
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x NORM
10676
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x ZERO
10677
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x INF
10678
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x QNAN
10679
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x DENORM
10680
	short		fsglmul_res_snan	- tbl_fsglmul_op # SNAN x SNAN
10681
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10682
	short		tbl_fsglmul_op		- tbl_fsglmul_op #
10683

10684
# the table holds 16-bit offsets, so each special case gets a local stub
# that long-branches to the shared handler.
fsglmul_res_operr:
10685
	bra.l		res_operr
10686
fsglmul_res_snan:
10687
	bra.l		res_snan
10688
fsglmul_res_qnan:
10689
	bra.l		res_qnan
10690
fsglmul_zero:
10691
	bra.l		fmul_zero
10692
fsglmul_inf_src:
10693
	bra.l		fmul_inf_src
10694
fsglmul_inf_dst:
10695
	bra.l		fmul_inf_dst
10696

10697
#########################################################################
10698
# XDEF ****************************************************************	#
10699
#	fsgldiv(): emulates the fsgldiv instruction			#
10700
#									#
10701
# XREF ****************************************************************	#
10702
#	scale_to_zero_src() - scale src exponent to zero		#
10703
#	scale_to_zero_dst() - scale dst exponent to zero		#
10704
#	unf_res4() - return default underflow result for sglop		#
10705
#	ovf_res() - return default overflow result			#
10706
#	res_qnan() - return QNAN result					#
10707
#	res_snan() - return SNAN result					#
10708
#									#
10709
# INPUT ***************************************************************	#
10710
#	a0 = pointer to extended precision source operand		#
10711
#	a1 = pointer to extended precision destination operand		#
10712
#	d0 = rnd prec,mode						#
10713
#									#
10714
# OUTPUT **************************************************************	#
10715
#	fp0 = result							#
10716
#	fp1 = EXOP (if exception occurred)				#
10717
#									#
10718
# ALGORITHM ***********************************************************	#
10719
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
10720
# norms/denorms into ext/sgl/dbl precision.				#
10721
#	For norms/denorms, scale the exponents such that a divide	#
10722
# instruction won't cause an exception. Use the regular fsgldiv to	#
10723
# compute a result. Check if the regular operands would have taken	#
10724
# an exception. If so, return the default overflow/underflow result	#
10725
# and return the EXOP if exceptions are enabled. Else, scale the	#
10726
# result operand to the proper exponent.				#
10727
#									#
10728
#########################################################################
10729

10730
	global		fsgldiv
10731
fsgldiv:
10732
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
10733

10734
# build the operand-type index: d1 = (DTAG << 3) | STAG.
# a zero index takes the NORM / NORM path below.
	clr.w		%d1
10735
	mov.b		DTAG(%a6),%d1
10736
	lsl.b		&0x3,%d1
10737
	or.b		STAG(%a6),%d1		# combine src tags
10738

10739
	bne.w		fsgldiv_not_norm	# optimize on non-norm input
10740

10741
#
10742
# DIVIDE: NORMs and DENORMs ONLY!
10743
#
10744
fsgldiv_norm:
10745
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)	# copy dst op to FP_SCR1
10746
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
10747
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
10748

10749
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src op to FP_SCR0
10750
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
10751
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
10752

10753
	bsr.l		scale_to_zero_src	# calculate scale factor 1
10754
	mov.l		%d0,-(%sp)		# save scale factor 1
10755

10756
	bsr.l		scale_to_zero_dst	# calculate scale factor 2
10757

10758
	neg.l		(%sp)			# (sp) = -scale1
10759
	add.l		%d0,(%sp)		# S.F. = scale2 - scale1
10760

10761
	mov.w		2+L_SCR3(%a6),%d1	# fetch precision,mode
10762
	lsr.b		&0x6,%d1
10763
	mov.l		(%sp)+,%d0		# d0 = combined scale factor
10764
	cmpi.l		%d0,&0x3fff-0x7ffe	# will result overflow?
10765
	ble.w		fsgldiv_may_ovfl	# maybe
10766

10767
	cmpi.l		%d0,&0x3fff-0x0000	# will result underflow?
10768
	beq.w		fsgldiv_may_unfl	# maybe
10769
	bgt.w		fsgldiv_unfl		# yes; go handle underflow
10770

10771
fsgldiv_normal:
10772
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10773

10774
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10775
	fmov.l		&0x0,%fpsr		# clear FPSR
10776

10777
	fsgldiv.x	FP_SCR0(%a6),%fp0	# perform sgl divide
10778

10779
	fmov.l		%fpsr,%d1		# save FPSR
10780
	fmov.l		&0x0,%fpcr		# clear FPCR
10781

10782
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10783

10784
# rewrite the result's exponent as (exp - d0), keep its sign, and
# return the result in fp0.
fsgldiv_normal_exit:
10785
	fmovm.x		&0x80,FP_SCR0(%a6)	# store result on stack
10786
	mov.l		%d2,-(%sp)		# save d2
10787
	mov.w		FP_SCR0_EX(%a6),%d1	# load {sgn,exp}
10788
	mov.l		%d1,%d2			# make a copy
10789
	andi.l		&0x7fff,%d1		# strip sign
10790
	andi.w		&0x8000,%d2		# keep old sign
10791
	sub.l		%d0,%d1			# apply scale factor (exp - d0)
10792
	or.w		%d2,%d1			# concat old sign,new exp
10793
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10794
	mov.l		(%sp)+,%d2		# restore d2
10795
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
10796
	rts
10797

10798
#
# the divide operation MAY overflow:
# execute the divide, then test the result exponent with the scale factor
# applied. if it is below 0x7fff there was no overflow.
#
fsgldiv_may_ovfl:
10799
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10800

10801
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10802
	fmov.l		&0x0,%fpsr		# clear FPSR
10803

10804
	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute divide
10805

10806
	fmov.l		%fpsr,%d1		# save status
10807
	fmov.l		&0x0,%fpcr		# clear FPCR
10808

10809
	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
10810

10811
	fmovm.x		&0x01,-(%sp)		# save result to stack
10812
	mov.w		(%sp),%d1		# fetch new exponent
10813
	add.l		&0xc,%sp		# clear result
10814
	andi.l		&0x7fff,%d1		# strip sign
10815
	sub.l		%d0,%d1			# apply scale factor (exp - d0)
10816
	cmp.l		%d1,&0x7fff		# did divide overflow?
10817
	blt.b		fsgldiv_normal_exit	# no
10818

10819
fsgldiv_ovfl_tst:
10820
	or.w		&ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10821

10822
	mov.b		FPCR_ENABLE(%a6),%d1
10823
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
10824
	bne.b		fsgldiv_ovfl_ena	# yes
10825

10826
fsgldiv_ovfl_dis:
10827
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative
10828
	sne		%d1			# set sign param accordingly
10829
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
10830
	andi.b		&0x30,%d0		# kill precision
10831
	bsr.l		ovf_res			# calculate default result
10832
	or.b		%d0,FPSR_CC(%a6)	# set INF if applicable
10833
	fmovm.x		(%a0),&0x80		# return default result in fp0
10834
	rts
10835

10836
# OVFL or INEX is enabled: build the EXOP in fp1 (scaled exponent with
# 0x6000 subtracted), then fall back to fsgldiv_ovfl_dis for the
# default result.
fsgldiv_ovfl_ena:
10837
	fmovm.x		&0x80,FP_SCR0(%a6)	# move result to stack
10838

10839
	mov.l		%d2,-(%sp)		# save d2
10840
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10841
	mov.l		%d1,%d2			# make a copy
10842
	andi.l		&0x7fff,%d1		# strip sign
10843
	andi.w		&0x8000,%d2		# keep old sign
10844
	sub.l		%d0,%d1			# apply scale factor (exp - d0)
10845
	subi.l		&0x6000,%d1		# subtract new bias
10846
	andi.w		&0x7fff,%d1		# clear ms bit
10847
	or.w		%d2,%d1			# concat old sign,new exp
10848
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10849
	mov.l		(%sp)+,%d2		# restore d2
10850
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10851
	bra.b		fsgldiv_ovfl_dis
10852

10853
#
# the divide underflowed:
# set the UNFL exception bit, redo the divide with the FPCR holding only
# the round-to-zero mode, and merge the resulting FPSR into USER_FPSR.
# the default result is then produced by unf_res4(). if UNFL or INEX is
# enabled, an EXOP is built in fp1 first.
#
fsgldiv_unfl:
10854
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10855

10856
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10857

10858
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
10859
	fmov.l		&0x0,%fpsr		# clear FPSR
10860

10861
	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10862

10863
	fmov.l		%fpsr,%d1		# save status
10864
	fmov.l		&0x0,%fpcr		# clear FPCR
10865

10866
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10867

10868
	mov.b		FPCR_ENABLE(%a6),%d1
10869
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
10870
	bne.b		fsgldiv_unfl_ena	# yes
10871

10872
fsgldiv_unfl_dis:
10873
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
10874

10875
	lea		FP_SCR0(%a6),%a0	# pass: result addr
10876
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
10877
	bsr.l		unf_res4		# calculate default result
10878
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
10879
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
10880
	rts
10881

10882
#
10883
# UNFL or INEX is enabled: build the EXOP in fp1, then fall back to
# fsgldiv_unfl_dis for the default result.
10884
#
10885
fsgldiv_unfl_ena:
10886
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
10887

10888
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10889
	fmov.l		&0x0,%fpsr		# clear FPSR
10890

10891
	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10892

10893
	fmov.l		&0x0,%fpcr		# clear FPCR
10894

10895
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
10896
	mov.l		%d2,-(%sp)		# save d2
10897
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
10898
	mov.l		%d1,%d2			# make a copy
10899
	andi.l		&0x7fff,%d1		# strip sign
10900
	andi.w		&0x8000,%d2		# keep old sign
10901
	sub.l		%d0,%d1			# apply scale factor (exp - d0)
10902
	addi.l		&0x6000,%d1		# add bias
10903
	andi.w		&0x7fff,%d1		# clear top bit
10904
	or.w		%d2,%d1			# concat old sign, new exp
10905
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
10906
	mov.l		(%sp)+,%d2		# restore d2
10907
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
10908
	bra.b		fsgldiv_unfl_dis
10909

10910
#
10911
# the divide operation MAY underflow:
10912
#
10913
fsgldiv_may_unfl:
10914
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
10915

10916
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
10917
	fmov.l		&0x0,%fpsr		# clear FPSR
10918

10919
	fsgldiv.x	FP_SCR0(%a6),%fp0	# execute sgl divide
10920

10921
	fmov.l		%fpsr,%d1		# save status
10922
	fmov.l		&0x0,%fpcr		# clear FPCR
10923

10924
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
10925

10926
	fabs.x		%fp0,%fp1		# fp1 = |result|; fp0 is left intact
10927
	fcmp.b		%fp1,&0x1		# compare |result| with 1.b
10928
	fbgt.w		fsgldiv_normal_exit	# |result| > 1; no underflow occurred
10929
	fblt.w		fsgldiv_unfl		# |result| < 1; underflow occurred
10930

10931
#
10932
# we still don't know if underflow occurred. result is ~ equal to 1. but,
10933
# we don't know if the result was an underflow that rounded up to a 1
10934
# or a normalized number that rounded down to a 1. so, redo the entire
10935
# operation using RZ as the rounding mode to see what the pre-rounded
10936
# result is. this case should be relatively rare.
10937
#
10938
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into %fp1
10939

10940
# note: unlike fsglmul_may_unfl, the rounding precision bits from L_SCR3
# are not carried over here; the FPCR holds only the RZ mode.
	clr.l		%d1			# clear scratch register
10941
	ori.b		&rz_mode*0x10,%d1	# force RZ rnd mode
10942

10943
	fmov.l		%d1,%fpcr		# set FPCR
10944
	fmov.l		&0x0,%fpsr		# clear FPSR
10945

10946
	fsgldiv.x	FP_SCR0(%a6),%fp1	# execute sgl divide
10947

10948
	fmov.l		&0x0,%fpcr		# clear FPCR
10949
	fabs.x		%fp1			# make absolute value
10950
	fcmp.b		%fp1,&0x1		# is |result| < 1.b?
10951
	fbge.w		fsgldiv_normal_exit	# no; no underflow occurred
10952
	bra.w		fsgldiv_unfl		# yes; underflow occurred
10953

10954
############################################################################
10955

10956
#
10957
# Divide: inputs are not both normalized; what are they?
10958
#
10959
#
# dispatch on d1 = (DTAG << 3) | STAG (built at the fsgldiv entry point):
# fetch a 16-bit offset from tbl_fsgldiv_op and jump to
# tbl_fsgldiv_op + offset. each row is one dst type, each column one src
# type, so an entry reads "DST / SRC".
# the table has 6 rows of 8 entries (48 total, matching the swbeg count).
# the last two entries of every row have no operand pair listed and hold
# offset 0.
#
fsgldiv_not_norm:
10960
	mov.w		(tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1
10961
	jmp		(tbl_fsgldiv_op.b,%pc,%d1.w*1)
10962

10963
	swbeg		&48
10964
tbl_fsgldiv_op:
10965
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / NORM
10966
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # NORM / ZERO
10967
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # NORM / INF
10968
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # NORM / QNAN
10969
	short		fsgldiv_norm		- tbl_fsgldiv_op # NORM / DENORM
10970
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # NORM / SNAN
10971
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10972
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10973

10974
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / NORM
10975
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # ZERO / ZERO
10976
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / INF
10977
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # ZERO / QNAN
10978
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # ZERO / DENORM
10979
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # ZERO / SNAN
10980
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10981
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10982

10983
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / NORM
10984
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / ZERO
10985
	short		fsgldiv_res_operr	- tbl_fsgldiv_op # INF / INF
10986
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # INF / QNAN
10987
	short		fsgldiv_inf_dst		- tbl_fsgldiv_op # INF / DENORM
10988
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # INF / SNAN
10989
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10990
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10991

10992
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / NORM
10993
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / ZERO
10994
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / INF
10995
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / QNAN
10996
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # QNAN / DENORM
10997
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # QNAN / SNAN
10998
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
10999
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11000

11001
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / NORM
11002
	short		fsgldiv_inf_load	- tbl_fsgldiv_op # DENORM / ZERO
11003
	short		fsgldiv_zero_load	- tbl_fsgldiv_op # DENORM / INF
11004
	short		fsgldiv_res_qnan	- tbl_fsgldiv_op # DENORM / QNAN
11005
	short		fsgldiv_norm		- tbl_fsgldiv_op # DENORM / DENORM
11006
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # DENORM / SNAN
11007
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11008
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11009

11010
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / NORM
11011
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / ZERO
11012
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / INF
11013
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / QNAN
11014
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / DENORM
11015
	short		fsgldiv_res_snan	- tbl_fsgldiv_op # SNAN / SNAN
11016
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11017
	short		tbl_fsgldiv_op		- tbl_fsgldiv_op #
11018

11019
# the table holds 16-bit offsets, so each special case gets a local stub
# that long-branches to the shared handler.
fsgldiv_res_qnan:
11020
	bra.l		res_qnan
11021
fsgldiv_res_snan:
11022
	bra.l		res_snan
11023
fsgldiv_res_operr:
11024
	bra.l		res_operr
11025
fsgldiv_inf_load:
11026
	bra.l		fdiv_inf_load
11027
fsgldiv_zero_load:
11028
	bra.l		fdiv_zero_load
11029
fsgldiv_inf_dst:
11030
	bra.l		fdiv_inf_dst
11031

11032
#########################################################################
11033
# XDEF ****************************************************************	#
11034
#	fadd(): emulates the fadd instruction				#
11035
#	fsadd(): emulates the fsadd instruction				#
11036
#	fdadd(): emulates the fdadd instruction				#
11037
#									#
11038
# XREF ****************************************************************	#
11039
#	addsub_scaler2() - scale the operands so they won't take exc	#
11040
#	ovf_res() - return default overflow result			#
11041
#	unf_res() - return default underflow result			#
11042
#	res_qnan() - set QNAN result					#
11043
#	res_snan() - set SNAN result					#
11044
#	res_operr() - set OPERR result					#
11045
#	scale_to_zero_src() - set src operand exponent equal to zero	#
11046
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11047
#									#
11048
# INPUT ***************************************************************	#
11049
#	a0 = pointer to extended precision source operand		#
11050
#	a1 = pointer to extended precision destination operand		#
11051
#									#
11052
# OUTPUT **************************************************************	#
11053
#	fp0 = result							#
11054
#	fp1 = EXOP (if exception occurred)				#
11055
#									#
11056
# ALGORITHM ***********************************************************	#
11057
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11058
# norms into extended, single, and double precision.			#
11059
#	Do addition after scaling exponents such that exception won't	#
11060
# occur. Then, check result exponent to see if exception would have	#
11061
# occurred. If so, return default result and maybe EXOP. Else, insert	#
11062
# the correct result exponent and return. Set FPSR bits as appropriate.	#
11063
#									#
11064
#########################################################################
11065

11066
	global		fsadd
11067
# fsadd: keep only the rounding mode bits of d0, force single precision,
# then continue at fadd.
fsadd:
11068
	andi.b		&0x30,%d0		# clear rnd prec
11069
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11070
	bra.b		fadd
11071

11072
	global		fdadd
11073
# fdadd: keep only the rounding mode bits of d0, force double precision,
# then fall through into fadd (no branch needed).
fdadd:
11074
	andi.b		&0x30,%d0		# clear rnd prec
11075
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11076

11077
	global		fadd
11078
fadd:
11079
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11080

11081
# build the operand-type index: d1 = (DTAG << 3) | STAG.
# a zero index takes the NORM + NORM path below.
	clr.w		%d1
11082
	mov.b		DTAG(%a6),%d1
11083
	lsl.b		&0x3,%d1
11084
	or.b		STAG(%a6),%d1		# combine src tags
11085

11086
	bne.w		fadd_not_norm		# optimize on non-norm input
11087

11088
#
11089
# ADD: norms and denorms
11090
#
11091
fadd_norm:
11092
	bsr.l		addsub_scaler2		# scale exponents
11093

11094
# also entered from fadd_zero_dst/fadd_zero_src with one operand zeroed.
fadd_zero_entry:
11095
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11096

11097
	fmov.l		&0x0,%fpsr		# clear FPSR
11098
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11099

11100
	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11101

11102
	fmov.l		&0x0,%fpcr		# clear FPCR
11103
	fmov.l		%fpsr,%d1		# fetch INEX2,N,Z
11104

11105
	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11106

11107
	fbeq.w		fadd_zero_exit		# if result is zero, end now
11108

11109
	mov.l		%d2,-(%sp)		# save d2
11110

11111
	fmovm.x		&0x01,-(%sp)		# save result to stack
11112

11113
	mov.w		2+L_SCR3(%a6),%d1	# fetch rnd prec,mode
11114
	lsr.b		&0x6,%d1		# d1 = rnd prec (limit table index)
11115

11116
	mov.w		(%sp),%d2		# fetch new sign, exp
11117
	andi.l		&0x7fff,%d2		# strip sign
11118
	sub.l		%d0,%d2			# apply scale factor (exp - d0)
11119

11120
	cmp.l		%d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121
	bge.b		fadd_ovfl		# yes
11122

11123
	cmp.l		%d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124
	blt.w		fadd_unfl		# yes
11125
	beq.w		fadd_may_unfl		# maybe; go find out
11126

11127
# no exception: insert the rescaled exponent (d2) into the saved result
# and return it in fp0.
fadd_normal:
11128
	mov.w		(%sp),%d1
11129
	andi.w		&0x8000,%d1		# keep sign
11130
	or.w		%d2,%d1			# concat sign,new exp
11131
	mov.w		%d1,(%sp)		# insert new exponent
11132

11133
	fmovm.x		(%sp)+,&0x80		# return result in fp0
11134

11135
	mov.l		(%sp)+,%d2		# restore d2
11136
	rts
11137

11138
# result is zero: fp0 already holds it and nothing was pushed yet.
fadd_zero_exit:
11139
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11140
	rts
11141

11142
# overflow limits for the rescaled exponent, indexed by rnd prec.
tbl_fadd_ovfl:
11143
	long		0x7fff			# ext ovfl
11144
	long		0x407f			# sgl ovfl
11145
	long		0x43ff			# dbl ovfl
11146

11147
# underflow limits for the rescaled exponent, indexed by rnd prec.
# below the limit is an underflow; equal to it goes to fadd_may_unfl.
tbl_fadd_unfl:
11148
	long	        0x0000			# ext unfl
11149
	long		0x3f81			# sgl unfl
11150
	long		0x3c01			# dbl unfl
11151

11152
#
# the add overflowed:
# on entry the stack holds the 12-byte result with the saved d2 beneath it,
# and d2 holds the rescaled exponent. every path must remove the 12-byte
# result before fadd_ovfl_dis restores d2.
#
fadd_ovfl:
11153
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154

11155
	mov.b		FPCR_ENABLE(%a6),%d1
11156
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11157
	bne.b		fadd_ovfl_ena		# yes
11158

11159
	add.l		&0xc,%sp		# discard saved result
11160
fadd_ovfl_dis:
11161
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11162
	sne		%d1			# set sign param accordingly
11163
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11164
	bsr.l		ovf_res			# calculate default result
11165
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11166
	fmovm.x		(%a0),&0x80		# return default result in fp0
11167
	mov.l		(%sp)+,%d2		# restore d2
11168
	rts
11169

11170
fadd_ovfl_ena:
11171
	mov.b		L_SCR3(%a6),%d1
11172
	andi.b		&0xc0,%d1		# is precision extended?
11173
	bne.b		fadd_ovfl_ena_sd	# no; prec = sgl or dbl
11174

11175
# build the EXOP from the result on the stack: rescaled exponent with
# 0x6000 subtracted, original sign kept. popping it into fp1 also removes
# the 12-byte result from the stack.
fadd_ovfl_ena_cont:
11176
	mov.w		(%sp),%d1
11177
	andi.w		&0x8000,%d1		# keep sign
11178
	subi.l		&0x6000,%d2		# subtract extra bias
11179
	andi.w		&0x7fff,%d2		# clear top bit
11180
	or.w		%d2,%d1			# concat sign,new exp
11181
	mov.w		%d1,(%sp)		# insert new exponent
11182

11183
	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11184
	bra.b		fadd_ovfl_dis
11185

11186
# sgl or dbl precision: redo the add with only the rounding mode in the
# FPCR and replace the result on the stack with the new one.
fadd_ovfl_ena_sd:
11187
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11188

11189
	mov.l		L_SCR3(%a6),%d1
11190
	andi.b		&0x30,%d1		# keep rnd mode
11191
	fmov.l		%d1,%fpcr		# set FPCR
11192

11193
	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11194

11195
	fmov.l		&0x0,%fpcr		# clear FPCR
11196

11197
	add.l		&0xc,%sp		# discard old result
11198
	fmovm.x		&0x01,-(%sp)		# save new result to stack
11199
	bra.b		fadd_ovfl_ena_cont
11200

11201
#
# the add underflowed:
# discard the 12-byte result saved on the stack (the saved d2 stays there
# until fadd_unfl_dis), set the UNFL exception bit and redo the add with
# the FPCR holding only the round-to-zero mode.
# the default result is then produced by unf_res(). if UNFL or INEX is
# enabled, an EXOP is built in fp1 first.
#
fadd_unfl:
11202
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203

11204
	add.l		&0xc,%sp		# discard saved result
11205

11206
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11207

11208
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11209
	fmov.l		&0x0,%fpsr		# clear FPSR
11210

11211
	fadd.x		FP_SCR0(%a6),%fp0	# execute add
11212

11213
	fmov.l		&0x0,%fpcr		# clear FPCR
11214
	fmov.l		%fpsr,%d1		# save status
11215

11216
	or.l		%d1,USER_FPSR(%a6)	# save INEX,N
11217

11218
	mov.b		FPCR_ENABLE(%a6),%d1
11219
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11220
	bne.b		fadd_unfl_ena		# yes
11221

11222
fadd_unfl_dis:
11223
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11224

11225
	lea		FP_SCR0(%a6),%a0	# pass: result addr
11226
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11227
	bsr.l		unf_res			# calculate default result
11228
	or.b		%d0,FPSR_CC(%a6)	# 'Z' bit may have been set
11229
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11230
	mov.l		(%sp)+,%d2		# restore d2
11231
	rts
11232

11233
fadd_unfl_ena:
11234
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op
11235

11236
	mov.l		L_SCR3(%a6),%d1
11237
	andi.b		&0xc0,%d1		# is precision extended?
11238
	bne.b		fadd_unfl_ena_sd	# no; sgl or dbl
11239

11240
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11241

11242
# d2 is used as scratch below; its caller value is still on the stack and
# is restored in fadd_unfl_dis.
fadd_unfl_ena_cont:
11243
	fmov.l		&0x0,%fpsr		# clear FPSR
11244

11245
	fadd.x		FP_SCR0(%a6),%fp1	# execute add
11246

11247
	fmov.l		&0x0,%fpcr		# clear FPCR
11248

11249
	fmovm.x		&0x40,FP_SCR0(%a6)	# save result to stack
11250
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11251
	mov.l		%d1,%d2			# make a copy
11252
	andi.l		&0x7fff,%d1		# strip sign
11253
	andi.w		&0x8000,%d2		# keep old sign
11254
	sub.l		%d0,%d1			# apply scale factor (exp - d0)
11255
	addi.l		&0x6000,%d1		# add new bias
11256
	andi.w		&0x7fff,%d1		# clear top bit
11257
	or.w		%d2,%d1			# concat sign,new exp
11258
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11259
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11260
	bra.w		fadd_unfl_dis
11261

11262
# sgl or dbl precision: compute the EXOP with only the rounding mode in
# the FPCR.
fadd_unfl_ena_sd:
11263
	mov.l		L_SCR3(%a6),%d1
11264
	andi.b		&0x30,%d1		# use only rnd mode
11265
	fmov.l		%d1,%fpcr		# set FPCR
11266

11267
	bra.b		fadd_unfl_ena_cont
11268

11269
#
11270
# result is equal to the smallest normalized number in the selected precision
11271
# if the precision is extended, this result could not have come from an
11272
# underflow that rounded up.
11273
#
11274
fadd_may_unfl:
11275
	mov.l		L_SCR3(%a6),%d1
11276
	andi.b		&0xc0,%d1		# is precision extended?
11277
	beq.w		fadd_normal		# yes; no underflow occurred
11278

11279
# the 12-byte result is still on the stack: 0x4(%sp) and 0x8(%sp) are the
# two mantissa longwords.
	mov.l		0x4(%sp),%d1		# extract hi(man)
11280
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11281
	bne.w		fadd_normal		# no; no underflow occurred
11282

11283
	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11284
	bne.w		fadd_normal		# no; no underflow occurred
11285

11286
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287
	beq.w		fadd_normal		# no; no underflow occurred
11288

11289
#
11290
# ok, so now the result has a exponent equal to the smallest normalized
11291
# exponent for the selected precision. also, the mantissa is equal to
11292
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293
# g,r,s.
11294
# now, we must determine whether the pre-rounded result was an underflow
11295
# rounded "up" or a normalized number rounded "down".
11296
# so, we do this be re-executing the add using RZ as the rounding mode and
11297
# seeing if the new result is smaller or equal to the current result.
11298
#
11299
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11300

11301
	mov.l		L_SCR3(%a6),%d1
11302
	andi.b		&0xc0,%d1		# keep rnd prec
11303
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11304
	fmov.l		%d1,%fpcr		# set FPCR
11305
	fmov.l		&0x0,%fpsr		# clear FPSR
11306

11307
	fadd.x		FP_SCR0(%a6),%fp1	# execute add
11308

11309
	fmov.l		&0x0,%fpcr		# clear FPCR
11310

11311
# fp0 is overwritten with its absolute value here. fadd_normal returns the
# copy saved on the stack, and fadd_unfl recomputes fp0.
	fabs.x		%fp0			# compare absolute values
11312
	fabs.x		%fp1
11313
	fcmp.x		%fp0,%fp1		# is first result > second?
11314

11315
	fbgt.w		fadd_unfl		# yes; it's an underflow
11316
	bra.w		fadd_normal		# no; it's not an underflow
11317

11318
##########################################################################
11319

11320
#
11321
# Add: inputs are not both normalized; what are they?
11322
#
11323
#
# dispatch on d1 = (DTAG << 3) | STAG (built at the fadd entry point):
# fetch a 16-bit offset from tbl_fadd_op and jump to tbl_fadd_op + offset.
# each row is one dst type, each column one src type, so an entry reads
# "DST + SRC".
# the table has 6 rows of 8 entries (48 total, matching the swbeg count).
# the last two entries of every row have no operand pair listed and hold
# offset 0.
#
fadd_not_norm:
11324
	mov.w		(tbl_fadd_op.b,%pc,%d1.w*2),%d1
11325
	jmp		(tbl_fadd_op.b,%pc,%d1.w*1)
11326

11327
	swbeg		&48
11328
tbl_fadd_op:
11329
	short		fadd_norm	- tbl_fadd_op # NORM + NORM
11330
	short		fadd_zero_src	- tbl_fadd_op # NORM + ZERO
11331
	short		fadd_inf_src	- tbl_fadd_op # NORM + INF
11332
	short		fadd_res_qnan	- tbl_fadd_op # NORM + QNAN
11333
	short		fadd_norm	- tbl_fadd_op # NORM + DENORM
11334
	short		fadd_res_snan	- tbl_fadd_op # NORM + SNAN
11335
	short		tbl_fadd_op	- tbl_fadd_op #
11336
	short		tbl_fadd_op	- tbl_fadd_op #
11337

11338
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + NORM
11339
	short		fadd_zero_2	- tbl_fadd_op # ZERO + ZERO
11340
	short		fadd_inf_src	- tbl_fadd_op # ZERO + INF
11341
	short		fadd_res_qnan	- tbl_fadd_op # ZERO + QNAN
11342
	short		fadd_zero_dst	- tbl_fadd_op # ZERO + DENORM
11343
	short		fadd_res_snan	- tbl_fadd_op # ZERO + SNAN
11344
	short		tbl_fadd_op	- tbl_fadd_op #
11345
	short		tbl_fadd_op	- tbl_fadd_op #
11346

11347
	short		fadd_inf_dst	- tbl_fadd_op # INF + NORM
11348
	short		fadd_inf_dst	- tbl_fadd_op # INF + ZERO
11349
	short		fadd_inf_2	- tbl_fadd_op # INF + INF
11350
	short		fadd_res_qnan	- tbl_fadd_op # INF + QNAN
11351
	short		fadd_inf_dst	- tbl_fadd_op # INF + DENORM
11352
	short		fadd_res_snan	- tbl_fadd_op # INF + SNAN
11353
	short		tbl_fadd_op	- tbl_fadd_op #
11354
	short		tbl_fadd_op	- tbl_fadd_op #
11355

11356
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + NORM
11357
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + ZERO
11358
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + INF
11359
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + QNAN
11360
	short		fadd_res_qnan	- tbl_fadd_op # QNAN + DENORM
11361
	short		fadd_res_snan	- tbl_fadd_op # QNAN + SNAN
11362
	short		tbl_fadd_op	- tbl_fadd_op #
11363
	short		tbl_fadd_op	- tbl_fadd_op #
11364

11365
	short		fadd_norm	- tbl_fadd_op # DENORM + NORM
11366
	short		fadd_zero_src	- tbl_fadd_op # DENORM + ZERO
11367
	short		fadd_inf_src	- tbl_fadd_op # DENORM + INF
11368
	short		fadd_res_qnan	- tbl_fadd_op # DENORM + QNAN
11369
	short		fadd_norm	- tbl_fadd_op # DENORM + DENORM
11370
	short		fadd_res_snan	- tbl_fadd_op # DENORM + SNAN
11371
	short		tbl_fadd_op	- tbl_fadd_op #
11372
	short		tbl_fadd_op	- tbl_fadd_op #
11373

11374
	short		fadd_res_snan	- tbl_fadd_op # SNAN + NORM
11375
	short		fadd_res_snan	- tbl_fadd_op # SNAN + ZERO
11376
	short		fadd_res_snan	- tbl_fadd_op # SNAN + INF
11377
	short		fadd_res_snan	- tbl_fadd_op # SNAN + QNAN
11378
	short		fadd_res_snan	- tbl_fadd_op # SNAN + DENORM
11379
	short		fadd_res_snan	- tbl_fadd_op # SNAN + SNAN
11380
	short		tbl_fadd_op	- tbl_fadd_op #
11381
	short		tbl_fadd_op	- tbl_fadd_op #
11382

11383
# local stubs that long-branch to the shared NAN handlers.
fadd_res_qnan:
11384
	bra.l		res_qnan
11385
fadd_res_snan:
11386
	bra.l		res_snan
11387

11388
#
11389
# both operands are ZEROes
11390
#
11391
fadd_zero_2:
11392
	mov.b		SRC_EX(%a0),%d0		# are the signs opposite
11393
	mov.b		DST_EX(%a1),%d1
11394
	eor.b		%d0,%d1			# bit 7 set if signs differ
11395
	bmi.w		fadd_zero_2_chk_rm	# weed out (-ZERO)+(+ZERO)
11396

11397
# the signs are the same. so determine whether they are positive or negative
11398
# and return the appropriately signed zero.
11399
	tst.b		%d0			# are ZEROes positive or negative?
11400
	bmi.b		fadd_zero_rm		# negative
11401
	fmov.s		&0x00000000,%fp0	# return +ZERO
11402
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11403
	rts
11404

11405
#
11406
# the ZEROes have opposite signs:
11407
# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408
# - -ZERO is returned in the case of RM.
11409
#
11410
fadd_zero_2_chk_rm:
11411
	mov.b		3+L_SCR3(%a6),%d1	# fetch low byte of rnd info
11412
	andi.b		&0x30,%d1		# extract rnd mode
11413
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode == RM?
11414
	beq.b		fadd_zero_rm		# yes
11415
	fmov.s		&0x00000000,%fp0	# return +ZERO
11416
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11417
	rts
11418

11419
# return -ZERO: used for (-ZERO)+(-ZERO) and for opposite signs under RM.
fadd_zero_rm:
11420
	fmov.s		&0x80000000,%fp0	# return -ZERO
11421
	mov.b		&neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422
	rts
11423

11424
#
11425
# one operand is a ZERO and the other is a DENORM or NORM. scale
11426
# the DENORM or NORM and jump to the regular fadd routine.
11427
#
11428
fadd_zero_dst:
11429
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11430
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11431
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11432
	bsr.l		scale_to_zero_src	# scale the operand
11433
	clr.w		FP_SCR1_EX(%a6)
11434
	clr.l		FP_SCR1_HI(%a6)
11435
	clr.l		FP_SCR1_LO(%a6)
11436
	bra.w		fadd_zero_entry		# go execute fadd
11437

11438
fadd_zero_src:
11439
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11440
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11441
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11442
	bsr.l		scale_to_zero_dst	# scale the operand
11443
	clr.w		FP_SCR0_EX(%a6)
11444
	clr.l		FP_SCR0_HI(%a6)
11445
	clr.l		FP_SCR0_LO(%a6)
11446
	bra.w		fadd_zero_entry		# go execute fadd
11447

11448
#
11449
# both operands are INFs. an OPERR will result if the INFs have
11450
# different signs. else, an INF of the same sign is returned
11451
#
11452
fadd_inf_2:
11453
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11454
	mov.b		DST_EX(%a1),%d1
11455
	eor.b		%d1,%d0
11456
	bmi.l		res_operr		# weed out (-INF)+(+INF)
11457

11458
# ok, so it's not an OPERR. but, we do have to remember to return the
11459
# src INF since that's where the 881/882 gets the j-bit from...
11460

11461
#
11462
# operands are INF and one of {ZERO, INF, DENORM, NORM}
11463
#
11464
fadd_inf_src:
11465
	fmovm.x		SRC(%a0),&0x80		# return src INF
11466
	tst.b		SRC_EX(%a0)		# is INF positive?
11467
	bpl.b		fadd_inf_done		# yes; we're done
11468
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469
	rts
11470

11471
#
11472
# operands are INF and one of {ZERO, INF, DENORM, NORM}
11473
#
11474
fadd_inf_dst:
11475
	fmovm.x		DST(%a1),&0x80		# return dst INF
11476
	tst.b		DST_EX(%a1)		# is INF positive?
11477
	bpl.b		fadd_inf_done		# yes; we're done
11478
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479
	rts
11480

11481
fadd_inf_done:
11482
	mov.b		&inf_bmask,FPSR_CC(%a6) # set INF
11483
	rts
11484

11485
#########################################################################
11486
# XDEF ****************************************************************	#
11487
#	fsub(): emulates the fsub instruction				#
11488
#	fssub(): emulates the fssub instruction				#
11489
#	fdsub(): emulates the fdsub instruction				#
11490
#									#
11491
# XREF ****************************************************************	#
11492
#	addsub_scaler2() - scale the operands so they won't take exc	#
11493
#	ovf_res() - return default overflow result			#
11494
#	unf_res() - return default underflow result			#
11495
#	res_qnan() - set QNAN result					#
11496
#	res_snan() - set SNAN result					#
11497
#	res_operr() - set OPERR result					#
11498
#	scale_to_zero_src() - set src operand exponent equal to zero	#
11499
#	scale_to_zero_dst() - set dst operand exponent equal to zero	#
11500
#									#
11501
# INPUT ***************************************************************	#
11502
#	a0 = pointer to extended precision source operand		#
11503
#	a1 = pointer to extended precision destination operand		#
11504
#									#
11505
# OUTPUT **************************************************************	#
11506
#	fp0 = result							#
11507
#	fp1 = EXOP (if exception occurred)				#
11508
#									#
11509
# ALGORITHM ***********************************************************	#
11510
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11511
# norms into extended, single, and double precision.			#
11512
#	Do subtraction after scaling exponents such that exception won't#
11513
# occur. Then, check result exponent to see if exception would have	#
11514
# occurred. If so, return default result and maybe EXOP. Else, insert	#
11515
# the correct result exponent and return. Set FPSR bits as appropriate.	#
11516
#									#
11517
#########################################################################
11518

11519
	global		fssub
11520
fssub:
11521
	andi.b		&0x30,%d0		# clear rnd prec
11522
	ori.b		&s_mode*0x10,%d0	# insert sgl prec
11523
	bra.b		fsub
11524

11525
	global		fdsub
11526
fdsub:
11527
	andi.b		&0x30,%d0		# clear rnd prec
11528
	ori.b		&d_mode*0x10,%d0	# insert dbl prec
11529

11530
	global		fsub
11531
fsub:
11532
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11533

11534
	clr.w		%d1
11535
	mov.b		DTAG(%a6),%d1
11536
	lsl.b		&0x3,%d1
11537
	or.b		STAG(%a6),%d1		# combine src tags
11538

11539
	bne.w		fsub_not_norm		# optimize on non-norm input
11540

11541
#
11542
# SUB: norms and denorms
11543
#
11544
fsub_norm:
11545
	bsr.l		addsub_scaler2		# scale exponents
11546

11547
fsub_zero_entry:
11548
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11549

11550
	fmov.l		&0x0,%fpsr		# clear FPSR
11551
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11552

11553
	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11554

11555
	fmov.l		&0x0,%fpcr		# clear FPCR
11556
	fmov.l		%fpsr,%d1		# fetch INEX2, N, Z
11557

11558
	or.l		%d1,USER_FPSR(%a6)	# save exc and ccode bits
11559

11560
	fbeq.w		fsub_zero_exit		# if result zero, end now
11561

11562
	mov.l		%d2,-(%sp)		# save d2
11563

11564
	fmovm.x		&0x01,-(%sp)		# save result to stack
11565

11566
	mov.w		2+L_SCR3(%a6),%d1
11567
	lsr.b		&0x6,%d1
11568

11569
	mov.w		(%sp),%d2		# fetch new exponent
11570
	andi.l		&0x7fff,%d2		# strip sign
11571
	sub.l		%d0,%d2			# add scale factor
11572

11573
	cmp.l		%d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574
	bge.b		fsub_ovfl		# yes
11575

11576
	cmp.l		%d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577
	blt.w		fsub_unfl		# yes
11578
	beq.w		fsub_may_unfl		# maybe; go find out
11579

11580
fsub_normal:
11581
	mov.w		(%sp),%d1
11582
	andi.w		&0x8000,%d1		# keep sign
11583
	or.w		%d2,%d1			# insert new exponent
11584
	mov.w		%d1,(%sp)		# insert new exponent
11585

11586
	fmovm.x		(%sp)+,&0x80		# return result in fp0
11587

11588
	mov.l		(%sp)+,%d2		# restore d2
11589
	rts
11590

11591
fsub_zero_exit:
11592
#	fmov.s		&0x00000000,%fp0	# return zero in fp0
11593
	rts
11594

11595
tbl_fsub_ovfl:
11596
	long		0x7fff			# ext ovfl
11597
	long		0x407f			# sgl ovfl
11598
	long		0x43ff			# dbl ovfl
11599

11600
tbl_fsub_unfl:
11601
	long	        0x0000			# ext unfl
11602
	long		0x3f81			# sgl unfl
11603
	long		0x3c01			# dbl unfl
11604

11605
fsub_ovfl:
11606
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607

11608
	mov.b		FPCR_ENABLE(%a6),%d1
11609
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
11610
	bne.b		fsub_ovfl_ena		# yes
11611

11612
	add.l		&0xc,%sp		# drop the 12-byte result saved on the stack
11613
fsub_ovfl_dis:
11614
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
11615
	sne		%d1			# set sign param accordingly
11616
	mov.l		L_SCR3(%a6),%d0		# pass prec:rnd
11617
	bsr.l		ovf_res			# calculate default result
11618
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
11619
	fmovm.x		(%a0),&0x80		# return default result in fp0
11620
	mov.l		(%sp)+,%d2		# restore d2
11621
	rts
11622

11623
fsub_ovfl_ena:
# Overflow with OVFL or INEX enabled: decide whether the EXOP can be built
# from the result already on the stack (extended precision) or whether the
# subtract must be redone at extended precision (sgl/dbl).
#
# L_SCR3 is written as a longword (mov.l %d0,L_SCR3(%a6) in fsub) and the
# rnd prec,mode byte is its least significant byte, which on the big-endian
# 68k is at offset 3 (compare the 3+L_SCR3 access in fsub_zero_2_chk_rm).
# Offset 0 is the most significant byte and does not hold the precision bits.
	mov.b		3+L_SCR3(%a6),%d1	# fetch rnd prec,mode byte
	andi.b		&0xc0,%d1		# is precision extended?
	bne.b		fsub_ovfl_ena_sd	# no
11627

11628
fsub_ovfl_ena_cont:
11629
	mov.w		(%sp),%d1		# fetch {sgn,exp}
11630
	andi.w		&0x8000,%d1		# keep sign
11631
	subi.l		&0x6000,%d2		# subtract new bias
11632
	andi.w		&0x7fff,%d2		# clear top bit
11633
	or.w		%d2,%d1			# concat sign,exp
11634
	mov.w		%d1,(%sp)		# insert new exponent
11635

11636
	fmovm.x		(%sp)+,&0x40		# return EXOP in fp1
11637
	bra.b		fsub_ovfl_dis
11638

11639
fsub_ovfl_ena_sd:
11640
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11641

11642
	mov.l		L_SCR3(%a6),%d1
11643
	andi.b		&0x30,%d1		# clear rnd prec
11644
	fmov.l		%d1,%fpcr		# set FPCR
11645

11646
	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11647

11648
	fmov.l		&0x0,%fpcr		# clear FPCR
11649

11650
	add.l		&0xc,%sp
11651
	fmovm.x		&0x01,-(%sp)
11652
	bra.b		fsub_ovfl_ena_cont
11653

11654
fsub_unfl:
11655
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656

11657
	add.l		&0xc,%sp
11658

11659
	fmovm.x		FP_SCR1(%a6),&0x80	# load dst op
11660

11661
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
11662
	fmov.l		&0x0,%fpsr		# clear FPSR
11663

11664
	fsub.x		FP_SCR0(%a6),%fp0	# execute subtract
11665

11666
	fmov.l		&0x0,%fpcr		# clear FPCR
11667
	fmov.l		%fpsr,%d1		# save status
11668

11669
	or.l		%d1,USER_FPSR(%a6)
11670

11671
	mov.b		FPCR_ENABLE(%a6),%d1
11672
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
11673
	bne.b		fsub_unfl_ena		# yes
11674

11675
fsub_unfl_dis:
11676
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
11677

11678
	lea		FP_SCR0(%a6),%a0	# pass: result addr
11679
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
11680
	bsr.l		unf_res			# calculate default result
11681
	or.b		%d0,FPSR_CC(%a6)	# 'Z' may have been set
11682
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
11683
	mov.l		(%sp)+,%d2		# restore d2
11684
	rts
11685

11686
fsub_unfl_ena:
11687
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11688

11689
	mov.l		L_SCR3(%a6),%d1
11690
	andi.b		&0xc0,%d1		# is precision extended?
11691
	bne.b		fsub_unfl_ena_sd	# no
11692

11693
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11694

11695
fsub_unfl_ena_cont:
11696
	fmov.l		&0x0,%fpsr		# clear FPSR
11697

11698
	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11699

11700
	fmov.l		&0x0,%fpcr		# clear FPCR
11701

11702
	fmovm.x		&0x40,FP_SCR0(%a6)	# store fp1 result in FP_SCR0
11703
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
11704
	mov.l		%d1,%d2			# make a copy
11705
	andi.l		&0x7fff,%d1		# strip sign
11706
	andi.w		&0x8000,%d2		# keep old sign
11707
	sub.l		%d0,%d1			# subtract scale factor
11708
	addi.l		&0x6000,%d1		# add new bias
11709
	andi.w		&0x7fff,%d1		# clear top bit
11710
	or.w		%d2,%d1			# concat sgn,exp
11711
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
11712
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
11713
	bra.w		fsub_unfl_dis
11714

11715
fsub_unfl_ena_sd:
11716
	mov.l		L_SCR3(%a6),%d1
11717
	andi.b		&0x30,%d1		# clear rnd prec
11718
	fmov.l		%d1,%fpcr		# set FPCR
11719

11720
	bra.b		fsub_unfl_ena_cont
11721

11722
#
11723
# result is equal to the smallest normalized number in the selected precision
11724
# if the precision is extended, this result could not have come from an
11725
# underflow that rounded up.
11726
#
11727
fsub_may_unfl:
11728
	mov.l		L_SCR3(%a6),%d1
11729
	andi.b		&0xc0,%d1		# is precision extended?
11730
	beq.w		fsub_normal		# yes; no underflow occurred
11731

11732
	mov.l		0x4(%sp),%d1		# fetch hi(man) of saved result
11733
	cmpi.l		%d1,&0x80000000		# is hi(man) = 0x80000000?
11734
	bne.w		fsub_normal		# no; no underflow occurred
11735

11736
	tst.l		0x8(%sp)		# is lo(man) = 0x0?
11737
	bne.w		fsub_normal		# no; no underflow occurred
11738

11739
	btst		&inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740
	beq.w		fsub_normal		# no; no underflow occurred
11741

11742
#
11743
# ok, so now the result has an exponent equal to the smallest normalized
11744
# exponent for the selected precision. also, the mantissa is equal to
11745
# 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746
# g,r,s.
11747
# now, we must determine whether the pre-rounded result was an underflow
11748
# rounded "up" or a normalized number rounded "down".
11749
# so, we do this by re-executing the subtract using RZ as the rounding mode
11750
# and seeing if the new result is smaller or equal to the current result.
11751
#
11752
	fmovm.x		FP_SCR1(%a6),&0x40	# load dst op into fp1
11753

11754
	mov.l		L_SCR3(%a6),%d1
11755
	andi.b		&0xc0,%d1		# keep rnd prec
11756
	ori.b		&rz_mode*0x10,%d1	# insert rnd mode
11757
	fmov.l		%d1,%fpcr		# set FPCR
11758
	fmov.l		&0x0,%fpsr		# clear FPSR
11759

11760
	fsub.x		FP_SCR0(%a6),%fp1	# execute subtract
11761

11762
	fmov.l		&0x0,%fpcr		# clear FPCR
11763

11764
	fabs.x		%fp0			# compare absolute values
11765
	fabs.x		%fp1
11766
	fcmp.x		%fp0,%fp1		# is first result > second?
11767

11768
	fbgt.w		fsub_unfl		# yes; it's an underflow
11769
	bra.w		fsub_normal		# no; it's not an underflow
11770

11771
##########################################################################
11772

11773
#
11774
# Sub: inputs are not both normalized; what are they?
11775
#
11776
# Dispatch on the combined operand tags. d1 arrives from fsub holding
# (DTAG << 3) | STAG, so each group of eight entries below is one dst type
# and the entries within a group are the src types. Rows are labelled
# DST - SRC.
fsub_not_norm:
11777
	mov.w		(tbl_fsub_op.b,%pc,%d1.w*2),%d1	# fetch handler offset for this tag pair
11778
	jmp		(tbl_fsub_op.b,%pc,%d1.w*1)	# jump to the handler
11779

11780
	swbeg		&48
11781
tbl_fsub_op:
11782
	short		fsub_norm	- tbl_fsub_op # NORM - NORM
11783
	short		fsub_zero_src	- tbl_fsub_op # NORM - ZERO
11784
	short		fsub_inf_src	- tbl_fsub_op # NORM - INF
11785
	short		fsub_res_qnan	- tbl_fsub_op # NORM - QNAN
11786
	short		fsub_norm	- tbl_fsub_op # NORM - DENORM
11787
	short		fsub_res_snan	- tbl_fsub_op # NORM - SNAN
11788
	short		tbl_fsub_op	- tbl_fsub_op #
11789
	short		tbl_fsub_op	- tbl_fsub_op #
11790

11791
	short		fsub_zero_dst	- tbl_fsub_op # ZERO - NORM
11792
	short		fsub_zero_2	- tbl_fsub_op # ZERO - ZERO
11793
	short		fsub_inf_src	- tbl_fsub_op # ZERO - INF
11794
	short		fsub_res_qnan	- tbl_fsub_op # ZERO - QNAN
11795
	short		fsub_zero_dst	- tbl_fsub_op # ZERO - DENORM
11796
	short		fsub_res_snan	- tbl_fsub_op # ZERO - SNAN
11797
	short		tbl_fsub_op	- tbl_fsub_op #
11798
	short		tbl_fsub_op	- tbl_fsub_op #
11799

11800
	short		fsub_inf_dst	- tbl_fsub_op # INF - NORM
11801
	short		fsub_inf_dst	- tbl_fsub_op # INF - ZERO
11802
	short		fsub_inf_2	- tbl_fsub_op # INF - INF
11803
	short		fsub_res_qnan	- tbl_fsub_op # INF - QNAN
11804
	short		fsub_inf_dst	- tbl_fsub_op # INF - DENORM
11805
	short		fsub_res_snan	- tbl_fsub_op # INF - SNAN
11806
	short		tbl_fsub_op	- tbl_fsub_op #
11807
	short		tbl_fsub_op	- tbl_fsub_op #
11808

11809
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - NORM
11810
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - ZERO
11811
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - INF
11812
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - QNAN
11813
	short		fsub_res_qnan	- tbl_fsub_op # QNAN - DENORM
11814
	short		fsub_res_snan	- tbl_fsub_op # QNAN - SNAN
11815
	short		tbl_fsub_op	- tbl_fsub_op #
11816
	short		tbl_fsub_op	- tbl_fsub_op #
11817

11818
	short		fsub_norm	- tbl_fsub_op # DENORM - NORM
11819
	short		fsub_zero_src	- tbl_fsub_op # DENORM - ZERO
11820
	short		fsub_inf_src	- tbl_fsub_op # DENORM - INF
11821
	short		fsub_res_qnan	- tbl_fsub_op # DENORM - QNAN
11822
	short		fsub_norm	- tbl_fsub_op # DENORM - DENORM
11823
	short		fsub_res_snan	- tbl_fsub_op # DENORM - SNAN
11824
	short		tbl_fsub_op	- tbl_fsub_op #
11825
	short		tbl_fsub_op	- tbl_fsub_op #
11826

11827
	short		fsub_res_snan	- tbl_fsub_op # SNAN - NORM
11828
	short		fsub_res_snan	- tbl_fsub_op # SNAN - ZERO
11829
	short		fsub_res_snan	- tbl_fsub_op # SNAN - INF
11830
	short		fsub_res_snan	- tbl_fsub_op # SNAN - QNAN
11831
	short		fsub_res_snan	- tbl_fsub_op # SNAN - DENORM
11832
	short		fsub_res_snan	- tbl_fsub_op # SNAN - SNAN
11833
	short		tbl_fsub_op	- tbl_fsub_op #
11834
	short		tbl_fsub_op	- tbl_fsub_op #
11835

11836
fsub_res_qnan:
11837
	bra.l		res_qnan
11838
fsub_res_snan:
11839
	bra.l		res_snan
11840

11841
#
11842
# both operands are ZEROes
11843
#
11844
fsub_zero_2:
	mov.b		SRC_EX(%a0),%d0		# d0 = src sign byte
	mov.b		DST_EX(%a1),%d1		# d1 = dst sign byte
	eor.b		%d1,%d0			# d0 = src ^ dst; d1 (dst) is left intact
	bpl.b		fsub_zero_2_chk_rm	# same signs; result depends on rnd mode

# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO.
# d0 was overwritten by the eor above and is always negative on this path,
# so the dst sign has to be tested in d1.
	tst.b		%d1			# is dst negative?
	bmi.b		fsub_zero_2_rm		# yes; return -ZERO
	fmov.s		&0x00000000,%fp0	# no; return +ZERO
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
	rts
11856

11857
#
11858
# the ZEROes have the same signs:
11859
# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860
# - -ZERO is returned in the case of RM.
11861
#
11862
fsub_zero_2_chk_rm:
11863
	mov.b		3+L_SCR3(%a6),%d1
11864
	andi.b		&0x30,%d1		# extract rnd mode
11865
	cmpi.b		%d1,&rm_mode*0x10	# is rnd mode = RM?
11866
	beq.b		fsub_zero_2_rm		# yes
11867
	fmov.s		&0x00000000,%fp0	# no; return +ZERO
11868
	mov.b		&z_bmask,FPSR_CC(%a6)	# set Z
11869
	rts
11870

11871
fsub_zero_2_rm:
11872
	fmov.s		&0x80000000,%fp0	# return -ZERO
11873
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set Z/NEG
11874
	rts
11875

11876
#
11877
# one operand is a ZERO and the other is a DENORM or a NORM.
11878
# scale the DENORM or NORM and jump to the regular fsub routine.
11879
#
11880
fsub_zero_dst:
11881
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
11882
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
11883
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
11884
	bsr.l		scale_to_zero_src	# scale the operand
11885
	clr.w		FP_SCR1_EX(%a6)
11886
	clr.l		FP_SCR1_HI(%a6)
11887
	clr.l		FP_SCR1_LO(%a6)
11888
	bra.w		fsub_zero_entry		# go execute fsub
11889

11890
fsub_zero_src:
11891
	mov.w		DST_EX(%a1),FP_SCR1_EX(%a6)
11892
	mov.l		DST_HI(%a1),FP_SCR1_HI(%a6)
11893
	mov.l		DST_LO(%a1),FP_SCR1_LO(%a6)
11894
	bsr.l		scale_to_zero_dst	# scale the operand
11895
	clr.w		FP_SCR0_EX(%a6)
11896
	clr.l		FP_SCR0_HI(%a6)
11897
	clr.l		FP_SCR0_LO(%a6)
11898
	bra.w		fsub_zero_entry		# go execute fsub
11899

11900
#
11901
# both operands are INFs. an OPERR will result if the INFs have the
11902
# same signs. else, the negated src INF (same sign as the dst INF) is returned.
11903
#
11904
fsub_inf_2:
11905
	mov.b		SRC_EX(%a0),%d0		# exclusive or the signs
11906
	mov.b		DST_EX(%a1),%d1
11907
	eor.b		%d1,%d0
11908
	bpl.l		res_operr		# weed out (+INF)-(+INF) and (-INF)-(-INF)
11909

11910
# ok, so it's not an OPERR. but we do have to remember to return
11911
# the src INF since that's where the 881/882 gets the j-bit.
11912

11913
fsub_inf_src:
11914
	fmovm.x		SRC(%a0),&0x80		# return src INF
11915
	fneg.x		%fp0			# invert sign
11916
	fbge.w		fsub_inf_done		# sign is now positive
11917
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918
	rts
11919

11920
fsub_inf_dst:
11921
	fmovm.x		DST(%a1),&0x80		# return dst INF
11922
	tst.b		DST_EX(%a1)		# is INF negative?
11923
	bpl.b		fsub_inf_done		# no
11924
	mov.b		&neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925
	rts
11926

11927
fsub_inf_done:
11928
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set INF
11929
	rts
11930

11931
#########################################################################
11932
# XDEF ****************************************************************	#
11933
#	fsqrt(): emulates the fsqrt instruction				#
11934
#	fssqrt(): emulates the fssqrt instruction			#
11935
#	fdsqrt(): emulates the fdsqrt instruction			#
11936
#									#
11937
# XREF ****************************************************************	#
11938
#	scale_sqrt() - scale the source operand				#
11939
#	unf_res() - return default underflow result			#
11940
#	ovf_res() - return default overflow result			#
11941
#	res_qnan_1op() - return QNAN result				#
11942
#	res_snan_1op() - return SNAN result				#
11943
#									#
11944
# INPUT ***************************************************************	#
11945
#	a0 = pointer to extended precision source operand		#
11946
#	d0  rnd prec,mode						#
11947
#									#
11948
# OUTPUT **************************************************************	#
11949
#	fp0 = result							#
11950
#	fp1 = EXOP (if exception occurred)				#
11951
#									#
11952
# ALGORITHM ***********************************************************	#
11953
#	Handle NANs, infinities, and zeroes as special cases. Divide	#
11954
# norms/denorms into ext/sgl/dbl precision.				#
11955
#	For norms/denorms, scale the exponents such that a sqrt		#
11956
# instruction won't cause an exception. Use the regular fsqrt to	#
11957
# compute a result. Check if the regular operands would have taken	#
11958
# an exception. If so, return the default overflow/underflow result	#
11959
# and return the EXOP if exceptions are enabled. Else, scale the	#
11960
# result operand to the proper exponent.				#
11961
#									#
11962
#########################################################################
11963

11964
	global		fssqrt
11965
fssqrt:
11966
	andi.b		&0x30,%d0		# clear rnd prec
11967
	ori.b		&s_mode*0x10,%d0	# insert sgl precision
11968
	bra.b		fsqrt
11969

11970
	global		fdsqrt
11971
fdsqrt:
11972
	andi.b		&0x30,%d0		# clear rnd prec
11973
	ori.b		&d_mode*0x10,%d0	# insert dbl precision
11974

11975
	global		fsqrt
11976
fsqrt:
11977
	mov.l		%d0,L_SCR3(%a6)		# store rnd info
11978
	clr.w		%d1
11979
	mov.b		STAG(%a6),%d1
11980
	bne.w		fsqrt_not_norm		# optimize on non-norm input
11981

11982
#
11983
# SQUARE ROOT: norms and denorms ONLY!
11984
#
11985
fsqrt_norm:
11986
	tst.b		SRC_EX(%a0)		# is operand negative?
11987
	bmi.l		res_operr		# yes
11988

11989
	andi.b		&0xc0,%d0		# is precision extended?
11990
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
11991

11992
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
11993
	fmov.l		&0x0,%fpsr		# clear FPSR
11994

11995
	fsqrt.x		(%a0),%fp0		# execute square root
11996

11997
	fmov.l		%fpsr,%d1
11998
	or.l		%d1,USER_FPSR(%a6)	# set N,INEX
11999

12000
	rts
12001

12002
fsqrt_denorm:
12003
	tst.b		SRC_EX(%a0)		# is operand negative?
12004
	bmi.l		res_operr		# yes
12005

12006
	andi.b		&0xc0,%d0		# is precision extended?
12007
	bne.b		fsqrt_not_ext		# no; go handle sgl or dbl
12008

12009
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12010
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12011
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12012

12013
	bsr.l		scale_sqrt		# calculate scale factor
12014

12015
	bra.w		fsqrt_sd_normal
12016

12017
#
12018
# operand is either single or double
12019
#
12020
fsqrt_not_ext:
12021
	cmpi.b		%d0,&s_mode*0x10	# separate sgl/dbl prec
12022
	bne.w		fsqrt_dbl
12023

12024
#
12025
# operand is to be rounded to single precision
12026
#
12027
fsqrt_sgl:
12028
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)
12029
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12030
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12031

12032
	bsr.l		scale_sqrt		# calculate scale factor
12033

12034
	cmpi.l		%d0,&0x3fff-0x3f81	# will move in underflow?
12035
	beq.w		fsqrt_sd_may_unfl
12036
	bgt.w		fsqrt_sd_unfl		# yes; go handle underflow
12037
	cmpi.l		%d0,&0x3fff-0x407f	# will move in overflow?
12038
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12039
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12040

12041
#
12042
# operand will NOT overflow or underflow when moved in to the fp reg file
12043
#
12044
fsqrt_sd_normal:
12045
	fmov.l		&0x0,%fpsr		# clear FPSR
12046
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12047

12048
	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12049

12050
	fmov.l		%fpsr,%d1		# save FPSR
12051
	fmov.l		&0x0,%fpcr		# clear FPCR
12052

12053
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12054

12055
fsqrt_sd_normal_exit:
12056
	mov.l		%d2,-(%sp)		# save d2
12057
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12058
	mov.w		FP_SCR0_EX(%a6),%d1	# load sgn,exp
12059
	mov.l		%d1,%d2			# make a copy
12060
	andi.l		&0x7fff,%d1		# strip sign
12061
	sub.l		%d0,%d1			# add scale factor
12062
	andi.w		&0x8000,%d2		# keep old sign
12063
	or.w		%d1,%d2			# concat old sign,new exp
12064
	mov.w		%d2,FP_SCR0_EX(%a6)	# insert new exponent
12065
	mov.l		(%sp)+,%d2		# restore d2
12066
	fmovm.x		FP_SCR0(%a6),&0x80	# return result in fp0
12067
	rts
12068

12069
#
12070
# operand is to be rounded to double precision
12071
#
12072
fsqrt_dbl:
12073
	mov.w		SRC_EX(%a0),FP_SCR0_EX(%a6)	# copy src operand to FP_SCR0
12074
	mov.l		SRC_HI(%a0),FP_SCR0_HI(%a6)
12075
	mov.l		SRC_LO(%a0),FP_SCR0_LO(%a6)
12076

12077
	bsr.l		scale_sqrt		# calculate scale factor
12078

12079
	cmpi.l		%d0,&0x3fff-0x3c01	# will move in underflow?
12080
	beq.w		fsqrt_sd_may_unfl	# maybe; go check
12081
	bgt.b		fsqrt_sd_unfl		# yes; go handle underflow
12082
	cmpi.l		%d0,&0x3fff-0x43ff	# will move in overflow?
12083
	beq.w		fsqrt_sd_may_ovfl	# maybe; go check
12084
	blt.w		fsqrt_sd_ovfl		# yes; go handle overflow
12085
	bra.w		fsqrt_sd_normal		# no; go handle normalized op
12086

12087
# we're on the line here and the distinguishing characteristic is whether
12088
# the exponent is 3fff or 3ffe. if it's 3ffe, then it's a safe number
12089
# otherwise fall through to underflow.
12090
fsqrt_sd_may_unfl:
12091
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12092
	bne.w		fsqrt_sd_normal		# yes, so no underflow
12093

12094
#
12095
# operand WILL underflow when moved in to the fp register file
12096
#
12097
fsqrt_sd_unfl:
12098
	bset		&unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099

12100
	fmov.l		&rz_mode*0x10,%fpcr	# set FPCR
12101
	fmov.l		&0x0,%fpsr		# clear FPSR
12102

12103
	fsqrt.x		FP_SCR0(%a6),%fp0	# execute square root
12104

12105
	fmov.l		%fpsr,%d1		# save status
12106
	fmov.l		&0x0,%fpcr		# clear FPCR
12107

12108
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12109

12110
# if underflow or inexact is enabled, go calculate EXOP first.
12111
	mov.b		FPCR_ENABLE(%a6),%d1
12112
	andi.b		&0x0b,%d1		# is UNFL or INEX enabled?
12113
	bne.b		fsqrt_sd_unfl_ena	# yes
12114

12115
fsqrt_sd_unfl_dis:
12116
	fmovm.x		&0x80,FP_SCR0(%a6)	# store out result
12117

12118
	lea		FP_SCR0(%a6),%a0	# pass: result addr
12119
	mov.l		L_SCR3(%a6),%d1		# pass: rnd prec,mode
12120
	bsr.l		unf_res			# calculate default result
12121
	or.b		%d0,FPSR_CC(%a6)	# set possible 'Z' ccode
12122
	fmovm.x		FP_SCR0(%a6),&0x80	# return default result in fp0
12123
	rts
12124

12125
#
12126
# operand will underflow AND underflow is enabled.
12127
# Therefore, we must return the result rounded to extended precision.
12128
#
12129
fsqrt_sd_unfl_ena:
12130
	mov.l		FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
12131
	mov.l		FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
12132
	mov.w		FP_SCR0_EX(%a6),%d1	# load current exponent
12133

12134
	mov.l		%d2,-(%sp)		# save d2
12135
	mov.l		%d1,%d2			# make a copy
12136
	andi.l		&0x7fff,%d1		# strip sign
12137
	andi.w		&0x8000,%d2		# keep old sign
12138
	sub.l		%d0,%d1			# subtract scale factor
12139
	addi.l		&0x6000,%d1		# add new bias
12140
	andi.w		&0x7fff,%d1
12141
	or.w		%d2,%d1			# concat new sign,new exp
12142
	mov.w		%d1,FP_SCR1_EX(%a6)	# insert new exp
12143
	fmovm.x		FP_SCR1(%a6),&0x40	# return EXOP in fp1
12144
	mov.l		(%sp)+,%d2		# restore d2
12145
	bra.b		fsqrt_sd_unfl_dis
12146

12147
#
12148
# operand WILL overflow.
12149
#
12150
fsqrt_sd_ovfl:
12151
	fmov.l		&0x0,%fpsr		# clear FPSR
12152
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12153

12154
	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12155

12156
	fmov.l		&0x0,%fpcr		# clear FPCR
12157
	fmov.l		%fpsr,%d1		# save FPSR
12158

12159
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12160

12161
fsqrt_sd_ovfl_tst:
12162
	or.l		&ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163

12164
	mov.b		FPCR_ENABLE(%a6),%d1
12165
	andi.b		&0x13,%d1		# is OVFL or INEX enabled?
12166
	bne.b		fsqrt_sd_ovfl_ena	# yes
12167

12168
#
12169
# OVFL is not enabled; therefore, we must create the default result by
12170
# calling ovf_res().
12171
#
12172
fsqrt_sd_ovfl_dis:
12173
	btst		&neg_bit,FPSR_CC(%a6)	# is result negative?
12174
	sne		%d1			# set sign param accordingly
12175
	mov.l		L_SCR3(%a6),%d0		# pass: prec,mode
12176
	bsr.l		ovf_res			# calculate default result
12177
	or.b		%d0,FPSR_CC(%a6)	# set INF,N if applicable
12178
	fmovm.x		(%a0),&0x80		# return default result in fp0
12179
	rts
12180

12181
#
12182
# OVFL is enabled.
12183
# the INEX2 bit has already been updated by the round to the correct precision.
12184
# now, round to extended(and don't alter the FPSR).
12185
#
12186
fsqrt_sd_ovfl_ena:
12187
	mov.l		%d2,-(%sp)		# save d2
12188
	mov.w		FP_SCR0_EX(%a6),%d1	# fetch {sgn,exp}
12189
	mov.l		%d1,%d2			# make a copy
12190
	andi.l		&0x7fff,%d1		# strip sign
12191
	andi.w		&0x8000,%d2		# keep old sign
12192
	sub.l		%d0,%d1			# add scale factor
12193
	subi.l		&0x6000,%d1		# subtract bias
12194
	andi.w		&0x7fff,%d1
12195
	or.w		%d2,%d1			# concat sign,exp
12196
	mov.w		%d1,FP_SCR0_EX(%a6)	# insert new exponent
12197
	fmovm.x		FP_SCR0(%a6),&0x40	# return EXOP in fp1
12198
	mov.l		(%sp)+,%d2		# restore d2
12199
	bra.b		fsqrt_sd_ovfl_dis
12200

12201
#
12202
# the result MAY overflow. so...
12203
#
12204
fsqrt_sd_may_ovfl:
12205
	btst		&0x0,1+FP_SCR0_EX(%a6)	# is exponent 0x3fff?
12206
	bne.w		fsqrt_sd_ovfl		# yes, so overflow
12207

12208
	fmov.l		&0x0,%fpsr		# clear FPSR
12209
	fmov.l		L_SCR3(%a6),%fpcr	# set FPCR
12210

12211
	fsqrt.x		FP_SCR0(%a6),%fp0	# perform square root
12212

12213
	fmov.l		%fpsr,%d1		# save status
12214
	fmov.l		&0x0,%fpcr		# clear FPCR
12215

12216
	or.l		%d1,USER_FPSR(%a6)	# save INEX2,N
12217

12218
	fmov.x		%fp0,%fp1		# make a copy of result
12219
	fcmp.b		%fp1,&0x1		# is |result| >= 1.b?
12220
	fbge.w		fsqrt_sd_ovfl_tst	# yes; overflow has occurred
12221

12222
# no, it didn't overflow; we have correct result
12223
	bra.w		fsqrt_sd_normal_exit
12224

12225
##########################################################################
12226

12227
#
12228
# input is not normalized; what is it?
12229
#
12230
fsqrt_not_norm:
12231
	cmpi.b		%d1,&DENORM		# weed out DENORM
12232
	beq.w		fsqrt_denorm
12233
	cmpi.b		%d1,&ZERO		# weed out ZERO
12234
	beq.b		fsqrt_zero
12235
	cmpi.b		%d1,&INF		# weed out INF
12236
	beq.b		fsqrt_inf
12237
	cmpi.b		%d1,&SNAN		# weed out SNAN
12238
	beq.l		res_snan_1op
12239
	bra.l		res_qnan_1op
12240

12241
#
12242
#	fsqrt(+0) = +0
12243
#	fsqrt(-0) = -0
12244
#	fsqrt(+INF) = +INF
12245
#	fsqrt(-INF) = OPERR
12246
#
12247
fsqrt_zero:
12248
	tst.b		SRC_EX(%a0)		# is ZERO positive or negative?
12249
	bmi.b		fsqrt_zero_m		# negative
12250
fsqrt_zero_p:
12251
	fmov.s		&0x00000000,%fp0	# return +ZERO
12252
	mov.b		&z_bmask,FPSR_CC(%a6)	# set 'Z' ccode bit
12253
	rts
12254
fsqrt_zero_m:
12255
	fmov.s		&0x80000000,%fp0	# return -ZERO
12256
	mov.b		&z_bmask+neg_bmask,FPSR_CC(%a6)	# set 'Z','N' ccode bits
12257
	rts
12258

12259
fsqrt_inf:
12260
	tst.b		SRC_EX(%a0)		# is INF positive or negative?
12261
	bmi.l		res_operr		# negative
12262
fsqrt_inf_p:
12263
	fmovm.x		SRC(%a0),&0x80		# return +INF in fp0
12264
	mov.b		&inf_bmask,FPSR_CC(%a6)	# set 'I' ccode bit
12265
	rts
12266

12267
#########################################################################
# XDEF ****************************************************************	#
#	fetch_dreg(): read the integer register selected by d1		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d1 = register index: 0x0-0x7 select d0-d7,			#
#			     0x8-0xf select a0-a7			#
#									#
# OUTPUT **************************************************************	#
#	d0 = contents of the selected register				#
#	d1 = unchanged, so a store_dreg call may follow directly	#
#									#
# ALGORITHM ***********************************************************	#
#	d1 indexes a table of 16-bit offsets; the stub found there	#
# copies the requested register into d0 and returns to the caller.	#
# d0/d1/a0/a1 are read from EXC_DREGS(a6), a6 from (a6) and a7 from	#
# EXC_A7(a6), because those registers were saved on the stack. All	#
# other registers are read directly since they still hold the		#
# original values.							#
#									#
#########################################################################

	global		fetch_dreg
fetch_dreg:
	mov.w		(tbl_fdreg.b,%pc,%d1.w*2),%d0	# d0 = offset of stub for index d1
	jmp		(tbl_fdreg.b,%pc,%d0.w*1)	# stub's rts returns to our caller

# offsets of the sixteen stubs, relative to tbl_fdreg
tbl_fdreg:
	short		fdreg0 - tbl_fdreg	# 0x0: d0
	short		fdreg1 - tbl_fdreg	# 0x1: d1
	short		fdreg2 - tbl_fdreg	# 0x2: d2
	short		fdreg3 - tbl_fdreg	# 0x3: d3
	short		fdreg4 - tbl_fdreg	# 0x4: d4
	short		fdreg5 - tbl_fdreg	# 0x5: d5
	short		fdreg6 - tbl_fdreg	# 0x6: d6
	short		fdreg7 - tbl_fdreg	# 0x7: d7
	short		fdreg8 - tbl_fdreg	# 0x8: a0
	short		fdreg9 - tbl_fdreg	# 0x9: a1
	short		fdrega - tbl_fdreg	# 0xa: a2
	short		fdregb - tbl_fdreg	# 0xb: a3
	short		fdregc - tbl_fdreg	# 0xc: a4
	short		fdregd - tbl_fdreg	# 0xd: a5
	short		fdrege - tbl_fdreg	# 0xe: a6
	short		fdregf - tbl_fdreg	# 0xf: a7

# data registers
fdreg0:	mov.l		EXC_DREGS+0x0(%a6),%d0	# d0: stacked copy
	rts
fdreg1:	mov.l		EXC_DREGS+0x4(%a6),%d0	# d1: stacked copy
	rts
fdreg2:	mov.l		%d2,%d0
	rts
fdreg3:	mov.l		%d3,%d0
	rts
fdreg4:	mov.l		%d4,%d0
	rts
fdreg5:	mov.l		%d5,%d0
	rts
fdreg6:	mov.l		%d6,%d0
	rts
fdreg7:	mov.l		%d7,%d0
	rts

# address registers
fdreg8:	mov.l		EXC_DREGS+0x8(%a6),%d0	# a0: stacked copy
	rts
fdreg9:	mov.l		EXC_DREGS+0xc(%a6),%d0	# a1: stacked copy
	rts
fdrega:	mov.l		%a2,%d0
	rts
fdregb:	mov.l		%a3,%d0
	rts
fdregc:	mov.l		%a4,%d0
	rts
fdregd:	mov.l		%a5,%d0
	rts
fdrege:	mov.l		(%a6),%d0		# a6: stacked copy at (a6)
	rts
fdregf:	mov.l		EXC_A7(%a6),%d0		# a7: stacked copy
	rts
12360

12361
#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_l(): write a longword to the data register		#
#			selected by d1					#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = longword value to store					#
#	d1 = index (0-7) of the data register to store to		#
#									#
# OUTPUT **************************************************************	#
#	(data register is updated)					#
#	d1 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	d1 indexes a table of 16-bit offsets; the stub found there	#
# copies d0 into the requested register. d0 and d1 are on the stack,	#
# so for them the copy at EXC_DREGS(a6) is updated; d2-d7 are written	#
# directly.								#
#									#
#########################################################################

	global		store_dreg_l
store_dreg_l:
	mov.w		(tbl_sdregl.b,%pc,%d1.w*2),%d1	# d1 = offset of stub (index is lost)
	jmp		(tbl_sdregl.b,%pc,%d1.w*1)	# stub's rts returns to our caller

# offsets of the eight stubs, relative to tbl_sdregl
tbl_sdregl:
	short		sdregl0 - tbl_sdregl	# 0: d0
	short		sdregl1 - tbl_sdregl	# 1: d1
	short		sdregl2 - tbl_sdregl	# 2: d2
	short		sdregl3 - tbl_sdregl	# 3: d3
	short		sdregl4 - tbl_sdregl	# 4: d4
	short		sdregl5 - tbl_sdregl	# 5: d5
	short		sdregl6 - tbl_sdregl	# 6: d6
	short		sdregl7 - tbl_sdregl	# 7: d7

sdregl0:
	mov.l		%d0,EXC_DREGS+0x0(%a6)	# d0: update stacked copy
	rts
sdregl1:
	mov.l		%d0,EXC_DREGS+0x4(%a6)	# d1: update stacked copy
	rts
sdregl2:
	mov.l		%d0,%d2
	rts
sdregl3:
	mov.l		%d0,%d3
	rts
sdregl4:
	mov.l		%d0,%d4
	rts
sdregl5:
	mov.l		%d0,%d5
	rts
sdregl6:
	mov.l		%d0,%d6
	rts
sdregl7:
	mov.l		%d0,%d7
	rts
12421

12422
#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_w(): write a word to the data register selected	#
#			by d1						#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = word value to store (taken from the low word of d0)	#
#	d1 = index (0-7) of the data register to store to		#
#									#
# OUTPUT **************************************************************	#
#	(low word of the data register is updated)			#
#	d1 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	d1 indexes a table of 16-bit offsets; the stub found there	#
# does a word-sized move of d0 into the requested register. d0 and d1	#
# are on the stack, so for them the word at offset 2 of the saved	#
# longword in EXC_DREGS(a6) is written; d2-d7 are written directly.	#
#									#
#########################################################################

	global		store_dreg_w
store_dreg_w:
	mov.w		(tbl_sdregw.b,%pc,%d1.w*2),%d1	# d1 = offset of stub (index is lost)
	jmp		(tbl_sdregw.b,%pc,%d1.w*1)	# stub's rts returns to our caller

# offsets of the eight stubs, relative to tbl_sdregw
tbl_sdregw:
	short		sdregw0 - tbl_sdregw	# 0: d0
	short		sdregw1 - tbl_sdregw	# 1: d1
	short		sdregw2 - tbl_sdregw	# 2: d2
	short		sdregw3 - tbl_sdregw	# 3: d3
	short		sdregw4 - tbl_sdregw	# 4: d4
	short		sdregw5 - tbl_sdregw	# 5: d5
	short		sdregw6 - tbl_sdregw	# 6: d6
	short		sdregw7 - tbl_sdregw	# 7: d7

sdregw0:
	mov.w		%d0,2+EXC_DREGS+0x0(%a6)	# d0: word at +2 of stacked copy
	rts
sdregw1:
	mov.w		%d0,2+EXC_DREGS+0x4(%a6)	# d1: word at +2 of stacked copy
	rts
sdregw2:
	mov.w		%d0,%d2
	rts
sdregw3:
	mov.w		%d0,%d3
	rts
sdregw4:
	mov.w		%d0,%d4
	rts
sdregw5:
	mov.w		%d0,%d5
	rts
sdregw6:
	mov.w		%d0,%d6
	rts
sdregw7:
	mov.w		%d0,%d7
	rts
12482

12483
#########################################################################
# XDEF ****************************************************************	#
#	store_dreg_b(): write a byte to the data register selected	#
#			by d1						#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = byte value to store (taken from the low byte of d0)	#
#	d1 = index (0-7) of the data register to store to		#
#									#
# OUTPUT **************************************************************	#
#	(low byte of the data register is updated)			#
#	d1 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	d1 indexes a table of 16-bit offsets; the stub found there	#
# does a byte-sized move of d0 into the requested register. d0 and d1	#
# are on the stack, so for them the byte at offset 3 of the saved	#
# longword in EXC_DREGS(a6) is written; d2-d7 are written directly.	#
#									#
#########################################################################

	global		store_dreg_b
store_dreg_b:
	mov.w		(tbl_sdregb.b,%pc,%d1.w*2),%d1	# d1 = offset of stub (index is lost)
	jmp		(tbl_sdregb.b,%pc,%d1.w*1)	# stub's rts returns to our caller

# offsets of the eight stubs, relative to tbl_sdregb
tbl_sdregb:
	short		sdregb0 - tbl_sdregb	# 0: d0
	short		sdregb1 - tbl_sdregb	# 1: d1
	short		sdregb2 - tbl_sdregb	# 2: d2
	short		sdregb3 - tbl_sdregb	# 3: d3
	short		sdregb4 - tbl_sdregb	# 4: d4
	short		sdregb5 - tbl_sdregb	# 5: d5
	short		sdregb6 - tbl_sdregb	# 6: d6
	short		sdregb7 - tbl_sdregb	# 7: d7

sdregb0:
	mov.b		%d0,3+EXC_DREGS+0x0(%a6)	# d0: byte at +3 of stacked copy
	rts
sdregb1:
	mov.b		%d0,3+EXC_DREGS+0x4(%a6)	# d1: byte at +3 of stacked copy
	rts
sdregb2:
	mov.b		%d0,%d2
	rts
sdregb3:
	mov.b		%d0,%d3
	rts
sdregb4:
	mov.b		%d0,%d4
	rts
sdregb5:
	mov.b		%d0,%d5
	rts
sdregb6:
	mov.b		%d0,%d6
	rts
sdregb7:
	mov.b		%d0,%d7
	rts
12543

12544
#########################################################################
# XDEF ****************************************************************	#
#	inc_areg(): add the value in d0 to an address register		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to increment by					#
#	d1 = index (0-7) of the address register to increment		#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#	d1 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a post-increment <ea>.	#
# d1 indexes a table of 16-bit offsets; the stub found there adds d0	#
# to the requested register. a0/a1/a6/a7 reside on the stack, so	#
# their saved copies are updated; a2-a5 are updated in place.		#
#	For a7, SPCOND_FLG(a6) is first set to mia7_flg so that the	#
# update can be undone if an access error exception occurs later in	#
# emulation. If the low byte of d0 equals one, a7 is incremented by	#
# two instead; otherwise the full longword in d0 is added.		#
#									#
#########################################################################

	global		inc_areg
inc_areg:
	mov.w		(tbl_iareg.b,%pc,%d1.w*2),%d1	# d1 = offset of stub (index is lost)
	jmp		(tbl_iareg.b,%pc,%d1.w*1)	# stub's rts returns to our caller

# offsets of the eight stubs, relative to tbl_iareg
tbl_iareg:
	short		iareg0 - tbl_iareg	# 0: a0
	short		iareg1 - tbl_iareg	# 1: a1
	short		iareg2 - tbl_iareg	# 2: a2
	short		iareg3 - tbl_iareg	# 3: a3
	short		iareg4 - tbl_iareg	# 4: a4
	short		iareg5 - tbl_iareg	# 5: a5
	short		iareg6 - tbl_iareg	# 6: a6
	short		iareg7 - tbl_iareg	# 7: a7

iareg0:
	add.l		%d0,EXC_DREGS+0x8(%a6)	# a0: stacked copy
	rts
iareg1:
	add.l		%d0,EXC_DREGS+0xc(%a6)	# a1: stacked copy
	rts
iareg2:
	add.l		%d0,%a2
	rts
iareg3:
	add.l		%d0,%a3
	rts
iareg4:
	add.l		%d0,%a4
	rts
iareg5:
	add.l		%d0,%a5
	rts
iareg6:
	add.l		%d0,(%a6)		# a6: stacked copy at (a6)
	rts
iareg7:
	mov.b		&mia7_flg,SPCOND_FLG(%a6) # record the a7 post-increment
	cmpi.b		%d0,&0x1		# is the low byte of d0 one?
	beq.b		iareg7b			# yes; a7 moves by two instead
	add.l		%d0,EXC_A7(%a6)		# a7: stacked copy
	rts
iareg7b:
	addq.l		&0x2,EXC_A7(%a6)	# byte-sized bump of a7 becomes two
	rts
12607

12608
#########################################################################
# XDEF ****************************************************************	#
#	dec_areg(): subtract the value in d0 from an address register	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = amount to decrement by					#
#	d1 = index (0-7) of the address register to decrement		#
#									#
# OUTPUT **************************************************************	#
#	(address register is updated)					#
#	d1 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	Typically used for an instruction w/ a pre-decrement <ea>.	#
# d1 indexes a table of 16-bit offsets; the stub found there		#
# subtracts d0 from the requested register. a0/a1/a6/a7 reside on	#
# the stack, so their saved copies are updated; a2-a5 are updated in	#
# place.								#
#	For a7, SPCOND_FLG(a6) is first set to mda7_flg so that the	#
# update can be undone if an access error exception occurs later in	#
# emulation. If the low byte of d0 equals one, a7 is decremented by	#
# two instead; otherwise the full longword in d0 is subtracted.		#
#									#
#########################################################################

	global		dec_areg
dec_areg:
	mov.w		(tbl_dareg.b,%pc,%d1.w*2),%d1	# d1 = offset of stub (index is lost)
	jmp		(tbl_dareg.b,%pc,%d1.w*1)	# stub's rts returns to our caller

# offsets of the eight stubs, relative to tbl_dareg
tbl_dareg:
	short		dareg0 - tbl_dareg	# 0: a0
	short		dareg1 - tbl_dareg	# 1: a1
	short		dareg2 - tbl_dareg	# 2: a2
	short		dareg3 - tbl_dareg	# 3: a3
	short		dareg4 - tbl_dareg	# 4: a4
	short		dareg5 - tbl_dareg	# 5: a5
	short		dareg6 - tbl_dareg	# 6: a6
	short		dareg7 - tbl_dareg	# 7: a7

dareg0:
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# a0: stacked copy
	rts
dareg1:
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# a1: stacked copy
	rts
dareg2:
	sub.l		%d0,%a2
	rts
dareg3:
	sub.l		%d0,%a3
	rts
dareg4:
	sub.l		%d0,%a4
	rts
dareg5:
	sub.l		%d0,%a5
	rts
dareg6:
	sub.l		%d0,(%a6)		# a6: stacked copy at (a6)
	rts
dareg7:
	mov.b		&mda7_flg,SPCOND_FLG(%a6) # record the a7 pre-decrement
	cmpi.b		%d0,&0x1		# is the low byte of d0 one?
	beq.b		dareg7b			# yes; a7 moves by two instead
	sub.l		%d0,EXC_A7(%a6)		# a7: stacked copy
	rts
dareg7b:
	subq.l		&0x2,EXC_A7(%a6)	# byte-sized drop of a7 becomes two
	rts
12671

12672
##############################################################################
12673

12674
#########################################################################
# XDEF ****************************************************************	#
#	load_fpn1(): copy an FP register into FP_SRC(a6).		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = index (0-7) of the FP register to load			#
#									#
# OUTPUT **************************************************************	#
#	FP_SRC(a6) = value loaded from the FP register file		#
#	a0 = address of FP_SRC(a6)					#
#	d0 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	d0 indexes a table of 16-bit offsets; the stub found there	#
# points a0 at FP_SRC(a6) and fills it. fp0 and fp1 are copied as	#
# three longwords from EXC_FP0(a6)/EXC_FP1(a6); fp2-fp7 are written	#
# from the live registers with fmovm.x.					#
#									#
#########################################################################

	global		load_fpn1
load_fpn1:
	mov.w		(tbl_load_fpn1.b,%pc,%d0.w*2), %d0	# d0 = offset of stub
	jmp		(tbl_load_fpn1.b,%pc,%d0.w*1)		# stub's rts returns to caller

# offsets of the eight stubs, relative to tbl_load_fpn1
tbl_load_fpn1:
	short		load_fpn1_0 - tbl_load_fpn1
	short		load_fpn1_1 - tbl_load_fpn1
	short		load_fpn1_2 - tbl_load_fpn1
	short		load_fpn1_3 - tbl_load_fpn1
	short		load_fpn1_4 - tbl_load_fpn1
	short		load_fpn1_5 - tbl_load_fpn1
	short		load_fpn1_6 - tbl_load_fpn1
	short		load_fpn1_7 - tbl_load_fpn1

load_fpn1_0:
	lea		FP_SRC(%a6), %a0		# a0 = &FP_SRC (returned)
	mov.l		0+EXC_FP0(%a6), (%a0)		# copy the 12-byte stacked fp0
	mov.l		4+EXC_FP0(%a6), 0x4(%a0)
	mov.l		8+EXC_FP0(%a6), 0x8(%a0)
	rts
load_fpn1_1:
	lea		FP_SRC(%a6), %a0		# a0 = &FP_SRC (returned)
	mov.l		0+EXC_FP1(%a6), (%a0)		# copy the 12-byte stacked fp1
	mov.l		4+EXC_FP1(%a6), 0x4(%a0)
	mov.l		8+EXC_FP1(%a6), 0x8(%a0)
	rts
load_fpn1_2:
	lea		FP_SRC(%a6), %a0
	fmovm.x		&0x20, (%a0)			# fp2 -> FP_SRC
	rts
load_fpn1_3:
	lea		FP_SRC(%a6), %a0
	fmovm.x		&0x10, (%a0)			# fp3 -> FP_SRC
	rts
load_fpn1_4:
	lea		FP_SRC(%a6), %a0
	fmovm.x		&0x08, (%a0)			# fp4 -> FP_SRC
	rts
load_fpn1_5:
	lea		FP_SRC(%a6), %a0
	fmovm.x		&0x04, (%a0)			# fp5 -> FP_SRC
	rts
load_fpn1_6:
	lea		FP_SRC(%a6), %a0
	fmovm.x		&0x02, (%a0)			# fp6 -> FP_SRC
	rts
load_fpn1_7:
	lea		FP_SRC(%a6), %a0
	fmovm.x		&0x01, (%a0)			# fp7 -> FP_SRC
	rts
12744

12745
#############################################################################
12746

12747
#########################################################################
# XDEF ****************************************************************	#
#	load_fpn2(): copy an FP register into FP_DST(a6).		#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	d0 = index (0-7) of the FP register to load			#
#									#
# OUTPUT **************************************************************	#
#	FP_DST(a6) = value loaded from the FP register file		#
#	a0 = address of FP_DST(a6)					#
#	d0 is overwritten with the dispatch table offset		#
#									#
# ALGORITHM ***********************************************************	#
#	d0 indexes a table of 16-bit offsets; the stub found there	#
# points a0 at FP_DST(a6) and fills it. fp0 and fp1 are copied as	#
# three longwords from EXC_FP0(a6)/EXC_FP1(a6); fp2-fp7 are written	#
# from the live registers with fmovm.x.					#
#									#
#########################################################################

	global		load_fpn2
load_fpn2:
	mov.w		(tbl_load_fpn2.b,%pc,%d0.w*2), %d0	# d0 = offset of stub
	jmp		(tbl_load_fpn2.b,%pc,%d0.w*1)		# stub's rts returns to caller

# offsets of the eight stubs, relative to tbl_load_fpn2
tbl_load_fpn2:
	short		load_fpn2_0 - tbl_load_fpn2
	short		load_fpn2_1 - tbl_load_fpn2
	short		load_fpn2_2 - tbl_load_fpn2
	short		load_fpn2_3 - tbl_load_fpn2
	short		load_fpn2_4 - tbl_load_fpn2
	short		load_fpn2_5 - tbl_load_fpn2
	short		load_fpn2_6 - tbl_load_fpn2
	short		load_fpn2_7 - tbl_load_fpn2

load_fpn2_0:
	lea		FP_DST(%a6), %a0		# a0 = &FP_DST (returned)
	mov.l		0+EXC_FP0(%a6), (%a0)		# copy the 12-byte stacked fp0
	mov.l		4+EXC_FP0(%a6), 0x4(%a0)
	mov.l		8+EXC_FP0(%a6), 0x8(%a0)
	rts
load_fpn2_1:
	lea		FP_DST(%a6), %a0		# a0 = &FP_DST (returned)
	mov.l		0+EXC_FP1(%a6), (%a0)		# copy the 12-byte stacked fp1
	mov.l		4+EXC_FP1(%a6), 0x4(%a0)
	mov.l		8+EXC_FP1(%a6), 0x8(%a0)
	rts
load_fpn2_2:
	lea		FP_DST(%a6), %a0
	fmovm.x		&0x20, (%a0)			# fp2 -> FP_DST
	rts
load_fpn2_3:
	lea		FP_DST(%a6), %a0
	fmovm.x		&0x10, (%a0)			# fp3 -> FP_DST
	rts
load_fpn2_4:
	lea		FP_DST(%a6), %a0
	fmovm.x		&0x08, (%a0)			# fp4 -> FP_DST
	rts
load_fpn2_5:
	lea		FP_DST(%a6), %a0
	fmovm.x		&0x04, (%a0)			# fp5 -> FP_DST
	rts
load_fpn2_6:
	lea		FP_DST(%a6), %a0
	fmovm.x		&0x02, (%a0)			# fp6 -> FP_DST
	rts
load_fpn2_7:
	lea		FP_DST(%a6), %a0
	fmovm.x		&0x01, (%a0)			# fp7 -> FP_DST
	rts
12817

12818
#############################################################################
12819

12820
#########################################################################
# XDEF ****************************************************************	#
#	store_fpreg(): copy fp0 into the FP register selected by d0.	#
#									#
# XREF ****************************************************************	#
#	None								#
#									#
# INPUT ***************************************************************	#
#	fp0 = extended precision value to store				#
#	d0  = index (0-7) of the destination floating-point register	#
#									#
# OUTPUT **************************************************************	#
#	None (d0 is overwritten with the dispatch table offset)		#
#									#
# ALGORITHM ***********************************************************	#
#	d0 indexes a table of 16-bit offsets; the stub found there	#
# performs the copy. For fp0 and fp1 the value is written to		#
# EXC_FP0(a6)/EXC_FP1(a6). For fp2-fp7 the value is pushed on the	#
# stack and popped into the destination register.			#
#	Every transfer uses fmovm.x: the value can be a DENORM or an	#
# SNAN and we must not take an exception while moving it.		#
#									#
#########################################################################

	global		store_fpreg
store_fpreg:
	mov.w		(tbl_store_fpreg.b,%pc,%d0.w*2), %d0	# d0 = offset of stub
	jmp		(tbl_store_fpreg.b,%pc,%d0.w*1)		# stub's rts returns to caller

# offsets of the eight stubs, relative to tbl_store_fpreg
tbl_store_fpreg:
	short		store_fpreg_0 - tbl_store_fpreg
	short		store_fpreg_1 - tbl_store_fpreg
	short		store_fpreg_2 - tbl_store_fpreg
	short		store_fpreg_3 - tbl_store_fpreg
	short		store_fpreg_4 - tbl_store_fpreg
	short		store_fpreg_5 - tbl_store_fpreg
	short		store_fpreg_6 - tbl_store_fpreg
	short		store_fpreg_7 - tbl_store_fpreg

# destinations kept in the exception frame
store_fpreg_0:
	fmovm.x		&0x80, EXC_FP0(%a6)	# fp0 -> frame slot of fp0
	rts
store_fpreg_1:
	fmovm.x		&0x80, EXC_FP1(%a6)	# fp0 -> frame slot of fp1
	rts

# live destinations: bounce fp0 through the stack
store_fpreg_2:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x20		# pop into fp2
	rts
store_fpreg_3:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x10		# pop into fp3
	rts
store_fpreg_4:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x08		# pop into fp4
	rts
store_fpreg_5:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x04		# pop into fp5
	rts
store_fpreg_6:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x02		# pop into fp6
	rts
store_fpreg_7:
	fmovm.x		&0x01, -(%sp)		# push fp0
	fmovm.x		(%sp)+, &0x01		# pop into fp7
	rts
12886

12887
#########################################################################
# XDEF ****************************************************************	#
#	get_packed(): fetch a packed operand from memory and then	#
#		      convert it to a floating-point binary number.	#
#									#
# XREF ****************************************************************	#
#	_dcalc_ea()  - calculate the correct <ea>			#
#	_dmem_read() - fetch the packed operand from memory		#
#	facc_in_x()  - the fetch failed so jump to special exit code	#
#	decbin()     - convert packed to binary extended precision	#
#									#
# INPUT ***************************************************************	#
#	None								#
#									#
# OUTPUT **************************************************************	#
#	If no failure on _dmem_read():					#
#	FP_SRC(a6) = packed operand now as a binary FP number		#
#									#
# ALGORITHM ***********************************************************	#
#	Get the correct <ea> which is the value on the exception stack	#
# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.	#
# Then, fetch the 12-byte operand into FP_SRC(a6). If the fetch	#
# fails (d1 is non-zero afterwards), exit through facc_in_x().		#
#	If the packed operand is a NAN, INF, or ZERO, it is left in	#
# FP_SRC(a6) exactly as fetched. Else, call decbin() which converts	#
# the packed value to an extended precision binary value in fp0,	#
# and store that result to FP_SRC(a6).					#
#									#
#########################################################################

# the stacked <ea> for packed is correct except for -(An).
# the base reg must be updated for both -(An) and (An)+.
	global		get_packed
get_packed:
	mov.l		&0xc,%d0		# operand size: packed is 12 bytes
	bsr.l		_dcalc_ea		# fetch <ea>; correct An

	mov.l		&0xc,%d0		# pass: byte count (12)
	lea		FP_SRC(%a6),%a1		# pass: ptr to super dst
	bsr.l		_dmem_read		# read packed operand

	tst.l		%d1			# non-zero means the read failed
	bne.l		facc_in_x		# failed; take the access error exit

# An exponent field of all ones marks the packed operand as INF or NAN.
	bfextu		FP_SRC(%a6){&1:&15},%d0	# d0 = the 15 bits below the sign bit
	cmpi.w		%d0,&0x7fff		# all ones?
	bne.b		gp_try_zero		# no; maybe a zero
	rts					# INF or NAN: nothing to convert

# The operand is a zero when the low nybble of byte 3 and both of the
# following longwords are all zero; anything else is a normal packed op.
gp_try_zero:
	mov.b		3+FP_SRC(%a6),%d0	# byte at offset 3 of the operand
	andi.b		&0x0f,%d0		# keep only its low nybble
	bne.b		gp_not_spec		# non-zero digit: not a zero
	tst.l		FP_SRC_HI(%a6)		# second longword zero?
	bne.b		gp_not_spec		# no: not a zero
	tst.l		FP_SRC_LO(%a6)		# third longword zero?
	bne.b		gp_not_spec		# no: not a zero
	rts					# ZERO: nothing to convert

gp_not_spec:
	lea		FP_SRC(%a6),%a0		# pass: ptr to packed op
	bsr.l		decbin			# fp0 = extended precision value
	fmovm.x		&0x80,FP_SRC(%a6)	# FP_SRC = fp0; this is the srcop now
	rts
12952

12953
#########################################################################
12954
# decbin(): Converts normalized packed bcd value pointed to by register	#
12955
#	    a0 to extended-precision value in fp0.			#
12956
#									#
12957
# INPUT ***************************************************************	#
12958
#	a0 = pointer to normalized packed bcd value			#
12959
#									#
12960
# OUTPUT **************************************************************	#
12961
#	fp0 = exact fp representation of the packed bcd value.		#
12962
#									#
12963
# ALGORITHM ***********************************************************	#
12964
#	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,	#
12965
#	and NaN operands are dispatched without entering this routine)	#
12966
#	value in 68881/882 format at location (a0).			#
12967
#									#
12968
#	A1. Convert the bcd exponent to binary by successive adds and	#
12969
#	muls. Set the sign according to SE. Subtract 16 to compensate	#
12970
#	for the mantissa which is to be interpreted as 17 integer	#
12971
#	digits, rather than 1 integer and 16 fraction digits.		#
12972
#	Note: this operation can never overflow.			#
12973
#									#
12974
#	A2. Convert the bcd mantissa to binary by successive		#
12975
#	adds and muls in FP0. Set the sign according to SM.		#
12976
#	The mantissa digits will be converted with the decimal point	#
12977
#	assumed following the least-significant digit.			#
12978
#	Note: this operation can never overflow.			#
12979
#									#
12980
#	A3. Count the number of leading/trailing zeros in the		#
12981
#	bcd string.  If SE is positive, count the leading zeros;	#
12982
#	if negative, count the trailing zeros.  Set the adjusted	#
12983
#	exponent equal to the exponent from A1 and the zero count	#
12984
#	added if SM = 1 and subtracted if SM = 0.  Scale the		#
12985
#	mantissa the equivalent of forcing in the bcd value:		#
12986
#									#
12987
#	SM = 0	a non-zero digit in the integer position		#
12988
#	SM = 1	a non-zero digit in Mant0, lsd of the fraction		#
12989
#									#
12990
#	this will ensure that any value, regardless of its		#
12991
#	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted	#
12992
#	consistently.							#
12993
#									#
12994
#	A4. Calculate the factor 10^exp in FP1 using a table of		#
12995
#	10^(2^n) values.  To reduce the error in forming factors	#
12996
#	greater than 10^27, a directed rounding scheme is used with	#
12997
#	tables rounded to RN, RM, and RP, according to the table	#
12998
#	in the comments of the pwrten section.				#
12999
#									#
13000
#	A5. Form the final binary number by scaling the mantissa by	#
13001
#	the exponent factor.  This is done by multiplying the		#
13002
#	mantissa in FP0 by the factor in FP1 if the adjusted		#
13003
#	exponent sign is positive, and dividing FP0 by FP1 if		#
13004
#	it is negative.							#
13005
#									#
13006
#	Clean up and return. Check if the final mul or div was inexact.	#
13007
#	If so, set INEX1 in USER_FPSR.					#
13008
#									#
13009
#########################################################################
13010

13011
#
13012
#	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013
#	to nearest, minus, and plus, respectively.  The tables include
13014
#	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13015
#	is required until the power is greater than 27, however, all
13016
#	tables include the first 5 for ease of indexing.
13017
#
13018
# RTABLE: sixteen one-byte entries, four per row.
# NOTE(review): this table is consumed by the pwrten section, which
# documents how it is indexed; the values look like FPCR rounding mode
# codes (0, 2, 3) -- confirm against pwrten before relying on that.
RTABLE:
	byte		0,0,0,0
	byte		2,3,2,3
	byte		2,3,3,2
	byte		3,2,2,3

# Mantissa longword scan (see loadlw/md2b in decbin):
	set		FNIBS,7			# dbf count: 8 digits per longword
	set		FSTRT,0			# bit offset of the first digit

# Exponent scan of the first longword (see calc_e in decbin):
	set		ESTRT,4			# bit offset of the first exp digit
	set		EDIGITS,2		# dbf count: 3 exponent digits
13029

13030
	global		decbin
13031
decbin:
13032
	mov.l		0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033
	mov.l		0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034
	mov.l		0x8(%a0),FP_SCR0_LO(%a6)
13035

13036
	lea		FP_SCR0(%a6),%a0
13037

13038
	movm.l		&0x3c00,-(%sp)		# save d2-d5
13039
	fmovm.x		&0x1,-(%sp)		# save fp1
13040
#
13041
# Calculate exponent:
13042
#  1. Copy bcd value in memory for use as a working copy.
13043
#  2. Calculate absolute value of exponent in d1 by mul and add.
13044
#  3. Correct for exponent sign.
13045
#  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046
#     (i.e., all digits assumed left of the decimal point.)
13047
#
13048
# Register usage:
13049
#
13050
#  calc_e:
13051
#	(*)  d0: temp digit storage
13052
#	(*)  d1: accumulator for binary exponent
13053
#	(*)  d2: digit count
13054
#	(*)  d3: offset pointer
13055
#	( )  d4: first word of bcd
13056
#	( )  a0: pointer to working bcd value
13057
#	( )  a6: pointer to original bcd value
13058
#	(*)  FP_SCR1: working copy of original bcd value
13059
#	(*)  L_SCR1: copy of original exponent word
13060
#
13061
calc_e:
13062
	mov.l		&EDIGITS,%d2		# # of nibbles (digits) in fraction part
13063
	mov.l		&ESTRT,%d3		# counter to pick up digits
13064
	mov.l		(%a0),%d4		# get first word of bcd
13065
	clr.l		%d1			# zero d1 for accumulator
13066
e_gd:
13067
	mulu.l		&0xa,%d1		# mul partial product by one digit place
13068
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend into d0
13069
	add.l		%d0,%d1			# d1 = d1 + d0
13070
	addq.b		&4,%d3			# advance d3 to the next digit
13071
	dbf.w		%d2,e_gd		# if we have used all 3 digits, exit loop
13072
	btst		&30,%d4			# get SE
13073
	beq.b		e_pos			# don't negate if pos
13074
	neg.l		%d1			# negate before subtracting
13075
e_pos:
13076
	sub.l		&16,%d1			# sub to compensate for shift of mant
13077
	bge.b		e_save			# if still pos, do not neg
13078
	neg.l		%d1			# now negative, make pos and set SE
13079
	or.l		&0x40000000,%d4		# set SE in d4,
13080
	or.l		&0x40000000,(%a0)	# and in working bcd
13081
e_save:
13082
	mov.l		%d1,-(%sp)		# save exp on stack
13083
#
13084
#
13085
# Calculate mantissa:
13086
#  1. Calculate absolute value of mantissa in fp0 by mul and add.
13087
#  2. Correct for mantissa sign.
13088
#     (i.e., all digits assumed left of the decimal point.)
13089
#
13090
# Register usage:
13091
#
13092
#  calc_m:
13093
#	(*)  d0: temp digit storage
13094
#	(*)  d1: lword counter
13095
#	(*)  d2: digit count
13096
#	(*)  d3: offset pointer
13097
#	( )  d4: words 2 and 3 of bcd
13098
#	( )  a0: pointer to working bcd value
13099
#	( )  a6: pointer to original bcd value
13100
#	(*) fp0: mantissa accumulator
13101
#	( )  FP_SCR1: working copy of original bcd value
13102
#	( )  L_SCR1: copy of original exponent word
13103
#
13104
calc_m:
13105
	mov.l		&1,%d1			# word counter, init to 1
13106
	fmov.s		&0x00000000,%fp0	# accumulator
13107
#
13108
#
13109
#  Since the packed number has a long word between the first & second parts,
13110
#  get the integer digit then skip down & get the rest of the
13111
#  mantissa.  We will unroll the loop once.
13112
#
13113
	bfextu		(%a0){&28:&4},%d0	# integer part is ls digit in long word
13114
	fadd.b		%d0,%fp0		# add digit to sum in fp0
13115
#
13116
#
13117
#  Get the rest of the mantissa.
13118
#
13119
loadlw:
13120
	mov.l		(%a0,%d1.L*4),%d4	# load mantissa lonqword into d4
13121
	mov.l		&FSTRT,%d3		# counter to pick up digits
13122
	mov.l		&FNIBS,%d2		# reset number of digits per a0 ptr
13123
md2b:
13124
	fmul.s		&0x41200000,%fp0	# fp0 = fp0 * 10
13125
	bfextu		%d4{%d3:&4},%d0		# get the digit and zero extend
13126
	fadd.b		%d0,%fp0		# fp0 = fp0 + digit
13127
#
13128
#
13129
#  If all the digits (8) in that long word have been converted (d2=0),
13130
#  then inc d1 (=2) to point to the next long word and reset d3 to 0
13131
#  to initialize the digit offset, and set d2 to 7 for the digit count;
13132
#  else continue with this long word.
13133
#
13134
	addq.b		&4,%d3			# advance d3 to the next digit
13135
	dbf.w		%d2,md2b		# check for last digit in this lw
13136
nextlw:
13137
	addq.l		&1,%d1			# inc lw pointer in mantissa
13138
	cmp.l		%d1,&2			# test for last lw
13139
	ble.b		loadlw			# if not, get last one
13140
#
13141
#  Check the sign of the mant and make the value in fp0 the same sign.
13142
#
13143
m_sign:
13144
	btst		&31,(%a0)		# test sign of the mantissa
13145
	beq.b		ap_st_z			# if clear, go to append/strip zeros
13146
	fneg.x		%fp0			# if set, negate fp0
13147
#
13148
# Append/strip zeros:
13149
#
13150
#  For adjusted exponents which have an absolute value greater than 27*,
13151
#  this routine calculates the amount needed to normalize the mantissa
13152
#  for the adjusted exponent.  That number is subtracted from the exp
13153
#  if the exp was positive, and added if it was negative.  The purpose
13154
#  of this is to reduce the value of the exponent and the possibility
13155
#  of error in calculation of pwrten.
13156
#
13157
#  1. Branch on the sign of the adjusted exponent.
13158
#  2p.(positive exp)
13159
#   2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160
#   3. Add one for each zero encountered until a non-zero digit.
13161
#   4. Subtract the count from the exp.
13162
#   5. Check if the exp has crossed zero in #3 above; make the exp abs
13163
#	   and set SE.
13164
#	6. Multiply the mantissa by 10**count.
13165
#  2n.(negative exp)
13166
#   2. Check the digits in lwords 3 and 2 in descending order.
13167
#   3. Add one for each zero encountered until a non-zero digit.
13168
#   4. Add the count to the exp.
13169
#   5. Check if the exp has crossed zero in #3 above; clear SE.
13170
#   6. Divide the mantissa by 10**count.
13171
#
13172
#  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
13173
#   any adjustment due to append/strip zeros will drive the resultant
13174
#   exponent towards zero.  Since all pwrten constants with a power
13175
#   of 27 or less are exact, there is no need to use this routine to
13176
#   attempt to lessen the resultant exponent.
13177
#
13178
# Register usage:
13179
#
13180
#  ap_st_z:
13181
#	(*)  d0: temp digit storage
13182
#	(*)  d1: zero count
13183
#	(*)  d2: digit count
13184
#	(*)  d3: offset pointer
13185
#	( )  d4: first word of bcd
13186
#	(*)  d5: lword counter
13187
#	( )  a0: pointer to working bcd value
13188
#	( )  FP_SCR1: working copy of original bcd value
13189
#	( )  L_SCR1: copy of original exponent word
13190
#
13191
#
13192
# First check the absolute value of the exponent to see if this
13193
# routine is necessary.  If so, then check the sign of the exponent
13194
# and do append (+) or strip (-) zeros accordingly.
13195
# This section handles a positive adjusted exponent.
13196
#
13197
ap_st_z:
13198
	mov.l		(%sp),%d1		# load expA (adjusted exp kept on the stack) for range test
13199
	cmp.l		%d1,&27			# compare expA with 27
13200
	ble.w		pwrten			# if abs(expA) <28, skip ap/st zeros
13201
	btst		&30,(%a0)		# check sign of exp (SE, bit 30 of lword 1)
13202
	bne.b		ap_st_n			# if neg, go to neg side
13203
	clr.l		%d1			# zero count reg
13204
	mov.l		(%a0),%d4		# load lword 1 to d4
13205
	bfextu		%d4{&28:&4},%d0		# get M16 (low 4 bits of lword 1) in d0
13206
	bne.b		ap_p_fx			# if M16 is non-zero, go fix exp
13207
	addq.l		&1,%d1			# M16 is zero: inc zero count
13208
	mov.l		&1,%d5			# init lword counter
13209
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2 to d4
13210
	bne.b		ap_p_cl			# if lw 2 is non-zero, go scan its digits
13211
	addq.l		&8,%d1			# lw 2 is all zeros: inc count by 8
13212
	addq.l		&1,%d5			# inc lword counter
13213
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3 to d4
13214
ap_p_cl:
13215
	clr.l		%d3			# init bit offset (0 = most significant digit of d4)
13216
	mov.l		&7,%d2			# init digit counter (dbf below runs at most 8 times)
13217
ap_p_gd:
13218
	bfextu		%d4{%d3:&4},%d0		# get digit
13219
	bne.b		ap_p_fx			# if non-zero, go to fix exp
13220
	addq.l		&4,%d3			# point to next digit
13221
	addq.l		&1,%d1			# inc zero count
13222
	dbf.w		%d2,ap_p_gd		# get next digit
13223
ap_p_fx:
13224
	mov.l		%d1,%d0			# copy zero count to d0
13225
	mov.l		(%sp),%d1		# get adjusted exp from memory
13226
	sub.l		%d0,%d1			# subtract count from exp
13227
	bge.b		ap_p_fm			# if still >= 0, go fix mantissa
13228
	neg.l		%d1			# now its neg; get abs
13229
	mov.l		(%a0),%d4		# load lword 1 to d4
13230
	or.l		&0x40000000,%d4		# and set SE in d4
13231
	or.l		&0x40000000,(%a0)	# and in memory
13232
#
13233
# Calculate the mantissa multiplier to compensate for the stripping of
13234
# zeros from the mantissa.
13235
#
13236
ap_p_fm:
13237
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13238
	clr.l		%d3			# init table index
13239
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13240
	mov.l		&3,%d2			# init d2 to count bits in counter
#						# (d2 is not read by the loop below, which ends when d0 == 0)
13241
ap_p_el:
13242
	asr.l		&1,%d0			# shift lsb into carry
13243
	bcc.b		ap_p_en			# if bit was 0, skip the multiply
13244
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13245
ap_p_en:
13246
	add.l		&12,%d3			# advance index by 12 bytes to next table entry
13247
	tst.l		%d0			# check if d0 is zero
13248
	bne.b		ap_p_el			# if not, get next bit
13249
	fmul.x		%fp1,%fp0		# mul mantissa by 10**(zero count)
13250
	bra.b		pwrten			# go calc pwrten
13251
#
13252
# This section handles a negative adjusted exponent.
13253
#
13254
ap_st_n:
13255
	clr.l		%d1			# clr zero counter
13256
	mov.l		&2,%d5			# set up d5 to point to lword 3
13257
	mov.l		(%a0,%d5.L*4),%d4	# get lword 3
13258
	bne.b		ap_n_cl			# if not zero, check digits
13259
	sub.l		&1,%d5			# dec d5 to point to lword 2
13260
	addq.l		&8,%d1			# lword 3 is all zeros: inc counter by 8
13261
	mov.l		(%a0,%d5.L*4),%d4	# get lword 2
13262
ap_n_cl:
13263
	mov.l		&28,%d3			# point to last (least significant) digit of d4
13264
	mov.l		&7,%d2			# init digit counter (dbf below runs at most 8 times)
13265
ap_n_gd:
13266
	bfextu		%d4{%d3:&4},%d0		# get digit
13267
	bne.b		ap_n_fx			# if non-zero, go to exp fix
13268
	subq.l		&4,%d3			# point to previous digit
13269
	addq.l		&1,%d1			# inc zero counter
13270
	dbf.w		%d2,ap_n_gd		# get next digit
13271
ap_n_fx:
13272
	mov.l		%d1,%d0			# copy zero counter to d0
13273
	mov.l		(%sp),%d1		# get adjusted exp from memory
13274
	sub.l		%d0,%d1			# subtract count from exp
13275
	bgt.b		ap_n_fm			# if still > 0, go fix mantissa
13276
	neg.l		%d1			# take abs of exp and clr SE
13277
	mov.l		(%a0),%d4		# load lword 1 to d4
13278
	and.l		&0xbfffffff,%d4		# and clr SE in d4
13279
	and.l		&0xbfffffff,(%a0)	# and in memory
13280
#
13281
# Calculate the mantissa multiplier to compensate for the appending of
13282
# zeros to the mantissa.
13283
#
13284
ap_n_fm:
13285
	lea.l		PTENRN(%pc),%a1		# get address of power-of-ten table
13286
	clr.l		%d3			# init table index
13287
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13288
	mov.l		&3,%d2			# init d2 to count bits in counter
#						# (d2 is not read by the loop below, which ends when d0 == 0)
13289
ap_n_el:
13290
	asr.l		&1,%d0			# shift lsb into carry
13291
	bcc.b		ap_n_en			# if bit was 0, skip the multiply
13292
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13293
ap_n_en:
13294
	add.l		&12,%d3			# advance index by 12 bytes to next table entry
13295
	tst.l		%d0			# check if d0 is zero
13296
	bne.b		ap_n_el			# if not, get next bit
13297
	fdiv.x		%fp1,%fp0		# div mantissa by 10**(zero count); falls into pwrten
13298
#
13299
#
13300
# Calculate power-of-ten factor from adjusted and shifted exponent.
13301
#
13302
# Register usage:
13303
#
13304
#  pwrten:
13305
#	(*)  d0: temp
13306
#	( )  d1: exponent
13307
#	(*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308
#	(*)  d3: FPCR work copy
13309
#	( )  d4: first word of bcd
13310
#	(*)  a1: RTABLE pointer
13311
#  calc_p:
13312
#	(*)  d0: temp
13313
#	( )  d1: exponent
13314
#	(*)  d3: PWRTxx table index
13315
#	( )  a0: pointer to working copy of bcd
13316
#	(*)  a1: PWRTxx pointer
13317
#	(*) fp1: power-of-ten accumulator
13318
#
13319
# Pwrten calculates the exponent factor in the selected rounding mode
13320
# according to the following table:
13321
#
13322
#	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13323
#
13324
#	ANY	  ANY	RN	RN
13325
#
13326
#	 +	   +	RP	RP
13327
#	 -	   +	RP	RM
13328
#	 +	   -	RP	RM
13329
#	 -	   -	RP	RP
13330
#
13331
#	 +	   +	RM	RM
13332
#	 -	   +	RM	RP
13333
#	 +	   -	RM	RP
13334
#	 -	   -	RM	RM
13335
#
13336
#	 +	   +	RZ	RM
13337
#	 -	   +	RZ	RM
13338
#	 +	   -	RZ	RP
13339
#	 -	   -	RZ	RP
13340
#
13341
#
13342
pwrten:
13343
	mov.l		USER_FPCR(%a6),%d3	# get user's FPCR
13344
	bfextu		%d3{&26:&2},%d2		# isolate rounding mode bits
13345
	mov.l		(%a0),%d4		# reload 1st bcd word to d4
13346
	asl.l		&2,%d2			# format d2 to be
13347
	bfextu		%d4{&0:&2},%d0		# {FPCR[6],FPCR[5],SM,SE}
13348
	add.l		%d0,%d2			# in d2 as index into RTABLE
13349
	lea.l		RTABLE(%pc),%a1		# load rtable base
13350
	mov.b		(%a1,%d2),%d0		# load new rounding bits from table
13351
	clr.l		%d3			# clear d3 to force no exc and extended
13352
	bfins		%d0,%d3{&26:&2}		# stuff new rounding bits in FPCR
13353
	fmov.l		%d3,%fpcr		# write new FPCR
13354
	asr.l		&1,%d0			# shift bit 0 of the new rounding bits into carry
13355
	bcc.b		not_rp			# bit 0 clear: not RP, keep decoding
13356
	lea.l		PTENRP(%pc),%a1		# it is RP
13357
	bra.b		calc_p			# go to init section
13358
not_rp:
13359
	asr.l		&1,%d0			# keep checking: shift bit 1 into carry
13360
	bcc.b		not_rm			# bit 1 clear too: it is RN
13361
	lea.l		PTENRM(%pc),%a1		# it is RM
13362
	bra.b		calc_p			# go to init section
13363
not_rm:
13364
	lea.l		PTENRN(%pc),%a1		# it is RN
13365
calc_p:
13366
	mov.l		%d1,%d0			# copy exp to d0;use d0
13367
	bpl.b		no_neg			# if exp is negative,
13368
	neg.l		%d0			# invert it
13369
	or.l		&0x40000000,(%a0)	# and set SE bit
13370
no_neg:
13371
	clr.l		%d3			# table index
13372
	fmov.s		&0x3f800000,%fp1	# init fp1 to 1
13373
e_loop:
13374
	asr.l		&1,%d0			# shift next bit into carry
13375
	bcc.b		e_next			# if zero, skip the mul
13376
	fmul.x		(%a1,%d3),%fp1		# mul by 10**(d3_bit_no)
13377
e_next:
13378
	add.l		&12,%d3			# advance index by 12 bytes to next table entry
13379
	tst.l		%d0			# check if d0 is zero
13380
	bne.b		e_loop			# not zero, continue shifting
13381
#
13382
#
13383
#  Check the sign of the adjusted exp and make the value in fp0 the
13384
#  same sign. If the exp was pos then multiply fp1*fp0;
13385
#  else divide fp0/fp1.
13386
#
13387
# Register Usage:
13388
#  pnorm:
13389
#	( )  a0: pointer to working bcd value
13390
#	(*) fp0: mantissa accumulator
13391
#	( ) fp1: scaling factor - 10**(abs(exp))
13392
#
13393
pnorm:
13394
	btst		&30,(%a0)		# test the sign of the exponent
13395
	beq.b		mul			# if clear, go to multiply
13396
div:
13397
	fdiv.x		%fp1,%fp0		# exp is negative, so divide mant by 10**abs(exp)
13398
	bra.b		end_dec
13399
mul:
13400
	fmul.x		%fp1,%fp0		# exp is positive, so multiply by 10**exp
13401
#
13402
#
13403
# Clean up and return with result in fp0.
13404
#
13405
# If the final mul/div in decbin incurred an inex exception,
13406
# it will be inex2, but will be reported as inex1 by get_op.
13407
#
13408
end_dec:
13409
	fmov.l		%fpsr,%d0		# get status register
13410
	bclr		&inex2_bit+8,%d0	# test for inex2 and clear it
13411
	beq.b		no_exc			# skip this if no exc
13412
	ori.w		&inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413
no_exc:
13414
	add.l		&0x4,%sp		# clear 1 lw param
13415
	fmovm.x		(%sp)+,&0x40		# restore fp1
13416
	movm.l		(%sp)+,&0x3c		# restore d2-d5
13417
	fmov.l		&0x0,%fpcr		# return with FPCR cleared
13418
	fmov.l		&0x0,%fpsr		# and FPSR cleared
13419
	rts
13420

13421
#########################################################################
13422
# bindec(): Converts an input in extended precision format to bcd format#
13423
#									#
13424
# INPUT ***************************************************************	#
13425
#	a0 = pointer to the input extended precision value in memory.	#
13426
#	     the input may be either normalized, unnormalized, or	#
13427
#	     denormalized.						#
13428
#	d0 = contains the k-factor sign-extended to 32-bits.		#
13429
#									#
13430
# OUTPUT **************************************************************	#
13431
#	FP_SCR0(a6) = bcd format result on the stack.			#
13432
#									#
13433
# ALGORITHM ***********************************************************	#
13434
#									#
13435
#	A1.	Set RM and size ext;  Set SIGMA = sign of input.	#
13436
#		The k-factor is saved for use in d7. Clear the		#
13437
#		BINDEC_FLG for separating normalized/denormalized	#
13438
#		input.  If input is unnormalized or denormalized,	#
13439
#		normalize it.						#
13440
#									#
13441
#	A2.	Set X = abs(input).					#
13442
#									#
13443
#	A3.	Compute ILOG.						#
13444
#		ILOG is the log base 10 of the input value.  It is	#
13445
#		approximated by adding e + 0.f when the original	#
13446
#		value is viewed as 2^^e * 1.f in extended precision.	#
13447
#		This value is stored in d6.				#
13448
#									#
13449
#	A4.	Clr INEX bit.						#
13450
#		The operation in A3 above may have set INEX2.		#
13451
#									#
13452
#	A5.	Set ICTR = 0;						#
13453
#		ICTR is a flag used in A13.  It must be set before the	#
13454
#		loop entry A6.						#
13455
#									#
13456
#	A6.	Calculate LEN.						#
13457
#		LEN is the number of digits to be displayed.  The	#
13458
#		k-factor can dictate either the total number of digits,	#
13459
#		if it is a positive number, or the number of digits	#
13460
#		after the decimal point which are to be included as	#
13461
#		significant.  See the 68882 manual for examples.	#
13462
#		If LEN is computed to be greater than 17, set OPERR in	#
13463
#		USER_FPSR.  LEN is stored in d4.			#
13464
#									#
13465
#	A7.	Calculate SCALE.					#
13466
#		SCALE is equal to 10^ISCALE, where ISCALE is the number	#
13467
#		of decimal places needed to insure LEN integer digits	#
13468
#		in the output before conversion to bcd. LAMBDA is the	#
13469
#		sign of ISCALE, used in A9. Fp1 contains		#
13470
#		10^^(abs(ISCALE)) using a rounding mode which is a	#
13471
#		function of the original rounding mode and the signs	#
13472
#		of ISCALE and X.  A table is given in the code.		#
13473
#									#
13474
#	A8.	Clr INEX; Force RZ.					#
13475
#		The operation in A7 above may have set INEX2.		#
13476
#		RZ mode is forced for the scaling operation to insure	#
13477
#		only one rounding error.  The grs bits are collected in #
13478
#		the INEX flag for use in A10.				#
13479
#									#
13480
#	A9.	Scale X -> Y.						#
13481
#		The mantissa is scaled to the desired number of		#
13482
#		significant digits.  The excess digits are collected	#
13483
#		in INEX2.						#
13484
#									#
13485
#	A10.	Or in INEX.						#
13486
#		If INEX is set, round error occurred.  This is		#
13487
#		compensated for by 'or-ing' in the INEX2 flag to	#
13488
#		the lsb of Y.						#
13489
#									#
13490
#	A11.	Restore original FPCR; set size ext.			#
13491
#		Perform FINT operation in the user's rounding mode.	#
13492
#		Keep the size to extended.				#
13493
#									#
13494
#	A12.	Calculate YINT = FINT(Y) according to user's rounding	#
13495
#		mode.  The FPSP routine sintd0 is used.  The output	#
13496
#		is in fp0.						#
13497
#									#
13498
#	A13.	Check for LEN digits.					#
13499
#		If the int operation results in more than LEN digits,	#
13500
#		or less than LEN -1 digits, adjust ILOG and repeat from	#
13501
#		A6.  This test occurs only on the first pass.  If the	#
13502
#		result is exactly 10^LEN, decrement ILOG and divide	#
13503
#		the mantissa by 10.					#
13504
#									#
13505
#	A14.	Convert the mantissa to bcd.				#
13506
#		The binstr routine is used to convert the LEN digit	#
13507
#		mantissa to bcd in memory.  The input to binstr is	#
13508
#		to be a fraction; i.e. (mantissa)/10^LEN and adjusted	#
13509
#		such that the decimal point is to the left of bit 63.	#
13510
#		The bcd digits are stored in the correct position in	#
13511
#		the final string area in memory.			#
13512
#									#
13513
#	A15.	Convert the exponent to bcd.				#
13514
#		As in A14 above, the exp is converted to bcd and the	#
13515
#		digits are stored in the final string.			#
13516
#		Test the length of the final exponent string.  If the	#
13517
#		length is 4, set operr.					#
13518
#									#
13519
#	A16.	Write sign bits to final string.			#
13520
#									#
13521
#########################################################################
13522

13523
set	BINDEC_FLG,	EXC_TEMP	# DENORM flag
13524

13525
# Constants in extended precision
# PLOG2 is approximately 0.30103 (log10(2)); PLOG2UP1 is the same value
# with the least significant mantissa bit incremented by one.
13526
PLOG2:
13527
	long		0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
13528
PLOG2UP1:
13529
	long		0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
13530

13531
# Constants in single precision
# (only the first lword of each entry holds the value; the rest is zero)
13532
FONE:
13533
	long		0x3F800000,0x00000000,0x00000000,0x00000000
#	1.0
13534
FTWO:
13535
	long		0x40000000,0x00000000,0x00000000,0x00000000
#	2.0
13536
FTEN:
13537
	long		0x41200000,0x00000000,0x00000000,0x00000000
#	10.0
13538
F4933:
13539
	long		0x459A2800,0x00000000,0x00000000,0x00000000
#	4933.0
13540

13541
# Rounding-mode lookup used in A7.  The byte index is
# {initial rmode (2 bits), LAMBDA, SIGN(X)}; each entry is the rounding
# mode to use for the power-of-ten calculation (0 = RN, 2 = RM, 3 = RP).
RBDTBL:
13542
	byte		0,0,0,0
#	initial mode RN
13543
	byte		3,3,2,2
#	initial mode RZ
13544
	byte		3,2,2,3
#	initial mode RM
13545
	byte		2,3,3,2
#	initial mode RP
13546

13547
#	Implementation Notes:
13548
#
13549
#	The registers are used as follows:
13550
#
13551
#		d0: scratch; LEN input to binstr
13552
#		d1: scratch
13553
#		d2: upper 32-bits of mantissa for binstr
13554
#		d3: scratch;lower 32-bits of mantissa for binstr
13555
#		d4: LEN
13556
#		d5: LAMBDA/ICTR
13557
#		d6: ILOG
13558
#		d7: k-factor
13559
#		a0: ptr for original operand/final result
13560
#		a1: scratch pointer
13561
#		a2: pointer to FP_X; abs(original value) in ext
13562
#		fp0: scratch
13563
#		fp1: scratch
13564
#		fp2: scratch
13565
#		F_SCR1:
13566
#		F_SCR2:
13567
#		L_SCR1:
13568
#		L_SCR2:
13569

13570
	global		bindec
13571
bindec:
13572
	movm.l		&0x3f20,-(%sp)	#  {%d2-%d7/%a2}
13573
	fmovm.x		&0x7,-(%sp)	#  {%fp0-%fp2}
13574

13575
# A1. Set RM and size ext. Set SIGMA = sign input;
13576
#     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13577
#     separating  normalized/denormalized input.  If the input
13578
#     is a denormalized number, set the BINDEC_FLG memory word
13579
#     to signal denorm.  If the input is unnormalized, normalize
13580
#     the input and test for denormalized result.
13581
#
13582
	fmov.l		&rm_mode*0x10,%fpcr	# set RM and ext
13583
	mov.l		(%a0),L_SCR2(%a6)	# save first lword (sign/exponent) for sign check
13584
	mov.l		%d0,%d7		# move k-factor to d7
13585

13586
	clr.b		BINDEC_FLG(%a6)	# clr norm/denorm flag
13587
	cmpi.b		STAG(%a6),&DENORM # is input a DENORM?
13588
	bne.w		A2_str		# no; input is a NORM
13589

13590
#
13591
# Normalize the denorm
# The 64-bit mantissa is shifted left one bit at a time, decrementing
# the exponent each time, until the msb of the upper lword is set.
# NOTE(review): the loop only ends once a set bit reaches the msb, so it
# relies on the mantissa being non-zero - presumably guaranteed by the
# DENORM tag; confirm against the caller.
13592
#
13593
un_de_norm:
13594
	mov.w		(%a0),%d0	# d0 = sign/exponent word
13595
	and.w		&0x7fff,%d0	# strip sign of normalized exp
13596
	mov.l		4(%a0),%d1	# d1 = upper lword of mantissa
13597
	mov.l		8(%a0),%d2	# d2 = lower lword of mantissa
13598
norm_loop:
13599
	sub.w		&1,%d0		# one less in the exponent per shift
13600
	lsl.l		&1,%d2		# shift lower lword left; msb goes to X
13601
	roxl.l		&1,%d1		# shift upper lword left, bringing in X
13602
	tst.l		%d1		# is the msb of the mantissa set yet?
13603
	bge.b		norm_loop	# no; keep shifting
13604
#
13605
# Test if the normalized input is denormalized
13606
#
13607
	tst.w		%d0
13608
	bgt.b		pos_exp		# if greater than zero, it is a norm
13609
	st		BINDEC_FLG(%a6)	# set flag for denorm
13610
pos_exp:
13611
	and.w		&0x7fff,%d0	# strip sign of normalized exp
13612
	mov.w		%d0,(%a0)	# write the normalized operand back over the input
13613
	mov.l		%d1,4(%a0)
13614
	mov.l		%d2,8(%a0)
13615

13616
# A2. Set X = abs(input).
13617
#
13618
A2_str:
13619
	mov.l		(%a0),FP_SCR1(%a6)	# move input to work space
13620
	mov.l		4(%a0),FP_SCR1+4(%a6)	# move input to work space
13621
	mov.l		8(%a0),FP_SCR1+8(%a6)	# move input to work space
13622
	and.l		&0x7fffffff,FP_SCR1(%a6)	# create abs(X) by clearing the sign bit of the copy
13623

13624
# A3. Compute ILOG.
13625
#     ILOG is the log base 10 of the input value.  It is approx-
13626
#     imated by adding e + 0.f when the original value is viewed
13627
#     as 2^^e * 1.f in extended precision.  This value is stored
13628
#     in d6.
13629
#
13630
# Register usage:
13631
#	Input/Output
13632
#	d0: k-factor/exponent
13633
#	d2: x/x
13634
#	d3: x/x
13635
#	d4: x/x
13636
#	d5: x/x
13637
#	d6: x/ILOG
13638
#	d7: k-factor/Unchanged
13639
#	a0: ptr for original operand/final result
13640
#	a1: x/x
13641
#	a2: x/x
13642
#	fp0: x/float(ILOG)
13643
#	fp1: x/x
13644
#	fp2: x/x
13645
#	F_SCR1:x/x
13646
#	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647
#	L_SCR1:x/x
13648
#	L_SCR2:first word of X packed/Unchanged
13649

13650
	tst.b		BINDEC_FLG(%a6)	# check for denorm
13651
	beq.b		A3_cont		# if clr, continue with norm
13652
	mov.l		&-4933,%d6	# force ILOG = -4933
13653
	bra.b		A4_str
13654
A3_cont:
13655
	mov.w		FP_SCR1(%a6),%d0	# move exp to d0
13656
	mov.w		&0x3fff,FP_SCR1(%a6)	# replace exponent with 0x3fff
13657
	fmov.x		FP_SCR1(%a6),%fp0	# now fp0 has 1.f
13658
	sub.w		&0x3fff,%d0	# strip off bias
13659
	fadd.w		%d0,%fp0	# add in exp
13660
	fsub.s		FONE(%pc),%fp0	# subtract off 1.0; fp0 = e + 0.f
13661
	fbge.w		pos_res		# if pos, branch
13662
	fmul.x		PLOG2UP1(%pc),%fp0	# if neg, mul by LOG2UP1
13663
	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword (FPCR was set to RM in A1)
13664
	bra.b		A4_str		# go move out ILOG
13665
pos_res:
13666
	fmul.x		PLOG2(%pc),%fp0	# if pos, mul by LOG2
13667
	fmov.l		%fp0,%d6	# put ILOG in d6 as a lword (FPCR was set to RM in A1)
13668

13669

13670
# A4. Clr INEX bit.
13671
#     The operation in A3 above may have set INEX2.
13672

13673
A4_str:
13674
	fmov.l		&0,%fpsr	# zero all of fpsr - nothing needed
13675

13676

13677
# A5. Set ICTR = 0;
13678
#     ICTR is a flag used in A13.  It must be set before the
13679
#     loop entry A6. The lower word of d5 is used for ICTR.
13680

13681
	clr.w		%d5		# clear ICTR (lower word of d5 only)
13682

13683
# A6. Calculate LEN.
13684
#     LEN is the number of digits to be displayed.  The k-factor
13685
#     can dictate either the total number of digits, if it is
13686
#     a positive number, or the number of digits after the
13687
#     original decimal point which are to be included as
13688
#     significant.  See the 68882 manual for examples.
13689
#     If LEN is computed to be greater than 17, set OPERR in
13690
#     USER_FPSR.  LEN is stored in d4.
13691
#
13692
# Register usage:
13693
#	Input/Output
13694
#	d0: exponent/Unchanged
13695
#	d2: x/x/scratch
13696
#	d3: x/x
13697
#	d4: exc picture/LEN
13698
#	d5: ICTR/Unchanged
13699
#	d6: ILOG/Unchanged
13700
#	d7: k-factor/Unchanged
13701
#	a0: ptr for original operand/final result
13702
#	a1: x/x
13703
#	a2: x/x
13704
#	fp0: float(ILOG)/Unchanged
13705
#	fp1: x/x
13706
#	fp2: x/x
13707
#	F_SCR1:x/x
13708
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709
#	L_SCR1:x/x
13710
#	L_SCR2:first word of X packed/Unchanged
13711

13712
A6_str:
13713
	tst.l		%d7		# branch on sign of k
13714
	ble.b		k_neg		# if k <= 0, LEN = ILOG + 1 - k
13715
	mov.l		%d7,%d4		# if k > 0, LEN = k
13716
	bra.b		len_ck		# skip to LEN check
13717
k_neg:
13718
	mov.l		%d6,%d4		# first load ILOG to d4
13719
	sub.l		%d7,%d4		# subtract off k
13720
	addq.l		&1,%d4		# add in the 1
13721
len_ck:
13722
	tst.l		%d4		# LEN check: branch on sign of LEN
13723
	ble.b		LEN_ng		# if LEN <= 0, set LEN = 1
13724
	cmp.l		%d4,&17		# test if LEN > 17
13725
	ble.b		A7_str		# if not, forget it
13726
	mov.l		&17,%d4		# set max LEN = 17
13727
	tst.l		%d7		# if k <= 0, never set OPERR
13728
	ble.b		A7_str		# k <= 0: go on to A7 without flagging
13729
	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
13730
	bra.b		A7_str		# finished here
13731
LEN_ng:
13732
	mov.l		&1,%d4		# min LEN is 1; falls through into A7
13733

13734

13735
# A7. Calculate SCALE.
13736
#     SCALE is equal to 10^ISCALE, where ISCALE is the number
13737
#     of decimal places needed to insure LEN integer digits
13738
#     in the output before conversion to bcd. LAMBDA is the sign
13739
#     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13740
#     the rounding mode as given in the following table (see
13741
#     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742
#     of opposite sign in bindec.sa from Coonen).
13743
#
13744
#	Initial					USE
13745
#	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
13746
#	----------------------------------------------
13747
#	 RN	00	   0	   0		00/0	RN
13748
#	 RN	00	   0	   1		00/0	RN
13749
#	 RN	00	   1	   0		00/0	RN
13750
#	 RN	00	   1	   1		00/0	RN
13751
#	 RZ	01	   0	   0		11/3	RP
13752
#	 RZ	01	   0	   1		11/3	RP
13753
#	 RZ	01	   1	   0		10/2	RM
13754
#	 RZ	01	   1	   1		10/2	RM
13755
#	 RM	10	   0	   0		11/3	RP
13756
#	 RM	10	   0	   1		10/2	RM
13757
#	 RM	10	   1	   0		10/2	RM
13758
#	 RM	10	   1	   1		11/3	RP
13759
#	 RP	11	   0	   0		10/2	RM
13760
#	 RP	11	   0	   1		11/3	RP
13761
#	 RP	11	   1	   0		11/3	RP
13762
#	 RP	11	   1	   1		10/2	RM
13763
#
13764
# Register usage:
13765
#	Input/Output
13766
#	d0: exponent/scratch - final is 0
13767
#	d2: x/0 or 24 for A9
13768
#	d3: x/scratch - offset ptr into PTENRM array
13769
#	d4: LEN/Unchanged
13770
#	d5: 0/ICTR:LAMBDA
13771
#	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772
#	d7: k-factor/Unchanged
13773
#	a0: ptr for original operand/final result
13774
#	a1: x/ptr to PTENRM array
13775
#	a2: x/x
13776
#	fp0: float(ILOG)/Unchanged
13777
#	fp1: x/10^ISCALE
13778
#	fp2: x/x
13779
#	F_SCR1:x/x
13780
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781
#	L_SCR1:x/x
13782
#	L_SCR2:first word of X packed/Unchanged
13783

13784
A7_str:
13785
	tst.l		%d7		# test sign of k
13786
	bgt.b		k_pos		# if pos and > 0, skip this
13787
	cmp.l		%d7,%d6		# test k - ILOG
13788
	blt.b		k_pos		# if k < ILOG, skip this
13789
	mov.l		%d7,%d6		# if ((k<=0) & (ILOG <= k)) ILOG = k
13790
k_pos:
13791
	mov.l		%d6,%d0		# calc ILOG + 1 - LEN in d0
13792
	addq.l		&1,%d0		# add the 1
13793
	sub.l		%d4,%d0		# sub off LEN
13794
	swap		%d5		# move ICTR to upper word; lower word will hold LAMBDA
13795
	clr.w		%d5		# set it zero initially
13796
	clr.w		%d2		# set up d2 for very small case
13797
	tst.l		%d0		# test sign of ISCALE
13798
	bge.b		iscale		# if pos, skip next inst
13799
	addq.w		&1,%d5		# if neg, set LAMBDA true
13800
	cmp.l		%d0,&0xffffecd4	# test iscale <= -4908
13801
	bgt.b		no_inf		# if false, skip rest
13802
	add.l		&24,%d0		# add in 24 to iscale
13803
	mov.l		&24,%d2		# put 24 in d2 for A9
13804
no_inf:
13805
	neg.l		%d0		# and take abs of ISCALE
13806
iscale:
13807
	fmov.s		FONE(%pc),%fp1	# init fp1 to 1
13808
	bfextu		USER_FPCR(%a6){&26:&2},%d1	# get initial rmode bits
13809
	lsl.w		&1,%d1		# put them in bits 2:1
13810
	add.w		%d5,%d1		# add in LAMBDA (bit 0)
13811
	lsl.w		&1,%d1		# now rmode in bits 3:2, LAMBDA in bit 1
13812
	tst.l		L_SCR2(%a6)	# test sign of original x
13813
	bge.b		x_pos		# if pos, don't set bit 0
13814
	addq.l		&1,%d1		# if neg, set bit 0
13815
x_pos:
13816
	lea.l		RBDTBL(%pc),%a2	# load rbdtbl base
13817
	mov.b		(%a2,%d1),%d3	# load d3 with new rmode
# NOTE(review): only the low byte of d3 is loaded above, and nothing
# earlier in bindec writes d3 on the first pass, yet the whole lword is
# shifted and written to FPCR below - confirm the upper bits of d3 are
# harmless here.
13818
	lsl.l		&4,%d3		# put bits in proper position
13819
	fmov.l		%d3,%fpcr	# load bits into fpu
13820
	lsr.l		&4,%d3		# move the rmode bits back down for decoding
13821
	tst.b		%d3		# decode new rmode for pten table
13822
	bne.b		not_rn		# if zero, it is RN
13823
	lea.l		PTENRN(%pc),%a1	# load a1 with RN table base
13824
	bra.b		rmode		# exit decode
13825
not_rn:
13826
	lsr.b		&1,%d3		# get lsb in carry
13827
	bcc.b		not_rp2		# if carry clear, it is RM
13828
	lea.l		PTENRP(%pc),%a1	# load a1 with RP table base
13829
	bra.b		rmode		# exit decode
13830
not_rp2:
13831
	lea.l		PTENRM(%pc),%a1	# load a1 with RM table base
13832
rmode:
13833
	clr.l		%d3		# clr table index
13834
e_loop2:
13835
	lsr.l		&1,%d0		# shift next bit into carry
13836
	bcc.b		e_next2		# if zero, skip the mul
13837
	fmul.x		(%a1,%d3),%fp1	# mul by 10**(d3_bit_no)
13838
e_next2:
13839
	add.l		&12,%d3		# inc d3 to next pwrten table entry
13840
	tst.l		%d0		# test if ISCALE is zero
13841
	bne.b		e_loop2		# if not, loop
13842

13843
# A8. Clr INEX; Force RZ.
13844
#     The power-of-ten multiplies in A7 above may have set INEX2.
13845
#     RZ mode is forced for the scaling operation to insure
13846
#     only one rounding error.  The grs bits are collected in
13847
#     the INEX flag for use in A10.
13848
#
13849
# Register usage:
13850
#	Input/Output
13851

13852
	fmov.l		&0,%fpsr	# clr INEX (clears the whole FPSR)
13853
	fmov.l		&rz_mode*0x10,%fpcr	# set RZ rounding mode
13854

13855
# A9. Scale X -> Y.
13856
#     The mantissa is scaled to the desired number of significant
13857
#     digits.  The excess digits are collected in INEX2. If mul,
13858
#     Check d2 for excess 10 exponential value.  If not zero,
13859
#     the iscale value would have caused the pwrten calculation
13860
#     to overflow.  Only a negative iscale can cause this, so
13861
#     multiply by 10^(d2), which is now only allowed to be 24,
13862
#     with a multiply by 10^8 and 10^16, which is exact since
13863
#     10^24 is exact.  If the input was denormalized, we must
13864
#     create a busy stack frame with the mul command and the
13865
#     two operands, and allow the fpu to complete the multiply.
13866
#
13867
# Register usage:
13868
#	Input/Output
13869
#	d0: FPCR with RZ mode/Unchanged
13870
#	d2: 0 or 24/unchanged
13871
#	d3: x/x
13872
#	d4: LEN/Unchanged
13873
#	d5: ICTR:LAMBDA
13874
#	d6: ILOG/Unchanged
13875
#	d7: k-factor/Unchanged
13876
#	a0: ptr for original operand/final result
13877
#	a1: ptr to PTENRM array/Unchanged
13878
#	a2: x/x
13879
#	fp0: float(ILOG)/X adjusted for SCALE (Y)
13880
#	fp1: 10^ISCALE/Unchanged
13881
#	fp2: x/x
13882
#	F_SCR1:x/x
13883
#	F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884
#	L_SCR1:x/x
13885
#	L_SCR2:first word of X packed/Unchanged
13886

13887
A9_str:
13888
	fmov.x		(%a0),%fp0	# load X from memory
13889
	fabs.x		%fp0		# use abs(X)
13890
	tst.w		%d5		# LAMBDA is in lower word of d5
13891
	bne.b		sc_mul		# if neg (LAMBDA = 1), scale by mul
13892
	fdiv.x		%fp1,%fp0	# calculate X / SCALE -> Y to fp0
13893
	bra.w		A10_st		# branch to A10
13894

13895
sc_mul:
13896
	tst.b		BINDEC_FLG(%a6)	# check for denorm
13897
	beq.w		A9_norm		# if norm, continue with mul
13898

13899
# for DENORM, we must calculate:
13900
#	fp0 = input_op * 10^ISCALE * 10^24
13901
# since the input operand is a DENORM, we can't multiply it directly.
13902
# so, we do the multiplication of the exponents and mantissas separately.
13903
# in this way, we avoid underflow on intermediate stages of the
13904
# multiplication and guarantee a result without exception.
# Note that this path multiplies by the 10^8 and 10^16 table entries
# without testing d2.
13905
	fmovm.x		&0x2,-(%sp)	# save 10^ISCALE to stack
13906

13907
	mov.w		(%sp),%d3	# grab exponent
13908
	andi.w		&0x7fff,%d3	# clear sign
13909
	ori.w		&0x8000,(%a0)	# make DENORM exp negative
13910
	add.w		(%a0),%d3	# add DENORM exp to 10^ISCALE exp
13911
	subi.w		&0x3fff,%d3	# subtract BIAS
13912
	add.w		36(%a1),%d3	# add exponent word of the 10^8 table entry
13913
	subi.w		&0x3fff,%d3	# subtract BIAS
13914
	add.w		48(%a1),%d3	# add exponent word of the 10^16 table entry
13915
	subi.w		&0x3fff,%d3	# subtract BIAS
13916

13917
	bmi.w		sc_mul_err	# if result is DENORM, punt!!!
13918

13919
	andi.w		&0x8000,(%sp)	# keep sign
13920
	or.w		%d3,(%sp)	# insert new exponent
13921
	andi.w		&0x7fff,(%a0)	# clear sign bit on DENORM again
13922
	mov.l		0x8(%a0),-(%sp) # put input op mantissa on stk
13923
	mov.l		0x4(%a0),-(%sp)
13924
	mov.l		&0x3fff0000,-(%sp) # force exp to zero
13925
	fmovm.x		(%sp)+,&0x80	# load normalized DENORM into fp0
13926
	fmul.x		(%sp)+,%fp0	# multiply by 10^ISCALE carrying the combined exponent
13927

13928
#	fmul.x	36(%a1),%fp0	# multiply fp0 by 10^8
13929
#	fmul.x	48(%a1),%fp0	# multiply fp0 by 10^16
13930
	mov.l		36+8(%a1),-(%sp) # get 10^8 mantissa
13931
	mov.l		36+4(%a1),-(%sp)
13932
	mov.l		&0x3fff0000,-(%sp) # force exp to zero
13933
	mov.l		48+8(%a1),-(%sp) # get 10^16 mantissa
13934
	mov.l		48+4(%a1),-(%sp)
13935
	mov.l		&0x3fff0000,-(%sp)# force exp to zero
13936
	fmul.x		(%sp)+,%fp0	# multiply fp0 by the mantissa pushed last (10^16 entry)
13937
	fmul.x		(%sp)+,%fp0	# multiply fp0 by the mantissa pushed first (10^8 entry)
13938
	bra.b		A10_st
13939

13940
sc_mul_err:
13941
	bra.b		sc_mul_err	# NOTE: branches to itself; execution spins here if reached
13942

13943
A9_norm:
13944
	tst.w		%d2		# test for small exp case
13945
	beq.b		A9_con		# if zero, continue as normal
13946
	fmul.x		36(%a1),%fp0	# multiply fp0 by 10^8
13947
	fmul.x		48(%a1),%fp0	# multiply fp0 by 10^16
13948
A9_con:
13949
	fmul.x		%fp1,%fp0	# calculate X * SCALE -> Y to fp0
13950

13951
# A10. Or in INEX.
13952
#      If INEX is set, round error occurred.  This is compensated
13953
#      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954
#
13955
# Register usage:
13956
#	Input/Output
13957
#	d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958
#	d2: x/x
13959
#	d3: x/x
13960
#	d4: LEN/Unchanged
13961
#	d5: ICTR:LAMBDA
13962
#	d6: ILOG/Unchanged
13963
#	d7: k-factor/Unchanged
13964
#	a0: ptr for original operand/final result
13965
#	a1: ptr to PTENxx array/Unchanged
13966
#	a2: x/ptr to FP_SCR1(a6)
13967
#	fp0: Y/Y with lsb adjusted
13968
#	fp1: 10^ISCALE/Unchanged
13969
#	fp2: x/x
13970

13971
A10_st:
13972
	fmov.l		%fpsr,%d0	# get FPSR
13973
	fmov.x		%fp0,FP_SCR1(%a6)	# move Y to memory
13974
	lea.l		FP_SCR1(%a6),%a2	# load a2 with ptr to FP_SCR1
13975
	btst		&9,%d0		# check if INEX2 set
13976
	beq.b		A11_st		# if clear, skip rest
13977
	or.l		&1,8(%a2)	# or in 1 to lsb of mantissa
13978
	fmov.x		FP_SCR1(%a6),%fp0	# write adjusted Y back to fpu
13979

13980

13981
# A11. Restore original FPCR; set size ext.
13982
#      Perform FINT operation in the user's rounding mode.  Keep
13983
#      the size to extended.  The sintdo entry point in the sint
13984
#      routine expects the FPCR value to be in USER_FPCR for
13985
#      mode and precision.  The original FPCR is saved in L_SCR1.
13986

13987
A11_st:
13988
	mov.l		USER_FPCR(%a6),L_SCR1(%a6)	# save it for later
13989
	and.l		&0x00000030,USER_FPCR(%a6)	# keep only the rounding mode bits: set size to ext,
13990
#					;block exceptions
13991

13992

13993
# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994
#      The FPSP routine sintd0 is used.  The output is in fp0.
13995
#
13996
# Register usage:
13997
#	Input/Output
13998
#	d0: FPSR with AINEX cleared/FPCR with size set to ext
13999
#	d2: x/x/scratch
14000
#	d3: x/x
14001
#	d4: LEN/Unchanged
14002
#	d5: ICTR:LAMBDA/Unchanged
14003
#	d6: ILOG/Unchanged
14004
#	d7: k-factor/Unchanged
14005
#	a0: ptr for original operand/src ptr for sintdo
14006
#	a1: ptr to PTENxx array/Unchanged
14007
#	a2: ptr to FP_SCR1(a6)/Unchanged
14008
#	a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009
#	fp0: Y/YINT
14010
#	fp1: 10^ISCALE/Unchanged
14011
#	fp2: x/x
14012
#	F_SCR1:x/x
14013
#	F_SCR2:Y adjusted for inex/Y with original exponent
14014
#	L_SCR1:x/original USER_FPCR
14015
#	L_SCR2:first word of X packed/Unchanged
14016

14017
A12_st:
14018
	movm.l	&0xc0c0,-(%sp)	# save regs used by sintd0	 {%d0-%d1/%a0-%a1}
14019
	mov.l	L_SCR1(%a6),-(%sp)	# save L_SCR1 (original USER_FPCR)
14020
	mov.l	L_SCR2(%a6),-(%sp)	# save L_SCR2 (first lword of X)
14021

14022
	lea.l		FP_SCR1(%a6),%a0	# a0 is ptr to FP_SCR1(a6)
14023
	fmov.x		%fp0,(%a0)	# move Y to memory at FP_SCR1(a6)
14024
	tst.l		L_SCR2(%a6)	# test sign of original operand
14025
	bge.b		do_fint12		# if pos, use Y
14026
	or.l		&0x80000000,(%a0)	# if neg, use -Y
14027
do_fint12:
14028
	mov.l	USER_FPSR(%a6),-(%sp)	# save USER_FPSR across the fint
14029
#	bsr	sintdo		# sint routine returns int in fp0
# (the call above is disabled; fint.x is executed inline below instead)
14030

14031
	fmov.l	USER_FPCR(%a6),%fpcr	# use the FPCR prepared in A11 (rounding mode only)
14032
	fmov.l	&0x0,%fpsr			# clear the AEXC bits!!!
14033
##	mov.l		USER_FPCR(%a6),%d0	# ext prec/keep rnd mode
14034
##	andi.l		&0x00000030,%d0
14035
##	fmov.l		%d0,%fpcr
14036
	fint.x		FP_SCR1(%a6),%fp0	# do fint()
14037
	fmov.l	%fpsr,%d0		# collect status from the fint
14038
	or.w	%d0,FPSR_EXCEPT(%a6)	# or its low word into FPSR_EXCEPT
14039
##	fmov.l		&0x0,%fpcr
14040
##	fmov.l		%fpsr,%d0		# don't keep ccodes
14041
##	or.w		%d0,FPSR_EXCEPT(%a6)
14042

14043
	mov.b	(%sp),USER_FPSR(%a6)	# restore only the first byte of the saved USER_FPSR
14044
	add.l	&4,%sp			# drop the saved USER_FPSR lword
14045

14046
	mov.l	(%sp)+,L_SCR2(%a6)
14047
	mov.l	(%sp)+,L_SCR1(%a6)
14048
	movm.l	(%sp)+,&0x303	# restore regs used by sint	 {%d0-%d1/%a0-%a1}
14049

14050
	mov.l	L_SCR2(%a6),FP_SCR1(%a6)	# restore original exponent
14051
	mov.l	L_SCR1(%a6),USER_FPCR(%a6)	# restore user's FPCR
14052

14053
# A13. Check for LEN digits.
14054
#      If the int operation results in more than LEN digits,
14055
#      or less than LEN -1 digits, adjust ILOG and repeat from
14056
#      A6.  This test occurs only on the first pass.  If the
14057
#      result is exactly 10^LEN, decrement ILOG and divide
14058
#      the mantissa by 10.  The calculation of 10^LEN cannot
14059
#      be inexact, since all powers of ten up to 10^27 are exact
14060
#      in extended precision, so the use of a previous power-of-ten
14061
#      table will introduce no error.
14062
#
14063
#
14064
# Register usage:
14065
#	Input/Output
14066
#	d0: FPCR with size set to ext/scratch final = 0
14067
#	d2: x/x
14068
#	d3: x/scratch final = x
14069
#	d4: LEN/LEN adjusted
14070
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14071
#	d6: ILOG/ILOG adjusted
14072
#	d7: k-factor/Unchanged
14073
#	a0: pointer into memory for packed bcd string formation
14074
#	a1: ptr to PTENxx array/Unchanged
14075
#	a2: ptr to FP_SCR1(a6)/Unchanged
14076
#	fp0: int portion of Y/abs(YINT) adjusted
14077
#	fp1: 10^ISCALE/Unchanged
14078
#	fp2: x/10^LEN
14079
#	F_SCR1:x/x
14080
#	F_SCR2:Y with original exponent/Unchanged
14081
#	L_SCR1:original USER_FPCR/Unchanged
14082
#	L_SCR2:first word of X packed/Unchanged
14083

14084
A13_st:
14085
	swap		%d5		# put ICTR in lower word of d5
14086
	tst.w		%d5		# check if ICTR = 0
14087
	bne		not_zr		# if non-zero, go to second test
14088
#
14089
# Compute 10^(LEN-1)
14090
#
14091
	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14092
	mov.l		%d4,%d0		# put LEN in d0
14093
	subq.l		&1,%d0		# d0 = LEN -1
14094
	clr.l		%d3		# clr table index
14095
l_loop:
14096
	lsr.l		&1,%d0		# shift next bit into carry
14097
	bcc.b		l_next		# if zero, skip the mul
14098
	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14099
l_next:
14100
	add.l		&12,%d3		# inc d3 to next pwrten table entry
14101
	tst.l		%d0		# test if LEN is zero
14102
	bne.b		l_loop		# if not, loop
14103
#
14104
# 10^(LEN-1) is computed for this test and A14.  If the input was
14105
# denormalized, check only the case in which YINT > 10^LEN.
14106
#
14107
	tst.b		BINDEC_FLG(%a6)	# check if input was norm
14108
	beq.b		A13_con		# if norm, continue with checking
14109
	fabs.x		%fp0		# take abs of YINT
14110
	bra		test_2
14111
#
14112
# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113
#
14114
A13_con:
14115
	fabs.x		%fp0		# take abs of YINT
14116
	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^(LEN-1)
14117
	fbge.w		test_2		# if greater, do next test
14118
	subq.l		&1,%d6		# subtract 1 from ILOG
14119
	mov.w		&1,%d5		# set ICTR
14120
	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14121
	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14122
	bra.w		A6_str		# return to A6 and recompute YINT
14123
test_2:
14124
	fmul.s		FTEN(%pc),%fp2	# compute 10^LEN
14125
	fcmp.x		%fp0,%fp2	# compare abs(YINT) with 10^LEN
14126
	fblt.w		A14_st		# if less, all is ok, go to A14
14127
	fbgt.w		fix_ex		# if greater, fix and redo
14128
	fdiv.s		FTEN(%pc),%fp0	# if equal, divide by 10
14129
	addq.l		&1,%d6		# and inc ILOG
14130
	bra.b		A14_st		# and continue elsewhere
14131
fix_ex:
14132
	addq.l		&1,%d6		# increment ILOG by 1
14133
	mov.w		&1,%d5		# set ICTR
14134
	fmov.l		&rm_mode*0x10,%fpcr	# set rmode to RM
14135
	bra.w		A6_str		# return to A6 and recompute YINT
14136
#
14137
# Since ICTR <> 0, we have already been through one adjustment,
14138
# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139
# 10^LEN is again computed using whatever table is in a1 since the
14140
# value calculated cannot be inexact.
14141
#
14142
not_zr:
14143
	fmov.s		FONE(%pc),%fp2	# init fp2 to 1.0
14144
	mov.l		%d4,%d0		# put LEN in d0
14145
	clr.l		%d3		# clr table index
14146
z_loop:
14147
	lsr.l		&1,%d0		# shift next bit into carry
14148
	bcc.b		z_next		# if zero, skip the mul
14149
	fmul.x		(%a1,%d3),%fp2	# mul by 10**(d3_bit_no)
14150
z_next:
14151
	add.l		&12,%d3		# inc d3 to next pwrten table entry
14152
	tst.l		%d0		# test if LEN is zero
14153
	bne.b		z_loop		# if not, loop
14154
	fabs.x		%fp0		# get abs(YINT)
14155
	fcmp.x		%fp0,%fp2	# check if abs(YINT) = 10^LEN
14156
	fbneq.w		A14_st		# if not, skip this
14157
	fdiv.s		FTEN(%pc),%fp0	# divide abs(YINT) by 10
14158
	addq.l		&1,%d6		# and inc ILOG by 1
14159
	addq.l		&1,%d4		# and inc LEN
14160
	fmul.s		FTEN(%pc),%fp2	# if LEN++, the get 10^^LEN
14161

14162
# A14. Convert the mantissa to bcd.
14163
#      The binstr routine is used to convert the LEN digit
14164
#      mantissa to bcd in memory.  The input to binstr is
14165
#      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166
#      such that the decimal point is to the left of bit 63.
14167
#      The bcd digits are stored in the correct position in
14168
#      the final string area in memory.
14169
#
14170
#
14171
# Register usage:
14172
#	Input/Output
14173
#	d0: x/LEN call to binstr - final is 0
14174
#	d1: x/0
14175
#	d2: x/ms 32-bits of mant of abs(YINT)
14176
#	d3: x/ls 32-bits of mant of abs(YINT)
14177
#	d4: LEN/Unchanged
14178
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14179
#	d6: ILOG
14180
#	d7: k-factor/Unchanged
14181
#	a0: pointer into memory for packed bcd string formation
14182
#	    /ptr to first mantissa byte in result string
14183
#	a1: ptr to PTENxx array/Unchanged
14184
#	a2: ptr to FP_SCR1(a6)/Unchanged
14185
#	fp0: int portion of Y/abs(YINT) adjusted
14186
#	fp1: 10^ISCALE/Unchanged
14187
#	fp2: 10^LEN/Unchanged
14188
#	F_SCR1:x/Work area for final result
14189
#	F_SCR2:Y with original exponent/Unchanged
14190
#	L_SCR1:original USER_FPCR/Unchanged
14191
#	L_SCR2:first word of X packed/Unchanged
14192

14193
A14_st:
14194
	fmov.l		&rz_mode*0x10,%fpcr	# force rz for conversion
14195
	fdiv.x		%fp2,%fp0	# divide abs(YINT) by 10^LEN
14196
	lea.l		FP_SCR0(%a6),%a0
14197
	fmov.x		%fp0,(%a0)	# move abs(YINT)/10^LEN to memory
14198
	mov.l		4(%a0),%d2	# move 2nd word of FP_RES to d2
14199
	mov.l		8(%a0),%d3	# move 3rd word of FP_RES to d3
14200
	clr.l		4(%a0)		# zero word 2 of FP_RES
14201
	clr.l		8(%a0)		# zero word 3 of FP_RES
14202
	mov.l		(%a0),%d0	# move exponent to d0
14203
	swap		%d0		# put exponent in lower word
14204
	beq.b		no_sft		# if zero, don't shift
14205
	sub.l		&0x3ffd,%d0	# sub bias less 2 to make fract
14206
	tst.l		%d0		# check if > 1
14207
	bgt.b		no_sft		# if so, don't shift
14208
	neg.l		%d0		# make exp positive
14209
m_loop:
14210
	lsr.l		&1,%d2		# shift d2:d3 right, add 0s
14211
	roxr.l		&1,%d3		# the number of places
14212
	dbf.w		%d0,m_loop	# given in d0
14213
no_sft:
14214
	tst.l		%d2		# check for mantissa of zero
14215
	bne.b		no_zr		# if not, go on
14216
	tst.l		%d3		# continue zero check
14217
	beq.b		zer_m		# if zero, go directly to binstr
14218
no_zr:
14219
	clr.l		%d1		# put zero in d1 for addx
14220
	add.l		&0x00000080,%d3	# inc at bit 7
14221
	addx.l		%d1,%d2		# continue inc
14222
	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14223
zer_m:
14224
	mov.l		%d4,%d0		# put LEN in d0 for binstr call
14225
	addq.l		&3,%a0		# a0 points to M16 byte in result
14226
	bsr		binstr		# call binstr to convert mant
14227

14228

14229
# A15. Convert the exponent to bcd.
14230
#      As in A14 above, the exp is converted to bcd and the
14231
#      digits are stored in the final string.
14232
#
14233
#      Digits are stored in L_SCR1(a6) on return from binstr as:
14234
#
14235
#	 31               16 15                0
14236
#	-----------------------------------------
14237
#	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14238
#	-----------------------------------------
14239
#
14240
# And are moved into their proper places in FP_SCR0.  If digit e4
14241
# is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14242
# written as specified in the 881/882 manual for packed decimal.
14243
#
14244
# Register usage:
14245
#	Input/Output
14246
#	d0: x/LEN call to binstr - final is 0
14247
#	d1: x/scratch (0);shift count for final exponent packing
14248
#	d2: x/ms 32-bits of exp fraction/scratch
14249
#	d3: x/ls 32-bits of exp fraction
14250
#	d4: LEN/Unchanged
14251
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14252
#	d6: ILOG
14253
#	d7: k-factor/Unchanged
14254
#	a0: ptr to result string/ptr to L_SCR1(a6)
14255
#	a1: ptr to PTENxx array/Unchanged
14256
#	a2: ptr to FP_SCR1(a6)/Unchanged
14257
#	fp0: abs(YINT) adjusted/float(ILOG)
14258
#	fp1: 10^ISCALE/Unchanged
14259
#	fp2: 10^LEN/Unchanged
14260
#	F_SCR1:Work area for final result/BCD result
14261
#	F_SCR2:Y with original exponent/ILOG/10^4
14262
#	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263
#	L_SCR2:first word of X packed/Unchanged
14264

14265
A15_st:
14266
	tst.b		BINDEC_FLG(%a6)	# check for denorm
14267
	beq.b		not_denorm
14268
	ftest.x		%fp0		# test for zero
14269
	fbeq.w		den_zero	# if zero, use k-factor or 4933
14270
	fmov.l		%d6,%fp0	# float ILOG
14271
	fabs.x		%fp0		# get abs of ILOG
14272
	bra.b		convrt
14273
den_zero:
14274
	tst.l		%d7		# check sign of the k-factor
14275
	blt.b		use_ilog	# if negative, use ILOG
14276
	fmov.s		F4933(%pc),%fp0	# force exponent to 4933
14277
	bra.b		convrt		# do it
14278
use_ilog:
14279
	fmov.l		%d6,%fp0	# float ILOG
14280
	fabs.x		%fp0		# get abs of ILOG
14281
	bra.b		convrt
14282
not_denorm:
14283
	ftest.x		%fp0		# test for zero
14284
	fbneq.w		not_zero	# if zero, force exponent
14285
	fmov.s		FONE(%pc),%fp0	# force exponent to 1
14286
	bra.b		convrt		# do it
14287
not_zero:
14288
	fmov.l		%d6,%fp0	# float ILOG
14289
	fabs.x		%fp0		# get abs of ILOG
14290
convrt:
14291
	fdiv.x		24(%a1),%fp0	# compute ILOG/10^4
14292
	fmov.x		%fp0,FP_SCR1(%a6)	# store fp0 in memory
14293
	mov.l		4(%a2),%d2	# move word 2 to d2
14294
	mov.l		8(%a2),%d3	# move word 3 to d3
14295
	mov.w		(%a2),%d0	# move exp to d0
14296
	beq.b		x_loop_fin	# if zero, skip the shift
14297
	sub.w		&0x3ffd,%d0	# subtract off bias
14298
	neg.w		%d0		# make exp positive
14299
x_loop:
14300
	lsr.l		&1,%d2		# shift d2:d3 right
14301
	roxr.l		&1,%d3		# the number of places
14302
	dbf.w		%d0,x_loop	# given in d0
14303
x_loop_fin:
14304
	clr.l		%d1		# put zero in d1 for addx
14305
	add.l		&0x00000080,%d3	# inc at bit 6
14306
	addx.l		%d1,%d2		# continue inc
14307
	and.l		&0xffffff80,%d3	# strip off lsb not used by 882
14308
	mov.l		&4,%d0		# put 4 in d0 for binstr call
14309
	lea.l		L_SCR1(%a6),%a0	# a0 is ptr to L_SCR1 for exp digits
14310
	bsr		binstr		# call binstr to convert exp
14311
	mov.l		L_SCR1(%a6),%d0	# load L_SCR1 lword to d0
14312
	mov.l		&12,%d1		# use d1 for shift count
14313
	lsr.l		%d1,%d0		# shift d0 right by 12
14314
	bfins		%d0,FP_SCR0(%a6){&4:&12}	# put e3:e2:e1 in FP_SCR0
14315
	lsr.l		%d1,%d0		# shift d0 right by 12
14316
	bfins		%d0,FP_SCR0(%a6){&16:&4}	# put e4 in FP_SCR0
14317
	tst.b		%d0		# check if e4 is zero
14318
	beq.b		A16_st		# if zero, skip rest
14319
	or.l		&opaop_mask,USER_FPSR(%a6)	# set OPERR & AIOP in USER_FPSR
14320

14321

14322
# A16. Write sign bits to final string.
14323
#	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324
#
14325
# Register usage:
14326
#	Input/Output
14327
#	d0: x/scratch - final is x
14328
#	d2: x/x
14329
#	d3: x/x
14330
#	d4: LEN/Unchanged
14331
#	d5: ICTR:LAMBDA/LAMBDA:ICTR
14332
#	d6: ILOG/ILOG adjusted
14333
#	d7: k-factor/Unchanged
14334
#	a0: ptr to L_SCR1(a6)/Unchanged
14335
#	a1: ptr to PTENxx array/Unchanged
14336
#	a2: ptr to FP_SCR1(a6)/Unchanged
14337
#	fp0: float(ILOG)/Unchanged
14338
#	fp1: 10^ISCALE/Unchanged
14339
#	fp2: 10^LEN/Unchanged
14340
#	F_SCR1:BCD result with correct signs
14341
#	F_SCR2:ILOG/10^4
14342
#	L_SCR1:Exponent digits on return from binstr
14343
#	L_SCR2:first word of X packed/Unchanged
14344

14345
A16_st:
14346
	clr.l		%d0		# clr d0 for collection of signs
14347
	and.b		&0x0f,FP_SCR0(%a6)	# clear first nibble of FP_SCR0
14348
	tst.l		L_SCR2(%a6)	# check sign of original mantissa
14349
	bge.b		mant_p		# if pos, don't set SM
14350
	mov.l		&2,%d0		# move 2 in to d0 for SM
14351
mant_p:
14352
	tst.l		%d6		# check sign of ILOG
14353
	bge.b		wr_sgn		# if pos, don't set SE
14354
	addq.l		&1,%d0		# set bit 0 in d0 for SE
14355
wr_sgn:
14356
	bfins		%d0,FP_SCR0(%a6){&0:&2}	# insert SM and SE into FP_SCR0
14357

14358
# Clean up and restore all registers used.
14359

14360
	fmov.l		&0,%fpsr	# clear possible inex2/ainex bits
14361
	fmovm.x		(%sp)+,&0xe0	#  {%fp0-%fp2}
14362
	movm.l		(%sp)+,&0x4fc	#  {%d2-%d7/%a2}
14363
	rts
14364

14365
	global		PTENRN
14366
# PTENRN: thirteen 12-byte entries (three longwords each) holding
# 10^(2^i) for i = 0..12, i.e. 10^1 through 10^4096.  The conversion code
# above walks a table of this layout with a 12-byte stride
# ("fmul.x (%a1,%d3),%fp2" / "add.l &12,%d3") and reads the third entry
# directly as 10^4 ("fdiv.x 24(%a1),%fp0").
# NOTE(review): the RN suffix presumably means "rounded to nearest";
# confirm against the code that loads %a1.
PTENRN:
14367
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14368
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14369
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14370
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14371
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14372
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14373
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14374
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14375
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14376
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14377
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14378
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14379
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14380

14381
	global		PTENRP
14382
# PTENRP: same layout as PTENRN (thirteen 12-byte entries, 10^1 through
# 10^4096).  The values match PTENRN except that the last mantissa
# longword of the 10^64 and 10^1024 entries is one unit larger.
# NOTE(review): the RP suffix presumably means "rounded toward plus
# infinity"; confirm against the code that loads %a1.
PTENRP:
14383
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14384
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14385
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14386
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14387
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14388
	long		0x40690000,0x9DC5ADA8,0x2B70B59E	# 10 ^ 32
14389
	long		0x40D30000,0xC2781F49,0xFFCFA6D6	# 10 ^ 64
14390
	long		0x41A80000,0x93BA47C9,0x80E98CE0	# 10 ^ 128
14391
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8E	# 10 ^ 256
14392
	long		0x46A30000,0xE319A0AE,0xA60E91C7	# 10 ^ 512
14393
	long		0x4D480000,0xC9767586,0x81750C18	# 10 ^ 1024
14394
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE5	# 10 ^ 2048
14395
	long		0x75250000,0xC4605202,0x8A20979B	# 10 ^ 4096
14396

14397
	global		PTENRM
14398
# PTENRM: same layout as PTENRN (thirteen 12-byte entries, 10^1 through
# 10^4096).  The values match PTENRN except that the last mantissa
# longword of the 10^32, 10^128, 10^256, 10^512, 10^2048 and 10^4096
# entries is one unit smaller.
# NOTE(review): the RM suffix presumably means "rounded toward minus
# infinity"; confirm against the code that loads %a1.
PTENRM:
14399
	long		0x40020000,0xA0000000,0x00000000	# 10 ^ 1
14400
	long		0x40050000,0xC8000000,0x00000000	# 10 ^ 2
14401
	long		0x400C0000,0x9C400000,0x00000000	# 10 ^ 4
14402
	long		0x40190000,0xBEBC2000,0x00000000	# 10 ^ 8
14403
	long		0x40340000,0x8E1BC9BF,0x04000000	# 10 ^ 16
14404
	long		0x40690000,0x9DC5ADA8,0x2B70B59D	# 10 ^ 32
14405
	long		0x40D30000,0xC2781F49,0xFFCFA6D5	# 10 ^ 64
14406
	long		0x41A80000,0x93BA47C9,0x80E98CDF	# 10 ^ 128
14407
	long		0x43510000,0xAA7EEBFB,0x9DF9DE8D	# 10 ^ 256
14408
	long		0x46A30000,0xE319A0AE,0xA60E91C6	# 10 ^ 512
14409
	long		0x4D480000,0xC9767586,0x81750C17	# 10 ^ 1024
14410
	long		0x5A920000,0x9E8B3B5D,0xC53D5DE4	# 10 ^ 2048
14411
	long		0x75250000,0xC4605202,0x8A20979A	# 10 ^ 4096
14412

14413
#########################################################################
14414
# binstr(): Converts a 64-bit binary integer to bcd.			#
14415
#									#
14416
# INPUT *************************************************************** #
14417
#	d2:d3 = 64-bit binary integer					#
14418
#	d0    = desired length (LEN)					#
14419
#	a0    = pointer to start in memory for bcd characters		#
14420
#		(This pointer must point to byte 4 of the first		#
14421
#		 lword of the packed decimal memory string.)		#
14422
#									#
14423
# OUTPUT ************************************************************** #
14424
#	a0 = pointer to LEN bcd digits representing the 64-bit integer.	#
14425
#									#
14426
# ALGORITHM ***********************************************************	#
14427
#	The 64-bit binary is assumed to have a decimal point before	#
14428
#	bit 63.  The fraction is multiplied by 10 using a mul by 2	#
14429
#	shift and a mul by 8 shift.  The bits shifted out of the	#
14430
#	msb form a decimal digit.  This process is iterated until	#
14431
#	LEN digits are formed.						#
14432
#									#
14433
# A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the	#
14434
#     digit formed will be assumed the least significant.  This is	#
14435
#     to force the first byte formed to have a 0 in the upper 4 bits.	#
14436
#									#
14437
# A2. Beginning of the loop:						#
14438
#     Copy the fraction in d2:d3 to d4:d5.				#
14439
#									#
14440
# A3. Multiply the fraction in d2:d3 by 8 using bit-field		#
14441
#     extracts and shifts.  The three msbs from d2 will go into d1.	#
14442
#									#
14443
# A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb	#
14444
#     will be collected by the carry.					#
14445
#									#
14446
# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5	#
14447
#     into d2:d3.  D1 will contain the bcd digit formed.		#
14448
#									#
14449
# A6. Test d7.  If zero, the digit formed is the ms digit.  If non-	#
14450
#     zero, it is the ls digit.  Put the digit in its place in the	#
14451
#     upper word of d0.  If it is the ls digit, write the word		#
14452
#     from d0 to memory.						#
14453
#									#
14454
# A7. Decrement d0 (LEN counter) and repeat the loop until zero.	#
14455
#									#
14456
#########################################################################
14457

14458
#	Implementation Notes:
14459
#
14460
#	The registers are used as follows:
14461
#
14462
#		d0: LEN counter
14463
#		d1: temp used to form the digit
14464
#		d2: upper 32-bits of fraction for mul by 8
14465
#		d3: lower 32-bits of fraction for mul by 8
14466
#		d4: upper 32-bits of fraction for mul by 2
14467
#		d5: lower 32-bits of fraction for mul by 2
14468
#		d6: temp for bit-field extracts
14469
#		d7: byte digit formation word;digit count {0,1}
14470
#		a0: pointer into memory for packed bcd string formation
14471
#
14472

14473
	global		binstr
14474
# binstr: produce d0 BCD digits from the 64-bit fraction in d2:d3 and store
# them two per byte through a0 (post-incremented).  Each pass multiplies the
# fraction by 10 (x8 + x2) and collects the bits carried out of the top as
# the next digit in d1.  d0-d7 are saved on entry and restored on exit; a0
# is left pointing past the last byte written.
binstr:
14475
	movm.l		&0xff00,-(%sp)	#  {%d0-%d7}
14476

14477
#
14478
# A1: Init d7
14479
#
14480
	mov.l		&1,%d7		# low word = 1: treat first digit as the ls nibble
14481
	subq.l		&1,%d0		# dbf runs count+1 passes, so use LEN-1
14482
#
14483
# A2. Copy d2:d3 to d4:d5.  Start loop.
14484
#
14485
loop:
14486
	mov.l		%d2,%d4		# copy the fraction before muls
14487
	mov.l		%d3,%d5		# to d4:d5
14488
#
14489
# A3. Multiply d2:d3 by 8; extract msbs into d1.
14490
#
14491
	bfextu		%d2{&0:&3},%d1	# copy 3 msbs of d2 into d1
14492
	asl.l		&3,%d2		# shift d2 left by 3 places
14493
	bfextu		%d3{&0:&3},%d6	# copy 3 msbs of d3 into d6
14494
	asl.l		&3,%d3		# shift d3 left by 3 places
14495
	or.l		%d6,%d2		# or in msbs from d3 into d2
14496
#
14497
# A4. Multiply d4:d5 by 2; add carry out to d1.
14498
#
14499
	asl.l		&1,%d5		# mul d5 by 2; msb goes to X
14500
	roxl.l		&1,%d4		# mul d4 by 2, shifting X in; msb goes to X
14501
	swap		%d6		# put 0 in d6 lower word (the 3 bits move to the upper word)
14502
	addx.w		%d6,%d1		# d1 += 0 + X: add in extend from mul by 2
14503
#
14504
# A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
14505
#
14506
	add.l		%d5,%d3		# add lower 32 bits
14507
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508
	addx.l		%d4,%d2		# add with extend upper 32 bits
14509
	nop				# ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510
	addx.w		%d6,%d1		# d1 += 0 + X: add in extend from the 64-bit add
14511
	swap		%d6		# swap d6 back; its upper word is 0 again
14512
#
14513
# A6. Test d7 and branch.
14514
#
14515
	tst.w		%d7		# if zero, store digit & to loop
14516
	beq.b		first_d		# if non-zero, form byte & write
14517
sec_d:
14518
	swap		%d7		# bring first digit to word d7b
14519
	asl.w		&4,%d7		# first digit in upper 4 bits d7b
14520
	add.w		%d1,%d7		# add in ls digit to d7b
14521
	mov.b		%d7,(%a0)+	# store d7b byte in memory
14522
	swap		%d7		# bring the digit-count flag word (d7a) back to the low word
14523
	clr.w		%d7		# set d7a to signal no digits done
14524
	dbf.w		%d0,loop	# do loop some more!
14525
	bra.b		end_bstr	# finished, so exit
14526
first_d:
14527
	swap		%d7		# put digit word in d7b
14528
	mov.w		%d1,%d7		# put new digit in d7b
14529
	swap		%d7		# bring the digit-count flag word (d7a) back to the low word
14530
	addq.w		&1,%d7		# set d7a to signal first digit done
14531
	dbf.w		%d0,loop	# do loop some more!
14532
	swap		%d7		# loop ended with one digit pending: fetch it
14533
	lsl.w		&4,%d7		# move it to upper 4 bits
14534
	mov.b		%d7,(%a0)+	# store it in memory string
14535
#
14536
# Clean up and return; the digits have been written to memory through a0.
14537
#
14538
end_bstr:
14539
	movm.l		(%sp)+,&0xff	#  {%d0-%d7}
14540
	rts
14541

14542
#########################################################################
14543
# XDEF ****************************************************************	#
14544
#	facc_in_b(): dmem_read_byte failed				#
14545
#	facc_in_w(): dmem_read_word failed				#
14546
#	facc_in_l(): dmem_read_long failed				#
14547
#	facc_in_d(): dmem_read of dbl prec failed			#
14548
#	facc_in_x(): dmem_read of ext prec failed			#
14549
#									#
14550
#	facc_out_b(): dmem_write_byte failed				#
14551
#	facc_out_w(): dmem_write_word failed				#
14552
#	facc_out_l(): dmem_write_long failed				#
14553
#	facc_out_d(): dmem_write of dbl prec failed			#
14554
#	facc_out_x(): dmem_write of ext prec failed			#
14555
#									#
14556
# XREF ****************************************************************	#
14557
#	_real_access() - exit through access error handler		#
14558
#									#
14559
# INPUT ***************************************************************	#
14560
#	None								#
14561
#									#
14562
# OUTPUT **************************************************************	#
14563
#	None								#
14564
#									#
14565
# ALGORITHM ***********************************************************	#
14566
#	Flow jumps here when an FP data fetch call gets an error	#
14567
# result. This means the operating system wants an access error frame	#
14568
# made out of the current exception stack frame.			#
14569
#	So, we first call restore() which makes sure that any updated	#
14570
# -(an)+ register gets returned to its pre-exception value and then	#
14571
# we change the stack to an access error stack frame.			#
14572
#									#
14573
#########################################################################
14574

14575
# facc_in_b: entered when a one-byte FP operand read failed.  Passes the
# operand size to restore() in d0, then leaves this case's FSLW word in
# EXC_VOFF for facc_finish to merge into the access error frame.
facc_in_b:
14576
	movq.l		&0x1,%d0			# one byte
14577
	bsr.w		restore				# fix An
14578

14579
	mov.w		&0x0121,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14580
	bra.w		facc_finish
14581

14582
# facc_in_w: entered when a two-byte FP operand read failed.  Passes the
# operand size to restore() in d0, then leaves this case's FSLW word in
# EXC_VOFF for facc_finish.
facc_in_w:
14583
	movq.l		&0x2,%d0			# two bytes
14584
	bsr.w		restore				# fix An
14585

14586
	mov.w		&0x0141,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14587
	bra.b		facc_finish
14588

14589
# facc_in_l: entered when a four-byte FP operand read failed.  Passes the
# operand size to restore() in d0, then leaves this case's FSLW word in
# EXC_VOFF for facc_finish.
facc_in_l:
14590
	movq.l		&0x4,%d0			# four bytes
14591
	bsr.w		restore				# fix An
14592

14593
	mov.w		&0x0101,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14594
	bra.b		facc_finish
14595

14596
# facc_in_d: entered when a double-precision (eight-byte) operand read
# failed.  Passes the operand size to restore() in d0, then leaves this
# case's FSLW word in EXC_VOFF for facc_finish.
facc_in_d:
14597
	movq.l		&0x8,%d0			# eight bytes
14598
	bsr.w		restore				# fix An
14599

14600
	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14601
	bra.b		facc_finish
14602

14603
# facc_in_x: entered when an extended-precision (twelve-byte) operand read
# failed.  Passes the operand size to restore() in d0, then leaves the same
# FSLW word as facc_in_d in EXC_VOFF for facc_finish.
facc_in_x:
14604
	movq.l		&0xc,%d0			# twelve bytes
14605
	bsr.w		restore				# fix An
14606

14607
	mov.w		&0x0161,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14608
	bra.b		facc_finish
14609

14610
################################################################
14611

14612
# facc_out_b: entered when a one-byte FP result write failed.  Passes the
# operand size to restore() in d0, then leaves this case's FSLW word in
# EXC_VOFF for facc_finish.
facc_out_b:
14613
	movq.l		&0x1,%d0			# one byte
14614
	bsr.w		restore				# restore An
14615

14616
	mov.w		&0x00a1,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14617
	bra.b		facc_finish
14618

14619
# facc_out_w: entered when a two-byte FP result write failed.  Passes the
# operand size to restore() in d0, then leaves this case's FSLW word in
# EXC_VOFF for facc_finish.
facc_out_w:
14620
	movq.l		&0x2,%d0			# two bytes
14621
	bsr.w		restore				# restore An
14622

14623
	mov.w		&0x00c1,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14624
	bra.b		facc_finish
14625

14626
# facc_out_l: entered when a four-byte FP result write failed.  Passes the
# operand size to restore() in d0, then leaves this case's FSLW word in
# EXC_VOFF for facc_finish.
facc_out_l:
14627
	movq.l		&0x4,%d0			# four bytes
14628
	bsr.w		restore				# restore An
14629

14630
	mov.w		&0x0081,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14631
	bra.b		facc_finish
14632

14633
# facc_out_d: entered when a double-precision (eight-byte) result write
# failed.  Passes the operand size to restore() in d0, then leaves this
# case's FSLW word in EXC_VOFF for facc_finish.
facc_out_d:
14634
	movq.l		&0x8,%d0			# eight bytes
14635
	bsr.w		restore				# restore An
14636

14637
	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14638
	bra.b		facc_finish
14639

14640
# facc_out_x: entered when an extended-precision (twelve-byte) result write
# failed.  Passes the operand size to restore() in d0, then leaves the same
# FSLW word as facc_out_d in EXC_VOFF.  There is no branch at the end:
# control falls straight through into facc_finish, which must stay
# immediately after this stub.
facc_out_x:
14641
	movq.l		&0xc,%d0			# twelve bytes (movq, as in the sibling stubs)
14642
	bsr.w		restore				# restore An
14643

14644
	mov.w		&0x00e1,EXC_VOFF(%a6)		# set FSLW word (after restore: ri_a7 tests EXC_VOFF)
14645

14646
# here's where we actually create the access error frame from the
14647
# current exception stack frame.
14648
#
# On entry the facc_* stub has left its FSLW word in EXC_VOFF(%a6) and
# restore() has already undone any -(An)/(An)+ update.  The code below
# reloads the saved FP and integer state, drops the a6 frame, grows the
# exception frame by one longword, and shuffles its fields so that it
# holds SR, PC, voff, EA and FSLW before exiting through _real_access.
facc_finish:
14649
	mov.l		USER_FPIAR(%a6),EXC_PC(%a6) # store current PC
14650

14651
	fmovm.x		EXC_FPREGS(%a6),&0xc0	# restore fp0-fp1
14652
	fmovm.l		USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653
	movm.l		EXC_DREGS(%a6),&0x0303	# restore d0-d1/a0-a1
14654

14655
	unlk		%a6
14656

14657
	mov.l		(%sp),-(%sp)		# store SR, hi(PC); frame grows by 4 bytes
14658
	mov.l		0x8(%sp),0x4(%sp)	# store lo(PC); the stub's FSLW word rides along at 0x6
14659
	mov.l		0xc(%sp),0x8(%sp)	# store EA
14660
	mov.l		&0x00000001,0xc(%sp)	# store FSLW
14661
	mov.w		0x6(%sp),0xc(%sp)	# fix FSLW (size): copy the stub's word into the upper half
14662
	mov.w		&0x4008,0x6(%sp)	# store voff (overwrites the word just copied out)
14663

14664
	btst		&0x5,(%sp)		# supervisor or user mode?
14665
	beq.b		facc_out2		# user
14666
	bset		&0x2,0xd(%sp)		# set supervisor TM bit
14667

14668
facc_out2:
14669
	bra.l		_real_access
14670

14671
##################################################################
14672

14673
# if the effective addressing mode was predecrement or postincrement,
14674
# the emulation has already changed its value to the correct post-
14675
# instruction value. but since we're exiting to the access error
14676
# handler, then AN must be returned to its pre-instruction value.
14677
# we do that here.
14678
#
# In:  d0 = operand size in bytes (set by the facc_* caller)
# Out: the affected An (stacked copy, live register, or USP) is adjusted
#      by d0; other addressing modes return without changing anything.
# Clobbers d1 here, plus whatever rest_inc/rest_dec use.
restore:
14679
	mov.b		EXC_OPWORD+0x1(%a6),%d1
14680
	andi.b		&0x38,%d1		# isolate bits 5-3, the <ea> mode field tested below
14681
	cmpi.b		%d1,&0x18		# postinc?
14682
	beq.w		rest_inc
14683
	cmpi.b		%d1,&0x20		# predec?
14684
	beq.w		rest_dec
14685
	rts
14686

14687
# rest_inc: subtract d0 from the address register named in bits 2-0 of the
# low opword byte.  Dispatches through tbl_rest_inc, a table of 16-bit
# offsets relative to tbl_rest_inc itself.  rest_dec reuses this path with
# d0 negated.  Each ri_aN handler returns to restore's caller with rts.
rest_inc:
14688
	mov.b		EXC_OPWORD+0x1(%a6),%d1
14689
	andi.w		&0x0007,%d1		# fetch An
14690

14691
	mov.w		(tbl_rest_inc.b,%pc,%d1.w*2),%d1	# d1 = table offset for An
14692
	jmp		(tbl_rest_inc.b,%pc,%d1.w*1)		# jump to tbl_rest_inc + offset
14693

14694
tbl_rest_inc:
14695
	short		ri_a0 - tbl_rest_inc
14696
	short		ri_a1 - tbl_rest_inc
14697
	short		ri_a2 - tbl_rest_inc
14698
	short		ri_a3 - tbl_rest_inc
14699
	short		ri_a4 - tbl_rest_inc
14700
	short		ri_a5 - tbl_rest_inc
14701
	short		ri_a6 - tbl_rest_inc
14702
	short		ri_a7 - tbl_rest_inc
14703

14704
# a0 and a1 are fixed in their stacked copies (EXC_DREGS+0x8/+0xc), which
# facc_finish later reloads with movm.l; a2-a5 are fixed in the live
# registers; a6 is fixed in the longword at (%a6).
ri_a0:
14705
	sub.l		%d0,EXC_DREGS+0x8(%a6)	# fix stacked a0
14706
	rts
14707
ri_a1:
14708
	sub.l		%d0,EXC_DREGS+0xc(%a6)	# fix stacked a1
14709
	rts
14710
ri_a2:
14711
	sub.l		%d0,%a2			# fix a2
14712
	rts
14713
ri_a3:
14714
	sub.l		%d0,%a3			# fix a3
14715
	rts
14716
ri_a4:
14717
	sub.l		%d0,%a4			# fix a4
14718
	rts
14719
ri_a5:
14720
	sub.l		%d0,%a5			# fix a5
14721
	rts
14722
ri_a6:
14723
	sub.l		%d0,(%a6)		# fix stacked a6
14724
	rts
14725
# if it's a fmove out instruction, we don't have to fix a7
14726
# because we hadn't changed it yet. if it's an opclass two
14727
# instruction (data moved in) and the exception was in supervisor
14728
# mode, then a7 also wasn't updated. if it was user mode, then
14729
# restore the correct a7 which is in the USP currently.
14730
# (the user-mode path uses a0 as scratch.)
ri_a7:
14731
	cmpi.b		EXC_VOFF(%a6),&0x30	# move in or out?
14732
	bne.b		ri_a7_done		# out
14733

14734
	btst		&0x5,EXC_SR(%a6)	# user or supervisor?
14735
	bne.b		ri_a7_done		# supervisor
14736
	movc		%usp,%a0		# restore USP
14737
	sub.l		%d0,%a0
14738
	movc		%a0,%usp
14739
ri_a7_done:
14740
	rts
14741

14742
# need to invert adjustment value if the <ea> was predec
14743
# rest_dec: negate the byte count in d0 so that rest_inc's "sub.l %d0,An"
# adds the size back, then share the rest_inc dispatch.
rest_dec:
14744
	neg.l		%d0			# d0 = -size
14745
	bra.b		rest_inc
14746

14747
Product

Resources

Company