CoCalc -- ilsp.S

GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/m68k/ifpsp060/src/ilsp.S
¹⁰⁸²⁰ views
1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6

7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
8

9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15

16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22

23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
# litop.s:
30
#	This file is appended to the top of the 060FPLSP package
31
# and contains the entry points into the package. The user, in
32
# effect, branches to one of the branch table entries located here.
33
#
34

35
	bra.l	_060LSP__idivs64_	# entry 0: signed 64/32 divide
36
	short	0x0000			# pad word after each long branch
37
	bra.l	_060LSP__idivu64_	# entry 1: unsigned 64/32 divide
38
	short	0x0000
39

40
	bra.l	_060LSP__imuls64_	# entry 2: signed 32x32->64 multiply
41
	short	0x0000
42
	bra.l	_060LSP__imulu64_	# entry 3: unsigned 32x32->64 multiply
43
	short	0x0000
44

45
	bra.l	_060LSP__cmp2_Ab_	# entry 4: cmp2.b, address register
46
	short	0x0000
47
	bra.l	_060LSP__cmp2_Aw_	# entry 5: cmp2.w, address register
48
	short	0x0000
49
	bra.l	_060LSP__cmp2_Al_	# entry 6: cmp2.l, address register
50
	short	0x0000
51
	bra.l	_060LSP__cmp2_Db_	# entry 7: cmp2.b, data register
52
	short	0x0000
53
	bra.l	_060LSP__cmp2_Dw_	# entry 8: cmp2.w, data register
54
	short	0x0000
55
	bra.l	_060LSP__cmp2_Dl_	# entry 9: cmp2.l, data register
56
	short	0x0000
57

58
# leave room for future possible additions: the code proper starts at the
# next 0x200 boundary, so new table entries can be appended above without
# moving the existing ones.
59
	align	0x200
60

61
#########################################################################
62
# XDEF ****************************************************************	#
63
#	_060LSP__idivu64_(): Emulate 64-bit unsigned div instruction.	#
64
#	_060LSP__idivs64_(): Emulate 64-bit signed div instruction.	#
65
#									#
66
#	This is the library version which is accessed as a subroutine	#
67
#	and therefore does not work exactly like the 680X0 div{s,u}.l	#
68
#	64-bit divide instruction.					#
69
#									#
70
# XREF ****************************************************************	#
71
#	None.								#
72
#									#
73
# INPUT ***************************************************************	#
74
#	0x4(sp)  = divisor						#
75
#	0x8(sp)  = hi(dividend)						#
76
#	0xc(sp)  = lo(dividend)						#
77
#	0x10(sp) = pointer to location to place quotient/remainder	#
78
#									#
79
# OUTPUT **************************************************************	#
80
#	0x10(sp) = points to location of remainder/quotient.		#
81
#		   remainder is in first longword, quotient is in 2nd.	#
82
#									#
83
# ALGORITHM ***********************************************************	#
84
#	If the operands are signed, make them unsigned and save the	#
85
# sign info for later. Separate out special cases like divide-by-zero	#
86
# or 32-bit divides if possible. Else, use a special math algorithm	#
87
# to calculate the result.						#
88
#	Restore sign info if signed instruction. Set the condition	#
89
# codes before performing the final "rts". If the divisor was equal to	#
90
# zero, then perform a divide-by-zero using a 16-bit implemented	#
91
# divide instruction. This way, the operating system can record that	#
92
# the event occurred even though it may not point to the correct place.	#
93
#									#
94
#########################################################################
95

96
# Offsets (relative to %a6) of the divide routines' locals inside the
# 16-byte frame created by "link.w %a6,&-16".
set	POSNEG,		-1		# byte: 0xff = signed (st), 0x00 = unsigned (sf)
97
set	NDIVISOR,	-2		# byte: set by slt if the divisor was negative
98
set	NDIVIDEND,	-3		# byte: set by slt if the dividend was negative
99
set	DDSECOND,	-4		# byte: set once the 1st Knuth quotient word is done
100
set	DDNORMAL,	-8		# long: number of normalization shifts applied
101
set	DDQUOTIENT,	-12		# long: quotient, built one word at a time
102
set	DIV64_CC,	-16		# word: condition codes saved on entry
103

104
##########
105
# divs.l #
106
##########
107
	global		_060LSP__idivs64_
108
# Signed entry point. Builds the same frame as _060LSP__idivu64_, records
# the caller's condition codes, flags the operation as signed and joins the
# shared divide code at ldiv64_cont.
# After the link, the arguments are at:
#	0x8(%a6) = divisor, 0xc(%a6) = hi(dividend), 0x10(%a6) = lo(dividend),
#	0x14(%a6) = pointer to the remainder/quotient result pair.
_060LSP__idivs64_:
109
# PROLOGUE BEGIN ########################################################
110
	link.w		%a6,&-16
111
	movm.l		&0x3f00,-(%sp)		# save d2-d7
112
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
113
# PROLOGUE END ##########################################################
114

115
	mov.w		%cc,DIV64_CC(%a6)	# save incoming ccodes
116
	st		POSNEG(%a6)		# signed operation
117
	bra.b		ldiv64_cont
118

119
##########
120
# divu.l #
121
##########
122
	global		_060LSP__idivu64_
123
# Unsigned entry point; falls through into the code shared with the signed
# entry. Register roles from ldiv64_cont onwards:
#	%d7 = divisor, %d5 = hi(dividend) / remainder, %d6 = lo(dividend) / quotient.
_060LSP__idivu64_:
124
# PROLOGUE BEGIN ########################################################
125
	link.w		%a6,&-16
126
	movm.l		&0x3f00,-(%sp)		# save d2-d7
127
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
128
# PROLOGUE END ##########################################################
129

130
	mov.w		%cc,DIV64_CC(%a6)	# save incoming ccodes
131
	sf		POSNEG(%a6)		# unsigned operation
132

133
ldiv64_cont:
134
	mov.l		0x8(%a6),%d7		# fetch divisor
135

136
# the mov.l above set 'Z' from the divisor value.
	beq.w		ldiv64eq0		# divisor is = 0!!!
137

138
	mov.l		0xc(%a6), %d5		# get dividend hi
139
	mov.l		0x10(%a6), %d6		# get dividend lo
140

141
# separate signed and unsigned divide
142
	tst.b		POSNEG(%a6)		# signed or unsigned?
143
	beq.b		ldspecialcases		# use positive divide
144

145
# save the sign of the divisor
146
# make divisor unsigned if it's negative
147
	tst.l		%d7			# chk sign of divisor
148
	slt		NDIVISOR(%a6)		# save sign of divisor
149
	bpl.b		ldsgndividend
150
	neg.l		%d7			# complement negative divisor
151

152
# save the sign of the dividend
153
# make dividend unsigned if it's negative
154
ldsgndividend:
155
	tst.l		%d5			# chk sign of hi(dividend)
156
	slt		NDIVIDEND(%a6)		# save sign of dividend
157
	bpl.b		ldspecialcases
158

159
# 64-bit negate: negx subtracts the 'X' bit as a borrow, so it must be
# clear before the low longword and then carries into the high longword.
	mov.w		&0x0, %cc		# clear 'X' cc bit
160
	negx.l		%d6			# complement signed dividend
161
	negx.l		%d5
162

163
# extract some special cases:
164
#	- is (dividend == 0) ?
165
#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
166
ldspecialcases:
167
	tst.l		%d5			# is (hi(dividend) == 0)
168
	bne.b		ldnormaldivide		# no, so try it the long way
169

170
# with %d5 = %d6 = 0 here, remainder and quotient are both already zero.
	tst.l		%d6			# is (lo(dividend) == 0), too
171
	beq.w		lddone			# yes, so (dividend == 0)
172

173
# NOTE(review): cmp operands appear to be written destination-first in this
# assembler syntax (cf. "cmpi.l %d6, &0x80000000" further down), so the
# bls below is taken when divisor <= lo(dividend), as the comments say.
	cmp.l		%d7,%d6			# is (divisor <= lo(dividend))
174
	bls.b		ld32bitdivide		# yes, so use 32 bit divide
175

176
# divisor > dividend: %d5 is zero here, so the swap yields
# remainder (%d5) = dividend and quotient (%d6) = 0.
	exg		%d5,%d6			# q = 0, r = dividend
177
	bra.w		ldivfinish		# can't divide, we're done.
178

179
ld32bitdivide:
180
	tdivu.l		%d7, %d5:%d6		# it's only a 32/32 bit div!
181

182
	bra.b		ldivfinish
183

184
# hi(dividend) is non-zero: reject overflow, run the full 64/32 divide, then
# (for the signed entry only) re-apply signs and range-check the quotient.
ldnormaldivide:
185
# last special case:
186
#	- is hi(dividend) >= divisor ? if yes, then overflow
187
# NOTE(review): cmp operands appear to be written destination-first in this
# assembler syntax (cf. the cmpi.l below), so bls is taken when
# divisor <= hi(dividend).
	cmp.l		%d7,%d5
188
	bls.b		lddovf			# answer won't fit in 32 bits
189

190
# perform the divide algorithm:
191
# ldclassical returns the remainder in %d5 and the quotient in %d6.
	bsr.l		ldclassical		# do int divide
192

193
# separate into signed and unsigned finishes.
194
ldivfinish:
195
	tst.b		POSNEG(%a6)		# do divs, divu separately
196
	beq.b		lddone			# divu has no processing!!!
197

198
# it was a divs.l, so ccode setting is a little more complicated...
199
	tst.b		NDIVIDEND(%a6)		# remainder has same sign
200
	beq.b		ldcc			# as dividend.
201
	neg.l		%d5			# sgn(rem) = sgn(dividend)
202
ldcc:
203
# the quotient is negative when exactly one operand was negative; the eor
# leaves a non-zero byte in NDIVIDEND(%a6) in that case.
	mov.b		NDIVISOR(%a6), %d0
204
	eor.b		%d0, NDIVIDEND(%a6)	# chk if quotient is negative
205
	beq.b		ldqpos			# branch to quot positive
206

207
# 0x80000000 is the largest number representable as a 32-bit negative
208
# number. the negative of 0x80000000 is 0x80000000.
209
	cmpi.l		%d6, &0x80000000	# will (-quot) fit in 32 bits?
210
	bhi.b		lddovf
211

212
	neg.l		%d6			# make (-quot) 2's comp
213

214
	bra.b		lddone
215

216
ldqpos:
217
# a positive signed quotient must have bit 31 clear.
	btst		&0x1f, %d6		# will (+quot) fit in 32 bits?
218
	bne.b		lddovf
219

220
# Normal completion: set the condition codes, store the result pair and return.
lddone:
221
# keep only the caller's 'X' bit from the saved ccodes; the tst.l then
222
# derives the remaining flags from the quotient.
223
	andi.w		&0x10,DIV64_CC(%a6)
224
	mov.w		DIV64_CC(%a6),%cc
225
	tst.l		%d6			# may set 'N' ccode bit
226

227
# here, the remainder is in d5 and the quotient in d6. both are stored at
228
# the location addressed by the pointer argument held at 0x14(%a6).
229
# use movm here to not disturb the condition codes.
230
ldexit:
231
# mask 0x0060 selects d5 and d6: d5 (remainder) goes to the first longword,
# d6 (quotient) to the second.
	movm.l		&0x0060,([0x14,%a6])	# save result
232

233
# EPILOGUE BEGIN ########################################################
234
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
235
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
236
	unlk		%a6
237
# EPILOGUE END ##########################################################
238

239
	rts
240

241
# Overflow exit: the quotient does not fit in 32 bits.
# the result should be the unchanged dividend, so the original hi/lo
# dividend longwords are reloaded from the argument area and stored by ldexit.
242
lddovf:
243
	mov.l		0xc(%a6), %d5		# get dividend hi
244
	mov.l		0x10(%a6), %d6		# get dividend lo
245

246
# mask 0x1c keeps 'X', 'N' and 'Z' from the saved ccodes (clearing 'V' and
# 'C'); 'V' is then forced on.
	andi.w		&0x1c,DIV64_CC(%a6)
247
	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
248
	mov.w		DIV64_CC(%a6),%cc
249

250
	bra.b		ldexit
251

252
# Divide-by-zero exit: copy the unchanged dividend to the result location,
# restore the caller's condition codes and registers, then execute a real
# divide by zero so that the processor raises the exception itself.
ldiv64eq0:
253
	mov.l		0xc(%a6),([0x14,%a6])		# result[0] = hi(dividend)
254
	mov.l		0x10(%a6),([0x14,%a6],0x4)	# result[1] = lo(dividend)
255

256
	mov.w		DIV64_CC(%a6),%cc	# incoming ccodes, unmodified
257

258
# EPILOGUE BEGIN ########################################################
259
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
260
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
261
	unlk		%a6
262
# EPILOGUE END ##########################################################
263

264
# the frame is already torn down, so the exception is taken with the
# caller's stack layout in place.
	divu.w		&0x0,%d0		# force a divbyzero exception
265
	rts
266

267
###########################################################################
268
#########################################################################
269
# This routine uses the 'classical' Algorithm D from Donald Knuth's	#
270
# Art of Computer Programming, vol II, Seminumerical Algorithms.	#
271
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2,	#
272
# where U,V are words of the quadword dividend and longword divisor,	#
273
# and U1, V1 are the most significant words.				#
274
#									#
275
# The most sig. longword of the 64 bit dividend must be in %d5, least	#
276
# in %d6. The divisor must be in %d7. All operands are treated as	#
277
# unsigned; the callers strip and re-apply signs around this routine.	#
278
# The quotient is returned in %d6, remainder in %d5, unless the		#
279
# v (overflow) bit is set in the saved %ccr. If overflow, the dividend	#
280
# is unchanged.								#
281
#########################################################################
282
# In:  %d5:%d6 = 64-bit dividend, %d7 = divisor.
# Out: %d5 = remainder, %d6 = quotient.
# Divisors that fit in 16 bits are handled here with two divu.w steps;
# anything larger branches to lddknuth.
ldclassical:
283
# if the divisor msw is 0, use a simpler algorithm than the full blown
284
# one at lddknuth:
285

286
	cmpi.l		%d7, &0xffff
287
	bhi.b		lddknuth		# go use D. Knuth algorithm
288

289
# Since the divisor is only a word (and larger than the mslw of the dividend),
290
# a simpler algorithm may be used :
291
# In the general case, four quotient words would be created by
292
# dividing the divisor word into each dividend word. In this case,
293
# the first two quotient words must be zero, or overflow would occur.
294
# Since we already checked this case above, we can treat the most significant
295
# longword of the dividend as (0) remainder (see Knuth) and merely complete
296
# the last two divisions to get a quotient longword and word remainder:
297

298
	clr.l		%d1			# %d1 accumulates the quotient
299
	swap		%d5			# same as r*b if previous step rqd
300
	swap		%d6			# get u3 to lsw position
301
	mov.w		%d6, %d5		# rb + u3
302

303
# divu.w leaves the quotient in the low word and the remainder in the
# high word of %d5.
	divu.w		%d7, %d5
304

305
	mov.w		%d5, %d1		# first quotient word
306
	swap		%d6			# get u4
307
	mov.w		%d6, %d5		# rb + u4
308

309
	divu.w		%d7, %d5
310

311
	swap		%d1			# first word becomes the quotient msw
312
	mov.w		%d5, %d1		# 2nd quotient 'digit'
313
	clr.w		%d5
314
	swap		%d5			# now remainder
315
	mov.l		%d1, %d6		# and quotient
316

317
	rts
318

319
# Full 64/32 divide for divisors wider than 16 bits.
# In:  %d5:%d6 = dividend, %d7 = divisor.
# Out: %d5 = remainder, %d6 = quotient.
# Uses %d0-%d4 as scratch and the DDNORMAL/DDSECOND/DDQUOTIENT frame slots.
# The body from lddnormalized to ldd2nd runs twice, once per quotient word.
lddknuth:
320
# In this algorithm, the divisor is treated as a 2 digit (word) number
321
# which is divided into a 3 digit (word) dividend to get one quotient
322
# digit (word). After subtraction, the dividend is shifted and the
323
# process repeated. Before beginning, the divisor and quotient are
324
# 'normalized' so that the process of estimating the quotient digit
325
# will yield verifiably correct results..
326

327
	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
328
	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
329
	clr.l		%d1			# %d1 will hold trial quotient
330
# shift divisor and dividend left together until bit 31 of the divisor is set.
lddnchk:
331
	btst		&31, %d7		# must we normalize? first word of
332
	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
333
	addq.l		&0x1, DDNORMAL(%a6)	# count normalization shifts
334
	lsl.l		&0x1, %d7		# shift the divisor
335
	lsl.l		&0x1, %d6		# shift u4,u3 with overflow to u2
336
	roxl.l		&0x1, %d5		# shift u1,u2
337
	bra.w		lddnchk
338
lddnormalized:
339

340
# Now calculate an estimate of the quotient words (msw first, then lsw).
341
# The comments use subscripts for the first quotient digit determination.
342
	mov.l		%d7, %d3		# divisor
343
	mov.l		%d5, %d2		# dividend mslw
344
	swap		%d2			# U1 in lsw of %d2
345
	swap		%d3			# V1 in lsw of %d3
346
	cmp.w		%d2, %d3		# V1 = U1 ?
347
	bne.b		lddqcalc1
348
	mov.w		&0xffff, %d1		# use max trial quotient word
349
	bra.b		lddadj0
350
lddqcalc1:
351
	mov.l		%d5, %d1
352

353
	divu.w		%d3, %d1		# use quotient of mslw/msw
354

355
	andi.l		&0x0000ffff, %d1	# zero any remainder
356
lddadj0:
357

358
# now test the trial quotient and adjust. This step plus the
359
# normalization assures (according to Knuth) that the trial
360
# quotient will be at worst 1 too large.
361
	mov.l		%d6, -(%sp)		# save dividend lslw; popped after ldmm2
362
	clr.w		%d6			# word u3 left
363
	swap		%d6			# in lsw position
364
# loop: decrement the trial quotient while V2*q exceeds the partial
# remainder extended with U3.
lddadj1: mov.l		%d7, %d3
365
	mov.l		%d1, %d2
366
	mulu.w		%d7, %d2		# V2q
367
	swap		%d3
368
	mulu.w		%d1, %d3		# V1q
369
	mov.l		%d5, %d4		# U1U2
370
	sub.l		%d3, %d4		# U1U2 - V1q
371

372
	swap		%d4
373

374
	mov.w		%d4,%d0			# upper word of (U1U2 - V1q)
375
	mov.w		%d6,%d4			# insert lower word (U3)
376

377
# a non-zero upper word ends the adjustment loop without a further compare.
	tst.w		%d0			# is upper word set?
378
	bne.w		lddadjd1
379

380
#	add.l		%d6, %d4		# (U1U2 - V1q) + U3
381

382
	cmp.l		%d2, %d4
383
	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
384
	subq.l		&0x1, %d1		# yes, decrement and recheck
385
	bra.b		lddadj1
386
lddadjd1:
387
# now test the word by multiplying it by the divisor (V1V2) and comparing
388
# the 3 digit (word) result with the current dividend words
389
	mov.l		%d5, -(%sp)		# save %d5 (%d6 already saved)
390
	mov.l		%d1, %d6
391
	swap		%d6			# shift answer to ms 3 words
392
	mov.l		%d7, %d5
393
	bsr.l		ldmm2			# %d5:%d6 = divisor * (trial << 16)
394
	mov.l		%d5, %d2		# now %d2,%d3 are trial*divisor
395
	mov.l		%d6, %d3
396
	mov.l		(%sp)+, %d5		# restore dividend
397
	mov.l		(%sp)+, %d6
398
	sub.l		%d3, %d6
399
	subx.l		%d2, %d5		# subtract double precision
400
	bcc		ldd2nd			# no carry, do next quotient digit
401
	subq.l		&0x1, %d1		# q is one too large
402
# need to add back divisor longword to current ms 3 digits of dividend
403
# - according to Knuth, this is done only 2 out of 65536 times for random
404
# divisor, dividend selection.
405
	clr.l		%d2
406
	mov.l		%d7, %d3
407
	swap		%d3
408
	clr.w		%d3			# %d3 now ls word of divisor
409
	add.l		%d3, %d6		# aligned with 3rd word of dividend
410
	addx.l		%d2, %d5		# propagate the carry (%d2 is zero)
411
	mov.l		%d7, %d3
412
	clr.w		%d3			# %d3 now ms word of divisor
413
	swap		%d3			# aligned with 2nd word of dividend
414
	add.l		%d3, %d5
415
ldd2nd:
416
	tst.b		DDSECOND(%a6)	# both q words done?
417
	bne.b		lddremain
418
# first quotient digit now correct. store digit and shift the
419
# (subtracted) dividend
420
	mov.w		%d1, DDQUOTIENT(%a6)	# quotient msw
421
	clr.l		%d1
422
	swap		%d5
423
	swap		%d6
424
	mov.w		%d6, %d5
425
	clr.w		%d6
426
	st		DDSECOND(%a6)		# second digit
427
	bra.w		lddnormalized
428
lddremain:
429
# add 2nd word to quotient, get the remainder.
430
	mov.w		%d1, DDQUOTIENT+2(%a6)
431
# shift down one word/digit to renormalize remainder.
432
	mov.w		%d5, %d6
433
	swap		%d6
434
	swap		%d5
435
# undo the normalization: shift the remainder right once per recorded shift.
	mov.l		DDNORMAL(%a6), %d7	# get norm shift count
436
	beq.b		lddrn
437
	subq.l		&0x1, %d7		# set for loop count
438
lddnlp:
439
	lsr.l		&0x1, %d5		# shift into %d6
440
	roxr.l		&0x1, %d6
441
	dbf		%d7, lddnlp
442
lddrn:
443
	mov.l		%d6, %d5		# remainder
444
	mov.l		DDQUOTIENT(%a6), %d6	# quotient
445

446
	rts
447
ldmm2:
448
# Unsigned 32x32->64 multiply built from four 16x16->32 products.
# In:  %d5, %d6 = the two 32-bit factors.
# Out: %d5 = high 32 bits, %d6 = low 32 bits of the product.
449
# destroys %d2,%d3,%d4.
450
#
# NOTE: modified from the Motorola P1.00 original. The two carry
# propagations into the msw*msw product used "addx.w", which only updates
# the low word of %d3 and so drops the carry whenever that low word is
# 0xffff. They are now "addx.l", matching mulu64_alg/muls64_alg. The caller
# in lddknuth always passes a zero low word in %d6, so it never generates
# such a carry and its results are unchanged.
451

452
# multiply hi,lo words of each factor to get 4 intermediate products
453
	mov.l		%d6, %d2
454
	mov.l		%d6, %d3
455
	mov.l		%d5, %d4
456
	swap		%d3			# hi(%d6) in lsw of %d3
457
	swap		%d4			# hi(%d5) in lsw of %d4
458
	mulu.w		%d5, %d6		# %d6 <- lsw*lsw
459
	mulu.w		%d3, %d5		# %d5 <- msw-dest*lsw-source
460
	mulu.w		%d4, %d2		# %d2 <- msw-source*lsw-dest
461
	mulu.w		%d4, %d3		# %d3 <- msw*msw
462
# now use swap and addx to consolidate to two longwords
463
	clr.l		%d4			# zero addend: addx then adds only 'X'
464
	swap		%d6
465
	add.w		%d5, %d6		# add msw of l*l to lsw of m*l product
466
	addx.l		%d4, %d3		# add any carry to m*m product
467
	add.w		%d2, %d6		# add in lsw of other m*l product
468
	addx.l		%d4, %d3		# add any carry to m*m product
469
	swap		%d6			# %d6 is low 32 bits of final product
470
	clr.w		%d5
471
	clr.w		%d2			# lsw of two mixed products used,
472
	swap		%d5			# now use msws of longwords
473
	swap		%d2
474
	add.l		%d2, %d5
475
	add.l		%d3, %d5	# %d5 now ms 32 bits of final product
476
	rts
477

478
#########################################################################
479
# XDEF ****************************************************************	#
480
#	_060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction	#
481
#	_060LSP__imuls64_(): Emulate 64-bit signed mul instruction.	#
482
#									#
483
#	This is the library version which is accessed as a subroutine	#
484
#	and therefore does not work exactly like the 680X0 mul{s,u}.l	#
485
#	64-bit multiply instruction.					#
486
#									#
487
# XREF ****************************************************************	#
488
#	None								#
489
#									#
490
# INPUT ***************************************************************	#
491
#	0x4(sp) = multiplier						#
492
#	0x8(sp) = multiplicand						#
493
#	0xc(sp) = pointer to location to place 64-bit result		#
494
#									#
495
# OUTPUT **************************************************************	#
496
#	0xc(sp) = points to location of 64-bit result			#
497
#									#
498
# ALGORITHM ***********************************************************	#
499
#	Perform the multiply in pieces using 16x16->32 unsigned		#
500
# multiplies and "add" instructions.					#
501
#	Set the condition codes as appropriate before performing an	#
502
# "rts".								#
503
#									#
504
#########################################################################
505

506
# Offset (relative to %a6) of the word holding the condition codes saved on
# entry to the multiply routines; it lives in the frame made by "link.w %a6,&-4".
set MUL64_CC, -4
507

508
	global		_060LSP__imulu64_
509
# Unsigned 32x32->64 multiply entry. After the link the arguments are at
#	0x8(%a6) = multiplier, 0xc(%a6) = multiplicand,
#	0x10(%a6) = pointer to the 64-bit result.
# A zero operand is diverted to mulu64_zero; otherwise control falls
# through towards mulu64_alg.
_060LSP__imulu64_:
510

511
# PROLOGUE BEGIN ########################################################
512
	link.w		%a6,&-4
513
	movm.l		&0x3800,-(%sp)		# save d2-d4
514
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
515
# PROLOGUE END ##########################################################
516

517
	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
518

519
# each mov.l sets 'Z' from the operand, so the beq tests it for zero.
	mov.l		0x8(%a6),%d0		# store multiplier in d0
520
	beq.w		mulu64_zero		# handle zero separately
521

522
	mov.l		0xc(%a6),%d1		# get multiplicand in d1
523
	beq.w		mulu64_zero		# handle zero separately
524

525
#########################################################################
526
#	63			   32				0	#
527
#	----------------------------					#
528
#	| hi(mplier) * hi(mplicand)|					#
529
#	----------------------------					#
530
#		     -----------------------------			#
531
#		     | hi(mplier) * lo(mplicand) |			#
532
#		     -----------------------------			#
533
#		     -----------------------------			#
534
#		     | lo(mplier) * hi(mplicand) |			#
535
#		     -----------------------------			#
536
#	  |			   -----------------------------	#
537
#	--|--			   | lo(mplier) * lo(mplicand) |	#
538
#	  |			   -----------------------------	#
539
#	========================================================	#
540
#	--------------------------------------------------------	#
541
#	|	hi(result)	   |	    lo(result)         |	#
542
#	--------------------------------------------------------	#
543
#########################################################################
544
# In:  %d0 = multiplier (mr), %d1 = multiplicand (md), both non-zero.
# Forms the four 16x16 partial products, sums them into hi(result) in %d1
# and lo(result) in %d0, sets the ccodes and stores the result.
mulu64_alg:
545
# load temp registers with operands
546
	mov.l		%d0,%d2			# mr in d2
547
	mov.l		%d0,%d3			# mr in d3
548
	mov.l		%d1,%d4			# md in d4
549
	swap		%d3			# hi(mr) in lo d3
550
	swap		%d4			# hi(md) in lo d4
551

552
# complete necessary multiplies:
553
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
554
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
555
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
556
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
557

558
# add lo portions of [2],[3] to hi portion of [1].
559
# add carries produced from these adds to [4].
560
# lo([1]) is the final lo 16 bits of the result.
561
	clr.l		%d4			# load d4 w/ zero value
562
	swap		%d0			# hi([1]) <==> lo([1])
563
	add.w		%d1,%d0			# hi([1]) + lo([2])
564
	addx.l		%d4,%d3			#    [4]  + carry
565
	add.w		%d2,%d0			# hi([1]) + lo([3])
566
	addx.l		%d4,%d3			#    [4]  + carry
567
	swap		%d0			# lo([1]) <==> hi([1])
568

569
# lo portions of [2],[3] have been added in to final result.
570
# now, clear lo, put hi in lo reg, and add to [4]
571
	clr.w		%d1			# clear lo([2])
572
	clr.w		%d2			# clear lo([3])
573
	swap		%d1			# hi([2]) in lo d1
574
	swap		%d2			# hi([3]) in lo d2
575
	add.l		%d2,%d1			# hi([2]) + hi([3])
576
	add.l		%d3,%d1			#   + [4] (incl. carries) = hi(result)
577

578
# now, grab the condition codes. only one that can be set is 'N'.
579
# 'N' CAN be set if the operation is unsigned if bit 63 is set.
580
	mov.w		MUL64_CC(%a6),%d4
581
	andi.b		&0x10,%d4		# keep old 'X' bit
582
	tst.l		%d1			# may set 'N' bit
583
	bpl.b		mulu64_ddone
584
	ori.b		&0x8,%d4		# set 'N' bit
585
mulu64_ddone:
586
	mov.w		%d4,%cc
587

588
# here, hi(result) is in d1 and lo(result) in d0. they are swapped so that
589
# movm stores d0 = hi(result) first and d1 = lo(result) second at the
590
# location addressed by the pointer argument held at 0x10(%a6).
# use movm here to not disturb the condition codes.
591
mulu64_end:
592
	exg		%d1,%d0
593
	movm.l		&0x0003,([0x10,%a6])		# save result
594

595
# EPILOGUE BEGIN ########################################################
596
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
597
	movm.l		(%sp)+,&0x001c		# restore d2-d4
598
	unlk		%a6
599
# EPILOGUE END ##########################################################
600

601
	rts
602

603
# one or both of the operands is zero so the result is also zero.
604
# build a zero result in d0/d1, set the 'Z' ccode bit (keeping the
# caller's 'X' bit) and let mulu64_end store the result and return.
605
mulu64_zero:
606
	clr.l		%d0
607
	clr.l		%d1
608

609
	mov.w		MUL64_CC(%a6),%d4
610
	andi.b		&0x10,%d4		# keep old 'X' bit
611
	ori.b		&0x4,%d4
612
	mov.w		%d4,%cc			# set 'Z' ccode bit
613

614
	bra.b		mulu64_end
615

616
##########
617
# muls.l #
618
##########
619
	global		_060LSP__imuls64_
620
# Signed 32x32->64 multiply entry. After the link the arguments are at
#	0x8(%a6) = multiplier, 0xc(%a6) = multiplicand,
#	0x10(%a6) = pointer to the 64-bit result.
# Both operands are made positive and the sign of the product is kept in
# the low byte of %d5 (non-zero = negative) before control falls through
# towards muls64_alg.
#
# NOTE: modified from the Motorola P1.00 original. The zero-operand
# branches used to target mulu64_zero, leaving through the unsigned
# routine's epilogue, which restores only d2-d4 although d2-d5 are saved
# here. That was correct only because %d5 is still untouched at that point
# and unlk repairs %sp. They now target muls64_zero, whose exit matches this
# prologue. beq.w is used because the target is further away.
_060LSP__imuls64_:
621

622
# PROLOGUE BEGIN ########################################################
623
	link.w		%a6,&-4
624
	movm.l		&0x3c00,-(%sp)		# save d2-d5
625
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
626
# PROLOGUE END ##########################################################
627

628
	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes
629

630
# each mov.l sets 'Z' from the operand, so the beq tests it for zero.
	mov.l		0x8(%a6),%d0		# store multiplier in d0
631
	beq.w		muls64_zero		# handle zero separately
632

633
	mov.l		0xc(%a6),%d1		# get multiplicand in d1
634
	beq.w		muls64_zero		# handle zero separately
635

636
	clr.b		%d5			# clear sign tag
637
	tst.l		%d0			# is multiplier negative?
638
	bge.b		muls64_chk_md_sgn	# no
639
	neg.l		%d0			# make multiplier positive
640

641
	ori.b		&0x1,%d5		# save multiplier sgn
642

643
# the result sign is the exclusive or of the operand sign bits.
644
muls64_chk_md_sgn:
645
	tst.l		%d1			# is multiplicand negative?
646
	bge.b		muls64_alg		# no
647
	neg.l		%d1			# make multiplicand positive
648

649
	eori.b		&0x1,%d5		# calculate correct sign
650

651
#########################################################################
652
#	63			   32				0	#
653
#	----------------------------					#
654
#	| hi(mplier) * hi(mplicand)|					#
655
#	----------------------------					#
656
#		     -----------------------------			#
657
#		     | hi(mplier) * lo(mplicand) |			#
658
#		     -----------------------------			#
659
#		     -----------------------------			#
660
#		     | lo(mplier) * hi(mplicand) |			#
661
#		     -----------------------------			#
662
#	  |			   -----------------------------	#
663
#	--|--			   | lo(mplier) * lo(mplicand) |	#
664
#	  |			   -----------------------------	#
665
#	========================================================	#
666
#	--------------------------------------------------------	#
667
#	|	hi(result)	   |	    lo(result)         |	#
668
#	--------------------------------------------------------	#
669
#########################################################################
670
# In:  %d0 = |multiplier| (mr), %d1 = |multiplicand| (md), both non-zero;
#      low byte of %d5 non-zero if the product must be negated.
# Forms the unsigned 64-bit product (hi in %d1, lo in %d0), negates it if
# required, sets the ccodes and stores the result.
muls64_alg:
671
# load temp registers with operands
672
	mov.l		%d0,%d2			# mr in d2
673
	mov.l		%d0,%d3			# mr in d3
674
	mov.l		%d1,%d4			# md in d4
675
	swap		%d3			# hi(mr) in lo d3
676
	swap		%d4			# hi(md) in lo d4
677

678
# complete necessary multiplies:
679
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
680
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
681
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
682
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)
683

684
# add lo portions of [2],[3] to hi portion of [1].
685
# add carries produced from these adds to [4].
686
# lo([1]) is the final lo 16 bits of the result.
687
	clr.l		%d4			# load d4 w/ zero value
688
	swap		%d0			# hi([1]) <==> lo([1])
689
	add.w		%d1,%d0			# hi([1]) + lo([2])
690
	addx.l		%d4,%d3			#    [4]  + carry
691
	add.w		%d2,%d0			# hi([1]) + lo([3])
692
	addx.l		%d4,%d3			#    [4]  + carry
693
	swap		%d0			# lo([1]) <==> hi([1])
694

695
# lo portions of [2],[3] have been added in to final result.
696
# now, clear lo, put hi in lo reg, and add to [4]
697
	clr.w		%d1			# clear lo([2])
698
	clr.w		%d2			# clear lo([3])
699
	swap		%d1			# hi([2]) in lo d1
700
	swap		%d2			# hi([3]) in lo d2
701
	add.l		%d2,%d1			# hi([2]) + hi([3])
702
	add.l		%d3,%d1			#   + [4] (incl. carries) = hi(result)
703

704
	tst.b		%d5			# should result be signed?
705
	beq.b		muls64_done		# no
706

707
# result should be a signed negative number.
708
# compute 2's complement of the unsigned number:
709
#   -negate all bits and add 1
710
muls64_neg:
711
	not.l		%d0			# negate lo(result) bits
712
	not.l		%d1			# negate hi(result) bits
713
	addq.l		&1,%d0			# add 1 to lo(result)
714
# %d4 is still zero from the clr.l above, so only the carry is added.
	addx.l		%d4,%d1			# add carry to hi(result)
715

716
muls64_done:
717
	mov.w		MUL64_CC(%a6),%d4
718
	andi.b		&0x10,%d4		# keep old 'X' bit
719
	tst.l		%d1			# may set 'N' bit
720
	bpl.b		muls64_ddone
721
	ori.b		&0x8,%d4		# set 'N' bit
722
muls64_ddone:
723
	mov.w		%d4,%cc
724

725
# here, hi(result) is in d1 and lo(result) in d0. they are swapped so that
726
# movm stores d0 = hi(result) first and d1 = lo(result) second at the
727
# location addressed by the pointer argument held at 0x10(%a6).
# use movm here to not disturb the condition codes.
728
muls64_end:
729
	exg		%d1,%d0
730
	movm.l		&0x0003,([0x10,%a6])	# save result
731

732
# EPILOGUE BEGIN ########################################################
733
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
734
	movm.l		(%sp)+,&0x003c		# restore d2-d5
735
	unlk		%a6
736
# EPILOGUE END ##########################################################
737

738
	rts
739

740
# one or both of the operands is zero so the result is also zero.
741
# build a zero result in d0/d1, set the 'Z' ccode bit (keeping the
# caller's 'X' bit) and let muls64_end store the result and return.
742
muls64_zero:
743
	clr.l		%d0
744
	clr.l		%d1
745

746
	mov.w		MUL64_CC(%a6),%d4
747
	andi.b		&0x10,%d4		# keep old 'X' bit
748
	ori.b		&0x4,%d4
749
	mov.w		%d4,%cc			# set 'Z' ccode bit
750

751
	bra.b		muls64_end
752

753
#########################################################################
754
# XDEF ****************************************************************	#
755
#	_060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>".			#
756
#	_060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>".			#
757
#	_060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>".			#
758
#	_060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>".			#
759
#	_060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>".			#
760
#	_060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>".			#
761
#									#
762
#	This is the library version which is accessed as a subroutine	#
763
#	and therefore does not work exactly like the 680X0 "cmp2"	#
764
#	instruction.							#
765
#									#
766
# XREF ****************************************************************	#
767
#	None								#
768
#									#
769
# INPUT ***************************************************************	#
770
#	0x4(sp) = Rn							#
771
#	0x8(sp) = pointer to boundary pair				#
772
#									#
773
# OUTPUT **************************************************************	#
774
#	cc = condition codes are set correctly				#
775
#									#
776
# ALGORITHM ***********************************************************	#
777
#	In the interest of simplicity, all operands are converted to	#
778
# longword size whether the operation is byte, word, or long. The	#
779
# bounds are sign extended accordingly. If Rn is a data register, Rn is #
780
# also sign extended. If Rn is an address register, it need not be sign #
781
# extended since the full register is always used.			#
782
#	The condition codes are set correctly before the final "rts".	#
783
#									#
784
#########################################################################
785

786
# Offset (relative to %a6) of the word holding the condition codes saved on
# entry to the cmp2 routines; it lives in the frame made by "link.w %a6,&-4".
set	CMP2_CC,	-4
787

788
	global		_060LSP__cmp2_Ab_
789
# cmp2.b with an address-register operand.
# After the link: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 1).
# Leaves %d2 = Rn, %d0 = lower bound, %d1 = upper bound for l_cmp2_cmp.
_060LSP__cmp2_Ab_:
790

791
# PROLOGUE BEGIN ########################################################
792
	link.w		%a6,&-4
793
	movm.l		&0x3800,-(%sp)		# save d2-d4
794
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
795
# PROLOGUE END ##########################################################
796

797
	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
798
	mov.l		0x8(%a6), %d2		# get regval
799

800
	mov.b		([0xc,%a6],0x0),%d0	# fetch lo bnd
801
	mov.b		([0xc,%a6],0x1),%d1	# fetch hi bnd
802

803
	extb.l		%d0			# sign extend lo bnd
804
	extb.l		%d1			# sign extend hi bnd
805
# Rn itself is used at full width (no sign extension for An).
	bra.w		l_cmp2_cmp		# go do the compare emulation
806

807
	global		_060LSP__cmp2_Aw_
808
# cmp2.w with an address-register operand.
# After the link: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 2).
# Leaves %d2 = Rn, %d0 = lower bound, %d1 = upper bound for l_cmp2_cmp.
_060LSP__cmp2_Aw_:
809

810
# PROLOGUE BEGIN ########################################################
811
	link.w		%a6,&-4
812
	movm.l		&0x3800,-(%sp)		# save d2-d4
813
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
814
# PROLOGUE END ##########################################################
815

816
	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
817
	mov.l		0x8(%a6), %d2		# get regval
818

819
	mov.w		([0xc,%a6],0x0),%d0	# fetch lo bnd
820
	mov.w		([0xc,%a6],0x2),%d1	# fetch hi bnd
821

822
	ext.l		%d0			# sign extend lo bnd
823
	ext.l		%d1			# sign extend hi bnd
824
# Rn itself is used at full width (no sign extension for An).
	bra.w		l_cmp2_cmp		# go do the compare emulation
825

826
	global		_060LSP__cmp2_Al_
827
# cmp2.l with an address-register operand.
# After the link: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 4).
# Leaves %d2 = Rn, %d0 = lower bound, %d1 = upper bound for l_cmp2_cmp.
_060LSP__cmp2_Al_:
828

829
# PROLOGUE BEGIN ########################################################
830
	link.w		%a6,&-4
831
	movm.l		&0x3800,-(%sp)		# save d2-d4
832
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
833
# PROLOGUE END ##########################################################
834

835
	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
836
	mov.l		0x8(%a6), %d2		# get regval
837

838
	mov.l		([0xc,%a6],0x0),%d0	# fetch lo bnd
839
	mov.l		([0xc,%a6],0x4),%d1	# fetch hi bnd
840
	bra.w		l_cmp2_cmp		# go do the compare emulation
841

842
	global		_060LSP__cmp2_Db_
843
# cmp2.b with a data-register operand.
# After the link: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 1).
# Leaves %d2 = sign-extended Rn byte, %d0 = lower bound, %d1 = upper bound
# for l_cmp2_cmp.
_060LSP__cmp2_Db_:
844

845
# PROLOGUE BEGIN ########################################################
846
	link.w		%a6,&-4
847
	movm.l		&0x3800,-(%sp)		# save d2-d4
848
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
849
# PROLOGUE END ##########################################################
850

851
	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
852
	mov.l		0x8(%a6), %d2		# get regval
853

854
	mov.b		([0xc,%a6],0x0),%d0	# fetch lo bnd
855
	mov.b		([0xc,%a6],0x1),%d1	# fetch hi bnd
856

857
	extb.l		%d0			# sign extend lo bnd
858
	extb.l		%d1			# sign extend hi bnd
859

860
# operation is a data register compare.
861
# sign extend byte to long so we can do simple longword compares.
862
	extb.l		%d2			# sign extend data byte
863
	bra.w		l_cmp2_cmp		# go do the compare emulation
864

865
	global		_060LSP__cmp2_Dw_
866
# cmp2.w with a data-register operand.
# After the link: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 2).
# Leaves %d2 = sign-extended Rn word, %d0 = lower bound, %d1 = upper bound
# for l_cmp2_cmp.
_060LSP__cmp2_Dw_:
867

868
# PROLOGUE BEGIN ########################################################
869
	link.w		%a6,&-4
870
	movm.l		&0x3800,-(%sp)		# save d2-d4
871
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
872
# PROLOGUE END ##########################################################
873

874
	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
875
	mov.l		0x8(%a6), %d2		# get regval
876

877
	mov.w		([0xc,%a6],0x0),%d0	# fetch lo bnd
878
	mov.w		([0xc,%a6],0x2),%d1	# fetch hi bnd
879

880
	ext.l		%d0			# sign extend lo bnd
881
	ext.l		%d1			# sign extend hi bnd
882

883
# operation is a data register compare.
884
# sign extend word to long so we can do simple longword compares.
885
	ext.l		%d2			# sign extend data word
886
	bra.w		l_cmp2_cmp		# go emulate compare
887

888
	global		_060LSP__cmp2_Dl_
889
# cmp2.l with a data-register operand.
# After the link: 0x8(%a6) = Rn value, 0xc(%a6) = pointer to the bound pair
# (lower bound at offset 0, upper bound at offset 4).
# Loads %d2 = Rn, %d0 = lower bound, %d1 = upper bound and falls straight
# through into l_cmp2_cmp, the compare/exit code shared by all six entries.
_060LSP__cmp2_Dl_:
890

891
# PROLOGUE BEGIN ########################################################
892
	link.w		%a6,&-4
893
	movm.l		&0x3800,-(%sp)		# save d2-d4
894
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
895
# PROLOGUE END ##########################################################
896

897
	mov.w		%cc,CMP2_CC(%a6)	# save incoming ccodes
898
	mov.l		0x8(%a6), %d2		# get regval
899

900
	mov.l		([0xc,%a6],0x0),%d0	# fetch lo bnd
901
	mov.l		([0xc,%a6],0x4),%d1	# fetch hi bnd
902

903
#
904
# To set the ccodes correctly:
905
#	(1) save 'Z' bit from (Rn - lo)
906
#	(2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
907
#	(3) keep 'X', 'N', and 'V' from before instruction
908
#	(4) combine ccodes
909
#
910
# In: %d2 = Rn, %d0 = lower bound, %d1 = upper bound, all as longwords.
l_cmp2_cmp:
911
	sub.l		%d0, %d2		# (Rn - lo)
912
	mov.w		%cc, %d3		# fetch resulting ccodes
913
	andi.b		&0x4, %d3		# keep 'Z' bit
914
	sub.l		%d0, %d1		# (hi - lo)
915
	cmp.l		%d1,%d2			# ((hi - lo) - (Rn - lo))
916

917
	mov.w		%cc, %d4		# fetch resulting ccodes
918
	or.b		%d4, %d3		# combine w/ earlier ccodes
919
	andi.b		&0x5, %d3		# keep 'Z' and 'C'
920

921
	mov.w		CMP2_CC(%a6), %d4	# fetch old ccodes
922
	andi.b		&0x1a, %d4		# keep 'X','N','V' bits
923
	or.b		%d3, %d4		# insert new ccodes
924
	mov.w		%d4,%cc			# save new ccodes
925

926
# movm and unlk below leave the condition codes just written intact.
# EPILOGUE BEGIN ########################################################
927
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
928
	movm.l		(%sp)+,&0x001c		# restore d2-d4
929
	unlk		%a6
930
# EPILOGUE END ##########################################################
931

932
	rts
933

934
Product

Resources

Company