Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/m68k/ifpsp060/src/ilsp.S
10820 views
1
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2
MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
3
M68000 Hi-Performance Microprocessor Division
4
M68060 Software Package
5
Production Release P1.00 -- October 10, 1994
6
7
M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
8
9
THE SOFTWARE is provided on an "AS IS" basis and without warranty.
10
To the maximum extent permitted by applicable law,
11
MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
12
INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
13
and any warranty against infringement with regard to the SOFTWARE
14
(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
15
16
To the maximum extent permitted by applicable law,
17
IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
18
(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
19
BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
20
ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
21
Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
22
23
You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
24
so long as this entire notice is retained without alteration in any modified and/or
25
redistributed versions, and that such modified versions are clearly identified as such.
26
No licenses are granted by implication, estoppel or otherwise under any patents
27
or trademarks of Motorola, Inc.
28
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29
# litop.s:
30
# This file is appended to the top of the 060FPLSP package
31
# and contains the entry points into the package. The user, in
32
# effect, branches to one of the branch table entries located here.
33
#
34
35
# Entry-point table. Each slot is a long branch to one library routine
# followed by a zero word (presumably padding so that every slot has the
# same fixed size -- confirm against the package documentation).
# Slot order: idivs64, idivu64, imuls64, imulu64, then the six cmp2 variants.
bra.l _060LSP__idivs64_
36
short 0x0000
37
bra.l _060LSP__idivu64_
38
short 0x0000
39
40
bra.l _060LSP__imuls64_
41
short 0x0000
42
bra.l _060LSP__imulu64_
43
short 0x0000
44
45
bra.l _060LSP__cmp2_Ab_
46
short 0x0000
47
bra.l _060LSP__cmp2_Aw_
48
short 0x0000
49
bra.l _060LSP__cmp2_Al_
50
short 0x0000
51
bra.l _060LSP__cmp2_Db_
52
short 0x0000
53
bra.l _060LSP__cmp2_Dw_
54
short 0x0000
55
bra.l _060LSP__cmp2_Dl_
56
short 0x0000
57
58
# leave room for future possible additions.
59
align 0x200
60
61
#########################################################################
62
# XDEF **************************************************************** #
63
# _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
64
# _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
65
# #
66
# This is the library version which is accessed as a subroutine #
67
# and therefore does not work exactly like the 680X0 div{s,u}.l #
68
# 64-bit divide instruction. #
69
# #
70
# XREF **************************************************************** #
71
# None. #
72
# #
73
# INPUT *************************************************************** #
74
# 0x4(sp) = divisor #
75
# 0x8(sp) = hi(dividend) #
76
# 0xc(sp) = lo(dividend) #
77
# 0x10(sp) = pointer to location to place quotient/remainder #
78
# #
79
# OUTPUT ************************************************************** #
80
# 0x10(sp) = points to location of remainder/quotient. #
81
# remainder is in first longword, quotient is in 2nd. #
82
# #
83
# ALGORITHM *********************************************************** #
84
# If the operands are signed, make them unsigned and save the #
85
# sign info for later. Separate out special cases like divide-by-zero #
86
# or 32-bit divides if possible. Else, use a special math algorithm #
87
# to calculate the result. #
88
# Restore sign info if signed instruction. Set the condition #
89
# codes before performing the final "rts". If the divisor was equal to #
90
# zero, then perform a divide-by-zero using a 16-bit implemented #
91
# divide instruction. This way, the operating system can record that #
92
# the event occurred even though it may not point to the correct place. #
93
# #
94
#########################################################################
95
96
# Offsets (relative to %a6) of the divide routines' locals inside the
# 16-byte frame created by "link.w %a6,&-16".
set POSNEG, -1 # byte: set (st) for signed divide, clear (sf) for unsigned
97
set NDIVISOR, -2 # byte: set if the divisor was negative (signed only)
98
set NDIVIDEND, -3 # byte: set if the dividend was negative; later eor'ed w/ NDIVISOR
99
set DDSECOND, -4 # byte: set once the first quotient word has been stored
100
set DDNORMAL, -8 # long: count of normalization shifts
101
set DDQUOTIENT, -12 # long: quotient, built as two words (+0 and +2)
102
set DIV64_CC, -16 # word: condition codes saved on entry
103
104
##########
105
# divs.l #
106
##########
107
# Signed 64-bit divide entry point. Builds the same frame as the unsigned
# entry, marks the operation as signed in POSNEG, and joins the shared
# divide body at ldiv64_cont.
global _060LSP__idivs64_
108
_060LSP__idivs64_:
109
# PROLOGUE BEGIN ########################################################
110
link.w %a6,&-16 # 16 bytes of locals (POSNEG .. DIV64_CC)
111
movm.l &0x3f00,-(%sp) # save d2-d7
112
# fmovm.l &0x0,-(%sp) # save no fpregs
113
# PROLOGUE END ##########################################################
114
115
mov.w %cc,DIV64_CC(%a6) # save incoming ccodes
116
st POSNEG(%a6) # signed operation
117
bra.b ldiv64_cont # continue in the shared divide body
118
119
##########
120
# divu.l #
121
##########
122
# Unsigned 64-bit divide entry point, followed by the divide body shared
# with the signed entry (from ldiv64_cont onward).
# After the link: 0x8(%a6) = divisor, 0xc(%a6) = hi(dividend),
# 0x10(%a6) = lo(dividend), 0x14(%a6) = pointer to the result pair.
# Working registers: %d7 = divisor, %d5:%d6 = dividend, then
# %d5 = remainder and %d6 = quotient.
global _060LSP__idivu64_
123
_060LSP__idivu64_:
124
# PROLOGUE BEGIN ########################################################
125
link.w %a6,&-16 # 16 bytes of locals (POSNEG .. DIV64_CC)
126
movm.l &0x3f00,-(%sp) # save d2-d7
127
# fmovm.l &0x0,-(%sp) # save no fpregs
128
# PROLOGUE END ##########################################################
129
130
mov.w %cc,DIV64_CC(%a6) # save incoming ccodes
131
sf POSNEG(%a6) # unsigned operation
132
133
ldiv64_cont:
134
mov.l 0x8(%a6),%d7 # fetch divisor
135
136
beq.w ldiv64eq0 # divisor is = 0!!!
137
138
mov.l 0xc(%a6), %d5 # get dividend hi
139
mov.l 0x10(%a6), %d6 # get dividend lo
140
141
# separate signed and unsigned divide
142
tst.b POSNEG(%a6) # signed or unsigned?
143
beq.b ldspecialcases # use positive divide
144
145
# save the sign of the divisor
146
# make divisor unsigned if it's negative
147
tst.l %d7 # chk sign of divisor
148
slt NDIVISOR(%a6) # save sign of divisor
149
bpl.b ldsgndividend
150
neg.l %d7 # complement negative divisor
151
152
# save the sign of the dividend
153
# make dividend unsigned if it's negative
154
ldsgndividend:
155
tst.l %d5 # chk sign of hi(dividend)
156
slt NDIVIDEND(%a6) # save sign of dividend
157
bpl.b ldspecialcases
158
159
mov.w &0x0, %cc # clear 'X' cc bit
160
negx.l %d6 # complement signed dividend
161
negx.l %d5 # (64-bit negate: borrow carried via 'X')
162
163
# extract some special cases:
164
# - is (dividend == 0) ?
165
# - is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
166
ldspecialcases:
167
tst.l %d5 # is (hi(dividend) == 0)
168
bne.b ldnormaldivide # no, so try it the long way
169
170
tst.l %d6 # is (lo(dividend) == 0), too
171
beq.w lddone # yes, so (dividend == 0)
172
173
cmp.l %d7,%d6 # is (divisor <= lo(dividend))
174
bls.b ld32bitdivide # yes, so use 32 bit divide
175
176
exg %d5,%d6 # q = 0, r = dividend
177
bra.w ldivfinish # can't divide, we're done.
178
179
ld32bitdivide:
180
tdivu.l %d7, %d5:%d6 # it's only a 32/32 bit div!
181
182
bra.b ldivfinish
183
184
ldnormaldivide:
185
# last special case:
186
# - is hi(dividend) >= divisor ? if yes, then overflow
187
cmp.l %d7,%d5 # is (divisor <= hi(dividend))
188
bls.b lddovf # answer won't fit in 32 bits
189
190
# perform the divide algorithm:
191
bsr.l ldclassical # do int divide
192
193
# separate into signed and unsigned finishes.
194
ldivfinish:
195
tst.b POSNEG(%a6) # do divs, divu separately
196
beq.b lddone # divu has no processing!!!
197
198
# it was a divs.l, so ccode setting is a little more complicated...
199
tst.b NDIVIDEND(%a6) # remainder has same sign
200
beq.b ldcc # as dividend.
201
neg.l %d5 # sgn(rem) = sgn(dividend)
202
ldcc:
203
mov.b NDIVISOR(%a6), %d0
204
eor.b %d0, NDIVIDEND(%a6) # chk if quotient is negative
205
beq.b ldqpos # branch to quot positive
206
207
# 0x80000000 is the largest number representable as a 32-bit negative
208
# number. the negative of 0x80000000 is 0x80000000.
209
cmpi.l %d6, &0x80000000 # will (-quot) fit in 32 bits?
210
bhi.b lddovf
211
212
neg.l %d6 # make (-quot) 2's comp
213
214
bra.b lddone
215
216
ldqpos:
217
btst &0x1f, %d6 # will (+quot) fit in 32 bits?
218
bne.b lddovf
219
220
lddone:
221
# NOTE(review): the next two comment lines talk about register numbers,
222
# which this subroutine version never inspects; they look like a leftover
223
# from another version of this code -- confirm.
# if the register numbers are the same, only the quotient gets saved.
# so, if we always save the quotient second, we save ourselves a cmp&beq
andi.w &0x10,DIV64_CC(%a6) # keep only the old 'X' bit
224
mov.w DIV64_CC(%a6),%cc
225
tst.l %d6 # may set 'N' ccode bit
226
227
# here, the remainder is in d5 and the quotient is in d6. they are saved
228
# (remainder first, then quotient) at the location addressed by the
229
# pointer argument held at 0x14(%a6).
# use movm here to not disturb the condition codes.
230
ldexit:
231
movm.l &0x0060,([0x14,%a6]) # save result (d5, then d6)
232
233
# EPILOGUE BEGIN ########################################################
234
# fmovm.l (%sp)+,&0x0 # restore no fpregs
235
movm.l (%sp)+,&0x00fc # restore d2-d7
236
unlk %a6
237
# EPILOGUE END ##########################################################
238
239
rts
240
241
# the result should be the unchanged dividend
242
lddovf:
243
mov.l 0xc(%a6), %d5 # get dividend hi
244
mov.l 0x10(%a6), %d6 # get dividend lo
245
246
andi.w &0x1c,DIV64_CC(%a6) # keep old 'X','N','Z'; clear 'V','C'
247
ori.w &0x02,DIV64_CC(%a6) # set 'V' ccode bit
248
mov.w DIV64_CC(%a6),%cc
249
250
bra.b ldexit
251
252
# divide by zero: copy the unchanged dividend to the result location,
# restore the caller's ccodes and registers, then execute a real divide by
# zero so that the exception is raised.
ldiv64eq0:
253
mov.l 0xc(%a6),([0x14,%a6]) # result[0] = hi(dividend)
254
mov.l 0x10(%a6),([0x14,%a6],0x4) # result[1] = lo(dividend)
255
256
mov.w DIV64_CC(%a6),%cc # restore incoming ccodes
257
258
# EPILOGUE BEGIN ########################################################
259
# fmovm.l (%sp)+,&0x0 # restore no fpregs
260
movm.l (%sp)+,&0x00fc # restore d2-d7
261
unlk %a6
262
# EPILOGUE END ##########################################################
263
264
divu.w &0x0,%d0 # force a divbyzero exception
265
rts
266
267
###########################################################################
268
#########################################################################
269
# This routine uses the 'classical' Algorithm D from Donald Knuth's #
270
# Art of Computer Programming, vol II, Seminumerical Algorithms. #
271
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
272
# where U,V are words of the quadword dividend and longword divisor, #
273
# and U1, V1 are the most significant words. #
274
# #
275
# The most sig. longword of the 64 bit dividend must be in %d5, least #
276
# in %d6. The divisor must be in %d7. All three are treated as #
276
# unsigned; the caller handles signs before and after this routine. #
278
# The quotient is returned in %d6, remainder in %d5, unless the #
279
# v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
280
# is unchanged. #
281
#########################################################################
282
# In: %d5:%d6 = dividend (hi:lo), %d7 = divisor.
# Out: %d6 = quotient, %d5 = remainder. Uses %d1 as scratch on this path.
ldclassical:
283
# if the divisor msw is 0, use simpler algorithm than the full blown
284
# one at lddknuth:
285
286
cmpi.l %d7, &0xffff # does the divisor need more than 16 bits?
287
bhi.b lddknuth # go use D. Knuth algorithm
288
289
# Since the divisor is only a word (and larger than the mslw of the dividend),
290
# a simpler algorithm may be used :
291
# In the general case, four quotient words would be created by
292
# dividing the divisor word into each dividend word. In this case,
293
# the first two quotient words must be zero, or overflow would occur.
294
# Since we already checked this case above, we can treat the most significant
295
# longword of the dividend as (0) remainder (see Knuth) and merely complete
296
# the last two divisions to get a quotient longword and word remainder:
297
298
clr.l %d1
299
swap %d5 # same as r*b if previous step rqd
300
swap %d6 # get u3 to lsw position
301
mov.w %d6, %d5 # rb + u3
302
303
divu.w %d7, %d5
304
305
mov.w %d5, %d1 # first quotient word
306
swap %d6 # get u4
307
mov.w %d6, %d5 # rb + u4
308
309
divu.w %d7, %d5
310
311
swap %d1
312
mov.w %d5, %d1 # 2nd quotient 'digit'
313
clr.w %d5
314
swap %d5 # now remainder
315
mov.l %d1, %d6 # and quotient
316
317
rts
318
319
# Full two-digit path, entered from ldclassical when the divisor does not
# fit in 16 bits.
# In: %d5:%d6 = dividend (hi:lo), %d7 = divisor.
# Out: %d6 = quotient, %d5 = remainder.
# Uses the frame locals DDNORMAL, DDSECOND and DDQUOTIENT, scratch registers
# %d0-%d4, and two longwords of stack while each quotient digit is checked.
lddknuth:
320
# In this algorithm, the divisor is treated as a 2 digit (word) number
321
# which is divided into a 3 digit (word) dividend to get one quotient
322
# digit (word). After subtraction, the dividend is shifted and the
323
# process repeated. Before beginning, the divisor and quotient are
324
# 'normalized' so that the process of estimating the quotient digit
325
# will yield verifiably correct results..
326
327
clr.l DDNORMAL(%a6) # count of shifts for normalization
328
clr.b DDSECOND(%a6) # clear flag for quotient digits
329
clr.l %d1 # %d1 will hold trial quotient
330
lddnchk:
331
btst &31, %d7 # must we normalize? first word of
332
bne.b lddnormalized # divisor (V1) must be >= 65536/2
333
addq.l &0x1, DDNORMAL(%a6) # count normalization shifts
334
lsl.l &0x1, %d7 # shift the divisor
335
lsl.l &0x1, %d6 # shift u4,u3 with overflow to u2
336
roxl.l &0x1, %d5 # shift u1,u2
337
bra.w lddnchk
338
lddnormalized:
339
340
# Now calculate an estimate of the quotient words (msw first, then lsw).
341
# The comments use subscripts for the first quotient digit determination.
342
mov.l %d7, %d3 # divisor
343
mov.l %d5, %d2 # dividend mslw
344
swap %d2
345
swap %d3
346
cmp.w %d2, %d3 # V1 = U1 ?
347
bne.b lddqcalc1
348
mov.w &0xffff, %d1 # use max trial quotient word
349
bra.b lddadj0
350
lddqcalc1:
351
mov.l %d5, %d1
352
353
divu.w %d3, %d1 # use quotient of mslw/msw
354
355
andi.l &0x0000ffff, %d1 # zero any remainder
356
lddadj0:
357
358
# now test the trial quotient and adjust. This step plus the
359
# normalization assures (according to Knuth) that the trial
360
# quotient will be at worst 1 too large.
361
mov.l %d6, -(%sp) # save dividend lo (restored after ldmm2)
362
clr.w %d6 # word u3 left
363
swap %d6 # in lsw position
364
lddadj1: mov.l %d7, %d3
365
mov.l %d1, %d2
366
mulu.w %d7, %d2 # V2q
367
swap %d3
368
mulu.w %d1, %d3 # V1q
369
mov.l %d5, %d4 # U1U2
370
sub.l %d3, %d4 # U1U2 - V1q
371
372
swap %d4
373
374
mov.w %d4,%d0 # upper word of (U1U2 - V1q)
375
mov.w %d6,%d4 # insert lower word (U3)
376
377
tst.w %d0 # is upper word set?
378
bne.w lddadjd1
379
380
# add.l %d6, %d4 # (U1U2 - V1q) + U3
381
382
cmp.l %d2, %d4
383
bls.b lddadjd1 # is V2q > (U1U2-V1q) + U3 ?
384
subq.l &0x1, %d1 # yes, decrement and recheck
385
bra.b lddadj1
386
lddadjd1:
387
# now test the word by multiplying it by the divisor (V1V2) and comparing
388
# the 3 digit (word) result with the current dividend words
389
mov.l %d5, -(%sp) # save %d5 (%d6 already saved)
390
mov.l %d1, %d6
391
swap %d6 # shift answer to ms 3 words
392
mov.l %d7, %d5
393
bsr.l ldmm2 # %d5:%d6 = divisor * (trial digit << 16)
394
mov.l %d5, %d2 # now %d2,%d3 are trial*divisor
395
mov.l %d6, %d3
396
mov.l (%sp)+, %d5 # restore dividend
397
mov.l (%sp)+, %d6
398
sub.l %d3, %d6
399
subx.l %d2, %d5 # subtract double precision
400
bcc ldd2nd # no carry, do next quotient digit
401
subq.l &0x1, %d1 # q is one too large
402
# need to add back divisor longword to current ms 3 digits of dividend
403
# - according to Knuth, this is done only 2 out of 65536 times for random
404
# divisor, dividend selection.
405
clr.l %d2
406
mov.l %d7, %d3
407
swap %d3
408
clr.w %d3 # %d3 now ls word of divisor
409
add.l %d3, %d6 # aligned with 3rd word of dividend
410
addx.l %d2, %d5 # propagate the carry into the upper longword
411
mov.l %d7, %d3
412
clr.w %d3 # %d3 now ms word of divisor
413
swap %d3 # aligned with 2nd word of dividend
414
add.l %d3, %d5
415
ldd2nd:
416
tst.b DDSECOND(%a6) # both q words done?
417
bne.b lddremain
418
# first quotient digit now correct. store digit and shift the
419
# (subtracted) dividend
420
mov.w %d1, DDQUOTIENT(%a6)
421
clr.l %d1
422
swap %d5
423
swap %d6
424
mov.w %d6, %d5
425
clr.w %d6
426
st DDSECOND(%a6) # second digit
427
bra.w lddnormalized
428
lddremain:
429
# add 2nd word to quotient, get the remainder.
430
mov.w %d1, DDQUOTIENT+2(%a6)
431
# shift down one word/digit to renormalize remainder.
432
mov.w %d5, %d6
433
swap %d6
434
swap %d5
435
mov.l DDNORMAL(%a6), %d7 # get norm shift count
436
beq.b lddrn # no normalization shifts to undo
437
subq.l &0x1, %d7 # set for loop count
438
lddnlp:
439
lsr.l &0x1, %d5 # shift into %d6
440
roxr.l &0x1, %d6
441
dbf %d7, lddnlp
442
lddrn:
443
mov.l %d6, %d5 # remainder
444
mov.l DDQUOTIENT(%a6), %d6 # quotient
445
446
rts
447
ldmm2:
448
# 32 x 32 -> 64 bit unsigned multiply built from four 16 x 16 products.
# factors for the 32X32->64 multiplication are in %d5 and %d6.
449
# returns 64 bit result in %d5 (hi) %d6(lo).
450
# destroys %d2,%d3,%d4.
451
452
# multiply hi,lo words of each factor to get 4 intermediate products
453
mov.l %d6, %d2
454
mov.l %d6, %d3
455
mov.l %d5, %d4
456
swap %d3 # msw of the %d6 factor in lo %d3
457
swap %d4 # msw of the %d5 factor in lo %d4
458
mulu.w %d5, %d6 # %d6 <- lsw*lsw
459
mulu.w %d3, %d5 # %d5 <- msw-dest*lsw-source
460
mulu.w %d4, %d2 # %d2 <- msw-source*lsw-dest
461
mulu.w %d4, %d3 # %d3 <- msw*msw
462
# now use swap and addx to consolidate to two longwords
463
clr.l %d4 # zero source for the addx carry adds
464
swap %d6
465
add.w %d5, %d6 # add msw of l*l to lsw of m*l product
466
# the m*m product in %d3 is a full longword, so the carry must be added
# with a longword addx: a word-sized addx would drop a carry out of the
# low word of %d3. (mulu64_alg/muls64_alg use addx.l for the same step.)
addx.l %d4, %d3 # add any carry to m*m product
467
add.w %d2, %d6 # add in lsw of other m*l product
468
addx.l %d4, %d3 # add any carry to m*m product
469
swap %d6 # %d6 is low 32 bits of final product
470
clr.w %d5
471
clr.w %d2 # lsw of two mixed products used,
472
swap %d5 # now use msws of longwords
473
swap %d2
474
add.l %d2, %d5
475
add.l %d3, %d5 # %d5 now ms 32 bits of final product
476
rts
477
478
#########################################################################
479
# XDEF **************************************************************** #
480
# _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction #
481
# _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
482
# #
483
# This is the library version which is accessed as a subroutine #
484
# and therefore does not work exactly like the 680X0 mul{s,u}.l #
485
# 64-bit multiply instruction. #
486
# #
487
# XREF **************************************************************** #
488
# None #
489
# #
490
# INPUT *************************************************************** #
491
# 0x4(sp) = multiplier #
492
# 0x8(sp) = multiplicand #
493
# 0xc(sp) = pointer to location to place 64-bit result #
494
# #
495
# OUTPUT ************************************************************** #
496
# 0xc(sp) = points to location of 64-bit result #
497
# #
498
# ALGORITHM *********************************************************** #
499
# Perform the multiply in pieces using 16x16->32 unsigned #
500
# multiplies and "add" instructions. #
501
# Set the condition codes as appropriate before performing an #
502
# "rts". #
503
# #
504
#########################################################################
505
506
set MUL64_CC, -4 # frame offset (from %a6) of the saved incoming ccodes
507
508
# Unsigned 32 x 32 -> 64 bit multiply.
# After the link: 0x8(%a6) = multiplier, 0xc(%a6) = multiplicand,
# 0x10(%a6) = pointer to the 64-bit result (hi longword stored first).
# On return the old 'X' bit is kept; 'N' or 'Z' is set to match the result.
global _060LSP__imulu64_
509
_060LSP__imulu64_:
510
511
# PROLOGUE BEGIN ########################################################
512
link.w %a6,&-4
513
movm.l &0x3800,-(%sp) # save d2-d4
514
# fmovm.l &0x0,-(%sp) # save no fpregs
515
# PROLOGUE END ##########################################################
516
517
mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
518
519
mov.l 0x8(%a6),%d0 # store multiplier in d0
520
beq.w mulu64_zero # handle zero separately
521
522
mov.l 0xc(%a6),%d1 # get multiplicand in d1
523
beq.w mulu64_zero # handle zero separately
524
525
#########################################################################
526
# 63 32 0 #
527
# ---------------------------- #
528
# | hi(mplier) * hi(mplicand)| #
529
# ---------------------------- #
530
# ----------------------------- #
531
# | hi(mplier) * lo(mplicand) | #
532
# ----------------------------- #
533
# ----------------------------- #
534
# | lo(mplier) * hi(mplicand) | #
535
# ----------------------------- #
536
# | ----------------------------- #
537
# --|-- | lo(mplier) * lo(mplicand) | #
538
# | ----------------------------- #
539
# ======================================================== #
540
# -------------------------------------------------------- #
541
# | hi(result) | lo(result) | #
542
# -------------------------------------------------------- #
543
#########################################################################
544
mulu64_alg:
545
# load temp registers with operands
546
mov.l %d0,%d2 # mr in d2
547
mov.l %d0,%d3 # mr in d3
548
mov.l %d1,%d4 # md in d4
549
swap %d3 # hi(mr) in lo d3
550
swap %d4 # hi(md) in lo d4
551
552
# complete necessary multiplies:
553
mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
554
mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
555
mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
556
mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
557
558
# add lo portions of [2],[3] to hi portion of [1].
559
# add carries produced from these adds to [4].
560
# lo([1]) is the final lo 16 bits of the result.
561
clr.l %d4 # load d4 w/ zero value
562
swap %d0 # hi([1]) <==> lo([1])
563
add.w %d1,%d0 # hi([1]) + lo([2])
564
addx.l %d4,%d3 # [4] + carry
565
add.w %d2,%d0 # hi([1]) + lo([3])
566
addx.l %d4,%d3 # [4] + carry
567
swap %d0 # lo([1]) <==> hi([1])
568
569
# lo portions of [2],[3] have been added in to final result.
570
# now, clear lo, put hi in lo reg, and add to [4]
571
clr.w %d1 # clear lo([2])
572
clr.w %d2 # clear lo([3])
573
swap %d1 # hi([2]) in lo d1
574
swap %d2 # hi([3]) in lo d2
575
add.l %d2,%d1 # hi([2]) + hi([3])
576
add.l %d3,%d1 # ... + [4] = hi(result)
577
578
# now, grab the condition codes. only one that can be set is 'N'.
579
# 'N' CAN be set if the operation is unsigned if bit 63 is set.
580
mov.w MUL64_CC(%a6),%d4
581
andi.b &0x10,%d4 # keep old 'X' bit
582
tst.l %d1 # may set 'N' bit
583
bpl.b mulu64_ddone
584
ori.b &0x8,%d4 # set 'N' bit
585
mulu64_ddone:
586
mov.w %d4,%cc
587
588
# here, hi(result) is in d1 and lo(result) is in d0. after the exg below
589
# they are saved, hi longword first, at the location addressed by the
590
# pointer argument held at 0x10(%a6).
# use movm here to not disturb the condition codes.
591
mulu64_end:
592
exg %d1,%d0 # d0 = hi(result), d1 = lo(result)
593
movm.l &0x0003,([0x10,%a6]) # save result
594
595
# EPILOGUE BEGIN ########################################################
596
# fmovm.l (%sp)+,&0x0 # restore no fpregs
597
movm.l (%sp)+,&0x001c # restore d2-d4
598
unlk %a6
599
# EPILOGUE END ##########################################################
600
601
rts
602
603
# one or both of the operands is zero so the result is also zero.
604
# save the zero result through the result pointer and set the 'Z' ccode bit.
605
mulu64_zero:
606
clr.l %d0
607
clr.l %d1
608
609
mov.w MUL64_CC(%a6),%d4
610
andi.b &0x10,%d4 # keep old 'X' bit
611
ori.b &0x4,%d4
612
mov.w %d4,%cc # set 'Z' ccode bit
613
614
bra.b mulu64_end
615
616
##########
617
# muls.l #
618
##########
619
# Signed 32 x 32 -> 64 bit multiply.
# After the link: 0x8(%a6) = multiplier, 0xc(%a6) = multiplicand,
# 0x10(%a6) = pointer to the 64-bit result (hi longword stored first).
# Both operands are made positive, multiplied as unsigned values, and the
# product is negated if exactly one operand was negative (tag in %d5).
global _060LSP__imuls64_
620
_060LSP__imuls64_:
621
622
# PROLOGUE BEGIN ########################################################
623
link.w %a6,&-4
624
movm.l &0x3c00,-(%sp) # save d2-d5
625
# fmovm.l &0x0,-(%sp) # save no fpregs
626
# PROLOGUE END ##########################################################
627
628
mov.w %cc,MUL64_CC(%a6) # save incoming ccodes
629
630
# NOTE(review): both zero checks below branch to mulu64_zero (the unsigned
# routine's zero path, whose epilogue restores only d2-d4), not to
# muls64_zero at the end of this routine, which nothing in this view
# references. %d5 has not been written yet and unlk resets %sp, so this
# looks harmless -- confirm intent.
mov.l 0x8(%a6),%d0 # store multiplier in d0
631
beq.b mulu64_zero # handle zero separately
632
633
mov.l 0xc(%a6),%d1 # get multiplicand in d1
634
beq.b mulu64_zero # handle zero separately
635
636
clr.b %d5 # clear sign tag
637
tst.l %d0 # is multiplier negative?
638
bge.b muls64_chk_md_sgn # no
639
neg.l %d0 # make multiplier positive
640
641
ori.b &0x1,%d5 # save multiplier sgn
642
643
# the result sign is the exclusive or of the operand sign bits.
644
muls64_chk_md_sgn:
645
tst.l %d1 # is multiplicand negative?
646
bge.b muls64_alg # no
647
neg.l %d1 # make multiplicand positive
648
649
eori.b &0x1,%d5 # calculate correct sign
650
651
#########################################################################
652
# 63 32 0 #
653
# ---------------------------- #
654
# | hi(mplier) * hi(mplicand)| #
655
# ---------------------------- #
656
# ----------------------------- #
657
# | hi(mplier) * lo(mplicand) | #
658
# ----------------------------- #
659
# ----------------------------- #
660
# | lo(mplier) * hi(mplicand) | #
661
# ----------------------------- #
662
# | ----------------------------- #
663
# --|-- | lo(mplier) * lo(mplicand) | #
664
# | ----------------------------- #
665
# ======================================================== #
666
# -------------------------------------------------------- #
667
# | hi(result) | lo(result) | #
668
# -------------------------------------------------------- #
669
#########################################################################
670
muls64_alg:
671
# load temp registers with operands
672
mov.l %d0,%d2 # mr in d2
673
mov.l %d0,%d3 # mr in d3
674
mov.l %d1,%d4 # md in d4
675
swap %d3 # hi(mr) in lo d3
676
swap %d4 # hi(md) in lo d4
677
678
# complete necessary multiplies:
679
mulu.w %d1,%d0 # [1] lo(mr) * lo(md)
680
mulu.w %d3,%d1 # [2] hi(mr) * lo(md)
681
mulu.w %d4,%d2 # [3] lo(mr) * hi(md)
682
mulu.w %d4,%d3 # [4] hi(mr) * hi(md)
683
684
# add lo portions of [2],[3] to hi portion of [1].
685
# add carries produced from these adds to [4].
686
# lo([1]) is the final lo 16 bits of the result.
687
clr.l %d4 # load d4 w/ zero value
688
swap %d0 # hi([1]) <==> lo([1])
689
add.w %d1,%d0 # hi([1]) + lo([2])
690
addx.l %d4,%d3 # [4] + carry
691
add.w %d2,%d0 # hi([1]) + lo([3])
692
addx.l %d4,%d3 # [4] + carry
693
swap %d0 # lo([1]) <==> hi([1])
694
695
# lo portions of [2],[3] have been added in to final result.
696
# now, clear lo, put hi in lo reg, and add to [4]
697
clr.w %d1 # clear lo([2])
698
clr.w %d2 # clear lo([3])
699
swap %d1 # hi([2]) in lo d1
700
swap %d2 # hi([3]) in lo d2
701
add.l %d2,%d1 # hi([2]) + hi([3])
702
add.l %d3,%d1 # ... + [4] = hi(result)
703
704
tst.b %d5 # should result be signed?
705
beq.b muls64_done # no
706
707
# result should be a signed negative number.
708
# compute 2's complement of the unsigned number:
709
# -negate all bits and add 1
710
muls64_neg:
711
not.l %d0 # negate lo(result) bits
712
not.l %d1 # negate hi(result) bits
713
addq.l &1,%d0 # add 1 to lo(result)
714
addx.l %d4,%d1 # add carry to hi(result) (d4 is still zero)
715
716
muls64_done:
717
mov.w MUL64_CC(%a6),%d4
718
andi.b &0x10,%d4 # keep old 'X' bit
719
tst.l %d1 # may set 'N' bit
720
bpl.b muls64_ddone
721
ori.b &0x8,%d4 # set 'N' bit
722
muls64_ddone:
723
mov.w %d4,%cc
724
725
# here, hi(result) is in d1 and lo(result) is in d0. after the exg below
726
# they are saved, hi longword first, at the location addressed by the
727
# pointer argument held at 0x10(%a6).
# use movm here to not disturb the condition codes.
728
muls64_end:
729
exg %d1,%d0 # d0 = hi(result), d1 = lo(result)
730
movm.l &0x0003,([0x10,%a6]) # save result through the result pointer
731
732
# EPILOGUE BEGIN ########################################################
733
# fmovm.l (%sp)+,&0x0 # restore no fpregs
734
movm.l (%sp)+,&0x003c # restore d2-d5
735
unlk %a6
736
# EPILOGUE END ##########################################################
737
738
rts
739
740
# one or both of the operands is zero so the result is also zero.
741
# save the zero result through the result pointer and set the 'Z' ccode bit.
# NOTE(review): no branch to this label is visible in this file view.
742
muls64_zero:
743
clr.l %d0
744
clr.l %d1
745
746
mov.w MUL64_CC(%a6),%d4
747
andi.b &0x10,%d4 # keep old 'X' bit
748
ori.b &0x4,%d4
749
mov.w %d4,%cc # set 'Z' ccode bit
750
751
bra.b muls64_end
752
753
#########################################################################
754
# XDEF **************************************************************** #
755
# _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
756
# _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
757
# _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
758
# _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
759
# _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
760
# _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
761
# #
762
# This is the library version which is accessed as a subroutine #
763
# and therefore does not work exactly like the 680X0 "cmp2" #
764
# instruction. #
765
# #
766
# XREF **************************************************************** #
767
# None #
768
# #
769
# INPUT *************************************************************** #
770
# 0x4(sp) = Rn #
771
# 0x8(sp) = pointer to boundary pair #
772
# #
773
# OUTPUT ************************************************************** #
774
# cc = condition codes are set correctly #
775
# #
776
# ALGORITHM *********************************************************** #
777
# In the interest of simplicity, all operands are converted to #
778
# longword size whether the operation is byte, word, or long. The #
779
# bounds are sign extended accordingly. If Rn is a data register, Rn is #
780
# also sign extended. If Rn is an address register, it need not be sign #
781
# extended since the full register is always used. #
782
# The condition codes are set correctly before the final "rts". #
783
# #
784
#########################################################################
785
786
set CMP2_CC, -4 # frame offset (from %a6) of the saved incoming ccodes
787
788
# cmp2.b with an address-register operand: the byte bounds are sign
# extended to longwords; the register value is used at full width.
# After the link: 0x8(%a6) = Rn, 0xc(%a6) = pointer to the bound pair.
global _060LSP__cmp2_Ab_
789
_060LSP__cmp2_Ab_:
790
791
# PROLOGUE BEGIN ########################################################
792
link.w %a6,&-4
793
movm.l &0x3800,-(%sp) # save d2-d4
794
# fmovm.l &0x0,-(%sp) # save no fpregs
795
# PROLOGUE END ##########################################################
796
797
mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
798
mov.l 0x8(%a6), %d2 # get regval
799
800
mov.b ([0xc,%a6],0x0),%d0 # lo bound = first byte of the pair
801
mov.b ([0xc,%a6],0x1),%d1 # hi bound = second byte of the pair
802
803
extb.l %d0 # sign extend lo bnd
804
extb.l %d1 # sign extend hi bnd
805
bra.w l_cmp2_cmp # go do the compare emulation
806
807
# cmp2.w with an address-register operand: the word bounds are sign
# extended to longwords; the register value is used at full width.
# After the link: 0x8(%a6) = Rn, 0xc(%a6) = pointer to the bound pair.
global _060LSP__cmp2_Aw_
808
_060LSP__cmp2_Aw_:
809
810
# PROLOGUE BEGIN ########################################################
811
link.w %a6,&-4
812
movm.l &0x3800,-(%sp) # save d2-d4
813
# fmovm.l &0x0,-(%sp) # save no fpregs
814
# PROLOGUE END ##########################################################
815
816
mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
817
mov.l 0x8(%a6), %d2 # get regval
818
819
mov.w ([0xc,%a6],0x0),%d0 # lo bound = first word of the pair
820
mov.w ([0xc,%a6],0x2),%d1 # hi bound = second word of the pair
821
822
ext.l %d0 # sign extend lo bnd
823
ext.l %d1 # sign extend hi bnd
824
bra.w l_cmp2_cmp # go do the compare emulation
825
826
# cmp2.l with an address-register operand: bounds and register value are
# already longwords, so no extension is needed.
# After the link: 0x8(%a6) = Rn, 0xc(%a6) = pointer to the bound pair.
global _060LSP__cmp2_Al_
827
_060LSP__cmp2_Al_:
828
829
# PROLOGUE BEGIN ########################################################
830
link.w %a6,&-4
831
movm.l &0x3800,-(%sp) # save d2-d4
832
# fmovm.l &0x0,-(%sp) # save no fpregs
833
# PROLOGUE END ##########################################################
834
835
mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
836
mov.l 0x8(%a6), %d2 # get regval
837
838
mov.l ([0xc,%a6],0x0),%d0 # lo bound = first longword of the pair
839
mov.l ([0xc,%a6],0x4),%d1 # hi bound = second longword of the pair
840
bra.w l_cmp2_cmp # go do the compare emulation
841
842
# cmp2.b with a data-register operand: the byte bounds and the low byte
# of the register value are all sign extended to longwords.
# After the link: 0x8(%a6) = Rn, 0xc(%a6) = pointer to the bound pair.
global _060LSP__cmp2_Db_
843
_060LSP__cmp2_Db_:
844
845
# PROLOGUE BEGIN ########################################################
846
link.w %a6,&-4
847
movm.l &0x3800,-(%sp) # save d2-d4
848
# fmovm.l &0x0,-(%sp) # save no fpregs
849
# PROLOGUE END ##########################################################
850
851
mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
852
mov.l 0x8(%a6), %d2 # get regval
853
854
mov.b ([0xc,%a6],0x0),%d0 # lo bound = first byte of the pair
855
mov.b ([0xc,%a6],0x1),%d1 # hi bound = second byte of the pair
856
857
extb.l %d0 # sign extend lo bnd
858
extb.l %d1 # sign extend hi bnd
859
860
# operation is a data register compare.
861
# sign extend byte to long so we can do simple longword compares.
862
extb.l %d2 # sign extend data byte
863
bra.w l_cmp2_cmp # go do the compare emulation
864
865
# cmp2.w with a data-register operand: the word bounds and the low word
# of the register value are all sign extended to longwords.
# After the link: 0x8(%a6) = Rn, 0xc(%a6) = pointer to the bound pair.
global _060LSP__cmp2_Dw_
866
_060LSP__cmp2_Dw_:
867
868
# PROLOGUE BEGIN ########################################################
869
link.w %a6,&-4
870
movm.l &0x3800,-(%sp) # save d2-d4
871
# fmovm.l &0x0,-(%sp) # save no fpregs
872
# PROLOGUE END ##########################################################
873
874
mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
875
mov.l 0x8(%a6), %d2 # get regval
876
877
mov.w ([0xc,%a6],0x0),%d0 # lo bound = first word of the pair
878
mov.w ([0xc,%a6],0x2),%d1 # hi bound = second word of the pair
879
880
ext.l %d0 # sign extend lo bnd
881
ext.l %d1 # sign extend hi bnd
882
883
# operation is a data register compare.
884
# sign extend word to long so we can do simple longword compares.
885
ext.l %d2 # sign extend data word
886
bra.w l_cmp2_cmp # go emulate compare
887
888
# cmp2.l with a data-register operand; falls straight through into the
# shared compare code at l_cmp2_cmp, which every cmp2 entry point uses.
# After the link: 0x8(%a6) = Rn, 0xc(%a6) = pointer to the bound pair.
global _060LSP__cmp2_Dl_
889
_060LSP__cmp2_Dl_:
890
891
# PROLOGUE BEGIN ########################################################
892
link.w %a6,&-4
893
movm.l &0x3800,-(%sp) # save d2-d4
894
# fmovm.l &0x0,-(%sp) # save no fpregs
895
# PROLOGUE END ##########################################################
896
897
mov.w %cc,CMP2_CC(%a6) # save incoming ccodes
898
mov.l 0x8(%a6), %d2 # get regval
899
900
mov.l ([0xc,%a6],0x0),%d0 # lo bound = first longword of the pair
901
mov.l ([0xc,%a6],0x4),%d1 # hi bound = second longword of the pair
902
903
#
904
# To set the ccodes correctly:
905
# (1) save 'Z' bit from (Rn - lo)
906
# (2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
907
# (3) keep 'X', 'N', and 'V' from before instruction
908
# (4) combine ccodes
909
#
# On entry here: %d0 = lo bound, %d1 = hi bound, %d2 = Rn (all longwords).
910
l_cmp2_cmp:
911
sub.l %d0, %d2 # (Rn - lo)
912
mov.w %cc, %d3 # fetch resulting ccodes
913
andi.b &0x4, %d3 # keep 'Z' bit
914
sub.l %d0, %d1 # (hi - lo)
915
cmp.l %d1,%d2 # ((hi - lo) - (Rn - lo))
916
917
mov.w %cc, %d4 # fetch resulting ccodes
918
or.b %d4, %d3 # combine w/ earlier ccodes
919
andi.b &0x5, %d3 # keep 'Z' and 'C'
920
921
mov.w CMP2_CC(%a6), %d4 # fetch old ccodes
922
andi.b &0x1a, %d4 # keep 'X','N','V' bits
923
or.b %d3, %d4 # insert new ccodes
924
mov.w %d4,%cc # save new ccodes
925
926
# EPILOGUE BEGIN ########################################################
927
# fmovm.l (%sp)+,&0x0 # restore no fpregs
928
movm.l (%sp)+,&0x001c # restore d2-d4
929
unlk %a6
930
# EPILOGUE END ##########################################################
931
932
rts
933
934