Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/TexLoad8b.h
2 views
1
/*
2
* Glide64 - Glide video plugin for Nintendo 64 emulators.
3
* Copyright (c) 2002 Dave2001
4
* Copyright (c) 2008 Günther <[email protected]>
5
*
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* any later version.
10
*
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
15
*
16
* You should have received a copy of the GNU General Public
17
* License along with this program; if not, write to the Free
18
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA
20
*/
21
22
//****************************************************************
23
//
24
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
25
// Project started on December 29th, 2001
26
//
27
// To modify Glide64:
28
// * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
29
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30
//
31
// Official Glide64 development channel: #Glide64 on EFnet
32
//
33
// Original author: Dave2001 ([email protected])
34
// Other authors: Gonetz, Gugaman
35
//
36
//****************************************************************
37
38
// Load8bCI - expand 8-bit palette-indexed texels into 16-bit texels.
//
// Every source byte selects one 16-bit entry of rdp.pal_8.  The entry is
// rotated right inside its 16 bits and written to dst as two bytes, low byte
// first:
//   * rdp.tlut_mode == 2 : rotate by 1, reported as GR_TEXFMT_ARGB_1555
//   * any other mode     : rotate by 8 (byte swap), reported as
//                          GR_TEXFMT_ALPHA_INTENSITY_88
//
// Line layout:
//   * one line is wid_64 groups of 8 source bytes (16 destination bytes);
//   * on odd lines (1, 3, 5, ...) the two 4-byte halves of every group are
//     taken in swapped order (bytes 4..7 first, then bytes 0..3);
//   * between two lines src skips `line` extra bytes and dst skips
//     (real_width - wid_64 * 8) * 2 extra bytes, i.e. dst has a pitch of
//     real_width texels.
//
// Parameters:
//   dst        - destination buffer for the 16-bit texels
//   src        - source buffer holding the 8-bit palette indices
//   wid_64     - number of 8-byte groups per line; values < 1 are treated as 1
//   height     - number of lines; values < 1 are treated as 1
//   line       - extra source bytes to skip after each line
//   real_width - destination pitch in texels
//   tile       - not used by this loader
//
// Returns (1 << 16) combined with the Glide texture format listed above.
//
// This is plain C++ rather than x86 inline assembly so that the texture is
// also converted when NO_ASM is defined or the target is not x86; the bytes
// written to dst are the same ones the assembly versions produced.
DWORD Load8bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
{
  if (wid_64 < 1) wid_64 = 1;
  if (height < 1) height = 1;

  // Destination padding after each line, in bytes (two bytes per texel).
  const int ext = (real_width - (wid_64 << 3)) << 1;
  const unsigned short * pal = rdp.pal_8;

  // The palette mode is sampled once and decides both the rotation applied
  // to every palette entry and the format code that is returned.
  const int rotate_by_one = (rdp.tlut_mode == 2);
  const unsigned int rot = rotate_by_one ? 1u : 8u;

  for (int y = 0; y < height; y++)
  {
    // Advance to the next line only when there is one, so the pointers are
    // never moved past the last line that was processed.
    if (y > 0)
    {
      src += line;
      dst += ext;
    }

    // XOR-ing the byte index with 4 exchanges the two 32-bit halves of an
    // 8-byte group; this is applied on odd lines only.
    const int swap = (y & 1) ? 4 : 0;

    for (int x = 0; x < wid_64; x++)
    {
      for (int i = 0; i < 8; i++)
      {
        const unsigned int entry = pal[src[i ^ swap]];
        // 16-bit rotate right by `rot` bits.
        const unsigned int texel = ((entry >> rot) | (entry << (16u - rot))) & 0xFFFFu;

        // Store low byte first, byte by byte, so no alignment of dst is
        // assumed.
        dst[0] = (unsigned char)(texel & 0xFFu);
        dst[1] = (unsigned char)(texel >> 8);
        dst += 2;
      }
      src += 8;
    }
  }

  if (rotate_by_one)
    return (1 << 16) | GR_TEXFMT_ARGB_1555;

  return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;
}
875
876
//****************************************************************
877
// Size: 1, Format: 3
878
//
879
// ** by Gugaman **
880
881
// Load8bIA - convert 8-bit texels by exchanging the two 4-bit halves of
// every byte.
//
// When rdp.tlut_mode is non-zero the call is forwarded unchanged to
// Load8bCI.  Otherwise each source byte is written to dst with its high and
// low nibble swapped.
//
// Line layout:
//   * one line is wid_64 groups of 8 bytes, source and destination alike;
//   * on odd lines (1, 3, 5, ...) the two 4-byte halves of every group are
//     taken in swapped order (bytes 4..7 first, then bytes 0..3);
//   * between two lines src skips `line` extra bytes and dst skips
//     (real_width - wid_64 * 8) extra bytes, i.e. dst has a pitch of
//     real_width bytes.
//
// Parameters:
//   dst        - destination buffer
//   src        - source buffer
//   wid_64     - number of 8-byte groups per line; values < 1 are treated as 1
//   height     - number of lines; values < 1 are treated as 1
//   line       - extra source bytes to skip after each line
//   real_width - destination pitch in bytes
//   tile       - only passed through to Load8bCI
//
// Returns GR_TEXFMT_ALPHA_INTENSITY_44, or whatever Load8bCI returns when the
// call is forwarded.
//
// This is plain C++ rather than x86 inline assembly so that the texture is
// also converted when NO_ASM is defined or the target is not x86; the bytes
// written to dst are the same ones the assembly versions produced.
DWORD Load8bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
{
  if (rdp.tlut_mode != 0)
    return Load8bCI (dst, src, wid_64, height, line, real_width, tile);

  if (wid_64 < 1) wid_64 = 1;
  if (height < 1) height = 1;

  // Destination padding after each line, in bytes (one byte per texel).
  const int ext = real_width - (wid_64 << 3);

  for (int y = 0; y < height; y++)
  {
    // Advance to the next line only when there is one, so the pointers are
    // never moved past the last line that was processed.
    if (y > 0)
    {
      src += line;
      dst += ext;
    }

    // XOR-ing the byte index with 4 exchanges the two 32-bit halves of an
    // 8-byte group; this is applied on odd lines only.
    const int swap = (y & 1) ? 4 : 0;

    for (int x = 0; x < wid_64; x++)
    {
      for (int i = 0; i < 8; i++)
      {
        const unsigned int texel = src[i ^ swap];
        // High nibble moves to the low nibble and vice versa.
        dst[i] = (unsigned char)(((texel >> 4) | (texel << 4)) & 0xFFu);
      }
      src += 8;
      dst += 8;
    }
  }

  return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
}
1091
1092
//****************************************************************
1093
// Size: 1, Format: 4
1094
//
1095
// ** by Gugaman **
1096
1097
// Load8bI - copy 8-bit texels without changing their value.
//
// When rdp.tlut_mode is non-zero the call is forwarded unchanged to
// Load8bCI.  Otherwise the source bytes are copied to dst as they are.
//
// Line layout:
//   * one line is wid_64 groups of 8 bytes, source and destination alike;
//   * on odd lines (1, 3, 5, ...) the two 4-byte halves of every group are
//     taken in swapped order (bytes 4..7 first, then bytes 0..3);
//   * between two lines src skips `line` extra bytes and dst skips
//     (real_width - wid_64 * 8) extra bytes, i.e. dst has a pitch of
//     real_width bytes.
//
// Parameters:
//   dst        - destination buffer
//   src        - source buffer
//   wid_64     - number of 8-byte groups per line; values < 1 are treated as 1
//   height     - number of lines; values < 1 are treated as 1
//   line       - extra source bytes to skip after each line
//   real_width - destination pitch in bytes
//   tile       - only passed through to Load8bCI
//
// Returns GR_TEXFMT_ALPHA_8, or whatever Load8bCI returns when the call is
// forwarded.
//
// This is plain C++ rather than x86 inline assembly so that the texture is
// also copied when NO_ASM is defined or the target is not x86; the bytes
// written to dst are the same ones the assembly versions produced.
DWORD Load8bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
{
  if (rdp.tlut_mode != 0)
    return Load8bCI (dst, src, wid_64, height, line, real_width, tile);

  if (wid_64 < 1) wid_64 = 1;
  if (height < 1) height = 1;

  // Destination padding after each line, in bytes (one byte per texel).
  const int ext = real_width - (wid_64 << 3);

  for (int y = 0; y < height; y++)
  {
    // Advance to the next line only when there is one, so the pointers are
    // never moved past the last line that was processed.
    if (y > 0)
    {
      src += line;
      dst += ext;
    }

    // XOR-ing the byte index with 4 exchanges the two 32-bit halves of an
    // 8-byte group; this is applied on odd lines only.
    const int swap = (y & 1) ? 4 : 0;

    for (int x = 0; x < wid_64; x++)
    {
      for (int i = 0; i < 8; i++)
        dst[i] = src[i ^ swap];

      src += 8;
      dst += 8;
    }
  }

  return /*(0 << 16) | */GR_TEXFMT_ALPHA_8;
}
1227
1228
1229