CoCalc -- TexLoad8b.h

GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/TexLoad8b.h
² views
1
/*
2
*   Glide64 - Glide video plugin for Nintendo 64 emulators.
3
*   Copyright (c) 2002  Dave2001
4
*   Copyright (c) 2008  Günther <[email protected]>
5
*
6
*   This program is free software; you can redistribute it and/or modify
7
*   it under the terms of the GNU General Public License as published by
8
*   the Free Software Foundation; either version 2 of the License, or
9
*   any later version.
10
*
11
*   This program is distributed in the hope that it will be useful,
12
*   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
*   GNU General Public License for more details.
15
*
16
*   You should have received a copy of the GNU General Public
17
*   License along with this program; if not, write to the Free
18
*   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 
19
*   Boston, MA  02110-1301, USA
20
*/
21

22
//****************************************************************
23
//
24
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
25
// Project started on December 29th, 2001
26
//
27
// To modify Glide64:
28
// * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
29
// * Do NOT send me the whole project or file that you modified.  Take out your modified code sections, and tell me where to put them.  If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30
//
31
// Official Glide64 development channel: #Glide64 on EFnet
32
//
33
// Original author: Dave2001 ([email protected])
34
// Other authors: Gonetz, Gugaman
35
//
36
//****************************************************************
37

38
DWORD Load8bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
39
{
40
    if (wid_64 < 1) wid_64 = 1;
41
    if (height < 1) height = 1;
42
    int ext = (real_width - (wid_64 << 3)) << 1;
43
    unsigned short * pal = rdp.pal_8;
44

45
    if (rdp.tlut_mode == 2)
46
    {
47
#if !defined(__GNUC__) && !defined(NO_ASM)
48
        __asm {
49
            mov ebx,dword ptr [pal]
50
                
51
                mov esi,dword ptr [src]
52
                mov edi,dword ptr [dst]
53
                
54
                mov ecx,dword ptr [height]
55
y_loop:
56
            push ecx
57
                
58
                mov ecx,dword ptr [wid_64]
59
x_loop:
60
            push ecx
61
                
62
                mov eax,dword ptr [esi]     // read all 4 pixels
63
                bswap eax
64
                add esi,4
65
                mov edx,eax
66
                
67
                // 1st dword output {
68
                shr eax,15
69
                and eax,0x1FE
70
                mov cx,word ptr [ebx+eax]
71
                ror cx,1
72
                shl ecx,16
73
                
74
                mov eax,edx
75
                shr eax,23
76
                and eax,0x1FE
77
                mov cx,word ptr [ebx+eax]
78
                ror cx,1
79
                
80
                mov dword ptr [edi],ecx
81
                add edi,4
82
                // }
83
                
84
                // 2nd dword output {
85
                mov eax,edx
86
                shl eax,1
87
                and eax,0x1FE
88
                mov cx,word ptr [ebx+eax]
89
                ror cx,1
90
                shl ecx,16
91
                
92
                shr edx,7
93
                and edx,0x1FE
94
                mov cx,word ptr [ebx+edx]
95
                ror cx,1
96
                
97
                mov dword ptr [edi],ecx
98
                add edi,4
99
                // }
100
                
101
                // * copy
102
                mov eax,dword ptr [esi]     // read all 4 pixels
103
                bswap eax
104
                add esi,4
105
                mov edx,eax
106
                
107
                // 1st dword output {
108
                shr eax,15
109
                and eax,0x1FE
110
                mov cx,word ptr [ebx+eax]
111
                ror cx,1
112
                shl ecx,16
113
                
114
                mov eax,edx
115
                shr eax,23
116
                and eax,0x1FE
117
                mov cx,word ptr [ebx+eax]
118
                ror cx,1
119
                
120
                mov dword ptr [edi],ecx
121
                add edi,4
122
                // }
123
                
124
                // 2nd dword output {
125
                mov eax,edx
126
                shl eax,1
127
                and eax,0x1FE
128
                mov cx,word ptr [ebx+eax]
129
                ror cx,1
130
                shl ecx,16
131
                
132
                shr edx,7
133
                and edx,0x1FE
134
                mov cx,word ptr [ebx+edx]
135
                ror cx,1
136
                
137
                mov dword ptr [edi],ecx
138
                add edi,4
139
                // }
140
                // *
141
                
142
                pop ecx
143
                
144
                dec ecx
145
                jnz x_loop
146
                
147
                pop ecx
148
                dec ecx
149
                jz end_y_loop
150
                push ecx
151
                
152
                add esi,dword ptr [line]
153
                add edi,dword ptr [ext]
154
                
155
                mov ecx,dword ptr [wid_64]
156
x_loop_2:
157
            push ecx
158
                
159
                mov eax,dword ptr [esi+4]       // read all 4 pixels
160
                bswap eax
161
                mov edx,eax
162
                
163
                // 1st dword output {
164
                shr eax,15
165
                and eax,0x1FE
166
                mov cx,word ptr [ebx+eax]
167
                ror cx,1
168
                shl ecx,16
169
                
170
                mov eax,edx
171
                shr eax,23
172
                and eax,0x1FE
173
                mov cx,word ptr [ebx+eax]
174
                ror cx,1
175
                
176
                mov dword ptr [edi],ecx
177
                add edi,4
178
                // }
179
                
180
                // 2nd dword output {
181
                mov eax,edx
182
                shl eax,1
183
                and eax,0x1FE
184
                mov cx,word ptr [ebx+eax]
185
                ror cx,1
186
                shl ecx,16
187
                
188
                shr edx,7
189
                and edx,0x1FE
190
                mov cx,word ptr [ebx+edx]
191
                ror cx,1
192
                
193
                mov dword ptr [edi],ecx
194
                add edi,4
195
                // }
196
                
197
                // * copy
198
                mov eax,dword ptr [esi]     // read all 4 pixels
199
                bswap eax
200
                add esi,8
201
                mov edx,eax
202
                
203
                // 1st dword output {
204
                shr eax,15
205
                and eax,0x1FE
206
                mov cx,word ptr [ebx+eax]
207
                ror cx,1
208
                shl ecx,16
209
                
210
                mov eax,edx
211
                shr eax,23
212
                and eax,0x1FE
213
                mov cx,word ptr [ebx+eax]
214
                ror cx,1
215
                
216
                mov dword ptr [edi],ecx
217
                add edi,4
218
                // }
219
                
220
                // 2nd dword output {
221
                mov eax,edx
222
                shl eax,1
223
                and eax,0x1FE
224
                mov cx,word ptr [ebx+eax]
225
                ror cx,1
226
                shl ecx,16
227
                
228
                shr edx,7
229
                and edx,0x1FE
230
                mov cx,word ptr [ebx+edx]
231
                ror cx,1
232
                
233
                mov dword ptr [edi],ecx
234
                add edi,4
235
                // }
236
                // *
237
                
238
                pop ecx
239
                
240
                dec ecx
241
                jnz x_loop_2
242
                
243
                add esi,dword ptr [line]
244
                add edi,dword ptr [ext]
245
                
246
                pop ecx
247
                dec ecx
248
                jnz y_loop
249
                
250
end_y_loop:
251
        }
252
#elif !defined(NO_ASM)
253
       //printf("Load8bCI1\n");
254
       long lTempX, lTempY, lHeight = (long) height;
255
       intptr_t fake_eax, fake_edx;
256
       asm volatile (
257
             "1:                     \n"  // y_loop4
258
             "mov %[c], %[tempy]     \n"
259
                
260
             "mov %[wid_64], %%ecx   \n"
261
             "2:                     \n"  // x_loop4
262
             "mov %[c], %[tempx]     \n"
263
             
264
             "mov (%[src]), %%eax      \n"      // read all 4 pixels
265
             "bswap %%eax             \n"
266
             "add $4, %[src]           \n"
267
             "mov %%eax, %%edx        \n"
268
             
269
             // 1st dword output {
270
             "shr $15, %%eax          \n"
271
             "and $0x1FE, %%eax       \n"
272
             "mov (%[pal],%[a]), %%cx \n"
273
             "ror $1, %%cx            \n"
274
             "shl $16, %%ecx          \n"
275
             
276
             "mov %%edx, %%eax        \n"
277
             "shr $23, %%eax          \n"
278
             "and $0x1FE, %%eax       \n"
279
             "mov (%[pal],%[a]), %%cx \n"
280
             "ror $1, %%cx            \n"
281
             
282
             "mov %%ecx, (%[dst])      \n"
283
             "add $4, %[dst]           \n"
284
             // }
285
                
286
             // 2nd dword output {
287
             "mov %%edx, %%eax        \n"
288
             "shl $1, %%eax           \n"
289
             "and $0x1FE, %%eax       \n"
290
             "mov (%[pal],%[a]), %%cx \n"
291
             "ror $1, %%cx            \n"
292
             "shl $16, %%ecx          \n"
293
             
294
             "shr $7, %%edx           \n"
295
             "and $0x1FE, %%edx       \n"
296
             "mov (%[pal],%[d]), %%cx \n"
297
             "ror $1, %%cx            \n"
298
             
299
             "mov %%ecx, (%[dst])      \n"
300
             "add $4, %[dst]           \n"
301
             // }
302
                
303
             // * copy
304
             "mov (%[src]), %%eax      \n"      // read all 4 pixels
305
             "bswap %%eax             \n"
306
             "add $4, %[src]           \n"
307
             "mov %%eax, %%edx        \n"
308
             
309
             // 1st dword output {
310
             "shr $15, %%eax          \n"
311
             "and $0x1FE, %%eax       \n"
312
             "mov (%[pal],%[a]), %%cx \n"
313
             "ror $1, %%cx            \n"
314
             "shl $16, %%ecx          \n"
315
             
316
             "mov %%edx, %%eax        \n"
317
             "shr $23, %%eax          \n"
318
             "and $0x1FE, %%eax       \n"
319
             "mov (%[pal],%[a]), %%cx \n"
320
             "ror $1, %%cx            \n"
321
             
322
             "mov %%ecx, (%[dst])      \n"
323
             "add $4, %[dst]           \n"
324
             // }
325
                
326
             // 2nd dword output {
327
             "mov %%edx, %%eax        \n"
328
             "shl $1, %%eax           \n"
329
             "and $0x1FE, %%eax       \n"
330
             "mov (%[pal],%[a]), %%cx \n"
331
             "ror $1, %%cx            \n"
332
             "shl $16, %%ecx          \n"
333
             
334
             "shr $7, %%edx           \n"
335
             "and $0x1FE, %%edx       \n"
336
             "mov (%[pal],%[d]), %%cx \n"
337
             "ror $1, %%cx            \n"
338
             
339
             "mov %%ecx, (%[dst])      \n"
340
             "add $4, %[dst]           \n"
341
             // }
342
             // *
343
                
344
             "mov %[tempx], %[c]     \n"
345

346
             "dec %%ecx               \n"
347
             "jnz 2b                  \n"  // x_loop4
348
             
349
             "mov %[tempy], %[c]      \n"
350
             "dec %%ecx               \n"
351
             "jz 4f                   \n"  // end_y_loop4
352
             "mov %[c], %[tempy]      \n"
353
             
354
             "add %[line], %[src]     \n"
355
             "add %[ext], %[dst]      \n"
356
             
357
             "mov %[wid_64], %%ecx   \n"
358
             "3:                     \n"  // x_loop_24
359
             "mov %[c], %[tempx]     \n"
360
             
361
             "mov 4(%[src]), %%eax     \n"      // read all 4 pixels
362
             "bswap %%eax             \n"
363
             "mov %%eax, %%edx        \n"
364
             
365
             // 1st dword output {
366
             "shr $15, %%eax          \n"
367
             "and $0x1FE, %%eax       \n"
368
             "mov (%[pal],%[a]), %%cx \n"
369
             "ror $1, %%cx            \n"
370
             "shl $16, %%ecx          \n"
371
             
372
             "mov %%edx, %%eax        \n"
373
             "shr $23, %%eax          \n"
374
             "and $0x1FE, %%eax       \n"
375
             "mov (%[pal],%[a]), %%cx \n"
376
             "ror $1, %%cx            \n"
377
             
378
             "mov %%ecx, (%[dst])      \n"
379
             "add $4, %[dst]           \n"
380
             // }
381
                
382
             // 2nd dword output {
383
             "mov %%edx, %%eax        \n"
384
             "shl $1, %%eax           \n"
385
             "and $0x1FE, %%eax       \n"
386
             "mov (%[pal],%[a]), %%cx \n"
387
             "ror $1, %%cx            \n"
388
             "shl $16, %%ecx          \n"
389
             
390
             "shr $7, %%edx           \n"
391
             "and $0x1FE, %%edx       \n"
392
             "mov (%[pal],%[d]), %%cx \n"
393
             "ror $1, %%cx            \n"
394
             
395
             "mov %%ecx, (%[dst])      \n"
396
             "add $4, %[dst]           \n"
397
             // }
398
             
399
             // * copy
400
             "mov (%[src]), %%eax      \n"      // read all 4 pixels
401
             "bswap %%eax             \n"
402
             "add $8, %[src]           \n"
403
             "mov %%eax, %%edx        \n"
404
             
405
             // 1st dword output {
406
             "shr $15, %%eax          \n"
407
             "and $0x1FE, %%eax       \n"
408
             "mov (%[pal],%[a]), %%cx \n"
409
             "ror $1, %%cx            \n"
410
             "shl $16, %%ecx          \n"
411
             
412
             "mov %%edx, %%eax        \n"
413
             "shr $23, %%eax          \n"
414
             "and $0x1FE, %%eax       \n"
415
             "mov (%[pal],%[a]), %%cx \n"
416
             "ror $1, %%cx            \n"
417
             
418
             "mov %%ecx, (%[dst])      \n"
419
             "add $4, %[dst]           \n"
420
             // }
421
             
422
             // 2nd dword output {
423
             "mov %%edx, %%eax        \n"
424
             "shl $1, %%eax           \n"
425
             "and $0x1FE, %%eax       \n"
426
             "mov (%[pal],%[a]), %%cx \n"
427
             "ror $1, %%cx            \n"
428
             "shl $16, %%ecx          \n"
429
             
430
             "shr $7, %%edx           \n"
431
             "and $0x1FE, %%edx       \n"
432
             "mov (%[pal],%[d]), %%cx \n"
433
             "ror $1, %%cx            \n"
434
             
435
             "mov %%ecx, (%[dst])      \n"
436
             "add $4, %[dst]           \n"
437
             // }
438
             // *
439
             
440
             "mov %[tempx], %[c]      \n"
441
             "dec %%ecx               \n"
442
             "jnz 3b                  \n"  // x_loop_24
443
             
444
             "add %[line], %[src]     \n"
445
             "add %[ext], %[dst]      \n"
446
             
447
             "mov %[tempy], %[c]      \n"
448
             "dec %%ecx               \n"
449
             "jnz 1b                  \n"  // y_loop4
450
             
451
             "4:                      \n"  // end_y_loop4
452
             : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
453
             : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
454
             : "memory", "cc"
455
             );
456
#endif
457
    return (1 << 16) | GR_TEXFMT_ARGB_1555;
458
    }
459
    else
460
    {
461
#if !defined(__GNUC__) && !defined(NO_ASM)
462
        __asm {
463
            mov ebx,dword ptr [pal]
464
                
465
                mov esi,dword ptr [src]
466
                mov edi,dword ptr [dst]
467
                
468
                mov ecx,dword ptr [height]
469
ia_y_loop:
470
            push ecx
471
                
472
                mov ecx,dword ptr [wid_64]
473
ia_x_loop:
474
            push ecx
475
                
476
                mov eax,dword ptr [esi]     // read all 4 pixels
477
                bswap eax
478
                add esi,4
479
                mov edx,eax
480
                
481
                // 1st dword output {
482
                shr eax,15
483
                and eax,0x1FE
484
                mov cx,word ptr [ebx+eax]
485
                ror cx,8
486
                shl ecx,16
487
                
488
                mov eax,edx
489
                shr eax,23
490
                and eax,0x1FE
491
                mov cx,word ptr [ebx+eax]
492
                ror cx,8
493
                
494
                mov dword ptr [edi],ecx
495
                add edi,4
496
                // }
497
                
498
                // 2nd dword output {
499
                mov eax,edx
500
                shl eax,1
501
                and eax,0x1FE
502
                mov cx,word ptr [ebx+eax]
503
                ror cx,8
504
                shl ecx,16
505
                
506
                shr edx,7
507
                and edx,0x1FE
508
                mov cx,word ptr [ebx+edx]
509
                ror cx,8
510
                
511
                mov dword ptr [edi],ecx
512
                add edi,4
513
                // }
514
                
515
                // * copy
516
                mov eax,dword ptr [esi]     // read all 4 pixels
517
                bswap eax
518
                add esi,4
519
                mov edx,eax
520
                
521
                // 1st dword output {
522
                shr eax,15
523
                and eax,0x1FE
524
                mov cx,word ptr [ebx+eax]
525
                ror cx,8
526
                shl ecx,16
527
                
528
                mov eax,edx
529
                shr eax,23
530
                and eax,0x1FE
531
                mov cx,word ptr [ebx+eax]
532
                ror cx,8
533
                
534
                mov dword ptr [edi],ecx
535
                add edi,4
536
                // }
537
                
538
                // 2nd dword output {
539
                mov eax,edx
540
                shl eax,1
541
                and eax,0x1FE
542
                mov cx,word ptr [ebx+eax]
543
                ror cx,8
544
                shl ecx,16
545
                
546
                shr edx,7
547
                and edx,0x1FE
548
                mov cx,word ptr [ebx+edx]
549
                ror cx,8
550
                
551
                mov dword ptr [edi],ecx
552
                add edi,4
553
                // }
554
                // *
555
                
556
                pop ecx
557
                
558
                dec ecx
559
                jnz ia_x_loop
560
                
561
                pop ecx
562
                dec ecx
563
                jz ia_end_y_loop
564
                push ecx
565
                
566
                add esi,dword ptr [line]
567
                add edi,dword ptr [ext]
568
                
569
                mov ecx,dword ptr [wid_64]
570
ia_x_loop_2:
571
            push ecx
572
                
573
                mov eax,dword ptr [esi+4]       // read all 4 pixels
574
                bswap eax
575
                mov edx,eax
576
                
577
                // 1st dword output {
578
                shr eax,15
579
                and eax,0x1FE
580
                mov cx,word ptr [ebx+eax]
581
                ror cx,8
582
                shl ecx,16
583
                
584
                mov eax,edx
585
                shr eax,23
586
                and eax,0x1FE
587
                mov cx,word ptr [ebx+eax]
588
                ror cx,8
589
                
590
                mov dword ptr [edi],ecx
591
                add edi,4
592
                // }
593
                
594
                // 2nd dword output {
595
                mov eax,edx
596
                shl eax,1
597
                and eax,0x1FE
598
                mov cx,word ptr [ebx+eax]
599
                ror cx,8
600
                shl ecx,16
601
                
602
                shr edx,7
603
                and edx,0x1FE
604
                mov cx,word ptr [ebx+edx]
605
                ror cx,8
606
                
607
                mov dword ptr [edi],ecx
608
                add edi,4
609
                // }
610
                
611
                // * copy
612
                mov eax,dword ptr [esi]     // read all 4 pixels
613
                bswap eax
614
                add esi,8
615
                mov edx,eax
616
                
617
                // 1st dword output {
618
                shr eax,15
619
                and eax,0x1FE
620
                mov cx,word ptr [ebx+eax]
621
                ror cx,8
622
                shl ecx,16
623
                
624
                mov eax,edx
625
                shr eax,23
626
                and eax,0x1FE
627
                mov cx,word ptr [ebx+eax]
628
                ror cx,8
629
                
630
                mov dword ptr [edi],ecx
631
                add edi,4
632
                // }
633
                
634
                // 2nd dword output {
635
                mov eax,edx
636
                shl eax,1
637
                and eax,0x1FE
638
                mov cx,word ptr [ebx+eax]
639
                ror cx,8
640
                shl ecx,16
641
                
642
                shr edx,7
643
                and edx,0x1FE
644
                mov cx,word ptr [ebx+edx]
645
                ror cx,8
646
                
647
                mov dword ptr [edi],ecx
648
                add edi,4
649
                // }
650
                // *
651
                
652
                pop ecx
653
                
654
                dec ecx
655
                jnz ia_x_loop_2
656
                
657
                add esi,dword ptr [line]
658
                add edi,dword ptr [ext]
659
                
660
                pop ecx
661
                dec ecx
662
                jnz ia_y_loop
663
                
664
ia_end_y_loop:
665
    }
666
#elif !defined(NO_ASM)
667
       //printf("Load8bCI1\n");
668
       long lTempX, lTempY, lHeight = (long) height;
669
        intptr_t fake_eax, fake_edx;
670
       asm volatile (
671
             "1:                      \n"  // ia_y_loop2
672
             "mov %[c], %[tempy]      \n"
673
                
674
             "mov %[wid_64], %%ecx   \n"
675
             "2:                     \n"  // ia_x_loop2
676
             "mov %[c], %[tempx]     \n"
677
             
678
             "mov (%[src]), %%eax      \n"      // read all 4 pixels
679
             "bswap %%eax             \n"
680
             "add $4, %[src]           \n"
681
             "mov %%eax, %%edx        \n"
682
             
683
             // 1st dword output {
684
             "shr $15, %%eax          \n"
685
             "and $0x1FE, %%eax       \n"
686
             "mov (%[pal],%[a]), %%cx \n"
687
             "ror $8, %%cx            \n"
688
             "shl $16, %%ecx          \n"
689
             
690
             "mov %%edx, %%eax        \n"
691
             "shr $23, %%eax          \n"
692
             "and $0x1FE, %%eax       \n"
693
             "mov (%[pal],%[a]), %%cx \n"
694
             "ror $8, %%cx            \n"
695
             
696
             "mov %%ecx, (%[dst])      \n"
697
             "add $4, %[dst]           \n"
698
             // }
699
                
700
             // 2nd dword output {
701
             "mov %%edx, %%eax        \n"
702
             "shl $1, %%eax           \n"
703
             "and $0x1FE, %%eax       \n"
704
             "mov (%[pal],%[a]), %%cx \n"
705
             "ror $8, %%cx            \n"
706
             "shl $16, %%ecx          \n"
707
             
708
             "shr $7, %%edx           \n"
709
             "and $0x1FE, %%edx       \n"
710
             "mov (%[pal],%[d]), %%cx \n"
711
             "ror $8, %%cx            \n"
712
             
713
             "mov %%ecx, (%[dst])      \n"
714
             "add $4, %[dst]           \n"
715
             // }
716
                
717
             // * copy
718
             "mov (%[src]), %%eax      \n"      // read all 4 pixels
719
             "bswap %%eax             \n"
720
             "add $4, %[src]           \n"
721
             "mov %%eax, %%edx        \n"
722
             
723
             // 1st dword output {
724
             "shr $15, %%eax          \n"
725
             "and $0x1FE, %%eax       \n"
726
             "mov (%[pal],%[a]), %%cx \n"
727
             "ror $8, %%cx            \n"
728
             "shl $16, %%ecx          \n"
729
             
730
             "mov %%edx, %%eax        \n"
731
             "shr $23, %%eax          \n"
732
             "and $0x1FE, %%eax       \n"
733
             "mov (%[pal],%[a]), %%cx \n"
734
             "ror $8, %%cx            \n"
735
             
736
             "mov %%ecx, (%[dst])      \n"
737
             "add $4, %[dst]           \n"
738
             // }
739
                
740
             // 2nd dword output {
741
             "mov %%edx, %%eax        \n"
742
             "shl $1, %%eax           \n"
743
             "and $0x1FE, %%eax       \n"
744
             "mov (%[pal],%[a]), %%cx \n"
745
             "ror $8, %%cx            \n"
746
             "shl $16, %%ecx          \n"
747
             
748
             "shr $7, %%edx           \n"
749
             "and $0x1FE, %%edx       \n"
750
             "mov (%[pal],%[d]), %%cx \n"
751
             "ror $8, %%cx            \n"
752
             
753
             "mov %%ecx, (%[dst])      \n"
754
             "add $4, %[dst]           \n"
755
             // }
756
             // *
757
                
758
             "mov %[tempx], %[c]      \n"
759
             "dec %%ecx               \n"
760
             "jnz 2b                  \n"  // ia_x_loop2
761
             
762
             "mov %[tempy], %[c]      \n"
763
             "dec %%ecx               \n"
764
             "jz 4f                   \n"  // ia_end_y_loop2
765
             "mov %[c], %[tempy]      \n"
766
                
767
             "add %[line], %[src]     \n"
768
             "add %[ext], %[dst]      \n"
769
             
770
             "mov %[wid_64], %%ecx    \n"
771
             "3:                      \n"  // ia_x_loop_22
772
             "mov %[c], %[tempx]      \n"
773
             
774
             "mov 4(%[src]), %%eax     \n"      // read all 4 pixels
775
             "bswap %%eax             \n"
776
             "mov %%eax, %%edx        \n"
777
             
778
             // 1st dword output {
779
             "shr $15, %%eax          \n"
780
             "and $0x1FE, %%eax       \n"
781
             "mov (%[pal],%[a]), %%cx \n"
782
             "ror $8, %%cx            \n"
783
             "shl $16, %%ecx          \n"
784
             
785
             "mov %%edx, %%eax        \n"
786
             "shr $23, %%eax          \n"
787
             "and $0x1FE, %%eax       \n"
788
             "mov (%[pal],%[a]), %%cx \n"
789
             "ror $8, %%cx            \n"
790
             
791
             "mov %%ecx, (%[dst])      \n"
792
             "add $4, %[dst]           \n"
793
             // }
794
                
795
             // 2nd dword output {
796
             "mov %%edx, %%eax        \n"
797
             "shl $1, %%eax           \n"
798
             "and $0x1FE, %%eax       \n"
799
             "mov (%[pal],%[a]), %%cx \n"
800
             "ror $8, %%cx            \n"
801
             "shl $16, %%ecx          \n"
802
             
803
             "shr $7, %%edx           \n"
804
             "and $0x1FE, %%edx       \n"
805
             "mov (%[pal],%[d]), %%cx \n"
806
             "ror $8, %%cx            \n"
807
             
808
             "mov %%ecx, (%[dst])      \n"
809
             "add $4, %[dst]           \n"
810
             // }
811
             
812
             // * copy
813
             "mov (%[src]), %%eax      \n"      // read all 4 pixels
814
             "bswap %%eax             \n"
815
             "add $8, %[src]           \n"
816
             "mov %%eax, %%edx        \n"
817
             
818
             // 1st dword output {
819
             "shr $15, %%eax          \n"
820
             "and $0x1FE, %%eax       \n"
821
             "mov (%[pal],%[a]), %%cx \n"
822
             "ror $8, %%cx            \n"
823
             "shl $16, %%ecx          \n"
824
             
825
             "mov %%edx, %%eax        \n"
826
             "shr $23, %%eax          \n"
827
             "and $0x1FE, %%eax       \n"
828
             "mov (%[pal],%[a]), %%cx \n"
829
             "ror $8, %%cx            \n"
830
             
831
             "mov %%ecx, (%[dst])      \n"
832
             "add $4, %[dst]           \n"
833
             // }
834
             
835
             // 2nd dword output {
836
             "mov %%edx, %%eax        \n"
837
             "shl $1, %%eax           \n"
838
             "and $0x1FE, %%eax       \n"
839
             "mov (%[pal],%[a]), %%cx \n"
840
             "ror $8, %%cx            \n"
841
             "shl $16, %%ecx          \n"
842
             
843
             "shr $7, %%edx           \n"
844
             "and $0x1FE, %%edx       \n"
845
             "mov (%[pal],%[d]), %%cx \n"
846
             "ror $8, %%cx            \n"
847
             
848
             "mov %%ecx, (%[dst])      \n"
849
             "add $4, %[dst]           \n"
850
             // }
851
             // *
852

853
             "mov %[tempx], %[c]      \n"
854
             "dec %%ecx               \n"
855
             "jnz 3b                  \n"  // ia_x_loop_22
856
             
857
             "add %[line], %[src]     \n"
858
             "add %[ext], %[dst]      \n"
859
             
860
             "mov %[tempy], %[c]      \n"
861
             "dec %%ecx               \n"
862
             "jnz 1b                  \n"  // ia_y_loop2
863
             
864
             "4:                      \n"  // ia_end_y_loop2
865
             : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
866
             : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
867
             : "memory", "cc"
868
             );
869
#endif
870
    return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;
871
    }
872
    
873
    return 0;
874
}
875

876
//****************************************************************
877
// Size: 1, Format: 3
878
//
879
// ** by Gugaman **
880

881
DWORD Load8bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)  
882
{ 
883
    if (rdp.tlut_mode != 0)
884
        return Load8bCI (dst, src, wid_64, height, line, real_width, tile);
885

886
    if (wid_64 < 1) wid_64 = 1;  
887
    if (height < 1) height = 1;  
888
    int ext = (real_width - (wid_64 << 3));  
889
#if !defined(__GNUC__) && !defined(NO_ASM)
890
    __asm {  
891
        mov esi,dword ptr [src]  
892
            mov edi,dword ptr [dst]  
893
            
894
            mov ecx,dword ptr [height]  
895
y_loop:  
896
        push ecx  
897
            
898
            mov ecx,dword ptr [wid_64]  
899
x_loop:  
900
        mov eax,dword ptr [esi]          // read all 4 pixels  
901
            add esi,4  
902
            
903
            xor ebx,ebx 
904
            mov edx,eax 
905
            shr eax,4//all alpha 
906
            and eax,0x0F0F0F0F 
907
            or ebx,eax 
908
            mov eax,edx//intensity 
909
            shl eax,4 
910
            and eax,0xF0F0F0F0 
911
            or ebx,eax 
912
            
913
            mov dword ptr [edi],ebx // save dword 
914
            add edi,4  
915
            
916
            mov eax,dword ptr [esi]          // read all 4 pixels  
917
            add esi,4  
918
            
919
            xor ebx,ebx 
920
            mov edx,eax 
921
            shr eax,4//all alpha 
922
            and eax,0x0F0F0F0F 
923
            or ebx,eax 
924
            mov eax,edx//intensity 
925
            shl eax,4 
926
            and eax,0xF0F0F0F0 
927
            or ebx,eax 
928
            
929
            mov dword ptr [edi],ebx // save dword 
930
            add edi,4  
931
            // *  
932
            
933
            dec ecx  
934
            jnz x_loop  
935
            
936
            pop ecx  
937
            dec ecx  
938
            jz end_y_loop  
939
            push ecx  
940
            
941
            add esi,dword ptr [line]  
942
            add edi,dword ptr [ext]  
943
            
944
            mov ecx,dword ptr [wid_64]  
945
x_loop_2:  
946
        mov eax,dword ptr [esi+4]          // read both pixels  
947
            
948
            xor ebx,ebx 
949
            mov edx,eax 
950
            shr eax,4//all alpha 
951
            and eax,0x0F0F0F0F 
952
            or ebx,eax 
953
            mov eax,edx//intensity 
954
            shl eax,4 
955
            and eax,0xF0F0F0F0 
956
            or ebx,eax 
957
            
958
            mov dword ptr [edi],ebx //save dword 
959
            add edi,4  
960
            
961
            mov eax,dword ptr [esi]          // read both pixels  
962
            add esi,8  
963
            
964
            xor ebx,ebx 
965
            mov edx,eax 
966
            shr eax,4//all alpha 
967
            and eax,0x0F0F0F0F 
968
            or ebx,eax 
969
            mov eax,edx//intensity 
970
            shl eax,4 
971
            and eax,0xF0F0F0F0 
972
            or ebx,eax 
973
            
974
            mov dword ptr [edi],ebx //save dword 
975
            add edi,4  
976
            // *  
977
            
978
            dec ecx  
979
            jnz x_loop_2  
980
            
981
            add esi,dword ptr [line]  
982
            add edi,dword ptr [ext]  
983
            
984
            pop ecx  
985
            dec ecx  
986
            jnz y_loop  
987
            
988
end_y_loop:  
989
    }  
990
#elif !defined(NO_ASM)
991
   //printf("Load8bIA\n");
992
   int lTemp, lHeight = (int) height;
993
   asm volatile (
994
         "1:                     \n"  // y_loop5
995
         "mov %[wid_64], %%eax    \n"
996
         "mov %%eax, %[temp]      \n"
997
         "2:                      \n"  // x_loop5
998
         "mov (%[src]), %%eax     \n"          // read all 4 pixels  
999
         "add $4, %[src]          \n"
1000
         
1001
         "xor %%ecx, %%ecx       \n"
1002
         "mov %%eax, %%edx       \n"
1003
         "shr $4, %%eax          \n"//all alpha 
1004
         "and $0x0F0F0F0F, %%eax \n"
1005
         "or %%eax, %%ecx        \n"
1006
         "mov %%edx, %%eax       \n"//intensity 
1007
         "shl $4, %%eax          \n"
1008
         "and $0xF0F0F0F0, %%eax \n"
1009
         "or %%eax, %%ecx        \n"
1010
         
1011
         "mov %%ecx, (%[dst])     \n" // save dword 
1012
         "add $4, %[dst]          \n"
1013
         
1014
         "mov (%[src]), %%eax     \n"          // read all 4 pixels  
1015
         "add $4, %[src]          \n"
1016
         
1017
         "xor %%ecx, %%ecx       \n"
1018
         "mov %%eax, %%edx       \n"
1019
         "shr $4, %%eax          \n"//all alpha 
1020
         "and $0x0F0F0F0F, %%eax \n"
1021
         "or %%eax, %%ecx        \n"
1022
         "mov %%edx, %%eax       \n"//intensity 
1023
         "shl $4, %%eax          \n"
1024
         "and $0xF0F0F0F0, %%eax \n"
1025
         "or %%eax, %%ecx        \n"
1026
         
1027
         "mov %%ecx, (%[dst])    \n" // save dword 
1028
         "add $4, %[dst]         \n"
1029
            
1030
         "decl %[temp]           \n"
1031
         "jnz 2b                 \n"  // x_loop5
1032
         
1033
         "decl %[height]         \n"
1034
         "jz 4f                  \n"  // end_y_loop5
1035
         
1036
         "add %[line], %[src]    \n"
1037
         "add %[ext], %[dst]     \n"
1038
         
1039
         "mov %[wid_64], %%eax    \n"
1040
         "mov %%eax, %[temp]      \n"
1041
         "3:                      \n"  // x_loop_25
1042
         "mov 4(%[src]), %%eax    \n"          // read both pixels  
1043
         
1044
         "xor %%ecx, %%ecx       \n"
1045
         "mov %%eax, %%edx       \n"
1046
         "shr $4, %%eax          \n"//all alpha 
1047
         "and $0x0F0F0F0F, %%eax \n"
1048
         "or %%eax, %%ecx        \n"
1049
         "mov %%edx, %%eax       \n"//intensity 
1050
         "shl $4, %%eax          \n"
1051
         "and $0xF0F0F0F0, %%eax \n"
1052
         "or %%eax, %%ecx        \n"
1053
         
1054
         "mov %%ecx, (%[dst])     \n" //save dword 
1055
         "add $4, %[dst]          \n"
1056
         
1057
         "mov (%[src]), %%eax     \n"          // read both pixels  
1058
         "add $8, %[src]          \n"
1059
         
1060
         "xor %%ecx, %%ecx       \n"
1061
         "mov %%eax, %%edx       \n"
1062
         "shr $4, %%eax          \n"//all alpha 
1063
         "and $0x0F0F0F0F, %%eax \n"
1064
         "or %%eax, %%ecx        \n"
1065
         "mov %%edx, %%eax       \n"//intensity 
1066
         "shl $4, %%eax          \n"
1067
         "and $0xF0F0F0F0, %%eax \n"
1068
         "or %%eax, %%ecx        \n"
1069
         
1070
         "mov %%ecx, (%[dst])     \n" //save dword 
1071
         "add $4, %[dst]          \n"
1072
         // *  
1073
         
1074
         "decl %[temp]           \n"
1075
         "jnz 3b                 \n"  // x_loop_25
1076
         
1077
         "add %[line], %[src]    \n"
1078
         "add %[ext], %[dst]     \n"
1079
         
1080
         "decl %[height]         \n"
1081
         "jnz 1b                 \n"  // y_loop5
1082
         
1083
         "4:                     \n"  // end_y_loop5
1084
           : [temp]"=m"(lTemp), [src] "+S"(src), [dst] "+D"(dst), [height] "+g"(lHeight)
1085
           : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1086
           : "memory", "cc", "eax", "edx", "ecx"
1087
           );
1088
#endif
1089
    return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;  
1090
} 
1091

1092
//****************************************************************
1093
// Size: 1, Format: 4
1094
//
1095
// ** by Gugaman **
1096

1097
DWORD Load8bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)  
1098
{ 
1099
    if (rdp.tlut_mode != 0)
1100
        return Load8bCI (dst, src, wid_64, height, line, real_width, tile);
1101
    
1102
    if (wid_64 < 1) wid_64 = 1;  
1103
    if (height < 1) height = 1;  
1104
    int ext = (real_width - (wid_64 << 3));  
1105
#if !defined(__GNUC__) && !defined(NO_ASM)
1106
    __asm {  
1107
        mov esi,dword ptr [src]  
1108
            mov edi,dword ptr [dst]  
1109
            
1110
            mov ecx,dword ptr [height]  
1111
y_loop:  
1112
        push ecx  
1113
            
1114
            mov ecx,dword ptr [wid_64]  
1115
x_loop:  
1116
        mov eax,dword ptr [esi]          // read all 4 pixels  
1117
            add esi,4  
1118
            
1119
            mov dword ptr [edi],eax // save dword 
1120
            add edi,4  
1121
            
1122
            mov eax,dword ptr [esi]          // read all 4 pixels  
1123
            add esi,4  
1124
            
1125
            mov dword ptr [edi],eax // save dword 
1126
            add edi,4  
1127
            // *  
1128
            
1129
            dec ecx  
1130
            jnz x_loop  
1131
            
1132
            pop ecx  
1133
            dec ecx  
1134
            jz end_y_loop  
1135
            push ecx  
1136
            
1137
            add esi,dword ptr [line]  
1138
            add edi,dword ptr [ext]  
1139
            
1140
            mov ecx,dword ptr [wid_64]  
1141
x_loop_2:  
1142
        mov eax,dword ptr [esi+4]          // read both pixels  
1143
            
1144
            mov dword ptr [edi],eax //save dword 
1145
            add edi,4  
1146
            
1147
            mov eax,dword ptr [esi]          // read both pixels  
1148
            add esi,8  
1149
            
1150
            mov dword ptr [edi],eax //save dword 
1151
            add edi,4  
1152
            // *  
1153
            
1154
            dec ecx  
1155
            jnz x_loop_2  
1156
            
1157
            add esi,dword ptr [line]  
1158
            add edi,dword ptr [ext]  
1159
            
1160
            pop ecx  
1161
            dec ecx  
1162
            jnz y_loop  
1163
            
1164
end_y_loop:  
1165
    }  
1166
#elif !defined(NO_ASM)
1167
   //printf("Load8bI\n");
1168
   int lTemp, lHeight = (int) height;
1169
   asm volatile (
1170
         "1:                     \n"  // y_loop6
1171
         "mov %[wid_64], %%eax   \n"
1172
         "mov %%eax, %[temp]     \n"
1173
         "2:                     \n"  // x_loop6
1174
         "mov (%[src]), %%eax    \n"          // read all 4 pixels  
1175
         "add $4, %[src]         \n"
1176
         
1177
         "mov %%eax, (%[dst])    \n" // save dword 
1178
         "add $4, %[dst]         \n"
1179
         
1180
         "mov (%[src]), %%eax    \n"          // read all 4 pixels  
1181
         "add $4, %[src]         \n"
1182
         
1183
         "mov %%eax, (%[dst])    \n" // save dword 
1184
         "add $4, %[dst]         \n"
1185
         // *  
1186
         
1187
         "decl %[temp]          \n"
1188
         "jnz 2b                \n" // x_loop6
1189
         
1190
         "decl %[height]        \n"
1191
         "jz 4f                 \n" // end_y_loop6
1192
            
1193
         "add %[line], %[src]   \n"
1194
         "add %[ext], %[dst]    \n"
1195
         
1196
         "mov %[wid_64], %%eax   \n"
1197
         "mov %%eax, %[temp]     \n"
1198
         "3:                     \n"  // x_loop_26
1199
         "mov 4(%[src]), %%eax   \n"          // read both pixels  
1200
         
1201
         "mov %%eax, (%[dst])    \n" //save dword 
1202
         "add $4, %[dst]         \n"
1203
         
1204
         "mov (%[src]), %%eax    \n"          // read both pixels  
1205
         "add $8, %[src]         \n"
1206
         
1207
         "mov %%eax, (%[dst])    \n" //save dword 
1208
         "add $4, %[dst]         \n"
1209

1210
         "decl %[temp]          \n"
1211
         "jnz 3b                \n"  // x_loop_26
1212
         
1213
         "add %[line], %[src]   \n"
1214
         "add %[ext], %[dst]    \n"
1215
         
1216
         "decl %[height]        \n"
1217
         "jnz 1b                \n"  // y_loop6
1218
         
1219
         "4:                    \n"  // end_y_loop6
1220
         : [temp]"=m"(lTemp), [src]"+S"(src), [dst]"+D"(dst), [height]"+g"(lHeight)
1221
         : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1222
         : "memory", "cc", "eax", "edx"
1223
         );  
1224
#endif
1225
     return /*(0 << 16) | */GR_TEXFMT_ALPHA_8;  
1226
}
1227

1228

1229
Product

Resources

Company