Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/TexLoad4b.h
2 views
1
/*
2
* Glide64 - Glide video plugin for Nintendo 64 emulators.
3
* Copyright (c) 2002 Dave2001
4
* Copyright (c) 2008 Günther <[email protected]>
5
*
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* any later version.
10
*
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
15
*
16
* You should have received a copy of the GNU General Public
17
* License along with this program; if not, write to the Free
18
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA
20
*/
21
22
//****************************************************************
23
//
24
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
25
// Project started on December 29th, 2001
26
//
27
// To modify Glide64:
28
// * Write your name and (optional) email in a comment next to your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
29
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30
//
31
// Official Glide64 development channel: #Glide64 on EFnet
32
//
33
// Original author: Dave2001 ([email protected])
34
// Other authors: Gonetz, Gugaman
35
//
36
//****************************************************************
37
38
//****************************************************************
39
// Size: 0, Format: 2
40
41
// Rotates a 16-bit value right by `bits` (callers pass 1 or 8 only).
// Stands in for the `ror cx,1` / `ror cx,8` instructions of the former
// inline-assembly implementation.
static unsigned short Load4bCI_Ror16 (unsigned short value, int bits)
{
  return (unsigned short)(((value >> bits) | (value << (16 - bits))) & 0xFFFF);
}

// Expands four source bytes (eight 4-bit palette indices) into eight 16-bit
// texels. For every byte the high nibble is emitted first, then the low
// nibble; each index selects pal[index], which is rotated right by `rot`
// bits before being stored. Returns the output position after the eight
// texels that were written.
static unsigned short * Load4bCI_Expand4 (unsigned short * out, const unsigned char * in, const unsigned short * pal, int rot)
{
  for (int i = 0; i < 4; i++)
  {
    *out++ = Load4bCI_Ror16 (pal[in[i] >> 4], rot);
    *out++ = Load4bCI_Ror16 (pal[in[i] & 0x0F], rot);
  }
  return out;
}

// Converts a 4-bit colour-indexed texture into 16-bit texels.
//
//   dst        - output buffer, written as 16-bit texels
//                NOTE(review): accessed through an unsigned short pointer, so
//                it is assumed to be at least 2-byte aligned - confirm caller.
//   src        - input buffer holding two 4-bit indices per byte
//   wid_64     - number of 8-byte source groups per row (clamped to >= 1);
//                each group yields 16 texels
//   height     - number of rows (clamped to >= 1)
//   line       - bytes added to src after every row
//   real_width - output row width in texels; the bytes skipped in dst after
//                every row are (real_width - wid_64*16) * 2
//   tile       - index into rdp.tiles; its `palette` field selects the
//                16-entry slice of rdp.pal_8 that is used for lookups
//
// On odd rows the two 4-byte halves of every 8-byte group are consumed in
// swapped order (bytes 4..7 first, then bytes 0..3).
//
// When rdp.tlut_mode == 2 each palette entry is rotated right by one bit and
// the result is reported as GR_TEXFMT_ARGB_1555; otherwise the two bytes of
// the entry are swapped and the result is reported as
// GR_TEXFMT_ALPHA_INTENSITY_88.
//
// This is plain C on purpose: the previous version consisted solely of
// compiler-specific inline assembly, so a build with NO_ASM defined left dst
// untouched while still returning a valid format.
DWORD Load4bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
{
  if (wid_64 < 1) wid_64 = 1;
  if (height < 1) height = 1;
  int ext = (real_width - (wid_64 << 4)) << 1;   // dst padding per row, in bytes
  unsigned short * pal = (rdp.pal_8 + (rdp.tiles[tile].palette << 4));

  const int is_mode2 = (rdp.tlut_mode == 2);
  const int rot = is_mode2 ? 1 : 8;

  for (int y = 0; y < height; y++)
  {
    // Offset of the 4-byte half that is decoded first: 0 on even rows,
    // 4 on odd rows.
    const int first = (y & 1) ? 4 : 0;
    unsigned short * out = (unsigned short *) dst;

    for (int x = 0; x < wid_64; x++)
    {
      out = Load4bCI_Expand4 (out, src + first, pal, rot);
      out = Load4bCI_Expand4 (out, src + (first ^ 4), pal, rot);
      src += 8;
    }

    // Step over the per-row padding in both buffers.
    src += line;
    dst = (unsigned char *) out + ext;
  }

  return (1 << 16) | (is_mode2 ? GR_TEXFMT_ARGB_1555 : GR_TEXFMT_ALPHA_INTENSITY_88);
}
1458
1459
//****************************************************************
1460
// Size: 0, Format: 3
1461
//
1462
// ** BY GUGAMAN **
1463
1464
DWORD Load4bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
1465
{
1466
if (rdp.tlut_mode != 0)
1467
return Load4bCI (dst, src, wid_64, height, line, real_width, tile);
1468
1469
if (wid_64 < 1) wid_64 = 1;
1470
if (height < 1) height = 1;
1471
int ext = (real_width - (wid_64 << 4));
1472
#if !defined(__GNUC__) && !defined(NO_ASM)
1473
__asm {
1474
mov esi,dword ptr [src]
1475
mov edi,dword ptr [dst]
1476
1477
mov ecx,dword ptr [height]
1478
y_loop:
1479
push ecx
1480
1481
mov ecx,dword ptr [wid_64]
1482
x_loop:
1483
push ecx
1484
1485
mov eax,dword ptr [esi] // read all 8 pixels
1486
bswap eax
1487
add esi,4
1488
mov edx,eax
1489
1490
// 1st dword {
1491
xor ecx,ecx
1492
1493
// pixel #1
1494
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1495
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1496
mov eax,edx
1497
shr eax,24 //Alpha
1498
and eax,0x00000010
1499
or ecx,eax
1500
shl eax,1
1501
or ecx,eax
1502
shl eax,1
1503
or ecx,eax
1504
shl eax,1
1505
or ecx,eax
1506
mov eax,edx
1507
shr eax,28 // Intensity
1508
and eax,0x0000000E
1509
or ecx,eax
1510
shr eax,3
1511
or ecx,eax
1512
1513
// pixel #2
1514
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
1515
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1516
mov eax,edx
1517
shr eax,12 //Alpha
1518
and eax,0x00001000
1519
or ecx,eax
1520
shl eax,1
1521
or ecx,eax
1522
shl eax,1
1523
or ecx,eax
1524
shl eax,1
1525
or ecx,eax
1526
mov eax,edx
1527
shr eax,16 // Intensity
1528
and eax,0x00000E00
1529
or ecx,eax
1530
shr eax,3
1531
and eax,0x00000100
1532
or ecx,eax
1533
1534
// pixel #3
1535
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
1536
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1537
//Alpha
1538
mov eax,edx
1539
and eax,0x00100000
1540
or ecx,eax
1541
shl eax,1
1542
or ecx,eax
1543
shl eax,1
1544
or ecx,eax
1545
shl eax,1
1546
or ecx,eax
1547
mov eax,edx
1548
shr eax,4 // Intensity
1549
and eax,0x000E0000
1550
or ecx,eax
1551
shr eax,3
1552
and eax,0x00010000
1553
or ecx,eax
1554
1555
// pixel #4
1556
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
1557
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1558
mov eax,edx
1559
shl eax,12 //Alpha
1560
and eax,0x10000000
1561
or ecx,eax
1562
shl eax,1
1563
or ecx,eax
1564
shl eax,1
1565
or ecx,eax
1566
shl eax,1
1567
or ecx,eax
1568
mov eax,edx
1569
shl eax,8 // Intensity
1570
and eax,0x0E000000
1571
or ecx,eax
1572
shr eax,3
1573
and eax,0x01000000
1574
or ecx,eax
1575
1576
1577
mov dword ptr [edi],ecx
1578
add edi,4
1579
// }
1580
1581
// 2nd dword {
1582
xor ecx,ecx
1583
1584
// pixel #5
1585
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
1586
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1587
mov eax,edx
1588
shr eax,8 //Alpha
1589
and eax,0x00000010
1590
or ecx,eax
1591
shl eax,1
1592
or ecx,eax
1593
shl eax,1
1594
or ecx,eax
1595
shl eax,1
1596
or ecx,eax
1597
mov eax,edx
1598
shr eax,12 // Intensity
1599
and eax,0x0000000E
1600
or ecx,eax
1601
shr eax,3
1602
or ecx,eax
1603
1604
// pixel #6
1605
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
1606
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1607
//Alpha
1608
mov eax,edx
1609
shl eax,4
1610
and eax,0x00001000
1611
or ecx,eax
1612
shl eax,1
1613
or ecx,eax
1614
shl eax,1
1615
or ecx,eax
1616
shl eax,1
1617
or ecx,eax
1618
mov eax,edx // Intensity
1619
and eax,0x00000E00
1620
or ecx,eax
1621
shr eax,3
1622
and eax,0x00000100
1623
or ecx,eax
1624
1625
// pixel #7
1626
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
1627
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1628
//Alpha
1629
mov eax,edx
1630
shl eax,16
1631
and eax,0x00100000
1632
or ecx,eax
1633
shl eax,1
1634
or ecx,eax
1635
shl eax,1
1636
or ecx,eax
1637
shl eax,1
1638
or ecx,eax
1639
mov eax,edx
1640
shl eax,12 // Intensity
1641
and eax,0x000E0000
1642
or ecx,eax
1643
shr eax,3
1644
and eax,0x00010000
1645
or ecx,eax
1646
1647
// pixel #8
1648
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
1649
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1650
mov eax,edx
1651
shl eax,28 //Alpha
1652
and eax,0x10000000
1653
or ecx,eax
1654
shl eax,1
1655
or ecx,eax
1656
shl eax,1
1657
or ecx,eax
1658
shl eax,1
1659
or ecx,eax
1660
mov eax,edx
1661
shl eax,24 // Intensity
1662
and eax,0x0E000000
1663
or ecx,eax
1664
shr eax,3
1665
and eax,0x01000000
1666
or ecx,eax
1667
1668
mov dword ptr [edi],ecx
1669
add edi,4
1670
// }
1671
1672
// * copy
1673
mov eax,dword ptr [esi] // read all 8 pixels
1674
bswap eax
1675
add esi,4
1676
mov edx,eax
1677
1678
// 1st dword {
1679
xor ecx,ecx
1680
1681
// pixel #1
1682
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1683
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1684
mov eax,edx
1685
shr eax,24 //Alpha
1686
and eax,0x00000010
1687
or ecx,eax
1688
shl eax,1
1689
or ecx,eax
1690
shl eax,1
1691
or ecx,eax
1692
shl eax,1
1693
or ecx,eax
1694
mov eax,edx
1695
shr eax,28 // Intensity
1696
and eax,0x0000000E
1697
or ecx,eax
1698
shr eax,3
1699
or ecx,eax
1700
1701
// pixel #2
1702
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
1703
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1704
mov eax,edx
1705
shr eax,12 //Alpha
1706
and eax,0x00001000
1707
or ecx,eax
1708
shl eax,1
1709
or ecx,eax
1710
shl eax,1
1711
or ecx,eax
1712
shl eax,1
1713
or ecx,eax
1714
mov eax,edx
1715
shr eax,16 // Intensity
1716
and eax,0x00000E00
1717
or ecx,eax
1718
shr eax,3
1719
and eax,0x00000100
1720
or ecx,eax
1721
1722
// pixel #3
1723
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
1724
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1725
//Alpha
1726
mov eax,edx
1727
and eax,0x00100000
1728
or ecx,eax
1729
shl eax,1
1730
or ecx,eax
1731
shl eax,1
1732
or ecx,eax
1733
shl eax,1
1734
or ecx,eax
1735
mov eax,edx
1736
shr eax,4 // Intensity
1737
and eax,0x000E0000
1738
or ecx,eax
1739
shr eax,3
1740
and eax,0x00010000
1741
or ecx,eax
1742
1743
// pixel #4
1744
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
1745
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1746
mov eax,edx
1747
shl eax,12 //Alpha
1748
and eax,0x10000000
1749
or ecx,eax
1750
shl eax,1
1751
or ecx,eax
1752
shl eax,1
1753
or ecx,eax
1754
shl eax,1
1755
or ecx,eax
1756
mov eax,edx
1757
shl eax,8 // Intensity
1758
and eax,0x0E000000
1759
or ecx,eax
1760
shr eax,3
1761
and eax,0x01000000
1762
or ecx,eax
1763
1764
1765
mov dword ptr [edi],ecx
1766
add edi,4
1767
// }
1768
1769
// 2nd dword {
1770
xor ecx,ecx
1771
1772
// pixel #5
1773
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
1774
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1775
mov eax,edx
1776
shr eax,8 //Alpha
1777
and eax,0x00000010
1778
or ecx,eax
1779
shl eax,1
1780
or ecx,eax
1781
shl eax,1
1782
or ecx,eax
1783
shl eax,1
1784
or ecx,eax
1785
mov eax,edx
1786
shr eax,12 // Intensity
1787
and eax,0x0000000E
1788
or ecx,eax
1789
shr eax,3
1790
or ecx,eax
1791
1792
// pixel #6
1793
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
1794
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1795
//Alpha
1796
mov eax,edx
1797
shl eax,4
1798
and eax,0x00001000
1799
or ecx,eax
1800
shl eax,1
1801
or ecx,eax
1802
shl eax,1
1803
or ecx,eax
1804
shl eax,1
1805
or ecx,eax
1806
mov eax,edx // Intensity
1807
and eax,0x00000E00
1808
or ecx,eax
1809
shr eax,3
1810
and eax,0x00000100
1811
or ecx,eax
1812
1813
// pixel #7
1814
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
1815
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1816
//Alpha
1817
mov eax,edx
1818
shl eax,16
1819
and eax,0x00100000
1820
or ecx,eax
1821
shl eax,1
1822
or ecx,eax
1823
shl eax,1
1824
or ecx,eax
1825
shl eax,1
1826
or ecx,eax
1827
mov eax,edx
1828
shl eax,12 // Intensity
1829
and eax,0x000E0000
1830
or ecx,eax
1831
shr eax,3
1832
and eax,0x00010000
1833
or ecx,eax
1834
1835
// pixel #8
1836
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
1837
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1838
mov eax,edx
1839
shl eax,28 //Alpha
1840
and eax,0x10000000
1841
or ecx,eax
1842
shl eax,1
1843
or ecx,eax
1844
shl eax,1
1845
or ecx,eax
1846
shl eax,1
1847
or ecx,eax
1848
mov eax,edx
1849
shl eax,24 // Intensity
1850
and eax,0x0E000000
1851
or ecx,eax
1852
shr eax,3
1853
and eax,0x01000000
1854
or ecx,eax
1855
1856
mov dword ptr [edi],ecx
1857
add edi,4
1858
// }
1859
1860
// *
1861
1862
pop ecx
1863
dec ecx
1864
jnz x_loop
1865
1866
pop ecx
1867
dec ecx
1868
jz end_y_loop
1869
push ecx
1870
1871
add esi,dword ptr [line]
1872
add edi,dword ptr [ext]
1873
1874
mov ecx,dword ptr [wid_64]
1875
x_loop_2:
1876
push ecx
1877
1878
mov eax,dword ptr [esi+4] // read all 8 pixels
1879
bswap eax
1880
mov edx,eax
1881
1882
// 1st dword {
1883
xor ecx,ecx
1884
1885
// pixel #1
1886
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
1887
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1888
mov eax,edx
1889
shr eax,24 //Alpha
1890
and eax,0x00000010
1891
or ecx,eax
1892
shl eax,1
1893
or ecx,eax
1894
shl eax,1
1895
or ecx,eax
1896
shl eax,1
1897
or ecx,eax
1898
mov eax,edx
1899
shr eax,28 // Intensity
1900
and eax,0x0000000E
1901
or ecx,eax
1902
shr eax,3
1903
or ecx,eax
1904
1905
// pixel #2
1906
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
1907
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1908
mov eax,edx
1909
shr eax,12 //Alpha
1910
and eax,0x00001000
1911
or ecx,eax
1912
shl eax,1
1913
or ecx,eax
1914
shl eax,1
1915
or ecx,eax
1916
shl eax,1
1917
or ecx,eax
1918
mov eax,edx
1919
shr eax,16 // Intensity
1920
and eax,0x00000E00
1921
or ecx,eax
1922
shr eax,3
1923
and eax,0x00000100
1924
or ecx,eax
1925
1926
// pixel #3
1927
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
1928
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
1929
//Alpha
1930
mov eax,edx
1931
and eax,0x00100000
1932
or ecx,eax
1933
shl eax,1
1934
or ecx,eax
1935
shl eax,1
1936
or ecx,eax
1937
shl eax,1
1938
or ecx,eax
1939
mov eax,edx
1940
shr eax,4 // Intensity
1941
and eax,0x000E0000
1942
or ecx,eax
1943
shr eax,3
1944
and eax,0x00010000
1945
or ecx,eax
1946
1947
// pixel #4
1948
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
1949
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
1950
mov eax,edx
1951
shl eax,12 //Alpha
1952
and eax,0x10000000
1953
or ecx,eax
1954
shl eax,1
1955
or ecx,eax
1956
shl eax,1
1957
or ecx,eax
1958
shl eax,1
1959
or ecx,eax
1960
mov eax,edx
1961
shl eax,8 // Intensity
1962
and eax,0x0E000000
1963
or ecx,eax
1964
shr eax,3
1965
and eax,0x01000000
1966
or ecx,eax
1967
1968
1969
mov dword ptr [edi],ecx
1970
add edi,4
1971
// }
1972
1973
// 2nd dword {
1974
xor ecx,ecx
1975
1976
// pixel #5
1977
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
1978
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
1979
mov eax,edx
1980
shr eax,8 //Alpha
1981
and eax,0x00000010
1982
or ecx,eax
1983
shl eax,1
1984
or ecx,eax
1985
shl eax,1
1986
or ecx,eax
1987
shl eax,1
1988
or ecx,eax
1989
mov eax,edx
1990
shr eax,12 // Intensity
1991
and eax,0x0000000E
1992
or ecx,eax
1993
shr eax,3
1994
or ecx,eax
1995
1996
// pixel #6
1997
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
1998
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
1999
//Alpha
2000
mov eax,edx
2001
shl eax,4
2002
and eax,0x00001000
2003
or ecx,eax
2004
shl eax,1
2005
or ecx,eax
2006
shl eax,1
2007
or ecx,eax
2008
shl eax,1
2009
or ecx,eax
2010
mov eax,edx // Intensity
2011
and eax,0x00000E00
2012
or ecx,eax
2013
shr eax,3
2014
and eax,0x00000100
2015
or ecx,eax
2016
2017
// pixel #7
2018
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2019
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2020
//Alpha
2021
mov eax,edx
2022
shl eax,16
2023
and eax,0x00100000
2024
or ecx,eax
2025
shl eax,1
2026
or ecx,eax
2027
shl eax,1
2028
or ecx,eax
2029
shl eax,1
2030
or ecx,eax
2031
mov eax,edx
2032
shl eax,12 // Intensity
2033
and eax,0x000E0000
2034
or ecx,eax
2035
shr eax,3
2036
and eax,0x00010000
2037
or ecx,eax
2038
2039
// pixel #8
2040
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2041
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2042
mov eax,edx
2043
shl eax,28 //Alpha
2044
and eax,0x10000000
2045
or ecx,eax
2046
shl eax,1
2047
or ecx,eax
2048
shl eax,1
2049
or ecx,eax
2050
shl eax,1
2051
or ecx,eax
2052
mov eax,edx
2053
shl eax,24 // Intensity
2054
and eax,0x0E000000
2055
or ecx,eax
2056
shr eax,3
2057
and eax,0x01000000
2058
or ecx,eax
2059
2060
mov dword ptr [edi],ecx
2061
add edi,4
2062
// }
2063
2064
// * copy
2065
mov eax,dword ptr [esi] // read all 8 pixels
2066
bswap eax
2067
add esi,8
2068
mov edx,eax
2069
2070
// 1st dword {
2071
xor ecx,ecx
2072
2073
// pixel #1
2074
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2075
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2076
mov eax,edx
2077
shr eax,24 //Alpha
2078
and eax,0x00000010
2079
or ecx,eax
2080
shl eax,1
2081
or ecx,eax
2082
shl eax,1
2083
or ecx,eax
2084
shl eax,1
2085
or ecx,eax
2086
mov eax,edx
2087
shr eax,28 // Intensity
2088
and eax,0x0000000E
2089
or ecx,eax
2090
shr eax,3
2091
or ecx,eax
2092
2093
// pixel #2
2094
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2095
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2096
mov eax,edx
2097
shr eax,12 //Alpha
2098
and eax,0x00001000
2099
or ecx,eax
2100
shl eax,1
2101
or ecx,eax
2102
shl eax,1
2103
or ecx,eax
2104
shl eax,1
2105
or ecx,eax
2106
mov eax,edx
2107
shr eax,16 // Intensity
2108
and eax,0x00000E00
2109
or ecx,eax
2110
shr eax,3
2111
and eax,0x00000100
2112
or ecx,eax
2113
2114
// pixel #3
2115
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2116
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2117
//Alpha
2118
mov eax,edx
2119
and eax,0x00100000
2120
or ecx,eax
2121
shl eax,1
2122
or ecx,eax
2123
shl eax,1
2124
or ecx,eax
2125
shl eax,1
2126
or ecx,eax
2127
mov eax,edx
2128
shr eax,4 // Intensity
2129
and eax,0x000E0000
2130
or ecx,eax
2131
shr eax,3
2132
and eax,0x00010000
2133
or ecx,eax
2134
2135
// pixel #4
2136
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2137
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2138
mov eax,edx
2139
shl eax,12 //Alpha
2140
and eax,0x10000000
2141
or ecx,eax
2142
shl eax,1
2143
or ecx,eax
2144
shl eax,1
2145
or ecx,eax
2146
shl eax,1
2147
or ecx,eax
2148
mov eax,edx
2149
shl eax,8 // Intensity
2150
and eax,0x0E000000
2151
or ecx,eax
2152
shr eax,3
2153
and eax,0x01000000
2154
or ecx,eax
2155
2156
2157
mov dword ptr [edi],ecx
2158
add edi,4
2159
// }
2160
2161
// 2nd dword {
2162
xor ecx,ecx
2163
2164
// pixel #5
2165
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2166
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2167
mov eax,edx
2168
shr eax,8 //Alpha
2169
and eax,0x00000010
2170
or ecx,eax
2171
shl eax,1
2172
or ecx,eax
2173
shl eax,1
2174
or ecx,eax
2175
shl eax,1
2176
or ecx,eax
2177
mov eax,edx
2178
shr eax,12 // Intensity
2179
and eax,0x0000000E
2180
or ecx,eax
2181
shr eax,3
2182
or ecx,eax
2183
2184
// pixel #6
2185
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2186
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2187
//Alpha
2188
mov eax,edx
2189
shl eax,4
2190
and eax,0x00001000
2191
or ecx,eax
2192
shl eax,1
2193
or ecx,eax
2194
shl eax,1
2195
or ecx,eax
2196
shl eax,1
2197
or ecx,eax
2198
mov eax,edx // Intensity
2199
and eax,0x00000E00
2200
or ecx,eax
2201
shr eax,3
2202
and eax,0x00000100
2203
or ecx,eax
2204
2205
// pixel #7
2206
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2207
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2208
//Alpha
2209
mov eax,edx
2210
shl eax,16
2211
and eax,0x00100000
2212
or ecx,eax
2213
shl eax,1
2214
or ecx,eax
2215
shl eax,1
2216
or ecx,eax
2217
shl eax,1
2218
or ecx,eax
2219
mov eax,edx
2220
shl eax,12 // Intensity
2221
and eax,0x000E0000
2222
or ecx,eax
2223
shr eax,3
2224
and eax,0x00010000
2225
or ecx,eax
2226
2227
// pixel #8
2228
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2229
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2230
mov eax,edx
2231
shl eax,28 //Alpha
2232
and eax,0x10000000
2233
or ecx,eax
2234
shl eax,1
2235
or ecx,eax
2236
shl eax,1
2237
or ecx,eax
2238
shl eax,1
2239
or ecx,eax
2240
mov eax,edx
2241
shl eax,24 // Intensity
2242
and eax,0x0E000000
2243
or ecx,eax
2244
shr eax,3
2245
and eax,0x01000000
2246
or ecx,eax
2247
2248
mov dword ptr [edi],ecx
2249
add edi,4
2250
// }
2251
// *
2252
2253
pop ecx
2254
dec ecx
2255
jnz x_loop_2
2256
2257
add esi,dword ptr [line]
2258
add edi,dword ptr [ext]
2259
2260
pop ecx
2261
dec ecx
2262
jnz y_loop
2263
2264
end_y_loop:
2265
}
2266
#elif !defined(NO_ASM)
2267
//printf("Load4bIA\n");
2268
long lTempX, lTempY, lHeight = (long) height;
2269
asm volatile (
2270
"1: \n" // y_loop2
2271
"mov %[c], %[tempy] \n"
2272
2273
"mov %[wid_64], %%ecx \n"
2274
"2: \n" // x_loop2
2275
"mov %[c], %[tempx] \n"
2276
2277
"mov (%[src]), %%eax \n" // read all 8 pixels
2278
"bswap %%eax \n"
2279
"add $4, %[src] \n"
2280
"mov %%eax, %%edx \n"
2281
2282
// 1st dword {
2283
"xor %%ecx, %%ecx \n"
2284
2285
// pixel #1
2286
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2287
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2288
"mov %%edx, %%eax \n"
2289
"shr $24, %%eax \n" //Alpha
2290
"and $0x00000010, %%eax \n"
2291
"or %%eax, %%ecx \n"
2292
"shl $1, %%eax \n"
2293
"or %%eax, %%ecx \n"
2294
"shl $1, %%eax \n"
2295
"or %%eax, %%ecx \n"
2296
"shl $1, %%eax \n"
2297
"or %%eax, %%ecx \n"
2298
"mov %%edx, %%eax \n"
2299
"shr $28, %%eax \n" // Intensity
2300
"and $0x0000000E, %%eax \n"
2301
"or %%eax, %%ecx \n"
2302
"shr $3, %%eax \n"
2303
"or %%eax, %%ecx \n"
2304
2305
// pixel #2
2306
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2307
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2308
"mov %%edx, %%eax \n"
2309
"shr $12, %%eax \n" //Alpha
2310
"and $0x00001000, %%eax \n"
2311
"or %%eax, %%ecx \n"
2312
"shl $1, %%eax \n"
2313
"or %%eax, %%ecx \n"
2314
"shl $1, %%eax \n"
2315
"or %%eax, %%ecx \n"
2316
"shl $1, %%eax \n"
2317
"or %%eax, %%ecx \n"
2318
"mov %%edx, %%eax \n"
2319
"shr $16, %%eax \n" // Intensity
2320
"and $0x00000E00, %%eax \n"
2321
"or %%eax, %%ecx \n"
2322
"shr $3, %%eax \n"
2323
"and $0x00000100, %%eax \n"
2324
"or %%eax, %%ecx \n"
2325
2326
// pixel #3
2327
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2328
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2329
//Alpha
2330
"mov %%edx, %%eax \n"
2331
"and $0x00100000, %%eax \n"
2332
"or %%eax, %%ecx \n"
2333
"shl $1, %%eax \n"
2334
"or %%eax, %%ecx \n"
2335
"shl $1, %%eax \n"
2336
"or %%eax, %%ecx \n"
2337
"shl $1, %%eax \n"
2338
"or %%eax, %%ecx \n"
2339
"mov %%edx, %%eax \n"
2340
"shr $4, %%eax \n" // Intensity
2341
"and $0x000E0000, %%eax \n"
2342
"or %%eax, %%ecx \n"
2343
"shr $3, %%eax \n"
2344
"and $0x00010000, %%eax \n"
2345
"or %%eax, %%ecx \n"
2346
2347
// pixel #4
2348
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2349
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2350
"mov %%edx, %%eax \n"
2351
"shl $12, %%eax \n" //Alpha
2352
"and $0x10000000, %%eax \n"
2353
"or %%eax, %%ecx \n"
2354
"shl $1, %%eax \n"
2355
"or %%eax, %%ecx \n"
2356
"shl $1, %%eax \n"
2357
"or %%eax, %%ecx \n"
2358
"shl $1, %%eax \n"
2359
"or %%eax, %%ecx \n"
2360
"mov %%edx, %%eax \n"
2361
"shl $8, %%eax \n" // Intensity
2362
"and $0x0E000000, %%eax \n"
2363
"or %%eax, %%ecx \n"
2364
"shr $3, %%eax \n"
2365
"and $0x01000000, %%eax \n"
2366
"or %%eax, %%ecx \n"
2367
2368
2369
"mov %%ecx, (%[dst]) \n"
2370
"add $4, %[dst] \n"
2371
// }
2372
2373
// 2nd dword {
2374
"xor %%ecx, %%ecx \n"
2375
2376
// pixel #5
2377
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2378
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2379
"mov %%edx, %%eax \n"
2380
"shr $8, %%eax \n" //Alpha
2381
"and $0x00000010, %%eax \n"
2382
"or %%eax, %%ecx \n"
2383
"shl $1, %%eax \n"
2384
"or %%eax, %%ecx \n"
2385
"shl $1, %%eax \n"
2386
"or %%eax, %%ecx \n"
2387
"shl $1, %%eax \n"
2388
"or %%eax, %%ecx \n"
2389
"mov %%edx, %%eax \n"
2390
"shr $12, %%eax \n" // Intensity
2391
"and $0x0000000E, %%eax \n"
2392
"or %%eax, %%ecx \n"
2393
"shr $3, %%eax \n"
2394
"or %%eax, %%ecx \n"
2395
2396
// pixel #6
2397
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2398
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2399
//Alpha
2400
"mov %%edx, %%eax \n"
2401
"shl $4, %%eax \n"
2402
"and $0x00001000, %%eax \n"
2403
"or %%eax, %%ecx \n"
2404
"shl $1, %%eax \n"
2405
"or %%eax, %%ecx \n"
2406
"shl $1, %%eax \n"
2407
"or %%eax, %%ecx \n"
2408
"shl $1, %%eax \n"
2409
"or %%eax, %%ecx \n"
2410
"mov %%edx, %%eax \n" // Intensity
2411
"and $0x00000E00, %%eax \n"
2412
"or %%eax, %%ecx \n"
2413
"shr $3, %%eax \n"
2414
"and $0x00000100, %%eax \n"
2415
"or %%eax, %%ecx \n"
2416
2417
// pixel #7
2418
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2419
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2420
//Alpha
2421
"mov %%edx, %%eax \n"
2422
"shl $16, %%eax \n"
2423
"and $0x00100000, %%eax \n"
2424
"or %%eax, %%ecx \n"
2425
"shl $1, %%eax \n"
2426
"or %%eax, %%ecx \n"
2427
"shl $1, %%eax \n"
2428
"or %%eax, %%ecx \n"
2429
"shl $1, %%eax \n"
2430
"or %%eax, %%ecx \n"
2431
"mov %%edx, %%eax \n"
2432
"shl $12, %%eax \n" // Intensity
2433
"and $0x000E0000, %%eax \n"
2434
"or %%eax, %%ecx \n"
2435
"shr $3, %%eax \n"
2436
"and $0x00010000, %%eax \n"
2437
"or %%eax, %%ecx \n"
2438
2439
// pixel #8
2440
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2441
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2442
"mov %%edx, %%eax \n"
2443
"shl $28, %%eax \n" //Alpha
2444
"and $0x10000000, %%eax \n"
2445
"or %%eax, %%ecx \n"
2446
"shl $1, %%eax \n"
2447
"or %%eax, %%ecx \n"
2448
"shl $1, %%eax \n"
2449
"or %%eax, %%ecx \n"
2450
"shl $1, %%eax \n"
2451
"or %%eax, %%ecx \n"
2452
"mov %%edx, %%eax \n"
2453
"shl $24, %%eax \n" // Intensity
2454
"and $0x0E000000, %%eax \n"
2455
"or %%eax, %%ecx \n"
2456
"shr $3, %%eax \n"
2457
"and $0x01000000, %%eax \n"
2458
"or %%eax, %%ecx \n"
2459
2460
"mov %%ecx, (%[dst]) \n"
2461
"add $4, %[dst] \n"
2462
// }
2463
2464
// * copy
2465
"mov (%[src]), %%eax \n" // read all 8 pixels
2466
"bswap %%eax \n"
2467
"add $4, %[src] \n"
2468
"mov %%eax, %%edx \n"
2469
2470
// 1st dword {
2471
"xor %%ecx, %%ecx \n"
2472
2473
// pixel #1
2474
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2475
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2476
"mov %%edx, %%eax \n"
2477
"shr $24, %%eax \n" //Alpha
2478
"and $0x00000010, %%eax \n"
2479
"or %%eax, %%ecx \n"
2480
"shl $1, %%eax \n"
2481
"or %%eax, %%ecx \n"
2482
"shl $1, %%eax \n"
2483
"or %%eax, %%ecx \n"
2484
"shl $1, %%eax \n"
2485
"or %%eax, %%ecx \n"
2486
"mov %%edx, %%eax \n"
2487
"shr $28, %%eax \n" // Intensity
2488
"and $0x0000000E, %%eax \n"
2489
"or %%eax, %%ecx \n"
2490
"shr $3, %%eax \n"
2491
"or %%eax, %%ecx \n"
2492
2493
// pixel #2
2494
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2495
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2496
"mov %%edx, %%eax \n"
2497
"shr $12, %%eax \n" //Alpha
2498
"and $0x00001000, %%eax \n"
2499
"or %%eax, %%ecx \n"
2500
"shl $1, %%eax \n"
2501
"or %%eax, %%ecx \n"
2502
"shl $1, %%eax \n"
2503
"or %%eax, %%ecx \n"
2504
"shl $1, %%eax \n"
2505
"or %%eax, %%ecx \n"
2506
"mov %%edx, %%eax \n"
2507
"shr $16, %%eax \n" // Intensity
2508
"and $0x00000E00, %%eax \n"
2509
"or %%eax, %%ecx \n"
2510
"shr $3, %%eax \n"
2511
"and $0x00000100, %%eax \n"
2512
"or %%eax, %%ecx \n"
2513
2514
// pixel #3
2515
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2516
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2517
//Alpha
2518
"mov %%edx, %%eax \n"
2519
"and $0x00100000, %%eax \n"
2520
"or %%eax, %%ecx \n"
2521
"shl $1, %%eax \n"
2522
"or %%eax, %%ecx \n"
2523
"shl $1, %%eax \n"
2524
"or %%eax, %%ecx \n"
2525
"shl $1, %%eax \n"
2526
"or %%eax, %%ecx \n"
2527
"mov %%edx, %%eax \n"
2528
"shr $4, %%eax \n" // Intensity
2529
"and $0x000E0000, %%eax \n"
2530
"or %%eax, %%ecx \n"
2531
"shr $3, %%eax \n"
2532
"and $0x00010000, %%eax \n"
2533
"or %%eax, %%ecx \n"
2534
2535
// pixel #4
2536
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2537
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2538
"mov %%edx, %%eax \n"
2539
"shl $12, %%eax \n" //Alpha
2540
"and $0x10000000, %%eax \n"
2541
"or %%eax, %%ecx \n"
2542
"shl $1, %%eax \n"
2543
"or %%eax, %%ecx \n"
2544
"shl $1, %%eax \n"
2545
"or %%eax, %%ecx \n"
2546
"shl $1, %%eax \n"
2547
"or %%eax, %%ecx \n"
2548
"mov %%edx, %%eax \n"
2549
"shl $8, %%eax \n" // Intensity
2550
"and $0x0E000000, %%eax \n"
2551
"or %%eax, %%ecx \n"
2552
"shr $3, %%eax \n"
2553
"and $0x01000000, %%eax \n"
2554
"or %%eax, %%ecx \n"
2555
2556
2557
"mov %%ecx, (%[dst]) \n"
2558
"add $4, %[dst] \n"
2559
// }
2560
2561
// 2nd dword {
2562
"xor %%ecx, %%ecx \n"
2563
2564
// pixel #5
2565
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2566
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2567
"mov %%edx, %%eax \n"
2568
"shr $8, %%eax \n" //Alpha
2569
"and $0x00000010, %%eax \n"
2570
"or %%eax, %%ecx \n"
2571
"shl $1, %%eax \n"
2572
"or %%eax, %%ecx \n"
2573
"shl $1, %%eax \n"
2574
"or %%eax, %%ecx \n"
2575
"shl $1, %%eax \n"
2576
"or %%eax, %%ecx \n"
2577
"mov %%edx, %%eax \n"
2578
"shr $12, %%eax \n" // Intensity
2579
"and $0x0000000E, %%eax \n"
2580
"or %%eax, %%ecx \n"
2581
"shr $3, %%eax \n"
2582
"or %%eax, %%ecx \n"
2583
2584
// pixel #6
2585
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2586
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2587
//Alpha
2588
"mov %%edx, %%eax \n"
2589
"shl $4, %%eax \n"
2590
"and $0x00001000, %%eax \n"
2591
"or %%eax, %%ecx \n"
2592
"shl $1, %%eax \n"
2593
"or %%eax, %%ecx \n"
2594
"shl $1, %%eax \n"
2595
"or %%eax, %%ecx \n"
2596
"shl $1, %%eax \n"
2597
"or %%eax, %%ecx \n"
2598
"mov %%edx, %%eax \n" // Intensity
2599
"and $0x00000E00, %%eax \n"
2600
"or %%eax, %%ecx \n"
2601
"shr $3, %%eax \n"
2602
"and $0x00000100, %%eax \n"
2603
"or %%eax, %%ecx \n"
2604
2605
// pixel #7
2606
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2607
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2608
//Alpha
2609
"mov %%edx, %%eax \n"
2610
"shl $16, %%eax \n"
2611
"and $0x00100000, %%eax \n"
2612
"or %%eax, %%ecx \n"
2613
"shl $1, %%eax \n"
2614
"or %%eax, %%ecx \n"
2615
"shl $1, %%eax \n"
2616
"or %%eax, %%ecx \n"
2617
"shl $1, %%eax \n"
2618
"or %%eax, %%ecx \n"
2619
"mov %%edx, %%eax \n"
2620
"shl $12, %%eax \n" // Intensity
2621
"and $0x000E0000, %%eax \n"
2622
"or %%eax, %%ecx \n"
2623
"shr $3, %%eax \n"
2624
"and $0x00010000, %%eax \n"
2625
"or %%eax, %%ecx \n"
2626
2627
// pixel #8
2628
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2629
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2630
"mov %%edx, %%eax \n"
2631
"shl $28, %%eax \n" //Alpha
2632
"and $0x10000000, %%eax \n"
2633
"or %%eax, %%ecx \n"
2634
"shl $1, %%eax \n"
2635
"or %%eax, %%ecx \n"
2636
"shl $1, %%eax \n"
2637
"or %%eax, %%ecx \n"
2638
"shl $1, %%eax \n"
2639
"or %%eax, %%ecx \n"
2640
"mov %%edx, %%eax \n"
2641
"shl $24, %%eax \n" // Intensity
2642
"and $0x0E000000, %%eax \n"
2643
"or %%eax, %%ecx \n"
2644
"shr $3, %%eax \n"
2645
"and $0x01000000, %%eax \n"
2646
"or %%eax, %%ecx \n"
2647
2648
"mov %%ecx, (%[dst]) \n"
2649
"add $4, %[dst] \n"
2650
// }
2651
2652
// *
2653
2654
"mov %[tempx], %[c] \n"
2655
"dec %%ecx \n"
2656
"jnz 2b \n" // x_loop2
2657
2658
"mov %[tempy], %[c] \n"
2659
"dec %%ecx \n"
2660
"jz 4f \n" // end_y_loop2
2661
"mov %[c], %[tempy] \n"
2662
2663
"add %[line], %[src] \n"
2664
"add %[ext], %[dst] \n"
2665
2666
"mov %[wid_64], %%ecx \n"
2667
"3: \n" // x_loop_22
2668
"mov %[c], %[tempx] \n"
2669
2670
"mov 4(%[src]), %%eax \n" // read all 8 pixels
2671
"bswap %%eax \n"
2672
"mov %%eax, %%edx \n"
2673
2674
// 1st dword {
2675
"xor %%ecx, %%ecx \n"
2676
2677
// pixel #1
2678
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2679
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2680
"mov %%edx, %%eax \n"
2681
"shr $24, %%eax \n" //Alpha
2682
"and $0x00000010, %%eax \n"
2683
"or %%eax, %%ecx \n"
2684
"shl $1, %%eax \n"
2685
"or %%eax, %%ecx \n"
2686
"shl $1, %%eax \n"
2687
"or %%eax, %%ecx \n"
2688
"shl $1, %%eax \n"
2689
"or %%eax, %%ecx \n"
2690
"mov %%edx, %%eax \n"
2691
"shr $28, %%eax \n" // Intensity
2692
"and $0x0000000E, %%eax \n"
2693
"or %%eax, %%ecx \n"
2694
"shr $3, %%eax \n"
2695
"or %%eax, %%ecx \n"
2696
2697
// pixel #2
2698
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2699
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2700
"mov %%edx, %%eax \n"
2701
"shr $12, %%eax \n" //Alpha
2702
"and $0x00001000, %%eax \n"
2703
"or %%eax, %%ecx \n"
2704
"shl $1, %%eax \n"
2705
"or %%eax, %%ecx \n"
2706
"shl $1, %%eax \n"
2707
"or %%eax, %%ecx \n"
2708
"shl $1, %%eax \n"
2709
"or %%eax, %%ecx \n"
2710
"mov %%edx, %%eax \n"
2711
"shr $16, %%eax \n" // Intensity
2712
"and $0x00000E00, %%eax \n"
2713
"or %%eax, %%ecx \n"
2714
"shr $3, %%eax \n"
2715
"and $0x00000100, %%eax \n"
2716
"or %%eax, %%ecx \n"
2717
2718
// pixel #3
2719
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2720
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2721
//Alpha
2722
"mov %%edx, %%eax \n"
2723
"and $0x00100000, %%eax \n"
2724
"or %%eax, %%ecx \n"
2725
"shl $1, %%eax \n"
2726
"or %%eax, %%ecx \n"
2727
"shl $1, %%eax \n"
2728
"or %%eax, %%ecx \n"
2729
"shl $1, %%eax \n"
2730
"or %%eax, %%ecx \n"
2731
"mov %%edx, %%eax \n"
2732
"shr $4, %%eax \n" // Intensity
2733
"and $0x000E0000, %%eax \n"
2734
"or %%eax, %%ecx \n"
2735
"shr $3, %%eax \n"
2736
"and $0x00010000, %%eax \n"
2737
"or %%eax, %%ecx \n"
2738
2739
// pixel #4
2740
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2741
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2742
"mov %%edx, %%eax \n"
2743
"shl $12, %%eax \n" //Alpha
2744
"and $0x10000000, %%eax \n"
2745
"or %%eax, %%ecx \n"
2746
"shl $1, %%eax \n"
2747
"or %%eax, %%ecx \n"
2748
"shl $1, %%eax \n"
2749
"or %%eax, %%ecx \n"
2750
"shl $1, %%eax \n"
2751
"or %%eax, %%ecx \n"
2752
"mov %%edx, %%eax \n"
2753
"shl $8, %%eax \n" // Intensity
2754
"and $0x0E000000, %%eax \n"
2755
"or %%eax, %%ecx \n"
2756
"shr $3, %%eax \n"
2757
"and $0x01000000, %%eax \n"
2758
"or %%eax, %%ecx \n"
2759
2760
2761
"mov %%ecx, (%[dst]) \n"
2762
"add $4, %[dst] \n"
2763
// }
2764
2765
// 2nd dword {
2766
"xor %%ecx, %%ecx \n"
2767
2768
// pixel #5
2769
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2770
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2771
"mov %%edx, %%eax \n"
2772
"shr $8, %%eax \n" //Alpha
2773
"and $0x00000010, %%eax \n"
2774
"or %%eax, %%ecx \n"
2775
"shl $1, %%eax \n"
2776
"or %%eax, %%ecx \n"
2777
"shl $1, %%eax \n"
2778
"or %%eax, %%ecx \n"
2779
"shl $1, %%eax \n"
2780
"or %%eax, %%ecx \n"
2781
"mov %%edx, %%eax \n"
2782
"shr $12, %%eax \n" // Intensity
2783
"and $0x0000000E, %%eax \n"
2784
"or %%eax, %%ecx \n"
2785
"shr $3, %%eax \n"
2786
"or %%eax, %%ecx \n"
2787
2788
// pixel #6
2789
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2790
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2791
//Alpha
2792
"mov %%edx, %%eax \n"
2793
"shl $4, %%eax \n"
2794
"and $0x00001000, %%eax \n"
2795
"or %%eax, %%ecx \n"
2796
"shl $1, %%eax \n"
2797
"or %%eax, %%ecx \n"
2798
"shl $1, %%eax \n"
2799
"or %%eax, %%ecx \n"
2800
"shl $1, %%eax \n"
2801
"or %%eax, %%ecx \n"
2802
"mov %%edx, %%eax \n" // Intensity
2803
"and $0x00000E00, %%eax \n"
2804
"or %%eax, %%ecx \n"
2805
"shr $3, %%eax \n"
2806
"and $0x00000100, %%eax \n"
2807
"or %%eax, %%ecx \n"
2808
2809
// pixel #7
2810
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2811
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2812
//Alpha
2813
"mov %%edx, %%eax \n"
2814
"shl $16, %%eax \n"
2815
"and $0x00100000, %%eax \n"
2816
"or %%eax, %%ecx \n"
2817
"shl $1, %%eax \n"
2818
"or %%eax, %%ecx \n"
2819
"shl $1, %%eax \n"
2820
"or %%eax, %%ecx \n"
2821
"shl $1, %%eax \n"
2822
"or %%eax, %%ecx \n"
2823
"mov %%edx, %%eax \n"
2824
"shl $12, %%eax \n" // Intensity
2825
"and $0x000E0000, %%eax \n"
2826
"or %%eax, %%ecx \n"
2827
"shr $3, %%eax \n"
2828
"and $0x00010000, %%eax \n"
2829
"or %%eax, %%ecx \n"
2830
2831
// pixel #8
2832
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
2833
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2834
"mov %%edx, %%eax \n"
2835
"shl $28, %%eax \n" //Alpha
2836
"and $0x10000000, %%eax \n"
2837
"or %%eax, %%ecx \n"
2838
"shl $1, %%eax \n"
2839
"or %%eax, %%ecx \n"
2840
"shl $1, %%eax \n"
2841
"or %%eax, %%ecx \n"
2842
"shl $1, %%eax \n"
2843
"or %%eax, %%ecx \n"
2844
"mov %%edx, %%eax \n"
2845
"shl $24, %%eax \n" // Intensity
2846
"and $0x0E000000, %%eax \n"
2847
"or %%eax, %%ecx \n"
2848
"shr $3, %%eax \n"
2849
"and $0x01000000, %%eax \n"
2850
"or %%eax, %%ecx \n"
2851
2852
"mov %%ecx, (%[dst]) \n"
2853
"add $4, %[dst] \n"
2854
// }
2855
2856
// * copy
2857
"mov (%[src]), %%eax \n" // read all 8 pixels
2858
"bswap %%eax \n"
2859
"add $8, %[src] \n"
2860
"mov %%eax, %%edx \n"
2861
2862
// 1st dword {
2863
"xor %%ecx, %%ecx \n"
2864
2865
// pixel #1
2866
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2867
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2868
"mov %%edx, %%eax \n"
2869
"shr $24, %%eax \n" //Alpha
2870
"and $0x00000010, %%eax \n"
2871
"or %%eax, %%ecx \n"
2872
"shl $1, %%eax \n"
2873
"or %%eax, %%ecx \n"
2874
"shl $1, %%eax \n"
2875
"or %%eax, %%ecx \n"
2876
"shl $1, %%eax \n"
2877
"or %%eax, %%ecx \n"
2878
"mov %%edx, %%eax \n"
2879
"shr $28, %%eax \n" // Intensity
2880
"and $0x0000000E, %%eax \n"
2881
"or %%eax, %%ecx \n"
2882
"shr $3, %%eax \n"
2883
"or %%eax, %%ecx \n"
2884
2885
// pixel #2
2886
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
2887
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2888
"mov %%edx, %%eax \n"
2889
"shr $12, %%eax \n" //Alpha
2890
"and $0x00001000, %%eax \n"
2891
"or %%eax, %%ecx \n"
2892
"shl $1, %%eax \n"
2893
"or %%eax, %%ecx \n"
2894
"shl $1, %%eax \n"
2895
"or %%eax, %%ecx \n"
2896
"shl $1, %%eax \n"
2897
"or %%eax, %%ecx \n"
2898
"mov %%edx, %%eax \n"
2899
"shr $16, %%eax \n" // Intensity
2900
"and $0x00000E00, %%eax \n"
2901
"or %%eax, %%ecx \n"
2902
"shr $3, %%eax \n"
2903
"and $0x00000100, %%eax \n"
2904
"or %%eax, %%ecx \n"
2905
2906
// pixel #3
2907
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
2908
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
2909
//Alpha
2910
"mov %%edx, %%eax \n"
2911
"and $0x00100000, %%eax \n"
2912
"or %%eax, %%ecx \n"
2913
"shl $1, %%eax \n"
2914
"or %%eax, %%ecx \n"
2915
"shl $1, %%eax \n"
2916
"or %%eax, %%ecx \n"
2917
"shl $1, %%eax \n"
2918
"or %%eax, %%ecx \n"
2919
"mov %%edx, %%eax \n"
2920
"shr $4, %%eax \n" // Intensity
2921
"and $0x000E0000, %%eax \n"
2922
"or %%eax, %%ecx \n"
2923
"shr $3, %%eax \n"
2924
"and $0x00010000, %%eax \n"
2925
"or %%eax, %%ecx \n"
2926
2927
// pixel #4
2928
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
2929
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
2930
"mov %%edx, %%eax \n"
2931
"shl $12, %%eax \n" //Alpha
2932
"and $0x10000000, %%eax \n"
2933
"or %%eax, %%ecx \n"
2934
"shl $1, %%eax \n"
2935
"or %%eax, %%ecx \n"
2936
"shl $1, %%eax \n"
2937
"or %%eax, %%ecx \n"
2938
"shl $1, %%eax \n"
2939
"or %%eax, %%ecx \n"
2940
"mov %%edx, %%eax \n"
2941
"shl $8, %%eax \n" // Intensity
2942
"and $0x0E000000, %%eax \n"
2943
"or %%eax, %%ecx \n"
2944
"shr $3, %%eax \n"
2945
"and $0x01000000, %%eax \n"
2946
"or %%eax, %%ecx \n"
2947
2948
2949
"mov %%ecx, (%[dst]) \n"
2950
"add $4, %[dst] \n"
2951
// }
2952
2953
// 2nd dword {
2954
"xor %%ecx, %%ecx \n"
2955
2956
// pixel #5
2957
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
2958
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
2959
"mov %%edx, %%eax \n"
2960
"shr $8, %%eax \n" //Alpha
2961
"and $0x00000010, %%eax \n"
2962
"or %%eax, %%ecx \n"
2963
"shl $1, %%eax \n"
2964
"or %%eax, %%ecx \n"
2965
"shl $1, %%eax \n"
2966
"or %%eax, %%ecx \n"
2967
"shl $1, %%eax \n"
2968
"or %%eax, %%ecx \n"
2969
"mov %%edx, %%eax \n"
2970
"shr $12, %%eax \n" // Intensity
2971
"and $0x0000000E, %%eax \n"
2972
"or %%eax, %%ecx \n"
2973
"shr $3, %%eax \n"
2974
"or %%eax, %%ecx \n"
2975
2976
// pixel #6
2977
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
2978
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
2979
//Alpha
2980
"mov %%edx, %%eax \n"
2981
"shl $4, %%eax \n"
2982
"and $0x00001000, %%eax \n"
2983
"or %%eax, %%ecx \n"
2984
"shl $1, %%eax \n"
2985
"or %%eax, %%ecx \n"
2986
"shl $1, %%eax \n"
2987
"or %%eax, %%ecx \n"
2988
"shl $1, %%eax \n"
2989
"or %%eax, %%ecx \n"
2990
"mov %%edx, %%eax \n" // Intensity
2991
"and $0x00000E00, %%eax \n"
2992
"or %%eax, %%ecx \n"
2993
"shr $3, %%eax \n"
2994
"and $0x00000100, %%eax \n"
2995
"or %%eax, %%ecx \n"
2996
2997
// pixel #7
2998
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
2999
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
3000
//Alpha
3001
"mov %%edx, %%eax \n"
3002
"shl $16, %%eax \n"
3003
"and $0x00100000, %%eax \n"
3004
"or %%eax, %%ecx \n"
3005
"shl $1, %%eax \n"
3006
"or %%eax, %%ecx \n"
3007
"shl $1, %%eax \n"
3008
"or %%eax, %%ecx \n"
3009
"shl $1, %%eax \n"
3010
"or %%eax, %%ecx \n"
3011
"mov %%edx, %%eax \n"
3012
"shl $12, %%eax \n" // Intensity
3013
"and $0x000E0000, %%eax \n"
3014
"or %%eax, %%ecx \n"
3015
"shr $3, %%eax \n"
3016
"and $0x00010000, %%eax \n"
3017
"or %%eax, %%ecx \n"
3018
3019
// pixel #8
3020
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
3021
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
3022
"mov %%edx, %%eax \n"
3023
"shl $28, %%eax \n" //Alpha
3024
"and $0x10000000, %%eax \n"
3025
"or %%eax, %%ecx \n"
3026
"shl $1, %%eax \n"
3027
"or %%eax, %%ecx \n"
3028
"shl $1, %%eax \n"
3029
"or %%eax, %%ecx \n"
3030
"shl $1, %%eax \n"
3031
"or %%eax, %%ecx \n"
3032
"mov %%edx, %%eax \n"
3033
"shl $24, %%eax \n" // Intensity
3034
"and $0x0E000000, %%eax \n"
3035
"or %%eax, %%ecx \n"
3036
"shr $3, %%eax \n"
3037
"and $0x01000000, %%eax \n"
3038
"or %%eax, %%ecx \n"
3039
3040
"mov %%ecx, (%[dst]) \n"
3041
"add $4, %[dst] \n"
3042
// }
3043
// *
3044
3045
"mov %[tempx], %[c] \n"
3046
"dec %%ecx \n"
3047
"jnz 3b \n" // x_loop_22
3048
3049
"add %[line], %[src] \n"
3050
"add %[ext], %[dst] \n"
3051
3052
"mov %[tempy], %[c] \n"
3053
"dec %%ecx \n"
3054
"jnz 1b \n" // y_loop2
3055
3056
"4: \n" // end_y_loop2
3057
: [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
3058
: [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
3059
: "memory", "cc", "eax", "edx"
3060
);
3061
#endif
3062
3063
return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
3064
}
3065
3066
//****************************************************************
3067
// Size: 0, Format: 4
3068
3069
// Load4bI - expand a 4-bit intensity texture to 8 bits per texel.
//
// Every source byte holds two texels (high nibble first). Each 4-bit value v
// is written as one destination byte with v replicated into both nibbles
// ((v << 4) | v), and the result is reported as GR_TEXFMT_ALPHA_INTENSITY_44.
//
//   dst        - destination buffer, receives wid_64 * 16 bytes per row
//   src        - packed 4-bit source texels, wid_64 * 8 bytes per row
//   wid_64     - row width in 64-bit words (16 texels each); clamped to >= 1
//   height     - number of rows; clamped to >= 1
//   line       - bytes added to src between rows (after the row's own data)
//   real_width - destination row pitch in bytes; the gap real_width - wid_64*16
//                is added to dst between rows
//   tile       - not used here, only forwarded to Load4bCI
//
// When rdp.tlut_mode is non-zero the call is forwarded unchanged to Load4bCI.
//
// This is a portable replacement for the former MSVC/GCC x86 inline assembly:
// it produces the same bytes, but also works when NO_ASM is defined (the old
// code then returned without writing anything to dst) and on targets without
// x86 inline assembly support.
DWORD Load4bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
{
  if (rdp.tlut_mode != 0)
    return Load4bCI (dst, src, wid_64, height, line, real_width, tile);

  if (wid_64 < 1) wid_64 = 1;
  if (height < 1) height = 1;
  int ext = (real_width - (wid_64 << 4));

  for (int y = 0; y < height; y++)
  {
    // On odd rows the two 32-bit halves of every 64-bit source word are
    // exchanged: the half at offset 4 is decoded first, then the one at 0.
    const int first_half = (y & 1) ? 4 : 0;

    for (int x = 0; x < wid_64; x++)
    {
      for (int half = 0; half < 2; half++)
      {
        const unsigned char * in = src + ((half == 0) ? first_half : (4 - first_half));

        // Fetch the four packed bytes before storing anything, mirroring the
        // original "load dword, then store two dwords" ordering.
        unsigned char packed[4];
        for (int i = 0; i < 4; i++)
          packed[i] = in[i];

        for (int i = 0; i < 4; i++)
        {
          const unsigned char hi = (unsigned char)(packed[i] >> 4);
          const unsigned char lo = (unsigned char)(packed[i] & 0x0F);
          *dst++ = (unsigned char)((hi << 4) | hi);  // first texel of the byte
          *dst++ = (unsigned char)((lo << 4) | lo);  // second texel of the byte
        }
      }
      src += 8;   // one 64-bit word consumed, 16 bytes produced
    }

    // Step over the row padding; skipped after the last row so the pointers
    // are never moved past data that is not going to be touched.
    if (y + 1 < height)
    {
      src += line;
      dst += ext;
    }
  }

  return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
}
3724
3725
//****************************************************************
3726
// Size: 0, Format: 0
3727
3728
// Load4bSelect - dispatch a 4-bit texture load on the current TLUT mode.
//
// A non-zero rdp.tlut_mode routes the call to Load4bCI; a zero mode routes it
// to Load4bI. All arguments are forwarded untouched and the callee's return
// value is returned as-is.
DWORD Load4bSelect (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
{
  if (rdp.tlut_mode != 0)
    return Load4bCI (dst, src, wid_64, height, line, real_width, tile);

  return Load4bI (dst, src, wid_64, height, line, real_width, tile);
}
3735
3736
3737