// Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/TexLoad8b.h
/*1* Glide64 - Glide video plugin for Nintendo 64 emulators.2* Copyright (c) 2002 Dave20013* Copyright (c) 2008 Günther <[email protected]>4*5* This program is free software; you can redistribute it and/or modify6* it under the terms of the GNU General Public License as published by7* the Free Software Foundation; either version 2 of the License, or8* any later version.9*10* This program is distributed in the hope that it will be useful,11* but WITHOUT ANY WARRANTY; without even the implied warranty of12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the13* GNU General Public License for more details.14*15* You should have received a copy of the GNU General Public16* License along with this program; if not, write to the Free17* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,18* Boston, MA 02110-1301, USA19*/2021//****************************************************************22//23// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)24// Project started on December 29th, 200125//26// To modify Glide64:27// * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.28// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. 
If people sent the whole thing, I would have many different versions, but no idea how to combine them all.29//30// Official Glide64 development channel: #Glide64 on EFnet31//32// Original author: Dave2001 ([email protected])33// Other authors: Gonetz, Gugaman34//35//****************************************************************3637DWORD Load8bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)38{39if (wid_64 < 1) wid_64 = 1;40if (height < 1) height = 1;41int ext = (real_width - (wid_64 << 3)) << 1;42unsigned short * pal = rdp.pal_8;4344if (rdp.tlut_mode == 2)45{46#if !defined(__GNUC__) && !defined(NO_ASM)47__asm {48mov ebx,dword ptr [pal]4950mov esi,dword ptr [src]51mov edi,dword ptr [dst]5253mov ecx,dword ptr [height]54y_loop:55push ecx5657mov ecx,dword ptr [wid_64]58x_loop:59push ecx6061mov eax,dword ptr [esi] // read all 4 pixels62bswap eax63add esi,464mov edx,eax6566// 1st dword output {67shr eax,1568and eax,0x1FE69mov cx,word ptr [ebx+eax]70ror cx,171shl ecx,167273mov eax,edx74shr eax,2375and eax,0x1FE76mov cx,word ptr [ebx+eax]77ror cx,17879mov dword ptr [edi],ecx80add edi,481// }8283// 2nd dword output {84mov eax,edx85shl eax,186and eax,0x1FE87mov cx,word ptr [ebx+eax]88ror cx,189shl ecx,169091shr edx,792and edx,0x1FE93mov cx,word ptr [ebx+edx]94ror cx,19596mov dword ptr [edi],ecx97add edi,498// }99100// * copy101mov eax,dword ptr [esi] // read all 4 pixels102bswap eax103add esi,4104mov edx,eax105106// 1st dword output {107shr eax,15108and eax,0x1FE109mov cx,word ptr [ebx+eax]110ror cx,1111shl ecx,16112113mov eax,edx114shr eax,23115and eax,0x1FE116mov cx,word ptr [ebx+eax]117ror cx,1118119mov dword ptr [edi],ecx120add edi,4121// }122123// 2nd dword output {124mov eax,edx125shl eax,1126and eax,0x1FE127mov cx,word ptr [ebx+eax]128ror cx,1129shl ecx,16130131shr edx,7132and edx,0x1FE133mov cx,word ptr [ebx+edx]134ror cx,1135136mov dword ptr [edi],ecx137add edi,4138// }139// *140141pop ecx142143dec ecx144jnz 
x_loop145146pop ecx147dec ecx148jz end_y_loop149push ecx150151add esi,dword ptr [line]152add edi,dword ptr [ext]153154mov ecx,dword ptr [wid_64]155x_loop_2:156push ecx157158mov eax,dword ptr [esi+4] // read all 4 pixels159bswap eax160mov edx,eax161162// 1st dword output {163shr eax,15164and eax,0x1FE165mov cx,word ptr [ebx+eax]166ror cx,1167shl ecx,16168169mov eax,edx170shr eax,23171and eax,0x1FE172mov cx,word ptr [ebx+eax]173ror cx,1174175mov dword ptr [edi],ecx176add edi,4177// }178179// 2nd dword output {180mov eax,edx181shl eax,1182and eax,0x1FE183mov cx,word ptr [ebx+eax]184ror cx,1185shl ecx,16186187shr edx,7188and edx,0x1FE189mov cx,word ptr [ebx+edx]190ror cx,1191192mov dword ptr [edi],ecx193add edi,4194// }195196// * copy197mov eax,dword ptr [esi] // read all 4 pixels198bswap eax199add esi,8200mov edx,eax201202// 1st dword output {203shr eax,15204and eax,0x1FE205mov cx,word ptr [ebx+eax]206ror cx,1207shl ecx,16208209mov eax,edx210shr eax,23211and eax,0x1FE212mov cx,word ptr [ebx+eax]213ror cx,1214215mov dword ptr [edi],ecx216add edi,4217// }218219// 2nd dword output {220mov eax,edx221shl eax,1222and eax,0x1FE223mov cx,word ptr [ebx+eax]224ror cx,1225shl ecx,16226227shr edx,7228and edx,0x1FE229mov cx,word ptr [ebx+edx]230ror cx,1231232mov dword ptr [edi],ecx233add edi,4234// }235// *236237pop ecx238239dec ecx240jnz x_loop_2241242add esi,dword ptr [line]243add edi,dword ptr [ext]244245pop ecx246dec ecx247jnz y_loop248249end_y_loop:250}251#elif !defined(NO_ASM)252//printf("Load8bCI1\n");253long lTempX, lTempY, lHeight = (long) height;254intptr_t fake_eax, fake_edx;255asm volatile (256"1: \n" // y_loop4257"mov %[c], %[tempy] \n"258259"mov %[wid_64], %%ecx \n"260"2: \n" // x_loop4261"mov %[c], %[tempx] \n"262263"mov (%[src]), %%eax \n" // read all 4 pixels264"bswap %%eax \n"265"add $4, %[src] \n"266"mov %%eax, %%edx \n"267268// 1st dword output {269"shr $15, %%eax \n"270"and $0x1FE, %%eax \n"271"mov (%[pal],%[a]), %%cx \n"272"ror $1, %%cx \n"273"shl $16, %%ecx 
\n"274275"mov %%edx, %%eax \n"276"shr $23, %%eax \n"277"and $0x1FE, %%eax \n"278"mov (%[pal],%[a]), %%cx \n"279"ror $1, %%cx \n"280281"mov %%ecx, (%[dst]) \n"282"add $4, %[dst] \n"283// }284285// 2nd dword output {286"mov %%edx, %%eax \n"287"shl $1, %%eax \n"288"and $0x1FE, %%eax \n"289"mov (%[pal],%[a]), %%cx \n"290"ror $1, %%cx \n"291"shl $16, %%ecx \n"292293"shr $7, %%edx \n"294"and $0x1FE, %%edx \n"295"mov (%[pal],%[d]), %%cx \n"296"ror $1, %%cx \n"297298"mov %%ecx, (%[dst]) \n"299"add $4, %[dst] \n"300// }301302// * copy303"mov (%[src]), %%eax \n" // read all 4 pixels304"bswap %%eax \n"305"add $4, %[src] \n"306"mov %%eax, %%edx \n"307308// 1st dword output {309"shr $15, %%eax \n"310"and $0x1FE, %%eax \n"311"mov (%[pal],%[a]), %%cx \n"312"ror $1, %%cx \n"313"shl $16, %%ecx \n"314315"mov %%edx, %%eax \n"316"shr $23, %%eax \n"317"and $0x1FE, %%eax \n"318"mov (%[pal],%[a]), %%cx \n"319"ror $1, %%cx \n"320321"mov %%ecx, (%[dst]) \n"322"add $4, %[dst] \n"323// }324325// 2nd dword output {326"mov %%edx, %%eax \n"327"shl $1, %%eax \n"328"and $0x1FE, %%eax \n"329"mov (%[pal],%[a]), %%cx \n"330"ror $1, %%cx \n"331"shl $16, %%ecx \n"332333"shr $7, %%edx \n"334"and $0x1FE, %%edx \n"335"mov (%[pal],%[d]), %%cx \n"336"ror $1, %%cx \n"337338"mov %%ecx, (%[dst]) \n"339"add $4, %[dst] \n"340// }341// *342343"mov %[tempx], %[c] \n"344345"dec %%ecx \n"346"jnz 2b \n" // x_loop4347348"mov %[tempy], %[c] \n"349"dec %%ecx \n"350"jz 4f \n" // end_y_loop4351"mov %[c], %[tempy] \n"352353"add %[line], %[src] \n"354"add %[ext], %[dst] \n"355356"mov %[wid_64], %%ecx \n"357"3: \n" // x_loop_24358"mov %[c], %[tempx] \n"359360"mov 4(%[src]), %%eax \n" // read all 4 pixels361"bswap %%eax \n"362"mov %%eax, %%edx \n"363364// 1st dword output {365"shr $15, %%eax \n"366"and $0x1FE, %%eax \n"367"mov (%[pal],%[a]), %%cx \n"368"ror $1, %%cx \n"369"shl $16, %%ecx \n"370371"mov %%edx, %%eax \n"372"shr $23, %%eax \n"373"and $0x1FE, %%eax \n"374"mov (%[pal],%[a]), %%cx \n"375"ror $1, %%cx \n"376377"mov 
%%ecx, (%[dst]) \n"378"add $4, %[dst] \n"379// }380381// 2nd dword output {382"mov %%edx, %%eax \n"383"shl $1, %%eax \n"384"and $0x1FE, %%eax \n"385"mov (%[pal],%[a]), %%cx \n"386"ror $1, %%cx \n"387"shl $16, %%ecx \n"388389"shr $7, %%edx \n"390"and $0x1FE, %%edx \n"391"mov (%[pal],%[d]), %%cx \n"392"ror $1, %%cx \n"393394"mov %%ecx, (%[dst]) \n"395"add $4, %[dst] \n"396// }397398// * copy399"mov (%[src]), %%eax \n" // read all 4 pixels400"bswap %%eax \n"401"add $8, %[src] \n"402"mov %%eax, %%edx \n"403404// 1st dword output {405"shr $15, %%eax \n"406"and $0x1FE, %%eax \n"407"mov (%[pal],%[a]), %%cx \n"408"ror $1, %%cx \n"409"shl $16, %%ecx \n"410411"mov %%edx, %%eax \n"412"shr $23, %%eax \n"413"and $0x1FE, %%eax \n"414"mov (%[pal],%[a]), %%cx \n"415"ror $1, %%cx \n"416417"mov %%ecx, (%[dst]) \n"418"add $4, %[dst] \n"419// }420421// 2nd dword output {422"mov %%edx, %%eax \n"423"shl $1, %%eax \n"424"and $0x1FE, %%eax \n"425"mov (%[pal],%[a]), %%cx \n"426"ror $1, %%cx \n"427"shl $16, %%ecx \n"428429"shr $7, %%edx \n"430"and $0x1FE, %%edx \n"431"mov (%[pal],%[d]), %%cx \n"432"ror $1, %%cx \n"433434"mov %%ecx, (%[dst]) \n"435"add $4, %[dst] \n"436// }437// *438439"mov %[tempx], %[c] \n"440"dec %%ecx \n"441"jnz 3b \n" // x_loop_24442443"add %[line], %[src] \n"444"add %[ext], %[dst] \n"445446"mov %[tempy], %[c] \n"447"dec %%ecx \n"448"jnz 1b \n" // y_loop4449450"4: \n" // end_y_loop4451: [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)452: [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)453: "memory", "cc"454);455#endif456return (1 << 16) | GR_TEXFMT_ARGB_1555;457}458else459{460#if !defined(__GNUC__) && !defined(NO_ASM)461__asm {462mov ebx,dword ptr [pal]463464mov esi,dword ptr [src]465mov edi,dword ptr [dst]466467mov ecx,dword ptr [height]468ia_y_loop:469push ecx470471mov ecx,dword ptr [wid_64]472ia_x_loop:473push ecx474475mov eax,dword 
ptr [esi] // read all 4 pixels476bswap eax477add esi,4478mov edx,eax479480// 1st dword output {481shr eax,15482and eax,0x1FE483mov cx,word ptr [ebx+eax]484ror cx,8485shl ecx,16486487mov eax,edx488shr eax,23489and eax,0x1FE490mov cx,word ptr [ebx+eax]491ror cx,8492493mov dword ptr [edi],ecx494add edi,4495// }496497// 2nd dword output {498mov eax,edx499shl eax,1500and eax,0x1FE501mov cx,word ptr [ebx+eax]502ror cx,8503shl ecx,16504505shr edx,7506and edx,0x1FE507mov cx,word ptr [ebx+edx]508ror cx,8509510mov dword ptr [edi],ecx511add edi,4512// }513514// * copy515mov eax,dword ptr [esi] // read all 4 pixels516bswap eax517add esi,4518mov edx,eax519520// 1st dword output {521shr eax,15522and eax,0x1FE523mov cx,word ptr [ebx+eax]524ror cx,8525shl ecx,16526527mov eax,edx528shr eax,23529and eax,0x1FE530mov cx,word ptr [ebx+eax]531ror cx,8532533mov dword ptr [edi],ecx534add edi,4535// }536537// 2nd dword output {538mov eax,edx539shl eax,1540and eax,0x1FE541mov cx,word ptr [ebx+eax]542ror cx,8543shl ecx,16544545shr edx,7546and edx,0x1FE547mov cx,word ptr [ebx+edx]548ror cx,8549550mov dword ptr [edi],ecx551add edi,4552// }553// *554555pop ecx556557dec ecx558jnz ia_x_loop559560pop ecx561dec ecx562jz ia_end_y_loop563push ecx564565add esi,dword ptr [line]566add edi,dword ptr [ext]567568mov ecx,dword ptr [wid_64]569ia_x_loop_2:570push ecx571572mov eax,dword ptr [esi+4] // read all 4 pixels573bswap eax574mov edx,eax575576// 1st dword output {577shr eax,15578and eax,0x1FE579mov cx,word ptr [ebx+eax]580ror cx,8581shl ecx,16582583mov eax,edx584shr eax,23585and eax,0x1FE586mov cx,word ptr [ebx+eax]587ror cx,8588589mov dword ptr [edi],ecx590add edi,4591// }592593// 2nd dword output {594mov eax,edx595shl eax,1596and eax,0x1FE597mov cx,word ptr [ebx+eax]598ror cx,8599shl ecx,16600601shr edx,7602and edx,0x1FE603mov cx,word ptr [ebx+edx]604ror cx,8605606mov dword ptr [edi],ecx607add edi,4608// }609610// * copy611mov eax,dword ptr [esi] // read all 4 pixels612bswap eax613add esi,8614mov 
edx,eax615616// 1st dword output {617shr eax,15618and eax,0x1FE619mov cx,word ptr [ebx+eax]620ror cx,8621shl ecx,16622623mov eax,edx624shr eax,23625and eax,0x1FE626mov cx,word ptr [ebx+eax]627ror cx,8628629mov dword ptr [edi],ecx630add edi,4631// }632633// 2nd dword output {634mov eax,edx635shl eax,1636and eax,0x1FE637mov cx,word ptr [ebx+eax]638ror cx,8639shl ecx,16640641shr edx,7642and edx,0x1FE643mov cx,word ptr [ebx+edx]644ror cx,8645646mov dword ptr [edi],ecx647add edi,4648// }649// *650651pop ecx652653dec ecx654jnz ia_x_loop_2655656add esi,dword ptr [line]657add edi,dword ptr [ext]658659pop ecx660dec ecx661jnz ia_y_loop662663ia_end_y_loop:664}665#elif !defined(NO_ASM)666//printf("Load8bCI1\n");667long lTempX, lTempY, lHeight = (long) height;668intptr_t fake_eax, fake_edx;669asm volatile (670"1: \n" // ia_y_loop2671"mov %[c], %[tempy] \n"672673"mov %[wid_64], %%ecx \n"674"2: \n" // ia_x_loop2675"mov %[c], %[tempx] \n"676677"mov (%[src]), %%eax \n" // read all 4 pixels678"bswap %%eax \n"679"add $4, %[src] \n"680"mov %%eax, %%edx \n"681682// 1st dword output {683"shr $15, %%eax \n"684"and $0x1FE, %%eax \n"685"mov (%[pal],%[a]), %%cx \n"686"ror $8, %%cx \n"687"shl $16, %%ecx \n"688689"mov %%edx, %%eax \n"690"shr $23, %%eax \n"691"and $0x1FE, %%eax \n"692"mov (%[pal],%[a]), %%cx \n"693"ror $8, %%cx \n"694695"mov %%ecx, (%[dst]) \n"696"add $4, %[dst] \n"697// }698699// 2nd dword output {700"mov %%edx, %%eax \n"701"shl $1, %%eax \n"702"and $0x1FE, %%eax \n"703"mov (%[pal],%[a]), %%cx \n"704"ror $8, %%cx \n"705"shl $16, %%ecx \n"706707"shr $7, %%edx \n"708"and $0x1FE, %%edx \n"709"mov (%[pal],%[d]), %%cx \n"710"ror $8, %%cx \n"711712"mov %%ecx, (%[dst]) \n"713"add $4, %[dst] \n"714// }715716// * copy717"mov (%[src]), %%eax \n" // read all 4 pixels718"bswap %%eax \n"719"add $4, %[src] \n"720"mov %%eax, %%edx \n"721722// 1st dword output {723"shr $15, %%eax \n"724"and $0x1FE, %%eax \n"725"mov (%[pal],%[a]), %%cx \n"726"ror $8, %%cx \n"727"shl $16, %%ecx \n"728729"mov 
%%edx, %%eax \n"730"shr $23, %%eax \n"731"and $0x1FE, %%eax \n"732"mov (%[pal],%[a]), %%cx \n"733"ror $8, %%cx \n"734735"mov %%ecx, (%[dst]) \n"736"add $4, %[dst] \n"737// }738739// 2nd dword output {740"mov %%edx, %%eax \n"741"shl $1, %%eax \n"742"and $0x1FE, %%eax \n"743"mov (%[pal],%[a]), %%cx \n"744"ror $8, %%cx \n"745"shl $16, %%ecx \n"746747"shr $7, %%edx \n"748"and $0x1FE, %%edx \n"749"mov (%[pal],%[d]), %%cx \n"750"ror $8, %%cx \n"751752"mov %%ecx, (%[dst]) \n"753"add $4, %[dst] \n"754// }755// *756757"mov %[tempx], %[c] \n"758"dec %%ecx \n"759"jnz 2b \n" // ia_x_loop2760761"mov %[tempy], %[c] \n"762"dec %%ecx \n"763"jz 4f \n" // ia_end_y_loop2764"mov %[c], %[tempy] \n"765766"add %[line], %[src] \n"767"add %[ext], %[dst] \n"768769"mov %[wid_64], %%ecx \n"770"3: \n" // ia_x_loop_22771"mov %[c], %[tempx] \n"772773"mov 4(%[src]), %%eax \n" // read all 4 pixels774"bswap %%eax \n"775"mov %%eax, %%edx \n"776777// 1st dword output {778"shr $15, %%eax \n"779"and $0x1FE, %%eax \n"780"mov (%[pal],%[a]), %%cx \n"781"ror $8, %%cx \n"782"shl $16, %%ecx \n"783784"mov %%edx, %%eax \n"785"shr $23, %%eax \n"786"and $0x1FE, %%eax \n"787"mov (%[pal],%[a]), %%cx \n"788"ror $8, %%cx \n"789790"mov %%ecx, (%[dst]) \n"791"add $4, %[dst] \n"792// }793794// 2nd dword output {795"mov %%edx, %%eax \n"796"shl $1, %%eax \n"797"and $0x1FE, %%eax \n"798"mov (%[pal],%[a]), %%cx \n"799"ror $8, %%cx \n"800"shl $16, %%ecx \n"801802"shr $7, %%edx \n"803"and $0x1FE, %%edx \n"804"mov (%[pal],%[d]), %%cx \n"805"ror $8, %%cx \n"806807"mov %%ecx, (%[dst]) \n"808"add $4, %[dst] \n"809// }810811// * copy812"mov (%[src]), %%eax \n" // read all 4 pixels813"bswap %%eax \n"814"add $8, %[src] \n"815"mov %%eax, %%edx \n"816817// 1st dword output {818"shr $15, %%eax \n"819"and $0x1FE, %%eax \n"820"mov (%[pal],%[a]), %%cx \n"821"ror $8, %%cx \n"822"shl $16, %%ecx \n"823824"mov %%edx, %%eax \n"825"shr $23, %%eax \n"826"and $0x1FE, %%eax \n"827"mov (%[pal],%[a]), %%cx \n"828"ror $8, %%cx \n"829830"mov %%ecx, 
(%[dst]) \n"831"add $4, %[dst] \n"832// }833834// 2nd dword output {835"mov %%edx, %%eax \n"836"shl $1, %%eax \n"837"and $0x1FE, %%eax \n"838"mov (%[pal],%[a]), %%cx \n"839"ror $8, %%cx \n"840"shl $16, %%ecx \n"841842"shr $7, %%edx \n"843"and $0x1FE, %%edx \n"844"mov (%[pal],%[d]), %%cx \n"845"ror $8, %%cx \n"846847"mov %%ecx, (%[dst]) \n"848"add $4, %[dst] \n"849// }850// *851852"mov %[tempx], %[c] \n"853"dec %%ecx \n"854"jnz 3b \n" // ia_x_loop_22855856"add %[line], %[src] \n"857"add %[ext], %[dst] \n"858859"mov %[tempy], %[c] \n"860"dec %%ecx \n"861"jnz 1b \n" // ia_y_loop2862863"4: \n" // ia_end_y_loop2864: [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)865: [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)866: "memory", "cc"867);868#endif869return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;870}871872return 0;873}874875//****************************************************************876// Size: 1, Format: 3877//878// ** by Gugaman **879880DWORD Load8bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)881{882if (rdp.tlut_mode != 0)883return Load8bCI (dst, src, wid_64, height, line, real_width, tile);884885if (wid_64 < 1) wid_64 = 1;886if (height < 1) height = 1;887int ext = (real_width - (wid_64 << 3));888#if !defined(__GNUC__) && !defined(NO_ASM)889__asm {890mov esi,dword ptr [src]891mov edi,dword ptr [dst]892893mov ecx,dword ptr [height]894y_loop:895push ecx896897mov ecx,dword ptr [wid_64]898x_loop:899mov eax,dword ptr [esi] // read all 4 pixels900add esi,4901902xor ebx,ebx903mov edx,eax904shr eax,4//all alpha905and eax,0x0F0F0F0F906or ebx,eax907mov eax,edx//intensity908shl eax,4909and eax,0xF0F0F0F0910or ebx,eax911912mov dword ptr [edi],ebx // save dword913add edi,4914915mov eax,dword ptr [esi] // read all 4 pixels916add esi,4917918xor ebx,ebx919mov edx,eax920shr eax,4//all 
alpha921and eax,0x0F0F0F0F922or ebx,eax923mov eax,edx//intensity924shl eax,4925and eax,0xF0F0F0F0926or ebx,eax927928mov dword ptr [edi],ebx // save dword929add edi,4930// *931932dec ecx933jnz x_loop934935pop ecx936dec ecx937jz end_y_loop938push ecx939940add esi,dword ptr [line]941add edi,dword ptr [ext]942943mov ecx,dword ptr [wid_64]944x_loop_2:945mov eax,dword ptr [esi+4] // read both pixels946947xor ebx,ebx948mov edx,eax949shr eax,4//all alpha950and eax,0x0F0F0F0F951or ebx,eax952mov eax,edx//intensity953shl eax,4954and eax,0xF0F0F0F0955or ebx,eax956957mov dword ptr [edi],ebx //save dword958add edi,4959960mov eax,dword ptr [esi] // read both pixels961add esi,8962963xor ebx,ebx964mov edx,eax965shr eax,4//all alpha966and eax,0x0F0F0F0F967or ebx,eax968mov eax,edx//intensity969shl eax,4970and eax,0xF0F0F0F0971or ebx,eax972973mov dword ptr [edi],ebx //save dword974add edi,4975// *976977dec ecx978jnz x_loop_2979980add esi,dword ptr [line]981add edi,dword ptr [ext]982983pop ecx984dec ecx985jnz y_loop986987end_y_loop:988}989#elif !defined(NO_ASM)990//printf("Load8bIA\n");991int lTemp, lHeight = (int) height;992asm volatile (993"1: \n" // y_loop5994"mov %[wid_64], %%eax \n"995"mov %%eax, %[temp] \n"996"2: \n" // x_loop5997"mov (%[src]), %%eax \n" // read all 4 pixels998"add $4, %[src] \n"9991000"xor %%ecx, %%ecx \n"1001"mov %%eax, %%edx \n"1002"shr $4, %%eax \n"//all alpha1003"and $0x0F0F0F0F, %%eax \n"1004"or %%eax, %%ecx \n"1005"mov %%edx, %%eax \n"//intensity1006"shl $4, %%eax \n"1007"and $0xF0F0F0F0, %%eax \n"1008"or %%eax, %%ecx \n"10091010"mov %%ecx, (%[dst]) \n" // save dword1011"add $4, %[dst] \n"10121013"mov (%[src]), %%eax \n" // read all 4 pixels1014"add $4, %[src] \n"10151016"xor %%ecx, %%ecx \n"1017"mov %%eax, %%edx \n"1018"shr $4, %%eax \n"//all alpha1019"and $0x0F0F0F0F, %%eax \n"1020"or %%eax, %%ecx \n"1021"mov %%edx, %%eax \n"//intensity1022"shl $4, %%eax \n"1023"and $0xF0F0F0F0, %%eax \n"1024"or %%eax, %%ecx \n"10251026"mov %%ecx, (%[dst]) \n" // save 
dword1027"add $4, %[dst] \n"10281029"decl %[temp] \n"1030"jnz 2b \n" // x_loop510311032"decl %[height] \n"1033"jz 4f \n" // end_y_loop510341035"add %[line], %[src] \n"1036"add %[ext], %[dst] \n"10371038"mov %[wid_64], %%eax \n"1039"mov %%eax, %[temp] \n"1040"3: \n" // x_loop_251041"mov 4(%[src]), %%eax \n" // read both pixels10421043"xor %%ecx, %%ecx \n"1044"mov %%eax, %%edx \n"1045"shr $4, %%eax \n"//all alpha1046"and $0x0F0F0F0F, %%eax \n"1047"or %%eax, %%ecx \n"1048"mov %%edx, %%eax \n"//intensity1049"shl $4, %%eax \n"1050"and $0xF0F0F0F0, %%eax \n"1051"or %%eax, %%ecx \n"10521053"mov %%ecx, (%[dst]) \n" //save dword1054"add $4, %[dst] \n"10551056"mov (%[src]), %%eax \n" // read both pixels1057"add $8, %[src] \n"10581059"xor %%ecx, %%ecx \n"1060"mov %%eax, %%edx \n"1061"shr $4, %%eax \n"//all alpha1062"and $0x0F0F0F0F, %%eax \n"1063"or %%eax, %%ecx \n"1064"mov %%edx, %%eax \n"//intensity1065"shl $4, %%eax \n"1066"and $0xF0F0F0F0, %%eax \n"1067"or %%eax, %%ecx \n"10681069"mov %%ecx, (%[dst]) \n" //save dword1070"add $4, %[dst] \n"1071// *10721073"decl %[temp] \n"1074"jnz 3b \n" // x_loop_2510751076"add %[line], %[src] \n"1077"add %[ext], %[dst] \n"10781079"decl %[height] \n"1080"jnz 1b \n" // y_loop510811082"4: \n" // end_y_loop51083: [temp]"=m"(lTemp), [src] "+S"(src), [dst] "+D"(dst), [height] "+g"(lHeight)1084: [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)1085: "memory", "cc", "eax", "edx", "ecx"1086);1087#endif1088return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;1089}10901091//****************************************************************1092// Size: 1, Format: 41093//1094// ** by Gugaman **10951096DWORD Load8bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)1097{1098if (rdp.tlut_mode != 0)1099return Load8bCI (dst, src, wid_64, height, line, real_width, tile);11001101if (wid_64 < 1) wid_64 = 1;1102if (height < 1) height = 1;1103int ext = (real_width - (wid_64 << 
3));1104#if !defined(__GNUC__) && !defined(NO_ASM)1105__asm {1106mov esi,dword ptr [src]1107mov edi,dword ptr [dst]11081109mov ecx,dword ptr [height]1110y_loop:1111push ecx11121113mov ecx,dword ptr [wid_64]1114x_loop:1115mov eax,dword ptr [esi] // read all 4 pixels1116add esi,411171118mov dword ptr [edi],eax // save dword1119add edi,411201121mov eax,dword ptr [esi] // read all 4 pixels1122add esi,411231124mov dword ptr [edi],eax // save dword1125add edi,41126// *11271128dec ecx1129jnz x_loop11301131pop ecx1132dec ecx1133jz end_y_loop1134push ecx11351136add esi,dword ptr [line]1137add edi,dword ptr [ext]11381139mov ecx,dword ptr [wid_64]1140x_loop_2:1141mov eax,dword ptr [esi+4] // read both pixels11421143mov dword ptr [edi],eax //save dword1144add edi,411451146mov eax,dword ptr [esi] // read both pixels1147add esi,811481149mov dword ptr [edi],eax //save dword1150add edi,41151// *11521153dec ecx1154jnz x_loop_211551156add esi,dword ptr [line]1157add edi,dword ptr [ext]11581159pop ecx1160dec ecx1161jnz y_loop11621163end_y_loop:1164}1165#elif !defined(NO_ASM)1166//printf("Load8bI\n");1167int lTemp, lHeight = (int) height;1168asm volatile (1169"1: \n" // y_loop61170"mov %[wid_64], %%eax \n"1171"mov %%eax, %[temp] \n"1172"2: \n" // x_loop61173"mov (%[src]), %%eax \n" // read all 4 pixels1174"add $4, %[src] \n"11751176"mov %%eax, (%[dst]) \n" // save dword1177"add $4, %[dst] \n"11781179"mov (%[src]), %%eax \n" // read all 4 pixels1180"add $4, %[src] \n"11811182"mov %%eax, (%[dst]) \n" // save dword1183"add $4, %[dst] \n"1184// *11851186"decl %[temp] \n"1187"jnz 2b \n" // x_loop611881189"decl %[height] \n"1190"jz 4f \n" // end_y_loop611911192"add %[line], %[src] \n"1193"add %[ext], %[dst] \n"11941195"mov %[wid_64], %%eax \n"1196"mov %%eax, %[temp] \n"1197"3: \n" // x_loop_261198"mov 4(%[src]), %%eax \n" // read both pixels11991200"mov %%eax, (%[dst]) \n" //save dword1201"add $4, %[dst] \n"12021203"mov (%[src]), %%eax \n" // read both pixels1204"add $8, %[src] 
\n"12051206"mov %%eax, (%[dst]) \n" //save dword1207"add $4, %[dst] \n"12081209"decl %[temp] \n"1210"jnz 3b \n" // x_loop_2612111212"add %[line], %[src] \n"1213"add %[ext], %[dst] \n"12141215"decl %[height] \n"1216"jnz 1b \n" // y_loop612171218"4: \n" // end_y_loop61219: [temp]"=m"(lTemp), [src]"+S"(src), [dst]"+D"(dst), [height]"+g"(lHeight)1220: [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)1221: "memory", "cc", "eax", "edx"1222);1223#endif1224return /*(0 << 16) | */GR_TEXFMT_ALPHA_8;1225}1226122712281229