Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/TexLoad32b.h
2 views
/*
* Glide64 - Glide video plugin for Nintendo 64 emulators.
* Copyright (c) 2002  Dave2001
* Copyright (c) 2008  Günther <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the Free
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA  02110-1301, USA
*/

//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
// Project started on December 29th, 2001
//
// To modify Glide64:
// * Write your name and (optional) email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them.
If people sent the whole thing, I would have many different versions, but no idea how to combine them all.29//30// Official Glide64 development channel: #Glide64 on EFnet31//32// Original author: Dave2001 ([email protected])33// Other authors: Gonetz, Gugaman34//35//****************************************************************3637//****************************************************************38// Size: 2, Format: 03940DWORD Load32bRGBA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)41{42if (wid_64 < 1) wid_64 = 1;43if (height < 1) height = 1;44int ext = (real_width - (wid_64 << 1)) << 1;4546wid_64 >>= 1; // re-shift it, load twice as many quadwords47#if !defined(__GNUC__) && !defined(NO_ASM)48__asm {49mov esi,dword ptr [src]50mov edi,dword ptr [dst]5152mov ecx,dword ptr [height]53y_loop:54push ecx5556mov ecx,dword ptr [wid_64]57x_loop:58mov eax,dword ptr [esi] // read first pixel59add esi,460bswap eax61mov edx,eax6263xor ebx,ebx64shl eax,8 // 0x000000F0 -> 0x0000F000 (a)65and eax,0x0000F00066or ebx,eax67shr edx,12 // 0x0000F000 -> 0x0000000F (b)68mov eax,edx69and eax,0x0000000F70or ebx,eax71shr edx,4 // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)72mov eax,edx73and eax,0x000000F074or ebx,eax75shr edx,4 // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)76and edx,0x00000F0077or ebx,edx7879mov eax,dword ptr [esi] // read second pixel80add esi,481bswap eax82mov edx,eax8384shl eax,24 // 0x000000F0 -> 0xF0000000 (a)85and eax,0xF000000086or ebx,eax87// 0x00F00000 -> 0x00F00000 (g)88mov eax,edx89and eax,0x00F0000090or ebx,eax91rol edx,4 // 0x0000F000 (did not shift) -> 0x000F0000 (b)92mov eax,edx93and eax,0x000F000094or ebx,eax95shl edx,24 // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)96and edx,0x0F00000097or ebx,edx9899mov dword ptr [edi],ebx100add edi,4101102// * copy103mov eax,dword ptr [esi] // read first pixel104add esi,4105bswap eax106mov edx,eax107108xor ebx,ebx109shl eax,8 // 
0x000000F0 -> 0x0000F000 (a)110and eax,0x0000F000111or ebx,eax112shr edx,12 // 0x0000F000 -> 0x0000000F (b)113mov eax,edx114and eax,0x0000000F115or ebx,eax116shr edx,4 // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)117mov eax,edx118and eax,0x000000F0119or ebx,eax120shr edx,4 // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)121and edx,0x00000F00122or ebx,edx123124mov eax,dword ptr [esi] // read second pixel125add esi,4126bswap eax127mov edx,eax128129shl eax,24 // 0x000000F0 -> 0xF0000000 (a)130and eax,0xF0000000131or ebx,eax132// 0x00F00000 -> 0x00F00000 (g)133mov eax,edx134and eax,0x00F00000135or ebx,eax136rol edx,4 // 0x0000F000 (did not shift) -> 0x000F0000 (b)137mov eax,edx138and eax,0x000F0000139or ebx,eax140shl edx,24 // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)141and edx,0x0F000000142or ebx,edx143144mov dword ptr [edi],ebx145add edi,4146// *147148dec ecx149jnz x_loop150151pop ecx152dec ecx153jz end_y_loop154push ecx155156add esi,dword ptr [line]157add edi,dword ptr [ext]158159mov ecx,dword ptr [wid_64]160x_loop_2:161mov eax,dword ptr [esi+8] // read first pixel162bswap eax163mov edx,eax164165xor ebx,ebx166shl eax,8 // 0x000000F0 -> 0x0000F000 (a)167and eax,0x0000F000168or ebx,eax169shr edx,12 // 0x0000F000 -> 0x0000000F (b)170mov eax,edx171and eax,0x0000000F172or ebx,eax173shr edx,4 // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)174mov eax,edx175and eax,0x000000F0176or ebx,eax177shr edx,4 // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)178and edx,0x00000F00179or ebx,edx180181mov eax,dword ptr [esi+12] // read second pixel182bswap eax183mov edx,eax184185shl eax,24 // 0x000000F0 -> 0xF0000000 (a)186and eax,0xF0000000187or ebx,eax188// 0x00F00000 -> 0x00F00000 (g)189mov eax,edx190and eax,0x00F00000191or ebx,eax192rol edx,4 // 0x0000F000 (did not shift) -> 0x000F0000 (b)193mov eax,edx194and eax,0x000F0000195or ebx,eax196shl edx,24 // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)197and edx,0x0F000000198or 
ebx,edx199200mov dword ptr [edi],ebx201add edi,4202203// * copy204mov eax,dword ptr [esi+0] // read first pixel205bswap eax206mov edx,eax207208xor ebx,ebx209shl eax,8 // 0x000000F0 -> 0x0000F000 (a)210and eax,0x0000F000211or ebx,eax212shr edx,12 // 0x0000F000 -> 0x0000000F (b)213mov eax,edx214and eax,0x0000000F215or ebx,eax216shr edx,4 // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)217mov eax,edx218and eax,0x000000F0219or ebx,eax220shr edx,4 // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)221and edx,0x00000F00222or ebx,edx223224mov eax,dword ptr [esi+4] // read second pixel225add esi,16226bswap eax227mov edx,eax228229shl eax,24 // 0x000000F0 -> 0xF0000000 (a)230and eax,0xF0000000231or ebx,eax232// 0x00F00000 -> 0x00F00000 (g)233mov eax,edx234and eax,0x00F00000235or ebx,eax236rol edx,4 // 0x0000F000 (did not shift) -> 0x000F0000 (b)237mov eax,edx238and eax,0x000F0000239or ebx,eax240shl edx,24 // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)241and edx,0x0F000000242or ebx,edx243244mov dword ptr [edi],ebx245add edi,4246// *247248dec ecx249jnz x_loop_2250251add esi,dword ptr [line]252add edi,dword ptr [ext]253254pop ecx255dec ecx256jnz y_loop257258end_y_loop:259}260#elif !defined(NO_ASM)261//printf("Load32bRGBA\n");262int lTemp, lHeight = (int) height;263asm volatile (264"y_loop9: \n"265266"mov %[wid_64], %%eax \n"267"mov %%eax, %[temp] \n"268"x_loop9: \n"269"mov (%[src]), %%eax \n" // read first pixel270"add $4, %[src] \n"271"bswap %%eax \n"272"mov %%eax, %%edx \n"273274"xor %%ecx, %%ecx \n"275"shl $8, %%eax \n" // 0x000000F0 -> 0x0000F000 (a)276"and $0x0000F000, %%eax \n"277"or %%eax, %%ecx \n"278"shr $12, %%edx \n" // 0x0000F000 -> 0x0000000F (b)279"mov %%edx, %%eax \n"280"and $0x0000000F, %%eax \n"281"or %%eax, %%ecx \n"282"shr $4, %%edx \n" // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)283"mov %%edx, %%eax \n"284"and $0x000000F0, %%eax \n"285"or %%eax, %%ecx \n"286"shr $4, %%edx \n" // 0xF0000000 went to 0x000F0000 went to 0x0000F000 
-> 0x00000F00 (r)287"and $0x00000F00, %%edx \n"288"or %%edx, %%ecx \n"289290"mov (%[src]), %%eax \n" // read second pixel291"add $4, %[src] \n"292"bswap %%eax \n"293"mov %%eax, %%edx \n"294295"shl $24, %%eax \n" // 0x000000F0 -> 0xF0000000 (a)296"and $0xF0000000, %%eax \n"297"or %%eax, %%ecx \n" // 0x00F00000 -> 0x00F00000 (g)298"mov %%edx, %%eax \n"299"and $0x00F00000, %%eax \n"300"or %%eax, %%ecx \n"301"rol $4, %%edx \n" // 0x0000F000 (did not shift) -> 0x000F0000 (b)302"mov %%edx, %%eax \n"303"and $0x000F0000, %%eax \n"304"or %%eax, %%ecx \n"305"shl $24, %%edx \n" // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)306"and $0x0F000000, %%edx \n"307"or %%edx, %%ecx \n"308309"mov %%ecx, (%[dst]) \n"310"add $4, %[dst] \n"311312// * copy313"mov (%[src]), %%eax \n" // read first pixel314"add $4, %[src] \n"315"bswap %%eax \n"316"mov %%eax, %%edx \n"317318"xor %%ecx, %%ecx \n"319"shl $8, %%eax \n" // 0x000000F0 -> 0x0000F000 (a)320"and $0x0000F000, %%eax \n"321"or %%eax, %%ecx \n"322"shr $12, %%edx \n" // 0x0000F000 -> 0x0000000F (b)323"mov %%edx, %%eax \n"324"and $0x0000000F, %%eax \n"325"or %%eax, %%ecx \n"326"shr $4, %%edx \n" // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)327"mov %%edx, %%eax \n"328"and $0x000000F0, %%eax \n"329"or %%eax, %%ecx \n"330"shr $4, %%edx \n" // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)331"and $0x00000F00, %%edx \n"332"or %%edx, %%ecx \n"333334"mov (%[src]), %%eax \n" // read second pixel335"add $4, %[src] \n"336"bswap %%eax \n"337"mov %%eax, %%edx \n"338339"shl $24, %%eax \n" // 0x000000F0 -> 0xF0000000 (a)340"and $0xF0000000, %%eax \n"341"or %%eax, %%ecx \n" // 0x00F00000 -> 0x00F00000 (g)342"mov %%edx, %%eax \n"343"and $0x00F00000, %%eax \n"344"or %%eax, %%ecx \n"345"rol $4, %%edx \n" // 0x0000F000 (did not shift) -> 0x000F0000 (b)346"mov %%edx, %%eax \n"347"and $0x000F0000, %%eax \n"348"or %%eax, %%ecx \n"349"shl $24, %%edx \n" // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)350"and $0x0F000000, %%edx 
\n"351"or %%edx, %%ecx \n"352353"mov %%ecx, (%[dst]) \n"354"add $4, %[dst] \n"355// *356357"decl %[temp] \n"358"jnz x_loop9 \n"359360"decl %[height] \n"361"jz end_y_loop9 \n"362363"add %[line], %[src] \n"364"add %[ext], %[dst] \n"365366"mov %[wid_64], %%eax \n"367"mov %%eax, %[temp] \n"368"x_loop_29: \n"369"mov 8(%[src]), %%eax \n" // read first pixel370"bswap %%eax \n"371"mov %%eax, %%edx \n"372373"xor %%ecx, %%ecx \n"374"shl $8, %%eax \n" // 0x000000F0 -> 0x0000F000 (a)375"and $0x0000F000, %%eax \n"376"or %%eax, %%ecx \n"377"shr $12, %%edx \n" // 0x0000F000 -> 0x0000000F (b)378"mov %%edx, %%eax \n"379"and $0x0000000F, %%eax \n"380"or %%eax, %%ecx \n"381"shr $4, %%edx \n" // 0x00F00000 went to 0x00000F00 -> 0x000000F0 (g)382"mov %%edx, %%eax \n"383"and $0x000000F0, %%eax \n"384"or %%eax, %%ecx \n"385"shr $4, %%edx \n" // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)386"and $0x00000F00, %%edx \n"387"or %%edx, %%ecx \n"388389"mov 12(%[src]), %%eax \n" // read second pixel390"bswap %%eax \n"391"mov %%eax, %%edx \n"392393"shl $24, %%eax \n" // 0x000000F0 -> 0xF0000000 (a)394"and $0xF0000000, %%eax \n"395"or %%eax, %%ecx \n" // 0x00F00000 -> 0x00F00000 (g)396"mov %%edx, %%eax \n"397"and $0x00F00000, %%eax \n"398"or %%eax, %%ecx \n"399"rol $4, %%edx \n" // 0x0000F000 (did not shift) -> 0x000F0000 (b)400"mov %%edx, %%eax \n"401"and $0x000F0000, %%eax \n"402"or %%eax, %%ecx \n"403"shl $24, %%edx \n" // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)404"and $0x0F000000, %%edx \n"405"or %%edx, %%ecx \n"406407"mov %%ecx, (%[dst]) \n"408"add $4, %[dst] \n"409410// * copy411"mov (%[src]), %%eax \n" // read first pixel412"bswap %%eax \n"413"mov %%eax, %%edx \n"414415"xor %%ecx, %%ecx \n"416"shl $8, %%eax \n" // 0x000000F0 -> 0x0000F000 (a)417"and $0x0000F000, %%eax \n"418"or %%eax, %%ecx \n"419"shr $12, %%edx \n" // 0x0000F000 -> 0x0000000F (b)420"mov %%edx, %%eax \n"421"and $0x0000000F, %%eax \n"422"or %%eax, %%ecx \n"423"shr $4, %%edx \n" // 0x00F00000 
went to 0x00000F00 -> 0x000000F0 (g)424"mov %%edx, %%eax \n"425"and $0x000000F0, %%eax \n"426"or %%eax, %%ecx \n"427"shr $4, %%edx \n" // 0xF0000000 went to 0x000F0000 went to 0x0000F000 -> 0x00000F00 (r)428"and $0x00000F00, %%edx \n"429"or %%edx, %%ecx \n"430431"mov 4(%[src]), %%eax \n" // read second pixel432"add $16, %[src] \n"433"bswap %%eax \n"434"mov %%eax, %%edx \n"435436"shl $24, %%eax \n" // 0x000000F0 -> 0xF0000000 (a)437"and $0xF0000000, %%eax \n"438"or %%eax, %%ecx \n" // 0x00F00000 -> 0x00F00000 (g)439"mov %%edx, %%eax \n"440"and $0x00F00000, %%eax \n"441"or %%eax, %%ecx \n"442"rol $4, %%edx \n" // 0x0000F000 (did not shift) -> 0x000F0000 (b)443"mov %%edx, %%eax \n"444"and $0x000F0000, %%eax \n"445"or %%eax, %%ecx \n"446"shl $24, %%edx \n" // 0xF0000000 went to 0x0000000F -> 0x0F000000 (r)447"and $0x0F000000, %%edx \n"448"or %%edx, %%ecx \n"449450"mov %%ecx, (%[dst]) \n"451"add $4, %[dst] \n"452// *453454"decl %[temp] \n"455"jnz x_loop_29 \n"456457"add %[line], %[src] \n"458"add %[ext], %[dst] \n"459460"decl %[height] \n"461"jnz y_loop9 \n"462463"end_y_loop9: \n"464: [temp]"=m"(lTemp), [src]"+S"(src), [dst]"+D"(dst), [height]"+g"(lHeight)465: [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)466: "memory", "cc", "ecx", "eax", "edx"467);468#endif469return (1 << 16) | GR_TEXFMT_ARGB_4444;470}471472473474