// Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/3dmath.cpp
// 2 views
/*1* Glide64 - Glide video plugin for Nintendo 64 emulators.2* Copyright (c) 2002 Dave20013* Copyright (c) 2008 Günther <[email protected]>4*5* This program is free software; you can redistribute it and/or modify6* it under the terms of the GNU General Public License as published by7* the Free Software Foundation; either version 2 of the License, or8* any later version.9*10* This program is distributed in the hope that it will be useful,11* but WITHOUT ANY WARRANTY; without even the implied warranty of12* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the13* GNU General Public License for more details.14*15* You should have received a copy of the GNU General Public16* Licence along with this program; if not, write to the Free17* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,18* Boston, MA 02110-1301, USA19*/2021//****************************************************************22//23// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)24// Project started on December 29th, 200125//26// To modify Glide64:27// * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.28// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. 
If people sent the whole thing, I would have many different versions, but no idea how to combine them all.29//30// Official Glide64 development channel: #Glide64 on EFnet31//32// Original author: Dave2001 ([email protected])33// Other authors: Gonetz, Gugaman34//35//****************************************************************3637#define M64P_PLUGIN_PROTOTYPES 138#include "m64p_types.h"39#include "m64p_plugin.h"40#include "m64p_config.h"41#include "m64p_vidext.h"42#include "3dmath.h"43#if !defined(NO_ASM)44#include <xmmintrin.h>45#endif4647void calc_light (VERTEX *v)48{49float light_intensity = 0.0f;50register float color[3] = {rdp.light[rdp.num_lights].r, rdp.light[rdp.num_lights].g, rdp.light[rdp.num_lights].b};51for (DWORD l=0; l<rdp.num_lights; l++)52{53light_intensity = DotProduct (rdp.light_vector[l], v->vec);5455if (light_intensity > 0.0f)56{57color[0] += rdp.light[l].r * light_intensity;58color[1] += rdp.light[l].g * light_intensity;59color[2] += rdp.light[l].b * light_intensity;60}61}6263if (color[0] > 1.0f) color[0] = 1.0f;64if (color[1] > 1.0f) color[1] = 1.0f;65if (color[2] > 1.0f) color[2] = 1.0f;6667v->r = (BYTE)(color[0]*255.0f);68v->g = (BYTE)(color[1]*255.0f);69v->b = (BYTE)(color[2]*255.0f);70}7172__inline void TransformVector (float *src, float *dst, float mat[4][4])73{74dst[0] = mat[0][0]*src[0] + mat[1][0]*src[1] + mat[2][0]*src[2];75dst[1] = mat[0][1]*src[0] + mat[1][1]*src[1] + mat[2][1]*src[2];76dst[2] = mat[0][2]*src[0] + mat[1][2]*src[1] + mat[2][2]*src[2];77}7879//*80void calc_linear (VERTEX *v)81{82float vec[3];8384TransformVector (v->vec, vec, rdp.model);85// TransformVector (v->vec, vec, rdp.combined);86NormalizeVector (vec);87float x, y;88if (!rdp.use_lookat)89{90x = vec[0];91y = vec[1];92}93else94{95x = DotProduct (rdp.lookat[0], vec);96y = DotProduct (rdp.lookat[1], vec);97}98if (rdp.cur_cache[0])99{100// scale >> 6 is size to map to101v->ou = (acosf(x)/3.1415f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);102v->ov = 
(acosf(y)/3.1415f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);103}104}105//*/106107/*108void calc_linear (VERTEX *v)109{110float vec[3];111112TransformVector (v->vec, vec, rdp.combined);113NormalizeVector (vec);114115if (rdp.cur_cache[0])116{117// scale >> 6 is size to map to118v->ou = (acosf(vec[0])/3.1415f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);119v->ov = (acosf(vec[1])/3.1415f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);120}121}122//*/123124void calc_sphere (VERTEX *v)125{126//RDP("calc_sphere\n");127float vec[3];128int s_scale, t_scale;129if (settings.chopper)130{131s_scale = min(rdp.tiles[rdp.cur_tile].org_s_scale >> 6, rdp.tiles[rdp.cur_tile].lr_s);132t_scale = min(rdp.tiles[rdp.cur_tile].org_t_scale >> 6, rdp.tiles[rdp.cur_tile].lr_t);133}134else135{136s_scale = rdp.tiles[rdp.cur_tile].org_s_scale >> 6;137t_scale = rdp.tiles[rdp.cur_tile].org_t_scale >> 6;138}139TransformVector (v->vec, vec, rdp.model);140// TransformVector (v->vec, vec, rdp.combined);141NormalizeVector (vec);142float x = DotProduct (rdp.lookat[0], vec);143float y = DotProduct (rdp.lookat[1], vec);144v->ou = (x * 0.5f + 0.5f) * s_scale;145v->ov = (y * 0.5f + 0.5f) * t_scale;146}147148void __stdcall MulMatricesNOSSE(float m1[4][4],float m2[4][4],float r[4][4])149{150151/*for (int i=0; i<4; i++)152{153for (int j=0; j<4; j++)154{155r[i][j] =156m1[i][0] * m2[0][j] +157m1[i][1] * m2[1][j] +158m1[i][2] * m2[2][j] +159m1[i][3] * m2[3][j];160}161}*/162r[0][0] = m1[0][0]*m2[0][0] + m1[0][1]*m2[1][0] + m1[0][2]*m2[2][0] + m1[0][3]*m2[3][0];163r[0][1] = m1[0][0]*m2[0][1] + m1[0][1]*m2[1][1] + m1[0][2]*m2[2][1] + m1[0][3]*m2[3][1];164r[0][2] = m1[0][0]*m2[0][2] + m1[0][1]*m2[1][2] + m1[0][2]*m2[2][2] + m1[0][3]*m2[3][2];165r[0][3] = m1[0][0]*m2[0][3] + m1[0][1]*m2[1][3] + m1[0][2]*m2[2][3] + m1[0][3]*m2[3][3];166167r[1][0] = m1[1][0]*m2[0][0] + m1[1][1]*m2[1][0] + m1[1][2]*m2[2][0] + m1[1][3]*m2[3][0];168r[1][1] = m1[1][0]*m2[0][1] + m1[1][1]*m2[1][1] + m1[1][2]*m2[2][1] + 
m1[1][3]*m2[3][1];169r[1][2] = m1[1][0]*m2[0][2] + m1[1][1]*m2[1][2] + m1[1][2]*m2[2][2] + m1[1][3]*m2[3][2];170r[1][3] = m1[1][0]*m2[0][3] + m1[1][1]*m2[1][3] + m1[1][2]*m2[2][3] + m1[1][3]*m2[3][3];171172r[2][0] = m1[2][0]*m2[0][0] + m1[2][1]*m2[1][0] + m1[2][2]*m2[2][0] + m1[2][3]*m2[3][0];173r[2][1] = m1[2][0]*m2[0][1] + m1[2][1]*m2[1][1] + m1[2][2]*m2[2][1] + m1[2][3]*m2[3][1];174r[2][2] = m1[2][0]*m2[0][2] + m1[2][1]*m2[1][2] + m1[2][2]*m2[2][2] + m1[2][3]*m2[3][2];175r[2][3] = m1[2][0]*m2[0][3] + m1[2][1]*m2[1][3] + m1[2][2]*m2[2][3] + m1[2][3]*m2[3][3];176177r[3][0] = m1[3][0]*m2[0][0] + m1[3][1]*m2[1][0] + m1[3][2]*m2[2][0] + m1[3][3]*m2[3][0];178r[3][1] = m1[3][0]*m2[0][1] + m1[3][1]*m2[1][1] + m1[3][2]*m2[2][1] + m1[3][3]*m2[3][1];179r[3][2] = m1[3][0]*m2[0][2] + m1[3][1]*m2[1][2] + m1[3][2]*m2[2][2] + m1[3][3]*m2[3][2];180r[3][3] = m1[3][0]*m2[0][3] + m1[3][1]*m2[1][3] + m1[3][2]*m2[2][3] + m1[3][3]*m2[3][3];181}182183void __stdcall MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])184{185#if defined(__GNUC__) && !defined(NO_ASM)186/* [row][col]*/187typedef float v4sf __attribute__ ((vector_size (16)));188v4sf row0 = __builtin_ia32_loadups(m2[0]);189v4sf row1 = __builtin_ia32_loadups(m2[1]);190v4sf row2 = __builtin_ia32_loadups(m2[2]);191v4sf row3 = __builtin_ia32_loadups(m2[3]);192193for (int i = 0; i < 4; ++i)194{195v4sf leftrow = __builtin_ia32_loadups(m1[i]);196197// Fill tmp with four copies of leftrow[0]198v4sf tmp = leftrow;199tmp = _mm_shuffle_ps (tmp, tmp, 0);200// Calculate the four first summands201v4sf destrow = tmp * row0;202203// Fill tmp with four copies of leftrow[1]204tmp = leftrow;205tmp = _mm_shuffle_ps (tmp, tmp, 1 + (1 << 2) + (1 << 4) + (1 << 6));206destrow += tmp * row1;207208// Fill tmp with four copies of leftrow[2]209tmp = leftrow;210tmp = _mm_shuffle_ps (tmp, tmp, 2 + (2 << 2) + (2 << 4) + (2 << 6));211destrow += tmp * row2;212213// Fill tmp with four copies of leftrow[3]214tmp = leftrow;215tmp = _mm_shuffle_ps (tmp, 
tmp, 3 + (3 << 2) + (3 << 4) + (3 << 6));216destrow += tmp * row3;217218__builtin_ia32_storeups(r[i], destrow);219}220#elif !defined(NO_ASM)221__asm222{223mov eax, dword ptr [r]224mov ecx, dword ptr [m1]225mov edx, dword ptr [m2]226227movaps xmm0,[edx]228movaps xmm1,[edx+16]229movaps xmm2,[edx+32]230movaps xmm3,[edx+48]231232// r[0][0],r[0][1],r[0][2],r[0][3]233234movaps xmm4,xmmword ptr[ecx]235movaps xmm5,xmm4236movaps xmm6,xmm4237movaps xmm7,xmm4238239shufps xmm4,xmm4,00000000b240shufps xmm5,xmm5,01010101b241shufps xmm6,xmm6,10101010b242shufps xmm7,xmm7,11111111b243244mulps xmm4,xmm0245mulps xmm5,xmm1246mulps xmm6,xmm2247mulps xmm7,xmm3248249addps xmm4,xmm5250addps xmm4,xmm6251addps xmm4,xmm7252253movaps xmmword ptr[eax],xmm4254255// r[1][0],r[1][1],r[1][2],r[1][3]256257movaps xmm4,xmmword ptr[ecx+16]258movaps xmm5,xmm4259movaps xmm6,xmm4260movaps xmm7,xmm4261262shufps xmm4,xmm4,00000000b263shufps xmm5,xmm5,01010101b264shufps xmm6,xmm6,10101010b265shufps xmm7,xmm7,11111111b266267mulps xmm4,xmm0268mulps xmm5,xmm1269mulps xmm6,xmm2270mulps xmm7,xmm3271272addps xmm4,xmm5273addps xmm4,xmm6274addps xmm4,xmm7275276movaps xmmword ptr[eax+16],xmm4277278279// r[2][0],r[2][1],r[2][2],r[2][3]280281movaps xmm4,xmmword ptr[ecx+32]282movaps xmm5,xmm4283movaps xmm6,xmm4284movaps xmm7,xmm4285286shufps xmm4,xmm4,00000000b287shufps xmm5,xmm5,01010101b288shufps xmm6,xmm6,10101010b289shufps xmm7,xmm7,11111111b290291mulps xmm4,xmm0292mulps xmm5,xmm1293mulps xmm6,xmm2294mulps xmm7,xmm3295296addps xmm4,xmm5297addps xmm4,xmm6298addps xmm4,xmm7299300movaps xmmword ptr[eax+32],xmm4301302// r[3][0],r[3][1],r[3][2],r[3][3]303304movaps xmm4,xmmword ptr[ecx+48]305movaps xmm5,xmm4306movaps xmm6,xmm4307movaps xmm7,xmm4308309shufps xmm4,xmm4,00000000b310shufps xmm5,xmm5,01010101b311shufps xmm6,xmm6,10101010b312shufps xmm7,xmm7,11111111b313314mulps xmm4,xmm0315mulps xmm5,xmm1316mulps xmm6,xmm2317mulps xmm7,xmm3318319addps xmm4,xmm5320addps xmm4,xmm6321addps xmm4,xmm7322323movaps xmmword 
ptr[eax+48],xmm4324}325#endif // _WIN32326}327328MULMATRIX MulMatrices = MulMatricesNOSSE;329330void math_init()331{332BOOL IsSSE = FALSE;333#if defined(__GNUC__) && !defined(NO_ASM)334int edx, eax;335#if defined(__x86_64__)336asm volatile(" cpuid; "337: "=a"(eax), "=d"(edx)338: "0"(1)339: "rbx", "rcx"340);341#else342asm volatile(" push %%ebx; "343" push %%ecx; "344" cpuid; "345" pop %%ecx; "346" pop %%ebx; "347: "=a"(eax), "=d"(edx)348: "0"(1)349:350);351#endif352// Check for SSE353if (edx & (1 << 25))354IsSSE = TRUE;355#elif !defined(NO_ASM)356DWORD dwEdx;357__try358{359__asm360{361mov eax,1362cpuid363mov dwEdx,edx364}365}366__except(EXCEPTION_EXECUTE_HANDLER)367{368return;369}370371if (dwEdx & (1<<25))372{373if (dwEdx & (1<<24))374{375__try376{377__asm xorps xmm0, xmm0378IsSSE = TRUE;379}380__except(EXCEPTION_EXECUTE_HANDLER)381{382return;383}384}385}386#endif // _WIN32387if (IsSSE)388{389MulMatrices = MulMatricesSSE;390WriteLog(M64MSG_INFO, "SSE detected.\n");391}392}393394395396