Path: blob/master/libmupen64plus/mupen64plus-video-glide64mk2/src/Glide64/3dmath.cpp
2 views
/*
* Glide64 - Glide video plugin for Nintendo 64 emulators.
* Copyright (c) 2002  Dave2001
* Copyright (c) 2003-2009  Sergey 'Gonetz' Lipski
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators
// Project started on December 29th, 2001
//
// Authors:
// Dave2001, original author, founded the project in 2001, left it in 2002
// Gugaman, joined the project in 2002, left it in 2002
// Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
// Hiroshi 'KoolSmoky' Morii, joined the project in 2007
//
//****************************************************************
//
// To modify Glide64:
// * Write your name and (optional) email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
// * Do NOT send me the whole project or file that you modified.  Take out your modified code sections, and tell me where to put them.
If people sent the whole thing, I would have many different versions, but no idea how to combine them all.36//37//****************************************************************3839#include "Gfx_1.3.h"40extern "C" {41#ifndef NOSSE42#include <xmmintrin.h>43#endif44}4546#include <math.h>47#include "3dmath.h"4849void calc_light (VERTEX *v)50{51float light_intensity = 0.0f;52register float color[3] = {rdp.light[rdp.num_lights].r, rdp.light[rdp.num_lights].g, rdp.light[rdp.num_lights].b};53for (wxUint32 l=0; l<rdp.num_lights; l++)54{55light_intensity = DotProduct (rdp.light_vector[l], v->vec);5657if (light_intensity > 0.0f)58{59color[0] += rdp.light[l].r * light_intensity;60color[1] += rdp.light[l].g * light_intensity;61color[2] += rdp.light[l].b * light_intensity;62}63}6465if (color[0] > 1.0f) color[0] = 1.0f;66if (color[1] > 1.0f) color[1] = 1.0f;67if (color[2] > 1.0f) color[2] = 1.0f;6869v->r = (wxUint8)(color[0]*255.0f);70v->g = (wxUint8)(color[1]*255.0f);71v->b = (wxUint8)(color[2]*255.0f);72}7374//*75void calc_linear (VERTEX *v)76{77if (settings.force_calc_sphere)78{79calc_sphere(v);80return;81}82DECLAREALIGN16VAR(vec[3]);8384TransformVector (v->vec, vec, rdp.model);85// TransformVector (v->vec, vec, rdp.combined);86NormalizeVector (vec);87float x, y;88if (!rdp.use_lookat)89{90x = vec[0];91y = vec[1];92}93else94{95x = DotProduct (rdp.lookat[0], vec);96y = DotProduct (rdp.lookat[1], vec);97}9899if (x > 1.0f)100x = 1.0f;101else if (x < -1.0f)102x = -1.0f;103if (y > 1.0f)104y = 1.0f;105else if (y < -1.0f)106y = -1.0f;107108if (rdp.cur_cache[0])109{110// scale >> 6 is size to map to111v->ou = (acosf(x)/3.141592654f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);112v->ov = (acosf(y)/3.141592654f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);113}114v->uv_scaled = 1;115#ifdef EXTREME_LOGGING116FRDP ("calc linear u: %f, v: %f\n", v->ou, v->ov);117#endif118}119120void calc_sphere (VERTEX *v)121{122// LRDP("calc_sphere\n");123DECLAREALIGN16VAR(vec[3]);124int s_scale, 
t_scale;125if (settings.hacks&hack_Chopper)126{127s_scale = min(rdp.tiles[rdp.cur_tile].org_s_scale >> 6, rdp.tiles[rdp.cur_tile].lr_s);128t_scale = min(rdp.tiles[rdp.cur_tile].org_t_scale >> 6, rdp.tiles[rdp.cur_tile].lr_t);129}130else131{132s_scale = rdp.tiles[rdp.cur_tile].org_s_scale >> 6;133t_scale = rdp.tiles[rdp.cur_tile].org_t_scale >> 6;134}135TransformVector (v->vec, vec, rdp.model);136// TransformVector (v->vec, vec, rdp.combined);137NormalizeVector (vec);138float x, y;139if (!rdp.use_lookat)140{141x = vec[0];142y = vec[1];143}144else145{146x = DotProduct (rdp.lookat[0], vec);147y = DotProduct (rdp.lookat[1], vec);148}149v->ou = (x * 0.5f + 0.5f) * s_scale;150v->ov = (y * 0.5f + 0.5f) * t_scale;151v->uv_scaled = 1;152#ifdef EXTREME_LOGGING153FRDP ("calc sphere u: %f, v: %f\n", v->ou, v->ov);154#endif155}156157float DotProductC(register float *v1, register float *v2)158{159register float result;160result = v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];161return(result);162}163164void NormalizeVectorC(float *v)165{166register float len;167len = sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);168if (len > 0.0f)169{170v[0] /= len;171v[1] /= len;172v[2] /= len;173}174}175176void TransformVectorC(float *src, float *dst, float mat[4][4])177{178dst[0] = mat[0][0]*src[0] + mat[1][0]*src[1] + mat[2][0]*src[2];179dst[1] = mat[0][1]*src[0] + mat[1][1]*src[1] + mat[2][1]*src[2];180dst[2] = mat[0][2]*src[0] + mat[1][2]*src[1] + mat[2][2]*src[2];181}182183void InverseTransformVectorC (float *src, float *dst, float mat[4][4])184{185dst[0] = mat[0][0]*src[0] + mat[0][1]*src[1] + mat[0][2]*src[2];186dst[1] = mat[1][0]*src[0] + mat[1][1]*src[1] + mat[1][2]*src[2];187dst[2] = mat[2][0]*src[0] + mat[2][1]*src[1] + mat[2][2]*src[2];188}189190void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4])191{192for (int i=0; i<4; i++)193{194for (int j=0; j<4; j++)195{196r[i][j] = m1[i][0] * m2[0][j] +197m1[i][1] * m2[1][j] +198m1[i][2] * m2[2][j] +199m1[i][3] * m2[3][j];200}201}202}203204// 
2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication205// and 3DNOW! 4x4 4x4 matrix multiplication206// 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible207// This will need fixing.208MULMATRIX MulMatrices = MulMatricesC;209TRANSFORMVECTOR TransformVector = TransformVectorC;210TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;211DOTPRODUCT DotProduct = DotProductC;212NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;213214void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])215{216#if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)217/* [row][col]*/218typedef float v4sf __attribute__ ((vector_size (16)));219v4sf row0 = _mm_loadu_ps(m2[0]);220v4sf row1 = _mm_loadu_ps(m2[1]);221v4sf row2 = _mm_loadu_ps(m2[2]);222v4sf row3 = _mm_loadu_ps(m2[3]);223224for (int i = 0; i < 4; ++i)225{226v4sf leftrow = _mm_loadu_ps(m1[i]);227228// Fill tmp with four copies of leftrow[0]229v4sf tmp = leftrow;230tmp = _mm_shuffle_ps (tmp, tmp, 0);231// Calculate the four first summands232v4sf destrow = tmp * row0;233234// Fill tmp with four copies of leftrow[1]235tmp = leftrow;236tmp = _mm_shuffle_ps (tmp, tmp, 1 + (1 << 2) + (1 << 4) + (1 << 6));237destrow += tmp * row1;238239// Fill tmp with four copies of leftrow[2]240tmp = leftrow;241tmp = _mm_shuffle_ps (tmp, tmp, 2 + (2 << 2) + (2 << 4) + (2 << 6));242destrow += tmp * row2;243244// Fill tmp with four copies of leftrow[3]245tmp = leftrow;246tmp = _mm_shuffle_ps (tmp, tmp, 3 + (3 << 2) + (3 << 4) + (3 << 6));247destrow += tmp * row3;248249__builtin_ia32_storeups(r[i], destrow);250}251#elif !defined(NO_ASM) && !defined(NOSSE)252__asm253{254mov eax, dword ptr [r]255mov ecx, dword ptr [m1]256mov edx, dword ptr [m2]257258movaps xmm0,[edx]259movaps xmm1,[edx+16]260movaps xmm2,[edx+32]261movaps xmm3,[edx+48]262263// r[0][0],r[0][1],r[0][2],r[0][3]264265movaps xmm4,xmmword ptr[ecx]266movaps xmm5,xmm4267movaps xmm6,xmm4268movaps xmm7,xmm4269270shufps 
xmm4,xmm4,00000000b271shufps xmm5,xmm5,01010101b272shufps xmm6,xmm6,10101010b273shufps xmm7,xmm7,11111111b274275mulps xmm4,xmm0276mulps xmm5,xmm1277mulps xmm6,xmm2278mulps xmm7,xmm3279280addps xmm4,xmm5281addps xmm4,xmm6282addps xmm4,xmm7283284movaps xmmword ptr[eax],xmm4285286// r[1][0],r[1][1],r[1][2],r[1][3]287288movaps xmm4,xmmword ptr[ecx+16]289movaps xmm5,xmm4290movaps xmm6,xmm4291movaps xmm7,xmm4292293shufps xmm4,xmm4,00000000b294shufps xmm5,xmm5,01010101b295shufps xmm6,xmm6,10101010b296shufps xmm7,xmm7,11111111b297298mulps xmm4,xmm0299mulps xmm5,xmm1300mulps xmm6,xmm2301mulps xmm7,xmm3302303addps xmm4,xmm5304addps xmm4,xmm6305addps xmm4,xmm7306307movaps xmmword ptr[eax+16],xmm4308309310// r[2][0],r[2][1],r[2][2],r[2][3]311312movaps xmm4,xmmword ptr[ecx+32]313movaps xmm5,xmm4314movaps xmm6,xmm4315movaps xmm7,xmm4316317shufps xmm4,xmm4,00000000b318shufps xmm5,xmm5,01010101b319shufps xmm6,xmm6,10101010b320shufps xmm7,xmm7,11111111b321322mulps xmm4,xmm0323mulps xmm5,xmm1324mulps xmm6,xmm2325mulps xmm7,xmm3326327addps xmm4,xmm5328addps xmm4,xmm6329addps xmm4,xmm7330331movaps xmmword ptr[eax+32],xmm4332333// r[3][0],r[3][1],r[3][2],r[3][3]334335movaps xmm4,xmmword ptr[ecx+48]336movaps xmm5,xmm4337movaps xmm6,xmm4338movaps xmm7,xmm4339340shufps xmm4,xmm4,00000000b341shufps xmm5,xmm5,01010101b342shufps xmm6,xmm6,10101010b343shufps xmm7,xmm7,11111111b344345mulps xmm4,xmm0346mulps xmm5,xmm1347mulps xmm6,xmm2348mulps xmm7,xmm3349350addps xmm4,xmm5351addps xmm4,xmm6352addps xmm4,xmm7353354movaps xmmword ptr[eax+48],xmm4355}356#endif // _WIN32357}358359360361void math_init()362{363#ifndef _DEBUG364int IsSSE = FALSE;365#if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)366int edx, eax;367GLIDE64_TRY368{369#if defined(__x86_64__)370asm volatile(" cpuid; "371: "=a"(eax), "=d"(edx)372: "0"(1)373: "rbx", "rcx"374);375#else376asm volatile(" push %%ebx; "377" push %%ecx; "378" cpuid; "379" pop %%ecx; "380" pop %%ebx; "381: "=a"(eax), "=d"(edx)382: 
"0"(1)383:384);385#endif386}387GLIDE64_CATCH388{ return; }389// Check for SSE390if (edx & (1 << 25))391IsSSE = TRUE;392#elif !defined(NO_ASM) && !defined(NOSSE)393DWORD dwEdx;394__try395{396__asm397{398mov eax,1399cpuid400mov dwEdx,edx401}402}403__except(EXCEPTION_EXECUTE_HANDLER)404{405return;406}407408if (dwEdx & (1<<25))409{410if (dwEdx & (1<<24))411{412__try413{414__asm xorps xmm0, xmm0415IsSSE = TRUE;416}417__except(EXCEPTION_EXECUTE_HANDLER)418{419return;420}421}422}423#endif // _WIN32424if (IsSSE)425{426MulMatrices = MulMatricesSSE;427LOG("3DNOW! detected.\n");428}429430#endif //_DEBUG431}432433434