CoCalc -- 3dmath.cpp

GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/3dmath.cpp
² views
1
/*
2
*   Glide64 - Glide video plugin for Nintendo 64 emulators.
3
*   Copyright (c) 2002  Dave2001
4
*   Copyright (c) 2008  Günther <[email protected]>
5
*
6
*   This program is free software; you can redistribute it and/or modify
7
*   it under the terms of the GNU General Public License as published by
8
*   the Free Software Foundation; either version 2 of the License, or
9
*   any later version.
10
*
11
*   This program is distributed in the hope that it will be useful,
12
*   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
*   GNU General Public License for more details.
15
*
16
*   You should have received a copy of the GNU General Public
17
*   License along with this program; if not, write to the Free
18
*   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 
19
*   Boston, MA  02110-1301, USA
20
*/
21

22
//****************************************************************
23
//
24
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
25
// Project started on December 29th, 2001
26
//
27
// To modify Glide64:
28
// * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
29
// * Do NOT send me the whole project or file that you modified.  Take out your modified code sections, and tell me where to put them.  If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30
//
31
// Official Glide64 development channel: #Glide64 on EFnet
32
//
33
// Original author: Dave2001 ([email protected])
34
// Other authors: Gonetz, Gugaman
35
//
36
//****************************************************************
37

38
#define M64P_PLUGIN_PROTOTYPES 1
39
#include "m64p_types.h"
40
#include "m64p_plugin.h"
41
#include "m64p_config.h"
42
#include "m64p_vidext.h"
43
#include "3dmath.h"
44
#if !defined(NO_ASM)
45
#include <xmmintrin.h>
46
#endif
47

48
// Computes the lit colour of a vertex and stores it in v->r, v->g, v->b.
//
// The accumulator starts from rdp.light[rdp.num_lights] (the entry just past
// the first rdp.num_lights lights; presumably the ambient term - confirm
// against the code that fills rdp.light).  Each of the first rdp.num_lights
// lights then adds its colour scaled by the dot product of its
// rdp.light_vector entry with v->vec, but only when that dot product is
// positive.  Every channel is capped at 1.0 before being scaled to 0..255.
void calc_light (VERTEX *v)
{
    // Deliberately not declared `register`: the array is subscripted below,
    // and the specifier is deprecated in C++11 and removed in C++17.
    float color[3] = {
        rdp.light[rdp.num_lights].r,
        rdp.light[rdp.num_lights].g,
        rdp.light[rdp.num_lights].b
    };

    for (DWORD l=0; l<rdp.num_lights; l++)
    {
        const float light_intensity = DotProduct (rdp.light_vector[l], v->vec);

        // Lights with a non-positive dot product contribute nothing.
        if (light_intensity > 0.0f)
        {
            color[0] += rdp.light[l].r * light_intensity;
            color[1] += rdp.light[l].g * light_intensity;
            color[2] += rdp.light[l].b * light_intensity;
        }
    }

    // Saturate so the byte conversion below cannot exceed 255.
    if (color[0] > 1.0f) color[0] = 1.0f;
    if (color[1] > 1.0f) color[1] = 1.0f;
    if (color[2] > 1.0f) color[2] = 1.0f;

    v->r = (BYTE)(color[0]*255.0f);
    v->g = (BYTE)(color[1]*255.0f);
    v->b = (BYTE)(color[2]*255.0f);
}
72

73
// Transforms the 3-component vector `src` by the upper-left 3x3 part of `mat`
// and writes the result to `dst`:
//     dst[c] = mat[0][c]*src[0] + mat[1][c]*src[1] + mat[2][c]*src[2]
// The fourth row and fourth column of `mat` are never read, so no translation
// is applied.  `dst` is written one component at a time while `src` is still
// being read, so the two should not refer to the same storage.
__inline void TransformVector (float *src, float *dst, float mat[4][4])
{
    for (int col = 0; col < 3; ++col)
    {
        dst[col] = mat[0][col]*src[0] + mat[1][col]*src[1] + mat[2][col]*src[2];
    }
}
79

80
//*
81
// Generates "linear" texture coordinates for a vertex.
//
// v->vec is transformed by rdp.model and normalised.  The x/y inputs are
// either the first two components of that vector or, when rdp.use_lookat is
// set, its dot products with rdp.lookat[0] and rdp.lookat[1].  When
// rdp.cur_cache[0] is non-null, v->ou / v->ov are set to acos(input)/pi
// scaled by the current tile's org_s_scale / org_t_scale shifted right by 6.
// When it is null the vertex is left untouched.
void calc_linear (VERTEX *v)
{
    float vec[3];

    TransformVector (v->vec, vec, rdp.model);
    NormalizeVector (vec);

    float x, y;
    if (!rdp.use_lookat)
    {
        x = vec[0];
        y = vec[1];
    }
    else
    {
        x = DotProduct (rdp.lookat[0], vec);
        y = DotProduct (rdp.lookat[1], vec);
    }

    // acosf() is only defined on [-1, 1] and returns NaN outside it.  Rounding
    // in the normalisation / dot products can push a value marginally past
    // either end, so clamp before taking the arc cosine.
    if (x < -1.0f) x = -1.0f; else if (x > 1.0f) x = 1.0f;
    if (y < -1.0f) y = -1.0f; else if (y > 1.0f) y = 1.0f;

    if (rdp.cur_cache[0])
    {
        // scale >> 6 is size to map to.
        // The 3.1415f divisor is kept as-is so in-range results are unchanged.
        v->ou = (acosf(x)/3.1415f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);
        v->ov = (acosf(y)/3.1415f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);
    }
}
106
//*/
107

108
/*
109
void calc_linear (VERTEX *v)
110
{
111
    float vec[3];
112

113
    TransformVector (v->vec, vec, rdp.combined);
114
    NormalizeVector (vec);
115

116
    if (rdp.cur_cache[0])
117
    {
118
        // scale >> 6 is size to map to
119
        v->ou = (acosf(vec[0])/3.1415f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);
120
        v->ov = (acosf(vec[1])/3.1415f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);
121
    }
122
}
123
//*/
124

125
// Generates sphere-map texture coordinates for a vertex.
//
// v->vec is transformed by rdp.model, normalised, and projected onto
// rdp.lookat[0] and rdp.lookat[1].  Each projection is remapped with
// (p * 0.5 + 0.5) and multiplied by a per-axis scale taken from the current
// tile: org_s_scale / org_t_scale shifted right by 6, additionally limited to
// the tile's lr_s / lr_t when settings.chopper is set.  Results go to
// v->ou and v->ov.
void calc_sphere (VERTEX *v)
{
  float dir[3];
  TransformVector (v->vec, dir, rdp.model);
  NormalizeVector (dir);

  int s_scale, t_scale;
  if (!settings.chopper)
  {
    s_scale = rdp.tiles[rdp.cur_tile].org_s_scale >> 6;
    t_scale = rdp.tiles[rdp.cur_tile].org_t_scale >> 6;
  }
  else
  {
    // With the "chopper" setting the scale is capped by the tile's lr_s / lr_t.
    s_scale = min(rdp.tiles[rdp.cur_tile].org_s_scale >> 6, rdp.tiles[rdp.cur_tile].lr_s);
    t_scale = min(rdp.tiles[rdp.cur_tile].org_t_scale >> 6, rdp.tiles[rdp.cur_tile].lr_t);
  }

  const float x = DotProduct (rdp.lookat[0], dir);
  const float y = DotProduct (rdp.lookat[1], dir);

  v->ou = (x * 0.5f + 0.5f) * s_scale;
  v->ov = (y * 0.5f + 0.5f) * t_scale;
}
148

149
// Scalar 4x4 matrix product: r = m1 * m2, i.e.
//     r[i][j] = m1[i][0]*m2[0][j] + m1[i][1]*m2[1][j]
//             + m1[i][2]*m2[2][j] + m1[i][3]*m2[3][j]
// Elements of r are written one by one in row-major order while m1 and m2 are
// still being read, so r should not be the same storage as m1 or m2.
void __stdcall MulMatricesNOSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
    for (int row = 0; row < 4; ++row)
    {
        for (int col = 0; col < 4; ++col)
        {
            r[row][col] = m1[row][0]*m2[0][col] + m1[row][1]*m2[1][col] + m1[row][2]*m2[2][col] + m1[row][3]*m2[3][col];
        }
    }
}
183

184
// SSE 4x4 matrix product: r = m1 * m2 (same result layout as
// MulMatricesNOSSE).
//
// Each output row i is built as
//     r[i] = m1[i][0]*m2[0] + m1[i][1]*m2[1] + m1[i][2]*m2[2] + m1[i][3]*m2[3]
// where m2[k] is a whole row handled as one 4-float vector.
//
// Implemented with the portable <xmmintrin.h> intrinsics (already included by
// this file whenever NO_ASM is not defined) instead of GCC-only builtins or
// MSVC-only inline assembly.  Unaligned loads/stores are used, so the
// matrices do not need 16-byte alignment.
//
// When NO_ASM is defined no SSE code may be emitted; the scalar routine is
// called instead so that r is always written.
void __stdcall MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
#if !defined(NO_ASM)
    /* [row][col] */
    // All of m2 is loaded up front, before anything is stored to r.
    const __m128 row0 = _mm_loadu_ps(m2[0]);
    const __m128 row1 = _mm_loadu_ps(m2[1]);
    const __m128 row2 = _mm_loadu_ps(m2[2]);
    const __m128 row3 = _mm_loadu_ps(m2[3]);

    for (int i = 0; i < 4; ++i)
    {
        const __m128 leftrow = _mm_loadu_ps(m1[i]);

        // Broadcast leftrow[0] to all four lanes and form the first summands.
        __m128 tmp = _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(0, 0, 0, 0));
        __m128 destrow = _mm_mul_ps(tmp, row0);

        // Same for leftrow[1].
        tmp = _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(1, 1, 1, 1));
        destrow = _mm_add_ps(destrow, _mm_mul_ps(tmp, row1));

        // Same for leftrow[2].
        tmp = _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(2, 2, 2, 2));
        destrow = _mm_add_ps(destrow, _mm_mul_ps(tmp, row2));

        // Same for leftrow[3].
        tmp = _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(3, 3, 3, 3));
        destrow = _mm_add_ps(destrow, _mm_mul_ps(tmp, row3));

        _mm_storeu_ps(r[i], destrow);
    }
#else
    // No SSE code allowed in this build: fall back to the scalar product.
    MulMatricesNOSSE(m1, m2, r);
#endif // !defined(NO_ASM)
}
328

329
// Currently selected 4x4 matrix-multiply routine.  Starts out as the scalar
// implementation; math_init() repoints it at MulMatricesSSE when SSE is
// detected.
MULMATRIX MulMatrices = MulMatricesNOSSE;
330

331
void math_init()
332
{
333
  BOOL IsSSE = FALSE;
334
#if defined(__GNUC__) && !defined(NO_ASM)
335
    int edx, eax;
336
  #if defined(__x86_64__)
337
    asm volatile(" cpuid;        "
338
                 : "=a"(eax), "=d"(edx)
339
                 : "0"(1)
340
                 : "rbx", "rcx"
341
                 );
342
  #else
343
    asm volatile(" push %%ebx;   "
344
                 " push %%ecx;   "
345
                 " cpuid;        "
346
                 " pop %%ecx;    "
347
                 " pop %%ebx;    "
348
                 : "=a"(eax), "=d"(edx)
349
                 : "0"(1)
350
                 :
351
                 );
352
  #endif
353
    // Check for SSE
354
    if (edx & (1 << 25))
355
    IsSSE = TRUE;
356
#elif !defined(NO_ASM)
357
  DWORD dwEdx;
358
  __try
359
  {
360
    __asm 
361
    {
362
      mov  eax,1
363
      cpuid
364
      mov dwEdx,edx
365
    }  
366
  }
367
  __except(EXCEPTION_EXECUTE_HANDLER)
368
  {
369
    return;
370
  }
371

372
  if (dwEdx & (1<<25)) 
373
  {
374
    if (dwEdx & (1<<24))
375
    {      
376
      __try
377
      {
378
        __asm xorps xmm0, xmm0
379
        IsSSE = TRUE;
380
      }
381
      __except(EXCEPTION_EXECUTE_HANDLER)
382
      {
383
        return;
384
      }
385
    }
386
  }
387
#endif // _WIN32
388
  if (IsSSE)
389
  {
390
    MulMatrices = MulMatricesSSE;
391
    WriteLog(M64MSG_INFO, "SSE detected.\n");
392
  }
393
}
394

395

396
Product

Resources

Company