CoCalc -- 3dmath.cpp

GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64mk2/src/Glide64/3dmath.cpp
² views
/*
* Glide64 - Glide video plugin for Nintendo 64 emulators.
* Copyright (c) 2002  Dave2001
* Copyright (c) 2003-2009  Sergey 'Gonetz' Lipski
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
20

//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators
// Project started on December 29th, 2001
//
// Authors:
// Dave2001, original author, founded the project in 2001, left it in 2002
// Gugaman, joined the project in 2002, left it in 2002
// Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
// Hiroshi 'KoolSmoky' Morii, joined the project in 2007
//
//****************************************************************
//
// To modify Glide64:
// * Write your name and (optionally) your email in a comment next to your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
// * Do NOT send me the whole project or file that you modified.  Take out your modified code sections, and tell me where to put them.  If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
//
//****************************************************************
39

40
#include "Gfx_1.3.h"
41
extern "C" {
42
#ifndef NOSSE
43
#include <xmmintrin.h>
44
#endif
45
}
46

47
#include <math.h>
48
#include "3dmath.h"
49

50
// Computes the lit colour of vertex `v` and stores it in v->r / v->g / v->b.
//
// The accumulator starts from the light entry at index rdp.num_lights (the
// slot just past the directional lights; presumably the ambient term), then
// every light whose direction has a positive dot product with v->vec adds
// its colour scaled by that dot product. Each channel is capped at 1.0
// before being converted to an 8-bit value.
void calc_light (VERTEX *v)
{
  // Plain automatic array: `register` is not valid on objects in C++17 and
  // never had any effect on an indexed array.
  float color[3] = {
    rdp.light[rdp.num_lights].r,
    rdp.light[rdp.num_lights].g,
    rdp.light[rdp.num_lights].b
  };

  for (wxUint32 l = 0; l < rdp.num_lights; l++)
  {
    const float light_intensity = DotProduct (rdp.light_vector[l], v->vec);

    // Lights facing away from the vertex contribute nothing.
    if (light_intensity > 0.0f)
    {
      color[0] += rdp.light[l].r * light_intensity;
      color[1] += rdp.light[l].g * light_intensity;
      color[2] += rdp.light[l].b * light_intensity;
    }
  }

  // Saturate each channel so the 8-bit conversion below cannot overflow.
  for (int c = 0; c < 3; c++)
  {
    if (color[c] > 1.0f)
      color[c] = 1.0f;
  }

  v->r = (wxUint8)(color[0]*255.0f);
  v->g = (wxUint8)(color[1]*255.0f);
  v->b = (wxUint8)(color[2]*255.0f);
}
74

75
//*
76
// Generates texture coordinates for `v` with an acos-based mapping.
//
// When settings.force_calc_sphere is set the work is delegated to
// calc_sphere(). Otherwise v->vec is transformed by the model matrix
// (rdp.model, not the combined matrix) and normalised; the x/y inputs are
// either the first two components of that vector or, when rdp.use_lookat is
// set, its dot products with rdp.lookat[0] and rdp.lookat[1]. Both are
// clamped to [-1, 1] before being fed to acosf().
//
// v->ou / v->ov are only written when rdp.cur_cache[0] is non-null;
// v->uv_scaled is set to 1 in every case.
void calc_linear (VERTEX *v)
{
  if (settings.force_calc_sphere)
  {
    calc_sphere(v);
    return;
  }

  DECLAREALIGN16VAR(vec[3]);
  TransformVector (v->vec, vec, rdp.model);
  NormalizeVector (vec);

  float x = rdp.use_lookat ? DotProduct (rdp.lookat[0], vec) : vec[0];
  float y = rdp.use_lookat ? DotProduct (rdp.lookat[1], vec) : vec[1];

  // Keep the acosf() arguments inside its domain.
  x = (x > 1.0f) ? 1.0f : ((x < -1.0f) ? -1.0f : x);
  y = (y > 1.0f) ? 1.0f : ((y < -1.0f) ? -1.0f : y);

  if (rdp.cur_cache[0])
  {
    const float pi = 3.141592654f;
    // scale >> 6 is the size to map to
    v->ou = (acosf(x)/pi) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);
    v->ov = (acosf(y)/pi) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);
  }
  v->uv_scaled = 1;
#ifdef EXTREME_LOGGING
  FRDP ("calc linear u: %f, v: %f\n", v->ou, v->ov);
#endif
}
120

121
// Generates texture coordinates for `v` with a linear ("sphere") mapping.
//
// v->vec is transformed by the model matrix (rdp.model) and normalised. The
// x/y inputs are the first two components of the result or, when
// rdp.use_lookat is set, its dot products with rdp.lookat[0] / rdp.lookat[1].
// Each input is remapped as (value * 0.5 + 0.5) and multiplied by the
// current tile's scale (org_s_scale / org_t_scale shifted right by 6).
// With the hack_Chopper flag set, the scale is additionally limited to the
// tile's lr_s / lr_t. v->uv_scaled is set to 1.
void calc_sphere (VERTEX *v)
{
  DECLAREALIGN16VAR(vec[3]);

  // Default scales; overridden below when the Chopper hack is active.
  int s_scale = rdp.tiles[rdp.cur_tile].org_s_scale >> 6;
  int t_scale = rdp.tiles[rdp.cur_tile].org_t_scale >> 6;
  if (settings.hacks&hack_Chopper)
  {
    s_scale = min(rdp.tiles[rdp.cur_tile].org_s_scale >> 6, rdp.tiles[rdp.cur_tile].lr_s);
    t_scale = min(rdp.tiles[rdp.cur_tile].org_t_scale >> 6, rdp.tiles[rdp.cur_tile].lr_t);
  }

  TransformVector (v->vec, vec, rdp.model);
  NormalizeVector (vec);

  const float x = rdp.use_lookat ? DotProduct (rdp.lookat[0], vec) : vec[0];
  const float y = rdp.use_lookat ? DotProduct (rdp.lookat[1], vec) : vec[1];

  v->ou = (x * 0.5f + 0.5f) * s_scale;
  v->ov = (y * 0.5f + 0.5f) * t_scale;
  v->uv_scaled = 1;
#ifdef EXTREME_LOGGING
  FRDP ("calc sphere u: %f, v: %f\n", v->ou, v->ov);
#endif
}
157

158
// Returns the dot product of the 3-component vectors `v1` and `v2`.
// Only elements [0..2] of each array are read.
// (The obsolete `register` specifiers were dropped: the keyword is no longer
// valid in C++17 and does not affect the function's type.)
float DotProductC(float *v1, float *v2)
{
    return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
}
164

165
// Scales the 3-component vector `v` in place to unit length.
// A vector whose length is not greater than zero is left untouched, which
// avoids a division by zero.
// (The obsolete `register` specifier was dropped; it is invalid in C++17.)
void NormalizeVectorC(float *v)
{
    const float len = sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
    if (len > 0.0f)
    {
        v[0] /= len;
        v[1] /= len;
        v[2] /= len;
    }
}
176

177
// Multiplies the 3-component row vector `src` by the upper-left 3x3 part of
// `mat` and writes the result to `dst`:
//   dst[j] = src[0]*mat[0][j] + src[1]*mat[1][j] + src[2]*mat[2][j]
// The fourth row and column of `mat` are not used.
//
// The inputs are read into locals first so the call is also correct when
// `src` and `dst` refer to the same array (in-place transform).
void TransformVectorC(float *src, float *dst, float mat[4][4])
{
  const float x = src[0];
  const float y = src[1];
  const float z = src[2];

  dst[0] = mat[0][0]*x + mat[1][0]*y + mat[2][0]*z;
  dst[1] = mat[0][1]*x + mat[1][1]*y + mat[2][1]*z;
  dst[2] = mat[0][2]*x + mat[1][2]*y + mat[2][2]*z;
}
183

184
// Multiplies `src` by the transpose of the upper-left 3x3 part of `mat` and
// writes the result to `dst`:
//   dst[i] = mat[i][0]*src[0] + mat[i][1]*src[1] + mat[i][2]*src[2]
// The fourth row and column of `mat` are not used.
//
// The inputs are read into locals first so the call is also correct when
// `src` and `dst` refer to the same array (in-place transform).
void InverseTransformVectorC (float *src, float *dst, float mat[4][4])
{
  const float x = src[0];
  const float y = src[1];
  const float z = src[2];

  dst[0] = mat[0][0]*x + mat[0][1]*y + mat[0][2]*z;
  dst[1] = mat[1][0]*x + mat[1][1]*y + mat[1][2]*z;
  dst[2] = mat[2][0]*x + mat[2][1]*y + mat[2][2]*z;
}
190

191
// Plain C 4x4 matrix product: r = m1 * m2, with
//   r[i][j] = sum over k of m1[i][k] * m2[k][j]
//
// The product is built in a local matrix and copied out afterwards, so the
// result is correct even when `r` is the same array as `m1` or `m2`. This
// keeps the function interchangeable with MulMatricesSSE, which reads each
// input row before it stores the corresponding output row.
void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4])
{
  float tmp[4][4];

  for (int i=0; i<4; i++)
  {
    for (int j=0; j<4; j++)
    {
      tmp[i][j] = m1[i][0] * m2[0][j] +
                  m1[i][1] * m2[1][j] +
                  m1[i][2] * m2[2][j] +
                  m1[i][3] * m2[3][j];
    }
  }

  // Publish the result only after every input element has been consumed.
  for (int i=0; i<4; i++)
  {
    for (int j=0; j<4; j++)
      r[i][j] = tmp[i][j];
  }
}
204

205
// 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication
206
//                      and 3DNOW! 4x4 4x4 matrix multiplication
207
// 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible
208
// This will need fixing.
209
MULMATRIX MulMatrices = MulMatricesC;
210
TRANSFORMVECTOR TransformVector = TransformVectorC;
211
TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
212
DOTPRODUCT DotProduct = DotProductC;
213
NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
214

215
// SSE 4x4 matrix product: r = m1 * m2 (matrices indexed [row][col]).
//
// Each output row is computed as
//   r[i] = m1[i][0]*m2[0] + m1[i][1]*m2[1] + m1[i][2]*m2[2] + m1[i][3]*m2[3]
// where m2[k] is a whole row held in one SSE register and m1[i][k] is
// broadcast to all four lanes with a shuffle.
//
// Only <xmmintrin.h> intrinsics are used, so the same code builds with GCC,
// Clang and MSVC on both 32- and 64-bit targets. Unaligned loads and stores
// are used, so the matrices need no particular alignment. All rows of `m2`
// are loaded before anything is stored, and row i of `m1` is loaded before
// row i of `r` is written, so `r` may be the same array as `m1` or `m2`.
//
// When SSE support is compiled out (NO_ASM or NOSSE defined) the call is
// forwarded to MulMatricesC instead of silently leaving `r` unwritten.
void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
#if !defined(NO_ASM) && !defined(NOSSE)
  const __m128 row0 = _mm_loadu_ps(m2[0]);
  const __m128 row1 = _mm_loadu_ps(m2[1]);
  const __m128 row2 = _mm_loadu_ps(m2[2]);
  const __m128 row3 = _mm_loadu_ps(m2[3]);

  for (int i = 0; i < 4; ++i)
  {
    const __m128 leftrow = _mm_loadu_ps(m1[i]);

    // Four copies of leftrow[0] times row 0 of m2: the first summands.
    __m128 destrow = _mm_mul_ps(
        _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(0, 0, 0, 0)), row0);

    // Accumulate the leftrow[1], leftrow[2] and leftrow[3] terms in order.
    destrow = _mm_add_ps(destrow, _mm_mul_ps(
        _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(1, 1, 1, 1)), row1));
    destrow = _mm_add_ps(destrow, _mm_mul_ps(
        _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(2, 2, 2, 2)), row2));
    destrow = _mm_add_ps(destrow, _mm_mul_ps(
        _mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(3, 3, 3, 3)), row3));

    _mm_storeu_ps(r[i], destrow);
  }
#else
  MulMatricesC(m1, m2, r);
#endif // !NO_ASM && !NOSSE
}
359

360

361

362
  void math_init()
363
  {
364
#ifndef _DEBUG
365
    int IsSSE = FALSE;
366
#if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)
367
    int edx, eax;
368
    GLIDE64_TRY
369
    {
370
  #if defined(__x86_64__)
371
      asm volatile(" cpuid;        "
372
        : "=a"(eax), "=d"(edx)
373
        : "0"(1)
374
        : "rbx", "rcx"
375
        );
376
  #else
377
      asm volatile(" push %%ebx;   "
378
        " push %%ecx;   "
379
        " cpuid;        "
380
        " pop %%ecx;    "
381
        " pop %%ebx;    "
382
        : "=a"(eax), "=d"(edx)
383
        : "0"(1)
384
        :
385
      );
386
  #endif
387
    }
388
    GLIDE64_CATCH
389
      { return; }
390
    // Check for SSE
391
    if (edx & (1 << 25))
392
      IsSSE = TRUE;
393
#elif !defined(NO_ASM) && !defined(NOSSE)
394
    DWORD dwEdx;
395
    __try
396
    {
397
      __asm 
398
      {
399
        mov  eax,1
400
          cpuid
401
          mov dwEdx,edx
402
        }  
403
      }
404
      __except(EXCEPTION_EXECUTE_HANDLER)
405
      {
406
        return;
407
      }
408

409
      if (dwEdx & (1<<25)) 
410
      {
411
        if (dwEdx & (1<<24))
412
        {      
413
          __try
414
          {
415
            __asm xorps xmm0, xmm0
416
              IsSSE = TRUE;
417
          }
418
          __except(EXCEPTION_EXECUTE_HANDLER)
419
          {
420
            return;
421
          }
422
        }
423
      }
424
#endif // _WIN32
425
      if (IsSSE)
426
      {
427
        MulMatrices = MulMatricesSSE;
428
        LOG("3DNOW! detected.\n");
429
      }
430

431
#endif //_DEBUG
432
    }
433

434
Product

Resources

Company