Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64mk2/src/Glide64/3dmath.cpp
2 views
/*
* Glide64 - Glide video plugin for Nintendo 64 emulators.
* Copyright (c) 2002 Dave2001
* Copyright (c) 2003-2009 Sergey 'Gonetz' Lipski
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators
// Project started on December 29th, 2001
//
// Authors:
// Dave2001, original author, founded the project in 2001, left it in 2002
// Gugaman, joined the project in 2002, left it in 2002
// Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
// Hiroshi 'KoolSmoky' Morii, joined the project in 2007
//
//****************************************************************
//
// To modify Glide64:
// * Write your name and (optional) email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
//
//****************************************************************

#include "Gfx_1.3.h"
41
extern "C" {
42
#ifndef NOSSE
43
#include <xmmintrin.h>
44
#endif
45
}
46
47
#include <math.h>
48
#include "3dmath.h"
49
50
// Computes the lit color of a vertex and stores it in v->r, v->g, v->b.
//
// The accumulator starts from the light stored at index rdp.num_lights
// (presumably the ambient light - confirm against the code that fills
// rdp.light). Each of the first rdp.num_lights lights then adds its color
// scaled by the dot product of its direction vector with v->vec; lights whose
// dot product is not positive contribute nothing.
// Each channel is capped at 1.0 and scaled to an 8-bit value.
void calc_light (VERTEX *v)
{
  // Plain automatic storage: the 'register' specifier is deprecated in C++11
  // and no longer valid in C++17.
  float color[3] = {rdp.light[rdp.num_lights].r, rdp.light[rdp.num_lights].g, rdp.light[rdp.num_lights].b};

  for (wxUint32 l=0; l<rdp.num_lights; l++)
  {
    const float light_intensity = DotProduct (rdp.light_vector[l], v->vec);

    if (light_intensity > 0.0f)
    {
      color[0] += rdp.light[l].r * light_intensity;
      color[1] += rdp.light[l].g * light_intensity;
      color[2] += rdp.light[l].b * light_intensity;
    }
  }

  // Saturate before the narrowing conversion below.
  if (color[0] > 1.0f) color[0] = 1.0f;
  if (color[1] > 1.0f) color[1] = 1.0f;
  if (color[2] > 1.0f) color[2] = 1.0f;

  v->r = (wxUint8)(color[0]*255.0f);
  v->g = (wxUint8)(color[1]*255.0f);
  v->b = (wxUint8)(color[2]*255.0f);
}
74
75
//*
76
// Generates texture coordinates (v->ou, v->ov) from the vertex vector using
// an arccosine mapping.
//
// When settings.force_calc_sphere is set, the work is delegated to
// calc_sphere() instead. Otherwise v->vec is transformed by rdp.model and
// normalized; the x/y inputs are either the first two components of the
// result or, when rdp.use_lookat is set, its projections onto rdp.lookat[0]
// and rdp.lookat[1]. The coordinates are only written when rdp.cur_cache[0]
// is non-null; v->uv_scaled is set to 1 in every case.
void calc_linear (VERTEX *v)
{
  if (settings.force_calc_sphere)
  {
    calc_sphere(v);
    return;
  }

  DECLAREALIGN16VAR(vec[3]);
  TransformVector (v->vec, vec, rdp.model);
  NormalizeVector (vec);

  float cos_s = vec[0];
  float cos_t = vec[1];
  if (rdp.use_lookat)
  {
    cos_s = DotProduct (rdp.lookat[0], vec);
    cos_t = DotProduct (rdp.lookat[1], vec);
  }

  // Clamp both values into [-1, 1] before they are passed to acosf().
  cos_s = (cos_s > 1.0f) ? 1.0f : ((cos_s < -1.0f) ? -1.0f : cos_s);
  cos_t = (cos_t > 1.0f) ? 1.0f : ((cos_t < -1.0f) ? -1.0f : cos_t);

  if (rdp.cur_cache[0])
  {
    // scale >> 6 is the size to map to
    v->ou = (acosf(cos_s)/3.141592654f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);
    v->ov = (acosf(cos_t)/3.141592654f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);
  }
  v->uv_scaled = 1;
#ifdef EXTREME_LOGGING
  FRDP ("calc linear u: %f, v: %f\n", v->ou, v->ov);
#endif
}
120
121
// Generates texture coordinates (v->ou, v->ov) from the vertex vector using
// a linear mapping of [-1, 1] onto [0, scale].
//
// The scale for each axis is the current tile's org_s_scale / org_t_scale
// shifted right by 6; when the hack_Chopper bit is set in settings.hacks it
// is additionally limited to the tile's lr_s / lr_t via min().
// v->vec is transformed by rdp.model and normalized; the x/y inputs are
// either the first two components of the result or, when rdp.use_lookat is
// set, its projections onto rdp.lookat[0] and rdp.lookat[1].
// v->uv_scaled is set to 1.
void calc_sphere (VERTEX *v)
{
  DECLAREALIGN16VAR(vec[3]);
  int s_scale, t_scale;

  if (!(settings.hacks&hack_Chopper))
  {
    s_scale = rdp.tiles[rdp.cur_tile].org_s_scale >> 6;
    t_scale = rdp.tiles[rdp.cur_tile].org_t_scale >> 6;
  }
  else
  {
    s_scale = min(rdp.tiles[rdp.cur_tile].org_s_scale >> 6, rdp.tiles[rdp.cur_tile].lr_s);
    t_scale = min(rdp.tiles[rdp.cur_tile].org_t_scale >> 6, rdp.tiles[rdp.cur_tile].lr_t);
  }

  TransformVector (v->vec, vec, rdp.model);
  NormalizeVector (vec);

  float nx = vec[0];
  float ny = vec[1];
  if (rdp.use_lookat)
  {
    nx = DotProduct (rdp.lookat[0], vec);
    ny = DotProduct (rdp.lookat[1], vec);
  }

  // Remap from [-1, 1] to [0, 1], then stretch to the chosen scale.
  v->ou = (nx * 0.5f + 0.5f) * s_scale;
  v->ov = (ny * 0.5f + 0.5f) * t_scale;
  v->uv_scaled = 1;
#ifdef EXTREME_LOGGING
  FRDP ("calc sphere u: %f, v: %f\n", v->ou, v->ov);
#endif
}
157
158
// Portable dot product of two 3-component vectors.
//
// v1, v2: pointers to at least three floats each (only indices 0..2 are read).
// Returns v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2].
//
// The obsolete 'register' specifiers were dropped (deprecated in C++11,
// invalid in C++17); this does not change the function's type.
float DotProductC(float *v1, float *v2)
{
  return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
}
164
165
// Portable in-place normalization of a 3-component vector.
//
// v: pointer to at least three floats; indices 0..2 are read and rewritten.
// A vector whose length is not greater than zero is left untouched, which
// avoids dividing by zero.
//
// The obsolete 'register' specifier was dropped (deprecated in C++11,
// invalid in C++17).
void NormalizeVectorC(float *v)
{
  const float len = sqrtf(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
  if (len > 0.0f)
  {
    v[0] /= len;
    v[1] /= len;
    v[2] /= len;
  }
}
176
177
// Portable transform of a 3-component vector by the upper-left 3x3 part of a
// 4x4 matrix: dst[c] = sum over k of mat[k][c] * src[k], for c, k in 0..2.
//
// src: input vector (three floats read).
// dst: output vector (three floats written, in index order).
// mat: 4x4 matrix; the fourth row and column are not used.
void TransformVectorC(float *src, float *dst, float mat[4][4])
{
  for (int col = 0; col < 3; ++col)
  {
    dst[col] = mat[0][col]*src[0] + mat[1][col]*src[1] + mat[2][col]*src[2];
  }
}
183
184
// Portable transform of a 3-component vector by the transpose of the
// upper-left 3x3 part of a 4x4 matrix:
// dst[r] = sum over k of mat[r][k] * src[k], for r, k in 0..2.
//
// src: input vector (three floats read).
// dst: output vector (three floats written, in index order).
// mat: 4x4 matrix; the fourth row and column are not used.
void InverseTransformVectorC (float *src, float *dst, float mat[4][4])
{
  for (int row = 0; row < 3; ++row)
  {
    dst[row] = mat[row][0]*src[0] + mat[row][1]*src[1] + mat[row][2]*src[2];
  }
}
190
191
// Portable 4x4 matrix product: r = m1 * m2 (row-major, [row][col]).
//
// m1, m2: input matrices.
// r:      output matrix; it may be the same array as m1 and/or m2.
//
// The product is accumulated in a local temporary and copied out at the end.
// Writing straight into r would overwrite elements of m1/m2 that are still
// needed when r aliases an input. MulMatricesSSE already tolerates such
// aliasing, so this keeps both implementations interchangeable behind the
// MulMatrices pointer.
void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4])
{
  float product[4][4];

  for (int i=0; i<4; i++)
  {
    for (int j=0; j<4; j++)
    {
      product[i][j] = m1[i][0] * m2[0][j] +
                      m1[i][1] * m2[1][j] +
                      m1[i][2] * m2[2][j] +
                      m1[i][3] * m2[3][j];
    }
  }

  for (int i=0; i<4; i++)
  {
    for (int j=0; j<4; j++)
    {
      r[i][j] = product[i][j];
    }
  }
}
204
205
// 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication
206
// and 3DNOW! 4x4 4x4 matrix multiplication
207
// 2011-01-03 Balrog - removed because is in NASM format and not 64-bit compatible
208
// This will need fixing.
209
MULMATRIX MulMatrices = MulMatricesC;
210
TRANSFORMVECTOR TransformVector = TransformVectorC;
211
TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
212
DOTPRODUCT DotProduct = DotProductC;
213
NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
214
215
// SSE 4x4 matrix product: r = m1 * m2 (row-major, [row][col]).
//
// m1, m2: input matrices.
// r:      output matrix; it may be the same array as m1 and/or m2, because
//         all of m2 is loaded before anything is stored and each row of m1
//         is fully consumed before the matching row of r is written.
//
// Each output row is built as
//   m1[i][0]*m2[0] + m1[i][1]*m2[1] + m1[i][2]*m2[2] + m1[i][3]*m2[3]
// accumulated left to right.
//
// Implemented with the standard <xmmintrin.h> intrinsics so that one code
// path serves every compiler, instead of a GCC-only builtin plus 32-bit MSVC
// inline assembly. Unaligned loads/stores are used, so the matrices do not
// have to be 16-byte aligned.
//
// When SSE support is compiled out (NO_ASM or NOSSE defined) the call is
// forwarded to MulMatricesC so that r is always written.
void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
#if !defined(NO_ASM) && !defined(NOSSE)
  /* [row][col]*/
  const __m128 row0 = _mm_loadu_ps(m2[0]);
  const __m128 row1 = _mm_loadu_ps(m2[1]);
  const __m128 row2 = _mm_loadu_ps(m2[2]);
  const __m128 row3 = _mm_loadu_ps(m2[3]);

  for (int i = 0; i < 4; ++i)
  {
    // Broadcast each element of the left row and scale the matching right row.
    __m128 destrow = _mm_mul_ps(_mm_set1_ps(m1[i][0]), row0);
    destrow = _mm_add_ps(destrow, _mm_mul_ps(_mm_set1_ps(m1[i][1]), row1));
    destrow = _mm_add_ps(destrow, _mm_mul_ps(_mm_set1_ps(m1[i][2]), row2));
    destrow = _mm_add_ps(destrow, _mm_mul_ps(_mm_set1_ps(m1[i][3]), row3));

    _mm_storeu_ps(r[i], destrow);
  }
#else
  MulMatricesC(m1, m2, r);
#endif // !NO_ASM && !NOSSE
}
359
360
361
362
void math_init()
363
{
364
#ifndef _DEBUG
365
int IsSSE = FALSE;
366
#if defined(__GNUC__) && !defined(NO_ASM) && !defined(NOSSE)
367
int edx, eax;
368
GLIDE64_TRY
369
{
370
#if defined(__x86_64__)
371
asm volatile(" cpuid; "
372
: "=a"(eax), "=d"(edx)
373
: "0"(1)
374
: "rbx", "rcx"
375
);
376
#else
377
asm volatile(" push %%ebx; "
378
" push %%ecx; "
379
" cpuid; "
380
" pop %%ecx; "
381
" pop %%ebx; "
382
: "=a"(eax), "=d"(edx)
383
: "0"(1)
384
:
385
);
386
#endif
387
}
388
GLIDE64_CATCH
389
{ return; }
390
// Check for SSE
391
if (edx & (1 << 25))
392
IsSSE = TRUE;
393
#elif !defined(NO_ASM) && !defined(NOSSE)
394
DWORD dwEdx;
395
__try
396
{
397
__asm
398
{
399
mov eax,1
400
cpuid
401
mov dwEdx,edx
402
}
403
}
404
__except(EXCEPTION_EXECUTE_HANDLER)
405
{
406
return;
407
}
408
409
if (dwEdx & (1<<25))
410
{
411
if (dwEdx & (1<<24))
412
{
413
__try
414
{
415
__asm xorps xmm0, xmm0
416
IsSSE = TRUE;
417
}
418
__except(EXCEPTION_EXECUTE_HANDLER)
419
{
420
return;
421
}
422
}
423
}
424
#endif // _WIN32
425
if (IsSSE)
426
{
427
MulMatrices = MulMatricesSSE;
428
LOG("3DNOW! detected.\n");
429
}
430
431
#endif //_DEBUG
432
}
433
434