Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libmupen64plus/mupen64plus-video-glide64/src/3dmath.cpp
2 views
1
/*
2
* Glide64 - Glide video plugin for Nintendo 64 emulators.
3
* Copyright (c) 2002 Dave2001
4
* Copyright (c) 2008 Günther <[email protected]>
5
*
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* any later version.
10
*
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
15
*
16
* You should have received a copy of the GNU General Public
17
* Licence along with this program; if not, write to the Free
18
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA
20
*/
21
22
//****************************************************************
23
//
24
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
25
// Project started on December 29th, 2001
26
//
27
// To modify Glide64:
28
// * Write your name and (optional) email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
29
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30
//
31
// Official Glide64 development channel: #Glide64 on EFnet
32
//
33
// Original author: Dave2001 ([email protected])
34
// Other authors: Gonetz, Gugaman
35
//
36
//****************************************************************
37
38
#define M64P_PLUGIN_PROTOTYPES 1
39
#include "m64p_types.h"
40
#include "m64p_plugin.h"
41
#include "m64p_config.h"
42
#include "m64p_vidext.h"
43
#include "3dmath.h"
44
#if !defined(NO_ASM)
45
#include <xmmintrin.h>
46
#endif
47
48
// Computes the lit colour of a vertex and stores it in v->r, v->g, v->b.
//
// The colour starts from rdp.light[rdp.num_lights] (the entry one past the
// lights visited by the loop; presumably the ambient term -- TODO confirm)
// and adds every light whose direction has a positive dot product with
// v->vec.  Each channel is capped at 1.0 and then scaled to a byte.
//
// Note: the accumulator is a plain automatic array.  The original declared it
// `register`, which is not valid for an indexed array in C and was removed
// from C++17.
void calc_light (VERTEX *v)
{
  float color[3] = {rdp.light[rdp.num_lights].r, rdp.light[rdp.num_lights].g, rdp.light[rdp.num_lights].b};

  for (DWORD l=0; l<rdp.num_lights; l++)
  {
    float light_intensity = DotProduct (rdp.light_vector[l], v->vec);

    // Lights facing away from the vertex contribute nothing.
    if (light_intensity > 0.0f)
    {
      color[0] += rdp.light[l].r * light_intensity;
      color[1] += rdp.light[l].g * light_intensity;
      color[2] += rdp.light[l].b * light_intensity;
    }
  }

  // Cap each channel so the byte conversion below cannot exceed 255.
  if (color[0] > 1.0f) color[0] = 1.0f;
  if (color[1] > 1.0f) color[1] = 1.0f;
  if (color[2] > 1.0f) color[2] = 1.0f;

  v->r = (BYTE)(color[0]*255.0f);
  v->g = (BYTE)(color[1]*255.0f);
  v->b = (BYTE)(color[2]*255.0f);
}
72
73
// Transforms the 3-component vector src by the upper-left 3x3 part of mat
// (row-vector convention: dst[c] = sum over k of src[k] * mat[k][c]).
// The fourth row and column of mat are not read, so no translation is
// applied.  Components are written in order 0, 1, 2.
__inline void TransformVector (float *src, float *dst, float mat[4][4])
{
  for (int c = 0; c < 3; c++)
  {
    dst[c] = mat[0][c]*src[0] + mat[1][c]*src[1] + mat[2][c]*src[2];
  }
}
79
80
//*
81
// Generates texture coordinates (v->ou, v->ov) for a vertex from the angle of
// its transformed vector.
//
// v->vec is transformed by rdp.model and normalized.  The two values fed to
// acosf() are either the first two components of that vector or, when
// rdp.use_lookat is set, its dot products with rdp.lookat[0] and
// rdp.lookat[1].  The coordinates are written only when rdp.cur_cache[0] is
// non-null; otherwise v is left untouched.
void calc_linear (VERTEX *v)
{
  float vec[3];

  TransformVector (v->vec, vec, rdp.model);
  NormalizeVector (vec);

  float x, y;
  if (!rdp.use_lookat)
  {
    x = vec[0];
    y = vec[1];
  }
  else
  {
    x = DotProduct (rdp.lookat[0], vec);
    y = DotProduct (rdp.lookat[1], vec);
  }

  // acosf() is only defined on [-1, 1].  Rounding in the normalization or
  // the dot products can push a value slightly outside that range, which
  // would produce NaN texture coordinates, so clamp first.
  if (x > 1.0f)
    x = 1.0f;
  else if (x < -1.0f)
    x = -1.0f;
  if (y > 1.0f)
    y = 1.0f;
  else if (y < -1.0f)
    y = -1.0f;

  if (rdp.cur_cache[0])
  {
    // scale >> 6 is size to map to.
    // A full-precision pi keeps acosf()/pi within [0, 1]; a truncated
    // constant lets the result exceed 1 at the far end of the range.
    v->ou = (acosf(x)/3.141592654f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);
    v->ov = (acosf(y)/3.141592654f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);
  }
}
106
//*/
107
108
/*
109
void calc_linear (VERTEX *v)
110
{
111
float vec[3];
112
113
TransformVector (v->vec, vec, rdp.combined);
114
NormalizeVector (vec);
115
116
if (rdp.cur_cache[0])
117
{
118
// scale >> 6 is size to map to
119
v->ou = (acosf(vec[0])/3.1415f) * (rdp.tiles[rdp.cur_tile].org_s_scale >> 6);
120
v->ov = (acosf(vec[1])/3.1415f) * (rdp.tiles[rdp.cur_tile].org_t_scale >> 6);
121
}
122
}
123
//*/
124
125
// Generates texture coordinates (v->ou, v->ov) for a vertex from its
// transformed vector.
//
// v->vec is transformed by rdp.model, normalized, and projected onto
// rdp.lookat[0] and rdp.lookat[1].  Each projection is remapped with
// value * 0.5 + 0.5 and multiplied by the current tile's scale.  The scale is
// org_{s,t}_scale >> 6, additionally limited by the tile's lr_s / lr_t when
// settings.chopper is set.
void calc_sphere (VERTEX *v)
{
  int s_scale, t_scale;
  if (!settings.chopper)
  {
    s_scale = rdp.tiles[rdp.cur_tile].org_s_scale >> 6;
    t_scale = rdp.tiles[rdp.cur_tile].org_t_scale >> 6;
  }
  else
  {
    // With settings.chopper set, the tile's lower-right coordinate caps the scale.
    s_scale = min(rdp.tiles[rdp.cur_tile].org_s_scale >> 6, rdp.tiles[rdp.cur_tile].lr_s);
    t_scale = min(rdp.tiles[rdp.cur_tile].org_t_scale >> 6, rdp.tiles[rdp.cur_tile].lr_t);
  }

  float dir[3];
  TransformVector (v->vec, dir, rdp.model);
  NormalizeVector (dir);

  float proj_s = DotProduct (rdp.lookat[0], dir);
  float proj_t = DotProduct (rdp.lookat[1], dir);

  v->ou = (proj_s * 0.5f + 0.5f) * s_scale;
  v->ov = (proj_t * 0.5f + 0.5f) * t_scale;
}
148
149
// Scalar 4x4 matrix product: r = m1 * m2, with matrices indexed [row][col].
//
// Elements of r are written in row-major order and each one is the
// left-to-right sum of its four products.  r is written while m1 and m2 are
// still being read, so r should not be the same storage as either input.
void __stdcall MulMatricesNOSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
  for (int row = 0; row < 4; row++)
  {
    for (int col = 0; col < 4; col++)
    {
      r[row][col] = m1[row][0]*m2[0][col] + m1[row][1]*m2[1][col] + m1[row][2]*m2[2][col] + m1[row][3]*m2[3][col];
    }
  }
}
183
184
// SSE 4x4 matrix product: r = m1 * m2, with matrices indexed [row][col].
//
// Each output row i is m1[i][0]*m2[0] + m1[i][1]*m2[1] + m1[i][2]*m2[2] +
// m1[i][3]*m2[3], computed four floats at a time.
//
// Three build variants:
//  * GCC-compatible compilers: documented <xmmintrin.h> intrinsics with
//    unaligned loads/stores, so the matrices need no special alignment.
//    (The compiler-internal __builtin_ia32_loadups/storeups builtins are
//    avoided because not every compiler that defines __GNUC__ provides them.)
//  * Other compilers with inline asm enabled: MSVC-style __asm block.  It
//    uses movaps, which requires 16-byte-aligned operands, so all three
//    matrices must be 16-byte aligned on this path.
//  * NO_ASM builds: falls back to MulMatricesNOSSE so that r is always
//    written (the body used to be empty in this configuration).
void __stdcall MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
#if defined(__GNUC__) && !defined(NO_ASM)
  /* [row][col] */
  const __m128 row0 = _mm_loadu_ps(m2[0]);
  const __m128 row1 = _mm_loadu_ps(m2[1]);
  const __m128 row2 = _mm_loadu_ps(m2[2]);
  const __m128 row3 = _mm_loadu_ps(m2[3]);

  for (int i = 0; i < 4; ++i)
  {
    const __m128 leftrow = _mm_loadu_ps(m1[i]);

    // Broadcast leftrow[k] to all four lanes and scale row k of m2 by it;
    // the partial sums are accumulated in the order k = 0, 1, 2, 3.
    __m128 destrow = _mm_mul_ps(_mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(0, 0, 0, 0)), row0);
    destrow = _mm_add_ps(destrow, _mm_mul_ps(_mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(1, 1, 1, 1)), row1));
    destrow = _mm_add_ps(destrow, _mm_mul_ps(_mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(2, 2, 2, 2)), row2));
    destrow = _mm_add_ps(destrow, _mm_mul_ps(_mm_shuffle_ps(leftrow, leftrow, _MM_SHUFFLE(3, 3, 3, 3)), row3));

    _mm_storeu_ps(r[i], destrow);
  }
#elif !defined(NO_ASM)
  __asm
  {
    mov eax, dword ptr [r]
    mov ecx, dword ptr [m1]
    mov edx, dword ptr [m2]

    movaps xmm0,[edx]
    movaps xmm1,[edx+16]
    movaps xmm2,[edx+32]
    movaps xmm3,[edx+48]

    // r[0][0],r[0][1],r[0][2],r[0][3]

    movaps xmm4,xmmword ptr[ecx]
    movaps xmm5,xmm4
    movaps xmm6,xmm4
    movaps xmm7,xmm4

    shufps xmm4,xmm4,00000000b
    shufps xmm5,xmm5,01010101b
    shufps xmm6,xmm6,10101010b
    shufps xmm7,xmm7,11111111b

    mulps xmm4,xmm0
    mulps xmm5,xmm1
    mulps xmm6,xmm2
    mulps xmm7,xmm3

    addps xmm4,xmm5
    addps xmm4,xmm6
    addps xmm4,xmm7

    movaps xmmword ptr[eax],xmm4

    // r[1][0],r[1][1],r[1][2],r[1][3]

    movaps xmm4,xmmword ptr[ecx+16]
    movaps xmm5,xmm4
    movaps xmm6,xmm4
    movaps xmm7,xmm4

    shufps xmm4,xmm4,00000000b
    shufps xmm5,xmm5,01010101b
    shufps xmm6,xmm6,10101010b
    shufps xmm7,xmm7,11111111b

    mulps xmm4,xmm0
    mulps xmm5,xmm1
    mulps xmm6,xmm2
    mulps xmm7,xmm3

    addps xmm4,xmm5
    addps xmm4,xmm6
    addps xmm4,xmm7

    movaps xmmword ptr[eax+16],xmm4


    // r[2][0],r[2][1],r[2][2],r[2][3]

    movaps xmm4,xmmword ptr[ecx+32]
    movaps xmm5,xmm4
    movaps xmm6,xmm4
    movaps xmm7,xmm4

    shufps xmm4,xmm4,00000000b
    shufps xmm5,xmm5,01010101b
    shufps xmm6,xmm6,10101010b
    shufps xmm7,xmm7,11111111b

    mulps xmm4,xmm0
    mulps xmm5,xmm1
    mulps xmm6,xmm2
    mulps xmm7,xmm3

    addps xmm4,xmm5
    addps xmm4,xmm6
    addps xmm4,xmm7

    movaps xmmword ptr[eax+32],xmm4

    // r[3][0],r[3][1],r[3][2],r[3][3]

    movaps xmm4,xmmword ptr[ecx+48]
    movaps xmm5,xmm4
    movaps xmm6,xmm4
    movaps xmm7,xmm4

    shufps xmm4,xmm4,00000000b
    shufps xmm5,xmm5,01010101b
    shufps xmm6,xmm6,10101010b
    shufps xmm7,xmm7,11111111b

    mulps xmm4,xmm0
    mulps xmm5,xmm1
    mulps xmm6,xmm2
    mulps xmm7,xmm3

    addps xmm4,xmm5
    addps xmm4,xmm6
    addps xmm4,xmm7

    movaps xmmword ptr[eax+48],xmm4
  }
#else
  // No SIMD path was compiled in: use the scalar routine.
  MulMatricesNOSSE(m1, m2, r);
#endif // SIMD variant selection
}
328
329
// Currently selected 4x4 matrix-multiply routine.  Starts out as the scalar
// implementation; math_init() repoints it to MulMatricesSSE when SSE is detected.
MULMATRIX MulMatrices = MulMatricesNOSSE;
330
331
void math_init()
332
{
333
BOOL IsSSE = FALSE;
334
#if defined(__GNUC__) && !defined(NO_ASM)
335
int edx, eax;
336
#if defined(__x86_64__)
337
asm volatile(" cpuid; "
338
: "=a"(eax), "=d"(edx)
339
: "0"(1)
340
: "rbx", "rcx"
341
);
342
#else
343
asm volatile(" push %%ebx; "
344
" push %%ecx; "
345
" cpuid; "
346
" pop %%ecx; "
347
" pop %%ebx; "
348
: "=a"(eax), "=d"(edx)
349
: "0"(1)
350
:
351
);
352
#endif
353
// Check for SSE
354
if (edx & (1 << 25))
355
IsSSE = TRUE;
356
#elif !defined(NO_ASM)
357
DWORD dwEdx;
358
__try
359
{
360
__asm
361
{
362
mov eax,1
363
cpuid
364
mov dwEdx,edx
365
}
366
}
367
__except(EXCEPTION_EXECUTE_HANDLER)
368
{
369
return;
370
}
371
372
if (dwEdx & (1<<25))
373
{
374
if (dwEdx & (1<<24))
375
{
376
__try
377
{
378
__asm xorps xmm0, xmm0
379
IsSSE = TRUE;
380
}
381
__except(EXCEPTION_EXECUTE_HANDLER)
382
{
383
return;
384
}
385
}
386
}
387
#endif // _WIN32
388
if (IsSSE)
389
{
390
MulMatrices = MulMatricesSSE;
391
WriteLog(M64MSG_INFO, "SSE detected.\n");
392
}
393
}
394
395
396