Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
alexbevi
GitHub Repository: alexbevi/BizHawk
Path: blob/master/libsnes/bsnes/libco/ppc.c
2 views
1
/*
libco.ppc (2010-10-17)
author: blargg
license: public domain
*/
6
7
/* PowerPC 32/64 using embedded or external asm, with optional
floating-point and AltiVec save/restore */
9
10
#define LIBCO_C
11
#include "libco.h"
12
#include <stdlib.h>
13
#include <stdint.h>
14
#include <string.h>
15
16
/* Nonzero when building for a Unix-like target that uses the embedded swap
code (i.e. LIBCO_PPC_ASM is not in effect). It pulls in <unistd.h> and
<sys/mman.h> and enables the mprotect() call in co_init_ that changes the
protection of the pages holding libco_ppc_code. */
#define LIBCO_MPROTECT (__unix__ && !LIBCO_PPC_ASM)
17
18
#if LIBCO_MPROTECT
19
#include <unistd.h>
20
#include <sys/mman.h>
21
#endif
22
23
/* State format (offsets in 32-bit words)

 +0  Pointer to swap code
     Rest of function descriptor for entry function
 +8  PC
+10  SP
     Special regs
     GPRs
     FPRs
     VRs
     stack
*/
35
36
/* Bytes allocated for the main thread's state block (see co_init_), and
reserved in every created thread's block in addition to its stack */
enum { state_size = 1024 };
36
/* Bytes left between the initial stack pointer and the end of the block */
enum { above_stack = 2048 };
37
/* The initial stack pointer is rounded down to a multiple of this many bytes */
enum { stack_align = 256 };
38
39
/* Handle of the currently running cothread for this OS thread; 0 until
co_active() has initialized the main thread's state */
static thread_local cothread_t co_active_handle = 0;
41
42
/**** Determine environment ****/
43
44
/* Nonzero when any of the listed 64-bit PowerPC macros is set. Selects the
64-bit swap code and the 64-bit pointer layout used by co_create. */
#define LIBCO_PPC64 (_ARCH_PPC64 || __PPC64__ || __ppc64__ || __powerpc64__)
45
46
/* Whether function calls are indirect through a descriptor,
or are directly to function. The build may predefine LIBCO_PPCDESC to
override this detection; otherwise it is set to 1 when the SysV calling
convention macro is not set and either an AIX calling convention macro is
set or the target is 64-bit. */
48
#ifndef LIBCO_PPCDESC
49
#if !_CALL_SYSV && (_CALL_AIX || _CALL_AIXDESC || LIBCO_PPC64)
50
#define LIBCO_PPCDESC 1
51
#endif
52
#endif
53
54
#ifdef LIBCO_PPC_ASM
55
56
/* Give the external swap routine C linkage when compiled as C++ */
#ifdef __cplusplus
57
extern "C"
58
#endif
59
60
/* Swap code is in ppc.S. co_switch passes the thread being switched to as
the first argument and the thread being left as the second. */
61
void co_swap_asm( cothread_t, cothread_t );
62
/* With external asm, a swap is an ordinary call to co_swap_asm */
#define CO_SWAP_ASM( x, y ) co_swap_asm( x, y )
63
64
#else
65
66
/* Swap code is here in array. Please leave disassembly comments,
as they make it easy to see what it does, and reorder instructions
if one wants to see whether that improves performance.

Each element is one 32-bit instruction word, followed by its disassembly.
Per the disassembly, the outgoing context is stored through r4 and the
incoming context is loaded through r3; co_switch invokes the code as
CO_SWAP_ASM( t, old ), i.e. target thread first, previous thread second.
Byte offsets used within a state block: 32 = saved LR / resume address,
40 = SP (r1), 48 = CR, 52 = VRSAVE, followed by GPRs, FPRs and VRs. */
69
static const uint32_t libco_ppc_code [] = {
70
#if LIBCO_PPC64
71
/* 64-bit variant: save SP, r14-r31 and LR of the outgoing thread with std */
0x7d000026, /* mfcr r8 */
72
0xf8240028, /* std r1,40(r4) */
73
0x7d2802a6, /* mflr r9 */
74
0xf9c40048, /* std r14,72(r4) */
75
0xf9e40050, /* std r15,80(r4) */
76
0xfa040058, /* std r16,88(r4) */
77
0xfa240060, /* std r17,96(r4) */
78
0xfa440068, /* std r18,104(r4) */
79
0xfa640070, /* std r19,112(r4) */
80
0xfa840078, /* std r20,120(r4) */
81
0xfaa40080, /* std r21,128(r4) */
82
0xfac40088, /* std r22,136(r4) */
83
0xfae40090, /* std r23,144(r4) */
84
0xfb040098, /* std r24,152(r4) */
85
0xfb2400a0, /* std r25,160(r4) */
86
0xfb4400a8, /* std r26,168(r4) */
87
0xfb6400b0, /* std r27,176(r4) */
88
0xfb8400b8, /* std r28,184(r4) */
89
0xfba400c0, /* std r29,192(r4) */
90
0xfbc400c8, /* std r30,200(r4) */
91
0xfbe400d0, /* std r31,208(r4) */
92
0xf9240020, /* std r9,32(r4) */
93
/* Fetch the incoming thread's resume address (into r7) and its SP */
0xe8e30020, /* ld r7,32(r3) */
94
0xe8230028, /* ld r1,40(r3) */
95
/* NOTE(review): the bl skips over the trap word; presumably this leaves LR
pointing at the trap so a stray return faults - TODO confirm */
0x48000009, /* bl 1 */
96
0x7fe00008, /* trap */
97
/* Save outgoing CR, load incoming CR value, and put resume address in CTR */
0x91040030,/*1:stw r8,48(r4) */
98
0x80c30030, /* lwz r6,48(r3) */
99
0x7ce903a6, /* mtctr r7 */
100
/* Restore r14-r31 of the incoming thread */
0xe9c30048, /* ld r14,72(r3) */
101
0xe9e30050, /* ld r15,80(r3) */
102
0xea030058, /* ld r16,88(r3) */
103
0xea230060, /* ld r17,96(r3) */
104
0xea430068, /* ld r18,104(r3) */
105
0xea630070, /* ld r19,112(r3) */
106
0xea830078, /* ld r20,120(r3) */
107
0xeaa30080, /* ld r21,128(r3) */
108
0xeac30088, /* ld r22,136(r3) */
109
0xeae30090, /* ld r23,144(r3) */
110
0xeb030098, /* ld r24,152(r3) */
111
0xeb2300a0, /* ld r25,160(r3) */
112
0xeb4300a8, /* ld r26,168(r3) */
113
0xeb6300b0, /* ld r27,176(r3) */
114
0xeb8300b8, /* ld r28,184(r3) */
115
0xeba300c0, /* ld r29,192(r3) */
116
0xebc300c8, /* ld r30,200(r3) */
117
0xebe300d0, /* ld r31,208(r3) */
118
0x7ccff120, /* mtcr r6 */
119
#else
120
/* 32-bit variant: same sequence using stw/lwz; here r13 is saved and
restored as well (r13-r31) */
0x7d000026, /* mfcr r8 */
121
0x90240028, /* stw r1,40(r4) */
122
0x7d2802a6, /* mflr r9 */
123
0x91a4003c, /* stw r13,60(r4) */
124
0x91c40040, /* stw r14,64(r4) */
125
0x91e40044, /* stw r15,68(r4) */
126
0x92040048, /* stw r16,72(r4) */
127
0x9224004c, /* stw r17,76(r4) */
128
0x92440050, /* stw r18,80(r4) */
129
0x92640054, /* stw r19,84(r4) */
130
0x92840058, /* stw r20,88(r4) */
131
0x92a4005c, /* stw r21,92(r4) */
132
0x92c40060, /* stw r22,96(r4) */
133
0x92e40064, /* stw r23,100(r4) */
134
0x93040068, /* stw r24,104(r4) */
135
0x9324006c, /* stw r25,108(r4) */
136
0x93440070, /* stw r26,112(r4) */
137
0x93640074, /* stw r27,116(r4) */
138
0x93840078, /* stw r28,120(r4) */
139
0x93a4007c, /* stw r29,124(r4) */
140
0x93c40080, /* stw r30,128(r4) */
141
0x93e40084, /* stw r31,132(r4) */
142
0x91240020, /* stw r9,32(r4) */
143
/* Fetch the incoming thread's resume address (into r7) and its SP */
0x80e30020, /* lwz r7,32(r3) */
144
0x80230028, /* lwz r1,40(r3) */
145
/* NOTE(review): the bl skips over the trap word; presumably this leaves LR
pointing at the trap so a stray return faults - TODO confirm */
0x48000009, /* bl 1 */
146
0x7fe00008, /* trap */
147
/* Save outgoing CR, load incoming CR value, and put resume address in CTR */
0x91040030,/*1:stw r8,48(r4) */
148
0x80c30030, /* lwz r6,48(r3) */
149
0x7ce903a6, /* mtctr r7 */
150
/* Restore r13-r31 of the incoming thread */
0x81a3003c, /* lwz r13,60(r3) */
151
0x81c30040, /* lwz r14,64(r3) */
152
0x81e30044, /* lwz r15,68(r3) */
153
0x82030048, /* lwz r16,72(r3) */
154
0x8223004c, /* lwz r17,76(r3) */
155
0x82430050, /* lwz r18,80(r3) */
156
0x82630054, /* lwz r19,84(r3) */
157
0x82830058, /* lwz r20,88(r3) */
158
0x82a3005c, /* lwz r21,92(r3) */
159
0x82c30060, /* lwz r22,96(r3) */
160
0x82e30064, /* lwz r23,100(r3) */
161
0x83030068, /* lwz r24,104(r3) */
162
0x8323006c, /* lwz r25,108(r3) */
163
0x83430070, /* lwz r26,112(r3) */
164
0x83630074, /* lwz r27,116(r3) */
165
0x83830078, /* lwz r28,120(r3) */
166
0x83a3007c, /* lwz r29,124(r3) */
167
0x83c30080, /* lwz r30,128(r3) */
168
0x83e30084, /* lwz r31,132(r3) */
169
0x7ccff120, /* mtcr r6 */
170
#endif
171
172
/* Floating-point registers f14-f31: stored for the outgoing thread, then
loaded for the incoming one. Omitted when LIBCO_PPC_NOFP is defined. */
#ifndef LIBCO_PPC_NOFP
173
0xd9c400e0, /* stfd f14,224(r4) */
174
0xd9e400e8, /* stfd f15,232(r4) */
175
0xda0400f0, /* stfd f16,240(r4) */
176
0xda2400f8, /* stfd f17,248(r4) */
177
0xda440100, /* stfd f18,256(r4) */
178
0xda640108, /* stfd f19,264(r4) */
179
0xda840110, /* stfd f20,272(r4) */
180
0xdaa40118, /* stfd f21,280(r4) */
181
0xdac40120, /* stfd f22,288(r4) */
182
0xdae40128, /* stfd f23,296(r4) */
183
0xdb040130, /* stfd f24,304(r4) */
184
0xdb240138, /* stfd f25,312(r4) */
185
0xdb440140, /* stfd f26,320(r4) */
186
0xdb640148, /* stfd f27,328(r4) */
187
0xdb840150, /* stfd f28,336(r4) */
188
0xdba40158, /* stfd f29,344(r4) */
189
0xdbc40160, /* stfd f30,352(r4) */
190
0xdbe40168, /* stfd f31,360(r4) */
191
0xc9c300e0, /* lfd f14,224(r3) */
192
0xc9e300e8, /* lfd f15,232(r3) */
193
0xca0300f0, /* lfd f16,240(r3) */
194
0xca2300f8, /* lfd f17,248(r3) */
195
0xca430100, /* lfd f18,256(r3) */
196
0xca630108, /* lfd f19,264(r3) */
197
0xca830110, /* lfd f20,272(r3) */
198
0xcaa30118, /* lfd f21,280(r3) */
199
0xcac30120, /* lfd f22,288(r3) */
200
0xcae30128, /* lfd f23,296(r3) */
201
0xcb030130, /* lfd f24,304(r3) */
202
0xcb230138, /* lfd f25,312(r3) */
203
0xcb430140, /* lfd f26,320(r3) */
204
0xcb630148, /* lfd f27,328(r3) */
205
0xcb830150, /* lfd f28,336(r3) */
206
0xcba30158, /* lfd f29,344(r3) */
207
0xcbc30160, /* lfd f30,352(r3) */
208
0xcbe30168, /* lfd f31,360(r3) */
209
#endif
210
211
/* AltiVec: VRSAVE is always stored for the outgoing thread and reloaded for
the incoming one. v20-v31 are stored only when the low 12 bits of the
outgoing VRSAVE are nonzero, and loaded only when the low 12 bits of the
incoming VRSAVE are nonzero (otherwise beqctr jumps straight to the resume
address). r8 and r9 walk alternating 16-byte slots starting at byte offsets
384 and 400. */
#ifdef __ALTIVEC__
212
0x7ca042a6, /* mfvrsave r5 */
213
0x39040180, /* addi r8,r4,384 */
214
0x39240190, /* addi r9,r4,400 */
215
0x70a00fff, /* andi. r0,r5,4095 */
216
0x90a40034, /* stw r5,52(r4) */
217
0x4182005c, /* beq- 2 */
218
0x7e8041ce, /* stvx v20,r0,r8 */
219
0x39080020, /* addi r8,r8,32 */
220
0x7ea049ce, /* stvx v21,r0,r9 */
221
0x39290020, /* addi r9,r9,32 */
222
0x7ec041ce, /* stvx v22,r0,r8 */
223
0x39080020, /* addi r8,r8,32 */
224
0x7ee049ce, /* stvx v23,r0,r9 */
225
0x39290020, /* addi r9,r9,32 */
226
0x7f0041ce, /* stvx v24,r0,r8 */
227
0x39080020, /* addi r8,r8,32 */
228
0x7f2049ce, /* stvx v25,r0,r9 */
229
0x39290020, /* addi r9,r9,32 */
230
0x7f4041ce, /* stvx v26,r0,r8 */
231
0x39080020, /* addi r8,r8,32 */
232
0x7f6049ce, /* stvx v27,r0,r9 */
233
0x39290020, /* addi r9,r9,32 */
234
0x7f8041ce, /* stvx v28,r0,r8 */
235
0x39080020, /* addi r8,r8,32 */
236
0x7fa049ce, /* stvx v29,r0,r9 */
237
0x39290020, /* addi r9,r9,32 */
238
0x7fc041ce, /* stvx v30,r0,r8 */
239
0x7fe049ce, /* stvx v31,r0,r9 */
240
0x80a30034,/*2:lwz r5,52(r3) */
241
0x39030180, /* addi r8,r3,384 */
242
0x39230190, /* addi r9,r3,400 */
243
0x70a00fff, /* andi. r0,r5,4095 */
244
0x7ca043a6, /* mtvrsave r5 */
245
0x4d820420, /* beqctr */
246
0x7e8040ce, /* lvx v20,r0,r8 */
247
0x39080020, /* addi r8,r8,32 */
248
0x7ea048ce, /* lvx v21,r0,r9 */
249
0x39290020, /* addi r9,r9,32 */
250
0x7ec040ce, /* lvx v22,r0,r8 */
251
0x39080020, /* addi r8,r8,32 */
252
0x7ee048ce, /* lvx v23,r0,r9 */
253
0x39290020, /* addi r9,r9,32 */
254
0x7f0040ce, /* lvx v24,r0,r8 */
255
0x39080020, /* addi r8,r8,32 */
256
0x7f2048ce, /* lvx v25,r0,r9 */
257
0x39290020, /* addi r9,r9,32 */
258
0x7f4040ce, /* lvx v26,r0,r8 */
259
0x39080020, /* addi r8,r8,32 */
260
0x7f6048ce, /* lvx v27,r0,r9 */
261
0x39290020, /* addi r9,r9,32 */
262
0x7f8040ce, /* lvx v28,r0,r8 */
263
0x39080020, /* addi r8,r8,32 */
264
0x7fa048ce, /* lvx v29,r0,r9 */
265
0x39290020, /* addi r9,r9,32 */
266
0x7fc040ce, /* lvx v30,r0,r8 */
267
0x7fe048ce, /* lvx v31,r0,r9 */
268
#endif
269
270
/* Continue at the incoming thread's resume address held in CTR */
0x4e800420, /* bctr */
271
};
272
273
/* CO_SWAP_ASM( x, y ) runs the embedded swap code with x and y as its two
arguments. co_switch passes the thread being switched to as x and the thread
being left as y. */
#if LIBCO_PPCDESC
274
/* Function call goes through indirect descriptor: the thread block x itself
is called as the function, since co_create_ places a descriptor pointing at
the swap code at the start of every block. Note x is evaluated twice. */
275
#define CO_SWAP_ASM( x, y ) \
276
((void (*)( cothread_t, cothread_t )) (uintptr_t) x)( x, y )
277
#else
278
/* Function call goes directly to code: the libco_ppc_code array is called as
the function */
279
#define CO_SWAP_ASM( x, y ) \
280
((void (*)( cothread_t, cothread_t )) (uintptr_t) libco_ppc_code)( x, y )
281
#endif
282
283
#endif
284
285
/* Allocates a raw thread block of `size` bytes and returns it, or NULL if
   malloc fails.

   When calls go through function descriptors (LIBCO_PPCDESC), the start of the
   block is filled with a copy of the descriptor that `entry` points to, with
   its first pointer replaced by the address of the swap routine. Otherwise
   `entry` is not used and the block is left uninitialized. */
static uint32_t* co_create_( unsigned size, uintptr_t entry )
{
	uint32_t* state = (uint32_t*) malloc( size );

#if LIBCO_PPCDESC
	if ( state != NULL )
	{
		/* Start from entry's own descriptor (three pointer-sized slots) */
		memcpy( state, (void*) entry, 3 * sizeof (void*) );

		/* ...then aim its function pointer at the swap routine */
	#ifdef LIBCO_PPC_ASM
		*(const void**) state = *(void**) &co_swap_asm;
	#else
		*(const void**) state = libco_ppc_code;
	#endif
	}
#else
	/* entry is only consulted when descriptors are in use */
	(void) entry;
#endif

	return state;
}
308
309
cothread_t co_create( unsigned int size, void (*entry_)( void ) )
310
{
311
uintptr_t entry = (uintptr_t) entry_;
312
uint32_t* t = NULL;
313
314
/* Be sure main thread was successfully allocated */
315
if ( co_active() )
316
{
317
size += state_size + above_stack + stack_align;
318
t = co_create_( size, entry );
319
}
320
321
if ( t )
322
{
323
uintptr_t sp;
324
int shift;
325
326
/* Save current registers into new thread, so that any special ones will
327
have proper values when thread is begun */
328
CO_SWAP_ASM( t, t );
329
330
#if LIBCO_PPCDESC
331
/* Get real address */
332
entry = (uintptr_t) *(void**) entry;
333
#endif
334
335
/* Put stack near end of block, and align */
336
sp = (uintptr_t) t + size - above_stack;
337
sp -= sp % stack_align;
338
339
/* On PPC32, we save and restore GPRs as 32 bits. For PPC64, we
340
save and restore them as 64 bits, regardless of the size the ABI
341
uses. So, we manually write pointers at the proper size. We always
342
save and restore at the same address, and since PPC is big-endian,
343
we must put the low byte first on PPC32. */
344
345
/* If uintptr_t is 32 bits, >>32 is undefined behavior, so we do two shifts
346
and don't have to care how many bits uintptr_t is. */
347
#if LIBCO_PPC64
348
shift = 16;
349
#else
350
shift = 0;
351
#endif
352
353
/* Set up so entry will be called on next swap */
354
t [8] = (uint32_t) (entry >> shift >> shift);
355
t [9] = (uint32_t) entry;
356
357
t [10] = (uint32_t) (sp >> shift >> shift);
358
t [11] = (uint32_t) sp;
359
}
360
361
return t;
362
}
363
364
/* Destroys a cothread by freeing its block. The state area and the stack
   live in the single malloc block that co_create obtained, so one free()
   releases both. */
void co_delete( cothread_t t )
{
	free( t );
}
368
369
static void co_init_( void )
370
{
371
#if LIBCO_MPROTECT
372
/* TODO: pre- and post-pad PPC code so that this doesn't make other
373
data executable and writable */
374
long page_size = sysconf( _SC_PAGESIZE );
375
if ( page_size > 0 )
376
{
377
uintptr_t align = page_size;
378
uintptr_t begin = (uintptr_t) libco_ppc_code;
379
uintptr_t end = begin + sizeof libco_ppc_code;
380
381
/* Align beginning and end */
382
end += align - 1;
383
end -= end % align;
384
begin -= begin % align;
385
386
mprotect( (void*) begin, end - begin, PROT_READ | PROT_WRITE | PROT_EXEC );
387
}
388
#endif
389
390
co_active_handle = co_create_( state_size, (uintptr_t) &co_switch );
391
}
392
393
cothread_t co_active()
394
{
395
if ( !co_active_handle )
396
co_init_();
397
398
return co_active_handle;
399
}
400
401
/* Transfers execution to cothread t. The active handle is updated before the
   swap so that t already sees itself as the active thread when it resumes;
   the swap code saves the outgoing context into the previous thread's block
   and loads the context from t. */
void co_switch( cothread_t t )
{
	cothread_t const previous = co_active_handle;

	co_active_handle = t;
	CO_SWAP_ASM( t, previous );
}
408
409