Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
tpruvot
GitHub Repository: tpruvot/cpuminer-multi
Path: blob/linux/scryptjane/scrypt-jane-portable-x86.h
1201 views
1
#if defined(CPU_X86) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC))
2
#define X86ASM
3
/* gcc 2.95 royally screws up stack alignments on variables */
4
#if (defined(COMPILER_MSVC6PP_AND_LATER) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 30000)))
5
#define X86ASM_SSE
6
#define X86ASM_SSE2
7
#endif
8
#if ((defined(COMPILER_MSVC) && (COMPILER_MSVC >= 1400)) || (defined(COMPILER_GCC) && (COMPILER_GCC >= 40102)))
9
#define X86ASM_SSSE3
10
#endif
11
#if ((defined(COMPILER_GCC) && (COMPILER_GCC >= 40400)))
12
#define X86ASM_AVX
13
#endif
14
#endif
15
16
#if defined(CPU_X86_64) && defined(COMPILER_GCC)
17
#define X86_64ASM
18
#define X86_64ASM_SSE2
19
#if (COMPILER_GCC >= 40102)
20
#define X86_64ASM_SSSE3
21
#endif
22
#if (COMPILER_GCC >= 40400)
23
#define X86_64ASM_AVX
24
#endif
25
#endif
26
27
#if defined(COMPILER_MSVC)
28
#define X86_INTRINSIC
29
#if defined(CPU_X86_64) || defined(X86ASM_SSE)
30
#define X86_INTRINSIC_SSE
31
#endif
32
#if defined(CPU_X86_64) || defined(X86ASM_SSE2)
33
#define X86_INTRINSIC_SSE2
34
#endif
35
#if (COMPILER_MSVC >= 1400)
36
#define X86_INTRINSIC_SSSE3
37
#endif
38
#endif
39
40
/* MSVC has no inline assembly on x86-64, so intrinsics are the only option
   there. (This block previously appeared twice verbatim; the exact duplicate
   has been removed.) */
#if defined(COMPILER_MSVC) && defined(CPU_X86_64)
#define X86_64USE_INTRINSIC
#endif

/* Compiler was invoked with AVX code generation enabled (-mavx / /arch:AVX) */
#ifdef __AVX__
#define X86_INTRINSIC_AVX
#endif
51
52
#if defined(COMPILER_GCC) && defined(CPU_X86_FORCE_INTRINSICS)
53
#define X86_INTRINSIC
54
#if defined(__SSE__)
55
#define X86_INTRINSIC_SSE
56
#endif
57
#if defined(__SSE2__)
58
#define X86_INTRINSIC_SSE2
59
#endif
60
#if defined(__SSSE3__)
61
#define X86_INTRINSIC_SSSE3
62
#endif
63
#if defined(__AVX__)
64
#define X86_INTRINSIC_AVX
65
#endif
66
67
/* HACK - I want to use CPU_X86_FORCE_INTRINSICS with mingw64 so these need to be undefined - mikaelh */
68
#undef X86_64ASM_SSSE3
69
#undef X86_64ASM_AVX
70
#undef X86_64ASM_SSE2
71
#undef X86ASM_AVX
72
#undef X86ASM_SSSE3
73
#undef X86ASM_SSE2
74
#undef X86ASM_SSE
75
#endif
76
77
/* only use simd on windows (or SSE2 on gcc)! */
/* Pull in the intrinsic headers for each enabled level and give the vector
   types short local aliases (qmm/xmm/xmmd/xmmi). */
#if defined(CPU_X86_FORCE_INTRINSICS) || defined(X86_INTRINSIC)
#if defined(X86_INTRINSIC_SSE)
#define X86_INTRINSIC
#include <mmintrin.h>
#include <xmmintrin.h>
typedef __m64 qmm;
typedef __m128 xmm;
typedef __m128d xmmd;
#endif
#if defined(X86_INTRINSIC_SSE2)
#define X86_INTRINSIC_SSE2
#include <emmintrin.h>
typedef __m128i xmmi;
#endif
#if defined(X86_INTRINSIC_SSSE3)
#define X86_INTRINSIC_SSSE3
#include <tmmintrin.h>
#endif
#if defined (X86_INTRINSIC_AVX)
#define X86_INTRINSIC_AVX
#include <immintrin.h>
#endif
#endif
101
102
103
/* 16-byte unions: the same storage viewed either as scalar lanes (`u`) or,
   when SSE2 is available, as an SSE2 register (`v`). */
#if defined(X86_INTRINSIC_SSE2)
typedef union packedelem8_t {
	uint8_t u[16];
	xmmi v;
} packedelem8;

typedef union packedelem32_t {
	uint32_t u[4];
	xmmi v;
} packedelem32;

typedef union packedelem64_t {
	uint64_t u[2];
	xmmi v;
} packedelem64;
#else
/* Fallback without SSE2: keep the same `u` member so element access stays portable */
typedef union packedelem8_t {
	uint8_t u[16];
	uint32_t dw[4];
} packedelem8;

typedef union packedelem32_t {
	uint32_t u[4];
	uint8_t b[16];
} packedelem32;

typedef union packedelem64_t {
	uint64_t u[2];
	uint8_t b[16];
} packedelem64;
#endif
134
135
#if defined(X86_INTRINSIC_SSSE3)
136
static const packedelem8 MM16 ssse3_rotl16_32bit = {{2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}};
137
static const packedelem8 MM16 ssse3_rotl8_32bit = {{3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14}};
138
#endif
139
140
/*
141
x86 inline asm for gcc/msvc. usage:
142
143
asm_naked_fn_proto(return_type, name) (type parm1, type parm2..)
144
asm_naked_fn(name)
145
a1(..)
146
a2(.., ..)
147
a3(.., .., ..)
148
64bit OR 0 parameters: a1(ret)
149
32bit AND n parameters: aret(4n), eg aret(16) for 4 parameters
150
asm_naked_fn_end(name)
151
*/
152
153
#if defined(X86ASM) || defined(X86_64ASM)
154
155
#if defined(COMPILER_MSVC)
156
#pragma warning(disable : 4731) /* frame pointer modified by inline assembly */
157
#define a1(x) __asm {x}
158
#define a2(x, y) __asm {x, y}
159
#define a3(x, y, z) __asm {x, y, z}
160
#define a4(x, y, z, w) __asm {x, y, z, w}
161
#define al(x) __asm {label##x:}
162
#define aj(x, y, z) __asm {x label##y}
163
#define asm_align8 a1(ALIGN 8)
164
#define asm_align16 a1(ALIGN 16)
165
166
#define asm_calling_convention STDCALL
167
#define asm_naked_fn_proto(type, fn) static NAKED type asm_calling_convention fn
168
#define asm_naked_fn(fn) {
169
#define asm_naked_fn_end(fn) }
170
#elif defined(COMPILER_GCC)
171
#define GNU_AS1(x) #x ";\n"
172
#define GNU_AS2(x, y) #x ", " #y ";\n"
173
#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";\n"
174
#define GNU_AS4(x, y, z, w) #x ", " #y ", " #z ", " #w ";\n"
175
#define GNU_ASL(x) "\n" #x ":\n"
176
#define GNU_ASFN(x) "\n_" #x ":\n" #x ":\n"
177
#define GNU_ASJ(x, y, z) #x " " #y #z ";"
178
179
#define a1(x) GNU_AS1(x)
180
#define a2(x, y) GNU_AS2(x, y)
181
#define a3(x, y, z) GNU_AS3(x, y, z)
182
#define a4(x, y, z, w) GNU_AS4(x, y, z, w)
183
#define al(x) GNU_ASL(x)
184
#define aj(x, y, z) GNU_ASJ(x, y, z)
185
#define asm_align8 a1(.align 8)
186
#define asm_align16 a1(.align 16)
187
188
#if defined(OS_WINDOWS)
189
#define asm_calling_convention CDECL
190
#define aret(n) a1(ret)
191
#define asm_naked_fn_end(fn) ".att_syntax prefix;\n" );
192
#else
193
#define asm_calling_convention STDCALL
194
#define aret(n) a1(ret n)
195
#define asm_naked_fn_end(fn) ".att_syntax prefix;\n.type " #fn ",@function\n.size " #fn ",.-" #fn "\n" );
196
#endif
197
#define asm_naked_fn_proto(type, fn) extern type asm_calling_convention fn
198
#define asm_naked_fn(fn) ; __asm__ (".intel_syntax noprefix;\n.text\n" asm_align16 GNU_ASFN(fn)
199
200
#define asm_gcc() __asm__ __volatile__(".intel_syntax noprefix;\n"
201
#define asm_gcc_parms() ".att_syntax prefix;"
202
#define asm_gcc_trashed() __asm__ __volatile__("" :::
203
#define asm_gcc_end() );
204
#else
205
need x86 asm
206
#endif
207
208
#endif /* X86ASM || X86_64ASM */
209
210
211
#if defined(CPU_X86) || defined(CPU_X86_64)

/* Bitmask of instruction-set extensions reported by CPUID (see detect_cpu). */
typedef enum cpu_flags_x86_t {
	cpu_mmx = 1 << 0,
	cpu_sse = 1 << 1,
	cpu_sse2 = 1 << 2,
	cpu_sse3 = 1 << 3,
	cpu_ssse3 = 1 << 4,
	cpu_sse4_1 = 1 << 5,
	cpu_sse4_2 = 1 << 6,
	cpu_avx = 1 << 7
} cpu_flags_x86;

/* CPU manufacturer, decoded from the CPUID leaf-0 vendor string. */
typedef enum cpu_vendors_x86_t {
	cpu_nobody,
	cpu_intel,
	cpu_amd
} cpu_vendors_x86;

/* Output registers of one CPUID invocation. */
typedef struct x86_regs_t {
	uint32_t eax, ebx, ecx, edx;
} x86_regs;
234
#if defined(X86ASM)
235
asm_naked_fn_proto(int, has_cpuid)(void)
236
asm_naked_fn(has_cpuid)
237
a1(pushfd)
238
a1(pop eax)
239
a2(mov ecx, eax)
240
a2(xor eax, 0x200000)
241
a1(push eax)
242
a1(popfd)
243
a1(pushfd)
244
a1(pop eax)
245
a2(xor eax, ecx)
246
a2(shr eax, 21)
247
a2(and eax, 1)
248
a1(push ecx)
249
a1(popfd)
250
a1(ret)
251
asm_naked_fn_end(has_cpuid)
252
#endif /* X86ASM */
253
254
255
/*
 * Execute CPUID for the leaf selected by `flags` and store the resulting
 * eax/ebx/ecx/edx into *regs.
 */
static void NOINLINE
get_cpuid(x86_regs *regs, uint32_t flags) {
#if defined(COMPILER_MSVC)
	__cpuid((int *)regs, (int)flags);
#else
#if defined(CPU_X86_64)
#define cpuid_bx rbx
#else
#define cpuid_bx ebx
#endif

	/* rbx/ebx is saved and restored by hand: gcc may reserve it (PIC base),
	   so it cannot appear in the clobber list */
	asm_gcc()
		a1(push cpuid_bx)
		a1(cpuid)
		a2(mov [%1 + 0], eax)
		a2(mov [%1 + 4], ebx)
		a2(mov [%1 + 8], ecx)
		a2(mov [%1 + 12], edx)
		a1(pop cpuid_bx)
	asm_gcc_parms() : "+a"(flags) : "S"(regs) : "%ecx", "%edx", "cc"
	asm_gcc_end()
#endif
}
278
279
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
280
static uint64_t NOINLINE
281
get_xgetbv(uint32_t flags) {
282
#if defined(COMPILER_MSVC)
283
return _xgetbv(flags);
284
#else
285
uint32_t lo, hi;
286
asm_gcc()
287
a1(xgetbv)
288
asm_gcc_parms() : "+c"(flags), "=a" (lo), "=d" (hi)
289
asm_gcc_end()
290
return ((uint64_t)lo | ((uint64_t)hi << 32));
291
#endif
292
}
293
#endif // AVX support
294
295
#if defined(SCRYPT_TEST_SPEED)
296
size_t cpu_detect_mask = (size_t)-1;
297
#endif
298
299
static size_t
300
detect_cpu(void) {
301
union { uint8_t s[12]; uint32_t i[3]; } vendor_string;
302
cpu_vendors_x86 vendor = cpu_nobody;
303
x86_regs regs;
304
uint32_t max_level;
305
size_t cpu_flags = 0;
306
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
307
uint64_t xgetbv_flags;
308
#endif
309
310
#if defined(CPU_X86)
311
if (!has_cpuid())
312
return cpu_flags;
313
#endif
314
315
get_cpuid(&regs, 0);
316
max_level = regs.eax;
317
vendor_string.i[0] = regs.ebx;
318
vendor_string.i[1] = regs.edx;
319
vendor_string.i[2] = regs.ecx;
320
321
if (scrypt_verify(vendor_string.s, (const uint8_t *)"GenuineIntel", 12))
322
vendor = cpu_intel;
323
else if (scrypt_verify(vendor_string.s, (const uint8_t *)"AuthenticAMD", 12))
324
vendor = cpu_amd;
325
326
if (max_level & 0x00000500) {
327
/* "Intel P5 pre-B0" */
328
cpu_flags |= cpu_mmx;
329
return cpu_flags;
330
}
331
332
if (max_level < 1)
333
return cpu_flags;
334
335
get_cpuid(&regs, 1);
336
#if defined(X86ASM_AVX) || defined(X86_64ASM_AVX)
337
/* xsave/xrestore */
338
if (regs.ecx & (1 << 27)) {
339
xgetbv_flags = get_xgetbv(0);
340
if ((regs.ecx & (1 << 28)) && (xgetbv_flags & 0x6)) cpu_flags |= cpu_avx;
341
}
342
#endif
343
if (regs.ecx & (1 << 20)) cpu_flags |= cpu_sse4_2;
344
if (regs.ecx & (1 << 19)) cpu_flags |= cpu_sse4_2;
345
if (regs.ecx & (1 << 9)) cpu_flags |= cpu_ssse3;
346
if (regs.ecx & (1 )) cpu_flags |= cpu_sse3;
347
if (regs.edx & (1 << 26)) cpu_flags |= cpu_sse2;
348
if (regs.edx & (1 << 25)) cpu_flags |= cpu_sse;
349
if (regs.edx & (1 << 23)) cpu_flags |= cpu_mmx;
350
351
#if defined(SCRYPT_TEST_SPEED)
352
cpu_flags &= cpu_detect_mask;
353
#endif
354
355
return cpu_flags;
356
}
357
358
#if defined(SCRYPT_TEST_SPEED)
/* Map a feature bitmask to the name of its most capable feature present. */
static const char *
get_top_cpuflag_desc(size_t flag) {
	/* ordered from most to least capable; first match wins */
	static const struct { size_t bit; const char *name; } feature_names[] = {
		{ cpu_avx,    "AVX"    },
		{ cpu_sse4_2, "SSE4.2" },
		{ cpu_sse4_1, "SSE4.1" },
		{ cpu_ssse3,  "SSSE3"  },
		{ cpu_sse2,   "SSE2"   },
		{ cpu_sse,    "SSE"    },
		{ cpu_mmx,    "MMX"    },
	};
	size_t i;
	for (i = 0; i < sizeof(feature_names) / sizeof(feature_names[0]); i++) {
		if (flag & feature_names[i].bit)
			return feature_names[i].name;
	}
	return "Basic";
}
#endif
371
372
/* enable the highest system-wide option */
#if defined(SCRYPT_CHOOSE_COMPILETIME)
/* When the implementation is fixed at compile time, drop any code paths the
   current compiler flags cannot actually emit. */
#if !defined(__AVX__)
#undef X86_64ASM_AVX
#undef X86ASM_AVX
#undef X86_INTRINSIC_AVX
#endif
#if !defined(__SSSE3__)
#undef X86_64ASM_SSSE3
#undef X86ASM_SSSE3
#undef X86_INTRINSIC_SSSE3
#endif
#if !defined(__SSE2__)
#undef X86_64ASM_SSE2
#undef X86ASM_SSE2
#undef X86_INTRINSIC_SSE2
#endif
#endif

#endif /* defined(CPU_X86) || defined(CPU_X86_64) */
392
393