Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/jcl/src/openj9.cuda/share/classes/com/ibm/cuda/CudaJitOptions.java
12927 views
1
/*[INCLUDE-IF Sidecar18-SE]*/
2
/*******************************************************************************
3
* Copyright (c) 2013, 2018 IBM Corp. and others
4
*
5
* This program and the accompanying materials are made available under
6
* the terms of the Eclipse Public License 2.0 which accompanies this
7
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
8
* or the Apache License, Version 2.0 which accompanies this distribution and
9
* is available at https://www.apache.org/licenses/LICENSE-2.0.
10
*
11
* This Source Code may also be made available under the following
12
* Secondary Licenses when the conditions for such availability set
13
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
14
* General Public License, version 2 with the GNU Classpath
15
* Exception [1] and GNU General Public License, version 2 with the
16
* OpenJDK Assembly Exception [2].
17
*
18
* [1] https://www.gnu.org/software/classpath/license.html
19
* [2] http://openjdk.java.net/legal/assembly-exception.html
20
*
21
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
22
*******************************************************************************/
23
package com.ibm.cuda;
24
25
import java.util.concurrent.atomic.AtomicLong;
26
27
/**
28
* The {@code CudaJitOptions} class represents a set of options that influence
29
* the behavior of linking and loading modules.
30
*/
31
public final class CudaJitOptions implements Cloneable {
32
33
/**
34
* {@code CacheMode} identifies the cache management choices.
35
*/
36
public static enum CacheMode {
37
38
/** Compile with no -dlcm flag specified. */
39
DEFAULT(1),
40
41
/** Compile with L1 cache disabled. */
42
L1_DISABLED(2),
43
44
/** Compile with L1 cache enabled. */
45
L1_ENABLED(3);
46
47
final int nativeMode;
48
49
CacheMode(int nativeMode) {
50
this.nativeMode = nativeMode;
51
}
52
}
53
54
/**
55
* {@code Fallback} identifies the available fall-back strategies
56
* when an exactly matching object is not available.
57
*/
58
public static enum Fallback {
59
60
/** Prefer to fall back to compatible binary code if exact match not found. */
61
PreferBinary(1),
62
63
/** Prefer to compile ptx if exact binary match not found. */
64
PreferPtx(2);
65
66
final int nativeStrategy;
67
68
Fallback(int nativeStrategy) {
69
this.nativeStrategy = nativeStrategy;
70
}
71
}
72
73
// The range of OPT_* constants.
74
private static final int NUM_OPT_CODES = 15;
75
76
private static final int OPT_CACHE_MODE = 14;
77
78
private static final int OPT_ERROR_LOG_BUFFER_SIZE_BYTES = 6;
79
80
private static final int OPT_FALLBACK_STRATEGY = 10;
81
82
private static final int OPT_GENERATE_DEBUG_INFO = 11;
83
84
private static final int OPT_GENERATE_LINE_INFO = 13;
85
86
private static final int OPT_INFO_LOG_BUFFER_SIZE_BYTES = 4;
87
88
private static final int OPT_LOG_VERBOSE = 12;
89
90
private static final int OPT_MAX_REGISTERS = 0;
91
92
private static final int OPT_OPTIMIZATION_LEVEL = 7;
93
94
private static final int OPT_TARGET = 9;
95
96
private static final int OPT_TARGET_FROM_CUCONTEXT = 8;
97
98
private static final int OPT_THREADS_PER_BLOCK = 1;
99
100
private static final int OPT_WALL_TIME = 2;
101
102
private static native long create(int[] keyValuePairs) throws CudaException;
103
104
private static native void destroy(long handle);
105
106
private static native String getErrorLogBuffer(long handle);
107
108
private static native String getInfoLogBuffer(long handle);
109
110
private static native int getThreadsPerBlock(long handle);
111
112
private static native float getWallTime(long handle);
113
114
private String errorLogBuffer;
115
116
private String infoLogBuffer;
117
118
private final AtomicLong nativeHandle;
119
120
private int optionMask;
121
122
private final int[] optionValue;
123
124
private int threadsPerBlock;
125
126
private float wallTime;
127
128
/**
129
* Creates a new options object.
130
*/
131
public CudaJitOptions() {
132
super();
133
this.errorLogBuffer = ""; //$NON-NLS-1$
134
this.infoLogBuffer = ""; //$NON-NLS-1$
135
this.nativeHandle = new AtomicLong();
136
this.optionMask = 0;
137
this.optionValue = new int[NUM_OPT_CODES];
138
this.threadsPerBlock = 0;
139
this.wallTime = 0.0f;
140
}
141
142
/**
143
* Creates a new options object with the same state as this object.
144
*/
145
@Override
146
protected CudaJitOptions clone() {
147
CudaJitOptions clone = new CudaJitOptions();
148
149
clone.optionMask = optionMask;
150
System.arraycopy(optionValue, 0, clone.optionValue, 0, NUM_OPT_CODES);
151
152
return clone;
153
}
154
155
/**
156
* Returns the contents of the error log.
157
* <p>
158
* The result will be empty unless {@link #setErrorLogBufferSize(int)}
159
* was called with a positive value, this object was used in connection
160
* with a {@link CudaModule} or a {@link CudaLinker}, and errors were
161
* reported.
162
*
163
* @return
164
* the contents of the error log
165
*/
166
public String getErrorLogBuffer() {
167
return errorLogBuffer;
168
}
169
170
long getHandle() throws CudaException {
171
long handle;
172
173
while ((handle = nativeHandle.get()) == 0) {
174
int mask = optionMask;
175
int[] keyValuePairs = new int[Integer.bitCount(mask) << 1];
176
int index = 0;
177
178
for (int code = 0; mask != 0; ++code, mask >>= 1) {
179
if ((mask & 1) != 0) {
180
keyValuePairs[index++] = code;
181
keyValuePairs[index++] = optionValue[code];
182
}
183
}
184
185
handle = create(keyValuePairs);
186
187
if (nativeHandle.compareAndSet(0, handle)) {
188
break;
189
}
190
191
// forget our work and look for results from a thread that finished earlier
192
destroy(handle);
193
}
194
195
return handle;
196
}
197
198
/**
199
* Returns the contents of the information log.
200
* <p>
201
* The result will be empty unless {@link #setInfoLogBufferSize(int)}
202
* was called with a positive value, this object was used in connection
203
* with a {@link CudaModule} or a {@link CudaLinker}, and informational
204
* messages were reported.
205
*
206
* @return
207
* the contents of the information log
208
*/
209
public String getInfoLogBuffer() {
210
return infoLogBuffer;
211
}
212
213
/**
214
* Returns the maximum number of threads per block.
215
* <p>
216
* The result will only be meaningful if {@link #setThreadsPerBlock(int)} was
217
* called with a positive value, and this object was used in connection
218
* with a {@link CudaModule} or a {@link CudaLinker} involving PTX code.
219
*
220
* @return
221
* the maximum number of threads per block
222
*/
223
public int getThreadsPerBlock() {
224
return threadsPerBlock;
225
}
226
227
/**
228
* Returns the total elapsed time, in milliseconds,
229
* spent in the compiler and linker.
230
* <p>
231
* Applies to: compiler and linker.
232
*
233
* @return
234
* the total elapsed time, in milliseconds, spent in the compiler and linker
235
*/
236
public float getWallTime() {
237
return wallTime;
238
}
239
240
/**
241
* Requests recording of the total wall clock time,
242
* in milliseconds, spent in the compiler and linker.
243
* <p>
244
* Applies to: compiler and linker.
245
*
246
* @return
247
* this options object
248
*/
249
public CudaJitOptions recordWallTime() {
250
return setOption(OPT_WALL_TIME, true);
251
}
252
253
void releaseHandle(boolean update) {
254
long handle = nativeHandle.getAndSet(0);
255
256
if (handle != 0) {
257
if (update) {
258
update(handle);
259
}
260
261
destroy(handle);
262
}
263
}
264
265
/**
266
* Specifies the desired caching behavior (-dlcm).
267
* <p>
268
* Applies to compiler only.
269
*
270
* @param mode
271
* the desired caching behavior
272
* @return
273
* this options object
274
*/
275
public CudaJitOptions setCacheMode(CacheMode mode) {
276
if (mode == null) {
277
mode = CacheMode.DEFAULT;
278
}
279
280
return setOption(OPT_CACHE_MODE, mode.nativeMode);
281
}
282
283
/**
284
* Specifies the size, in bytes, to allocate for capturing error messages.
285
* <p>
286
* Applies to compiler and linker.
287
*
288
* @param size
289
* the size, in bytes, of the error log buffer
290
* @return
291
* this options object
292
*/
293
public CudaJitOptions setErrorLogBufferSize(int size) {
294
return setOption(OPT_ERROR_LOG_BUFFER_SIZE_BYTES, size);
295
}
296
297
/**
298
* Specifies whether to generate debug information.
299
* <p>
300
* Applies to compiler and linker.
301
*
302
* @param enabled
303
* whether debug information should be generated
304
* @return
305
* this options object
306
*/
307
public CudaJitOptions setGenerateDebugInfo(boolean enabled) {
308
return setOption(OPT_GENERATE_DEBUG_INFO, enabled);
309
}
310
311
/**
312
* Specifies whether to generate line number information.
313
* <p>
314
* Applies to compiler only.
315
*
316
* @param enabled
317
* whether line number information should be generated
318
* @return
319
* this options object
320
*/
321
public CudaJitOptions setGenerateLineInfo(boolean enabled) {
322
return setOption(OPT_GENERATE_LINE_INFO, enabled);
323
}
324
325
/**
326
* Specifies the size, in bytes, to allocate for capturing informational
327
* messages.
328
* <p>
329
* Applies to compiler and linker.
330
*
331
* @param size
332
* the size, in bytes, of the information log buffer
333
* @return
334
* this options object
335
*/
336
public CudaJitOptions setInfoLogBufferSize(int size) {
337
return setOption(OPT_INFO_LOG_BUFFER_SIZE_BYTES, size);
338
}
339
340
/**
341
* Specifies the fallback strategy if an exactly matching
342
* binary object cannot be found.
343
* <p>
344
* Applies to: compiler only
345
*
346
* @param strategy
347
* the desired fallback strategy
348
* @return
349
* this options object
350
*/
351
public CudaJitOptions setJitFallbackStrategy(Fallback strategy) {
352
if (strategy == null) {
353
strategy = Fallback.PreferPtx;
354
}
355
356
return setOption(OPT_FALLBACK_STRATEGY, strategy.nativeStrategy);
357
}
358
359
/**
360
* Specifies whether to generate verbose log messages.
361
* <p>
362
* Applies to: compiler and linker
363
*
364
* @param verbose
365
* whether verbose log messages should be generated
366
* @return
367
* this options object
368
*/
369
public CudaJitOptions setLogVerbose(boolean verbose) {
370
return setOption(OPT_LOG_VERBOSE, verbose);
371
}
372
373
/**
374
* Specifies the maximum number of registers that a thread may use.
375
* <p>
376
* Applies to: compiler only
377
*
378
* @param limit
379
* the maximum number of registers a thread may use
380
* @return
381
* this options object
382
*/
383
public CudaJitOptions setMaxRegisters(int limit) {
384
return setOption(OPT_MAX_REGISTERS, limit);
385
}
386
387
/**
388
* Specifies the level of optimization to be applied to generated code
389
* (0 - 4), with 4 being the default and highest level of optimization.
390
* <p>
391
* Applies to compiler only.
392
*
393
* @param level
394
* the desired optimization level
395
* @return
396
* this options object
397
*/
398
public CudaJitOptions setOptimizationLevel(int level) {
399
return setOption(OPT_OPTIMIZATION_LEVEL, level);
400
}
401
402
private CudaJitOptions setOption(int key, boolean value) {
403
return setOption(key, value ? 1 : 0);
404
}
405
406
private CudaJitOptions setOption(int key, int value) {
407
releaseHandle(false);
408
409
optionMask |= 1 << key;
410
optionValue[key] = value;
411
412
return this;
413
}
414
415
/**
416
* Specifies the desired compute target.
417
* <p>
418
* Cannot be combined with {@link #setThreadsPerBlock(int)}.
419
* <p>
420
* Applies to compiler and linker.
421
*
422
* @param target
423
* the desired compute target
424
* @return
425
* this options object
426
*/
427
public CudaJitOptions setTarget(CudaJitTarget target) {
428
return setOption(OPT_TARGET, target.nativeValue);
429
}
430
431
/**
432
* Specifies that the target should be determined based on the current
433
* attached context.
434
* <p>
435
* Applies to compiler and linker.
436
*
437
* @return
438
* this options object
439
*/
440
public CudaJitOptions setTargetFromCuContext() {
441
return setOption(OPT_TARGET_FROM_CUCONTEXT, true);
442
}
443
444
/**
445
* Specifies the minimum number of threads per block for compilation.
446
* <p>
447
* This restricts the resource utilization of the compiler (e.g. maximum
448
* registers) such that a block with the given number of threads should be
449
* able to launch based on register limitations. Note, this option does not
450
* currently take into account any other resource limitations, such as
451
* shared memory utilization.
452
* <p>
453
* Cannot be combined with {@link #setTarget(CudaJitTarget)}.
454
* <p>
455
* Applies to compiler only.
456
*
457
* @param limit
458
* the desired minimum number of threads per block
459
* @return
460
* this options object
461
*/
462
public CudaJitOptions setThreadsPerBlock(int limit) {
463
return setOption(OPT_THREADS_PER_BLOCK, limit);
464
}
465
466
CudaJitOptions update() {
467
long handle = nativeHandle.get();
468
469
if (handle != 0) {
470
update(handle);
471
}
472
473
return this;
474
}
475
476
private void update(long handle) {
477
errorLogBuffer = getErrorLogBuffer(handle);
478
infoLogBuffer = getInfoLogBuffer(handle);
479
threadsPerBlock = getThreadsPerBlock(handle);
480
wallTime = getWallTime(handle);
481
}
482
}
483
484