CoCalc -- CudaJitOptions.java

GitHub Repository: PojavLauncherTeam/openj9
Path: blob/master/jcl/src/openj9.cuda/share/classes/com/ibm/cuda/CudaJitOptions.java
12927 views
/*[INCLUDE-IF Sidecar18-SE]*/
/*******************************************************************************
 * Copyright (c) 2013, 2018 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/
23
package com.ibm.cuda;
24

25
import java.util.concurrent.atomic.AtomicLong;
26

27
/**
28
 * The {@code CudaJitOptions} class represents a set of options that influence
29
 * the behavior of linking and loading modules.
30
 */
31
public final class CudaJitOptions implements Cloneable {
32

33
	/**
34
	 * {@code CacheMode} identifies the cache management choices.
35
	 */
36
	public static enum CacheMode {
37

38
		/** Compile with no -dlcm flag specified. */
39
		DEFAULT(1),
40

41
		/** Compile with L1 cache disabled. */
42
		L1_DISABLED(2),
43

44
		/** Compile with L1 cache enabled. */
45
		L1_ENABLED(3);
46

47
		final int nativeMode;
48

49
		CacheMode(int nativeMode) {
50
			this.nativeMode = nativeMode;
51
		}
52
	}
53

54
	/**
55
	 * {@code Fallback} identifies the available fall-back strategies
56
	 * when an exactly matching object is not available.
57
	 */
58
	public static enum Fallback {
59

60
		/** Prefer to fall back to compatible binary code if exact match not found. */
61
		PreferBinary(1),
62

63
		/** Prefer to compile ptx if exact binary match not found. */
64
		PreferPtx(2);
65

66
		final int nativeStrategy;
67

68
		Fallback(int nativeStrategy) {
69
			this.nativeStrategy = nativeStrategy;
70
		}
71
	}
72

73
	// The range of OPT_* constants.
74
	private static final int NUM_OPT_CODES = 15;
75

76
	private static final int OPT_CACHE_MODE = 14;
77

78
	private static final int OPT_ERROR_LOG_BUFFER_SIZE_BYTES = 6;
79

80
	private static final int OPT_FALLBACK_STRATEGY = 10;
81

82
	private static final int OPT_GENERATE_DEBUG_INFO = 11;
83

84
	private static final int OPT_GENERATE_LINE_INFO = 13;
85

86
	private static final int OPT_INFO_LOG_BUFFER_SIZE_BYTES = 4;
87

88
	private static final int OPT_LOG_VERBOSE = 12;
89

90
	private static final int OPT_MAX_REGISTERS = 0;
91

92
	private static final int OPT_OPTIMIZATION_LEVEL = 7;
93

94
	private static final int OPT_TARGET = 9;
95

96
	private static final int OPT_TARGET_FROM_CUCONTEXT = 8;
97

98
	private static final int OPT_THREADS_PER_BLOCK = 1;
99

100
	private static final int OPT_WALL_TIME = 2;
101

102
	private static native long create(int[] keyValuePairs) throws CudaException;
103

104
	private static native void destroy(long handle);
105

106
	private static native String getErrorLogBuffer(long handle);
107

108
	private static native String getInfoLogBuffer(long handle);
109

110
	private static native int getThreadsPerBlock(long handle);
111

112
	private static native float getWallTime(long handle);
113

114
	private String errorLogBuffer;
115

116
	private String infoLogBuffer;
117

118
	private final AtomicLong nativeHandle;
119

120
	private int optionMask;
121

122
	private final int[] optionValue;
123

124
	private int threadsPerBlock;
125

126
	private float wallTime;
127

128
	/**
129
	 * Creates a new options object.
130
	 */
131
	public CudaJitOptions() {
132
		super();
133
		this.errorLogBuffer = ""; //$NON-NLS-1$
134
		this.infoLogBuffer = ""; //$NON-NLS-1$
135
		this.nativeHandle = new AtomicLong();
136
		this.optionMask = 0;
137
		this.optionValue = new int[NUM_OPT_CODES];
138
		this.threadsPerBlock = 0;
139
		this.wallTime = 0.0f;
140
	}
141

142
	/**
143
	 * Creates a new options object with the same state as this object.
144
	 */
145
	@Override
146
	protected CudaJitOptions clone() {
147
		CudaJitOptions clone = new CudaJitOptions();
148

149
		clone.optionMask = optionMask;
150
		System.arraycopy(optionValue, 0, clone.optionValue, 0, NUM_OPT_CODES);
151

152
		return clone;
153
	}
154

155
	/**
156
	 * Returns the contents of the error log.
157
	 * <p>
158
	 * The result will be empty unless {@link #setErrorLogBufferSize(int)}
159
	 * was called with a positive value, this object was used in connection
160
	 * with a {@link CudaModule} or a {@link CudaLinker}, and errors were
161
	 * reported.
162
	 *
163
	 * @return
164
	 *          the contents of the error log
165
	 */
166
	public String getErrorLogBuffer() {
167
		return errorLogBuffer;
168
	}
169

170
	long getHandle() throws CudaException {
171
		long handle;
172

173
		while ((handle = nativeHandle.get()) == 0) {
174
			int mask = optionMask;
175
			int[] keyValuePairs = new int[Integer.bitCount(mask) << 1];
176
			int index = 0;
177

178
			for (int code = 0; mask != 0; ++code, mask >>= 1) {
179
				if ((mask & 1) != 0) {
180
					keyValuePairs[index++] = code;
181
					keyValuePairs[index++] = optionValue[code];
182
				}
183
			}
184

185
			handle = create(keyValuePairs);
186

187
			if (nativeHandle.compareAndSet(0, handle)) {
188
				break;
189
			}
190

191
			// forget our work and look for results from a thread that finished earlier
192
			destroy(handle);
193
		}
194

195
		return handle;
196
	}
197

198
	/**
199
	 * Returns the contents of the information log.
200
	 * <p>
201
	 * The result will be empty unless {@link #setInfoLogBufferSize(int)}
202
	 * was called with a positive value, this object was used in connection
203
	 * with a {@link CudaModule} or a {@link CudaLinker}, and informational
204
	 * messages were reported.
205
	 *
206
	 * @return
207
	 *          the contents of the information log
208
	 */
209
	public String getInfoLogBuffer() {
210
		return infoLogBuffer;
211
	}
212

213
	/**
214
	 * Returns the maximum number of threads per block.
215
	 * <p>
216
	 * The result will only be meaningful if {@link #setThreadsPerBlock(int)} was
217
	 * called with a positive value, and this object was used in connection
218
	 * with a {@link CudaModule} or a {@link CudaLinker} involving PTX code.
219
	 *
220
	 * @return
221
	 *          the maximum number of threads per block
222
	 */
223
	public int getThreadsPerBlock() {
224
		return threadsPerBlock;
225
	}
226

227
	/**
228
	 * Returns the total elapsed time, in milliseconds,
229
	 * spent in the compiler and linker.
230
	 * <p>
231
	 * Applies to: compiler and linker.
232
	 *
233
	 * @return
234
	 *          the total elapsed time, in milliseconds, spent in the compiler and linker
235
	 */
236
	public float getWallTime() {
237
		return wallTime;
238
	}
239

240
	/**
241
	 * Requests recording of the total wall clock time,
242
	 * in milliseconds, spent in the compiler and linker.
243
	 * <p>
244
	 * Applies to: compiler and linker.
245
	 *
246
	 * @return
247
	 *          this options object
248
	 */
249
	public CudaJitOptions recordWallTime() {
250
		return setOption(OPT_WALL_TIME, true);
251
	}
252

253
	void releaseHandle(boolean update) {
254
		long handle = nativeHandle.getAndSet(0);
255

256
		if (handle != 0) {
257
			if (update) {
258
				update(handle);
259
			}
260

261
			destroy(handle);
262
		}
263
	}
264

265
	/**
266
	 * Specifies the desired caching behavior (-dlcm).
267
	 * <p>
268
	 * Applies to compiler only.
269
	 *
270
	 * @param mode
271
	 *          the desired caching behavior
272
	 * @return
273
	 *          this options object
274
	 */
275
	public CudaJitOptions setCacheMode(CacheMode mode) {
276
		if (mode == null) {
277
			mode = CacheMode.DEFAULT;
278
		}
279

280
		return setOption(OPT_CACHE_MODE, mode.nativeMode);
281
	}
282

283
	/**
284
	 * Specifies the size, in bytes, to allocate for capturing error messages.
285
	 * <p>
286
	 * Applies to compiler and linker.
287
	 *
288
	 * @param size
289
	 *          the size, in bytes, of the error log buffer
290
	 * @return
291
	 *          this options object
292
	 */
293
	public CudaJitOptions setErrorLogBufferSize(int size) {
294
		return setOption(OPT_ERROR_LOG_BUFFER_SIZE_BYTES, size);
295
	}
296

297
	/**
298
	 * Specifies whether to generate debug information.
299
	 * <p>
300
	 * Applies to compiler and linker.
301
	 *
302
	 * @param enabled
303
	 *          whether debug information should be generated
304
	 * @return
305
	 *          this options object
306
	 */
307
	public CudaJitOptions setGenerateDebugInfo(boolean enabled) {
308
		return setOption(OPT_GENERATE_DEBUG_INFO, enabled);
309
	}
310

311
	/**
312
	 * Specifies whether to generate line number information.
313
	 * <p>
314
	 * Applies to compiler only.
315
	 *
316
	 * @param enabled
317
	 *          whether line number information should be generated
318
	 * @return
319
	 *          this options object
320
	 */
321
	public CudaJitOptions setGenerateLineInfo(boolean enabled) {
322
		return setOption(OPT_GENERATE_LINE_INFO, enabled);
323
	}
324

325
	/**
326
	 * Specifies the size, in bytes, to allocate for capturing informational
327
	 * messages.
328
	 * <p>
329
	 * Applies to compiler and linker.
330
	 *
331
	 * @param size
332
	 *          the size, in bytes, of the information log buffer
333
	 * @return
334
	 *          this options object
335
	 */
336
	public CudaJitOptions setInfoLogBufferSize(int size) {
337
		return setOption(OPT_INFO_LOG_BUFFER_SIZE_BYTES, size);
338
	}
339

340
	/**
341
	 * Specifies the fallback strategy if an exactly matching
342
	 * binary object cannot be found.
343
	 * <p>
344
	 * Applies to: compiler only
345
	 *
346
	 * @param strategy
347
	 *          the desired fallback strategy
348
	 * @return
349
	 *          this options object
350
	 */
351
	public CudaJitOptions setJitFallbackStrategy(Fallback strategy) {
352
		if (strategy == null) {
353
			strategy = Fallback.PreferPtx;
354
		}
355

356
		return setOption(OPT_FALLBACK_STRATEGY, strategy.nativeStrategy);
357
	}
358

359
	/**
360
	 * Specifies whether to generate verbose log messages.
361
	 * <p>
362
	 * Applies to: compiler and linker
363
	 *
364
	 * @param verbose
365
	 *          whether verbose log messages should be generated
366
	 * @return
367
	 *          this options object
368
	 */
369
	public CudaJitOptions setLogVerbose(boolean verbose) {
370
		return setOption(OPT_LOG_VERBOSE, verbose);
371
	}
372

373
	/**
374
	 * Specifies the maximum number of registers that a thread may use.
375
	 * <p>
376
	 * Applies to: compiler only
377
	 *
378
	 * @param limit
379
	 *          the maximum number of registers a thread may use
380
	 * @return
381
	 *          this options object
382
	 */
383
	public CudaJitOptions setMaxRegisters(int limit) {
384
		return setOption(OPT_MAX_REGISTERS, limit);
385
	}
386

387
	/**
388
	 * Specifies the level of optimization to be applied to generated code
389
	 * (0 - 4), with 4 being the default and highest level of optimization.
390
	 * <p>
391
	 * Applies to compiler only.
392
	 *
393
	 * @param level
394
	 *          the desired optimization level
395
	 * @return
396
	 *          this options object
397
	 */
398
	public CudaJitOptions setOptimizationLevel(int level) {
399
		return setOption(OPT_OPTIMIZATION_LEVEL, level);
400
	}
401

402
	private CudaJitOptions setOption(int key, boolean value) {
403
		return setOption(key, value ? 1 : 0);
404
	}
405

406
	private CudaJitOptions setOption(int key, int value) {
407
		releaseHandle(false);
408

409
		optionMask |= 1 << key;
410
		optionValue[key] = value;
411

412
		return this;
413
	}
414

415
	/**
416
	 * Specifies the desired compute target.
417
	 * <p>
418
	 * Cannot be combined with {@link #setThreadsPerBlock(int)}.
419
	 * <p>
420
	 * Applies to compiler and linker.
421
	 *
422
	 * @param target
423
	 *          the desired compute target
424
	 * @return
425
	 *          this options object
426
	 */
427
	public CudaJitOptions setTarget(CudaJitTarget target) {
428
		return setOption(OPT_TARGET, target.nativeValue);
429
	}
430

431
	/**
432
	 * Specifies that the target should be determined based on the current
433
	 * attached context.
434
	 * <p>
435
	 * Applies to compiler and linker.
436
	 *
437
	 * @return
438
	 *          this options object
439
	 */
440
	public CudaJitOptions setTargetFromCuContext() {
441
		return setOption(OPT_TARGET_FROM_CUCONTEXT, true);
442
	}
443

444
	/**
445
	 * Specifies the minimum number of threads per block for compilation.
446
	 * <p>
447
	 * This restricts the resource utilization of the compiler (e.g. maximum
448
	 * registers) such that a block with the given number of threads should be
449
	 * able to launch based on register limitations. Note, this option does not
450
	 * currently take into account any other resource limitations, such as
451
	 * shared memory utilization.
452
	 * <p>
453
	 * Cannot be combined with {@link #setTarget(CudaJitTarget)}.
454
	 * <p>
455
	 * Applies to compiler only.
456
	 *
457
	 * @param limit
458
	 *          the desired minimum number of threads per block
459
	 * @return
460
	 *          this options object
461
	 */
462
	public CudaJitOptions setThreadsPerBlock(int limit) {
463
		return setOption(OPT_THREADS_PER_BLOCK, limit);
464
	}
465

466
	CudaJitOptions update() {
467
		long handle = nativeHandle.get();
468

469
		if (handle != 0) {
470
			update(handle);
471
		}
472

473
		return this;
474
	}
475

476
	private void update(long handle) {
477
		errorLogBuffer = getErrorLogBuffer(handle);
478
		infoLogBuffer = getInfoLogBuffer(handle);
479
		threadsPerBlock = getThreadsPerBlock(handle);
480
		wallTime = getWallTime(handle);
481
	}
482
}
483

484
Product

Resources

Company