Path: blob/master/jcl/src/openj9.cuda/share/classes/com/ibm/cuda/CudaJitOptions.java
12927 views
/*[INCLUDE-IF Sidecar18-SE]*/
/*******************************************************************************
 * Copyright (c) 2013, 2018 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/
package com.ibm.cuda;

import java.util.concurrent.atomic.AtomicLong;

/**
 * The {@code CudaJitOptions} class represents a set of options that influence
 * the behavior of linking and loading modules.
 * <p>
 * Option values are recorded in Java and only turned into a native object
 * on demand (see {@code getHandle()}); changing any option discards a
 * previously created native object so the next use sees the new settings.
 */
public final class CudaJitOptions implements Cloneable {

	/**
	 * {@code CacheMode} identifies the cache management choices.
	 */
	public static enum CacheMode {

		/** Compile with no -dlcm flag specified. */
		DEFAULT(1),

		/** Compile with L1 cache disabled. */
		L1_DISABLED(2),

		/** Compile with L1 cache enabled. */
		L1_ENABLED(3);

		/** The value stored for {@code OPT_CACHE_MODE} when this mode is selected. */
		final int nativeMode;

		CacheMode(int nativeMode) {
			this.nativeMode = nativeMode;
		}
	}

	/**
	 * {@code Fallback} identifies the available fall-back strategies
	 * when an exactly matching object is not available.
	 */
	public static enum Fallback {

		/** Prefer to fall back to compatible binary code if exact match not found. */
		PreferBinary(1),

		/** Prefer to compile ptx if exact binary match not found. */
		PreferPtx(2);

		/** The value stored for {@code OPT_FALLBACK_STRATEGY} when this strategy is selected. */
		final int nativeStrategy;

		Fallback(int nativeStrategy) {
			this.nativeStrategy = nativeStrategy;
		}
	}

	// The range of OPT_* constants.
	// Each OPT_* code is used both as an index into optionValue and as a
	// bit position in optionMask, so every code must be less than NUM_OPT_CODES.
	private static final int NUM_OPT_CODES = 15;

	private static final int OPT_CACHE_MODE = 14;

	private static final int OPT_ERROR_LOG_BUFFER_SIZE_BYTES = 6;

	private static final int OPT_FALLBACK_STRATEGY = 10;

	private static final int OPT_GENERATE_DEBUG_INFO = 11;

	private static final int OPT_GENERATE_LINE_INFO = 13;

	private static final int OPT_INFO_LOG_BUFFER_SIZE_BYTES = 4;

	private static final int OPT_LOG_VERBOSE = 12;

	private static final int OPT_MAX_REGISTERS = 0;

	private static final int OPT_OPTIMIZATION_LEVEL = 7;

	private static final int OPT_TARGET = 9;

	private static final int OPT_TARGET_FROM_CUCONTEXT = 8;

	private static final int OPT_THREADS_PER_BLOCK = 1;

	private static final int OPT_WALL_TIME = 2;

	/**
	 * Creates the native options object.
	 *
	 * @param keyValuePairs
	 *          alternating entries: an option code followed by its value
	 * @return
	 *          the handle of the new native object
	 * @throws CudaException
	 *          if the native object cannot be created
	 */
	private static native long create(int[] keyValuePairs) throws CudaException;

	/** Destroys the native object identified by {@code handle}. */
	private static native void destroy(long handle);

	// The following native accessors are used by update(long) to copy
	// results out of the native object into the fields of this object.

	private static native String getErrorLogBuffer(long handle);

	private static native String getInfoLogBuffer(long handle);

	private static native int getThreadsPerBlock(long handle);

	private static native float getWallTime(long handle);

	/** Last error log text captured by {@code update(long)}; initially empty. */
	private String errorLogBuffer;

	/** Last information log text captured by {@code update(long)}; initially empty. */
	private String infoLogBuffer;

	/** Handle of the native object, or zero if none currently exists. */
	private final AtomicLong nativeHandle;

	/** Bit {@code k} is set if option code {@code k} has been given a value. */
	private int optionMask;

	/** Option values, indexed by option code. */
	private final int[] optionValue;

	/** Last threads-per-block value captured by {@code update(long)}. */
	private int threadsPerBlock;

	/** Last wall time value captured by {@code update(long)}. */
	private float wallTime;

	/**
	 * Creates a new options object.
	 */
	public CudaJitOptions() {
		super();
		this.errorLogBuffer = ""; //$NON-NLS-1$
		this.infoLogBuffer = ""; //$NON-NLS-1$
		this.nativeHandle = new AtomicLong();
		this.optionMask = 0;
		this.optionValue = new int[NUM_OPT_CODES];
		this.threadsPerBlock = 0;
		this.wallTime = 0.0f;
	}

	/**
	 * Creates a new options object with the same state as this object.
	 * <p>
	 * Only the option settings are copied; the new object starts without
	 * a native handle and with empty logs and zeroed results.
	 */
	@Override
	protected CudaJitOptions clone() {
		CudaJitOptions clone = new CudaJitOptions();

		clone.optionMask = optionMask;
		System.arraycopy(optionValue, 0, clone.optionValue, 0, NUM_OPT_CODES);

		return clone;
	}

	/**
	 * Returns the contents of the error log.
	 * <p>
	 * The result will be empty unless {@link #setErrorLogBufferSize(int)}
	 * was called with a positive value, this object was used in connection
	 * with a {@link CudaModule} or a {@link CudaLinker}, and errors were
	 * reported.
	 *
	 * @return
	 *          the contents of the error log
	 */
	public String getErrorLogBuffer() {
		return errorLogBuffer;
	}

	/**
	 * Returns the handle of the native options object, creating that
	 * object first if none currently exists.
	 *
	 * @return
	 *          the native handle
	 * @throws CudaException
	 *          if the native object cannot be created
	 */
	long getHandle() throws CudaException {
		long handle;

		while ((handle = nativeHandle.get()) == 0) {
			int mask = optionMask;
			// two entries (code, value) for each option that has been set
			int[] keyValuePairs = new int[Integer.bitCount(mask) << 1];
			int index = 0;

			// walk the mask from the least significant bit; the bit position is the option code
			for (int code = 0; mask != 0; ++code, mask >>= 1) {
				if ((mask & 1) != 0) {
					keyValuePairs[index++] = code;
					keyValuePairs[index++] = optionValue[code];
				}
			}

			handle = create(keyValuePairs);

			if (nativeHandle.compareAndSet(0, handle)) {
				break;
			}

			// forget our work and look for results from a thread that finished earlier
			destroy(handle);
		}

		return handle;
	}

	/**
	 * Returns the contents of the information log.
	 * <p>
	 * The result will be empty unless {@link #setInfoLogBufferSize(int)}
	 * was called with a positive value, this object was used in connection
	 * with a {@link CudaModule} or a {@link CudaLinker}, and informational
	 * messages were reported.
	 *
	 * @return
	 *          the contents of the information log
	 */
	public String getInfoLogBuffer() {
		return infoLogBuffer;
	}

	/**
	 * Returns the maximum number of threads per block.
	 * <p>
	 * The result will only be meaningful if {@link #setThreadsPerBlock(int)} was
	 * called with a positive value, and this object was used in connection
	 * with a {@link CudaModule} or a {@link CudaLinker} involving PTX code.
	 *
	 * @return
	 *          the maximum number of threads per block
	 */
	public int getThreadsPerBlock() {
		return threadsPerBlock;
	}

	/**
	 * Returns the total elapsed time, in milliseconds,
	 * spent in the compiler and linker.
	 * <p>
	 * Applies to: compiler and linker.
	 *
	 * @return
	 *          the total elapsed time, in milliseconds, spent in the compiler and linker
	 */
	public float getWallTime() {
		return wallTime;
	}

	/**
	 * Requests recording of the total wall clock time,
	 * in milliseconds, spent in the compiler and linker.
	 * <p>
	 * Applies to: compiler and linker.
	 *
	 * @return
	 *          this options object
	 */
	public CudaJitOptions recordWallTime() {
		return setOption(OPT_WALL_TIME, true);
	}

	/**
	 * Detaches and destroys the native options object, if one exists.
	 *
	 * @param update
	 *          whether the logs and other results should be copied from
	 *          the native object into this object before it is destroyed
	 */
	void releaseHandle(boolean update) {
		long handle = nativeHandle.getAndSet(0);

		if (handle != 0) {
			try {
				if (update) {
					update(handle);
				}
			} finally {
				// The handle is no longer reachable through nativeHandle, so it
				// must be destroyed here even if reading the results failed.
				destroy(handle);
			}
		}
	}

	/**
	 * Specifies the desired caching behavior (-dlcm).
	 * <p>
	 * Applies to compiler only.
	 *
	 * @param mode
	 *          the desired caching behavior; {@code null} is treated
	 *          as {@link CacheMode#DEFAULT}
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setCacheMode(CacheMode mode) {
		if (mode == null) {
			mode = CacheMode.DEFAULT;
		}

		return setOption(OPT_CACHE_MODE, mode.nativeMode);
	}

	/**
	 * Specifies the size, in bytes, to allocate for capturing error messages.
	 * <p>
	 * Applies to compiler and linker.
	 *
	 * @param size
	 *          the size, in bytes, of the error log buffer
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setErrorLogBufferSize(int size) {
		return setOption(OPT_ERROR_LOG_BUFFER_SIZE_BYTES, size);
	}

	/**
	 * Specifies whether to generate debug information.
	 * <p>
	 * Applies to compiler and linker.
	 *
	 * @param enabled
	 *          whether debug information should be generated
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setGenerateDebugInfo(boolean enabled) {
		return setOption(OPT_GENERATE_DEBUG_INFO, enabled);
	}

	/**
	 * Specifies whether to generate line number information.
	 * <p>
	 * Applies to compiler only.
	 *
	 * @param enabled
	 *          whether line number information should be generated
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setGenerateLineInfo(boolean enabled) {
		return setOption(OPT_GENERATE_LINE_INFO, enabled);
	}

	/**
	 * Specifies the size, in bytes, to allocate for capturing informational
	 * messages.
	 * <p>
	 * Applies to compiler and linker.
	 *
	 * @param size
	 *          the size, in bytes, of the information log buffer
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setInfoLogBufferSize(int size) {
		return setOption(OPT_INFO_LOG_BUFFER_SIZE_BYTES, size);
	}

	/**
	 * Specifies the fallback strategy if an exactly matching
	 * binary object cannot be found.
	 * <p>
	 * Applies to: compiler only
	 *
	 * @param strategy
	 *          the desired fallback strategy; {@code null} is treated
	 *          as {@link Fallback#PreferPtx}
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setJitFallbackStrategy(Fallback strategy) {
		if (strategy == null) {
			strategy = Fallback.PreferPtx;
		}

		return setOption(OPT_FALLBACK_STRATEGY, strategy.nativeStrategy);
	}

	/**
	 * Specifies whether to generate verbose log messages.
	 * <p>
	 * Applies to: compiler and linker
	 *
	 * @param verbose
	 *          whether verbose log messages should be generated
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setLogVerbose(boolean verbose) {
		return setOption(OPT_LOG_VERBOSE, verbose);
	}

	/**
	 * Specifies the maximum number of registers that a thread may use.
	 * <p>
	 * Applies to: compiler only
	 *
	 * @param limit
	 *          the maximum number of registers a thread may use
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setMaxRegisters(int limit) {
		return setOption(OPT_MAX_REGISTERS, limit);
	}

	/**
	 * Specifies the level of optimization to be applied to generated code
	 * (0 - 4), with 4 being the default and highest level of optimization.
	 * <p>
	 * Applies to compiler only.
	 *
	 * @param level
	 *          the desired optimization level
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setOptimizationLevel(int level) {
		return setOption(OPT_OPTIMIZATION_LEVEL, level);
	}

	/**
	 * Records a boolean option, stored as 1 for {@code true} and 0 for {@code false}.
	 *
	 * @param key
	 *          the option code (one of the {@code OPT_*} constants)
	 * @param value
	 *          the option value
	 * @return
	 *          this options object
	 */
	private CudaJitOptions setOption(int key, boolean value) {
		return setOption(key, value ? 1 : 0);
	}

	/**
	 * Records an integer option.
	 * <p>
	 * Any existing native object is destroyed first (without capturing its
	 * results) so that the next call to {@code getHandle()} creates one
	 * reflecting the new value.
	 *
	 * @param key
	 *          the option code (one of the {@code OPT_*} constants)
	 * @param value
	 *          the option value
	 * @return
	 *          this options object
	 */
	private CudaJitOptions setOption(int key, int value) {
		releaseHandle(false);

		optionMask |= 1 << key;
		optionValue[key] = value;

		return this;
	}

	/**
	 * Specifies the desired compute target.
	 * <p>
	 * Cannot be combined with {@link #setThreadsPerBlock(int)}.
	 * <p>
	 * Applies to compiler and linker.
	 *
	 * @param target
	 *          the desired compute target
	 * @return
	 *          this options object
	 * @throws NullPointerException
	 *          if {@code target} is {@code null}
	 */
	public CudaJitOptions setTarget(CudaJitTarget target) {
		return setOption(OPT_TARGET, target.nativeValue);
	}

	/**
	 * Specifies that the target should be determined based on the current
	 * attached context.
	 * <p>
	 * Applies to compiler and linker.
	 *
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setTargetFromCuContext() {
		return setOption(OPT_TARGET_FROM_CUCONTEXT, true);
	}

	/**
	 * Specifies the minimum number of threads per block for compilation.
	 * <p>
	 * This restricts the resource utilization of the compiler (e.g. maximum
	 * registers) such that a block with the given number of threads should be
	 * able to launch based on register limitations. Note, this option does not
	 * currently take into account any other resource limitations, such as
	 * shared memory utilization.
	 * <p>
	 * Cannot be combined with {@link #setTarget(CudaJitTarget)}.
	 * <p>
	 * Applies to compiler only.
	 *
	 * @param limit
	 *          the desired minimum number of threads per block
	 * @return
	 *          this options object
	 */
	public CudaJitOptions setThreadsPerBlock(int limit) {
		return setOption(OPT_THREADS_PER_BLOCK, limit);
	}

	/**
	 * Copies the logs and other results from the native object, if one
	 * exists, into this object. The native object is left in place.
	 *
	 * @return
	 *          this options object
	 */
	CudaJitOptions update() {
		long handle = nativeHandle.get();

		if (handle != 0) {
			update(handle);
		}

		return this;
	}

	/**
	 * Copies the error log, information log, threads-per-block and wall time
	 * values from the native object identified by {@code handle} into the
	 * corresponding fields of this object.
	 *
	 * @param handle
	 *          the handle of the native object to read from
	 */
	private void update(long handle) {
		errorLogBuffer = getErrorLogBuffer(handle);
		infoLogBuffer = getInfoLogBuffer(handle);
		threadsPerBlock = getThreadsPerBlock(handle);
		wallTime = getWallTime(handle);
	}
}
