Path: blob/master/jcl/src/openj9.cuda/share/classes/com/ibm/cuda/CudaFunction.java
/*[INCLUDE-IF Sidecar18-SE]*/
/*******************************************************************************
 * Copyright (c) 2013, 2018 IBM Corp. and others
 *
 * This program and the accompanying materials are made available under
 * the terms of the Eclipse Public License 2.0 which accompanies this
 * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
 * or the Apache License, Version 2.0 which accompanies this distribution and
 * is available at https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This Source Code may also be made available under the following
 * Secondary Licenses when the conditions for such availability set
 * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
 * General Public License, version 2 with the GNU Classpath
 * Exception [1] and GNU General Public License, version 2 with the
 * OpenJDK Assembly Exception [2].
 *
 * [1] https://www.gnu.org/software/classpath/license.html
 * [2] http://openjdk.java.net/legal/assembly-exception.html
 *
 * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception
 *******************************************************************************/
package com.ibm.cuda;

import com.ibm.cuda.CudaDevice.CacheConfig;
import com.ibm.cuda.CudaDevice.SharedMemConfig;
import com.ibm.cuda.CudaKernel.Parameters;

/**
 * The {@code CudaFunction} class represents a kernel entry point found in
 * a specific {@code CudaModule} loaded on a CUDA-capable device.
 */
public final class CudaFunction {

	/**
	 * The binary architecture version for which the function was compiled.
	 * This value is the major binary version * 10 + the minor binary version,
	 * so a binary version 1.3 function would return the value 13. Note that
	 * this will return a value of 10 for legacy cubins that do not have a
	 * properly-encoded binary architecture version.
	 */
	public static final int ATTRIBUTE_BINARY_VERSION = 6;

	/**
	 * The size in bytes of user-allocated constant memory required by this
	 * function.
	 */
	public static final int ATTRIBUTE_CONST_SIZE_BYTES = 2;

	/**
	 * The size in bytes of local memory used by each thread of this function.
	 */
	public static final int ATTRIBUTE_LOCAL_SIZE_BYTES = 3;

	/**
	 * The maximum number of threads per block, beyond which a launch of the
	 * function would fail. This number depends on both the function and the
	 * device on which the function is currently loaded.
	 */
	public static final int ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0;

	/**
	 * The number of registers used by each thread of this function.
	 */
	public static final int ATTRIBUTE_NUM_REGS = 4;

	/**
	 * The PTX virtual architecture version for which the function was
	 * compiled. This value is the major PTX version * 10 + the minor PTX
	 * version, so a PTX version 1.3 function would return the value 13.
	 * Note that this may return the undefined value of 0 for cubins
	 * compiled prior to CUDA 3.0.
	 */
	public static final int ATTRIBUTE_PTX_VERSION = 5;

	/**
	 * The size in bytes of statically-allocated shared memory required by
	 * this function. This does not include dynamically-allocated shared
	 * memory requested by the user at runtime.
	 */
	public static final int ATTRIBUTE_SHARED_SIZE_BYTES = 1;
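	/*
	 * A minimal usage sketch for the attribute constants above, kept as a
	 * comment so this file remains valid Java. Only getAttribute(int) and
	 * the ATTRIBUTE_* constants are confirmed by this class; the variable
	 * "function" and how it was obtained are hypothetical:
	 *
	 *   int maxThreads = function.getAttribute(ATTRIBUTE_MAX_THREADS_PER_BLOCK);
	 *   int staticShared = function.getAttribute(ATTRIBUTE_SHARED_SIZE_BYTES);
	 *   int registers = function.getAttribute(ATTRIBUTE_NUM_REGS);
	 *
	 *   // e.g. clamp a requested block width to what the device will accept
	 *   int blockDimX = Math.min(512, maxThreads);
	 */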
	private static native int getAttribute(int deviceId, long nativeHandle,
			int attribute) throws CudaException;

	private static native void launch(int deviceId, long functionPtr, // <br/>
			int gridDimX, int gridDimY, int gridDimZ, // <br/>
			int blockDimX, int blockDimY, int blockDimZ, // <br/>
			int sharedMemBytes, long stream, // <br/>
			long[] parameterValues) throws CudaException;

	static long nativeValueOf(Object parameter) {
		long value = 0;

		if (parameter != null) {
			Class<? extends Object> type = parameter.getClass();

			// type tests are sorted in order of decreasing (expected) likelihood
			if (type == CudaBuffer.class) {
				value = ((CudaBuffer) parameter).getAddress();
			} else if (type == Integer.class) {
				value = ((Integer) parameter).intValue();
			} else if (type == Long.class) {
				value = ((Long) parameter).longValue();
			} else if (type == Double.class) {
				value = Double.doubleToRawLongBits( // <br/>
						((Double) parameter).doubleValue());
			} else if (type == Float.class) {
				value = Float.floatToRawIntBits( // <br/>
						((Float) parameter).floatValue());
			} else if (type == Short.class) {
				value = ((Short) parameter).shortValue();
			} else if (type == Byte.class) {
				value = ((Byte) parameter).byteValue();
			} else if (type == Character.class) {
				value = ((Character) parameter).charValue();
			} else {
				throw new IllegalArgumentException();
			}
		}

		return value;
	}

	private static native void setCacheConfig(int deviceId, long nativeHandle,
			int config) throws CudaException;

	private static native void setSharedMemConfig(int deviceId,
			long nativeHandle, int config) throws CudaException;

	final int deviceId;

	private final long nativeHandle;

	CudaFunction(int deviceId, long nativeHandle) {
		super();
		this.deviceId = deviceId;
		this.nativeHandle = nativeHandle;
	}

	/**
	 * Returns the value of the specified {@code attribute}.
	 *
	 * @param attribute
	 *          the attribute to be queried (see ATTRIBUTE_XXX)
	 * @return
	 *          the attribute value
	 * @throws CudaException
	 *          if a CUDA exception occurs
	 */
	public int getAttribute(int attribute) throws CudaException {
		return getAttribute(deviceId, nativeHandle, attribute);
	}

	void launch(CudaGrid grid, Object... parameters) throws CudaException {
		int parameterCount = parameters.length;
		long[] nativeValues = new long[parameterCount];

		for (int i = 0; i < parameterCount; ++i) {
			nativeValues[i] = nativeValueOf(parameters[i]);
		}

		CudaStream stream = grid.stream;

		launch(deviceId, nativeHandle, // <br/>
				grid.gridDimX, grid.gridDimY, grid.gridDimZ, // <br/>
				grid.blockDimX, grid.blockDimY, grid.blockDimZ, // <br/>
				grid.sharedMemBytes, // <br/>
				stream != null ? stream.getHandle() : 0, // <br/>
				nativeValues);
	}

	void launch(CudaGrid grid, Parameters parameters) throws CudaException {
		if (!parameters.isComplete()) {
			throw new IllegalArgumentException();
		}

		CudaStream stream = grid.stream;

		launch(deviceId, nativeHandle, // <br/>
				grid.gridDimX, grid.gridDimY, grid.gridDimZ, // <br/>
				grid.blockDimX, grid.blockDimY, grid.blockDimZ, // <br/>
				grid.sharedMemBytes, // <br/>
				stream != null ? stream.getHandle() : 0, // <br/>
				parameters.values);
	}
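	/*
	 * A sketch of how parameter marshalling flows through the launch methods
	 * above: each argument is converted to a 64-bit value by nativeValueOf
	 * before crossing the JNI boundary. A CudaBuffer contributes its device
	 * address; boxed Integer/Long/Double/Float/Short/Byte/Character values
	 * are widened to 64 bits; any other type is rejected with
	 * IllegalArgumentException. Callers normally reach these package-private
	 * methods through CudaKernel; the names "kernel", "grid", "deviceBuffer"
	 * and "elementCount" below are hypothetical, with construction elided:
	 *
	 *   kernel.launch(grid, deviceBuffer, Integer.valueOf(elementCount));
	 */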
	/**
	 * Configures the cache for this function.
	 *
	 * @param config
	 *          the desired cache configuration
	 * @throws CudaException
	 *          if a CUDA exception occurs
	 */
	public void setCacheConfig(CacheConfig config) throws CudaException {
		setCacheConfig(deviceId, nativeHandle, config.nativeValue);
	}

	/**
	 * Configures the shared memory of this function.
	 *
	 * @param config
	 *          the desired shared memory configuration
	 * @throws CudaException
	 *          if a CUDA exception occurs
	 */
	public void setSharedMemConfig(SharedMemConfig config) throws CudaException {
		setSharedMemConfig(deviceId, nativeHandle, config.nativeValue);
	}
}
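/*
 * Configuration sketch for the public tuning methods above. The enum
 * constant names are an assumption about CudaDevice.CacheConfig and
 * CudaDevice.SharedMemConfig (they are not defined in this file), and the
 * CudaFunction instance "function" is hypothetical:
 *
 *   // assumed constant names; verify against CudaDevice's nested enums
 *   function.setCacheConfig(CacheConfig.PREFER_SHARED);
 *   function.setSharedMemConfig(SharedMemConfig.FOUR_BYTE_BANK_SIZE);
 */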