Path: blob/master/jcl/src/openj9.cuda/share/classes/com/ibm/cuda/CudaKernel.java
12927 views
/*[INCLUDE-IF Sidecar18-SE]*/1/*******************************************************************************2* Copyright (c) 2013, 2018 IBM Corp. and others3*4* This program and the accompanying materials are made available under5* the terms of the Eclipse Public License 2.0 which accompanies this6* distribution and is available at https://www.eclipse.org/legal/epl-2.0/7* or the Apache License, Version 2.0 which accompanies this distribution and8* is available at https://www.apache.org/licenses/LICENSE-2.0.9*10* This Source Code may also be made available under the following11* Secondary Licenses when the conditions for such availability set12* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU13* General Public License, version 2 with the GNU Classpath14* Exception [1] and GNU General Public License, version 2 with the15* OpenJDK Assembly Exception [2].16*17* [1] https://www.gnu.org/software/classpath/license.html18* [2] http://openjdk.java.net/legal/assembly-exception.html19*20* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception21*******************************************************************************/22package com.ibm.cuda;2324import com.ibm.cuda.CudaDevice.CacheConfig;25import com.ibm.cuda.CudaDevice.SharedMemConfig;2627/**28* The {@code CudaKernel} class represents a kernel {@link CudaFunction function}29* in a loaded {@link CudaModule}.30*/31public class CudaKernel {3233/**34* The {@code Parameters} class represents the actual parameters in35* a {@link CudaKernel kernel} launch.36*/37public static final class Parameters implements Cloneable {3839/* A bit-mask of missing values (the bit (1<<i) is set if the values[i] is missing). */40private long mask;4142final long[] values;4344/**45* Creates a new bundle of parameter values.46*47* @param count48* the number of values to be passed when the kernel is launched49* @throws IllegalArgumentException50* if the count is negative or greater than 6451*/52public Parameters(int count) {53super();54if (0 <= count && count <= Long.SIZE) {55this.mask = count == Long.SIZE ? -1L : (1L << count) - 1;56this.values = new long[count];57} else {58throw new IllegalArgumentException();59}60}6162/**63* Creates a new bundle of parameter values.64* <p>65* Each parameter value must be one of the following:66* <ul>67* <li>a boxed primitive value</li>68* <li>a CudaBuffer object</li>69* <li>null</li>70* </ul>71*72* @param values73* the values to be passed when the kernel is launched74* @throws IllegalArgumentException75* if {@code parameters} contains any unsupported types76*/77public Parameters(Object... values) {78super();7980int count = values.length;8182this.mask = 0;83this.values = new long[count];8485for (int i = 0; i < count; ++i) {86this.values[i] = CudaFunction.nativeValueOf(values[i]);87}88}8990/**91* Creates a copy of the given parameter block.92*93* @param that94* the parameter block to be copied95*/96public Parameters(Parameters that) {97super();98this.mask = that.mask;99this.values = that.values.clone();100}101102/**103* Appends a byte value to the list of parameter values.104*105* @param value106* the value to be passed when the kernel is launched107* @return108* this parameter list109* @throws IndexOutOfBoundsException110* if all positions already have values defined111*/112public Parameters add(byte value) {113return add((long) value);114}115116/**117* Appends a character value to the list of parameter values.118*119* @param value120* the value to be passed when the kernel is launched121* @return122* this parameter list123* @throws IndexOutOfBoundsException124* if all positions already have values defined125*/126public Parameters add(char value) {127return add((long) value);128}129130/**131* Appends a buffer address to the list of parameter values.132*133* @param value134* the value to be passed when the kernel is launched,135* or null to pass a null pointer136* @return137* this parameter list138* @throws IllegalStateException139* if the buffer has been closed (see {@link CudaBuffer#close()})140* @throws IndexOutOfBoundsException141* if all positions already have values defined142*/143public Parameters add(CudaBuffer value) {144return add(value == null ? 0 : value.getAddress());145}146147/**148* Appends a double value to the list of parameter values.149*150* @param value151* the value to be passed when the kernel is launched152* @return153* this parameter list154* @throws IndexOutOfBoundsException155* if all positions already have values defined156*/157public Parameters add(double value) {158return add(Double.doubleToRawLongBits(value));159}160161/**162* Appends a float value to the list of parameter values.163*164* @param value165* the value to be passed when the kernel is launched166* @return167* this parameter list168* @throws IndexOutOfBoundsException169* if all positions already have values defined170*/171public Parameters add(float value) {172return add((long) Float.floatToRawIntBits(value));173}174175/**176* Appends a integer value to the list of parameter values.177*178* @param value179* the value to be passed when the kernel is launched180* @return181* this parameter list182* @throws IndexOutOfBoundsException183* if all positions already have values defined184*/185public Parameters add(int value) {186return add((long) value);187}188189/**190* Appends a long value to the list of parameter values.191*192* @param value193* the value to be passed when the kernel is launched194* @return195* this parameter list196* @throws IndexOutOfBoundsException197* if all positions already have values defined198*/199public Parameters add(long value) {200if (isComplete()) {201throw new IndexOutOfBoundsException();202}203204int index = Long.numberOfTrailingZeros(mask);205206return set(index, value);207}208209/**210* Appends a short value to the list of parameter values.211*212* @param value213* the value to be passed when the kernel is launched214* @return215* this parameter list216* @throws IndexOutOfBoundsException217* if all positions already have values defined218*/219public Parameters add(short value) {220return add((long) value);221}222223/**224* Creates a copy of this parameter block.225*/226@Override227public Parameters clone() {228return new Parameters(this);229}230231boolean isComplete() {232return mask == 0;233}234235/**236* Replaces the parameter at the specified index with the given byte value.237*238* @param index239* the index of the parameter to be set240* @param value241* the value to be passed when the kernel is launched242* @return243* this parameter list244* @throws IndexOutOfBoundsException245* if {@code index} < 0 or {@code index} >= the size of this parameter list246*/247public Parameters set(int index, byte value) {248return set(index, (long) value);249}250251/**252* Replaces the parameter at the specified index with the given character value.253*254* @param index255* the index of the parameter to be set256* @param value257* the value to be passed when the kernel is launched258* @return259* this parameter list260* @throws IndexOutOfBoundsException261* if {@code index} < 0 or {@code index} >= the size of this parameter list262*/263public Parameters set(int index, char value) {264return set(index, (long) value);265}266267/**268* Replaces the parameter at the specified index with the given buffer address.269*270* @param index271* the index of the parameter to be set272* @param value273* the value to be passed when the kernel is launched,274* or null to pass a null pointer275* @return276* this parameter list277* @throws IndexOutOfBoundsException278* if {@code index} < 0 or {@code index} >= the size of this parameter list279*/280public Parameters set(int index, CudaBuffer value) {281return set(index, value == null ? 0 : value.getAddress());282}283284/**285* Replaces the parameter at the specified index with the given double value.286*287* @param index288* the index of the parameter to be set289* @param value290* the value to be passed when the kernel is launched291* @return292* this parameter list293* @throws IndexOutOfBoundsException294* if {@code index} < 0 or {@code index} >= the size of this parameter list295*/296public Parameters set(int index, double value) {297return set(index, Double.doubleToRawLongBits(value));298}299300/**301* Replaces the parameter at the specified index with the given float value.302*303* @param index304* the index of the parameter to be set305* @param value306* the value to be passed when the kernel is launched307* @return308* this parameter list309* @throws IndexOutOfBoundsException310* if {@code index} < 0 or {@code index} >= the size of this parameter list311*/312public Parameters set(int index, float value) {313return set(index, (long) Float.floatToRawIntBits(value));314}315316/**317* Replaces the parameter at the specified index with the given int value.318*319* @param index320* the index of the parameter to be set321* @param value322* the value to be passed when the kernel is launched323* @return324* this parameter list325* @throws IndexOutOfBoundsException326* if {@code index} < 0 or {@code index} >= the size of this parameter list327*/328public Parameters set(int index, int value) {329return set(index, (long) value);330}331332/**333* Replaces the parameter at the specified index with the given long value.334*335* @param index336* the index of the parameter to be set337* @param value338* the value to be passed when the kernel is launched339* @return340* this parameter list341* @throws IndexOutOfBoundsException342* if {@code index} < 0 or {@code index} >= the size of this parameter list343*/344public Parameters set(int index, long value) {345if (0 <= index && index < values.length) {346mask &= ~(1L << index);347values[index] = value;348return this;349} else {350throw new IndexOutOfBoundsException(Integer.toString(index));351}352}353354/**355* Replaces the parameter at the specified index with a short value.356*357* @param index358* the index of the parameter to be set359* @param value360* the value to be passed when the kernel is launched361* @return362* this parameter list363* @throws IndexOutOfBoundsException364* if {@code index} < 0 or {@code index} >= the size of this parameter list365*/366public Parameters set(int index, short value) {367return set(index, (long) value);368}369}370371private final CudaFunction function;372373/**374* Creates a new kernel object in the given module whose entry point375* is the specified function.376*377* @param module378* the module containing the kernel code379* @param function380* the entry point of the kernel381*/382public CudaKernel(CudaModule module, CudaFunction function) {383super();384385if (function.deviceId != module.deviceId) {386throw new IllegalArgumentException();387}388389this.function = function;390}391392/**393* Creates a new kernel object in the given module whose entry point394* is the function with the specified name.395*396* @param module397* the module containing the kernel code398* @param functionName399* the name of the entry point of the kernel400* @throws CudaException401* if a CUDA exception occurs402*/403public CudaKernel(CudaModule module, String functionName)404throws CudaException {405this(module, module.getFunction(functionName));406}407408/**409* Returns the value of the specified @{code attribute} for the410* {@link CudaFunction function} associated with this kernel.411*412* @param attribute413* the attribute to be queried (see CudaFunction.ATTRIBUTE_XXX)414* @return415* the attribute value416* @throws CudaException417* if a CUDA exception occurs418*/419public final int getAttribute(int attribute) throws CudaException {420return function.getAttribute(attribute);421}422423/**424* Launches this kernel. The launch configuration is given by {@code grid}425* and the actual parameter values are specified by {@code parameters}.426* <p>427* Each parameter value must be one of the following:428* <ul>429* <li>a boxed primitive value</li>430* <li>a CudaBuffer object</li>431* <li>null</li>432* </ul>433*434* @param grid435* the launch configuration436* @param parameters437* the actual parameter values438* @throws CudaException439* if a CUDA exception occurs440* @throws IllegalArgumentException441* if {@code parameters} contains any unsupported types442*/443public final void launch(CudaGrid grid, Object... parameters)444throws CudaException {445function.launch(grid, parameters);446}447448/**449* Launches this kernel. The launch configuration is given by {@code grid}450* and the actual parameter values are specified by {@code parameters}.451*452* @param grid453* the launch configuration454* @param parameters455* the actual parameter values456* @throws CudaException457* if a CUDA exception occurs458* @throws IllegalArgumentException459* if {@code parameters} does not contain the correct number of values460*/461public final void launch(CudaGrid grid, Parameters parameters)462throws CudaException {463function.launch(grid, parameters);464}465466/**467* Configures the cache for the {@link CudaFunction function} associated468* with this kernel.469*470* @param config471* the desired cache configuration472* @throws CudaException473* if a CUDA exception occurs474*/475public final void setCacheConfig(CacheConfig config) throws CudaException {476function.setCacheConfig(config);477}478479/**480* Configures the shared memory of the {@link CudaFunction function}481* associated with this kernel.482*483* @param config484* the desired shared memory configuration485* @throws CudaException486* if a CUDA exception occurs487*/488public final void setSharedMemConfig(SharedMemConfig config)489throws CudaException {490function.setSharedMemConfig(config);491}492}493494495