Path: blob/master/jcl/src/openj9.gpu/share/classes/com/ibm/gpu/CUDAManager.java
12565 views
/*[INCLUDE-IF JAVA_SPEC_VERSION >= 8]*/1/*******************************************************************************2* Copyright (c) 2014, 2021 IBM Corp. and others3*4* This program and the accompanying materials are made available under5* the terms of the Eclipse Public License 2.0 which accompanies this6* distribution and is available at https://www.eclipse.org/legal/epl-2.0/7* or the Apache License, Version 2.0 which accompanies this distribution and8* is available at https://www.apache.org/licenses/LICENSE-2.0.9*10* This Source Code may also be made available under the following11* Secondary Licenses when the conditions for such availability set12* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU13* General Public License, version 2 with the GNU Classpath14* Exception [1] and GNU General Public License, version 2 with the15* OpenJDK Assembly Exception [2].16*17* [1] https://www.gnu.org/software/classpath/license.html18* [2] http://openjdk.java.net/legal/assembly-exception.html19*20* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0 WITH Classpath-exception-2.0 OR LicenseRef-GPL-2.0 WITH Assembly-exception21*******************************************************************************/22package com.ibm.gpu;2324import java.io.IOException;25import java.io.InputStream;26import java.security.AccessController;27import java.security.PrivilegedAction;28import java.util.ArrayList;29import java.util.Arrays;30import java.util.BitSet;31import java.util.HashMap;32import java.util.Map;33import java.util.Map.Entry;34import java.util.Properties;35import java.util.TreeMap;3637import com.ibm.cuda.Cuda;38import com.ibm.cuda.CudaDevice;3940/*[IF Sidecar19-SE]*/41import java.lang.invoke.MethodHandles;42import java.lang.invoke.VarHandle;43/*[ELSE]44import sun.misc.Unsafe;45/*[ENDIF]*/4647/**48* This class contains information important to IBM GPU enabled functions.49*/50/*[IF JAVA_SPEC_VERSION >= 17]*/51@SuppressWarnings("removal")52/*[ENDIF] JAVA_SPEC_VERSION >= 17 */53public final class CUDAManager {5455private static final class Configuration {5657private static final String DEFAULT_MODEL_NAME = "DEFAULT"; //$NON-NLS-1$5859private static final int DEFAULT_THRESHOLD = 30000;6061private static void loadProperties(Properties properties, String resourceName) throws IOException {62PrivilegedAction<InputStream> action = () -> CUDAManager.class.getResourceAsStream(resourceName);6364try (InputStream input = AccessController.doPrivileged(action)) {65if (input != null) {66properties.load(input);67}68}69}7071private static boolean startsWithIgnoreCase(String string, String prefix) {72int prefixLength = prefix.length();7374if (string.length() >= prefixLength) {75return string.regionMatches(true, 0, prefix, 0, prefixLength);76}7778return true;79}8081private final CUDAManager manager;8283/*84* Keyed by model name; then by type.85*/86private final Map<String, Map<Type, Integer>> thresholds;8788Configuration(CUDAManager manager) {89super();90this.manager = manager;91this.thresholds = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);9293readThresholds();94}9596boolean checkSortProperty(String name) {97String value = getProperty(name);9899if (value != null) {100if (value.isEmpty() // <br/>101|| value.equalsIgnoreCase("all") //$NON-NLS-1$102|| value.equalsIgnoreCase("sort")) { //$NON-NLS-1$103return true;104}105106manager.outputIfVerbose(String.format(107"Invalid value \"%s\" given on system property %s", //$NON-NLS-1$108value, name));109}110111return false;112}113114private int getDefaultThreshold(Type type) {115Map<Type, Integer> modelMap = thresholds.get(DEFAULT_MODEL_NAME);116117if (modelMap != null) {118Integer threshold = modelMap.get(type);119120if (threshold != null) {121return threshold.intValue();122}123}124125return DEFAULT_THRESHOLD;126}127128int getDoubleThreshold() {129return getDefaultThreshold(Type.DOUBLE);130}131132int getDoubleThreshold(String modelName) {133return getThreshold(modelName, Type.DOUBLE);134}135136int getFloatThreshold() {137return getDefaultThreshold(Type.FLOAT);138}139140int getFloatThreshold(String modelName) {141return getThreshold(modelName, Type.FLOAT);142}143144int getIntThreshold() {145return getDefaultThreshold(Type.INT);146}147148int getIntThreshold(String modelName) {149return getThreshold(modelName, Type.INT);150}151152int getLongThreshold() {153return getDefaultThreshold(Type.LONG);154}155156int getLongThreshold(String modelName) {157return getThreshold(modelName, Type.LONG);158}159160private int getThreshold(String modelName, Type type) {161Map<Type, Integer> modelMap = thresholds.get(modelName);162163if (modelMap != null) {164Integer threshold = modelMap.get(type);165166if (threshold != null) {167return threshold.intValue();168}169}170171return getDefaultThreshold(type);172}173174private void readThresholds() {175Properties properties = new Properties();176177try {178loadProperties(properties, "ibm_gpu_thresholds.properties"); //$NON-NLS-1$179} catch (IOException e) {180manager.outputIfVerbose("Warning: couldn't load threshold properties file: " //$NON-NLS-1$181+ e.getLocalizedMessage());182}183184for (Entry<Object, Object> property : properties.entrySet()) {185String propertyName = String.valueOf(property.getKey());186187for (Type type : Type.values()) {188String prefix = type.propertyPrefix;189190if (!startsWithIgnoreCase(propertyName, prefix)) {191continue;192}193194String propertyValue = String.valueOf(property.getValue());195196try {197int value = Integer.parseInt(propertyValue);198199if (0 < value && value < Integer.MAX_VALUE) {200String model = propertyName.substring(prefix.length()).replace('_', ' ');201Map<Type, Integer> modelMap = thresholds.get(model);202203if (modelMap == null) {204thresholds.put(model, modelMap = new HashMap<>());205}206207modelMap.put(type, Integer.valueOf(value));208}209} catch (NumberFormatException e) {210manager.outputIfVerbose(String.format(211"Warning: ignoring non-numeric threshold: %s = %s", //$NON-NLS-1$212propertyName, propertyValue));213}214break;215}216}217}218219}220221private static final class Lock {222223Lock() {224super();225}226227}228229private static final class Singleton {230231static final CUDAManager INSTANCE = new CUDAManager();232233}234235private static enum Type {236237DOUBLE, FLOAT, INT, LONG;238239final String propertyPrefix;240241Type() {242propertyPrefix = "com.ibm.gpu." + name() + "sortThreshold."; //$NON-NLS-1$ //$NON-NLS-2$243}244245}246247private static final Lock lock = new Lock();248249/**250* Return a CUDAManager instance.251*252* @return a CUDAManager instance253* @throws GPUConfigurationException254* This exception is not actually thrown; use {@code instance()} instead.255* @throws SecurityException256* If a security manager exists and the calling thread does not257* have permission to access the CUDAManager instance.258* @deprecated Use {@code instance()} instead.259*/260@Deprecated261public static CUDAManager getInstance()262throws GPUConfigurationException, SecurityException {263return instance();264}265266/**267* Return a CUDAManager instance.268*269* @return a CUDAManager instance270* @throws SecurityException271* If a security manager exists and the calling thread does not272* have permission to access the CUDAManager instance.273*/274public static CUDAManager instance() throws SecurityException {275@SuppressWarnings("removal")276SecurityManager security = System.getSecurityManager();277278if (security != null) {279security.checkPermission(GPUPermission.Access);280}281282return instanceInternal();283}284285static CUDAManager instanceInternal() {286return Singleton.INSTANCE;287}288289/**290* Get the header used to prefix all IBM GPU related output.291*292* @return The header used for IBM GPU related output.293*/294public static String getOutputHeader() {295return "[IBM GPU]:"; //$NON-NLS-1$296}297298static String getProperty(String name) {299PrivilegedAction<String> action = () -> System.getProperty(name);300301return AccessController.doPrivileged(action);302}303304/**305* Get the version of this class.306*307* @return Returns the version of this class.308*/309public static String getVersion() {310return Version.VERSION;311}312313/**314* Performs cleanup on the CUDAManager class.315*316* @deprecated This method has no effect; it will be removed in a future version.317*/318@Deprecated319public static void tearDown() {320return;321}322323private final BitSet busyDevices;324325private int defaultDeviceId;326327private final int defaultDoubleThreshold;328329private final int defaultFloatThreshold;330331private final int defaultIntThreshold;332333private final int defaultLongThreshold;334335private CUDADevice[] devices;336337/*[IF Sidecar19-SE]*/338private final VarHandle devicesHandle;339/*[ELSE]340private final long devicesOffset;341private final Unsafe unsafe;342/*[ENDIF]*/343344private boolean doSortOnGPU;345346private boolean enforceGPUSort;347348private boolean verboseOutput;349350CUDAManager() {351super();352busyDevices = new BitSet();353defaultDeviceId = 0;354devices = null;355doSortOnGPU = false;356enforceGPUSort = false;357358// set this early for better feedback359verboseOutput = getProperty("com.ibm.gpu.verbose") != null; //$NON-NLS-1$360361/*[IF Sidecar19-SE]*/362try {363devicesHandle = MethodHandles.lookup().findVarHandle(CUDAManager.class, "devices", CUDADevice[].class); //$NON-NLS-1$364} catch (IllegalAccessException | NoSuchFieldException e) {365throw new InternalError(e.toString(), e);366}367/*[ELSE]368try {369unsafe = Unsafe.getUnsafe();370devicesOffset = unsafe.objectFieldOffset(CUDAManager.class.getDeclaredField("devices")); //$NON-NLS-1$371} catch (NoSuchFieldException e) {372InternalError error = new InternalError(e.toString());373error.initCause(e);374throw error;375}376/*[ENDIF]*/377378Configuration configuration = new Configuration(this);379380defaultDoubleThreshold = configuration.getDoubleThreshold();381defaultFloatThreshold = configuration.getFloatThreshold();382defaultIntThreshold = configuration.getIntThreshold();383defaultLongThreshold = configuration.getLongThreshold();384385if (configuration.checkSortProperty("com.ibm.gpu.enforce")) { //$NON-NLS-1$386doSortOnGPU = true;387enforceGPUSort = true;388} else if (configuration.checkSortProperty("com.ibm.gpu.enable")) { //$NON-NLS-1$389doSortOnGPU = true;390}391392if (configuration.checkSortProperty("com.ibm.gpu.disable")) { //$NON-NLS-1$393doSortOnGPU = false;394enforceGPUSort = false;395}396}397398/**399* Look for the next free device and mark it as busy.400*401* @return Returns the device ID of the next free device.402*/403public int acquireFreeDevice() {404synchronized (lock) {405int deviceId = busyDevices.nextClearBit(0);406407if (deviceId < getDeviceCount()) {408outputIfVerbose("Acquired device: " + deviceId); //$NON-NLS-1$409busyDevices.set(deviceId);410} else {411outputIfVerbose("No available devices found"); //$NON-NLS-1$412deviceId = -1;413}414415return deviceId;416}417}418419private CUDADevice[] findDevices() {420int deviceCount = 0;421422try {423deviceCount = Cuda.getDeviceCount();424} catch (Exception e) {425// Cuda.getDeviceCount() declares but never throws CudaException.426outputIfVerbose("Couldn't count devices due to: " + e.getLocalizedMessage()); //$NON-NLS-1$427} catch (NoClassDefFoundError e) {428outputIfVerbose("Unsupported platform detected"); //$NON-NLS-1$429}430431CUDADevice[] allDevices = new CUDADevice[deviceCount];432433if (deviceCount != 0) {434Configuration configuration = new Configuration(this);435436for (int deviceId = 0; deviceId < deviceCount; ++deviceId) {437String modelName = ""; //$NON-NLS-1$438439try {440modelName = new CudaDevice(deviceId).getName();441} catch (Exception e) {442// This is likely a CudaException but we can't catch it specifically443// or class loading verification would fail for this class.444outputIfVerbose("Warning: couldn't get the GPU model name for device " + deviceId); //$NON-NLS-1$445}446447allDevices[deviceId] = new CUDADevice(deviceId, modelName, // <br/>448configuration.getDoubleThreshold(modelName), // <br/>449configuration.getFloatThreshold(modelName), // <br/>450configuration.getIntThreshold(modelName), // <br/>451configuration.getLongThreshold(modelName));452}453454outputIfVerbose("Discovered " + deviceCount + " device(s)"); //$NON-NLS-1$ //$NON-NLS-2$455}456457return allDevices;458}459460/**461* Use this method to obtain a reference to an ArrayList containing462* references to all discovered CUDA devices.463*464* @return Returns an ArrayList containing all discovered CUDA465* devices - see {@link CUDADevice} for details.466*/467public ArrayList<CUDADevice> getCUDADevices() {468return new ArrayList<>(Arrays.asList(getDevices()));469}470471/**472* Gets the ID of the default device, set to 0 by default.473*474* @return Returns the device ID of the current default device.475*/476public int getDefaultDevice() {477return defaultDeviceId;478}479480/**481* Get a reference to the CUDA device by means of its index (with 0 being the first).482*483* @param deviceId The index of the device to retrieve (with 0 being the first).484* @return Returns a CUDA device at the specified index - see485* {@link CUDADevice} for details.486* @throws GPUConfigurationException Throws this exception if an invalid deviceId487* has been specified.488*/489public CUDADevice getDevice(int deviceId) throws GPUConfigurationException {490CUDADevice[] allDevices = getDevices();491492if (0 <= deviceId && deviceId < allDevices.length) {493return allDevices[deviceId];494} else {495throw newGPUConfigurationException("Invalid device"); //$NON-NLS-1$496}497}498499/**500* Identifies the number of available CUDA devices.501*502* @return Returns how many CUDA devices have been detected.503*/504public int getDeviceCount() {505return getDevices().length;506}507508private CUDADevice[] getDevices() {509CUDADevice[] allDevices = devices;510511if (allDevices == null) {512synchronized (lock) {513allDevices = devices;514515if (allDevices == null) {516allDevices = findDevices();517/*[IF Sidecar19-SE]*/518devicesHandle.setRelease(this, allDevices);519/*[ELSE]520unsafe.putOrderedObject(this, devicesOffset, allDevices);521/*[ENDIF]*/522}523}524}525526return allDevices;527}528529/**530* Identifies the CUDA device that has the most memory available.531*532* @return Returns a reference to the CUDA device with the most memory available.533* @throws GPUConfigurationException Throws this exception if an534* attempt was made to access an invalid device (no longer available).535*/536public CUDADevice getDeviceWithMostAvailableMemory()537throws GPUConfigurationException {538CUDADevice[] allDevices = getDevices();539CUDADevice bestDevice = null;540int deviceCount = allDevices.length;541long mostFreeMem = 0;542543for (int deviceId = 0; deviceId < deviceCount; ++deviceId) {544try {545long freeMem = new CudaDevice(deviceId).getFreeMemory();546547if (mostFreeMem < freeMem || deviceId == 0) {548bestDevice = allDevices[deviceId];549mostFreeMem = freeMem;550}551} catch (Exception e) {552// ignore553}554}555556return bestDevice;557}558559/**560* Gets the minimum length of a double array that will be561* sorted using a GPU if enabled.562*563* @return The minimum length of a double array that will be564* sorted using a GPU.565*/566public int getDoubleThreshold() {567return defaultDoubleThreshold;568}569570/**571* Use this method to return an array of enabled CUDA devices.572*573* @return Returns an array containing enabled CUDA devices -574* see {@link CUDADevice} for details.575*/576public CUDADevice[] getEnabledCUDADevices() {577return getDevices().clone();578}579580/**581* Gets the minimum length of a float array that will be582* sorted using a GPU if enabled.583*584* @return The minimum length of a float array that will be585* sorted using a GPU.586*/587public int getFloatThreshold() {588return defaultFloatThreshold;589}590591/**592* Get the amount of free memory (in bytes) available for the provided CUDA device.593* Does not persistently change the current device.594*595* @param deviceId The index of the device to query (with 0 being the first).596* @return Returns the amount of free memory available.597* @throws GPUConfigurationException Throw this exception if cannot get free memory amount.598*/599public long getFreeMemoryForDevice(int deviceId)600throws GPUConfigurationException {601if (0 <= deviceId && deviceId < getDeviceCount()) {602try {603CudaDevice device = new CudaDevice(deviceId);604605return device.getFreeMemory();606} catch (Exception e) {607// This is likely a CudaException but we can't catch it specifically608// or class loading verification would fail for this class.609throw new GPUConfigurationException(e.getLocalizedMessage(), e);610}611} else {612throw newGPUConfigurationException("Invalid device"); //$NON-NLS-1$613}614}615616/**617* Gets the minimum length of an int array that will be618* sorted using a GPU if enabled.619*620* @return The minimum length of an int array that will be621* sorted using a GPU.622*/623public int getIntThreshold() {624return defaultIntThreshold;625}626627/**628* Gets the minimum length of a long array that will be629* sorted using a GPU if enabled.630*631* @return The minimum length of a long array that will be632* sorted using a GPU.633*/634public int getLongThreshold() {635return defaultLongThreshold;636}637638/**639* Returns the next CUDA device that is available to run calculations on.640*641* @return -1 if there are no free devices, otherwise returns642* the ID of the free CUDA device.643*/644public int getNextAvailableDevice() {645synchronized (lock) {646int deviceId = busyDevices.nextClearBit(0);647648if (deviceId < getDeviceCount()) {649outputIfVerbose("Device " + deviceId + " was free"); //$NON-NLS-1$ //$NON-NLS-2$650} else {651outputIfVerbose("No free devices!"); //$NON-NLS-1$652deviceId = -1;653}654655return deviceId;656}657}658659/**660* Get the value of the verboseGPUOutput flag.661*662* @return Whether or not verbose output should be produced.663*/664public boolean getVerboseGPUOutput() {665return verboseOutput;666}667668/**669* Use this method to identify if CUDA is supported on this machine and670* within this environment: returns true if the number of CUDA devices671* detected is greater than 0.672*673* @return Returns true if one or more CUDA devices have been detected.674*/675public boolean hasCUDASupport() {676return getDeviceCount() != 0;677}678679/**680* This method provides a means to determine if sort is681* enabled to be used by any available CUDA device.682*683* @return Returns true if GPU sort is enabled.684*/685public boolean isSortEnabledOnGPU() {686return doSortOnGPU;687}688689/**690* This method provides a means to determine if sort is691* forced to be used by any available CUDA device.692*693* @return Returns true if GPU sort is forced.694*/695public boolean isSortEnforcedOnGPU() {696return enforceGPUSort;697}698699private GPUConfigurationException newGPUConfigurationException(String message) {700outputIfVerbose(message);701return new GPUConfigurationException(getOutputHeader() + ' ' + message);702}703704void outputIfVerbose(String message) {705if (verboseOutput) {706System.out.printf(707"%s [time.ms=%d]: %s\n", //$NON-NLS-1$708getOutputHeader(),709Long.valueOf(System.currentTimeMillis()), message);710}711}712713/**714* Print information for each detected CUDA device.715*/716public void printAllDeviceInfo() {717CUDADevice[] allDevices = getDevices();718719System.out.println("Number of devices: " + allDevices.length); //$NON-NLS-1$720721for (CUDADevice device : allDevices) {722System.out.println(device);723}724}725726/**727* Mark a device as being free; must be in a try finally block as we MUST728* release the handle regardless of whether or not a sort was successful.729*730* @param deviceId The device to be marked as free.731*/732public void releaseDevice(int deviceId) {733synchronized (lock) {734if (0 <= deviceId && deviceId < getDeviceCount()) {735busyDevices.clear(deviceId);736outputIfVerbose("Released device: " + deviceId); //$NON-NLS-1$737}738}739}740741/**742* Sets the default device to the given device ID.743* @param deviceId The new default device.744*/745public void setDefaultDevice(int deviceId) {746defaultDeviceId = deviceId;747}748749/**750* Use this method to set the device to use for subsequent calls.751*752* @param deviceId Set the default device ID to be this.753* @throws GPUConfigurationException Throws this exception if an invalid device754* number was specified.755*/756public void setDevice(int deviceId) throws GPUConfigurationException {757if (0 <= deviceId && deviceId < getDeviceCount()) {758this.setDefaultDevice(deviceId);759} else {760throw newGPUConfigurationException("Invalid device"); //$NON-NLS-1$761}762}763764/**765* Set the value of the verboseGPUOutput flag. When this flag is true, GPU766* output will be produced.767*768* @param condition Whether or not verbose output should be produced.769*/770public void setVerboseGPU(boolean condition) {771verboseOutput = condition;772}773774}775776777