Skip to content

Commit

Permalink
Merge pull request #1314 from bharathappali/gpu-support-pr-2
Browse files Browse the repository at this point in the history
Add Accelerator-specific interfaces and their respective implementations
  • Loading branch information
dinogun authored Oct 4, 2024
2 parents f709671 + 71d0d41 commit a1af211
Show file tree
Hide file tree
Showing 9 changed files with 450 additions and 0 deletions.
52 changes: 52 additions & 0 deletions src/main/java/com/autotune/analyzer/utils/AnalyzerConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,58 @@ public enum RegisterRecommendationModelStatus {
INVALID
}

/**
 * Categories of devices that can be tracked for a container.
 * The declaration order is kept as-is since ordinals depend on it.
 */
public enum DeviceType {
    /** Processor device. */
    CPU,
    /** Memory device. */
    MEMORY,
    /** Network device. */
    NETWORK,
    /** Accelerator device (for example a GPU). */
    ACCELERATOR
}

/**
 * Attributes of a device that can be named as the lookup key when
 * searching for a device by parameter.
 * The declaration order is kept as-is since ordinals depend on it.
 */
public enum DeviceParameters {
    /** Model name of the device. */
    MODEL_NAME,
    /** UUID of the device. */
    UUID,
    /** Host name associated with the device. */
    HOSTNAME,
    /** Name attribute of the device. */
    NAME,
    /** Manufacturer of the device. */
    MANUFACTURER,
    /** Device name attribute of the device. */
    DEVICE_NAME
}

/**
 * Constants related to accelerator devices. Pure constants holder.
 */
public static final class AcceleratorConstants {

    /**
     * Model name strings of the supported accelerators.
     */
    public static final class SupportedAccelerators {
        public static final String A100_80_GB = "A100-80GB";
        public static final String A100_40_GB = "A100-40GB";
        public static final String H100 = "H100";

        private SupportedAccelerators() {
            // constants holder, never instantiated
        }
    }

    /**
     * Profile name strings for accelerators, grouped by the accelerator
     * models they apply to.
     */
    public static final class AcceleratorProfiles {
        // A100 40GB Profiles
        public static final String PROFILE_1G_5GB = "1g.5gb";
        public static final String PROFILE_1G_10GB = "1g.10gb";
        public static final String PROFILE_2G_10GB = "2g.10gb";
        public static final String PROFILE_3G_20GB = "3g.20gb";
        public static final String PROFILE_4G_20GB = "4g.20gb";
        public static final String PROFILE_7G_40GB = "7g.40gb";

        // A100 80GB & H100 80GB Profiles
        public static final String PROFILE_1G_20GB = "1g.20gb";
        public static final String PROFILE_2G_20GB = "2g.20gb";
        public static final String PROFILE_3G_40GB = "3g.40gb";
        public static final String PROFILE_4G_40GB = "4g.40gb";
        public static final String PROFILE_7G_80GB = "7g.80gb";

        private AcceleratorProfiles() {
            // constants holder, never instantiated
        }
    }

    private AcceleratorConstants() {
        // constants holder, never instantiated
    }
}

public static final class ExperimentTypes {
public static final String NAMESPACE_EXPERIMENT = "namespace";
public static final String CONTAINER_EXPERIMENT = "container";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
package com.autotune.common.data.system.info.device;

import com.autotune.analyzer.utils.AnalyzerConstants;
import com.autotune.common.data.system.info.device.accelerator.AcceleratorDeviceData;

import java.util.ArrayList;
import java.util.HashMap;

/**
 * This class stores the device entries linked to the container, grouped by device type.
 *
 * Only accelerator detection is tracked dynamically; CPU, memory and network are
 * currently reported as detected by default.
 */
public class ContainerDeviceList implements DeviceHandler, DeviceComponentDetector {
    private final HashMap<AnalyzerConstants.DeviceType, ArrayList<DeviceDetails>> deviceMap;
    private boolean isAcceleratorDeviceDetected;
    private boolean isCPUDeviceDetected;
    private boolean isMemoryDeviceDetected;
    private boolean isNetworkDeviceDetected;

    /**
     * Creates an empty device list with no accelerator detected.
     */
    public ContainerDeviceList() {
        this.deviceMap = new HashMap<AnalyzerConstants.DeviceType, ArrayList<DeviceDetails>>();
        this.isAcceleratorDeviceDetected = false;
        // Currently setting up CPU, Memory and Network as true by default
        this.isCPUDeviceDetected = true;
        this.isMemoryDeviceDetected = true;
        this.isNetworkDeviceDetected = true;
    }

    /**
     * Registers a device under the given device type.
     * The call is silently ignored when either argument is null.
     *
     * @param deviceType - Type of the device Eg: CPU, Memory, Network or Accelerator
     * @param deviceInfo - Details of the device to add
     */
    @Override
    public void addDevice(AnalyzerConstants.DeviceType deviceType, DeviceDetails deviceInfo) {
        if (null == deviceType || null == deviceInfo) {
            // TODO: Handle appropriate returns in future
            return;
        }

        if (deviceType == AnalyzerConstants.DeviceType.ACCELERATOR) {
            this.isAcceleratorDeviceDetected = true;
        }

        // TODO: Handle multiple same entries
        // Currently only first MIG is getting added so no check for existing duplicates is done
        ArrayList<DeviceDetails> deviceDetailsList = this.deviceMap.get(deviceType);
        if (null == deviceDetailsList) {
            deviceDetailsList = new ArrayList<DeviceDetails>();
            this.deviceMap.put(deviceType, deviceDetailsList);
        }
        deviceDetailsList.add(deviceInfo);
    }

    /**
     * Removes a previously added device from the given device type, if present.
     * When the last accelerator entry is removed (or none exists), the accelerator
     * detection flag is cleared. The call is silently ignored when either argument is null.
     *
     * @param deviceType - Type of the device Eg: CPU, Memory, Network or Accelerator
     * @param deviceInfo - Details of the device to remove
     */
    @Override
    public void removeDevice(AnalyzerConstants.DeviceType deviceType, DeviceDetails deviceInfo) {
        if (null == deviceType || null == deviceInfo) {
            // TODO: Handle appropriate returns in future
            return;
        }

        ArrayList<DeviceDetails> deviceDetailsList = this.deviceMap.get(deviceType);
        if (null != deviceDetailsList) {
            // Removes the first matching entry only; no-op if the device was never added
            deviceDetailsList.remove(deviceInfo);
        }

        if (deviceType == AnalyzerConstants.DeviceType.ACCELERATOR
                && (null == deviceDetailsList || deviceDetailsList.isEmpty())) {
            this.isAcceleratorDeviceDetected = false;
        }
    }

    /**
     * Placeholder for updating an existing device entry; currently does nothing.
     *
     * @param deviceType - Type of the device Eg: CPU, Memory, Network or Accelerator
     * @param deviceInfo - Updated details of the device
     */
    @Override
    public void updateDevice(AnalyzerConstants.DeviceType deviceType, DeviceDetails deviceInfo) {
        // TODO: Need to be implemented if we need a dynamic experiment device updates
    }

    /**
     * Returns the Device which matches the identifier based on the device parameter passed
     * @param deviceType - Type of the device Eg: CPU, Memory, Network or Accelerator
     * @param matchIdentifier - String which needs to be matched (compared case-insensitively)
     * @param deviceParameters - Parameter to search in device details list
     * @return the first matching DeviceDetails object, or null when any argument is null,
     *         the identifier is empty, no device of that type exists, or nothing matches
     *
     * USE CASE: To search the device based on a particular parameter, Let's say you have multiple accelerators
     *           to the container, you can pass the Model name as parameter and name of model to get the particular
     *           DeviceDetail object.
     *
     * NOTE: Only the ACCELERATOR device type with the MODEL_NAME parameter is supported currently;
     *       every other combination returns null.
     */
    @Override
    public DeviceDetails getDeviceByParameter(AnalyzerConstants.DeviceType deviceType, String matchIdentifier, AnalyzerConstants.DeviceParameters deviceParameters) {
        if (null == deviceType || null == matchIdentifier || null == deviceParameters) {
            return null;
        }
        if (matchIdentifier.isEmpty()) {
            return null;
        }

        ArrayList<DeviceDetails> deviceDetailsList = this.deviceMap.get(deviceType);
        if (null == deviceDetailsList || deviceDetailsList.isEmpty()) {
            return null;
        }

        // Todo: Need to add extractors for each device type currently implementing for GPU
        if (deviceType == AnalyzerConstants.DeviceType.ACCELERATOR
                && deviceParameters == AnalyzerConstants.DeviceParameters.MODEL_NAME) {
            for (DeviceDetails deviceDetails : deviceDetailsList) {
                // Skip entries that are not accelerator data instead of failing on the cast
                if (!(deviceDetails instanceof AcceleratorDeviceData)) {
                    continue;
                }
                AcceleratorDeviceData deviceData = (AcceleratorDeviceData) deviceDetails;
                // Called on matchIdentifier (known non-null) so a null model name cannot throw
                if (matchIdentifier.equalsIgnoreCase(deviceData.getModelName())) {
                    return deviceData;
                }
            }
        }

        return null;
    }

    /**
     * Returns the devices registered for the given device type.
     *
     * @param deviceType - Type of the device Eg: CPU, Memory, Network or Accelerator
     * @return the internal list of devices for that type (not a copy), or null when the
     *         type is null or no device of that type is registered
     */
    @Override
    public ArrayList<DeviceDetails> getDevices(AnalyzerConstants.DeviceType deviceType) {
        if (null == deviceType) {
            return null;
        }

        ArrayList<DeviceDetails> deviceDetailsList = this.deviceMap.get(deviceType);
        if (null == deviceDetailsList || deviceDetailsList.isEmpty()) {
            return null;
        }

        return deviceDetailsList;
    }

    @Override
    public boolean isAcceleratorDeviceDetected() {
        return this.isAcceleratorDeviceDetected;
    }

    @Override
    public boolean isCPUDeviceDetected() {
        return this.isCPUDeviceDetected;
    }

    @Override
    public boolean isMemoryDeviceDetected() {
        return this.isMemoryDeviceDetected;
    }

    @Override
    public boolean isNetworkDeviceDetected() {
        return this.isNetworkDeviceDetected;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.autotune.common.data.system.info.device;

/**
 * Reports, per device category, whether a device of that category is detected.
 */
public interface DeviceComponentDetector {
    /** @return true if an accelerator device is detected */
    boolean isAcceleratorDeviceDetected();

    /** @return true if a CPU device is detected */
    boolean isCPUDeviceDetected();

    /** @return true if a memory device is detected */
    boolean isMemoryDeviceDetected();

    /** @return true if a network device is detected */
    boolean isNetworkDeviceDetected();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package com.autotune.common.data.system.info.device;

import com.autotune.analyzer.utils.AnalyzerConstants;

/**
 * Common contract for a device entry: every device reports its device type.
 */
public interface DeviceDetails {
    /** @return the type of this device */
    AnalyzerConstants.DeviceType getType();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.autotune.common.data.system.info.device;

import com.autotune.analyzer.utils.AnalyzerConstants;

import java.util.ArrayList;

/**
 * Operations for maintaining and querying a collection of devices keyed by device type.
 */
public interface DeviceHandler {
    /**
     * Adds a device under the given device type.
     */
    void addDevice(AnalyzerConstants.DeviceType deviceType, DeviceDetails deviceInfo);

    /**
     * Removes a device from the given device type.
     */
    void removeDevice(AnalyzerConstants.DeviceType deviceType, DeviceDetails deviceInfo);

    /**
     * Updates a device of the given device type.
     */
    void updateDevice(AnalyzerConstants.DeviceType deviceType, DeviceDetails deviceInfo);

    /**
     * Looks up a device of the given type whose selected parameter matches the identifier.
     *
     * @param deviceType       type of the device to search
     * @param matchIdentifier  value to match against
     * @param deviceParameters which device parameter to compare with the identifier
     * @return the matching device details
     */
    DeviceDetails getDeviceByParameter(AnalyzerConstants.DeviceType deviceType,
                                       String matchIdentifier,
                                       AnalyzerConstants.DeviceParameters deviceParameters);

    /**
     * Returns the devices stored for the given device type.
     */
    ArrayList<DeviceDetails> getDevices(AnalyzerConstants.DeviceType deviceType);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package com.autotune.common.data.system.info.device.accelerator;

import com.autotune.analyzer.utils.AnalyzerConstants;

/**
 * Holds the details of a single accelerator device. All attributes except the
 * MIG flag are fixed at construction time; the manufacturer is always "NVIDIA".
 */
public class AcceleratorDeviceData implements AcceleratorDeviceDetails {
    private final String manufacturer;
    private final String modelName;
    private final String hostName;
    private final String uuid;
    private final String deviceName;
    private boolean migEnabled;

    /**
     * @param modelName  model name of the accelerator
     * @param hostName   host name associated with the accelerator
     * @param UUID       UUID of the accelerator
     * @param deviceName device name of the accelerator
     * @param isMIG      initial value of the MIG flag
     */
    public AcceleratorDeviceData(String modelName, String hostName, String UUID, String deviceName, boolean isMIG) {
        this.manufacturer = "NVIDIA";
        this.modelName = modelName;
        this.hostName = hostName;
        this.uuid = UUID;
        this.deviceName = deviceName;
        this.migEnabled = isMIG;
    }

    /** @return the manufacturer of the accelerator */
    @Override
    public String getManufacturer() {
        return this.manufacturer;
    }

    /** @return the model name given at construction */
    @Override
    public String getModelName() {
        return this.modelName;
    }

    /** @return the host name given at construction */
    @Override
    public String getHostName() {
        return this.hostName;
    }

    /** @return the UUID given at construction */
    @Override
    public String getUUID() {
        return this.uuid;
    }

    /** @return the device name given at construction */
    @Override
    public String getDeviceName() {
        return this.deviceName;
    }

    /** @return the current value of the MIG flag */
    public boolean isMIG() {
        return this.migEnabled;
    }

    /** @param isMIG new value of the MIG flag */
    public void setMIG(boolean isMIG) {
        this.migEnabled = isMIG;
    }

    /** @return always {@code AnalyzerConstants.DeviceType.ACCELERATOR} */
    @Override
    public AnalyzerConstants.DeviceType getType() {
        return AnalyzerConstants.DeviceType.ACCELERATOR;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.autotune.common.data.system.info.device.accelerator;

import com.autotune.common.data.system.info.device.DeviceDetails;

/**
 * Device details specific to accelerators, in addition to the common device contract.
 */
public interface AcceleratorDeviceDetails extends DeviceDetails {
    /** @return the manufacturer of the accelerator */
    String getManufacturer();

    /** @return the model name of the accelerator */
    String getModelName();

    /** @return the host name associated with the accelerator */
    String getHostName();

    /** @return the UUID of the accelerator */
    String getUUID();

    /** @return the device name of the accelerator */
    String getDeviceName();
}
Loading

0 comments on commit a1af211

Please sign in to comment.