Skip to content

Commit

Permalink
nvapi: Implement NvAPI_D3D12_CreateCubinComputeShaderEx
Browse files Browse the repository at this point in the history
- Refactor NvapiD3d12Device to centralize CuBIN creation into the new
  NvapiD3d12Device::CreateCubinComputeShaderEx() function

- Introduce a map pairing CuBIN handles with the amount of dynamic
  shared memory they require at launch-time.
  • Loading branch information
liam-middlebrook authored and jp7677 committed Aug 18, 2023
1 parent d7988bc commit b4b33c2
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 4 deletions.
42 changes: 38 additions & 4 deletions src/d3d12/nvapi_d3d12_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,37 @@ namespace dxvk {
if (cubinDevice == nullptr)
return false;

return SUCCEEDED(cubinDevice->CreateCubinComputeShaderWithName(cubinData, cubinSize, blockX, blockY, blockZ, shaderName, reinterpret_cast<D3D12_CUBIN_DATA_HANDLE**>(pShader)));
return CreateCubinComputeShaderEx(device, cubinData, cubinSize, blockX, blockY, blockZ, 0 /* smemSize */, shaderName, pShader);
}

/**
 * Creates a CuBIN compute shader and records the dynamic shared memory size
 * that should be used when the shader is launched.
 *
 * @param device      D3D12 device used to look up the CuBIN extension interface.
 * @param cubinData   CuBIN blob, forwarded unchanged to the extension interface.
 * @param cubinSize   Size of cubinData, forwarded unchanged.
 * @param blockX      Block dimension, forwarded unchanged.
 * @param blockY      Block dimension, forwarded unchanged.
 * @param blockZ      Block dimension, forwarded unchanged.
 * @param smemSize    Shared memory size stored in m_cubinSmemMap for the new handle.
 * @param shaderName  Shader name, forwarded unchanged.
 * @param pShader     Receives the created shader handle; must not be null.
 * @return true if the shader was created and its smemSize recorded, false if
 *         pShader is null, no CuBIN device is available, or creation failed.
 */
bool NvapiD3d12Device::CreateCubinComputeShaderEx(ID3D12Device* device, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, NvU32 smemSize, const char* shaderName, NVDX_ObjectHandle* pShader) {
    // The handle written through pShader is dereferenced below to key the map.
    if (pShader == nullptr)
        return false;

    auto cubinDevice = GetCubinDevice(device);
    if (cubinDevice == nullptr)
        return false;

    // GetCubinDevice() already returns the ID3D12DeviceExt interface, so it is
    // called directly (as DestroyCubinComputeShader does) instead of going
    // through an extra QueryInterface for the same interface.
    if (FAILED(cubinDevice->CreateCubinComputeShaderWithName(cubinData, cubinSize, blockX, blockY, blockZ, shaderName, reinterpret_cast<D3D12_CUBIN_DATA_HANDLE**>(pShader))))
        return false;

    std::scoped_lock lock(m_CubinSmemMutex);
    // Overwrite any existing entry: if a handle value is ever reused while an
    // old entry is still present, the launch must see the new smemSize.
    m_cubinSmemMap.insert_or_assign(*pShader, smemSize);
    return true;
}

/**
 * Destroys a CuBIN compute shader and removes its entry from m_cubinSmemMap.
 *
 * @param device  D3D12 device used to look up the CuBIN extension interface.
 * @param shader  Handle of the shader to destroy.
 * @return true if the shader was destroyed, false if no CuBIN device is
 *         available or the destroy call failed.
 */
bool NvapiD3d12Device::DestroyCubinComputeShader(ID3D12Device* device, NVDX_ObjectHandle shader) {
    auto cubinDevice = GetCubinDevice(device);
    if (cubinDevice == nullptr)
        return false;

    // Do not return the destroy result directly: the map entry below must be
    // removed on success, otherwise stale entries accumulate in m_cubinSmemMap.
    if (FAILED(cubinDevice->DestroyCubinComputeShader(reinterpret_cast<D3D12_CUBIN_DATA_HANDLE*>(shader))))
        return false;

    // Only forget the shared memory size once the shader is actually gone.
    std::scoped_lock lock(m_CubinSmemMutex);
    m_cubinSmemMap.erase(shader);
    return true;
}

bool NvapiD3d12Device::GetCudaTextureObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE srvHandle, D3D12_CPU_DESCRIPTOR_HANDLE samplerHandle, NvU32* cudaTextureHandle) {
Expand All @@ -67,10 +89,22 @@ namespace dxvk {
auto cmdList = commandListExt.value().CommandListExt;
auto interfaceVersion = commandListExt.value().InterfaceVersion;

if (interfaceVersion >= 1)
return SUCCEEDED(cmdList->LaunchCubinShaderEx(reinterpret_cast<D3D12_CUBIN_DATA_HANDLE*>(pShader), blockX, blockY, blockZ, 0, params, paramSize, nullptr, 0));
uint32_t smem = 0;
std::scoped_lock lock(m_CubinSmemMutex);
auto it = m_cubinSmemMap.find(pShader);
if (it != m_cubinSmemMap.end())
smem = it->second;
else
log::write("Failed to find CuBIN in m_cubinSmemMap, defaulting to 0");

if (interfaceVersion >= 1)
return SUCCEEDED(cmdList->LaunchCubinShaderEx(reinterpret_cast<D3D12_CUBIN_DATA_HANDLE*>(pShader), blockX, blockY, blockZ, smem, params, paramSize, nullptr, 0));
else {
if (smem != 0)
log::write("Non-zero SMEM value supplied for CuBIN but ID3D12GraphicsCommandListExt1 not supported! This may cause corruption");

return SUCCEEDED(cmdList->LaunchCubinShader(reinterpret_cast<D3D12_CUBIN_DATA_HANDLE*>(pShader), blockX, blockY, blockZ, params, paramSize));
}
}

bool NvapiD3d12Device::CaptureUAVInfo(ID3D12Device* device, NVAPI_UAV_INFO* pUAVInfo) {
Expand Down
3 changes: 3 additions & 0 deletions src/d3d12/nvapi_d3d12_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ namespace dxvk {
static bool SetDepthBoundsTestValues(ID3D12GraphicsCommandList* commandList, float minDepth, float maxDepth);

static bool CreateCubinComputeShaderWithName(ID3D12Device* device, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const char* shaderName, NVDX_ObjectHandle* pShader);
static bool CreateCubinComputeShaderEx(ID3D12Device* device, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, NvU32 smemSize, const char* shaderName, NVDX_ObjectHandle* pShader);
static bool DestroyCubinComputeShader(ID3D12Device* device, NVDX_ObjectHandle shader);
static bool GetCudaTextureObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE srvHandle, D3D12_CPU_DESCRIPTOR_HANDLE samplerHandle, NvU32* cudaTextureHandle);
static bool GetCudaSurfaceObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE uavHandle, NvU32* cudaSurfaceHandle);
Expand All @@ -30,9 +31,11 @@ namespace dxvk {
private:
inline static std::unordered_map<ID3D12Device*, ID3D12DeviceExt*> m_cubinDeviceMap;
inline static std::unordered_map<ID3D12GraphicsCommandList*, CommandListExtWithVersion> m_CommandListMap;
inline static std::unordered_map<NVDX_ObjectHandle, NvU32> m_cubinSmemMap;

inline static std::mutex m_CommandListMutex;
inline static std::mutex m_CubinDeviceMutex;
inline static std::mutex m_CubinSmemMutex;

[[nodiscard]] static Com<ID3D12DeviceExt> GetCubinDevice(ID3D12Device* device);
[[nodiscard]] static Com<ID3D12DeviceExt> GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension);
Expand Down
14 changes: 14 additions & 0 deletions src/nvapi_d3d12.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@ extern "C" {
return NotSupported(__func__);
}

// NVAPI entry point: validates pDevice, then forwards every argument to
// NvapiD3d12Device::CreateCubinComputeShaderEx and maps its boolean result to
// an NvAPI_Status via the Ok/Error helpers.
NvAPI_Status __cdecl NvAPI_D3D12_CreateCubinComputeShaderEx(ID3D12Device* pDevice, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, NvU32 smemSize, const char* shaderName, NVDX_ObjectHandle* pShader) {
    constexpr auto functionName = __func__;
    // Passed to the Error/Ok helpers; presumably used to log each outcome only
    // once (the helpers are defined outside this view; confirm there).
    static bool errorLogged = false;
    static bool successLogged = false;

    if (!pDevice)
        return InvalidArgument(functionName);

    const bool created = NvapiD3d12Device::CreateCubinComputeShaderEx(pDevice, cubinData, cubinSize, blockX, blockY, blockZ, smemSize, shaderName, pShader);
    if (created)
        return Ok(functionName, successLogged);

    return Error(functionName, errorLogged);
}

NvAPI_Status __cdecl NvAPI_D3D12_CreateCubinComputeShaderWithName(ID3D12Device* pDevice, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const char* shaderName, NVDX_ObjectHandle* pShader) {
constexpr auto n = __func__;
static bool alreadyLoggedError = false;
Expand Down
1 change: 1 addition & 0 deletions src/nvapi_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ extern "C" {
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateGraphicsPipelineState)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_SetDepthBoundsTestValues)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderWithName)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderEx)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShader)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_DestroyCubinComputeShader)
INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaTextureObject)
Expand Down

0 comments on commit b4b33c2

Please sign in to comment.