From b4b33c2abd2ba8aa70fa8081459bd2e511ac61ff Mon Sep 17 00:00:00 2001 From: Liam Middlebrook Date: Wed, 16 Aug 2023 16:42:06 -0700 Subject: [PATCH] nvapi: Implement NvAPI_D3D12_CreateCubinComputeShaderEx - Refactor NvapiD3d12Device to centralize CuBIN creation into the new NvapiD3d12Device::CreateCubinComputeShaderEx() function - Introduce a map pairing CuBIN handles with the amount of dynamic shared memory they require at launch-time. --- src/d3d12/nvapi_d3d12_device.cpp | 42 +++++++++++++++++++++++++++++--- src/d3d12/nvapi_d3d12_device.h | 3 +++ src/nvapi_d3d12.cpp | 14 +++++++++++ src/nvapi_interface.cpp | 1 + 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/src/d3d12/nvapi_d3d12_device.cpp b/src/d3d12/nvapi_d3d12_device.cpp index 28a7b527..1958a0d7 100644 --- a/src/d3d12/nvapi_d3d12_device.cpp +++ b/src/d3d12/nvapi_d3d12_device.cpp @@ -32,7 +32,24 @@ namespace dxvk { if (cubinDevice == nullptr) return false; - return SUCCEEDED(cubinDevice->CreateCubinComputeShaderWithName(cubinData, cubinSize, blockX, blockY, blockZ, shaderName, reinterpret_cast(pShader))); + return CreateCubinComputeShaderEx(device, cubinData, cubinSize, blockX, blockY, blockZ, 0 /* smemSize */, shaderName, pShader); + } + + bool NvapiD3d12Device::CreateCubinComputeShaderEx(ID3D12Device* device, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, NvU32 smemSize, const char* shaderName, NVDX_ObjectHandle* pShader) { + auto cubinDevice = GetCubinDevice(device); + if (cubinDevice == nullptr) + return false; + + Com deviceExt; + if (FAILED(cubinDevice->QueryInterface(IID_PPV_ARGS(&deviceExt)))) + return false; + + if (FAILED(deviceExt->CreateCubinComputeShaderWithName(cubinData, cubinSize, blockX, blockY, blockZ, shaderName, reinterpret_cast(pShader)))) + return false; + + std::scoped_lock lock(m_CubinSmemMutex); + m_cubinSmemMap.emplace(*pShader, smemSize); + return true; } bool NvapiD3d12Device::DestroyCubinComputeShader(ID3D12Device* device, NVDX_ObjectHandle shader) { @@ -40,7 +57,12 @@ namespace dxvk { if (cubinDevice == nullptr) return false; - return SUCCEEDED(cubinDevice->DestroyCubinComputeShader(reinterpret_cast(shader))); + if (FAILED(cubinDevice->DestroyCubinComputeShader(reinterpret_cast(shader)))) + return false; + + std::scoped_lock lock(m_CubinSmemMutex); + m_cubinSmemMap.erase(shader); + return true; } bool NvapiD3d12Device::GetCudaTextureObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE srvHandle, D3D12_CPU_DESCRIPTOR_HANDLE samplerHandle, NvU32* cudaTextureHandle) { @@ -67,10 +89,22 @@ namespace dxvk { auto cmdList = commandListExt.value().CommandListExt; auto interfaceVersion = commandListExt.value().InterfaceVersion; - if (interfaceVersion >= 1) - return SUCCEEDED(cmdList->LaunchCubinShaderEx(reinterpret_cast(pShader), blockX, blockY, blockZ, 0, params, paramSize, nullptr, 0)); + uint32_t smem = 0; + std::scoped_lock lock(m_CubinSmemMutex); + auto it = m_cubinSmemMap.find(pShader); + if (it != m_cubinSmemMap.end()) + smem = it->second; else + log::write("Failed to find CuBIN in m_cubinSmemMap, defaulting to 0"); + + if (interfaceVersion >= 1) + return SUCCEEDED(cmdList->LaunchCubinShaderEx(reinterpret_cast(pShader), blockX, blockY, blockZ, smem, params, paramSize, nullptr, 0)); + else { + if (smem != 0) + log::write("Non-zero SMEM value supplied for CuBIN but ID3D12GraphicsCommandListExt1 not supported! This may cause corruption"); + return SUCCEEDED(cmdList->LaunchCubinShader(reinterpret_cast(pShader), blockX, blockY, blockZ, params, paramSize)); + } } bool NvapiD3d12Device::CaptureUAVInfo(ID3D12Device* device, NVAPI_UAV_INFO* pUAVInfo) { diff --git a/src/d3d12/nvapi_d3d12_device.h b/src/d3d12/nvapi_d3d12_device.h index e14d26fd..58a6478e 100644 --- a/src/d3d12/nvapi_d3d12_device.h +++ b/src/d3d12/nvapi_d3d12_device.h @@ -20,6 +20,7 @@ namespace dxvk { static bool SetDepthBoundsTestValues(ID3D12GraphicsCommandList* commandList, float minDepth, float maxDepth); static bool CreateCubinComputeShaderWithName(ID3D12Device* device, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const char* shaderName, NVDX_ObjectHandle* pShader); + static bool CreateCubinComputeShaderEx(ID3D12Device* device, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, NvU32 smemSize, const char* shaderName, NVDX_ObjectHandle* pShader); static bool DestroyCubinComputeShader(ID3D12Device* device, NVDX_ObjectHandle shader); static bool GetCudaTextureObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE srvHandle, D3D12_CPU_DESCRIPTOR_HANDLE samplerHandle, NvU32* cudaTextureHandle); static bool GetCudaSurfaceObject(ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE uavHandle, NvU32* cudaSurfaceHandle); @@ -30,9 +31,11 @@ namespace dxvk { private: inline static std::unordered_map m_cubinDeviceMap; inline static std::unordered_map m_CommandListMap; + inline static std::unordered_map m_cubinSmemMap; inline static std::mutex m_CommandListMutex; inline static std::mutex m_CubinDeviceMutex; + inline static std::mutex m_CubinSmemMutex; [[nodiscard]] static Com GetCubinDevice(ID3D12Device* device); [[nodiscard]] static Com GetDeviceExt(ID3D12Device* device, D3D12_VK_EXTENSION extension); diff --git a/src/nvapi_d3d12.cpp b/src/nvapi_d3d12.cpp index 1382bf10..e185e718 100644 --- a/src/nvapi_d3d12.cpp +++ b/src/nvapi_d3d12.cpp @@ -25,6 +25,20 @@ extern "C" { return NotSupported(__func__); } + NvAPI_Status __cdecl NvAPI_D3D12_CreateCubinComputeShaderEx(ID3D12Device* pDevice, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, NvU32 smemSize, const char* shaderName, NVDX_ObjectHandle* pShader) { + constexpr auto n = __func__; + static bool alreadyLoggedError = false; + static bool alreadyLoggedOk = false; + + if (pDevice == nullptr) + return InvalidArgument(n); + + if (!NvapiD3d12Device::CreateCubinComputeShaderEx(pDevice, cubinData, cubinSize, blockX, blockY, blockZ, smemSize, shaderName, pShader)) + return Error(n, alreadyLoggedError); + + return Ok(n, alreadyLoggedOk); + } + NvAPI_Status __cdecl NvAPI_D3D12_CreateCubinComputeShaderWithName(ID3D12Device* pDevice, const void* cubinData, NvU32 cubinSize, NvU32 blockX, NvU32 blockY, NvU32 blockZ, const char* shaderName, NVDX_ObjectHandle* pShader) { constexpr auto n = __func__; static bool alreadyLoggedError = false; diff --git a/src/nvapi_interface.cpp b/src/nvapi_interface.cpp index 65612f18..b461166e 100644 --- a/src/nvapi_interface.cpp +++ b/src/nvapi_interface.cpp @@ -57,6 +57,7 @@ extern "C" { INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateGraphicsPipelineState) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_SetDepthBoundsTestValues) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderWithName) + INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShaderEx) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_CreateCubinComputeShader) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_DestroyCubinComputeShader) INSERT_AND_RETURN_WHEN_EQUALS(NvAPI_D3D12_GetCudaTextureObject)