[dxbc] Implemented new workaround for depth-compare ops on Nvidia

doitsujin · Mar 1, 2018 · 0e9b7d7 · 0e9b7d7
1 parent c21ebd7
commit 0e9b7d7
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 2 deletions.
diff --git a/src/dxbc/dxbc_compiler.cpp b/src/dxbc/dxbc_compiler.cpp
@@ -2683,6 +2683,9 @@ namespace dxvk {
           DxbcRegMask(true, false, false, false))
       : DxbcRegisterValue();
 
+    if (isDepthCompare && m_options.addExtraDrefCoordComponent && coord.type.ccount < 4)
+      coord = emitRegisterConcat(coord, referenceValue);
+
     // Determine the sampled image type based on the opcode.
     const uint32_t sampledImageType = isDepthCompare
       ? m_module.defSampledImageType(m_textures.at(textureId).depthTypeId)
@@ -2797,6 +2800,9 @@ namespace dxvk {
       ? emitRegisterLoad(ins.src[3], DxbcRegMask(true, false, false, false))
       : DxbcRegisterValue();
 
+    if (isDepthCompare && m_options.addExtraDrefCoordComponent && coord.type.ccount < 4)
+      coord = emitRegisterConcat(coord, referenceValue);
+
     // Load explicit gradients for sample operations that require them
     const bool hasExplicitGradients = ins.op == DxbcOpcode::SampleD;
 
@@ -3583,16 +3589,32 @@ namespace dxvk {
   }
 
 
+  DxbcRegisterValue DxbcCompiler::emitRegisterConcat( // Joins two register values into a single composite value.
+          DxbcRegisterValue       value1,   // first constituent; its component type becomes the result's component type
+          DxbcRegisterValue       value2) { // second constituent, placed after value1 in the result
+    std::array<uint32_t, 2> ids = // constituent ids, in the order they are handed to the construct op
+      {{ value1.id, value2.id }};
+
+    DxbcRegisterValue result;
+    result.type.ctype  = value1.type.ctype; // value2's component type is not checked here; presumably callers pass matching types
+    result.type.ccount = value1.type.ccount + value2.type.ccount; // result width is the sum of both inputs; NOTE(review): no upper bound is enforced here, callers must keep the sum valid
+    result.id = m_module.opCompositeConstruct( // a single construct op builds the result from both ids
+      getVectorTypeId(result.type), // type id looked up for the combined component type and count
+      ids.size(), ids.data());
+    return result;
+  }
+
+
   DxbcRegisterValue DxbcCompiler::emitRegisterExtend(
           DxbcRegisterValue       value,
           uint32_t                size) {
     if (size == 1)
       return value;
 
-    std::array<uint32_t, 4> ids = {
+    std::array<uint32_t, 4> ids = {{
       value.id, value.id,
       value.id, value.id, 
-    };
+    }};
 
     DxbcRegisterValue result;
     result.type.ctype  = value.type.ctype;

diff --git a/src/dxbc/dxbc_compiler.h b/src/dxbc/dxbc_compiler.h
@@ -652,6 +652,10 @@ namespace dxvk {
             DxbcRegisterValue       srcValue,
             DxbcRegMask             srcMask);
 
+    DxbcRegisterValue emitRegisterConcat(
+            DxbcRegisterValue       value1,
+            DxbcRegisterValue       value2);
+
     DxbcRegisterValue emitRegisterExtend(
             DxbcRegisterValue       value,
             uint32_t                size);

diff --git a/src/dxbc/dxbc_options.cpp b/src/dxbc/dxbc_options.cpp
@@ -10,11 +10,18 @@ namespace dxvk {
       = static_cast<DxvkGpuVendor>(deviceProps.vendorID);
 
     if (vendor == DxvkGpuVendor::Nvidia) {
+      // The driver expects the coordinate
+      // vector to have an extra component
+      this->addExtraDrefCoordComponent = true;
+
       // From vkd3d: NMin/NMax/NClamp crash the driver.
       this->useSimpleMinMaxClamp = true;
     }
 
     // Inform the user about which workarounds are enabled
+    if (this->addExtraDrefCoordComponent)
+      Logger::warn("DxbcOptions: Growing coordinate vector for Dref operations");
+
     if (this->useSimpleMinMaxClamp)
       Logger::warn("DxbcOptions: Using FMin/FMax/FClamp instead of NMin/NMax/NClamp");
   }

diff --git a/src/dxbc/dxbc_options.h b/src/dxbc/dxbc_options.h
@@ -15,6 +15,9 @@ namespace dxvk {
     DxbcOptions(
       const Rc<DxvkDevice>& device);
 
+    /// Add extra component to dref coordinate vector
+    bool addExtraDrefCoordComponent = false;
+
     /// Use Fmin/Fmax instead of Nmin/Nmax.
     bool useSimpleMinMaxClamp = false;
   };