From f2cf9d4d0be34d3f7ecb4178c356cdfc34b7634c Mon Sep 17 00:00:00 2001
From: Jeremy Reizenstein <reizenstein@fb.com>
Date: Thu, 24 Mar 2022 06:52:05 -0700
Subject: [PATCH] Windows fix

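Summary: Attempt to reduce nvcc trouble on Windows by (1) passing the `-std=c++14` flag to nvcc only on non-Windows platforms and (2) avoiding `torch/extension.h`, which introduces pybind11, in `.cu` files. The `.cu` files no longer include `utils/pytorch3d_cutils.h`, so `MAX_THREADS_PER_BLOCK` moves from that header into `sample_farthest_points.cu`.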

Reviewed By: patricklabatut

Differential Revision: D34969868

fbshipit-source-id: f3878d6a2ba9d644e87ae7b6377cb5008b4b6ce3
---
 pytorch3d/csrc/ball_query/ball_query.cu                       | 1 -
 pytorch3d/csrc/iou_box3d/iou_box3d.cu                         | 1 -
 .../csrc/sample_farthest_points/sample_farthest_points.cu     | 4 +++-
 pytorch3d/csrc/utils/pytorch3d_cutils.h                       | 3 ---
 setup.py                                                      | 3 ++-
 5 files changed, 5 insertions(+), 7 deletions(-)
diff --git a/pytorch3d/csrc/ball_query/ball_query.cu b/pytorch3d/csrc/ball_query/ball_query.cu
index 7c21b8b39..586701c18 100644
--- a/pytorch3d/csrc/ball_query/ball_query.cu
+++ b/pytorch3d/csrc/ball_query/ball_query.cu
@@ -12,7 +12,6 @@
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include "utils/pytorch3d_cutils.h"
 
 // A chunk of work is blocksize-many points of P1.
 // The number of potential chunks to do is N*(1+(P1-1)/blocksize)
diff --git a/pytorch3d/csrc/iou_box3d/iou_box3d.cu b/pytorch3d/csrc/iou_box3d/iou_box3d.cu
index f9c6ff472..270f7f18a 100644
--- a/pytorch3d/csrc/iou_box3d/iou_box3d.cu
+++ b/pytorch3d/csrc/iou_box3d/iou_box3d.cu
@@ -15,7 +15,6 @@
 #include <thrust/device_vector.h>
 #include <thrust/tuple.h>
 #include "iou_box3d/iou_utils.cuh"
-#include "utils/pytorch3d_cutils.h"
 
 // Parallelize over N*M computations which can each be done
 // independently
diff --git a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
index 8ed45522b..a91e2df68 100644
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
@@ -12,7 +12,6 @@
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include "utils/pytorch3d_cutils.h"
 #include "utils/warp_reduce.cuh"
 
 template <unsigned int block_size>
@@ -170,6 +169,9 @@ at::Tensor FarthestPointSamplingCuda(
   // This will ensure each thread processes the minimum necessary number of
   // points (P/threads).
   const int points_pow_2 = std::log(static_cast<double>(P)) / std::log(2.0);
+
+  // Max possible threads per block
+  const int MAX_THREADS_PER_BLOCK = 1024;
   const size_t threads = max(min(1 << points_pow_2, MAX_THREADS_PER_BLOCK), 1);
 
   // Create the accessors
diff --git a/pytorch3d/csrc/utils/pytorch3d_cutils.h b/pytorch3d/csrc/utils/pytorch3d_cutils.h
index 3ef4144ae..48d04546e 100644
--- a/pytorch3d/csrc/utils/pytorch3d_cutils.h
+++ b/pytorch3d/csrc/utils/pytorch3d_cutils.h
@@ -15,6 +15,3 @@
 #define CHECK_CONTIGUOUS_CUDA(x) \
   CHECK_CUDA(x);                 \
   CHECK_CONTIGUOUS(x)
-
-// Max possible threads per block
-const int MAX_THREADS_PER_BLOCK = 1024;
diff --git a/setup.py b/setup.py
index c39d71b89..00706f048 100755
--- a/setup.py
+++ b/setup.py
@@ -57,12 +57,13 @@ def get_extensions():
         define_macros += [("THRUST_IGNORE_CUB_VERSION_CHECK", None)]
         cub_home = os.environ.get("CUB_HOME", None)
         nvcc_args = [
-            "-std=c++14",
             "-DCUDA_HAS_FP16=1",
             "-D__CUDA_NO_HALF_OPERATORS__",
             "-D__CUDA_NO_HALF_CONVERSIONS__",
             "-D__CUDA_NO_HALF2_OPERATORS__",
         ]
+        if os.name != "nt":
+            nvcc_args.append("-std=c++14")
         if cub_home is None:
             prefix = os.environ.get("CONDA_PREFIX", None)
             if prefix is not None and os.path.isdir(prefix + "/include/cub"):