From f2cf9d4d0be34d3f7ecb4178c356cdfc34b7634c Mon Sep 17 00:00:00 2001 From: Jeremy Reizenstein Date: Thu, 24 Mar 2022 06:52:05 -0700 Subject: [PATCH] windows fix Summary: Attempt to reduce nvcc trouble on Windows by (1) not passing the `-std=c++14` flag to nvcc there and (2) avoiding `torch/extension.h`, which introduces pybind11, in `.cu` files. Reviewed By: patricklabatut Differential Revision: D34969868 fbshipit-source-id: f3878d6a2ba9d644e87ae7b6377cb5008b4b6ce3 --- pytorch3d/csrc/ball_query/ball_query.cu | 1 - pytorch3d/csrc/iou_box3d/iou_box3d.cu | 1 - .../csrc/sample_farthest_points/sample_farthest_points.cu | 4 +++- pytorch3d/csrc/utils/pytorch3d_cutils.h | 3 --- setup.py | 3 ++- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/pytorch3d/csrc/ball_query/ball_query.cu b/pytorch3d/csrc/ball_query/ball_query.cu index 7c21b8b39..586701c18 100644 --- a/pytorch3d/csrc/ball_query/ball_query.cu +++ b/pytorch3d/csrc/ball_query/ball_query.cu @@ -12,7 +12,6 @@ #include #include #include -#include "utils/pytorch3d_cutils.h" // A chunk of work is blocksize-many points of P1. 
// The number of potential chunks to do is N*(1+(P1-1)/blocksize) diff --git a/pytorch3d/csrc/iou_box3d/iou_box3d.cu b/pytorch3d/csrc/iou_box3d/iou_box3d.cu index f9c6ff472..270f7f18a 100644 --- a/pytorch3d/csrc/iou_box3d/iou_box3d.cu +++ b/pytorch3d/csrc/iou_box3d/iou_box3d.cu @@ -15,7 +15,6 @@ #include #include #include "iou_box3d/iou_utils.cuh" -#include "utils/pytorch3d_cutils.h" // Parallelize over N*M computations which can each be done // independently diff --git a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu index 8ed45522b..a91e2df68 100644 --- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu +++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu @@ -12,7 +12,6 @@ #include #include #include -#include "utils/pytorch3d_cutils.h" #include "utils/warp_reduce.cuh" template @@ -170,6 +169,9 @@ at::Tensor FarthestPointSamplingCuda( // This will ensure each thread processes the minimum necessary number of // points (P/threads). 
const int points_pow_2 = std::log(static_cast(P)) / std::log(2.0); + + // Max possible threads per block + const int MAX_THREADS_PER_BLOCK = 1024; const size_t threads = max(min(1 << points_pow_2, MAX_THREADS_PER_BLOCK), 1); // Create the accessors diff --git a/pytorch3d/csrc/utils/pytorch3d_cutils.h b/pytorch3d/csrc/utils/pytorch3d_cutils.h index 3ef4144ae..48d04546e 100644 --- a/pytorch3d/csrc/utils/pytorch3d_cutils.h +++ b/pytorch3d/csrc/utils/pytorch3d_cutils.h @@ -15,6 +15,3 @@ #define CHECK_CONTIGUOUS_CUDA(x) \ CHECK_CUDA(x); \ CHECK_CONTIGUOUS(x) - -// Max possible threads per block -const int MAX_THREADS_PER_BLOCK = 1024; diff --git a/setup.py b/setup.py index c39d71b89..00706f048 100755 --- a/setup.py +++ b/setup.py @@ -57,12 +57,13 @@ def get_extensions(): define_macros += [("THRUST_IGNORE_CUB_VERSION_CHECK", None)] cub_home = os.environ.get("CUB_HOME", None) nvcc_args = [ - "-std=c++14", "-DCUDA_HAS_FP16=1", "-D__CUDA_NO_HALF_OPERATORS__", "-D__CUDA_NO_HALF_CONVERSIONS__", "-D__CUDA_NO_HALF2_OPERATORS__", ] + if os.name != "nt": + nvcc_args.append("-std=c++14") if cub_home is None: prefix = os.environ.get("CONDA_PREFIX", None) if prefix is not None and os.path.isdir(prefix + "/include/cub"):