Fixed windows MSVC build compatibility #9

Closed · wants to merge 22 commits
Changes from 15 commits
12 changes: 6 additions & 6 deletions pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
@@ -6,7 +6,7 @@
template <typename scalar_t>
__global__ void FaceAreasNormalsKernel(
const scalar_t* __restrict__ verts,
- const long* __restrict__ faces,
+ const int64_t* __restrict__ faces,
scalar_t* __restrict__ face_areas,
scalar_t* __restrict__ face_normals,
const size_t V,
@@ -18,9 +18,9 @@ __global__ void FaceAreasNormalsKernel(
// Each thread computes the area & normal of its respective faces and adds it
// to the global face_areas tensor.
for (size_t f = tid; f < F; f += stride) {
- const long i0 = faces[3 * f + 0];
- const long i1 = faces[3 * f + 1];
- const long i2 = faces[3 * f + 2];
+ const int64_t i0 = faces[3 * f + 0];
+ const int64_t i1 = faces[3 * f + 1];
+ const int64_t i2 = faces[3 * f + 2];

const scalar_t v0_x = verts[3 * i0 + 0];
const scalar_t v0_y = verts[3 * i0 + 1];
@@ -70,12 +70,12 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsCuda(
FaceAreasNormalsKernel<scalar_t>
<<<blocks, threads>>>(
verts.data_ptr<scalar_t>(),
- faces.data_ptr<long>(),
+ faces.data_ptr<int64_t>(),
areas.data_ptr<scalar_t>(),
normals.data_ptr<scalar_t>(),
V,
F);
}));

return std::make_tuple(areas, normals);
}
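Note (illustrative, not part of the diff): the long → int64_t swap matters because PyTorch's face index tensors are 64-bit (kInt64) and at::Tensor::data_ptr<T>() requires T to match the tensor's scalar type. On LP64 platforms (Linux, macOS) long happens to be 64-bit, so data_ptr<long>() compiles; under MSVC's LLP64 model long is only 32-bit and no longer matches. A minimal sketch of the size assumption:

// Illustrative sketch only: why `long` is not a portable stand-in for int64_t.
// On 64-bit Linux/macOS (LP64) sizeof(long) == 8, which is why the old code
// happened to work there.
#include <cstdint>
static_assert(sizeof(int64_t) == 8, "int64_t is 64-bit everywhere");
#ifdef _MSC_VER
static_assert(sizeof(long) == 4, "MSVC (LLP64): long is only 32-bit");
#endif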
8 changes: 4 additions & 4 deletions pytorch3d/csrc/gather_scatter/gather_scatter.cu
@@ -5,7 +5,7 @@
// TODO(T47953967) to make this cuda kernel support all datatypes.
__global__ void gather_scatter_kernel(
const float* __restrict__ input,
- const long* __restrict__ edges,
+ const int64_t* __restrict__ edges,
float* __restrict__ output,
bool directed,
bool backward,
@@ -21,8 +21,8 @@ __global__ void gather_scatter_kernel(
// Edges are split evenly across the blocks.
for (int e = blockIdx.x; e < E; e += gridDim.x) {
// Get indices of vertices which form the edge.
- const long v0 = edges[2 * e + v0_idx];
- const long v1 = edges[2 * e + v1_idx];
+ const int64_t v0 = edges[2 * e + v0_idx];
+ const int64_t v1 = edges[2 * e + v1_idx];

// Split vertex features evenly across threads.
// This implementation will be quite wasteful when D<128 since there will be
@@ -57,7 +57,7 @@ at::Tensor gather_scatter_cuda(

gather_scatter_kernel<<<blocks, threads>>>(
input.data_ptr<float>(),
- edges.data_ptr<long>(),
+ edges.data_ptr<int64_t>(),
output.data_ptr<float>(),
directed,
backward,
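Note (illustrative, not part of the diff): the gather_scatter kernel walks a flattened (E, 2) tensor of int64_t vertex indices, so edge e's endpoints live at offsets 2*e and 2*e + 1. A hedged CPU sketch of the same accumulation for the simple directed, forward case (function name and simplifications are this note's, not the library's):

// CPU sketch of the edge gather/scatter, directed forward case only.
// input/output are V x D row-major feature matrices; which endpoint acts as
// source vs destination is controlled by the directed/backward flags in the
// real kernel.
#include <cstdint>
void gather_scatter_cpu_sketch(const float* input, const int64_t* edges,
                               float* output, int64_t E, int64_t D) {
  for (int64_t e = 0; e < E; ++e) {
    const int64_t v0 = edges[2 * e + 0];
    const int64_t v1 = edges[2 * e + 1];
    for (int64_t d = 0; d < D; ++d) {
      output[v0 * D + d] += input[v1 * D + d];  // accumulate neighbor features
    }
  }
}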
@@ -6,7 +6,7 @@
template <typename scalar_t>
__device__ void WarpReduce(
volatile scalar_t* min_dists,
- volatile long* min_idxs,
+ volatile int64_t* min_idxs,
const size_t tid) {
// s = 32
if (min_dists[tid] > min_dists[tid + 32]) {
@@ -57,7 +57,7 @@ template <typename scalar_t>
__global__ void NearestNeighborKernel(
const scalar_t* __restrict__ points1,
const scalar_t* __restrict__ points2,
- long* __restrict__ idx,
+ int64_t* __restrict__ idx,
const size_t N,
const size_t P1,
const size_t P2,
@@ -74,7 +74,7 @@ __global__ void NearestNeighborKernel(
extern __shared__ char shared_buf[];
scalar_t* x = (scalar_t*)shared_buf; // scalar_t[DD]
scalar_t* min_dists = &x[D_2]; // scalar_t[NUM_THREADS]
- long* min_idxs = (long*)&min_dists[blockDim.x]; // long[NUM_THREADS]
+ int64_t* min_idxs = (int64_t*)&min_dists[blockDim.x]; // int64_t[NUM_THREADS]

const size_t n = blockIdx.y; // index of batch element.
const size_t i = blockIdx.x; // index of point within batch element.
@@ -147,14 +147,14 @@ template <typename scalar_t>
__global__ void NearestNeighborKernelD3(
const scalar_t* __restrict__ points1,
const scalar_t* __restrict__ points2,
- long* __restrict__ idx,
+ int64_t* __restrict__ idx,
const size_t N,
const size_t P1,
const size_t P2) {
// Single shared memory buffer which is split and cast to different types.
extern __shared__ char shared_buf[];
scalar_t* min_dists = (scalar_t*)shared_buf; // scalar_t[NUM_THREADS]
- long* min_idxs = (long*)&min_dists[blockDim.x]; // long[NUM_THREADS]
+ int64_t* min_idxs = (int64_t*)&min_dists[blockDim.x]; // int64_t[NUM_THREADS]

const size_t D = 3;
const size_t n = blockIdx.y; // index of batch element.
@@ -230,12 +230,12 @@ at::Tensor NearestNeighborIdxCuda(at::Tensor p1, at::Tensor p2) {
// Use the specialized kernel for D=3.
AT_DISPATCH_FLOATING_TYPES(p1.type(), "nearest_neighbor_v3_cuda", ([&] {
size_t shared_size = threads * sizeof(size_t) +
- threads * sizeof(long);
+ threads * sizeof(int64_t);
NearestNeighborKernelD3<scalar_t>
<<<blocks, threads, shared_size>>>(
p1.data_ptr<scalar_t>(),
p2.data_ptr<scalar_t>(),
- idx.data_ptr<long>(),
+ idx.data_ptr<int64_t>(),
N,
P1,
P2);
@@ -248,11 +248,11 @@ at::Tensor NearestNeighborIdxCuda(at::Tensor p1, at::Tensor p2) {
// need to be rounded to the next even size.
size_t D_2 = D + (D % 2);
size_t shared_size = (D_2 + threads) * sizeof(size_t);
- shared_size += threads * sizeof(long);
+ shared_size += threads * sizeof(int64_t);
NearestNeighborKernel<scalar_t><<<blocks, threads, shared_size>>>(
p1.data_ptr<scalar_t>(),
p2.data_ptr<scalar_t>(),
- idx.data_ptr<long>(),
+ idx.data_ptr<int64_t>(),
N,
P1,
P2,
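Note (illustrative, not part of the diff): the shared-memory size computed on the host has to match how the kernel carves up the buffer. Once min_idxs is treated as int64_t* inside the kernel, budgeting sizeof(long) per thread would under-allocate that half of the buffer by 4 bytes per thread under MSVC (where long is 32-bit), so the host-side sizing switches to sizeof(int64_t) as well. A hedged sketch of the sizing, mirroring the code above (variable names illustrative):

// Sketch of the dynamic shared-memory budget for the nearest-neighbor kernels.
#include <cstddef>
#include <cstdint>
size_t threads = 128;                           // blockDim.x of the launch
size_t dist_bytes = threads * sizeof(size_t);   // upper bound for scalar_t[threads], as above
size_t idx_bytes = threads * sizeof(int64_t);   // int64_t[threads]; sizeof(long) is 4 under MSVC
size_t shared_size = dist_bytes + idx_bytes;    // passed as the third <<<...>>> launch argument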
6 changes: 5 additions & 1 deletion pytorch3d/csrc/rasterize_meshes/geometry_utils.cuh
@@ -7,7 +7,11 @@
#include "float_math.cuh"

// Set epsilon for preventing floating point errors and division by 0.
+ #ifdef _MSC_VER
+ #define kEpsilon 1e-30f
+ #else
const auto kEpsilon = 1e-30;
+ #endif

// Determines whether a point p is on the right side of a 2D line segment
// given by the end points v0, v1.
@@ -93,7 +97,7 @@ BarycentricCoordsBackward(
const float2& v2,
const float3& grad_bary_upstream) {
const float area = EdgeFunctionForward(v2, v0, v1) + kEpsilon;
const float area2 = pow(area, 2.0);
const float area2 = pow(area, 2.0f);
const float e0 = EdgeFunctionForward(p, v1, v2);
const float e1 = EdgeFunctionForward(p, v2, v0);
const float e2 = EdgeFunctionForward(p, v0, v1);
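Note (illustrative, not part of the diff): `const auto kEpsilon = 1e-30;` deduces a double, and the MSVC branch replaces it with a plain macro carrying an `f`-suffixed literal; presumably this sidesteps both the use of a namespace-scope const from device code, which some MSVC + nvcc combinations reject, and an unwanted float to double promotion. The same reasoning applies to `pow(area, 2.0f)`: the float literal keeps the call on the single-precision overload. A small hedged device-code illustration (nvcc syntax, not library code):

// Hedged illustration: keep the epsilon/pow arithmetic in float on all toolchains.
__device__ float area_squared(float area) {
  // pow(area, 2.0) would pick a double overload and then narrow the result;
  // powf with a 2.0f literal stays in single precision throughout.
  return powf(area, 2.0f);
}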
6 changes: 3 additions & 3 deletions pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp
@@ -7,7 +7,7 @@
// Given a pixel coordinate 0 <= i < S, convert it to a normalized device
// coordinate in the range [-1, 1]. The NDC range is divided into S evenly-sized
// pixels, and assume that each pixel falls in the *center* of its range.
- inline float PixToNdc(const int i, const int S) {
+ static float PixToNdc(const int i, const int S) {
// NDC x-offset + (i * pixel_width + half_pixel_width)
return -1 + (2 * i + 1.0f) / S;
}
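Note (illustrative, not part of the diff): `static` gives PixToNdc internal linkage in place of `inline`; the PR does not spell out the exact MSVC issue this avoids, and the function's behaviour is unchanged. As a quick worked example of the mapping, with S = 4 the pixel centers land at -0.75, -0.25, 0.25 and 0.75, evenly spaced and strictly inside [-1, 1]:

// Worked example of the pixel-center to NDC mapping used above.
#include <cstdio>
int main() {
  const int S = 4;
  for (int i = 0; i < S; ++i) {
    std::printf("PixToNdc(%d, %d) = %.2f\n", i, S, -1 + (2 * i + 1.0f) / S);
  }
  return 0;
}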
@@ -74,7 +74,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaiveCpu(
return std::make_tuple(point_idxs, zbuf, pix_dists);
}

- std::tuple<torch::Tensor, torch::Tensor> RasterizePointsCoarseCpu(
+ torch::Tensor RasterizePointsCoarseCpu(
const torch::Tensor& points,
const int image_size,
const float radius,
@@ -140,7 +140,7 @@ std::tuple<torch::Tensor, torch::Tensor> RasterizePointsCoarseCpu(
bin_y_max = bin_y_min + bin_width;
}
}
- return std::make_tuple(points_per_bin, bin_points);
+ return bin_points;
}

torch::Tensor RasterizePointsBackwardCpu(