Skip to content

Commit

Permalink
Merge pull request #24 from harrism/fix-derive-trajectory
Browse files Browse the repository at this point in the history
[REVIEW] Bug fixes and gtests for derive_trajectories()
  • Loading branch information
thomcom authored Sep 5, 2019
2 parents 2670f4d + 3751a64 commit a5bfbe7
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 216 deletions.
6 changes: 5 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
## New Features

- PR #7 Initial code
- PR #18 Python initial unit tests and bindings

## Improvements

## Bug Fixes

- PR #16 `cuspatial::subset_trajectory_id` test improvements and bug fixes
- PR #16 `cuspatial::subset_trajectory_id()` test improvements and bug fixes
- PR #17 Update issue / PR templates
- PR #23 Fix cudf Cython imports
- PR #24 `cuspatial::derive_trajectories()` test improvements and bug fixes
8 changes: 5 additions & 3 deletions cpp/include/cuspatial/trajectory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ namespace cuspatial {
*
* @return number of derived trajectories
*/
int derive_trajectories(gdf_column& x, gdf_column& y, gdf_column& object_id,
gdf_column& timestamp, gdf_column& trajectory_id,
gdf_column& length, gdf_column& offset);
gdf_size_type derive_trajectories(const gdf_column& x, const gdf_column& y,
const gdf_column& object_id,
const gdf_column& timestamp,
gdf_column& trajectory_id,
gdf_column& length, gdf_column& offset);


/**
Expand Down
112 changes: 58 additions & 54 deletions cpp/src/trajectory/derive_trajectories.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,62 +34,63 @@ struct derive_trajectories_functor {
}

template <typename T, std::enable_if_t< is_supported<T>() >* = nullptr>
int operator()(gdf_column& x, gdf_column& y, gdf_column& object_id,
gdf_column& timestamp, gdf_column& trajectory_id,
gdf_column& length, gdf_column& offset)
gdf_size_type operator()(const gdf_column& x, const gdf_column& y,
const gdf_column& object_id,
const gdf_column& timestamp,
gdf_column& trajectory_id,
gdf_column& length,
gdf_column& offset)
{
T* x_ptr = static_cast<T*>(x.data);
T* y_ptr = static_cast<T*>(y.data);
uint32_t* id_ptr = static_cast<uint32_t*>(object_id.data);
cuspatial::its_timestamp * time_ptr =
static_cast<cuspatial::its_timestamp*>(timestamp.data);
int32_t* id_ptr = static_cast<int32_t*>(object_id.data);
cudf::timestamp * time_ptr =
static_cast<cudf::timestamp*>(timestamp.data);

cudaStream_t stream{0};
auto exec_policy = rmm::exec_policy(stream)->on(stream);

uint32_t num_rec = object_id.size;
gdf_size_type num_rec = object_id.size;
thrust::stable_sort_by_key(exec_policy, time_ptr, time_ptr + num_rec,
thrust::make_zip_iterator(thrust::make_tuple(id_ptr, x_ptr, y_ptr)));
thrust::stable_sort_by_key(exec_policy, id_ptr, id_ptr+num_rec,
thrust::make_zip_iterator(thrust::make_tuple(time_ptr, x_ptr, y_ptr)));

//allocate sufficient memory to hold id, cnt and pos before reduce_by_key
uint32_t *objcnt{nullptr};
uint32_t *objid{nullptr};
RMM_TRY( RMM_ALLOC(&objcnt, num_rec * sizeof(uint32_t), 0) );
RMM_TRY( RMM_ALLOC(&objid, num_rec * sizeof(uint32_t), 0) );

int num_traj =
thrust::reduce_by_key(exec_policy, id_ptr, id_ptr + num_rec,
thrust::constant_iterator<int>(1),
objid, objcnt).second - objcnt;

//allocate just enough memory (num_traj), copy over and then free large (num_rec) arrays
uint32_t *trajid{nullptr};
uint32_t *trajcnt{nullptr};
uint32_t *trajpos{nullptr};
RMM_TRY( RMM_ALLOC(&trajid, num_traj * sizeof(uint32_t), 0) );
RMM_TRY( RMM_ALLOC(&trajcnt, num_traj * sizeof(uint32_t), 0) );
RMM_TRY( RMM_ALLOC(&trajpos, num_traj * sizeof(uint32_t), 0) );

thrust::copy(exec_policy, objid, objid + num_traj, trajid);
thrust::copy(exec_policy, objcnt, objcnt + num_traj, trajcnt);
thrust::inclusive_scan(exec_policy, trajcnt, trajcnt + num_traj, trajpos);

RMM_TRY( RMM_FREE(objid, 0) );
RMM_TRY( RMM_FREE(objcnt, 0) );

gdf_column_view(&trajectory_id, trajid, nullptr, num_traj, GDF_INT32);
gdf_column_view(&length, trajcnt, nullptr, num_traj, GDF_INT32);
gdf_column_view(&offset, trajpos, nullptr, num_traj, GDF_INT32);
rmm::device_vector<gdf_size_type> obj_count(num_rec);
rmm::device_vector<gdf_size_type> obj_id(num_rec);

auto end = thrust::reduce_by_key(exec_policy, id_ptr, id_ptr + num_rec,
thrust::constant_iterator<int>(1),
obj_id.begin(),
obj_count.begin());
gdf_size_type num_traj = end.second - obj_count.begin();

gdf_size_type* traj_id{nullptr};
gdf_size_type* traj_count{nullptr};
gdf_size_type* traj_pos{nullptr};
RMM_TRY( RMM_ALLOC(&traj_id, num_traj * sizeof(gdf_size_type), 0) );
RMM_TRY( RMM_ALLOC(&traj_count, num_traj * sizeof(gdf_size_type), 0) );
RMM_TRY( RMM_ALLOC(&traj_pos, num_traj * sizeof(gdf_size_type), 0) );

thrust::copy_n(exec_policy, obj_id.begin(), num_traj, traj_id);
thrust::copy_n(exec_policy, obj_count.begin(), num_traj, traj_count);
thrust::inclusive_scan(exec_policy, traj_count, traj_count +num_traj,
traj_pos);

gdf_column_view(&trajectory_id, traj_id, nullptr, num_traj, GDF_INT32);
gdf_column_view(&length, traj_count, nullptr, num_traj, GDF_INT32);
gdf_column_view(&offset, traj_pos, nullptr, num_traj, GDF_INT32);

return num_traj;
}

template <typename T, std::enable_if_t< !is_supported<T>() >* = nullptr>
int operator()(gdf_column& x, gdf_column& y, gdf_column& object_id,
gdf_column& timestamp, gdf_column& trajectory_id,
gdf_column& length, gdf_column& offset)
gdf_size_type operator()(const gdf_column& x, const gdf_column& y,
const gdf_column& object_id,
const gdf_column& timestamp,
gdf_column& trajectory_id,
gdf_column& length, gdf_column& offset)
{
CUDF_FAIL("Non-floating point operation is not supported");
}
Expand All @@ -105,30 +106,33 @@ namespace cuspatial {
* object IDs by first sorting based on id and timestamp and then group by id.
* see trajectory.hpp
*/
int derive_trajectories(gdf_column& x, gdf_column& y, gdf_column& object_id,
gdf_column& timestamp, gdf_column& trajectory_id,
gdf_column& length, gdf_column& offset)
{

gdf_size_type derive_trajectories(const gdf_column& x, const gdf_column& y,
const gdf_column& object_id,
const gdf_column& timestamp,
gdf_column& trajectory_id,
gdf_column& length,
gdf_column& offset)
{
CUDF_EXPECTS(x.data != nullptr && y.data != nullptr &&
object_id.data != nullptr && timestamp.data != nullptr,
"x/y/object_id/timetamp data cannot be null");
"Null input data");
CUDF_EXPECTS(x.size == y.size && x.size == object_id.size &&
x.size == timestamp.size ,
"x/y/object_id/timestamp must have equal size");

// future versions might allow x/y/object_id/timestamp to have null_count > 0,
// which might be useful for taking query results as inputs
"Data size mismatch");
CUDF_EXPECTS(object_id.dtype == GDF_INT32,
"Invalid trajectory ID datatype");
CUDF_EXPECTS(timestamp.dtype == GDF_TIMESTAMP,
"Invalid timestamp datatype");
CUDF_EXPECTS(x.null_count == 0 && y.null_count == 0 &&
object_id.null_count==0 && timestamp.null_count==0,
"NULL support unimplemented");
int num_trajectories = cudf::type_dispatcher(x.dtype,
derive_trajectories_functor(),
x, y, object_id, timestamp,
trajectory_id, length, offset);

gdf_size_type num_trajectories =
cudf::type_dispatcher(x.dtype, derive_trajectories_functor(),
x, y, object_id, timestamp,
trajectory_id, length, offset);

return num_trajectories;
}

}// namespace cuspatial
15 changes: 5 additions & 10 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,26 +106,21 @@ set(POINT_IN_POLYGON_TEST_SRC
ConfigureTest(POINT_IN_POLYGON_TEST "${POINT_IN_POLYGON_TEST_SRC}")

set(SPATIAL_WINDOW_POINT_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/query/spatial_window_test_toy.cu"
)
"${CMAKE_CURRENT_SOURCE_DIR}/query/spatial_window_test_toy.cu")
ConfigureTest(SPATIAL_WINDOW_POINT_TEST "${SPATIAL_WINDOW_POINT_TEST_SRC}")

set(TRAJECTORY_DISTANCE_SPEED_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_distance_speed_toy.cu"
)
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_distance_speed_toy.cu")
ConfigureTest(TRAJECTORY_DISTANCE_SPEED_TEST "${TRAJECTORY_DISTANCE_SPEED_TEST_SRC}")

set(TRAJECTORY_DERIVE_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_derive_toy.cu"
)
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_derive.cu")
ConfigureTest(TRAJECTORY_DERIVE_TEST "${TRAJECTORY_DERIVE_TEST_SRC}")

set(TRAJECTORY_SPATIAL_BOUND_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_spatial_bounds_toy.cu"
)
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_spatial_bounds_toy.cu")
ConfigureTest(TRAJECTORY_SPATIAL_BOUND_TEST "${TRAJECTORY_SPATIAL_BOUND_SRC}")

set(TRAJECTORY_SUBSET_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_subset.cu"
)
"${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_subset.cu")
ConfigureTest(TRAJECTORY_SUBSET_TEST "${TRAJECTORY_SUBSET_TEST_SRC}")
160 changes: 160 additions & 0 deletions cpp/tests/trajectory/test_trajectory_derive.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <vector>
#include <gtest/gtest.h>

#include <tests/utilities/cudf_test_fixtures.h>
#include <tests/utilities/column_wrapper.cuh>

#include <cuspatial/trajectory.hpp>

struct TrajectoryDerive : public GdfTest
{
};

template <typename T>
using wrapper = cudf::test::column_wrapper<T>;

constexpr gdf_size_type column_size{1000};

TEST_F(TrajectoryDerive, DeriveThree)
{
std::vector<int32_t> sequence(column_size);
std::iota(sequence.begin(), sequence.end(), 0);

//three sorted trajectories: one with 2/3 of the points, two with 1/6
std::vector<int32_t> id_vector(column_size);
std::transform(sequence.cbegin(), sequence.cend(), id_vector.begin(),
[](int32_t i) {
return (i < 2 * column_size / 3) ? 0 :
(i < 5 * column_size / 6) ? 1 : 2;
});

// timestamp milliseconds
std::vector<int64_t> ms_vector(sequence.begin(), sequence.end());

//randomize sequence
std::seed_seq seed{0};
std::mt19937 g(seed);

std::shuffle(sequence.begin(), sequence.end(), g);

wrapper<double> in_x(column_size,
[&](gdf_index_type i) { return static_cast<double>(sequence[i]); });
wrapper<double> in_y(column_size,
[&](gdf_index_type i) { return static_cast<double>(sequence[i]); });
wrapper<int32_t> in_id(column_size,
[&](gdf_index_type i) { return id_vector[sequence[i]]; });
wrapper<cudf::timestamp> in_ts(column_size,
[&](gdf_index_type i) {
return static_cast<cudf::timestamp>(ms_vector[sequence[i]]);
});

gdf_column traj_id{}, traj_len{}, traj_offset{};

gdf_size_type num_traj{0};
EXPECT_NO_THROW(
num_traj = cuspatial::derive_trajectories(in_x, in_y, in_id, in_ts,
traj_id, traj_len,
traj_offset);
);

wrapper<gdf_size_type> expected_traj_id{0, 1, 2};
// need to round up
wrapper<gdf_size_type> expected_traj_len{2 * column_size / 3,
(column_size + 5) / 6,
(column_size + 5) / 6};
// for some reason offset is the inclusive scan (offset[i] is the end of
// trajectory[i] rather than the beginning)
wrapper<gdf_size_type> expected_traj_offset{2 * column_size / 3,
5 * column_size / 6,
column_size};

EXPECT_EQ(num_traj, 3);
EXPECT_TRUE(expected_traj_id == traj_id);
EXPECT_TRUE(expected_traj_len == traj_len);
EXPECT_TRUE(expected_traj_offset == traj_offset);
}

TEST_F(TrajectoryDerive, BadData)
{
gdf_column out_id, out_len, out_offset;

gdf_column bad_x, bad_y, bad_in_id, bad_timestamp;
gdf_column_view(&bad_x, 0, 0, 0, GDF_FLOAT64);
gdf_column_view(&bad_y, 0, 0, 0, GDF_FLOAT64);
gdf_column_view(&bad_in_id, 0, 0, 0, GDF_INT32);
gdf_column_view(&bad_timestamp, 0, 0, 0, GDF_TIMESTAMP);

// null pointers
CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y,
bad_in_id,
bad_timestamp,
out_id,
out_len,
out_offset),
"Null input data");

// size mismatch
bad_x.data = bad_y.data = bad_in_id.data = bad_timestamp.data =
reinterpret_cast<void*>(0x0badf00d);
bad_x.size = 10;
bad_y.size = 12; // mismatch
bad_in_id.size = 10;
bad_timestamp.size = 10;

CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y,
bad_in_id,
bad_timestamp,
out_id,
out_len,
out_offset),
"Data size mismatch");

// Invalid ID datatype
bad_y.size = 10;
bad_in_id.dtype = GDF_FLOAT32;

CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y,
bad_in_id,
bad_timestamp,
out_id,
out_len,
out_offset),
"Invalid trajectory ID datatype");

bad_in_id.dtype = GDF_INT32;
bad_timestamp.dtype = GDF_DATE32;

CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y,
bad_in_id,
bad_timestamp,
out_id,
out_len,
out_offset),
"Invalid timestamp datatype");

bad_timestamp.dtype = GDF_TIMESTAMP;
bad_x.null_count = 5;
CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y,
bad_in_id,
bad_timestamp,
out_id,
out_len,
out_offset),
"NULL support unimplemented");
}
Loading

0 comments on commit a5bfbe7

Please sign in to comment.