diff --git a/CHANGELOG.md b/CHANGELOG.md index d41a74b79..d2fa72de2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,9 +3,13 @@ ## New Features - PR #7 Initial code +- PR #18 Python initial unit tests and bindings ## Improvements ## Bug Fixes -- PR #16 `cuspatial::subset_trajectory_id` test improvements and bug fixes +- PR #16 `cuspatial::subset_trajectory_id()` test improvements and bug fixes +- PR #17 Update issue / PR templates +- PR #23 Fix cudf Cython imports +- PR #24 `cuspatial::derive_trajectories()` test improvements and bug fixes diff --git a/cpp/include/cuspatial/trajectory.hpp b/cpp/include/cuspatial/trajectory.hpp index 018036ed9..1e3ae1775 100644 --- a/cpp/include/cuspatial/trajectory.hpp +++ b/cpp/include/cuspatial/trajectory.hpp @@ -40,9 +40,11 @@ namespace cuspatial { * * @return number of derived trajectories */ -int derive_trajectories(gdf_column& x, gdf_column& y, gdf_column& object_id, - gdf_column& timestamp, gdf_column& trajectory_id, - gdf_column& length, gdf_column& offset); +gdf_size_type derive_trajectories(const gdf_column& x, const gdf_column& y, + const gdf_column& object_id, + const gdf_column& timestamp, + gdf_column& trajectory_id, + gdf_column& length, gdf_column& offset); /** diff --git a/cpp/src/trajectory/derive_trajectories.cu b/cpp/src/trajectory/derive_trajectories.cu index f7ef89d14..51b03f2d6 100644 --- a/cpp/src/trajectory/derive_trajectories.cu +++ b/cpp/src/trajectory/derive_trajectories.cu @@ -34,62 +34,63 @@ struct derive_trajectories_functor { } template () >* = nullptr> - int operator()(gdf_column& x, gdf_column& y, gdf_column& object_id, - gdf_column& timestamp, gdf_column& trajectory_id, - gdf_column& length, gdf_column& offset) + gdf_size_type operator()(const gdf_column& x, const gdf_column& y, + const gdf_column& object_id, + const gdf_column& timestamp, + gdf_column& trajectory_id, + gdf_column& length, + gdf_column& offset) { T* x_ptr = static_cast(x.data); T* y_ptr = static_cast(y.data); - uint32_t* id_ptr = static_cast(object_id.data); - cuspatial::its_timestamp * time_ptr = - static_cast(timestamp.data); + int32_t* id_ptr = static_cast(object_id.data); + cudf::timestamp * time_ptr = + static_cast(timestamp.data); cudaStream_t stream{0}; auto exec_policy = rmm::exec_policy(stream)->on(stream); - uint32_t num_rec = object_id.size; + gdf_size_type num_rec = object_id.size; thrust::stable_sort_by_key(exec_policy, time_ptr, time_ptr + num_rec, thrust::make_zip_iterator(thrust::make_tuple(id_ptr, x_ptr, y_ptr))); thrust::stable_sort_by_key(exec_policy, id_ptr, id_ptr+num_rec, thrust::make_zip_iterator(thrust::make_tuple(time_ptr, x_ptr, y_ptr))); //allocate sufficient memory to hold id, cnt and pos before reduce_by_key - uint32_t *objcnt{nullptr}; - uint32_t *objid{nullptr}; - RMM_TRY( RMM_ALLOC(&objcnt, num_rec * sizeof(uint32_t), 0) ); - RMM_TRY( RMM_ALLOC(&objid, num_rec * sizeof(uint32_t), 0) ); - - int num_traj = - thrust::reduce_by_key(exec_policy, id_ptr, id_ptr + num_rec, - thrust::constant_iterator(1), - objid, objcnt).second - objcnt; - - //allocate just enough memory (num_traj), copy over and then free large (num_rec) arrays - uint32_t *trajid{nullptr}; - uint32_t *trajcnt{nullptr}; - uint32_t *trajpos{nullptr}; - RMM_TRY( RMM_ALLOC(&trajid, num_traj * sizeof(uint32_t), 0) ); - RMM_TRY( RMM_ALLOC(&trajcnt, num_traj * sizeof(uint32_t), 0) ); - RMM_TRY( RMM_ALLOC(&trajpos, num_traj * sizeof(uint32_t), 0) ); - - thrust::copy(exec_policy, objid, objid + num_traj, trajid); - thrust::copy(exec_policy, objcnt, objcnt + num_traj, trajcnt); - thrust::inclusive_scan(exec_policy, trajcnt, trajcnt + num_traj, trajpos); - - RMM_TRY( RMM_FREE(objid, 0) ); - RMM_TRY( RMM_FREE(objcnt, 0) ); - - gdf_column_view(&trajectory_id, trajid, nullptr, num_traj, GDF_INT32); - gdf_column_view(&length, trajcnt, nullptr, num_traj, GDF_INT32); - gdf_column_view(&offset, trajpos, nullptr, num_traj, GDF_INT32); + rmm::device_vector obj_count(num_rec); + rmm::device_vector obj_id(num_rec); + + auto end = thrust::reduce_by_key(exec_policy, id_ptr, id_ptr + num_rec, + thrust::constant_iterator(1), + obj_id.begin(), + obj_count.begin()); + gdf_size_type num_traj = end.second - obj_count.begin(); + + gdf_size_type* traj_id{nullptr}; + gdf_size_type* traj_count{nullptr}; + gdf_size_type* traj_pos{nullptr}; + RMM_TRY( RMM_ALLOC(&traj_id, num_traj * sizeof(gdf_size_type), 0) ); + RMM_TRY( RMM_ALLOC(&traj_count, num_traj * sizeof(gdf_size_type), 0) ); + RMM_TRY( RMM_ALLOC(&traj_pos, num_traj * sizeof(gdf_size_type), 0) ); + + thrust::copy_n(exec_policy, obj_id.begin(), num_traj, traj_id); + thrust::copy_n(exec_policy, obj_count.begin(), num_traj, traj_count); + thrust::inclusive_scan(exec_policy, traj_count, traj_count +num_traj, + traj_pos); + + gdf_column_view(&trajectory_id, traj_id, nullptr, num_traj, GDF_INT32); + gdf_column_view(&length, traj_count, nullptr, num_traj, GDF_INT32); + gdf_column_view(&offset, traj_pos, nullptr, num_traj, GDF_INT32); return num_traj; } template () >* = nullptr> - int operator()(gdf_column& x, gdf_column& y, gdf_column& object_id, - gdf_column& timestamp, gdf_column& trajectory_id, - gdf_column& length, gdf_column& offset) + gdf_size_type operator()(const gdf_column& x, const gdf_column& y, + const gdf_column& object_id, + const gdf_column& timestamp, + gdf_column& trajectory_id, + gdf_column& length, gdf_column& offset) { CUDF_FAIL("Non-floating point operation is not supported"); } @@ -105,30 +106,33 @@ namespace cuspatial { * object IDs by first sorting based on id and timestamp and then group by id. * see trajectory.hpp */ -int derive_trajectories(gdf_column& x, gdf_column& y, gdf_column& object_id, - gdf_column& timestamp, gdf_column& trajectory_id, - gdf_column& length, gdf_column& offset) -{ - +gdf_size_type derive_trajectories(const gdf_column& x, const gdf_column& y, + const gdf_column& object_id, + const gdf_column& timestamp, + gdf_column& trajectory_id, + gdf_column& length, + gdf_column& offset) +{ CUDF_EXPECTS(x.data != nullptr && y.data != nullptr && object_id.data != nullptr && timestamp.data != nullptr, - "x/y/object_id/timetamp data cannot be null"); + "Null input data"); CUDF_EXPECTS(x.size == y.size && x.size == object_id.size && x.size == timestamp.size , - "x/y/object_id/timestamp must have equal size"); - - // future versions might allow x/y/object_id/timestamp to have null_count > 0, - // which might be useful for taking query results as inputs + "Data size mismatch"); + CUDF_EXPECTS(object_id.dtype == GDF_INT32, + "Invalid trajectory ID datatype"); + CUDF_EXPECTS(timestamp.dtype == GDF_TIMESTAMP, + "Invalid timestamp datatype"); CUDF_EXPECTS(x.null_count == 0 && y.null_count == 0 && object_id.null_count==0 && timestamp.null_count==0, "NULL support unimplemented"); - - int num_trajectories = cudf::type_dispatcher(x.dtype, - derive_trajectories_functor(), - x, y, object_id, timestamp, - trajectory_id, length, offset); + + gdf_size_type num_trajectories = + cudf::type_dispatcher(x.dtype, derive_trajectories_functor(), + x, y, object_id, timestamp, + trajectory_id, length, offset); return num_trajectories; } - + }// namespace cuspatial diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 609dd8511..f8444845a 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -106,26 +106,21 @@ set(POINT_IN_POLYGON_TEST_SRC ConfigureTest(POINT_IN_POLYGON_TEST "${POINT_IN_POLYGON_TEST_SRC}") set(SPATIAL_WINDOW_POINT_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/query/spatial_window_test_toy.cu" - ) + "${CMAKE_CURRENT_SOURCE_DIR}/query/spatial_window_test_toy.cu") ConfigureTest(SPATIAL_WINDOW_POINT_TEST "${SPATIAL_WINDOW_POINT_TEST_SRC}") set(TRAJECTORY_DISTANCE_SPEED_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_distance_speed_toy.cu" - ) + "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_distance_speed_toy.cu") ConfigureTest(TRAJECTORY_DISTANCE_SPEED_TEST "${TRAJECTORY_DISTANCE_SPEED_TEST_SRC}") set(TRAJECTORY_DERIVE_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_derive_toy.cu" - ) + "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_derive.cu") ConfigureTest(TRAJECTORY_DERIVE_TEST "${TRAJECTORY_DERIVE_TEST_SRC}") set(TRAJECTORY_SPATIAL_BOUND_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_spatial_bounds_toy.cu" - ) + "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/trajectory_spatial_bounds_toy.cu") ConfigureTest(TRAJECTORY_SPATIAL_BOUND_TEST "${TRAJECTORY_SPATIAL_BOUND_SRC}") set(TRAJECTORY_SUBSET_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_subset.cu" - ) + "${CMAKE_CURRENT_SOURCE_DIR}/trajectory/test_trajectory_subset.cu") ConfigureTest(TRAJECTORY_SUBSET_TEST "${TRAJECTORY_SUBSET_TEST_SRC}") diff --git a/cpp/tests/trajectory/test_trajectory_derive.cu b/cpp/tests/trajectory/test_trajectory_derive.cu new file mode 100644 index 000000000..8f3559b76 --- /dev/null +++ b/cpp/tests/trajectory/test_trajectory_derive.cu @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include + +struct TrajectoryDerive : public GdfTest +{ +}; + +template +using wrapper = cudf::test::column_wrapper; + +constexpr gdf_size_type column_size{1000}; + +TEST_F(TrajectoryDerive, DeriveThree) +{ + std::vector sequence(column_size); + std::iota(sequence.begin(), sequence.end(), 0); + + //three sorted trajectories: one with 2/3 of the points, two with 1/6 + std::vector id_vector(column_size); + std::transform(sequence.cbegin(), sequence.cend(), id_vector.begin(), + [](int32_t i) { + return (i < 2 * column_size / 3) ? 0 : + (i < 5 * column_size / 6) ? 1 : 2; + }); + + // timestamp milliseconds + std::vector ms_vector(sequence.begin(), sequence.end()); + + //randomize sequence + std::seed_seq seed{0}; + std::mt19937 g(seed); + + std::shuffle(sequence.begin(), sequence.end(), g); + + wrapper in_x(column_size, + [&](gdf_index_type i) { return static_cast(sequence[i]); }); + wrapper in_y(column_size, + [&](gdf_index_type i) { return static_cast(sequence[i]); }); + wrapper in_id(column_size, + [&](gdf_index_type i) { return id_vector[sequence[i]]; }); + wrapper in_ts(column_size, + [&](gdf_index_type i) { + return static_cast(ms_vector[sequence[i]]); + }); + + gdf_column traj_id{}, traj_len{}, traj_offset{}; + + gdf_size_type num_traj{0}; + EXPECT_NO_THROW( + num_traj = cuspatial::derive_trajectories(in_x, in_y, in_id, in_ts, + traj_id, traj_len, + traj_offset); + ); + + wrapper expected_traj_id{0, 1, 2}; + // need to round up + wrapper expected_traj_len{2 * column_size / 3, + (column_size + 5) / 6, + (column_size + 5) / 6}; + // for some reason offset is the inclusive scan (offset[i] is the end of + // trajectory[i] rather than the beginning) + wrapper expected_traj_offset{2 * column_size / 3, + 5 * column_size / 6, + column_size}; + + EXPECT_EQ(num_traj, 3); + EXPECT_TRUE(expected_traj_id == traj_id); + EXPECT_TRUE(expected_traj_len == traj_len); + EXPECT_TRUE(expected_traj_offset == traj_offset); +} + +TEST_F(TrajectoryDerive, BadData) +{ + gdf_column out_id, out_len, out_offset; + + gdf_column bad_x, bad_y, bad_in_id, bad_timestamp; + gdf_column_view(&bad_x, 0, 0, 0, GDF_FLOAT64); + gdf_column_view(&bad_y, 0, 0, 0, GDF_FLOAT64); + gdf_column_view(&bad_in_id, 0, 0, 0, GDF_INT32); + gdf_column_view(&bad_timestamp, 0, 0, 0, GDF_TIMESTAMP); + + // null pointers + CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y, + bad_in_id, + bad_timestamp, + out_id, + out_len, + out_offset), + "Null input data"); + + // size mismatch + bad_x.data = bad_y.data = bad_in_id.data = bad_timestamp.data = + reinterpret_cast(0x0badf00d); + bad_x.size = 10; + bad_y.size = 12; // mismatch + bad_in_id.size = 10; + bad_timestamp.size = 10; + + CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y, + bad_in_id, + bad_timestamp, + out_id, + out_len, + out_offset), + "Data size mismatch"); + + // Invalid ID datatype + bad_y.size = 10; + bad_in_id.dtype = GDF_FLOAT32; + + CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y, + bad_in_id, + bad_timestamp, + out_id, + out_len, + out_offset), + "Invalid trajectory ID datatype"); + + bad_in_id.dtype = GDF_INT32; + bad_timestamp.dtype = GDF_DATE32; + + CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y, + bad_in_id, + bad_timestamp, + out_id, + out_len, + out_offset), + "Invalid timestamp datatype"); + + bad_timestamp.dtype = GDF_TIMESTAMP; + bad_x.null_count = 5; + CUDF_EXPECT_THROW_MESSAGE(cuspatial::derive_trajectories(bad_x, bad_y, + bad_in_id, + bad_timestamp, + out_id, + out_len, + out_offset), + "NULL support unimplemented"); +} \ No newline at end of file diff --git a/cpp/tests/trajectory/test_trajectory_subset.cu b/cpp/tests/trajectory/test_trajectory_subset.cu index d0ee21338..231fc725a 100644 --- a/cpp/tests/trajectory/test_trajectory_subset.cu +++ b/cpp/tests/trajectory/test_trajectory_subset.cu @@ -14,23 +14,15 @@ * limitations under the License. */ -#include -#include #include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include + +#include struct TrajectorySubsetTest : public GdfTest { @@ -49,8 +41,10 @@ void test_subset(std::vector ids_to_keep) //three sorted trajectories: one with 2/3 of the points, two with 1/6 std::vector id_vector(column_size); std::transform(sequence.cbegin(), sequence.cend(), id_vector.begin(), - [](int32_t i) { return (i < 2 * i / 3) ? 0 : - (i < 5 * i / 6) ? 1 : 2; }); + [](int32_t i) { + return (i < 2 * column_size / 3) ? 0 : + (i < 5 * column_size / 6) ? 1 : 2; + }); // timestamp milliseconds std::vector ms_vector(sequence.begin(), sequence.end()); @@ -79,13 +73,15 @@ void test_subset(std::vector ids_to_keep) std::sort(ids_to_keep.begin(), ids_to_keep.end()); std::vector expected_sequence(sequence.size()); - auto expected_size = + auto end = std::copy_if(sequence.begin(), sequence.end(), expected_sequence.begin(), [&](int32_t i) { return std::binary_search(ids_to_keep.begin(), ids_to_keep.end(), id_vector[i]); } - ) - expected_sequence.begin(); + ); + + gdf_size_type expected_size = end - expected_sequence.begin(); wrapper expected_x(expected_size, [&](gdf_index_type i) { return static_cast(expected_sequence[i]); }); @@ -127,8 +123,6 @@ TEST_F(TrajectorySubsetTest, SelectSome) TEST_F(TrajectorySubsetTest, BadData) { - //constexpr gdf_size_type column_size{1000}; - gdf_column out_x, out_y, out_id, out_timestamp; gdf_column bad_x, bad_y, bad_in_id, bad_timestamp, bad_id; diff --git a/cpp/tests/trajectory/trajectory_derive_toy.cu b/cpp/tests/trajectory/trajectory_derive_toy.cu deleted file mode 100644 index 2479817f6..000000000 --- a/cpp/tests/trajectory/trajectory_derive_toy.cu +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -struct TrajectoryDeriveToy : public GdfTest -{ -}; - -TEST_F(TrajectoryDeriveToy, trajectoryderivetest) -{ - //three sorted trajectories with 5,4,3 points, respectively - std::cout<<"in TrajectoryDeriveToy"< point_x_wrapp{rand_x}; - cudf::test::column_wrapper point_y_wrapp{rand_y}; - cudf::test::column_wrapper point_id_wrapp{rand_id}; - cudf::test::column_wrapper point_ts_wrapp{rand_ts}; - - gdf_column traj_id,traj_len,traj_offset; - memset(&traj_id,0,sizeof(gdf_column)); - memset(&traj_len,0,sizeof(gdf_column)); - memset(&traj_offset,0,sizeof(gdf_column)); - gdf_column pnt_x=*(point_x_wrapp.get()); - gdf_column pnt_y=*(point_y_wrapp.get()); - gdf_column pnt_id=*(point_id_wrapp.get()); - gdf_column pnt_ts=*(point_ts_wrapp.get()); - - std::cout<<"calling cuspatial::derive_trajectory"< pnt_x_ptr=thrust::device_pointer_cast(static_cast(pnt_x.data)); - thrust::device_ptr pnt_y_ptr=thrust::device_pointer_cast(static_cast(pnt_y.data)); - thrust::device_ptr pnt_id_ptr=thrust::device_pointer_cast(static_cast(pnt_id.data)); - thrust::device_ptr pnt_ts_ptr = - thrust::device_pointer_cast(static_cast(pnt_ts.data)); - - int num_print = (pnt_x.size<20)?pnt_x.size:20; - std::cout<<"x"<(std::cout, " "));std::cout<(std::cout, " "));std::cout<(std::cout, " "));std::cout<(std::cout, " "));std::cout< traj_id_ptr=thrust::device_pointer_cast(static_cast(traj_id.data)); - thrust::device_ptr traj_len_ptr=thrust::device_pointer_cast(static_cast(traj_len.data)); - thrust::device_ptr traj_offset_ptr=thrust::device_pointer_cast(static_cast(traj_offset.data)); - std::cout<<"ids of trajectories"<(std::cout, " "));std::cout<(std::cout, " "));std::cout<(std::cout, " "));std::cout<