
Optimize TensorList resizing.
Signed-off-by: Michal Zientkiewicz <michalz@nvidia.com>
mzient committed Sep 18, 2024
1 parent 94f02ad commit 7610ac0
Showing 6 changed files with 113 additions and 115 deletions.
4 changes: 2 additions & 2 deletions dali/pipeline/data/buffer.cc
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ DLL_PUBLIC shared_ptr<uint8_t> AllocBuffer(size_t bytes, bool pinned,
}

DLL_PUBLIC bool RestrictPinnedMemUsage() {
static bool val = []() {
static const bool val = []() {
const char *env = getenv("DALI_RESTRICT_PINNED_MEM");
return env && atoi(env);
}();
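
Note on the RestrictPinnedMemUsage() hunk above: marking the function-local cache const as well as static documents that the DALI_RESTRICT_PINNED_MEM flag is read exactly once, on the first call, with thread-safe initialization guaranteed by the C++11 "magic static". A self-contained sketch of the same pattern (the standalone function name here is illustrative, not DALI's API):

#include <cstdlib>

// Reads DALI_RESTRICT_PINNED_MEM once and caches the result.
// The function-local static is initialized in a thread-safe way;
// `const` documents that the cached value never changes afterwards.
bool RestrictPinnedMemUsageSketch() {
  static const bool val = []() {
    const char *env = std::getenv("DALI_RESTRICT_PINNED_MEM");
    return env && std::atoi(env) != 0;
  }();
  return val;
}

Every later call returns the cached value without touching getenv again.
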
17 changes: 10 additions & 7 deletions dali/pipeline/data/buffer.h
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -302,7 +302,7 @@ class DLL_PUBLIC Buffer {
return !!data_;
}

std::shared_ptr<void> get_data_ptr() const {
const std::shared_ptr<void> &get_data_ptr() const {
return data_;
}

@@ -549,7 +549,7 @@ class DLL_PUBLIC Buffer {
*
* @remark If order is empty, current order is used.
*/
inline void set_backing_allocation(const shared_ptr<void> &ptr, size_t bytes, bool pinned,
inline void set_backing_allocation(shared_ptr<void> ptr, size_t bytes, bool pinned,
DALIDataType type, size_t size, int device_id,
AccessOrder order = {}) {
if (!same_managed_object(data_, ptr))
@@ -562,7 +562,7 @@

// Fill the remaining members in the order as they appear in class.
type_ = TypeTable::GetTypeInfo(type);
data_ = ptr;
data_ = std::move(ptr);
allocate_ = {};
size_ = size;
shares_data_ = data_ != nullptr;
@@ -674,7 +674,10 @@ class DLL_PUBLIC Buffer {
static double growth_factor_;
static double shrink_threshold_;

static bool default_pinned();
static bool default_pinned() {
static const bool pinned = !RestrictPinnedMemUsage();
return pinned;
}

TypeInfo type_ = {}; // Data type of underlying storage
shared_ptr<void> data_ = nullptr; // Pointer to underlying storage
@@ -683,8 +686,8 @@
size_t num_bytes_ = 0; // To keep track of the true size of the underlying allocation
int device_ = CPU_ONLY_DEVICE_ID; // device the buffer was allocated on
AccessOrder order_ = AccessOrder::host(); // The order of memory access (host or device)
bool shares_data_ = false; // Whether we aren't using our own allocation
bool pinned_ = !RestrictPinnedMemUsage(); // Whether the allocation uses pinned memory
bool shares_data_ = false; // Whether we aren't using our own allocation
bool pinned_ = default_pinned(); // Whether the allocation uses pinned memory
};

template <typename Backend>
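
Two idioms appear in the buffer.h changes: get_data_ptr() now returns a const reference, so callers that only inspect the pointer no longer pay an atomic reference-count increment for a shared_ptr copy, and set_backing_allocation() takes its shared_ptr by value and moves it into the member (a sink parameter), so an rvalue argument is transferred with no ref-count traffic at all. default_pinned() likewise caches !RestrictPinnedMemUsage() so the environment is not consulted for every Buffer constructed. A minimal sketch of the first two idioms (the Holder class is illustrative, not part of DALI):

#include <memory>
#include <utility>

class Holder {
 public:
  // Observer: a const reference avoids copying the shared_ptr,
  // i.e. no atomic increment/decrement just to look at it.
  const std::shared_ptr<void> &data() const { return data_; }

  // Sink: take by value, then move into the member. An rvalue
  // argument is moved all the way in; an lvalue costs one copy,
  // the same as the old const-reference signature.
  void set_data(std::shared_ptr<void> ptr) { data_ = std::move(ptr); }

 private:
  std::shared_ptr<void> data_;
};
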
68 changes: 37 additions & 31 deletions dali/pipeline/data/tensor.h
@@ -1,4 +1,4 @@
// Copyright (c) 2017-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -44,7 +44,6 @@ class Tensor : public Buffer<Backend> {
inline Tensor() {}
inline ~Tensor() override = default;


/**
*
* @brief For tensor T of shape (s_0, s_1, ..., s_{n-1}) returns a n-1 dimensional tensor T'
@@ -226,35 +225,9 @@ class Tensor : public Buffer<Backend> {
* individually. The device_id describes the location of the memory and the order can describe
* the dependency on the work that is happening on another device.
*/
inline void ShareData(const shared_ptr<void> &ptr, size_t bytes, bool pinned,
const TensorShape<> &shape, DALIDataType type, int device_id,
AccessOrder order = {}) {
Index new_size = volume(shape);
DALI_ENFORCE(new_size == 0 || type != DALI_NO_TYPE,
"Only empty tensors can be shared without specifying a type.");

// Free the underlying storage.
if (!same_managed_object(data_, ptr))
free_storage();

// Set the new order, if provided.
if (order)
this->set_order(order);

// Save our new pointer and bytes. Reset our type, shape, and size
type_ = TypeTable::GetTypeInfo(type);
data_ = ptr;
size_ = new_size;
num_bytes_ = bytes;
device_ = device_id;

// If the input pointer stores a non-zero size allocation, mark
// that we are sharing our underlying data
shares_data_ = num_bytes_ > 0 ? true : false;
pinned_ = pinned;

shape_ = shape;
}
void ShareData(shared_ptr<void> ptr, size_t bytes, bool pinned,
const TensorShape<> &shape, DALIDataType type, int device_id,
AccessOrder order = {});

/**
* @brief Interprets a raw allocation as a tensor with given shape.
@@ -460,6 +433,39 @@ class Tensor : public Buffer<Backend> {
friend class TensorList;
};


template <typename Backend>
void Tensor<Backend>::ShareData(shared_ptr<void> ptr, size_t bytes, bool pinned,
const TensorShape<> &shape, DALIDataType type, int device_id,
AccessOrder order) {
Index new_size = volume(shape);
DALI_ENFORCE(new_size == 0 || type != DALI_NO_TYPE,
"Only empty tensors can be shared without specifying a type.");

// Free the underlying storage.
if (!same_managed_object(data_, ptr))
free_storage();

// Set the new order, if provided.
if (order)
this->set_order(order);

// Save our new pointer and bytes. Reset our type, shape, and size
type_ = TypeTable::GetTypeInfo(type);
data_ = std::move(ptr);
size_ = new_size;
num_bytes_ = bytes;
device_ = device_id;

// If the input pointer stores a non-zero size allocation, mark
// that we are sharing our underlying data
shares_data_ = num_bytes_ > 0 ? true : false;
pinned_ = pinned;

shape_ = shape;
}


} // namespace dali

#endif // DALI_PIPELINE_DATA_TENSOR_H_
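
The net effect in this header is that Tensor<Backend>::ShareData() now takes its shared_ptr by value (and moves it into data_), and its body moves from inside the class to an out-of-class definition further down in the same header; for a class-template member, such a definition needs no extra inline keyword. A small sketch of that layout (names are illustrative, not DALI's):

#include <memory>
#include <utility>

template <typename Backend>
class Block {
 public:
  // Declaration only; keeps the class body easy to scan.
  void ShareData(std::shared_ptr<void> ptr);

 private:
  std::shared_ptr<void> data_;
};

// Out-of-class definition of a class-template member: it stays in
// the header and may appear in multiple translation units.
template <typename Backend>
void Block<Backend>::ShareData(std::shared_ptr<void> ptr) {
  data_ = std::move(ptr);  // sink parameter, moved into the member
}
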
75 changes: 13 additions & 62 deletions dali/pipeline/data/tensor_list.cc
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -301,7 +301,7 @@ void TensorList<Backend>::SetSample(int sample_idx, const Tensor<Backend> &owner


template <typename Backend>
void TensorList<Backend>::SetSample(int sample_idx, const shared_ptr<void> &ptr, size_t bytes,
void TensorList<Backend>::SetSample(int sample_idx, shared_ptr<void> ptr, size_t bytes,
bool pinned, const TensorShape<> &shape, DALIDataType type,
int device_id, AccessOrder order, const TensorLayout &layout) {
// Bounds check
@@ -316,7 +316,7 @@ void TensorList<Backend>::SetSample(int sample_idx, const shared_ptr<void> &ptr,

// Setting a new share overwrites the previous one - so we can safely assume that even if
// we had a sample sharing into TL, it will be overwritten
tensors_[sample_idx].ShareData(ptr, bytes, pinned, shape, type, device_id, order);
tensors_[sample_idx].ShareData(std::move(ptr), bytes, pinned, shape, type, device_id, order);
// As the order was simply copied over, we have to fix it back.
// We will be accessing it in order of this buffer, so we need to wait for all the work
// from the "incoming" src order.
@@ -460,13 +460,6 @@ std::vector<size_t> TensorList<Backend>::_chunks_capacity() const {
return result;
}


template <typename Backend>
const TensorListShape<> &TensorList<Backend>::shape() const & {
return shape_;
}


template <typename Backend>
void TensorList<Backend>::set_order(AccessOrder order, bool synchronize) {
DALI_ENFORCE(order, "Resetting order to an empty one is not supported");
@@ -529,6 +522,7 @@ void TensorList<Backend>::Resize(const TensorListShape<> &new_shape, DALIDataTyp
if (old_size < new_shape.num_samples()) {
tensors_.resize(new_shape.num_samples());
}

for (int i = old_size; i < new_shape.num_samples(); i++) {
setup_tensor_allocation(i);
}
@@ -575,6 +569,7 @@ void TensorList<Backend>::Resize(const TensorListShape<> &new_shape, DALIDataTyp
for (int i = 0; i < curr_num_tensors_; i++) {
tensors_[i].Resize(new_shape[i], new_type);
}

if (curr_num_tensors_ > 0) {
order_ = tensors_[0].order();
device_ = tensors_[0].device_id();
@@ -629,19 +624,6 @@ void TensorList<Backend>::set_type(DALIDataType new_type_id) {
}
}


template <typename Backend>
DALIDataType TensorList<Backend>::type() const {
return type_.id();
}


template <typename Backend>
const TypeInfo &TensorList<Backend>::type_info() const {
return type_;
}


template <typename Backend>
void TensorList<Backend>::SetLayout(const TensorLayout &layout) {
for (auto &t : tensors_) {
@@ -662,13 +644,6 @@ void TensorList<Backend>::SetSourceInfo(int idx, const std::string &source_info)
tensors_[idx].SetSourceInfo(source_info);
}


template <typename Backend>
TensorLayout TensorList<Backend>::GetLayout() const {
return layout_;
}


template <typename Backend>
const DALIMeta &TensorList<Backend>::GetMeta(int idx) const {
assert(idx < curr_num_tensors_);
@@ -695,13 +670,6 @@ void TensorList<Backend>::set_pinned(bool pinned) {
pinned_ = pinned;
}


template <typename Backend>
bool TensorList<Backend>::is_pinned() const {
return pinned_;
}


template <typename Backend>
void TensorList<Backend>::set_device_id(int device_id) {
contiguous_buffer_.set_device_id(device_id);
@@ -711,13 +679,6 @@ void TensorList<Backend>::set_device_id(int device_id) {
device_ = device_id;
}


template <typename Backend>
int TensorList<Backend>::device_id() const {
return device_;
}


template <typename Backend>
void TensorList<Backend>::reserve(size_t total_bytes) {
int batch_size_bkp = curr_num_tensors_;
@@ -744,30 +705,18 @@ void TensorList<Backend>::reserve(size_t bytes_per_sample, int batch_size) {
}
}


template <typename Backend>
bool TensorList<Backend>::IsContiguous() const noexcept {
return state_.IsContiguous();
}


template <typename Backend>
BatchContiguity TensorList<Backend>::GetContiguity() const noexcept {
return state_.Get();
}


template <typename Backend>
void TensorList<Backend>::recreate_views() {
// precondition: type, shape are configured
uint8_t *sample_ptr = static_cast<uint8_t *>(contiguous_buffer_.raw_mutable_data());
int64_t num_samples = shape().num_samples();
auto &data_ptr = contiguous_buffer_.get_data_ptr();
for (int64_t i = 0; i < num_samples; i++) {
// or any other way
auto tensor_size = shape().tensor_size(i);

std::shared_ptr<void> sample_alias(contiguous_buffer_.get_data_ptr(), sample_ptr);
tensors_[i].ShareData(sample_alias, tensor_size * type_info().size(), is_pinned(), shape()[i],
tensors_[i].ShareData(std::shared_ptr<void>(data_ptr, sample_ptr),
tensor_size * type_info().size(), is_pinned(), shape()[i],
type(), device_id(), order());
tensors_[i].SetLayout(GetLayout());
sample_ptr += tensor_size * type_info().size();
@@ -996,7 +945,8 @@ Tensor<Backend> TensorList<Backend>::AsReshapedTensor(const TensorShape<> &new_s
ptr = nullptr;
}

result.ShareData(ptr, capacity(), is_pinned(), new_shape, type(), device_id(), order());
result.ShareData(std::move(ptr), capacity(), is_pinned(),
new_shape, type(), device_id(), order());

auto result_layout = GetLayout();
if (result_layout.ndim() + 1 == new_shape.sample_dim()) {
@@ -1022,10 +972,11 @@ Tensor<Backend> TensorList<Backend>::AsTensor() {


template <typename Backend>
void TensorList<Backend>::ShareData(const shared_ptr<void> &ptr, size_t bytes, bool pinned,
void TensorList<Backend>::ShareData(shared_ptr<void> ptr, size_t bytes, bool pinned,
const TensorListShape<> &shape, DALIDataType type,
int device_id, AccessOrder order, const TensorLayout &layout) {
contiguous_buffer_.set_backing_allocation(ptr, bytes, pinned, type, shape.num_elements(),
contiguous_buffer_.set_backing_allocation(std::move(ptr), bytes, pinned,
type, shape.num_elements(),
device_id, order);
buffer_bkp_.reset();
tensors_.clear();
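
The recreate_views() change above is the core of the optimization: instead of copying the contiguous buffer's shared_ptr once per sample, the loop binds a reference to it and builds each per-sample pointer with shared_ptr's aliasing constructor, which shares ownership of the whole allocation while pointing at the sample's offset. Combined with the by-value/std::move signatures of ShareData() and SetSample(), resizing a batch should no longer churn the atomic reference count for every sample. A runnable sketch of the aliasing constructor (buffer size and names are illustrative):

#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

int main() {
  // One contiguous allocation standing in for the batch buffer.
  std::shared_ptr<uint8_t> buffer(new uint8_t[16],
                                  std::default_delete<uint8_t[]>());

  // Aliasing constructor: each view shares ownership of `buffer`
  // but points at a sample's offset inside it. No new control block
  // is created, and the buffer lives as long as any view does.
  std::vector<std::shared_ptr<void>> views;
  for (int offset = 0; offset < 16; offset += 4)
    views.emplace_back(buffer, buffer.get() + offset);

  std::cout << buffer.use_count() << "\n";  // prints 5: buffer + 4 views
  return 0;
}
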
