Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ROIRandomCrop operator #2638

Merged
merged 6 commits into from
Feb 1, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions dali/operators/generic/roi_random_crop.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <random>
#include "dali/pipeline/operator/operator.h"
#include "dali/pipeline/util/batch_rng.h"
#include "dali/pipeline/operator/arg_helper.h"

namespace dali {

DALI_SCHEMA(ROIRandomCrop)
  .DocStr(R"code(Produces a fixed shape cropping window, randomly placed so that as much as possible
of the provided region of interest (ROI) is contained in it.

If the ROI is bigger than the cropping window, the cropping window will be a subwindow of the ROI.
If the ROI is smaller than the cropping window, the whole ROI shall be contained in the cropping window.

If an input shape (``in_shape``) is given, the resulting cropping window is selected to be within the
bounds of that input shape. Alternatively, the input data subject to cropping can be passed to the
operator as a positional input. When providing an input shape, the region of interest should be within
the bounds of the input and the cropping window shape should not be larger than the input shape.

If no input shape is provided, the resulting cropping window is unbounded, potentially resulting in out
of bounds cropping.

The cropping window dimensions should be explicitly provided (``crop_shape``), and the ROI should be
either specified with ``roi_start``/``roi_end`` or ``roi_start``/``roi_shape``.

The operator produces an output representing the cropping window start coordinates.
)code")
  .AddArg("crop_shape",
    R"code(Cropping window dimensions.)code", DALI_INT_VEC, true)
  .AddArg("roi_start",
    R"code(ROI start coordinates.)code", DALI_INT_VEC, true)
  .AddOptionalArg<std::vector<int>>("roi_end",
    R"code(ROI end coordinates.

.. note::
    Using ``roi_end`` is mutually exclusive with ``roi_shape``.
)code", nullptr, true)
  .AddOptionalArg<std::vector<int>>("roi_shape",
    R"code(ROI shape.

.. note::
    Using ``roi_shape`` is mutually exclusive with ``roi_end``.
)code", nullptr, true)
  .AddOptionalArg<std::vector<int>>("in_shape",
    R"code(Shape of the input data.

If provided, the cropping window start will be selected so that the cropping window is within the
bounds of the input.

.. note::
    Providing ``in_shape`` is incompatible with feeding the input data directly as a positional input.
)code", nullptr, true)
  .NumInput(0, 1)
  .NumOutput(1);

// CPU implementation of the ROIRandomCrop operator: for each sample, produces the
// start coordinates of a fixed-shape cropping window, randomly placed so as to
// maximize its overlap with the given region of interest.
class ROIRandomCropCPU : public Operator<CPUBackend> {
 public:
  explicit ROIRandomCropCPU(const OpSpec &spec);
  // Output shape and type are fully determined in SetupImpl, so they can be inferred.
  bool CanInferOutputs() const override { return true; }
  bool SetupImpl(std::vector<OutputDesc> &output_desc, const workspace_t<CPUBackend> &ws) override;
  void RunImpl(workspace_t<CPUBackend> &ws) override;

 private:
  // Per-sample RNG streams, so results are deterministic per sample index.
  BatchRNG<std::mt19937> rngs_;

  // Per-sample 1D integer arguments (each holds `ndim` extents/coordinates).
  ArgValue<int, 1> roi_start_;
  ArgValue<int, 1> roi_end_;    // mutually exclusive with roi_shape_
  ArgValue<int, 1> roi_shape_;  // mutually exclusive with roi_end_
  ArgValue<int, 1> crop_shape_;
  ArgValue<int, 1> in_shape_arg_;

  // Effective input shape (from ``in_shape`` arg or the positional input);
  // left empty when neither is provided, in which case the window is unbounded.
  TensorListShape<> in_shape_;

  USE_OPERATOR_MEMBERS();
};

ROIRandomCropCPU::ROIRandomCropCPU(const OpSpec &spec)
    : Operator<CPUBackend>(spec),
      rngs_(spec.GetArgument<int64_t>("seed"), spec.GetArgument<int64_t>("max_batch_size")),
      roi_start_("roi_start", spec),
      roi_end_("roi_end", spec),
      roi_shape_("roi_shape", spec),
      crop_shape_("crop_shape", spec),
      in_shape_arg_("in_shape", spec) {
  // Exactly one of the two ROI descriptions (end point or extent) must be given.
  DALI_ENFORCE((roi_end_.IsDefined() + roi_shape_.IsDefined()) == 1,
    "Either ROI end or ROI shape should be defined, but not both");
}

bool ROIRandomCropCPU::SetupImpl(std::vector<OutputDesc> &output_desc,
const workspace_t<CPUBackend> &ws) {
int nsamples = spec_.HasTensorArgument("crop_shape") ?
ws.ArgumentInput("crop_shape").size() :
ws.GetRequestedBatchSize(0);
crop_shape_.Acquire(spec_, ws, nsamples, true);
int ndim = crop_shape_[0].shape[0];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe you should check if crop_shape_ is uniform?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's what the true in Acquire does

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't have to - each sample can be different.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the shape of a shape we are talking about. Uniformity in crop_shape means same number of dimensions


TensorShape<1> sh{ndim};
roi_start_.Acquire(spec_, ws, nsamples, sh);
if (roi_end_.IsDefined()) {
roi_end_.Acquire(spec_, ws, nsamples, sh);
} else {
assert(roi_shape_.IsDefined());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need that assert as you already have one check in the constructor?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't it one of the reasons to have asserts - to check for inconsistent internal state?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I put it there mostly for documentation, but also to double-check that the logic is sane

roi_shape_.Acquire(spec_, ws, nsamples, sh);
}

in_shape_.shapes.clear();
if (in_shape_arg_.IsDefined() || ws.NumInput() == 1) {
DALI_ENFORCE((in_shape_arg_.IsDefined() + (ws.NumInput() == 1)) == 1,
"``in_shape`` argument is incompatible with providing an input.");
if (in_shape_arg_.IsDefined()) {
in_shape_.resize(nsamples, ndim);
in_shape_arg_.Acquire(spec_, ws, nsamples, sh);
for (int s = 0; s < nsamples; s++) {
auto sample_sh = in_shape_.tensor_shape_span(s);
for (int d = 0; d < ndim; d++) {
sample_sh[d] = in_shape_arg_[s].data[d];
}
}
} else {
auto &in = ws.template InputRef<CPUBackend>(0);
in_shape_ = in.shape();
}
Comment on lines +124 to +136
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens if no input nor in_shape_arg_ are provided?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing special, we just don't bound the cropping window to be within the image

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I see. I missed that L121 condition covers the L138 loop as well.


for (int s = 0; s < nsamples; s++) {
auto sample_sh = in_shape_.tensor_shape_span(s);
for (int d = 0; d < ndim; d++) {
DALI_ENFORCE(sample_sh[d] >= 0,
make_string("Input shape can't be negative. Got ",
sample_sh[d], " for d=", d));
DALI_ENFORCE(crop_shape_[s].data[d] >= 0,
make_string("Crop shape can't be negative. Got ", crop_shape_[s].data[d],
" for d=", d));
DALI_ENFORCE(sample_sh[d] >= crop_shape_[s].data[d],
make_string("Cropping shape can't be bigger than the input shape. "
"Got: crop_shape[", crop_shape_[s].data[d], "] and sample_shape[",
sample_sh[d], "] for d=", d));
Comment on lines +147 to +150
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder what we should do in this case...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should allow (with an extra parameter) to clip the cropping window if it's larger than input? Or, alternatively, keep out-of-bounds window (pad)? Just thinking aloud.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should complicate this even more unless we have a specific use in mind.

}
if (roi_shape_.IsDefined()) {
for (int d = 0; d < ndim; d++) {
auto roi_end = roi_start_[s].data[d] + roi_shape_[s].data[d];
DALI_ENFORCE(roi_start_[s].data[d] >= 0 && sample_sh[d] >= roi_end,
make_string("ROI can't be out of bounds. Got roi_start[",
roi_start_[s].data[d], "], roi_end[", roi_end,
"], sample_shape[", sample_sh[d], "], for d=", d));
}
} else {
for (int d = 0; d < ndim; d++) {
DALI_ENFORCE(roi_start_[s].data[d] >= 0 && sample_sh[d] >= roi_end_[s].data[d],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to bother with roi_end_ here. Maybe it should be just roi_start_ and roi_shape_, and values from roi_end should be used to infer roi_shape_ if not provided directly.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It'd involve extra storage, and the code accessing this storage would look different than the arguments directly provided (because those are tensor views). This is why I preferred to do it on the fly

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's easier to check end than start+shape?

make_string("ROI can't be out of bounds. Got roi_start[",
roi_start_[s].data[d], "], roi_end[", roi_end_[s].data[d],
"], sample_shape[", sample_sh[d], "], for d=", d));
}
}
}
}

output_desc.resize(1);
output_desc[0].shape = uniform_list_shape(nsamples, sh);
output_desc[0].type = TypeTable::GetTypeInfo(DALI_INT64);
return true;
}

void ROIRandomCropCPU::RunImpl(workspace_t<CPUBackend> &ws) {
  auto &out_crop_start = ws.template OutputRef<CPUBackend>(0);
  auto crop_start = view<int64_t, 1>(out_crop_start);

  int nsamples = crop_start.shape.size();
  int ndim = crop_start[0].shape[0];

  // NOTE: the previously acquired (unused) thread pool reference was removed -
  // the per-sample work here is trivial, so it runs sequentially.
  for (int sample_idx = 0; sample_idx < nsamples; sample_idx++) {
    // sample_sh stays null when no input shape was provided - the cropping
    // window is then unbounded.
    int64_t *sample_sh = nullptr;
    if (!in_shape_.empty())
      sample_sh = in_shape_.tensor_shape_span(sample_idx).data();
    for (int d = 0; d < ndim; d++) {
      int64_t roi_start = roi_start_[sample_idx].data[d];
      int64_t crop_extent = crop_shape_[sample_idx].data[d];
      // ROI extent comes either from roi_end (derived) or directly from roi_shape.
      int64_t roi_extent = roi_end_.IsDefined() ?
          roi_end_[sample_idx].data[d] - roi_start :
          roi_shape_[sample_idx].data[d];

      if (roi_extent == crop_extent) {
        // Exact fit - the only valid start is the ROI start itself.
        crop_start[sample_idx].data[d] = roi_start;
      } else {
        // Valid start range: a subwindow of the ROI when the crop is smaller,
        // or a window containing the whole ROI when the crop is bigger.
        int64_t start_range[2] = {roi_start, roi_start + roi_extent - crop_extent};
        if (start_range[0] > start_range[1])
          std::swap(start_range[0], start_range[1]);

        if (sample_sh) {
          // Clamp the range so the window stays within the input bounds.
          start_range[0] = std::max<int64_t>(0, start_range[0]);
          start_range[1] = std::min<int64_t>(sample_sh[d] - crop_extent, start_range[1]);
        }

        auto dist = std::uniform_int_distribution<int64_t>(start_range[0], start_range[1]);
        crop_start[sample_idx].data[d] = dist(rngs_[sample_idx]);
      }
    }
  }
}

// Register the operator under the public name ``ROIRandomCrop`` (CPU backend only).
DALI_REGISTER_OPERATOR(ROIRandomCrop, ROIRandomCropCPU, CPU);

}  // namespace dali
163 changes: 163 additions & 0 deletions dali/test/python/test_operator_roi_random_crop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import numpy as np
import nvidia.dali as dali
import nvidia.dali.fn as fn
import nvidia.dali.types as types
import nvidia.dali.math as math
from test_utils import check_batch, dali_type
import random
from nose.tools import assert_raises

# Fixed seed so the randomly generated test shapes are reproducible.
np.random.seed(4321)

def random_shape(min_sh, max_sh, ndim):
    # Draw one independent random extent per dimension, each in [min_sh, max_sh).
    extents = [np.random.randint(min_sh, max_sh) for _ in range(ndim)]
    return np.array(extents, dtype=np.int32)

def batch_gen(max_batch_size, sample_shape_fn, dtype=np.float32):
    # Random batch size in [1, max_batch_size); each sample is a zero-filled
    # array whose shape comes from sample_shape_fn().
    batch_size = np.random.randint(1, max_batch_size)
    return [np.zeros(sample_shape_fn(), dtype=dtype) for _ in range(batch_size)]

def check_roi_random_crop(ndim=2, max_batch_size=16,
                          roi_min_start=0, roi_max_start=100,
                          roi_min_extent=20, roi_max_extent=50,
                          crop_min_extent=20, crop_max_extent=50,
                          in_shape_min=400, in_shape_max=500,
                          niter=3):
    """Builds a pipeline exercising roi_random_crop with every supported way of
    providing the ROI (roi_end vs roi_shape) and input shape (none, positional
    input, in_shape argument), then verifies the produced crop start coordinates.
    """
    pipe = dali.pipeline.Pipeline(batch_size=max_batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        assert in_shape_min < in_shape_max
        shape_gen_fn = lambda: random_shape(in_shape_min, in_shape_max, ndim)
        data_gen_f = lambda: batch_gen(max_batch_size, shape_gen_fn)
        shape_like_in = dali.fn.external_source(data_gen_f, device='cpu')
        in_shape = dali.fn.shapes(shape_like_in, dtype=types.INT32)

        # Exercise both constant and tensor arguments. In the constant case,
        # crop_shape must be a plain Python list: numpy arrays are passed as
        # argument inputs and would not test the non-tensor path.
        if random.choice([True, False]):
            crop_shape = [(crop_min_extent + crop_max_extent) // 2] * ndim
        else:
            crop_shape = fn.random.uniform(range=(crop_min_extent, crop_max_extent + 1),
                                           shape=(ndim,), dtype=types.INT32, device='cpu')

        # roi_start and roi_shape are chosen together (both constants or both
        # DataNodes) so that `roi_start + roi_shape` below is well-defined -
        # mixing np.ndarray with a DataNode in `+` is not supported.
        if random.choice([True, False]):
            roi_shape = np.array([(roi_min_extent + roi_max_extent) // 2] * ndim, dtype=np.int32)
            roi_start = np.array([(roi_min_start + roi_max_start) // 2] * ndim, dtype=np.int32)
        else:
            roi_shape = fn.random.uniform(range=(roi_min_extent, roi_max_extent + 1),
                                          shape=(ndim,), dtype=types.INT32, device='cpu')
            roi_start = fn.random.uniform(range=(roi_min_start, roi_max_start + 1),
                                          shape=(ndim,), dtype=types.INT32, device='cpu')
        roi_end = roi_start + roi_shape

        outs = [
            # Unbounded crop window (no input shape given).
            fn.roi_random_crop(crop_shape=crop_shape,
                               roi_start=roi_start, roi_shape=roi_shape,
                               device='cpu'),
            fn.roi_random_crop(crop_shape=crop_shape,
                               roi_start=roi_start, roi_end=roi_end,
                               device='cpu'),
            # Bounded by a positional input.
            fn.roi_random_crop(shape_like_in, crop_shape=crop_shape,
                               roi_start=roi_start, roi_shape=roi_shape,
                               device='cpu'),
            fn.roi_random_crop(shape_like_in, crop_shape=crop_shape,
                               roi_start=roi_start, roi_end=roi_end,
                               device='cpu'),
            # Bounded by the in_shape argument.
            fn.roi_random_crop(in_shape=in_shape, crop_shape=crop_shape,
                               roi_start=roi_start, roi_shape=roi_shape,
                               device='cpu'),
            fn.roi_random_crop(in_shape=in_shape, crop_shape=crop_shape,
                               roi_start=roi_start, roi_end=roi_end,
                               device='cpu'),
        ]

        outputs = [in_shape, roi_start, roi_shape, crop_shape, *outs]
        pipe.set_outputs(*outputs)
    pipe.build()
    for _ in range(niter):
        outputs = pipe.run()
        batch_size = len(outputs[0])
        for s in range(batch_size):
            in_shape = np.array(outputs[0][s]).tolist()
            roi_start = np.array(outputs[1][s]).tolist()
            roi_shape = np.array(outputs[2][s]).tolist()
            crop_shape = np.array(outputs[3][s]).tolist()

            def check_crop_start(crop_start, roi_start, roi_shape, crop_shape, in_shape=None):
                # The crop window must contain the ROI (or be contained in it if the
                # ROI is bigger), and stay within the input bounds when those are known.
                ndim = len(crop_start)
                roi_end = [roi_start[d] + roi_shape[d] for d in range(ndim)]
                crop_end = [crop_start[d] + crop_shape[d] for d in range(ndim)]
                for d in range(ndim):
                    if in_shape is not None:
                        assert crop_start[d] >= 0
                        assert crop_end[d] <= in_shape[d]

                    if crop_shape[d] >= roi_shape[d]:
                        assert crop_start[d] <= roi_start[d]
                        assert crop_end[d] >= roi_end[d]
                    else:
                        assert crop_start[d] >= roi_start[d]
                        assert crop_end[d] <= roi_end[d]
            # Outputs 4-5 are unbounded; 6-9 must also respect the input shape.
            for idx in range(4, 6):
                check_crop_start(np.array(outputs[idx][s]).tolist(), roi_start, roi_shape, crop_shape)
            for idx in range(6, 10):
                check_crop_start(np.array(outputs[idx][s]).tolist(), roi_start, roi_shape, crop_shape, in_shape)

def test_roi_random_crop():
    """Nose test generator: yields check_roi_random_crop cases for 2D and 3D data
    covering ROI smaller than, larger than, and at the edge of the crop window."""
    batch_size = 16
    niter = 3
    in_shape_min, in_shape_max = 250, 300
    roi_cases = [(20, 50, 10, 20, 30, 40),
                 (20, 50, 100, 140, 30, 40),
                 (0, 1, 10, 20, 80, 100)]
    for ndim in (2, 3):
        for (roi_start_min, roi_start_max, roi_extent_min, roi_extent_max,
             crop_extent_min, crop_extent_max) in roi_cases:
            yield (check_roi_random_crop, ndim, batch_size,
                   roi_start_min, roi_start_max, roi_extent_min, roi_extent_max,
                   crop_extent_min, crop_extent_max, in_shape_min, in_shape_max, niter)

def check_roi_random_crop_error(shape_like_in=None, in_shape=None, crop_shape=None, roi_start=None,
                                roi_shape=None, roi_end=None):
    """Builds a roi_random_crop pipeline with the given (invalid) arguments and
    expects a RuntimeError when building/running it."""
    ndim = 2
    batch_size = 3
    niter = 3
    pipe = dali.pipeline.Pipeline(batch_size=batch_size, num_threads=4, device_id=0, seed=1234)
    with pipe:
        inputs = [shape_like_in] if shape_like_in is not None else []
        out = fn.roi_random_crop(*inputs,
                                 in_shape=in_shape,
                                 crop_shape=crop_shape,
                                 roi_start=roi_start,
                                 roi_shape=roi_shape,
                                 roi_end=roi_end,
                                 device='cpu')
        pipe.set_outputs(out)
    with assert_raises(RuntimeError):
        pipe.build()
        for _ in range(niter):
            outputs = pipe.run()

def test_roi_random_crop_error_incompatible_args():
    """Mutually exclusive arguments: in_shape with a positional input,
    and roi_shape together with roi_end."""
    in_shape = np.array([4, 4])
    crop_shape = np.array([2, 2])
    roi_start = np.array([1, 1])
    roi_shape = np.array([1, 1])
    roi_end = np.array([2, 2])
    cases = [
        (np.zeros(in_shape), in_shape, crop_shape, roi_start, roi_shape, None),
        (np.zeros(in_shape), None, crop_shape, roi_start, roi_shape, roi_end),
    ]
    for args in cases:
        yield (check_roi_random_crop_error, *args)

def test_roi_random_crop_error_wrong_args():
    """Invalid argument values: negative shapes, out-of-bounds ROI,
    and a crop window larger than the input."""
    in_shape = np.array([4, 4])
    crop_shape = np.array([2, 2])
    roi_start = np.array([1, 1])
    roi_shape = np.array([1, 1])
    roi_end = np.array([2, 2])
    cases = [
        # Negative shape
        (np.array([-4, 4]), crop_shape, roi_start, roi_shape, None),
        (in_shape, np.array([1, -1]), roi_start, roi_shape, None),
        # Out of bounds ROI
        (in_shape, crop_shape, np.array([-1, -1]), roi_shape, None),
        (in_shape, crop_shape, roi_start, np.array([4, 4]), None),
        (in_shape, crop_shape, roi_start, None, np.array([5, 5])),
        # Out of bounds crop
        (in_shape, np.array([10, 10]), roi_start, roi_shape, None),
    ]
    for in_sh, crop_sh, start, shape, end in cases:
        yield check_roi_random_crop_error, None, in_sh, crop_sh, start, shape, end