diff --git a/dali/benchmark/caffe2_alexnet_bench.cc b/dali/benchmark/caffe2_alexnet_bench.cc index 52e824f991d..cd43eb4b9c3 100755 --- a/dali/benchmark/caffe2_alexnet_bench.cc +++ b/dali/benchmark/caffe2_alexnet_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -127,7 +127,7 @@ BENCHMARK_DEFINE_F(C2Alexnet, Caffe2Pipe)(benchmark::State& st) { // NOLINT } } - WriteCHWBatch(ws.Output(0), 128, 1, "img"); + WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); @@ -258,7 +258,7 @@ BENCHMARK_DEFINE_F(C2Alexnet, HybridPipe)(benchmark::State& st) { // NOLINT } } - // WriteCHWBatch(ws.Output(0), 128, 1, "img"); + // WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); diff --git a/dali/benchmark/caffe_alexnet_bench.cc b/dali/benchmark/caffe_alexnet_bench.cc index 7af2b9565db..d4369e71c26 100755 --- a/dali/benchmark/caffe_alexnet_bench.cc +++ b/dali/benchmark/caffe_alexnet_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -128,7 +128,7 @@ BENCHMARK_DEFINE_F(Alexnet, CaffePipe)(benchmark::State& st) { // NOLINT } } - WriteCHWBatch(ws.Output(0), 128, 1, "img"); + WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); @@ -259,7 +259,7 @@ BENCHMARK_DEFINE_F(Alexnet, HybridPipe)(benchmark::State& st) { // NOLINT } } - // WriteCHWBatch(ws.Output(0), 128, 1, "img"); + // WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); diff --git a/dali/benchmark/decoder_bench.cc b/dali/benchmark/decoder_bench.cc index d44259f1fa7..c6e2294de6f 100644 --- a/dali/benchmark/decoder_bench.cc +++ b/dali/benchmark/decoder_bench.cc @@ -81,7 +81,7 @@ class DecoderBench : public DALIBenchmark { } } - // WriteCHWBatch(ws.Output(0), 128, 1, "img"); + // WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + 1; st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); diff --git a/dali/benchmark/file_reader_alexnet_bench.cc b/dali/benchmark/file_reader_alexnet_bench.cc index a1cc6867a5c..e5989388b20 100755 --- a/dali/benchmark/file_reader_alexnet_bench.cc +++ b/dali/benchmark/file_reader_alexnet_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -128,7 +128,7 @@ BENCHMARK_DEFINE_F(FileReaderAlexnet, CaffePipe)(benchmark::State& st) { // NOLI } } - WriteCHWBatch(ws.Output(0), 128, 1, "img"); + WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); diff --git a/dali/benchmark/resnet50_bench.cc b/dali/benchmark/resnet50_bench.cc index 12633d258d3..1bbf1c1ed07 100755 --- a/dali/benchmark/resnet50_bench.cc +++ b/dali/benchmark/resnet50_bench.cc @@ -131,7 +131,7 @@ BENCHMARK_DEFINE_F(RN50, C2Pipe)(benchmark::State& st) { // NOLINT } } - // WriteCHWBatch(ws.Output(0), 128, 1, "img"); + // WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); @@ -266,7 +266,7 @@ BENCHMARK_DEFINE_F(RN50, HybridPipe)(benchmark::State& st) { // NOLINT } } - // WriteCHWBatch(ws.Output(0), 128, 1, "img"); + // WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); @@ -379,7 +379,7 @@ BENCHMARK_DEFINE_F(RN50, nvJPEGPipe)(benchmark::State& st) { // NOLINT } } - // WriteCHWBatch(ws.Output(0), 128, 1, "img"); + // WriteCHWBatch(ws.OutputRef(0), 128, 1, "img"); int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, benchmark::Counter::kIsRate); diff --git a/dali/benchmark/resnet50_nvjpeg_bench.cc b/dali/benchmark/resnet50_nvjpeg_bench.cc index 893ce3eeb9f..b089be10e2f 100755 --- a/dali/benchmark/resnet50_nvjpeg_bench.cc +++ b/dali/benchmark/resnet50_nvjpeg_bench.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -112,7 +112,7 @@ BENCHMARK_DEFINE_F(RealRN50, nvjpegPipe)(benchmark::State& st) { // NOLINT } #if DALI_DEBUG - WriteHWCBatch(ws.Output(0), "img"); + WriteHWCBatch(ws.OutputRef(0), "img"); #endif int num_batches = st.iterations() + static_cast(pipelined); st.counters["FPS"] = benchmark::Counter(batch_size*num_batches, diff --git a/dali/c_api/c_api.cc b/dali/c_api/c_api.cc index 8902a3113f7..f1e0860ded7 100644 --- a/dali/c_api/c_api.cc +++ b/dali/c_api/c_api.cc @@ -318,9 +318,9 @@ void daliOutputRelease(daliPipelineHandle *pipe_handle) { int64_t daliOutputHasUniformShape(daliPipelineHandle* pipe_handle, int i) { dali::DeviceWorkspace* ws = reinterpret_cast(pipe_handle->ws); if (ws->OutputIsType(i)) { - return is_uniform(ws->Output(i).shape()); + return is_uniform(ws->OutputRef(i).shape()); } else { - return is_uniform(ws->Output(i).shape()); + return is_uniform(ws->OutputRef(i).shape()); } } @@ -328,7 +328,7 @@ template static int64_t *daliShapeAtHelper(dali::DeviceWorkspace *ws, int n, int k) { int64_t *c_shape = nullptr; std::vector shape; - const auto &out_tensor_list = ws->Output(n); + const auto &out_tensor_list = ws->OutputRef(n); if (k >= 0) { auto shape_span = out_tensor_list.tensor_shape_span(k); shape = std::vector(shape_span.begin(), shape_span.end()); @@ -366,7 +366,7 @@ int64_t* daliShapeAt(daliPipelineHandle* pipe_handle, int n) { template static dali_data_type_t daliTypeAtHelper(dali::DeviceWorkspace* ws, int n) { - const auto &out_tensor_list = ws->Output(n); + const auto &out_tensor_list = ws->OutputRef(n); auto type_id = out_tensor_list.type(); return static_cast(static_cast(type_id)); } @@ -383,7 +383,7 @@ dali_data_type_t daliTypeAt(daliPipelineHandle* pipe_handle, int n) { template static size_t daliNumTensorsHelper(dali::DeviceWorkspace* ws, int n) { - return ws->Output(n).num_samples(); + return ws->OutputRef(n).num_samples(); } size_t daliNumTensors(daliPipelineHandle* pipe_handle, int n) { @@ -397,7 +397,7 @@ size_t daliNumTensors(daliPipelineHandle* pipe_handle, int n) { template static size_t daliNumElementsHelper(dali::DeviceWorkspace* ws, int n) { - return ws->Output(n)._num_elements(); + return ws->OutputRef(n)._num_elements(); } size_t daliNumElements(daliPipelineHandle* pipe_handle, int n) { @@ -411,7 +411,7 @@ size_t daliNumElements(daliPipelineHandle* pipe_handle, int n) { template static size_t daliTensorSizeHelper(dali::DeviceWorkspace* ws, int n) { - return ws->Output(n).nbytes(); + return ws->OutputRef(n).nbytes(); } size_t daliTensorSize(daliPipelineHandle* pipe_handle, int n) { @@ -425,7 +425,7 @@ size_t daliTensorSize(daliPipelineHandle* pipe_handle, int n) { template static size_t daliMaxDimTensorsHelper(dali::DeviceWorkspace* ws, int n) { - const auto &out_tensor_list = ws->Output(n); + const auto &out_tensor_list = ws->OutputRef(n); size_t tensors_num = out_tensor_list.num_samples(); int max_num_dim = 0; for (size_t i = 0; i < tensors_num; ++i) { diff --git a/dali/c_api/c_api_test.cc b/dali/c_api/c_api_test.cc index 644e7974750..df83ef5c1f0 100644 --- a/dali/c_api/c_api_test.cc +++ b/dali/c_api/c_api_test.cc @@ -152,7 +152,7 @@ void ComparePipelinesOutputs(daliPipelineHandle &handle, Pipeline &baseline, EXPECT_EQ(daliNumTensors(&handle, output), batch_size); for (int elem = 0; elem < batch_size; elem++) { auto *shape = daliShapeAtSample(&handle, output, elem); - auto ref_shape = ws.Output(output).shape()[elem]; + auto ref_shape = ws.OutputRef(output).shape()[elem]; int D = ref_shape.size(); for (int d = 0; d < D; d++) EXPECT_EQ(shape[d], 
ref_shape[d]); @@ -162,7 +162,7 @@ void ComparePipelinesOutputs(daliPipelineHandle &handle, Pipeline &baseline, TensorList pipeline_output_cpu, c_api_output_cpu; // Unnecessary copy in case of CPUBackend, makes the code generic across Backends - pipeline_output_cpu.Copy(ws.Output(0), cuda_stream); + pipeline_output_cpu.Copy(ws.OutputRef(0), cuda_stream); auto num_elems = pipeline_output_cpu.shape().num_elements(); auto backend_buf = AllocBuffer(num_elems * sizeof(uint8_t), false); diff --git a/dali/operators/bbox/bbox_paste.cc b/dali/operators/bbox/bbox_paste.cc index 4b8588d8e21..82aee69da84 100644 --- a/dali/operators/bbox/bbox_paste.cc +++ b/dali/operators/bbox/bbox_paste.cc @@ -56,14 +56,14 @@ canvas and ``(1,1)`` aligns it to bottom-right. template<> void BBoxPaste::RunImpl(Workspace &ws) { - const auto &input = ws.Input(0); + const auto &input = ws.InputRef(0); const auto input_data = input.data(); DALI_ENFORCE(input.type() == DALI_FLOAT, "Bounding box in wrong format"); DALI_ENFORCE(input.size() % 4 == 0, "Bounding box tensor size must be a multiple of 4." "Got: " + std::to_string(input.size())); - auto &output = ws.Output(0); + auto &output = ws.OutputRef(0); output.Resize(input.shape(), DALI_FLOAT); auto *output_data = output.mutable_data(); diff --git a/dali/operators/debug/dump_image.cc b/dali/operators/debug/dump_image.cc index 266b320a242..29740871d0f 100644 --- a/dali/operators/debug/dump_image.cc +++ b/dali/operators/debug/dump_image.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,8 +21,8 @@ namespace dali { template<> void DumpImage::RunImpl(SampleWorkspace &ws) { - auto &input = ws.Input(0); - auto &output = ws.Output(0); + auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); DALI_ENFORCE(input.ndim() == 3, make_string("Input images must have three dimensions, got input with `", diff --git a/dali/operators/debug/dump_image.cu b/dali/operators/debug/dump_image.cu index edcd5bd7e0a..913ce81da21 100644 --- a/dali/operators/debug/dump_image.cu +++ b/dali/operators/debug/dump_image.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -20,8 +20,8 @@ namespace dali { template<> void DumpImage::RunImpl(DeviceWorkspace &ws) { - auto &input = ws.Input(0); - auto &output = ws.Output(0); + auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); DALI_ENFORCE(input.shape().sample_dim() == 3, diff --git a/dali/operators/decoder/host/host_decoder.cc b/dali/operators/decoder/host/host_decoder.cc index 55518397181..afc37b83896 100644 --- a/dali/operators/decoder/host/host_decoder.cc +++ b/dali/operators/decoder/host/host_decoder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -21,8 +21,8 @@ namespace dali { void HostDecoder::RunImpl(SampleWorkspace &ws) { - const auto &input = ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); auto file_name = input.GetSourceInfo(); // Verify input diff --git a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h index 29842aa08e2..267b27dd8d9 100644 --- a/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h +++ b/dali/operators/decoder/nvjpeg/nvjpeg_decoder_decoupled_api.h @@ -554,7 +554,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { #endif // NVJPEG2K_ENABLED for (int i = 0; i < curr_batch_size; i++) { - const auto &in = ws.Input(0, i); + const auto &in = ws.InputRef(0)[i]; const auto in_size = in.size(); thread_pool_.AddWork([this, i, &in, in_size](int tid) { auto *input_data = in.data(); @@ -683,7 +683,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { } void ProcessImagesCache(MixedWorkspace &ws) { - auto& output = ws.Output(0); + auto& output = ws.OutputRef(0); for (auto *sample : samples_cache_) { assert(sample); auto i = sample->sample_idx; @@ -694,12 +694,12 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { } void ProcessImagesCuda(MixedWorkspace &ws) { - auto& output = ws.Output(0); + auto& output = ws.OutputRef(0); for (auto *sample : samples_single_) { assert(sample); auto i = sample->sample_idx; auto *output_data = output.mutable_tensor(i); - const auto &in = ws.Input(0, i); + const auto &in = ws.InputRef(0)[i]; thread_pool_.AddWork( [this, sample, &in, output_data](int tid) { SampleWorker(sample->sample_idx, sample->file_name, in.size(), tid, @@ -799,11 +799,11 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { } void ProcessImagesHost(MixedWorkspace &ws) { - auto& output = ws.Output(0); + auto& output = ws.OutputRef(0); for (auto *sample : samples_host_) { auto i = sample->sample_idx; auto *output_data = output.mutable_tensor(i); - const auto &in = ws.Input(0, i); + const auto &in = ws.InputRef(0)[i]; ImageCache::ImageShape shape = output_shape_[i].to_static<3>(); thread_pool_.AddWork( [this, sample, &in, output_data, shape](int tid) { @@ -816,7 +816,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { void ProcessImagesHw(MixedWorkspace &ws) { #if IS_HW_DECODER_COMPATIBLE - auto& output = ws.Output(0); + auto& output = ws.OutputRef(0); if (!samples_hw_batched_.empty()) { nvjpegJpegState_t &state = state_hw_batched_; assert(state != nullptr); @@ -839,7 +839,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { for (auto *sample : samples_hw_batched_) { int i = sample->sample_idx; - const auto &in = ws.Input(0, i); + const auto &in = ws.InputRef(0)[i]; const auto &out_shape = output_shape_.tensor_shape(i); tv[j].ShareData(const_cast &>(in)); @@ -891,7 +891,7 @@ class nvJPEGDecoder : public Operator, CachedDecoderImpl { } void ProcessImages(MixedWorkspace &ws) { - auto &output = ws.Output(0); + auto &output = ws.OutputRef(0); assert(output_shape_.num_samples() == ws.GetInputBatchSize(0)); // If fails: Incorrect number of samples in shape output.Resize(output_shape_, DALI_UINT8); diff --git a/dali/operators/generic/flip.cc b/dali/operators/generic/flip.cc index 54e321101bf..11be631d296 100644 --- a/dali/operators/generic/flip.cc +++ b/dali/operators/generic/flip.cc @@ -59,8 +59,8 @@ void RunFlip(Tensor &output, const Tensor &input, template <> void Flip::RunImpl(Workspace &ws) { - const auto &input 
= ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); auto layout = input.GetLayout(); output.SetLayout(layout); output.Resize(input.shape(), input.type()); diff --git a/dali/operators/generic/flip.cu b/dali/operators/generic/flip.cu index fb682f7b943..cc86291c4eb 100644 --- a/dali/operators/generic/flip.cu +++ b/dali/operators/generic/flip.cu @@ -43,8 +43,8 @@ void RunKernel(TensorList &output, const TensorList &inp template <> void Flip::RunImpl(Workspace &ws) { - const auto &input = ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); output.SetLayout(input.GetLayout()); output.Resize(input.shape(), input.type()); auto curr_batch_size = ws.GetInputBatchSize(0); diff --git a/dali/operators/generic/pad.cu b/dali/operators/generic/pad.cu index 62ceb2c074e..11bef081727 100644 --- a/dali/operators/generic/pad.cu +++ b/dali/operators/generic/pad.cu @@ -25,7 +25,7 @@ template <> bool Pad::SetupImpl(std::vector &output_desc, const workspace_t &ws) { output_desc.resize(1); - const auto &input = ws.Input(0); + const auto &input = ws.InputRef(0); auto in_shape = input.shape(); auto in_layout = input.GetLayout(); int ndim = in_shape.sample_dim(); @@ -57,8 +57,8 @@ bool Pad::SetupImpl(std::vector &output_desc, template <> void Pad::RunImpl(workspace_t &ws) { - const auto &input = ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); output.SetLayout(input.GetLayout()); int ndim = input.shape().sample_dim(); TYPE_SWITCH(input.type(), type2id, T, PAD_SUPPORTED_TYPES, ( diff --git a/dali/operators/image/color/brightness_contrast.cu b/dali/operators/image/color/brightness_contrast.cu index 501a4f53763..fb42505d024 100644 --- a/dali/operators/image/color/brightness_contrast.cu +++ b/dali/operators/image/color/brightness_contrast.cu @@ -54,8 +54,8 @@ bool BrightnessContrastGpu::SetupImpl(std::vector &output_desc, void BrightnessContrastGpu::RunImpl(workspace_t &ws) { - const auto &input = ws.template Input(0); - auto &output = ws.template Output(0); + const auto &input = ws.template InputRef(0); + auto &output = ws.template OutputRef(0); output.SetLayout(input.GetLayout()); TYPE_SWITCH(input.type(), type2id, InputType, (uint8_t, int16_t, int32_t, float), ( TYPE_SWITCH(output_type_, type2id, OutputType, (uint8_t, int16_t, int32_t, float), ( diff --git a/dali/operators/image/color/color_twist.cu b/dali/operators/image/color/color_twist.cu index 7917d1bc80b..e4b10ed26f8 100644 --- a/dali/operators/image/color/color_twist.cu +++ b/dali/operators/image/color/color_twist.cu @@ -49,8 +49,8 @@ bool ColorTwistGpu::SetupImpl(std::vector &output_desc, const Device void ColorTwistGpu::RunImpl(workspace_t &ws) { - const auto &input = ws.template Input(0); - auto &output = ws.template Output(0); + const auto &input = ws.template InputRef(0); + auto &output = ws.template OutputRef(0); output.SetLayout(input.GetLayout()); TYPE_SWITCH(input.type(), type2id, InputType, (uint8_t, int16_t, int32_t, float), ( TYPE_SWITCH(output_type_, type2id, OutputType, (uint8_t, int16_t, int32_t, float), ( diff --git a/dali/operators/image/color/old_color_twist.cc b/dali/operators/image/color/old_color_twist.cc index 0b52b9f25b7..986503497dd 100644 --- a/dali/operators/image/color/old_color_twist.cc +++ b/dali/operators/image/color/old_color_twist.cc @@ -213,9 +213,9 @@ typedef NppStatus (*colorTwistFunc)(const Npp8u *pSrc, int nSrcStep, Npp8u *pDst 
template<> void OldColorTwistBase::RunImpl(DeviceWorkspace &ws) { - const auto &input = ws.Input(0); + const auto &input = ws.InputRef(0); DALI_ENFORCE(IsType(input.type()), "Color augmentations accept only uint8 tensors"); - auto &output = ws.Output(0); + auto &output = ws.OutputRef(0); output.Resize(input.shape(), DALI_UINT8); output.SetLayout(input.GetLayout()); @@ -248,8 +248,8 @@ void OldColorTwistBase::RunImpl(DeviceWorkspace &ws) { template <> void OldColorTwistBase::RunImpl(SampleWorkspace &ws) { - const auto &input = ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); const auto &input_shape = input.shape(); CheckParam(input, "Color augmentation"); diff --git a/dali/operators/image/paste/multipaste.cu b/dali/operators/image/paste/multipaste.cu index aea5171782d..64e2eeaa9e7 100644 --- a/dali/operators/image/paste/multipaste.cu +++ b/dali/operators/image/paste/multipaste.cu @@ -55,7 +55,7 @@ void MultiPasteGPU::InitSamples(const TensorListShape<> &out_shape) { template void MultiPasteGPU::SetupTyped(const workspace_t &ws, const TensorListShape<> &out_shape) { - const auto &images = ws.template Input(0); + const auto &images = ws.template InputRef(0); const auto &in = view(images); using Kernel = kernels::PasteGPU; kernels::KernelContext ctx; @@ -68,8 +68,8 @@ void MultiPasteGPU::SetupTyped(const workspace_t &ws, template void MultiPasteGPU::RunTyped(workspace_t &ws) { - const auto &images = ws.template Input(0); - auto &output = ws.template Output(0); + const auto &images = ws.template InputRef(0); + auto &output = ws.template OutputRef(0); output.SetLayout(images.GetLayout()); auto out_shape = output.shape(); diff --git a/dali/operators/image/paste/paste.cu b/dali/operators/image/paste/paste.cu index 0bd6e78ae30..e420d0eb386 100644 --- a/dali/operators/image/paste/paste.cu +++ b/dali/operators/image/paste/paste.cu @@ -124,8 +124,8 @@ void Paste::SetupSharedSampleParams(DeviceWorkspace &ws) { template<> void Paste::SetupSampleParams(DeviceWorkspace &ws) { - auto &input = ws.Input(0); - auto &output = ws.Output(0); + auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); auto curr_batch_size = ws.GetInputBatchSize(0); std::vector> output_shape(curr_batch_size); diff --git a/dali/operators/image/resize/random_resized_crop.cu b/dali/operators/image/resize/random_resized_crop.cu index 58c6a0304a9..49049937417 100644 --- a/dali/operators/image/resize/random_resized_crop.cu +++ b/dali/operators/image/resize/random_resized_crop.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -30,8 +30,8 @@ void RandomResizedCrop::BackendInit() { template<> void RandomResizedCrop::RunImpl(DeviceWorkspace &ws) { - auto &input = ws.Input(0); - auto &output = ws.Output(0); + auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); RunResize(ws, output, input); output.SetLayout(input.GetLayout()); } diff --git a/dali/operators/image/resize/resize.cc b/dali/operators/image/resize/resize.cc index 086b4409ac1..95a506d44c5 100755 --- a/dali/operators/image/resize/resize.cc +++ b/dali/operators/image/resize/resize.cc @@ -82,14 +82,14 @@ void Resize::InitializeBackend() { template<> void Resize::RunImpl(DeviceWorkspace &ws) { - const auto &input = ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); RunResize(ws, output, input); output.SetLayout(input.GetLayout()); if (save_attrs_) { - auto &attr_out = ws.Output(1); + auto &attr_out = ws.OutputRef(1); const auto &attr_shape = attr_out.shape(); assert(attr_shape.num_samples() == input.shape().num_samples() && attr_shape.sample_dim() == 1 && diff --git a/dali/operators/image/resize/resize_crop_mirror.h b/dali/operators/image/resize/resize_crop_mirror.h index c1621085518..04ff4aade6d 100755 --- a/dali/operators/image/resize/resize_crop_mirror.h +++ b/dali/operators/image/resize/resize_crop_mirror.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -164,10 +164,10 @@ class ResizeCropMirrorAttr : protected CropAttr { * @return const vector One matching shape for all inputs */ virtual const std::vector CheckShapes(const SampleWorkspace *ws) { - const auto &input = ws->Input(0); + const auto &input = ws->InputRef(0); // enforce that all shapes match for (int i = 1; i < ws->NumInput(); ++i) { - DALI_ENFORCE(input.SameShape(ws->Input(i))); + DALI_ENFORCE(input.SameShape(ws->InputRef(i))); } DALI_ENFORCE(input.ndim() == 3, "Operator expects 3-dimensional image input."); @@ -230,8 +230,8 @@ class ResizeCropMirror : public Operator, protected ResizeCropMirror } inline void RunResizeImpl(SampleWorkspace &ws, resizeCropMirroHost func) { - auto &input = ws.Input(0); - auto &output = ws.Output(0); + auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); CheckParam(input, "ResizeCropMirror"); const TransformMeta &meta = per_thread_meta_[ws.thread_idx()]; diff --git a/dali/operators/python_function/dltensor_function.cc b/dali/operators/python_function/dltensor_function.cc index e5987d45da5..845ae62185c 100644 --- a/dali/operators/python_function/dltensor_function.cc +++ b/dali/operators/python_function/dltensor_function.cc @@ -78,7 +78,7 @@ py::list PrepareDLTensorInputs(HostWorkspace &ws) { for (Index idx = 0; idx < ws.NumInput(); ++idx) { py::list dl_tensor_list; for (Index i = 0; i < ws.GetInputBatchSize(idx); ++i) { - auto &t = ws.Input(idx, i); + auto &t = ws.InputRef(idx)[i]; auto dl_capsule = TensorToDLPackView(const_cast&>(t)); dl_tensor_list.append(dl_capsule); } @@ -106,7 +106,7 @@ py::list PrepareDLTensorInputsPerSample(HostWorkspace &ws) { for (Index s = 0; s < batch_size; ++s) { py::list tuple; for (Index idx = 0; idx < ws.NumInput(); ++idx) { - auto &t = ws.Input(idx, s); + auto &t = ws.InputRef(idx)[s]; auto dl_capsule = TensorToDLPackView(const_cast&>(t)); tuple.append(dl_capsule); } diff --git 
a/dali/operators/reader/coco_reader_op.cc b/dali/operators/reader/coco_reader_op.cc index aaafb98702c..62d46589acf 100755 --- a/dali/operators/reader/coco_reader_op.cc +++ b/dali/operators/reader/coco_reader_op.cc @@ -201,7 +201,7 @@ void COCOReader::RunImpl(SampleWorkspace &ws) { const ImageLabelWrapper& image_label = GetSample(ws.data_idx()); Index image_size = image_label.image.size(); - auto &image_output = ws.Output(0); + auto &image_output = ws.OutputRef(0); int image_idx = image_label.label; image_output.Resize({image_size}, DALI_UINT8); @@ -210,20 +210,20 @@ void COCOReader::RunImpl(SampleWorkspace &ws) { auto &loader_impl = LoaderImpl(); auto bboxes = loader_impl.bboxes(image_idx); - auto &boxes_output = ws.Output(1); + auto &boxes_output = ws.OutputRef(1); boxes_output.Resize({bboxes.size(), 4}, DALI_FLOAT); std::memcpy(boxes_output.mutable_data(), bboxes.data(), bboxes.size() * sizeof(vec<4>)); auto labels = loader_impl.labels(image_idx); - auto &labels_output = ws.Output(2); + auto &labels_output = ws.OutputRef(2); labels_output.Resize({labels.size()}, DALI_INT32); // 0.28dev: changed shape from {N, 1} to {N} std::memcpy(labels_output.mutable_data(), labels.data(), labels.size() * sizeof(int)); int curr_out_idx = 3; if (output_polygon_masks_) { - auto &polygons_output = ws.Output(curr_out_idx++); + auto &polygons_output = ws.OutputRef(curr_out_idx++); auto polygons = loader_impl.polygons(image_idx); polygons_output.Resize({polygons.size(), 3}, DALI_INT32); std::memcpy(polygons_output.mutable_data(), @@ -235,7 +235,7 @@ void COCOReader::RunImpl(SampleWorkspace &ws) { poly_data[i * 3 + 2] *= 2; } } - auto &vertices_output = ws.Output(curr_out_idx++); + auto &vertices_output = ws.OutputRef(curr_out_idx++); auto vertices = loader_impl.vertices(image_idx); vertices_output.Resize({vertices.size(), 2}, DALI_FLOAT); std::memcpy(vertices_output.mutable_data(), @@ -243,7 +243,7 @@ void COCOReader::RunImpl(SampleWorkspace &ws) { } if (output_pixelwise_masks_) { - auto &masks_output = ws.Output(curr_out_idx++); + auto &masks_output = ws.OutputRef(curr_out_idx++); auto masks_info = loader_impl.pixelwise_masks_info(image_idx); masks_output.Resize(masks_info.shape, DALI_INT32); masks_output.SetLayout("HWC"); @@ -251,7 +251,7 @@ void COCOReader::RunImpl(SampleWorkspace &ws) { } if (output_image_ids_) { - auto &id_output = ws.Output(curr_out_idx++); + auto &id_output = ws.OutputRef(curr_out_idx++); id_output.Resize({1}, DALI_INT32); *(id_output.mutable_data()) = loader_impl.image_id(image_idx); } diff --git a/dali/operators/reader/coco_reader_op_test.cc b/dali/operators/reader/coco_reader_op_test.cc index 6b86bfe0051..84dda50f870 100644 --- a/dali/operators/reader/coco_reader_op_test.cc +++ b/dali/operators/reader/coco_reader_op_test.cc @@ -107,7 +107,7 @@ class CocoReaderTest : public ::testing::Test { int ObjectCount(bool masks) { return masks ? 
7 : 194; } std::vector CopyIds(DeviceWorkspace &ws, int ids_out_idx = 3) { - auto &output = ws.Output(ids_out_idx); + auto &output = ws.OutputRef(ids_out_idx); const auto &shape = output.shape(); vector ids(shape.size()); @@ -182,8 +182,8 @@ class CocoReaderTest : public ::testing::Test { void CheckInstances(DeviceWorkspace &ws, bool ltrb, bool ratio, bool skip_empty, int expected_size, bool polygon_masks, bool polygon_masks_legacy) { - const auto &boxes_output = ws.Output(1); - const auto &labels_output = ws.Output(2); + const auto &boxes_output = ws.OutputRef(1); + const auto &labels_output = ws.OutputRef(2); const auto &boxes_shape = boxes_output.shape(); const auto &labels_shape = labels_output.shape(); @@ -254,8 +254,8 @@ class CocoReaderTest : public ::testing::Test { } if (polygon_masks || polygon_masks_legacy) { - const auto &polygons_output = ws.Output(3); - const auto &vertices_output = ws.Output(4); + const auto &polygons_output = ws.OutputRef(3); + const auto &vertices_output = ws.OutputRef(4); const auto &polygons_shape = polygons_output.shape(); const auto &vertices_shape = vertices_output.shape(); @@ -605,7 +605,7 @@ TEST_F(CocoReaderTest, PixelwiseMasks) { pipe2.Outputs(&ws2); for (auto *ws : {&ws1, &ws2}) { - const auto &masks_output = ws->Output(3); + const auto &masks_output = ws->OutputRef(3); const auto &masks_shape = masks_output.shape(); TensorListShape<3> pixelwise_masks_shape({ diff --git a/dali/operators/reader/file_reader_op.h b/dali/operators/reader/file_reader_op.h index 3f87eca9cbd..8cd41e72568 100644 --- a/dali/operators/reader/file_reader_op.h +++ b/dali/operators/reader/file_reader_op.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -37,8 +37,8 @@ class FileReader : public DataReader { const auto& image_label = GetSample(idx); // copy from raw_data -> outputs directly - auto &image_output = ws.Output(0); - auto &label_output = ws.Output(1); + auto &image_output = ws.OutputRef(0); + auto &label_output = ws.OutputRef(1); Index image_size = image_label.image.size(); diff --git a/dali/operators/reader/nemo_asr_reader_op.cc b/dali/operators/reader/nemo_asr_reader_op.cc index 02ad5280411..0d2bc7472ff 100755 --- a/dali/operators/reader/nemo_asr_reader_op.cc +++ b/dali/operators/reader/nemo_asr_reader_op.cc @@ -193,7 +193,7 @@ void NemoAsrReader::RunImpl(SampleWorkspace &ws) { const auto &sample = GetSample(ws.data_idx()); const auto &sample_audio = GetDecodedAudioSample(ws.data_idx()); - auto &audio = ws.Output(0); + auto &audio = ws.OutputRef(0); audio.Copy(sample_audio, 0); DALIMeta meta; @@ -203,14 +203,14 @@ void NemoAsrReader::RunImpl(SampleWorkspace &ws) { int next_out_idx = 1; if (read_sr_) { - auto &sample_rate = ws.Output(next_out_idx++); + auto &sample_rate = ws.OutputRef(next_out_idx++); sample_rate.Resize({}, DALI_FLOAT); sample_rate.mutable_data()[0] = sample.audio_meta().sample_rate; sample_rate.SetMeta(meta); } if (read_text_) { - auto &text_out = ws.Output(next_out_idx++); + auto &text_out = ws.OutputRef(next_out_idx++); const auto &text = sample.text(); int64_t text_sz = text.length(); text_out.Resize({text_sz}, DALI_UINT8); @@ -219,7 +219,7 @@ void NemoAsrReader::RunImpl(SampleWorkspace &ws) { } if (read_idxs_) { - auto &idxs_out = ws.Output(next_out_idx++); + auto &idxs_out = ws.OutputRef(next_out_idx++); idxs_out.Resize({1}, DALI_INT64); *idxs_out.mutable_data() = sample.index(); idxs_out.SetMeta(meta); diff --git a/dali/operators/reader/parser/caffe2_parser.h b/dali/operators/reader/parser/caffe2_parser.h index 15a336b6ab2..fa2317e3848 100644 --- a/dali/operators/reader/parser/caffe2_parser.h +++ b/dali/operators/reader/parser/caffe2_parser.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -121,19 +121,19 @@ void ParseLabels(const caffe2::TensorProtos& protos, const int num_labels, SampleWorkspace* ws, int consumed_inputs) { - auto& label_tensor = ws->Output(consumed_inputs); + auto& label_tensor = ws->OutputRef(consumed_inputs); switch (label_type) { case SINGLE_LABEL: { // single element, from protos(1) to Output(consumed_inputs) // ensure we only have a single label in the proto DALI_ENFORCE(proto_data_size(protos.protos(consumed_inputs)) == 1); - extract_data(protos.protos(consumed_inputs), ws->Output(consumed_inputs)); + extract_data(protos.protos(consumed_inputs), ws->OutputRef(consumed_inputs)); break; } case MULTI_LABEL_SPARSE: { // multiple labels, all 1. 
in elements defined in protos(consumed_inputs) - auto& label_tensor = ws->Output(consumed_inputs); + auto& label_tensor = ws->OutputRef(consumed_inputs); label_tensor.set_type(); label_tensor.Resize({num_labels}); @@ -150,7 +150,7 @@ void ParseLabels(const caffe2::TensorProtos& protos, } case MULTI_LABEL_DENSE: { // multiple elements, stored contiguously - extract_data(protos.protos(consumed_inputs), ws->Output(consumed_inputs)); + extract_data(protos.protos(consumed_inputs), ws->OutputRef(consumed_inputs)); break; } case MULTI_LABEL_WEIGHTED_SPARSE: { @@ -196,7 +196,7 @@ class Caffe2Parser : public Parser> { if (image_available_) { - auto& image = ws->Output(consumed_inputs); + auto& image = ws->OutputRef(consumed_inputs); const caffe2::TensorProto& image_proto = protos.protos(consumed_inputs); // copy image -- if type is string, image is encoded, if bytes, image isn't encoded @@ -243,7 +243,7 @@ class Caffe2Parser : public Parser> { for (int i = additional_proto_start; i < additional_proto_end; ++i) { auto& additional_proto = protos.protos(i); - auto& output_tensor = ws->Output(consumed_inputs); + auto& output_tensor = ws->OutputRef(consumed_inputs); switch (additional_proto.data_type()) { case caffe2::TensorProto::FLOAT: @@ -269,7 +269,7 @@ class Caffe2Parser : public Parser> { DALI_ENFORCE(bbox_proto.data_type() == caffe2::TensorProto::INT32); DALI_ENFORCE(bbox_proto.int32_data_size() == 4); - extract_data(bbox_proto, ws->Output(consumed_inputs)); + extract_data(bbox_proto, ws->OutputRef(consumed_inputs)); } } diff --git a/dali/operators/reader/parser/caffe_parser.h b/dali/operators/reader/parser/caffe_parser.h index e6a3399e33c..6a68cda7ac1 100644 --- a/dali/operators/reader/parser/caffe_parser.h +++ b/dali/operators/reader/parser/caffe_parser.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -36,7 +36,7 @@ class CaffeParser : public Parser> { if (image_available_ && datum.has_data()) { bool encoded_data = true; - auto& image = ws->Output(out_tensors); + auto& image = ws->OutputRef(out_tensors); if (datum.has_encoded() && !datum.encoded()) { encoded_data = false; } @@ -53,7 +53,7 @@ class CaffeParser : public Parser> { } if (label_available_ && datum.has_label()) { - auto& label = ws->Output(out_tensors); + auto& label = ws->OutputRef(out_tensors); // copy label label.Resize({1}, DALI_INT32); diff --git a/dali/operators/reader/parser/parser_test.cc b/dali/operators/reader/parser/parser_test.cc index 2d21e306b7e..fb4723ab1d7 100644 --- a/dali/operators/reader/parser/parser_test.cc +++ b/dali/operators/reader/parser/parser_test.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -45,7 +45,7 @@ class IntArrayParser : public Parser { printf("H: %d, W: %d, C: %d\n", H, W, C); - Tensor& output = ws->template Output(0); + Tensor& output = ws->template OutputRef(0); output.Resize({H, W, C}, DALI_INT32); int *output_data = output.template mutable_data(); @@ -75,10 +75,10 @@ TYPED_TEST(ParserTest, BasicTest) { HostWorkspace workspace; SampleWorkspace ws; - workspace.GetSample(&ws, 0, 0); + MakeSampleView(ws, workspace, 0, 0); shared_ptr> t(new Tensor()); - ws.AddOutput(t); + ws.AddOutput(t.get()); IntArrayParser parser(OpSpec("temp")); IntArrayWrapper ia_wrapper = {data.data(), data.size()}; diff --git a/dali/operators/reader/parser/recordio_parser.h b/dali/operators/reader/parser/recordio_parser.h index 98c182fd889..f6d9daeb329 100644 --- a/dali/operators/reader/parser/recordio_parser.h +++ b/dali/operators/reader/parser/recordio_parser.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -35,8 +35,8 @@ class RecordIOParser : public Parser> { } void Parse(const Tensor& data, SampleWorkspace* ws) override { - auto& image = ws->Output(0); - auto& label = ws->Output(1); + auto& image = ws->OutputRef(0); + auto& label = ws->OutputRef(1); ReadSingleImageRecordIO(image, label, data.data()); image.SetSourceInfo(data.GetSourceInfo()); } diff --git a/dali/operators/reader/parser/sequence_parser.cc b/dali/operators/reader/parser/sequence_parser.cc index 0f76cdaeef4..0f02c2f6f3c 100644 --- a/dali/operators/reader/parser/sequence_parser.cc +++ b/dali/operators/reader/parser/sequence_parser.cc @@ -21,7 +21,7 @@ namespace dali { void SequenceParser::Parse(const TensorSequence& data, SampleWorkspace* ws) { - auto& sequence = ws->Output(0); + auto& sequence = ws->OutputRef(0); sequence.SetLayout("FHWC"); Index seq_length = data.tensors.size(); diff --git a/dali/operators/reader/parser/tfrecord_parser.h b/dali/operators/reader/parser/tfrecord_parser.h index a9d6c5444df..c38450f5ca6 100644 --- a/dali/operators/reader/parser/tfrecord_parser.h +++ b/dali/operators/reader/parser/tfrecord_parser.h @@ -63,7 +63,7 @@ class TFRecordParser : public Parser> { " (raw data length: ", length, "bytes).")); for (size_t i = 0; i < features_.size(); ++i) { - auto& output = ws->Output(i); + auto& output = ws->OutputRef(i); Feature& f = features_[i]; std::string& name = feature_names_[i]; auto& feature = example.features().feature(); diff --git a/dali/operators/reader/reader_op.h b/dali/operators/reader/reader_op.h index 08ed601b963..accd945b681 100644 --- a/dali/operators/reader/reader_op.h +++ b/dali/operators/reader/reader_op.h @@ -237,7 +237,7 @@ class DataReader : public Operator { DALI_ENFORCE(cached_outputs.size() == num_outputs, "Unexpected number of outputs"); for (std::size_t i = 0; i < cached_outputs.size(); i++) { - auto& output = ws->Output(i); + auto& output = ws->OutputRef(i); output.Copy(cached_outputs[i], 0); } return; @@ -262,7 +262,7 @@ class DataReader : public Operator { first_output.Resize({1}, DALI_UINT8); for (std::size_t i = 1; i < cached_outputs.size(); i++) { - auto& output = ws->Output(i); + auto& output = ws->OutputRef(i); cached_outputs[i].set_pinned(false); cached_outputs[i].Copy(output, 0); } diff --git a/dali/operators/reader/reader_op_test.cc b/dali/operators/reader/reader_op_test.cc index fd25495d242..231a1d0ba44 
100644 --- a/dali/operators/reader/reader_op_test.cc +++ b/dali/operators/reader/reader_op_test.cc @@ -88,7 +88,7 @@ class DummyDataReader : public DataReader> { void RunImpl(SampleWorkspace &ws) override { std::this_thread::sleep_for(std::chrono::milliseconds(5)); - ws.Output(0).Copy(GetSample(ws.data_idx()), 0); + ws.OutputRef(0).Copy(GetSample(ws.data_idx()), 0); } private: @@ -220,7 +220,7 @@ TYPED_TEST(ReaderTest, SequenceTest) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - auto shape = ws.Output(0).AsTensor()->shape(); + auto shape = ws.OutputRef(0).AsTensor()->shape(); // We have NFHWC format const auto batch_size = shape[0]; const auto frame_count = shape[1]; @@ -236,7 +236,7 @@ TYPED_TEST(ReaderTest, SequenceTest) { auto start_frame = (i * batch_size + sample) % (16 - 3 + 1); for (int frame = 0; frame < frame_count; frame++) { auto off = sample * seq_size + frame * frame_size; - auto val = ws.Output(0).AsTensor()->data()[off]; + auto val = ws.OutputRef(0).AsTensor()->data()[off]; decltype(val) expected = start_frame + frame; ASSERT_EQ(val, expected); } diff --git a/dali/operators/reader/video_reader_decoder_op.cc b/dali/operators/reader/video_reader_decoder_op.cc index 664e693937a..1548a08c04f 100644 --- a/dali/operators/reader/video_reader_decoder_op.cc +++ b/dali/operators/reader/video_reader_decoder_op.cc @@ -26,12 +26,12 @@ VideoReaderDecoder::VideoReaderDecoder(const OpSpec &spec) void VideoReaderDecoder::RunImpl(SampleWorkspace &ws) { const auto &sample = GetSample(ws.data_idx()); - auto &video_output = ws.Output(0); + auto &video_output = ws.OutputRef(0); video_output.Copy(sample.data_, 0); if (has_labels_) { - auto &label_output = ws.Output(1); + auto &label_output = ws.OutputRef(1); label_output.Resize({}, DALIDataType::DALI_INT32); label_output.mutable_data()[0] = sample.label_; } diff --git a/dali/operators/reader/video_reader_op.h b/dali/operators/reader/video_reader_op.h index 5e7d9a73697..a00032912c7 100644 --- a/dali/operators/reader/video_reader_op.h +++ b/dali/operators/reader/video_reader_op.h @@ -129,16 +129,16 @@ class VideoReader : public DataReader { void PrepareAdditionalOutputs(DeviceWorkspace &ws) { int output_index = 1; if (output_labels_) { - label_output_ = &ws.Output(output_index++); + label_output_ = &ws.OutputRef(output_index++); label_output_->Resize(label_shape_, DALI_INT32); if (can_use_frames_timestamps_) { if (enable_frame_num_) { - frame_num_output_ = &ws.Output(output_index++); + frame_num_output_ = &ws.OutputRef(output_index++); frame_num_output_->Resize(frame_num_shape_, DALI_INT32); } if (enable_timestamps_) { - timestamp_output_ = &ws.Output(output_index++); + timestamp_output_ = &ws.OutputRef(output_index++); timestamp_output_->Resize(timestamp_shape_, DALI_FLOAT64); } } @@ -181,7 +181,7 @@ class VideoReader : public DataReader { } void RunImpl(DeviceWorkspace &ws) override { - auto &video_output = ws.Output(0); + auto &video_output = ws.OutputRef(0); auto &curent_batch = prefetched_batch_tensors_[curr_batch_consumer_]; SetOutputShapeType(video_output, ws); diff --git a/dali/operators/reader/video_reader_op_test.cc b/dali/operators/reader/video_reader_op_test.cc index 11b7d3f5692..4a79b1273fc 100644 --- a/dali/operators/reader/video_reader_op_test.cc +++ b/dali/operators/reader/video_reader_op_test.cc @@ -87,7 +87,7 @@ TEST_F(VideoReaderTest, ConstantFrameRate) { pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = 
frames_output.shape(); ASSERT_EQ(frames_shape.size(), 1); @@ -139,8 +139,8 @@ TEST_F(VideoReaderTest, MultipleVideoResolution) { pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_output = ws.Output(0); - const auto &labels_output = ws.Output(1); + const auto &frames_output = ws.OutputRef(0); + const auto &labels_output = ws.OutputRef(1); TensorList labels_cpu; labels_cpu.Copy(labels_output, 0); @@ -196,7 +196,7 @@ TEST_F(VideoReaderTest, PackedBFrames) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), batch_size); @@ -243,7 +243,7 @@ TEST_F(VideoReaderTest, Vp9Profile0) { } } - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), 1); @@ -282,7 +282,7 @@ TEST_F(VideoReaderTest, Vp9Profile2) { } } - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), 1); @@ -319,7 +319,7 @@ TEST_F(VideoReaderTest, Vp8Profile0) { } } - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), 1); @@ -358,7 +358,7 @@ TEST_F(VideoReaderTest, MJpeg) { } } - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), 1); @@ -399,7 +399,7 @@ TEST_F(VideoReaderTest, HEVC) { } } - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), 16); @@ -430,9 +430,9 @@ TEST_F(VideoReaderTest, FrameLabels) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_gpu = ws.Output(0); - const auto &label_gpu = ws.Output(1); - const auto &frame_num_gpu = ws.Output(2); + const auto &frames_gpu = ws.OutputRef(0); + const auto &label_gpu = ws.OutputRef(1); + const auto &frame_num_gpu = ws.OutputRef(2); TensorList frames_cpu; frames_cpu.Copy(frames_gpu, 0); @@ -476,9 +476,9 @@ TEST_F(VideoReaderTest, FrameLabelsFilenames) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_gpu = ws.Output(0); - const auto &label_gpu = ws.Output(1); - const auto &frame_num_gpu = ws.Output(2); + const auto &frames_gpu = ws.OutputRef(0); + const auto &label_gpu = ws.OutputRef(1); + const auto &frame_num_gpu = ws.OutputRef(2); TensorList frames_cpu; frames_cpu.Copy(frames_gpu, 0); @@ -524,9 +524,9 @@ TEST_F(VideoReaderTest, LabelsFilenames) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_gpu = ws.Output(0); - const auto &label_gpu = ws.Output(1); - const auto &frame_num_gpu = ws.Output(2); + const auto &frames_gpu = ws.OutputRef(0); + const auto &label_gpu = ws.OutputRef(1); + const auto &frame_num_gpu = ws.OutputRef(2); TensorList frames_cpu; frames_cpu.Copy(frames_gpu, 0); @@ -572,10 +572,10 @@ TEST_F(VideoReaderTest, FrameLabelsWithFileListFrameNum) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_gpu = ws.Output(0); - const auto &label_gpu = ws.Output(1); - const auto &frame_num_gpu = ws.Output(2); - const auto ×tamp_gpu = ws.Output(3); + const auto &frames_gpu = ws.OutputRef(0); + const auto &label_gpu = ws.OutputRef(1); + const auto 
&frame_num_gpu = ws.OutputRef(2); + const auto ×tamp_gpu = ws.OutputRef(3); TensorList frames_cpu; frames_cpu.Copy(frames_gpu, 0); @@ -634,10 +634,10 @@ TEST_F(VideoReaderTest, TimestampLabels) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_gpu = ws.Output(0); - const auto &label_gpu = ws.Output(1); - const auto &frame_num_gpu = ws.Output(2); - const auto ×tamp_gpu = ws.Output(3); + const auto &frames_gpu = ws.OutputRef(0); + const auto &label_gpu = ws.OutputRef(1); + const auto &frame_num_gpu = ws.OutputRef(2); + const auto ×tamp_gpu = ws.OutputRef(3); TensorList frames_cpu; frames_cpu.Copy(frames_gpu, 0); @@ -680,9 +680,9 @@ TEST_F(VideoReaderTest, StartEndLabels) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_gpu = ws.Output(0); - const auto &label_gpu = ws.Output(1); - const auto &frame_num_gpu = ws.Output(2); + const auto &frames_gpu = ws.OutputRef(0); + const auto &label_gpu = ws.OutputRef(1); + const auto &frame_num_gpu = ws.OutputRef(2); TensorList frames_cpu; frames_cpu.Copy(frames_gpu, 0); @@ -720,7 +720,7 @@ TEST_F(VideoReaderTest, MultipleFrameRates) { pipe.RunCPU(); pipe.RunGPU(); pipe.Outputs(&ws); - const auto &frames_output = ws.Output(0); + const auto &frames_output = ws.OutputRef(0); const auto &frames_shape = frames_output.shape(); ASSERT_EQ(frames_shape.size(), batch_size); diff --git a/dali/operators/sequence/optical_flow/optical_flow.cc b/dali/operators/sequence/optical_flow/optical_flow.cc index d418e72f542..aeb88c73169 100644 --- a/dali/operators/sequence/optical_flow/optical_flow.cc +++ b/dali/operators/sequence/optical_flow/optical_flow.cc @@ -113,9 +113,9 @@ void OpticalFlow::RunImpl(Workspace &ws) { if (enable_external_hints_) { // Fetch data // Input is a TensorList, where every Tensor is a sequence - const auto &input = ws.Input(0); - const auto &hints = ws.Input(1); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + const auto &hints = ws.InputRef(1); + auto &output = ws.OutputRef(0); output.SetLayout("HWC"); // Channels represent the two flow vector components (x and y) // Extract calculation params ExtractParams(input, hints); @@ -153,8 +153,8 @@ void OpticalFlow::RunImpl(Workspace &ws) { } else { // Fetch data // Input is a TensorList, where every Tensor is a sequence - const auto &input = ws.Input(0); - auto &output = ws.Output(0); + const auto &input = ws.InputRef(0); + auto &output = ws.OutputRef(0); output.SetLayout(input.GetLayout()); // Extract calculation params diff --git a/dali/operators/signal/decibel/to_decibels_op_gpu.cu b/dali/operators/signal/decibel/to_decibels_op_gpu.cu index 1cfd388cc8f..5741041f1cc 100644 --- a/dali/operators/signal/decibel/to_decibels_op_gpu.cu +++ b/dali/operators/signal/decibel/to_decibels_op_gpu.cu @@ -103,7 +103,7 @@ template <> bool ToDecibels::SetupImpl(std::vector &output_desc, const workspace_t &ws) { output_desc.resize(kNumOutputs); - const auto &input = ws.Input(0); + const auto &input = ws.InputRef(0); auto type = input.type(); TYPE_SWITCH(type, type2id, T, (float), ( using Impl = ToDecibelsImpl; diff --git a/dali/operators/ssd/box_encoder.cc b/dali/operators/ssd/box_encoder.cc index 836abd20be4..cf18f70b776 100644 --- a/dali/operators/ssd/box_encoder.cc +++ b/dali/operators/ssd/box_encoder.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -156,8 +156,8 @@ void BoxEncoder::WriteMatchesToOutput( } void BoxEncoder::RunImpl(SampleWorkspace &ws) { - const auto &bboxes_input = ws.Input(kBoxesInId); - const auto &labels_input = ws.Input(kLabelsInId); + const auto &bboxes_input = ws.InputRef(kBoxesInId); + const auto &labels_input = ws.InputRef(kLabelsInId); const auto num_boxes = bboxes_input.dim(0); const auto labels = labels_input.data(); @@ -166,11 +166,11 @@ void BoxEncoder::RunImpl(SampleWorkspace &ws) { ReadBoxes(make_span(boxes), make_cspan(bboxes_input.data(), bboxes_input.size()), {}, {}); // Create output - auto &bboxes_output = ws.Output(kBoxesOutId); + auto &bboxes_output = ws.OutputRef(kBoxesOutId); bboxes_output.Resize({static_cast(anchors_.size()), BoundingBox::size}, bboxes_input.type()); auto out_boxes = bboxes_output.mutable_data(); - auto &labels_output = ws.Output(kLabelsOutId); + auto &labels_output = ws.OutputRef(kLabelsOutId); labels_output.Resize({static_cast(anchors_.size())}, labels_input.type()); auto out_labels = labels_output.mutable_data(); diff --git a/dali/operators/ssd/box_encoder.cu b/dali/operators/ssd/box_encoder.cu index cde92d853a8..594c058915a 100644 --- a/dali/operators/ssd/box_encoder.cu +++ b/dali/operators/ssd/box_encoder.cu @@ -260,8 +260,8 @@ BoxEncoder::CalculateDims( } void BoxEncoder::RunImpl(Workspace &ws) { - const auto &boxes_input = ws.Input(kBoxesInId); - const auto &labels_input = ws.Input(kLabelsInId); + const auto &boxes_input = ws.InputRef(kBoxesInId); + const auto &labels_input = ws.InputRef(kLabelsInId); assert(ws.GetInputBatchSize(kBoxesInId) == ws.GetInputBatchSize(kLabelsInId)); auto curr_batch_size = ws.GetInputBatchSize(kBoxesInId); @@ -273,10 +273,10 @@ void BoxEncoder::RunImpl(Workspace &ws) { auto dims = CalculateDims(boxes_input); - auto &boxes_output = ws.Output(kBoxesOutId); + auto &boxes_output = ws.OutputRef(kBoxesOutId); boxes_output.Resize(dims.first, boxes_input.type()); - auto &labels_output = ws.Output(kLabelsOutId); + auto &labels_output = ws.OutputRef(kLabelsOutId); labels_output.Resize(dims.second, labels_input.type()); samples.resize(curr_batch_size_); diff --git a/dali/operators/ssd/box_encoder_test.cc b/dali/operators/ssd/box_encoder_test.cc index ceecc477e4f..468da270afe 100644 --- a/dali/operators/ssd/box_encoder_test.cc +++ b/dali/operators/ssd/box_encoder_test.cc @@ -1085,14 +1085,14 @@ class BoxEncoderTest : public GenericBBoxesTest { } void CheckAnswersForCocoOnCpu(DeviceWorkspace *ws, bool offset = false) { - TensorList &boxes = ws->Output(0); - TensorList &labels = ws->Output(1); + TensorList &boxes = ws->OutputRef(0); + TensorList &labels = ws->OutputRef(1); CheckAnswersForCoco(&boxes, &labels, offset); } void CheckAnswersForCocoOnGpu(DeviceWorkspace *ws, bool offset = false) { - auto boxes = this->CopyTensorListToHost(ws->Output(0)); - auto labels = this->CopyTensorListToHost(ws->Output(1)); + auto boxes = this->CopyTensorListToHost(ws->OutputRef(0)); + auto labels = this->CopyTensorListToHost(ws->OutputRef(1)); CheckAnswersForCoco(boxes.get(), labels.get(), offset); } }; diff --git a/dali/operators/ssd/random_crop.cc b/dali/operators/ssd/random_crop.cc index 340cad42326..781313baeb8 100644 --- a/dali/operators/ssd/random_crop.cc +++ b/dali/operators/ssd/random_crop.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved. 
+// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -179,10 +179,10 @@ void crop(const Tensor& img, vector bounds, Tensor&
 template <>
 void SSDRandomCrop::RunImpl(SampleWorkspace &ws) {
   // [H, W, C], dtype=uint8_t
-  const auto& img = ws.Input(0);
+  const auto& img = ws.InputRef(0);
   // [N] : [ltrb, ... ], dtype=float
-  const auto& bboxes = ws.Input(1);
-  const auto& labels = ws.Input(2);
+  const auto& bboxes = ws.InputRef(1);
+  const auto& labels = ws.InputRef(2);
   int sample = ws.data_idx();
   auto N = bboxes.dim(0);
@@ -202,9 +202,9 @@ void SSDRandomCrop::RunImpl(SampleWorkspace &ws) {
   if (option.no_crop()) {
     // copy directly to output without modification
-    ws.Output(0).Copy(img, 0);
-    ws.Output(1).Copy(bboxes, 0);
-    ws.Output(2).Copy(labels, 0);
+    ws.OutputRef(0).Copy(img, 0);
+    ws.OutputRef(1).Copy(bboxes, 0);
+    ws.OutputRef(2).Copy(labels, 0);
     return;
   }
@@ -273,10 +273,10 @@ void SSDRandomCrop::RunImpl(SampleWorkspace &ws) {
   // now we know how many output bboxes there will be, we can allocate
   // the output.
-  auto &img_out = ws.Output(0);
+  auto &img_out = ws.OutputRef(0);
   img_out.SetLayout(img.GetLayout());
-  auto &bbox_out = ws.Output(1);
-  auto &label_out = ws.Output(2);
+  auto &bbox_out = ws.OutputRef(1);
+  auto &label_out = ws.OutputRef(2);
   bbox_out.Resize({valid_bboxes, 4}, DALI_FLOAT);
   auto *bbox_out_data = bbox_out.mutable_data();
diff --git a/dali/pipeline/executor/executor_test.cc b/dali/pipeline/executor/executor_test.cc
index f51dbfe9cd9..2b724ba9e1e 100644
--- a/dali/pipeline/executor/executor_test.cc
+++ b/dali/pipeline/executor/executor_test.cc
@@ -599,7 +599,7 @@ TYPED_TEST(ExecutorSyncTest, TestPrefetchedExecution) {
   ASSERT_EQ(ws.NumOutput(), 1);
   ASSERT_EQ(ws.NumInput(), 0);
   ASSERT_TRUE(ws.OutputIsType(0));
-  TensorList &res1 = ws.Output(0);
+  TensorList &res1 = ws.OutputRef(0);
   for (int i = 0; i < batch_size; ++i) {
     this->VerifyDecode(
         res1.template tensor(i),
@@ -615,7 +615,7 @@ TYPED_TEST(ExecutorSyncTest, TestPrefetchedExecution) {
   auto status_2 = barrier_future_2.wait_for(std::chrono::seconds(5));
   ASSERT_EQ(status_2, std::future_status::ready);
   ASSERT_EQ(cb_counter, 2);
-  TensorList &res2 = ws.Output(0);
+  TensorList &res2 = ws.OutputRef(0);
   for (int i = 0; i < batch_size; ++i) {
     this->VerifyDecode(
         res2.template tensor(i),
diff --git a/dali/pipeline/operator/builtin/external_source.cc b/dali/pipeline/operator/builtin/external_source.cc
index 1a711cbd792..0e43e08c1bb 100644
--- a/dali/pipeline/operator/builtin/external_source.cc
+++ b/dali/pipeline/operator/builtin/external_source.cc
@@ -36,7 +36,7 @@ void ExternalSource::RunImpl(HostWorkspace &ws) {
   for (int sample_id = 0; sample_id < curr_batch_size; ++sample_id) {
     thread_pool.AddWork(
         [&ws, sample_id, &tensor_vector_elm](int tid) {
-          Tensor &output_tensor = ws.Output(0, sample_id);
+          Tensor &output_tensor = ws.OutputRef(0)[sample_id];
           // HostWorkspace doesn't have any stream
           cudaStream_t stream = 0;
           output_tensor.Copy((*tensor_vector_elm.front())[sample_id], stream);
diff --git a/dali/pipeline/operator/builtin/external_source.cu b/dali/pipeline/operator/builtin/external_source.cu
index ddcdae2c6d0..53dd0046fd6 100644
--- a/dali/pipeline/operator/builtin/external_source.cu
+++ b/dali/pipeline/operator/builtin/external_source.cu
@@ -37,7 +37,7 @@ void ExternalSource::RunImpl(DeviceWorkspace &ws) {
     }
   }
-  auto &output = ws.Output(0);
+  auto &output = ws.OutputRef(0);
   cudaStream_t stream_used = ws.has_stream() ? ws.stream() : 0;
   if (!state_info.no_copy || state_info.copied_shared_data) {
     CUDA_CALL(cudaStreamWaitEvent(stream_used, *internal_copy_to_storage.front(), 0));
diff --git a/dali/pipeline/operator/builtin/external_source_test.cc b/dali/pipeline/operator/builtin/external_source_test.cc
index d7b9ba55cf9..eb6d5c5aff0 100644
--- a/dali/pipeline/operator/builtin/external_source_test.cc
+++ b/dali/pipeline/operator/builtin/external_source_test.cc
@@ -192,7 +192,7 @@ class ExternalSourceTest : public::testing::WithParamInterface,
   bool RunOutputs() {
     DeviceWorkspace ws;
     exe_->Outputs(&ws);
-    auto &tensor_gpu_list = ws.Output(0);
+    auto &tensor_gpu_list = ws.OutputRef(0);
     TensorList tensor_cpu_list;
     tensor_cpu_list.Copy(tensor_gpu_list, (ws.has_stream() ? ws.stream() : 0));
     CUDA_CALL(cudaStreamSynchronize(ws.has_stream() ? ws.stream() : 0));
@@ -579,9 +579,9 @@ void TestRunExternalSource(Pipeline &pipe, const std::string &name,
   TensorList output_cpu;
   pipe.Outputs(&ws);
   if (dev == "cpu") {
-    output_cpu.Copy(ws.Output(0), 0);
+    output_cpu.Copy(ws.OutputRef(0), 0);
   } else {
-    output_cpu.Copy(ws.Output(0), 0);
+    output_cpu.Copy(ws.OutputRef(0), 0);
     cudaStreamSynchronize(0);
   }
   ASSERT_EQ(input_cpu.shape(), output_cpu.shape());
diff --git a/dali/pipeline/operator/builtin/make_contiguous.cu b/dali/pipeline/operator/builtin/make_contiguous.cu
index b977566fb19..27aa9b8fa92 100644
--- a/dali/pipeline/operator/builtin/make_contiguous.cu
+++ b/dali/pipeline/operator/builtin/make_contiguous.cu
@@ -24,7 +24,7 @@ void MakeContiguousMixed::Run(MixedWorkspace &ws) {
   DALIDataType type = input.type();
   for (size_t i = 0; i < input.num_samples(); ++i) {
-    auto &sample = ws.Input(0, i);
+    auto &sample = ws.InputRef(0)[i];
     size_t sample_bytes = sample.nbytes();
     if (coalesced && sample_bytes > COALESCE_THRESHOLD)
       coalesced = false;
@@ -34,7 +34,7 @@ void MakeContiguousMixed::Run(MixedWorkspace &ws) {
         "in input batch. Cannot copy to contiguous device buffer.");
   }
-  auto &output = ws.Output(0);
+  auto &output = ws.OutputRef(0);
   if (coalesced) {
     DomainTimeRange tr("[DALI][MakeContiguousMixed] coalesced", DomainTimeRange::kBlue);
     cpu_output_buff.Copy(input, 0);
diff --git a/dali/pipeline/operator/operator.cc b/dali/pipeline/operator/operator.cc
index 53f9a459c08..1f5be4259df 100644
--- a/dali/pipeline/operator/operator.cc
+++ b/dali/pipeline/operator/operator.cc
@@ -50,7 +50,7 @@ void OperatorBase::EnforceUniformOutputBatchSize(
   auto ref_batch_size = ws.NumInput() > 0 ? ws.GetInputBatchSize(0) : ws.GetRequestedBatchSize(0);
   for (int i = 0; i < ws.NumOutput(); i++) {
     auto output_batch_size = const_cast &>(ws)
-                                 .template Output(i)
+                                 .template OutputRef(i)
                                  .shape()
                                  .num_samples();
     DALI_ENFORCE(ref_batch_size == output_batch_size,
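The hunks above all apply the same mechanical rename: batch data is obtained by reference through the templated InputRef/OutputRef accessors instead of the per-backend Input/Output specializations that this patch removes. A minimal sketch of the resulting usage on a DeviceWorkspace is given below; the helper name CopyFirstOutput and the include set are illustrative assumptions, not part of the patch.

    // Minimal sketch (not part of this patch): whole-batch access on a
    // DeviceWorkspace via the InputRef/OutputRef accessors.
    #include <cuda_runtime_api.h>

    #include "dali/pipeline/workspace/device_workspace.h"

    namespace dali {

    // Hypothetical helper: copies input 0 to output 0 on the workspace stream.
    inline void CopyFirstOutput(DeviceWorkspace &ws) {
      const auto &input = ws.InputRef<GPUBackend>(0);   // TensorList<GPUBackend> &
      auto &output = ws.OutputRef<GPUBackend>(0);       // was: ws.Output<GPUBackend>(0)
      cudaStream_t stream = ws.has_stream() ? ws.stream() : 0;
      output.Copy(input, stream);                       // whole-batch copy, as in MakeContiguous
      output.SetLayout(input.GetLayout());
    }

    }  // namespace dali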
diff --git a/dali/pipeline/operator/operator.h b/dali/pipeline/operator/operator.h
index 6e4e0817055..f695e8f5a63 100644
--- a/dali/pipeline/operator/operator.h
+++ b/dali/pipeline/operator/operator.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -332,7 +332,7 @@ class Operator : public OperatorBase {
     for (int data_idx = 0; data_idx < curr_batch_size; ++data_idx) {
       thread_pool.AddWork([this, &ws, data_idx](int tid) {
         SampleWorkspace sample;
-        ws.GetSample(&sample, data_idx, tid);
+        MakeSampleView(sample, ws, data_idx, tid);
         this->SetupSharedSampleParams(sample);
         this->RunImpl(sample);
       }, -data_idx);  // -data_idx for FIFO order
diff --git a/dali/pipeline/pipeline_test.cc b/dali/pipeline/pipeline_test.cc
index d7d81b0359a..0f4261c4689 100644
--- a/dali/pipeline/pipeline_test.cc
+++ b/dali/pipeline/pipeline_test.cc
@@ -465,29 +465,29 @@ TEST_F(PipelineTestOnce, TestPresize) {
   pipe.Outputs(&ws);
   // we should not presize CPU buffers if they are not pinned
-  ASSERT_EQ(*(ws.Output(0).tensor(0)), 0);
+  ASSERT_EQ(*(ws.OutputRef(0).tensor(0)), 0);
   int ref_presize = RestrictPinnedMemUsage() ? 0 : presize_val_CPU;
-  ASSERT_EQ(*(ws.Output(1).tensor(0)), ref_presize);
+  ASSERT_EQ(*(ws.OutputRef(1).tensor(0)), ref_presize);
   size_t tmp[2];
   CUDA_CALL(cudaDeviceSynchronize());
-  CUDA_CALL(cudaMemcpy(&tmp, ws.Output(2).tensor(0),
+  CUDA_CALL(cudaMemcpy(&tmp, ws.OutputRef(2).tensor(0),
                        sizeof(size_t) * 2, cudaMemcpyDefault));
   ASSERT_EQ(tmp[0], presize_val_Mixed);
   ASSERT_EQ(tmp[1], 2 * sizeof(size_t));
-  CUDA_CALL(cudaMemcpy(&tmp, ws.Output(3).tensor(0),
+  CUDA_CALL(cudaMemcpy(&tmp, ws.OutputRef(3).tensor(0),
                        sizeof(size_t) * 2, cudaMemcpyDefault));
   ASSERT_EQ(tmp[0], presize_val_GPU);
   ASSERT_EQ(tmp[1], 2 * sizeof(size_t));
-  CUDA_CALL(cudaMemcpy(&tmp, ws.Output(4).tensor(0),
+  CUDA_CALL(cudaMemcpy(&tmp, ws.OutputRef(4).tensor(0),
                        sizeof(size_t) * 2, cudaMemcpyDefault));
   ASSERT_EQ(tmp[0], presize_val_GPU);
   ASSERT_EQ(tmp[1], 2 * sizeof(size_t));
-  CUDA_CALL(cudaMemcpy(&tmp, ws.Output(5).tensor(0),
+  CUDA_CALL(cudaMemcpy(&tmp, ws.OutputRef(5).tensor(0),
                        sizeof(size_t) * 2, cudaMemcpyDefault));
   ASSERT_EQ(tmp[0], presize_val_default);
   ASSERT_EQ(tmp[1], 2 * sizeof(size_t));
@@ -551,7 +551,7 @@ class PrefetchedPipelineTest : public GenericDecoderTest {
     ASSERT_EQ(ws.NumOutput(), 1);
     ASSERT_EQ(ws.NumInput(), 0);
     ASSERT_TRUE(ws.OutputIsType(0));
-    TensorList &res1 = ws.Output(0);
+    TensorList &res1 = ws.OutputRef(0);
     for (int j = 0; j < batch_size; ++j) {
       this->VerifyDecode(
           res1.template tensor(j),
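For test code, the change is equally mechanical: outputs are fetched from the workspace with OutputRef and, for GPU outputs, copied to the host before being inspected. The sketch below assumes a built and already-run pipeline; the helper name is illustrative.

    // Sketch only: fetch GPU output 0 of a pipeline into a host TensorList,
    // mirroring the pattern used in pipeline_test.cc / external_source_test.cc.
    #include <cuda_runtime_api.h>

    #include "dali/pipeline/pipeline.h"
    #include "dali/pipeline/workspace/device_workspace.h"

    namespace dali {

    inline void FetchFirstOutputToHost(Pipeline &pipe, TensorList<CPUBackend> &out_cpu) {
      DeviceWorkspace ws;
      pipe.Outputs(&ws);                               // the batch produced by the last run
      auto &out_gpu = ws.OutputRef<GPUBackend>(0);     // was: ws.Output<GPUBackend>(0)
      cudaStream_t stream = ws.has_stream() ? ws.stream() : 0;
      out_cpu.Copy(out_gpu, stream);
      cudaStreamSynchronize(stream);                   // make the copy visible to the test
    }

    }  // namespace dali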
- -#include "dali/pipeline/workspace/device_workspace.h" - -#include "dali/pipeline/workspace/sample_workspace.h" - -namespace dali { - -template <> -const TensorList& DeviceWorkspace::Input(int idx) const { - return *CPUInput(idx); -} - -template <> -const TensorList& DeviceWorkspace::Input(int idx) const { - return *GPUInput(idx); -} - -template <> -TensorList& DeviceWorkspace::MutableInput(int idx) { - return *CPUInput(idx); -} - -template <> -TensorList& DeviceWorkspace::MutableInput(int idx) { - return *GPUInput(idx); -} - -template <> -TensorList& DeviceWorkspace::Output(int idx) { - return *CPUOutput(idx); -} - -template <> -TensorList& DeviceWorkspace::Output(int idx) { - return *GPUOutput(idx); -} - -} // namespace dali diff --git a/dali/pipeline/workspace/device_workspace.h b/dali/pipeline/workspace/device_workspace.h index 3498290ba88..0c2d6e84b96 100644 --- a/dali/pipeline/workspace/device_workspace.h +++ b/dali/pipeline/workspace/device_workspace.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -41,8 +41,6 @@ using DeviceOutputType = shared_ptr>; */ class DLL_PUBLIC DeviceWorkspace : public WorkspaceBase { public: - using WorkspaceBase::input_t; - using WorkspaceBase::output_t; DLL_PUBLIC DeviceWorkspace() : stream_(0), event_(nullptr) {} DLL_PUBLIC ~DeviceWorkspace() override = default; @@ -58,33 +56,6 @@ class DLL_PUBLIC DeviceWorkspace : public WorkspaceBase - DLL_PUBLIC const TensorList& Input(int idx) const; - - /** - * @brief Returns the input non-const TensorList at index `idx`. - * - * @throws runtime_error If calling type does not match the type of - * the output at the given index. - */ - template - DLL_PUBLIC TensorList& MutableInput(int idx); - - /** - * @brief Returns the output TensorList at index `idx`. - * - * @throws runtime_error If calling type does not match the type of - * the output at the given index. - */ - template - DLL_PUBLIC TensorList& Output(int idx); - /** * @brief Sets the stream for this workspace. */ diff --git a/dali/pipeline/workspace/host_workspace.cc b/dali/pipeline/workspace/host_workspace.cc deleted file mode 100644 index 4b6cc0f35fa..00000000000 --- a/dali/pipeline/workspace/host_workspace.cc +++ /dev/null @@ -1,66 +0,0 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "dali/pipeline/workspace/host_workspace.h" - -#include "dali/pipeline/workspace/sample_workspace.h" - -namespace dali { - -void HostWorkspace::GetSample(SampleWorkspace* ws, int data_idx, int thread_idx) { - DALI_ENFORCE(ws != nullptr, "Input workspace is nullptr."); - ws->Clear(); - ws->set_data_idx(data_idx); - ws->set_thread_idx(thread_idx); - for (const auto& input_meta : input_index_map_) { - if (input_meta.storage_device == StorageDevice::CPU) { - ws->AddInput(cpu_inputs_[input_meta.index]->tensor_handle(data_idx)); - } else { - ws->AddInput(gpu_inputs_[input_meta.index]->tensor_handle(data_idx)); - } - } - for (const auto& output_meta : output_index_map_) { - if (output_meta.storage_device == StorageDevice::CPU) { - ws->AddOutput(cpu_outputs_[output_meta.index]->tensor_handle(data_idx)); - } else { - ws->AddOutput(gpu_outputs_[output_meta.index]->tensor_handle(data_idx)); - } - } - for (auto& arg_pair : argument_inputs_) { - assert(!arg_pair.second.should_update); - ws->AddArgumentInput(arg_pair.first, arg_pair.second.tvec); - } -} - -template <> -const Tensor& HostWorkspace::Input(int idx, int data_idx) const { - return InputRef(idx)[data_idx]; -} - -template <> -const Tensor& HostWorkspace::Input(int idx, int data_idx) const { - return InputRef(idx)[data_idx]; -} - -template <> -Tensor& HostWorkspace::Output(int idx, int data_idx) { - return OutputRef(idx)[data_idx]; -} - -template <> -Tensor& HostWorkspace::Output(int idx, int data_idx) { - return OutputRef(idx)[data_idx]; -} - -} // namespace dali diff --git a/dali/pipeline/workspace/host_workspace.h b/dali/pipeline/workspace/host_workspace.h index 085370ad576..911ec1963c2 100644 --- a/dali/pipeline/workspace/host_workspace.h +++ b/dali/pipeline/workspace/host_workspace.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -42,39 +42,9 @@ class SampleWorkspace; */ class DLL_PUBLIC HostWorkspace : public WorkspaceBase { public: - using WorkspaceBase::input_t; - using WorkspaceBase::output_t; - DLL_PUBLIC inline HostWorkspace() {} DLL_PUBLIC inline ~HostWorkspace() override = default; - /** - * @brief Returns a sample workspace for the given sample - * index and thread index - */ - DLL_PUBLIC void GetSample(SampleWorkspace *ws, int data_idx, int thread_idx); - - /** - * @brief Returns the Tensor at index `data_idx` in the input - * Tensors at index `idx`. - * - * @throws runtime_error if the calling type does not match the - * type of the tensor at the given index - */ - template - DLL_PUBLIC const Tensor& Input(int idx, int data_idx) const; - - /** - * @brief Returns the Tensor at index `data_idx` in the output - * Tensors at index `idx`. - * - * @throws runtime_error if the calling type does not match the - * type of the tensor at the given index - */ - template - DLL_PUBLIC Tensor& Output(int idx, int data_idx); - - DLL_PUBLIC inline void SetThreadPool(ThreadPool *pool) { thread_pool_ = pool; } diff --git a/dali/pipeline/workspace/mixed_workspace.cc b/dali/pipeline/workspace/mixed_workspace.cc deleted file mode 100644 index a54a0e81632..00000000000 --- a/dali/pipeline/workspace/mixed_workspace.cc +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
diff --git a/dali/pipeline/workspace/mixed_workspace.cc b/dali/pipeline/workspace/mixed_workspace.cc
deleted file mode 100644
index a54a0e81632..00000000000
--- a/dali/pipeline/workspace/mixed_workspace.cc
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "dali/pipeline/workspace/mixed_workspace.h"
-
-#include "dali/pipeline/workspace/sample_workspace.h"
-
-namespace dali {
-
-template <>
-const Tensor& MixedWorkspace::Input(int idx, int data_idx) const {
-  return InputRef(idx)[data_idx];
-}
-
-template <>
-const Tensor& MixedWorkspace::Input(int idx, int data_idx) const {
-  return InputRef(idx)[data_idx];
-}
-
-template <>
-TensorList& MixedWorkspace::Output(int idx) {
-  return OutputRef(idx);
-}
-
-template <>
-TensorList& MixedWorkspace::Output(int idx) {
-  return OutputRef(idx);
-}
-
-}  // namespace dali
diff --git a/dali/pipeline/workspace/mixed_workspace.h b/dali/pipeline/workspace/mixed_workspace.h
index 8bddaf89b90..c70e8875eba 100644
--- a/dali/pipeline/workspace/mixed_workspace.h
+++ b/dali/pipeline/workspace/mixed_workspace.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -41,31 +41,9 @@ using MixedOutputType = shared_ptr>;
  */
 class DLL_PUBLIC MixedWorkspace : public WorkspaceBase {
  public:
-  using WorkspaceBase::input_t;
-  using WorkspaceBase::output_t;
   DLL_PUBLIC inline MixedWorkspace() : stream_(0), event_(nullptr) {}
   DLL_PUBLIC inline ~MixedWorkspace() override = default;
-
-  /**
-   * @brief Returns the input Tensor at index `data_idx` in the input
-   * set of Tensors at index `idx`.
-   *
-   * @throws runtime_error If calling type does not match the type of
-   * the output at the given index.
-   */
-  template
-  DLL_PUBLIC const Tensor& Input(int idx, int data_idx) const;
-
-  /**
-   * @brief Returns the output TensorList at index `idx`.
-   *
-   * @throws runtime_error If calling type does not match the type of
-   * the output at the given index.
-   */
-  template
-  DLL_PUBLIC TensorList& Output(int idx);
-
   /**
    * @brief Sets the stream for this workspace.
   */
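MixedWorkspace follows the same pattern: per-sample CPU inputs are reached by indexing InputRef, and the GPU output batch comes from OutputRef, as in the MakeContiguousMixed hunk earlier in this patch. The sketch below assumes the batch-to-batch Copy overload used elsewhere in the patch; the helper name is illustrative.

    // Sketch only: a mixed-stage copy in the style of MakeContiguousMixed.
    #include "dali/pipeline/workspace/mixed_workspace.h"

    namespace dali {

    inline void CopyBatchToDevice(MixedWorkspace &ws) {
      auto &input = ws.InputRef<CPUBackend>(0);    // was: ws.Input<CPUBackend>(0, i), sample by sample
      auto &output = ws.OutputRef<GPUBackend>(0);  // was: ws.Output<GPUBackend>(0)
      size_t total_bytes = 0;
      for (size_t i = 0; i < input.num_samples(); i++)
        total_bytes += input[i].nbytes();          // per-sample view via operator[]
      if (total_bytes == 0)
        return;                                    // nothing to copy
      output.Copy(input, ws.has_stream() ? ws.stream() : 0);
    }

    }  // namespace dali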
+#include "dali/pipeline/workspace/host_workspace.h" #include "dali/pipeline/workspace/sample_workspace.h" namespace dali { -template <> -const Tensor& SampleWorkspace::Input(int idx) const { - return *CPUInput(idx); -} - -template <> -const Tensor& SampleWorkspace::Input(int idx) const { - return *GPUInput(idx); -} - -template <> -Tensor& SampleWorkspace::Output(int idx) { - return *CPUOutput(idx); -} +void MakeSampleView(SampleWorkspace& sample, HostWorkspace& batch, int data_idx, int thread_idx) { + sample.Clear(); + sample.set_data_idx(data_idx); + sample.set_thread_idx(thread_idx); + int num_inputs = batch.NumInput(); + for (int i = 0; i < num_inputs; i++) { + if (batch.InputIsType(i)) { + auto &input_ref = batch.InputRef(i); + sample.AddInput(&input_ref[data_idx]); + } else { + auto &input_ref = batch.InputRef(i); + sample.AddInput(&input_ref[data_idx]); + } + } -template <> -Tensor& SampleWorkspace::Output(int idx) { - return *GPUOutput(idx); + int num_outputs = batch.NumOutput(); + for (int i = 0; i < num_outputs; i++) { + if (batch.OutputIsType(i)) { + auto &output_ref = batch.OutputRef(i); + sample.AddOutput(&output_ref[data_idx]); + } else { + auto &output_ref = batch.OutputRef(i); + sample.AddOutput(&output_ref[data_idx]); + } + } + for (auto& arg_pair : batch) { + assert(!arg_pair.second.should_update); + sample.AddArgumentInput(arg_pair.first, arg_pair.second.tvec); + } } } // namespace dali diff --git a/dali/pipeline/workspace/sample_workspace.h b/dali/pipeline/workspace/sample_workspace.h index 88b62444575..9e281db5437 100644 --- a/dali/pipeline/workspace/sample_workspace.h +++ b/dali/pipeline/workspace/sample_workspace.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -32,13 +32,14 @@ namespace dali { template -using SampleInputType = shared_ptr>; +using SampleInputType = Tensor *; template -using SampleOutputType = shared_ptr>; +using SampleOutputType = Tensor *; /** - * @brief SampleWorkspace stores all data required for an operator to - * perform its computation on a single sample. + * @brief SampleWorkspace is workspace used for the legacy, deprcated CPU Operator implementation. + * It has views of all data required for an operator to perform its computation on a single sample, + * the data is actually owned by a corresponding HostWorkspace */ class DLL_PUBLIC SampleWorkspace : public WorkspaceBase { public: @@ -58,20 +59,6 @@ class DLL_PUBLIC SampleWorkspace : public WorkspaceBase - DLL_PUBLIC const Tensor& Input(int idx) const; - - /** - * @brief Returns Tensor with index = data_idx() from the output - * TensorList at index = `idx`. 
diff --git a/dali/pipeline/workspace/sample_workspace.h b/dali/pipeline/workspace/sample_workspace.h
index 88b62444575..9e281db5437 100644
--- a/dali/pipeline/workspace/sample_workspace.h
+++ b/dali/pipeline/workspace/sample_workspace.h
@@ -1,4 +1,4 @@
-// Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2017-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -32,13 +32,14 @@ namespace dali {
 template
-using SampleInputType = shared_ptr>;
+using SampleInputType = Tensor *;
 template
-using SampleOutputType = shared_ptr>;
+using SampleOutputType = Tensor *;
 
 /**
- * @brief SampleWorkspace stores all data required for an operator to
- * perform its computation on a single sample.
+ * @brief SampleWorkspace is a workspace used for the legacy, deprecated CPU Operator
+ * implementation. It holds views of all data required for an operator to perform its
+ * computation on a single sample; the data itself is owned by the corresponding HostWorkspace.
  */
 class DLL_PUBLIC SampleWorkspace : public WorkspaceBase {
  public:
@@ -58,20 +59,6 @@ class DLL_PUBLIC SampleWorkspace : public WorkspaceBase
-  DLL_PUBLIC const Tensor& Input(int idx) const;
-
-  /**
-   * @brief Returns Tensor with index = data_idx() from the output
-   * TensorList at index = `idx`.
-   */
-  template
-  DLL_PUBLIC Tensor& Output(int idx);
-
   int GetInputBatchSize(int) const {
     DALI_FAIL(
         "Impossible function: "
@@ -140,6 +127,13 @@ class DLL_PUBLIC SampleWorkspace : public WorkspaceBase
-  typename InputType::element_type& InputRef(int idx) const {
+  auto& InputRef(int idx) const {
     return *InputHandle(idx, Backend{});
   }
 
   template
-  typename OutputType::element_type& OutputRef(int idx) const {
+  auto& OutputRef(int idx) const {
     return *OutputHandle(idx, Backend{});
   }
diff --git a/dali/test/dali_operator_test.h b/dali/test/dali_operator_test.h
index 693eb21f51c..15a11cc9e36 100644
--- a/dali/test/dali_operator_test.h
+++ b/dali/test/dali_operator_test.h
@@ -142,9 +142,9 @@ GetOutputsFromPipeline(Pipeline &pipeline, const std::string &output_backend) {
   pipeline.Outputs(&workspace);
   for (int output_idx = 0; output_idx < workspace.NumOutput(); output_idx++) {
     if (workspace.OutputIsType(output_idx)) {
-      ret.emplace_back(&workspace.template Output(output_idx));
+      ret.emplace_back(&workspace.template OutputRef(output_idx));
     } else {
-      ret.emplace_back(&workspace.template Output(output_idx));
+      ret.emplace_back(&workspace.template OutputRef(output_idx));
     }
   }
   return ret;
diff --git a/dali/test/dali_test_bboxes.h b/dali/test/dali_test_bboxes.h
index 1ea10275d53..e7956471abe 100644
--- a/dali/test/dali_test_bboxes.h
+++ b/dali/test/dali_test_bboxes.h
@@ -95,11 +95,11 @@ class GenericBBoxesTest : public DALISingleOpTest {
     DeviceWorkspace ws;
     pipe->Outputs(&ws);
-    auto images_cpu = this->CopyToHost(ws.Output(0))[0];
-    images_cpu->SetLayout(ws.Output(0).GetLayout());
+    auto images_cpu = this->CopyToHost(ws.OutputRef(0))[0];
+    images_cpu->SetLayout(ws.OutputRef(0).GetLayout());
-    auto boxes_cpu = this->CopyToHost(ws.Output(1))[0];
-    boxes_cpu->SetLayout(ws.Output(1).GetLayout());
+    auto boxes_cpu = this->CopyToHost(ws.OutputRef(1))[0];
+    boxes_cpu->SetLayout(ws.OutputRef(1).GetLayout());
     return {images_cpu, boxes_cpu};
   }
@@ -145,15 +145,15 @@ class GenericBBoxesTest : public DALISingleOpTest {
     std::vector>> ret;
     ret.push_back(std::make_shared>());
    ret.push_back(std::make_shared>());
-    ret[0]->Copy(ws.Output(0), 0);
-    ret[1]->Copy(ws.Output(1), 0);
+    ret[0]->Copy(ws.OutputRef(0), 0);
+    ret[1]->Copy(ws.OutputRef(1), 0);
     return ret;
   }
   vector>> Reference(
       const vector *> &inputs, DeviceWorkspace *ws) override {
-    auto &from = ws->Output(1);
+    auto &from = ws->OutputRef(1);
     auto reference = this->CopyToHost(from);
     reference[0]->SetLayout(from.GetLayout());
     return reference;
diff --git a/dali/test/dali_test_matching.h b/dali/test/dali_test_matching.h
index a97ac7b647d..134ee8cf274 100644
--- a/dali/test/dali_test_matching.h
+++ b/dali/test/dali_test_matching.h
@@ -59,9 +59,9 @@ class GenericMatchingTest : public DALISingleOpTest {
   vector>> Reference(const vector*> &inputs,
                                                     DeviceWorkspace *ws) override {
     if (GetOpType() == OpType::GPU)
-      return this->CopyToHost(ws->Output(1));
+      return this->CopyToHost(ws->OutputRef(1));
     else
-      return this->CopyToHost(ws->Output(1));
+      return this->CopyToHost(ws->OutputRef(1));
   }
   uint32_t GetTestCheckType() const override {
diff --git a/dali/test/dali_test_resize.h b/dali/test/dali_test_resize.h
index 30fe20157b8..d31b944e398 100755
--- a/dali/test/dali_test_resize.h
+++ b/dali/test/dali_test_resize.h
@@ -76,7 +76,7 @@ class GenericResizeTest : public DALISingleOpTest {
     // determine resize parameters
     if (useExternSizes) {
-      const auto *t = ws->Output(1).tensor(i);
+      const auto *t = ws->OutputRef(1).tensor(i);
       rsz_h = t[0];
       rsz_w = t[1];
     } else {
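The workspace accessor hunk above also switches the InputRef/OutputRef return type from the explicit element_type spelling to auto&, so the deduced type simply follows the container each workspace kind stores. A sketch of what that deduction resolves to, assuming the storage types shown elsewhere in this patch (TensorList for device workspaces, an indexable per-sample container for host workspaces):

    // Sketch only: what auto& deduces to for the two workspace kinds.
    #include "dali/pipeline/workspace/device_workspace.h"
    #include "dali/pipeline/workspace/host_workspace.h"

    namespace dali {

    inline void DeducedAccessorTypes(DeviceWorkspace &dev_ws, HostWorkspace &host_ws) {
      auto &gpu_out = dev_ws.OutputRef<GPUBackend>(0);   // deduced: TensorList<GPUBackend> &
      auto &cpu_out = host_ws.OutputRef<CPUBackend>(0);  // deduced: the host batch container
      auto &sample0 = cpu_out[0];                        // indexing yields Tensor<CPUBackend> &
      (void)gpu_out;                                     // silence unused-variable warnings
      (void)sample0;
    }

    }  // namespace dali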
diff --git a/dali/test/dali_test_single_op.h b/dali/test/dali_test_single_op.h
index cc3d484d229..9e2c2e464ae 100644
--- a/dali/test/dali_test_single_op.h
+++ b/dali/test/dali_test_single_op.h
@@ -308,9 +308,9 @@ class DALISingleOpTest : public DALITest {
       auto idx = output_indices[i];
       if (output_device == "gpu") {
         // copy to host
-        calc_output->Copy(ws->Output(idx), nullptr);
+        calc_output->Copy(ws->OutputRef(idx), nullptr);
       } else {
-        calc_output->Copy(ws->Output(idx), nullptr);
+        calc_output->Copy(ws->OutputRef(idx), nullptr);
       }
       auto& ref_output = res[i];
diff --git a/dali/test/plugins/dummy/dummy.cu b/dali/test/plugins/dummy/dummy.cu
index 10409ead010..a11e6a51a58 100644
--- a/dali/test/plugins/dummy/dummy.cu
+++ b/dali/test/plugins/dummy/dummy.cu
@@ -19,9 +19,9 @@ namespace other_ns {
 template<>
 void Dummy<::dali::GPUBackend>::RunImpl(::dali::DeviceWorkspace &ws) {
-  const auto &input = ws.Input<::dali::GPUBackend>(0);
+  const auto &input = ws.InputRef<::dali::GPUBackend>(0);
   const auto &shape = input.shape();
-  auto &output = ws.Output<::dali::GPUBackend>(0);
+  auto &output = ws.OutputRef<::dali::GPUBackend>(0);
   for (int sample_idx = 0; sample_idx < shape.num_samples(); sample_idx++) {
     CUDA_CALL(cudaMemcpyAsync(
         output.raw_mutable_tensor(sample_idx),