Skip to content

Commit

Permalink
Introduce SetConvDescriptors to refactor cudnn/conv_forward.cc
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Jan 17, 2022
1 parent 596333b commit 146464e
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 87 deletions.
91 changes: 4 additions & 87 deletions src/runtime/contrib/cudnn/conv_forward.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,94 +35,11 @@ void ConvolutionForward(int mode, int format, int algo, int dims, int groups, co
const int stride[], const int dilation[], DLTensor* x, DLTensor* w,
DLTensor* y, const std::string& conv_dtype) {
CuDNNThreadEntry* entry_ptr = CuDNNThreadEntry::ThreadLocal();
// Set Mode
entry_ptr->conv_entry.mode = static_cast<cudnnConvolutionMode_t>(mode);
// Set Format
entry_ptr->conv_entry.tensor_format = static_cast<cudnnTensorFormat_t>(format);
// Set Algo
entry_ptr->conv_entry.fwd_algo = static_cast<cudnnConvolutionFwdAlgo_t>(algo);
// Set Device
entry_ptr->conv_entry.device = x->device;
// Set Data Type
entry_ptr->conv_entry.data_type = CuDNNDataType::DLTypeToCuDNNType(String2DLDataType(conv_dtype));
cudnnDataType_t data_type = CuDNNDataType::DLTypeToCuDNNType(x->dtype);
// Dims includes N and C
int full_dims = dims + 2;

std::vector<int> dim(full_dims);
std::vector<int> tensor_stride(full_dims);

// Note: For 2D tensor, using ND setters causes CUDNN_STATUS_NOT_SUPPORTED error
// in following cudnnGetConvolutionForwardWorkspaceSize() when data type is fp16, int

CUDNN_CALL(cudnnSetConvolutionGroupCount(entry_ptr->conv_entry.conv_desc, groups));
if (dims == 2) {
// Set Desc
CUDNN_CALL(cudnnSetConvolution2dDescriptor(
entry_ptr->conv_entry.conv_desc, pad[0], pad[1], stride[0], stride[1], dilation[0],
dilation[1], entry_ptr->conv_entry.mode, entry_ptr->conv_entry.data_type));
int ni, ci, hi, wi;
if (entry_ptr->conv_entry.tensor_format == CUDNN_TENSOR_NHWC) {
ni = 0;
ci = 3;
hi = 1;
wi = 2;
} else {
ni = 0;
ci = 1;
hi = 2;
wi = 3;
}

// Set Filter
CUDNN_CALL(cudnnSetFilter4dDescriptor(
entry_ptr->conv_entry.filter_desc, data_type, entry_ptr->conv_entry.tensor_format,
static_cast<int>(w->shape[ni]), static_cast<int>(w->shape[ci]),
static_cast<int>(w->shape[hi]), static_cast<int>(w->shape[wi])));
// Set Input
CUDNN_CALL(cudnnSetTensor4dDescriptor(
entry_ptr->conv_entry.input_desc, entry_ptr->conv_entry.tensor_format, data_type,
static_cast<int>(x->shape[ni]), static_cast<int>(x->shape[ci]),
static_cast<int>(x->shape[hi]), static_cast<int>(x->shape[wi])));
// Set Output
CUDNN_CALL(cudnnSetTensor4dDescriptor(
entry_ptr->conv_entry.output_desc, entry_ptr->conv_entry.tensor_format, data_type,
static_cast<int>(y->shape[ni]), static_cast<int>(y->shape[ci]),
static_cast<int>(y->shape[hi]), static_cast<int>(y->shape[wi])));
} else {
CUDNN_CALL(cudnnSetConvolutionNdDescriptor(entry_ptr->conv_entry.conv_desc, dims, pad, stride,
dilation, entry_ptr->conv_entry.mode,
entry_ptr->conv_entry.data_type));

// Set Filter
for (int i = 0; i < full_dims; i++) {
dim[i] = static_cast<int>(w->shape[i]);
}
CUDNN_CALL(cudnnSetFilterNdDescriptor(entry_ptr->conv_entry.filter_desc, data_type,
entry_ptr->conv_entry.tensor_format, full_dims,
dim.data()));
// Set Input
for (int i = 0; i < full_dims; i++) {
dim[i] = static_cast<int>(x->shape[i]);
}
GetCudnnStride(full_dims, dim.data(), tensor_stride.data());
CUDNN_CALL(cudnnSetTensorNdDescriptor(entry_ptr->conv_entry.input_desc, data_type, full_dims,
dim.data(), tensor_stride.data()));
// Set Output
for (int i = 0; i < full_dims; i++) {
dim[i] = static_cast<int>(y->shape[i]);
}
GetCudnnStride(full_dims, dim.data(), tensor_stride.data());
CUDNN_CALL(cudnnSetTensorNdDescriptor(entry_ptr->conv_entry.output_desc, data_type, full_dims,
dim.data(), tensor_stride.data()));
}

if (cudnnGetVersion() > 7000) {
CUDNN_CALL(cudnnSetConvolutionMathType(entry_ptr->conv_entry.conv_desc, CUDNN_TENSOR_OP_MATH))
}
SetConvDescriptors(entry_ptr, mode, format, algo, dims, groups, pad, stride, dilation, x, w, y,
conv_dtype);

// Set workspace
size_t workspace_size = 0;
// Set workspace
size_t workspace_size = 0;
CUDNN_CALL(cudnnGetConvolutionForwardWorkspaceSize(
entry_ptr->handle, entry_ptr->conv_entry.input_desc, entry_ptr->conv_entry.filter_desc,
entry_ptr->conv_entry.conv_desc, entry_ptr->conv_entry.output_desc,
Expand Down
93 changes: 93 additions & 0 deletions src/runtime/contrib/cudnn/cudnn_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "cudnn_utils.h"

#include <dmlc/thread_local.h>
#include <tvm/runtime/data_type.h>
#include <tvm/runtime/registry.h>

namespace tvm {
Expand Down Expand Up @@ -160,6 +161,98 @@ void ConvEntry::CleanWorkspace() {
workspace_size = 0;
}

void SetConvDescriptors(CuDNNThreadEntry* entry_ptr, int mode, int format, int algo, int dims,
                        int groups, const int pad[], const int stride[], const int dilation[],
                        DLTensor* x, DLTensor* w, DLTensor* y, const std::string& conv_dtype) {
  // Populate the thread-local conv entry (mode, layout, algorithm, device, conv
  // data type) and then configure the cuDNN convolution, filter, input and
  // output descriptors, using the 4D setters when dims == 2 and the ND setters
  // otherwise.
  auto& conv = entry_ptr->conv_entry;
  conv.mode = static_cast<cudnnConvolutionMode_t>(mode);
  conv.tensor_format = static_cast<cudnnTensorFormat_t>(format);
  conv.fwd_algo = static_cast<cudnnConvolutionFwdAlgo_t>(algo);
  conv.device = x->device;
  conv.data_type = CuDNNDataType::DLTypeToCuDNNType(runtime::String2DLDataType(conv_dtype));

  // Descriptor data type follows the input tensor, not conv_dtype.
  const cudnnDataType_t data_type = CuDNNDataType::DLTypeToCuDNNType(x->dtype);
  // Full tensor rank: spatial dims plus the N and C axes.
  const int full_dims = dims + 2;

  std::vector<int> dim(full_dims);
  std::vector<int> tensor_stride(full_dims);

  // Note: For a 2D tensor, using the ND setters causes a CUDNN_STATUS_NOT_SUPPORTED
  // error in the following cudnnGetConvolutionForwardWorkspaceSize() when the data
  // type is fp16, int

  CUDNN_CALL(cudnnSetConvolutionGroupCount(conv.conv_desc, groups));
  if (dims == 2) {
    CUDNN_CALL(cudnnSetConvolution2dDescriptor(conv.conv_desc, pad[0], pad[1], stride[0],
                                               stride[1], dilation[0], dilation[1], conv.mode,
                                               conv.data_type));
    // Axis positions of N/C/H/W in the shape array depend on the layout.
    const bool is_nhwc = (conv.tensor_format == CUDNN_TENSOR_NHWC);
    const int ni = 0;
    const int ci = is_nhwc ? 3 : 1;
    const int hi = is_nhwc ? 1 : 2;
    const int wi = is_nhwc ? 2 : 3;

    // Filter descriptor from the weight shape.
    CUDNN_CALL(cudnnSetFilter4dDescriptor(
        conv.filter_desc, data_type, conv.tensor_format, static_cast<int>(w->shape[ni]),
        static_cast<int>(w->shape[ci]), static_cast<int>(w->shape[hi]),
        static_cast<int>(w->shape[wi])));
    // Input descriptor from the data shape.
    CUDNN_CALL(cudnnSetTensor4dDescriptor(
        conv.input_desc, conv.tensor_format, data_type, static_cast<int>(x->shape[ni]),
        static_cast<int>(x->shape[ci]), static_cast<int>(x->shape[hi]),
        static_cast<int>(x->shape[wi])));
    // Output descriptor from the result shape.
    CUDNN_CALL(cudnnSetTensor4dDescriptor(
        conv.output_desc, conv.tensor_format, data_type, static_cast<int>(y->shape[ni]),
        static_cast<int>(y->shape[ci]), static_cast<int>(y->shape[hi]),
        static_cast<int>(y->shape[wi])));
  } else {
    CUDNN_CALL(cudnnSetConvolutionNdDescriptor(conv.conv_desc, dims, pad, stride, dilation,
                                               conv.mode, conv.data_type));

    // Copy a tensor's shape into `dim` as ints for the ND setters.
    auto fill_dims = [&dim, full_dims](const DLTensor* t) {
      for (int i = 0; i < full_dims; ++i) {
        dim[i] = static_cast<int>(t->shape[i]);
      }
    };

    // Filter descriptor.
    fill_dims(w);
    CUDNN_CALL(cudnnSetFilterNdDescriptor(conv.filter_desc, data_type, conv.tensor_format,
                                          full_dims, dim.data()));
    // Input descriptor (ND setters need explicit strides).
    fill_dims(x);
    GetCudnnStride(full_dims, dim.data(), tensor_stride.data());
    CUDNN_CALL(cudnnSetTensorNdDescriptor(conv.input_desc, data_type, full_dims, dim.data(),
                                          tensor_stride.data()));
    // Output descriptor.
    fill_dims(y);
    GetCudnnStride(full_dims, dim.data(), tensor_stride.data());
    CUDNN_CALL(cudnnSetTensorNdDescriptor(conv.output_desc, data_type, full_dims, dim.data(),
                                          tensor_stride.data()));
  }

  // Allow Tensor Core math on cuDNN releases newer than 7.0.0.
  if (cudnnGetVersion() > 7000) {
    CUDNN_CALL(cudnnSetConvolutionMathType(conv.conv_desc, CUDNN_TENSOR_OP_MATH))
  }
}

// SoftmaxEntry

SoftmaxEntry::SoftmaxEntry() { CUDNN_CALL(cudnnCreateTensorDescriptor(&shape_desc)); }
Expand Down
4 changes: 4 additions & 0 deletions src/runtime/contrib/cudnn/cudnn_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ struct CuDNNThreadEntry {
static CuDNNThreadEntry* ThreadLocal(bool check_exists = true);
}; // CuDNNThreadEntry

/*!
 * \brief Set the cuDNN convolution, filter, input and output descriptors on the
 *        given thread-local entry for a forward convolution.
 * \param entry_ptr Thread-local cuDNN state whose conv_entry descriptors are updated.
 * \param mode Cast to cudnnConvolutionMode_t.
 * \param format Cast to cudnnTensorFormat_t (tensor layout).
 * \param algo Cast to cudnnConvolutionFwdAlgo_t.
 * \param dims Number of spatial dimensions (excluding N and C).
 * \param groups Convolution group count.
 * \param pad Per-spatial-dim padding, length `dims`.
 * \param stride Per-spatial-dim stride, length `dims`.
 * \param dilation Per-spatial-dim dilation, length `dims`.
 * \param x Input tensor; also supplies the device and descriptor data type.
 * \param w Filter tensor.
 * \param y Output tensor.
 * \param conv_dtype Data type string used for the convolution compute type.
 */
void SetConvDescriptors(CuDNNThreadEntry* entry_ptr, int mode, int format, int algo, int dims,
                        int groups, const int pad[], const int stride[], const int dilation[],
                        DLTensor* x, DLTensor* w, DLTensor* y, const std::string& conv_dtype);

} // namespace contrib
} // namespace tvm

Expand Down

0 comments on commit 146464e

Please sign in to comment.