
cudnn-frontend crashes in case of MAX_OPGRAPH_OPS violation #108

Open
gritukan opened this issue Sep 7, 2024 · 1 comment

gritukan commented Sep 7, 2024

Describe the bug
Consider a graph with more than MAX_OPGRAPH_OPS operations, for example the one built by the following code:

#include "cudnn-frontend/include/cudnn_frontend.h"

namespace fe = cudnn_frontend;

int main()
{
    cudnnHandle_t handle;
    assert(cudnnCreate(&handle) == CUDNN_STATUS_SUCCESS);

    auto graph = std::make_shared<fe::graph::Graph>();
    auto x = graph->tensor(
        fe::graph::Tensor_attributes()
            .set_name("x")
            .set_dim({1, 1, 1})
            .set_stride({1, 1, 1})
            .set_data_type(fe::DataType_t::FLOAT));
    auto y = graph->tensor(
        fe::graph::Tensor_attributes()
            .set_name("y")
            .set_dim({1, 1, 1})
            .set_stride({1, 1, 1})
            .set_data_type(fe::DataType_t::FLOAT));

    auto inX = x;
    auto inY = y;

    for (int i = 0; i < 60; ++i) {  // 60 pointwise ADDs: more than MAX_OPGRAPH_OPS allows
        auto sum = graph->pointwise(
            x,
            y,
            fe::graph::Pointwise_attributes()
                .set_mode(fe::PointwiseMode_t::ADD)
                .set_compute_data_type(fe::DataType_t::FLOAT));
        sum->set_data_type(fe::DataType_t::FLOAT);

        x = y;
        y = sum;
    }

    y->set_output(true);

    assert(graph->validate().is_good());
    auto r = graph->build_operation_graph(handle);
    if (!r.is_good()) {
        std::cerr << r.get_message() << std::endl;
        return 0;
    }

    assert(graph->create_execution_plans({fe::HeurMode_t::A}).is_good());
    assert(graph->build_plans(handle, fe::BuildPlanPolicy_t::ALL).is_good());

    float One = 1;

    void* inXPtr;
    assert(cudaMalloc(&inXPtr, sizeof(float)) == cudaSuccess);
    assert(cudaMemcpy(inXPtr, &One, sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);

    void* inYPtr;
    assert(cudaMalloc(&inYPtr, sizeof(float)) == cudaSuccess);
    assert(cudaMemcpy(inYPtr, &One, sizeof(float), cudaMemcpyHostToDevice) == cudaSuccess);

    void* outPtr;
    assert(cudaMalloc(&outPtr, sizeof(float)) == cudaSuccess);

    void* workspacePtr;
    assert(cudaMalloc(&workspacePtr, graph->get_workspace_size()) == cudaSuccess);

    std::unordered_map<std::shared_ptr<fe::graph::Tensor_attributes>, void*> tensorMap;
    tensorMap[inX] = inXPtr;
    tensorMap[inY] = inYPtr;
    tensorMap[y] = outPtr;

    r = graph->execute(handle, tensorMap, workspacePtr);
    if (!r.is_good()) {
        std::cerr << r.get_message() << std::endl;
        return 0;
    }

    assert(cudaDeviceSynchronize() == cudaSuccess);

    float outData;
    assert(cudaMemcpy(&outData, outPtr, sizeof(float), cudaMemcpyDeviceToHost) == cudaSuccess);
    std::cout << outData << std::endl;

    assert(cudaFree(inXPtr) == cudaSuccess);
    assert(cudaFree(inYPtr) == cudaSuccess);
    assert(cudaFree(outPtr) == cudaSuccess);
    assert(cudaFree(workspacePtr) == cudaSuccess);

    assert(cudnnDestroy(handle) == CUDNN_STATUS_SUCCESS);
}

Instead of returning an error when the operation graph is built, the frontend crashes with the following backtrace because of an out-of-bounds access to m_operationGraph.ops:

(gdb) bt
#0  0x00005555555a56d5 in __gnu_cxx::__exchange_and_add (__val=-1, __mem=0xf8) at /usr/include/c++/11/ext/atomicity.h:66
#1  __gnu_cxx::__exchange_and_add_dispatch (__val=-1, __mem=0xf8) at /usr/include/c++/11/ext/atomicity.h:101
#2  std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release (this=0xf0) at /usr/include/c++/11/bits/shared_ptr_base.h:165
#3  0x0000555555596de3 in std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count (this=0x7fffffffcfe8, 
    __in_chrg=<optimized out>) at /usr/include/c++/11/bits/shared_ptr_base.h:705
#4  0x000055555556decc in std::__shared_ptr<cudnn_frontend::OpaqueBackendPointer, (__gnu_cxx::_Lock_policy)2>::~__shared_ptr (
    this=0x7fffffffcfe0, __in_chrg=<optimized out>) at /usr/include/c++/11/bits/shared_ptr_base.h:1154
#5  0x00005555555a75e6 in std::__shared_ptr<cudnn_frontend::OpaqueBackendPointer, (__gnu_cxx::_Lock_policy)2>::operator= (
    this=0x7fffffffd4e8, __r=...) at /usr/include/c++/11/bits/shared_ptr_base.h:1250
#6  0x000055555559883e in std::shared_ptr<cudnn_frontend::OpaqueBackendPointer>::operator= (this=0x7fffffffd4e8, __r=...)
    at /usr/include/c++/11/bits/shared_ptr.h:385
#7  0x0000555555578ef1 in cudnn_frontend::OperationGraphBuilder_v8::setOperationGraph (this=0x7fffffffd170, numOps_=60, 
    ops_=0x555555ab2f80) at cudnn-frontend/include/cudnn_frontend_OperationGraph.h:157
#8  0x0000555555586552 in cudnn_frontend::ICudnn::create_cudnn_operation_graph (this=0x5555562310f8, handle=0x5555557379c0)
    at cudnn-frontend/include/cudnn_frontend/node/../cudnn_interface.h:123
#9  0x0000555555592b85 in cudnn_frontend::graph::Graph::build_operation_graph (this=0x5555562310f0, handle=0x5555557379c0)
    at cudnn-frontend/include/cudnn_frontend/graph_interface.h:265
#10 0x00005555555681d4 in main () at example.cpp:43
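
The frames above show setOperationGraph being called with numOps_=60 and then copying descriptors into m_operationGraph.ops. As far as I can tell, the failure pattern looks like the sketch below; the stand-in types, the signature, and the value 50 for MAX_OPGRAPH_OPS are my assumptions for illustration, not the library's actual code.

#include <array>
#include <cstdint>
#include <memory>

// Illustrative stand-ins for the frontend's types; the names follow the
// backtrace, everything else is assumed rather than copied from the library.
struct OpaqueBackendPointer {};
struct Op { std::shared_ptr<OpaqueBackendPointer> pointer; };

constexpr std::size_t MAX_OPGRAPH_OPS = 50;  // assumed value of the limit

struct OpGraphDesc {
    std::int64_t numOps = 0;
    // Fixed-size storage for the per-operation backend descriptors.
    std::array<std::shared_ptr<OpaqueBackendPointer>, MAX_OPGRAPH_OPS> ops;
};

// Sketch of the failing pattern: copying numOps_ descriptors without checking
// numOps_ <= MAX_OPGRAPH_OPS writes past the end of `ops` whenever the graph
// has more operations than the array holds (60 in the repro above).
void setOperationGraph(OpGraphDesc& graph, std::int64_t numOps_, const Op* ops_) {
    graph.numOps = numOps_;
    for (std::int64_t i = 0; i < numOps_; ++i) {
        graph.ops[i] = ops_[i].pointer;  // out of bounds once i >= MAX_OPGRAPH_OPS
    }
}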

Expected behavior

I expect to get an error in this case, so that my code can fall back to another computational engine.

Also, I expect this restriction to be mentioned in the documentation.
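
For illustration, the kind of up-front check I would expect before the descriptors are copied; this is a sketch with hypothetical names (Status, checkOpCount, and the value 50 are mine, not the frontend's API):

#include <cstdint>
#include <string>

constexpr std::int64_t kMaxOpgraphOps = 50;  // assumed value of MAX_OPGRAPH_OPS

// Minimal stand-in for a status type; the real frontend has its own error type.
struct Status {
    bool ok;
    std::string message;
};

// Sketch: reject oversized graphs up front so the caller gets an error it can
// act on (for example, by falling back to another engine) instead of an
// out-of-bounds write and a crash.
Status checkOpCount(std::int64_t numOps) {
    if (numOps > kMaxOpgraphOps) {
        return {false,
                "Operation graph has " + std::to_string(numOps) + " ops, which exceeds " +
                "MAX_OPGRAPH_OPS (" + std::to_string(kMaxOpgraphOps) + ")"};
    }
    return {true, ""};
}

The repro above already checks r.is_good() after build_operation_graph, so surfacing the violation this way would let that existing branch handle the fallback.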

Anerudhan self-assigned this Sep 24, 2024

Anerudhan (Collaborator) commented

Will be addressed in the next release.
