Skip to content

Commit

Permalink
Handle file descriptor ownership and update documents for GDS (#234)
Browse files Browse the repository at this point in the history
- Close the file descriptor if the file descriptor is owned by the object.
- Update GDS example in the Jupyter notebook.

```python
from cucim.clara.filesystem import CuFileDriver
import cucim.clara.filesystem as fs
import os, cupy as cp, torch

# Create a CuPy array with size 10 (in bytes)
cp_arr = cp.ones(10, dtype=cp.uint8)
# Create a PyTorch tensor with size 10 (in bytes) on the first CUDA device
cuda0 = torch.device('cuda:0')
torch_arr = torch.ones(10, dtype=torch.uint8, device=cuda0)

# Using CuFileDriver with a file descriptor opened by the caller
# (Opening a file with the O_DIRECT flag is required for GDS)
fno = os.open("input.raw", os.O_RDONLY | os.O_DIRECT)
with CuFileDriver(fno) as fd:
  # Read 8 bytes starting from file offset 0 into buffer offset 2
  read_count = fd.pread(cp_arr, 8, 0, 2)
  # Read 10 bytes starting from file offset 3
  read_count = fd.pread(torch_arr, 10, 3)
# The descriptor was created by os.open(), not by the driver, so the caller closes it
os.close(fno)

# Another way of opening a file with cuFile
with fs.open("output.raw", "w") as fd:
  # Write 10 bytes from cp_arr to the file starting from file offset 5
  write_count = fd.pwrite(cp_arr, 10, 5)
  #############################################
  # <=== the file descriptor created by fs.open() is closed when exiting the 'with' block
  #############################################
```


Fixes #233

Authors:
  - Gigon Bae (https://github.com/gigony)

Approvers:
  - https://github.com/jakirkham

URL: #234
  • Loading branch information
gigony authored Mar 24, 2022
1 parent 80b5c31 commit 4f79b07
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 56 deletions.
5 changes: 4 additions & 1 deletion cpp/include/cucim/filesystem/cufile_driver.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -183,6 +183,9 @@ class EXPORT_VISIBLE CuFileDriver : public std::enable_shared_from_this<CuFileDr

~CuFileDriver();

// Allow open() to access the private 'handle_' field (it sets 'own_fd' on the file handle)
friend std::shared_ptr<CuFileDriver> open(const char* file_path, const char* flags, mode_t mode);

private:
static Mutex driver_mutex_; // TODO: not used yet.

Expand Down
18 changes: 15 additions & 3 deletions cpp/include/cucim/filesystem/file_handle.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,6 +19,9 @@


#include "../macros/defines.h"

#include <unistd.h>

#include <cstdio>
#include <cstdint>
#include <memory>
Expand All @@ -32,7 +35,7 @@ typedef void* CuCIMFileHandle_share;
typedef void* CuCIMFileHandle_ptr;
typedef bool (*CuCIMFileHandleDeleter)(CuCIMFileHandle_ptr);

enum class FileHandleType: uint16_t
enum class FileHandleType : uint16_t
{
kUnknown = 0,
kPosix = 1,
Expand All @@ -55,7 +58,8 @@ struct EXPORT_VISIBLE CuCIMFileHandle : public std::enable_shared_from_this<CuCI
void* client_data,
uint64_t dev,
uint64_t ino,
int64_t mtime);
int64_t mtime,
bool own_fd);

~CuCIMFileHandle()
{
Expand All @@ -70,6 +74,13 @@ struct EXPORT_VISIBLE CuCIMFileHandle : public std::enable_shared_from_this<CuCI
deleter(this);
deleter = nullptr;
}

if (own_fd && fd >=0)
{
::close(fd);
fd = -1;
own_fd = false;
}
}

CuCIMFileHandleDeleter set_deleter(CuCIMFileHandleDeleter deleter)
Expand All @@ -86,6 +97,7 @@ struct EXPORT_VISIBLE CuCIMFileHandle : public std::enable_shared_from_this<CuCI
uint64_t dev = 0;
uint64_t ino = 0;
int64_t mtime = 0;
bool own_fd = false; ///< whether the file descriptor was created internally by the driver
CuCIMFileHandleDeleter deleter = nullptr;
};
#else
Expand Down
14 changes: 11 additions & 3 deletions cpp/src/filesystem/cufile_driver.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -162,7 +162,15 @@ std::shared_ptr<CuFileDriver> open(const char* file_path, const char* flags, mod
}
}

return std::make_shared<CuFileDriver>(fd, no_gds, use_mmap, file_path);
const auto handle = std::make_shared<CuFileDriver>(fd, no_gds, use_mmap, file_path);

// Mark the file descriptor as owned so that it is closed when the file handle is destroyed
if (handle->handle_)
{
handle->handle_->own_fd = true;
}

return handle;
}

std::shared_ptr<CuFileDriver> open(int fd, bool no_gds, bool use_mmap)
Expand Down Expand Up @@ -201,7 +209,7 @@ CuFileDriver::CuFileDriver(int fd, bool no_gds, bool use_mmap, const char* file_
file_path_cstr[file_path_.size()] = '\0';
handle_ = std::make_shared<CuCIMFileHandle>(fd, nullptr, file_type, const_cast<char*>(file_path_cstr), this,
static_cast<uint64_t>(st.st_dev), static_cast<uint64_t>(st.st_ino),
static_cast<int64_t>(st.st_mtim.tv_nsec));
static_cast<int64_t>(st.st_mtim.tv_nsec), false);

CUfileError_t status;
CUfileDescr_t cf_descr{}; // It is important to set zero!
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/filesystem/file_handle.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -29,7 +29,8 @@ CuCIMFileHandle::CuCIMFileHandle()
hash_value(0),
dev(0),
ino(0),
mtime(0)
mtime(0),
own_fd(false)
{
}

Expand All @@ -53,8 +54,9 @@ CuCIMFileHandle::CuCIMFileHandle(int fd,
void* client_data,
uint64_t dev,
uint64_t ino,
int64_t mtime)
: fd(fd), cufile(cufile), type(type), path(path), client_data(client_data), dev(dev), ino(ino), mtime(mtime)
int64_t mtime,
bool own_fd)
: fd(fd), cufile(cufile), type(type), path(path), client_data(client_data), dev(dev), ino(ino), mtime(mtime), own_fd(own_fd)
{
hash_value = cucim::codec::splitmix64_3(dev, ino, mtime);
}
90 changes: 45 additions & 45 deletions notebooks/Accessing_File_with_GDS.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
"source": [
"## Prerequisite\n",
"\n",
"[NVIDIA® GPUDirect® Storage (GDS)](https://developer.nvidia.com/gpudirect-storage) needs to be installed to use GDS feature (Since CUDA Toolkit 11.4, GDS client package has been available.)\n",
"\n",
"[NVIDIA® GPUDirect® Storage (GDS)](https://developer.nvidia.com/gpudirect-storage) needs to be installed to use the GDS feature (GDS has been available as a GA release since CUDA 11.4).\n",
"\n",
". File access APIs would still work without GDS but you won't see the speed up. \\\n",
"File access APIs still work without the GDS host (kernel) packages, but you won't see the speedup.\n",
"Please follow the [release notes](https://docs.nvidia.com/gpudirect-storage/release-notes/index.html) or the [installation guide](https://docs.nvidia.com/gpudirect-storage/troubleshooting-guide/index.html#abstract) to install GDS on your host system.\n",
"- Note:: During the GDS installation, you would need MOFED (Mellanox OpenFabrics Enterprise Distribution) installed. MOFED is available at https://www.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed.\n",
"\n",
"- Note: During the GDS prerequisite installation (step 3 of [the installation guide](https://docs.nvidia.com/gpudirect-storage/troubleshooting-guide/index.html#install-prereqs)), you need to have MOFED (Mellanox OpenFabrics Enterprise Distribution) installed. MOFED is available at https://www.mellanox.com/products/infiniband-drivers/linux/mlnx_ofed.\n",
"\n",
"\n",
"\n",
"The following examples assume that the files being loaded are located on an NVMe storage device and that the CuPy and PyTorch packages are installed.\n",
"\n",
Expand All @@ -39,7 +41,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -75,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -103,14 +105,16 @@
"from cucim.clara.filesystem import CuFileDriver\n",
"\n",
"fno = os.open( \"input/image.tif\", os.O_RDONLY | os.O_DIRECT)\n",
"fno2 = os.dup(fno) \n",
"fno2 = os.dup(fno)\n",
"\n",
"fd = CuFileDriver(fno, False)\n",
"fd.close()\n",
"os.close(fno)\n",
"\n",
"# Do not use GDS even when GDS can be supported for the file.\n",
"fd2 = CuFileDriver(fno2, True)\n",
"fd2.close()\n",
"os.close(fno2)\n",
"\n",
"help(CuFileDriver.__init__)"
]
Expand Down Expand Up @@ -153,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand All @@ -162,7 +166,7 @@
"True"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -171,11 +175,11 @@
"import cucim.clara.filesystem as fs\n",
"\n",
"fd = fs.open(\"input/image.tif\", \"r\")\n",
"fs.close(fd)\n",
"fs.close(fd) # same with fd.close()\n",
"\n",
"# Open file without using GDS\n",
"fd2 = fs.open(\"input/image.tif\", \"rp\")\n",
"fs.close(fd2)\n"
"fs.close(fd2) # same with fd2.close()\n"
]
},
{
Expand Down Expand Up @@ -203,7 +207,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -215,18 +219,9 @@
"torch_arr cnt: 7 content: tensor([104, 105, 106, 107, 108, 109, 110, 108, 109, 110], dtype=torch.uint8)\n",
"output.raw cnt: 10 content: [0, 0, 0, 0, 0, 104, 105, 106, 107, 108, 109, 110, 108, 109, 110]\n",
"\n",
"np_arr cnt: 10 content: [ 0 0 0 0 0 104 105 106 107 108]\n",
"np_arr cnt: 10 content: [ 0 0 0 0 0 104 105 106 107 108]\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
Expand All @@ -253,11 +248,13 @@
"read_count = fd.pread(torch_arr.data_ptr(), 10, 3) # read 10 bytes starting from file offset 3\n",
"print(\"{:10} cnt: {} content: {}\".format(\"torch_arr\", read_count, torch_arr))\n",
"fd.close()\n",
"os.close(fno)\n",
"\n",
"fno = os.open(\"output.raw\", os.O_RDWR | os.O_CREAT | os.O_TRUNC)\n",
"fd = CuFileDriver(fno)\n",
"write_count = fd.pwrite(np_arr, 10, 5) # write 10 bytes from np_arr to file starting from offset 5\n",
"fd.close()\n",
"os.close(fno)\n",
"print(\"{:10} cnt: {} content: {}\".format(\"output.raw\", write_count, list(open(\"output.raw\", \"rb\").read())))\n",
"\n",
"\n",
Expand All @@ -266,7 +263,12 @@
"fd = fs.open(\"output.raw\", \"r\")\n",
"read_count = fs.pread(fd, np_arr, 10, 0) # read 10 bytes starting from offset 0\n",
"print(\"{:10} cnt: {} content: {}\".format(\"np_arr\", read_count, np_arr))\n",
"fs.close(fd) # same with fd.close()"
"fs.close(fd) # same with fd.close()\n",
"\n",
"# Using 'with' statement\n",
"with fs.open(\"output.raw\", \"r\") as fd:\n",
" read_count = fd.pread(np_arr, 10, 0) # read 10 bytes starting from offset 0\n",
" print(\"{:10} cnt: {} content: {}\".format(\"np_arr\", read_count, np_arr))\n"
]
},
{
Expand All @@ -281,7 +283,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [
{
Expand All @@ -290,22 +292,13 @@
"text": [
"np_arr cnt: 8 content: [ 1 2 101 102 103 104 105 106 107 108]\n",
"cp_arr cnt: 10 content: [ 0 0 0 0 0 104 105 106 107 108]\n",
"torch_arr cnt: 7 content: tensor([104, 105, 106, 107, 108, 109, 110, 0, 0, 0], device='cuda:0',\n",
"torch_arr cnt: 7 content: tensor([104, 105, 106, 107, 108, 109, 110, 108, 109, 110], device='cuda:0',\n",
" dtype=torch.uint8)\n",
"output.raw cnt: 10 content: [0, 0, 0, 0, 0, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110]\n",
"\n",
"cp_arr cnt: 10 content: [ 0 0 0 0 0 104 105 106 107 108]\n"
"cp_arr cnt: 10 content: [ 0 0 0 0 0 101 102 103 104 105]\n",
"np_arr cnt: 10 content: [ 0 0 0 0 0 101 102 103 104 105]\n"
]
},
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
Expand Down Expand Up @@ -337,24 +330,31 @@
"read_count = fd.pread(torch_arr, 10, 3) # read 10 bytes starting from offset 3\n",
"print(\"{:20} cnt: {} content: {}\".format(\"torch_arr\", read_count, torch_arr))\n",
"fd.close()\n",
"os.close(fno)\n",
"\n",
"fno = os.open(\"output.raw\", os.O_RDWR | os.O_CREAT | os.O_TRUNC)\n",
"fd = CuFileDriver(fno)\n",
"write_count = fd.pwrite(cp_arr, 10, 5) # write 10 bytes from cp_arr to file starting from offset 5\n",
"fd.close()\n",
"os.close(fno)\n",
"print(\"{:20} cnt: {} content: {}\".format(\"output.raw\", write_count, list(open(\"output.raw\", \"rb\").read())))\n",
"\n",
"print()\n",
"# Using filesystem package\n",
"fd = fs.open(\"output.raw\", \"r\")\n",
"read_count = fs.pread(fd, cp_arr, 10, 0) # read 10 bytes starting from offset 0\n",
"print(\"{:20} cnt: {} content: {}\".format(\"cp_arr\", read_count, np_arr))\n",
"fs.close(fd) # same with fd.close()"
"print(\"{:20} cnt: {} content: {}\".format(\"cp_arr\", read_count, cp_arr))\n",
"fs.close(fd) # same with fd.close()\n",
"\n",
"# Using 'with' statement\n",
"with fs.open(\"output.raw\", \"r\") as fd:\n",
" read_count = fd.pread(cp_arr, 10, 0) # read 10 bytes starting from offset 0\n",
" print(\"{:10} cnt: {} content: {}\".format(\"np_arr\", read_count, cp_arr))\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -366,10 +366,10 @@
" 'stream': 1,\n",
" 'version': 3,\n",
" 'strides': None,\n",
" 'data': (140035445751808, False)}"
" 'data': (139779203137536, False)}"
]
},
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -380,7 +380,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -389,11 +389,11 @@
"{'typestr': '|u1',\n",
" 'shape': (10,),\n",
" 'strides': None,\n",
" 'data': (140035106013184, False),\n",
" 'data': (139776277610496, False),\n",
" 'version': 2}"
]
},
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -490,7 +490,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.9.10"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 4f79b07

Please sign in to comment.