Skip to content

Commit

Permalink
Merge branch 'master' into gb_cuda_examples2
Browse files Browse the repository at this point in the history
  • Loading branch information
mfbalin authored Jan 23, 2024
2 parents 9ad218c + 2fedcdc commit 1240697
Show file tree
Hide file tree
Showing 15 changed files with 115 additions and 79 deletions.
18 changes: 9 additions & 9 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v231103_1700"
image "dgllib/dgl-ci-cpu:v240123_1000"
args "-u root"
alwaysPull true
}
Expand All @@ -337,7 +337,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-gpu:cu116_v231103_1700"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
args "-u root"
alwaysPull true
}
Expand Down Expand Up @@ -392,7 +392,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v231103_1700"
image "dgllib/dgl-ci-cpu:v240123_1000"
args "-u root"
alwaysPull true
}
Expand All @@ -411,7 +411,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu116_v231103_1700"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
args "-u root --runtime nvidia"
alwaysPull true
}
Expand Down Expand Up @@ -464,7 +464,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu116_v231103_1700"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
args "-u root --runtime nvidia"
alwaysPull true
}
Expand All @@ -489,7 +489,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v231103_1700"
image "dgllib/dgl-ci-cpu:v240123_1000"
args "-u root --shm-size=4gb"
alwaysPull true
}
Expand Down Expand Up @@ -542,7 +542,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-gpu"
image "dgllib/dgl-ci-gpu:cu116_v231103_1700"
image "dgllib/dgl-ci-gpu:cu116_v240123_1000"
args "-u root --runtime nvidia --shm-size=8gb"
alwaysPull true
}
Expand Down Expand Up @@ -571,7 +571,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v231103_1700"
image "dgllib/dgl-ci-cpu:v240123_1000"
args "-u root --shm-size=4gb"
alwaysPull true
}
Expand Down Expand Up @@ -618,7 +618,7 @@ pipeline {
agent {
docker {
label "dgl-ci-linux-cpu"
image "dgllib/dgl-ci-cpu:v231103_1700"
image "dgllib/dgl-ci-cpu:v240123_1000"
args "-u root"
alwaysPull true
}
Expand Down
1 change: 1 addition & 0 deletions docker/install/conda_env/torch_cpu_pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ torch==1.13.0+cpu
torchdata
torcheval
torchmetrics
torch_geometric
tqdm
1 change: 1 addition & 0 deletions docker/install/conda_env/torch_gpu_pip.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ torch==1.13.0+cu116
torchdata
torcheval
torchmetrics
torch_geometric
tqdm
16 changes: 8 additions & 8 deletions examples/core/rgcn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9

| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~1.1GB | ~5GB | 0 | 0GB | ~243s |
| ~1.1GB | ~3GB | 1 | 4.4GB | ~81s |
| ~1.1GB | ~7GB | 0 | 0GB | ~233s |
| ~1.1GB | ~5GB | 1 | 4.5GB | ~73.6s |

### Accuracies
```
Epoch: 01, Loss: 2.3302, Valid: 47.76%, Test: 46.58%
Epoch: 02, Loss: 1.5486, Valid: 48.31%, Test: 47.12%
Epoch: 03, Loss: 1.1469, Valid: 46.43%, Test: 45.18%
Test accuracy 45.1227
Epoch: 01, Loss: 2.3386, Valid: 47.67%, Test: 46.96%
Epoch: 02, Loss: 1.5563, Valid: 47.66%, Test: 47.02%
Epoch: 03, Loss: 1.1557, Valid: 46.58%, Test: 45.42%
Test accuracy 45.3850
```

## Run on `ogb-lsc-mag240m` dataset
Expand All @@ -54,8 +54,8 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9

| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~404GB | ~60GB | 0 | 0GB | ~216s |
| ~404GB | ~60GB | 1 | 7GB | ~157s |
| ~404GB | ~72GB | 0 | 0GB | ~325s |
| ~404GB | ~61GB | 1 | 14GB | ~178s |

### Accuracies
```
Expand Down
16 changes: 10 additions & 6 deletions examples/multigpu/node_classification_sage.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,16 @@ def train(
use_uva,
):
# Instantiate a neighbor sampler
sampler = NeighborSampler(
[10, 10, 10],
prefetch_node_feats=["feat"],
prefetch_labels=["label"],
fused=(args.mode != "benchmark"),
)
if args.mode == "benchmark":
# A workaround to prevent a CUDA runtime error. For more details, please
# see https://github.com/dmlc/dgl/issues/6697.
sampler = NeighborSampler([10, 10, 10], fused=False)
else:
sampler = NeighborSampler(
[10, 10, 10],
prefetch_node_feats=["feat"],
prefetch_labels=["label"],
)
train_dataloader = DataLoader(
g,
train_idx,
Expand Down
6 changes: 4 additions & 2 deletions examples/sampling/graphbolt/node_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,9 @@ def evaluate(args, model, graph, features, itemset, num_classes):


def train(args, graph, features, train_set, valid_set, num_classes, model):
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=5e-4
)
dataloader = create_dataloader(
graph=graph,
features=features,
Expand Down Expand Up @@ -343,7 +345,7 @@ def parse_args():
parser.add_argument(
"--lr",
type=float,
default=0.0005,
default=1e-3,
help="Learning rate for optimization.",
)
parser.add_argument(
Expand Down
24 changes: 12 additions & 12 deletions examples/sampling/graphbolt/rgcn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9

| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~1.1GB | ~4.5GB | 0 | 0GB | ~235s |
| ~1.1GB | ~2GB | 1 | 4.4GB | ~60s |
| ~1.1GB | ~5.3GB | 0 | 0GB | ~230s |
| ~1.1GB | ~3GB | 1 | 3.87GB | ~64.6s |

### Accuracies
```
Epoch: 01, Loss: 2.6736, Valid accuracy: 42.21%
Epoch: 02, Loss: 2.0809, Valid accuracy: 42.51%
Epoch: 03, Loss: 1.8143, Valid accuracy: 42.76%
Test accuracy 41.4817
Epoch: 01, Loss: 2.3434, Valid accuracy: 48.23%
Epoch: 02, Loss: 1.5646, Valid accuracy: 48.49%
Epoch: 03, Loss: 1.1633, Valid accuracy: 45.79%
Test accuracy 44.6792
```

## Run on `ogb-lsc-mag240m` dataset
Expand All @@ -47,17 +47,17 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9

> **note:**
`buffer/cache` is heavily used during training, at about 300GB. If more RAM is available, more `buffer/cache` will be consumed, since the graph size is about 55GB and the feature data is about 350GB.
One more thing, first epoch is quite slow as `buffer/cache` is not ready yet. For GPU train, first epoch takes **34:56min, 1.93s/it**.
One more thing: the first epoch is quite slow because `buffer/cache` is not ready yet. For GPU training, the first epoch takes **1030s**.
Even in the following epochs, time consumption varies.

| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~404GB | ~55GB | 0 | 0GB | ~197s |
| ~404GB | ~55GB | 1 | 7GB | ~119s |
| ~404GB | ~67GB | 0 | 0GB | ~248s |
| ~404GB | ~60GB | 1 | 15GB | ~166s |

### Accuracies
```
Epoch: 01, Loss: 2.3038, Valid accuracy: 46.33%
Epoch: 02, Loss: 2.1160, Valid accuracy: 46.47%
Epoch: 03, Loss: 2.0847, Valid accuracy: 48.38%
Epoch: 01, Loss: 2.1432, Valid accuracy: 50.21%
Epoch: 02, Loss: 1.9267, Valid accuracy: 50.77%
Epoch: 03, Loss: 1.8797, Valid accuracy: 53.38%
```
13 changes: 13 additions & 0 deletions python/dgl/graphbolt/minibatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,19 @@ class MiniBatch:
or other graph components depending on the specific context.
"""

indexes: Union[torch.Tensor, Dict[str, torch.Tensor]] = None
"""
Indexes associated with seed nodes / node pairs in the graph, which
indicate to which query a seed node / node pair belongs.
- If `indexes` is a tensor: It indicates the graph is homogeneous. The
value should be the query corresponding to the given 'seed_nodes' or
'node_pairs'.
- If `indexes` is a dictionary: It indicates the graph is
heterogeneous. The keys should be node or edge types and the values
should be the queries corresponding to the given 'seed_nodes' or
'node_pairs'. For each key, indexes are consecutive integers starting
from zero.
"""

negative_srcs: Union[torch.Tensor, Dict[str, torch.Tensor]] = None
"""
Representation of negative samples for the head nodes in the link
Expand Down
55 changes: 29 additions & 26 deletions python/dgl/graphbolt/sampled_subgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,18 @@ def sampled_csc(
self,
) -> Union[CSCFormatBase, Dict[str, CSCFormatBase],]:
"""Returns the node pairs representing edges in csc format.
- If `sampled_csc` is a CSCFormatBase: It should be in the csc format.
`indptr` stores the index in the data array where each column
starts. `indices` stores the row indices of the non-zero elements.
- If `sampled_csc` is a dictionary: The keys should be edge type and
the values should be corresponding node pairs. The ids inside is
heterogeneous ids.
- If `sampled_csc` is a CSCFormatBase: It should be in the csc
format. `indptr` stores the index in the data array where each
column starts. `indices` stores the row indices of the non-zero
elements.
- If `sampled_csc` is a dictionary: The keys should be edge types and
the values should be the corresponding node pairs. The ids inside are
heterogeneous ids.
Examples
--------
1. Homogeneous graph.
>>> import dgl.graphbolt as gb
>>> import torch
>>> sampled_csc = gb.CSCFormatBase(
Expand All @@ -51,6 +53,7 @@ def sampled_csc(
)
2. Heterogeneous graph.
>>> sampled_csc = {"A:relation:B": gb.CSCFormatBase(
... indptr=torch.tensor([0, 1, 2, 3]),
... indices=torch.tensor([0, 1, 2]))}
Expand All @@ -69,11 +72,11 @@ def original_column_node_ids(
Column's reverse node ids in the original graph. A graph structure
can be treated as a coordinated row and column pair, and this is
the mapped ids of the column.
- If `original_column_node_ids` is a tensor: It represents the original
node ids.
- If `original_column_node_ids` is a dictionary: The keys should be
node type and the values should be corresponding original
heterogeneous node ids.
- If `original_column_node_ids` is a tensor: It represents the
original node ids.
- If `original_column_node_ids` is a dictionary: The keys should be
node type and the values should be corresponding original
heterogeneous node ids.
If present, it means column IDs are compacted, and `sampled_csc`
column IDs match these compacted ones.
"""
Expand All @@ -87,11 +90,11 @@ def original_row_node_ids(
Row's reverse node ids in the original graph. A graph structure
can be treated as a coordinated row and column pair, and this is
the mapped ids of the row.
- If `original_row_node_ids` is a tensor: It represents the original
node ids.
- If `original_row_node_ids` is a dictionary: The keys should be node
type and the values should be corresponding original heterogeneous
node ids.
- If `original_row_node_ids` is a tensor: It represents the original
node ids.
- If `original_row_node_ids` is a dictionary: The keys should be node
type and the values should be corresponding original heterogeneous
node ids.
If present, it means row IDs are compacted, and `sampled_csc`
row IDs match these compacted ones."""
return None
Expand All @@ -101,11 +104,11 @@ def original_edge_ids(self) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
"""Returns corresponding reverse edge ids in the original graph.
Reverse edge ids in the original graph. This is useful when edge
features are needed.
- If `original_edge_ids` is a tensor: It represents the original edge
ids.
- If `original_edge_ids` is a dictionary: The keys should be edge type
and the values should be corresponding original heterogeneous edge
ids.
- If `original_edge_ids` is a tensor: It represents the original edge
ids.
- If `original_edge_ids` is a dictionary: The keys should be edge
type and the values should be corresponding original heterogeneous
edge ids.
"""
return None

Expand All @@ -119,17 +122,17 @@ def exclude_edges(
):
r"""Exclude edges from the sampled subgraph.
This function can be used with sampled subgraphs, regardless of whether they
have compacted row/column nodes or not. If the original subgraph has
compacted row or column nodes, the corresponding row or column nodes in the
returned subgraph will also be compacted.
This function can be used with sampled subgraphs, regardless of
whether they have compacted row/column nodes or not. If the original
subgraph has compacted row or column nodes, the corresponding row or
column nodes in the returned subgraph will also be compacted.
Parameters
----------
self : SampledSubgraph
The sampled subgraph.
edges : Union[Tuple[torch.Tensor, torch.Tensor],
Dict[str, Tuple[torch.Tensor, torch.Tensor]]]
Dict[str, Tuple[torch.Tensor, torch.Tensor]]]
Edges to exclude. If sampled subgraph is homogeneous, then `edges`
should be a pair of tensors representing the edges to exclude. If
sampled subgraph is heterogeneous, then `edges` should be a
Expand Down
1 change: 1 addition & 0 deletions script/dgl_dev.yml.template
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ dependencies:
- torchdata>=0.5.0
- torcheval
- torchmetrics
- torch_geometric
- tqdm
- boto3 # AWS SDK for python
- sphinx==4.2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,8 @@ def test_sample_neighbors_homo(labor, is_pinned):
0 1 0 0 1
1 0 0 0 1
"""
if F._default_context_str == "cpu" and is_pinned:
pytest.skip("Pinning is not meaningful without a GPU.")
# Initialize data.
total_num_edges = 12
indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
Expand All @@ -1631,12 +1633,9 @@ def test_sample_neighbors_homo(labor, is_pinned):
assert indptr[-1] == len(indices)

# Construct FusedCSCSamplingGraph.
graph = gb.fused_csc_sampling_graph(indptr, indices)
if F._default_context_str == "gpu":
if is_pinned:
graph.pin_memory_()
else:
graph = graph.to(F.ctx())
graph = gb.fused_csc_sampling_graph(indptr, indices).to(
"pinned" if is_pinned else F.ctx()
)

# Generate subgraph via sample neighbors.
nodes = torch.LongTensor([1, 3, 4]).to(F.ctx())
Expand Down Expand Up @@ -1883,6 +1882,8 @@ def test_sample_neighbors_return_eids_homo(labor, is_pinned):
0 1 0 0 1
1 0 0 0 1
"""
if F._default_context_str == "cpu" and is_pinned:
pytest.skip("Pinning is not meaningful without a GPU.")
# Initialize data.
total_num_edges = 12
indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
Expand All @@ -1896,12 +1897,7 @@ def test_sample_neighbors_return_eids_homo(labor, is_pinned):
# Construct FusedCSCSamplingGraph.
graph = gb.fused_csc_sampling_graph(
indptr, indices, edge_attributes=edge_attributes
)
if F._default_context_str == "gpu":
if is_pinned:
graph.pin_memory_()
else:
graph = graph.to(F.ctx())
).to("pinned" if is_pinned else F.ctx())

# Generate subgraph via sample neighbors.
nodes = torch.LongTensor([1, 3, 4]).to(F.ctx())
Expand Down
Loading

0 comments on commit 1240697

Please sign in to comment.