Skip to content

Commit f519ac1

Browse files
authored
Forward merge branch-24.06 into branch-24.08 (#4489)
Replaces #4476

Authors:
  - Ralph Liu (https://github.com/nv-rliu)
  - Alex Barghi (https://github.com/alexbarghi-nv)
  - Tingyu Wang (https://github.com/tingyu66)
  - Bradley Dice (https://github.com/bdice)
  - James Lamb (https://github.com/jameslamb)

Approvers:
  - Alex Barghi (https://github.com/alexbarghi-nv)
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Bradley Dice (https://github.com/bdice)

URL: #4489
1 parent bc0771e commit f519ac1

File tree

18 files changed

+110
-28
lines changed

18 files changed

+110
-28
lines changed

ci/build_wheel.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,13 @@ fi
5656

5757
cd "${package_dir}"
5858

59-
python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
59+
python -m pip wheel \
60+
-w dist \
61+
-vvv \
62+
--no-deps \
63+
--disable-pip-version-check \
64+
--extra-index-url https://pypi.nvidia.com \
65+
.
6066

6167
# pure-python packages should be marked as pure, and not have auditwheel run on them.
6268
if [[ ${package_name} == "nx-cugraph" ]] || \

ci/build_wheel_cugraph.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#!/bin/bash
2-
# Copyright (c) 2023, NVIDIA CORPORATION.
2+
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
33

44
set -euo pipefail
55

@@ -12,6 +12,10 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
1212
RAPIDS_PY_WHEEL_NAME=pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibcugraph
1313
export PIP_FIND_LINKS=$(pwd)/local-pylibcugraph
1414

15+
PARALLEL_LEVEL=$(python -c \
16+
"from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))")
17+
1518
export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/"
19+
export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}"
1620

1721
./ci/build_wheel.sh cugraph python/cugraph

ci/build_wheel_pylibcugraph.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
#!/bin/bash
2-
# Copyright (c) 2023, NVIDIA CORPORATION.
2+
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
33

44
set -euo pipefail
55

6+
PARALLEL_LEVEL=$(python -c \
7+
"from math import ceil; from multiprocessing import cpu_count; print(ceil(cpu_count()/4))")
8+
69
export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_CUGRAPH_CPP=OFF;-DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/"
10+
export SKBUILD_BUILD_TOOL_ARGS="-j${PARALLEL_LEVEL};-l${PARALLEL_LEVEL}"
711

812
./ci/build_wheel.sh pylibcugraph python/pylibcugraph

ci/test_python.sh

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ rapids-mamba-retry install \
4444
rapids-logger "Check GPU usage"
4545
nvidia-smi
4646

47+
export LD_PRELOAD="${CONDA_PREFIX}/lib/libgomp.so.1"
48+
4749
# RAPIDS_DATASET_ROOT_DIR is used by test scripts
4850
export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
4951
pushd "${RAPIDS_DATASET_ROOT_DIR}"
@@ -191,6 +193,8 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
191193
conda activate test_cugraph_pyg
192194
set -u
193195

196+
rapids-print-env
197+
194198
# TODO re-enable logic once CUDA 12 is testable
195199
#if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
196200
CONDA_CUDA_VERSION="11.8"
@@ -204,18 +208,9 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
204208
rapids-mamba-retry install \
205209
--channel "${CPP_CHANNEL}" \
206210
--channel "${PYTHON_CHANNEL}" \
207-
--channel pytorch \
208211
--channel pyg \
209-
--channel nvidia \
210212
"cugraph-pyg" \
211-
"pytorch=2.1.0" \
212-
"pytorch-cuda=${CONDA_CUDA_VERSION}"
213-
214-
# Install pyg dependencies (which requires pip)
215-
216-
pip install \
217-
ogb \
218-
tensordict
213+
"ogb"
219214

220215
pip install \
221216
pyg_lib \

ci/test_wheel_cugraph-pyg.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ rapids-retry python -m pip install \
4242
pyg_lib \
4343
torch_scatter \
4444
torch_sparse \
45-
tensordict \
4645
-f ${PYG_URL}
4746

4847
rapids-logger "pytest cugraph-pyg (single GPU)"

ci/test_wheel_nx-cugraph.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
#!/bin/bash
2-
# Copyright (c) 2023, NVIDIA CORPORATION.
2+
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
33

44
set -eoxu pipefail
55

6+
# Download wheels built during this job.
7+
RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
8+
RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-deps
9+
python -m pip install ./local-deps/*.whl
10+
611
./ci/test_wheel.sh nx-cugraph python/nx-cugraph

conda/environments/all_cuda-118_arch-x86_64.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ dependencies:
4242
- numpy>=1.23,<2.0a0
4343
- numpydoc
4444
- nvcc_linux-64=11.8
45-
- openmpi
45+
- openmpi<5.0.3
4646
- packaging>=21
4747
- pandas
4848
- pre-commit
@@ -56,6 +56,7 @@ dependencies:
5656
- pytest-mpl
5757
- pytest-xdist
5858
- python-louvain
59+
- pytorch>=2.0,<2.2.0a0
5960
- raft-dask==24.8.*
6061
- rapids-dask-dependency==24.8.*
6162
- recommonmark

conda/environments/all_cuda-122_arch-x86_64.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ dependencies:
4747
- numba>=0.57
4848
- numpy>=1.23,<2.0a0
4949
- numpydoc
50-
- openmpi
50+
- openmpi<5.0.3
5151
- packaging>=21
5252
- pandas
5353
- pre-commit
@@ -61,6 +61,7 @@ dependencies:
6161
- pytest-mpl
6262
- pytest-xdist
6363
- python-louvain
64+
- pytorch>=2.0,<2.2.0a0
6465
- raft-dask==24.8.*
6566
- rapids-dask-dependency==24.8.*
6667
- recommonmark

conda/recipes/libcugraph/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ requirements:
4242
- {{ compiler('cxx') }}
4343
- cmake {{ cmake_version }}
4444
- ninja
45-
- openmpi # Required for building cpp-mgtests (multi-GPU tests)
45+
- openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests)
4646
- {{ stdlib("c") }}
4747
host:
4848
{% if cuda_major == "11" %}

dependencies.yaml

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ files:
2222
- depends_on_pylibcugraphops
2323
- depends_on_pylibwholegraph
2424
- depends_on_cupy
25+
- depends_on_pytorch
2526
- python_run_cugraph
2627
- python_run_nx_cugraph
2728
- python_run_cugraph_dgl
@@ -62,6 +63,7 @@ files:
6263
- cuda_version
6364
- depends_on_cudf
6465
- depends_on_pylibwholegraph
66+
- depends_on_pytorch
6567
- py_version
6668
- test_python_common
6769
- test_python_cugraph
@@ -177,6 +179,7 @@ files:
177179
includes:
178180
- test_python_common
179181
- depends_on_pylibwholegraph
182+
- depends_on_pytorch
180183
py_build_cugraph_pyg:
181184
output: pyproject
182185
pyproject_dir: python/cugraph-pyg
@@ -201,6 +204,7 @@ files:
201204
includes:
202205
- test_python_common
203206
- depends_on_pylibwholegraph
207+
- depends_on_pytorch
204208
py_build_cugraph_equivariant:
205209
output: pyproject
206210
pyproject_dir: python/cugraph-equivariant
@@ -362,7 +366,7 @@ dependencies:
362366
- libraft-headers==24.8.*
363367
- libraft==24.8.*
364368
- librmm==24.8.*
365-
- openmpi # Required for building cpp-mgtests (multi-GPU tests)
369+
- openmpi<5.0.3 # Required for building cpp-mgtests (multi-GPU tests)
366370
specific:
367371
- output_types: [conda]
368372
matrices:
@@ -568,9 +572,30 @@ dependencies:
568572
- cugraph==24.8.*
569573
- pytorch>=2.0
570574
- pytorch-cuda==11.8
571-
- tensordict>=0.1.2
575+
- &tensordict tensordict>=0.1.2
572576
- pyg>=2.5,<2.6
573577

578+
depends_on_pytorch:
579+
common:
580+
- output_types: [conda]
581+
packages:
582+
- &pytorch_conda pytorch>=2.0,<2.2.0a0
583+
584+
specific:
585+
- output_types: [requirements, pyproject]
586+
matrices:
587+
- matrix: {cuda: "12.*"}
588+
packages:
589+
- &pytorch_pip torch>=2.0,<2.2.0a0
590+
- *tensordict
591+
- --extra-index-url=https://download.pytorch.org/whl/cu121
592+
- matrix: {cuda: "11.*"}
593+
packages:
594+
- *pytorch_pip
595+
- *tensordict
596+
- --extra-index-url=https://download.pytorch.org/whl/cu118
597+
- {matrix: null, packages: [*pytorch_pip, *tensordict]}
598+
574599
depends_on_pylibwholegraph:
575600
common:
576601
- output_types: conda

python/cugraph-dgl/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ test = [
3838
"pytest-cov",
3939
"pytest-xdist",
4040
"scipy",
41+
"tensordict>=0.1.2",
42+
"torch>=2.0,<2.2.0a0",
4143
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
4244

4345
[project.urls]

python/cugraph-equivariant/cugraph_equivariant/nn/tensor_product_conv.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,15 @@
2020

2121
from cugraph_equivariant.utils import scatter_reduce
2222

23-
from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct
23+
try:
24+
from pylibcugraphops.pytorch.operators import FusedFullyConnectedTensorProduct
25+
except ImportError as exc:
26+
raise RuntimeError(
27+
"FullyConnectedTensorProductConv is no longer supported in "
28+
"cugraph-equivariant starting from version 24.08. It will be migrated "
29+
"to the new `cuequivariance` package. Please use 24.06 release for the "
30+
"legacy interface."
31+
) from exc
2432

2533

2634
class FullyConnectedTensorProductConv(nn.Module):

python/cugraph-equivariant/cugraph_equivariant/tests/test_tensor_product_conv.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,14 @@
1616
import torch
1717
from torch import nn
1818
from e3nn import o3
19-
from cugraph_equivariant.nn import FullyConnectedTensorProductConv
19+
20+
try:
21+
from cugraph_equivariant.nn import FullyConnectedTensorProductConv
22+
except RuntimeError:
23+
pytest.skip(
24+
"Migrated to cuequivariance package starting from 24.08.",
25+
allow_module_level=True,
26+
)
2027

2128
device = torch.device("cuda:0")
2229

python/cugraph-pyg/pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ test = [
4646
"pytest-cov",
4747
"pytest-xdist",
4848
"scipy",
49+
"tensordict>=0.1.2",
50+
"torch>=2.0,<2.2.0a0",
4951
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
5052

5153
[tool.setuptools]

python/cugraph/cugraph/gnn/data_loading/dist_sampler.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,12 @@
2424

2525
from typing import Union, List, Dict, Tuple, Iterator, Optional
2626

27-
from cugraph.utilities import import_optional
27+
from cugraph.utilities.utils import import_optional, MissingModule
2828
from cugraph.gnn.comms import cugraph_comms_get_raft_handle
2929

3030
from cugraph.gnn.data_loading.bulk_sampler_io import create_df_from_disjoint_arrays
3131

32-
# PyTorch is NOT optional but this is required for container builds.
33-
torch = import_optional("torch")
34-
32+
torch = MissingModule("torch")
3533
TensorType = Union["torch.Tensor", cupy.ndarray, cudf.Series]
3634

3735

@@ -44,6 +42,8 @@ def __init__(
4442
rank: Optional[int] = None,
4543
filelist=None,
4644
):
45+
torch = import_optional("torch")
46+
4747
self.__format = format
4848
self.__directory = directory
4949

@@ -77,6 +77,8 @@ def __iter__(self):
7777
return self
7878

7979
def __next__(self):
80+
torch = import_optional("torch")
81+
8082
if len(self.__files) > 0:
8183
f = self.__files.pop()
8284
fname = f[0]
@@ -404,6 +406,7 @@ def get_reader(self) -> Iterator[Tuple[Dict[str, "torch.Tensor"], int, int]]:
404406
"""
405407
Returns an iterator over sampled data.
406408
"""
409+
torch = import_optional("torch")
407410
rank = torch.distributed.get_rank() if self.is_multi_gpu else None
408411
return self.__writer.get_reader(rank)
409412

@@ -461,6 +464,8 @@ def get_label_list_and_output_rank(
461464
label_to_output_comm_rank: TensorType
462465
The global mapping of labels to ranks.
463466
"""
467+
torch = import_optional("torch")
468+
464469
world_size = torch.distributed.get_world_size()
465470

466471
if assume_equal_input_size:
@@ -528,6 +533,8 @@ def get_start_batch_offset(
528533
and whether the input sizes on each rank are equal (bool).
529534
530535
"""
536+
torch = import_optional("torch")
537+
531538
input_size_is_equal = True
532539
if self.is_multi_gpu:
533540
rank = torch.distributed.get_rank()
@@ -581,6 +588,8 @@ def sample_from_nodes(
581588
random_state: int
582589
The random seed to use for sampling.
583590
"""
591+
torch = import_optional("torch")
592+
584593
nodes = torch.as_tensor(nodes, device="cuda")
585594

586595
batches_per_call = self._local_seeds_per_call // batch_size
@@ -700,6 +709,8 @@ def __init__(
700709
)
701710

702711
def __calc_local_seeds_per_call(self, local_seeds_per_call: Optional[int] = None):
712+
torch = import_optional("torch")
713+
703714
if local_seeds_per_call is None:
704715
if len([x for x in self.__fanout if x <= 0]) > 0:
705716
return UniformNeighborSampler.UNKNOWN_VERTICES_DEFAULT
@@ -721,6 +732,7 @@ def sample_batches(
721732
random_state: int = 0,
722733
assume_equal_input_size: bool = False,
723734
) -> Dict[str, TensorType]:
735+
torch = import_optional("torch")
724736
if self.is_multi_gpu:
725737
rank = torch.distributed.get_rank()
726738

@@ -800,7 +812,9 @@ def sample_batches(
800812
compression=self.__compression,
801813
compress_per_hop=self.__compress_per_hop,
802814
retain_seeds=self._retain_original_seeds,
803-
label_offsets=cupy.asarray(label_offsets),
815+
label_offsets=None
816+
if label_offsets is None
817+
else cupy.asarray(label_offsets),
804818
return_dict=True,
805819
)
806820

python/cugraph/cugraph/tests/sampling/test_bulk_sampler_io.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2023, NVIDIA CORPORATION.
1+
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -169,6 +169,7 @@ def test_bulk_sampler_io_empty_batch(scratch_dir):
169169

170170

171171
@pytest.mark.sg
172+
@pytest.mark.skip(reason="broken")
172173
def test_bulk_sampler_io_mock_csr(scratch_dir):
173174
major_offsets_array = cudf.Series([0, 5, 10, 15])
174175
minors_array = cudf.Series([1, 2, 3, 4, 8, 9, 1, 3, 4, 5, 3, 0, 4, 9, 1])

python/cugraph/cugraph/tests/sampling/test_dist_sampler.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131

3232

3333
torch = import_optional("torch")
34+
if not isinstance(torch, MissingModule):
35+
from rmm.allocators.torch import rmm_torch_allocator
36+
37+
torch.cuda.change_current_allocator(rmm_torch_allocator)
3438

3539

3640
@pytest.fixture

0 commit comments

Comments
 (0)