Skip to content

Commit a538745

Browse files
authored
Updates to cugraph.hypergraph (Duplicate Col Labels Bug) (#4610)
cc: @rlratzel @ChuckHastings

This PR addresses failures seen in certain PRs (like [here](https://github.com/rapidsai/cugraph/actions/runs/10372270389/job/28718471674?pr=4606#step:7:5269)) due to a [recent change](rapidsai/cudf#16514) to `cudf` that disallows selecting duplicate column labels.

---

In `hypergraph.py`, this PR modifies `_create_hyper_edges` and `_create_direct_edges` to ensure that DataFrames are only indexed by non-duplicate column labels. This is done by taking the list of labels that may include duplicates (`fs`) and dropping the repeated entries, so each label appears exactly once:

```python
fs = list(set(fs))
```

_This part requires some attention from the author of the unit test @jnke2016_

In `test_hypergraph.py`, this PR adds the `check_like=True` argument to the `assert_frame_equal` calls because the ordering of the columns is different for the two DataFrames (converting the label list to a `set` does not preserve its original order).

Authors:
- Ralph Liu (https://github.com/nv-rliu)

Approvers:
- Rick Ratzel (https://github.com/rlratzel)
- Chuck Hastings (https://github.com/ChuckHastings)
- Paul Taylor (https://github.com/trxcllnt)
- Joseph Nke (https://github.com/jnke2016)

URL: #4610
1 parent 1ef3f56 commit a538745

File tree

2 files changed

+11
-7
lines changed

2 files changed

+11
-7
lines changed

python/cugraph/cugraph/structure/hypergraph.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ def _create_hyper_edges(
440440
for key, col in events[columns].items():
441441
cat = categories.get(key, key)
442442
fs = [EVENTID] + ([key] if drop_edge_attrs else edge_attrs)
443+
fs = list(set(fs))
443444
df = events[fs].dropna(subset=[key]) if dropna else events[fs]
444445
if len(df) == 0:
445446
continue
@@ -464,8 +465,7 @@ def _create_hyper_edges(
464465
if not drop_edge_attrs:
465466
columns += edge_attrs
466467

467-
edges = cudf.concat(edges)[columns]
468-
edges.reset_index(drop=True, inplace=True)
468+
edges = cudf.concat(edges, ignore_index=True)[list(set(columns))]
469469
return edges
470470

471471

@@ -546,6 +546,7 @@ def _create_direct_edges(
546546
for key2, col2 in events[sorted(edge_shape[key1])].items():
547547
cat2 = categories.get(key2, key2)
548548
fs = [EVENTID] + ([key1, key2] if drop_edge_attrs else edge_attrs)
549+
fs = list(set(fs))
549550
df = events[fs].dropna(subset=[key1, key2]) if dropna else events[fs]
550551
if len(df) == 0:
551552
continue
@@ -573,7 +574,7 @@ def _create_direct_edges(
573574
if not drop_edge_attrs:
574575
columns += edge_attrs
575576

576-
edges = cudf.concat(edges)[columns]
577+
edges = cudf.concat(edges)[list(set(columns))]
577578
edges.reset_index(drop=True, inplace=True)
578579
return edges
579580

python/cugraph/cugraph/tests/structure/test_hypergraph.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
1+
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -171,7 +171,8 @@ def test_hyperedges(categorical_metadata):
171171
if categorical_metadata:
172172
edges = edges.astype({"edge_type": "category"})
173173

174-
assert_frame_equal(edges, h["edges"], check_dtype=False)
174+
# check_like ignores the order of columns as long as all correct ones are present
175+
assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True)
175176
for (k, v) in [("entities", 12), ("nodes", 15), ("edges", 12), ("events", 3)]:
176177
assert len(h[k]) == v
177178

@@ -266,7 +267,8 @@ def test_drop_edge_attrs(categorical_metadata):
266267
if categorical_metadata:
267268
edges = edges.astype({"edge_type": "category"})
268269

269-
assert_frame_equal(edges, h["edges"], check_dtype=False)
270+
# check_like ignores the order of columns as long as all correct ones are present
271+
assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True)
270272

271273
for (k, v) in [("entities", 9), ("nodes", 12), ("edges", 9), ("events", 3)]:
272274
assert len(h[k]) == v
@@ -308,7 +310,8 @@ def test_drop_edge_attrs_direct(categorical_metadata):
308310
if categorical_metadata:
309311
edges = edges.astype({"edge_type": "category"})
310312

311-
assert_frame_equal(edges, h["edges"], check_dtype=False)
313+
# check_like ignores the order of columns as long as all correct ones are present
314+
assert_frame_equal(edges, h["edges"], check_dtype=False, check_like=True)
312315

313316
for (k, v) in [("entities", 9), ("nodes", 9), ("edges", 6), ("events", 0)]:
314317
assert len(h[k]) == v

0 commit comments

Comments
 (0)