Skip to content

Update BC computation to address normalization edge conditions #5105

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 47 additions & 27 deletions cpp/src/centrality/betweenness_centrality_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -540,37 +540,57 @@ rmm::device_uvector<weight_t> betweenness_centrality(
do_expensive_check);
}

std::optional<weight_t> scale_factor{std::nullopt};
std::optional<weight_t> scale_nonsource{std::nullopt};
std::optional<weight_t> scale_source{std::nullopt};

weight_t num_vertices = static_cast<weight_t>(graph_view.number_of_vertices());
if (!include_endpoints) num_vertices = num_vertices - 1;

if ((static_cast<edge_t>(num_sources) == num_vertices) || include_endpoints) {
if (normalized) {
scale_nonsource = static_cast<weight_t>(num_sources * (num_vertices - 1));
} else if (graph_view.is_symmetric()) {
scale_nonsource =
static_cast<weight_t>(num_sources * 2) / static_cast<weight_t>(num_vertices);
} else {
scale_nonsource = static_cast<weight_t>(num_sources) / static_cast<weight_t>(num_vertices);
}

if (normalized) {
if (include_endpoints) {
if (graph_view.number_of_vertices() >= 2) {
scale_factor = static_cast<weight_t>(
std::min(static_cast<vertex_t>(num_sources), graph_view.number_of_vertices()) *
(graph_view.number_of_vertices() - 1));
}
} else if (graph_view.number_of_vertices() > 2) {
scale_factor = static_cast<weight_t>(
std::min(static_cast<vertex_t>(num_sources), graph_view.number_of_vertices() - 1) *
(graph_view.number_of_vertices() - 2));
scale_source = scale_nonsource;
} else if (normalized) {
scale_nonsource = static_cast<weight_t>(num_sources) * (num_vertices - 1);
scale_source = static_cast<weight_t>(num_sources - 1) * (num_vertices - 1);
} else {
scale_nonsource = static_cast<weight_t>(num_sources) / num_vertices;
scale_source = static_cast<weight_t>(num_sources - 1) / num_vertices;

if (graph_view.is_symmetric()) {
*scale_nonsource *= 2;
*scale_source *= 2;
}
} else if (num_sources < static_cast<size_t>(graph_view.number_of_vertices())) {
if ((graph_view.number_of_vertices() > 1) && (num_sources > 0))
scale_factor =
(graph_view.is_symmetric() ? weight_t{2} : weight_t{1}) *
static_cast<weight_t>(num_sources) /
(include_endpoints ? static_cast<weight_t>(graph_view.number_of_vertices())
: static_cast<weight_t>(graph_view.number_of_vertices() - 1));
} else if (graph_view.is_symmetric()) {
scale_factor = weight_t{2};
}

if (scale_factor) {
thrust::transform(handle.get_thrust_policy(),
centralities.begin(),
centralities.end(),
centralities.begin(),
[sf = *scale_factor] __device__(auto centrality) { return centrality / sf; });
if (scale_nonsource) {
auto iter = thrust::make_zip_iterator(
thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()),
centralities.begin());

thrust::transform(
handle.get_thrust_policy(),
iter,
iter + centralities.size(),
centralities.begin(),
[nonsource = *scale_nonsource,
source = *scale_source,
vertices_begin,
vertices_end] __device__(auto t) {
vertex_t v = thrust::get<0>(t);
weight_t centrality = thrust::get<1>(t);

return (thrust::find(thrust::seq, vertices_begin, vertices_end, v) == vertices_end)
? centrality / nonsource
: centrality / source;
});
}

return centralities;
Expand Down
25 changes: 16 additions & 9 deletions cpp/tests/c_api/betweenness_centrality_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,16 @@ int generic_betweenness_centrality_test(vertex_t* h_src,
TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");

for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) {
TEST_ASSERT(test_ret_value,
nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001),
"centralities results don't match");
if (isnan(h_result[h_vertices[i]])) {
TEST_ASSERT(test_ret_value, isnan(h_centralities[i]), "expected NaN, got a non-NaN value");
} else {
if (!nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001))
printf(" expected: %g, got %g\n", h_result[h_vertices[i]], h_centralities[i]);

TEST_ASSERT(test_ret_value,
nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001),
"centralities results don't match");
}
}

cugraph_centrality_result_free(p_result);
Expand Down Expand Up @@ -169,7 +176,7 @@ int test_betweenness_centrality_specific_normalized()
weight_t h_wgt[] = {
0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
vertex_t h_seeds[] = {0, 3};
weight_t h_result[] = {0, 0.395833, 0.16667, 0.0833333, 0.0416667, 0.0625};
weight_t h_result[] = {0, 0.395833, 0.166667, 0.166667, 0.0416667, 0.0625};

return generic_betweenness_centrality_test(h_src,
h_dst,
Expand Down Expand Up @@ -197,7 +204,7 @@ int test_betweenness_centrality_specific_unnormalized()
weight_t h_wgt[] = {
0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
vertex_t h_seeds[] = {0, 3};
weight_t h_result[] = {0, 7.91667, 3.33333, 1.666667, 0.833333, 1.25};
weight_t h_result[] = {0, 7.91667, 3.33333, 3.33333, 0.833333, 1.25};

return generic_betweenness_centrality_test(h_src,
h_dst,
Expand Down Expand Up @@ -312,17 +319,17 @@ int test_issue_4941()
{TRUE, TRUE, FALSE, 0, {1.0, 0.4, 0.4, 0.4, 0.4}},
{TRUE, TRUE, FALSE, 1, {1.0, 1.0, 0.25, 0.25, 0.25}},
{TRUE, FALSE, TRUE, 0, {1.0, 0.0, 0.0, 0.0, 0.0}},
{TRUE, FALSE, TRUE, 1, {1.0, 0.0, 0.0, 0.0, 0.0}},
{TRUE, FALSE, TRUE, 1, {1.0, NAN, 0.0, 0.0, 0.0}},
{TRUE, FALSE, FALSE, 0, {1.0, 0.0, 0.0, 0.0, 0.0}},
{TRUE, FALSE, FALSE, 1, {1.0, 0.0, 0.0, 0.0, 0.0}},
{TRUE, FALSE, FALSE, 1, {1.0, NAN, 0.0, 0.0, 0.0}},
{FALSE, TRUE, TRUE, 0, {20.0, 8.0, 8.0, 8.0, 8.0}},
{FALSE, TRUE, TRUE, 1, {20.0, 20.0, 5.0, 5.0, 5.0}},
{FALSE, TRUE, FALSE, 0, {10.0, 4.0, 4.0, 4.0, 4.0}},
{FALSE, TRUE, FALSE, 1, {10.0, 10.0, 2.5, 2.5, 2.5}},
{FALSE, FALSE, TRUE, 0, {12.0, 0.0, 0.0, 0.0, 0.0}},
{FALSE, FALSE, TRUE, 1, {12.0, 0.0, 0.0, 0.0, 0.0}},
{FALSE, FALSE, TRUE, 1, {12, NAN, 0.0, 0.0, 0.0}},
{FALSE, FALSE, FALSE, 0, {6.0, 0.0, 0.0, 0.0, 0.0}},
{FALSE, FALSE, FALSE, 1, {6.0, 0.0, 0.0, 0.0, 0.0}},
{FALSE, FALSE, FALSE, 1, {6.0, NAN, 0.0, 0.0, 0.0}},
};

int test_result = 0;
Expand Down
64 changes: 41 additions & 23 deletions cpp/tests/centrality/betweenness_centrality_reference.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,41 +130,54 @@ void ref_edge_accumulation(std::vector<weight_t>& result,
}
}

template <typename result_t>
template <typename vertex_t, typename result_t>
void reference_rescale(result_t* result,
vertex_t const* sources,
bool directed,
bool normalize,
bool endpoints,
size_t const number_of_vertices,
size_t const number_of_sources)
{
result_t rescale_factor = static_cast<result_t>(1);
result_t casted_number_of_sources = static_cast<result_t>(number_of_sources);
result_t casted_number_of_vertices = static_cast<result_t>(number_of_vertices);
if (!endpoints) casted_number_of_vertices = casted_number_of_vertices - 1;

if (normalize) {
if (number_of_vertices > 2) {
if (endpoints) {
rescale_factor /=
(number_of_sources > 0 ? casted_number_of_sources
: casted_number_of_vertices * (casted_number_of_vertices - 1));
} else {
rescale_factor /= (number_of_sources > 0
? casted_number_of_sources
: (casted_number_of_vertices - 1) * (casted_number_of_vertices - 2));
}
if ((number_of_sources == number_of_vertices) || endpoints) {
result_t rescale_factor = static_cast<result_t>(1);

if (normalize) {
rescale_factor = result_t{1} / (casted_number_of_sources * (casted_number_of_vertices - 1));
} else if (!directed) {
rescale_factor = casted_number_of_vertices / (2 * casted_number_of_sources);
} else {
rescale_factor = casted_number_of_vertices / casted_number_of_sources;
}
} else if (number_of_sources < number_of_vertices) {
rescale_factor = (endpoints ? casted_number_of_vertices : casted_number_of_vertices - 1) /
(directed ? casted_number_of_sources : 2 * casted_number_of_sources);
} else if (!directed) {
rescale_factor = 2;
}

if (rescale_factor != result_t{1}) {
for (auto idx = 0; idx < number_of_vertices; ++idx) {
for (vertex_t idx = 0; idx < number_of_vertices; ++idx) {
result[idx] *= rescale_factor;
}
} else {
result_t rescale_source = static_cast<result_t>(1);
result_t rescale_non_source = static_cast<result_t>(1);

if (normalize) {
rescale_source = 1 / ((casted_number_of_sources - 1) * (casted_number_of_vertices - 1));
rescale_non_source = 1 / (casted_number_of_sources * (casted_number_of_vertices - 1));
} else if (directed) {
rescale_source = casted_number_of_vertices / (casted_number_of_sources - 1);
rescale_non_source = casted_number_of_vertices / casted_number_of_sources;
} else {
rescale_source = casted_number_of_vertices / (2 * (casted_number_of_sources - 1));
rescale_non_source = casted_number_of_vertices / (2 * casted_number_of_sources);
}

for (vertex_t idx = 0; idx < number_of_vertices; ++idx) {
if (std::find(sources, sources + number_of_sources, idx) == (sources + number_of_sources))
result[idx] *= rescale_non_source;
else
result[idx] *= rescale_source;
}
}
}

Expand Down Expand Up @@ -235,8 +248,13 @@ std::vector<weight_t> betweenness_centrality_reference(
}
}

reference_rescale(
result.data(), directed, normalize, include_endpoints, offsets.size() - 1, seeds.size());
reference_rescale(result.data(),
seeds.data(),
directed,
normalize,
include_endpoints,
offsets.size() - 1,
seeds.size());

return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pytest
import numpy as np
import networkx as nx

from cugraph.dask.common.mg_utils import is_single_gpu
from cugraph.datasets import karate
Expand Down Expand Up @@ -55,7 +56,10 @@ def setup_function():
# =============================================================================


@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.mg
@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
@pytest.mark.parametrize("dataset", DATASETS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import pytest
import numpy as np
import networkx as nx

from cugraph.dask.common.mg_utils import is_single_gpu
from cugraph.datasets import karate, netscience
Expand Down Expand Up @@ -53,7 +54,10 @@ def setup_function():


# FIXME: Fails for directed=False (BC score is twice as large) and normalized=True.
@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.mg
@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
@pytest.mark.parametrize("dataset", DATASETS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import networkx as nx

import cudf
import cupy
import cugraph
from cugraph.datasets import karate_disjoint
from cugraph.testing import utils, SMALL_DATASETS
Expand Down Expand Up @@ -102,7 +101,7 @@ def calc_betweenness_centrality(
Contains 'vertex' and 'cu_bc' 'ref_bc' columns, where 'cu_bc'
and 'ref_bc' are the two betweenness centrality scores to compare.
The dataframe is expected to be sorted based on 'vertex', so that we
can use cupy.isclose to compare the scores.
can use np.isclose to compare the scores.
"""
G = None
Gnx = None
Expand Down Expand Up @@ -289,8 +288,15 @@ def _calc_bc_full(G, Gnx, normalized, weight, endpoints, k, seed, result_dtype):
# i.e: sorted_df[idx][first_key] should be compared to
# sorted_df[idx][second_key]
def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON):
# Compare with numpy and pandas, since the presence of NaNs in a cudf Series
# results in "ValueError: CuPy currently does not support masked arrays."
errors = sorted_df[
~cupy.isclose(sorted_df[first_key], sorted_df[second_key], rtol=epsilon)
~np.isclose(
sorted_df[first_key].to_pandas(),
sorted_df[second_key].to_pandas(),
rtol=epsilon,
equal_nan=True,
)
]
num_errors = len(errors)
if num_errors > 0:
Expand All @@ -305,7 +311,10 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON):
# =============================================================================
# Tests
# =============================================================================
@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
@pytest.mark.parametrize("directed", [False, True])
Expand Down Expand Up @@ -542,17 +551,17 @@ def test_betweenness_centrality_nx(graph_file, directed, edgevals):
(True, True, False, None, {0: 1.0, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4}),
(True, True, False, 1, {0: 1.0, 1: 1.0, 2: 0.25, 3: 0.25, 4: 0.25}),
(True, False, True, None, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(True, False, True, 1, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(True, False, True, 1, {0: 1.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
(True, False, False, None, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(True, False, False, 1, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(True, False, False, 1, {0: 1.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
(False, True, True, None, {0: 20.0, 1: 8.0, 2: 8.0, 3: 8.0, 4: 8.0}),
(False, True, True, 1, {0: 20.0, 1: 20.0, 2: 5.0, 3: 5.0, 4: 5.0}),
(False, True, False, None, {0: 10.0, 1: 4.0, 2: 4.0, 3: 4.0, 4: 4.0}),
(False, True, False, 1, {0: 10.0, 1: 10.0, 2: 2.5, 3: 2.5, 4: 2.5}),
(False, False, True, None, {0: 12.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(False, False, True, 1, {0: 12.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(False, False, True, 1, {0: 12.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
(False, False, False, None, {0: 6.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(False, False, False, 1, {0: 6.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
(False, False, False, 1, {0: 6.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
],
)
def test_scale_with_k_on_star_graph(normalized, endpoints, is_directed, k, expected):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,10 @@ def generate_upper_triangle(dataframe):
return dataframe


@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
Expand Down Expand Up @@ -343,7 +346,10 @@ def test_edge_betweenness_centrality(
compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")


@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
Expand Down Expand Up @@ -383,7 +389,10 @@ def test_edge_betweenness_centrality_k_full(
# the function performing the comparison first draws a random sample
# over the number of vertices (i.e. direct offsets) in the graph
# structure rather than over the actual vertex identifiers
@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", [karate_disjoint])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
Expand Down Expand Up @@ -487,7 +496,10 @@ def test_edge_betweenness_invalid_dtype(
compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")


@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
@pytest.mark.skipif(
float(".".join(nx.__version__.split(".")[:2])) < 3.5,
reason="Requires networkx >= 3.5",
)
@pytest.mark.sg
@pytest.mark.parametrize("graph_file", SMALL_DATASETS)
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
Expand Down