rapidsai · rapids-bot · Jun 12, 2025 · Jun 3, 2025 · Jun 3, 2025 · Jun 4, 2025
@@ -540,37 +540,57 @@ rmm::device_uvector<weight_t> betweenness_centrality(
                               do_expensive_check);
   }
 
-  std::optional<weight_t> scale_factor{std::nullopt};
+  std::optional<weight_t> scale_nonsource{std::nullopt};
+  std::optional<weight_t> scale_source{std::nullopt};
+  // Vertex count for the scaling formulas; reduced by one when endpoints are excluded.
+  weight_t num_vertices = static_cast<weight_t>(graph_view.number_of_vertices());
+  if (!include_endpoints) num_vertices = num_vertices - 1;
+  // NOTE(review): without endpoints num_vertices is already N-1 here; confirm the == is intended.
+  if ((static_cast<edge_t>(num_sources) == num_vertices) || include_endpoints) {
+    if (normalized) {
+      scale_nonsource = static_cast<weight_t>(num_sources * (num_vertices - 1));
+    } else if (graph_view.is_symmetric()) {
+      scale_nonsource =
+        static_cast<weight_t>(num_sources * 2) / static_cast<weight_t>(num_vertices);
+    } else {
+      scale_nonsource = static_cast<weight_t>(num_sources) / static_cast<weight_t>(num_vertices);
+    }
 
-  if (normalized) {
-    if (include_endpoints) {
-      if (graph_view.number_of_vertices() >= 2) {
-        scale_factor = static_cast<weight_t>(
-          std::min(static_cast<vertex_t>(num_sources), graph_view.number_of_vertices()) *
-          (graph_view.number_of_vertices() - 1));
-      }
-    } else if (graph_view.number_of_vertices() > 2) {
-      scale_factor = static_cast<weight_t>(
-        std::min(static_cast<vertex_t>(num_sources), graph_view.number_of_vertices() - 1) *
-        (graph_view.number_of_vertices() - 2));
+    scale_source = scale_nonsource;
+  } else if (normalized) {
+    scale_nonsource = static_cast<weight_t>(num_sources) * (num_vertices - 1);
+    scale_source    = static_cast<weight_t>(num_sources - 1) * (num_vertices - 1);
+  } else {
+    scale_nonsource = static_cast<weight_t>(num_sources) / num_vertices;
+    scale_source    = static_cast<weight_t>(num_sources - 1) / num_vertices;
+    // Symmetric graph: both divisors are doubled.
+    if (graph_view.is_symmetric()) {
+      *scale_nonsource *= 2;
+      *scale_source *= 2;
     }
-  } else if (num_sources < static_cast<size_t>(graph_view.number_of_vertices())) {
-    if ((graph_view.number_of_vertices() > 1) && (num_sources > 0))
-      scale_factor =
-        (graph_view.is_symmetric() ? weight_t{2} : weight_t{1}) *
-        static_cast<weight_t>(num_sources) /
-        (include_endpoints ? static_cast<weight_t>(graph_view.number_of_vertices())
-                           : static_cast<weight_t>(graph_view.number_of_vertices() - 1));
-  } else if (graph_view.is_symmetric()) {
-    scale_factor = weight_t{2};
   }
 
-  if (scale_factor) {
-    thrust::transform(handle.get_thrust_policy(),
-                      centralities.begin(),
-                      centralities.end(),
-                      centralities.begin(),
-                      [sf = *scale_factor] __device__(auto centrality) { return centrality / sf; });
+  if (scale_nonsource) {
+    auto iter = thrust::make_zip_iterator(
+      thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()),
+      centralities.begin());
+    // Zip vertex ids (from local_vertex_partition_range_first()) with their centralities.
+    thrust::transform(
+      handle.get_thrust_policy(),
+      iter,
+      iter + centralities.size(),
+      centralities.begin(),
+      [nonsource = *scale_nonsource,
+       source    = *scale_source,
+       vertices_begin,
+       vertices_end] __device__(auto t) {
+        vertex_t v          = thrust::get<0>(t);
+        weight_t centrality = thrust::get<1>(t);
+        // Sequential search of the source range: found -> source divisor, else non-source.
+        return (thrust::find(thrust::seq, vertices_begin, vertices_end, v) == vertices_end)
+                 ? centrality / nonsource
+                 : centrality / source;
+      });
   }
 
   return centralities;

@@ -113,9 +113,16 @@ int generic_betweenness_centrality_test(vertex_t* h_src,
   TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed.");
 
   for (int i = 0; (i < num_vertices) && (test_ret_value == 0); ++i) {
-    TEST_ASSERT(test_ret_value,
-                nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001),
-                "centralities results don't match");
+    if (isnan(h_result[h_vertices[i]])) {
+      TEST_ASSERT(test_ret_value, isnan(h_centralities[i]), "expected NaN, got a non-NaN value");
+    } else {
+      if (!nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001))
+        printf("  expected: %g, got %g\n", h_result[h_vertices[i]], h_centralities[i]);
+
+      TEST_ASSERT(test_ret_value,
+                  nearlyEqual(h_result[h_vertices[i]], h_centralities[i], 0.0001),
+                  "centralities results don't match");
+    }
   }
 
   cugraph_centrality_result_free(p_result);
@@ -169,7 +176,7 @@ int test_betweenness_centrality_specific_normalized()
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
   vertex_t h_seeds[]  = {0, 3};
-  weight_t h_result[] = {0, 0.395833, 0.16667, 0.0833333, 0.0416667, 0.0625};
+  weight_t h_result[] = {0, 0.395833, 0.166667, 0.166667, 0.0416667, 0.0625};
 
   return generic_betweenness_centrality_test(h_src,
                                              h_dst,
@@ -197,7 +204,7 @@ int test_betweenness_centrality_specific_unnormalized()
   weight_t h_wgt[] = {
     0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f};
   vertex_t h_seeds[]  = {0, 3};
-  weight_t h_result[] = {0, 7.91667, 3.33333, 1.666667, 0.833333, 1.25};
+  weight_t h_result[] = {0, 7.91667, 3.33333, 3.33333, 0.833333, 1.25};
 
   return generic_betweenness_centrality_test(h_src,
                                              h_dst,
@@ -312,17 +319,17 @@ int test_issue_4941()
     {TRUE, TRUE, FALSE, 0, {1.0, 0.4, 0.4, 0.4, 0.4}},
     {TRUE, TRUE, FALSE, 1, {1.0, 1.0, 0.25, 0.25, 0.25}},
     {TRUE, FALSE, TRUE, 0, {1.0, 0.0, 0.0, 0.0, 0.0}},
-    {TRUE, FALSE, TRUE, 1, {1.0, 0.0, 0.0, 0.0, 0.0}},
+    {TRUE, FALSE, TRUE, 1, {1.0, NAN, 0.0, 0.0, 0.0}},
     {TRUE, FALSE, FALSE, 0, {1.0, 0.0, 0.0, 0.0, 0.0}},
-    {TRUE, FALSE, FALSE, 1, {1.0, 0.0, 0.0, 0.0, 0.0}},
+    {TRUE, FALSE, FALSE, 1, {1.0, NAN, 0.0, 0.0, 0.0}},
     {FALSE, TRUE, TRUE, 0, {20.0, 8.0, 8.0, 8.0, 8.0}},
     {FALSE, TRUE, TRUE, 1, {20.0, 20.0, 5.0, 5.0, 5.0}},
     {FALSE, TRUE, FALSE, 0, {10.0, 4.0, 4.0, 4.0, 4.0}},
     {FALSE, TRUE, FALSE, 1, {10.0, 10.0, 2.5, 2.5, 2.5}},
     {FALSE, FALSE, TRUE, 0, {12.0, 0.0, 0.0, 0.0, 0.0}},
-    {FALSE, FALSE, TRUE, 1, {12.0, 0.0, 0.0, 0.0, 0.0}},
+    {FALSE, FALSE, TRUE, 1, {12, NAN, 0.0, 0.0, 0.0}},
     {FALSE, FALSE, FALSE, 0, {6.0, 0.0, 0.0, 0.0, 0.0}},
-    {FALSE, FALSE, FALSE, 1, {6.0, 0.0, 0.0, 0.0, 0.0}},
+    {FALSE, FALSE, FALSE, 1, {6.0, NAN, 0.0, 0.0, 0.0}},
   };
 
   int test_result = 0;

@@ -130,41 +130,54 @@ void ref_edge_accumulation(std::vector<weight_t>& result,
   }
 }
 
-template <typename result_t>
+// Rescales the raw betweenness scores in `result` in place. With `endpoints`, one factor is used
+// for every vertex; otherwise vertices listed in `sources` and all others get separate factors.
+template <typename vertex_t, typename result_t>
 void reference_rescale(result_t* result,
+                       vertex_t const* sources,
                        bool directed,
                        bool normalize,
                        bool endpoints,
                        size_t const number_of_vertices,
                        size_t const number_of_sources)
 {
-  result_t rescale_factor            = static_cast<result_t>(1);
   result_t casted_number_of_sources  = static_cast<result_t>(number_of_sources);
   result_t casted_number_of_vertices = static_cast<result_t>(number_of_vertices);
+  if (!endpoints) casted_number_of_vertices = casted_number_of_vertices - 1;
 
-  if (normalize) {
-    if (number_of_vertices > 2) {
-      if (endpoints) {
-        rescale_factor /=
-          (number_of_sources > 0 ? casted_number_of_sources
-                                 : casted_number_of_vertices * (casted_number_of_vertices - 1));
-      } else {
-        rescale_factor /= (number_of_sources > 0
-                             ? casted_number_of_sources
-                             : (casted_number_of_vertices - 1) * (casted_number_of_vertices - 2));
-      }
+  // No special case for "all vertices are sources": the else branch then scales them all alike.
+  if (endpoints) {
+    result_t rescale_factor = static_cast<result_t>(1);
+    if (normalize) {
+      rescale_factor = result_t{1} / (casted_number_of_sources * (casted_number_of_vertices - 1));
+    } else if (!directed) {
+      rescale_factor = casted_number_of_vertices / (2 * casted_number_of_sources);
+    } else {
+      rescale_factor = casted_number_of_vertices / casted_number_of_sources;
     }
-  } else if (number_of_sources < number_of_vertices) {
-    rescale_factor = (endpoints ? casted_number_of_vertices : casted_number_of_vertices - 1) /
-                     (directed ? casted_number_of_sources : 2 * casted_number_of_sources);
-  } else if (!directed) {
-    rescale_factor = 2;
-  }
 
-  if (rescale_factor != result_t{1}) {
-    for (auto idx = 0; idx < number_of_vertices; ++idx) {
+    for (size_t idx = 0; idx < number_of_vertices; ++idx) {
       result[idx] *= rescale_factor;
     }
+  } else {
+    result_t rescale_source     = static_cast<result_t>(1);
+    result_t rescale_non_source = static_cast<result_t>(1);
+    // With a single source, (number_of_sources - 1) is 0 and rescale_source divides by zero.
+    if (normalize) {
+      rescale_source     = 1 / ((casted_number_of_sources - 1) * (casted_number_of_vertices - 1));
+      rescale_non_source = 1 / (casted_number_of_sources * (casted_number_of_vertices - 1));
+    } else if (directed) {
+      rescale_source     = casted_number_of_vertices / (casted_number_of_sources - 1);
+      rescale_non_source = casted_number_of_vertices / casted_number_of_sources;
+    } else {
+      rescale_source     = casted_number_of_vertices / (2 * (casted_number_of_sources - 1));
+      rescale_non_source = casted_number_of_vertices / (2 * casted_number_of_sources);
+    }
+    auto const last = sources + number_of_sources;
+    for (size_t idx = 0; idx < number_of_vertices; ++idx) {
+      bool const is_source = std::find(sources, last, static_cast<vertex_t>(idx)) != last;
+      result[idx] *= is_source ? rescale_source : rescale_non_source;
+    }
   }
 }
 
@@ -235,8 +248,13 @@ std::vector<weight_t> betweenness_centrality_reference(
     }
   }
 
-  reference_rescale(
-    result.data(), directed, normalize, include_endpoints, offsets.size() - 1, seeds.size());
+  reference_rescale(result.data(),
+                    seeds.data(),
+                    directed,
+                    normalize,
+                    include_endpoints,
+                    offsets.size() - 1,
+                    seeds.size());
 
   return result;
 }

@@ -15,6 +15,7 @@
 
 import pytest
 import numpy as np
+import networkx as nx
 
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate
@@ -55,7 +56,10 @@ def setup_function():
 # =============================================================================
 
 
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize("dataset", DATASETS)

@@ -15,6 +15,7 @@
 
 import pytest
 import numpy as np
+import networkx as nx
 
 from cugraph.dask.common.mg_utils import is_single_gpu
 from cugraph.datasets import karate, netscience
@@ -53,7 +54,10 @@ def setup_function():
 
 
 # FIXME: Fails for directed = False(bc score twice as much) and normalized = True.
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.mg
 @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system")
 @pytest.mark.parametrize("dataset", DATASETS)

@@ -19,7 +19,6 @@
 import networkx as nx
 
 import cudf
-import cupy
 import cugraph
 from cugraph.datasets import karate_disjoint
 from cugraph.testing import utils, SMALL_DATASETS
@@ -102,7 +101,7 @@ def calc_betweenness_centrality(
         Contains 'vertex' and  'cu_bc' 'ref_bc' columns,  where 'cu_bc'
         and 'ref_bc' are the two betweenness centrality scores to compare.
         The dataframe is expected to be sorted based on 'vertex', so that we
-        can use cupy.isclose to compare the scores.
+        can use np.isclose to compare the scores.
     """
     G = None
     Gnx = None
@@ -289,8 +288,15 @@ def _calc_bc_full(G, Gnx, normalized, weight, endpoints, k, seed, result_dtype):
 # i.e: sorted_df[idx][first_key] should be compared to
 #      sorted_df[idx][second_key]
 def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON):
+    # Compare with numpy and pandas since presence of NaNs in cudf Series
+    # results in "ValueError: CuPy currently does not support masked arrays."
     errors = sorted_df[
-        ~cupy.isclose(sorted_df[first_key], sorted_df[second_key], rtol=epsilon)
+        ~np.isclose(
+            sorted_df[first_key].to_pandas(),
+            sorted_df[second_key].to_pandas(),
+            rtol=epsilon,
+            equal_nan=True,
+        )
     ]
     num_errors = len(errors)
     if num_errors > 0:
@@ -305,7 +311,10 @@ def compare_scores(sorted_df, first_key, second_key, epsilon=DEFAULT_EPSILON):
 # =============================================================================
 # Tests
 # =============================================================================
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", [False, True])
@@ -542,17 +551,17 @@ def test_betweenness_centrality_nx(graph_file, directed, edgevals):
         (True, True, False, None, {0: 1.0, 1: 0.4, 2: 0.4, 3: 0.4, 4: 0.4}),
         (True, True, False, 1, {0: 1.0, 1: 1.0, 2: 0.25, 3: 0.25, 4: 0.25}),
         (True, False, True, None, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
-        (True, False, True, 1, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (True, False, True, 1, {0: 1.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
         (True, False, False, None, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
-        (True, False, False, 1, {0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (True, False, False, 1, {0: 1.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
         (False, True, True, None, {0: 20.0, 1: 8.0, 2: 8.0, 3: 8.0, 4: 8.0}),
         (False, True, True, 1, {0: 20.0, 1: 20.0, 2: 5.0, 3: 5.0, 4: 5.0}),
         (False, True, False, None, {0: 10.0, 1: 4.0, 2: 4.0, 3: 4.0, 4: 4.0}),
         (False, True, False, 1, {0: 10.0, 1: 10.0, 2: 2.5, 3: 2.5, 4: 2.5}),
         (False, False, True, None, {0: 12.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
-        (False, False, True, 1, {0: 12.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (False, False, True, 1, {0: 12.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
         (False, False, False, None, {0: 6.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
-        (False, False, False, 1, {0: 6.0, 1: 0.0, 2: 0.0, 3: 0.0, 4: 0.0}),
+        (False, False, False, 1, {0: 6.0, 1: np.nan, 2: 0.0, 3: 0.0, 4: 0.0}),
     ],
 )
 def test_scale_with_k_on_star_graph(normalized, endpoints, is_directed, k, expected):

@@ -312,7 +312,10 @@ def generate_upper_triangle(dataframe):
     return dataframe
 
 
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@@ -343,7 +346,10 @@ def test_edge_betweenness_centrality(
     compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
 
 
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@@ -383,7 +389,10 @@ def test_edge_betweenness_centrality_k_full(
 #       the function operating the comparison inside is first proceeding
 #       to a random sampling over the number of vertices (thus direct offsets)
 #       in the graph structure instead of actual vertices identifiers
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", [karate_disjoint])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@@ -487,7 +496,10 @@ def test_edge_betweenness_invalid_dtype(
         compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc")
 
 
-@pytest.mark.skip(reason="https://github.com/networkx/networkx/pull/7908")
+@pytest.mark.skipif(
+    tuple(map(int, nx.__version__.split(".")[:2])) < (3, 5),  # float("3.10") == 3.1
+    reason="Requires networkx >= 3.5",
+)
 @pytest.mark.sg
 @pytest.mark.parametrize("graph_file", SMALL_DATASETS)
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)