Skip to content

Commit 9b59a90

Browse files
committed
Added numpy random generator
1 parent 3f64a30 commit 9b59a90

File tree

5 files changed

+61
-5
lines changed

5 files changed

+61
-5
lines changed

docs/releases/PyCVI-0.1.5.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# PyCVI 0.1.5 release notes
2+
3+
YYYY/MM/DD
4+
5+
_summary-XXX_
6+
7+
## Python versions
8+
9+
This version supports Python versions 3.8 to 3.11.
10+
11+
## New Features
12+
13+
- `pycvi.compute_scores.compute_all_scores`, CVIs and their corresponding functions can now use a specific numpy [Random Generator](https://numpy.org/doc/stable/reference/random/generator.html) for reproducibility purposes.
14+
15+
## Changes
16+
17+
- `pycvi.cvi.select` now raises a `SelectionError` if no clustering could be selected (it previously returned `None`).
18+
19+
## Fixes
20+
21+
- Fix the Davies-Bouldin index so that the distance between a centroid and itself is ignored when computing the index.
22+
23+
## Contributors
24+
25+
- Natacha Galmiche (@nglm)

pycvi/cluster.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def generate_uniform(
8181
data: np.ndarray,
8282
zero_type: str = "bounds",
8383
N_zero: int = 10,
84+
rng = np.random.default_rng(611),
8485
) -> List[np.ndarray]:
8586
"""
8687
Generate `N_zero` samples from a uniform distribution based on data.
@@ -104,6 +105,9 @@ def generate_uniform(
104105
105106
N_zero : int, optional
106107
Number of uniform distributions sampled, by default 10
108+
rng : A numpy Random Generator, optional
109+
The numpy random generator to use to sample from the uniform
110+
distribution, by default np.random.default_rng(611)
107111
108112
Returns
109113
-------
@@ -135,7 +139,7 @@ def generate_uniform(
135139
# Generate N_zero samples from a uniform distribution with shape
136140
# the same shape as data
137141
l_data0 = [
138-
np.random.uniform(low=mins, high=maxs, size=data.shape)
142+
rng.uniform(low=mins, high=maxs, size=data.shape)
139143
for _ in range(N_zero)
140144
]
141145
return l_data0

pycvi/compute_scores.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ def compute_all_scores(
368368
time_window: int = None,
369369
N_zero: int = 10,
370370
zero_type: str = "bounds",
371+
rng = np.random.default_rng(611),
371372
cvi_kwargs: dict = {},
372373
return_list: bool = False,
373374
) -> Union[List[List[Dict[int, float]]], List[Dict[int, float]], Dict[int, float]]:
@@ -426,6 +427,9 @@ def compute_all_scores(
426427
has the same variance and mean as the original data.
427428
- `"bounds"`: the uniform distribution is defined such that it
428429
has the same bounds as the original data.
430+
rng : A numpy Random Generator, optional
431+
The numpy random generator to use to sample from the uniform
432+
distribution, by default np.random.default_rng(611)
429433
cvi_kwargs : dict, optional
430434
Specific kwargs to give to the CVI, by default {}
431435
return_list: bool, optional
@@ -455,7 +459,9 @@ def compute_all_scores(
455459
# --------------------------------------------------------------
456460

457461
data_copy = set_data_shape(data)
458-
l_data0 = generate_uniform(data_copy, zero_type=zero_type, N_zero=N_zero)
462+
l_data0 = generate_uniform(
463+
data_copy, zero_type=zero_type, N_zero=N_zero, rng=rng
464+
)
459465
(N, T, d) = data_copy.shape
460466
if scaler is not None:
461467
scaler.fit(data_copy.reshape(N*T, d))

pycvi/cvi.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ class CVI():
114114
example used in the Hartigan index where we don't use
115115
:math:`k=0` as a reference score if :math:`k=0` is more
116116
relevant than :math:`k=1`, by default False
117+
rng : A numpy Random Generator, optional
118+
The numpy random generator to use when sampling from random
119+
distributions, by default np.random.default_rng(611)
117120
118121
Raises
119122
------
@@ -134,6 +137,7 @@ def __init__(
134137
criterion_function: callable = None,
135138
k_condition: callable = None,
136139
ignore0: bool = False,
140+
rng = np.random.default_rng(611),
137141
) -> None:
138142
self.function = cvi_function
139143
self.criterion_function = criterion_function
@@ -150,6 +154,7 @@ def __init__(
150154
self.ignore0 = ignore0
151155
self.N = None
152156
self.d = None
157+
self.rng = rng
153158

154159
def __call__(
155160
self,
@@ -895,6 +900,7 @@ def get_cvi_kwargs(
895900
"""
896901
cvi_kw = {}
897902
cvi_kw["k"] = n_clusters
903+
cvi_kw["rng"] = self.rng
898904
if n_clusters < len(X_clus):
899905
cvi_kw["clusters_next"] = clusterings_t.get(n_clusters+1, None)
900906
if n_clusters == 0:
@@ -996,6 +1002,7 @@ def get_cvi_kwargs(
9961002
"""
9971003
cvi_kw = {}
9981004
cvi_kw["k"] = n_clusters
1005+
cvi_kw["rng"] = self.rng
9991006
if n_clusters == 0:
10001007
cvi_kw["X1"] = X_clus
10011008
if "zero_type" not in cvi_kw or cvi_kw["zero_type"] == None:
@@ -1127,6 +1134,7 @@ def get_cvi_kwargs(
11271134
"""
11281135
cvi_kw = {"B" : 10}
11291136
cvi_kw["k"] = n_clusters
1137+
cvi_kw["rng"] = self.rng
11301138
if self.cvi_type == "original":
11311139
cvi_kw["return_s"] = True
11321140
cvi_kw.update(cvi_kwargs)

pycvi/cvi_func.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ def gap_statistic(
213213
k: int = None,
214214
B: int = 10,
215215
zero_type: str = "variance",
216+
rng = np.random.default_rng(611),
216217
return_s: bool = False,
217218
) -> Union[float, Tuple[float, float]]:
218219
"""
@@ -236,6 +237,9 @@ def gap_statistic(
236237
has the same bounds as the original data.
237238
238239
:type zero_type: str, optional
240+
:param rng: The numpy random generator to use to sample from the
241+
uniform distribution, by default np.random.default_rng(611)
242+
:type rng: A numpy Random Generator, optional
239243
:param return_s: Should s be returned as well?
240244
:type return_s: bool, optional
241245
:return: The gap statistics
@@ -249,7 +253,8 @@ def gap_statistic(
249253

250254
# Generate B random datasets with the same shape as the input data
251255
# and the same parameters
252-
random_datasets = generate_uniform(X, zero_type=zero_type, N_zero=B)
256+
random_datasets = generate_uniform(
257+
X, zero_type=zero_type, N_zero=B, rng=rng)
253258

254259
# Compute the log of the within-cluster dispersion for each random dataset
255260
wcss_rand = []
@@ -327,6 +332,7 @@ def hartigan(
327332
k:int = None,
328333
clusters_next: List[List[int]] = None,
329334
X1: np.ndarray = None,
335+
rng = np.random.default_rng(611),
330336
) -> float:
331337
"""
332338
Compute the Hartigan index for a given clustering.
@@ -343,6 +349,9 @@ def hartigan(
343349
then the values of all datapoints when sampled from a uniform
344350
distribution.
345351
:type X1: np.ndarray, shape: (N, d*w_t) or (N, w_t, d)
352+
:param rng: The numpy random generator to use to sample from the
353+
uniform distribution, by default np.random.default_rng(611)
354+
:type rng: A numpy Random Generator, optional
346355
:return: The Hartigan index
347356
:rtype: float
348357
"""
@@ -359,7 +368,7 @@ def hartigan(
359368
elif k == 0:
360369
# X0 shape: (N, d*w_t) or (N, w_t, d)
361370
if X1 is None:
362-
l_X0 = generate_uniform(X, zero_type="bounds", N_zero=1)
371+
l_X0 = generate_uniform(X, zero_type="bounds", N_zero=1, rng=rng)
363372
X1 = X
364373
else:
365374
l_X0 = [X]
@@ -442,6 +451,7 @@ def CH(
442451
k: int = None,
443452
X1: np.ndarray = None,
444453
zero_type: str = "variance",
454+
rng = np.random.default_rng(611),
445455
dist_kwargs: dict = {},
446456
) -> float:
447457
"""
@@ -467,6 +477,9 @@ def CH(
467477
has the same bounds as the original data.
468478
469479
:type zero_type: str, optional
480+
:param rng: The numpy random generator to use to sample from the
481+
uniform distribution, by default np.random.default_rng(611)
482+
:type rng: A numpy Random Generator, optional
470483
:param dist_kwargs: kwargs for the distance function, defaults to {}
471484
:type dist_kwargs: dict, optional
472485
:return: The CH index
@@ -484,7 +497,7 @@ def CH(
484497

485498
# X0 shape: (N, d*w_t) or (N, w_t, d)
486499
if X1 is None:
487-
X0 = generate_uniform(X, zero_type=zero_type, N_zero=1)[0]
500+
X0 = generate_uniform(X, zero_type=zero_type, N_zero=1, rng=rng)[0]
488501
X1 = X
489502
else:
490503
X0 = X

0 commit comments

Comments
 (0)