Skip to content

Commit d031b0d

Browse files
committed
Revert "Fixed imputer for sparse matrices"
This reverts commit 05675ad.
1 parent b6c2916 commit d031b0d

File tree

2 files changed

+16
-20
lines changed

2 files changed

+16
-20
lines changed

autosklearn/pipeline/components/data_preprocessing/imputation/categorical_imputation.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -29,25 +29,23 @@ def fit(self, X: PIPELINE_DATA_DTYPE,
2929
y: Optional[PIPELINE_DATA_DTYPE] = None) -> 'CategoricalImputation':
3030
import sklearn.impute
3131

32+
fill_value = None
3233
if hasattr(X, 'columns'):
3334
kind = X[X.columns[-1]].dtype.kind
3435
else:
3536
# Series, sparse and numpy have dtype
3637
# Only DataFrame does not
3738
kind = X.dtype.kind
3839

39-
number_kinds = ("i", "u", "f")
40-
if kind in number_kinds:
40+
if kind in ("i", "u", "f"):
4141
# We do not want to impute a category with the default
42-
# value (0 is the default).
43-
# Hence we take one greater than the max
44-
unique = np.unique([*X.data, 0]) if issparse(X) else np.unique(X)
45-
print(unique)
46-
fill_value = min(unique) - 1
47-
else:
48-
fill_value = None
49-
50-
print(fill_value)
42+
# value (0 is the default) in case such default is in the
43+
# train data already!
44+
if issparse(X):
45+
# X.data doesn't return 0's
46+
fill_value = min([*X.data, 0]) - 1
47+
else:
48+
fill_value = min(np.unique(X)) - 1
5149

5250
self.preprocessor = sklearn.impute.SimpleImputer(
5351
strategy='constant', copy=False, fill_value=fill_value)

test/test_pipeline/components/data_preprocessing/test_categorical_imputation.py

+7-9
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,15 @@ def test_default_imputation(input_data_imputation, categorical):
3434
X = X.astype('str').astype('object')
3535
X[mask] = np.nan
3636
else:
37-
imputation_value = min(np.unique(X)) - 1
38-
37+
imputation_value = 0
3938
Y = CategoricalImputation().fit_transform(X.copy())
40-
41-
assert np.array_equal(Y == imputation_value, mask)
42-
assert np.array_equal(Y != imputation_value, ~mask)
39+
assert ((np.argwhere(Y == imputation_value) == np.argwhere(mask)).all())
40+
assert ((np.argwhere(Y != imputation_value) == np.argwhere(np.logical_not(mask))).all())
4341

4442

4543
@pytest.mark.parametrize('format_type', ('numpy', 'pandas'))
4644
def test_nonzero_numerical_imputation(format_type):
45+
4746
# First try with an array with 0 as only valid category. The imputation should
4847
# happen with -1
4948
X = np.full(fill_value=np.nan, shape=(10, 10))
@@ -70,9 +69,8 @@ def test_nonzero_numerical_imputation(format_type):
7069
@pytest.mark.parametrize('input_data_imputation', ('numpy'), indirect=True)
7170
def test_default_sparse(input_data_imputation):
7271
X, mask = input_data_imputation
73-
X = sparse.csr_matrix(X)
72+
X = sparse.csc_matrix(X)
7473
Y = CategoricalImputation().fit_transform(X)
7574
Y = Y.todense()
76-
77-
assert np.array_equal(Y == -1, mask)
78-
assert np.array_equal(Y != -1, ~mask)
75+
assert (np.argwhere(Y == 0) == np.argwhere(mask)).all()
76+
assert (np.argwhere(Y != 0) == np.argwhere(np.logical_not(mask))).all()

0 commit comments

Comments
 (0)