Skip to content

Commit 09e4752

Browse files
committed
Update docstrings of include and exclude parameters of the estimators (#1332)
* Update docstrings and types * doc typo fix * flake'd
1 parent 8276228 commit 09e4752

File tree

2 files changed

+0
-145
lines changed

2 files changed

+0
-145
lines changed

autosklearn/automl.py

-138
Original file line numberDiff line numberDiff line change
@@ -1843,144 +1843,6 @@ def show_models(self) -> Dict[int, Any]:
18431843

18441844
A model dictionary contains the following:
18451845

1846-
* ``"model_id"`` - The id given to a model by ``autosklearn``.
1847-
* ``"rank"`` - The rank of the model based on its ``"cost"``.
1848-
* ``"cost"`` - The loss of the model on the validation set.
1849-
* ``"ensemble_weight"`` - The weight given to the model in the ensemble.
1850-
* ``"voting_model"`` - The ``cv_voting_ensemble`` model (for 'cv' resampling).
1851-
* ``"estimators"`` - List of models (dicts) in ``cv_voting_ensemble`` (for 'cv' resampling).
1852-
* ``"data_preprocessor"`` - The preprocessor used on the data.
1853-
* ``"balancing"`` - The balancing used on the data (for classification).
1854-
* ``"feature_preprocessor"`` - The preprocessor for features types.
1855-
* ``"classifier"`` or ``"regressor"`` - The autosklearn wrapped classifier or regressor.
1856-
* ``"sklearn_classifier"`` or ``"sklearn_regressor"`` - The sklearn classifier or regressor.
1857-
1858-
**Example**
1859-
1860-
.. code-block:: python
1861-
1862-
import sklearn.datasets
1863-
import sklearn.metrics
1864-
import autosklearn.regression
1865-
1866-
X, y = sklearn.datasets.load_diabetes(return_X_y=True)
1867-
1868-
automl = autosklearn.regression.AutoSklearnRegressor(
1869-
time_left_for_this_task=120
1870-
)
1871-
automl.fit(X_train, y_train, dataset_name='diabetes')
1872-
1873-
ensemble_dict = automl.show_models()
1874-
print(ensemble_dict)
1875-
1876-
Output:
1877-
1878-
.. code-block:: text
1879-
1880-
{
1881-
25: {'model_id': 25.0,
1882-
'rank': 1,
1883-
'cost': 0.43667876507897496,
1884-
'ensemble_weight': 0.38,
1885-
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
1886-
'feature_preprocessor': <autosklearn.pipeline.components....>,
1887-
'regressor': <autosklearn.pipeline.components.regression....>,
1888-
'sklearn_regressor': SGDRegressor(alpha=0.0006517033225329654,...)
1889-
},
1890-
6: {'model_id': 6.0,
1891-
'rank': 2,
1892-
'cost': 0.4550418898836528,
1893-
'ensemble_weight': 0.3,
1894-
'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
1895-
'feature_preprocessor': <autosklearn.pipeline.components....>,
1896-
'regressor': <autosklearn.pipeline.components.regression....>,
1897-
'sklearn_regressor': ARDRegression(alpha_1=0.0003701926442639788,...)
1898-
}...
1899-
}
1900-
1901-
Returns
1902-
-------
1903-
Dict(int, Any) : dictionary of length = number of models in the ensemble
1904-
A dictionary of models in the ensemble, where ``model_id`` is the key.
1905-
1906-
"""
1907-
1908-
ensemble_dict = {}
1909-
1910-
def has_key(rv, key):
1911-
return rv.additional_info and key in rv.additional_info
1912-
1913-
table_dict = {}
1914-
for rkey, rval in self.runhistory_.data.items():
1915-
if has_key(rval, 'num_run'):
1916-
model_id = rval.additional_info['num_run']
1917-
table_dict[model_id] = {
1918-
'model_id': model_id,
1919-
'cost': rval.cost
1920-
}
1921-
1922-
# Checking if the dictionary is empty
1923-
if not table_dict:
1924-
raise RuntimeError('No model found. Try increasing \'time_left_for_this_task\'.')
1925-
1926-
for i, weight in enumerate(self.ensemble_.weights_):
1927-
(_, model_id, _) = self.ensemble_.identifiers_[i]
1928-
table_dict[model_id]['ensemble_weight'] = weight
1929-
1930-
table = pd.DataFrame.from_dict(table_dict, orient='index')
1931-
1932-
# Checking which resampling strategy is chosen and selecting the appropriate models
1933-
is_cv = (self._resampling_strategy == "cv")
1934-
models = self.cv_models_ if is_cv else self.models_
1935-
1936-
rank = 1 # Initializing rank for the first model
1937-
for (_, model_id, _), model in models.items():
1938-
model_dict = {} # Declaring model dictionary
1939-
1940-
# Inserting model_id, rank, cost and ensemble weight
1941-
model_dict['model_id'] = table.loc[model_id]['model_id'].astype(int)
1942-
model_dict['rank'] = rank
1943-
model_dict['cost'] = table.loc[model_id]['cost']
1944-
model_dict['ensemble_weight'] = table.loc[model_id]['ensemble_weight']
1945-
rank += 1 # Incrementing rank by 1 for the next model
1946-
1947-
# The steps in the models pipeline are as follows:
1948-
# 'data_preprocessor': DataPreprocessor,
1949-
# 'balancing': Balancing,
1950-
# 'feature_preprocessor': FeaturePreprocessorChoice,
1951-
# 'classifier'/'regressor': ClassifierChoice/RegressorChoice (autosklearn wrapped model)
1952-
1953-
# For 'cv' (cross validation) strategy
1954-
if is_cv:
1955-
# Voting model created by cross validation
1956-
cv_voting_ensemble = model
1957-
model_dict['voting_model'] = cv_voting_ensemble
1958-
1959-
# List of models, each trained on one cv fold
1960-
cv_models = []
1961-
for cv_model in cv_voting_ensemble.estimators_:
1962-
estimator = dict(cv_model.steps)
1963-
1964-
# Adding sklearn model to the model dictionary
1965-
model_type, autosklearn_wrapped_model = cv_model.steps[-1]
1966-
estimator[f'sklearn_{model_type}'] = autosklearn_wrapped_model.choice.estimator
1967-
cv_models.append(estimator)
1968-
model_dict['estimators'] = cv_models
1969-
1970-
# For any other strategy
1971-
else:
1972-
steps = dict(model.steps)
1973-
model_dict.update(steps)
1974-
1975-
# Adding sklearn model to the model dictionary
1976-
model_type, autosklearn_wrapped_model = model.steps[-1]
1977-
model_dict[f'sklearn_{model_type}'] = autosklearn_wrapped_model.choice.estimator
1978-
1979-
# Inserting model_dict in the ensemble dictionary
1980-
ensemble_dict[model_id] = model_dict
1981-
1982-
return ensemble_dict
1983-
19841846
def _create_search_space(
19851847
self,
19861848
tmp_dir,

autosklearn/estimators.py

-7
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,6 @@ def __init__(
9797
Memory limit in MB for the machine learning algorithm.
9898
`auto-sklearn` will stop fitting the machine learning algorithm if
9999
it tries to allocate more than ``memory_limit`` MB.
100-
101-
**Important notes:**
102-
103-
* If ``None`` is provided, no memory limit is set.
104-
* In case of multi-processing, ``memory_limit`` will be *per job*, so the total usage is
105-
``n_jobs x memory_limit``.
106-
* The memory limit also applies to the ensemble creation process.
107100
108101
**Important notes:**
109102

0 commit comments

Comments
 (0)