Update docstrings of include and exclude parameters of the estimators (#1332)

eddiebergman · eddiebergman · commit 09e475248e5c · 2022-08-18T20:08:47.000+02:00
* Update docstrings and types

* doc typo fix

* flake'd
diff --git a/autosklearn/automl.py b/autosklearn/automl.py
@@ -1843,144 +1843,6 @@ def show_models(self) -> Dict[int, Any]:
 
         A model dictionary contains the following:
 
-        * ``"model_id"`` - The id given to a model by ``autosklearn``.
-        * ``"rank"`` - The rank of the model based on it's ``"cost"``.
-        * ``"cost"`` - The loss of the model on the validation set.
-        * ``"ensemble_weight"`` - The weight given to the model in the ensemble.
-        * ``"voting_model"`` - The ``cv_voting_ensemble`` model (for 'cv' resampling).
-        * ``"estimators"`` - List of models (dicts) in ``cv_voting_ensemble`` (for 'cv' resampling).
-        * ``"data_preprocessor"`` - The preprocessor used on the data.
-        * ``"balancing"`` - The balancing used on the data (for classification).
-        * ``"feature_preprocessor"`` - The preprocessor for features types.
-        * ``"classifier"`` or ``"regressor"`` - The autosklearn wrapped classifier or regressor.
-        * ``"sklearn_classifier"`` or ``"sklearn_regressor"`` - The sklearn classifier or regressor.
-
-        **Example**
-
-        .. code-block:: python
-
-            import sklearn.datasets
-            import sklearn.metrics
-            import autosklearn.regression
-
-            X, y = sklearn.datasets.load_diabetes(return_X_y=True)
-
-            automl = autosklearn.regression.AutoSklearnRegressor(
-                time_left_for_this_task=120
-                )
-            automl.fit(X_train, y_train, dataset_name='diabetes')
-
-            ensemble_dict = automl.show_models()
-            print(ensemble_dict)
-
-        Output:
-
-        .. code-block:: text
-
-            {
-                25: {'model_id': 25.0,
-                     'rank': 1,
-                     'cost': 0.43667876507897496,
-                     'ensemble_weight': 0.38,
-                     'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
-                     'feature_preprocessor': <autosklearn.pipeline.components....>,
-                     'regressor': <autosklearn.pipeline.components.regression....>,
-                     'sklearn_regressor': SGDRegressor(alpha=0.0006517033225329654,...)
-                    },
-                6: {'model_id': 6.0,
-                    'rank': 2,
-                    'cost': 0.4550418898836528,
-                    'ensemble_weight': 0.3,
-                    'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
-                    'feature_preprocessor': <autosklearn.pipeline.components....>,
-                    'regressor': <autosklearn.pipeline.components.regression....>,
-                    'sklearn_regressor': ARDRegression(alpha_1=0.0003701926442639788,...)
-                    }...
-            }
-
-        Returns
-        -------
-        Dict(int, Any) : dictionary of length = number of models in the ensemble
-            A dictionary of models in the ensemble, where ``model_id`` is the key.
-
-        """
-
-        ensemble_dict = {}
-
-        def has_key(rv, key):
-            return rv.additional_info and key in rv.additional_info
-
-        table_dict = {}
-        for rkey, rval in self.runhistory_.data.items():
-            if has_key(rval, 'num_run'):
-                model_id = rval.additional_info['num_run']
-                table_dict[model_id] = {
-                        'model_id': model_id,
-                        'cost': rval.cost
-                        }
-
-        # Checking if the dictionary is empty
-        if not table_dict:
-            raise RuntimeError('No model found. Try increasing \'time_left_for_this_task\'.')
-
-        for i, weight in enumerate(self.ensemble_.weights_):
-            (_, model_id, _) = self.ensemble_.identifiers_[i]
-            table_dict[model_id]['ensemble_weight'] = weight
-
-        table = pd.DataFrame.from_dict(table_dict, orient='index')
-
-        # Checking which resampling strategy is chosen and selecting the appropriate models
-        is_cv = (self._resampling_strategy == "cv")
-        models = self.cv_models_ if is_cv else self.models_
-
-        rank = 1  # Initializing rank for the first model
-        for (_, model_id, _), model in models.items():
-            model_dict = {}  # Declaring model dictionary
-
-            # Inserting model_id, rank, cost and ensemble weight
-            model_dict['model_id'] = table.loc[model_id]['model_id'].astype(int)
-            model_dict['rank'] = rank
-            model_dict['cost'] = table.loc[model_id]['cost']
-            model_dict['ensemble_weight'] = table.loc[model_id]['ensemble_weight']
-            rank += 1  # Incrementing rank by 1 for the next model
-
-            # The steps in the models pipeline are as follows:
-            # 'data_preprocessor': DataPreprocessor,
-            # 'balancing': Balancing,
-            # 'feature_preprocessor': FeaturePreprocessorChoice,
-            # 'classifier'/'regressor': ClassifierChoice/RegressorChoice (autosklearn wrapped model)
-
-            # For 'cv' (cross validation) strategy
-            if is_cv:
-                # Voting model created by cross validation
-                cv_voting_ensemble = model
-                model_dict['voting_model'] = cv_voting_ensemble
-
-                # List of models, each trained on one cv fold
-                cv_models = []
-                for cv_model in cv_voting_ensemble.estimators_:
-                    estimator = dict(cv_model.steps)
-
-                    # Adding sklearn model to the model dictionary
-                    model_type, autosklearn_wrapped_model = cv_model.steps[-1]
-                    estimator[f'sklearn_{model_type}'] = autosklearn_wrapped_model.choice.estimator
-                    cv_models.append(estimator)
-                model_dict['estimators'] = cv_models
-
-            # For any other strategy
-            else:
-                steps = dict(model.steps)
-                model_dict.update(steps)
-
-                # Adding sklearn model to the model dictionary
-                model_type, autosklearn_wrapped_model = model.steps[-1]
-                model_dict[f'sklearn_{model_type}'] = autosklearn_wrapped_model.choice.estimator
-
-            # Insterting model_dict in the ensemble dictionary
-            ensemble_dict[model_id] = model_dict
-
-        return ensemble_dict
-
     def _create_search_space(
         self,
         tmp_dir,
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
@@ -97,13 +97,6 @@ def __init__(
             Memory limit in MB for the machine learning algorithm.
             `auto-sklearn` will stop fitting the machine learning algorithm if
             it tries to allocate more than ``memory_limit`` MB.
-            
-            **Important notes:** 
-            
-            * If ``None`` is provided, no memory limit is set.
-            * In case of multi-processing, ``memory_limit`` will be *per job*, so the total usage is 
-              ``n_jobs x memory_limit``.
-            * The memory limit also applies to the ensemble creation process.
 
             **Important notes:**