@@ -1843,144 +1843,6 @@ def show_models(self) -> Dict[int, Any]:
1843
1843
1844
1844
A model dictionary contains the following:
1845
1845
1846
- * ``"model_id"`` - The id given to a model by ``autosklearn``.
1847
- * ``"rank"`` - The rank of the model based on its ``"cost"``.
1848
- * ``"cost"`` - The loss of the model on the validation set.
1849
- * ``"ensemble_weight"`` - The weight given to the model in the ensemble.
1850
- * ``"voting_model"`` - The ``cv_voting_ensemble`` model (for 'cv' resampling).
1851
- * ``"estimators"`` - List of models (dicts) in ``cv_voting_ensemble`` (for 'cv' resampling).
1852
- * ``"data_preprocessor"`` - The preprocessor used on the data.
1853
- * ``"balancing"`` - The balancing used on the data (for classification).
1854
- * ``"feature_preprocessor"`` - The preprocessor for feature types.
1855
- * ``"classifier"`` or ``"regressor"`` - The autosklearn wrapped classifier or regressor.
1856
- * ``"sklearn_classifier"`` or ``"sklearn_regressor"`` - The sklearn classifier or regressor.
1857
-
1858
- **Example**
1859
-
1860
- .. code-block:: python
1861
-
1862
- import sklearn.datasets
1863
- import sklearn.metrics
1864
- import autosklearn.regression
1865
-
1866
- X, y = sklearn.datasets.load_diabetes(return_X_y=True)
1867
-
1868
- automl = autosklearn.regression.AutoSklearnRegressor(
1869
- time_left_for_this_task=120
1870
- )
1871
- automl.fit(X, y, dataset_name='diabetes')
1872
-
1873
- ensemble_dict = automl.show_models()
1874
- print(ensemble_dict)
1875
-
1876
- Output:
1877
-
1878
- .. code-block:: text
1879
-
1880
- {
1881
- 25: {'model_id': 25.0,
1882
- 'rank': 1,
1883
- 'cost': 0.43667876507897496,
1884
- 'ensemble_weight': 0.38,
1885
- 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
1886
- 'feature_preprocessor': <autosklearn.pipeline.components....>,
1887
- 'regressor': <autosklearn.pipeline.components.regression....>,
1888
- 'sklearn_regressor': SGDRegressor(alpha=0.0006517033225329654,...)
1889
- },
1890
- 6: {'model_id': 6.0,
1891
- 'rank': 2,
1892
- 'cost': 0.4550418898836528,
1893
- 'ensemble_weight': 0.3,
1894
- 'data_preprocessor': <autosklearn.pipeline.components.data_preprocessing....>,
1895
- 'feature_preprocessor': <autosklearn.pipeline.components....>,
1896
- 'regressor': <autosklearn.pipeline.components.regression....>,
1897
- 'sklearn_regressor': ARDRegression(alpha_1=0.0003701926442639788,...)
1898
- }...
1899
- }
1900
-
1901
- Returns
1902
- -------
1903
- Dict(int, Any) : dictionary of length = number of models in the ensemble
1904
- A dictionary of models in the ensemble, where ``model_id`` is the key.
1905
-
1906
- """
1907
-
1908
- ensemble_dict = {}
1909
-
1910
- def has_key (rv , key ):
1911
- return rv .additional_info and key in rv .additional_info
1912
-
1913
- table_dict = {}
1914
- for rkey , rval in self .runhistory_ .data .items ():
1915
- if has_key (rval , 'num_run' ):
1916
- model_id = rval .additional_info ['num_run' ]
1917
- table_dict [model_id ] = {
1918
- 'model_id' : model_id ,
1919
- 'cost' : rval .cost
1920
- }
1921
-
1922
- # Checking if the dictionary is empty
1923
- if not table_dict :
1924
- raise RuntimeError ('No model found. Try increasing \' time_left_for_this_task\' .' )
1925
-
1926
- for i , weight in enumerate (self .ensemble_ .weights_ ):
1927
- (_ , model_id , _ ) = self .ensemble_ .identifiers_ [i ]
1928
- table_dict [model_id ]['ensemble_weight' ] = weight
1929
-
1930
- table = pd .DataFrame .from_dict (table_dict , orient = 'index' )
1931
-
1932
- # Checking which resampling strategy is chosen and selecting the appropriate models
1933
- is_cv = (self ._resampling_strategy == "cv" )
1934
- models = self .cv_models_ if is_cv else self .models_
1935
-
1936
- rank = 1 # Initializing rank for the first model
1937
- for (_ , model_id , _ ), model in models .items ():
1938
- model_dict = {} # Declaring model dictionary
1939
-
1940
- # Inserting model_id, rank, cost and ensemble weight
1941
- model_dict ['model_id' ] = table .loc [model_id ]['model_id' ].astype (int )
1942
- model_dict ['rank' ] = rank
1943
- model_dict ['cost' ] = table .loc [model_id ]['cost' ]
1944
- model_dict ['ensemble_weight' ] = table .loc [model_id ]['ensemble_weight' ]
1945
- rank += 1 # Incrementing rank by 1 for the next model
1946
-
1947
- # The steps in the models pipeline are as follows:
1948
- # 'data_preprocessor': DataPreprocessor,
1949
- # 'balancing': Balancing,
1950
- # 'feature_preprocessor': FeaturePreprocessorChoice,
1951
- # 'classifier'/'regressor': ClassifierChoice/RegressorChoice (autosklearn wrapped model)
1952
-
1953
- # For 'cv' (cross validation) strategy
1954
- if is_cv :
1955
- # Voting model created by cross validation
1956
- cv_voting_ensemble = model
1957
- model_dict ['voting_model' ] = cv_voting_ensemble
1958
-
1959
- # List of models, each trained on one cv fold
1960
- cv_models = []
1961
- for cv_model in cv_voting_ensemble .estimators_ :
1962
- estimator = dict (cv_model .steps )
1963
-
1964
- # Adding sklearn model to the model dictionary
1965
- model_type , autosklearn_wrapped_model = cv_model .steps [- 1 ]
1966
- estimator [f'sklearn_{ model_type } ' ] = autosklearn_wrapped_model .choice .estimator
1967
- cv_models .append (estimator )
1968
- model_dict ['estimators' ] = cv_models
1969
-
1970
- # For any other strategy
1971
- else :
1972
- steps = dict (model .steps )
1973
- model_dict .update (steps )
1974
-
1975
- # Adding sklearn model to the model dictionary
1976
- model_type , autosklearn_wrapped_model = model .steps [- 1 ]
1977
- model_dict [f'sklearn_{ model_type } ' ] = autosklearn_wrapped_model .choice .estimator
1978
-
1979
- # Inserting model_dict into the ensemble dictionary
1980
- ensemble_dict [model_id ] = model_dict
1981
-
1982
- return ensemble_dict
1983
-
1984
1846
def _create_search_space(
1985
1847
self,
1986
1848
tmp_dir,
0 commit comments