From 8849ce3bc89302063499aa7010be746ad71c4b54 Mon Sep 17 00:00:00 2001
From: Sebastien Poirier
Date: Sat, 30 May 2020 01:42:13 +0200
Subject: [PATCH 1/3] Add generic support for framework extensions, with an example integration for custom metrics

---
 amlb/benchmark.py              |  4 +++-
 frameworks/AutoGluon/exec.py   |  5 +++--
 frameworks/TPOT/exec.py        |  5 +++--
 frameworks/autosklearn/exec.py |  5 +++--
 frameworks/shared/callee.py    | 36 ++++++++++++++++++++++++++++++++--
 resources/config.yaml          |  3 +++
 6 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index 071709e0b..c703561b2 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -279,7 +279,7 @@ class TaskConfig:
 
     def __init__(self, name, fold, metrics, seed,
                  max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
-                 input_dir, output_dir):
+                 input_dir, output_dir, extensions):
         self.framework = None
         self.framework_params = None
         self.type = None
@@ -295,6 +295,7 @@ def __init__(self, name, fold, metrics, seed,
         self.input_dir = input_dir
         self.output_dir = output_dir
         self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
+        self.extensions = extensions
 
     def __json__(self):
         return self.__dict__
@@ -350,6 +351,7 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
             min_vol_size_mb=task_def.min_vol_size_mb,
             input_dir=rconfig().input_dir,
             output_dir=benchmark.output_dirs.session,
+            extensions=rconfig().extensions_files,
         )
         # allowing to override some task parameters through command line, e.g.: -Xt.max_runtime_seconds=60
         if rconfig()['t'] is not None:
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 8dfb4004e..3336b2251 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -12,7 +12,7 @@
 from autogluon.utils.tabular.utils.savers import save_pd, save_pkl
 import autogluon.utils.tabular.metrics as metrics
 
-from frameworks.shared.callee import call_run, result, output_subdir, utils
+from frameworks.shared.callee import call_run, get_extension, result, output_subdir, utils
 
 log = logging.getLogger(__name__)
 
@@ -32,7 +32,8 @@ def run(dataset, config):
         rmse=metrics.mean_squared_error,  # for now, we can let autogluon optimize training on mse: anyway we compute final score from predictions.
     )
 
-    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    perf_metric = (metrics_mapping[config.metric] if config.metric in metrics_mapping
+                   else get_extension(config.extensions, config.metric))
     if perf_metric is None:
         # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
         log.warning("Performance metric %s not supported.", config.metric)
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index 0bfdf096a..986d3a3ab 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -12,7 +12,7 @@
 os.environ['MKL_NUM_THREADS'] = '1'
 from tpot import TPOTClassifier, TPOTRegressor
 
-from frameworks.shared.callee import call_run, result, output_subdir, utils
+from frameworks.shared.callee import call_run, get_extension, result, output_subdir, utils
 
 log = logging.getLogger(__name__)
 
@@ -34,7 +34,8 @@ def run(dataset, config):
         r2='r2',
         rmse='neg_mean_squared_error',  # TPOT can score on mse, as app computes rmse independently on predictions
     )
-    scoring_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    scoring_metric = (metrics_mapping[config.metric] if config.metric in metrics_mapping
+                      else get_extension(config.extensions, config.metric))
     if scoring_metric is None:
         raise ValueError("Performance metric {} not supported.".format(config.metric))
 
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 7d3c5140a..130e7cfe0 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -13,7 +13,7 @@
 import autosklearn.metrics as metrics
 from packaging import version
 
-from frameworks.shared.callee import call_run, result, output_subdir, utils
+from frameworks.shared.callee import call_run, get_extension, result, output_subdir, utils
 
 log = logging.getLogger(__name__)
 
@@ -36,7 +36,8 @@ def run(dataset, config):
         rmse=metrics.mean_squared_error,  # autosklearn can optimize on mse, and we compute rmse independently on predictions
         r2=metrics.r2
     )
-    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    perf_metric = (metrics_mapping[config.metric] if config.metric in metrics_mapping
+                   else get_extension(config.extensions, config.metric))
     if perf_metric is None:
         # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
         log.warning("Performance metric %s not supported.", config.metric)
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 6d355da20..5a6c182bc 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -1,3 +1,4 @@
+import linecache
 import json
 import logging
 import os
@@ -44,7 +45,38 @@ def output_subdir(name, config):
     return subdir
 
 
-data_keys = re.compile("^(X|y|data)(_.+)?$")
+_extensions_ = {}
+
+
+def get_extension(files, name=None, default=None):
+    files = [files] if isinstance(files, str) else files
+
+    extensions = []
+    for file in files:
+        if file in _extensions_:
+            extensions.append(_extensions_.get(file, {}))
+        elif os.path.isfile(file):
+            try:
+                with open(file) as f:
+                    # linecache and compile are necessary only if we want to inspect code later
+                    # otherwise the following statement is enough:
+                    # exec(f.read(), customizations)
+                    linecache.updatecache(f.name)
+                    code = compile(f.read(), f.name, 'exec')
+                    ext = {}
+                    exec(code, ext)
+                    _extensions_[file] = ext
+                    extensions.append(ext)
+            except Exception as e:
+                log.warning("Could not load extension file %s: %s", file, str(e))
+                _extensions_[file] = {}
+        else:
+            log.warning("No extensions available at %s", file)
+
+    return extensions if name is None else next((ext[name] for ext in extensions if name in ext), default)
+
+
+_data_keys_ = re.compile("^(X|y|data)(_.+)?$")
 
 
 def call_run(run_fn):
@@ -53,7 +85,7 @@ def call_run(run_fn):
     params = NS.from_dict(json.loads(sys.stdin.read()))
 
     def load_data(name, path, **ignored):
-        if isinstance(path, str) and data_keys.match(name):
+        if isinstance(path, str) and _data_keys_.match(name):
             return name, np.load(path, allow_pickle=True)
         return name, path
 
diff --git a/resources/config.yaml b/resources/config.yaml
index a8ad914d6..8487e7e62 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -50,6 +50,9 @@ benchmarks:
   max_mem_size_mb: -1   # default amount of memory assigned to each automl task. If <= 0, then the amount of memory is computed from os available memory.
   min_vol_size_mb: -1   # default minimum amount of free space required on the volume. If <= 0, skips verification.
 
+extensions_files:
+  - '{user}/extensions.py'
+
 results:
   error_max_length: 200
   save: true    # set by runbenchmark.py

From 2a4ade45006eff9e37255d8be80ba5e17140222d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 7 Oct 2020 15:57:56 +0200
Subject: [PATCH 2/3] Allow custom metrics to be reported in results

AutoML frameworks generally expect their own signature for a metric
function, so we define an additional amlb-specific signature that lets
the benchmark report the scores as well. Metrics in the amlb format are
denoted by a trailing underscore ("metric_"). A user can therefore
define two functions, e.g. Accuracy and Accuracy_: the former is used
by the AutoML framework, the latter by the amlb.
---
 amlb/results.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/amlb/results.py b/amlb/results.py
index 89f5312fb..3222c89ea 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -17,6 +17,7 @@
 from .datautils import accuracy_score, confusion_matrix, f1_score, log_loss, balanced_accuracy_score, mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score, roc_auc_score, read_csv, write_csv, is_data_frame, to_data_frame
 from .resources import get as rget, config as rconfig, output_dirs
 from .utils import Namespace, backup_file, cached, datetime_iso, memoize, profile
+from frameworks.shared.callee import get_extension
 
 log = logging.getLogger(__name__)
 
@@ -323,6 +324,15 @@ def __init__(self, predictions_df, info=None):
     def evaluate(self, metric):
         if hasattr(self, metric):
             return getattr(self, metric)()
+        else:
+            # A metric may be defined twice, once for the automl system to use (e.g.
+            # as a scikit-learn scorer), and once in the amlb-compatible format.
+            # The amlb-compatible format is marked with a trailing underscore.
+            custom_metric = get_extension(rconfig().extensions_files, f"{metric}_")
+            if custom_metric is None:
+                custom_metric = get_extension(rconfig().extensions_files, metric)
+            if custom_metric is not None:
+                return custom_metric(self)
         # raise ValueError("Metric {metric} is not supported for {type}.".format(metric=metric, type=self.type))
         log.warning("Metric %s is not supported for %s!", metric, self.type)
         return nan

From ba3969de2f0caeaaaf2254a7b9be92ab1bad61cf Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 7 Oct 2020 16:25:16 +0200
Subject: [PATCH 3/3] Remove the check for custom metric without '_'

No AutoML framework uses the signature that the amlb expects, so
sharing a single metric definition between the framework and the amlb
makes no sense until we change the signature used by the amlb.
---
 amlb/results.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/amlb/results.py b/amlb/results.py
index 3222c89ea..b20ebe8f9 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -329,8 +329,6 @@ def evaluate(self, metric):
             # as a scikit-learn scorer), and once in the amlb-compatible format.
             # The amlb-compatible format is marked with a trailing underscore.
             custom_metric = get_extension(rconfig().extensions_files, f"{metric}_")
-            if custom_metric is None:
-                custom_metric = get_extension(rconfig().extensions_files, metric)
             if custom_metric is not None:
                 return custom_metric(self)
         # raise ValueError("Metric {metric} is not supported for {type}.".format(metric=metric, type=self.type))
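
For reference, below is a minimal sketch of a user extension file wired in through the
new extensions_files entry (default '{user}/extensions.py'). The metric name "balacc",
the use of a plain scikit-learn scorer, and the "truth"/"predictions" attributes on the
result object are illustrative assumptions, not part of these patches; the patches only
establish that the module is exec'd by get_extension(), that a framework integration
resolves the plain name via get_extension(config.extensions, config.metric), and that
result evaluation resolves the same name with a trailing underscore and calls it with
the result object.

    # {user}/extensions.py -- hypothetical example of a custom-metric extension.
    # Assumes scikit-learn is installed in the environment that loads this file.
    from sklearn.metrics import balanced_accuracy_score, make_scorer

    # Plain name: handed to a framework integration through
    # get_extension(config.extensions, config.metric) when a task asks for "balacc".
    # Some integrations may need a framework-specific scorer object instead
    # (e.g. an autosklearn.metrics scorer), so the scorer type here is an assumption.
    balacc = make_scorer(balanced_accuracy_score)


    # Trailing-underscore name: looked up by the patched evaluate() as f"{metric}_"
    # and called with the result object; assumes the result exposes "truth" and
    # "predictions" arrays.
    def balacc_(result):
        return balanced_accuracy_score(result.truth, result.predictions)

Because get_extension() caches each exec'd file, both callables live in one module, which
keeps the framework-side optimization metric and the score reported by the amlb in sync.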