From 8849ce3bc89302063499aa7010be746ad71c4b54 Mon Sep 17 00:00:00 2001
From: Sebastien Poirier
Date: Sat, 30 May 2020 01:42:13 +0200
Subject: [PATCH 1/3] Add generic support for framework extensions, with an example integration for custom metrics

---
 amlb/benchmark.py              |  4 +++-
 frameworks/AutoGluon/exec.py   |  5 +++--
 frameworks/TPOT/exec.py        |  5 +++--
 frameworks/autosklearn/exec.py |  5 +++--
 frameworks/shared/callee.py    | 36 ++++++++++++++++++++++++++++++++--
 resources/config.yaml          |  3 +++
 6 files changed, 49 insertions(+), 9 deletions(-)

diff --git a/amlb/benchmark.py b/amlb/benchmark.py
index 071709e0b..c703561b2 100644
--- a/amlb/benchmark.py
+++ b/amlb/benchmark.py
@@ -279,7 +279,7 @@ class TaskConfig:
 
     def __init__(self, name, fold, metrics, seed,
                  max_runtime_seconds, cores, max_mem_size_mb, min_vol_size_mb,
-                 input_dir, output_dir):
+                 input_dir, output_dir, extensions):
         self.framework = None
         self.framework_params = None
         self.type = None
@@ -295,6 +295,7 @@ def __init__(self, name, fold, metrics, seed,
         self.input_dir = input_dir
         self.output_dir = output_dir
         self.output_predictions_file = os.path.join(output_dir, "predictions.csv")
+        self.extensions = extensions
 
     def __json__(self):
         return self.__dict__
@@ -350,6 +351,7 @@ def __init__(self, benchmark: Benchmark, task_def, fold):
             min_vol_size_mb=task_def.min_vol_size_mb,
             input_dir=rconfig().input_dir,
             output_dir=benchmark.output_dirs.session,
+            extensions=rconfig().extensions_files,
         )
         # allowing to override some task parameters through command line, e.g.: -Xt.max_runtime_seconds=60
         if rconfig()['t'] is not None:
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 8dfb4004e..3336b2251 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -12,7 +12,7 @@
 from autogluon.utils.tabular.utils.savers import save_pd, save_pkl
 import autogluon.utils.tabular.metrics as metrics
 
-from frameworks.shared.callee import call_run, result, output_subdir, utils
+from frameworks.shared.callee import call_run, get_extension, result, output_subdir, utils
 
 log = logging.getLogger(__name__)
 
@@ -32,7 +32,8 @@ def run(dataset, config):
         rmse=metrics.mean_squared_error,  # for now, we can let autogluon optimize training on mse: anyway we compute final score from predictions.
     )
 
-    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    perf_metric = (metrics_mapping[config.metric] if config.metric in metrics_mapping
+                   else get_extension(config.extensions, config.metric))
     if perf_metric is None:
         # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
         log.warning("Performance metric %s not supported.", config.metric)
diff --git a/frameworks/TPOT/exec.py b/frameworks/TPOT/exec.py
index 0bfdf096a..986d3a3ab 100644
--- a/frameworks/TPOT/exec.py
+++ b/frameworks/TPOT/exec.py
@@ -12,7 +12,7 @@
 os.environ['MKL_NUM_THREADS'] = '1'
 from tpot import TPOTClassifier, TPOTRegressor
 
-from frameworks.shared.callee import call_run, result, output_subdir, utils
+from frameworks.shared.callee import call_run, get_extension, result, output_subdir, utils
 
 log = logging.getLogger(__name__)
 
@@ -34,7 +34,8 @@ def run(dataset, config):
         r2='r2',
         rmse='neg_mean_squared_error',  # TPOT can score on mse, as app computes rmse independently on predictions
     )
-    scoring_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    scoring_metric = (metrics_mapping[config.metric] if config.metric in metrics_mapping
+                      else get_extension(config.extensions, config.metric))
     if scoring_metric is None:
         raise ValueError("Performance metric {} not supported.".format(config.metric))
 
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 7d3c5140a..130e7cfe0 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -13,7 +13,7 @@
 import autosklearn.metrics as metrics
 from packaging import version
 
-from frameworks.shared.callee import call_run, result, output_subdir, utils
+from frameworks.shared.callee import call_run, get_extension, result, output_subdir, utils
 
 log = logging.getLogger(__name__)
 
@@ -36,7 +36,8 @@ def run(dataset, config):
         rmse=metrics.mean_squared_error,  # autosklearn can optimize on mse, and we compute rmse independently on predictions
         r2=metrics.r2
     )
-    perf_metric = metrics_mapping[config.metric] if config.metric in metrics_mapping else None
+    perf_metric = (metrics_mapping[config.metric] if config.metric in metrics_mapping
+                   else get_extension(config.extensions, config.metric))
     if perf_metric is None:
         # TODO: figure out if we are going to blindly pass metrics through, or if we use a strict mapping
         log.warning("Performance metric %s not supported.", config.metric)
diff --git a/frameworks/shared/callee.py b/frameworks/shared/callee.py
index 6d355da20..5a6c182bc 100644
--- a/frameworks/shared/callee.py
+++ b/frameworks/shared/callee.py
@@ -1,3 +1,4 @@
+import linecache
 import json
 import logging
 import os
@@ -44,7 +45,38 @@ def output_subdir(name, config):
     return subdir
 
 
-data_keys = re.compile("^(X|y|data)(_.+)?$")
+_extensions_ = {}
+
+
+def get_extension(files, name=None, default=None):
+    files = [files] if isinstance(files, str) else files
+
+    extensions = []
+    for file in files:
+        if file in _extensions_:
+            extensions.append(_extensions_.get(file, {}))
+        elif os.path.isfile(file):
+            try:
+                with open(file) as f:
+                    # linecache and compile are necessary only if we want to inspect code later
+                    # otherwise the following statement is enough:
+                    # exec(f.read(), customizations)
+                    linecache.updatecache(f.name)
+                    code = compile(f.read(), f.name, 'exec')
+                    ext = {}
+                    exec(code, ext)
+                    _extensions_[file] = ext
+                    extensions.append(ext)
+            except Exception as e:
+                log.warning("Could not load extension file %s: %s", file, str(e))
+                _extensions_[file] = {}
+        else:
+            log.warning("No extensions available at %s", file)
+
+    return extensions if name is None else next((ext[name] for ext in extensions if name in ext), default)
+
+
+_data_keys_ = re.compile("^(X|y|data)(_.+)?$")
 
 
 def call_run(run_fn):
@@ -53,7 +85,7 @@ def call_run(run_fn):
     params = NS.from_dict(json.loads(sys.stdin.read()))
 
     def load_data(name, path, **ignored):
-        if isinstance(path, str) and data_keys.match(name):
+        if isinstance(path, str) and _data_keys_.match(name):
             return name, np.load(path, allow_pickle=True)
         return name, path
 
diff --git a/resources/config.yaml b/resources/config.yaml
index a8ad914d6..8487e7e62 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -50,6 +50,9 @@ benchmarks:
   max_mem_size_mb: -1   # default amount of memory assigned to each automl task. If <= 0, then the amount of memory is computed from os available memory.
   min_vol_size_mb: -1   # default minimum amount of free space required on the volume. If <= 0, skips verification.
 
+extensions_files:
+  - '{user}/extensions.py'
+
 results:
   error_max_length: 200
   save: true    # set by runbenchmark.py

From 2a4ade45006eff9e37255d8be80ba5e17140222d Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 7 Oct 2020 15:57:56 +0200
Subject: [PATCH 2/3] Allow custom metrics to be reported in results

AutoML frameworks generally expect their own signature for a metric
function, so we define an additional amlb-specific signature that lets
the benchmark report the scores as well. Metrics in the amlb format are
denoted by a trailing underscore ("metric_"). A user can therefore
define two functions, e.g. Accuracy and Accuracy_: the former is used
by the AutoML framework, the latter by the amlb.
---
 amlb/results.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/amlb/results.py b/amlb/results.py
index 89f5312fb..3222c89ea 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -17,6 +17,7 @@
 from .datautils import accuracy_score, confusion_matrix, f1_score, log_loss, balanced_accuracy_score, mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score, roc_auc_score, read_csv, write_csv, is_data_frame, to_data_frame
 from .resources import get as rget, config as rconfig, output_dirs
 from .utils import Namespace, backup_file, cached, datetime_iso, memoize, profile
+from frameworks.shared.callee import get_extension
 
 log = logging.getLogger(__name__)
 
@@ -323,6 +324,15 @@ def __init__(self, predictions_df, info=None):
     def evaluate(self, metric):
         if hasattr(self, metric):
             return getattr(self, metric)()
+        else:
+            # A metric may be defined twice, once for the automl system to use (e.g.
+            # as a scikit-learn scorer), and once in the amlb-compatible format.
+            # The amlb-compatible format is marked with a trailing underscore.
+            custom_metric = get_extension(rconfig().extensions_files, f"{metric}_")
+            if custom_metric is None:
+                custom_metric = get_extension(rconfig().extensions_files, metric)
+            if custom_metric is not None:
+                return custom_metric(self)
         # raise ValueError("Metric {metric} is not supported for {type}.".format(metric=metric, type=self.type))
         log.warning("Metric %s is not supported for %s!", metric, self.type)
         return nan

From ba3969de2f0caeaaaf2254a7b9be92ab1bad61cf Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Wed, 7 Oct 2020 16:25:16 +0200
Subject: [PATCH 3/3] Remove the check for custom metric without '_'

No AutoML framework uses the signature that the amlb expects, so
sharing a single metric definition between the framework and the amlb
makes no sense until we change the signature used by the amlb.
---
 amlb/results.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/amlb/results.py b/amlb/results.py
index 3222c89ea..b20ebe8f9 100644
--- a/amlb/results.py
+++ b/amlb/results.py
@@ -329,8 +329,6 @@ def evaluate(self, metric):
             # as a scikit-learn scorer), and once in the amlb-compatible format.
             # The amlb-compatible format is marked with a trailing underscore.
             custom_metric = get_extension(rconfig().extensions_files, f"{metric}_")
-            if custom_metric is None:
-                custom_metric = get_extension(rconfig().extensions_files, metric)
             if custom_metric is not None:
                 return custom_metric(self)
         # raise ValueError("Metric {metric} is not supported for {type}.".format(metric=metric, type=self.type))
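
For reference, below is a minimal sketch of a user extension file wired in through the
new extensions_files entry (default '{user}/extensions.py'). The metric name "balacc",
the use of a plain scikit-learn scorer, and the "truth"/"predictions" attributes on the
result object are illustrative assumptions, not part of these patches; the patches only
establish that the module is exec'd by get_extension(), that a framework integration
resolves the plain name via get_extension(config.extensions, config.metric), and that
result evaluation resolves the same name with a trailing underscore and calls it with
the result object.

    # {user}/extensions.py -- hypothetical example of a custom-metric extension.
    # Assumes scikit-learn is installed in the environment that loads this file.
    from sklearn.metrics import balanced_accuracy_score, make_scorer

    # Plain name: handed to a framework integration through
    # get_extension(config.extensions, config.metric) when a task asks for "balacc".
    # Some integrations may need a framework-specific scorer object instead
    # (e.g. an autosklearn.metrics scorer), so the scorer type here is an assumption.
    balacc = make_scorer(balanced_accuracy_score)


    # Trailing-underscore name: looked up by the patched evaluate() as f"{metric}_"
    # and called with the result object; assumes the result exposes "truth" and
    # "predictions" arrays.
    def balacc_(result):
        return balanced_accuracy_score(result.truth, result.predictions)

Because get_extension() caches each exec'd file, both callables live in one module, which
keeps the framework-side optimization metric and the score reported by the amlb in sync.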