add date_published field to model metadata

janosh · janosh · commit cddda31d9395 · 2023-06-19T20:29:24.000-07:00
diff --git a/.gitattributes b/.gitattributes
@@ -1,3 +1,3 @@
-# exclude Svelte files in figures/ when calculating repo language statistics on GitHub
-figures/* linguist-generated
+# exclude generated plot files when calculating repo language statistics on GitHub
+*/figs/* linguist-generated
 data/**/*.svelte linguist-generated
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -26,14 +26,13 @@
 
 class Files(dict):  # type: ignore
     """Files instance inherits from dict so that .values(), items(), etc. are supported
-    but also allows accessing attributes by dot notation. E.g. FILES.wbm_summary
-    instead of FILES["wbm_summary"]. This enables tab completion in IDEs and
-    auto-updating attribute names across the code base when changing the name of an
-    attribute.
-    Every subclass must set the _root attribute to a path that serves as the root
-    directory w.r.t. which all files will be turned into absolute paths.
-    The _key_map attribute can be used to map attribute names to different names in the
-    dict. This is useful if you want to have keys that are not valid Python identifiers.
+    but also allows accessing attributes by dot notation. E.g. FILES.wbm_summary instead
+    of FILES["wbm_summary"]. This enables tab completion in IDEs and auto-updating
+    attribute names across the code base when changing the name of an attribute. Every
+    subclass must set the _root attribute to a path that serves as the root directory
+    w.r.t. which all files will be turned into absolute paths. The _key_map attribute
+    can be used to map attribute names to different names in the dict. Useful if you
+    want to have keys like 'foo+bar' that are not valid Python identifiers.
     """
 
     def __init__(self) -> None:
@@ -246,13 +245,16 @@ def glob_to_df(
 
 
 def load_df_wbm_preds(
-    models: Sequence[str], pbar: bool = True, id_col: str = "material_id", **kwargs: Any
+    models: Sequence[str] = (*PRED_FILES,),
+    pbar: bool = True,
+    id_col: str = "material_id",
+    **kwargs: Any,
 ) -> pd.DataFrame:
     """Load WBM summary dataframe with model predictions from disk.
 
     Args:
-        models (Sequence[str]): Model names must be keys of the dict
-            matbench_discovery.data.PRED_FILES.
+        models (Sequence[str], optional): Model names must be keys of
+            matbench_discovery.data.PRED_FILES. Defaults to all models.
         pbar (bool, optional): Whether to show progress bar. Defaults to True.
         id_col (str, optional): Column to set as df.index. Defaults to "material_id".
         **kwargs: Keyword arguments passed to glob_to_df().
diff --git a/matbench_discovery/preds.py b/matbench_discovery/preds.py
@@ -10,16 +10,11 @@
 __author__ = "Janosh Riebesell"
 __date__ = "2023-02-04"
 
-models = sorted(
-    "Wrenformer, CGCNN+P, Voronoi Random Forest, MEGNet, M3GNet + MEGNet, "
-    "BOWSR + MEGNet".split(", ")
-)
 e_form_col = "e_form_per_atom_mp2020_corrected"
 each_true_col = "e_above_hull_mp2020_corrected_ppd_mp"
 each_pred_col = "e_above_hull_pred"
 
-df_wbm = load_df_wbm_preds(list(PRED_FILES)).round(3)
-drop_cols = {*PRED_FILES} - {*models}
+df_wbm = load_df_wbm_preds().round(3)
 
 
 df_metrics = pd.DataFrame()
diff --git a/models/bowsr/metadata.yml b/models/bowsr/metadata.yml
@@ -2,6 +2,7 @@ model_name: BOWSR + MEGNet
 model_version: 2022.9.20
 matbench_discovery_version: 1.0
 date_added: "2022-11-17"
+date_published: "2021-04-20"
 authors:
   - name: Yunxing Zuo
     affiliation: UC San Diego
diff --git a/models/cgcnn/metadata.yml b/models/cgcnn/metadata.yml
@@ -2,6 +2,7 @@
   model_version: 0.1.0 # the aviary version
   matbench_discovery_version: 1.0
   date_added: "2022-12-28"
+  date_published: "2017-10-27"
   authors:
     - name: Tian Xie
       email: txie@csail.mit.edu
@@ -34,6 +35,7 @@
   model_version: 0.1.0 # the aviary version
   matbench_discovery_version: 1.0
   date_added: "2023-02-03"
+  date_published: "2022-02-28"
   authors:
     - name: Jason B. Gibson
       affiliation: University of Florida
diff --git a/models/m3gnet/metadata.yml b/models/m3gnet/metadata.yml
@@ -2,6 +2,7 @@
   model_version: 2022.9.20
   matbench_discovery_version: 1.0
   date_added: "2022-09-20"
+  date_published: "2022-02-05"
   authors:
     - name: Chi Chen
       affiliation: UC San Diego
@@ -30,6 +31,7 @@
   model_version: 2022.9.20
   matbench_discovery_version: 1.0
   date_added: "2023-02-03"
+  date_published: "2022-02-05"
   authors:
     - name: Chi Chen
       affiliation: UC San Diego
diff --git a/models/megnet/metadata.yml b/models/megnet/metadata.yml
@@ -2,6 +2,7 @@ model_name: MEGNet
 model_version: 2022.9.20
 matbench_discovery_version: 1.0
 date_added: "2022-11-14"
+date_published: "2021-12-18"
 authors:
   - name: Chi Chen
     affiliation: UC San Diego
diff --git a/models/voronoi/metadata.yml b/models/voronoi/metadata.yml
@@ -2,6 +2,7 @@ model_name: Voronoi Random Forest
 model_version: 1.1.2 # scikit learn version which implements the random forest
 matbench_discovery_version: 1.0
 date_added: "2022-11-26"
+date_published: "2017-07-14"
 authors:
   - name: Logan Ward
     affiliation: Argonne National Laboratory
diff --git a/models/wrenformer/metadata.yml b/models/wrenformer/metadata.yml
@@ -2,6 +2,7 @@ model_name: Wrenformer
 model_version: 0.1.0 # the aviary version
 matbench_discovery_version: 1.0
 date_added: "2022-11-26"
+date_published: "2021-06-21"
 authors:
   - name: Janosh Riebesell
     affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
diff --git a/scripts/scatter_e_above_hull_models.py b/scripts/scatter_e_above_hull_models.py
@@ -33,7 +33,8 @@
 df_melt = df_wbm.melt(
     id_vars=hover_cols,
     var_name=facet_col,
-    value_vars=df_metrics.T.MAE.nsmallest(6).index,
+    # value_vars=df_metrics.T.MAE.nsmallest(6).index,  # top 6 models by MAE
+    value_vars=list(df_metrics),  # all models
     value_name=e_form_pred_col,
 )
 
@@ -112,14 +113,14 @@
     x=each_true_col,
     y=each_pred_col,
     facet_col=facet_col,
-    facet_col_wrap=2,
+    facet_col_wrap=4,
     facet_col_spacing=0.02,
     facet_row_spacing=0.04,
     hover_data=hover_cols,
     hover_name=df_wbm.index.name,
     color=clf_col,
     color_discrete_map=clf_color_map,
-    opacity=0.4,
+    # opacity=0.4,
     range_x=(-xy_max, xy_max),
     range_y=(-xy_max, xy_max),
 )
@@ -131,7 +132,7 @@
 # iterate over subplots and set new title
 for idx, anno in enumerate(fig.layout.annotations, 1):
     traces = [t for t in fig.data if t.xaxis == f"x{idx if idx > 1 else ''}"]
-    assert len(traces) == 4, f"Expected 4 traces, got {len(traces)=}"
+    assert len(traces) in (0, 4), f"Plots be empty or have 4 traces, got {len(traces)=}"
 
     model = anno.text.split("=", 1)[1]
     assert model in df_wbm, f"Unexpected {model=} not in {list(df_wbm)=}"
@@ -182,9 +183,9 @@
 fig.update_xaxes(nticks=5)
 fig.update_yaxes(nticks=5)
 
-# remove legend title and place legend centered above subplots
+# remove legend title, center legend above subplots, use constant legend marker size
 fig.layout.legend.update(
-    title="", orientation="h", x=0.5, xanchor="center", y=1.1, yanchor="top"
+    title="", orientation="h", x=0.5, xanchor="center", y=1.2, itemsizing="constant"
 )
 
 # fig.update_layout(yaxis=dict(scaleanchor="x", scaleratio=1))
@@ -211,4 +212,4 @@
 # %%
 img_name = "each-scatter-models"
 save_fig(fig, f"{STATIC}/{img_name}.webp", scale=4, width=600, height=800)
-save_fig(fig, f"{ROOT}/tmp/figures/{img_name}.pdf")
+save_fig(fig, f"{ROOT}/tmp/figures/{img_name}.pdf", width=1200)
diff --git a/site/src/lib/ModelCard.svelte b/site/src/lib/ModelCard.svelte
@@ -43,12 +43,18 @@
   {/each}
 </nav>
 <p>
-  <span
-    ><Icon icon="ion:ios-calendar" inline />
-    {data.date_added}
+  <span title="Date added">
+    <Icon icon="ion:ios-calendar" inline />
+    Added {data.date_added}
   </span>
-  <span
-    ><Icon icon="carbon:version" inline />
+  {#if data.date_published}
+    <span title="Date published">
+      <Icon icon="ri:calendar-check-line" inline />
+      Published {data.date_published}
+    </span>
+  {/if}
+  <span>
+    <Icon icon="carbon:version" inline />
     Benchmark version: {data.matbench_discovery_version}
   </span>
   <span>
@@ -176,9 +182,9 @@
     place-items: center;
   }
   p {
-    display: flex;
-    gap: 3pt 12pt;
-    flex-wrap: wrap;
+    display: grid;
+    gap: 3pt;
+    grid-template-columns: 1fr 1fr;
   }
   div {
     display: grid;
diff --git a/site/src/lib/index.ts b/site/src/lib/index.ts
@@ -11,6 +11,7 @@ export type ModelMetadata = {
   model_version: string
   matbench_discovery_version: string
   date_added: string
+  date_published?: string
   authors: Author[]
   repo: string
   url?: string
diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -16,7 +16,7 @@
 )
 
 AxLine = Literal["x", "y", "xy", ""]
-models = ["Wrenformer", "CGCNN", "Voronoi Random Forest"]
+models = ["MEGNet", "CGCNN", "Voronoi Random Forest"]
 df_wbm = load_df_wbm_preds(models, nrows=100)
 each_true_col = "e_above_hull_mp2020_corrected_ppd_mp"
 each_pred_col = "e_above_hull_pred"

Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,7 @@`
`16`	`16`	`)`
`17`	`17`
`18`	`18`	`AxLine = Literal["x", "y", "xy", ""]`
`19`		`-models = ["Wrenformer", "CGCNN", "Voronoi Random Forest"]`
	`19`	`+models = ["MEGNet", "CGCNN", "Voronoi Random Forest"]`
`20`	`20`	`df_wbm = load_df_wbm_preds(models, nrows=100)`
`21`	`21`	`each_true_col = "e_above_hull_mp2020_corrected_ppd_mp"`
`22`	`22`	`each_pred_col = "e_above_hull_pred"`