Single source of truth (SSoT) for project URLs in site/package.json

janosh · janosh · commit a4b37a352fe4 · 2023-06-19T20:29:23.000-07:00
Add site/src/routes/models/analyze_preds.py to display missing predictions in ModelCard
diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
@@ -4,7 +4,7 @@ on:
   pull_request:
     branches: [main]
   push:
-    branches: [main, site]
+    branches: [main]
   workflow_dispatch:
 
 # set permissions of GITHUB_TOKEN to allow deployment to GitHub Pages
diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py
@@ -1,5 +1,6 @@
 """Global variables used all across the matbench_discovery package."""
 
+import json
 import os
 import sys
 from datetime import datetime
@@ -18,3 +19,10 @@
 
 timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
 today = timestamp.split("@")[0]
+
+# load URLs from package.json
+
+with open(f"{ROOT}/site/package.json") as file:
+    pkg = json.load(file)
+    pypi_keys_to_npm = dict(Docs="homepage", Repo="repository", Package="package")
+    URLs = {key: pkg[val] for key, val in pypi_keys_to_npm.items()}
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -60,7 +60,7 @@ def load_train_test(
     Recognized data keys are mp-computed-structure-entries, mp-elemental-ref-energies,
     mp-energies, mp-patched-phase-diagram, wbm-computed-structure-entries,
     wbm-initial-structures, wbm-summary. See
-    https://matbench-discovery.janosh.dev/how-to-use for brief data descriptions.
+    https://janosh.github.io/matbench-discovery/how-to-contribute for brief data descriptions.
 
     Args:
         data_names (str | list[str], optional): Which parts of the MP/WBM dataset to load.
@@ -140,11 +140,11 @@ def load_train_test(
 
 PRED_FILENAMES = {
     "CGCNN": "cgcnn/2022-11-23-test-cgcnn-wbm-IS2RE/cgcnn-ensemble-preds.csv",
-    "Voronoi RF": "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv",
+    "Voronoi Random Forest": "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv",
     "Wrenformer": "wrenformer/2022-11-15-wrenformer-IS2RE-preds.csv",
     "MEGNet": "megnet/2022-11-18-megnet-wbm-IS2RE/megnet-e-form-preds.csv",
     "M3GNet": "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv",
-    "BOWSR MEGNet": "bowsr/2022-11-22-bowsr-megnet-wbm-IS2RE.csv",
+    "BOWSR MEGNet": "bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.csv",
 }
 
 
diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -612,7 +612,7 @@ def cumulative_precision_recall(
             # requires fixing index mismatch due to df sub-sampling above
             # customdata=dict(
             #     df_cum.reset_index()
-            #     .pivot(index="index", columns="metric")["Voronoi RF above hull pred"]
+            #     .pivot(index="index", columns="metric")
             #     .items()
             # ),
             **kwargs,
diff --git a/models/bowsr/join_bowsr_results.py b/models/bowsr/join_bowsr_results.py
@@ -57,6 +57,10 @@
 )
 
 
+# %% remove redundant column after sanity check
+df_bowsr = df_bowsr.drop(columns=[f"energy_bowsr_{energy_model}"])
+
+
 # %%
 pymatviz.density_scatter(
     x=df_bowsr.e_form_per_atom_bowsr_megnet,
@@ -71,5 +75,5 @@
 # save energy and formation energy as CSV for fast loading
 df_bowsr.select_dtypes("number").to_csv(out_path.replace(".json.gz", ".csv"))
 
-# in_path = f"{ROOT}/models/bowsr/2022-11-22-bowsr-megnet-wbm-IS2RE.json.gz"
+in_path = f"{ROOT}/models/bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.json.gz"
 # df_bowsr = pd.read_json(in_path).set_index("material_id")
diff --git a/readme.md b/readme.md
@@ -13,7 +13,7 @@ Matbench Discovery
 
 </h4>
 
-Matbench Discovery is an [interactive leaderboard](https://matbench-discovery.janosh.dev/figures) and associated [PyPI package](https://pypi.org/project/matbench-discovery) for benchmarking ML energy models on a task designed to closely emulate a real-world computational materials discovery workflow. In it, these models take on the role of a triaging step prior to DFT to decide how to allocate limited compute budget for structure relaxations.
+Matbench Discovery is an [interactive leaderboard](https://janosh.github.io/matbench-discovery) and associated [PyPI package](https://pypi.org/project/matbench-discovery) for benchmarking ML energy models on a task designed to closely emulate a real-world computational materials discovery workflow. In it, these models take on the role of a triaging step prior to DFT to decide how to allocate limited compute budget for structure relaxations.
 
 We welcome contributions that add new models to the leaderboard through [GitHub PRs](https://github.com/janosh/matbench-discovery/pulls). See the [usage and contributing guide](https://janosh.github.io/matbench-discovery/how-to-contribute) for details.
 
diff --git a/scripts/cumulative_clf_metrics.py b/scripts/cumulative_clf_metrics.py
@@ -13,7 +13,7 @@
 # %%
 models = (
     # Wren, CGCNN IS2RE, CGCNN RS2RE, CGCNN
-    "Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
+    "Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
 ).split(", ")
 
 df_wbm = load_df_wbm_with_preds(models=models).round(3)
diff --git a/scripts/hist_classified_stable_vs_hull_dist_models.py b/scripts/hist_classified_stable_vs_hull_dist_models.py
@@ -23,7 +23,7 @@
 
 # %%
 models = sorted(
-    "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
+    "CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
 )
 df_wbm = load_df_wbm_with_preds(models=models).round(3)
 
diff --git a/scripts/metrics_table.py b/scripts/metrics_table.py
@@ -26,7 +26,7 @@
             display_name={"$regex": "cgcnn-robust-formation_energy_per_atom"},
         ),
     ),
-    "Voronoi RF": dict(
+    "Voronoi Random Forest": dict(
         n_runs=68,
         filters=dict(
             created_at={"$gt": "2022-11-17", "$lt": "2022-11-28"},
@@ -100,7 +100,7 @@
 
 
 # on 2022-11-28:
-# run_times = {'Voronoi RF': 739608,
+# run_times = {'Voronoi Random Forest': 739608,
 #  'Wrenformer': 208399,
 #  'MEGNet': 12396,
 #  'M3GNet': 301138,
diff --git a/scripts/rolling_mae_vs_hull_dist_all_models.py b/scripts/rolling_mae_vs_hull_dist_all_models.py
@@ -11,7 +11,7 @@
 
 # %%
 models = sorted(
-    "Wrenformer, CGCNN, Voronoi RF, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
+    "Wrenformer, CGCNN, Voronoi Random Forest, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
 )
 e_form_col = "e_form_per_atom_mp2020_corrected"
 e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"
diff --git a/scripts/scatter_e_above_hull_models.py b/scripts/scatter_e_above_hull_models.py
@@ -16,7 +16,7 @@
 # %%
 print(f"loadable models: {list(PRED_FILENAMES)}")
 models = sorted(
-    "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
+    "CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
 )
 df_wbm = load_df_wbm_with_preds(models=models).round(3)
 
diff --git a/site/package.json b/site/package.json
@@ -4,6 +4,7 @@
   "author": "Janosh Riebesell <janosh.riebesell@gmail.com>",
   "homepage": "https://janosh.github.io/matbench-discovery",
   "repository": "https://github.com/janosh/matbench-discovery",
+  "package": "https://pypi.org/project/matbench-discovery",
   "license": "MIT",
   "type": "module",
   "bugs": "https://github.com/janosh/matbench-discovery/issues",
diff --git a/site/src/app.css b/site/src/app.css
@@ -167,3 +167,7 @@ caption {
 :target {
   animation: highlight-scroll-target 3s;
 }
+
+.pull-left {
+  margin-left: calc(0.8 * (-50vw + 50cqw));
+}
diff --git a/site/src/lib/ModelCard.svelte b/site/src/lib/ModelCard.svelte
@@ -1,12 +1,14 @@
 <script lang="ts">
   import { repository } from '$site/package.json'
   import Icon from '@iconify/svelte'
+  import { pretty_num } from 'sveriodic-table/labels'
   import type { ModelMetadata } from './types'
 
   export let key: string
   export let data: ModelMetadata
 
   const { model_name, repo, doi, preprint, url, date_added } = data
+  const { missing_preds, test_set_size } = data
 </script>
 
 <h2>{model_name}</h2>
@@ -21,45 +23,55 @@
 <p>
   Date added: {new Date(date_added).toISOString().split(`T`)[0]}
   &nbsp;&bull;&nbsp; Benchmark version: {data.matbench_discovery_version}
+  &nbsp;&bull;&nbsp; Missing predictions:
+  {pretty_num(missing_preds)}
+  <small>({((100 * missing_preds) / test_set_size).toFixed(2)}%)</small>
 </p>
-<strong>Authors</strong>
-<section>
-  <ul>
-    {#each data.authors as { name, email, orcid, affiliation, url }}
-      <li>
-        <span title={affiliation}>{name}</span>
-        {#if email}
-          [<a href="mailto:{email}">email</a>]
-        {/if}
-        {#if orcid}
-          [<a href={orcid}>Orcid</a>]
-        {/if}
-        {#if url}
-          [<a href={url}>web</a>]
-        {/if}
-      </li>
-    {/each}
-  </ul>
-  <strong>Package versions</strong>
-  <ul>
-    {#each Object.entries(data.requirements) as [name, version]}
-      <li>
-        {#if ![`aviary`].includes(name)}
-          {@const href = `https://pypi.org/project/${name}/${version}`}
-          {name}: <a {href}>{version}</a>
-        {:else}
-          {name}: {version}
-        {/if}
-      </li>
-    {/each}
-  </ul>
-</section>
+<div>
+  <section>
+    <h3>Authors</h3>
+    <ul>
+      {#each data.authors as { name, email, orcid, affiliation, url }}
+        <li>
+          <span title={affiliation}>{name}</span>
+          {#if email}
+            [<a href="mailto:{email}">email</a>]
+          {/if}
+          {#if orcid}
+            [<a href={orcid}>Orcid</a>]
+          {/if}
+          {#if url}
+            [<a href={url}>web</a>]
+          {/if}
+        </li>
+      {/each}
+    </ul>
+  </section>
+  <section>
+    <h3>Package versions</h3>
+    <ul>
+      {#each Object.entries(data.requirements) as [name, version]}
+        <li>
+          {#if ![`aviary`].includes(name)}
+            {@const href = `https://pypi.org/project/${name}/${version}`}
+            {name}: <a {href}>{version}</a>
+          {:else}
+            {name}: {version}
+          {/if}
+        </li>
+      {/each}
+    </ul>
+  </section>
+</div>
 
 <!-- TODO add table with performance metrics (F1, Acc, Recall, Precision) for each model -->
 <style>
   h2 {
     margin: 5pt 0 1ex;
   }
+  h3 {
+    margin: 0;
+  }
   ul {
     list-style-type: disc;
   }
@@ -71,11 +83,16 @@
   }
   nav > span {
     display: flex;
-    gap: 0.5em;
+    gap: 6pt;
     place-items: center;
   }
-  strong {
-    display: block;
-    margin: 1em 0 5pt;
+  div {
+    display: flex;
+    gap: 15pt;
+    margin: 1em 0;
+    justify-content: space-between;
+  }
+  small {
+    font-weight: lighter;
   }
 </style>
diff --git a/site/src/routes/models/+page.server.ts b/site/src/routes/models/+page.server.ts
@@ -1,13 +1,19 @@
 import type { ModelMetadata } from '$lib/types'
 import { dirname } from 'path'
 import type { PageServerLoad } from './$types'
+import analysis from './2023-01-23-pred-analysis.json'
 
 export const load: PageServerLoad = async () => {
-  const models: [string, ModelMetadata][] = Object.entries(
-    import.meta.glob(`$root/models/**/metadata.yml`, {
-      eager: true,
-    })
-  ).map(([key, module]) => [dirname(key), module.default])
+  const yml = import.meta.glob(`$root/models/**/metadata.yml`, {
+    eager: true,
+  })
+  const models: [string, ModelMetadata][] = Object.entries(yml).map(
+    ([key, module]) => {
+      const metadata = module.default as ModelMetadata
+      const computed = analysis[metadata.model_name] ?? {}
+      return [dirname(key), { ...metadata, ...computed }]
+    }
+  )
 
   return { models }
 }
diff --git a/site/src/routes/models/+page.svelte b/site/src/routes/models/+page.svelte
@@ -5,10 +5,10 @@
   export let data: PageData
 </script>
 
-<h1>Models</h1>
+<h1 class="pull-left">Models</h1>
 
-<ol>
-  {#each data.models as [key, metadata], idx}
+<ol class="pull-left">
+  {#each data.models as [key, metadata]}
     <li>
       <ModelCard {key} data={metadata} />
     </li>
@@ -20,9 +20,11 @@
     display: grid;
     gap: 2em;
     list-style: none;
+    grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
   }
   ol > li {
     background-color: rgba(255, 255, 255, 0.05);
-    padding: 3pt 9pt 5pt;
+    padding: 3pt 10pt 7pt;
+    border-radius: 3pt;
   }
 </style>
diff --git a/site/src/routes/paper/+page.svx b/site/src/routes/paper/+page.svx
@@ -49,8 +49,6 @@ date: Jan 31, 2023
   import { References } from '$lib'
   import './heading-number.css' // CSS to auto-number headings
   import CumulativeClfMetrics from '$figs/2023-01-19-cumulative-clf-metrics.svelte'
-
-  const style = "margin-left: calc(0.8 * (-50vw + 50cqw));"
 </script>
 
 # {title}<br><small>{subtitle}</small>
@@ -193,7 +191,7 @@ Our benchmark is designed to make [adding future models easy](/how-to-contribute
 
 <div>
 {#if typeof document !== `undefined`}
-  <CumulativeClfMetrics {style} />
+  <CumulativeClfMetrics class="pull-left" />
 {/if}
 </div>
 
diff --git a/tests/test_data.py b/tests/test_data.py
@@ -111,8 +111,11 @@ def test_load_train_test_doc_str() -> None:
     for name in DATA_FILENAMES:
         assert name in doc_str, f"Missing data {name=} in load_train_test() docstring"
 
-    # TODO refactor to load site URL from site/package.json for SSoT
-    assert "https://matbench-discovery.janosh.dev" in doc_str
+    route = "/how-to-contribute"
+    from matbench_discovery import URLs
+
+    assert f"{URLs['Docs']}{route}" in doc_str
+    assert os.path.isdir(f"{ROOT}/site/src/routes/{route}")
 
 
 @pytest.mark.skipif(website_down, reason=f"{RAW_REPO_URL} unreachable")
diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -15,7 +15,7 @@
     rolling_mae_vs_hull_dist,
 )
 
-models = ["Wrenformer", "CGCNN", "Voronoi RF"]
+models = ["Wrenformer", "CGCNN", "Voronoi Random Forest"]
 df_wbm = load_df_wbm_with_preds(models=models, nrows=100)
 e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"
 e_form_col = "e_form_per_atom_mp2020_corrected"

Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@`
`23`	`23`
`24`	`24`	`# %%`
`25`	`25`	`models = sorted(`
`26`		`- "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")`
	`26`	`+ "CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")`
`27`	`27`	`)`
`28`	`28`	`df_wbm = load_df_wbm_with_preds(models=models).round(3)`
`29`	`29`
Original file line number	Diff line number	Diff line change
`@@ -11,7 +11,7 @@`
`11`	`11`
`12`	`12`	`# %%`
`13`	`13`	`models = sorted(`
`14`		`- "Wrenformer, CGCNN, Voronoi RF, MEGNet, M3GNet, BOWSR MEGNet".split(", ")`
	`14`	`+ "Wrenformer, CGCNN, Voronoi Random Forest, MEGNet, M3GNet, BOWSR MEGNet".split(", ")`
`15`	`15`	`)`
`16`	`16`	`e_form_col = "e_form_per_atom_mp2020_corrected"`
`17`	`17`	`e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"`
Original file line number	Diff line number	Diff line change
`@@ -16,7 +16,7 @@`
`16`	`16`	`# %%`
`17`	`17`	`print(f"loadable models: {list(PRED_FILENAMES)}")`
`18`	`18`	`models = sorted(`
`19`		`- "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")`
	`19`	`+ "CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")`
`20`	`20`	`)`
`21`	`21`	`df_wbm = load_df_wbm_with_preds(models=models).round(3)`
`22`	`22`
Original file line number	Diff line number	Diff line change
`@@ -167,3 +167,7 @@ caption {`
`167`	`167`	`:target {`
`168`	`168`	`animation: highlight-scroll-target 3s;`
`169`	`169`	`}`
	`170`	`+`
	`171`	`+.pull-left {`
	`172`	`+ margin-left: calc(0.8 * (-50vw + 50cqw));`
	`173`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@`
`15`	`15`	`rolling_mae_vs_hull_dist,`
`16`	`16`	`)`
`17`	`17`
`18`		`-models = ["Wrenformer", "CGCNN", "Voronoi RF"]`
	`18`	`+models = ["Wrenformer", "CGCNN", "Voronoi Random Forest"]`
`19`	`19`	`df_wbm = load_df_wbm_with_preds(models=models, nrows=100)`
`20`	`20`	`e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"`
`21`	`21`	`e_form_col = "e_form_per_atom_mp2020_corrected"`