janosh
diff --git a/‎data/wbm/eda.py
+4-4 b/‎data/wbm/eda.py
+4-4
diff --git a/‎scripts/model_figs/per_element_errors.py
+1-1 b/‎scripts/model_figs/per_element_errors.py
+1-1
diff --git a/‎site/src/lib/Nav.svelte
+3-2 b/‎site/src/lib/Nav.svelte
+3-2
diff --git a/‎site/src/routes/+layout.svelte
+3-7 b/‎site/src/routes/+layout.svelte
+3-7
diff --git a/‎site/src/routes/about-the-data/+page.svelte ‎site/src/routes/data/+page.svelte b/‎site/src/routes/about-the-data/+page.svelte ‎site/src/routes/data/+page.svelte
diff --git a/‎site/src/routes/about-the-data/mp-element-counts-composition.json ‎site/src/routes/data/mp-element-counts-composition.json b/‎site/src/routes/about-the-data/mp-element-counts-composition.json ‎site/src/routes/data/mp-element-counts-composition.json
diff --git a/‎site/src/routes/about-the-data/mp-element-counts-occurrence.json ‎site/src/routes/data/mp-element-counts-occurrence.json b/‎site/src/routes/about-the-data/mp-element-counts-occurrence.json ‎site/src/routes/data/mp-element-counts-occurrence.json
diff --git a/‎site/src/routes/about-the-data/tmi/+page.svelte ‎site/src/routes/data/tmi/+page.svelte b/‎site/src/routes/about-the-data/tmi/+page.svelte ‎site/src/routes/data/tmi/+page.svelte
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-arity=2.json ‎site/src/routes/data/wbm-element-counts-arity=2.json b/‎site/src/routes/about-the-data/wbm-element-counts-arity=2.json ‎site/src/routes/data/wbm-element-counts-arity=2.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-arity=3.json ‎site/src/routes/data/wbm-element-counts-arity=3.json b/‎site/src/routes/about-the-data/wbm-element-counts-arity=3.json ‎site/src/routes/data/wbm-element-counts-arity=3.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-arity=4.json ‎site/src/routes/data/wbm-element-counts-arity=4.json b/‎site/src/routes/about-the-data/wbm-element-counts-arity=4.json ‎site/src/routes/data/wbm-element-counts-arity=4.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-arity=5.json ‎site/src/routes/data/wbm-element-counts-arity=5.json b/‎site/src/routes/about-the-data/wbm-element-counts-arity=5.json ‎site/src/routes/data/wbm-element-counts-arity=5.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-batch=1.json ‎site/src/routes/data/wbm-element-counts-batch=1.json b/‎site/src/routes/about-the-data/wbm-element-counts-batch=1.json ‎site/src/routes/data/wbm-element-counts-batch=1.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-batch=2.json ‎site/src/routes/data/wbm-element-counts-batch=2.json b/‎site/src/routes/about-the-data/wbm-element-counts-batch=2.json ‎site/src/routes/data/wbm-element-counts-batch=2.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-batch=3.json ‎site/src/routes/data/wbm-element-counts-batch=3.json b/‎site/src/routes/about-the-data/wbm-element-counts-batch=3.json ‎site/src/routes/data/wbm-element-counts-batch=3.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-batch=4.json ‎site/src/routes/data/wbm-element-counts-batch=4.json b/‎site/src/routes/about-the-data/wbm-element-counts-batch=4.json ‎site/src/routes/data/wbm-element-counts-batch=4.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-batch=5.json ‎site/src/routes/data/wbm-element-counts-batch=5.json b/‎site/src/routes/about-the-data/wbm-element-counts-batch=5.json ‎site/src/routes/data/wbm-element-counts-batch=5.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-composition.json ‎site/src/routes/data/wbm-element-counts-composition.json b/‎site/src/routes/about-the-data/wbm-element-counts-composition.json ‎site/src/routes/data/wbm-element-counts-composition.json
diff --git a/‎site/src/routes/about-the-data/wbm-element-counts-occurrence.json ‎site/src/routes/data/wbm-element-counts-occurrence.json b/‎site/src/routes/about-the-data/wbm-element-counts-occurrence.json ‎site/src/routes/data/wbm-element-counts-occurrence.json
diff --git a/‎site/src/routes/preprint/+page.md
+2-2 b/‎site/src/routes/preprint/+page.md
+2-2
@@ -28,7 +28,7 @@
 """
 
 module_dir = os.path.dirname(__file__)
-about_data_page = f"{ROOT}/site/src/routes/about-the-data"
+data_page = f"{ROOT}/site/src/routes/data"
 
 
 # %% load MP training set
@@ -54,15 +54,15 @@
 
 # %%
 for dataset, count_mode, elem_counts in all_counts:
-    elem_counts.to_json(f"{about_data_page}/{dataset}-element-counts-{count_mode}.json")
+    elem_counts.to_json(f"{data_page}/{dataset}-element-counts-{count_mode}.json")
 
 
 # %% export element counts by WBM step to JSON
 df_wbm["step"] = df_wbm.index.str.split("-").str[1].astype(int)
 assert df_wbm.step.between(1, 5).all()
 for batch in range(1, 6):
     count_elements(df_wbm[df_wbm.step == batch].formula).to_json(
-        f"{about_data_page}/wbm-element-counts-{batch=}.json"
+        f"{data_page}/wbm-element-counts-{batch=}.json"
     )
 
 # export element counts by arity (how many elements in the formula)
@@ -71,7 +71,7 @@
 
 for arity, df_mp in df_wbm.groupby(df_wbm[comp_col].map(len)):
     count_elements(df_mp.formula).to_json(
-        f"{about_data_page}/wbm-element-counts-{arity=}.json"
+        f"{data_page}/wbm-element-counts-{arity=}.json"
     )
 
 
 
@@ -50,7 +50,7 @@
 # %% compute number of samples per element in training set
 # counting element occurrences not weighted by composition, assuming models don't learn
 # much more about iron and oxygen from Fe2O3 than from FeO
-counts_path = f"{ROOT}/site/src/routes/about-the-data/mp-element-counts-occurrence.json"
+counts_path = f"{ROOT}/site/src/routes/data/mp-element-counts-occurrence.json"
 df_elem_err = pd.read_json(counts_path, typ="series")
 train_count_col = "MP Occurrences"
 df_elem_err = df_elem_err.reset_index(name=train_count_col).set_index("index")
 
@@ -12,9 +12,10 @@
 </script>
 
 <nav {style}>
-  {#each routes as href, idx}
+  {#each routes as route, idx}
+    {@const [title, href] = Array.isArray(route) ? route : [route, route]}
     {#if idx > 0}<strong>&bull;</strong>{/if}
-    <a {href} aria-current={is_current(href)} class="link">{href}</a>
+    <a {href} aria-current={is_current(href)} class="link">{title}</a>
   {/each}
 </nav>
 
 
@@ -19,8 +19,8 @@
 
   $: description = {
     '/': `Benchmarking machine learning energy models for materials discovery.`,
-    '/about-the-data': `Details about provenance, chemistry and energies in the benchmark's train and test set.`,
-    '/about-the-data/tmi': `Too much information on the benchmark's data.`,
+    '/data': `Details about provenance, chemistry and energies in the benchmark's train and test set.`,
+    '/data/tmi': `Too much information on the benchmark's data.`,
     '/api': `API docs for the Matbench Discovery PyPI package.`,
     '/contribute': `Steps for contributing a new model to the benchmark.`,
     '/models': `Details on each model sortable by metrics.`,
@@ -70,14 +70,10 @@
 
 <Toc {headingSelector} breakpoint={1250} minItems={3} />
 
-{#if url !== `/`}
-  <a href="/" aria-label="Back to index page">&laquo; home</a>
-{/if}
-
 <GitHubCorner href={repository} />
 
 <main>
-  <Nav routes={routes.filter((route) => route != `/changelog`)} />
+  <Nav routes={[[`/about`, `/`], ...routes.filter((route) => route != `/changelog`)]} />
 
   <slot />
 
 
@@ -235,7 +235,7 @@ The results for M3GNet and MACE depart from the trend that F1 is rank-correlated
 Of all models, M3GNet achieves the highest true positive rate (TPR) but an unusually low true negative rate (TNR).
 A similar trend is seen for MACE. @fig:rolling-mae-vs-hull-dist-models provides a visual understanding of this observation.
 M3GNet and MACE have the lowest rolling mean of the absolute errors (rolling MAE) as a function of hull distance for materials above the convex hull (see right half of plot) but incur comparably large errors for materials below the hull (left half of plot).
-Since $\text{TPR} = \frac{\text{TN}}{\text{TN} + \text{FP}}$, lower error for energies above the hull increases both TN and decreases FP, resulting in the high TPR values observed.
+Since $\text{TPR} = \frac{\text{TP}}{\text{TP} + \text{FN}}$, lower error for energies above the hull both increases TN and decreases FP, resulting in the high TPR values observed.
 
 The reason CGCNN+P achieves better regression metrics than CGCNN but is still worse as a classifier becomes apparent from @fig:hist-clf-pred-hull-dist-models by noting that the CGCNN+P histogram is more sharply peaked at the 0 hull distance stability threshold.
 This causes even small errors in the predicted convex hull distance to be large enough to invert a classification.
@@ -449,7 +449,7 @@ BOWSR has the largest median error, while Voronoi RF has the largest IQR. Note t
 
 > @label:fig:hist-clf-pred-hull-dist-models Distribution of model-predicted hull distance colored by stability classification. Models are sorted from top to bottom by F1 score. The thickness of the red and yellow bands shows how often models misclassify as a function of how far away from the convex hull they place a material. While CHGNet and M3GNet perform almost equally well overall, these plots reveal that they do so via different trade-offs. M3GNet commits fewer false negatives but more false positive predictions compared to CHGNet. In a real discovery campaign, false positives have a higher opportunity cost than false negatives since they result in wasted DFT relaxations or even synthesis time in the lab. A false negative by contrast is just one missed opportunity out of many. This observation is also reflected in the higher TPR and lower TNR of M3GNet vs CHGNet in @fig:metrics-table, as well as the lower rolling MAE for CHGNet vs M3GNet on the stable side (left half) of @fig:rolling-mae-vs-hull-dist-models and vice-versa on the unstable side (right half).
 
-Note the CGCNN+P histogram is more strongly peaked than CGCNN's which agrees better with the actual DFT ground truth [distribution of hull distances](/about-the-data#--target-distribution) in our test set. This explains why CGCNN+P performs better as a regressor, but also reveals how it can perform simultaneously worse as a classifier. By moving predictions closer to the stability threshold at 0 eV/atom above the hull, even small errors are significant enough to tip a classification over the threshold.
+Note the CGCNN+P histogram is more strongly peaked than CGCNN's which agrees better with the actual DFT ground truth [distribution of hull distances](/data#--target-distribution) in our test set. This explains why CGCNN+P performs better as a regressor, but also reveals how it can perform simultaneously worse as a classifier. By moving predictions closer to the stability threshold at 0 eV/atom above the hull, even small errors are significant enough to tip a classification over the threshold.
 
 ## Measuring extrapolation performance from WBM batch robustness