Skip to content

Commit 387722a

Browse files
committed
fix voronoi RF metadata.yml
write custom svelte preprocessor to turn markdown citation syntax into HTML anchors, add References.svelte
1 parent 4ae422d commit 387722a

File tree

10 files changed

+108
-64
lines changed

10 files changed

+108
-64
lines changed

data/mp/get_mp_energies.py

+26-8
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from aviary.utils import as_dict_handler
66
from aviary.wren.utils import get_aflow_label_from_spglib
77
from mp_api.client import MPRester
8-
from pymatviz import density_scatter
8+
from pymatviz.utils import annotate_mae_r2
99
from tqdm import tqdm
1010

1111
from matbench_discovery import today
@@ -57,9 +57,12 @@
5757

5858
df["wyckoff_spglib"] = [get_aflow_label_from_spglib(x) for x in tqdm(df.structure)]
5959

60-
df.to_json(f"{module_dir}/{today}-mp-energies.json.gz", default_handler=as_dict_handler)
60+
df.reset_index().to_json(
61+
f"{module_dir}/{today}-mp-energies.json.gz", default_handler=as_dict_handler
62+
)
6163

6264
# df = pd.read_json(f"{module_dir}/2022-08-13-mp-energies.json.gz")
65+
# df = pd.read_json(f"{module_dir}/2023-01-10-mp-energies.json.gz")
6366

6467

6568
# %% reproduce fig. 1b from https://arxiv.org/abs/2001.10591 (as data consistency check)
@@ -70,15 +73,30 @@
7073
backend="matplotlib",
7174
xlim=[-5, 1],
7275
ylim=[-1, 1],
73-
color=df.decomposition_enthalpy.map(lambda x: "red" if x > 0 else "blue"),
76+
color=(df.decomposition_enthalpy > 0).map({True: "red", False: "blue"}),
7477
title=f"{today} - {len(df):,} MP entries",
7578
)
79+
80+
annotate_mae_r2(df.formation_energy_per_atom, df.decomposition_enthalpy)
7681
# result on 2023-01-10: plots match. no correlation between formation energy and decomposition
7782
# enthalpy. R^2 = -1.571, MAE = 1.604
78-
ax.figure.savefig(f"{module_dir}/{today}-mp-decomp-enth-vs-e-form.png", dpi=300)
83+
# ax.figure.savefig(f"{module_dir}/{today}-mp-decomp-enth-vs-e-form.png", dpi=300)
84+
7985

80-
ax = density_scatter(
81-
df.formation_energy_per_atom,
82-
df.decomposition_enthalpy,
86+
# %% scatter plot energy above convex hull vs decomposition enthalpy
87+
# https://berkeleytheory.slack.com/archives/C16RE1TUN/p1673887564955539
88+
mask_above_line = df.energy_above_hull - df.decomposition_enthalpy.clip(0) > 0.1
89+
ax = df.plot.scatter(
90+
x="decomposition_enthalpy",
91+
y="energy_above_hull",
92+
color=mask_above_line.map({True: "red", False: "blue"})
93+
# backend="plotly",
94+
# hover_data=["index", "formula_pretty", "formation_energy_per_atom"],
95+
)
96+
# most points lie on line y=x for x > 0 and y = 0 for x < 0.
97+
n_above_line = sum(mask_above_line)
98+
ax.set(
99+
title=f"{n_above_line:,} / {len(df):,} = {n_above_line/len(df):.1%} "
100+
"MP materials with\nenergy_above_hull - decomposition_enthalpy.clip(0) > 0.1"
83101
)
84-
ax.set(xlim=[-5, 1], ylim=[-1, 1])
102+
# ax.figure.savefig(f"{module_dir}/{today}-mp-e-above-hull-vs-decomp-enth.png", dpi=300)

data/wbm/readme.md

+6-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ Since repeated substitutions should - on average - increase chemical dissimilari
1010

1111
## 🆔 &thinsp; About the IDs
1212

13-
The first integer in each material ID ranging from 1 to 5 and coming right after the prefix `wbm-` indicates the substitution step, i.e. in which iteration of the substitution process was this material generated. Each iteration has varying numbers of materials which are counted by the 2nd integer. Note this 2nd number is not always consecutive. A small number of materials (~0.2%) were removed by the data processing steps detailed below. Don't be surprised to find an ID like `wbm-3-70804` followed by `wbm-3-70807`.
13+
The first integer in each material ID, ranging from 1 to 5 and coming right after the prefix `wbm-`, indicates the substitution count, i.e. how many times one element was replaced with another chemically similar one starting from an MP source structure. Intuitively, the higher this number, the more chance we've had to diffuse away from the MP training set into uncharted regions of materials space. On average, we expect model performance to decrease with substitution count as structures further from the training set require more extrapolation.
14+
15+
Each iteration has a varying number of materials, which are counted by the 2nd integer. Note that this 2nd number is not always consecutive: a small number of materials (~0.2%) were removed by the data-cleaning steps detailed below, so don't be surprised to find an ID like `wbm-3-70804` followed by `wbm-3-70807`.
1416

1517
## 🪓 &thinsp; Data processing steps
1618

@@ -75,11 +77,11 @@ materialscloud:2021.68 includes a readme file with a description of the dataset,
7577
<slot name="wbm-elements-heatmap">
7678
<img src="./2023-01-08-wbm-elements.svg" alt="Periodic table log heatmap of WBM elements">
7779
</slot>
78-
<caption>Heatmap of WBM training set element counts</caption>
80+
<caption>Element counts of the test set, which consists of 256,963 WBM <code>ComputedStructureEntries</code></caption>
7981

80-
which compares as follows to the training set (all 146323 MP ComputedStructureEntries)
82+
By comparison, the training set of MP `ComputedStructureEntries` has the following element distribution:
8183

8284
<slot name="mp-elements-heatmap">
8385
<img src="./2023-01-08-mp-elements.svg" alt="Periodic table log heatmap of MP elements">
8486
</slot>
85-
<caption>Heatmap of MP test set element counts</caption>
87+
<caption>Element counts of the training set, which consists of 146,323 MP <code>ComputedStructureEntries</code></caption>

models/voronoi/metadata.yml

+10-8
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,17 @@ model_version: 1.1.2 # scikit learn version which implements the random forest
33
matbench_discovery_version: 1.0
44
date_added: 2022-11-26
55
authors:
6-
- name: Rhys Goodall
7-
affiliation: University of Cambridge
8-
orcid: https://orcid.org/0000-0002-6589-1700
9-
- name: Janosh Riebesell
10-
affiliation: University of Cambridge, Lawrence Berkeley National Laboratory
11-
12-
orcid: https://orcid.org/0000-0001-5233-3462
6+
- name: Logan Ward
7+
affiliation: Argonne National Laboratory
8+
9+
orcid: https://orcid.org/0000-0002-1323-5939
10+
twitter: WardLT2
11+
- name: Chris Wolverton
12+
affiliation: Northwestern University
13+
14+
orcid: https://orcid.org/0000-0003-2248-474X
1315
repo: https://github.com/janosh/matbench-discovery
14-
doi: https://doi.org/10.1126/sciadv.abn4117
16+
doi: https://doi.org/10.1103/PhysRevB.96.024104
1517
preprint: https://arxiv.org/abs/2106.11132
1618
requirements:
1719
matminer: 0.8.0

site/package.json

+6-6
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,27 @@
1818
"devDependencies": {
1919
"@iconify/svelte": "^3.0.1",
2020
"@rollup/plugin-yaml": "^4.0.1",
21-
"@sveltejs/adapter-static": "^1.0.1",
22-
"@sveltejs/kit": "^1.0.11",
21+
"@sveltejs/adapter-static": "^1.0.3",
22+
"@sveltejs/kit": "^1.1.1",
2323
"@sveltejs/vite-plugin-svelte": "^2.0.2",
2424
"@typescript-eslint/eslint-plugin": "^5.48.1",
2525
"@typescript-eslint/parser": "^5.48.1",
26-
"eslint": "^8.31.0",
26+
"eslint": "^8.32.0",
2727
"eslint-plugin-svelte3": "^4.0.0",
2828
"hastscript": "^7.2.0",
2929
"katex": "^0.16.4",
3030
"mdsvex": "^0.10.6",
31-
"prettier": "^2.8.2",
31+
"prettier": "^2.8.3",
3232
"prettier-plugin-svelte": "^2.9.0",
3333
"rehype-autolink-headings": "^6.1.1",
3434
"rehype-katex-svelte": "^1.1.2",
3535
"rehype-slug": "^5.1.0",
36-
"remark-math": "3.0.0",
36+
"remark-math": "5.1.1",
3737
"svelte": "^3.55.1",
3838
"svelte-check": "^3.0.2",
3939
"svelte-preprocess": "^5.0.0",
4040
"svelte-toc": "^0.5.2",
41-
"svelte-zoo": "^0.2.0",
41+
"svelte-zoo": "^0.2.1",
4242
"svelte2tsx": "^0.6.0",
4343
"sveriodic-table": "^0.1.4",
4444
"tslib": "^2.4.1",

site/src/app.css

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
--toc-min-width: 16em;
1313
--toc-active-bg: darkcyan;
1414

15-
--ghc-color: var(--night);
16-
--ghc-bg: white;
15+
--zoo-github-corner-color: var(--night);
16+
--zoo-github-corner-bg: white;
1717
}
1818
body {
1919
background: var(--night);

site/src/lib/References.svelte

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<script lang="ts">
2+
import type { Reference } from './types'
3+
4+
export let references: Reference[]
5+
</script>
6+
7+
<ol>
8+
{#each references as { title, id, author, DOI, URL, issued }, idx}
9+
<li>
10+
<strong {id}>{title}</strong>
11+
<p>
12+
{@html author.map((a) => `${a.given} ${a.family}`).join(`, &thinsp; `)}
13+
</p>
14+
<p>
15+
{#if DOI}
16+
DOI: <a href="https://doi.org/{DOI}">{DOI}</a>
17+
{:else if URL}
18+
preprint: <a href={URL}>{URL}</a>
19+
{/if}
20+
{#if issued}
21+
- {issued[0].year}
22+
{/if}
23+
</p>
24+
</li>
25+
{/each}
26+
</ol>
27+
28+
<style>
29+
ol > li {
30+
margin: 1ex 0;
31+
}
32+
ol > li > p {
33+
margin: 0;
34+
}
35+
</style>

site/src/lib/index.ts

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export { default as Footer } from './Footer.svelte'
22
export { default as ModelCard } from './ModelCard.svelte'
33
export { default as Nav } from './Nav.svelte'
4+
export { default as References } from './References.svelte'

site/src/routes/paper/+page.svx

+2-26
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ geometry: margin=3cm # https://stackoverflow.com/a/13516042
5151
<script>
5252
import MetricsTable from '$figs/2022-11-28-metrics-table.svelte'
5353
import { references } from './references.yaml'
54+
import { References } from '$lib'
5455
import './heading-number.css' // uncomment to remove heading numbers
5556
</script>
5657

@@ -189,26 +190,7 @@ JR acknowledges support from the German Academic Scholarship Foundation (Studien
189190

190191
## References
191192

192-
<ol>
193-
{#each references as { title, id, author, DOI, URL, issued }, idx}
194-
<li>
195-
<strong {id}>{title}</strong>
196-
<p>
197-
{@html author.map((a) => `${a.given} ${a.family}`).join(`, &thinsp; `)}
198-
</p>
199-
<p>
200-
{#if DOI}
201-
DOI: <a href="https://doi.org/{DOI}">{DOI}</a>
202-
{:else if URL}
203-
preprint: <a href={URL}>{URL}</a>
204-
{/if}
205-
{#if issued}
206-
- {issued[0].year}
207-
{/if}
208-
</p>
209-
</li>
210-
{/each}
211-
</ol>
193+
<References {references} />
212194

213195
<style>
214196
#abstract,
@@ -222,10 +204,4 @@ JR acknowledges support from the German Academic Scholarship Foundation (Studien
222204
margin: 1em;
223205
display: block;
224206
}
225-
ol > li {
226-
margin: 1ex 0;
227-
}
228-
ol > li > p {
229-
margin: 0;
230-
}
231207
</style>

site/svelte.config.js

+20
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,20 @@ export default {
3030
extensions: [`.svelte`, `.svx`, `.md`],
3131

3232
preprocess: [
33+
{
34+
// preprocess markdown citations @auth_1stwordtitle_yyyy into superscript
35+
// links to bibliography items; each href (#<citation key>) must match the id attribute that References.svelte sets on the corresponding entry
36+
markup: (file) => {
37+
if (file.filename.endsWith(`paper/+page.svx`)) {
38+
const code = file.content.replace(
39+
/@((.+?)_.+?_(\d{4}))/g,
40+
`<sup><a href="#$1">$2 $3</a></sup>`
41+
)
42+
return { code }
43+
}
44+
},
45+
},
46+
3347
preprocess(),
3448
mdsvex({
3549
rehypePlugins,
@@ -43,6 +57,12 @@ export default {
4357
kit: {
4458
adapter: adapter(),
4559

60+
alias: {
61+
$site: `.`,
62+
$root: `..`,
63+
$figs: `./static/figs`,
64+
},
65+
4666
prerender: {
4767
handleHttpError: `warn`,
4868
},

site/vite.config.ts

-10
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,10 @@
11
import yaml from '@rollup/plugin-yaml'
22
import { sveltekit } from '@sveltejs/kit/vite'
3-
import { resolve } from 'path'
43
import type { UserConfig } from 'vite'
54

65
const vite_config: UserConfig = {
76
plugins: [sveltekit(), yaml()],
87

9-
resolve: {
10-
alias: {
11-
$src: resolve(`./src`),
12-
$site: resolve(`.`),
13-
$root: resolve(`..`),
14-
$figs: resolve(`./static/figs`),
15-
},
16-
},
17-
188
server: {
199
fs: { allow: [`../..`] }, // needed to import from $root
2010
port: 3000,

0 commit comments

Comments
 (0)