Skip to content

Commit 016e71f

Browse files
committed
change TL;DR to MP non-endorsement disclaimer
add module docstrings; fix wbm_computed_structure_entries description on /contribute page
1 parent a3b4362 commit 016e71f

15 files changed

+53
-43
lines changed

.pre-commit-config.yaml

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ default_install_hook_types: [pre-commit, commit-msg]
88

99
repos:
1010
- repo: https://github.com/astral-sh/ruff-pre-commit
11-
rev: v0.3.7
11+
rev: v0.4.2
1212
hooks:
1313
- id: ruff
1414
args: [--fix]
@@ -31,7 +31,7 @@ repos:
3131
- id: trailing-whitespace
3232

3333
- repo: https://github.com/pre-commit/mirrors-mypy
34-
rev: v1.9.0
34+
rev: v1.10.0
3535
hooks:
3636
- id: mypy
3737
additional_dependencies: [types-pyyaml, types-requests]
@@ -57,7 +57,7 @@ repos:
5757
exclude: ^(site/src/figs/.+\.svelte|data/wbm/20.+\..+|site/src/(routes|figs).+\.(yaml|json)|changelog.md)$
5858

5959
- repo: https://github.com/pre-commit/mirrors-eslint
60-
rev: v9.0.0
60+
rev: v9.1.1
6161
hooks:
6262
- id: eslint
6363
types: [file]
@@ -79,7 +79,7 @@ repos:
7979
- id: check-github-actions
8080

8181
- repo: https://github.com/RobertCraigie/pyright-python
82-
rev: v1.1.358
82+
rev: v1.1.360
8383
hooks:
8484
- id: pyright
8585
args: [--level, error]

data/mp/get_mp_energies.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""Download all MP formation and above hull energies on 2023-01-10.
2+
3+
Related EDA of MP formation energies:
4+
https://github.com/janosh/pymatviz/blob/-/examples/mp_bimodal_e_form.ipynb
5+
"""
6+
17
# %%
28
import os
39

@@ -12,13 +18,6 @@
1218
from matbench_discovery.data import DATA_FILES
1319
from matbench_discovery.enums import Key
1420

15-
"""
16-
Download all MP formation and above hull energies on 2023-01-10.
17-
18-
Related EDA of MP formation energies:
19-
https://github.com/janosh/pymatviz/blob/-/examples/mp_bimodal_e_form.ipynb
20-
"""
21-
2221
__author__ = "Janosh Riebesell"
2322
__date__ = "2023-01-10"
2423

data/wbm/compare_cse_vs_ce_mp_2020_corrections.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""NOTE MaterialsProject2020Compatibility takes structural information into account when
2+
correcting energies (for oxides and sulfides only). Always use
3+
ComputedStructureEntry, not ComputedEntry when applying energy corrections.
4+
"""
5+
16
# %%
27
import gzip
38
import json
@@ -16,13 +21,6 @@
1621
from matbench_discovery.enums import Key
1722
from matbench_discovery.plots import plt
1823

19-
"""
20-
NOTE MaterialsProject2020Compatibility takes structural information into account when
21-
correcting energies (for certain oxides and sulfides). Always use
22-
ComputedStructureEntry, not ComputedEntry when applying corrections.
23-
"""
24-
25-
2624
df_cse = pd.read_json(DATA_FILES.wbm_computed_structure_entries).set_index(Key.mat_id)
2725

2826
cses = [ComputedStructureEntry.from_dict(dct) for dct in tqdm(df_cse[Key.cse])]

data/wbm/compile_wbm_test_set.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""Download, process and clean the DFT dataset published in Wang et al. 2021.
2+
3+
https://nature.com/articles/s41524-020-00481-6
4+
"""
5+
16
# %%
27
import gzip
38
import os
@@ -33,11 +38,6 @@
3338
)
3439
raise
3540

36-
"""
37-
Dataset generated with DFT and published in Jan 2021 as
38-
"Predicting stable crystalline compounds using chemical similarity"
39-
https://nature.com/articles/s41524-020-00481-6
40-
"""
4141

4242
# %% links to google drive files received via email from 1st author Hai-Chen Wang
4343
# on 2021-06-15 containing initial and relaxed structures

data/wbm/eda_wbm.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""WBM exploratory data analysis (EDA).
2+
Starts by comparing MP and WBM elemental prevalence.
3+
"""
4+
15
# %%
26
import os
37

@@ -26,11 +30,6 @@
2630
__author__ = "Janosh Riebesell"
2731
__date__ = "2023-03-30"
2832

29-
"""
30-
WBM exploratory data analysis (EDA).
31-
Start with comparing MP and WBM elemental prevalence.
32-
"""
33-
3433
module_dir = os.path.dirname(__file__)
3534
data_page = f"{ROOT}/site/src/routes/data"
3635

matbench_discovery/data.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""Download, cache and hydrate data files from the Matbench Discovery Figshare article.
2+
3+
https://figshare.com/articles/dataset/22715158
4+
"""
5+
16
import gzip
27
import json
38
import os

matbench_discovery/energy.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
"""Functions to compute formation and elemental reference energies from
2+
pymatgen EntryLikes.
3+
"""
4+
15
import itertools
26
from collections.abc import Sequence
37

matbench_discovery/enums.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Enums used as keys/accessors for dicts and dataframes across Matbench Discovery."""
2+
13
from enum import StrEnum, unique
24
from typing import Self
35

matbench_discovery/models.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Initializes global variable MODEL_METADATA."""
2+
13
from glob import glob
24
from typing import Any
35

@@ -20,11 +22,7 @@
2022
continue
2123
# make sure all required keys are non-empty
2224
with open(md_file) as yml_file:
23-
models = yaml.full_load(yml_file)
25+
model_data = yaml.full_load(yml_file)
2426

25-
# some metadata files contain a single model, some have multiple
26-
if not isinstance(models, list):
27-
models = [models]
28-
for model in models:
29-
model["model_dir"] = model_dir
30-
MODEL_METADATA[model["model_name"]] = model
27+
model_data["model_dir"] = model_dir
28+
MODEL_METADATA[model_data["model_name"]] = model_data

matbench_discovery/preds.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Centralize data-loading and computing metrics for plotting scripts."""
2+
13
from collections.abc import Sequence
24
from typing import Any, Literal
35

@@ -10,8 +12,6 @@
1012
from matbench_discovery.metrics import stable_metrics
1113
from matbench_discovery.plots import plotly_colors, plotly_line_styles, plotly_markers
1214

13-
"""Centralize data-loading and computing metrics for plotting scripts"""
14-
1515
__author__ = "Janosh Riebesell"
1616
__date__ = "2023-02-04"
1717

matbench_discovery/slurm.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Slurm job submission helper function."""
2+
13
import os
24
import subprocess
35
import sys

matbench_discovery/structure.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""Perturb atomic coordinates of a pymatgen structure.
2+
3+
Used for CGCNN+P training set augmentation.
4+
"""
5+
16
import numpy as np
27
from pymatgen.core import Structure
38

readme.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
</h4>
1515

16-
> TL;DR: We benchmark ML models on crystal stability prediction from unrelaxed structures finding universal interatomic potentials (UIP) like [MACE](https://github.com/ACEsuit/mace), [CHGNet](https://github.com/CederGroupHub/chgnet) and [M3GNet](https://github.com/materialsvirtuallab/m3gnet) to be highly accurate, robust across chemistries and ready for production use in high-throughput materials discovery.
16+
> Disclaimer: We evaluate how accurately ML models predict solid-state thermodynamic stability. Although this is an important aspect of high-throughput materials discovery, the ranking cannot give a complete picture of a model's general applicability to materials. A high ranking does not constitute endorsement by the Materials Project.
1717
1818
Matbench Discovery is an [interactive leaderboard](https://janosh.github.io/matbench-discovery/models) and associated [PyPI package](https://pypi.org/project/matbench-discovery) which together make it easy to rank ML energy models on a task designed to simulate a high-throughput discovery campaign for new stable inorganic crystals.
1919

site/src/lib/model-schema.d.ts

-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
/* eslint-disable */
21
/**
32
* This file was automatically generated by json-schema-to-typescript.
43
* DO NOT MODIFY IT BY HAND. Instead, modify the source JSONSchema file,

site/src/routes/contribute/+page.md

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,19 @@
66
const ppd_doc_url = `https://github.com/materialsproject/pymatgen/blob/v2023.5.10/pymatgen/analysis/phase_diagram.py#L1480-L1814`
77
const ppd_link = `<a href=${ppd_doc_url}>PatchedPhaseDiagram</a>`
88
const cse_doc_url = `https://github.com/materialsproject/pymatgen/blob/v2023.5.10/pymatgen/entries/computed_entries.py#L579-L722`
9-
const cse_link = `<a href=${cse_doc_url}>ComputedStructureEntry</a>`
109

1110
const mp_trj_link = `<a href="https://figshare.com/articles/dataset/23713842">MPtrj</a>`
1211
const descriptions = {
1312
alignn_checkpoint: `ALIGNN model trained on <code>mp_computed_structure_entries</code>`,
1413
mp_computed_structure_entries:
15-
`JSON-Serialized MP ${cse_link} objects containing relaxed structures and DFT final energies`,
14+
`JSON-Serialized MP <a href=${cse_doc_url}>ComputedStructureEntries</a> containing DFT relaxed structures and corresponding final energies`,
1615
mp_elemental_ref_entries: `Minimum energy ComputedEntry for each element in MP`,
1716
mp_energies: `Materials Project formation energies and energies above convex hull`,
1817
mp_patched_phase_diagram:
1918
`${ppd_link} constructed from all MP ComputedStructureEntries`,
20-
wbm_computed_structure_entries: `Materials Project computed structure entries`,
19+
wbm_computed_structure_entries: `WBM <a href=${cse_doc_url}>ComputedStructureEntries</a> containing DFT relaxed structures and corresponding final energies`,
2120
wbm_initial_structures: `Unrelaxed WBM structures`,
22-
wbm_cses_plus_init_structs: `Both unrelaxed and DFT-relaxed WBM structures, the latter stored with their final VASP energies as ${cse_link}`,
21+
wbm_cses_plus_init_structs: `Both unrelaxed and DFT-relaxed WBM structures, the latter stored with their final VASP energies as <a href=${cse_doc_url}>ComputedStructureEntry</a>`,
2322
wbm_summary:
2423
`Computed material properties only, no structures. Available properties are VASP energy, formation energy, energy above the convex hull, volume, band gap, number of sites per unit cell, and more.`,
2524
mp_trj_extxyz_by_yuan: `${mp_trj_link} converted to <code>ase</code>-compatible extended XYZ format and compressed (11.3 to 1.6 GB) by Yuan Chiang`,

0 commit comments

Comments
 (0)