Skip to content

Commit 169afb1

Browse files
authored
Show non-compliant models by default (#233)
* fix structure loading from JSON in `analyze_geo_opt.py` to use JSON Lines format
  - fix `calc_geo_opt_metrics` warning by calling `infer_objects` before filling NaN RMSDs with 1.0
  - update `Model` class to check for None in geo_opt metrics
  - correct model_key casing in eSEN YAML files for consistency
  - fix `struct_col` (structure column) in grace model YAML
* better landing page table layout: table now extends into page margins to avoid horizontal scrolling
  - simplified table element selection in `html-to-img.ts` for SVG and PNG exports
* add mattersim-v1-5m.yml geo_opt metrics and upload analysis files to figshare
* change non-compliant models to be visible by default and add color legend to MetricsTable to indicate compliance
* consistently PascalCase Svelte component unit test files
1 parent 85892aa commit 169afb1

27 files changed

+114
-540
lines changed

matbench_discovery/enums.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -418,8 +418,11 @@ def geo_opt_path(self) -> str | None:
418418
"""File path associated with the file URL if it exists, otherwise
419419
download the file first, then return the path.
420420
"""
421-
geo_opt_metrics = self.metrics.get("geo_opt", {})
422-
if geo_opt_metrics in ("not available", "not applicable"):
421+
geo_opt_metrics = self.metrics.get("geo_opt")
422+
if geo_opt_metrics is None or geo_opt_metrics in (
423+
"not available",
424+
"not applicable",
425+
):
423426
return None
424427
rel_path = geo_opt_metrics.get("pred_file")
425428
file_url = geo_opt_metrics.get("pred_file_url")

matbench_discovery/metrics/geo_opt.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ def calc_geo_opt_metrics(df_model_analysis: pd.DataFrame) -> dict[str, float]:
9393
n_structs = len(spg_diff.dropna())
9494

9595
# Fill NaN values with 1.0 (the stol value we set in StructureMatcher)
96-
mean_rmsd = rmsd_vals.fillna(1.0).mean()
96+
mean_rmsd = rmsd_vals.infer_objects(copy=False).fillna(1.0).mean()
9797
sym_ops_mae = n_sym_ops_diff.abs().mean()
9898

9999
# Count cases where spacegroup changed

models/eSEN/eSEN-30m-mp.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
model_name: eSEN-30M-MP
2-
model_key: esen-30m-mp
2+
model_key: eSEN-30m-mp
33
model_version: v2025.03.17
44
matbench_discovery_version: 1.3.1
55
date_added: "2025-03-17"

models/eSEN/eSEN-30m-oam.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
model_name: eSEN-30M-OAM
2-
model_key: esen-30m-oam
2+
model_key: eSEN-30m-oam
33
model_version: v2025.03.17
44
matbench_discovery_version: 1.3.1
55
date_added: "2025-03-17"

models/grace/grace-2l-mptrj.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ metrics:
5959
geo_opt:
6060
pred_file: models/grace/grace-2l-mptrj/2024-11-11-relaxed-structures.json.gz
6161
pred_file_url: https://figshare.com/files/52062590
62-
struct_col: grace-2L-mp_structure
62+
struct_col: grace2l_r6_structure
6363
symprec=1e-5:
6464
rmsd: 0.0186 # Å
6565
n_sym_ops_mae: 1.8703 # unitless

models/mattersim/mattersim-v1-5m.yml

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,25 @@ metrics:
130130
geo_opt:
131131
pred_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt.jsonl.gz
132132
pred_file_url: https://figshare.com/files/52062176
133-
struct_col: mattersim-v1-5m_structure
133+
struct_col: mattersim_5M_structure
134+
symprec=1e-2:
135+
rmsd: 0.0733 # Å
136+
n_sym_ops_mae: 6.08 # unitless
137+
symmetry_decrease: 0.03 # fraction
138+
symmetry_match: 0.78 # fraction
139+
symmetry_increase: 0.19 # fraction
140+
n_structures: 100 # count
141+
analysis_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt.jsonl.gz-symprec=1e-2-moyo=0.4.2.csv.gz
142+
analysis_file_url: https://figshare.com/files/53167865
143+
symprec=1e-5:
144+
rmsd: 0.0733 # Å
145+
n_sym_ops_mae: 8.83 # unitless
146+
symmetry_decrease: 0.05 # fraction
147+
symmetry_match: 0.66 # fraction
148+
symmetry_increase: 0.27 # fraction
149+
n_structures: 100 # count
150+
analysis_file: models/mattersim/mattersim-v1-5M/2024-12-19-wbm-geo-opt.jsonl.gz-symprec=1e-5-moyo=0.4.2.csv.gz
151+
analysis_file_url: https://figshare.com/files/53167868
134152
discovery:
135153
pred_file: models/mattersim/mattersim-v1-5M/2024-12-16-wbm-IS2RE.csv.gz
136154
# the original Graphormer-based replaced the M3GNet-based MatterSim on the leaderboard

scripts/analyze_geo_opt.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -70,13 +70,7 @@ def analyze_model_symprec(
7070

7171
# Load model structures
7272
try:
73-
if ml_relaxed_structs_path.endswith((".json", ".json.gz", ".json.xz")):
74-
df_ml_structs = pd.read_json(ml_relaxed_structs_path)
75-
else:
76-
raise ValueError(
77-
"Relaxed structure analysis currently only supports pymatgen JSON, "
78-
f"got {ml_relaxed_structs_path}"
79-
)
73+
df_ml_structs = pd.read_json(ml_relaxed_structs_path, lines=True)
8074
except Exception as exc:
8175
exc.add_note(f"{model.label=} {ml_relaxed_structs_path=}")
8276
raise
@@ -187,7 +181,9 @@ def analyze_model_symprec(
187181
# %%
188182
print("Loading WBM PBE structures...")
189183
wbm_cse_path = DataFiles.wbm_computed_structure_entries.path
190-
df_wbm_structs: pd.DataFrame = pd.read_json(wbm_cse_path).set_index(Key.mat_id)
184+
df_wbm_structs: pd.DataFrame = pd.read_json(
185+
wbm_cse_path, lines=True, orient="records"
186+
).set_index(Key.mat_id)
191187

192188
if debug_mode:
193189
df_wbm_structs = df_wbm_structs.head(debug_mode)

site/src/figs/metrics-table-uip-megnet-combos.svelte

Lines changed: 0 additions & 235 deletions
This file was deleted.

0 commit comments

Comments
 (0)