janosh
diff --git a/.gitignore b/.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/data/mp/build_phase_diagram.py b/data/mp/build_phase_diagram.py
Lines changed: 1 addition & 2 deletions
diff --git a/matbench_discovery/enums.py b/matbench_discovery/enums.py
Lines changed: 2 additions & 0 deletions
diff --git a/models/alphanet/test_alphanet_kappa.py b/models/alphanet/test_alphanet_kappa.py
Lines changed: 8 additions & 12 deletions
diff --git a/models/deepmd/dpa3-v2-mptrj.yml b/models/deepmd/dpa3-v2-mptrj.yml
Lines changed: 11 additions & 11 deletions
diff --git a/models/deepmd/dpa3-v2-openlam.yml b/models/deepmd/dpa3-v2-openlam.yml
Lines changed: 7 additions & 7 deletions
diff --git a/models/deepmd/test_dpa3_kappa.py b/models/deepmd/test_dpa3_kappa.py
Lines changed: 8 additions & 11 deletions
diff --git a/models/eSEN/eSEN-30m-mp.yml b/models/eSEN/eSEN-30m-mp.yml
Lines changed: 150 additions & 0 deletions
@@ -42,3 +42,4 @@ site/src/routes/api/*.md
 
 # large files
 data/*-models-geo-opt-analysis-symprec=*.csv.gz
+.cursor
@@ -9,7 +9,6 @@
 import pickle
 
 import pandas as pd
-import pymatviz
 import pymatviz as pmv
 from pymatgen.analysis.phase_diagram import PatchedPhaseDiagram
 from pymatgen.entries.compatibility import MaterialsProject2020Compatibility
@@ -124,7 +123,7 @@
 
 
 # make sure get_form_energy_per_atom() reproduces MP formation energies
-ax = pymatviz.density_scatter(df_mp[Key.form_energy], df_mp[e_form_us])
+ax = pmv.density_scatter_plotly(df_mp[Key.form_energy], df_mp[e_form_us])
 ax.set(
     title="MP Formation Energy Comparison",
     xlabel="MP Formation Energy (eV/atom)",
 
@@ -310,6 +310,8 @@ class Model(Files, base_dir=f"{ROOT}/models"):
     # FAIR-Chem
     eqv2_s_dens = auto(), "eqV2/eqV2-s-dens-mp.yml"
     eqv2_m = auto(), "eqV2/eqV2-m-omat-salex-mp.yml"
+    esen_30m_mp = auto(), "eSEN/eSEN-30m-mp.yml"
+    esen_30m_oam = auto(), "eSEN/eSEN-30m-oam.yml"
 
     # GRACE: https://arxiv.org/abs/2311.16326v2
     grace_2l_mptrj = auto(), "grace/grace-2l-mptrj.yml"
 
@@ -109,20 +109,17 @@
 for atoms in tqdm_bar:
     mat_id = atoms.info.get(Key.mat_id, f"id-{len(kappa_results)}")
     init_info = deepcopy(atoms.info)
-    mat_name = atoms.info.get("name", "unknown")
-
+    formula = atoms.info.get("name", "unknown")
     spg_num = MoyoDataset(MoyoAdapter.from_atoms(atoms)).number
-    mat_desc = f"{mat_name}-{spg_num}"
-
     info_dict = {
-        "desc": mat_desc,
-        "name": mat_name,
-        "initial_space_group_number": spg_num,
+        Key.desc: mat_id,
+        Key.formula: formula,
+        Key.spg_num: spg_num,
         "errors": [],
         "error_traceback": [],
     }
 
-    tqdm_bar.set_postfix_str(mat_desc, refresh=True)
+    tqdm_bar.set_postfix_str(mat_id, refresh=True)
 
     # Initialize relax_dict to avoid "possibly unbound" errors
     relax_dict = {
@@ -148,7 +145,7 @@
 
             reached_max_steps = optimizer.step >= max_steps
             if reached_max_steps:
-                print(f"Material {mat_desc=} reached {max_steps=} during relaxation.")
+                print(f"Material {mat_id=} reached {max_steps=} during relaxation.")
 
             max_stress = atoms.get_stress().reshape((2, 3), order="C").max(axis=1)
             atoms.calc = None
@@ -166,7 +163,7 @@
             }
 
     except Exception as exc:
-        warnings.warn(f"Failed to relax {mat_name=}, {mat_id=}: {exc!r}", stacklevel=2)
+        warnings.warn(f"Failed to relax {formula=}, {mat_id=}: {exc!r}", stacklevel=2)
         traceback.print_exc()
         info_dict["errors"].append(f"RelaxError: {exc!r}")
         info_dict["error_traceback"].append(traceback.format_exc())
@@ -221,8 +218,7 @@
         if not ltc_condition:
             kappa_results[mat_id] = info_dict | relax_dict | freqs_dict
             warnings.warn(
-                f"Material {mat_desc} imaginary frequencies or broken symmetry.",
-                stacklevel=2,
+                f"{mat_id=} has imaginary frequencies or broken symmetry", stacklevel=2
             )
             continue
 
 
@@ -124,7 +124,7 @@ metrics:
     pred_col: e_form_per_atom_dp
     full_test_set:
       F1: 0.774 # fraction
-      DAF: 4.25 # dimensionless
+      DAF: 4.249 # dimensionless
       Precision: 0.729 # fraction
       Recall: 0.825 # fraction
       Accuracy: 0.917 # fraction
@@ -133,20 +133,20 @@ metrics:
       TNR: 0.936 # fraction
       FNR: 0.175 # fraction
       TP: 36393.0 # count
-      FP: 13519.0 # count
-      TN: 199352.0 # count
+      FP: 13518.0 # count
+      TN: 199353.0 # count
       FN: 7699.0 # count
       MAE: 0.038 # eV/atom
-      RMSE: 0.082 # eV/atom
-      R2: 0.796 # dimensionless
-      missing_preds: 0 # count
+      RMSE: 0.08 # eV/atom
+      R2: 0.801 # dimensionless
+      missing_preds: 2 # count
       missing_percent: 0.00% # fraction
     most_stable_10k:
-      F1: 0.980 # fraction
-      DAF: 6.280 # dimensionless
-      Precision: 0.960 # fraction
+      F1: 0.98 # fraction
+      DAF: 6.28 # dimensionless
+      Precision: 0.96 # fraction
       Recall: 1.0 # fraction
-      Accuracy: 0.960 # fraction
+      Accuracy: 0.96 # fraction
       TPR: 1.0 # fraction
       FPR: 1.0 # fraction
       TNR: 0.0 # fraction
@@ -162,7 +162,7 @@ metrics:
       missing_percent: 0.00% # fraction
     unique_prototypes:
       F1: 0.786 # fraction
-      DAF: 4.760 # dimensionless
+      DAF: 4.822 # dimensionless
       Precision: 0.737 # fraction
       Recall: 0.841 # fraction
       Accuracy: 0.929 # fraction
 
@@ -135,13 +135,13 @@ metrics:
       TNR: 0.969 # fraction
       FNR: 0.115 # fraction
       TP: 39030.0 # count
-      FP: 6676.0 # count
-      TN: 206195.0 # count
+      FP: 6675.0 # count
+      TN: 206196.0 # count
       FN: 5062.0 # count
       MAE: 0.022 # eV/atom
-      RMSE: 0.068 # eV/atom
-      R2: 0.857 # dimensionless
-      missing_preds: 0 # count
+      RMSE: 0.067 # eV/atom
+      R2: 0.863 # dimensionless
+      missing_preds: 2 # count
       missing_percent: 0.00% # fraction
     most_stable_10k:
       F1: 0.986 # fraction
@@ -163,11 +163,11 @@ metrics:
       missing_preds: 0 # count
       missing_percent: 0.00% # fraction
     unique_prototypes:
-      F1: 0.890 # fraction
+      F1: 0.89 # fraction
       DAF: 5.747 # dimensionless
       Precision: 0.879 # fraction
       Recall: 0.902 # fraction
-      Accuracy: 0.965 # fraction
+      Accuracy: 0.966 # fraction
       TPR: 0.902 # fraction
       FPR: 0.023 # fraction
       TNR: 0.977 # fraction
 
@@ -103,20 +103,18 @@
 for atoms in tqdm_bar:
     mat_id = atoms.info.get(Key.mat_id, f"id-{len(kappa_results)}")
     init_info = deepcopy(atoms.info)
-    mat_name = atoms.info.get("name", "unknown")
+    formula = atoms.info.get("name", "unknown")
 
     spg_num = MoyoDataset(MoyoAdapter.from_atoms(atoms)).number
-    mat_desc = f"{mat_name}-{spg_num}"
-
     info_dict = {
-        "desc": mat_desc,
-        "name": mat_name,
-        "initial_space_group_number": spg_num,
+        Key.desc: mat_id,
+        Key.formula: formula,
+        Key.spg_num: spg_num,
         "errors": [],
         "error_traceback": [],
     }
 
-    tqdm_bar.set_postfix_str(mat_desc, refresh=True)
+    tqdm_bar.set_postfix_str(mat_id, refresh=True)
 
     # Initialize relax_dict to avoid "possibly unbound" errors
     relax_dict = {
@@ -142,7 +140,7 @@
 
             reached_max_steps = optimizer.step >= max_steps
             if reached_max_steps:
-                print(f"Material {mat_desc=} reached {max_steps=} during relaxation.")
+                print(f"{mat_id=} reached {max_steps=} during relaxation.")
 
             max_stress = atoms.get_stress().reshape((2, 3), order="C").max(axis=1)
             atoms.calc = None
@@ -160,7 +158,7 @@
             }
 
     except Exception as exc:
-        warnings.warn(f"Failed to relax {mat_name=}, {mat_id=}: {exc!r}", stacklevel=2)
+        warnings.warn(f"Failed to relax {formula=}, {mat_id=}: {exc!r}", stacklevel=2)
         traceback.print_exc()
         info_dict["errors"].append(f"RelaxError: {exc!r}")
         info_dict["error_traceback"].append(traceback.format_exc())
@@ -215,8 +213,7 @@
         if not ltc_condition:
             kappa_results[mat_id] = info_dict | relax_dict | freqs_dict
             warnings.warn(
-                f"Material {mat_desc} imaginary frequencies or broken symmetry.",
-                stacklevel=2,
+                f"{mat_id=} has imaginary frequencies or broken symmetry", stacklevel=2
             )
             continue
 
 
@@ -0,0 +1,150 @@
+model_name: eSEN-30M-MP
+model_key: esen-30m-mp
+model_version: v2025.03.17
+matbench_discovery_version: 1.3.1
+date_added: "2025-03-17"
+date_published: "2025-03-17"
+authors:
+  - name: Xiang Fu
+    affiliation: FAIR at Meta
+    email: [email protected]
+    orcid: https://orcid.org/0000-0001-7480-6312
+    github: https://github.com/kyonofx
+    corresponding: true
+  - name: Brandon M. Wood
+    affiliation: FAIR at Meta
+  - name: Luis Barroso-Luque
+    affiliation: FAIR at Meta
+  - name: Daniel S. Levine
+    affiliation: FAIR at Meta
+  - name: Meng Gao
+    affiliation: FAIR at Meta
+  - name: Misko Dzamba
+    affiliation: FAIR at Meta
+  - name: C. Lawrence Zitnick
+    affiliation: FAIR at Meta
+    email: [email protected]
+    corresponding: true
+
+repo: https://github.com/FAIR-Chem/fairchem
+doi: https://doi.org/10.48550/arXiv.2502.12147
+paper: https://arxiv.org/abs/2502.12147
+url: https://huggingface.co/fairchem/eSEN
+pypi: https://pypi.org/project/fairchem-core
+pr_url: https://github.com/janosh/matbench-discovery/pull/226
+
+requirements:
+  fairchem-core: 1.7.0
+
+openness: OSOD
+trained_for_benchmark: false
+train_task: S2EFS
+test_task: IS2RE-SR
+targets: EFS_G
+model_type: UIP
+model_params: 30_085_121
+n_estimators: 1
+
+training_set: [MPtrj]
+
+hyperparams:
+  max_force: 0.02
+  max_steps: 500
+  ase_optimizer: FIRE
+  cell_filter: FrechetCellFilter
+
+notes:
+  Description: |
+    equivariant Smooth Energy Network (eSEN), described in https://arxiv.org/abs/2502.12147.
+  Training: |
+    Training was done by: (1) 70-epoch direct pre-training on MPTrj with DeNS; (2) 30-epoch conservative fine-tuning on MPTrj without DeNS.
+
+metrics:
+  phonons:
+    kappa_103:
+      κ_SRME: 0.3398
+      pred_file: models/eSEN/eSEN-30m-mp/2025-03-18-kappa-103-FIRE-dist=0.03-fmax=1e-4-symprec=1e-5.json.gz
+      pred_file_url: https://figshare.com/files/53090276
+  geo_opt:
+    pred_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-geo-opt.json.gz
+    pred_file_url: https://figshare.com/files/53054693
+    struct_col: pred_structure_esen
+    symprec=1e-5:
+      rmsd: 0.0142 # Å
+      n_sym_ops_mae: 3.3144 # unitless
+      symmetry_decrease: 0.4281 # fraction
+      symmetry_match: 0.3859 # fraction
+      symmetry_increase: 0.1282 # fraction
+      n_structures: 256963 # count
+      analysis_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-geo-opt-symprec=1e-5-moyo=0.4.2.csv.gz
+      analysis_file_url: https://figshare.com/files/53054696
+    symprec=1e-2:
+      rmsd: 0.0142 # Å
+      n_sym_ops_mae: 2.521 # unitless
+      symmetry_decrease: 0.2604 # fraction
+      symmetry_match: 0.6106 # fraction
+      symmetry_increase: 0.098 # fraction
+      n_structures: 256963 # count
+      analysis_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-geo-opt-symprec=1e-2-moyo=0.4.2.csv.gz
+      analysis_file_url: https://figshare.com/files/53054375
+  discovery:
+    pred_file: models/eSEN/eSEN-30m-mp/2025-03-17-wbm-IS2RE.csv.gz
+    pred_file_url: https://figshare.com/files/53054366
+    pred_col: pred_e_form_per_atom_esen
+    full_test_set:
+      F1: 0.819 # fraction
+      DAF: 4.616 # dimensionless
+      Precision: 0.792 # fraction
+      Recall: 0.849 # fraction
+      Accuracy: 0.936 # fraction
+      TPR: 0.849 # fraction
+      FPR: 0.046 # fraction
+      TNR: 0.954 # fraction
+      FNR: 0.151 # fraction
+      TP: 37419.0 # count
+      FP: 9825.0 # count
+      TN: 203046.0 # count
+      FN: 6673.0 # count
+      MAE: 0.032 # eV/atom
+      RMSE: 0.077 # eV/atom
+      R2: 0.818 # dimensionless
+      missing_preds: 2 # count
+      missing_percent: 0.00% # fraction
+    unique_prototypes:
+      F1: 0.831 # fraction
+      DAF: 5.26 # dimensionless
+      Precision: 0.804 # fraction
+      Recall: 0.861 # fraction
+      Accuracy: 0.946 # fraction
+      TPR: 0.861 # fraction
+      FPR: 0.038 # fraction
+      TNR: 0.962 # fraction
+      FNR: 0.139 # fraction
+      TP: 28722.0 # count
+      FP: 7000.0 # count
+      TN: 175114.0 # count
+      FN: 4652.0 # count
+      MAE: 0.033 # eV/atom
+      RMSE: 0.078 # eV/atom
+      R2: 0.822 # dimensionless
+      missing_preds: 0 # count
+      missing_percent: 0.00% # fraction
+    most_stable_10k:
+      F1: 0.978 # fraction
+      DAF: 6.261 # dimensionless
+      Precision: 0.957 # fraction
+      Recall: 1.0 # fraction
+      Accuracy: 0.957 # fraction
+      TPR: 1.0 # fraction
+      FPR: 1.0 # fraction
+      TNR: 0.0 # fraction
+      FNR: 0.0 # fraction
+      TP: 9572.0 # count
+      FP: 428.0 # count
+      TN: 0.0 # count
+      FN: 0.0 # count
+      MAE: 0.035 # eV/atom
+      RMSE: 0.111 # eV/atom
+      R2: 0.755 # dimensionless
+      missing_preds: 0 # count
+      missing_percent: 0.00% # fraction
Original file line number	Diff line number	Diff line change
`@@ -42,3 +42,4 @@ site/src/routes/api/*.md`
`42`	`42`
`43`	`43`	`# large files`
`44`	`44`	`data/*-models-geo-opt-analysis-symprec=*.csv.gz`
	`45`	`+.cursor`