materialsproject · tschaume · Dec 14, 2024 · Dec 10, 2024 · Dec 10, 2024 · Dec 11, 2024
diff --git a/mp_api/client/mprester.py b/mp_api/client/mprester.py
@@ -17,7 +17,7 @@
 from packaging import version
 from pymatgen.analysis.phase_diagram import PhaseDiagram
 from pymatgen.analysis.pourbaix_diagram import IonEntry
-from pymatgen.core import SETTINGS, Element, Structure
+from pymatgen.core import SETTINGS, Composition, Element, Structure
 from pymatgen.core.ion import Ion
 from pymatgen.entries.computed_entries import ComputedStructureEntry
 from pymatgen.io.vasp import Chgcar
@@ -1477,3 +1477,147 @@ def query(*args, **kwargs):
             please see the new documentation.
             """
         )
+
+    def get_cohesive_energy(
+        self,
+        material_ids: list[MPID | str],
+        normalization: Literal["atom", "formula_unit"] = "atom",
+    ) -> float | dict[str, float]:
+        """Obtain the cohesive energy of the structure(s) corresponding to multiple MPIDs.
+
+        Args:
+            material_ids ([MPID | str]) : List of MPIDs to compute cohesive energies.
+            normalization (str = "atom" (default) or "formula_unit") :
+                Whether to normalize the cohesive energy by the number of atoms (default)
+                or by the number of formula units.
+                Note that the current default is inconsistent with the legacy API.
+
+        Returns:
+            (dict[str,float]) : The cohesive energies (in eV/atom or eV/formula unit) for
+            each material, indexed by MPID.
+        """
+        entry_preference = {
+            k: i for i, k in enumerate(["GGA", "GGA_U", "SCAN", "R2SCAN"])
+        }
+        run_type_to_dfa = {"GGA": "PBE", "GGA_U": "PBE", "R2SCAN": "r2SCAN"}
+
+        energies = {mp_id: {} for mp_id in material_ids}
+        entries = self.get_entries(
+            material_ids,
+            compatible_only=False,
+            inc_structure=True,
+            property_data=None,
+            conventional_unit_cell=False,
+        )
+        for entry in entries:
+            # Ensure that this works with monty_decode = False and True
+            if not self.monty_decode:
+                entry["uncorrected_energy_per_atom"] = entry["energy"] / sum(
+                    entry["composition"].values()
+                )
+            else:
+                entry = {
+                    "data": entry.data,
+                    "uncorrected_energy_per_atom": entry.uncorrected_energy_per_atom,
+                    "composition": entry.composition,
+                }
+
+            mp_id = entry["data"]["material_id"]
+            if (run_type := entry["data"]["run_type"]) not in energies[mp_id]:
+                energies[mp_id][run_type] = {
+                    "total_energy_per_atom": float("inf"),
+                    "composition": None,
+                }
+
+            # Obtain lowest total energy/atom within a given run type
+            if (
+                entry["uncorrected_energy_per_atom"]
+                < energies[mp_id][run_type]["total_energy_per_atom"]
+            ):
+                energies[mp_id][run_type] = {
+                    "total_energy_per_atom": entry["uncorrected_energy_per_atom"],
+                    "composition": entry["composition"],
+                }
+
+        atomic_energies = self.get_atom_reference_data()
+
+        e_coh_per_atom = {}
+        for mp_id, entries in energies.items():
+            if not entries:
+                e_coh_per_atom[str(mp_id)] = None
+                continue
+            # take entry from most reliable and available functional
+            prefered_func = sorted(list(entries), key=lambda k: entry_preference[k])[-1]
+            e_coh_per_atom[str(mp_id)] = self._get_cohesive_energy(
+                entries[prefered_func]["composition"],
+                entries[prefered_func]["total_energy_per_atom"],
+                atomic_energies[run_type_to_dfa.get(prefered_func, prefered_func)],
+                normalization=normalization,
+            )
+        return e_coh_per_atom
+
+    @lru_cache
+    def get_atom_reference_data(
+        self,
+        funcs: tuple[str] = (
+            "PBE",
+            "SCAN",
+            "r2SCAN",
+        ),
+    ) -> dict[str, dict[str, float]]:
+        """Retrieve energies of isolated neutral atoms from MPContribs.
+
+        Args:
+            funcs ([str] or None) : list of functionals to retrieve data for.
+            Defaults to all available functionals ("PBE", "SCAN", "r2SCAN")
+            when set to None.
+
+        Returns:
+            (dict[str, dict[str, float]]) : dict containing isolated atom energies,
+            indexed first by the functionals in funcs, and second by the atom.
+        """
+        _atomic_energies = self.contribs.query_contributions(
+            query={"project": "isolated_atom_energies"},
+            fields=["formula", *[f"data.{dfa}.energy" for dfa in funcs]],
+        ).get("data")
+
+        return {
+            dfa: {
+                entry["formula"]: entry["data"][dfa]["energy"]["value"]
+                for entry in _atomic_energies
+            }
+            for dfa in funcs
+        }
+
+    @staticmethod
+    def _get_cohesive_energy(
+        composition: Composition | dict,
+        energy_per_atom: float,
+        atomic_energies: dict[str, float],
+        normalization: Literal["atom", "formula_unit"] = "atom",
+    ) -> float:
+        """Obtain the cohesive energy of a given composition and energy.
+
+        Args:
+            composition (Composition or dict) : the composition of the structure.
+            energy_per_atom (float) : the energy per atom of the structure.
+            atomic_energies (dict[str,float]) : a dict containing reference total energies
+                of neutral atoms.
+            normalization (str = "atom" (default) or "formula_unit") :
+                Whether to normalize the cohesive energy by the number of atoms (default)
+                or by the number of formula units.
+
+        Returns:
+            (float) : the cohesive energy per atom.
+        """
+        comp = Composition(composition).remove_charges()
+        atomic_energy = sum(
+            coeff * atomic_energies[str(element)] for element, coeff in comp.items()
+        )
+
+        natom = sum(comp.values())
+        if normalization == "atom":
+            return energy_per_atom - atomic_energy / natom
+        elif normalization == "formula_unit":
+            num_form_unit = comp.get_reduced_composition_and_factor()[1]
+            return (energy_per_atom * natom - atomic_energy) / num_form_unit
diff --git a/tests/test_mprester.py b/tests/test_mprester.py
@@ -371,3 +371,66 @@ def test_invalid_api_key(self, monkeypatch):
         monkeypatch.setenv("MP_API_KEY", "INVALID")
         with pytest.raises(ValueError, match="Keys for the new API are 32 characters"):
             MPRester().get_structure_by_material_id("mp-149")
+
+    def test_get_cohesive_energy_per_atom_utility(self):
+        composition = {
+            "H": 5,
+            "V": 2,
+            "P": 3,
+        }
+        toten_per_atom = -2.0e3
+        atomic_energies = {"H": -13.6, "V": -7.2, "P": -0.1}
+
+        by_hand_e_coh = toten_per_atom - sum(
+            atomic_energies[k] * v for k, v in composition.items()
+        ) / sum(composition.values())
+
+        assert MPRester._get_cohesive_energy(
+            composition, toten_per_atom, atomic_energies
+        ) == pytest.approx(by_hand_e_coh)
+
+    def test_get_atom_references(self, mpr):
+        ae = mpr.get_atom_reference_data(funcs=("PBE",))
+        assert list(ae) == ["PBE"]
+        assert len(ae["PBE"]) == 89
+        assert all(isinstance(v, float) for v in ae["PBE"].values())
+
+        ae = mpr.get_atom_reference_data()
+        assert set(ae) == {"PBE", "r2SCAN", "SCAN"}
+        assert all(len(entries) == 89 for entries in ae.values())
+        assert all(
+            isinstance(v, float) for entries in ae.values() for v in entries.values()
+        )
+
+    def test_get_cohesive_energy(self):
+        ref_e_coh = {
+            "atom": {
+                "mp-123": -4.029208982500002,
+                "mp-149": -4.669184594999999,
+                "mp-4163": -6.351402620416668,
+                "mp-19017": -4.933409960714286,
+            },
+            "formula_unit": {
+                "mp-123": -4.029208982500002,
+                "mp-149": -4.669184594999999,
+                "mp-4163": -76.21683144500001,
+                "mp-19017": -34.533869725,
+            },
+        }
+        e_coh = {}
+        for monty_decode in (True, False):
+            with MPRester(
+                use_document_model=monty_decode, monty_decode=monty_decode
+            ) as _mpr:
+                for norm, refs in ref_e_coh.items():
+                    _e_coh = _mpr.get_cohesive_energy(list(refs), normalization=norm)
+                    if norm == "atom":
+                        e_coh["serial" if monty_decode else "noserial"] = _e_coh.copy()
+
+                    # Ensure energies match reference data
+                    assert all(v == pytest.approx(refs[k]) for k, v in _e_coh.items())
+
+        # Ensure energies are the same regardless of serialization
+        assert all(
+            v == pytest.approx(e_coh["noserial"][k]) for k, v in e_coh["serial"].items()
+        )