janosh
diff --git a/‎assets/debug.py
Lines changed: 17 additions & 0 deletions b/‎assets/debug.py
Lines changed: 17 additions & 0 deletions
diff --git a/‎examples/dataset_exploration/matpes/eda.py
Lines changed: 2 additions & 1 deletion b/‎examples/dataset_exploration/matpes/eda.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎examples/make_assets/ptable.py
Lines changed: 1 addition & 1 deletion b/‎examples/make_assets/ptable.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎pymatviz/__init__.py
Lines changed: 1 addition & 3 deletions b/‎pymatviz/__init__.py
Lines changed: 1 addition & 3 deletions
diff --git a/‎pymatviz/_preprocess_data.py
Lines changed: 0 additions & 38 deletions b/‎pymatviz/_preprocess_data.py
Lines changed: 0 additions & 38 deletions
diff --git a/‎pymatviz/histograms.py
Lines changed: 2 additions & 2 deletions b/‎pymatviz/histograms.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎pymatviz/process_data.py
Lines changed: 128 additions & 0 deletions b/‎pymatviz/process_data.py
Lines changed: 128 additions & 0 deletions
diff --git a/‎pymatviz/ptable/__init__.py
Lines changed: 13 additions & 0 deletions b/‎pymatviz/ptable/__init__.py
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,17 @@
+from matminer.datasets import load_dataset
+
+from pymatviz.enums import Key
+from pymatviz.io import save_and_compress_svg
+from pymatviz.ptable import ptable_heatmap_ratio
+
+
+df_expt_gap = load_dataset("matbench_expt_gap")  # matminer Matbench datasets
+df_steels = load_dataset("matbench_steels")
+
+# Periodic table heatmap of element ratios between the two composition columns
+fig = ptable_heatmap_ratio(
+    df_expt_gap[Key.composition], df_steels[Key.composition], log=True, values_fmt=".4g"
+)
+title = "Element ratios in Matbench Experimental Band Gap vs Matbench Steel"
+fig.suptitle(title, y=0.96, fontsize=20, fontweight="bold")
+save_and_compress_svg(fig, "debug")  # presumably writes debug.svg — confirm
@@ -16,7 +16,8 @@
 from pymatviz.histograms import spacegroup_hist
 from pymatviz.io import save_fig
 from pymatviz.powerups import add_identity_line
-from pymatviz.ptable import count_elements, ptable_heatmap, ptable_heatmap_splits
+from pymatviz.process_data import count_elements
+from pymatviz.ptable import ptable_heatmap, ptable_heatmap_splits
 from pymatviz.sunburst import spacegroup_sunburst
 
 
 
@@ -7,8 +7,8 @@
 
 from pymatviz.enums import Key
 from pymatviz.io import save_and_compress_svg
+from pymatviz.process_data import count_elements
 from pymatviz.ptable import (
-    count_elements,
     ptable_heatmap,
     ptable_heatmap_plotly,
     ptable_heatmap_ratio,
 
@@ -28,10 +28,8 @@
     plot_phonon_bands_and_dos,
     plot_phonon_dos,
 )
+from pymatviz.process_data import count_elements
 from pymatviz.ptable import (
-    ChildPlotters,
-    PTableProjector,
-    count_elements,
     ptable_heatmap,
     ptable_heatmap_plotly,
     ptable_heatmap_ratio,
 
@@ -17,7 +17,7 @@
 
 from pymatviz.enums import ElemCountMode, Key
 from pymatviz.powerups import annotate_bars
-from pymatviz.ptable import count_elements
+from pymatviz.process_data import count_elements
 from pymatviz.utils import (
     BACKENDS,
     MATPLOTLIB,
@@ -29,7 +29,7 @@
 
 
 if TYPE_CHECKING:
-    from pymatviz.ptable import ElemValues
+    from pymatviz.utils import ElemValues
 
 
 def spacegroup_hist(
 
@@ -0,0 +1,128 @@
+"""Helpers for processing input data, e.g. counting elements in compositions."""
+
+from __future__ import annotations
+
+import itertools
+from collections.abc import Sequence
+from typing import Union
+
+import pandas as pd
+from pandas.api.types import is_numeric_dtype, is_string_dtype
+from pymatgen.core import Composition
+
+from pymatviz.enums import ElemCountMode, Key
+from pymatviz.utils import df_ptable
+
+
+ElemValues = Union[dict[Union[str, int], float], pd.Series, Sequence[str]]
+
+
+def count_elements(
+    values: ElemValues,
+    count_mode: ElemCountMode = ElemCountMode.composition,
+    exclude_elements: Sequence[str] = (),
+    fill_value: float | None = 0,
+) -> pd.Series:
+    """Count element occurrence in list of formula strings or dict-like compositions.
+
+    If passed values are already a map from element symbol to counts, ensure the
+    data is a pd.Series with fill_value filled in for missing element symbols.
+
+    Provided standalone for external use or to cache long computations, by refactoring
+        ptable_heatmap(long_list_of_formulas) # slow
+    to
+        elem_counts = count_elements(long_list_of_formulas) # slow
+        ptable_heatmap(elem_counts) # fast, only rerun this line to update the plot
+
+    Args:
+        values (dict[str | int, float] | pd.Series | list[str]): Compositions
+            (strings/objects) or map from element symbols/atomic numbers to values.
+        count_mode (ElemCountMode): Only used when values is a list of composition
+            strings/objects. One of:
+            - composition (default): Count elements as is, i.e. unnormalized.
+            - fractional_composition: Convert to normalized compositions in which the
+                amounts of each species sum to 1 before counting.
+                Example: Fe2 O3 -> Fe0.4 O0.6
+            - reduced_composition: Convert to reduced compositions (i.e. amounts
+                normalized by greatest common divisor) before counting.
+                Example: Fe4 P4 O16 -> Fe P O4.
+            - occurrence: Count how many formulas each element occurs in, irrespective
+                of amounts. E.g. [Fe2 O3, Fe O, Fe4 P4 O16] counts to {Fe: 3, O: 3, P: 1}.
+        exclude_elements (Sequence[str]): Elements to exclude. Defaults to ().
+        fill_value (float | None): Value to fill in for missing elements. Defaults to 0.
+
+    Returns:
+        pd.Series: Map element symbols to heatmap values.
+
+    Raises:
+        ValueError: If count_mode, values, integer keys or exclude_elements are invalid.
+    """
+    valid_count_modes = list(ElemCountMode.key_val_dict())
+    if count_mode not in valid_count_modes:
+        raise ValueError(f"Invalid {count_mode=} must be one of {valid_count_modes}")
+    # Ensure values is Series if we got dict/list/tuple
+    srs = pd.Series(values)
+
+    if is_numeric_dtype(srs):
+        pass  # numeric values are taken as element -> value map, nothing to count
+    elif is_string_dtype(srs) or {*map(type, srs)} <= {str, Composition}:
+        # all items are formula strings or Composition objects
+        if count_mode == "occurrence":
+            srs = pd.Series(
+                itertools.chain.from_iterable(
+                    map(str, Composition(comp, allow_negative=True)) for comp in srs
+                )
+            ).value_counts()
+        else:
+            attr = (
+                "element_composition" if count_mode == Key.composition else count_mode
+            )
+            srs = pd.DataFrame(
+                getattr(Composition(formula, allow_negative=True), attr).as_dict()
+                for formula in srs
+            ).sum()  # sum up element occurrences
+    else:
+        raise ValueError(
+            "Expected values to be map from element symbols to heatmap values or "
+            f"list of compositions (strings or Pymatgen objects), got {values}"
+        )
+
+    try:
+        # If index consists entirely of strings representing integers, convert to ints
+        srs.index = srs.index.astype(int)
+    except (ValueError, TypeError):
+        pass  # index holds non-integer labels such as element symbols, keep as is
+
+    if pd.api.types.is_integer_dtype(srs.index):
+        # If index is all integers, assume they represent atomic
+        # numbers and map them to element symbols (H: 1, He: 2, ...)
+        idx_min, idx_max = srs.index.min(), srs.index.max()
+        if idx_max > 118 or idx_min < 1:
+            raise ValueError(
+                "element value keys were found to be integers and assumed to represent "
+                f"atomic numbers, but values range from {idx_min} to {idx_max}, "
+                "expected range [1, 118]."
+            )
+        map_atomic_num_to_elem_symbol = (
+            df_ptable.reset_index().set_index("atomic_number").symbol
+        )
+        srs.index = srs.index.map(map_atomic_num_to_elem_symbol)
+
+    # Ensure all elements are present in returned Series (with value fill_value if
+    # they weren't in values before)
+    srs = srs.reindex(df_ptable.index, fill_value=fill_value).rename("count")
+
+    if len(exclude_elements) > 0:
+        if isinstance(exclude_elements, str):  # allow a single element symbol
+            exclude_elements = [exclude_elements]
+        if isinstance(exclude_elements, tuple):  # drop() treats a tuple as one label
+            exclude_elements = list(exclude_elements)
+        try:
+            srs = srs.drop(exclude_elements)
+        except KeyError as exc:
+            bad_symbols = ", ".join(x for x in exclude_elements if x not in srs)
+            raise ValueError(
+                f"Unexpected symbol(s) {bad_symbols} in {exclude_elements=}"
+            ) from exc
+
+    return srs
@@ -0,0 +1,13 @@
+"""matplotlib and plotly periodic table figures."""
+
+from __future__ import annotations
+
+from pymatviz.ptable.matplotlib import (
+    ptable_heatmap,
+    ptable_heatmap_ratio,
+    ptable_heatmap_splits,
+    ptable_hists,
+    ptable_lines,
+    ptable_scatters,
+)
+from pymatviz.ptable.plotly import ptable_heatmap_plotly