merge all /api/* routes into single page /api

janosh · janosh · commit 2908fdf5255e · 2023-06-19T20:29:22.000-07:00
start adding comments to module globals
diff --git a/.gitignore b/.gitignore
@@ -29,5 +29,4 @@ models/voronoi/*.zip
 site/static/figures
 
 # generated docs
-site/src/routes/api/.*
 site/src/routes/api/*.md
diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py
@@ -4,9 +4,12 @@
 import sys
 from datetime import datetime
 
-ROOT = os.path.dirname(os.path.dirname(__file__))
+ROOT = os.path.dirname(os.path.dirname(__file__))  # repository root
+# debug mode is on unless a slurm job is being submitted or is already running
 DEBUG = "slurm-submit" not in sys.argv and "SLURM_JOB_ID" not in os.environ
+# directory to store model checkpoints downloaded from wandb cloud storage
 CHECKPOINT_DIR = f"{ROOT}/wandb/checkpoints"
+# wandb <entity>/<project name> to record new runs to
 WANDB_PATH = "materialsproject/matbench-discovery"
 
 timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py
@@ -2,7 +2,7 @@
 
 import os
 import urllib.error
-from collections.abc import Generator, Sequence
+from collections.abc import Sequence
 from glob import glob
 from pathlib import Path
 from typing import Any, Callable
@@ -17,7 +17,9 @@
 df_wbm = pd.read_csv(f"{ROOT}/data/wbm/2022-10-19-wbm-summary.csv")
 df_wbm.index = df_wbm.material_id
 
+# repo URL to raw files on GitHub
 RAW_REPO_URL = "https://raw.githubusercontent.com/janosh/matbench-discovery"
+# directory to cache downloaded data files
 default_cache_dir = os.path.expanduser("~/.cache/matbench-discovery")
 
 DATA_FILENAMES = {
@@ -31,10 +33,6 @@
 }
 
 
-def chunks(xs: Sequence[Any], n: int) -> Generator[Sequence[Any], None, None]:
-    return (xs[i : i + n] for i in range(0, len(xs), n))
-
-
 def as_dict_handler(obj: Any) -> dict[str, Any] | None:
     """Pass this to json.dump(default=) or as pandas.to_json(default_handler=) to
     convert Python classes with a as_dict() method to dictionaries on serialization.
@@ -50,7 +48,7 @@ def as_dict_handler(obj: Any) -> dict[str, Any] | None:
 def load_train_test(
     data_names: str | Sequence[str] = ("summary",),
     version: str = "1.0.0",
-    cache_dir: str | Path | None = default_cache_dir,
+    cache_dir: str | Path = default_cache_dir,
     hydrate: bool = False,
     **kwargs: Any,
 ) -> pd.DataFrame:
diff --git a/site/src/app.css b/site/src/app.css
@@ -1,5 +1,5 @@
 :root {
-  --night: #102030;
+  --night: #061e25;
   --blue: cornflowerblue;
   --text-color: rgb(208, 208, 208);
 
diff --git a/site/src/routes/+layout.svelte b/site/src/routes/+layout.svelte
@@ -19,13 +19,13 @@
   const routes = Object.keys(import.meta.glob(`./*/+page.{svx,svelte,md}`)).map(
     (filename) => `/` + filename.split(`/`)[1]
   )
+
+  $: headingSelector = `main > :is(${
+    $page.url.pathname === `/api` ? `h1, ` : ``
+  }h2, h3, h4):not(.toc-exclude)`
 </script>
 
-<Toc
-  headingSelector="main > :is(h2, h3, h4):not(.toc-exclude)"
-  breakpoint={1250}
-  warnOnEmpty={false}
-/>
+<Toc {headingSelector} breakpoint={1250} warnOnEmpty={false} />
 
 {#if $page.url.pathname !== `/`}
   <a href="/" aria-label="Back to index page">&laquo; home</a>
diff --git a/site/src/routes/api/+page.svelte b/site/src/routes/api/+page.svelte
@@ -1 +1,28 @@
-<!-- needed for /api page to show up -->
+<script lang="ts">
+  import { onMount } from 'svelte'
+
+  // once mounted, swap the grey "source" badges in the generated docs for blue ones
+  onMount(() => {
+    const old_src = `https://img.shields.io/badge/-source-cccccc?style=flat-square`
+    // type as HTMLImageElement (not HTMLAnchorElement): only <img> has a src property
+    const badges = document.querySelectorAll<HTMLImageElement>(`img[src='${old_src}']`)
+    for (const img of badges) {
+      img.src = `https://img.shields.io/badge/source-blue?style=flat`
+    }
+  })
+</script>
+
+{#each Object.values(import.meta.glob(`./*.md`, { eager: true })) as file}
+  <svelte:component this={file?.default} />
+{/each}
+
+<style>
+  :global(hr) {
+    border: none;
+    margin: 3em;
+  }
+  :global(code) {
+    line-height: 1em;
+    border-radius: 4pt;
+  }
+</style>
diff --git a/tests/test_data.py b/tests/test_data.py
@@ -3,6 +3,7 @@
 import os
 import urllib.request
 from pathlib import Path
+from random import random
 from tempfile import TemporaryDirectory
 from typing import Any
 from unittest.mock import patch
@@ -18,7 +19,6 @@
     PRED_FILENAMES,
     RAW_REPO_URL,
     as_dict_handler,
-    chunks,
     df_wbm,
     glob_to_df,
     load_df_wbm_with_preds,
@@ -38,29 +38,34 @@
 
 
 @pytest.mark.parametrize(
-    "data_names, cache_dir, hydrate",
+    "data_names, hydrate",
     [
-        (["wbm-summary"], None, True),
-        (["wbm-initial-structures"], TemporaryDirectory().name, True),
-        (["wbm-computed-structure-entries"], None, False),
-        (["wbm-summary", "wbm-initial-structures"], TemporaryDirectory().name, True),
-        (["mp-elemental-ref-energies"], None, True),
-        (["mp-energies"], None, True),
+        (["wbm-summary"], True),
+        (["wbm-initial-structures"], True),
+        (["wbm-computed-structure-entries"], False),
+        (["wbm-summary", "wbm-initial-structures"], True),
+        (["mp-elemental-ref-energies"], True),
+        (["mp-energies"], True),
     ],
 )
 def test_load_train_test(
     data_names: list[str],
-    cache_dir: str | None,
     hydrate: bool,
     dummy_df_with_structures: pd.DataFrame,
     capsys: CaptureFixture[str],
+    tmp_path: Path,
 ) -> None:
     # intercept HTTP requests to GitHub raw user content and return dummy df instead
     with patch("matbench_discovery.data.pd.read_csv") as read_csv, patch(
         "matbench_discovery.data.pd.read_json"
     ) as read_json:
         read_csv.return_value = read_json.return_value = dummy_df_with_structures
-        out = load_train_test(data_names, cache_dir=cache_dir, hydrate=hydrate)
+        out = load_train_test(
+            data_names,
+            hydrate=hydrate,
+            # randomly pass a str or a Path cache_dir so both types get exercised
+            cache_dir=TemporaryDirectory().name if random() < 0.5 else tmp_path,
+        )
 
     stdout, stderr = capsys.readouterr()
 
@@ -152,17 +157,6 @@ def test_load_train_test_no_mock(
     )
 
 
-def test_chunks() -> None:
-    assert list(chunks([], 1)) == []
-    assert list(chunks([1], 1)) == [[1]]
-    assert list(chunks([1, 2], 1)) == [[1], [2]]
-    assert list(chunks([1, 2, 3], 1)) == [[1], [2], [3]]
-    assert list(chunks([1, 2, 3], 2)) == [[1, 2], [3]]
-    assert list(chunks(range(1, 4), 2)) == [range(1, 3), range(3, 4)]
-    assert list(chunks(range(1, 5), 2)) == [range(1, 3), range(3, 5)]
-    assert list(chunks(range(1, 5), 3)) == [range(1, 4), range(4, 5)]
-
-
 def test_as_dict_handler() -> None:
     class C:
         def as_dict(self) -> dict[str, Any]: