Fix --lb-mode=Memory (#275)

juanjosegarcan · web-flow · commit d29a6cfe8a56 · 2025-05-20T16:39:13.000+02:00
## Context lb-mode Memory fails when the cell_memory_usage.json or memory_per_metype.json files coming from a dry run are not present. Fix #243 ## Scope Add a check that the dry-run memory files exist and raise an error if they do not, informing the user that a dry run must be performed first. Add a test for this situation. ## Testing Extend tests/unit/test_dry_run.py ## Review * [ ] PR description is complete * [ ] Coding style (imports, function length, New functions, classes or files) are good * [ ] Unit/Scientific test added * [ ] Updated Readme, in-code, developer documentation
diff --git a/neurodamus/node.py b/neurodamus/node.py
@@ -456,6 +456,17 @@ def compute_load_balance(self):
             if file_exists:
                 alloc = self._dry_run_stats.import_allocation_stats(filename, self._cycle_i)
             else:
+                if not Path(DryRunStats._MEMORY_USAGE_FILENAME).exists():
+                    raise FileNotFoundError(
+                        f"No such file {DryRunStats._MEMORY_USAGE_FILENAME}. "
+                        "Neurodamus must be run with --dry-run mode before proceeding."
+                    )
+                if not Path(DryRunStats._MEMORY_USAGE_PER_METYPE_FILENAME).exists():
+                    raise FileNotFoundError(
+                        f"No such file {DryRunStats._MEMORY_USAGE_PER_METYPE_FILENAME}. "
+                        "Neurodamus must be run with --dry-run mode before proceeding."
+                    )
+
                 logging.warning("Allocation file not found. Generating on-the-fly.")
                 self._dry_run_stats.try_import_cell_memory_usage()
                 cell_distributor = CellDistributor(circuit, self._target_manager, self._run_conf)
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,7 +1,8 @@
 import json
 import pytest
-from pathlib import Path
 import platform
+from pathlib import Path
+
 from neurodamus.core._utils import run_only_rank0
 
 try:
@@ -89,6 +90,20 @@ def change_test_dir(monkeypatch, tmp_path):
     monkeypatch.chdir(tmp_path)
 
 
+@pytest.fixture()
+def copy_memory_files(change_test_dir):
+    # Fix values to ensure allocation memory (0,0)[1, 3] (1,0)[2]
+    metypes_memory = {
+        "MTYPE0-ETYPE0": 100.0,
+        "MTYPE1-ETYPE1": 200.0,
+        "MTYPE2-ETYPE2": 1000.0,
+    }
+    with Path("memory_per_metype.json").open("w") as f:
+        json.dump(metypes_memory, f, indent=4)
+    with Path("cell_memory_usage.json").open("w") as f:
+        json.dump(metypes_memory, f, indent=4)
+
+
 @run_only_rank0
 def _create_simulation_config_file(params, dst_dir, sim_config_data=None) -> str:
     """create simulation config file in dst_dir from
diff --git a/tests/unit-mpi/test_dry_run.py b/tests/unit-mpi/test_dry_run.py
@@ -1,6 +1,5 @@
 import pytest
 import tempfile
-from pathlib import Path
 from mpi4py import MPI
 
 
@@ -13,7 +12,7 @@
 size = comm.Get_size()
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="function")
 def tmp_folder():
     if rank == 0:
         path = tempfile.mkdtemp()
@@ -109,17 +108,15 @@ def test_dry_run_distribute_cells(create_tmp_simulation_config_file, mpi_ranks):
         }
     assert rank_allocation_standard == expected_allocation
 
-    Path(("allocation_r1_c1.pkl.gz")).unlink(missing_ok=True)
-    Path(("allocation_r2_c1.pkl.gz")).unlink(missing_ok=True)
-
 
 @pytest.mark.parametrize("create_tmp_simulation_config_file", [
     {
         "simconfig_fixture": "ringtest_baseconfig",
     },
 ], indirect=True)
 @pytest.mark.mpi(ranks=2)
-def test_dry_run_dynamic_distribute(create_tmp_simulation_config_file, mpi_ranks):
+def test_dry_run_dynamic_distribute(create_tmp_simulation_config_file, mpi_ranks,
+                                    copy_memory_files):
     nd = Neurodamus(create_tmp_simulation_config_file, dry_run=False, lb_mode="Memory",
                      num_target_ranks=2)
     nd.run()
@@ -132,6 +129,7 @@ def test_dry_run_dynamic_distribute(create_tmp_simulation_config_file, mpi_ranks
     # RingA neuron 1 always in rank 0, neuron 2 always in rank 1
     # but neuron 3 can be in  either of the two
     if rank == 0:
-        assert is_subset(rank_allocation_standard['RingA'][(0, 0)], [1, 3])
+        expected_allocation = {'RingA': {(0, 0): [1, 3]}}
     elif rank == 1:
-        assert is_subset(rank_allocation_standard['RingA'][(1, 0)], [2, 3])
+        expected_allocation = {'RingA': {(1, 0): [2]}}
+    assert rank_allocation_standard == expected_allocation
diff --git a/tests/unit/test_dry_run.py b/tests/unit/test_dry_run.py
@@ -2,29 +2,21 @@
 import numpy as np
 import numpy.testing as npt
 import unittest.mock
-import tempfile
 from pathlib import Path
 
 from tests.utils import defaultdict_to_standard_types
-from ..conftest import RINGTEST_DIR, NGV_DIR, PLATFORM_SYSTEM
+from ..conftest import NGV_DIR, PLATFORM_SYSTEM
 from neurodamus import Neurodamus
 
-TMP_FOLDER = tempfile.mkdtemp()
-
-
-@pytest.fixture(autouse=True)
-def change_test_dir(monkeypatch):
-    """
-    All tests in this file are using the same working directory, i.e TMP_FOLDER
-    Because test_dynamic_distribute requires memory_per_metype.json generated in the previous test
-    """
-    monkeypatch.chdir(TMP_FOLDER)
-
 
+@pytest.mark.parametrize("create_tmp_simulation_config_file", [
+    {
+        "simconfig_fixture": "ringtest_baseconfig",
+    },
+], indirect=True)
 @pytest.mark.forked
-def test_dry_run_memory_use():
-    nd = Neurodamus(str(RINGTEST_DIR / "simulation_config.json"),  dry_run=True, num_target_ranks=2)
-
+def test_dry_run_memory_use(create_tmp_simulation_config_file):
+    nd = Neurodamus(create_tmp_simulation_config_file,  dry_run=True, num_target_ranks=2)
     nd.run()
 
     isMacOS = PLATFORM_SYSTEM == "Darwin"
@@ -40,9 +32,14 @@ def test_dry_run_memory_use():
     assert nd._dry_run_stats.suggested_nodes > 0
 
 
+@pytest.mark.parametrize("create_tmp_simulation_config_file", [
+    {
+        "simconfig_fixture": "ringtest_baseconfig",
+    },
+], indirect=True)
 @pytest.mark.forked
-def test_dry_run_distribute_cells():
-    nd = Neurodamus(str(RINGTEST_DIR / "simulation_config.json"),  dry_run=True, num_target_ranks=2)
+def test_dry_run_distribute_cells(create_tmp_simulation_config_file):
+    nd = Neurodamus(create_tmp_simulation_config_file,  dry_run=True, num_target_ranks=2)
     nd.run()
 
     # Test allocation
@@ -89,31 +86,51 @@ def test_dry_run_distribute_cells():
     }
     assert rank_allocation_standard == expected_allocation
 
-    Path(("allocation_r1_c1.pkl.gz")).unlink(missing_ok=True)
-    Path(("allocation_r2_c1.pkl.gz")).unlink(missing_ok=True)
-
 
 @pytest.mark.parametrize("create_tmp_simulation_config_file", [
     {
         "simconfig_fixture": "ringtest_baseconfig",
     },
 ], indirect=True)
 @pytest.mark.forked
-def test_dry_run_dynamic_distribute(create_tmp_simulation_config_file):
+def test_dry_run_lb_mode_memory(create_tmp_simulation_config_file, copy_memory_files):
     nd = Neurodamus(create_tmp_simulation_config_file, dry_run=False, lb_mode="Memory",
                      num_target_ranks=1)
 
     rank_alloc, _, _ = nd._dry_run_stats.distribute_cells_with_validation(2, 1)
     rank_allocation_standard = defaultdict_to_standard_types(rank_alloc)
     expected_allocation = {
         'RingA': {
-            (0, 0): [1],
-            (1, 0): [2, 3]
+            (0, 0): [1, 3],
+            (1, 0): [2]
         }
     }
     assert rank_allocation_standard == expected_allocation
 
 
+@pytest.mark.parametrize("create_tmp_simulation_config_file", [
+    {
+        "simconfig_fixture": "ringtest_baseconfig",
+    },
+], indirect=True)
+@pytest.mark.forked
+def test_dry_run_lb_mode_memory_fail(create_tmp_simulation_config_file):
+    with pytest.raises(FileNotFoundError,
+                       match="No such file cell_memory_usage.json. "
+                       "Neurodamus must be run with --dry-run mode before proceeding."):
+        Neurodamus(create_tmp_simulation_config_file, dry_run=False, lb_mode="Memory",
+                     num_target_ranks=1)
+
+    with Path("cell_memory_usage.json").open('w'):
+        pass
+
+    with pytest.raises(FileNotFoundError,
+                       match="No such file memory_per_metype.json. "
+                       "Neurodamus must be run with --dry-run mode before proceeding."):
+        Neurodamus(create_tmp_simulation_config_file, dry_run=False, lb_mode="Memory",
+                     num_target_ranks=1)
+
+
 @pytest.mark.forked
 def test_dry_run_ngv_fail():
     with pytest.raises(Exception, match="Dry run not available for ngv circuit"):