make archival take an optional output path (#2510)

DeNeutoy · web-flow · commit 6d8da97312bf · 2019-02-21T15:18:18.000-08:00
diff --git a/allennlp/models/archival.py b/allennlp/models/archival.py
@@ -88,7 +88,8 @@ def extract_module(self, path: str, freeze: bool = True) -> Module:
 
 def archive_model(serialization_dir: str,
                   weights: str = _DEFAULT_WEIGHTS,
-                  files_to_archive: Dict[str, str] = None) -> None:
+                  files_to_archive: Dict[str, str] = None,
+                  archive_path: str = None) -> None:
     """
     Archive the model weights, its training configuration, and its
     vocabulary to `model.tar.gz`. Include the additional ``files_to_archive``
@@ -104,6 +105,10 @@ def archive_model(serialization_dir: str,
         A mapping {flattened_key -> filename} of supplementary files to include
         in the archive. That is, if you wanted to include ``params['model']['weights']``
         then you would specify the key as `"model.weights"`.
+    archive_path : ``str``, optional, (default = None)
+        A full path to serialize the model to. The default is "model.tar.gz" inside the
+        serialization_dir. If you pass a directory here, we'll serialize the model
+        to "model.tar.gz" inside the directory.
     """
     weights_file = os.path.join(serialization_dir, weights)
     if not os.path.exists(weights_file):
@@ -121,8 +126,12 @@ def archive_model(serialization_dir: str,
         with open(fta_filename, 'w') as fta_file:
             fta_file.write(json.dumps(files_to_archive))
 
-
-    archive_file = os.path.join(serialization_dir, "model.tar.gz")
+    if archive_path is not None:
+        archive_file = archive_path
+        if os.path.isdir(archive_file):
+            archive_file = os.path.join(archive_file, "model.tar.gz")
+    else:
+        archive_file = os.path.join(serialization_dir, "model.tar.gz")
     logger.info("archiving weights and vocabulary to %s", archive_file)
     with tarfile.open(archive_file, 'w:gz') as archive:
         archive.add(config_file, arcname=CONFIG_NAME)
diff --git a/allennlp/tests/models/archival_test.py b/allennlp/tests/models/archival_test.py
@@ -76,6 +76,17 @@ def test_archiving(self):
         params2 = archive.config
         assert params2.as_dict() == params_copy
 
+    def test_archive_model_uses_archive_path(self):
+        """Check that ``archive_model`` writes the archive to an explicit ``archive_path``."""
+        serialization_dir = self.TEST_DIR / 'serialization'
+        # Train a model into the serialization directory so there is something to archive.
+        train_model(self.params, serialization_dir=serialization_dir)
+        # Archive to a non-default file name rather than "model.tar.gz".
+        archive_model(serialization_dir=serialization_dir,
+                      archive_path=serialization_dir / "new_path.tar.gz")
+        archive = load_archive(serialization_dir / 'new_path.tar.gz')
+        assert archive
+
     def test_extra_files(self):
 
         serialization_dir = self.TEST_DIR / 'serialization'