
Commit 71ebcd8

add infer_and_cast (#2324)
* add infer_and_cast
* remove print statement + add comment
* address PR feedback
* pylint
1 parent 059b057 commit 71ebcd8

File tree

4 files changed: +109 -6 lines changed

  allennlp/common/params.py
  allennlp/tests/common/params_test.py
  allennlp/tests/training/optimizer_test.py
  allennlp/training/optimizers.py


allennlp/common/params.py (+49 -4)

@@ -31,6 +31,46 @@ def evaluate_snippet(_filename: str, expr: str, **_kwargs) -> str:
 
 logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
 
+# pylint: disable=inconsistent-return-statements
+def infer_and_cast(value: Any):
+    """
+    In some cases we'll be feeding params dicts to functions we don't own;
+    for example, PyTorch optimizers. In that case we can't use ``pop_int``
+    or similar to force casts (which means you can't specify ``int`` parameters
+    using environment variables). This function takes something that looks JSON-like
+    and recursively casts things that look like (bool, int, float) to (bool, int, float).
+    """
+    # pylint: disable=too-many-return-statements
+    if isinstance(value, (int, float, bool)):
+        # Already one of our desired types, so leave as is.
+        return value
+    elif isinstance(value, list):
+        # Recursively call on each list element.
+        return [infer_and_cast(item) for item in value]
+    elif isinstance(value, dict):
+        # Recursively call on each dict value.
+        return {key: infer_and_cast(item) for key, item in value.items()}
+    elif isinstance(value, str):
+        # If it looks like a bool, make it a bool.
+        if value.lower() == "true":
+            return True
+        elif value.lower() == "false":
+            return False
+        else:
+            # See if it could be an int.
+            try:
+                return int(value)
+            except ValueError:
+                pass
+            # See if it could be a float.
+            try:
+                return float(value)
+            except ValueError:
+                # Just return it as a string.
+                return value
+    else:
+        raise ValueError(f"cannot infer type of {value}")
+# pylint: enable=inconsistent-return-statements
 
 def unflatten(flat_dict: Dict[str, Any]) -> Dict[str, Any]:
     """

@@ -259,18 +299,23 @@ def pop_choice(self, key: str, choices: List[Any], default_to_first_choice: bool
             raise ConfigurationError(message)
         return value
 
-    def as_dict(self, quiet=False):
+    def as_dict(self, quiet: bool = False, infer_type_and_cast: bool = False):
         """
         Sometimes we need to just represent the parameters as a dict, for instance when we pass
-        them to a Keras layer(so that they can be serialised).
+        them to PyTorch code.
 
         Parameters
         ----------
         quiet: bool, optional (default = False)
             Whether to log the parameters before returning them as a dict.
         """
+        if infer_type_and_cast:
+            params_as_dict = infer_and_cast(self.params)
+        else:
+            params_as_dict = self.params
+
         if quiet:
-            return self.params
+            return params_as_dict
 
         def log_recursively(parameters, history):
             for key, value in parameters.items():

@@ -285,7 +330,7 @@ def log_recursively(parameters, history):
                             "used subsequently.")
         logger.info("CURRENTLY DEFINED PARAMETERS: ")
         log_recursively(self.params, self.history)
-        return self.params
+        return params_as_dict
 
     def as_flat_dict(self):
         """

allennlp/tests/common/params_test.py (+25 -1)

@@ -7,7 +7,7 @@
 
 import pytest
 
-from allennlp.common.params import Params, unflatten, with_fallback, parse_overrides
+from allennlp.common.params import Params, unflatten, with_fallback, parse_overrides, infer_and_cast
 from allennlp.common.testing import AllenNlpTestCase
 
 

@@ -314,3 +314,27 @@ def test_to_file(self):
         assert json.dumps(expected_ordered_params_dict) == json.dumps(ordered_params_dict)
         # check without preference orders doesn't give error
         params.to_file(file_path)
+
+    def test_infer_and_cast(self):
+        lots_of_strings = {
+                "a": ["10", "1.3", "true"],
+                "b": {"x": 10, "y": "20.1", "z": "other things"},
+                "c": "just a string"
+        }
+
+        casted = {
+                "a": [10, 1.3, True],
+                "b": {"x": 10, "y": 20.1, "z": "other things"},
+                "c": "just a string"
+        }
+
+        assert infer_and_cast(lots_of_strings) == casted
+
+        contains_bad_data = {"x": 10, "y": int}
+        with pytest.raises(ValueError, match="cannot infer type"):
+            infer_and_cast(contains_bad_data)
+
+        params = Params(lots_of_strings)
+
+        assert params.as_dict() == lots_of_strings
+        assert params.as_dict(infer_type_and_cast=True) == casted

allennlp/tests/training/optimizer_test.py (+27)

@@ -1,4 +1,6 @@
 # pylint: disable=invalid-name
+import pytest
+
 from allennlp.common.testing import AllenNlpTestCase
 from allennlp.data import Vocabulary
 from allennlp.common.params import Params

@@ -75,6 +77,31 @@ def test_optimizer_parameter_groups(self):
         assert len(param_groups[2]['params']) == 3
 
 
+    def test_parameter_type_inference(self):
+        # Should work ok even with lr as a string
+        optimizer_params = Params({
+                "type": "sgd",
+                "lr": "0.1"
+        })
+
+        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
+        optimizer = Optimizer.from_params(parameters, optimizer_params)
+
+        assert optimizer.defaults["lr"] == 0.1
+
+        # But should crash (in the Pytorch code) if we don't do the type inference
+        optimizer_params = Params({
+                "type": "sgd",
+                "lr": "0.1",
+                "infer_type_and_cast": False
+        })
+
+        parameters = [[n, p] for n, p in self.model.named_parameters() if p.requires_grad]
+
+        with pytest.raises(TypeError):
+            optimizer = Optimizer.from_params(parameters, optimizer_params)
+
+
 class TestDenseSparseAdam(AllenNlpTestCase):
 
     def setUp(self):
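The TypeError the second half of this test expects is raised inside PyTorch rather than AllenNLP: the SGD constructor validates the learning rate (roughly, checking lr < 0.0), and comparing a str against a float raises in Python 3. A standalone sketch, assuming a PyTorch version that performs this validation:

    import torch

    model = torch.nn.Linear(2, 2)

    # With infer_type_and_cast disabled, the string "0.1" reaches the constructor unchanged:
    torch.optim.SGD(model.parameters(), lr="0.1")
    # TypeError: '<' not supported between instances of 'str' and 'float'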

allennlp/training/optimizers.py (+8 -1)

@@ -121,7 +121,14 @@ def from_params(cls, model_parameters: List, params: Params):  # type: ignore
             else:
                 num_parameters += parameter_group.numel()
         logger.info("Number of trainable parameters: %s", num_parameters)
-        return Optimizer.by_name(optimizer)(parameter_groups, **params.as_dict())  # type: ignore
+
+        # By default we cast things that e.g. look like floats to floats before handing them
+        # to the Optimizer constructor, but if you want to disable that behavior you could add a
+        #       "infer_type_and_cast": false
+        # key to your "trainer.optimizer" config.
+        infer_type_and_cast = params.pop_bool("infer_type_and_cast", True)
+        params_as_dict = params.as_dict(infer_type_and_cast=infer_type_and_cast)
+        return Optimizer.by_name(optimizer)(parameter_groups, **params_as_dict)  # type: ignore
 
 # We just use the Pytorch optimizers, so here we force them into
 # Registry._registry so we can build them from params.
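Per the comment above, the cast is enabled by default, so opting out is a matter of one extra key. A sketch of the relevant config fragment (surrounding keys omitted; only the trainer.optimizer block matters here):

    "trainer": {
        "optimizer": {
            "type": "sgd",
            "lr": 0.1,
            "infer_type_and_cast": false
        }
    }

With the cast disabled, string-valued entries are passed through to the optimizer constructor verbatim.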
