This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit 71a98c2

stricter typing for Optional[T] types, improve handling of Lazy params (#4743)
* stricter typing for Optional[T] types
* fix linting error
* fix checkpointer test
* fix add_field method
* fix '_extract_token_and_type_ids' method
* fix typing on Lazy
* improve Lazy API
* add notes about Lazy to CHANGELOG
* fix CHANGELOG
1 parent 27edfbf commit 71a98c2

Note: this is a large commit, so some content is hidden by default and not all 55 changed files are shown below.

55 files changed (+318 -242 lines)

CHANGELOG.md (+10)

```diff
@@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## Unreleased

+### Changed
+
+- Enforced stricter typing requirements around the use of `Optional[T]` types.
+- Changed the behavior of `Lazy` types in `from_params` methods. Previously, if you defined a `Lazy` parameter like
+  `foo: Lazy[Foo] = None` in a custom `from_params` classmethod, then `foo` would actually never be `None`.
+  This behavior is now different. If no params were given for `foo`, it will be `None`.
+  You can also now set default values for `foo` like `foo: Lazy[Foo] = Lazy(Foo)`.
+  Or, if you want a default value but also want to allow for `None` values, you can
+  write it like this: `foo: Optional[Lazy[Foo]] = Lazy(Foo)`.
+
 ### Fixed

 - Made it possible to instantiate `TrainerCallback` from config files.
```
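To make the new semantics concrete, here is a minimal sketch of the three default styles from the entry above; `Foo` and `MyClass` are hypothetical names for illustration:

```python
from typing import Optional

from allennlp.common import FromParams, Lazy


class Foo(FromParams):
    def __init__(self, x: int = 0) -> None:
        self.x = x


class MyClass(FromParams):
    def __init__(
        self,
        foo: Lazy[Foo] = None,                 # None when no params are given for "foo"
        bar: Lazy[Foo] = Lazy(Foo),            # a default that can always be constructed
        baz: Optional[Lazy[Foo]] = Lazy(Foo),  # default value, but None is also allowed
    ) -> None:
        # Under the new behavior an absent Lazy param really is None,
        # so a plain None check is now the correct idiom.
        self.foo = None if foo is None else foo.construct()
        self.bar = bar.construct()  # construct() now returns T, not Optional[T]
        self.baz = None if baz is None else baz.construct()
```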

Makefile (+1 -5)

```diff
@@ -50,11 +50,7 @@ format :

 .PHONY : typecheck
 typecheck :
-	mypy . \
-		--ignore-missing-imports \
-		--no-strict-optional \
-		--no-site-packages \
-		--cache-dir=/dev/null
+	mypy . --cache-dir=/dev/null

 .PHONY : test
 test :
```

allennlp/__main__.py (+1 -1)

```diff
@@ -6,7 +6,7 @@
 if os.environ.get("ALLENNLP_DEBUG"):
     LEVEL = logging.DEBUG
 else:
-    level_name = os.environ.get("ALLENNLP_LOG_LEVEL")
+    level_name = os.environ.get("ALLENNLP_LOG_LEVEL", "INFO")
     LEVEL = logging._nameToLevel.get(level_name, logging.INFO)

 sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))))
```
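The reason the `"INFO"` default matters for this commit: `os.environ.get(key)` is typed `Optional[str]`, and with `--no-strict-optional` removed from the Makefile that value can no longer be used where a `str` is expected. A small sketch of the distinction:

```python
import logging
import os

# Optional[str]: fails strict optional checking when used as a str key.
maybe_name = os.environ.get("ALLENNLP_LOG_LEVEL")

# str: the fallback guarantees a valid key for the lookup below.
level_name = os.environ.get("ALLENNLP_LOG_LEVEL", "INFO")
LEVEL = logging._nameToLevel.get(level_name, logging.INFO)
```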

allennlp/commands/predict.py (+2 -8)

```diff
@@ -128,16 +128,10 @@ def __init__(

         self._predictor = predictor
         self._input_file = input_file
-        if output_file is not None:
-            self._output_file = open(output_file, "w")
-        else:
-            self._output_file = None
+        self._output_file = None if output_file is None else open(output_file, "w")
         self._batch_size = batch_size
         self._print_to_console = print_to_console
-        if has_dataset_reader:
-            self._dataset_reader = predictor._dataset_reader
-        else:
-            self._dataset_reader = None
+        self._dataset_reader = None if not has_dataset_reader else predictor._dataset_reader

     def _predict_json(self, batch_data: List[JsonDict]) -> Iterator[str]:
         if len(batch_data) == 1:
```

allennlp/commands/train.py (+14 -16)

```diff
@@ -401,6 +401,8 @@ def _train_worker(
     include_package = include_package or []

     if distributed:
+        assert distributed_device_ids is not None
+
         # Since the worker is spawned and not forked, the extra imports need to be done again.
         # Both the ones from the plugins and the ones from `include_package`.
         import_plugins()
@@ -556,7 +558,7 @@ def from_partial_objects(
         model: Lazy[Model],
         data_loader: Lazy[DataLoader],
         trainer: Lazy[Trainer],
-        vocabulary: Lazy[Vocabulary] = None,
+        vocabulary: Lazy[Vocabulary] = Lazy(Vocabulary),
         datasets_for_vocab_creation: List[str] = None,
         validation_dataset_reader: DatasetReader = None,
         validation_data_path: str = None,
@@ -610,7 +612,7 @@ def from_partial_objects(
         trainer: `Lazy[Trainer]`
             The `Trainer` that actually implements the training loop. This is a lazy object because
             it depends on the model that's going to be trained.
-        vocabulary: `Lazy[Vocabulary]`, optional (default=`None`)
+        vocabulary: `Lazy[Vocabulary]`, optional (default=`Lazy(Vocabulary)`)
            The `Vocabulary` that we will use to convert strings in the data to integer ids (and
            possibly set sizes of embedding matrices in the `Model`). By default we construct the
            vocabulary from the instances that we read.
@@ -664,8 +666,7 @@ def from_partial_objects(
         )

         vocabulary_ = vocabulary.construct(instances=instance_generator)
-        if not vocabulary_:
-            vocabulary_ = Vocabulary.from_instances(instance_generator)
+
         model_ = model.construct(vocab=vocabulary_, serialization_dir=serialization_dir)

         # Initializing the model can have side effect of expanding the vocabulary.
@@ -682,13 +683,9 @@ def from_partial_objects(

         data_loader_ = data_loader.construct(dataset=datasets["train"])
         validation_data = datasets.get("validation")
+        validation_data_loader_: Optional[DataLoader] = None
         if validation_data is not None:
-            # Because of the way Lazy[T] works, we can't check it's existence
-            # _before_ we've tried to construct it. It returns None if it is not
-            # present, so we try to construct it first, and then afterward back off
-            # to the data_loader configuration used for training if it returns None.
-            validation_data_loader_ = validation_data_loader.construct(dataset=validation_data)
-            if validation_data_loader_ is None:
+            if validation_data_loader is None:
                 validation_data_loader_ = data_loader.construct(dataset=validation_data)
                 if getattr(validation_data_loader_, "_batches_per_epoch", None) is not None:
                     warnings.warn(
@@ -698,16 +695,16 @@ def from_partial_objects(
                         "validation datasets for each epoch.",
                         UserWarning,
                     )
-        else:
-            validation_data_loader_ = None
+            else:
+                validation_data_loader_ = validation_data_loader.construct(dataset=validation_data)

         test_data = datasets.get("test")
+        test_data_loader: Optional[DataLoader] = None
         if test_data is not None:
-            test_data_loader = validation_data_loader.construct(dataset=test_data)
-            if test_data_loader is None:
+            if validation_data_loader is None:
                 test_data_loader = data_loader.construct(dataset=test_data)
-        else:
-            test_data_loader = None
+            else:
+                test_data_loader = validation_data_loader.construct(dataset=test_data)

         # We don't need to pass serialization_dir and local_rank here, because they will have been
         # passed through the trainer by from_params already, because they were keyword arguments to
@@ -717,6 +714,7 @@ def from_partial_objects(
             data_loader=data_loader_,
             validation_data_loader=validation_data_loader_,
         )
+        assert trainer_ is not None

         return cls(
             serialization_dir=serialization_dir,
```
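The validation and test loader rewrites above both follow the same new pattern: test the `Lazy` parameter itself for `None`, then construct, instead of constructing first and backing off when the result is `None`. A self-contained sketch of that pattern (the `Loader` class and `build_loaders` helper are illustrative, not part of the library):

```python
from typing import Optional, Tuple

from allennlp.common import FromParams, Lazy


class Loader(FromParams):
    def __init__(self, batch_size: int = 32) -> None:
        self.batch_size = batch_size


def build_loaders(
    data_loader: Lazy[Loader],
    validation_data_loader: Optional[Lazy[Loader]] = None,
) -> Tuple[Loader, Loader]:
    train_loader = data_loader.construct()
    # New style: check the Lazy for None *before* constructing.
    if validation_data_loader is None:
        # Fall back to the training loader configuration.
        val_loader = data_loader.construct()
    else:
        val_loader = validation_data_loader.construct()
    return train_loader, val_loader
```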

allennlp/common/file_utils.py (+1 -1)

```diff
@@ -445,7 +445,7 @@ class method.
     The unix timestamp of when the corresponding resource was cached or extracted.
     """

-    size: int = None
+    size: int = 0
     """
     The size of the corresponding resource, in bytes.
     """
```

allennlp/common/from_params.py (+8 -7)

```diff
@@ -112,7 +112,9 @@ def remove_optional(annotation: type):
     return annotation


-def infer_params(cls: Type[T], constructor: Callable[..., T] = None) -> Dict[str, Any]:
+def infer_params(
+    cls: Type[T], constructor: Union[Callable[..., T], Callable[[T], None]] = None
+) -> Dict[str, Any]:
     if constructor is None:
         constructor = cls.__init__

@@ -298,9 +300,6 @@ def pop_and_construct_arg(

     popped_params = params.pop(name, default) if default != _NO_DEFAULT else params.pop(name)
     if popped_params is None:
-        origin = getattr(annotation, "__origin__", None)
-        if origin == Lazy:
-            return Lazy(lambda **kwargs: None)
         return None

     return construct_arg(class_name, name, popped_params, annotation, default, **extras)
@@ -450,7 +449,8 @@ def construct_arg(
         )
     elif origin == Lazy:
         if popped_params is default:
-            return Lazy(lambda **kwargs: default)
+            return default
+
         value_cls = args[0]
         subextras = create_extras(value_cls, extras)

@@ -509,7 +509,7 @@ def from_params(
         cls: Type[T],
         params: Params,
         constructor_to_call: Callable[..., T] = None,
-        constructor_to_inspect: Callable[..., T] = None,
+        constructor_to_inspect: Union[Callable[..., T], Callable[[T], None]] = None,
         **extras,
     ) -> T:
         """
@@ -584,7 +584,7 @@ def from_params(
                 constructor_to_inspect = subclass.__init__
                 constructor_to_call = subclass  # type: ignore
             else:
-                constructor_to_inspect = getattr(subclass, constructor_name)
+                constructor_to_inspect = cast(Callable[..., T], getattr(subclass, constructor_name))
                 constructor_to_call = constructor_to_inspect

             if hasattr(subclass, "from_params"):
@@ -623,6 +623,7 @@ def from_params(
             params.assert_empty(cls.__name__)
         else:
             # This class has a constructor, so create kwargs for it.
+            constructor_to_inspect = cast(Callable[..., T], constructor_to_inspect)
            kwargs = create_kwargs(constructor_to_inspect, cls, params, **extras)

        return constructor_to_call(**kwargs)  # type: ignore
```
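The net effect of the two `from_params` changes above: when a `Lazy` argument is absent from the params, its declared default now passes through untouched rather than being wrapped in a `Lazy` whose `construct()` returns `None`. A hedged sketch of the observable behavior (`Foo` and `Bar` are hypothetical names):

```python
from typing import Optional

from allennlp.common import FromParams, Lazy, Params


class Foo(FromParams):
    def __init__(self, x: int = 0) -> None:
        self.x = x


class Bar(FromParams):
    def __init__(self, foo: Optional[Lazy[Foo]] = None) -> None:
        self.foo = foo


# No "foo" key in the params: previously self.foo would have been a Lazy
# whose construct() returned None; now it is the declared default, None.
bar = Bar.from_params(Params({}))
assert bar.foo is None

# With params present, foo arrives as a real Lazy[Foo].
bar = Bar.from_params(Params({"foo": {"x": 3}}))
assert bar.foo is not None and bar.foo.construct().x == 3
```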

allennlp/common/lazy.py (+32 -20)

````diff
@@ -1,14 +1,19 @@
-from typing import Callable, Generic, TypeVar, Optional
+import inspect
+from typing import Callable, Generic, TypeVar, Type, Union
+
+from allennlp.common.params import Params
+

 T = TypeVar("T")


 class Lazy(Generic[T]):
     """
     This class is for use when constructing objects using `FromParams`, when an argument to a
-    constructor has a _sequential dependency_ with another argument to the same constructor. For
-    example, in a `Trainer` class you might want to take a `Model` and an `Optimizer` as arguments,
-    but the `Optimizer` needs to be constructed using the parameters from the `Model`. You can give
+    constructor has a _sequential dependency_ with another argument to the same constructor.
+
+    For example, in a `Trainer` class you might want to take a `Model` and an `Optimizer` as arguments,
+    but the `Optimizer` needs to be constructed using the parameters from the `Model`. You can give
     the type annotation `Lazy[Optimizer]` to the optimizer argument, then inside the constructor
     call `optimizer.construct(parameters=model.parameters)`.

@@ -21,26 +26,33 @@ class Lazy(Generic[T]):
     construction is actually found in `FromParams`, where we have a special case for a `Lazy` type
     annotation.

-    !!! Warning
-        The way this class is used in from_params means that optional constructor arguments CANNOT
-        be compared to `None` _before_ it is constructed. See the example below for correct usage.
-
-    ```
+    ```python
     @classmethod
-    def my_constructor(cls, some_object: Lazy[MyObject] = None) -> MyClass:
-        ...
-        # WRONG! some_object will never be None at this point, it will be
-        # a Lazy[] that returns None
-        obj = some_object or MyObjectDefault()
-        # CORRECT:
-        obj = some_object.construct(kwarg=kwarg) or MyObjectDefault()
-        ...
+    def my_constructor(
+        cls,
+        some_object: Lazy[MyObject],
+        optional_object: Lazy[MyObject] = None,
+        required_object_with_default: Lazy[MyObject] = Lazy(MyObjectDefault),
+    ) -> MyClass:
+        obj1 = some_object.construct()
+        obj2 = None if optional_object is None else optional_object.construct()
+        obj3 = required_object_with_default.construct()
     ```

     """

-    def __init__(self, constructor: Callable[..., T]):
-        self._constructor = constructor
+    def __init__(self, constructor: Union[Type[T], Callable[..., T]]):
+        constructor_to_use: Callable[..., T]
+
+        if inspect.isclass(constructor):
+
+            def constructor_to_use(**kwargs):
+                return constructor.from_params(Params({}), **kwargs)  # type: ignore[union-attr]
+
+        else:
+            constructor_to_use = constructor
+
+        self._constructor = constructor_to_use

-    def construct(self, **kwargs) -> Optional[T]:
+    def construct(self, **kwargs) -> T:
         return self._constructor(**kwargs)
````
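With the new `__init__`, a `Lazy` can be built directly from a class (in which case `construct` goes through `from_params` with empty `Params`) or, as before, from any callable. A short usage sketch, assuming `Lazy` and `Params` are importable from `allennlp.common` and using a hypothetical `MyObject`:

```python
from allennlp.common import FromParams, Lazy, Params


class MyObject(FromParams):
    def __init__(self, size: int = 1) -> None:
        self.size = size


# From a class: construct() calls MyObject.from_params(Params({}), **kwargs).
obj1 = Lazy(MyObject).construct(size=3)

# From a callable, e.g. closing over params pulled from a config file.
params = Params({"size": 5})
obj2 = Lazy(lambda **kwargs: MyObject.from_params(params, **kwargs)).construct()

assert obj1.size == 3 and obj2.size == 5
```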

allennlp/common/logging.py (+1 -1)

```diff
@@ -99,7 +99,7 @@ def prepare_global_logging(
     if os.environ.get("ALLENNLP_DEBUG"):
         LEVEL = logging.DEBUG
     else:
-        level_name = os.environ.get("ALLENNLP_LOG_LEVEL")
+        level_name = os.environ.get("ALLENNLP_LOG_LEVEL", "INFO")
         LEVEL = logging._nameToLevel.get(level_name, logging.INFO)

     file_handler.setLevel(LEVEL)
```

allennlp/common/params.py (+4 -4)

```diff
@@ -6,7 +6,7 @@
 from collections import OrderedDict
 from collections.abc import MutableMapping
 from os import PathLike
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Union, Optional

 from overrides import overrides

@@ -250,7 +250,7 @@ def pop(self, key: str, default: Any = DEFAULT, keep_as_dict: bool = False) -> A
         else:
             return self._check_is_dict(key, value)

-    def pop_int(self, key: str, default: Any = DEFAULT) -> int:
+    def pop_int(self, key: str, default: Any = DEFAULT) -> Optional[int]:
         """
         Performs a pop and coerces to an int.
         """
@@ -260,7 +260,7 @@ def pop_int(self, key: str, default: Any = DEFAULT) -> int:
         else:
             return int(value)

-    def pop_float(self, key: str, default: Any = DEFAULT) -> float:
+    def pop_float(self, key: str, default: Any = DEFAULT) -> Optional[float]:
         """
         Performs a pop and coerces to a float.
         """
@@ -270,7 +270,7 @@ def pop_float(self, key: str, default: Any = DEFAULT) -> float:
         else:
             return float(value)

-    def pop_bool(self, key: str, default: Any = DEFAULT) -> bool:
+    def pop_bool(self, key: str, default: Any = DEFAULT) -> Optional[bool]:
         """
         Performs a pop and coerces to a bool.
         """
```

allennlp/common/registrable.py (+3 -3)

```diff
@@ -38,8 +38,8 @@ class Registrable(FromParams):
     a subclass to load all other subclasses and the abstract class).
     """

-    _registry: Dict[Type, Dict[str, Tuple[Type, str]]] = defaultdict(dict)
-    default_implementation: str = None
+    _registry: Dict[Type, Dict[str, Tuple[Type, Optional[str]]]] = defaultdict(dict)
+    default_implementation: Optional[str] = None

     @classmethod
     def register(cls: Type[T], name: str, constructor: str = None, exist_ok: bool = False):
@@ -152,7 +152,7 @@ def resolve_class_name(cls: Type[T], name: str) -> Tuple[Type[T], Optional[str]]
         function to use).
         """
         if name in Registrable._registry[cls]:
-            subclass, constructor = Registrable._registry[cls].get(name)
+            subclass, constructor = Registrable._registry[cls][name]
             return subclass, constructor
         elif "." in name:
             # This might be a fully qualified class name, so we'll try importing its "module"
```

allennlp/common/testing/distributed_test.py (+7 -6)

```diff
@@ -9,9 +9,9 @@

 def init_process(
     process_rank: int,
-    distributed_device_ids: List[int] = None,
-    world_size: int = 1,
-    func: Callable = None,
+    world_size: int,
+    distributed_device_ids: List[int],
+    func: Callable,
     func_args: Tuple = None,
     func_kwargs: Dict[str, Any] = None,
     master_addr: str = "127.0.0.1",
@@ -40,13 +40,13 @@ def init_process(
         timeout=datetime.timedelta(seconds=120),
     )

-    func(global_rank, world_size, gpu_id, *func_args, **func_kwargs)
+    func(global_rank, world_size, gpu_id, *(func_args or []), **(func_kwargs or {}))

     dist.barrier()


 def run_distributed_test(
-    device_ids: List[int] = [-1, -1],
+    device_ids: List[int] = None,
     func: Callable = None,
     *args,
     **kwargs,
@@ -62,14 +62,15 @@ def run_distributed_test(
     func: `Callable`
         `func` needs to be global for spawning the processes, so that it can be pickled.
     """
+    device_ids = device_ids or [-1, -1]
     check_for_gpu(device_ids)
     # "fork" start method is the default and should be preferred, except when we're
     # running the tests on GPU, in which case we need to use "spawn".
     start_method = "spawn" if any(x >= 0 for x in device_ids) else "fork"
     nprocs = world_size = len(device_ids)
     mp.start_processes(
         init_process,
-        args=(device_ids, world_size, func, args, kwargs),
+        args=(world_size, device_ids, func, args, kwargs),
         nprocs=nprocs,
         start_method=start_method,
     )
```
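The `device_ids` change is the standard fix for a mutable default argument: the list default is replaced by `None` plus a fallback inside the function, so each call gets a fresh list. A minimal illustration with a hypothetical `pick_devices` helper:

```python
from typing import List, Optional


def pick_devices(device_ids: Optional[List[int]] = None) -> List[int]:
    # A fresh list per call; a literal [-1, -1] default would be a single
    # object shared across every invocation of the function.
    device_ids = device_ids or [-1, -1]
    return device_ids
```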
