allenai
diff --git a/‎CHANGELOG.md
+8-1 b/‎CHANGELOG.md
+8-1
diff --git a/‎allennlp/commands/train.py
+6-3 b/‎allennlp/commands/train.py
+6-3
diff --git a/‎allennlp/common/testing/model_test_case.py
+6 b/‎allennlp/common/testing/model_test_case.py
+6
diff --git a/‎allennlp/common/util.py
+20-3 b/‎allennlp/common/util.py
+20-3
diff --git a/‎allennlp/data/data_loaders/multi_process_data_loader.py
+1-1 b/‎allennlp/data/data_loaders/multi_process_data_loader.py
+1-1
diff --git a/‎allennlp/data/dataset_readers/__init__.py
+1-1 b/‎allennlp/data/dataset_readers/__init__.py
+1-1
diff --git a/‎allennlp/data/dataset_readers/dataset_reader.py
+13-5 b/‎allennlp/data/dataset_readers/dataset_reader.py
+13-5
@@ -10,6 +10,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added `TensorCache` class for caching tensors on disk
+- Added reader for the NLVR2 dataset
+- Added cache for Detectron models that we might re-use several times in the code base
+- Added abstraction and concrete implementation for image loading
+- Added abstraction and concrete implementation for `GridEmbedder`
+- Added abstraction and demo implementation for an image augmentation module.
+- Added abstraction and concrete implementation for region detectors.
 - A new high-performance default `DataLoader`: `MultiProcessDataLoader`.
 - A `MultiTaskModel` and abstractions to use with it, including `Backbone` and `Head`.  The
   `MultiTaskModel` first runs its inputs through the `Backbone`, then passes the result (and
@@ -33,7 +40,7 @@ dataset at every epoch) and a `MultiTaskScheduler` (for ordering the instances w
 - The `DataLoader` now decides whether to load instances lazily or not.
   With the `PyTorchDataLoader` this is controlled with the `lazy` parameter, but with
   the `MultiProcessDataLoader` this is controlled by the `max_instances_in_memory` setting.
-- `TensorField` is now implemented in terms of torch tensors, not numpy.
+- `ArrayField` is now called `TensorField`, and implemented in terms of torch tensors, not numpy.
 
 
 ## Unreleased (1.x branch)
 
@@ -483,6 +483,9 @@ def _train_worker(
     return None
 
 
+# Accepted forms for the train / validation / test data path arguments: a single path
+# string, a list of path strings, or a dict mapping string keys to path strings.
+DataPath = Union[str, List[str], Dict[str, str]]
+
+
 class TrainModel(Registrable):
     """
     This class exists so that we can easily read a configuration file with the `allennlp train`
@@ -554,16 +557,16 @@ def from_partial_objects(
         serialization_dir: str,
         local_rank: int,
         dataset_reader: DatasetReader,
-        train_data_path: str,
+        train_data_path: DataPath,
         model: Lazy[Model],
         data_loader: Lazy[DataLoader],
         trainer: Lazy[Trainer],
         vocabulary: Lazy[Vocabulary] = Lazy(Vocabulary),
         datasets_for_vocab_creation: List[str] = None,
         validation_dataset_reader: DatasetReader = None,
-        validation_data_path: str = None,
+        validation_data_path: DataPath = None,
         validation_data_loader: Lazy[DataLoader] = None,
-        test_data_path: str = None,
+        test_data_path: DataPath = None,
         evaluate_on_test: bool = False,
         batch_weight_key: str = "",
     ) -> "TrainModel":
 
@@ -73,6 +73,7 @@ def ensure_model_can_train_save_and_load(
         metric_terminal_value: float = None,
         metric_tolerance: float = 1e-4,
         disable_dropout: bool = True,
+        seed: int = None,
     ):
         """
         # Parameters
@@ -108,6 +109,11 @@ def ensure_model_can_train_save_and_load(
             If True we will set all dropout to 0 before checking gradients. (Otherwise, with small
             datasets, you may get zero gradients because of unlucky dropout.)
         """
+        if seed is not None:
+            random.seed(seed)
+            numpy.random.seed(seed)
+            torch.manual_seed(seed)
+
         save_dir = self.TEST_DIR / "save_and_load_test"
         archive_file = save_dir / "model.tar.gz"
         model = train_model_from_file(param_file, save_dir, overrides=overrides)
 
@@ -27,6 +27,7 @@
     Tuple,
     TypeVar,
     Union,
+    Sequence,
 )
 
 import numpy
@@ -143,7 +144,7 @@ def lazy_groups_of(iterable: Iterable[A], group_size: int) -> Iterator[List[A]]:
 
 
 def pad_sequence_to_length(
-    sequence: List,
+    sequence: Sequence,
     desired_length: int,
     default_value: Callable[[], Any] = lambda: 0,
     padding_on_right: bool = True,
@@ -174,6 +175,7 @@ def pad_sequence_to_length(
 
     padded_sequence : `List`
     """
+    sequence = list(sequence)
     # Truncates the sequence to the desired length.
     if padding_on_right:
         padded_sequence = sequence[:desired_length]
@@ -342,8 +344,8 @@ def import_module_and_submodules(package_name: str) -> None:
         # Import at top level
         try:
             module = importlib.import_module(package_name)
-        except ModuleNotFoundError as err:
-            if err.name in ("detectron2", "torchvision"):
+        except ImportError as err:
+            if err.name in {"detectron2", "torchvision"}:
                 logger.warning(
                     "vision module '%s' is unavailable since '%s' is not installed",
                     package_name,
@@ -651,6 +653,21 @@ def format_size(size: int) -> str:
     return f"{size}B"
 
 
+def nan_safe_tensor_divide(numerator, denominator):
+    """Performs element-wise division and handles divide-by-zero.
+
+    Wherever `denominator` is zero, the corresponding elements of the result are set to
+    zero instead of the `nan` / `inf` values the raw division leaves there.
+
+    # Parameters
+
+    numerator : `torch.Tensor`
+        The dividend.
+    denominator : `torch.Tensor`
+        The divisor. It may have any shape that broadcasts against `numerator`.
+
+    # Returns
+
+    result : `torch.Tensor`
+        A newly allocated tensor holding the quotient; neither input is modified.
+    """
+    result = numerator / denominator
+    mask = denominator == 0.0
+    if not mask.any():
+        return result
+
+    # If the division broadcast `denominator` (e.g. shape `(n,)` against `(m, n)`), the mask
+    # is smaller than `result` and boolean indexing would raise, so expand it first.
+    if mask.shape != result.shape:
+        mask = mask.expand_as(result)
+
+    # remove nan
+    result[mask] = 0.0
+    return result
+
+
 def shuffle_iterable(i: Iterable[T], pool_size: int = 1024) -> Iterable[T]:
     import random
 
 
@@ -81,7 +81,7 @@ class MultiProcessDataLoader(DataLoader):
     max_instances_in_memory: `int`, optional (default = `None`)
         If not specified, all instances will be read and cached in memory for the duration
         of the data loader's life. This is generally ideal when your data can fit in memory
-        during training. However, when you're datasets are too big, using this option
+        during training. However, when your datasets are too big, using this option
         will turn on lazy loading, where only `max_instances_in_memory` instances are processed
         at a time.
 
 
@@ -20,7 +20,7 @@
 from allennlp.data.dataset_readers.text_classification_json import TextClassificationJsonReader
 
 try:
-    from allennlp.data.dataset_readers.nlvr2 import Nlvr2Reader
+    from allennlp.data.dataset_readers.vqav2 import VQAv2Reader
 except ModuleNotFoundError as err:
     if err.name not in ("detectron2", "torchvision"):
         raise
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 import itertools
 from os import PathLike
-from typing import Iterable, Iterator, Optional, Union, TypeVar
+from typing import Iterable, Iterator, Optional, Union, TypeVar, Dict, List
 import logging
 import warnings
 
@@ -58,6 +58,9 @@ class DistributedInfo:
 
 _T = TypeVar("_T")
 
+# A single filesystem location, given either as a path-like object or as a plain string.
+PathOrStr = Union[PathLike, str]
+# What `DatasetReader.read()` accepts as `file_path`: one path, a list of paths, or a
+# dict mapping string keys to paths.
+DatasetReaderInput = Union[PathOrStr, List[PathOrStr], Dict[str, PathOrStr]]
+
 
 class DatasetReader(Registrable):
     """
@@ -178,14 +181,19 @@ def __init__(
         if util.is_distributed():
             self._distributed_info = DistributedInfo(dist.get_world_size(), dist.get_rank())
 
-    def read(self, file_path: Union[PathLike, str]) -> Iterator[Instance]:
+    def read(self, file_path: DatasetReaderInput) -> Iterator[Instance]:
         """
         Returns an iterator of instances that can be read from the file path.
         """
         if not isinstance(file_path, str):
-            file_path = str(file_path)
-
-        for instance in self._multi_worker_islice(self._read(file_path)):
+            if isinstance(file_path, list):
+                file_path = [str(f) for f in file_path]
+            elif isinstance(file_path, dict):
+                file_path = {k: str(v) for k, v in file_path.items()}
+            else:
+                file_path = str(file_path)
+
+        for instance in self._multi_worker_islice(self._read(file_path)):  # type: ignore
             if self._worker_info is None:
                 # If not running in a subprocess, it's safe to apply the token_indexers right away.
                 self.apply_token_indexers(instance)