allenai
diff --git a/‎.github/workflows/ci.yml
+11-6 b/‎.github/workflows/ci.yml
+11-6
diff --git a/‎CHANGELOG.md
+76-4 b/‎CHANGELOG.md
+76-4
diff --git a/‎Dockerfile
+1-1 b/‎Dockerfile
+1-1
diff --git a/‎Dockerfile.test
+2-2 b/‎Dockerfile.test
+2-2
diff --git a/‎Makefile
+2-3 b/‎Makefile
+2-3
diff --git a/‎README.md
+4-1 b/‎README.md
+4-1
diff --git a/‎allennlp/commands/__init__.py
+1 b/‎allennlp/commands/__init__.py
+1
diff --git a/‎allennlp/commands/build_vocab.py
+2-1 b/‎allennlp/commands/build_vocab.py
+2-1
diff --git a/‎allennlp/commands/count_instances.py
+52 b/‎allennlp/commands/count_instances.py
+52
diff --git a/‎allennlp/commands/evaluate.py
+11-9 b/‎allennlp/commands/evaluate.py
+11-9
@@ -151,7 +151,8 @@ jobs:
         ALLENNLP_VERSION_OVERRIDE: ""  # Don't replace the core library.
       run: |
         git clone https://github.com/allenai/allennlp-models.git
-        cd allennlp-models && pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt
+        cd allennlp-models
+        pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt
 
     - name: Run models tests
       run: |
@@ -288,11 +289,11 @@ jobs:
       run: |
         # Check the install instructions on https://pytorch.org/ to keep these up-to-date.
         if [[ $CUDA == '10.1' ]]; then
-            echo "DOCKER_TORCH_VERSION='torch==1.7.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html'" >> $GITHUB_ENV;
+            echo "DOCKER_TORCH_VERSION='torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html'" >> $GITHUB_ENV;
         elif [[ $CUDA == '10.2' ]]; then
             echo "DOCKER_TORCH_VERSION='torch==1.7.1'" >> $GITHUB_ENV;
         elif [[ $CUDA == '11.0' ]]; then
-            echo "DOCKER_TORCH_VERSION='torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html'" >> $GITHUB_ENV;
+            echo "DOCKER_TORCH_VERSION='torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html'" >> $GITHUB_ENV;
         else
             echo "Unhandled CUDA version $CUDA";
             exit 1;
@@ -389,15 +390,19 @@ jobs:
       run: |
         ./scripts/build_docs.sh
 
+    - name: Print the ref
+      run: |
+        echo ${{ github.ref }}
+
     - name: Configure Git
-      if: github.event_name == 'release' || github.event_name == 'push'
+      if: github.event_name == 'release' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
       run: |
         git config --global user.email "[email protected]"
         git config --global user.name "ai2service"
         git config --global push.default simple
 
     - name: Stage docs
-      if: github.event_name == 'release' || github.event_name == 'push'
+      if: github.event_name == 'release' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
       run: |
         echo "Staging docs to $DOCS_FOLDER"
 
@@ -449,7 +454,7 @@ jobs:
         EOL
 
     - name: Deploy docs
-      if: github.event_name == 'release' || github.event_name == 'push'
+      if: github.event_name == 'release' || (github.event_name == 'push' && github.ref == 'refs/heads/main')
       run: |
         # And push them up to GitHub
         cd ~/allennlp-docs/
 
@@ -10,6 +10,49 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- The `TrainerCallback` constructor accepts `serialization_dir` provided by `Trainer`. This can be useful for `Logger` callbacks that need to store files in the run directory.
+- The `TrainerCallback.on_start()` is fired at the start of the training.
+- The `TrainerCallback` event methods now accept `**kwargs`. This may make it easier to maintain backwards-compatibility of callbacks in the future. E.g. we may decide to pass the exception/traceback object in case of failure to `on_end()`, and older callbacks may simply ignore the argument instead of raising a `TypeError`.
+
+### Changed
+
+- The `TrainerCallback.on_epoch()` does not fire with `epoch=-1` at the start of the training.
+  Instead, `TrainerCallback.on_start()` should be used for these cases.
+- `TensorBoardBatchMemoryUsage` is converted from `BatchCallback` into `TrainerCallback`.
+- `TrackEpochCallback` is converted from `EpochCallback` into `TrainerCallback`.
+- `Trainer` can accept callbacks simply with name `callbacks` instead of `trainer_callbacks`.
+
+### Removed
+
+- Removed `EpochCallback`, `BatchCallback` in favour of `TrainerCallback`.
+  The metaclass-wrapping implementation is removed as well.
+
+### Fixed
+
+- Now Trainer always fires `TrainerCallback.on_end()` so all the resources can be cleaned up properly.
+- Fixed the misspelling, changed `TensoboardBatchMemoryUsage` to `TensorBoardBatchMemoryUsage`.
+- We set a value to `epoch` so in case of firing `TrainerCallback.on_end()` the variable is bound.
+  This could have led to an error in case of trying to recover a run after it was finished training.
+
+
+## [v2.0.0rc1](https://github.com/allenai/allennlp/releases/tag/v2.0.0rc1) - 2021-01-21
+
+### Added
+
+- Added `TensorCache` class for caching tensors on disk
+- Added abstraction and concrete implementation for image loading
+- Added abstraction and concrete implementation for `GridEmbedder`
+- Added abstraction and demo implementation for an image augmentation module.
+- Added abstraction and concrete implementation for region detectors.
+- A new high-performance default `DataLoader`: `MultiProcessDataLoader`.
+- A `MultiTaskModel` and abstractions to use with it, including `Backbone` and `Head`.  The
+  `MultiTaskModel` first runs its inputs through the `Backbone`, then passes the result (and
+  whatever other relevant inputs it got) to each `Head` that's in use.
+- A `MultiTaskDataLoader`, with a corresponding `MultiTaskDatasetReader`, and a couple of new
+  configuration objects: `MultiTaskEpochSampler` (for deciding what proportion to sample from each
+  dataset at every epoch) and a `MultiTaskScheduler` (for ordering the instances within an epoch).
+- Transformer toolkit to plug and play with modular components of transformer architectures.
+- Added a command to count the number of instances we're going to be training with
 - Added a `FileLock` class to `common.file_utils`. This is just like the `FileLock` from the `filelock` library, except that
   it adds an optional flag `read_only_ok: bool`, which when set to `True` changes the behavior so that a warning will be emitted
   instead of an exception when lacking write permissions on an existing file lock.
@@ -19,14 +62,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Moving `ModelCard` and `TaskCard` abstractions into the main repository.
 - Added a util function `allennlp.nn.util.dist_reduce(...)` for handling distributed reductions.
   This is especially useful when implementing a distributed `Metric`.
+- Added a `FileLock` class to `common.file_utils`. This is just like the `FileLock` from the `filelock` library, except that
+  it adds an optional flag `read_only_ok: bool`, which when set to `True` changes the behavior so that a warning will be emitted
+  instead of an exception when lacking write permissions on an existing file lock.
+  This makes it possible to use the `FileLock` class on a read-only file system.
+- Added a new learning rate scheduler: `CombinedLearningRateScheduler`. This can be used to combine different LR schedulers, using one after the other.
+- Moving `ModelCard` and `TaskCard` abstractions into the main repository.
 
 ### Changed
 
+- `DatasetReader`s are now always lazy. This means there is no `lazy` parameter in the base
+  class, and the `_read()` method should always be a generator.
+- The `DataLoader` now decides whether to load instances lazily or not.
+  With the `PyTorchDataLoader` this is controlled with the `lazy` parameter, but with
+  the `MultiProcessDataLoader` this is controlled by the `max_instances_in_memory` setting.
+- `ArrayField` is now called `TensorField`, and implemented in terms of torch tensors, not numpy.
+- Improved `nn.util.move_to_device` function by avoiding an unnecessary recursive check for tensors and
+  adding a `non_blocking` optional argument, which is the same argument as in `torch.Tensor.to()`.
+- If you are trying to create a heterogeneous batch, you now get a better error message.
+- Readers using the new vision features now explicitly log how they are featurizing images.
+- `master_addr` and `master_port` renamed to `primary_addr` and `primary_port`, respectively.
+- `is_master` parameter for training callbacks renamed to `is_primary`.
+- `master` branch renamed to `main`
+- Torch version bumped to 1.7.1 in Docker images.
 - 'master' branch renamed to 'main'
 - Torch version bumped to 1.7.1 in Docker images.
 
+### Removed
+
+- Removed `nn.util.has_tensor`.
+
 ### Fixed
 
+- The `build-vocab` command no longer crashes when the resulting vocab file is
+  in the current working directory.
+- VQA models now use the `vqa_score` metric for early stopping. This results in
+  much better scores.
 - Fixed typo with `LabelField` string representation: removed trailing apostrophe.
 - `Vocabulary.from_files` and `cached_path` will issue a warning, instead of failing, when a lock on an existing resource
   can't be acquired because the file system is read-only.
@@ -58,11 +129,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   were not passed to the constructor if the value of the parameter was equal to the default value.
   This caused bugs in some edge cases where a subclass that takes `**kwargs` needs to inspect
   `kwargs` before passing them to its superclass.
-- Improved the band-aid solution for segmentation faults and the "ImportError: dlopen: cannot load any more object with static TLS" 
+- Improved the band-aid solution for segmentation faults and the "ImportError: dlopen: cannot load any more object with static TLS"
   by adding a `transformers` import.
 - Added safety checks for extracting tar files
 - Turned superfluous warning to info when extending the vocab in the embedding matrix, if no pretrained file was provided
 
+
 ## [v1.2.2](https://github.com/allenai/allennlp/releases/tag/v1.2.2) - 2020-11-17
 
 ### Added
@@ -213,6 +285,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fixed a bug in the cnn_encoder where activations involving masked tokens could be picked up by the max
 - Fix intra word tokenization for `PretrainedTransformerTokenizer` when disabling fast tokenizer.
 
+
 ## [v1.1.0](https://github.com/allenai/allennlp/releases/tag/v1.1.0) - 2020-09-08
 
 ### Fixed
@@ -227,8 +300,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- `Predictor.capture_model_internals()` now accepts a regex specifying
-  which modules to capture
+- `Predictor.capture_model_internals()` now accepts a regex specifying which modules to capture.
 
 
 ## [v1.1.0rc4](https://github.com/allenai/allennlp/releases/tag/v1.1.0rc4) - 2020-08-20
@@ -295,7 +367,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   in the log output even when `train_parameters` was set to `False`.
 - Fixed a bug with the sharded dataset reader where it would only read a fraction of the instances
   in distributed training.
-- Fixed checking equality of `ArrayField`s.
+- Fixed checking equality of `TensorField`s.
 - Fixed a bug where `NamespaceSwappingField` did not work correctly with `.empty_field()`.
 - Put more sensible defaults on the `huggingface_adamw` optimizer.
 - Simplified logging so that all logging output always goes to one file.
 
@@ -18,7 +18,7 @@ LABEL com.nvidia.volumes.needed="nvidia_driver"
 
 WORKDIR /stage/allennlp
 
-# Install torch first. This build arg should be in the form of a version requirement,
+# Install torch ecosystem first. This build arg should be in the form of a version requirement,
 # like 'torch==1.7' or 'torch==1.7+cu102 -f https://download.pytorch.org/whl/torch_stable.html'.
 ARG TORCH
 RUN pip install --no-cache-dir ${TORCH}
 
@@ -17,7 +17,7 @@ LABEL com.nvidia.volumes.needed="nvidia_driver"
 
 WORKDIR /stage/allennlp
 
-# Install torch first. This build arg should be in the form of a version requirement,
+# Install torch ecosystem first. This build arg should be in the form of a version requirement,
 # like 'torch==1.7' or 'torch==1.7+cu102 -f https://download.pytorch.org/whl/torch_stable.html'.
 ARG TORCH
 RUN pip install --no-cache-dir ${TORCH}
@@ -30,7 +30,7 @@ COPY setup.py .
 COPY dev-requirements.txt .
 RUN touch allennlp/__init__.py \
     && touch README.md \
-    && pip install --no-cache-dir -r dev-requirements.txt -e .
+    && pip install --no-cache-dir -e . -r dev-requirements.txt
 
 # Now add the full package source and re-install just the package.
 COPY . .
 
@@ -13,9 +13,9 @@ MD_DOCS_EXTRAS = $(addprefix $(MD_DOCS_ROOT),README.md CHANGELOG.md CONTRIBUTING
 DOCKER_TAG = latest
 DOCKER_IMAGE_NAME = allennlp/allennlp:$(DOCKER_TAG)
 DOCKER_TEST_IMAGE_NAME = allennlp/test:$(DOCKER_TAG)
-DOCKER_TORCH_VERSION = 'torch==1.7.1'
+DOCKER_TORCH_VERSION = 'torch==1.7.1 torchvision==0.8.2'
 # Our self-hosted runner currently has CUDA 11.0.
-DOCKER_TEST_TORCH_VERSION = 'torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html'
+DOCKER_TEST_TORCH_VERSION = 'torch==1.7.1+cu110 torchvision==0.8.2+cu110 -f https://download.pytorch.org/whl/torch_stable.html'
 DOCKER_RUN_CMD = docker run --rm \
 		-v $$HOME/.allennlp:/root/.allennlp \
 		-v $$HOME/.cache/huggingface:/root/.cache/huggingface \
@@ -85,7 +85,6 @@ install :
 	# Due to a weird thing with pip, we may need egg-info before running `pip install -e`.
 	# See https://github.com/pypa/pip/issues/4537.
 	python setup.py install_egg_info
-	# Install allennlp as editable and all dependencies.
 	pip install --upgrade --upgrade-strategy eager -e . -r dev-requirements.txt
 
 #
 
@@ -148,7 +148,10 @@ to distribute as a plugin, see the [subcommand API docs](https://docs.allennlp.o
 
 ## Installation
 
-AllenNLP requires Python 3.6.1 or later. The preferred way to install AllenNLP is via `pip`.  Just run `pip install allennlp` in your Python environment and you're good to go!
+AllenNLP requires Python 3.6.1 or later and [PyTorch](https://pytorch.org/).
+It's recommended that you install the PyTorch ecosystem **before** installing AllenNLP by following the instructions on [pytorch.org](https://pytorch.org/).
+
+The preferred way to install AllenNLP is via `pip`. Just run `pip install allennlp`.
 
 > ⚠️ If you're using Python 3.7 or greater, you should ensure that you don't have the PyPI version of `dataclasses` installed after running the above command, as this could cause issues on certain platforms. You can quickly check this by running `pip freeze | grep dataclasses`. If you see something like `dataclasses=0.6` in the output, then just run `pip uninstall -y dataclasses`.
 
 
@@ -15,6 +15,7 @@
 from allennlp.commands.subcommand import Subcommand
 from allennlp.commands.test_install import TestInstall
 from allennlp.commands.train import Train
+from allennlp.commands.count_instances import CountInstances
 from allennlp.common.plugins import import_plugins
 from allennlp.common.util import import_module_and_submodules
 
 
@@ -65,7 +65,8 @@ def build_vocab_from_args(args: argparse.Namespace):
         raise RuntimeError(f"{args.output_path} already exists. Use --force to overwrite.")
 
     output_directory = os.path.dirname(args.output_path)
-    os.makedirs(output_directory, exist_ok=True)
+    if len(output_directory) > 0:
+        os.makedirs(output_directory, exist_ok=True)
 
     params = Params.from_file(args.param_path)
 
 
@@ -0,0 +1,52 @@
+"""
+Subcommand for counting the number of instances from a training config.
+"""
+
+import argparse
+import logging
+
+from overrides import overrides
+
+from allennlp.commands.subcommand import Subcommand
+from allennlp.common.params import Params
+
+
+logger = logging.getLogger(__name__)
+
+
+@Subcommand.register("count-instances")
+class CountInstances(Subcommand):
+    @overrides
+    def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser:
+        description = """Count the number of training instances in an experiment config file."""
+        subparser = parser.add_parser(self.name, description=description, help=description)
+        subparser.add_argument("param_path", type=str, help="path to an experiment config file")
+        # Optional overrides for the experiment config, given as a string (empty by default).
+        subparser.add_argument(
+            "-o",
+            "--overrides",
+            type=str,
+            default="",
+            help=(
+                "a json(net) structure used to override the experiment configuration, e.g., "
+                "'{\"vocabulary.min_count.labels\": 10}'.  Nested parameters can be specified either"
+                " with nested dictionaries or with dot syntax."
+            ),
+        )
+        # Store the handler function as `func` on the namespace produced by this subparser.
+        subparser.set_defaults(func=count_instances_from_args)
+
+        return subparser
+
+
+def count_instances_from_args(args: argparse.Namespace):
+    """Count and print the training instances produced by the config at `args.param_path`."""
+    from allennlp.training.util import data_loaders_from_params
+    # Forward `--overrides`; otherwise the flag is accepted but silently ignored.
+    params = Params.from_file(args.param_path, args.overrides)
+    # Only the training data loader(s) are requested; validation and test are skipped.
+    data_loaders = data_loaders_from_params(params, train=True, validation=False, test=False)
+    instances = sum(
+        1 for data_loader in data_loaders.values() for _ in data_loader.iter_instances()
+    )
+    print(f"Success! One epoch of training contains {instances} instances.")
@@ -134,24 +134,26 @@ def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
 
     evaluation_data_path = args.input_file
     logger.info("Reading evaluation data from %s", evaluation_data_path)
-    instances = dataset_reader.read(evaluation_data_path)
+
+    data_loader_params = config.pop("validation_data_loader", None)
+    if data_loader_params is None:
+        data_loader_params = config.pop("data_loader")
+    if args.batch_size:
+        data_loader_params["batch_size"] = args.batch_size
+    data_loader = DataLoader.from_params(
+        params=data_loader_params, reader=dataset_reader, data_path=evaluation_data_path
+    )
 
     embedding_sources = (
         json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
     )
 
     if args.extend_vocab:
         logger.info("Vocabulary is being extended with test instances.")
-        model.vocab.extend_from_instances(instances=instances)
+        model.vocab.extend_from_instances(instances=data_loader.iter_instances())
         model.extend_embedder_vocab(embedding_sources)
 
-    instances.index_with(model.vocab)
-    data_loader_params = config.pop("validation_data_loader", None)
-    if data_loader_params is None:
-        data_loader_params = config.pop("data_loader")
-    if args.batch_size:
-        data_loader_params["batch_size"] = args.batch_size
-    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)
+    data_loader.index_with(model.vocab)
 
     metrics = evaluate(
         model,