Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit 3e62365

Browse files
aleSugliaepwalsh
andauthored
Bugfix for attribute inheritance in ShardedDatasetReader (#4830)
* Bugfix for issue #4825 Implemented automatic parameters inheritance from base_reader in ShardedDatasetReader. * Updated CHANGELOG.md * Constrained the parameters that can be inherited from the base reader * Simplified logic for setting only `lazy` parameters * Update CHANGELOG.md Co-authored-by: Evan Pete Walsh <[email protected]>
1 parent 458c4c2 commit 3e62365

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

CHANGELOG.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414

1515
### Fixed
1616

17-
- Fixed issue with GradientDescentTrainer when constructed with validation_data_loader==None and learning_rate_scheduler!=None.
17+
- Fixed issue with `GradientDescentTrainer` when constructed with `validation_data_loader=None` and `learning_rate_scheduler!=None`.
1818
- Fixed a bug when removing all handlers in root logger.
19-
19+
- `ShardedDatasetReader` now inherits parameters from `base_reader` when required.
2020

2121
## [v1.2.2](https://github.com/allenai/allennlp/releases/tag/v1.2.2) - 2020-11-17
2222

allennlp/data/dataset_readers/sharded_dataset_reader.py

+10
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,23 @@ class ShardedDatasetReader(DatasetReader):
3030
3131
Registered as a `DatasetReader` with name "sharded".
3232
33+
This class accepts all additional parameters of any `DatasetReader` class via `**kwargs`.
34+
We give priority to the values set in the constructor for the instance of this class.
35+
Optionally, we will automatically inherit attributes from the `base_reader` when required.
36+
3337
# Parameters
3438
3539
base_reader : `DatasetReader`
3640
Reader with a read method that accepts a single file.
3741
"""
3842

3943
def __init__(self, base_reader: DatasetReader, **kwargs) -> None:
44+
# ShardedDatasetReader is a wrapper for the original base_reader so some of the parameters like 'lazy'
45+
# can be safely inherited. However, ShardedDatasetReader is a class instance of a DatasetReader as well.
46+
# So we give priority to the parameters for the current instance stored in 'kwargs'.
47+
# If not present, we check the ones in the base reader
48+
kwargs["lazy"] = kwargs.get("lazy", base_reader.lazy)
49+
4050
super().__init__(manual_distributed_sharding=True, **kwargs)
4151

4252
if util.is_distributed():

tests/data/dataset_readers/sharded_dataset_reader_test.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from collections import Counter
21
import glob
32
import os
43
import tarfile
4+
from collections import Counter
55
from typing import Tuple
66

77
import pytest
@@ -91,3 +91,20 @@ def test_sharded_read_glob(self):
9191

9292
def test_sharded_read_archive(self):
9393
self.read_and_check_instances(str(self.archive_filename))
94+
95+
def test_attributes_inheritance(self):
96+
# current reader has lazy set to true
97+
base_reader = SequenceTaggingDatasetReader(lazy=True)
98+
reader = ShardedDatasetReader(base_reader=base_reader)
99+
100+
assert (
101+
reader.lazy
102+
), "The ShardedDatasetReader didn't inherit the 'lazy' attribute from base_reader"
103+
104+
def test_set_attributes_main(self):
105+
base_reader = SequenceTaggingDatasetReader(lazy=True)
106+
reader = ShardedDatasetReader(base_reader=base_reader, lazy=False)
107+
108+
assert (
109+
not reader.lazy
110+
), "The ShardedDatasetReader inherited the 'lazy' attribute from base_reader. It should be False"

0 commit comments

Comments
 (0)