Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit ad78b69

Browse files
committed
Signed-off-by: Abhishek P (VMware) <[email protected]>
Converted HFDatasetSplitReader to HFDatasetReader. Now all splits can be used in the same reader. Supports either pre-loading all splits or loading a split on demand. Reduced tests to the glue-cola dataset:config, which is a ~0.36 MB download.
1 parent 6e613b9 commit ad78b69

File tree

3 files changed

+4
-4
lines changed

3 files changed

+4
-4
lines changed

allennlp/data/dataset_readers/huggingface_datasets_reader.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from datasets.features import Value
99

1010
# TODO pab complete the documentation comments
11-
class HuggingfaceDatasetSplitReader(DatasetReader):
11+
class HuggingfaceDatasetReader(DatasetReader):
1212
"""
1313
This reader implementation wraps the huggingface datasets package
1414
to utilize its dataset management functionality and load the information in AllenNLP friendly formats

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
"lmdb",
7474
"more-itertools",
7575
"wandb>=0.10.0,<0.11.0",
76-
"datasets>=1.5.0,<1.6.0",
76+
"datasets>=1.5.0,",
7777
],
7878
entry_points={"console_scripts": ["allennlp=allennlp.__main__:run"]},
7979
include_package_data=True,

tests/data/dataset_readers/huggingface_datasets_test.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import pytest
22

3-
from allennlp.data.dataset_readers.huggingface_datasets_reader import HuggingfaceDatasetSplitReader
3+
from allennlp.data.dataset_readers.huggingface_datasets_reader import HuggingfaceDatasetReader
44
import logging
55

66
logger = logging.getLogger(__name__)
@@ -15,7 +15,7 @@ class HuggingfaceDatasetSplitReaderTest:
1515
"""
1616
@pytest.mark.parametrize("dataset, config, split", (("glue", "cola", "train"), ("glue", "cola", "test")))
1717
def test_read_for_datasets_requiring_config(self, dataset, config, split):
18-
huggingface_reader = HuggingfaceDatasetSplitReader(dataset_name=dataset, config_name=config)
18+
huggingface_reader = HuggingfaceDatasetReader(dataset_name=dataset, config_name=config)
1919
instances = list(huggingface_reader.read(split))
2020
assert len(instances) == len(huggingface_reader.datasets[split])
2121
print(instances[0], print(huggingface_reader.datasets[split][0]))

0 commit comments

Comments
 (0)