This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit 3fa5193

Makes the evaluate command work for the multitask case (Second Edition) (#5579)
* Adds the ability to evaluate on JSON blobs
* Formatting
1 parent 9f03803 commit 3fa5193
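For orientation: the `input_file` argument of the `evaluate` command previously had to be a path (or a comma-separated list of paths). With this commit it can also be one or more JSON blobs that are handed to the dataset reader directly, which is what the multitask reader needs. A minimal illustration, not taken from the diff itself; the head names and fixture paths mirror the new test in `tests/models/multitask_test.py`:

# Old style: a single path, or several paths separated by commas.
input_file = "test_fixtures/data/text_classification_json/imdb_corpus.jsonl"

# New style: a JSON blob mapping dataset-reader (head) names to their evaluation files.
input_file = (
    '{"head_eins": "test_fixtures/data/text_classification_json/imdb_corpus.jsonl", '
    '"head_zwei": "test_fixtures/data/text_classification_json/ag_news_corpus_fake_sentiment_labels.jsonl"}'
)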

File tree

5 files changed: +110 -10 lines changed

CHANGELOG.md (+1)

@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 - We can now transparently read compressed input files during prediction.
 - LZMA compression is now supported.
+- Added a way to give JSON blobs as input to dataset readers in the `evaluate` command.
 - Added the argument `sub_module` in `PretrainedTransformerMismatchedEmbedder`

allennlp/commands/evaluate.py (+24 -8)

@@ -7,6 +7,7 @@
 import argparse
 import json
 import logging
+from json import JSONDecodeError
 from pathlib import Path
 from os import PathLike
 from typing import Union, Dict, Any, Optional
@@ -35,14 +36,14 @@ def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.Argument
         subparser.add_argument(
             "input_file",
             type=str,
-            help="path to the file containing the evaluation data (for mutiple "
+            help="path to the file containing the evaluation data (for multiple "
             "files, put between filenames e.g., input1.txt,input2.txt)",
         )

         subparser.add_argument(
             "--output-file",
             type=str,
-            help="optional path to write the metrics to as JSON (for mutiple "
+            help="optional path to write the metrics to as JSON (for multiple "
             "files, put between filenames e.g., output1.txt,output2.txt)",
         )
@@ -258,17 +259,26 @@ def evaluate_from_archive(
     dataset_reader = archive.validation_dataset_reader

     # split files
-    evaluation_data_path_list = input_file.split(",")
+    try:
+        # Try reading it as a list of JSON objects first. Some readers require
+        # that kind of input.
+        evaluation_data_path_list = json.loads(f"[{input_file}]")
+    except JSONDecodeError:
+        evaluation_data_path_list = input_file.split(",")

     # TODO(gabeorlanski): Is it safe to always default to .outputs and .preds?
     # TODO(gabeorlanski): Add in way to save to specific output directory
     if metrics_output_file is not None:
         if auto_names == "METRICS" or auto_names == "ALL":
             logger.warning(
-                f"Passed output_files will be ignored, auto_names is" f" set to {auto_names}"
+                f"Passed output_files will be ignored, auto_names is set to {auto_names}"
             )

         # Keep the path of the parent otherwise it will write to the CWD
+        assert all(isinstance(p, str) for p in evaluation_data_path_list), (
+            "When specifying JSON blobs as input, the output files must be explicitly named with "
+            "--output-file."
+        )
         output_file_list = [
             p.parent.joinpath(f"{p.stem}.outputs") for p in map(Path, evaluation_data_path_list)
         ]
@@ -285,6 +295,10 @@
             )

         # Keep the path of the parent otherwise it will write to the CWD
+        assert all(isinstance(p, str) for p in evaluation_data_path_list), (
+            "When specifying JSON blobs as input, the predictions output files must be explicitly named with "
+            "--predictions-output-file."
+        )
         predictions_output_file_list = [
             p.parent.joinpath(f"{p.stem}.preds") for p in map(Path, evaluation_data_path_list)
         ]
@@ -307,13 +321,15 @@
         )

     all_metrics = {}
-    for index in range(len(evaluation_data_path_list)):
+    for index, evaluation_data_path in enumerate(evaluation_data_path_list):
         config = deepcopy(archive.config)
-        evaluation_data_path = evaluation_data_path_list[index]

         # Get the eval file name so we can save each metric by file name in the
         # output dictionary.
-        eval_file_name = Path(evaluation_data_path).stem
+        if isinstance(evaluation_data_path, str):
+            eval_file_name = Path(evaluation_data_path).stem
+        else:
+            eval_file_name = str(index)

         if metrics_output_file is not None:
             # noinspection PyUnboundLocalVariable
@@ -323,7 +339,7 @@
             # noinspection PyUnboundLocalVariable
             predictions_output_file_path = predictions_output_file_list[index]

-        logger.info("Reading evaluation data from %s", evaluation_data_path)
+        logger.info("Reading evaluation data from %s", eval_file_name)
         data_loader_params = config.get("validation_data_loader", None)
         if data_loader_params is None:
             data_loader_params = config.get("data_loader")
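The core of the change is the parsing fallback at the top of `evaluate_from_archive`. As a rough standalone sketch of that behaviour (this is not AllenNLP code; the helper name `parse_input_file` is ours):

import json
from json import JSONDecodeError


def parse_input_file(input_file: str) -> list:
    # Mirror of the logic added above: wrap the raw argument in brackets and try
    # to parse it as a JSON list first, then fall back to comma-splitting paths.
    try:
        return json.loads(f"[{input_file}]")
    except JSONDecodeError:
        return input_file.split(",")


# Plain paths still work as before:
print(parse_input_file("dev1.jsonl,dev2.jsonl"))
# ['dev1.jsonl', 'dev2.jsonl']

# A JSON blob (e.g. for a multitask reader) parses into a single dict entry:
print(parse_input_file('{"head_eins": "a.jsonl", "head_zwei": "b.jsonl"}'))
# [{'head_eins': 'a.jsonl', 'head_zwei': 'b.jsonl'}]

Because JSON entries are not necessarily path strings, output and prediction file names cannot be derived from them; the two new assert statements in the diff make that explicit and require --output-file and --predictions-output-file in that case.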

test_fixtures/basic_classifier/common.jsonnet (-1)

@@ -16,7 +16,6 @@
     "train_data_path": "test_fixtures/data/text_classification_json/imdb_corpus.jsonl",
     "validation_data_path": "test_fixtures/data/text_classification_json/imdb_corpus.jsonl",
     "data_loader": {
-
         "batch_sampler": {
             "type": "bucket",
             "batch_size": 5

tests/commands/evaluate_test.py (+1 -1)

@@ -163,7 +163,7 @@ def test_evaluate_works_with_vocab_expansion(self):
         kebab_args = ["evaluate", archive_path, evaluate_data_path, "--cuda-device", "-1"]

         # TODO(mattg): the unawarded_embeddings.gz file above doesn't exist, but this test still
-        # passes. This suggests that vocab extension in evaluate isn't currently doing anything,
+        # passes. This suggests that vocab extension in evaluate isn't currently doing anything,
         # and so it is broken.

         # Evaluate 1 with no vocab expansion,

tests/models/multitask_test.py (+84)

@@ -1,5 +1,8 @@
+import os
+
 import pytest

+from allennlp.common import Params
 from allennlp.common.testing import ModelTestCase
 from allennlp.data import Instance, Vocabulary, Batch
 from allennlp.data.fields import LabelField, TextField, MetadataField
@@ -101,3 +104,84 @@ def test_forward_works(self):
         )
         with pytest.raises(ValueError, match="duplicate argument text"):
             outputs = model.forward_on_instance(instance)
+
+    def test_train_and_evaluate(self):
+        from allennlp.commands.train import train_model
+        from allennlp.commands.evaluate import evaluate_from_args
+        import argparse
+        from allennlp.commands import Evaluate
+
+        model_name = "epwalsh/bert-xsmall-dummy"
+
+        def reader():
+            return {
+                "type": "text_classification_json",
+                "tokenizer": {"type": "pretrained_transformer", "model_name": model_name},
+                "token_indexers": {
+                    "tokens": {"type": "pretrained_transformer", "model_name": model_name}
+                },
+            }
+
+        def head():
+            return {
+                "type": "classifier",
+                "seq2vec_encoder": {"type": "cls_pooler", "embedding_dim": 20},
+                "input_dim": 20,
+                "num_labels": 2,
+            }
+
+        head_eins_input = "test_fixtures/data/text_classification_json/imdb_corpus.jsonl"
+        head_zwei_input = (
+            "test_fixtures/data/text_classification_json/ag_news_corpus_fake_sentiment_labels.jsonl"
+        )
+
+        params = Params(
+            {
+                "dataset_reader": {
+                    "type": "multitask",
+                    "readers": {
+                        "head_eins": reader(),
+                        "head_zwei": reader(),
+                    },
+                },
+                "model": {
+                    "type": "multitask",
+                    "backbone": {"type": "pretrained_transformer", "model_name": model_name},
+                    "heads": {
+                        "head_eins": head(),
+                        "head_zwei": head(),
+                    },
+                    "arg_name_mapping": {"backbone": {"tokens": "text"}},
+                },
+                "train_data_path": {"head_eins": head_eins_input, "head_zwei": head_zwei_input},
+                "data_loader": {"type": "multitask", "scheduler": {"batch_size": 2}},
+                "trainer": {
+                    "optimizer": {
+                        "type": "huggingface_adamw",
+                        "lr": 4e-5,
+                    },
+                    "num_epochs": 2,
+                },
+            }
+        )
+
+        serialization_dir = os.path.join(self.TEST_DIR, "serialization_dir")
+        train_model(params, serialization_dir=serialization_dir)
+
+        args = [
+            "evaluate",
+            str(self.TEST_DIR / "serialization_dir"),
+            f'{{"head_eins": "{head_eins_input}", "head_zwei": "{head_zwei_input}"}}',
+            "--output-file",
+            str(self.TEST_DIR / "output.txt"),
+            "--predictions-output-file",
+            str(self.TEST_DIR / "predictions.json"),
+        ]
+
+        parser = argparse.ArgumentParser(description="Testing")
+        subparsers = parser.add_subparsers(title="Commands", metavar="")
+        Evaluate().add_subparser(subparsers)
+        args = parser.parse_args(args)
+        metrics = evaluate_from_args(args)
+        assert "head_eins_accuracy" in metrics
+        assert "head_zwei_accuracy" in metrics
