Skip to content
This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit e32f486

Browse files
authored
fix BasicTextFieldEmbedder.from_params to reflect the constructor (#1474)
* fix BasicTextFieldEmbedder.from_params to reflect the constructor structure
* remove print statement
* use FromParams.from_params directly
* add a type ignore
* explicit is better than implicit
1 parent dc1ff36 commit e32f486

33 files changed

+366
-241
lines changed

allennlp/modules/text_field_embedders/basic_text_field_embedder.py

+33-6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from typing import Dict, List
2+
import warnings
23

34
import torch
45
from overrides import overrides
@@ -91,16 +92,42 @@ def forward(self, text_field_input: Dict[str, torch.Tensor], num_wrapping_dims:
9192
# This is some unusual logic, it needs a custom from_params.
9293
@classmethod
9394
def from_params(cls, vocab: Vocabulary, params: Params) -> 'BasicTextFieldEmbedder': # type: ignore
94-
# pylint: disable=arguments-differ
95+
# pylint: disable=arguments-differ,bad-super-call
96+
97+
# The original `from_params` for this class was designed in a way that didn't agree
98+
# with the constructor. The constructor wants a 'token_embedders' parameter that is a
99+
# `Dict[str, TokenEmbedder]`, but the original `from_params` implementation expected those
100+
# key-value pairs to be top-level in the params object.
101+
#
102+
# This breaks our 'configuration wizard' and configuration checks. Hence, going forward,
103+
# the params need a 'token_embedders' key so that they line up with what the constructor wants.
104+
# For now, the old behavior is still supported, but produces a DeprecationWarning.
105+
95106
embedder_to_indexer_map = params.pop("embedder_to_indexer_map", None)
96107
if embedder_to_indexer_map is not None:
97108
embedder_to_indexer_map = embedder_to_indexer_map.as_dict(quiet=True)
98109
allow_unmatched_keys = params.pop_bool("allow_unmatched_keys", False)
99110

100-
token_embedders = {}
101-
keys = list(params.keys())
102-
for key in keys:
103-
embedder_params = params.pop(key)
104-
token_embedders[key] = TokenEmbedder.from_params(vocab=vocab, params=embedder_params)
111+
token_embedder_params = params.pop('token_embedders', None)
112+
113+
if token_embedder_params is not None:
114+
# New way: explicitly specified, so use it.
115+
token_embedders = {
116+
name: TokenEmbedder.from_params(subparams, vocab=vocab)
117+
for name, subparams in token_embedder_params.items()
118+
}
119+
120+
else:
121+
# Warn that the original behavior is deprecated
122+
warnings.warn(DeprecationWarning("the token embedders for BasicTextFieldEmbedder should now "
123+
"be specified as a dict under the 'token_embedders' key, "
124+
"not as top-level key-value pairs"))
125+
126+
token_embedders = {}
127+
keys = list(params.keys())
128+
for key in keys:
129+
embedder_params = params.pop(key)
130+
token_embedders[key] = TokenEmbedder.from_params(vocab=vocab, params=embedder_params)
131+
105132
params.assert_empty(cls.__name__)
106133
return cls(token_embedders, embedder_to_indexer_map, allow_unmatched_keys)

allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/elmo_in_text_field_embedder.json

+14-12
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,20 @@
1919
"model": {
2020
"type": "bcn",
2121
"text_field_embedder": {
22-
"tokens": {
23-
"type": "embedding",
24-
"embedding_dim": 10
25-
},
26-
# BCN configs should not have an elmo_token_embedder
27-
# in their text_field_embedder
28-
"elmo": {
29-
"type": "elmo_token_embedder",
30-
"options_file": "allennlp/tests/fixtures/elmo/options.json",
31-
"weight_file": "allennlp/tests/fixtures/elmo/lm_weights.hdf5",
32-
"do_layer_norm": false,
33-
"dropout": 0.5
22+
"token_embedders": {
23+
"tokens": {
24+
"type": "embedding",
25+
"embedding_dim": 10
26+
},
27+
# BCN configs should not have an elmo_token_embedder
28+
# in their text_field_embedder
29+
"elmo": {
30+
"type": "elmo_token_embedder",
31+
"options_file": "allennlp/tests/fixtures/elmo/options.json",
32+
"weight_file": "allennlp/tests/fixtures/elmo/lm_weights.hdf5",
33+
"do_layer_norm": false,
34+
"dropout": 0.5
35+
}
3436
}
3537
},
3638
"embedding_dropout": 0.2,

allennlp/tests/fixtures/biattentive_classification_network/broken_experiments/no_elmo_tokenizer_for_elmo.json

+5-3
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
"model": {
2121
"type": "bcn",
2222
"text_field_embedder": {
23-
"tokens": {
24-
"type": "embedding",
25-
"embedding_dim": 10
23+
"token_embedders": {
24+
"tokens": {
25+
"type": "embedding",
26+
"embedding_dim": 10
27+
}
2628
}
2729
},
2830
"embedding_dropout": 0.2,

allennlp/tests/fixtures/biattentive_classification_network/elmo_experiment.json

+5-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
"model": {
1818
"type": "bcn",
1919
"text_field_embedder": {
20-
"tokens": {
21-
"type": "embedding",
22-
"embedding_dim": 10
20+
"token_embedders": {
21+
"tokens": {
22+
"type": "embedding",
23+
"embedding_dim": 10
24+
}
2325
}
2426
},
2527
"embedding_dropout": 0.2,

allennlp/tests/fixtures/biattentive_classification_network/experiment.json

+5-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
"model": {
1010
"type": "bcn",
1111
"text_field_embedder": {
12-
"tokens": {
13-
"type": "embedding",
14-
"embedding_dim": 10
12+
"token_embedders": {
13+
"tokens": {
14+
"type": "embedding",
15+
"embedding_dim": 10
16+
}
1517
}
1618
},
1719
"embedding_dropout": 0.2,

allennlp/tests/fixtures/biattentive_classification_network/feedforward_experiment.json

+5-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
"model": {
1010
"type": "bcn",
1111
"text_field_embedder": {
12-
"tokens": {
13-
"type": "embedding",
14-
"embedding_dim": 10
12+
"token_embedders": {
13+
"tokens": {
14+
"type": "embedding",
15+
"embedding_dim": 10
16+
}
1517
}
1618
},
1719
"embedding_dropout": 0.2,

allennlp/tests/fixtures/biattentive_classification_network/output_only_elmo_experiment.json

+5-3
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717
"model": {
1818
"type": "bcn",
1919
"text_field_embedder": {
20-
"tokens": {
21-
"type": "embedding",
22-
"embedding_dim": 10
20+
"token_embedders": {
21+
"tokens": {
22+
"type": "embedding",
23+
"embedding_dim": 10
24+
}
2325
}
2426
},
2527
"embedding_dropout": 0.2,

allennlp/tests/fixtures/bidaf/experiment.json

+17-15
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,24 @@
1919
"model": {
2020
"type": "bidaf",
2121
"text_field_embedder": {
22-
"tokens": {
23-
"type": "embedding",
24-
"embedding_dim": 2,
25-
"trainable": false
26-
},
27-
"token_characters": {
28-
"type": "character_encoding",
29-
"embedding": {
30-
"num_embeddings": 260,
31-
"embedding_dim": 8
22+
"token_embedders": {
23+
"tokens": {
24+
"type": "embedding",
25+
"embedding_dim": 2,
26+
"trainable": false
3227
},
33-
"encoder": {
34-
"type": "cnn",
35-
"embedding_dim": 8,
36-
"num_filters": 8,
37-
"ngram_filter_sizes": [5]
28+
"token_characters": {
29+
"type": "character_encoding",
30+
"embedding": {
31+
"num_embeddings": 260,
32+
"embedding_dim": 8
33+
},
34+
"encoder": {
35+
"type": "cnn",
36+
"embedding_dim": 8,
37+
"num_filters": 8,
38+
"ngram_filter_sizes": [5]
39+
}
3840
}
3941
}
4042
},

allennlp/tests/fixtures/constituency_parser/constituency_parser.json

+6-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
"model": {
99
"type": "constituency_parser",
1010
"text_field_embedder": {
11-
"tokens": {
12-
"type": "embedding",
13-
"embedding_dim": 2,
14-
"trainable": true
11+
"token_embedders": {
12+
"tokens": {
13+
"type": "embedding",
14+
"embedding_dim": 2,
15+
"trainable": true
16+
}
1517
}
1618
},
1719
"pos_tag_embedding": {

allennlp/tests/fixtures/constituency_parser/experiment.json

+6-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,12 @@
88
"model": {
99
"type": "constituency_parser",
1010
"text_field_embedder": {
11-
"tokens": {
12-
"type": "embedding",
13-
"embedding_dim": 2,
14-
"trainable": true
11+
"token_embedders": {
12+
"tokens": {
13+
"type": "embedding",
14+
"embedding_dim": 2,
15+
"trainable": true
16+
}
1517
}
1618
},
1719
"encoder": {

allennlp/tests/fixtures/coref/experiment.json

+19-17
Original file line numberDiff line numberDiff line change
@@ -20,24 +20,26 @@
2020
"model": {
2121
"type": "coref",
2222
"text_field_embedder": {
23-
"tokens": {
24-
"type": "embedding",
25-
"embedding_dim": 10,
26-
"trainable": false
27-
},
28-
"token_characters": {
29-
"type": "character_encoding",
30-
"embedding": {
31-
"num_embeddings": 262,
32-
"embedding_dim": 8
23+
"token_embedders": {
24+
"tokens": {
25+
"type": "embedding",
26+
"embedding_dim": 10,
27+
"trainable": false
3328
},
34-
"encoder": {
35-
"type": "cnn",
36-
"embedding_dim": 8,
37-
"num_filters": 10,
38-
"ngram_filter_sizes": [5]
39-
},
40-
"dropout": 0.2
29+
"token_characters": {
30+
"type": "character_encoding",
31+
"embedding": {
32+
"num_embeddings": 262,
33+
"embedding_dim": 8
34+
},
35+
"encoder": {
36+
"type": "cnn",
37+
"embedding_dim": 8,
38+
"num_filters": 10,
39+
"ngram_filter_sizes": [5]
40+
},
41+
"dropout": 0.2
42+
}
4143
}
4244
},
4345
"context_layer": {

allennlp/tests/fixtures/crf_tagger/experiment.json

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"model": {
1818
"type": "crf_tagger",
1919
"text_field_embedder": {
20+
"token_embedders": {
2021
"tokens": {
2122
"type": "embedding",
2223
"embedding_dim": 50
@@ -35,6 +36,7 @@
3536
"bidirectional": true
3637
}
3738
}
39+
}
3840
},
3941
"encoder": {
4042
"type": "gru",

allennlp/tests/fixtures/decomposable_attention/experiment.json

+8-6
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
"model": {
1414
"type": "decomposable_attention",
1515
"text_field_embedder": {
16-
"tokens": {
17-
"type": "embedding",
18-
"projection_dim": 2,
19-
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.300d.sample.txt.gz",
20-
"embedding_dim": 300,
21-
"trainable": false
16+
"token_embedders": {
17+
"tokens": {
18+
"type": "embedding",
19+
"projection_dim": 2,
20+
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.300d.sample.txt.gz",
21+
"embedding_dim": 300,
22+
"trainable": false
23+
}
2224
}
2325
},
2426
"attend_feedforward": {

allennlp/tests/fixtures/elmo/config/characters_token_embedder.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"model": {
1818
"type": "crf_tagger",
1919
"text_field_embedder": {
20+
"token_embedders": {
2021
"tokens": {
2122
"type": "embedding",
2223
"embedding_dim": 50
@@ -25,7 +26,8 @@
2526
"type": "elmo_token_embedder",
2627
"options_file": "allennlp/tests/fixtures/elmo/options.json",
2728
"weight_file": "allennlp/tests/fixtures/elmo/lm_weights.hdf5",
28-
},
29+
}
30+
}
2931
},
3032
"encoder": {
3133
"type": "gru",

allennlp/tests/fixtures/esim/experiment.json

+8-6
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414
"type": "esim",
1515
"dropout": 0.5,
1616
"text_field_embedder": {
17-
"tokens": {
18-
"type": "embedding",
19-
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.300d.sample.txt.gz",
20-
"embedding_dim": 300,
21-
"trainable": false,
22-
"projection_dim": 10,
17+
"token_embedders": {
18+
"tokens": {
19+
"type": "embedding",
20+
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.300d.sample.txt.gz",
21+
"embedding_dim": 300,
22+
"trainable": false,
23+
"projection_dim": 10,
24+
}
2325
}
2426
},
2527
"encoder": {

allennlp/tests/fixtures/simple_tagger/experiment.json

+8-6
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
"model": {
66
"type": "simple_tagger",
77
"text_field_embedder": {
8-
"tokens": {
9-
"type": "embedding",
10-
"projection_dim": 2,
11-
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.100d.sample.txt.gz",
12-
"embedding_dim": 100,
13-
"trainable": true
8+
"token_embedders": {
9+
"tokens": {
10+
"type": "embedding",
11+
"projection_dim": 2,
12+
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.100d.sample.txt.gz",
13+
"embedding_dim": 100,
14+
"trainable": true
15+
}
1416
}
1517
},
1618
"encoder": {

allennlp/tests/fixtures/simple_tagger/experiment_with_regularization.json

+8-6
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
"model": {
66
"type": "simple_tagger",
77
"text_field_embedder": {
8-
"tokens": {
9-
"type": "embedding",
10-
"projection_dim": 2,
11-
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.100d.sample.txt.gz",
12-
"embedding_dim": 100,
13-
"trainable": true
8+
"token_embedders": {
9+
"tokens": {
10+
"type": "embedding",
11+
"projection_dim": 2,
12+
"pretrained_file": "allennlp/tests/fixtures/embeddings/glove.6B.100d.sample.txt.gz",
13+
"embedding_dim": 100,
14+
"trainable": true
15+
}
1416
}
1517
},
1618
"encoder": {

0 commit comments

Comments (0)