Add decoder to bimpm and improve demo server. (#1665)

hzeng-otterai · matt-gardner · commit 6c7c807220be · 2018-08-27T09:32:00.000-07:00
* Add decoder to bimpm and add model weights and overrides to flask server.

* Refine comments for pylint.
diff --git a/allennlp/models/bimpm.py b/allennlp/models/bimpm.py
@@ -6,6 +6,7 @@
 
 from overrides import overrides
 import torch
+import numpy
 
 from allennlp.common.checks import check_dimensions_match
 from allennlp.data import Vocabulary
@@ -185,8 +186,9 @@ def add_matching_result(matcher, encoded_premise, encoded_hypothesis):
 
         # the final forward layer
         logits = self.classifier_feedforward(torch.cat([aggregated_premise, aggregated_hypothesis], dim=-1))
+        probs = torch.nn.functional.softmax(logits, dim=-1)
 
-        output_dict = {'logits': logits}
+        output_dict = {'logits': logits, "probs": probs}
         if label is not None:
             loss = self.loss(logits, label)
             for metric in self.metrics.values():
@@ -195,6 +197,18 @@ def add_matching_result(matcher, encoded_premise, encoded_hypothesis):
 
         return output_dict
 
+    @overrides
+    def decode(self, output_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        """
+        Takes the argmax of ``"probs"``, maps it to string labels, and adds a ``"label"`` key to the result.
+        """
+        predictions = output_dict["probs"].cpu().data.numpy()
+        argmax_indices = numpy.argmax(predictions, axis=-1)
+        labels = [self.vocab.get_token_from_index(x, namespace="labels")
+                  for x in argmax_indices]
+        output_dict['label'] = labels
+        return output_dict
+
     @overrides
     def get_metrics(self, reset: bool = False) -> Dict[str, float]:
         return {metric_name: metric.get_metric(reset) for metric_name, metric in self.metrics.items()}
diff --git a/allennlp/service/server_simple.py b/allennlp/service/server_simple.py
@@ -133,6 +133,10 @@ def main(args):
 
     parser.add_argument('--archive-path', type=str, required=True, help='path to trained archive file')
     parser.add_argument('--predictor', type=str, required=True, help='name of predictor')
+    parser.add_argument('--weights-file', type=str,
+                        help='a path that overrides which weights file to use')
+    parser.add_argument('-o', '--overrides', type=str, default="",
+                        help='a JSON structure used to override the experiment configuration')
     parser.add_argument('--static-dir', type=str, help='serve index.html from this directory')
     parser.add_argument('--title', type=str, help='change the default page title', default="AllenNLP Demo")
     parser.add_argument('--field-name', type=str, action='append',
@@ -151,7 +155,7 @@ def main(args):
     for package_name in args.include_package:
         import_submodules(package_name)
 
-    archive = load_archive(args.archive_path)
+    archive = load_archive(args.archive_path, weights_file=args.weights_file, overrides=args.overrides)
     predictor = Predictor.from_archive(archive, args.predictor)
     field_names = args.field_name
 
diff --git a/allennlp/tests/models/bimpm_test.py b/allennlp/tests/models/bimpm_test.py
@@ -19,3 +19,9 @@ def test_model_can_train_save_and_load(self):
 
     def test_batch_predictions_are_consistent(self):
         self.ensure_batch_predictions_are_consistent()
+
+    def test_decode_runs_correctly(self):
+        training_tensors = self.dataset.as_tensor_dict()
+        output_dict = self.model(**training_tensors)
+        decode_output_dict = self.model.decode(output_dict)
+        assert "label" in decode_output_dict