
Commit 76a65a8

Handsome Zebra authored and matt-gardner committed
BiMPM model (#1594)
* Adding Quora data reader and BiMPM model.
* Refactoring and renaming to pass pylint.
* Reduce batch size.
* Adding docs.
* Make title underline longer.
* Adding doc toctree.
* Various improvements to speed and memory.
* Improve comments.
* 1. Remove zip file handling. 2. Use allennlp s3 for quora data download. 3. Move masked_max, masked_mean to nn.util. 4. Various variable renaming, comments improvements, etc.
* Remove unused url pattern match and change num_perspective to num_perspectives.
1 parent 58119c0 commit 76a65a8

20 files changed, +1157 -0 lines changed

allennlp/data/dataset_readers/__init__.py (+1)

@@ -24,3 +24,4 @@
 from allennlp.data.dataset_readers.stanford_sentiment_tree_bank import (
     StanfordSentimentTreeBankDatasetReader)
 from allennlp.data.dataset_readers.wikitables import WikiTablesDatasetReader
+from allennlp.data.dataset_readers.quora_paraphrase import QuoraParaphraseDatasetReader
allennlp/data/dataset_readers/quora_paraphrase.py (new file, +73 lines)

@@ -0,0 +1,73 @@
from typing import Dict
import logging
import csv

from overrides import overrides

from allennlp.common.file_utils import cached_path
from allennlp.data.dataset_readers.dataset_reader import DatasetReader
from allennlp.data.fields import LabelField, TextField, Field
from allennlp.data.instance import Instance
from allennlp.data.tokenizers import Tokenizer, WordTokenizer
from allennlp.data.tokenizers.word_splitter import JustSpacesWordSplitter
from allennlp.data.token_indexers import TokenIndexer, SingleIdTokenIndexer

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name


@DatasetReader.register("quora_paraphrase")
class QuoraParaphraseDatasetReader(DatasetReader):
    """
    Reads a file from the Quora Paraphrase dataset. The train/validation/test split of the data
    comes from the paper `Bilateral Multi-Perspective Matching for Natural Language Sentences
    <https://arxiv.org/abs/1702.03814>`_ by Zhiguo Wang et al., 2017. Each file of the data
    is a tsv file without a header. The columns are is_duplicate, question1, question2, and id.
    All questions are pre-tokenized and tokens are space separated. We convert these keys into
    fields named "label", "premise" and "hypothesis", so that it is compatible with some
    existing natural language inference algorithms.

    Parameters
    ----------
    lazy : ``bool`` (optional, default=False)
        Passed to ``DatasetReader``. If this is ``True``, training will start sooner, but will
        take longer per batch. This also allows training with datasets that are too large to fit
        in memory.
    tokenizer : ``Tokenizer``, optional
        Tokenizer to use to split the premise and hypothesis into words or other kinds of tokens.
        Defaults to ``WordTokenizer(JustSpacesWordSplitter())``.
    token_indexers : ``Dict[str, TokenIndexer]``, optional
        Indexers used to define input token representations. Defaults to ``{"tokens":
        SingleIdTokenIndexer()}``.
    """
    def __init__(self,
                 lazy: bool = False,
                 tokenizer: Tokenizer = None,
                 token_indexers: Dict[str, TokenIndexer] = None) -> None:
        super().__init__(lazy)
        self._tokenizer = tokenizer or WordTokenizer(JustSpacesWordSplitter())
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}

    @overrides
    def _read(self, file_path):
        logger.info("Reading instances from lines in file at: %s", file_path)
        with open(cached_path(file_path), "r") as data_file:
            tsv_in = csv.reader(data_file, delimiter='\t')
            for row in tsv_in:
                if len(row) == 4:
                    yield self.text_to_instance(premise=row[1], hypothesis=row[2], label=row[0])

    @overrides
    def text_to_instance(self,  # type: ignore
                         premise: str,
                         hypothesis: str,
                         label: str = None) -> Instance:
        # pylint: disable=arguments-differ
        fields: Dict[str, Field] = {}
        tokenized_premise = self._tokenizer.tokenize(premise)
        tokenized_hypothesis = self._tokenizer.tokenize(hypothesis)
        fields["premise"] = TextField(tokenized_premise, self._token_indexers)
        fields["hypothesis"] = TextField(tokenized_hypothesis, self._token_indexers)
        if label is not None:
            fields['label'] = LabelField(label)

        return Instance(fields)
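The reader registers itself under the name "quora_paraphrase", so it can be selected in a configuration file or instantiated directly. A minimal usage sketch (not part of this commit; "quora_train.tsv" is a hypothetical local copy of the tab-separated data described in the docstring):

from allennlp.data.dataset_readers import QuoraParaphraseDatasetReader

# The default tokenizer splits on whitespace, matching the pre-tokenized Quora data.
reader = QuoraParaphraseDatasetReader()

# DatasetReader.read yields Instances with "premise", "hypothesis", and "label" fields.
for instance in reader.read("quora_train.tsv"):
    print(instance.fields["label"].label,
          [token.text for token in instance.fields["premise"].tokens])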

allennlp/models/__init__.py (+1)

@@ -20,3 +20,4 @@
 from allennlp.models.semantic_role_labeler import SemanticRoleLabeler
 from allennlp.models.simple_tagger import SimpleTagger
 from allennlp.models.esim import ESIM
+from allennlp.models.bimpm import BiMpm

allennlp/models/bimpm.py (new file, +200 lines)

@@ -0,0 +1,200 @@
"""
BiMPM (Bilateral Multi-Perspective Matching) model implementation.
"""

from typing import Dict, Optional, List, Any

from overrides import overrides
import torch

from allennlp.common.checks import check_dimensions_match
from allennlp.data import Vocabulary
from allennlp.modules import FeedForward, Seq2SeqEncoder, Seq2VecEncoder, TextFieldEmbedder
from allennlp.models.model import Model
from allennlp.nn import InitializerApplicator, RegularizerApplicator
from allennlp.nn import util
from allennlp.training.metrics import CategoricalAccuracy

from allennlp.modules.bimpm_matching import BiMpmMatching


@Model.register("bimpm")
class BiMpm(Model):
    """
    This ``Model`` implements the BiMPM model described in `Bilateral Multi-Perspective Matching
    for Natural Language Sentences <https://arxiv.org/abs/1702.03814>`_ by Zhiguo Wang et al., 2017.
    Also please refer to the `TensorFlow implementation <https://github.com/zhiguowang/BiMPM/>`_ and
    `PyTorch implementation <https://github.com/galsang/BIMPM-pytorch>`_.

    Parameters
    ----------
    vocab : ``Vocabulary``
    text_field_embedder : ``TextFieldEmbedder``
        Used to embed the ``premise`` and ``hypothesis`` ``TextFields`` we get as input to the
        model.
    matcher_word : ``BiMpmMatching``
        BiMPM matching on the output of word embeddings of premise and hypothesis.
    encoder1 : ``Seq2SeqEncoder``
        First encoder layer for the premise and hypothesis.
    matcher_forward1 : ``BiMpmMatching``
        BiMPM matching for the forward output of the first encoder layer.
    matcher_backward1 : ``BiMpmMatching``
        BiMPM matching for the backward output of the first encoder layer.
    encoder2 : ``Seq2SeqEncoder``
        Second encoder layer for the premise and hypothesis.
    matcher_forward2 : ``BiMpmMatching``
        BiMPM matching for the forward output of the second encoder layer.
    matcher_backward2 : ``BiMpmMatching``
        BiMPM matching for the backward output of the second encoder layer.
    aggregator : ``Seq2VecEncoder``
        Aggregator of all BiMPM matching vectors.
    classifier_feedforward : ``FeedForward``
        Fully connected layers for classification.
    dropout : ``float``, optional (default=0.1)
        Dropout percentage to use.
    initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``)
        If provided, will be used to initialize the model parameters.
    regularizer : ``RegularizerApplicator``, optional (default=``None``)
        If provided, will be used to calculate the regularization penalty during training.
    """
    def __init__(self, vocab: Vocabulary,
                 text_field_embedder: TextFieldEmbedder,
                 matcher_word: BiMpmMatching,
                 encoder1: Seq2SeqEncoder,
                 matcher_forward1: BiMpmMatching,
                 matcher_backward1: BiMpmMatching,
                 encoder2: Seq2SeqEncoder,
                 matcher_forward2: BiMpmMatching,
                 matcher_backward2: BiMpmMatching,
                 aggregator: Seq2VecEncoder,
                 classifier_feedforward: FeedForward,
                 dropout: float = 0.1,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super(BiMpm, self).__init__(vocab, regularizer)

        self.text_field_embedder = text_field_embedder

        self.matcher_word = matcher_word

        self.encoder1 = encoder1
        self.matcher_forward1 = matcher_forward1
        self.matcher_backward1 = matcher_backward1

        self.encoder2 = encoder2
        self.matcher_forward2 = matcher_forward2
        self.matcher_backward2 = matcher_backward2

        self.aggregator = aggregator

        matching_dim = self.matcher_word.get_output_dim() + \
                       self.matcher_forward1.get_output_dim() + self.matcher_backward1.get_output_dim() + \
                       self.matcher_forward2.get_output_dim() + self.matcher_backward2.get_output_dim()

        check_dimensions_match(matching_dim, self.aggregator.get_input_dim(),
                               "sum of dim of all matching layers", "aggregator input dim")

        self.classifier_feedforward = classifier_feedforward

        self.dropout = torch.nn.Dropout(dropout)

        self.metrics = {"accuracy": CategoricalAccuracy()}

        self.loss = torch.nn.CrossEntropyLoss()

        initializer(self)

    @overrides
    def forward(self,  # type: ignore
                premise: Dict[str, torch.LongTensor],
                hypothesis: Dict[str, torch.LongTensor],
                label: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None  # pylint:disable=unused-argument
               ) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        premise : Dict[str, torch.LongTensor]
            The premise from a ``TextField``.
        hypothesis : Dict[str, torch.LongTensor]
            The hypothesis from a ``TextField``.
        label : torch.LongTensor, optional (default = None)
            The label for the pair of the premise and the hypothesis.
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            Additional information about the pair.

        Returns
        -------
        An output dictionary consisting of:

        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_labels)`` representing unnormalised log
            probabilities of the entailment label.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.
        """
        mask_premise = util.get_text_field_mask(premise)
        mask_hypothesis = util.get_text_field_mask(hypothesis)

        # embedding and encoding of the premise
        embedded_premise = self.dropout(self.text_field_embedder(premise))
        encoded_premise1 = self.dropout(self.encoder1(embedded_premise, mask_premise))
        encoded_premise2 = self.dropout(self.encoder2(encoded_premise1, mask_premise))

        # embedding and encoding of the hypothesis
        embedded_hypothesis = self.dropout(self.text_field_embedder(hypothesis))
        encoded_hypothesis1 = self.dropout(self.encoder1(embedded_hypothesis, mask_hypothesis))
        encoded_hypothesis2 = self.dropout(self.encoder2(encoded_hypothesis1, mask_hypothesis))

        matching_vector_premise: List[torch.Tensor] = []
        matching_vector_hypothesis: List[torch.Tensor] = []

        def add_matching_result(matcher, encoded_premise, encoded_hypothesis):
            # utility function to get matching result and add to the result list
            matching_result = matcher(encoded_premise, mask_premise, encoded_hypothesis, mask_hypothesis)
            matching_vector_premise.extend(matching_result[0])
            matching_vector_hypothesis.extend(matching_result[1])

        # calculate matching vectors from word embedding, first layer encoding, and second layer encoding
        add_matching_result(self.matcher_word, embedded_premise, embedded_hypothesis)
        half_hidden_size_1 = self.encoder1.get_output_dim() // 2
        add_matching_result(self.matcher_forward1,
                            encoded_premise1[:, :, :half_hidden_size_1],
                            encoded_hypothesis1[:, :, :half_hidden_size_1])
        add_matching_result(self.matcher_backward1,
                            encoded_premise1[:, :, half_hidden_size_1:],
                            encoded_hypothesis1[:, :, half_hidden_size_1:])

        half_hidden_size_2 = self.encoder2.get_output_dim() // 2
        add_matching_result(self.matcher_forward2,
                            encoded_premise2[:, :, :half_hidden_size_2],
                            encoded_hypothesis2[:, :, :half_hidden_size_2])
        add_matching_result(self.matcher_backward2,
                            encoded_premise2[:, :, half_hidden_size_2:],
                            encoded_hypothesis2[:, :, half_hidden_size_2:])

        # concat the matching vectors
        matching_vector_cat_premise = self.dropout(torch.cat(matching_vector_premise, dim=2))
        matching_vector_cat_hypothesis = self.dropout(torch.cat(matching_vector_hypothesis, dim=2))

        # aggregate the matching vectors
        aggregated_premise = self.dropout(self.aggregator(matching_vector_cat_premise, mask_premise))
        aggregated_hypothesis = self.dropout(self.aggregator(matching_vector_cat_hypothesis, mask_hypothesis))

        # the final forward layer
        logits = self.classifier_feedforward(torch.cat([aggregated_premise, aggregated_hypothesis], dim=-1))

        output_dict = {'logits': logits}
        if label is not None:
            loss = self.loss(logits, label)
            for metric in self.metrics.values():
                metric(logits, label)
            output_dict["loss"] = loss

        return output_dict

    @overrides
    def get_metrics(self, reset: bool = False) -> Dict[str, float]:
        return {metric_name: metric.get_metric(reset) for metric_name, metric in self.metrics.items()}
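Since ``encoder1`` and ``encoder2`` are expected to be bidirectional, their outputs concatenate forward and backward hidden states along the last dimension, and ``forward`` slices the two halves apart before matching. A standalone sketch of that slicing, with hypothetical sizes standing in for a real encoder output:

import torch

# Stand-in for a bidirectional Seq2SeqEncoder output of shape (batch, seq_len, hidden).
batch_size, seq_len, hidden_size = 2, 7, 8
encoded = torch.randn(batch_size, seq_len, hidden_size)

half = hidden_size // 2
forward_states = encoded[:, :, :half]   # first half: forward-direction states
backward_states = encoded[:, :, half:]  # second half: backward-direction states

assert forward_states.shape == (batch_size, seq_len, half)
assert backward_states.shape == (batch_size, seq_len, half)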

allennlp/modules/__init__.py (+1)

@@ -22,3 +22,4 @@
 from allennlp.modules.matrix_attention import MatrixAttention
 from allennlp.modules.attention import Attention
 from allennlp.modules.input_variational_dropout import InputVariationalDropout
+from allennlp.modules.bimpm_matching import BiMpmMatching
