This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit 37a078a

make things backward compatible with spacy 2.0 (#2644)
* make things backward compatible with spacy 2.0
* disable more tests
1 parent e79b713 commit 37a078a

File tree: 4 files changed, +15 -2 lines changed

allennlp/data/tokenizers/sentence_splitter.py (+7, -2)
@@ -1,5 +1,8 @@
 from typing import List
 from overrides import overrides
+
+import spacy
+
 from allennlp.common import Registrable
 from allennlp.common.util import get_spacy_model
 
@@ -44,8 +47,10 @@ def __init__(self,
         self.spacy = get_spacy_model(language, parse=not rule_based, ner=False, pos_tags=False)
         if rule_based:
             # we use `sentencizer`, a built-in spacy module for rule-based sentence boundary detection.
-            if not self.spacy.has_pipe('sentencizer'):
-                sbd = self.spacy.create_pipe('sentencizer')
+            # depending on the spacy version, it could be called 'sentencizer' or 'sbd'
+            sbd_name = 'sbd' if spacy.__version__ < '2.1' else 'sentencizer'
+            if not self.spacy.has_pipe(sbd_name):
+                sbd = self.spacy.create_pipe(sbd_name)
                 self.spacy.add_pipe(sbd)
 
     @overrides
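
For context, a minimal standalone sketch of the same version check applied to a plain spaCy 2.x pipeline; this is not code from the commit. The blank 'en' pipeline and the sample text are illustrative only, and the create_pipe/add_pipe(component) calls are the spaCy 2.x conventions this commit targets.

import spacy

# Sketch (not part of the commit): choose the rule-based sentence-boundary pipe name
# by spaCy version, exactly as the diff above does, then run it on a blank pipeline.
nlp = spacy.blank('en')  # illustrative blank English pipeline

# spaCy before 2.1 exposed the rule-based splitter as 'sbd'; 2.1+ calls it 'sentencizer'.
sbd_name = 'sbd' if spacy.__version__ < '2.1' else 'sentencizer'
if not nlp.has_pipe(sbd_name):
    nlp.add_pipe(nlp.create_pipe(sbd_name))  # spaCy 2.x: add_pipe takes the component object

doc = nlp("This is the first sentence. This is the second sentence!")
print([sentence.text for sentence in doc.sents])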

allennlp/tests/data/dataset_readers/text_classification_json_test.py (+2)
@@ -1,5 +1,6 @@
 # pylint: disable=no-self-use,invalid-name
 import pytest
+import spacy
 
 from allennlp.data.dataset_readers import TextClassificationJsonReader
 from allennlp.common.util import ensure_list
@@ -70,6 +71,7 @@ def test_read_from_file_ag_news_corpus_and_truncates_properly(self, lazy):
         assert [t.text for t in fields["tokens"].tokens] == instance3["tokens"]
         assert fields["label"].label == instance3["label"]
 
+    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
     @pytest.mark.parametrize("lazy", (True, False))
     def test_read_from_file_ag_news_corpus_and_segments_sentences_properly(self, lazy):
         reader = TextClassificationJsonReader(lazy=lazy, segment_sentences=True)
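
The skipif guard added here (and reused in the two test files below) gates a test on the installed spaCy version. A small self-contained sketch of the pattern, with a hypothetical test name:

import pytest
import spacy

# Hypothetical test name; the guard itself mirrors the marker added in this commit.
@pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
def test_requires_spacy_21_models():
    assert spacy.__version__ >= "2.1"

Note that the guard compares version strings lexicographically, which is enough to distinguish 2.0.x from 2.1.x here but is not a general-purpose version comparison.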

allennlp/tests/data/tokenizers/sentence_splitter_test.py (+3)
@@ -1,4 +1,6 @@
 # pylint: disable=no-self-use,invalid-name
+import pytest
+import spacy
 
 from allennlp.common.testing import AllenNlpTestCase
 from allennlp.data.tokenizers.sentence_splitter import SpacySentenceSplitter
@@ -18,6 +20,7 @@ def test_rule_based_splitter_passes_through_correctly(self):
                            "Here's the '3rd' sentence - yes, it is.", "And yes; this is a fourth sentence?"]
         assert tokens == expected_tokens
 
+    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
     def test_dep_parse_splitter_passes_through_correctly(self):
         text = ("This is the first sentence. This is the second sentence! "
                 "Here's the '3rd' sentence - yes, it is. And yes; this is a fourth sentence?")

allennlp/tests/models/sniff_test.py (+3)
@@ -1,4 +1,6 @@
 # pylint: disable=no-self-use,line-too-long
+import pytest
+import spacy
 
 from allennlp.common.testing import AllenNlpTestCase
 from allennlp import pretrained
@@ -104,6 +106,7 @@ def test_ner(self):
         assert result["words"] == ["Michael", "Jordan", "is", "a", "professor", "at", "Berkeley", "."]
         assert result["tags"] == ["B-PER", "L-PER", "O", "O", "O", "O", "U-LOC", "O"]
 
+    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
     def test_constituency_parsing(self):
         predictor = pretrained.span_based_constituency_parsing_with_elmo_joshi_2018()
 

0 commit comments