This repository was archived by the owner on Dec 16, 2022. It is now read-only.

Commit 37a078a

make things backward compatible with spacy 2.0 (#2644)
* make things backward compatible with spacy 2.0
* disable more tests
1 parent e79b713 commit 37a078a

File tree: 4 files changed, +15 -2 lines changed

allennlp/data/tokenizers/sentence_splitter.py (+7, -2)
@@ -1,5 +1,8 @@
 from typing import List
 from overrides import overrides
+
+import spacy
+
 from allennlp.common import Registrable
 from allennlp.common.util import get_spacy_model
 
@@ -44,8 +47,10 @@ def __init__(self,
         self.spacy = get_spacy_model(language, parse=not rule_based, ner=False, pos_tags=False)
         if rule_based:
             # we use `sentencizer`, a built-in spacy module for rule-based sentence boundary detection.
-            if not self.spacy.has_pipe('sentencizer'):
-                sbd = self.spacy.create_pipe('sentencizer')
+            # depending on the spacy version, it could be called 'sentencizer' or 'sbd'
+            sbd_name = 'sbd' if spacy.__version__ < '2.1' else 'sentencizer'
+            if not self.spacy.has_pipe(sbd_name):
+                sbd = self.spacy.create_pipe(sbd_name)
                 self.spacy.add_pipe(sbd)
 
     @overrides
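
For context, a minimal standalone sketch of the same version check applied to a plain spaCy 2.x pipeline; this is not code from the commit. The blank 'en' pipeline and the sample text are illustrative only, and the create_pipe/add_pipe(component) calls are the spaCy 2.x conventions this commit targets.

import spacy

# Sketch (not part of the commit): choose the rule-based sentence-boundary pipe name
# by spaCy version, exactly as the diff above does, then run it on a blank pipeline.
nlp = spacy.blank('en')  # illustrative blank English pipeline

# spaCy before 2.1 exposed the rule-based splitter as 'sbd'; 2.1+ calls it 'sentencizer'.
sbd_name = 'sbd' if spacy.__version__ < '2.1' else 'sentencizer'
if not nlp.has_pipe(sbd_name):
    nlp.add_pipe(nlp.create_pipe(sbd_name))  # spaCy 2.x: add_pipe takes the component object

doc = nlp("This is the first sentence. This is the second sentence!")
print([sentence.text for sentence in doc.sents])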

allennlp/tests/data/dataset_readers/text_classification_json_test.py (+2)
@@ -1,5 +1,6 @@
 # pylint: disable=no-self-use,invalid-name
 import pytest
+import spacy
 
 from allennlp.data.dataset_readers import TextClassificationJsonReader
 from allennlp.common.util import ensure_list
@@ -70,6 +71,7 @@ def test_read_from_file_ag_news_corpus_and_truncates_properly(self, lazy):
         assert [t.text for t in fields["tokens"].tokens] == instance3["tokens"]
         assert fields["label"].label == instance3["label"]
 
+    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
     @pytest.mark.parametrize("lazy", (True, False))
     def test_read_from_file_ag_news_corpus_and_segments_sentences_properly(self, lazy):
         reader = TextClassificationJsonReader(lazy=lazy, segment_sentences=True)
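
The skipif guard added here (and reused in the two test files below) gates a test on the installed spaCy version. A small self-contained sketch of the pattern, with a hypothetical test name:

import pytest
import spacy

# Hypothetical test name; the guard itself mirrors the marker added in this commit.
@pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
def test_requires_spacy_21_models():
    assert spacy.__version__ >= "2.1"

Note that the guard compares version strings lexicographically, which is enough to distinguish 2.0.x from 2.1.x here but is not a general-purpose version comparison.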

allennlp/tests/data/tokenizers/sentence_splitter_test.py (+3)
@@ -1,4 +1,6 @@
 # pylint: disable=no-self-use,invalid-name
+import pytest
+import spacy
 
 from allennlp.common.testing import AllenNlpTestCase
 from allennlp.data.tokenizers.sentence_splitter import SpacySentenceSplitter
@@ -18,6 +20,7 @@ def test_rule_based_splitter_passes_through_correctly(self):
                            "Here's the '3rd' sentence - yes, it is.", "And yes; this is a fourth sentence?"]
         assert tokens == expected_tokens
 
+    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
     def test_dep_parse_splitter_passes_through_correctly(self):
         text = ("This is the first sentence. This is the second sentence! "
                 "Here's the '3rd' sentence - yes, it is. And yes; this is a fourth sentence?")

allennlp/tests/models/sniff_test.py (+3)
@@ -1,4 +1,6 @@
 # pylint: disable=no-self-use,line-too-long
+import pytest
+import spacy
 
 from allennlp.common.testing import AllenNlpTestCase
 from allennlp import pretrained
@@ -104,6 +106,7 @@ def test_ner(self):
         assert result["words"] == ["Michael", "Jordan", "is", "a", "professor", "at", "Berkeley", "."]
         assert result["tags"] == ["B-PER", "L-PER", "O", "O", "O", "O", "U-LOC", "O"]
 
+    @pytest.mark.skipif(spacy.__version__ < "2.1", reason="this model changed from 2.0 to 2.1")
     def test_constituency_parsing(self):
         predictor = pretrained.span_based_constituency_parsing_with_elmo_joshi_2018()
 

0 commit comments