
Commit 5fc7a00

Fix SpanBasedF1Measure for tags without conll labels (#1491)
Fixes `SpanBasedF1Measure` to work when the tags don't have conll labels (i.e., the labels are simply `{B, I, O}`). The only change needed was the check on `active_conll_tag`: it now tests `is not None` explicitly, because an unlabeled tag yields the empty string `""`, which is falsy, so the old truthiness check silently dropped those spans.
Parent: 01ddd12 · Commit: 5fc7a00
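For context, here is a minimal sketch (not part of the commit) of the truthiness pitfall described above: with unlabeled tags, `string_tag[2:]` is the empty string, so the old `if active_conll_tag:` check never records the span, whereas `is not None` only skips the genuine "no active span" state.

# Hypothetical standalone snippet, not taken from the repository.
active_conll_tag = ""                # an unlabeled span: "B"[2:] == ""

if active_conll_tag:                 # old check: "" is falsy, the span is silently dropped
    print("span recorded (old check)")

if active_conll_tag is not None:     # new check: only a true "no span" state is skipped
    print("span recorded (new check)")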

3 files changed: +38 -8 lines

allennlp/data/dataset_readers/dataset_utils/span_utils.py (+10 -7)
@@ -1,4 +1,4 @@
-from typing import List, Tuple, Callable, TypeVar
+from typing import Callable, List, Set, Tuple, TypeVar
 
 from allennlp.data.dataset_readers.dataset_utils.ontonotes import TypedStringSpan
 from allennlp.data.tokenizers.token import Token
@@ -70,7 +70,8 @@ def bio_tags_to_spans(tag_sequence: List[str],
     Spans are inclusive and can be of zero length, representing a single word span.
     Ill-formed spans are also included (i.e those which do not start with a "B-LABEL"),
     as otherwise it is possible to get a perfect precision score whilst still predicting
-    ill-formed spans in addition to the correct spans.
+    ill-formed spans in addition to the correct spans. This function works properly when
+    the spans are unlabeled (i.e., your labels are simply "B", "I", and "O").
 
     Parameters
     ----------
@@ -87,7 +88,7 @@ def bio_tags_to_spans(tag_sequence: List[str],
     Note that the label `does not` contain any BIO tag prefixes.
     """
     classes_to_ignore = classes_to_ignore or []
-    spans = set()
+    spans: Set[Tuple[str, Tuple[int, int]]] = set()
     span_start = 0
     span_end = 0
     active_conll_tag = None
@@ -99,7 +100,7 @@ def bio_tags_to_spans(tag_sequence: List[str],
         conll_tag = string_tag[2:]
         if bio_tag == "O" or conll_tag in classes_to_ignore:
             # The span has ended.
-            if active_conll_tag:
+            if active_conll_tag is not None:
                 spans.add((active_conll_tag, (span_start, span_end)))
             active_conll_tag = None
             # We don't care about tags we are
@@ -108,7 +109,7 @@ def bio_tags_to_spans(tag_sequence: List[str],
         elif bio_tag == "B":
             # We are entering a new span; reset indices
             # and active tag to new span.
-            if active_conll_tag:
+            if active_conll_tag is not None:
                 spans.add((active_conll_tag, (span_start, span_end)))
             active_conll_tag = conll_tag
             span_start = index
@@ -124,13 +125,13 @@ def bio_tags_to_spans(tag_sequence: List[str],
             # include this span. This is important, because otherwise,
             # a model may get a perfect F1 score whilst still including
             # false positive ill-formed spans.
-            if active_conll_tag:
+            if active_conll_tag is not None:
                 spans.add((active_conll_tag, (span_start, span_end)))
             active_conll_tag = conll_tag
             span_start = index
             span_end = index
     # Last token might have been a part of a valid span.
-    if active_conll_tag:
+    if active_conll_tag is not None:
         spans.add((active_conll_tag, (span_start, span_end)))
     return list(spans)
 
@@ -141,6 +142,8 @@ def bioul_tags_to_spans(tag_sequence: List[str],
     Given a sequence corresponding to BIOUL tags, extracts spans.
     Spans are inclusive and can be of zero length, representing a single word span.
     Ill-formed spans are not allowed and will raise ``InvalidTagSequence``.
+    This function works properly when the spans are unlabeled (i.e., your labels are
+    simply "B", "I", "O", "U", and "L").
 
     Parameters
     ----------
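As a quick sanity check (not part of the commit, and mirroring the tests added below), the fixed helpers can now be called with unlabeled tags; note that `bio_tags_to_spans` builds its result from a set, so the order of the returned list is not guaranteed.

from allennlp.data.dataset_readers.dataset_utils import span_utils

# Unlabeled BIO tags: every span comes back with the empty string as its label.
print(span_utils.bio_tags_to_spans(["O", "B", "I", "O", "B", "I", "B", "B"]))
# [('', (1, 2)), ('', (4, 5)), ('', (6, 6)), ('', (7, 7))] in some order

# Unlabeled BIOUL tags work the same way.
print(span_utils.bioul_tags_to_spans(["B", "I", "L", "U", "U", "O"]))
# [('', (0, 2)), ('', (3, 3)), ('', (4, 4))]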

allennlp/tests/data/dataset_readers/dataset_utils/span_utils_test.py (+24)
@@ -24,6 +24,21 @@ def test_bio_tags_to_spans_extracts_correct_spans(self):
         assert set(spans) == {("ARG1", (1, 2)), ("ARG2", (5, 6)), ("ARG1", (7, 7)),
                               ("ARG1", (4, 4)), ("ARG2", (8, 9))}
 
+    def test_bio_tags_to_spans_extracts_correct_spans_without_labels(self):
+        tag_sequence = ["O", "B", "I", "O", "B", "I", "B", "B"]
+        spans = span_utils.bio_tags_to_spans(tag_sequence)
+        assert set(spans) == {("", (1, 2)), ("", (4, 5)), ("", (6, 6)), ("", (7, 7))}
+
+        # Check that it raises when we use U- tags for single tokens.
+        tag_sequence = ["O", "B", "I", "O", "B", "I", "U", "U"]
+        with self.assertRaises(span_utils.InvalidTagSequence):
+            spans = span_utils.bio_tags_to_spans(tag_sequence)
+
+        # Check that invalid BIO sequences are also handled as spans.
+        tag_sequence = ["O", "B", "I", "O", "I", "B", "I", "B", "I", "I"]
+        spans = span_utils.bio_tags_to_spans(tag_sequence)
+        assert set(spans) == {('', (1, 2)), ('', (4, 4)), ('', (5, 6)), ('', (7, 9))}
+
     def test_bio_tags_to_spans_ignores_specified_tags(self):
         tag_sequence = ["B-V", "I-V", "O", "B-ARG1", "I-ARG1",
                         "O", "B-ARG2", "I-ARG2", "B-ARG1", "B-ARG2"]
@@ -66,6 +81,15 @@ def test_bioul_tags_to_spans(self):
         with self.assertRaises(span_utils.InvalidTagSequence):
             spans = span_utils.bioul_tags_to_spans(tag_sequence)
 
+    def test_bioul_tags_to_spans_without_labels(self):
+        tag_sequence = ['B', 'I', 'L', 'U', 'U', 'O']
+        spans = span_utils.bioul_tags_to_spans(tag_sequence)
+        assert spans == [('', (0, 2)), ('', (3, 3)), ('', (4, 4))]
+
+        tag_sequence = ['B', 'I', 'O']
+        with self.assertRaises(span_utils.InvalidTagSequence):
+            spans = span_utils.bioul_tags_to_spans(tag_sequence)
+
     def test_iob1_to_bioul(self):
         tag_sequence = ['I-ORG', 'O', 'I-MISC', 'O']
         bioul_sequence = span_utils.iob1_to_bioul(tag_sequence)

allennlp/training/metrics/span_based_f1_measure.py (+4 -1)
@@ -24,7 +24,10 @@ class SpanBasedF1Measure(Metric):
     is not exactly the same as the perl script used to evaluate the CONLL 2005
     data - particularly, it does not consider continuations or reference spans
     as constituents of the original span. However, it is a close proxy, which
-    can be helpful for judging model peformance during training.
+    can be helpful for judging model peformance during training. This metric
+    works properly when the spans are unlabeled (i.e., your labels are
+    simply "B", "I", "O" if using the "BIO" label encoding).
+
     """
     def __init__(self,
                  vocabulary: Vocabulary,
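Finally, a hedged sketch of exercising the metric itself with an unlabeled BIO tag vocabulary. The `Vocabulary` and tensor setup below is illustrative only, assuming the AllenNLP 0.x API of this era, and is not taken from the commit.

import torch
from allennlp.data.vocabulary import Vocabulary
from allennlp.training.metrics import SpanBasedF1Measure

# Tag vocabulary containing only the unlabeled BIO tags.
vocab = Vocabulary()
for tag in ["O", "B", "I"]:
    vocab.add_token_to_namespace(tag, namespace="tags")

metric = SpanBasedF1Measure(vocab, tag_namespace="tags")

# One sequence of four tokens whose predictions exactly match the gold tags.
tags = ["O", "B", "I", "O"]
gold = torch.LongTensor([[vocab.get_token_index(t, "tags") for t in tags]])
predictions = torch.zeros(1, 4, vocab.get_vocab_size("tags"))
for i, tag in enumerate(tags):
    predictions[0, i, vocab.get_token_index(tag, "tags")] = 1.0
mask = torch.ones(1, 4)

metric(predictions, gold, mask)
print(metric.get_metric())  # overall precision/recall/F1 should all be 1.0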
