Skip to content

Commit 5d79edb

Browse files
authored
Merge pull request #17 from makcedward/dev
Dev
2 parents 49e5166 + b32440c commit 5d79edb

28 files changed

+195
-126
lines changed

CHANGE.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
NLPAUG Change Log
22
================
33

4+
**0.0.6** Jul 29, 2019:
5+
- Added new augmenter [TF-IDF based word replacement augmenter](https://arxiv.org/pdf/1904.12848.pdf)(TfIdfAug)
6+
- Added new augmenter [Spelling mistake simulation augmenter](https://arxiv.org/pdf/1711.02173.pdf)(SpellingAug)
7+
- Added new augmenter [Stopword Dropout augmenter](https://arxiv.org/pdf/1809.02079.pdf)(StopWordsAug)
8+
- Fixed [#14](https://github.com/makcedward/nlpaug/issues/14)
9+
410
**0.0.5** Jul 2, 2019:
511
- Fixed [#3](https://github.com/makcedward/nlpaug/issues/3), [#4](https://github.com/makcedward/nlpaug/issues/4), [#5](https://github.com/makcedward/nlpaug/issues/5), [#7](https://github.com/makcedward/nlpaug/issues/7), [#10](https://github.com/makcedward/nlpaug/issues/10)
612

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ Download word2vec or GloVe files if you use `Word2VecAug`, `GloVeAug` or `Fastte
7575

7676
## Recent Changes
7777

78-
**BETA** Jul 22, 2019:
78+
**0.0.6** Jul 29, 2019:
7979
- Added new augmenter [TF-IDF based word replacement augmenter](https://arxiv.org/pdf/1904.12848.pdf)(TfIdfAug)
8080
- Added new augmenter [Spelling mistake simulation augmenter](https://arxiv.org/pdf/1711.02173.pdf)(SpellingAug)
8181
- Added new augmenter [Stopword Dropout augmenter](https://arxiv.org/pdf/1809.02079.pdf)(StopWordsAug)

nlpaug/augmenter/char/char_augmenter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,19 @@
44

55

66
class CharAugmenter(Augmenter):
7-
def __init__(self, action, name='Char_Aug', aug_min=1, min_char=2, aug_char_p=0.3, aug_word_p=0.3, tokenizer=None, stopwords=[],
7+
def __init__(self, action, name='Char_Aug', aug_min=1, min_char=2, aug_char_p=0.3, aug_word_p=0.3,
8+
tokenizer=None, reverse_tokenizer=None, stopwords=[],
89
verbose=0):
9-
super(CharAugmenter, self).__init__(
10+
super().__init__(
1011
name=name, method=Method.CHAR, action=action, aug_min=aug_min, verbose=verbose)
1112
self.aug_p = None
1213
self.aug_char_p = aug_char_p
1314
self.aug_word_p = aug_word_p
1415
self.min_char = min_char
1516
if tokenizer is not None:
1617
self.tokenizer = tokenizer
18+
if reverse_tokenizer is not None:
19+
self.reverse_tokenizer = reverse_tokenizer
1720
self.stopwords = stopwords
1821

1922
def tokenizer(self, text):

nlpaug/augmenter/char/ocr.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44

55
class OcrAug(CharAugmenter):
6-
def __init__(self, name='OCR_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3, stopwords=[], verbose=0):
6+
def __init__(self, name='OCR_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3, stopwords=[],
7+
tokenizer=None, reverse_tokenizer=None, verbose=0):
78
"""
89
Simulate OCR error on input text.
910
@@ -16,9 +17,9 @@ def __init__(self, name='OCR_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3, st
1617
:param stopwords: List of words which will be skipped from augment operation.
1718
:param verbose: Verbosity mode.
1819
"""
19-
super(OcrAug, self).__init__(
20+
super().__init__(
2021
action=Action.SUBSTITUTE, name=name, aug_char_p=aug_char_p, aug_word_p=aug_word_p, aug_min=aug_min,
21-
tokenizer=None, stopwords=stopwords, verbose=verbose)
22+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, stopwords=stopwords, verbose=verbose)
2223

2324
self.model = self.get_model()
2425

nlpaug/augmenter/char/qwerty.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66

77
class QwertyAug(CharAugmenter):
8-
def __init__(self, name='Qwerty_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3, stopwords=[], verbose=0):
8+
def __init__(self, name='Qwerty_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3, stopwords=[],
9+
tokenizer=None, reverse_tokenizer=None, verbose=0):
910
"""
1011
Simulate keyboard typo error on input text.
1112
@@ -19,9 +20,9 @@ def __init__(self, name='Qwerty_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3,
1920
:param verbose: Verbosity mode.
2021
"""
2122

22-
super(QwertyAug, self).__init__(
23+
super().__init__(
2324
action=Action.SUBSTITUTE, name=name, aug_char_p=aug_char_p, aug_word_p=aug_word_p, aug_min=aug_min,
24-
tokenizer=None, stopwords=stopwords, verbose=verbose)
25+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, stopwords=stopwords, verbose=verbose)
2526

2627
self.model = self.get_model()
2728

nlpaug/augmenter/char/random.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
class RandomCharAug(CharAugmenter):
88
def __init__(self, action=Action.SUBSTITUTE, name='RandomChar_Aug', aug_min=1, aug_char_p=0.3, aug_word_p=0.3,
99
include_upper_case=True, include_lower_case=True, include_numeric=True,
10-
spec_char='!@#$%^&*()_+', stopwords=[], verbose=0):
10+
spec_char='!@#$%^&*()_+', stopwords=[], tokenizer=None, reverse_tokenizer=None, verbose=0):
1111
"""
1212
Apply random augment operation on input text
1313
@@ -26,9 +26,9 @@ def __init__(self, action=Action.SUBSTITUTE, name='RandomChar_Aug', aug_min=1, a
2626
:param verbose: Verbosity mode.
2727
"""
2828

29-
super(RandomCharAug, self).__init__(
29+
super().__init__(
3030
action=action, name=name, aug_char_p=aug_char_p, aug_word_p=aug_word_p, aug_min=aug_min,
31-
tokenizer=None, stopwords=stopwords, verbose=verbose)
31+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, stopwords=stopwords, verbose=verbose)
3232

3333
self.include_upper_case = include_upper_case
3434
self.include_lower_case = include_lower_case

nlpaug/augmenter/word/bert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def init_bert_model(model_path, tokenizer_path, force_reload=False):
2727
class BertAug(WordAugmenter):
2828
def __init__(self, model_path='bert-base-uncased', tokenizer_path='bert-base-uncased', action=Action.SUBSTITUTE,
2929
name='Bert_Aug', aug_min=1, aug_p=0.3, aug_n=5, stopwords=[], verbose=0):
30-
super(BertAug, self).__init__(
30+
super().__init__(
3131
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=None, stopwords=stopwords,
3232
verbose=verbose)
3333
self.model_path = model_path

nlpaug/augmenter/word/fasttext.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ def init_fasttext_model(model_path, force_reload=False):
2626

2727
class FasttextAug(WordEmbsAugmenter):
2828
def __init__(self, model_path='.', model=None, action=Action.SUBSTITUTE,
29-
name='Fasttext_Aug', aug_min=1, aug_p=0.3, aug_n=5, tokenizer=None, stopwords=[], force_reload=False,
29+
name='Fasttext_Aug', aug_min=1, aug_p=0.3, aug_n=5, stopwords=[],
30+
tokenizer=None, reverse_tokenizer=None, force_reload=False,
3031
verbose=0):
3132
super().__init__(
3233
model_path=model_path, aug_n=aug_n,
33-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
34-
verbose=verbose)
34+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
35+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
3536

3637
if model is None:
3738
self.model = self.get_model(force_reload=force_reload)

nlpaug/augmenter/word/glove.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ def init_glove_model(model_path, force_reload=False):
2626

2727
class GloVeAug(WordEmbsAugmenter):
2828
def __init__(self, model_path='.', model=None, action=Action.SUBSTITUTE,
29-
name='GloVe_Aug', aug_min=1, aug_p=0.3, aug_n=5, tokenizer=None, stopwords=[], force_reload=False,
29+
name='GloVe_Aug', aug_min=1, aug_p=0.3, aug_n=5, stopwords=[],
30+
tokenizer=None, reverse_tokenizer=None, force_reload=False,
3031
verbose=0):
31-
super(GloVeAug, self).__init__(
32+
super().__init__(
3233
model_path=model_path, aug_n=aug_n,
33-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
34-
verbose=verbose)
34+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
35+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
3536

3637
if model is None:
3738
self.model = self.get_model(force_reload=force_reload)

nlpaug/augmenter/word/random.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33

44

55
class RandomWordAug(WordAugmenter):
6-
def __init__(self, action=Action.DELETE, name='RandomWord_Aug', aug_min=1, aug_p=0.3, tokenizer=None, stopwords=[],
7-
verbose=0):
8-
super(RandomWordAug, self).__init__(
9-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
10-
verbose=verbose)
6+
def __init__(self, action=Action.DELETE, name='RandomWord_Aug', aug_min=1, aug_p=0.3, stopwords=[],
7+
tokenizer=None, reverse_tokenizer=None, verbose=0):
8+
super().__init__(
9+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
10+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
1111

1212
def swap(self, text):
1313
"""

nlpaug/augmenter/word/spelling.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ def init_spelling_error_model(dict_path, include_reverse, force_reload=False):
2020

2121

2222
class SpellingAug(WordAugmenter):
23-
def __init__(self, dict_path, name='Spelling_Aug', aug_min=1, aug_p=0.3, tokenizer=None, stopwords=[],
24-
include_reverse=True, verbose=0):
23+
def __init__(self, dict_path, name='Spelling_Aug', aug_min=1, aug_p=0.3, stopwords=[],
24+
tokenizer=None, reverse_tokenizer=None, include_reverse=True, verbose=0):
2525
super().__init__(
26-
action=Action.SUBSTITUTE, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
27-
verbose=verbose)
26+
action=Action.SUBSTITUTE, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
27+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
2828

2929
self.dict_path = dict_path
3030
self.include_reverse = include_reverse

nlpaug/augmenter/word/stopwords.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,15 @@
33

44

55
class StopWordsAug(WordAugmenter):
6-
def __init__(self, stopwords, action=Action.DELETE, name='StopWords_Aug', aug_min=1, aug_p=0.3, tokenizer=None,
7-
case_sensitive=False, verbose=0):
6+
def __init__(self, stopwords, action=Action.DELETE, name='StopWords_Aug', aug_min=1, aug_p=0.3,
7+
tokenizer=None, reverse_tokenizer=None, case_sensitive=False, verbose=0):
88

99
if not case_sensitive:
1010
stopwords = [t.lower() for t in stopwords]
1111

1212
super().__init__(
13-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
14-
verbose=verbose)
13+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
14+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
1515

1616
self.case_sensitive = case_sensitive
1717

nlpaug/augmenter/word/tfidf.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ def init_tfidf_model(model_path, force_reload=False):
2323

2424
class TfIdfAug(WordAugmenter):
2525
def __init__(self, model_path='.', action=Action.SUBSTITUTE,
26-
name='TfIdf_Aug', aug_min=1, aug_p=0.3, aug_n=5, tokenizer=None, n_gram_separator='_',
27-
stopwords=[], verbose=0):
26+
name='TfIdf_Aug', aug_min=1, aug_p=0.3, aug_n=5, n_gram_separator='_',
27+
stopwords=[], tokenizer=None, reverse_tokenizer=None, verbose=0):
2828
super().__init__(
29-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
30-
verbose=verbose)
29+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
30+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
3131
self.model_path = model_path
3232
self.aug_n = aug_n
3333
self.model = self.get_model(force_reload=False)

nlpaug/augmenter/word/word2vec.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,12 @@ def init_word2vec_model(model_path, force_reload=False):
2525

2626
class Word2vecAug(WordEmbsAugmenter):
2727
def __init__(self, model_path='.', model=None, action=Action.SUBSTITUTE,
28-
name='Word2vec_Aug', aug_min=1, aug_p=0.3, aug_n=5, tokenizer=None, stopwords=[], force_reload=False,
29-
verbose=0):
30-
super(Word2vecAug, self).__init__(
28+
name='Word2vec_Aug', aug_min=1, aug_p=0.3, aug_n=5, stopwords=[],
29+
tokenizer=None, reverse_tokenizer=None, force_reload=False, verbose=0):
30+
super().__init__(
3131
model_path=model_path, aug_n=aug_n,
32-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
33-
verbose=verbose)
32+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
33+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
3434

3535
if model is None:
3636
self.model = self.get_model(force_reload=force_reload)

nlpaug/augmenter/word/word_augmenter.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55

66
class WordAugmenter(Augmenter):
7-
def __init__(self, action, name='Word_Aug', aug_min=1, aug_p=0.3, tokenizer=None, stopwords=[], verbose=0):
8-
super(WordAugmenter, self).__init__(
7+
def __init__(self, action, name='Word_Aug', aug_min=1, aug_p=0.3, stopwords=[],
8+
tokenizer=None, reverse_tokenizer=None, verbose=0):
9+
super().__init__(
910
name=name, method=Method.WORD, action=action, aug_min=aug_min, verbose=verbose)
1011
self.aug_p = aug_p
1112
if tokenizer is not None:
1213
self.tokenizer = tokenizer
14+
if reverse_tokenizer is not None:
15+
self.reverse_tokenizer = reverse_tokenizer
1316
self.stopwords = stopwords
1417

1518
def tokenizer(self, text):

nlpaug/augmenter/word/word_embs_aug.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,11 @@
88

99
class WordEmbsAugmenter(WordAugmenter):
1010
def __init__(self, model_path='.', action=Action.SUBSTITUTE,
11-
name='WordEmbs_Aug', aug_min=1, aug_p=0.3, aug_n=5, tokenizer=None, n_gram_separator='_',
12-
stopwords=[], verbose=0):
13-
super(WordEmbsAugmenter, self).__init__(
14-
action=action, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
15-
verbose=verbose)
11+
name='WordEmbs_Aug', aug_min=1, aug_p=0.3, aug_n=5, n_gram_separator='_',
12+
stopwords=[], tokenizer=None, reverse_tokenizer=None, verbose=0):
13+
super().__init__(
14+
action=action, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
15+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
1616
self.model_path = model_path
1717
self.aug_n = aug_n
1818
self.model = self.get_model(force_reload=False)

nlpaug/augmenter/word/wordnet.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66

77

88
class WordNetAug(WordAugmenter):
9-
def __init__(self, name='WordNet_Aug', aug_min=1, aug_p=0.3, lang='eng', tokenizer=None, stopwords=[], verbose=0):
9+
def __init__(self, name='WordNet_Aug', aug_min=1, aug_p=0.3, lang='eng', stopwords=[],
10+
tokenizer=None, reverse_tokenizer=None, verbose=0):
1011
super().__init__(
11-
action=Action.SUBSTITUTE, name=name, aug_p=aug_p, aug_min=aug_min, tokenizer=tokenizer, stopwords=stopwords,
12-
verbose=verbose)
12+
action=Action.SUBSTITUTE, name=name, aug_p=aug_p, aug_min=aug_min, stopwords=stopwords,
13+
tokenizer=tokenizer, reverse_tokenizer=reverse_tokenizer, verbose=verbose)
1314

1415
self.model = self.get_model()
1516
self.lang = lang

nlpaug/util/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from nlpaug.util.method import *
44
from nlpaug.util.exception import *
55
from nlpaug.util.math import *
6-
6+
from nlpaug.util.text import *
77

88
from nlpaug.util.part_of_speech import *
99

nlpaug/util/text/__init__.py

Whitespace-only changes.

nlpaug/util/text/tokenizer.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import re
2+
3+
ADDING_SPACE_AROUND_PUNCTUATION_REGEX = re.compile(r'(?<! )(?=[.,!?()])|(?<=[.,!?()])(?! )')
4+
SPLIT_WORD_REGEX = re.compile(r'\b.*?\S.*?(?:\b|$)')
5+
6+
# re.compile(r"(\W+)")
7+
# re.compile(r"\w+|[^\w\s]")
8+
9+
10+
def add_space_around_punctuation(text):
11+
return ADDING_SPACE_AROUND_PUNCTUATION_REGEX.sub(r' ', text)
12+
13+
14+
def split_sentence(text):
15+
return SPLIT_WORD_REGEX.findall(text)
16+

test/augmenter/char/test_char.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import unittest
2+
3+
import nlpaug.augmenter.char as nac
4+
import nlpaug.util.text.tokenizer as text_tokenizer
5+
6+
7+
class TestCharacter(unittest.TestCase):
8+
def test_empty(self):
9+
texts = ['', None]
10+
11+
augs = [
12+
nac.OcrAug(),
13+
nac.QwertyAug(),
14+
]
15+
16+
for text in texts:
17+
for aug in augs:
18+
augmented_text = aug.augment(text)
19+
self.assertEqual(text, augmented_text)
20+
21+
def test_tokenizer(self):
22+
augs = [
23+
nac.OcrAug(tokenizer=text_tokenizer.split_sentence),
24+
nac.QwertyAug(tokenizer=text_tokenizer.split_sentence),
25+
nac.RandomCharAug(tokenizer=text_tokenizer.split_sentence),
26+
]
27+
28+
text = 'The quick brown fox, jumps over lazy dog.'
29+
expected_tokens = ['The', ' quick', ' brown', ' fox', ', ', 'jumps', ' over', ' lazy', ' dog', '.']
30+
for aug in augs:
31+
tokens = aug.tokenizer(text)
32+
self.assertEqual(tokens, expected_tokens)
33+
34+
text = 'The quick !brown fox, jumps # over lazy dog .'
35+
expected_tokens = ['The', ' quick', ' !', 'brown', ' fox', ', ', 'jumps', ' # ', 'over', ' lazy', ' dog', ' .']
36+
for aug in augs:
37+
tokens = aug.tokenizer(text)
38+
self.assertEqual(tokens, expected_tokens)

test/augmenter/char/test_ocr.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,10 +39,3 @@ def test_ocr_multi_words(self):
3939
self.assertTrue(is_augmented)
4040

4141
self.assertTrue(len(texts) > 0)
42-
43-
def test_ocr_empty(self):
44-
texts = ['', None]
45-
aug = OcrAug()
46-
for text in texts:
47-
augmented_text = aug.augment(text)
48-
self.assertEqual(text, augmented_text)

test/augmenter/char/test_qwerty.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,3 @@ def test_qwerty_multi_words(self):
2121
self.assertNotEqual(text, augmented_text)
2222

2323
self.assertTrue(len(texts) > 0)
24-
25-
def test_qwerty_empty(self):
26-
texts = ['', None]
27-
aug = QwertyAug()
28-
for text in texts:
29-
augmented_text = aug.augment(text)
30-
self.assertEqual(text, augmented_text)

test/augmenter/word/test_bert.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,6 @@ def setUpClass(cls):
1414
os.path.dirname(__file__), '..', '..', '..', '.env'))
1515
load_dotenv(env_config_path)
1616

17-
def test_empty_input_for_insert(self):
18-
text = ' '
19-
20-
aug = naw.BertAug(action=Action.INSERT)
21-
augmented_text = aug.augment(text)
22-
23-
self.assertEqual(augmented_text, '')
24-
2517
def test_oov(self):
2618
unknown_token = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
2719
texts = [

0 commit comments

Comments
 (0)