forked from textlint-ja/textlint-rule-no-doubled-conjunction
no-doubled-conjunction.js · 68 lines (66 loc) · 2.76 KB
// LICENSE : MIT
"use strict";
import {RuleHelper} from "textlint-rule-helper";
import {getTokenizer} from "kuromojin";
import {split as splitSentences, Syntax as SentenceSyntax} from "sentence-splitter";
import StringSource from "textlint-util-to-string";
/*
1. Paragraph Node -> text
2. text -> sentences
3. tokenize each sentence
4. report an error if a word matching the rule is found.
TODO: needs abstraction
*/
/**
 * textlint rule: report when two consecutive sentences start with the
 * same conjunction (接続詞).
 *
 * @param {Object} context - textlint rule context (provides Syntax, report, RuleError).
 * @param {Object} [options] - rule options (currently unused).
 * @returns {Object} visitor map keyed by node type.
 */
export default function (context, options = {}) {
    const helper = new RuleHelper(context);
    const { Syntax, report, RuleError } = context;
    return {
        [Syntax.Paragraph](node) {
            // Skip paragraphs nested inside nodes whose text is not plain prose.
            if (helper.isChildNode(node, [Syntax.Link, Syntax.Image, Syntax.BlockQuote, Syntax.Emphasis])) {
                return;
            }
            const source = new StringSource(node);
            const text = source.toString();
            const isSentenceNode = (sentenceNode) => sentenceNode.type === SentenceSyntax.Sentence;
            const sentences = splitSentences(text, {
                charRegExp: /[..。\?\!?!]/
            }).filter(isSentenceNode);
            // No sentences => nothing to compare; early return.
            if (sentences.length === 0) {
                return;
            }
            return getTokenizer().then((tokenizer) => {
                // Pair each sentence with the conjunction (接続詞) tokens it contains.
                const selectConjunction = (sentence) => {
                    const tokens = tokenizer.tokenizeForSentence(sentence.raw);
                    const conjunctionTokens = tokens.filter((token) => token.pos === "接続詞");
                    return [sentence, conjunctionTokens];
                };
                const sentenceConjunctions = sentences.map(selectConjunction);
                // Walk adjacent sentence pairs. `carriedToken` keeps the last seen
                // conjunction alive across sentences that contain none, so e.g.
                // "しかしA。B。しかしC。" is still reported.
                let carriedToken = null;
                for (let i = 1; i < sentenceConjunctions.length; i++) {
                    const [, prevTokens] = sentenceConjunctions[i - 1];
                    const [sentence, currentTokens] = sentenceConjunctions[i];
                    let token = carriedToken;
                    if (prevTokens && prevTokens.length > 0) {
                        token = prevTokens[0];
                    }
                    if (currentTokens.length > 0) {
                        if (token && currentTokens[0].surface_form === token.surface_form) {
                            // Map the in-sentence position back to the paragraph's
                            // original source index (word_position is 1-based).
                            const originalIndex = source.originalIndexFromPosition({
                                line: sentence.loc.start.line,
                                column: sentence.loc.start.column + (currentTokens[0].word_position - 1)
                            });
                            // padding position for the report
                            const padding = {
                                index: originalIndex
                            };
                            report(node, new RuleError(`同じ接続詞が連続して使われています。`, padding));
                        }
                    }
                    carriedToken = token;
                }
            });
        }
    };
}