Skip to content

Commit 69e1c48

Browse files
feat: iterator API for pattern matches (#1155)
1 parent a7fa71a commit 69e1c48

File tree

4 files changed

+72
-91
lines changed

4 files changed

+72
-91
lines changed

harper-core/src/linting/no_oxford_comma.rs

Lines changed: 7 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
use crate::{
22
Document, Token, TokenStringExt,
3-
patterns::{NominalPhrase, Pattern, SequencePattern, WordSet},
3+
patterns::{NominalPhrase, PatternExt, SequencePattern, WordSet},
44
};
55

66
use super::{Lint, LintKind, Linter, Suggestion};
@@ -52,28 +52,12 @@ impl Linter for NoOxfordComma {
5252
let mut lints = Vec::new();
5353

5454
for sentence in document.iter_sentences() {
55-
let mut tok_cursor = 0;
56-
57-
loop {
58-
if tok_cursor >= sentence.len() {
59-
break;
60-
}
61-
62-
let match_len = self
63-
.pattern
64-
.matches(&sentence[tok_cursor..], document.get_source());
65-
66-
if let Some(match_len) = match_len {
67-
let lint = self.match_to_lint(
68-
&sentence[tok_cursor..tok_cursor + match_len.get()],
69-
document.get_source(),
70-
);
71-
72-
lints.extend(lint);
73-
tok_cursor += match_len.get();
74-
} else {
75-
tok_cursor += 1;
76-
}
55+
for match_span in self.pattern.iter_matches(sentence, document.get_source()) {
56+
let lint = self.match_to_lint(
57+
&sentence[match_span.start..match_span.end],
58+
document.get_source(),
59+
);
60+
lints.extend(lint);
7761
}
7862
}
7963

harper-core/src/linting/oxford_comma.rs

Lines changed: 10 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::{
22
Lrc, Token, TokenStringExt,
33
linting::Linter,
4-
patterns::{OwnedPatternExt, Pattern, SequencePattern, WordSet},
4+
patterns::{OwnedPatternExt, Pattern, PatternExt, SequencePattern, WordSet},
55
};
66

77
use super::{super::Lint, LintKind, Suggestion};
@@ -58,7 +58,7 @@ impl Linter for OxfordComma {
5858
fn lint(&mut self, document: &crate::Document) -> Vec<crate::linting::Lint> {
5959
let mut lints = Vec::new();
6060
for sentence in document.iter_sentences() {
61-
let mut tok_cursor = 0;
61+
let mut skip = 0;
6262

6363
let mut words = sentence
6464
.iter_words()
@@ -67,33 +67,21 @@ impl Linter for OxfordComma {
6767

6868
if let (Some(first), Some(second)) = (words.next(), words.next()) {
6969
if first.preposition && second.is_likely_homograph() {
70-
tok_cursor = sentence
70+
skip = sentence
7171
.iter()
7272
.position(|t| t.kind.is_comma())
7373
.unwrap_or(sentence.iter().len())
7474
}
7575
}
7676

77-
loop {
78-
if tok_cursor >= sentence.len() {
79-
break;
80-
}
81-
82-
let match_len = self
83-
.pattern
84-
.matches(&sentence[tok_cursor..], document.get_source());
77+
let sentence = &sentence[skip..];
8578

86-
if let Some(match_len) = match_len {
87-
let lint = self.match_to_lint(
88-
&sentence[tok_cursor..tok_cursor + match_len.get()],
89-
document.get_source(),
90-
);
91-
92-
lints.extend(lint);
93-
tok_cursor += match_len.get();
94-
} else {
95-
tok_cursor += 1;
96-
}
79+
for match_span in self.pattern.iter_matches(sentence, document.get_source()) {
80+
let lint = self.match_to_lint(
81+
&sentence[match_span.start..match_span.end],
82+
document.get_source(),
83+
);
84+
lints.extend(lint);
9785
}
9886
}
9987

harper-core/src/linting/pattern_linter.rs

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
use blanket::blanket;
22

3-
use crate::{Document, LSend, Token, TokenStringExt, patterns::Pattern};
3+
use crate::{
4+
Document, LSend, Token, TokenStringExt,
5+
patterns::{Pattern, PatternExt},
6+
};
47

58
use super::{Lint, Linter};
69

@@ -44,24 +47,10 @@ where
4447

4548
pub fn run_on_chunk(linter: &impl PatternLinter, chunk: &[Token], source: &[char]) -> Vec<Lint> {
4649
let mut lints = Vec::new();
47-
let mut tok_cursor = 0;
4850

49-
loop {
50-
if tok_cursor >= chunk.len() {
51-
break;
52-
}
53-
54-
let match_len = linter.pattern().matches(&chunk[tok_cursor..], source);
55-
56-
if let Some(match_len) = match_len {
57-
let lint =
58-
linter.match_to_lint(&chunk[tok_cursor..tok_cursor + match_len.get()], source);
59-
60-
lints.extend(lint);
61-
tok_cursor += match_len.get();
62-
} else {
63-
tok_cursor += 1;
64-
}
51+
for match_span in linter.pattern().iter_matches(chunk, source) {
52+
let lint = linter.match_to_lint(&chunk[match_span.start..match_span.end], source);
53+
lints.extend(lint);
6554
}
6655

6756
lints

harper-core/src/patterns/mod.rs

Lines changed: 48 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
//!
66
//! See the page about [`SequencePattern`] for a concrete example of their use.
77
8-
use std::{collections::VecDeque, num::NonZeroUsize};
8+
use std::num::NonZeroUsize;
99

10-
use crate::{Document, Span, Token, VecExt};
10+
use crate::{Document, Span, Token};
1111

1212
mod all;
1313
mod any_pattern;
@@ -69,43 +69,63 @@ pub trait Pattern: Send + Sync {
6969
}
7070

7171
pub trait PatternExt {
72+
fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
73+
7274
/// Search through all tokens to locate all non-overlapping pattern matches.
73-
fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
75+
fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
76+
self.iter_matches(tokens, source).collect()
77+
}
7478
}
7579

7680
impl<P> PatternExt for P
7781
where
78-
P: Pattern,
82+
P: Pattern + ?Sized,
7983
{
80-
fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
81-
let mut found = Vec::new();
82-
83-
for i in 0..tokens.len() {
84-
let len = self.matches(&tokens[i..], source);
85-
86-
if let Some(len) = len {
87-
found.push(Span::new_with_len(i, len.get()));
88-
}
89-
}
84+
fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
85+
MatchIter::new(self, tokens, source)
86+
}
87+
}
9088

91-
if found.len() < 2 {
92-
return found;
89+
struct MatchIter<'a, 'b, 'c, P: ?Sized> {
90+
pattern: &'a P,
91+
tokens: &'b [Token],
92+
source: &'c [char],
93+
index: usize,
94+
}
95+
impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
96+
where
97+
P: Pattern + ?Sized,
98+
{
99+
fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
100+
Self {
101+
pattern,
102+
tokens,
103+
source,
104+
index: 0,
93105
}
94-
95-
let mut remove_indices = VecDeque::new();
96-
97-
for i in 0..found.len() - 1 {
98-
let cur = &found[i];
99-
let next = &found[i + 1];
100-
101-
if cur.overlaps_with(*next) {
102-
remove_indices.push_back(i + 1);
106+
}
107+
}
108+
impl<P> Iterator for MatchIter<'_, '_, '_, P>
109+
where
110+
P: Pattern + ?Sized,
111+
{
112+
type Item = Span;
113+
114+
fn next(&mut self) -> Option<Self::Item> {
115+
while self.index < self.tokens.len() {
116+
if let Some(len) = self
117+
.pattern
118+
.matches(&self.tokens[self.index..], self.source)
119+
{
120+
let span = Span::new_with_len(self.index, len.get());
121+
self.index += len.get();
122+
return Some(span);
123+
} else {
124+
self.index += 1;
103125
}
104126
}
105127

106-
found.remove_indices(remove_indices);
107-
108-
found
128+
None
109129
}
110130
}
111131

0 commit comments

Comments
 (0)