feat: iterator API for pattern matches (#1155)

RunDevelopment · web-flow · commit 69e1c489d22f · 2025-04-29T17:57:58.000Z
diff --git a/harper-core/src/linting/no_oxford_comma.rs b/harper-core/src/linting/no_oxford_comma.rs
@@ -1,6 +1,6 @@
 use crate::{
     Document, Token, TokenStringExt,
-    patterns::{NominalPhrase, Pattern, SequencePattern, WordSet},
+    patterns::{NominalPhrase, PatternExt, SequencePattern, WordSet},
 };
 
 use super::{Lint, LintKind, Linter, Suggestion};
@@ -52,28 +52,12 @@ impl Linter for NoOxfordComma {
         let mut lints = Vec::new();
 
         for sentence in document.iter_sentences() {
-            let mut tok_cursor = 0;
-
-            loop {
-                if tok_cursor >= sentence.len() {
-                    break;
-                }
-
-                let match_len = self
-                    .pattern
-                    .matches(&sentence[tok_cursor..], document.get_source());
-
-                if let Some(match_len) = match_len {
-                    let lint = self.match_to_lint(
-                        &sentence[tok_cursor..tok_cursor + match_len.get()],
-                        document.get_source(),
-                    );
-
-                    lints.extend(lint);
-                    tok_cursor += match_len.get();
-                } else {
-                    tok_cursor += 1;
-                }
+            for match_span in self.pattern.iter_matches(sentence, document.get_source()) {
+                let lint = self.match_to_lint(
+                    &sentence[match_span.start..match_span.end],
+                    document.get_source(),
+                );
+                lints.extend(lint);
             }
         }
 
diff --git a/harper-core/src/linting/oxford_comma.rs b/harper-core/src/linting/oxford_comma.rs
@@ -1,7 +1,7 @@
 use crate::{
     Lrc, Token, TokenStringExt,
     linting::Linter,
-    patterns::{OwnedPatternExt, Pattern, SequencePattern, WordSet},
+    patterns::{OwnedPatternExt, Pattern, PatternExt, SequencePattern, WordSet},
 };
 
 use super::{super::Lint, LintKind, Suggestion};
@@ -58,7 +58,7 @@ impl Linter for OxfordComma {
     fn lint(&mut self, document: &crate::Document) -> Vec<crate::linting::Lint> {
         let mut lints = Vec::new();
         for sentence in document.iter_sentences() {
-            let mut tok_cursor = 0;
+            let mut skip = 0;
 
             let mut words = sentence
                 .iter_words()
@@ -67,33 +67,21 @@ impl Linter for OxfordComma {
 
             if let (Some(first), Some(second)) = (words.next(), words.next()) {
                 if first.preposition && second.is_likely_homograph() {
-                    tok_cursor = sentence
+                    skip = sentence
                         .iter()
                         .position(|t| t.kind.is_comma())
                         .unwrap_or(sentence.iter().len())
                 }
             }
 
-            loop {
-                if tok_cursor >= sentence.len() {
-                    break;
-                }
-
-                let match_len = self
-                    .pattern
-                    .matches(&sentence[tok_cursor..], document.get_source());
+            let sentence = &sentence[skip..];
 
-                if let Some(match_len) = match_len {
-                    let lint = self.match_to_lint(
-                        &sentence[tok_cursor..tok_cursor + match_len.get()],
-                        document.get_source(),
-                    );
-
-                    lints.extend(lint);
-                    tok_cursor += match_len.get();
-                } else {
-                    tok_cursor += 1;
-                }
+            for match_span in self.pattern.iter_matches(sentence, document.get_source()) {
+                let lint = self.match_to_lint(
+                    &sentence[match_span.start..match_span.end],
+                    document.get_source(),
+                );
+                lints.extend(lint);
             }
         }
 
diff --git a/harper-core/src/linting/pattern_linter.rs b/harper-core/src/linting/pattern_linter.rs
@@ -1,6 +1,9 @@
 use blanket::blanket;
 
-use crate::{Document, LSend, Token, TokenStringExt, patterns::Pattern};
+use crate::{
+    Document, LSend, Token, TokenStringExt,
+    patterns::{Pattern, PatternExt},
+};
 
 use super::{Lint, Linter};
 
@@ -44,24 +47,10 @@ where
 
 pub fn run_on_chunk(linter: &impl PatternLinter, chunk: &[Token], source: &[char]) -> Vec<Lint> {
     let mut lints = Vec::new();
-    let mut tok_cursor = 0;
 
-    loop {
-        if tok_cursor >= chunk.len() {
-            break;
-        }
-
-        let match_len = linter.pattern().matches(&chunk[tok_cursor..], source);
-
-        if let Some(match_len) = match_len {
-            let lint =
-                linter.match_to_lint(&chunk[tok_cursor..tok_cursor + match_len.get()], source);
-
-            lints.extend(lint);
-            tok_cursor += match_len.get();
-        } else {
-            tok_cursor += 1;
-        }
+    for match_span in linter.pattern().iter_matches(chunk, source) {
+        let lint = linter.match_to_lint(&chunk[match_span.start..match_span.end], source);
+        lints.extend(lint);
     }
 
     lints
diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs
@@ -5,9 +5,9 @@
 //!
 //! See the page about [`SequencePattern`] for a concrete example of their use.
 
-use std::{collections::VecDeque, num::NonZeroUsize};
+use std::num::NonZeroUsize;
 
-use crate::{Document, Span, Token, VecExt};
+use crate::{Document, Span, Token};
 
 mod all;
 mod any_pattern;
@@ -69,43 +69,73 @@ pub trait Pattern: Send + Sync {
 }
 
 pub trait PatternExt {
+    /// Lazily iterate over the non-overlapping matches of this pattern in `tokens`.
+    ///
+    /// Each yielded [`Span`] holds token indices relative to the start of `tokens`.
+    /// After a match, the search resumes at the first token past that match.
+    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span>;
+
     /// Search through all tokens to locate all non-overlapping pattern matches.
-    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span>;
+    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
+        self.iter_matches(tokens, source).collect()
+    }
 }
 
 impl<P> PatternExt for P
 where
-    P: Pattern,
+    P: Pattern + ?Sized,
 {
-    fn find_all_matches(&self, tokens: &[Token], source: &[char]) -> Vec<Span> {
-        let mut found = Vec::new();
-
-        for i in 0..tokens.len() {
-            let len = self.matches(&tokens[i..], source);
-
-            if let Some(len) = len {
-                found.push(Span::new_with_len(i, len.get()));
-            }
-        }
+    fn iter_matches(&self, tokens: &[Token], source: &[char]) -> impl Iterator<Item = Span> {
+        MatchIter::new(self, tokens, source)
+    }
+}
 
-        if found.len() < 2 {
-            return found;
+/// Iterator returned by [`PatternExt::iter_matches`].
+///
+/// Walks `tokens` from left to right, trying the pattern at `index` on each step.
+struct MatchIter<'a, 'b, 'c, P: ?Sized> {
+    pattern: &'a P,
+    tokens: &'b [Token],
+    source: &'c [char],
+    /// Position in `tokens` where the next match attempt begins.
+    index: usize,
+}
+impl<'a, 'b, 'c, P> MatchIter<'a, 'b, 'c, P>
+where
+    P: Pattern + ?Sized,
+{
+    fn new(pattern: &'a P, tokens: &'b [Token], source: &'c [char]) -> Self {
+        Self {
+            pattern,
+            tokens,
+            source,
+            index: 0,
         }
-
-        let mut remove_indices = VecDeque::new();
-
-        for i in 0..found.len() - 1 {
-            let cur = &found[i];
-            let next = &found[i + 1];
-
-            if cur.overlaps_with(*next) {
-                remove_indices.push_back(i + 1);
+    }
+}
+impl<P> Iterator for MatchIter<'_, '_, '_, P>
+where
+    P: Pattern + ?Sized,
+{
+    type Item = Span;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while self.index < self.tokens.len() {
+            if let Some(len) = self
+                .pattern
+                .matches(&self.tokens[self.index..], self.source)
+            {
+                let span = Span::new_with_len(self.index, len.get());
+                // Jump past the match so yielded spans never overlap.
+                self.index += len.get();
+                return Some(span);
+            } else {
+                // No match starts here; retry from the next token.
+                self.index += 1;
             }
         }
 
-        found.remove_indices(remove_indices);
-
-        found
+        None
     }
 }