Skip to content

Tree-Sitter Powered Spell Checking #3830

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ members = [
"helix-tui",
"helix-lsp",
"helix-dap",
"helix-spell",
"helix-loader",
"xtask",
]
Expand Down
1 change: 1 addition & 0 deletions helix-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ pub mod test;
pub mod textobject;
mod transaction;
pub mod wrap;
pub mod spellcheck;

pub mod unicode {
pub use unicode_general_category as category;
Expand Down
23 changes: 23 additions & 0 deletions helix-core/src/spellcheck.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use crate::selection::Range;
use crate::syntax::LanguageConfiguration;
use crate::RopeSlice;
use tree_sitter::{Node, QueryCursor};

/// Collects the character ranges of `doc_slice` that should be spell checked.
///
/// The language's spellcheck query is run over `doc_tree`, and the byte span
/// of every node captured as `spell` is converted into a char-indexed
/// [`Range`]. Consecutive identical ranges are collapsed into one.
///
/// Returns `None` when the language has no spellcheck query, or when
/// `capture_nodes` yields nothing for the `spell` capture.
pub fn spellcheck_treesitter(
    doc_tree: Node,
    doc_slice: RopeSlice,
    lang_config: &LanguageConfiguration,
) -> Option<Vec<Range>> {
    let mut cursor = QueryCursor::new();
    let query = lang_config.spellcheck_query()?;
    let captures = query.capture_nodes("spell", doc_tree, doc_slice, &mut cursor)?;

    let mut ranges: Vec<Range> = Vec::new();
    for node in captures {
        // Tree-sitter reports byte offsets; `Range` is built from char offsets.
        let from = doc_slice.byte_to_char(node.start_byte());
        let to = doc_slice.byte_to_char(node.end_byte());
        ranges.push(Range::new(from, to));
    }
    ranges.dedup();
    Some(ranges)
}
11 changes: 11 additions & 0 deletions helix-core/src/syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ pub struct LanguageConfiguration {
pub(crate) indent_query: OnceCell<Option<Query>>,
#[serde(skip)]
pub(crate) textobject_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip)]
pub(crate) spellcheck_query: OnceCell<Option<TextObjectQuery>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub debugger: Option<DebugAdapterConfig>,

Expand Down Expand Up @@ -413,6 +415,15 @@ impl LanguageConfiguration {
.as_ref()
}

/// Returns the language's `spellchecks.scm` query, loading it on first use.
///
/// The result of the first load (including a failed one, stored as `None`)
/// is cached in `self.spellcheck_query`.
pub fn spellcheck_query(&self) -> Option<&TextObjectQuery> {
    let cached = self.spellcheck_query.get_or_init(|| {
        let query = self.load_query("spellchecks.scm")?;
        Some(TextObjectQuery { query })
    });
    cached.as_ref()
}

pub fn scope(&self) -> &str {
&self.scope
}
Expand Down
11 changes: 11 additions & 0 deletions helix-spell/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "helix-spell"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
helix-core = { version = "0.6", path = "../helix-core" }

ispell = "0.3"
27 changes: 27 additions & 0 deletions helix-spell/src/client.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
extern crate ispell;
use ispell::{IspellError, SpellChecker, SpellLauncher};

/// A spell checking client wrapping an `ispell` crate [`SpellChecker`].
pub struct Client {
/// Name of the dictionary handed to the launcher (currently always `"en_US"`).
pub lang: &'static str,
/// Handle to the launched checker that `check` forwards to.
checker: SpellChecker,
}

impl Client {
    /// Launches a hunspell checker using the `en_US` dictionary.
    ///
    /// # Panics
    ///
    /// Panics if the launcher returns an error.
    pub fn new() -> Self {
        // TODO: accept lang, mode as an argument, configurable by the user
        let lang = "en_US";
        let checker = SpellLauncher::new()
            .hunspell()
            // .aspell()
            .dictionary(lang)
            .launch()
            // TODO: instead of panicking, figure out proper error handling
            .expect("failed to launch hunspell with the en_US dictionary");
        Self { checker, lang }
    }

    /// Checks `string` and returns the spelling errors the checker reports.
    ///
    /// This is best-effort: if the checker itself fails, the failure is
    /// discarded and an empty list is returned.
    pub fn check(&mut self, string: &str) -> Vec<IspellError> {
        self.checker.check(string).unwrap_or_default()
    }
}

// TODO: expose the ability to add words to a user's dictionary, which the ispell crate supports
32 changes: 32 additions & 0 deletions helix-spell/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
pub mod client;

pub use client::Client;

#[cfg(test)]
mod tests {
    use super::Client;

    /// A new client reports the hard-coded `en_US` dictionary.
    #[test]
    fn new_client() {
        let checker = Client::new();
        assert_eq!(checker.lang, "en_US");
    }

    /// The first reported error names the misspelled word and its offset.
    #[test]
    fn misspelled_word() {
        let sentence = "This sentence contains a misssspelled word";
        let mut checker = Client::new();
        let errors = checker.check(sentence);
        let first = errors.first().unwrap();
        assert_eq!(first.misspelled, "misssspelled");
        assert_eq!(first.position, 25);
    }

    /// A correctly spelled sentence yields no errors.
    #[test]
    fn no_misspelled_word() {
        let sentence = "This sentence does not contain a misspelled word";
        let mut checker = Client::new();
        assert!(checker.check(sentence).is_empty());
    }
}
3 changes: 3 additions & 0 deletions helix-term/src/application.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,9 @@ impl Application {
// idle timeout
self.editor.clear_idle_timer();
self.handle_idle_timeout();
// HACK: force rendering until I can figure out how
// async jobs work
// self.render();

#[cfg(feature = "integration")]
{
Expand Down
33 changes: 33 additions & 0 deletions helix-term/src/commands/typed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,32 @@ fn format(

Ok(())
}

/// `:spell-check` — queues a spell check of the current document.
///
/// Nothing happens until the prompt is validated; the check itself runs in
/// the callback built by `make_spell_check_callback`.
fn spell_check(
    cx: &mut compositor::Context,
    _args: &[Cow<str>],
    event: PromptEvent,
) -> anyhow::Result<()> {
    if event == PromptEvent::Validate {
        let doc_id = doc!(cx.editor).id();
        // TODO: could probably just be a job?
        cx.jobs.callback(make_spell_check_callback(doc_id));
    }
    Ok(())
}

/// Builds the job callback that spell checks the document `doc_id` and adds
/// the resulting diagnostics to it.
///
/// The callback does nothing if the document no longer exists when it runs.
async fn make_spell_check_callback(doc_id: DocumentId) -> anyhow::Result<job::Callback> {
    let callback: job::Callback = Box::new(move |editor, _compositor| {
        let doc = match editor.document_mut(doc_id) {
            Some(doc) => doc,
            None => return,
        };
        let mut found = doc.spell_check();
        doc.add_diagnostics(&mut found);
    });
    Ok(callback)
}

fn set_indent_style(
cx: &mut compositor::Context,
args: &[Cow<str>],
Expand Down Expand Up @@ -1665,6 +1691,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[
fun: format,
completer: None,
},
TypableCommand {
    name: "spell-check",
    aliases: &["sc"],
    // `helix_spell::Client` launches hunspell, not aspell.
    doc: "Check spelling with hunspell.",
    fun: spell_check,
    completer: None,
},
TypableCommand {
name: "indent-style",
aliases: &[],
Expand Down
1 change: 1 addition & 0 deletions helix-view/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ anyhow = "1"
helix-core = { version = "0.6", path = "../helix-core" }
helix-lsp = { version = "0.6", path = "../helix-lsp" }
helix-dap = { version = "0.6", path = "../helix-dap" }
helix-spell = { version = "0.1", path = "../helix-spell" }
crossterm = { version = "0.25", optional = true }

# Conversion traits
Expand Down
53 changes: 49 additions & 4 deletions helix-view/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ use std::str::FromStr;
use std::sync::Arc;

use helix_core::{
encoding,
diagnostic, encoding,
history::{History, UndoKind},
indent::{auto_detect_indent_style, IndentStyle},
line_ending::auto_detect_line_ending,
spellcheck,
syntax::{self, LanguageConfiguration},
ChangeSet, Diagnostic, LineEnding, Rope, RopeBuilder, Selection, State, Syntax, Transaction,
DEFAULT_LINE_ENDING,
Expand Down Expand Up @@ -118,6 +119,7 @@ pub struct Document {
version: i32, // should be usize?
pub(crate) modified_since_accessed: bool,

// TODO: add a separate spell diagnostics list decoupled from the language server?
diagnostics: Vec<Diagnostic>,
language_server: Option<Arc<helix_lsp::Client>>,
}
Expand Down Expand Up @@ -392,8 +394,46 @@ impl Document {
Ok(doc)
}

/// The same as [`format`], but only returns formatting changes if auto-formatting
/// is configured.
pub fn spell_check(&mut self) -> Vec<Diagnostic> {
let mut diagnostics = Vec::new();
if let Some(node) = self.syntax() {
let mut spell_checker = helix_spell::Client::new();
let doc_slice = self.text.slice(..);
if let Some(ranges) = spellcheck::spellcheck_treesitter(
node.tree().root_node(),
doc_slice,
self.language_config().unwrap(),
) {
for range in ranges {
let slice = range.slice(doc_slice);
let (start_line, _end_line) = range.line_range(doc_slice);
let mut position = range.from();
for (i, line) in slice.lines().enumerate() {
let errors = spell_checker.check(&line.to_string());
for error in errors {
let word_count = error.misspelled.chars().count();
let diagnostic = Diagnostic {
line: start_line + i,
range: diagnostic::Range {
start: position + error.position,
end: position + error.position + word_count,
},
message: error.suggestions.join("\n"),
severity: Some(diagnostic::Severity::Warning),
code: None,
};
diagnostics.push(diagnostic);
// TODO: don't set doc diagnostics here, simply return the Vec<Diagnostic> instead
// and have the caller decide how to handle it
}
position += line.len_chars();
}
}
};
};
diagnostics
}

pub fn auto_format(&self) -> Option<BoxFuture<'static, Result<Transaction, FormatterError>>> {
if self.language_config()?.auto_format {
self.format()
Expand Down Expand Up @@ -619,7 +659,6 @@ impl Document {
self.apply(&transaction, view_id);
self.append_changes_to_history(view_id);
self.reset_modified();

self.detect_indent_and_line_ending();

Ok(())
Expand Down Expand Up @@ -1076,6 +1115,12 @@ impl Document {
.sort_unstable_by_key(|diagnostic| diagnostic.range);
}

/// Moves every entry of `diagnostics` into the document's diagnostics
/// (leaving `diagnostics` empty) and re-sorts the combined list by range.
pub fn add_diagnostics(&mut self, diagnostics: &mut Vec<Diagnostic>) {
    let all = &mut self.diagnostics;
    all.append(diagnostics);
    all.sort_unstable_by_key(|entry| entry.range);
}

/// Get the document's auto pairs. If the document has a recognized
/// language config with auto pairs configured, returns that;
/// otherwise, falls back to the global auto pairs config. If the global
Expand Down
7 changes: 5 additions & 2 deletions helix-view/src/editor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ use helix_core::{
};
use helix_dap as dap;
use helix_lsp::lsp;
use helix_spell as spell;

use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};

Expand Down Expand Up @@ -632,6 +633,7 @@ pub struct Editor {
pub debugger: Option<dap::Client>,
pub debugger_events: SelectAll<UnboundedReceiverStream<dap::Payload>>,
pub breakpoints: HashMap<PathBuf, Vec<Breakpoint>>,
pub spell_checker: spell::Client,

pub clipboard_provider: Box<dyn ClipboardProvider>,

Expand Down Expand Up @@ -714,6 +716,7 @@ impl Editor {
debugger: None,
debugger_events: SelectAll::new(),
breakpoints: HashMap::new(),
spell_checker: spell::Client::new(),
syn_loader,
theme_loader,
last_theme: None,
Expand Down Expand Up @@ -881,8 +884,8 @@ impl Editor {
fn _refresh(&mut self) {
let config = self.config();
for (view, _) in self.tree.views_mut() {
let doc = &self.documents[&view.doc];
view.ensure_cursor_in_view(doc, config.scrolloff)
let doc = doc_mut!(self, &view.doc);
view.ensure_cursor_in_view(doc, config.scrolloff);
}
}

Expand Down
25 changes: 25 additions & 0 deletions runtime/queries/elixir/spellchecks.scm
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; Spellcheck query for Elixir: nodes captured as @spell are spell checked.

; Elixir Code Comments
(comment) @spell

; Elixir Documentation
; Matches the @doc, @moduledoc, @typedoc and @shortdoc module attributes and
; captures the content of their string or sigil argument.
(unary_operator
operator: "@"
operand: (call
target: ((identifier) @_identifier (#match? @_identifier "^(module|type|short)?doc$"))
(arguments [
(string (quoted_content) @spell)
(sigil (quoted_content) @spell)
])))

; Phoenix Live View Component Macros
; Matches `attr`/`slot` calls and captures the string or sigil content given
; under their `doc:` keyword.
(call
(identifier) @_identifier
(arguments
(atom)+
(keywords (pair
((keyword) @_keyword (#eq? @_keyword "doc: "))
[
(string (quoted_content) @spell)
(sigil (quoted_content) @spell)
]))
(#match? @_identifier "^(attr|slot)$")))
4 changes: 4 additions & 0 deletions runtime/queries/rust/spellchecks.scm
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
; Spellcheck query for Rust: line and block comments are captured as @spell.
[
(line_comment)
(block_comment)
] @spell