Skip to content

Commit c2a06e1

Browse files
the-mikedavis authored and Frederik Vestre committed
Use TreeCursor to pretty-print :tree-sitter-subtree (helix-editor#4606)
The current `:tree-sitter-subtree` has a bug for field names when the field name belongs to an unnamed child node. Take this Ruby example:

    def self.method_name
      true
    end

The subtree given by tree-sitter-cli is:

    (singleton_method [2, 0] - [4, 3]
      object: (self [2, 4] - [2, 8])
      name: (identifier [2, 9] - [2, 20])
      body: (body_statement [3, 2] - [3, 6]
        (true [3, 2] - [3, 6])))

But the `:tree-sitter-subtree` output was:

    (singleton_method
      object: (self)
      body: (identifier)
      (body_statement
        (true)))

The `singleton_method` rule defines the `name` and `body` fields in an unnamed helper rule `_method_rest`, and the old implementation of `pretty_print_tree_impl` would pass the `field_name` down from the named `singleton_method` node.

To fix it we switch to the [TreeCursor] API, which is recommended by the tree-sitter docs for traversing the tree. `TreeCursor::field_name` accurately determines the field name for the current cursor position even when the node is unnamed.

[TreeCursor]: https://docs.rs/tree-sitter/0.20.9/tree_sitter/struct.TreeCursor.html
1 parent dab649e commit c2a06e1

File tree

1 file changed

+71
-30
lines changed

1 file changed

+71
-30
lines changed

helix-core/src/syntax.rs

Lines changed: 71 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ use std::sync::atomic::{AtomicUsize, Ordering};
12831283
use std::{iter, mem, ops, str, usize};
12841284
use tree_sitter::{
12851285
Language as Grammar, Node, Parser, Point, Query, QueryCaptures, QueryCursor, QueryError,
1286-
QueryMatch, Range, TextProvider, Tree,
1286+
QueryMatch, Range, TextProvider, Tree, TreeCursor,
12871287
};
12881288

12891289
const CANCELLATION_CHECK_INTERVAL: usize = 100;
@@ -2153,57 +2153,68 @@ impl<I: Iterator<Item = HighlightEvent>> Iterator for Merge<I> {
21532153
}
21542154
}
21552155

2156+
fn node_is_visible(node: &Node) -> bool {
2157+
node.is_missing() || (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
2158+
}
2159+
21562160
pub fn pretty_print_tree<W: fmt::Write>(fmt: &mut W, node: Node) -> fmt::Result {
2157-
pretty_print_tree_impl(fmt, node, true, None, 0)
2161+
if node.child_count() == 0 {
2162+
if node_is_visible(&node) {
2163+
write!(fmt, "({})", node.kind())
2164+
} else {
2165+
write!(fmt, "\"{}\"", node.kind())
2166+
}
2167+
} else {
2168+
pretty_print_tree_impl(fmt, &mut node.walk(), 0)
2169+
}
21582170
}
21592171

21602172
fn pretty_print_tree_impl<W: fmt::Write>(
21612173
fmt: &mut W,
2162-
node: Node,
2163-
is_root: bool,
2164-
field_name: Option<&str>,
2174+
cursor: &mut TreeCursor,
21652175
depth: usize,
21662176
) -> fmt::Result {
2167-
fn is_visible(node: Node) -> bool {
2168-
node.is_missing()
2169-
|| (node.is_named() && node.language().node_kind_is_visible(node.kind_id()))
2170-
}
2177+
let node = cursor.node();
2178+
let visible = node_is_visible(&node);
21712179

2172-
if is_visible(node) {
2180+
if visible {
21732181
let indentation_columns = depth * 2;
21742182
write!(fmt, "{:indentation_columns$}", "")?;
21752183

2176-
if let Some(field_name) = field_name {
2184+
if let Some(field_name) = cursor.field_name() {
21772185
write!(fmt, "{}: ", field_name)?;
21782186
}
21792187

21802188
write!(fmt, "({}", node.kind())?;
2181-
} else if is_root {
2182-
write!(fmt, "(\"{}\")", node.kind())?;
21832189
}
21842190

2185-
for child_idx in 0..node.child_count() {
2186-
if let Some(child) = node.child(child_idx) {
2187-
if is_visible(child) {
2191+
// Handle children.
2192+
if cursor.goto_first_child() {
2193+
loop {
2194+
if node_is_visible(&cursor.node()) {
21882195
fmt.write_char('\n')?;
21892196
}
21902197

2191-
pretty_print_tree_impl(
2192-
fmt,
2193-
child,
2194-
false,
2195-
node.field_name_for_child(child_idx as u32),
2196-
depth + 1,
2197-
)?;
2198+
pretty_print_tree_impl(fmt, cursor, depth + 1)?;
2199+
2200+
if !cursor.goto_next_sibling() {
2201+
break;
2202+
}
21982203
}
2204+
2205+
let moved = cursor.goto_parent();
2206+
// The parent of the first child must exist, and must be `node`.
2207+
debug_assert!(moved);
2208+
debug_assert!(cursor.node() == node);
21992209
}
22002210

2201-
if is_visible(node) {
2202-
write!(fmt, ")")?;
2211+
if visible {
2212+
fmt.write_char(')')?;
22032213
}
22042214

22052215
Ok(())
22062216
}
2217+
22072218
#[cfg(test)]
22082219
mod test {
22092220
use super::*;
@@ -2376,11 +2387,17 @@ mod test {
23762387
}
23772388

23782389
#[track_caller]
2379-
fn assert_pretty_print(source: &str, expected: &str, start: usize, end: usize) {
2390+
fn assert_pretty_print(
2391+
language_name: &str,
2392+
source: &str,
2393+
expected: &str,
2394+
start: usize,
2395+
end: usize,
2396+
) {
23802397
let source = Rope::from_str(source);
23812398

23822399
let loader = Loader::new(Configuration { language: vec![] });
2383-
let language = get_language("rust").unwrap();
2400+
let language = get_language(language_name).unwrap();
23842401

23852402
let config = HighlightConfiguration::new(language, "", "", "").unwrap();
23862403
let syntax = Syntax::new(&source, Arc::new(config), Arc::new(loader));
@@ -2400,13 +2417,14 @@ mod test {
24002417
#[test]
24012418
fn test_pretty_print() {
24022419
let source = r#"/// Hello"#;
2403-
assert_pretty_print(source, "(line_comment)", 0, source.len());
2420+
assert_pretty_print("rust", source, "(line_comment)", 0, source.len());
24042421

24052422
// A large tree should be indented with fields:
24062423
let source = r#"fn main() {
24072424
println!("Hello, World!");
24082425
}"#;
24092426
assert_pretty_print(
2427+
"rust",
24102428
source,
24112429
concat!(
24122430
"(function_item\n",
@@ -2425,11 +2443,34 @@ mod test {
24252443

24262444
// Selecting a token should print just that token:
24272445
let source = r#"fn main() {}"#;
2428-
assert_pretty_print(source, r#"("fn")"#, 0, 1);
2446+
assert_pretty_print("rust", source, r#""fn""#, 0, 1);
24292447

24302448
// Error nodes are printed as errors:
24312449
let source = r#"}{"#;
2432-
assert_pretty_print(source, "(ERROR)", 0, source.len());
2450+
assert_pretty_print("rust", source, "(ERROR)", 0, source.len());
2451+
2452+
// Fields broken under unnamed nodes are determined correctly.
2453+
// In the following source, `object` belongs to the `singleton_method`
2454+
// rule but `name` and `body` belong to an unnamed helper `_method_rest`.
2455+
// This can cause a bug with a pretty-printing implementation that
2456+
// uses `Node::field_name_for_child` to determine field names but is
2457+
// fixed when using `TreeCursor::field_name`.
2458+
let source = "def self.method_name
2459+
true
2460+
end";
2461+
assert_pretty_print(
2462+
"ruby",
2463+
source,
2464+
concat!(
2465+
"(singleton_method\n",
2466+
" object: (self)\n",
2467+
" name: (identifier)\n",
2468+
" body: (body_statement\n",
2469+
" (true)))"
2470+
),
2471+
0,
2472+
source.len(),
2473+
);
24332474
}
24342475

24352476
#[test]

0 commit comments

Comments
 (0)