Skip to content

Commit f538b69

Browse files
significantly improve treesitter performance while editing large files (#4716)
* significantly improve treesitter performance while editing large files * Apply stylistic suggestions from code review Co-authored-by: Michael Davis <[email protected]> * use PartialEq and Hash instead of a freestanding function Co-authored-by: Michael Davis <[email protected]>
1 parent 9059c65 commit f538b69

File tree

3 files changed

+106
-40
lines changed

3 files changed

+106
-40
lines changed

Cargo.lock

Lines changed: 25 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helix-core/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ once_cell = "1.16"
3030
arc-swap = "1"
3131
regex = "1"
3232
bitflags = "1.3"
33+
ahash = "0.8.2"
34+
hashbrown = { version = "0.13.1", features = ["raw"] }
3335

3436
log = "0.4"
3537
serde = { version = "1.0", features = ["derive"] }

helix-core/src/syntax.rs

Lines changed: 79 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,19 @@ use crate::{
77
Rope, RopeSlice, Tendril,
88
};
99

10+
use ahash::RandomState;
1011
use arc_swap::{ArcSwap, Guard};
1112
use bitflags::bitflags;
13+
use hashbrown::raw::RawTable;
1214
use slotmap::{DefaultKey as LayerId, HopSlotMap};
1315

1416
use std::{
1517
borrow::Cow,
1618
cell::RefCell,
1719
collections::{HashMap, VecDeque},
1820
fmt,
19-
mem::replace,
21+
hash::{Hash, Hasher},
22+
mem::{replace, transmute},
2023
path::Path,
2124
str::FromStr,
2225
sync::Arc,
@@ -770,30 +773,38 @@ impl Syntax {
770773
// Convert the changeset into tree sitter edits.
771774
let edits = generate_edits(old_source, changeset);
772775

776+
// This table allows inverse indexing of `layers`.
777+
// That is, by hashing a `LanguageLayer` you can find
778+
// the `LayerId` of an existing equivalent `Layer` in `layers`.
779+
//
780+
// It is used to determine if a new layer must be created for an injection
781+
// or if an existing layer needs to be updated.
782+
let mut layers_table = RawTable::with_capacity(self.layers.len());
783+
let layers_hasher = RandomState::new();
773784
// Use the edits to update all layers markers
774-
if !edits.is_empty() {
775-
fn point_add(a: Point, b: Point) -> Point {
776-
if b.row > 0 {
777-
Point::new(a.row.saturating_add(b.row), b.column)
778-
} else {
779-
Point::new(0, a.column.saturating_add(b.column))
780-
}
785+
fn point_add(a: Point, b: Point) -> Point {
786+
if b.row > 0 {
787+
Point::new(a.row.saturating_add(b.row), b.column)
788+
} else {
789+
Point::new(0, a.column.saturating_add(b.column))
781790
}
782-
fn point_sub(a: Point, b: Point) -> Point {
783-
if a.row > b.row {
784-
Point::new(a.row.saturating_sub(b.row), a.column)
785-
} else {
786-
Point::new(0, a.column.saturating_sub(b.column))
787-
}
791+
}
792+
fn point_sub(a: Point, b: Point) -> Point {
793+
if a.row > b.row {
794+
Point::new(a.row.saturating_sub(b.row), a.column)
795+
} else {
796+
Point::new(0, a.column.saturating_sub(b.column))
788797
}
798+
}
789799

790-
for layer in self.layers.values_mut() {
791-
// The root layer always covers the whole range (0..usize::MAX)
792-
if layer.depth == 0 {
793-
layer.flags = LayerUpdateFlags::MODIFIED;
794-
continue;
795-
}
800+
for (layer_id, layer) in self.layers.iter_mut() {
801+
// The root layer always covers the whole range (0..usize::MAX)
802+
if layer.depth == 0 {
803+
layer.flags = LayerUpdateFlags::MODIFIED;
804+
continue;
805+
}
796806

807+
if !edits.is_empty() {
797808
for range in &mut layer.ranges {
798809
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
799810
for edit in edits.iter().rev() {
@@ -858,6 +869,12 @@ impl Syntax {
858869
}
859870
}
860871
}
872+
873+
let hash = layers_hasher.hash_one(layer);
874+
// Safety: insert_no_grow is unsafe because it assumes that the table
875+
// has enough capacity to hold additional elements.
876+
// This is always the case as we reserved enough capacity above.
877+
unsafe { layers_table.insert_no_grow(hash, layer_id) };
861878
}
862879

863880
PARSER.with(|ts_parser| {
@@ -982,27 +999,23 @@ impl Syntax {
982999
let depth = layer.depth + 1;
9831000
// TODO: can't inline this since matches borrows self.layers
9841001
for (config, ranges) in injections {
985-
// Find an existing layer
986-
let layer = self
987-
.layers
988-
.iter_mut()
989-
.find(|(_, layer)| {
990-
layer.depth == depth && // TODO: track parent id instead
991-
layer.config.language == config.language && layer.ranges == ranges
1002+
let new_layer = LanguageLayer {
1003+
tree: None,
1004+
config,
1005+
depth,
1006+
ranges,
1007+
flags: LayerUpdateFlags::empty(),
1008+
};
1009+
1010+
// Find an identical existing layer
1011+
let layer = layers_table
1012+
.get(layers_hasher.hash_one(&new_layer), |&it| {
1013+
self.layers[it] == new_layer
9921014
})
993-
.map(|(id, _layer)| id);
1015+
.copied();
9941016

9951017
// ...or insert a new one.
996-
let layer_id = layer.unwrap_or_else(|| {
997-
self.layers.insert(LanguageLayer {
998-
tree: None,
999-
config,
1000-
depth,
1001-
ranges,
1002-
// set the modified flag to ensure the layer is parsed
1003-
flags: LayerUpdateFlags::empty(),
1004-
})
1005-
});
1018+
let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
10061019

10071020
queue.push_back(layer_id);
10081021
}
@@ -1139,6 +1152,34 @@ pub struct LanguageLayer {
11391152
flags: LayerUpdateFlags,
11401153
}
11411154

1155+
/// This PartialEq implementation only checks that
1156+
/// two layers are theoretically identical (meaning they have the same depth and highlight the same text ranges with the same language).
1157+
/// It does not check whether the layers have the same internal treesitter
1158+
/// state.
1159+
impl PartialEq for LanguageLayer {
1160+
fn eq(&self, other: &Self) -> bool {
1161+
self.depth == other.depth
1162+
&& self.config.language == other.config.language
1163+
&& self.ranges == other.ranges
1164+
}
1165+
}
1166+
1167+
/// This Hash implementation is consistent with the PartialEq implementation above.
1168+
/// See its documentation for details.
1169+
impl Hash for LanguageLayer {
1170+
fn hash<H: Hasher>(&self, state: &mut H) {
1171+
self.depth.hash(state);
1172+
// The transmute is necessary here because tree_sitter::Language does not derive Hash at the moment.
1173+
// However, it is `#[repr(transparent)]`, so the transmute here is safe
1174+
// as `Language` (which `Grammar` is an alias for) is just a newtype wrapper around a (thin) pointer.
1175+
// This is also compatible with the PartialEq implementation of language
1176+
// as that is just a pointer comparison.
1177+
let language: *const () = unsafe { transmute(self.config.language) };
1178+
language.hash(state);
1179+
self.ranges.hash(state);
1180+
}
1181+
}
1182+
11421183
impl LanguageLayer {
11431184
pub fn tree(&self) -> &Tree {
11441185
// TODO: no unwrap

0 commit comments

Comments
 (0)