Skip to content

Commit 2437828

Browse files
committed
use PartialEq and Hash instead of a freestanding function
1 parent a9625da commit 2437828

File tree

1 file changed

+74
-95
lines changed

1 file changed

+74
-95
lines changed

helix-core/src/syntax.rs

Lines changed: 74 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ use std::{
1818
cell::RefCell,
1919
collections::{HashMap, VecDeque},
2020
fmt,
21-
hash::{BuildHasher, Hash, Hasher},
21+
hash::{Hash, Hasher},
2222
mem::{replace, transmute},
2323
path::Path,
2424
str::FromStr,
@@ -709,25 +709,13 @@ thread_local! {
709709
})
710710
}
711711

712+
#[derive(Debug)]
712713
pub struct Syntax {
713714
layers: HopSlotMap<LayerId, LanguageLayer>,
714-
layers_lut: RawTable<LayerId>,
715-
layers_lut_hasher: RandomState,
716715
root: LayerId,
717716
loader: Arc<Loader>,
718717
}
719718

720-
impl fmt::Debug for Syntax {
721-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
722-
// skip the layer_lut and layer_lut hasher here as they do not implement Debug
723-
f.debug_struct("Syntax")
724-
.field("layers", &self.layers)
725-
.field("root", &self.root)
726-
.field("loader", &self.loader)
727-
.finish()
728-
}
729-
}
730-
731719
fn byte_range_to_str(range: std::ops::Range<usize>, source: RopeSlice) -> Cow<str> {
732720
Cow::from(source.byte_slice(range))
733721
}
@@ -756,8 +744,6 @@ impl Syntax {
756744
root,
757745
layers,
758746
loader,
759-
layers_lut: RawTable::new(),
760-
layers_lut_hasher: RandomState::new(),
761747
};
762748

763749
syntax
@@ -786,37 +772,38 @@ impl Syntax {
786772
// Convert the changeset into tree sitter edits.
787773
let edits = generate_edits(old_source, changeset);
788774

775+
// This table allows inverse indexing of `layers`.
776+
// That is, by hashing a `LanguageLayer` you can find
777+
// the `LayerId` of an existing equivalent `LanguageLayer` in `layers`.
778+
//
779+
// It is used to determine if a new layer exists for an injection
780+
// or if an existing layer needs to be updated.
781+
let mut layers_table = RawTable::with_capacity(self.layers.len());
782+
let layers_hasher = RandomState::new();
789783
// Use the edits to update all layers markers
790-
if !edits.is_empty() {
791-
fn point_add(a: Point, b: Point) -> Point {
792-
if b.row > 0 {
793-
Point::new(a.row.saturating_add(b.row), b.column)
794-
} else {
795-
Point::new(0, a.column.saturating_add(b.column))
796-
}
784+
fn point_add(a: Point, b: Point) -> Point {
785+
if b.row > 0 {
786+
Point::new(a.row.saturating_add(b.row), b.column)
787+
} else {
788+
Point::new(0, a.column.saturating_add(b.column))
797789
}
798-
fn point_sub(a: Point, b: Point) -> Point {
799-
if a.row > b.row {
800-
Point::new(a.row.saturating_sub(b.row), a.column)
801-
} else {
802-
Point::new(0, a.column.saturating_sub(b.column))
803-
}
790+
}
791+
fn point_sub(a: Point, b: Point) -> Point {
792+
if a.row > b.row {
793+
Point::new(a.row.saturating_sub(b.row), a.column)
794+
} else {
795+
Point::new(0, a.column.saturating_sub(b.column))
804796
}
797+
}
805798

806-
// Ensure lut is large enough to hold all layers.
807-
// The lut should always be empty at this point because it is only
808-
// kept to avoid reallocations so rehashing is never required (hence unreachable).
809-
assert!(self.layers_lut.is_empty());
810-
self.layers_lut
811-
.reserve(self.layers.len(), |_| unreachable!());
812-
813-
for (layer_id, layer) in self.layers.iter_mut() {
814-
// The root layer always covers the whole range (0..usize::MAX)
815-
if layer.depth == 0 {
816-
layer.flags = LayerUpdateFlags::MODIFIED;
817-
continue;
818-
}
799+
for (layer_id, layer) in self.layers.iter_mut() {
800+
// The root layer always covers the whole range (0..usize::MAX)
801+
if layer.depth == 0 {
802+
layer.flags = LayerUpdateFlags::MODIFIED;
803+
continue;
804+
}
819805

806+
if !edits.is_empty() {
820807
for range in &mut layer.ranges {
821808
// Roughly based on https://github.com/tree-sitter/tree-sitter/blob/ddeaa0c7f534268b35b4f6cb39b52df082754413/lib/src/subtree.c#L691-L720
822809
for edit in edits.iter().rev() {
@@ -880,20 +867,13 @@ impl Syntax {
880867
}
881868
}
882869
}
883-
884-
let hash = hash_injection_layer(
885-
&self.layers_lut_hasher,
886-
layer.depth,
887-
&layer.config,
888-
&layer.ranges,
889-
);
890-
// Safety: insert_no_grow is unsafe because it assumes that the table
891-
// has enough capacity to hold additional elements.
892-
// This is always the case as we reserved enough capacity above.
893-
unsafe {
894-
self.layers_lut.insert_no_grow(hash, layer_id);
895-
}
896870
}
871+
872+
let hash = layers_hasher.hash_one(layer);
873+
// Safety: insert_no_grow is unsafe because it assumes that the table
874+
// has enough capacity to hold additional elements.
875+
// This is always the case as we reserved enough capacity above.
876+
unsafe { layers_table.insert_no_grow(hash, layer_id) };
897877
}
898878

899879
PARSER.with(|ts_parser| {
@@ -1018,31 +998,23 @@ impl Syntax {
1018998
let depth = layer.depth + 1;
1019999
// TODO: can't inline this since matches borrows self.layers
10201000
for (config, ranges) in injections {
1021-
// Find an existing layer
1022-
1023-
let hash =
1024-
hash_injection_layer(&self.layers_lut_hasher, depth, &config, &ranges);
1025-
let layer = self
1026-
.layers_lut
1027-
.get(hash, |&it| {
1028-
let layer = &self.layers[it];
1029-
layer.depth == depth && // TODO: track parent id instead
1030-
layer.config.language == config.language &&
1031-
layer.ranges == ranges
1001+
let new_layer = LanguageLayer {
1002+
tree: None,
1003+
config,
1004+
depth,
1005+
ranges,
1006+
flags: LayerUpdateFlags::empty(),
1007+
};
1008+
1009+
// Find an identical existing layer
1010+
let layer = layers_table
1011+
.get(layers_hasher.hash_one(&new_layer), |&it| {
1012+
self.layers[it] == new_layer
10321013
})
10331014
.copied();
10341015

10351016
// ...or insert a new one.
1036-
let layer_id = layer.unwrap_or_else(|| {
1037-
self.layers.insert(LanguageLayer {
1038-
tree: None,
1039-
config,
1040-
depth,
1041-
ranges,
1042-
// set the modified flag to ensure the layer is parsed
1043-
flags: LayerUpdateFlags::empty(),
1044-
})
1045-
});
1017+
let layer_id = layer.unwrap_or_else(|| self.layers.insert(new_layer));
10461018

10471019
queue.push_back(layer_id);
10481020
}
@@ -1060,8 +1032,6 @@ impl Syntax {
10601032
.contains(LayerUpdateFlags::TOUCHED)
10611033
});
10621034

1063-
self.layers_lut.clear_no_drop();
1064-
10651035
Ok(())
10661036
})
10671037
}
@@ -1181,23 +1151,32 @@ pub struct LanguageLayer {
11811151
flags: LayerUpdateFlags,
11821152
}
11831153

1184-
fn hash_injection_layer(
1185-
state: &RandomState,
1186-
depth: u32,
1187-
config: &HighlightConfiguration,
1188-
ranges: &[Range],
1189-
) -> u64 {
1190-
let mut state = state.build_hasher();
1191-
depth.hash(&mut state);
1192-
// The transmute is necessary here because tree_sitter::Language does not derive Hash at the moment.
1193-
// However it does use #[repr] transparent so the transmute here is safe
1194-
// as `Language` (which `Grammar` is an alias for) is just a newtype wrapper around a (thin) pointer.
1195-
// This is also compatible with the PartialEq implementation of language
1196-
// as that is just a pointer comparison.
1197-
let language: *const () = unsafe { transmute(config.language) };
1198-
language.hash(&mut state);
1199-
ranges.hash(&mut state);
1200-
state.finish()
1154+
/// This PartialEq implementation only checks whether
1155+
/// two layers are theoretically identical (meaning they highlight the same text ranges with the same language at the same depth).
1156+
/// It does not check whether the layers have the same internal treesitter
1157+
/// state.
1158+
impl PartialEq for LanguageLayer {
1159+
fn eq(&self, other: &Self) -> bool {
1160+
self.depth == other.depth
1161+
&& self.config.language == other.config.language
1162+
&& self.ranges == other.ranges
1163+
}
1164+
}
1165+
1166+
/// This Hash implementation mirrors the PartialEq implementation above (it hashes the same fields).
1167+
/// See its documentation for details.
1168+
impl Hash for LanguageLayer {
1169+
fn hash<H: Hasher>(&self, state: &mut H) {
1170+
self.depth.hash(state);
1171+
// The transmute is necessary here because tree_sitter::Language does not derive Hash at the moment.
1172+
// However it does use #[repr(transparent)] so the transmute here is safe
1173+
// as `Language` (which `Grammar` is an alias for) is just a newtype wrapper around a (thin) pointer.
1174+
// This is also compatible with the PartialEq implementation of language
1175+
// as that is just a pointer comparison.
1176+
let language: *const () = unsafe { transmute(self.config.language) };
1177+
language.hash(state);
1178+
self.ranges.hash(state);
1179+
}
12011180
}
12021181

12031182
impl LanguageLayer {

0 commit comments

Comments
 (0)