Skip to content

Commit 32c6d8a

Browse files
committed
significantly improve performance of :reload
1 parent 801984c commit 32c6d8a

File tree

4 files changed

+233
-60
lines changed

4 files changed

+233
-60
lines changed

Cargo.lock

+25-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

helix-core/Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ integration = []
1717
[dependencies]
1818
helix-loader = { version = "0.6", path = "../helix-loader" }
1919

20-
ropey = { version = "1.5", default-features = false, features = ["simd"] }
20+
ropey = { version = "1.5", default-features = false, features = ["simd"] , git = "https://github.com/pascalkuthe/ropey.git", branch = "line_diff2" }
2121
smallvec = "1.10"
2222
smartstring = "1.0.1"
2323
unicode-segmentation = "1.10"
@@ -36,7 +36,7 @@ serde = { version = "1.0", features = ["derive"] }
3636
serde_json = "1.0"
3737
toml = "0.5"
3838

39-
similar = "2.2"
39+
imara-diff = "0.1.0"
4040

4141
encoding_rs = "0.8"
4242

helix-core/src/diff.rs

+203-45
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,195 @@
1-
use crate::{Rope, Transaction};
1+
use std::ops::Range;
2+
use std::time::Instant;
23

3-
/// Compares `old` and `new` to generate a [`Transaction`] describing
4-
/// the steps required to get from `old` to `new`.
5-
pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction {
6-
// `similar` only works on contiguous data, so a `Rope` has
7-
// to be temporarily converted into a `String`.
8-
let old_converted = old.to_string();
9-
let new_converted = new.to_string();
10-
11-
// A timeout is set so after 1 seconds, the algorithm will start
12-
// approximating. This is especially important for big `Rope`s or
13-
// `Rope`s that are extremely dissimilar to each other.
14-
let mut config = similar::TextDiff::configure();
15-
config.timeout(std::time::Duration::from_secs(1));
16-
17-
let diff = config.diff_chars(&old_converted, &new_converted);
18-
19-
// The current position of the change needs to be tracked to
20-
// construct the `Change`s.
21-
let mut pos = 0;
22-
Transaction::change(
23-
old,
24-
diff.ops()
4+
use imara_diff::intern::InternedInput;
5+
use imara_diff::Algorithm;
6+
use ropey::RopeSlice;
7+
8+
use crate::{ChangeSet, Rope, Tendril, Transaction};
9+
10+
/// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
11+
struct CharChangeSetBuilder<'a> {
12+
res: &'a mut ChangeSet,
13+
hunk: &'a InternedInput<char>,
14+
pos: u32,
15+
}
16+
17+
impl imara_diff::Sink for CharChangeSetBuilder<'_> {
18+
type Out = ();
19+
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
20+
self.res.retain((before.start - self.pos) as usize);
21+
self.res.delete(before.len());
22+
self.pos = before.end;
23+
24+
let res = self.hunk.after[after.start as usize..after.end as usize]
25+
.iter()
26+
.map(|&token| self.hunk.interner[token])
27+
.collect();
28+
29+
self.res.insert(res);
30+
}
31+
32+
fn finish(self) -> Self::Out {
33+
self.res.retain(self.hunk.before.len() - self.pos as usize);
34+
}
35+
}
36+
37+
struct LineChangeSetBuilder<'a> {
38+
res: ChangeSet,
39+
after: RopeSlice<'a>,
40+
file: &'a InternedInput<RopeSlice<'a>>,
41+
current_hunk: InternedInput<char>,
42+
pos: u32,
43+
}
44+
45+
impl imara_diff::Sink for LineChangeSetBuilder<'_> {
46+
type Out = ChangeSet;
47+
48+
fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
49+
let len = self.file.before[self.pos as usize..before.start as usize]
2550
.iter()
26-
.map(|op| op.as_tag_tuple())
27-
.filter_map(|(tag, old_range, new_range)| {
28-
// `old_pos..pos` is equivalent to `start..end` for where
29-
// the change should be applied.
30-
let old_pos = pos;
31-
pos += old_range.end - old_range.start;
32-
33-
match tag {
34-
// Semantically, inserts and replacements are the same thing.
35-
similar::DiffTag::Insert | similar::DiffTag::Replace => {
36-
// This is the text from the `new` rope that should be
37-
// inserted into `old`.
38-
let text: &str = {
39-
let start = new.char_to_byte(new_range.start);
40-
let end = new.char_to_byte(new_range.end);
41-
&new_converted[start..end]
42-
};
43-
Some((old_pos, pos, Some(text.into())))
51+
.map(|&it| self.file.interner[it].len_chars())
52+
.sum();
53+
self.res.retain(len);
54+
self.pos = before.end;
55+
56+
// do not perform diffs on large hunks
57+
let len_before = before.end - before.start;
58+
let len_after = after.end - after.start;
59+
60+
// Pure insertions/removals do not require a character diff.
61+
// Very large changes are ignored because their character diff is expensive to compute
62+
// TODO adjust heuristic to detect large changes?
63+
if len_before == 0
64+
|| len_after == 0
65+
|| len_after > 5 * len_before
66+
|| 5 * len_after < len_before && len_before > 10
67+
|| len_before + len_after > 200
68+
{
69+
let remove = self.file.before[before.start as usize..before.end as usize]
70+
.iter()
71+
.map(|&it| self.file.interner[it].len_chars())
72+
.sum();
73+
self.res.delete(remove);
74+
let mut fragment = Tendril::new();
75+
if len_after > 500 {
76+
// copying a rope line by line is slower then copying the entire
77+
// rope. Use to_string for very large changes instead..
78+
if self.file.after.len() == after.end as usize {
79+
if after.start == 0 {
80+
fragment = self.after.to_string().into();
81+
} else {
82+
let start = self.after.line_to_char(after.start as usize);
83+
fragment = self.after.slice(start..).to_string().into();
4484
}
45-
similar::DiffTag::Delete => Some((old_pos, pos, None)),
46-
similar::DiffTag::Equal => None,
85+
} else if after.start == 0 {
86+
let end = self.after.line_to_char(after.end as usize);
87+
fragment = self.after.slice(..end).to_string().into();
88+
} else {
89+
let start = self.after.line_to_char(after.start as usize);
90+
let end = self.after.line_to_char(after.end as usize);
91+
fragment = self.after.slice(start..end).to_string().into();
4792
}
48-
}),
49-
)
93+
} else {
94+
for &line in &self.file.after[after.start as usize..after.end as usize] {
95+
for chunk in self.file.interner[line].chunks() {
96+
fragment.push_str(chunk)
97+
}
98+
}
99+
};
100+
self.res.insert(fragment);
101+
} else {
102+
// for reasonably small hunks, generating a ChangeSet from char diff can save memory
103+
// TODO use a tokenizer (word diff?) for improved performance
104+
let hunk_before = self.file.before[before.start as usize..before.end as usize]
105+
.iter()
106+
.flat_map(|&it| self.file.interner[it].chars());
107+
let hunk_after = self.file.after[after.start as usize..after.end as usize]
108+
.iter()
109+
.flat_map(|&it| self.file.interner[it].chars());
110+
self.current_hunk.update_before(hunk_before);
111+
self.current_hunk.update_after(hunk_after);
112+
113+
// the histogram heuristic does not work as well
114+
// for characters because the same characters often reoccur
115+
// use myer diff instead
116+
imara_diff::diff(
117+
Algorithm::Myers,
118+
&self.current_hunk,
119+
CharChangeSetBuilder {
120+
res: &mut self.res,
121+
hunk: &self.current_hunk,
122+
pos: 0,
123+
},
124+
);
125+
126+
self.current_hunk.clear();
127+
}
128+
}
129+
130+
fn finish(mut self) -> Self::Out {
131+
let len = self.file.before[self.pos as usize..]
132+
.iter()
133+
.map(|&it| self.file.interner[it].len_chars())
134+
.sum();
135+
136+
self.res.retain(len);
137+
self.res
138+
}
139+
}
140+
141+
struct RopeLines<'a>(RopeSlice<'a>);
142+
143+
impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
144+
type Token = RopeSlice<'a>;
145+
// TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
146+
type Tokenizer = ropey::iter::Lines<'a>;
147+
148+
fn tokenize(&self) -> Self::Tokenizer {
149+
self.0.lines()
150+
}
151+
152+
fn estimate_tokens(&self) -> u32 {
153+
// we can provide a perfect estimate which is very nice for performance
154+
self.0.len_lines() as u32
155+
}
156+
}
157+
158+
/// Compares `old` and `new` to generate a [`Transaction`] describing
159+
/// the steps required to get from `old` to `new`.
160+
pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
161+
let start = Instant::now();
162+
let res = ChangeSet::with_capacity(32);
163+
let after = after.slice(..);
164+
let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
165+
let builder = LineChangeSetBuilder {
166+
res,
167+
file: &file,
168+
after,
169+
pos: 0,
170+
current_hunk: InternedInput::default(),
171+
};
172+
173+
let res = imara_diff::diff(Algorithm::Histogram, &file, builder).into();
174+
175+
log::debug!(
176+
"rope diff took {}s",
177+
Instant::now().duration_since(start).as_secs_f64()
178+
);
179+
res
50180
}
51181

52182
#[cfg(test)]
53183
mod tests {
54184
use super::*;
55185

186+
fn test_identity(a: &str, b: &str) {
187+
let mut old = Rope::from(a);
188+
let new = Rope::from(b);
189+
compare_ropes(&old, &new).apply(&mut old);
190+
assert_eq!(old, new);
191+
}
192+
56193
quickcheck::quickcheck! {
57194
fn test_compare_ropes(a: String, b: String) -> bool {
58195
let mut old = Rope::from(a);
@@ -61,4 +198,25 @@ mod tests {
61198
old == new
62199
}
63200
}
201+
202+
#[test]
203+
fn equal_files() {
204+
test_identity("foo", "foo");
205+
}
206+
207+
#[test]
208+
fn trailing_newline() {
209+
test_identity("foo\n", "foo");
210+
test_identity("foo", "foo\n");
211+
}
212+
213+
#[test]
214+
fn new_file() {
215+
test_identity("", "foo");
216+
}
217+
218+
#[test]
219+
fn deleted_file() {
220+
test_identity("foo", "");
221+
}
64222
}

helix-core/src/transaction.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ impl ChangeSet {
5656
}
5757

5858
// Changeset builder operations: delete/insert/retain
59-
fn delete(&mut self, n: usize) {
59+
pub(crate) fn delete(&mut self, n: usize) {
6060
use Operation::*;
6161
if n == 0 {
6262
return;
@@ -71,7 +71,7 @@ impl ChangeSet {
7171
}
7272
}
7373

74-
fn insert(&mut self, fragment: Tendril) {
74+
pub(crate) fn insert(&mut self, fragment: Tendril) {
7575
use Operation::*;
7676

7777
if fragment.is_empty() {
@@ -93,7 +93,7 @@ impl ChangeSet {
9393
self.changes.push(new_last);
9494
}
9595

96-
fn retain(&mut self, n: usize) {
96+
pub(crate) fn retain(&mut self, n: usize) {
9797
use Operation::*;
9898
if n == 0 {
9999
return;

0 commit comments

Comments
 (0)