
Commit 958e844

significantly improve performance of :reload

1 parent: 801984c

File tree

4 files changed: +226 -57 lines changed

Cargo.lock
helix-core/Cargo.toml
helix-core/src/diff.rs
helix-core/src/transaction.rs


Cargo.lock

Lines changed: 24 additions & 8 deletions
Some generated files are not rendered by default.

helix-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 toml = "0.5"
 
-similar = "2.2"
+imara-diff = "0.1.0"
 
 encoding_rs = "0.8"
 
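
The dependency change above swaps the similar crate for imara-diff, whose interned-token Sink API is what the rewritten diff.rs below builds on. For orientation, here is a minimal, self-contained sketch (not part of this commit; it assumes imara-diff 0.1's built-in line tokenizer for &str) that counts changed lines through the same Sink trait the new code implements:

use std::ops::Range;

use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, Sink};

// Hypothetical sink: tally how many lines each change removes and adds.
struct ChangeCounter {
    removed: u32,
    added: u32,
}

impl Sink for ChangeCounter {
    type Out = (u32, u32);

    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
        // `before`/`after` are token (here: line) ranges into the interned inputs.
        self.removed += before.end - before.start;
        self.added += after.end - after.start;
    }

    fn finish(self) -> Self::Out {
        (self.removed, self.added)
    }
}

fn main() {
    let input = InternedInput::new("foo\nbar\nbaz\n", "foo\nbaz\n");
    let (removed, added) = diff(
        Algorithm::Histogram,
        &input,
        ChangeCounter { removed: 0, added: 0 },
    );
    println!("-{removed} +{added}");
}

The new diff.rs implements this trait twice: once over rope lines (LineChangeSetBuilder) and once over the characters of a single hunk (CharChangeSetBuilder).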

helix-core/src/diff.rs

Lines changed: 198 additions & 45 deletions
@@ -1,58 +1,190 @@
-use crate::{Rope, Transaction};
+use std::ops::Range;
 
-/// Compares `old` and `new` to generate a [`Transaction`] describing
-/// the steps required to get from `old` to `new`.
-pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction {
-    // `similar` only works on contiguous data, so a `Rope` has
-    // to be temporarily converted into a `String`.
-    let old_converted = old.to_string();
-    let new_converted = new.to_string();
-
-    // A timeout is set so after 1 seconds, the algorithm will start
-    // approximating. This is especially important for big `Rope`s or
-    // `Rope`s that are extremely dissimilar to each other.
-    let mut config = similar::TextDiff::configure();
-    config.timeout(std::time::Duration::from_secs(1));
-
-    let diff = config.diff_chars(&old_converted, &new_converted);
-
-    // The current position of the change needs to be tracked to
-    // construct the `Change`s.
-    let mut pos = 0;
-    Transaction::change(
-        old,
-        diff.ops()
+use imara_diff::intern::InternedInput;
+use imara_diff::Algorithm;
+use ropey::RopeSlice;
+
+use crate::{ChangeSet, Rope, Tendril, Transaction};
+
+/// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
+struct CharChangeSetBuilder<'a> {
+    res: &'a mut ChangeSet,
+    hunk: &'a InternedInput<char>,
+    pos: u32,
+}
+
+impl imara_diff::Sink for CharChangeSetBuilder<'_> {
+    type Out = ();
+    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
+        self.res.retain((before.start - self.pos) as usize);
+        self.res.delete(before.len());
+        self.pos = before.end;
+
+        let res = self.hunk.after[after.start as usize..after.end as usize]
+            .iter()
+            .map(|&token| self.hunk.interner[token])
+            .collect();
+
+        self.res.insert(res);
+    }
+
+    fn finish(self) -> Self::Out {
+        self.res.retain(self.hunk.before.len() - self.pos as usize);
+    }
+}
+
+struct LineChangeSetBuilder<'a> {
+    res: ChangeSet,
+    after: RopeSlice<'a>,
+    file: &'a InternedInput<RopeSlice<'a>>,
+    current_hunk: InternedInput<char>,
+    pos: u32,
+}
+
+impl imara_diff::Sink for LineChangeSetBuilder<'_> {
+    type Out = ChangeSet;
+
+    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
+        use std::fmt::Write;
+
+        let len = self.file.before[self.pos as usize..before.start as usize]
             .iter()
-            .map(|op| op.as_tag_tuple())
-            .filter_map(|(tag, old_range, new_range)| {
-                // `old_pos..pos` is equivalent to `start..end` for where
-                // the change should be applied.
-                let old_pos = pos;
-                pos += old_range.end - old_range.start;
-
-                match tag {
-                    // Semantically, inserts and replacements are the same thing.
-                    similar::DiffTag::Insert | similar::DiffTag::Replace => {
-                        // This is the text from the `new` rope that should be
-                        // inserted into `old`.
-                        let text: &str = {
-                            let start = new.char_to_byte(new_range.start);
-                            let end = new.char_to_byte(new_range.end);
-                            &new_converted[start..end]
-                        };
-                        Some((old_pos, pos, Some(text.into())))
+            .map(|&it| self.file.interner[it].len_chars())
+            .sum();
+        self.res.retain(len);
+        self.pos = before.end;
+
+        // do not perform diffs on large hunks
+        let len_before = before.end - before.start;
+        let len_after = after.end - after.start;
+
+        // Pure insertions/removals do not require a character diff.
+        // Very large changes are ignored because their character diff is expensive to compute
+        // TODO adjust heuristic to detect large changes?
+        if len_before == 0
+            || len_after == 0
+            || len_after > 5 * len_before
+            || 5 * len_after < len_before && len_before > 10
+            || len_before + len_after > 200
+        {
+            let remove = self.file.before[before.start as usize..before.end as usize]
+                .iter()
+                .map(|&it| self.file.interner[it].len_chars())
+                .sum();
+            self.res.delete(remove);
+            let mut fragment = Tendril::new();
+            if len_after > 500 {
+                // copying a rope line by line is slower then copying the entire
+                // rope. Use to_string for very large changes instead..
+                if self.file.after.len() == after.end as usize {
+                    if after.start == 0 {
+                        fragment = self.after.to_string().into();
+                    } else {
+                        let start = self.after.line_to_char(after.start as usize);
+                        fragment = self.after.slice(start..).to_string().into();
                     }
-                    similar::DiffTag::Delete => Some((old_pos, pos, None)),
-                    similar::DiffTag::Equal => None,
+                } else if after.start == 0 {
+                    let end = self.after.line_to_char(after.end as usize);
+                    fragment = self.after.slice(..end).to_string().into();
+                } else {
+                    let start = self.after.line_to_char(after.start as usize);
+                    let end = self.after.line_to_char(after.end as usize);
+                    fragment = self.after.slice(start..end).to_string().into();
                 }
-            }),
-    )
+            } else {
+                for &line in &self.file.after[after.start as usize..after.end as usize] {
+                    for chunk in self.file.interner[line].chunks() {
+                        fragment.push_str(chunk)
+                    }
+                }
+            };
+            self.res.insert(fragment);
+        } else {
+            // for reasonably small hunks, generating a ChangeSet from char diff can save memory
+            // TODO use a tokenizer (word diff?) for improved performance
+            let hunk_before = self.file.before[before.start as usize..before.end as usize]
+                .iter()
+                .flat_map(|&it| self.file.interner[it].chars());
+            let hunk_after = self.file.after[after.start as usize..after.end as usize]
+                .iter()
+                .flat_map(|&it| self.file.interner[it].chars());
+            self.current_hunk.update_before(hunk_before);
+            self.current_hunk.update_after(hunk_after);
+
+            // the histogram heuristic does not work as well
+            // for characters because the same characters often reoccur
+            // use myer diff instead
+            imara_diff::diff(
+                Algorithm::Myers,
+                &self.current_hunk,
+                CharChangeSetBuilder {
+                    res: &mut self.res,
+                    hunk: &self.current_hunk,
+                    pos: 0,
+                },
+            );
+
+            self.current_hunk.clear();
+        }
+    }
+
+    fn finish(mut self) -> Self::Out {
+        let len = self.file.before[self.pos as usize..]
+            .iter()
+            .map(|&it| self.file.interner[it].len_chars())
+            .sum();
+
+        self.res.retain(len);
+        self.res
+    }
+}
+
+struct RopeLines<'a>(RopeSlice<'a>);
+
+impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
+    type Token = RopeSlice<'a>;
+    // TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
+    type Tokenizer = ropey::iter::Lines<'a>;
+
+    fn tokenize(&self) -> Self::Tokenizer {
+        self.0.lines()
+    }
+
+    fn estimate_tokens(&self) -> u32 {
+        // we can provide a perfect estimate which is very nice for performance
+        self.0.len_lines() as u32
+    }
+}
+
+/// Compares `old` and `new` to generate a [`Transaction`] describing
+/// the steps required to get from `old` to `new`.
+pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
+    log::error!("diff");
+    let res = ChangeSet::with_capacity(32);
+    let after = after.slice(..);
+    let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
+    let builder = LineChangeSetBuilder {
+        res,
+        file: &file,
+        after,
+        pos: 0,
+        current_hunk: InternedInput::default(),
+    };
+
+    imara_diff::diff(Algorithm::Histogram, &file, builder).into()
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    fn test_identity(a: &str, b: &str) {
+        let mut old = Rope::from(a);
+        let new = Rope::from(b);
+        compare_ropes(&old, &new).apply(&mut old);
+        assert_eq!(old, new)
+    }
+
     quickcheck::quickcheck! {
         fn test_compare_ropes(a: String, b: String) -> bool {
             let mut old = Rope::from(a);
@@ -61,4 +193,25 @@ mod tests {
             old == new
         }
     }
+
+    #[test]
+    fn equal_files() {
+        test_identity("foo", "foo")
+    }
+
+    #[test]
+    fn trailing_newline() {
+        test_identity("foo\n", "foo");
+        test_identity("foo", "foo\n")
+    }
+
+    #[test]
+    fn new_file() {
+        test_identity("", "foo")
+    }
+
+    #[test]
+    fn deleted_file() {
+        test_identity("foo", "")
+    }
 }
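
The new test_identity helper doubles as a usage example: compare_ropes yields a Transaction that, applied to the old rope, reproduces the new one. From a downstream crate this looks roughly like the sketch below (hypothetical example, not part of the commit; it assumes the usual helix_core::diff re-export):

use helix_core::diff::compare_ropes;
use helix_core::Rope;

fn main() {
    let mut old = Rope::from("fn main() {}\n");
    let new = Rope::from("fn main() {\n    println!(\"hello\");\n}\n");

    // compare_ropes builds the edit script; apply rewrites `old` in place.
    let transaction = compare_ropes(&old, &new);
    transaction.apply(&mut old);
    assert_eq!(old, new);
}

This matches the :reload path named in the commit message: the file re-read from disk is diffed against the open buffer and applied as a single transaction.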

helix-core/src/transaction.rs

Lines changed: 3 additions & 3 deletions
@@ -56,7 +56,7 @@ impl ChangeSet {
     }
 
     // Changeset builder operations: delete/insert/retain
-    fn delete(&mut self, n: usize) {
+    pub(crate) fn delete(&mut self, n: usize) {
         use Operation::*;
         if n == 0 {
             return;
@@ -71,7 +71,7 @@ impl ChangeSet {
         }
     }
 
-    fn insert(&mut self, fragment: Tendril) {
+    pub(crate) fn insert(&mut self, fragment: Tendril) {
        use Operation::*;
 
         if fragment.is_empty() {
@@ -93,7 +93,7 @@ impl ChangeSet {
         self.changes.push(new_last);
     }
 
-    fn retain(&mut self, n: usize) {
+    pub(crate) fn retain(&mut self, n: usize) {
         use Operation::*;
         if n == 0 {
             return;
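
The only change here is visibility: delete, insert, and retain go from private to pub(crate) so the new diff sink in helix-core/src/diff.rs can assemble a ChangeSet directly; their behavior is untouched. For intuition, a hypothetical in-crate snippet (it would have to live inside helix-core, e.g. as a unit test, since the methods stay crate-private; it also assumes ChangeSet::apply as used elsewhere in the crate) showing how the three operations compose:

use crate::{ChangeSet, Rope, Tendril};

#[test]
fn replace_middle_word() {
    // Old text: "one two three" (13 chars). Build a ChangeSet that swaps "two" for "2".
    let old = Rope::from("one two three");
    let mut changes = ChangeSet::with_capacity(4);

    changes.retain(4); // keep "one "
    changes.delete(3); // drop "two"
    changes.insert(Tendril::from("2")); // splice in the replacement text
    changes.retain(6); // keep " three"

    let mut doc = old.clone();
    changes.apply(&mut doc);
    assert_eq!(doc, Rope::from("one 2 three"));
}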
