Commit 7676208

significantly improve performance of :reload
1 parent 801984c commit 7676208

File tree

4 files changed: +224 -57 lines

Cargo.lock

Lines changed: 24 additions & 8 deletions
Generated file; diff not rendered.

helix-core/Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 toml = "0.5"

-similar = "2.2"
+imara-diff = "0.1.0"

 encoding_rs = "0.8"
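
This dependency swap replaces `similar`, which diffed the whole document as one contiguous `String` of characters, with `imara-diff`, which interns each line as a token and reports hunks through a `Sink` callback. As orientation for the new diff.rs below, here is a minimal sketch of that Sink API, assuming imara-diff 0.1's built-in line tokenization for `&str`; the `LineCounter` sink is illustrative only and not part of this commit:

```rust
use std::ops::Range;

use imara_diff::intern::InternedInput;
use imara_diff::{Algorithm, Sink};

/// Illustrative sink that just counts removed/added lines.
struct LineCounter {
    removed: u32,
    added: u32,
}

impl Sink for LineCounter {
    type Out = (u32, u32);

    // Called once per hunk with the differing token (line) ranges on each side.
    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
        self.removed += before.end - before.start;
        self.added += after.end - after.start;
    }

    fn finish(self) -> Self::Out {
        (self.removed, self.added)
    }
}

fn main() {
    // Lines are interned to integer tokens once; the diff itself runs on tokens.
    let input = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    let counts = imara_diff::diff(
        Algorithm::Histogram,
        &input,
        LineCounter { removed: 0, added: 0 },
    );
    assert_eq!(counts, (1, 1));
}
```

The new diff.rs implements this same trait twice: once over line tokens for the whole file, and once over `char` tokens for small hunks.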

helix-core/src/diff.rs

Lines changed: 196 additions & 45 deletions
@@ -1,58 +1,188 @@
-use crate::{Rope, Transaction};
-
-/// Compares `old` and `new` to generate a [`Transaction`] describing
-/// the steps required to get from `old` to `new`.
-pub fn compare_ropes(old: &Rope, new: &Rope) -> Transaction {
-    // `similar` only works on contiguous data, so a `Rope` has
-    // to be temporarily converted into a `String`.
-    let old_converted = old.to_string();
-    let new_converted = new.to_string();
-
-    // A timeout is set so that after 1 second, the algorithm will start
-    // approximating. This is especially important for big `Rope`s or
-    // `Rope`s that are extremely dissimilar to each other.
-    let mut config = similar::TextDiff::configure();
-    config.timeout(std::time::Duration::from_secs(1));
-
-    let diff = config.diff_chars(&old_converted, &new_converted);
-
-    // The current position of the change needs to be tracked to
-    // construct the `Change`s.
-    let mut pos = 0;
-    Transaction::change(
-        old,
-        diff.ops()
-            .iter()
-            .map(|op| op.as_tag_tuple())
-            .filter_map(|(tag, old_range, new_range)| {
-                // `old_pos..pos` is equivalent to `start..end` for where
-                // the change should be applied.
-                let old_pos = pos;
-                pos += old_range.end - old_range.start;
-
-                match tag {
-                    // Semantically, inserts and replacements are the same thing.
-                    similar::DiffTag::Insert | similar::DiffTag::Replace => {
-                        // This is the text from the `new` rope that should be
-                        // inserted into `old`.
-                        let text: &str = {
-                            let start = new.char_to_byte(new_range.start);
-                            let end = new.char_to_byte(new_range.end);
-                            &new_converted[start..end]
-                        };
-                        Some((old_pos, pos, Some(text.into())))
-                    }
-                    similar::DiffTag::Delete => Some((old_pos, pos, None)),
-                    similar::DiffTag::Equal => None,
-                }
-            }),
-    )
+use std::ops::Range;
+
+use imara_diff::intern::InternedInput;
+use imara_diff::Algorithm;
+use ropey::RopeSlice;
+
+use crate::{ChangeSet, Rope, Tendril, Transaction};
+
+/// An `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
+struct CharChangeSetBuilder<'a> {
+    res: &'a mut ChangeSet,
+    hunk: &'a InternedInput<char>,
+    pos: u32,
+}
+
+impl imara_diff::Sink for CharChangeSetBuilder<'_> {
+    type Out = ();
+    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
+        self.res.retain((before.start - self.pos) as usize);
+        self.res.delete(before.len());
+        self.pos = before.end;
+
+        let res = self.hunk.after[after.start as usize..after.end as usize]
+            .iter()
+            .map(|&token| self.hunk.interner[token])
+            .collect();
+
+        self.res.insert(res);
+    }
+
+    fn finish(self) -> Self::Out {
+        self.res.retain(self.hunk.before.len() - self.pos as usize);
+    }
+}
+
+struct LineChangeSetBuilder<'a> {
+    res: ChangeSet,
+    after: RopeSlice<'a>,
+    file: &'a InternedInput<RopeSlice<'a>>,
+    current_hunk: InternedInput<char>,
+    pos: u32,
+}
+
+impl imara_diff::Sink for LineChangeSetBuilder<'_> {
+    type Out = ChangeSet;
+
+    fn process_change(&mut self, before: Range<u32>, after: Range<u32>) {
+        let len = self.file.before[self.pos as usize..before.start as usize]
+            .iter()
+            .map(|&it| self.file.interner[it].len_chars())
+            .sum();
+        self.res.retain(len);
+        self.pos = before.end;
+
+        // do not perform character diffs on large hunks
+        let len_before = before.end - before.start;
+        let len_after = after.end - after.start;
+
+        // Pure insertions/removals do not require a character diff.
+        // Very large changes are ignored because their character diff is expensive to compute.
+        // TODO adjust heuristic to detect large changes?
+        if len_before == 0
+            || len_after == 0
+            || len_after > 5 * len_before
+            || 5 * len_after < len_before && len_before > 10
+            || len_before + len_after > 200
+        {
+            let remove = self.file.before[before.start as usize..before.end as usize]
+                .iter()
+                .map(|&it| self.file.interner[it].len_chars())
+                .sum();
+            self.res.delete(remove);
+            let mut fragment = Tendril::new();
+            if len_after > 500 {
+                // Copying a rope line by line is slower than copying the entire
+                // rope. Use to_string for very large changes instead.
+                if self.file.after.len() == after.end as usize {
+                    if after.start == 0 {
+                        fragment = self.after.to_string().into();
+                    } else {
+                        let start = self.after.line_to_char(after.start as usize);
+                        fragment = self.after.slice(start..).to_string().into();
+                    }
+                } else if after.start == 0 {
+                    let end = self.after.line_to_char(after.end as usize);
+                    fragment = self.after.slice(..end).to_string().into();
+                } else {
+                    let start = self.after.line_to_char(after.start as usize);
+                    let end = self.after.line_to_char(after.end as usize);
+                    fragment = self.after.slice(start..end).to_string().into();
+                }
+            } else {
+                for &line in &self.file.after[after.start as usize..after.end as usize] {
+                    for chunk in self.file.interner[line].chunks() {
+                        fragment.push_str(chunk)
+                    }
+                }
+            };
+            self.res.insert(fragment);
+        } else {
+            // For reasonably small hunks, generating a ChangeSet from a char diff can save memory.
+            // TODO use a tokenizer (word diff?) for improved performance
+            let hunk_before = self.file.before[before.start as usize..before.end as usize]
+                .iter()
+                .flat_map(|&it| self.file.interner[it].chars());
+            let hunk_after = self.file.after[after.start as usize..after.end as usize]
+                .iter()
+                .flat_map(|&it| self.file.interner[it].chars());
+            self.current_hunk.update_before(hunk_before);
+            self.current_hunk.update_after(hunk_after);
+
+            // The histogram heuristic does not work as well for characters
+            // because the same characters often reoccur; use Myers diff instead.
+            imara_diff::diff(
+                Algorithm::Myers,
+                &self.current_hunk,
+                CharChangeSetBuilder {
+                    res: &mut self.res,
+                    hunk: &self.current_hunk,
+                    pos: 0,
+                },
+            );
+
+            self.current_hunk.clear();
+        }
+    }
+
+    fn finish(mut self) -> Self::Out {
+        let len = self.file.before[self.pos as usize..]
+            .iter()
+            .map(|&it| self.file.interner[it].len_chars())
+            .sum();
+
+        self.res.retain(len);
+        self.res
+    }
+}
+
+struct RopeLines<'a>(RopeSlice<'a>);
+
+impl<'a> imara_diff::intern::TokenSource for RopeLines<'a> {
+    type Token = RopeSlice<'a>;
+    // TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
+    type Tokenizer = ropey::iter::Lines<'a>;
+
+    fn tokenize(&self) -> Self::Tokenizer {
+        self.0.lines()
+    }
+
+    fn estimate_tokens(&self) -> u32 {
+        // we can provide a perfect estimate, which is very nice for performance
+        self.0.len_lines() as u32
+    }
+}
+
+/// Compares `before` and `after` to generate a [`Transaction`] describing
+/// the steps required to get from `before` to `after`.
+pub fn compare_ropes(before: &Rope, after: &Rope) -> Transaction {
+    log::error!("diff");
+    let res = ChangeSet::with_capacity(32);
+    let after = after.slice(..);
+    let file = InternedInput::new(RopeLines(before.slice(..)), RopeLines(after));
+    let builder = LineChangeSetBuilder {
+        res,
+        file: &file,
+        after,
+        pos: 0,
+        current_hunk: InternedInput::default(),
+    };
+
+    imara_diff::diff(Algorithm::Histogram, &file, builder).into()
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
 
+    fn test_identity(a: &str, b: &str) {
+        let mut old = Rope::from(a);
+        let new = Rope::from(b);
+        compare_ropes(&old, &new).apply(&mut old);
+        assert_eq!(old, new);
+    }
+
     quickcheck::quickcheck! {
         fn test_compare_ropes(a: String, b: String) -> bool {
             let mut old = Rope::from(a);
@@ -61,4 +191,25 @@ mod tests {
             old == new
         }
     }
+
+    #[test]
+    fn equal_files() {
+        test_identity("foo", "foo");
+    }
+
+    #[test]
+    fn trailing_newline() {
+        test_identity("foo\n", "foo");
+        test_identity("foo", "foo\n");
+    }
+
+    #[test]
+    fn new_file() {
+        test_identity("", "foo");
+    }
+
+    #[test]
+    fn deleted_file() {
+        test_identity("foo", "");
+    }
 }
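
The public entry point is still `compare_ropes`, so callers such as `:reload` are unchanged; the identity tests above pin down its contract (applying the returned transaction to the old rope must reproduce the new one). A minimal caller-side sketch of that contract, using only the public items visible here (`helix_core::Rope`, the `diff` module, `Transaction::apply`); the helper name is illustrative:

```rust
use helix_core::{diff::compare_ropes, Rope};

/// Illustrative helper: replace `doc` with `new_contents` via a diff, so the
/// edit is an ordinary Transaction rather than a wholesale buffer replacement.
fn reload_in_place(doc: &mut Rope, new_contents: &str) {
    let new = Rope::from(new_contents);
    // Encodes the retain/delete/insert steps that turn `doc` into `new`.
    let transaction = compare_ropes(doc, &new);
    transaction.apply(doc);
    debug_assert_eq!(*doc, new);
}
```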

helix-core/src/transaction.rs

Lines changed: 3 additions & 3 deletions
@@ -56,7 +56,7 @@ impl ChangeSet {
     }
 
     // Changeset builder operations: delete/insert/retain
-    fn delete(&mut self, n: usize) {
+    pub(crate) fn delete(&mut self, n: usize) {
         use Operation::*;
         if n == 0 {
             return;
@@ -71,7 +71,7 @@ impl ChangeSet {
         }
     }
 
-    fn insert(&mut self, fragment: Tendril) {
+    pub(crate) fn insert(&mut self, fragment: Tendril) {
         use Operation::*;
 
         if fragment.is_empty() {
@@ -93,7 +93,7 @@ impl ChangeSet {
         self.changes.push(new_last);
     }
 
-    fn retain(&mut self, n: usize) {
+    pub(crate) fn retain(&mut self, n: usize) {
         use Operation::*;
         if n == 0 {
             return;
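
These three builder methods go from private to `pub(crate)` so the diff sinks in diff.rs can assemble a `ChangeSet` directly instead of routing everything through `Transaction::change`. For intuition, a crate-internal sketch (the function is illustrative, not part of the commit) of the call sequence such a sink emits; counts are in characters, and the retains must cover everything that is kept:

```rust
use crate::{ChangeSet, Rope, Tendril, Transaction};

/// Illustrative: turn "hello world" into "hello, world" with the builder ops.
fn builder_example() {
    let mut doc = Rope::from("hello world");

    let mut changes = ChangeSet::with_capacity(3);
    changes.retain(5); // keep "hello"
    let mut fragment = Tendril::new();
    fragment.push_str(","); // text spliced in after "hello"
    changes.insert(fragment);
    changes.retain(6); // keep " world"

    // A ChangeSet converts into a Transaction, just as compare_ropes does above.
    Transaction::from(changes).apply(&mut doc);
    debug_assert_eq!(doc, "hello, world");
}
```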
