1
- use crate :: { Rope , Transaction } ;
1
+ use std:: ops:: Range ;
2
+ use std:: time:: Instant ;
2
3
3
- /// Compares `old` and `new` to generate a [`Transaction`] describing
4
- /// the steps required to get from `old` to `new`.
5
- pub fn compare_ropes ( old : & Rope , new : & Rope ) -> Transaction {
6
- // `similar` only works on contiguous data, so a `Rope` has
7
- // to be temporarily converted into a `String`.
8
- let old_converted = old. to_string ( ) ;
9
- let new_converted = new. to_string ( ) ;
10
-
11
- // A timeout is set so after 1 seconds, the algorithm will start
12
- // approximating. This is especially important for big `Rope`s or
13
- // `Rope`s that are extremely dissimilar to each other.
14
- let mut config = similar:: TextDiff :: configure ( ) ;
15
- config. timeout ( std:: time:: Duration :: from_secs ( 1 ) ) ;
16
-
17
- let diff = config. diff_chars ( & old_converted, & new_converted) ;
18
-
19
- // The current position of the change needs to be tracked to
20
- // construct the `Change`s.
21
- let mut pos = 0 ;
22
- Transaction :: change (
23
- old,
24
- diff. ops ( )
4
+ use imara_diff:: intern:: InternedInput ;
5
+ use imara_diff:: Algorithm ;
6
+ use ropey:: RopeSlice ;
7
+
8
+ use crate :: { ChangeSet , Rope , Tendril , Transaction } ;
9
+
10
+ /// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
11
+ struct CharChangeSetBuilder < ' a > {
12
+ res : & ' a mut ChangeSet ,
13
+ hunk : & ' a InternedInput < char > ,
14
+ pos : u32 ,
15
+ }
16
+
17
+ impl imara_diff:: Sink for CharChangeSetBuilder < ' _ > {
18
+ type Out = ( ) ;
19
+ fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
20
+ self . res . retain ( ( before. start - self . pos ) as usize ) ;
21
+ self . res . delete ( before. len ( ) ) ;
22
+ self . pos = before. end ;
23
+
24
+ let res = self . hunk . after [ after. start as usize ..after. end as usize ]
25
+ . iter ( )
26
+ . map ( |& token| self . hunk . interner [ token] )
27
+ . collect ( ) ;
28
+
29
+ self . res . insert ( res) ;
30
+ }
31
+
32
+ fn finish ( self ) -> Self :: Out {
33
+ self . res . retain ( self . hunk . before . len ( ) - self . pos as usize ) ;
34
+ }
35
+ }
36
+
37
+ struct LineChangeSetBuilder < ' a > {
38
+ res : ChangeSet ,
39
+ after : RopeSlice < ' a > ,
40
+ file : & ' a InternedInput < RopeSlice < ' a > > ,
41
+ current_hunk : InternedInput < char > ,
42
+ pos : u32 ,
43
+ }
44
+
45
+ impl imara_diff:: Sink for LineChangeSetBuilder < ' _ > {
46
+ type Out = ChangeSet ;
47
+
48
+ fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
49
+ let len = self . file . before [ self . pos as usize ..before. start as usize ]
25
50
. iter ( )
26
- . map ( |op| op. as_tag_tuple ( ) )
27
- . filter_map ( |( tag, old_range, new_range) | {
28
- // `old_pos..pos` is equivalent to `start..end` for where
29
- // the change should be applied.
30
- let old_pos = pos;
31
- pos += old_range. end - old_range. start ;
32
-
33
- match tag {
34
- // Semantically, inserts and replacements are the same thing.
35
- similar:: DiffTag :: Insert | similar:: DiffTag :: Replace => {
36
- // This is the text from the `new` rope that should be
37
- // inserted into `old`.
38
- let text: & str = {
39
- let start = new. char_to_byte ( new_range. start ) ;
40
- let end = new. char_to_byte ( new_range. end ) ;
41
- & new_converted[ start..end]
42
- } ;
43
- Some ( ( old_pos, pos, Some ( text. into ( ) ) ) )
51
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
52
+ . sum ( ) ;
53
+ self . res . retain ( len) ;
54
+ self . pos = before. end ;
55
+
56
+ // do not perform diffs on large hunks
57
+ let len_before = before. end - before. start ;
58
+ let len_after = after. end - after. start ;
59
+
60
+ // Pure insertions/removals do not require a character diff.
61
+ // Very large changes are ignored because their character diff is expensive to compute
62
+ // TODO adjust heuristic to detect large changes?
63
+ if len_before == 0
64
+ || len_after == 0
65
+ || len_after > 5 * len_before
66
+ || 5 * len_after < len_before && len_before > 10
67
+ || len_before + len_after > 200
68
+ {
69
+ let remove = self . file . before [ before. start as usize ..before. end as usize ]
70
+ . iter ( )
71
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
72
+ . sum ( ) ;
73
+ self . res . delete ( remove) ;
74
+ let mut fragment = Tendril :: new ( ) ;
75
+ if len_after > 500 {
76
+ // copying a rope line by line is slower then copying the entire
77
+ // rope. Use to_string for very large changes instead..
78
+ if self . file . after . len ( ) == after. end as usize {
79
+ if after. start == 0 {
80
+ fragment = self . after . to_string ( ) . into ( ) ;
81
+ } else {
82
+ let start = self . after . line_to_char ( after. start as usize ) ;
83
+ fragment = self . after . slice ( start..) . to_string ( ) . into ( ) ;
44
84
}
45
- similar:: DiffTag :: Delete => Some ( ( old_pos, pos, None ) ) ,
46
- similar:: DiffTag :: Equal => None ,
85
+ } else if after. start == 0 {
86
+ let end = self . after . line_to_char ( after. end as usize ) ;
87
+ fragment = self . after . slice ( ..end) . to_string ( ) . into ( ) ;
88
+ } else {
89
+ let start = self . after . line_to_char ( after. start as usize ) ;
90
+ let end = self . after . line_to_char ( after. end as usize ) ;
91
+ fragment = self . after . slice ( start..end) . to_string ( ) . into ( ) ;
47
92
}
48
- } ) ,
49
- )
93
+ } else {
94
+ for & line in & self . file . after [ after. start as usize ..after. end as usize ] {
95
+ for chunk in self . file . interner [ line] . chunks ( ) {
96
+ fragment. push_str ( chunk)
97
+ }
98
+ }
99
+ } ;
100
+ self . res . insert ( fragment) ;
101
+ } else {
102
+ // for reasonably small hunks, generating a ChangeSet from char diff can save memory
103
+ // TODO use a tokenizer (word diff?) for improved performance
104
+ let hunk_before = self . file . before [ before. start as usize ..before. end as usize ]
105
+ . iter ( )
106
+ . flat_map ( |& it| self . file . interner [ it] . chars ( ) ) ;
107
+ let hunk_after = self . file . after [ after. start as usize ..after. end as usize ]
108
+ . iter ( )
109
+ . flat_map ( |& it| self . file . interner [ it] . chars ( ) ) ;
110
+ self . current_hunk . update_before ( hunk_before) ;
111
+ self . current_hunk . update_after ( hunk_after) ;
112
+
113
+ // the histogram heuristic does not work as well
114
+ // for characters because the same characters often reoccur
115
+ // use myer diff instead
116
+ imara_diff:: diff (
117
+ Algorithm :: Myers ,
118
+ & self . current_hunk ,
119
+ CharChangeSetBuilder {
120
+ res : & mut self . res ,
121
+ hunk : & self . current_hunk ,
122
+ pos : 0 ,
123
+ } ,
124
+ ) ;
125
+
126
+ self . current_hunk . clear ( ) ;
127
+ }
128
+ }
129
+
130
+ fn finish ( mut self ) -> Self :: Out {
131
+ let len = self . file . before [ self . pos as usize ..]
132
+ . iter ( )
133
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
134
+ . sum ( ) ;
135
+
136
+ self . res . retain ( len) ;
137
+ self . res
138
+ }
139
+ }
140
+
141
+ struct RopeLines < ' a > ( RopeSlice < ' a > ) ;
142
+
143
+ impl < ' a > imara_diff:: intern:: TokenSource for RopeLines < ' a > {
144
+ type Token = RopeSlice < ' a > ;
145
+ // TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
146
+ type Tokenizer = ropey:: iter:: Lines < ' a > ;
147
+
148
+ fn tokenize ( & self ) -> Self :: Tokenizer {
149
+ self . 0 . lines ( )
150
+ }
151
+
152
+ fn estimate_tokens ( & self ) -> u32 {
153
+ // we can provide a perfect estimate which is very nice for performance
154
+ self . 0 . len_lines ( ) as u32
155
+ }
156
+ }
157
+
158
+ /// Compares `old` and `new` to generate a [`Transaction`] describing
159
+ /// the steps required to get from `old` to `new`.
160
+ pub fn compare_ropes ( before : & Rope , after : & Rope ) -> Transaction {
161
+ let start = Instant :: now ( ) ;
162
+ let res = ChangeSet :: with_capacity ( 32 ) ;
163
+ let after = after. slice ( ..) ;
164
+ let file = InternedInput :: new ( RopeLines ( before. slice ( ..) ) , RopeLines ( after) ) ;
165
+ let builder = LineChangeSetBuilder {
166
+ res,
167
+ file : & file,
168
+ after,
169
+ pos : 0 ,
170
+ current_hunk : InternedInput :: default ( ) ,
171
+ } ;
172
+
173
+ let res = imara_diff:: diff ( Algorithm :: Histogram , & file, builder) . into ( ) ;
174
+
175
+ log:: debug!(
176
+ "rope diff took {}s" ,
177
+ Instant :: now( ) . duration_since( start) . as_secs_f64( )
178
+ ) ;
179
+ res
50
180
}
51
181
52
182
#[ cfg( test) ]
53
183
mod tests {
54
184
use super :: * ;
55
185
186
+ fn test_identity ( a : & str , b : & str ) {
187
+ let mut old = Rope :: from ( a) ;
188
+ let new = Rope :: from ( b) ;
189
+ compare_ropes ( & old, & new) . apply ( & mut old) ;
190
+ assert_eq ! ( old, new) ;
191
+ }
192
+
56
193
quickcheck:: quickcheck! {
57
194
fn test_compare_ropes( a: String , b: String ) -> bool {
58
195
let mut old = Rope :: from( a) ;
@@ -61,4 +198,25 @@ mod tests {
61
198
old == new
62
199
}
63
200
}
201
+
202
+ #[ test]
203
+ fn equal_files ( ) {
204
+ test_identity ( "foo" , "foo" ) ;
205
+ }
206
+
207
+ #[ test]
208
+ fn trailing_newline ( ) {
209
+ test_identity ( "foo\n " , "foo" ) ;
210
+ test_identity ( "foo" , "foo\n " ) ;
211
+ }
212
+
213
+ #[ test]
214
+ fn new_file ( ) {
215
+ test_identity ( "" , "foo" ) ;
216
+ }
217
+
218
+ #[ test]
219
+ fn deleted_file ( ) {
220
+ test_identity ( "foo" , "" ) ;
221
+ }
64
222
}
0 commit comments