1
- use crate :: { Rope , Transaction } ;
1
+ use std :: ops :: Range ;
2
2
3
- /// Compares `old` and `new` to generate a [`Transaction`] describing
4
- /// the steps required to get from `old` to `new`.
5
- pub fn compare_ropes ( old : & Rope , new : & Rope ) -> Transaction {
6
- // `similar` only works on contiguous data, so a `Rope` has
7
- // to be temporarily converted into a `String`.
8
- let old_converted = old. to_string ( ) ;
9
- let new_converted = new. to_string ( ) ;
10
-
11
- // A timeout is set so after 1 seconds, the algorithm will start
12
- // approximating. This is especially important for big `Rope`s or
13
- // `Rope`s that are extremely dissimilar to each other.
14
- let mut config = similar:: TextDiff :: configure ( ) ;
15
- config. timeout ( std:: time:: Duration :: from_secs ( 1 ) ) ;
16
-
17
- let diff = config. diff_chars ( & old_converted, & new_converted) ;
18
-
19
- // The current position of the change needs to be tracked to
20
- // construct the `Change`s.
21
- let mut pos = 0 ;
22
- Transaction :: change (
23
- old,
24
- diff. ops ( )
3
+ use imara_diff:: intern:: InternedInput ;
4
+ use imara_diff:: Algorithm ;
5
+ use ropey:: RopeSlice ;
6
+
7
+ use crate :: { ChangeSet , Rope , Tendril , Transaction } ;
8
+
9
+ /// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
10
+ struct CharChangeSetBuilder < ' a > {
11
+ res : & ' a mut ChangeSet ,
12
+ hunk : & ' a InternedInput < char > ,
13
+ pos : u32 ,
14
+ }
15
+
16
+ impl imara_diff:: Sink for CharChangeSetBuilder < ' _ > {
17
+ type Out = ( ) ;
18
+ fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
19
+ self . res . retain ( ( before. start - self . pos ) as usize ) ;
20
+ self . res . delete ( before. len ( ) ) ;
21
+ self . pos = before. end ;
22
+
23
+ let res = self . hunk . after [ after. start as usize ..after. end as usize ]
24
+ . iter ( )
25
+ . map ( |& token| self . hunk . interner [ token] )
26
+ . collect ( ) ;
27
+
28
+ self . res . insert ( res) ;
29
+ }
30
+
31
+ fn finish ( self ) -> Self :: Out {
32
+ self . res . retain ( self . hunk . before . len ( ) - self . pos as usize ) ;
33
+ }
34
+ }
35
+
36
+ struct LineChangeSetBuilder < ' a > {
37
+ res : ChangeSet ,
38
+ after : RopeSlice < ' a > ,
39
+ file : & ' a InternedInput < RopeSlice < ' a > > ,
40
+ current_hunk : InternedInput < char > ,
41
+ pos : u32 ,
42
+ }
43
+
44
+ impl imara_diff:: Sink for LineChangeSetBuilder < ' _ > {
45
+ type Out = ChangeSet ;
46
+
47
+ fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
48
+ use std:: fmt:: Write ;
49
+
50
+ let len = self . file . before [ self . pos as usize ..before. start as usize ]
25
51
. iter ( )
26
- . map ( |op| op. as_tag_tuple ( ) )
27
- . filter_map ( |( tag, old_range, new_range) | {
28
- // `old_pos..pos` is equivalent to `start..end` for where
29
- // the change should be applied.
30
- let old_pos = pos;
31
- pos += old_range. end - old_range. start ;
32
-
33
- match tag {
34
- // Semantically, inserts and replacements are the same thing.
35
- similar:: DiffTag :: Insert | similar:: DiffTag :: Replace => {
36
- // This is the text from the `new` rope that should be
37
- // inserted into `old`.
38
- let text: & str = {
39
- let start = new. char_to_byte ( new_range. start ) ;
40
- let end = new. char_to_byte ( new_range. end ) ;
41
- & new_converted[ start..end]
42
- } ;
43
- Some ( ( old_pos, pos, Some ( text. into ( ) ) ) )
52
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
53
+ . sum ( ) ;
54
+ self . res . retain ( len) ;
55
+ self . pos = before. end ;
56
+
57
+ // do not perform diffs on large hunks
58
+ let len_before = before. end - before. start ;
59
+ let len_after = after. end - after. start ;
60
+
61
+ // Pure insertions/removals do not require a character diff.
62
+ // Very large changes are ignored because their character diff is expensive to compute
63
+ // TODO adjust heuristic to detect large changes?
64
+ if len_before == 0
65
+ || len_after == 0
66
+ || len_after > 5 * len_before
67
+ || 5 * len_after < len_before && len_before > 10
68
+ || len_before + len_after > 200
69
+ {
70
+ let remove = self . file . before [ before. start as usize ..before. end as usize ]
71
+ . iter ( )
72
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
73
+ . sum ( ) ;
74
+ self . res . delete ( remove) ;
75
+ let mut fragment = Tendril :: new ( ) ;
76
+ if len_after > 500 {
77
+ // copying a rope line by line is slower then copying the entire
78
+ // rope. Use to_string for very large changes instead..
79
+ if self . file . after . len ( ) == after. end as usize {
80
+ if after. start == 0 {
81
+ fragment = self . after . to_string ( ) . into ( ) ;
82
+ } else {
83
+ let start = self . after . line_to_char ( after. start as usize ) ;
84
+ fragment = self . after . slice ( start..) . to_string ( ) . into ( ) ;
44
85
}
45
- similar:: DiffTag :: Delete => Some ( ( old_pos, pos, None ) ) ,
46
- similar:: DiffTag :: Equal => None ,
86
+ } else if after. start == 0 {
87
+ let end = self . after . line_to_char ( after. end as usize ) ;
88
+ fragment = self . after . slice ( ..end) . to_string ( ) . into ( ) ;
89
+ } else {
90
+ let start = self . after . line_to_char ( after. start as usize ) ;
91
+ let end = self . after . line_to_char ( after. end as usize ) ;
92
+ fragment = self . after . slice ( start..end) . to_string ( ) . into ( ) ;
47
93
}
48
- } ) ,
49
- )
94
+ } else {
95
+ for & line in & self . file . after [ after. start as usize ..after. end as usize ] {
96
+ for chunk in self . file . interner [ line] . chunks ( ) {
97
+ fragment. push_str ( chunk)
98
+ }
99
+ }
100
+ } ;
101
+ self . res . insert ( fragment) ;
102
+ } else {
103
+ // for reasonably small hunks, generating a ChangeSet from char diff can save memory
104
+ // TODO use a tokenizer (word diff?) for improved performance
105
+ let hunk_before = self . file . before [ before. start as usize ..before. end as usize ]
106
+ . iter ( )
107
+ . flat_map ( |& it| self . file . interner [ it] . chars ( ) ) ;
108
+ let hunk_after = self . file . after [ after. start as usize ..after. end as usize ]
109
+ . iter ( )
110
+ . flat_map ( |& it| self . file . interner [ it] . chars ( ) ) ;
111
+ self . current_hunk . update_before ( hunk_before) ;
112
+ self . current_hunk . update_after ( hunk_after) ;
113
+
114
+ // the histogram heuristic does not work as well
115
+ // for characters because the same characters often reoccur
116
+ // use myer diff instead
117
+ imara_diff:: diff (
118
+ Algorithm :: Myers ,
119
+ & self . current_hunk ,
120
+ CharChangeSetBuilder {
121
+ res : & mut self . res ,
122
+ hunk : & self . current_hunk ,
123
+ pos : 0 ,
124
+ } ,
125
+ ) ;
126
+
127
+ self . current_hunk . clear ( ) ;
128
+ }
129
+ }
130
+
131
+ fn finish ( mut self ) -> Self :: Out {
132
+ let len = self . file . before [ self . pos as usize ..]
133
+ . iter ( )
134
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
135
+ . sum ( ) ;
136
+
137
+ self . res . retain ( len) ;
138
+ self . res
139
+ }
140
+ }
141
+
142
+ struct RopeLines < ' a > ( RopeSlice < ' a > ) ;
143
+
144
+ impl < ' a > imara_diff:: intern:: TokenSource for RopeLines < ' a > {
145
+ type Token = RopeSlice < ' a > ;
146
+ // TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
147
+ type Tokenizer = ropey:: iter:: Lines < ' a > ;
148
+
149
+ fn tokenize ( & self ) -> Self :: Tokenizer {
150
+ self . 0 . lines ( )
151
+ }
152
+
153
+ fn estimate_tokens ( & self ) -> u32 {
154
+ // we can provide a perfect estimate which is very nice for performance
155
+ self . 0 . len_lines ( ) as u32
156
+ }
157
+ }
158
+
159
+ /// Compares `old` and `new` to generate a [`Transaction`] describing
160
+ /// the steps required to get from `old` to `new`.
161
+ pub fn compare_ropes ( before : & Rope , after : & Rope ) -> Transaction {
162
+ log:: error!( "diff" ) ;
163
+ let res = ChangeSet :: with_capacity ( 32 ) ;
164
+ let after = after. slice ( ..) ;
165
+ let file = InternedInput :: new ( RopeLines ( before. slice ( ..) ) , RopeLines ( after) ) ;
166
+ let builder = LineChangeSetBuilder {
167
+ res,
168
+ file : & file,
169
+ after,
170
+ pos : 0 ,
171
+ current_hunk : InternedInput :: default ( ) ,
172
+ } ;
173
+
174
+ imara_diff:: diff ( Algorithm :: Histogram , & file, builder) . into ( )
50
175
}
51
176
52
177
#[ cfg( test) ]
53
178
mod tests {
54
179
use super :: * ;
55
180
181
+ fn test_identity ( a : & str , b : & str ) {
182
+ let mut old = Rope :: from ( a) ;
183
+ let new = Rope :: from ( b) ;
184
+ compare_ropes ( & old, & new) . apply ( & mut old) ;
185
+ assert_eq ! ( old, new)
186
+ }
187
+
56
188
quickcheck:: quickcheck! {
57
189
fn test_compare_ropes( a: String , b: String ) -> bool {
58
190
let mut old = Rope :: from( a) ;
@@ -61,4 +193,25 @@ mod tests {
61
193
old == new
62
194
}
63
195
}
196
+
197
+ #[ test]
198
+ fn equal_files ( ) {
199
+ test_identity ( "foo" , "foo" )
200
+ }
201
+
202
+ #[ test]
203
+ fn trailing_newline ( ) {
204
+ test_identity ( "foo\n " , "foo" ) ;
205
+ test_identity ( "foo" , "foo\n " )
206
+ }
207
+
208
+ #[ test]
209
+ fn new_file ( ) {
210
+ test_identity ( "" , "foo" )
211
+ }
212
+
213
+ #[ test]
214
+ fn deleted_file ( ) {
215
+ test_identity ( "foo" , "" )
216
+ }
64
217
}
0 commit comments