1
- use crate :: { Rope , Transaction } ;
1
+ use std :: ops :: Range ;
2
2
3
- /// Compares `old` and `new` to generate a [`Transaction`] describing
4
- /// the steps required to get from `old` to `new`.
5
- pub fn compare_ropes ( old : & Rope , new : & Rope ) -> Transaction {
6
- // `similar` only works on contiguous data, so a `Rope` has
7
- // to be temporarily converted into a `String`.
8
- let old_converted = old. to_string ( ) ;
9
- let new_converted = new. to_string ( ) ;
10
-
11
- // A timeout is set so after 1 seconds, the algorithm will start
12
- // approximating. This is especially important for big `Rope`s or
13
- // `Rope`s that are extremely dissimilar to each other.
14
- let mut config = similar:: TextDiff :: configure ( ) ;
15
- config. timeout ( std:: time:: Duration :: from_secs ( 1 ) ) ;
16
-
17
- let diff = config. diff_chars ( & old_converted, & new_converted) ;
18
-
19
- // The current position of the change needs to be tracked to
20
- // construct the `Change`s.
21
- let mut pos = 0 ;
22
- Transaction :: change (
23
- old,
24
- diff. ops ( )
3
+ use imara_diff:: intern:: InternedInput ;
4
+ use imara_diff:: Algorithm ;
5
+ use ropey:: RopeSlice ;
6
+
7
+ use crate :: { ChangeSet , Rope , Tendril , Transaction } ;
8
+
9
+ /// A `imara_diff::Sink` that builds a `ChangeSet` for a character diff of a hunk
10
+ struct CharChangeSetBuilder < ' a > {
11
+ res : & ' a mut ChangeSet ,
12
+ hunk : & ' a InternedInput < char > ,
13
+ pos : u32 ,
14
+ }
15
+
16
+ impl imara_diff:: Sink for CharChangeSetBuilder < ' _ > {
17
+ type Out = ( ) ;
18
+ fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
19
+ self . res . retain ( ( before. start - self . pos ) as usize ) ;
20
+ self . res . delete ( before. len ( ) ) ;
21
+ self . pos = before. end ;
22
+
23
+ let res = self . hunk . after [ after. start as usize ..after. end as usize ]
24
+ . iter ( )
25
+ . map ( |& token| self . hunk . interner [ token] )
26
+ . collect ( ) ;
27
+
28
+ self . res . insert ( res) ;
29
+ }
30
+
31
+ fn finish ( self ) -> Self :: Out {
32
+ self . res . retain ( self . hunk . before . len ( ) - self . pos as usize ) ;
33
+ }
34
+ }
35
+
36
+ struct LineChangeSetBuilder < ' a > {
37
+ res : ChangeSet ,
38
+ after : RopeSlice < ' a > ,
39
+ file : & ' a InternedInput < RopeSlice < ' a > > ,
40
+ current_hunk : InternedInput < char > ,
41
+ pos : u32 ,
42
+ }
43
+
44
+ impl imara_diff:: Sink for LineChangeSetBuilder < ' _ > {
45
+ type Out = ChangeSet ;
46
+
47
+ fn process_change ( & mut self , before : Range < u32 > , after : Range < u32 > ) {
48
+ let len = self . file . before [ self . pos as usize ..before. start as usize ]
25
49
. iter ( )
26
- . map ( |op| op. as_tag_tuple ( ) )
27
- . filter_map ( |( tag, old_range, new_range) | {
28
- // `old_pos..pos` is equivalent to `start..end` for where
29
- // the change should be applied.
30
- let old_pos = pos;
31
- pos += old_range. end - old_range. start ;
32
-
33
- match tag {
34
- // Semantically, inserts and replacements are the same thing.
35
- similar:: DiffTag :: Insert | similar:: DiffTag :: Replace => {
36
- // This is the text from the `new` rope that should be
37
- // inserted into `old`.
38
- let text: & str = {
39
- let start = new. char_to_byte ( new_range. start ) ;
40
- let end = new. char_to_byte ( new_range. end ) ;
41
- & new_converted[ start..end]
42
- } ;
43
- Some ( ( old_pos, pos, Some ( text. into ( ) ) ) )
50
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
51
+ . sum ( ) ;
52
+ self . res . retain ( len) ;
53
+ self . pos = before. end ;
54
+
55
+ // do not perform diffs on large hunks
56
+ let len_before = before. end - before. start ;
57
+ let len_after = after. end - after. start ;
58
+
59
+ // Pure insertions/removals do not require a character diff.
60
+ // Very large changes are ignored because their character diff is expensive to compute
61
+ // TODO adjust heuristic to detect large changes?
62
+ if len_before == 0
63
+ || len_after == 0
64
+ || len_after > 5 * len_before
65
+ || 5 * len_after < len_before && len_before > 10
66
+ || len_before + len_after > 200
67
+ {
68
+ let remove = self . file . before [ before. start as usize ..before. end as usize ]
69
+ . iter ( )
70
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
71
+ . sum ( ) ;
72
+ self . res . delete ( remove) ;
73
+ let mut fragment = Tendril :: new ( ) ;
74
+ if len_after > 500 {
75
+ // copying a rope line by line is slower then copying the entire
76
+ // rope. Use to_string for very large changes instead..
77
+ if self . file . after . len ( ) == after. end as usize {
78
+ if after. start == 0 {
79
+ fragment = self . after . to_string ( ) . into ( ) ;
80
+ } else {
81
+ let start = self . after . line_to_char ( after. start as usize ) ;
82
+ fragment = self . after . slice ( start..) . to_string ( ) . into ( ) ;
44
83
}
45
- similar:: DiffTag :: Delete => Some ( ( old_pos, pos, None ) ) ,
46
- similar:: DiffTag :: Equal => None ,
84
+ } else if after. start == 0 {
85
+ let end = self . after . line_to_char ( after. end as usize ) ;
86
+ fragment = self . after . slice ( ..end) . to_string ( ) . into ( ) ;
87
+ } else {
88
+ let start = self . after . line_to_char ( after. start as usize ) ;
89
+ let end = self . after . line_to_char ( after. end as usize ) ;
90
+ fragment = self . after . slice ( start..end) . to_string ( ) . into ( ) ;
47
91
}
48
- } ) ,
49
- )
92
+ } else {
93
+ for & line in & self . file . after [ after. start as usize ..after. end as usize ] {
94
+ for chunk in self . file . interner [ line] . chunks ( ) {
95
+ fragment. push_str ( chunk)
96
+ }
97
+ }
98
+ } ;
99
+ self . res . insert ( fragment) ;
100
+ } else {
101
+ // for reasonably small hunks, generating a ChangeSet from char diff can save memory
102
+ // TODO use a tokenizer (word diff?) for improved performance
103
+ let hunk_before = self . file . before [ before. start as usize ..before. end as usize ]
104
+ . iter ( )
105
+ . flat_map ( |& it| self . file . interner [ it] . chars ( ) ) ;
106
+ let hunk_after = self . file . after [ after. start as usize ..after. end as usize ]
107
+ . iter ( )
108
+ . flat_map ( |& it| self . file . interner [ it] . chars ( ) ) ;
109
+ self . current_hunk . update_before ( hunk_before) ;
110
+ self . current_hunk . update_after ( hunk_after) ;
111
+
112
+ // the histogram heuristic does not work as well
113
+ // for characters because the same characters often reoccur
114
+ // use myer diff instead
115
+ imara_diff:: diff (
116
+ Algorithm :: Myers ,
117
+ & self . current_hunk ,
118
+ CharChangeSetBuilder {
119
+ res : & mut self . res ,
120
+ hunk : & self . current_hunk ,
121
+ pos : 0 ,
122
+ } ,
123
+ ) ;
124
+
125
+ self . current_hunk . clear ( ) ;
126
+ }
127
+ }
128
+
129
+ fn finish ( mut self ) -> Self :: Out {
130
+ let len = self . file . before [ self . pos as usize ..]
131
+ . iter ( )
132
+ . map ( |& it| self . file . interner [ it] . len_chars ( ) )
133
+ . sum ( ) ;
134
+
135
+ self . res . retain ( len) ;
136
+ self . res
137
+ }
138
+ }
139
+
140
+ struct RopeLines < ' a > ( RopeSlice < ' a > ) ;
141
+
142
+ impl < ' a > imara_diff:: intern:: TokenSource for RopeLines < ' a > {
143
+ type Token = RopeSlice < ' a > ;
144
+ // TODO: improve performance of lines iterator (https://github.com/cessen/ropey/issues/25)
145
+ type Tokenizer = ropey:: iter:: Lines < ' a > ;
146
+
147
+ fn tokenize ( & self ) -> Self :: Tokenizer {
148
+ self . 0 . lines ( )
149
+ }
150
+
151
+ fn estimate_tokens ( & self ) -> u32 {
152
+ // we can provide a perfect estimate which is very nice for performance
153
+ self . 0 . len_lines ( ) as u32
154
+ }
155
+ }
156
+
157
+ /// Compares `old` and `new` to generate a [`Transaction`] describing
158
+ /// the steps required to get from `old` to `new`.
159
+ pub fn compare_ropes ( before : & Rope , after : & Rope ) -> Transaction {
160
+ log:: error!( "diff" ) ;
161
+ let res = ChangeSet :: with_capacity ( 32 ) ;
162
+ let after = after. slice ( ..) ;
163
+ let file = InternedInput :: new ( RopeLines ( before. slice ( ..) ) , RopeLines ( after) ) ;
164
+ let builder = LineChangeSetBuilder {
165
+ res,
166
+ file : & file,
167
+ after,
168
+ pos : 0 ,
169
+ current_hunk : InternedInput :: default ( ) ,
170
+ } ;
171
+
172
+ imara_diff:: diff ( Algorithm :: Histogram , & file, builder) . into ( )
50
173
}
51
174
52
175
#[ cfg( test) ]
53
176
mod tests {
54
177
use super :: * ;
55
178
179
+ fn test_identity ( a : & str , b : & str ) {
180
+ let mut old = Rope :: from ( a) ;
181
+ let new = Rope :: from ( b) ;
182
+ compare_ropes ( & old, & new) . apply ( & mut old) ;
183
+ assert_eq ! ( old, new) ;
184
+ }
185
+
56
186
quickcheck:: quickcheck! {
57
187
fn test_compare_ropes( a: String , b: String ) -> bool {
58
188
let mut old = Rope :: from( a) ;
@@ -61,4 +191,25 @@ mod tests {
61
191
old == new
62
192
}
63
193
}
194
+
195
+ #[ test]
196
+ fn equal_files ( ) {
197
+ test_identity ( "foo" , "foo" ) ;
198
+ }
199
+
200
+ #[ test]
201
+ fn trailing_newline ( ) {
202
+ test_identity ( "foo\n " , "foo" ) ;
203
+ test_identity ( "foo" , "foo\n " ) ;
204
+ }
205
+
206
+ #[ test]
207
+ fn new_file ( ) {
208
+ test_identity ( "" , "foo" ) ;
209
+ }
210
+
211
+ #[ test]
212
+ fn deleted_file ( ) {
213
+ test_identity ( "foo" , "" ) ;
214
+ }
64
215
}
0 commit comments