@@ -27,181 +27,172 @@ enum State {
27
27
DquoteEscaped ,
28
28
}
29
29
30
- /// Get the vec of escaped / quoted / doublequoted filenames from the input str
31
- pub fn shellwords ( input : & str ) -> Vec < Cow < ' _ , str > > {
32
- use State :: * ;
30
+ pub struct Shellwords < ' a > {
31
+ state : State ,
32
+ /// Shellwords where whitespace and escapes has been resolved.
33
+ words : Vec < Cow < ' a , str > > ,
34
+ /// The parts of the input that are divided into shellwords. This can be
35
+ /// used to retrieve the original text for a given word by looking up the
36
+ /// same index in the Vec as the word in `words`.
37
+ parts : Vec < & ' a str > ,
38
+ }
33
39
34
- let mut state = Unquoted ;
35
- let mut args : Vec < Cow < str > > = Vec :: new ( ) ;
36
- let mut escaped = String :: with_capacity ( input . len ( ) ) ;
40
+ impl < ' a > From < & ' a str > for Shellwords < ' a > {
41
+ fn from ( input : & ' a str ) -> Self {
42
+ use State :: * ;
37
43
38
- let mut start = 0 ;
39
- let mut end = 0 ;
44
+ let mut state = Unquoted ;
45
+ let mut words = Vec :: new ( ) ;
46
+ let mut parts = Vec :: new ( ) ;
47
+ let mut escaped = String :: with_capacity ( input. len ( ) ) ;
40
48
41
- for ( i, c) in input. char_indices ( ) {
42
- state = match state {
43
- OnWhitespace => match c {
44
- '"' => {
45
- end = i;
46
- Dquoted
47
- }
48
- '\'' => {
49
- end = i;
50
- Quoted
51
- }
52
- '\\' => {
53
- if cfg ! ( unix) {
54
- escaped. push_str ( & input[ start..i] ) ;
55
- start = i + 1 ;
56
- UnquotedEscaped
57
- } else {
49
+ let mut part_start = 0 ;
50
+ let mut unescaped_start = 0 ;
51
+ let mut end = 0 ;
52
+
53
+ for ( i, c) in input. char_indices ( ) {
54
+ state = match state {
55
+ OnWhitespace => match c {
56
+ '"' => {
57
+ end = i;
58
+ Dquoted
59
+ }
60
+ '\'' => {
61
+ end = i;
62
+ Quoted
63
+ }
64
+ '\\' => {
65
+ if cfg ! ( unix) {
66
+ escaped. push_str ( & input[ unescaped_start..i] ) ;
67
+ unescaped_start = i + 1 ;
68
+ UnquotedEscaped
69
+ } else {
70
+ OnWhitespace
71
+ }
72
+ }
73
+ c if c. is_ascii_whitespace ( ) => {
74
+ end = i;
58
75
OnWhitespace
59
76
}
60
- }
61
- c if c. is_ascii_whitespace ( ) => {
62
- end = i;
63
- OnWhitespace
64
- }
65
- _ => Unquoted ,
66
- } ,
67
- Unquoted => match c {
68
- '\\' => {
69
- if cfg ! ( unix) {
70
- escaped. push_str ( & input[ start..i] ) ;
71
- start = i + 1 ;
72
- UnquotedEscaped
73
- } else {
74
- Unquoted
77
+ _ => Unquoted ,
78
+ } ,
79
+ Unquoted => match c {
80
+ '\\' => {
81
+ if cfg ! ( unix) {
82
+ escaped. push_str ( & input[ unescaped_start..i] ) ;
83
+ unescaped_start = i + 1 ;
84
+ UnquotedEscaped
85
+ } else {
86
+ Unquoted
87
+ }
75
88
}
76
- }
77
- c if c. is_ascii_whitespace ( ) => {
78
- end = i;
79
- OnWhitespace
80
- }
81
- _ => Unquoted ,
82
- } ,
83
- UnquotedEscaped => Unquoted ,
84
- Quoted => match c {
85
- '\\' => {
86
- if cfg ! ( unix) {
87
- escaped. push_str ( & input[ start..i] ) ;
88
- start = i + 1 ;
89
- QuoteEscaped
90
- } else {
91
- Quoted
89
+ c if c. is_ascii_whitespace ( ) => {
90
+ end = i;
91
+ OnWhitespace
92
92
}
93
- }
94
- '\'' => {
95
- end = i;
96
- OnWhitespace
97
- }
98
- _ => Quoted ,
99
- } ,
100
- QuoteEscaped => Quoted ,
101
- Dquoted => match c {
102
- '\\' => {
103
- if cfg ! ( unix) {
104
- escaped. push_str ( & input[ start..i] ) ;
105
- start = i + 1 ;
106
- DquoteEscaped
107
- } else {
108
- Dquoted
93
+ _ => Unquoted ,
94
+ } ,
95
+ UnquotedEscaped => Unquoted ,
96
+ Quoted => match c {
97
+ '\\' => {
98
+ if cfg ! ( unix) {
99
+ escaped. push_str ( & input[ unescaped_start..i] ) ;
100
+ unescaped_start = i + 1 ;
101
+ QuoteEscaped
102
+ } else {
103
+ Quoted
104
+ }
109
105
}
110
- }
111
- '"' => {
112
- end = i;
113
- OnWhitespace
114
- }
115
- _ => Dquoted ,
116
- } ,
117
- DquoteEscaped => Dquoted ,
118
- } ;
106
+ '\'' => {
107
+ end = i;
108
+ OnWhitespace
109
+ }
110
+ _ => Quoted ,
111
+ } ,
112
+ QuoteEscaped => Quoted ,
113
+ Dquoted => match c {
114
+ '\\' => {
115
+ if cfg ! ( unix) {
116
+ escaped. push_str ( & input[ unescaped_start..i] ) ;
117
+ unescaped_start = i + 1 ;
118
+ DquoteEscaped
119
+ } else {
120
+ Dquoted
121
+ }
122
+ }
123
+ '"' => {
124
+ end = i;
125
+ OnWhitespace
126
+ }
127
+ _ => Dquoted ,
128
+ } ,
129
+ DquoteEscaped => Dquoted ,
130
+ } ;
119
131
120
- if i >= input. len ( ) - 1 && end == 0 {
121
- end = i + 1 ;
122
- }
132
+ if i >= input. len ( ) - 1 && end == 0 {
133
+ end = i + 1 ;
134
+ }
123
135
124
- if end > 0 {
125
- let esc_trim = escaped. trim ( ) ;
126
- let inp = & input[ start ..end] ;
136
+ if end > 0 {
137
+ let esc_trim = escaped. trim ( ) ;
138
+ let inp = & input[ unescaped_start ..end] ;
127
139
128
- if !( esc_trim. is_empty ( ) && inp. trim ( ) . is_empty ( ) ) {
129
- if esc_trim. is_empty ( ) {
130
- args. push ( inp. into ( ) ) ;
131
- } else {
132
- args. push ( [ escaped, inp. into ( ) ] . concat ( ) . into ( ) ) ;
133
- escaped = "" . to_string ( ) ;
140
+ if !( esc_trim. is_empty ( ) && inp. trim ( ) . is_empty ( ) ) {
141
+ if esc_trim. is_empty ( ) {
142
+ words. push ( inp. into ( ) ) ;
143
+ parts. push ( inp) ;
144
+ } else {
145
+ words. push ( [ escaped, inp. into ( ) ] . concat ( ) . into ( ) ) ;
146
+ parts. push ( & input[ part_start..end] ) ;
147
+ escaped = "" . to_string ( ) ;
148
+ }
134
149
}
150
+ unescaped_start = i + 1 ;
151
+ part_start = i + 1 ;
152
+ end = 0 ;
135
153
}
136
- start = i + 1 ;
137
- end = 0 ;
138
154
}
139
- }
140
- args
141
- }
142
155
143
- /// Checks that the input ends with an ascii whitespace character which is
144
- /// not escaped.
145
- ///
146
- /// # Examples
147
- ///
148
- /// ```rust
149
- /// use helix_core::shellwords::ends_with_whitespace;
150
- /// assert_eq!(ends_with_whitespace(" "), true);
151
- /// assert_eq!(ends_with_whitespace(":open "), true);
152
- /// assert_eq!(ends_with_whitespace(":open foo.txt "), true);
153
- /// assert_eq!(ends_with_whitespace(":open"), false);
154
- /// #[cfg(unix)]
155
- /// assert_eq!(ends_with_whitespace(":open a\\ "), false);
156
- /// #[cfg(unix)]
157
- /// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false);
158
- /// ```
159
- pub fn ends_with_whitespace ( input : & str ) -> bool {
160
- use State :: * ;
156
+ debug_assert ! ( words. len( ) == parts. len( ) ) ;
161
157
162
- // Fast-lane: the input must end with a whitespace character
163
- // regardless of quoting.
164
- if !input. ends_with ( |c : char | c. is_ascii_whitespace ( ) ) {
165
- return false ;
158
+ Self {
159
+ state,
160
+ words,
161
+ parts,
162
+ }
166
163
}
164
+ }
167
165
168
- let mut state = Unquoted ;
166
+ impl < ' a > Shellwords < ' a > {
167
+ /// Checks that the input ends with a whitespace character which is not escaped.
168
+ ///
169
+ /// # Examples
170
+ ///
171
+ /// ```rust
172
+ /// use helix_core::shellwords::Shellwords;
173
+ /// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
174
+ /// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
175
+ /// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
176
+ /// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
177
+ /// #[cfg(unix)]
178
+ /// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
179
+ /// #[cfg(unix)]
180
+ /// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
181
+ /// ```
182
+ pub fn ends_with_whitespace ( & self ) -> bool {
183
+ matches ! ( self . state, State :: OnWhitespace )
184
+ }
169
185
170
- for c in input. chars ( ) {
171
- state = match state {
172
- OnWhitespace => match c {
173
- '"' => Dquoted ,
174
- '\'' => Quoted ,
175
- '\\' if cfg ! ( unix) => UnquotedEscaped ,
176
- '\\' => OnWhitespace ,
177
- c if c. is_ascii_whitespace ( ) => OnWhitespace ,
178
- _ => Unquoted ,
179
- } ,
180
- Unquoted => match c {
181
- '\\' if cfg ! ( unix) => UnquotedEscaped ,
182
- '\\' => Unquoted ,
183
- c if c. is_ascii_whitespace ( ) => OnWhitespace ,
184
- _ => Unquoted ,
185
- } ,
186
- UnquotedEscaped => Unquoted ,
187
- Quoted => match c {
188
- '\\' if cfg ! ( unix) => QuoteEscaped ,
189
- '\\' => Quoted ,
190
- '\'' => OnWhitespace ,
191
- _ => Quoted ,
192
- } ,
193
- QuoteEscaped => Quoted ,
194
- Dquoted => match c {
195
- '\\' if cfg ! ( unix) => DquoteEscaped ,
196
- '\\' => Dquoted ,
197
- '"' => OnWhitespace ,
198
- _ => Dquoted ,
199
- } ,
200
- DquoteEscaped => Dquoted ,
201
- }
186
+ /// Returns the list of shellwords calculated from the input string.
187
+ pub fn words ( & self ) -> & [ Cow < ' a , str > ] {
188
+ & self . words
202
189
}
203
190
204
- matches ! ( state, OnWhitespace )
191
+ /// Returns a list of strings which correspond to [words] but represent the original
192
+ /// text in the input string - including escape characters - without separating whitespace.
193
+ pub fn parts ( & self ) -> & [ & ' a str ] {
194
+ & self . parts
195
+ }
205
196
}
206
197
207
198
#[ cfg( test) ]
@@ -212,7 +203,8 @@ mod test {
212
203
#[ cfg( windows) ]
213
204
fn test_normal ( ) {
214
205
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
215
- let result = shellwords ( input) ;
206
+ let shellwords = Shellwords :: from ( input) ;
207
+ let result = shellwords. words ( ) . to_vec ( ) ;
216
208
let expected = vec ! [
217
209
Cow :: from( ":o" ) ,
218
210
Cow :: from( "single_word" ) ,
@@ -230,7 +222,8 @@ mod test {
230
222
#[ cfg( unix) ]
231
223
fn test_normal ( ) {
232
224
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"# ;
233
- let result = shellwords ( input) ;
225
+ let shellwords = Shellwords :: from ( input) ;
226
+ let result = shellwords. words ( ) . to_vec ( ) ;
234
227
let expected = vec ! [
235
228
Cow :: from( ":o" ) ,
236
229
Cow :: from( "single_word" ) ,
@@ -247,7 +240,8 @@ mod test {
247
240
fn test_quoted ( ) {
248
241
let quoted =
249
242
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"# ;
250
- let result = shellwords ( quoted) ;
243
+ let shellwords = Shellwords :: from ( quoted) ;
244
+ let result = shellwords. words ( ) . to_vec ( ) ;
251
245
let expected = vec ! [
252
246
Cow :: from( ":o" ) ,
253
247
Cow :: from( "single_word" ) ,
@@ -262,7 +256,8 @@ mod test {
262
256
#[ cfg( unix) ]
263
257
fn test_dquoted ( ) {
264
258
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"# ;
265
- let result = shellwords ( dquoted) ;
259
+ let shellwords = Shellwords :: from ( dquoted) ;
260
+ let result = shellwords. words ( ) . to_vec ( ) ;
266
261
let expected = vec ! [
267
262
Cow :: from( ":o" ) ,
268
263
Cow :: from( "single_word" ) ,
@@ -277,7 +272,8 @@ mod test {
277
272
#[ cfg( unix) ]
278
273
fn test_mixed ( ) {
279
274
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"# ;
280
- let result = shellwords ( dquoted) ;
275
+ let shellwords = Shellwords :: from ( dquoted) ;
276
+ let result = shellwords. words ( ) . to_vec ( ) ;
281
277
let expected = vec ! [
282
278
Cow :: from( ":o" ) ,
283
279
Cow :: from( "single_word" ) ,
@@ -298,7 +294,8 @@ mod test {
298
294
fn test_lists ( ) {
299
295
let input =
300
296
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"# ;
301
- let result = shellwords ( input) ;
297
+ let shellwords = Shellwords :: from ( input) ;
298
+ let result = shellwords. words ( ) . to_vec ( ) ;
302
299
let expected = vec ! [
303
300
Cow :: from( ":set" ) ,
304
301
Cow :: from( "statusline.center" ) ,
@@ -322,4 +319,10 @@ mod test {
322
319
assert_eq ! ( escape( "foobar" . into( ) ) , Cow :: Borrowed ( "foobar" ) ) ;
323
320
assert_eq ! ( escape( "foo bar" . into( ) ) , Cow :: Borrowed ( "\" foo bar\" " ) ) ;
324
321
}
322
+
323
+ #[ test]
324
+ fn test_parts ( ) {
325
+ assert_eq ! ( Shellwords :: from( ":o a" ) . parts( ) , & [ ":o" , "a" ] ) ;
326
+ assert_eq ! ( Shellwords :: from( ":o a\\ " ) . parts( ) , & [ ":o" , "a\\ " ] ) ;
327
+ }
325
328
}
0 commit comments