Skip to content

Commit 7043556

Browse files
committed
Capture word parts while calculating shellwords
This fixes an edge case for completing shellwords. With a file "a b.txt" in the current directory, the sequence `:open a\<tab>` will result in the prompt containing `:open aa\ b.txt`. This is because the length of the input which is trimmed when replacing with completion is calculated on the part of the input which is parsed by shellwords and then escaped (in a separate operation), which is lossy. In this case it loses the trailing backslash. The fix provided here refactors shellwords to track both the _words_ (shellwords with quotes and escapes resolved) and the _parts_ (chunks of the input which turned into each word, with separating whitespace removed). When calculating how much of the input to delete when replacing with the completion item, we now use the length of the last part. This also allows us to eliminate the duplicate work done in the `ends_with_whitespace` check.
1 parent eddf9f0 commit 7043556

File tree

2 files changed

+179
-171
lines changed

2 files changed

+179
-171
lines changed

helix-core/src/shellwords.rs

Lines changed: 163 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -27,181 +27,172 @@ enum State {
2727
DquoteEscaped,
2828
}
2929

30-
/// Get the vec of escaped / quoted / doublequoted filenames from the input str
31-
pub fn shellwords(input: &str) -> Vec<Cow<'_, str>> {
32-
use State::*;
30+
pub struct Shellwords<'a> {
31+
state: State,
32+
/// Shellwords where whitespace and escapes has been resolved.
33+
words: Vec<Cow<'a, str>>,
34+
/// The parts of the input that are divided into shellwords. This can be
35+
/// used to retrieve the original text for a given word by looking up the
36+
/// same index in the Vec as the word in `words`.
37+
parts: Vec<&'a str>,
38+
}
3339

34-
let mut state = Unquoted;
35-
let mut args: Vec<Cow<str>> = Vec::new();
36-
let mut escaped = String::with_capacity(input.len());
40+
impl<'a> From<&'a str> for Shellwords<'a> {
41+
fn from(input: &'a str) -> Self {
42+
use State::*;
3743

38-
let mut start = 0;
39-
let mut end = 0;
44+
let mut state = Unquoted;
45+
let mut words = Vec::new();
46+
let mut parts = Vec::new();
47+
let mut escaped = String::with_capacity(input.len());
4048

41-
for (i, c) in input.char_indices() {
42-
state = match state {
43-
OnWhitespace => match c {
44-
'"' => {
45-
end = i;
46-
Dquoted
47-
}
48-
'\'' => {
49-
end = i;
50-
Quoted
51-
}
52-
'\\' => {
53-
if cfg!(unix) {
54-
escaped.push_str(&input[start..i]);
55-
start = i + 1;
56-
UnquotedEscaped
57-
} else {
49+
let mut part_start = 0;
50+
let mut unescaped_start = 0;
51+
let mut end = 0;
52+
53+
for (i, c) in input.char_indices() {
54+
state = match state {
55+
OnWhitespace => match c {
56+
'"' => {
57+
end = i;
58+
Dquoted
59+
}
60+
'\'' => {
61+
end = i;
62+
Quoted
63+
}
64+
'\\' => {
65+
if cfg!(unix) {
66+
escaped.push_str(&input[unescaped_start..i]);
67+
unescaped_start = i + 1;
68+
UnquotedEscaped
69+
} else {
70+
OnWhitespace
71+
}
72+
}
73+
c if c.is_ascii_whitespace() => {
74+
end = i;
5875
OnWhitespace
5976
}
60-
}
61-
c if c.is_ascii_whitespace() => {
62-
end = i;
63-
OnWhitespace
64-
}
65-
_ => Unquoted,
66-
},
67-
Unquoted => match c {
68-
'\\' => {
69-
if cfg!(unix) {
70-
escaped.push_str(&input[start..i]);
71-
start = i + 1;
72-
UnquotedEscaped
73-
} else {
74-
Unquoted
77+
_ => Unquoted,
78+
},
79+
Unquoted => match c {
80+
'\\' => {
81+
if cfg!(unix) {
82+
escaped.push_str(&input[unescaped_start..i]);
83+
unescaped_start = i + 1;
84+
UnquotedEscaped
85+
} else {
86+
Unquoted
87+
}
7588
}
76-
}
77-
c if c.is_ascii_whitespace() => {
78-
end = i;
79-
OnWhitespace
80-
}
81-
_ => Unquoted,
82-
},
83-
UnquotedEscaped => Unquoted,
84-
Quoted => match c {
85-
'\\' => {
86-
if cfg!(unix) {
87-
escaped.push_str(&input[start..i]);
88-
start = i + 1;
89-
QuoteEscaped
90-
} else {
91-
Quoted
89+
c if c.is_ascii_whitespace() => {
90+
end = i;
91+
OnWhitespace
9292
}
93-
}
94-
'\'' => {
95-
end = i;
96-
OnWhitespace
97-
}
98-
_ => Quoted,
99-
},
100-
QuoteEscaped => Quoted,
101-
Dquoted => match c {
102-
'\\' => {
103-
if cfg!(unix) {
104-
escaped.push_str(&input[start..i]);
105-
start = i + 1;
106-
DquoteEscaped
107-
} else {
108-
Dquoted
93+
_ => Unquoted,
94+
},
95+
UnquotedEscaped => Unquoted,
96+
Quoted => match c {
97+
'\\' => {
98+
if cfg!(unix) {
99+
escaped.push_str(&input[unescaped_start..i]);
100+
unescaped_start = i + 1;
101+
QuoteEscaped
102+
} else {
103+
Quoted
104+
}
109105
}
110-
}
111-
'"' => {
112-
end = i;
113-
OnWhitespace
114-
}
115-
_ => Dquoted,
116-
},
117-
DquoteEscaped => Dquoted,
118-
};
106+
'\'' => {
107+
end = i;
108+
OnWhitespace
109+
}
110+
_ => Quoted,
111+
},
112+
QuoteEscaped => Quoted,
113+
Dquoted => match c {
114+
'\\' => {
115+
if cfg!(unix) {
116+
escaped.push_str(&input[unescaped_start..i]);
117+
unescaped_start = i + 1;
118+
DquoteEscaped
119+
} else {
120+
Dquoted
121+
}
122+
}
123+
'"' => {
124+
end = i;
125+
OnWhitespace
126+
}
127+
_ => Dquoted,
128+
},
129+
DquoteEscaped => Dquoted,
130+
};
119131

120-
if i >= input.len() - 1 && end == 0 {
121-
end = i + 1;
122-
}
132+
if i >= input.len() - 1 && end == 0 {
133+
end = i + 1;
134+
}
123135

124-
if end > 0 {
125-
let esc_trim = escaped.trim();
126-
let inp = &input[start..end];
136+
if end > 0 {
137+
let esc_trim = escaped.trim();
138+
let inp = &input[unescaped_start..end];
127139

128-
if !(esc_trim.is_empty() && inp.trim().is_empty()) {
129-
if esc_trim.is_empty() {
130-
args.push(inp.into());
131-
} else {
132-
args.push([escaped, inp.into()].concat().into());
133-
escaped = "".to_string();
140+
if !(esc_trim.is_empty() && inp.trim().is_empty()) {
141+
if esc_trim.is_empty() {
142+
words.push(inp.into());
143+
parts.push(inp);
144+
} else {
145+
words.push([escaped, inp.into()].concat().into());
146+
parts.push(&input[part_start..end]);
147+
escaped = "".to_string();
148+
}
134149
}
150+
unescaped_start = i + 1;
151+
part_start = i + 1;
152+
end = 0;
135153
}
136-
start = i + 1;
137-
end = 0;
138154
}
139-
}
140-
args
141-
}
142155

143-
/// Checks that the input ends with an ascii whitespace character which is
144-
/// not escaped.
145-
///
146-
/// # Examples
147-
///
148-
/// ```rust
149-
/// use helix_core::shellwords::ends_with_whitespace;
150-
/// assert_eq!(ends_with_whitespace(" "), true);
151-
/// assert_eq!(ends_with_whitespace(":open "), true);
152-
/// assert_eq!(ends_with_whitespace(":open foo.txt "), true);
153-
/// assert_eq!(ends_with_whitespace(":open"), false);
154-
/// #[cfg(unix)]
155-
/// assert_eq!(ends_with_whitespace(":open a\\ "), false);
156-
/// #[cfg(unix)]
157-
/// assert_eq!(ends_with_whitespace(":open a\\ b.txt"), false);
158-
/// ```
159-
pub fn ends_with_whitespace(input: &str) -> bool {
160-
use State::*;
156+
debug_assert!(words.len() == parts.len());
161157

162-
// Fast-lane: the input must end with a whitespace character
163-
// regardless of quoting.
164-
if !input.ends_with(|c: char| c.is_ascii_whitespace()) {
165-
return false;
158+
Self {
159+
state,
160+
words,
161+
parts,
162+
}
166163
}
164+
}
167165

168-
let mut state = Unquoted;
166+
impl<'a> Shellwords<'a> {
167+
/// Checks that the input ends with a whitespace character which is not escaped.
168+
///
169+
/// # Examples
170+
///
171+
/// ```rust
172+
/// use helix_core::shellwords::Shellwords;
173+
/// assert_eq!(Shellwords::from(" ").ends_with_whitespace(), true);
174+
/// assert_eq!(Shellwords::from(":open ").ends_with_whitespace(), true);
175+
/// assert_eq!(Shellwords::from(":open foo.txt ").ends_with_whitespace(), true);
176+
/// assert_eq!(Shellwords::from(":open").ends_with_whitespace(), false);
177+
/// #[cfg(unix)]
178+
/// assert_eq!(Shellwords::from(":open a\\ ").ends_with_whitespace(), false);
179+
/// #[cfg(unix)]
180+
/// assert_eq!(Shellwords::from(":open a\\ b.txt").ends_with_whitespace(), false);
181+
/// ```
182+
pub fn ends_with_whitespace(&self) -> bool {
183+
matches!(self.state, State::OnWhitespace)
184+
}
169185

170-
for c in input.chars() {
171-
state = match state {
172-
OnWhitespace => match c {
173-
'"' => Dquoted,
174-
'\'' => Quoted,
175-
'\\' if cfg!(unix) => UnquotedEscaped,
176-
'\\' => OnWhitespace,
177-
c if c.is_ascii_whitespace() => OnWhitespace,
178-
_ => Unquoted,
179-
},
180-
Unquoted => match c {
181-
'\\' if cfg!(unix) => UnquotedEscaped,
182-
'\\' => Unquoted,
183-
c if c.is_ascii_whitespace() => OnWhitespace,
184-
_ => Unquoted,
185-
},
186-
UnquotedEscaped => Unquoted,
187-
Quoted => match c {
188-
'\\' if cfg!(unix) => QuoteEscaped,
189-
'\\' => Quoted,
190-
'\'' => OnWhitespace,
191-
_ => Quoted,
192-
},
193-
QuoteEscaped => Quoted,
194-
Dquoted => match c {
195-
'\\' if cfg!(unix) => DquoteEscaped,
196-
'\\' => Dquoted,
197-
'"' => OnWhitespace,
198-
_ => Dquoted,
199-
},
200-
DquoteEscaped => Dquoted,
201-
}
186+
/// Returns the list of shellwords calculated from the input string.
187+
pub fn words(&self) -> &[Cow<'a, str>] {
188+
&self.words
202189
}
203190

204-
matches!(state, OnWhitespace)
191+
/// Returns a list of strings which correspond to [words] but represent the original
192+
/// text in the input string - including escape characters - without separating whitespace.
193+
pub fn parts(&self) -> &[&'a str] {
194+
&self.parts
195+
}
205196
}
206197

207198
#[cfg(test)]
@@ -212,7 +203,8 @@ mod test {
212203
#[cfg(windows)]
213204
fn test_normal() {
214205
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
215-
let result = shellwords(input);
206+
let shellwords = Shellwords::from(input);
207+
let result = shellwords.words().to_vec();
216208
let expected = vec![
217209
Cow::from(":o"),
218210
Cow::from("single_word"),
@@ -230,7 +222,8 @@ mod test {
230222
#[cfg(unix)]
231223
fn test_normal() {
232224
let input = r#":o single_word twó wörds \three\ \"with\ escaping\\"#;
233-
let result = shellwords(input);
225+
let shellwords = Shellwords::from(input);
226+
let result = shellwords.words().to_vec();
234227
let expected = vec![
235228
Cow::from(":o"),
236229
Cow::from("single_word"),
@@ -247,7 +240,8 @@ mod test {
247240
fn test_quoted() {
248241
let quoted =
249242
r#":o 'single_word' 'twó wörds' '' ' ''\three\' \"with\ escaping\\' 'quote incomplete"#;
250-
let result = shellwords(quoted);
243+
let shellwords = Shellwords::from(quoted);
244+
let result = shellwords.words().to_vec();
251245
let expected = vec![
252246
Cow::from(":o"),
253247
Cow::from("single_word"),
@@ -262,7 +256,8 @@ mod test {
262256
#[cfg(unix)]
263257
fn test_dquoted() {
264258
let dquoted = r#":o "single_word" "twó wörds" "" " ""\three\' \"with\ escaping\\" "dquote incomplete"#;
265-
let result = shellwords(dquoted);
259+
let shellwords = Shellwords::from(dquoted);
260+
let result = shellwords.words().to_vec();
266261
let expected = vec![
267262
Cow::from(":o"),
268263
Cow::from("single_word"),
@@ -277,7 +272,8 @@ mod test {
277272
#[cfg(unix)]
278273
fn test_mixed() {
279274
let dquoted = r#":o single_word 'twó wörds' "\three\' \"with\ escaping\\""no space before"'and after' $#%^@ "%^&(%^" ')(*&^%''a\\\\\b' '"#;
280-
let result = shellwords(dquoted);
275+
let shellwords = Shellwords::from(dquoted);
276+
let result = shellwords.words().to_vec();
281277
let expected = vec![
282278
Cow::from(":o"),
283279
Cow::from("single_word"),
@@ -298,7 +294,8 @@ mod test {
298294
fn test_lists() {
299295
let input =
300296
r#":set statusline.center ["file-type","file-encoding"] '["list", "in", "qoutes"]'"#;
301-
let result = shellwords(input);
297+
let shellwords = Shellwords::from(input);
298+
let result = shellwords.words().to_vec();
302299
let expected = vec![
303300
Cow::from(":set"),
304301
Cow::from("statusline.center"),
@@ -322,4 +319,10 @@ mod test {
322319
assert_eq!(escape("foobar".into()), Cow::Borrowed("foobar"));
323320
assert_eq!(escape("foo bar".into()), Cow::Borrowed("\"foo bar\""));
324321
}
322+
323+
#[test]
324+
fn test_parts() {
325+
assert_eq!(Shellwords::from(":o a").parts(), &[":o", "a"]);
326+
assert_eq!(Shellwords::from(":o a\\ ").parts(), &[":o", "a\\ "]);
327+
}
325328
}

0 commit comments

Comments
 (0)