Skip to content

Commit bc040f7

Browse files
Illegal escape detection (#17)
* Throw error on illegal escapes * Fix JavaScript tests, simplyfy Java * Update Ruby parser * Document escaping. Fixes #16. * Fix go escape validation (simplify java)
1 parent a727b9e commit bc040f7

File tree

10 files changed

+142
-141
lines changed

10 files changed

+142
-141
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1313

1414
### Changed
1515

16+
* Only allow escape character (\) in front of (, ), or \. Throw error otherwise. ([#17](https://github.com/cucumber/tag-expressions/pull/17))
17+
1618
### Deprecated
1719

1820
### Removed
1921

2022
### Fixed
2123

24+
* Document escaping. ([#16](https://github.com/cucumber/tag-expressions/issues/16), [#17](https://github.com/cucumber/tag-expressions/pull/17))
2225
* [Ruby] Empty expression evaluates to true
2326

2427
## [4.1.0] - 2021-10-08

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,16 @@ For more complex Tag Expressions you can use parenthesis for clarity, or to chan
2828

2929
(@smoke or @ui) and (not @slow)
3030

31+
## Escaping
32+
33+
If you need to use one of the reserved characters `(`, `)`, `\` or ` ` (whitespace) in a tag,
34+
you can escape it with a `\`. Examples:
35+
36+
| Gherkin Tag | Escaped Tag Expression |
37+
| ------------- | ---------------------- |
38+
| @x(y) | @x\(y\) |
39+
| @x\y | @x\\y |
40+
3141
## Migrating from old style tags
3242

3343
Older versions of Cucumber used a different syntax for tags. The list below

go/parser.go

Lines changed: 29 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@ type Evaluatable interface {
1616
}
1717

1818
func Parse(infix string) (Evaluatable, error) {
19-
tokens := tokenize(infix)
19+
tokens, err := tokenize(infix)
20+
if err != nil {
21+
return nil, err
22+
}
2023
if len(tokens) == 0 {
2124
return &trueExpr{}, nil
2225
}
@@ -96,51 +99,38 @@ var PREC = map[string]int{
9699
"not": 2,
97100
}
98101

99-
func tokenize(expr string) []string {
102+
func tokenize(expr string) ([]string, error) {
100103
var tokens []string
101104
var token bytes.Buffer
102105

103-
collectToken := func() {
104-
if token.Len() > 0 {
105-
tokens = append(tokens, token.String())
106-
token.Reset()
107-
}
108-
}
109-
110106
escaped := false
111107
for _, c := range expr {
112-
if unicode.IsSpace(c) {
113-
collectToken()
114-
escaped = false
115-
continue
116-
}
117-
118-
ch := string(c)
119-
120-
switch ch {
121-
case "\\":
122-
if escaped {
123-
token.WriteString(ch)
108+
if escaped {
109+
if c == '(' || c == ')' || c == '\\' || unicode.IsSpace(c) {
110+
token.WriteRune(c)
124111
escaped = false
125112
} else {
126-
escaped = true
113+
return nil, fmt.Errorf("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, string(c))
127114
}
128-
case "(", ")":
129-
if escaped {
130-
token.WriteString(ch)
131-
escaped = false
132-
} else {
133-
collectToken()
134-
tokens = append(tokens, ch)
115+
} else if c == '\\' {
116+
escaped = true
117+
} else if c == '(' || c == ')' || unicode.IsSpace(c) {
118+
if token.Len() > 0 {
119+
tokens = append(tokens, token.String())
120+
token.Reset()
135121
}
136-
default:
137-
token.WriteString(ch)
138-
escaped = false
122+
if !unicode.IsSpace(c) {
123+
tokens = append(tokens, string(c))
124+
}
125+
} else {
126+
token.WriteRune(c)
139127
}
140128
}
129+
if token.Len() > 0 {
130+
tokens = append(tokens, token.String())
131+
}
141132

142-
collectToken()
143-
return tokens
133+
return tokens, nil
144134
}
145135

146136
func isUnary(token string) bool {
@@ -197,17 +187,11 @@ func (l *literalExpr) Evaluate(variables []string) bool {
197187
}
198188

199189
func (l *literalExpr) ToString() string {
200-
return strings.Replace(
201-
strings.Replace(
202-
strings.Replace(l.value, "\\", "\\\\", -1),
203-
"(",
204-
"\\(",
205-
-1,
206-
),
207-
")",
208-
"\\)",
209-
-1,
210-
)
190+
s1 := l.value
191+
s2 := strings.Replace(s1, "\\", "\\\\", -1)
192+
s3 := strings.Replace(s2, "(", "\\(", -1)
193+
s4 := strings.Replace(s3, ")", "\\)", -1)
194+
return strings.Replace(s4, " ", "\\ ", -1)
211195
}
212196

213197
type orExpr struct {

java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java

Lines changed: 32 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import java.util.HashMap;
77
import java.util.List;
88
import java.util.Map;
9+
import java.util.regex.Matcher;
10+
import java.util.regex.Pattern;
911

1012
public final class TagExpressionParser {
1113
private static final Map<String, Assoc> ASSOC = new HashMap<String, Assoc>() {{
@@ -24,16 +26,16 @@ public final class TagExpressionParser {
2426
private final String infix;
2527

2628
public static Expression parse(String infix) {
27-
return new TagExpressionParser(infix).parse();
29+
return new TagExpressionParser(infix).parse();
2830
}
2931

3032
private TagExpressionParser(String infix) {
31-
this.infix = infix;
33+
this.infix = infix;
3234
}
3335

3436
private Expression parse() {
3537
List<String> tokens = tokenize(infix);
36-
if(tokens.isEmpty()) return new True();
38+
if (tokens.isEmpty()) return new True();
3739

3840
Deque<String> operators = new ArrayDeque<>();
3941
Deque<Expression> expressions = new ArrayDeque<>();
@@ -49,7 +51,7 @@ private Expression parse() {
4951
(ASSOC.get(token) == Assoc.LEFT && PREC.get(token) <= PREC.get(operators.peek()))
5052
||
5153
(ASSOC.get(token) == Assoc.RIGHT && PREC.get(token) < PREC.get(operators.peek())))
52-
) {
54+
) {
5355
pushExpr(pop(operators), expressions);
5456
}
5557
operators.push(token);
@@ -89,43 +91,32 @@ private Expression parse() {
8991

9092
private static List<String> tokenize(String expr) {
9193
List<String> tokens = new ArrayList<>();
92-
9394
boolean isEscaped = false;
94-
StringBuilder token = null;
95+
StringBuilder token = new StringBuilder();
9596
for (int i = 0; i < expr.length(); i++) {
9697
char c = expr.charAt(i);
97-
if (ESCAPING_CHAR == c && !isEscaped) {
98-
isEscaped = true;
99-
} else {
100-
if (Character.isWhitespace(c)) { // skip
101-
if (null != token) { // end of token
102-
tokens.add(token.toString());
103-
token = null;
104-
}
98+
if (isEscaped) {
99+
if (c == '(' || c == ')' || c == '\\' || Character.isWhitespace(c)) {
100+
token.append(c);
101+
isEscaped = false;
105102
} else {
106-
switch (c) {
107-
case '(':
108-
case ')':
109-
if (!isEscaped) {
110-
if (null != token) { // end of token
111-
tokens.add(token.toString());
112-
token = null;
113-
}
114-
tokens.add(String.valueOf(c));
115-
break;
116-
}
117-
default:
118-
if (null == token) { // start of token
119-
token = new StringBuilder();
120-
}
121-
token.append(c);
122-
break;
123-
}
103+
throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, c);
124104
}
125-
isEscaped = false;
105+
} else if (c == ESCAPING_CHAR) {
106+
isEscaped = true;
107+
} else if (c == '(' || c == ')' || Character.isWhitespace(c)) {
108+
if (token.length() > 0) {
109+
tokens.add(token.toString());
110+
token = new StringBuilder();
111+
}
112+
if (!Character.isWhitespace(c)) {
113+
tokens.add(String.valueOf(c));
114+
}
115+
} else {
116+
token.append(c);
126117
}
127118
}
128-
if (null != token) { // end of token
119+
if (token.length() > 0) {
129120
tokens.add(token.toString());
130121
}
131122
return tokens;
@@ -138,7 +129,8 @@ private void check(TokenType expectedTokenType, TokenType tokenType) {
138129
}
139130

140131
private <T> T pop(Deque<T> stack) {
141-
if (stack.isEmpty()) throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix);
132+
if (stack.isEmpty())
133+
throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix);
142134
return stack.pop();
143135
}
144136

@@ -197,7 +189,11 @@ public boolean evaluate(List<String> variables) {
197189

198190
@Override
199191
public String toString() {
200-
return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\\(", "\\\\(").replaceAll("\\)", "\\\\)");
192+
return value
193+
.replaceAll(Pattern.quote("\\"), Matcher.quoteReplacement("\\\\"))
194+
.replaceAll(Pattern.quote("("), Matcher.quoteReplacement("\\("))
195+
.replaceAll(Pattern.quote(")"), Matcher.quoteReplacement("\\)"))
196+
.replaceAll("\\s", "\\\\ ");
201197
}
202198
}
203199

javascript/src/index.ts

Lines changed: 28 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ export default function parse(infix: string): Node {
5555
pushExpr(pop(operators), expressions)
5656
}
5757
if (operators.length === 0) {
58-
throw Error(
58+
throw new Error(
5959
`Tag expression "${infix}" could not be parsed because of syntax error: Unmatched ).`
6060
)
6161
}
@@ -72,7 +72,7 @@ export default function parse(infix: string): Node {
7272

7373
while (operators.length > 0) {
7474
if (peek(operators) === '(') {
75-
throw Error(
75+
throw new Error(
7676
`Tag expression "${infix}" could not be parsed because of syntax error: Unmatched (.`
7777
)
7878
}
@@ -93,36 +93,33 @@ export default function parse(infix: string): Node {
9393
function tokenize(expr: string): string[] {
9494
const tokens = []
9595
let isEscaped = false
96-
let token: string[] | undefined
96+
let token: string[] = []
9797
for (let i = 0; i < expr.length; i++) {
9898
const c = expr.charAt(i)
99-
if ('\\' === c && !isEscaped) {
100-
isEscaped = true
101-
} else {
102-
if (/\s/.test(c)) {
103-
// skip
104-
if (token) {
105-
// end of token
106-
tokens.push(token.join(''))
107-
token = undefined
108-
}
109-
} else {
110-
if ((c === '(' || c === ')') && !isEscaped) {
111-
if (token) {
112-
// end of token
113-
tokens.push(token.join(''))
114-
token = undefined
115-
}
116-
tokens.push(c)
117-
continue
118-
}
119-
token = token ? token : [] // start of token
99+
if (isEscaped) {
100+
if (c === '(' || c === ')' || c === '\\' || /\s/.test(c)) {
120101
token.push(c)
102+
isEscaped = false
103+
} else {
104+
throw new Error(
105+
`Tag expression "${expr}" could not be parsed because of syntax error: Illegal escape before "${c}".`
106+
)
107+
}
108+
} else if (c === '\\') {
109+
isEscaped = true
110+
} else if (c === '(' || c === ')' || /\s/.test(c)) {
111+
if (token.length > 0) {
112+
tokens.push(token.join(''))
113+
token = []
121114
}
122-
isEscaped = false
115+
if (!/\s/.test(c)) {
116+
tokens.push(c)
117+
}
118+
} else {
119+
token.push(c)
123120
}
124121
}
125-
if (token) {
122+
if (token.length > 0) {
126123
tokens.push(token.join(''))
127124
}
128125
return tokens
@@ -177,7 +174,11 @@ class Literal implements Node {
177174
}
178175

179176
public toString() {
180-
return this.value.replace(/\\/g, '\\\\').replace(/\(/g, '\\(').replace(/\)/g, '\\)')
177+
return this.value
178+
.replace(/\\/g, '\\\\')
179+
.replace(/\(/g, '\\(')
180+
.replace(/\)/g, '\\)')
181+
.replace(/\s/g, '\\ ')
181182
}
182183
}
183184

ruby/lib/cucumber/tag_expressions/expressions.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@ def evaluate(variables)
1111
end
1212

1313
def to_s
14-
@value.gsub(/\\/, "\\\\\\\\").gsub(/\(/, "\\(").gsub(/\)/, "\\)")
14+
@value
15+
.gsub(/\\/, "\\\\\\\\")
16+
.gsub(/\(/, "\\(")
17+
.gsub(/\)/, "\\)")
18+
.gsub(/\s/, "\\ ")
1519
end
1620
end
1721

0 commit comments

Comments
 (0)