diff --git a/CHANGELOG.md b/CHANGELOG.md index e61b5988..1dbc8563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Changed +* Only allow escape character (\) in front of (, ), \, or whitespace. Throw error otherwise. ([#17](https://github.com/cucumber/tag-expressions/pull/17)) + ### Deprecated ### Removed ### Fixed +* Document escaping. ([#16](https://github.com/cucumber/tag-expressions/issues/16), [#17](https://github.com/cucumber/tag-expressions/pull/17)) * [Ruby] Empty expression evaluates to true ## [4.1.0] - 2021-10-08 diff --git a/README.md b/README.md index 6bf01997..ff2ea9a3 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,16 @@ For more complex Tag Expressions you can use parenthesis for clarity, or to chan (@smoke or @ui) and (not @slow) +## Escaping + +If you need to use one of the reserved characters `(`, `)`, `\` or ` ` (whitespace) in a tag, +you can escape it with a `\`. Examples: + +| Gherkin Tag | Escaped Tag Expression | +| ------------- | ---------------------- | +| @x(y) | @x\(y\) | +| @x\y | @x\\y | + ## Migrating from old style tags Older versions of Cucumber used a different syntax for tags. 
The list below diff --git a/go/parser.go b/go/parser.go index f174f497..ebac8fcf 100644 --- a/go/parser.go +++ b/go/parser.go @@ -16,7 +16,10 @@ type Evaluatable interface { } func Parse(infix string) (Evaluatable, error) { - tokens := tokenize(infix) + tokens, err := tokenize(infix) + if err != nil { + return nil, err + } if len(tokens) == 0 { return &trueExpr{}, nil } @@ -96,51 +99,38 @@ var PREC = map[string]int{ "not": 2, } -func tokenize(expr string) []string { +func tokenize(expr string) ([]string, error) { var tokens []string var token bytes.Buffer - collectToken := func() { - if token.Len() > 0 { - tokens = append(tokens, token.String()) - token.Reset() - } - } - escaped := false for _, c := range expr { - if unicode.IsSpace(c) { - collectToken() - escaped = false - continue - } - - ch := string(c) - - switch ch { - case "\\": - if escaped { - token.WriteString(ch) + if escaped { + if c == '(' || c == ')' || c == '\\' || unicode.IsSpace(c) { + token.WriteRune(c) escaped = false } else { - escaped = true + return nil, fmt.Errorf("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, string(c)) } - case "(", ")": - if escaped { - token.WriteString(ch) - escaped = false - } else { - collectToken() - tokens = append(tokens, ch) + } else if c == '\\' { + escaped = true + } else if c == '(' || c == ')' || unicode.IsSpace(c) { + if token.Len() > 0 { + tokens = append(tokens, token.String()) + token.Reset() } - default: - token.WriteString(ch) - escaped = false + if !unicode.IsSpace(c) { + tokens = append(tokens, string(c)) + } + } else { + token.WriteRune(c) } } + if token.Len() > 0 { + tokens = append(tokens, token.String()) + } - collectToken() - return tokens + return tokens, nil } func isUnary(token string) bool { @@ -197,17 +187,11 @@ func (l *literalExpr) Evaluate(variables []string) bool { } func (l *literalExpr) ToString() string { - return strings.Replace( - strings.Replace( - strings.Replace(l.value, 
"\\", "\\\\", -1), - "(", - "\\(", - -1, - ), - ")", - "\\)", - -1, - ) + s1 := l.value + s2 := strings.Replace(s1, "\\", "\\\\", -1) + s3 := strings.Replace(s2, "(", "\\(", -1) + s4 := strings.Replace(s3, ")", "\\)", -1) + return strings.Replace(s4, " ", "\\ ", -1) } type orExpr struct { diff --git a/java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java b/java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java index 7247c92c..d1e7da6f 100644 --- a/java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java +++ b/java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java @@ -6,6 +6,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; public final class TagExpressionParser { private static final Map ASSOC = new HashMap() {{ @@ -24,16 +26,16 @@ public final class TagExpressionParser { private final String infix; public static Expression parse(String infix) { - return new TagExpressionParser(infix).parse(); + return new TagExpressionParser(infix).parse(); } private TagExpressionParser(String infix) { - this.infix = infix; + this.infix = infix; } private Expression parse() { List tokens = tokenize(infix); - if(tokens.isEmpty()) return new True(); + if (tokens.isEmpty()) return new True(); Deque operators = new ArrayDeque<>(); Deque expressions = new ArrayDeque<>(); @@ -49,7 +51,7 @@ private Expression parse() { (ASSOC.get(token) == Assoc.LEFT && PREC.get(token) <= PREC.get(operators.peek())) || (ASSOC.get(token) == Assoc.RIGHT && PREC.get(token) < PREC.get(operators.peek()))) - ) { + ) { pushExpr(pop(operators), expressions); } operators.push(token); @@ -89,43 +91,32 @@ private Expression parse() { private static List tokenize(String expr) { List tokens = new ArrayList<>(); - boolean isEscaped = false; - StringBuilder token = null; + StringBuilder token = new StringBuilder(); for (int i = 0; i < expr.length(); i++) { char c = 
expr.charAt(i); - if (ESCAPING_CHAR == c && !isEscaped) { - isEscaped = true; - } else { - if (Character.isWhitespace(c)) { // skip - if (null != token) { // end of token - tokens.add(token.toString()); - token = null; - } + if (isEscaped) { + if (c == '(' || c == ')' || c == '\\' || Character.isWhitespace(c)) { + token.append(c); + isEscaped = false; } else { - switch (c) { - case '(': - case ')': - if (!isEscaped) { - if (null != token) { // end of token - tokens.add(token.toString()); - token = null; - } - tokens.add(String.valueOf(c)); - break; - } - default: - if (null == token) { // start of token - token = new StringBuilder(); - } - token.append(c); - break; - } + throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, c); } - isEscaped = false; + } else if (c == ESCAPING_CHAR) { + isEscaped = true; + } else if (c == '(' || c == ')' || Character.isWhitespace(c)) { + if (token.length() > 0) { + tokens.add(token.toString()); + token = new StringBuilder(); + } + if (!Character.isWhitespace(c)) { + tokens.add(String.valueOf(c)); + } + } else { + token.append(c); } } - if (null != token) { // end of token + if (token.length() > 0) { tokens.add(token.toString()); } return tokens; @@ -138,7 +129,8 @@ private void check(TokenType expectedTokenType, TokenType tokenType) { } private T pop(Deque stack) { - if (stack.isEmpty()) throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix); + if (stack.isEmpty()) + throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix); return stack.pop(); } @@ -197,7 +189,11 @@ public boolean evaluate(List variables) { @Override public String toString() { - return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\\(", "\\\\(").replaceAll("\\)", "\\\\)"); + return value + .replaceAll(Pattern.quote("\\"), Matcher.quoteReplacement("\\\\")) + 
.replaceAll(Pattern.quote("("), Matcher.quoteReplacement("\\(")) + .replaceAll(Pattern.quote(")"), Matcher.quoteReplacement("\\)")) + .replaceAll("\\s", "\\\\ "); } } diff --git a/javascript/src/index.ts b/javascript/src/index.ts index cd3e6faa..49ced6af 100644 --- a/javascript/src/index.ts +++ b/javascript/src/index.ts @@ -55,7 +55,7 @@ export default function parse(infix: string): Node { pushExpr(pop(operators), expressions) } if (operators.length === 0) { - throw Error( + throw new Error( `Tag expression "${infix}" could not be parsed because of syntax error: Unmatched ).` ) } @@ -72,7 +72,7 @@ export default function parse(infix: string): Node { while (operators.length > 0) { if (peek(operators) === '(') { - throw Error( + throw new Error( `Tag expression "${infix}" could not be parsed because of syntax error: Unmatched (.` ) } @@ -93,36 +93,33 @@ export default function parse(infix: string): Node { function tokenize(expr: string): string[] { const tokens = [] let isEscaped = false - let token: string[] | undefined + let token: string[] = [] for (let i = 0; i < expr.length; i++) { const c = expr.charAt(i) - if ('\\' === c && !isEscaped) { - isEscaped = true - } else { - if (/\s/.test(c)) { - // skip - if (token) { - // end of token - tokens.push(token.join('')) - token = undefined - } - } else { - if ((c === '(' || c === ')') && !isEscaped) { - if (token) { - // end of token - tokens.push(token.join('')) - token = undefined - } - tokens.push(c) - continue - } - token = token ? 
token : [] // start of token + if (isEscaped) { + if (c === '(' || c === ')' || c === '\\' || /\s/.test(c)) { token.push(c) + isEscaped = false + } else { + throw new Error( + `Tag expression "${expr}" could not be parsed because of syntax error: Illegal escape before "${c}".` + ) + } + } else if (c === '\\') { + isEscaped = true + } else if (c === '(' || c === ')' || /\s/.test(c)) { + if (token.length > 0) { + tokens.push(token.join('')) + token = [] } - isEscaped = false + if (!/\s/.test(c)) { + tokens.push(c) + } + } else { + token.push(c) } } - if (token) { + if (token.length > 0) { tokens.push(token.join('')) } return tokens @@ -177,7 +174,11 @@ class Literal implements Node { } public toString() { - return this.value.replace(/\\/g, '\\\\').replace(/\(/g, '\\(').replace(/\)/g, '\\)') + return this.value + .replace(/\\/g, '\\\\') + .replace(/\(/g, '\\(') + .replace(/\)/g, '\\)') + .replace(/\s/g, '\\ ') } } diff --git a/ruby/lib/cucumber/tag_expressions/expressions.rb b/ruby/lib/cucumber/tag_expressions/expressions.rb index 9e58a73b..2157b924 100644 --- a/ruby/lib/cucumber/tag_expressions/expressions.rb +++ b/ruby/lib/cucumber/tag_expressions/expressions.rb @@ -11,7 +11,11 @@ def evaluate(variables) end def to_s - @value.gsub(/\\/, "\\\\\\\\").gsub(/\(/, "\\(").gsub(/\)/, "\\)") + @value + .gsub(/\\/, "\\\\\\\\") + .gsub(/\(/, "\\(") + .gsub(/\)/, "\\)") + .gsub(/\s/, "\\ ") end end diff --git a/ruby/lib/cucumber/tag_expressions/parser.rb b/ruby/lib/cucumber/tag_expressions/parser.rb index 773259fd..1c09450b 100644 --- a/ruby/lib/cucumber/tag_expressions/parser.rb +++ b/ruby/lib/cucumber/tag_expressions/parser.rb @@ -65,36 +65,35 @@ def precedence(token) end def tokenize(infix_expression) + tokens = [] escaped = false token = "" - result = [] infix_expression.chars.each do | ch | - if ch == '\\' && !escaped - escaped = true - else - if ch.match(/\s/) - if token.length > 0 - result.push(token) - token = "" - end + if escaped + if ch == '(' || ch == ')' || ch == 
'\\' || ch.match(/\s/) + token += ch + escaped = false else - if (ch == '(' || ch == ')') && !escaped - if token.length > 0 - result.push(token) - token = "" - end - result.push(ch) - else - token = token + ch - end + raise %Q{Tag expression "#{infix_expression}" could not be parsed because of syntax error: Illegal escape before "#{ch}".} + end + elsif ch == '\\' + escaped = true + elsif ch == '(' || ch == ')' || ch.match(/\s/) + if token.length > 0 + tokens.push(token) + token = "" + end + if !ch.match(/\s/) + tokens.push(ch) end - escaped = false + else + token += ch end end if token.length > 0 - result.push(token) + tokens.push(token) end - result + tokens end def push_expression(token) diff --git a/testdata/errors.yml b/testdata/errors.yml index 07f53182..f294e7b5 100644 --- a/testdata/errors.yml +++ b/testdata/errors.yml @@ -14,3 +14,7 @@ error: 'Tag expression "( a and b ) )" could not be parsed because of syntax error: Unmatched ).' - expression: '( ( a and b )' error: 'Tag expression "( ( a and b )" could not be parsed because of syntax error: Unmatched (.' +- expression: 'x or \y or z' + error: 'Tag expression "x or \y or z" could not be parsed because of syntax error: Illegal escape before "y".' +- expression: 'x\ or y' + error: 'Tag expression "x\ or y" could not be parsed because of syntax error: Expected operator.' 
diff --git a/testdata/evaluations.yml b/testdata/evaluations.yml index e34cf116..a84fb3ca 100644 --- a/testdata/evaluations.yml +++ b/testdata/evaluations.yml @@ -23,13 +23,13 @@ result: true - variables: ['y'] result: true -- expression: 'x\(1\) or(y\(2\))' +- expression: 'x\(1\) or y\(2\)' tests: - variables: ['x(1)'] result: true - variables: ['y(2)'] result: true -- expression: 'x\\ or(y\\\)) or(z\\)' +- expression: 'x\\ or y\\\) or z\\' tests: - variables: ['x\'] result: true @@ -43,17 +43,17 @@ result: false - variables: ['z'] result: false -- expression: '\x or y\ or z\' +- expression: '\\x or y\\ or z\\' tests: - - variables: ['x'] + - variables: ['\x'] result: true - - variables: ['y'] + - variables: ['y\'] result: true - - variables: ['z'] + - variables: ['z\'] result: true - - variables: ['\x'] + - variables: ['x'] result: false - - variables: ['\y'] + - variables: ['y'] result: false - - variables: ['\z'] + - variables: ['z'] result: false diff --git a/testdata/parsing.yml b/testdata/parsing.yml index d0180f43..e208e628 100644 --- a/testdata/parsing.yml +++ b/testdata/parsing.yml @@ -28,11 +28,11 @@ formatted: '( a and \\b )' - expression: 'x or(y) ' formatted: '( x or y )' -# Operands with escaped parenthesis - expression: 'x\(1\) or(y\(2\))' formatted: '( x\(1\) or y\(2\) )' - expression: '\\x or y\\ or z\\' formatted: '( ( \\x or y\\ ) or z\\ )' -# Operands with escaped backslash - expression: 'x\\ or(y\\\)) or(z\\)' formatted: '( ( x\\ or y\\\) ) or z\\ )' +- expression: 'x\ or y' + formatted: '( x\ or y )'