Skip to content

Illegal escape detection #17

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/).

### Changed

* Only allow escape character (\) in front of (, ), or \. Throw error otherwise. ([#17](https://github.com/cucumber/tag-expressions/pull/17))

### Deprecated

### Removed

### Fixed

* Document escaping. ([#16](https://github.com/cucumber/tag-expressions/issues/16), [#17](https://github.com/cucumber/tag-expressions/pull/17))
* [Ruby] Empty expression evaluates to true

## [4.1.0] - 2021-10-08
Expand Down
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ For more complex Tag Expressions you can use parenthesis for clarity, or to chan

(@smoke or @ui) and (not @slow)

## Escaping

If you need to use one of the reserved characters `(`, `)`, `\` or ` ` (whitespace) in a tag,
you can escape it with a `\`. Examples:

| Gherkin Tag | Escaped Tag Expression |
| ------------- | ---------------------- |
| @x(y) | @x\(y\) |
| @x\y | @x\\y |

## Migrating from old style tags

Older versions of Cucumber used a different syntax for tags. The list below
Expand Down
74 changes: 29 additions & 45 deletions go/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ type Evaluatable interface {
}

func Parse(infix string) (Evaluatable, error) {
tokens := tokenize(infix)
tokens, err := tokenize(infix)
if err != nil {
return nil, err
}
if len(tokens) == 0 {
return &trueExpr{}, nil
}
Expand Down Expand Up @@ -96,51 +99,38 @@ var PREC = map[string]int{
"not": 2,
}

func tokenize(expr string) []string {
func tokenize(expr string) ([]string, error) {
var tokens []string
var token bytes.Buffer

collectToken := func() {
if token.Len() > 0 {
tokens = append(tokens, token.String())
token.Reset()
}
}

escaped := false
for _, c := range expr {
if unicode.IsSpace(c) {
collectToken()
escaped = false
continue
}

ch := string(c)

switch ch {
case "\\":
if escaped {
token.WriteString(ch)
if escaped {
if c == '(' || c == ')' || c == '\\' || unicode.IsSpace(c) {
token.WriteRune(c)
escaped = false
} else {
escaped = true
return nil, fmt.Errorf("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, string(c))
}
case "(", ")":
if escaped {
token.WriteString(ch)
escaped = false
} else {
collectToken()
tokens = append(tokens, ch)
} else if c == '\\' {
escaped = true
} else if c == '(' || c == ')' || unicode.IsSpace(c) {
if token.Len() > 0 {
tokens = append(tokens, token.String())
token.Reset()
}
default:
token.WriteString(ch)
escaped = false
if !unicode.IsSpace(c) {
tokens = append(tokens, string(c))
}
} else {
token.WriteRune(c)
}
}
if token.Len() > 0 {
tokens = append(tokens, token.String())
}

collectToken()
return tokens
return tokens, nil
}

func isUnary(token string) bool {
Expand Down Expand Up @@ -197,17 +187,11 @@ func (l *literalExpr) Evaluate(variables []string) bool {
}

func (l *literalExpr) ToString() string {
return strings.Replace(
strings.Replace(
strings.Replace(l.value, "\\", "\\\\", -1),
"(",
"\\(",
-1,
),
")",
"\\)",
-1,
)
s1 := l.value
s2 := strings.Replace(s1, "\\", "\\\\", -1)
s3 := strings.Replace(s2, "(", "\\(", -1)
s4 := strings.Replace(s3, ")", "\\)", -1)
return strings.Replace(s4, " ", "\\ ", -1)
}

type orExpr struct {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public final class TagExpressionParser {
private static final Map<String, Assoc> ASSOC = new HashMap<String, Assoc>() {{
Expand All @@ -24,16 +26,16 @@ public final class TagExpressionParser {
private final String infix;

public static Expression parse(String infix) {
return new TagExpressionParser(infix).parse();
return new TagExpressionParser(infix).parse();
}

private TagExpressionParser(String infix) {
this.infix = infix;
this.infix = infix;
}

private Expression parse() {
List<String> tokens = tokenize(infix);
if(tokens.isEmpty()) return new True();
if (tokens.isEmpty()) return new True();

Deque<String> operators = new ArrayDeque<>();
Deque<Expression> expressions = new ArrayDeque<>();
Expand All @@ -49,7 +51,7 @@ private Expression parse() {
(ASSOC.get(token) == Assoc.LEFT && PREC.get(token) <= PREC.get(operators.peek()))
||
(ASSOC.get(token) == Assoc.RIGHT && PREC.get(token) < PREC.get(operators.peek())))
) {
) {
pushExpr(pop(operators), expressions);
}
operators.push(token);
Expand Down Expand Up @@ -89,43 +91,32 @@ private Expression parse() {

private static List<String> tokenize(String expr) {
List<String> tokens = new ArrayList<>();

boolean isEscaped = false;
StringBuilder token = null;
StringBuilder token = new StringBuilder();
for (int i = 0; i < expr.length(); i++) {
char c = expr.charAt(i);
if (ESCAPING_CHAR == c && !isEscaped) {
isEscaped = true;
} else {
if (Character.isWhitespace(c)) { // skip
if (null != token) { // end of token
tokens.add(token.toString());
token = null;
}
if (isEscaped) {
if (c == '(' || c == ')' || c == '\\' || Character.isWhitespace(c)) {
token.append(c);
isEscaped = false;
} else {
switch (c) {
case '(':
case ')':
if (!isEscaped) {
if (null != token) { // end of token
tokens.add(token.toString());
token = null;
}
tokens.add(String.valueOf(c));
break;
}
default:
if (null == token) { // start of token
token = new StringBuilder();
}
token.append(c);
break;
}
throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, c);
}
isEscaped = false;
} else if (c == ESCAPING_CHAR) {
isEscaped = true;
} else if (c == '(' || c == ')' || Character.isWhitespace(c)) {
if (token.length() > 0) {
tokens.add(token.toString());
token = new StringBuilder();
}
if (!Character.isWhitespace(c)) {
tokens.add(String.valueOf(c));
}
} else {
token.append(c);
}
}
if (null != token) { // end of token
if (token.length() > 0) {
tokens.add(token.toString());
}
return tokens;
Expand All @@ -138,7 +129,8 @@ private void check(TokenType expectedTokenType, TokenType tokenType) {
}

private <T> T pop(Deque<T> stack) {
if (stack.isEmpty()) throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix);
if (stack.isEmpty())
throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix);
return stack.pop();
}

Expand Down Expand Up @@ -197,7 +189,11 @@ public boolean evaluate(List<String> variables) {

@Override
public String toString() {
return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\\(", "\\\\(").replaceAll("\\)", "\\\\)");
return value
.replaceAll(Pattern.quote("\\"), Matcher.quoteReplacement("\\\\"))
.replaceAll(Pattern.quote("("), Matcher.quoteReplacement("\\("))
.replaceAll(Pattern.quote(")"), Matcher.quoteReplacement("\\)"))
.replaceAll("\\s", "\\\\ ");
}
}

Expand Down
55 changes: 28 additions & 27 deletions javascript/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ export default function parse(infix: string): Node {
pushExpr(pop(operators), expressions)
}
if (operators.length === 0) {
throw Error(
throw new Error(
`Tag expression "${infix}" could not be parsed because of syntax error: Unmatched ).`
)
}
Expand All @@ -72,7 +72,7 @@ export default function parse(infix: string): Node {

while (operators.length > 0) {
if (peek(operators) === '(') {
throw Error(
throw new Error(
`Tag expression "${infix}" could not be parsed because of syntax error: Unmatched (.`
)
}
Expand All @@ -93,36 +93,33 @@ export default function parse(infix: string): Node {
function tokenize(expr: string): string[] {
const tokens = []
let isEscaped = false
let token: string[] | undefined
let token: string[] = []
for (let i = 0; i < expr.length; i++) {
const c = expr.charAt(i)
if ('\\' === c && !isEscaped) {
isEscaped = true
} else {
if (/\s/.test(c)) {
// skip
if (token) {
// end of token
tokens.push(token.join(''))
token = undefined
}
} else {
if ((c === '(' || c === ')') && !isEscaped) {
if (token) {
// end of token
tokens.push(token.join(''))
token = undefined
}
tokens.push(c)
continue
}
token = token ? token : [] // start of token
if (isEscaped) {
if (c === '(' || c === ')' || c === '\\' || /\s/.test(c)) {
token.push(c)
isEscaped = false
} else {
throw new Error(
`Tag expression "${expr}" could not be parsed because of syntax error: Illegal escape before "${c}".`
)
}
} else if (c === '\\') {
isEscaped = true
} else if (c === '(' || c === ')' || /\s/.test(c)) {
if (token.length > 0) {
tokens.push(token.join(''))
token = []
}
isEscaped = false
if (!/\s/.test(c)) {
tokens.push(c)
}
} else {
token.push(c)
}
}
if (token) {
if (token.length > 0) {
tokens.push(token.join(''))
}
return tokens
Expand Down Expand Up @@ -177,7 +174,11 @@ class Literal implements Node {
}

public toString() {
return this.value.replace(/\\/g, '\\\\').replace(/\(/g, '\\(').replace(/\)/g, '\\)')
return this.value
.replace(/\\/g, '\\\\')
.replace(/\(/g, '\\(')
.replace(/\)/g, '\\)')
.replace(/\s/g, '\\ ')
}
}

Expand Down
6 changes: 5 additions & 1 deletion ruby/lib/cucumber/tag_expressions/expressions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ def evaluate(variables)
end

def to_s
@value.gsub(/\\/, "\\\\\\\\").gsub(/\(/, "\\(").gsub(/\)/, "\\)")
@value
.gsub(/\\/, "\\\\\\\\")
.gsub(/\(/, "\\(")
.gsub(/\)/, "\\)")
.gsub(/\s/, "\\ ")
end
end

Expand Down
Loading