cucumber · aslakhellesoy · Oct 18, 2021 · Oct 18, 2021 · Oct 18, 2021 · Oct 18, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,12 +13,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ### Changed
 
+* Only allow escape character (\) in front of (, ), or \. Throw error otherwise. ([#17](https://github.com/cucumber/tag-expressions/pull/17))
+
 ### Deprecated
 
 ### Removed
 
 ### Fixed
 
+* Document escaping. ([#16](https://github.com/cucumber/tag-expressions/issues/16), [#17](https://github.com/cucumber/tag-expressions/pull/17))
 * [Ruby] Empty expression evaluates to true
 
 ## [4.1.0] - 2021-10-08

diff --git a/README.md b/README.md
@@ -28,6 +28,16 @@ For more complex Tag Expressions you can use parenthesis for clarity, or to chan
 
     (@smoke or @ui) and (not @slow)
 
+## Escaping
+
+If you need to use one of the reserved characters `(`, `)`, `\` or ` ` (whitespace) in a tag,
+you can escape it with a `\`. Examples:
+
+| Gherkin Tag   | Escaped Tag Expression |
+| ------------- | ---------------------- |
+| @x(y)         | @x\(y\)                |
+| @x\y          | @x\\y                  |
+
 ## Migrating from old style tags
 
 Older versions of Cucumber used a different syntax for tags. The list below

diff --git a/go/parser.go b/go/parser.go
@@ -16,7 +16,10 @@ type Evaluatable interface {
 }
 
 func Parse(infix string) (Evaluatable, error) {
-	tokens := tokenize(infix)
+	tokens, err := tokenize(infix)
+	if err != nil {
+		return nil, err
+	}
 	if len(tokens) == 0 {
 		return &trueExpr{}, nil
 	}
@@ -96,51 +99,38 @@ var PREC = map[string]int{
 	"not": 2,
 }
 
-func tokenize(expr string) []string {
+func tokenize(expr string) ([]string, error) {
 	var tokens []string
 	var token bytes.Buffer
 
-	collectToken := func() {
-		if token.Len() > 0 {
-			tokens = append(tokens, token.String())
-			token.Reset()
-		}
-	}
-
 	escaped := false
 	for _, c := range expr {
-		if unicode.IsSpace(c) {
-			collectToken()
-			escaped = false
-			continue
-		}
-
-		ch := string(c)
-
-		switch ch {
-		case "\\":
-			if escaped {
-				token.WriteString(ch)
+		if escaped {
+			if c == '(' || c == ')' || c == '\\' || unicode.IsSpace(c) {
+				token.WriteRune(c)
 				escaped = false
 			} else {
-				escaped = true
+				return nil, fmt.Errorf("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, string(c))
 			}
-		case "(", ")":
-			if escaped {
-				token.WriteString(ch)
-				escaped = false
-			} else {
-				collectToken()
-				tokens = append(tokens, ch)
+		} else if c == '\\' {
+			escaped = true
+		} else if c == '(' || c == ')' || unicode.IsSpace(c) {
+			if token.Len() > 0 {
+				tokens = append(tokens, token.String())
+				token.Reset()
 			}
-		default:
-			token.WriteString(ch)
-			escaped = false
+			if !unicode.IsSpace(c) {
+				tokens = append(tokens, string(c))
+			}
+		} else {
+			token.WriteRune(c)
 		}
 	}
+	if token.Len() > 0 {
+		tokens = append(tokens, token.String())
+	}
 
-	collectToken()
-	return tokens
+	return tokens, nil
 }
 
 func isUnary(token string) bool {
@@ -197,17 +187,11 @@ func (l *literalExpr) Evaluate(variables []string) bool {
 }
 
 func (l *literalExpr) ToString() string {
-	return strings.Replace(
-		strings.Replace(
-			strings.Replace(l.value, "\\", "\\\\", -1),
-			"(",
-			"\\(",
-			-1,
-		),
-		")",
-		"\\)",
-		-1,
-	)
+	s1 := l.value
+	s2 := strings.Replace(s1, "\\", "\\\\", -1)
+	s3 := strings.Replace(s2, "(", "\\(", -1)
+	s4 := strings.Replace(s3, ")", "\\)", -1)
+	return strings.Replace(s4, " ", "\\ ", -1)
 }
 
 type orExpr struct {

diff --git a/java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java b/java/src/main/java/io/cucumber/tagexpressions/TagExpressionParser.java
@@ -6,6 +6,8 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public final class TagExpressionParser {
     private static final Map<String, Assoc> ASSOC = new HashMap<String, Assoc>() {{
@@ -24,16 +26,16 @@ public final class TagExpressionParser {
     private final String infix;
 
     public static Expression parse(String infix) {
-	return new TagExpressionParser(infix).parse();
+        return new TagExpressionParser(infix).parse();
     }
 
     private TagExpressionParser(String infix) {
-	this.infix = infix;
+        this.infix = infix;
     }
 
     private Expression parse() {
         List<String> tokens = tokenize(infix);
-        if(tokens.isEmpty()) return new True();
+        if (tokens.isEmpty()) return new True();
 
         Deque<String> operators = new ArrayDeque<>();
         Deque<Expression> expressions = new ArrayDeque<>();
@@ -49,7 +51,7 @@ private Expression parse() {
                         (ASSOC.get(token) == Assoc.LEFT && PREC.get(token) <= PREC.get(operators.peek()))
                                 ||
                                 (ASSOC.get(token) == Assoc.RIGHT && PREC.get(token) < PREC.get(operators.peek())))
-                        ) {
+                ) {
                     pushExpr(pop(operators), expressions);
                 }
                 operators.push(token);
@@ -89,43 +91,32 @@ private Expression parse() {
 
     private static List<String> tokenize(String expr) {
         List<String> tokens = new ArrayList<>();
-
         boolean isEscaped = false;
-        StringBuilder token = null;
+        StringBuilder token = new StringBuilder();
         for (int i = 0; i < expr.length(); i++) {
             char c = expr.charAt(i);
-            if (ESCAPING_CHAR == c && !isEscaped) {
-                isEscaped = true;
-            } else {
-                if (Character.isWhitespace(c)) { // skip
-                    if (null != token) { // end of token
-                        tokens.add(token.toString());
-                        token = null;
-                    }
+            if (isEscaped) {
+                if (c == '(' || c == ')' || c == '\\' || Character.isWhitespace(c)) {
+                    token.append(c);
+                    isEscaped = false;
                 } else {
-                    switch (c) {
-                        case '(':
-                        case ')':
-                            if (!isEscaped) {
-                                if (null != token) { // end of token
-                                    tokens.add(token.toString());
-                                    token = null;
-                                }
-                                tokens.add(String.valueOf(c));
-                                break;
-                            }
-                        default:
-                            if (null == token) { // start of token
-                                token = new StringBuilder();
-                            }
-                            token.append(c);
-                            break;
-                    }
+                    throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of syntax error: Illegal escape before \"%s\".", expr, c);
                 }
-                isEscaped = false;
+            } else if (c == ESCAPING_CHAR) {
+                isEscaped = true;
+            } else if (c == '(' || c == ')' || Character.isWhitespace(c)) {
+                if (token.length() > 0) {
+                    tokens.add(token.toString());
+                    token = new StringBuilder();
+                }
+                if (!Character.isWhitespace(c)) {
+                    tokens.add(String.valueOf(c));
+                }
+            } else {
+                token.append(c);
             }
         }
-        if (null != token) { // end of token
+        if (token.length() > 0) {
             tokens.add(token.toString());
         }
         return tokens;
@@ -138,7 +129,8 @@ private void check(TokenType expectedTokenType, TokenType tokenType) {
     }
 
     private <T> T pop(Deque<T> stack) {
-        if (stack.isEmpty()) throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix);
+        if (stack.isEmpty())
+            throw new TagExpressionException("Tag expression \"%s\" could not be parsed because of an empty stack", infix);
         return stack.pop();
     }
 
@@ -197,7 +189,11 @@ public boolean evaluate(List<String> variables) {
 
         @Override
         public String toString() {
-            return value.replaceAll("\\\\", "\\\\\\\\").replaceAll("\\(", "\\\\(").replaceAll("\\)", "\\\\)");
+            return value
+                    .replaceAll(Pattern.quote("\\"), Matcher.quoteReplacement("\\\\"))
+                    .replaceAll(Pattern.quote("("), Matcher.quoteReplacement("\\("))
+                    .replaceAll(Pattern.quote(")"), Matcher.quoteReplacement("\\)"))
+                    .replaceAll("\\s", "\\\\ ");
         }
     }
 

diff --git a/javascript/src/index.ts b/javascript/src/index.ts
@@ -55,7 +55,7 @@ export default function parse(infix: string): Node {
         pushExpr(pop(operators), expressions)
       }
       if (operators.length === 0) {
-        throw Error(
+        throw new Error(
           `Tag expression "${infix}" could not be parsed because of syntax error: Unmatched ).`
         )
       }
@@ -72,7 +72,7 @@ export default function parse(infix: string): Node {
 
   while (operators.length > 0) {
     if (peek(operators) === '(') {
-      throw Error(
+      throw new Error(
         `Tag expression "${infix}" could not be parsed because of syntax error: Unmatched (.`
       )
     }
@@ -93,36 +93,33 @@ export default function parse(infix: string): Node {
 function tokenize(expr: string): string[] {
   const tokens = []
   let isEscaped = false
-  let token: string[] | undefined
+  let token: string[] = []
   for (let i = 0; i < expr.length; i++) {
     const c = expr.charAt(i)
-    if ('\\' === c && !isEscaped) {
-      isEscaped = true
-    } else {
-      if (/\s/.test(c)) {
-        // skip
-        if (token) {
-          // end of token
-          tokens.push(token.join(''))
-          token = undefined
-        }
-      } else {
-        if ((c === '(' || c === ')') && !isEscaped) {
-          if (token) {
-            // end of token
-            tokens.push(token.join(''))
-            token = undefined
-          }
-          tokens.push(c)
-          continue
-        }
-        token = token ? token : [] // start of token
+    if (isEscaped) {
+      if (c === '(' || c === ')' || c === '\\' || /\s/.test(c)) {
         token.push(c)
+        isEscaped = false
+      } else {
+        throw new Error(
+          `Tag expression "${expr}" could not be parsed because of syntax error: Illegal escape before "${c}".`
+        )
+      }
+    } else if (c === '\\') {
+      isEscaped = true
+    } else if (c === '(' || c === ')' || /\s/.test(c)) {
+      if (token.length > 0) {
+        tokens.push(token.join(''))
+        token = []
       }
-      isEscaped = false
+      if (!/\s/.test(c)) {
+        tokens.push(c)
+      }
+    } else {
+      token.push(c)
     }
   }
-  if (token) {
+  if (token.length > 0) {
     tokens.push(token.join(''))
   }
   return tokens
@@ -177,7 +174,11 @@ class Literal implements Node {
   }
 
   public toString() {
-    return this.value.replace(/\\/g, '\\\\').replace(/\(/g, '\\(').replace(/\)/g, '\\)')
+    return this.value
+      .replace(/\\/g, '\\\\')
+      .replace(/\(/g, '\\(')
+      .replace(/\)/g, '\\)')
+      .replace(/\s/g, '\\ ')
   }
 }
 

diff --git a/ruby/lib/cucumber/tag_expressions/expressions.rb b/ruby/lib/cucumber/tag_expressions/expressions.rb
@@ -11,7 +11,11 @@ def evaluate(variables)
       end
 
       def to_s
-        @value.gsub(/\\/, "\\\\\\\\").gsub(/\(/, "\\(").gsub(/\)/, "\\)")
+        @value
+          .gsub(/\\/, "\\\\\\\\")
+          .gsub(/\(/, "\\(")
+          .gsub(/\)/, "\\)")
+          .gsub(/\s/, "\\ ")
       end
     end