openqasm
diff --git a/‎releasenotes/notes/indented-annotations-661bf85c6702a134.yaml
Lines changed: 17 additions & 0 deletions b/‎releasenotes/notes/indented-annotations-661bf85c6702a134.yaml
Lines changed: 17 additions & 0 deletions
diff --git a/‎scripts/bless_examples.py
Lines changed: 34 additions & 0 deletions b/‎scripts/bless_examples.py
Lines changed: 34 additions & 0 deletions
diff --git a/‎src/openqasm_pygments/qasm3.py
Lines changed: 14 additions & 4 deletions b/‎src/openqasm_pygments/qasm3.py
Lines changed: 14 additions & 4 deletions
diff --git a/‎tests/examples/qasm3/adder.qasm.output
Lines changed: 16 additions & 8 deletions b/‎tests/examples/qasm3/adder.qasm.output
Lines changed: 16 additions & 8 deletions
diff --git a/‎tests/examples/qasm3/arrays.qasm.output
Lines changed: 18 additions & 9 deletions b/‎tests/examples/qasm3/arrays.qasm.output
Lines changed: 18 additions & 9 deletions
diff --git a/‎tests/examples/qasm3/cphase.qasm.output
Lines changed: 10 additions & 5 deletions b/‎tests/examples/qasm3/cphase.qasm.output
Lines changed: 10 additions & 5 deletions
@@ -0,0 +1,17 @@
+---
+features:
+  - |
+    Annotations and pragmas that are indented will now tokenise correctly, without including the
+    leading whitespace of the line.
+fixes:
+  - |
+    The free-form payloads of annotations and pragmas will no longer include the preceding spaces
+    that only separate the payload from the annotation/pragma keywords.
+  - |
+    Indented annotations and pragmas will now more reliably tokenise correctly; previously, they
+    were highly sensitive to whitespace on the preceding lines.
+other:
+  - |
+    Whitespace tokens now split after newlines, except if the following character is also a newline
+    character.  This is so that newline-sensitive tokenisation like annotations and pragmas can
+    match correctly.
@@ -0,0 +1,34 @@
+# Rewrite the `.output` files in `tests/examples` with the current output of the lexers.  The new
+# files should be checked by hand to confirm they are actually correct, since this script blesses
+# them for the test suite.
+
+import pathlib
+import openqasm_pygments
+
+
+def rewrite(fname, lexer):
+    """Lex the stripped text of ``fname`` and return one ``repr(value) type`` line per token."""
+    source = pathlib.Path(fname).read_text(encoding="utf-8").strip()
+    # Left-justify each repr to 19 columns so that short token values line up with their types.
+    rows = (f"{value!r:<19s} {kind}" for kind, value in lexer.get_tokens(source))
+    return "\n".join(rows)
+
+
+if __name__ == "__main__":
+    # `parents[1]` is the directory above the one holding this script, i.e. the repository root.
+    examples_dir = pathlib.Path(__file__).parents[1] / "tests" / "examples"
+    configs = [
+        (examples_dir / "qasm2", (".qasm", ".inc"), openqasm_pygments.OpenQASM2Lexer()),
+        (examples_dir / "qasm3", (".qasm",), openqasm_pygments.OpenQASM3Lexer()),
+        (
+            examples_dir / "openqasm",
+            (".openpulse",),
+            openqasm_pygments.OpenPulseLexer(),
+        ),
+    ]
+    for search_dir, extensions, lexer in configs:
+        for extension in extensions:
+            for example in search_dir.glob(f"**/*{extension}"):
+                blessed = rewrite(example, lexer)
+                with open(f"{example}.output", "w", encoding="utf-8") as fptr:
+                    print(blessed, file=fptr)
@@ -7,7 +7,7 @@
 from typing import Union, Mapping, Optional, Sequence, Tuple
 
 from pygments import token
-from pygments.lexer import Lexer, RegexLexer, words, include
+from pygments.lexer import Lexer, RegexLexer, words, include, bygroups
 from pygments.lexers import get_lexer_by_name
 from pygments.util import ClassNotFound
 
@@ -74,9 +74,19 @@ def _defcalgrammar_callback(self, match):
 
     tokens = {
         "root": [
-            (r"^[ \t]*#?pragma", token.Comment.Preproc, "pragma"),
-            (r"^[ \t]*@\w+(\.\w+)*", token.Name.Decorator, "annotation"),
-            (r"[ \r\n\t]+", token.Whitespace),
+            (
+                r"^([ \t]*)(#?pragma)([ \t]*)",
+                bygroups(token.Whitespace, token.Comment.Preproc, token.Whitespace),
+                "annotation",
+            ),
+            (
+                r"^([ \t]*)(@\w+(?:\.\w+)*)([ \t]*)",  # `(?:` keeps three groups for `bygroups`
+                bygroups(token.Whitespace, token.Name.Decorator, token.Whitespace),
+                "annotation",
+            ),
+            # A whitespace token never continues past a run of newlines, so that line-anchored (`^`)
+            # rules such as annotations and pragmas can match at the start of the following line.
+            (r"([ \r\t]+\n*)|(\n+)", token.Whitespace),
             (r"\bOPENQASM\b", token.Comment.Preproc, "version"),
             (r"//.*$", token.Comment.Single),
             (r"/\*", token.Comment.Multiline, "comment"),
 
@@ -31,23 +31,26 @@
 'c'                 Token.Name
 ' '                 Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'cx'                Token.Name.Function
 ' '                 Token.Text.Whitespace
 'c'                 Token.Name
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 'b'                 Token.Name
 ';'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'cx'                Token.Name.Function
 ' '                 Token.Text.Whitespace
 'c'                 Token.Name
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
 ';'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'ccx'               Token.Name.Function
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
@@ -74,7 +77,8 @@
 'c'                 Token.Name
 ' '                 Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'ccx'               Token.Name.Function
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
@@ -85,15 +89,17 @@
 ' '                 Token.Text.Whitespace
 'c'                 Token.Name
 ';'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'cx'                Token.Name.Function
 ' '                 Token.Text.Whitespace
 'c'                 Token.Name
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
 ';'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'cx'                Token.Name.Function
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
@@ -212,7 +218,8 @@
 ']'                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'if'                Token.Keyword
 '('                 Token.Punctuation
 'bool'              Token.Keyword.Type
@@ -231,7 +238,8 @@
 'i'                 Token.Name
 ']'                 Token.Punctuation
 ';'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'if'                Token.Keyword
 '('                 Token.Punctuation
 'bool'              Token.Keyword.Type
 
@@ -100,23 +100,26 @@
 '='                 Token.Operator
 ' '                 Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 '{'                 Token.Punctuation
 '0.5'               Token.Literal.Number.Float
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 '0.5'               Token.Literal.Number.Float
 '}'                 Token.Punctuation
 ','                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 '{'                 Token.Punctuation
 '1.0'               Token.Literal.Number.Float
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 '2.0'               Token.Literal.Number.Float
 '}'                 Token.Punctuation
 ','                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 '{'                 Token.Punctuation
 '-'                 Token.Operator
 '0.4'               Token.Literal.Number.Float
@@ -125,7 +128,8 @@
 '0.7'               Token.Literal.Number.Float
 '}'                 Token.Punctuation
 ','                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 '{'                 Token.Punctuation
 '1.3'               Token.Literal.Number.Float
 ','                 Token.Punctuation
@@ -505,9 +509,11 @@
 ')'                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 '// Within this block, ``in_array`` can be read from, but not written to,' Token.Comment.Single
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 '// whereas ``out_array`` can be both read from and written to.' Token.Comment.Single
 '\n'                Token.Text.Whitespace
 '}'                 Token.Punctuation
@@ -543,7 +549,8 @@
 ')'                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'uint'              Token.Keyword.Type
 '['                 Token.Punctuation
 '32'                Token.Literal.Number
@@ -561,7 +568,8 @@
 '0'                 Token.Literal.Number
 ')'                 Token.Punctuation
 ';'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'uint'              Token.Keyword.Type
 '['                 Token.Punctuation
 '32'                Token.Literal.Number
@@ -579,7 +587,8 @@
 '1'                 Token.Literal.Number
 ')'                 Token.Punctuation
 ';'                 Token.Punctuation
-'\n    '            Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'    '              Token.Text.Whitespace
 'uint'              Token.Keyword.Type
 '['                 Token.Punctuation
 '32'                Token.Literal.Number
 
@@ -11,7 +11,8 @@
 'b'                 Token.Name
 '\n'                Token.Text.Whitespace
 '{'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'U'                 Token.Name.Builtin
 '('                 Token.Punctuation
 '0'                 Token.Literal.Number
@@ -29,15 +30,17 @@
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
 ';'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'CX'                Token.Name.Function
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 'b'                 Token.Name
 ';'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'U'                 Token.Name.Builtin
 '('                 Token.Punctuation
 '0'                 Token.Literal.Number
@@ -56,15 +59,17 @@
 ' '                 Token.Text.Whitespace
 'b'                 Token.Name
 ';'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'CX'                Token.Name.Function
 ' '                 Token.Text.Whitespace
 'a'                 Token.Name
 ','                 Token.Punctuation
 ' '                 Token.Text.Whitespace
 'b'                 Token.Name
 ';'                 Token.Punctuation
-'\n  '              Token.Text.Whitespace
+'\n'                Token.Text.Whitespace
+'  '                Token.Text.Whitespace
 'U'                 Token.Name.Builtin
 '('                 Token.Punctuation
 '0'                 Token.Literal.Number