Skip to content

Commit 6c5a813

Browse files
authored
Fix indented annotations and pragmas (#8)
Pygments offers the `bygroups` helper to split simple context-sensitive tokens (like annotations) up into components based on regex groups, which lets us avoid including leading whitespace in annotation and pragma tokens while remaining newline-sensitive. The whitespace tokenisation needs to break after runs of newlines to allow the newline-sensitive tokens to detect that they _are_ at the start of a line.
1 parent d6d0e07 commit 6c5a813

16 files changed

+646
-269
lines changed
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
---
2+
features:
3+
- |
4+
Annotations and pragmas that are indented will now tokenise correctly, without including the
5+
leading whitespace of the line.
6+
fixes:
7+
- |
8+
The free-form payloads of annotations and pragmas no longer include the preceding spaces
9+
that only separate the payload from the annotation/pragma keywords.
10+
- |
11+
Indented annotations and pragmas now tokenise reliably; previously, they
12+
were highly sensitive to whitespace on the preceding lines.
13+
other:
14+
- |
15+
Whitespace tokens now split after newlines, unless the following character is also a newline
16+
character. This is so that newline-sensitive tokenisation like annotations and pragmas can
17+
match correctly.

scripts/bless_examples.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Rewrite the output files in `tests/examples` with the new output of the lexer. The resulting
2+
# output must be checked by hand for correctness, since this blesses it as the reference for the test
3+
# suite.
4+
5+
import pathlib
6+
import openqasm_pygments
7+
8+
9+
def rewrite(fname, lexer):
10+
with open(fname, "r", encoding="utf-8") as fptr:
11+
content = fptr.read().strip()
12+
return "\n".join(
13+
f"{repr(token):<19s} {ttype}" for ttype, token in lexer.get_tokens(content)
14+
)
15+
16+
17+
if __name__ == "__main__":
18+
repo_root = pathlib.Path(__file__).parents[1]
19+
examples_dir = repo_root / "tests" / "examples"
20+
configs = [
21+
(examples_dir / "qasm2", (".qasm", ".inc"), openqasm_pygments.OpenQASM2Lexer()),
22+
(examples_dir / "qasm3", (".qasm",), openqasm_pygments.OpenQASM3Lexer()),
23+
(
24+
examples_dir / "openqasm",
25+
(".openpulse",),
26+
openqasm_pygments.OpenPulseLexer(),
27+
),
28+
]
29+
for dir_, suffixes, lexer in configs:
30+
for suffix in suffixes:
31+
for file in dir_.glob(f"**/*{suffix}"):
32+
new_tokens = rewrite(file, lexer)
33+
with open(str(file) + ".output", "w", encoding="utf-8") as fptr:
34+
print(new_tokens, file=fptr)

src/openqasm_pygments/qasm3.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Union, Mapping, Optional, Sequence, Tuple
88

99
from pygments import token
10-
from pygments.lexer import Lexer, RegexLexer, words, include
10+
from pygments.lexer import Lexer, RegexLexer, words, include, bygroups
1111
from pygments.lexers import get_lexer_by_name
1212
from pygments.util import ClassNotFound
1313

@@ -74,9 +74,19 @@ def _defcalgrammar_callback(self, match):
7474

7575
tokens = {
7676
"root": [
77-
(r"^[ \t]*#?pragma", token.Comment.Preproc, "pragma"),
78-
(r"^[ \t]*@\w+(\.\w+)*", token.Name.Decorator, "annotation"),
79-
(r"[ \r\n\t]+", token.Whitespace),
77+
(
78+
r"^([ \t]*)(#?pragma)([ \t]*)",
79+
bygroups(token.Whitespace, token.Comment.Preproc, token.Whitespace),
80+
"annotation",
81+
),
82+
(
83+
r"^([ \t]*)(@\w+(\.\w+)*)([ \t]*)",
84+
bygroups(token.Whitespace, token.Name.Decorator, token.Whitespace),
85+
"annotation",
86+
),
87+
# Whitespace tokens end after a run of newlines so that newline-sensitive matches like annotations
88+
# get to see the start of the line (the `^` anchor) in their match.
89+
(r"([ \r\t]+\n*)|(\n+)", token.Whitespace),
8090
(r"\bOPENQASM\b", token.Comment.Preproc, "version"),
8191
(r"//.*$", token.Comment.Single),
8292
(r"/\*", token.Comment.Multiline, "comment"),

tests/examples/qasm3/adder.qasm.output

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,23 +31,26 @@
3131
'c' Token.Name
3232
' ' Token.Text.Whitespace
3333
'{' Token.Punctuation
34-
'\n ' Token.Text.Whitespace
34+
'\n' Token.Text.Whitespace
35+
' ' Token.Text.Whitespace
3536
'cx' Token.Name.Function
3637
' ' Token.Text.Whitespace
3738
'c' Token.Name
3839
',' Token.Punctuation
3940
' ' Token.Text.Whitespace
4041
'b' Token.Name
4142
';' Token.Punctuation
42-
'\n ' Token.Text.Whitespace
43+
'\n' Token.Text.Whitespace
44+
' ' Token.Text.Whitespace
4345
'cx' Token.Name.Function
4446
' ' Token.Text.Whitespace
4547
'c' Token.Name
4648
',' Token.Punctuation
4749
' ' Token.Text.Whitespace
4850
'a' Token.Name
4951
';' Token.Punctuation
50-
'\n ' Token.Text.Whitespace
52+
'\n' Token.Text.Whitespace
53+
' ' Token.Text.Whitespace
5154
'ccx' Token.Name.Function
5255
' ' Token.Text.Whitespace
5356
'a' Token.Name
@@ -74,7 +77,8 @@
7477
'c' Token.Name
7578
' ' Token.Text.Whitespace
7679
'{' Token.Punctuation
77-
'\n ' Token.Text.Whitespace
80+
'\n' Token.Text.Whitespace
81+
' ' Token.Text.Whitespace
7882
'ccx' Token.Name.Function
7983
' ' Token.Text.Whitespace
8084
'a' Token.Name
@@ -85,15 +89,17 @@
8589
' ' Token.Text.Whitespace
8690
'c' Token.Name
8791
';' Token.Punctuation
88-
'\n ' Token.Text.Whitespace
92+
'\n' Token.Text.Whitespace
93+
' ' Token.Text.Whitespace
8994
'cx' Token.Name.Function
9095
' ' Token.Text.Whitespace
9196
'c' Token.Name
9297
',' Token.Punctuation
9398
' ' Token.Text.Whitespace
9499
'a' Token.Name
95100
';' Token.Punctuation
96-
'\n ' Token.Text.Whitespace
101+
'\n' Token.Text.Whitespace
102+
' ' Token.Text.Whitespace
97103
'cx' Token.Name.Function
98104
' ' Token.Text.Whitespace
99105
'a' Token.Name
@@ -212,7 +218,8 @@
212218
']' Token.Punctuation
213219
' ' Token.Text.Whitespace
214220
'{' Token.Punctuation
215-
'\n ' Token.Text.Whitespace
221+
'\n' Token.Text.Whitespace
222+
' ' Token.Text.Whitespace
216223
'if' Token.Keyword
217224
'(' Token.Punctuation
218225
'bool' Token.Keyword.Type
@@ -231,7 +238,8 @@
231238
'i' Token.Name
232239
']' Token.Punctuation
233240
';' Token.Punctuation
234-
'\n ' Token.Text.Whitespace
241+
'\n' Token.Text.Whitespace
242+
' ' Token.Text.Whitespace
235243
'if' Token.Keyword
236244
'(' Token.Punctuation
237245
'bool' Token.Keyword.Type

tests/examples/qasm3/arrays.qasm.output

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,23 +100,26 @@
100100
'=' Token.Operator
101101
' ' Token.Text.Whitespace
102102
'{' Token.Punctuation
103-
'\n ' Token.Text.Whitespace
103+
'\n' Token.Text.Whitespace
104+
' ' Token.Text.Whitespace
104105
'{' Token.Punctuation
105106
'0.5' Token.Literal.Number.Float
106107
',' Token.Punctuation
107108
' ' Token.Text.Whitespace
108109
'0.5' Token.Literal.Number.Float
109110
'}' Token.Punctuation
110111
',' Token.Punctuation
111-
'\n ' Token.Text.Whitespace
112+
'\n' Token.Text.Whitespace
113+
' ' Token.Text.Whitespace
112114
'{' Token.Punctuation
113115
'1.0' Token.Literal.Number.Float
114116
',' Token.Punctuation
115117
' ' Token.Text.Whitespace
116118
'2.0' Token.Literal.Number.Float
117119
'}' Token.Punctuation
118120
',' Token.Punctuation
119-
'\n ' Token.Text.Whitespace
121+
'\n' Token.Text.Whitespace
122+
' ' Token.Text.Whitespace
120123
'{' Token.Punctuation
121124
'-' Token.Operator
122125
'0.4' Token.Literal.Number.Float
@@ -125,7 +128,8 @@
125128
'0.7' Token.Literal.Number.Float
126129
'}' Token.Punctuation
127130
',' Token.Punctuation
128-
'\n ' Token.Text.Whitespace
131+
'\n' Token.Text.Whitespace
132+
' ' Token.Text.Whitespace
129133
'{' Token.Punctuation
130134
'1.3' Token.Literal.Number.Float
131135
',' Token.Punctuation
@@ -505,9 +509,11 @@
505509
')' Token.Punctuation
506510
' ' Token.Text.Whitespace
507511
'{' Token.Punctuation
508-
'\n ' Token.Text.Whitespace
512+
'\n' Token.Text.Whitespace
513+
' ' Token.Text.Whitespace
509514
'// Within this block, ``in_array`` can be read from, but not written to,' Token.Comment.Single
510-
'\n ' Token.Text.Whitespace
515+
'\n' Token.Text.Whitespace
516+
' ' Token.Text.Whitespace
511517
'// whereas ``out_array`` can be both read from and written to.' Token.Comment.Single
512518
'\n' Token.Text.Whitespace
513519
'}' Token.Punctuation
@@ -543,7 +549,8 @@
543549
')' Token.Punctuation
544550
' ' Token.Text.Whitespace
545551
'{' Token.Punctuation
546-
'\n ' Token.Text.Whitespace
552+
'\n' Token.Text.Whitespace
553+
' ' Token.Text.Whitespace
547554
'uint' Token.Keyword.Type
548555
'[' Token.Punctuation
549556
'32' Token.Literal.Number
@@ -561,7 +568,8 @@
561568
'0' Token.Literal.Number
562569
')' Token.Punctuation
563570
';' Token.Punctuation
564-
'\n ' Token.Text.Whitespace
571+
'\n' Token.Text.Whitespace
572+
' ' Token.Text.Whitespace
565573
'uint' Token.Keyword.Type
566574
'[' Token.Punctuation
567575
'32' Token.Literal.Number
@@ -579,7 +587,8 @@
579587
'1' Token.Literal.Number
580588
')' Token.Punctuation
581589
';' Token.Punctuation
582-
'\n ' Token.Text.Whitespace
590+
'\n' Token.Text.Whitespace
591+
' ' Token.Text.Whitespace
583592
'uint' Token.Keyword.Type
584593
'[' Token.Punctuation
585594
'32' Token.Literal.Number

tests/examples/qasm3/cphase.qasm.output

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
'b' Token.Name
1212
'\n' Token.Text.Whitespace
1313
'{' Token.Punctuation
14-
'\n ' Token.Text.Whitespace
14+
'\n' Token.Text.Whitespace
15+
' ' Token.Text.Whitespace
1516
'U' Token.Name.Builtin
1617
'(' Token.Punctuation
1718
'0' Token.Literal.Number
@@ -29,15 +30,17 @@
2930
' ' Token.Text.Whitespace
3031
'a' Token.Name
3132
';' Token.Punctuation
32-
'\n ' Token.Text.Whitespace
33+
'\n' Token.Text.Whitespace
34+
' ' Token.Text.Whitespace
3335
'CX' Token.Name.Function
3436
' ' Token.Text.Whitespace
3537
'a' Token.Name
3638
',' Token.Punctuation
3739
' ' Token.Text.Whitespace
3840
'b' Token.Name
3941
';' Token.Punctuation
40-
'\n ' Token.Text.Whitespace
42+
'\n' Token.Text.Whitespace
43+
' ' Token.Text.Whitespace
4144
'U' Token.Name.Builtin
4245
'(' Token.Punctuation
4346
'0' Token.Literal.Number
@@ -56,15 +59,17 @@
5659
' ' Token.Text.Whitespace
5760
'b' Token.Name
5861
';' Token.Punctuation
59-
'\n ' Token.Text.Whitespace
62+
'\n' Token.Text.Whitespace
63+
' ' Token.Text.Whitespace
6064
'CX' Token.Name.Function
6165
' ' Token.Text.Whitespace
6266
'a' Token.Name
6367
',' Token.Punctuation
6468
' ' Token.Text.Whitespace
6569
'b' Token.Name
6670
';' Token.Punctuation
67-
'\n ' Token.Text.Whitespace
71+
'\n' Token.Text.Whitespace
72+
' ' Token.Text.Whitespace
6873
'U' Token.Name.Builtin
6974
'(' Token.Punctuation
7075
'0' Token.Literal.Number

0 commit comments

Comments
 (0)