Skip to content

Commit f75615b

Browse files
authored
Merge pull request #17822 from github/tausbn/python-more-parser-fixes
Python: A few more parser fixes
2 parents 2b678c9 + 5db601a commit f75615b

File tree

15 files changed

+32137
-34407
lines changed

15 files changed

+32137
-34407
lines changed

python/extractor/semmle/python/parser/dump_ast.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,27 @@ def visit(self, node, level=0, visited=None):
9797

9898

9999
class StdoutLogger(logging.Logger):
100+
error_count = 0
100101
def log(self, level, fmt, *args):
101102
sys.stdout.write(fmt % args + "\n")
102103

104+
def info(self, fmt, *args):
105+
self.log(logging.INFO, fmt, *args)
106+
107+
def warn(self, fmt, *args):
108+
self.log(logging.WARN, fmt, *args)
109+
self.error_count += 1
110+
111+
def error(self, fmt, *args):
112+
self.log(logging.ERROR, fmt, *args)
113+
self.error_count += 1
114+
115+
def had_errors(self):
116+
return self.error_count > 0
117+
118+
def reset_error_count(self):
119+
self.error_count = 0
120+
103121
def old_parser(inputfile, logger):
104122
mod = PythonSourceModule(None, inputfile, logger)
105123
logger.close()

python/extractor/semmle/python/parser/tsg_parser.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ def concatenate_stringparts(stringparts, logger):
440440
try:
441441
return "".join(decode_str(stringpart.s) for stringpart in stringparts)
442442
except Exception as ex:
443-
logger.error("Unable to concatenate string %s getting error %s", stringparts, ex)
443+
logger.error("Unable to concatenate string {} getting error {}".format(stringparts, ex))
444444
return stringparts[0].s
445445

446446

python/extractor/tests/parser/assignment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,6 @@
1414

1515
s, *t = u
1616

17+
[v, *w] = x
18+
1719
o,p, = q,r,

python/extractor/tests/parser/comprehensions.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,13 @@
5555
t = tuple(x for y in z)
5656

5757
[( t, ) for v in w]
58+
59+
[# comment
60+
a for b in c # comment
61+
# comment
62+
] # comment
63+
64+
[# comment
65+
d for e in f if g # comment
66+
# comment
67+
] # comment

python/extractor/tests/parser/exception_groups_new.expected

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
Module: [1, 0] - [22, 0]
1+
Module: [1, 0] - [27, 0]
22
body: [
33
Try: [1, 0] - [1, 4]
44
body: [
@@ -133,4 +133,24 @@ Module: [1, 0] - [22, 0]
133133
variable: Variable('v', None)
134134
ctx: Load
135135
]
136+
Try: [23, 0] - [23, 4]
137+
body: [
138+
Pass: [24, 4] - [24, 8]
139+
]
140+
orelse: []
141+
handlers: [
142+
ExceptGroupStmt: [25, 0] - [26, 8]
143+
type:
144+
Name: [25, 8] - [25, 11]
145+
variable: Variable('foo', None)
146+
ctx: Load
147+
name:
148+
Name: [25, 15] - [25, 16]
149+
variable: Variable('e', None)
150+
ctx: Store
151+
body: [
152+
Pass: [26, 4] - [26, 8]
153+
]
154+
]
155+
finalbody: []
136156
]

python/extractor/tests/parser/exception_groups_new.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,8 @@
1919
finally:
2020
u
2121
v
22+
23+
try:
24+
pass
25+
except *foo as e:
26+
pass
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
Module: [1, 0] - [3, 0]
2+
body: [
3+
Assign: [1, 0] - [1, 42]
4+
targets: [
5+
Name: [1, 4] - [1, 26]
6+
variable: Variable('tuple_typed_list_splat', None)
7+
ctx: Store
8+
]
9+
value:
10+
FunctionExpr: [1, 0] - [1, 42]
11+
name: 'tuple_typed_list_splat'
12+
args:
13+
arguments
14+
defaults: []
15+
kw_defaults: []
16+
annotations: []
17+
varargannotation:
18+
Starred: [1, 35] - [1, 40]
19+
value:
20+
Name: [1, 36] - [1, 40]
21+
variable: Variable('ARGS', None)
22+
ctx: Load
23+
ctx: Load
24+
kwargannotation: None
25+
kw_annotations: []
26+
returns: None
27+
inner_scope:
28+
Function: [1, 0] - [1, 42]
29+
name: 'tuple_typed_list_splat'
30+
type_parameters: []
31+
args: []
32+
vararg:
33+
Name: [1, 28] - [1, 32]
34+
variable: Variable('args', None)
35+
ctx: Param
36+
kwonlyargs: []
37+
kwarg: None
38+
body: [
39+
Pass: [2, 4] - [2, 8]
40+
]
41+
]
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def tuple_typed_list_splat(*args : *ARGS):
2+
pass

python/extractor/tests/test_parser.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ def compare_parses(self, filename, logger):
4949
diff = e.output
5050
if diff:
5151
pytest.fail(diff.decode("utf-8"))
52+
self.check_for_stdout_errors(logger)
53+
5254
self.assertEqual(self.capsys.readouterr().err, "")
5355
os.remove(oldfile)
5456
os.remove(newfile)
@@ -84,9 +86,15 @@ def compare_expected(self, filename, logger, new=True ):
8486
diff = e.output
8587
if diff:
8688
pytest.fail(diff.decode("utf-8"))
89+
90+
self.check_for_stdout_errors(logger)
8791
self.assertEqual(self.capsys.readouterr().err, "")
8892
os.remove(actual)
8993

94+
def check_for_stdout_errors(self, logger):
95+
if logger.had_errors():
96+
logger.reset_error_count()
97+
pytest.fail("Errors/warnings were logged to stdout during testing.")
9098

9199
def setup_tests():
92100
test_folder = os.path.join(os.path.dirname(__file__), "parser")

python/extractor/tsg-python/python.tsg

Lines changed: 40 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@
2525
[ (expression_list) (tuple) (tuple_pattern) (pattern_list) ] @tuple
2626
{ let @tuple.node = (ast-node @tuple "Tuple") }
2727

28+
(list_pattern) @list
29+
{ let @list.node = (ast-node @list "List") }
30+
2831
(call) @call { let @call.node = (ast-node @call "Call") }
2932

3033
(for_statement) @for
@@ -1059,30 +1062,38 @@
10591062
let @genexpr.result = tuple
10601063
}
10611064

1062-
; For the final `if` clause, we need to hook it up with the `yield` expression and with its associated `for` clause.
1065+
; For the final clause, we need to hook it up with the rest of the expression.
1066+
; If it's an `if` clause, we need to hook it up with the `yield` expression and with its associated
1067+
; `for` clause.
1068+
; If it's a `for` clause, we only need to create the `yield` expression and hook the clause up with it.
1069+
;
1070+
; It would be tempting to use anchors here, but they just don't work. In particular, an anchor of
1071+
; the form `. (comment)* . )` (which would be needed in order to handle the case where there are
1072+
; comments after the last clause) causes the `tree-sitter` query engine to match _all_ clauses, not
1073+
; just the last one.
1074+
; Instead, we gather up all clauses in a list (these will be in the order they appear in the source
1075+
; code), and extract the last element using a custom Rust function.
10631076
[
10641077
(generator_expression
10651078
body: (_) @body
1066-
(if_clause) @last
1067-
.
1079+
[(if_clause) (for_in_clause)]+ @last_candidates
10681080
) @genexpr
10691081
(list_comprehension
10701082
body: (_) @body
1071-
(if_clause) @last
1072-
.
1083+
[(if_clause) (for_in_clause)]+ @last_candidates
10731084
) @genexpr
10741085
(set_comprehension
10751086
body: (_) @body
1076-
(if_clause) @last
1077-
.
1087+
[(if_clause) (for_in_clause)]+ @last_candidates
10781088
) @genexpr
10791089
(dictionary_comprehension
10801090
body: (_) @body
1081-
(if_clause) @last
1082-
.
1091+
[(if_clause) (for_in_clause)]+ @last_candidates
10831092
) @genexpr
10841093
]
10851094
{
1095+
let last = (get-last-element @last_candidates)
1096+
10861097
let expr = (ast-node @body "Expr")
10871098
let yield = (ast-node @body "Yield")
10881099

@@ -1093,50 +1104,19 @@
10931104

10941105
attr (yield) value = @genexpr.result
10951106
attr (@body.node) ctx = "load"
1096-
edge @last.first_if -> expr
1097-
attr (@last.first_if -> expr) body = 0
10981107

1099-
; Hook up this `if` clause with its `for` clause
1100-
edge @last.for -> @last.node
1101-
attr (@last.for -> @last.node) body = 0
1102-
}
1108+
if (instance-of last "if_clause") {
1109+
edge last.first_if -> expr
1110+
attr (last.first_if -> expr) body = 0
11031111

1104-
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
1105-
[
1106-
(generator_expression
1107-
body: (_) @body
1108-
(for_in_clause) @last
1109-
.
1110-
) @genexpr
1111-
(list_comprehension
1112-
body: (_) @body
1113-
(for_in_clause) @last
1114-
.
1115-
) @genexpr
1116-
(set_comprehension
1117-
body: (_) @body
1118-
(for_in_clause) @last
1119-
.
1120-
) @genexpr
1121-
(dictionary_comprehension
1122-
body: (_) @body
1123-
(for_in_clause) @last
1124-
.
1125-
) @genexpr
1126-
]
1127-
{
1128-
let expr = (ast-node @body "Expr")
1129-
let yield = (ast-node @body "Yield")
1130-
1131-
let @genexpr.expr = expr
1132-
let @genexpr.yield = yield
1133-
1134-
attr (expr) value = yield
1135-
1136-
attr (yield) value = @genexpr.result
1137-
attr (@body.node) ctx = "load"
1138-
edge @last.node -> expr
1139-
attr (@last.node -> expr) body = 0
1112+
; Hook up this `if` clause with its `for` clause
1113+
edge last.for -> last.node
1114+
attr (last.for -> last.node) body = 0
1115+
} else {
1116+
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
1117+
edge last.node -> expr
1118+
attr (last.node -> expr) body = 0
1119+
}
11401120
}
11411121

11421122
; For whatever reason, we do not consider parentheses around the yielded expression if they are present, so
@@ -3180,11 +3160,11 @@
31803160
(typed_parameter
31813161
(identifier) @name
31823162
.
3183-
type: (type (expression) @type)
3163+
type: (type (_) @type)
31843164
)
31853165
(typed_default_parameter
31863166
name: (_) @name
3187-
type: (type (expression) @type)
3167+
type: (type (_) @type)
31883168
value: (_) @value
31893169
)
31903170
] @param
@@ -3239,7 +3219,7 @@
32393219
(list_splat_pattern vararg: (_) @name) @starred
32403220
(typed_parameter
32413221
(list_splat_pattern vararg: (_) @name) @starred
3242-
type: (type (expression) @type)
3222+
type: (type (_) @type)
32433223
)
32443224
]
32453225
) @params
@@ -3256,7 +3236,7 @@
32563236

32573237
; Return type
32583238
(function_definition
3259-
return_type: (type (expression) @type)
3239+
return_type: (type (_) @type)
32603240
) @funcdef
32613241
{
32623242
attr (@funcdef.funcexpr) returns = @type.node
@@ -3270,7 +3250,7 @@
32703250
(dictionary_splat_pattern kwarg: (identifier) @name)
32713251
(typed_parameter
32723252
(dictionary_splat_pattern kwarg: (identifier) @name)
3273-
type: (type (expression) @type)
3253+
type: (type (_) @type)
32743254
)
32753255
]
32763256
) @params
@@ -3447,6 +3427,9 @@
34473427
; Left hand side of an assignment such as `foo, bar = ...`
34483428
(pattern_list element: (_) @elt) @parent
34493429

3430+
; Left hand side of an assignment such as `[foo, bar] = ...`
3431+
(list_pattern element: (_) @elt) @parent
3432+
34503433
; An unadorned tuple (such as in `x = y, z`)
34513434
(expression_list element: (_) @elt) @parent
34523435

@@ -3483,6 +3466,7 @@
34833466
(tuple element: (_) @elt)
34843467
(tuple_pattern element: (_) @elt)
34853468
(pattern_list element: (_) @elt)
3469+
(list_pattern element: (_) @elt)
34863470
(expression_list element: (_) @elt)
34873471
(parenthesized_expression inner: (_) @elt)
34883472
(set element: (_) @elt)

python/extractor/tsg-python/src/main.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,22 @@ pub mod extra_functions {
463463
Ok(Value::Integer(left % right))
464464
}
465465
}
466+
467+
pub struct GetLastElement;
468+
469+
impl Function for GetLastElement {
470+
fn call(
471+
&self,
472+
_graph: &mut Graph,
473+
_source: &str,
474+
parameters: &mut dyn Parameters,
475+
) -> Result<Value, ExecutionError> {
476+
let list = parameters.param()?.into_list()?;
477+
parameters.finish()?;
478+
let last = list.last().unwrap_or(&Value::Null).clone();
479+
Ok(last)
480+
}
481+
}
466482
}
467483

468484
fn main() -> Result<()> {
@@ -562,6 +578,12 @@ fn main() -> Result<()> {
562578
);
563579

564580
functions.add(Identifier::from("mod"), extra_functions::Modulo);
581+
582+
functions.add(
583+
Identifier::from("get-last-element"),
584+
extra_functions::GetLastElement,
585+
);
586+
565587
let globals = Variables::new();
566588
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
567589
let graph = file

0 commit comments

Comments
 (0)