Skip to content

Python: A few more parser fixes #17822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions python/extractor/semmle/python/parser/dump_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,27 @@ def visit(self, node, level=0, visited=None):


class StdoutLogger(logging.Logger):
error_count = 0
def log(self, level, fmt, *args):
sys.stdout.write(fmt % args + "\n")

def info(self, fmt, *args):
self.log(logging.INFO, fmt, *args)

def warn(self, fmt, *args):
self.log(logging.WARN, fmt, *args)
self.error_count += 1

def error(self, fmt, *args):
self.log(logging.ERROR, fmt, *args)
self.error_count += 1

def had_errors(self):
return self.error_count > 0

def reset_error_count(self):
self.error_count = 0

def old_parser(inputfile, logger):
mod = PythonSourceModule(None, inputfile, logger)
logger.close()
Expand Down
2 changes: 1 addition & 1 deletion python/extractor/semmle/python/parser/tsg_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,7 @@ def concatenate_stringparts(stringparts, logger):
try:
return "".join(decode_str(stringpart.s) for stringpart in stringparts)
except Exception as ex:
logger.error("Unable to concatenate string %s getting error %s", stringparts, ex)
logger.error("Unable to concatenate string {} getting error {}".format(stringparts, ex))
return stringparts[0].s


Expand Down
2 changes: 2 additions & 0 deletions python/extractor/tests/parser/assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@

s, *t = u

[v, *w] = x

o,p, = q,r,
10 changes: 10 additions & 0 deletions python/extractor/tests/parser/comprehensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,13 @@
t = tuple(x for y in z)

[( t, ) for v in w]

[# comment
a for b in c # comment
# comment
] # comment

[# comment
d for e in f if g # comment
# comment
] # comment
22 changes: 21 additions & 1 deletion python/extractor/tests/parser/exception_groups_new.expected
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Module: [1, 0] - [22, 0]
Module: [1, 0] - [27, 0]
body: [
Try: [1, 0] - [1, 4]
body: [
Expand Down Expand Up @@ -133,4 +133,24 @@ Module: [1, 0] - [22, 0]
variable: Variable('v', None)
ctx: Load
]
Try: [23, 0] - [23, 4]
body: [
Pass: [24, 4] - [24, 8]
]
orelse: []
handlers: [
ExceptGroupStmt: [25, 0] - [26, 8]
type:
Name: [25, 8] - [25, 11]
variable: Variable('foo', None)
ctx: Load
name:
Name: [25, 15] - [25, 16]
variable: Variable('e', None)
ctx: Store
body: [
Pass: [26, 4] - [26, 8]
]
]
finalbody: []
]
5 changes: 5 additions & 0 deletions python/extractor/tests/parser/exception_groups_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,8 @@
finally:
u
v

try:
pass
except *foo as e:
pass
41 changes: 41 additions & 0 deletions python/extractor/tests/parser/functions_new.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
Module: [1, 0] - [3, 0]
body: [
Assign: [1, 0] - [1, 42]
targets: [
Name: [1, 4] - [1, 26]
variable: Variable('tuple_typed_list_splat', None)
ctx: Store
]
value:
FunctionExpr: [1, 0] - [1, 42]
name: 'tuple_typed_list_splat'
args:
arguments
defaults: []
kw_defaults: []
annotations: []
varargannotation:
Starred: [1, 35] - [1, 40]
value:
Name: [1, 36] - [1, 40]
variable: Variable('ARGS', None)
ctx: Load
ctx: Load
kwargannotation: None
kw_annotations: []
returns: None
inner_scope:
Function: [1, 0] - [1, 42]
name: 'tuple_typed_list_splat'
type_parameters: []
args: []
vararg:
Name: [1, 28] - [1, 32]
variable: Variable('args', None)
ctx: Param
kwonlyargs: []
kwarg: None
body: [
Pass: [2, 4] - [2, 8]
]
]
2 changes: 2 additions & 0 deletions python/extractor/tests/parser/functions_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
def tuple_typed_list_splat(*args : *ARGS):
pass
8 changes: 8 additions & 0 deletions python/extractor/tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def compare_parses(self, filename, logger):
diff = e.output
if diff:
pytest.fail(diff.decode("utf-8"))
self.check_for_stdout_errors(logger)

self.assertEqual(self.capsys.readouterr().err, "")
os.remove(oldfile)
os.remove(newfile)
Expand Down Expand Up @@ -84,9 +86,15 @@ def compare_expected(self, filename, logger, new=True ):
diff = e.output
if diff:
pytest.fail(diff.decode("utf-8"))

self.check_for_stdout_errors(logger)
self.assertEqual(self.capsys.readouterr().err, "")
os.remove(actual)

def check_for_stdout_errors(self, logger):
if logger.had_errors():
logger.reset_error_count()
pytest.fail("Errors/warnings were logged to stdout during testing.")

def setup_tests():
test_folder = os.path.join(os.path.dirname(__file__), "parser")
Expand Down
96 changes: 40 additions & 56 deletions python/extractor/tsg-python/python.tsg
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
[ (expression_list) (tuple) (tuple_pattern) (pattern_list) ] @tuple
{ let @tuple.node = (ast-node @tuple "Tuple") }

(list_pattern) @list
{ let @list.node = (ast-node @list "List") }

(call) @call { let @call.node = (ast-node @call "Call") }

(for_statement) @for
Expand Down Expand Up @@ -1059,30 +1062,38 @@
let @genexpr.result = tuple
}

; For the final `if` clause, we need to hook it up with the `yield` expression and with its associated `for` clause.
; For the final clause, we need to hook it up with the rest of the expression.
; If it's an `if` clause, we need to hook it up with the `yield` expression and with its associated
; `for` clause.
; If it's a `for` clause, we only need to create and hook it up with the `yield` expression.
;
; It would be tempting to use anchors here, but they just don't work. In particular, an anchor of
; the form `. (comment)* . )` (which would be needed in order to handle the case where there are
; comments after the last clause) cause the `tree-sitter` query engine to match _all_ clauses, not
; just the last one.
; Instead, we gather up all clauses in a list (these will be in the order they appear in the source
; code), and extract the last element using a custom Rust function.
[
(generator_expression
body: (_) @body
(if_clause) @last
.
[(if_clause) (for_in_clause)]+ @last_candidates
) @genexpr
(list_comprehension
body: (_) @body
(if_clause) @last
.
[(if_clause) (for_in_clause)]+ @last_candidates
) @genexpr
(set_comprehension
body: (_) @body
(if_clause) @last
.
[(if_clause) (for_in_clause)]+ @last_candidates
) @genexpr
(dictionary_comprehension
body: (_) @body
(if_clause) @last
.
[(if_clause) (for_in_clause)]+ @last_candidates
) @genexpr
]
{
let last = (get-last-element @last_candidates)

let expr = (ast-node @body "Expr")
let yield = (ast-node @body "Yield")

Expand All @@ -1093,50 +1104,19 @@

attr (yield) value = @genexpr.result
attr (@body.node) ctx = "load"
edge @last.first_if -> expr
attr (@last.first_if -> expr) body = 0

; Hook up this `if` clause with its `for` clause
edge @last.for -> @last.node
attr (@last.for -> @last.node) body = 0
}
if (instance-of last "if_clause") {
edge last.first_if -> expr
attr (last.first_if -> expr) body = 0

; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
[
(generator_expression
body: (_) @body
(for_in_clause) @last
.
) @genexpr
(list_comprehension
body: (_) @body
(for_in_clause) @last
.
) @genexpr
(set_comprehension
body: (_) @body
(for_in_clause) @last
.
) @genexpr
(dictionary_comprehension
body: (_) @body
(for_in_clause) @last
.
) @genexpr
]
{
let expr = (ast-node @body "Expr")
let yield = (ast-node @body "Yield")

let @genexpr.expr = expr
let @genexpr.yield = yield

attr (expr) value = yield

attr (yield) value = @genexpr.result
attr (@body.node) ctx = "load"
edge @last.node -> expr
attr (@last.node -> expr) body = 0
; Hook up this `if` clause with its `for` clause
edge last.for -> last.node
attr (last.for -> last.node) body = 0
} else {
; If the last clause is a `for`, we only have to create and hook up the `yield` expression.
edge last.node -> expr
attr (last.node -> expr) body = 0
}
}

; For whatever reason, we do not consider parentheses around the yielded expression if they are present, so
Expand Down Expand Up @@ -3169,11 +3149,11 @@
(typed_parameter
(identifier) @name
.
type: (type (expression) @type)
type: (type (_) @type)
)
(typed_default_parameter
name: (_) @name
type: (type (expression) @type)
type: (type (_) @type)
value: (_) @value
)
] @param
Expand Down Expand Up @@ -3228,7 +3208,7 @@
(list_splat_pattern vararg: (_) @name) @starred
(typed_parameter
(list_splat_pattern vararg: (_) @name) @starred
type: (type (expression) @type)
type: (type (_) @type)
)
]
) @params
Expand All @@ -3245,7 +3225,7 @@

; Return type
(function_definition
return_type: (type (expression) @type)
return_type: (type (_) @type)
) @funcdef
{
attr (@funcdef.funcexpr) returns = @type.node
Expand All @@ -3259,7 +3239,7 @@
(dictionary_splat_pattern kwarg: (identifier) @name)
(typed_parameter
(dictionary_splat_pattern kwarg: (identifier) @name)
type: (type (expression) @type)
type: (type (_) @type)
)
]
) @params
Expand Down Expand Up @@ -3436,6 +3416,9 @@
; Left hand side of an assignment such as `foo, bar = ...`
(pattern_list element: (_) @elt) @parent

; Left hand side of an assignment such as `[foo, bar] = ...`
(list_pattern element: (_) @elt) @parent

; An unadorned tuple (such as in `x = y, z`)
(expression_list element: (_) @elt) @parent

Expand Down Expand Up @@ -3472,6 +3455,7 @@
(tuple element: (_) @elt)
(tuple_pattern element: (_) @elt)
(pattern_list element: (_) @elt)
(list_pattern element: (_) @elt)
(expression_list element: (_) @elt)
(parenthesized_expression inner: (_) @elt)
(set element: (_) @elt)
Expand Down
22 changes: 22 additions & 0 deletions python/extractor/tsg-python/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,22 @@ pub mod extra_functions {
Ok(Value::Integer(left % right))
}
}

pub struct GetLastElement;

impl Function for GetLastElement {
fn call(
&self,
_graph: &mut Graph,
_source: &str,
parameters: &mut dyn Parameters,
) -> Result<Value, ExecutionError> {
let list = parameters.param()?.into_list()?;
parameters.finish()?;
let last = list.last().unwrap_or(&Value::Null).clone();
Ok(last)
}
}
}

fn main() -> Result<()> {
Expand Down Expand Up @@ -562,6 +578,12 @@ fn main() -> Result<()> {
);

functions.add(Identifier::from("mod"), extra_functions::Modulo);

functions.add(
Identifier::from("get-last-element"),
extra_functions::GetLastElement,
);

let globals = Variables::new();
let mut config = ExecutionConfig::new(&mut functions, &globals).lazy(false);
let graph = file
Expand Down
Loading
Loading