Skip to content

Commit 29c62a9

Browse files
committed
Use Jupyter mode for the parser with Notebook files
1 parent 025fa4e commit 29c62a9

File tree

16 files changed

+206
-42
lines changed

16 files changed

+206
-42
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"execution_count": null,
3+
"cell_type": "code",
4+
"id": "1",
5+
"metadata": {},
6+
"outputs": [],
7+
"source": ["%%timeit\n", "print('hello world')"]
8+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "eab4754a-d6df-4b41-8ee8-7e23aef440f9",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import math\n",
11+
"\n",
12+
"%matplotlib inline\n",
13+
"\n",
14+
"import os\n",
15+
"\n",
16+
"_ = math.pi"
17+
]
18+
}
19+
],
20+
"metadata": {
21+
"kernelspec": {
22+
"display_name": "Python (ruff)",
23+
"language": "python",
24+
"name": "ruff"
25+
},
26+
"language_info": {
27+
"codemirror_mode": {
28+
"name": "ipython",
29+
"version": 3
30+
},
31+
"file_extension": ".py",
32+
"mimetype": "text/x-python",
33+
"name": "python",
34+
"nbconvert_exporter": "python",
35+
"pygments_lexer": "ipython3",
36+
"version": "3.11.3"
37+
}
38+
},
39+
"nbformat": 4,
40+
"nbformat_minor": 5
41+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "cad32845-44f9-4a53-8b8c-a6b1bb3f3378",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import math\n",
11+
"\n",
12+
"%matplotlib inline\n",
13+
"\n",
14+
"\n",
15+
"_ = math.pi"
16+
]
17+
}
18+
],
19+
"metadata": {
20+
"kernelspec": {
21+
"display_name": "Python (ruff)",
22+
"language": "python",
23+
"name": "ruff"
24+
},
25+
"language_info": {
26+
"codemirror_mode": {
27+
"name": "ipython",
28+
"version": 3
29+
},
30+
"file_extension": ".py",
31+
"mimetype": "text/x-python",
32+
"name": "python",
33+
"nbconvert_exporter": "python",
34+
"pygments_lexer": "ipython3",
35+
"version": "3.11.3"
36+
}
37+
},
38+
"nbformat": 4,
39+
"nbformat_minor": 5
40+
}

crates/ruff/src/importer/insertion.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ mod tests {
302302
use ruff_text_size::TextSize;
303303
use rustpython_parser::ast::Suite;
304304
use rustpython_parser::lexer::LexResult;
305-
use rustpython_parser::Parse;
305+
use rustpython_parser::{Mode, Parse};
306306

307307
use ruff_python_ast::source_code::{Locator, Stylist};
308308
use ruff_python_trivia::LineEnding;
@@ -313,7 +313,7 @@ mod tests {
313313
fn start_of_file() -> Result<()> {
314314
fn insert(contents: &str) -> Result<Insertion> {
315315
let program = Suite::parse(contents, "<filename>")?;
316-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
316+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);
317317
let locator = Locator::new(contents);
318318
let stylist = Stylist::from_tokens(&tokens, &locator);
319319
Ok(Insertion::start_of_file(&program, &locator, &stylist))
@@ -424,7 +424,7 @@ x = 1
424424
#[test]
425425
fn start_of_block() {
426426
fn insert(contents: &str, offset: TextSize) -> Insertion {
427-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
427+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);
428428
let locator = Locator::new(contents);
429429
let stylist = Stylist::from_tokens(&tokens, &locator);
430430
Insertion::start_of_block(offset, &locator, &stylist)

crates/ruff/src/jupyter/notebook.rs

Lines changed: 26 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::path::Path;
66

77
use itertools::Itertools;
88
use once_cell::sync::OnceCell;
9+
use rustpython_parser::Mode;
910
use serde::Serialize;
1011
use serde_json::error::Category;
1112

@@ -21,8 +22,6 @@ use crate::IOError;
2122

2223
pub const JUPYTER_NOTEBOOK_EXT: &str = "ipynb";
2324

24-
const MAGIC_PREFIX: [&str; 3] = ["%", "!", "?"];
25-
2625
/// Run round-trip source code generation on a given Jupyter notebook file path.
2726
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
2827
let mut notebook = Notebook::read(path).map_err(|err| {
@@ -61,26 +60,21 @@ impl Cell {
6160
/// Return `true` if it's a valid code cell.
6261
///
6362
/// A valid code cell is a cell where the cell type is [`Cell::Code`] and the
64-
/// source doesn't contain a magic, shell or help command.
63+
/// source doesn't contain a cell magic.
6564
fn is_valid_code_cell(&self) -> bool {
6665
let source = match self {
6766
Cell::Code(cell) => &cell.source,
6867
_ => return false,
6968
};
70-
// Ignore a cell if it contains a magic command. There could be valid
71-
// Python code as well, but we'll ignore that for now.
72-
// TODO(dhruvmanila): https://github.com/psf/black/blob/main/src/black/handle_ipynb_magics.py
69+
// Ignore cells containing cell magic. This is different from line magic
70+
// which is allowed and ignored by the parser.
7371
!match source {
74-
SourceValue::String(string) => string.lines().any(|line| {
75-
MAGIC_PREFIX
76-
.iter()
77-
.any(|prefix| line.trim_start().starts_with(prefix))
78-
}),
79-
SourceValue::StringArray(string_array) => string_array.iter().any(|line| {
80-
MAGIC_PREFIX
81-
.iter()
82-
.any(|prefix| line.trim_start().starts_with(prefix))
83-
}),
72+
SourceValue::String(string) => string
73+
.lines()
74+
.any(|line| line.trim_start().starts_with("%%")),
75+
SourceValue::StringArray(string_array) => string_array
76+
.iter()
77+
.any(|line| line.trim_start().starts_with("%%")),
8478
}
8579
}
8680
}
@@ -158,7 +152,7 @@ impl Notebook {
158152
)
159153
})?;
160154
// Check if tokenizing was successful and the file is non-empty
161-
if (ruff_rustpython::tokenize(&contents))
155+
if (ruff_rustpython::tokenize(&contents, Mode::Module))
162156
.last()
163157
.map_or(true, Result::is_err)
164158
{
@@ -494,9 +488,10 @@ mod tests {
494488
}
495489

496490
#[test_case(Path::new("markdown.json"), false; "markdown")]
497-
#[test_case(Path::new("only_magic.json"), false; "only_magic")]
498-
#[test_case(Path::new("code_and_magic.json"), false; "code_and_magic")]
491+
#[test_case(Path::new("only_magic.json"), true; "only_magic")]
492+
#[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")]
499493
#[test_case(Path::new("only_code.json"), true; "only_code")]
494+
#[test_case(Path::new("cell_magic.json"), false; "cell_magic")]
500495
fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> {
501496
assert_eq!(read_jupyter_cell(path)?.is_valid_code_cell(), expected);
502497
Ok(())
@@ -557,6 +552,18 @@ print("after empty cells")
557552
Ok(())
558553
}
559554

555+
#[test]
556+
fn test_line_magics() -> Result<()> {
557+
let path = "line_magics.ipynb".to_string();
558+
let (diagnostics, source_kind) = test_notebook_path(
559+
&path,
560+
Path::new("line_magics_expected.ipynb"),
561+
&settings::Settings::for_rule(Rule::UnusedImport),
562+
)?;
563+
assert_messages!(diagnostics, path, source_kind);
564+
Ok(())
565+
}
566+
560567
#[test]
561568
fn test_json_consistency() -> Result<()> {
562569
let path = "before_fix.ipynb".to_string();
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
---
2+
source: crates/ruff/src/jupyter/notebook.rs
3+
---
4+
line_magics.ipynb:cell 1:5:8: F401 [*] `os` imported but unused
5+
|
6+
3 | %matplotlib inline
7+
4 |
8+
5 | import os
9+
| ^^ F401
10+
|
11+
= help: Remove unused import: `os`
12+
13+
Fix
14+
2 2 |
15+
3 3 | %matplotlib inline
16+
4 4 |
17+
5 |-import os
18+
6 5 |
19+
7 6 | _ = math.pi
20+
21+

crates/ruff/src/linter.rs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use itertools::Itertools;
88
use log::error;
99
use rustc_hash::FxHashMap;
1010
use rustpython_parser::lexer::LexResult;
11-
use rustpython_parser::ParseError;
11+
use rustpython_parser::{Mode, ParseError};
1212

1313
use ruff_diagnostics::Diagnostic;
1414
use ruff_python_ast::imports::ImportMap;
@@ -136,7 +136,11 @@ pub fn check_path(
136136
.iter_enabled()
137137
.any(|rule_code| rule_code.lint_source().is_imports());
138138
if use_ast || use_imports || use_doc_lines {
139-
match ruff_rustpython::parse_program_tokens(tokens, &path.to_string_lossy()) {
139+
match ruff_rustpython::parse_program_tokens(
140+
tokens,
141+
&path.to_string_lossy(),
142+
source_kind.map_or(false, SourceKind::is_jupyter),
143+
) {
140144
Ok(python_ast) => {
141145
if use_ast {
142146
diagnostics.extend(check_ast(
@@ -258,7 +262,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings
258262
let contents = std::fs::read_to_string(path)?;
259263

260264
// Tokenize once.
261-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
265+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, Mode::Module);
262266

263267
// Map row and column locations to byte slices (lazily).
264268
let locator = Locator::new(&contents);
@@ -325,8 +329,14 @@ pub fn lint_only(
325329
noqa: flags::Noqa,
326330
source_kind: Option<&SourceKind>,
327331
) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
332+
let mode = if source_kind.map_or(false, SourceKind::is_jupyter) {
333+
Mode::Jupyter
334+
} else {
335+
Mode::Module
336+
};
337+
328338
// Tokenize once.
329-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
339+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, mode);
330340

331341
// Map row and column locations to byte slices (lazily).
332342
let locator = Locator::new(contents);
@@ -415,10 +425,16 @@ pub fn lint_fix<'a>(
415425
// Track whether the _initial_ source code was parseable.
416426
let mut parseable = false;
417427

428+
let mode = if source_kind.is_jupyter() {
429+
Mode::Jupyter
430+
} else {
431+
Mode::Module
432+
};
433+
418434
// Continuously autofix until the source code stabilizes.
419435
loop {
420436
// Tokenize once.
421-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&transformed);
437+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&transformed, mode);
422438

423439
// Map row and column locations to byte slices (lazily).
424440
let locator = Locator::new(&transformed);

crates/ruff/src/rules/pyflakes/mod.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ mod tests {
1212
use anyhow::Result;
1313
use regex::Regex;
1414
use rustpython_parser::lexer::LexResult;
15+
use rustpython_parser::Mode;
1516
use test_case::test_case;
1617

1718
use ruff_diagnostics::Diagnostic;
@@ -502,7 +503,7 @@ mod tests {
502503
fn flakes(contents: &str, expected: &[Rule]) {
503504
let contents = dedent(contents);
504505
let settings = Settings::for_rules(Linter::Pyflakes.rules());
505-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
506+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, Mode::Module);
506507
let locator = Locator::new(&contents);
507508
let stylist = Stylist::from_tokens(&tokens, &locator);
508509
let indexer = Indexer::from_tokens(&tokens, &locator);

crates/ruff/src/test.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use itertools::Itertools;
99
use ruff_textwrap::dedent;
1010
use rustc_hash::FxHashMap;
1111
use rustpython_parser::lexer::LexResult;
12+
use rustpython_parser::Mode;
1213

1314
use ruff_diagnostics::{AutofixKind, Diagnostic};
1415
use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist};
@@ -97,8 +98,13 @@ pub(crate) fn max_iterations() -> usize {
9798
/// A convenient wrapper around [`check_path`], that additionally
9899
/// asserts that autofixes converge after a fixed number of iterations.
99100
fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
101+
let mode = if source_kind.is_jupyter() {
102+
Mode::Jupyter
103+
} else {
104+
Mode::Module
105+
};
100106
let contents = source_kind.content().to_string();
101-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
107+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, mode);
102108
let locator = Locator::new(&contents);
103109
let stylist = Stylist::from_tokens(&tokens, &locator);
104110
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -160,7 +166,7 @@ fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings)
160166
notebook.update(&source_map, &fixed_contents);
161167
};
162168

163-
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents);
169+
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents, mode);
164170
let locator = Locator::new(&fixed_contents);
165171
let stylist = Stylist::from_tokens(&tokens, &locator);
166172
let indexer = Indexer::from_tokens(&tokens, &locator);

crates/ruff_dev/src/print_ast.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,26 @@ use std::fs;
55
use std::path::PathBuf;
66

77
use anyhow::Result;
8-
use rustpython_parser::ast::Suite;
9-
use rustpython_parser::Parse;
8+
use rustpython_parser::{parse, Mode};
109

1110
#[derive(clap::Args)]
1211
pub(crate) struct Args {
1312
/// Python file for which to generate the AST.
1413
#[arg(required = true)]
1514
file: PathBuf,
15+
/// Run in Jupyter mode, i.e., allow line magics.
16+
#[arg(long)]
17+
jupyter: bool,
1618
}
1719

1820
pub(crate) fn main(args: &Args) -> Result<()> {
1921
let contents = fs::read_to_string(&args.file)?;
20-
let python_ast = Suite::parse(&contents, &args.file.to_string_lossy())?;
22+
let mode = if args.jupyter {
23+
Mode::Jupyter
24+
} else {
25+
Mode::Module
26+
};
27+
let python_ast = parse(&contents, mode, &args.file.to_string_lossy())?;
2128
println!("{python_ast:#?}");
2229
Ok(())
2330
}

0 commit comments

Comments
 (0)