Commit 9e7eab2

Use Jupyter mode for the parser with Notebook files
1 parent d810bc9 commit 9e7eab2

11 files changed (+76, -20 lines)


crates/ruff/src/importer/insertion.rs

Lines changed: 3 additions & 3 deletions
@@ -302,7 +302,7 @@ mod tests {
     use ruff_text_size::TextSize;
     use rustpython_parser::ast::Suite;
     use rustpython_parser::lexer::LexResult;
-    use rustpython_parser::Parse;
+    use rustpython_parser::{Mode, Parse};

     use ruff_python_ast::source_code::{Locator, Stylist};
     use ruff_python_whitespace::LineEnding;

@@ -313,7 +313,7 @@ mod tests {
     fn start_of_file() -> Result<()> {
         fn insert(contents: &str) -> Result<Insertion> {
             let program = Suite::parse(contents, "<filename>")?;
-            let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
+            let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);
             let locator = Locator::new(contents);
             let stylist = Stylist::from_tokens(&tokens, &locator);
             Ok(Insertion::start_of_file(&program, &locator, &stylist))

@@ -424,7 +424,7 @@ x = 1
     #[test]
     fn start_of_block() {
         fn insert(contents: &str, offset: TextSize) -> Insertion {
-            let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
+            let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);
             let locator = Locator::new(contents);
             let stylist = Stylist::from_tokens(&tokens, &locator);
             Insertion::start_of_block(offset, &locator, &stylist)

crates/ruff/src/jupyter/notebook.rs

Lines changed: 2 additions & 1 deletion
@@ -6,6 +6,7 @@ use std::path::Path;

 use itertools::Itertools;
 use once_cell::sync::OnceCell;
+use rustpython_parser::Mode;
 use serde::Serialize;
 use serde_json::error::Category;

@@ -151,7 +152,7 @@ impl Notebook {
             )
         })?;
         // Check if tokenizing was successful and the file is non-empty
-        if (ruff_rustpython::tokenize(&contents))
+        if (ruff_rustpython::tokenize(&contents, Mode::Module))
            .last()
            .map_or(true, Result::is_err)
         {
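
The probe above works because `tokenize` collects tokens only up to and including the first error (see `crates/ruff_rustpython/src/lib.rs` below), so inspecting the last lex result is sufficient. A minimal standalone sketch of the same check; the `is_invalid_or_empty` name is hypothetical:

    use rustpython_parser::Mode;

    // Returns true when `contents` is empty or fails to lex as plain
    // Python: `tokenize` stops at the first error, so only the final
    // lex result needs to be inspected.
    fn is_invalid_or_empty(contents: &str) -> bool {
        ruff_rustpython::tokenize(contents, Mode::Module)
            .last()
            .map_or(true, Result::is_err)
    }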

crates/ruff/src/linter.rs

Lines changed: 24 additions & 6 deletions
@@ -8,7 +8,7 @@ use itertools::Itertools;
 use log::error;
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
-use rustpython_parser::ParseError;
+use rustpython_parser::{Mode, ParseError};

 use ruff_diagnostics::Diagnostic;
 use ruff_python_ast::imports::ImportMap;

@@ -134,7 +134,12 @@ pub fn check_path(
         .iter_enabled()
         .any(|rule_code| rule_code.lint_source().is_imports());
     if use_ast || use_imports || use_doc_lines {
-        match ruff_rustpython::parse_program_tokens(tokens, &path.to_string_lossy()) {
+        let mode = if source_kind.map_or(false, |kind| kind.is_jupyter()) {
+            Mode::Jupyter
+        } else {
+            Mode::Module
+        };
+        match ruff_rustpython::parse_program_tokens(tokens, mode, &path.to_string_lossy()) {
             Ok(python_ast) => {
                 if use_ast {
                     diagnostics.extend(check_ast(

@@ -255,7 +260,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings
     let contents = std::fs::read_to_string(path)?;

     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
+    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, Mode::Module);

     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(&contents);

@@ -320,9 +325,16 @@ pub fn lint_only(
     package: Option<&Path>,
     settings: &Settings,
     noqa: flags::Noqa,
+    source_kind: Option<&SourceKind>,
 ) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
+    let mode = if source_kind.map_or(false, |source_kind| source_kind.is_jupyter()) {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+
     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
+    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, mode);

     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(contents);

@@ -352,7 +364,7 @@ pub fn lint_only(
         &directives,
         settings,
         noqa,
-        None,
+        source_kind,
     );

     result.map(|(diagnostics, imports)| {

@@ -411,10 +423,16 @@ pub fn lint_fix<'a>(
     // Track whether the _initial_ source code was parseable.
     let mut parseable = false;

+    let mode = if source_kind.is_jupyter() {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+
     // Continuously autofix until the source code stabilizes.
     loop {
         // Tokenize once.
-        let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&transformed);
+        let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&transformed, mode);

         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(&transformed);
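
`check_path`, `lint_only`, and `lint_fix` each repeat the same `SourceKind`-to-`Mode` dispatch. A sketch of the pattern in isolation; the `select_mode` helper and the simplified `SourceKind` stub are hypothetical, introduced here only for illustration (ruff's real `SourceKind` also carries the notebook data):

    use rustpython_parser::Mode;

    // Simplified stand-in for ruff's `SourceKind`.
    enum SourceKind {
        Python,
        Jupyter,
    }

    impl SourceKind {
        fn is_jupyter(&self) -> bool {
            matches!(self, SourceKind::Jupyter)
        }
    }

    // Hypothetical helper mirroring the inline `if` repeated across
    // the linter entry points: notebook sources lex and parse in
    // Jupyter mode, everything else in Module mode.
    fn select_mode(source_kind: Option<&SourceKind>) -> Mode {
        if source_kind.map_or(false, |kind| kind.is_jupyter()) {
            Mode::Jupyter
        } else {
            Mode::Module
        }
    }

Note that `add_noqa_to_path` keeps `Mode::Module` in this commit, as do the other call sites that never see notebook sources.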

crates/ruff/src/rules/pyflakes/mod.rs

Lines changed: 2 additions & 1 deletion
@@ -12,6 +12,7 @@ mod tests {
     use anyhow::Result;
     use regex::Regex;
     use rustpython_parser::lexer::LexResult;
+    use rustpython_parser::Mode;
     use test_case::test_case;

     use ruff_diagnostics::Diagnostic;

@@ -499,7 +500,7 @@ mod tests {
     fn flakes(contents: &str, expected: &[Rule]) {
         let contents = dedent(contents);
         let settings = Settings::for_rules(Linter::Pyflakes.rules());
-        let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
+        let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, Mode::Module);
         let locator = Locator::new(&contents);
         let stylist = Stylist::from_tokens(&tokens, &locator);
         let indexer = Indexer::from_tokens(&tokens, &locator);

crates/ruff/src/test.rs

Lines changed: 8 additions & 2 deletions
@@ -9,6 +9,7 @@ use itertools::Itertools;
 use ruff_textwrap::dedent;
 use rustc_hash::FxHashMap;
 use rustpython_parser::lexer::LexResult;
+use rustpython_parser::Mode;

 use ruff_diagnostics::{AutofixKind, Diagnostic};
 use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist};

@@ -97,8 +98,13 @@ pub(crate) fn max_iterations() -> usize {
 /// A convenient wrapper around [`check_path`], that additionally
 /// asserts that autofixes converge after a fixed number of iterations.
 fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
+    let mode = if source_kind.is_jupyter() {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
     let contents = source_kind.content().to_string();
-    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
+    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, mode);
     let locator = Locator::new(&contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);

@@ -160,7 +166,7 @@ fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings)
         notebook.update(&source_map, &fixed_contents);
     };

-    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents);
+    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents, mode);
     let locator = Locator::new(&fixed_contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);

crates/ruff_benchmark/benches/linter.rs

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ fn benchmark_linter(mut group: BenchmarkGroup<WallTime>, settings: &Settings) {
                 None,
                 settings,
                 flags::Noqa::Enabled,
+                None,
             );

             // Assert that file contains no parse errors

crates/ruff_cli/src/diagnostics.rs

Lines changed: 18 additions & 2 deletions
@@ -195,12 +195,26 @@ pub(crate) fn lint_path(
             (result, fixed)
         } else {
             // If we fail to autofix, lint the original source code.
-            let result = lint_only(&contents, path, package, &settings.lib, noqa);
+            let result = lint_only(
+                &contents,
+                path,
+                package,
+                &settings.lib,
+                noqa,
+                Some(&source_kind),
+            );
             let fixed = FxHashMap::default();
             (result, fixed)
         }
     } else {
-        let result = lint_only(&contents, path, package, &settings.lib, noqa);
+        let result = lint_only(
+            &contents,
+            path,
+            package,
+            &settings.lib,
+            noqa,
+            Some(&source_kind),
+        );
         let fixed = FxHashMap::default();
         (result, fixed)
     };

@@ -307,6 +321,7 @@ pub(crate) fn lint_stdin(
         package,
         settings,
         noqa,
+        Some(&source_kind),
     );
     let fixed = FxHashMap::default();

@@ -324,6 +339,7 @@ pub(crate) fn lint_stdin(
         package,
         settings,
         noqa,
+        Some(&source_kind),
     );
     let fixed = FxHashMap::default();
     (result, fixed)

crates/ruff_dev/src/print_tokens.rs

Lines changed: 9 additions & 1 deletion
@@ -12,11 +12,19 @@ pub(crate) struct Args {
     /// Python file for which to generate the AST.
     #[arg(required = true)]
     file: PathBuf,
+    /// Run in Jupyter mode i.e., allow line magics (%), shell commands (!), and help (?).
+    #[arg(long)]
+    jupyter: bool,
 }

 pub(crate) fn main(args: &Args) -> Result<()> {
     let contents = fs::read_to_string(&args.file)?;
-    for (tok, range) in lexer::lex(&contents, Mode::Module).flatten() {
+    let mode = if args.jupyter {
+        Mode::Jupyter
+    } else {
+        Mode::Module
+    };
+    for (tok, range) in lexer::lex(&contents, mode).flatten() {
         println!(
             "{start:#?} {tok:#?} {end:#?}",
             start = range.start(),

crates/ruff_python_ast/src/token_kind.rs

Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,8 @@ pub enum TokenKind {
     /// Token value for a newline that is not a logical line break. These are filtered out of
     /// the token stream prior to parsing.
     NonLogicalNewline,
+    /// Token value for a Jupyter magic command.
+    MagicCommand,
     /// Token value for an indent.
     Indent,
     /// Token value for a dedent.

@@ -341,6 +343,7 @@ impl TokenKind {
             Tok::Comment(_) => TokenKind::Comment,
             Tok::Newline => TokenKind::Newline,
             Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
+            Tok::MagicCommand(_) => TokenKind::MagicCommand,
             Tok::Indent => TokenKind::Indent,
             Tok::Dedent => TokenKind::Dedent,
             Tok::EndOfFile => TokenKind::EndOfFile,
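
Since `TokenKind` gains a variant, downstream code that matches exhaustively on it needs a new arm. A trivial sketch of how a consumer might branch on the new token; the `is_magic` helper is hypothetical, and the import path assumes `TokenKind` lives at `ruff_python_ast::token_kind`:

    use ruff_python_ast::token_kind::TokenKind;

    // Hypothetical predicate a rule or formatter might use to skip
    // Jupyter magic-command tokens in the token stream.
    fn is_magic(kind: TokenKind) -> bool {
        matches!(kind, TokenKind::MagicCommand)
    }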

crates/ruff_rustpython/src/lib.rs

Lines changed: 4 additions & 3 deletions
@@ -4,9 +4,9 @@ use rustpython_parser::lexer::LexResult;
 use rustpython_parser::{lexer, Mode, ParseError};

 /// Collect tokens up to and including the first error.
-pub fn tokenize(contents: &str) -> Vec<LexResult> {
+pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
     let mut tokens: Vec<LexResult> = vec![];
-    for tok in lexer::lex(contents, Mode::Module) {
+    for tok in lexer::lex(contents, mode) {
         let is_err = tok.is_err();
         tokens.push(tok);
         if is_err {

@@ -19,9 +19,10 @@ pub fn tokenize(contents: &str) -> Vec<LexResult> {
 /// Parse a full Python program from its tokens.
 pub fn parse_program_tokens(
     lxr: Vec<LexResult>,
+    mode: Mode,
     source_path: &str,
 ) -> anyhow::Result<Suite, ParseError> {
-    parser::parse_tokens(lxr, Mode::Module, source_path).map(|top| match top {
+    parser::parse_tokens(lxr, mode, source_path).map(|top| match top {
         Mod::Module(ModModule { body, .. }) => body,
         _ => unreachable!(),
     })
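
With both entry points parameterized, callers thread one `Mode` value through lexing and parsing. A minimal sketch of the combined flow, assuming the two functions as shown above; the `lex_and_parse` wrapper and the `"<embedded>"` source path are illustrative:

    use rustpython_parser::ast::Suite;
    use rustpython_parser::lexer::LexResult;
    use rustpython_parser::{Mode, ParseError};

    // Lex and parse with a single, consistent mode. Reusing the same
    // `mode` for both steps matters: the parser must agree with the
    // lexer about Jupyter-only tokens such as magic commands.
    fn lex_and_parse(contents: &str, mode: Mode) -> Result<Suite, ParseError> {
        let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, mode);
        ruff_rustpython::parse_program_tokens(tokens, mode, "<embedded>")
    }

Callers pick the mode once, e.g. `Mode::Jupyter` for notebook sources, and reuse it, as the linter entry points above do.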

crates/ruff_wasm/src/lib.rs

Lines changed: 2 additions & 1 deletion
@@ -1,6 +1,7 @@
 use std::path::Path;

 use rustpython_parser::lexer::LexResult;
+use rustpython_parser::Mode;
 use serde::{Deserialize, Serialize};
 use wasm_bindgen::prelude::*;

@@ -180,7 +181,7 @@ pub fn check(contents: &str, options: JsValue) -> Result<JsValue, JsValue> {
         Settings::from_configuration(configuration, Path::new(".")).map_err(|e| e.to_string())?;

     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
+    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);

     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(contents);
