Skip to content

Commit aaa56eb

Browse files
authored
Fix NFKC normalization bug when removing unused imports (#12571)
1 parent f3c14a4 commit aaa56eb

File tree

6 files changed

+72
-27
lines changed

6 files changed

+72
-27
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/ruff_linter/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ toml = { workspace = true }
6969
typed-arena = { workspace = true }
7070
unicode-width = { workspace = true }
7171
unicode_names2 = { workspace = true }
72+
unicode-normalization = { workspace = true }
7273
url = { workspace = true }
7374

7475
[dev-dependencies]
crates/ruff_linter/resources/test/fixtures/pyflakes/F401_30.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
"""
2+
Test: ensure we're able to correctly remove unused imports
3+
even if they have characters in them that undergo NFKC normalization
4+
"""
5+
6+
from .main import MaµToMan

crates/ruff_linter/src/fix/codemods.rs

Lines changed: 46 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
11
//! Interface for editing code snippets. These functions take statements or expressions as input,
22
//! and return the modified code snippet as output.
3+
use std::borrow::Cow;
4+
35
use anyhow::{bail, Result};
46
use libcst_native::{
57
Codegen, CodegenState, Expression, ImportNames, NameOrAttribute, ParenthesizableWhitespace,
68
SmallStatement, Statement,
79
};
8-
use ruff_python_ast::name::UnqualifiedName;
910
use smallvec::{smallvec, SmallVec};
11+
use unicode_normalization::UnicodeNormalization;
1012

13+
use ruff_python_ast::name::UnqualifiedName;
1114
use ruff_python_ast::Stmt;
1215
use ruff_python_codegen::Stylist;
1316
use ruff_source_file::Locator;
@@ -167,39 +170,55 @@ pub(crate) fn retain_imports(
167170
Ok(tree.codegen_stylist(stylist))
168171
}
169172

170-
fn collect_segments<'a>(expr: &'a Expression, parts: &mut SmallVec<[&'a str; 8]>) {
171-
match expr {
172-
Expression::Call(expr) => {
173-
collect_segments(&expr.func, parts);
174-
}
175-
Expression::Attribute(expr) => {
176-
collect_segments(&expr.value, parts);
177-
parts.push(expr.attr.value);
178-
}
179-
Expression::Name(expr) => {
180-
parts.push(expr.value);
173+
/// Create an NFKC-normalized qualified name from a libCST node.
174+
fn qualified_name_from_name_or_attribute(module: &NameOrAttribute) -> String {
175+
fn collect_segments<'a>(expr: &'a Expression, parts: &mut SmallVec<[&'a str; 8]>) {
176+
match expr {
177+
Expression::Call(expr) => {
178+
collect_segments(&expr.func, parts);
179+
}
180+
Expression::Attribute(expr) => {
181+
collect_segments(&expr.value, parts);
182+
parts.push(expr.attr.value);
183+
}
184+
Expression::Name(expr) => {
185+
parts.push(expr.value);
186+
}
187+
_ => {}
181188
}
182-
_ => {}
183189
}
184-
}
185190

186-
fn unqualified_name_from_expression<'a>(expr: &'a Expression<'a>) -> Option<UnqualifiedName<'a>> {
187-
let mut segments = smallvec![];
188-
collect_segments(expr, &mut segments);
189-
if segments.is_empty() {
190-
None
191-
} else {
192-
Some(segments.into_iter().collect())
191+
/// Attempt to create an [`UnqualifiedName`] from a libCST expression.
192+
///
193+
/// Strictly speaking, the `UnqualifiedName` returned by this function may be invalid,
194+
/// since it hasn't been NFKC-normalized. In order for an `UnqualifiedName` to be
195+
/// comparable to one constructed from a `ruff_python_ast` node, it has to undergo
196+
/// NFKC normalization. As a local function, however, this is fine;
197+
/// the outer function always performs NFKC normalization before returning the
198+
/// qualified name to the caller.
199+
fn unqualified_name_from_expression<'a>(
200+
expr: &'a Expression<'a>,
201+
) -> Option<UnqualifiedName<'a>> {
202+
let mut segments = smallvec![];
203+
collect_segments(expr, &mut segments);
204+
if segments.is_empty() {
205+
None
206+
} else {
207+
Some(segments.into_iter().collect())
208+
}
193209
}
194-
}
195210

196-
fn qualified_name_from_name_or_attribute(module: &NameOrAttribute) -> String {
197-
match module {
198-
NameOrAttribute::N(name) => name.value.to_string(),
211+
let unnormalized = match module {
212+
NameOrAttribute::N(name) => Cow::Borrowed(name.value),
199213
NameOrAttribute::A(attr) => {
200214
let name = attr.attr.value;
201215
let prefix = unqualified_name_from_expression(&attr.value);
202-
prefix.map_or_else(|| name.to_string(), |prefix| format!("{prefix}.{name}"))
216+
prefix.map_or_else(
217+
|| Cow::Borrowed(name),
218+
|prefix| Cow::Owned(format!("{prefix}.{name}")),
219+
)
203220
}
204-
}
221+
};
222+
223+
unnormalized.nfkc().collect()
205224
}

crates/ruff_linter/src/rules/pyflakes/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ mod tests {
258258
#[test_case(Rule::UnusedImport, Path::new("F401_27__all_mistyped/__init__.py"))]
259259
#[test_case(Rule::UnusedImport, Path::new("F401_28__all_multiple/__init__.py"))]
260260
#[test_case(Rule::UnusedImport, Path::new("F401_29__all_conditional/__init__.py"))]
261+
#[test_case(Rule::UnusedImport, Path::new("F401_30.py"))]
261262
fn f401_deprecated_option(rule_code: Rule, path: &Path) -> Result<()> {
262263
let snapshot = format!(
263264
"{}_deprecated_option_{}",
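
crates/ruff_linter/src/rules/pyflakes/snapshots/ruff_linter__rules__pyflakes__tests__F401_deprecated_option_F401_30.py.snap

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change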
@@ -0,0 +1,17 @@
1+
---
2+
source: crates/ruff_linter/src/rules/pyflakes/mod.rs
3+
---
4+
F401_30.py:6:19: F401 [*] `.main.MaμToMan` imported but unused
5+
|
6+
4 | """
7+
5 |
8+
6 | from .main import MaµToMan
9+
| ^^^^^^^^ F401
10+
|
11+
= help: Remove unused import: `.main.MaμToMan`
12+
13+
Safe fix
14+
3 3 | even if they have characters in them that undergo NFKC normalization
15+
4 4 | """
16+
5 5 |
17+
6 |-from .main import MaµToMan

0 commit comments

Comments
 (0)