Skip to content

Commit a8e00a6

Browse files
committed
Implement support for Jupyter Notebooks in ruff server
1 parent a347a1b commit a8e00a6

38 files changed

+1351
-614
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/ruff_notebook/src/cell.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ impl fmt::Display for SourceValue {
2323

2424
impl Cell {
2525
/// Return the [`SourceValue`] of the cell.
26-
pub(crate) fn source(&self) -> &SourceValue {
26+
pub fn source(&self) -> &SourceValue {
2727
match self {
2828
Cell::Code(cell) => &cell.source,
2929
Cell::Markdown(cell) => &cell.source,

crates/ruff_notebook/src/notebook.rs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,23 @@ impl Notebook {
8585
Self::from_reader(Cursor::new(source_code))
8686
}
8787

88+
/// Generate a pseudo-representation of a notebook that can be used
89+
/// for linting by the language server. As this is not generated directly from the raw JSON
90+
/// of a notebook file, writing this back into the file system is a bad idea.
91+
pub fn from_cells(
92+
cells: Vec<Cell>,
93+
metadata: crate::RawNotebookMetadata,
94+
) -> Result<Self, NotebookError> {
95+
let raw_notebook = RawNotebook {
96+
cells,
97+
metadata,
98+
nbformat: 4,
99+
nbformat_minor: 5,
100+
};
101+
102+
Self::from_raw(raw_notebook, false)
103+
}
104+
88105
/// Read a Jupyter Notebook from a [`Read`] implementer.
89106
///
90107
/// See also the black implementation
@@ -98,7 +115,7 @@ impl Notebook {
98115
reader.read_exact(&mut buf).is_ok_and(|()| buf[0] == b'\n')
99116
});
100117
reader.rewind()?;
101-
let mut raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
118+
let raw_notebook: RawNotebook = match serde_json::from_reader(reader.by_ref()) {
102119
Ok(notebook) => notebook,
103120
Err(err) => {
104121
// Translate the error into a diagnostic
@@ -113,7 +130,13 @@ impl Notebook {
113130
});
114131
}
115132
};
133+
Self::from_raw(raw_notebook, trailing_newline)
134+
}
116135

136+
fn from_raw(
137+
mut raw_notebook: RawNotebook,
138+
trailing_newline: bool,
139+
) -> Result<Self, NotebookError> {
117140
// v4 is what everybody uses
118141
if raw_notebook.nbformat != 4 {
119142
// bail because we should have already failed at the json schema stage

crates/ruff_server/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ ruff_python_codegen = { workspace = true }
2121
ruff_python_formatter = { workspace = true }
2222
ruff_python_index = { workspace = true }
2323
ruff_python_parser = { workspace = true }
24+
ruff_notebook = { path = "../ruff_notebook" }
2425
ruff_source_file = { workspace = true }
2526
ruff_text_size = { workspace = true }
2627
ruff_workspace = { workspace = true }

crates/ruff_server/src/edit.rs

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,20 @@
11
//! Types and utilities for working with text, modifying source files, and `Ruff <-> LSP` type conversion.
22
33
mod document;
4+
mod notebook;
45
mod range;
56
mod replacement;
67

7-
use std::collections::HashMap;
8+
use std::{collections::HashMap, ffi::OsStr, path::PathBuf};
89

9-
pub use document::Document;
1010
pub(crate) use document::DocumentVersion;
11+
pub use document::TextDocument;
1112
use lsp_types::PositionEncodingKind;
13+
pub(crate) use notebook::NotebookDocument;
1214
pub(crate) use range::{RangeExt, ToRangeExt};
1315
pub(crate) use replacement::Replacement;
1416

15-
use crate::session::ResolvedClientCapabilities;
17+
use crate::{fix::Fixes, session::ResolvedClientCapabilities};
1618

1719
/// A convenient enumeration for supported text encodings. Can be converted to [`lsp_types::PositionEncodingKind`].
1820
// Please maintain the order from least to greatest priority for the derived `Ord` impl.
@@ -29,6 +31,57 @@ pub enum PositionEncoding {
2931
UTF8,
3032
}
3133

34+
/// A unique document ID, derived from a URL passed as part of an LSP request.
35+
/// This document ID can point to either a standalone Python file, a full notebook, or a cell within a notebook.
36+
#[derive(Clone, Debug)]
37+
pub(crate) enum DocumentKey {
38+
/// A notebook file, identified by the path of a `file` URL with an `ipynb` extension.
Notebook(PathBuf),
39+
/// A notebook cell, identified by its full URL. `from_url` also produces this variant
/// for any URL that does not use the `file` scheme or cannot be converted to a file path.
NotebookCell(lsp_types::Url),
40+
/// Any other file, identified by the path of its `file` URL.
Text(PathBuf),
41+
}
42+
43+
impl DocumentKey {
44+
/// Creates a document key from a URL provided in an LSP request.
45+
pub(crate) fn from_url(url: &lsp_types::Url) -> Self {
46+
if url.scheme() != "file" {
47+
return Self::NotebookCell(url.clone());
48+
}
49+
let Some(path) = url.to_file_path().ok() else {
50+
return Self::NotebookCell(url.clone());
51+
};
52+
53+
// figure out whether this is a notebook or a text document
54+
if path.extension() == Some(OsStr::new("ipynb")) {
55+
Self::Notebook(path)
56+
} else {
57+
// Until we support additional document types, we need to confirm
58+
// that any non-notebook file is a Python file
59+
debug_assert_eq!(path.extension(), Some(OsStr::new("py")));
60+
Self::Text(path)
61+
}
62+
}
63+
64+
/// Converts the key back into its original URL.
65+
pub(crate) fn into_url(self) -> lsp_types::Url {
66+
match self {
67+
DocumentKey::NotebookCell(url) => url,
68+
DocumentKey::Notebook(path) | DocumentKey::Text(path) => {
69+
lsp_types::Url::from_file_path(path)
70+
.expect("file path originally from URL should convert back to URL")
71+
}
72+
}
73+
}
74+
}
75+
76+
impl std::fmt::Display for DocumentKey {
77+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
78+
match self {
79+
Self::NotebookCell(url) => url.fmt(f),
80+
Self::Notebook(path) | Self::Text(path) => path.display().fmt(f),
81+
}
82+
}
83+
}
84+
3285
/// Tracks multi-document edits to eventually merge into a `WorkspaceEdit`.
3386
/// Compatible with clients that don't support `workspace.workspaceEdit.documentChanges`.
3487
#[derive(Debug)]
@@ -72,6 +125,18 @@ impl WorkspaceEditTracker {
72125
}
73126
}
74127

128+
/// Sets a series of [`Fixes`] for a text or notebook document.
129+
pub(crate) fn set_fixes_for_document(
130+
&mut self,
131+
fixes: Fixes,
132+
version: DocumentVersion,
133+
) -> crate::Result<()> {
134+
for (uri, edits) in fixes {
135+
self.set_edits_for_document(uri, version, edits)?;
136+
}
137+
Ok(())
138+
}
139+
75140
/// Sets the edits made to a specific document. This should only be called
76141
/// once for each document `uri`, and will fail if this is called for the same `uri`
77142
/// multiple times.

crates/ruff_server/src/edit/document.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ use super::RangeExt;
77

88
pub(crate) type DocumentVersion = i32;
99

10-
/// The state for an individual document in the server. Stays up-to-date
10+
/// The state of an individual document in the server. Stays up-to-date
1111
/// with changes made by the user, including unsaved changes.
1212
#[derive(Debug, Clone)]
13-
pub struct Document {
13+
pub struct TextDocument {
1414
/// The string contents of the document.
1515
contents: String,
1616
/// A computed line index for the document. This should always reflect
@@ -22,7 +22,7 @@ pub struct Document {
2222
version: DocumentVersion,
2323
}
2424

25-
impl Document {
25+
impl TextDocument {
2626
pub fn new(contents: String, version: DocumentVersion) -> Self {
2727
let index = LineIndex::from_source_text(&contents);
2828
Self {
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
use std::{collections::HashMap, hash::BuildHasherDefault};
2+
3+
use anyhow::Ok;
4+
use lsp_types::{NotebookCellKind, Url};
5+
use rustc_hash::FxHashMap;
6+
7+
use crate::{PositionEncoding, TextDocument};
8+
9+
use super::DocumentVersion;
10+
11+
/// The state of a notebook document in the server. Contains an array of cells whose
12+
/// contents are internally represented by [`TextDocument`]s.
13+
#[derive(Clone, Debug)]
14+
pub(crate) struct NotebookDocument {
15+
/// The notebook's cells; `cell_index` stores positions into this vector.
cells: Vec<NotebookCell>,
16+
/// Notebook-level metadata, deserialized from the JSON object sent by the client.
metadata: ruff_notebook::RawNotebookMetadata,
17+
/// The version passed at construction or to the most recent `update` call.
version: DocumentVersion,
18+
// Used to quickly find the index of a cell for a given URL.
19+
cell_index: FxHashMap<lsp_types::Url, usize>,
20+
}
21+
22+
/// A single notebook cell as tracked by the server.
#[derive(Clone, Debug)]
23+
struct NotebookCell {
24+
/// The URL of the cell's text document (taken from the LSP cell's `document` field).
url: Url,
25+
/// The kind reported by the client; `make_ruff_notebook` maps `Code` to a code cell
/// and `Markup` to a markdown cell.
kind: NotebookCellKind,
26+
/// The cell's text contents.
document: TextDocument,
27+
}
28+
29+
impl NotebookDocument {
30+
pub(crate) fn new(
31+
version: DocumentVersion,
32+
cells: Vec<lsp_types::NotebookCell>,
33+
metadata: serde_json::Map<String, serde_json::Value>,
34+
cell_documents: Vec<lsp_types::TextDocumentItem>,
35+
) -> crate::Result<Self> {
36+
let mut cell_contents: FxHashMap<_, _> = cell_documents
37+
.into_iter()
38+
.map(|document| (document.uri, document.text))
39+
.collect();
40+
41+
let cells: Vec<_> = cells
42+
.into_iter()
43+
.map(|cell| {
44+
let contents = cell_contents.remove(&cell.document).unwrap_or_default();
45+
NotebookCell::new(cell, contents, version)
46+
})
47+
.collect();
48+
49+
Ok(Self {
50+
version,
51+
cell_index: Self::make_cell_index(cells.as_slice()),
52+
metadata: serde_json::from_value(serde_json::Value::Object(metadata))?,
53+
cells,
54+
})
55+
}
56+
57+
/// Generates a pseudo-representation of a notebook that lacks per-cell metadata and contextual information
58+
/// but should still work with Ruff's linter.
59+
pub(crate) fn make_ruff_notebook(&self) -> ruff_notebook::Notebook {
60+
let cells = self
61+
.cells
62+
.iter()
63+
.map(|cell| match cell.kind {
64+
NotebookCellKind::Code => ruff_notebook::Cell::Code(ruff_notebook::CodeCell {
65+
execution_count: None,
66+
id: None,
67+
metadata: serde_json::Value::Null,
68+
outputs: vec![],
69+
source: ruff_notebook::SourceValue::String(
70+
cell.document.contents().to_string(),
71+
),
72+
}),
73+
NotebookCellKind::Markup => {
74+
ruff_notebook::Cell::Markdown(ruff_notebook::MarkdownCell {
75+
attachments: None,
76+
id: None,
77+
metadata: serde_json::Value::Null,
78+
source: ruff_notebook::SourceValue::String(
79+
cell.document.contents().to_string(),
80+
),
81+
})
82+
}
83+
})
84+
.collect();
85+
86+
ruff_notebook::Notebook::from_cells(cells, self.metadata.clone())
87+
.expect("notebook should convert successfully")
88+
}
89+
90+
pub(crate) fn update(
91+
&mut self,
92+
cells: Option<lsp_types::NotebookDocumentCellChange>,
93+
metadata_change: Option<serde_json::Map<String, serde_json::Value>>,
94+
version: DocumentVersion,
95+
encoding: PositionEncoding,
96+
) -> crate::Result<()> {
97+
self.version = version;
98+
99+
if let Some(lsp_types::NotebookDocumentCellChange {
100+
structure,
101+
data,
102+
text_content,
103+
}) = cells
104+
{
105+
if let Some(structure) = structure {
106+
let start = usize::try_from(structure.array.start).unwrap();
107+
let delete = usize::try_from(structure.array.delete_count).unwrap();
108+
if delete > 0 {
109+
self.cells.drain(start..start + delete);
110+
}
111+
for cell in structure.array.cells.into_iter().flatten().rev() {
112+
self.cells
113+
.insert(start, NotebookCell::new(cell, String::new(), version));
114+
}
115+
116+
// the array has been updated - rebuild the cell index
117+
self.rebuild_cell_index();
118+
}
119+
if let Some(cell_data) = data {
120+
for cell in cell_data {
121+
if let Some(existing_cell) = self.cell_by_uri_mut(&cell.document) {
122+
existing_cell.kind = cell.kind;
123+
}
124+
}
125+
}
126+
if let Some(content_changes) = text_content {
127+
for content_change in content_changes {
128+
if let Some(cell) = self.cell_by_uri_mut(&content_change.document.uri) {
129+
cell.document
130+
.apply_changes(content_change.changes, version, encoding);
131+
}
132+
}
133+
}
134+
}
135+
if let Some(metadata_change) = metadata_change {
136+
self.metadata = serde_json::from_value(serde_json::Value::Object(metadata_change))?;
137+
}
138+
Ok(())
139+
}
140+
141+
/// Returns the version recorded at construction or by the most recent `update` call.
pub(crate) fn version(&self) -> DocumentVersion {
142+
self.version
143+
}
144+
145+
/// Returns the URL of the cell at position `index`, or `None` if there is no such cell.
pub(crate) fn cell_uri_by_index(&self, index: usize) -> Option<&lsp_types::Url> {
    let cell = self.cells.get(index)?;
    Some(&cell.url)
}
148+
149+
/// Looks up the text document of the cell with the given URL.
///
/// Returns `None` if the URL is not in the cell index.
pub(crate) fn cell_document_by_uri(&self, uri: &lsp_types::Url) -> Option<&TextDocument> {
    let &position = self.cell_index.get(uri)?;
    let cell = self.cells.get(position)?;
    Some(&cell.document)
}
154+
155+
/// Iterates over the URL of every cell, in the order the cells are stored.
pub(crate) fn urls(&self) -> impl Iterator<Item = &lsp_types::Url> {
156+
self.cells.iter().map(|cell| &cell.url)
157+
}
158+
159+
/// Returns a mutable reference to the cell with the given URL, if the index knows it.
fn cell_by_uri_mut(&mut self, uri: &lsp_types::Url) -> Option<&mut NotebookCell> {
    let position = self.cell_index.get(uri).copied()?;
    self.cells.get_mut(position)
}
162+
163+
/// Recomputes the URL-to-position index from the current cell array.
/// `update` calls this after cells have been removed or inserted.
fn rebuild_cell_index(&mut self) {
164+
self.cell_index = Self::make_cell_index(&self.cells);
165+
}
166+
167+
fn make_cell_index(cells: &[NotebookCell]) -> FxHashMap<lsp_types::Url, usize> {
168+
let mut index =
169+
HashMap::with_capacity_and_hasher(cells.len(), BuildHasherDefault::default());
170+
for (i, cell) in cells.iter().enumerate() {
171+
index.insert(cell.url.clone(), i);
172+
}
173+
index
174+
}
175+
}
176+
177+
impl NotebookCell {
178+
pub(crate) fn new(
179+
cell: lsp_types::NotebookCell,
180+
contents: String,
181+
version: DocumentVersion,
182+
) -> Self {
183+
Self {
184+
url: cell.document,
185+
kind: cell.kind,
186+
document: TextDocument::new(contents, version),
187+
}
188+
}
189+
}

0 commit comments

Comments
 (0)