Skip to content

Commit ce9e519

Browse files
authored
feat(frontend): Enhance parsing error reporting by providing location information. (risingwavelabs#8646)
1 parent 7fbab01 commit ce9e519

File tree

11 files changed

+257
-114
lines changed

11 files changed

+257
-114
lines changed

src/sqlparser/examples/parse.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ fn main() {
2727
continue;
2828
}
2929

30-
let tokens = Tokenizer::new(&sql).tokenize().unwrap();
30+
let tokens = Tokenizer::new(&sql).tokenize_with_location().unwrap();
3131
println!("tokens: {:?}", tokens);
3232
let ast = Parser::parse_sql(&sql).unwrap();
3333
println!("ast: {:?}", ast);

src/sqlparser/src/ast/statement.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,7 @@ impl ParseTo for UserOptions {
710710
break;
711711
}
712712

713-
if let Token::Word(ref w) = token {
713+
if let Token::Word(ref w) = token.token {
714714
parser.next_token();
715715
let (item_mut_ref, user_option) = match w.keyword {
716716
Keyword::SUPERUSER => (&mut builder.super_user, UserOption::SuperUser),

src/sqlparser/src/parser.rs

Lines changed: 166 additions & 92 deletions
Large diffs are not rendered by default.

src/sqlparser/src/test_utils.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ where
3434
F: Fn(&mut Parser) -> T,
3535
{
3636
let mut tokenizer = Tokenizer::new(sql);
37-
let tokens = tokenizer.tokenize().unwrap();
37+
let tokens = tokenizer.tokenize_with_location().unwrap();
3838
f(&mut Parser::new(tokens))
3939
}
4040

src/sqlparser/src/tokenizer.rs

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ use alloc::{
2525
vec::Vec,
2626
};
2727
use core::fmt;
28+
use core::fmt::Debug;
2829
use core::iter::Peekable;
2930
use core::str::Chars;
3031

@@ -237,6 +238,10 @@ impl Token {
237238
},
238239
})
239240
}
241+
242+
pub fn with_location(self, location: Location) -> TokenWithLocation {
243+
TokenWithLocation::new(self, location.line, location.column)
244+
}
240245
}
241246

242247
/// A keyword (like SELECT) or an optionally quoted SQL identifier
@@ -300,6 +305,61 @@ impl fmt::Display for Whitespace {
300305
}
301306
}
302307

308+
/// Location in input string
309+
#[derive(Debug, Eq, PartialEq, Clone)]
310+
pub struct Location {
311+
/// Line number, starting from 1
312+
pub line: u64,
313+
/// Line column, starting from 1
314+
pub column: u64,
315+
}
316+
317+
/// A [Token] with [Location] attached to it
318+
#[derive(Debug, Eq, PartialEq, Clone)]
319+
pub struct TokenWithLocation {
320+
pub token: Token,
321+
pub location: Location,
322+
}
323+
324+
impl TokenWithLocation {
325+
pub fn new(token: Token, line: u64, column: u64) -> TokenWithLocation {
326+
TokenWithLocation {
327+
token,
328+
location: Location { line, column },
329+
}
330+
}
331+
332+
pub fn wrap(token: Token) -> TokenWithLocation {
333+
TokenWithLocation::new(token, 0, 0)
334+
}
335+
}
336+
337+
impl PartialEq<Token> for TokenWithLocation {
338+
fn eq(&self, other: &Token) -> bool {
339+
&self.token == other
340+
}
341+
}
342+
343+
impl PartialEq<TokenWithLocation> for Token {
344+
fn eq(&self, other: &TokenWithLocation) -> bool {
345+
self == &other.token
346+
}
347+
}
348+
349+
impl fmt::Display for TokenWithLocation {
350+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
351+
if self.token == Token::EOF {
352+
write!(f, "EOF at the end")
353+
} else {
354+
write!(
355+
f,
356+
"{} at line:{}, column:{}",
357+
self.token, self.location.line, self.location.column
358+
)
359+
}
360+
}
361+
}
362+
303363
/// Tokenizer error
304364
#[derive(Debug, PartialEq)]
305365
pub struct TokenizerError {
@@ -338,11 +398,11 @@ impl<'a> Tokenizer<'a> {
338398
}
339399
}
340400

341-
/// Tokenize the statement and produce a vector of tokens
342-
pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
401+
/// Tokenize the statement and produce a vector of tokens with locations.
402+
pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
343403
let mut peekable = self.query.chars().peekable();
344404

345-
let mut tokens: Vec<Token> = vec![];
405+
let mut tokens: Vec<TokenWithLocation> = vec![];
346406

347407
while let Some(token) = self.next_token(&mut peekable)? {
348408
match &token {
@@ -359,11 +419,20 @@ impl<'a> Tokenizer<'a> {
359419
_ => self.col += 1,
360420
}
361421

362-
tokens.push(token);
422+
let token_with_location = TokenWithLocation::new(token, self.line, self.col);
423+
424+
tokens.push(token_with_location);
363425
}
364426
Ok(tokens)
365427
}
366428

429+
/// Tokenize the statement and produce a vector of tokens without locations.
430+
#[allow(dead_code)]
431+
fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> {
432+
self.tokenize_with_location()
433+
.map(|v| v.into_iter().map(|t| t.token).collect())
434+
}
435+
367436
/// Get the next token or return None
368437
fn next_token(&self, chars: &mut Peekable<Chars<'_>>) -> Result<Option<Token>, TokenizerError> {
369438
match chars.peek() {

src/sqlparser/tests/sqlparser_postgres.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1021,31 +1021,31 @@ fn parse_array() {
10211021
assert_eq!(
10221022
parse_sql_statements(sql),
10231023
Err(ParserError::ParserError(
1024-
"syntax error at or near '['".to_string()
1024+
"syntax error at or near '[ at line:1, column:28'".to_string()
10251025
))
10261026
);
10271027

10281028
let sql = "SELECT ARRAY[ARRAY[], []]";
10291029
assert_eq!(
10301030
parse_sql_statements(sql),
10311031
Err(ParserError::ParserError(
1032-
"syntax error at or near '['".to_string()
1032+
"syntax error at or near '[ at line:1, column:24'".to_string()
10331033
))
10341034
);
10351035

10361036
let sql = "SELECT ARRAY[[1, 2], ARRAY[3, 4]]";
10371037
assert_eq!(
10381038
parse_sql_statements(sql),
10391039
Err(ParserError::ParserError(
1040-
"syntax error at or near 'ARRAY'".to_string()
1040+
"syntax error at or near 'ARRAY at line:1, column:27'".to_string()
10411041
))
10421042
);
10431043

10441044
let sql = "SELECT ARRAY[[], ARRAY[]]";
10451045
assert_eq!(
10461046
parse_sql_statements(sql),
10471047
Err(ParserError::ParserError(
1048-
"syntax error at or near 'ARRAY'".to_string()
1048+
"syntax error at or near 'ARRAY at line:1, column:23'".to_string()
10491049
))
10501050
);
10511051

src/sqlparser/tests/testdata/array.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,15 @@
77
formatted_sql: CREATE TABLE t (a INT[][][])
88
- input: CREATE TABLE t(a int[);
99
error_msg: |-
10-
sql parser error: Expected ], found: )
10+
sql parser error: Expected ], found: ) at line:1, column:23
1111
Near "CREATE TABLE t(a int["
1212
- input: CREATE TABLE t(a int[[]);
1313
error_msg: |-
14-
sql parser error: Expected ], found: [
14+
sql parser error: Expected ], found: [ at line:1, column:23
1515
Near "CREATE TABLE t(a int["
1616
- input: CREATE TABLE t(a int]);
1717
error_msg: |-
18-
sql parser error: Expected ',' or ')' after column definition, found: ]
18+
sql parser error: Expected ',' or ')' after column definition, found: ] at line:1, column:22
1919
Near "CREATE TABLE t(a int"
2020
- input: SELECT foo[0] FROM foos
2121
formatted_sql: SELECT foo[0] FROM foos

src/sqlparser/tests/testdata/create.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
formatted_sql: CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a
1818
- input: CREATE SOURCE src
1919
error_msg: |-
20-
sql parser error: Expected ROW, found: EOF
20+
sql parser error: Expected ROW, found: EOF at the end
2121
Near "CREATE SOURCE src"
2222
- input: CREATE SOURCE src ROW FORMAT JSON
2323
formatted_sql: CREATE SOURCE src ROW FORMAT JSON
@@ -44,7 +44,7 @@
4444
formatted_sql: CREATE USER user WITH SUPERUSER CREATEDB PASSWORD 'password'
4545
- input: CREATE SINK snk
4646
error_msg: |-
47-
sql parser error: Expected FROM or AS after CREATE SINK sink_name, found: EOF
47+
sql parser error: Expected FROM or AS after CREATE SINK sink_name, found: EOF at the end
4848
Near "CREATE SINK snk"
4949
- input: CREATE SINK IF NOT EXISTS snk FROM mv WITH (connector = 'mysql', mysql.endpoint = '127.0.0.1:3306', mysql.table = '<table_name>', mysql.database = '<database_name>', mysql.user = '<user_name>', mysql.password = '<password>')
5050
formatted_sql: CREATE SINK IF NOT EXISTS snk FROM mv WITH (connector = 'mysql', mysql.endpoint = '127.0.0.1:3306', mysql.table = '<table_name>', mysql.database = '<database_name>', mysql.user = '<user_name>', mysql.password = '<password>')
@@ -60,5 +60,5 @@
6060
error_msg: 'sql parser error: conflicting or redundant options'
6161
- input: create user tmp with encrypted password null
6262
error_msg: |-
63-
sql parser error: Expected literal string, found: null
63+
sql parser error: Expected literal string, found: null at line:1, column:45
6464
Near " tmp with encrypted password null"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# This file is automatically generated. See `src/sqlparser/test_runner/src/bin/apply.rs` for more information.
22
- input: INSERT public.customer (id, name, active) VALUES (1, 2, 3)
33
error_msg: |-
4-
sql parser error: Expected INTO, found: public
4+
sql parser error: Expected INTO, found: public at line:1, column:14
55
Near "INSERT"
66
- input: INSERT INTO t VALUES(1,3), (2,4) RETURNING *, a, a as aaa
77
formatted_sql: INSERT INTO t VALUES (1, 3), (2, 4) RETURNING (*, a, a AS aaa)

src/sqlparser/tests/testdata/select.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@
4343
error_msg: 'sql parser error: WITH TIES cannot be specified without ORDER BY clause'
4444
- input: select * from (select 1 from 1);
4545
error_msg: |-
46-
sql parser error: Expected identifier, found: 1
46+
sql parser error: Expected identifier, found: 1 at line:1, column:31
4747
Near "from (select 1 from 1"
4848
- input: select * from (select * from tumble(t, x, interval '10' minutes))
4949
error_msg: |-
50-
sql parser error: Expected ), found: minutes
50+
sql parser error: Expected ), found: minutes at line:1, column:62
5151
Near "(t, x, interval '10'"
5252
- input: SELECT 1, FROM t
5353
error_msg: 'sql parser error: syntax error at or near "FROM"'
@@ -74,7 +74,7 @@
7474
error_msg: 'sql parser error: precision for type float must be less than 54 bits'
7575
- input: SELECT 1::int(2)
7676
error_msg: |-
77-
sql parser error: Expected end of statement, found: (
77+
sql parser error: Expected end of statement, found: ( at line:1, column:14
7878
Near "SELECT 1::int"
7979
- input: select id1, a1, id2, a2 from stream as S join version FOR SYSTEM_TIME AS OF NOW() AS V on id1= id2
8080
formatted_sql: SELECT id1, a1, id2, a2 FROM stream AS S JOIN version FOR SYSTEM_TIME AS OF NOW() AS V ON id1 = id2

0 commit comments

Comments
 (0)