Skip to content

Commit 9fef15e

Browse files
authored
Chapter 4: Scanning (#1)
This implements the [token scanning](http://craftinginterpreters.com/scanning.html). I tried to stay as close to the original Java implementation as possible while still being "rusty". Differences to the Java implementation: * There is no `null` check and a few `if-else` blocks became `unwrap_or('\0')`. * Literals are not of type `Object` but use `String` and `f64`. * `is_digit` and `is_alpha` are much simpler in Rust. * The `KEYWORDS` map is not only static but created at compile time thanks to [phf](https://crates.io/crates/phf). Possible improvements in Rust: * One could use a `Peekable` iterator for `Chars`. However, this would diverge from the original scanner quite a bit. * One could strive for zero copy for lexemes. To keep it simple I stuck with `String`. * The strangest part is creating a substring with ``` self.source .get(self.start..self.current).expect("Source token is empty."); ``` I was afraid that changing it would make it harder to relate to the original chapter. * The scanner could be less stateful and avoid `self.current` and `self.start`.
1 parent edcd88e commit 9fef15e

File tree

9 files changed

+407
-3
lines changed

9 files changed

+407
-3
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
66
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
7-
Cargo.lock
7+
# Cargo.lock
88

99
# These are backup files generated by rustfmt
1010
**/*.rs.bk

.travis.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
language: rust
2+
rust:
3+
- stable
4+
- beta
5+
- nightly

Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[package]
2+
name = "lox-rs"
3+
version = "0.1.0"
4+
authors = ["Karsten Jeschkies <[email protected]>"]
5+
edition = "2018"
6+
build = "build.rs"
7+
8+
[dependencies]
9+
phf = "0.7.24"
10+
11+
[build-dependencies]
12+
phf_codegen = "0.7.24"

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1-
# lox-rs
2-
A Lox Interpreter in Rust
1+
# Lox-rs [![Build Status](https://travis-ci.com/jeschkies/lox-rs.svg?branch=master)](https://travis-ci.com/jeschkies/lox-rs)
2+
3+
A [Lox](http://craftinginterpreters.com/the-lox-language.html) Interpreter in Rust based on the
4+
[Crafting Interpreters](http://craftinginterpreters.com) book.
5+
6+
Each commit corresponds to one chapter in the book.

build.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
extern crate phf_codegen;
2+
3+
use std::env;
4+
use std::fs::File;
5+
use std::io::{BufWriter, Write};
6+
use std::path::Path;
7+
8+
fn main() {
9+
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("keywords.rs");
10+
let mut file = BufWriter::new(File::create(&path).unwrap());
11+
12+
write!(
13+
&mut file,
14+
"pub static KEYWORDS: phf::Map<&'static str, TokenType> =
15+
"
16+
)
17+
.unwrap();
18+
phf_codegen::Map::new()
19+
.entry("and", "TokenType::And")
20+
.entry("class", "TokenType::Class")
21+
.entry("else", "TokenType::Else")
22+
.entry("false", "TokenType::False")
23+
.entry("fun", "TokenType::Fun")
24+
.entry("for", "TokenType::For")
25+
.entry("if", "TokenType::If")
26+
.entry("nil", "TokenType::Nil")
27+
.entry("or", "TokenType::Or")
28+
.entry("print", "TokenType::Print")
29+
.entry("return", "TokenType::Return")
30+
.entry("super", "TokenType::Super")
31+
.entry("this", "TokenType::This")
32+
.entry("true", "TokenType::True")
33+
.entry("var", "TokenType::Var")
34+
.entry("while", "TokenType::While")
35+
.build(&mut file)
36+
.unwrap();
37+
write!(&mut file, ";\n").unwrap();
38+
}

src/error.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/// Reports a scan error with no location detail beyond the line number.
pub fn error(line: i32, message: &str) {
    report(line, "", message)
}

/// Prints a formatted error to stderr.
///
/// `where_` carries extra location context (e.g. " at 'x'"); it is empty for
/// plain line-level errors.
pub fn report(line: i32, where_: &str, message: &str) {
    let rendered = format!("[line {}] Error{}: {}", line, where_, message);
    eprintln!("{}", rendered);
    // had_error = true; TODO: Use custom Error type
}

src/main.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
mod error;
mod scanner;
mod token;

use std::io::{self, BufRead, Write};
use std::process::exit;
use std::{env, fs};

use scanner::Scanner;
10+
11+
fn main() -> Result<(), Box<dyn std::error::Error + 'static>> {
12+
let args: Vec<String> = env::args().collect();
13+
match args.as_slice() {
14+
[_, file] => run_file(file)?,
15+
[_] => run_prompt()?,
16+
_ => {
17+
eprintln!("Usage: lox-rs [script]");
18+
exit(64)
19+
}
20+
}
21+
Ok(())
22+
}
23+
24+
fn run_file(path: &str) -> io::Result<()> {
25+
let source = fs::read_to_string(path)?;
26+
run(source)
27+
}
28+
29+
fn run_prompt() -> io::Result<()> {
30+
let stdin = io::stdin();
31+
for line in stdin.lock().lines() {
32+
run(line?); // Ignore error.
33+
print!("> ");
34+
}
35+
Ok(())
36+
}
37+
38+
fn run(source: String) -> io::Result<()> {
39+
let mut scanner = Scanner::new(source);
40+
let tokens = scanner.scan_tokens();
41+
42+
for token in tokens {
43+
println!("{}", token);
44+
}
45+
Ok(())
46+
}

src/scanner.rs

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
use crate::error::error;
2+
use crate::token::{Token, TokenType, KEYWORDS};
3+
4+
/// A stateful scanner that turns Lox source text into a stream of tokens.
pub struct Scanner {
    source: String,    // raw source text being scanned
    tokens: Vec<Token>, // tokens produced so far
    start: usize,      // index where the current lexeme begins
    current: usize,    // index of the next character to consume
    line: i32,         // current line number, for error reporting
}
11+
12+
impl Scanner {
13+
pub fn new(source: String) -> Self {
14+
Self {
15+
source,
16+
tokens: Vec::new(),
17+
start: 0,
18+
current: 0,
19+
line: 1,
20+
}
21+
}
22+
23+
pub fn scan_tokens(&mut self) -> &Vec<Token> {
24+
while !self.is_at_end() {
25+
self.start = self.current;
26+
self.scan_token();
27+
}
28+
29+
self.tokens.push(Token::new(TokenType::EOF, "", self.line));
30+
&self.tokens
31+
}
32+
33+
fn scan_token(&mut self) {
34+
let c: char = self.advance();
35+
match c {
36+
'(' => self.add_token(TokenType::LeftParen),
37+
')' => self.add_token(TokenType::RightParen),
38+
'{' => self.add_token(TokenType::LeftBrace),
39+
'}' => self.add_token(TokenType::RightBrace),
40+
',' => self.add_token(TokenType::Comma),
41+
'.' => self.add_token(TokenType::Dot),
42+
'-' => self.add_token(TokenType::Minus),
43+
'+' => self.add_token(TokenType::Plus),
44+
';' => self.add_token(TokenType::Semicolon),
45+
'*' => self.add_token(TokenType::Star),
46+
'!' => {
47+
if self.r#match('=') {
48+
self.add_token(TokenType::BangEqual)
49+
} else {
50+
self.add_token(TokenType::Bang)
51+
}
52+
}
53+
'=' => {
54+
if self.r#match('=') {
55+
self.add_token(TokenType::EqualEqual)
56+
} else {
57+
self.add_token(TokenType::Equal)
58+
}
59+
}
60+
'<' => {
61+
if self.r#match('=') {
62+
self.add_token(TokenType::LessEqual)
63+
} else {
64+
self.add_token(TokenType::Less)
65+
}
66+
}
67+
'>' => {
68+
if self.r#match('=') {
69+
self.add_token(TokenType::GreaterEqual)
70+
} else {
71+
self.add_token(TokenType::Greater)
72+
}
73+
}
74+
'/' => {
75+
if self.r#match('/') {
76+
// A comment goes until the end of the line.
77+
while self.peek() != '\n' && !self.is_at_end() {
78+
self.advance();
79+
}
80+
} else {
81+
self.add_token(TokenType::Slash)
82+
}
83+
}
84+
' ' | '\r' | '\t' => (), // Ignore whitespace
85+
'\n' => self.line += 1,
86+
'"' => self.string(),
87+
c => {
88+
if c.is_digit(10) {
89+
self.number()
90+
} else if c.is_alphabetic() || c == '_' {
91+
self.identifier()
92+
} else {
93+
error(self.line, "Unexpected character.")
94+
}
95+
}
96+
}
97+
}
98+
99+
fn identifier(&mut self) {
100+
while self.peek().is_alphanumeric() || self.peek() == '_' {
101+
self.advance();
102+
}
103+
104+
// See if the identifier is a reserved word.
105+
let text = self
106+
.source
107+
.get(self.start..self.current)
108+
.expect("Unexpected end.");
109+
110+
let tpe: TokenType = KEYWORDS.get(text).cloned().unwrap_or(TokenType::Identifier);
111+
self.add_token(tpe);
112+
}
113+
114+
fn number(&mut self) {
115+
while self.peek().is_digit(10) {
116+
self.advance();
117+
}
118+
119+
// Look for a fractional part.
120+
if self.peek() == '.' && self.peek_next().is_digit(10) {
121+
// Consumer the ".".
122+
self.advance();
123+
124+
while self.peek().is_digit(10) {
125+
self.advance();
126+
}
127+
}
128+
129+
let n: f64 = self
130+
.source
131+
.get(self.start..self.current)
132+
.expect("Unexpected end.")
133+
.parse()
134+
.expect("Scanned number could not be parsed.");
135+
self.add_token(TokenType::Number { literal: n })
136+
}
137+
138+
fn string(&mut self) {
139+
while self.peek() != '"' && !self.is_at_end() {
140+
if self.peek() == '\n' {
141+
self.line += 1;
142+
}
143+
self.advance();
144+
}
145+
146+
// Unterminated string.
147+
if self.is_at_end() {
148+
error(self.line, "Unterminated string.");
149+
}
150+
151+
// The closing ".
152+
self.advance();
153+
154+
// Trim the surrounding quotes.
155+
let literal = self
156+
.source
157+
.get((self.start + 1)..(self.current - 1))
158+
.expect("Unexpected end.")
159+
.to_string();
160+
self.add_token(TokenType::String { literal });
161+
}
162+
163+
fn r#match(&mut self, expected: char) -> bool {
164+
if self.is_at_end() {
165+
return false;
166+
}
167+
// TODO: !self.source.get(self.current..self.current).contains(expected)
168+
if self
169+
.source
170+
.chars()
171+
.nth(self.current)
172+
.expect("Unexpected end of source.")
173+
!= expected
174+
{
175+
return false;
176+
}
177+
178+
self.current += 1;
179+
true
180+
}
181+
182+
fn peek(&self) -> char {
183+
self.source.chars().nth(self.current).unwrap_or('\0')
184+
}
185+
186+
fn peek_next(&self) -> char {
187+
self.source.chars().nth(self.current + 1).unwrap_or('\0')
188+
}
189+
190+
fn is_at_end(&self) -> bool {
191+
self.current >= self.source.len()
192+
}
193+
194+
fn advance(&mut self) -> char {
195+
self.current += 1;
196+
// TODO: work on &str directly.
197+
let char_vec: Vec<char> = self.source.chars().collect();
198+
char_vec[self.current - 1]
199+
}
200+
201+
fn add_token(&mut self, tpe: TokenType) {
202+
let text = self
203+
.source
204+
.get(self.start..self.current)
205+
.expect("Source token is empty.");
206+
self.tokens.push(Token::new(tpe, text, self.line))
207+
}
208+
}

0 commit comments

Comments
 (0)