Skip to content

Commit 0d630fe

Browse files
committed
Merge remote-tracking branch 'refs/remotes/origin/main'
2 parents 8d5f33d + c100f86 commit 0d630fe

File tree

6 files changed

+78
-74
lines changed

6 files changed

+78
-74
lines changed

app/browser-rs/Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,4 @@ clippy :
2929
$(CARGO) clippy $(CARGO_ARGS)
3030

3131
test:
32-
$(CARGO) test $(CARGO_ARGS) --tests -- --test-threads=1
32+
$(CARGO) test $(CARGO_ARGS) --tests -- --test-threads=1 --nocapture

app/browser-rs/src/rendering.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,5 @@ pub fn render(html: String) {
1010
println!("===== rendering start ===== ");
1111
println!("{}", html);
1212

13-
let tokenizer = Tokenizer::new(html).tokens();
14-
println!("{:?}", tokenizer);
13+
let _tokenizer = Tokenizer::new(html);
1514
}

app/browser-rs/src/rendering/tokenizer.rs

+44-60
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
44
use alloc::string::String;
55
use alloc::vec::Vec;
6+
use core::iter::Iterator;
7+
#[allow(unused_imports)]
8+
use liumlib::*;
69

710
#[allow(dead_code)]
811
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -16,95 +19,76 @@ pub enum State {
1619

1720
#[allow(dead_code)]
1821
#[derive(Debug, Clone, PartialEq, Eq)]
19-
pub enum TokenType {
22+
pub enum Token {
2023
Doctype,
21-
StartTag,
22-
EndTag,
23-
Char,
24+
StartTag { tag: String, self_closing: bool },
25+
EndTag { tag: String, self_closing: bool },
26+
Char(char),
2427
Eof,
2528
}
2629

27-
#[allow(dead_code)]
2830
#[derive(Debug, Clone, PartialEq, Eq)]
29-
pub struct Token {
30-
token_type: TokenType,
31-
tag: String,
32-
self_closing: bool,
33-
data: String,
34-
}
35-
36-
impl Token {
37-
pub fn new(token_type: TokenType, tag: String, self_closing: bool, data: String) -> Self {
38-
Self {
39-
token_type,
40-
tag,
41-
self_closing,
42-
data,
43-
}
44-
}
45-
}
46-
47-
#[derive(Debug)]
4831
pub struct Tokenizer {
4932
state: State,
5033
pos: usize,
51-
length: usize,
52-
html: String,
53-
tokens: Vec<Token>,
34+
input: Vec<char>,
5435
}
5536

5637
impl Tokenizer {
5738
pub fn new(html: String) -> Self {
5839
Self {
5940
state: State::Data,
6041
pos: 0,
61-
length: html.len(),
62-
html,
63-
tokens: Vec::new(),
42+
input: html.chars().collect(),
6443
}
6544
}
6645

67-
pub fn tokens(&mut self) -> Vec<Token> {
46+
fn switch_to(&mut self, s: State) {
47+
self.state = s;
48+
}
49+
50+
/// Consume the next input character.
51+
fn consume_next_input(&mut self) -> char {
52+
let c = self.input[self.pos];
53+
self.pos += 1;
54+
c
55+
}
56+
}
57+
58+
impl Iterator for Tokenizer {
59+
type Item = Token;
60+
61+
fn next(&mut self) -> Option<Self::Item> {
62+
if self.pos >= self.input.len() {
63+
return None;
64+
}
65+
6866
loop {
69-
// https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
67+
let c = self.consume_next_input();
7068

7169
match self.state {
7270
State::Data => {
73-
if self.html[self.pos..].starts_with("<") {
74-
self.state = State::TagOpen;
71+
// https://html.spec.whatwg.org/multipage/parsing.html#data-state
72+
if c == '<' {
73+
self.switch_to(State::TagOpen);
7574
continue;
7675
}
77-
if self.pos == self.length {
78-
self.append_eof();
79-
return self.tokens.clone();
76+
77+
if self.pos > self.input.len() {
78+
return Some(Token::Eof);
8079
}
81-
self.append_char();
82-
break;
80+
81+
return Some(Token::Char(c));
82+
}
83+
State::TagOpen => {
84+
// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
8385
}
8486
_ => {}
8587
}
8688

87-
self.pos += 1;
89+
if self.pos == self.input.len() {
90+
return Some(Token::Eof);
91+
}
8892
}
89-
90-
self.tokens.clone()
91-
}
92-
93-
pub fn append_eof(&mut self) {
94-
self.tokens.push(Token::new(
95-
TokenType::Eof,
96-
String::new(),
97-
false,
98-
String::new(),
99-
));
100-
}
101-
102-
fn append_char(&mut self) {
103-
self.tokens.push(Token::new(
104-
TokenType::Char,
105-
String::new(),
106-
false,
107-
String::new(),
108-
));
10993
}
11094
}

app/browser-rs/tests/tokenizer.rs

+30-9
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,34 @@ fn main() {
4747
#[test_case]
4848
fn no_input() {
4949
let mut t = Tokenizer::new(String::new());
50-
let mut tokens = Vec::new();
51-
tokens.push(Token::new(
52-
TokenType::Eof,
53-
String::new(),
54-
false,
55-
String::new(),
56-
));
57-
58-
assert_eq!(t.tokens(), tokens);
50+
assert_eq!(t.next(), None);
5951
}
52+
53+
#[test_case]
54+
fn chars() {
55+
let mut t = Tokenizer::new(String::from("foo"));
56+
57+
let mut expected = Vec::new();
58+
expected.push(Token::Char('f'));
59+
expected.push(Token::Char('o'));
60+
expected.push(Token::Char('o'));
61+
62+
for e in expected {
63+
assert_eq!(t.next().expect("tokenizer should have a next Token"), e);
64+
}
65+
}
66+
67+
/*
68+
#[test_case]
69+
fn body() {
70+
let mut t = Tokenizer::new(String::from("<body></body>"));
71+
72+
let mut expected = Vec::new();
73+
expected.push(Token::StartTag({ tag: String::from("body"), self_closing: false }));
74+
expected.push(Token::EndTag({ tag: String::from("body"), self_closing: false }));
75+
76+
for e in expected {
77+
assert_eq!(t.next().expect("tokenizer should have a next Token"), e);
78+
}
79+
}
80+
*/

app/browser/tokenize.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ void Tokenize(char *html) {
6161
}
6262

6363
switch (state) {
64-
case DATA:
64+
case DATA:
6565
// https://html.spec.whatwg.org/multipage/parsing.html#data-state
6666
if (*html == '<') {
6767
// U+003C LESS-THAN SIGN (<)

app/liumlib/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,7 @@ trait MutableAllocator {
281281
fn dealloc(&mut self, _ptr: *mut u8, _layout: Layout);
282282
}
283283

284-
const ALLOCATOR_BUF_SIZE: usize = 0x2000;
284+
const ALLOCATOR_BUF_SIZE: usize = 0x100000;
285285
pub struct WaterMarkAllocator {
286286
buf: [u8; ALLOCATOR_BUF_SIZE],
287287
used_bytes: usize,

0 commit comments

Comments
 (0)