3
3
4
4
use alloc:: string:: String ;
5
5
use alloc:: vec:: Vec ;
6
+ use core:: iter:: Iterator ;
7
+ #[ allow( unused_imports) ]
8
+ use liumlib:: * ;
6
9
7
10
#[ allow( dead_code) ]
8
11
#[ derive( Debug , Clone , PartialEq , Eq ) ]
@@ -16,95 +19,76 @@ pub enum State {
16
19
17
20
/// A token emitted by the HTML tokenizer.
///
/// Each variant carries exactly the data that is meaningful for it, so a
/// character token cannot accidentally hold a tag name and vice versa.
// The tokenizer in this file only constructs `Char` and `Eof` so far; the
// remaining variants are declared ahead of use.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// A DOCTYPE token.
    Doctype,
    /// A start tag such as `<p>`; `self_closing` records the self-closing flag.
    StartTag { tag: String, self_closing: bool },
    /// An end tag such as `</p>`; `self_closing` records the self-closing flag.
    EndTag { tag: String, self_closing: bool },
    /// A single character of text.
    Char(char),
    /// End of the input stream.
    Eof,
}
26
29
27
- #[ allow( dead_code) ]
28
30
#[ derive( Debug , Clone , PartialEq , Eq ) ]
29
- pub struct Token {
30
- token_type : TokenType ,
31
- tag : String ,
32
- self_closing : bool ,
33
- data : String ,
34
- }
35
-
36
- impl Token {
37
- pub fn new ( token_type : TokenType , tag : String , self_closing : bool , data : String ) -> Self {
38
- Self {
39
- token_type,
40
- tag,
41
- self_closing,
42
- data,
43
- }
44
- }
45
- }
46
-
47
- #[ derive( Debug ) ]
48
31
pub struct Tokenizer {
49
32
state : State ,
50
33
pos : usize ,
51
- length : usize ,
52
- html : String ,
53
- tokens : Vec < Token > ,
34
+ input : Vec < char > ,
54
35
}
55
36
56
37
impl Tokenizer {
57
38
pub fn new ( html : String ) -> Self {
58
39
Self {
59
40
state : State :: Data ,
60
41
pos : 0 ,
61
- length : html. len ( ) ,
62
- html,
63
- tokens : Vec :: new ( ) ,
42
+ input : html. chars ( ) . collect ( ) ,
64
43
}
65
44
}
66
45
67
- pub fn tokens ( & mut self ) -> Vec < Token > {
46
+ fn switch_to ( & mut self , s : State ) {
47
+ self . state = s;
48
+ }
49
+
50
+ /// Consume the next input character.
51
+ fn consume_next_input ( & mut self ) -> char {
52
+ let c = self . input [ self . pos ] ;
53
+ self . pos += 1 ;
54
+ c
55
+ }
56
+ }
57
+
58
+ impl Iterator for Tokenizer {
59
+ type Item = Token ;
60
+
61
+ fn next ( & mut self ) -> Option < Self :: Item > {
62
+ if self . pos >= self . input . len ( ) {
63
+ return None ;
64
+ }
65
+
68
66
loop {
69
- // https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
67
+ let c = self . consume_next_input ( ) ;
70
68
71
69
match self . state {
72
70
State :: Data => {
73
- if self . html [ self . pos ..] . starts_with ( "<" ) {
74
- self . state = State :: TagOpen ;
71
+ // https://html.spec.whatwg.org/multipage/parsing.html#data-state
72
+ if c == '<' {
73
+ self . switch_to ( State :: TagOpen ) ;
75
74
continue ;
76
75
}
77
- if self . pos == self . length {
78
- self . append_eof ( ) ;
79
- return self . tokens . clone ( ) ;
76
+
77
+ if self . pos > self . input . len ( ) {
78
+ return Some ( Token :: Eof ) ;
80
79
}
81
- self . append_char ( ) ;
82
- break ;
80
+
81
+ return Some ( Token :: Char ( c) ) ;
82
+ }
83
+ State :: TagOpen => {
84
+ // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
83
85
}
84
86
_ => { }
85
87
}
86
88
87
- self . pos += 1 ;
89
+ if self . pos == self . input . len ( ) {
90
+ return Some ( Token :: Eof ) ;
91
+ }
88
92
}
89
-
90
- self . tokens . clone ( )
91
- }
92
-
93
- pub fn append_eof ( & mut self ) {
94
- self . tokens . push ( Token :: new (
95
- TokenType :: Eof ,
96
- String :: new ( ) ,
97
- false ,
98
- String :: new ( ) ,
99
- ) ) ;
100
- }
101
-
102
- fn append_char ( & mut self ) {
103
- self . tokens . push ( Token :: new (
104
- TokenType :: Char ,
105
- String :: new ( ) ,
106
- false ,
107
- String :: new ( ) ,
108
- ) ) ;
109
93
}
110
94
}
0 commit comments