Skip to content

Commit 2fe62e2

Browse files
koumame
authored andcommitted
Fix a bug that invalid notation declaration may be accepted
HackerOne: HO-1104077 It's caused by quote character. Reported by Juho Nurminen. Thanks!!!
1 parent a659c63 commit 2fe62e2

File tree

2 files changed

+234
-6
lines changed

2 files changed

+234
-6
lines changed

lib/rexml/parsers/baseparser.rb

+53-6
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,6 @@ class BaseParser
8383
ATTDEF_RE = /#{ATTDEF}/
8484
ATTLISTDECL_START = /\A\s*<!ATTLIST/um
8585
ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um
86-
NOTATIONDECL_START = /\A\s*<!NOTATION/um
87-
PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um
88-
SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um
8986

9087
TEXT_PATTERN = /\A([^<]*)/um
9188

@@ -103,6 +100,10 @@ class BaseParser
103100
GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
104101
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
105102

103+
NOTATIONDECL_START = /\A\s*<!NOTATION/um
104+
PUBLIC = /\A\s*<!NOTATION\s+#{NAME}\s+(PUBLIC)\s+#{PUBIDLITERAL}(?:\s+#{SYSTEMLITERAL})?\s*>/um
105+
SYSTEM = /\A\s*<!NOTATION\s+#{NAME}\s+(SYSTEM)\s+#{SYSTEMLITERAL}\s*>/um
106+
106107
EREFERENCE = /&(?!#{NAME};)/
107108

108109
DEFAULT_ENTITIES = {
@@ -315,12 +316,22 @@ def pull_event
315316
md = nil
316317
if @source.match( PUBLIC )
317318
md = @source.match( PUBLIC, true )
318-
vals = [md[1],md[2],md[4],md[6]]
319+
pubid = system = nil
320+
pubid_literal = md[3]
321+
pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
322+
system_literal = md[4]
323+
system = system_literal[1..-2] if system_literal # Remove quote
324+
vals = [md[1], md[2], pubid, system]
319325
elsif @source.match( SYSTEM )
320326
md = @source.match( SYSTEM, true )
321-
vals = [md[1],md[2],nil,md[4]]
327+
system = nil
328+
system_literal = md[3]
329+
system = system_literal[1..-2] if system_literal # Remove quote
330+
vals = [md[1], md[2], nil, system]
322331
else
323-
raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
332+
details = notation_decl_invalid_details
333+
message = "Malformed notation declaration: #{details}"
334+
raise REXML::ParseException.new(message, @source)
324335
end
325336
return [ :notationdecl, *vals ]
326337
when DOCTYPE_END
@@ -569,6 +580,42 @@ def parse_attributes(prefixes, curr_ns)
569580
end
570581
return attributes, closed
571582
end
583+
584+
def notation_decl_invalid_details
585+
name = /#{NOTATIONDECL_START}\s+#{NAME}/um
586+
public = /#{name}\s+PUBLIC/um
587+
system = /#{name}\s+SYSTEM/um
588+
if @source.match(/#{NOTATIONDECL_START}\s*>/um)
589+
return "name is missing"
590+
elsif not @source.match(/#{name}[\s>]/um)
591+
return "invalid name"
592+
elsif @source.match(/#{name}\s*>/um)
593+
return "ID type is missing"
594+
elsif not @source.match(/#{name}\s+(?:PUBLIC|SYSTEM)[\s>]/um)
595+
return "invalid ID type"
596+
elsif @source.match(/#{public}/um)
597+
if @source.match(/#{public}\s*>/um)
598+
return "public ID literal is missing"
599+
elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
600+
return "invalid public ID literal"
601+
elsif @source.match(/#{public}\s+#{PUBIDLITERAL}[^\s>]/um)
602+
return "garbage after public ID literal"
603+
elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
604+
return "invalid system literal"
605+
elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*>/um)
606+
return "garbage after system literal"
607+
end
608+
elsif @source.match(/#{system}/um)
609+
if @source.match(/#{system}\s*>/um)
610+
return "system literal is missing"
611+
elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
612+
return "invalid system literal"
613+
elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}\s*>/um)
614+
return "garbage after system literal"
615+
end
616+
end
617+
"end > is missing"
618+
end
572619
end
573620
end
574621
end

test/parse/test_notation_declaration.rb

+181
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,100 @@ def test_name
2323
doctype = parse("<!NOTATION name PUBLIC 'urn:public-id'>")
2424
assert_equal("name", doctype.notation("name").name)
2525
end
26+
27+
def test_no_name
28+
exception = assert_raise(REXML::ParseException) do
29+
parse(<<-INTERNAL_SUBSET)
30+
<!NOTATION>
31+
INTERNAL_SUBSET
32+
end
33+
assert_equal(<<-DETAIL.chomp, exception.to_s)
34+
Malformed notation declaration: name is missing
35+
Line: 5
36+
Position: 72
37+
Last 80 unconsumed characters:
38+
<!NOTATION> ]> <r/>
39+
DETAIL
40+
end
41+
42+
def test_invalid_name
43+
exception = assert_raise(REXML::ParseException) do
44+
parse(<<-INTERNAL_SUBSET)
45+
<!NOTATION '>
46+
INTERNAL_SUBSET
47+
end
48+
assert_equal(<<-DETAIL.chomp, exception.to_s)
49+
Malformed notation declaration: invalid name
50+
Line: 5
51+
Position: 74
52+
Last 80 unconsumed characters:
53+
<!NOTATION '> ]> <r/>
54+
DETAIL
55+
end
56+
57+
def test_no_id_type
58+
exception = assert_raise(REXML::ParseException) do
59+
parse(<<-INTERNAL_SUBSET)
60+
<!NOTATION name>
61+
INTERNAL_SUBSET
62+
end
63+
assert_equal(<<-DETAIL.chomp, exception.to_s)
64+
Malformed notation declaration: ID type is missing
65+
Line: 5
66+
Position: 77
67+
Last 80 unconsumed characters:
68+
<!NOTATION name> ]> <r/>
69+
DETAIL
70+
end
71+
72+
def test_invalid_id_type
73+
exception = assert_raise(REXML::ParseException) do
74+
parse(<<-INTERNAL_SUBSET)
75+
<!NOTATION name INVALID>
76+
INTERNAL_SUBSET
77+
end
78+
assert_equal(<<-DETAIL.chomp, exception.to_s)
79+
Malformed notation declaration: invalid ID type
80+
Line: 5
81+
Position: 85
82+
Last 80 unconsumed characters:
83+
<!NOTATION name INVALID> ]> <r/>
84+
DETAIL
85+
end
2686
end
2787

2888
class TestExternalID < self
2989
class TestSystem < self
90+
def test_no_literal
91+
exception = assert_raise(REXML::ParseException) do
92+
parse(<<-INTERNAL_SUBSET)
93+
<!NOTATION name SYSTEM>
94+
INTERNAL_SUBSET
95+
end
96+
assert_equal(<<-DETAIL.chomp, exception.to_s)
97+
Malformed notation declaration: system literal is missing
98+
Line: 5
99+
Position: 84
100+
Last 80 unconsumed characters:
101+
<!NOTATION name SYSTEM> ]> <r/>
102+
DETAIL
103+
end
104+
105+
def test_garbage_after_literal
106+
exception = assert_raise(REXML::ParseException) do
107+
parse(<<-INTERNAL_SUBSET)
108+
<!NOTATION name SYSTEM 'system-literal'x'>
109+
INTERNAL_SUBSET
110+
end
111+
assert_equal(<<-DETAIL.chomp, exception.to_s)
112+
Malformed notation declaration: garbage after system literal
113+
Line: 5
114+
Position: 103
115+
Last 80 unconsumed characters:
116+
<!NOTATION name SYSTEM 'system-literal'x'> ]> <r/>
117+
DETAIL
118+
end
119+
30120
def test_single_quote
31121
doctype = parse(<<-INTERNAL_SUBSET)
32122
<!NOTATION name SYSTEM 'system-literal'>
@@ -44,6 +134,21 @@ def test_double_quote
44134

45135
class TestPublic < self
46136
class TestPublicIDLiteral < self
137+
def test_content_double_quote
138+
exception = assert_raise(REXML::ParseException) do
139+
parse(<<-INTERNAL_SUBSET)
140+
<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal">
141+
INTERNAL_SUBSET
142+
end
143+
assert_equal(<<-DETAIL.chomp, exception.to_s)
144+
Malformed notation declaration: invalid public ID literal
145+
Line: 5
146+
Position: 129
147+
Last 80 unconsumed characters:
148+
<!NOTATION name PUBLIC 'double quote " is invalid' "system-literal"> ]> <r/>
149+
DETAIL
150+
end
151+
47152
def test_single_quote
48153
doctype = parse(<<-INTERNAL_SUBSET)
49154
<!NOTATION name PUBLIC 'public-id-literal' "system-literal">
@@ -60,6 +165,21 @@ def test_double_quote
60165
end
61166

62167
class TestSystemLiteral < self
168+
def test_garbage_after_literal
169+
exception = assert_raise(REXML::ParseException) do
170+
parse(<<-INTERNAL_SUBSET)
171+
<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'>
172+
INTERNAL_SUBSET
173+
end
174+
assert_equal(<<-DETAIL.chomp, exception.to_s)
175+
Malformed notation declaration: garbage after system literal
176+
Line: 5
177+
Position: 123
178+
Last 80 unconsumed characters:
179+
<!NOTATION name PUBLIC 'public-id-literal' 'system-literal'x'> ]> <r/>
180+
DETAIL
181+
end
182+
63183
def test_single_quote
64184
doctype = parse(<<-INTERNAL_SUBSET)
65185
<!NOTATION name PUBLIC "public-id-literal" 'system-literal'>
@@ -96,5 +216,66 @@ def test_public_system
96216
end
97217
end
98218
end
219+
220+
class TestPublicID < self
221+
def test_no_literal
222+
exception = assert_raise(REXML::ParseException) do
223+
parse(<<-INTERNAL_SUBSET)
224+
<!NOTATION name PUBLIC>
225+
INTERNAL_SUBSET
226+
end
227+
assert_equal(<<-DETAIL.chomp, exception.to_s)
228+
Malformed notation declaration: public ID literal is missing
229+
Line: 5
230+
Position: 84
231+
Last 80 unconsumed characters:
232+
<!NOTATION name PUBLIC> ]> <r/>
233+
DETAIL
234+
end
235+
236+
def test_literal_content_double_quote
237+
exception = assert_raise(REXML::ParseException) do
238+
parse(<<-INTERNAL_SUBSET)
239+
<!NOTATION name PUBLIC 'double quote " is invalid in PubidLiteral'>
240+
INTERNAL_SUBSET
241+
end
242+
assert_equal(<<-DETAIL.chomp, exception.to_s)
243+
Malformed notation declaration: invalid public ID literal
244+
Line: 5
245+
Position: 128
246+
Last 80 unconsumed characters:
247+
<!NOTATION name PUBLIC 'double quote \" is invalid in PubidLiteral'> ]> <r/>
248+
DETAIL
249+
end
250+
251+
def test_garbage_after_literal
252+
exception = assert_raise(REXML::ParseException) do
253+
parse(<<-INTERNAL_SUBSET)
254+
<!NOTATION name PUBLIC 'public-id-literal'x'>
255+
INTERNAL_SUBSET
256+
end
257+
assert_equal(<<-DETAIL.chomp, exception.to_s)
258+
Malformed notation declaration: garbage after public ID literal
259+
Line: 5
260+
Position: 106
261+
Last 80 unconsumed characters:
262+
<!NOTATION name PUBLIC 'public-id-literal'x'> ]> <r/>
263+
DETAIL
264+
end
265+
266+
def test_literal_single_quote
267+
doctype = parse(<<-INTERNAL_SUBSET)
268+
<!NOTATION name PUBLIC 'public-id-literal'>
269+
INTERNAL_SUBSET
270+
assert_equal("public-id-literal", doctype.notation("name").public)
271+
end
272+
273+
def test_literal_double_quote
274+
doctype = parse(<<-INTERNAL_SUBSET)
275+
<!NOTATION name PUBLIC "public-id-literal">
276+
INTERNAL_SUBSET
277+
assert_equal("public-id-literal", doctype.notation("name").public)
278+
end
279+
end
99280
end
100281
end

0 commit comments

Comments
 (0)