Skip to content

Commit ce59f2e

Browse files
committed
parser: fix a bug where &#0x...; was accepted as a character reference
1 parent a09646d commit ce59f2e

File tree

2 files changed

+13
-3
lines changed

2 files changed

+13
-3
lines changed

lib/rexml/parsers/baseparser.rb

+7-3
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ module Private
150150
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
151151
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
152152
CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/
153-
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
153+
CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/
154154
DEFAULT_ENTITIES_PATTERNS = {}
155155
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
156156
default_entities.each do |term|
@@ -570,8 +570,12 @@ def unnormalize( string, entities=nil, filter=nil )
570570
return rv if matches.size == 0
571571
rv.gsub!( Private::CHARACTER_REFERENCES ) {
572572
m=$1
573-
m = "0#{m}" if m[0] == ?x
574-
[Integer(m)].pack('U*')
573+
if m.start_with?("x")
574+
code_point = Integer(m[1..-1], 16)
575+
else
576+
code_point = Integer(m, 10)
577+
end
578+
[code_point].pack('U*')
575579
}
576580
matches.collect!{|x|x[0]}.compact!
577581
if filter

test/parse/test_character_reference.rb

+6
Original file line number | Diff line number | Diff line change
@@ -13,5 +13,11 @@ def test_linear_performance_many_preceding_zeros
1313
REXML::Document.new('<test testing="&#' + "0" * n + '97;"/>')
1414
end
1515
end
16+
17+
def test_hex_precedding_zero
18+
parser = REXML::Parsers::PullParser.new("<root>&#x61;&#0x61;</root>")
19+
parser.pull # :start_element
20+
assert_equal("a&#0x61;", parser.pull[1]) # :text
21+
end
1622
end
1723
end

0 commit comments

Comments
 (0)