Skip to content

Commit a6e0a1a

Browse files
committed
fix: replace slow regex attribute check with crass parser
1 parent ea853aa commit a6e0a1a

File tree

3 files changed

+33
-18
lines changed

3 files changed

+33
-18
lines changed

lib/loofah/html5/scrub.rb

+26-1
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,11 @@ def scrub_attributes(node)
5151
end
5252
end
5353
end
54+
5455
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
55-
attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
56+
scrub_attribute_that_allows_local_ref(attr_node)
5657
end
58+
5759
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
5860
attr_node.remove
5961
next
@@ -127,6 +129,29 @@ def scrub_css(style)
127129
Crass::Parser.stringify(sanitized_tree)
128130
end
129131

132+
def scrub_attribute_that_allows_local_ref(attr_node)
133+
return unless attr_node.value
134+
135+
nodes = Crass::Parser.new(attr_node.value).parse_component_values
136+
137+
values = nodes.map do |node|
138+
case node[:node]
139+
when :url
140+
if node[:value].start_with?("#")
141+
node[:raw]
142+
else
143+
nil
144+
end
145+
when :hash, :ident, :string
146+
node[:raw]
147+
else
148+
nil
149+
end
150+
end.compact
151+
152+
attr_node.value = values.join(" ")
153+
end
154+
130155
#
131156
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
132157
#

test/assets/testdata_sanitizer_tests1.dat

+3-3
Original file line numberDiff line numberDiff line change
@@ -463,9 +463,9 @@
463463
{
464464
"name": "absolute_uri_refs_in_svg_attributes",
465465
"input": "<rect fill='url(http://bad.com/) #fff' />",
466-
"rexml": "<rect fill=' #fff'></rect>",
467-
"xhtml": "<rect fill=' #fff'></rect>",
468-
"output": "<rect fill=' #fff'/>"
466+
"rexml": "<rect fill='#fff'></rect>",
467+
"xhtml": "<rect fill='#fff'></rect>",
468+
"output": "<rect fill='#fff'/>"
469469
},
470470

471471
{

test/html5/test_sanitizer.rb

+4-14
Original file line numberDiff line numberDiff line change
@@ -267,15 +267,15 @@ def test_figure_element_is_valid
267267

268268
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
269269
HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
270-
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
270+
define_method "test_allow_uri_refs_in_svg_attribute_#{attr_name}" do
271271
input = "<rect fill='url(#foo)' />"
272272
output = "<rect fill='url(#foo)'></rect>"
273273
check_sanitization(input, output, output, output)
274274
end
275275

276-
define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
277-
input = "<rect fill='url(http://bad.com/) #fff' />"
278-
output = "<rect fill=' #fff'></rect>"
276+
define_method "test_disallow_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
277+
input = "<rect fill='yellow url(http://bad.com/) #fff \"blue\"' />"
278+
output = "<rect fill='yellow #fff \"blue\"'></rect>"
279279
check_sanitization(input, output, output, output)
280280
end
281281
end
@@ -480,16 +480,6 @@ def test_css_order
480480
assert_match %r/order:5/, sane.inner_html
481481
end
482482

483-
def test_issue_90_slow_regex
484-
skip("timing tests are hard to make pass and have little regression-testing value")
485-
486-
html = %q{<span style="background: url('data:image/svg&#43;xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2232%22%20height%3D%2232%22%20viewBox%3D%220%200%2032%2032%22%3E%3Cpath%20fill%3D%22%23D4C8AE%22%20d%3D%22M0%200h32v32h-32z%22%2F%3E%3Cpath%20fill%3D%22%2383604B%22%20d%3D%22M0%200h31.99v11.75h-31.99z%22%2F%3E%3Cpath%20fill%3D%22%233D2319%22%20d%3D%22M0%2011.5h32v.5h-32z%22%2F%3E%3Cpath%20fill%3D%22%23F83651%22%20d%3D%22M5%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23FCD050%22%20d%3D%22M6%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%2371C797%22%20d%3D%22M7%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23509CF9%22%20d%3D%22M8%200h1v10.5h-1z%22%2F%3E%3ClinearGradient%20id%3D%22a%22%20gradientUnits%3D%22userSpaceOnUse%22%20x1%3D%2224.996%22%20y1%3D%2210.5%22%20x2%3D%2224.996%22%20y2%3D%224.5%22%3E%3Cstop%20offset%3D%220%22%20stop-color%3D%22%23796055%22%2F%3E%3Cstop%20offset%3D%22.434%22%20stop-color%3D%22%23614C43%22%2F%3E%3Cstop%20offset%3D%221%22%20stop-color%3D%22%233D2D28%22%2F%3E%3C%2FlinearGradient%3E%3Cpath%20fill%3D%22url(%23a)%22%20d%3D%22M28%208.5c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3Cpath%20fill%3D%22%235F402E%22%20d%3D%22M28%208c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3C');"></span>}
487-
488-
assert_completes_in_reasonable_time {
489-
Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
490-
}
491-
end
492-
493483
def test_upper_case_css_property
494484
html = "<div style=\"COLOR: BLUE; NOTAPROPERTY: RED;\">asdf</div>"
495485
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)

0 commit comments

Comments
 (0)