Skip to content

Commit 84ca20c

Browse files
committed
refactor: extract scrub_uri_attribute for downstream use
1 parent 47a835a commit 84ca20c

File tree

1 file changed

+19
-14
lines changed

1 file changed

+19
-14
lines changed

lib/loofah/html5/scrub.rb

+19 -14
Original file line number | Diff line number | Diff line change
@@ -36,20 +36,7 @@ def scrub_attributes(node)
36 36
end
37 37

38 38
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
39-
# this block lifted nearly verbatim from HTML5 sanitization
40-
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
41-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
42-
attr_node.remove
43-
next
44-
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
45-
# permit only allowed data mediatypes
46-
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
47-
mediatype, _ = mediatype.split(";")[0..1] if mediatype
48-
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
49-
attr_node.remove
50-
next
51-
end
52-
end
39+
next if scrub_uri_attribute(attr_node)
53 40
end
54 41

55 42
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
@@ -152,6 +139,24 @@ def scrub_attribute_that_allows_local_ref(attr_node)
152 139
attr_node.value = values.join(" ")
153 140
end
154 141

142+
def scrub_uri_attribute(attr_node)
143+
# this block lifted nearly verbatim from HTML5 sanitization
144+
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
145+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
146+
attr_node.remove
147+
return true
148+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
149+
# permit only allowed data mediatypes
150+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
151+
mediatype, _ = mediatype.split(";")[0..1] if mediatype
152+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
153+
attr_node.remove
154+
return true
155+
end
156+
end
157+
false
158+
end
159+
155 160
#
156 161
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
157 162
#

0 commit comments

Comments
 (0)