Skip to content

Commit 9ec785a

Browse files
committed
WIP for gh-71
Extract the `dtd` option and handle `dtd: :all | :none | :internal_only | [only: allowed_entities]`. More tests are needed; the options handling may still evolve.
1 parent db86841 commit 9ec785a

File tree

4 files changed

+106
-5
lines changed

4 files changed

+106
-5
lines changed

lib/sweet_xml.ex

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -225,15 +225,26 @@ defmodule SweetXml do
225225
226226
Returns an `xmlElement` record.
227227
"""
228-
def parse(doc), do: parse(doc, [])
229-
def parse(doc, options) when is_binary(doc) do
230-
doc |> :erlang.binary_to_list |> parse(options)
228+
def parse(doc, opts \\ []) do
229+
ets = :ets.new(nil, [])
230+
{dtd_arg, opts} = Keyword.pop(opts, :dtd, :all)
231+
opts = SweetXml.Options.handle_dtd(dtd_arg).(ets) ++ opts
232+
try do
233+
do_parse(doc, opts)
234+
after
235+
_ = :ets.delete(ets)
236+
end
237+
end
238+
239+
@doc false
240+
def do_parse(doc, options) when is_binary(doc) do
241+
doc |> :erlang.binary_to_list |> do_parse(options)
231242
end
232-
def parse([c | _] = doc, options) when is_integer(c) do
243+
def do_parse([c | _] = doc, options) when is_integer(c) do
233244
{parsed_doc, _} = :xmerl_scan.string(doc, options)
234245
parsed_doc
235246
end
236-
def parse(doc_enum, options) do
247+
def do_parse(doc_enum, options) do
237248
{parsed_doc, _} = :xmerl_scan.string('', options ++ continuation_opts(doc_enum))
238249
parsed_doc
239250
end

lib/sweet_xml/options.ex

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
defmodule SweetXml.Options do
2+
def handle_dtd(:all) do
3+
fn _ -> [] end
4+
end
5+
def handle_dtd(:none) do
6+
fn ets ->
7+
handle_dtd(:internal_only).(ets) ++ handle_dtd(only: []).(ets)
8+
end
9+
end
10+
def handle_dtd(:internal_only) do
11+
fn _ ->
12+
[fetch_fun: fn _, _ -> {:error, "no external entity allowed"} end]
13+
end
14+
end
15+
def handle_dtd(only: entity) when is_atom(entity) do
16+
handle_dtd(only: [entity])
17+
end
18+
def handle_dtd(only: entities) when is_list(entities) do
19+
fn ets ->
20+
read = fn
21+
context, name, state ->
22+
ets = :xmerl_scan.rules_state(state)
23+
case :ets.lookup(ets, {context, name}) do
24+
[] -> :undefined
25+
[{_, value}] -> value
26+
end
27+
end
28+
29+
write = fn
30+
:entity = context, name, value, state ->
31+
_ = case name in entities do
32+
true ->
33+
ets = :xmerl_scan.rules_state(state)
34+
_ = case :ets.lookup(ets, {context, name}) do
35+
[] -> :ets.insert(ets, {{context, name}, value})
36+
_ -> :ok
37+
end
38+
false -> raise("DTD not allowed: #{name}")
39+
end
40+
state
41+
42+
context, name, value, state ->
43+
ets = :xmerl_scan.rules_state(state)
44+
_ = case :ets.lookup(ets, {context, name}) do
45+
[] -> :ets.insert(ets, {{context, name}, value})
46+
_ -> :ok
47+
end
48+
state
49+
end
50+
51+
[{:rules, read, write, ets}]
52+
end
53+
end
54+
end

test/files/billion_laugh.xml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?xml version="1.0"?>
2+
<!DOCTYPE lolz [
3+
<!ELEMENT lolz (#PCDATA)>
4+
<!ENTITY lol1 "&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;">
5+
<!ENTITY lol2 "&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;&lol1;">
6+
<!ENTITY lol3 "&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;">
7+
<!ENTITY lol4 "&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;&lol3;">
8+
<!ENTITY lol5 "&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;&lol4;">
9+
<!ENTITY lol6 "&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;&lol5;">
10+
<!ENTITY lol7 "&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;&lol6;">
11+
<!ENTITY lol8 "&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;&lol7;">
12+
<!ENTITY lol9 "&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;&lol8;">
13+
]>
14+
<lolz>&lol9;</lolz>

test/issue_71_test.exs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
defmodule Issue71Test do
2+
use ExUnit.Case
3+
4+
test "read /etc/passwd with dtd: :none" do
5+
sneaky_xml = """
6+
<?xml version=\"1.0\" encoding=\"UTF-8\"?>
7+
<!DOCTYPE foo [ <!ELEMENT foo ANY >
8+
<!ENTITY xxe SYSTEM \"file:///etc/passwd\" >]>
9+
<response><result>&xxe;</result></response>
10+
"""
11+
12+
assert {:fatal, {{:error_fetching_DTD, {_, _}}, _file, _line, _col}} =
13+
catch_exit(SweetXml.parse(sneaky_xml, dtd: :none))
14+
end
15+
16+
test "raise on billion_laugh.xml with dtd: :none" do
17+
dangerous_xml = File.read!("./test/files/billion_laugh.xml")
18+
assert_raise RuntimeError, fn ->
19+
SweetXml.parse(dangerous_xml, dtd: :none)
20+
end
21+
end
22+
end

0 commit comments

Comments
 (0)