1
1
defmodule SweetXpath do
2
+ @ moduledoc false
2
3
3
4
defmodule Priv do
4
5
@ moduledoc false
@@ -115,20 +116,34 @@ defmodule SweetXml do
115
116
"""
116
117
117
118
require Record
119
+ @ doc false
118
120
Record . defrecord :xmlDecl , Record . extract ( :xmlDecl , from_lib: "xmerl/include/xmerl.hrl" )
121
+ @ doc false
119
122
Record . defrecord :xmlAttribute , Record . extract ( :xmlAttribute , from_lib: "xmerl/include/xmerl.hrl" )
123
+ @ doc false
120
124
Record . defrecord :xmlNamespace , Record . extract ( :xmlNamespace , from_lib: "xmerl/include/xmerl.hrl" )
125
+ @ doc false
121
126
Record . defrecord :xmlNsNode , Record . extract ( :xmlNsNode , from_lib: "xmerl/include/xmerl.hrl" )
127
+ @ doc false
122
128
Record . defrecord :xmlElement , Record . extract ( :xmlElement , from_lib: "xmerl/include/xmerl.hrl" )
129
+ @ doc false
123
130
Record . defrecord :xmlText , Record . extract ( :xmlText , from_lib: "xmerl/include/xmerl.hrl" )
131
+ @ doc false
124
132
Record . defrecord :xmlComment , Record . extract ( :xmlComment , from_lib: "xmerl/include/xmerl.hrl" )
133
+ @ doc false
125
134
Record . defrecord :xmlPI , Record . extract ( :xmlPI , from_lib: "xmerl/include/xmerl.hrl" )
135
+ @ doc false
126
136
Record . defrecord :xmlDocument , Record . extract ( :xmlDocument , from_lib: "xmerl/include/xmerl.hrl" )
137
+ @ doc false
127
138
Record . defrecord :xmlObj , Record . extract ( :xmlObj , from_lib: "xmerl/include/xmerl.hrl" )
128
139
140
+ @ type doc :: ( iodata | String . t | Enum . t )
141
+ @ type spec :: % SweetXpath { }
142
+ @ opaque xmlElement :: record ( :xmlElement )
143
+
129
144
130
145
@ doc ~s"""
131
- `sigil_x/2` simply returns a `SweetXpath` struct, with modifiers converted to
146
+ `sigil_x/2` simply returns a `%SweetXpath{}` struct, with modifiers converted to
132
147
boolean fields:
133
148
134
149
iex> SweetXml.sigil_x("//some/path", 'e')
@@ -211,20 +226,31 @@ defmodule SweetXml do
211
226
| xpath . namespaces ] }
212
227
end
213
228
214
- @ doc """
229
+ @ doc """
230
+ Parse a document into a form ready to be used by `xpath/3` and `xmap/2`.
231
+
215
232
`doc` can be
216
233
217
234
- a byte list (iodata)
218
235
- a binary
219
236
- any enumerable of binaries (for instance `File.stream!/3` result)
220
237
221
- `options` are `xmerl` options described here [http://www.erlang.org/doc/man/xmerl_scan.html](http://www.erlang.org/doc/man/xmerl_scan.html),
222
- see [the erlang tutorial](http://www.erlang.org/doc/apps/xmerl/xmerl_examples.html) for usage.
238
+ `options` can include both:
239
+ * `xmerl`'s options as described on the [xmerl_scan](http://www.erlang.org/doc/man/xmerl_scan.html) documentation page,
240
+ see [the erlang tutorial](http://www.erlang.org/doc/apps/xmerl/xmerl_examples.html) for some advanced usage.
241
+ For example: `parse(doc, quiet: true)`
242
+ * `:dtd` to prevent DTD parsing or fetching, with the following possibilities:
243
+ * `:none`, will prevent both internal and external entities; this is the recommended option for untrusted XML;
244
+ * `:all`, the default, for backward compatibility, allows all DTDs;
245
+ * `:internal_only`, will block all attempts at external fetching;
246
+ * `[only: entities]` where `entities` is either an atom for a single entity, or a list of atoms.
247
+ If any other entity is defined in the XML, `parse` will raise on them.
223
248
224
249
When `doc` is an enumerable, the `:cont_fun` option cannot be given.
225
250
226
251
Returns an `xmlElement` record.
227
252
"""
253
+ @ spec parse ( doc , opts :: list ) :: xmlElement
228
254
def parse ( doc , opts \\ [ ] ) do
229
255
ets = :ets . new ( nil , [ ] )
230
256
dtd_arg = :proplists . get_value ( :dtd , opts , :all )
@@ -264,6 +290,7 @@ defmodule SweetXml do
264
290
will be `{:tagname, xmlelem}`. e.g. :li, :header
265
291
- `options[:discard]` is the list of tags which will be discarded:
266
292
not added to its parent DOM.
293
+ - More details on the options are available in `parse/2`.
267
294
268
295
## Examples
269
296
@@ -338,9 +365,9 @@ defmodule SweetXml do
338
365
339
366
- `doc` is an enumerable, data will be pulled during the result stream
340
367
enumeration. e.g. `File.stream!("some_file.xml")`
341
- - `options_callback` is an anonymous function `fn emit -> xmerl_opts` use it to
368
+ - `options_callback` is an anonymous function `fn emit -> (xmerl_opts | opts)`; use it to
342
369
define your :xmerl callbacks and put data into the stream using
343
- `emit.(elem)` in the callbacks.
370
+ `emit.(elem)` in the callbacks. More details are available with `parse/2`.
344
371
345
372
For example, here you define a stream of all `xmlElement` :
346
373
@@ -400,12 +427,12 @@ defmodule SweetXml do
400
427
end
401
428
402
429
@ doc ~S"""
403
- `xpath` allows you to query an XML document with xpath .
430
+ `xpath` allows you to query an XML document with XPath.
404
431
405
- The second argument to xpath is a `SweetXpath` struct. The optional third
432
+ The second argument to xpath is a `%SweetXpath{}` struct. The optional third
406
433
argument is a keyword list, such that the value of each keyword is also
407
- either a `SweetXpath` or a list with head being a `SweetXpath` and tail being
408
- another keyword list exactly like before. Please see examples below for better
434
+ either a `%SweetXpath{}` or a list with head being a `%SweetXpath{}` and tail being
435
+ another keyword list exactly like before. Please see the examples below for better
409
436
understanding.
410
437
411
438
## Examples
@@ -438,32 +465,49 @@ defmodule SweetXml do
438
465
...> )
439
466
%{ul: %{a: 'Two'}}
440
467
468
+ ## Security
469
+
470
+ Whenever you are working with some xml that was not generated by your system,
471
+ it is highly recommended that you restrict some features of XML
472
+ during parsing. In particular, SweetXml lets you prevent DTD parsing and fetching.
473
+ Unless you know exactly what kind of DTD you want to permit in your xml,
474
+ it is recommended that you use the following code example to prevent possible attacks:
475
+ ```
476
+ doc
477
+ |> parse(dtd: :none)
478
+ |> xpath(spec, subspec)
479
+ ```
480
+ For more details, see `parse/2`.
441
481
"""
442
- def xpath ( parent , spec ) when not is_tuple ( parent ) do
482
+ @ spec xpath ( parent :: ( doc | xmlElement ) , spec , subspec ) :: any
483
+ when subspec: keyword ( spec | subspec )
484
+ def xpath ( parent , spec , subspec \\ [ ] )
485
+
486
+ def xpath ( parent , spec , [ ] ) when not is_tuple ( parent ) do
443
487
parent |> parse |> xpath ( spec )
444
488
end
445
489
446
- def xpath ( parent , % SweetXpath { is_list: true , is_value: true , cast_to: cast , is_optional: is_opt? } = spec ) do
490
+ def xpath ( parent , % SweetXpath { is_list: true , is_value: true , cast_to: cast , is_optional: is_opt? } = spec , [ ] ) do
447
491
get_current_entities ( parent , spec ) |> Enum . map ( & ( _value ( & 1 ) ) |> to_cast ( cast , is_opt? ) ) |> spec . transform_fun . ( )
448
492
end
449
493
450
- def xpath ( parent , % SweetXpath { is_list: true , is_value: false } = spec ) do
494
+ def xpath ( parent , % SweetXpath { is_list: true , is_value: false } = spec , [ ] ) do
451
495
get_current_entities ( parent , spec ) |> spec . transform_fun . ( )
452
496
end
453
497
454
- def xpath ( parent , % SweetXpath { is_list: false , is_value: true , cast_to: string_type , is_optional: is_opt? } = spec ) when string_type in [ :string , :soft_string ] do
498
+ def xpath ( parent , % SweetXpath { is_list: false , is_value: true , cast_to: string_type , is_optional: is_opt? } = spec , [ ] ) when string_type in [ :string , :soft_string ] do
455
499
spec = % SweetXpath { spec | is_list: true }
456
500
get_current_entities ( parent , spec )
457
501
|> Enum . map ( & ( _value ( & 1 ) |> to_cast ( string_type , is_opt? ) ) )
458
502
|> Enum . join
459
503
|> spec . transform_fun . ( )
460
504
end
461
505
462
- def xpath ( parent , % SweetXpath { is_list: false , is_value: true , cast_to: cast , is_optional: is_opt? } = spec ) do
506
+ def xpath ( parent , % SweetXpath { is_list: false , is_value: true , cast_to: cast , is_optional: is_opt? } = spec , [ ] ) do
463
507
get_current_entities ( parent , spec ) |> _value |> to_cast ( cast , is_opt? ) |> spec . transform_fun . ( )
464
508
end
465
509
466
- def xpath ( parent , % SweetXpath { is_list: false , is_value: false } = spec ) do
510
+ def xpath ( parent , % SweetXpath { is_list: false , is_value: false } = spec , [ ] ) do
467
511
get_current_entities ( parent , spec ) |> spec . transform_fun . ( )
468
512
end
469
513
@@ -478,11 +522,13 @@ defmodule SweetXml do
478
522
end
479
523
480
524
@ doc ~S"""
481
- `xmap` returns a mapping with each value being the result of `xpath`
525
+ `xmap` returns a mapping with each value being the result of `xpath`.
482
526
483
- Just as `xpath`, you can nest the mapping structure. Please see `xpath` for
527
+ Just as with `xpath`, you can nest the mapping structure. Please see `xpath/3` for
484
528
more detail.
485
529
530
+ Pass `true` as the third argument to get the result as a keyword list instead of a map.
531
+
486
532
## Examples
487
533
488
534
Simple:
@@ -530,8 +576,24 @@ defmodule SweetXml do
530
576
...> ]
531
577
...> ], true)
532
578
[message: 'Message', ul: %{a: 'Two'}]
579
+
580
+ ## Security
581
+
582
+ Whenever you are working with some xml that was not generated by your system,
583
+ it is highly recommended that you restrict some features of XML
584
+ during parsing. In particular, SweetXml lets you prevent DTD parsing and fetching.
585
+ Unless you know exactly what kind of DTD you want to permit in your xml,
586
+ it is recommended that you use the following code example to prevent possible attacks:
587
+ ```
588
+ doc
589
+ |> parse(dtd: :none)
590
+ |> xmap(specs, options)
591
+ ```
592
+ For more details, see `parse/2`.
533
593
"""
534
- def xmap ( parent , mapping ) , do: xmap ( parent , mapping , % { is_keyword: false } )
594
+ @ spec xmap ( parent :: ( doc | xmlElement ) , mapping :: specs , options :: ( boolean | map ) ) :: ( map | keyword )
595
+ when specs: keyword ( spec | specs )
596
+ def xmap ( parent , mapping , options \\ false )
535
597
536
598
def xmap ( nil , _ , % { is_optional: true } ) , do: nil
537
599
0 commit comments