@@ -31,9 +31,14 @@ var ErrXML = errors.New("etree: invalid XML format")
31
31
var cdataPrefix = []byte ("<![CDATA[" )
32
32
33
33
// ReadSettings determine the default behavior of the Document's ReadFrom*
34
- // methods .
34
+ // functions .
35
35
type ReadSettings struct {
36
- // CharsetReader to be passed to standard xml.Decoder. Default: nil.
36
+ // CharsetReader, if non-nil, defines a function to generate
37
+ // charset-conversion readers, converting from the provided non-UTF-8
38
+ // charset into UTF-8. If nil, the ReadFrom* functions will use a
39
+ // "pass-through" CharsetReader that performs no conversion on the reader's
40
+ // data regardless of the value of the "charset" encoding string. Default:
41
+ // nil.
37
42
CharsetReader func (charset string , input io.Reader ) (io.Reader , error )
38
43
39
44
// Permissive allows input containing common mistakes such as missing tags
@@ -72,13 +77,11 @@ type ReadSettings struct {
72
77
AutoClose []string
73
78
}
74
79
75
- // newReadSettings creates a default ReadSettings record.
76
- func newReadSettings () ReadSettings {
77
- return ReadSettings {
78
- CharsetReader : func (label string , input io.Reader ) (io.Reader , error ) {
79
- return input , nil
80
- },
81
- }
80
+ // defaultCharsetReader is the CharsetReader installed on the xml decoder
81
+ // when ReadSettings.CharsetReader is nil. It is a "pass-through": it ignores
82
+ // charset, performs no conversion, and returns input with a nil error.
83
+ func defaultCharsetReader (charset string , input io.Reader ) (io.Reader , error ) {
84
+ return input , nil
82
85
}
83
86
84
87
// dup creates a duplicate of the ReadSettings object.
@@ -97,7 +100,7 @@ func (s *ReadSettings) dup() ReadSettings {
97
100
}
98
101
}
99
102
100
- // WriteSettings determine the behavior of the Document's WriteTo* methods .
103
+ // WriteSettings determine the behavior of the Document's WriteTo* functions .
101
104
type WriteSettings struct {
102
105
// CanonicalEndTags forces the production of XML end tags, even for
103
106
// elements that have no child elements. Default: false.
@@ -118,31 +121,20 @@ type WriteSettings struct {
118
121
// false.
119
122
AttrSingleQuote bool
120
123
121
- // UseCRLF causes the document's Indent* methods to use a carriage return
124
+ // UseCRLF causes the document's Indent* functions to use a carriage return
122
125
// followed by a linefeed ("\r\n") when outputting a newline. If false,
123
126
// only a linefeed is used ("\n"). Default: false.
124
127
//
125
128
// Deprecated: UseCRLF is deprecated. Use IndentSettings.UseCRLF instead.
126
129
UseCRLF bool
127
130
}
128
131
129
- // newWriteSettings creates a default WriteSettings record.
130
- func newWriteSettings () WriteSettings {
131
- return WriteSettings {
132
- CanonicalEndTags : false ,
133
- CanonicalText : false ,
134
- CanonicalAttrVal : false ,
135
- AttrSingleQuote : false ,
136
- UseCRLF : false ,
137
- }
138
- }
139
-
140
132
// dup returns a copy of the WriteSettings value that s points to.
141
133
func (s * WriteSettings ) dup () WriteSettings {
142
134
return * s
143
135
}
144
136
145
- // IndentSettings determine the behavior of the Document's Indent* methods .
137
+ // IndentSettings determine the behavior of the Document's Indent* functions .
146
138
type IndentSettings struct {
147
139
// Spaces indicates the number of spaces to insert for each level of
148
140
// indentation. Set to etree.NoIndent to remove all indentation. Ignored
@@ -158,7 +150,7 @@ type IndentSettings struct {
158
150
// for a newline ("\n"). Default: false.
159
151
UseCRLF bool
160
152
161
- // PreserveLeafWhitespace causes indent methods to preserve whitespace
153
+ // PreserveLeafWhitespace causes indent functions to preserve whitespace
162
154
// within XML elements containing only non-CDATA character data. Default:
163
155
// false.
164
156
PreserveLeafWhitespace bool
@@ -200,7 +192,7 @@ func getIndentFunc(s *IndentSettings) indentFunc {
200
192
}
201
193
}
202
194
203
- // Writer is the interface that wraps the Write* methods called by each token
195
+ // Writer is the interface that wraps the Write* functions called by each token
204
196
// type's WriteTo function.
205
197
type Writer interface {
206
198
io.StringWriter
@@ -265,7 +257,7 @@ const (
265
257
266
258
// CharData may be used to represent simple text data or a CDATA section
267
259
// within an XML document. The Data property should never be modified
268
- // directly; use the SetData method instead.
260
+ // directly; use the SetData function instead.
269
261
type CharData struct {
270
262
Data string // the simple text or CDATA section content
271
263
parent * Element
@@ -298,9 +290,7 @@ type ProcInst struct {
298
290
// NewDocument creates an XML document without a root element. The returned
// document's Element field starts with an empty, non-nil Child slice.
299
291
func NewDocument () * Document {
300
292
return & Document {
301
- Element : Element {Child : make ([]Token , 0 )},
302
- ReadSettings : newReadSettings (),
303
- WriteSettings : newWriteSettings (),
293
+ Element : Element {Child : make ([]Token , 0 )},
304
294
}
305
295
}
306
296
@@ -433,6 +423,9 @@ func validateXML(r io.Reader, settings ReadSettings) error {
433
423
func newDecoder (r io.Reader , settings ReadSettings ) * xml.Decoder {
434
424
d := xml .NewDecoder (r )
435
425
d .CharsetReader = settings .CharsetReader
426
+ if d .CharsetReader == nil {
427
+ d .CharsetReader = defaultCharsetReader
428
+ }
436
429
d .Strict = ! settings .Permissive
437
430
d .Entity = settings .Entity
438
431
d .AutoClose = settings .AutoClose
0 commit comments