Skip to content

Commit fdd11bd

Browse files
committed
Attributes deduplicated more efficiently
When reading an XML document, this package uses a more time-efficient technique to detect and remove attributes with duplicated names (within each element).
1 parent e8292cc commit fdd11bd

File tree

2 files changed

+38
-28
lines changed

2 files changed

+38
-28
lines changed

etree.go

Lines changed: 27 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -887,6 +887,7 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
887887
r = newXmlSimpleReader(ri)
888888
}
889889

890+
attrCheck := make(map[xml.Name]int)
890891
dec := newDecoder(r, settings)
891892

892893
var stack stack[*Element]
@@ -919,8 +920,19 @@ func (e *Element) readFrom(ri io.Reader, settings ReadSettings) (n int64, err er
919920
switch t := t.(type) {
920921
case xml.StartElement:
921922
e := newElement(t.Name.Space, t.Name.Local, top)
922-
for _, a := range t.Attr {
923-
e.createAttr(a.Name.Space, a.Name.Local, a.Value, e, settings.PreserveDuplicateAttrs)
923+
if settings.PreserveDuplicateAttrs || len(t.Attr) < 2 {
924+
for _, a := range t.Attr {
925+
e.addAttr(a.Name.Space, a.Name.Local, a.Value)
926+
}
927+
} else {
928+
for _, a := range t.Attr {
929+
if i, contains := attrCheck[a.Name]; contains {
930+
e.Attr[i].Value = a.Value
931+
} else {
932+
attrCheck[a.Name] = e.addAttr(a.Name.Space, a.Name.Local, a.Value)
933+
}
934+
}
935+
clear(attrCheck)
924936
}
925937
stack.push(e)
926938
case xml.EndElement:
@@ -1363,28 +1375,29 @@ func (e *Element) addChild(t Token) {
13631375
// prefix followed by a colon.
13641376
func (e *Element) CreateAttr(key, value string) *Attr {
13651377
space, skey := spaceDecompose(key)
1366-
return e.createAttr(space, skey, value, e, false)
1367-
}
13681378

1369-
// createAttr is a helper function that creates attributes.
1370-
func (e *Element) createAttr(space, key, value string, parent *Element, preserveDups bool) *Attr {
1371-
if !preserveDups {
1372-
for i, a := range e.Attr {
1373-
if space == a.Space && key == a.Key {
1374-
e.Attr[i].Value = value
1375-
return &e.Attr[i]
1376-
}
1379+
for i, a := range e.Attr {
1380+
if space == a.Space && skey == a.Key {
1381+
e.Attr[i].Value = value
1382+
return &e.Attr[i]
13771383
}
13781384
}
13791385

1386+
i := e.addAttr(space, skey, value)
1387+
return &e.Attr[i]
1388+
}
1389+
1390+
// addAttr is a helper function that adds an attribute to an element. Returns
1391+
// the index of the added attribute.
1392+
func (e *Element) addAttr(space, key, value string) int {
13801393
a := Attr{
13811394
Space: space,
13821395
Key: key,
13831396
Value: value,
1384-
element: parent,
1397+
element: e,
13851398
}
13861399
e.Attr = append(e.Attr, a)
1387-
return &e.Attr[len(e.Attr)-1]
1400+
return len(e.Attr) - 1
13881401
}
13891402

13901403
// RemoveAttr removes the first attribute of this element whose key matches

etree_test.go

Lines changed: 11 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1469,7 +1469,7 @@ func TestReindexChildren(t *testing.T) {
14691469
}
14701470

14711471
func TestPreserveDuplicateAttrs(t *testing.T) {
1472-
s := `<element attr="test" attr="test2"/>`
1472+
s := `<element x="value1" y="value2" x="value3" x="value4" y="value5"/>`
14731473

14741474
checkAttrCount := func(e *Element, n int) {
14751475
if len(e.Attr) != n {
@@ -1492,23 +1492,20 @@ func TestPreserveDuplicateAttrs(t *testing.T) {
14921492
t.Run("enabled", func(t *testing.T) {
14931493
doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: true})
14941494
e := doc.FindElement("element")
1495-
checkAttrCount(e, 2)
1496-
checkAttr(e, 0, "attr", "test")
1497-
checkAttr(e, 1, "attr", "test2")
1495+
checkAttrCount(e, 5)
1496+
checkAttr(e, 0, "x", "value1")
1497+
checkAttr(e, 1, "y", "value2")
1498+
checkAttr(e, 2, "x", "value3")
1499+
checkAttr(e, 3, "x", "value4")
1500+
checkAttr(e, 4, "y", "value5")
14981501
})
14991502

15001503
t.Run("disabled", func(t *testing.T) {
1501-
doc := newDocumentFromString2(t, s, ReadSettings{PreserveDuplicateAttrs: false})
1504+
doc := newDocumentFromString2(t, s, ReadSettings{})
15021505
e := doc.FindElement("element")
1503-
checkAttrCount(e, 1)
1504-
checkAttr(e, 0, "attr", "test2")
1505-
})
1506-
1507-
t.Run("default", func(t *testing.T) {
1508-
doc := newDocumentFromString(t, s)
1509-
e := doc.FindElement("element")
1510-
checkAttrCount(e, 1)
1511-
checkAttr(e, 0, "attr", "test2")
1506+
checkAttrCount(e, 2)
1507+
checkAttr(e, 0, "x", "value4")
1508+
checkAttr(e, 1, "y", "value5")
15121509
})
15131510
}
15141511

0 commit comments

Comments
 (0)