Skip to content

Commit 7589705

Browse files
committed
fix: DOCTYPE entity value should be read correctly
- the entity value was not being matched up to the correct closing quote
1 parent 02eb4a7 commit 7589705

File tree

2 files changed

+71
-18
lines changed

2 files changed

+71
-18
lines changed

spec/entities_spec.js

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,9 @@ describe("XMLParser Entities", function() {
4747
});
4848

4949
it("should parse XML with DOCTYPE without internal DTD", function() {
50-
const xmlData = "<?xml version='1.0' standalone='no'?><!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\" ><svg><metadata>test</metadata></svg>";
50+
const xmlData = `<?xml version='1.0' standalone='no'?>
51+
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd" >
52+
<svg><metadata>test</metadata></svg>`;
5153
const expected = {
5254
"?xml": {
5355
"@_version": "1.0",
@@ -103,7 +105,7 @@ describe("XMLParser Entities", function() {
103105
}).toThrowError("Unclosed DOCTYPE")
104106
})
105107

106-
it("should parse XML with DOCTYPE", function() {
108+
it("should parse XML with DOCTYPE with valid comment expressions ", function() {
107109
const xmlData = "<?xml version=\"1.0\" standalone=\"yes\" ?>" +
108110
"<!--open the DOCTYPE declaration -" +
109111
" the open square bracket indicates an internal DTD-->" +
@@ -135,6 +137,22 @@ describe("XMLParser Entities", function() {
135137
parser.parse(xmlData);
136138
});
137139

140+
it("should read entity value between correct matching quote char", function() {
141+
const xmlData = `<!DOCTYPE x [ <!ENTITY x 'x">]><!--'> ]>
142+
<X>
143+
<Y/><![CDATA[--><X><Z/><!--]]>-->
144+
</X>`;
145+
const expected = {
146+
X: {
147+
Y: '',
148+
'#text': '--><X><Z/><!---->'
149+
} };
150+
const parser = new XMLParser();
151+
let result = parser.parse(xmlData);
152+
// console.log(result);
153+
expect(result).toEqual(expected);
154+
});
155+
138156
it("should parse attributes having '>' in value", function() {
139157
const xmlData = `
140158
<?xml version="1.0" encoding="UTF-8"?>
@@ -475,6 +493,7 @@ describe("XMLParser Entities", function() {
475493
});
476494

477495
describe("XMLParser External Entities", function() {
496+
478497
it("should throw error when an entity value has '&'", function() {
479498
const parser = new XMLParser();
480499
expect( () => {

src/xmlparser/DocTypeReader.js

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ export default function readDocType(xmlData, i){
2222
let entityName, val;
2323
[entityName, val,i] = readEntityExp(xmlData,i+1);
2424
if(val.indexOf("&") === -1) //Parameter entities are not supported
25-
entities[ validateEntityName(entityName) ] = {
25+
entities[ entityName ] = {
2626
regx : RegExp( `&${entityName};`,"g"),
2727
val: val
2828
};
@@ -62,7 +62,14 @@ export default function readDocType(xmlData, i){
6262
return {entities, i};
6363
}
6464

65-
function readEntityExp(xmlData,i){
65+
const skipWhitespace = (data, index) => {
66+
while (index < data.length && /\s/.test(data[index])) {
67+
index++;
68+
}
69+
return index;
70+
};
71+
72+
function readEntityExp(xmlData, i) {
6673
//External entities are not supported
6774
// <!ENTITY ext SYSTEM "http://normal-website.com" >
6875

@@ -71,26 +78,53 @@ function readEntityExp(xmlData,i){
7178

7279
//Internal entities are supported
7380
// <!ENTITY entityname "replacement text">
74-
75-
//read EntityName
81+
82+
// Skip leading whitespace after <!ENTITY
83+
i = skipWhitespace(xmlData, i);
84+
85+
// Read entity name
7686
let entityName = "";
77-
for (; i < xmlData.length && (xmlData[i] !== "'" && xmlData[i] !== '"' ); i++) {
78-
// if(xmlData[i] === " ") continue;
79-
// else
87+
while (i < xmlData.length && !/\s/.test(xmlData[i]) && xmlData[i] !== '"' && xmlData[i] !== "'") {
8088
entityName += xmlData[i];
89+
i++;
8190
}
82-
entityName = entityName.trim();
83-
if(entityName.indexOf(" ") !== -1) throw new Error("External entites are not supported");
84-
85-
//read Entity Value
86-
const startChar = xmlData[i++];
87-
let val = ""
88-
for (; i < xmlData.length && xmlData[i] !== startChar ; i++) {
89-
val += xmlData[i];
91+
92+
// Validate entity name
93+
if (!validateEntityName(entityName)) {
94+
throw new Error(`Invalid entity name: "${entityName}"`);
95+
}
96+
97+
// Skip whitespace after entity name
98+
i = skipWhitespace(xmlData, i);
99+
100+
// Check for unsupported constructs (external entities or parameter entities)
101+
if (xmlData.substring(i, i + 6).toUpperCase() === "SYSTEM") {
102+
throw new Error("External entities are not supported");
103+
}else if (xmlData[i] === "%") {
104+
throw new Error("Parameter entities are not supported");
90105
}
91-
return [entityName, val, i];
106+
107+
// Read entity value (internal entity)
108+
const quoteChar = xmlData[i];
109+
if (quoteChar !== '"' && quoteChar !== "'") {
110+
throw new Error(`Expected quoted string, found "${quoteChar}"`);
111+
}
112+
i++;
113+
114+
let entityValue = "";
115+
while (i < xmlData.length && xmlData[i] !== quoteChar) {
116+
entityValue += xmlData[i];
117+
i++;
118+
}
119+
120+
if (xmlData[i] !== quoteChar) {
121+
throw new Error("Unterminated entity value");
122+
}
123+
124+
return [entityName, entityValue, i ];
92125
}
93126

127+
94128
function isComment(xmlData, i){
95129
if(xmlData[i+1] === '!' &&
96130
xmlData[i+2] === '-' &&

0 commit comments

Comments
 (0)