Skip to content

Commit 3427fc9

Browse files
authored
Improve whitespace handling (#869)
1 parent f5cc982 commit 3427fc9

File tree

2 files changed

+147
-2
lines changed

2 files changed

+147
-2
lines changed

src/DocumentFormat.OpenXml/OpenXmlPartReader.cs

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,12 @@ private bool MoveToNextElement()
451451
case ElementState.MiscNode:
452452
// cursor is end element, pop stack
453453
_elementStack.Pop();
454+
if (_elementStack.Count == 0)
455+
{
456+
_elementState = ElementState.EOF;
457+
return false;
458+
}
459+
454460
break;
455461

456462
case ElementState.LeafStart:

test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlReaderTest.cs

Lines changed: 141 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -399,8 +399,7 @@ public void PartReaderIgnoreWhitespaceTest(bool ignoreWhitespace)
399399
"</w:body>" +
400400
"</w:document>";
401401

402-
UTF8Encoding utf8Encoding = new UTF8Encoding();
403-
using var stream = new MemoryStream(utf8Encoding.GetBytes(PartText), false);
402+
using var stream = new MemoryStream(Encoding.UTF8.GetBytes(PartText), false);
404403

405404
using var reader = OpenXmlReader.Create(stream, false, ignoreWhitespace);
406405
Assert.False(reader.EOF);
@@ -425,6 +424,146 @@ public void PartReaderIgnoreWhitespaceTest(bool ignoreWhitespace)
425424
reader.Close();
426425
}
427426

427+
/// <summary>
428+
/// Verifies that the OpenXmlReader walks multi-line (formatted) XML element by element, for both values of ignoreWhitespace.
429+
/// </summary>
430+
[Theory]
431+
[InlineData(true)]
432+
[InlineData(false)]
433+
public void TestIgnoreWhitespaceWithFormattedXml(bool ignoreWhitespace) {
434+
// The literal starts with a line break and places every element on its own line; only w:t carries text.
const string PartText = @"
435+
<w:document xmlns:v=""urn:schemas-microsoft-com:vml"" xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
436+
<w:body>
437+
<w:p>
438+
<w:r>
439+
<w:t>First Text</w:t>
440+
</w:r>
441+
</w:p>
442+
</w:body>
443+
</w:document>";
444+
445+
// Expose the UTF-8 bytes of the part text through a MemoryStream (second argument false = not writable).
using var stream = new MemoryStream(Encoding.UTF8.GetBytes(PartText), false);
446+
// NOTE(review): the second argument is presumably readMiscNodes; confirm against the OpenXmlReader.Create overloads.
using var reader = OpenXmlReader.Create(stream, false, ignoreWhitespace);
447+
// The first Read() must land on the start of the root element, reported as Document.
Assert.True(reader.Read());
448+
Assert.False(reader.EOF);
449+
Assert.Equal(typeof(Document), reader.ElementType);
450+
Assert.True(reader.IsStartElement);
451+
// Descend through the start elements in document order: Body, Paragraph, Run, Text.
Assert.True(reader.Read());
452+
Assert.Equal(typeof(Body), reader.ElementType);
453+
Assert.True(reader.IsStartElement);
454+
Assert.True(reader.Read());
455+
Assert.Equal(typeof(Paragraph), reader.ElementType);
456+
Assert.True(reader.IsStartElement);
457+
Assert.True(reader.Read());
458+
Assert.Equal(typeof(Run), reader.ElementType);
459+
Assert.True(reader.IsStartElement);
460+
Assert.True(reader.Read());
461+
Assert.Equal(typeof(Text), reader.ElementType);
462+
Assert.True(reader.IsStartElement);
463+
// Unwind through the matching end elements in reverse order: Text, Run, Paragraph, Body, Document.
Assert.True(reader.Read());
464+
Assert.Equal(typeof(Text), reader.ElementType);
465+
Assert.True(reader.IsEndElement);
466+
Assert.True(reader.Read());
467+
Assert.Equal(typeof(Run), reader.ElementType);
468+
Assert.True(reader.IsEndElement);
469+
Assert.True(reader.Read());
470+
Assert.Equal(typeof(Paragraph), reader.ElementType);
471+
Assert.True(reader.IsEndElement);
472+
Assert.True(reader.Read());
473+
Assert.Equal(typeof(Body), reader.ElementType);
474+
Assert.True(reader.IsEndElement);
475+
Assert.True(reader.Read());
476+
Assert.Equal(typeof(Document), reader.ElementType);
477+
Assert.True(reader.IsEndElement);
478+
// Nothing is left after the document end element: Read() must return false and EOF must be set.
Assert.False(reader.Read());
479+
Assert.True(reader.EOF);
480+
481+
// Explicit Close() in addition to the using declaration on the reader.
reader.Close();
482+
}
483+
484+
/// <summary>
485+
/// Verifies that the OpenXmlReader reaches EOF cleanly when whitespace follows the closing tag of the root element, for both values of ignoreWhitespace.
486+
/// </summary>
487+
[Theory]
488+
[InlineData(true)]
489+
[InlineData(false)]
490+
public void TestIgnoreWhitespaceWhitespaceAfterLastElement(bool ignoreWhitespace) {
491+
// The literal ends with a space after </w:document>; w:p contains nothing but a line break.
const string PartText = @"
492+
<w:document xmlns:v=""urn:schemas-microsoft-com:vml"" xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
493+
<w:body>
494+
<w:p>
495+
</w:p>
496+
</w:body>
497+
</w:document> ";
498+
499+
// Expose the UTF-8 bytes of the part text through a MemoryStream (second argument false = not writable).
using var stream = new MemoryStream(Encoding.UTF8.GetBytes(PartText), false);
500+
// NOTE(review): the second argument is presumably readMiscNodes; confirm against the OpenXmlReader.Create overloads.
using var reader = OpenXmlReader.Create(stream, false, ignoreWhitespace);
501+
// The first Read() must land on the start of the root element, reported as Document.
Assert.True(reader.Read());
502+
Assert.False(reader.EOF);
503+
Assert.Equal(typeof(Document), reader.ElementType);
504+
Assert.True(reader.IsStartElement);
505+
// Descend through the start elements: Body, then Paragraph.
Assert.True(reader.Read());
506+
Assert.Equal(typeof(Body), reader.ElementType);
507+
Assert.True(reader.IsStartElement);
508+
Assert.True(reader.Read());
509+
Assert.Equal(typeof(Paragraph), reader.ElementType);
510+
Assert.True(reader.IsStartElement);
511+
// The Paragraph end element is expected directly after its start element, then Body and Document end.
Assert.True(reader.Read());
512+
Assert.Equal(typeof(Paragraph), reader.ElementType);
513+
Assert.True(reader.IsEndElement);
514+
Assert.True(reader.Read());
515+
Assert.Equal(typeof(Body), reader.ElementType);
516+
Assert.True(reader.IsEndElement);
517+
Assert.True(reader.Read());
518+
Assert.Equal(typeof(Document), reader.ElementType);
519+
Assert.True(reader.IsEndElement);
520+
// The trailing whitespace must not produce another node: Read() returns false and EOF is set.
Assert.False(reader.Read());
521+
Assert.True(reader.EOF);
522+
523+
// Explicit Close() in addition to the using declaration on the reader.
reader.Close();
524+
}
525+
526+
/// <summary>
527+
/// Verifies that the OpenXmlReader reaches EOF cleanly when a misc node (an XML comment) follows the closing tag of the root element.
528+
/// </summary>
529+
[Fact]
530+
public void TestMiscNodeAfterDocument() {
531+
// The literal ends with a space and an XML comment after </w:document>; w:p contains nothing but a line break.
const string PartText = @"
532+
<w:document xmlns:v=""urn:schemas-microsoft-com:vml"" xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
533+
<w:body>
534+
<w:p>
535+
</w:p>
536+
</w:body>
537+
</w:document> <!--Your comment-->";
538+
539+
// Expose the UTF-8 bytes of the part text through a MemoryStream (second argument true = writable).
using var stream = new MemoryStream(Encoding.UTF8.GetBytes(PartText), true);
540+
// Single-argument overload: no misc-node or whitespace options are passed here.
// NOTE(review): presumably the defaults do not read misc nodes; confirm against OpenXmlReader.Create.
using var reader = OpenXmlReader.Create(stream);
541+
// The first Read() must land on the start of the root element, reported as Document.
Assert.True(reader.Read());
542+
Assert.False(reader.EOF);
543+
Assert.Equal(typeof(Document), reader.ElementType);
544+
Assert.True(reader.IsStartElement);
545+
// Descend through the start elements: Body, then Paragraph.
Assert.True(reader.Read());
546+
Assert.Equal(typeof(Body), reader.ElementType);
547+
Assert.True(reader.IsStartElement);
548+
Assert.True(reader.Read());
549+
Assert.Equal(typeof(Paragraph), reader.ElementType);
550+
Assert.True(reader.IsStartElement);
551+
// The Paragraph end element is expected directly after its start element, then Body and Document end.
Assert.True(reader.Read());
552+
Assert.Equal(typeof(Paragraph), reader.ElementType);
553+
Assert.True(reader.IsEndElement);
554+
Assert.True(reader.Read());
555+
Assert.Equal(typeof(Body), reader.ElementType);
556+
Assert.True(reader.IsEndElement);
557+
Assert.True(reader.Read());
558+
Assert.Equal(typeof(Document), reader.ElementType);
559+
Assert.True(reader.IsEndElement);
560+
561+
// The misc node after the end of the document is not surfaced: Read() returns false and EOF is set.
562+
Assert.False(reader.Read());
563+
Assert.True(reader.EOF);
564+
// Explicit Close() in addition to the using declaration on the reader.
reader.Close();
565+
}
566+
428567
/// <summary>
429568
///A test for OpenXmlPartReader
430569
///</summary>

0 commit comments

Comments
 (0)