Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Openxmlreader ignorews option #857

Merged
merged 5 commits into from
Jan 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions src/DocumentFormat.OpenXml/OpenXmlPartReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public OpenXmlPartReader(OpenXmlPart openXmlPart) : this()
throw new ArgumentNullException(nameof(openXmlPart));
}

_xmlReader = CreateReader(openXmlPart.GetStream(FileMode.Open), true, openXmlPart.MaxCharactersInPart, out _standalone, out _encoding);
_xmlReader = CreateReader(openXmlPart.GetStream(FileMode.Open), true, openXmlPart.MaxCharactersInPart, ignoreWhitespace: true, out _standalone, out _encoding);
}

/// <summary>
Expand All @@ -70,7 +70,23 @@ public OpenXmlPartReader(OpenXmlPart openXmlPart, bool readMiscNodes)
throw new ArgumentNullException(nameof(openXmlPart));
}

_xmlReader = CreateReader(openXmlPart.GetStream(FileMode.Open), true, openXmlPart.MaxCharactersInPart, out _standalone, out _encoding);
_xmlReader = CreateReader(openXmlPart.GetStream(FileMode.Open), true, openXmlPart.MaxCharactersInPart, ignoreWhitespace: true, out _standalone, out _encoding);
}

/// <summary>
/// Initializes a new instance of the OpenXmlPartReader class that reads the supplied OpenXmlPart,
/// letting the caller choose both miscellaneous-node handling and white-space handling.
/// </summary>
/// <param name="openXmlPart">The OpenXmlPart to read.</param>
/// <param name="readMiscNodes">Specify false to indicate to the reader to skip all miscellaneous nodes.</param>
/// <param name="ignoreWhitespace">Specify true to indicate to the reader to ignore insignificant white space.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="openXmlPart"/> is null.</exception>
public OpenXmlPartReader(OpenXmlPart openXmlPart, bool readMiscNodes, bool ignoreWhitespace)
    : this(readMiscNodes)
{
    if (openXmlPart is null)
    {
        throw new ArgumentNullException(nameof(openXmlPart));
    }

    // The stream is opened here, so the underlying XmlReader is asked to close it (closeInput: true).
    var partStream = openXmlPart.GetStream(FileMode.Open);
    var characterLimit = openXmlPart.MaxCharactersInPart;

    _xmlReader = CreateReader(
        partStream,
        closeInput: true,
        maxCharactersInPart: characterLimit,
        ignoreWhitespace: ignoreWhitespace,
        out _standalone,
        out _encoding);
}

/// <summary>
Expand All @@ -84,7 +100,7 @@ public OpenXmlPartReader(Stream partStream) : this()
throw new ArgumentNullException(nameof(partStream));
}

_xmlReader = CreateReader(partStream, false, 0, out _standalone, out _encoding);
_xmlReader = CreateReader(partStream, false, 0, ignoreWhitespace: true, out _standalone, out _encoding);
}

/// <summary>
Expand All @@ -100,7 +116,23 @@ public OpenXmlPartReader(Stream partStream, bool readMiscNodes)
throw new ArgumentNullException(nameof(partStream));
}

_xmlReader = CreateReader(partStream, false, 0, out _standalone, out _encoding);
_xmlReader = CreateReader(partStream, false, 0, ignoreWhitespace: true, out _standalone, out _encoding);
}

/// <summary>
/// Initializes a new instance of the OpenXmlPartReader class that reads the supplied part stream,
/// letting the caller choose both miscellaneous-node handling and white-space handling.
/// </summary>
/// <param name="partStream">The part stream of the OpenXmlPart to read.</param>
/// <param name="readMiscNodes">Specify false to indicate to the reader to skip all miscellaneous nodes.</param>
/// <param name="ignoreWhitespace">Specify true to indicate to the reader to ignore insignificant white space.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="partStream"/> is null.</exception>
public OpenXmlPartReader(Stream partStream, bool readMiscNodes, bool ignoreWhitespace)
    : this(readMiscNodes)
{
    if (partStream is null)
    {
        throw new ArgumentNullException(nameof(partStream));
    }

    // The stream belongs to the caller, so the underlying XmlReader is told not to close it
    // (closeInput: false); 0 is forwarded as the character limit.
    _xmlReader = CreateReader(
        partStream,
        closeInput: false,
        maxCharactersInPart: 0,
        ignoreWhitespace: ignoreWhitespace,
        out _standalone,
        out _encoding);
}

/// <summary>
Expand Down Expand Up @@ -671,13 +703,13 @@ public override void Close()
#endif
}

private static XmlReader CreateReader(Stream partStream, bool closeInput, long maxCharactersInPart, out bool? _standalone, out string _encoding)
private static XmlReader CreateReader(Stream partStream, bool closeInput, long maxCharactersInPart, bool ignoreWhitespace, out bool? _standalone, out string _encoding)
{
var settings = new XmlReaderSettings
{
MaxCharactersInDocument = maxCharactersInPart,
CloseInput = closeInput,
IgnoreWhitespace = true,
IgnoreWhitespace = ignoreWhitespace,
#if FEATURE_XML_PROHIBIT_DTD
ProhibitDtd = true,
#else
Expand Down
18 changes: 18 additions & 0 deletions src/DocumentFormat.OpenXml/OpenXmlReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ protected OpenXmlReader(bool readMiscNodes)
/// <returns>The newly created OpenXmlReader.</returns>
public static OpenXmlReader Create(OpenXmlPart openXmlPart, bool readMiscNodes) => new OpenXmlPartReader(openXmlPart, readMiscNodes);

/// <summary>
/// Creates an OpenXmlReader from the specified OpenXmlPart and Boolean values.
/// </summary>
/// <param name="openXmlPart">The OpenXmlPart to read.</param>
/// <param name="readMiscNodes">Specify false to indicate to the reader to skip all miscellaneous nodes. The default value is false.</param>
/// <param name="ignoreWhitespace">Specify true to indicate to the reader to ignore insignificant white space. The overloads that do not take this parameter use true.</param>
/// <returns>The newly created OpenXmlReader.</returns>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="openXmlPart"/> is null.</exception>
public static OpenXmlReader Create(OpenXmlPart openXmlPart, bool readMiscNodes, bool ignoreWhitespace) => new OpenXmlPartReader(openXmlPart, readMiscNodes, ignoreWhitespace);

/// <summary>
/// Creates an OpenXmlReader from the specified part stream.
/// </summary>
Expand All @@ -67,6 +76,15 @@ protected OpenXmlReader(bool readMiscNodes)
/// <returns></returns>
public static OpenXmlReader Create(Stream partStream, bool readMiscNodes) => new OpenXmlPartReader(partStream, readMiscNodes);

/// <summary>
/// Creates an OpenXmlReader from the specified part stream and Boolean values.
/// </summary>
/// <param name="partStream">The part stream.</param>
/// <param name="readMiscNodes">Specify false to indicate to the reader to skip all miscellaneous nodes. The default value is false.</param>
/// <param name="ignoreWhitespace">Specify true to indicate to the reader to ignore insignificant white space. The overloads that do not take this parameter use true.</param>
/// <returns>The newly created OpenXmlReader.</returns>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="partStream"/> is null.</exception>
public static OpenXmlReader Create(Stream partStream, bool readMiscNodes, bool ignoreWhitespace) => new OpenXmlPartReader(partStream, readMiscNodes, ignoreWhitespace);

/// <summary>
/// Creates an OpenXmlReader from the OpenXmlElement (travel the DOM tree).
/// </summary>
Expand Down
40 changes: 40 additions & 0 deletions test/DocumentFormat.OpenXml.Tests/ofapiTest/OpenXmlReaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,46 @@ public void PartReaderMiscNodeTest()
reader.Close();
}

/// <summary>
/// Verifies the ignoreWhitespace option of OpenXmlPartReader: a text element whose only
/// content is a single space must yield an empty string when white space is ignored and
/// exactly that space when it is not.
/// </summary>
/// <param name="ignoreWhitespace">The value passed through to <c>OpenXmlReader.Create</c>.</param>
[Theory]
[InlineData(true)]
[InlineData(false)]
public void PartReaderIgnoreWhitespaceTest(bool ignoreWhitespace)
{
    const string PartText = "<w:document xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\">" +
        "<w:body>" +
        "<w:p w:rsidP=\"001\"><w:r><w:t> </w:t></w:r></w:p>" +
        "</w:body>" +
        "</w:document>";

    using var stream = new MemoryStream(Encoding.UTF8.GetBytes(PartText), false);
    using var reader = OpenXmlReader.Create(stream, false, ignoreWhitespace);
    Assert.False(reader.EOF);

    // Move onto the root element (w:document).
    reader.Read();
    Assert.False(reader.EOF);

    // Walk down document -> body -> paragraph -> run -> text, one start element per step.
    reader.ReadFirstChild();
    Assert.True(reader.IsStartElement);
    Assert.Equal(typeof(Body), reader.ElementType);

    reader.Read();
    Assert.True(reader.IsStartElement);
    Assert.Equal(typeof(Paragraph), reader.ElementType);

    reader.Read();
    Assert.True(reader.IsStartElement);
    Assert.Equal(typeof(Run), reader.ElementType);

    reader.Read();
    Assert.True(reader.IsStartElement);
    Assert.Equal(typeof(Text), reader.ElementType);

    // Assert the exact text rather than only "empty vs. non-empty", so a reader that
    // returned some other non-empty value with ignoreWhitespace == false would fail.
    var expectedText = ignoreWhitespace ? string.Empty : " ";
    Assert.Equal(expectedText, reader.GetText());

    reader.Close();
}

/// <summary>
///A test for OpenXmlPartReader
///</summary>
Expand Down