Merge pull request #2 from Jan5366x/net6_and_manual_write_and_read

Net6 and manual write and read
Jan5366x · May 25, 2023 · fb33619 · fb33619
2 parents d635900 + 5007198
commit fb33619
Show file tree

Hide file tree

Showing 21 changed files with 882 additions and 780 deletions.
diff --git a/.github/workflows/dotnet-core.yml b/.github/workflows/dotnet-core.yml
@@ -14,11 +14,11 @@ jobs:
         os: ['ubuntu-latest', 'windows-latest']
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v2
-    - name: Setup .NET Core
-      uses: actions/setup-dotnet@v1
+    - uses: actions/checkout@v3
+    - name: Setup .NET 6.0
+      uses: actions/setup-dotnet@v2
       with:
-        dotnet-version: 3.1.301
+        dotnet-version: 6.0.x
     - name: Install dependencies
       run: dotnet restore
     - name: Build

diff --git a/.gitignore b/.gitignore
@@ -503,3 +503,4 @@ MigrationBackup/
 # Fody - auto-generated XML schema
 FodyWeavers.xsd
 
+.idea/.idea.MagicFileEncoding/.idea/sonarlint
diff --git a/.idea/.idea.MagicFileEncoding/.idea/indexLayout.xml b/.idea/.idea.MagicFileEncoding/.idea/indexLayout.xml
diff --git a/.idea/.idea.MagicFileEncoding/.idea/modules.xml b/.idea/.idea.MagicFileEncoding/.idea/modules.xml
diff --git a/.idea/.idea.MagicFileEncoding/.idea/riderModule.iml b/.idea/.idea.MagicFileEncoding/.idea/riderModule.iml
diff --git a/.idea/.idea.MagicFileEncoding/riderModule.iml b/.idea/.idea.MagicFileEncoding/riderModule.iml
diff --git a/CHANGELOG.MD b/CHANGELOG.MD
@@ -0,0 +1,22 @@
+## v2.0.0 - ?? ??? ????
+* Fixed an issue where some files were read twice
+* Add support to read and analyze byte arrays
+* Switch to .NET 6.0
+* Enabled nullable reference types
+* Adjust nullability in the entire solution
+* Add SYSLIB0001 UTF-7 Encoding safety check
+* Remove IO Tools from public surface since they are off-topic internal logic which is subject to (breaking) changes
+* Refactoring and minor improvements
+
+## v1.1.0 - 13 Feb 2021
+* **Breaking Change!** Adjusted public api surface to static access
+* **Breaking Change!** Additional encodings are now null if not found in the code pages
+* Added more tests
+* Added more additional encodings
+* Improved documentation
+
+**Disclaimer:** This version contains breaking changes without a major version bump;
+this will be avoided in future releases.
+
+## 1.0.0 - 28 Sep 2020
+* Added basic feature set
diff --git a/MagicFileEncoding/AdditionalEncoding.cs b/MagicFileEncoding/AdditionalEncoding.cs
@@ -1,108 +1,107 @@
 using System.Diagnostics.CodeAnalysis;
 using System.Text;
 
-namespace MagicFileEncoding
-{  
+namespace MagicFileEncoding;
+
+/// <summary>
+/// <para>List of additional encodings</para>
+/// Encoding will be <i>null</i> if required codepage can't be retrieved
+/// </summary>
+[SuppressMessage("ReSharper", "InconsistentNaming")]
+public static class AdditionalEncoding
+{
     /// <summary>
-    /// <para>List of additional encodings</para>
-    /// Encoding will be <i>null</i> if required codepage can't be retrieved
+    /// <para>(Latin-1)</para>
+    /// This character set contains the script-specific characters for Western European and American languages.
+    /// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
+    /// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
+    /// Dutch "ij" or the German quotation marks below are missing.
     /// </summary>
-    [SuppressMessage("ReSharper", "InconsistentNaming")]
-    public static class AdditionalEncoding
-    {
-        /// <summary>
-        /// <para>(Latin-1)</para>
-        /// This character set contains the script-specific characters for Western European and American languages.
-        /// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
-        /// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
-        /// Dutch "ij" or the German quotation marks below are missing.
-        /// </summary>
-        public static readonly Encoding ISO_8859_1 = SoftFetchEncoding("iso-8859-1");
+    public static readonly Encoding? ISO_8859_1 = SoftFetchEncoding("iso-8859-1");
 
-        /// <summary>
-        /// <para>(Latin-2)</para>
-        /// This character set contains the script-specific characters for most Central European and Slavic languages.
-        /// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
-        /// </summary>
-        public static readonly Encoding ISO_8859_2 = SoftFetchEncoding("iso-8859-2");
+    /// <summary>
+    /// <para>(Latin-2)</para>
+    /// This character set contains the script-specific characters for most Central European and Slavic languages.
+    /// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_2 = SoftFetchEncoding("iso-8859-2");
 
-        /// <summary>
-        /// <para>(Latin-3)</para>
-        /// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
-        /// </summary>
-        public static readonly Encoding ISO_8859_3 = SoftFetchEncoding("iso-8859-3");
+    /// <summary>
+    /// <para>(Latin-3)</para>
+    /// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_3 = SoftFetchEncoding("iso-8859-3");
 
-        /// <summary>
-        /// <para>(Latin-4)</para>
-        /// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
-        /// Compare this character set also with ISO 8859-10, which is very similar.
-        /// </summary>
-        public static readonly Encoding ISO_8859_4 = SoftFetchEncoding("iso-8859-4");
+    /// <summary>
+    /// <para>(Latin-4)</para>
+    /// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
+    /// Compare this character set also with ISO 8859-10, which is very similar.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_4 = SoftFetchEncoding("iso-8859-4");
 
-        /// <summary>
-        /// This character set contains Cyrillic characters.
-        /// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
-        /// </summary>
-        public static readonly Encoding ISO_8859_5 = SoftFetchEncoding("iso-8859-5");
+    /// <summary>
+    /// This character set contains Cyrillic characters.
+    /// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_5 = SoftFetchEncoding("iso-8859-5");
 
-        /// <summary>
-        /// This character set contains characters of Arabic script. However, the representation of the characters
-        /// in the following table is "abstract" because the characters vary in writing practice depending on whether
-        /// they are at the beginning, middle, or end of a word, or individually.
-        /// Arabic is further characterized by the fact that the direction of writing is from right to left.
-        /// </summary>
-        public static readonly Encoding ISO_8859_6 = SoftFetchEncoding("iso-8859-6");
+    /// <summary>
+    /// This character set contains characters of Arabic script. However, the representation of the characters
+    /// in the following table is "abstract" because the characters vary in writing practice depending on whether
+    /// they are at the beginning, middle, or end of a word, or individually.
+    /// Arabic is further characterized by the fact that the direction of writing is from right to left.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_6 = SoftFetchEncoding("iso-8859-6");
 
-        /// <summary>
-        /// This character set contains the characters of the Modern Greek script.
-        /// </summary>
-        public static readonly Encoding ISO_8859_7 = SoftFetchEncoding("iso-8859-7");
+    /// <summary>
+    /// This character set contains the characters of the Modern Greek script.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_7 = SoftFetchEncoding("iso-8859-7");
 
-        /// <summary>
-        /// This character set contains the characters of the Hebrew script.
-        /// As with the Arabic script, the direction of writing is from right to left.
-        /// </summary>
-        public static readonly Encoding ISO_8859_8 = SoftFetchEncoding("iso-8859-8");
+    /// <summary>
+    /// This character set contains the characters of the Hebrew script.
+    /// As with the Arabic script, the direction of writing is from right to left.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_8 = SoftFetchEncoding("iso-8859-8");
 
-        /// <summary>
-        /// <para>(Latin-5)</para>
-        /// This character set is specially designed for Turkish. It is based on ISO 8859-1,
-        /// but contains Turkish characters instead of the Icelandic special characters.
-        /// </summary>
-        public static readonly Encoding ISO_8859_9 = SoftFetchEncoding("iso-8859-9");
+    /// <summary>
+    /// <para>(Latin-5)</para>
+    /// This character set is specially designed for Turkish. It is based on ISO 8859-1,
+    /// but contains Turkish characters instead of the Icelandic special characters.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_9 = SoftFetchEncoding("iso-8859-9");
 
-        /// <summary>
-        /// <para>(Latin-6)</para>
-        /// This character set specifically contains characters for
-        /// the Greenlandic (Inuit) and Lappish (Sami) languages.
-        /// </summary>
-        public static readonly Encoding ISO_8859_10 = SoftFetchEncoding("iso-8859-10");
+    /// <summary>
+    /// <para>(Latin-6)</para>
+    /// This character set specifically contains characters for
+    /// the Greenlandic (Inuit) and Lappish (Sami) languages.
+    /// </summary>
+    public static readonly Encoding? ISO_8859_10 = SoftFetchEncoding("iso-8859-10");
 
-        /// <summary>
-        /// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
-        /// length is used and no intelligent algorithm is required, but at the expense of memory size
-        /// if only characters of the ASCII character set are used, more than four times as much memory is required
-        /// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
-        /// significant byte or the most significant byte is transmitted first,
-        /// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>. 
-        /// </summary>
-        public static readonly Encoding UTF32BE = SoftFetchEncoding("utf-32BE");
+    /// <summary>
+    /// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
+    /// length is used and no intelligent algorithm is required, but at the expense of memory size:
+    /// if only characters of the ASCII character set are used, more than four times as much memory is required
+    /// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
+    /// significant byte or the most significant byte is transmitted first,
+    /// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>. 
+    /// </summary>
+    public static readonly Encoding? UTF32BE = SoftFetchEncoding("utf-32BE");
 
-        /// <summary>
-        /// Get the requested encoding and consume exception if it can't be found in code pages 
-        /// </summary>
-        /// <param name="encoding">The encoding name</param>
-        /// <returns>The encoding object or <i>null</i></returns>
-        private static Encoding SoftFetchEncoding(string encoding)
+    /// <summary>
+    /// Get the requested encoding and consume exception if it can't be found in code pages 
+    /// </summary>
+    /// <param name="encoding">The encoding name</param>
+    /// <returns>The encoding object or <i>null</i></returns>
+    private static Encoding? SoftFetchEncoding(string encoding)
+    {
+        try
+        {
+            return Encoding.GetEncoding(encoding);
+        }
+        catch
         {
-            try
-            {
-               return Encoding.GetEncoding(encoding);
-            }
-            catch
-            {
-                return null;
-            }
+            return null;
         }
     }
 }
diff --git a/MagicFileEncoding/ByteOrderMask.cs b/MagicFileEncoding/ByteOrderMask.cs
@@ -2,28 +2,29 @@
 using System.Diagnostics.CodeAnalysis;
 using System.Text;
 
-namespace MagicFileEncoding
+#pragma warning disable SYSLIB0001
+namespace MagicFileEncoding;
+
+[SuppressMessage("ReSharper", "InconsistentNaming")]
+[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
+public static class ByteOrderMask
 {
-    [SuppressMessage("ReSharper", "InconsistentNaming")]
-    [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
-    public static class ByteOrderMask
+    public static readonly ByteOrderMaskInfo UTF32BE
+        =  new (AdditionalEncoding.UTF32BE!, 0x00, 0x00, 0xFE, 0xFF);
+    public static readonly ByteOrderMaskInfo UTF32 
+        = new (Encoding.UTF32, 0xFF, 0xFE, 0x00, 0x00);
+    public static readonly ByteOrderMaskInfo UTF16BE
+        = new (Encoding.BigEndianUnicode, 0xFE, 0xFF);
+    public static readonly ByteOrderMaskInfo UTF16 
+        = new (Encoding.Unicode, 0xFF, 0xFE);
+    public static readonly ByteOrderMaskInfo UTF8
+        = new (Encoding.UTF8, 0xEF, 0xBB, 0xBF);
+    public static readonly ByteOrderMaskInfo UTF7
+        = new (Encoding.UTF7, 0x2b, 0x2f, 0x76);
+
+    public static readonly List<ByteOrderMaskInfo> List = new ()
     {
-        public static readonly ByteOrderMaskInfo UTF32BE
-            = new ByteOrderMaskInfo(AdditionalEncoding.UTF32BE, 0x00, 0x00, 0xFE, 0xFF);
-        public static readonly ByteOrderMaskInfo UTF32 
-            = new ByteOrderMaskInfo(Encoding.UTF32, 0xFF, 0xFE, 0x00, 0x00);
-        public static readonly ByteOrderMaskInfo UTF16BE
-            = new ByteOrderMaskInfo(Encoding.BigEndianUnicode, 0xFE, 0xFF);
-        public static readonly ByteOrderMaskInfo UTF16 
-            = new ByteOrderMaskInfo(Encoding.Unicode, 0xFF, 0xFE);
-        public static readonly ByteOrderMaskInfo UTF8
-            = new ByteOrderMaskInfo(Encoding.UTF8, 0xEF, 0xBB, 0xBF);
-        public static readonly ByteOrderMaskInfo UTF7
-            = new ByteOrderMaskInfo(Encoding.UTF7, 0x2b, 0x2f, 0x76);
-
-        public static readonly List<ByteOrderMaskInfo> List = new List<ByteOrderMaskInfo>()
-        {
-            UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
-        };
-    }
-}
+        UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
+    };
+}
+#pragma warning restore SYSLIB0001
Original file line number	Diff line number	Diff line change
Expand Up		@@ -503,3 +503,4 @@ MigrationBackup/
		# Fody - auto-generated XML schema
		FodyWeavers.xsd

		.idea/.idea.MagicFileEncoding/.idea/sonarlint