Skip to content

Commit

Permalink
Merge pull request #2 from Jan5366x/net6_and_manual_write_and_read
Browse files Browse the repository at this point in the history
Net6 and manual write and read
  • Loading branch information
Jan5366x authored May 25, 2023
2 parents d635900 + 5007198 commit fb33619
Show file tree
Hide file tree
Showing 21 changed files with 882 additions and 780 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/dotnet-core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ jobs:
os: ['ubuntu-latest', 'windows-latest']
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Setup .NET Core
uses: actions/setup-dotnet@v1
- uses: actions/checkout@v3
- name: Setup .NET 6.0
uses: actions/setup-dotnet@v2
with:
dotnet-version: 3.1.301
dotnet-version: 6.0.x
- name: Install dependencies
run: dotnet restore
- name: Build
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -503,3 +503,4 @@ MigrationBackup/
# Fody - auto-generated XML schema
FodyWeavers.xsd

.idea/.idea.MagicFileEncoding/.idea/sonarlint
2 changes: 1 addition & 1 deletion .idea/.idea.MagicFileEncoding/.idea/indexLayout.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 0 additions & 8 deletions .idea/.idea.MagicFileEncoding/.idea/modules.xml

This file was deleted.

16 changes: 0 additions & 16 deletions .idea/.idea.MagicFileEncoding/.idea/riderModule.iml

This file was deleted.

14 changes: 0 additions & 14 deletions .idea/.idea.MagicFileEncoding/riderModule.iml

This file was deleted.

22 changes: 22 additions & 0 deletions CHANGELOG.MD
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## v2.0.0 - ?? ??? ????
* Fixed an issue where some files were read twice
* Add support to read and analyze byte arrays
* Switch to .NET 6.0
* Enabled nullable reference types
* Adjust nullability in the entire solution
* Add SYSLIB0001 UTF-7 Encoding safety check
* Remove IO Tools from the public API surface, since they are off-topic internal logic that is subject to (breaking) changes
* Refactoring and minor improvements

## v1.1.0 - 13 Feb 2021
* **Breaking Change!** Adjusted public API surface to static access
* **Breaking Change!** Additional encodings are now null if not found in the code pages
* Added more tests
* Added more additional encodings
* Improved documentation

**Disclaimer:** This version contains breaking changes without a major version bump;
this will be avoided in future releases.

## 1.0.0 - 28 Sep 2020
* Added basic feature set
175 changes: 87 additions & 88 deletions MagicFileEncoding/AdditionalEncoding.cs
Original file line number Diff line number Diff line change
@@ -1,108 +1,107 @@
using System.Diagnostics.CodeAnalysis;
using System.Text;

namespace MagicFileEncoding
{
namespace MagicFileEncoding;

/// <summary>
/// <para>List of additional encodings</para>
/// Encoding will be <i>null</i> if required codepage can't be retrieved
/// </summary>
[SuppressMessage("ReSharper", "InconsistentNaming")]
public static class AdditionalEncoding
{
/// <summary>
/// <para>List of additional encodings</para>
/// Encoding will be <i>null</i> if required codepage can't be retrieved
/// <para>(Latin-1)</para>
/// This character set contains the script-specific characters for Western European and American languages.
/// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
/// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
/// Dutch "ij" or the German quotation marks below are missing.
/// </summary>
[SuppressMessage("ReSharper", "InconsistentNaming")]
public static class AdditionalEncoding
{
/// <summary>
/// <para>(Latin-1)</para>
/// This character set contains the script-specific characters for Western European and American languages.
/// The character set covers Albanian, Catalan, Danish, Dutch, English, Faroese, Finnish, French, Galician,
/// Icelandic, German, Italian, Norwegian, Portuguese, Spanish and Swedish. Only single characters like the
/// Dutch "ij" or the German quotation marks below are missing.
/// </summary>
public static readonly Encoding ISO_8859_1 = SoftFetchEncoding("iso-8859-1");
public static readonly Encoding? ISO_8859_1 = SoftFetchEncoding("iso-8859-1");

/// <summary>
/// <para>(Latin-2)</para>
/// This character set contains the script-specific characters for most Central European and Slavic languages.
/// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
/// </summary>
public static readonly Encoding ISO_8859_2 = SoftFetchEncoding("iso-8859-2");
/// <summary>
/// <para>(Latin-2)</para>
/// This character set contains the script-specific characters for most Central European and Slavic languages.
/// The character set covers Croatian, Polish, Romanian, Slovak, Slovenian, Czech and Hungarian.
/// </summary>
public static readonly Encoding? ISO_8859_2 = SoftFetchEncoding("iso-8859-2");

/// <summary>
/// <para>(Latin-3)</para>
/// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
/// </summary>
public static readonly Encoding ISO_8859_3 = SoftFetchEncoding("iso-8859-3");
/// <summary>
/// <para>(Latin-3)</para>
/// This character set covers the languages Esperanto, Galician, Maltese and Turkish.
/// </summary>
public static readonly Encoding? ISO_8859_3 = SoftFetchEncoding("iso-8859-3");

/// <summary>
/// <para>(Latin-4)</para>
/// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
/// Compare this character set also with ISO 8859-10, which is very similar.
/// </summary>
public static readonly Encoding ISO_8859_4 = SoftFetchEncoding("iso-8859-4");
/// <summary>
/// <para>(Latin-4)</para>
/// This character set contains some characters of Estonian, Latvian and Lithuanian languages.
/// Compare this character set also with ISO 8859-10, which is very similar.
/// </summary>
public static readonly Encoding? ISO_8859_4 = SoftFetchEncoding("iso-8859-4");

/// <summary>
/// This character set contains Cyrillic characters.
/// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
/// </summary>
public static readonly Encoding ISO_8859_5 = SoftFetchEncoding("iso-8859-5");
/// <summary>
/// This character set contains Cyrillic characters.
/// It largely covers the Bulgarian, Macedonian, Russian, Serbian and Ukrainian languages.
/// </summary>
public static readonly Encoding? ISO_8859_5 = SoftFetchEncoding("iso-8859-5");

/// <summary>
/// This character set contains characters of Arabic script. However, the representation of the characters
/// in the following table is "abstract" because the characters vary in writing practice depending on whether
/// they are at the beginning, middle, or end of a word, or individually.
/// Arabic is further characterized by the fact that the direction of writing is from right to left.
/// </summary>
public static readonly Encoding ISO_8859_6 = SoftFetchEncoding("iso-8859-6");
/// <summary>
/// This character set contains characters of Arabic script. However, the representation of the characters
/// in the following table is "abstract" because the characters vary in writing practice depending on whether
/// they are at the beginning, middle, or end of a word, or individually.
/// Arabic is further characterized by the fact that the direction of writing is from right to left.
/// </summary>
public static readonly Encoding? ISO_8859_6 = SoftFetchEncoding("iso-8859-6");

/// <summary>
/// This character set contains the characters of the Modern Greek script.
/// </summary>
public static readonly Encoding ISO_8859_7 = SoftFetchEncoding("iso-8859-7");
/// <summary>
/// This character set contains the characters of the Modern Greek script.
/// </summary>
public static readonly Encoding? ISO_8859_7 = SoftFetchEncoding("iso-8859-7");

/// <summary>
/// This character set contains the characters of the Hebrew script.
/// As with the Arabic script, the direction of writing is from right to left.
/// </summary>
public static readonly Encoding ISO_8859_8 = SoftFetchEncoding("iso-8859-8");
/// <summary>
/// This character set contains the characters of the Hebrew script.
/// As with the Arabic script, the direction of writing is from right to left.
/// </summary>
public static readonly Encoding? ISO_8859_8 = SoftFetchEncoding("iso-8859-8");

/// <summary>
/// <para>(Latin-5)</para>
/// This character set is specially designed for Turkish. It is based on ISO 8859-1,
/// but contains Turkish characters instead of the Icelandic special characters.
/// </summary>
public static readonly Encoding ISO_8859_9 = SoftFetchEncoding("iso-8859-9");
/// <summary>
/// <para>(Latin-5)</para>
/// This character set is specially designed for Turkish. It is based on ISO 8859-1,
/// but contains Turkish characters instead of the Icelandic special characters.
/// </summary>
public static readonly Encoding? ISO_8859_9 = SoftFetchEncoding("iso-8859-9");

/// <summary>
/// <para>(Latin-6)</para>
/// This character set specifically contains characters for
/// the Greenlandic (Inuit) and Lappish (Sami) languages.
/// </summary>
public static readonly Encoding ISO_8859_10 = SoftFetchEncoding("iso-8859-10");
/// <summary>
/// <para>(Latin-6)</para>
/// This character set specifically contains characters for
/// the Greenlandic (Inuit) and Lappish (Sami) languages.
/// </summary>
public static readonly Encoding? ISO_8859_10 = SoftFetchEncoding("iso-8859-10");

/// <summary>
/// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
/// length is used and no intelligent algorithm is required, but at the expense of memory size
/// if only characters of the ASCII character set are used, more than four times as much memory is required
/// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
/// significant byte or the most significant byte is transmitted first,
/// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>.
/// </summary>
public static readonly Encoding UTF32BE = SoftFetchEncoding("utf-32BE");
/// <summary>
/// UTF-32 always encodes a character in exactly 32 bits and is thus the simplest, since no variable character
/// length is used and no intelligent algorithm is required, but at the expense of memory size
/// if only characters of the ASCII character set are used, more than four times as much memory is required
/// as with encoding in ASCII (7 bits required). Depending on the sequence of the bytes, whether the least
/// significant byte or the most significant byte is transmitted first,
/// one speaks of Little Endian (UTF-32LE) or <b>Big Endian (UTF-32BE)</b>.
/// </summary>
public static readonly Encoding? UTF32BE = SoftFetchEncoding("utf-32BE");

/// <summary>
/// Get the requested encoding and consume exception if it can't be found in code pages
/// </summary>
/// <param name="encoding">The encoding name</param>
/// <returns>The encoding object or <i>null</i></returns>
private static Encoding SoftFetchEncoding(string encoding)
/// <summary>
/// Get the requested encoding and consume exception if it can't be found in code pages
/// </summary>
/// <param name="encoding">The encoding name</param>
/// <returns>The encoding object or <i>null</i></returns>
private static Encoding? SoftFetchEncoding(string encoding)
{
try
{
return Encoding.GetEncoding(encoding);
}
catch
{
try
{
return Encoding.GetEncoding(encoding);
}
catch
{
return null;
}
return null;
}
}
}
47 changes: 24 additions & 23 deletions MagicFileEncoding/ByteOrderMask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,29 @@
using System.Diagnostics.CodeAnalysis;
using System.Text;

namespace MagicFileEncoding
#pragma warning disable SYSLIB0001
namespace MagicFileEncoding;

[SuppressMessage("ReSharper", "InconsistentNaming")]
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
public static class ByteOrderMask
{
[SuppressMessage("ReSharper", "InconsistentNaming")]
[SuppressMessage("ReSharper", "MemberCanBePrivate.Global")]
public static class ByteOrderMask
public static readonly ByteOrderMaskInfo UTF32BE
= new (AdditionalEncoding.UTF32BE!, 0x00, 0x00, 0xFE, 0xFF);
public static readonly ByteOrderMaskInfo UTF32
= new (Encoding.UTF32, 0xFF, 0xFE, 0x00, 0x00);
public static readonly ByteOrderMaskInfo UTF16BE
= new (Encoding.BigEndianUnicode, 0xFE, 0xFF);
public static readonly ByteOrderMaskInfo UTF16
= new (Encoding.Unicode, 0xFF, 0xFE);
public static readonly ByteOrderMaskInfo UTF8
= new (Encoding.UTF8, 0xEF, 0xBB, 0xBF);
public static readonly ByteOrderMaskInfo UTF7
= new (Encoding.UTF7, 0x2b, 0x2f, 0x76);

public static readonly List<ByteOrderMaskInfo> List = new ()
{
public static readonly ByteOrderMaskInfo UTF32BE
= new ByteOrderMaskInfo(AdditionalEncoding.UTF32BE, 0x00, 0x00, 0xFE, 0xFF);
public static readonly ByteOrderMaskInfo UTF32
= new ByteOrderMaskInfo(Encoding.UTF32, 0xFF, 0xFE, 0x00, 0x00);
public static readonly ByteOrderMaskInfo UTF16BE
= new ByteOrderMaskInfo(Encoding.BigEndianUnicode, 0xFE, 0xFF);
public static readonly ByteOrderMaskInfo UTF16
= new ByteOrderMaskInfo(Encoding.Unicode, 0xFF, 0xFE);
public static readonly ByteOrderMaskInfo UTF8
= new ByteOrderMaskInfo(Encoding.UTF8, 0xEF, 0xBB, 0xBF);
public static readonly ByteOrderMaskInfo UTF7
= new ByteOrderMaskInfo(Encoding.UTF7, 0x2b, 0x2f, 0x76);

public static readonly List<ByteOrderMaskInfo> List = new List<ByteOrderMaskInfo>()
{
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
};
}
}
UTF32BE, UTF32, UTF16BE, UTF16, UTF8, UTF7
};
}
#pragma warning restore SYSLIB0001
Loading

0 comments on commit fb33619

Please sign in to comment.