forked from KSP-CKAN/CKAN-core
-
Notifications
You must be signed in to change notification settings - Fork 0
/
FileIdentifier.cs
232 lines (197 loc) · 5.57 KB
/
FileIdentifier.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
using System;
using System.IO;
using System.Linq;
using log4net;
namespace CKAN
{
/// <summary>
/// Identifies the type of a file (gzip, tar, gzip-compressed tar, zip or
/// ASCII text) by looking for magic numbers at fixed offsets in its contents.
/// </summary>
public class FileIdentifier
{
    private static readonly ILog log = LogManager.GetLogger(typeof(FileIdentifier));

    /// <summary>Number of leading bytes sampled by the ASCII heuristic.</summary>
    private const int AsciiSampleSize = 1024 * 32;

    /// <summary>Offset of the tar magic number from the start of the archive.</summary>
    private const int TarMagicOffset = 257;

    /// <summary>Length in bytes of every zip signature in <see cref="ZipMagics"/>.</summary>
    private const int ZipMagicLength = 4;

    /// <summary>Magic number found at the start of a gzip file.</summary>
    private static readonly byte[] GZipMagic = { 0x1F, 0x8B };

    /// <summary>Magic number ("ustar" in ASCII) found at <see cref="TarMagicOffset"/> in a tar file.</summary>
    private static readonly byte[] TarMagic = { 0x75, 0x73, 0x74, 0x61, 0x72 };

    /// <summary>Signatures accepted at the start of a zip file.</summary>
    private static readonly byte[][] ZipMagics =
    {
        new byte[] { 0x50, 0x4B, 0x03, 0x04 }, // Regular archive.
        new byte[] { 0x50, 0x4B, 0x05, 0x06 }, // Empty archive.
        new byte[] { 0x50, 0x4B, 0x07, 0x08 }  // Spanned archive.
    };

    /// <summary>
    /// Moves the stream back to its first byte when the stream supports seeking.
    /// Streams that cannot seek are left where they are.
    /// </summary>
    /// <param name="stream">Stream to rewind.</param>
    private static void Rewind(Stream stream)
    {
        if (stream.CanSeek)
        {
            stream.Seek(0, SeekOrigin.Begin);
        }
    }

    /// <summary>
    /// Reads from the stream until the buffer is full or the stream is exhausted.
    /// </summary>
    /// <remarks>
    /// A single <see cref="Stream.Read(byte[], int, int)"/> call is allowed to
    /// return fewer bytes than requested even when more data follows, so one
    /// call is not enough to tell "short file" apart from "short read".
    /// </remarks>
    /// <param name="stream">Stream to read from, starting at its current position.</param>
    /// <param name="buffer">Buffer to fill from index 0.</param>
    /// <returns>The number of bytes placed in <paramref name="buffer"/>.</returns>
    private static int ReadFully(Stream stream, byte[] buffer)
    {
        int total = 0;

        while (total < buffer.Length)
        {
            int count = stream.Read(buffer, total, buffer.Length - total);

            if (count <= 0)
            {
                // End of stream.
                break;
            }

            total += count;
        }

        return total;
    }

    /// <summary>
    /// Checks if the file is of type ASCII.
    /// </summary>
    /// <remarks>
    /// This is a heuristic: the first <see cref="AsciiSampleSize"/> bytes are
    /// sampled and the file is rejected when a NUL byte is found among them.
    /// The last byte that was read is not inspected, so a NUL in that position
    /// does not disqualify the file. An empty file is reported as ASCII.
    /// </remarks>
    /// <param name="stream">Stream to the file.</param>
    /// <returns><c>true</c>, if most likely ASCII, <c>false</c> otherwise.</returns>
    private static bool CheckASCII(Stream stream)
    {
        Rewind(stream);

        byte[] sample = new byte[AsciiSampleSize];
        int bytes_read = ReadFully(stream, sample);

        // Look for a 0 termination char before the end of the sample.
        for (int i = 0; i < bytes_read - 1; i++)
        {
            if (sample[i] == 0)
            {
                return false;
            }
        }

        // Most likely an ASCII file.
        return true;
    }

    /// <summary>
    /// Checks if the file is of type gzip.
    /// </summary>
    /// <param name="stream">Stream to the file.</param>
    /// <returns><c>true</c>, if gzip, <c>false</c> otherwise.</returns>
    private static bool CheckGZip(Stream stream)
    {
        Rewind(stream);

        byte[] header = new byte[GZipMagic.Length];

        // Too short to hold the magic number.
        if (ReadFully(stream, header) != header.Length)
        {
            return false;
        }

        return header.SequenceEqual(GZipMagic);
    }

    /// <summary>
    /// Checks if the file is of type tar.
    /// </summary>
    /// <remarks>
    /// The stream is rewound only when it can seek; otherwise it is read from
    /// its current position, which lets this check run on a forward-only
    /// stream that is already positioned at the start of the data.
    /// </remarks>
    /// <param name="stream">Stream to the file.</param>
    /// <returns><c>true</c>, if tar, <c>false</c> otherwise.</returns>
    private static bool CheckTar(Stream stream)
    {
        Rewind(stream);

        // Read everything up to and including the magic number in one go;
        // this works for streams that cannot seek as well.
        byte[] header = new byte[TarMagicOffset + TarMagic.Length];

        // Reached the end of the stream before the magic number was complete.
        if (ReadFully(stream, header) != header.Length)
        {
            return false;
        }

        return header.Skip(TarMagicOffset).SequenceEqual(TarMagic);
    }

    /// <summary>
    /// Checks if the file is of type zip.
    /// </summary>
    /// <param name="stream">Stream to the file.</param>
    /// <returns><c>true</c>, if zip, <c>false</c> otherwise.</returns>
    private static bool CheckZip(Stream stream)
    {
        Rewind(stream);

        byte[] header = new byte[ZipMagicLength];

        // Too short to hold a zip signature.
        if (ReadFully(stream, header) != header.Length)
        {
            return false;
        }

        return ZipMagics.Any(magic => header.SequenceEqual(magic));
    }

    /// <summary>
    /// Identifies the file using magic numbers.
    /// </summary>
    /// <remarks>
    /// The checks run in the order gzip, tar, zip, ASCII and the first match
    /// wins. The stream is left open; its position after the call is wherever
    /// the last check stopped reading.
    /// </remarks>
    /// <param name="stream">Open stream to the file. Must support seeking.</param>
    /// <returns>
    /// The filetype, or <c>FileType.Unknown</c> when <paramref name="stream"/>
    /// is <c>null</c>, cannot seek, or matches none of the checks.
    /// </returns>
    public static FileType IdentifyFile(Stream stream)
    {
        // Check the input and make sure the stream supports seeking.
        if (stream == null || !stream.CanSeek)
        {
            return FileType.Unknown;
        }

        if (CheckGZip(stream))
        {
            // This may contain a tar file inside, decompress from the start and check.
            stream.Seek(0, SeekOrigin.Begin);

            using (ICSharpCode.SharpZipLib.GZip.GZipInputStream gz_stream = new ICSharpCode.SharpZipLib.GZip.GZipInputStream(stream))
            {
                // The caller owns the underlying stream. Without this, disposing
                // the wrapper would close it, and only for gzip input.
                gz_stream.IsStreamOwner = false;

                if (CheckTar(gz_stream))
                {
                    return FileType.TarGz;
                }

                return FileType.GZip;
            }
        }

        if (CheckTar(stream))
        {
            return FileType.Tar;
        }

        if (CheckZip(stream))
        {
            return FileType.Zip;
        }

        if (CheckASCII(stream))
        {
            return FileType.ASCII;
        }

        return FileType.Unknown;
    }

    /// <summary>
    /// Identifies the file using magic numbers.
    /// </summary>
    /// <param name="path">Path to the file.</param>
    /// <returns>
    /// The filetype, or <c>FileType.Unknown</c> when <paramref name="path"/>
    /// is null, empty, whitespace, or does not name an existing file.
    /// </returns>
    public static FileType IdentifyFile(string path)
    {
        // Check the input and that the file exists.
        if (string.IsNullOrWhiteSpace(path) || !File.Exists(path))
        {
            return FileType.Unknown;
        }

        // Identify the file using the stream method.
        using (Stream stream = File.OpenRead(path))
        {
            return IdentifyFile(stream);
        }
    }
}
}