-
Notifications
You must be signed in to change notification settings - Fork 2
/
BencodeTorrent.php
423 lines (395 loc) · 14.7 KB
/
BencodeTorrent.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
<?php
declare(strict_types=1);
namespace OrpheusNET\BencodeTorrent;
/**
 * BEncode service that allows us to encode PHP values into BEncode and decode
 * BEncode into PHP values for torrents. BEncode supports the following PHP types:
 *  - Associative arrays (encoded as dictionaries)
 *  - Lists
 *  - Strings
 *  - Integers
 * with any other type throwing an exception. A list is defined for our purposes
 * as an array with only numeric keys in perfect order, otherwise we assume it's
 * an associative array and will encode it as a dictionary.
 *
 * Additionally, as this is for torrent files, we can make the following assumptions
 * and requirements:
 *  1. Top level data structure must be a dictionary
 *  2. Dictionary must contain an info key
 * If any of these are violated, then we raise an exception for this particular file.
 *
 * @see https://wiki.theory.org/index.php/BitTorrentSpecification
 *
 * For Gazelle, this also acts as a unification of the two original BEncode implementations
 * which were both used in separate areas of the codebase.
 */
class BencodeTorrent extends Bencode
{
    /** ISO-8859-1 code point (0xF7, the division sign) used to terminate Gazelle file list entries. */
    public const FILELIST_DELIM = 0xF7;

    /** @var string|null UTF-8 encoded form of FILELIST_DELIM; null until setDelim() has run once */
    private static $utf8_filelist_delim = null;

    public function __construct()
    {
        $this->setDelim();
    }

    /**
     * Internal function that sets up the filelist_delim character for use. We cannot use
     * mb_convert_encoding() and chr() to initialise a class constant or static property, so
     * we wait until the class is instantiated for the first time to compute it.
     */
    private function setDelim(): void
    {
        if (self::$utf8_filelist_delim === null) {
            self::$utf8_filelist_delim = mb_convert_encoding(chr(self::FILELIST_DELIM), 'UTF-8', 'ISO-8859-1');
        }
    }

    /**
     * Sets the internal data array and validates it as a torrent dictionary.
     *
     * @param array $data
     * @throws \RuntimeException
     */
    public function setData($data): void
    {
        parent::setData($data);
        $this->validate();
    }

    /**
     * Decodes a BEncoded string and validates the result as a torrent dictionary.
     *
     * @param string $data
     * @throws \RuntimeException
     */
    public function decodeString(string $data): void
    {
        parent::decodeString($data);
        $this->validate();
    }

    /**
     * Given a path to a file, decodes its contents and validates the result as a
     * torrent dictionary.
     *
     * @param string $path
     * @throws \RuntimeException
     */
    public function decodeFile(string $path): void
    {
        parent::decodeFile($path);
        $this->validate();
    }

    /**
     * Validates that the internal data array looks like a torrent: it must be an array, it
     * must have a non-empty 'info' entry, and in a multi-file torrent no file with a
     * non-zero length may have a path made up solely of empty components.
     *
     * @throws \TypeError|\RuntimeException
     */
    public function validate(): void
    {
        if (!is_array($this->data)) {
            throw new \TypeError('Data must be an array');
        }
        if (empty($this->data['info'])) {
            throw new \RuntimeException("Torrent dictionary doesn't have info key");
        }
        if (!isset($this->data['info']['files'])) {
            // Single-file torrent: there are no per-file paths to check.
            return;
        }

        foreach ($this->data['info']['files'] as $file) {
            $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
            if (!isset($file[$path_key])) {
                continue;
            }
            $empty_parts = array_filter(
                $file[$path_key],
                static function ($element) {
                    return strlen($element) === 0;
                }
            );
            // Every path component is empty (this includes an empty path list). That is
            // only tolerated for zero-length entries. `??` avoids an undefined-index
            // warning when the entry has no 'length' at all.
            if (count($empty_parts) === count($file[$path_key]) && ($file['length'] ?? null) !== 0) {
                throw new \RuntimeException('Cannot have a file with no path');
            }
        }
    }

    /**
     * Utility function to clean out keys in the data and info dictionaries that we don't need in
     * our torrent file when we go to store it in the DB or serve it up to the user (with the
     * expectation that we'll be calling at least setAnnounceUrl(...) when a user asks for a valid
     * torrent file).
     *
     * @return bool flag to indicate if we altered the info dictionary
     */
    public function clean(): bool
    {
        $this->cleanDataDictionary();
        return $this->cleanInfoDictionary();
    }

    /**
     * Clean out keys within the data dictionary that are not strictly necessary or will be
     * overwritten dynamically on any downloaded torrent (like announce or comment), so that we
     * store the smallest encoded string within the database and cut down on potential waste.
     */
    public function cleanDataDictionary(): void
    {
        $allowed_keys = ['encoding', 'info'];
        foreach ($this->data as $key => $value) {
            // Strict comparison: PHP stores numeric-string keys as ints, and on PHP 7 a
            // loose comparison treats int 0 as equal to any non-numeric string.
            if (!in_array($key, $allowed_keys, true)) {
                unset($this->data[$key]);
            }
        }
    }

    /**
     * Cleans out keys within the info dictionary (and would affect the generated info_hash)
     * that are not standard or expected. We do allow some keys that are not strictly necessary
     * (primarily the two below), but that's because it's better to just have the extra bits in
     * the dictionary than having to force a user to re-download the torrent file for something
     * that they might have no idea their client is doing nor how to stop it. Returns TRUE if
     * we had to change something in the info dictionary that would affect the info_hash (thus
     * requiring a re-download), else returns FALSE.
     *
     * x_cross_seed is added by PyroCor (@see https://github.com/pyroscope/pyrocore)
     * unique is added by xseed (@see https://whatbox.ca/wiki/xseed)
     *
     * @return bool
     */
    public function cleanInfoDictionary(): bool
    {
        $cleaned = false;
        $allowed_keys = [
            'files', 'name', 'piece length', 'pieces', 'private', 'length',
            'name.utf8', 'name.utf-8', 'md5sum', 'sha1', 'source',
            'file-duration', 'file-media', 'profiles', 'x_cross_seed', 'unique',
        ];
        foreach ($this->data['info'] as $key => $value) {
            // Strict comparison for the same reason as in cleanDataDictionary(): integer
            // keys must never be loosely matched against the allowed string keys.
            if (!in_array($key, $allowed_keys, true)) {
                unset($this->data['info'][$key]);
                $cleaned = true;
            }
        }
        return $cleaned;
    }

    /**
     * Returns a bool on whether the private flag is set to integer 1 within the info dictionary.
     *
     * @return bool
     */
    public function isPrivate(): bool
    {
        $this->hasData();
        return isset($this->data['info']['private']) && $this->data['info']['private'] === 1;
    }

    /**
     * Sets the private flag (if not already set) in the info dictionary. Setting this to 1 makes
     * it so a client will only publish its presence in the swarm via the tracker in the announce
     * URL, else it'll be discoverable via other means such as PEX peer exchange or DHT, which is
     * a negative for security and privacy of a private swarm. Returns a bool on whether or not
     * the flag was changed so that an appropriate screen can be shown to the user.
     *
     * @return bool
     */
    public function makePrivate(): bool
    {
        $this->hasData();
        if ($this->isPrivate()) {
            return false;
        }
        $this->data['info']['private'] = 1;
        // Keep the info dictionary keys ordered after inserting the new one.
        ksort($this->data['info']);
        return true;
    }

    /**
     * Set the source flag in the info dictionary equal to $source. This can be used to ensure a
     * unique info hash across sites so long as all sites use the source flag. This isn't an
     * 'official' flag (no accepted BEP on it), but it has become the de facto standard with more
     * clients supporting it natively. Returns a boolean on whether or not the source was changed
     * so that an appropriate screen can be shown to the user.
     *
     * @param string $source
     * @return bool
     */
    public function setSource(string $source): bool
    {
        $this->hasData();
        if (isset($this->data['info']['source']) && $this->data['info']['source'] === $source) {
            return false;
        }
        // Since we've set the source and will require a re-download, we might as well clean
        // these out as well
        unset($this->data['info']['x_cross_seed']);
        unset($this->data['info']['unique']);
        $this->setValue(['info.source' => $source]);
        return true;
    }

    /**
     * Get the source flag if one has been set
     */
    public function getSource(): ?string
    {
        $this->hasData();
        return $this->data['info']['source'] ?? null;
    }

    /**
     * Get the creation date for torrent if one has been set
     */
    public function getCreationDate(): ?int
    {
        $this->hasData();
        return $this->data['creation date'] ?? null;
    }

    /**
     * Function to allow you to set any number of keys and values in the data dictionary. You
     * can set the value in a nested dictionary by concatenating the keys into a string with a
     * period separator (ex: info.name will set the name field in the info dictionary) so that
     * the rest of the dictionary is unaffected. Every dictionary along the path is re-sorted
     * by key, and the whole structure is re-validated afterwards.
     *
     * @param array $array map of (possibly dotted) key path => value to store
     * @throws \TypeError|\RuntimeException if the resulting data fails validate()
     */
    public function setValue(array $array): void
    {
        foreach ($array as $key => $value) {
            if (is_array($value)) {
                ksort($value);
            }
            // PHP turns numeric-string array keys into ints; cast back so explode() does
            // not raise a TypeError under strict_types.
            $keys = explode('.', (string) $key);

            // First pass: walk (creating levels as needed) down to the target and assign.
            $data = &$this->data;
            foreach ($keys as $part) {
                $data = &$data[$part];
            }
            $data = $value;

            // Second pass: re-sort every array along the path that was just touched.
            $data = &$this->data;
            foreach ($keys as $part) {
                $data = &$data[$part];
                if (is_array($data)) {
                    ksort($data);
                }
            }
            // Break the reference so later code cannot write through it by accident.
            unset($data);
        }
        ksort($this->data);
        $this->validate();
    }

    /**
     * Get the SHA1 hash (as a 40 character hexadecimal string) of the BEncoded info dictionary.
     * Gazelle primarily relies on this so that it can ensure that all torrents uploaded have
     * unique info hashes and so a user could search for a torrent based on its info hash. The
     * BitTorrent protocol uses this when announcing/scraping a torrent so that the tracker can
     * identify the torrent the client is talking about.
     *
     * @return string
     */
    public function getInfoHash(): string
    {
        $this->hasData();
        return sha1($this->encodeVal($this->data['info']));
    }

    /**
     * Get the info hash packed from its hexadecimal form into raw bytes.
     *
     * NOTE: despite the method name, the return value is the binary (20 byte) string produced
     * by pack('H*', ...), not a hexadecimal string; use getInfoHash() for the hex form.
     *
     * @return string
     */
    public function getHexInfoHash(): string
    {
        return pack('H*', $this->getInfoHash());
    }

    /**
     * Get the torrent name from the info dictionary, preferring the 'name.utf-8' key over
     * 'name' when both are present.
     *
     * @return string
     */
    public function getName(): string
    {
        $this->hasData();
        return $this->data['info']['name.utf-8'] ?? $this->data['info']['name'];
    }

    /**
     * Get the total size in bytes of the files in the torrent. For a single file torrent, it'll
     * just be the 'length' key in the 'info' dictionary, else we iterate through the 'files' list
     * and add up the 'length' of each element.
     */
    public function getSize(): int
    {
        $this->hasData();
        if (!isset($this->data['info']['files'])) {
            return $this->data['info']['length'];
        }
        $total = 0;
        foreach ($this->data['info']['files'] as $file) {
            $total += $file['length'];
        }
        return $total;
    }

    /**
     * Get the files that are in the torrent. The result has two keys: 'total_size' (sum of all
     * file lengths) and 'files', a list where each element is an array with the keys 'path'
     * and 'size'. For single file torrents we just take the name and length keys from the info
     * dictionary. For multiple file torrents we iterate through the 'files' list, where each
     * element has 'length' and 'path' (a list of all components of the path, which we join
     * together with '/'), and sort the result by path using a case-insensitive natural order.
     *
     * @return array
     */
    public function getFileList(): array
    {
        // Same guard as every other accessor in this class.
        $this->hasData();
        $files = [];
        if (!isset($this->data['info']['files'])) {
            // Single-file torrent
            $name = $this->data['info']['name.utf-8'] ?? $this->data['info']['name'];
            $size = $this->data['info']['length'];
            $files[] = ['path' => $name, 'size' => $size];
        } else {
            $size = 0;
            foreach ($this->data['info']['files'] as $file) {
                $size += $file['length'];
                $path_key = isset($file['path.utf-8']) ? 'path.utf-8' : 'path';
                $files[] = ['path' => implode('/', $file[$path_key]), 'size' => $file['length']];
            }
            usort(
                $files,
                static function ($a, $b) {
                    return strnatcasecmp($a['path'], $b['path']);
                }
            );
        }
        return ['total_size' => $size, 'files' => $files];
    }

    /**
     * Whether the info dictionary has a 'files' list (i.e. this is a multi-file torrent).
     */
    public function hasFiles(): bool
    {
        return isset($this->data['info']['files']);
    }

    /**
     * Whether the top level dictionary has an 'encrypted_files' key.
     */
    public function hasEncryptedFiles(): bool
    {
        return isset($this->data['encrypted_files']);
    }

    /**
     * Returns an array of strings formatted to be inserted into a Gazelle database into the table
     * torrents.FileList which is then used for displaying the table of files to the user when
     * viewing the group. Format of each string is:
     *      .{extension} s{size}s {name} {delimiter}
     * We use the delimiter so that we can split the first three apart via ' ' and that then we
     * use the delimiter to find where the name ends.
     *
     * @return array
     */
    public function getGazelleFileList(): array
    {
        $files = [];
        foreach ($this->getFileList()['files'] as $file) {
            $size = $file['size'];
            // Replace each of newline, carriage return and tab with a space. str_replace()
            // is used rather than strtr() with a single-character replacement string,
            // because strtr() ignores the extra characters of the longer argument and
            // would therefore only replace the newline.
            $path = $this->makeUTF8(str_replace(["\n", "\r", "\t"], ' ', $file['path']));
            $ext_pos = strrpos($path, '.');
            // Deliberately a truthiness test and not `$ext_pos !== false`: extension-less
            // files that start with a '.' (position 0) should not get an extension.
            $ext = ($ext_pos ? trim(substr($path, $ext_pos + 1)) : '');
            $files[] = sprintf("%s s%ds %s %s", ".$ext", $size, $path, self::$utf8_filelist_delim);
        }
        return $files;
    }

    /**
     * Given a string, convert it to UTF-8 format, if it's not already in UTF-8.
     *
     * @param string $str input to convert to utf-8 format
     *
     * @return string
     */
    private function makeUTF8(string $str): string
    {
        // An empty pattern with the /u modifier only matches when the subject is valid UTF-8.
        if (preg_match('//u', $str) === 1) {
            return $str;
        }
        // Strict detection so that a string PCRE has just rejected is not reported back as
        // UTF-8 on a "closest match" basis and returned unconverted.
        $encoding = mb_detect_encoding($str, ['UTF-8', 'ISO-8859-1'], true);
        // Legacy thing for Gazelle, leaving it in, but not going to bother testing
        // @codeCoverageIgnoreStart
        if (empty($encoding)) {
            $encoding = 'ISO-8859-1';
        }
        if ($encoding === 'UTF-8') {
            return $str;
        }
        // @codeCoverageIgnoreEnd
        return mb_convert_encoding($str, 'UTF-8', $encoding);
    }
}