-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscrape.py
64 lines (50 loc) · 1.54 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
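Notes on entries of the raw signature list that need attention
(grouping below is reconstructed; verify against the data file).

double check:
    sql database.ext == mdf
breaks:
    missing comma:
        ea interchange format_3
        matlab
    extra comma:
        Huskygram, Poem, or Singer embroidery
dupes:
    BZ2
    SXC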
"""
import os, json
from bs4 import BeautifulSoup
from sl4ng import show, pop, getsource, regenerator, multisplit, flat
import requests, pyperclip as pc
"Description,Header (hex),Extension,FileClass,Header_offset,Trailer (hex)"
def scrapetxt(path):
    """
    Lazily read a comma-separated text file, one record per line.

    Each non-blank line is stripped of surrounding whitespace and split on
    every comma. No quoting or escaping is interpreted, so a comma inside a
    field produces an extra column.

    :param path: location of the text file to read
    :return: generator yielding, per non-blank line, the list of its
        comma-separated fields as strings
    """
    with open(path, 'r') as fob:
        # Iterate the handle directly so the file is streamed line by line
        # instead of being loaded whole by readlines().
        for line in fob:
            line = line.strip()
            # A blank line (e.g. a trailing newline at end of file) carries no
            # record; yielding [''] would only break callers that unpack a
            # fixed number of columns.
            if not line:
                continue
            yield line.split(',')
def by_ext(path):
    """
    Build a lookup of file signatures keyed by the extension column.

    Every row produced by scrapetxt(path) must hold six fields, in the order
    description, header (hex), extension, file class, header offset,
    trailer (hex).

    :param path: location of the raw signature text file
    :return: dict mapping each extension field to a dict with the keys
        'offset' (int), 'head' (list of ints parsed from the hex header),
        'tail' (list of ints parsed from the hex trailer; empty when the
        trailer field contains 'null'), 'kind' and 'desc'
    :raises ValueError: when a row does not have exactly six fields, or when
        the offset or a hex field is not a valid number
    """
    d = {}
    for data in scrapetxt(path):
        if len(data) != 6:
            # A missing or stray comma in the source shifts the columns; name
            # the offending row instead of failing with a bare unpack error.
            raise ValueError(
                'expected 6 comma-separated fields, got {}: {!r}'.format(
                    len(data), data
                )
            )
        desc, head, ext, kind, offset, tail = data
        # Rows that share the same extension field overwrite one another;
        # the last one read wins.
        d[ext] = {
            'offset': int(offset),
            'head': [int(i, 16) for i in head.split()],
            # Parse the trailer from its own column (it used to be read from
            # the header column), and only when it is not the null marker so
            # that the marker itself is never fed to int().
            'tail': [] if 'null' in tail else [int(i, 16) for i in tail.split()],
            'kind': kind,
            'desc': desc,
        }
    return d
if __name__ == '__main__':
    # Script entry point: dump the signature table to JSON, then print the
    # extensions that occur in several rows with identical headers.
    path = r'.\FileSigs_20200424-gary_version\file_sigs_RAW.txt'
    name = 'magic_numbers-by_ext.json'

    # Parse once, and before the output file is opened: open(name, 'w')
    # truncates immediately, so a parse error must not leave an empty JSON
    # file behind. The same table is reused for the duplicate report below.
    be = by_ext(path)
    with open(name, 'w') as fob:
        json.dump(be, fob, sort_keys=True)

    # os.startfile exists only on Windows; skip the preview elsewhere rather
    # than abort before the duplicate report runs.
    if hasattr(os, 'startfile'):
        os.startfile(name)

    # Iterating `be` yields its keys (the extension fields).
    # NOTE(review): presumably multisplit('|') splits each key on '|' and
    # flat/regenerator turn the result into a re-iterable flat sequence that
    # supports .count() -- confirm against sl4ng.
    m = regenerator(flat(map(multisplit('|'), be)))
    duped = filter(lambda x: m.count(x) > 1, m)
    # NOTE(review): `i in key` is a substring test on the whole key, so a
    # short extension also matches keys that merely contain it -- confirm
    # this is intended.
    duped = {
        i: [be[key]['head'] for key in be if i in key] for i in duped
    }
    for key, val in duped.items():
        # Print only when every collected header equals every other one.
        if all(val.count(i)==len(val) for i in val):
            print(key)