-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingestion to json to bz2
83 lines (59 loc) · 1.92 KB
/
ingestion to json to bz2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# Ingestion File Name Input
import pandas as pd
filename = input('Ingestion Error File Name: ')
# Read the file as one raw string per line. The previous call,
# pd.read_csv(filename, header=None, sep='\n'), is rejected by recent pandas
# releases (a line terminator is not accepted as a separator), so the lines
# are collected with plain file iteration instead. Blank lines are skipped,
# which is also read_csv's default; utf-8 matches read_csv's default encoding.
with open(filename, encoding='utf-8') as rawfile:
    rawlines = [rawline.rstrip('\n') for rawline in rawfile if rawline.strip()]
if not rawlines:
    # Fail early with a clear message instead of an IndexError further down.
    raise SystemExit('No records found in ' + filename)
# Split every line on commas. expand=True spreads the fields over columns and
# fills rows that have fewer fields with missing values, so records of
# differing lengths still fit in a single DataFrame.
ingestionfile = pd.Series(rawlines, dtype='object').str.split(',', expand=True)
# Error Code Input
# Keep only the columns at positions 0, 2 and 3 and give them the names the
# rest of the script refers to.
info = ingestionfile.iloc[:, [0, 2, 3]]
info.columns = ['errorcode', 'errorROEID', 'firmROEID']
# Show the distinct error codes in order of first appearance so the user can
# pick one; dict.fromkeys drops repeats while keeping that order.
errorlist = list(dict.fromkeys(info['errorcode']))
print(errorlist)
errorcode = input('Error code: ')
import json
# Create the deletion JSON file: one {"actionType": "DEL", ...} record per
# line for every errorROEID whose row matches the chosen error code.
# str.contains does a pattern (substring) match against the errorcode column.
matches_code = info['errorcode'].str.contains(errorcode)
errorroeid = info.loc[matches_code, 'errorROEID']
# The output name reuses characters 5-19 of the input file name.
jsonfilename = filename[5:20] + 'OrderEvents_90' + errorcode + '.json'
with open(jsonfilename, 'w') as errorjson:
    for roe_id in errorroeid:
        record = {"actionType": "DEL", "errorROEID": int(roe_id)}
        errorjson.write(json.dumps(record) + "\n")
# compress deletion json file to bz2
import bz2
bz2file = jsonfilename + '.bz2'
# Write one JSON record per line, UTF-8 encoded, into the bz2 archive.
with bz2.BZ2File(bz2file, 'w') as outfile:
    for i in errorroeid:
        # The original literal `{"actionType", "DEL", "firmROEID": i}` mixed
        # set and dict syntax, which is a SyntaxError; "actionType" is now a
        # proper key with the value "DEL".
        # NOTE(review): the uncompressed JSON file stores
        # {"errorROEID": int(i)}, while this record uses the key "firmROEID"
        # with the uncast errorROEID value. Kept as written; confirm which
        # payload the compressed file is supposed to carry.
        line = {"actionType": "DEL", "firmROEID": i}
        json_str = json.dumps(line) + '\n'
        json_bytes = json_str.encode('utf-8')
        outfile.write(json_bytes)
# Meta File Generation
import time
import hashlib
# recordCount is the number of lines (one record per line) in the JSON file.
# The file is opened in a with-block so the handle is closed again.
with open(jsonfilename) as jsonlines:
    count = sum(1 for _ in jsonlines)
# SHA-256 hex digest of the compressed file exactly as it is stored on disk.
# The handle and buffer were previously named `bz2` and `bytes`, which rebound
# the bz2 module and shadowed the bytes builtin for the rest of the script.
with open(bz2file, 'rb') as compressed:
    readable_hash = hashlib.sha256(compressed.read()).hexdigest()
meta = {
    "type": "META",
    # Was bool(""), which always evaluates to False.
    "doneForDay": False,
    # Fixed NameError: the original read `jsonfiename`.
    # NOTE(review): assumes the date occupies characters 12-19 of the
    # generated JSON file name - confirm against the real naming scheme.
    "fileGenerationDate": jsonfilename[12:20],
    "reporter": "",
    "submitter": "",
    "fileVersion": "2.2.1",
    "files": [
        {
            "fileName": bz2file,
            "recordCount": count,
            "compressedHash": readable_hash,
        }
    ],
}
# generate the meta json file: same base name as the JSON file, with the
# trailing '.json' replaced by '.meta.json'
with open(jsonfilename[0:-5] + '.meta.json', 'w') as metafile:
    json.dump(meta, metafile)