-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathverisr_to_json.txt
51 lines (41 loc) · 1.9 KB
/
verisr_to_json.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# R
DESCRIPTION <- "Parse incidents from data set in R and then retrieve the associated json files using python."

# Text file that receives the selected incident ids.
names <- "./incident_id.txt"

# Keep the incidents that satisfy all three conditions below:
#   1. healthcare-related: the `victim.industry2.62` flag is set, medical
#      data was involved, or the data victim mentions "Patient";
#   2. data disclosure is recorded as "Yes" or "Potentially";
#   3. the record does not come from VCDB.
vz_healthcare <- vz %>%
  filter(
    victim.industry2.62 |
      attribute.confidentiality.data.variety.Medical |
      grepl("Patient", attribute.confidentiality.data_victim),
    attribute.confidentiality.data_disclosure.Potentially |
      attribute.confidentiality.data_disclosure.Yes,
    source_id != "vcdb"
  )

# Write out the ids so the Python step can look up the matching json files.
write(vz_healthcare$incident_id, file = names)
# Python
import fnmatch
import os
import shutil
import json
# Destination directory for the copied incident files.
outdir = "./healthcare_incidents/"
# Directories that are searched (recursively) for incident json files.
indir1 = "./gitlab_repo/data/json" # 2015 data set
indir2 = "./data/1.3" # older data set
indir3 = "./vcdb"
# Text file with one incident id per line (written by the R step).
names = "./incident_id.txt"


def read_incident_ids(path):
    """Return the set of incident ids listed one per line in ``path``.

    Surrounding whitespace is stripped from every line.  Blank lines are
    ignored so that an empty string never ends up in the set of wanted ids.
    """
    wanted = set()
    with open(path, 'r') as f:
        for line in f:
            incident_id = line.strip()
            if incident_id:
                wanted.add(incident_id)
    return wanted


def copy_matching_incidents(indir, ids, outdir, skip_vcdb=True):
    """Copy the ``*.json`` incidents below ``indir`` whose id is in ``ids``.

    Parameters:
        indir: directory that is walked recursively.
        ids: collection of wanted ``incident_id`` strings.
        outdir: directory the matching files are copied into; it is created
            when it does not exist yet.
        skip_vcdb: when true, directories whose path contains ``"vcdb"`` are
            not inspected.

    Returns:
        The number of files copied.

    Raises:
        ValueError: propagated from ``json.load`` when a ``*.json`` file does
            not contain valid JSON.
    """
    # shutil.copy() does not create the destination directory itself.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    copied = 0
    for root, _dirnames, filenames in os.walk(indir):
        # ``root`` is the same for every file of a directory, so the check is
        # done once per directory instead of once per file.
        if skip_vcdb and "vcdb" in root:
            continue
        for filename in fnmatch.filter(filenames, '*.json'):
            path = os.path.join(root, filename)
            with open(path) as j:
                incident = json.load(j)
            # JSON files that are not incident records (no 'incident_id' key,
            # or a top-level value that is not an object) are skipped rather
            # than aborting the whole run.
            if not isinstance(incident, dict):
                continue
            incident_id = incident.get('incident_id')
            if isinstance(incident_id, str) and incident_id in ids:
                shutil.copy(path, outdir)
                copied += 1
    return copied


# The guard keeps the script behaviour when the file is run or pasted into an
# interpreter, while letting the helpers above be imported without touching
# the file system.
if __name__ == "__main__":
    ids = read_incident_ids(names)
    copy_matching_incidents(indir1, ids, outdir)
    copy_matching_incidents(indir2, ids, outdir)
    # The VCDB checkout is searched in full, including "vcdb" directories.
    copy_matching_incidents(indir3, ids, outdir, skip_vcdb=False)