-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconsistency-report.py
76 lines (63 loc) · 2.36 KB
/
consistency-report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import json
import re
import sys
# Path of the JSON file to check, taken from the first command-line argument.
filename = sys.argv[1]
# Maps a user name (as returned by get_user) to the accumulated Markdown text
# of every problem logged for that user.
log = {}
# Keys of the entries processed so far; has_valid_parent looks parents up here.
all_keys = []
def get_user(meta):
    """Return a display name for the user who created an item.

    Args:
        meta: The item's ``meta`` mapping. When it holds a ``createdByUser``
            mapping, that mapping's ``name`` is preferred and ``username``
            is the fallback when ``name`` is missing or empty.

    Returns:
        The chosen name, or ``None`` when no creating user (or no usable
        name for it) is present.
    """
    user_info = meta.get('createdByUser')
    if not user_info:
        return None
    # Use .get so an entry lacking 'name' falls back to 'username' instead of
    # aborting the whole report with a KeyError.
    return user_info.get('name') or user_info.get('username')
def log_problem(e, msg):
    """Append a problem message for an entry to its creator's section of ``log``.

    The message is stored as a Markdown link labelled with the entry's key,
    followed by ``msg`` and a blank line.

    Args:
        e: The entry; ``e['data']['key']`` and ``e['meta']`` are read, and
            ``e['data']['parentItem']`` when present.
        msg: Text describing the problem.
    """
    item = e['data']
    current_key = item['key']
    # Entries with a parentItem link to the parent's page instead of their own.
    link = item.get('parentItem', current_key)
    owner = get_user(e['meta'])
    line = f'[{current_key}](https://www.zotero.org/groups/2480461/ag-gipp/items/{link}/item-details) {msg}\n\n'
    log[owner] = log.get(owner, '') + line
def parse_extra_field(d, ent):
    """Parse the text of an ``extra`` field into a dictionary.

    Every non-blank line is split at its first colon into a key and a value,
    both stripped of surrounding whitespace. A non-blank line without a colon
    is reported through ``log_problem`` against ``ent`` and contributes
    nothing to the result. A repeated key keeps the value of its last line.

    Args:
        d: The raw extra-field text, one item per newline-separated line.
        ent: The entry the text belongs to; used only for problem reporting.

    Returns:
        A dict mapping stripped keys to stripped values.
    """
    parsed = {}
    for line in d.split('\n'):
        if not line.strip():
            continue
        name, colon, value = line.partition(':')
        if colon:
            parsed[name.strip()] = value.strip()
        else:
            log_problem(ent, 'information in extra field not formatted as key:value pair: ' + line)
    return parsed
def has_valid_parent(e):
    """Tell whether an entry names a parent whose key is already in ``all_keys``.

    Args:
        e: The entry; ``e['data']`` is inspected for a ``parentItem`` key.

    Returns:
        ``True`` when ``parentItem`` is present and its value is found in
        ``all_keys``, otherwise ``False``.
    """
    item = e['data']
    # We daringly assume that the parent occurs before its children.
    return 'parentItem' in item and item['parentItem'] in all_keys
# Load the items to check. The file is decoded as UTF-8 explicitly so the
# result does not depend on the platform's default encoding, and it is closed
# again before any processing starts.
with open(filename, encoding='utf-8') as f:
    bibtex = json.load(f)

# A compliant file name contains two or more ASCII letters between double
# dashes, e.g. "--ab--".
file_pat = re.compile(r'--[a-zA-Z]{2,}--')

for entry in bibtex:
    data = entry['data']
    if data['itemType'] == 'annotation':
        continue
    tags = data['tags']
    biblatex = entry['biblatex']
    # Record the key first: has_valid_parent relies on parents having been
    # seen before their children.
    all_keys.append(data['key'])

    # Check 1: entries with a biblatex value longer than 3 must carry tags.
    if not tags and len(biblatex) > 3:
        log_problem(entry, "has no tags")

    # Check 2: a citation key given in the extra field needs >= 3 characters.
    if 'extra' in data:
        edict = parse_extra_field(data['extra'], entry)
        cite_key = edict.get('Citation Key')
        if cite_key is not None and len(cite_key) < 3:
            log_problem(entry, cite_key + ' is too short as a citation key.')

    # Check 3: file names must match the naming pattern, except for entries
    # with "Snapshot" in the file name or title and entries whose parent has
    # not been seen.
    if 'filename' in data:
        fname = data['filename']
        if (not file_pat.search(fname)
                and "Snapshot" not in fname + data.get('title', '')
                and has_valid_parent(entry)):
            log_problem(entry, 'does not comply with file naming convention: ' + fname)

# Emit one Markdown section per user; an empty log prints nothing.
for key, value in log.items():
    print(f'### {key}\n\n{value}\n\n')