-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathmd5verify.py
executable file
·187 lines (140 loc) · 6.04 KB
/
md5verify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#! /usr/bin/env python
# This is a script to walk a directory structure and verify file integrity of
# each file it encounters. On first run, it will open a directory, hash each
# file, and store the hashes in an md5sum compatible file in that directory.
# Subsequent runs will process new files in a similar way, but will also check
# existing files with the current hashes. Any inconsistencies are reported.
#
# A similar project is http://snipplr.com/view/4023/ which I stumbled across and
# stole some ideas from, but I decided to finish this script instead of updating
# that one with the things I wanted. That one has features this one doesn't,
# and this one has a little more error checking, script/cron/nagios
# friendly output, and 100% compatibility with `md5sum` output.
import hashlib
import logging as log
import optparse
import os
import re
import sys
# Matches one line of an md5sum-style checksum file:
#   group 1: optional leading backslash (marks a file name with escapes)
#   group 2: the 32 lowercase hex digits of the MD5 digest
#   group 3: the file name, after a space and a ' ' or '*' mode character
md5line = re.compile(
    r"^(\\?)([0-9a-f]{32}) [\ \*](.*)$"
)
def process_directory(path, outfile):
    """Verify and refresh the checksum file of every directory under *path*.

    For each directory visited by os.walk, the checksum file named *outfile*
    (if present) is read, every regular file listed by the walk is hashed,
    and the results are compared:

    * a file listed in the checksum file but no longer on disk is reported
      and dropped from the checksum file;
    * a file whose hash differs from the recorded one is reported and the
      new hash is recorded;
    * a file that is not yet listed is added silently.

    The checksum file is only rewritten when something changed.

    path    -- root directory to walk (absolute or relative).
    outfile -- name of the per-directory checksum file.

    Returns True when no missing or modified files were found, False
    otherwise.
    """
    consistency = True  # Nothing bad found yet

    # Paths are built with os.path.join instead of chdir()-ing into each
    # directory: the process working directory is left alone (so nothing needs
    # restoring if an exception escapes) and a relative *path* keeps working.
    for root, _dirs, files in os.walk(path, onerror=log.error):
        log.info("Operating on %s", root)
        updated = False  # We haven't detected any changes
        c_file = os.path.join(root, outfile)

        # Hashes recorded by a previous run, keyed by file name.
        known = {}
        if os.path.isfile(c_file):
            log.debug("Found existing checksums for %s", root)
            for digest, name in read_checksums(c_file):
                fullname = os.path.join(root, name)
                if os.path.isfile(fullname):
                    known[name] = digest
                else:
                    updated = True
                    consistency = False
                    log.warning("Missing a file: %s (hash: %s)",
                                fullname, digest)

        # Hashes of files seen for the first time in this run.
        fresh = {}
        for name in sorted(files):
            if name == outfile:
                continue
            fullname = os.path.join(root, name)
            digest = calculate_hash(fullname)
            if digest is None:
                # calculate_hash already logged why the file is unreadable.
                # Never record a bogus hash; a previously known file that can
                # no longer be verified counts as an inconsistency, and its
                # old hash is kept.
                if name in known:
                    consistency = False
                continue
            if name not in known:
                updated = True
                fresh[name] = digest
            elif known[name] != digest:
                updated = True
                consistency = False
                log.warning("Inconsistent hashes detected for %s! Old: %s "
                            "New: %s", fullname, known[name], digest)
                known[name] = digest  # Will write the new hash to disk

        if updated:
            fresh.update(known)
            write_checksums(c_file, fresh)

    return consistency
def calculate_hash(file):
    """Return the hex MD5 digest of the file at path *file*.

    The file is read in binary mode in 32 KiB chunks so that large files are
    never held in memory as a whole.

    Returns the 32-character lowercase hex digest, or None if the file could
    not be opened or read (the error is logged).
    """
    md5 = hashlib.md5()
    try:
        with open(file, 'rb') as f:
            # iter() with a sentinel keeps calling read() until it returns the
            # empty bytes object, i.e. until end of file.
            for chunk in iter(lambda: f.read(32768), b""):
                md5.update(chunk)
    except IOError as e:
        log.error("Error opening %s: %s" % (file, e))
        return None
    return md5.hexdigest()
# A backslash followed by any single character inside an escaped file name.
_ESCAPED_CHAR = re.compile(r"\\(.)")

# Escape sequences md5sum uses in file names, mapped to the real character.
_UNESCAPED = {"\\": "\\", "n": "\n"}


def _unescape_name(name):
    """Undo md5sum's file name escaping (``\\\\`` and ``\\n``) in one pass."""
    # A single left-to-right pass is required: chained str.replace calls
    # would turn the escaped form of "backslash followed by n" into a newline.
    # Unknown escape sequences are left untouched.
    return _ESCAPED_CHAR.sub(
        lambda m: _UNESCAPED.get(m.group(1), m.group(0)), name)


def read_checksums(file):
    """Yield (hash, name) pairs parsed from the md5sum-style file *file*.

    Lines that do not match the checksum line pattern are logged as errors
    and skipped. If a line starts with a backslash, the file name contains
    escaped characters, which are decoded before the name is yielded.

    If the file cannot be opened or read, the error is logged and the
    generator simply ends.
    """
    try:
        with open(file, 'r') as f:
            for line in f:
                match = md5line.match(line)
                if not match:
                    msg = "Invalid syntax in checksum file. File: %s:%s" % (file, line)
                    log.error(msg)
                    continue
                escaped, digest, name = match.groups()
                if escaped:
                    name = _unescape_name(name)
                yield digest, name
    except IOError as e:
        log.error("Error reading checksums file %s: %s" % (file, e))
def write_checksums(file, results):
    """Write *results* to *file* in md5sum-compatible format.

    file    -- path of the checksum file to (over)write.
    results -- dict mapping file name to its hex MD5 digest.

    Nothing is written when *results* is empty. Entries are written sorted by
    file name so the output is stable between runs. Entries whose digest is
    None (a file that could not be hashed) are skipped rather than written as
    an invalid line. I/O errors are logged, not raised.
    """
    if not results:
        return
    try:
        with open(file, 'w') as f:
            for name, digest in sorted(results.items()):
                if digest is None:
                    log.debug("Skipping %s: no hash available" % name)
                    continue
                prefix = ""
                # The md5sum utility will prefix a line with \ if it contains
                # certain characters. We'll do the same here for
                # compatibility's sake. Read `info md5sum` for more info.
                if "\\" in name or "\n" in name:
                    # Backslashes must be doubled first so the backslash
                    # introduced for the newline is not doubled as well. A
                    # newline becomes the two characters backslash + 'n',
                    # keeping every record on a single line.
                    name = (name.replace("\\", "\\\\")
                                .replace("\n", "\\n"))
                    prefix = "\\"
                # Linux (and its md5sum) don't care if a file is binary or not,
                # so I'm not going to care either. If you care you'll need to:
                # 1) Determine if the file is binary (most tools scan the file
                #    for a null char)
                # 2) If the file is binary, change the second space in this
                #    string to an `*'
                f.write("%s%s  %s\n" % (prefix, digest, name))
    except IOError as e:
        log.error("Error writing checksums file %s: %s" % (file, e))
def main(argv=None):
    """Parse the command line, run the verification and return an exit code.

    argv -- list of command-line arguments without the program name; when
            None, optparse falls back to sys.argv[1:].

    Returns 0 when everything is consistent, 1 when process_directory found
    inconsistencies, and 2 for usage errors (wrong number of arguments or a
    path that is not a directory).
    """
    usage = "usage: %prog [options] path"
    parser = optparse.OptionParser(usage)
    parser.add_option("-o", "--outfile", default=".checksums",
                      help=("What should the name of the output file be?"
                            " [default: %default]"))
    # default=0 keeps the comparisons below valid when -v is not given;
    # without it the option value is None, which cannot be ordered against
    # an int on Python 3.
    parser.add_option("-v", "--verbose", action="count", dest="verbose",
                      default=0,
                      help="Print more output (up to -v -v)")
    (options, args) = parser.parse_args(argv)

    if options.verbose >= 2:
        log_level = log.DEBUG
    elif options.verbose == 1:
        log_level = log.INFO
    else:
        log_level = log.WARNING
    log.basicConfig(level=log_level, format="%(levelname)s - %(message)s")

    if len(args) != 1:
        log.critical("You need to specify the directory to process.")
        parser.print_usage()
        return 2

    if not os.path.isdir(args[0]):
        log.critical("%s is not a directory" % args[0])
        return 2

    # Returns false when inconsistencies are found
    if not process_directory(os.path.abspath(args[0]),
                             outfile=options.outfile):
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())