-
Notifications
You must be signed in to change notification settings - Fork 0
/
analyze_git_repositories.py
137 lines (102 loc) · 3.85 KB
/
analyze_git_repositories.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python
"""
This is a Titan module
- Analyze Git Repositories is to let the administrators
know what data is checked out on a device in the event
of a compromise or the misplacement of a device
To use:
sudo pip install --upgrade titantools
"""
import json
import logging
from sys import argv
from titantools.orm import TiORM
from titantools.data_science import DataScience
from titantools.system import execute_command
from time import time, gmtime, strftime
from os.path import dirname,basename,isfile
from os import chmod
#from titantools.decorators import run_every_5
# Set Logging Status
# Module-wide verbosity flag, read by AnalyzeGitCheckouts.check_git_dirs();
# flipped to True by the "--log" CLI argument in the __main__ block.
logging_enabled = False
# Set datastore directory
# First CLI argument: path to the datastore handed to TiORM in store().
# NOTE(review): raises IndexError when the script is launched with no
# arguments — confirm the Titan launcher always supplies it.
DATASTORE = argv[1]
#@run_every_5
class AnalyzeGitCheckouts(object):
    """
    Analyze Git repositories checked out under /Users.

    Walks the filesystem for ".git" directories, records each repository's
    most recent commit and configured remotes, and stages the results for
    persistence via TiORM / DataScience.
    """

    def __init__(self):
        # "message" is reported back by analyze(); defaults to the class
        # name until a real summary is available.
        self.message = type(self).__name__
        # 0 means "no issues" (convention used by check_git_dirs below).
        self.status = 0
        # One dict per repository found.
        self.datastore = []

    def check_git_dirs(self):
        """
        Find every Git checkout under /Users and record its metadata.

        Populates self.datastore with one entry per repository and sets
        self.message / self.status accordingly.
        """
        repos = execute_command('find /Users -type d -name ".git"').splitlines()
        for repo in repos:
            repo_path = dirname(repo)
            repo_git_path = repo
            repo_name = basename(dirname(repo))
            if logging_enabled:
                print("Found '%s' @ '%s'" % (repo_name, repo_path))
            # Get latest commit. Output is "<date>|<details>". A repository
            # with no commits yet yields no "|" separator, so guard the
            # split result — the original indexed [1] unconditionally and
            # raised IndexError on fresh repos.
            most_recent_commit = execute_command('git --git-dir="%s" --work-tree="%s" log --pretty=format:"%%cd|[Commit: %%h] [Date: %%cd] [Commiter: %%cn] [Commiter Email: %%ce] [Message: %%s]" -1' % (repo_git_path, repo_path)).split("|", 1)
            if len(most_recent_commit) == 2:
                commit_date, commit_details = most_recent_commit
            else:
                commit_date = ""
                commit_details = most_recent_commit[0] if most_recent_commit else ""
            # Get remotes. Quote the paths (the log command above already
            # does) so checkout paths containing spaces do not break the
            # shell command.
            remotes = execute_command('git --git-dir="%s" --work-tree="%s" remote -v' % (repo_git_path, repo_path))
            # Append to master list. NOTE: "exec_date" is a module global
            # assigned in the __main__ block before this method runs.
            self.datastore.append({
                "name": repo_name,
                "path": repo_path,
                "commit_date": commit_date,
                "last_commit": commit_details,
                "remotes": remotes,
                "date": exec_date
            })
        # Set Message
        self.message = "Found %d repositories" % len(self.datastore)
        # If no issues, return 0
        self.status = 0

    def analyze(self):
        """
        This is the 'main' method that launches all of the other checks.

        Returns a JSON string with "status" and "message" keys.
        """
        self.check_git_dirs()
        return json.JSONEncoder().encode({"status": self.status, "message": self.message})

    def store(self):
        """
        Persist self.datastore into the TiORM datastore at DATASTORE,
        creating tables from the sibling schema.json when present.
        """
        # the table definitions are stored in a library file. this is instantiating
        # the ORM object and initializing the tables
        module_schema_file = '%s/schema.json' % dirname(__file__)
        # Is file
        if isfile(module_schema_file):
            with open(module_schema_file) as schema_file:
                schema = json.load(schema_file)
            # ORM
            ORM = TiORM(DATASTORE)
            # .items() behaves identically here and works on both
            # Python 2 and 3 (.iteritems() is Python-2-only).
            for k, v in schema.items():
                ORM.initialize_table(k, v)
            data_science = DataScience(ORM, self.datastore, "git_repos")
            data_science.get_new_entries()
if __name__ == "__main__":
    # Parse "--log" up front so verbose output is active DURING the run.
    # The original flipped logging_enabled only after execution finished,
    # so the flag never affected check_git_dirs().
    if "--log" in argv[1:]:
        logging_enabled = True
        logging.basicConfig(format='%(message)s', level=logging.INFO)

    start = time()
    # the "exec_date" is used as the "date" field in the datastore
    exec_date = strftime("%a, %d %b %Y %H:%M:%S-%Z", gmtime())
    ###########################################################################
    # Gather data
    ###########################################################################
    try:
        # A constructor call never returns None, so no guard is needed here.
        a = AnalyzeGitCheckouts()
        output = a.analyze()
        a.store()
        print(output)
    except Exception as error:
        # Best-effort module: report the failure instead of crashing Titan.
        print(error)
    end = time()
    # to see how long this module took to execute, launch the module with
    # "--log" as a command line argument
    if logging_enabled:
        logging.info("Execution took %s seconds.", str(end - start))