Skip to content

Commit

Permalink
Remove lief dependency and a few bugfixes in bingraph
Browse files Browse the repository at this point in the history
  • Loading branch information
kevoreilly committed Oct 15, 2018
1 parent c162a3e commit 355789c
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 42 deletions.
8 changes: 3 additions & 5 deletions lib/cuckoo/common/graphs/binGraph/binGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def generate_graphs(args_dict, file_path=False, save_dir=False):
'recurse': False,
'json': False,
'__dummy': True,
'entcolour': '#ff01d5',
'entcolour': '#ff00ff',
'prefix': None,
'dpi': 100,
'file': ['malware.exe'],
Expand All @@ -173,16 +173,14 @@ def generate_graphs(args_dict, file_path=False, save_dir=False):
'recurse': False,
'json': False,
'__dummy': True,
'entcolour': '#ff01d5',
'entcolour': '#ff00ff',
'prefix': None,
'dpi': 100,
'file': [],
'save_dir': '',
'chunks': 750,
'ibytes': [
{'colour': (0.08235294117647059, 1.0, 0.01568627450980392, 1.0), 'bytes': [0], 'name': u"0's"},
{'colour': (1.0, 0.16862745098039217, 0.00392156862745098, 1.0), 'bytes': [44, 144], 'name': u'Exploit'},
{'colour': (0.0, 0.0, 1.0, 1), 'bytes': [32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126], 'name': u'Printable ASCII'}
{'colour': (0, 1, 0, 1), 'bytes': [0], 'name': u"Zeros"},
],
'figsize': (12, 4), 'blob': False
}
Expand Down
159 changes: 125 additions & 34 deletions lib/cuckoo/common/graphs/binGraph/graphs/ent/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
"""
from __future__ import division


# # Import graph specific libs
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.ticker import MaxNLocator
Expand All @@ -34,8 +32,13 @@
import sys
import re


import lief
try:
import pefile
except ImportError as e1:
try:
import lief
except ImportError as e2:
pass


# # Python 2/3 fix
Expand All @@ -52,7 +55,7 @@
__chunks__ = 750
__ibytes__= '[ {"name":"0\'s", "colour": "#15ff04", "bytes": [0]}, {"name":"Exploit", "bytes": [44,144], "colour":"#ff2b01"}, {"name":"Printable ASCII", "colour":"b", "bytes": [32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126]} ]'
__ibytes_dict__ = json.loads(__ibytes__)
__entcolour__ = '#ff01d5'
__entcolour__ = '#ff00ff'

# # Set args in args parse - the given parser is a sub parser
def args_setup(arg_parser):
Expand Down Expand Up @@ -175,7 +178,7 @@ def generate(abs_fpath, fname, blob, chunks=__chunks__, ibytes=__ibytes_dict__,
fig, host = plt.subplots()

log.debug('Plotting shannon samples')
host.plot(np.array(shannon_samples), label='Entropy', c=hash_colour('Entropy'), zorder=1001, linewidth=1.5)
host.plot(np.array(shannon_samples), label='Entropy', c=kwargs['entcolour'], zorder=1001, linewidth=1.2)

host.set_ylabel('Entropy\n'.format(chunksize))
host.set_xlabel('Raw file offset')
Expand All @@ -194,9 +197,9 @@ def generate(abs_fpath, fname, blob, chunks=__chunks__, ibytes=__ibytes_dict__,
axBytePc.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, pos: ('{:d}%'.format(int(x)))))

for index, _ in enumerate(ibytes):
zorder -= 1
c = ibytes[index]['colour']
axBytePc.plot(np.array(ibytes[index]['percentages']), label=ibytes[index]['name'], c=c, zorder=zorder, linewidth=0.7, alpha=0.75)
axBytePc.plot(np.array(ibytes[index]['percentages']), label=ibytes[index]['name'], c=c, zorder=zorder, linewidth=1.2, alpha=0.75)
zorder -= 1

axBytePc.set_ybound(lower=-0.3, upper=101)

Expand All @@ -210,30 +213,27 @@ def generate(abs_fpath, fname, blob, chunks=__chunks__, ibytes=__ibytes_dict__,
log.warning('Parsing file as blob (as requested)')
else:

try:
parsedbin = lief.parse(filepath=abs_fpath)
log.debug('Parsed with lief as: {}'.format(type(parsedbin)))
bp = bin_proxy(abs_fpath)

except lief.bad_file as e:
parsedbin = None
log.warning('Failed to parse binary format. Not adding file specific info')
if None in (bp.bin, bp.type):
log.warning('Failed to parse binary format, parsing like --blob')

else:

if type(parsedbin) == lief.PE.Binary:
if bp.type == 'PE':

log.debug('Adding PE customisations')

# # Entrypoint (EP) pointer and vline
phy_ep_pointer = parsedbin.rva_to_offset(parsedbin.optional_header.addressof_entrypoint) / nr_chunksize
log.debug('{}: {}'.format('Entrypoint', hex(parsedbin.optional_header.addressof_entrypoint)))
phy_ep_pointer = bp.get_physical_from_rva(bp.get_virtual_ep()) / nr_chunksize
log.debug('{}: {}'.format('Entrypoint', hex(bp.get_virtual_ep())))

host.axvline(x=phy_ep_pointer, linestyle=':', c='r', zorder=zorder-1)
host.text(x=phy_ep_pointer, y=1.07, s='EP', rotation=45, va='bottom', ha='left')

longest_section_name = 0
# # Section vlines
for index, section in enumerate(parsedbin.sections):
for index, section in bp.sections():
zorder -= 1

section_name = safe_section_name(section.name, index)
Expand All @@ -248,14 +248,12 @@ def generate(abs_fpath, fname, blob, chunks=__chunks__, ibytes=__ibytes_dict__,
longest_section_name = len(section_name) if len(section_name) > longest_section_name else longest_section_name

# # Eval the space required to show the section names
if longest_section_name <= 5:
title_gap = '\n' * 2
elif longest_section_name <= 9:
title_gap = '\n' * 3
if longest_section_name <= 9:
title_gap = '\n'
elif longest_section_name <= 15:
title_gap = '\n' * 4
title_gap = '\n' * 2

elif type(parsedbin) == lief.ELF.Binary:
elif bp.type == 'ELF':

log.debug('Adding ELF customisations')

Expand Down Expand Up @@ -283,12 +281,10 @@ def generate(abs_fpath, fname, blob, chunks=__chunks__, ibytes=__ibytes_dict__,
longest_section_name = len(section_name) if len(section_name) > longest_section_name else longest_section_name

# # Eval the space required to show the section names
if longest_section_name <= 5:
title_gap = '\n' * 2
elif longest_section_name <= 9:
title_gap = '\n' * 3
if longest_section_name <= 9:
title_gap = '\n'
elif longest_section_name <= 15:
title_gap = '\n' * 4
title_gap = '\n' * 2

else:
log.debug('File is a currently unsupported format - (supported by lief, not yet supported by binGraph)')
Expand All @@ -305,17 +301,112 @@ def generate(abs_fpath, fname, blob, chunks=__chunks__, ibytes=__ibytes_dict__,
else:
legends.append(host.legend(loc='upper left', bbox_to_anchor=(1.01, 1), frameon=False))

if blob:
host.set_title('Binary entropy (sampled over {chunksize} byte chunks): {fname}{title_gap}'.format(chunksize=chunksize, fname=fname, title_gap=title_gap))
else:
host.set_title('Binary entropy (sampled over {chunksize} byte chunks): {fname}{title_gap}'.format(chunksize=chunksize, fname=fname, title_gap=title_gap))
host.set_title('{title_gap}'.format(title_gap=title_gap))

# # Return the plt, kwargs for the plt.savefig function, and additional information for json data
return plt, {'bbox_inches':'tight', 'bbox_extra_artists':tuple(legends)}, {}
json_data = {
'title':fname,
'info': {
'Mean': statistics.mean(shannon_samples),
'Standard deviation': statistics.stdev(shannon_samples)
}
}

return plt, {'bbox_inches':'tight', 'bbox_extra_artists':tuple(legends)}, json_data


# ### Helper functions

# # Abstracts the bin properties away from specific library calls enabling lief and pefile usage
class bin_proxy(object):
    """Abstract for different binary parsers types in use.

    Wraps either lief or pefile (whichever is already imported, or the one
    named by ``lib``) behind a single interface.

    Attributes:
        abs_fpath: path of the file handed to the parser.
        lib: 'lief', 'pefile', a caller-supplied name, or None when no
            parser module is loaded.
        bin: the parser's binary object, or None when nothing was parsed.
        type: 'PE' when the file was recognised as a PE, otherwise None.

    Callers are expected to check ``None in (bp.bin, bp.type)`` before using
    the accessor methods.
    """

    def __init__(self, abs_fpath, lib=None):
        """Select a parser library and try to parse ``abs_fpath``.

        Args:
            abs_fpath: path of the binary to parse.
            lib: optional parser name ('lief' or 'pefile'). When falsy, the
                parser is picked from the modules already present in
                ``sys.modules``, preferring lief.
        """
        super(bin_proxy, self).__init__()
        self.abs_fpath = abs_fpath

        # Initialise the results first so that ``bin`` and ``type`` always
        # exist, even when we bail out early because no parser is available.
        self.bin, self.type = None, None

        if lib:
            self.lib = lib
        elif 'lief' in sys.modules:
            self.lib = 'lief'
        elif 'pefile' in sys.modules:
            self.lib = 'pefile'
        else:
            # # We dont have a parser
            # __init__ must not return a value (doing so raises TypeError),
            # so just leave bin/type as None for the caller to detect.
            self.lib = None
            return

        self.__parse_bin()

    class __ParseError(Exception):
        """Parser failure marker (not raised by the code in this class)."""

        pass

    def __parse_bin(self):
        """Parse ``self.abs_fpath`` with the selected library.

        On success ``self.bin`` holds the parser object and ``self.type`` is
        set to 'PE'. Parser format errors are logged and leave ``self.type``
        as None.
        """
        if self.lib == 'lief':
            try:
                self.bin = lief.parse(filepath=self.abs_fpath)
                if isinstance(self.bin, lief.PE.Binary):
                    self.type = 'PE'
                    log.debug('Parsed with lief as: {}'.format(self.type))
                else:
                    # self.type is still None here, so report what lief
                    # actually returned instead.
                    log.debug('File is a currently unsupported format: {}'.format(type(self.bin).__name__))

            except lief.bad_file as e:
                log.warning('Failed to parse with lief: {}'.format(e))

        elif self.lib == 'pefile':
            try:
                self.bin = pefile.PE(self.abs_fpath)
                self.type = 'PE'

                log.debug('Parsed with pefile as: {}'.format(self.type))

            except pefile.PEFormatError as e:
                log.warning('Failed to parse with pefile: {}'.format(e))

    def get_virtual_ep(self):
        """Return the entry point address stored in the PE optional header.

        Returns None when ``self.lib`` is neither 'lief' nor 'pefile'.
        """
        if self.lib == 'lief':
            return self.bin.optional_header.addressof_entrypoint
        elif self.lib == 'pefile':
            return self.bin.OPTIONAL_HEADER.AddressOfEntryPoint

    def get_physical_from_rva(self, rva):
        """Translate ``rva`` via the parser's own conversion routine.

        Delegates to lief's ``rva_to_offset`` or pefile's
        ``get_physical_by_rva``. Returns None when ``self.lib`` is neither
        'lief' nor 'pefile'.
        """
        if self.lib == 'lief':
            return self.bin.rva_to_offset(rva)
        elif self.lib == 'pefile':
            return self.bin.get_physical_by_rva(rva)

    def sections(self):
        """Yield ``(index, section_proxy)`` for each section of the binary."""
        for index, lib_section in enumerate(self.bin.sections):
            yield index, section_proxy(self.lib, lib_section)
# # Part of bin_proxy - abstracts section calls
class section_proxy(object):
    """Abstract for different binary parsers types in use.

    Exposes ``name`` and ``offset`` for a section object produced by either
    lief or pefile. Neither attribute is set when ``lib`` is any other value.
    """

    def __init__(self, lib, lib_section):
        """Copy the section name and offset out of the parser's object.

        Args:
            lib: 'lief' or 'pefile', naming the parser that produced
                ``lib_section``.
            lib_section: the parser-specific section object.
        """
        super(section_proxy, self).__init__()
        self.lib = lib
        self.lib_section = lib_section

        if self.lib == 'lief':
            self.name = lib_section.name
            self.offset = lib_section.offset
        elif self.lib == 'pefile':
            # Section names are raw header bytes and need not be valid UTF-8.
            # Undecodable bytes are dropped rather than raising
            # UnicodeDecodeError and aborting graph generation.
            raw_name = lib_section.Name.rstrip(b'\x00')
            self.name = str(raw_name.decode("utf-8", "ignore"))
            self.offset = self.lib_section.get_offset_from_rva(self.lib_section.VirtualAddress)

# # Read files as chunks
def get_chunk(fh, chunksize=8192):
while True:
Expand Down
5 changes: 3 additions & 2 deletions modules/processing/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
HAVE_VBA2GRAPH = False

try:
from lib.cuckoo.common.graphs.binGraph.binGraph import generate_graphs as binGraph_gen
from lib.cuckoo.common.graphs.binGraph.binGraph import generate_graphs as bingraph_gen
HAVE_BINGRAPH = True
except ImportError:
HAVE_BINGRAPH = False
Expand Down Expand Up @@ -1516,7 +1516,8 @@ def run(self):
bingraph_path = os.path.join(CUCKOO_ROOT, "storage", "analyses", str(self.results["info"]["id"]), "bingraph")
if not os.path.exists(bingraph_path):
os.makedirs(bingraph_path)
binGraph_gen("", self.file_path, bingraph_path)
binggraph_args = '[{"name":"Zeros", "colour": "#15ff04", "bytes": [0]}]'
bingraph_gen(binggraph_args, self.file_path, bingraph_path)
except Exception as e:
log.info(e)
elif "PDF" in thetype or self.task["target"].endswith(".pdf"):
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,3 @@ matplotlib==2.2.2
numpy==1.15.0
six==1.11.0
statistics==1.0.3.5
lief==0.9.0

5 comments on commit 355789c

@enzok
Copy link
Contributor

@enzok enzok commented on 355789c Oct 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Graphs are no longer generating. The bingraph directory is empty.

@kevoreilly
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you tested with the latest? There is 1d98fa4 which should regenerate for empty bingraph directory.

@enzok
Copy link
Contributor

@enzok enzok commented on 355789c Oct 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like it's because of the following line removal.
I see " no display name and no $DISPLAY environment variable " in my logs.

matplotlib.use('Agg')

@kevoreilly
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have pushed replacing that line - let me know if that works.

@enzok
Copy link
Contributor

@enzok enzok commented on 355789c Oct 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That fixed it thanks!

Please sign in to comment.