Skip to content

Commit

Permalink
Merge pull request #111 from opencybersecurityalliance/develop
Browse files Browse the repository at this point in the history
2.3.29
  • Loading branch information
pcoccoli authored Sep 13, 2023
2 parents 4cc19fa + 797f1cd commit 9442a25
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 10 deletions.
2 changes: 1 addition & 1 deletion firepit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

__author__ = """IBM Security"""
__email__ = 'pcoccoli@us.ibm.com'
__version__ = '2.3.28'
__version__ = '2.3.29'


import re
Expand Down
6 changes: 5 additions & 1 deletion firepit/aio/asyncpgstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ async def attach(self):

await self.conn.execute(f'SET search_path TO "{self.session_id}", firepit_common')

async def execute(self, stmt, *values):
"""Execute SQL statement `stmt` with parameters `*values`"""
return await self.conn.execute(stmt, *values)

async def cache(self,
query_id: str,
bundle: dict):
Expand Down Expand Up @@ -607,7 +611,7 @@ async def write(self, obj):
if key in ['type', 'spec_version']:
continue
# shorten key (STIX prop) to make column names more manageable
if len(key) > 63 or 'extensions.' in key:
if len(key) > 48 or 'extensions.' in key:
shortname = self.writer.shorten(key) # Need to detect collisions!
else:
shortname = key
Expand Down
20 changes: 19 additions & 1 deletion firepit/aio/asyncstorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,13 @@ def _lookup_shortname(self, table, shortname):
cols = self.meta_dict.get(table, {})
return cols.get(shortname)

def _lookup_longname(self, table, longname):
cols = self.meta_dict.get(table, {})
for _, meta in cols.items():
if meta['path'] == longname:
return meta
return None

def column_metadata(self, table, path):
"""Get DB column metadata for STIX object path `path`"""
# 'path' here could be the prop side of a STIX object path,
Expand All @@ -277,25 +284,36 @@ def column_metadata(self, table, path):
# parse_path, then use final node to look up longname
# Strip obj_type from longname and replace with table:ref.
if table == 'observed-data':
longname = ''
longname = '' #TODO: could fast-path this?
else:
longname = f"{table}:"
links = parse_prop(table, path)
tgt_prop = None
if len(links) > 1:
# There's at least 1 reference/join
longname_parts = []
tgt_prop_parts = []
tgt_type = table
refs = []
for link in links:
if link[0] == 'rel':
# Store last referenced table as the "target" table
tgt_type = link[3]
refs.append(link[2])
longname_parts.append(link[2])
else:
tgt_prop_parts.append(link[2])
tgt_prop = '.'.join(tgt_prop_parts)
longname += '.'.join(longname_parts) + '.'
logger.debug('ref lookup shortname %s %s', tgt_type, tgt_prop)
data = self._lookup_shortname(tgt_type, tgt_prop)
if not data:
logger.debug('lookup longname %s %s (refs: %s)', tgt_type, tgt_prop, refs)
data = self._lookup_longname(tgt_type, tgt_prop)
if data:
# Return "path" of refs to caller
data['refs'] = refs
path = tgt_prop
else:
data = self._lookup_shortname(table, path)
if not data:
Expand Down
5 changes: 5 additions & 0 deletions firepit/aio/asyncwrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ async def attach(self):
raise SessionNotFound(self.connstring)
logger.debug('Attaching to storage for session %s', self.session_id)
self.store = get_storage(self.connstring)
self.conn = self.store.connection
self.placeholder = self.store.placeholder
self.dialect = self.store.dialect

Expand All @@ -66,6 +67,10 @@ async def cache(self,
"""
self.store.cache(query_id, bundle)

async def execute(self, stmt, *values):
"""Execute SQL statement `stmt` with parameters `*values`"""
return self.store.connection.execute(stmt, values)

async def tables(self):
return self.store.tables()

Expand Down
19 changes: 14 additions & 5 deletions firepit/aio/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

from firepit.aio.asyncstorage import AsyncStorage
from firepit.exceptions import DuplicateTable
from firepit.pgcommon import pg_shorten
from firepit.props import KNOWN_PROPS
from firepit.raft import json_normalize
from firepit.splitter import shorten_extension_name
from firepit.stix21 import makeid
from firepit.timestamp import timefmt

Expand Down Expand Up @@ -583,6 +583,7 @@ async def ingest(
schemas = defaultdict(OrderedDict)
objects = set()
columns = []
renames = [] # Might need to rename some columns
for col in df.columns:
# col is in the form [<obj_name>#]<obj_type>:<obj_attr>
h = col.find('#') # Might be able to do all this in advance?
Expand All @@ -600,6 +601,7 @@ async def ingest(
obj_key = obj_type
objects.add(obj_key)
if col.endswith('_refs'):
renames.append(col)
continue
pd_col = df[col]
index = pd_col.first_valid_index()
Expand All @@ -618,14 +620,18 @@ async def ingest(
dtype = value.__class__.__name__
if dtype == 'list':
df[col] = df[col].apply(lambda x: ujson.dumps(x, ensure_ascii=False))
schemas[obj_type][obj_attr] = _infer_type(writer, obj_attr, value, dtype)
logger.debug('ingest: col %s val %s dtype %s (%s)', col, value, dtype, schemas[obj_type][obj_attr])

# shorten key (STIX prop) to make column names more manageable
if len(obj_attr) > 63 or 'extensions.' in obj_attr:
shortname = shorten_extension_name(obj_attr) # Need to detect collisions!
if len(obj_attr) > 48 or 'extensions.' in obj_attr:
shortname = pg_shorten(obj_attr) # Need to detect collisions!
renames.append(f'{obj_name}#{obj_type}:{shortname}')
else:
shortname = obj_attr
renames.append(col)

sql_type = _infer_type(writer, shortname, value, dtype)
schemas[obj_type][shortname] = sql_type
logger.debug('ingest: col %s val %s dtype %s (%s)', col, value, dtype, sql_type)

# Generate __columns entry
if dtype.endswith('64'):
Expand All @@ -638,6 +644,9 @@ async def ingest(
'dtype': dtype,
})

# Now do the actual column renaming/shortening
df.columns = renames

col_df = pd.DataFrame.from_records(columns)
col_df = col_df.drop_duplicates(['otype', 'path'])
await writer.write_df('__columns', col_df, None, COLUMNS_SCHEMA)
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 2.3.28
current_version = 2.3.29
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,6 @@
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/opencybersecurityalliance/firepit',
version='2.3.28',
version='2.3.29',
zip_safe=False,
)

0 comments on commit 9442a25

Please sign in to comment.