
Commit

Merge pull request #102 from obsidianforensics/update-ccl_chrome_indexeddb

Update ccl chrome indexeddb
obsidianforensics authored Oct 11, 2021
2 parents 8d73d5b + ba5f551 commit 1739b89
Showing 9 changed files with 366 additions and 22 deletions.
2 changes: 1 addition & 1 deletion pyhindsight/analysis.py
@@ -917,7 +917,7 @@ def generate_excel(self, output_object):
s.write(row_number, 11, item.file_size, black_value_format)
s.write(row_number, 12, item.magic_results, black_value_format)

elif item.row_type.startswith("local storage"):
elif item.row_type.startswith(("local storage", "session storage")):
s.write_string(row_number, 0, item.row_type, black_type_format)
s.write_string(row_number, 1, item.origin, black_url_format)
s.write_string(row_number, 2, item.key, black_field_format)
44 changes: 44 additions & 0 deletions pyhindsight/browsers/chrome.py
@@ -13,6 +13,8 @@
import puremagic
import urllib
import base64

import pyhindsight.lib.ccl_chrome_indexeddb.ccl_blink_value_deserializer
from pyhindsight.browsers.webbrowser import WebBrowser
from pyhindsight import utils

@@ -945,6 +947,41 @@ def get_local_storage(self, path, dir_name):
log.info(f' - Parsed {len(results)} items from {len(filtered_listing)} files')
self.parsed_storage.extend(results)

def get_session_storage(self, path, dir_name):
results = []

# Grab file list of 'Session Storage' directory
ss_path = os.path.join(path, dir_name)
log.info('Session Storage:')
log.info(f' - Reading from {ss_path}')

session_storage_listing = os.listdir(ss_path)
log.debug(f' - {len(session_storage_listing)} files in Session Storage directory')

# Session Storage parsing is thanks to Alex Caithness of CCL Forensics; ccl_chrome_indexeddb
# is bundled with Hindsight with his consent (and our thanks!). The below logic is adapted
# from his Chromium_dump_session_storage.py script.
import pathlib
from pyhindsight.lib.ccl_chrome_indexeddb import ccl_chromium_sessionstorage

ss_ldb_records = ccl_chromium_sessionstorage.SessionStoreDb(pathlib.Path(ss_path))
for origin in ss_ldb_records.iter_hosts():
origin_kvs = ss_ldb_records.get_all_for_host(origin)
for key, values in origin_kvs.items():
for value in values:
results.append(Chrome.SessionStorageItem(
self.profile_path, origin, key, value.value, value.leveldb_sequence_number, 'Live', ss_path))

# Some records don't have an associated host for some unknown reason; still include them.
for key, value in ss_ldb_records.iter_orphans():
results.append(Chrome.SessionStorageItem(
self.profile_path, '<orphan>', key, value.value, value.leveldb_sequence_number, 'Live', ss_path))

ss_ldb_records.close()
self.artifacts_counts['Session Storage'] = len(results)
log.info(f' - Parsed {len(results)} Session Storage items')
self.parsed_storage.extend(results)

def get_extensions(self, path, dir_name):
results = []
log.info('Extensions:')
@@ -2327,6 +2364,13 @@ def process(self):
self.artifacts_display['Local Storage'],
self.artifacts_counts.get('Local Storage', '0')))

if 'Session Storage' in input_listing:
self.get_session_storage(self.profile_path, 'Session Storage')
self.artifacts_display['Session Storage'] = 'Session Storage records'
print(self.format_processing_output(
self.artifacts_display['Session Storage'],
self.artifacts_counts.get('Session Storage', '0')))

if 'Extensions' in input_listing:
self.get_extensions(self.profile_path, 'Extensions')
self.artifacts_display['Extensions'] = 'Extensions'
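For reference, a minimal standalone sketch of the ccl_chromium_sessionstorage calls that the new get_session_storage() method uses; the profile path below is a placeholder and would need to point at a real Chrome profile's 'Session Storage' directory.

import pathlib
from pyhindsight.lib.ccl_chrome_indexeddb import ccl_chromium_sessionstorage

# Placeholder path -- point this at an actual Chrome profile's Session Storage directory.
ss_path = pathlib.Path('/path/to/Chrome/Default/Session Storage')

ss_db = ccl_chromium_sessionstorage.SessionStoreDb(ss_path)
for origin in ss_db.iter_hosts():
    for key, values in ss_db.get_all_for_host(origin).items():
        for value in values:
            # Each value carries the stored string and its LevelDB sequence number.
            print(origin, key, value.value, value.leveldb_sequence_number)

# Orphaned records have no recoverable host but are still worth reporting.
for key, value in ss_db.iter_orphans():
    print('<orphan>', key, value.value, value.leveldb_sequence_number)

ss_db.close()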
23 changes: 23 additions & 0 deletions pyhindsight/browsers/webbrowser.py
@@ -349,6 +349,29 @@ def __init__(self, profile, origin, key, value, seq, state, source_path, last_mo
self.source_path = source_path
self.last_modified = last_modified

class SessionStorageItem(StorageItem):
def __init__(self, profile, origin, key, value, seq, state, source_path):
"""
:param profile: The path to the browser profile this item is part of.
:param origin: The web origin this SessionStorage item belongs to.
:param key: The key of the SessionStorage item.
:param value: The value of the SessionStorage item (rendered in UTF-16).
:param seq: The sequence number of the key.
:param state: The state of the record (live or deleted).
:param source_path: The path to the source of the record.
"""
super(WebBrowser.SessionStorageItem, self).__init__(
'session storage', profile=profile, origin=origin, key=key, value=value, seq=seq, state=state,
source_path=source_path)
self.profile = profile
self.origin = origin
self.key = key
self.value = value
self.seq = seq
self.state = state
self.source_path = source_path

class FileSystemItem(StorageItem):
def __init__(self, profile, origin, key, value, seq, state, source_path, last_modified=None,
file_exists=None, file_size=None, magic_results=None):
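A minimal sketch of how one of the new SessionStorageItem objects gets populated; all field values here are placeholders for illustration, since get_session_storage() constructs these from recovered key/value pairs.

from pyhindsight.browsers.webbrowser import WebBrowser

# Placeholder values for illustration only.
item = WebBrowser.SessionStorageItem(
    profile='/path/to/Chrome/Default',
    origin='https://example.com',
    key='cart_state',
    value='{"items": 2}',
    seq=1234,
    state='Live',
    source_path='/path/to/Chrome/Default/Session Storage')

print(item.origin, item.key, item.value, item.seq)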
pyhindsight/lib/ccl_chrome_indexeddb/ccl_blink_value_deserializer.py
@@ -25,7 +25,7 @@
import typing
from dataclasses import dataclass

import ccl_v8_value_deserializer
from pyhindsight.lib.ccl_chrome_indexeddb import ccl_v8_value_deserializer

# See: https://chromium.googlesource.com/chromium/src/third_party/+/master/blink/renderer/bindings/core/v8/serialization

@@ -101,7 +101,7 @@ class Constants:
# height:uint32_t, pixelDataLength:uint32_t,
# data:byte[pixelDataLength]
# -> ImageBitmap (ref)
tag_kImageBitmapTransferTag = "G" # index:uint32_t -> ImageBitmap. For ImageBitmap transfer
tag_kImageBitmapTransferTag = b"G" # index:uint32_t -> ImageBitmap. For ImageBitmap transfer
tag_kOffscreenCanvasTransferTag = b"H" # index, width, height, id,
# filter_quality::uint32_t ->
# OffscreenCanvas. For OffscreenCanvas
68 changes: 55 additions & 13 deletions pyhindsight/lib/ccl_chrome_indexeddb/ccl_chromium_indexeddb.py
@@ -1,5 +1,5 @@
"""
Copyright 2020, CCL Forensics
Copyright 2020-2021, CCL Forensics
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
@@ -20,6 +20,7 @@
SOFTWARE.
"""

import sys
import struct
import os
import pathlib
@@ -34,7 +35,7 @@
import ccl_v8_value_deserializer
import ccl_blink_value_deserializer

__version__ = "0.2"
__version__ = "0.6"
__description__ = "Module for reading Chromium IndexedDB LevelDB databases."
__contact__ = "Alex Caithness"

@@ -135,12 +136,23 @@ def __init__(self, buffer: bytes):
else:
raise ValueError() # Shouldn't happen

# trim the raw_key in case this is an inner key:
self.raw_key = self.raw_key[0: self._raw_length]

def __repr__(self):
return f"<IdbKey {self.value}>"

def __str__(self):
return self.__repr__()

def __eq__(self, other):
if not isinstance(other, IdbKey):
raise NotImplementedError()
return self.raw_key == other.raw_key

def __ne__(self, other):
return not self == other


class IndexedDBExternalObjectType(enum.IntEnum):
# see: https://github.com/chromium/chromium/blob/master/content/browser/indexed_db/indexed_db_external_object.h
@@ -278,12 +290,16 @@ def get_meta(self, db_id: int, obj_store_id: int, meta_type: ObjectStoreMetadata


class IndexedDbRecord:
def __init__(self, owner: "IndexedDb", db_id: int, obj_store_id: int, key: IdbKey, value: typing.Any):
def __init__(
self, owner: "IndexedDb", db_id: int, obj_store_id: int, key: IdbKey,
value: typing.Any, is_live: bool, ldb_seq_no: int):
self.owner = owner
self.db_id = db_id
self.obj_store_id = obj_store_id
self.key = key
self.value = value
self.is_live = is_live
self.sequence_number = ldb_seq_no

def resolve_blob_index(self, blob_index: ccl_blink_value_deserializer.BlobIndex) -> IndexedDBExternalObject:
"""Resolve a ccl_blink_value_deserializer.BlobIndex to its IndexedDBExternalObject
@@ -372,7 +388,9 @@ def _get_raw_database_metadata(self, live_only=True):
if record.key.startswith(prefix) and record.state == ccl_leveldb.KeyState.Live:
# we only want live keys and the newest version thereof (highest seq)
meta_type = record.key[len(prefix)]
db_meta[(db_id.dbid_no, meta_type)] = record
old_version = db_meta.get((db_id.dbid_no, meta_type))
if old_version is None or old_version.seq < record.seq:
db_meta[(db_id.dbid_no, meta_type)] = record

return db_meta

@@ -403,7 +421,7 @@ def _get_raw_object_store_metadata(self, live_only=True):

def iterate_records(
self, db_id: int, store_id: int, *,
live_only=True, bad_deserializer_data_handler: typing.Callable[[IdbKey, bytes], typing.Any] = None):
live_only=False, bad_deserializer_data_handler: typing.Callable[[IdbKey, bytes], typing.Any] = None):
if db_id > 0x7f or store_id > 0x7f:
raise NotImplementedError("there could be this many dbs or object stores, but I don't support it yet")

@@ -423,7 +441,11 @@ def iterate_records(
blink_type_tag = record.value[val_idx]
if blink_type_tag != 0xff:
# TODO: probably don't want to fail hard here long term...
raise ValueError("Blink type tag not present")
if bad_deserializer_data_handler is not None:
bad_deserializer_data_handler(key, record.value)
continue
else:
raise ValueError("Blink type tag not present")
val_idx += 1

blink_version, varint_raw = _le_varint_from_bytes(record.value[val_idx:])
@@ -437,9 +459,11 @@
value = deserializer.read()
except Exception:
if bad_deserializer_data_handler is not None:
bad_deserializer_data_handler(key, record.value[val_idx:])
bad_deserializer_data_handler(key, record.value)
continue
raise
yield IndexedDbRecord(self, db_id, store_id, key, value)
yield IndexedDbRecord(self, db_id, store_id, key, value,
record.state == ccl_leveldb.KeyState.Live, record.seq)

def get_blob_info(self, db_id: int, store_id: int, raw_key: bytes, file_index: int) -> IndexedDBExternalObject:
if db_id > 0x7f or store_id > 0x7f:
@@ -501,14 +525,31 @@ def name(self) -> str:
return self._raw_db.get_object_store_metadata(
self._dbid_no, self._obj_store_id, ObjectStoreMetadataType.StoreName)

@staticmethod
def _log_error(key: IdbKey, data: bytes):
sys.stderr.write(f"ERROR decoding key: {key}\n")

def get_blob(self, raw_key: bytes, file_index: int) -> typing.BinaryIO:
return self._raw_db.get_blob(self._dbid_no, self.object_store_id, raw_key, file_index)

# def __iter__(self):
# yield from self._raw_db.iterate_records(self._dbid_no, self._obj_store_id)

def iterate_records(self):
yield from self._raw_db.iterate_records(self._dbid_no, self._obj_store_id)
def iterate_records(
self, *, live_only=False, errors_to_stdout=False,
bad_deserializer_data_handler: typing.Callable[[IdbKey, bytes], typing.Any] = None):

def _handler(key, record):
if bad_deserializer_data_handler is not None:
bad_deserializer_data_handler(key, record)
if errors_to_stdout:
WrappedObjectStore._log_error(key, record)

handler = _handler if errors_to_stdout or bad_deserializer_data_handler is not None else None

yield from self._raw_db.iterate_records(
self._dbid_no, self._obj_store_id, live_only=live_only,
bad_deserializer_data_handler=handler)

def __repr__(self):
return f"<WrappedObjectStore: object_store_id={self.object_store_id}; name={self.name}>"
@@ -526,7 +567,8 @@ def __init__(self, raw_db: IndexedDb, dbid: DatabaseId):
self._obj_store_names = tuple(names)
# pre-compile object store wrappers as there's little overhead
self._obj_stores = tuple(
WrappedObjectStore(self._raw_db, self.db_number, i) for i in range(1, self.object_store_count + 1))
WrappedObjectStore(
self._raw_db, self.db_number, i) for i in range(1, self.object_store_count + 1))

@property
def name(self) -> str:
@@ -568,7 +610,7 @@ def __len__(self):
def __contains__(self, item):
return item in self._obj_store_names

def __getitem__(self, item) -> "WrappedObjectStore":
def __getitem__(self, item) -> WrappedObjectStore:
if isinstance(item, int):
return self.get_object_store_by_id(item)
elif isinstance(item, str):
Expand Down Expand Up @@ -623,7 +665,7 @@ def __contains__(self, item):
else:
raise TypeError("keys must be provided as a tuple of (name, origin) or a str (if only single origin) or int")

def __getitem__(self, item: typing.Union[int, str, typing.Tuple[str, str]]) -> "WrappedDatabase":
def __getitem__(self, item: typing.Union[int, str, typing.Tuple[str, str]]) -> WrappedDatabase:
if isinstance(item, int):
if item in self._db_number_lookup:
return self._db_number_lookup[item]
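A minimal sketch of iterating IndexedDB records through the updated wrapper API above; WrappedIndexDB is assumed to be the module's top-level wrapper class (it does not appear in this diff), and the paths, database name, and store name are placeholders.

import pathlib
from pyhindsight.lib.ccl_chrome_indexeddb import ccl_chromium_indexeddb

# Placeholder path; WrappedIndexDB is assumed here and is not part of this diff.
leveldb_dir = pathlib.Path('/path/to/IndexedDB/https_example.com_0.indexeddb.leveldb')
idb = ccl_chromium_indexeddb.WrappedIndexDB(leveldb_dir)

db = idb['notes_db', 'https://example.com']   # (database name, origin) tuple lookup
store = db['notes']                           # object store by name

def on_bad_data(key, raw_value):
    # Invoked instead of raising when a value cannot be deserialized.
    print(f'could not deserialize value for {key}')

# live_only=False now also yields deleted (superseded) record versions.
for record in store.iterate_records(live_only=False, bad_deserializer_data_handler=on_bad_data):
    print(record.key.value, record.is_live, record.sequence_number, record.value)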